Bfloat16 support for fbgemm::pack_segments_forward/backward_cpu
Summary: Add bfloat16 support for the pack_segments forward and backward CPU operators (see the usage sketch below the changed-files summary).

Reviewed By: hanzlfs

Differential Revision: D62655781
jvandebon authored and facebook-github-bot committed Sep 19, 2024
1 parent: 904a1c6 · commit: 2a7a1a6
Showing 2 changed files with 13 additions and 7 deletions.
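
For orientation, here is a minimal usage sketch of what this change enables on CPU. It is a hedged sketch, not part of the commit: it assumes fbgemm_gpu is installed so that importing it registers torch.ops.fbgemm.*, and it mirrors the call pattern used in pack_segments_test.py below.

# Sketch only: exercising the bfloat16 CPU path added by this commit.
# Assumes fbgemm_gpu is importable; importing it registers torch.ops.fbgemm.*.
import torch
import fbgemm_gpu  # noqa: F401

lengths = torch.tensor([1, 3, 2], dtype=torch.int)  # rows per segment
t_in = torch.randn(int(lengths.sum()), 4, dtype=torch.bfloat16)
max_length = int(lengths.max())

# Before this change the CPU op rejected bfloat16 with
# "t_in must be of type float or double or half".
packed = torch.ops.fbgemm.pack_segments(
    t_in=t_in, lengths=lengths, max_length=max_length
)
print(packed.shape, packed.dtype)  # torch.Size([3, 3, 4]) torch.bfloat16
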
fbgemm_gpu/src/sparse_ops/sparse_ops_cpu.cpp: 10 changes (6 additions, 4 deletions)
@@ -2843,8 +2843,9 @@ Tensor pack_segments_forward_cpu(
   TORCH_CHECK(
       t_in.dtype() == at::ScalarType::Float ||
           t_in.dtype() == at::ScalarType::Double ||
-          t_in.dtype() == at::ScalarType::Half,
-      "t_in must be of type float or double or half");
+          t_in.dtype() == at::ScalarType::Half ||
+          t_in.dtype() == at::ScalarType::BFloat16,
+      "t_in must be of type float, double, half, or bfloat16");
   TORCH_CHECK_GT(max_length, 0);
 
   const auto t_in_cont = t_in.expect_contiguous();
@@ -2911,8 +2912,9 @@ Tensor pack_segments_backward_cpu(
   TORCH_CHECK(
       data.dtype() == at::ScalarType::Float ||
           data.dtype() == at::ScalarType::Double ||
-          data.dtype() == at::ScalarType::Half,
-      "data must be of type float or double or half");
+          data.dtype() == at::ScalarType::Half ||
+          data.dtype() == at::ScalarType::BFloat16,
+      "data must be of type float, double, half, or bfloat16");
   TORCH_CHECK(
       max_length == data.sizes()[1],
       "max_length should be equal to the second dimension of the packed segments");
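
The backward guard is widened in the same way. Assuming the operator is autograd-enabled on CPU (an assumption based on the existing gradient checks in pack_segments_test.py, not something shown in this hunk), a sketch of the gradient path that now also accepts bfloat16:

# Sketch only: the backward pass now also runs in bfloat16 on CPU
# (reaching pack_segments_backward_cpu), assuming the op is autograd-enabled.
import torch
import fbgemm_gpu  # noqa: F401

lengths = torch.tensor([2, 1], dtype=torch.int)
t_in = torch.randn(3, 4, dtype=torch.bfloat16, requires_grad=True)
packed = torch.ops.fbgemm.pack_segments(
    t_in=t_in, lengths=lengths, max_length=2
)
packed.sum().backward()  # gradient flows back through the packed segments
print(t_in.grad.dtype)   # torch.bfloat16
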
fbgemm_gpu/test/sparse/pack_segments_test.py: 10 changes (7 additions, 3 deletions)
@@ -91,6 +91,7 @@ def _pack_segments_ref(
             [
                 torch.float,
                 torch.half,
+                torch.bfloat16,
             ]
         ),
         torch_compile=st.booleans(),
@@ -192,6 +193,7 @@ def test_pack_segments(
             [
                 torch.float,
                 torch.half,
+                torch.bfloat16,
             ]
         ),
         torch_compile=st.booleans(),
@@ -207,7 +209,8 @@ def test_pack_segments_smaller_max_len(
         dtype: torch.dtype,
         torch_compile: bool,
     ) -> None:
-        input_data = torch.tensor(np.random.rand(batch_size, n, k), dtype=dtype)
+        input_raw = np.random.rand(batch_size, n, k)
+        input_data = torch.tensor(input_raw, dtype=dtype)
         lengths = torch.tensor(
             get_n_rand_num_summing_to_k(divisions, batch_size), dtype=torch.int
         )
@@ -221,10 +224,10 @@ def test_pack_segments_smaller_max_len(
 
         packed_ref = self._pack_segments_ref(
             lengths,
-            input_data,
+            input_raw,
             max_length=max_length,
         )
-        # pyre-fixme[6]: For 2nd param expected `Tensor` but got `ndarray`.
+        packed_ref = torch.Tensor(packed_ref).to(dtype)
         self.assertTrue(torch.equal(packed_tensor, packed_ref))
 
         if gpu_available:
@@ -248,6 +251,7 @@ def test_pack_segments_smaller_max_len(
             [
                 torch.float,
                 torch.half,
+                torch.bfloat16,
             ]
         ),
     )
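
A plausible reading of the test restructuring above (an inference, not stated in the commit message): NumPy has no bfloat16 dtype, so the NumPy-based _pack_segments_ref helper cannot consume a bfloat16 tensor; feeding it the raw float64 ndarray and casting its result with .to(dtype) keeps the reference comparable to the op's bfloat16 output. A small illustration of the underlying limitation:

import torch

t = torch.zeros(2, dtype=torch.bfloat16)
try:
    t.numpy()  # NumPy has no bfloat16 dtype, so this conversion is rejected
except (TypeError, RuntimeError) as e:  # exact exception type may vary by PyTorch version
    print(e)
# Workaround used by the test: keep the reference computation on the float64
# ndarray and cast only the final packed result to the target dtype.
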
