Skip to content

Commit

Permalink
Benchmark block_bucketize_sparse_features uneven sharding for GPU (pytorch#2169)
Browse files Browse the repository at this point in the history

Summary:
Pull Request resolved: pytorch#2169

CPU implementation D51288847

Reviewed By: tissue3

Differential Revision: D51533599

fbshipit-source-id: cf9c7fcbe7043f385e97901a95916287c9f618a5
  • Loading branch information
gnahzg authored and facebook-github-bot committed Dec 4, 2023
1 parent 88fc6e7 commit dbc3157
Showing 1 changed file with 19 additions and 8 deletions.
27 changes: 19 additions & 8 deletions fbgemm_gpu/bench/sparse_ops_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -874,9 +874,11 @@ def ben(fn, name, ad_indices, ad_lengths, batch_offsets, num_ads_in_batch):
@click.option("--batch-size", default=4096)
@click.option("--bucket-num", default=16)
@click.option("--input-precision", type=str, default="long")
@click.option("--device", type=click.Choice(["cpu", "cuda"]), default="cpu")
def block_bucketize_sparse_features_bench(
row_size: int, batch_size: int, bucket_num: int, input_precision: str
row_size: int, batch_size: int, bucket_num: int, input_precision: str, device: str
) -> None:

dtype = torch.int
if input_precision == "int":
dtype = torch.int
Expand All @@ -900,25 +902,34 @@ def block_bucketize_sparse_features_bench(
block_sizes = torch.tensor([bucket_size] * lengths.numel(), dtype=dtype)

bucket_pos = [j * bucket_size for j in range(bucket_num + 1)]
block_bucketize_pos = [torch.tensor(bucket_pos)] * lengths.numel()
block_bucketize_pos = [torch.tensor(bucket_pos, device=device)] * lengths.numel()
test_param = {"uneven": block_bucketize_pos, "even": None}
print("device {device}")
for name, is_block_bucketize_pos in test_param.items():
time, output = benchmark_torch_function(
torch.ops.fbgemm.block_bucketize_sparse_features,
(
lengths,
indices,
lengths if device == "cpu" else lengths.to(device),
indices if device == "cpu" else indices.to(device),
False,
True,
block_sizes,
block_sizes if device == "cpu" else block_sizes.to(device),
bucket_num,
weights,
weights
if device == "cpu"
else (weights.to(device) if weights is not None else None),
None,
-1, # unused
is_block_bucketize_pos,
is_block_bucketize_pos
if device == "cpu"
else (
[i.to(device) for i in is_block_bucketize_pos]
if is_block_bucketize_pos is not None
else None
),
),
iters=100,
device="cpu",
device=device,
)

num_bytes = 0
Expand Down

0 comments on commit dbc3157

Please sign in to comment.