Skip to content

Commit

Permalink
Switch between loading the HIP and CUDA C++ shared libraries (.so) (pytorch#2236)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: pytorch#2236

- Switch to the HIP-related TARGETS (those with the `_hip` suffix) when building for AMD GPUs.
- Add "supports_python_dlopen = True," to the related deps so that they can be loaded via dlopen.
- Add missing deps like `"//deeplearning/fbgemm/fbgemm_gpu:split_table_batched_embeddings_hip",`

Reviewed By: q10, zoranzhao

Differential Revision: D52435932

fbshipit-source-id: 7ad845f294b49c4bf69f120ed26a0e6742b6ce48
  • Loading branch information
jianyuh authored and facebook-github-bot committed Dec 28, 2023
1 parent a3b44fd commit 0009e24
Show file tree
Hide file tree
Showing 26 changed files with 176 additions and 47 deletions.
5 changes: 4 additions & 1 deletion fbgemm_gpu/bench/batched_unary_embeddings_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@
else:
from fbgemm_gpu.bench.bench_utils import benchmark_torch_function

torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
if torch.version.hip:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_hip")
else:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu")


Expand Down
5 changes: 4 additions & 1 deletion fbgemm_gpu/bench/histogram_binning_calibration_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
# pyre-ignore[21]
from fbgemm_gpu import open_source # noqa: F401
except Exception:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
if torch.version.hip:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_hip")
else:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu")


Expand Down
5 changes: 4 additions & 1 deletion fbgemm_gpu/bench/jagged_tensor_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@
else:
from fbgemm_gpu.bench.bench_utils import benchmark_torch_function

torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
if torch.version.hip:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_hip")
else:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu")


Expand Down
9 changes: 8 additions & 1 deletion fbgemm_gpu/bench/merge_embeddings_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,14 @@
else:
from fbgemm_gpu.bench.bench_utils import benchmark_torch_function

torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:merge_pooled_embeddings")
if torch.version.hip:
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu:merge_pooled_embeddings_hip"
)
else:
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu:merge_pooled_embeddings"
)
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu:merge_pooled_embeddings_cpu"
)
Expand Down
5 changes: 4 additions & 1 deletion fbgemm_gpu/bench/quantize_ops_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@
else:
from fbgemm_gpu.bench.bench_utils import benchmark_torch_function

torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
if torch.version.hip:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_hip")
else:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu")


Expand Down
5 changes: 4 additions & 1 deletion fbgemm_gpu/bench/sparse_ops_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@
else:
from fbgemm_gpu.bench.bench_utils import benchmark_torch_function

torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
if torch.version.hip:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_hip")
else:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu/codegen:index_select_ops")

Expand Down
14 changes: 10 additions & 4 deletions fbgemm_gpu/bench/split_embeddings_cache_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,16 @@
# pyre-ignore[21]
from fbgemm_gpu import open_source # noqa: F401
except Exception:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:cumem_utils")
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu:split_table_batched_embeddings"
)
if torch.version.hip:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:cumem_utils_hip")
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu:split_table_batched_embeddings_hip"
)
else:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:cumem_utils")
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu:split_table_batched_embeddings"
)


# pyre-ignore
Expand Down
11 changes: 8 additions & 3 deletions fbgemm_gpu/bench/ssd_table_batched_embeddings_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,14 @@

logging.basicConfig(level=logging.DEBUG)

torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu:ssd_split_table_batched_embeddings"
)
if torch.version.hip:
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu:ssd_split_table_batched_embeddings_hip"
)
else:
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu:ssd_split_table_batched_embeddings"
)


logging.basicConfig(level=logging.DEBUG)
Expand Down
5 changes: 4 additions & 1 deletion fbgemm_gpu/bench/stride_gemm_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@
# pyre-ignore[21]
from fbgemm_gpu import open_source # noqa: F401
except Exception:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
if torch.version.hip:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_hip")
else:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu")


Expand Down
25 changes: 19 additions & 6 deletions fbgemm_gpu/codegen/split_embedding_codegen_lookup_invoker.template
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,30 @@ from .lookup_args import *

# Provide compatibility to downstream packages for eventual migration to the split training / inference packages
try:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops_cuda_training")
if torch.version.hip:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops_hip_training")
else:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops_cuda_training")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops_cpu_training")
except Exception:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops")
if torch.version.hip:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops_hip")
else:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops_cpu")

torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:cumem_utils")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
if torch.version.hip:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:cumem_utils_hip")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_hip")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:split_table_batched_embeddings_hip")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:embedding_inplace_update_hip")
else:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:cumem_utils")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:split_table_batched_embeddings")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:embedding_inplace_update")

torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:split_table_batched_embeddings")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:embedding_inplace_update")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:embedding_inplace_update_cpu")

{%- endif %}
Expand Down
6 changes: 5 additions & 1 deletion fbgemm_gpu/fbgemm_gpu/batched_unary_embeddings_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@
# pyre-ignore[21]
from fbgemm_gpu import open_source # noqa: F401
except Exception:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
if torch.version.hip:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_hip")
else:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")

torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu")


Expand Down
11 changes: 8 additions & 3 deletions fbgemm_gpu/fbgemm_gpu/permute_pooled_embedding_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,14 @@
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu:permute_pooled_embedding_ops_cpu"
)
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu:permute_pooled_embedding_ops_gpu"
)
if torch.version.hip:
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu:permute_pooled_embedding_ops_gpu_hip"
)
else:
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu:permute_pooled_embedding_ops_gpu"
)


class PermutePooledEmbeddings:
Expand Down
6 changes: 5 additions & 1 deletion fbgemm_gpu/fbgemm_gpu/quantize_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@
# pyre-ignore[21]
from fbgemm_gpu import open_source # noqa: F401
except Exception:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
if torch.version.hip:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_hip")
else:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")

torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu")

TORCH_HALF_MIN: float = torch.finfo(torch.float16).min
Expand Down
21 changes: 17 additions & 4 deletions fbgemm_gpu/fbgemm_gpu/sparse_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,28 @@
# pyre-ignore
from fbgemm_gpu import open_source # noqa: F401
except Exception:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
if torch.version.hip:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_hip")
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu:merge_pooled_embeddings_hip"
)
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops_hip"
)
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:input_combine_hip")
else:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu:merge_pooled_embeddings"
)
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:input_combine")

torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:merge_pooled_embeddings")
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu:merge_pooled_embeddings_cpu"
)
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops_cpu")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:input_combine")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:input_combine_cpu")

import torch.utils._pytree as pytree
Expand Down
7 changes: 6 additions & 1 deletion fbgemm_gpu/fbgemm_gpu/split_table_batched_embeddings_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,12 @@
)

try:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops")
if torch.version.hip:
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops_hip"
)
else:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops_cpu")
except Exception:
pass
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,14 @@
)

try:
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops_cuda_inference"
)
if torch.version.hip:
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops_hip_inference"
)
else:
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops_cuda_inference"
)
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops_cpu_inference"
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,14 @@
)

try:
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops_cuda_training"
)
if torch.version.hip:
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops_hip_training"
)
else:
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops_cuda_training"
)
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu/codegen:embedding_ops_cpu_training"
)
Expand Down
11 changes: 8 additions & 3 deletions fbgemm_gpu/fbgemm_gpu/ssd_split_table_batched_embeddings_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,14 @@
from torch.autograd.profiler import record_function

try:
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu:ssd_split_table_batched_embeddings"
)
if torch.version.hip:
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu:ssd_split_table_batched_embeddings_hip"
)
else:
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu:ssd_split_table_batched_embeddings"
)
except OSError:
# Keep for BC: will be deprecated soon.
torch.ops.load_library(
Expand Down
6 changes: 5 additions & 1 deletion fbgemm_gpu/test/batched_unary_embeddings_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@
from test_utils import gpu_unavailable

except Exception:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
if torch.version.hip:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_hip")
else:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")

torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu")
from fbgemm_gpu.test.test_utils import gpu_unavailable

Expand Down
5 changes: 4 additions & 1 deletion fbgemm_gpu/test/input_combine_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@
# pyre-ignore[21]
from test_utils import cpu_and_maybe_gpu, optests
except Exception:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:input_combine")
if torch.version.hip:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:input_combine_hip")
else:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:input_combine")
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:input_combine_cpu")
from fbgemm_gpu.test.test_utils import cpu_and_maybe_gpu, optests

Expand Down
6 changes: 5 additions & 1 deletion fbgemm_gpu/test/jagged_tensor_ops_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,11 @@
TEST_WITH_ROCM,
)
except Exception:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
if torch.version.hip:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_hip")
else:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")

torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu")
import fbgemm_gpu.sparse_ops # noqa: F401, E402
from fbgemm_gpu.test.test_utils import (
Expand Down
6 changes: 5 additions & 1 deletion fbgemm_gpu/test/layout_transform_ops_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@
from test_utils import gpu_unavailable

except Exception:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
if torch.version.hip:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_hip")
else:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")

torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu")
from fbgemm_gpu.test.test_utils import gpu_unavailable

Expand Down
10 changes: 9 additions & 1 deletion fbgemm_gpu/test/merge_pooled_embeddings_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,15 @@
# pyre-ignore[21]
from test_utils import gpu_unavailable
except Exception:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:merge_pooled_embeddings")
if torch.version.hip:
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu:merge_pooled_embeddings_hip"
)
else:
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu:merge_pooled_embeddings"
)

torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu:merge_pooled_embeddings_cpu"
)
Expand Down
5 changes: 4 additions & 1 deletion fbgemm_gpu/test/metric_ops_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@
from fbgemm_gpu import open_source # noqa: F401

except Exception:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:metric_ops")
if torch.version.hip:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:metric_ops_hip")
else:
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:metric_ops")


class MetricOpsTest(unittest.TestCase):
Expand Down
Loading

0 comments on commit 0009e24

Please sign in to comment.