From 2ab9b619a028657f7dba17ca3de89a3f6c5ee95f Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Sun, 19 May 2024 11:34:47 +0000 Subject: [PATCH] 2024-05-19 nightly release (37c283ccb0ed51555994f743fa2a3a188f5a479f) --- .../gen_ai/src/quantize/cutlass_extensions.cu | 8 +++++++ .../gen_ai/test/quantize/quantize_test.py | 4 ++++ fbgemm_gpu/fbgemm_gpu/__init__.py | 21 +++++++++++++++---- fbgemm_gpu/fbgemm_gpu/docs/__init__.py | 5 ++++- 4 files changed, 33 insertions(+), 5 deletions(-) diff --git a/fbgemm_gpu/experimental/gen_ai/src/quantize/cutlass_extensions.cu b/fbgemm_gpu/experimental/gen_ai/src/quantize/cutlass_extensions.cu index d973d28ac..ef44aedb1 100644 --- a/fbgemm_gpu/experimental/gen_ai/src/quantize/cutlass_extensions.cu +++ b/fbgemm_gpu/experimental/gen_ai/src/quantize/cutlass_extensions.cu @@ -1896,6 +1896,14 @@ at::Tensor f8f8bf16_cublas( "CUDA version is older than 12.0"); // requires CUDA>=12 } at::Tensor f8f8bf16( + at::Tensor XQ, // FP8 + at::Tensor WQ, // FP8 + at::Tensor scale, + bool use_fast_accum) { + throw std::runtime_error( + "CUDA version is older than 12.0"); // requires CUDA>=12 +} +at::Tensor f8f8bf16_tensorwise( at::Tensor XQ, // FP8 at::Tensor WQ, // FP8 double scale, diff --git a/fbgemm_gpu/experimental/gen_ai/test/quantize/quantize_test.py b/fbgemm_gpu/experimental/gen_ai/test/quantize/quantize_test.py index 627454336..8f0235f5d 100644 --- a/fbgemm_gpu/experimental/gen_ai/test/quantize/quantize_test.py +++ b/fbgemm_gpu/experimental/gen_ai/test/quantize/quantize_test.py @@ -259,3 +259,7 @@ def test_quantize_fp8_per_tensor_with_ub( zq_ref = (x @ w.T).to(torch.bfloat16) torch.testing.assert_close(zq, zq_ref, atol=1.0e-3, rtol=1.0e-3) + + +if __name__ == "__main__": + unittest.main() diff --git a/fbgemm_gpu/fbgemm_gpu/__init__.py b/fbgemm_gpu/fbgemm_gpu/__init__.py index 555c8574e..1d913d28a 100644 --- a/fbgemm_gpu/fbgemm_gpu/__init__.py +++ b/fbgemm_gpu/fbgemm_gpu/__init__.py @@ -11,8 +11,18 @@ try: torch.ops.load_library(os.path.join(os.path.dirname(__file__), "fbgemm_gpu_py.so")) -except Exception as e: - print(e) +except Exception as error_ranking: + try: + torch.ops.load_library( + os.path.join( + os.path.dirname(__file__), + "experimental/gen_ai/fbgemm_gpu_experimental_gen_ai_py.so", + ) + ) + except Exception as error_gen_ai: + # When both ranking/gen_ai so files are not available, print the error logs + print(error_ranking) + print(error_gen_ai) # Since __init__.py is only used in OSS context, we define `open_source` here # and use its existence to determine whether or not we are in OSS context @@ -24,5 +34,8 @@ # Export the version string from the version file auto-generated by setup.py from fbgemm_gpu.docs.version import __version__ # noqa: F401, E402 -# Trigger meta operator registrations -from . import sparse_ops # noqa: F401, E402 +try: + # Trigger meta operator registrations + from . import sparse_ops # noqa: F401, E402 +except Exception: + pass diff --git a/fbgemm_gpu/fbgemm_gpu/docs/__init__.py b/fbgemm_gpu/fbgemm_gpu/docs/__init__.py index 05551ca6d..250f9d58e 100644 --- a/fbgemm_gpu/fbgemm_gpu/docs/__init__.py +++ b/fbgemm_gpu/fbgemm_gpu/docs/__init__.py @@ -6,4 +6,7 @@ # LICENSE file in the root directory of this source tree. # Trigger the manual addition of docstrings to pybind11-generated operators -from . import jagged_tensor_ops, table_batched_embedding_ops # noqa: F401 +try: + from . import jagged_tensor_ops, table_batched_embedding_ops # noqa: F401 +except Exception: + pass