Misc updates (#2719)
Summary:
- Silence JIT error if JIT compilation fails with PyTorch nightlies

- Update documentation on experimental (GenAI) builds

- Update test scripts

Pull Request resolved: #2719

Reviewed By: spcyppt

Differential Revision: D58436403

Pulled By: q10

fbshipit-source-id: 5ab9a107eb5a56f6ec6a656420112633bc57953e
q10 authored and facebook-github-bot committed Jun 11, 2024
1 parent 182f3a3 commit f899222
Showing 9 changed files with 60 additions and 13 deletions.
7 changes: 4 additions & 3 deletions .github/scripts/fbgemm_gpu_test.bash
@@ -246,6 +246,7 @@ test_all_fbgemm_gpu_modules () {
     # shellcheck disable=SC2086
     fbgemm_gpu_variant=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__variant__)")
     echo "[TEST] Determined FBGEMM_GPU variant from installation: ${fbgemm_gpu_variant}"
+    echo "[TEST] Will be running tests specific to this variant ..."
   fi

   # Determine the test directories to include for testing
@@ -357,7 +358,7 @@ test_fbgemm_gpu_build_and_install_and_run () {
   test_fbgemm_gpu_build_and_install "${env_name}" "${pytorch_variant_type}" || return 1

   cd ~/FBGEMM/ || return 1
-  test_all_fbgemm_gpu_modules "${env_name}" "${pytorch_variant_type}" || return 1
+  test_all_fbgemm_gpu_modules "${env_name}" || return 1
 }

 test_fbgemm_gpu_setup_and_pip_install () {
@@ -390,7 +391,7 @@ test_fbgemm_gpu_setup_and_pip_install () {
   install_fbgemm_gpu_pip "${env_name}" "${fbgemm_gpu_channel_version}" "${variant_type}/${variant_version}" || return 1
   cd ~/FBGEMM || return 1

-  test_all_fbgemm_gpu_modules "${env_name}" "${variant_type}";
+  test_all_fbgemm_gpu_modules "${env_name}"
   local retcode=$?

   echo "################################################################################"
@@ -417,7 +418,7 @@ test_fbgemm_gpu_setup_and_pip_install () {
     3.12
   )

-  if [ "$variant_type" == "cuda" ]; then
+  if [ "$variant_type" == "cuda" ] || [ "$variant_type" == "genai" ]; then
     local variant_versions=(
       11.8.0
       12.1.1
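The test-script change above gates the CUDA version matrix on both the `cuda` and the new `genai` variants, since the GenAI build is CUDA-only. A minimal standalone sketch of that branch (variable names taken from the script; values illustrative):

```shell
# Sketch: genai builds reuse the CUDA version list, other variants do not.
variant_type="genai"

if [ "$variant_type" == "cuda" ] || [ "$variant_type" == "genai" ]; then
  variant_versions=("11.8.0" "12.1.1")
else
  variant_versions=("none")
fi

echo "Selected versions: ${variant_versions[@]}"
```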
2 changes: 1 addition & 1 deletion .github/workflows/fbgemm_gpu_ci_cpu.yml
@@ -184,7 +184,7 @@ jobs:
       - name: Test with PyTest
         timeout-minutes: ${{ matrix.host-machine.timeout }}
-        run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV cpu
+        run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV

       - name: Push Wheel to PyPI
         if: ${{ (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true')) && matrix.compiler == 'gcc' }}
2 changes: 1 addition & 1 deletion .github/workflows/fbgemm_gpu_ci_cuda.yml
@@ -205,7 +205,7 @@ jobs:

       - name: Test with PyTest
         timeout-minutes: 20
-        run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV cuda
+        run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV

       - name: Push Wheel to PyPI
         if: ${{ (github.event_name == 'schedule' && matrix.cuda-version == matrix.cuda-version-publish) || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == matrix.cuda-version-publish) }}
2 changes: 1 addition & 1 deletion .github/workflows/fbgemm_gpu_ci_rocm.yml
@@ -196,4 +196,4 @@ jobs:

       - name: Test with PyTest
         timeout-minutes: 20
-        run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV rocm
+        run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV
6 changes: 3 additions & 3 deletions .github/workflows/fbgemm_gpu_pip.yml
@@ -100,7 +100,7 @@ jobs:

       - name: Test with PyTest
         timeout-minutes: ${{ matrix.host-machine.timeout }}
-        run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV cpu
+        run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV


   test_pypi_install_cuda:
@@ -161,7 +161,7 @@ jobs:

       - name: Test with PyTest
         timeout-minutes: 20
-        run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV cuda
+        run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV


   test_pypi_install_rocm:
@@ -228,4 +228,4 @@ jobs:

       - name: Test with PyTest
         timeout-minutes: 20
-        run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV rocm
+        run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV
2 changes: 1 addition & 1 deletion .github/workflows/fbgemm_gpu_release_cpu.yml
@@ -176,7 +176,7 @@ jobs:
       - name: Test with PyTest
         timeout-minutes: ${{ matrix.host-machine.timeout }}
-        run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV cpu
+        run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV

       - name: Push FBGEMM_GPU (CPU version) Binary to PYPI
         if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' }}
2 changes: 1 addition & 1 deletion .github/workflows/fbgemm_gpu_release_cuda.yml
@@ -186,7 +186,7 @@ jobs:

       - name: Test with PyTest
         timeout-minutes: 20
-        run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV cuda
+        run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV

       - name: Push FBGEMM_GPU Binary to PYPI
         if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == github.event.inputs.cuda_version }}
38 changes: 38 additions & 0 deletions fbgemm_gpu/docs/src/fbgemm_gpu-development/BuildInstructions.rst
@@ -5,6 +5,13 @@ Build Instructions
    scripts bundled in the FBGEMM repo under
    `setup_env.bash <https://github.com/pytorch/FBGEMM/blob/main/.github/scripts/setup_env.bash>`_.

+The currently available FBGEMM_GPU build variants are:
+
+* CPU-only
+* CUDA
+* GenAI (experimental)
+* ROCm
+
 The general steps for building FBGEMM_GPU are as follows:

 #. Set up an isolated build environment.
@@ -588,6 +595,37 @@ toolchains have been properly installed.
       --nccl_lib_path=${NCCL_LIB_PATH} \
       -DTORCH_CUDA_ARCH_LIST="${cuda_arch_list}"

+.. _fbgemm-gpu.build.process.genai:
+
+Experimental-Only (GenAI) Build
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+By default, the CUDA build of FBGEMM_GPU includes all experimental modules that
+are used for GenAI applications.  The instructions for building just the
+experimental modules are the same as those for a CUDA build, but with
+``--package_variant=genai`` specified in the build invocation:
+
+.. code:: sh
+
+  # Build the wheel artifact only
+  python setup.py bdist_wheel \
+    --package_variant=genai \
+    --package_name="${package_name}" \
+    --python-tag="${python_tag}" \
+    --plat-name="${python_plat_name}" \
+    --nvml_lib_path=${NVML_LIB_PATH} \
+    --nccl_lib_path=${NCCL_LIB_PATH} \
+    -DTORCH_CUDA_ARCH_LIST="${cuda_arch_list}"
+
+  # Build and install the library into the Conda environment
+  python setup.py install \
+    --package_variant=genai \
+    --nvml_lib_path=${NVML_LIB_PATH} \
+    --nccl_lib_path=${NCCL_LIB_PATH} \
+    -DTORCH_CUDA_ARCH_LIST="${cuda_arch_list}"
+
+Note that currently, only CUDA is supported for the experimental modules.
+
 .. _fbgemm-gpu.build.process.rocm:

 ROCm Build
12 changes: 10 additions & 2 deletions fbgemm_gpu/test/tbe/training/forward_test.py
@@ -257,8 +257,16 @@ def execute_forward_(  # noqa C901
         )

         if not use_cpu and torch.cuda.is_available():
-            # NOTE: test TorchScript-compatible!
-            cc = torch.jit.script(cc)
+            # NOTE: Test TorchScript-compatible!
+            try:
+                # Occasionally, we run into the following error when running
+                # against PyTorch nightly:
+                #
+                #   RuntimeError: Can't redefine method:
+                #   forward on class: __torch__.fbgemm_gpu.split_table_batched_embeddings_ops_training.___torch_mangle_0.SplitTableBatchedEmbeddingBagsCodegen (of Python compilation unit at: 0x5e74890)
+                cc = torch.jit.script(cc)
+            except Exception as e:
+                print(f"Torch JIT compilation failed: {e}")

         for t in range(T):
             cc.split_embedding_weights()[t].data.copy_(
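The test change above wraps `torch.jit.script` so that a scripting failure on PyTorch nightlies logs an error and falls back to the eager module instead of aborting the run. A minimal self-contained sketch of the same fallback pattern (`maybe_script` and `failing_compiler` are illustrative names, not FBGEMM or PyTorch APIs):

```python
def maybe_script(module, compile_fn):
    """Return compile_fn(module), or module unchanged if compilation raises."""
    try:
        return compile_fn(module)
    except Exception as e:
        # Mirrors the test change: log and continue rather than fail the run
        print(f"Torch JIT compilation failed: {e}")
        return module


def failing_compiler(module):
    # Stand-in for torch.jit.script raising against a PyTorch nightly
    raise RuntimeError("Can't redefine method: forward")


# The eager object survives the failed compilation attempt
result = maybe_script("eager_module", failing_compiler)
print(result)  # → eager_module
```

The pattern keeps TorchScript coverage on stable PyTorch builds while tolerating known nightly-only breakage.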
