Skip to content

Commit

Permalink
Sub-package checks (pytorch#2755)
Browse files Browse the repository at this point in the history
Summary:
- Add sub-package checks after installation

Pull Request resolved: pytorch#2755

Reviewed By: spcyppt

Differential Revision: D58789289

Pulled By: q10

fbshipit-source-id: d55b82549d3a8ddd12d7c39f2fd51a5a11481949
  • Loading branch information
q10 authored and facebook-github-bot committed Jun 20, 2024
1 parent 3f0f340 commit 969b443
Show file tree
Hide file tree
Showing 8 changed files with 72 additions and 22 deletions.
34 changes: 23 additions & 11 deletions .github/scripts/fbgemm_gpu_install.bash
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ __install_fetch_version_and_variant_info () {
echo ""
}

__install_list_subpackages_info () {
__install_check_subpackages () {
# shellcheck disable=SC2086,SC2155
local fbgemm_gpu_packages=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(dir(fbgemm_gpu))")

Expand All @@ -64,6 +64,22 @@ __install_list_subpackages_info () {
echo "[CHECK] fbgemm_gpu.experimental: ${experimental_packages}"
echo "################################################################################"
echo ""


echo "[INSTALL] Check for installation of Python sources ..."
local subpackages=(
"fbgemm_gpu.docs"
"fbgemm_gpu.tbe.cache"
"fbgemm_gpu.tbe.ssd"
)

for package in "${subpackages[@]}"; do
(test_python_import_package "${env_name}" "${package}") || return 1
done

if [ "$installed_fbgemm_gpu_variant" != "genai" ]; then
(test_python_import_package "${env_name}" fbgemm_gpu.split_embedding_codegen_lookup_invokers) || return 1
fi
}

__install_check_operator_registrations () {
Expand Down Expand Up @@ -105,20 +121,16 @@ __fbgemm_gpu_post_install_checks () {
local env_prefix=$(env_name_or_prefix "${env_name}")

# Print PyTorch and CUDA versions for sanity check
__install_print_dependencies_info
__install_print_dependencies_info || return 1

# Fetch the version and variant info from the package
__install_fetch_version_and_variant_info

# List out FBGEMM_GPU subpackages
__install_list_subpackages_info
__install_fetch_version_and_variant_info || return 1

echo "[INSTALL] Check for installation of Python sources ..."
if [ "$installed_fbgemm_gpu_variant" != "genai" ]; then
(test_python_import_package "${env_name}" fbgemm_gpu.split_embedding_codegen_lookup_invokers) || return 1
fi
# Check FBGEMM_GPU subpackages are installed correctly
__install_check_subpackages || return 1

__install_check_operator_registrations
# Check operator registrations are working
__install_check_operator_registrations || return 1
}

install_fbgemm_gpu_wheel () {
Expand Down
21 changes: 17 additions & 4 deletions .github/scripts/fbgemm_gpu_test.bash
Original file line number Diff line number Diff line change
Expand Up @@ -65,29 +65,43 @@ run_python_test () {
}

# Prepare the Conda environment and the skip list for the CPU-only test run.
#
# Globals:
#   env_name      (read)    - handed to env_name_or_prefix to obtain the
#                             environment selector arguments for conda
#   ignored_tests (written) - assigned the test files to skip; it is not
#                             declared local here
# Outputs:
#   Prints a progress message to stdout and passes the conda command to
#   print_exec.
__configure_fbgemm_gpu_test_cpu () {
  # shellcheck disable=SC2155
  local env_prefix=$(env_name_or_prefix "${env_name}")
  printf '%s\n' "[TEST] Set environment variables for CPU-only testing ..."

  # Prevent automatically running CUDA-enabled tests on a GPU-capable machine.
  # ${env_prefix} is left unquoted on purpose so that it splits into separate
  # arguments.
  # shellcheck disable=SC2086,SC2206
  local -a set_vars_cmd=(conda env config vars set ${env_prefix} CUDA_VISIBLE_DEVICES=-1)
  print_exec "${set_vars_cmd[@]}"

  ignored_tests=()
  ignored_tests+=(./tbe/ssd/ssd_split_table_batched_embeddings_test.py)
  # These tests have non-CPU operators referenced in @given
  ignored_tests+=(./uvm/copy_test.py ./uvm/uvm_test.py)
}

# Configure the Conda environment and the skip list for the CUDA test run.
#
# Globals:
#   env_name      (read)    - handed to env_name_or_prefix to obtain the
#                             environment selector arguments for conda
#   ignored_tests (written) - assigned the test files to skip; it is not
#                             declared local here
# Outputs:
#   Prints a progress message to stdout and passes the conda command to
#   print_exec.
__configure_fbgemm_gpu_test_cuda () {
  # shellcheck disable=SC2155
  local env_prefix=$(env_name_or_prefix "${env_name}")
  # The message names the CUDA variant; it previously repeated the CPU-only
  # text, which made the test logs misleading.
  echo "[TEST] Set environment variables for CUDA testing ..."

  # Disabled by default; enable for debugging
  # shellcheck disable=SC2086
  # print_exec conda env config vars set ${env_prefix} CUDA_LAUNCH_BLOCKING=1

  # Remove CUDA device specificity when running CUDA tests.
  # ${env_prefix} is left unquoted on purpose so that it splits into separate
  # arguments.
  # shellcheck disable=SC2086
  print_exec conda env config vars unset ${env_prefix} CUDA_VISIBLE_DEVICES

  ignored_tests=(
    ./tbe/ssd/ssd_split_table_batched_embeddings_test.py
  )
}

__configure_fbgemm_gpu_test_rocm () {
# shellcheck disable=SC2155
local env_prefix=$(env_name_or_prefix "${env_name}")

echo "[TEST] Set environment variables for ROCm testing ..."

# shellcheck disable=SC2086
print_exec conda env config vars set ${env_prefix} FBGEMM_TEST_WITH_ROCM=1
# shellcheck disable=SC2086
Expand All @@ -102,7 +116,6 @@ __configure_fbgemm_gpu_test_rocm () {
fi

ignored_tests=(
./tbe/ssd/ssd_split_table_batched_embeddings_test.py
# https://github.com/pytorch/FBGEMM/issues/1559
./batched_unary_embeddings_test.py
)
Expand Down
4 changes: 2 additions & 2 deletions .github/scripts/utils_base.bash
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,9 @@ test_python_import_package () {

# shellcheck disable=SC2086
if conda run ${env_prefix} python -c "import ${python_import}"; then
echo "[CHECK] Python package '${python_import}' found."
echo "[CHECK] Python (sub-)package '${python_import}' found ..."
else
echo "[CHECK] Python package '${python_import}' was not found, or the package is broken!"
echo "[CHECK] Python (sub-)package '${python_import}' was not found! Please check if the Python sources have been packaged correctly."
return 1
fi
}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/fbgemm_gpu_pip.yml
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ jobs:
]
# ROCm machines are limited, so we only test a subset of Python versions
python-version: [ "3.11", "3.12" ]
rocm-version: [ "5.7" ]
rocm-version: [ "6.0.2" ]

steps:
- name: Setup Build Container
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,11 @@ symbols with ``GLIBCXX`` when compiling FBGEMM_CPU:

.. code:: sh
# Fix GCC to 10.4.0, to keep compatibility with older versions of GLIBCXX
gcc_version=10.4.0
# Set GCC to 10.4.0 to keep compatibility with older versions of GLIBCXX
#
# A newer version of GCC also works, but will need to be accompanied by an
# appropriately updated version of the sysroot_linux package.
gcc_version=10.4.0
conda install -n ${env_name} -c conda-forge -y gxx_linux-64=${gcc_version} sysroot_linux-64=2.17
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ environment:
# Enable for debugging failed kernel executions
export CUDA_LAUNCH_BLOCKING=1
# For operators involving NCCL, if the rpath is not set up correctly for
# libnccl.so.2, LD_LIBRARY_PATH will need to be updated.
export LD_LIBRARY_PATH="/path/to/nccl/lib:${LD_LIBRARY_PATH}"
python -m pytest -v -rsx -s -W ignore::pytest.PytestCollectionWarning split_table_batched_embeddings_test.py
Testing with the ROCm Variant
Expand Down
6 changes: 6 additions & 0 deletions fbgemm_gpu/test/tbe/ssd/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
16 changes: 14 additions & 2 deletions fbgemm_gpu/test/tbe/ssd/ssd_split_table_batched_embeddings_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,22 @@

from hypothesis import assume, given, settings, Verbosity

from .. import common # noqa E402
from ..common import open_source


if open_source:
# pyre-ignore[21]
from test_utils import gpu_unavailable, running_on_github
else:
from fbgemm_gpu.test.test_utils import gpu_unavailable, running_on_github


MAX_EXAMPLES = 40


@unittest.skipIf(not torch.cuda.is_available(), "Skip when CUDA is not available")
@unittest.skipIf(*running_on_github)
@unittest.skipIf(*gpu_unavailable)
class SSDSplitTableBatchedEmbeddingsTest(unittest.TestCase):
def get_physical_table_arg_indices_(self, feature_table_map: List[int]):
"""
Expand Down Expand Up @@ -765,7 +776,8 @@ def test_ssd_cache(
)


@unittest.skipIf(not torch.cuda.is_available(), "Skip when CUDA is not available")
@unittest.skipIf(*running_on_github)
@unittest.skipIf(*gpu_unavailable)
class SSDIntNBitTableBatchedEmbeddingsTest(unittest.TestCase):
def test_nbit_ssd(self) -> None:
import tempfile
Expand Down

0 comments on commit 969b443

Please sign in to comment.