From f325a7966d2eb521a96134978bc9e1c653538d7e Mon Sep 17 00:00:00 2001 From: Benson Ma Date: Thu, 21 Sep 2023 21:36:23 -0700 Subject: [PATCH] Fix nova script (#2033) Summary: - Fix a syntax issue with bash associative arrays that caused the Nova build scripts to fail Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/2033 Reviewed By: spcyppt Differential Revision: D49529109 Pulled By: q10 fbshipit-source-id: d2e1ac49e3490bc89b266c7596a19117efa0caa8 --- .github/scripts/fbgemm_gpu_build.bash | 2 +- .github/scripts/fbgemm_gpu_test.bash | 21 +++++---- .github/scripts/nova_prescript.bash | 8 ++-- .github/scripts/test_torchrec.bash | 64 +++++++++------------------ .github/scripts/utils_cuda.bash | 2 +- 5 files changed, 38 insertions(+), 59 deletions(-) diff --git a/.github/scripts/fbgemm_gpu_build.bash b/.github/scripts/fbgemm_gpu_build.bash index 5354c24ab..b783651d8 100644 --- a/.github/scripts/fbgemm_gpu_build.bash +++ b/.github/scripts/fbgemm_gpu_build.bash @@ -290,7 +290,7 @@ build_fbgemm_gpu_package () { fbgemm_variant="$3" fbgemm_variant_targets="$4" if [ "$fbgemm_variant" == "" ]; then - echo "Usage: ${FUNCNAME[0]} ENV_NAME PACKAGE_NAME VARIANT [TARGETS]" + echo "Usage: ${FUNCNAME[0]} ENV_NAME RELEASE_TYPE VARIANT [VARIANT_TARGETS]" echo "Example(s):" echo " ${FUNCNAME[0]} build_env nightly cpu # Nightly CPU-only variant" echo " ${FUNCNAME[0]} build_env nightly cuda # Nightly CUDA variant for default target(s)" diff --git a/.github/scripts/fbgemm_gpu_test.bash b/.github/scripts/fbgemm_gpu_test.bash index be5a95ab1..5c589dc85 100644 --- a/.github/scripts/fbgemm_gpu_test.bash +++ b/.github/scripts/fbgemm_gpu_test.bash @@ -129,13 +129,14 @@ run_fbgemm_gpu_tests () { ################################################################################ test_setup_conda_environment () { - local python_version="$1" - local pytorch_installer="$2" - local pytorch_version="$3" - local pytorch_variant_type="$4" - local pytorch_variant_version="$5" + local 
env_name="$1" + local python_version="$2" + local pytorch_installer="$3" + local pytorch_version="$4" + local pytorch_variant_type="$5" + local pytorch_variant_version="$6" if [ "$pytorch_variant_type" == "" ]; then - echo "Usage: ${FUNCNAME[0]} PYTHON_VERSION PYTORCH_INSTALLER PYTORCH_VERSION PYTORCH_VARIANT_TYPE [PYTORCH_VARIANT_VERSION]" + echo "Usage: ${FUNCNAME[0]} ENV_NAME PYTHON_VERSION PYTORCH_INSTALLER PYTORCH_VERSION PYTORCH_VARIANT_TYPE [PYTORCH_VARIANT_VERSION]" echo "Example(s):" echo " ${FUNCNAME[0]} build_env 3.8 pip test cuda 11.8.0 # Setup environment with pytorch-test for Python 3.8 + CUDA 11.8.0" return 1 @@ -148,9 +149,11 @@ test_setup_conda_environment () { echo "" fi - local env_name="test_py${python_version}_${pytorch_installer}_pytorch_${pytorch_version}_${pytorch_variant_type}" - if [ "$pytorch_variant_version" != "" ]; then - local env_name="${env_name}_${pytorch_variant_version}" + if [ "$env_name" == "" ]; then + local env_name="test_py${python_version}_${pytorch_installer}_pytorch_${pytorch_version}_${pytorch_variant_type}" + if [ "$pytorch_variant_version" != "" ]; then + local env_name="${env_name}_${pytorch_variant_version}" + fi fi echo "Creating the Build Environment: ${env_name} ..." diff --git a/.github/scripts/nova_prescript.bash b/.github/scripts/nova_prescript.bash index 9cb9b8400..f52e3b163 100644 --- a/.github/scripts/nova_prescript.bash +++ b/.github/scripts/nova_prescript.bash @@ -57,15 +57,15 @@ if [[ $CU_VERSION = cu* ]]; then echo "[NOVA] ------------------------------------------" echo "[NOVA] Building the CUDA variant of FBGEMM_GPU ..." - fbgemm_variant="cuda" + export fbgemm_variant="cuda" elif [[ $CU_VERSION = rocm* ]]; then echo "[NOVA] Building the ROCm variant of FBGEMM_GPU ..." - fbgemm_variant="rocm" + export fbgemm_variant="rocm" else echo "[NOVA] Building the CPU variant of FBGEMM_GPU ..." 
- fbgemm_variant="cpu" + export fbgemm_variant="cpu" fi # Install the necessary Python eggs for building @@ -78,7 +78,7 @@ export BUILD_FROM_NOVA # Build FBGEMM_GPU nightly by default if [[ ${CHANNEL} == "" ]]; then - CHANNEL="nightly" + export CHANNEL="nightly" fi # Build the wheel diff --git a/.github/scripts/test_torchrec.bash b/.github/scripts/test_torchrec.bash index c298c5049..9bdb63cbd 100644 --- a/.github/scripts/test_torchrec.bash +++ b/.github/scripts/test_torchrec.bash @@ -11,41 +11,8 @@ set -e # shellcheck source=/dev/null . "$(dirname "$(realpath -s "$0")")/setup_env.bash" -create_conda_pytorch_environment () { - local env_name="$1" - local python_version="$2" - local pytorch_channel_name="$3" - local cuda_version="$4" - if [ "$python_version" == "" ]; then - echo "Usage: ${FUNCNAME[0]} ENV_NAME PYTHON_VERSION PYTORCH_CHANNEL_NAME CUDA_VERSION" - echo "Example:" - echo " ${FUNCNAME[0]} build_env 3.10 pytorch-nightly 11.7.1" - return 1 - fi - - # Create the Conda environment - create_conda_environment "${env_name}" "${python_version}" - - # Convert the channels to versions - if [ "${pytorch_channel_name}" == "pytorch-nightly" ]; then - pytorch_version="nightly" - elif [ "${pytorch_channel_name}" == "pytorch-test" ]; then - pytorch_version="test" - else - pytorch_version="latest" - fi - - if [ "${cuda_version}" == "" ]; then - # Install the CPU variant of PyTorch - install_pytorch_conda "${env_name}" "${pytorch_version}" cpu - else - # Install CUDA and the GPU variant of PyTorch - install_cuda "${env_name}" "${cuda_version}" - install_pytorch_conda "${env_name}" "${pytorch_version}" - fi -} - verbose=0 +env_name=test_binary torchrec_package_name="" python_version="" cuda_version="x" @@ -53,7 +20,8 @@ fbgemm_wheel_path="x" miniconda_prefix="${HOME}/miniconda" usage () { - echo "Usage: bash test_torchrec.bash -o PACKAGE_NAME -p PYTHON_VERSION -P PYTORCH_CHANNEL_NAME -c CUDA_VERSION -w FBGEMM_WHEEL_PATH [-m MINICONDA_PREFIX] [-v] [-h]" + # shellcheck 
disable=SC2086 + echo "Usage: bash $(basename ${BASH_SOURCE[0]}) -o PACKAGE_NAME -p PYTHON_VERSION -P PYTORCH_CHANNEL_NAME -c CUDA_VERSION -w FBGEMM_WHEEL_PATH [-m MINICONDA_PREFIX] [-v] [-h]" echo "-v : verbose" echo "-h : help" echo "PACKAGE_NAME : output package name of TorchRec (e.g., torchrec_nightly)" @@ -65,7 +33,8 @@ usage () { echo "FBGEMM_WHEEL_PATH : path to FBGEMM_GPU's wheel file" echo "MINICONDA_PREFIX : path to install Miniconda (default: \$HOME/miniconda)" echo "Example: Python 3.10 + PyTorch nightly (CUDA 11.7), install miniconda at \$HOME/miniconda, using dist/fbgemm_gpu_nightly.whl" - echo " bash test_torchrec.bash -v -o torchrec_nightly -p 3.10 -P pytorch-nightly -c 11.7 -w dist/fbgemm_gpu_nightly.whl" + # shellcheck disable=SC2086 + echo " bash $(basename ${BASH_SOURCE[0]}) -v -o torchrec_nightly -p 3.10 -P pytorch-nightly -c 11.7 -w dist/fbgemm_gpu_nightly.whl" } while getopts vho:p:P:c:m:w: flag @@ -112,30 +81,37 @@ echo "## 1. Set up Miniconda" setup_miniconda "$miniconda_prefix" ################################################################################ -echo "## 2. Create test_binary environment" +echo "## 2. Create Conda environment" ################################################################################ -create_conda_pytorch_environment test_binary "$python_version" "$pytorch_channel_name" "$cuda_version" +if [ "${cuda_version}" != "" ]; then + pytorch_variant="cuda ${cuda_version}" +else + pytorch_variant="cpu" +fi + +# shellcheck disable=SC2086 +test_setup_conda_environment "$env_name" "$python_version" pip "$pytorch_channel_name" $pytorch_variant # Comment out FBGEMM_GPU since we will install it from "$fbgemm_wheel_path" sed -i 's/fbgemm-gpu/#fbgemm-gpu/g' requirements.txt -conda run -n test_binary python -m pip install -r requirements.txt +conda run -n "$env_name" python -m pip install -r requirements.txt # Install FBGEMM_GPU from a local wheel file.
-conda run -n test_binary python -m pip install "$fbgemm_wheel_path" -conda run -n test_binary python -c "import fbgemm_gpu" +conda run -n "$env_name" python -m pip install "$fbgemm_wheel_path" +conda run -n "$env_name" python -c "import fbgemm_gpu" ################################################################################ echo "## 3. Build TorchRec" ################################################################################ rm -rf dist -conda run -n test_binary python setup.py bdist_wheel --package_name "${torchrec_package_name}" --python-tag="py${python_tag}" +conda run -n "$env_name" python setup.py bdist_wheel --package_name "${torchrec_package_name}" --python-tag="py${python_tag}" ################################################################################ echo "## 4. Import TorchRec" ################################################################################ -conda run -n test_binary python -m pip install dist/"${torchrec_package_name}"*.whl -conda run -n test_binary python -c "import torchrec" +conda run -n "$env_name" python -m pip install dist/"${torchrec_package_name}"*.whl +conda run -n "$env_name" python -c "import torchrec" echo "Test succeeded" diff --git a/.github/scripts/utils_cuda.bash b/.github/scripts/utils_cuda.bash index 285c9a5ea..10efbcf90 100644 --- a/.github/scripts/utils_cuda.bash +++ b/.github/scripts/utils_cuda.bash @@ -102,7 +102,7 @@ install_cudnn () { # Install cuDNN manually # Based on install script in https://github.com/pytorch/builder/blob/main/common/install_cuda.sh - local cudnn_packages=( + declare -A cudnn_packages=( ["115"]="https://developer.download.nvidia.com/compute/redist/cudnn/v8.3.2/local_installers/11.5/cudnn-${PLATFORM_NAME_LC}-8.3.2.44_cuda11.5-archive.tar.xz" ["116"]="https://developer.download.nvidia.com/compute/redist/cudnn/v8.3.2/local_installers/11.5/cudnn-${PLATFORM_NAME_LC}-8.3.2.44_cuda11.5-archive.tar.xz" 
["117"]="https://ossci-linux.s3.amazonaws.com/cudnn-${PLATFORM_NAME_LC}-8.5.0.96_cuda11-archive.tar.xz"