From bbc676eb6f4f664635914065cda4549bd3962464 Mon Sep 17 00:00:00 2001 From: Benson Ma Date: Fri, 15 Sep 2023 15:31:21 -0700 Subject: [PATCH] Change scripts to accommodate conda prefix to work with Nova (#2022) Summary: Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/2022 Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/2021 Nova creates conda environments with `--prefix` where the environments are outside of the default `envs` folder. The current scripts only work with `--name`. This diff changes the scripts to also accommodate a conda prefix. https://github.com/pytorch/FBGEMM/actions/runs/6189731547 Reviewed By: spcyppt Differential Revision: D49306286 fbshipit-source-id: 2f2ca00645526639369de3f555dbab30da56739e --- .github/scripts/fbgemm_gpu_build.bash | 40 +++++++++++++++++------ .github/scripts/fbgemm_gpu_docs.bash | 20 +++++++++--- .github/scripts/fbgemm_gpu_install.bash | 14 ++++++-- .github/scripts/fbgemm_gpu_lint.bash | 24 +++++++++++--- .github/scripts/fbgemm_gpu_test.bash | 15 +++++++-- .github/scripts/nova_postscript.bash | 2 +- .github/scripts/nova_prescript.bash | 2 +- .github/scripts/setup_env.bash | 43 +++++++++++++++++-------- .github/scripts/utils_base.bash | 39 ++++++++++++++++++---- .github/scripts/utils_conda.bash | 15 ++++++--- .github/scripts/utils_cuda.bash | 25 ++++++++++---- .github/scripts/utils_pytorch.bash | 22 +++++++++---- 12 files changed, 198 insertions(+), 63 deletions(-) diff --git a/.github/scripts/fbgemm_gpu_build.bash b/.github/scripts/fbgemm_gpu_build.bash index 9d82fb61b..e651f2d72 100644 --- a/.github/scripts/fbgemm_gpu_build.bash +++ b/.github/scripts/fbgemm_gpu_build.bash @@ -40,10 +40,16 @@ prepare_fbgemm_gpu_build () { git submodule sync git submodule update --init --recursive + # shellcheck disable=SC2155 + local env_prefix=$(env_name_or_prefix "${env_name}") + echo "[BUILD] Installing other build dependencies ..." 
- (exec_with_retries conda run --no-capture-output -n "${env_name}" python -m pip install -r requirements.txt) || return 1 + # shellcheck disable=SC2086 + (exec_with_retries conda run --no-capture-output ${env_prefix} python -m pip install -r requirements.txt) || return 1 + # shellcheck disable=SC2086 (test_python_import_package "${env_name}" numpy) || return 1 + # shellcheck disable=SC2086 (test_python_import_package "${env_name}" skbuild) || return 1 echo "[BUILD] Successfully ran git submodules update" @@ -78,7 +84,8 @@ __configure_fbgemm_gpu_build_rocm () { fi echo "[BUILD] Setting the following ROCm targets: ${arch_list}" - print_exec conda env config vars set -n "${env_name}" PYTORCH_ROCM_ARCH="${arch_list}" + # shellcheck disable=SC2086 + print_exec conda env config vars set ${env_prefix} PYTORCH_ROCM_ARCH="${arch_list}" echo "[BUILD] Setting ROCm build args ..." build_args=() @@ -116,8 +123,8 @@ __configure_fbgemm_gpu_build_cuda () { # Build only CUDA 7.0 and 8.0 (i.e. V100 and A100) because of 100 MB binary size limits from PyPI. echo "[BUILD] Setting CUDA build args ..." 
- # shellcheck disable=SC2155 - local nvml_lib_path=$(conda run --no-capture-output -n "${env_name}" printenv NVML_LIB_PATH) + # shellcheck disable=SC2155,SC2086 + local nvml_lib_path=$(conda run --no-capture-output ${env_prefix} printenv NVML_LIB_PATH) build_args=( --nvml_lib_path="${nvml_lib_path}" -DTORCH_CUDA_ARCH_LIST="'${arch_list}'" @@ -187,8 +194,8 @@ __build_fbgemm_gpu_common_pre_steps () { echo "[BUILD] Determined Python package name to use: ${package_name}" # Extract the Python tag - # shellcheck disable=SC2207 - python_version=($(conda run --no-capture-output -n "${env_name}" python --version)) + # shellcheck disable=SC2207,SC2086 + python_version=($(conda run --no-capture-output ${env_prefix} python --version)) # shellcheck disable=SC2206 python_version_arr=(${python_version[1]//./ }) python_tag="py${python_version_arr[0]}${python_version_arr[1]}" @@ -196,7 +203,8 @@ __build_fbgemm_gpu_common_pre_steps () { echo "[BUILD] Running pre-build cleanups ..." print_exec rm -rf dist - print_exec conda run --no-capture-output -n "${env_name}" python setup.py clean + # shellcheck disable=SC2086 + print_exec conda run --no-capture-output ${env_prefix} python setup.py clean echo "[BUILD] Printing git status ..." print_exec git status @@ -285,6 +293,9 @@ build_fbgemm_gpu_package () { return 1 fi + # shellcheck disable=SC2155 + local env_prefix=$(env_name_or_prefix "${env_name}") + # Set up and configure the build __build_fbgemm_gpu_common_pre_steps || return 1 __configure_fbgemm_gpu_build "${fbgemm_variant}" "${fbgemm_variant_targets}" || return 1 @@ -315,7 +326,8 @@ build_fbgemm_gpu_package () { # Distribute Python extensions as wheels on Linux echo "[BUILD] Building FBGEMM-GPU wheel (VARIANT=${fbgemm_variant}) ..." 
- print_exec conda run --no-capture-output -n "${env_name}" \ + # shellcheck disable=SC2086 + print_exec conda run --no-capture-output ${env_prefix} \ python setup.py "${run_multicore}" bdist_wheel \ --package_name="${package_name}" \ --python-tag="${python_tag}" \ @@ -351,6 +363,9 @@ build_fbgemm_gpu_install () { return 1 fi + # shellcheck disable=SC2155 + local env_prefix=$(env_name_or_prefix "${env_name}") + # Set up and configure the build __build_fbgemm_gpu_common_pre_steps || return 1 __configure_fbgemm_gpu_build "${fbgemm_variant}" "${fbgemm_variant_targets}" || return 1 @@ -365,7 +380,8 @@ build_fbgemm_gpu_install () { # Parallelism may need to be limited to prevent the build from being # canceled for going over ulimits echo "[BUILD] Building + installing FBGEMM-GPU (VARIANT=${fbgemm_variant}) ..." - print_exec conda run --no-capture-output -n "${env_name}" \ + # shellcheck disable=SC2086 + print_exec conda run --no-capture-output ${env_prefix} \ python setup.py install "${build_args[@]}" # Run checks on the built libraries @@ -395,6 +411,9 @@ build_fbgemm_gpu_develop () { return 1 fi + # shellcheck disable=SC2155 + local env_prefix=$(env_name_or_prefix "${env_name}") + # Set up and configure the build __build_fbgemm_gpu_common_pre_steps || return 1 __configure_fbgemm_gpu_build "${fbgemm_variant}" "${fbgemm_variant_targets}" || return 1 @@ -409,7 +428,8 @@ build_fbgemm_gpu_develop () { # Parallelism may need to be limited to prevent the build from being # canceled for going over ulimits echo "[BUILD] Building (develop) FBGEMM-GPU (VARIANT=${fbgemm_variant}) ..." 
- print_exec conda run --no-capture-output -n "${env_name}" \ + # shellcheck disable=SC2086 + print_exec conda run --no-capture-output ${env_prefix} \ python setup.py build develop "${build_args[@]}" # Run checks on the built libraries diff --git a/.github/scripts/fbgemm_gpu_docs.bash b/.github/scripts/fbgemm_gpu_docs.bash index 98e90a416..d2b21f564 100644 --- a/.github/scripts/fbgemm_gpu_docs.bash +++ b/.github/scripts/fbgemm_gpu_docs.bash @@ -31,11 +31,15 @@ install_docs_tools () { test_network_connection || return 1 + # shellcheck disable=SC2155 + local env_prefix=$(env_name_or_prefix "${env_name}") + echo "[INSTALL] Installing docs tools ..." - (exec_with_retries conda install -n "${env_name}" -c conda-forge -y \ + # shellcheck disable=SC2086 + (exec_with_retries conda install ${env_prefix} -c conda-forge -y \ doxygen) || return 1 - # Check binaries are visible in the PAATH + # Check binaries are visible in the PATH (test_binpath "${env_name}" doxygen) || return 1 echo "[INSTALL] Successfully installed all the docs tools" @@ -62,14 +66,20 @@ build_fbgemm_gpu_docs () { echo "" fi + # shellcheck disable=SC2155 + local env_prefix=$(env_name_or_prefix "${env_name}") + echo "[BUILD] Installing docs-build dependencies ..." - (exec_with_retries conda run -n "${env_name}" python -m pip install -r requirements.txt) || return 1 + # shellcheck disable=SC2086 + (exec_with_retries conda run ${env_prefix} python -m pip install -r requirements.txt) || return 1 echo "[BUILD] Running Doxygen build ..." - (exec_with_retries conda run -n "${env_name}" doxygen Doxyfile.in) || return 1 + # shellcheck disable=SC2086 + (exec_with_retries conda run ${env_prefix} doxygen Doxyfile.in) || return 1 echo "[BUILD] Building HTML pages ..." 
- (exec_with_retries conda run -n "${env_name}" make html) || return 1 + # shellcheck disable=SC2086 + (exec_with_retries conda run ${env_prefix} make html) || return 1 echo "[INSTALL] FBGEMM-GPU documentation build completed" } diff --git a/.github/scripts/fbgemm_gpu_install.bash b/.github/scripts/fbgemm_gpu_install.bash index 38cf3280e..50353efa4 100644 --- a/.github/scripts/fbgemm_gpu_install.bash +++ b/.github/scripts/fbgemm_gpu_install.bash @@ -20,7 +20,8 @@ __fbgemm_gpu_post_install_checks () { (test_python_import_symbol "${env_name}" fbgemm_gpu __version__) || return 1 echo "[CHECK] Printing out the FBGEMM-GPU version ..." - installed_fbgemm_gpu_version=$(conda run -n "${env_name}" python -c "import fbgemm_gpu; print(fbgemm_gpu.__version__)") + # shellcheck disable=SC2086 + installed_fbgemm_gpu_version=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__version__)") echo "[CHECK] The installed version is: ${installed_fbgemm_gpu_version}" } @@ -46,8 +47,12 @@ install_fbgemm_gpu_wheel () { print_exec sha256sum "${wheel_path}" print_exec md5sum "${wheel_path}" + # shellcheck disable=SC2155 + local env_prefix=$(env_name_or_prefix "${env_name}") + echo "[INSTALL] Installing FBGEMM-GPU wheel: ${wheel_path} ..." - (exec_with_retries conda run -n "${env_name}" python -m pip install "${wheel_path}") || return 1 + # shellcheck disable=SC2086 + (exec_with_retries conda run ${env_prefix} python -m pip install "${wheel_path}") || return 1 __fbgemm_gpu_post_install_checks || return 1 @@ -131,9 +136,12 @@ install_fbgemm_gpu_pip () { fi fi + # shellcheck disable=SC2155 + local env_prefix=$(env_name_or_prefix "${env_name}") + echo "[INSTALL] Attempting to install FBGEMM-GPU ${fbgemm_gpu_version}+${fbgemm_gpu_variant} through PIP ..." 
# shellcheck disable=SC2086 - (exec_with_retries conda run -n "${env_name}" pip install ${fbgemm_gpu_package}) || return 1 + (exec_with_retries conda run ${env_prefix} pip install ${fbgemm_gpu_package}) || return 1 __fbgemm_gpu_post_install_checks || return 1 diff --git a/.github/scripts/fbgemm_gpu_lint.bash b/.github/scripts/fbgemm_gpu_lint.bash index c129ecc94..122d54786 100644 --- a/.github/scripts/fbgemm_gpu_lint.bash +++ b/.github/scripts/fbgemm_gpu_lint.bash @@ -31,8 +31,12 @@ install_lint_tools () { test_network_connection || return 1 + # shellcheck disable=SC2155 + local env_prefix=$(env_name_or_prefix "${env_name}") + echo "[INSTALL] Installing lint tools ..." - (exec_with_retries conda install -n "${env_name}" -c conda-forge -y \ + # shellcheck disable=SC2086 + (exec_with_retries conda install ${env_prefix} -c conda-forge -y \ click \ flake8 \ ufmt) || return 1 @@ -72,10 +76,14 @@ lint_fbgemm_gpu_flake8 () { echo "::add-matcher::fbgemm_gpu/test/lint/flake8_problem_matcher.json" + # shellcheck disable=SC2155 + local env_prefix=$(env_name_or_prefix "${env_name}") + # E501 = line too long # W503 = line break before binary operator (deprecated) # E203 = whitespace before ":" - (print_exec conda run -n "${env_name}" flake8 --ignore=E501,W503,E203 .) || return 1 + # shellcheck disable=SC2086 + (print_exec conda run ${env_prefix} flake8 --ignore=E501,W503,E203 .) 
|| return 1 echo "[TEST] Finished running flake8 lint checks" } @@ -102,8 +110,12 @@ lint_fbgemm_gpu_ufmt () { fbgemm_gpu/bench ) + # shellcheck disable=SC2155 + local env_prefix=$(env_name_or_prefix "${env_name}") + for p in "${lint_paths[@]}"; do - (print_exec conda run -n "${env_name}" ufmt diff "${p}") || return 1 + # shellcheck disable=SC2086 + (print_exec conda run ${env_prefix} ufmt diff "${p}") || return 1 done echo "[TEST] Finished running ufmt lint checks" @@ -131,8 +143,12 @@ lint_fbgemm_gpu_copyright () { fbgemm_gpu/bench ) + # shellcheck disable=SC2155 + local env_prefix=$(env_name_or_prefix "${env_name}") + for p in "${lint_paths[@]}"; do - (print_exec conda run -n "${env_name}" python fbgemm_gpu/test/lint/check_meta_header.py --path="${p}" --fixit=False) || return 1 + # shellcheck disable=SC2086 + (print_exec conda run ${env_prefix} python fbgemm_gpu/test/lint/check_meta_header.py --path="${p}" --fixit=False) || return 1 done echo "[TEST] Finished running Meta Copyright Header checks" diff --git a/.github/scripts/fbgemm_gpu_test.bash b/.github/scripts/fbgemm_gpu_test.bash index 200b536aa..b593caa95 100644 --- a/.github/scripts/fbgemm_gpu_test.bash +++ b/.github/scripts/fbgemm_gpu_test.bash @@ -28,7 +28,11 @@ run_python_test () { echo "################################################################################" fi - if print_exec conda run --no-capture-output -n "${env_name}" python -m pytest -v -rsx -s -W ignore::pytest.PytestCollectionWarning "${python_test_file}"; then + # shellcheck disable=SC2155 + local env_prefix=$(env_name_or_prefix "${env_name}") + + # shellcheck disable=SC2086 + if print_exec conda run --no-capture-output ${env_prefix} python -m pytest -v -rsx -s -W ignore::pytest.PytestCollectionWarning "${python_test_file}"; then echo "[TEST] Python test suite PASSED: ${python_test_file}" echo "" else @@ -62,10 +66,14 @@ run_fbgemm_gpu_tests () { echo "" fi + # shellcheck disable=SC2155 + local env_prefix=$(env_name_or_prefix 
"${env_name}") + # Enable ROCM testing if specified if [ "$fbgemm_variant" == "rocm" ]; then echo "[TEST] Set environment variable FBGEMM_TEST_WITH_ROCM to enable ROCm tests ..." - print_exec conda env config vars set -n "${env_name}" FBGEMM_TEST_WITH_ROCM=1 + # shellcheck disable=SC2086 + print_exec conda env config vars set ${env_prefix} FBGEMM_TEST_WITH_ROCM=1 fi # These are either non-tests or currently-broken tests in both FBGEMM_GPU and FBGEMM_GPU-CPU @@ -90,7 +98,8 @@ run_fbgemm_gpu_tests () { fi echo "[TEST] Installing pytest ..." - print_exec conda install -n "${env_name}" -y pytest + # shellcheck disable=SC2086 + print_exec conda install ${env_prefix} -y pytest echo "[TEST] Checking imports ..." (test_python_import_package "${env_name}" fbgemm_gpu) || return 1 diff --git a/.github/scripts/nova_postscript.bash b/.github/scripts/nova_postscript.bash index 099dbb3c2..c1e5d5bfd 100644 --- a/.github/scripts/nova_postscript.bash +++ b/.github/scripts/nova_postscript.bash @@ -8,7 +8,7 @@ echo "Current working directory: $(pwd)" cd "${FBGEMM_REPO}" || echo "Failed to cd to ${FBGEMM_REPO}" PRELUDE="${FBGEMM_REPO}/.github/scripts/setup_env.bash" -BUILD_ENV_NAME=base +BUILD_ENV_NAME=${CONDA_ENV} GITHUB_ENV=TRUE export GITHUB_ENV diff --git a/.github/scripts/nova_prescript.bash b/.github/scripts/nova_prescript.bash index 0133b656f..89106525f 100644 --- a/.github/scripts/nova_prescript.bash +++ b/.github/scripts/nova_prescript.bash @@ -8,7 +8,7 @@ echo "Current working directory: $(pwd)" cd "${FBGEMM_REPO}" || echo "Failed to cd to ${FBGEMM_REPO}" PRELUDE="${FBGEMM_REPO}/.github/scripts/setup_env.bash" -BUILD_ENV_NAME=base +BUILD_ENV_NAME=${CONDA_ENV} echo "--------------------------" echo "----- conda env list -----" conda env list diff --git a/.github/scripts/setup_env.bash b/.github/scripts/setup_env.bash index ee823d5af..e26b95c72 100755 --- a/.github/scripts/setup_env.bash +++ b/.github/scripts/setup_env.bash @@ -104,16 +104,21 @@ install_cxx_compiler () { else 
archname="$MACHINE_NAME_LC" fi + + # shellcheck disable=SC2155 + local env_prefix=$(env_name_or_prefix "${env_name}") + echo "[INSTALL] Installing C/C++ compilers through Conda (architecture = ${archname}) ..." - (exec_with_retries conda install -n "${env_name}" -y "gxx_linux-${archname}"=10.4.0 "sysroot_linux-${archname}"=2.17 -c conda-forge) || return 1 + # shellcheck disable=SC2086 + (exec_with_retries conda install ${env_prefix} -y "gxx_linux-${archname}"=10.4.0 "sysroot_linux-${archname}"=2.17 -c conda-forge) || return 1 # The compilers are visible in the PATH as `x86_64-conda-linux-gnu-cc` and # `x86_64-conda-linux-gnu-c++`, so symlinks will need to be created echo "[INSTALL] Setting the C/C++ compiler symlinks ..." - # shellcheck disable=SC2155 - local cc_path=$(conda run -n "${env_name}" printenv CC) - # shellcheck disable=SC2155 - local cxx_path=$(conda run -n "${env_name}" printenv CXX) + # shellcheck disable=SC2155,SC2086 + local cc_path=$(conda run ${env_prefix} printenv CC) + # shellcheck disable=SC2155,SC2086 + local cxx_path=$(conda run ${env_prefix} printenv CXX) print_exec ln -s "${cc_path}" "$(dirname "$cc_path")/cc" print_exec ln -s "${cc_path}" "$(dirname "$cc_path")/gcc" @@ -129,22 +134,25 @@ install_cxx_compiler () { # https://stackoverflow.com/questions/2224334/gcc-dump-preprocessor-defines echo "[INFO] Printing out all preprocessor defines in the C compiler ..." - print_exec conda run -n "${env_name}" cc -dM -E - + # shellcheck disable=SC2086 + print_exec conda run ${env_prefix} cc -dM -E - # https://stackoverflow.com/questions/2224334/gcc-dump-preprocessor-defines echo "[INFO] Printing out all preprocessor defines in the C++ compiler ..." 
- print_exec conda run -n "${env_name}" c++ -dM -E -x c++ - + # shellcheck disable=SC2086 + print_exec conda run ${env_prefix} c++ -dM -E -x c++ - # Print out the C++ version - print_exec conda run -n "${env_name}" c++ --version + # shellcheck disable=SC2086 + print_exec conda run ${env_prefix} c++ --version # https://stackoverflow.com/questions/4991707/how-to-find-my-current-compilers-standard-like-if-it-is-c90-etc echo "[INFO] Printing the default version of the C standard used by the compiler ..." - print_exec "conda run -n ${env_name} cc -dM -E - | grep __STDC_VERSION__" + print_exec "conda run ${env_prefix} cc -dM -E - | grep __STDC_VERSION__" # https://stackoverflow.com/questions/2324658/how-to-determine-the-version-of-the-c-standard-used-by-the-compiler echo "[INFO] Printing the default version of the C++ standard used by the compiler ..." - print_exec "conda run -n ${env_name} c++ -dM -E -x c++ - | grep __cplusplus" + print_exec "conda run ${env_prefix} c++ -dM -E -x c++ - | grep __cplusplus" echo "[INSTALL] Successfully installed C/C++ compilers" } @@ -167,8 +175,12 @@ install_build_tools () { test_network_connection || return 1 + # shellcheck disable=SC2155 + local env_prefix=$(env_name_or_prefix "${env_name}") + echo "[INSTALL] Installing build tools ..." - (exec_with_retries conda install -n "${env_name}" -y \ + # shellcheck disable=SC2086 + (exec_with_retries conda install ${env_prefix} -y \ click \ cmake \ hypothesis \ @@ -218,13 +230,18 @@ publish_to_pypi () { test_network_connection || return 1 + # shellcheck disable=SC2155 + local env_prefix=$(env_name_or_prefix "${env_name}") + echo "[INSTALL] Installing twine ..." - print_exec conda install -n "${env_name}" -y twine + # shellcheck disable=SC2086 + print_exec conda install ${env_prefix} -y twine (test_python_import_package "${env_name}" twine) || return 1 (test_python_import_package "${env_name}" OpenSSL) || return 1 echo "[PUBLISH] Uploading package(s) to PyPI: ${package_name} ..." 
- conda run -n "${env_name}" \ + # shellcheck disable=SC2086 + conda run ${env_prefix} \ python -m twine upload \ --username __token__ \ --password "${pypi_token}" \ diff --git a/.github/scripts/utils_base.bash b/.github/scripts/utils_base.bash index 5cfafc5f9..8be352ad3 100644 --- a/.github/scripts/utils_base.bash +++ b/.github/scripts/utils_base.bash @@ -70,6 +70,18 @@ exec_with_retries () { # Assert Functions ################################################################################ +env_name_or_prefix () { + local env=$1 + if [[ ${env} == /* ]]; then + # If the input string is a PATH (i.e. starts with '/'), then determine the + # Conda environment by directory prefix + echo "-p ${env}"; + else + # Else, determine the Conda environment by name + echo "-n ${env}"; + fi +} + test_network_connection () { wget --timeout 1 pypi.org -O /dev/null local exit_status=$? @@ -95,7 +107,10 @@ test_python_import_symbol () { return 1 fi - if conda run -n "${env_name}" python -c "from ${package_name} import ${target_symbol}"; then + local env_prefix=$(env_name_or_prefix "${env_name}") + + # shellcheck disable=SC2086 + if conda run ${env_prefix} python -c "from ${package_name} import ${target_symbol}"; then echo "[CHECK] Found symbol '${target_symbol}' in Python package '${package_name}'." else echo "[CHECK] Could not find symbol '${target_symbol}' in Python package '${package_name}'; the package might be missing or broken." @@ -113,7 +128,10 @@ test_python_import_package () { return 1 fi - if conda run -n "${env_name}" python -c "import ${python_import}"; then + local env_prefix=$(env_name_or_prefix "${env_name}") + + # shellcheck disable=SC2086 + if conda run ${env_prefix} python -c "import ${python_import}"; then echo "[CHECK] Python package '${python_import}' found." else echo "[CHECK] Python package '${python_import}' was not found, or the package is broken!" 
@@ -131,7 +149,10 @@ test_binpath () { return 1 fi - if conda run -n "${env_name}" which "${bin_name}"; then + local env_prefix=$(env_name_or_prefix "${env_name}") + + # shellcheck disable=SC2086 + if conda run ${env_prefix} which "${bin_name}"; then echo "[CHECK] Binary ${bin_name} found in PATH" else echo "[CHECK] Binary ${bin_name} not found in PATH!" @@ -149,12 +170,15 @@ test_filepath () { return 1 fi - # shellcheck disable=SC2155 - local conda_prefix=$(conda run -n "${env_name}" printenv CONDA_PREFIX) + local env_prefix=$(env_name_or_prefix "${env_name}") + + # shellcheck disable=SC2155,SC2086 + local conda_prefix=$(conda run ${env_prefix} printenv CONDA_PREFIX) # shellcheck disable=SC2155 local file_path=$(find "${conda_prefix}" -type f -name "${file_name}") # shellcheck disable=SC2155 local link_path=$(find "${conda_prefix}" -type l -name "${file_name}") + if [ "${file_path}" != "" ]; then echo "[CHECK] ${file_name} found in CONDA_PREFIX PATH (file): ${file_path}" elif [ "${link_path}" != "" ]; then @@ -175,7 +199,10 @@ test_env_var () { return 1 fi - if conda run -n "${env_name}" printenv "${env_key}"; then + local env_prefix=$(env_name_or_prefix "${env_name}") + + # shellcheck disable=SC2086 + if conda run ${env_prefix} printenv "${env_key}"; then echo "[CHECK] Environment variable ${env_key} is defined in the Conda environment" else echo "[CHECK] Environment variable ${env_key} is not defined in the Conda environment!" 
diff --git a/.github/scripts/utils_conda.bash b/.github/scripts/utils_conda.bash index 5251dfc6f..96aae5dfe 100644 --- a/.github/scripts/utils_conda.bash +++ b/.github/scripts/utils_conda.bash @@ -106,22 +106,29 @@ create_conda_environment () { local conda_prefix=$(conda run -n base printenv CONDA_PREFIX) print_exec rm -rf "${conda_prefix}/envs/${env_name}" + # shellcheck disable=SC2155 + local env_prefix=$(env_name_or_prefix "${env_name}") + # The `-y` flag removes any existing Conda environment with the same name echo "[SETUP] Creating new Conda environment (Python ${python_version}) ..." - (exec_with_retries conda create -y --name "${env_name}" python="${python_version}") || return 1 + # shellcheck disable=SC2086 + (exec_with_retries conda create -y ${env_prefix} python="${python_version}") || return 1 echo "[SETUP] Upgrading PIP to latest ..." - (exec_with_retries conda run -n "${env_name}" pip install --upgrade pip) || return 1 + # shellcheck disable=SC2086 + (exec_with_retries conda run ${env_prefix} pip install --upgrade pip) || return 1 # The pyOpenSSL and cryptography packages versions need to line up for PyPI publishing to work # https://stackoverflow.com/questions/74981558/error-updating-python3-pip-attributeerror-module-lib-has-no-attribute-openss echo "[SETUP] Upgrading pyOpenSSL ..." - (exec_with_retries conda run -n "${env_name}" python -m pip install "pyOpenSSL>22.1.0") || return 1 + # shellcheck disable=SC2086 + (exec_with_retries conda run ${env_prefix} python -m pip install "pyOpenSSL>22.1.0") || return 1 # This test fails with load errors if the pyOpenSSL and cryptography package versions don't align echo "[SETUP] Testing pyOpenSSL import ..." 
(test_python_import_package "${env_name}" OpenSSL) || return 1 - echo "[SETUP] Installed Python version: $(conda run -n "${env_name}" python --version)" + # shellcheck disable=SC2086 + echo "[SETUP] Installed Python version: $(conda run ${env_prefix} python --version)" echo "[SETUP] Successfully created Conda environment: ${env_name}" } diff --git a/.github/scripts/utils_cuda.bash b/.github/scripts/utils_cuda.bash index 0263eb641..705ef8dc2 100644 --- a/.github/scripts/utils_cuda.bash +++ b/.github/scripts/utils_cuda.bash @@ -43,9 +43,13 @@ install_cuda () { # Clean up packages before installation conda_cleanup + # shellcheck disable=SC2155 + local env_prefix=$(env_name_or_prefix "${env_name}") + # Install CUDA packages echo "[INSTALL] Installing CUDA ${cuda_version} ..." - (exec_with_retries conda install --force-reinstall -n "${env_name}" -y cuda -c "nvidia/label/cuda-${cuda_version}") || return 1 + # shellcheck disable=SC2086 + (exec_with_retries conda install --force-reinstall ${env_prefix} -y cuda -c "nvidia/label/cuda-${cuda_version}") || return 1 # Ensure that nvcc is properly installed (test_binpath "${env_name}" nvcc) || return 1 @@ -58,18 +62,21 @@ install_cuda () { (test_filepath "${env_name}" libnvidia-ml.so) || return 1 echo "[INSTALL] Set environment variable NVML_LIB_PATH ..." - # shellcheck disable=SC2155 - local conda_prefix=$(conda run -n "${env_name}" printenv CONDA_PREFIX) + # shellcheck disable=SC2155,SC2086 + local conda_prefix=$(conda run ${env_prefix} printenv CONDA_PREFIX) # shellcheck disable=SC2155 local nvml_lib_path=$(find "${conda_prefix}" -name libnvidia-ml.so) - print_exec conda env config vars set -n "${env_name}" NVML_LIB_PATH="${nvml_lib_path}" + # shellcheck disable=SC2086 + print_exec conda env config vars set ${env_prefix} NVML_LIB_PATH="${nvml_lib_path}" # https://stackoverflow.com/questions/27686382/how-can-i-dump-all-nvcc-preprocessor-defines echo "[INFO] Printing out all preprocessor defines in nvcc ..." 
- print_exec conda run -n "${env_name}" nvcc --compiler-options -dM -E -x cu - < /dev/null + # shellcheck disable=SC2086 + print_exec conda run ${env_prefix} nvcc --compiler-options -dM -E -x cu - < /dev/null # Print nvcc version - print_exec conda run -n "${env_name}" nvcc --version + # shellcheck disable=SC2086 + print_exec conda run ${env_prefix} nvcc --version echo "[INSTALL] Successfully installed CUDA ${cuda_version}" } @@ -145,9 +152,13 @@ install_cudnn () { cd - || return 1 rm -rf "$tmp_dir" + # shellcheck disable=SC2155 + local env_prefix=$(env_name_or_prefix "${env_name}") + # Export the environment variables to the Conda environment echo "[INSTALL] Set environment variables CUDNN_INCLUDE_DIR and CUDNN_LIBRARY ..." - print_exec conda env config vars set -n "${env_name}" CUDNN_INCLUDE_DIR="${install_path}/include" CUDNN_LIBRARY="${install_path}/lib" + # shellcheck disable=SC2086 + print_exec conda env config vars set ${env_prefix} CUDNN_INCLUDE_DIR="${install_path}/include" CUDNN_LIBRARY="${install_path}/lib" echo "[INSTALL] Successfully installed cuDNN (for CUDA ${cuda_version})" } diff --git a/.github/scripts/utils_pytorch.bash b/.github/scripts/utils_pytorch.bash index 0cf7916dc..c586bc4dd 100644 --- a/.github/scripts/utils_pytorch.bash +++ b/.github/scripts/utils_pytorch.bash @@ -59,25 +59,30 @@ install_pytorch_conda () { # Clean up packages before installation conda_cleanup + # shellcheck disable=SC2155 + local env_prefix=$(env_name_or_prefix "${env_name}") + # Install PyTorch packages # NOTE: Installation of large package might fail due to corrupt package download # Use --force-reinstall to address this on retries - https://datascience.stackexchange.com/questions/41732/conda-verification-failed echo "[INSTALL] Attempting to install '${pytorch_package}' (${pytorch_version}, variant = ${pytorch_variant_type}) through Conda using channel '${pytorch_channel}' ..." 
# shellcheck disable=SC2086 - (exec_with_retries conda install --force-reinstall -n "${env_name}" -y ${pytorch_package} -c "${pytorch_channel}") || return 1 + (exec_with_retries conda install --force-reinstall ${env_prefix} -y ${pytorch_package} -c "${pytorch_channel}") || return 1 # Check that PyTorch is importable (test_python_import_package "${env_name}" torch.distributed) || return 1 # Print out the actual installed PyTorch version - installed_pytorch_version=$(conda run -n "${env_name}" python -c "import torch; print(torch.__version__)") + # shellcheck disable=SC2086 + installed_pytorch_version=$(conda run ${env_prefix} python -c "import torch; print(torch.__version__)") echo "[CHECK] NOTE: The installed version is: ${installed_pytorch_version}" # Run check for GPU variant if [ "$pytorch_variant_type" == "cuda" ]; then # Ensure that the PyTorch build is the GPU variant (i.e. contains cuDNN reference) # This test usually applies to the PyTorch nightly builds - if conda list -n "${env_name}" pytorch | grep cudnn; then + # shellcheck disable=SC2086 + if conda list ${env_prefix} pytorch | grep cudnn; then echo "[CHECK] The installed PyTorch ${pytorch_version} contains references to cuDNN" else echo "[CHECK] The installed PyTorch ${pytorch_version} appears to be the CPU-only version as it is missing references to cuDNN!" @@ -153,21 +158,26 @@ install_pytorch_pip () { local pytorch_channel="https://download.pytorch.org/whl/${pytorch_variant}/" fi + # shellcheck disable=SC2155 + local env_prefix=$(env_name_or_prefix "${env_name}") + echo "[INSTALL] Attempting to install PyTorch ${pytorch_version}+${pytorch_variant} through PIP using channel ${pytorch_channel} ..." 
# shellcheck disable=SC2086 - (exec_with_retries conda run -n "${env_name}" pip install ${pytorch_package} --extra-index-url ${pytorch_channel}) || return 1 + (exec_with_retries conda run ${env_prefix} pip install ${pytorch_package} --extra-index-url ${pytorch_channel}) || return 1 # Check that PyTorch is importable (test_python_import_package "${env_name}" torch.distributed) || return 1 # Print out the actual installed PyTorch version - installed_pytorch_version=$(conda run -n "${env_name}" python -c "import torch; print(torch.__version__)") + # shellcheck disable=SC2086 + installed_pytorch_version=$(conda run ${env_prefix} python -c "import torch; print(torch.__version__)") echo "[CHECK] NOTE: The installed version is: ${installed_pytorch_version}" if [ "$pytorch_variant_type" != "cpu" ]; then # Ensure that the PyTorch build is of the correct variant # This test usually applies to the PyTorch nightly builds - if conda run -n "${env_name}" pip list torch | grep torch | grep "${pytorch_variant}"; then + # shellcheck disable=SC2086 + if conda run ${env_prefix} pip list torch | grep torch | grep "${pytorch_variant}"; then echo "[CHECK] The installed PyTorch ${pytorch_version} is the correct variant (${pytorch_variant})" else echo "[CHECK] The installed PyTorch ${pytorch_version} appears to be an incorrect variant as it is missing references to ${pytorch_variant}!"