Skip to content

Commit

Permalink
Enable debug flags in compilation (#2734)
Browse files Browse the repository at this point in the history
Summary:
- Enable debug flags in compilation for CUDA and HIP variants

Pull Request resolved: #2734

Reviewed By: spcyppt

Differential Revision: D58607704

Pulled By: q10

fbshipit-source-id: a1670e107d0bdf087e59d118f8a0c8871ff75bfd
  • Loading branch information
q10 committed Jun 14, 2024
1 parent 1dbcad9 commit ddac8dd
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 52 deletions.
48 changes: 18 additions & 30 deletions .github/scripts/fbgemm_gpu_build.bash
Original file line number Diff line number Diff line change
Expand Up @@ -159,17 +159,10 @@ __configure_fbgemm_gpu_build_rocm () {
print_exec conda env config vars set ${env_prefix} PYTORCH_ROCM_ARCH="${arch_list}"

echo "[BUILD] Setting ROCm build args ..."
# shellcheck disable=SC2155
local cxx_flags="-DTORCH_USE_HIP_DSA"

build_args=(
--package_variant=rocm
# HIP_ROOT_DIR now required for HIP to be correctly detected by CMake
-DHIP_ROOT_DIR=/opt/rocm
# Enable device-side assertions in HIP
# https://stackoverflow.com/questions/44284275/passing-compiler-options-in-cmake-command-line
-DCMAKE_C_FLAGS="'${cxx_flags}'"
-DCMAKE_CXX_FLAGS="'${cxx_flags}'"
)
}

Expand Down Expand Up @@ -251,26 +244,14 @@ __configure_fbgemm_gpu_build_genai () {
done
}

# shellcheck disable=SC2120
__configure_fbgemm_gpu_build () {
local fbgemm_variant="$1"
local fbgemm_variant_targets="$2"
if [ "$fbgemm_variant" == "" ]; then
echo "Usage: ${FUNCNAME[0]} FBGEMM_VARIANT"
echo "Example(s):"
echo " ${FUNCNAME[0]} cpu # CPU-only variant using Clang"
echo " ${FUNCNAME[0]} cuda # CUDA variant for default target(s)"
echo " ${FUNCNAME[0]} cuda '7.0;8.0' # CUDA variant for custom target(s)"
echo " ${FUNCNAME[0]} rocm # ROCm variant for default target(s)"
echo " ${FUNCNAME[0]} rocm 'gfx906;gfx908;gfx90a' # ROCm variant for custom target(s)"
return 1
else
echo "################################################################################"
echo "# Configure FBGEMM-GPU Build"
echo "#"
echo "# [$(date --utc +%FT%T.%3NZ)] + ${FUNCNAME[0]} ${*}"
echo "################################################################################"
echo ""
fi
echo "################################################################################"
echo "# Configure FBGEMM-GPU Build"
echo "#"
echo "# [$(date --utc +%FT%T.%3NZ)] + ${FUNCNAME[0]} ${*}"
echo "################################################################################"
echo ""

# shellcheck disable=SC2155
local env_prefix=$(env_name_or_prefix "${env_name}")
Expand Down Expand Up @@ -302,6 +283,13 @@ __configure_fbgemm_gpu_build () {
--verbose
)

# Set debugging options
if [ "$fbgemm_release_channel" != "release" ] || [ "$BUILD_DEBUG" -eq 1 ]; then
build_args+=(
--debug
)
fi

# shellcheck disable=SC2145
echo "[BUILD] FBGEMM_GPU build arguments have been set: ${build_args[@]}"
}
Expand Down Expand Up @@ -502,8 +490,8 @@ run_fbgemm_gpu_postbuild_checks () {
return 1
fi

__print_library_infos
__verify_library_symbols
__print_library_infos || return 1
__verify_library_symbols || return 1
}

################################################################################
Expand Down Expand Up @@ -531,7 +519,7 @@ build_fbgemm_gpu_package () {

# Set up and configure the build
__build_fbgemm_gpu_common_pre_steps || return 1
__configure_fbgemm_gpu_build "${fbgemm_variant}" "${fbgemm_variant_targets}" || return 1
__configure_fbgemm_gpu_build || return 1

echo "################################################################################"
echo "# Build FBGEMM-GPU Package (Wheel)"
Expand Down Expand Up @@ -596,7 +584,7 @@ build_fbgemm_gpu_install () {

# Set up and configure the build
__build_fbgemm_gpu_common_pre_steps || return 1
__configure_fbgemm_gpu_build "${fbgemm_variant}" "${fbgemm_variant_targets}" || return 1
__configure_fbgemm_gpu_build || return 1

echo "################################################################################"
echo "# Build + Install FBGEMM-GPU Package"
Expand Down
36 changes: 20 additions & 16 deletions .github/scripts/fbgemm_gpu_install.bash
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,6 @@ __install_print_dependencies_info () {
echo ""
}

__install_list_subpackages_info () {
  # Prints a short report listing the attributes exposed by the `fbgemm_gpu`
  # and `fbgemm_gpu.experimental` Python modules.
  #
  # Globals:  env_prefix (read) - expanded unquoted into the `conda run` call
  # Outputs:  six lines on stdout (banner, title, two listings, banner, blank)

  # Horizontal rule printed above and below the report
  local rule="################################################################################"

  # One-liners handed to `python -c` for each module
  local top_level_snippet="import fbgemm_gpu; print(dir(fbgemm_gpu))"
  local experimental_snippet="import fbgemm_gpu.experimental; print(dir(fbgemm_gpu.experimental))"

  # Declaring and assigning in one statement keeps a failed import from
  # changing the function's status; the listing is simply whatever was printed.
  # shellcheck disable=SC2086,SC2155
  local top_level_listing=$(conda run ${env_prefix} python -c "${top_level_snippet}")
  # shellcheck disable=SC2086,SC2155
  local experimental_listing=$(conda run ${env_prefix} python -c "${experimental_snippet}")

  printf '%s\n' \
    "${rule}" \
    "[CHECK] FBGEMM_GPU Experimental Packages" \
    "[CHECK] fbgemm_gpu: ${top_level_listing}" \
    "[CHECK] fbgemm_gpu.experimental: ${experimental_listing}" \
    "${rule}" \
    ""
}

__install_fetch_version_and_variant_info () {
echo "[INSTALL] Checking imports and symbols ..."
(test_python_import_package "${env_name}" fbgemm_gpu) || return 1
Expand All @@ -62,6 +49,23 @@ __install_fetch_version_and_variant_info () {
echo ""
}

__install_list_subpackages_info () {
  # Prints a short report listing the attributes exposed by the `fbgemm_gpu`
  # module and, for the cuda and genai variants only, by
  # `fbgemm_gpu.experimental`.
  #
  # Globals:
  #   env_prefix                   (read) - expanded unquoted into `conda run`
  #   installed_fbgemm_gpu_variant (read) - selects whether the experimental
  #                                         subpackage is queried
  # Outputs:
  #   Six lines on stdout (banner, title, two listings, banner, blank line).
  # Returns:
  #   0; the listing is informational, so a failed import leaves the
  #   corresponding entry empty rather than failing the function.

  # Declare both results up front.  Without this, variants that skip the
  # experimental query would read an undeclared name below, which picks up any
  # same-named variable from the caller's scope and aborts under `set -u`.
  local fbgemm_gpu_packages=""
  local experimental_packages=""

  # shellcheck disable=SC2086
  fbgemm_gpu_packages=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(dir(fbgemm_gpu))") || true

  if [[ "${installed_fbgemm_gpu_variant:-}" == "cuda" || "${installed_fbgemm_gpu_variant:-}" == "genai" ]]; then
    # shellcheck disable=SC2086
    experimental_packages=$(conda run ${env_prefix} python -c "import fbgemm_gpu.experimental; print(dir(fbgemm_gpu.experimental))") || true
  fi

  echo "################################################################################"
  echo "[CHECK] FBGEMM_GPU Experimental Packages"
  echo "[CHECK] fbgemm_gpu: ${fbgemm_gpu_packages}"
  echo "[CHECK] fbgemm_gpu.experimental: ${experimental_packages}"
  echo "################################################################################"
  echo ""
}

__install_check_operator_registrations () {
echo "[INSTALL] Check for operator registrations ..."
if [ "$installed_fbgemm_gpu_variant" == "genai" ]; then
Expand Down Expand Up @@ -103,12 +107,12 @@ __fbgemm_gpu_post_install_checks () {
# Print PyTorch and CUDA versions for sanity check
__install_print_dependencies_info

# List out FBGEMM_GPU subpackages
__install_list_subpackages_info

# Fetch the version and variant info from the package
__install_fetch_version_and_variant_info

# List out FBGEMM_GPU subpackages
__install_list_subpackages_info

echo "[INSTALL] Check for installation of Python sources ..."
if [ "$installed_fbgemm_gpu_variant" != "genai" ]; then
(test_python_import_package "${env_name}" fbgemm_gpu.split_embedding_codegen_lookup_invokers) || return 1
Expand Down
9 changes: 9 additions & 0 deletions .github/scripts/fbgemm_gpu_test.bash
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ __configure_fbgemm_gpu_test_cpu () {
}

__configure_fbgemm_gpu_test_cuda () {
# Disabled by default; enable for debugging
# shellcheck disable=SC2086
# print_exec conda env config vars set ${env_prefix} CUDA_LAUNCH_BLOCKING=1

ignored_tests=(
./tbe/ssd/ssd_split_table_batched_embeddings_test.py
)
Expand Down Expand Up @@ -407,6 +411,11 @@ test_fbgemm_gpu_setup_and_pip_install () {
echo "# Run Result : $([ $retcode -eq 0 ] && echo "PASSED" || echo "FAILED")"
echo "################################################################################"

if [ $retcode -eq 0 ]; then
# Clean out environment only if there were no errors
conda remove -n "$env_name" -y --all
fi

cd - || return 1
return $retcode
}
Expand Down
2 changes: 1 addition & 1 deletion .github/scripts/nova_dir.bash
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ export BUILD_FROM_NOVA=1
# Export a `conda run` command prefix bound to the target environment, when
# CONDA_ENV is provided, and echo it for the build log.
if [[ -n "${CONDA_ENV:-}" ]]; then
  export CONDA_RUN="conda run --no-capture-output -p ${CONDA_ENV}"
  echo "$CONDA_RUN"
fi

# Export TORCH_CUDA_ARCH_LIST for the recognized CU_VERSION tags and echo the
# chosen value.  Each tag is handled exactly once: cu124 previously appeared
# twice with conflicting lists, so the first value was echoed and then
# immediately overwritten by the second.  Unrecognized or unset tags leave the
# variable untouched.
case "${CU_VERSION:-}" in
  cu118)
    export TORCH_CUDA_ARCH_LIST='7.0;8.0'
    echo "$TORCH_CUDA_ARCH_LIST"
    ;;
  cu121 | cu124)
    export TORCH_CUDA_ARCH_LIST='7.0;8.0;9.0;9.0a'
    echo "$TORCH_CUDA_ARCH_LIST"
    ;;
esac
20 changes: 15 additions & 5 deletions fbgemm_gpu/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ def from_args(cls, argv: List[str]):
action="store_true",
help="Print verbose logs during the build.",
)
parser.add_argument(
"--debug",
action="store_true",
help="Enable DEBUG features in compilation such as PyTorch device-side assertions.",
)
parser.add_argument(
"--dryrun",
action="store_true",
Expand Down Expand Up @@ -237,14 +242,19 @@ def _get_cxx11_abi():
_get_cxx11_abi(),
]

cxx_args = []
cxx_flags = []

if self.args.verbose:
print("[SETUP.PY] Building in VERBOSE mode ...")
cmake_args.extend(
["-DCMAKE_VERBOSE_MAKEFILE=ON", "-DCMAKE_EXPORT_COMPILE_COMMANDS=TRUE"]
)

if self.args.debug:
# Enable device-side assertions in CUDA and HIP
# https://stackoverflow.com/questions/44284275/passing-compiler-options-in-cmake-command-line
cxx_flags.extend(["-DTORCH_USE_CUDA_DSA", "-DTORCH_USE_HIP_DSA"])

if self.args.package_variant == "cpu":
print("[SETUP.PY] Building the CPU-ONLY variant of FBGEMM_GPU ...")
cmake_args.append("-DFBGEMM_CPU_ONLY=ON")
Expand All @@ -258,7 +268,7 @@ def _get_cxx11_abi():

if self.args.nccl_lib_path:
nccl_root = os.path.dirname(os.path.dirname(self.args.nccl_lib_path))
cxx_args.extend([f"-L{nccl_root}/lib"])
cxx_flags.extend([f"-L{nccl_root}/lib"])
cmake_args.extend(
[
f"-DNCCL_INCLUDE_DIRS={nccl_root}/include",
Expand All @@ -270,7 +280,7 @@ def _get_cxx11_abi():
print("[SETUP.PY] Setting CMake flags ...")
path = self.args.cxxprefix

cxx_args.extend(
cxx_flags.extend(
[
"-fopenmp=libgomp",
"-stdlib=libstdc++",
Expand All @@ -286,8 +296,8 @@ def _get_cxx11_abi():

cmake_args.extend(
[
f"-DCMAKE_C_FLAGS='{' '.join(cxx_args)}'",
f"-DCMAKE_CXX_FLAGS='{' '.join(cxx_args)}'",
f"-DCMAKE_C_FLAGS='{' '.join(cxx_flags)}'",
f"-DCMAKE_CXX_FLAGS='{' '.join(cxx_flags)}'",
]
)

Expand Down

0 comments on commit ddac8dd

Please sign in to comment.