diff --git a/.github/scripts/fbgemm_gpu_test.bash b/.github/scripts/fbgemm_gpu_test.bash
index 5769cc393..06c0e4e5a 100644
--- a/.github/scripts/fbgemm_gpu_test.bash
+++ b/.github/scripts/fbgemm_gpu_test.bash
@@ -205,6 +205,22 @@ run_fbgemm_gpu_tests () {
   done
 }
 
+test_all_fbgemm_gpu_modules () {
+  local env_name="$1"
+  local fbgemm_variant="$2"
+
+  local target_directories=(
+    fbgemm_gpu/test
+    fbgemm_gpu/experimental/example/test
+  )
+
+  for test_dir in "${target_directories[@]}"; do
+    cd "${test_dir}" || return 1
+    run_fbgemm_gpu_tests "${env_name}" "${fbgemm_variant}" || return 1
+    cd - || return 1
+  done
+}
+
 
 ################################################################################
 # FBGEMM_GPU Test Bulk-Combination Functions
@@ -292,9 +308,8 @@ test_fbgemm_gpu_build_and_install () {
   cd ~/FBGEMM/ || return 1
   install_fbgemm_gpu_wheel "${env_name}" fbgemm_gpu/dist/*.whl || return 1
 
-  cd ~/FBGEMM/fbgemm_gpu/test || return 1
-  run_fbgemm_gpu_tests "${env_name}" "${pytorch_variant_type}" || return 1
-  cd - || return 1
+  cd ~/FBGEMM/ || return 1
+  test_all_fbgemm_gpu_modules "${env_name}" "${pytorch_variant_type}" || return 1
 }
 
 test_fbgemm_gpu_setup_and_pip_install () {
@@ -323,11 +338,11 @@ test_fbgemm_gpu_setup_and_pip_install () {
     local env_name="test_py${py_version}_pytorch_${pytorch_channel_version}_fbgemm_${fbgemm_gpu_channel_version}_${variant_type}/${variant_version}"
     local env_name="${env_name//\//_}"
 
-    test_setup_conda_environment "${env_name}" 'no-compiler' "${py_version}" pip "${pytorch_channel_version}" "${variant_type}" "${variant_version}" || return 1
-    install_fbgemm_gpu_pip "${env_name}" "${fbgemm_gpu_channel_version}" "${variant_type}/${variant_version}" || return 1
-    cd ~/FBGEMM/fbgemm_gpu/test || return 1
+    test_setup_conda_environment "${env_name}" 'no-compiler' "${py_version}" pip "${pytorch_channel_version}" "${variant_type}" "${variant_version}" || return 1
+    install_fbgemm_gpu_pip "${env_name}" "${fbgemm_gpu_channel_version}" "${variant_type}/${variant_version}" || return 1
+    cd ~/FBGEMM || return 1
 
-    run_fbgemm_gpu_tests "${env_name}" "${variant_type}";
+    test_all_fbgemm_gpu_modules "${env_name}" "${variant_type}";
     local retcode=$?
 
     echo "################################################################################"
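Note: the new wrapper is meant to be invoked from the FBGEMM repository root, as the updated call sites below show. A minimal usage sketch, assuming the test scripts have been sourced into the current shell; the environment name `build_env` is hypothetical:

# Run the test suites of all registered modules for the CPU variant.
# Assumes fbgemm_gpu_test.bash has been sourced and `build_env` (a
# hypothetical name) is an existing conda environment.
cd ~/FBGEMM
test_all_fbgemm_gpu_modules build_env cpu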
diff --git a/.github/scripts/nova_postscript.bash b/.github/scripts/nova_postscript.bash
index a9f2ad992..dc3871ca7 100644
--- a/.github/scripts/nova_postscript.bash
+++ b/.github/scripts/nova_postscript.bash
@@ -42,8 +42,8 @@ else
 fi
 
 $CONDA_RUN python3 -c "import torch; print('cuda.is_available() ', torch.cuda.is_available()); print ('device_count() ',torch.cuda.device_count());"
-cd "${FBGEMM_REPO}/fbgemm_gpu/test" || { echo "[NOVA] Failed to cd to fbgemm_gpu/test from $(pwd)"; };
-run_fbgemm_gpu_tests "${BUILD_ENV_NAME}" "${fbgemm_variant}"
+cd "${FBGEMM_REPO}" || { echo "[NOVA] Failed to cd to ${FBGEMM_REPO} from $(pwd)"; };
+test_all_fbgemm_gpu_modules "${BUILD_ENV_NAME}" "${fbgemm_variant}"
 
 # Workaround EACCES: permission denied error at checkout step
 chown -R 1000:1000 /__w/FBGEMM/FBGEMM/ || echo "Unable to chown 1000:1000 from $USER, uid: $(id -u)"
diff --git a/.github/scripts/utils_base.bash b/.github/scripts/utils_base.bash
index 7ea56f816..bb814617f 100644
--- a/.github/scripts/utils_base.bash
+++ b/.github/scripts/utils_base.bash
@@ -88,7 +88,7 @@ env_name_or_prefix () {
 }
 
 test_network_connection () {
-  wget -q --timeout 1 pypi.org -O /dev/null
+  exec_with_retries 3 wget -q --timeout 1 pypi.org -O /dev/null
   local exit_status=$?
 
   # https://man7.org/linux/man-pages/man1/wget.1.html
@@ -96,7 +96,8 @@ test_network_connection () {
     echo "[CHECK] Network does not appear to be blocked."
   else
     echo "[CHECK] Network check exit status: ${exit_status}"
-    echo "[CHECK] Network appears to be blocked; please proxy the network connetions, i.e. re-run the command prefixed with 'with-proxy'."
+    echo "[CHECK] Network appears to be blocked or suffering from a poor connection."
+    echo "[CHECK] Please remember to proxy the network connections if needed, i.e. re-run the command prefixed with 'with-proxy'."
     return 1
   fi
 }
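Note: `exec_with_retries` is defined elsewhere in utils_base.bash; the contract the change above relies on is simply "re-run the wrapped command up to N extra times, keeping the last exit status". A hedged re-sketch of that contract, with hypothetical names and internals, not the actual implementation:

# Attempt the command; retry up to max_retries times on failure and
# propagate the last exit status.
retry_sketch () {
  local max_retries="$1"; shift
  local status=0
  for ((i = 0; i <= max_retries; i++)); do
    "$@" && return 0
    status=$?
    echo "[EXEC] Attempt ${i} failed with status ${status}"
  done
  return "${status}"
}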
diff --git a/.github/workflows/fbgemm_gpu_ci_cpu.yml b/.github/workflows/fbgemm_gpu_ci_cpu.yml
index 9d19b06f9..e5fd8d0ad 100644
--- a/.github/workflows/fbgemm_gpu_ci_cpu.yml
+++ b/.github/workflows/fbgemm_gpu_ci_cpu.yml
@@ -182,7 +182,7 @@ jobs:
 
     - name: Test with PyTest
       timeout-minutes: ${{ matrix.host-machine.timeout }}
-      run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV cpu
+      run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV cpu
 
     - name: Push Wheel to PyPI
       if: ${{ (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true')) && matrix.compiler == 'gcc' }}
diff --git a/.github/workflows/fbgemm_gpu_ci_cuda.yml b/.github/workflows/fbgemm_gpu_ci_cuda.yml
index b76870245..fd68558f2 100644
--- a/.github/workflows/fbgemm_gpu_ci_cuda.yml
+++ b/.github/workflows/fbgemm_gpu_ci_cuda.yml
@@ -202,7 +202,7 @@ jobs:
 
     - name: Test with PyTest
       timeout-minutes: 20
-      run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV cuda
+      run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV cuda
 
     - name: Push Wheel to PyPI
       if: ${{ (github.event_name == 'schedule' && matrix.cuda-version == matrix.cuda-version-publish) || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == matrix.cuda-version-publish) }}
diff --git a/.github/workflows/fbgemm_gpu_ci_rocm.yml b/.github/workflows/fbgemm_gpu_ci_rocm.yml
index f3fca6f5b..4e35f8cd5 100644
--- a/.github/workflows/fbgemm_gpu_ci_rocm.yml
+++ b/.github/workflows/fbgemm_gpu_ci_rocm.yml
@@ -191,4 +191,4 @@ jobs:
 
     - name: Test with PyTest
       timeout-minutes: 20
-      run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV rocm
+      run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV rocm
diff --git a/.github/workflows/fbgemm_gpu_pip.yml b/.github/workflows/fbgemm_gpu_pip.yml
index 8ef3f1d85..342f56294 100644
--- a/.github/workflows/fbgemm_gpu_pip.yml
+++ b/.github/workflows/fbgemm_gpu_pip.yml
@@ -99,7 +99,7 @@ jobs:
 
     - name: Test with PyTest
       timeout-minutes: ${{ matrix.host-machine.timeout }}
-      run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV cpu
+      run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV cpu
 
 
   test_pypi_install_cuda:
@@ -159,7 +159,7 @@ jobs:
 
     - name: Test with PyTest
       timeout-minutes: 20
-      run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV cuda
+      run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV cuda
 
 
   test_pypi_install_rocm:
@@ -225,4 +225,4 @@ jobs:
 
     - name: Test with PyTest
       timeout-minutes: 20
-      run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV rocm
+      run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV rocm
diff --git a/.github/workflows/fbgemm_gpu_release_cpu.yml b/.github/workflows/fbgemm_gpu_release_cpu.yml
index 426143814..a21a90eb0 100644
--- a/.github/workflows/fbgemm_gpu_release_cpu.yml
+++ b/.github/workflows/fbgemm_gpu_release_cpu.yml
@@ -174,7 +174,7 @@ jobs:
 
     - name: Test with PyTest
       timeout-minutes: ${{ matrix.host-machine.timeout }}
-      run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV cpu
+      run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV cpu
 
     - name: Push FBGEMM_GPU (CPU version) Binary to PYPI
       if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' }}
diff --git a/.github/workflows/fbgemm_gpu_release_cuda.yml b/.github/workflows/fbgemm_gpu_release_cuda.yml
index ea806f357..c64082660 100644
--- a/.github/workflows/fbgemm_gpu_release_cuda.yml
+++ b/.github/workflows/fbgemm_gpu_release_cuda.yml
@@ -184,7 +184,7 @@ jobs:
 
     - name: Test with PyTest
       timeout-minutes: 20
-      run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV cuda
+      run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV cuda
 
     - name: Push FBGEMM_GPU Binary to PYPI
       if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == github.event.inputs.cuda_version }}
diff --git a/cmake/modules/CudaSetup.cmake b/cmake/modules/CudaSetup.cmake
new file mode 100644
index 000000000..d86963109
--- /dev/null
+++ b/cmake/modules/CudaSetup.cmake
@@ -0,0 +1,28 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/modules/Utilities.cmake)
+
+
+################################################################################
+# CUDA Setup
+################################################################################
+
+# Set NVML_LIB_PATH if provided, or detect the default lib path
+if(NOT NVML_LIB_PATH)
+  set(DEFAULT_NVML_LIB_PATH
+      "${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so")
+
+  if(EXISTS ${DEFAULT_NVML_LIB_PATH})
+    message(STATUS "Setting NVML_LIB_PATH: \
+        ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so")
+    set(NVML_LIB_PATH "${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so")
+  endif()
+endif()
+
+if(NVML_LIB_PATH)
+  message(STATUS "Found NVML_LIB_PATH: ${NVML_LIB_PATH}")
+endif()
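Note: because the stub library is only auto-detected when NVML_LIB_PATH is unset, a build can point it at a concrete libnvidia-ml.so instead. A hypothetical configure invocation from fbgemm_gpu/; the .so path is illustrative and machine-specific:

# Override the NVML stub detection above with an explicit library path.
cmake -S . -B build -DNVML_LIB_PATH=/usr/lib/x86_64-linux-gnu/libnvidia-ml.so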
diff --git a/cmake/modules/CxxCompilerSetup.cmake b/cmake/modules/CxxCompilerSetup.cmake
new file mode 100644
index 000000000..11fb3f891
--- /dev/null
+++ b/cmake/modules/CxxCompilerSetup.cmake
@@ -0,0 +1,83 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/modules/Utilities.cmake)
+
+
+################################################################################
+# CMake C++ Setup
+################################################################################
+
+# SET THE C AND C++ VERSIONS HERE
+set(C_VERSION 17)
+set(CXX_VERSION 20)
+
+# Set the default C++ standard to CXX_VERSION if CMAKE_CXX_STANDARD is not
+# supplied by CMake command invocation.
+# Individual targets can have this value overridden; see
+# https://cmake.org/cmake/help/latest/variable/CMAKE_CXX_STANDARD.html
+# https://cmake.org/cmake/help/latest/prop_tgt/CXX_STANDARD.html
+# https://cmake.org/cmake/help/latest/prop_tgt/HIP_STANDARD.html
+if(NOT CMAKE_CXX_STANDARD)
+  set(CMAKE_CXX_STANDARD ${CXX_VERSION})
+  set(CMAKE_HIP_STANDARD ${CXX_VERSION})
+  set(CXX_STANDARD ${CXX_VERSION})
+  set(HIP_STANDARD ${CXX_VERSION})
+endif()
+set(CMAKE_CXX_EXTENSIONS OFF)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(HIP_STANDARD_REQUIRED ON)
+
+# Set the default C standard to C_VERSION if CMAKE_C_STANDARD is not supplied
+# by CMake command invocation.
+# Individual targets can have this value overridden; see
+# https://cmake.org/cmake/help/latest/variable/CMAKE_C_STANDARD.html
+# https://cmake.org/cmake/help/latest/prop_tgt/C_STANDARD.html
+if(NOT CMAKE_C_STANDARD)
+  set(C_STANDARD ${C_VERSION})
+  set(CMAKE_C_STANDARD ${C_VERSION})
+endif()
+set(CMAKE_C_EXTENSIONS OFF)
+set(CMAKE_C_STANDARD_REQUIRED ON)
+
+if(DEFINED GLIBCXX_USE_CXX11_ABI)
+  if(${GLIBCXX_USE_CXX11_ABI} EQUAL 1)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=1")
+  else()
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0")
+  endif()
+endif()
+
+BLOCK_PRINT(
+  "Default C compiler flags"
+  "(values may be overridden by CMAKE_CXX_STANDARD and CXX_STANDARD):"
+  ""
+  "${CMAKE_C_FLAGS}"
+)
+
+BLOCK_PRINT(
+  "Default C++ compiler flags"
+  "(values may be overridden by CMAKE_CXX_STANDARD and CXX_STANDARD):"
+  ""
+  "${CMAKE_CXX_FLAGS}"
+)
+
+# Strip all symbols from the .SO file after building
+add_link_options($<$<CONFIG:Release>:-s>)
+
+# Set flags for AVX2
+set(AVX2_FLAGS "-mavx2;-mf16c;-mfma;-fopenmp")
+if(NOT FBGEMM_CPU_ONLY AND WSL_MODE)
+  # NVCC in WSL complains about unknown -mavx options
+  # https://github.com/pytorch/FBGEMM/issues/2135
+  set(AVX2_FLAGS "-Xcompiler;-mavx;-Xcompiler;-mavx2;-Xcompiler;-mf16c;-Xcompiler;-mfma;-fopenmp")
+endif()
+
+# Set flags for AVX512
+set(AVX512_FLAGS "-mavx2;-mf16c;-mfma;-mavx512f;-mavx512bw;-mavx512dq;-mavx512vl;-fopenmp")
+if(NOT FBGEMM_CPU_ONLY AND WSL_MODE)
+  set(AVX512_FLAGS "-Xcompiler;-mavx2;-Xcompiler;-mf16c;-Xcompiler;-mfma;-Xcompiler;-mavx512f;-Xcompiler;-mavx512bw;-Xcompiler;-mavx512dq;-Xcompiler;-mavx512vl;-fopenmp")
+endif()
diff --git a/cmake/modules/FindAVX.cmake b/cmake/modules/FindAVX.cmake
index 0cf20f5a4..5bd8cffd6 100644
--- a/cmake/modules/FindAVX.cmake
+++ b/cmake/modules/FindAVX.cmake
@@ -82,7 +82,6 @@ MACRO(CHECK_SSE lang type flags)
   ENDIF()
 
   MARK_AS_ADVANCED(${lang}_${type}_FOUND ${lang}_${type}_FLAGS)
-
 ENDMACRO()
 
 CHECK_SSE(C "AVX" " ;-mavx;/arch:AVX")
diff --git a/cmake/modules/PyTorchSetup.cmake b/cmake/modules/PyTorchSetup.cmake
new file mode 100644
index 000000000..a5b73eb6f
--- /dev/null
+++ b/cmake/modules/PyTorchSetup.cmake
@@ -0,0 +1,26 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/modules/Utilities.cmake)
+
+
+################################################################################
+# PyTorch Dependencies Setup
+################################################################################
+
+find_package(Torch REQUIRED)
+
+#
+# Torch CUDA extensions are normally compiled with the flags below. However, we
+# disabled -D__CUDA_NO_HALF_CONVERSIONS__ here as it caused "error: no suitable
+# constructor exists to convert from "int" to "__half" errors in
+# gen_embedding_forward_quantized_split_[un]weighted_codegen_cuda.cu
+#
+
+set(TORCH_CUDA_OPTIONS
+    --expt-relaxed-constexpr -D__CUDA_NO_HALF_OPERATORS__
+    # -D__CUDA_NO_HALF_CONVERSIONS__
+    -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__)
diff --git a/cmake/modules/RocmSetup.cmake b/cmake/modules/RocmSetup.cmake
new file mode 100644
index 000000000..7e37893bf
--- /dev/null
+++ b/cmake/modules/RocmSetup.cmake
@@ -0,0 +1,38 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/modules/Utilities.cmake)
+
+
+################################################################################
+# ROCm and HIPify Setup
+################################################################################
+
+if(USE_ROCM)
+  # Load CMake modules
+  list(APPEND CMAKE_MODULE_PATH
+       "${PROJECT_SOURCE_DIR}/cmake"
+       "${THIRDPARTY}/hipify_torch/cmake")
+  include(Hip)
+  include(Hipify)
+
+  # Configure compiler for HIP
+  list(APPEND HIP_HCC_FLAGS
+       " \"-Wno-#pragma-messages\" "
+       " \"-Wno-#warnings\" "
+       -Wno-cuda-compat
+       -Wno-deprecated-declarations
+       -Wno-format
+       -Wno-ignored-attributes
+       -Wno-unused-result)
+
+  BLOCK_PRINT(
+    "HIP found: ${HIP_FOUND}"
+    "HIPCC compiler flags:"
+    ""
+    "${HIP_HCC_FLAGS}"
+  )
+endif()
diff --git a/cmake/modules/Utilities.cmake b/cmake/modules/Utilities.cmake
new file mode 100644
index 000000000..2630a22df
--- /dev/null
+++ b/cmake/modules/Utilities.cmake
@@ -0,0 +1,20 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+################################################################################
+# Utility Functions
+################################################################################
+
+function(BLOCK_PRINT)
+  message("")
+  message("")
+  message("================================================================================")
+  foreach(ARG IN LISTS ARGN)
+    message("${ARG}")
+  endforeach()
+  message("================================================================================")
+  message("")
+endfunction()
diff --git a/fbgemm_gpu/CMakeLists.txt b/fbgemm_gpu/CMakeLists.txt
index b23bf74af..7b3ba7ecd 100644
--- a/fbgemm_gpu/CMakeLists.txt
+++ b/fbgemm_gpu/CMakeLists.txt
@@ -10,20 +10,12 @@
 
 cmake_minimum_required(VERSION 3.25.0 FATAL_ERROR)
 
-function(BLOCK_PRINT)
-  message("")
-  message("")
-  message("================================================================================")
-  foreach(ARG IN LISTS ARGN)
-    message("${ARG}")
-  endforeach()
-  message("================================================================================")
-  message("")
-endfunction()
-
 set(CMAKEMODULES ${CMAKE_CURRENT_SOURCE_DIR}/../cmake/modules)
 set(FBGEMM ${CMAKE_CURRENT_SOURCE_DIR}/..)
 set(THIRDPARTY ${FBGEMM}/third_party)
+set(CMAKE_CODEGEN_DIR ${CMAKE_CURRENT_SOURCE_DIR}/codegen)
+
+include(${CMAKEMODULES}/Utilities.cmake)
 
 
 ################################################################################
@@ -53,81 +45,13 @@ else()
 endif()
 
 
-################################################################################
-# FBGEMM_GPU C++ Setup
-################################################################################
-
-# Set the default C++ standard to C++20 if CMAKE_CXX_STANDARD is not supplied
-# by CMake command invocation.
-# Individual targets can have this value overridden; see
-# https://cmake.org/cmake/help/latest/variable/CMAKE_CXX_STANDARD.html
-# https://cmake.org/cmake/help/latest/prop_tgt/CXX_STANDARD.html
-# https://cmake.org/cmake/help/latest/prop_tgt/HIP_STANDARD.html
-if(NOT CMAKE_CXX_STANDARD)
-  set(CMAKE_CXX_STANDARD 20)
-  set(CMAKE_HIP_STANDARD 20)
-  set(CXX_STANDARD 20)
-  set(HIP_STANDARD 20)
-endif()
-set(CMAKE_CXX_EXTENSIONS OFF)
-set(CMAKE_CXX_STANDARD_REQUIRED ON)
-set(HIP_STANDARD_REQUIRED ON)
-
-# Set the default C standard to C17
-# Individual targets can have this value overridden; see
-# https://cmake.org/cmake/help/latest/variable/CMAKE_C_STANDARD.html
-# https://cmake.org/cmake/help/latest/prop_tgt/C_STANDARD.html
-set(C_STANDARD 20)
-set(CMAKE_C_STANDARD 17)
-set(CMAKE_C_EXTENSIONS OFF)
-set(CMAKE_C_STANDARD_REQUIRED ON)
-
-if(DEFINED GLIBCXX_USE_CXX11_ABI)
-  if(${GLIBCXX_USE_CXX11_ABI} EQUAL 1)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=1")
-  else()
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0")
-  endif()
-endif()
-
-BLOCK_PRINT(
-  "Default C compiler flags"
-  "(values may be overridden by CMAKE_CXX_STANDARD and CXX_STANDARD):"
-  ""
-  "${CMAKE_C_FLAGS}"
-)
-
-BLOCK_PRINT(
-  "Default C++ compiler flags"
-  "(values may be overridden by CMAKE_CXX_STANDARD and CXX_STANDARD):"
-  ""
-  "${CMAKE_CXX_FLAGS}"
-)
-
-# Strip all symbols from the .SO file after building
-add_link_options($<$<CONFIG:Release>:-s>)
-
-# Set flags for AVX2
-set(AVX2_FLAGS "-mavx2;-mf16c;-mfma;-fopenmp")
-if(NOT FBGEMM_CPU_ONLY AND WSL_MODE)
-  # NVCC in WSL complains about unknown -mavx options
-  # https://github.com/pytorch/FBGEMM/issues/2135
-  set(AVX2_FLAGS "-Xcompiler;-mavx;-Xcompiler;-mavx2;-Xcompiler;-mf16c;-Xcompiler;-mfma;-fopenmp")
-endif()
-
-# Set flags for AVX512
-set(AVX512_FLAGS "-mavx2;-mf16c;-mfma;-mavx512f;-mavx512bw;-mavx512dq;-mavx512vl;-fopenmp")
-if(NOT FBGEMM_CPU_ONLY AND WSL_MODE)
-  set(AVX512_FLAGS "-Xcompiler;-mavx2;-Xcompiler;-mf16c;-Xcompiler;-mfma;-Xcompiler;-mavx512f;-Xcompiler;-mavx512bw;-Xcompiler;-mavx512dq;-Xcompiler;-mavx512vl;-fopenmp")
-endif()
-
-set(CMAKE_CODEGEN_DIR ${CMAKE_CURRENT_SOURCE_DIR}/codegen)
-
-
 ################################################################################
 # FBGEMM_GPU Build Kickstart
 ################################################################################
 
+# FBGEMM_GPU C++ Setup - must be set BEFORE project declaration
+include(${CMAKEMODULES}/CxxCompilerSetup.cmake)
+
 if(SKBUILD)
   BLOCK_PRINT("The project is built using scikit-build")
 endif()
@@ -135,87 +59,26 @@ endif()
 if(FBGEMM_CPU_ONLY OR USE_ROCM)
   project(
     fbgemm_gpu
-    VERSION 0.3.1
+    VERSION 0.7.0
     LANGUAGES CXX C)
 else()
   project(
     fbgemm_gpu
-    VERSION 0.3.1
+    VERSION 0.7.0
     LANGUAGES CXX C CUDA)
 endif()
 
+# AVX Flags Setup - must be set AFTER project declaration
 include(${CMAKEMODULES}/FindAVX.cmake)
 
-
-################################################################################
 # PyTorch Dependencies Setup
-################################################################################
-
-find_package(Torch REQUIRED)
+include(${CMAKEMODULES}/PyTorchSetup.cmake)
 
-#
-# Toch Cuda Extensions are normally compiled with the flags below. However we
-# disabled -D__CUDA_NO_HALF_CONVERSIONS__ here as it caused "error: no suitable
-# constructor exists to convert from "int" to "__half" errors in
-# gen_embedding_forward_quantized_split_[un]weighted_codegen_cuda.cu
-#
-
-set(TORCH_CUDA_OPTIONS
-    --expt-relaxed-constexpr -D__CUDA_NO_HALF_OPERATORS__
-    # -D__CUDA_NO_HALF_CONVERSIONS__
-    -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__)
-
-
-################################################################################
 # CUDA Setup
-################################################################################
-
-# Set NVML_LIB_PATH if provided, or detect the default lib path
-if(NOT NVML_LIB_PATH)
-  set(DEFAULT_NVML_LIB_PATH
-      "${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so")
-
-  if(EXISTS ${DEFAULT_NVML_LIB_PATH})
-    message(STATUS "Setting NVML_LIB_PATH: \
-        ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so")
-    set(NVML_LIB_PATH "${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so")
-  endif()
-endif()
-
-if(NVML_LIB_PATH)
-  message(STATUS "Found NVML_LIB_PATH: ${NVML_LIB_PATH}")
-endif()
-
+include(${CMAKEMODULES}/CudaSetup.cmake)
 
-################################################################################
 # ROCm and HIPify Setup
-################################################################################
-
-if(USE_ROCM)
-  # Load CMake modules
-  list(APPEND CMAKE_MODULE_PATH
-       "${PROJECT_SOURCE_DIR}/cmake"
-       "${THIRDPARTY}/hipify_torch/cmake")
-  include(Hip)
-  include(Hipify)
-
-  # Configure compiler for HIP
-  list(APPEND HIP_HCC_FLAGS
-       " \"-Wno-#pragma-messages\" "
-       " \"-Wno-#warnings\" "
-       -Wno-cuda-compat
-       -Wno-deprecated-declarations
-       -Wno-format
-       -Wno-ignored-attributes
-       -Wno-unused-result)
-
-  BLOCK_PRINT(
-    "HIP found: ${HIP_FOUND}"
-    "HIPCC compiler flags:"
-    ""
-    "${HIP_HCC_FLAGS}"
-  )
-endif()
+include(${CMAKEMODULES}/RocmSetup.cmake)
 
 
 ################################################################################
@@ -823,3 +686,11 @@ install(FILES ${gen_python_source_files}
 
 install(FILES ${gen_defused_optim_py_files}
         DESTINATION fbgemm_gpu/split_embedding_optimizer_codegen)
+
+
+
+################################################################################
+# Build Experimental Modules
+################################################################################
+
+add_subdirectory(experimental/example)
diff --git a/fbgemm_gpu/experimental/example/CMakeLists.txt b/fbgemm_gpu/experimental/example/CMakeLists.txt
new file mode 100644
index 000000000..d6d4b55aa
--- /dev/null
+++ b/fbgemm_gpu/experimental/example/CMakeLists.txt
@@ -0,0 +1,41 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+include(${CMAKEMODULES}/Utilities.cmake)
+
+################################################################################
+# Target Sources
+################################################################################
+
+set(experimental_example_cpp_source_files
+    src/example_ops.cpp)
+
+set(experimental_example_python_source_files
+    example/__init__.py
+    example/utils.py)
+
+################################################################################
+# Build Shared Library
+################################################################################
+
+add_library(fbgemm_gpu_experimental_example_py MODULE
+    ${experimental_example_cpp_source_files})
+
+target_include_directories(fbgemm_gpu_experimental_example_py PRIVATE ${TORCH_INCLUDE_DIRS})
+target_link_libraries(fbgemm_gpu_experimental_example_py ${TORCH_LIBRARIES})
+
+# Drop the `lib` prefix so the artifact is named `fbgemm_gpu_experimental_example_py.so`
+set_target_properties(fbgemm_gpu_experimental_example_py PROPERTIES PREFIX "")
+
+################################################################################
+# Install Shared Library and Python Files
+################################################################################
+
+install(TARGETS fbgemm_gpu_experimental_example_py
+        DESTINATION fbgemm_gpu/experimental/example)
+
+install(FILES ${experimental_example_python_source_files}
+        DESTINATION fbgemm_gpu/experimental/example)
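Note: once this subdirectory is wired in via add_subdirectory(), the module can be rebuilt in isolation during development. A hypothetical invocation; the `build` directory name and output path are illustrative, following CMake's convention of mirroring the source tree in the binary tree:

# Build only the experimental example target and inspect the artifact.
cmake --build build --target fbgemm_gpu_experimental_example_py -j 8
ls build/experimental/example/fbgemm_gpu_experimental_example_py.so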
diff --git a/fbgemm_gpu/experimental/example/example/__init__.py b/fbgemm_gpu/experimental/example/example/__init__.py
new file mode 100644
index 000000000..d4bea7d44
--- /dev/null
+++ b/fbgemm_gpu/experimental/example/example/__init__.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+
+import torch
+
+try:
+    torch.ops.load_library(
+        os.path.join(os.path.dirname(__file__), "fbgemm_gpu_experimental_example_py.so")
+    )
+except Exception as e:
+    print(e)
+
+# Since __init__.py is only used in OSS context, we define `open_source` here
+# and use its existence to determine whether or not we are in OSS context
+open_source: bool = True
diff --git a/fbgemm_gpu/experimental/example/example/utils.py b/fbgemm_gpu/experimental/example/example/utils.py
new file mode 100644
index 000000000..19a98377f
--- /dev/null
+++ b/fbgemm_gpu/experimental/example/example/utils.py
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+import torch
+
+
+def add_tensors(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
+    return torch.ops.fbgemm.add_tensors_float(a, b)
diff --git a/fbgemm_gpu/experimental/example/src/example_ops.cpp b/fbgemm_gpu/experimental/example/src/example_ops.cpp
new file mode 100644
index 000000000..585630373
--- /dev/null
+++ b/fbgemm_gpu/experimental/example/src/example_ops.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <ATen/ATen.h>
+#include <torch/library.h>
+
+namespace fbgemm_gpu::experimental {
+
+at::Tensor add_tensors_float(const at::Tensor& a, const at::Tensor& b) {
+  return a.to(at::kFloat) + b.to(at::kFloat);
+}
+
+TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
+  m.def("add_tensors_float(Tensor a, Tensor b) -> Tensor");
+}
+
+TORCH_LIBRARY_IMPL(fbgemm, CPU, m) {
+  m.impl(
+      "add_tensors_float",
+      torch::dispatch(
+          c10::DispatchKey::CPU,
+          TORCH_FN(fbgemm_gpu::experimental::add_tensors_float)));
+}
+
+} // namespace fbgemm_gpu::experimental
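Note: taken together, the files above register a custom op in the `fbgemm` namespace, expose a typed Python wrapper, and load the .so at import time. A quick smoke check from the shell, assuming a wheel that includes the experimental module has been installed:

# Load the package and call the op through its Python wrapper.
# Inputs are int tensors; the op casts to float, so this prints tensor([4., 6.]).
python3 -c "
import torch
from fbgemm_gpu.experimental.example import utils
print(utils.add_tensors(torch.tensor([1, 2]), torch.tensor([3, 4])))
"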
diff --git a/fbgemm_gpu/experimental/example/test/add_tensors_float_test.py b/fbgemm_gpu/experimental/example/test/add_tensors_float_test.py
new file mode 100644
index 000000000..5d0cd40e2
--- /dev/null
+++ b/fbgemm_gpu/experimental/example/test/add_tensors_float_test.py
@@ -0,0 +1,26 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+import unittest
+
+import torch
+
+from fbgemm_gpu.experimental.example import utils
+
+
+class ExampleTest(unittest.TestCase):
+    def test_add_tensors_float(self) -> None:
+        a = torch.tensor([1, 2, 3])
+        b = torch.tensor([4, 5, 6])
+        expected = torch.tensor([5, 7, 9], dtype=torch.float)
+        c = utils.add_tensors(a, b)
+        torch.testing.assert_close(c.cpu(), expected.cpu())
+
+
+if __name__ == "__main__":
+    unittest.main()
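Note: this test directory is one of the entries `test_all_fbgemm_gpu_modules` walks. To run it in isolation, a sketch assuming an installed build; the file also runs standalone via its `unittest.main()` entry point:

# Run the new module's tests directly.
cd fbgemm_gpu/experimental/example/test
python3 -m pytest add_tensors_float_test.py -v   # or: python3 add_tensors_float_test.py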