diff --git a/.github/workflows/fbgemm_gpu_ci.yml b/.github/workflows/fbgemm_gpu_ci.yml
index ee3e46ccfa..a1817a3761 100644
--- a/.github/workflows/fbgemm_gpu_ci.yml
+++ b/.github/workflows/fbgemm_gpu_ci.yml
@@ -88,10 +88,10 @@ jobs:
     - name: Prepare FBGEMM_GPU Build
       run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
 
-    - name: Build FBGEMM_GPU-ROCM Nightly
+    - name: Build FBGEMM_GPU-ROCm Nightly
       run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_develop $BUILD_ENV rocm gfx90a
 
-    - name: Test FBGEMM_GPU-ROCM Nightly Installation
+    - name: Test FBGEMM_GPU-ROCm Nightly Installation
       timeout-minutes: 10
       run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV rocm
 
@@ -154,10 +154,10 @@ jobs:
     - name: Prepare FBGEMM_GPU Build
       run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
 
-    - name: Build FBGEMM_GPU-ROCM Nightly
+    - name: Build FBGEMM_GPU-ROCm Nightly
       run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_develop $BUILD_ENV rocm
 
-    - name: Test FBGEMM_GPU-ROCM Nightly Installation
+    - name: Test FBGEMM_GPU-ROCm Nightly Installation
       timeout-minutes: 15
       run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV rocm
 
diff --git a/.github/workflows/fbgemm_gpu_cuda_release.yml b/.github/workflows/fbgemm_gpu_cuda_release.yml
index bb4ad8fa67..886e223a27 100644
--- a/.github/workflows/fbgemm_gpu_cuda_release.yml
+++ b/.github/workflows/fbgemm_gpu_cuda_release.yml
@@ -27,6 +27,12 @@ on:
         type: boolean
         required: false
         default: false
+      cuda_version:
+        description: CUDA Version to Use for PyPI Publishing
+        type: choice
+        required: false
+        options: [ "11.8.0", "12.1.1" ]
+        default: "11.8.0"
 
 concurrency:
   # Cancel previous runs in the PR if a new commit is pushed
@@ -124,8 +130,6 @@ jobs:
         ]
         python-version: [ "3.8", "3.9", "3.10", "3.11" ]
         cuda-version: [ "11.8.0", "12.1.1" ]
-        # Specify exactly ONE CUDA version for artifact publish
-        cuda-version-publish: [ "11.8.0" ]
     needs: build_artifact
 
     steps:
@@ -171,7 +175,7 @@ jobs:
       run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV
 
     - name: Push FBGEMM_GPU Binary to PYPI
-      if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == matrix.cuda-version-publish }}
+      if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == github.event.inputs.cuda_version }}
       env:
         PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
       run: . $PRELUDE; publish_to_pypi $BUILD_ENV fbgemm_gpu-*.whl "$PYPI_TOKEN"
diff --git a/.github/workflows/fbgemm_gpu_pypi.yml b/.github/workflows/fbgemm_gpu_pypi.yml
new file mode 100644
index 0000000000..ce47628953
--- /dev/null
+++ b/.github/workflows/fbgemm_gpu_pypi.yml
@@ -0,0 +1,192 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Installs FBGEMM_GPU through the `install_fbgemm_gpu_pypi` helper and runs the
+# test suite against it; only the job matching the chosen variant is run.
+name: FBGEMM_GPU PyPI Install + Test
+
+on:
+  # Manual Trigger
+  #
+  workflow_dispatch:
+    inputs:
+      fbgemm_gpu_variant:
+        description: FBGEMM-GPU Variant
+        type: choice
+        required: true
+        options: [ "cpu", "cuda", "rocm" ]
+        default: "cpu"
+      fbgemm_gpu_version:
+        description: FBGEMM-GPU Version (e.g. '0.5.0rc1')
+        type: string
+        required: true
+
+jobs:
+  # CPU variant: runs only when `fbgemm_gpu_variant` is `cpu`
+  test_pypi_install_cpu:
+    if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm_gpu_variant == 'cpu' }}
+    runs-on: ${{ matrix.host-machine.instance }}
+    container:
+      image: amazonlinux:2023
+      options: --user root
+    defaults:
+      run:
+        shell: bash
+    env:
+      PRELUDE: .github/scripts/setup_env.bash
+      BUILD_ENV: test_install
+    strategy:
+      fail-fast: false
+      matrix:
+        host-machine: [
+          { instance: "linux.4xlarge" },
+          { instance: "linux.arm64.2xlarge" },
+        ]
+        python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+
+    steps:
+    - name: Setup Build Container
+      run: yum update -y; yum install -y binutils findutils git pciutils sudo wget which
+
+    - name: Checkout the Repository
+      uses: actions/checkout@v3
+
+    - name: Display System Info
+      run: . $PRELUDE; print_system_info; print_ec2_info
+
+    - name: Display GPU Info
+      run: . $PRELUDE; print_gpu_info
+
+    - name: Setup Miniconda
+      run: . $PRELUDE; setup_miniconda $HOME/miniconda
+
+    - name: Create Conda Environment
+      run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
+
+    - name: Install PyTorch-CPU
+      run: . $PRELUDE; install_pytorch_pip $BUILD_ENV test cpu
+
+    - name: Install FBGEMM_GPU-CPU
+      run: . $PRELUDE; cd fbgemm_gpu; install_fbgemm_gpu_pypi $BUILD_ENV cpu ${{ github.event.inputs.fbgemm_gpu_version }}
+
+    - name: Test with PyTest
+      timeout-minutes: 10
+      run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV cpu
+
+
+  # CUDA variant: runs only when `fbgemm_gpu_variant` is `cuda`
+  test_pypi_install_cuda:
+    if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm_gpu_variant == 'cuda' }}
+    runs-on: ${{ matrix.host-machine.instance }}
+    defaults:
+      run:
+        shell: bash
+    env:
+      PRELUDE: .github/scripts/setup_env.bash
+      BUILD_ENV: test_install
+      ENFORCE_NVIDIA_GPU: 1
+    strategy:
+      fail-fast: false
+      matrix:
+        host-machine: [
+          { instance: "linux.g5.4xlarge.nvidia.gpu" },
+        ]
+        python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+        cuda-version: [ "11.8.0", "12.1.1" ]
+
+    steps:
+    - name: Checkout the Repository
+      uses: actions/checkout@v3
+
+    - name: Install NVIDIA Drivers and NVIDIA-Docker Runtime
+      uses: pytorch/test-infra/.github/actions/setup-nvidia@main
+
+    - name: Display System Info
+      run: . $PRELUDE; print_system_info; print_ec2_info
+
+    - name: Display GPU Info
+      run: . $PRELUDE; print_gpu_info
+
+    - name: Setup Miniconda
+      run: . $PRELUDE; setup_miniconda $HOME/miniconda
+
+    - name: Create Conda Environment
+      run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
+
+    - name: Install CUDA
+      run: . $PRELUDE; install_cuda $BUILD_ENV ${{ matrix.cuda-version }}
+
+    - name: Install PyTorch-CUDA
+      run: . $PRELUDE; install_pytorch_pip $BUILD_ENV nightly cuda ${{ matrix.cuda-version }}
+
+    - name: Install FBGEMM_GPU-CUDA
+      run: . $PRELUDE; cd fbgemm_gpu; install_fbgemm_gpu_pypi $BUILD_ENV cuda ${{ github.event.inputs.fbgemm_gpu_version }}
+
+    - name: Test with PyTest
+      timeout-minutes: 10
+      run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV
+
+
+  # ROCm variant: runs only when `fbgemm_gpu_variant` is `rocm`
+  test_pypi_install_rocm:
+    if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm_gpu_variant == 'rocm' }}
+    runs-on: ${{ matrix.host-machine.instance }}
+    container:
+      image: "rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}-complete"
+      options: --user root --device=/dev/kfd --device=/dev/dri --ipc=host --shm-size 16G --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined
+    defaults:
+      run:
+        shell: bash
+    env:
+      PRELUDE: .github/scripts/setup_env.bash
+      BUILD_ENV: test_install
+      ENFORCE_AMD_GPU: 1
+    strategy:
+      fail-fast: false
+      matrix:
+        host-machine: [
+          { instance: "rocm" },
+        ]
+        # ROCm machines are limited, so we only test against Python 3.10
+        python-version: [ "3.10" ]
+        rocm-version: [ "5.5.1", "5.6" ]
+
+    steps:
+    - name: Setup Build Container
+      run: |
+        apt update -y
+        apt install -y git wget
+        git config --global --add safe.directory '*'
+
+    - name: Checkout the Repository
+      uses: actions/checkout@v3
+
+    - name: Display System Info
+      run: . $PRELUDE; print_system_info
+
+    - name: Display GPU Info
+      run: . $PRELUDE; print_gpu_info
+
+    - name: Free Disk Space
+      run: . $PRELUDE; free_disk_space
+
+    - name: Setup Miniconda
+      run: . $PRELUDE; setup_miniconda $HOME/miniconda
+
+    - name: Create Conda Environment
+      run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
+
+    - name: Install Build Tools
+      run: . $PRELUDE; install_build_tools $BUILD_ENV
+
+    - name: Install PyTorch-ROCm
+      run: . $PRELUDE; install_pytorch_pip $BUILD_ENV nightly rocm ${{ matrix.rocm-version }}
+
+    - name: Install FBGEMM_GPU-ROCm
+      run: . $PRELUDE; cd fbgemm_gpu; install_fbgemm_gpu_pypi $BUILD_ENV rocm ${{ github.event.inputs.fbgemm_gpu_version }}
+
+    - name: Test FBGEMM_GPU-ROCm
+      timeout-minutes: 15
+      run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV rocm