Skip to content

Commit

Permalink
[T162270879][fbgemm_gpu] Add CUDA artifact selection on publish
Browse files Browse the repository at this point in the history
- Add CUDA version selection on artifact publishing
  • Loading branch information
q10 committed Sep 8, 2023
1 parent f664fd9 commit 6952885
Show file tree
Hide file tree
Showing 3 changed files with 200 additions and 7 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/fbgemm_gpu_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,10 @@ jobs:
- name: Prepare FBGEMM_GPU Build
run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV

- name: Build FBGEMM_GPU-ROCM Nightly
- name: Build FBGEMM_GPU-ROCm Nightly
run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_develop $BUILD_ENV rocm gfx90a

- name: Test FBGEMM_GPU-ROCM Nightly Installation
- name: Test FBGEMM_GPU-ROCm Nightly Installation
timeout-minutes: 10
run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV rocm

Expand Down Expand Up @@ -154,10 +154,10 @@ jobs:
- name: Prepare FBGEMM_GPU Build
run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV

- name: Build FBGEMM_GPU-ROCM Nightly
- name: Build FBGEMM_GPU-ROCm Nightly
run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_develop $BUILD_ENV rocm

- name: Test FBGEMM_GPU-ROCM Nightly Installation
- name: Test FBGEMM_GPU-ROCm Nightly Installation
timeout-minutes: 15
run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV rocm

Expand Down
10 changes: 7 additions & 3 deletions .github/workflows/fbgemm_gpu_cuda_release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ on:
type: boolean
required: false
default: false
cuda_version:
description: CUDA Version to Use for PyPI Publishing
type: choice
required: false
options: [ "11.8.0", "12.1.1" ]
default: "11.8.0"

concurrency:
# Cancel previous runs in the PR if a new commit is pushed
Expand Down Expand Up @@ -124,8 +130,6 @@ jobs:
]
python-version: [ "3.8", "3.9", "3.10", "3.11" ]
cuda-version: [ "11.8.0", "12.1.1" ]
# Specify exactly ONE CUDA version for artifact publish
cuda-version-publish: [ "11.8.0" ]
needs: build_artifact

steps:
Expand Down Expand Up @@ -171,7 +175,7 @@ jobs:
run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV

- name: Push FBGEMM_GPU Binary to PYPI
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == matrix.cuda-version-publish }}
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == github.event.inputs.cuda_version }}
env:
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
run: . $PRELUDE; publish_to_pypi $BUILD_ENV fbgemm_gpu-*.whl "$PYPI_TOKEN"
189 changes: 189 additions & 0 deletions .github/workflows/fbgemm_gpu_pypi.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Installs a published FBGEMM_GPU package from PyPI for a chosen variant
# (CPU / CUDA / ROCm) and runs the test suite against it.
# FIX: renamed from "FBGEMM_GPU-CPU Nightly Build" — that name was copied
# from the nightly-build workflow and does not describe this workflow.
name: FBGEMM_GPU PyPI Install + Test

on:
  # Manual Trigger only — this workflow validates an already-published
  # package, so it is never run on push/schedule.
  workflow_dispatch:
    inputs:
      fbgemm_gpu_variant:
        description: FBGEMM-GPU Variant
        type: choice
        required: true
        options: [ "cpu", "cuda", "rocm" ]
        default: "cpu"
      fbgemm_gpu_version:
        # Passed through verbatim to pip as the version to install
        description: FBGEMM-GPU Version (e.g. '0.5.0rc1')
        type: string
        required: true


test_pypi_install_cpu:
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm_gpu_variant == 'cpu' }}
runs-on: ${{ matrix.host-machine.instance }}
container:
image: amazonlinux:2023
options: --user root
defaults:
run:
shell: bash
env:
PRELUDE: .github/scripts/setup_env.bash
BUILD_ENV: test_install
strategy:
fail-fast: false
matrix:
host-machine: [
{ instance: "linux.4xlarge" },
{ instance: "linux.arm64.2xlarge" },
]
python-version: [ "3.8", "3.9", "3.10", "3.11" ]

steps:
- name: Setup Build Container
run: yum update -y; yum install -y binutils findutils git pciutils sudo wget which

- name: Checkout the Repository
uses: actions/checkout@v3

- name: Display System Info
run: . $PRELUDE; print_system_info; print_ec2_info

- name: Display GPU Info
run: . $PRELUDE; print_gpu_info

- name: Setup Miniconda
run: . $PRELUDE; setup_miniconda $HOME/miniconda

- name: Create Conda Environment
run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}

- name: Install PyTorch-CPU
run: . $PRELUDE; install_pytorch_pip $BUILD_ENV test cpu

- name: Install FBGEMM_GPU-CPU
run: . $PRELUDE; cd fbgemm_gpu; install_fbgemm_gpu_pypi $BUILD_ENV cuda ${{ github.event.inputs.fbgemm_gpu_version }}

- name: Test with PyTest
timeout-minutes: 10
run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV cpu


test_pypi_install_cuda:
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm_gpu_variant == 'cuda' }}
runs-on: ${{ matrix.host-machine.instance }}
defaults:
run:
shell: bash
env:
PRELUDE: .github/scripts/setup_env.bash
BUILD_ENV: test_install
ENFORCE_NVIDIA_GPU: 1
strategy:
fail-fast: false
matrix:
host-machine: [
{ instance: "linux.g5.4xlarge.nvidia.gpu" },
]
python-version: [ "3.8", "3.9", "3.10", "3.11" ]
cuda-version: [ "11.8.0", "12.1.1" ]
# Specify exactly ONE CUDA version for artifact publish
cuda-version-publish: [ "11.8.0" ]

steps:
- name: Checkout the Repository
uses: actions/checkout@v3

- name: Install NVIDIA Drivers and NVIDIA-Docker Runtime
uses: pytorch/test-infra/.github/actions/setup-nvidia@main

- name: Display System Info
run: . $PRELUDE; print_system_info; print_ec2_info

- name: Display GPU Info
run: . $PRELUDE; print_gpu_info

- name: Setup Miniconda
run: . $PRELUDE; setup_miniconda $HOME/miniconda

- name: Create Conda Environment
run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}

- name: Install CUDA
run: . $PRELUDE; install_cuda $BUILD_ENV ${{ matrix.cuda-version }}

- name: Install PyTorch-CUDA
run: . $PRELUDE; install_pytorch_pip $BUILD_ENV nightly cuda ${{ matrix.cuda-version }}

- name: Install FBGEMM_GPU-CUDA
run: . $PRELUDE; cd fbgemm_gpu; install_fbgemm_gpu_pypi $BUILD_ENV cuda ${{ github.event.inputs.fbgemm_gpu_version }}

- name: Test with PyTest
timeout-minutes: 10
run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV


test_pypi_install_rocm:
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm_gpu_variant == 'rocm' }}
runs-on: ${{ matrix.host-machine.instance }}
container:
image: "rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}-complete"
options: --user root --device=/dev/kfd --device=/dev/dri --ipc=host --shm-size 16G --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined
defaults:
run:
shell: bash
env:
PRELUDE: .github/scripts/setup_env.bash
BUILD_ENV: test_install
ENFORCE_AMD_GPU: 1
strategy:
fail-fast: false
matrix:
host-machine: [
{ instance: "rocm" },
]
# ROCm machines are limited, so we only test against Python 3.10
python-version: [ "3.10" ]
rocm-version: [ "5.5.1", "5.6" ]

steps:
- name: Setup Build Container
run: |
apt update -y
apt install -y git wget
git config --global --add safe.directory '*'
- name: Checkout the Repository
uses: actions/checkout@v3

- name: Display System Info
run: . $PRELUDE; print_system_info

- name: Display GPU Info
run: . $PRELUDE; print_gpu_info

- name: Free Disk Space
run: . $PRELUDE; free_disk_space

- name: Setup Miniconda
run: . $PRELUDE; setup_miniconda $HOME/miniconda

- name: Create Conda Environment
run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}

- name: Install Build Tools
run: . $PRELUDE; install_build_tools $BUILD_ENV

- name: Install PyTorch-ROCm
run: . $PRELUDE; install_pytorch_pip $BUILD_ENV nightly rocm ${{ matrix.rocm-version }}

- name: Install FBGEMM_GPU-ROCm
run: . $PRELUDE; cd fbgemm_gpu; install_fbgemm_gpu_pypi $BUILD_ENV rocm ${{ github.event.inputs.fbgemm_gpu_version }}

- name: Test FBGEMM_GPU-ROCm
timeout-minutes: 15
run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV rocm

0 comments on commit 6952885

Please sign in to comment.