diff --git a/.github/workflows/fbgemm_gpu_ci_genai.yml b/.github/workflows/fbgemm_gpu_ci_genai.yml
index 07f8f80595..3ffdf45c0a 100644
--- a/.github/workflows/fbgemm_gpu_ci_genai.yml
+++ b/.github/workflows/fbgemm_gpu_ci_genai.yml
@@ -127,7 +127,6 @@ jobs:
 
   # Download the built artifact from GHA, test on GPU, and push to PyPI
   test_and_publish_artifact:
-    # runs-on: linux.4xlarge.nvidia.gpu
     # Use available instance types - https://github.com/pytorch/test-infra/blob/main/.github/scale-config.yml
     runs-on: ${{ matrix.host-machine.instance }}
     defaults:
diff --git a/.github/workflows/fbgemm_gpu_ci_genai_generic_infra.yml b/.github/workflows/fbgemm_gpu_ci_genai_generic_infra.yml
new file mode 100644
index 0000000000..ebbc69e2f6
--- /dev/null
+++ b/.github/workflows/fbgemm_gpu_ci_genai_generic_infra.yml
@@ -0,0 +1,183 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# This workflow is used for FBGEMM_GPU-GenAI CI, and is meant to be used for
+# copies of the FBGEMM repos hosted outside of the pytorch org.
+name: FBGEMM_GPU-GenAI CI (Generic Runner)
+
+on:
+  # PR Trigger
+  #
+  pull_request:
+    branches:
+      - main
+
+  # Push Trigger (enable to catch errors coming out of multiple merges)
+  #
+  push:
+    branches:
+      - main
+
+  # Manual Trigger
+  #
+  workflow_dispatch:
+
+concurrency:
+  # Cancel previous runs in the PR if a new commit is pushed
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  # Build on CPU hosts and upload to GHA
+  build_artifact:
+    runs-on: ${{ matrix.host-machine.instance }}
+    container:
+      image: amazonlinux:2023
+      options: --user root
+    defaults:
+      run:
+        shell: bash
+    env:
+      PRELUDE: .github/scripts/setup_env.bash
+      BUILD_ENV: build_binary
+      BUILD_VARIANT: genai
+    continue-on-error: true
+    strategy:
+      fail-fast: false
+      matrix:
+        host-machine: [
+          { arch: x86, instance: "ubuntu-latest" },
+        ]
+        python-version: [ "3.9", "3.10", "3.11", "3.12" ]
+        cuda-version: [ "11.8.0", "12.1.1", "12.4.1" ]
+        compiler: [ "gcc", "clang" ]
+
+    steps:
+    - name: Setup Build Container
+      run: yum update -y; yum install -y binutils findutils git pciutils sudo tar wget which
+
+    - name: Checkout the Repository
+      uses: actions/checkout@v4
+      with:
+        submodules: true
+
+    - name: Display System Info
+      run: . $PRELUDE; print_system_info
+
+    - name: Display GPU Info
+      run: . $PRELUDE; print_gpu_info
+
+    - name: Setup Miniconda
+      run: . $PRELUDE; setup_miniconda $HOME/miniconda
+
+    - name: Create Conda Environment
+      run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
+
+    - name: Install C/C++ Compilers
+      run: . $PRELUDE; install_cxx_compiler $BUILD_ENV ${{ matrix.compiler }}
+
+    - name: Install Build Tools
+      run: . $PRELUDE; install_build_tools $BUILD_ENV
+
+    - name: Install CUDA
+      run: . $PRELUDE; install_cuda $BUILD_ENV ${{ matrix.cuda-version }}
+
+    # Install via PIP to avoid defaulting to the CPU variant if the GPU variant of the day is not ready
+    - name: Install PyTorch Nightly
+      run: . $PRELUDE; install_pytorch_pip $BUILD_ENV nightly cuda/${{ matrix.cuda-version }}
+
+    - name: Collect PyTorch Environment Info
+      if: ${{ success() || failure() }}
+      run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi
+
+    - name: Install cuDNN
+      run: . $PRELUDE; install_cudnn $BUILD_ENV "$(pwd)/build_only/cudnn" ${{ matrix.cuda-version }}
+
+    - name: Prepare FBGEMM_GPU Build
+      run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
+
+    - name: Build FBGEMM_GPU Wheel
+      run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_package $BUILD_ENV nightly genai
+
+    - name: Upload Built Wheel as GHA Artifact
+      # Cannot upgrade to actions/upload-artifact@v4 yet because GLIBC on the instance is too old
+      uses: actions/upload-artifact@v3
+      with:
+        name: fbgemm_gpu_nightly_genai_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.whl
+        path: fbgemm_gpu/dist/*.whl
+        if-no-files-found: error
+
+  # Download the built artifact from GHA, test on GPU, and push to PyPI
+  test_artifact:
+    runs-on: ${{ matrix.host-machine.instance }}
+    defaults:
+      run:
+        shell: bash
+    env:
+      PRELUDE: .github/scripts/setup_env.bash
+      BUILD_ENV: build_binary
+      BUILD_VARIANT: genai
+      ENFORCE_CUDA_DEVICE: 0
+      CUDA_VISIBLE_DEVICES: -1
+    strategy:
+      fail-fast: false
+      matrix:
+        host-machine: [
+          { arch: x86, instance: "ubuntu-latest" },
+        ]
+        python-version: [ "3.9", "3.10", "3.11", "3.12" ]
+        cuda-version: [ "11.8.0", "12.1.1", "12.4.1" ]
+        # Specify exactly ONE CUDA version for artifact publish
+        cuda-version-publish: [ "12.1.1" ]
+        compiler: [ "gcc", "clang" ]
+    needs: build_artifact
+
+    steps:
+    # Cannot upgrade to actions/checkout@v4 yet because GLIBC on the instance is too old
+    - name: Checkout the Repository
+      uses: actions/checkout@v3
+      with:
+        submodules: true
+
+    - name: Download Wheel Artifact from GHA
+      # Cannot upgrade to actions/download-artifact@v4 yet because GLIBC on the instance is too old
+      uses: actions/download-artifact@v3
+      with:
+        name: fbgemm_gpu_nightly_genai_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.whl
+
+    - name: Display System Info
+      run: . $PRELUDE; print_system_info; print_ec2_info
+
+    - name: Display GPU Info
+      run: . $PRELUDE; print_gpu_info
+
+    - name: Setup Miniconda
+      run: . $PRELUDE; setup_miniconda $HOME/miniconda
+
+    - name: Create Conda Environment
+      run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
+
+    - name: Install C/C++ Compilers for Updated LIBGCC
+      run: . $PRELUDE; install_cxx_compiler $BUILD_ENV clang
+
+    - name: Install CUDA
+      run: . $PRELUDE; install_cuda $BUILD_ENV ${{ matrix.cuda-version }}
+
+    - name: Install PyTorch Nightly
+      run: . $PRELUDE; install_pytorch_pip $BUILD_ENV nightly cuda/${{ matrix.cuda-version }}
+
+    - name: Collect PyTorch Environment Info
+      if: ${{ success() || failure() }}
+      run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi
+
+    - name: Prepare FBGEMM_GPU Build
+      run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
+
+    - name: Install FBGEMM_GPU Wheel
+      run: . $PRELUDE; install_fbgemm_gpu_wheel $BUILD_ENV *.whl
+
+    - name: Test with PyTest
+      timeout-minutes: 30
+      run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV