Skip to content

Commit

Permalink
Merge branch 'release/0.11' into yutji/releasenote-0.11
Browse files Browse the repository at this point in the history
  • Loading branch information
yukirora authored Sep 21, 2024
2 parents 6ea7a65 + 1d2d054 commit a227917
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 5 deletions.
10 changes: 6 additions & 4 deletions dockerfile/cuda11.1.1.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,13 @@ RUN cd /tmp && \
rm -rf /tmp/MLNX_OFED_LINUX-${OFED_VERSION}*

# Install HPC-X
# The release is pinned once in HPCX_VERSION and reused for both the download
# URL and the name of the directory the archive extracts to.
ENV HPCX_VERSION=v2.9.0
# Any pre-existing /opt/hpcx is removed first so that the `mv` below renames
# the extracted tree to /opt/hpcx instead of nesting it inside an old one.
# The archive is deleted in the same RUN so it does not persist in the layer.
RUN cd /opt && \
    rm -rf hpcx && \
    wget -q https://content.mellanox.com/hpc/hpc-x/${HPCX_VERSION}/hpcx-${HPCX_VERSION}-gcc-inbox-ubuntu20.04-x86_64.tbz -O hpcx.tbz && \
    tar xf hpcx.tbz && \
    mv hpcx-${HPCX_VERSION}-gcc-inbox-ubuntu20.04-x86_64 hpcx && \
    rm hpcx.tbz

# Install NCCL RDMA SHARP plugins
RUN cd /tmp && \
Expand Down
19 changes: 18 additions & 1 deletion dockerfile/cuda12.4.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ FROM nvcr.io/nvidia/pytorch:24.03-py3
# - CUDA: 12.4.0
# - cuDNN: 9.0.0.306
# - cuBLAS: 12.4.2.65
# - NCCL: v2.20
# - NCCL: v2.23.4-1
# - TransformerEngine 1.4
# Mellanox:
# - OFED: 23.07-0.5.1.2
Expand Down Expand Up @@ -115,6 +115,23 @@ RUN cd /tmp && \
mv amd-blis /opt/AMD && \
rm -rf aocl-blis-linux-aocc-4.0.tar.gz

# Install NCCL 2.23.4
# Builds NCCL from the v2.23.4-1 tag and installs it via `make install`.
# Only the tagged commit is needed, so the clone is shallow (--depth 1); the
# source tree is removed in the same RUN so it does not persist in the layer.
# NUM_MAKE_JOBS is expected to be defined earlier in this Dockerfile.
RUN cd /tmp && \
    git clone --depth 1 -b v2.23.4-1 https://github.com/NVIDIA/nccl.git && \
    cd nccl && \
    make -j ${NUM_MAKE_JOBS} src.build && \
    make install && \
    rm -rf /tmp/nccl

# Install UCX v1.16.0 with multi-threading support
# Downloads the release tarball, configures it with the contrib
# `configure-release-mt` wrapper, and installs under /usr/local.
# The tarball and build tree are deleted in the same RUN: removing them in a
# later instruction would not shrink the image, because the files would already
# be stored in this layer.
# NUM_MAKE_JOBS is expected to be defined earlier in this Dockerfile.
RUN cd /tmp && \
    wget -q https://github.com/openucx/ucx/releases/download/v1.16.0/ucx-1.16.0.tar.gz && \
    tar xzf ucx-1.16.0.tar.gz && \
    cd ucx-1.16.0 && \
    ./contrib/configure-release-mt --prefix=/usr/local && \
    make -j ${NUM_MAKE_JOBS} && \
    make install && \
    cd /tmp && \
    rm -rf /tmp/ucx-1.16.0 /tmp/ucx-1.16.0.tar.gz

ENV PATH="${PATH}" \
LD_LIBRARY_PATH="/usr/local/lib:/usr/local/mpi/lib:${LD_LIBRARY_PATH}" \
SB_HOME=/opt/superbench \
Expand Down

0 comments on commit a227917

Please sign in to comment.