Skip to content

Commit

Permalink
Merge branch 'feature/sw'
Browse files Browse the repository at this point in the history
  • Loading branch information
FlorianDeconinck committed Apr 23, 2024
2 parents a95473c + 1db9f2a commit 7f7bc8d
Show file tree
Hide file tree
Showing 11 changed files with 475 additions and 1 deletion.
8 changes: 7 additions & 1 deletion sw_stack/discover/sles15/HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,13 @@ All versions of the software for a given version are saved in `basics.sh`.
`build` directory is the throwaway directory where everything is downloaded then built.
`install` stores all libraries and executables once the build is done.

Last edit: _March 22th 2024_
Last edit: _April 22nd 2024_

## v2024.04.00

### Changes

- NDSL `2024.03.01` -> `2024.04.00`

## v2024.03.00

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
-- Lmod modulefile for the 2024.04.00 software stack install tree.
-- Every setenv/prepend_path below is issued in a fixed order because the
-- order of prepend_path calls determines the final search-path ordering.

-- Compiler, MKL and NVHPC modules this stack relies on.
load("comp/gcc/12.3.0")
load("lib/mkl/2023.2.0")
load("nvidia/nvhpc-byo-compiler/23.9")

local stack_root = "/discover/nobackup/projects/geosongpu/sw_sles15/live/src/2024.04.00/install"

-- Fix: GT4Py expects CUDA_HOME to be set; reuse the value of NVHPC_ROOT
-- found in the environment when this file is evaluated.
local nvhpc_root = os.getenv("NVHPC_ROOT")
setenv("CUDA_HOME", nvhpc_root)

-- UCX: only its shared libraries are exposed.
local ucx_root = pathJoin(stack_root, "ucx")
prepend_path("LD_LIBRARY_PATH", pathJoin(ucx_root, "lib"))

-- OpenMPI: several root variables all point at the same install prefix.
local ompi_root = pathJoin(stack_root, "ompi")

for _, var_name in ipairs({ "M_MPI_ROOT", "OPENMPI", "MPI_HOME" }) do
   setenv(var_name, ompi_root)
end

-- { search-path variable, sub-directory of the OpenMPI prefix }
local ompi_search_paths = {
   { "PATH", "bin" },
   { "LD_LIBRARY_PATH", "lib" },
   { "INCLUDE", "include" },
   { "MANPATH", "share/man" },
}
for _, entry in ipairs(ompi_search_paths) do
   prepend_path(entry[1], pathJoin(ompi_root, entry[2]))
end

-- Runtime settings for OpenMPI / PMIx / OpenMP, as { variable, value }.
local runtime_settings = {
   { "OMPI_MCA_orte_tmpdir_base", "/tmp" },
   { "TMPDIR", "/tmp" },
   { "OMP_STACKSIZE", "1G" },
   { "OMPI_MCA_mca_base_component_show_load_errors", "0" },
   { "PMIX_MCA_mca_base_component_show_load_errors", "0" },
}
for _, setting in ipairs(runtime_settings) do
   setenv(setting[1], setting[2])
end

-- Boost headers (as expected by gt4py).
setenv("BOOST_ROOT", pathJoin(stack_root, "boost"))

-- Python 3: executables plus both library directories.
local python_root = pathJoin(stack_root, "python3")
prepend_path("PATH", pathJoin(python_root, "bin"))
for _, libdir in ipairs({ "lib", "lib64" }) do
   prepend_path("LD_LIBRARY_PATH", pathJoin(python_root, libdir))
end

-- Baselibs exposed through BASEDIR.
setenv("BASEDIR", pathJoin(stack_root, "baselibs-7.17.1/install/x86_64-pc-linux-gnu/"))
49 changes: 49 additions & 0 deletions sw_stack/discover/sles15/modulefiles/SMTStack/2024.04.00.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
-- Lmod modulefile SMTStack/2024.04.00.
-- Exposes the software stack installed under `install_dir` (UCX, OpenMPI,
-- Boost headers, Python 3, the Python virtual environment and Baselibs).

-- Compiler, MKL and NVHPC modules this stack relies on.
load("comp/gcc/12.3.0")
load("lib/mkl/2023.2.0")
load("nvidia/nvhpc-byo-compiler/23.9")

local install_dir = "/discover/nobackup/projects/geosongpu/sw_sles15/live/src/2024.04.00/install"

-- Fix: GT4Py expects CUDA_HOME to be set --
-- The value is whatever NVHPC_ROOT holds when this file is evaluated.
setenv("CUDA_HOME", os.getenv("NVHPC_ROOT"))

-- UCX --
local ucx_pkgdir = pathJoin(install_dir, "ucx")
prepend_path("LD_LIBRARY_PATH",pathJoin(ucx_pkgdir,"lib"))

-- OMPI --
local ompi_pkgdir = pathJoin(install_dir, "ompi")

setenv("M_MPI_ROOT",ompi_pkgdir)
setenv("OPENMPI",ompi_pkgdir)
setenv("MPI_HOME",ompi_pkgdir)

prepend_path("PATH",pathJoin(ompi_pkgdir,"bin"))
prepend_path("LD_LIBRARY_PATH",pathJoin(ompi_pkgdir,"lib"))
prepend_path("INCLUDE",pathJoin(ompi_pkgdir,"include"))
prepend_path("MANPATH",pathJoin(ompi_pkgdir,"share/man"))

setenv("OMPI_MCA_orte_tmpdir_base","/tmp")
setenv("TMPDIR","/tmp")
setenv("OMP_STACKSIZE","1G")
setenv("OMPI_MCA_mca_base_component_show_load_errors","0")
setenv("PMIX_MCA_mca_base_component_show_load_errors","0")

-- BOOST HEADERS (as expected by gt4py) --
local boost_pkgdir = pathJoin(install_dir, "boost")
setenv("BOOST_ROOT", boost_pkgdir)

-- Python 3 --
local py_pkgdir = pathJoin(install_dir, "python3")
prepend_path("PATH",pathJoin(py_pkgdir,"bin"))
prepend_path("LD_LIBRARY_PATH",pathJoin(py_pkgdir,"lib"))
prepend_path("LD_LIBRARY_PATH",pathJoin(py_pkgdir,"lib64"))

-- Load venv --
-- Uses its own local instead of re-declaring `py_pkgdir`, which previously
-- shadowed the Python 3 install path. The venv `bin` is prepended after the
-- Python 3 `bin`, so it ends up first on PATH.
local venv_pkgdir = pathJoin(install_dir, "venv")
prepend_path("PATH",pathJoin(venv_pkgdir,"bin"))
setenv("VIRTUAL_ENV", venv_pkgdir)

-- Baselibs at a BASEDIR --
local baselibs_pkgdir = pathJoin(install_dir, "baselibs-7.17.1/install/x86_64-pc-linux-gnu/")
setenv("BASEDIR", baselibs_pkgdir)
37 changes: 37 additions & 0 deletions sw_stack/discover/sles15/src/2024.04.00/basics.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/bin/bash
#
# Shared settings for the software stack scripts; meant to be sourced.
# It exports the pinned component versions, creates the `build` and `install`
# directories under the current working directory, loads the matching
# SMTStack module and selects the GNU compilers.

export DSLSW_VERSION="2024.04.00"
echo "DSL Software Stack v${DSLSW_VERSION}"

# Version
export DSLSW_GDRCOPY_VER=2.3
export DSLSW_OMPI_MAJOR_VER=4.1
export DSLSW_OMPI_VER=${DSLSW_OMPI_MAJOR_VER}.6
export DSLSW_UCX_VER=1.15.0
export DSLSW_CUDA_VER=12.2
export DSLSW_OSUMICRO_VER=7.3
export DSLSW_LAPACK_VER=3.11.0
export DSLSW_PY_VER=3.11.7
export DSLSW_BASELIBS_VER=7.17.1
export DSLSW_SERIALBOX_VER=2.6.2-unreleased
export DSLSW_SERIALBOX_SHA=88ac4e4dfc824953d068fe63c8e7b3dd9560a914
export DSLSW_GNU_VER=12.2.0
export DSLSW_NDSL_VER=2024.04.00
export DSLSW_BOOST_VER=1.76.0
export DSLSW_BOOST_VER_STR=1_76_0

# Base directory & versioning
# Both directories are relative to the directory the script is run from.
# Expansions are quoted so a working directory containing whitespace does
# not get split into several mkdir arguments.
export DSLSW_BASE="$PWD/build"
mkdir -p "$DSLSW_BASE"
export DSLSW_INSTALL_DIR="$PWD/install"
mkdir -p "$DSLSW_INSTALL_DIR"

# Modules
module use -a /discover/nobackup/projects/geosongpu/sw_sles15/live/modulefiles/
module load "SMTStack/${DSLSW_VERSION}"
# Not exported: only visible to the scripts that source this file.
CUDA_DIR=/usr/local/other/nvidia/hpc_sdk/Linux_x86_64/23.9/cuda/

# Enforce proper compilers
export FC=gfortran
export CC=gcc
export CXX=g++
147 changes: 147 additions & 0 deletions sw_stack/discover/sles15/src/2024.04.00/build_0_on-node.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
#!/bin/bash
#
# Builds the stack components from the sources found under $DSLSW_BASE and
# installs them under $DSLSW_INSTALL_DIR.

# Source the shared basics
source ./basics.sh

echo " === GDR Copy (requires kernel running on the box) === "
# Build kept for reference but disabled; UCX below is pointed at the copy
# under /usr/src instead.
#cd $DSLSW_BASE/gdrcopy-$DSLSW_GDRCOPY_VER
#make prefix=$DSLSW_INSTALL_DIR/gdrcopy CUDA=$CUDA_DIR all install
#exit 0

echo " === UCX === "
# Abort if the source tree is missing rather than running ./configure from
# whatever directory we happen to be in.
cd "$DSLSW_BASE/ucx-$DSLSW_UCX_VER" || exit 1
./configure --prefix="$DSLSW_INSTALL_DIR/ucx" \
    --enable-optimizations \
    --disable-logging \
    --disable-debug \
    --disable-assertions \
    --disable-params-check \
    --without-xpmem \
    --without-java \
    --without-go \
    --with-cuda="$CUDA_DIR" \
    --with-gdrcopy="/usr/src/gdrdrv-$DSLSW_GDRCOPY_VER/"

make -j 32 install
#exit 0

echo " === OpenMPI === "

# The NSL lib (-lnsl) was not symlinked from libnsl.so.1, which led to issues
# (--disable-getpwuid was an attempt to squash that, which seems unsuccessful).
# Potentially, removing the LSF scheduler build would work.

# libxml2 has a division by zero in its init
# (https://gitlab.gnome.org/GNOME/libxml2/-/blob/7846b0a677f8d3ce72486125fa281e92ac9970e8/xpath.c#L505)
# which seems to trigger a SIGFPE. Relying on the internal, but potentially
# wobbly, XML parser of OMPI instead.

# Abort if the source tree is missing rather than configuring the wrong directory.
cd "$DSLSW_BASE/openmpi-${DSLSW_OMPI_VER}" || exit 1
./configure --prefix="$DSLSW_INSTALL_DIR/ompi" \
    --disable-libxml2 \
    --disable-wrapper-rpath \
    --disable-wrapper-runpath \
    --with-pmix \
    --with-cuda="$CUDA_DIR" \
    --with-cuda-libdir="$CUDA_DIR/lib64/stubs" \
    --with-ucx="$DSLSW_INSTALL_DIR/ucx" \
    --with-slurm \
    --enable-mpi1-compatibility

make -j32 all
make install
# Make the freshly installed OpenMPI and UCX visible to the builds that follow.
export PATH="$DSLSW_INSTALL_DIR/ompi/bin:$PATH"
export LD_LIBRARY_PATH="$DSLSW_INSTALL_DIR/ompi/lib:$DSLSW_INSTALL_DIR/ucx/lib:$LD_LIBRARY_PATH"

echo " === OSU === "

# OSU micro-benchmarks, compiled with the MPI compiler wrappers and CUDA support.
# Abort if the source tree is missing rather than configuring the wrong directory.
cd "$DSLSW_BASE/osu-micro-benchmarks-$DSLSW_OSUMICRO_VER" || exit 1
./configure \
    CC=mpicc \
    CXX=mpicxx \
    --prefix="$DSLSW_INSTALL_DIR/osu" \
    --enable-cuda \
    --with-cuda-include="$CUDA_DIR/include" \
    --with-cuda="$CUDA_DIR" \
    --with-cuda-libpath="$CUDA_DIR/lib64/stubs/"

make -j32
make install

echo " === Python === "
# Abort if the source tree is missing rather than configuring the wrong directory.
cd "$DSLSW_BASE/Python-$DSLSW_PY_VER" || exit 1
./configure --prefix="$DSLSW_INSTALL_DIR/python3" --enable-shared --enable-optimizations

make -j32
make install

echo " === Serialbox === "
# Out-of-source CMake build with the Fortran interface enabled and examples off.
cd "$DSLSW_BASE/serialbox-$DSLSW_SERIALBOX_VER" || exit 1
# -p keeps a rerun from failing when the build directory already exists.
mkdir -p build
cd build || exit 1
cmake -DCMAKE_INSTALL_PREFIX="$DSLSW_INSTALL_DIR/serialbox" \
    -DSERIALBOX_ENABLE_FORTRAN=ON \
    -DSERIALBOX_EXAMPLES=OFF \
    ..
make -j32 install


echo " === Baselibs === "
# Builds the ESSENTIALS set of Baselibs against OpenMPI.
# Abort if the source tree is missing rather than running make elsewhere.
cd "$DSLSW_BASE/baselibs-$DSLSW_BASELIBS_VER" || exit 1
make ESMF_COMM=openmpi \
    BUILD=ESSENTIALS \
    ALLOW_ARGUMENT_MISMATCH=-fallow-argument-mismatch \
    prefix="$DSLSW_INSTALL_DIR/baselibs-$DSLSW_BASELIBS_VER/install/x86_64-pc-linux-gnu/Linux" \
    install

# Optional: GCC with NVPTX offloading. Only built when BUILD_GCC_OFFLOAD is
# defined (any value, including empty).
if [ -z "${BUILD_GCC_OFFLOAD+x}" ]; then
    echo "Skip building offloaded GCC. Define BUILD_GCC_OFFLOAD to build."
else
    echo " === GNU gcc/gfortran/g++ with OpenACC and OpenMP Offload on NVIDIA GPUs === "
    # Drop the loaded compiler and NVHPC modules and the forced compiler
    # variables before building GCC. The NVHPC module name matches the one the
    # SMTStack modulefile loads (nvhpc-byo-compiler); the previous
    # `nvidia/nvhpc-nompi/23.9` is never loaded by that modulefile.
    module rm comp/gcc/12.3.0
    module rm nvidia/nvhpc-byo-compiler/23.9
    unset CC
    unset CXX
    unset FC

    # Build assembler and linking tools
    cd "$DSLSW_BASE/gnu/nvptx-tools" || exit 1
    ./configure \
        --with-cuda-driver-include="$CUDA_DIR/include" \
        --with-cuda-driver-lib="$CUDA_DIR/lib64" \
        --prefix="$DSLSW_INSTALL_DIR/gnu"
    make || exit 1
    make install || exit 1
    cd ..

    # Set up the GCC source tree
    cd "$DSLSW_BASE/gnu/gcc" || exit 1
    # -f/-n make the link idempotent: on a rerun a plain `ln -s` would follow
    # the existing link and create a nested `newlib/newlib` entry instead.
    ln -sfn ../nvptx-newlib/newlib newlib
    cd ..
    export target=$(gcc/config.guess)

    # Build nvptx GCC
    mkdir -p build-nvptx-gcc
    cd build-nvptx-gcc || exit 1
    ../gcc/configure \
        --target=nvptx-none --with-build-time-tools="$DSLSW_INSTALL_DIR/gnu/nvptx-none/bin" \
        --enable-as-accelerator-for="$target" \
        --disable-sjlj-exceptions \
        --enable-newlib-io-long-long \
        --enable-languages="c,c++,fortran,lto" \
        --prefix="$DSLSW_INSTALL_DIR/gnu"
    make -j"$(nproc)" || exit 1
    make install || exit 1
    cd ..

    # Build host GCC
    mkdir -p build-host-gcc
    cd build-host-gcc || exit 1
    ../gcc/configure \
        --enable-offload-targets=nvptx-none \
        --with-cuda-driver-include="$CUDA_DIR/include" \
        --with-cuda-driver-lib="$CUDA_DIR/lib64" \
        --disable-bootstrap \
        --disable-multilib \
        --enable-languages="c,c++,fortran,lto" \
        --prefix="$DSLSW_INSTALL_DIR/gnu"
    make -j"$(nproc)" || exit 1
    make install || exit 1
    cd ..
fi

12 changes: 12 additions & 0 deletions sw_stack/discover/sles15/src/2024.04.00/build_1_on-login.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
#
# Creates the Python virtual environment under $DSLSW_INSTALL_DIR/venv and
# installs NDSL (editable, from $DSLSW_INSTALL_DIR/ndsl) plus a few extra
# packages into it.

# Source the shared basics
source ./basics.sh

echo " === Make NDSL venv === "
# Abort if the install directory is missing rather than creating the venv
# in whatever directory we happen to be in.
cd "$DSLSW_INSTALL_DIR" || exit 1
python3 -m venv venv
source ./venv/bin/activate
pip install --upgrade setuptools pip
pip install -e "$DSLSW_INSTALL_DIR/ndsl"
pip install mpi4py cffi cupy-cuda12x
11 changes: 11 additions & 0 deletions sw_stack/discover/sles15/src/2024.04.00/build_2_check.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
#
# Prints the install directory, the resolved Fortran and C compilers and the
# library search path, then removes the `build` directory found in the
# current working directory.

source ./basics.sh

# Expansions are quoted so paths containing whitespace are printed verbatim.
echo "$DSLSW_INSTALL_DIR"
echo "$(which "$FC")"
echo "$(which "$CC")"

echo "$LD_LIBRARY_PATH"

# Relative path: this is the `build` directory of the directory the script
# is run from.
rm -rf build
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
#
# Removes the throwaway build directory ($DSLSW_BASE, set by basics.sh).

source ./basics.sh

# Quoted so a path with whitespace is removed as a single argument;
# `:?` aborts instead of running rm if the variable is unset or empty;
# `--` stops option parsing in case the path starts with a dash.
rm -rf -- "${DSLSW_BASE:?DSLSW_BASE is not set}"
67 changes: 67 additions & 0 deletions sw_stack/discover/sles15/src/2024.04.00/download.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/bin/bash
#
# Downloads the sources of the software stack. Release tarballs are unpacked
# into $DSLSW_BASE and the archives are deleted afterwards.

# Source the shared basics
source ./basics.sh

# Abort if the build directory is missing rather than downloading and
# unpacking into whatever directory we happen to be in.
cd "$DSLSW_BASE" || exit 1

# GDR Copy should be present in /usr/src/gdrdrv-*
#wget -c https://github.com/NVIDIA/gdrcopy/archive/refs/tags/v$DSLSW_GDRCOPY_VER.tar.gz
#tar zxpvf v$DSLSW_GDRCOPY_VER.tar.gz
#rm v$DSLSW_GDRCOPY_VER.tar.gz

# UCX
wget "https://github.com/openucx/ucx/releases/download/v${DSLSW_UCX_VER}/ucx-${DSLSW_UCX_VER}.tar.gz"
tar xfp "ucx-$DSLSW_UCX_VER.tar.gz"
rm "ucx-$DSLSW_UCX_VER.tar.gz"

# OpenMPI
wget "https://download.open-mpi.org/release/open-mpi/v$DSLSW_OMPI_MAJOR_VER/openmpi-${DSLSW_OMPI_VER}.tar.gz"
tar xfzp "openmpi-$DSLSW_OMPI_VER.tar.gz"
rm "openmpi-$DSLSW_OMPI_VER.tar.gz"

# OSU micro-benchmarks
wget "https://mvapich.cse.ohio-state.edu/download/mvapich/osu-micro-benchmarks-$DSLSW_OSUMICRO_VER.tar.gz"
tar xfp "osu-micro-benchmarks-$DSLSW_OSUMICRO_VER.tar.gz"
rm "osu-micro-benchmarks-$DSLSW_OSUMICRO_VER.tar.gz"

# Python
wget "https://www.python.org/ftp/python/$DSLSW_PY_VER/Python-$DSLSW_PY_VER.tgz"
tar zxpvf "Python-$DSLSW_PY_VER.tgz"
rm "Python-$DSLSW_PY_VER.tgz"

# Serialbox: clone, then pin to the exact commit recorded in basics.sh.
git clone https://github.com/GridTools/serialbox.git "serialbox-$DSLSW_SERIALBOX_VER"
cd "serialbox-$DSLSW_SERIALBOX_VER" || exit 1
git checkout "$DSLSW_SERIALBOX_SHA"
cd "$DSLSW_BASE" || exit 1

# Baselibs: clone the tagged release with submodules, then let its makefile
# fetch the remaining sources.
git clone --recurse-submodules -b "v$DSLSW_BASELIBS_VER" https://github.com/GEOS-ESM/ESMA-Baselibs.git "./baselibs-$DSLSW_BASELIBS_VER"
cd "./baselibs-$DSLSW_BASELIBS_VER" || exit 1
make download
echo "=>Baselibs >> Removing HDF4 from the ESSENTIALS"
# Both expressions edit GNUmakefile in place and are no-ops if the expected
# text is absent.
# NOTE(review): the second replacement prepends an extra "/ " to the list;
# confirm against the GNUmakefile of this Baselibs version that this is intended.
sed -i 's/ESSENTIAL_DIRS = jpeg zlib szlib hdf4 hdf5/ESSENTIAL_DIRS = jpeg zlib szlib hdf5/g' GNUmakefile
sed -i 's/\/zlib \/szlib \/jpeg \/hdf5 \/hdf \/netcdf,\\/\/ \/zlib \/szlib \/jpeg \/hdf5 \/netcdf,\\/g' GNUmakefile
cd "$DSLSW_BASE" || exit 1

# Optional: sources for GCC with NVPTX offloading. Only fetched when
# BUILD_GCC_OFFLOAD is defined (any value, including empty).
if [ -z "${BUILD_GCC_OFFLOAD+x}" ]; then
    echo "Skip building offloaded GCC. Define BUILD_GCC_OFFLOAD to build."
else
    # -p keeps a rerun from failing when the directory already exists.
    mkdir -p gnu
    cd gnu || exit 1
    git clone https://github.com/SourceryTools/nvptx-tools
    git clone git://sourceware.org/git/newlib-cygwin.git nvptx-newlib
    git clone --branch "releases/gcc-${DSLSW_GNU_VER}" git://gcc.gnu.org/git/gcc.git gcc
    cd gcc || exit 1
    # Script shipped in the GCC tree; run from the top of the checkout.
    contrib/download_prerequisites
fi

# Extract only the headers from the boost source release: the `boost/`
# header directory is moved to $DSLSW_INSTALL_DIR/boost/include and the rest
# of the unpacked tree is deleted.
# NOTE(review): confirm the jfrog download URL is still being served; boost
# release archives may have moved to another host.
cd "$DSLSW_INSTALL_DIR" || exit 1
wget "https://boostorg.jfrog.io/artifactory/main/release/$DSLSW_BOOST_VER/source/boost_$DSLSW_BOOST_VER_STR.tar.gz"
tar zxpvf "boost_$DSLSW_BOOST_VER_STR.tar.gz"
rm "boost_$DSLSW_BOOST_VER_STR.tar.gz"
mkdir -p boost/include
mv "boost_$DSLSW_BOOST_VER_STR/boost" boost/include
rm -r "boost_$DSLSW_BOOST_VER_STR"
cd "$DSLSW_BASE" || exit 1

# Git clone `ndsl`, with the minimum amount of history: a single branch at
# depth 1, with shallow submodules. It is cloned straight into the install
# directory.
cd "$DSLSW_INSTALL_DIR" || exit 1
git clone --recurse-submodules --shallow-submodules -b "$DSLSW_NDSL_VER" --single-branch --depth 1 https://github.com/NOAA-GFDL/NDSL.git ndsl
cd "$DSLSW_BASE" || exit 1
Loading

0 comments on commit 7f7bc8d

Please sign in to comment.