From f2cd2397589d2109b1fd5b2a2a32be7f370ec4c8 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Mon, 18 Mar 2024 09:44:58 -0400 Subject: [PATCH 01/18] Add `no-dsl` module Add venv based module Move py to 3.11.7 Move Baselibs to 7.17.1 Add Boost headers Add NDSL 2024.03.00 --- sw_stack/discover/sles15/HISTORY.md | 18 ++- .../modulefiles/SMTStack/1.0.0-no-ndsl.lua | 35 ++++++ .../{DSLwork => SMTStack}/1.0.0.lua | 10 +- sw_stack/discover/sles15/src/1.0.0/basics.sh | 15 +-- sw_stack/discover/sles15/src/1.0.0/build.sh | 110 ++++++++++-------- .../discover/sles15/src/1.0.0/download.sh | 32 +++-- 6 files changed, 144 insertions(+), 76 deletions(-) create mode 100644 sw_stack/discover/sles15/modulefiles/SMTStack/1.0.0-no-ndsl.lua rename sw_stack/discover/sles15/modulefiles/{DSLwork => SMTStack}/1.0.0.lua (81%) diff --git a/sw_stack/discover/sles15/HISTORY.md b/sw_stack/discover/sles15/HISTORY.md index 8a12524..9929921 100644 --- a/sw_stack/discover/sles15/HISTORY.md +++ b/sw_stack/discover/sles15/HISTORY.md @@ -6,10 +6,14 @@ All versions of the software for a given version are saved in `basics.sh`. `build` directory is the throwaway directory where everything is downloaded then built. `install` is saves all library and executable once build is done. -Last edit: _December 29th 2023_ +Last edit: _March 18th 2024_ ## v1.0.0 +### Options + +`BUILD_GCC_OFFLOAD`: builds an offload ready GCC 12.2 + ### OpenMPI We build OpenMPI throught the UCX layer with cuda-enabled and GRDCopy and GPUDirect on. @@ -20,6 +24,12 @@ We build OpenMPI throught the UCX layer with cuda-enabled and GRDCopy and GPUDir - UCX: 1.15.0 - OpenMPI: 4.1.6 [^3] - OSU-MICROBENCHMARK: 7.3 +- Boost headers: 1.76.0 +- NDSL: 2024.03.00 + +When defining `BUILD_GCC_OFFLOAD`: + +- GCC with offload: 12.2.0 Test of the stack can be done via the `osu-microbenchmark` with latency & bandwith saved in `osu-bench.sh`. 
@@ -36,12 +46,8 @@ _Note:_ ### Python -- Python: 3.8.10 [^4] +- Python: 3.11.7 [^4] ### Serialbox - Latest stable is 2.6.1. Development is over. - -_Note:_ - -- [^4]: `3.10.12` leads to failure in DaCe parsing. diff --git a/sw_stack/discover/sles15/modulefiles/SMTStack/1.0.0-no-ndsl.lua b/sw_stack/discover/sles15/modulefiles/SMTStack/1.0.0-no-ndsl.lua new file mode 100644 index 0000000..3632089 --- /dev/null +++ b/sw_stack/discover/sles15/modulefiles/SMTStack/1.0.0-no-ndsl.lua @@ -0,0 +1,35 @@ +load("comp/gcc/12.3.0") +load("nvidia/nvhpc-nompi/23.9") + +local install_dir = "/discover/nobackup/projects/geosongpu/sw_sles15/v1.0.0/install/" + +-- UCX -- +local ucx_pkgdir = pathJoin(install_dir, "ucx") +prepend_path("LD_LIBRARY_PATH",pathJoin(ucx_pkgdir,"lib")) + +-- OMPI -- +local ompi_pkgdir = pathJoin(install_dir, "ompi") + +setenv("M_MPI_ROOT",ompi_pkgdir) +setenv("OPENMPI",ompi_pkgdir) +setenv("MPI_HOME",ompi_pkgdir) + +prepend_path("PATH",pathJoin(ompi_pkgdir,"bin")) +prepend_path("LD_LIBRARY_PATH",pathJoin(ompi_pkgdir,"lib")) +prepend_path("INCLUDE",pathJoin(ompi_pkgdir,"include")) +prepend_path("MANPATH",pathJoin(ompi_pkgdir,"share/man")) + +setenv("OMPI_MCA_orte_tmpdir_base","/tmp") +setenv("TMPDIR","/tmp") +setenv("OMP_STACKSIZE","1G") +setenv("OMPI_MCA_mca_base_component_show_load_errors","0") +setenv("PMIX_MCA_mca_base_component_show_load_errors","0") + +-- BOOST HEADERS (as expected by gt4py) -- +local boost_pkgdir = pathJoin(install_dir, "boost") +setenv("BOOST_ROOT", boost_pkgdir) + +-- Python 3 -- +local py_pkgdir = pathJoin(install_dir, "python3") +prepend_path("PATH",pathJoin(py_pkgdir,"bin")) +prepend_path("LD_LIBRARY_PATH",pathJoin(py_pkgdir,"lib")) diff --git a/sw_stack/discover/sles15/modulefiles/DSLwork/1.0.0.lua b/sw_stack/discover/sles15/modulefiles/SMTStack/1.0.0.lua similarity index 81% rename from sw_stack/discover/sles15/modulefiles/DSLwork/1.0.0.lua rename to sw_stack/discover/sles15/modulefiles/SMTStack/1.0.0.lua index 11d9eac..20f566e 
100644 --- a/sw_stack/discover/sles15/modulefiles/DSLwork/1.0.0.lua +++ b/sw_stack/discover/sles15/modulefiles/SMTStack/1.0.0.lua @@ -25,7 +25,11 @@ setenv("OMP_STACKSIZE","1G") setenv("OMPI_MCA_mca_base_component_show_load_errors","0") setenv("PMIX_MCA_mca_base_component_show_load_errors","0") --- Python 3 -- -local py_pkgdir = pathJoin(install_dir, "python3") +-- BOOST HEADERS (as expected by gt4py) -- +local boost_pkgdir = pathJoin(install_dir, "boost") +setenv("BOOST_ROOT", boost_pkgdir) + +-- Load venv -- +local py_pkgdir = pathJoin(install_dir, "/venv/bin") prepend_path("PATH",pathJoin(py_pkgdir,"bin")) -prepend_path("LD_LIBRARY_PATH",pathJoin(py_pkgdir,"lib")) + diff --git a/sw_stack/discover/sles15/src/1.0.0/basics.sh b/sw_stack/discover/sles15/src/1.0.0/basics.sh index 541670f..a62de20 100755 --- a/sw_stack/discover/sles15/src/1.0.0/basics.sh +++ b/sw_stack/discover/sles15/src/1.0.0/basics.sh @@ -11,10 +11,13 @@ export DSLSW_UCX_VER=1.15.0 export DSLSW_CUDA_VER=12.2 export DSLSW_OSUMICRO_VER=7.3 export DSLSW_LAPACK_VER=3.11.0 -export DSLSW_PY_VER=3.8.10 -export DSLSW_BASELIBS_VER=7.14.1 +export DSLSW_PY_VER=3.11.7 +export DSLSW_BASELIBS_VER=7.17.1 export DSLSW_SERIALBOX_VER=2.6.1 export DSLSW_GNU_VER=12.2.0 +export DSLSW_NDSL=2024.03.00 +export DSLSW_BOOST_VER=1.76.0 +export DSLSW_BOOST_VER_STR=1_76_0 # Base directory & versioning export DSLSW_BASE=$PWD/build @@ -26,16 +29,10 @@ mkdir -p $DSLSW_INSTALL_DIR module load nvidia/nvhpc-nompi/23.9 CUDA_DIR=/usr/local/other/nvidia/hpc_sdk/Linux_x86_64/23.9/cuda/ module load comp/gcc/12.3.0 -module load other/boost/1.77.0 module use -a /discover/nobackup/projects/geosongpu/sw_sles15/modulesfiles/ -module load DSLwork/1.0.0 +module load SMTStack/1.0.0 # Enforce proper compilers export FC=gfortran export CC=gcc export CXX=g++ - -echo "TODO: make modules!" 
# once we have modules this isn't required a module load (if available) will be enough -export LD_LIBRARY_PATH=$DSLSW_INSTALL_DIR/ompi/lib:$DSLSW_INSTALL_DIR/ucx/lib:$DSLSW_INSTALL_DIR/python3/lib:$LD_LIBRARY_PATH -export PATH=$DSLSW_INSTALL_DIR/ompi/bin:$DSLSW_INSTALL_DIR/python3/bin:$PATH:$DSLSW_INSTALL_DIR/osu/libexec/osu-micro-benchmarks/mpi/pt2pt/ - diff --git a/sw_stack/discover/sles15/src/1.0.0/build.sh b/sw_stack/discover/sles15/src/1.0.0/build.sh index e5e9e74..1b8e332 100755 --- a/sw_stack/discover/sles15/src/1.0.0/build.sh +++ b/sw_stack/discover/sles15/src/1.0.0/build.sh @@ -95,54 +95,62 @@ make ESMF_COMM=openmpi \ --prefix=$DSLSW_INSTALL_DIR/baselibs-$DSLSW_BASELIBS_VER/install/x86_64-pc-linux-gnu/Linux \ install -echo " === GNU gcc/gfortran/g++ with OpenACC and OpenMP Offload on NVIDIA GPUs === " -module rm comp/gcc/12.3.0 -module rm nvidia/nvhpc-nompi/23.9 -unset CC -unset CXX -unset FC - -# Build assembler and linking tools -cd $DSLSW_BASE/gnu/nvptx-tools -./configure \ - --with-cuda-driver-include=$CUDA_DIR/include \ - --with-cuda-driver-lib=$CUDA_DIR/lib64 \ - --prefix=$DSLSW_INSTALL_DIR/gnu -make || exit 1 -make install || exit 1 -cd .. - -# Set up the GCC source tree -cd $DSLSW_BASE/gnu/gcc -ln -s ../nvptx-newlib/newlib newlib -cd .. -export target=$(gcc/config.guess) - -# Build nvptx GCC -mkdir build-nvptx-gcc -cd build-nvptx-gcc -../gcc/configure \ - --target=nvptx-none --with-build-time-tools=$DSLSW_INSTALL_DIR/gnu/nvptx-none/bin \ - --enable-as-accelerator-for=$target \ - --disable-sjlj-exceptions \ - --enable-newlib-io-long-long \ - --enable-languages="c,c++,fortran,lto" \ - --prefix=$DSLSW_INSTALL_DIR/gnu -make -j`nproc` || exit 1 -make install || exit 1 -cd .. 
- -# Build host GCC -mkdir build-host-gcc -cd build-host-gcc -../gcc/configure \ - --enable-offload-targets=nvptx-none \ - --with-cuda-driver-include=$CUDA_DIR/include \ - --with-cuda-driver-lib=$CUDA_DIR/lib64 \ - --disable-bootstrap \ - --disable-multilib \ - --enable-languages="c,c++,fortran,lto" \ - --prefix=$DSLSW_INSTALL_DIR/gnu -make -j`nproc` || exit 1 -make install || exit 1 -cd .. +if [ -z ${BUILD_GCC_OFFLOAD+x} ] + echo " === GNU gcc/gfortran/g++ with OpenACC and OpenMP Offload on NVIDIA GPUs === " + module rm comp/gcc/12.3.0 + module rm nvidia/nvhpc-nompi/23.9 + unset CC + unset CXX + unset FC + + # Build assembler and linking tools + cd $DSLSW_BASE/gnu/nvptx-tools + ./configure \ + --with-cuda-driver-include=$CUDA_DIR/include \ + --with-cuda-driver-lib=$CUDA_DIR/lib64 \ + --prefix=$DSLSW_INSTALL_DIR/gnu + make || exit 1 + make install || exit 1 + cd .. + + # Set up the GCC source tree + cd $DSLSW_BASE/gnu/gcc + ln -s ../nvptx-newlib/newlib newlib + cd .. + export target=$(gcc/config.guess) + + # Build nvptx GCC + mkdir build-nvptx-gcc + cd build-nvptx-gcc + ../gcc/configure \ + --target=nvptx-none --with-build-time-tools=$DSLSW_INSTALL_DIR/gnu/nvptx-none/bin \ + --enable-as-accelerator-for=$target \ + --disable-sjlj-exceptions \ + --enable-newlib-io-long-long \ + --enable-languages="c,c++,fortran,lto" \ + --prefix=$DSLSW_INSTALL_DIR/gnu + make -j`nproc` || exit 1 + make install || exit 1 + cd .. + + # Build host GCC + mkdir build-host-gcc + cd build-host-gcc + ../gcc/configure \ + --enable-offload-targets=nvptx-none \ + --with-cuda-driver-include=$CUDA_DIR/include \ + --with-cuda-driver-lib=$CUDA_DIR/lib64 \ + --disable-bootstrap \ + --disable-multilib \ + --enable-languages="c,c++,fortran,lto" \ + --prefix=$DSLSW_INSTALL_DIR/gnu + make -j`nproc` || exit 1 + make install || exit 1 + cd .. 
+fi + +cd $DSLSW_INSTALL_DIR +./python3/bin/python3 -m venv venv +source ./venv/bin/activate +pip install --upgrade setuptools pip +pip install -e $DSLSW_INSTALL_DIR/ndsl diff --git a/sw_stack/discover/sles15/src/1.0.0/download.sh b/sw_stack/discover/sles15/src/1.0.0/download.sh index 954b679..53391ba 100755 --- a/sw_stack/discover/sles15/src/1.0.0/download.sh +++ b/sw_stack/discover/sles15/src/1.0.0/download.sh @@ -43,12 +43,30 @@ sed -i 's/ESSENTIAL_DIRS = jpeg zlib szlib hdf4 hdf5/ESSENTIAL_DIRS = jpeg zlib sed -i 's/\/zlib \/szlib \/jpeg \/hdf5 \/hdf \/netcdf,\\/\/ \/zlib \/szlib \/jpeg \/hdf5 \/netcdf,\\/g' GNUmakefile cd $DSLSW_BASE -mkdir gnu -cd gnu -git clone https://github.com/SourceryTools/nvptx-tools -git clone git://sourceware.org/git/newlib-cygwin.git nvptx-newlib -git clone --branch releases/gcc-${DSLSW_GNU_VER} git://gcc.gnu.org/git/gcc.git gcc -cd gcc -contrib/download_prerequisites +if [ -z ${BUILD_GCC_OFFLOAD+x} ] + mkdir gnu + cd gnu + git clone https://github.com/SourceryTools/nvptx-tools + git clone git://sourceware.org/git/newlib-cygwin.git nvptx-newlib + git clone --branch releases/gcc-${DSLSW_GNU_VER} git://gcc.gnu.org/git/gcc.git gcc + cd gcc + contrib/download_prerequisites +fi +# Stream include out of boost source +cd $DSLSW_INSTALL_DIR +wget https://boostorg.jfrog.io/artifactory/main/release/$DSLSW_BOOST_VER/source/boost_$DSLSW_BOOST_VER_STR.tar.gz +tar zxpvf boost_$DSLSW_BOOST_VER_STR.tar.gz +rm boost_$DSLSW_BOOST_VER_STR.tar.gz +mkdir -p boost/include +mv boost_$DSLSW_BOOST_VER_STR/boost boost/include +rm -r boost_$DSLSW_BOOST_VER_STR +cd $DSLSW_BASE + +# Git clone `ndsl`, with the minimuum amount of history +cd $DSLSW_INSTALL_DIR +git clone --recurse-submodules --shallow-submodules \ + -b $DSLSW_NDSL \ + --single-branch --depth 1 \ + https://github.com/NOAA-GFDL/NDSL.git ndsl cd $DSLSW_BASE From a238f8464dba37cc34d25d3529206f6e61cf1c74 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Mon, 18 Mar 2024 10:55:48 -0400 Subject: 
[PATCH 02/18] Fix basics sourcing --- sw_stack/discover/sles15/TODO.md | 1 - sw_stack/discover/sles15/src/1.0.0/build.sh | 2 +- sw_stack/discover/sles15/src/1.0.0/check.sh | 2 +- sw_stack/discover/sles15/src/1.0.0/download.sh | 2 +- sw_stack/discover/sles15/src/1.0.0/osu-bench.sh | 2 +- 5 files changed, 4 insertions(+), 5 deletions(-) diff --git a/sw_stack/discover/sles15/TODO.md b/sw_stack/discover/sles15/TODO.md index 5ce75ba..d05b781 100644 --- a/sw_stack/discover/sles15/TODO.md +++ b/sw_stack/discover/sles15/TODO.md @@ -1,4 +1,3 @@ # TODO - Test the build of baselibs -- Build GCC with OpenACC & OpenMP offloading diff --git a/sw_stack/discover/sles15/src/1.0.0/build.sh b/sw_stack/discover/sles15/src/1.0.0/build.sh index 1b8e332..014bb48 100755 --- a/sw_stack/discover/sles15/src/1.0.0/build.sh +++ b/sw_stack/discover/sles15/src/1.0.0/build.sh @@ -1,7 +1,7 @@ #!/bin/bash # Source the shared basics -source ./basics.v1.0.0.sh +source ./basics.sh echo " === GDR Copy (requires kernel running on the box) === " #cd $DSLSW_BASE/gdrcopy-$DSLSW_GDRCOPY_VER diff --git a/sw_stack/discover/sles15/src/1.0.0/check.sh b/sw_stack/discover/sles15/src/1.0.0/check.sh index db9fc56..fd6e6c5 100755 --- a/sw_stack/discover/sles15/src/1.0.0/check.sh +++ b/sw_stack/discover/sles15/src/1.0.0/check.sh @@ -1,6 +1,6 @@ #!/bin/bash -source ./basics.v1.0.0.sh +source ./basics.sh echo $DSLSW_INSTALL_DIR echo `which $FC` diff --git a/sw_stack/discover/sles15/src/1.0.0/download.sh b/sw_stack/discover/sles15/src/1.0.0/download.sh index 53391ba..ee17184 100755 --- a/sw_stack/discover/sles15/src/1.0.0/download.sh +++ b/sw_stack/discover/sles15/src/1.0.0/download.sh @@ -1,7 +1,7 @@ #!/bin/sh # Source the share basics -source ./basics.v1.0.0.sh +source ./basics.sh cd $DSLSW_BASE diff --git a/sw_stack/discover/sles15/src/1.0.0/osu-bench.sh b/sw_stack/discover/sles15/src/1.0.0/osu-bench.sh index cff0b68..4658a1b 100644 --- a/sw_stack/discover/sles15/src/1.0.0/osu-bench.sh +++ 
b/sw_stack/discover/sles15/src/1.0.0/osu-bench.sh @@ -1,6 +1,6 @@ #!/bin/bash -source basics.sh +source ./basics.sh # need to be ran on a salloc with 2 process on one node each if [[ $(hostname -s) != warpa* ]]; then From e00db777e052b7d0c0d657089258b1220c6c742b Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Mon, 18 Mar 2024 11:05:56 -0400 Subject: [PATCH 03/18] Fix bash conditional --- sw_stack/discover/sles15/src/1.0.0/download.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sw_stack/discover/sles15/src/1.0.0/download.sh b/sw_stack/discover/sles15/src/1.0.0/download.sh index ee17184..91dd96e 100755 --- a/sw_stack/discover/sles15/src/1.0.0/download.sh +++ b/sw_stack/discover/sles15/src/1.0.0/download.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Source the share basics source ./basics.sh @@ -43,7 +43,7 @@ sed -i 's/ESSENTIAL_DIRS = jpeg zlib szlib hdf4 hdf5/ESSENTIAL_DIRS = jpeg zlib sed -i 's/\/zlib \/szlib \/jpeg \/hdf5 \/hdf \/netcdf,\\/\/ \/zlib \/szlib \/jpeg \/hdf5 \/netcdf,\\/g' GNUmakefile cd $DSLSW_BASE -if [ -z ${BUILD_GCC_OFFLOAD+x} ] +if [ -z ${BUILD_GCC_OFFLOAD+x} ]; then mkdir gnu cd gnu git clone https://github.com/SourceryTools/nvptx-tools From 08e550f530c95c4dbe4b6883237f68f84bddd950 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Mon, 18 Mar 2024 11:13:12 -0400 Subject: [PATCH 04/18] Fix optional GCC offload build --- sw_stack/discover/sles15/src/1.0.0/build.sh | 3 +++ sw_stack/discover/sles15/src/1.0.0/check.sh | 2 ++ sw_stack/discover/sles15/src/1.0.0/download.sh | 2 ++ 3 files changed, 7 insertions(+) diff --git a/sw_stack/discover/sles15/src/1.0.0/build.sh b/sw_stack/discover/sles15/src/1.0.0/build.sh index 014bb48..dca5e79 100755 --- a/sw_stack/discover/sles15/src/1.0.0/build.sh +++ b/sw_stack/discover/sles15/src/1.0.0/build.sh @@ -96,6 +96,8 @@ make ESMF_COMM=openmpi \ install if [ -z ${BUILD_GCC_OFFLOAD+x} ] + echo "Skip building offloaded GCC. Define BUILD_GCC_OFFLOAD to build." 
+else echo " === GNU gcc/gfortran/g++ with OpenACC and OpenMP Offload on NVIDIA GPUs === " module rm comp/gcc/12.3.0 module rm nvidia/nvhpc-nompi/23.9 @@ -149,6 +151,7 @@ if [ -z ${BUILD_GCC_OFFLOAD+x} ] cd .. fi +echo " === Make NDSL venv === " cd $DSLSW_INSTALL_DIR ./python3/bin/python3 -m venv venv source ./venv/bin/activate diff --git a/sw_stack/discover/sles15/src/1.0.0/check.sh b/sw_stack/discover/sles15/src/1.0.0/check.sh index fd6e6c5..1740e49 100755 --- a/sw_stack/discover/sles15/src/1.0.0/check.sh +++ b/sw_stack/discover/sles15/src/1.0.0/check.sh @@ -7,3 +7,5 @@ echo `which $FC` echo `which $CC` echo $LD_LIBRARY_PATH + +rm -rf build \ No newline at end of file diff --git a/sw_stack/discover/sles15/src/1.0.0/download.sh b/sw_stack/discover/sles15/src/1.0.0/download.sh index 91dd96e..fe941e2 100755 --- a/sw_stack/discover/sles15/src/1.0.0/download.sh +++ b/sw_stack/discover/sles15/src/1.0.0/download.sh @@ -44,6 +44,8 @@ sed -i 's/\/zlib \/szlib \/jpeg \/hdf5 \/hdf \/netcdf,\\/\/ \/zlib \/szlib \/jpe cd $DSLSW_BASE if [ -z ${BUILD_GCC_OFFLOAD+x} ]; then + echo "Skip building offloaded GCC. Define BUILD_GCC_OFFLOAD to build." 
+else mkdir gnu cd gnu git clone https://github.com/SourceryTools/nvptx-tools From b0419fbd397062cb1bafdbede3fe0160fad70b1b Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Mon, 18 Mar 2024 11:50:07 -0400 Subject: [PATCH 05/18] Fix NDSL clone --- sw_stack/discover/sles15/src/1.0.0/basics.sh | 2 +- sw_stack/discover/sles15/src/1.0.0/download.sh | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/sw_stack/discover/sles15/src/1.0.0/basics.sh b/sw_stack/discover/sles15/src/1.0.0/basics.sh index a62de20..1012639 100755 --- a/sw_stack/discover/sles15/src/1.0.0/basics.sh +++ b/sw_stack/discover/sles15/src/1.0.0/basics.sh @@ -15,7 +15,7 @@ export DSLSW_PY_VER=3.11.7 export DSLSW_BASELIBS_VER=7.17.1 export DSLSW_SERIALBOX_VER=2.6.1 export DSLSW_GNU_VER=12.2.0 -export DSLSW_NDSL=2024.03.00 +export DSLSW_NDSL_VER=2024.03.00 export DSLSW_BOOST_VER=1.76.0 export DSLSW_BOOST_VER_STR=1_76_0 diff --git a/sw_stack/discover/sles15/src/1.0.0/download.sh b/sw_stack/discover/sles15/src/1.0.0/download.sh index fe941e2..7388171 100755 --- a/sw_stack/discover/sles15/src/1.0.0/download.sh +++ b/sw_stack/discover/sles15/src/1.0.0/download.sh @@ -67,8 +67,5 @@ cd $DSLSW_BASE # Git clone `ndsl`, with the minimuum amount of history cd $DSLSW_INSTALL_DIR -git clone --recurse-submodules --shallow-submodules \ - -b $DSLSW_NDSL \ - --single-branch --depth 1 \ - https://github.com/NOAA-GFDL/NDSL.git ndsl +git clone --recurse-submodules --shallow-submodules -b $DSLSW_NDSL_VER --single-branch --depth 1 https://github.com/NOAA-GFDL/NDSL.git ndsl cd $DSLSW_BASE From cb6189d49cc1b516b5704157dd8277dae24a8d72 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Mon, 18 Mar 2024 12:47:32 -0400 Subject: [PATCH 06/18] Fix skip GCC on build --- sw_stack/discover/sles15/src/1.0.0/build.sh | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sw_stack/discover/sles15/src/1.0.0/build.sh b/sw_stack/discover/sles15/src/1.0.0/build.sh index dca5e79..40287c6 100755 --- 
a/sw_stack/discover/sles15/src/1.0.0/build.sh +++ b/sw_stack/discover/sles15/src/1.0.0/build.sh @@ -89,13 +89,9 @@ make -j32 install echo " === Baselibs === " -make ESMF_COMM=openmpi \ - BUILD=ESSENTIALS \ - ALLOW_ARGUMENT_MISMATCH=-fallow-argument-mismatch \ - --prefix=$DSLSW_INSTALL_DIR/baselibs-$DSLSW_BASELIBS_VER/install/x86_64-pc-linux-gnu/Linux \ - install +make ESMF_COMM=openmpi BUILD=ESSENTIALS ALLOW_ARGUMENT_MISMATCH=-fallow-argument-mismatch --prefix=$DSLSW_INSTALL_DIR/baselibs-$DSLSW_BASELIBS_VER/install/x86_64-pc-linux-gnu/Linux install -if [ -z ${BUILD_GCC_OFFLOAD+x} ] +if [ -z ${BUILD_GCC_OFFLOAD+x} ]; then echo "Skip building offloaded GCC. Define BUILD_GCC_OFFLOAD to build." else echo " === GNU gcc/gfortran/g++ with OpenACC and OpenMP Offload on NVIDIA GPUs === " From a6101859e121e47d84b7dc80d10232dd7e051833 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Mon, 18 Mar 2024 14:19:55 -0400 Subject: [PATCH 07/18] 1.0.0 -> 2024.03.00 Fix module load --- sw_stack/discover/sles15/HISTORY.md | 2 +- .../SMTStack/{1.0.0-no-ndsl.lua => 2024.03.00-no-ndsl.lua} | 3 ++- .../modulefiles/SMTStack/{1.0.0.lua => 2024.03.00.lua} | 3 ++- .../discover/sles15/src/{1.0.0 => 2024.03.00}/basics.sh | 6 +++--- sw_stack/discover/sles15/src/{1.0.0 => 2024.03.00}/build.sh | 0 sw_stack/discover/sles15/src/{1.0.0 => 2024.03.00}/check.sh | 0 .../discover/sles15/src/{1.0.0 => 2024.03.00}/download.sh | 0 .../discover/sles15/src/{1.0.0 => 2024.03.00}/osu-bench.sh | 0 8 files changed, 8 insertions(+), 6 deletions(-) rename sw_stack/discover/sles15/modulefiles/SMTStack/{1.0.0-no-ndsl.lua => 2024.03.00-no-ndsl.lua} (86%) rename sw_stack/discover/sles15/modulefiles/SMTStack/{1.0.0.lua => 2024.03.00.lua} (86%) rename sw_stack/discover/sles15/src/{1.0.0 => 2024.03.00}/basics.sh (85%) rename sw_stack/discover/sles15/src/{1.0.0 => 2024.03.00}/build.sh (100%) rename sw_stack/discover/sles15/src/{1.0.0 => 2024.03.00}/check.sh (100%) rename sw_stack/discover/sles15/src/{1.0.0 => 
2024.03.00}/download.sh (100%) rename sw_stack/discover/sles15/src/{1.0.0 => 2024.03.00}/osu-bench.sh (100%) diff --git a/sw_stack/discover/sles15/HISTORY.md b/sw_stack/discover/sles15/HISTORY.md index 9929921..ed073b7 100644 --- a/sw_stack/discover/sles15/HISTORY.md +++ b/sw_stack/discover/sles15/HISTORY.md @@ -8,7 +8,7 @@ All versions of the software for a given version are saved in `basics.sh`. Last edit: _March 18th 2024_ -## v1.0.0 +## v2024.03.00 ### Options diff --git a/sw_stack/discover/sles15/modulefiles/SMTStack/1.0.0-no-ndsl.lua b/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00-no-ndsl.lua similarity index 86% rename from sw_stack/discover/sles15/modulefiles/SMTStack/1.0.0-no-ndsl.lua rename to sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00-no-ndsl.lua index 3632089..3b1e312 100644 --- a/sw_stack/discover/sles15/modulefiles/SMTStack/1.0.0-no-ndsl.lua +++ b/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00-no-ndsl.lua @@ -1,7 +1,8 @@ load("comp/gcc/12.3.0") load("nvidia/nvhpc-nompi/23.9") -local install_dir = "/discover/nobackup/projects/geosongpu/sw_sles15/v1.0.0/install/" +local version = getenv("DSLSW_VERSION") +local install_dir = pathJoin(pathJoin("/discover/nobackup/projects/geosongpu/sw_sles15/live/src/", version), "install/") -- UCX -- local ucx_pkgdir = pathJoin(install_dir, "ucx") diff --git a/sw_stack/discover/sles15/modulefiles/SMTStack/1.0.0.lua b/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00.lua similarity index 86% rename from sw_stack/discover/sles15/modulefiles/SMTStack/1.0.0.lua rename to sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00.lua index 20f566e..f25a6a9 100644 --- a/sw_stack/discover/sles15/modulefiles/SMTStack/1.0.0.lua +++ b/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00.lua @@ -1,7 +1,8 @@ load("comp/gcc/12.3.0") load("nvidia/nvhpc-nompi/23.9") -local install_dir = "/discover/nobackup/projects/geosongpu/sw_sles15/v1.0.0/install/" +local version = getenv("DSLSW_VERSION") 
+local install_dir = pathJoin(pathJoin("/discover/nobackup/projects/geosongpu/sw_sles15/live/src/", version), "install/") -- UCX -- local ucx_pkgdir = pathJoin(install_dir, "ucx") diff --git a/sw_stack/discover/sles15/src/1.0.0/basics.sh b/sw_stack/discover/sles15/src/2024.03.00/basics.sh similarity index 85% rename from sw_stack/discover/sles15/src/1.0.0/basics.sh rename to sw_stack/discover/sles15/src/2024.03.00/basics.sh index 1012639..87c377a 100755 --- a/sw_stack/discover/sles15/src/1.0.0/basics.sh +++ b/sw_stack/discover/sles15/src/2024.03.00/basics.sh @@ -1,6 +1,6 @@ #!/bin/bash -export DSLSW_VERSION="1.0.0" +export DSLSW_VERSION="2024.03.0" echo "DSL Software Stack v${DSLSW_VERSION}" # Version @@ -29,8 +29,8 @@ mkdir -p $DSLSW_INSTALL_DIR module load nvidia/nvhpc-nompi/23.9 CUDA_DIR=/usr/local/other/nvidia/hpc_sdk/Linux_x86_64/23.9/cuda/ module load comp/gcc/12.3.0 -module use -a /discover/nobackup/projects/geosongpu/sw_sles15/modulesfiles/ -module load SMTStack/1.0.0 +module use -a /discover/nobackup/projects/geosongpu/sw_sles15/live/modulefiles/ +module load SMTStack/${DSLSW_VERSION} # Enforce proper compilers export FC=gfortran diff --git a/sw_stack/discover/sles15/src/1.0.0/build.sh b/sw_stack/discover/sles15/src/2024.03.00/build.sh similarity index 100% rename from sw_stack/discover/sles15/src/1.0.0/build.sh rename to sw_stack/discover/sles15/src/2024.03.00/build.sh diff --git a/sw_stack/discover/sles15/src/1.0.0/check.sh b/sw_stack/discover/sles15/src/2024.03.00/check.sh similarity index 100% rename from sw_stack/discover/sles15/src/1.0.0/check.sh rename to sw_stack/discover/sles15/src/2024.03.00/check.sh diff --git a/sw_stack/discover/sles15/src/1.0.0/download.sh b/sw_stack/discover/sles15/src/2024.03.00/download.sh similarity index 100% rename from sw_stack/discover/sles15/src/1.0.0/download.sh rename to sw_stack/discover/sles15/src/2024.03.00/download.sh diff --git a/sw_stack/discover/sles15/src/1.0.0/osu-bench.sh 
b/sw_stack/discover/sles15/src/2024.03.00/osu-bench.sh similarity index 100% rename from sw_stack/discover/sles15/src/1.0.0/osu-bench.sh rename to sw_stack/discover/sles15/src/2024.03.00/osu-bench.sh From 3400e28bbf1a21b10eeac6d91ab01cef0d47487f Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Mon, 18 Mar 2024 14:43:55 -0400 Subject: [PATCH 08/18] NDSL -> 2024.03.01 Revert DSL_VER read --- sw_stack/discover/sles15/HISTORY.md | 2 +- .../sles15/modulefiles/SMTStack/2024.03.00-no-ndsl.lua | 3 +-- sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00.lua | 3 +-- sw_stack/discover/sles15/src/2024.03.00/basics.sh | 2 +- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/sw_stack/discover/sles15/HISTORY.md b/sw_stack/discover/sles15/HISTORY.md index ed073b7..e1632ba 100644 --- a/sw_stack/discover/sles15/HISTORY.md +++ b/sw_stack/discover/sles15/HISTORY.md @@ -25,7 +25,7 @@ We build OpenMPI throught the UCX layer with cuda-enabled and GRDCopy and GPUDir - OpenMPI: 4.1.6 [^3] - OSU-MICROBENCHMARK: 7.3 - Boost headers: 1.76.0 -- NDSL: 2024.03.00 +- NDSL: 2024.03.01 When defining `BUILD_GCC_OFFLOAD`: diff --git a/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00-no-ndsl.lua b/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00-no-ndsl.lua index 3b1e312..07c2b2c 100644 --- a/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00-no-ndsl.lua +++ b/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00-no-ndsl.lua @@ -1,8 +1,7 @@ load("comp/gcc/12.3.0") load("nvidia/nvhpc-nompi/23.9") -local version = getenv("DSLSW_VERSION") -local install_dir = pathJoin(pathJoin("/discover/nobackup/projects/geosongpu/sw_sles15/live/src/", version), "install/") +local install_dir = "/discover/nobackup/projects/geosongpu/sw_sles15/live/src/2024.03.00/install" -- UCX -- local ucx_pkgdir = pathJoin(install_dir, "ucx") diff --git a/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00.lua b/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00.lua index 
f25a6a9..d52d7cf 100644 --- a/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00.lua +++ b/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00.lua @@ -1,8 +1,7 @@ load("comp/gcc/12.3.0") load("nvidia/nvhpc-nompi/23.9") -local version = getenv("DSLSW_VERSION") -local install_dir = pathJoin(pathJoin("/discover/nobackup/projects/geosongpu/sw_sles15/live/src/", version), "install/") +local install_dir = "/discover/nobackup/projects/geosongpu/sw_sles15/live/src/2024.03.00/install" -- UCX -- local ucx_pkgdir = pathJoin(install_dir, "ucx") diff --git a/sw_stack/discover/sles15/src/2024.03.00/basics.sh b/sw_stack/discover/sles15/src/2024.03.00/basics.sh index 87c377a..34d2e86 100755 --- a/sw_stack/discover/sles15/src/2024.03.00/basics.sh +++ b/sw_stack/discover/sles15/src/2024.03.00/basics.sh @@ -15,7 +15,7 @@ export DSLSW_PY_VER=3.11.7 export DSLSW_BASELIBS_VER=7.17.1 export DSLSW_SERIALBOX_VER=2.6.1 export DSLSW_GNU_VER=12.2.0 -export DSLSW_NDSL_VER=2024.03.00 +export DSLSW_NDSL_VER=2024.03.01 export DSLSW_BOOST_VER=1.76.0 export DSLSW_BOOST_VER_STR=1_76_0 From 34c4c322d4c14d3a0703dfdcf26de8ac0e72e9ea Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Mon, 18 Mar 2024 14:48:14 -0400 Subject: [PATCH 09/18] Bad version name --- sw_stack/discover/sles15/src/2024.03.00/basics.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sw_stack/discover/sles15/src/2024.03.00/basics.sh b/sw_stack/discover/sles15/src/2024.03.00/basics.sh index 34d2e86..269c690 100755 --- a/sw_stack/discover/sles15/src/2024.03.00/basics.sh +++ b/sw_stack/discover/sles15/src/2024.03.00/basics.sh @@ -1,6 +1,6 @@ #!/bin/bash -export DSLSW_VERSION="2024.03.0" +export DSLSW_VERSION="2024.03.00" echo "DSL Software Stack v${DSLSW_VERSION}" # Version From f1a24f46136725fe57a70b0461968c9b4fd84b5f Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 19 Mar 2024 10:07:36 -0400 Subject: [PATCH 10/18] Fix `baselibs` --- sw_stack/discover/sles15/src/2024.03.00/build.sh | 8 
++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sw_stack/discover/sles15/src/2024.03.00/build.sh b/sw_stack/discover/sles15/src/2024.03.00/build.sh index 40287c6..102aeff 100755 --- a/sw_stack/discover/sles15/src/2024.03.00/build.sh +++ b/sw_stack/discover/sles15/src/2024.03.00/build.sh @@ -82,14 +82,14 @@ cd $DSLSW_BASE/serialbox-$DSLSW_SERIALBOX_VER mkdir build cd build cmake -DCMAKE_INSTALL_PREFIX=$DSLSW_INSTALL_DIR/serialbox \ - -DSERIALBOX_ENABLE_FORTRAN=ON \ - -DSERIALBOX_EXAMPLES=OFF \ - .. + -DSERIALBOX_ENABLE_FORTRAN=ON \ + -DSERIALBOX_EXAMPLES=OFF \ + .. make -j32 install echo " === Baselibs === " -make ESMF_COMM=openmpi BUILD=ESSENTIALS ALLOW_ARGUMENT_MISMATCH=-fallow-argument-mismatch --prefix=$DSLSW_INSTALL_DIR/baselibs-$DSLSW_BASELIBS_VER/install/x86_64-pc-linux-gnu/Linux install +ESMF_COMM=openmpi BUILD=ESSENTIALS ALLOW_ARGUMENT_MISMATCH=-fallow-argument-mismatch make --prefix=$DSLSW_INSTALL_DIR/baselibs-$DSLSW_BASELIBS_VER/install/x86_64-pc-linux-gnu/Linux install if [ -z ${BUILD_GCC_OFFLOAD+x} ]; then echo "Skip building offloaded GCC. Define BUILD_GCC_OFFLOAD to build." 
From 9d5c203dfdfce5bfcda23cc5c56ad4574dfe8784 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 19 Mar 2024 14:39:26 -0400 Subject: [PATCH 11/18] Split builds for internet requirement Fix baselibs --- .../sles15/src/2024.03.00/build_on-login.sh | 11 +++++++++++ .../src/2024.03.00/{build.sh => build_on-node.sh} | 13 ++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) create mode 100755 sw_stack/discover/sles15/src/2024.03.00/build_on-login.sh rename sw_stack/discover/sles15/src/2024.03.00/{build.sh => build_on-node.sh} (92%) diff --git a/sw_stack/discover/sles15/src/2024.03.00/build_on-login.sh b/sw_stack/discover/sles15/src/2024.03.00/build_on-login.sh new file mode 100755 index 0000000..fd6aaa1 --- /dev/null +++ b/sw_stack/discover/sles15/src/2024.03.00/build_on-login.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# Source the shared basics +source ./basics.sh + +echo " === Make NDSL venv === " +cd $DSLSW_INSTALL_DIR +./python3/bin/python3 -m venv venv +source ./venv/bin/activate +pip install --upgrade setuptools pip +pip install -e $DSLSW_INSTALL_DIR/ndsl diff --git a/sw_stack/discover/sles15/src/2024.03.00/build.sh b/sw_stack/discover/sles15/src/2024.03.00/build_on-node.sh similarity index 92% rename from sw_stack/discover/sles15/src/2024.03.00/build.sh rename to sw_stack/discover/sles15/src/2024.03.00/build_on-node.sh index 102aeff..d05d753 100755 --- a/sw_stack/discover/sles15/src/2024.03.00/build.sh +++ b/sw_stack/discover/sles15/src/2024.03.00/build_on-node.sh @@ -89,7 +89,12 @@ make -j32 install echo " === Baselibs === " -ESMF_COMM=openmpi BUILD=ESSENTIALS ALLOW_ARGUMENT_MISMATCH=-fallow-argument-mismatch make --prefix=$DSLSW_INSTALL_DIR/baselibs-$DSLSW_BASELIBS_VER/install/x86_64-pc-linux-gnu/Linux install +cd $DSLSW_BASE/baselibs-$DSLSW_BASELIBS_VER +make ESMF_COMM=openmpi \ + BUILD=ESSENTIALS \ + ALLOW_ARGUMENT_MISMATCH=-fallow-argument-mismatch \ + prefix=$DSLSW_INSTALL_DIR/baselibs-$DSLSW_BASELIBS_VER/install/x86_64-pc-linux-gnu/Linux \ + 
install if [ -z ${BUILD_GCC_OFFLOAD+x} ]; then echo "Skip building offloaded GCC. Define BUILD_GCC_OFFLOAD to build." @@ -147,9 +152,3 @@ else cd .. fi -echo " === Make NDSL venv === " -cd $DSLSW_INSTALL_DIR -./python3/bin/python3 -m venv venv -source ./venv/bin/activate -pip install --upgrade setuptools pip -pip install -e $DSLSW_INSTALL_DIR/ndsl From 3fe1f311e5476aee2ce5f627a42f5579256b9d81 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Wed, 20 Mar 2024 15:17:40 -0400 Subject: [PATCH 12/18] Move to unreleased serialbox for gcc12 --- sw_stack/discover/sles15/HISTORY.md | 5 +++-- sw_stack/discover/sles15/src/2024.03.00/basics.sh | 3 ++- sw_stack/discover/sles15/src/2024.03.00/download.sh | 7 +++---- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/sw_stack/discover/sles15/HISTORY.md b/sw_stack/discover/sles15/HISTORY.md index e1632ba..ed98750 100644 --- a/sw_stack/discover/sles15/HISTORY.md +++ b/sw_stack/discover/sles15/HISTORY.md @@ -26,6 +26,7 @@ We build OpenMPI throught the UCX layer with cuda-enabled and GRDCopy and GPUDir - OSU-MICROBENCHMARK: 7.3 - Boost headers: 1.76.0 - NDSL: 2024.03.01 +- Serialbox: 2.6.3-unreleased When defining `BUILD_GCC_OFFLOAD`: @@ -42,11 +43,11 @@ _Note:_ ### Baselibs - LAPACK/BLAS: 3.11.0 -- BASELIBS: 7.14.1 +- BASELIBS: 7.17.1 ### Python -- Python: 3.11.7 [^4] +- Python: 3.11.7 ### Serialbox diff --git a/sw_stack/discover/sles15/src/2024.03.00/basics.sh b/sw_stack/discover/sles15/src/2024.03.00/basics.sh index 269c690..4738382 100755 --- a/sw_stack/discover/sles15/src/2024.03.00/basics.sh +++ b/sw_stack/discover/sles15/src/2024.03.00/basics.sh @@ -13,7 +13,8 @@ export DSLSW_OSUMICRO_VER=7.3 export DSLSW_LAPACK_VER=3.11.0 export DSLSW_PY_VER=3.11.7 export DSLSW_BASELIBS_VER=7.17.1 -export DSLSW_SERIALBOX_VER=2.6.1 +export DSLSW_SERIALBOX_VER=2.6.2-unreleased +export DSLSW_SERIALBOX_SHA=88ac4e4dfc824953d068fe63c8e7b3dd9560a914 export DSLSW_GNU_VER=12.2.0 export DSLSW_NDSL_VER=2024.03.01 export 
DSLSW_BOOST_VER=1.76.0 diff --git a/sw_stack/discover/sles15/src/2024.03.00/download.sh b/sw_stack/discover/sles15/src/2024.03.00/download.sh index 7388171..abec7f2 100755 --- a/sw_stack/discover/sles15/src/2024.03.00/download.sh +++ b/sw_stack/discover/sles15/src/2024.03.00/download.sh @@ -30,10 +30,9 @@ wget https://www.python.org/ftp/python/$DSLSW_PY_VER/Python-$DSLSW_PY_VER.tgz tar zxpvf Python-$DSLSW_PY_VER.tgz rm Python-$DSLSW_PY_VER.tgz -wget https://github.com/GridTools/serialbox/archive/refs/tags/v$DSLSW_SERIALBOX_VER.tar.gz -mv v$DSLSW_SERIALBOX_VER.tar.gz serialbox-$DSLSW_SERIALBOX_VER.tar.gz -tar zxpvf serialbox-$DSLSW_SERIALBOX_VER.tar.gz -rm serialbox-$DSLSW_SERIALBOX_VER.tar.gz +git clone https://github.com/GridTools/serialbox.git serialbox-$DSLSW_SERIALBOX_VER +cd serialbox-$DSLSW_SERIALBOX_VER +git checkout $DSLSW_SERIALBOX_SHA git clone --recurse-submodules -b v$DSLSW_BASELIBS_VER https://github.com/GEOS-ESM/ESMA-Baselibs.git ./baselibs-$DSLSW_BASELIBS_VER cd ./baselibs-$DSLSW_BASELIBS_VER From fe18df14526c45bd61cfcc3804ddb50974c0b28f Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Wed, 20 Mar 2024 15:17:58 -0400 Subject: [PATCH 13/18] Lapack and Py3 in modules --- .../modulefiles/SMTStack/2024.03.00-no-ndsl.lua | 5 +++++ .../sles15/modulefiles/SMTStack/2024.03.00.lua | 14 ++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00-no-ndsl.lua b/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00-no-ndsl.lua index 07c2b2c..54cb0df 100644 --- a/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00-no-ndsl.lua +++ b/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00-no-ndsl.lua @@ -25,6 +25,10 @@ setenv("OMP_STACKSIZE","1G") setenv("OMPI_MCA_mca_base_component_show_load_errors","0") setenv("PMIX_MCA_mca_base_component_show_load_errors","0") +-- LAPACK -- +local lapack_pkgdir = pathJoin(install_dir, "lapack") 
+prepend_path("LD_LIBRARY_PATH",pathJoin(lapack_pkgdir,"lib64")) + -- BOOST HEADERS (as expected by gt4py) -- local boost_pkgdir = pathJoin(install_dir, "boost") setenv("BOOST_ROOT", boost_pkgdir) @@ -33,3 +37,4 @@ setenv("BOOST_ROOT", boost_pkgdir) local py_pkgdir = pathJoin(install_dir, "python3") prepend_path("PATH",pathJoin(py_pkgdir,"bin")) prepend_path("LD_LIBRARY_PATH",pathJoin(py_pkgdir,"lib")) +prepend_path("LD_LIBRARY_PATH",pathJoin(py_pkgdir,"lib64")) diff --git a/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00.lua b/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00.lua index d52d7cf..8510b79 100644 --- a/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00.lua +++ b/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00.lua @@ -25,11 +25,21 @@ setenv("OMP_STACKSIZE","1G") setenv("OMPI_MCA_mca_base_component_show_load_errors","0") setenv("PMIX_MCA_mca_base_component_show_load_errors","0") +-- LAPACK -- +local lapack_pkgdir = pathJoin(install_dir, "lapack") +prepend_path("LD_LIBRARY_PATH",pathJoin(lapack_pkgdir,"lib64")) + -- BOOST HEADERS (as expected by gt4py) -- local boost_pkgdir = pathJoin(install_dir, "boost") setenv("BOOST_ROOT", boost_pkgdir) --- Load venv -- -local py_pkgdir = pathJoin(install_dir, "/venv/bin") +-- Python 3 -- +local py_pkgdir = pathJoin(install_dir, "python3") prepend_path("PATH",pathJoin(py_pkgdir,"bin")) +prepend_path("LD_LIBRARY_PATH",pathJoin(py_pkgdir,"lib")) +prepend_path("LD_LIBRARY_PATH",pathJoin(py_pkgdir,"lib64")) +-- Load venv -- +local py_pkgdir = pathJoin(install_dir, "venv") +prepend_path("PATH",pathJoin(py_pkgdir,"bin")) +setenv("VIRTUAL_ENV", py_pkgdir) From 9333a8dc14778f189dcdd33b8e96a93087c87173 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Wed, 20 Mar 2024 15:18:23 -0400 Subject: [PATCH 14/18] Fix Lapack build Install mpi4py and cffi by default --- sw_stack/discover/sles15/src/2024.03.00/build_on-login.sh | 1 + sw_stack/discover/sles15/src/2024.03.00/build_on-node.sh | 2 +- 2 
files changed, 2 insertions(+), 1 deletion(-) diff --git a/sw_stack/discover/sles15/src/2024.03.00/build_on-login.sh b/sw_stack/discover/sles15/src/2024.03.00/build_on-login.sh index fd6aaa1..7146c69 100755 --- a/sw_stack/discover/sles15/src/2024.03.00/build_on-login.sh +++ b/sw_stack/discover/sles15/src/2024.03.00/build_on-login.sh @@ -9,3 +9,4 @@ cd $DSLSW_INSTALL_DIR source ./venv/bin/activate pip install --upgrade setuptools pip pip install -e $DSLSW_INSTALL_DIR/ndsl +pip install mpi4py cffi diff --git a/sw_stack/discover/sles15/src/2024.03.00/build_on-node.sh b/sw_stack/discover/sles15/src/2024.03.00/build_on-node.sh index d05d753..346f61e 100755 --- a/sw_stack/discover/sles15/src/2024.03.00/build_on-node.sh +++ b/sw_stack/discover/sles15/src/2024.03.00/build_on-node.sh @@ -67,7 +67,7 @@ echo " === Lapack === " cd $DSLSW_BASE/lapack-$DSLSW_LAPACK_VER mkdir build cd build -cmake .. -DCMAKE_INSTALL_PREFIX=$DSLSW_INSTALL_DIR/lapack +cmake .. -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=$DSLSW_INSTALL_DIR/lapack make -j32 install echo " === Python === " From 72d4feb02ccb3d0baaacdd90c08848c703b05f56 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Wed, 20 Mar 2024 16:37:56 -0400 Subject: [PATCH 15/18] Move to byo-compiler flavor of nvhpc --- .../sles15/modulefiles/SMTStack/2024.03.00-no-ndsl.lua | 2 +- sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00.lua | 2 +- sw_stack/discover/sles15/src/2024.03.00/basics.sh | 4 +--- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00-no-ndsl.lua b/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00-no-ndsl.lua index 54cb0df..dee22f5 100644 --- a/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00-no-ndsl.lua +++ b/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00-no-ndsl.lua @@ -1,5 +1,5 @@ load("comp/gcc/12.3.0") -load("nvidia/nvhpc-nompi/23.9") +load("nvidia/nvhpc-byo-compiler/23.9") local install_dir = 
"/discover/nobackup/projects/geosongpu/sw_sles15/live/src/2024.03.00/install" diff --git a/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00.lua b/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00.lua index 8510b79..2f5da8c 100644 --- a/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00.lua +++ b/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00.lua @@ -1,5 +1,5 @@ load("comp/gcc/12.3.0") -load("nvidia/nvhpc-nompi/23.9") +load("nvidia/nvhpc-byo-compiler/23.9") local install_dir = "/discover/nobackup/projects/geosongpu/sw_sles15/live/src/2024.03.00/install" diff --git a/sw_stack/discover/sles15/src/2024.03.00/basics.sh b/sw_stack/discover/sles15/src/2024.03.00/basics.sh index 4738382..3ed1d6a 100755 --- a/sw_stack/discover/sles15/src/2024.03.00/basics.sh +++ b/sw_stack/discover/sles15/src/2024.03.00/basics.sh @@ -27,11 +27,9 @@ export DSLSW_INSTALL_DIR=$PWD/install mkdir -p $DSLSW_INSTALL_DIR # Modules -module load nvidia/nvhpc-nompi/23.9 -CUDA_DIR=/usr/local/other/nvidia/hpc_sdk/Linux_x86_64/23.9/cuda/ -module load comp/gcc/12.3.0 module use -a /discover/nobackup/projects/geosongpu/sw_sles15/live/modulefiles/ module load SMTStack/${DSLSW_VERSION} +CUDA_DIR=/usr/local/other/nvidia/hpc_sdk/Linux_x86_64/23.9/cuda/ # Enforce proper compilers export FC=gfortran From 4242949d57af25999938d67299eae0a90d1ae604 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Thu, 21 Mar 2024 10:34:10 -0400 Subject: [PATCH 16/18] Fix baselibs pre-step --- sw_stack/discover/sles15/src/2024.03.00/download.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/sw_stack/discover/sles15/src/2024.03.00/download.sh b/sw_stack/discover/sles15/src/2024.03.00/download.sh index abec7f2..97faf0d 100755 --- a/sw_stack/discover/sles15/src/2024.03.00/download.sh +++ b/sw_stack/discover/sles15/src/2024.03.00/download.sh @@ -33,6 +33,7 @@ rm Python-$DSLSW_PY_VER.tgz git clone https://github.com/GridTools/serialbox.git serialbox-$DSLSW_SERIALBOX_VER cd 
serialbox-$DSLSW_SERIALBOX_VER git checkout $DSLSW_SERIALBOX_SHA +cd $DSLSW_BASE git clone --recurse-submodules -b v$DSLSW_BASELIBS_VER https://github.com/GEOS-ESM/ESMA-Baselibs.git ./baselibs-$DSLSW_BASELIBS_VER cd ./baselibs-$DSLSW_BASELIBS_VER From f77d1c8fce7034879c0e76461e5ceb0c5832b6a1 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Thu, 21 Mar 2024 12:19:30 -0400 Subject: [PATCH 17/18] Use MKL from module instead of LAPACK from build --- .../sles15/modulefiles/SMTStack/2024.03.00-no-ndsl.lua | 5 +---- .../discover/sles15/modulefiles/SMTStack/2024.03.00.lua | 5 +---- .../2024.03.00/{build_on-login.sh => build_0_on-login.sh} | 0 .../2024.03.00/{build_on-node.sh => build_1_on-node.sh} | 7 ------- sw_stack/discover/sles15/src/2024.03.00/download.sh | 4 ---- 5 files changed, 2 insertions(+), 19 deletions(-) rename sw_stack/discover/sles15/src/2024.03.00/{build_on-login.sh => build_0_on-login.sh} (100%) rename sw_stack/discover/sles15/src/2024.03.00/{build_on-node.sh => build_1_on-node.sh} (96%) diff --git a/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00-no-ndsl.lua b/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00-no-ndsl.lua index dee22f5..1e24a88 100644 --- a/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00-no-ndsl.lua +++ b/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00-no-ndsl.lua @@ -1,4 +1,5 @@ load("comp/gcc/12.3.0") +load("lib/mkl/2023.2.0") load("nvidia/nvhpc-byo-compiler/23.9") local install_dir = "/discover/nobackup/projects/geosongpu/sw_sles15/live/src/2024.03.00/install" @@ -25,10 +26,6 @@ setenv("OMP_STACKSIZE","1G") setenv("OMPI_MCA_mca_base_component_show_load_errors","0") setenv("PMIX_MCA_mca_base_component_show_load_errors","0") --- LAPACK -- -local lapack_pkgdir = pathJoin(install_dir, "lapack") -prepend_path("LD_LIBRARY_PATH",pathJoin(lapack_pkgdir,"lib64")) - -- BOOST HEADERS (as expected by gt4py) -- local boost_pkgdir = pathJoin(install_dir, "boost") setenv("BOOST_ROOT", boost_pkgdir) diff --git 
a/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00.lua b/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00.lua index 2f5da8c..ff6000d 100644 --- a/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00.lua +++ b/sw_stack/discover/sles15/modulefiles/SMTStack/2024.03.00.lua @@ -1,4 +1,5 @@ load("comp/gcc/12.3.0") +load("lib/mkl/2023.2.0") load("nvidia/nvhpc-byo-compiler/23.9") local install_dir = "/discover/nobackup/projects/geosongpu/sw_sles15/live/src/2024.03.00/install" @@ -25,10 +26,6 @@ setenv("OMP_STACKSIZE","1G") setenv("OMPI_MCA_mca_base_component_show_load_errors","0") setenv("PMIX_MCA_mca_base_component_show_load_errors","0") --- LAPACK -- -local lapack_pkgdir = pathJoin(install_dir, "lapack") -prepend_path("LD_LIBRARY_PATH",pathJoin(lapack_pkgdir,"lib64")) - -- BOOST HEADERS (as expected by gt4py) -- local boost_pkgdir = pathJoin(install_dir, "boost") setenv("BOOST_ROOT", boost_pkgdir) diff --git a/sw_stack/discover/sles15/src/2024.03.00/build_on-login.sh b/sw_stack/discover/sles15/src/2024.03.00/build_0_on-login.sh similarity index 100% rename from sw_stack/discover/sles15/src/2024.03.00/build_on-login.sh rename to sw_stack/discover/sles15/src/2024.03.00/build_0_on-login.sh diff --git a/sw_stack/discover/sles15/src/2024.03.00/build_on-node.sh b/sw_stack/discover/sles15/src/2024.03.00/build_1_on-node.sh similarity index 96% rename from sw_stack/discover/sles15/src/2024.03.00/build_on-node.sh rename to sw_stack/discover/sles15/src/2024.03.00/build_1_on-node.sh index 346f61e..1bac314 100755 --- a/sw_stack/discover/sles15/src/2024.03.00/build_on-node.sh +++ b/sw_stack/discover/sles15/src/2024.03.00/build_1_on-node.sh @@ -63,13 +63,6 @@ cd $DSLSW_BASE/osu-micro-benchmarks-$DSLSW_OSUMICRO_VER make -j32 make install -echo " === Lapack === " -cd $DSLSW_BASE/lapack-$DSLSW_LAPACK_VER -mkdir build -cd build -cmake .. 
-DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=$DSLSW_INSTALL_DIR/lapack -make -j32 install - echo " === Python === " cd $DSLSW_BASE/Python-$DSLSW_PY_VER ./configure --prefix=$DSLSW_INSTALL_DIR/python3 --enable-shared --enable-optimizations diff --git a/sw_stack/discover/sles15/src/2024.03.00/download.sh b/sw_stack/discover/sles15/src/2024.03.00/download.sh index 97faf0d..6e0b2f6 100755 --- a/sw_stack/discover/sles15/src/2024.03.00/download.sh +++ b/sw_stack/discover/sles15/src/2024.03.00/download.sh @@ -22,10 +22,6 @@ wget https://mvapich.cse.ohio-state.edu/download/mvapich/osu-micro-benchmarks-$D tar xfp osu-micro-benchmarks-$DSLSW_OSUMICRO_VER.tar.gz rm osu-micro-benchmarks-$DSLSW_OSUMICRO_VER.tar.gz -wget https://github.com/Reference-LAPACK/lapack/archive/refs/tags/v$DSLSW_LAPACK_VER.tar.gz -tar xfzp v$DSLSW_LAPACK_VER.tar.gz -rm v$DSLSW_LAPACK_VER.tar.gz - wget https://www.python.org/ftp/python/$DSLSW_PY_VER/Python-$DSLSW_PY_VER.tgz tar zxpvf Python-$DSLSW_PY_VER.tgz rm Python-$DSLSW_PY_VER.tgz From 8dfc4228cfdddb2398ecd6bb02eeea592b48c203 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Thu, 21 Mar 2024 12:19:36 -0400 Subject: [PATCH 18/18] Update HISTORY --- sw_stack/discover/sles15/HISTORY.md | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/sw_stack/discover/sles15/HISTORY.md b/sw_stack/discover/sles15/HISTORY.md index ed98750..b098548 100644 --- a/sw_stack/discover/sles15/HISTORY.md +++ b/sw_stack/discover/sles15/HISTORY.md @@ -14,19 +14,22 @@ Last edit: _March 18th 2024_ `BUILD_GCC_OFFLOAD`: builds an offload ready GCC 12.2 -### OpenMPI +### Stack We build OpenMPI throught the UCX layer with cuda-enabled and GRDCopy and GPUDirect on. - GDRCOPY: Must be installed on the compiling machine as a kernel module. 
- GCC: 12.3.0 [^1] (via `comp/gcc/12.3.0` on discover) - CUDA (via `nvhpc`): 12.2 [^2] (via `nvidia/nvhpc-nompi/23.9` on discover) +- MKL: 2023.2.0 (via `lib/mkl/2023.2.0` on discover) - UCX: 1.15.0 - OpenMPI: 4.1.6 [^3] - OSU-MICROBENCHMARK: 7.3 - Boost headers: 1.76.0 +- BASELIBS: 7.17.1 [^4] +- Python: 3.11.7 - NDSL: 2024.03.01 -- Serialbox: 2.6.3-unreleased +- Serialbox: 2.6.2-unreleased [^5] When defining `BUILD_GCC_OFFLOAD`: @@ -39,16 +42,5 @@ _Note:_ - [^1]: `gcc-13.2.0` fails during GEOS with an internal compiler error - [^2]: `nvhpc` ships with a prebuilt `openmpi` which can cause issues. Make sure to load the `nompi` module. - [^3]: `openmpi-5.0.0` fails at GEOS runtime on a call to `libxml2` that does a divide by zero (triggering a sigfpe). We revert to `4.1.6`. - -### Baselibs - -- LAPACK/BLAS: 3.11.0 -- BASELIBS: 7.17.1 - -### Python - -- Python: 3.11.7 - -### Serialbox - -- Latest stable is 2.6.1. Development is over. +- [^4]: As per GEOS 11.5.2 @env +- [^5]: Latest stable is 2.6.1. Development is very slow, but some fixes for GCC 12+ are in `main`.