From 642840507edf565dfa6d147efcc4a95ba6dce4ba Mon Sep 17 00:00:00 2001 From: Tony Craig Date: Fri, 27 Sep 2024 10:42:23 -0700 Subject: [PATCH] Update Derecho Port (#974) Update derecho port update inteloneapi, validate, use -O1, problems with -check all. update cray to ncarenv/23.09 and cce/16.0.1, answers change update intel to ncarenv/23.09 and intel/2023.2.1, answer bit-for-bit update nvhpc to ncarenv/23.09 and nvhpc/23.7, answers change update queue so smaller jobs go into develop (shared) instead of main Add ifndef __INTEL_LLVM_COMPILER (for intel oneapi) around an OMP loop that the compiler doesn't handle properly (reported to intel) in ice_history.F90. Update QC documentation in the user guide to clarify where/how to run the cice.t-test.py script. --- cicecore/cicedyn/analysis/ice_history.F90 | 4 ++++ configuration/scripts/cice.batch.csh | 7 ++++++- .../scripts/machines/Macros.derecho_intelclassic | 2 +- .../scripts/machines/Macros.derecho_inteloneapi | 11 ++++++----- configuration/scripts/machines/env.derecho_cray | 12 ++++++------ configuration/scripts/machines/env.derecho_intel | 12 ++++++------ .../scripts/machines/env.derecho_inteloneapi | 15 ++++++++------- configuration/scripts/machines/env.derecho_nvhpc | 12 ++++++------ configuration/scripts/options/set_env.gx1 | 1 + doc/source/user_guide/ug_testing.rst | 6 ++++-- 10 files changed, 48 insertions(+), 34 deletions(-) create mode 100644 configuration/scripts/options/set_env.gx1 diff --git a/cicecore/cicedyn/analysis/ice_history.F90 b/cicecore/cicedyn/analysis/ice_history.F90 index 32f744477..a0313c56c 100644 --- a/cicecore/cicedyn/analysis/ice_history.F90 +++ b/cicecore/cicedyn/analysis/ice_history.F90 @@ -2325,9 +2325,11 @@ subroutine accum_hist (dt) ! 
increment field !--------------------------------------------------------------- +#ifndef __INTEL_LLVM_COMPILER !$OMP PARALLEL DO PRIVATE(iblk,i,j,ilo,ihi,jlo,jhi,this_block, & !$OMP k,n,qn,ns,sn,rho_ocn,rho_ice,Tice,Sbr,phi,rhob,dfresh,dfsalt,sicen, & !$OMP worka,workb,worka3,Tinz4d,Sinz4d,Tsnz4d) +#endif do iblk = 1, nblocks this_block = get_block(blocks_ice(iblk),iblk) @@ -3637,7 +3639,9 @@ subroutine accum_hist (dt) call accum_hist_snow (iblk) enddo ! iblk +#ifndef __INTEL_LLVM_COMPILER !$OMP END PARALLEL DO +#endif call icepack_warnings_flush(nu_diag) if (icepack_warnings_aborted()) call abort_ice(error_message=subname, & diff --git a/configuration/scripts/cice.batch.csh b/configuration/scripts/cice.batch.csh index 520d165a3..a68852602 100755 --- a/configuration/scripts/cice.batch.csh +++ b/configuration/scripts/cice.batch.csh @@ -34,12 +34,17 @@ cat >> ${jobfile} << EOFB EOFB else if (${ICE_MACHINE} =~ derecho*) then +set memstr = "" +if (${ncores} <= 8 && ${runlength} <= 1 && ${batchmem} <= 20) then + set queue = "develop" + set memstr = ":mem=${batchmem}GB" +endif cat >> ${jobfile} << EOFB #PBS -q ${queue} #PBS -l job_priority=regular #PBS -N ${ICE_CASENAME} #PBS -A ${acct} -#PBS -l select=${nnodes}:ncpus=${corespernode}:mpiprocs=${taskpernodelimit}:ompthreads=${nthrds} +#PBS -l select=${nnodes}:ncpus=${corespernode}:mpiprocs=${taskpernodelimit}:ompthreads=${nthrds}${memstr} #PBS -l walltime=${batchtime} #PBS -j oe #PBS -W umask=022 diff --git a/configuration/scripts/machines/Macros.derecho_intelclassic b/configuration/scripts/machines/Macros.derecho_intelclassic index e0ffd44e4..f2250b84c 100644 --- a/configuration/scripts/machines/Macros.derecho_intelclassic +++ b/configuration/scripts/machines/Macros.derecho_intelclassic @@ -1,5 +1,5 @@ #============================================================================== -# Makefile macros for NCAR derecho, intel compiler +# Makefile macros for NCAR derecho, intelclassic compiler 
#============================================================================== CPP := fpp diff --git a/configuration/scripts/machines/Macros.derecho_inteloneapi b/configuration/scripts/machines/Macros.derecho_inteloneapi index ae6640388..23825d6ca 100644 --- a/configuration/scripts/machines/Macros.derecho_inteloneapi +++ b/configuration/scripts/machines/Macros.derecho_inteloneapi @@ -1,5 +1,5 @@ #============================================================================== -# Makefile macros for NCAR derecho, intel compiler +# Makefile macros for NCAR derecho, inteloneapi compiler #============================================================================== CPP := fpp @@ -12,11 +12,12 @@ FFLAGS := -fp-model precise -convert big_endian -assume byterecl -ftz -trace FFLAGS_NOOPT:= -O0 ifeq ($(ICE_BLDDEBUG), true) - FFLAGS += -O0 -g -check uninit -check bounds -check pointers -fpe0 -check noarg_temp_created -link_mpi=dbg -# FFLAGS += -O0 -g -check all -fpe0 -ftrapuv -fp-model except -check noarg_temp_created -link_mpi=dbg -stand f08 -# FFLAGS += -O0 -g -check all -fpe0 -ftrapuv -fp-model except -check noarg_temp_created -init=snan,arrays -link_mpi=dbg +# -check uninit is needed on the ld step but it still throws errors in 2023.* and 2024.0.*, likely compiler bug + FFLAGS += -O0 -g -check bounds -check pointers -fpe0 -check noarg_temp_created -link_mpi=dbg +# FFLAGS += -O0 -g -check uninit -check bounds -check pointers -fpe0 -check noarg_temp_created -link_mpi=dbg +# LDFLAGS += -check uninit else - FFLAGS += -O2 + FFLAGS += -O1 endif SCC := icx diff --git a/configuration/scripts/machines/env.derecho_cray b/configuration/scripts/machines/env.derecho_cray index 47cebd5cb..4e886ab71 100644 --- a/configuration/scripts/machines/env.derecho_cray +++ b/configuration/scripts/machines/env.derecho_cray @@ -10,16 +10,16 @@ if ("$inp" != "-nomodules") then source ${MODULESHOME}/init/csh module --force purge -module load ncarenv/23.06 +module load ncarenv/23.09 module load 
craype -module load cce/15.0.1 +module load cce/16.0.1 module load ncarcompilers -module load cray-mpich/8.1.25 +module load cray-mpich/8.1.27 module load netcdf/4.9.2 #module load hdf5/1.12.2 #module load netcdf-mpi/4.9.2 -module load cray-libsci/23.02.1.1 +module load cray-libsci/23.09.1.1 if ($?ICE_IOTYPE) then if ($ICE_IOTYPE =~ pio*) then @@ -29,7 +29,7 @@ if ($ICE_IOTYPE =~ pio*) then if ($ICE_IOTYPE == "pio1") then module load parallelio/1.10.1 else - module load parallelio/2.6.1 + module load parallelio/2.6.2 endif endif endif @@ -61,7 +61,7 @@ setenv OMP_STACKSIZE 64M setenv ICE_MACHINE_MACHNAME derecho setenv ICE_MACHINE_MACHINFO "HPE Cray EX Milan Slingshot 11" setenv ICE_MACHINE_ENVNAME cray -setenv ICE_MACHINE_ENVINFO "cce 15.0.1, cray-mpich 8.1.25, netcdf4.9.2, pnetcdf1.12.3, pio1.10.1, pio2.6.1" +setenv ICE_MACHINE_ENVINFO "Cray clang/fortran cce 16.0.1, cray-mpich 8.1.27, netcdf4.9.2, pnetcdf1.12.3, pio1.10.1, pio2.6.2" setenv ICE_MACHINE_MAKE gmake setenv ICE_MACHINE_WKDIR /glade/derecho/scratch/$user/CICE_RUNS setenv ICE_MACHINE_INPUTDATA /glade/campaign/cesm/development/pcwg diff --git a/configuration/scripts/machines/env.derecho_intel b/configuration/scripts/machines/env.derecho_intel index 63626dc33..05c719838 100644 --- a/configuration/scripts/machines/env.derecho_intel +++ b/configuration/scripts/machines/env.derecho_intel @@ -10,16 +10,16 @@ if ("$inp" != "-nomodules") then source ${MODULESHOME}/init/csh module --force purge -module load ncarenv/23.06 +module load ncarenv/23.09 module load craype -module load intel/2023.0.0 +module load intel/2023.2.1 module load ncarcompilers -module load cray-mpich/8.1.25 +module load cray-mpich/8.1.27 module load netcdf/4.9.2 #module load hdf5/1.12.2 #module load netcdf-mpi/4.9.2 -module load cray-libsci/23.02.1.1 +module load cray-libsci/23.09.1.1 if ($?ICE_IOTYPE) then if ($ICE_IOTYPE =~ pio*) then @@ -29,7 +29,7 @@ if ($ICE_IOTYPE =~ pio*) then if ($ICE_IOTYPE == "pio1") then module load 
parallelio/1.10.1 else - module load parallelio/2.6.1 + module load parallelio/2.6.2 endif endif endif @@ -61,7 +61,7 @@ setenv OMP_STACKSIZE 64M setenv ICE_MACHINE_MACHNAME derecho setenv ICE_MACHINE_MACHINFO "HPE Cray EX Milan Slingshot 11" setenv ICE_MACHINE_ENVNAME intel -setenv ICE_MACHINE_ENVINFO "ifort 2021.8.0 20221119, oneAPI DPC++/C++ 2023.0.0.20221201), cray-mpich 8.1.25, netcdf4.9.2, pnetcdf1.12.3, pio1.10.1, pio2.6.1" +setenv ICE_MACHINE_ENVINFO "ifort 2021.10.0 20230609, oneAPI DPC++/C++ 2023.2.0.20230721, cray-mpich 8.1.27, netcdf4.9.2, pnetcdf1.12.3, pio1.10.1, pio2.6.2" setenv ICE_MACHINE_MAKE gmake setenv ICE_MACHINE_WKDIR /glade/derecho/scratch/$user/CICE_RUNS setenv ICE_MACHINE_INPUTDATA /glade/campaign/cesm/development/pcwg diff --git a/configuration/scripts/machines/env.derecho_inteloneapi b/configuration/scripts/machines/env.derecho_inteloneapi index 8f3911036..79715ba2a 100644 --- a/configuration/scripts/machines/env.derecho_inteloneapi +++ b/configuration/scripts/machines/env.derecho_inteloneapi @@ -10,16 +10,17 @@ if ("$inp" != "-nomodules") then source ${MODULESHOME}/init/csh module --force purge -module load ncarenv/23.06 +module load ncarenv/23.09 module load craype -module load intel-oneapi/2023.0.0 -module load ncarcompilers -module load cray-mpich/8.1.25 +module load intel-oneapi/2023.2.1 +#module load mkl/2023.3.0 +module load ncarcompilers/1.0.0 +module load cray-mpich/8.1.27 module load netcdf/4.9.2 #module load hdf5/1.12.2 #module load netcdf-mpi/4.9.2 -module load cray-libsci/23.02.1.1 +module load cray-libsci/23.09.1.1 if ($?ICE_IOTYPE) then if ($ICE_IOTYPE =~ pio*) then @@ -29,7 +30,7 @@ if ($ICE_IOTYPE =~ pio*) then if ($ICE_IOTYPE == "pio1") then module load parallelio/1.10.1 else - module load parallelio/2.6.1 + module load parallelio/2.6.2 endif endif endif @@ -61,7 +62,7 @@ setenv OMP_STACKSIZE 64M setenv ICE_MACHINE_MACHNAME derecho setenv ICE_MACHINE_MACHINFO "HPE Cray EX Milan Slingshot 11" setenv ICE_MACHINE_ENVNAME 
inteloneapi -setenv ICE_MACHINE_ENVINFO "ifx 2023.0.0 20221201, oneAPI DPC++/C++ 2023.0.0.20221201, cray-mpich 8.1.25, netcdf4.9.2, pnetcdf1.12.3, pio1.10.1, pio2.6.1" +setenv ICE_MACHINE_ENVINFO "oneAPI DPC++/C++/ifx 2023.2.0 20230721, cray-mpich 8.1.27, netcdf4.9.2, pnetcdf1.12.3, pio1.10.1, pio2.6.2" setenv ICE_MACHINE_MAKE gmake setenv ICE_MACHINE_WKDIR /glade/derecho/scratch/$user/CICE_RUNS setenv ICE_MACHINE_INPUTDATA /glade/campaign/cesm/development/pcwg diff --git a/configuration/scripts/machines/env.derecho_nvhpc b/configuration/scripts/machines/env.derecho_nvhpc index 34342769c..99aa6430a 100644 --- a/configuration/scripts/machines/env.derecho_nvhpc +++ b/configuration/scripts/machines/env.derecho_nvhpc @@ -10,16 +10,16 @@ if ("$inp" != "-nomodules") then source ${MODULESHOME}/init/csh module --force purge -module load ncarenv/23.06 +module load ncarenv/23.09 module load craype -module load nvhpc/23.5 +module load nvhpc/23.7 module load ncarcompilers -module load cray-mpich/8.1.25 +module load cray-mpich/8.1.27 module load netcdf/4.9.2 #module load hdf5/1.12.2 #module load netcdf-mpi/4.9.2 -module load cray-libsci/23.02.1.1 +module load cray-libsci/23.09.1.1 if ($?ICE_IOTYPE) then if ($ICE_IOTYPE =~ pio*) then @@ -29,7 +29,7 @@ if ($ICE_IOTYPE =~ pio*) then if ($ICE_IOTYPE == "pio1") then module load parallelio/1.10.1 else - module load parallelio/2.6.0 + module load parallelio/2.6.2 endif endif endif @@ -61,7 +61,7 @@ setenv OMP_STACKSIZE 64M setenv ICE_MACHINE_MACHNAME derecho setenv ICE_MACHINE_MACHINFO "HPE Cray EX Milan Slingshot 11" setenv ICE_MACHINE_ENVNAME nvhpc -setenv ICE_MACHINE_ENVINFO "nvc 23.5-0, cray-mpich 8.1.25, netcdf4.9.2, pnetcdf1.12.3, pio1.10.1, pio2.6.0" +setenv ICE_MACHINE_ENVINFO "nvfortran 23.7-0, cray-mpich 8.1.27, netcdf4.9.2, pnetcdf1.12.3, pio1.10.1, pio2.6.2" setenv ICE_MACHINE_MAKE gmake setenv ICE_MACHINE_WKDIR /glade/derecho/scratch/$user/CICE_RUNS setenv ICE_MACHINE_INPUTDATA /glade/campaign/cesm/development/pcwg diff
--git a/configuration/scripts/options/set_env.gx1 b/configuration/scripts/options/set_env.gx1 new file mode 100644 index 000000000..479de3d8e --- /dev/null +++ b/configuration/scripts/options/set_env.gx1 @@ -0,0 +1 @@ +setenv ICE_MEMUSE 5 diff --git a/doc/source/user_guide/ug_testing.rst b/doc/source/user_guide/ug_testing.rst index 6867214b5..4fea329fb 100644 --- a/doc/source/user_guide/ug_testing.rst +++ b/doc/source/user_guide/ug_testing.rst @@ -1107,11 +1107,13 @@ You can also setup a conda env with the same utitities To run the validation test, setup a baseline run with the original baseline model and then a perturbation run based on recent model changes. Use ``--set qc`` in both runs in addition -to other settings needed. Then use the QC script to compare history output, +to other settings needed. Then use the QC script to compare history output. The QC script should +be run from the ``configuration/scripts/tests/QC`` directory because other files from that +directory are required for the script. .. code-block:: bash - cp configuration/scripts/tests/QC/cice.t-test.py . + cd configuration/scripts/tests/QC ./cice.t-test.py /path/to/baseline/history /path/to/test/history The script will produce output similar to: