From 7a724e03fc1398307320b5898207df49747db0fd Mon Sep 17 00:00:00 2001
From: Wei Huang
Date: Mon, 26 Aug 2024 12:13:18 -0600
Subject: [PATCH] Support ATM forecast only on Google (#2832)

Support global-workflow ATM forecast-only runs on Google.

Add/modify env, yaml, and Python scripts so that global-workflow ATM
forecast-only runs work on GSP.

Resolves #2831
Refs #2826
Refs #2711
---
 env/GOOGLEPW.env                          | 55 +++++++++++++++++++++++
 parm/config/gfs/config.resources          |  8 ++++
 parm/config/gfs/config.resources.GOOGLEPW | 11 +++++
 workflow/hosts.py                         |  2 +-
 workflow/hosts/googlepw.yaml              | 26 +++++++++++
 5 files changed, 101 insertions(+), 1 deletion(-)
 create mode 100755 env/GOOGLEPW.env
 create mode 100644 parm/config/gfs/config.resources.GOOGLEPW
 create mode 100644 workflow/hosts/googlepw.yaml

diff --git a/env/GOOGLEPW.env b/env/GOOGLEPW.env
new file mode 100755
index 0000000000..f5582ccd4d
--- /dev/null
+++ b/env/GOOGLEPW.env
@@ -0,0 +1,55 @@
+#! /usr/bin/env bash
+
+if [[ $# -ne 1 ]]; then
+
+  echo "Must specify an input argument to set runtime environment variables!"
+  exit 1
+
+fi
+
+step=$1
+
+export launcher="srun -l --export=ALL"
+export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out"
+
+# Configure MPI environment
+export OMP_STACKSIZE=2048000
+export NTHSTACK=1024000000
+
+ulimit -s unlimited
+ulimit -a
+
+# Calculate common variables
+# Check first if the dependent variables are set
+if [[ -n "${ntasks:-}" && -n "${max_tasks_per_node:-}" && -n "${tasks_per_node:-}" ]]; then
+  max_threads_per_task=$((max_tasks_per_node / tasks_per_node))
+  NTHREADSmax=${threads_per_task:-${max_threads_per_task}}
+  NTHREADS1=${threads_per_task:-1}
+  [[ ${NTHREADSmax} -gt ${max_threads_per_task} ]] && NTHREADSmax=${max_threads_per_task}
+  [[ ${NTHREADS1} -gt ${max_threads_per_task} ]] && NTHREADS1=${max_threads_per_task}
+  APRUN="${launcher} -n ${ntasks}"
+else
+  echo "ERROR config.resources must be sourced before sourcing GOOGLEPW.env"
+  exit 2
+fi
+
+if [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then
+
+  export launcher="srun --mpi=pmi2 -l"
+
+  (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node ))
+  (( ufs_ntasks = nnodes*tasks_per_node ))
+  # With ESMF threading, the model wants to use the full node
+  export APRUN_UFS="${launcher} -n ${ufs_ntasks}"
+  unset nnodes ufs_ntasks
+
+elif [[ "${step}" = "post" ]]; then
+
+  export NTHREADS_NP=${NTHREADS1}
+  export APRUN_NP="${APRUN}"
+
+  export NTHREADS_DWN=${threads_per_task_dwn:-1}
+  [[ ${NTHREADS_DWN} -gt ${max_threads_per_task} ]] && export NTHREADS_DWN=${max_threads_per_task}
+  export APRUN_DWN="${launcher} -n ${ntasks_dwn}"
+
+fi
diff --git a/parm/config/gfs/config.resources b/parm/config/gfs/config.resources
index 2b17293151..978dca6d51 100644
--- a/parm/config/gfs/config.resources
+++ b/parm/config/gfs/config.resources
@@ -120,6 +120,14 @@ case ${machine} in
     # shellcheck disable=SC2034
     mem_node_max=""
     ;;
+  "GOOGLEPW")
+    export PARTITION_BATCH="compute"
+    npe_node_max=30
+    max_tasks_per_node=30
+    # TODO Supply a max mem/node value for GOOGLE
+    # shellcheck disable=SC2034
+    mem_node_max=""
+    ;;
   "CONTAINER")
     max_tasks_per_node=1
     # TODO Supply a max mem/node value for a container
diff --git a/parm/config/gfs/config.resources.GOOGLEPW b/parm/config/gfs/config.resources.GOOGLEPW
new file mode 100644
index 0000000000..21e54013c7
--- /dev/null
+++ b/parm/config/gfs/config.resources.GOOGLEPW
@@ -0,0 +1,11 @@
+#! /usr/bin/env bash
+
+# GOOGLE-specific job resources
+
+export is_exclusive="True"
+unset memory
+
+# shellcheck disable=SC2312
+for mem_var in $(env | grep '^memory_' | cut -d= -f1); do
+  unset "${mem_var}"
+done
diff --git a/workflow/hosts.py b/workflow/hosts.py
index 6244cf564e..34ea067ade 100644
--- a/workflow/hosts.py
+++ b/workflow/hosts.py
@@ -17,7 +17,7 @@ class Host:
 
     SUPPORTED_HOSTS = ['HERA', 'ORION', 'JET', 'HERCULES',
                        'WCOSS2', 'S4', 'CONTAINER', 'GAEA',
-                       'AWSPW', 'AZUREPW']
+                       'AWSPW', 'AZUREPW', 'GOOGLEPW']
 
     def __init__(self, host=None):
 
diff --git a/workflow/hosts/googlepw.yaml b/workflow/hosts/googlepw.yaml
new file mode 100644
index 0000000000..38180dd750
--- /dev/null
+++ b/workflow/hosts/googlepw.yaml
@@ -0,0 +1,26 @@
+BASE_GIT: '' # TODO: This does not yet exist.
+DMPDIR: '' # TODO: This does not yet exist.
+PACKAGEROOT: '' # TODO: This does not yet exist.
+COMINsyn: '' # TODO: This does not yet exist.
+HOMEDIR: '/contrib/${USER}'
+STMP: '/lustre/${USER}/stmp/'
+PTMP: '/lustre/${USER}/ptmp/'
+NOSCRUB: '${HOMEDIR}'
+ACCOUNT: '${USER}'
+SCHEDULER: slurm
+QUEUE: batch
+QUEUE_SERVICE: batch
+PARTITION_BATCH: compute
+PARTITION_SERVICE: compute
+RESERVATION: ''
+CLUSTERS: ''
+CHGRP_RSTPROD: 'YES'
+CHGRP_CMD: 'chgrp rstprod' # TODO: This is not yet supported.
+HPSSARCH: 'NO'
+HPSS_PROJECT: emc-global # TODO: See `ATARDIR` below.
+BASE_IC: '/bucket/global-workflow-shared-data/ICSDIR/prototype_ICs'
+LOCALARCH: 'NO'
+ATARDIR: '' # TODO: This will not yet work from GOOGLE.
+MAKE_NSSTBUFR: 'NO'
+MAKE_ACFTBUFR: 'NO'
+SUPPORTED_RESOLUTIONS: ['C48', 'C96'] # TODO: Test and support all cubed-sphere resolutions.
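
For reference, a minimal sketch of how the new GOOGLEPW.env is expected to be consumed, assuming config.resources(.GOOGLEPW) has already been sourced so the task-geometry variables exist; the literal values and the ${HOMEgfs} root below are illustrative, not part of the patch:

  #! /usr/bin/env bash
  # Illustrative stand-ins for what config.resources normally exports;
  # GOOGLEPW.env exits with an error if these are unset.
  export ntasks=30 tasks_per_node=30 max_tasks_per_node=30 threads_per_task=1
  # The single argument selects the job step (fcst, efcs, post, ...).
  source "${HOMEgfs}/env/GOOGLEPW.env" fcst
  echo "${APRUN_UFS}"   # expected: srun --mpi=pmi2 -l -n 30

With max_tasks_per_node equal to tasks_per_node, max_threads_per_task works out to 1, and the fcst/efcs branch rounds ntasks up to whole nodes before building APRUN_UFS.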