From 7a724e03fc1398307320b5898207df49747db0fd Mon Sep 17 00:00:00 2001
From: Wei Huang
Date: Mon, 26 Aug 2024 12:13:18 -0600
Subject: [PATCH] Support ATM forecast only on Google (#2832)

Support global-workflow ATM forecast-only runs on Google.

Add/modify env, yaml, and Python scripts so that global-workflow ATM
forecast-only runs work on GSP.

Resolves #2831
Refs #2826
Refs #2711
---
 env/GOOGLEPW.env                          | 55 +++++++++++++++++++++++
 parm/config/gfs/config.resources          |  8 ++++
 parm/config/gfs/config.resources.GOOGLEPW | 11 +++++
 workflow/hosts.py                         |  2 +-
 workflow/hosts/googlepw.yaml              | 26 +++++++++++
 5 files changed, 101 insertions(+), 1 deletion(-)
 create mode 100755 env/GOOGLEPW.env
 create mode 100644 parm/config/gfs/config.resources.GOOGLEPW
 create mode 100644 workflow/hosts/googlepw.yaml

diff --git a/env/GOOGLEPW.env b/env/GOOGLEPW.env
new file mode 100755
index 0000000000..f5582ccd4d
--- /dev/null
+++ b/env/GOOGLEPW.env
@@ -0,0 +1,55 @@
+#! /usr/bin/env bash
+
+if [[ $# -ne 1 ]]; then
+
+  echo "Must specify an input argument to set runtime environment variables!"
+  exit 1
+
+fi
+
+step=$1
+
+export launcher="srun -l --export=ALL"
+export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out"
+
+# Configure MPI environment
+export OMP_STACKSIZE=2048000
+export NTHSTACK=1024000000
+
+ulimit -s unlimited
+ulimit -a
+
+# Calculate common variables
+# Check first if the dependent variables are set
+if [[ -n "${ntasks:-}" && -n "${max_tasks_per_node:-}" && -n "${tasks_per_node:-}" ]]; then
+  max_threads_per_task=$((max_tasks_per_node / tasks_per_node))
+  NTHREADSmax=${threads_per_task:-${max_threads_per_task}}
+  NTHREADS1=${threads_per_task:-1}
+  [[ ${NTHREADSmax} -gt ${max_threads_per_task} ]] && NTHREADSmax=${max_threads_per_task}
+  [[ ${NTHREADS1} -gt ${max_threads_per_task} ]] && NTHREADS1=${max_threads_per_task}
+  APRUN="${launcher} -n ${ntasks}"
+else
+  echo "ERROR config.resources must be sourced before sourcing GOOGLEPW.env"
+  exit 2
+fi
+
+if [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then
+
+  export launcher="srun --mpi=pmi2 -l"
+
+  (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node ))
+  (( ufs_ntasks = nnodes*tasks_per_node ))
+  # With ESMF threading, the model wants to use the full node
+  export APRUN_UFS="${launcher} -n ${ufs_ntasks}"
+  unset nnodes ufs_ntasks
+
+elif [[ "${step}" = "post" ]]; then
+
+  export NTHREADS_NP=${NTHREADS1}
+  export APRUN_NP="${APRUN}"
+
+  export NTHREADS_DWN=${threads_per_task_dwn:-1}
+  [[ ${NTHREADS_DWN} -gt ${max_threads_per_task} ]] && export NTHREADS_DWN=${max_threads_per_task}
+  export APRUN_DWN="${launcher} -n ${ntasks_dwn}"
+
+fi
diff --git a/parm/config/gfs/config.resources b/parm/config/gfs/config.resources
index 2b17293151..978dca6d51 100644
--- a/parm/config/gfs/config.resources
+++ b/parm/config/gfs/config.resources
@@ -120,6 +120,14 @@ case ${machine} in
     # shellcheck disable=SC2034
     mem_node_max=""
     ;;
+  "GOOGLEPW")
+    export PARTITION_BATCH="compute"
+    npe_node_max=30
+    max_tasks_per_node=30
+    # TODO Supply a max mem/node value for GOOGLE
+    # shellcheck disable=SC2034
+    mem_node_max=""
+    ;;
   "CONTAINER")
     max_tasks_per_node=1
     # TODO Supply a max mem/node value for a container
diff --git a/parm/config/gfs/config.resources.GOOGLEPW b/parm/config/gfs/config.resources.GOOGLEPW
new file mode 100644
index 0000000000..21e54013c7
--- /dev/null
+++ b/parm/config/gfs/config.resources.GOOGLEPW
@@ -0,0 +1,11 @@
+#! /usr/bin/env bash
+
+# GOOGLE-specific job resources
+
+export is_exclusive="True"
+unset memory
+
+# shellcheck disable=SC2312
+for mem_var in $(env | grep '^memory_' | cut -d= -f1); do
+  unset "${mem_var}"
+done
diff --git a/workflow/hosts.py b/workflow/hosts.py
index 6244cf564e..34ea067ade 100644
--- a/workflow/hosts.py
+++ b/workflow/hosts.py
@@ -17,7 +17,7 @@ class Host:
 
     SUPPORTED_HOSTS = ['HERA', 'ORION', 'JET', 'HERCULES',
                        'WCOSS2', 'S4', 'CONTAINER', 'GAEA',
-                       'AWSPW', 'AZUREPW']
+                       'AWSPW', 'AZUREPW', 'GOOGLEPW']
 
     def __init__(self, host=None):
 
diff --git a/workflow/hosts/googlepw.yaml b/workflow/hosts/googlepw.yaml
new file mode 100644
index 0000000000..38180dd750
--- /dev/null
+++ b/workflow/hosts/googlepw.yaml
@@ -0,0 +1,26 @@
+BASE_GIT: '' # TODO: This does not yet exist.
+DMPDIR: '' # TODO: This does not yet exist.
+PACKAGEROOT: '' # TODO: This does not yet exist.
+COMINsyn: '' # TODO: This does not yet exist.
+HOMEDIR: '/contrib/${USER}'
+STMP: '/lustre/${USER}/stmp/'
+PTMP: '/lustre/${USER}/ptmp/'
+NOSCRUB: '${HOMEDIR}'
+ACCOUNT: '${USER}'
+SCHEDULER: slurm
+QUEUE: batch
+QUEUE_SERVICE: batch
+PARTITION_BATCH: compute
+PARTITION_SERVICE: compute
+RESERVATION: ''
+CLUSTERS: ''
+CHGRP_RSTPROD: 'YES'
+CHGRP_CMD: 'chgrp rstprod' # TODO: This is not yet supported.
+HPSSARCH: 'NO'
+HPSS_PROJECT: emc-global # TODO: See `ATARDIR` below.
+BASE_IC: '/bucket/global-workflow-shared-data/ICSDIR/prototype_ICs'
+LOCALARCH: 'NO'
+ATARDIR: '' # TODO: This will not yet work from GOOGLE.
+MAKE_NSSTBUFR: 'NO'
+MAKE_ACFTBUFR: 'NO'
+SUPPORTED_RESOLUTIONS: ['C48', 'C96'] # TODO: Test and support all cubed-sphere resolutions.
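
For reference, a minimal sketch of how the new GOOGLEPW.env is expected to be consumed, assuming config.resources(.GOOGLEPW) has already been sourced so the task-geometry variables exist; the literal values and the ${HOMEgfs} root below are illustrative, not part of the patch:

  #! /usr/bin/env bash
  # Illustrative stand-ins for what config.resources normally exports;
  # GOOGLEPW.env exits with an error if these are unset.
  export ntasks=30 tasks_per_node=30 max_tasks_per_node=30 threads_per_task=1
  # The single argument selects the job step (fcst, efcs, post, ...).
  source "${HOMEgfs}/env/GOOGLEPW.env" fcst
  echo "${APRUN_UFS}"   # expected: srun --mpi=pmi2 -l -n 30

With max_tasks_per_node equal to tasks_per_node, max_threads_per_task works out to 1, and the fcst/efcs branch rounds ntasks up to whole nodes before building APRUN_UFS.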