Skip to content

Commit

Permalink
support ATM forecast only on Azure (NOAA-EMC#2827)
Browse files Browse the repository at this point in the history
This PR will support ATM forecast only run on Azure
  • Loading branch information
weihuang-jedi authored Aug 20, 2024
1 parent 659bcbe commit 1b18f2f
Show file tree
Hide file tree
Showing 7 changed files with 106 additions and 4 deletions.
55 changes: 55 additions & 0 deletions env/AZUREPW.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#! /usr/bin/env bash

if [[ $# -ne 1 ]]; then

echo "Must specify an input argument to set runtime environment variables!"
exit 1

fi

step=$1

export launcher="srun -l --export=ALL"
export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out"

# Configure MPI environment
export OMP_STACKSIZE=2048000
export NTHSTACK=1024000000

ulimit -s unlimited
ulimit -a

# Calculate common variables
# Check first if the dependent variables are set
if [[ -n "${ntasks:-}" && -n "${max_tasks_per_node:-}" && -n "${tasks_per_node:-}" ]]; then
max_threads_per_task=$((max_tasks_per_node / tasks_per_node))
NTHREADSmax=${threads_per_task:-${max_threads_per_task}}
NTHREADS1=${threads_per_task:-1}
[[ ${NTHREADSmax} -gt ${max_threads_per_task} ]] && NTHREADSmax=${max_threads_per_task}
[[ ${NTHREADS1} -gt ${max_threads_per_task} ]] && NTHREADS1=${max_threads_per_task}
APRUN="${launcher} -n ${ntasks}"
else
echo "ERROR config.resources must be sourced before sourcing AZUREPW.env"
exit 2
fi

if [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then

export launcher="srun --mpi=pmi2 -l"

(( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node ))
(( ufs_ntasks = nnodes*tasks_per_node ))
# With ESMF threading, the model wants to use the full node
export APRUN_UFS="${launcher} -n ${ufs_ntasks}"
unset nnodes ufs_ntasks

elif [[ "${step}" = "post" ]]; then

export NTHREADS_NP=${NTHREADS1}
export APRUN_NP="${APRUN}"

export NTHREADS_DWN=${threads_per_task_dwn:-1}
[[ ${NTHREADS_DWN} -gt ${max_threads_per_task} ]] && export NTHREADS_DWN=${max_threads_per_task}
export APRUN_DWN="${launcher} -n ${ntasks_dwn}"

fi
6 changes: 3 additions & 3 deletions parm/config/gfs/config.base
Original file line number Diff line number Diff line change
Expand Up @@ -482,9 +482,9 @@ export OFFSET_START_HOUR=0
# Number of regional collectives to create soundings for
export NUM_SND_COLLECTIVES=${NUM_SND_COLLECTIVES:-9}

# The tracker, genesis, and METplus jobs are not supported on AWS yet
# TODO: we should place these in workflow/hosts/awspw.yaml as part of AWS setup, not for general.
if [[ "${machine}" == "AWSPW" ]]; then
# The tracker, genesis, and METplus jobs are not supported on CSPs yet
# TODO: we should place these in workflow/hosts/awspw.yaml as part of AWS/AZURE setup, not for general.
if [[ "${machine}" =~ "PW" ]]; then
export DO_TRACKER="NO"
export DO_GENESIS="NO"
export DO_METP="NO"
Expand Down
8 changes: 8 additions & 0 deletions parm/config/gfs/config.resources
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,14 @@ case ${machine} in
# shellcheck disable=SC2034
mem_node_max=""
;;
"AZUREPW")
export PARTITION_BATCH="compute"
npe_node_max=24
max_tasks_per_node=24
# TODO Supply a max mem/node value for AZURE
# shellcheck disable=SC2034
mem_node_max=""
;;
"CONTAINER")
max_tasks_per_node=1
# TODO Supply a max mem/node value for a container
Expand Down
1 change: 1 addition & 0 deletions parm/config/gfs/config.resources.AWSPW
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# AWS-specific job resources

export is_exclusive="True"
export memory=None

# shellcheck disable=SC2312
for mem_var in $(env | grep '^memory_' | cut -d= -f1); do
Expand Down
11 changes: 11 additions & 0 deletions parm/config/gfs/config.resources.AZUREPW
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#! /usr/bin/env bash

# AZURE-specific job resources

export is_exclusive="True"
unset memory

# shellcheck disable=SC2312
for mem_var in $(env | grep '^memory_' | cut -d= -f1); do
unset "${mem_var}"
done
3 changes: 2 additions & 1 deletion workflow/hosts.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ class Host:
"""

SUPPORTED_HOSTS = ['HERA', 'ORION', 'JET', 'HERCULES',
'WCOSS2', 'S4', 'CONTAINER', 'AWSPW', 'GAEA']
'WCOSS2', 'S4', 'CONTAINER', 'GAEA',
'AWSPW', 'AZUREPW']

def __init__(self, host=None):

Expand Down
26 changes: 26 additions & 0 deletions workflow/hosts/azurepw.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
BASE_GIT: '' #TODO: This does not yet exist.
DMPDIR: '' # TODO: This does not yet exist.
PACKAGEROOT: '' #TODO: This does not yet exist.
COMINsyn: '' #TODO: This does not yet exist.
HOMEDIR: '/contrib/${USER}'
STMP: '/lustre/${USER}/stmp/'
PTMP: '/lustre/${USER}/ptmp/'
NOSCRUB: '${HOMEDIR}'
ACCOUNT: '${USER}'
SCHEDULER: slurm
QUEUE: batch
QUEUE_SERVICE: batch
PARTITION_BATCH: compute
PARTITION_SERVICE: compute
RESERVATION: ''
CLUSTERS: ''
CHGRP_RSTPROD: 'YES'
CHGRP_CMD: 'chgrp rstprod' # TODO: This is not yet supported.
HPSSARCH: 'NO'
HPSS_PROJECT: emc-global #TODO: See `ATARDIR` below.
BASE_CPLIC: '/bucket/global-workflow-shared-data/ICSDIR/prototype_ICs'
LOCALARCH: 'NO'
ATARDIR: '' # TODO: This will not yet work from AZURE.
MAKE_NSSTBUFR: 'NO'
MAKE_ACFTBUFR: 'NO'
SUPPORTED_RESOLUTIONS: ['C48', 'C96'] # TODO: Test and support all cubed-sphere resolutions.

0 comments on commit 1b18f2f

Please sign in to comment.