Skip to content

Commit

Permalink
Split clean-up into separate job (NOAA-EMC#1906)
Browse files Browse the repository at this point in the history
Moves the clean-up that was previously done in the archive jobs into
their own separate job. The clean-up is also streamlined considerably
by using only `COM_TOP` instead of going through every template.
There are also additional streamlining changes and corrections in the
function that does the actual removing.

Some settings used by both jobs were elevated to `config.base`.
Others only needed for cleanup were moved to the new config for
that job.

Also corrects a small error encountered when attempting to rerun an
ensemble forecast.

Resolves NOAA-EMC#583
Resolves NOAA-EMC#1872
  • Loading branch information
WalterKolczynski-NOAA authored Oct 20, 2023
1 parent 3de102c commit 1a5d0b5
Show file tree
Hide file tree
Showing 15 changed files with 204 additions and 375 deletions.
17 changes: 17 additions & 0 deletions jobs/JGLOBAL_CLEANUP
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#! /usr/bin/env bash

# J-job for the cleanup task: sources the preamble and the job header (with
# the "base" and "cleanup" configs), runs exglobal_cleanup.sh, and then
# removes the temporary working directory unless KEEPDATA says otherwise.

source "${HOMEgfs}/ush/preamble.sh"
source "${HOMEgfs}/ush/jjob_header.sh" -e "cleanup" -c "base cleanup"

# Run the cleanup script and stop with its exit status if it failed
"${HOMEgfs}/scripts/exglobal_cleanup.sh"
status=$?
[[ ${status} -ne 0 ]] && exit "${status}"

##########################################
# Remove the Temporary working directory
##########################################
# Use a brace group rather than a subshell here: an `exit` inside ( ... ) only
# leaves the subshell, so the script would carry on after a failed cd.
cd "${DATAROOT}" || { echo "${DATAROOT} does not exist. ABORT!"; exit 1; }
if [[ ${KEEPDATA} == "NO" ]]; then
    # :? aborts instead of handing rm an empty path
    rm -rf "${DATA:?}"
fi

exit 0

1 change: 0 additions & 1 deletion jobs/rocoto/arch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,4 @@ export jobid="${job}.$$"
"${HOMEgfs}"/jobs/JGLOBAL_ARCHIVE
status=$?


exit "${status}"
19 changes: 19 additions & 0 deletions jobs/rocoto/cleanup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#! /usr/bin/env bash

# Wrapper for the cleanup job: loads the workflow modules, sets the job
# identifiers and runs the JGLOBAL_CLEANUP J-job, exiting with its status.

source "${HOMEgfs}/ush/preamble.sh"

###############################################################
# Load the FV3GFS workflow modules; give up with the loader's
# status if that did not succeed
source "${HOMEgfs}/ush/load_fv3gfs_modules.sh"
status=$?
if [[ ${status} -ne 0 ]]; then
    exit "${status}"
fi

# Job name and a job id made unique with the PID of this shell
export job="cleanup"
export jobid="${job}.$$"

###############################################################
# Run the J-job and hand its exit status back to the caller
"${HOMEgfs}/jobs/JGLOBAL_CLEANUP"
status=$?

exit "${status}"
9 changes: 0 additions & 9 deletions parm/config/gfs/config.arch
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,4 @@ export ARCH_GAUSSIAN="YES"
export ARCH_GAUSSIAN_FHMAX=${FHMAX_GFS}
export ARCH_GAUSSIAN_FHINC=${FHOUT_GFS}

#--online archive of nemsio files for fit2obs verification
export FITSARC="YES"
export FHMAX_FITS=132
[[ "${FHMAX_FITS}" -gt "${FHMAX_GFS}" ]] && export FHMAX_FITS=${FHMAX_GFS}

#--starting and ending hours of previous cycles to be removed from rotating directory
export RMOLDSTD=144
export RMOLDEND=24

echo "END: config.arch"
5 changes: 4 additions & 1 deletion parm/config/gfs/config.base.emc.dyn
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,9 @@ export ARCH_CYC=00 # Archive data at this cycle for warm_start capabil
export ARCH_WARMICFREQ=4 # Archive frequency in days for warm_start capability
export ARCH_FCSTICFREQ=1 # Archive frequency in days for gdas and gfs forecast-only capability

export DELETE_COM_IN_ARCHIVE_JOB="YES" # NO=retain ROTDIR. YES default in arch.sh and earc.sh.
#--online archive of nemsio files for fit2obs verification
export FITSARC="YES"
export FHMAX_FITS=132
[[ "${FHMAX_FITS}" -gt "${FHMAX_GFS}" ]] && export FHMAX_FITS=${FHMAX_GFS}

echo "END: config.base"
25 changes: 25 additions & 0 deletions parm/config/gfs/config.cleanup
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#! /usr/bin/env bash

########## config.cleanup ##########
# Settings for the cleanup job: task resources, the COM clean-up switch,
# the window of previous cycles to remove, and the exclusion list.
echo "BEGIN: config.cleanup"

# Pull in the resource settings for the cleanup task
source "${EXPDIR}/config.resources" cleanup

# NO=retain ROTDIR. YES default in cleanup.sh
export CLEANUP_COM="YES"

#--starting and ending hours of previous cycles to be removed from rotating directory
export RMOLDSTD=144
export RMOLDEND=24

# Files to exclude from the first stage of cleanup, chosen by RUN.
# Arrays cannot be exported, so the list is kept as one string of
# comma-separated values that is split into an array at runtime.
if [[ ${RUN} == "gdas" || ${RUN} == "gfs" ]]; then
    exclude_string="*prepbufr*, *cnvstat*, *atmanl.nc"
elif [[ ${RUN} == enkf* ]]; then
    exclude_string="*f006.ens*"
else
    exclude_string=""
fi
export exclude_string

echo "END: config.cleanup"
9 changes: 8 additions & 1 deletion parm/config/gfs/config.resources
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ if [[ $# -ne 1 ]]; then
echo "atmensanlinit atmensanlrun atmensanlfinal"
echo "landanl"
echo "aeroanlinit aeroanlrun aeroanlfinal"
echo "anal sfcanl analcalc analdiag fcst post vrfy fit2obs metp arch echgres"
echo "anal sfcanl analcalc analdiag fcst post vrfy fit2obs metp arch cleanup echgres"
echo "eobs ediag eomg eupd ecen esfc efcs epos earc"
echo "init_chem mom6ic ocnpost"
echo "waveinit waveprep wavepostsbs wavepostbndpnt wavepostbndpntbll wavepostpnt"
Expand Down Expand Up @@ -773,6 +773,13 @@ elif [[ ${step} = "arch" || ${step} = "earc" || ${step} = "getic" ]]; then
eval "export memory_${step}=50GB"
fi

elif [[ ${step} == "cleanup" ]]; then
export wtime_cleanup="01:00:00"
export npe_cleanup=1
export npe_node_cleanup=1
export nth_cleanup=1
export memory_cleanup="4096M"

elif [[ ${step} = "stage_ic" ]]; then

export wtime_stage_ic="00:15:00"
Expand Down
168 changes: 0 additions & 168 deletions scripts/exgdas_enkf_earc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -133,172 +133,4 @@ if [ "${ENSGRP}" -eq 0 ]; then
"gsistat.${RUN}.${PDY}${cyc}.ensmean"
fi


# Stop here with success, skipping all of the clean-up below, when
# DELETE_COM_IN_ARCHIVE_JOB is NO. An unset or empty value is treated as YES.
if [[ "${DELETE_COM_IN_ARCHIVE_JOB:-YES}" == NO ]] ; then
exit 0
fi

###############################################################
# ENSGRP 0 also does clean-up
###############################################################
if [[ "${ENSGRP}" -eq 0 ]]; then
function remove_files() {
    #######################################
    # Delete the regular files found under a directory (symlinks are
    # followed), skipping any whose path matches an exclusion pattern, then
    # try to remove the directory itself.
    # Arguments:
    #   $1    - directory to clean. If it does not exist a message is
    #           printed and nothing else is done.
    #   $2... - optional exclusion patterns. All remaining arguments are
    #           joined and split again on whitespace, so a single argument
    #           may carry several patterns. Each one is handed to grep as a
    #           regular expression and matched against the whole path.
    # Outputs: one "Excluding ..." line per pattern on stdout.
    # Returns: 0 in every case handled here.
    #######################################
    # TODO: move this to a new location
    local directory=$1
    shift
    if [[ ! -d ${directory} ]]; then
        echo "No directory ${directory} to remove files from, skiping"
        return
    fi

    local exclude_list="$*"
    local exclude file
    local -a file_list=()
    local -a file_list_old=()

    # Suppress warnings about chained commands suppressing exit codes
    # shellcheck disable=SC2312
    readarray -t file_list < <(find -L "${directory}" -type f)
    if (( ${#file_list[@]} == 0 )); then return; fi

    # Word splitting is deliberate: callers pass the patterns as one string
    # shellcheck disable=SC2086
    for exclude in ${exclude_list}; do
        echo "Excluding ${exclude}"
        file_list_old=("${file_list[@]}")
        # -t is essential: without it every surviving path keeps its trailing
        # newline and the rm below silently matches no file at all
        # shellcheck disable=SC2312
        readarray -t file_list < <(printf -- '%s\n' "${file_list_old[@]}" | grep -v -- "${exclude}")
        if (( ${#file_list[@]} == 0 )); then return; fi
    done

    # One rm per file keeps the argument list short however many files exist
    for file in "${file_list[@]}"; do
        rm -f -- "${file}"
    done

    # Remove directory if empty; failure (directory still has content) is fine
    rmdir "${directory}" || true
}

# Set start and end dates to remove
GDATEEND=$(${NDATE} -"${RMOLDEND_ENKF:-24}" "${PDY}${cyc}")
GDATE=$(${NDATE} -"${RMOLDSTD_ENKF:-120}" "${PDY}${cyc}")

# Visit each cycle from GDATE through GDATEEND; GDATE is advanced by
# assim_freq at the bottom of the loop.
while [ "${GDATE}" -le "${GDATEEND}" ]; do

    gPDY="${GDATE:0:8}"
    gcyc="${GDATE:8:2}"

    if [[ -d ${COM_TOP} ]]; then
        rocotolog="${EXPDIR}/logs/${GDATE}.log"
        if [[ -f "${rocotolog}" ]]; then
            # Only clean a cycle whose log ends with the success message.
            # grep exits non-zero on no match, so errexit is switched off
            # around the check (set_strict presumably restores it; confirm
            # against the preamble).
            set +e
            # Suppress warnings about chained commands suppressing exit codes
            # shellcheck disable=SC2312
            testend=$(tail -n 1 "${rocotolog}" | grep "This cycle is complete: Success")
            rc=$?
            set_strict
            if [ "${rc}" -eq 0 ]; then
                case ${CDUMP} in
                    gdas) nmem="${NMEM_ENS}";;
                    gfs)  nmem="${NMEM_ENS_GFS}";;
                    *)
                        echo "FATAL ERROR: Unknown CDUMP ${CDUMP} during cleanup"
                        exit 10
                        ;;
                esac

                # -t strips the newline from each entry; without it MEMDIR
                # would end in a newline and no member directory would match
                # shellcheck disable=SC2312
                readarray -t memlist < <(seq --format="mem%03g" 1 "${nmem}")
                memlist+=("ensstat")

                for mem in "${memlist[@]}"; do
                    # Same treatment for every component (atmos, wave, ocean,
                    # ice, aerosols/GOCART, mediator); only atmos has an
                    # exclusion.
                    for component in ATMOS WAVE OCEAN ICE CHEM MED; do
                        case ${component} in
                            ATMOS) exclude_list="f006.ens" ;;
                            *)     exclude_list="" ;;
                        esac
                        # Suppress warnings about chained commands suppressing exit codes
                        # shellcheck disable=SC2312
                        templates=$(compgen -A variable | grep "COM_${component}_.*_TMPL")
                        for template in ${templates}; do
                            MEMDIR="${mem}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}"
                            remove_files "${directory}" "${exclude_list}"
                        done
                    done
                done
            fi
        fi
    fi

    # Remove any empty directories
    # NOTE(review): the value generate_com stores in target_dir is replaced
    # by the assignment on the next line; the call is kept in case it has
    # other effects. Confirm and drop it if it does not.
    YMD=${gPDY} HH=${gcyc} generate_com target_dir:COM_TOP_TMPL
    target_dir="${ROTDIR:?}/${RUN}.${gPDY}/${gcyc}/"
    if [[ -d ${target_dir} ]]; then
        find "${target_dir}" -empty -type d -delete
    fi

    # Advance to next cycle
    GDATE=$(${NDATE} +"${assim_freq}" "${GDATE}")
done
fi

# Remove enkf*.$rPDY for the older of GDATE or RDATE
# GDATE is the current cycle moved back by RMOLDSTD_ENKF (default 120) and
# RDATE the current cycle moved back by FHMAX_GFS; the earlier one is used.
GDATE=$(${NDATE} -"${RMOLDSTD_ENKF:-120}" "${PDY}${cyc}")
fhmax=${FHMAX_GFS}
RDATE=$(${NDATE} -"${fhmax}" "${PDY}${cyc}")
if [[ "${GDATE}" -lt "${RDATE}" ]]; then
    RDATE=${GDATE}
fi
# First eight characters of the date string
rPDY="${RDATE:0:8}"
for ctype in enkfgdas enkfgfs; do
    # :? aborts rather than building a path rooted at / when ROTDIR is empty
    COMIN="${ROTDIR:?}/${ctype}.${rPDY}"
    if [[ -d ${COMIN} ]]; then
        rm -rf "${COMIN}"
    fi
done

###############################################################


exit 0
2 changes: 2 additions & 0 deletions scripts/exgdas_enkf_fcst.sh
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,9 @@ for imem in $(seq "${ENSBEG}" "${ENSEND}"); do

skip_mem="NO"
if [[ -f ${EFCSGRP}.fail ]]; then
set +e
memstat=$(grep "MEMBER ${ENSMEM}" "${EFCSGRP}.fail" | grep -c "PASS")
set_strict
[[ ${memstat} -eq 1 ]] && skip_mem="YES"
fi

Expand Down
Loading

0 comments on commit 1a5d0b5

Please sign in to comment.