From 9ddb59868928e95125b72c5db0a7b18c69b7455f Mon Sep 17 00:00:00 2001 From: "Walter.Kolczynski" Date: Wed, 4 Oct 2023 02:33:25 -0500 Subject: [PATCH 01/14] Split clean-up into separate job Moves the clean-up that was previously done in the archive jobs into their own separate job. It is mostly a copy of what existed in the archive scripts, except some adjustments to account for ensemble members in the same job as the deterministic and some others for shellcheck compliance/other standard code improvements. Some settings used by both jobs were elevated to `config.base`. Others only needed for cleanup were moved to the new config for that job. Resolves #583 --- jobs/JGLOBAL_CLEANUP | 17 ++ jobs/rocoto/arch.sh | 1 - jobs/rocoto/cleanup.sh | 19 ++ parm/config/gfs/config.arch | 9 - parm/config/gfs/config.base.emc.dyn | 5 +- parm/config/gfs/config.cleanup | 15 ++ parm/config/gfs/config.resources | 9 +- scripts/exgdas_enkf_earc.sh | 168 ----------------- scripts/exglobal_archive.sh | 189 ------------------- scripts/exglobal_cleanup.sh | 210 +++++++++++++++++++++ workflow/applications/gfs_cycled.py | 4 +- workflow/applications/gfs_forecast_only.py | 4 +- workflow/rocoto/gfs_tasks.py | 13 ++ workflow/rocoto/tasks.py | 2 +- 14 files changed, 291 insertions(+), 374 deletions(-) create mode 100755 jobs/JGLOBAL_CLEANUP create mode 100755 jobs/rocoto/cleanup.sh create mode 100644 parm/config/gfs/config.cleanup create mode 100755 scripts/exglobal_cleanup.sh diff --git a/jobs/JGLOBAL_CLEANUP b/jobs/JGLOBAL_CLEANUP new file mode 100755 index 0000000000..5946710554 --- /dev/null +++ b/jobs/JGLOBAL_CLEANUP @@ -0,0 +1,17 @@ +#! /usr/bin/env bash + +source "${HOMEgfs}/ush/preamble.sh" +source "${HOMEgfs}/ush/jjob_header.sh" -e "cleanup" -c "base" + +"${HOMEgfs}/scripts/exglobal_cleanup.sh" +status=$? 
+[[ ${status} -ne 0 ]] && exit "${status}" + +########################################## +# Remove the Temporary working directory +########################################## +cd "${DATAROOT}" || (echo "${DATAROOT} does not exist. ABORT!"; exit 1) +[[ ${KEEPDATA} = "NO" ]] && rm -rf "${DATA}" + +exit 0 + diff --git a/jobs/rocoto/arch.sh b/jobs/rocoto/arch.sh index 2f62d8b354..d949b7d76f 100755 --- a/jobs/rocoto/arch.sh +++ b/jobs/rocoto/arch.sh @@ -16,5 +16,4 @@ export jobid="${job}.$$" "${HOMEgfs}"/jobs/JGLOBAL_ARCHIVE status=$? - exit "${status}" diff --git a/jobs/rocoto/cleanup.sh b/jobs/rocoto/cleanup.sh new file mode 100755 index 0000000000..96303fde57 --- /dev/null +++ b/jobs/rocoto/cleanup.sh @@ -0,0 +1,19 @@ +#! /usr/bin/env bash + +source "${HOMEgfs}/ush/preamble.sh" + +############################################################### +# Source FV3GFS workflow modules +. "${HOMEgfs}"/ush/load_fv3gfs_modules.sh +status=$? +[[ ${status} -ne 0 ]] && exit "${status}" + +export job="cleanup" +export jobid="${job}.$$" + +############################################################### +# Execute the JJOB +"${HOMEgfs}"/jobs/JGLOBAL_CLEANUP +status=$? 
+ +exit "${status}" diff --git a/parm/config/gfs/config.arch b/parm/config/gfs/config.arch index 31a3713fb1..a23bcce6ae 100644 --- a/parm/config/gfs/config.arch +++ b/parm/config/gfs/config.arch @@ -12,13 +12,4 @@ export ARCH_GAUSSIAN="YES" export ARCH_GAUSSIAN_FHMAX=${FHMAX_GFS} export ARCH_GAUSSIAN_FHINC=${FHOUT_GFS} -#--online archive of nemsio files for fit2obs verification -export FITSARC="YES" -export FHMAX_FITS=132 -[[ "${FHMAX_FITS}" -gt "${FHMAX_GFS}" ]] && export FHMAX_FITS=${FHMAX_GFS} - -#--starting and ending hours of previous cycles to be removed from rotating directory -export RMOLDSTD=144 -export RMOLDEND=24 - echo "END: config.arch" diff --git a/parm/config/gfs/config.base.emc.dyn b/parm/config/gfs/config.base.emc.dyn index 09d8897a31..b77787794c 100644 --- a/parm/config/gfs/config.base.emc.dyn +++ b/parm/config/gfs/config.base.emc.dyn @@ -394,6 +394,9 @@ export ARCH_CYC=00 # Archive data at this cycle for warm_start capabil export ARCH_WARMICFREQ=4 # Archive frequency in days for warm_start capability export ARCH_FCSTICFREQ=1 # Archive frequency in days for gdas and gfs forecast-only capability -export DELETE_COM_IN_ARCHIVE_JOB="YES" # NO=retain ROTDIR. YES default in arch.sh and earc.sh. +#--online archive of nemsio files for fit2obs verification +export FITSARC="YES" +export FHMAX_FITS=132 +[[ "${FHMAX_FITS}" -gt "${FHMAX_GFS}" ]] && export FHMAX_FITS=${FHMAX_GFS} echo "END: config.base" diff --git a/parm/config/gfs/config.cleanup b/parm/config/gfs/config.cleanup new file mode 100644 index 0000000000..1ab536e978 --- /dev/null +++ b/parm/config/gfs/config.cleanup @@ -0,0 +1,15 @@ +#! /usr/bin/env bash + +########## config.cleanup ########## +echo "BEGIN: config.cleanup" + +# Get task specific resources +source "${EXPDIR}/config.resources" cleanup + +export DELETE_COM_IN_ARCHIVE_JOB="YES" # NO=retain ROTDIR. 
YES default in cleanup.sh + +#--starting and ending hours of previous cycles to be removed from rotating directory +export RMOLDSTD=144 +export RMOLDEND=24 + +echo "END: config.cleanup" \ No newline at end of file diff --git a/parm/config/gfs/config.resources b/parm/config/gfs/config.resources index d6654b61ed..6503ae5523 100644 --- a/parm/config/gfs/config.resources +++ b/parm/config/gfs/config.resources @@ -14,7 +14,7 @@ if [[ $# -ne 1 ]]; then echo "atmensanlinit atmensanlrun atmensanlfinal" echo "landanl" echo "aeroanlinit aeroanlrun aeroanlfinal" - echo "anal sfcanl analcalc analdiag fcst post vrfy fit2obs metp arch echgres" + echo "anal sfcanl analcalc analdiag fcst post vrfy fit2obs metp arch cleanup echgres" echo "eobs ediag eomg eupd ecen esfc efcs epos earc" echo "init_chem mom6ic ocnpost" echo "waveinit waveprep wavepostsbs wavepostbndpnt wavepostbndpntbll wavepostpnt" @@ -773,6 +773,13 @@ elif [[ ${step} = "arch" || ${step} = "earc" || ${step} = "getic" ]]; then eval "export memory_${step}=50GB" fi +elif [[ ${step} == "cleanup" ]]; then + export wtime_cleanup="01:00:00" + export npe_cleanup=1 + export npe_node_cleanup=1 + export nth_cleanup=1 + export memory_cleanup="4096M" + elif [[ ${step} = "stage_ic" ]]; then export wtime_stage_ic="00:15:00" diff --git a/scripts/exgdas_enkf_earc.sh b/scripts/exgdas_enkf_earc.sh index 1bb941f888..a1bcba4d79 100755 --- a/scripts/exgdas_enkf_earc.sh +++ b/scripts/exgdas_enkf_earc.sh @@ -133,172 +133,4 @@ if [ "${ENSGRP}" -eq 0 ]; then "gsistat.${RUN}.${PDY}${cyc}.ensmean" fi - -if [[ "${DELETE_COM_IN_ARCHIVE_JOB:-YES}" == NO ]] ; then - exit 0 -fi - -############################################################### -# ENSGRP 0 also does clean-up -############################################################### -if [[ "${ENSGRP}" -eq 0 ]]; then - function remove_files() { - # TODO: move this to a new location - local directory=$1 - shift - if [[ ! 
-d ${directory} ]]; then - echo "No directory ${directory} to remove files from, skiping" - return - fi - local exclude_list="" - if (($# > 0)); then - exclude_list=$* - fi - local file_list - declare -a file_list - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - readarray -t file_list < <(find -L "${directory}" -type f) - if (( ${#file_list[@]} == 0 )); then return; fi - for exclude in ${exclude_list}; do - echo "Excluding ${exclude}" - declare -a file_list_old=("${file_list[@]}") - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - readarray file_list < <(printf -- '%s\n' "${file_list_old[@]}" | grep -v "${exclude}") - if (( ${#file_list[@]} == 0 )); then return; fi - done - - for file in "${file_list[@]}"; do - rm -f "${file}" - done - # Remove directory if empty - rmdir "${directory}" || true - } - - # Start start and end dates to remove - GDATEEND=$(${NDATE} -"${RMOLDEND_ENKF:-24}" "${PDY}${cyc}") - GDATE=$(${NDATE} -"${RMOLDSTD_ENKF:-120}" "${PDY}${cyc}") - - while [ "${GDATE}" -le "${GDATEEND}" ]; do - - gPDY="${GDATE:0:8}" - gcyc="${GDATE:8:2}" - - if [[ -d ${COM_TOP} ]]; then - rocotolog="${EXPDIR}/logs/${GDATE}.log" - if [[ -f "${rocotolog}" ]]; then - set +e - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - testend=$(tail -n 1 "${rocotolog}" | grep "This cycle is complete: Success") - rc=$? 
- set_strict - if [ "${rc}" -eq 0 ]; then - case ${CDUMP} in - gdas) nmem="${NMEM_ENS}";; - gfs) nmem="${NMEM_ENS_GFS}";; - *) - echo "FATAL ERROR: Unknown CDUMP ${CDUMP} during cleanup" - exit 10 - ;; - esac - - readarray memlist< <(seq --format="mem%03g" 1 "${nmem}") - memlist+=("ensstat") - - for mem in "${memlist[@]}"; do - # Atmos - exclude_list="f006.ens" - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - templates=$(compgen -A variable | grep 'COM_ATMOS_.*_TMPL') - for template in ${templates}; do - MEMDIR="${mem}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Wave - exclude_list="" - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - templates=$(compgen -A variable | grep 'COM_WAVE_.*_TMPL') - for template in ${templates}; do - MEMDIR="${mem}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Ocean - exclude_list="" - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - templates=$(compgen -A variable | grep 'COM_OCEAN_.*_TMPL') - for template in ${templates}; do - MEMDIR="${mem}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Ice - exclude_list="" - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - templates=$(compgen -A variable | grep 'COM_ICE_.*_TMPL') - for template in ${templates}; do - MEMDIR="${mem}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Aerosols (GOCART) - exclude_list="" - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - templates=$(compgen -A variable | grep 'COM_CHEM_.*_TMPL') - for 
template in ${templates}; do - MEMDIR="${mem}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Mediator - exclude_list="" - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - templates=$(compgen -A variable | grep 'COM_MED_.*_TMPL') - for template in ${templates}; do - MEMDIR="${mem}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - done - fi - fi - fi - - # Remove any empty directories - YMD=${gPDY} HH=${gcyc} generate_com target_dir:COM_TOP_TMPL - target_dir="${ROTDIR:?}/${RUN}.${gPDY}/${gcyc}/" - if [[ -d ${target_dir} ]]; then - find "${target_dir}" -empty -type d -delete - fi - - # Advance to next cycle - GDATE=$(${NDATE} +"${assim_freq}" "${GDATE}") - done -fi - -# Remove enkf*.$rPDY for the older of GDATE or RDATE -GDATE=$(${NDATE} -"${RMOLDSTD_ENKF:-120}" "${PDY}${cyc}") -fhmax=${FHMAX_GFS} -RDATE=$(${NDATE} -"${fhmax}" "${PDY}${cyc}") -if [ "${GDATE}" -lt "${RDATE}" ]; then - RDATE=${GDATE} -fi -rPDY=$(echo "${RDATE}" | cut -c1-8) -clist="enkfgdas enkfgfs" -for ctype in ${clist}; do - COMIN="${ROTDIR}/${ctype}.${rPDY}" - [[ -d ${COMIN} ]] && rm -rf "${COMIN}" -done - -############################################################### - - exit 0 diff --git a/scripts/exglobal_archive.sh b/scripts/exglobal_archive.sh index 5fea07f4ed..78a6d60b65 100755 --- a/scripts/exglobal_archive.sh +++ b/scripts/exglobal_archive.sh @@ -284,193 +284,4 @@ if [[ ${HPSSARCH} = "YES" || ${LOCALARCH} = "YES" ]]; then fi ##end of HPSS archive ############################################################### - - -############################################################### -# Clean up previous cycles; various depths -# PRIOR CYCLE: Leave the prior cycle alone -GDATE=$(${NDATE} -"${assim_freq}" "${PDY}${cyc}") - -# PREVIOUS to the PRIOR CYCLE -GDATE=$(${NDATE} -"${assim_freq}" 
"${GDATE}") -gPDY="${GDATE:0:8}" -gcyc="${GDATE:8:2}" - -# Remove the TMPDIR directory -# TODO Only prepbufr is currently using this directory, and all jobs should be -# cleaning up after themselves anyway -COMIN="${DATAROOT}/${GDATE}" -[[ -d ${COMIN} ]] && rm -rf "${COMIN}" - -if [[ "${DELETE_COM_IN_ARCHIVE_JOB:-YES}" == NO ]] ; then - exit 0 -fi - -# Step back every assim_freq hours and remove old rotating directories -# for successful cycles (defaults from 24h to 120h). -# Retain files needed by Fit2Obs -# TODO: This whole section needs to be revamped to remove marine component -# directories and not look at the rocoto log. -GDATEEND=$(${NDATE} -"${RMOLDEND:-24}" "${PDY}${cyc}") -GDATE=$(${NDATE} -"${RMOLDSTD:-120}" "${PDY}${cyc}") -RTOFS_DATE=$(${NDATE} -48 "${PDY}${cyc}") -function remove_files() { - # TODO: move this to a new location - local directory=$1 - shift - if [[ ! -d ${directory} ]]; then - echo "No directory ${directory} to remove files from, skiping" - return - fi - local exclude_list="" - if (($# > 0)); then - exclude_list=$* - fi - local file_list - declare -a file_list - readarray -t file_list < <(find -L "${directory}" -type f) - if (( ${#file_list[@]} == 0 )); then return; fi - # echo "Number of files to remove before exclusions: ${#file_list[@]}" - for exclude in ${exclude_list}; do - echo "Excluding ${exclude}" - declare -a file_list_old=("${file_list[@]}") - readarray file_list < <(printf -- '%s\n' "${file_list_old[@]}" | grep -v "${exclude}") - # echo "Number of files to remove after exclusion: ${#file_list[@]}" - if (( ${#file_list[@]} == 0 )); then return; fi - done - # echo "Number of files to remove after exclusions: ${#file_list[@]}" - - for file in "${file_list[@]}"; do - rm -f "${file}" - done - # Remove directory if empty - rmdir "${directory}" || true -} - -while [ "${GDATE}" -le "${GDATEEND}" ]; do - gPDY="${GDATE:0:8}" - gcyc="${GDATE:8:2}" - COMINrtofs="${ROTDIR}/rtofs.${gPDY}" - if [ -d "${COM_TOP}" ]; then - 
rocotolog="${EXPDIR}/logs/${GDATE}.log" - if [ -f "${rocotolog}" ]; then - set +e - testend=$(tail -n 1 "${rocotolog}" | grep "This cycle is complete: Success") - rc=$? - set_strict - - if [ "${rc}" -eq 0 ]; then - # Obs - exclude_list="prepbufr" - templates="COM_OBS" - for template in ${templates}; do - YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Atmos - exclude_list="cnvstat atmanl.nc" - templates=$(compgen -A variable | grep 'COM_ATMOS_.*_TMPL') - for template in ${templates}; do - YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Wave - exclude_list="" - templates=$(compgen -A variable | grep 'COM_WAVE_.*_TMPL') - for template in ${templates}; do - YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Ocean - exclude_list="" - templates=$(compgen -A variable | grep 'COM_OCEAN_.*_TMPL') - for template in ${templates}; do - YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Ice - exclude_list="" - templates=$(compgen -A variable | grep 'COM_ICE_.*_TMPL') - for template in ${templates}; do - YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Aerosols (GOCART) - exclude_list="" - templates=$(compgen -A variable | grep 'COM_CHEM_.*_TMPL') - for template in ${templates}; do - YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Mediator - exclude_list="" - templates=$(compgen -A variable | grep 'COM_MED_.*_TMPL') - for template in ${templates}; do - YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - if [ -d "${COMINrtofs}" ] && [ 
"${GDATE}" -lt "${RTOFS_DATE}" ]; then rm -rf "${COMINrtofs}" ; fi - fi - fi - fi - - # Remove mdl gfsmos directory - if [ "${RUN}" = "gfs" ]; then - COMIN="${ROTDIR}/gfsmos.${gPDY}" - if [ -d "${COMIN}" ] && [ "${GDATE}" -lt "${CDATE_MOS}" ]; then rm -rf "${COMIN}" ; fi - fi - - # Remove any empty directories - target_dir="${ROTDIR:?}/${RUN}.${gPDY}/${gcyc}/" - if [[ -d ${target_dir} ]]; then - find "${target_dir}" -empty -type d -delete - fi - - GDATE=$(${NDATE} +"${assim_freq}" "${GDATE}") -done - -# Remove archived gaussian files used for Fit2Obs in $VFYARC that are -# $FHMAX_FITS plus a delta before $CDATE. Touch existing archived -# gaussian files to prevent the files from being removed by automatic -# scrubber present on some machines. - -if [ "${RUN}" = "gfs" ]; then - fhmax=$((FHMAX_FITS+36)) - RDATE=$(${NDATE} -"${fhmax}" "${PDY}${cyc}") - rPDY=$(echo "${RDATE}" | cut -c1-8) - COMIN="${VFYARC}/${RUN}.${rPDY}" - [[ -d ${COMIN} ]] && rm -rf "${COMIN}" - - TDATE=$(${NDATE} -"${FHMAX_FITS}" "${PDY}${cyc}") - while [ "${TDATE}" -lt "${PDY}${cyc}" ]; do - tPDY=$(echo "${TDATE}" | cut -c1-8) - tcyc=$(echo "${TDATE}" | cut -c9-10) - TDIR=${VFYARC}/${RUN}.${tPDY}/${tcyc} - [[ -d ${TDIR} ]] && touch "${TDIR}"/* - TDATE=$(${NDATE} +6 "${TDATE}") - done -fi - -# Remove $RUN.$rPDY for the older of GDATE or RDATE -GDATE=$(${NDATE} -"${RMOLDSTD:-120}" "${PDY}${cyc}") -fhmax=${FHMAX_GFS} -RDATE=$(${NDATE} -"${fhmax}" "${PDY}${cyc}") -if [ "${GDATE}" -lt "${RDATE}" ]; then - RDATE=${GDATE} -fi -rPDY=$(echo "${RDATE}" | cut -c1-8) -COMIN="${ROTDIR}/${RUN}.${rPDY}" -[[ -d ${COMIN} ]] && rm -rf "${COMIN}" - - -############################################################### - - exit 0 diff --git a/scripts/exglobal_cleanup.sh b/scripts/exglobal_cleanup.sh new file mode 100755 index 0000000000..0990b07a36 --- /dev/null +++ b/scripts/exglobal_cleanup.sh @@ -0,0 +1,210 @@ +#! 
/usr/bin/env bash + +source "${HOMEgfs}/ush/preamble.sh" + +############################################################### +# Clean up previous cycles; various depths +# PRIOR CYCLE: Leave the prior cycle alone +# shellcheck disable=SC2153 +GDATE=$(${NDATE} -"${assim_freq}" "${PDY}${cyc}") + +# PREVIOUS to the PRIOR CYCLE +GDATE=$(${NDATE} -"${assim_freq}" "${GDATE}") +gPDY="${GDATE:0:8}" +gcyc="${GDATE:8:2}" + +# Remove the TMPDIR directory +# TODO Only prepbufr is currently using this directory, and all jobs should be +# cleaning up after themselves anyway +COMIN="${DATAROOT}/${GDATE}" +[[ -d ${COMIN} ]] && rm -rf "${COMIN}" + +if [[ "${DELETE_COM_IN_ARCHIVE_JOB:-YES}" == NO ]] ; then + exit 0 +fi + +# Step back every assim_freq hours and remove old rotating directories +# for successful cycles (defaults from 24h to 120h). +# Retain files needed by Fit2Obs +# TODO: This whole section needs to be revamped to remove marine component +# directories and not look at the rocoto log. +GDATEEND=$(${NDATE} -"${RMOLDEND:-24}" "${PDY}${cyc}") +GDATE=$(${NDATE} -"${RMOLDSTD:-120}" "${PDY}${cyc}") +RTOFS_DATE=$(${NDATE} -48 "${PDY}${cyc}") +function remove_files() { + local directory=$1 + shift + if [[ ! 
-d ${directory} ]]; then + echo "No directory ${directory} to remove files from, skiping" + return + fi + local exclude_list="" + if (($# > 0)); then + exclude_list=$* + fi + local file_list + declare -a file_list + readarray -t file_list < <(find -L "${directory}" -type f) + if (( ${#file_list[@]} == 0 )); then return; fi + # echo "Number of files to remove before exclusions: ${#file_list[@]}" + for exclude in ${exclude_list}; do + echo "Excluding ${exclude}" + declare -a file_list_old=("${file_list[@]}") + readarray file_list < <(printf -- '%s\n' "${file_list_old[@]}" | grep -v "${exclude}") + # echo "Number of files to remove after exclusion: ${#file_list[@]}" + if (( ${#file_list[@]} == 0 )); then return; fi + done + # echo "Number of files to remove after exclusions: ${#file_list[@]}" + + for file in "${file_list[@]}"; do + rm -f "${file}" + done + # Remove directory if empty + rmdir "${directory}" || true +} + +while (( GDATE <= GDATEEND )); do + gPDY="${GDATE:0:8}" + gcyc="${GDATE:8:2}" + COMINrtofs="${ROTDIR}/rtofs.${gPDY}" + YMD="${gPDY}" HH="${gcyc}" generate_com COM_TOP + if [[ -d "${COM_TOP}" ]]; then + rocotolog="${EXPDIR}/logs/${GDATE}.log" + if [[ -f "${rocotolog}" ]]; then + # shellcheck disable=SC2312 + if [[ $(tail -n 1 "${rocotolog}") =~ "This cycle is complete: Success" ]]; then + case ${RUN} in + gdas) nmem="${NMEM_ENS}";; + gfs) nmem="${NMEM_ENS_GFS}";; + *) + echo "FATAL ERROR: Unknown RUN ${RUN} during cleanup" + exit 10 + ;; + esac + + memlist=("") # Empty MEMDIR for deterministic + + RUN="enkf${RUN}" YMD="${gPDY}" HH="${gcyc}" generate_com enkf_top:COM_TOP_TMPL + if [[ -d ${enkf_top} ]]; then + # Add ensemble directories if they exist + readarray -O"${#memlist[@]}" memlist< <(seq --format="mem%03g" 1 "${nmem}") + memlist+=("ensstat") + fi + + for MEMDIR in "${memlist[@]}"; do + + if [[ -n "${MEMDIR}" ]]; then + local_run="enkf${RUN}" + else + local_run="${RUN}" + fi + + # Obs + exclude_list="prepbufr" + templates="COM_OBS" + for template 
in ${templates}; do + RUN="${local_run}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" + remove_files "${directory}" "${exclude_list[@]}" + done + + # Atmos + exclude_list="cnvstat atmanl.nc" + templates=$(compgen -A variable | grep 'COM_ATMOS_.*_TMPL') + for template in ${templates}; do + RUN="${local_run}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" + remove_files "${directory}" "${exclude_list[@]}" + done + + # Wave + exclude_list="" + templates=$(compgen -A variable | grep 'COM_WAVE_.*_TMPL') + for template in ${templates}; do + RUN="${local_run}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" + remove_files "${directory}" "${exclude_list[@]}" + done + + # Ocean + exclude_list="" + templates=$(compgen -A variable | grep 'COM_OCEAN_.*_TMPL') + for template in ${templates}; do + RUN="${local_run}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" + remove_files "${directory}" "${exclude_list[@]}" + done + + # Ice + exclude_list="" + templates=$(compgen -A variable | grep 'COM_ICE_.*_TMPL') + for template in ${templates}; do + RUN="${local_run}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" + remove_files "${directory}" "${exclude_list[@]}" + done + + # Aerosols (GOCART) + exclude_list="" + templates=$(compgen -A variable | grep 'COM_CHEM_.*_TMPL') + for template in ${templates}; do + RUN="${local_run}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" + remove_files "${directory}" "${exclude_list[@]}" + done + + # Mediator + exclude_list="" + templates=$(compgen -A variable | grep 'COM_MED_.*_TMPL') + for template in ${templates}; do + RUN="${local_run}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" + remove_files "${directory}" "${exclude_list[@]}" + done + + done + + if [[ -d "${COMINrtofs}" ]] && (( GDATE < RTOFS_DATE )); then rm -rf "${COMINrtofs}" ; fi + fi + fi + fi + + # Remove mdl gfsmos directory + if [[ "${RUN}" == "gfs" ]]; then + 
COMIN="${ROTDIR}/gfsmos.${gPDY}" + if [[ -d "${COMIN}" ]] && (( GDATE < CDATE_MOS )); then rm -rf "${COMIN}" ; fi + fi + + # Remove any empty directories + target_dir="${ROTDIR:?}/${RUN}.${gPDY}/${gcyc}/" + if [[ -d "${target_dir}" ]]; then + find "${target_dir}" -empty -type d -delete + fi + + GDATE=$(${NDATE} +"${assim_freq}" "${GDATE}") +done + +# Remove archived gaussian files used for Fit2Obs in $VFYARC that are +# $FHMAX_FITS plus a delta before $CDATE. Touch existing archived +# gaussian files to prevent the files from being removed by automatic +# scrubber present on some machines. + +if [[ "${RUN}" == "gfs" ]]; then + fhmax=$((FHMAX_FITS + 36)) + RDATE=$(${NDATE} -"${fhmax}" "${PDY}${cyc}") + rPDY="${RDATE:0:8}" + COMIN="${ROTDIR}/vrfyarch/${RUN}.${rPDY}" + [[ -d ${COMIN} ]] && rm -rf "${COMIN}" + + TDATE=$(${NDATE} -"${FHMAX_FITS}" "${PDY}${cyc}") + while (( TDATE < "${PDY}${cyc}" )); do + tPDY="${TDATE:0:8}" + tcyc="${TDATE:8:2}" + TDIR="${ROTDIR}/vrfyarch/${RUN}.${tPDY}/${tcyc}" + [[ -d ${TDIR} ]] && touch "${TDIR}"/* + TDATE=$(${NDATE} +6 "${TDATE}") + done +fi + +# Remove $RUN.$rPDY for the older of GDATE or RDATE +GDATE=$(${NDATE} -"${RMOLDSTD:-120}" "${PDY}${cyc}") +RDATE=$(${NDATE} -"${FHMAX_GFS}" "${PDY}${cyc}") +if (( GDATE < RDATE )); then + RDATE=${GDATE} +fi +rPDY="${RDATE:0:8}" +COMIN="${ROTDIR}/${RUN}.${rPDY}" +if [[ -d ${COMIN} ]]; then rm -rf "${COMIN}"; fi diff --git a/workflow/applications/gfs_cycled.py b/workflow/applications/gfs_cycled.py index 633e93bac0..59e760140c 100644 --- a/workflow/applications/gfs_cycled.py +++ b/workflow/applications/gfs_cycled.py @@ -47,7 +47,7 @@ def _get_app_configs(self): if self.do_ocean: configs += ['ocnpost'] - configs += ['sfcanl', 'analcalc', 'fcst', 'post', 'vrfy', 'fit2obs', 'arch'] + configs += ['sfcanl', 'analcalc', 'fcst', 'post', 'vrfy', 'fit2obs', 'arch', 'cleanup'] if self.do_hybvar: if self.do_jediatmens: @@ -106,7 +106,7 @@ def get_task_names(self): # gdas_gfs_common_tasks_after_fcst += 
['ocnpost'] gdas_gfs_common_tasks_after_fcst += ['vrfy'] - gdas_gfs_common_cleanup_tasks = ['arch'] + gdas_gfs_common_cleanup_tasks = ['arch', 'cleanup'] if self.do_jediatmvar: gdas_gfs_common_tasks_before_fcst += ['prepatmiodaobs', 'atmanlinit', 'atmanlrun', 'atmanlfinal'] diff --git a/workflow/applications/gfs_forecast_only.py b/workflow/applications/gfs_forecast_only.py index e6d1ab35a2..73e17ee7aa 100644 --- a/workflow/applications/gfs_forecast_only.py +++ b/workflow/applications/gfs_forecast_only.py @@ -15,7 +15,7 @@ def _get_app_configs(self): Returns the config_files that are involved in the forecast-only app """ - configs = ['stage_ic', 'fcst', 'arch'] + configs = ['stage_ic', 'fcst', 'arch', 'cleanup'] if self.do_atm: configs += ['post', 'vrfy'] @@ -109,6 +109,6 @@ def get_task_names(self): if self.do_wafs: tasks += ['wafs', 'wafsgcip', 'wafsgrib2', 'wafsgrib20p25', 'wafsblending', 'wafsblending0p25'] - tasks += ['arch'] # arch **must** be the last task + tasks += ['arch', 'cleanup'] # arch and cleanup **must** be the last tasks return {f"{self._base['CDUMP']}": tasks} diff --git a/workflow/rocoto/gfs_tasks.py b/workflow/rocoto/gfs_tasks.py index 33e2ec82f3..b660ce109d 100644 --- a/workflow/rocoto/gfs_tasks.py +++ b/workflow/rocoto/gfs_tasks.py @@ -944,6 +944,19 @@ def arch(self): return task + # Cleanup + def cleanup(self): + deps = [] + dep_dict = {'type': 'task', 'name': f'{self.cdump}arch'} + deps.append(rocoto.add_dependency(dep_dict)) + + dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) + + resources = self.get_resource('cleanup') + task = create_wf_task('cleanup', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + # Start of ensemble tasks def eobs(self): deps = [] diff --git a/workflow/rocoto/tasks.py b/workflow/rocoto/tasks.py index b9716c938e..29ed57daf2 100644 --- a/workflow/rocoto/tasks.py +++ b/workflow/rocoto/tasks.py @@ -11,7 +11,7 @@ class Tasks: SERVICE_TASKS = ['arch', 'earc'] 
VALID_TASKS = ['aerosol_init', 'stage_ic', - 'prep', 'anal', 'sfcanl', 'analcalc', 'analdiag', 'arch', + 'prep', 'anal', 'sfcanl', 'analcalc', 'analdiag', 'arch', "cleanup", 'prepatmiodaobs', 'atmanlinit', 'atmanlrun', 'atmanlfinal', 'ocnanalprep', 'ocnanalbmat', 'ocnanalrun', 'ocnanalchkpt', 'ocnanalpost', 'ocnanalvrfy', 'earc', 'ecen', 'echgres', 'ediag', 'efcs', From e228a07f3e67f91665e47c9d05358491e5323dda Mon Sep 17 00:00:00 2001 From: "Walter.Kolczynski" Date: Wed, 4 Oct 2023 02:46:09 -0500 Subject: [PATCH 02/14] Mute a couple shellcheck warnings in cleanup --- scripts/exglobal_cleanup.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/exglobal_cleanup.sh b/scripts/exglobal_cleanup.sh index 0990b07a36..5c46b521b8 100755 --- a/scripts/exglobal_cleanup.sh +++ b/scripts/exglobal_cleanup.sh @@ -44,12 +44,16 @@ function remove_files() { fi local file_list declare -a file_list + # Ignore compound command warning + # shellcheck disable=SC2312 readarray -t file_list < <(find -L "${directory}" -type f) if (( ${#file_list[@]} == 0 )); then return; fi # echo "Number of files to remove before exclusions: ${#file_list[@]}" for exclude in ${exclude_list}; do echo "Excluding ${exclude}" declare -a file_list_old=("${file_list[@]}") + # Ignore compound command warning + # shellcheck disable=SC2312 readarray file_list < <(printf -- '%s\n' "${file_list_old[@]}" | grep -v "${exclude}") # echo "Number of files to remove after exclusion: ${#file_list[@]}" if (( ${#file_list[@]} == 0 )); then return; fi From e00984b01c9ac1fc3259d6692b3fff897c53c86b Mon Sep 17 00:00:00 2001 From: "Walter.Kolczynski" Date: Thu, 5 Oct 2023 11:57:26 -0500 Subject: [PATCH 03/14] Have cleanup job source cleanup config --- jobs/JGLOBAL_CLEANUP | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jobs/JGLOBAL_CLEANUP b/jobs/JGLOBAL_CLEANUP index 5946710554..ad938ccf60 100755 --- a/jobs/JGLOBAL_CLEANUP +++ b/jobs/JGLOBAL_CLEANUP @@ -1,7 +1,7 @@ #! 
/usr/bin/env bash source "${HOMEgfs}/ush/preamble.sh" -source "${HOMEgfs}/ush/jjob_header.sh" -e "cleanup" -c "base" +source "${HOMEgfs}/ush/jjob_header.sh" -e "cleanup" -c "base cleanup" "${HOMEgfs}/scripts/exglobal_cleanup.sh" status=$? From 017efe9cce5abef36e42e8b29e33482d182dac91 Mon Sep 17 00:00:00 2001 From: "Walter.Kolczynski" Date: Thu, 5 Oct 2023 11:58:57 -0500 Subject: [PATCH 04/14] Replace NDATE with posix date command in cleanup job Refs #583 --- scripts/exglobal_cleanup.sh | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/scripts/exglobal_cleanup.sh b/scripts/exglobal_cleanup.sh index 5c46b521b8..72406257ac 100755 --- a/scripts/exglobal_cleanup.sh +++ b/scripts/exglobal_cleanup.sh @@ -6,10 +6,9 @@ source "${HOMEgfs}/ush/preamble.sh" # Clean up previous cycles; various depths # PRIOR CYCLE: Leave the prior cycle alone # shellcheck disable=SC2153 -GDATE=$(${NDATE} -"${assim_freq}" "${PDY}${cyc}") - +GDATE=$(date --utc -d "${PDY} ${cyc} -${assim_freq} hours") # PREVIOUS to the PRIOR CYCLE -GDATE=$(${NDATE} -"${assim_freq}" "${GDATE}") +GDATE=$(date --utc -d "${GDATE:0:8} ${GDATE:8:2} -${assim_freq} hours") gPDY="${GDATE:0:8}" gcyc="${GDATE:8:2}" @@ -28,9 +27,9 @@ fi # Retain files needed by Fit2Obs # TODO: This whole section needs to be revamped to remove marine component # directories and not look at the rocoto log. 
-GDATEEND=$(${NDATE} -"${RMOLDEND:-24}" "${PDY}${cyc}") -GDATE=$(${NDATE} -"${RMOLDSTD:-120}" "${PDY}${cyc}") -RTOFS_DATE=$(${NDATE} -48 "${PDY}${cyc}") +GDATEEND=$(date --utc -d "${PDY} ${cyc} -${RMOLDEND:-24} hours" ) +GDATE=$(date --utc -d "${PDY} ${cyc} -${RMOLDSTD:-120} hours") +RTOFS_DATE=$(date --utc -d "${PDY} ${cyc} -48 hours") function remove_files() { local directory=$1 shift @@ -178,7 +177,7 @@ while (( GDATE <= GDATEEND )); do find "${target_dir}" -empty -type d -delete fi - GDATE=$(${NDATE} +"${assim_freq}" "${GDATE}") + GDATE=$(date --utc -d "${GDATE:0:8} ${GDATE:8:2} +${assim_freq} hours") done # Remove archived gaussian files used for Fit2Obs in $VFYARC that are @@ -188,24 +187,24 @@ done if [[ "${RUN}" == "gfs" ]]; then fhmax=$((FHMAX_FITS + 36)) - RDATE=$(${NDATE} -"${fhmax}" "${PDY}${cyc}") + RDATE=$(date --utc -d "${PDY} ${cyc} -${fhmax} hours") rPDY="${RDATE:0:8}" COMIN="${ROTDIR}/vrfyarch/${RUN}.${rPDY}" [[ -d ${COMIN} ]] && rm -rf "${COMIN}" - TDATE=$(${NDATE} -"${FHMAX_FITS}" "${PDY}${cyc}") + TDATE=$(date --utc -d "${PDY} ${cyc} -${FHMAX_FITS} hours") while (( TDATE < "${PDY}${cyc}" )); do tPDY="${TDATE:0:8}" tcyc="${TDATE:8:2}" TDIR="${ROTDIR}/vrfyarch/${RUN}.${tPDY}/${tcyc}" [[ -d ${TDIR} ]] && touch "${TDIR}"/* - TDATE=$(${NDATE} +6 "${TDATE}") + TDATE=$(date --utc -d "${TDATE:0:8} ${TDATE:8:2} +6 hours") done fi # Remove $RUN.$rPDY for the older of GDATE or RDATE -GDATE=$(${NDATE} -"${RMOLDSTD:-120}" "${PDY}${cyc}") -RDATE=$(${NDATE} -"${FHMAX_GFS}" "${PDY}${cyc}") +GDATE=$(date --utc -d "${PDY} ${cyc} -${RMOLDSTD:-120} hours") +RDATE=$(date --utc -d "${PDY} ${cyc} -${FHMAX_GFS} hours") if (( GDATE < RDATE )); then RDATE=${GDATE} fi From f1e952325231aeacb547bd0e818d92bc555a8722 Mon Sep 17 00:00:00 2001 From: "Walter.Kolczynski" Date: Thu, 5 Oct 2023 12:07:42 -0500 Subject: [PATCH 05/14] Rename control variable for cleaning up COM Since clean-up is no longer part of the archive job, the name of the variable that controls whether COM is 
cleaned up is renamed. Since the clean-up job does very little if this is NO, in the future may want to just not run this job in that situation. Refs #583 --- parm/config/gfs/config.cleanup | 2 +- scripts/exglobal_cleanup.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/parm/config/gfs/config.cleanup b/parm/config/gfs/config.cleanup index 1ab536e978..be24467062 100644 --- a/parm/config/gfs/config.cleanup +++ b/parm/config/gfs/config.cleanup @@ -6,7 +6,7 @@ echo "BEGIN: config.cleanup" # Get task specific resources source "${EXPDIR}/config.resources" cleanup -export DELETE_COM_IN_ARCHIVE_JOB="YES" # NO=retain ROTDIR. YES default in cleanup.sh +export CLEANUP_COM="YES" # NO=retain ROTDIR. YES default in cleanup.sh #--starting and ending hours of previous cycles to be removed from rotating directory export RMOLDSTD=144 diff --git a/scripts/exglobal_cleanup.sh b/scripts/exglobal_cleanup.sh index 72406257ac..7238c43d89 100755 --- a/scripts/exglobal_cleanup.sh +++ b/scripts/exglobal_cleanup.sh @@ -18,7 +18,7 @@ gcyc="${GDATE:8:2}" COMIN="${DATAROOT}/${GDATE}" [[ -d ${COMIN} ]] && rm -rf "${COMIN}" -if [[ "${DELETE_COM_IN_ARCHIVE_JOB:-YES}" == NO ]] ; then +if [[ "${CLEANUP_COM:-YES}" == NO ]] ; then exit 0 fi From 58ccf10e3b84d15aa3c5ec453c3e3c6ea2da452b Mon Sep 17 00:00:00 2001 From: "Walter.Kolczynski" Date: Fri, 6 Oct 2023 02:24:51 -0500 Subject: [PATCH 06/14] Fix syntax of date calls in cleanup --- scripts/exglobal_cleanup.sh | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/scripts/exglobal_cleanup.sh b/scripts/exglobal_cleanup.sh index 7238c43d89..463f83bc43 100755 --- a/scripts/exglobal_cleanup.sh +++ b/scripts/exglobal_cleanup.sh @@ -6,9 +6,9 @@ source "${HOMEgfs}/ush/preamble.sh" # Clean up previous cycles; various depths # PRIOR CYCLE: Leave the prior cycle alone # shellcheck disable=SC2153 -GDATE=$(date --utc -d "${PDY} ${cyc} -${assim_freq} hours") +GDATE=$(date --utc +%Y%m%d%H -d "${PDY} 
${cyc} -${assim_freq} hours") # PREVIOUS to the PRIOR CYCLE -GDATE=$(date --utc -d "${GDATE:0:8} ${GDATE:8:2} -${assim_freq} hours") +GDATE=$(date --utc +%Y%m%d%H -d "${GDATE:0:8} ${GDATE:8:2} -${assim_freq} hours") gPDY="${GDATE:0:8}" gcyc="${GDATE:8:2}" @@ -27,9 +27,9 @@ fi # Retain files needed by Fit2Obs # TODO: This whole section needs to be revamped to remove marine component # directories and not look at the rocoto log. -GDATEEND=$(date --utc -d "${PDY} ${cyc} -${RMOLDEND:-24} hours" ) -GDATE=$(date --utc -d "${PDY} ${cyc} -${RMOLDSTD:-120} hours") -RTOFS_DATE=$(date --utc -d "${PDY} ${cyc} -48 hours") +GDATEEND=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${RMOLDEND:-24} hours" ) +GDATE=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${RMOLDSTD:-120} hours") +RTOFS_DATE=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -48 hours") function remove_files() { local directory=$1 shift @@ -104,7 +104,7 @@ while (( GDATE <= GDATEEND )); do # Obs exclude_list="prepbufr" - templates="COM_OBS" + templates="COM_OBS_TMPL" for template in ${templates}; do RUN="${local_run}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" remove_files "${directory}" "${exclude_list[@]}" @@ -177,7 +177,7 @@ while (( GDATE <= GDATEEND )); do find "${target_dir}" -empty -type d -delete fi - GDATE=$(date --utc -d "${GDATE:0:8} ${GDATE:8:2} +${assim_freq} hours") + GDATE=$(date --utc +%Y%m%d%H -d "${GDATE:0:8} ${GDATE:8:2} +${assim_freq} hours") done # Remove archived gaussian files used for Fit2Obs in $VFYARC that are @@ -187,24 +187,24 @@ done if [[ "${RUN}" == "gfs" ]]; then fhmax=$((FHMAX_FITS + 36)) - RDATE=$(date --utc -d "${PDY} ${cyc} -${fhmax} hours") + RDATE=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${fhmax} hours") rPDY="${RDATE:0:8}" COMIN="${ROTDIR}/vrfyarch/${RUN}.${rPDY}" [[ -d ${COMIN} ]] && rm -rf "${COMIN}" - TDATE=$(date --utc -d "${PDY} ${cyc} -${FHMAX_FITS} hours") + TDATE=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${FHMAX_FITS} hours") while (( TDATE < "${PDY}${cyc}" )); 
do tPDY="${TDATE:0:8}" tcyc="${TDATE:8:2}" TDIR="${ROTDIR}/vrfyarch/${RUN}.${tPDY}/${tcyc}" [[ -d ${TDIR} ]] && touch "${TDIR}"/* - TDATE=$(date --utc -d "${TDATE:0:8} ${TDATE:8:2} +6 hours") + TDATE=$(date --utc +%Y%m%d%H -d "${TDATE:0:8} ${TDATE:8:2} +6 hours") done fi # Remove $RUN.$rPDY for the older of GDATE or RDATE -GDATE=$(date --utc -d "${PDY} ${cyc} -${RMOLDSTD:-120} hours") -RDATE=$(date --utc -d "${PDY} ${cyc} -${FHMAX_GFS} hours") +GDATE=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${RMOLDSTD:-120} hours") +RDATE=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${FHMAX_GFS} hours") if (( GDATE < RDATE )); then RDATE=${GDATE} fi From e1ca1ac4d2b8a8622247d4370ca5356c584b7b8d Mon Sep 17 00:00:00 2001 From: "Walter.Kolczynski" Date: Fri, 6 Oct 2023 02:25:24 -0500 Subject: [PATCH 07/14] Add ensemble archive as prerequisite for gdas cleanup --- workflow/rocoto/gfs_tasks.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/workflow/rocoto/gfs_tasks.py b/workflow/rocoto/gfs_tasks.py index b660ce109d..73aff9f762 100644 --- a/workflow/rocoto/gfs_tasks.py +++ b/workflow/rocoto/gfs_tasks.py @@ -950,6 +950,10 @@ def cleanup(self): dep_dict = {'type': 'task', 'name': f'{self.cdump}arch'} deps.append(rocoto.add_dependency(dep_dict)) + if self.cdump in ['gdas'] and self.app_config.do_hybvar: + dep_dict = {'type': 'metatask', 'name': 'enkfgdaseamn'} + deps.append(rocoto.add_dependency(dep_dict)) + dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) resources = self.get_resource('cleanup') From 8df75c6a7b6de790fd3a1de8669a8f1a2356c71e Mon Sep 17 00:00:00 2001 From: "Walter.Kolczynski" Date: Mon, 16 Oct 2023 00:20:35 -0500 Subject: [PATCH 08/14] Streamline cleanup script The old cleanup script was more convoluted than it needed to be. The script has been streamlined quite a bit and now works properly. 
Instead of going through every COM template, `COM_TOP` is used along with find to get everything at once (well, a few passes of find are needed to deal with regular files, symlinks, and then directories). Refs #583 Resolves #1872 --- scripts/exglobal_cleanup.sh | 195 +++++++++--------------------------- 1 file changed, 45 insertions(+), 150 deletions(-) diff --git a/scripts/exglobal_cleanup.sh b/scripts/exglobal_cleanup.sh index 463f83bc43..718135568a 100755 --- a/scripts/exglobal_cleanup.sh +++ b/scripts/exglobal_cleanup.sh @@ -9,8 +9,6 @@ source "${HOMEgfs}/ush/preamble.sh" GDATE=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${assim_freq} hours") # PREVIOUS to the PRIOR CYCLE GDATE=$(date --utc +%Y%m%d%H -d "${GDATE:0:8} ${GDATE:8:2} -${assim_freq} hours") -gPDY="${GDATE:0:8}" -gcyc="${GDATE:8:2}" # Remove the TMPDIR directory # TODO Only prepbufr is currently using this directory, and all jobs should be @@ -25,11 +23,9 @@ fi # Step back every assim_freq hours and remove old rotating directories # for successful cycles (defaults from 24h to 120h). # Retain files needed by Fit2Obs -# TODO: This whole section needs to be revamped to remove marine component -# directories and not look at the rocoto log. 
-GDATEEND=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${RMOLDEND:-24} hours" ) -GDATE=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${RMOLDSTD:-120} hours") -RTOFS_DATE=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -48 hours") +last_date=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${RMOLDEND:-24} hours" ) +first_date=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${RMOLDSTD:-120} hours") +last_rtofs=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -48 hours") function remove_files() { local directory=$1 shift @@ -37,147 +33,48 @@ function remove_files() { echo "No directory ${directory} to remove files from, skiping" return fi - local exclude_list="" - if (($# > 0)); then - exclude_list=$* - fi - local file_list - declare -a file_list - # Ignore compound command warning - # shellcheck disable=SC2312 - readarray -t file_list < <(find -L "${directory}" -type f) - if (( ${#file_list[@]} == 0 )); then return; fi - # echo "Number of files to remove before exclusions: ${#file_list[@]}" - for exclude in ${exclude_list}; do - echo "Excluding ${exclude}" - declare -a file_list_old=("${file_list[@]}") - # Ignore compound command warning - # shellcheck disable=SC2312 - readarray file_list < <(printf -- '%s\n' "${file_list_old[@]}" | grep -v "${exclude}") - # echo "Number of files to remove after exclusion: ${#file_list[@]}" - if (( ${#file_list[@]} == 0 )); then return; fi + local exclude_string="" + for exclude in "$@"; do + exclude_string+="${exclude_string} -name ${exclude} -or" done - # echo "Number of files to remove after exclusions: ${#file_list[@]}" - - for file in "${file_list[@]}"; do - rm -f "${file}" - done - # Remove directory if empty - rmdir "${directory}" || true + # Chop off any trailing or + exclude_string="${exclude_string[*]/%-or}" + # Remove all regular files that do not match + # shellcheck disable=SC2086 + find "${directory}" -type f -not \( ${exclude_string} \) -delete + # Remove all symlinks that do not match + # shellcheck disable=SC2086 + find "${directory}" -type l -not \( 
${exclude_string} \) -delete + # Remove any empty directories + find "${directory}" -type d -empty -delete } -while (( GDATE <= GDATEEND )); do - gPDY="${GDATE:0:8}" - gcyc="${GDATE:8:2}" - COMINrtofs="${ROTDIR}/rtofs.${gPDY}" - YMD="${gPDY}" HH="${gcyc}" generate_com COM_TOP - if [[ -d "${COM_TOP}" ]]; then - rocotolog="${EXPDIR}/logs/${GDATE}.log" - if [[ -f "${rocotolog}" ]]; then - # shellcheck disable=SC2312 - if [[ $(tail -n 1 "${rocotolog}") =~ "This cycle is complete: Success" ]]; then - case ${RUN} in - gdas) nmem="${NMEM_ENS}";; - gfs) nmem="${NMEM_ENS_GFS}";; - *) - echo "FATAL ERROR: Unknown RUN ${RUN} during cleanup" - exit 10 - ;; - esac - - memlist=("") # Empty MEMDIR for deterministic - - RUN="enkf${RUN}" YMD="${gPDY}" HH="${gcyc}" generate_com enkf_top:COM_TOP_TMPL - if [[ -d ${enkf_top} ]]; then - # Add ensemble directories if they exist - readarray -O"${#memlist[@]}" memlist< <(seq --format="mem%03g" 1 "${nmem}") - memlist+=("ensstat") - fi - - for MEMDIR in "${memlist[@]}"; do - - if [[ -n "${MEMDIR}" ]]; then - local_run="enkf${RUN}" - else - local_run="${RUN}" - fi - - # Obs - exclude_list="prepbufr" - templates="COM_OBS_TMPL" - for template in ${templates}; do - RUN="${local_run}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Atmos - exclude_list="cnvstat atmanl.nc" - templates=$(compgen -A variable | grep 'COM_ATMOS_.*_TMPL') - for template in ${templates}; do - RUN="${local_run}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Wave - exclude_list="" - templates=$(compgen -A variable | grep 'COM_WAVE_.*_TMPL') - for template in ${templates}; do - RUN="${local_run}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Ocean - exclude_list="" - templates=$(compgen -A variable | grep 'COM_OCEAN_.*_TMPL') - for 
template in ${templates}; do - RUN="${local_run}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Ice - exclude_list="" - templates=$(compgen -A variable | grep 'COM_ICE_.*_TMPL') - for template in ${templates}; do - RUN="${local_run}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Aerosols (GOCART) - exclude_list="" - templates=$(compgen -A variable | grep 'COM_CHEM_.*_TMPL') - for template in ${templates}; do - RUN="${local_run}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Mediator - exclude_list="" - templates=$(compgen -A variable | grep 'COM_MED_.*_TMPL') - for template in ${templates}; do - RUN="${local_run}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done +for (( current_date=first_date; current_date <= last_date; \ + current_date=$(date --utc +%Y%m%d%H -d "${current_date:0:8} ${current_date:8:2} +${assim_freq} hours") )); do + current_PDY="${current_date:0:8}" + current_cyc="${current_date:8:2}" + rtofs_dir="${ROTDIR}/rtofs.${current_PDY}" + rocotolog="${EXPDIR}/logs/${current_date}.log" + if [[ -f "${rocotolog}" ]]; then + # TODO: This needs to be revamped to not look at the rocoto log. 
+ # shellcheck disable=SC2312 + if [[ $(tail -n 1 "${rocotolog}") =~ "This cycle is complete: Success" ]]; then + YMD="${current_PDY}" HH="${current_cyc}" generate_com COM_TOP - done + if [[ -d "${COM_TOP}" ]]; then + exclude_list=("prepbufr" "cnvstat" "atmanl.nc") + remove_files "${COM_TOP}" "${exclude_list[@]:-}" - if [[ -d "${COMINrtofs}" ]] && (( GDATE < RTOFS_DATE )); then rm -rf "${COMINrtofs}" ; fi + if [[ -d "${rtofs_dir}" ]] && (( current_date < last_rtofs )); then rm -rf "${rtofs_dir}" ; fi fi fi fi # Remove mdl gfsmos directory if [[ "${RUN}" == "gfs" ]]; then - COMIN="${ROTDIR}/gfsmos.${gPDY}" - if [[ -d "${COMIN}" ]] && (( GDATE < CDATE_MOS )); then rm -rf "${COMIN}" ; fi + mos_dir="${ROTDIR}/gfsmos.${current_PDY}" + if [[ -d "${mos_dir}" ]] && (( current_date < CDATE_MOS )); then rm -rf "${mos_dir}" ; fi fi - - # Remove any empty directories - target_dir="${ROTDIR:?}/${RUN}.${gPDY}/${gcyc}/" - if [[ -d "${target_dir}" ]]; then - find "${target_dir}" -empty -type d -delete - fi - - GDATE=$(date --utc +%Y%m%d%H -d "${GDATE:0:8} ${GDATE:8:2} +${assim_freq} hours") done # Remove archived gaussian files used for Fit2Obs in $VFYARC that are @@ -188,17 +85,16 @@ done if [[ "${RUN}" == "gfs" ]]; then fhmax=$((FHMAX_FITS + 36)) RDATE=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${fhmax} hours") - rPDY="${RDATE:0:8}" - COMIN="${ROTDIR}/vrfyarch/${RUN}.${rPDY}" - [[ -d ${COMIN} ]] && rm -rf "${COMIN}" - - TDATE=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${FHMAX_FITS} hours") - while (( TDATE < "${PDY}${cyc}" )); do - tPDY="${TDATE:0:8}" - tcyc="${TDATE:8:2}" - TDIR="${ROTDIR}/vrfyarch/${RUN}.${tPDY}/${tcyc}" - [[ -d ${TDIR} ]] && touch "${TDIR}"/* - TDATE=$(date --utc +%Y%m%d%H -d "${TDATE:0:8} ${TDATE:8:2} +6 hours") + verify_dir="${ROTDIR}/vrfyarch/${RUN}.${RDATE:0:8}" + [[ -d ${verify_dir} ]] && rm -rf "${verify_dir}" + + touch_date=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${FHMAX_FITS} hours") + while (( touch_date < "${PDY}${cyc}" )); do + 
touch_PDY="${touch_date:0:8}" + touch_cyc="${touch_date:8:2}" + touch_dir="${ROTDIR}/vrfyarch/${RUN}.${touch_PDY}/${touch_cyc}" + [[ -d ${touch_dir} ]] && touch "${touch_dir}"/* + touch_date=$(date --utc +%Y%m%d%H -d "${touch_PDY} ${touch_cyc} +6 hours") done fi @@ -208,6 +104,5 @@ RDATE=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${FHMAX_GFS} hours") if (( GDATE < RDATE )); then RDATE=${GDATE} fi -rPDY="${RDATE:0:8}" -COMIN="${ROTDIR}/${RUN}.${rPDY}" -if [[ -d ${COMIN} ]]; then rm -rf "${COMIN}"; fi +deletion_target="${ROTDIR}/${RUN}.${RDATE:0:8}" +if [[ -d ${deletion_target} ]]; then rm -rf "${deletion_target}"; fi From b038580d170ad0d06a6470eb53075395b0d4c5cd Mon Sep 17 00:00:00 2001 From: "Walter.Kolczynski" Date: Mon, 16 Oct 2023 00:24:53 -0500 Subject: [PATCH 09/14] Fix error in rerunning a failed ensemble member When trying to rerun a failed ensemble member, the script would fail due to `set -e` when grep returned non-zero for not being able to find `PASS` in the group status file. This is resolved by turning it off and then back on. --- scripts/exgdas_enkf_fcst.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/exgdas_enkf_fcst.sh b/scripts/exgdas_enkf_fcst.sh index 85344e4e35..7eb2a3a711 100755 --- a/scripts/exgdas_enkf_fcst.sh +++ b/scripts/exgdas_enkf_fcst.sh @@ -122,7 +122,9 @@ for imem in $(seq "${ENSBEG}" "${ENSEND}"); do skip_mem="NO" if [[ -f ${EFCSGRP}.fail ]]; then + set +e memstat=$(grep "MEMBER ${ENSMEM}" "${EFCSGRP}.fail" | grep -c "PASS") + set_strict [[ ${memstat} -eq 1 ]] && skip_mem="YES" fi From 4539f493845f0e0b13aa5c85cecf7de7de6dda16 Mon Sep 17 00:00:00 2001 From: "Walter.Kolczynski" Date: Mon, 16 Oct 2023 00:36:59 -0500 Subject: [PATCH 10/14] Add enkf back into cleanup The streamlining of the cleanup script resulted in enkf not being cleaned up. Script now runs for `$RUN` and `enkf${RUN}`. 
Since the existence of the directory is checked before trying to clean up, it doesn't matter if `enkf${RUN}` is not a valid `$RUN` for the experiment. Refs #583 --- scripts/exglobal_cleanup.sh | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/scripts/exglobal_cleanup.sh b/scripts/exglobal_cleanup.sh index 718135568a..0f5966c957 100755 --- a/scripts/exglobal_cleanup.sh +++ b/scripts/exglobal_cleanup.sh @@ -59,14 +59,19 @@ for (( current_date=first_date; current_date <= last_date; \ # TODO: This needs to be revamped to not look at the rocoto log. # shellcheck disable=SC2312 if [[ $(tail -n 1 "${rocotolog}") =~ "This cycle is complete: Success" ]]; then - YMD="${current_PDY}" HH="${current_cyc}" generate_com COM_TOP + # Also cleanup the enkf version of $RUN. Since the directory's existence + # is checked first, it doesn't matter if that is not a legitimate $RUN + # for this experiment. + runs_to_clean=("${RUN}" "enkf${RUN}") + for run in "${runs_to_clean[@]}"; do + RUN=${run} YMD="${current_PDY}" HH="${current_cyc}" generate_com COM_TOP + if [[ -d "${COM_TOP}" ]]; then + exclude_list=("prepbufr" "cnvstat" "atmanl.nc") + remove_files "${COM_TOP}" "${exclude_list[@]:-}" - if [[ -d "${COM_TOP}" ]]; then - exclude_list=("prepbufr" "cnvstat" "atmanl.nc") - remove_files "${COM_TOP}" "${exclude_list[@]:-}" - - if [[ -d "${rtofs_dir}" ]] && (( current_date < last_rtofs )); then rm -rf "${rtofs_dir}" ; fi - fi + if [[ -d "${rtofs_dir}" ]] && (( current_date < last_rtofs )); then rm -rf "${rtofs_dir}" ; fi + fi + done fi fi From 275ed9b3bda9bfe5e6d2d0bf33a38f181c8e7720 Mon Sep 17 00:00:00 2001 From: "Walter.Kolczynski" Date: Mon, 16 Oct 2023 00:44:15 -0500 Subject: [PATCH 11/14] Add variable to control last RTOFS kept --- scripts/exglobal_cleanup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/exglobal_cleanup.sh b/scripts/exglobal_cleanup.sh index 0f5966c957..171869d406 100755 --- a/scripts/exglobal_cleanup.sh 
+++ b/scripts/exglobal_cleanup.sh @@ -25,7 +25,7 @@ fi # Retain files needed by Fit2Obs last_date=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${RMOLDEND:-24} hours" ) first_date=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${RMOLDSTD:-120} hours") -last_rtofs=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -48 hours") +last_rtofs=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${RMOLDRTOFS:-48} hours") function remove_files() { local directory=$1 shift From 986b5a9ee16b1e32220a1e8ea5cf29281f11fe90 Mon Sep 17 00:00:00 2001 From: "Walter.Kolczynski" Date: Mon, 16 Oct 2023 01:03:51 -0500 Subject: [PATCH 12/14] Add wildcards to cleanup excludes The `-name` option to find works differently than `grep`, so wildcards need to be added to make sure files to exclude are matched properly. --- scripts/exglobal_cleanup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/exglobal_cleanup.sh b/scripts/exglobal_cleanup.sh index 171869d406..7cd7e9f1ac 100755 --- a/scripts/exglobal_cleanup.sh +++ b/scripts/exglobal_cleanup.sh @@ -66,7 +66,7 @@ for (( current_date=first_date; current_date <= last_date; \ for run in "${runs_to_clean[@]}"; do RUN=${run} YMD="${current_PDY}" HH="${current_cyc}" generate_com COM_TOP if [[ -d "${COM_TOP}" ]]; then - exclude_list=("prepbufr" "cnvstat" "atmanl.nc") + exclude_list=("*prepbufr*" "*cnvstat*" "*atmanl.nc") remove_files "${COM_TOP}" "${exclude_list[@]:-}" if [[ -d "${rtofs_dir}" ]] && (( current_date < last_rtofs )); then rm -rf "${rtofs_dir}" ; fi From 69d24d6816ff19ec01c60600b6a9c5217f4e1318 Mon Sep 17 00:00:00 2001 From: "Walter.Kolczynski" Date: Mon, 16 Oct 2023 15:05:26 -0500 Subject: [PATCH 13/14] Give enkf its own cleanup job Split cleanup for enkf off into its own rocoto job to make the script cleaner. This eliminates the need to loop over RUNs and avoids the unnecessary check of non-existent RUNs. 
Refs #583 --- scripts/exglobal_cleanup.sh | 19 ++++++------------- workflow/applications/gfs_cycled.py | 2 +- workflow/rocoto/gfs_tasks.py | 8 ++++---- 3 files changed, 11 insertions(+), 18 deletions(-) diff --git a/scripts/exglobal_cleanup.sh b/scripts/exglobal_cleanup.sh index 7cd7e9f1ac..ecb66c686c 100755 --- a/scripts/exglobal_cleanup.sh +++ b/scripts/exglobal_cleanup.sh @@ -59,19 +59,12 @@ for (( current_date=first_date; current_date <= last_date; \ # TODO: This needs to be revamped to not look at the rocoto log. # shellcheck disable=SC2312 if [[ $(tail -n 1 "${rocotolog}") =~ "This cycle is complete: Success" ]]; then - # Also cleanup the enkf version of $RUN. Since the directory's existence - # is checked first, it doesn't matter if that is not a legitimate $RUN - # for this experiment. - runs_to_clean=("${RUN}" "enkf${RUN}") - for run in "${runs_to_clean[@]}"; do - RUN=${run} YMD="${current_PDY}" HH="${current_cyc}" generate_com COM_TOP - if [[ -d "${COM_TOP}" ]]; then - exclude_list=("*prepbufr*" "*cnvstat*" "*atmanl.nc") - remove_files "${COM_TOP}" "${exclude_list[@]:-}" - - if [[ -d "${rtofs_dir}" ]] && (( current_date < last_rtofs )); then rm -rf "${rtofs_dir}" ; fi - fi - done + YMD="${current_PDY}" HH="${current_cyc}" generate_com COM_TOP + if [[ -d "${COM_TOP}" ]]; then + exclude_list=("*prepbufr*" "*cnvstat*" "*atmanl.nc") + remove_files "${COM_TOP}" "${exclude_list[@]:-}" + fi + if [[ -d "${rtofs_dir}" ]] && (( current_date < last_rtofs )); then rm -rf "${rtofs_dir}" ; fi fi fi diff --git a/workflow/applications/gfs_cycled.py b/workflow/applications/gfs_cycled.py index 59e760140c..6eff929d5f 100644 --- a/workflow/applications/gfs_cycled.py +++ b/workflow/applications/gfs_cycled.py @@ -137,7 +137,7 @@ def get_task_names(self): else: hybrid_tasks += ['eobs', 'eupd', 'echgres'] hybrid_tasks += ['ediag'] if self.lobsdiag_forenkf else ['eomg'] - hybrid_after_eupd_tasks += ['ecen', 'esfc', 'efcs', 'epos', 'earc'] + hybrid_after_eupd_tasks += ['ecen', 
'esfc', 'efcs', 'epos', 'earc', 'cleanup'] # Collect all "gdas" cycle tasks gdas_tasks = gdas_gfs_common_tasks_before_fcst.copy() diff --git a/workflow/rocoto/gfs_tasks.py b/workflow/rocoto/gfs_tasks.py index 73aff9f762..e41e4ebcc1 100644 --- a/workflow/rocoto/gfs_tasks.py +++ b/workflow/rocoto/gfs_tasks.py @@ -947,12 +947,12 @@ def arch(self): # Cleanup def cleanup(self): deps = [] - dep_dict = {'type': 'task', 'name': f'{self.cdump}arch'} - deps.append(rocoto.add_dependency(dep_dict)) - - if self.cdump in ['gdas'] and self.app_config.do_hybvar: + if 'enkf' in self.cdump: dep_dict = {'type': 'metatask', 'name': 'enkfgdaseamn'} deps.append(rocoto.add_dependency(dep_dict)) + else: + dep_dict = {'type': 'task', 'name': f'{self.cdump}arch'} + deps.append(rocoto.add_dependency(dep_dict)) dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) From 88d9b11bec8074ae881f787f0f925aba32c95567 Mon Sep 17 00:00:00 2001 From: "Walter.Kolczynski" Date: Tue, 17 Oct 2023 14:27:17 -0500 Subject: [PATCH 14/14] Use different exclude lists for cleanup Changes the exclude list into a configurable variable as a comma- separated string. This allows for the use of different strings for ensemble and deterministic. Refs #583 Refs #1872 --- parm/config/gfs/config.cleanup | 10 ++++++++++ scripts/exglobal_cleanup.sh | 12 ++++++------ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/parm/config/gfs/config.cleanup b/parm/config/gfs/config.cleanup index be24467062..1908c91bb5 100644 --- a/parm/config/gfs/config.cleanup +++ b/parm/config/gfs/config.cleanup @@ -12,4 +12,14 @@ export CLEANUP_COM="YES" # NO=retain ROTDIR. YES default in cleanup.sh export RMOLDSTD=144 export RMOLDEND=24 +# Specify the list of files to exclude from the first stage of cleanup +# Because arrays cannot be exported, list is a single string of comma- +# separated values. This string is split to form an array at runtime. 
+case ${RUN} in + gdas | gfs) exclude_string="*prepbufr*, *cnvstat*, *atmanl.nc" ;; + enkf*) exclude_string="*f006.ens*" ;; + *) exclude_string="" ;; +esac +export exclude_string + echo "END: config.cleanup" \ No newline at end of file diff --git a/scripts/exglobal_cleanup.sh b/scripts/exglobal_cleanup.sh index ecb66c686c..5d7c0a9788 100755 --- a/scripts/exglobal_cleanup.sh +++ b/scripts/exglobal_cleanup.sh @@ -33,18 +33,18 @@ function remove_files() { echo "No directory ${directory} to remove files from, skiping" return fi - local exclude_string="" + local find_exclude_string="" for exclude in "$@"; do - exclude_string+="${exclude_string} -name ${exclude} -or" + find_exclude_string+="${find_exclude_string} -name ${exclude} -or" done # Chop off any trailing or - exclude_string="${exclude_string[*]/%-or}" + find_exclude_string="${find_exclude_string[*]/%-or}" # Remove all regular files that do not match # shellcheck disable=SC2086 - find "${directory}" -type f -not \( ${exclude_string} \) -delete + find "${directory}" -type f -not \( ${find_exclude_string} \) -delete # Remove all symlinks that do not match # shellcheck disable=SC2086 - find "${directory}" -type l -not \( ${exclude_string} \) -delete + find "${directory}" -type l -not \( ${find_exclude_string} \) -delete # Remove any empty directories find "${directory}" -type d -empty -delete } @@ -61,7 +61,7 @@ for (( current_date=first_date; current_date <= last_date; \ if [[ $(tail -n 1 "${rocotolog}") =~ "This cycle is complete: Success" ]]; then YMD="${current_PDY}" HH="${current_cyc}" generate_com COM_TOP if [[ -d "${COM_TOP}" ]]; then - exclude_list=("*prepbufr*" "*cnvstat*" "*atmanl.nc") + IFS=", " read -r -a exclude_list <<< "${exclude_string:-}" remove_files "${COM_TOP}" "${exclude_list[@]:-}" fi if [[ -d "${rtofs_dir}" ]] && (( current_date < last_rtofs )); then rm -rf "${rtofs_dir}" ; fi