diff --git a/jobs/JGLOBAL_CLEANUP b/jobs/JGLOBAL_CLEANUP new file mode 100755 index 0000000000..ad938ccf60 --- /dev/null +++ b/jobs/JGLOBAL_CLEANUP @@ -0,0 +1,17 @@ +#! /usr/bin/env bash + +source "${HOMEgfs}/ush/preamble.sh" +source "${HOMEgfs}/ush/jjob_header.sh" -e "cleanup" -c "base cleanup" + +"${HOMEgfs}/scripts/exglobal_cleanup.sh" +status=$? +[[ ${status} -ne 0 ]] && exit "${status}" + +########################################## +# Remove the temporary working directory (only when KEEPDATA=NO) +########################################## +cd "${DATAROOT}" || { echo "${DATAROOT} does not exist. ABORT!"; exit 1; } +[[ ${KEEPDATA} = "NO" ]] && rm -rf "${DATA}" + +exit 0 + diff --git a/jobs/rocoto/arch.sh b/jobs/rocoto/arch.sh index 2f62d8b354..d949b7d76f 100755 --- a/jobs/rocoto/arch.sh +++ b/jobs/rocoto/arch.sh @@ -16,5 +16,4 @@ export jobid="${job}.$$" "${HOMEgfs}"/jobs/JGLOBAL_ARCHIVE status=$? - exit "${status}" diff --git a/jobs/rocoto/cleanup.sh b/jobs/rocoto/cleanup.sh new file mode 100755 index 0000000000..96303fde57 --- /dev/null +++ b/jobs/rocoto/cleanup.sh @@ -0,0 +1,19 @@ +#! /usr/bin/env bash + +source "${HOMEgfs}/ush/preamble.sh" + +############################################################### +# Source FV3GFS workflow modules +. "${HOMEgfs}"/ush/load_fv3gfs_modules.sh +status=$? +[[ ${status} -ne 0 ]] && exit "${status}" + +export job="cleanup" +export jobid="${job}.$$" + +############################################################### +# Execute the JJOB +"${HOMEgfs}"/jobs/JGLOBAL_CLEANUP +status=$? 
+ +exit "${status}" diff --git a/parm/config/gfs/config.arch b/parm/config/gfs/config.arch index 31a3713fb1..a23bcce6ae 100644 --- a/parm/config/gfs/config.arch +++ b/parm/config/gfs/config.arch @@ -12,13 +12,4 @@ export ARCH_GAUSSIAN="YES" export ARCH_GAUSSIAN_FHMAX=${FHMAX_GFS} export ARCH_GAUSSIAN_FHINC=${FHOUT_GFS} -#--online archive of nemsio files for fit2obs verification -export FITSARC="YES" -export FHMAX_FITS=132 -[[ "${FHMAX_FITS}" -gt "${FHMAX_GFS}" ]] && export FHMAX_FITS=${FHMAX_GFS} - -#--starting and ending hours of previous cycles to be removed from rotating directory -export RMOLDSTD=144 -export RMOLDEND=24 - echo "END: config.arch" diff --git a/parm/config/gfs/config.base.emc.dyn b/parm/config/gfs/config.base.emc.dyn index 09d8897a31..b77787794c 100644 --- a/parm/config/gfs/config.base.emc.dyn +++ b/parm/config/gfs/config.base.emc.dyn @@ -394,6 +394,9 @@ export ARCH_CYC=00 # Archive data at this cycle for warm_start capabil export ARCH_WARMICFREQ=4 # Archive frequency in days for warm_start capability export ARCH_FCSTICFREQ=1 # Archive frequency in days for gdas and gfs forecast-only capability -export DELETE_COM_IN_ARCHIVE_JOB="YES" # NO=retain ROTDIR. YES default in arch.sh and earc.sh. +#--online archive of nemsio files for fit2obs verification +export FITSARC="YES" +export FHMAX_FITS=132 +[[ "${FHMAX_FITS}" -gt "${FHMAX_GFS}" ]] && export FHMAX_FITS=${FHMAX_GFS} echo "END: config.base" diff --git a/parm/config/gfs/config.cleanup b/parm/config/gfs/config.cleanup new file mode 100644 index 0000000000..1908c91bb5 --- /dev/null +++ b/parm/config/gfs/config.cleanup @@ -0,0 +1,25 @@ +#! /usr/bin/env bash + +########## config.cleanup ########## +echo "BEGIN: config.cleanup" + +# Get task specific resources +source "${EXPDIR}/config.resources" cleanup + +export CLEANUP_COM="YES" # NO=retain ROTDIR. 
YES default in cleanup.sh + +#--starting and ending hours of previous cycles to be removed from rotating directory +export RMOLDSTD=144 +export RMOLDEND=24 + +# Specify the list of files to exclude from the first stage of cleanup +# Because arrays cannot be exported, list is a single string of comma- +# separated values. This string is split to form an array at runtime. +case ${RUN} in + gdas | gfs) exclude_string="*prepbufr*, *cnvstat*, *atmanl.nc" ;; + enkf*) exclude_string="*f006.ens*" ;; + *) exclude_string="" ;; +esac +export exclude_string + +echo "END: config.cleanup" \ No newline at end of file diff --git a/parm/config/gfs/config.resources b/parm/config/gfs/config.resources index d6654b61ed..6503ae5523 100644 --- a/parm/config/gfs/config.resources +++ b/parm/config/gfs/config.resources @@ -14,7 +14,7 @@ if [[ $# -ne 1 ]]; then echo "atmensanlinit atmensanlrun atmensanlfinal" echo "landanl" echo "aeroanlinit aeroanlrun aeroanlfinal" - echo "anal sfcanl analcalc analdiag fcst post vrfy fit2obs metp arch echgres" + echo "anal sfcanl analcalc analdiag fcst post vrfy fit2obs metp arch cleanup echgres" echo "eobs ediag eomg eupd ecen esfc efcs epos earc" echo "init_chem mom6ic ocnpost" echo "waveinit waveprep wavepostsbs wavepostbndpnt wavepostbndpntbll wavepostpnt" @@ -773,6 +773,13 @@ elif [[ ${step} = "arch" || ${step} = "earc" || ${step} = "getic" ]]; then eval "export memory_${step}=50GB" fi +elif [[ ${step} == "cleanup" ]]; then + export wtime_cleanup="01:00:00" + export npe_cleanup=1 + export npe_node_cleanup=1 + export nth_cleanup=1 + export memory_cleanup="4096M" + elif [[ ${step} = "stage_ic" ]]; then export wtime_stage_ic="00:15:00" diff --git a/scripts/exgdas_enkf_earc.sh b/scripts/exgdas_enkf_earc.sh index 1bb941f888..a1bcba4d79 100755 --- a/scripts/exgdas_enkf_earc.sh +++ b/scripts/exgdas_enkf_earc.sh @@ -133,172 +133,4 @@ if [ "${ENSGRP}" -eq 0 ]; then "gsistat.${RUN}.${PDY}${cyc}.ensmean" fi - -if [[ "${DELETE_COM_IN_ARCHIVE_JOB:-YES}" == NO ]] ; 
then - exit 0 -fi - -############################################################### -# ENSGRP 0 also does clean-up -############################################################### -if [[ "${ENSGRP}" -eq 0 ]]; then - function remove_files() { - # TODO: move this to a new location - local directory=$1 - shift - if [[ ! -d ${directory} ]]; then - echo "No directory ${directory} to remove files from, skiping" - return - fi - local exclude_list="" - if (($# > 0)); then - exclude_list=$* - fi - local file_list - declare -a file_list - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - readarray -t file_list < <(find -L "${directory}" -type f) - if (( ${#file_list[@]} == 0 )); then return; fi - for exclude in ${exclude_list}; do - echo "Excluding ${exclude}" - declare -a file_list_old=("${file_list[@]}") - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - readarray file_list < <(printf -- '%s\n' "${file_list_old[@]}" | grep -v "${exclude}") - if (( ${#file_list[@]} == 0 )); then return; fi - done - - for file in "${file_list[@]}"; do - rm -f "${file}" - done - # Remove directory if empty - rmdir "${directory}" || true - } - - # Start start and end dates to remove - GDATEEND=$(${NDATE} -"${RMOLDEND_ENKF:-24}" "${PDY}${cyc}") - GDATE=$(${NDATE} -"${RMOLDSTD_ENKF:-120}" "${PDY}${cyc}") - - while [ "${GDATE}" -le "${GDATEEND}" ]; do - - gPDY="${GDATE:0:8}" - gcyc="${GDATE:8:2}" - - if [[ -d ${COM_TOP} ]]; then - rocotolog="${EXPDIR}/logs/${GDATE}.log" - if [[ -f "${rocotolog}" ]]; then - set +e - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - testend=$(tail -n 1 "${rocotolog}" | grep "This cycle is complete: Success") - rc=$? 
- set_strict - if [ "${rc}" -eq 0 ]; then - case ${CDUMP} in - gdas) nmem="${NMEM_ENS}";; - gfs) nmem="${NMEM_ENS_GFS}";; - *) - echo "FATAL ERROR: Unknown CDUMP ${CDUMP} during cleanup" - exit 10 - ;; - esac - - readarray memlist< <(seq --format="mem%03g" 1 "${nmem}") - memlist+=("ensstat") - - for mem in "${memlist[@]}"; do - # Atmos - exclude_list="f006.ens" - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - templates=$(compgen -A variable | grep 'COM_ATMOS_.*_TMPL') - for template in ${templates}; do - MEMDIR="${mem}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Wave - exclude_list="" - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - templates=$(compgen -A variable | grep 'COM_WAVE_.*_TMPL') - for template in ${templates}; do - MEMDIR="${mem}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Ocean - exclude_list="" - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - templates=$(compgen -A variable | grep 'COM_OCEAN_.*_TMPL') - for template in ${templates}; do - MEMDIR="${mem}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Ice - exclude_list="" - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - templates=$(compgen -A variable | grep 'COM_ICE_.*_TMPL') - for template in ${templates}; do - MEMDIR="${mem}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Aerosols (GOCART) - exclude_list="" - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - templates=$(compgen -A variable | grep 'COM_CHEM_.*_TMPL') - for 
template in ${templates}; do - MEMDIR="${mem}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Mediator - exclude_list="" - # Suppress warnings about chained commands suppressing exit codes - # shellcheck disable=SC2312 - templates=$(compgen -A variable | grep 'COM_MED_.*_TMPL') - for template in ${templates}; do - MEMDIR="${mem}" YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - done - fi - fi - fi - - # Remove any empty directories - YMD=${gPDY} HH=${gcyc} generate_com target_dir:COM_TOP_TMPL - target_dir="${ROTDIR:?}/${RUN}.${gPDY}/${gcyc}/" - if [[ -d ${target_dir} ]]; then - find "${target_dir}" -empty -type d -delete - fi - - # Advance to next cycle - GDATE=$(${NDATE} +"${assim_freq}" "${GDATE}") - done -fi - -# Remove enkf*.$rPDY for the older of GDATE or RDATE -GDATE=$(${NDATE} -"${RMOLDSTD_ENKF:-120}" "${PDY}${cyc}") -fhmax=${FHMAX_GFS} -RDATE=$(${NDATE} -"${fhmax}" "${PDY}${cyc}") -if [ "${GDATE}" -lt "${RDATE}" ]; then - RDATE=${GDATE} -fi -rPDY=$(echo "${RDATE}" | cut -c1-8) -clist="enkfgdas enkfgfs" -for ctype in ${clist}; do - COMIN="${ROTDIR}/${ctype}.${rPDY}" - [[ -d ${COMIN} ]] && rm -rf "${COMIN}" -done - -############################################################### - - exit 0 diff --git a/scripts/exgdas_enkf_fcst.sh b/scripts/exgdas_enkf_fcst.sh index 85344e4e35..7eb2a3a711 100755 --- a/scripts/exgdas_enkf_fcst.sh +++ b/scripts/exgdas_enkf_fcst.sh @@ -122,7 +122,9 @@ for imem in $(seq "${ENSBEG}" "${ENSEND}"); do skip_mem="NO" if [[ -f ${EFCSGRP}.fail ]]; then + set +e memstat=$(grep "MEMBER ${ENSMEM}" "${EFCSGRP}.fail" | grep -c "PASS") + set_strict [[ ${memstat} -eq 1 ]] && skip_mem="YES" fi diff --git a/scripts/exglobal_archive.sh b/scripts/exglobal_archive.sh index 5fea07f4ed..78a6d60b65 100755 --- a/scripts/exglobal_archive.sh +++ b/scripts/exglobal_archive.sh @@ -284,193 +284,4 @@ if [[ 
${HPSSARCH} = "YES" || ${LOCALARCH} = "YES" ]]; then fi ##end of HPSS archive ############################################################### - - -############################################################### -# Clean up previous cycles; various depths -# PRIOR CYCLE: Leave the prior cycle alone -GDATE=$(${NDATE} -"${assim_freq}" "${PDY}${cyc}") - -# PREVIOUS to the PRIOR CYCLE -GDATE=$(${NDATE} -"${assim_freq}" "${GDATE}") -gPDY="${GDATE:0:8}" -gcyc="${GDATE:8:2}" - -# Remove the TMPDIR directory -# TODO Only prepbufr is currently using this directory, and all jobs should be -# cleaning up after themselves anyway -COMIN="${DATAROOT}/${GDATE}" -[[ -d ${COMIN} ]] && rm -rf "${COMIN}" - -if [[ "${DELETE_COM_IN_ARCHIVE_JOB:-YES}" == NO ]] ; then - exit 0 -fi - -# Step back every assim_freq hours and remove old rotating directories -# for successful cycles (defaults from 24h to 120h). -# Retain files needed by Fit2Obs -# TODO: This whole section needs to be revamped to remove marine component -# directories and not look at the rocoto log. -GDATEEND=$(${NDATE} -"${RMOLDEND:-24}" "${PDY}${cyc}") -GDATE=$(${NDATE} -"${RMOLDSTD:-120}" "${PDY}${cyc}") -RTOFS_DATE=$(${NDATE} -48 "${PDY}${cyc}") -function remove_files() { - # TODO: move this to a new location - local directory=$1 - shift - if [[ ! 
-d ${directory} ]]; then - echo "No directory ${directory} to remove files from, skiping" - return - fi - local exclude_list="" - if (($# > 0)); then - exclude_list=$* - fi - local file_list - declare -a file_list - readarray -t file_list < <(find -L "${directory}" -type f) - if (( ${#file_list[@]} == 0 )); then return; fi - # echo "Number of files to remove before exclusions: ${#file_list[@]}" - for exclude in ${exclude_list}; do - echo "Excluding ${exclude}" - declare -a file_list_old=("${file_list[@]}") - readarray file_list < <(printf -- '%s\n' "${file_list_old[@]}" | grep -v "${exclude}") - # echo "Number of files to remove after exclusion: ${#file_list[@]}" - if (( ${#file_list[@]} == 0 )); then return; fi - done - # echo "Number of files to remove after exclusions: ${#file_list[@]}" - - for file in "${file_list[@]}"; do - rm -f "${file}" - done - # Remove directory if empty - rmdir "${directory}" || true -} - -while [ "${GDATE}" -le "${GDATEEND}" ]; do - gPDY="${GDATE:0:8}" - gcyc="${GDATE:8:2}" - COMINrtofs="${ROTDIR}/rtofs.${gPDY}" - if [ -d "${COM_TOP}" ]; then - rocotolog="${EXPDIR}/logs/${GDATE}.log" - if [ -f "${rocotolog}" ]; then - set +e - testend=$(tail -n 1 "${rocotolog}" | grep "This cycle is complete: Success") - rc=$? 
- set_strict - - if [ "${rc}" -eq 0 ]; then - # Obs - exclude_list="prepbufr" - templates="COM_OBS" - for template in ${templates}; do - YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Atmos - exclude_list="cnvstat atmanl.nc" - templates=$(compgen -A variable | grep 'COM_ATMOS_.*_TMPL') - for template in ${templates}; do - YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Wave - exclude_list="" - templates=$(compgen -A variable | grep 'COM_WAVE_.*_TMPL') - for template in ${templates}; do - YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Ocean - exclude_list="" - templates=$(compgen -A variable | grep 'COM_OCEAN_.*_TMPL') - for template in ${templates}; do - YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Ice - exclude_list="" - templates=$(compgen -A variable | grep 'COM_ICE_.*_TMPL') - for template in ${templates}; do - YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Aerosols (GOCART) - exclude_list="" - templates=$(compgen -A variable | grep 'COM_CHEM_.*_TMPL') - for template in ${templates}; do - YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - # Mediator - exclude_list="" - templates=$(compgen -A variable | grep 'COM_MED_.*_TMPL') - for template in ${templates}; do - YMD="${gPDY}" HH="${gcyc}" generate_com "directory:${template}" - remove_files "${directory}" "${exclude_list[@]}" - done - - if [ -d "${COMINrtofs}" ] && [ "${GDATE}" -lt "${RTOFS_DATE}" ]; then rm -rf "${COMINrtofs}" ; fi - fi - fi - fi - - # Remove mdl gfsmos directory - if [ "${RUN}" = "gfs" ]; then - 
COMIN="${ROTDIR}/gfsmos.${gPDY}" - if [ -d "${COMIN}" ] && [ "${GDATE}" -lt "${CDATE_MOS}" ]; then rm -rf "${COMIN}" ; fi - fi - - # Remove any empty directories - target_dir="${ROTDIR:?}/${RUN}.${gPDY}/${gcyc}/" - if [[ -d ${target_dir} ]]; then - find "${target_dir}" -empty -type d -delete - fi - - GDATE=$(${NDATE} +"${assim_freq}" "${GDATE}") -done - -# Remove archived gaussian files used for Fit2Obs in $VFYARC that are -# $FHMAX_FITS plus a delta before $CDATE. Touch existing archived -# gaussian files to prevent the files from being removed by automatic -# scrubber present on some machines. - -if [ "${RUN}" = "gfs" ]; then - fhmax=$((FHMAX_FITS+36)) - RDATE=$(${NDATE} -"${fhmax}" "${PDY}${cyc}") - rPDY=$(echo "${RDATE}" | cut -c1-8) - COMIN="${VFYARC}/${RUN}.${rPDY}" - [[ -d ${COMIN} ]] && rm -rf "${COMIN}" - - TDATE=$(${NDATE} -"${FHMAX_FITS}" "${PDY}${cyc}") - while [ "${TDATE}" -lt "${PDY}${cyc}" ]; do - tPDY=$(echo "${TDATE}" | cut -c1-8) - tcyc=$(echo "${TDATE}" | cut -c9-10) - TDIR=${VFYARC}/${RUN}.${tPDY}/${tcyc} - [[ -d ${TDIR} ]] && touch "${TDIR}"/* - TDATE=$(${NDATE} +6 "${TDATE}") - done -fi - -# Remove $RUN.$rPDY for the older of GDATE or RDATE -GDATE=$(${NDATE} -"${RMOLDSTD:-120}" "${PDY}${cyc}") -fhmax=${FHMAX_GFS} -RDATE=$(${NDATE} -"${fhmax}" "${PDY}${cyc}") -if [ "${GDATE}" -lt "${RDATE}" ]; then - RDATE=${GDATE} -fi -rPDY=$(echo "${RDATE}" | cut -c1-8) -COMIN="${ROTDIR}/${RUN}.${rPDY}" -[[ -d ${COMIN} ]] && rm -rf "${COMIN}" - - -############################################################### - - exit 0 diff --git a/scripts/exglobal_cleanup.sh b/scripts/exglobal_cleanup.sh new file mode 100755 index 0000000000..5d7c0a9788 --- /dev/null +++ b/scripts/exglobal_cleanup.sh @@ -0,0 +1,106 @@ +#! 
/usr/bin/env bash + +source "${HOMEgfs}/ush/preamble.sh" + +############################################################### +# Clean up previous cycles; various depths +# PRIOR CYCLE: Leave the prior cycle alone +# shellcheck disable=SC2153 +GDATE=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${assim_freq} hours") +# PREVIOUS to the PRIOR CYCLE +GDATE=$(date --utc +%Y%m%d%H -d "${GDATE:0:8} ${GDATE:8:2} -${assim_freq} hours") + +# Remove the TMPDIR directory +# TODO Only prepbufr is currently using this directory, and all jobs should be +# cleaning up after themselves anyway +COMIN="${DATAROOT}/${GDATE}" +[[ -d ${COMIN} ]] && rm -rf "${COMIN}" + +if [[ "${CLEANUP_COM:-YES}" == NO ]] ; then + exit 0 +fi + +# Step back every assim_freq hours and remove old rotating directories +# for successful cycles (defaults from 24h to 120h). +# Retain files needed by Fit2Obs +last_date=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${RMOLDEND:-24} hours" ) +first_date=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${RMOLDSTD:-120} hours") +last_rtofs=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${RMOLDRTOFS:-48} hours") +function remove_files() { + local directory=$1 + shift + if [[ ! 
-d ${directory} ]]; then + echo "No directory ${directory} to remove files from, skiping" + return + fi + local exclude find_exclude_string="" + for exclude in "$@"; do + find_exclude_string+=" -name ${exclude} -or" + done + # Chop off the trailing -or left by the loop + find_exclude_string="${find_exclude_string%-or}" + # Remove all regular files that do not match an exclude pattern + # shellcheck disable=SC2086 + find "${directory}" -type f -not \( ${find_exclude_string} \) -delete + # Remove all symlinks that do not match an exclude pattern + # shellcheck disable=SC2086 + find "${directory}" -type l -not \( ${find_exclude_string} \) -delete + # Remove any empty directories + find "${directory}" -type d -empty -delete +} + +for (( current_date=first_date; current_date <= last_date; \ + current_date=$(date --utc +%Y%m%d%H -d "${current_date:0:8} ${current_date:8:2} +${assim_freq} hours") )); do + current_PDY="${current_date:0:8}" + current_cyc="${current_date:8:2}" + rtofs_dir="${ROTDIR}/rtofs.${current_PDY}" + rocotolog="${EXPDIR}/logs/${current_date}.log" + if [[ -f "${rocotolog}" ]]; then + # TODO: This needs to be revamped to not look at the rocoto log. + # shellcheck disable=SC2312 + if [[ $(tail -n 1 "${rocotolog}") =~ "This cycle is complete: Success" ]]; then + YMD="${current_PDY}" HH="${current_cyc}" generate_com COM_TOP + if [[ -d "${COM_TOP}" ]]; then + IFS=", " read -r -a exclude_list <<< "${exclude_string:-}" + remove_files "${COM_TOP}" "${exclude_list[@]:-}" + fi + if [[ -d "${rtofs_dir}" ]] && (( current_date < last_rtofs )); then rm -rf "${rtofs_dir}" ; fi + fi + fi + + # Remove mdl gfsmos directory + if [[ "${RUN}" == "gfs" ]]; then + mos_dir="${ROTDIR}/gfsmos.${current_PDY}" + if [[ -d "${mos_dir}" ]] && (( current_date < CDATE_MOS )); then rm -rf "${mos_dir}" ; fi + fi +done + +# Remove archived gaussian files used for Fit2Obs in $VFYARC that are +# $FHMAX_FITS plus a delta before $CDATE. 
Touch existing archived +# gaussian files to prevent the files from being removed by automatic +# scrubber present on some machines. + +if [[ "${RUN}" == "gfs" ]]; then + fhmax=$((FHMAX_FITS + 36)) + RDATE=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${fhmax} hours") + verify_dir="${ROTDIR}/vrfyarch/${RUN}.${RDATE:0:8}" + [[ -d ${verify_dir} ]] && rm -rf "${verify_dir}" + + touch_date=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${FHMAX_FITS} hours") + while (( touch_date < "${PDY}${cyc}" )); do + touch_PDY="${touch_date:0:8}" + touch_cyc="${touch_date:8:2}" + touch_dir="${ROTDIR}/vrfyarch/${RUN}.${touch_PDY}/${touch_cyc}" + [[ -d ${touch_dir} ]] && touch "${touch_dir}"/* + touch_date=$(date --utc +%Y%m%d%H -d "${touch_PDY} ${touch_cyc} +6 hours") + done +fi + +# Remove $RUN.$rPDY for the older of GDATE or RDATE +GDATE=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${RMOLDSTD:-120} hours") +RDATE=$(date --utc +%Y%m%d%H -d "${PDY} ${cyc} -${FHMAX_GFS} hours") +if (( GDATE < RDATE )); then + RDATE=${GDATE} +fi +deletion_target="${ROTDIR}/${RUN}.${RDATE:0:8}" +if [[ -d ${deletion_target} ]]; then rm -rf "${deletion_target}"; fi diff --git a/workflow/applications/gfs_cycled.py b/workflow/applications/gfs_cycled.py index 633e93bac0..6eff929d5f 100644 --- a/workflow/applications/gfs_cycled.py +++ b/workflow/applications/gfs_cycled.py @@ -47,7 +47,7 @@ def _get_app_configs(self): if self.do_ocean: configs += ['ocnpost'] - configs += ['sfcanl', 'analcalc', 'fcst', 'post', 'vrfy', 'fit2obs', 'arch'] + configs += ['sfcanl', 'analcalc', 'fcst', 'post', 'vrfy', 'fit2obs', 'arch', 'cleanup'] if self.do_hybvar: if self.do_jediatmens: @@ -106,7 +106,7 @@ def get_task_names(self): # gdas_gfs_common_tasks_after_fcst += ['ocnpost'] gdas_gfs_common_tasks_after_fcst += ['vrfy'] - gdas_gfs_common_cleanup_tasks = ['arch'] + gdas_gfs_common_cleanup_tasks = ['arch', 'cleanup'] if self.do_jediatmvar: gdas_gfs_common_tasks_before_fcst += ['prepatmiodaobs', 'atmanlinit', 'atmanlrun', 'atmanlfinal'] @@ 
-137,7 +137,7 @@ def get_task_names(self): else: hybrid_tasks += ['eobs', 'eupd', 'echgres'] hybrid_tasks += ['ediag'] if self.lobsdiag_forenkf else ['eomg'] - hybrid_after_eupd_tasks += ['ecen', 'esfc', 'efcs', 'epos', 'earc'] + hybrid_after_eupd_tasks += ['ecen', 'esfc', 'efcs', 'epos', 'earc', 'cleanup'] # Collect all "gdas" cycle tasks gdas_tasks = gdas_gfs_common_tasks_before_fcst.copy() diff --git a/workflow/applications/gfs_forecast_only.py b/workflow/applications/gfs_forecast_only.py index e6d1ab35a2..73e17ee7aa 100644 --- a/workflow/applications/gfs_forecast_only.py +++ b/workflow/applications/gfs_forecast_only.py @@ -15,7 +15,7 @@ def _get_app_configs(self): Returns the config_files that are involved in the forecast-only app """ - configs = ['stage_ic', 'fcst', 'arch'] + configs = ['stage_ic', 'fcst', 'arch', 'cleanup'] if self.do_atm: configs += ['post', 'vrfy'] @@ -109,6 +109,6 @@ def get_task_names(self): if self.do_wafs: tasks += ['wafs', 'wafsgcip', 'wafsgrib2', 'wafsgrib20p25', 'wafsblending', 'wafsblending0p25'] - tasks += ['arch'] # arch **must** be the last task + tasks += ['arch', 'cleanup'] # arch and cleanup **must** be the last tasks return {f"{self._base['CDUMP']}": tasks} diff --git a/workflow/rocoto/gfs_tasks.py b/workflow/rocoto/gfs_tasks.py index 33e2ec82f3..e41e4ebcc1 100644 --- a/workflow/rocoto/gfs_tasks.py +++ b/workflow/rocoto/gfs_tasks.py @@ -944,6 +944,23 @@ def arch(self): return task + # Cleanup: waits on this cdump's eamn metatask (enkf) or arch task (otherwise) + def cleanup(self): + deps = [] + if 'enkf' in self.cdump: + dep_dict = {'type': 'metatask', 'name': f'{self.cdump}eamn'} + deps.append(rocoto.add_dependency(dep_dict)) + else: + dep_dict = {'type': 'task', 'name': f'{self.cdump}arch'} + deps.append(rocoto.add_dependency(dep_dict)) + + dependencies = rocoto.create_dependency(dep_condition='and', dep=deps) + + resources = self.get_resource('cleanup') + task = create_wf_task('cleanup', resources, cdump=self.cdump, envar=self.envars, dependency=dependencies) + + return task + # Start of 
ensemble tasks def eobs(self): deps = [] diff --git a/workflow/rocoto/tasks.py b/workflow/rocoto/tasks.py index b9716c938e..29ed57daf2 100644 --- a/workflow/rocoto/tasks.py +++ b/workflow/rocoto/tasks.py @@ -11,7 +11,7 @@ class Tasks: SERVICE_TASKS = ['arch', 'earc'] VALID_TASKS = ['aerosol_init', 'stage_ic', - 'prep', 'anal', 'sfcanl', 'analcalc', 'analdiag', 'arch', + 'prep', 'anal', 'sfcanl', 'analcalc', 'analdiag', 'arch', 'cleanup', 'prepatmiodaobs', 'atmanlinit', 'atmanlrun', 'atmanlfinal', 'ocnanalprep', 'ocnanalbmat', 'ocnanalrun', 'ocnanalchkpt', 'ocnanalpost', 'ocnanalvrfy', 'earc', 'ecen', 'echgres', 'ediag', 'efcs',