From fb871aba02cb6ad865af022a671f142e116e8a65 Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Wed, 8 Nov 2023 17:05:14 +0000 Subject: [PATCH 01/35] started to add python rocoto stat --- ci/scripts/utils/rocoto_statcount.py | 44 ++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 ci/scripts/utils/rocoto_statcount.py diff --git a/ci/scripts/utils/rocoto_statcount.py b/ci/scripts/utils/rocoto_statcount.py new file mode 100644 index 0000000000..845f805152 --- /dev/null +++ b/ci/scripts/utils/rocoto_statcount.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 + +import sys +import os + +from wxflow import Executable, which, Logger +from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter + +logger = Logger(level=os.environ.get("LOGGING_LEVEL", "DEBUG"), colored_log=False) + + +def input_args(): + """ + Method to collect user arguments + """ + + description = """ + """ + + parser = ArgumentParser(description=description, + formatter_class=ArgumentDefaultsHelpFormatter) + + parser.add_argument('--xml',help='workflow_document', type=str) + parser.add_argument('--db',help='database_file', type=str) + + args = parser.parse_args() + + return args + +if __name__ == '__main__': + + args = input_args() + + rocotostat = which("rocotostat") + if not rocotostat: + logger.exception("rocotostat not found in PATH") + sys.exit(-1) + + xml_file_path = os.path.abspath(args.xml) + db_file_path = os.path.abspath(args.db) + + rocotostat.add_default_args(['-w',xml_file_path,'-d',db_file_path,'-s']) + rocotostat_output = rocotostat(output=str) + print(rocotostat_output) From b865bf248b3ce9df53b21639430c1df0900c2b67 Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Wed, 8 Nov 2023 11:30:29 -0600 Subject: [PATCH 02/35] first steps with rocoto stat script --- ci/scripts/utils/rocoto_statcount.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) mode change 100644 => 100755 ci/scripts/utils/rocoto_statcount.py diff --git 
a/ci/scripts/utils/rocoto_statcount.py b/ci/scripts/utils/rocoto_statcount.py old mode 100644 new mode 100755 index 845f805152..c1604c46e0 --- a/ci/scripts/utils/rocoto_statcount.py +++ b/ci/scripts/utils/rocoto_statcount.py @@ -39,6 +39,11 @@ def input_args(): xml_file_path = os.path.abspath(args.xml) db_file_path = os.path.abspath(args.db) - rocotostat.add_default_args(['-w',xml_file_path,'-d',db_file_path,'-s']) + rocotostat.add_default_arg(['-w',xml_file_path,'-d',db_file_path,'-s']) rocotostat_output = rocotostat(output=str) - print(rocotostat_output) + rocotostat_output = rocotostat_output.splitlines()[1:] + + num_cycles = len(rocotostat_output)) + + for rocoto_lines in rocotostat_output: + print(rocoto_lines.split()[0:2]) From 1e59beff6e9c4318ecb0ccc71498f3618039e378 Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Wed, 8 Nov 2023 21:01:15 +0000 Subject: [PATCH 03/35] finished working version of rocoto_statcount --- ci/scripts/utils/rocoto_statcount.py | 59 ++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/ci/scripts/utils/rocoto_statcount.py b/ci/scripts/utils/rocoto_statcount.py index c1604c46e0..a357b36541 100755 --- a/ci/scripts/utils/rocoto_statcount.py +++ b/ci/scripts/utils/rocoto_statcount.py @@ -20,14 +20,15 @@ def input_args(): parser = ArgumentParser(description=description, formatter_class=ArgumentDefaultsHelpFormatter) - parser.add_argument('--xml',help='workflow_document', type=str) - parser.add_argument('--db',help='database_file', type=str) + parser.add_argument('-w', help='workflow_document', type=str) + parser.add_argument('-d', help='database_file', type=str) + parser.add_argument('--check_stalled', help='check if any jobs do not advance (stalled)', action='store_true', required=False) args = parser.parse_args() return args -if __name__ == '__main__': +def rocoto_statcount(): args = input_args() @@ -36,14 +37,56 @@ def input_args(): logger.exception("rocotostat not found in PATH") 
sys.exit(-1) - xml_file_path = os.path.abspath(args.xml) - db_file_path = os.path.abspath(args.db) + xml_file_path = os.path.abspath(args.w) + db_file_path = os.path.abspath(args.d) + rocotostat_all = which("rocotostat") rocotostat.add_default_arg(['-w',xml_file_path,'-d',db_file_path,'-s']) + rocotostat_all.add_default_arg(['-w',xml_file_path,'-d',db_file_path,'-a']) + rocotostat_output = rocotostat(output=str) rocotostat_output = rocotostat_output.splitlines()[1:] + rocotostat_output = [line.split()[0:2] for line in rocotostat_output] + + rocotostat_output_all = rocotostat_all(output=str) + rocotostat_output_all = rocotostat_output_all.splitlines()[1:] + rocotostat_output_all = [line.split()[0:4] for line in rocotostat_output_all] + rocotostat_output_all = [line for line in rocotostat_output_all if len(line) != 1] + + rocoto_status = { + 'Cycles' : len(rocotostat_output), + 'Done_Cycles' : sum([ sublist.count('Done') for sublist in rocotostat_output ]), + 'SUCCEEDED' : sum([ sublist.count('SUCCEEDED') for sublist in rocotostat_output_all ]), + 'FAIL' : sum([ sublist.count('FAIL') for sublist in rocotostat_output_all ]), + 'DEAD' : sum([ sublist.count('DEAD') for sublist in rocotostat_output_all ]), + 'RUNNING' : sum([ sublist.count('RUNNING') for sublist in rocotostat_output_all ]), + 'PENDING' : sum([ sublist.count('PENDING') for sublist in rocotostat_output_all ]), + 'QUEUED' : sum([ sublist.count('QUEUED') for sublist in rocotostat_output_all ]) + } + + return rocoto_status - num_cycles = len(rocotostat_output)) +if __name__ == '__main__': + + args = input_args() - for rocoto_lines in rocotostat_output: - print(rocoto_lines.split()[0:2]) + rocoto_status = rocoto_statcount() + for status in rocoto_status: + print(f'Number of {status} : {rocoto_status[status]}') + rocoto_state = 'Running' + if rocoto_status['Cycles'] == rocoto_status['Done_Cycles']: + rocoto_state = 'Done' + + if args.check_stalled: + if rocoto_state != 'Done': + rocoto_run = which("rocotorun") 
+ rocoto_run.add_default_arg(['-w',args.w,'-d',args.d]) + rocoto_run() + rocoto_status2 = rocoto_statcount() + if rocoto_status2 == rocoto_status: + rocoto_status = 'Stalled' + print(f'Rocoto State : {rocoto_status}') + sys.exit(-1) + else: + rocoto_status = 'Done' + print(f'Rocoto State : {rocoto_state}') From dd4c735f92e51ddc25c15f48677f0b848b9f092b Mon Sep 17 00:00:00 2001 From: "Terry.McGuinness" Date: Thu, 9 Nov 2023 16:06:13 +0000 Subject: [PATCH 04/35] replaced explicit decleration of stat dict with loop in rocoto statcount --- ci/scripts/utils/rocoto_statcount.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/ci/scripts/utils/rocoto_statcount.py b/ci/scripts/utils/rocoto_statcount.py index a357b36541..4cf6f17777 100755 --- a/ci/scripts/utils/rocoto_statcount.py +++ b/ci/scripts/utils/rocoto_statcount.py @@ -55,14 +55,12 @@ def rocoto_statcount(): rocoto_status = { 'Cycles' : len(rocotostat_output), - 'Done_Cycles' : sum([ sublist.count('Done') for sublist in rocotostat_output ]), - 'SUCCEEDED' : sum([ sublist.count('SUCCEEDED') for sublist in rocotostat_output_all ]), - 'FAIL' : sum([ sublist.count('FAIL') for sublist in rocotostat_output_all ]), - 'DEAD' : sum([ sublist.count('DEAD') for sublist in rocotostat_output_all ]), - 'RUNNING' : sum([ sublist.count('RUNNING') for sublist in rocotostat_output_all ]), - 'PENDING' : sum([ sublist.count('PENDING') for sublist in rocotostat_output_all ]), - 'QUEUED' : sum([ sublist.count('QUEUED') for sublist in rocotostat_output_all ]) + 'Done_Cycles' : sum([ sublist.count('Done') for sublist in rocotostat_output ]) } + + status_cases = [ 'SUCCEEDED', 'FAIL', 'DEAD', 'RUNNING', 'PENDING', 'QUEUED'] + for case in status_cases: + rocoto_status[case] = sum([ sublist.count(case) for sublist in rocotostat_output_all ]) return rocoto_status From f31ec82ab0bc8ff4335a216ca0d5e4e0db984390 Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Thu, 9 Nov 2023 16:10:43 +0000 Subject: [PATCH 
05/35] fixed end state mixup of varialble names --- ci/scripts/utils/rocoto_statcount.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/scripts/utils/rocoto_statcount.py b/ci/scripts/utils/rocoto_statcount.py index 4cf6f17777..2b57ade2ed 100755 --- a/ci/scripts/utils/rocoto_statcount.py +++ b/ci/scripts/utils/rocoto_statcount.py @@ -82,9 +82,9 @@ def rocoto_statcount(): rocoto_run() rocoto_status2 = rocoto_statcount() if rocoto_status2 == rocoto_status: - rocoto_status = 'Stalled' - print(f'Rocoto State : {rocoto_status}') + rocoto_state = 'Stalled' + print(f'Rocoto State : {rocoto_state}') sys.exit(-1) - else: - rocoto_status = 'Done' + else: + rocoto_state = 'Running' print(f'Rocoto State : {rocoto_state}') From aad09fd12e8406bd2658c04488167380a44463e0 Mon Sep 17 00:00:00 2001 From: "Terry.McGuinness" Date: Thu, 9 Nov 2023 16:26:49 +0000 Subject: [PATCH 06/35] more slight improvments on user GitHub messaging outputs --- ci/scripts/check_ci.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/scripts/check_ci.sh b/ci/scripts/check_ci.sh index a5d7c77e66..3d0be3a70a 100755 --- a/ci/scripts/check_ci.sh +++ b/ci/scripts/check_ci.sh @@ -85,8 +85,8 @@ for pr in ${pr_list}; do # shellcheck disable=SC2312 if [[ -z $(ls -A "${pr_dir}/RUNTESTS/EXPDIR") ]] ; then "${GH}" pr edit --repo "${REPO_URL}" "${pr}" --remove-label "CI-${MACHINE_ID^}-Running" --add-label "CI-${MACHINE_ID^}-Passed" + sed -i "1 i\All CI Test Cases Passed on ${MACHINE_ID^}" "${output_ci}" sed -i "1 i\`\`\`" "${output_ci}" - sed -i "1 i\All CI Test Cases Passed:" "${output_ci}" "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${output_ci}" "${ROOT_DIR}/ci/scripts/pr_list_database.py" --remove_pr "${pr}" --dbfile "${pr_list_dbfile}" # Check to see if this PR that was opened by the weekly tests and if so close it if it passed on all platforms @@ -131,7 +131,7 @@ for pr in ${pr_list}; do "${GH}" pr edit --repo "${REPO_URL}" "${pr}" 
--remove-label "CI-${MACHINE_ID^}-Running" --add-label "CI-${MACHINE_ID^}-Failed" error_logs=$("${rocotostat}" -d "${db}" -w "${xml}" | grep -E 'FAIL|DEAD' | awk '{print "-c", $1, "-t", $2}' | xargs "${rocotocheck}" -d "${db}" -w "${xml}" | grep join | awk '{print $2}') || true { - echo "Experiment ${pslot} Terminated: *** FAILED ***" + echo "Experiment ${pslot} Terminated: *** FAILED *** on ${MACHIND_ID^}" echo "Experiment ${pslot} Terminated with ${num_failed} tasks failed at $(date)" || true echo "Error logs:" echo "${error_logs}" @@ -152,8 +152,8 @@ for pr in ${pr_list}; do rm -f "${output_ci_single}" # echo "\`\`\`" > "${output_ci_single}" DATE=$(date) - echo "Experiment ${pslot} **SUCCESS** ${DATE}" >> "${output_ci_single}" - echo "Experiment ${pslot} **SUCCESS** at ${DATE}" >> "${output_ci}" + echo "Experiment ${pslot} **SUCCESS** ${DATE +%Y%m%d} on ${MACHINE_ID^}" >> "${output_ci_single}" + echo "Experiment ${pslot} **SUCCESS** at ${DATE +%Y%m%d} on ${MACHIND_ID^}" >> "${output_ci}" "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${output_ci_single}" fi From d45017a7eb4f968709c1aa3fc6825fa7f0a60de0 Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Thu, 9 Nov 2023 18:11:14 +0000 Subject: [PATCH 07/35] added failed state updates with stalled in run ci --- ci/scripts/run_ci.sh | 18 ++++++++++++++++-- ci/scripts/utils/rocoto_statcount.py | 16 +++++++++++++++- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/ci/scripts/run_ci.sh b/ci/scripts/run_ci.sh index cdaafb337f..76fd5bc50e 100755 --- a/ci/scripts/run_ci.sh +++ b/ci/scripts/run_ci.sh @@ -13,6 +13,8 @@ ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
>/dev/null 2>&1 && pwd )" scriptname=$(basename "${BASH_SOURCE[0]}") echo "Begin ${scriptname} at $(date -u)" || true export PS4='+ $(basename ${BASH_SOURCE})[${LINENO}]' +GH=${HOME}/bin/gh +REPO_URL="https://github.com/NOAA-EMC/global-workflow.git" ######################################################################### # Set up runtime environment varibles for accounts on supproted machines @@ -81,7 +83,19 @@ for pr in ${pr_list}; do pslot=$(basename "${pslot_dir}") xml="${pslot_dir}/${pslot}.xml" db="${pslot_dir}/${pslot}.db" - echo "Running: ${rocotorun} -v 10 -w ${xml} -d ${db}" - "${rocotorun}" -v 10 -w "${xml}" -d "${db}" + ${ROOT_DIR}/ci/scripts/utils/rocoto_statcount.py -d "{db}" -w "${xml}" --check_stalled + rc=$? + if [[ "${rc}" -ne 0 ]]; then + output_ci="${pr_dir}/output_runtime_single.log" + { + echo "${pslot} has *** STALLED **** on ${MACHINE_ID^}" + echo "A jobs in expermint ${pslot} in ${pslot_dir}" + echo "may have depenencies that are not being met" + } >> "${output_ci}" + sed -i "1 i\`\`\`" "${output_ci}" + "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${output_ci}" + "${GH}" pr edit --repo "${REPO_URL}" "${pr}" --remove-label "CI-${MACHINE_ID^}-Running" --add-label "CI-${MACHINE_ID^}-Failed" + "${ROOT_DIR}/ci/scripts/pr_list_database.py" --remove_pr "${pr}" --dbfile "${pr_list_dbfile}" + fi done done diff --git a/ci/scripts/utils/rocoto_statcount.py b/ci/scripts/utils/rocoto_statcount.py index 2b57ade2ed..e5494da642 100755 --- a/ci/scripts/utils/rocoto_statcount.py +++ b/ci/scripts/utils/rocoto_statcount.py @@ -11,10 +11,21 @@ def input_args(): """ - Method to collect user arguments + Parse command-line arguments. + + Returns + ------- + args : Namespace + The parsed command-line arguments. """ description = """ + Using rocotostat that lists the status of all jobs this scripts + determines rocoto_state: if all cycles are done, then rocoto_state is Done. + If all cycles are not done, then rocoto_state is Running. 
+ If the check_stalled is used then rocotorun is then issued and + rocotostat is run again and if all jobs do not advanced, then + rocoto_state is Stalled and the script exits with -1. """ parser = ArgumentParser(description=description, @@ -29,6 +40,9 @@ def input_args(): return args def rocoto_statcount(): + """ + Run rocotostat and process its output. + """ args = input_args() From b170ca78f4bb5e01bb117be0ea3a1eec98f79eca Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Thu, 9 Nov 2023 18:31:21 +0000 Subject: [PATCH 08/35] Update run_ci.sh quotes from bash linter --- ci/scripts/run_ci.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/run_ci.sh b/ci/scripts/run_ci.sh index 76fd5bc50e..98f57254a0 100755 --- a/ci/scripts/run_ci.sh +++ b/ci/scripts/run_ci.sh @@ -83,7 +83,7 @@ for pr in ${pr_list}; do pslot=$(basename "${pslot_dir}") xml="${pslot_dir}/${pslot}.xml" db="${pslot_dir}/${pslot}.db" - ${ROOT_DIR}/ci/scripts/utils/rocoto_statcount.py -d "{db}" -w "${xml}" --check_stalled + "${ROOT_DIR}/ci/scripts/utils/rocoto_statcount.py" -d "{db}" -w "${xml}" --check_stalled rc=$? 
if [[ "${rc}" -ne 0 ]]; then output_ci="${pr_dir}/output_runtime_single.log" From ea4da7d9543476351e2e36f299b29d7d93ddf66a Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Thu, 9 Nov 2023 18:34:25 +0000 Subject: [PATCH 09/35] Update run_ci.sh jobs should not be plurl --- ci/scripts/run_ci.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/run_ci.sh b/ci/scripts/run_ci.sh index 98f57254a0..0eca8a0895 100755 --- a/ci/scripts/run_ci.sh +++ b/ci/scripts/run_ci.sh @@ -89,7 +89,7 @@ for pr in ${pr_list}; do output_ci="${pr_dir}/output_runtime_single.log" { echo "${pslot} has *** STALLED **** on ${MACHINE_ID^}" - echo "A jobs in expermint ${pslot} in ${pslot_dir}" + echo "A job in expermint ${pslot} in ${pslot_dir}" echo "may have depenencies that are not being met" } >> "${output_ci}" sed -i "1 i\`\`\`" "${output_ci}" From d529dbb72f88e267e2339e56bace9c26985c83d4 Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Thu, 9 Nov 2023 14:08:47 -0600 Subject: [PATCH 10/35] bug updates after testing depedancies catch --- ci/scripts/driver.sh | 5 ++--- ci/scripts/run_ci.sh | 3 ++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/scripts/driver.sh b/ci/scripts/driver.sh index 7988ff17a1..0bb2bbc571 100755 --- a/ci/scripts/driver.sh +++ b/ci/scripts/driver.sh @@ -159,7 +159,7 @@ for pr in ${pr_list}; do set +e export LOGFILE_PATH="${HOMEgfs}/ci/scripts/create_experiment.log" rm -f "${LOGFILE_PATH}" - "${HOMEgfs}/workflow/create_experiment.py" --yaml "${HOMEgfs}/ci/cases/pr/${case}.yaml" 2>&1 "${LOGFILE_PATH}" + "${HOMEgfs}/workflow/create_experiment.py" --yaml "${HOMEgfs}/ci/cases/pr/${case}.yaml" > "${LOGFILE_PATH}" 2>&1 ci_status=$? 
set -e if [[ ${ci_status} -eq 0 ]]; then @@ -174,8 +174,7 @@ for pr in ${pr_list}; do } >> "${output_ci}" else { - echo "*** Failed *** to create experiment: ${pslot}" - echo "" + echo "*** Failed *** to create experiment: ${pslot} on ${MACHINE_ID^} for PR #${pr}" cat "${LOGFILE_PATH}" } >> "${output_ci}" "${GH}" pr edit "${pr}" --repo "${REPO_URL}" --remove-label "CI-${MACHINE_ID^}-Building" --add-label "CI-${MACHINE_ID^}-Failed" diff --git a/ci/scripts/run_ci.sh b/ci/scripts/run_ci.sh index 76fd5bc50e..ebcb00b0f7 100755 --- a/ci/scripts/run_ci.sh +++ b/ci/scripts/run_ci.sh @@ -83,7 +83,8 @@ for pr in ${pr_list}; do pslot=$(basename "${pslot_dir}") xml="${pslot_dir}/${pslot}.xml" db="${pslot_dir}/${pslot}.db" - ${ROOT_DIR}/ci/scripts/utils/rocoto_statcount.py -d "{db}" -w "${xml}" --check_stalled + set +e + ${ROOT_DIR}/ci/scripts/utils/rocoto_statcount.py -d "${db}" -w "${xml}" --check_stalled rc=$? if [[ "${rc}" -ne 0 ]]; then output_ci="${pr_dir}/output_runtime_single.log" From 5ae0d72b1a7d77d26841c1409ae250c8ba353cfa Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Thu, 9 Nov 2023 15:26:58 -0500 Subject: [PATCH 11/35] Update rocoto_statcount.py fixed some grammar in description --- ci/scripts/utils/rocoto_statcount.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/scripts/utils/rocoto_statcount.py b/ci/scripts/utils/rocoto_statcount.py index e5494da642..42d6f4a48c 100755 --- a/ci/scripts/utils/rocoto_statcount.py +++ b/ci/scripts/utils/rocoto_statcount.py @@ -20,11 +20,11 @@ def input_args(): """ description = """ - Using rocotostat that lists the status of all jobs this scripts + Using rocotostat to get the status of all jobs this scripts determines rocoto_state: if all cycles are done, then rocoto_state is Done. If all cycles are not done, then rocoto_state is Running. 
- If the check_stalled is used then rocotorun is then issued and - rocotostat is run again and if all jobs do not advanced, then + If the check_stalled is used then rocotorun is issued and + rocotostat is run again and checks if all jobs have not advanced, then rocoto_state is Stalled and the script exits with -1. """ From 5b98e0198f90b05f342a1d091511f2d298512746 Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Thu, 9 Nov 2023 15:28:24 -0500 Subject: [PATCH 12/35] Update run_ci.sh fixed double quotes for bash lint --- ci/scripts/run_ci.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/run_ci.sh b/ci/scripts/run_ci.sh index 3d86d6426e..f2c39dc9e5 100755 --- a/ci/scripts/run_ci.sh +++ b/ci/scripts/run_ci.sh @@ -84,7 +84,7 @@ for pr in ${pr_list}; do xml="${pslot_dir}/${pslot}.xml" db="${pslot_dir}/${pslot}.db" set +e - ${ROOT_DIR}/ci/scripts/utils/rocoto_statcount.py -d "${db}" -w "${xml}" --check_stalled + "${ROOT_DIR}/ci/scripts/utils/rocoto_statcount.py" -d "${db}" -w "${xml}" --check_stalled rc=$? 
if [[ "${rc}" -ne 0 ]]; then output_ci="${pr_dir}/output_runtime_single.log" From b48f519b5ce93b820d3eef2e4c5bd21e0f843c9d Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Mon, 13 Nov 2023 17:10:42 +0000 Subject: [PATCH 13/35] spelling, typos, and brain fart on {DATE +} --- ci/scripts/check_ci.sh | 6 +++--- ci/scripts/run_ci.sh | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/scripts/check_ci.sh b/ci/scripts/check_ci.sh index 3d0be3a70a..f35779114e 100755 --- a/ci/scripts/check_ci.sh +++ b/ci/scripts/check_ci.sh @@ -131,7 +131,7 @@ for pr in ${pr_list}; do "${GH}" pr edit --repo "${REPO_URL}" "${pr}" --remove-label "CI-${MACHINE_ID^}-Running" --add-label "CI-${MACHINE_ID^}-Failed" error_logs=$("${rocotostat}" -d "${db}" -w "${xml}" | grep -E 'FAIL|DEAD' | awk '{print "-c", $1, "-t", $2}' | xargs "${rocotocheck}" -d "${db}" -w "${xml}" | grep join | awk '{print $2}') || true { - echo "Experiment ${pslot} Terminated: *** FAILED *** on ${MACHIND_ID^}" + echo "Experiment ${pslot} Terminated: *** FAILED *** on ${MACHINE_ID^}" echo "Experiment ${pslot} Terminated with ${num_failed} tasks failed at $(date)" || true echo "Error logs:" echo "${error_logs}" @@ -152,8 +152,8 @@ for pr in ${pr_list}; do rm -f "${output_ci_single}" # echo "\`\`\`" > "${output_ci_single}" DATE=$(date) - echo "Experiment ${pslot} **SUCCESS** ${DATE +%Y%m%d} on ${MACHINE_ID^}" >> "${output_ci_single}" - echo "Experiment ${pslot} **SUCCESS** at ${DATE +%Y%m%d} on ${MACHIND_ID^}" >> "${output_ci}" + echo "Experiment ${pslot} **SUCCESS** $(date +'%A %b %d, %Y') on ${MACHINE_ID^}" >> "${output_ci_single}" + echo "Experiment ${pslot} **SUCCESS** at $(date +'%A %b %d, %Y') on ${MACHINE_ID^}" >> "${output_ci}" "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${output_ci_single}" fi diff --git a/ci/scripts/run_ci.sh b/ci/scripts/run_ci.sh index f2c39dc9e5..cfc3fe5afe 100755 --- a/ci/scripts/run_ci.sh +++ b/ci/scripts/run_ci.sh @@ -90,7 +90,7 @@ for pr in 
${pr_list}; do output_ci="${pr_dir}/output_runtime_single.log" { echo "${pslot} has *** STALLED **** on ${MACHINE_ID^}" - echo "A job in expermint ${pslot} in ${pslot_dir}" + echo "A job in experiment ${pslot} in ${pslot_dir}" echo "may have depenencies that are not being met" } >> "${output_ci}" sed -i "1 i\`\`\`" "${output_ci}" From 02bd37ed53bd79c36bb3738ba66f69bcdd1733fb Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Mon, 13 Nov 2023 12:59:59 -0500 Subject: [PATCH 14/35] Update ci/scripts/utils/rocoto_statcount.py update to more robust logic Co-authored-by: Walter Kolczynski - NOAA --- ci/scripts/utils/rocoto_statcount.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/ci/scripts/utils/rocoto_statcount.py b/ci/scripts/utils/rocoto_statcount.py index 42d6f4a48c..cc60e00f06 100755 --- a/ci/scripts/utils/rocoto_statcount.py +++ b/ci/scripts/utils/rocoto_statcount.py @@ -85,20 +85,10 @@ def rocoto_statcount(): rocoto_status = rocoto_statcount() for status in rocoto_status: print(f'Number of {status} : {rocoto_status[status]}') - rocoto_state = 'Running' if rocoto_status['Cycles'] == rocoto_status['Done_Cycles']: rocoto_state = 'Done' - - if args.check_stalled: - if rocoto_state != 'Done': - rocoto_run = which("rocotorun") - rocoto_run.add_default_arg(['-w',args.w,'-d',args.d]) - rocoto_run() - rocoto_status2 = rocoto_statcount() - if rocoto_status2 == rocoto_status: - rocoto_state = 'Stalled' - print(f'Rocoto State : {rocoto_state}') - sys.exit(-1) - else: - rocoto_state = 'Running' + elif rocoto_status['RUNNING'] + rocoto_status['SUBMITTING'] + rocoto_status['QUEUED'] == 0: + rocoto_state = 'Stalled' + else: + rocoto_state = 'Running' print(f'Rocoto State : {rocoto_state}') From ba7f6d927b7709aba7388ff6a242405e7f7e9719 Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Mon, 13 Nov 2023 13:08:16 -0500 Subject: [PATCH 15/35] Update ci/scripts/utils/rocoto_statcount.py added correct exception when command 
is not found Co-authored-by: Rahul Mahajan --- ci/scripts/utils/rocoto_statcount.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ci/scripts/utils/rocoto_statcount.py b/ci/scripts/utils/rocoto_statcount.py index cc60e00f06..cfb2085d5c 100755 --- a/ci/scripts/utils/rocoto_statcount.py +++ b/ci/scripts/utils/rocoto_statcount.py @@ -50,6 +50,11 @@ def rocoto_statcount(): if not rocotostat: logger.exception("rocotostat not found in PATH") sys.exit(-1) + try: + rocotostat = which("rocotostat") + except CommandNotFoundError: + logger.exception("rocotostat not found in PATH") + raise CommandNotFoundError("rocotostat not found in PATH") xml_file_path = os.path.abspath(args.w) db_file_path = os.path.abspath(args.d) From 729002cfbec688320063275d5f3592f4dd1b6b23 Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Mon, 13 Nov 2023 13:09:29 -0500 Subject: [PATCH 16/35] Update ci/scripts/utils/rocoto_statcount.py added import for correct exception Co-authored-by: Rahul Mahajan --- ci/scripts/utils/rocoto_statcount.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/utils/rocoto_statcount.py b/ci/scripts/utils/rocoto_statcount.py index cfb2085d5c..86fe9b23c2 100755 --- a/ci/scripts/utils/rocoto_statcount.py +++ b/ci/scripts/utils/rocoto_statcount.py @@ -3,7 +3,7 @@ import sys import os -from wxflow import Executable, which, Logger +from wxflow import Executable, which, Logger, CommandNotFoundError from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter logger = Logger(level=os.environ.get("LOGGING_LEVEL", "DEBUG"), colored_log=False) From 55290005b685f773c0675c3a5e890661aa04afa9 Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Mon, 13 Nov 2023 18:14:53 +0000 Subject: [PATCH 17/35] updated elseif line and replaced PENDING with SUBMITTING --- ci/scripts/utils/rocoto_statcount.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/scripts/utils/rocoto_statcount.py 
b/ci/scripts/utils/rocoto_statcount.py index 86fe9b23c2..5724197da0 100755 --- a/ci/scripts/utils/rocoto_statcount.py +++ b/ci/scripts/utils/rocoto_statcount.py @@ -77,7 +77,7 @@ def rocoto_statcount(): 'Done_Cycles' : sum([ sublist.count('Done') for sublist in rocotostat_output ]) } - status_cases = [ 'SUCCEEDED', 'FAIL', 'DEAD', 'RUNNING', 'PENDING', 'QUEUED'] + status_cases = [ 'SUCCEEDED', 'FAIL', 'DEAD', 'RUNNING', 'SUBMITTING', 'QUEUED'] for case in status_cases: rocoto_status[case] = sum([ sublist.count(case) for sublist in rocotostat_output_all ]) @@ -92,7 +92,7 @@ def rocoto_statcount(): print(f'Number of {status} : {rocoto_status[status]}') if rocoto_status['Cycles'] == rocoto_status['Done_Cycles']: rocoto_state = 'Done' - elif rocoto_status['RUNNING'] + rocoto_status['SUBMITTING'] + rocoto_status['QUEUED'] == 0: + elif any(x in rocoto_status for x in ['RUNNING', 'SUBMITTING', 'QUEUED']): rocoto_state = 'Stalled' else: rocoto_state = 'Running' From 6d8057a6622e75713d38461f3d7057bb636fddee Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Mon, 13 Nov 2023 19:35:03 +0000 Subject: [PATCH 18/35] added exit code and removed redundent which --- ci/scripts/run_ci.sh | 2 ++ ci/scripts/utils/rocoto_statcount.py | 8 +++----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ci/scripts/run_ci.sh b/ci/scripts/run_ci.sh index cfc3fe5afe..36ecf4860d 100755 --- a/ci/scripts/run_ci.sh +++ b/ci/scripts/run_ci.sh @@ -83,6 +83,8 @@ for pr in ${pr_list}; do pslot=$(basename "${pslot_dir}") xml="${pslot_dir}/${pslot}.xml" db="${pslot_dir}/${pslot}.db" + echo "Running: ${rocotorun} -v 10 -w ${xml} -d ${db}" + "${rocotorun}" -v 10 -w "${xml}" -d "${db} set +e "${ROOT_DIR}/ci/scripts/utils/rocoto_statcount.py" -d "${db}" -w "${xml}" --check_stalled rc=$? 
diff --git a/ci/scripts/utils/rocoto_statcount.py b/ci/scripts/utils/rocoto_statcount.py index 5724197da0..117307f2b1 100755 --- a/ci/scripts/utils/rocoto_statcount.py +++ b/ci/scripts/utils/rocoto_statcount.py @@ -46,10 +46,6 @@ def rocoto_statcount(): args = input_args() - rocotostat = which("rocotostat") - if not rocotostat: - logger.exception("rocotostat not found in PATH") - sys.exit(-1) try: rocotostat = which("rocotostat") except CommandNotFoundError: @@ -59,7 +55,7 @@ def rocoto_statcount(): xml_file_path = os.path.abspath(args.w) db_file_path = os.path.abspath(args.d) - rocotostat_all = which("rocotostat") + rocotostat_all = rocotostat rocotostat.add_default_arg(['-w',xml_file_path,'-d',db_file_path,'-s']) rocotostat_all.add_default_arg(['-w',xml_file_path,'-d',db_file_path,'-a']) @@ -94,6 +90,8 @@ def rocoto_statcount(): rocoto_state = 'Done' elif any(x in rocoto_status for x in ['RUNNING', 'SUBMITTING', 'QUEUED']): rocoto_state = 'Stalled' + print(f'Rocoto State : {rocoto_state}') + sys.exit(-1) else: rocoto_state = 'Running' print(f'Rocoto State : {rocoto_state}') From b9a2d08442d3ba9a26bd85358e688cf2a71453a9 Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Mon, 13 Nov 2023 19:58:14 +0000 Subject: [PATCH 19/35] put Walters test back for no RUNNING, SUBMITTING, or QUEUED and added UNKNOWN as a final state --- ci/scripts/utils/rocoto_statcount.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ci/scripts/utils/rocoto_statcount.py b/ci/scripts/utils/rocoto_statcount.py index 117307f2b1..d8ef51b24e 100755 --- a/ci/scripts/utils/rocoto_statcount.py +++ b/ci/scripts/utils/rocoto_statcount.py @@ -33,7 +33,6 @@ def input_args(): parser.add_argument('-w', help='workflow_document', type=str) parser.add_argument('-d', help='database_file', type=str) - parser.add_argument('--check_stalled', help='check if any jobs do not advance (stalled)', action='store_true', required=False) args = parser.parse_args() @@ -88,7 +87,10 @@ def 
rocoto_statcount(): print(f'Number of {status} : {rocoto_status[status]}') if rocoto_status['Cycles'] == rocoto_status['Done_Cycles']: rocoto_state = 'Done' - elif any(x in rocoto_status for x in ['RUNNING', 'SUBMITTING', 'QUEUED']): + elif 'UNKNOWN' in rocoto_status: + rocoto_state = 'Unknown' + print(f'Rocoto State : {rocoto_state}') + elif rocoto_status['RUNNING'] + rocoto_status['SUBMITTING'] + rocoto_status['QUEUED'] == 0: rocoto_state = 'Stalled' print(f'Rocoto State : {rocoto_state}') sys.exit(-1) From 1ca109768a74fafe5d132112f7a30e125cdab041 Mon Sep 17 00:00:00 2001 From: "Terry.McGuinness" Date: Mon, 13 Nov 2023 20:09:04 +0000 Subject: [PATCH 20/35] added back which method to get new and sperate Execute Object to have alternative argument list to rocotostat --- ci/scripts/utils/rocoto_statcount.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/utils/rocoto_statcount.py b/ci/scripts/utils/rocoto_statcount.py index d8ef51b24e..0937cde034 100755 --- a/ci/scripts/utils/rocoto_statcount.py +++ b/ci/scripts/utils/rocoto_statcount.py @@ -54,7 +54,7 @@ def rocoto_statcount(): xml_file_path = os.path.abspath(args.w) db_file_path = os.path.abspath(args.d) - rocotostat_all = rocotostat + rocotostat_all = which("rocotostat") rocotostat.add_default_arg(['-w',xml_file_path,'-d',db_file_path,'-s']) rocotostat_all.add_default_arg(['-w',xml_file_path,'-d',db_file_path,'-a']) From ae0ca412399e90e9a7fbd12af84215a3bf2e51c5 Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Mon, 13 Nov 2023 15:22:47 -0500 Subject: [PATCH 21/35] Update check_ci.sh or true for echo date commands --- ci/scripts/check_ci.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/scripts/check_ci.sh b/ci/scripts/check_ci.sh index f35779114e..a35eacd4d1 100755 --- a/ci/scripts/check_ci.sh +++ b/ci/scripts/check_ci.sh @@ -152,8 +152,8 @@ for pr in ${pr_list}; do rm -f "${output_ci_single}" # echo "\`\`\`" > "${output_ci_single}" DATE=$(date) - 
echo "Experiment ${pslot} **SUCCESS** $(date +'%A %b %d, %Y') on ${MACHINE_ID^}" >> "${output_ci_single}" - echo "Experiment ${pslot} **SUCCESS** at $(date +'%A %b %d, %Y') on ${MACHINE_ID^}" >> "${output_ci}" + echo "Experiment ${pslot} **SUCCESS** $(date +'%A %b %d, %Y') on ${MACHINE_ID^}" || true >> "${output_ci_single}" + echo "Experiment ${pslot} **SUCCESS** at $(date +'%A %b %d, %Y') on ${MACHINE_ID^}" || true >> "${output_ci}" "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${output_ci_single}" fi From fd8c33b49769acb01298a20020e3f5dec2e4c10e Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Mon, 13 Nov 2023 15:23:44 -0500 Subject: [PATCH 22/35] Update run_ci.sh syntax fix on quote in rocotorun command line --- ci/scripts/run_ci.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/run_ci.sh b/ci/scripts/run_ci.sh index 36ecf4860d..f17815230d 100755 --- a/ci/scripts/run_ci.sh +++ b/ci/scripts/run_ci.sh @@ -84,7 +84,7 @@ for pr in ${pr_list}; do xml="${pslot_dir}/${pslot}.xml" db="${pslot_dir}/${pslot}.db" echo "Running: ${rocotorun} -v 10 -w ${xml} -d ${db}" - "${rocotorun}" -v 10 -w "${xml}" -d "${db} + "${rocotorun}" -v 10 -w "${xml}" -d "${db}" set +e "${ROOT_DIR}/ci/scripts/utils/rocoto_statcount.py" -d "${db}" -w "${xml}" --check_stalled rc=$? 
From 8146364cef746d0445e7e1efee9e880cb5bbe8c3 Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Mon, 13 Nov 2023 15:25:36 -0500 Subject: [PATCH 23/35] Update run_ci.sh removed old switch for check stalled that has been deprecated --- ci/scripts/run_ci.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/run_ci.sh b/ci/scripts/run_ci.sh index f17815230d..9458bf2609 100755 --- a/ci/scripts/run_ci.sh +++ b/ci/scripts/run_ci.sh @@ -86,7 +86,7 @@ for pr in ${pr_list}; do echo "Running: ${rocotorun} -v 10 -w ${xml} -d ${db}" "${rocotorun}" -v 10 -w "${xml}" -d "${db}" set +e - "${ROOT_DIR}/ci/scripts/utils/rocoto_statcount.py" -d "${db}" -w "${xml}" --check_stalled + "${ROOT_DIR}/ci/scripts/utils/rocoto_statcount.py" -d "${db}" -w "${xml}" rc=$? if [[ "${rc}" -ne 0 ]]; then output_ci="${pr_dir}/output_runtime_single.log" From 688613f93960c420a4383e2b86c21b5d3a16cebb Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Mon, 13 Nov 2023 20:31:10 +0000 Subject: [PATCH 24/35] removed DATE as date command --- ci/scripts/check_ci.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/scripts/check_ci.sh b/ci/scripts/check_ci.sh index a35eacd4d1..0c8251bd39 100755 --- a/ci/scripts/check_ci.sh +++ b/ci/scripts/check_ci.sh @@ -151,7 +151,6 @@ for pr in ${pr_list}; do rm -Rf "${pr_dir}/RUNTESTS/COMROT/${pslot}" rm -f "${output_ci_single}" # echo "\`\`\`" > "${output_ci_single}" - DATE=$(date) echo "Experiment ${pslot} **SUCCESS** $(date +'%A %b %d, %Y') on ${MACHINE_ID^}" || true >> "${output_ci_single}" echo "Experiment ${pslot} **SUCCESS** at $(date +'%A %b %d, %Y') on ${MACHINE_ID^}" || true >> "${output_ci}" "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${output_ci_single}" From e74567f41f17c2d6693cf6fb3864b7a1b3e62a94 Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Mon, 13 Nov 2023 20:45:03 +0000 Subject: [PATCH 25/35] white spaces from lint --- ci/scripts/utils/rocoto_statcount.py | 9 +++++---- 1 file 
changed, 5 insertions(+), 4 deletions(-) diff --git a/ci/scripts/utils/rocoto_statcount.py b/ci/scripts/utils/rocoto_statcount.py index 0937cde034..685a9a73e4 100755 --- a/ci/scripts/utils/rocoto_statcount.py +++ b/ci/scripts/utils/rocoto_statcount.py @@ -38,6 +38,7 @@ def input_args(): return args + def rocoto_statcount(): """ Run rocotostat and process its output. @@ -55,8 +56,8 @@ def rocoto_statcount(): db_file_path = os.path.abspath(args.d) rocotostat_all = which("rocotostat") - rocotostat.add_default_arg(['-w',xml_file_path,'-d',db_file_path,'-s']) - rocotostat_all.add_default_arg(['-w',xml_file_path,'-d',db_file_path,'-a']) + rocotostat.add_default_arg(['-w', xml_file_path, '-d', db_file_path, '-s']) + rocotostat_all.add_default_arg(['-w', xml_file_path, '-d', db_file_path, '-a']) rocotostat_output = rocotostat(output=str) rocotostat_output = rocotostat_output.splitlines()[1:] @@ -71,7 +72,7 @@ def rocoto_statcount(): 'Cycles' : len(rocotostat_output), 'Done_Cycles' : sum([ sublist.count('Done') for sublist in rocotostat_output ]) } - + status_cases = [ 'SUCCEEDED', 'FAIL', 'DEAD', 'RUNNING', 'SUBMITTING', 'QUEUED'] for case in status_cases: rocoto_status[case] = sum([ sublist.count(case) for sublist in rocotostat_output_all ]) @@ -96,4 +97,4 @@ def rocoto_statcount(): sys.exit(-1) else: rocoto_state = 'Running' - print(f'Rocoto State : {rocoto_state}') + print(f'Rocoto State : {rocoto_state}') From 1654e52464372328c8e4e1b37b3346d317efd71e Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Mon, 13 Nov 2023 20:52:15 +0000 Subject: [PATCH 26/35] pynorm indent on dict --- ci/scripts/utils/rocoto_statcount.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/scripts/utils/rocoto_statcount.py b/ci/scripts/utils/rocoto_statcount.py index 685a9a73e4..9876c0ae71 100755 --- a/ci/scripts/utils/rocoto_statcount.py +++ b/ci/scripts/utils/rocoto_statcount.py @@ -69,8 +69,8 @@ def rocoto_statcount(): rocotostat_output_all = [line for line 
in rocotostat_output_all if len(line) != 1] rocoto_status = { - 'Cycles' : len(rocotostat_output), - 'Done_Cycles' : sum([ sublist.count('Done') for sublist in rocotostat_output ]) + 'Cycles': len(rocotostat_output), + 'Done_Cycles': sum([ sublist.count('Done') for sublist in rocotostat_output ]) } status_cases = [ 'SUCCEEDED', 'FAIL', 'DEAD', 'RUNNING', 'SUBMITTING', 'QUEUED'] From 919b0c2dda75a497a0642764f6b1f3852aa8281a Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Mon, 13 Nov 2023 20:58:13 +0000 Subject: [PATCH 27/35] more white space related pynorm stuff --- ci/scripts/utils/rocoto_statcount.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ci/scripts/utils/rocoto_statcount.py b/ci/scripts/utils/rocoto_statcount.py index 9876c0ae71..dacd03893c 100755 --- a/ci/scripts/utils/rocoto_statcount.py +++ b/ci/scripts/utils/rocoto_statcount.py @@ -70,15 +70,16 @@ def rocoto_statcount(): rocoto_status = { 'Cycles': len(rocotostat_output), - 'Done_Cycles': sum([ sublist.count('Done') for sublist in rocotostat_output ]) + 'Done_Cycles': sum([sublist.count('Done') for sublist in rocotostat_output]) } status_cases = [ 'SUCCEEDED', 'FAIL', 'DEAD', 'RUNNING', 'SUBMITTING', 'QUEUED'] for case in status_cases: - rocoto_status[case] = sum([ sublist.count(case) for sublist in rocotostat_output_all ]) + rocoto_status[case] = sum([sublist.count(case) for sublist in rocotostat_output_all]) return rocoto_status + if __name__ == '__main__': args = input_args() From 1bfe74a2d65234cbdf42a8cec07ab4df95a39644 Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Mon, 13 Nov 2023 21:00:11 +0000 Subject: [PATCH 28/35] hopefully last white space related pynorm stuff --- ci/scripts/utils/rocoto_statcount.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/utils/rocoto_statcount.py b/ci/scripts/utils/rocoto_statcount.py index dacd03893c..d7d3ceae8e 100755 --- a/ci/scripts/utils/rocoto_statcount.py +++ 
b/ci/scripts/utils/rocoto_statcount.py @@ -73,7 +73,7 @@ def rocoto_statcount(): 'Done_Cycles': sum([sublist.count('Done') for sublist in rocotostat_output]) } - status_cases = [ 'SUCCEEDED', 'FAIL', 'DEAD', 'RUNNING', 'SUBMITTING', 'QUEUED'] + status_cases = ['SUCCEEDED', 'FAIL', 'DEAD', 'RUNNING', 'SUBMITTING', 'QUEUED'] for case in status_cases: rocoto_status[case] = sum([sublist.count(case) for sublist in rocotostat_output_all]) From ffd92cf833f9c37d78765b0625d588484525217f Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Mon, 13 Nov 2023 16:11:39 -0500 Subject: [PATCH 29/35] Update run_ci.sh trying to force refresh --- ci/scripts/run_ci.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/scripts/run_ci.sh b/ci/scripts/run_ci.sh index 9458bf2609..f056ec9b2f 100755 --- a/ci/scripts/run_ci.sh +++ b/ci/scripts/run_ci.sh @@ -88,6 +88,7 @@ for pr in ${pr_list}; do set +e "${ROOT_DIR}/ci/scripts/utils/rocoto_statcount.py" -d "${db}" -w "${xml}" rc=$? + if [[ "${rc}" -ne 0 ]]; then output_ci="${pr_dir}/output_runtime_single.log" { From 084719b285f39488b58d2a3cbbf284a0cffcb47f Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Mon, 13 Nov 2023 16:17:52 -0500 Subject: [PATCH 30/35] Update run_ci.sh stabs at solving double quote requirements for shellnoms --- ci/scripts/run_ci.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/run_ci.sh b/ci/scripts/run_ci.sh index f056ec9b2f..07fdcd97f9 100755 --- a/ci/scripts/run_ci.sh +++ b/ci/scripts/run_ci.sh @@ -86,7 +86,7 @@ for pr in ${pr_list}; do echo "Running: ${rocotorun} -v 10 -w ${xml} -d ${db}" "${rocotorun}" -v 10 -w "${xml}" -d "${db}" set +e - "${ROOT_DIR}/ci/scripts/utils/rocoto_statcount.py" -d "${db}" -w "${xml}" + "${ROOT_DIR}"/ci/scripts/utils/rocoto_statcount.py -d "${db}" -w "${xml}" rc=$? 
if [[ "${rc}" -ne 0 ]]; then From 14e0df1b4e397b29005953dd59e67f83e54ed6c7 Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Mon, 13 Nov 2023 16:20:08 -0500 Subject: [PATCH 31/35] Update run_ci.sh double quoting flags won't stop even though they are correct --- ci/scripts/run_ci.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/scripts/run_ci.sh b/ci/scripts/run_ci.sh index 07fdcd97f9..bfaddf54ee 100755 --- a/ci/scripts/run_ci.sh +++ b/ci/scripts/run_ci.sh @@ -83,7 +83,6 @@ for pr in ${pr_list}; do pslot=$(basename "${pslot_dir}") xml="${pslot_dir}/${pslot}.xml" db="${pslot_dir}/${pslot}.db" - echo "Running: ${rocotorun} -v 10 -w ${xml} -d ${db}" "${rocotorun}" -v 10 -w "${xml}" -d "${db}" set +e "${ROOT_DIR}"/ci/scripts/utils/rocoto_statcount.py -d "${db}" -w "${xml}" From 18557c038d2f0015843293510ef610da8ba4e79f Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Mon, 13 Nov 2023 16:24:31 -0500 Subject: [PATCH 32/35] Update run_ci.sh still wrestling with double quote complaints from shellnormns even though they are correct in the script and trying that ever the F&*@ --- ci/scripts/run_ci.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/run_ci.sh b/ci/scripts/run_ci.sh index bfaddf54ee..c1802fca03 100755 --- a/ci/scripts/run_ci.sh +++ b/ci/scripts/run_ci.sh @@ -89,7 +89,7 @@ for pr in ${pr_list}; do rc=$? 
if [[ "${rc}" -ne 0 ]]; then - output_ci="${pr_dir}/output_runtime_single.log" + output_ci="${pr_dir}"/output_runtime_single.log { echo "${pslot} has *** STALLED **** on ${MACHINE_ID^}" echo "A job in experiment ${pslot} in ${pslot_dir}" From 451b55ce4ebee106dffb2fc52e2ffff034cbdd10 Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Tue, 14 Nov 2023 20:09:40 +0000 Subject: [PATCH 33/35] updated description for rocoto_statcount --- ci/scripts/utils/rocoto_statcount.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ci/scripts/utils/rocoto_statcount.py b/ci/scripts/utils/rocoto_statcount.py index d7d3ceae8e..98953d559c 100755 --- a/ci/scripts/utils/rocoto_statcount.py +++ b/ci/scripts/utils/rocoto_statcount.py @@ -22,10 +22,8 @@ def input_args(): description = """ Using rocotostat to get the status of all jobs this scripts determines rocoto_state: if all cycles are done, then rocoto_state is Done. - If all cycles are not done, then rocoto_state is Running. - If the check_stalled is used then rocotorun is issued and - rocotostat is run again and checks if all jobs have not advanced, then - rocoto_state is Stalled and the script exits with -1. + Assuming rocotorun had just been run, and the rocoto_state is not Done, then + rocoto_state is Stalled if there are no jobs that are RUNNING, SUBMITTING, or QUEUED. 
""" parser = ArgumentParser(description=description, From 338831c039c0d977cd63ea249c494ec8d29a2f16 Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Tue, 28 Nov 2023 18:06:33 -0500 Subject: [PATCH 34/35] Update ci/scripts/run_ci.sh Co-authored-by: Walter Kolczynski - NOAA --- ci/scripts/run_ci.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/run_ci.sh b/ci/scripts/run_ci.sh index c1802fca03..0982b1f8dd 100755 --- a/ci/scripts/run_ci.sh +++ b/ci/scripts/run_ci.sh @@ -85,7 +85,7 @@ for pr in ${pr_list}; do db="${pslot_dir}/${pslot}.db" "${rocotorun}" -v 10 -w "${xml}" -d "${db}" set +e - "${ROOT_DIR}"/ci/scripts/utils/rocoto_statcount.py -d "${db}" -w "${xml}" + "${ROOT_DIR}/ci/scripts/utils/rocoto_statcount.py" -d "${db}" -w "${xml}" rc=$? if [[ "${rc}" -ne 0 ]]; then From a839b4b264332d185c0f0684648da43f87dae351 Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Tue, 28 Nov 2023 18:06:45 -0500 Subject: [PATCH 35/35] Update ci/scripts/run_ci.sh Co-authored-by: Walter Kolczynski - NOAA --- ci/scripts/run_ci.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/run_ci.sh b/ci/scripts/run_ci.sh index 0982b1f8dd..03389f8198 100755 --- a/ci/scripts/run_ci.sh +++ b/ci/scripts/run_ci.sh @@ -89,7 +89,7 @@ for pr in ${pr_list}; do rc=$? if [[ "${rc}" -ne 0 ]]; then - output_ci="${pr_dir}"/output_runtime_single.log + output_ci="${pr_dir}/output_runtime_single.log" { echo "${pslot} has *** STALLED **** on ${MACHINE_ID^}" echo "A job in experiment ${pslot} in ${pslot_dir}"