From 499401d586a157cc2de7f6cf4650422563a6bc4e Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Wed, 7 Aug 2024 12:45:10 -0400 Subject: [PATCH 01/32] add global default env variable for gh the GitHub CLI executable in account running agents for EMC global-workflows --- ci/Jenkinsfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index ae86e33c66..eaee2c7671 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -17,6 +17,10 @@ pipeline { parallelsAlwaysFailFast() } + environment { + GH = '~/bin/gh' + } + stages { // This initial stage is used to get the Machine name from the GitHub labels on the PR // which is used to designate the Nodes in the Jenkins Controler by the agent label // Each Jenknis Node is connected to said machine via an JAVA agent via an ssh tunnel From 0ee59ea82716fb98569e17250b95971f3f72398c Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Wed, 7 Aug 2024 12:46:42 -0400 Subject: [PATCH 02/32] add global default env variable for gh the GitHub CLI executable in account running agents for EMC global-workflows --- ci/Jenkinsfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index eaee2c7671..a23a553465 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -5,7 +5,8 @@ def caseList = '' // Location of the custom workspaces for each machine in the CI system. They are persitent for each iteration of the PR. 
def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea'] def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/stmp/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI'] -def repo_url = 'git@github.com:NOAA-EMC/global-workflow.git' +// def repo_url = 'git@github.com:NOAA-EMC/global-workflow.git' +def repo_url = 'git@github.com:TerrenceMcGuinness-NOAA/global-workflow.git' def STATUS = 'Passed' pipeline { From 2f49a04b3c1e5a654f00df13dfa5e1769ea7e51b Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Wed, 7 Aug 2024 12:59:03 -0400 Subject: [PATCH 03/32] reverted to authoritative URL after testing --- ci/Jenkinsfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index a23a553465..eaee2c7671 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -5,8 +5,7 @@ def caseList = '' // Location of the custom workspaces for each machine in the CI system. They are persitent for each iteration of the PR. 
def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea'] def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/stmp/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI'] -// def repo_url = 'git@github.com:NOAA-EMC/global-workflow.git' -def repo_url = 'git@github.com:TerrenceMcGuinness-NOAA/global-workflow.git' +def repo_url = 'git@github.com:NOAA-EMC/global-workflow.git' def STATUS = 'Passed' pipeline { From 291d1de1603f2f1d7df53e672f39f9caa503c191 Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Wed, 7 Aug 2024 14:14:56 -0400 Subject: [PATCH 04/32] added a catcherror block around running experiments to capture fails on the Jenkins Controller --- ci/Jenkinsfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index eaee2c7671..b8499c7e9a 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -227,6 +227,7 @@ pipeline { expression { return caseList.contains(Case) } } steps { + catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') { script { HOMEgfs = "${CUSTOM_WORKSPACE}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${Case}", returnStdout: true).trim() @@ -275,6 +276,7 @@ pipeline { } } } + } } } } From a12d61130a4f8027b0e227aed99e92a4231bd697 Mon Sep 17 00:00:00 2001 From: TerrenceMcGuinness-NOAA Date: Wed, 7 Aug 2024 14:31:52 -0400 Subject: [PATCH 05/32] Update ci/Jenkinsfile Co-authored-by: David Huber <69919478+DavidHuber-NOAA@users.noreply.github.com> --- ci/Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index b8499c7e9a..d5ba86e2dd 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -18,7 +18,7 @@ pipeline { } environment { - GH = '~/bin/gh' + GH = sh(script: "which gh || echo '~/bin/gh'", returnStdout: 
true).trim() } stages { // This initial stage is used to get the Machine name from the GitHub labels on the PR From a1011114be0bff628f46d7b4fae0dff88279e03e Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Wed, 7 Aug 2024 14:39:01 -0400 Subject: [PATCH 06/32] reverted suggestion and added indent --- ci/Jenkinsfile | 82 +++++++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index d5ba86e2dd..2bb47b63d3 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -18,7 +18,7 @@ pipeline { } environment { - GH = sh(script: "which gh || echo '~/bin/gh'", returnStdout: true).trim() + GH = '~/bin/gh' } stages { // This initial stage is used to get the Machine name from the GitHub labels on the PR @@ -228,55 +228,55 @@ pipeline { } steps { catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') { - script { - HOMEgfs = "${CUSTOM_WORKSPACE}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments - def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${Case}", returnStdout: true).trim() - def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs" - sh(script: " rm -f ${error_file}") - try { - sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} ${system}") - } catch (Exception error_experment) { - sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cancel_batch_jobs ${pslot}") - ws(CUSTOM_WORKSPACE) { - def error_logs = "" - def error_logs_message = "" - if (fileExists(error_file)) { - def fileContent = readFile error_file - def lines = fileContent.readLines() - for (line in lines) { - echo "archiving: ${line}" - if (fileExists("${CUSTOM_WORKSPACE}/${line}") && readFile("${CUSTOM_WORKSPACE}/${line}").length() > 0) { - try { - archiveArtifacts artifacts: "${line}", fingerprint: true - error_logs = error_logs + "${CUSTOM_WORKSPACE}/${line} " - error_logs_message = 
error_logs_message + "${CUSTOM_WORKSPACE}/${line}\n" - } catch (Exception error_arch) { - echo "Failed to archive error log ${line}: ${error_arch.getMessage()}" + script { + HOMEgfs = "${CUSTOM_WORKSPACE}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments + def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${Case}", returnStdout: true).trim() + def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs" + sh(script: " rm -f ${error_file}") + try { + sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} ${system}") + } catch (Exception error_experment) { + sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cancel_batch_jobs ${pslot}") + ws(CUSTOM_WORKSPACE) { + def error_logs = "" + def error_logs_message = "" + if (fileExists(error_file)) { + def fileContent = readFile error_file + def lines = fileContent.readLines() + for (line in lines) { + echo "archiving: ${line}" + if (fileExists("${CUSTOM_WORKSPACE}/${line}") && readFile("${CUSTOM_WORKSPACE}/${line}").length() > 0) { + try { + archiveArtifacts artifacts: "${line}", fingerprint: true + error_logs = error_logs + "${CUSTOM_WORKSPACE}/${line} " + error_logs_message = error_logs_message + "${CUSTOM_WORKSPACE}/${line}\n" + } catch (Exception error_arch) { + echo "Failed to archive error log ${line}: ${error_arch.getMessage()}" + } } } + try { + gist_url = sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --gist PR_${env.CHANGE_ID}", returnStdout: true).trim() + sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${Case} **FAILED** on ${Machine} with error logs:\n\\`\\`\\`\n${error_logs_message}\\`\\`\\`\n\nFollow link here to view the contents of the above file(s): [(link)](${gist_url})" """) + sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --repo PR_${env.CHANGE_ID}") + } catch (Exception 
error_comment) { + echo "Failed to comment on PR: ${error_comment.getMessage()}" + } + } else { + echo "No error logs found for failed cases in $CUSTOM_WORKSPACE/RUNTESTS/${pslot}_error.logs" } + STATUS = 'Failed' try { - gist_url = sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --gist PR_${env.CHANGE_ID}", returnStdout: true).trim() - sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${Case} **FAILED** on ${Machine} with error logs:\n\\`\\`\\`\n${error_logs_message}\\`\\`\\`\n\nFollow link here to view the contents of the above file(s): [(link)](${gist_url})" """) - sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --repo PR_${env.CHANGE_ID}") - } catch (Exception error_comment) { - echo "Failed to comment on PR: ${error_comment.getMessage()}" + sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --remove-label "CI-${Machine}-Running" --add-label "CI-${Machine}-${STATUS}" """, returnStatus: true) + sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${Case} **FAILED** on ${Machine} in\n\\`${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}\\`" """) + } catch (Exception e) { + echo "Failed to update label from Running to ${STATUS}: ${e.getMessage()}" } - } else { - echo "No error logs found for failed cases in $CUSTOM_WORKSPACE/RUNTESTS/${pslot}_error.logs" - } - STATUS = 'Failed' - try { - sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --remove-label "CI-${Machine}-Running" --add-label "CI-${Machine}-${STATUS}" """, returnStatus: true) - sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${Case} **FAILED** on ${Machine} in\n\\`${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}\\`" """) - } catch (Exception e) { - echo "Failed to update label from Running to ${STATUS}: ${e.getMessage()}" + echo "Failed to run experiments ${Case} on ${Machine}" } - echo "Failed to run experiments ${Case} on 
${Machine}" } } } - } } } } From e7b38e22c16eba49d1a4f2e08bf400eb5d54616f Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Wed, 7 Aug 2024 15:02:10 -0400 Subject: [PATCH 07/32] added build number to final message --- ci/Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 2bb47b63d3..d629cbdbba 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -297,7 +297,7 @@ pipeline { """, returnStatus: true) sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --add-label "CI-${Machine}-${STATUS}" """, returnStatus: true) if (fileExists("${CUSTOM_WORKSPACE}/RUNTESTS/ci-run_check.log")) { - sh(script: """echo "**CI ${STATUS}** ${Machine} at
Built and ran in directory \\`${CUSTOM_WORKSPACE}\\`\n\\`\\`\\`\n" | cat - ${CUSTOM_WORKSPACE}/RUNTESTS/ci-run_check.log > temp && mv temp ${CUSTOM_WORKSPACE}/RUNTESTS/ci-run_check.log""", returnStatus: true) + sh(script: """echo "**CI ${STATUS}** on ${Machine} in Build# ${env.BUILD_NUMBER}
Built and ran in directory \\`${CUSTOM_WORKSPACE}\\`\n\\`\\`\\`\n" | cat - ${CUSTOM_WORKSPACE}/RUNTESTS/ci-run_check.log > temp && mv temp ${CUSTOM_WORKSPACE}/RUNTESTS/ci-run_check.log""", returnStatus: true) sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body-file ${CUSTOM_WORKSPACE}/RUNTESTS/ci-run_check.log """, returnStatus: true) } if (STATUS == 'Passed') { From ffbd2beb64789d4ecd66c36fe04067dfcbfeb37b Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Wed, 7 Aug 2024 15:34:13 -0400 Subject: [PATCH 08/32] make GH a global variable and assign it once the Node conncetion is established --- ci/Jenkinsfile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index d629cbdbba..557369db92 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -2,6 +2,7 @@ def Machine = 'none' def machine = 'none' def CUSTOM_WORKSPACE = 'none' def caseList = '' +def GH = 'none' // Location of the custom workspaces for each machine in the CI system. They are persitent for each iteration of the PR. 
def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea'] def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/stmp/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI'] @@ -17,10 +18,6 @@ pipeline { parallelsAlwaysFailFast() } - environment { - GH = '~/bin/gh' - } - stages { // This initial stage is used to get the Machine name from the GitHub labels on the PR // which is used to designate the Nodes in the Jenkins Controler by the agent label // Each Jenknis Node is connected to said machine via an JAVA agent via an ssh tunnel @@ -82,6 +79,7 @@ pipeline { echo "Getting Common Workspace for ${Machine}" ws("${custom_workspace[machine]}/${env.CHANGE_ID}") { properties([parameters([[$class: 'NodeParameterDefinition', allowedSlaves: ['built-in', 'Hercules-EMC', 'Hera-EMC', 'Orion-EMC'], defaultSlaves: ['built-in'], name: '', nodeEligibility: [$class: 'AllNodeEligibility'], triggerIfResult: 'allCases']])]) + GH = sh(script: "which gh || echo '~/bin/gh'", returnStdout: true).trim() CUSTOM_WORKSPACE = "${WORKSPACE}" sh(script: "mkdir -p ${CUSTOM_WORKSPACE}/RUNTESTS;rm -Rf ${CUSTOM_WORKSPACE}/RUNTESTS/*") sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --add-label "CI-${Machine}-Building" --remove-label "CI-${Machine}-Ready" """) From b7a1ee8c2a6a3fda3681008567dfd4d6e39fb423 Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Wed, 7 Aug 2024 17:14:08 -0400 Subject: [PATCH 09/32] updated buildResult on stage catError mode and forced an error to get fail reflected on controler --- ci/Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 557369db92..fcc9037278 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -225,7 +225,7 @@ pipeline { expression { return caseList.contains(Case) } } steps { - catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') { + catchError(buildResult: 
'UNSTABLE', stageResult: 'FAILURE') { script { HOMEgfs = "${CUSTOM_WORKSPACE}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${Case}", returnStdout: true).trim() @@ -270,7 +270,7 @@ pipeline { } catch (Exception e) { echo "Failed to update label from Running to ${STATUS}: ${e.getMessage()}" } - echo "Failed to run experiments ${Case} on ${Machine}" + error("Failed to run experiments ${Case} on ${Machine}") } } } From de53854b3a3e8ae700f94cc4eeebe509369563d5 Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Wed, 7 Aug 2024 18:03:55 -0400 Subject: [PATCH 10/32] added build number to all messages and trying FAIL attribute in buildResult for stage level catchError --- ci/Jenkinsfile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index fcc9037278..7d28ce9ba1 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -118,7 +118,7 @@ pipeline { checkout scm } catch (Exception e) { if (env.CHANGE_ID) { - sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Checkout **Failed** on ${Machine}: ${e.getMessage()}" """) + sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Checkout **Failed** on ${Machine} in Build# ${env.BUILD_NUMBER}: ${e.getMessage()}" """) } STATUS = 'Failed' error("Failed to checkout: ${e.getMessage()}") @@ -151,7 +151,7 @@ pipeline { try { sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --repo PR_BUILD_${env.CHANGE_ID}") gist_url=sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --gist PR_BUILD_${env.CHANGE_ID}", returnStdout: true).trim() - sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Build **FAILED** on **${Machine}** with error logs:\n\\`\\`\\`\n${error_logs_message}\\`\\`\\`\n\nFollow link here to view the contents of the 
above file(s): [(link)](${gist_url})" """) + sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Build **FAILED** on **${Machine}** in Build# ${env.BUILD_NUMBER} with error logs:\n\\`\\`\\`\n${error_logs_message}\\`\\`\\`\n\nFollow link here to view the contents of the above file(s): [(link)](${gist_url})" """) } catch (Exception error_comment) { echo "Failed to comment on PR: ${error_comment.getMessage()}" } @@ -212,7 +212,7 @@ pipeline { try { error_output = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${Case}.yaml", returnStdout: true).trim() } catch (Exception error_create) { - sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "${Case} **FAILED** to create experment on ${Machine}\n with the error:\n\\`\\`\\`\n${error_output}\\`\\`\\`" """) + sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "${Case} **FAILED** to create experment on ${Machine} in BUILD# ${env.BUILD_NUMBER}\n with the error:\n\\`\\`\\`\n${error_output}\\`\\`\\`" """) error("Case ${Case} failed to create experment directory") } } @@ -225,7 +225,7 @@ pipeline { expression { return caseList.contains(Case) } } steps { - catchError(buildResult: 'UNSTABLE', stageResult: 'FAILURE') { + catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') { script { HOMEgfs = "${CUSTOM_WORKSPACE}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${Case}", returnStdout: true).trim() @@ -255,7 +255,7 @@ pipeline { } try { gist_url = sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --gist PR_${env.CHANGE_ID}", returnStdout: true).trim() - sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${Case} **FAILED** on ${Machine} with error logs:\n\\`\\`\\`\n${error_logs_message}\\`\\`\\`\n\nFollow link 
here to view the contents of the above file(s): [(link)](${gist_url})" """) + sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${Case} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} with error logs:\n\\`\\`\\`\n${error_logs_message}\\`\\`\\`\n\nFollow link here to view the contents of the above file(s): [(link)](${gist_url})" """) sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --repo PR_${env.CHANGE_ID}") } catch (Exception error_comment) { echo "Failed to comment on PR: ${error_comment.getMessage()}" @@ -266,7 +266,7 @@ pipeline { STATUS = 'Failed' try { sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --remove-label "CI-${Machine}-Running" --add-label "CI-${Machine}-${STATUS}" """, returnStatus: true) - sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${Case} **FAILED** on ${Machine} in\n\\`${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}\\`" """) + sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${Case} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} in\n\\`${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}\\`" """) } catch (Exception e) { echo "Failed to update label from Running to ${STATUS}: ${e.getMessage()}" } From a2daf593543c39b90e3489bea02842964c669c53 Mon Sep 17 00:00:00 2001 From: tmcguinness Date: Thu, 8 Aug 2024 14:12:39 +0000 Subject: [PATCH 11/32] added failFast to case matrix --- ci/Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 7d28ce9ba1..049e6b36fb 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -224,6 +224,7 @@ pipeline { when { expression { return caseList.contains(Case) } } + failFast true steps { catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') { script { From d2ec0e067c5495b85c4573534fb6570142e9eedc Mon Sep 17 00:00:00 2001 From: tmcguinness Date: Thu, 8 Aug 2024 14:54:21 +0000 Subject: [PATCH 12/32] hard coded 
failFast feature into running cases matrix --- ci/Jenkinsfile | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 049e6b36fb..be8c92a568 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -17,6 +17,9 @@ pipeline { skipDefaultCheckout() parallelsAlwaysFailFast() } + environment { + FAILURE_FLAG = 'false' + } stages { // This initial stage is used to get the Machine name from the GitHub labels on the PR // which is used to designate the Nodes in the Jenkins Controler by the agent label @@ -233,7 +236,18 @@ pipeline { def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs" sh(script: " rm -f ${error_file}") try { - sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} ${system}") + // Run the run-check script in the background + def process = sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} ${system} & echo \$!", returnStdout: true).trim() + def pid = process.tokenize().last() + // Periodically check for the failure flag + while (true) { + if (env.FAILURE_FLAG == 'true') { + sh(script: "kill ${pid}") + error("Aborting due to previous failure") + } + sleep(time: 10, unit: 'SECONDS') + } + } catch (Exception error_experment) { sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cancel_batch_jobs ${pslot}") ws(CUSTOM_WORKSPACE) { @@ -264,14 +278,15 @@ pipeline { } else { echo "No error logs found for failed cases in $CUSTOM_WORKSPACE/RUNTESTS/${pslot}_error.logs" } - STATUS = 'Failed' try { sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --remove-label "CI-${Machine}-Running" --add-label "CI-${Machine}-${STATUS}" """, returnStatus: true) sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${Case} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} in\n\\`${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}\\`" """) } catch (Exception e) { echo "Failed to update label from 
Running to ${STATUS}: ${e.getMessage()}" } - error("Failed to run experiments ${Case} on ${Machine}") + STATUS = 'Failed' + env.FAILURE_FLAG = 'true' + error("Failed to run experiments ${Case} on ${Machine} in Build# ${env.BUILD_NUMBER}") } } } From b1bf521f49eb373086a5c3ba425925032ba1650f Mon Sep 17 00:00:00 2001 From: tmcguinness Date: Thu, 8 Aug 2024 15:23:40 +0000 Subject: [PATCH 13/32] trying out a more complex solution using threads to check background process for run-check --- ci/Jenkinsfile | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index be8c92a568..454b8cf3c5 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -236,18 +236,23 @@ pipeline { def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs" sh(script: " rm -f ${error_file}") try { - // Run the run-check script in the background def process = sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} ${system} & echo \$!", returnStdout: true).trim() def pid = process.tokenize().last() - // Periodically check for the failure flag + def checkFailureFlag = { while (true) { if (env.FAILURE_FLAG == 'true') { sh(script: "kill ${pid}") error("Aborting due to previous failure") } - sleep(time: 10, unit: 'SECONDS') + sleep(time: 60, unit: 'SECONDS') } - + } + def checkThread = Thread.start(checkFailureFlag) + def exitCode = sh(script: "wait ${pid}", returnStatus: true) + checkThread.interrupt() + if (exitCode != 0) { + error("Experiment ${Case} failed with exit code ${exitCode}") + } } catch (Exception error_experment) { sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cancel_batch_jobs ${pslot}") ws(CUSTOM_WORKSPACE) { From 99a9a9d1d40d491558ea9bd032fa1e37299b2a58 Mon Sep 17 00:00:00 2001 From: tmcguinness Date: Thu, 8 Aug 2024 16:58:11 +0000 Subject: [PATCH 14/32] removed attempt to use threads for hand coded fastFail for cases --- ci/Jenkinsfile | 26 +++----------------------- 1 file changed, 3 
insertions(+), 23 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 454b8cf3c5..049e6b36fb 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -17,9 +17,6 @@ pipeline { skipDefaultCheckout() parallelsAlwaysFailFast() } - environment { - FAILURE_FLAG = 'false' - } stages { // This initial stage is used to get the Machine name from the GitHub labels on the PR // which is used to designate the Nodes in the Jenkins Controler by the agent label @@ -236,23 +233,7 @@ pipeline { def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs" sh(script: " rm -f ${error_file}") try { - def process = sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} ${system} & echo \$!", returnStdout: true).trim() - def pid = process.tokenize().last() - def checkFailureFlag = { - while (true) { - if (env.FAILURE_FLAG == 'true') { - sh(script: "kill ${pid}") - error("Aborting due to previous failure") - } - sleep(time: 60, unit: 'SECONDS') - } - } - def checkThread = Thread.start(checkFailureFlag) - def exitCode = sh(script: "wait ${pid}", returnStatus: true) - checkThread.interrupt() - if (exitCode != 0) { - error("Experiment ${Case} failed with exit code ${exitCode}") - } + sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} ${system}") } catch (Exception error_experment) { sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cancel_batch_jobs ${pslot}") ws(CUSTOM_WORKSPACE) { @@ -283,15 +264,14 @@ pipeline { } else { echo "No error logs found for failed cases in $CUSTOM_WORKSPACE/RUNTESTS/${pslot}_error.logs" } + STATUS = 'Failed' try { sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --remove-label "CI-${Machine}-Running" --add-label "CI-${Machine}-${STATUS}" """, returnStatus: true) sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${Case} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} in\n\\`${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}\\`" """) } 
catch (Exception e) { echo "Failed to update label from Running to ${STATUS}: ${e.getMessage()}" } - STATUS = 'Failed' - env.FAILURE_FLAG = 'true' - error("Failed to run experiments ${Case} on ${Machine} in Build# ${env.BUILD_NUMBER}") + error("Failed to run experiments ${Case} on ${Machine}") } } } From 986186f6fc824ff2ce9cb754c7219c411815365e Mon Sep 17 00:00:00 2001 From: tmcguinness Date: Thu, 8 Aug 2024 17:05:34 +0000 Subject: [PATCH 15/32] moved the failFast attribute at the top of the matrix block --- ci/Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 049e6b36fb..a3cee478bb 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -187,6 +187,7 @@ pipeline { expression { STATUS != 'Failed' } } matrix { + failFast true agent { label NodeName[machine].toLowerCase() } axes { axis { @@ -224,9 +225,8 @@ pipeline { when { expression { return caseList.contains(Case) } } - failFast true steps { - catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') { + catchError(buildResult: 'UNSTABLE', stageResult: 'FAILURE') { script { HOMEgfs = "${CUSTOM_WORKSPACE}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${Case}", returnStdout: true).trim() From 002f9fd2f9fd607b0fb3846875d3209ade1776f9 Mon Sep 17 00:00:00 2001 From: tmcguinness Date: Thu, 8 Aug 2024 17:23:56 +0000 Subject: [PATCH 16/32] taking a stab at using a parrallel construct in place of matrix --- ci/Jenkinsfile | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index a3cee478bb..3d674a35dc 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -186,16 +186,10 @@ pipeline { when { expression { STATUS != 'Failed' } } - matrix { - failFast true - agent { label NodeName[machine].toLowerCase() } - axes { - axis { - name 'Case' - // TODO add dynamic 
list of cases from env vars (needs addtional plugins) - values 'C48C48_ufs_hybatmDA', 'C48_ATM', 'C48_S2SW', 'C48_S2SWA_gefs', 'C48mx500_3DVarAOWCDA', 'C96C48_hybatmDA', 'C96_atm3DVar', 'C96_atmaerosnowDA' - } - } + agent { label NodeName[machine].toLowerCase() } + def cases = ['C48C48_ufs_hybatmDA', 'C48_ATM', 'C48_S2SW', 'C48_S2SWA_gefs', 'C48mx500_3DVarAOWCDA', 'C96C48_hybatmDA', 'C96_atm3DVar', 'C96_atmaerosnowDA'] + def parallelStages = cases.collectEntries { caseName -> + ["${caseName}": { stages { stage('Create Experiments') { @@ -279,7 +273,9 @@ pipeline { } } } + }] } + parallel parallelStages + [failFast: true] } stage( '5. FINALIZE' ) { From 20f3528b812131219f200e54c9548c505ea693cf Mon Sep 17 00:00:00 2001 From: tmcguinness Date: Thu, 8 Aug 2024 17:29:18 +0000 Subject: [PATCH 17/32] parallel section needs to be in a script under steps --- ci/Jenkinsfile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 3d674a35dc..23ab9adb16 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -187,6 +187,8 @@ pipeline { expression { STATUS != 'Failed' } } agent { label NodeName[machine].toLowerCase() } + steps { + script { def cases = ['C48C48_ufs_hybatmDA', 'C48_ATM', 'C48_S2SW', 'C48_S2SWA_gefs', 'C48mx500_3DVarAOWCDA', 'C96C48_hybatmDA', 'C96_atm3DVar', 'C96_atmaerosnowDA'] def parallelStages = cases.collectEntries { caseName -> ["${caseName}": { @@ -277,6 +279,9 @@ pipeline { } parallel parallelStages + [failFast: true] } + } + } + stage( '5. 
FINALIZE' ) { agent { label NodeName[machine].toLowerCase() } From 530bd19fd76f1fa80d927c64b6060f9691bb6966 Mon Sep 17 00:00:00 2001 From: tmcguinness Date: Thu, 8 Aug 2024 17:35:45 +0000 Subject: [PATCH 18/32] still trying to get stage/steps hierarchy in parallel block --- ci/Jenkinsfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 23ab9adb16..40471c6f53 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -187,7 +187,6 @@ pipeline { expression { STATUS != 'Failed' } } agent { label NodeName[machine].toLowerCase() } - steps { script { def cases = ['C48C48_ufs_hybatmDA', 'C48_ATM', 'C48_S2SW', 'C48_S2SWA_gefs', 'C48mx500_3DVarAOWCDA', 'C96C48_hybatmDA', 'C96_atm3DVar', 'C96_atmaerosnowDA'] def parallelStages = cases.collectEntries { caseName -> @@ -279,7 +278,6 @@ pipeline { } parallel parallelStages + [failFast: true] } - } } From 246bac45f26e5a2acf3ca306f05300c3ef07063e Mon Sep 17 00:00:00 2001 From: tmcguinness Date: Thu, 8 Aug 2024 17:40:41 +0000 Subject: [PATCH 19/32] third attempt at getting stages in the right place --- ci/Jenkinsfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 40471c6f53..1d2b101640 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -187,12 +187,12 @@ pipeline { expression { STATUS != 'Failed' } } agent { label NodeName[machine].toLowerCase() } + steps { script { + stages { def cases = ['C48C48_ufs_hybatmDA', 'C48_ATM', 'C48_S2SW', 'C48_S2SWA_gefs', 'C48mx500_3DVarAOWCDA', 'C96C48_hybatmDA', 'C96_atm3DVar', 'C96_atmaerosnowDA'] def parallelStages = cases.collectEntries { caseName -> ["${caseName}": { - stages { - stage('Create Experiments') { when { expression { return caseList.contains(Case) } @@ -278,6 +278,7 @@ pipeline { } parallel parallelStages + [failFast: true] } + } } From 7659b37361aa601b82af3783f28543e73d999b1f Mon Sep 17 00:00:00 2001 From: tmcguinness Date: Thu, 8 Aug 2024 17:45:00 +0000 Subject: [PATCH 20/32] fourth with
aliginging brackets --- ci/Jenkinsfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 1d2b101640..2ead4e1aad 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -273,12 +273,12 @@ pipeline { } } } - } - }] + }] } parallel parallelStages + [failFast: true] - } } + } + } } From 0bfe315f0d386989c6ce47bf62995be95fe807f6 Mon Sep 17 00:00:00 2001 From: tmcguinness Date: Thu, 8 Aug 2024 17:50:02 +0000 Subject: [PATCH 21/32] moved script inside of stages --- ci/Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 2ead4e1aad..f708dbb201 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -188,8 +188,8 @@ pipeline { } agent { label NodeName[machine].toLowerCase() } steps { - script { stages { + script { def cases = ['C48C48_ufs_hybatmDA', 'C48_ATM', 'C48_S2SW', 'C48_S2SWA_gefs', 'C48mx500_3DVarAOWCDA', 'C96C48_hybatmDA', 'C96_atm3DVar', 'C96_atmaerosnowDA'] def parallelStages = cases.collectEntries { caseName -> ["${caseName}": { From b7e48be2a71661145f34b9fd28ff86447b80dec8 Mon Sep 17 00:00:00 2001 From: tmcguinness Date: Thu, 8 Aug 2024 17:59:02 +0000 Subject: [PATCH 22/32] removed extra stage block --- ci/Jenkinsfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index f708dbb201..47de209472 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -188,7 +188,6 @@ pipeline { } agent { label NodeName[machine].toLowerCase() } steps { - stages { script { def cases = ['C48C48_ufs_hybatmDA', 'C48_ATM', 'C48_S2SW', 'C48_S2SWA_gefs', 'C48mx500_3DVarAOWCDA', 'C96C48_hybatmDA', 'C96_atm3DVar', 'C96_atmaerosnowDA'] def parallelStages = cases.collectEntries { caseName -> @@ -279,7 +278,6 @@ pipeline { } } } - } stage( '5. 
FINALIZE' ) { From f154fd6878e2eef2c0dd58025382a53f882bdf96 Mon Sep 17 00:00:00 2001 From: tmcguinness Date: Thu, 8 Aug 2024 18:20:23 +0000 Subject: [PATCH 23/32] removed extra steps block too --- ci/Jenkinsfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 47de209472..6fad5c4603 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -187,7 +187,6 @@ pipeline { expression { STATUS != 'Failed' } } agent { label NodeName[machine].toLowerCase() } - steps { script { def cases = ['C48C48_ufs_hybatmDA', 'C48_ATM', 'C48_S2SW', 'C48_S2SWA_gefs', 'C48mx500_3DVarAOWCDA', 'C96C48_hybatmDA', 'C96_atm3DVar', 'C96_atmaerosnowDA'] def parallelStages = cases.collectEntries { caseName -> @@ -276,7 +275,6 @@ pipeline { } parallel parallelStages + [failFast: true] } - } } From 7e40a907e5f98e76ff0469a75d7fc22b408b608a Mon Sep 17 00:00:00 2001 From: tmcguinness Date: Thu, 8 Aug 2024 18:30:42 +0000 Subject: [PATCH 24/32] removed steps back and try to solve when again --- ci/Jenkinsfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 6fad5c4603..ec92cffe46 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -187,11 +187,12 @@ pipeline { expression { STATUS != 'Failed' } } agent { label NodeName[machine].toLowerCase() } + steps { script { def cases = ['C48C48_ufs_hybatmDA', 'C48_ATM', 'C48_S2SW', 'C48_S2SWA_gefs', 'C48mx500_3DVarAOWCDA', 'C96C48_hybatmDA', 'C96_atm3DVar', 'C96_atmaerosnowDA'] def parallelStages = cases.collectEntries { caseName -> ["${caseName}": { - stage('Create Experiments') { + stage("Create Experiments: ${caseName}") { when { expression { return caseList.contains(Case) } } @@ -275,6 +276,7 @@ pipeline { } parallel parallelStages + [failFast: true] } + } } From a7ab366a87117590540e30a7244ef39b36a24123 Mon Sep 17 00:00:00 2001 From: tmcguinness Date: Thu, 8 Aug 2024 18:46:29 +0000 Subject: [PATCH 25/32] removed when and used if instead sence they can not be used inside 
steps --- ci/Jenkinsfile | 106 ++++++++++++++++++++++++------------------------- 1 file changed, 52 insertions(+), 54 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index ec92cffe46..536fb8d447 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -193,22 +193,21 @@ pipeline { def parallelStages = cases.collectEntries { caseName -> ["${caseName}": { stage("Create Experiments: ${caseName}") { - when { - expression { return caseList.contains(Case) } - } steps { catchError(buildResult: 'UNSTABLE', stageResult: 'FAILURE') { script { - sh(script: "sed -n '/{.*}/!p' ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${Case}.yaml > ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${Case}.yaml.tmp") - def yaml_case = readYaml file: "${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${Case}.yaml.tmp" - system = yaml_case.experiment.system - def HOMEgfs = "${CUSTOM_WORKSPACE}/${system}" // local HOMEgfs is used to populate the XML on per system basis - env.RUNTESTS = "${CUSTOM_WORKSPACE}/RUNTESTS" - try { - error_output = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${Case}.yaml", returnStdout: true).trim() - } catch (Exception error_create) { - sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "${Case} **FAILED** to create experment on ${Machine} in BUILD# ${env.BUILD_NUMBER}\n with the error:\n\\`\\`\\`\n${error_output}\\`\\`\\`" """) - error("Case ${Case} failed to create experment directory") + if (caseList.contains(caseName)) { + sh(script: "sed -n '/{.*}/!p' ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${Case}.yaml > ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${Case}.yaml.tmp") + def yaml_case = readYaml file: "${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${Case}.yaml.tmp" + system = yaml_case.experiment.system + def HOMEgfs = "${CUSTOM_WORKSPACE}/${system}" // local HOMEgfs is used to populate the XML on per system basis + env.RUNTESTS = "${CUSTOM_WORKSPACE}/RUNTESTS" + try { + error_output = sh(script: 
"${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${Case}.yaml", returnStdout: true).trim() + } catch (Exception error_create) { + sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "${Case} **FAILED** to create experment on ${Machine} in BUILD# ${env.BUILD_NUMBER}\n with the error:\n\\`\\`\\`\n${error_output}\\`\\`\\`" """) + error("Case ${Case} failed to create experment directory") + } } } } @@ -216,56 +215,55 @@ pipeline { } stage('Run Experiments') { - when { - expression { return caseList.contains(Case) } - } steps { catchError(buildResult: 'UNSTABLE', stageResult: 'FAILURE') { script { - HOMEgfs = "${CUSTOM_WORKSPACE}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments - def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${Case}", returnStdout: true).trim() - def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs" - sh(script: " rm -f ${error_file}") - try { - sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} ${system}") - } catch (Exception error_experment) { - sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cancel_batch_jobs ${pslot}") - ws(CUSTOM_WORKSPACE) { - def error_logs = "" - def error_logs_message = "" - if (fileExists(error_file)) { - def fileContent = readFile error_file - def lines = fileContent.readLines() - for (line in lines) { - echo "archiving: ${line}" - if (fileExists("${CUSTOM_WORKSPACE}/${line}") && readFile("${CUSTOM_WORKSPACE}/${line}").length() > 0) { - try { - archiveArtifacts artifacts: "${line}", fingerprint: true - error_logs = error_logs + "${CUSTOM_WORKSPACE}/${line} " - error_logs_message = error_logs_message + "${CUSTOM_WORKSPACE}/${line}\n" - } catch (Exception error_arch) { - echo "Failed to archive error log ${line}: ${error_arch.getMessage()}" + if (caseList.contains(caseName)) { + HOMEgfs = "${CUSTOM_WORKSPACE}/gfs" // 
common HOMEgfs is used to launch the scripts that run the experiments + def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${Case}", returnStdout: true).trim() + def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs" + sh(script: " rm -f ${error_file}") + try { + sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} ${system}") + } catch (Exception error_experment) { + sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cancel_batch_jobs ${pslot}") + ws(CUSTOM_WORKSPACE) { + def error_logs = "" + def error_logs_message = "" + if (fileExists(error_file)) { + def fileContent = readFile error_file + def lines = fileContent.readLines() + for (line in lines) { + echo "archiving: ${line}" + if (fileExists("${CUSTOM_WORKSPACE}/${line}") && readFile("${CUSTOM_WORKSPACE}/${line}").length() > 0) { + try { + archiveArtifacts artifacts: "${line}", fingerprint: true + error_logs = error_logs + "${CUSTOM_WORKSPACE}/${line} " + error_logs_message = error_logs_message + "${CUSTOM_WORKSPACE}/${line}\n" + } catch (Exception error_arch) { + echo "Failed to archive error log ${line}: ${error_arch.getMessage()}" + } } } + try { + gist_url = sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --gist PR_${env.CHANGE_ID}", returnStdout: true).trim() + sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${Case} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} with error logs:\n\\`\\`\\`\n${error_logs_message}\\`\\`\\`\n\nFollow link here to view the contents of the above file(s): [(link)](${gist_url})" """) + sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --repo PR_${env.CHANGE_ID}") + } catch (Exception error_comment) { + echo "Failed to comment on PR: ${error_comment.getMessage()}" + } + } else { + echo "No error logs found for failed cases in $CUSTOM_WORKSPACE/RUNTESTS/${pslot}_error.logs" 
} + STATUS = 'Failed' try { - gist_url = sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --gist PR_${env.CHANGE_ID}", returnStdout: true).trim() - sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${Case} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} with error logs:\n\\`\\`\\`\n${error_logs_message}\\`\\`\\`\n\nFollow link here to view the contents of the above file(s): [(link)](${gist_url})" """) - sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --repo PR_${env.CHANGE_ID}") - } catch (Exception error_comment) { - echo "Failed to comment on PR: ${error_comment.getMessage()}" + sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --remove-label "CI-${Machine}-Running" --add-label "CI-${Machine}-${STATUS}" """, returnStatus: true) + sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${Case} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} in\n\\`${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}\\`" """) + } catch (Exception e) { + echo "Failed to update label from Running to ${STATUS}: ${e.getMessage()}" } - } else { - echo "No error logs found for failed cases in $CUSTOM_WORKSPACE/RUNTESTS/${pslot}_error.logs" - } - STATUS = 'Failed' - try { - sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --remove-label "CI-${Machine}-Running" --add-label "CI-${Machine}-${STATUS}" """, returnStatus: true) - sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${Case} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} in\n\\`${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}\\`" """) - } catch (Exception e) { - echo "Failed to update label from Running to ${STATUS}: ${e.getMessage()}" + error("Failed to run experiments ${Case} on ${Machine}") } - error("Failed to run experiments ${Case} on ${Machine}") } } } From 5c1c666ea2bc2614d19dcbbcb6172e4c46773135 Mon Sep 17 00:00:00 2001 From: 
tmcguinness Date: Thu, 8 Aug 2024 18:51:28 +0000 Subject: [PATCH 26/32] no steps in stepsgit add Jenkinsfile --- ci/Jenkinsfile | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 536fb8d447..6110129260 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -193,7 +193,6 @@ pipeline { def parallelStages = cases.collectEntries { caseName -> ["${caseName}": { stage("Create Experiments: ${caseName}") { - steps { catchError(buildResult: 'UNSTABLE', stageResult: 'FAILURE') { script { if (caseList.contains(caseName)) { @@ -210,12 +209,10 @@ pipeline { } } } - } } } stage('Run Experiments') { - steps { catchError(buildResult: 'UNSTABLE', stageResult: 'FAILURE') { script { if (caseList.contains(caseName)) { @@ -268,7 +265,6 @@ pipeline { } } } - } } }] } From cb158a8149d9de42fd0660010f94da8376947d2e Mon Sep 17 00:00:00 2001 From: tmcguinness Date: Thu, 8 Aug 2024 19:17:39 +0000 Subject: [PATCH 27/32] parallelStages is working now, rename loop contol variable for caseName --- ci/Jenkinsfile | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 6110129260..62b890c76f 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -196,16 +196,16 @@ pipeline { catchError(buildResult: 'UNSTABLE', stageResult: 'FAILURE') { script { if (caseList.contains(caseName)) { - sh(script: "sed -n '/{.*}/!p' ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${Case}.yaml > ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${Case}.yaml.tmp") - def yaml_case = readYaml file: "${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${Case}.yaml.tmp" + sh(script: "sed -n '/{.*}/!p' ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml > ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml.tmp") + def yaml_case = readYaml file: "${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml.tmp" system = yaml_case.experiment.system def HOMEgfs = "${CUSTOM_WORKSPACE}/${system}" // local HOMEgfs is used to populate the XML on per system basis env.RUNTESTS = 
"${CUSTOM_WORKSPACE}/RUNTESTS" try { - error_output = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${Case}.yaml", returnStdout: true).trim() + error_output = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${caseName}.yaml", returnStdout: true).trim() } catch (Exception error_create) { sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "${Case} **FAILED** to create experment on ${Machine} in BUILD# ${env.BUILD_NUMBER}\n with the error:\n\\`\\`\\`\n${error_output}\\`\\`\\`" """) - error("Case ${Case} failed to create experment directory") + error("Case ${caseName} failed to create experment directory") } } } @@ -217,7 +217,7 @@ pipeline { script { if (caseList.contains(caseName)) { HOMEgfs = "${CUSTOM_WORKSPACE}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments - def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${Case}", returnStdout: true).trim() + def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${caseName}", returnStdout: true).trim() def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs" sh(script: " rm -f ${error_file}") try { @@ -244,7 +244,7 @@ pipeline { } try { gist_url = sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --gist PR_${env.CHANGE_ID}", returnStdout: true).trim() - sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${Case} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} with error logs:\n\\`\\`\\`\n${error_logs_message}\\`\\`\\`\n\nFollow link here to view the contents of the above file(s): [(link)](${gist_url})" """) + sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${caseName} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} with error 
logs:\n\\`\\`\\`\n${error_logs_message}\\`\\`\\`\n\nFollow link here to view the contents of the above file(s): [(link)](${gist_url})" """) sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --repo PR_${env.CHANGE_ID}") } catch (Exception error_comment) { echo "Failed to comment on PR: ${error_comment.getMessage()}" @@ -255,11 +255,11 @@ pipeline { STATUS = 'Failed' try { sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --remove-label "CI-${Machine}-Running" --add-label "CI-${Machine}-${STATUS}" """, returnStatus: true) - sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${Case} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} in\n\\`${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}\\`" """) + sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${caseName} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} in\n\\`${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}\\`" """) } catch (Exception e) { echo "Failed to update label from Running to ${STATUS}: ${e.getMessage()}" } - error("Failed to run experiments ${Case} on ${Machine}") + error("Failed to run experiments ${caseName} on ${Machine}") } } } From 5f5df7a4519e22a03eb302d00ffa953aef31ad7a Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Thu, 8 Aug 2024 17:41:49 -0400 Subject: [PATCH 28/32] cleaned up code after getting parrallel working --- ci/Jenkinsfile | 141 +++++++++++++++++++++++++------------------------ 1 file changed, 71 insertions(+), 70 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 62b890c76f..84e3725fe4 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -3,6 +3,8 @@ def machine = 'none' def CUSTOM_WORKSPACE = 'none' def caseList = '' def GH = 'none' +// TODO get this list from get_host_case_list.py +def cases = ['C48C48_ufs_hybatmDA', 'C48_ATM', 'C48_S2SW', 'C48_S2SWA_gefs', 'C48mx500_3DVarAOWCDA', 'C96C48_hybatmDA', 'C96_atm3DVar', 'C96_atmaerosnowDA'] // Location of 
the custom workspaces for each machine in the CI system. They are persitent for each iteration of the PR. def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea'] def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/stmp/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI'] @@ -99,7 +101,7 @@ pipeline { } } stages { - stage('build system') { + stage("Building ${system}") { steps { catchError(buildResult: 'UNSTABLE', stageResult: 'FAILURE') { script { @@ -188,88 +190,87 @@ pipeline { } agent { label NodeName[machine].toLowerCase() } steps { - script { - def cases = ['C48C48_ufs_hybatmDA', 'C48_ATM', 'C48_S2SW', 'C48_S2SWA_gefs', 'C48mx500_3DVarAOWCDA', 'C96C48_hybatmDA', 'C96_atm3DVar', 'C96_atmaerosnowDA'] - def parallelStages = cases.collectEntries { caseName -> - ["${caseName}": { - stage("Create Experiments: ${caseName}") { - catchError(buildResult: 'UNSTABLE', stageResult: 'FAILURE') { - script { - if (caseList.contains(caseName)) { - sh(script: "sed -n '/{.*}/!p' ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml > ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml.tmp") - def yaml_case = readYaml file: "${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml.tmp" - system = yaml_case.experiment.system - def HOMEgfs = "${CUSTOM_WORKSPACE}/${system}" // local HOMEgfs is used to populate the XML on per system basis - env.RUNTESTS = "${CUSTOM_WORKSPACE}/RUNTESTS" - try { - error_output = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${caseName}.yaml", returnStdout: true).trim() - } catch (Exception error_create) { - sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "${Case} **FAILED** to create experment on ${Machine} in BUILD# ${env.BUILD_NUMBER}\n with the error:\n\\`\\`\\`\n${error_output}\\`\\`\\`" """) - error("Case ${caseName} failed to create experment directory") + 
script { + def parallelStages = cases.collectEntries { caseName -> + ["${caseName}": { + stage("Create: ${caseName}") { + catchError(buildResult: 'UNSTABLE', stageResult: 'FAILURE') { + script { + if (caseList.contains(caseName)) { + sh(script: "sed -n '/{.*}/!p' ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml > ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml.tmp") + def yaml_case = readYaml file: "${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml.tmp" + system = yaml_case.experiment.system + def HOMEgfs = "${CUSTOM_WORKSPACE}/${system}" // local HOMEgfs is used to populate the XML on per system basis + env.RUNTESTS = "${CUSTOM_WORKSPACE}/RUNTESTS" + try { + error_output = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${caseName}.yaml", returnStdout: true).trim() + } catch (Exception error_create) { + sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "${Case} **FAILED** to create experment on ${Machine} in BUILD# ${env.BUILD_NUMBER}\n with the error:\n\\`\\`\\`\n${error_output}\\`\\`\\`" """) + error("Case ${caseName} failed to create experment directory") + } + } } - } } - } - } + } - stage('Run Experiments') { - catchError(buildResult: 'UNSTABLE', stageResult: 'FAILURE') { - script { - if (caseList.contains(caseName)) { - HOMEgfs = "${CUSTOM_WORKSPACE}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments - def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${caseName}", returnStdout: true).trim() - def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs" - sh(script: " rm -f ${error_file}") - try { - sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} ${system}") - } catch (Exception error_experment) { - sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cancel_batch_jobs ${pslot}") - ws(CUSTOM_WORKSPACE) { - def error_logs = "" - def 
error_logs_message = "" - if (fileExists(error_file)) { - def fileContent = readFile error_file - def lines = fileContent.readLines() - for (line in lines) { - echo "archiving: ${line}" - if (fileExists("${CUSTOM_WORKSPACE}/${line}") && readFile("${CUSTOM_WORKSPACE}/${line}").length() > 0) { + stage("${caseName} Running") { + catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') { + script { + if (caseList.contains(caseName)) { + HOMEgfs = "${CUSTOM_WORKSPACE}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments + def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${caseName}", returnStdout: true).trim() + def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs" + sh(script: " rm -f ${error_file}") + try { + sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} ${system}") + } catch (Exception error_experment) { + sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cancel_batch_jobs ${pslot}") + ws(CUSTOM_WORKSPACE) { + def error_logs = "" + def error_logs_message = "" + if (fileExists(error_file)) { + def fileContent = readFile error_file + def lines = fileContent.readLines() + for (line in lines) { + echo "archiving: ${line}" + if (fileExists("${CUSTOM_WORKSPACE}/${line}") && readFile("${CUSTOM_WORKSPACE}/${line}").length() > 0) { + try { + archiveArtifacts artifacts: "${line}", fingerprint: true + error_logs = error_logs + "${CUSTOM_WORKSPACE}/${line} " + error_logs_message = error_logs_message + "${CUSTOM_WORKSPACE}/${line}\n" + } catch (Exception error_arch) { + echo "Failed to archive error log ${line}: ${error_arch.getMessage()}" + } + } + } try { - archiveArtifacts artifacts: "${line}", fingerprint: true - error_logs = error_logs + "${CUSTOM_WORKSPACE}/${line} " - error_logs_message = error_logs_message + "${CUSTOM_WORKSPACE}/${line}\n" - } catch (Exception error_arch) { - echo "Failed to archive error log ${line}: 
${error_arch.getMessage()}" + gist_url = sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --gist PR_${env.CHANGE_ID}", returnStdout: true).trim() + sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${caseName} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} with error logs:\n\\`\\`\\`\n${error_logs_message}\\`\\`\\`\n\nFollow link here to view the contents of the above file(s): [(link)](${gist_url})" """) + sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --repo PR_${env.CHANGE_ID}") + } catch (Exception error_comment) { + echo "Failed to comment on PR: ${error_comment.getMessage()}" } + } else { + echo "No error logs found for failed cases in $CUSTOM_WORKSPACE/RUNTESTS/${pslot}_error.logs" } + STATUS = 'Failed' + try { + sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --remove-label "CI-${Machine}-Running" --add-label "CI-${Machine}-${STATUS}" """, returnStatus: true) + sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${caseName} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} in\n\\`${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}\\`" """) + } catch (Exception e) { + echo "Failed to update label from Running to ${STATUS}: ${e.getMessage()}" + } + error("Failed to run experiments ${caseName} on ${Machine}") } - try { - gist_url = sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --gist PR_${env.CHANGE_ID}", returnStdout: true).trim() - sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${caseName} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} with error logs:\n\\`\\`\\`\n${error_logs_message}\\`\\`\\`\n\nFollow link here to view the contents of the above file(s): [(link)](${gist_url})" """) - sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --repo PR_${env.CHANGE_ID}") - } catch (Exception error_comment) { - echo 
"Failed to comment on PR: ${error_comment.getMessage()}" - } - } else { - echo "No error logs found for failed cases in $CUSTOM_WORKSPACE/RUNTESTS/${pslot}_error.logs" } - STATUS = 'Failed' - try { - sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --remove-label "CI-${Machine}-Running" --add-label "CI-${Machine}-${STATUS}" """, returnStatus: true) - sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${caseName} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} in\n\\`${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}\\`" """) - } catch (Exception e) { - echo "Failed to update label from Running to ${STATUS}: ${e.getMessage()}" - } - error("Failed to run experiments ${caseName} on ${Machine}") } } } - } } + }] } - }] - } - parallel parallelStages + [failFast: true] - } + parallel parallelStages + [failFast: true] + } } } From b4d5ed66ba01c70bf2bac4e96e872fd9c76d4a18 Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Thu, 8 Aug 2024 17:47:42 -0400 Subject: [PATCH 29/32] matrix stage labels must be literal --- ci/Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 84e3725fe4..33d53fc670 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -101,7 +101,7 @@ pipeline { } } stages { - stage("Building ${system}") { + stage('Building') { steps { catchError(buildResult: 'UNSTABLE', stageResult: 'FAILURE') { script { From a670daf26ab39f25bb3347e42d68d3069b8871ad Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Fri, 9 Aug 2024 12:52:54 -0400 Subject: [PATCH 30/32] added dyamic case list --- ci/Jenkinsfile | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 33d53fc670..6940792c08 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -1,10 +1,10 @@ def Machine = 'none' def machine = 'none' def CUSTOM_WORKSPACE = 'none' -def caseList = '' +def cases = '' def GH = 'none' // TODO get this list from 
get_host_case_list.py -def cases = ['C48C48_ufs_hybatmDA', 'C48_ATM', 'C48_S2SW', 'C48_S2SWA_gefs', 'C48mx500_3DVarAOWCDA', 'C96C48_hybatmDA', 'C96_atm3DVar', 'C96_atmaerosnowDA'] +// def cases = ['C48C48_ufs_hybatmDA', 'C48_ATM', 'C48_S2SW', 'C48_S2SWA_gefs', 'C48mx500_3DVarAOWCDA', 'C96C48_hybatmDA', 'C96_atm3DVar', 'C96_atmaerosnowDA'] // Location of the custom workspaces for each machine in the CI system. They are persitent for each iteration of the PR. def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea'] def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/stmp/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI'] @@ -173,7 +173,7 @@ pipeline { } } if (system == 'gfs') { - caseList = sh(script: "${HOMEgfs}/ci/scripts/utils/get_host_case_list.py ${machine}", returnStdout: true).trim().split() + cases = sh(script: "${HOMEgfs}/ci/scripts/utils/get_host_case_list.py ${machine}", returnStdout: true).trim().split() } } } @@ -196,7 +196,7 @@ pipeline { stage("Create: ${caseName}") { catchError(buildResult: 'UNSTABLE', stageResult: 'FAILURE') { script { - if (caseList.contains(caseName)) { + //if (caseList.contains(caseName)) { sh(script: "sed -n '/{.*}/!p' ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml > ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml.tmp") def yaml_case = readYaml file: "${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml.tmp" system = yaml_case.experiment.system @@ -208,7 +208,7 @@ pipeline { sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "${Case} **FAILED** to create experment on ${Machine} in BUILD# ${env.BUILD_NUMBER}\n with the error:\n\\`\\`\\`\n${error_output}\\`\\`\\`" """) error("Case ${caseName} failed to create experment directory") } - } + //} } } } @@ -216,7 +216,7 @@ pipeline { stage("${caseName} Running") { catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') { script { - if 
(caseList.contains(caseName)) { + //if (caseList.contains(caseName)) { HOMEgfs = "${CUSTOM_WORKSPACE}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${caseName}", returnStdout: true).trim() def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs" @@ -263,7 +263,7 @@ pipeline { error("Failed to run experiments ${caseName} on ${Machine}") } } - } + //} } } } From b2f6c60a2d138a76fc75793413197948d246017c Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Fri, 9 Aug 2024 13:01:28 -0400 Subject: [PATCH 31/32] removed hard coded list cases --- ci/Jenkinsfile | 6 ------ 1 file changed, 6 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 6940792c08..8752a361ef 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -3,8 +3,6 @@ def machine = 'none' def CUSTOM_WORKSPACE = 'none' def cases = '' def GH = 'none' -// TODO get this list from get_host_case_list.py -// def cases = ['C48C48_ufs_hybatmDA', 'C48_ATM', 'C48_S2SW', 'C48_S2SWA_gefs', 'C48mx500_3DVarAOWCDA', 'C96C48_hybatmDA', 'C96_atm3DVar', 'C96_atmaerosnowDA'] // Location of the custom workspaces for each machine in the CI system. They are persitent for each iteration of the PR. 
def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea'] def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/stmp/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI'] @@ -196,7 +194,6 @@ pipeline { stage("Create: ${caseName}") { catchError(buildResult: 'UNSTABLE', stageResult: 'FAILURE') { script { - //if (caseList.contains(caseName)) { sh(script: "sed -n '/{.*}/!p' ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml > ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml.tmp") def yaml_case = readYaml file: "${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml.tmp" system = yaml_case.experiment.system @@ -208,7 +205,6 @@ pipeline { sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "${Case} **FAILED** to create experment on ${Machine} in BUILD# ${env.BUILD_NUMBER}\n with the error:\n\\`\\`\\`\n${error_output}\\`\\`\\`" """) error("Case ${caseName} failed to create experment directory") } - //} } } } @@ -216,7 +212,6 @@ pipeline { stage("${caseName} Running") { catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') { script { - //if (caseList.contains(caseName)) { HOMEgfs = "${CUSTOM_WORKSPACE}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${caseName}", returnStdout: true).trim() def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs" @@ -263,7 +258,6 @@ pipeline { error("Failed to run experiments ${caseName} on ${Machine}") } } - //} } } } From 05a83cc8c39dc4dce64046be94f30388e8034ee7 Mon Sep 17 00:00:00 2001 From: Terry McGuinness Date: Fri, 9 Aug 2024 19:54:36 -0400 Subject: [PATCH 32/32] Reverse order in case label to match Create --- ci/Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 
8752a361ef..8ed4927c6b 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -191,7 +191,7 @@ pipeline { script { def parallelStages = cases.collectEntries { caseName -> ["${caseName}": { - stage("Create: ${caseName}") { + stage("Create ${caseName}") { catchError(buildResult: 'UNSTABLE', stageResult: 'FAILURE') { script { sh(script: "sed -n '/{.*}/!p' ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml > ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml.tmp") @@ -209,7 +209,7 @@ pipeline { } } - stage("${caseName} Running") { + stage("Running ${caseName}") { catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') { script { HOMEgfs = "${CUSTOM_WORKSPACE}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments