Skip to content

Commit

Permalink
Merge branch 'develop' into feature/snow-ensemble
Browse files Browse the repository at this point in the history
  • Loading branch information
CoryMartin-NOAA committed Aug 13, 2024
2 parents 9280086 + d994642 commit 5ed6dec
Showing 1 changed file with 70 additions and 81 deletions.
151 changes: 70 additions & 81 deletions ci/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -186,99 +186,88 @@ pipeline {
when {
expression { STATUS != 'Failed' }
}
matrix {
agent { label NodeName[machine].toLowerCase() }
axes {
axis {
name 'Case'
// TODO add dynamic list of cases from env vars (needs addtional plugins)
values 'C48C48_ufs_hybatmDA', 'C48_ATM', 'C48_S2SW', 'C48_S2SWA_gefs', 'C48mx500_3DVarAOWCDA', 'C96C48_hybatmDA', 'C96_atm3DVar', 'C96C48_hybatmaerosnowDA'
}
}
stages {

stage('Create Experiments') {
when {
expression { return caseList.contains(Case) }
}
steps {
catchError(buildResult: 'UNSTABLE', stageResult: 'FAILURE') {
script {
sh(script: "sed -n '/{.*}/!p' ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${Case}.yaml > ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${Case}.yaml.tmp")
def yaml_case = readYaml file: "${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${Case}.yaml.tmp"
system = yaml_case.experiment.system
def HOMEgfs = "${CUSTOM_WORKSPACE}/${system}" // local HOMEgfs is used to populate the XML on per system basis
env.RUNTESTS = "${CUSTOM_WORKSPACE}/RUNTESTS"
try {
error_output = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${Case}.yaml", returnStdout: true).trim()
} catch (Exception error_create) {
sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "${Case} **FAILED** to create experment on ${Machine}\n with the error:\n\\`\\`\\`\n${error_output}\\`\\`\\`" """)
error("Case ${Case} failed to create experment directory")
}
agent { label NodeName[machine].toLowerCase() }
steps {
script {
def parallelStages = cases.collectEntries { caseName ->
["${caseName}": {
stage("Create ${caseName}") {
catchError(buildResult: 'UNSTABLE', stageResult: 'FAILURE') {
script {
sh(script: "sed -n '/{.*}/!p' ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml > ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml.tmp")
def yaml_case = readYaml file: "${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${caseName}.yaml.tmp"
system = yaml_case.experiment.system
def HOMEgfs = "${CUSTOM_WORKSPACE}/${system}" // local HOMEgfs is used to populate the XML on per system basis
env.RUNTESTS = "${CUSTOM_WORKSPACE}/RUNTESTS"
try {
error_output = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${caseName}.yaml", returnStdout: true).trim()
} catch (Exception error_create) {
sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "${Case} **FAILED** to create experment on ${Machine} in BUILD# ${env.BUILD_NUMBER}\n with the error:\n\\`\\`\\`\n${error_output}\\`\\`\\`" """)
error("Case ${caseName} failed to create experment directory")
}
}
}
}
}
}

stage('Run Experiments') {
when {
expression { return caseList.contains(Case) }
}
steps {
script {
HOMEgfs = "${CUSTOM_WORKSPACE}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments
def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${Case}", returnStdout: true).trim()
def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs"
sh(script: " rm -f ${error_file}")
try {
sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} ${system}")
} catch (Exception error_experment) {
sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cancel_batch_jobs ${pslot}")
ws(CUSTOM_WORKSPACE) {
def error_logs = ""
def error_logs_message = ""
if (fileExists(error_file)) {
def fileContent = readFile error_file
def lines = fileContent.readLines()
for (line in lines) {
echo "archiving: ${line}"
if (fileExists("${CUSTOM_WORKSPACE}/${line}") && readFile("${CUSTOM_WORKSPACE}/${line}").length() > 0) {
try {
archiveArtifacts artifacts: "${line}", fingerprint: true
error_logs = error_logs + "${CUSTOM_WORKSPACE}/${line} "
error_logs_message = error_logs_message + "${CUSTOM_WORKSPACE}/${line}\n"
} catch (Exception error_arch) {
echo "Failed to archive error log ${line}: ${error_arch.getMessage()}"
stage("Running ${caseName}") {
catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
script {
HOMEgfs = "${CUSTOM_WORKSPACE}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments
def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${caseName}", returnStdout: true).trim()
def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs"
sh(script: " rm -f ${error_file}")
try {
sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} ${system}")
} catch (Exception error_experment) {
sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cancel_batch_jobs ${pslot}")
ws(CUSTOM_WORKSPACE) {
def error_logs = ""
def error_logs_message = ""
if (fileExists(error_file)) {
def fileContent = readFile error_file
def lines = fileContent.readLines()
for (line in lines) {
echo "archiving: ${line}"
if (fileExists("${CUSTOM_WORKSPACE}/${line}") && readFile("${CUSTOM_WORKSPACE}/${line}").length() > 0) {
try {
archiveArtifacts artifacts: "${line}", fingerprint: true
error_logs = error_logs + "${CUSTOM_WORKSPACE}/${line} "
error_logs_message = error_logs_message + "${CUSTOM_WORKSPACE}/${line}\n"
} catch (Exception error_arch) {
echo "Failed to archive error log ${line}: ${error_arch.getMessage()}"
}
}
}
try {
gist_url = sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --gist PR_${env.CHANGE_ID}", returnStdout: true).trim()
sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${caseName} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} with error logs:\n\\`\\`\\`\n${error_logs_message}\\`\\`\\`\n\nFollow link here to view the contents of the above file(s): [(link)](${gist_url})" """)
sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --repo PR_${env.CHANGE_ID}")
} catch (Exception error_comment) {
echo "Failed to comment on PR: ${error_comment.getMessage()}"
}
} else {
echo "No error logs found for failed cases in $CUSTOM_WORKSPACE/RUNTESTS/${pslot}_error.logs"
}
STATUS = 'Failed'
try {
sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --remove-label "CI-${Machine}-Running" --add-label "CI-${Machine}-${STATUS}" """, returnStatus: true)
sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${caseName} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} in\n\\`${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}\\`" """)
} catch (Exception e) {
echo "Failed to update label from Running to ${STATUS}: ${e.getMessage()}"
}
error("Failed to run experiments ${caseName} on ${Machine}")
}
}
}
try {
gist_url = sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --gist PR_${env.CHANGE_ID}", returnStdout: true).trim()
sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${Case} **FAILED** on ${Machine} with error logs:\n\\`\\`\\`\n${error_logs_message}\\`\\`\\`\n\nFollow link here to view the contents of the above file(s): [(link)](${gist_url})" """)
sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --repo PR_${env.CHANGE_ID}")
} catch (Exception error_comment) {
echo "Failed to comment on PR: ${error_comment.getMessage()}"
}
} else {
echo "No error logs found for failed cases in $CUSTOM_WORKSPACE/RUNTESTS/${pslot}_error.logs"
}
STATUS = 'Failed'
try {
sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --remove-label "CI-${Machine}-Running" --add-label "CI-${Machine}-${STATUS}" """, returnStatus: true)
sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${Case} **FAILED** on ${Machine} in\n\\`${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}\\`" """)
} catch (Exception e) {
echo "Failed to update label from Running to ${STATUS}: ${e.getMessage()}"
}
echo "Failed to run experiments ${Case} on ${Machine}"
}
}
}
}
}]
}
parallel parallelStages + [failFast: true]
}
}
}


stage( '5. FINALIZE' ) {
agent { label NodeName[machine].toLowerCase() }
Expand Down

0 comments on commit 5ed6dec

Please sign in to comment.