Skip to content

Commit

Permalink
Merge branch 'develop' into feature/gwdev_issue_2451.003
Browse files Browse the repository at this point in the history
  • Loading branch information
HenryRWinterbottom committed Jun 27, 2024
2 parents e819512 + 9476c12 commit d3e4f3c
Show file tree
Hide file tree
Showing 50 changed files with 901 additions and 597 deletions.
95 changes: 58 additions & 37 deletions ci/Jenkinsfile
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
def Machine = 'none'
def machine = 'none'
def HOME = 'none'
def CUSTOM_WORKSPACE = 'none'
def caseList = ''
// Location of the custom workspaces for each machine in the CI system. They are persistent for each iteration of the PR.
def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/stmp/CI/HERCULES']
def NodeName = [hera: 'Hera-EMC', orion: 'Orion-EMC', hercules: 'Hercules-EMC', gaea: 'Gaea']
def custom_workspace = [hera: '/scratch1/NCEPDEV/global/CI', orion: '/work2/noaa/stmp/CI/ORION', hercules: '/work2/noaa/stmp/CI/HERCULES', gaea: '/gpfs/f5/epic/proj-shared/global/CI']
def repo_url = '[email protected]:NOAA-EMC/global-workflow.git'
def STATUS = 'Passed'

Expand Down Expand Up @@ -40,9 +41,9 @@ pipeline {
echo "This is parent job so getting list of nodes matching labels:"
for (label in pullRequest.labels) {
if (label.matches("CI-(.*?)-Ready")) {
def Machine_name = label.split('-')[1].toString()
def machine_name = label.split('-')[1].toString().toLowerCase()
jenkins.model.Jenkins.get().computers.each { c ->
if (c.node.selfLabel.name == "${Machine_name}-EMC") {
if (c.node.selfLabel.name == NodeName[machine_name]) {
run_nodes.add(c.node.selfLabel.name)
}
}
Expand Down Expand Up @@ -70,25 +71,25 @@ pipeline {
}

stage('2. Get Common Workspace') {
agent { label "${machine}-emc" }
agent { label NodeName[machine].toLowerCase() }
steps {
script {
Machine = machine[0].toUpperCase() + machine.substring(1)
echo "Getting Common Workspace for ${Machine}"
ws("${custom_workspace[machine]}/${env.CHANGE_ID}") {
properties([parameters([[$class: 'NodeParameterDefinition', allowedSlaves: ['built-in', 'Hercules-EMC', 'Hera-EMC', 'Orion-EMC'], defaultSlaves: ['built-in'], name: '', nodeEligibility: [$class: 'AllNodeEligibility'], triggerIfResult: 'allCases']])])
HOME = "${WORKSPACE}"
sh(script: "mkdir -p ${HOME}/RUNTESTS;rm -Rf ${HOME}/RUNTESTS/*")
CUSTOM_WORKSPACE = "${WORKSPACE}"
sh(script: "mkdir -p ${CUSTOM_WORKSPACE}/RUNTESTS;rm -Rf ${CUSTOM_WORKSPACE}/RUNTESTS/*")
sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --add-label "CI-${Machine}-Building" --remove-label "CI-${Machine}-Ready" """)
}
echo "Building and running on ${Machine} in directory ${HOME}"
echo "Building and running on ${Machine} in directory ${CUSTOM_WORKSPACE}"
}
}
}

stage('3. Build System') {
matrix {
agent { label "${machine}-emc" }
agent { label NodeName[machine].toLowerCase() }
//options {
// throttle(['global_matrix_build'])
//}
Expand All @@ -102,7 +103,7 @@ pipeline {
stage('build system') {
steps {
script {
def HOMEgfs = "${HOME}/${system}" // local HOMEgfs is used to build the system on per system basis under the common workspace HOME
def HOMEgfs = "${CUSTOM_WORKSPACE}/${system}" // local HOMEgfs is used to build the system on per system basis under the custom workspace for each build system
sh(script: "mkdir -p ${HOMEgfs}")
ws(HOMEgfs) {
if (fileExists("${HOMEgfs}/sorc/BUILT_semaphor")) { // if the system is already built, skip the build in the case of re-runs
Expand All @@ -112,7 +113,16 @@ pipeline {
sh(script: './link_workflow.sh')
}
} else {
checkout scm
try {
echo "Checking out the code for ${system} on ${Machine} using scm in ${HOMEgfs}"
checkout scm
} catch (Exception e) {
if (env.CHANGE_ID) {
sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Checkout **Failed** on ${Machine}: ${e.getMessage()}" """)
}
echo "Failed to checkout: ${e.getMessage()}"
STATUS = 'Failed'
}
def gist_url = ""
def error_logs = ""
def error_logs_message = ""
Expand Down Expand Up @@ -173,7 +183,7 @@ pipeline {
stage('4. Run Tests') {
failFast false
matrix {
agent { label "${machine}-emc" }
agent { label NodeName[machine].toLowerCase() }
axes {
axis {
name 'Case'
Expand All @@ -189,11 +199,11 @@ pipeline {
}
steps {
script {
sh(script: "sed -n '/{.*}/!p' ${HOME}/gfs/ci/cases/pr/${Case}.yaml > ${HOME}/gfs/ci/cases/pr/${Case}.yaml.tmp")
def yaml_case = readYaml file: "${HOME}/gfs/ci/cases/pr/${Case}.yaml.tmp"
sh(script: "sed -n '/{.*}/!p' ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${Case}.yaml > ${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${Case}.yaml.tmp")
def yaml_case = readYaml file: "${CUSTOM_WORKSPACE}/gfs/ci/cases/pr/${Case}.yaml.tmp"
system = yaml_case.experiment.system
def HOMEgfs = "${HOME}/${system}" // local HOMEgfs is used to populate the XML on per system basis
env.RUNTESTS = "${HOME}/RUNTESTS"
def HOMEgfs = "${CUSTOM_WORKSPACE}/${system}" // local HOMEgfs is used to populate the XML on per system basis
env.RUNTESTS = "${CUSTOM_WORKSPACE}/RUNTESTS"
sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${Case}.yaml")
}
}
Expand All @@ -206,27 +216,27 @@ pipeline {
failFast false
steps {
script {
HOMEgfs = "${HOME}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments
def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${HOME}/RUNTESTS ${Case}", returnStdout: true).trim()
def error_file = "${HOME}/RUNTESTS/${pslot}_error.logs"
HOMEgfs = "${CUSTOM_WORKSPACE}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments
def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${Case}", returnStdout: true).trim()
def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs"
sh(script: " rm -f ${error_file}")
try {
sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${HOME} ${pslot} ${system}")
sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} ${system}")
} catch (Exception error_experment) {
sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cancel_batch_jobs ${pslot}")
ws(HOME) {
ws(CUSTOM_WORKSPACE) {
def error_logs = ""
def error_logs_message = ""
if (fileExists(error_file)) {
def fileContent = readFile error_file
def lines = fileContent.readLines()
for (line in lines) {
echo "archiving: ${line}"
if (fileExists("${HOME}/${line}") && readFile("${HOME}/${line}").length() > 0) {
if (fileExists("${CUSTOM_WORKSPACE}/${line}") && readFile("${CUSTOM_WORKSPACE}/${line}").length() > 0) {
try {
archiveArtifacts artifacts: "${line}", fingerprint: true
error_logs = error_logs + "${HOME}/${line} "
error_logs_message = error_logs_message + "${HOME}/${line}\n"
error_logs = error_logs + "${CUSTOM_WORKSPACE}/${line} "
error_logs_message = error_logs_message + "${CUSTOM_WORKSPACE}/${line}\n"
} catch (Exception error_arch) {
echo "Failed to archive error log ${line}: ${error_arch.getMessage()}"
}
Expand All @@ -240,12 +250,12 @@ pipeline {
echo "Failed to comment on PR: ${error_comment.getMessage()}"
}
} else {
echo "No error logs found for failed cases in $HOME/RUNTESTS/${pslot}_error.logs"
echo "No error logs found for failed cases in $CUSTOM_WORKSPACE/RUNTESTS/${pslot}_error.logs"
}
STATUS = 'Failed'
try {
sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --remove-label "CI-${Machine}-Running" --add-label "CI-${Machine}-${STATUS}" """, returnStatus: true)
sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${Case} **FAILED** on ${Machine} in\n\\`${HOME}/RUNTESTS/${pslot}\\`" """)
sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${Case} **FAILED** on ${Machine} in\n\\`${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}\\`" """)
} catch (Exception e) {
echo "Failed to update label from Running to ${STATUS}: ${e.getMessage()}"
}
Expand All @@ -259,19 +269,30 @@ pipeline {
}
}
stage( '5. FINALIZE' ) {
when {
expression {
STATUS == 'Passed'
}
}
agent { label "${machine}-emc" }
agent { label NodeName[machine].toLowerCase() }
steps {
script {
try {
sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --remove-label "CI-${Machine}-Running" --remove-label "CI-${Machine}-Building" --add-label "CI-${Machine}-${STATUS}" """, returnStatus: true)
sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "**CI ${STATUS}** ${Machine} at <br>Built and ran in directory \\`${HOME}\\`" """, returnStatus: true)
} catch (Exception e) {
echo "Failed to update label from Running to ${STATUS}: ${e.getMessage()}"
sh(script: """
labels=\$(gh pr view ${env.CHANGE_ID} --repo ${repo_url} --json labels --jq '.labels[].name')
for label in \$labels; do
if [[ "\$label" == *"${Machine}"* ]]; then
gh pr edit ${env.CHANGE_ID} --repo ${repo_url} --remove-label "\$label"
fi
done
""", returnStatus: true)
sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --add-label "CI-${Machine}-${STATUS}" """, returnStatus: true)
if (fileExists("${CUSTOM_WORKSPACE}/RUNTESTS/ci-run_check.log")) {
sh(script: """echo "**CI ${STATUS}** ${Machine} at <br>Built and ran in directory \\`${CUSTOM_WORKSPACE}\\`\n\\`\\`\\`\n" | cat - ${CUSTOM_WORKSPACE}/RUNTESTS/ci-run_check.log > temp && mv temp ${CUSTOM_WORKSPACE}/RUNTESTS/ci-run_check.log""", returnStatus: true)
sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body-file ${CUSTOM_WORKSPACE}/RUNTESTS/ci-run_check.log """, returnStatus: true)
}
if (STATUS == 'Passed') {
try {
sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/*")
} catch (Exception e) {
echo "Failed to remove custom work directory ${CUSTOM_WORKSPACE} on ${Machine}: ${e.getMessage()}"
}
} else {
echo "Failed to build and run Global-workflow in ${CUSTOM_WORKSPACE} and on ${Machine}"
}
}
}
Expand Down
1 change: 0 additions & 1 deletion ci/cases/pr/C96_atmaerosnowDA.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,3 @@ arguments:
skip_ci_on_hosts:
- orion
- hercules
- wcoss2
2 changes: 1 addition & 1 deletion ci/scripts/check_ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ for pr in ${pr_list}; do
DATE=$(date +'%D %r')
echo "Experiment ${pslot} **SUCCESS** on ${MACHINE_ID^} at ${DATE}" >> "${output_ci_single}"
echo "Experiment ${pslot} *** SUCCESS *** at ${DATE}" >> "${output_ci}"
"${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${output_ci_single}"
# "${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${output_ci_single}"
fi
done
done
4 changes: 1 addition & 3 deletions ci/scripts/run-check_ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,7 @@ while true; do

if [[ "${ROCOTO_STATE}" == "DONE" ]]; then
{
echo "Experiment ${pslot} Completed ${CYCLES_DONE} Cycles at $(date)" || true
echo "with ${SUCCEEDED} successfully completed jobs" || true
echo "Experiment ${pslot} Completed: *SUCCESS*"
echo "Experiment ${pslot} Completed ${CYCLES_DONE} Cycles: *SUCCESS* at $(date)" || true
} | tee -a "${run_check_logfile}"
rc=0
break
Expand Down
32 changes: 5 additions & 27 deletions env/AWSPW.env
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,6 @@
if [[ $# -ne 1 ]]; then

echo "Must specify an input argument to set runtime environment variables!"
echo "argument can be any one of the following:"
echo "atmanlvar atmanlfv3inc atmensanlletkf atmensanlfv3inc aeroanlrun snowanl"
echo "anal sfcanl fcst post metp"
echo "eobs eupd ecen efcs epos"
echo "postsnd awips gempak"
exit 1

fi
Expand All @@ -26,13 +21,11 @@ ulimit -a

if [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then

if [[ "${CDUMP}" =~ "gfs" ]]; then
nprocs="npe_${step}_gfs"
ppn="npe_node_${step}_gfs" || ppn="npe_node_${step}"
else
nprocs="npe_${step}"
ppn="npe_node_${step}"
fi
ppn="npe_node_${step}_${RUN}"
[[ -z "${!ppn+0}" ]] && ppn="npe_node_${step}"
nprocs="npe_${step}_${RUN}"
[[ -z ${!nprocs+0} ]] && nprocs="npe_${step}"

(( nnodes = (${!nprocs}+${!ppn}-1)/${!ppn} ))
(( ntasks = nnodes*${!ppn} ))
# With ESMF threading, the model wants to use the full node
Expand Down Expand Up @@ -109,21 +102,6 @@ elif [[ "${step}" = "awips" ]]; then
[[ ${NTHREADS_AWIPS} -gt ${nth_max} ]] && export NTHREADS_AWIPS=${nth_max}
export APRUN_AWIPSCFP="${launcher} -n ${npe_awips} ${mpmd_opt}"

elif [[ "${step}" = "gempak" ]]; then

export CFP_MP="YES"

if [[ ${CDUMP} == "gfs" ]]; then
npe_gempak=${npe_gempak_gfs}
npe_node_gempak=${npe_node_gempak_gfs}
fi

nth_max=$((npe_node_max / npe_node_gempak))

export NTHREADS_GEMPAK=${nth_gempak:-1}
[[ ${NTHREADS_GEMPAK} -gt ${nth_max} ]] && export NTHREADS_GEMPAK=${nth_max}
export APRUN="${launcher} -n ${npe_gempak} ${mpmd_opt}"


elif [[ "${step}" = "fit2obs" ]]; then

Expand Down
5 changes: 0 additions & 5 deletions env/CONTAINER.env
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,6 @@
if [[ $# -ne 1 ]]; then

echo "Must specify an input argument to set runtime environment variables!"
echo "argument can be any one of the following:"
echo "atmanlvar atmanlfv3inc atmensanlletkf atmensanlfv3inc aeroanlrun snowanl"
echo "anal sfcanl fcst post metp"
echo "eobs eupd ecen efcs epos"
echo "postsnd awips gempak"
exit 1

fi
Expand Down
14 changes: 5 additions & 9 deletions env/GAEA.env
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
if [[ $# -ne 1 ]]; then

echo "Must specify an input argument to set runtime environment variables!"
echo "argument can be any one of the following:"
echo "fcst atmos_products"
exit 1

fi
Expand All @@ -19,13 +17,11 @@ ulimit -a

if [[ "${step}" = "fcst" ]]; then

if [[ "${CDUMP}" =~ "gfs" ]]; then
nprocs="npe_${step}_gfs"
ppn="npe_node_${step}_gfs" || ppn="npe_node_${step}"
else
nprocs="npe_${step}"
ppn="npe_node_${step}"
fi
ppn="npe_node_${step}_${RUN}"
[[ -z "${!ppn+0}" ]] && ppn="npe_node_${step}"
nprocs="npe_${step}_${RUN}"
[[ -z ${!nprocs+0} ]] && nprocs="npe_${step}"

(( nnodes = (${!nprocs}+${!ppn}-1)/${!ppn} ))
(( ntasks = nnodes*${!ppn} ))
# With ESMF threading, the model wants to use the full node
Expand Down
21 changes: 5 additions & 16 deletions env/HERA.env
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,6 @@
if [[ $# -ne 1 ]]; then

echo "Must specify an input argument to set runtime environment variables!"
echo "argument can be any one of the following:"
echo "atmanlvar atmanlfv3inc atmensanlletkf atmensanlfv3inc aeroanlrun snowanl"
echo "anal sfcanl fcst post metp"
echo "eobs eupd ecen efcs epos"
echo "postsnd awips gempak"
exit 1

fi
Expand Down Expand Up @@ -219,13 +214,11 @@ elif [[ "${step}" = "eupd" ]]; then

elif [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then

if [[ "${CDUMP}" =~ "gfs" ]]; then
nprocs="npe_${step}_gfs"
ppn="npe_node_${step}_gfs" || ppn="npe_node_${step}"
else
nprocs="npe_${step}"
ppn="npe_node_${step}"
fi
ppn="npe_node_${step}_${RUN}"
[[ -z "${!ppn+0}" ]] && ppn="npe_node_${step}"
nprocs="npe_${step}_${RUN}"
[[ -z ${!nprocs+0} ]] && nprocs="npe_${step}"

(( nnodes = (${!nprocs}+${!ppn}-1)/${!ppn} ))
(( ntasks = nnodes*${!ppn} ))
# With ESMF threading, the model wants to use the full node
Expand Down Expand Up @@ -314,10 +307,6 @@ elif [[ "${step}" = "gempak" ]]; then

export CFP_MP="YES"

if [[ ${CDUMP} == "gfs" ]]; then
npe_node_gempak=${npe_node_gempak_gfs}
fi

nth_max=$((npe_node_max / npe_node_gempak))

export NTHREADS_GEMPAK=${nth_gempak:-1}
Expand Down
Loading

0 comments on commit d3e4f3c

Please sign in to comment.