Skip to content

Commit

Permalink
[develop] Fixes for PW Jenkins Nightly Builds (ufs-community#1091)
Browse files Browse the repository at this point in the history
* Adds logic to handle GCP's default conda env, which conflicts with the SRW App's conda env. Fixes a Parallel Works naming convention bug in the script.
* It also addresses a known issue with a Ruby warning on PW instances that prevents run_WE2E_tests.py from exiting gracefully. The solution we use in our bootstrap for /contrib doesn't seem to work for the /lustre directory, which is why the warning is hardcoded into the tests/WE2E/utils.py script.
* The new spack-stack build on Azure is missing a GNU library, so the path to this missing library was added to the proper run scripts, and the wflow noaacloud lua file was cleaned up.
* Removed the hardcoded log and error file directives from the qsub wrapper script so that qsub can generate these files with the job ID in the file names. Also fixed a typo in the wrapper script.
  • Loading branch information
EdwardSnyder-NOAA authored and Natalie Perlin committed Aug 15, 2024
1 parent fbc762e commit 8d9a137
Show file tree
Hide file tree
Showing 6 changed files with 17 additions and 12 deletions.
6 changes: 3 additions & 3 deletions .cicd/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -235,19 +235,19 @@ pipeline {

sh "SRW_WE2E_COMPREHENSIVE_TESTS=${run_we2e_comprehensive_tests} SRW_WE2E_SINGLE_TEST=${single_test}" + ' bash --login "${WORKSPACE}/${SRW_PLATFORM}/.cicd/scripts/srw_test.sh"'

// Archive the test log files
sh "[[ -d ${SRW_WE2E_EXPERIMENT_BASE_DIR} ]] && cd ${SRW_WE2E_EXPERIMENT_BASE_DIR} && tar --create --gzip --verbose --dereference --file ${WORKSPACE}/${SRW_PLATFORM}/we2e_test_logs-${SRW_PLATFORM}-${SRW_COMPILER}.tgz */log.generate_FV3LAM_wflow */log/* ${WORKSPACE}/${SRW_PLATFORM}/tests/WE2E/WE2E_tests_*yaml WE2E_summary*txt ${WORKSPACE}/${SRW_PLATFORM}/tests/WE2E/log.* || cat /dev/null > ${WORKSPACE}/${SRW_PLATFORM}/we2e_test_logs-${SRW_PLATFORM}-${SRW_COMPILER}.tgz"
}
sh "STAGE_NAME=${env.STAGE_NAME} " + 'bash --login "${WORKSPACE}/${SRW_PLATFORM}/.cicd/scripts/disk_usage.sh"'
}
}

post {
success {
s3Upload consoleLogLevel: 'INFO', dontSetBuildResultOnFailure: false, dontWaitForConcurrentBuildCompletion: false, entries: [[bucket: 'noaa-epic-prod-jenkins-artifacts', excludedFile: '', flatten: false, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: false, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: "${env.SRW_PLATFORM}/*_test_results-*-*.txt", storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false], [bucket: 'noaa-epic-prod-jenkins-artifacts', excludedFile: '', flatten: false, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: false, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: "${env.SRW_PLATFORM}/we2e_test_logs-${env.SRW_PLATFORM}-${env.SRW_COMPILER}.tgz", storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false]], pluginFailureResultConstraint: 'FAILURE', profileName: 'main', userMetadata: []
s3Upload consoleLogLevel: 'INFO', dontSetBuildResultOnFailure: false, dontWaitForConcurrentBuildCompletion: false, entries: [[bucket: 'noaa-epic-prod-jenkins-artifacts', excludedFile: '', flatten: false, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: false, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: "${env.SRW_PLATFORM}/*-skill-score.txt", storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false]], pluginFailureResultConstraint: 'FAILURE', profileName: 'main', userMetadata: []
}
always {
// Archive the test log files
sh "[[ -d ${SRW_WE2E_EXPERIMENT_BASE_DIR} ]] && cd ${SRW_WE2E_EXPERIMENT_BASE_DIR} && tar --create --gzip --verbose --dereference --file ${env.WORKSPACE}/${env.SRW_PLATFORM}/we2e_test_logs-${env.SRW_PLATFORM}-${env.SRW_COMPILER}.tgz */log.generate_FV3LAM_wflow */log/* ${env.WORKSPACE}/${env.SRW_PLATFORM}/tests/WE2E/WE2E_tests_*yaml WE2E_summary*txt ${env.WORKSPACE}/${env.SRW_PLATFORM}/tests/WE2E/log.* || cat /dev/null > ${env.WORKSPACE}/${env.SRW_PLATFORM}/we2e_test_logs-${env.SRW_PLATFORM}-${env.SRW_COMPILER}.tgz"
s3Upload consoleLogLevel: 'INFO', dontSetBuildResultOnFailure: false, dontWaitForConcurrentBuildCompletion: false, entries: [[bucket: 'noaa-epic-prod-jenkins-artifacts', excludedFile: '', flatten: false, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: false, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: "${env.SRW_PLATFORM}/*_test_results-*-*.txt", storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false], [bucket: 'noaa-epic-prod-jenkins-artifacts', excludedFile: '', flatten: false, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: false, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: "${env.SRW_PLATFORM}/we2e_test_logs-${env.SRW_PLATFORM}-${env.SRW_COMPILER}.tgz", storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false]], pluginFailureResultConstraint: 'FAILURE', profileName: 'main', userMetadata: []
s3Upload consoleLogLevel: 'INFO', dontSetBuildResultOnFailure: false, dontWaitForConcurrentBuildCompletion: false, entries: [[bucket: 'noaa-epic-prod-jenkins-artifacts', excludedFile: '', flatten: false, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: false, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: "${env.SRW_PLATFORM}-*-time-srw_test.json", storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false]], pluginFailureResultConstraint: 'FAILURE', profileName: 'main', userMetadata: []
s3Upload consoleLogLevel: 'INFO', dontSetBuildResultOnFailure: false, dontWaitForConcurrentBuildCompletion: false, entries: [[bucket: 'noaa-epic-prod-jenkins-artifacts', excludedFile: '', flatten: false, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: false, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: "${env.SRW_PLATFORM}-*-disk-usage${env.STAGE_NAME}.csv", storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false]], pluginFailureResultConstraint: 'FAILURE', profileName: 'main', userMetadata: []
// Remove the data sets from the experiments directory to conserve disk space
Expand Down
2 changes: 0 additions & 2 deletions .cicd/scripts/qsub_srw_ftest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,5 @@
#PBS -l select=1:ncpus=24:mpiprocs=24:ompthreads=1
#PBS -l walltime=00:30:00
#PBS -V
#PBS -o log_wrap.%j.log
#PBS -e err_wrap.%j.err

bash ${WORKSPACE}/${SRW_PLATFORM}/.cicd/scripts/srw_ftest.sh
5 changes: 5 additions & 0 deletions .cicd/scripts/srw_ftest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ sed "s|^workflow:|workflow:\n EXEC_SUBDIR: ${workspace}/install_${SRW_COMPILER}
# Decrease forecast length since we are running all the steps
sed "s|^ FCST_LEN_HRS: 12| FCST_LEN_HRS: 6|g" -i ush/config.yaml

# Update compiler
sed "s|^ COMPILER: intel| COMPILER: ${SRW_COMPILER}|g" -i ush/config.yaml

# DATA_LOCATION differs on each platform ... find it.
export DATA_LOCATION=$(grep TEST_EXTRN_MDL_SOURCE_BASEDIR ${workspace}/ush/machine/${platform,,}.yaml | awk '{printf "%s", $2}')
echo "DATA_LOCATION=${DATA_LOCATION}"
Expand All @@ -85,6 +88,8 @@ source etc/lmod-setup.sh ${platform,,}
module use modulefiles
module load build_${platform,,}_${SRW_COMPILER}
module load wflow_${platform,,}
# Deactivate conflicting conda env on GCP
[[ "${SRW_PLATFORM}" =~ "gclusternoaa" ]] && conda deactivate

[[ ${FORGIVE_CONDA} == true ]] && set +e +u # Some platforms have incomplete python3 or conda support, but wouldn't necessarily block workflow tests
conda activate srw_app
Expand Down
12 changes: 7 additions & 5 deletions .cicd/scripts/srw_metric.sh
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ cd ${workspace}

# Activate workflow environment
module load wflow_${platform,,}
# Deactivate conflicting conda env on GCP
[[ "${SRW_PLATFORM}" =~ "gclusternoaa" ]] && conda deactivate

[[ ${FORGIVE_CONDA} == true ]] && set +e +u # Some platforms have incomplete python3 or conda support, but would not necessarily block workflow tests
conda activate srw_app
Expand All @@ -98,7 +100,7 @@ if [[ ${RUN_STAT_ANLY_OPT} == true ]]; then
# Clear out data
rm -rf ${workspace}/Indy-Severe-Weather/
# Check if metprd data exists locally otherwise get it from S3
TEST_EXTRN_MDL_SOURCE_BASEDIR=$(grep TEST_EXTRN_MDL_SOURCE_BASEDIR ${workspace}/ush/machine/${SRW_PLATFORM}.yaml | awk '{print $NF}')
TEST_EXTRN_MDL_SOURCE_BASEDIR=$(grep TEST_EXTRN_MDL_SOURCE_BASEDIR ${workspace}/ush/machine/${platform}.yaml | awk '{print $NF}')
if [[ -d $(dirname ${TEST_EXTRN_MDL_SOURCE_BASEDIR})/metprd/point_stat ]] ; then
mkdir -p Indy-Severe-Weather/metprd/point_stat
cp -rp $(dirname ${TEST_EXTRN_MDL_SOURCE_BASEDIR})/metprd/point_stat Indy-Severe-Weather/metprd
Expand All @@ -108,7 +110,7 @@ if [[ ${RUN_STAT_ANLY_OPT} == true ]]; then
wget https://noaa-ufs-srw-pds.s3.amazonaws.com/sample_cases/release-public-v2.1.0/Indy-Severe-Weather.tgz
tar xvfz Indy-Severe-Weather.tgz
fi
[[ -f ${platform,,}-${srw_compiler}-skill-score.txt ]] && rm ${platform,,}-${srw_compiler}-skill-score.txt
[[ -f ${SRW_PLATFORM,,}-${srw_compiler}-skill-score.txt ]] && rm ${SRW_PLATFORM,,}-${srw_compiler}-skill-score.txt
# Skill score index is computed over several terms that are defined in parm/metplus/STATAnalysisConfig_skill_score.
# It is computed by aggregating the output from earlier runs of the Point-Stat and/or Grid-Stat tools over one or more cases.
# In this example, skill score index is a weighted average of 4 skill scores of RMSE statistics for wind speed, dew point temperature,
Expand All @@ -126,15 +128,15 @@ if [[ ${RUN_STAT_ANLY_OPT} == true ]]; then
sed -i 's|--load("conda")|load("conda")|g' ${workspace}/modulefiles/tasks/${platform,,}/run_vx.local.lua
fi
# Run stat_analysis
stat_analysis -config parm/metplus/STATAnalysisConfig_skill_score -lookin ${workspace}/Indy-Severe-Weather/metprd/point_stat -v 2 -out ${platform,,}-${srw_compiler}-skill-score.txt
stat_analysis -config parm/metplus/STATAnalysisConfig_skill_score -lookin ${workspace}/Indy-Severe-Weather/metprd/point_stat -v 2 -out ${SRW_PLATFORM,,}-${srw_compiler}-skill-score.txt

# check skill-score.txt
cat ${platform,,}-${srw_compiler}-skill-score.txt
cat ${SRW_PLATFORM,,}-${srw_compiler}-skill-score.txt

# get skill-score (SS_INDEX) and check if it is significantly smaller than 1.0
# A value greater than 1.0 indicates that the forecast model outperforms the reference,
# while a value less than 1.0 indicates that the reference outperforms the forecast.
tmp_string=$( tail -2 ${platform,,}-${srw_compiler}-skill-score.txt | head -1 )
tmp_string=$( tail -2 ${SRW_PLATFORM,,}-${srw_compiler}-skill-score.txt | head -1 )
SS_INDEX=$(echo $tmp_string | awk -F " " '{print $NF}')
echo "Skill Score: ${SS_INDEX}"
if [[ ${SS_INDEX} < "0.700" ]]; then
Expand Down
2 changes: 1 addition & 1 deletion .cicd/scripts/wrapper_srw_ftest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ do
# Return exit code and check for results file first
results_file="${WORKSPACE}/${SRW_PLATFORM}/functional_test_results_${SRW_PLATFORM}_${SRW_COMPILER}.txt"
if [ ! -f "$results_file" ]; then
echo "Missing results file! \nexit 1"
echo -e "Missing results file! \nexit 1"
exit 1
fi

Expand Down
2 changes: 1 addition & 1 deletion tests/WE2E/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,7 @@ def compare_rocotostat(expt_dict,name):
continue
line_array = line.split()
# Skip header lines
if line_array[0] == 'CYCLE':
if line_array[0] == 'CYCLE' or line_array[0] == '/apps/rocoto/1.3.3/lib/workflowmgr/launchserver.rb:40:':
continue
# We should now just have lines describing jobs, in the form:
# line_array = ['cycle','task','jobid','status','exit status','num tries','walltime']
Expand Down

0 comments on commit 8d9a137

Please sign in to comment.