Skip to content

Commit

Permalink
GitHub Workflow Pipeline (#1740)
Browse files Browse the repository at this point in the history
* added two files for this PR (now that develop has pslot tag updates)
Co-authored-by: TerrenceMcGuinness-NOAA <[email protected]>
Co-authored-by: Rahul Mahajan <[email protected]>
  • Loading branch information
TerrenceMcGuinness-NOAA authored Jul 14, 2023
1 parent 15ef84d commit 3e96ed5
Show file tree
Hide file tree
Showing 2 changed files with 193 additions and 0 deletions.
86 changes: 86 additions & 0 deletions .github/workflows/globalworkflow-ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
name: gw-ci-orion

# Started by hand only (workflow_dispatch); no push or pull_request trigger.
on: workflow_dispatch

# Every run works inside its own TEST_DIR, named after the run id, which holds
# two directories:
#   1. HOMEgfs:  clone of the global-workflow
#   2. RUNTESTS: EXPDIR and COMROT for the experiments
#
# e.g. $> tree ./TEST_DIR
# ./TEST_DIR
# ├── HOMEgfs
# └── RUNTESTS
#     ├── COMROT
#     │   └── ${pslot}
#     └── EXPDIR
#         └── ${pslot}
env:
  TEST_DIR: "${{ github.workspace }}/${{ github.run_id }}"
  MACHINE_ID: orion

jobs:
  # Clone the workflow, fetch its components, build them and link the results.
  checkout-build-link:
    runs-on:
      - self-hosted
      - orion-ready
    timeout-minutes: 600
    steps:
      - name: Checkout global-workflow
        uses: actions/checkout@v3
        with:
          # This path needs to be relative; it resolves to ${TEST_DIR}/HOMEgfs.
          path: "${{ github.run_id }}/HOMEgfs"

      - name: Checkout components
        run: |
          cd ${{ env.TEST_DIR }}/HOMEgfs/sorc
          ./checkout.sh -c -g # Options e.g. -u can be added late

      - name: Build components
        run: |
          cd ${{ env.TEST_DIR }}/HOMEgfs/sorc
          ./build_all.sh

      - name: Link artifacts
        run: |
          cd ${{ env.TEST_DIR }}/HOMEgfs/sorc
          ./link_workflow.sh

  # One job per CI case; each creates an experiment whose pslot is
  # "<case>.<run id>".
  create-experiments:
    needs: checkout-build-link
    runs-on:
      - self-hosted
      - orion-ready
    strategy:
      matrix:
        case:
          - C48_S2S
          - C96_atm3DVar
    steps:
      - name: "Create Experiments ${{ matrix.case }}"
        env:
          HOMEgfs_PR: "${{ env.TEST_DIR }}/HOMEgfs"
          RUNTESTS: "${{ env.TEST_DIR }}/RUNTESTS"
          pslot: "${{ matrix.case }}.${{ github.run_id }}"
        run: |
          cd ${{ env.TEST_DIR }}/HOMEgfs
          source workflow/gw_setup.sh
          source ci/platforms/orion.sh
          ./ci/scripts/create_experiment.py --yaml ci/cases/${{ matrix.case }}.yaml --dir ${{ env.HOMEgfs_PR }}

  # Run every experiment created above, at most two at a time. The case list
  # must stay identical to the one in create-experiments.
  run-experiments:
    needs: create-experiments
    runs-on:
      - self-hosted
      - orion-ready
    strategy:
      max-parallel: 2
      matrix:
        case:
          - C48_S2S
          - C96_atm3DVar
    steps:
      - name: "Run Experiment ${{ matrix.case }}"
        run: |
          cd ${{ env.TEST_DIR }}/HOMEgfs
          ./ci/scripts/run-check_ci.sh ${{ env.TEST_DIR }} ${{ matrix.case }}.${{ github.run_id }}

  # Delete this run's TEST_DIR from the runner workspace.
  # NOTE(review): with a plain `needs` and no `if: always()`, this job is
  # skipped when run-experiments fails, so the tree of a failed run stays on
  # the runner -- confirm that this is intended (e.g. for inspection).
  clean-up:
    needs: run-experiments
    runs-on:
      - self-hosted
      - orion-ready
    steps:
      - name: Clean-up
        run: |
          cd ${{ github.workspace }}
          rm -rf ${{ github.run_id }}
107 changes: 107 additions & 0 deletions ci/scripts/run-check_ci.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#!/bin/bash

set -eu

#####################################################################################
# Script description: script to check the status of an experiment as reported
#                     by Rocoto
#
# Usage: run-check_ci.sh [TEST_DIR [pslot]]
#   TEST_DIR  Location of the root of the testing directory
#   pslot     Name of the experiment being tested by this script
#
# Each value is taken from the positional argument when one is given and from
# the environment variable of the same name otherwise.
#####################################################################################

# ':?' (not ':-?') so that a value missing from both the argument list and the
# environment stops the script here with a message. With ':-?' the variable was
# silently set to the literal string "?" and every path below was built from it.
TEST_DIR="${1:-${TEST_DIR:?FATAL ERROR: TEST_DIR must be given as argument 1 or set in the environment}}"
pslot="${2:-${pslot:?FATAL ERROR: pslot must be given as argument 2 or set in the environment}}"

# TEST_DIR contains 2 directories;
# 1. HOMEgfs: clone of the global-workflow
# 2. RUNTESTS: A directory containing EXPDIR and COMROT for experiments
# e.g. $> tree ./TEST_DIR
# ./TEST_DIR
# ├── HOMEgfs
# └── RUNTESTS
#     ├── COMROT
#     │   └── ${pslot}
#     └── EXPDIR
#         └── ${pslot}
HOMEgfs="${TEST_DIR}/HOMEgfs"
RUNTESTS="${TEST_DIR}/RUNTESTS"

# Load the workflow environment (modules and logging setup) from the clone
echo "Source modules."
source "${HOMEgfs}/workflow/gw_setup.sh"

# Everything below runs from inside the experiment's EXPDIR
echo "cd ${RUNTESTS}/EXPDIR/${pslot}"
if ! cd "${RUNTESTS}/EXPDIR/${pslot}"; then
  echo "FATAL ERROR: Unable to cd into '${RUNTESTS}/EXPDIR/${pslot}', ABORT!"
  exit 1
fi

# Rocoto XML and database files, both named after the experiment
xml="${pslot}.xml"
db="${pslot}.db"

# No XML means there is nothing to run: exit code 1
[[ -f "${xml}" ]] || {
  echo "FATAL ERROR: XML file ${xml} not found in '${pslot}', experiment ${pslot} failed, ABORT!"
  exit 1
}

# First rocotorun pass, then give it 30 seconds before looking for the database
echo "Launch experiment with Rocoto."
rocotorun -v "${ROCOTO_VERBOSE:-0}" -w "${xml}" -d "${db}"
sleep 30

# A missing database after the first pass is treated as a failed launch: exit code 2
[[ -f "${db}" ]] || {
  echo "FATAL ERROR: Rocoto database file ${db} not found, experiment ${pslot} failed, ABORT!"
  exit 2
}

# Experiment launched: advance it with rocotorun and poll rocotostat until either
# a task has failed (exit 1) or every cycle is reported Done (exit 0).
# rc keeps its initial 99 only if the loop is ever left without setting it.
rc=99
while true; do

  echo "Run rocotorun."
  rocotorun -v "${ROCOTO_VERBOSE:-0}" -w "${xml}" -d "${db}"

  # Wait before running rocotostat
  sleep 30

  # Get job statistics.
  # Each '|| true' keeps 'set -e' from ending the script when grep finds no match
  # (grep then exits non-zero, although 'grep -c' has still printed 0).
  echo "Gather Rocoto statistics"
  rocotostat_output=$(rocotostat -w "${xml}" -d "${db}" -s | grep -v CYCLE) || true
  num_cycles=$(echo "${rocotostat_output}" | wc -l) || true
  num_done=$(echo "${rocotostat_output}" | grep -c Done) || true
  num_succeeded=$(rocotostat -w "${xml}" -d "${db}" -a | grep -c SUCCEEDED) || true
  num_failed=$(rocotostat -w "${xml}" -d "${db}" -a | grep -c -E 'FAIL|DEAD') || true

  echo "${pslot} Total Cycles: ${num_cycles} number done: ${num_done}"

  # Any failed or dead task ends the experiment
  if [[ ${num_failed} -ne 0 ]]; then
    {
      echo "Experiment ${pslot} Terminated with ${num_failed} tasks failed at $(date)" || true
      echo "Experiment ${pslot} Terminated: *FAILED*"
    } >> "${RUNTESTS}/ci.log"

    # Ask rocotocheck about every failed/dead (cycle, task) pair and keep the
    # second field of the lines that mention "join"
    error_logs=$(rocotostat -d "${db}" -w "${xml}" | grep -E 'FAIL|DEAD' | awk '{print "-c", $1, "-t", $2}' | xargs rocotocheck -d "${db}" -w "${xml}" | grep join | awk '{print $2}') || true
    {
      echo "Error logs:"
      echo "${error_logs}"
    } >> "${RUNTESTS}/ci.log"
    sed -i "s/\`\`\`//2g" "${RUNTESTS}/ci.log"

    # Cancel the batch jobs this experiment still has in Slurm.
    # The work-directory filter used to be "${pr}\/RUNTESTS", but nothing in this
    # script defines 'pr'; under 'set -u' that stage failed, so no job id ever
    # reached scancel. The filter now matches the fixed string "/RUNTESTS", and
    # pslot is matched literally as well because it may contain '.'.
    sacct --format=jobid,jobname%35,WorkDir%100,stat | grep -F -- "${pslot}" | grep -F "/RUNTESTS" | awk '{print $1}' | xargs scancel || true
    rc=1
    break
  fi

  # Every cycle reported Done: the experiment succeeded
  if [[ "${num_done}" -eq "${num_cycles}" ]]; then
    {
      echo "Experiment ${pslot} Completed at $(date)" || true
      echo "with ${num_succeeded} successfully completed jobs" || true
      echo "Experiment ${pslot} Completed: *SUCCESS*"
    } >> "${RUNTESTS}/ci.log"
    sed -i "s/\`\`\`//2g" "${RUNTESTS}/ci.log"
    rc=0
    break
  fi

  # Wait before running rocotorun again
  sleep 300

done

exit "${rc}"

0 comments on commit 3e96ed5

Please sign in to comment.