From c8534c8393b4042a3c6b49f040ca23c2a1744829 Mon Sep 17 00:00:00 2001 From: DavidHuber Date: Mon, 23 Oct 2023 15:31:26 +0000 Subject: [PATCH 1/3] Multithread the checkout script #1953 --- sorc/checkout.sh | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/sorc/checkout.sh b/sorc/checkout.sh index 9c9addad1d..2578422147 100755 --- a/sorc/checkout.sh +++ b/sorc/checkout.sh @@ -34,7 +34,7 @@ function checkout() { # logdir [default: $(pwd)]: where you want logfiles written # CLEAN [default: NO]: whether to delete existing directories and create a fresh clone # - # Usage: checkout <dir> <remote> <version> <recursive> + # Usage: checkout <dir> <remote> <version> <cpus> <recursive> # # Arguments # dir: Directory for the clone @@ -48,7 +48,8 @@ function checkout() { dir="$1" remote="$2" version="$3" - recursive=${4:-"YES"} + cpus="$4" + recursive=${5:-"YES"} name=$(echo "${dir}" | cut -d '.' -f 1) echo "Performing checkout of ${name}" @@ -90,7 +91,7 @@ function checkout() { fi if [[ "${recursive}" == "YES" ]]; then echo "|-- Updating submodules (if any)" - git submodule update --init --recursive >> "${logfile}" 2>&1 + git submodule update --init --recursive -j "${cpus}" >> "${logfile}" 2>&1 status=$? 
if ((status > 0)); then echo " WARNING: Error while updating submodules of ${name}" @@ -149,25 +150,30 @@ source "${topdir}/../workflow/gw_setup.sh" # The checkout version should always be a speciifc commit (hash or tag), not a branch errs=0 -checkout "wxflow" "https://github.com/NOAA-EMC/wxflow" "528f5ab" ; errs=$((errs + $?)) -checkout "gfs_utils.fd" "https://github.com/NOAA-EMC/gfs-utils" "a283262" ; errs=$((errs + $?)) -checkout "ufs_utils.fd" "https://github.com/ufs-community/UFS_UTILS.git" "72a0471" ; errs=$((errs + $?)) -checkout "ufs_model.fd" "https://github.com/ufs-community/ufs-weather-model" "${ufs_model_hash:-4d05445}" ; errs=$((errs + $?)) -checkout "verif-global.fd" "https://github.com/NOAA-EMC/EMC_verif-global.git" "c267780" ; errs=$((errs + $?)) +checkout "ufs_model.fd" "https://github.com/ufs-community/ufs-weather-model" "${ufs_model_hash:-4d05445}" "8" ; errs=$((errs + $?)) + +checkout "wxflow" "https://github.com/NOAA-EMC/wxflow" "528f5ab" "1" & +checkout "gfs_utils.fd" "https://github.com/NOAA-EMC/gfs-utils" "a283262" "1" & +checkout "ufs_utils.fd" "https://github.com/ufs-community/UFS_UTILS.git" "72a0471" "1" & +checkout "verif-global.fd" "https://github.com/NOAA-EMC/EMC_verif-global.git" "c267780" "1" & if [[ ${checkout_gsi} == "YES" ]]; then - checkout "gsi_enkf.fd" "https://github.com/NOAA-EMC/GSI.git" "ca19008" "NO"; errs=$((errs + $?)) + checkout "gsi_enkf.fd" "https://github.com/NOAA-EMC/GSI.git" "ca19008" "1" "NO" & fi if [[ ${checkout_gdas} == "YES" ]]; then - checkout "gdas.cd" "https://github.com/NOAA-EMC/GDASApp.git" "d347d22"; errs=$((errs + $?)) + checkout "gdas.cd" "https://github.com/NOAA-EMC/GDASApp.git" "d347d22" "1" & fi if [[ ${checkout_gsi} == "YES" || ${checkout_gdas} == "YES" ]]; then - checkout "gsi_utils.fd" "https://github.com/NOAA-EMC/GSI-Utils.git" "322cc7b"; errs=$((errs + $?)) - checkout "gsi_monitor.fd" "https://github.com/NOAA-EMC/GSI-Monitor.git" "45783e3"; errs=$((errs + $?)) + checkout "gsi_utils.fd" 
"https://github.com/NOAA-EMC/GSI-Utils.git" "322cc7b" "1" & + checkout "gsi_monitor.fd" "https://github.com/NOAA-EMC/GSI-Monitor.git" "45783e3" "1" & fi +for checkout_pid in $(jobs -p); do + wait "${checkout_pid}" || errs=$((errs + $?)) +done + if (( errs > 0 )); then echo "WARNING: One or more errors encountered during checkout process, please check logs before building" fi From 3a3bddddfbafbe498802759e809520bc2293a137 Mon Sep 17 00:00:00 2001 From: DavidHuber Date: Mon, 23 Oct 2023 15:45:25 +0000 Subject: [PATCH 2/3] Add comments to clarify multithreading in checkout.sh refs #1953 --- sorc/checkout.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sorc/checkout.sh b/sorc/checkout.sh index 2578422147..ba76113d53 100755 --- a/sorc/checkout.sh +++ b/sorc/checkout.sh @@ -150,8 +150,10 @@ source "${topdir}/../workflow/gw_setup.sh" # The checkout version should always be a speciifc commit (hash or tag), not a branch errs=0 +# Checkout UFS submodules in parallel checkout "ufs_model.fd" "https://github.com/ufs-community/ufs-weather-model" "${ufs_model_hash:-4d05445}" "8" ; errs=$((errs + $?)) +# Run all other checkouts simultaneously with just 1 core each to handle submodules. checkout "wxflow" "https://github.com/NOAA-EMC/wxflow" "528f5ab" "1" & checkout "gfs_utils.fd" "https://github.com/NOAA-EMC/gfs-utils" "a283262" "1" & checkout "ufs_utils.fd" "https://github.com/ufs-community/UFS_UTILS.git" "72a0471" "1" & @@ -170,6 +172,7 @@ if [[ ${checkout_gsi} == "YES" || ${checkout_gdas} == "YES" ]]; then checkout "gsi_monitor.fd" "https://github.com/NOAA-EMC/GSI-Monitor.git" "45783e3" "1" & fi +# Go through each PID and verify no errors were reported. for checkout_pid in $(jobs -p); do wait "${checkout_pid}" || errs=$((errs + $?)) done From ef3731ed560397bffc5b38e40b20775b4e6ecc95 Mon Sep 17 00:00:00 2001 From: David Huber <69919478+DavidHuber-NOAA@users.noreply.github.com> Date: Mon, 23 Oct 2023 13:35:56 -0400 Subject: [PATCH 3/3] Set default CPU count in checkout(). 
Co-authored-by: Rahul Mahajan --- sorc/checkout.sh | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sorc/checkout.sh b/sorc/checkout.sh index ba76113d53..3554cd99da 100755 --- a/sorc/checkout.sh +++ b/sorc/checkout.sh @@ -34,7 +34,7 @@ function checkout() { # logdir [default: $(pwd)]: where you want logfiles written # CLEAN [default: NO]: whether to delete existing directories and create a fresh clone # - # Usage: checkout + # Usage: checkout # # Arguments # dir: Directory for the clone @@ -48,7 +48,7 @@ function checkout() { dir="$1" remote="$2" version="$3" - cpus="$4" + cpus="${4:-1}" # Default 1 thread recursive=${5:-"YES"} name=$(echo "${dir}" | cut -d '.' -f 1) @@ -154,22 +154,22 @@ errs=0 checkout "ufs_model.fd" "https://github.com/ufs-community/ufs-weather-model" "${ufs_model_hash:-4d05445}" "8" ; errs=$((errs + $?)) # Run all other checkouts simultaneously with just 1 core each to handle submodules. -checkout "wxflow" "https://github.com/NOAA-EMC/wxflow" "528f5ab" "1" & -checkout "gfs_utils.fd" "https://github.com/NOAA-EMC/gfs-utils" "a283262" "1" & -checkout "ufs_utils.fd" "https://github.com/ufs-community/UFS_UTILS.git" "72a0471" "1" & -checkout "verif-global.fd" "https://github.com/NOAA-EMC/EMC_verif-global.git" "c267780" "1" & +checkout "wxflow" "https://github.com/NOAA-EMC/wxflow" "528f5ab" & +checkout "gfs_utils.fd" "https://github.com/NOAA-EMC/gfs-utils" "a283262" & +checkout "ufs_utils.fd" "https://github.com/ufs-community/UFS_UTILS.git" "72a0471" & +checkout "verif-global.fd" "https://github.com/NOAA-EMC/EMC_verif-global.git" "c267780" & if [[ ${checkout_gsi} == "YES" ]]; then checkout "gsi_enkf.fd" "https://github.com/NOAA-EMC/GSI.git" "ca19008" "1" "NO" & fi if [[ ${checkout_gdas} == "YES" ]]; then - checkout "gdas.cd" "https://github.com/NOAA-EMC/GDASApp.git" "d347d22" "1" & + checkout "gdas.cd" "https://github.com/NOAA-EMC/GDASApp.git" "d347d22" & fi if [[ ${checkout_gsi} == "YES" || ${checkout_gdas} == 
"YES" ]]; then - checkout "gsi_utils.fd" "https://github.com/NOAA-EMC/GSI-Utils.git" "322cc7b" "1" & - checkout "gsi_monitor.fd" "https://github.com/NOAA-EMC/GSI-Monitor.git" "45783e3" "1" & + checkout "gsi_utils.fd" "https://github.com/NOAA-EMC/GSI-Utils.git" "322cc7b" & + checkout "gsi_monitor.fd" "https://github.com/NOAA-EMC/GSI-Monitor.git" "45783e3" & fi # Go through each PID and verify no errors were reported.