From b405b7d3d11d384ce9fe3b9cd2180f315f7b38f2 Mon Sep 17 00:00:00 2001 From: Dan Holdaway <27729500+danholdaway@users.noreply.github.com> Date: Wed, 8 May 2024 20:52:48 -0400 Subject: [PATCH] Use JCB for assembling JEDI YAML files for atmospheric GDAS (#2477) Change the JEDI YAML assembly for the atmospheric GDAS to use the JEDI Configuration Builder (JCB) tool so that YAMLs can be made more portable and invoke the observation chronicle mechanism. Resolves #2476 Co-authored-by: danholdaway Co-authored-by: Walter Kolczynski - NOAA --- .gitignore | 7 +++++ .gitmodules | 4 +++ parm/config/gfs/config.atmanl | 10 ++++--- parm/config/gfs/config.atmanlfv3inc | 2 +- parm/config/gfs/config.atmensanl | 5 ++-- sorc/gdas.cd | 2 +- sorc/jcb | 1 + sorc/link_workflow.sh | 17 +++++++---- ush/python/pygfs/task/analysis.py | 38 ++++++++++++++++++++---- ush/python/pygfs/task/atm_analysis.py | 8 +++-- ush/python/pygfs/task/atmens_analysis.py | 3 ++ 11 files changed, 76 insertions(+), 21 deletions(-) create mode 160000 sorc/jcb diff --git a/.gitignore b/.gitignore index 943ad64e1a..04193fca0a 100644 --- a/.gitignore +++ b/.gitignore @@ -48,6 +48,8 @@ parm/gdas/io parm/gdas/ioda parm/gdas/snow parm/gdas/soca +parm/gdas/jcb-gdas +parm/gdas/jcb-algorithms parm/monitor parm/post/AEROSOL_LUTS.dat parm/post/nam_micro_lookup.dat @@ -195,3 +197,8 @@ versions/run.ver ush/python/wxflow workflow/wxflow ci/scripts/wxflow + +# jcb checkout and symlinks +ush/python/jcb +workflow/jcb +ci/scripts/jcb diff --git a/.gitmodules b/.gitmodules index 4851e232ee..ea1b5c06af 100644 --- a/.gitmodules +++ b/.gitmodules @@ -30,3 +30,7 @@ path = sorc/upp.fd url = https://github.com/NOAA-EMC/UPP.git ignore = dirty +[submodule "sorc/jcb"] + path = sorc/jcb + url = https://github.com/noaa-emc/jcb + fetchRecurseSubmodules = false diff --git a/parm/config/gfs/config.atmanl b/parm/config/gfs/config.atmanl index 5eb692b473..dd8ca80b11 100644 --- a/parm/config/gfs/config.atmanl +++ b/parm/config/gfs/config.atmanl @@ -5,18 +5,20 @@ echo "BEGIN: config.atmanl" -export OBS_LIST="${PARMgfs}/gdas/atm/obs/lists/gdas_prototype_3d.yaml.j2" -export JEDIYAML="${PARMgfs}/gdas/atm/variational/3dvar_drpcg.yaml.j2" +export JCB_BASE_YAML="${PARMgfs}/gdas/atm/jcb-base.yaml.j2" +export JCB_ALGO_YAML="${PARMgfs}/gdas/atm/jcb-prototype_3dvar.yaml.j2" + export STATICB_TYPE="gsibec" +export LOCALIZATION_TYPE="bump" export INTERP_METHOD='barycentric' if [[ ${DOHYBVAR} = "YES" ]]; then # shellcheck disable=SC2153 export CASE_ANL=${CASE_ENS} - export BERROR_YAML="${PARMgfs}/gdas/atm/berror/hybvar_${STATICB_TYPE}.yaml.j2" + export BERROR_YAML="background_error_hybrid_${STATICB_TYPE}_${LOCALIZATION_TYPE}" else export CASE_ANL=${CASE} - export BERROR_YAML="${PARMgfs}/gdas/atm/berror/staticb_${STATICB_TYPE}.yaml.j2" + export BERROR_YAML="background_error_static_${STATICB_TYPE}" fi export CRTM_FIX_YAML="${PARMgfs}/gdas/atm_crtm_coeff.yaml.j2" diff --git a/parm/config/gfs/config.atmanlfv3inc b/parm/config/gfs/config.atmanlfv3inc index 14c11d3dd3..ab7efa3a60 100644 --- a/parm/config/gfs/config.atmanlfv3inc +++ b/parm/config/gfs/config.atmanlfv3inc @@ -8,7 +8,7 @@ echo "BEGIN: config.atmanlfv3inc" # Get task specific resources . "${EXPDIR}/config.resources" atmanlfv3inc -export JEDIYAML=${PARMgfs}/gdas/atm/utils/fv3jedi_fv3inc_variational.yaml.j2 +export JCB_ALGO=fv3jedi_fv3inc_variational export JEDIEXE=${EXECgfs}/fv3jedi_fv3inc.x echo "END: config.atmanlfv3inc" diff --git a/parm/config/gfs/config.atmensanl b/parm/config/gfs/config.atmensanl index 23eab7f7b9..3484cb670d 100644 --- a/parm/config/gfs/config.atmensanl +++ b/parm/config/gfs/config.atmensanl @@ -5,8 +5,9 @@ echo "BEGIN: config.atmensanl" -export OBS_LIST="${PARMgfs}/gdas/atm/obs/lists/lgetkf_prototype.yaml.j2" -export JEDIYAML="${PARMgfs}/gdas/atm/lgetkf/lgetkf.yaml.j2" +export JCB_BASE_YAML="${PARMgfs}/gdas/atm/jcb-base.yaml.j2" +export JCB_ALGO_YAML="${PARMgfs}/gdas/atm/jcb-prototype_lgetkf.yaml.j2" + export INTERP_METHOD='barycentric' export CRTM_FIX_YAML="${PARMgfs}/gdas/atm_crtm_coeff.yaml.j2" diff --git a/sorc/gdas.cd b/sorc/gdas.cd index 70f1319139..2b2d417a96 160000 --- a/sorc/gdas.cd +++ b/sorc/gdas.cd @@ -1 +1 @@ -Subproject commit 70f13191391d0909e92da47dc7d17ddf1dc4c6c6 +Subproject commit 2b2d417a96528527d7d3e7eedaccf150dc075d92 diff --git a/sorc/jcb b/sorc/jcb new file mode 160000 index 0000000000..de75655d81 --- /dev/null +++ b/sorc/jcb @@ -0,0 +1 @@ +Subproject commit de75655d81ec2ee668d8d47bf4a43625c81dde7c diff --git a/sorc/link_workflow.sh b/sorc/link_workflow.sh index 0041ce083b..c5d7243e8f 100755 --- a/sorc/link_workflow.sh +++ b/sorc/link_workflow.sh @@ -88,11 +88,17 @@ if [[ "${LINK_NEST:-OFF}" == "ON" ]] ; then source "${HOMEgfs}/versions/fix.nest.ver" fi -# Link wxflow in ush/python, workflow and ci/scripts +# Link python pacakges in ush/python +# TODO: This will be unnecessary when these are part of the virtualenv +packages=("wxflow" "jcb") +for package in "${packages[@]}"; do + cd "${HOMEgfs}/ush/python" || exit 1 + [[ -s "${package}" ]] && rm -f "${package}" + ${LINK} "${HOMEgfs}/sorc/${package}/src/${package}" . +done + +# Link wxflow in workflow and ci/scripts # TODO: This will be unnecessary when wxflow is part of the virtualenv -cd "${HOMEgfs}/ush/python" || exit 1 -[[ -s "wxflow" ]] && rm -f wxflow -${LINK} "${HOMEgfs}/sorc/wxflow/src/wxflow" . cd "${HOMEgfs}/workflow" || exit 1 [[ -s "wxflow" ]] && rm -f wxflow ${LINK} "${HOMEgfs}/sorc/wxflow/src/wxflow" . @@ -100,7 +106,6 @@ cd "${HOMEgfs}/ci/scripts" || exit 1 [[ -s "wxflow" ]] && rm -f wxflow ${LINK} "${HOMEgfs}/sorc/wxflow/src/wxflow" . - # Link fix directories if [[ -n "${FIX_DIR}" ]]; then if [[ ! -d "${HOMEgfs}/fix" ]]; then mkdir "${HOMEgfs}/fix" || exit 1; fi @@ -228,7 +233,7 @@ fi #------------------------------ if [[ -d "${HOMEgfs}/sorc/gdas.cd" ]]; then cd "${HOMEgfs}/parm/gdas" || exit 1 - declare -a gdasapp_comps=("aero" "atm" "io" "ioda" "snow" "soca") + declare -a gdasapp_comps=("aero" "atm" "io" "ioda" "snow" "soca" "jcb-gdas" "jcb-algorithms") for comp in "${gdasapp_comps[@]}"; do [[ -d "${comp}" ]] && rm -rf "${comp}" ${LINK_OR_COPY} "${HOMEgfs}/sorc/gdas.cd/parm/${comp}" . diff --git a/ush/python/pygfs/task/analysis.py b/ush/python/pygfs/task/analysis.py index 5a516a02c8..5464c25370 100644 --- a/ush/python/pygfs/task/analysis.py +++ b/ush/python/pygfs/task/analysis.py @@ -6,8 +6,9 @@ from logging import getLogger from pprint import pformat from netCDF4 import Dataset -from typing import List, Dict, Any, Union +from typing import List, Dict, Any, Union, Optional +from jcb import render from wxflow import (parse_j2yaml, FileHandler, rm_p, logit, Task, Executable, WorkflowException, to_fv3time, to_YMD, Template, TemplateConstants) @@ -46,11 +47,14 @@ def initialize(self) -> None: self.link_jediexe() @logit(logger) - def get_jedi_config(self) -> Dict[str, Any]: + def get_jedi_config(self, algorithm: Optional[str] = None) -> Dict[str, Any]: """Compile a dictionary of JEDI configuration from JEDIYAML template file Parameters ---------- + algorithm (optional) : str + Name of the algorithm to use in the JEDI configuration. Will override the algorithm + set in the self.config.JCB_<>_YAML file Returns ---------- @@ -60,7 +64,31 @@ def get_jedi_config(self) -> Dict[str, Any]: # generate JEDI YAML file logger.info(f"Generate JEDI YAML config: {self.task_config.jedi_yaml}") - jedi_config = parse_j2yaml(self.task_config.JEDIYAML, self.task_config, searchpath=self.gdasapp_j2tmpl_dir) + + if 'JCB_BASE_YAML' in self.task_config.keys(): + # Step 1: fill templates of the jcb base YAML file + jcb_config = parse_j2yaml(self.task_config.JCB_BASE_YAML, self.task_config) + + # Step 2: (optional) fill templates of algorithm override YAML and merge + if 'JCB_ALGO_YAML' in self.task_config.keys(): + jcb_algo_config = parse_j2yaml(self.task_config.JCB_ALGO_YAML, self.task_config) + jcb_config = {**jcb_config, **jcb_algo_config} + + # If algorithm is present override the algorithm in the JEDI config + if algorithm: + jcb_config['algorithm'] = algorithm + + # Step 3: generate the JEDI Yaml using JCB driving YAML + jedi_config = render(jcb_config) + elif 'JEDIYAML' in self.task_config.keys(): + # Generate JEDI YAML file (without using JCB) + logger.info(f"Generate JEDI YAML config: {self.task_config.jedi_yaml}") + jedi_config = parse_j2yaml(self.task_config.JEDIYAML, self.task_config, + searchpath=self.gdasapp_j2tmpl_dir) + logger.debug(f"JEDI config:\n{pformat(jedi_config)}") + else: + raise KeyError(f"Task config must contain JCB_BASE_YAML or JEDIYAML") + logger.debug(f"JEDI config:\n{pformat(jedi_config)}") return jedi_config @@ -82,7 +110,7 @@ def get_obs_dict(self) -> Dict[str, Any]: a dictionary containing the list of observation files to copy for FileHandler """ - logger.info(f"Extracting a list of observation files from {self.task_config.JEDIYAML}") + logger.info(f"Extracting a list of observation files from Jedi config file") observations = find_value_in_nested_dict(self.task_config.jedi_config, 'observations') logger.debug(f"observations:\n{pformat(observations)}") @@ -116,7 +144,7 @@ def get_bias_dict(self) -> Dict[str, Any]: a dictionary containing the list of observation bias files to copy for FileHandler """ - logger.info(f"Extracting a list of bias correction files from {self.task_config.JEDIYAML}") + logger.info(f"Extracting a list of bias correction files from Jedi config file") observations = find_value_in_nested_dict(self.task_config.jedi_config, 'observations') logger.debug(f"observations:\n{pformat(observations)}") diff --git a/ush/python/pygfs/task/atm_analysis.py b/ush/python/pygfs/task/atm_analysis.py index 47d291268e..95545c57a4 100644 --- a/ush/python/pygfs/task/atm_analysis.py +++ b/ush/python/pygfs/task/atm_analysis.py @@ -49,6 +49,9 @@ def __init__(self, config): 'APREFIX': f"{self.runtime_config.CDUMP}.t{self.runtime_config.cyc:02d}z.", # TODO: CDUMP is being replaced by RUN 'GPREFIX': f"gdas.t{self.runtime_config.previous_cycle.hour:02d}z.", 'jedi_yaml': _jedi_yaml, + 'atm_obsdatain_path': f"{self.runtime_config.DATA}/obs/", + 'atm_obsdataout_path': f"{self.runtime_config.DATA}/diags/", + 'BKG_TSTEP': "PT1H" # Placeholder for 4D applications } ) @@ -137,8 +140,9 @@ def variational(self: Analysis) -> None: @logit(logger) def init_fv3_increment(self: Analysis) -> None: # Setup JEDI YAML file - self.task_config.jedi_yaml = os.path.join(self.runtime_config.DATA, os.path.basename(self.task_config.JEDIYAML)) - save_as_yaml(self.get_jedi_config(), self.task_config.jedi_yaml) + self.task_config.jedi_yaml = os.path.join(self.runtime_config.DATA, + f"{self.task_config.JCB_ALGO}.yaml") + save_as_yaml(self.get_jedi_config(self.task_config.JCB_ALGO), self.task_config.jedi_yaml) # Link JEDI executable to run directory self.task_config.jedi_exe = self.link_jediexe() diff --git a/ush/python/pygfs/task/atmens_analysis.py b/ush/python/pygfs/task/atmens_analysis.py index a1aecfe07c..5aaacc42e8 100644 --- a/ush/python/pygfs/task/atmens_analysis.py +++ b/ush/python/pygfs/task/atmens_analysis.py @@ -46,6 +46,9 @@ def __init__(self, config): 'APREFIX': f"{self.runtime_config.CDUMP}.t{self.runtime_config.cyc:02d}z.", # TODO: CDUMP is being replaced by RUN 'GPREFIX': f"gdas.t{self.runtime_config.previous_cycle.hour:02d}z.", 'jedi_yaml': _jedi_yaml, + 'atm_obsdatain_path': f"./obs/", + 'atm_obsdataout_path': f"./diags/", + 'BKG_TSTEP': "PT1H" # Placeholder for 4D applications } )