From dfbef2ee2b771819ce4bacc7f165ecf9afab118d Mon Sep 17 00:00:00 2001 From: Dougie Squire <42455466+dougiesquire@users.noreply.github.com> Date: Fri, 15 Mar 2024 16:45:47 +1100 Subject: [PATCH] Include metadata validation and template scripts in package (#158) * move metadata scripts to console_scripts * add metadata_validate FileNotFoundError test * add info on cmdline scripts to docs * address review comments --- bin/create_metadata_yaml_template.py | 35 --------------- bin/validate_metadata_yaml.py | 34 -------------- docs/management/building.rst | 16 +++++++ docs/management/release.rst | 17 +++---- pyproject.toml | 2 + src/access_nri_intake/cli.py | 61 +++++++++++++++++++++++--- tests/data/access-esm1-5/metadata.yaml | 30 +++++++++++++ tests/data/access-om3/metadata.yaml | 5 ++- tests/test_cli.py | 57 ++++++++++++++++++++++-- 9 files changed, 168 insertions(+), 89 deletions(-) delete mode 100755 bin/create_metadata_yaml_template.py delete mode 100755 bin/validate_metadata_yaml.py diff --git a/bin/create_metadata_yaml_template.py b/bin/create_metadata_yaml_template.py deleted file mode 100755 index ec43fd2..0000000 --- a/bin/create_metadata_yaml_template.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2023 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. 
-# SPDX-License-Identifier: Apache-2.0 - -import argparse - -import yaml - -from access_nri_intake.catalog import EXP_JSONSCHEMA - - -def main(): - argparse.ArgumentParser( - description="Generate a template for metadata.yaml from the ACCESS-NRI schema" - ) - - template = {} - for name, descr in EXP_JSONSCHEMA["properties"].items(): - if name in EXP_JSONSCHEMA["required"]: - description = f"<REQUIRED {descr['description']}>" - else: - description = f"<{descr['description']}>" - - if descr["type"] == "array": - description = [description] - - template[name] = description - - with open("../metadata.yaml", "w") as outfile: - yaml.dump(template, outfile, default_flow_style=False, sort_keys=False) - - -if __name__ == "__main__": - main() diff --git a/bin/validate_metadata_yaml.py b/bin/validate_metadata_yaml.py deleted file mode 100755 index 6c69fcd..0000000 --- a/bin/validate_metadata_yaml.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2023 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. -# SPDX-License-Identifier: Apache-2.0 - -import argparse -import glob - -from access_nri_intake.catalog import EXP_JSONSCHEMA -from access_nri_intake.utils import load_metadata_yaml - - -def main(): - - parser = argparse.ArgumentParser( - description="Validate the schema of a metadata.yaml file" - ) - parser.add_argument( - "file", - type=str, - help="The path to the metadata.yaml file (can include wildcards for multiple metadata.yaml)", - ) - - args = parser.parse_args() - file = args.file - - for f in glob.glob(file): - print(f"Validating {f}... 
", end="") - load_metadata_yaml(f, EXP_JSONSCHEMA) - print("success") - - -if __name__ == "__main__": - main() diff --git a/docs/management/building.rst b/docs/management/building.rst index fad0128..5de9bcb 100644 --- a/docs/management/building.rst +++ b/docs/management/building.rst @@ -86,3 +86,19 @@ is called to ensure that all required metadata is available prior to building th Ideally this file will live in the base output directory of your model run so that it's easy for others to find, even if they aren't using the catalog (but it doesn't have to). + +.. note:: + + The access-nri-intake package includes some command-line utility scripts to help with creating and + validating :code:`metadata.yaml` files: + + * To create an empty :code:`metadata.yaml` template in the current directory:: + + $ metadata-template + + You'll then need to replace all the values enclosed in :code:`<>`. Fields marked as :code:`REQUIRED` are + required. All other fields are encouraged but can be deleted or commented out if they are not relevant. + + * To validate a :code:`metadata.yaml` file (i.e. to check that required fields are present with required types):: + + $ metadata-validate diff --git a/docs/management/release.rst b/docs/management/release.rst index 1505be8..7661003 100644 --- a/docs/management/release.rst +++ b/docs/management/release.rst @@ -10,15 +10,16 @@ person doing the release must ensure that the version of the new catalog matches following all steps below. Steps 1 and 2 below should be done in a PR and merged before commencing step 3. If the release does not include an update to the catalog on Gadi, skip the first two steps below: -#. [IF UPDATING THE CATALOG] Create a new version of the catalog on Gadi (this will take about 1 hour) - :: - $ export RELEASE=vX.X.X - $ cd bin - $ qsub -v version=${RELEASE} build_all.sh +#. 
[IF UPDATING THE CATALOG] Create a new version of the catalog on Gadi (this will take about 1 hour):: + + $ export RELEASE=vX.X.X + $ cd bin + $ qsub -v version=${RELEASE} build_all.sh + .. note:: - If the `schema `_ has changed, or you have not used the intake catalog recently, this step may fail with a *Network is unreachable* error trying to download the schema json files. To download and cache the schema, first import the :code:`access_nri_intake.source` and :code:`access_nri_intake.catalog` sub-packages from a Gadi node with network access (e.g. a login or ARE node). I.e., using the release version of :code:`access_nri_intake` - :: - $ python3 -c "from access_nri_intake import source, catalog" + If the `schema `_ has changed, or you have not used the intake catalog recently, this step may fail with a *Network is unreachable* error trying to download the schema json files. To download and cache the schema, first import the :code:`access_nri_intake.source` and :code:`access_nri_intake.catalog` sub-packages from a Gadi node with network access (e.g. a login or ARE node). I.e., using the release version of :code:`access_nri_intake`:: + + $ python3 -c "from access_nri_intake import source, catalog" This will cache a copy of the schema in your home directory. 
Then re-run ``$ qsub -v version=${RELEASE} build_all.sh`` diff --git a/pyproject.toml b/pyproject.toml index e5490b9..0e3be7b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,8 @@ dynamic = ["version"] [project.scripts] catalog-build = "access_nri_intake.cli:build" +metadata-validate = "access_nri_intake.cli:metadata_validate" +metadata-template = "access_nri_intake.cli:metadata_template" [project.entry-points."intake.catalogs"] access_nri = "access_nri_intake.data:data" diff --git a/src/access_nri_intake/cli.py b/src/access_nri_intake/cli.py index 2d0b398..66217d1 100644 --- a/src/access_nri_intake/cli.py +++ b/src/access_nri_intake/cli.py @@ -1,7 +1,7 @@ # Copyright 2023 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. # SPDX-License-Identifier: Apache-2.0 -""" Command line interface for access-nri-intake """ +""" Command line interfaces for access-nri-intake """ import argparse import logging @@ -23,9 +23,9 @@ class MetadataCheckError(Exception): pass -def _parse_inputs(config_yamls, build_path): +def _parse_build_inputs(config_yamls, build_path): """ - Parse inputs into a list of tuples of CatalogManager methods and args to + Parse build inputs into a list of tuples of CatalogManager methods and args to pass to the methods """ @@ -70,9 +70,9 @@ def _parse_inputs(config_yamls, build_path): return args -def _check_args(args_list): +def _check_build_args(args_list): """ - Run some checks on the parsed argmuents to be passed to the CatalogManager + Run some checks on the parsed build argmuents to be passed to the CatalogManager """ names = [] @@ -168,8 +168,8 @@ def build(): os.makedirs(build_path, exist_ok=True) # Parse inputs to pass to CatalogManager - parsed_sources = _parse_inputs(config_yamls, build_path) - _check_args([parsed_source[1] for parsed_source in parsed_sources]) + parsed_sources = _parse_build_inputs(config_yamls, build_path) + _check_build_args([parsed_source[1] for parsed_source in parsed_sources]) # Get 
the project storage flags def _get_project(path): @@ -218,3 +218,50 @@ def _get_project(path): if update: with open(os.path.join(_here, "data", "catalog.yaml"), "w") as fobj: yaml.dump(yaml_dict, fobj) + + +def metadata_validate(): + """ + Check provided metadata.yaml file(s) against the experiment schema + """ + + parser = argparse.ArgumentParser(description="Validate a metadata.yaml file") + parser.add_argument( + "file", + nargs="+", + help="The path to the metadata.yaml file. Multiple file paths can be passed.", + ) + + args = parser.parse_args() + files = args.file + + for f in files: + if os.path.isfile(f): + print(f"Validating {f}... ", end="") + load_metadata_yaml(f, EXP_JSONSCHEMA) + print("success") + else: + raise FileNotFoundError(f"No such file(s): {f}") + + +def metadata_template(): + """ + Create an empty template for a metadata.yaml file using the experiment schema + """ + + argparse.ArgumentParser(description="Generate a template for metadata.yaml") + + template = {} + for name, descr in EXP_JSONSCHEMA["properties"].items(): + if name in EXP_JSONSCHEMA["required"]: + description = f"<REQUIRED {descr['description']}>" + else: + description = f"<{descr['description']}>" + + if descr["type"] == "array": + description = [description] + + template[name] = description + + with open("./metadata.yaml", "w") as outfile: + yaml.dump(template, outfile, default_flow_style=False, sort_keys=False) diff --git a/tests/data/access-esm1-5/metadata.yaml b/tests/data/access-esm1-5/metadata.yaml index e69de29..e79341d 100644 --- a/tests/data/access-esm1-5/metadata.yaml +++ b/tests/data/access-esm1-5/metadata.yaml @@ -0,0 +1,30 @@ +name: HI_C_05_r1 +experiment_uuid: a8588f4d-4507-4832-ba94-9425e6f5b1b3 +description: >- + Historical run using same configuration as CMIP6 ACCESS-ESM1.5 historical r1i1p1f1, + but with nitrogen and phosphorus limitations disabled within CASA-CNP +long_description: >- + Historical run using same configuration as CMIP6 ACCESS-ESM1.5 historical r1i1p1f1, + but with nitrogen 
and phosphorus limitations disabled within CASA-CNP. Branched from + CMIP6 ACCESS-ESM1.5 piControl r1i1p1f1 in the year 161 +model: + - ACCESS-ESM1-5 +nominal_resolution: + - atmos = N96 + - ocean = 1 degree +version: 1 +contact: Tilo Ziehn +email: tilo.ziehn@csiro.au +created: null +reference: null +license: null +url: null +parent_experiment: null +related_experiments: + - ca37f09c-cde0-4f27-9148-8ed94721035d + - 1b97f1c0-2bc4-4177-9acc-ce1f6cab5632 + - 1c6cb3ef-de8b-4a61-a1b6-e7044c07928f +notes: >- + null +keywords: + - null diff --git a/tests/data/access-om3/metadata.yaml b/tests/data/access-om3/metadata.yaml index 2d0e4ff..363e75d 100644 --- a/tests/data/access-om3/metadata.yaml +++ b/tests/data/access-om3/metadata.yaml @@ -1,7 +1,10 @@ experiment_uuid: 4cf0c4ee-09c9-4675-ae1f-ce46f0d848ed created: '2024-02-27' name: MOM6-CICE6-WW3-1deg_jra55do_ryf-4cf0c4ee -model: ACCESS-OM3 +model: + - ACCESS-OM3 +description: An early ACCESS-OM3 test run +long_description: An early ACCESS-OM3 test run url: git@github.com:COSIMA/MOM6-CICE6-WW3.git contact: dougiesquire email: dougiesquire@gmail.com diff --git a/tests/test_cli.py b/tests/test_cli.py index 95c181e..a8edbba 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -10,7 +10,12 @@ import intake import pytest -from access_nri_intake.cli import MetadataCheckError, _check_args, build +from access_nri_intake.cli import ( + MetadataCheckError, + _check_build_args, + build, + metadata_validate, +) def test_entrypoint(): @@ -20,6 +25,12 @@ def test_entrypoint(): exit_status = os.system("catalog-build --help") assert exit_status == 0 + exit_status = os.system("metadata-validate --help") + assert exit_status == 0 + + exit_status = os.system("metadata-template --help") + assert exit_status == 0 + @pytest.mark.parametrize( "args, raises", @@ -77,16 +88,16 @@ def test_entrypoint(): ), ], ) -def test_check_args(args, raises): +def test_check_build_args(args, raises): """ Check that non-unique names and uuids return an error 
""" if raises: with pytest.raises(MetadataCheckError) as excinfo: - _check_args(args) + _check_build_args(args) assert "exp0" in str(excinfo.value) else: - _check_args(args) + _check_build_args(args) @mock.patch( @@ -114,3 +125,41 @@ def test_build(mockargs): ) cat = intake.open_df_catalog(build_path) assert len(cat) == 2 + + +@mock.patch( + "argparse.ArgumentParser.parse_args", + return_value=argparse.Namespace( + file=["./tests/data/access-om2/metadata.yaml"], + ), +) +def test_metadata_validate(mockargs): + """Test metadata_validate""" + metadata_validate() + + +@mock.patch( + "argparse.ArgumentParser.parse_args", + return_value=argparse.Namespace( + file=[ + "./tests/data/access-om2/metadata.yaml", + "./tests/data/access-om3/metadata.yaml", + ], + ), +) +def test_metadata_validate_multi(mockargs): + """Test metadata_validate""" + metadata_validate() + + +@mock.patch( + "argparse.ArgumentParser.parse_args", + return_value=argparse.Namespace( + file="./does/not/exist.yaml", + ), +) +def test_metadata_validate_no_file(mockargs): + """Test metadata_validate""" + with pytest.raises(FileNotFoundError) as excinfo: + metadata_validate() + assert "No such file(s)" in str(excinfo.value)