Skip to content

Commit

Permalink
Include metadata validation and template scripts in package (#158)
Browse files Browse the repository at this point in the history
* move metadata scripts to console_scripts

* add metadata_validate FileNotFoundError test

* add info on cmdline scripts to docs

* address review comments
  • Loading branch information
dougiesquire committed Mar 15, 2024
1 parent a34274a commit dfbef2e
Show file tree
Hide file tree
Showing 9 changed files with 168 additions and 89 deletions.
35 changes: 0 additions & 35 deletions bin/create_metadata_yaml_template.py

This file was deleted.

34 changes: 0 additions & 34 deletions bin/validate_metadata_yaml.py

This file was deleted.

16 changes: 16 additions & 0 deletions docs/management/building.rst
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,19 @@ is called to ensure that all required metadata is available prior to building th

Ideally this file will live in the base output directory of your model run so that it's easy for others to
find, even if they aren't using the catalog (but it doesn't have to).

.. note::

The access-nri-intake package includes some command-line utility scripts to help with creating and
validating :code:`metadata.yaml` files:

* To create an empty :code:`metadata.yaml` template in the current directory::

$ metadata-template

You'll then need to replace all the values enclosed in :code:`<>`. Fields marked as :code:`REQUIRED` must
be filled in. All other fields are encouraged but can be deleted or commented out if they are not relevant.

* To validate a :code:`metadata.yaml` file (i.e. to check that required fields are present with required types)::

$ metadata-validate <path/to/metadata.yaml>
17 changes: 9 additions & 8 deletions docs/management/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,16 @@ person doing the release must ensure that the version of the new catalog matches
following all steps below. Steps 1 and 2 below should be done in a PR and merged before commencing step 3. If the release does
not include an update to the catalog on Gadi, skip the first two steps below:

#. [IF UPDATING THE CATALOG] Create a new version of the catalog on Gadi (this will take about 1 hour)
::
$ export RELEASE=vX.X.X
$ cd bin
$ qsub -v version=${RELEASE} build_all.sh
#. [IF UPDATING THE CATALOG] Create a new version of the catalog on Gadi (this will take about 1 hour)::

$ export RELEASE=vX.X.X
$ cd bin
$ qsub -v version=${RELEASE} build_all.sh

.. note::
If the `schema <https://github.com/ACCESS-NRI/schema>`_ has changed, or you have not used the intake catalog recently, this step may fail with a *Network is unreachable* error trying to download the schema json files. To download and cache the schema, first import the :code:`access_nri_intake.source` and :code:`access_nri_intake.catalog` sub-packages from a Gadi node with network access (e.g. a login or ARE node). I.e., using the release version of :code:`access_nri_intake`
::
$ python3 -c "from access_nri_intake import source, catalog"
If the `schema <https://github.com/ACCESS-NRI/schema>`_ has changed, or you have not used the intake catalog recently, this step may fail with a *Network is unreachable* error trying to download the schema json files. To download and cache the schema, first import the :code:`access_nri_intake.source` and :code:`access_nri_intake.catalog` sub-packages from a Gadi node with network access (e.g. a login or ARE node). I.e., using the release version of :code:`access_nri_intake`::

$ python3 -c "from access_nri_intake import source, catalog"
This will cache a copy of the schema in your home directory. Then re-run ``$ qsub -v version=${RELEASE} build_all.sh``
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ dynamic = ["version"]

[project.scripts]
catalog-build = "access_nri_intake.cli:build"
metadata-validate = "access_nri_intake.cli:metadata_validate"
metadata-template = "access_nri_intake.cli:metadata_template"

[project.entry-points."intake.catalogs"]
access_nri = "access_nri_intake.data:data"
Expand Down
61 changes: 54 additions & 7 deletions src/access_nri_intake/cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright 2023 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
# SPDX-License-Identifier: Apache-2.0

""" Command line interface for access-nri-intake """
""" Command line interfaces for access-nri-intake """

import argparse
import logging
Expand All @@ -23,9 +23,9 @@ class MetadataCheckError(Exception):
pass


def _parse_inputs(config_yamls, build_path):
def _parse_build_inputs(config_yamls, build_path):
"""
Parse inputs into a list of tuples of CatalogManager methods and args to
Parse build inputs into a list of tuples of CatalogManager methods and args to
pass to the methods
"""

Expand Down Expand Up @@ -70,9 +70,9 @@ def _parse_inputs(config_yamls, build_path):
return args


def _check_args(args_list):
def _check_build_args(args_list):
"""
Run some checks on the parsed argmuents to be passed to the CatalogManager
Run some checks on the parsed build arguments to be passed to the CatalogManager
"""

names = []
Expand Down Expand Up @@ -168,8 +168,8 @@ def build():
os.makedirs(build_path, exist_ok=True)

# Parse inputs to pass to CatalogManager
parsed_sources = _parse_inputs(config_yamls, build_path)
_check_args([parsed_source[1] for parsed_source in parsed_sources])
parsed_sources = _parse_build_inputs(config_yamls, build_path)
_check_build_args([parsed_source[1] for parsed_source in parsed_sources])

# Get the project storage flags
def _get_project(path):
Expand Down Expand Up @@ -218,3 +218,50 @@ def _get_project(path):
if update:
with open(os.path.join(_here, "data", "catalog.yaml"), "w") as fobj:
yaml.dump(yaml_dict, fobj)


def metadata_validate():
    """
    Check provided metadata.yaml file(s) against the experiment schema

    Console-script entry point. One or more paths are read from the command
    line and processed in the order given. Each existing file is passed to
    ``load_metadata_yaml`` together with ``EXP_JSONSCHEMA``, with a progress
    message printed before and "success" printed after.

    Raises
    ------
    FileNotFoundError
        As soon as a path is reached that is not an existing file. Files
        listed before it have already been processed at that point.

    Any exception raised by ``load_metadata_yaml`` propagates unchanged.
    """

    cli = argparse.ArgumentParser(description="Validate a metadata.yaml file")
    cli.add_argument(
        "file",
        nargs="+",
        help="The path to the metadata.yaml file. Multiple file paths can be passed.",
    )
    paths = cli.parse_args().file

    for path in paths:
        # Guard clause: bail out at the first path that is not an existing file
        if not os.path.isfile(path):
            raise FileNotFoundError(f"No such file(s): {path}")

        # No newline yet, so "success" lands on the same line as the file name
        print(f"Validating {path}... ", end="")
        load_metadata_yaml(path, EXP_JSONSCHEMA)
        print("success")


def metadata_template():
    """
    Create an empty template for a metadata.yaml file using the experiment schema

    Console-script entry point. Takes no positional arguments. The template
    has one entry per property in ``EXP_JSONSCHEMA["properties"]``, in schema
    order. Each value is the property's description wrapped in ``<>``, with a
    ``REQUIRED`` prefix for properties listed in ``EXP_JSONSCHEMA["required"]``.
    Properties of type ``"array"`` get a one-element list holding that
    placeholder. The result is written to ``./metadata.yaml`` in the current
    working directory, overwriting any existing file of that name.

    Command-line arguments are parsed, so ``-h``/``--help`` prints usage and
    exits without writing anything, and unexpected arguments are rejected.
    """

    parser = argparse.ArgumentParser(
        description="Generate a template for metadata.yaml"
    )
    # The parser must actually be run: otherwise --help is ignored and a
    # metadata.yaml is written even when the user only asked for usage.
    parser.parse_args()

    required = EXP_JSONSCHEMA["required"]

    template = {}
    for name, descr in EXP_JSONSCHEMA["properties"].items():
        if name in required:
            description = f"<REQUIRED {descr['description']}>"
        else:
            description = f"<{descr['description']}>"

        # Array-valued fields are shown as a one-element list placeholder
        if descr["type"] == "array":
            description = [description]

        template[name] = description

    with open("./metadata.yaml", "w") as outfile:
        # sort_keys=False keeps the fields in schema order
        yaml.dump(template, outfile, default_flow_style=False, sort_keys=False)
30 changes: 30 additions & 0 deletions tests/data/access-esm1-5/metadata.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
name: HI_C_05_r1
experiment_uuid: a8588f4d-4507-4832-ba94-9425e6f5b1b3
description: >-
Historical run using same configuration as CMIP6 ACCESS-ESM1.5 historical r1i1p1f1,
but with nitrogen and phosphorus limitations disabled within CASA-CNP
long_description: >-
Historical run using same configuration as CMIP6 ACCESS-ESM1.5 historical r1i1p1f1,
but with nitrogen and phosphorus limitations disabled within CASA-CNP. Branched from
CMIP6 ACCESS-ESM1.5 piControl r1i1p1f1 in the year 161
model:
- ACCESS-ESM1-5
nominal_resolution:
- atmos = N96
- ocean = 1 degree
version: 1
contact: Tilo Ziehn
email: [email protected]
created: null
reference: null
license: null
url: null
parent_experiment: null
related_experiments:
- ca37f09c-cde0-4f27-9148-8ed94721035d
- 1b97f1c0-2bc4-4177-9acc-ce1f6cab5632
- 1c6cb3ef-de8b-4a61-a1b6-e7044c07928f
notes: >-
null
keywords:
- null
5 changes: 4 additions & 1 deletion tests/data/access-om3/metadata.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
experiment_uuid: 4cf0c4ee-09c9-4675-ae1f-ce46f0d848ed
created: '2024-02-27'
name: MOM6-CICE6-WW3-1deg_jra55do_ryf-4cf0c4ee
model: ACCESS-OM3
model:
- ACCESS-OM3
description: An early ACCESS-OM3 test run
long_description: An early ACCESS-OM3 test run
url: [email protected]:COSIMA/MOM6-CICE6-WW3.git
contact: dougiesquire
email: [email protected]
57 changes: 53 additions & 4 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@
import intake
import pytest

from access_nri_intake.cli import MetadataCheckError, _check_args, build
from access_nri_intake.cli import (
MetadataCheckError,
_check_build_args,
build,
metadata_validate,
)


def test_entrypoint():
Expand All @@ -20,6 +25,12 @@ def test_entrypoint():
exit_status = os.system("catalog-build --help")
assert exit_status == 0

exit_status = os.system("metadata-validate --help")
assert exit_status == 0

exit_status = os.system("metadata-template --help")
assert exit_status == 0


@pytest.mark.parametrize(
"args, raises",
Expand Down Expand Up @@ -77,16 +88,16 @@ def test_entrypoint():
),
],
)
def test_check_args(args, raises):
def test_check_build_args(args, raises):
"""
Check that non-unique names and uuids return an error
"""
if raises:
with pytest.raises(MetadataCheckError) as excinfo:
_check_args(args)
_check_build_args(args)
assert "exp0" in str(excinfo.value)
else:
_check_args(args)
_check_build_args(args)


@mock.patch(
Expand Down Expand Up @@ -114,3 +125,41 @@ def test_build(mockargs):
)
cat = intake.open_df_catalog(build_path)
assert len(cat) == 2


@mock.patch.object(
    argparse.ArgumentParser,
    "parse_args",
    return_value=argparse.Namespace(file=["./tests/data/access-om2/metadata.yaml"]),
)
def test_metadata_validate(mockargs):
    """Check that metadata_validate runs without raising for a single file path"""
    # Command-line parsing is mocked, so the path above is what gets validated
    metadata_validate()


_MULTI_FILE_ARGS = argparse.Namespace(
    file=[
        "./tests/data/access-om2/metadata.yaml",
        "./tests/data/access-om3/metadata.yaml",
    ],
)


@mock.patch.object(argparse.ArgumentParser, "parse_args", return_value=_MULTI_FILE_ARGS)
def test_metadata_validate_multi(mockargs):
    """Check that metadata_validate runs without raising for several file paths"""
    # Command-line parsing is mocked, so both paths above get validated in order
    metadata_validate()


@mock.patch(
    "argparse.ArgumentParser.parse_args",
    return_value=argparse.Namespace(
        # Must be a list, matching what argparse produces for nargs="+".
        # A bare string would be iterated character by character.
        file=["./does/not/exist.yaml"],
    ),
)
def test_metadata_validate_no_file(mockargs):
    """Check that metadata_validate raises FileNotFoundError for a missing file"""
    with pytest.raises(FileNotFoundError) as excinfo:
        metadata_validate()
    # Asserted after the with-block so it runs; the full path must be reported
    assert "No such file(s): ./does/not/exist.yaml" in str(excinfo.value)

0 comments on commit dfbef2e

Please sign in to comment.