diff --git a/src/access_nri_intake/catalog/manager.py b/src/access_nri_intake/catalog/manager.py index 89424c0..049d2a9 100644 --- a/src/access_nri_intake/catalog/manager.py +++ b/src/access_nri_intake/catalog/manager.py @@ -43,13 +43,13 @@ def __init__(self, path): self.path = path - mode = "a" if os.path.exists(path) else "w" + self.mode = "a" if os.path.exists(path) else "w" self.dfcat = DfFileCatalog( path=self.path, yaml_column=YAML_COLUMN, name_column=NAME_COLUMN, - mode=mode, + mode=self.mode, columns_with_iterables=COLUMNS_WITH_ITERABLES, ) @@ -69,29 +69,29 @@ def build_esm( **kwargs, ): """ - Build an intake-esm catalog + Build an Intake-ESM datastore Parameters ---------- name: str - The name of the catalog + The name of the Intake-ESM datastore description: str - Description of the contents of the catalog - builder: subclass of :py:class:`access_nri_catalog.esmcat.BaseBuilder` - The builder to use to build the intake-esm catalog + Description of the contents of the Intake-ESM datastore + builder: subclass of :py:class:`access_nri_intake.source.builders.BaseBuilder` + The builder to use to build the Intake-ESM datastore path: str or list of str - Path or list of paths to crawl for assets/files to add to the catalog. - translator: :py:class:`~access_nri_catalog.metacat.translators.DefaultTranslator` - An instance of the :py:class:`~access_nri_catalog.metacat.translators.DefaultTranslator` class - for translating info in the intake-esm catalog into intake-dataframe-catalog column metadata. - Defaults to access_nri_catalog.metacat.translators.DefaultTranslator. + Path or list of paths to crawl for assets/files to add to the Intake-ESM datastore. + translator: :py:class:`~access_nri_intake.catalog.translators.DefaultTranslator` + An instance of the :py:class:`~access_nri_intake.catalog.translators.DefaultTranslator` class + for translating info in the Intake-ESM datastore into intake-dataframe-catalog column metadata. 
+ Defaults to access_nri_intake.catalog.translators.DefaultTranslator. metadata: dict, optional Additional info to store in the intake cat.metadata attribute. This info will be available - to the translator and to users of the catalog + to the translator and to users of the Intake-ESM datastore directory: str - The directory to save the catalog to. If None, use the current directory + The directory to save the Intake-ESM datastore to. If None, use the current directory overwrite: bool, optional - Whether to overwrite any existing catalog(s) with the same name + Whether to overwrite any existing entries in the catalog with the same name kwargs: dict Additional kwargs to pass to the builder """ @@ -102,7 +102,7 @@ def build_esm( if os.path.isfile(json_file): if not overwrite: raise CatalogManagerError( - f"A catalog already exists for {name}. To overwrite, " + f"An Intake-ESM datastore already exists for {name}. To overwrite, " "pass `overwrite=True` to CatalogBuilder.build" ) diff --git a/tests/test_manager.py b/tests/test_manager.py new file mode 100644 index 0000000..ae44e0b --- /dev/null +++ b/tests/test_manager.py @@ -0,0 +1,130 @@ +# Copyright 2023 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. 
# SPDX-License-Identifier: Apache-2.0

import pytest

from access_nri_intake.catalog.manager import CatalogManager, CatalogManagerError
from access_nri_intake.catalog.translators import (
    Cmip5Translator,
    Cmip6Translator,
    EraiTranslator,
)
from access_nri_intake.source.builders import (
    AccessCm2Builder,
    AccessEsm15Builder,
    AccessOm2Builder,
)


def test_CatalogManager_init(tmp_path):
    """A manager pointed at a not-yet-existing file starts in write mode."""
    manager = CatalogManager(str(tmp_path / "cat.csv"))

    # Nothing has been written under tmp_path yet, so the manager must not
    # try to append to an existing catalog.
    assert manager.mode == "w"
    assert hasattr(manager, "dfcat")


@pytest.mark.parametrize(
    "builder, basedir, kwargs",
    [
        (AccessOm2Builder, "access-om2", {}),
        (AccessCm2Builder, "access-cm2/by578", {"ensemble": False}),
        (AccessEsm15Builder, "access-esm1-5", {"ensemble": False}),
    ],
)
def test_CatalogManager_build_esm(tmp_path, test_data, builder, basedir, kwargs):
    """Build an Intake-ESM datastore, add it, and exercise the overwrite guard.

    ``test_data`` is a fixture defined outside this file; it is used here as a
    path-like root that the ``basedir`` of each parametrised case is joined to.
    """
    catalog_file = str(tmp_path / "cat.csv")
    manager = CatalogManager(catalog_file)

    build_kwargs = {
        "name": "test",
        "description": "test",
        "builder": builder,
        "path": str(test_data / basedir),
        "metadata": {"model": [basedir]},
        "directory": str(tmp_path),
        **kwargs,
    }
    manager.build_esm(**build_kwargs).add()

    # A second build with the same name must be refused unless overwrite is set
    with pytest.raises(CatalogManagerError) as raised:
        manager.build_esm(**build_kwargs)
    assert "An Intake-ESM datastore already exists" in str(raised.value)

    # With overwrite=True the rebuild goes through
    manager.build_esm(**build_kwargs, overwrite=True).add()

    # The catalog file now exists, so a fresh manager opens it in append mode
    reopened = CatalogManager(catalog_file)
    assert reopened.mode == "a"


@pytest.mark.parametrize(
    "translator, datastore, metadata",
    [
        (Cmip5Translator, "cmip5-al33.json", {}),
        (Cmip6Translator, "cmip6-oi10.json", {}),
        (EraiTranslator, "erai.json", {"model": ["ERA-Interim"]}),
    ],
)
def test_CatalogManager_load(tmp_path, test_data, translator, datastore, metadata):
    """Load a pre-built Intake-ESM datastore and add it to the catalog.

    ``test_data`` is a fixture defined outside this file; the datastore JSON
    files are looked up in its ``esm_datastore`` subdirectory.
    """
    catalog_file = str(tmp_path / "cat.csv")
    manager = CatalogManager(catalog_file)

    datastore_json = test_data / f"esm_datastore/{datastore}"
    manager.load(
        name="test",
        description="test",
        path=str(datastore_json),
        translator=translator,
        metadata=metadata,
    ).add()

    # Once a source has been added, a fresh manager opens in append mode
    reopened = CatalogManager(catalog_file)
    assert reopened.mode == "a"


def test_CatalogManager_all(tmp_path, test_data):
    """Add a loaded source and a built source, then re-add one of them."""
    manager = CatalogManager(str(tmp_path / "cat.csv"))

    # Source 1: an existing datastore that is loaded
    cmip5_kwargs = {
        "name": "cmip5-al33",
        "description": "cmip5-al33",
        "path": str(test_data / "esm_datastore/cmip5-al33.json"),
        "translator": Cmip5Translator,
    }
    manager.load(**cmip5_kwargs).add()

    # Source 2: a datastore that is built from files
    om2_kwargs = {
        "name": "access-om2",
        "description": "access-om2",
        "builder": AccessOm2Builder,
        "path": str(test_data / "access-om2"),
        "metadata": {"model": ["ACCESS-OM2"]},
        "directory": str(tmp_path),
    }
    manager.build_esm(**om2_kwargs).add()

    assert len(manager.dfcat) == 2

    # Adding an entry under an already-used name must replace it, not duplicate it
    manager.load(**cmip5_kwargs).add()
    assert len(manager.dfcat) == 2