Skip to content

Commit

Permalink
add Builder tests
Browse files Browse the repository at this point in the history
  • Loading branch information
dougiesquire committed Jun 30, 2023
1 parent 67b0d4c commit ecbb538
Show file tree
Hide file tree
Showing 65 changed files with 72 additions and 17 deletions.
2 changes: 1 addition & 1 deletion src/access_nri_intake/catalog/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
NAME_COLUMN = "name"
TRANSLATOR_GROUPBY_COLUMNS = ["model", "realm", "frequency"]

SCHEMA_URL = "https://raw.githubusercontent.com/ACCESS-NRI/schema/4e3d10e563d7c1c9f66e9ab92a2926cdec3d6893/experiment_asset.json"
SCHEMA_URL = "https://raw.githubusercontent.com/ACCESS-NRI/schema/d4da77a0e627775c11ba394c0a3f72a2c654971c/experiment_asset.json"
SCHEMA_HASH = "b18cf5bdd06a6f5bcdc71dfc80f7336c63eb49f6d6f75c2cd3371e59eee5488b"

EXP_JSONSCHEMA, CATALOG_JSONSCHEMA = get_jsonschema(
Expand Down
4 changes: 2 additions & 2 deletions src/access_nri_intake/source/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
PATH_COLUMN = "path"
VARIABLE_COLUMN = "variable"

SCHEMA_URL = "https://raw.githubusercontent.com/ACCESS-NRI/schema/4e3d10e563d7c1c9f66e9ab92a2926cdec3d6893/file_asset.json"
SCHEMA_HASH = "2a09030653f495939c90a22e95dd1c4587c8695f7f07e17b9129a6491469f9fc"
SCHEMA_URL = "https://raw.githubusercontent.com/ACCESS-NRI/schema/d4da77a0e627775c11ba394c0a3f72a2c654971c/file_asset.json"
SCHEMA_HASH = "7f1f58e1ae419faf8e24f15e937ef5717fa872920a06758ee2983506fcaf70fc"

_, ESM_JSONSCHEMA = get_jsonschema(
url=SCHEMA_URL, known_hash=SCHEMA_HASH, required=CORE_COLUMNS
Expand Down
3 changes: 3 additions & 0 deletions src/access_nri_intake/source/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,9 @@ def parse_access_ncfile(file):

start_date, end_date, frequency = get_timeinfo(ds)

if not variable_list:
raise EmptyFileError("This file contains no variables")

if filename_frequency:
if filename_frequency != frequency:
msg = (
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
72 changes: 62 additions & 10 deletions tests/test_builders.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,75 @@
# Copyright 2023 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
# SPDX-License-Identifier: Apache-2.0

import intake
import pandas as pd
import pytest

from access_nri_intake.source import builders
from access_nri_intake.source import CORE_COLUMNS, builders


# End-to-end test of a Builder: asset discovery, build, save, and reload of the saved datastore.
# NOTE(review): this span comes from a rendered diff ("62 additions & 10 deletions"), so removed
# and added lines are interleaved and indentation is stripped. Presumably the first line of each
# near-duplicate pair is the removed one — confirm against the repository before relying on it.
@pytest.mark.parametrize(
# NOTE(review): the next two lines look like the old and new argnames strings respectively.
"basedir, builder, kwargs",
"basedirs, builder, kwargs, num_assets, num_valid_assets, num_datasets",
[
# NOTE(review): the five 3-tuples below match the 3-name argnames string (presumably removed cases).
("access-om2", "AccessOm2Builder", {}),
("access-cm2", "AccessCm2Builder", {"ensemble": False}),
("access-cm2", "AccessCm2Builder", {"ensemble": True}),
("access-esm1-5", "AccessEsm15Builder", {"ensemble": False}),
("access-esm1-5", "AccessEsm15Builder", {"ensemble": True}),
# 6-tuples matching the 6-name argnames string: (list of base directories, builder class name,
# builder kwargs, expected len(builder.assets), expected len(builder.df), expected len(cat)).
(["access-om2"], "AccessOm2Builder", {}, 12, 12, 6),
(
["access-cm2/by578", "access-cm2/by578a"],
"AccessCm2Builder",
{"ensemble": True},
18,
14,
7,
),
(["access-esm1-5"], "AccessEsm15Builder", {"ensemble": False}, 11, 11, 11),
],
)
# NOTE(review): two consecutive `def` lines — looks like the old signature followed by the new one.
def test_builder(test_data, basedir, builder, kwargs):
def test_builder_build(
tmp_path,
test_data,
basedirs,
builder,
kwargs,
num_assets,
num_valid_assets,
num_datasets,
):
"""
Test the various steps of the build process
"""
# Resolve the builder class from its name on the `builders` module.
Builder = getattr(builders, builder)
# NOTE(review): two assignments to `path` — single-directory form, then the list form built from `basedirs`.
path = str(test_data / basedir)
path = [str(test_data / basedir) for basedir in basedirs]
# From here on `builder` is rebound from the class-name string to the builder instance.
builder = Builder(path, **kwargs)
# NOTE(review): this print looks like a leftover from the old test body; get_assets() is called again below.
print(builder.get_assets())

# Step 1: after get_assets(), `builder.assets` must be a list of the expected length.
builder.get_assets()
assert isinstance(builder.assets, list)
assert len(builder.assets) == num_assets

# Step 2: after build(), `builder.df` must be a DataFrame of the expected length
# that contains every column named in CORE_COLUMNS.
builder.build()
assert isinstance(builder.df, pd.DataFrame)
assert len(builder.df) == num_valid_assets
assert all([col in builder.df.columns for col in CORE_COLUMNS])

# Step 3: save under pytest's per-test temporary directory.
builder.save(name="test", description="test datastore", directory=str(tmp_path))

# Step 4: reopen "test.json" from that directory and check the catalog sizes.
cat = intake.open_esm_datastore(
str(tmp_path / "test.json"),
columns_with_iterables=builder.columns_with_iterables,
)
assert len(cat.df) == num_valid_assets
assert len(cat) == num_datasets


def test_builder_columns_with_iterables(test_data):
    """
    Check that `columns_with_iterables` is empty before a build and, after a
    build, names exactly the dataframe columns holding list/tuple/set values.
    """
    om2_builder = builders.AccessOm2Builder(str(test_data / "access-om2"))

    # Nothing has been built yet, so no columns should be reported.
    assert not om2_builder.columns_with_iterables

    om2_builder.build()
    reported = sorted(om2_builder.columns_with_iterables)

    # Work out the expected columns directly from the dataframe: replace every
    # cell by its type, then flag columns where any cell is a list/tuple/set.
    # NOTE(review): DataFrame.applymap is deprecated from pandas 2.1 in favour
    # of DataFrame.map; kept as-is so older pandas versions still work.
    cell_types = om2_builder.df.applymap(type)
    column_has_iterable = cell_types.isin([list, tuple, set]).any()
    expected = sorted(
        column for column, flagged in column_has_iterable.items() if flagged
    )

    assert reported == expected
8 changes: 4 additions & 4 deletions tests/test_source_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def test_parse_access_filename(filename, expected):
),
),
(
"access-cm2/mem1/history/atm/netCDF/by578a.pd201501_dai.nc",
"access-cm2/by578/history/atm/netCDF/by578a.pd201501_dai.nc",
(
"by578a.pd201501_dai.nc",
"by578a_pdXXXXXX_dai",
Expand All @@ -174,7 +174,7 @@ def test_parse_access_filename(filename, expected):
),
),
(
"access-cm2/mem1/history/ice/iceh_d.2015-01.nc",
"access-cm2/by578/history/ice/iceh_d.2015-01.nc",
(
"iceh_d.2015-01.nc",
"iceh_d_XXXX_XX",
Expand All @@ -195,7 +195,7 @@ def test_parse_access_filename(filename, expected):
),
),
(
"access-cm2/mem1/history/ocn/ocean_daily.nc-20150630",
"access-cm2/by578/history/ocn/ocean_daily.nc-20150630",
(
"ocean_daily.nc-20150630",
"ocean_daily",
Expand All @@ -210,7 +210,7 @@ def test_parse_access_filename(filename, expected):
),
),
(
"access-cm2/mem1/history/ocn/ocean_scalar.nc-20150630",
"access-cm2/by578/history/ocn/ocean_scalar.nc-20150630",
(
"ocean_scalar.nc-20150630",
"ocean_scalar",
Expand Down

0 comments on commit ecbb538

Please sign in to comment.