From a07470a7b94ceecc62b98f9f11ac30c11a5c3344 Mon Sep 17 00:00:00 2001 From: Marc White Date: Mon, 19 Aug 2024 10:19:54 +1000 Subject: [PATCH] Refactor parse_access_* functions into BaseBuilder class (#181) * Preparatory step - add kwargs to parse_access_filename * Make parse_access_* be classmethods on BaseBuilder * Update tests for latest changes; show pytest-cov output on terminal (and curse VSCode for not saving things automatically like my last IDE) * Refactor how the pattern 'helpers' are stored * Completed reformat to parse_access_* being classmethods on builders, including tests (some cleanup still required) * Remove terminal pytest-cov output * 'black' affected files * Attempting to fix 'pre-commit' failure * Fix mistake in returning patterns to kwargs * Remove commented-out patterns * Pre-commit test --- src/access_nri_intake/source/builders.py | 194 ++++++- src/access_nri_intake/source/utils.py | 149 ------ tests/test_builders.py | 627 +++++++++++++++++++++++ tests/test_source_utils.py | 556 +------------------- 4 files changed, 811 insertions(+), 715 deletions(-) diff --git a/src/access_nri_intake/source/builders.py b/src/access_nri_intake/source/builders.py index e2123f4..13aa524 100644 --- a/src/access_nri_intake/source/builders.py +++ b/src/access_nri_intake/source/builders.py @@ -6,12 +6,34 @@ import multiprocessing import re import traceback +from pathlib import Path +import xarray as xr from ecgtools.builder import INVALID_ASSET, TRACEBACK, Builder from ..utils import validate_against_schema from . import ESM_JSONSCHEMA, PATH_COLUMN, VARIABLE_COLUMN -from .utils import parse_access_ncfile +from .utils import EmptyFileError, get_timeinfo + +# Frequency translations +FREQUENCIES = { + "daily": (1, "day"), + "_dai$": (1, "day"), + "month": (1, "mon"), + "_mon$": (1, "mon"), + "yearly": (1, "yr"), + "_ann$": (1, "yr"), +} + +# ACCESS output file patterns +PATTERNS_HELPERS = { + "not_multi_digit": "(?:\\d(?!\\d)|[^\\d](?=\\d)|[^\\d](?!\\d))", + "om3_components": "(?:cice|mom6|ww3)", + "ymds": "\\d{4}[_,-]\\d{2}[_,-]\\d{2}[_,-]\\d{5}", + "ymd": "\\d{4}[_,-]\\d{2}[_,-]\\d{2}", + "ym": "\\d{4}[_,-]\\d{2}", + "y": "\\d{4}", +} class ParserError(Exception): @@ -24,6 +46,9 @@ class BaseBuilder(Builder): This builds on the ecgtools.Builder class. """ + # Base class carries an empty set + PATTERNS = [] + def __init__( self, path, @@ -183,10 +208,144 @@ def parser(file): # This method should be overwritten raise NotImplementedError + @classmethod + def parse_access_filename( + cls, filename, patterns=None, frequencies=FREQUENCIES, redaction_fill: str = "X" + ): + """ + Parse an ACCESS model filename and return a file id and any time information + + Parameters + ---------- + filename: str + The filename to parse with the extension removed + + Returns + ------- + file_id: str + The file id constructed by redacting time information and replacing non-python characters + with underscores + timestamp: str + A string of the redacted time information (e.g. "1990-01") + frequency: str + The frequency of the file if available in the filename + """ + if patterns is None: + patterns = cls.PATTERNS + + # Try to determine frequency + frequency = None + for pattern, freq in frequencies.items(): + if re.search(pattern, filename): + frequency = freq + break + + # Parse file id + file_id = filename + timestamp = None + for pattern in patterns: + match = re.match(pattern, file_id) + if match: + timestamp = match.group(1) + redaction = re.sub(r"\d", redaction_fill, timestamp) + file_id = ( + file_id[: match.start(1)] + redaction + file_id[match.end(1) :] + ) + break + + # Remove non-python characters from file ids + file_id = re.sub(r"[-.]", "_", file_id) + file_id = re.sub(r"_+", "_", file_id).strip("_") + + return file_id, timestamp, frequency + + @classmethod + def parse_access_ncfile(cls, file, time_dim="time"): + """ + Get Intake-ESM datastore entry info from an ACCESS netcdf file + + Parameters + ---------- + file: str + The path to the netcdf file + time_dim: str + The name of the time dimension + + Returns + ------- + """ + + file = Path(file) + filename = file.name + + file_id, filename_timestamp, filename_frequency = cls.parse_access_filename( + file.stem + ) + + with xr.open_dataset( + file, + chunks={}, + decode_cf=False, + decode_times=False, + decode_coords=False, + ) as ds: + variable_list = [] + variable_long_name_list = [] + variable_standard_name_list = [] + variable_cell_methods_list = [] + variable_units_list = [] + for var in ds.data_vars: + attrs = ds[var].attrs + if "long_name" in attrs: + variable_list.append(var) + variable_long_name_list.append(attrs["long_name"]) + if "standard_name" in attrs: + variable_standard_name_list.append(attrs["standard_name"]) + else: + variable_standard_name_list.append("") + if "cell_methods" in attrs: + variable_cell_methods_list.append(attrs["cell_methods"]) + else: + variable_cell_methods_list.append("") + if "units" in attrs: + variable_units_list.append(attrs["units"]) + else: + variable_units_list.append("") + + start_date, end_date, frequency = get_timeinfo( + ds, filename_frequency, time_dim + ) + + if not variable_list: + raise EmptyFileError("This file contains no variables") + + outputs = ( + filename, + file_id, + filename_timestamp, + frequency, + start_date, + end_date, + variable_list, + variable_long_name_list, + variable_standard_name_list, + variable_cell_methods_list, + variable_units_list, + ) + + return outputs + class AccessOm2Builder(BaseBuilder): """Intake-ESM datastore builder for ACCESS-OM2 COSIMA datasets""" + PATTERNS = [ + rf"^iceh.*\.({PATTERNS_HELPERS['ymd']}|{PATTERNS_HELPERS['ym']})$", # ACCESS-ESM1.5/OM2/CM2 ice + rf"^iceh.*\.(\d{{3}})-{PATTERNS_HELPERS['not_multi_digit']}.*", # ACCESS-OM2 ice + rf"^ocean.*[_,-](?:ymd|ym|y)_({PATTERNS_HELPERS['ymd']}|{PATTERNS_HELPERS['ym']}|{PATTERNS_HELPERS['y']})(?:$|[_,-]{PATTERNS_HELPERS['not_multi_digit']}.*)", # ACCESS-OM2 ocean + r"^ocean.*[^\d]_(\d{2})$", # A few wierd files in ACCESS-OM2 01deg_jra55v13_ryf9091 + ] + def __init__(self, path): """ Initialise a AccessOm2Builder @@ -218,8 +377,8 @@ def __init__(self, path): super().__init__(**kwargs) - @staticmethod - def parser(file): + @classmethod + def parser(cls, file): try: match_groups = re.match(r".*/output\d+/([^/]*)/.*\.nc", file).groups() realm = match_groups[0] @@ -239,7 +398,7 @@ def parser(file): variable_standard_name_list, variable_cell_methods_list, variable_units_list, - ) = parse_access_ncfile(file) + ) = cls.parse_access_ncfile(file) info = { "path": str(file), @@ -265,6 +424,10 @@ def parser(file): class AccessOm3Builder(BaseBuilder): """Intake-ESM datastore builder for ACCESS-OM3 COSIMA datasets""" + PATTERNS = [ + rf"[^\.]*\.{PATTERNS_HELPERS['om3_components']}\..*({PATTERNS_HELPERS['ymds']}|{PATTERNS_HELPERS['ymd']}|{PATTERNS_HELPERS['ym']})$", # ACCESS-OM3 + ] + def __init__(self, path): """ Initialise a AccessOm3Builder @@ -302,8 +465,8 @@ def __init__(self, path): super().__init__(**kwargs) - @staticmethod - def parser(file): + @classmethod + def parser(cls, file): try: ( filename, @@ -317,7 +480,7 @@ def parser(file): variable_standard_name_list, variable_cell_methods_list, variable_units_list, - ) = parse_access_ncfile(file) + ) = cls.parse_access_ncfile(file) if "mom6" in filename: realm = "ocean" @@ -352,6 +515,11 @@ def parser(file): class AccessEsm15Builder(BaseBuilder): """Intake-ESM datastore builder for ACCESS-ESM1.5 datasets""" + PATTERNS = [ + rf"^iceh.*\.({PATTERNS_HELPERS['ymd']}|{PATTERNS_HELPERS['ym']})$", # ACCESS-ESM1.5/OM2/CM2 ice + r"^.*\.p.-(\d{6})_.*", # ACCESS-ESM1.5 atmosphere + ] + def __init__(self, path, ensemble): """ Initialise a AccessEsm15Builder @@ -394,8 +562,8 @@ def __init__(self, path, ensemble): super().__init__(**kwargs) - @staticmethod - def parser(file): + @classmethod + def parser(cls, file): try: match_groups = re.match(r".*/([^/]*)/history/([^/]*)/.*\.nc", file).groups() exp_id = match_groups[0] @@ -416,7 +584,7 @@ def parser(file): variable_standard_name_list, variable_cell_methods_list, variable_units_list, - ) = parse_access_ncfile(file) + ) = cls.parse_access_ncfile(file) # Remove exp_id from file id so that members can be part of the same dataset file_id = re.sub(exp_id, "", file_id).strip("_") @@ -447,4 +615,8 @@ def parser(file): class AccessCm2Builder(AccessEsm15Builder): """Intake-ESM datastore builder for ACCESS-CM2 datasets""" - pass + PATTERNS = [ + rf"^iceh.*\.({PATTERNS_HELPERS['ymd']}|{PATTERNS_HELPERS['ym']})$", # ACCESS-ESM1.5/OM2/CM2 ice + rf"^iceh.*\.({PATTERNS_HELPERS['ym']})-{PATTERNS_HELPERS['not_multi_digit']}.*", # ACCESS-CM2 ice + r"^.*\.p.(\d{6})_.*", # ACCESS-CM2 atmosphere + ] diff --git a/src/access_nri_intake/source/utils.py b/src/access_nri_intake/source/utils.py index 481d57b..a3a8cfe 100644 --- a/src/access_nri_intake/source/utils.py +++ b/src/access_nri_intake/source/utils.py @@ -3,13 +3,10 @@ """ Shared utilities for writing Intake-ESM builders and their parsers """ -import re import warnings from datetime import timedelta -from pathlib import Path import cftime -import xarray as xr class EmptyFileError(Exception): @@ -154,149 +151,3 @@ def _todate(t): frequency = frequency[1] return start_date, end_date, frequency - - -def parse_access_filename(filename): - """ - Parse an ACCESS model filename and return a file id and any time information - - Parameters - ---------- - filename: str - The filename to parse with the extension removed - - Returns - ------- - file_id: str - The file id constructed by redacting time information and replacing non-python characters - with underscores - timestamp: str - A string of the redacted time information (e.g. "1990-01") - frequency: str - The frequency of the file if available in the filename - """ - - # ACCESS output file patterns - # TODO: these should be defined per driver to prevent new patterns from breaking old drivers - not_multi_digit = "(?:\\d(?!\\d)|[^\\d](?=\\d)|[^\\d](?!\\d))" - om3_components = "(?:cice|mom6|ww3)" - ymds = "\\d{4}[_,-]\\d{2}[_,-]\\d{2}[_,-]\\d{5}" - ymd = "\\d{4}[_,-]\\d{2}[_,-]\\d{2}" - ym = "\\d{4}[_,-]\\d{2}" - y = "\\d{4}" - patterns = [ - rf"^iceh.*\.({ymd}|{ym})$", # ACCESS-ESM1.5/OM2 ice - rf"^iceh.*\.({ym})-{not_multi_digit}.*", # ACCESS-CM2 ice - rf"^iceh.*\.(\d{{3}})-{not_multi_digit}.*", # ACCESS-OM2 ice - rf"^ocean.*[_,-](?:ymd|ym|y)_({ymd}|{ym}|{y})(?:$|[_,-]{not_multi_digit}.*)", # ACCESS-OM2 ocean - r"^ocean.*[^\d]_(\d{2})$", # A few wierd files in ACCESS-OM2 01deg_jra55v13_ryf9091 - r"^.*\.p.(\d{6})_.*", # ACCESS-CM2 atmosphere - r"^.*\.p.-(\d{6})_.*", # ACCESS-ESM1.5 atmosphere - rf"[^\.]*\.{om3_components}\..*({ymds}|{ymd}|{ym})$", # ACCESS-OM3 - ] - # Frequency translations - frequencies = { - "daily": (1, "day"), - "_dai$": (1, "day"), - "month": (1, "mon"), - "_mon$": (1, "mon"), - "yearly": (1, "yr"), - "_ann$": (1, "yr"), - } - redaction_fill = "X" - - # Try to determine frequency - frequency = None - for pattern, freq in frequencies.items(): - if re.search(pattern, filename): - frequency = freq - break - - # Parse file id - file_id = filename - timestamp = None - for pattern in patterns: - match = re.match(pattern, file_id) - if match: - timestamp = match.group(1) - redaction = re.sub(r"\d", redaction_fill, timestamp) - file_id = file_id[: match.start(1)] + redaction + file_id[match.end(1) :] - break - - # Remove non-python characters from file ids - file_id = re.sub(r"[-.]", "_", file_id) - file_id = re.sub(r"_+", "_", file_id).strip("_") - - return file_id, timestamp, frequency - - -def parse_access_ncfile(file, time_dim="time"): - """ - Get Intake-ESM datastore entry info from an ACCESS netcdf file - - Parameters - ---------- - file: str - The path to the netcdf file - time_dim: str - The name of the time dimension - - Returns - ------- - """ - - file = Path(file) - filename = file.name - - file_id, filename_timestamp, filename_frequency = parse_access_filename(file.stem) - - with xr.open_dataset( - file, - chunks={}, - decode_cf=False, - decode_times=False, - decode_coords=False, - ) as ds: - variable_list = [] - variable_long_name_list = [] - variable_standard_name_list = [] - variable_cell_methods_list = [] - variable_units_list = [] - for var in ds.data_vars: - attrs = ds[var].attrs - if "long_name" in attrs: - variable_list.append(var) - variable_long_name_list.append(attrs["long_name"]) - if "standard_name" in attrs: - variable_standard_name_list.append(attrs["standard_name"]) - else: - variable_standard_name_list.append("") - if "cell_methods" in attrs: - variable_cell_methods_list.append(attrs["cell_methods"]) - else: - variable_cell_methods_list.append("") - if "units" in attrs: - variable_units_list.append(attrs["units"]) - else: - variable_units_list.append("") - - start_date, end_date, frequency = get_timeinfo(ds, filename_frequency, time_dim) - - if not variable_list: - raise EmptyFileError("This file contains no variables") - - outputs = ( - filename, - file_id, - filename_timestamp, - frequency, - start_date, - end_date, - variable_list, - variable_long_name_list, - variable_standard_name_list, - variable_cell_methods_list, - variable_units_list, - ) - - return outputs diff --git a/tests/test_builders.py b/tests/test_builders.py index 960bdd5..7b8dc5d 100644 --- a/tests/test_builders.py +++ b/tests/test_builders.py @@ -150,3 +150,630 @@ def test_builder_columns_with_iterables(test_data): if val ] ) + + +@pytest.mark.parametrize( + "builder, filename, expected", + [ + # Example ACCESS-CM2 filenames + ( + builders.AccessCm2Builder, + "bz687a.pm107912_mon", + ("bz687a_pmXXXXXX_mon", "107912", (1, "mon")), + ), + ( + builders.AccessCm2Builder, + "bz687a.p7107912_mon", + ("bz687a_p7XXXXXX_mon", "107912", (1, "mon")), + ), + ( + builders.AccessCm2Builder, + "bz687a.p7107912_dai", + ("bz687a_p7XXXXXX_dai", "107912", (1, "day")), + ), + ( + builders.AccessCm2Builder, + "iceh_m.2014-06", + ("iceh_m_XXXX_XX", "2014-06", None), + ), + ( + builders.AccessCm2Builder, + "iceh.1917-05-daily", + ("iceh_XXXX_XX_daily", "1917-05", (1, "day")), + ), + ( + builders.AccessCm2Builder, + "iceh_03h.2016-01-3hourly", + ("iceh_03h_XXXX_XX_3hourly", "2016-01", None), + ), + ( + builders.AccessCm2Builder, + "ocean_bgc_ann", + ("ocean_bgc_ann", None, (1, "yr")), + ), + (builders.AccessCm2Builder, "ocean_daily", ("ocean_daily", None, (1, "day"))), + # Example ACCESS-ESM1.5 filenames + ( + builders.AccessEsm15Builder, + "PI-GWL-B2035.pe-109904_dai", + ("PI_GWL_B2035_pe_XXXXXX_dai", "109904", (1, "day")), + ), + ( + builders.AccessEsm15Builder, + "PI-GWL-B2035.pa-109904_mon", + ("PI_GWL_B2035_pa_XXXXXX_mon", "109904", (1, "mon")), + ), + ( + builders.AccessEsm15Builder, + "PI-1pct-02.pe-011802_dai.nc_dai", + ("PI_1pct_02_pe_XXXXXX_dai_nc_dai", "011802", (1, "day")), + ), + ( + builders.AccessEsm15Builder, + "iceh.1917-05", + ("iceh_XXXX_XX", "1917-05", None), + ), + # Example ACCESS-OM2 filenames + ( + builders.AccessOm2Builder, + "iceh.057-daily", + ("iceh_XXX_daily", "057", (1, "day")), + ), + ( + builders.AccessOm2Builder, + "iceh.1985-08-31", + ("iceh_XXXX_XX_XX", "1985-08-31", None), + ), + (builders.AccessOm2Builder, "ocean", ("ocean", None, None)), + (builders.AccessOm2Builder, "ocean_month", ("ocean_month", None, (1, "mon"))), + ( + builders.AccessOm2Builder, + "ocean-2d-area_t", + ("ocean_2d_area_t", None, None), + ), + ( + builders.AccessOm2Builder, + "ocean_daily_3d_pot_rho_1", + ("ocean_daily_3d_pot_rho_1", None, (1, "day")), + ), + ( + builders.AccessOm2Builder, + "ocean_daily_3d_vhrho_nt_07", + ("ocean_daily_3d_vhrho_nt_XX", "07", (1, "day")), + ), + ( + builders.AccessOm2Builder, + "ocean-3d-v-1-monthly-pow02-ym_1958_04", + ("ocean_3d_v_1_monthly_pow02_ym_XXXX_XX", "1958_04", (1, "mon")), + ), + ( + builders.AccessOm2Builder, + "ocean-2d-sfc_salt_flux_restore-1-monthly-mean-ym_1958_04", + ( + "ocean_2d_sfc_salt_flux_restore_1_monthly_mean_ym_XXXX_XX", + "1958_04", + (1, "mon"), + ), + ), + ( + builders.AccessOm2Builder, + "ocean-2d-sea_level-540-seconds-snap-ym_2022_04_01", + ( + "ocean_2d_sea_level_540_seconds_snap_ym_XXXX_XX_XX", + "2022_04_01", + None, + ), + ), + ( + builders.AccessOm2Builder, + "ocean-3d-salt-1-daily-mean-ym_2018_10_jmax511_sigfig4", + ( + "ocean_3d_salt_1_daily_mean_ym_XXXX_XX_jmax511_sigfig4", + "2018_10", + (1, "day"), + ), + ), + ( + builders.AccessOm2Builder, + "oceanbgc-3d-caco3-1-yearly-mean-y_2015", + ("oceanbgc_3d_caco3_1_yearly_mean_y_XXXX", "2015", (1, "yr")), + ), + ( + builders.AccessOm2Builder, + "oceanbgc-2d-wdet100-1-daily-mean-y_2015", + ("oceanbgc_2d_wdet100_1_daily_mean_y_XXXX", "2015", (1, "day")), + ), + ( + builders.AccessOm2Builder, + "oceanbgc-3d-phy-1-daily-mean-3-sigfig-5-daily-ymd_2020_12_01", + ( + "oceanbgc_3d_phy_1_daily_mean_3_sigfig_5_daily_ymd_XXXX_XX_XX", + "2020_12_01", + (1, "day"), + ), + ), + ( + builders.AccessOm2Builder, + "rregionPrydz_temp_xflux_adv", + ("rregionPrydz_temp_xflux_adv", None, None), + ), + # Example ACCESS-OM3 filenames + ( + builders.AccessOm3Builder, + "GMOM_JRA_WD.ww3.hi.1958-01-02-00000", + ( + "GMOM_JRA_WD_ww3_hi_XXXX_XX_XX_XXXXX", + "1958-01-02-00000", + None, + ), + ), + ( + builders.AccessOm3Builder, + "GMOM_JRA.cice.h.1900-01-01", + ( + "GMOM_JRA_cice_h_XXXX_XX_XX", + "1900-01-01", + None, + ), + ), + ( + builders.AccessOm3Builder, + "GMOM_JRA.mom6.ocean_sfc_1900_01_01", + ( + "GMOM_JRA_mom6_ocean_sfc_XXXX_XX_XX", + "1900_01_01", + None, + ), + ), + ( + builders.AccessOm3Builder, + "GMOM_JRA.mom6.sfc_1900_01_01", + ( + "GMOM_JRA_mom6_sfc_XXXX_XX_XX", + "1900_01_01", + None, + ), + ), + ( + builders.AccessOm3Builder, + "GMOM_JRA.mom6.sfc_1900_01", + ( + "GMOM_JRA_mom6_sfc_XXXX_XX", + "1900_01", + None, + ), + ), + ( + builders.AccessOm3Builder, + "GMOM_JRA.mom6.static", + ( + "GMOM_JRA_mom6_static", + None, + None, + ), + ), + ], +) +def test_parse_access_filename(builder, filename, expected): + assert builder.parse_access_filename(filename) == expected + + +@pytest.mark.parametrize( + "builder, filename, expected", + [ + ( + builders.AccessOm2Builder, + "access-om2/output000/ocean/ocean_grid.nc", + ( + "ocean_grid.nc", + "ocean_grid", + None, + "fx", + "none", + "none", + ["geolat_t", "geolon_t"], + ["tracer latitude", "tracer longitude"], + ["", ""], + ["time: point", "time: point"], + ["degrees_N", "degrees_E"], + ), + ), + ( + builders.AccessOm2Builder, + "access-om2/output000/ocean/ocean.nc", + ( + "ocean.nc", + "ocean", + None, + "1yr", + "1900-01-01, 00:00:00", + "1910-01-01, 00:00:00", + ["temp", "time_bounds"], + ["Conservative temperature", "time axis boundaries"], + ["sea_water_conservative_temperature", ""], + ["time: mean", ""], + ["K", "days"], + ), + ), + ( + builders.AccessOm2Builder, + "access-om2/output000/ocean/ocean_month.nc", + ( + "ocean_month.nc", + "ocean_month", + None, + "1mon", + "1900-01-01, 00:00:00", + "1910-01-01, 00:00:00", + ["mld", "time_bounds"], + [ + "mixed layer depth determined by density criteria", + "time axis boundaries", + ], + ["ocean_mixed_layer_thickness_defined_by_sigma_t", ""], + ["time: mean", ""], + ["m", "days"], + ), + ), + ( + builders.AccessOm2Builder, + "access-om2/output000/ocean/ocean_month_inst_nobounds.nc", + ( + "ocean_month_inst_nobounds.nc", + "ocean_month_inst_nobounds", + None, + "1mon", + "1900-01-01, 00:00:00", + "1900-02-01, 00:00:00", + ["mld"], + ["mixed layer depth determined by density criteria"], + ["ocean_mixed_layer_thickness_defined_by_sigma_t"], + ["time: mean"], + ["m"], + ), + ), + ( + builders.AccessOm2Builder, + "access-om2/output000/ice/OUTPUT/iceh.1900-01.nc", + ( + "iceh.1900-01.nc", + "iceh_XXXX_XX", + "1900-01", + "1mon", + "1900-01-01, 00:00:00", + "1900-02-01, 00:00:00", + ["TLAT", "TLON", "aice_m", "tarea", "time_bounds"], + [ + "T grid center latitude", + "T grid center longitude", + "ice area (aggregate)", + "area of T grid cells", + "boundaries for time-averaging interval", + ], + ["", "", "", "", ""], + ["", "", "time: mean", "", ""], + [ + "degrees_north", + "degrees_east", + "1", + "m^2", + "days since 1900-01-01 00:00:00", + ], + ), + ), + ( + builders.AccessCm2Builder, + "access-cm2/by578/history/atm/netCDF/by578a.pd201501_dai.nc", + ( + "by578a.pd201501_dai.nc", + "by578a_pdXXXXXX_dai", + "201501", + "1day", + "2015-01-01, 00:00:00", + "2015-02-01, 00:00:00", + ["fld_s03i236"], + ["TEMPERATURE AT 1.5M"], + ["air_temperature"], + ["time: mean"], + ["K"], + ), + ), + ( + builders.AccessCm2Builder, + "access-cm2/by578/history/ice/iceh_d.2015-01.nc", + ( + "iceh_d.2015-01.nc", + "iceh_d_XXXX_XX", + "2015-01", + "1day", + "2015-01-01, 00:00:00", + "2015-02-01, 00:00:00", + ["TLAT", "TLON", "aice", "tarea", "time_bounds"], + [ + "T grid center latitude", + "T grid center longitude", + "ice area (aggregate)", + "area of T grid cells", + "boundaries for time-averaging interval", + ], + ["", "", "", "", ""], + ["", "", "time: mean", "", ""], + [ + "degrees_north", + "degrees_east", + "1", + "m^2", + "days since 1850-01-01 00:00:00", + ], + ), + ), + ( + builders.AccessCm2Builder, + "access-cm2/by578/history/ocn/ocean_daily.nc-20150630", + ( + "ocean_daily.nc-20150630", + "ocean_daily", + None, + "1day", + "2015-01-01, 00:00:00", + "2015-07-01, 00:00:00", + ["sst", "time_bounds"], + ["Potential temperature", "time axis boundaries"], + ["sea_surface_temperature", ""], + ["time: mean", ""], + ["K", "days"], + ), + ), + ( + builders.AccessCm2Builder, + "access-cm2/by578/history/ocn/ocean_scalar.nc-20150630", + ( + "ocean_scalar.nc-20150630", + "ocean_scalar", + None, + "1mon", + "2015-01-01, 00:00:00", + "2015-07-01, 00:00:00", + ["temp_global_ave", "time_bounds"], + ["Global mean temp in liquid seawater", "time axis boundaries"], + ["sea_water_potential_temperature", ""], + ["time: mean", ""], + ["deg_C", "days"], + ), + ), + ( + builders.AccessEsm15Builder, + "access-esm1-5/history/atm/netCDF/HI-C-05-r1.pa-185001_mon.nc", + ( + "HI-C-05-r1.pa-185001_mon.nc", + "HI_C_05_r1_pa_XXXXXX_mon", + "185001", + "1mon", + "1850-01-01, 00:00:00", + "1850-02-01, 00:00:00", + ["fld_s03i236"], + ["TEMPERATURE AT 1.5M"], + ["air_temperature"], + ["time: mean"], + ["K"], + ), + ), + ( + builders.AccessEsm15Builder, + "access-esm1-5/history/ice/iceh.1850-01.nc", + ( + "iceh.1850-01.nc", + "iceh_XXXX_XX", + "1850-01", + "1mon", + "1850-01-01, 00:00:00", + "1850-02-01, 00:00:00", + ["TLAT", "TLON", "aice", "tarea", "time_bounds"], + [ + "T grid center latitude", + "T grid center longitude", + "ice area (aggregate)", + "area of T grid cells", + "boundaries for time-averaging interval", + ], + ["", "", "", "", ""], + ["", "", "time: mean", "", ""], + [ + "degrees_north", + "degrees_east", + "1", + "m^2", + "days since 0001-01-01 00:00:00", + ], + ), + ), + ( + builders.AccessEsm15Builder, + "access-esm1-5/history/ocn/ocean_bgc_ann.nc-18501231", + ( + "ocean_bgc_ann.nc-18501231", + "ocean_bgc_ann", + None, + "1yr", + "1849-12-30, 00:00:00", + "1850-12-30, 00:00:00", + ["fgco2_raw", "time_bounds"], + ["Flux into ocean - DIC, inc. anth.", "time axis boundaries"], + ["", ""], + ["time: mean", ""], + ["mmol/m^2/s", "days"], + ), + ), + ( + builders.AccessEsm15Builder, + "access-esm1-5/history/ocn/ocean_bgc.nc-18501231", + ( + "ocean_bgc.nc-18501231", + "ocean_bgc", + None, + "1mon", + "1849-12-30, 00:00:00", + "1850-12-30, 00:00:00", + ["o2", "time_bounds"], + ["o2", "time axis boundaries"], + ["", ""], + ["time: mean", ""], + ["mmol/m^3", "days"], + ), + ), + ( + builders.AccessOm3Builder, + "access-om3/output000/GMOM_JRA_WD.mom6.h.native_1900_01.nc", + ( + "GMOM_JRA_WD.mom6.h.native_1900_01.nc", + "GMOM_JRA_WD_mom6_h_native_XXXX_XX", + "1900_01", + "1mon", + "1900-01-01, 00:00:00", + "1900-02-01, 00:00:00", + ["average_DT", "average_T1", "average_T2", "thetao", "time_bnds"], + [ + "Length of average period", + "Start time for average period", + "End time for average period", + "Sea Water Potential Temperature", + "time axis boundaries", + ], + ["", "", "", "sea_water_potential_temperature", ""], + ["", "", "", "area:mean zl:mean yh:mean xh:mean time: mean", ""], + [ + "days", + "days since 0001-01-01 00:00:00", + "days since 0001-01-01 00:00:00", + "degC", + "days since 0001-01-01 00:00:00", + ], + ), + ), + ( + builders.AccessOm3Builder, + "access-om3/output000/GMOM_JRA_WD.mom6.h.sfc_1900_01_02.nc", + ( + "GMOM_JRA_WD.mom6.h.sfc_1900_01_02.nc", + "GMOM_JRA_WD_mom6_h_sfc_XXXX_XX_XX", + "1900_01_02", + "1day", + "1900-01-01, 00:00:00", + "1900-01-02, 00:00:00", + ["average_DT", "average_T1", "average_T2", "time_bnds", "tos"], + [ + "Length of average period", + "Start time for average period", + "End time for average period", + "time axis boundaries", + "Sea Surface Temperature", + ], + ["", "", "", "", "sea_surface_temperature"], + ["", "", "", "", "area:mean yh:mean xh:mean time: mean"], + [ + "days", + "days since 0001-01-01 00:00:00", + "days since 0001-01-01 00:00:00", + "days since 0001-01-01 00:00:00", + "degC", + ], + ), + ), + ( + builders.AccessOm3Builder, + "access-om3/output000/GMOM_JRA_WD.mom6.h.static.nc", + ( + "GMOM_JRA_WD.mom6.h.static.nc", + "GMOM_JRA_WD_mom6_h_static", + None, + "fx", + "none", + "none", + ["geolat", "geolon"], + ["Latitude of tracer (T) points", "Longitude of tracer (T) points"], + ["", ""], + ["time: point", "time: point"], + ["degrees_north", "degrees_east"], + ), + ), + ( + builders.AccessOm3Builder, + "access-om3/output000/GMOM_JRA_WD.mom6.h.z_1900_01.nc", + ( + "GMOM_JRA_WD.mom6.h.z_1900_01.nc", + "GMOM_JRA_WD_mom6_h_z_XXXX_XX", + "1900_01", + "1mon", + "1900-01-01, 00:00:00", + "1900-02-01, 00:00:00", + ["average_DT", "average_T1", "average_T2", "thetao", "time_bnds"], + [ + "Length of average period", + "Start time for average period", + "End time for average period", + "Sea Water Potential Temperature", + "time axis boundaries", + ], + ["", "", "", "sea_water_potential_temperature", ""], + ["", "", "", "area:mean z_l:mean yh:mean xh:mean time: mean", ""], + [ + "days", + "days since 0001-01-01 00:00:00", + "days since 0001-01-01 00:00:00", + "degC", + "days since 0001-01-01 00:00:00", + ], + ), + ), + ( + builders.AccessOm3Builder, + "access-om3/output000/GMOM_JRA_WD.cice.h.1900-01-01.nc", + ( + "GMOM_JRA_WD.cice.h.1900-01-01.nc", + "GMOM_JRA_WD_cice_h_XXXX_XX_XX", + "1900-01-01", + "1day", + "1900-01-01, 00:00:00", + "1900-01-02, 00:00:00", + ["TLAT", "TLON", "aice", "tarea", "time_bounds"], + [ + "T grid center latitude", + "T grid center longitude", + "ice area (aggregate)", + "area of T grid cells", + "time interval endpoints", + ], + ["", "", "", "", ""], + ["", "", "time: mean", "", ""], + [ + "degrees_north", + "degrees_east", + "1", + "m^2", + "days since 0000-01-01 00:00:00", + ], + ), + ), + ( + builders.AccessOm3Builder, + "access-om3/output000/GMOM_JRA_WD.ww3.hi.1900-01-02-00000.nc", + ( + "GMOM_JRA_WD.ww3.hi.1900-01-02-00000.nc", + "GMOM_JRA_WD_ww3_hi_XXXX_XX_XX_XXXXX", + "1900-01-02-00000", + "fx", # WW3 provides no time bounds + "1900-01-02, 00:00:00", + "1900-01-02, 00:00:00", + ["EF", "mapsta"], + ["1D spectral density", "map status"], + ["", ""], + ["", ""], + ["m2 s", "unitless"], + ), + ), + ], +) +def test_parse_access_ncfile(test_data, builder, filename, expected): + file = str(test_data / Path(filename)) + + assert builder.parse_access_ncfile(file) == expected diff --git a/tests/test_source_utils.py b/tests/test_source_utils.py index 30347ba..419a9f4 100644 --- a/tests/test_source_utils.py +++ b/tests/test_source_utils.py @@ -1,564 +1,10 @@ # Copyright 2023 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. # SPDX-License-Identifier: Apache-2.0 -from pathlib import Path - import pytest import xarray as xr -from access_nri_intake.source.utils import ( - get_timeinfo, - parse_access_filename, - parse_access_ncfile, -) - - -@pytest.mark.parametrize( - "filename, expected", - [ - # Example ACCESS-CM2 filenames - ("bz687a.pm107912_mon", ("bz687a_pmXXXXXX_mon", "107912", (1, "mon"))), - ("bz687a.p7107912_mon", ("bz687a_p7XXXXXX_mon", "107912", (1, "mon"))), - ("bz687a.p7107912_dai", ("bz687a_p7XXXXXX_dai", "107912", (1, "day"))), - ("iceh_m.2014-06", ("iceh_m_XXXX_XX", "2014-06", None)), - ("iceh.1917-05-daily", ("iceh_XXXX_XX_daily", "1917-05", (1, "day"))), - ("iceh_03h.2016-01-3hourly", ("iceh_03h_XXXX_XX_3hourly", "2016-01", None)), - ("ocean_bgc_ann", ("ocean_bgc_ann", None, (1, "yr"))), - ("ocean_daily", ("ocean_daily", None, (1, "day"))), - # Example ACCESS-ESM1.5 filenames - ( - "PI-GWL-B2035.pe-109904_dai", - ("PI_GWL_B2035_pe_XXXXXX_dai", "109904", (1, "day")), - ), - ( - "PI-GWL-B2035.pa-109904_mon", - ("PI_GWL_B2035_pa_XXXXXX_mon", "109904", (1, "mon")), - ), - ( - "PI-1pct-02.pe-011802_dai.nc_dai", - ("PI_1pct_02_pe_XXXXXX_dai_nc_dai", "011802", (1, "day")), - ), - ("iceh.1917-05", ("iceh_XXXX_XX", "1917-05", None)), - # Example ACCESS-OM2 filenames - ("iceh.057-daily", ("iceh_XXX_daily", "057", (1, "day"))), - ("iceh.1985-08-31", ("iceh_XXXX_XX_XX", "1985-08-31", None)), - ("ocean", ("ocean", None, None)), - ("ocean_month", ("ocean_month", None, (1, "mon"))), - ( - "ocean-2d-area_t", - ("ocean_2d_area_t", None, None), - ), - ( - "ocean_daily_3d_pot_rho_1", - ("ocean_daily_3d_pot_rho_1", None, (1, "day")), - ), - ( - "ocean_daily_3d_vhrho_nt_07", - ("ocean_daily_3d_vhrho_nt_XX", "07", (1, "day")), - ), - ( - "ocean-3d-v-1-monthly-pow02-ym_1958_04", - ("ocean_3d_v_1_monthly_pow02_ym_XXXX_XX", "1958_04", (1, "mon")), - ), - ( - "ocean-2d-sfc_salt_flux_restore-1-monthly-mean-ym_1958_04", - ( - "ocean_2d_sfc_salt_flux_restore_1_monthly_mean_ym_XXXX_XX", - "1958_04", - (1, "mon"), - ), - ), - ( - "ocean-2d-sea_level-540-seconds-snap-ym_2022_04_01", - ( - "ocean_2d_sea_level_540_seconds_snap_ym_XXXX_XX_XX", - "2022_04_01", - None, - ), - ), - ( - "ocean-3d-salt-1-daily-mean-ym_2018_10_jmax511_sigfig4", - ( - "ocean_3d_salt_1_daily_mean_ym_XXXX_XX_jmax511_sigfig4", - "2018_10", - (1, "day"), - ), - ), - ( - "oceanbgc-3d-caco3-1-yearly-mean-y_2015", - ("oceanbgc_3d_caco3_1_yearly_mean_y_XXXX", "2015", (1, "yr")), - ), - ( - "oceanbgc-2d-wdet100-1-daily-mean-y_2015", - ("oceanbgc_2d_wdet100_1_daily_mean_y_XXXX", "2015", (1, "day")), - ), - ( - "oceanbgc-3d-phy-1-daily-mean-3-sigfig-5-daily-ymd_2020_12_01", - ( - "oceanbgc_3d_phy_1_daily_mean_3_sigfig_5_daily_ymd_XXXX_XX_XX", - "2020_12_01", - (1, "day"), - ), - ), - ( - "rregionPrydz_temp_xflux_adv", - ("rregionPrydz_temp_xflux_adv", None, None), - ), - # Example ACCESS-OM3 filenames - ( - "GMOM_JRA_WD.ww3.hi.1958-01-02-00000", - ( - "GMOM_JRA_WD_ww3_hi_XXXX_XX_XX_XXXXX", - "1958-01-02-00000", - None, - ), - ), - ( - "GMOM_JRA.cice.h.1900-01-01", - ( - "GMOM_JRA_cice_h_XXXX_XX_XX", - "1900-01-01", - None, - ), - ), - ( - "GMOM_JRA.mom6.ocean_sfc_1900_01_01", - ( - "GMOM_JRA_mom6_ocean_sfc_XXXX_XX_XX", - "1900_01_01", - None, - ), - ), - ( - "GMOM_JRA.mom6.sfc_1900_01_01", - ( - "GMOM_JRA_mom6_sfc_XXXX_XX_XX", - "1900_01_01", - None, - ), - ), - ( - "GMOM_JRA.mom6.sfc_1900_01", - ( - "GMOM_JRA_mom6_sfc_XXXX_XX", - "1900_01", - None, - ), - ), - ( - "GMOM_JRA.mom6.static", - ( - "GMOM_JRA_mom6_static", - None, - None, - ), - ), - ], -) -def test_parse_access_filename(filename, expected): - assert parse_access_filename(filename) == expected - - -@pytest.mark.parametrize( - "filename, expected", - [ - ( - "access-om2/output000/ocean/ocean_grid.nc", - ( - "ocean_grid.nc", - "ocean_grid", - None, - "fx", - "none", - "none", - ["geolat_t", "geolon_t"], - ["tracer latitude", "tracer longitude"], - ["", ""], - ["time: point", "time: point"], - ["degrees_N", "degrees_E"], - ), - ), - ( - "access-om2/output000/ocean/ocean.nc", - ( - "ocean.nc", - "ocean", - None, - "1yr", - "1900-01-01, 00:00:00", - "1910-01-01, 00:00:00", - ["temp", "time_bounds"], - ["Conservative temperature", "time axis boundaries"], - ["sea_water_conservative_temperature", ""], - ["time: mean", ""], - ["K", "days"], - ), - ), - ( - "access-om2/output000/ocean/ocean_month.nc", - ( - "ocean_month.nc", - "ocean_month", - None, - "1mon", - "1900-01-01, 00:00:00", - "1910-01-01, 00:00:00", - ["mld", "time_bounds"], - [ - "mixed layer depth determined by density criteria", - "time axis boundaries", - ], - ["ocean_mixed_layer_thickness_defined_by_sigma_t", ""], - ["time: mean", ""], - ["m", "days"], - ), - ), - ( - "access-om2/output000/ocean/ocean_month_inst_nobounds.nc", - ( - "ocean_month_inst_nobounds.nc", - "ocean_month_inst_nobounds", - None, - "1mon", - "1900-01-01, 00:00:00", - "1900-02-01, 00:00:00", - ["mld"], - ["mixed layer depth determined by density criteria"], - ["ocean_mixed_layer_thickness_defined_by_sigma_t"], - ["time: mean"], - ["m"], - ), - ), - ( - "access-om2/output000/ice/OUTPUT/iceh.1900-01.nc", - ( - "iceh.1900-01.nc", - "iceh_XXXX_XX", - "1900-01", - "1mon", - "1900-01-01, 00:00:00", - "1900-02-01, 00:00:00", - ["TLAT", "TLON", "aice_m", "tarea", "time_bounds"], - [ - "T grid center latitude", - "T grid center longitude", - "ice area (aggregate)", - "area of T grid cells", - "boundaries for time-averaging interval", - ], - ["", "", "", "", ""], - ["", "", "time: mean", "", ""], - [ - "degrees_north", - "degrees_east", - "1", - "m^2", - "days since 1900-01-01 00:00:00", - ], - ), - ), - ( - "access-cm2/by578/history/atm/netCDF/by578a.pd201501_dai.nc", - ( - "by578a.pd201501_dai.nc", - "by578a_pdXXXXXX_dai", - "201501", - "1day", - "2015-01-01, 00:00:00", - "2015-02-01, 00:00:00", - ["fld_s03i236"], - ["TEMPERATURE AT 1.5M"], - ["air_temperature"], - ["time: mean"], - ["K"], - ), - ), - ( - "access-cm2/by578/history/ice/iceh_d.2015-01.nc", - ( - "iceh_d.2015-01.nc", - "iceh_d_XXXX_XX", - "2015-01", - "1day", - "2015-01-01, 00:00:00", - "2015-02-01, 00:00:00", - ["TLAT", "TLON", "aice", "tarea", "time_bounds"], - [ - "T grid center latitude", - "T grid center longitude", - "ice area (aggregate)", - "area of T grid cells", - "boundaries for time-averaging interval", - ], - ["", "", "", "", ""], - ["", "", "time: mean", "", ""], - [ - "degrees_north", - "degrees_east", - "1", - "m^2", - "days since 1850-01-01 00:00:00", - ], - ), - ), - ( - "access-cm2/by578/history/ocn/ocean_daily.nc-20150630", - ( - "ocean_daily.nc-20150630", - "ocean_daily", - None, - "1day", - "2015-01-01, 00:00:00", - "2015-07-01, 00:00:00", - ["sst", "time_bounds"], - ["Potential temperature", "time axis boundaries"], - ["sea_surface_temperature", ""], - ["time: mean", ""], - ["K", "days"], - ), - ), - ( - "access-cm2/by578/history/ocn/ocean_scalar.nc-20150630", - ( - "ocean_scalar.nc-20150630", - "ocean_scalar", - None, - "1mon", - "2015-01-01, 00:00:00", - "2015-07-01, 00:00:00", - ["temp_global_ave", "time_bounds"], - ["Global mean temp in liquid seawater", "time axis boundaries"], - ["sea_water_potential_temperature", ""], - ["time: mean", ""], - ["deg_C", "days"], - ), - ), - ( - "access-esm1-5/history/atm/netCDF/HI-C-05-r1.pa-185001_mon.nc", - ( - "HI-C-05-r1.pa-185001_mon.nc", - "HI_C_05_r1_pa_XXXXXX_mon", - "185001", - "1mon", - "1850-01-01, 00:00:00", - "1850-02-01, 00:00:00", - ["fld_s03i236"], - ["TEMPERATURE AT 1.5M"], - ["air_temperature"], - ["time: mean"], - ["K"], - ), - ), - ( - "access-esm1-5/history/ice/iceh.1850-01.nc", - ( - "iceh.1850-01.nc", - "iceh_XXXX_XX", - "1850-01", - "1mon", - "1850-01-01, 00:00:00", - "1850-02-01, 00:00:00", - ["TLAT", "TLON", "aice", "tarea", "time_bounds"], - [ - "T grid center latitude", - "T grid center longitude", - "ice area (aggregate)", - "area of T grid cells", - "boundaries for time-averaging interval", - ], - ["", "", "", "", ""], - ["", "", "time: mean", "", ""], - [ - "degrees_north", - "degrees_east", - "1", - "m^2", - "days since 0001-01-01 00:00:00", - ], - ), - ), - ( - "access-esm1-5/history/ocn/ocean_bgc_ann.nc-18501231", - ( - "ocean_bgc_ann.nc-18501231", - "ocean_bgc_ann", - None, - "1yr", - "1849-12-30, 00:00:00", - "1850-12-30, 00:00:00", - ["fgco2_raw", "time_bounds"], - ["Flux into ocean - DIC, inc. anth.", "time axis boundaries"], - ["", ""], - ["time: mean", ""], - ["mmol/m^2/s", "days"], - ), - ), - ( - "access-esm1-5/history/ocn/ocean_bgc.nc-18501231", - ( - "ocean_bgc.nc-18501231", - "ocean_bgc", - None, - "1mon", - "1849-12-30, 00:00:00", - "1850-12-30, 00:00:00", - ["o2", "time_bounds"], - ["o2", "time axis boundaries"], - ["", ""], - ["time: mean", ""], - ["mmol/m^3", "days"], - ), - ), - ( - "access-om3/output000/GMOM_JRA_WD.mom6.h.native_1900_01.nc", - ( - "GMOM_JRA_WD.mom6.h.native_1900_01.nc", - "GMOM_JRA_WD_mom6_h_native_XXXX_XX", - "1900_01", - "1mon", - "1900-01-01, 00:00:00", - "1900-02-01, 00:00:00", - ["average_DT", "average_T1", "average_T2", "thetao", "time_bnds"], - [ - "Length of average period", - "Start time for average period", - "End time for average period", - "Sea Water Potential Temperature", - "time axis boundaries", - ], - ["", "", "", "sea_water_potential_temperature", ""], - ["", "", "", "area:mean zl:mean yh:mean xh:mean time: mean", ""], - [ - "days", - "days since 0001-01-01 00:00:00", - "days since 0001-01-01 00:00:00", - "degC", - "days since 0001-01-01 00:00:00", - ], - ), - ), - ( - "access-om3/output000/GMOM_JRA_WD.mom6.h.sfc_1900_01_02.nc", - ( - "GMOM_JRA_WD.mom6.h.sfc_1900_01_02.nc", - "GMOM_JRA_WD_mom6_h_sfc_XXXX_XX_XX", - "1900_01_02", - "1day", - "1900-01-01, 00:00:00", - "1900-01-02, 00:00:00", - ["average_DT", "average_T1", "average_T2", "time_bnds", "tos"], - [ - "Length of average period", - "Start time for average period", - "End time for average period", - "time axis boundaries", - "Sea Surface Temperature", - ], - ["", "", "", "", "sea_surface_temperature"], - ["", "", "", "", "area:mean yh:mean xh:mean time: mean"], - [ - "days", - "days since 0001-01-01 00:00:00", - "days since 0001-01-01 00:00:00", - "days since 0001-01-01 00:00:00", - "degC", - ], - ), - ), - ( - "access-om3/output000/GMOM_JRA_WD.mom6.h.static.nc", - ( - "GMOM_JRA_WD.mom6.h.static.nc", - "GMOM_JRA_WD_mom6_h_static", - None, - "fx", - "none", - "none", - ["geolat", "geolon"], - ["Latitude of tracer (T) points", "Longitude of tracer (T) points"], - ["", ""], - ["time: point", "time: point"], - ["degrees_north", "degrees_east"], - ), - ), - ( - "access-om3/output000/GMOM_JRA_WD.mom6.h.z_1900_01.nc", - ( - "GMOM_JRA_WD.mom6.h.z_1900_01.nc", - "GMOM_JRA_WD_mom6_h_z_XXXX_XX", - "1900_01", - "1mon", - "1900-01-01, 00:00:00", - "1900-02-01, 00:00:00", - ["average_DT", "average_T1", "average_T2", "thetao", "time_bnds"], - [ - "Length of average period", - "Start time for average period", - "End time for average period", - "Sea Water Potential Temperature", - "time axis boundaries", - ], - ["", "", "", "sea_water_potential_temperature", ""], - ["", "", "", "area:mean z_l:mean yh:mean xh:mean time: mean", ""], - [ - "days", - "days since 0001-01-01 00:00:00", - "days since 0001-01-01 00:00:00", - "degC", - "days since 0001-01-01 00:00:00", - ], - ), - ), - ( - "access-om3/output000/GMOM_JRA_WD.cice.h.1900-01-01.nc", - ( - "GMOM_JRA_WD.cice.h.1900-01-01.nc", - "GMOM_JRA_WD_cice_h_XXXX_XX_XX", - "1900-01-01", - "1day", - "1900-01-01, 00:00:00", - "1900-01-02, 00:00:00", - ["TLAT", "TLON", "aice", "tarea", "time_bounds"], - [ - "T grid center latitude", - "T grid center longitude", - "ice area (aggregate)", - "area of T grid cells", - "time interval endpoints", - ], - ["", "", "", "", ""], - ["", "", "time: mean", "", ""], - [ - "degrees_north", - "degrees_east", - "1", - "m^2", - "days since 0000-01-01 00:00:00", - ], - ), - ), - ( - "access-om3/output000/GMOM_JRA_WD.ww3.hi.1900-01-02-00000.nc", - ( - "GMOM_JRA_WD.ww3.hi.1900-01-02-00000.nc", - "GMOM_JRA_WD_ww3_hi_XXXX_XX_XX_XXXXX", - "1900-01-02-00000", - "fx", # WW3 provides no time bounds - "1900-01-02, 00:00:00", - "1900-01-02, 00:00:00", - ["EF", "mapsta"], - ["1D spectral density", "map status"], - ["", ""], - ["", ""], - ["m2 s", "unitless"], - ), - ), - ], -) -def test_parse_access_ncfile(test_data, filename, expected): - file = str(test_data / Path(filename)) - - assert parse_access_ncfile(file) == expected +from access_nri_intake.source.utils import get_timeinfo @pytest.mark.parametrize(