Skip to content

Commit

Permalink
add ACCESS-OM3 builder and basic tests
Browse files Browse the repository at this point in the history
  • Loading branch information
dougiesquire committed Feb 29, 2024
1 parent c6c4449 commit 1983bd2
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 0 deletions.
85 changes: 85 additions & 0 deletions src/access_nri_intake/source/builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,91 @@ def parser(file):
return {INVALID_ASSET: file, TRACEBACK: traceback.format_exc()}


class AccessOm3Builder(BaseBuilder):
    """Intake-ESM datastore builder for ACCESS-OM3 COSIMA datasets"""

    def __init__(self, path):
        """
        Initialise an AccessOm3Builder

        Parameters
        ----------
        path : str or list of str
            Path or list of paths to crawl for assets/files.
        """

        # All configuration other than ``path`` is fixed for this builder and
        # is handed straight to the base class.
        super().__init__(
            path=path,
            depth=2,
            exclude_patterns=[
                "*restart*",
                "*MOM_IC.nc",
                "*ocean_geometry.nc",
                "*ocean.stats.nc",
                "*Vertical_coordinate.nc",
            ],
            include_patterns=["*.nc"],
            data_format="netcdf",
            groupby_attrs=["file_id", "frequency"],
            aggregations=[
                {
                    "type": "join_existing",
                    "attribute_name": "start_date",
                    "options": {
                        "dim": "time",
                        "combine": "by_coords",
                    },
                },
            ],
        )

    @staticmethod
    def parser(file):
        """
        Parse a single ACCESS-OM3 output file into a catalogue record

        Parameters
        ----------
        file : str
            Path to the file to parse. It is passed unchanged to
            ``parse_access_ncfile``.

        Returns
        -------
        dict
            On success, a record holding the file path, realm, variable
            metadata, frequency, start/end dates, filename and file_id.
            If anything raises while parsing (including failure to determine
            the realm), a dict with the ``INVALID_ASSET`` and ``TRACEBACK``
            keys is returned instead of propagating the exception.
        """
        # Checked in order; the first component name found in the filename
        # decides the realm.
        component_realms = (
            ("mom6", "ocean"),
            ("ww3", "ocean"),
            ("cice", "seaIce"),
        )

        try:
            # The third element of the parsed tuple is not used here.
            (
                filename,
                file_id,
                _,
                frequency,
                start_date,
                end_date,
                names,
                long_names,
                standard_names,
                cell_methods,
                units,
            ) = parse_access_ncfile(file)

            for component, component_realm in component_realms:
                if component in filename:
                    realm = component_realm
                    break
            else:
                # Raised inside the ``try`` so that it is reported as an
                # invalid asset like any other parsing failure.
                raise ParserError(f"Cannot determine realm for file {file}")

            return {
                "path": str(file),
                "realm": realm,
                "variable": names,
                "frequency": frequency,
                "start_date": start_date,
                "end_date": end_date,
                "variable_long_name": long_names,
                "variable_standard_name": standard_names,
                "variable_cell_methods": cell_methods,
                "variable_units": units,
                "filename": filename,
                "file_id": file_id,
            }

        except Exception:
            return {INVALID_ASSET: file, TRACEBACK: traceback.format_exc()}


class AccessEsm15Builder(BaseBuilder):
"""Intake-ESM datastore builder for ACCESS-ESM1.5 datasets"""

Expand Down
4 changes: 4 additions & 0 deletions src/access_nri_intake/source/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,10 @@ def parse_access_filename(filename):
"""

# ACCESS output file patterns
# TODO: these should be defined per driver to prevent new patterns from breaking old drivers
not_multi_digit = "(?:\\d(?!\\d)|[^\\d](?=\\d)|[^\\d](?!\\d))"
om3_components = "(?:cice|mom6|ww3)"
ymds = "\\d{4}[_,-]\\d{2}[_,-]\\d{2}[_,-]\\d{5}"
ymd = "\\d{4}[_,-]\\d{2}[_,-]\\d{2}"
ym = "\\d{4}[_,-]\\d{2}"
y = "\\d{4}"
Expand All @@ -181,6 +184,7 @@ def parse_access_filename(filename):
r"^ocean.*[^\d]_(\d{2})$", # A few wierd files in ACCESS-OM2 01deg_jra55v13_ryf9091
r"^.*\.p.(\d{6})_.*", # ACCESS-CM2 atmosphere
r"^.*\.p.-(\d{6})_.*", # ACCESS-ESM1.5 atmosphere
rf"[^\.]*\.{om3_components}\..*({ymds}|{ymd})$", # ACCESS-OM3
]
# Frequency translations
frequencies = {
Expand Down
41 changes: 41 additions & 0 deletions tests/test_source_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,47 @@
"rregionPrydz_temp_xflux_adv",
("rregionPrydz_temp_xflux_adv", None, None),
),
# Example ACCESS-OM3 filenames
(
"GMOM_JRA_WD.ww3.hi.1958-01-02-00000",
(
"GMOM_JRA_WD_ww3_hi_XXXX_XX_XX_XXXXX",
"1958-01-02-00000",
None,
),
),
(
"GMOM_JRA.cice.h.1900-01-01",
(
"GMOM_JRA_cice_h_XXXX_XX_XX",
"1900-01-01",
None,
),
),
(
"GMOM_JRA.mom6.ocean_sfc_1900_01_01",
(
"GMOM_JRA_mom6_ocean_sfc_XXXX_XX_XX",
"1900_01_01",
None,
),
),
(
"GMOM_JRA.mom6.sfc_1900_01_01",
(
"GMOM_JRA_mom6_sfc_XXXX_XX_XX",
"1900_01_01",
None,
),
),
(
"GMOM_JRA.mom6.static",
(
"GMOM_JRA_mom6_static",
None,
None,
),
),
],
)
def test_parse_access_filename(filename, expected):
Expand Down

0 comments on commit 1983bd2

Please sign in to comment.