diff --git a/src/access_nri_intake/source/__init__.py b/src/access_nri_intake/source/__init__.py index 3b54329..90de769 100644 --- a/src/access_nri_intake/source/__init__.py +++ b/src/access_nri_intake/source/__init__.py @@ -17,8 +17,8 @@ PATH_COLUMN = "path" VARIABLE_COLUMN = "variable" -SCHEMA_URL = "https://raw.githubusercontent.com/ACCESS-NRI/schema/d4da77a0e627775c11ba394c0a3f72a2c654971c/file_asset.json" -SCHEMA_HASH = "7f1f58e1ae419faf8e24f15e937ef5717fa872920a06758ee2983506fcaf70fc" +SCHEMA_URL = "https://raw.githubusercontent.com/ACCESS-NRI/schema/25643eb437e95ee48b3fa6b620c7a0986c2c3bb0/file_asset.json" +SCHEMA_HASH = "d7b5fcab71861f6c4b319e64cfde75f36de2bdc797f13b5b4f7029b41ce51e5a" _, ESM_JSONSCHEMA = get_jsonschema( url=SCHEMA_URL, known_hash=SCHEMA_HASH, required=CORE_COLUMNS diff --git a/src/access_nri_intake/source/builders.py b/src/access_nri_intake/source/builders.py index acd1e72..0d55435 100644 --- a/src/access_nri_intake/source/builders.py +++ b/src/access_nri_intake/source/builders.py @@ -242,6 +242,7 @@ def parser(file): variable_long_name_list, variable_standard_name_list, variable_cell_methods_list, + variable_units_list, ) = parse_access_ncfile(file) info = { @@ -254,6 +255,7 @@ def parser(file): "variable_long_name": variable_long_name_list, "variable_standard_name": variable_standard_name_list, "variable_cell_methods": variable_cell_methods_list, + "variable_units": variable_units_list, "filename": filename, "file_id": file_id, } @@ -330,6 +332,7 @@ def parser(file): variable_long_name_list, variable_standard_name_list, variable_cell_methods_list, + variable_units_list, ) = parse_access_ncfile(file) # Remove exp_id from file id so that members can be part of the same dataset @@ -346,6 +349,7 @@ def parser(file): "variable_long_name": variable_long_name_list, "variable_standard_name": variable_standard_name_list, "variable_cell_methods": variable_cell_methods_list, + "variable_units": variable_units_list, "filename": filename, "file_id": file_id, } diff --git a/src/access_nri_intake/source/utils.py b/src/access_nri_intake/source/utils.py index 8d35ed4..c46b8af 100644 --- a/src/access_nri_intake/source/utils.py +++ b/src/access_nri_intake/source/utils.py @@ -248,6 +248,7 @@ def parse_access_ncfile(file, time_dim="time"): variable_long_name_list = [] variable_standard_name_list = [] variable_cell_methods_list = [] + variable_units_list = [] for var in ds.data_vars: attrs = ds[var].attrs if "long_name" in attrs: @@ -255,8 +256,16 @@ def parse_access_ncfile(file, time_dim="time"): variable_long_name_list.append(attrs["long_name"]) if "standard_name" in attrs: variable_standard_name_list.append(attrs["standard_name"]) + else: + variable_standard_name_list.append(None) if "cell_methods" in attrs: variable_cell_methods_list.append(attrs["cell_methods"]) + else: + variable_cell_methods_list.append(None) + if "units" in attrs: + variable_units_list.append(attrs["units"]) + else: + variable_units_list.append(None) start_date, end_date, frequency = get_timeinfo(ds, filename_frequency, time_dim) @@ -274,6 +283,7 @@ def parse_access_ncfile(file, time_dim="time"): variable_long_name_list, variable_standard_name_list, variable_cell_methods_list, + variable_units_list, ) return outputs diff --git a/tests/test_source_utils.py b/tests/test_source_utils.py index 65b6951..777446c 100644 --- a/tests/test_source_utils.py +++ b/tests/test_source_utils.py @@ -95,8 +95,9 @@ def test_parse_access_filename(filename, expected): "none", ["geolat_t", "geolon_t"], ["tracer latitude", "tracer longitude"], - [], + [None, None], ["time: point", "time: point"], + ["degrees_N", "degrees_E"], ), ), ( @@ -110,8 +111,9 @@ def test_parse_access_filename(filename, expected): "1910-01-01, 00:00:00", ["temp", "time_bounds"], ["Conservative temperature", "time axis boundaries"], - ["sea_water_conservative_temperature"], - ["time: mean"], + ["sea_water_conservative_temperature", None], + ["time: mean", None], + ["K", "days"], ), ), ( @@ -128,8 +130,9 @@ def test_parse_access_filename(filename, expected): "mixed layer depth determined by density criteria", "time axis boundaries", ], - ["ocean_mixed_layer_thickness_defined_by_sigma_t"], - ["time: mean"], + ["ocean_mixed_layer_thickness_defined_by_sigma_t", None], + ["time: mean", None], + ["m", "days"], ), ), ( @@ -145,6 +148,7 @@ def test_parse_access_filename(filename, expected): ["mixed layer depth determined by density criteria"], ["ocean_mixed_layer_thickness_defined_by_sigma_t"], ["time: mean"], + ["m"], ), ), ( @@ -164,8 +168,15 @@ def test_parse_access_filename(filename, expected): "area of T grid cells", "boundaries for time-averaging interval", ], - [], - ["time: mean"], + [None, None, None, None, None], + [None, None, "time: mean", None, None], + [ + "degrees_north", + "degrees_east", + "1", + "m^2", + "days since 1900-01-01 00:00:00", + ], ), ), ( @@ -181,6 +192,7 @@ def test_parse_access_filename(filename, expected): ["TEMPERATURE AT 1.5M"], ["air_temperature"], ["time: mean"], + ["K"], ), ), ( @@ -200,8 +212,15 @@ def test_parse_access_filename(filename, expected): "area of T grid cells", "boundaries for time-averaging interval", ], - [], - ["time: mean"], + [None, None, None, None, None], + [None, None, "time: mean", None, None], + [ + "degrees_north", + "degrees_east", + "1", + "m^2", + "days since 1850-01-01 00:00:00", + ], ), ), ( @@ -215,8 +234,9 @@ def test_parse_access_filename(filename, expected): "2015-07-01, 00:00:00", ["sst", "time_bounds"], ["Potential temperature", "time axis boundaries"], - ["sea_surface_temperature"], - ["time: mean"], + ["sea_surface_temperature", None], + ["time: mean", None], + ["K", "days"], ), ), ( @@ -230,8 +250,9 @@ def test_parse_access_filename(filename, expected): "2015-07-01, 00:00:00", ["temp_global_ave", "time_bounds"], ["Global mean temp in liquid seawater", "time axis boundaries"], - ["sea_water_potential_temperature"], - ["time: mean"], + ["sea_water_potential_temperature", None], + ["time: mean", None], + ["deg_C", "days"], ), ), ( @@ -247,6 +268,7 @@ def test_parse_access_filename(filename, expected): ["TEMPERATURE AT 1.5M"], ["air_temperature"], ["time: mean"], + ["K"], ), ), ( @@ -266,8 +288,15 @@ def test_parse_access_filename(filename, expected): "area of T grid cells", "boundaries for time-averaging interval", ], - [], - ["time: mean"], + [None, None, None, None, None], + [None, None, "time: mean", None, None], + [ + "degrees_north", + "degrees_east", + "1", + "m^2", + "days since 0001-01-01 00:00:00", + ], ), ), ( @@ -281,8 +310,9 @@ def test_parse_access_filename(filename, expected): "1850-12-30, 00:00:00", ["fgco2_raw", "time_bounds"], ["Flux into ocean - DIC, inc. anth.", "time axis boundaries"], - [], - ["time: mean"], + [None, None], + ["time: mean", None], + ["mmol/m^2/s", "days"], ), ), ( @@ -296,8 +326,9 @@ def test_parse_access_filename(filename, expected): "1850-12-30, 00:00:00", ["o2", "time_bounds"], ["o2", "time axis boundaries"], - [], - ["time: mean"], + [None, None], + ["time: mean", None], + ["mmol/m^3", "days"], ), ), ],