diff --git a/test/test_conversion_driver_esm1p5.py b/test/test_conversion_driver_esm1p5.py index 3f7684d..cc1af8d 100644 --- a/test/test_conversion_driver_esm1p5.py +++ b/test/test_conversion_driver_esm1p5.py @@ -3,6 +3,7 @@ import pytest from pathlib import Path import unittest.mock as mock +import umpost.um2netcdf as um2nc def test_get_esm1p5_fields_file_pattern(): @@ -17,14 +18,75 @@ def test_get_esm1p5_fields_file_pattern_wrong_id_length(run_id): esm1p5_convert.get_esm1p5_fields_file_pattern(run_id) -def test_get_nc_write_path(): - fields_file_path = Path("/test/path/fields_123.file") - nc_write_dir = Path("/test/path/NetCDF") +@pytest.mark.parametrize("ff_path,ff_date,nc_write_dir,expected", + [ + ( + Path("/test/aiihca.paa1feb"), + (101, 2, 1), + Path("/test/netCDF"), + Path("/test/netCDF/aiihca.pa-010102_mon.nc") + ), + ( + Path("aiihca.pe50dec"), + (1850, 12, 21), + Path("netCDF"), + Path("netCDF/aiihca.pe-185012_dai.nc") + ), + ( + Path("abc/aiihca.pi87jun"), + (1887, 6, 12), + Path("./netCDF"), + Path("./netCDF/aiihca.pi-188706_3hr.nc") + ), + ( + Path("abc/aiihca.pjc0jan"), + (120, 1, 7), + Path("./netCDF"), + Path("./netCDF/aiihca.pj-012001_6hr.nc") + ), + ( + Path("abc/aiihca.pjc0jan"), + None, + Path("./netCDF"), + Path("./netCDF/aiihca.pjc0jan.nc") + ), + ]) +def test_get_nc_write_path_recognized_unit(ff_path, ff_date, + nc_write_dir, expected): + """ + Check that netCDF file naming produces expected file paths for various + expected unit keys. + """ + nc_write_path = esm1p5_convert.get_nc_write_path( + ff_path, + nc_write_dir, + ff_date + ) + + assert nc_write_path == expected + + +def test_get_nc_write_path_unrecognized_unit(): + """ + Check that netCDF file naming falls back to simpler naming scheme + when unit key in fields file name not recognized. + """ + unknown_key = "w" + assert unknown_key not in esm1p5_convert.FF_UNIT_SUFFIX.keys() + + ff_name = f"aiihca.p{unknown_key}abcd" + ff_year = 50 + ff_month = 7 + nc_write_dir = Path("netCDF") + expected_nc_write_path = nc_write_dir / f"aiihca.p{unknown_key}-005007.nc" nc_write_path = esm1p5_convert.get_nc_write_path( - fields_file_path, nc_write_dir) + Path(ff_name), + nc_write_dir, + (ff_year, ff_month, 1) + ) - assert nc_write_path == Path("/test/path/NetCDF/fields_123.file.nc") + assert nc_write_path == expected_nc_write_path def test_find_matching_fields_files(): @@ -98,64 +160,55 @@ def mock_process(base_mock_process): @pytest.fixture def mock_process_with_exception(mock_process): - # Add a generic exception with chosen message to mock_process. - # Yield function so that tests of different exception messages + # Add a specified exception with chosen message to mock_process. + # Yield function so that tests of different exceptions and messages # can make use of the same fixture. - def _mock_process_with_exception(error_message): - mock_process.side_effect = Exception(error_message) + def _mock_process_with_exception(error_type, error_message): + mock_process.side_effect = error_type(error_message) yield _mock_process_with_exception @pytest.mark.parametrize( - "input_list", [[], ["fake_file"], [ - "fake_file_1", "fake_file_2", "fake_file_3"]] + "input_output_list", [[], + [("fake_file", "fake_file.nc")], + [("fake_file_1", "fake_file_1.nc"), + ("fake_file_2", "fake_file_2.nc"), + ("fake_file_3", "fake_file_3.nc")]] ) def test_convert_fields_file_list_success(mock_process, - input_list): + input_output_list): """ Test that process is called for each input. 
""" - input_list_paths = [Path(p) for p in input_list] + input_output_paths = [(Path(p1), Path(p2)) for p1, p2 in input_output_list] - succeeded, _ = esm1p5_convert.convert_fields_file_list( - input_list_paths, "fake_nc_write_dir") + succeeded, _ = esm1p5_convert.convert_fields_file_list(input_output_paths) - assert mock_process.call_count == len(input_list) + assert mock_process.call_count == len(input_output_list) - successful_input_paths = [successful_path_pair[0] for - successful_path_pair in succeeded] - - assert input_list_paths == successful_input_paths + assert succeeded == input_output_paths def test_convert_fields_file_list_fail_excepted(mock_process_with_exception): - # Hopefully this test will be unnecessary with um2nc standalone. - # Test that the "Variable can not be processed" error arising from time - # series inputs is excepted. - allowed_error_message = esm1p5_convert.ALLOWED_UM2NC_EXCEPTION_MESSAGES[ - "TIMESERIES_ERROR" - ] - mock_process_with_exception(allowed_error_message) - fake_file_path = Path("fake_file") + mock_process_with_exception(um2nc.UnsupportedTimeSeriesError, + "timeseries error") + fake_input_output_paths = [(Path("fake_file"), Path("fake_file.nc"))] _, failed = esm1p5_convert.convert_fields_file_list( - [fake_file_path], "fake_nc_write_dir") - - assert failed[0][0] == fake_file_path + fake_input_output_paths) - # TODO: Testing the exception part of the reported failures will be easier - # once um2nc specific exceptions are added. + assert failed[0][0] == fake_input_output_paths[0][0] def test_convert_fields_file_list_fail_critical(mock_process_with_exception): - # Test that critical exceptions which are not allowed by ALLOWED_UM2NC_EXCEPTION_MESSAGES + # Test that critical unexpected exceptions # are raised, and hence lead to the conversion crashing. generic_error_message = "Test error" - mock_process_with_exception(generic_error_message) + mock_process_with_exception(Exception, generic_error_message) with pytest.raises(Exception) as exc_info: esm1p5_convert.convert_fields_file_list( - ["fake_file"], "fake_nc_write_dir") + [("fake_file", "fake_file.nc")]) assert str(exc_info.value) == generic_error_message diff --git a/test/test_um2netcdf.py b/test/test_um2netcdf.py index 82860d0..fd41871 100644 --- a/test/test_um2netcdf.py +++ b/test/test_um2netcdf.py @@ -1,6 +1,7 @@ import unittest.mock as mock from dataclasses import dataclass from collections import namedtuple +from iris.exceptions import CoordinateNotFoundError import operator import umpost.um2netcdf as um2nc @@ -14,6 +15,9 @@ import iris.coords import iris.exceptions +D_LAT_N96 = 1.25 # Degrees between latitude points on N96 grid +D_LON_N96 = 1.875 # Degrees between longitude points on N96 grid + @pytest.fixture def z_sea_rho_data(): @@ -57,7 +61,10 @@ def z_sea_theta_data(): @pytest.fixture def mule_vars(z_sea_rho_data, z_sea_theta_data): - """Simulate mule variables from aiihca.paa1jan.subset data.""" + """ + Simulate mule variables for the New Dynamics grid from + aiihca.paa1jan.subset data. 
+ """ d_lat = 1.25 # spacing manually copied from aiihca.paa1jan.subset file d_lon = 1.875 return um2nc.MuleVars(um2nc.GRID_NEW_DYNAMICS, d_lat, d_lon, z_sea_rho_data, z_sea_theta_data) @@ -150,7 +157,7 @@ def test_process_no_heaviside_drop_cubes(air_temp_cube, precipitation_flux_cube, # TODO: lat/long & level coord fixes require more internal data attrs # skip temporarily to manage test complexity - mock.patch("umpost.um2netcdf.fix_latlon_coord"), + mock.patch("umpost.um2netcdf.fix_latlon_coords"), mock.patch("umpost.um2netcdf.fix_level_coord"), mock.patch("umpost.um2netcdf.cubewrite"), ): @@ -209,7 +216,7 @@ def test_process_mask_with_heaviside(air_temp_cube, precipitation_flux_cube, mock.patch("iris.load") as m_iris_load, mock.patch("iris.fileformats.netcdf.Saver") as m_saver, # prevent I/O - mock.patch("umpost.um2netcdf.fix_latlon_coord"), + mock.patch("umpost.um2netcdf.fix_latlon_coords"), mock.patch("umpost.um2netcdf.fix_level_coord"), mock.patch("umpost.um2netcdf.apply_mask"), # TODO: eventually call real version mock.patch("umpost.um2netcdf.cubewrite"), @@ -250,7 +257,7 @@ def test_process_no_masking_keep_all_cubes(air_temp_cube, precipitation_flux_cub mock.patch("iris.load") as m_iris_load, mock.patch("iris.fileformats.netcdf.Saver") as m_saver, # prevent I/O - mock.patch("umpost.um2netcdf.fix_latlon_coord"), + mock.patch("umpost.um2netcdf.fix_latlon_coords"), mock.patch("umpost.um2netcdf.fix_level_coord"), mock.patch("umpost.um2netcdf.cubewrite"), ): @@ -397,8 +404,13 @@ def __init__(self, item_code, var_name=None, attributes=None, units=None): self.units = None or units self.standard_name = None self.long_name = None - self.coord = {} self.data = None + + def coord(self, _): + raise NotImplementedError( + "coord() method not implemented on DummyCube" + ) + def name(self): # mimic iris API @@ -654,6 +666,358 @@ def test_fix_units_do_nothing_no_um_units(ua_plev_cube): assert ua_plev_cube.units == orig # nothing should happen as there's no cube.units +def to_iris_dimcoord(points_and_name_func): + + def dimcoord_maker(): + points, name = points_and_name_func() + return iris.coords.DimCoord( + points=points, + standard_name=name + ) + + return dimcoord_maker + + +@pytest.fixture +@to_iris_dimcoord +def lat_river_coord(): + # iris DimCoord imitating UM V7.3s 1x1 degree river grid. + # Must have length 180. + lat_river_points = np.arange(-90., 90, dtype="float32") + 0.5 + return lat_river_points, um2nc.LATITUDE + + +@pytest.fixture +@to_iris_dimcoord +def lon_river_coord(): + # iris DimCoord imitating UM V7.3s 1x1 degree river grid. + # Must have length 360. + lon_river_points = np.arange(0., 360., dtype="float32") + 0.5 + return lon_river_points, um2nc.LONGITUDE + + +@pytest.fixture +@to_iris_dimcoord +def lat_v_nd_coord(): + # iris DimCoord imitating the real + # lat_v grid from ESM1.5 (which uses the New Dynamics grid). + # This grid is offset half a grid cell compared to the standard + # New Dynamics latitude grid. + lat_v_points = np.arange(-90.+0.5*D_LAT_N96, 90, + D_LAT_N96, dtype="float32") + return lat_v_points, um2nc.LATITUDE + + +@pytest.fixture +@to_iris_dimcoord +def lon_u_nd_coord(): + # iris DimCoord imitating the real + # lon_u grid from ESM1.5 (which uses the New Dynamics grid). + # This grid is offset half a grid cell compared to the standard + # New Dynamics longitude grid. 
+    lon_u_points = np.arange(0.5*D_LON_N96, 360, D_LON_N96, dtype="float32")
+    return lon_u_points, um2nc.LONGITUDE
+
+
+@pytest.fixture
+@to_iris_dimcoord
+def lat_v_eg_coord():
+    # iris DimCoord imitating the real
+    # lat_v grid from CM2 (which uses the End Game grid).
+    # This grid is offset half a grid cell compared to the standard
+    # End Game latitude grid.
+    lat_v_points = np.arange(-90., 91, D_LAT_N96, dtype="float32")
+    return lat_v_points, um2nc.LATITUDE
+
+
+@pytest.fixture
+@to_iris_dimcoord
+def lon_u_eg_coord():
+    # iris DimCoord imitating the real
+    # lon_u grid from CM2 (which uses the End Game grid).
+    # This grid is offset half a grid cell compared to the standard
+    # End Game longitude grid.
+    lon_u_points = np.arange(0, 360, D_LON_N96, dtype="float32")
+    return lon_u_points, um2nc.LONGITUDE
+
+
+@pytest.fixture
+@to_iris_dimcoord
+def lat_standard_nd_coord():
+    # iris DimCoord imitating the standard latitude
+    # grid from ESM1.5 (which uses the New Dynamics grid).
+    lat_points = np.arange(-90, 91, D_LAT_N96, dtype="float32")
+    return lat_points, um2nc.LATITUDE
+
+
+@pytest.fixture
+@to_iris_dimcoord
+def lon_standard_nd_coord():
+    # iris DimCoord imitating the standard longitude
+    # grid from ESM1.5 (which uses the New Dynamics grid).
+    lon_points = np.arange(0, 360, D_LON_N96, dtype="float32")
+    return lon_points, um2nc.LONGITUDE
+
+
+@pytest.fixture
+@to_iris_dimcoord
+def lat_standard_eg_coord():
+    # iris DimCoord imitating the standard latitude
+    # grid from CM2 (which uses the End Game grid).
+    lat_points = np.arange(-90 + 0.5*D_LAT_N96, 90.,
+                           D_LAT_N96, dtype="float32")
+    return lat_points, um2nc.LATITUDE
+
+
+@pytest.fixture
+@to_iris_dimcoord
+def lon_standard_eg_coord():
+    # iris DimCoord imitating the standard longitude
+    # grid from CM2 (which uses the End Game grid).
+    lon_points = np.arange(0.5*D_LON_N96, 360, D_LON_N96, dtype="float32")
+    return lon_points, um2nc.LONGITUDE
+
+
+class DummyCubeWithCoords(DummyCube):
+    # DummyCube with coordinates, which can be filled with
+    # iris.DimCoord objects for testing.
+    def __init__(self, dummy_cube, coords=None):
+        super().__init__(dummy_cube.item_code,
+                         dummy_cube.var_name,
+                         dummy_cube.attributes,
+                         dummy_cube.units)
+
+        # Set up coordinate dictionary to hold each dummy coordinate, with keys
+        # given by the coordinate names. This ensures that the key used to
+        # access a coordinate via the coord method matches the
+        # coordinate's name.
+        if coords is not None:
+            self.coordinate_dict = {
+                coord.name(): coord for coord in coords
+            }
+        else:
+            self.coordinate_dict = {}
+
+    def coord(self, coordinate_name):
+        try:
+            return self.coordinate_dict[coordinate_name]
+        except KeyError:
+            msg = f"{self.__class__}: lacks coord for '{coordinate_name}'"
+            raise CoordinateNotFoundError(msg)
+
+
+def assert_coordinates_are_unmodified(lat_coord, lon_coord):
+    """
+    Helper function to check that coordinates' attributes match
+    those expected for coordinates that have not yet been modified
+    by fix_latlon_coords.
+    """
+    for coord in [lat_coord, lon_coord]:
+        assert coord.points.dtype == np.dtype("float32")
+        assert not coord.has_bounds()
+        assert coord.var_name is None
+
+
+def is_float64(lat_coord, lon_coord):
+    return (lat_coord.points.dtype == np.dtype("float64") and
+            lon_coord.points.dtype == np.dtype("float64"))
+
+
+def has_bounds(lat_coord, lon_coord):
+    return lat_coord.has_bounds() and lon_coord.has_bounds()
+
+
+# Tests of fix_latlon_coords. This function converts coordinate points
+# to double, adds bounds, and adds var_names to the coordinates.
+# The following tests check that these are done correctly.
+def test_fix_latlon_coords_river(ua_plev_cube,
+                                 lat_river_coord,
+                                 lon_river_coord):
+    """
+    Tests of the fix_latlon_coords function on river grid coordinates.
+    """
+
+    cube_with_river_coords = DummyCubeWithCoords(
+        dummy_cube=ua_plev_cube,
+        coords=[lat_river_coord, lon_river_coord]
+    )
+
+    cube_lat_coord = cube_with_river_coords.coord(um2nc.LATITUDE)
+    cube_lon_coord = cube_with_river_coords.coord(um2nc.LONGITUDE)
+
+    # Checks prior to modifications.
+    assert_coordinates_are_unmodified(cube_lat_coord, cube_lon_coord)
+
+    um2nc.fix_latlon_coords(cube_with_river_coords, um2nc.GRID_END_GAME,
+                            D_LAT_N96, D_LON_N96)
+
+    # Checks post modifications.
+    assert cube_lat_coord.var_name == um2nc.VAR_NAME_LAT_RIVER
+    assert cube_lon_coord.var_name == um2nc.VAR_NAME_LON_RIVER
+
+    assert is_float64(cube_lat_coord, cube_lon_coord)
+    assert has_bounds(cube_lat_coord, cube_lon_coord)
+
+
+def test_fix_latlon_coords_uv(ua_plev_cube,
+                              lat_v_nd_coord,
+                              lon_u_nd_coord,
+                              lat_v_eg_coord,
+                              lon_u_eg_coord,
+                              ):
+    """
+    Tests of fix_latlon_coords for longitude u and latitude v
+    coordinates on both the New Dynamics and End Game grids.
+    """
+    coord_sets = [
+        (lat_v_nd_coord, lon_u_nd_coord, um2nc.GRID_NEW_DYNAMICS),
+        (lat_v_eg_coord, lon_u_eg_coord, um2nc.GRID_END_GAME)
+    ]
+
+    for lat_coordinate, lon_coordinate, grid_type in coord_sets:
+
+        cube_with_uv_coords = DummyCubeWithCoords(
+            dummy_cube=ua_plev_cube,
+            coords=[lat_coordinate, lon_coordinate]
+        )
+
+        # Checks prior to modifications.
+        assert_coordinates_are_unmodified(lat_coordinate, lon_coordinate)
+
+        um2nc.fix_latlon_coords(cube_with_uv_coords, grid_type,
+                                D_LAT_N96, D_LON_N96)
+
+        assert lat_coordinate.var_name == um2nc.VAR_NAME_LAT_V
+        assert lon_coordinate.var_name == um2nc.VAR_NAME_LON_U
+
+        assert is_float64(lat_coordinate, lon_coordinate)
+        assert has_bounds(lat_coordinate, lon_coordinate)
+
+
+def test_fix_latlon_coords_standard(ua_plev_cube,
+                                    lat_standard_nd_coord,
+                                    lon_standard_nd_coord,
+                                    lat_standard_eg_coord,
+                                    lon_standard_eg_coord):
+    """
+    Tests of fix_latlon_coords for standard longitude
+    and latitude coordinates on both the New Dynamics and
+    End Game grids.
+    """
+    coord_sets = [
+        (
+            lat_standard_nd_coord,
+            lon_standard_nd_coord,
+            um2nc.GRID_NEW_DYNAMICS
+        ),
+        (
+            lat_standard_eg_coord,
+            lon_standard_eg_coord,
+            um2nc.GRID_END_GAME
+        )
+    ]
+
+    for lat_coordinate, lon_coordinate, grid_type in coord_sets:
+
+        cube_with_uv_coords = DummyCubeWithCoords(
+            dummy_cube=ua_plev_cube,
+            coords=[lat_coordinate, lon_coordinate]
+        )
+
+        # Checks prior to modifications.
+        assert_coordinates_are_unmodified(lat_coordinate, lon_coordinate)
+
+        um2nc.fix_latlon_coords(cube_with_uv_coords, grid_type,
+                                D_LAT_N96, D_LON_N96)
+
+        assert lat_coordinate.var_name == um2nc.VAR_NAME_LAT_STANDARD
+        assert lon_coordinate.var_name == um2nc.VAR_NAME_LON_STANDARD
+
+        assert is_float64(lat_coordinate, lon_coordinate)
+        assert has_bounds(lat_coordinate, lon_coordinate)
+
+
+def test_fix_latlon_coords_single_point(ua_plev_cube):
+    """
+    Test that single point longitude and latitude coordinates
+    are provided with global bounds.
+    """
+
+    # Expected values after modification
+    expected_lat_bounds = um2nc.GLOBAL_COORD_BOUNDS[um2nc.LATITUDE]
+    expected_lon_bounds = um2nc.GLOBAL_COORD_BOUNDS[um2nc.LONGITUDE]
+
+    lat_coord_single = iris.coords.DimCoord(points=np.array([0]),
+                                            standard_name=um2nc.LATITUDE)
+    lon_coord_single = iris.coords.DimCoord(points=np.array([0]),
+                                            standard_name=um2nc.LONGITUDE)
+
+    cube_with_uv_coords = DummyCubeWithCoords(
+        dummy_cube=ua_plev_cube,
+        coords=[lat_coord_single, lon_coord_single]
+    )
+
+    assert not has_bounds(lat_coord_single, lon_coord_single)
+
+    um2nc.fix_latlon_coords(cube_with_uv_coords, um2nc.GRID_NEW_DYNAMICS,
+                            D_LAT_N96, D_LON_N96)
+
+    assert has_bounds(lat_coord_single, lon_coord_single)
+    assert np.array_equal(lat_coord_single.bounds, expected_lat_bounds)
+    assert np.array_equal(lon_coord_single.bounds, expected_lon_bounds)
+
+
+def test_fix_latlon_coords_has_bounds(ua_plev_cube):
+    """
+    Test that existing coordinate bounds are not modified by
+    fix_latlon_coords.
+    """
+
+    # Bounds which should remain unchanged by fix_latlon_coords
+    lon_bounds = np.array([[0, 1]])
+    lat_bounds = np.array([[10, 25]])
+
+    lat_coord = iris.coords.DimCoord(points=np.array([0]),
+                                     standard_name=um2nc.LATITUDE,
+                                     bounds=lat_bounds.copy())
+    lon_coord = iris.coords.DimCoord(points=np.array([0]),
+                                     standard_name=um2nc.LONGITUDE,
+                                     bounds=lon_bounds.copy())
+
+    cube_with_uv_coords = DummyCubeWithCoords(
+        dummy_cube=ua_plev_cube,
+        coords=[lat_coord, lon_coord]
+    )
+    assert has_bounds(lat_coord, lon_coord)
+
+    um2nc.fix_latlon_coords(cube_with_uv_coords, um2nc.GRID_NEW_DYNAMICS,
+                            D_LAT_N96, D_LON_N96)
+
+    assert np.array_equal(lat_coord.bounds, lat_bounds)
+    assert np.array_equal(lon_coord.bounds, lon_bounds)
+
+
+def test_fix_latlon_coords_missing_coord_error(ua_plev_cube):
+    """
+    Test that fix_latlon_coords raises the right type of error when a cube
+    is missing its latitude and longitude coordinates.
+    """
+    fake_coord = iris.coords.DimCoord(
+        points=np.array([1, 2, 3], dtype="float32"),
+        # Iris requires the name to still be a valid 'standard name'
+        standard_name="height"
+    )
+
+    cube_with_fake_coord = DummyCubeWithCoords(ua_plev_cube, [fake_coord])
+
+    with pytest.raises(um2nc.UnsupportedTimeSeriesError):
+        um2nc.fix_latlon_coords(cube_with_fake_coord, um2nc.GRID_NEW_DYNAMICS,
+                                D_LAT_N96, D_LON_N96)
+
+
 def test_fix_cell_methods_drop_hours():
     # ensure cell methods with "hour" in the interval name are translated to
     # empty intervals
diff --git a/umpost/conversion_driver_esm1p5.py b/umpost/conversion_driver_esm1p5.py
index 9d1199a..2a96f9c 100755
--- a/umpost/conversion_driver_esm1p5.py
+++ b/umpost/conversion_driver_esm1p5.py
@@ -2,7 +2,7 @@
 """
 ESM1.5 conversion driver
 
-Wrapper script for automated fields file to NetCDF conversion
+Wrapper script for automated fields file to netCDF conversion
 during ESM1.5 simulations. Runs conversion module on each atmospheric
 output in a specified directory.
 
@@ -21,6 +21,7 @@ import errno
 from pathlib import Path
 
 from umpost import um2netcdf
+import mule
 
 
 # TODO: um2netcdf will update the way arguments are fed to `process`.
@@ -35,11 +36,16 @@
 # TODO: Confirm with Martin the below arguments are appropriate defaults.
 ARG_VALS = ARG_NAMES(3, 4, True, False, 0.5, False, None, None, False, False)
 
-# TODO: um2nc standalone will raise more specific exceptions.
-# See https://github.com/ACCESS-NRI/um2nc-standalone/issues/18
-# Improve exception handling here once those changes have been made.
-ALLOWED_UM2NC_EXCEPTION_MESSAGES = {
-    "TIMESERIES_ERROR": "Variable can not be processed",
+
+# Index of the character in filenames specifying the unit key
+FF_UNIT_INDEX = 8
+# Output file suffix for each type of unit. Assumes
+# ESM1.5's unit definitions are being used.
+FF_UNIT_SUFFIX = {
+    "a": "mon",
+    "e": "dai",
+    "i": "3hr",
+    "j": "6hr",
 }
 
 
@@ -53,7 +59,7 @@ def get_esm1p5_fields_file_pattern(run_id: str):
 
     Returns
     -------
-    fields_file_name_pattern: Regex pattern for matching fields file names
+    fields_file_name_pattern: Regex pattern for matching fields file names.
     """
 
     # For ESM1.5 simulations, files start with run_id + 'a' (atmosphere) +
@@ -70,14 +76,15 @@ def get_esm1p5_fields_file_pattern(run_id: str):
     return fields_file_name_pattern
 
 
-def get_nc_write_path(fields_file_path, nc_write_dir):
+def get_nc_write_path(fields_file_path, nc_write_dir, date=None):
     """
-    Get filepath for writing NetCDF to based on fields file name.
+    Get filepath for writing netCDF to, based on fields file name and date.
 
     Parameters
     ----------
     fields_file_path : path to single UM fields file to be converted.
-    nc_write_dir : path to target directory for writing NetCDF files.
+    nc_write_dir : path to target directory for writing netCDF files.
+    date : tuple of form (year, month, day) associated with fields file data.
 
     Returns
     -------
@@ -86,11 +93,9 @@ def get_nc_write_path(fields_file_path, nc_write_dir):
     fields_file_path = Path(fields_file_path)
     nc_write_dir = Path(nc_write_dir)
 
-    fields_file_name = fields_file_path.name
-    nc_file_name = fields_file_name + ".nc"
-    nc_file_write_path = nc_write_dir / nc_file_name
+    nc_file_name = get_nc_filename(fields_file_path.name, date)
 
-    return nc_file_write_path
+    return nc_write_dir / nc_file_name
 
 
 def find_matching_fields_files(dir_contents, fields_file_name_pattern):
@@ -117,14 +122,72 @@ def find_matching_fields_files(dir_contents, fields_file_name_pattern):
     return fields_file_paths
 
 
-def convert_fields_file_list(fields_file_paths, nc_write_dir):
+def get_nc_filename(fields_file_name, date=None):
     """
-    Convert group of fields files to NetCDF, writing output in nc_write_dir.
+    Format a netCDF output filename based on the input fields file name and
+    its date. Assumes fields_file_name follows ESM1.5's naming convention
+    '{5 char run_id}a.p{unit}{date encoding}'.
 
     Parameters
     ----------
-    fields_file_paths : list of paths to fields files for conversion.
-    nc_write_dir : directory to save NetCDF files into.
+    fields_file_name: name of fields file to be converted.
+    date: tuple of form (year, month, day) associated with fields file data,
+        or None. If None, ".nc" will be concatenated to the original fields
+        file name.
+
+    Returns
+    -------
+    name: formatted netCDF filename for writing output.
+    """
+    if date is None:
+        return f"{fields_file_name}.nc"
+
+    # TODO: Use regex to extract stem and unit from filename to improve
+    # clarity, and for better handling of unexpected filenames.
+    stem = fields_file_name[0:FF_UNIT_INDEX + 1]
+    unit = fields_file_name[FF_UNIT_INDEX]
+
+    try:
+        suffix = f"_{FF_UNIT_SUFFIX[unit]}"
+    except KeyError:
+        warnings.warn(
+            f"Unit code '{unit}' from filename {fields_file_name} "
+            "not recognized. Frequency information will not be added "
+            "to the netCDF filename."
+        )
+        suffix = ""
+
+    year, month, _ = date
+    return f"{stem}-{year:04d}{month:02d}{suffix}.nc"
+
+
+def get_ff_date(fields_file_path):
+    """
+    Get the date from a fields file. To be used for naming output files.
+
+    Parameters
+    ----------
+    fields_file_path : path to single fields file.
+
+    Returns
+    -------
+    date_tuple : tuple of integers (yyyy,mm,dd) containing the fields
+        file's date.
+    """
+    header = mule.FixedLengthHeader.from_file(
+        str(fields_file_path))
+
+    return header.t2_year, header.t2_month, header.t2_day  # pylint: disable=no-member
+
+
+def convert_fields_file_list(input_output_paths):
+    """
+    Convert group of fields files to netCDF, writing output to specified paths.
+
+    Parameters
+    ----------
+    input_output_paths : list of tuples of form (input_path, output_path).
+        The fields file at input_path will be written to netCDF at output_path.
 
     Returns
     -------
@@ -136,23 +199,15 @@ def convert_fields_file_list(fields_file_paths, nc_write_dir):
     succeeded = []
     failed = []
 
-    fields_file_paths = [Path(p) for p in fields_file_paths]
-
-    for fields_file_path in fields_file_paths:
-
-        nc_write_path = get_nc_write_path(fields_file_path, nc_write_dir)
-
+    for ff_path, nc_path in input_output_paths:
         try:
-            um2netcdf.process(fields_file_path, nc_write_path, ARG_VALS)
-            succeeded.append((fields_file_path, nc_write_path))
+            um2netcdf.process(ff_path, nc_path, ARG_VALS)
+            succeeded.append((ff_path, nc_path))
+
+        except um2netcdf.UnsupportedTimeSeriesError as exc:
+            failed.append((ff_path, exc))
 
-        except Exception as exc:
-            # TODO: Refactor once um2nc has specific exceptions
-            if exc.args[0] in ALLOWED_UM2NC_EXCEPTION_MESSAGES.values():
-                failed.append((fields_file_path, exc))
-            else:
-                # raise any unexpected errors
-                raise
+    # Any unexpected errors will be raised
 
     return succeeded, failed
 
@@ -221,7 +276,7 @@ def convert_esm1p5_output_dir(esm1p5_output_dir):
     ----------
     esm1p5_output_dir: an "outputXYZ" directory produced by an ESM1.5
         simulation. Fields files in the "atmosphere" subdirectory will be
-        converted to NetCDF.
+        converted to netCDF.
 
     Returns
     -------
@@ -240,8 +295,8 @@ def convert_esm1p5_output_dir(esm1p5_output_dir):
             errno.ENOENT, os.strerror(errno.ENOENT), current_atm_output_dir
         )
 
-    # Create a directory for writing NetCDF files
-    nc_write_dir = current_atm_output_dir / "NetCDF"
+    # Create a directory for writing netCDF files
+    nc_write_dir = current_atm_output_dir / "netCDF"
     nc_write_dir.mkdir(exist_ok=True)
 
     # Find fields file outputs to be converted
@@ -259,15 +314,16 @@ def convert_esm1p5_output_dir(esm1p5_output_dir):
         warnings.warn(
             f"No files matching pattern '{fields_file_name_pattern}' "
             f"found in {current_atm_output_dir.resolve()}. No files will be "
-            "converted to NetCDF."
+            "converted to netCDF."
         )
 
         return [], []  # Don't try to run the conversion
 
-    succeeded, failed = convert_fields_file_list(
-        atm_dir_fields_files,
-        nc_write_dir
-    )
+    output_paths = [get_nc_write_path(path, nc_write_dir, get_ff_date(path))
+                    for path in atm_dir_fields_files]
+    input_output_pairs = zip(atm_dir_fields_files, output_paths)
+
+    succeeded, failed = convert_fields_file_list(input_output_pairs)
 
     return succeeded, failed
 
diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py
index 999844a..70416cb 100644
--- a/umpost/um2netcdf.py
+++ b/umpost/um2netcdf.py
@@ -40,6 +40,25 @@
 # TODO: what is this limit & does it still exist?
 XCONV_LONG_NAME_LIMIT = 110
 
+LONGITUDE = "longitude"
+LATITUDE = "latitude"
+
+# Bounds for global single cells
+GLOBAL_COORD_BOUNDS = {
+    LONGITUDE: np.array([[0., 360.]]),
+    LATITUDE: np.array([[-90., 90.]])
+}
+
+NUM_LAT_RIVER_GRID_POINTS = 180
+NUM_LON_RIVER_GRID_POINTS = 360
+
+VAR_NAME_LAT_RIVER = "lat_river"
+VAR_NAME_LON_RIVER = "lon_river"
+VAR_NAME_LAT_V = "lat_v"
+VAR_NAME_LON_U = "lon_u"
+VAR_NAME_LAT_STANDARD = "lat"
+VAR_NAME_LON_STANDARD = "lon"
+
 NC_FORMATS = {
     1: 'NETCDF3_CLASSIC',
     2: 'NETCDF3_64BIT',
@@ -57,6 +76,14 @@ class PostProcessingError(Exception):
     pass
 
 
+class UnsupportedTimeSeriesError(PostProcessingError):
+    """
+    Error to be raised when latitude and longitude coordinates
+    are missing, indicating an unsupported timeseries variable.
+    """
+    pass
+
+
 # Override the PP file calendar function to use Proleptic Gregorian rather than Gregorian.
 # This matters for control runs with model years < 1600.
 @property
@@ -103,46 +130,193 @@ def convert_proleptic(time):
     time.units = newunits
 
 
-def fix_latlon_coord(cube, grid_type, dlat, dlon):
-    def _add_coord_bounds(coord):
-        if len(coord.points) > 1:
-            if not coord.has_bounds():
-                coord.guess_bounds()
-        else:
-            # For length 1, assume it's global. guess_bounds doesn't work in this case
-            if coord.name() == 'latitude':
-                if not coord.has_bounds():
-                    coord.bounds = np.array([[-90., 90.]])
-            elif coord.name() == 'longitude':
-                if not coord.has_bounds():
-                    coord.bounds = np.array([[0., 360.]])
+def fix_lat_coord_name(lat_coordinate, grid_type, dlat):
+    """
+    Add a 'var_name' attribute to a latitude coordinate object
+    based on the grid it lies on.
 
-    lat = cube.coord('latitude')
+    NB - Grid spacing dlat only refers to variables on the main
+    horizontal grids, and not the river grid.
 
-    # Force to double for consistency with CMOR
-    lat.points = lat.points.astype(np.float64)
-    _add_coord_bounds(lat)
-    lon = cube.coord('longitude')
-    lon.points = lon.points.astype(np.float64)
-    _add_coord_bounds(lon)
-
-    lat = cube.coord('latitude')
-    if len(lat.points) == 180:
-        lat.var_name = 'lat_river'
-    elif (lat.points[0] == -90 and grid_type == 'EG') or \
-            (np.allclose(-90.+0.5*dlat, lat.points[0]) and grid_type == 'ND'):
-        lat.var_name = 'lat_v'
+    Parameters
+    ----------
+    lat_coordinate: coordinate object from iris cube (edits in place).
+    grid_type: (string) model horizontal grid type.
+    dlat: (float) meridional spacing between latitude grid points.
+    """
+
+    if lat_coordinate.name() != LATITUDE:
+        raise ValueError(
+            f"Wrong coordinate {lat_coordinate.name()} supplied. "
+            f"Expected {LATITUDE}."
+        )
+
+    if is_lat_river_grid(lat_coordinate.points):
+        lat_coordinate.var_name = VAR_NAME_LAT_RIVER
+    elif is_lat_v_grid(lat_coordinate.points, grid_type, dlat):
+        lat_coordinate.var_name = VAR_NAME_LAT_V
     else:
-        lat.var_name = 'lat'
-
-    lon = cube.coord('longitude')
-    if len(lon.points) == 360:
-        lon.var_name = 'lon_river'
-    elif (lon.points[0] == 0 and grid_type == 'EG') or \
-            (np.allclose(0.5*dlon, lon.points[0]) and grid_type == 'ND'):
-        lon.var_name = 'lon_u'
+        lat_coordinate.var_name = VAR_NAME_LAT_STANDARD
+
+
+def fix_lon_coord_name(lon_coordinate, grid_type, dlon):
+    """
+    Add a 'var_name' attribute to a longitude coordinate object
+    based on the grid it lies on.
+
+    NB - Grid spacing dlon only refers to variables on the main
+    horizontal grids, and not the river grid.
+
+    Parameters
+    ----------
+    lon_coordinate: coordinate object from iris cube (edits in place).
+    grid_type: (string) model horizontal grid type.
+    dlon: (float) zonal spacing between longitude grid points.
+    """
+
+    if lon_coordinate.name() != LONGITUDE:
+        raise ValueError(
+            f"Wrong coordinate {lon_coordinate.name()} supplied. "
+            f"Expected {LONGITUDE}."
+        )
+
+    if is_lon_river_grid(lon_coordinate.points):
+        lon_coordinate.var_name = VAR_NAME_LON_RIVER
+    elif is_lon_u_grid(lon_coordinate.points, grid_type, dlon):
+        lon_coordinate.var_name = VAR_NAME_LON_U
     else:
-        lon.var_name = 'lon'
+        lon_coordinate.var_name = VAR_NAME_LON_STANDARD
+
+
+def is_lat_river_grid(latitude_points):
+    """
+    Check whether latitude points are on the river routing grid.
+
+    Parameters
+    ----------
+    latitude_points: (array) 1D array of latitude grid points.
+    """
+    return len(latitude_points) == NUM_LAT_RIVER_GRID_POINTS
+
+
+def is_lon_river_grid(longitude_points):
+    """
+    Check whether longitude points are on the river routing grid.
+
+    Parameters
+    ----------
+    longitude_points: (array) 1D array of longitude grid points.
+    """
+
+    return len(longitude_points) == NUM_LON_RIVER_GRID_POINTS
+
+
+def is_lat_v_grid(latitude_points, grid_type, dlat):
+    """
+    Check whether latitude points are on the lat_v grid.
+
+    Parameters
+    ----------
+    latitude_points: (array) 1D array of latitude grid points.
+    grid_type: (string) model horizontal grid type.
+    dlat: (float) meridional spacing between latitude grid points.
+    """
+    min_latitude = latitude_points[0]
+    min_lat_v_nd_grid = -90.+0.5*dlat
+    min_lat_v_eg_grid = -90
+
+    if grid_type == GRID_END_GAME:
+        return min_latitude == min_lat_v_eg_grid
+    elif grid_type == GRID_NEW_DYNAMICS:
+        return np.allclose(min_lat_v_nd_grid, min_latitude)
+
+    return False
+
+
+def is_lon_u_grid(longitude_points, grid_type, dlon):
+    """
+    Check whether longitude points are on the lon_u grid.
+
+    Parameters
+    ----------
+    longitude_points: (array) 1D array of longitude grid points.
+    grid_type: (string) model horizontal grid type.
+    dlon: (float) zonal spacing between longitude grid points.
+    """
+    min_longitude = longitude_points[0]
+    min_lon_u_nd_grid = 0.5*dlon
+    min_lon_u_eg_grid = 0
+
+    if grid_type == GRID_END_GAME:
+        return min_longitude == min_lon_u_eg_grid
+    elif grid_type == GRID_NEW_DYNAMICS:
+        return np.allclose(min_lon_u_nd_grid, min_longitude)
+
+    return False
+
+
+def add_latlon_coord_bounds(cube_coordinate):
+    """
+    Add bounds to horizontal coordinate (longitude or latitude) if
+    they don't already exist. Edits coordinate in place.
+
+    Parameters
+    ----------
+    cube_coordinate: coordinate object from iris cube.
+    """
+    coordinate_name = cube_coordinate.name()
+    if coordinate_name not in [LONGITUDE, LATITUDE]:
+        raise ValueError(
+            f"Wrong coordinate {coordinate_name} supplied. "
+            f"Expected one of {LONGITUDE}, {LATITUDE}."
+        )
+
+    # Only add bounds if not already present.
+    if not cube_coordinate.has_bounds():
+        if len(cube_coordinate.points) > 1:
+            cube_coordinate.guess_bounds()
+        else:
+            # For length 1, assume it's global.
+            # guess_bounds doesn't work in this case.
+            cube_coordinate.bounds = GLOBAL_COORD_BOUNDS[coordinate_name]
+
+
+def fix_latlon_coords(cube, grid_type, dlat, dlon):
+    """
+    Wrapper function to modify cube's horizontal coordinates
+    (latitude and longitude). Converts to float64, adds grid bounds,
+    and renames coordinates. Modifies cube in place.
+
+    NB - grid spacings dlat and dlon only refer to variables on the main
+    horizontal grids, and not the river grid.
+
+    Parameters
+    ----------
+    cube: Iris cube object (modified in place).
+    grid_type: (string) model horizontal grid type.
+    dlat: (float) meridional spacing between latitude grid points.
+ dlon: (float) zonal spacing between longitude grid points. + """ + + try: + latitude_coordinate = cube.coord(LATITUDE) + longitude_coordinate = cube.coord(LONGITUDE) + except iris.exceptions.CoordinateNotFoundError as coord_error: + msg = ( + "Missing latitude or longitude coordinate for variable (possible timeseries?): \n" + f"{cube}\n" + ) + raise UnsupportedTimeSeriesError(msg) from coord_error + + # Force to double for consistency with CMOR + latitude_coordinate.points = latitude_coordinate.points.astype(np.float64) + longitude_coordinate.points = longitude_coordinate.points.astype(np.float64) + + add_latlon_coord_bounds(latitude_coordinate) + add_latlon_coord_bounds(longitude_coordinate) + + fix_lat_coord_name(latitude_coordinate, grid_type, dlat) + fix_lon_coord_name(longitude_coordinate, grid_type, dlon) # TODO: split cube ops into functions, this will likely increase process() workflow steps @@ -325,13 +499,7 @@ def process(infile, outfile, args): # Interval in cell methods isn't reliable so better to remove it. c.cell_methods = fix_cell_methods(c.cell_methods) - try: - fix_latlon_coord(c, mv.grid_type, mv.d_lat, mv.d_lon) - except iris.exceptions.CoordinateNotFoundError: - print('\nMissing lat/lon coordinates for variable (possible timeseries?)\n') - print(c) - raise Exception("Variable can not be processed") - + fix_latlon_coords(c, mv.grid_type, mv.d_lat, mv.d_lon) fix_level_coord(c, mv.z_rho, mv.z_theta) if do_masking:
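
Usage note (not part of the patch): a minimal sketch of the new netCDF naming
scheme, mirroring the parametrized test cases above. The import alias
esm1p5_convert is an assumption borrowed from the test module.

    from pathlib import Path

    from umpost import conversion_driver_esm1p5 as esm1p5_convert

    # Unit key "a" maps to "mon" in FF_UNIT_SUFFIX, so a monthly fields file
    # dated (year=101, month=2, day=1) gains a "_mon" frequency suffix.
    nc_path = esm1p5_convert.get_nc_write_path(
        Path("/test/aiihca.paa1feb"), Path("/test/netCDF"), (101, 2, 1))
    assert nc_path == Path("/test/netCDF/aiihca.pa-010102_mon.nc")

    # Without a date, get_nc_filename falls back to appending ".nc".
    assert esm1p5_convert.get_nc_filename("aiihca.paa1feb") == "aiihca.paa1feb.nc"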
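
Error-handling note: a sketch of how the new exception surfaces when a cube
has no horizontal coordinates. The input filename is hypothetical; the grid
spacings are the N96 values used throughout the tests.

    import iris

    import umpost.um2netcdf as um2nc

    cube = iris.load_cube("timeseries_input.nc")  # hypothetical input
    try:
        um2nc.fix_latlon_coords(cube, um2nc.GRID_NEW_DYNAMICS, 1.25, 1.875)
    except um2nc.UnsupportedTimeSeriesError as err:
        # Cubes lacking lat/lon coordinates (e.g. timeseries) now raise a
        # specific exception rather than a generic message to string-match.
        print(err)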