From 64da4b3b2cb92298e7ec4d1f5b33f2213c048ebf Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Fri, 6 Sep 2024 12:34:49 +1000 Subject: [PATCH 01/11] Extract 64 --> 32 bit data conversion. --- umpost/um2netcdf.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py index 80f55ef..bb1c70e 100644 --- a/umpost/um2netcdf.py +++ b/umpost/um2netcdf.py @@ -160,10 +160,7 @@ def cubewrite(cube, sman, compression, use64bit, verbose): pass if not use64bit: - if cube.data.dtype == 'float64': - cube.data = cube.data.astype(np.float32) - elif cube.data.dtype == 'int64': - cube.data = cube.data.astype(np.int32) + convert_32_bit(cube) # Set the missing_value attribute. Use an array to force the type to match # the data type @@ -736,6 +733,13 @@ def fix_level_coord(cube, z_rho, z_theta, tol=1e-6): c_sigma.var_name = 'sigma_theta' +def convert_32_bit(cube): + if cube.data.dtype == 'float64': + cube.data = cube.data.astype(np.float32) + elif cube.data.dtype == 'int64': + cube.data = cube.data.astype(np.int32) + + def parse_args(): parser = argparse.ArgumentParser(description="Convert UM fieldsfile to netcdf") parser.add_argument('-k', dest='nckind', required=False, type=int, From 9008c29e2cdc0dc80df39bd4477e45d99c80eedf Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Fri, 6 Sep 2024 12:44:12 +1000 Subject: [PATCH 02/11] Add basic 64 --> 32 bit data conversion tests. --- test/test_um2netcdf.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/test/test_um2netcdf.py b/test/test_um2netcdf.py index c1a86f0..ecb3fd4 100644 --- a/test/test_um2netcdf.py +++ b/test/test_um2netcdf.py @@ -397,6 +397,7 @@ def __init__(self, item_code, var_name=None, attributes=None, units=None): self.standard_name = None self.long_name = None self.coord = {} + self.data = None def name(self): # mimic iris API @@ -740,3 +741,19 @@ def test_fix_level_coord_skipped_if_no_levels(z_sea_rho_data, z_sea_theta_data): m_cube = mock.Mock(iris.cube.Cube) m_cube.coord.side_effect = iris.exceptions.CoordinateNotFoundError um2nc.fix_level_coord(m_cube, z_sea_rho_data, z_sea_theta_data) + + +# 64 to 32 bit data conversion tests + +def test_64_to_32_int(ua_plev_cube): + array = np.array([100, 10, 1, 0, -10], dtype=np.int64) + ua_plev_cube.data = array + um2nc.convert_32_bit(ua_plev_cube) + assert ua_plev_cube.data.dtype == np.int32 + + +def test_64_to_32_float(ua_plev_cube): + array = np.array([300.33, 30.456, 3.04, 0.0, -30.667], dtype=np.float64) + ua_plev_cube.data = array + um2nc.convert_32_bit(ua_plev_cube) + assert ua_plev_cube.data.dtype == np.float32 From 16cb7108d1be80041ed494026f86851f3640fc7d Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Fri, 6 Sep 2024 12:54:44 +1000 Subject: [PATCH 03/11] Add warnings for integer conversion over/under flow. --- test/test_um2netcdf.py | 24 +++++++++++++++++++++++- umpost/um2netcdf.py | 14 ++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/test/test_um2netcdf.py b/test/test_um2netcdf.py index ecb3fd4..94da4bb 100644 --- a/test/test_um2netcdf.py +++ b/test/test_um2netcdf.py @@ -745,13 +745,35 @@ def test_fix_level_coord_skipped_if_no_levels(z_sea_rho_data, z_sea_theta_data): # 64 to 32 bit data conversion tests -def test_64_to_32_int(ua_plev_cube): +def test_convert_32_bit_with_int64(ua_plev_cube): array = np.array([100, 10, 1, 0, -10], dtype=np.int64) ua_plev_cube.data = array um2nc.convert_32_bit(ua_plev_cube) assert ua_plev_cube.data.dtype == np.int32 +def test_convert_32_bit_overflow_with_int64(ua_plev_cube): + array = np.array([3000000000], dtype=np.int64) + assert array[0] > np.iinfo(np.int32).max + ua_plev_cube.data = array + + with pytest.warns(): + um2nc.convert_32_bit(ua_plev_cube) + + assert ua_plev_cube.data.dtype == np.int32 + + +def test_convert_32_bit_underflow_with_int64(ua_plev_cube): + array = np.array([-3000000000], dtype=np.int64) + assert array[0] < np.iinfo(np.int32).max + ua_plev_cube.data = array + + with pytest.warns(): + um2nc.convert_32_bit(ua_plev_cube) + + assert ua_plev_cube.data.dtype == np.int32 + + def test_64_to_32_float(ua_plev_cube): array = np.array([300.33, 30.456, 3.04, 0.0, -30.667], dtype=np.float64) ua_plev_cube.data = array diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py index bb1c70e..f1a4e76 100644 --- a/umpost/um2netcdf.py +++ b/umpost/um2netcdf.py @@ -733,10 +733,24 @@ def fix_level_coord(cube, z_rho, z_theta, tol=1e-6): c_sigma.var_name = 'sigma_theta' +MAX_NP_INT32 = np.iinfo(np.int32).max +MIN_NP_INT32 = np.iinfo(np.int32).min + + def convert_32_bit(cube): if cube.data.dtype == 'float64': cube.data = cube.data.astype(np.float32) elif cube.data.dtype == 'int64': + _max = np.max(cube.data) + _min = np.min(cube.data) + + if _max > MAX_NP_INT32: + msg = f"Converting {cube.var_name} causes a 32 bit overflow!" + warnings.warn(msg) + elif _min < MIN_NP_INT32: + msg = f"Converting {cube.var_name} causes a 32 bit underflow!" + warnings.warn(msg) + cube.data = cube.data.astype(np.int32) From bf3ced28a8183ab6f3c2828907b4a1f0cace9c8c Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Fri, 6 Sep 2024 14:20:51 +1000 Subject: [PATCH 04/11] Fix test function name & add explanation for skipping float32 overflow. --- test/test_um2netcdf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_um2netcdf.py b/test/test_um2netcdf.py index 94da4bb..386f7e7 100644 --- a/test/test_um2netcdf.py +++ b/test/test_um2netcdf.py @@ -744,6 +744,7 @@ def test_fix_level_coord_skipped_if_no_levels(z_sea_rho_data, z_sea_theta_data): # 64 to 32 bit data conversion tests +# NB: skip float63 to float32 overflow as float32 min/max is -/+ 3.40e+38 def test_convert_32_bit_with_int64(ua_plev_cube): array = np.array([100, 10, 1, 0, -10], dtype=np.int64) @@ -774,7 +775,7 @@ def test_convert_32_bit_underflow_with_int64(ua_plev_cube): assert ua_plev_cube.data.dtype == np.int32 -def test_64_to_32_float(ua_plev_cube): +def test_convert_32_bit_with_float64(ua_plev_cube): array = np.array([300.33, 30.456, 3.04, 0.0, -30.667], dtype=np.float64) ua_plev_cube.data = array um2nc.convert_32_bit(ua_plev_cube) From 252a84b1d8f1100d1f7e20164af564a08fb343b4 Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Fri, 6 Sep 2024 14:23:20 +1000 Subject: [PATCH 05/11] Add conversion docstring. --- umpost/um2netcdf.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py index f1a4e76..9515fc1 100644 --- a/umpost/um2netcdf.py +++ b/umpost/um2netcdf.py @@ -738,6 +738,13 @@ def fix_level_coord(cube, z_rho, z_theta, tol=1e-6): def convert_32_bit(cube): + """ + Convert 64 bit int/float data to 32 bit (in place). + + Parameters + ---------- + cube : iris.cube object to modify. + """ if cube.data.dtype == 'float64': cube.data = cube.data.astype(np.float32) elif cube.data.dtype == 'int64': From 52a95a2a29ab665fcf01ab57c9609ae6b355356e Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Tue, 10 Sep 2024 11:47:46 +1000 Subject: [PATCH 06/11] Refactor 64 to 32 bit testing with parametrize. --- test/test_um2netcdf.py | 38 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/test/test_um2netcdf.py b/test/test_um2netcdf.py index 386f7e7..82860d0 100644 --- a/test/test_um2netcdf.py +++ b/test/test_um2netcdf.py @@ -1,6 +1,7 @@ import unittest.mock as mock from dataclasses import dataclass from collections import namedtuple +import operator import umpost.um2netcdf as um2nc @@ -743,38 +744,23 @@ def test_fix_level_coord_skipped_if_no_levels(z_sea_rho_data, z_sea_theta_data): um2nc.fix_level_coord(m_cube, z_sea_rho_data, z_sea_theta_data) -# 64 to 32 bit data conversion tests -# NB: skip float63 to float32 overflow as float32 min/max is -/+ 3.40e+38 - -def test_convert_32_bit_with_int64(ua_plev_cube): - array = np.array([100, 10, 1, 0, -10], dtype=np.int64) - ua_plev_cube.data = array +# int64 to int32 data conversion tests +# NB: skip float64 to float32 overflow as float32 min/max is huge: -/+ 3.40e+38 +@pytest.mark.parametrize("array,_operator,bound", + [([100, 10, 1, 0, -10], None, None), + ([3000000000], operator.gt, np.iinfo(np.int32).max), + ([-3000000000], operator.lt, np.iinfo(np.int32).min)]) +def test_convert_32_bit(ua_plev_cube, array, _operator, bound): + ua_plev_cube.data = np.array(array, dtype=np.int64) um2nc.convert_32_bit(ua_plev_cube) - assert ua_plev_cube.data.dtype == np.int32 - - -def test_convert_32_bit_overflow_with_int64(ua_plev_cube): - array = np.array([3000000000], dtype=np.int64) - assert array[0] > np.iinfo(np.int32).max - ua_plev_cube.data = array - - with pytest.warns(): - um2nc.convert_32_bit(ua_plev_cube) - - assert ua_plev_cube.data.dtype == np.int32 - -def test_convert_32_bit_underflow_with_int64(ua_plev_cube): - array = np.array([-3000000000], dtype=np.int64) - assert array[0] < np.iinfo(np.int32).max - ua_plev_cube.data = array - - with pytest.warns(): - um2nc.convert_32_bit(ua_plev_cube) + if _operator: + assert _operator(array[0], bound) assert ua_plev_cube.data.dtype == np.int32 +# test float conversion separately, otherwise parametrize block is ugly def test_convert_32_bit_with_float64(ua_plev_cube): array = np.array([300.33, 30.456, 3.04, 0.0, -30.667], dtype=np.float64) ua_plev_cube.data = array From 7df505c03c742879986920f3fe5b44d94f72c6f7 Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Tue, 10 Sep 2024 12:00:08 +1000 Subject: [PATCH 07/11] Fix convert_32_bit() to emit RuntimeWarning. --- umpost/um2netcdf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py index 9515fc1..b9f1e39 100644 --- a/umpost/um2netcdf.py +++ b/umpost/um2netcdf.py @@ -753,10 +753,10 @@ def convert_32_bit(cube): if _max > MAX_NP_INT32: msg = f"Converting {cube.var_name} causes a 32 bit overflow!" - warnings.warn(msg) + warnings.warn(msg, category=RuntimeWarning) elif _min < MIN_NP_INT32: msg = f"Converting {cube.var_name} causes a 32 bit underflow!" - warnings.warn(msg) + warnings.warn(msg, category=RuntimeWarning) cube.data = cube.data.astype(np.int32) From 78a2431f9a0446d93d250ed9670b6943f6769588 Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Tue, 10 Sep 2024 12:01:42 +1000 Subject: [PATCH 08/11] Fix convert_32_bit() logic for under & overflow checks. --- umpost/um2netcdf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py index b9f1e39..c680b6c 100644 --- a/umpost/um2netcdf.py +++ b/umpost/um2netcdf.py @@ -754,7 +754,8 @@ def convert_32_bit(cube): if _max > MAX_NP_INT32: msg = f"Converting {cube.var_name} causes a 32 bit overflow!" warnings.warn(msg, category=RuntimeWarning) - elif _min < MIN_NP_INT32: + + if _min < MIN_NP_INT32: msg = f"Converting {cube.var_name} causes a 32 bit underflow!" warnings.warn(msg, category=RuntimeWarning) From 91e541b94a8e864e080ba50f19fcfb45a996d8e7 Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Tue, 10 Sep 2024 12:17:35 +1000 Subject: [PATCH 09/11] Update docstring for warning. --- umpost/um2netcdf.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py index c680b6c..115d792 100644 --- a/umpost/um2netcdf.py +++ b/umpost/um2netcdf.py @@ -744,6 +744,10 @@ def convert_32_bit(cube): Parameters ---------- cube : iris.cube object to modify. + + Warns + ----- + RuntimeWarning : if the cube has data over 32-bit limits, causing an overflow. """ if cube.data.dtype == 'float64': cube.data = cube.data.astype(np.float32) From be491c0b7591e747e43e7fd3aaafbbecc31c8856 Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Tue, 10 Sep 2024 14:51:47 +1000 Subject: [PATCH 10/11] Update warnings, recommend --64 option to prevent integer under/overflows. --- umpost/um2netcdf.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py index 115d792..35a47f2 100644 --- a/umpost/um2netcdf.py +++ b/umpost/um2netcdf.py @@ -755,12 +755,13 @@ def convert_32_bit(cube): _max = np.max(cube.data) _min = np.min(cube.data) + msg = (f"32 bit under/overflow converting {cube.var_name}! Output data " + f"likely invalid. Use '--64' option to retain data integrity.") + if _max > MAX_NP_INT32: - msg = f"Converting {cube.var_name} causes a 32 bit overflow!" warnings.warn(msg, category=RuntimeWarning) if _min < MIN_NP_INT32: - msg = f"Converting {cube.var_name} causes a 32 bit underflow!" warnings.warn(msg, category=RuntimeWarning) cube.data = cube.data.astype(np.int32) From 92096ce86f3cdd305570e940397b06633ae8777b Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Tue, 10 Sep 2024 14:53:13 +1000 Subject: [PATCH 11/11] Add task for future work. --- umpost/um2netcdf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py index 35a47f2..999844a 100644 --- a/umpost/um2netcdf.py +++ b/umpost/um2netcdf.py @@ -159,6 +159,7 @@ def cubewrite(cube, sman, compression, use64bit, verbose): except iris.exceptions.CoordinateNotFoundError: pass + # TODO: flag warnings as an error for the driver script? if not use64bit: convert_32_bit(cube)