Fix conflicts & merge branch 'main' into 92/refactor-plevs-tmp.

ACCESS-NRI · Sep 17, 2024 · 1e5cc23 · 1e5cc23
2 parents d8da6b1 + 92096ce
commit 1e5cc23
Show file tree

Hide file tree

Showing 2 changed files with 62 additions and 4 deletions.
diff --git a/test/test_um2netcdf.py b/test/test_um2netcdf.py
@@ -1,6 +1,7 @@
 import unittest.mock as mock
 from dataclasses import dataclass
 from collections import namedtuple
+import operator
 
 import umpost.um2netcdf as um2nc
 
@@ -397,6 +398,7 @@ def __init__(self, item_code, var_name=None, attributes=None, units=None):
         self.standard_name = None
         self.long_name = None
         self.coord = {}
+        self.data = None
 
     def name(self):
         # mimic iris API
@@ -800,3 +802,27 @@ def test_fix_pressure_levels_reverse_pressure(get_fake_cube_coords):
     c_pressure = cube.coord('pressure')
     assert c_pressure.attributes["positive"] == "down"
     assert all(c_pressure.points == [0.0, 1.0])
+
+
+# int64 to int32 data conversion tests
+# NB: skip float64 to float32 overflow as float32 min/max is huge: -/+ 3.40e+38
+@pytest.mark.parametrize("array,_operator,bound",
+                         [([100, 10, 1, 0, -10], None, None),
+                          ([3000000000], operator.gt, np.iinfo(np.int32).max),
+                          ([-3000000000], operator.lt, np.iinfo(np.int32).min)])
+def test_convert_32_bit(ua_plev_cube, array, _operator, bound):
+    ua_plev_cube.data = np.array(array, dtype=np.int64)
+    with mock.patch("warnings.warn") as warn:
+        um2nc.convert_32_bit(ua_plev_cube)
+
+    # a warning must be raised if (and only if) the input breaches its bound
+    assert warn.called == bool(_operator and _operator(array[0], bound))
+    assert ua_plev_cube.data.dtype == np.int32
+
+
+# float conversion is tested separately to keep the parametrize block tidy
+def test_convert_32_bit_with_float64(ua_plev_cube):
+    values = [300.33, 30.456, 3.04, 0.0, -30.667]
+    ua_plev_cube.data = np.asarray(values, dtype=np.float64)
+    um2nc.convert_32_bit(ua_plev_cube)
+    assert ua_plev_cube.data.dtype == np.float32
diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py
@@ -150,11 +150,9 @@ def cubewrite(cube, sman, compression, use64bit, verbose):
     # TODO: move into process() AND if a new cube is returned, swap into filtered cube list
     cube = fix_pressure_levels(cube) or cube  # NB: use new cube if pressure points are modified
 
+    # TODO: flag warnings as an error for the driver script?
     if not use64bit:
-        if cube.data.dtype == 'float64':
-            cube.data = cube.data.astype(np.float32)
-        elif cube.data.dtype == 'int64':
-            cube.data = cube.data.astype(np.int32)
+        convert_32_bit(cube)
 
     # Set the missing_value attribute. Use an array to force the type to match
     # the data type
@@ -762,6 +760,40 @@ def fix_pressure_levels(cube, decimals=5):
         return iris.util.reverse(cube, 'pressure')
 
 
+_INT32_LIMITS = np.iinfo(np.int32)  # representable range of 32 bit signed ints
+MIN_NP_INT32, MAX_NP_INT32 = _INT32_LIMITS.min, _INT32_LIMITS.max
+
+
+def convert_32_bit(cube):
+    """
+    Convert 64 bit int/float data to 32 bit (in place).
+
+    Data of any other dtype is left untouched.
+
+    Parameters
+    ----------
+    cube : iris.cube object to modify.
+
+    Warns
+    -----
+    RuntimeWarning : if int64 data exceeds the int32 limits, causing an overflow.
+    """
+    data = cube.data
+
+    if data.dtype == 'float64':
+        cube.data = data.astype(np.float32)
+    elif data.dtype == 'int64':
+        # NB: np.min/max raise ValueError on zero size arrays, check size first
+        if data.size and (np.max(data) > MAX_NP_INT32
+                          or np.min(data) < MIN_NP_INT32):
+            msg = (f"32 bit under/overflow converting {cube.var_name}! Output data "
+                   "likely invalid. Use '--64' option to retain data integrity.")
+            # warn once, even if the data breaches both the lower & upper limits
+            warnings.warn(msg, category=RuntimeWarning)
+
+        cube.data = data.astype(np.int32)
+
+
 def parse_args():
     parser = argparse.ArgumentParser(description="Convert UM fieldsfile to netcdf")
     parser.add_argument('-k', dest='nckind', required=False, type=int,