From 64da4b3b2cb92298e7ec4d1f5b33f2213c048ebf Mon Sep 17 00:00:00 2001
From: Ben Davies <ben.davies@anu.edu.au>
Date: Fri, 6 Sep 2024 12:34:49 +1000
Subject: [PATCH 01/11] Extract 64 --> 32 bit data conversion.

---
 umpost/um2netcdf.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py
index 80f55ef..bb1c70e 100644
--- a/umpost/um2netcdf.py
+++ b/umpost/um2netcdf.py
@@ -160,10 +160,7 @@ def cubewrite(cube, sman, compression, use64bit, verbose):
         pass
 
     if not use64bit:
-        if cube.data.dtype == 'float64':
-            cube.data = cube.data.astype(np.float32)
-        elif cube.data.dtype == 'int64':
-            cube.data = cube.data.astype(np.int32)
+        convert_32_bit(cube)
 
     # Set the missing_value attribute. Use an array to force the type to match
     # the data type
@@ -736,6 +733,13 @@ def fix_level_coord(cube, z_rho, z_theta, tol=1e-6):
                 c_sigma.var_name = 'sigma_theta'
 
 
+def convert_32_bit(cube):
+    if cube.data.dtype == 'float64':
+        cube.data = cube.data.astype(np.float32)
+    elif cube.data.dtype == 'int64':
+        cube.data = cube.data.astype(np.int32)
+
+
 def parse_args():
     parser = argparse.ArgumentParser(description="Convert UM fieldsfile to netcdf")
     parser.add_argument('-k', dest='nckind', required=False, type=int,

From 9008c29e2cdc0dc80df39bd4477e45d99c80eedf Mon Sep 17 00:00:00 2001
From: Ben Davies <ben.davies@anu.edu.au>
Date: Fri, 6 Sep 2024 12:44:12 +1000
Subject: [PATCH 02/11] Add basic 64 --> 32 bit data conversion tests.

---
 test/test_um2netcdf.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/test/test_um2netcdf.py b/test/test_um2netcdf.py
index c1a86f0..ecb3fd4 100644
--- a/test/test_um2netcdf.py
+++ b/test/test_um2netcdf.py
@@ -397,6 +397,7 @@ def __init__(self, item_code, var_name=None, attributes=None, units=None):
         self.standard_name = None
         self.long_name = None
         self.coord = {}
+        self.data = None
 
     def name(self):
         # mimic iris API
@@ -740,3 +741,19 @@ def test_fix_level_coord_skipped_if_no_levels(z_sea_rho_data, z_sea_theta_data):
     m_cube = mock.Mock(iris.cube.Cube)
     m_cube.coord.side_effect = iris.exceptions.CoordinateNotFoundError
     um2nc.fix_level_coord(m_cube, z_sea_rho_data, z_sea_theta_data)
+
+
+# 64 to 32 bit data conversion tests
+
+def test_64_to_32_int(ua_plev_cube):
+    array = np.array([100, 10, 1, 0, -10], dtype=np.int64)
+    ua_plev_cube.data = array
+    um2nc.convert_32_bit(ua_plev_cube)
+    assert ua_plev_cube.data.dtype == np.int32
+
+
+def test_64_to_32_float(ua_plev_cube):
+    array = np.array([300.33, 30.456, 3.04, 0.0, -30.667], dtype=np.float64)
+    ua_plev_cube.data = array
+    um2nc.convert_32_bit(ua_plev_cube)
+    assert ua_plev_cube.data.dtype == np.float32

From 16cb7108d1be80041ed494026f86851f3640fc7d Mon Sep 17 00:00:00 2001
From: Ben Davies <ben.davies@anu.edu.au>
Date: Fri, 6 Sep 2024 12:54:44 +1000
Subject: [PATCH 03/11] Add warnings for integer conversion overflow/underflow.

---
 test/test_um2netcdf.py | 24 +++++++++++++++++++++++-
 umpost/um2netcdf.py    | 14 ++++++++++++++
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/test/test_um2netcdf.py b/test/test_um2netcdf.py
index ecb3fd4..94da4bb 100644
--- a/test/test_um2netcdf.py
+++ b/test/test_um2netcdf.py
@@ -745,13 +745,35 @@ def test_fix_level_coord_skipped_if_no_levels(z_sea_rho_data, z_sea_theta_data):
 
 # 64 to 32 bit data conversion tests
 
-def test_64_to_32_int(ua_plev_cube):
+def test_convert_32_bit_with_int64(ua_plev_cube):
     array = np.array([100, 10, 1, 0, -10], dtype=np.int64)
     ua_plev_cube.data = array
     um2nc.convert_32_bit(ua_plev_cube)
     assert ua_plev_cube.data.dtype == np.int32
 
 
+def test_convert_32_bit_overflow_with_int64(ua_plev_cube):
+    array = np.array([3000000000], dtype=np.int64)
+    assert array[0] > np.iinfo(np.int32).max
+    ua_plev_cube.data = array
+
+    with pytest.warns():
+        um2nc.convert_32_bit(ua_plev_cube)
+
+    assert ua_plev_cube.data.dtype == np.int32
+
+
+def test_convert_32_bit_underflow_with_int64(ua_plev_cube):
+    array = np.array([-3000000000], dtype=np.int64)
+    assert array[0] < np.iinfo(np.int32).max
+    ua_plev_cube.data = array
+
+    with pytest.warns():
+        um2nc.convert_32_bit(ua_plev_cube)
+
+    assert ua_plev_cube.data.dtype == np.int32
+
+
 def test_64_to_32_float(ua_plev_cube):
     array = np.array([300.33, 30.456, 3.04, 0.0, -30.667], dtype=np.float64)
     ua_plev_cube.data = array
diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py
index bb1c70e..f1a4e76 100644
--- a/umpost/um2netcdf.py
+++ b/umpost/um2netcdf.py
@@ -733,10 +733,24 @@ def fix_level_coord(cube, z_rho, z_theta, tol=1e-6):
                 c_sigma.var_name = 'sigma_theta'
 
 
+MAX_NP_INT32 = np.iinfo(np.int32).max
+MIN_NP_INT32 = np.iinfo(np.int32).min
+
+
 def convert_32_bit(cube):
     if cube.data.dtype == 'float64':
         cube.data = cube.data.astype(np.float32)
     elif cube.data.dtype == 'int64':
+        _max = np.max(cube.data)
+        _min = np.min(cube.data)
+
+        if _max > MAX_NP_INT32:
+            msg = f"Converting {cube.var_name} causes a 32 bit overflow!"
+            warnings.warn(msg)
+        elif _min < MIN_NP_INT32:
+            msg = f"Converting {cube.var_name} causes a 32 bit underflow!"
+            warnings.warn(msg)
+
         cube.data = cube.data.astype(np.int32)
 
 

From bf3ced28a8183ab6f3c2828907b4a1f0cace9c8c Mon Sep 17 00:00:00 2001
From: Ben Davies <ben.davies@anu.edu.au>
Date: Fri, 6 Sep 2024 14:20:51 +1000
Subject: [PATCH 04/11] Fix test function name & add explanation for skipping
 float32 overflow.

---
 test/test_um2netcdf.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/test_um2netcdf.py b/test/test_um2netcdf.py
index 94da4bb..386f7e7 100644
--- a/test/test_um2netcdf.py
+++ b/test/test_um2netcdf.py
@@ -744,6 +744,7 @@ def test_fix_level_coord_skipped_if_no_levels(z_sea_rho_data, z_sea_theta_data):
 
 
 # 64 to 32 bit data conversion tests
+# NB: skip float63 to float32 overflow as float32 min/max is  -/+ 3.40e+38
 
 def test_convert_32_bit_with_int64(ua_plev_cube):
     array = np.array([100, 10, 1, 0, -10], dtype=np.int64)
@@ -774,7 +775,7 @@ def test_convert_32_bit_underflow_with_int64(ua_plev_cube):
     assert ua_plev_cube.data.dtype == np.int32
 
 
-def test_64_to_32_float(ua_plev_cube):
+def test_convert_32_bit_with_float64(ua_plev_cube):
     array = np.array([300.33, 30.456, 3.04, 0.0, -30.667], dtype=np.float64)
     ua_plev_cube.data = array
     um2nc.convert_32_bit(ua_plev_cube)

From 252a84b1d8f1100d1f7e20164af564a08fb343b4 Mon Sep 17 00:00:00 2001
From: Ben Davies <ben.davies@anu.edu.au>
Date: Fri, 6 Sep 2024 14:23:20 +1000
Subject: [PATCH 05/11] Add conversion docstring.

---
 umpost/um2netcdf.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py
index f1a4e76..9515fc1 100644
--- a/umpost/um2netcdf.py
+++ b/umpost/um2netcdf.py
@@ -738,6 +738,13 @@ def fix_level_coord(cube, z_rho, z_theta, tol=1e-6):
 
 
 def convert_32_bit(cube):
+    """
+    Convert 64 bit int/float data to 32 bit (in place).
+
+    Parameters
+    ----------
+    cube : iris.cube object to modify.
+    """
     if cube.data.dtype == 'float64':
         cube.data = cube.data.astype(np.float32)
     elif cube.data.dtype == 'int64':

From 52a95a2a29ab665fcf01ab57c9609ae6b355356e Mon Sep 17 00:00:00 2001
From: Ben Davies <ben.davies@anu.edu.au>
Date: Tue, 10 Sep 2024 11:47:46 +1000
Subject: [PATCH 06/11] Refactor 64 to 32 bit testing with parametrize.

---
 test/test_um2netcdf.py | 38 ++++++++++++--------------------------
 1 file changed, 12 insertions(+), 26 deletions(-)

diff --git a/test/test_um2netcdf.py b/test/test_um2netcdf.py
index 386f7e7..82860d0 100644
--- a/test/test_um2netcdf.py
+++ b/test/test_um2netcdf.py
@@ -1,6 +1,7 @@
 import unittest.mock as mock
 from dataclasses import dataclass
 from collections import namedtuple
+import operator
 
 import umpost.um2netcdf as um2nc
 
@@ -743,38 +744,23 @@ def test_fix_level_coord_skipped_if_no_levels(z_sea_rho_data, z_sea_theta_data):
     um2nc.fix_level_coord(m_cube, z_sea_rho_data, z_sea_theta_data)
 
 
-# 64 to 32 bit data conversion tests
-# NB: skip float63 to float32 overflow as float32 min/max is  -/+ 3.40e+38
-
-def test_convert_32_bit_with_int64(ua_plev_cube):
-    array = np.array([100, 10, 1, 0, -10], dtype=np.int64)
-    ua_plev_cube.data = array
+# int64 to int32 data conversion tests
+# NB: skip float64 to float32 overflow as float32 min/max is huge: -/+ 3.40e+38
+@pytest.mark.parametrize("array,_operator,bound",
+                         [([100, 10, 1, 0, -10], None, None),
+                          ([3000000000], operator.gt, np.iinfo(np.int32).max),
+                          ([-3000000000], operator.lt, np.iinfo(np.int32).min)])
+def test_convert_32_bit(ua_plev_cube, array, _operator, bound):
+    ua_plev_cube.data = np.array(array, dtype=np.int64)
     um2nc.convert_32_bit(ua_plev_cube)
-    assert ua_plev_cube.data.dtype == np.int32
-
-
-def test_convert_32_bit_overflow_with_int64(ua_plev_cube):
-    array = np.array([3000000000], dtype=np.int64)
-    assert array[0] > np.iinfo(np.int32).max
-    ua_plev_cube.data = array
-
-    with pytest.warns():
-        um2nc.convert_32_bit(ua_plev_cube)
-
-    assert ua_plev_cube.data.dtype == np.int32
-
 
-def test_convert_32_bit_underflow_with_int64(ua_plev_cube):
-    array = np.array([-3000000000], dtype=np.int64)
-    assert array[0] < np.iinfo(np.int32).max
-    ua_plev_cube.data = array
-
-    with pytest.warns():
-        um2nc.convert_32_bit(ua_plev_cube)
+    if _operator:
+        assert _operator(array[0], bound)
 
     assert ua_plev_cube.data.dtype == np.int32
 
 
+# test float conversion separately, otherwise parametrize block is ugly
 def test_convert_32_bit_with_float64(ua_plev_cube):
     array = np.array([300.33, 30.456, 3.04, 0.0, -30.667], dtype=np.float64)
     ua_plev_cube.data = array

From 7df505c03c742879986920f3fe5b44d94f72c6f7 Mon Sep 17 00:00:00 2001
From: Ben Davies <ben.davies@anu.edu.au>
Date: Tue, 10 Sep 2024 12:00:08 +1000
Subject: [PATCH 07/11] Fix convert_32_bit() to emit RuntimeWarning.

---
 umpost/um2netcdf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py
index 9515fc1..b9f1e39 100644
--- a/umpost/um2netcdf.py
+++ b/umpost/um2netcdf.py
@@ -753,10 +753,10 @@ def convert_32_bit(cube):
 
         if _max > MAX_NP_INT32:
             msg = f"Converting {cube.var_name} causes a 32 bit overflow!"
-            warnings.warn(msg)
+            warnings.warn(msg, category=RuntimeWarning)
         elif _min < MIN_NP_INT32:
             msg = f"Converting {cube.var_name} causes a 32 bit underflow!"
-            warnings.warn(msg)
+            warnings.warn(msg, category=RuntimeWarning)
 
         cube.data = cube.data.astype(np.int32)
 

From 78a2431f9a0446d93d250ed9670b6943f6769588 Mon Sep 17 00:00:00 2001
From: Ben Davies <ben.davies@anu.edu.au>
Date: Tue, 10 Sep 2024 12:01:42 +1000
Subject: [PATCH 08/11] Fix convert_32_bit() logic for under & overflow checks.

---
 umpost/um2netcdf.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py
index b9f1e39..c680b6c 100644
--- a/umpost/um2netcdf.py
+++ b/umpost/um2netcdf.py
@@ -754,7 +754,8 @@ def convert_32_bit(cube):
         if _max > MAX_NP_INT32:
             msg = f"Converting {cube.var_name} causes a 32 bit overflow!"
             warnings.warn(msg, category=RuntimeWarning)
-        elif _min < MIN_NP_INT32:
+
+        if _min < MIN_NP_INT32:
             msg = f"Converting {cube.var_name} causes a 32 bit underflow!"
             warnings.warn(msg, category=RuntimeWarning)
 

From 91e541b94a8e864e080ba50f19fcfb45a996d8e7 Mon Sep 17 00:00:00 2001
From: Ben Davies <ben.davies@anu.edu.au>
Date: Tue, 10 Sep 2024 12:17:35 +1000
Subject: [PATCH 09/11] Update docstring for warning.

---
 umpost/um2netcdf.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py
index c680b6c..115d792 100644
--- a/umpost/um2netcdf.py
+++ b/umpost/um2netcdf.py
@@ -744,6 +744,10 @@ def convert_32_bit(cube):
     Parameters
     ----------
     cube : iris.cube object to modify.
+
+    Warns
+    -----
+    RuntimeWarning : if int64 data exceeds int32 limits (under/overflow).
     """
     if cube.data.dtype == 'float64':
         cube.data = cube.data.astype(np.float32)

From be491c0b7591e747e43e7fd3aaafbbecc31c8856 Mon Sep 17 00:00:00 2001
From: Ben Davies <ben.davies@anu.edu.au>
Date: Tue, 10 Sep 2024 14:51:47 +1000
Subject: [PATCH 10/11] Update warnings, recommend --64 option to prevent
 integer under/overflows.

---
 umpost/um2netcdf.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py
index 115d792..35a47f2 100644
--- a/umpost/um2netcdf.py
+++ b/umpost/um2netcdf.py
@@ -755,12 +755,13 @@ def convert_32_bit(cube):
         _max = np.max(cube.data)
         _min = np.min(cube.data)
 
+        msg = (f"32 bit under/overflow converting {cube.var_name}! Output data "
+               f"likely invalid. Use '--64' option to retain data integrity.")
+
         if _max > MAX_NP_INT32:
-            msg = f"Converting {cube.var_name} causes a 32 bit overflow!"
             warnings.warn(msg, category=RuntimeWarning)
 
         if _min < MIN_NP_INT32:
-            msg = f"Converting {cube.var_name} causes a 32 bit underflow!"
             warnings.warn(msg, category=RuntimeWarning)
 
         cube.data = cube.data.astype(np.int32)

From 92096ce86f3cdd305570e940397b06633ae8777b Mon Sep 17 00:00:00 2001
From: Ben Davies <ben.davies@anu.edu.au>
Date: Tue, 10 Sep 2024 14:53:13 +1000
Subject: [PATCH 11/11] Add task for future work.

---
 umpost/um2netcdf.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py
index 35a47f2..999844a 100644
--- a/umpost/um2netcdf.py
+++ b/umpost/um2netcdf.py
@@ -159,6 +159,7 @@ def cubewrite(cube, sman, compression, use64bit, verbose):
     except iris.exceptions.CoordinateNotFoundError:
         pass
 
+    # TODO: flag warnings as an error for the driver script?
     if not use64bit:
         convert_32_bit(cube)