Merge pull request #690 from davidhassell/netcdf-unlimited-dimension

Prevent unlimited dimensions from being written to CFA-netCDF files
NCAS-CMS · Aug 31, 2023 · 1dff2f2 · 1dff2f2
2 parents 05b690f + 6e3ac97
commit 1dff2f2
Show file tree

Hide file tree

Showing 7 changed files with 101 additions and 47 deletions.
diff --git a/Changelog.rst b/Changelog.rst
@@ -1,3 +1,11 @@
+version 3.15.3
+--------------
+
+**2023-08-31**
+
+* Prevent unlimited dimensions from being written to CFA-netCDF files
+  (https://github.com/NCAS-CMS/cf-python/issues/689)
+
 version 3.15.2
 --------------
 

diff --git a/cf/read_write/netcdf/netcdfwrite.py b/cf/read_write/netcdf/netcdfwrite.py
@@ -22,6 +22,32 @@ def __new__(cls, *args, **kwargs):
         instance._NetCDFRead = NetCDFRead
         return instance
 
+    def _unlimited(self, field, axis):
+        """Whether an axis is unlimited.
+
+        If a CFA-netCDF file is being written then no axis can be
+        unlimited, i.e. `False` is always returned.
+
+        .. versionadded:: 3.15.3
+
+        :Parameters:
+
+            field: `Field` or `Domain`
+
+            axis: `str`
+                Domain axis construct identifier,
+                e.g. ``'domainaxis1'``.
+
+        :Returns:
+
+            `bool`
+
+        """
+        if self.write_vars["cfa"]:
+            return False
+
+        return super()._unlimited(field, axis)
+
     def _write_as_cfa(self, cfvar, construct_type, domain_axes):
         """Whether or not to write as a CFA variable.
 

diff --git a/cf/test/test_CFA.py b/cf/test/test_CFA.py
@@ -435,8 +435,6 @@ def test_CFA_PP(self):
 
     def test_CFA_multiple_files(self):
         """Test storing multiple CFA frgament locations."""
-        tmpfile1 = "delme1.nc"
-        tmpfile2 = "delme2.nc"
         f = cf.example_field(0)
         cf.write(f, tmpfile1)
         f = cf.read(tmpfile1)[0]
@@ -451,6 +449,22 @@ def test_CFA_multiple_files(self):
         self.assertEqual(len(g.data.get_filenames()), 2)
         self.assertEqual(len(g.get_filenames()), 3)
 
+    def test_CFA_unlimited_dimension(self):
+        """Test CFA with unlimited dimensions"""
+        # Create a CFA file from a field that has an unlimited
+        # dimension and no metadata constructs spanning that dimension
+        f = cf.example_field(0)
+        d = f.domain_axis("X")
+        d.nc_set_unlimited(True)
+        f.del_construct("X")
+        cf.write(f, tmpfile1)
+        g = cf.read(tmpfile1)
+        cf.write(g, tmpfile2, cfa=True)
+
+        # Check that the CFA file can be read
+        h = cf.read(tmpfile2)
+        self.assertEqual(len(h), 1)
+
 
 if __name__ == "__main__":
     print("Run date:", datetime.datetime.now())

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -392,6 +392,7 @@ def _get_date():
 }
 
 import warnings
+
 warnings.filterwarnings("ignore")
 
 # -- Options for LaTeX output -------------------------------------------------

diff --git a/docs/source/recipes/plot_12_recipe.py b/docs/source/recipes/plot_12_recipe.py
@@ -67,22 +67,22 @@
 high = aod.where(mask, cf.masked)
 
 # %%
-# 9. Now plot both the AOD from `high-quality` retrieval and all other retrievals 
+# 9. Now plot both the AOD from `high-quality` retrieval and all other retrievals
 # using `cfplot.con <http://ajheaps.github.io/cf-plot/con.html>`_. Here:
 #
-# - `cfplot.gopen <http://ajheaps.github.io/cf-plot/gopen.html>`_ is used to 
-#   define the parts of the plot area, specifying that the figure should have 
-#   1 row and 2 columns, which is closed by 
+# - `cfplot.gopen <http://ajheaps.github.io/cf-plot/gopen.html>`_ is used to
+#   define the parts of the plot area, specifying that the figure should have
+#   1 row and 2 columns, which is closed by
 #   `cfplot.gclose <http://ajheaps.github.io/cf-plot/gclose.html>`_;
 # - `plt.suptitle <https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.suptitle.html>`_
 #   is used to add a title for the whole figure;
-# - the subplots for plotting are selected using 
-#   `cfplot.gpos <https://ajheaps.github.io/cf-plot/gpos.html>`_ after which 
-#   `cfplot.mapset <https://ajheaps.github.io/cf-plot/mapset.html>`_ is used to 
-#   set the map limits and resolution for the subplots;  
-# - and as cf-plot stores the plot in a plot object with the name 
-#   ``cfp.plotvars.plot``, country borders are added using normal 
-#   `Cartopy operations <https://scitools.org.uk/cartopy/docs/latest/reference/index.html>`_ 
+# - the subplots for plotting are selected using
+#   `cfplot.gpos <https://ajheaps.github.io/cf-plot/gpos.html>`_ after which
+#   `cfplot.mapset <https://ajheaps.github.io/cf-plot/mapset.html>`_ is used to
+#   set the map limits and resolution for the subplots;
+# - and as cf-plot stores the plot in a plot object with the name
+#   ``cfp.plotvars.plot``, country borders are added using normal
+#   `Cartopy operations <https://scitools.org.uk/cartopy/docs/latest/reference/index.html>`_
 #   on the ``cfp.plotvars.mymap`` object:
 import cartopy.feature as cfeature
 

diff --git a/docs/source/recipes/plot_13_recipe.py b/docs/source/recipes/plot_13_recipe.py
@@ -182,9 +182,9 @@
 # 8. The line for variable ``nino34_anomaly`` calculates the standardized
 # anomaly for each time step in the ``nino34_index`` data. It subtracts the
 # ``climatology`` from the ``nino34_index`` and then divides by the ``std_dev``.
-# The resulting ``nino34_anomaly`` data represents how much the SST in the Niño 
-# 3.4 region deviates from the 1961-1990 average, in units of standard 
-# deviations. This is a common way to quantify climate anomalies like El Niño 
+# The resulting ``nino34_anomaly`` data represents how much the SST in the Niño
+# 3.4 region deviates from the 1961-1990 average, in units of standard
+# deviations. This is a common way to quantify climate anomalies like El Niño
 # and La Niña events:
 nino34_anomaly = (nino34_index - climatology) / std_dev
 
@@ -228,7 +228,7 @@
 elnino = nino34_rolling >= 0.4
 lanina = nino34_rolling <= -0.4
 
-cfp.gset(xmin='1940-1-1', xmax='2022-12-31', ymin=-3, ymax=3)
+cfp.gset(xmin="1940-1-1", xmax="2022-12-31", ymin=-3, ymax=3)
 
 cfp.gopen(figsize=(10, 6))
 cfp.lineplot(

diff --git a/docs/source/recipes/plot_15_recipe.py b/docs/source/recipes/plot_15_recipe.py
@@ -6,29 +6,30 @@
 using a land use data file and visualize the data as a histogram. This will help
 to understand the proportion of different land use categories in each country.
 
-The land use data is initially available at a high spatial resolution of 
-approximately 100 m, with several flags defined with numbers representing the 
-type of land use. Regridding the data to a coarser resolution of approximately 
+The land use data is initially available at a high spatial resolution of
+approximately 100 m, with several flags defined with numbers representing the
+type of land use. Regridding the data to a coarser resolution of approximately
 25 km would incorrectly represent the flags on the new grids.
 
-To avoid this, we will resample the data to the coarser resolution by 
-aggregating the data within predefined spatial regions or bins. This approach 
-will give a dataset where each 25 km grid cell contains a histogram of land use 
-flags, as determined by the original 100 m resolution data. It retains the 
-original spatial extent of the data while reducing its spatial complexity. 
-Regridding, on the other hand, involves interpolating the data onto a new grid, 
+To avoid this, we will resample the data to the coarser resolution by
+aggregating the data within predefined spatial regions or bins. This approach
+will give a dataset where each 25 km grid cell contains a histogram of land use
+flags, as determined by the original 100 m resolution data. It retains the
+original spatial extent of the data while reducing its spatial complexity.
+Regridding, on the other hand, involves interpolating the data onto a new grid,
 which can introduce artifacts and distortions in the data.
 
 """
 
-# %%
-# 1. Import the required libraries. We will use Cartopy's ``shapereader`` to 
-# work with shapefiles that define country boundaries:
-import cf
 import cartopy.io.shapereader as shpreader
 import matplotlib.pyplot as plt
 import numpy as np
 
+# %%
+# 1. Import the required libraries. We will use Cartopy's ``shapereader`` to
+# work with shapefiles that define country boundaries:
+import cf
+
 # %%
 # 2. Read and select land use data by index and see properties of all constructs:
 f = cf.read("~/recipes/output.tif.nc")[0]
@@ -54,6 +55,7 @@
 #   coordinates with the help of the ``subspace`` function. The subset data is
 #   stored in the ``f`` variable.
 
+
 def extract_data(country_name):
     shpfilename = shpreader.natural_earth(
         resolution="10m", category="cultural", name="admin_0_countries"
@@ -71,43 +73,45 @@ def extract_data(country_name):
 
     return f
 
+
 # %%
 # 4. Define a function to plot a histogram of land use distribution for a
 # specific country:
 #
 # - The `digitize <https://ncas-cms.github.io/cf-python/method/cf.Field.digitize.html>`_
-#   function of the ``cf.Field`` object is called to convert the land use data 
-#   into indices of bins. It takes an array of bins (defined by 
+#   function of the ``cf.Field`` object is called to convert the land use data
+#   into indices of bins. It takes an array of bins (defined by
 #   the `np.linspace <https://numpy.org/doc/stable/reference/generated/numpy.linspace.html>`_ function)
-#   and the ``return_bins=True`` parameter, which returns the actual bin values 
+#   and the ``return_bins=True`` parameter, which returns the actual bin values
 #   along with the digitized data.
 # - The `np.linspace <https://numpy.org/doc/stable/reference/generated/numpy.linspace.html>`_
 #   function is used to create an array of evenly spaced bin edges from 0 to 50,
 #   with 51 total values. This creates bins of width 1.
-# - The ``digitized`` variable contains the bin indices for each data point, 
+# - The ``digitized`` variable contains the bin indices for each data point,
 #   while the bins variable contains the actual bin values.
-# - The `cf.histogram <https://ncas-cms.github.io/cf-python/function/cf.histogram.html>`_ 
-#   function is called on the digitized data to create a histogram. This 
+# - The `cf.histogram <https://ncas-cms.github.io/cf-python/function/cf.histogram.html>`_
+#   function is called on the digitized data to create a histogram. This
 #   function returns a field object with the histogram data.
 # - The `squeeze <https://ncas-cms.github.io/cf-python/method/cf.Field.squeeze.html>`_
 #   function applied to the histogram ``array`` extracts the histogram data as a NumPy
 #   array and removes any single dimensions.
-# - The ``total_valid_sub_cells`` variable calculates the total number of valid 
+# - The ``total_valid_sub_cells`` variable calculates the total number of valid
 #   subcells (non-missing data points) by summing the histogram data.
-# - The last element of the bin_counts array is removed with slicing 
+# - The last element of the bin_counts array is removed with slicing
 #   (``bin_counts[:-1]``) to match the length of the ``bin_indices`` array.
-# - The ``percentages`` variable calculates the percentage of each bin by 
+# - The ``percentages`` variable calculates the percentage of each bin by
 #   dividing the ``bin_counts`` by the ``total_valid_sub_cells`` and multiplying
 #   by 100.
-# - The ``bin_indices`` variable calculates the center of each bin by averaging 
-#   the bin edges. This is done by adding the ``bins.array[:-1, 0]`` and 
+# - The ``bin_indices`` variable calculates the center of each bin by averaging
+#   the bin edges. This is done by adding the ``bins.array[:-1, 0]`` and
 #   ``bins.array[1:, 0]`` arrays and dividing by 2.
-# - The ``ax.bar`` function is called to plot the histogram as a bar chart on 
+# - The ``ax.bar`` function is called to plot the histogram as a bar chart on
 #   the provided axis. The x-axis values are given by the ``bin_indices`` array,
 #   and the y-axis values are given by the ``percentages`` array.
-# - The title, x-axis label, y-axis label, and axis limits are set based on the 
+# - The title, x-axis label, y-axis label, and axis limits are set based on the
 #   input parameters.
 
+
 def plot_histogram(field, ax, label, ylim, xlim):
     digitized, bins = field.digitize(np.linspace(0, 50, 51), return_bins=True)
 
@@ -129,21 +133,22 @@ def plot_histogram(field, ax, label, ylim, xlim):
     ax.set_ylim(ylim)
     ax.set_xlim(xlim)
 
+
 # %%
 # 5. Define the countries of interest:
 countries = ["Ireland", "Belgium", "Switzerland"]
 
 # %%
 # 6. Set up the figure and axes for plotting the histograms:
 #
-# - The ``plt.subplots`` function is called to set up a figure with three 
+# - The ``plt.subplots`` function is called to set up a figure with three
 #   subplots, with a figure size of 8 inches by 10 inches.
-# - A loop iterates over each country in the countries list and for each 
-#   country, the ``extract_data`` function is called to extract its land use 
+# - A loop iterates over each country in the countries list and for each
+#   country, the ``extract_data`` function is called to extract its land use
 #   data.
 # - The ``plot_histogram`` function is then called to plot the histogram of land
 #   use distribution on the corresponding subplot.
-# - The ``plt.tight_layout`` function is called to ensure that the subplots are 
+# - The ``plt.tight_layout`` function is called to ensure that the subplots are
 #   properly spaced within the figure and finally, the ``plt.show`` function
 #   displays the figure with the histograms.
 fig, axs = plt.subplots(3, 1, figsize=(8, 10))