Skip to content

Commit

Permalink
Merge pull request #690 from davidhassell/netcdf-unlimited-dimension
Browse files Browse the repository at this point in the history
Prevent unlimited dimensions from being written to CFA-netCDF files
  • Loading branch information
davidhassell authored Aug 31, 2023
2 parents 05b690f + 6e3ac97 commit 1dff2f2
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 47 deletions.
8 changes: 8 additions & 0 deletions Changelog.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
version 3.15.3
--------------

**2023-08-31**

* Prevent unlimited dimensions from being written to CFA-netCDF files
(https://github.com/NCAS-CMS/cf-python/issues/689)

version 3.15.2
--------------

Expand Down
26 changes: 26 additions & 0 deletions cf/read_write/netcdf/netcdfwrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,32 @@ def __new__(cls, *args, **kwargs):
instance._NetCDFRead = NetCDFRead
return instance

def _unlimited(self, field, axis):
"""Whether an axis is unlimited.
If a CFA-netCDF file is being written then no axis can be
unlimited, i.e. `False` is always returned.
.. versionadded:: 3.15.3
:Parameters:
field: `Field` or `Domain`
axis: `str`
Domain axis construct identifier,
e.g. ``'domainaxis1'``.
:Returns:
`bool`
"""
if self.write_vars["cfa"]:
return False

return super()._unlimited(field, axis)

def _write_as_cfa(self, cfvar, construct_type, domain_axes):
"""Whether or not to write as a CFA variable.
Expand Down
18 changes: 16 additions & 2 deletions cf/test/test_CFA.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,8 +435,6 @@ def test_CFA_PP(self):

def test_CFA_multiple_files(self):
"""Test storing multiple CFA frgament locations."""
tmpfile1 = "delme1.nc"
tmpfile2 = "delme2.nc"
f = cf.example_field(0)
cf.write(f, tmpfile1)
f = cf.read(tmpfile1)[0]
Expand All @@ -451,6 +449,22 @@ def test_CFA_multiple_files(self):
self.assertEqual(len(g.data.get_filenames()), 2)
self.assertEqual(len(g.get_filenames()), 3)

def test_CFA_unlimited_dimension(self):
"""Test CFA with unlimited dimensions"""
# Create a CFA file from a field that has an unlimited
# dimension and no metadata constructs spanning that dimension
f = cf.example_field(0)
d = f.domain_axis("X")
d.nc_set_unlimited(True)
f.del_construct("X")
cf.write(f, tmpfile1)
g = cf.read(tmpfile1)
cf.write(g, tmpfile2, cfa=True)

# Check that the CFA file can be read
h = cf.read(tmpfile2)
self.assertEqual(len(h), 1)


if __name__ == "__main__":
print("Run date:", datetime.datetime.now())
Expand Down
1 change: 1 addition & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,7 @@ def _get_date():
}

import warnings

warnings.filterwarnings("ignore")

# -- Options for LaTeX output -------------------------------------------------
Expand Down
22 changes: 11 additions & 11 deletions docs/source/recipes/plot_12_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,22 +67,22 @@
high = aod.where(mask, cf.masked)

# %%
# 9. Now plot both the AOD from `high-quality` retrieval and all other retrievals
# 9. Now plot both the AOD from `high-quality` retrieval and all other retrievals
# using `cfplot.con <http://ajheaps.github.io/cf-plot/con.html>`_. Here:
#
# - `cfplot.gopen <http://ajheaps.github.io/cf-plot/gopen.html>`_ is used to
# define the parts of the plot area, specifying that the figure should have
# 1 row and 2 columns, which is closed by
# - `cfplot.gopen <http://ajheaps.github.io/cf-plot/gopen.html>`_ is used to
# define the parts of the plot area, specifying that the figure should have
# 1 row and 2 columns, which is closed by
# `cfplot.gclose <http://ajheaps.github.io/cf-plot/gclose.html>`_;
# - `plt.suptitle <https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.suptitle.html>`_
# is used to add a title for the whole figure;
# - the subplots for plotting are selected using
# `cfplot.gpos <https://ajheaps.github.io/cf-plot/gpos.html>`_ after which
# `cfplot.mapset <https://ajheaps.github.io/cf-plot/mapset.html>`_ is used to
# set the map limits and resolution for the subplots;
# - and as cf-plot stores the plot in a plot object with the name
# ``cfp.plotvars.plot``, country borders are added using normal
# `Cartopy operations <https://scitools.org.uk/cartopy/docs/latest/reference/index.html>`_
# - the subplots for plotting are selected using
# `cfplot.gpos <https://ajheaps.github.io/cf-plot/gpos.html>`_ after which
# `cfplot.mapset <https://ajheaps.github.io/cf-plot/mapset.html>`_ is used to
# set the map limits and resolution for the subplots;
# - and as cf-plot stores the plot in a plot object with the name
# ``cfp.plotvars.plot``, country borders are added using normal
# `Cartopy operations <https://scitools.org.uk/cartopy/docs/latest/reference/index.html>`_
# on the ``cfp.plotvars.mymap`` object:
import cartopy.feature as cfeature

Expand Down
8 changes: 4 additions & 4 deletions docs/source/recipes/plot_13_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,9 @@
# 8. The line for variable ``nino34_anomaly`` calculates the standardized
# anomaly for each time step in the ``nino34_index`` data. It subtracts the
# ``climatology`` from the ``nino34_index`` and then divides by the ``std_dev``.
# The resulting ``nino34_anomaly`` data represents how much the SST in the Niño
# 3.4 region deviates from the 1961-1990 average, in units of standard
# deviations. This is a common way to quantify climate anomalies like El Niño
# The resulting ``nino34_anomaly`` data represents how much the SST in the Niño
# 3.4 region deviates from the 1961-1990 average, in units of standard
# deviations. This is a common way to quantify climate anomalies like El Niño
# and La Niña events:
nino34_anomaly = (nino34_index - climatology) / std_dev

Expand Down Expand Up @@ -228,7 +228,7 @@
elnino = nino34_rolling >= 0.4
lanina = nino34_rolling <= -0.4

cfp.gset(xmin='1940-1-1', xmax='2022-12-31', ymin=-3, ymax=3)
cfp.gset(xmin="1940-1-1", xmax="2022-12-31", ymin=-3, ymax=3)

cfp.gopen(figsize=(10, 6))
cfp.lineplot(
Expand Down
65 changes: 35 additions & 30 deletions docs/source/recipes/plot_15_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,30 @@
using a land use data file and visualize the data as a histogram. This will help
to understand the proportion of different land use categories in each country.
The land use data is initially available at a high spatial resolution of
approximately 100 m, with several flags defined with numbers representing the
type of land use. Regridding the data to a coarser resolution of approximately
The land use data is initially available at a high spatial resolution of
approximately 100 m, with several flags defined with numbers representing the
type of land use. Regridding the data to a coarser resolution of approximately
25 km would incorrectly represent the flags on the new grids.
To avoid this, we will resample the data to the coarser resolution by
aggregating the data within predefined spatial regions or bins. This approach
will give a dataset where each 25 km grid cell contains a histogram of land use
flags, as determined by the original 100 m resolution data. It retains the
original spatial extent of the data while reducing its spatial complexity.
Regridding, on the other hand, involves interpolating the data onto a new grid,
To avoid this, we will resample the data to the coarser resolution by
aggregating the data within predefined spatial regions or bins. This approach
will give a dataset where each 25 km grid cell contains a histogram of land use
flags, as determined by the original 100 m resolution data. It retains the
original spatial extent of the data while reducing its spatial complexity.
Regridding, on the other hand, involves interpolating the data onto a new grid,
which can introduce artifacts and distortions in the data.
"""

# %%
# 1. Import the required libraries. We will use Cartopy's ``shapereader`` to
# work with shapefiles that define country boundaries:
import cf
import cartopy.io.shapereader as shpreader
import matplotlib.pyplot as plt
import numpy as np

# %%
# 1. Import the required libraries. We will use Cartopy's ``shapereader`` to
# work with shapefiles that define country boundaries:
import cf

# %%
# 2. Read and select land use data by index and see properties of all constructs:
f = cf.read("~/recipes/output.tif.nc")[0]
Expand All @@ -54,6 +55,7 @@
# coordinates with the help of the ``subspace`` function. The subset data is
# stored in the ``f`` variable.


def extract_data(country_name):
shpfilename = shpreader.natural_earth(
resolution="10m", category="cultural", name="admin_0_countries"
Expand All @@ -71,43 +73,45 @@ def extract_data(country_name):

return f


# %%
# 4. Define a function to plot a histogram of land use distribution for a
# specific country:
#
# - The `digitize <https://ncas-cms.github.io/cf-python/method/cf.Field.digitize.html>`_
# function of the ``cf.Field`` object is called to convert the land use data
# into indices of bins. It takes an array of bins (defined by
# function of the ``cf.Field`` object is called to convert the land use data
# into indices of bins. It takes an array of bins (defined by
# the `np.linspace <https://numpy.org/doc/stable/reference/generated/numpy.linspace.html>`_ function)
# and the ``return_bins=True`` parameter, which returns the actual bin values
# and the ``return_bins=True`` parameter, which returns the actual bin values
# along with the digitized data.
# - The `np.linspace <https://numpy.org/doc/stable/reference/generated/numpy.linspace.html>`_
# function is used to create an array of evenly spaced bin edges from 0 to 50,
# with 51 total values. This creates bins of width 1.
# - The ``digitized`` variable contains the bin indices for each data point,
# - The ``digitized`` variable contains the bin indices for each data point,
# while the bins variable contains the actual bin values.
# - The `cf.histogram <https://ncas-cms.github.io/cf-python/function/cf.histogram.html>`_
# function is called on the digitized data to create a histogram. This
# - The `cf.histogram <https://ncas-cms.github.io/cf-python/function/cf.histogram.html>`_
# function is called on the digitized data to create a histogram. This
# function returns a field object with the histogram data.
# - The `squeeze <https://ncas-cms.github.io/cf-python/method/cf.Field.squeeze.html>`_
# function applied to the histogram ``array`` extracts the histogram data as a NumPy
# array and removes any single dimensions.
# - The ``total_valid_sub_cells`` variable calculates the total number of valid
# - The ``total_valid_sub_cells`` variable calculates the total number of valid
# subcells (non-missing data points) by summing the histogram data.
# - The last element of the bin_counts array is removed with slicing
# - The last element of the bin_counts array is removed with slicing
# (``bin_counts[:-1]``) to match the length of the ``bin_indices`` array.
# - The ``percentages`` variable calculates the percentage of each bin by
# - The ``percentages`` variable calculates the percentage of each bin by
# dividing the ``bin_counts`` by the ``total_valid_sub_cells`` and multiplying
# by 100.
# - The ``bin_indices`` variable calculates the center of each bin by averaging
# the bin edges. This is done by adding the ``bins.array[:-1, 0]`` and
# - The ``bin_indices`` variable calculates the center of each bin by averaging
# the bin edges. This is done by adding the ``bins.array[:-1, 0]`` and
# ``bins.array[1:, 0]`` arrays and dividing by 2.
# - The ``ax.bar`` function is called to plot the histogram as a bar chart on
# - The ``ax.bar`` function is called to plot the histogram as a bar chart on
# the provided axis. The x-axis values are given by the ``bin_indices`` array,
# and the y-axis values are given by the ``percentages`` array.
# - The title, x-axis label, y-axis label, and axis limits are set based on the
# - The title, x-axis label, y-axis label, and axis limits are set based on the
# input parameters.


def plot_histogram(field, ax, label, ylim, xlim):
digitized, bins = field.digitize(np.linspace(0, 50, 51), return_bins=True)

Expand All @@ -129,21 +133,22 @@ def plot_histogram(field, ax, label, ylim, xlim):
ax.set_ylim(ylim)
ax.set_xlim(xlim)


# %%
# 5. Define the countries of interest:
countries = ["Ireland", "Belgium", "Switzerland"]

# %%
# 6. Set up the figure and axes for plotting the histograms:
#
# - The ``plt.subplots`` function is called to set up a figure with three
# - The ``plt.subplots`` function is called to set up a figure with three
# subplots, with a figure size of 8 inches by 10 inches.
# - A loop iterates over each country in the countries list and for each
# country, the ``extract_data`` function is called to extract its land use
# - A loop iterates over each country in the countries list and for each
# country, the ``extract_data`` function is called to extract its land use
# data.
# - The ``plot_histogram`` function is then called to plot the histogram of land
# use distribution on the corresponding subplot.
# - The ``plt.tight_layout`` function is called to ensure that the subplots are
# - The ``plt.tight_layout`` function is called to ensure that the subplots are
# properly spaced within the figure and finally, the ``plt.show`` function
# displays the figure with the histograms.
fig, axs = plt.subplots(3, 1, figsize=(8, 10))
Expand Down

0 comments on commit 1dff2f2

Please sign in to comment.