Commit 09385fd

Merge branch 'main' into datatree_alignment_docs

TomNicholas authored Sep 26, 2024
2 parents 4c030d8 + fecaa85 commit 09385fd
Showing 24 changed files with 313 additions and 92 deletions.
2 changes: 1 addition & 1 deletion .github/FUNDING.yml
@@ -1,2 +1,2 @@
github: numfocus
- custom: http://numfocus.org/donate-to-xarray
+ custom: https://numfocus.org/donate-to-xarray
3 changes: 3 additions & 0 deletions .github/workflows/ci.yaml
@@ -58,6 +58,9 @@ jobs:
python-version: "3.10"
os: ubuntu-latest
# Latest python version:
- env: "all-but-numba"
python-version: "3.12"
os: ubuntu-latest
- env: "all-but-dask"
# Not 3.12 because of pint
python-version: "3.11"
2 changes: 1 addition & 1 deletion README.md
@@ -88,7 +88,7 @@ Xarray is a fiscally sponsored project of
[NumFOCUS](https://numfocus.org), a nonprofit dedicated to supporting
the open source scientific computing community. If you like Xarray and
want to support our mission, please consider making a
- [donation](https://numfocus.salsalabs.org/donate-to-xarray/) to support
+ [donation](https://numfocus.org/donate-to-xarray) to support
our efforts.

## History
54 changes: 54 additions & 0 deletions ci/requirements/all-but-numba.yml
@@ -0,0 +1,54 @@
name: xarray-tests
channels:
- conda-forge
- nodefaults
dependencies:
# Pin a "very new numpy" (updated Sept 24, 2024)
- numpy>=2.1.1
- aiobotocore
- array-api-strict
- boto3
- bottleneck
- cartopy
- cftime
- dask-core
- dask-expr # dask raises a deprecation warning without this, breaking doctests
- distributed
- flox
- fsspec
- h5netcdf
- h5py
- hdf5
- hypothesis
- iris
- lxml # Optional dep of pydap
- matplotlib-base
- nc-time-axis
- netcdf4
# numba, sparse, numbagg, numexpr often conflict with newer versions of numpy.
# This environment helps us test xarray with the latest versions
# of numpy
# - numba
# - numbagg
# - numexpr
# - sparse
- opt_einsum
- packaging
- pandas
# - pint>=0.22
- pip
- pooch
- pre-commit
- pyarrow # pandas raises a deprecation warning without this, breaking doctests
- pydap
- pytest
- pytest-cov
- pytest-env
- pytest-xdist
- pytest-timeout
- rasterio
- scipy
- seaborn
- toolz
- typing_extensions
- zarr
3 changes: 3 additions & 0 deletions doc/whats-new.rst
@@ -32,6 +32,9 @@ New Features
`Tom Nicholas <https://github.com/TomNicholas>`_.
- Added zarr backends for :py:func:`open_groups` (:issue:`9430`, :pull:`9469`).
By `Eni Awowale <https://github.com/eni-awowale>`_.
- Added support for vectorized interpolation using additional interpolators
from the ``scipy.interpolate`` module (:issue:`9049`, :pull:`9526`).
By `Holly Mandel <https://github.com/hollymandel>`_.

Breaking changes
~~~~~~~~~~~~~~~~
1 change: 0 additions & 1 deletion pyproject.toml
@@ -323,7 +323,6 @@ filterwarnings = [
"default:Using a non-tuple sequence for multidimensional indexing is deprecated:FutureWarning",
"default:Duplicate dimension names present:UserWarning:xarray.namedarray.core",
"default:::xarray.tests.test_strategies", # TODO: remove once we know how to deal with a changed signature in protocols
"ignore:__array__ implementation doesn't accept a copy keyword, so passing copy=False failed.",
]

log_cli_level = "INFO"
2 changes: 1 addition & 1 deletion xarray/core/common.py
@@ -163,7 +163,7 @@ def __complex__(self: Any) -> complex:
return complex(self.values)

def __array__(
- self: Any, dtype: DTypeLike | None = None, copy: bool | None = None
+ self: Any, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None
) -> np.ndarray:
if not copy:
if np.lib.NumpyVersion(np.__version__) >= "2.0.0":
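The new signature above follows the NumPy 2 `__array__` protocol, in which `dtype` is positional-only and `copy` is keyword-only. A minimal sketch of the version-gating pattern this commit applies throughout; the `to_numpy_compat` helper is hypothetical, not part of xarray:

```python
import numpy as np
from packaging.version import Version


def to_numpy_compat(values, dtype=None, copy=None):
    # np.asarray only accepts a ``copy`` keyword from NumPy 2.0 onwards,
    # so gate on the runtime version, as the diffs in this commit do.
    if Version(np.__version__) >= Version("2.0.0"):
        return np.asarray(values, dtype=dtype, copy=copy)
    if copy is False:
        # NumPy < 2 offers no "never copy" guarantee for this conversion.
        raise NotImplementedError("copy=False requires NumPy >= 2.0")
    return np.asarray(values, dtype=dtype)
```

With `__array__` accepting `copy`, the corresponding `filterwarnings` ignore could be dropped from pyproject.toml above.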
10 changes: 5 additions & 5 deletions xarray/core/dataarray.py
@@ -2224,12 +2224,12 @@ def interp(
Performs univariate or multivariate interpolation of a DataArray onto
new coordinates using scipy's interpolation routines. If interpolating
- along an existing dimension, :py:class:`scipy.interpolate.interp1d` is
- called. When interpolating along multiple existing dimensions, an
+ along an existing dimension, either :py:class:`scipy.interpolate.interp1d`
+ or a 1-dimensional scipy interpolator (e.g. :py:class:`scipy.interpolate.KroghInterpolator`)
+ is called. When interpolating along multiple existing dimensions, an
attempt is made to decompose the interpolation into multiple
- 1-dimensional interpolations. If this is possible,
- :py:class:`scipy.interpolate.interp1d` is called. Otherwise,
- :py:func:`scipy.interpolate.interpn` is called.
+ 1-dimensional interpolations. If this is possible, the 1-dimensional interpolator is called.
+ Otherwise, :py:func:`scipy.interpolate.interpn` is called.
Parameters
----------
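The revised docstring above covers interpolation that is no longer tied to `interp1d`. A usage sketch of the documented behavior, with illustrative data and `method="pchip"` chosen as one of the supported 1-dimensional scipy interpolators:

```python
import numpy as np
import xarray as xr

da = xr.DataArray(
    np.sin(np.linspace(0, 2 * np.pi, 10)),
    dims="x",
    coords={"x": np.linspace(0, 2 * np.pi, 10)},
)

# Interpolate along an existing dimension with a 1-dimensional scipy
# interpolator other than interp1d.
on_grid = da.interp(x=np.linspace(0, 2 * np.pi, 50), method="pchip")

# Vectorized interpolation (the whats-new entry above, :pull:`9526`):
# the new points arrive as a DataArray with a new dimension.
points = xr.DataArray([0.1, 1.3, 2.5], dims="z")
vectorized = da.interp(x=points, method="pchip")
```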
16 changes: 8 additions & 8 deletions xarray/core/dataset.py
@@ -3885,12 +3885,12 @@ def interp(
Performs univariate or multivariate interpolation of a Dataset onto
new coordinates using scipy's interpolation routines. If interpolating
- along an existing dimension, :py:class:`scipy.interpolate.interp1d` is
- called. When interpolating along multiple existing dimensions, an
+ along an existing dimension, either :py:class:`scipy.interpolate.interp1d`
+ or a 1-dimensional scipy interpolator (e.g. :py:class:`scipy.interpolate.KroghInterpolator`)
+ is called. When interpolating along multiple existing dimensions, an
attempt is made to decompose the interpolation into multiple
- 1-dimensional interpolations. If this is possible,
- :py:class:`scipy.interpolate.interp1d` is called. Otherwise,
- :py:func:`scipy.interpolate.interpn` is called.
+ 1-dimensional interpolations. If this is possible, the 1-dimensional interpolator
+ is called. Otherwise, :py:func:`scipy.interpolate.interpn` is called.
Parameters
----------
@@ -10621,7 +10621,7 @@ def rolling(
--------
Dataset.cumulative
DataArray.rolling
- core.rolling.DatasetRolling
+ DataArray.rolling_exp
"""
from xarray.core.rolling import DatasetRolling

@@ -10651,9 +10651,9 @@ def cumulative(
See Also
--------
- Dataset.rolling
DataArray.cumulative
- core.rolling.DatasetRolling
+ Dataset.rolling
+ Dataset.rolling_exp
"""
from xarray.core.rolling import DatasetRolling

38 changes: 35 additions & 3 deletions xarray/core/datatree.py
@@ -55,6 +55,7 @@
from xarray.core.dataset import calculate_dimensions

if TYPE_CHECKING:
import numpy as np
import pandas as pd

from xarray.core.datatree_io import T_DataTreeNetcdfEngine, T_DataTreeNetcdfTypes
@@ -156,6 +157,34 @@ def check_alignment(
check_alignment(child_path, child_ds, base_ds, child.children)


def _deduplicate_inherited_coordinates(child: DataTree, parent: DataTree) -> None:
# This method removes repeated indexes (and corresponding coordinates)
# that are repeated between a DataTree and its parents.
#
# TODO(shoyer): Decide how to handle repeated coordinates *without* an
# index. Should these be allowed, in which case we probably want to
# exclude them from inheritance, or should they be automatically
# dropped?
# https://github.com/pydata/xarray/issues/9475#issuecomment-2357004264
removed_something = False
for name in parent._indexes:
if name in child._node_indexes:
# Indexes on a Dataset always have a corresponding coordinate.
# We already verified that these coordinates match in the
# check_alignment() call from _pre_attach().
del child._node_indexes[name]
del child._node_coord_variables[name]
removed_something = True

if removed_something:
child._node_dims = calculate_dimensions(
child._data_variables | child._node_coord_variables
)

for grandchild in child._children.values():
_deduplicate_inherited_coordinates(grandchild, child)


def _check_for_slashes_in_names(variables: Iterable[Hashable]) -> None:
offending_variable_names = [
name for name in variables if isinstance(name, str) and "/" in name
@@ -374,7 +403,7 @@ def map( # type: ignore[override]


class DataTree(
- NamedNode,
+ NamedNode["DataTree"],
MappedDatasetMethodsMixin,
MappedDataWithCoords,
DataTreeArithmeticMixin,
@@ -485,6 +514,7 @@ def _pre_attach(self: DataTree, parent: DataTree, name: str) -> None:
node_ds = self.to_dataset(inherited=False)
parent_ds = parent._to_dataset_view(rebuild_dims=False, inherited=True)
check_alignment(path, node_ds, parent_ds, self.children)
_deduplicate_inherited_coordinates(self, parent)

@property
def _coord_variables(self) -> ChainMap[Hashable, Variable]:
@@ -737,7 +767,9 @@ def __bool__(self) -> bool:
def __iter__(self) -> Iterator[str]:
return itertools.chain(self._data_variables, self._children) # type: ignore[arg-type]

- def __array__(self, dtype=None, copy=None):
+ def __array__(
+ self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None
+ ) -> np.ndarray:
raise TypeError(
"cannot directly convert a DataTree into a "
"numpy array. Instead, create an xarray.DataArray "
@@ -1350,7 +1382,7 @@ def map_over_subtree(
func: Callable,
*args: Iterable[Any],
**kwargs: Any,
- ) -> DataTree | tuple[DataTree]:
+ ) -> DataTree | tuple[DataTree, ...]:
"""
Apply a function to every dataset in this subtree, returning a new tree which stores the results.
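`_deduplicate_inherited_coordinates` above removes a child's indexed coordinates when they repeat its parent's, leaving the child to inherit them instead. A behavioral sketch, assuming the `DataTree` API referenced elsewhere in this diff (`from_dict`, `to_dataset(inherited=...)`):

```python
import xarray as xr
from xarray.core.datatree import DataTree

parent = xr.Dataset(coords={"x": [1, 2, 3]})
child = xr.Dataset({"foo": ("x", [10, 20, 30])}, coords={"x": [1, 2, 3]})

tree = DataTree.from_dict({"/": parent, "/child": child})

# The child's "x" index duplicates its parent's, so it is dropped from
# the node during attachment and inherited from the parent instead.
print(tree["child"].to_dataset(inherited=False))  # no "x" stored on the node
print(tree["child"].to_dataset(inherited=True))   # "x" visible via inheritance
```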
6 changes: 5 additions & 1 deletion xarray/core/groupby.py
@@ -193,7 +193,11 @@ def values(self) -> range:
def data(self) -> range:
return range(self.size)

- def __array__(self) -> np.ndarray:
+ def __array__(
+ self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None
+ ) -> np.ndarray:
+ if copy is False:
+ raise NotImplementedError(f"An array copy is necessary, got {copy = }.")
return np.arange(self.size)

@property
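The `range`-backed dummy coordinate above has to materialize through `np.arange`, so it cannot honor NumPy 2's `copy=False` ("never copy") and raises instead. A small illustration of that contract, using a hypothetical stand-in class:

```python
import numpy as np


class LazyRange:
    """Hypothetical stand-in for an object that materializes lazily."""

    def __init__(self, size: int):
        self.size = size

    def __array__(self, dtype=None, /, *, copy=None):
        if copy is False:
            # A fresh array is unavoidable, so "never copy" cannot be honored.
            raise NotImplementedError(f"An array copy is necessary, got {copy = }.")
        return np.arange(self.size, dtype=dtype)


np.asarray(LazyRange(4))  # fine: materializes a new array
# On NumPy >= 2.0, np.asarray(LazyRange(4), copy=False) raises NotImplementedError.
```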
43 changes: 27 additions & 16 deletions xarray/core/indexing.py
@@ -13,6 +13,7 @@

import numpy as np
import pandas as pd
from packaging.version import Version

from xarray.core import duck_array_ops
from xarray.core.nputils import NumpyVIndexAdapter
@@ -505,9 +506,14 @@ class ExplicitlyIndexed:

__slots__ = ()

- def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray:
+ def __array__(
+ self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None
+ ) -> np.ndarray:
# Leave casting to an array up to the underlying array type.
- return np.asarray(self.get_duck_array(), dtype=dtype)
+ if Version(np.__version__) >= Version("2.0.0"):
+ return np.asarray(self.get_duck_array(), dtype=dtype, copy=copy)
+ else:
+ return np.asarray(self.get_duck_array(), dtype=dtype)

def get_duck_array(self):
return self.array
@@ -520,11 +526,6 @@ def get_duck_array(self):
key = BasicIndexer((slice(None),) * self.ndim)
return self[key]

- def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray:
- # This is necessary because we apply the indexing key in self.get_duck_array()
- # Note this is the base class for all lazy indexing classes
- return np.asarray(self.get_duck_array(), dtype=dtype)

def _oindex_get(self, indexer: OuterIndexer):
raise NotImplementedError(
f"{self.__class__.__name__}._oindex_get method should be overridden"
@@ -570,8 +571,13 @@ def __init__(self, array, indexer_cls: type[ExplicitIndexer] = BasicIndexer):
self.array = as_indexable(array)
self.indexer_cls = indexer_cls

- def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray:
- return np.asarray(self.get_duck_array(), dtype=dtype)
+ def __array__(
+ self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None
+ ) -> np.ndarray:
+ if Version(np.__version__) >= Version("2.0.0"):
+ return np.asarray(self.get_duck_array(), dtype=dtype, copy=copy)
+ else:
+ return np.asarray(self.get_duck_array(), dtype=dtype)

def get_duck_array(self):
return self.array.get_duck_array()
@@ -830,9 +836,6 @@ def __init__(self, array):
def _ensure_cached(self):
self.array = as_indexable(self.array.get_duck_array())

- def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray:
- return np.asarray(self.get_duck_array(), dtype=dtype)

def get_duck_array(self):
self._ensure_cached()
return self.array.get_duck_array()
@@ -1674,15 +1677,21 @@ def __init__(self, array: pd.Index, dtype: DTypeLike = None):
def dtype(self) -> np.dtype:
return self._dtype

- def __array__(self, dtype: DTypeLike = None) -> np.ndarray:
+ def __array__(
+ self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None
+ ) -> np.ndarray:
if dtype is None:
dtype = self.dtype
array = self.array
if isinstance(array, pd.PeriodIndex):
with suppress(AttributeError):
# this might not be public API
array = array.astype("object")
- return np.asarray(array.values, dtype=dtype)
+ if Version(np.__version__) >= Version("2.0.0"):
+ return np.asarray(array.values, dtype=dtype, copy=copy)
+ else:
+ return np.asarray(array.values, dtype=dtype)

def get_duck_array(self) -> np.ndarray:
return np.asarray(self)
@@ -1831,15 +1840,17 @@ def __init__(
super().__init__(array, dtype)
self.level = level

- def __array__(self, dtype: DTypeLike = None) -> np.ndarray:
+ def __array__(
+ self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None
+ ) -> np.ndarray:
if dtype is None:
dtype = self.dtype
if self.level is not None:
return np.asarray(
self.array.get_level_values(self.level).values, dtype=dtype
)
else:
- return super().__array__(dtype)
+ return super().__array__(dtype, copy=copy)

def _convert_scalar(self, item):
if isinstance(item, tuple) and self.level is not None:
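The deletions in this file work because the lazy-indexing subclasses now inherit `ExplicitlyIndexed.__array__`, which already routes through `get_duck_array()`. A toy sketch of that consolidation, with hypothetical class names and the version gating omitted:

```python
import numpy as np


class Base:
    def __array__(self, dtype=None, /, *, copy=None):
        # Single protocol entry point; subclasses only customize the data.
        return np.asarray(self.get_duck_array(), dtype=dtype)

    def get_duck_array(self):
        return self.array


class Cached(Base):
    # No __array__ override needed: overriding get_duck_array() suffices.
    def __init__(self, array):
        self.array = np.asarray(array)

    def get_duck_array(self):
        return self.array


np.asarray(Cached([1, 2, 3]))  # uses Base.__array__
```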
(Diffs for the remaining changed files were not loaded.)
