Commit 09385fd

Merge branch 'main' into datatree_alignment_docs

TomNicholas authored Sep 26, 2024
2 parents 4c030d8 + fecaa85 commit 09385fd
Showing 24 changed files with 313 additions and 92 deletions.
2 changes: 1 addition & 1 deletion .github/FUNDING.yml
@@ -1,2 +1,2 @@
github: numfocus
- custom: http://numfocus.org/donate-to-xarray
+ custom: https://numfocus.org/donate-to-xarray
3 changes: 3 additions & 0 deletions .github/workflows/ci.yaml
@@ -58,6 +58,9 @@ jobs:
python-version: "3.10"
os: ubuntu-latest
# Latest python version:
- env: "all-but-numba"
python-version: "3.12"
os: ubuntu-latest
- env: "all-but-dask"
# Not 3.12 because of pint
python-version: "3.11"
2 changes: 1 addition & 1 deletion README.md
@@ -88,7 +88,7 @@ Xarray is a fiscally sponsored project of
[NumFOCUS](https://numfocus.org), a nonprofit dedicated to supporting
the open source scientific computing community. If you like Xarray and
want to support our mission, please consider making a
- [donation](https://numfocus.salsalabs.org/donate-to-xarray/) to support
+ [donation](https://numfocus.org/donate-to-xarray) to support
our efforts.

## History
54 changes: 54 additions & 0 deletions ci/requirements/all-but-numba.yml
@@ -0,0 +1,54 @@
name: xarray-tests
channels:
- conda-forge
- nodefaults
dependencies:
# Pin a "very new numpy" (updated Sept 24, 2024)
- numpy>=2.1.1
- aiobotocore
- array-api-strict
- boto3
- bottleneck
- cartopy
- cftime
- dask-core
- dask-expr # dask raises a deprecation warning without this, breaking doctests
- distributed
- flox
- fsspec
- h5netcdf
- h5py
- hdf5
- hypothesis
- iris
- lxml # Optional dep of pydap
- matplotlib-base
- nc-time-axis
- netcdf4
# numba, sparse, numbagg, numexpr often conflict with newer versions of numpy.
# This environment helps us test xarray with the latest versions
# of numpy
# - numba
# - numbagg
# - numexpr
# - sparse
- opt_einsum
- packaging
- pandas
# - pint>=0.22
- pip
- pooch
- pre-commit
- pyarrow # pandas raises a deprecation warning without this, breaking doctests
- pydap
- pytest
- pytest-cov
- pytest-env
- pytest-xdist
- pytest-timeout
- rasterio
- scipy
- seaborn
- toolz
- typing_extensions
- zarr
3 changes: 3 additions & 0 deletions doc/whats-new.rst
@@ -32,6 +32,9 @@ New Features
`Tom Nicholas <https://github.com/TomNicholas>`_.
- Added zarr backends for :py:func:`open_groups` (:issue:`9430`, :pull:`9469`).
By `Eni Awowale <https://github.com/eni-awowale>`_.
- Added support for vectorized interpolation using additional interpolators
from the ``scipy.interpolate`` module (:issue:`9049`, :pull:`9526`).
By `Holly Mandel <https://github.com/hollymandel>`_.

Breaking changes
~~~~~~~~~~~~~~~~
1 change: 0 additions & 1 deletion pyproject.toml
@@ -323,7 +323,6 @@ filterwarnings = [
"default:Using a non-tuple sequence for multidimensional indexing is deprecated:FutureWarning",
"default:Duplicate dimension names present:UserWarning:xarray.namedarray.core",
"default:::xarray.tests.test_strategies", # TODO: remove once we know how to deal with a changed signature in protocols
"ignore:__array__ implementation doesn't accept a copy keyword, so passing copy=False failed.",
]

log_cli_level = "INFO"
2 changes: 1 addition & 1 deletion xarray/core/common.py
@@ -163,7 +163,7 @@ def __complex__(self: Any) -> complex:
return complex(self.values)

def __array__(
- self: Any, dtype: DTypeLike | None = None, copy: bool | None = None
+ self: Any, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None
) -> np.ndarray:
if not copy:
if np.lib.NumpyVersion(np.__version__) >= "2.0.0":
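The new signature above follows the NumPy 2 `__array__` protocol, in which `dtype` is positional-only and `copy` is keyword-only. A minimal sketch of the version-gating pattern this commit applies throughout; the `to_numpy_compat` helper is hypothetical, not part of xarray:

```python
import numpy as np
from packaging.version import Version


def to_numpy_compat(values, dtype=None, copy=None):
    # np.asarray only accepts a ``copy`` keyword from NumPy 2.0 onwards,
    # so gate on the runtime version, as the diffs in this commit do.
    if Version(np.__version__) >= Version("2.0.0"):
        return np.asarray(values, dtype=dtype, copy=copy)
    if copy is False:
        # NumPy < 2 offers no "never copy" guarantee for this conversion.
        raise NotImplementedError("copy=False requires NumPy >= 2.0")
    return np.asarray(values, dtype=dtype)
```

With `__array__` accepting `copy`, the corresponding `filterwarnings` ignore could be dropped from pyproject.toml above.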
10 changes: 5 additions & 5 deletions xarray/core/dataarray.py
@@ -2224,12 +2224,12 @@ def interp(
Performs univariate or multivariate interpolation of a DataArray onto
new coordinates using scipy's interpolation routines. If interpolating
- along an existing dimension, :py:class:`scipy.interpolate.interp1d` is
- called. When interpolating along multiple existing dimensions, an
+ along an existing dimension, either :py:class:`scipy.interpolate.interp1d`
+ or a 1-dimensional scipy interpolator (e.g. :py:class:`scipy.interpolate.KroghInterpolator`)
+ is called. When interpolating along multiple existing dimensions, an
attempt is made to decompose the interpolation into multiple
- 1-dimensional interpolations. If this is possible,
- :py:class:`scipy.interpolate.interp1d` is called. Otherwise,
- :py:func:`scipy.interpolate.interpn` is called.
+ 1-dimensional interpolations. If this is possible, the 1-dimensional interpolator is called.
+ Otherwise, :py:func:`scipy.interpolate.interpn` is called.
Parameters
----------
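The revised docstring above covers interpolation that is no longer tied to `interp1d`. A usage sketch of the documented behavior, with illustrative data and `method="pchip"` chosen as one of the supported 1-dimensional scipy interpolators:

```python
import numpy as np
import xarray as xr

da = xr.DataArray(
    np.sin(np.linspace(0, 2 * np.pi, 10)),
    dims="x",
    coords={"x": np.linspace(0, 2 * np.pi, 10)},
)

# Interpolate along an existing dimension with a 1-dimensional scipy
# interpolator other than interp1d.
on_grid = da.interp(x=np.linspace(0, 2 * np.pi, 50), method="pchip")

# Vectorized interpolation (the whats-new entry above, :pull:`9526`):
# the new points arrive as a DataArray with a new dimension.
points = xr.DataArray([0.1, 1.3, 2.5], dims="z")
vectorized = da.interp(x=points, method="pchip")
```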
16 changes: 8 additions & 8 deletions xarray/core/dataset.py
@@ -3885,12 +3885,12 @@ def interp(
Performs univariate or multivariate interpolation of a Dataset onto
new coordinates using scipy's interpolation routines. If interpolating
- along an existing dimension, :py:class:`scipy.interpolate.interp1d` is
- called. When interpolating along multiple existing dimensions, an
+ along an existing dimension, either :py:class:`scipy.interpolate.interp1d`
+ or a 1-dimensional scipy interpolator (e.g. :py:class:`scipy.interpolate.KroghInterpolator`)
+ is called. When interpolating along multiple existing dimensions, an
attempt is made to decompose the interpolation into multiple
- 1-dimensional interpolations. If this is possible,
- :py:class:`scipy.interpolate.interp1d` is called. Otherwise,
- :py:func:`scipy.interpolate.interpn` is called.
+ 1-dimensional interpolations. If this is possible, the 1-dimensional interpolator
+ is called. Otherwise, :py:func:`scipy.interpolate.interpn` is called.
Parameters
----------
@@ -10621,7 +10621,7 @@ def rolling(
--------
Dataset.cumulative
DataArray.rolling
- core.rolling.DatasetRolling
+ DataArray.rolling_exp
"""
from xarray.core.rolling import DatasetRolling

@@ -10651,9 +10651,9 @@ def cumulative(
See Also
--------
- Dataset.rolling
DataArray.cumulative
- core.rolling.DatasetRolling
+ Dataset.rolling
+ Dataset.rolling_exp
"""
from xarray.core.rolling import DatasetRolling

38 changes: 35 additions & 3 deletions xarray/core/datatree.py
@@ -55,6 +55,7 @@
from xarray.core.dataset import calculate_dimensions

if TYPE_CHECKING:
import numpy as np
import pandas as pd

from xarray.core.datatree_io import T_DataTreeNetcdfEngine, T_DataTreeNetcdfTypes
@@ -156,6 +157,34 @@ def check_alignment(
check_alignment(child_path, child_ds, base_ds, child.children)


def _deduplicate_inherited_coordinates(child: DataTree, parent: DataTree) -> None:
# This method removes repeated indexes (and corresponding coordinates)
# that are repeated between a DataTree and its parents.
#
# TODO(shoyer): Decide how to handle repeated coordinates *without* an
# index. Should these be allowed, in which case we probably want to
# exclude them from inheritance, or should they be automatically
# dropped?
# https://github.com/pydata/xarray/issues/9475#issuecomment-2357004264
removed_something = False
for name in parent._indexes:
if name in child._node_indexes:
# Indexes on a Dataset always have a corresponding coordinate.
# We already verified that these coordinates match in the
# check_alignment() call from _pre_attach().
del child._node_indexes[name]
del child._node_coord_variables[name]
removed_something = True

if removed_something:
child._node_dims = calculate_dimensions(
child._data_variables | child._node_coord_variables
)

for grandchild in child._children.values():
_deduplicate_inherited_coordinates(grandchild, child)


def _check_for_slashes_in_names(variables: Iterable[Hashable]) -> None:
offending_variable_names = [
name for name in variables if isinstance(name, str) and "/" in name
@@ -374,7 +403,7 @@ def map( # type: ignore[override]


class DataTree(
- NamedNode,
+ NamedNode["DataTree"],
MappedDatasetMethodsMixin,
MappedDataWithCoords,
DataTreeArithmeticMixin,
@@ -485,6 +514,7 @@ def _pre_attach(self: DataTree, parent: DataTree, name: str) -> None:
node_ds = self.to_dataset(inherited=False)
parent_ds = parent._to_dataset_view(rebuild_dims=False, inherited=True)
check_alignment(path, node_ds, parent_ds, self.children)
_deduplicate_inherited_coordinates(self, parent)

@property
def _coord_variables(self) -> ChainMap[Hashable, Variable]:
@@ -737,7 +767,9 @@ def __bool__(self) -> bool:
def __iter__(self) -> Iterator[str]:
return itertools.chain(self._data_variables, self._children) # type: ignore[arg-type]

- def __array__(self, dtype=None, copy=None):
+ def __array__(
+ self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None
+ ) -> np.ndarray:
raise TypeError(
"cannot directly convert a DataTree into a "
"numpy array. Instead, create an xarray.DataArray "
@@ -1350,7 +1382,7 @@ def map_over_subtree(
func: Callable,
*args: Iterable[Any],
**kwargs: Any,
- ) -> DataTree | tuple[DataTree]:
+ ) -> DataTree | tuple[DataTree, ...]:
"""
Apply a function to every dataset in this subtree, returning a new tree which stores the results.
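`_deduplicate_inherited_coordinates` above removes a child's indexed coordinates when they repeat its parent's, leaving the child to inherit them instead. A behavioral sketch, assuming the `DataTree` API referenced elsewhere in this diff (`from_dict`, `to_dataset(inherited=...)`):

```python
import xarray as xr
from xarray.core.datatree import DataTree

parent = xr.Dataset(coords={"x": [1, 2, 3]})
child = xr.Dataset({"foo": ("x", [10, 20, 30])}, coords={"x": [1, 2, 3]})

tree = DataTree.from_dict({"/": parent, "/child": child})

# The child's "x" index duplicates its parent's, so it is dropped from
# the node during attachment and inherited from the parent instead.
print(tree["child"].to_dataset(inherited=False))  # no "x" stored on the node
print(tree["child"].to_dataset(inherited=True))   # "x" visible via inheritance
```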
6 changes: 5 additions & 1 deletion xarray/core/groupby.py
@@ -193,7 +193,11 @@ def values(self) -> range:
def data(self) -> range:
return range(self.size)

- def __array__(self) -> np.ndarray:
+ def __array__(
+ self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None
+ ) -> np.ndarray:
+ if copy is False:
+ raise NotImplementedError(f"An array copy is necessary, got {copy = }.")
return np.arange(self.size)

@property
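The `range`-backed dummy coordinate above has to materialize through `np.arange`, so it cannot honor NumPy 2's `copy=False` ("never copy") and raises instead. A small illustration of that contract, using a hypothetical stand-in class:

```python
import numpy as np


class LazyRange:
    """Hypothetical stand-in for an object that materializes lazily."""

    def __init__(self, size: int):
        self.size = size

    def __array__(self, dtype=None, /, *, copy=None):
        if copy is False:
            # A fresh array is unavoidable, so "never copy" cannot be honored.
            raise NotImplementedError(f"An array copy is necessary, got {copy = }.")
        return np.arange(self.size, dtype=dtype)


np.asarray(LazyRange(4))  # fine: materializes a new array
# On NumPy >= 2.0, np.asarray(LazyRange(4), copy=False) raises NotImplementedError.
```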
43 changes: 27 additions & 16 deletions xarray/core/indexing.py
@@ -13,6 +13,7 @@

import numpy as np
import pandas as pd
from packaging.version import Version

from xarray.core import duck_array_ops
from xarray.core.nputils import NumpyVIndexAdapter
@@ -505,9 +506,14 @@ class ExplicitlyIndexed:

__slots__ = ()

- def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray:
+ def __array__(
+ self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None
+ ) -> np.ndarray:
# Leave casting to an array up to the underlying array type.
- return np.asarray(self.get_duck_array(), dtype=dtype)
+ if Version(np.__version__) >= Version("2.0.0"):
+ return np.asarray(self.get_duck_array(), dtype=dtype, copy=copy)
+ else:
+ return np.asarray(self.get_duck_array(), dtype=dtype)

def get_duck_array(self):
return self.array
@@ -520,11 +526,6 @@ def get_duck_array(self):
key = BasicIndexer((slice(None),) * self.ndim)
return self[key]

- def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray:
- # This is necessary because we apply the indexing key in self.get_duck_array()
- # Note this is the base class for all lazy indexing classes
- return np.asarray(self.get_duck_array(), dtype=dtype)

def _oindex_get(self, indexer: OuterIndexer):
raise NotImplementedError(
f"{self.__class__.__name__}._oindex_get method should be overridden"
@@ -570,8 +571,13 @@ def __init__(self, array, indexer_cls: type[ExplicitIndexer] = BasicIndexer):
self.array = as_indexable(array)
self.indexer_cls = indexer_cls

- def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray:
- return np.asarray(self.get_duck_array(), dtype=dtype)
+ def __array__(
+ self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None
+ ) -> np.ndarray:
+ if Version(np.__version__) >= Version("2.0.0"):
+ return np.asarray(self.get_duck_array(), dtype=dtype, copy=copy)
+ else:
+ return np.asarray(self.get_duck_array(), dtype=dtype)

def get_duck_array(self):
return self.array.get_duck_array()
@@ -830,9 +836,6 @@ def __init__(self, array):
def _ensure_cached(self):
self.array = as_indexable(self.array.get_duck_array())

- def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray:
- return np.asarray(self.get_duck_array(), dtype=dtype)

def get_duck_array(self):
self._ensure_cached()
return self.array.get_duck_array()
@@ -1674,15 +1677,21 @@ def __init__(self, array: pd.Index, dtype: DTypeLike = None):
def dtype(self) -> np.dtype:
return self._dtype

- def __array__(self, dtype: DTypeLike = None) -> np.ndarray:
+ def __array__(
+ self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None
+ ) -> np.ndarray:
if dtype is None:
dtype = self.dtype
array = self.array
if isinstance(array, pd.PeriodIndex):
with suppress(AttributeError):
# this might not be public API
array = array.astype("object")
- return np.asarray(array.values, dtype=dtype)
+ if Version(np.__version__) >= Version("2.0.0"):
+ return np.asarray(array.values, dtype=dtype, copy=copy)
+ else:
+ return np.asarray(array.values, dtype=dtype)

def get_duck_array(self) -> np.ndarray:
return np.asarray(self)
@@ -1831,15 +1840,17 @@ def __init__(
super().__init__(array, dtype)
self.level = level

- def __array__(self, dtype: DTypeLike = None) -> np.ndarray:
+ def __array__(
+ self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None
+ ) -> np.ndarray:
if dtype is None:
dtype = self.dtype
if self.level is not None:
return np.asarray(
self.array.get_level_values(self.level).values, dtype=dtype
)
else:
- return super().__array__(dtype)
+ return super().__array__(dtype, copy=copy)

def _convert_scalar(self, item):
if isinstance(item, tuple) and self.level is not None:
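The deletions in this file work because the lazy-indexing subclasses now inherit `ExplicitlyIndexed.__array__`, which already routes through `get_duck_array()`. A toy sketch of that consolidation, with hypothetical class names and the version gating omitted:

```python
import numpy as np


class Base:
    def __array__(self, dtype=None, /, *, copy=None):
        # Single protocol entry point; subclasses only customize the data.
        return np.asarray(self.get_duck_array(), dtype=dtype)

    def get_duck_array(self):
        return self.array


class Cached(Base):
    # No __array__ override needed: overriding get_duck_array() suffices.
    def __init__(self, array):
        self.array = np.asarray(array)

    def get_duck_array(self):
        return self.array


np.asarray(Cached([1, 2, 3]))  # uses Base.__array__
```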
(Diffs for the remaining changed files were not loaded.)
