diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
index 30c1e18f33c..2398577db10 100644
--- a/.github/FUNDING.yml
+++ b/.github/FUNDING.yml
@@ -1,2 +1,2 @@
 github: numfocus
-custom: http://numfocus.org/donate-to-xarray
+custom: https://numfocus.org/donate-to-xarray
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 4698e144293..1229dc97925 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -58,6 +58,9 @@ jobs:
             python-version: "3.10"
             os: ubuntu-latest
           # Latest python version:
+          - env: "all-but-numba"
+            python-version: "3.12"
+            os: ubuntu-latest
           - env: "all-but-dask"
             # Not 3.12 because of pint
             python-version: "3.11"
diff --git a/.github/workflows/nightly-wheels.yml b/.github/workflows/nightly-wheels.yml
index 9aac7ab8775..c31f2774bd8 100644
--- a/.github/workflows/nightly-wheels.yml
+++ b/.github/workflows/nightly-wheels.yml
@@ -38,7 +38,7 @@ jobs:
           fi
 
       - name: Upload wheel
-        uses: scientific-python/upload-nightly-action@b67d7fcc0396e1128a474d1ab2b48aa94680f9fc # 0.5.0
+        uses: scientific-python/upload-nightly-action@ccf29c805b5d0c1dc31fa97fcdb962be074cade3 # 0.6.0
         with:
           anaconda_nightly_upload_token: ${{ secrets.ANACONDA_NIGHTLY }}
           artifacts_path: dist
diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml
index 2eeabf469b7..5cf48dfd1fb 100644
--- a/.github/workflows/pypi-release.yaml
+++ b/.github/workflows/pypi-release.yaml
@@ -88,7 +88,7 @@ jobs:
           path: dist
       - name: Publish package to TestPyPI
         if: github.event_name == 'push'
-        uses: pypa/gh-action-pypi-publish@v1.10.1
+        uses: pypa/gh-action-pypi-publish@v1.10.2
         with:
           repository_url: https://test.pypi.org/legacy/
           verbose: true
@@ -111,6 +111,6 @@ jobs:
           name: releases
           path: dist
       - name: Publish package to PyPI
-        uses: pypa/gh-action-pypi-publish@v1.10.1
+        uses: pypa/gh-action-pypi-publish@v1.10.2
         with:
           verbose: true
diff --git a/README.md b/README.md
index dcf71217e2c..8fc8ff335d4 100644
--- a/README.md
+++ b/README.md
@@ -88,7 +88,7 @@ Xarray is a fiscally sponsored project of
 [NumFOCUS](https://numfocus.org), a nonprofit dedicated to supporting
 the open source scientific computing community. If you like Xarray and
 want to support our mission, please consider making a
-[donation](https://numfocus.salsalabs.org/donate-to-xarray/) to support
+[donation](https://numfocus.org/donate-to-xarray) to support
 our efforts.
 
 ## History
diff --git a/ci/requirements/all-but-numba.yml b/ci/requirements/all-but-numba.yml
new file mode 100644
index 00000000000..61f64a176af
--- /dev/null
+++ b/ci/requirements/all-but-numba.yml
@@ -0,0 +1,54 @@
+name: xarray-tests
+channels:
+  - conda-forge
+  - nodefaults
+dependencies:
+  # Pin a "very new numpy" (updated Sept 24, 2024)
+  - numpy>=2.1.1
+  - aiobotocore
+  - array-api-strict
+  - boto3
+  - bottleneck
+  - cartopy
+  - cftime
+  - dask-core
+  - dask-expr # dask raises a deprecation warning without this, breaking doctests
+  - distributed
+  - flox
+  - fsspec
+  - h5netcdf
+  - h5py
+  - hdf5
+  - hypothesis
+  - iris
+  - lxml # Optional dep of pydap
+  - matplotlib-base
+  - nc-time-axis
+  - netcdf4
+  # numba, sparse, numbagg, numexpr often conflict with newer versions of numpy.
+  # This environment helps us test xarray with the latest versions
+  # of numpy
+  # - numba
+  # - numbagg
+  # - numexpr
+  # - sparse
+  - opt_einsum
+  - packaging
+  - pandas
+  # - pint>=0.22
+  - pip
+  - pooch
+  - pre-commit
+  - pyarrow # pandas raises a deprecation warning without this, breaking doctests
+  - pydap
+  - pytest
+  - pytest-cov
+  - pytest-env
+  - pytest-xdist
+  - pytest-timeout
+  - rasterio
+  - scipy
+  - seaborn
+  - toolz
+  - typing_extensions
+  - zarr
diff --git a/doc/user-guide/pandas.rst b/doc/user-guide/pandas.rst
index 30939cbbd17..5fe5e15fa63 100644
--- a/doc/user-guide/pandas.rst
+++ b/doc/user-guide/pandas.rst
@@ -103,7 +103,7 @@ DataFrames:
     xr.DataArray.from_series(s)
 
 Both the ``from_series`` and ``from_dataframe`` methods use reindexing, so they
-work even if not the hierarchical index is not a full tensor product:
+work even if the hierarchical index is not a full tensor product:
 
 .. ipython:: python
 
@@ -126,7 +126,7 @@ Particularly after a roundtrip, the following deviations are noted:
 To avoid these problems, the third-party
 `ntv-pandas <https://github.com/loco-philippe/ntv-pandas>`__ library offers lossless and reversible conversions between
 ``Dataset``/ ``DataArray`` and pandas ``DataFrame`` objects.
-This solution is particularly interesting for converting any ``DataFrame`` into a ``Dataset`` (the converter find the multidimensional structure hidden by the tabular structure).
+This solution is particularly interesting for converting any ``DataFrame`` into a ``Dataset`` (the converter finds the multidimensional structure hidden by the tabular structure).
 
 The `ntv-pandas examples <https://github.com/loco-philippe/ntv-pandas/tree/main/example>`__ show how to improve the conversion for the previous ``Dataset`` example and for more complex examples.
 
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 72e49a983e3..28863946bfa 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -32,6 +32,9 @@ New Features
   `Tom Nicholas <https://github.com/TomNicholas>`_.
 - Added zarr backends for :py:func:`open_groups` (:issue:`9430`, :pull:`9469`).
   By `Eni Awowale <https://github.com/eni-awowale>`_.
+- Added support for vectorized interpolation using additional interpolators
+  from the ``scipy.interpolate`` module (:issue:`9049`, :pull:`9526`).
+  By `Holly Mandel <https://github.com/hollymandel>`_.
 
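The whats-new entry above is easiest to grasp with a short sketch. The example below is illustrative only: the array values, coordinate names, and destination points are invented here, and it assumes scipy is installed.

import numpy as np
import xarray as xr

da = xr.DataArray(
    np.sin(np.linspace(0, 2 * np.pi, 10))[:, None] * np.ones((10, 3)),
    dims=("x", "y"),
    coords={"x": np.linspace(0, 1, 10), "y": [0, 1, 2]},
)

# The destination carries its own dimension "z", so this is a vectorized
# interpolation: the result trades "x" for "z" while keeping "y".
xdest = xr.DataArray([0.1, 0.35, 0.8], dims="z")
result = da.interp(x=xdest, method="pchip")
print(result.dims)  # ('z', 'y')

Before this change, scipy interpolators such as "pchip" raised ValueError on this vectorized path; the missing.py changes further down route them through a 1-dimensional interpolator called with axis=-1 instead.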
Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/pyproject.toml b/pyproject.toml index 35522d82edf..0078a346b75 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -323,7 +323,6 @@ filterwarnings = [ "default:Using a non-tuple sequence for multidimensional indexing is deprecated:FutureWarning", "default:Duplicate dimension names present:UserWarning:xarray.namedarray.core", "default:::xarray.tests.test_strategies", # TODO: remove once we know how to deal with a changed signature in protocols - "ignore:__array__ implementation doesn't accept a copy keyword, so passing copy=False failed.", ] log_cli_level = "INFO" diff --git a/xarray/core/common.py b/xarray/core/common.py index e4c61a1bc12..9a6807faad2 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -163,7 +163,7 @@ def __complex__(self: Any) -> complex: return complex(self.values) def __array__( - self: Any, dtype: DTypeLike | None = None, copy: bool | None = None + self: Any, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None ) -> np.ndarray: if not copy: if np.lib.NumpyVersion(np.__version__) >= "2.0.0": diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index bcc57acd316..2adf862f1fd 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2224,12 +2224,12 @@ def interp( Performs univariate or multivariate interpolation of a DataArray onto new coordinates using scipy's interpolation routines. If interpolating - along an existing dimension, :py:class:`scipy.interpolate.interp1d` is - called. When interpolating along multiple existing dimensions, an + along an existing dimension, either :py:class:`scipy.interpolate.interp1d` + or a 1-dimensional scipy interpolator (e.g. :py:class:`scipy.interpolate.KroghInterpolator`) + is called. When interpolating along multiple existing dimensions, an attempt is made to decompose the interpolation into multiple - 1-dimensional interpolations. If this is possible, - :py:class:`scipy.interpolate.interp1d` is called. Otherwise, - :py:func:`scipy.interpolate.interpn` is called. + 1-dimensional interpolations. If this is possible, the 1-dimensional interpolator is called. + Otherwise, :py:func:`scipy.interpolate.interpn` is called. Parameters ---------- diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index b1ce264cbc8..82b60d7abc8 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3885,12 +3885,12 @@ def interp( Performs univariate or multivariate interpolation of a Dataset onto new coordinates using scipy's interpolation routines. If interpolating - along an existing dimension, :py:class:`scipy.interpolate.interp1d` is - called. When interpolating along multiple existing dimensions, an + along an existing dimension, either :py:class:`scipy.interpolate.interp1d` + or a 1-dimensional scipy interpolator (e.g. :py:class:`scipy.interpolate.KroghInterpolator`) + is called. When interpolating along multiple existing dimensions, an attempt is made to decompose the interpolation into multiple - 1-dimensional interpolations. If this is possible, - :py:class:`scipy.interpolate.interp1d` is called. Otherwise, - :py:func:`scipy.interpolate.interpn` is called. + 1-dimensional interpolations. If this is possible, the 1-dimensional interpolator + is called. Otherwise, :py:func:`scipy.interpolate.interpn` is called. 
Parameters ---------- @@ -10621,7 +10621,7 @@ def rolling( -------- Dataset.cumulative DataArray.rolling - core.rolling.DatasetRolling + DataArray.rolling_exp """ from xarray.core.rolling import DatasetRolling @@ -10651,9 +10651,9 @@ def cumulative( See Also -------- - Dataset.rolling DataArray.cumulative - core.rolling.DatasetRolling + Dataset.rolling + Dataset.rolling_exp """ from xarray.core.rolling import DatasetRolling diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index bd583ac86cb..52d44bec96f 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -55,6 +55,7 @@ from xarray.core.dataset import calculate_dimensions if TYPE_CHECKING: + import numpy as np import pandas as pd from xarray.core.datatree_io import T_DataTreeNetcdfEngine, T_DataTreeNetcdfTypes @@ -156,6 +157,34 @@ def check_alignment( check_alignment(child_path, child_ds, base_ds, child.children) +def _deduplicate_inherited_coordinates(child: DataTree, parent: DataTree) -> None: + # This method removes repeated indexes (and corresponding coordinates) + # that are repeated between a DataTree and its parents. + # + # TODO(shoyer): Decide how to handle repeated coordinates *without* an + # index. Should these be allowed, in which case we probably want to + # exclude them from inheritance, or should they be automatically + # dropped? + # https://github.com/pydata/xarray/issues/9475#issuecomment-2357004264 + removed_something = False + for name in parent._indexes: + if name in child._node_indexes: + # Indexes on a Dataset always have a corresponding coordinate. + # We already verified that these coordinates match in the + # check_alignment() call from _pre_attach(). + del child._node_indexes[name] + del child._node_coord_variables[name] + removed_something = True + + if removed_something: + child._node_dims = calculate_dimensions( + child._data_variables | child._node_coord_variables + ) + + for grandchild in child._children.values(): + _deduplicate_inherited_coordinates(grandchild, child) + + def _check_for_slashes_in_names(variables: Iterable[Hashable]) -> None: offending_variable_names = [ name for name in variables if isinstance(name, str) and "/" in name @@ -374,7 +403,7 @@ def map( # type: ignore[override] class DataTree( - NamedNode, + NamedNode["DataTree"], MappedDatasetMethodsMixin, MappedDataWithCoords, DataTreeArithmeticMixin, @@ -485,6 +514,7 @@ def _pre_attach(self: DataTree, parent: DataTree, name: str) -> None: node_ds = self.to_dataset(inherited=False) parent_ds = parent._to_dataset_view(rebuild_dims=False, inherited=True) check_alignment(path, node_ds, parent_ds, self.children) + _deduplicate_inherited_coordinates(self, parent) @property def _coord_variables(self) -> ChainMap[Hashable, Variable]: @@ -737,7 +767,9 @@ def __bool__(self) -> bool: def __iter__(self) -> Iterator[str]: return itertools.chain(self._data_variables, self._children) # type: ignore[arg-type] - def __array__(self, dtype=None, copy=None): + def __array__( + self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None + ) -> np.ndarray: raise TypeError( "cannot directly convert a DataTree into a " "numpy array. Instead, create an xarray.DataArray " @@ -1350,7 +1382,7 @@ def map_over_subtree( func: Callable, *args: Iterable[Any], **kwargs: Any, - ) -> DataTree | tuple[DataTree]: + ) -> DataTree | tuple[DataTree, ...]: """ Apply a function to every dataset in this subtree, returning a new tree which stores the results. 
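The deduplication logic added to datatree.py above is easiest to see from the outside. The following sketch is illustrative (the tree layout is invented); `to_dataset(inherited=...)` is the accessor the new tests for this behavior use.

import xarray as xr

tree = xr.DataTree.from_dict(
    {
        "/": xr.Dataset(coords={"x": [1, 2]}),
        # The child repeats the parent's indexed coordinate "x" exactly.
        "/child": xr.Dataset({"foo": ("x", [10, 20])}, coords={"x": [1, 2]}),
    }
)

# On attachment, _deduplicate_inherited_coordinates() drops the duplicated
# index from the child's own storage; the coordinate stays reachable via
# inheritance from the root.
child_own = tree.children["child"].to_dataset(inherited=False)
assert "x" not in child_own.coords
assert "x" in tree.children["child"].to_dataset(inherited=True).coords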
diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 1cea9a7a28d..38dccfa2038 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -303,7 +303,7 @@ def inline_variable_array_repr(var, max_width): """Build a one-line summary of a variable's data.""" if hasattr(var._data, "_repr_inline_"): return var._data._repr_inline_(max_width) - if var._in_memory: + if getattr(var, "_in_memory", False): return format_array_flat(var, max_width) dask_array_type = array_type("dask") if isinstance(var._data, dask_array_type): @@ -1102,7 +1102,8 @@ def _datatree_node_repr(node: DataTree, show_inherited: bool) -> str: summary.append(f"{dims_start}({dims_values})") if node._node_coord_variables: - summary.append(coords_repr(node.coords, col_width=col_width, max_rows=max_rows)) + node_coords = node.to_dataset(inherited=False).coords + summary.append(coords_repr(node_coords, col_width=col_width, max_rows=max_rows)) if show_inherited and inherited_coords: summary.append( diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 58971435018..92f0572d37a 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -193,7 +193,11 @@ def values(self) -> range: def data(self) -> range: return range(self.size) - def __array__(self) -> np.ndarray: + def __array__( + self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None + ) -> np.ndarray: + if copy is False: + raise NotImplementedError(f"An array copy is necessary, got {copy = }.") return np.arange(self.size) @property diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 67912908a2b..d8727c38c48 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -13,6 +13,7 @@ import numpy as np import pandas as pd +from packaging.version import Version from xarray.core import duck_array_ops from xarray.core.nputils import NumpyVIndexAdapter @@ -505,9 +506,14 @@ class ExplicitlyIndexed: __slots__ = () - def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: + def __array__( + self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None + ) -> np.ndarray: # Leave casting to an array up to the underlying array type. 
- return np.asarray(self.get_duck_array(), dtype=dtype) + if Version(np.__version__) >= Version("2.0.0"): + return np.asarray(self.get_duck_array(), dtype=dtype, copy=copy) + else: + return np.asarray(self.get_duck_array(), dtype=dtype) def get_duck_array(self): return self.array @@ -520,11 +526,6 @@ def get_duck_array(self): key = BasicIndexer((slice(None),) * self.ndim) return self[key] - def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: - # This is necessary because we apply the indexing key in self.get_duck_array() - # Note this is the base class for all lazy indexing classes - return np.asarray(self.get_duck_array(), dtype=dtype) - def _oindex_get(self, indexer: OuterIndexer): raise NotImplementedError( f"{self.__class__.__name__}._oindex_get method should be overridden" @@ -570,8 +571,13 @@ def __init__(self, array, indexer_cls: type[ExplicitIndexer] = BasicIndexer): self.array = as_indexable(array) self.indexer_cls = indexer_cls - def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: - return np.asarray(self.get_duck_array(), dtype=dtype) + def __array__( + self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None + ) -> np.ndarray: + if Version(np.__version__) >= Version("2.0.0"): + return np.asarray(self.get_duck_array(), dtype=dtype, copy=copy) + else: + return np.asarray(self.get_duck_array(), dtype=dtype) def get_duck_array(self): return self.array.get_duck_array() @@ -830,9 +836,6 @@ def __init__(self, array): def _ensure_cached(self): self.array = as_indexable(self.array.get_duck_array()) - def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: - return np.asarray(self.get_duck_array(), dtype=dtype) - def get_duck_array(self): self._ensure_cached() return self.array.get_duck_array() @@ -875,10 +878,10 @@ def as_indexable(array): return PandasIndexingAdapter(array) if is_duck_dask_array(array): return DaskIndexingAdapter(array) - if hasattr(array, "__array_function__"): - return NdArrayLikeIndexingAdapter(array) if hasattr(array, "__array_namespace__"): return ArrayApiIndexingAdapter(array) + if hasattr(array, "__array_function__"): + return NdArrayLikeIndexingAdapter(array) raise TypeError(f"Invalid array type: {type(array)}") @@ -1674,7 +1677,9 @@ def __init__(self, array: pd.Index, dtype: DTypeLike = None): def dtype(self) -> np.dtype: return self._dtype - def __array__(self, dtype: DTypeLike = None) -> np.ndarray: + def __array__( + self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None + ) -> np.ndarray: if dtype is None: dtype = self.dtype array = self.array @@ -1682,7 +1687,11 @@ def __array__(self, dtype: DTypeLike = None) -> np.ndarray: with suppress(AttributeError): # this might not be public API array = array.astype("object") - return np.asarray(array.values, dtype=dtype) + + if Version(np.__version__) >= Version("2.0.0"): + return np.asarray(array.values, dtype=dtype, copy=copy) + else: + return np.asarray(array.values, dtype=dtype) def get_duck_array(self) -> np.ndarray: return np.asarray(self) @@ -1831,7 +1840,9 @@ def __init__( super().__init__(array, dtype) self.level = level - def __array__(self, dtype: DTypeLike = None) -> np.ndarray: + def __array__( + self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None + ) -> np.ndarray: if dtype is None: dtype = self.dtype if self.level is not None: @@ -1839,7 +1850,7 @@ def __array__(self, dtype: DTypeLike = None) -> np.ndarray: self.array.get_level_values(self.level).values, dtype=dtype ) else: - return 
super().__array__(dtype)
+            return super().__array__(dtype, copy=copy)
 
     def _convert_scalar(self, item):
         if isinstance(item, tuple) and self.level is not None:
diff --git a/xarray/core/missing.py b/xarray/core/missing.py
index 2df53b172f0..6a380f53f0d 100644
--- a/xarray/core/missing.py
+++ b/xarray/core/missing.py
@@ -138,6 +138,7 @@ def __init__(
         copy=False,
         bounds_error=False,
         order=None,
+        axis=-1,
         **kwargs,
     ):
         from scipy.interpolate import interp1d
@@ -173,6 +174,7 @@ def __init__(
             bounds_error=bounds_error,
             assume_sorted=assume_sorted,
             copy=copy,
+            axis=axis,
             **self.cons_kwargs,
         )
@@ -479,7 +481,8 @@ def _get_interpolator(
     interp1d_methods = get_args(Interp1dOptions)
     valid_methods = tuple(vv for v in get_args(InterpOptions) for vv in get_args(v))
 
-    # prioritize scipy.interpolate
+    # prefer numpy.interp for 1d linear interpolation. This function cannot
+    # take higher dimensional data but scipy.interp1d can.
     if (
         method == "linear"
         and not kwargs.get("fill_value", None) == "extrapolate"
@@ -492,21 +495,30 @@ def _get_interpolator(
         if method in interp1d_methods:
             kwargs.update(method=method)
             interp_class = ScipyInterpolator
-        elif vectorizeable_only:
-            raise ValueError(
-                f"{method} is not a vectorizeable interpolator. "
-                f"Available methods are {interp1d_methods}"
-            )
         elif method == "barycentric":
+            kwargs.update(axis=-1)
             interp_class = _import_interpolant("BarycentricInterpolator", method)
         elif method in ["krogh", "krog"]:
+            kwargs.update(axis=-1)
             interp_class = _import_interpolant("KroghInterpolator", method)
         elif method == "pchip":
+            kwargs.update(axis=-1)
             interp_class = _import_interpolant("PchipInterpolator", method)
         elif method == "spline":
+            utils.emit_user_level_warning(
+                "The 1d SplineInterpolator class is performing an incorrect calculation and "
+                "is being deprecated. Please use `method=polynomial` for 1D Spline Interpolation.",
+                PendingDeprecationWarning,
+            )
+            if vectorizeable_only:
+                raise ValueError(f"{method} is not a vectorizeable interpolator. ")
             kwargs.update(method=method)
             interp_class = SplineInterpolator
         elif method == "akima":
+            kwargs.update(axis=-1)
+            interp_class = _import_interpolant("Akima1DInterpolator", method)
+        elif method == "makima":
+            kwargs.update(method="makima", axis=-1)
             interp_class = _import_interpolant("Akima1DInterpolator", method)
         else:
             raise ValueError(f"{method} is not a valid scipy interpolator")
@@ -525,6 +540,7 @@ def _get_interpolator_nd(method, **kwargs):
 
     if method in valid_methods:
         kwargs.update(method=method)
+        kwargs.setdefault("bounds_error", False)
         interp_class = _import_interpolant("interpn", method)
     else:
         raise ValueError(
@@ -614,9 +630,6 @@ def interp(var, indexes_coords, method: InterpOptions, **kwargs):
     if not indexes_coords:
         return var.copy()
 
-    # default behavior
-    kwargs["bounds_error"] = kwargs.get("bounds_error", False)
-
     result = var
     # decompose the interpolation into a succession of independent interpolation
    for indep_indexes_coords in decompose_interp(indexes_coords):
@@ -663,8 +676,8 @@ def interp_func(var, x, new_x, method: InterpOptions, kwargs):
     new_x : a list of 1d array
         New coordinates. Should not contain NaN.
     method : string
-        {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'} for
-        1-dimensional interpolation.
+        {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'pchip', 'akima',
+        'makima', 'barycentric', 'krogh'} for 1-dimensional interpolation.
{'linear', 'nearest'} for multidimensional interpolation **kwargs Optional keyword arguments to be passed to scipy.interpolator @@ -756,7 +769,7 @@ def interp_func(var, x, new_x, method: InterpOptions, kwargs): def _interp1d(var, x, new_x, func, kwargs): # x, new_x are tuples of size 1. x, new_x = x[0], new_x[0] - rslt = func(x, var, assume_sorted=True, **kwargs)(np.ravel(new_x)) + rslt = func(x, var, **kwargs)(np.ravel(new_x)) if new_x.ndim > 1: return reshape(rslt, (var.shape[:-1] + new_x.shape)) if new_x.ndim == 0: diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index fc7240139aa..7fbb63068c0 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -162,7 +162,7 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0): def nanprod(a, axis=None, dtype=None, out=None, min_count=None): mask = isnull(a) - result = nputils.nanprod(a, axis=axis, dtype=dtype, out=out) + result = nputils.nanprod(a, axis=axis, dtype=dtype) if min_count is not None: return _maybe_null_out(result, axis, mask, min_count) else: diff --git a/xarray/core/types.py b/xarray/core/types.py index 34b6029ee15..1d383d550ec 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -228,7 +228,9 @@ def copy( Interp1dOptions = Literal[ "linear", "nearest", "zero", "slinear", "quadratic", "cubic", "polynomial" ] -InterpolantOptions = Literal["barycentric", "krogh", "pchip", "spline", "akima"] +InterpolantOptions = Literal[ + "barycentric", "krogh", "pchip", "spline", "akima", "makima" +] InterpOptions = Union[Interp1dOptions, InterpolantOptions] DatetimeUnitOptions = Literal[ diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 9b9239cc042..d8cf0fe7550 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -320,12 +320,14 @@ def convert_non_numpy_type(data): else: data = np.asarray(data) - if not isinstance(data, np.ndarray) and ( + # immediately return array-like types except `numpy.ndarray` subclasses and `numpy` scalars + if not isinstance(data, np.ndarray | np.generic) and ( hasattr(data, "__array_function__") or hasattr(data, "__array_namespace__") ): return cast("T_DuckArray", data) - # validate whether the data is valid data types. + # validate whether the data is valid data types. Also, explicitly cast `numpy` + # subclasses and `numpy` scalars to `numpy.ndarray` data = np.asarray(data) if data.dtype.kind in "OMm": diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index a7d7ed7994f..90c442d2e1f 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -153,15 +153,15 @@ def __getitem__( @overload def __array__( - self, dtype: None = ..., /, *, copy: None | bool = ... + self, dtype: None = ..., /, *, copy: bool | None = ... ) -> np.ndarray[Any, _DType_co]: ... @overload def __array__( - self, dtype: _DType, /, *, copy: None | bool = ... + self, dtype: _DType, /, *, copy: bool | None = ... ) -> np.ndarray[Any, _DType]: ... def __array__( - self, dtype: _DType | None = ..., /, *, copy: None | bool = ... + self, dtype: _DType | None = ..., /, *, copy: bool | None = ... ) -> np.ndarray[Any, _DType] | np.ndarray[Any, _DType_co]: ... # TODO: Should return the same subclass but with a new dtype generic. 
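All of the `__array__` rewrites collected above converge on the NumPy 2.x protocol: `dtype` is positional-only, `copy` is keyword-only, and on NumPy 1.x the `copy` request cannot be forwarded to `np.asarray`. A minimal, self-contained duck-array sketch of that pattern (the class here is hypothetical, not part of xarray):

import numpy as np
from packaging.version import Version


class WrappedArray:
    # Hypothetical duck array demonstrating the NumPy 2.x __array__ signature.
    def __init__(self, data):
        self._data = np.asarray(data)

    def __array__(
        self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None
    ) -> np.ndarray:
        if Version(np.__version__) >= Version("2.0.0"):
            # NumPy 2 accepts the copy keyword on np.asarray.
            return np.asarray(self._data, dtype=dtype, copy=copy)
        # NumPy 1.x has no copy parameter on np.asarray, so the request is
        # dropped, mirroring the fallback branches in the diffs above.
        return np.asarray(self._data, dtype=dtype)


print(np.array(WrappedArray([1, 2, 3])))  # dispatches to WrappedArray.__array__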
diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 71ae1a7075f..6b54994f311 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -87,6 +87,7 @@ def _importorskip( has_matplotlib, requires_matplotlib = _importorskip("matplotlib") has_scipy, requires_scipy = _importorskip("scipy") +has_scipy_ge_1_13, requires_scipy_ge_1_13 = _importorskip("scipy", "1.13") with warnings.catch_warnings(): warnings.filterwarnings( "ignore", diff --git a/xarray/tests/arrays.py b/xarray/tests/arrays.py index 4e1e31cfa49..7373b6c75ab 100644 --- a/xarray/tests/arrays.py +++ b/xarray/tests/arrays.py @@ -24,7 +24,9 @@ def __init__(self, array): def get_duck_array(self): raise UnexpectedDataAccess("Tried accessing data") - def __array__(self, dtype: np.typing.DTypeLike = None): + def __array__( + self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None + ) -> np.ndarray: raise UnexpectedDataAccess("Tried accessing data") def __getitem__(self, key): @@ -49,7 +51,9 @@ def __init__(self, array: np.ndarray): def __getitem__(self, key): return type(self)(self.array[key]) - def __array__(self, dtype: np.typing.DTypeLike = None): + def __array__( + self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None + ) -> np.ndarray: raise UnexpectedDataAccess("Tried accessing data") def __array_namespace__(self): @@ -140,7 +144,9 @@ def __repr__(self: Any) -> str: def get_duck_array(self): raise UnexpectedDataAccess("Tried accessing data") - def __array__(self, dtype: np.typing.DTypeLike = None): + def __array__( + self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None + ) -> np.ndarray: raise UnexpectedDataAccess("Tried accessing data") def __getitem__(self, key) -> "ConcatenatableArray": diff --git a/xarray/tests/test_assertions.py b/xarray/tests/test_assertions.py index 3e1ce0ea266..ec4b39aaab6 100644 --- a/xarray/tests/test_assertions.py +++ b/xarray/tests/test_assertions.py @@ -173,9 +173,11 @@ def dims(self): warnings.warn("warning in test", stacklevel=2) return super().dims - def __array__(self, dtype=None, copy=None): + def __array__( + self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None + ) -> np.ndarray: warnings.warn("warning in test", stacklevel=2) - return super().__array__() + return super().__array__(dtype, copy=copy) a = WarningVariable("x", [1]) b = WarningVariable("x", [2]) diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 4e22ba57e2e..30934f83c63 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -1,4 +1,5 @@ import re +import sys import typing from copy import copy, deepcopy from textwrap import dedent @@ -15,6 +16,8 @@ from xarray.testing import assert_equal, assert_identical from xarray.tests import assert_array_equal, create_test_data, source_ndarray +ON_WINDOWS = sys.platform == "win32" + class TestTreeCreation: def test_empty(self): @@ -1052,7 +1055,7 @@ def test_repr_two_children(self): { "/": Dataset(coords={"x": [1.0]}), "/first_child": None, - "/second_child": Dataset({"foo": ("x", [0.0])}), + "/second_child": Dataset({"foo": ("x", [0.0])}, coords={"z": 1.0}), } ) @@ -1067,6 +1070,8 @@ def test_repr_two_children(self): ├── Group: /first_child └── Group: /second_child Dimensions: (x: 1) + Coordinates: + z float64 8B 1.0 Data variables: foo (x) float64 8B 0.0 """ @@ -1091,6 +1096,8 @@ def test_repr_two_children(self): Group: /second_child Dimensions: (x: 1) + Coordinates: + z float64 8B 1.0 Inherited coordinates: * x (x) float64 8B 1.0 Data 
variables:
@@ -1138,6 +1145,104 @@ def test_repr_inherited_dims(self):
         ).strip()
         assert result == expected
 
+    @pytest.mark.skipif(
+        ON_WINDOWS, reason="windows (pre NumPy2) uses int32 instead of int64"
+    )
+    def test_doc_example(self):
+        # regression test for https://github.com/pydata/xarray/issues/9499
+        time = xr.DataArray(data=["2022-01", "2023-01"], dims="time")
+        stations = xr.DataArray(data=list("abcdef"), dims="station")
+        lon = [-100, -80, -60]
+        lat = [10, 20, 30]
+        # Set up fake data
+        wind_speed = xr.DataArray(np.ones((2, 6)) * 2, dims=("time", "station"))
+        pressure = xr.DataArray(np.ones((2, 6)) * 3, dims=("time", "station"))
+        air_temperature = xr.DataArray(np.ones((2, 6)) * 4, dims=("time", "station"))
+        dewpoint = xr.DataArray(np.ones((2, 6)) * 5, dims=("time", "station"))
+        infrared = xr.DataArray(np.ones((2, 3, 3)) * 6, dims=("time", "lon", "lat"))
+        true_color = xr.DataArray(np.ones((2, 3, 3)) * 7, dims=("time", "lon", "lat"))
+        tree = xr.DataTree.from_dict(
+            {
+                "/": xr.Dataset(
+                    coords={"time": time},
+                ),
+                "/weather": xr.Dataset(
+                    coords={"station": stations},
+                    data_vars={
+                        "wind_speed": wind_speed,
+                        "pressure": pressure,
+                    },
+                ),
+                "/weather/temperature": xr.Dataset(
+                    data_vars={
+                        "air_temperature": air_temperature,
+                        "dewpoint": dewpoint,
+                    },
+                ),
+                "/satellite": xr.Dataset(
+                    coords={"lat": lat, "lon": lon},
+                    data_vars={
+                        "infrared": infrared,
+                        "true_color": true_color,
+                    },
+                ),
+            },
+        )
+
+        result = repr(tree)
+        expected = dedent(
+            """
+            <xarray.DataTree>
+            Group: /
+            │   Dimensions:  (time: 2)
+            │   Coordinates:
+            │     * time     (time) <U7 56B '2022-01' '2023-01'
+            ├── Group: /weather
+            │   │   Dimensions:  (time: 2, station: 6)
+            │   │   Coordinates:
+            │   │     * station  (station) <U1 24B 'a' 'b' 'c' 'd' 'e' 'f'
+            │   │   Data variables:
+            │   │       wind_speed  (time, station) float64 96B 2.0 2.0 2.0 2.0 ... 2.0 2.0 2.0
+            │   │       pressure    (time, station) float64 96B 3.0 3.0 3.0 3.0 ... 3.0 3.0 3.0
+            │   └── Group: /weather/temperature
+            │           Dimensions:        (time: 2, station: 6)
+            │           Data variables:
+            │               air_temperature  (time, station) float64 96B 4.0 4.0 4.0 ... 4.0 4.0
+            │               dewpoint         (time, station) float64 96B 5.0 5.0 5.0 ... 5.0 5.0
+            └── Group: /satellite
+                    Dimensions:     (time: 2, lon: 3, lat: 3)
+                    Coordinates:
+                      * lat         (lat) int64 24B 10 20 30
+                      * lon         (lon) int64 24B -100 -80 -60
+                    Data variables:
+                        infrared    (time, lon, lat) float64 144B 6.0 6.0 6.0 6.0 ... 6.0 6.0
+                        true_color  (time, lon, lat) float64 144B 7.0 7.0 7.0 7.0 ... 7.0 7.0
+            """
+        ).strip()
+        assert result == expected
+
 
 def _exact_match(message: str) -> str:
     return re.escape(dedent(message).strip())
@@ -1190,6 +1295,22 @@ def test_inherited_coords_override(self):
         xr.testing.assert_equal(dt["/b/y"], xr.DataArray(2, coords=sub_coords))
         xr.testing.assert_equal(dt["/b/z"], xr.DataArray(3, coords=sub_coords))
 
+    def test_inherited_coords_with_index_are_deduplicated(self):
+        dt = DataTree.from_dict(
+            {
+                "/": xr.Dataset(coords={"x": [1, 2]}),
+                "/b": xr.Dataset(coords={"x": [1, 2]}),
+            }
+        )
+        child_dataset = dt.children["b"].to_dataset(inherited=False)
+        expected = xr.Dataset()
+        assert_identical(child_dataset, expected)
+
+        dt["/c"] = xr.Dataset({"foo": ("x", [4, 5])}, coords={"x": [1, 2]})
+        child_dataset = dt.children["c"].to_dataset(inherited=False)
+        expected = xr.Dataset({"foo": ("x", [4, 5])})
+        assert_identical(child_dataset, expected)
+
     def test_inconsistent_dims(self):
         expected_msg = _exact_match(
             """
@@ -1534,6 +1655,18 @@ def test_binary_op_on_datatree(self):
         result = dt * dt  # type: ignore[operator]
         assert_equal(result, expected)
 
+    def test_arithmetic_inherited_coords(self):
+        tree = DataTree(xr.Dataset(coords={"x": [1, 2, 3]}))
+        tree["/foo"] = DataTree(xr.Dataset({"bar": ("x", [4, 5, 6])}))
+        actual: DataTree = 2 * tree  # type: ignore[assignment,operator]
+
+        actual_dataset = actual.children["foo"].to_dataset(inherited=False)
+        assert "x" not in actual_dataset.coords
+
+        expected = tree.copy()
+        expected["/foo/bar"].data = np.array([8, 10, 12])
+        assert_identical(actual, expected)
+
 
 class TestUFuncs:
     def test_tree(self, create_test_datatree):
diff --git a/xarray/tests/test_datatree_mapping.py b/xarray/tests/test_datatree_mapping.py
index c7e0e93b89b..9a7d3009c3b 100644
--- a/xarray/tests/test_datatree_mapping.py
+++ b/xarray/tests/test_datatree_mapping.py
@@ -7,7 +7,7 @@
     check_isomorphic,
     map_over_subtree,
 )
-from xarray.testing import assert_equal
+from xarray.testing import 
assert_equal, assert_identical empty = xr.Dataset() @@ -306,6 +306,17 @@ def fail_on_specific_node(ds): ): dt.map_over_subtree(fail_on_specific_node) + def test_inherited_coordinates_with_index(self): + root = xr.Dataset(coords={"x": [1, 2]}) + child = xr.Dataset({"foo": ("x", [0, 1])}) # no coordinates + tree = xr.DataTree.from_dict({"/": root, "/child": child}) + actual = tree.map_over_subtree(lambda ds: ds) # identity + assert isinstance(actual, xr.DataTree) + assert_identical(tree, actual) + + actual_child = actual.children["child"].to_dataset(inherited=False) + assert_identical(actual_child, child) + class TestMutableOperations: def test_construct_using_type(self): diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py index 4123b3e8aee..688f41a7f92 100644 --- a/xarray/tests/test_formatting.py +++ b/xarray/tests/test_formatting.py @@ -942,7 +942,9 @@ def test_lazy_array_wont_compute() -> None: from xarray.core.indexing import LazilyIndexedArray class LazilyIndexedArrayNotComputable(LazilyIndexedArray): - def __array__(self, dtype=None, copy=None): + def __array__( + self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None + ) -> np.ndarray: raise NotImplementedError("Computing this array is not possible.") arr = LazilyIndexedArrayNotComputable(np.array([1, 2])) diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index 985fb02a87e..92c21cc32fb 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -894,6 +894,74 @@ def test_posify_mask_subindexer(indices, expected) -> None: np.testing.assert_array_equal(expected, actual) +class ArrayWithNamespace: + def __array_namespace__(self, version=None): + pass + + +class ArrayWithArrayFunction: + def __array_function__(self, func, types, args, kwargs): + pass + + +class ArrayWithNamespaceAndArrayFunction: + def __array_namespace__(self, version=None): + pass + + def __array_function__(self, func, types, args, kwargs): + pass + + +def as_dask_array(arr, chunks): + try: + import dask.array as da + except ImportError: + return None + + return da.from_array(arr, chunks=chunks) + + +@pytest.mark.parametrize( + ["array", "expected_type"], + ( + pytest.param( + indexing.CopyOnWriteArray(np.array([1, 2])), + indexing.CopyOnWriteArray, + id="ExplicitlyIndexed", + ), + pytest.param( + np.array([1, 2]), indexing.NumpyIndexingAdapter, id="numpy.ndarray" + ), + pytest.param( + pd.Index([1, 2]), indexing.PandasIndexingAdapter, id="pandas.Index" + ), + pytest.param( + as_dask_array(np.array([1, 2]), chunks=(1,)), + indexing.DaskIndexingAdapter, + id="dask.array", + marks=requires_dask, + ), + pytest.param( + ArrayWithNamespace(), indexing.ArrayApiIndexingAdapter, id="array_api" + ), + pytest.param( + ArrayWithArrayFunction(), + indexing.NdArrayLikeIndexingAdapter, + id="array_like", + ), + pytest.param( + ArrayWithNamespaceAndArrayFunction(), + indexing.ArrayApiIndexingAdapter, + id="array_api_with_fallback", + ), + ), +) +def test_as_indexable(array, expected_type): + actual = indexing.as_indexable(array) + + assert isinstance(actual, expected_type) + + def test_indexing_1d_object_array() -> None: items = (np.arange(3), np.arange(6)) arr = DataArray(np.array(items, dtype=object)) diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index 5c03881242b..d02e12dd695 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -16,6 +16,7 @@ assert_identical, has_dask, has_scipy, + has_scipy_ge_1_13, requires_cftime, requires_dask, 
requires_scipy, @@ -132,29 +133,66 @@ def func(obj, new_x): assert_allclose(actual, expected) -@pytest.mark.parametrize("use_dask", [False, True]) -def test_interpolate_vectorize(use_dask: bool) -> None: - if not has_scipy: - pytest.skip("scipy is not installed.") - - if not has_dask and use_dask: - pytest.skip("dask is not installed in the environment.") - +@requires_scipy +@pytest.mark.parametrize( + "use_dask, method", + ( + (False, "linear"), + (False, "akima"), + pytest.param( + False, + "makima", + marks=pytest.mark.skipif(not has_scipy_ge_1_13, reason="scipy too old"), + ), + pytest.param( + True, + "linear", + marks=pytest.mark.skipif(not has_dask, reason="dask not available"), + ), + pytest.param( + True, + "akima", + marks=pytest.mark.skipif(not has_dask, reason="dask not available"), + ), + ), +) +def test_interpolate_vectorize(use_dask: bool, method: InterpOptions) -> None: # scipy interpolation for the reference - def func(obj, dim, new_x): + def func(obj, dim, new_x, method): + scipy_kwargs = {} + interpolant_options = { + "barycentric": scipy.interpolate.BarycentricInterpolator, + "krogh": scipy.interpolate.KroghInterpolator, + "pchip": scipy.interpolate.PchipInterpolator, + "akima": scipy.interpolate.Akima1DInterpolator, + "makima": scipy.interpolate.Akima1DInterpolator, + } + shape = [s for i, s in enumerate(obj.shape) if i != obj.get_axis_num(dim)] for s in new_x.shape[::-1]: shape.insert(obj.get_axis_num(dim), s) - return scipy.interpolate.interp1d( - da[dim], - obj.data, - axis=obj.get_axis_num(dim), - bounds_error=False, - fill_value=np.nan, - )(new_x).reshape(shape) + if method in interpolant_options: + interpolant = interpolant_options[method] + if method == "makima": + scipy_kwargs["method"] = method + return interpolant( + da[dim], obj.data, axis=obj.get_axis_num(dim), **scipy_kwargs + )(new_x).reshape(shape) + else: + + return scipy.interpolate.interp1d( + da[dim], + obj.data, + axis=obj.get_axis_num(dim), + kind=method, + bounds_error=False, + fill_value=np.nan, + **scipy_kwargs, + )(new_x).reshape(shape) da = get_example_data(0) + if use_dask: da = da.chunk({"y": 5}) @@ -165,17 +203,17 @@ def func(obj, dim, new_x): coords={"z": np.random.randn(30), "z2": ("z", np.random.randn(30))}, ) - actual = da.interp(x=xdest, method="linear") + actual = da.interp(x=xdest, method=method) expected = xr.DataArray( - func(da, "x", xdest), + func(da, "x", xdest, method), dims=["z", "y"], coords={ "z": xdest["z"], "z2": xdest["z2"], "y": da["y"], "x": ("z", xdest.values), - "x2": ("z", func(da["x2"], "x", xdest)), + "x2": ("z", func(da["x2"], "x", xdest, method)), }, ) assert_allclose(actual, expected.transpose("z", "y", transpose_coords=True)) @@ -191,10 +229,10 @@ def func(obj, dim, new_x): }, ) - actual = da.interp(x=xdest, method="linear") + actual = da.interp(x=xdest, method=method) expected = xr.DataArray( - func(da, "x", xdest), + func(da, "x", xdest, method), dims=["z", "w", "y"], coords={ "z": xdest["z"], @@ -202,7 +240,7 @@ def func(obj, dim, new_x): "z2": xdest["z2"], "y": da["y"], "x": (("z", "w"), xdest.data), - "x2": (("z", "w"), func(da["x2"], "x", xdest)), + "x2": (("z", "w"), func(da["x2"], "x", xdest, method)), }, ) assert_allclose(actual, expected.transpose("z", "w", "y", transpose_coords=True)) @@ -393,19 +431,17 @@ def test_nans(use_dask: bool) -> None: assert actual.count() > 0 +@requires_scipy @pytest.mark.parametrize("use_dask", [True, False]) def test_errors(use_dask: bool) -> None: - if not has_scipy: - pytest.skip("scipy is not installed.") - - # akima 
and spline are unavailable
+    # spline is unavailable
     da = xr.DataArray([0, 1, np.nan, 2], dims="x", coords={"x": range(4)})
     if not has_dask and use_dask:
         pytest.skip("dask is not installed in the environment.")
         da = da.chunk()
 
-    for method in ["akima", "spline"]:
-        with pytest.raises(ValueError):
+    for method in ["spline"]:
+        with pytest.raises(ValueError), pytest.warns(PendingDeprecationWarning):
             da.interp(x=[0.5, 1.5], method=method)  # type: ignore[arg-type]
 
     # not sorted
@@ -922,7 +958,10 @@ def test_interp1d_bounds_error() -> None:
         (("x", np.array([0, 0.5, 1, 2]), dict(unit="s")), False),
     ],
 )
-def test_coord_attrs(x, expect_same_attrs: bool) -> None:
+def test_coord_attrs(
+    x,
+    expect_same_attrs: bool,
+) -> None:
     base_attrs = dict(foo="bar")
     ds = xr.Dataset(
         data_vars=dict(a=2 * np.arange(5)),
diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py
index bf90074a7cc..58d8a9dcf5d 100644
--- a/xarray/tests/test_missing.py
+++ b/xarray/tests/test_missing.py
@@ -137,7 +137,11 @@ def test_scipy_methods_function(method) -> None:
     # Note: Pandas does some wacky things with these methods and the full
     # integration tests won't work.
     da, _ = make_interpolate_example_data((25, 25), 0.4, non_uniform=True)
-    actual = da.interpolate_na(method=method, dim="time")
+    if method == "spline":
+        with pytest.warns(PendingDeprecationWarning):
+            actual = da.interpolate_na(method=method, dim="time")
+    else:
+        actual = da.interpolate_na(method=method, dim="time")
     assert (da.count("time") <= actual.count("time")).all()
 
diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py
index 8ccf8c541b7..39ca08fa06c 100644
--- a/xarray/tests/test_namedarray.py
+++ b/xarray/tests/test_namedarray.py
@@ -8,6 +8,7 @@
 
 import numpy as np
 import pytest
+from packaging.version import Version
 
 from xarray.core.indexing import ExplicitlyIndexed
 from xarray.namedarray._typing import (
@@ -53,8 +54,14 @@ def shape(self) -> _Shape:
 class CustomArray(
     CustomArrayBase[_ShapeType_co, _DType_co], Generic[_ShapeType_co, _DType_co]
 ):
-    def __array__(self) -> np.ndarray[Any, np.dtype[np.generic]]:
-        return np.array(self.array)
+    def __array__(
+        self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None
+    ) -> np.ndarray[Any, np.dtype[np.generic]]:
+
+        if Version(np.__version__) >= Version("2.0.0"):
+            return np.asarray(self.array, dtype=dtype, copy=copy)
+        else:
+            return np.asarray(self.array, dtype=dtype)
 
 
 class CustomArrayIndexable(
@@ -584,3 +591,15 @@ def test_broadcast_to_errors(
     def test_warn_on_repeated_dimension_names(self) -> None:
         with pytest.warns(UserWarning, match="Duplicate dimension names"):
             NamedArray(("x", "x"), np.arange(4).reshape(2, 2))
+
+
+def test_repr() -> None:
+    x: NamedArray[Any, np.dtype[np.uint64]]
+    x = NamedArray(("x",), np.array([0], dtype=np.uint64))
+
+    # Reprs should not crash:
+    r = x.__repr__()
+    x._repr_html_()
+
+    # Basic comparison:
+    assert r == "<xarray.NamedArray (x: 1)> Size: 8B\narray([0], dtype=uint64)"
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index 0a55b42f228..1d430b6b27e 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -2585,7 +2585,12 @@ def test_unchanged_types(self):
             assert source_ndarray(x) is source_ndarray(as_compatible_data(x))
 
     def test_converted_types(self):
-        for input_array in [[[0, 1, 2]], pd.DataFrame([[0, 1, 2]])]:
+        for input_array in [
+            [[0, 1, 2]],
+            pd.DataFrame([[0, 1, 2]]),
+            np.float64(1.4),
+            np.str_("abc"),
+        ]:
             actual = as_compatible_data(input_array)
assert_array_equal(np.asarray(input_array), actual) assert np.ndarray is type(actual)
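The new `np.float64` and `np.str_` cases above pin down the normalization added in `xarray/core/variable.py`: NumPy scalars and ndarray subclasses are cast to plain `numpy.ndarray`, while true duck arrays still pass through. A standalone sketch of that behavior (`as_compatible_data` is internal API, and this assumes a build that includes the change):

import numpy as np
from xarray.core.variable import as_compatible_data

# NumPy scalars are no longer treated as array-like duck types; they are
# explicitly cast to 0-d numpy.ndarray.
for value in (np.float64(1.4), np.str_("abc")):
    data = as_compatible_data(value)
    assert type(data) is np.ndarray
    assert data.shape == ()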