Skip to content

Commit

Permalink
Merge branch 'main' into force-scalars-to-ndarray
Browse files Browse the repository at this point in the history
  • Loading branch information
keewis committed Sep 20, 2024
2 parents 42f7e8c + 2b800ba commit 9baaf50
Show file tree
Hide file tree
Showing 162 changed files with 3,955 additions and 4,662 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/ci-additional.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,7 @@ jobs:
#
# If dependencies emit warnings we can't do anything about, add ignores to
# `xarray/tests/__init__.py`.
# [MHS, 01/25/2024] Skip datatree_ documentation remove after #8572
python -m pytest --doctest-modules xarray --ignore xarray/tests --ignore xarray/datatree_ -Werror
python -m pytest --doctest-modules xarray --ignore xarray/tests -Werror
mypy:
name: Mypy
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pypi-release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ jobs:
path: dist
- name: Publish package to TestPyPI
if: github.event_name == 'push'
uses: pypa/gh-action-pypi-publish@v1.10.0
uses: pypa/gh-action-pypi-publish@v1.10.1
with:
repository_url: https://test.pypi.org/legacy/
verbose: true
Expand All @@ -111,6 +111,6 @@ jobs:
name: releases
path: dist
- name: Publish package to PyPI
uses: pypa/gh-action-pypi-publish@v1.10.0
uses: pypa/gh-action-pypi-publish@v1.10.1
with:
verbose: true
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# https://pre-commit.com/
ci:
autoupdate_schedule: monthly
exclude: 'xarray/datatree_.*'
autoupdate_commit_msg: 'Update pre-commit hooks'
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
Expand Down
2 changes: 0 additions & 2 deletions MANIFEST.in

This file was deleted.

8 changes: 4 additions & 4 deletions asv_bench/benchmarks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ def decorator(func):
def requires_dask():
try:
import dask # noqa: F401
except ImportError:
raise NotImplementedError()
except ImportError as err:
raise NotImplementedError() from err


def requires_sparse():
try:
import sparse # noqa: F401
except ImportError:
raise NotImplementedError()
except ImportError as err:
raise NotImplementedError() from err


def randn(shape, frac_nan=None, chunks=None, seed=0):
Expand Down
6 changes: 3 additions & 3 deletions asv_bench/benchmarks/accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ def setup(self, calendar):
self.da = xr.DataArray(data, dims="time", coords={"time": time})

def time_dayofyear(self, calendar):
self.da.time.dt.dayofyear
_ = self.da.time.dt.dayofyear

def time_year(self, calendar):
self.da.time.dt.year
_ = self.da.time.dt.year

def time_floor(self, calendar):
self.da.time.dt.floor("D")
_ = self.da.time.dt.floor("D")
14 changes: 6 additions & 8 deletions asv_bench/benchmarks/dataset_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
import pandas as pd

import xarray as xr
from xarray.backends.api import open_datatree
from xarray.core.datatree import DataTree

from . import _skip_slow, parameterized, randint, randn, requires_dask

Expand Down Expand Up @@ -556,7 +554,7 @@ def make_datatree(self, nchildren=10):
for group in range(self.nchildren)
}
dtree = root | nested_tree1 | nested_tree2 | nested_tree3
self.dtree = DataTree.from_dict(dtree)
self.dtree = xr.DataTree.from_dict(dtree)


class IOReadDataTreeNetCDF4(IONestedDataTree):
Expand All @@ -574,10 +572,10 @@ def setup(self):
dtree.to_netcdf(filepath=self.filepath)

def time_load_datatree_netcdf4(self):
open_datatree(self.filepath, engine="netcdf4").load()
xr.open_datatree(self.filepath, engine="netcdf4").load()

def time_open_datatree_netcdf4(self):
open_datatree(self.filepath, engine="netcdf4")
xr.open_datatree(self.filepath, engine="netcdf4")


class IOWriteNetCDFDask:
Expand Down Expand Up @@ -608,8 +606,8 @@ def setup(self):

try:
import distributed
except ImportError:
raise NotImplementedError()
except ImportError as err:
raise NotImplementedError() from err

self.client = distributed.Client()
self.write = create_delayed_write()
Expand Down Expand Up @@ -724,7 +722,7 @@ class PerformanceBackend(xr.backends.BackendEntrypoint):
def open_dataset(
self,
filename_or_obj: str | os.PathLike | None,
drop_variables: tuple[str] = None,
drop_variables: tuple[str, ...] = None,
*,
mask_and_scale=True,
decode_times=True,
Expand Down
15 changes: 15 additions & 0 deletions asv_bench/benchmarks/datatree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import xarray as xr
from xarray.core.datatree import DataTree


class Datatree:
def setup(self):
run1 = DataTree.from_dict({"run1": xr.Dataset({"a": 1})})
self.d_few = {"run1": run1}
self.d_many = {f"run{i}": xr.Dataset({"a": 1}) for i in range(100)}

def time_from_dict_few(self):
DataTree.from_dict(self.d_few)

def time_from_dict_many(self):
DataTree.from_dict(self.d_many)
19 changes: 17 additions & 2 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# import flox to avoid the cost of first import
import cftime
import flox.xarray # noqa
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -96,7 +97,7 @@ def setup(self, *args, **kwargs):

requires_dask()
super().setup(**kwargs)
self.ds1d = self.ds1d.chunk({"dim_0": 50}).to_dataframe()
self.ds1d = self.ds1d.chunk({"dim_0": 50}).to_dask_dataframe()
self.ds1d_mean = self.ds1d.groupby("b").mean().compute()

def time_binary_op_2d(self):
Expand Down Expand Up @@ -169,7 +170,21 @@ class GroupByLongTime:
def setup(self, use_cftime, use_flox):
arr = np.random.randn(10, 10, 365 * 30)
time = xr.date_range("2000", periods=30 * 365, use_cftime=use_cftime)
self.da = xr.DataArray(arr, dims=("y", "x", "time"), coords={"time": time})

# GH9426 - deep-copying CFTime object arrays is weirdly slow
asda = xr.DataArray(time)
labeled_time = []
for year, month in zip(asda.dt.year, asda.dt.month, strict=True):
labeled_time.append(cftime.datetime(year, month, 1))

self.da = xr.DataArray(
arr,
dims=("y", "x", "time"),
coords={"time": time, "time2": ("time", labeled_time)},
)

def time_setup(self, use_cftime, use_flox):
self.da.groupby("time.month")

def time_mean(self, use_cftime, use_flox):
with xr.set_options(use_flox=use_flox):
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def time_rolling_long(self, func, pandas, use_bottleneck):
def time_rolling_np(self, window_, min_periods, use_bottleneck):
with xr.set_options(use_bottleneck=use_bottleneck):
self.ds.rolling(x=window_, center=False, min_periods=min_periods).reduce(
getattr(np, "nansum")
np.nansum
).load()

@parameterized(
Expand Down
4 changes: 2 additions & 2 deletions ci/min_deps_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ def parse_requirements(fname) -> Iterator[tuple[str, int, int, int | None]]:

try:
version_tup = tuple(int(x) for x in version.split("."))
except ValueError:
raise ValueError("non-numerical version: " + row)
except ValueError as err:
raise ValueError("non-numerical version: " + row) from err

if len(version_tup) == 2:
yield (pkg, *version_tup, None) # type: ignore[misc]
Expand Down
4 changes: 2 additions & 2 deletions ci/requirements/bare-minimum.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ dependencies:
- pytest-env
- pytest-xdist
- pytest-timeout
- numpy=1.23
- numpy=1.24
- packaging=23.1
- pandas=2.0
- pandas=2.1
3 changes: 2 additions & 1 deletion ci/requirements/doc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ dependencies:
- sphinx-copybutton
- sphinx-design
- sphinx-inline-tabs
- sphinx>=5.0
- sphinx>=5.0,<7.0 # https://github.com/executablebooks/sphinx-book-theme/issues/749
- sphinxcontrib-srclinks
- sphinx-remove-toctrees
- sphinxext-opengraph
- sphinxext-rediraffe
Expand Down
24 changes: 12 additions & 12 deletions ci/requirements/min-all-deps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,37 +9,37 @@ dependencies:
# doc/user-guide/installing.rst, doc/user-guide/plotting.rst and setup.py.
- python=3.10
- array-api-strict=1.0 # dependency for testing the array api compat
- boto3=1.26
- boto3=1.28
- bottleneck=1.3
- cartopy=0.21
- cartopy=0.22
- cftime=1.6
- coveralls
- dask-core=2023.4
- distributed=2023.4
- dask-core=2023.9
- distributed=2023.9
# Flox > 0.8 has a bug with numbagg versions
# It will require numbagg > 0.6
# so we should just skip that series eventually
# or keep flox pinned for longer than necessary
- flox=0.7
- h5netcdf=1.1
- h5netcdf=1.2
# h5py and hdf5 tend to cause conflicts
# for e.g. hdf5 1.12 conflicts with h5py=3.1
# prioritize bumping other packages instead
- h5py=3.8
- hdf5=1.12
- hypothesis
- iris=3.4
- iris=3.7
- lxml=4.9 # Optional dep of pydap
- matplotlib-base=3.7
- nc-time-axis=1.4
# netcdf follows a 1.major.minor[.patch] convention
# (see https://github.com/Unidata/netcdf4-python/issues/1090)
- netcdf4=1.6.0
- numba=0.56
- numba=0.57
- numbagg=0.2.1
- numpy=1.23
- numpy=1.24
- packaging=23.1
- pandas=2.0
- pandas=2.1
- pint=0.22
- pip
- pydap=3.4
Expand All @@ -49,9 +49,9 @@ dependencies:
- pytest-xdist
- pytest-timeout
- rasterio=1.3
- scipy=1.10
- scipy=1.11
- seaborn=0.12
- sparse=0.14
- toolz=0.12
- typing_extensions=4.5
- zarr=2.14
- typing_extensions=4.7
- zarr=2.16
2 changes: 1 addition & 1 deletion design_notes/flexible_indexes_notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ An `XarrayIndex` subclass must/should/may implement the following properties/met
- a `data` property to access index's data and map it to coordinate data (see [Section 4](#4-indexvariable))
- a `__getitem__()` implementation to propagate the index through DataArray/Dataset indexing operations
- `equals()`, `union()` and `intersection()` methods for data alignment (see [Section 2.6](#26-using-indexes-for-data-alignment))
- Xarray coordinate getters (see [Section 2.2.4](#224-implicit-coodinates))
- Xarray coordinate getters (see [Section 2.2.4](#224-implicit-coordinates))
- a method that may return a new index and that will be called when one of the corresponding coordinates is dropped from the Dataset/DataArray (multi-coordinate indexes)
- `encode()`/`decode()` methods that would allow storage-agnostic serialization and fast-path reconstruction of the underlying index object(s) (see [Section 2.8](#28-index-encoding))
- one or more "non-standard" methods or properties that could be leveraged in Xarray 3rd-party extensions like Dataset/DataArray accessors (see [Section 2.7](#27-using-indexes-for-other-purposes))
Expand Down
2 changes: 1 addition & 1 deletion design_notes/grouper_objects.md
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ where `|` represents chunk boundaries. A simple rechunking to
```
000|111122|3333
```
would make this resampling reduction an embarassingly parallel blockwise problem.
would make this resampling reduction an embarrassingly parallel blockwise problem.

Similarly consider monthly-mean climatologies for which the month numbers might be
```
Expand Down
2 changes: 1 addition & 1 deletion design_notes/named_array_design_doc.md
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ Questions:
Variable.coarsen_reshape
Variable.rolling_window

Variable.set_dims # split this into broadcas_to and expand_dims
Variable.set_dims # split this into broadcast_to and expand_dims


# Reordering/Reshaping
Expand Down
Loading

0 comments on commit 9baaf50

Please sign in to comment.