diff --git a/asv_bench/benchmarks/__init__.py b/asv_bench/benchmarks/__init__.py
index 47eb44ee54f..632b38e329f 100644
--- a/asv_bench/benchmarks/__init__.py
+++ b/asv_bench/benchmarks/__init__.py
@@ -18,15 +18,15 @@ def decorator(func):
 def requires_dask():
     try:
         import dask  # noqa: F401
-    except ImportError:
-        raise NotImplementedError()
+    except ImportError as err:
+        raise NotImplementedError() from err


 def requires_sparse():
     try:
         import sparse  # noqa: F401
-    except ImportError:
-        raise NotImplementedError()
+    except ImportError as err:
+        raise NotImplementedError() from err


 def randn(shape, frac_nan=None, chunks=None, seed=0):
diff --git a/asv_bench/benchmarks/accessors.py b/asv_bench/benchmarks/accessors.py
index f9eb95851cc..259c06160ac 100644
--- a/asv_bench/benchmarks/accessors.py
+++ b/asv_bench/benchmarks/accessors.py
@@ -16,10 +16,10 @@ def setup(self, calendar):
         self.da = xr.DataArray(data, dims="time", coords={"time": time})

     def time_dayofyear(self, calendar):
-        self.da.time.dt.dayofyear
+        _ = self.da.time.dt.dayofyear

     def time_year(self, calendar):
-        self.da.time.dt.year
+        _ = self.da.time.dt.year

     def time_floor(self, calendar):
-        self.da.time.dt.floor("D")
+        _ = self.da.time.dt.floor("D")
diff --git a/asv_bench/benchmarks/dataset_io.py b/asv_bench/benchmarks/dataset_io.py
index 6031619e4ab..8ab367b76e0 100644
--- a/asv_bench/benchmarks/dataset_io.py
+++ b/asv_bench/benchmarks/dataset_io.py
@@ -606,8 +606,8 @@ def setup(self):
         try:
             import distributed
-        except ImportError:
-            raise NotImplementedError()
+        except ImportError as err:
+            raise NotImplementedError() from err

         self.client = distributed.Client()
         self.write = create_delayed_write()
diff --git a/ci/min_deps_check.py b/ci/min_deps_check.py
index 494849729bc..7690ab41652 100755
--- a/ci/min_deps_check.py
+++ b/ci/min_deps_check.py
@@ -69,8 +69,8 @@ def parse_requirements(fname) -> Iterator[tuple[str, int, int, int | None]]:
         try:
             version_tup = tuple(int(x) for x in version.split("."))
-        except ValueError:
-            raise ValueError("non-numerical version: " + row)
+        except ValueError as err:
+            raise ValueError("non-numerical version: " + row) from err

     if len(version_tup) == 2:
         yield (pkg, *version_tup, None)  # type: ignore[misc]
diff --git a/doc/conf.py b/doc/conf.py
index 2ac88b22371..3bf487e4882 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -259,7 +259,7 @@ Theme by the Executable Book Project

""", twitter_url="https://twitter.com/xarray_dev", icon_links=[], # workaround for pydata/pydata-sphinx-theme#1220 - announcement="Xarray's 2024 User Survey is live now. Please take ~5 minutes to fill it out and help us improve Xarray.", + # announcement="Xarray's 2024 User Survey is live now. Please take ~5 minutes to fill it out and help us improve Xarray.", ) diff --git a/doc/user-guide/hierarchical-data.rst b/doc/user-guide/hierarchical-data.rst index 450daf3f06d..84016348676 100644 --- a/doc/user-guide/hierarchical-data.rst +++ b/doc/user-guide/hierarchical-data.rst @@ -200,7 +200,7 @@ and even the distinguishing feature of the common ancestor of any two species (t .. ipython:: python - [node.name for node in primates.ancestors] + [node.name for node in reversed(primates.parents)] primates.root.name primates.find_common_ancestor(dinosaurs).name diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 264c07f562b..e4b2a06a3e7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -47,10 +47,17 @@ Bug fixes - Make illegal path-like variable names when constructing a DataTree from a Dataset (:issue:`9339`, :pull:`9378`) By `Etienne Schalk `_. +- Work around `upstream pandas issue + `_ to ensure that we can + decode times encoded with small integer dtype values (e.g. ``np.int32``) in + environments with NumPy 2.0 or greater without needing to fall back to cftime + (:pull:`9518`). By `Spencer Clark `_. - Fix bug when encoding times with missing values as floats in the case when the non-missing times could in theory be encoded with integers (:issue:`9488`, :pull:`9497`). By `Spencer Clark `_. +- Fix a few bugs affecting groupby reductions with `flox`. (:issue:`8090`, :issue:`9398`). + By `Deepak Cherian `_. Documentation diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 77b7fcbcd99..3722a657813 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -1,4 +1,5 @@ import itertools +import warnings import numpy as np import pytest @@ -184,7 +185,10 @@ def drop_dims(self, data): ) ) note(f"> drop_dims: {dims}") - self.dataset = self.dataset.drop_dims(dims) + # TODO: dropping a multi-index dimension raises a DeprecationWarning + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=DeprecationWarning) + self.dataset = self.dataset.drop_dims(dims) for dim in dims: if dim in self.indexed_dims: diff --git a/pyproject.toml b/pyproject.toml index 66db5e41dbc..e031625c973 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -249,13 +249,14 @@ docstring-code-format = true extend-safe-fixes = [ "TID252", # absolute imports ] -extend-ignore = [ +ignore = [ "E402", "E501", "E731", "UP007", ] extend-select = [ + "B", # flake8-bugbear "F", # Pyflakes "E", # Pycodestyle "W", diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 192102c5ba3..e9e3e9beacd 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -99,11 +99,11 @@ def _get_default_engine_remote_uri() -> Literal["netcdf4", "pydap"]: import pydap # noqa: F401 engine = "pydap" - except ImportError: + except ImportError as err: raise ValueError( "netCDF4 or pydap is required for accessing " "remote datasets via OPeNDAP" - ) + ) from err return engine @@ -112,8 +112,8 @@ def _get_default_engine_gz() -> Literal["scipy"]: import scipy # noqa: F401 engine: Final = "scipy" - except ImportError: # pragma: no cover - raise ValueError("scipy is required for accessing .gz files") + except ImportError as err: # pragma: no cover 
+ raise ValueError("scipy is required for accessing .gz files") from err return engine @@ -128,11 +128,11 @@ def _get_default_engine_netcdf() -> Literal["netcdf4", "scipy"]: import scipy.io.netcdf # noqa: F401 engine = "scipy" - except ImportError: + except ImportError as err: raise ValueError( "cannot read or write netCDF files without " "netCDF4-python or scipy installed" - ) + ) from err return engine @@ -1374,8 +1374,8 @@ def to_netcdf( try: store_open = WRITEABLE_STORES[engine] - except KeyError: - raise ValueError(f"unrecognized engine for to_netcdf: {engine!r}") + except KeyError as err: + raise ValueError(f"unrecognized engine for to_netcdf: {engine!r}") from err if format is not None: format = format.upper() # type: ignore[assignment] diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 363b45cf69e..482dc4a8ed5 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -111,7 +111,7 @@ def _getitem(self, key): with self.datastore.lock: original_array = self.get_array(needs_lock=False) array = getitem(original_array, key) - except IndexError: + except IndexError as err: # Catch IndexError in netCDF4 and return a more informative # error message. This is most often called when an unsorted # indexer is used before the data is loaded from disk. @@ -120,7 +120,7 @@ def _getitem(self, key): "is not valid on netCDF4.Variable object. Try loading " "your data into memory first by calling .load()." ) - raise IndexError(msg) + raise IndexError(msg) from err return array @@ -192,7 +192,7 @@ def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group): ds = create_group(ds, key) else: # wrap error to provide slightly more helpful message - raise OSError(f"group not found: {key}", e) + raise OSError(f"group not found: {key}", e) from e return ds diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index 217228f591b..dc12982d103 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -40,6 +40,7 @@ def remove_duplicates(entrypoints: EntryPoints) -> list[EntryPoint]: f"\n {all_module_names}.\n " f"The entrypoint {selected_module_name} will be used.", RuntimeWarning, + stacklevel=2, ) return unique_entrypoints @@ -72,7 +73,9 @@ def backends_dict_from_pkg( backend = entrypoint.load() backend_entrypoints[name] = backend except Exception as ex: - warnings.warn(f"Engine {name!r} loading failed:\n{ex}", RuntimeWarning) + warnings.warn( + f"Engine {name!r} loading failed:\n{ex}", RuntimeWarning, stacklevel=2 + ) return backend_entrypoints @@ -146,7 +149,9 @@ def guess_engine( except PermissionError: raise except Exception: - warnings.warn(f"{engine!r} fails while guessing", RuntimeWarning) + warnings.warn( + f"{engine!r} fails while guessing", RuntimeWarning, stacklevel=2 + ) compatible_engines = [] for engine, (_, backend_cls) in BACKEND_ENTRYPOINTS.items(): @@ -155,7 +160,9 @@ def guess_engine( if backend.guess_can_open(store_spec): compatible_engines.append(engine) except Exception: - warnings.warn(f"{engine!r} fails while guessing", RuntimeWarning) + warnings.warn( + f"{engine!r} fails while guessing", RuntimeWarning, stacklevel=2 + ) installed_engines = [k for k in engines if k != "store"] if not compatible_engines: diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 83031e1ef8b..e77443061fe 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -114,7 +114,9 @@ def _open_scipy_netcdf(filename, mode, mmap, version): # TODO: gzipped loading only works with NetCDF3 
files. errmsg = e.args[0] if "is not a valid NetCDF 3 file" in errmsg: - raise ValueError("gzipped file loading only supports NetCDF 3 files.") + raise ValueError( + "gzipped file loading only supports NetCDF 3 files." + ) from e else: raise @@ -134,7 +136,7 @@ def _open_scipy_netcdf(filename, mode, mmap, version): $ pip install netcdf4 """ errmsg += msg - raise TypeError(errmsg) + raise TypeError(errmsg) from e else: raise diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 473a611891a..2bbdac8d7f1 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -575,7 +575,7 @@ def get_attrs(self): def get_dimensions(self): try_nczarr = self._mode == "r" dimensions = {} - for k, v in self.zarr_group.arrays(): + for _k, v in self.zarr_group.arrays(): dim_names, _ = _get_zarr_dims_and_attrs(v, DIMENSION_KEY, try_nczarr) for d, s in zip(dim_names, v.shape, strict=True): if d in dimensions and dimensions[d] != s: @@ -1370,8 +1370,10 @@ def _get_open_params( RuntimeWarning, stacklevel=stacklevel, ) - except zarr.errors.GroupNotFoundError: - raise FileNotFoundError(f"No such file or directory: '{store}'") + except zarr.errors.GroupNotFoundError as err: + raise FileNotFoundError( + f"No such file or directory: '{store}'" + ) from err elif consolidated: # TODO: an option to pass the metadata_key keyword zarr_group = zarr.open_consolidated(store, **open_kwargs) diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index d3a0fbb3dba..e85fa2736b2 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -439,8 +439,8 @@ def _get_string_slice(self, key): parsed, resolution = _parse_iso8601_with_reso(self.date_type, key) try: loc = self._partial_date_slice(resolution, parsed) - except KeyError: - raise KeyError(key) + except KeyError as err: + raise KeyError(key) from err return loc def _get_nearest_indexer(self, target, limit, tolerance): @@ -593,21 +593,21 @@ def __sub__(self, other): if _contains_cftime_datetimes(np.array(other)): try: return pd.TimedeltaIndex(np.array(self) - np.array(other)) - except OUT_OF_BOUNDS_TIMEDELTA_ERRORS: + except OUT_OF_BOUNDS_TIMEDELTA_ERRORS as err: raise ValueError( "The time difference exceeds the range of values " "that can be expressed at the nanosecond resolution." - ) + ) from err return NotImplemented def __rsub__(self, other): try: return pd.TimedeltaIndex(other - np.array(self)) - except OUT_OF_BOUNDS_TIMEDELTA_ERRORS: + except OUT_OF_BOUNDS_TIMEDELTA_ERRORS as err: raise ValueError( "The time difference exceeds the range of values " "that can be expressed at the nanosecond resolution." - ) + ) from err def to_datetimeindex(self, unsafe=False): """If possible, convert this index to a pandas.DatetimeIndex. diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 5655bd20afc..9306bde47a3 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -169,7 +169,7 @@ def _ensure_padded_year(ref_date: str) -> str: "To remove this message, remove the ambiguity by padding your reference " "date strings with zeros." 
) - warnings.warn(warning_msg, SerializationWarning) + warnings.warn(warning_msg, SerializationWarning, stacklevel=2) return ref_date_padded @@ -216,7 +216,7 @@ def _decode_cf_datetime_dtype( try: result = decode_cf_datetime(example_value, units, calendar, use_cftime) - except Exception: + except Exception as err: calendar_msg = ( "the default calendar" if calendar is None else f"calendar {calendar!r}" ) @@ -225,7 +225,7 @@ def _decode_cf_datetime_dtype( "opening your dataset with decode_times=False or installing cftime " "if it is not installed." ) - raise ValueError(msg) + raise ValueError(msg) from err else: dtype = getattr(result, "dtype", np.dtype("object")) @@ -254,16 +254,25 @@ def _decode_datetime_with_pandas( "pandas." ) + # Work around pandas.to_timedelta issue with dtypes smaller than int64 and + # NumPy 2.0 by casting all int and uint data to int64 and uint64, + # respectively. See https://github.com/pandas-dev/pandas/issues/56996 for + # more details. + if flat_num_dates.dtype.kind == "i": + flat_num_dates = flat_num_dates.astype(np.int64) + elif flat_num_dates.dtype.kind == "u": + flat_num_dates = flat_num_dates.astype(np.uint64) + time_units, ref_date_str = _unpack_netcdf_time_units(units) time_units = _netcdf_to_numpy_timeunit(time_units) try: # TODO: the strict enforcement of nanosecond precision Timestamps can be # relaxed when addressing GitHub issue #7493. ref_date = nanosecond_precision_timestamp(ref_date_str) - except ValueError: + except ValueError as err: # ValueError is raised by pd.Timestamp for non-ISO timestamp # strings, in which case we fall back to using cftime - raise OutOfBoundsDatetime + raise OutOfBoundsDatetime from err with warnings.catch_warnings(): warnings.filterwarnings("ignore", "invalid value encountered", RuntimeWarning) @@ -488,7 +497,7 @@ def cftime_to_nptime(times, raise_on_invalid: bool = True) -> np.ndarray: raise ValueError( f"Cannot convert date {t} to a date in the " f"standard calendar. Reason: {e}." - ) + ) from e else: dt = "NaT" new[i] = np.datetime64(dt) @@ -521,7 +530,7 @@ def convert_times(times, date_type, raise_on_invalid: bool = True) -> np.ndarray raise ValueError( f"Cannot convert date {t} to a date in the " f"{date_type(2000, 1, 1).calendar} calendar. Reason: {e}." - ) + ) from e else: dt = np.nan diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index e73893d0f35..72b9710372f 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -454,6 +454,7 @@ def weekofyear(self) -> DataArray: "dt.weekofyear and dt.week have been deprecated. 
Please use " "dt.isocalendar().week instead.", FutureWarning, + stacklevel=2, ) weekofyear = self.isocalendar().week diff --git a/xarray/core/combine.py b/xarray/core/combine.py index c7dff9d249d..50cfd87076f 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -308,7 +308,7 @@ def _combine_1d( "xarray.combine_by_coords, or do it manually " "with xarray.concat, xarray.merge and " "xarray.align" - ) + ) from err else: raise else: diff --git a/xarray/core/common.py b/xarray/core/common.py index 6a93a517aac..fff227bf4d7 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -241,8 +241,10 @@ def _get_axis_num(self: Any, dim: Hashable) -> int: _raise_if_any_duplicate_dimensions(self.dims) try: return self.dims.index(dim) - except ValueError: - raise ValueError(f"{dim!r} not found in array dimensions {self.dims!r}") + except ValueError as err: + raise ValueError( + f"{dim!r} not found in array dimensions {self.dims!r}" + ) from err @property def sizes(self: Any) -> Mapping[Hashable, int]: @@ -878,7 +880,8 @@ def rolling_exp( warnings.warn( "Passing ``keep_attrs`` to ``rolling_exp`` has no effect. Pass" " ``keep_attrs`` directly to the applied function, e.g." - " ``rolling_exp(...).mean(keep_attrs=False)``." + " ``rolling_exp(...).mean(keep_attrs=False)``.", + stacklevel=2, ) window = either_dict_or_kwargs(window, window_kwargs, "rolling_exp") @@ -1511,7 +1514,7 @@ def full_like( fill_value: Any, dtype: DTypeMaybeMapping | None = None, *, - chunks: T_Chunks = {}, + chunks: T_Chunks = {}, # noqa: B006 chunked_array_type: str | None = None, from_array_kwargs: dict[str, Any] | None = None, ) -> Dataset | DataArray: ... diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 1133d8cc373..e137cff257f 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -252,8 +252,8 @@ def concat( try: first_obj, objs = utils.peek_at(objs) - except StopIteration: - raise ValueError("must supply at least one object to concatenate") + except StopIteration as err: + raise ValueError("must supply at least one object to concatenate") from err if compat not in _VALID_COMPAT: raise ValueError( diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index bb18bac0a1f..a6dec863aec 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -752,7 +752,7 @@ def _update_coords( # check for inconsistent state *before* modifying anything in-place dims = calculate_dimensions(variables) new_coord_names = set(coords) - for dim, size in dims.items(): + for dim, _size in dims.items(): if dim in variables: new_coord_names.add(dim) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 086ed2d9bd3..e3eb9200aed 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -177,7 +177,7 @@ def _infer_coords_and_dims( else: for n, (dim, coord) in enumerate(zip(dims, coords, strict=True)): coord = as_variable( - coord, name=dims[n], auto_convert=False + coord, name=dim, auto_convert=False ).to_index_variable() dims[n] = coord.name dims_tuple = tuple(dims) @@ -963,7 +963,8 @@ def encoding(self, value: Mapping[Any, Any]) -> None: def reset_encoding(self) -> Self: warnings.warn( - "reset_encoding is deprecated since 2023.11, use `drop_encoding` instead" + "reset_encoding is deprecated since 2023.11, use `drop_encoding` instead", + stacklevel=2, ) return self.drop_encoding() @@ -1360,7 +1361,7 @@ def chunksizes(self) -> Mapping[Any, tuple[int, ...]]: @_deprecate_positional_args("v2023.10.0") def chunk( self, - chunks: T_ChunksFreq = {}, # {} 
even though it's technically unsafe, is being used intentionally here (#4667) + chunks: T_ChunksFreq = {}, # noqa: B006 # {} even though it's technically unsafe, is being used intentionally here (#4667) *, name_prefix: str = "xarray-", token: str | None = None, @@ -1427,6 +1428,7 @@ def chunk( "None value for 'chunks' is deprecated. " "It will raise an error in the future. Use instead '{}'", category=FutureWarning, + stacklevel=2, ) chunk_mapping = {} @@ -3858,11 +3860,11 @@ def to_pandas(self) -> Self | pd.Series | pd.DataFrame: constructors = {0: lambda x: x, 1: pd.Series, 2: pd.DataFrame} try: constructor = constructors[self.ndim] - except KeyError: + except KeyError as err: raise ValueError( f"Cannot convert arrays with {self.ndim} dimensions into " "pandas objects. Requires 2 or fewer dimensions." - ) + ) from err indexes = [self.get_index(dim) for dim in self.dims] return constructor(self.values, *indexes) # type: ignore[operator] @@ -4466,11 +4468,11 @@ def from_dict(cls, d: Mapping[str, Any]) -> Self: raise ValueError( "cannot convert dict when coords are missing the key " f"'{str(e.args[0])}'" - ) + ) from e try: data = d["data"] - except KeyError: - raise ValueError("cannot convert dict without the key 'data''") + except KeyError as err: + raise ValueError("cannot convert dict without the key 'data''") from err else: obj = cls(data, coords, d.get("dims"), d.get("name"), d.get("attrs")) @@ -6784,8 +6786,8 @@ def groupby( >>> da.groupby("letters").sum() Size: 48B - array([[ 9., 11., 13.], - [ 9., 11., 13.]]) + array([[ 9, 11, 13], + [ 9, 11, 13]]) Coordinates: * letters (letters) object 16B 'a' 'b' Dimensions without coordinates: y diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 6ef6e4c6feb..59c772f098c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -282,7 +282,8 @@ def _get_chunk(var: Variable, chunks, chunkmanager: ChunkManagerEntrypoint): warnings.warn( "The specified chunks separate the stored chunks along " f'dimension "{dim}" starting at index {min(breaks)}. This could ' - "degrade performance. Instead, consider rechunking after loading." + "degrade performance. Instead, consider rechunking after loading.", + stacklevel=2, ) return dict(zip(dims, chunk_shape, strict=True)) @@ -358,12 +359,12 @@ def _get_func_args(func, param_names): """ try: func_args = inspect.signature(func).parameters - except ValueError: + except ValueError as err: func_args = {} if not param_names: raise ValueError( "Unable to inspect `func` signature, and `param_names` was not provided." - ) + ) from err if param_names: params = param_names else: @@ -779,7 +780,8 @@ def encoding(self, value: Mapping[Any, Any]) -> None: def reset_encoding(self) -> Self: warnings.warn( - "reset_encoding is deprecated since 2023.11, use `drop_encoding` instead" + "reset_encoding is deprecated since 2023.11, use `drop_encoding` instead", + stacklevel=2, ) return self.drop_encoding() @@ -2657,7 +2659,7 @@ def chunksizes(self) -> Mapping[Hashable, tuple[int, ...]]: def chunk( self, - chunks: T_ChunksFreq = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667) + chunks: T_ChunksFreq = {}, # noqa: B006 # {} even though it's technically unsafe, is being used intentionally here (#4667) name_prefix: str = "xarray-", token: str | None = None, lock: bool = False, @@ -2725,6 +2727,7 @@ def chunk( "None value for 'chunks' is deprecated. " "It will raise an error in the future. 
Use instead '{}'", category=DeprecationWarning, + stacklevel=2, ) chunks = {} chunks_mapping: Mapping[Any, Any] @@ -4803,6 +4806,7 @@ def expand_dims( f"No index created for dimension {k} because variable {k} is not a coordinate. " f"To create an index for {k}, please first call `.set_coords('{k}')` on this object.", UserWarning, + stacklevel=2, ) # create 1D variable without creating a new index @@ -5541,7 +5545,7 @@ def _unstack_once( new_indexes, clean_index = index.unstack() indexes.update(new_indexes) - for name, idx in new_indexes.items(): + for _name, idx in new_indexes.items(): variables.update(idx.create_variables(index_vars)) for name, var in self.variables.items(): @@ -5582,7 +5586,7 @@ def _unstack_full_reindex( indexes.update(new_indexes) new_index_variables = {} - for name, idx in new_indexes.items(): + for _name, idx in new_indexes.items(): new_index_variables.update(idx.create_variables(index_vars)) new_dim_sizes = {k: v.size for k, v in new_index_variables.items()} @@ -6209,8 +6213,10 @@ def drop_sel( labels_for_dim = np.asarray(labels_for_dim) try: index = self.get_index(dim) - except KeyError: - raise ValueError(f"dimension {dim!r} does not have coordinate labels") + except KeyError as err: + raise ValueError( + f"dimension {dim!r} does not have coordinate labels" + ) from err new_index = index.drop(labels_for_dim, errors=errors) ds = ds.loc[{dim: new_index}] return ds @@ -7743,7 +7749,9 @@ def from_dict(cls, d: Mapping[Any, Any]) -> Self: for k, v in variables } except KeyError as e: - raise ValueError(f"cannot convert dict without the key '{str(e.args[0])}'") + raise ValueError( + f"cannot convert dict without the key '{str(e.args[0])}'" + ) from e obj = cls(variable_dict) # what if coords aren't dims? @@ -8333,6 +8341,7 @@ def quantile( warnings.warn( "The `interpolation` argument to quantile was renamed to `method`.", FutureWarning, + stacklevel=2, ) if method != "linear": @@ -10389,7 +10398,7 @@ def groupby( * letters (letters) object 16B 'a' 'b' Dimensions without coordinates: y Data variables: - foo (letters, y) float64 48B 9.0 11.0 13.0 9.0 11.0 13.0 + foo (letters, y) int64 48B 9 11 13 9 11 13 Grouping by multiple variables diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index ec4786514da..4eaf1083709 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -894,6 +894,8 @@ def __setitem__( # TODO should possibly deal with hashables in general? 
# path-like: a name of a node/variable, or path to a node/variable path = NodePath(key) + if isinstance(value, Dataset): + value = DataTree(dataset=value) return self._set_item(path, value, new_nodes_along_path=True) else: raise ValueError("Invalid format for key") diff --git a/xarray/core/datatree_render.py b/xarray/core/datatree_render.py index 38a7f0dc567..7f55fc93732 100644 --- a/xarray/core/datatree_render.py +++ b/xarray/core/datatree_render.py @@ -76,7 +76,7 @@ class RenderDataTree: def __init__( self, node: DataTree, - style=ContStyle(), + style=None, childiter: type = list, maxlevel: int | None = None, ): @@ -159,6 +159,8 @@ def __init__( ├── sub0 └── sub1 """ + if style is None: + style = ContStyle() if not isinstance(style, AbstractStyle): style = style() self.node = node diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 204579757e1..95aba0441e2 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -451,10 +451,10 @@ def f(values, axis=None, skipna=None, **kwargs): try: # dask/dask#3133 dask sometimes needs dtype argument # if func does not accept dtype, then raises TypeError return func(values, axis=axis, dtype=values.dtype, **kwargs) - except (AttributeError, TypeError): + except (AttributeError, TypeError) as err: raise NotImplementedError( f"{name} is not yet implemented on dask arrays" - ) + ) from err f.__name__ = name return f @@ -592,10 +592,10 @@ def timedelta_to_numeric(value, datetime_unit="ns", dtype=float): elif isinstance(value, str): try: a = pd.to_timedelta(value) - except ValueError: + except ValueError as err: raise ValueError( f"Could not convert {value!r} to timedelta64 using pandas.to_timedelta" - ) + ) from err return py_timedelta_to_float(a, datetime_unit) else: raise TypeError( @@ -755,7 +755,8 @@ def _push(array, n: int | None = None, axis: int = -1): if pycompat.mod_version("numbagg") < Version("0.6.2"): warnings.warn( - f"numbagg >= 0.6.2 is required for bfill & ffill; {pycompat.mod_version('numbagg')} is installed. We'll attempt with bottleneck instead." + f"numbagg >= 0.6.2 is required for bfill & ffill; {pycompat.mod_version('numbagg')} is installed. 
We'll attempt with bottleneck instead.", + stacklevel=2, ) else: return numbagg.ffill(array, limit=n, axis=axis) diff --git a/xarray/core/extensions.py b/xarray/core/extensions.py index 4820d8e49af..ec45571d1c3 100644 --- a/xarray/core/extensions.py +++ b/xarray/core/extensions.py @@ -37,11 +37,11 @@ def __get__(self, obj, cls): try: accessor_obj = self._accessor(obj) - except AttributeError: + except AttributeError as err: # __getattr__ on data object will swallow any AttributeErrors # raised when initializing the accessor, so we need to raise as # something else (GH933): - raise RuntimeError(f"error initializing {self._name!r} accessor.") + raise RuntimeError(f"error initializing {self._name!r} accessor.") from err cache[self._name] = accessor_obj return accessor_obj diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 7ed6c1d9b41..693d05958e7 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -392,8 +392,8 @@ def _resolve_group( if isinstance(group, DataArray): try: align(obj, group, join="exact", copy=False) - except ValueError: - raise ValueError(error_msg) + except ValueError as err: + raise ValueError(error_msg) from err newgroup = group.copy(deep=False) newgroup.name = group.name or "group" @@ -796,14 +796,12 @@ def _maybe_restore_empty_groups(self, combined): """Our index contained empty groups (e.g., from a resampling or binning). If we reduced on that dimension, we want to restore the full index. """ - from xarray.groupers import BinGrouper, TimeResampler - + has_missing_groups = ( + self.encoded.unique_coord.size != self.encoded.full_index.size + ) indexers = {} for grouper in self.groupers: - if ( - isinstance(grouper.grouper, BinGrouper | TimeResampler) - and grouper.name in combined.dims - ): + if has_missing_groups and grouper.name in combined._indexes: indexers[grouper.name] = grouper.full_index if indexers: combined = combined.reindex(**indexers) @@ -858,10 +856,6 @@ def _flox_reduce( else obj._coords ) - any_isbin = any( - isinstance(grouper.grouper, BinGrouper) for grouper in self.groupers - ) - if keep_attrs is None: keep_attrs = _get_keep_attrs(default=True) @@ -935,14 +929,14 @@ def _flox_reduce( ): raise ValueError(f"cannot reduce over dimensions {dim}.") - if kwargs["func"] not in ["all", "any", "count"]: - kwargs.setdefault("fill_value", np.nan) - if any_isbin and kwargs["func"] == "count": - # This is an annoying hack. Xarray returns np.nan - # when there are no observations in a bin, instead of 0. - # We can fake that here by forcing min_count=1. - # note min_count makes no sense in the xarray world - # as a kwarg for count, so this should be OK + has_missing_groups = ( + self.encoded.unique_coord.size != self.encoded.full_index.size + ) + if has_missing_groups or kwargs.get("min_count", 0) > 0: + # Xarray *always* returns np.nan when there are no observations in a group, + # We can fake that here by forcing min_count=1 when it is not set. 
+ # This handles boolean reductions, and count + # See GH8090, GH9398 kwargs.setdefault("fill_value", np.nan) kwargs.setdefault("min_count", 1) @@ -1279,7 +1273,7 @@ def _iter_grouped_shortcut(self): metadata """ var = self._obj.variable - for idx, indices in enumerate(self.encoded.group_indices): + for _idx, indices in enumerate(self.encoded.group_indices): if indices: yield var[{self._group_dim: indices}] diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index e0b58c0bead..4a735959298 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -1206,12 +1206,12 @@ def sel(self, labels, method=None, tolerance=None) -> IndexSelResult: label_array = normalize_label(v, dtype=self.level_coords_dtype[k]) try: label_values[k] = as_scalar(label_array) - except ValueError: + except ValueError as err: # label should be an item not an array-like raise ValueError( "Vectorized selection is not " f"available along coordinate {k!r} (multi-index level)" - ) + ) from err has_slice = any([isinstance(v, slice) for v in label_values.values()]) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 06b4b9a475f..67912908a2b 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1527,7 +1527,7 @@ def _safe_setitem(self, array, key: tuple[Any, ...], value: Any) -> None: raise ValueError( "Assignment destination is a view. " "Do you want to .copy() array first?" - ) + ) from exc else: raise exc diff --git a/xarray/core/merge.py b/xarray/core/merge.py index bd927a188df..43d3ac9b404 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -710,7 +710,7 @@ def merge_core( coord_names.intersection_update(variables) if explicit_coords is not None: coord_names.update(explicit_coords) - for dim, size in dims.items(): + for dim, _size in dims.items(): if dim in variables: coord_names.add(dim) ambiguous_coords = coord_names.intersection(noncoord_names) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 55e754010da..2df53b172f0 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -298,13 +298,13 @@ def get_clean_interp_index( # raise if index cannot be cast to a float (e.g. MultiIndex) try: index = index.values.astype(np.float64) - except (TypeError, ValueError): + except (TypeError, ValueError) as err: # pandas raises a TypeError # xarray/numpy raise a ValueError raise TypeError( f"Index {index.name!r} must be castable to float64 to support " f"interpolation or curve fitting, got {type(index).__name__}." 
- ) + ) from err return index @@ -619,12 +619,12 @@ def interp(var, indexes_coords, method: InterpOptions, **kwargs): result = var # decompose the interpolation into a succession of independent interpolation - for indexes_coords in decompose_interp(indexes_coords): + for indep_indexes_coords in decompose_interp(indexes_coords): var = result # target dimensions - dims = list(indexes_coords) - x, new_x = zip(*[indexes_coords[d] for d in dims], strict=True) + dims = list(indep_indexes_coords) + x, new_x = zip(*[indep_indexes_coords[d] for d in dims], strict=True) destination = broadcast_variables(*new_x) # transpose to make the interpolated axis to the last position @@ -641,7 +641,7 @@ def interp(var, indexes_coords, method: InterpOptions, **kwargs): out_dims: OrderedSet = OrderedSet() for d in var.dims: if d in dims: - out_dims.update(indexes_coords[d][1].dims) + out_dims.update(indep_indexes_coords[d][1].dims) else: out_dims.add(d) if len(out_dims) > 1: diff --git a/xarray/core/options.py b/xarray/core/options.py index a680a6c2157..089972d83a7 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -131,6 +131,7 @@ def _warn_on_setting_enable_cftimeindex(enable_cftimeindex): "The enable_cftimeindex option is now a no-op " "and will be removed in a future version of xarray.", FutureWarning, + stacklevel=2, ) diff --git a/xarray/core/treenode.py b/xarray/core/treenode.py index 17accf74383..604eb274aa9 100644 --- a/xarray/core/treenode.py +++ b/xarray/core/treenode.py @@ -314,6 +314,7 @@ def iter_lineage(self: Tree) -> tuple[Tree, ...]: "`iter_lineage` has been deprecated, and in the future will raise an error." "Please use `parents` from now on.", DeprecationWarning, + stacklevel=2, ) return tuple((self, *self.parents)) @@ -326,6 +327,7 @@ def lineage(self: Tree) -> tuple[Tree, ...]: "`lineage` has been deprecated, and in the future will raise an error." "Please use `parents` from now on.", DeprecationWarning, + stacklevel=2, ) return self.iter_lineage() @@ -344,6 +346,7 @@ def ancestors(self: Tree) -> tuple[Tree, ...]: "`ancestors` has been deprecated, and in the future will raise an error." "Please use `parents`. Example: `tuple(reversed(node.parents))`", DeprecationWarning, + stacklevel=2, ) return tuple((*reversed(self.parents), self)) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 68d17fc3614..3c1dee7a36d 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -570,8 +570,8 @@ def size(self: Any) -> int: def __len__(self: Any) -> int: try: return self.shape[0] - except IndexError: - raise TypeError("len() of unsized object") + except IndexError as err: + raise TypeError("len() of unsized object") from err class NDArrayMixin(NdimSizeLenMixin): @@ -807,7 +807,8 @@ def drop_dims_from_indexers( invalid = indexers.keys() - set(dims) if invalid: warnings.warn( - f"Dimensions {invalid} do not exist. Expected one or more of {dims}" + f"Dimensions {invalid} do not exist. 
Expected one or more of {dims}", + stacklevel=2, ) for key in invalid: indexers.pop(key) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index eb2fe6861dd..3da8b024478 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -132,8 +132,10 @@ def as_variable( elif isinstance(obj, tuple): try: dims_, data_, *attrs = obj - except ValueError: - raise ValueError(f"Tuple {obj} is not in the form (dims, data[, attrs])") + except ValueError as err: + raise ValueError( + f"Tuple {obj} is not in the form (dims, data[, attrs])" + ) from err if isinstance(data_, DataArray): raise TypeError( @@ -146,7 +148,7 @@ def as_variable( raise error.__class__( f"Variable {name!r}: Could not convert tuple of form " f"(dims, data[, attrs, encoding]): {obj} to Variable." - ) + ) from error elif utils.is_scalar(obj): obj = Variable([], obj) elif isinstance(obj, pd.Index | IndexVariable) and obj.name is not None: @@ -768,8 +770,8 @@ def _broadcast_indexes_vectorized(self, key): try: variables = _broadcast_compat_variables(*variables) - except ValueError: - raise IndexError(f"Dimensions of indexers mismatch: {key}") + except ValueError as err: + raise IndexError(f"Dimensions of indexers mismatch: {key}") from err out_key = [variable.data for variable in variables] out_dims = tuple(out_dims_set) @@ -895,12 +897,13 @@ def encoding(self) -> dict[Any, Any]: def encoding(self, value): try: self._encoding = dict(value) - except ValueError: - raise ValueError("encoding must be castable to a dictionary") + except ValueError as err: + raise ValueError("encoding must be castable to a dictionary") from err def reset_encoding(self) -> Self: warnings.warn( - "reset_encoding is deprecated since 2023.11, use `drop_encoding` instead" + "reset_encoding is deprecated since 2023.11, use `drop_encoding` instead", + stacklevel=2, ) return self.drop_encoding() @@ -1894,6 +1897,7 @@ def quantile( warnings.warn( "The `interpolation` argument to quantile was renamed to `method`.", FutureWarning, + stacklevel=2, ) if method != "linear": @@ -2527,7 +2531,7 @@ def _to_dense(self) -> Variable: def chunk( # type: ignore[override] self, - chunks: T_Chunks = {}, + chunks: T_Chunks = {}, # noqa: B006 # even though it's technically unsafe, it is being used intentionally here (#4667) name: str | None = None, lock: bool | None = None, inline_array: bool | None = None, @@ -2662,7 +2666,7 @@ def values(self, values): def chunk( self, - chunks={}, + chunks={}, # noqa: B006 # even though it's unsafe, it is being used intentionally here (#4667) name=None, lock=False, inline_array=False, diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 0d1a50a8d3c..6f5ed671de8 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -517,6 +517,7 @@ def _parse_dimensions(self, dims: _DimsLike) -> _Dims: "We recommend you rename the dims immediately to become distinct, as most xarray functionality is likely to fail silently if you do not. " "To rename the dimensions you will need to set the ``.dims`` attribute of each variable, ``e.g. 
var.dims=('x0', 'x1')``.", UserWarning, + stacklevel=2, ) return dims @@ -696,8 +697,10 @@ def _get_axis_num(self: Any, dim: Hashable) -> int: _raise_if_any_duplicate_dimensions(self.dims) try: return self.dims.index(dim) # type: ignore[no-any-return] - except ValueError: - raise ValueError(f"{dim!r} not found in array dimensions {self.dims!r}") + except ValueError as err: + raise ValueError( + f"{dim!r} not found in array dimensions {self.dims!r}" + ) from err @property def chunks(self) -> _Chunks | None: @@ -748,7 +751,7 @@ def sizes(self) -> dict[_Dim, _IntOrUnknown]: def chunk( self, - chunks: T_Chunks = {}, + chunks: T_Chunks = {}, # noqa: B006 # even though it's unsafe, it is being used intentionally here (#4667) chunked_array_type: str | ChunkManagerEntrypoint[Any] | None = None, from_array_kwargs: Any = None, **chunks_kwargs: Any, @@ -801,6 +804,7 @@ def chunk( "None value for 'chunks' is deprecated. " "It will raise an error in the future. Use instead '{}'", category=FutureWarning, + stacklevel=2, ) chunks = {} diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index 606e72acd0e..96060730345 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -130,7 +130,8 @@ def drop_missing_dims( elif missing_dims == "warn": if invalid := set(supplied_dims) - set(dims): warnings.warn( - f"Dimensions {invalid} do not exist. Expected one or more of {dims}" + f"Dimensions {invalid} do not exist. Expected one or more of {dims}", + stacklevel=2, ) return [val for val in supplied_dims if val in dims or val is ...] diff --git a/xarray/plot/facetgrid.py b/xarray/plot/facetgrid.py index 4e43ad2826c..1b391b6fff4 100644 --- a/xarray/plot/facetgrid.py +++ b/xarray/plot/facetgrid.py @@ -184,7 +184,9 @@ def __init__( ncol = len(data[col]) nfacet = nrow * ncol if col_wrap is not None: - warnings.warn("Ignoring col_wrap since both col and row were passed") + warnings.warn( + "Ignoring col_wrap since both col and row were passed", stacklevel=2 + ) elif row and not col: single_group = row elif not row and col: diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 744fb6ef26d..1d61c5afc74 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -372,7 +372,8 @@ def _infer_xy_labels_3d( "Several dimensions of this array could be colors. Xarray " f"will use the last possible dimension ({rgb!r}) to match " "matplotlib.pyplot.imshow. You can pass names of x, y, " - "and/or rgb dimensions to override this guess." + "and/or rgb dimensions to override this guess.", + stacklevel=2, ) assert rgb is not None @@ -453,8 +454,8 @@ def get_axis( try: import matplotlib as mpl import matplotlib.pyplot as plt - except ImportError: - raise ImportError("matplotlib is required for plot.utils.get_axis") + except ImportError as err: + raise ImportError("matplotlib is required for plot.utils.get_axis") from err if figsize is not None: if ax is not None: @@ -1053,7 +1054,8 @@ def legend_elements( warnings.warn( "Collection without array used. Make sure to " "specify the values to be colormapped via the " - "`c` argument." 
+ "`c` argument.", + stacklevel=2, ) return handles, labels _size = kwargs.pop("size", mpl.rcParams["lines.markersize"]) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index b76733d113f..eeba8540133 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -108,7 +108,7 @@ def names() -> st.SearchStrategy[str]: def dimension_names( *, - name_strategy=names(), + name_strategy=None, min_dims: int = 0, max_dims: int = 3, ) -> st.SearchStrategy[list[Hashable]]: @@ -126,6 +126,8 @@ def dimension_names( max_dims Maximum number of dimensions in generated list. """ + if name_strategy is None: + name_strategy = names() return st.lists( elements=name_strategy, @@ -137,7 +139,7 @@ def dimension_names( def dimension_sizes( *, - dim_names: st.SearchStrategy[Hashable] = names(), + dim_names: st.SearchStrategy[Hashable] = names(), # noqa: B008 min_dims: int = 0, max_dims: int = 3, min_side: int = 1, @@ -220,14 +222,17 @@ def attrs() -> st.SearchStrategy[Mapping[Hashable, Any]]: ) +ATTRS = attrs() + + @st.composite def variables( draw: st.DrawFn, *, array_strategy_fn: ArrayStrategyFn | None = None, dims: st.SearchStrategy[Sequence[Hashable] | Mapping[Hashable, int]] | None = None, - dtype: st.SearchStrategy[np.dtype] = supported_dtypes(), - attrs: st.SearchStrategy[Mapping] = attrs(), + dtype: st.SearchStrategy[np.dtype] | None = None, + attrs: st.SearchStrategy[Mapping] = ATTRS, ) -> xr.Variable: """ Generates arbitrary xarray.Variable objects. @@ -310,6 +315,8 @@ def variables( -------- :ref:`testing.hypothesis`_ """ + if dtype is None: + dtype = supported_dtypes() if not isinstance(dims, st.SearchStrategy) and dims is not None: raise InvalidArgument( diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 0e43738ed99..71ae1a7075f 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -148,6 +148,7 @@ def _importorskip( not has_numbagg_or_bottleneck, reason="requires numbagg or bottleneck" ) has_numpy_2, requires_numpy_2 = _importorskip("numpy", "2.0.0") +_, requires_flox_0_9_12 = _importorskip("flox", "0.9.12") has_array_api_strict, requires_array_api_strict = _importorskip("array_api_strict") diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 587f43a5d7f..64309966103 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -158,7 +158,7 @@ def test_not_datetime_type(self) -> None: int_data = np.arange(len(self.data.time)).astype("int8") nontime_data = nontime_data.assign_coords(time=int_data) with pytest.raises(AttributeError, match=r"dt"): - nontime_data.time.dt + _ = nontime_data.time.dt @pytest.mark.filterwarnings("ignore:dt.weekofyear and dt.week have been deprecated") @requires_dask @@ -326,7 +326,7 @@ def test_not_datetime_type(self) -> None: int_data = np.arange(len(self.data.time)).astype("int8") nontime_data = nontime_data.assign_coords(time=int_data) with pytest.raises(AttributeError, match=r"dt"): - nontime_data.time.dt + _ = nontime_data.time.dt @pytest.mark.parametrize( "field", ["days", "seconds", "microseconds", "nanoseconds"] diff --git a/xarray/tests/test_assertions.py b/xarray/tests/test_assertions.py index 2f5a8739b28..3e1ce0ea266 100644 --- a/xarray/tests/test_assertions.py +++ b/xarray/tests/test_assertions.py @@ -170,11 +170,11 @@ def test_ensure_warnings_not_elevated(func) -> None: class WarningVariable(xr.Variable): @property # type: ignore[misc] def dims(self): - warnings.warn("warning in test") + warnings.warn("warning in test", 
stacklevel=2) return super().dims def __array__(self, dtype=None, copy=None): - warnings.warn("warning in test") + warnings.warn("warning in test", stacklevel=2) return super().__array__() a = WarningVariable("x", [1]) @@ -190,7 +190,7 @@ def __array__(self, dtype=None, copy=None): # ensure warnings still raise outside of assert_* with pytest.raises(UserWarning): - warnings.warn("test") + warnings.warn("test", stacklevel=2) # ensure warnings stay ignored in assert_* with warnings.catch_warnings(record=True) as w: diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 13258fcf6ea..cbc0b9e019d 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -488,13 +488,13 @@ def test_dataset_caching(self) -> None: with self.roundtrip(expected) as actual: assert isinstance(actual.foo.variable._data, indexing.MemoryCachedArray) assert not actual.foo.variable._in_memory - actual.foo.values # cache + _ = actual.foo.values # cache assert actual.foo.variable._in_memory with self.roundtrip(expected, open_kwargs={"cache": False}) as actual: assert isinstance(actual.foo.variable._data, indexing.CopyOnWriteArray) assert not actual.foo.variable._in_memory - actual.foo.values # no caching + _ = actual.foo.values # no caching assert not actual.foo.variable._in_memory @pytest.mark.filterwarnings("ignore:deallocating CachingFileManager") @@ -818,7 +818,7 @@ def find_and_validate_array(obj): else: raise TypeError(f"{type(obj.array)} is wrapped by {type(obj)}") - for k, v in ds.variables.items(): + for _k, v in ds.variables.items(): find_and_validate_array(v._data) def test_array_type_after_indexing(self) -> None: @@ -2000,7 +2000,7 @@ def test_unsorted_index_raises(self) -> None: # Older versions of NetCDF4 raise an exception here, and if so we # want to ensure we improve (that is, replace) the error message try: - ds2.randovar.values + _ = ds2.randovar.values except IndexError as err: assert "first by calling .load" in str(err) @@ -3160,7 +3160,7 @@ def summarize(self, patches): for call in patch_.mock_calls: if "zarr.json" not in call.args: count += 1 - summary[name.strip("__")] = count + summary[name.strip("_")] = count return summary def check_requests(self, expected, patches): @@ -4450,7 +4450,7 @@ def test_dataset_caching(self) -> None: expected = Dataset({"foo": ("x", [5, 6, 7])}) with self.roundtrip(expected) as actual: assert not actual.foo.variable._in_memory - actual.foo.values # no caching + _ = actual.foo.values # no caching assert not actual.foo.variable._in_memory def test_open_mfdataset(self) -> None: @@ -4576,7 +4576,7 @@ def test_attrs_mfdataset(self) -> None: assert actual.test1 == ds1.test1 # attributes from ds2 are not retained, e.g., with pytest.raises(AttributeError, match=r"no attribute"): - actual.test2 + _ = actual.test2 def test_open_mfdataset_attrs_file(self) -> None: original = Dataset({"foo": ("x", np.random.randn(10))}) diff --git a/xarray/tests/test_coarsen.py b/xarray/tests/test_coarsen.py index 01d5393e289..ab04a7b3cde 100644 --- a/xarray/tests/test_coarsen.py +++ b/xarray/tests/test_coarsen.py @@ -203,14 +203,15 @@ def test_coarsen_da_keep_attrs(funcname, argument) -> None: func = getattr(da.coarsen(dim={"coord": 5}), funcname) result = func(*argument) assert result.attrs == attrs_da - da.coord.attrs == attrs_coords + assert da.coord.attrs == attrs_coords assert result.name == "name" # discard attrs func = getattr(da.coarsen(dim={"coord": 5}), funcname) result = func(*argument, keep_attrs=False) assert result.attrs == {} - 
da.coord.attrs == {} + # XXX: no assert? + _ = da.coord.attrs == {} assert result.name == "name" # test discard attrs using global option @@ -218,7 +219,8 @@ def test_coarsen_da_keep_attrs(funcname, argument) -> None: with set_options(keep_attrs=False): result = func(*argument) assert result.attrs == {} - da.coord.attrs == {} + # XXX: no assert? + _ = da.coord.attrs == {} assert result.name == "name" # keyword takes precedence over global option @@ -226,14 +228,16 @@ def test_coarsen_da_keep_attrs(funcname, argument) -> None: with set_options(keep_attrs=False): result = func(*argument, keep_attrs=True) assert result.attrs == attrs_da - da.coord.attrs == {} + # XXX: no assert? + _ = da.coord.attrs == {} assert result.name == "name" func = getattr(da.coarsen(dim={"coord": 5}), funcname) with set_options(keep_attrs=True): result = func(*argument, keep_attrs=False) assert result.attrs == {} - da.coord.attrs == {} + # XXX: no assert? + _ = da.coord.attrs == {} assert result.name == "name" diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 66caf25cc73..bb0dd1dd25c 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd import pytest -from pandas.errors import OutOfBoundsDatetime +from pandas.errors import OutOfBoundsDatetime, OutOfBoundsTimedelta from xarray import ( DataArray, @@ -1136,11 +1136,16 @@ def test_should_cftime_be_used_target_not_npable(): _should_cftime_be_used(src, "noleap", False) -@pytest.mark.parametrize("dtype", [np.uint8, np.uint16, np.uint32, np.uint64]) -def test_decode_cf_datetime_uint(dtype): +@pytest.mark.parametrize( + "dtype", + [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64], +) +def test_decode_cf_datetime_varied_integer_dtypes(dtype): units = "seconds since 2018-08-22T03:23:03Z" num_dates = dtype(50) - result = decode_cf_datetime(num_dates, units) + # Set use_cftime=False to ensure we cannot mask a failure by falling back + # to cftime. 
+    result = decode_cf_datetime(num_dates, units, use_cftime=False)
     expected = np.asarray(np.datetime64("2018-08-22T03:23:53", "ns"))
     np.testing.assert_equal(result, expected)


@@ -1154,6 +1159,14 @@ def test_decode_cf_datetime_uint64_with_cftime():
     np.testing.assert_equal(result, expected)


+def test_decode_cf_datetime_uint64_with_pandas_overflow_error():
+    units = "nanoseconds since 1970-01-01"
+    calendar = "standard"
+    num_dates = np.uint64(1_000_000 * 86_400 * 360 * 500_000)
+    with pytest.raises(OutOfBoundsTimedelta):
+        decode_cf_datetime(num_dates, units, calendar, use_cftime=False)
+
+
 @requires_cftime
 def test_decode_cf_datetime_uint64_with_cftime_overflow_error():
     units = "microseconds since 1700-01-01"
@@ -1438,10 +1451,8 @@ def test_roundtrip_float_times(fill_value, times, units, encoded_values) -> None
         "days since 1700-01-01",
         np.dtype("int32"),
     ),
-    "mixed-cftime-pandas-encoding-with-prescribed-units-and-dtype": (
-        "250YS",
-        "days since 1700-01-01",
-        np.dtype("int32"),
+    "mixed-cftime-pandas-encoding-with-prescribed-units-and-dtype": pytest.param(
+        "250YS", "days since 1700-01-01", np.dtype("int32"), marks=requires_cftime
     ),
     "pandas-encoding-with-default-units-and-dtype": ("250YS", None, None),
 }
diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py
index 41ad75b0fea..b7170a06128 100644
--- a/xarray/tests/test_combine.py
+++ b/xarray/tests/test_combine.py
@@ -29,7 +29,7 @@ def assert_combined_tile_ids_equal(dict1, dict2):
     assert len(dict1) == len(dict2)
-    for k, v in dict1.items():
+    for k, _v in dict1.items():
         assert k in dict2.keys()
         assert_equal(dict1[k], dict2[k])
diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py
index 7f7f14c8f16..f0dcfd462e7 100644
--- a/xarray/tests/test_concat.py
+++ b/xarray/tests/test_concat.py
@@ -396,7 +396,7 @@ def concat_var_names() -> Callable:
     def get_varnames(var_cnt: int = 10, list_cnt: int = 10) -> list[list[str]]:
         orig = [f"d{i:02d}" for i in range(var_cnt)]
         var_names = []
-        for i in range(0, list_cnt):
+        for _i in range(0, list_cnt):
             l1 = orig.copy()
             var_names.append(l1)
         return var_names
diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
index 062f0525593..a46a9d43c4c 100644
--- a/xarray/tests/test_dask.py
+++ b/xarray/tests/test_dask.py
@@ -63,7 +63,7 @@ def assertLazyAnd(self, expected, actual, test):
         elif isinstance(actual, Variable):
             assert isinstance(actual.data, da.Array)
         else:
-            assert False
+            raise AssertionError()


 class TestVariable(DaskTestCase):
@@ -740,7 +740,7 @@ def test_dataarray_getattr(self):
         nonindex_coord = build_dask_array("coord")
         a = DataArray(data, dims=["x"], coords={"y": ("x", nonindex_coord)})
         with suppress(AttributeError):
-            a.NOTEXIST
+            _ = a.NOTEXIST
         assert kernel_call_count == 0

     def test_dataset_getattr(self):
@@ -750,7 +750,7 @@ def test_dataset_getattr(self):
         nonindex_coord = build_dask_array("coord")
         ds = Dataset(data_vars={"a": ("x", data)}, coords={"y": ("x", nonindex_coord)})
         with suppress(AttributeError):
-            ds.NOTEXIST
+            _ = ds.NOTEXIST
         assert kernel_call_count == 0

     def test_values(self):
@@ -1104,7 +1104,7 @@ def test_unify_chunks(map_ds):
     ds_copy["cxy"] = ds_copy.cxy.chunk({"y": 10})

     with pytest.raises(ValueError, match=r"inconsistent chunks"):
-        ds_copy.chunks
+        _ = ds_copy.chunks

     expected_chunks = {"x": (4, 4, 2), "y": (5, 5, 5, 5)}
     with raise_if_dask_computes():
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index 49df5dcde2d..78db39c194e 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -155,7 +155,7 @@ def test_properties(self) -> None:
         for k, v in self.dv.coords.items():
             assert_array_equal(v, self.ds.coords[k])
         with pytest.raises(AttributeError):
-            self.dv.dataset
+            _ = self.dv.dataset
         assert isinstance(self.ds["x"].to_index(), pd.Index)
         with pytest.raises(ValueError, match=r"must be 1-dimensional"):
             self.ds["foo"].to_index()
@@ -6590,7 +6590,7 @@ def test_isin(da) -> None:

 def test_raise_no_warning_for_nan_in_binary_ops() -> None:
     with assert_no_warnings():
-        xr.DataArray([1, 2, np.nan]) > 0
+        _ = xr.DataArray([1, 2, np.nan]) > 0


 @pytest.mark.filterwarnings("error")
@@ -6868,7 +6868,7 @@ def test_fallback_to_iris_AuxCoord(self, coord_values) -> None:
 def test_no_dict() -> None:
     d = DataArray()
     with pytest.raises(AttributeError):
-        d.__dict__
+        _ = d.__dict__


 def test_subclass_slots() -> None:
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index f8a9a94defc..b058a8fbd44 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -761,7 +761,7 @@ def test_warn_ds_dims_deprecation(self) -> None:
         with assert_no_warnings():
             len(ds.dims)
             ds.dims.__iter__()
-            "dim1" in ds.dims
+            _ = "dim1" in ds.dims

     def test_asarray(self) -> None:
         ds = Dataset({"x": 0})
@@ -1262,10 +1262,10 @@ def test_dask_is_lazy(self) -> None:
         with pytest.raises(UnexpectedDataAccess):
             ds.load()
         with pytest.raises(UnexpectedDataAccess):
-            ds["var1"].values
+            _ = ds["var1"].values

         # these should not raise UnexpectedDataAccess:
-        ds.var1.data
+        _ = ds.var1.data
         ds.isel(time=10)
         ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1)
         ds.transpose()
@@ -3036,12 +3036,12 @@ def test_drop_encoding(self) -> None:
         vencoding = {"scale_factor": 10}
         orig.encoding = {"foo": "bar"}

-        for k, v in orig.variables.items():
+        for k, _v in orig.variables.items():
             orig[k].encoding = vencoding

         actual = orig.drop_encoding()
         assert actual.encoding == {}
-        for k, v in actual.variables.items():
+        for _k, v in actual.variables.items():
             assert v.encoding == {}

         assert_equal(actual, orig)
@@ -3085,7 +3085,7 @@ def test_rename(self) -> None:
         data["var1"] = (var1.dims, InaccessibleArray(var1.values))
         renamed = data.rename(newnames)
         with pytest.raises(UnexpectedDataAccess):
-            renamed["renamed_var1"].values
+            _ = renamed["renamed_var1"].values

         # https://github.com/python/mypy/issues/10008
         renamed_kwargs = data.rename(**newnames)  # type: ignore[arg-type]
@@ -4746,11 +4746,11 @@ def test_squeeze(self) -> None:
         test_args: list[list] = [[], [["x"]], [["x", "z"]]]
         for args in test_args:

-            def get_args(v):
+            def get_args(args, v):
                 return [set(args[0]) & set(v.dims)] if args else []

             expected = Dataset(
-                {k: v.squeeze(*get_args(v)) for k, v in data.variables.items()}
+                {k: v.squeeze(*get_args(args, v)) for k, v in data.variables.items()}
             )
             expected = expected.set_coords(data.coords)
             assert_identical(expected, data.squeeze(*args))
@@ -5208,7 +5208,7 @@ def test_lazy_load(self) -> None:
             with pytest.raises(UnexpectedDataAccess):
                 ds.load()
             with pytest.raises(UnexpectedDataAccess):
-                ds["var1"].values
+                _ = ds["var1"].values

             # these should not raise UnexpectedDataAccess:
             ds.isel(time=10)
@@ -5221,10 +5221,10 @@ def test_lazy_load_duck_array(self) -> None:
         for decode_cf in [True, False]:
             ds = open_dataset(store, decode_cf=decode_cf)
             with pytest.raises(UnexpectedDataAccess):
-                ds["var1"].values
+                _ = ds["var1"].values

             # these should not raise UnexpectedDataAccess:
-            ds.var1.data
+            _ = ds.var1.data
             ds.isel(time=10)
             ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1)
             repr(ds)
@@ -5987,9 +5987,9 @@ def test_unary_ops(self) -> None:

         # don't actually patch these methods in
         with pytest.raises(AttributeError):
-            ds.item
+            _ = ds.item
         with pytest.raises(AttributeError):
-            ds.searchsorted
+            _ = ds.searchsorted

     def test_dataset_array_math(self) -> None:
         ds = self.make_example_math_dataset()
@@ -7104,7 +7104,7 @@ def test_dir_unicode(ds) -> None:

 def test_raise_no_warning_for_nan_in_binary_ops() -> None:
     with assert_no_warnings():
-        Dataset(data_vars={"x": ("y", [1, 2, np.nan])}) > 0
+        _ = Dataset(data_vars={"x": ("y", [1, 2, np.nan])}) > 0


 @pytest.mark.filterwarnings("error")
@@ -7414,7 +7414,7 @@ def test_trapezoid_datetime(dask, which_datetime) -> None:
 def test_no_dict() -> None:
     d = Dataset()
     with pytest.raises(AttributeError):
-        d.__dict__
+        _ = d.__dict__


 def test_subclass_slots() -> None:
diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py
index abcd508ad26..c9dcca32aeb 100644
--- a/xarray/tests/test_datatree.py
+++ b/xarray/tests/test_datatree.py
@@ -513,14 +513,12 @@ def test_setitem_dataset_on_this_node(self):
         results["."] = data
         assert_identical(results.to_dataset(), data)

-    @pytest.mark.xfail(reason="assigning Datasets doesn't yet create new nodes")
     def test_setitem_dataset_as_new_node(self):
         data = xr.Dataset({"temp": [0, 50]})
         folder1 = DataTree(name="folder1")
         folder1["results"] = data
         assert_identical(folder1["results"].to_dataset(), data)

-    @pytest.mark.xfail(reason="assigning Datasets doesn't yet create new nodes")
     def test_setitem_dataset_as_new_node_requiring_intermediate_nodes(self):
         data = xr.Dataset({"temp": [0, 50]})
         folder1 = DataTree(name="folder1")
diff --git a/xarray/tests/test_extensions.py b/xarray/tests/test_extensions.py
index 030749ce146..8a52f79198d 100644
--- a/xarray/tests/test_extensions.py
+++ b/xarray/tests/test_extensions.py
@@ -92,4 +92,4 @@ def __init__(self, xarray_obj):
             raise AttributeError("broken")

     with pytest.raises(RuntimeError, match=r"error initializing"):
-        xr.Dataset().stupid_accessor
+        _ = xr.Dataset().stupid_accessor
diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
index fa6172c5d66..9d9c22cfa96 100644
--- a/xarray/tests/test_groupby.py
+++ b/xarray/tests/test_groupby.py
@@ -34,6 +34,7 @@
     requires_cftime,
     requires_dask,
     requires_flox,
+    requires_flox_0_9_12,
     requires_scipy,
 )

@@ -608,6 +609,7 @@ def test_groupby_repr_datetime(obj) -> None:

 @pytest.mark.filterwarnings("ignore:Converting non-nanosecond")
 @pytest.mark.filterwarnings("ignore:invalid value encountered in divide:RuntimeWarning")
+@pytest.mark.filterwarnings("ignore:No index created for dimension id:UserWarning")
 def test_groupby_drops_nans() -> None:
     # GH2383
     # nan in 2D data variable (requires stacking)
@@ -2859,6 +2861,60 @@ def test_multiple_groupers_mixed(use_flox) -> None:
 # ------


+@requires_flox_0_9_12
+@pytest.mark.parametrize(
+    "reduction", ["max", "min", "nanmax", "nanmin", "sum", "nansum", "prod", "nanprod"]
+)
+def test_groupby_preserve_dtype(reduction):
+    # all groups are present, we should follow numpy exactly
+    ds = xr.Dataset(
+        {
+            "test": (
+                ["x", "y"],
+                np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype="int16"),
+            )
+        },
+        coords={"idx": ("x", [1, 2, 1])},
+    )
+
+    kwargs = {}
+    if "nan" in reduction:
+        kwargs["skipna"] = True
+    # TODO: fix dtype with numbagg/bottleneck and use_flox=False
+    with xr.set_options(use_numbagg=False, use_bottleneck=False):
+        actual = getattr(ds.groupby("idx"), reduction.removeprefix("nan"))(
+            **kwargs
+        ).test.dtype
+    expected = getattr(np, reduction)(ds.test.data, axis=0).dtype
+
+    assert actual == expected
+
+
+@requires_dask
+@requires_flox_0_9_12
+@pytest.mark.parametrize("reduction", ["any", "all", "count"])
+def test_gappy_resample_reductions(reduction):
+    # GH8090
+    dates = (("1988-12-01", "1990-11-30"), ("2000-12-01", "2001-11-30"))
+    times = [xr.date_range(*d, freq="D") for d in dates]
+
+    da = xr.concat(
+        [
+            xr.DataArray(np.random.rand(len(t)), coords={"time": t}, dims="time")
+            for t in times
+        ],
+        dim="time",
+    ).chunk(time=100)
+
+    rs = (da > 0.5).resample(time="YS-DEC")
+    method = getattr(rs, reduction)
+    with xr.set_options(use_flox=True):
+        actual = method(dim="time")
+    with xr.set_options(use_flox=False):
+        expected = method(dim="time")
+    assert_identical(expected, actual)
+
+
 # Possible property tests
 # 1. lambda x: x
 # 2. grouped-reduce on unique coords is identical to array
diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py
index 3d47f3e1803..2605e387360 100644
--- a/xarray/tests/test_plot.py
+++ b/xarray/tests/test_plot.py
@@ -1334,7 +1334,7 @@ def test_nonnumeric_index(self) -> None:
         a = DataArray(easy_array((3, 2)), coords=[["a", "b", "c"], ["d", "e"]])
         if self.plotfunc.__name__ == "surface":
             # ax.plot_surface errors with nonnumerics:
-            with pytest.raises(Exception):
+            with pytest.raises(TypeError, match="not supported for the input types"):
                 self.plotfunc(a)
         else:
             self.plotfunc(a)
@@ -2161,7 +2161,7 @@ def test_convenient_facetgrid(self) -> None:
         g = self.plotfunc(d, x="x", y="y", col="z", col_wrap=2)  # type: ignore[arg-type] # https://github.com/python/mypy/issues/15015

         assert_array_equal(g.axs.shape, [2, 2])
-        for (y, x), ax in np.ndenumerate(g.axs):
+        for (_y, _x), ax in np.ndenumerate(g.axs):
             assert ax.has_data()
             assert "y" == ax.get_ylabel()
             assert "x" == ax.get_xlabel()
@@ -2169,7 +2169,7 @@ def test_convenient_facetgrid(self) -> None:
         # Inferring labels
         g = self.plotfunc(d, col="z", col_wrap=2)  # type: ignore[arg-type] # https://github.com/python/mypy/issues/15015
         assert_array_equal(g.axs.shape, [2, 2])
-        for (y, x), ax in np.ndenumerate(g.axs):
+        for (_y, _x), ax in np.ndenumerate(g.axs):
             assert ax.has_data()
             assert "y" == ax.get_ylabel()
             assert "x" == ax.get_xlabel()
@@ -3381,7 +3381,7 @@ def test_facetgrid_axes_raises_deprecation_warning() -> None:
     with figure_context():
         ds = xr.tutorial.scatter_example_dataset()
         g = ds.plot.scatter(x="A", y="B", col="x")
-        g.axes
+        _ = g.axes


 @requires_matplotlib
diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py
index 9fdf46b0d85..397e07a4bea 100644
--- a/xarray/tests/test_strategies.py
+++ b/xarray/tests/test_strategies.py
@@ -73,7 +73,7 @@ def test_restrict_names(self, data):

 def check_dict_values(dictionary: dict, allowed_attrs_values_types) -> bool:
     """Helper function to assert that all values in recursive dict match one of a set of types."""
-    for key, value in dictionary.items():
+    for _key, value in dictionary.items():
         if isinstance(value, allowed_attrs_values_types) or value is None:
             continue
         elif isinstance(value, dict):
diff --git a/xarray/tests/test_treenode.py b/xarray/tests/test_treenode.py
index 6b40db87c45..97a67380f3f 100644
--- a/xarray/tests/test_treenode.py
+++ b/xarray/tests/test_treenode.py
@@ -328,11 +328,13 @@ def test_parents(self):
     def test_lineage(self):
         _, leaf_f = create_test_tree()
         expected = ["f", "e", "b", "a"]
-        assert [node.name for node in leaf_f.lineage] == expected
+        with pytest.warns(DeprecationWarning):
+            assert [node.name for node in leaf_f.lineage] == expected

     def test_ancestors(self):
         _, leaf_f = create_test_tree()
-        ancestors = leaf_f.ancestors
+        with pytest.warns(DeprecationWarning):
+            ancestors = leaf_f.ancestors
         expected = ["a", "b", "e", "f"]
         for node, expected_name in zip(ancestors, expected, strict=True):
             assert node.name == expected_name
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index 9e2e12fc045..f8a8878b8ee 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -2761,7 +2761,7 @@ def __init__(self, array):

 def test_raise_no_warning_for_nan_in_binary_ops():
     with assert_no_warnings():
-        Variable("x", [1, 2, np.nan]) > 0
+        _ = Variable("x", [1, 2, np.nan]) > 0


 class TestBackendIndexing:
diff --git a/xarray/tutorial.py b/xarray/tutorial.py
index 82bb3940b98..0472584028a 100644
--- a/xarray/tutorial.py
+++ b/xarray/tutorial.py
@@ -60,22 +60,22 @@ def _check_netcdf_engine_installed(name):
         except ImportError:
             try:
                 import netCDF4  # noqa
-            except ImportError:
+            except ImportError as err:
                 raise ImportError(
                     f"opening tutorial dataset {name} requires either scipy or "
                     "netCDF4 to be installed."
-                )
+                ) from err
     if version == 4:
         try:
             import h5netcdf  # noqa
         except ImportError:
             try:
                 import netCDF4  # noqa
-            except ImportError:
+            except ImportError as err:
                 raise ImportError(
                     f"opening tutorial dataset {name} requires either h5netcdf "
                     "or netCDF4 to be installed."
-                )
+                ) from err


 # idea borrowed from Seaborn