Skip to content

Commit

Permalink
Merge branch 'main' into dedup
Browse files Browse the repository at this point in the history
  • Loading branch information
shoyer committed Sep 17, 2024
2 parents 2ba1da6 + 1c6300c commit e28f4e2
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 26 deletions.
10 changes: 7 additions & 3 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ New Features
`Matt Savoie <https://github.com/flamingbear>`_,
`Stephan Hoyer <https://github.com/shoyer>`_ and
`Tom Nicholas <https://github.com/TomNicholas>`_.

- Added zarr backends for :py:func:`open_groups` (:issue:`9430`, :pull:`9469`).
By `Eni Awowale <https://github.com/eni-awowale>`_.

Breaking changes
~~~~~~~~~~~~~~~~
Expand All @@ -46,7 +47,10 @@ Bug fixes
- Make path-like variable names illegal when constructing a DataTree from a Dataset
(:issue:`9339`, :pull:`9378`)
By `Etienne Schalk <https://github.com/etienneschalk>`_.

- Fix bug when encoding times with missing values as floats in the case when
the non-missing times could in theory be encoded with integers
(:issue:`9488`, :pull:`9497`). By `Spencer Clark
<https://github.com/spencerkclark>`_.


Documentation
Expand Down Expand Up @@ -182,8 +186,8 @@ New Features
to return an object without ``attrs``. A ``deep`` parameter controls whether
variables' ``attrs`` are also dropped.
By `Maximilian Roos <https://github.com/max-sixty>`_. (:pull:`8288`)
- Added :py:func:`open_groups` for h5netcdf and netCDF4 backends (:issue:`9137`, :pull:`9243`).
By `Eni Awowale <https://github.com/eni-awowale>`_.
- Add `open_groups` method for unaligned datasets (:issue:`9137`, :pull:`9243`)

Breaking changes
~~~~~~~~~~~~~~~~
Expand Down
6 changes: 5 additions & 1 deletion properties/test_index_manipulation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import itertools
import warnings

import numpy as np
import pytest
Expand Down Expand Up @@ -184,7 +185,10 @@ def drop_dims(self, data):
)
)
note(f"> drop_dims: {dims}")
self.dataset = self.dataset.drop_dims(dims)
# TODO: dropping a multi-index dimension raises a DeprecationWarning
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=DeprecationWarning)
self.dataset = self.dataset.drop_dims(dims)

for dim in dims:
if dim in self.indexed_dims:
Expand Down
4 changes: 2 additions & 2 deletions xarray/coding/times.py
Original file line number Diff line number Diff line change
Expand Up @@ -771,7 +771,7 @@ def _eagerly_encode_cf_datetime(
# needed time delta to encode faithfully to int64
needed_time_delta = _time_units_to_timedelta64(needed_units)

floor_division = True
floor_division = np.issubdtype(dtype, np.integer) or dtype is None
if time_delta > needed_time_delta:
floor_division = False
if dtype is None:
Expand Down Expand Up @@ -892,7 +892,7 @@ def _eagerly_encode_cf_timedelta(
# needed time delta to encode faithfully to int64
needed_time_delta = _time_units_to_timedelta64(needed_units)

floor_division = True
floor_division = np.issubdtype(dtype, np.integer) or dtype is None
if time_delta > needed_time_delta:
floor_division = False
if dtype is None:
Expand Down
30 changes: 21 additions & 9 deletions xarray/core/treenode.py
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,14 @@ def same_tree(self, other: Tree) -> bool:
AnyNamedNode = TypeVar("AnyNamedNode", bound="NamedNode")


def _validate_name(name: str | None) -> None:
if name is not None:
if not isinstance(name, str):
raise TypeError("node name must be a string or None")
if "/" in name:
raise ValueError("node names cannot contain forward slashes")


class NamedNode(TreeNode, Generic[Tree]):
"""
A TreeNode which knows its own name.
Expand All @@ -653,8 +661,8 @@ class NamedNode(TreeNode, Generic[Tree]):

def __init__(self, name=None, children=None):
super().__init__(children=children)
self._name = None
self.name = name
_validate_name(name)
self._name = name

@property
def name(self) -> str | None:
Expand All @@ -663,11 +671,13 @@ def name(self) -> str | None:

@name.setter
def name(self, name: str | None) -> None:
if name is not None:
if not isinstance(name, str):
raise TypeError("node name must be a string or None")
if "/" in name:
raise ValueError("node names cannot contain forward slashes")
if self.parent is not None:
raise ValueError(
"cannot set the name of a node which already has a parent. "
"Consider creating a detached copy of this node via .copy() "
"on the parent node."
)
_validate_name(name)
self._name = name

def __repr__(self, level=0):
Expand All @@ -677,11 +687,13 @@ def __repr__(self, level=0):
return repr_value

def __str__(self) -> str:
return f"NamedNode('{self.name}')" if self.name else "NamedNode()"
name_repr = repr(self.name) if self.name is not None else ""
return f"NamedNode({name_repr})"

def _post_attach(self: AnyNamedNode, parent: AnyNamedNode, name: str) -> None:
"""Ensures child has name attribute corresponding to key under which it has been stored."""
self.name = name
_validate_name(name) # is this check redundant?
self._name = name

def _copy_node(
self: AnyNamedNode,
Expand Down
42 changes: 32 additions & 10 deletions xarray/tests/test_coding_times.py
Original file line number Diff line number Diff line change
Expand Up @@ -1383,24 +1383,46 @@ def test_roundtrip_timedelta64_nanosecond_precision_warning() -> None:
assert decoded_var.encoding["dtype"] == np.int64


def test_roundtrip_float_times() -> None:
# Regression test for GitHub issue #8271
fill_value = 20.0
times = [
np.datetime64("1970-01-01 00:00:00", "ns"),
np.datetime64("1970-01-01 06:00:00", "ns"),
np.datetime64("NaT", "ns"),
]
_TEST_ROUNDTRIP_FLOAT_TIMES_TESTS = {
"GH-8271": (
20.0,
np.array(
["1970-01-01 00:00:00", "1970-01-01 06:00:00", "NaT"],
dtype="datetime64[ns]",
),
"days since 1960-01-01",
np.array([3653, 3653.25, 20.0]),
),
"GH-9488-datetime64[ns]": (
1.0e20,
np.array(["2010-01-01 12:00:00", "NaT"], dtype="datetime64[ns]"),
"seconds since 2010-01-01",
np.array([43200, 1.0e20]),
),
"GH-9488-timedelta64[ns]": (
1.0e20,
np.array([1_000_000_000, "NaT"], dtype="timedelta64[ns]"),
"seconds",
np.array([1.0, 1.0e20]),
),
}


units = "days since 1960-01-01"
@pytest.mark.parametrize(
("fill_value", "times", "units", "encoded_values"),
_TEST_ROUNDTRIP_FLOAT_TIMES_TESTS.values(),
ids=_TEST_ROUNDTRIP_FLOAT_TIMES_TESTS.keys(),
)
def test_roundtrip_float_times(fill_value, times, units, encoded_values) -> None:
# Regression test for GitHub issues #8271 and #9488
var = Variable(
["time"],
times,
encoding=dict(dtype=np.float64, _FillValue=fill_value, units=units),
)

encoded_var = conventions.encode_cf_variable(var)
np.testing.assert_array_equal(encoded_var, np.array([3653, 3653.25, 20.0]))
np.testing.assert_array_equal(encoded_var, encoded_values)
assert encoded_var.attrs["units"] == units
assert encoded_var.attrs["_FillValue"] == fill_value

Expand Down
20 changes: 19 additions & 1 deletion xarray/tests/test_datatree.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,28 @@ def test_empty(self):
assert dt.children == {}
assert_identical(dt.to_dataset(), xr.Dataset())

def test_unnamed(self):
def test_name(self):
    """Exercise reading and assigning ``name`` on detached and attached nodes."""
    # A tree constructed without a name reports ``None``.
    unnamed = DataTree()
    assert unnamed.name is None

    # A name passed to the constructor is stored and can be reassigned
    # while the node has no parent.
    root = DataTree(name="foo")
    assert root.name == "foo"

    root.name = "bar"
    assert root.name == "bar"

    # A child is named after the key it is stored under, and assigning a
    # new name to it while it is attached raises ValueError.
    parent = DataTree(children={"foo": DataTree()})
    assert parent["/foo"].name == "foo"
    with pytest.raises(
        ValueError, match="cannot set the name of a node which already has a parent"
    ):
        parent["/foo"].name = "bar"

    # A copy of the child keeps the name and can be renamed.
    detached = parent["/foo"].copy()
    assert detached.name == "foo"
    detached.name = "bar"
    assert detached.name == "bar"

def test_bad_names(self):
with pytest.raises(TypeError):
DataTree(name=5) # type: ignore[arg-type]
Expand Down

0 comments on commit e28f4e2

Please sign in to comment.