Merge branch 'main' into dedup

pydata · Sep 17, 2024 · e28f4e2
2 parents 2ba1da6 + 1c6300c
commit e28f4e2
Show file tree

Hide file tree

Showing 6 changed files with 86 additions and 26 deletions.
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -30,7 +30,8 @@ New Features
   `Matt Savoie <https://github.com/flamingbear>`_,
   `Stephan Hoyer <https://github.com/shoyer>`_ and
   `Tom Nicholas <https://github.com/TomNicholas>`_.
-
+- Added zarr backends for :py:func:`open_groups` (:issue:`9430`, :pull:`9469`).
+  By `Eni Awowale <https://github.com/eni-awowale>`_.
 
 Breaking changes
 ~~~~~~~~~~~~~~~~
@@ -46,7 +47,10 @@ Bug fixes
 - Make illegal path-like variable names when constructing a DataTree from a Dataset
   (:issue:`9339`, :pull:`9378`)
   By `Etienne Schalk <https://github.com/etienneschalk>`_.
-
+- Fix bug when encoding times with missing values as floats in the case when
+  the non-missing times could in theory be encoded with integers
+  (:issue:`9488`, :pull:`9497`). By `Spencer Clark
+  <https://github.com/spencerkclark>`_.
 
 
 Documentation
@@ -182,8 +186,8 @@ New Features
   to return an object without ``attrs``. A ``deep`` parameter controls whether
   variables' ``attrs`` are also dropped.
   By `Maximilian Roos <https://github.com/max-sixty>`_. (:pull:`8288`)
+- Added :py:func:`open_groups` for h5netcdf and netCDF4 backends (:issue:`9137`, :pull:`9243`).
   By `Eni Awowale <https://github.com/eni-awowale>`_.
-- Add `open_groups` method for unaligned datasets (:issue:`9137`, :pull:`9243`)
 
 Breaking changes
 ~~~~~~~~~~~~~~~~

diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py
@@ -1,4 +1,5 @@
 import itertools
+import warnings
 
 import numpy as np
 import pytest
@@ -184,7 +185,10 @@ def drop_dims(self, data):
             )
         )
         note(f"> drop_dims: {dims}")
-        self.dataset = self.dataset.drop_dims(dims)
+        # TODO: dropping a multi-index dimension raises a DeprecationWarning
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", category=DeprecationWarning)
+            self.dataset = self.dataset.drop_dims(dims)
 
         for dim in dims:
             if dim in self.indexed_dims:

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
@@ -771,7 +771,7 @@ def _eagerly_encode_cf_datetime(
         # needed time delta to encode faithfully to int64
         needed_time_delta = _time_units_to_timedelta64(needed_units)
 
-        floor_division = True
+        floor_division = np.issubdtype(dtype, np.integer) or dtype is None
         if time_delta > needed_time_delta:
             floor_division = False
             if dtype is None:
@@ -892,7 +892,7 @@ def _eagerly_encode_cf_timedelta(
     # needed time delta to encode faithfully to int64
     needed_time_delta = _time_units_to_timedelta64(needed_units)
 
-    floor_division = True
+    floor_division = np.issubdtype(dtype, np.integer) or dtype is None
     if time_delta > needed_time_delta:
         floor_division = False
         if dtype is None:

diff --git a/xarray/core/treenode.py b/xarray/core/treenode.py
@@ -640,6 +640,14 @@ def same_tree(self, other: Tree) -> bool:
 AnyNamedNode = TypeVar("AnyNamedNode", bound="NamedNode")
 
 
+def _validate_name(name: str | None) -> None:
+    if name is not None:
+        if not isinstance(name, str):
+            raise TypeError("node name must be a string or None")
+        if "/" in name:
+            raise ValueError("node names cannot contain forward slashes")
+
+
 class NamedNode(TreeNode, Generic[Tree]):
     """
     A TreeNode which knows its own name.
@@ -653,8 +661,8 @@ class NamedNode(TreeNode, Generic[Tree]):
 
     def __init__(self, name=None, children=None):
         super().__init__(children=children)
-        self._name = None
-        self.name = name
+        _validate_name(name)
+        self._name = name
 
     @property
     def name(self) -> str | None:
@@ -663,11 +671,13 @@ def name(self) -> str | None:
 
     @name.setter
     def name(self, name: str | None) -> None:
-        if name is not None:
-            if not isinstance(name, str):
-                raise TypeError("node name must be a string or None")
-            if "/" in name:
-                raise ValueError("node names cannot contain forward slashes")
+        if self.parent is not None:
+            raise ValueError(
+                "cannot set the name of a node which already has a parent. "
+                "Consider creating a detached copy of this node via .copy() "
+                "on the parent node."
+            )
+        _validate_name(name)
         self._name = name
 
     def __repr__(self, level=0):
@@ -677,11 +687,13 @@ def __repr__(self, level=0):
         return repr_value
 
     def __str__(self) -> str:
-        return f"NamedNode('{self.name}')" if self.name else "NamedNode()"
+        name_repr = repr(self.name) if self.name is not None else ""
+        return f"NamedNode({name_repr})"
 
     def _post_attach(self: AnyNamedNode, parent: AnyNamedNode, name: str) -> None:
         """Ensures child has name attribute corresponding to key under which it has been stored."""
-        self.name = name
+        _validate_name(name)  # is this check redundant?
+        self._name = name
 
     def _copy_node(
         self: AnyNamedNode,

diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
@@ -1383,24 +1383,46 @@ def test_roundtrip_timedelta64_nanosecond_precision_warning() -> None:
     assert decoded_var.encoding["dtype"] == np.int64
 
 
-def test_roundtrip_float_times() -> None:
-    # Regression test for GitHub issue #8271
-    fill_value = 20.0
-    times = [
-        np.datetime64("1970-01-01 00:00:00", "ns"),
-        np.datetime64("1970-01-01 06:00:00", "ns"),
-        np.datetime64("NaT", "ns"),
-    ]
+_TEST_ROUNDTRIP_FLOAT_TIMES_TESTS = {
+    "GH-8271": (
+        20.0,
+        np.array(
+            ["1970-01-01 00:00:00", "1970-01-01 06:00:00", "NaT"],
+            dtype="datetime64[ns]",
+        ),
+        "days since 1960-01-01",
+        np.array([3653, 3653.25, 20.0]),
+    ),
+    "GH-9488-datetime64[ns]": (
+        1.0e20,
+        np.array(["2010-01-01 12:00:00", "NaT"], dtype="datetime64[ns]"),
+        "seconds since 2010-01-01",
+        np.array([43200, 1.0e20]),
+    ),
+    "GH-9488-timedelta64[ns]": (
+        1.0e20,
+        np.array([1_000_000_000, "NaT"], dtype="timedelta64[ns]"),
+        "seconds",
+        np.array([1.0, 1.0e20]),
+    ),
+}
+
 
-    units = "days since 1960-01-01"
+@pytest.mark.parametrize(
+    ("fill_value", "times", "units", "encoded_values"),
+    _TEST_ROUNDTRIP_FLOAT_TIMES_TESTS.values(),
+    ids=_TEST_ROUNDTRIP_FLOAT_TIMES_TESTS.keys(),
+)
+def test_roundtrip_float_times(fill_value, times, units, encoded_values) -> None:
+    # Regression test for GitHub issues #8271 and #9488
     var = Variable(
         ["time"],
         times,
         encoding=dict(dtype=np.float64, _FillValue=fill_value, units=units),
     )
 
     encoded_var = conventions.encode_cf_variable(var)
-    np.testing.assert_array_equal(encoded_var, np.array([3653, 3653.25, 20.0]))
+    np.testing.assert_array_equal(encoded_var, encoded_values)
     assert encoded_var.attrs["units"] == units
     assert encoded_var.attrs["_FillValue"] == fill_value
 

diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py
@@ -24,10 +24,28 @@ def test_empty(self):
         assert dt.children == {}
         assert_identical(dt.to_dataset(), xr.Dataset())
 
-    def test_unnamed(self):
+    def test_name(self):
         dt = DataTree()
         assert dt.name is None
 
+        dt = DataTree(name="foo")
+        assert dt.name == "foo"
+
+        dt.name = "bar"
+        assert dt.name == "bar"
+
+        dt = DataTree(children={"foo": DataTree()})
+        assert dt["/foo"].name == "foo"
+        with pytest.raises(
+            ValueError, match="cannot set the name of a node which already has a parent"
+        ):
+            dt["/foo"].name = "bar"
+
+        detached = dt["/foo"].copy()
+        assert detached.name == "foo"
+        detached.name = "bar"
+        assert detached.name == "bar"
+
     def test_bad_names(self):
         with pytest.raises(TypeError):
             DataTree(name=5)  # type: ignore[arg-type]