pydata · spencerkclark · Sep 19, 2024 · Sep 18, 2024 · Sep 18, 2024 · Sep 19, 2024
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -47,6 +47,11 @@ Bug fixes
 - Make illegal path-like variable names when constructing a DataTree from a Dataset
   (:issue:`9339`, :pull:`9378`)
   By `Etienne Schalk <https://github.com/etienneschalk>`_.
+- Work around `upstream pandas issue
+  <https://github.com/pandas-dev/pandas/issues/56996>`_ to ensure that we can
+  decode times encoded with small integer dtype values (e.g. ``np.int32``) in
+  environments with NumPy 2.0 or greater without needing to fall back to cftime
+  (:pull:`9518`). By `Spencer Clark <https://github.com/spencerkclark>`_.
 - Fix bug when encoding times with missing values as floats in the case when
   the non-missing times could in theory be encoded with integers
   (:issue:`9488`, :pull:`9497`). By `Spencer Clark

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
@@ -254,6 +254,15 @@ def _decode_datetime_with_pandas(
             "pandas."
         )
 
+    # Work around pandas.to_timedelta issue with dtypes smaller than int64 and
+    # NumPy 2.0 by casting all int and uint data to int64 and uint64,
+    # respectively. See https://github.com/pandas-dev/pandas/issues/56996 for
+    # more details.
+    if flat_num_dates.dtype.kind == "i":
+        flat_num_dates = flat_num_dates.astype(np.int64)
+    elif flat_num_dates.dtype.kind == "u":
+        flat_num_dates = flat_num_dates.astype(np.uint64)
+
     time_units, ref_date_str = _unpack_netcdf_time_units(units)
     time_units = _netcdf_to_numpy_timeunit(time_units)
     try:

diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
@@ -8,7 +8,7 @@
 import numpy as np
 import pandas as pd
 import pytest
-from pandas.errors import OutOfBoundsDatetime
+from pandas.errors import OutOfBoundsDatetime, OutOfBoundsTimedelta
 
 from xarray import (
     DataArray,
@@ -1136,11 +1136,16 @@ def test_should_cftime_be_used_target_not_npable():
         _should_cftime_be_used(src, "noleap", False)
 
 
-@pytest.mark.parametrize("dtype", [np.uint8, np.uint16, np.uint32, np.uint64])
-def test_decode_cf_datetime_uint(dtype):
+@pytest.mark.parametrize(
+    "dtype",
+    [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64],
+)
+def test_decode_cf_datetime_varied_integer_dtypes(dtype):
     units = "seconds since 2018-08-22T03:23:03Z"
     num_dates = dtype(50)
-    result = decode_cf_datetime(num_dates, units)
+    # Set use_cftime=False to ensure we cannot mask a failure by falling back
+    # to cftime.
+    result = decode_cf_datetime(num_dates, units, use_cftime=False)
     expected = np.asarray(np.datetime64("2018-08-22T03:23:53", "ns"))
     np.testing.assert_equal(result, expected)
 
@@ -1154,6 +1159,14 @@ def test_decode_cf_datetime_uint64_with_cftime():
     np.testing.assert_equal(result, expected)
 
 
+def test_decode_cf_datetime_uint64_with_pandas_overflow_error():
+    units = "nanoseconds since 1970-01-01"
+    calendar = "standard"
+    num_dates = np.uint64(1_000_000 * 86_400 * 360 * 500_000)
+    with pytest.raises(OutOfBoundsTimedelta):
+        decode_cf_datetime(num_dates, units, calendar, use_cftime=False)
+
+
 @requires_cftime
 def test_decode_cf_datetime_uint64_with_cftime_overflow_error():
     units = "microseconds since 1700-01-01"
@@ -1438,10 +1451,8 @@ def test_roundtrip_float_times(fill_value, times, units, encoded_values) -> None
         "days since 1700-01-01",
         np.dtype("int32"),
     ),
-    "mixed-cftime-pandas-encoding-with-prescribed-units-and-dtype": (
-        "250YS",
-        "days since 1700-01-01",
-        np.dtype("int32"),
+    "mixed-cftime-pandas-encoding-with-prescribed-units-and-dtype": pytest.param(
+        "250YS", "days since 1700-01-01", np.dtype("int32"), marks=requires_cftime
     ),
     "pandas-encoding-with-default-units-and-dtype": ("250YS", None, None),
 }