From 68f763e7533e29aff51d9e8596ba178621a8adf6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 14 Feb 2022 00:22:20 -0800 Subject: [PATCH] DEPR: datetimelike.astype(int_other_than_i8) return requested dtype (#45574) --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/arrays/datetimelike.py | 30 +++++++++++++++++++ pandas/tests/arrays/period/test_astype.py | 19 +++++++++--- pandas/tests/arrays/test_datetimes.py | 8 ++++- pandas/tests/arrays/test_timedeltas.py | 8 ++++- .../indexes/datetimes/methods/test_astype.py | 6 +++- pandas/tests/indexes/interval/test_astype.py | 16 +++++++--- .../indexes/period/methods/test_astype.py | 6 +++- .../indexes/timedeltas/methods/test_astype.py | 6 +++- 9 files changed, 87 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index a35ca589065d8..ea5258cf1537d 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -238,6 +238,7 @@ Other Deprecations - Deprecated behavior of :meth:`DatetimeIndex.intersection` and :meth:`DatetimeIndex.symmetric_difference` (``union`` behavior was already deprecated in version 1.3.0) with mixed time zones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`, :issue:`45357`) - Deprecated :meth:`DataFrame.iteritems`, :meth:`Series.iteritems`, :meth:`HDFStore.iteritems` in favor of :meth:`DataFrame.items`, :meth:`Series.items`, :meth:`HDFStore.items` (:issue:`45321`) - Deprecated :meth:`Series.is_monotonic` and :meth:`Index.is_monotonic` in favor of :meth:`Series.is_monotonic_increasing` and :meth:`Index.is_monotonic_increasing` (:issue:`45422`, :issue:`21335`) +- Deprecated behavior of :meth:`DatetimeIndex.astype`, :meth:`TimedeltaIndex.astype`, :meth:`PeriodIndex.astype` when converting to an integer dtype other than ``int64``. In a future version, these will convert to exactly the specified dtype (instead of always ``int64``) and will raise if the conversion overflows (:issue:`45034`) - Deprecated the ``__array_wrap__`` method of DataFrame and Series, rely on standard numpy ufuncs instead (:issue:`45451`) - Deprecated the behavior of :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype and incompatible fill value; in a future version this will cast to a common dtype (usually object) instead of raising, matching the behavior of other dtypes (:issue:`45746`) - Deprecated the ``warn`` parameter in :func:`infer_freq` (:issue:`45947`) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 3259ee7d28bbe..483878706db75 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -446,6 +446,36 @@ def astype(self, dtype, copy: bool = True): if is_unsigned_integer_dtype(dtype): # Again, we ignore int32 vs. int64 values = values.view("uint64") + if dtype != np.uint64: + # GH#45034 + warnings.warn( + f"The behavior of .astype from {self.dtype} to {dtype} is " + "deprecated. In a future version, this astype will return " + "exactly the specified dtype instead of uint64, and will " + "raise if that conversion overflows.", + FutureWarning, + stacklevel=find_stack_level(), + ) + elif (self.asi8 < 0).any(): + # GH#45034 + warnings.warn( + f"The behavior of .astype from {self.dtype} to {dtype} is " + "deprecated. In a future version, this astype will " + "raise if the conversion overflows, as it did in this " + "case with negative int64 values.", + FutureWarning, + stacklevel=find_stack_level(), + ) + elif dtype != np.int64: + # GH#45034 + warnings.warn( + f"The behavior of .astype from {self.dtype} to {dtype} is " + "deprecated. In a future version, this astype will return " + "exactly the specified dtype instead of int64, and will " + "raise if that conversion overflows.", + FutureWarning, + stacklevel=find_stack_level(), + ) if copy: values = values.copy() diff --git a/pandas/tests/arrays/period/test_astype.py b/pandas/tests/arrays/period/test_astype.py index f05265c910e1c..e9245c9ca786b 100644 --- a/pandas/tests/arrays/period/test_astype.py +++ b/pandas/tests/arrays/period/test_astype.py @@ -9,18 +9,29 @@ @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) -def test_astype(dtype): +def test_astype_int(dtype): # We choose to ignore the sign and size of integers for # Period/Datetime/Timedelta astype arr = period_array(["2000", "2001", None], freq="D") - result = arr.astype(dtype) if np.dtype(dtype).kind == "u": expected_dtype = np.dtype("uint64") + warn1 = FutureWarning else: expected_dtype = np.dtype("int64") - - expected = arr.astype(expected_dtype) + warn1 = None + + msg_overflow = "will raise if the conversion overflows" + with tm.assert_produces_warning(warn1, match=msg_overflow): + expected = arr.astype(expected_dtype) + + warn = None if dtype == expected_dtype else FutureWarning + msg = " will return exactly the specified dtype" + if warn is None and warn1 is not None: + warn = warn1 + msg = msg_overflow + with tm.assert_produces_warning(warn, match=msg): + result = arr.astype(dtype) assert result.dtype == expected_dtype tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index ad75c137ec703..9ea87be2a5468 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -77,7 +77,6 @@ def test_astype_copies(self, dtype, other): @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) def test_astype_int(self, dtype): arr = DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")]) - result = arr.astype(dtype) if np.dtype(dtype).kind == "u": expected_dtype = np.dtype("uint64") @@ -85,6 +84,13 @@ def test_astype_int(self, dtype): expected_dtype = np.dtype("int64") expected = arr.astype(expected_dtype) + warn = None + if dtype != expected_dtype: + warn = FutureWarning + msg = " will return exactly the specified dtype" + with tm.assert_produces_warning(warn, match=msg): + result = arr.astype(dtype) + assert result.dtype == expected_dtype tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 675c6ac8c9233..bf3491496ab3a 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -11,7 +11,6 @@ class TestTimedeltaArray: @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) def test_astype_int(self, dtype): arr = TimedeltaArray._from_sequence([Timedelta("1H"), Timedelta("2H")]) - result = arr.astype(dtype) if np.dtype(dtype).kind == "u": expected_dtype = np.dtype("uint64") @@ -19,6 +18,13 @@ def test_astype_int(self, dtype): expected_dtype = np.dtype("int64") expected = arr.astype(expected_dtype) + warn = None + if dtype != expected_dtype: + warn = FutureWarning + msg = " will return exactly the specified dtype" + with tm.assert_produces_warning(warn, match=msg): + result = arr.astype(dtype) + assert result.dtype == expected_dtype tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/methods/test_astype.py b/pandas/tests/indexes/datetimes/methods/test_astype.py index 9002371f25012..e7823f0c90b1a 100644 --- a/pandas/tests/indexes/datetimes/methods/test_astype.py +++ b/pandas/tests/indexes/datetimes/methods/test_astype.py @@ -52,7 +52,11 @@ def test_astype_uint(self): name="idx", ) tm.assert_index_equal(arr.astype("uint64"), expected) - tm.assert_index_equal(arr.astype("uint32"), expected) + + msg = "will return exactly the specified dtype instead of uint64" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = arr.astype("uint32") + tm.assert_index_equal(res, expected) def test_astype_with_tz(self): diff --git a/pandas/tests/indexes/interval/test_astype.py b/pandas/tests/indexes/interval/test_astype.py index 94b7ccd618cba..c253a745ef5a2 100644 --- a/pandas/tests/indexes/interval/test_astype.py +++ b/pandas/tests/indexes/interval/test_astype.py @@ -205,10 +205,18 @@ def index(self, request): @pytest.mark.parametrize("subtype", ["int64", "uint64"]) def test_subtype_integer(self, index, subtype): dtype = IntervalDtype(subtype, "right") - result = index.astype(dtype) - expected = IntervalIndex.from_arrays( - index.left.astype(subtype), index.right.astype(subtype), closed=index.closed - ) + + warn = None + if index.isna().any() and subtype == "uint64": + warn = FutureWarning + msg = "In a future version, this astype will raise if the conversion overflows" + + with tm.assert_produces_warning(warn, match=msg): + result = index.astype(dtype) + new_left = index.left.astype(subtype) + new_right = index.right.astype(subtype) + + expected = IntervalIndex.from_arrays(new_left, new_right, closed=index.closed) tm.assert_index_equal(result, expected) def test_subtype_float(self, index): diff --git a/pandas/tests/indexes/period/methods/test_astype.py b/pandas/tests/indexes/period/methods/test_astype.py index c3ea72c908459..fbc1d3702115e 100644 --- a/pandas/tests/indexes/period/methods/test_astype.py +++ b/pandas/tests/indexes/period/methods/test_astype.py @@ -58,7 +58,11 @@ def test_astype_uint(self): arr = period_range("2000", periods=2, name="idx") expected = UInt64Index(np.array([10957, 10958], dtype="uint64"), name="idx") tm.assert_index_equal(arr.astype("uint64"), expected) - tm.assert_index_equal(arr.astype("uint32"), expected) + + msg = "will return exactly the specified dtype instead of uint64" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = arr.astype("uint32") + tm.assert_index_equal(res, expected) def test_astype_object(self): idx = PeriodIndex([], freq="M") diff --git a/pandas/tests/indexes/timedeltas/methods/test_astype.py b/pandas/tests/indexes/timedeltas/methods/test_astype.py index 276b0dbf246cc..aa2f7b7af8d98 100644 --- a/pandas/tests/indexes/timedeltas/methods/test_astype.py +++ b/pandas/tests/indexes/timedeltas/methods/test_astype.py @@ -79,7 +79,11 @@ def test_astype_uint(self): np.array([3600000000000, 90000000000000], dtype="uint64") ) tm.assert_index_equal(arr.astype("uint64"), expected) - tm.assert_index_equal(arr.astype("uint32"), expected) + + msg = "will return exactly the specified dtype instead of uint64" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = arr.astype("uint32") + tm.assert_index_equal(res, expected) def test_astype_timedelta64(self): # GH 13149, GH 13209