From fb9f205a43b683051ba61849e5a4cf6d99679171 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 31 Dec 2021 07:20:29 -0800 Subject: [PATCH] BUG: Series.__setitem__ failing to cast numeric values (#45121) --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/core/dtypes/cast.py | 6 ++++++ pandas/core/frame.py | 6 ++++-- pandas/core/indexes/base.py | 4 ++-- pandas/core/series.py | 8 ++++---- .../dtypes/cast/test_can_hold_element.py | 15 ++++++++++++++ pandas/tests/frame/indexing/test_set_value.py | 7 +++---- pandas/tests/indexing/test_coercion.py | 20 ++++--------------- 8 files changed, 39 insertions(+), 28 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 231e76cc153d7..0f05cdb14e09e 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -837,6 +837,7 @@ Indexing - Bug in :meth:`IntervalIndex.get_indexer_non_unique` not handling targets of ``dtype`` 'object' with NaNs correctly (:issue:`44482`) - Fixed regression where a single column ``np.matrix`` was no longer coerced to a 1d ``np.ndarray`` when added to a :class:`DataFrame` (:issue:`42376`) - Bug in :meth:`Series.__getitem__` with a :class:`CategoricalIndex` of integers treating lists of integers as positional indexers, inconsistent with the behavior with a single scalar integer (:issue:`15470`, :issue:`14865`) +- Bug in :meth:`Series.__setitem__` when setting floats or integers into integer-dtype series failing to upcast when necessary to retain precision (:issue:`45121`) - Missing diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 4f4eac828fd60..f18f1c760ca28 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2209,6 +2209,12 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: # Anything other than integer we cannot hold return False elif dtype.itemsize < tipo.itemsize: + if is_integer(element): + # e.g. test_setitem_series_int8 if we have a python int 1 + # tipo may be np.int32, despite the fact that it will fit + # in smaller int dtypes. + info = np.iinfo(dtype) + return info.min <= element <= info.max return False elif not isinstance(tipo, np.dtype): # i.e. nullable IntegerDtype; we can put this into an ndarray diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 252534a0cb790..9b35d9ce39ec6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -92,6 +92,7 @@ ) from pandas.core.dtypes.cast import ( + can_hold_element, construct_1d_arraylike_from_scalar, construct_2d_arraylike_from_scalar, find_common_type, @@ -99,7 +100,6 @@ invalidate_string_dtypes, maybe_box_native, maybe_downcast_to_dtype, - validate_numeric_casting, ) from pandas.core.dtypes.common import ( ensure_platform_int, @@ -3865,7 +3865,9 @@ def _set_value( series = self._get_item_cache(col) loc = self.index.get_loc(index) - validate_numeric_casting(series.dtype, value) + if not can_hold_element(series._values, value): + # We'll go through loc and end up casting. + raise TypeError series._mgr.setitem_inplace(loc, value) # Note: trying to use series._set_value breaks tests in diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 537dd0f3f8dc8..1d773e2635185 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -69,7 +69,6 @@ find_common_type, infer_dtype_from, maybe_cast_pointwise_result, - validate_numeric_casting, ) from pandas.core.dtypes.common import ( ensure_int64, @@ -5643,7 +5642,8 @@ def set_value(self, arr, key, value): stacklevel=find_stack_level(), ) loc = self._engine.get_loc(key) - validate_numeric_casting(arr.dtype, value) + if not can_hold_element(arr, value): + raise ValueError arr[loc] = value _index_shared_docs[ diff --git a/pandas/core/series.py b/pandas/core/series.py index fa1ac0fcfb82d..81b901b13a42b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -62,10 +62,10 @@ ) from pandas.core.dtypes.cast import ( + can_hold_element, convert_dtypes, maybe_box_native, maybe_cast_pointwise_result, - validate_numeric_casting, ) from pandas.core.dtypes.common import ( ensure_platform_int, @@ -1143,9 +1143,9 @@ def __setitem__(self, key, value) -> None: def _set_with_engine(self, key, value) -> None: loc = self.index.get_loc(key) - # error: Argument 1 to "validate_numeric_casting" has incompatible type - # "Union[dtype, ExtensionDtype]"; expected "dtype" - validate_numeric_casting(self.dtype, value) # type: ignore[arg-type] + if not can_hold_element(self._values, value): + raise ValueError + # this is equivalent to self._values[key] = value self._mgr.setitem_inplace(loc, value) diff --git a/pandas/tests/dtypes/cast/test_can_hold_element.py b/pandas/tests/dtypes/cast/test_can_hold_element.py index 3a486f795f23e..906123b1aee74 100644 --- a/pandas/tests/dtypes/cast/test_can_hold_element.py +++ b/pandas/tests/dtypes/cast/test_can_hold_element.py @@ -53,3 +53,18 @@ def test_can_hold_element_int_values_float_ndarray(): # integer but not losslessly castable to int64 element = np.array([3, 2 ** 65], dtype=np.float64) assert not can_hold_element(arr, element) + + +def test_can_hold_element_int8_int(): + arr = np.array([], dtype=np.int8) + + element = 2 + assert can_hold_element(arr, element) + assert can_hold_element(arr, np.int8(element)) + assert can_hold_element(arr, np.uint8(element)) + assert can_hold_element(arr, np.int16(element)) + assert can_hold_element(arr, np.uint16(element)) + assert can_hold_element(arr, np.int32(element)) + assert can_hold_element(arr, np.uint32(element)) + assert can_hold_element(arr, np.int64(element)) + assert can_hold_element(arr, np.uint64(element)) diff --git a/pandas/tests/frame/indexing/test_set_value.py b/pandas/tests/frame/indexing/test_set_value.py index b8150c26aa6bb..7b68566bab225 100644 --- a/pandas/tests/frame/indexing/test_set_value.py +++ b/pandas/tests/frame/indexing/test_set_value.py @@ -1,5 +1,4 @@ import numpy as np -import pytest from pandas.core.dtypes.common import is_float_dtype @@ -38,9 +37,9 @@ def test_set_value_resize(self, float_frame): res._set_value("foobar", "baz", 5) assert is_float_dtype(res["baz"]) assert isna(res["baz"].drop(["foobar"])).all() - msg = "could not convert string to float: 'sam'" - with pytest.raises(ValueError, match=msg): - res._set_value("foobar", "baz", "sam") + + res._set_value("foobar", "baz", "sam") + assert res.loc["foobar", "baz"] == "sam" def test_set_value_with_index_dtype_change(self): df_orig = DataFrame(np.random.randn(3, 3), index=range(3), columns=list("ABC")) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 014f0f5933387..1ace46b0ca5c9 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -110,38 +110,26 @@ def test_setitem_series_object(self, val, exp_dtype): "val,exp_dtype", [(1, np.int64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)], ) - def test_setitem_series_int64(self, val, exp_dtype, request): + def test_setitem_series_int64(self, val, exp_dtype): obj = pd.Series([1, 2, 3, 4]) assert obj.dtype == np.int64 - if exp_dtype is np.float64: - exp = pd.Series([1, 1, 3, 4]) - self._assert_setitem_series_conversion(obj, 1.1, exp, np.int64) - mark = pytest.mark.xfail(reason="GH12747 The result must be float") - request.node.add_marker(mark) - exp = pd.Series([1, val, 3, 4]) self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) @pytest.mark.parametrize( "val,exp_dtype", [(np.int32(1), np.int8), (np.int16(2 ** 9), np.int16)] ) - def test_setitem_series_int8(self, val, exp_dtype, request): + def test_setitem_series_int8(self, val, exp_dtype): obj = pd.Series([1, 2, 3, 4], dtype=np.int8) assert obj.dtype == np.int8 - if exp_dtype is np.int16: - exp = pd.Series([1, 0, 3, 4], dtype=np.int8) - self._assert_setitem_series_conversion(obj, val, exp, np.int8) - mark = pytest.mark.xfail( - reason="BUG: it must be pd.Series([1, 1, 3, 4], dtype=np.int16" - ) - request.node.add_marker(mark) - warn = None if exp_dtype is np.int8 else FutureWarning msg = "Values are too large to be losslessly cast to int8" with tm.assert_produces_warning(warn, match=msg): exp = pd.Series([1, val, 3, 4], dtype=np.int8) + + exp = pd.Series([1, val, 3, 4], dtype=exp_dtype) self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) @pytest.mark.parametrize(