BUG: Series.__setitem__ failing to cast numeric values (pandas-dev#45121

)
me-kbs · Dec 31, 2021 · fb9f205 · fb9f205
1 parent fa3d5f1
commit fb9f205
Show file tree

Hide file tree

Showing 8 changed files with 39 additions and 28 deletions.
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -837,6 +837,7 @@ Indexing
 - Bug in :meth:`IntervalIndex.get_indexer_non_unique` not handling targets of ``dtype`` 'object' with NaNs correctly (:issue:`44482`)
 - Fixed regression where a single column ``np.matrix`` was no longer coerced to a 1d ``np.ndarray`` when added to a :class:`DataFrame` (:issue:`42376`)
 - Bug in :meth:`Series.__getitem__` with a :class:`CategoricalIndex` of integers treating lists of integers as positional indexers, inconsistent with the behavior with a single scalar integer (:issue:`15470`, :issue:`14865`)
+- Bug in :meth:`Series.__setitem__` when setting floats or integers into integer-dtype series failing to upcast when necessary to retain precision (:issue:`45121`)
 -
 
 Missing

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -2209,6 +2209,12 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
                 # Anything other than integer we cannot hold
                 return False
             elif dtype.itemsize < tipo.itemsize:
+                if is_integer(element):
+                    # e.g. test_setitem_series_int8 if we have a python int 1
+                    #  tipo may be np.int32, despite the fact that it will fit
+                    #  in smaller int dtypes.
+                    info = np.iinfo(dtype)
+                    return info.min <= element <= info.max
                 return False
             elif not isinstance(tipo, np.dtype):
                 # i.e. nullable IntegerDtype; we can put this into an ndarray

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -92,14 +92,14 @@
 )
 
 from pandas.core.dtypes.cast import (
+    can_hold_element,
     construct_1d_arraylike_from_scalar,
     construct_2d_arraylike_from_scalar,
     find_common_type,
     infer_dtype_from_scalar,
     invalidate_string_dtypes,
     maybe_box_native,
     maybe_downcast_to_dtype,
-    validate_numeric_casting,
 )
 from pandas.core.dtypes.common import (
     ensure_platform_int,
@@ -3865,7 +3865,9 @@ def _set_value(
 
             series = self._get_item_cache(col)
             loc = self.index.get_loc(index)
-            validate_numeric_casting(series.dtype, value)
+            if not can_hold_element(series._values, value):
+                # We'll go through loc and end up casting.
+                raise TypeError
 
             series._mgr.setitem_inplace(loc, value)
             # Note: trying to use series._set_value breaks tests in

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -69,7 +69,6 @@
     find_common_type,
     infer_dtype_from,
     maybe_cast_pointwise_result,
-    validate_numeric_casting,
 )
 from pandas.core.dtypes.common import (
     ensure_int64,
@@ -5643,7 +5642,8 @@ def set_value(self, arr, key, value):
             stacklevel=find_stack_level(),
         )
         loc = self._engine.get_loc(key)
-        validate_numeric_casting(arr.dtype, value)
+        if not can_hold_element(arr, value):
+            raise ValueError
         arr[loc] = value
 
     _index_shared_docs[

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -62,10 +62,10 @@
 )
 
 from pandas.core.dtypes.cast import (
+    can_hold_element,
     convert_dtypes,
     maybe_box_native,
     maybe_cast_pointwise_result,
-    validate_numeric_casting,
 )
 from pandas.core.dtypes.common import (
     ensure_platform_int,
@@ -1143,9 +1143,9 @@ def __setitem__(self, key, value) -> None:
 
     def _set_with_engine(self, key, value) -> None:
         loc = self.index.get_loc(key)
-        # error: Argument 1 to "validate_numeric_casting" has incompatible type
-        # "Union[dtype, ExtensionDtype]"; expected "dtype"
-        validate_numeric_casting(self.dtype, value)  # type: ignore[arg-type]
+        if not can_hold_element(self._values, value):
+            raise ValueError
+
         # this is equivalent to self._values[key] = value
         self._mgr.setitem_inplace(loc, value)
 

diff --git a/pandas/tests/dtypes/cast/test_can_hold_element.py b/pandas/tests/dtypes/cast/test_can_hold_element.py
@@ -53,3 +53,18 @@ def test_can_hold_element_int_values_float_ndarray():
     # integer but not losslessly castable to int64
     element = np.array([3, 2 ** 65], dtype=np.float64)
     assert not can_hold_element(arr, element)
+
+
+def test_can_hold_element_int8_int():
+    arr = np.array([], dtype=np.int8)
+
+    element = 2
+    assert can_hold_element(arr, element)
+    assert can_hold_element(arr, np.int8(element))
+    assert can_hold_element(arr, np.uint8(element))
+    assert can_hold_element(arr, np.int16(element))
+    assert can_hold_element(arr, np.uint16(element))
+    assert can_hold_element(arr, np.int32(element))
+    assert can_hold_element(arr, np.uint32(element))
+    assert can_hold_element(arr, np.int64(element))
+    assert can_hold_element(arr, np.uint64(element))
diff --git a/pandas/tests/frame/indexing/test_set_value.py b/pandas/tests/frame/indexing/test_set_value.py
@@ -1,5 +1,4 @@
 import numpy as np
-import pytest
 
 from pandas.core.dtypes.common import is_float_dtype
 
@@ -38,9 +37,9 @@ def test_set_value_resize(self, float_frame):
         res._set_value("foobar", "baz", 5)
         assert is_float_dtype(res["baz"])
         assert isna(res["baz"].drop(["foobar"])).all()
-        msg = "could not convert string to float: 'sam'"
-        with pytest.raises(ValueError, match=msg):
-            res._set_value("foobar", "baz", "sam")
+
+        res._set_value("foobar", "baz", "sam")
+        assert res.loc["foobar", "baz"] == "sam"
 
     def test_set_value_with_index_dtype_change(self):
         df_orig = DataFrame(np.random.randn(3, 3), index=range(3), columns=list("ABC"))

diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py
@@ -110,38 +110,26 @@ def test_setitem_series_object(self, val, exp_dtype):
         "val,exp_dtype",
         [(1, np.int64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)],
     )
-    def test_setitem_series_int64(self, val, exp_dtype, request):
+    def test_setitem_series_int64(self, val, exp_dtype):
         obj = pd.Series([1, 2, 3, 4])
         assert obj.dtype == np.int64
 
-        if exp_dtype is np.float64:
-            exp = pd.Series([1, 1, 3, 4])
-            self._assert_setitem_series_conversion(obj, 1.1, exp, np.int64)
-            mark = pytest.mark.xfail(reason="GH12747 The result must be float")
-            request.node.add_marker(mark)
-
         exp = pd.Series([1, val, 3, 4])
         self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
 
     @pytest.mark.parametrize(
         "val,exp_dtype", [(np.int32(1), np.int8), (np.int16(2 ** 9), np.int16)]
     )
-    def test_setitem_series_int8(self, val, exp_dtype, request):
+    def test_setitem_series_int8(self, val, exp_dtype):
         obj = pd.Series([1, 2, 3, 4], dtype=np.int8)
         assert obj.dtype == np.int8
 
-        if exp_dtype is np.int16:
-            exp = pd.Series([1, 0, 3, 4], dtype=np.int8)
-            self._assert_setitem_series_conversion(obj, val, exp, np.int8)
-            mark = pytest.mark.xfail(
-                reason="BUG: it must be pd.Series([1, 1, 3, 4], dtype=np.int16"
-            )
-            request.node.add_marker(mark)
-
         warn = None if exp_dtype is np.int8 else FutureWarning
         msg = "Values are too large to be losslessly cast to int8"
         with tm.assert_produces_warning(warn, match=msg):
             exp = pd.Series([1, val, 3, 4], dtype=np.int8)
+
+        exp = pd.Series([1, val, 3, 4], dtype=exp_dtype)
         self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
 
     @pytest.mark.parametrize(