Skip to content

Commit

Permalink
BUG: Series.__setitem__ failing to cast numeric values (pandas-dev#45121)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Dec 31, 2021
1 parent fa3d5f1 commit fb9f205
Show file tree
Hide file tree
Showing 8 changed files with 39 additions and 28 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -837,6 +837,7 @@ Indexing
- Bug in :meth:`IntervalIndex.get_indexer_non_unique` not handling targets of ``dtype`` 'object' with NaNs correctly (:issue:`44482`)
- Fixed regression where a single column ``np.matrix`` was no longer coerced to a 1d ``np.ndarray`` when added to a :class:`DataFrame` (:issue:`42376`)
- Bug in :meth:`Series.__getitem__` with a :class:`CategoricalIndex` of integers treating lists of integers as positional indexers, inconsistent with the behavior with a single scalar integer (:issue:`15470`, :issue:`14865`)
- Bug in :meth:`Series.__setitem__` when setting floats or integers into integer-dtype series failing to upcast when necessary to retain precision (:issue:`45121`)
-

Missing
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -2209,6 +2209,12 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
# Anything other than integer we cannot hold
return False
elif dtype.itemsize < tipo.itemsize:
if is_integer(element):
# e.g. test_setitem_series_int8 if we have a python int 1
# tipo may be np.int32, despite the fact that it will fit
# in smaller int dtypes.
info = np.iinfo(dtype)
return info.min <= element <= info.max
return False
elif not isinstance(tipo, np.dtype):
# i.e. nullable IntegerDtype; we can put this into an ndarray
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,14 @@
)

from pandas.core.dtypes.cast import (
can_hold_element,
construct_1d_arraylike_from_scalar,
construct_2d_arraylike_from_scalar,
find_common_type,
infer_dtype_from_scalar,
invalidate_string_dtypes,
maybe_box_native,
maybe_downcast_to_dtype,
validate_numeric_casting,
)
from pandas.core.dtypes.common import (
ensure_platform_int,
Expand Down Expand Up @@ -3865,7 +3865,9 @@ def _set_value(

series = self._get_item_cache(col)
loc = self.index.get_loc(index)
validate_numeric_casting(series.dtype, value)
if not can_hold_element(series._values, value):
# We'll go through loc and end up casting.
raise TypeError

series._mgr.setitem_inplace(loc, value)
# Note: trying to use series._set_value breaks tests in
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@
find_common_type,
infer_dtype_from,
maybe_cast_pointwise_result,
validate_numeric_casting,
)
from pandas.core.dtypes.common import (
ensure_int64,
Expand Down Expand Up @@ -5643,7 +5642,8 @@ def set_value(self, arr, key, value):
stacklevel=find_stack_level(),
)
loc = self._engine.get_loc(key)
validate_numeric_casting(arr.dtype, value)
if not can_hold_element(arr, value):
raise ValueError
arr[loc] = value

_index_shared_docs[
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,10 @@
)

from pandas.core.dtypes.cast import (
can_hold_element,
convert_dtypes,
maybe_box_native,
maybe_cast_pointwise_result,
validate_numeric_casting,
)
from pandas.core.dtypes.common import (
ensure_platform_int,
Expand Down Expand Up @@ -1143,9 +1143,9 @@ def __setitem__(self, key, value) -> None:

def _set_with_engine(self, key, value) -> None:
loc = self.index.get_loc(key)
# error: Argument 1 to "validate_numeric_casting" has incompatible type
# "Union[dtype, ExtensionDtype]"; expected "dtype"
validate_numeric_casting(self.dtype, value) # type: ignore[arg-type]
if not can_hold_element(self._values, value):
raise ValueError

# this is equivalent to self._values[key] = value
self._mgr.setitem_inplace(loc, value)

Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/dtypes/cast/test_can_hold_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,18 @@ def test_can_hold_element_int_values_float_ndarray():
# integer but not losslessly castable to int64
element = np.array([3, 2 ** 65], dtype=np.float64)
assert not can_hold_element(arr, element)


def test_can_hold_element_int8_int():
    """An int8 array can hold the value 2 whatever integer type carries it.

    Covers a plain Python int as well as every signed/unsigned NumPy integer
    scalar type, including those whose itemsize is larger than int8's: what
    matters is whether the *value* fits in the target dtype's range, not the
    width of the scalar's own dtype.
    """
    arr = np.array([], dtype=np.int8)
    element = 2

    # A plain Python int has no NumPy dtype of its own.
    assert can_hold_element(arr, element)

    # Same value wrapped in each NumPy integer scalar type, narrowest to
    # widest, signed and unsigned.
    numpy_int_types = (
        np.int8,
        np.uint8,
        np.int16,
        np.uint16,
        np.int32,
        np.uint32,
        np.int64,
        np.uint64,
    )
    for scalar_type in numpy_int_types:
        assert can_hold_element(arr, scalar_type(element)), scalar_type
7 changes: 3 additions & 4 deletions pandas/tests/frame/indexing/test_set_value.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import numpy as np
import pytest

from pandas.core.dtypes.common import is_float_dtype

Expand Down Expand Up @@ -38,9 +37,9 @@ def test_set_value_resize(self, float_frame):
res._set_value("foobar", "baz", 5)
assert is_float_dtype(res["baz"])
assert isna(res["baz"].drop(["foobar"])).all()
msg = "could not convert string to float: 'sam'"
with pytest.raises(ValueError, match=msg):
res._set_value("foobar", "baz", "sam")

res._set_value("foobar", "baz", "sam")
assert res.loc["foobar", "baz"] == "sam"

def test_set_value_with_index_dtype_change(self):
df_orig = DataFrame(np.random.randn(3, 3), index=range(3), columns=list("ABC"))
Expand Down
20 changes: 4 additions & 16 deletions pandas/tests/indexing/test_coercion.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,38 +110,26 @@ def test_setitem_series_object(self, val, exp_dtype):
"val,exp_dtype",
[(1, np.int64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)],
)
def test_setitem_series_int64(self, val, exp_dtype, request):
def test_setitem_series_int64(self, val, exp_dtype):
obj = pd.Series([1, 2, 3, 4])
assert obj.dtype == np.int64

if exp_dtype is np.float64:
exp = pd.Series([1, 1, 3, 4])
self._assert_setitem_series_conversion(obj, 1.1, exp, np.int64)
mark = pytest.mark.xfail(reason="GH12747 The result must be float")
request.node.add_marker(mark)

exp = pd.Series([1, val, 3, 4])
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)

@pytest.mark.parametrize(
"val,exp_dtype", [(np.int32(1), np.int8), (np.int16(2 ** 9), np.int16)]
)
def test_setitem_series_int8(self, val, exp_dtype, request):
def test_setitem_series_int8(self, val, exp_dtype):
obj = pd.Series([1, 2, 3, 4], dtype=np.int8)
assert obj.dtype == np.int8

if exp_dtype is np.int16:
exp = pd.Series([1, 0, 3, 4], dtype=np.int8)
self._assert_setitem_series_conversion(obj, val, exp, np.int8)
mark = pytest.mark.xfail(
reason="BUG: it must be pd.Series([1, 1, 3, 4], dtype=np.int16"
)
request.node.add_marker(mark)

warn = None if exp_dtype is np.int8 else FutureWarning
msg = "Values are too large to be losslessly cast to int8"
with tm.assert_produces_warning(warn, match=msg):
exp = pd.Series([1, val, 3, 4], dtype=np.int8)

exp = pd.Series([1, val, 3, 4], dtype=exp_dtype)
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)

@pytest.mark.parametrize(
Expand Down

0 comments on commit fb9f205

Please sign in to comment.