Skip to content

Commit

Permalink
BUG: Series[dt64].__setitem__ with all-false mask incorrectly upcasti…
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Feb 17, 2022
1 parent 9d6d587 commit 2541e00
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 15 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ Indexing
- Bug in :meth:`loc.__setitem__` treating ``range`` keys as positional instead of label-based (:issue:`45479`)
- Bug in :meth:`Series.__setitem__` when setting ``boolean`` dtype values containing ``NA`` incorrectly raising instead of casting to ``boolean`` dtype (:issue:`45462`)
- Bug in :meth:`Series.__setitem__` where setting :attr:`NA` into a numeric-dtype :class:`Series` would incorrectly upcast to object-dtype rather than treating the value as ``np.nan`` (:issue:`44199`)
- Bug in :meth:`Series.__setitem__` with ``datetime64[ns]`` dtype, an all-``False`` boolean mask, and an incompatible value incorrectly casting to ``object`` instead of retaining ``datetime64[ns]`` dtype (:issue:`45967`)
- Bug in :meth:`Index.__getitem__` raising ``ValueError`` when indexer is from boolean dtype with ``NA`` (:issue:`45806`)
- Bug in :meth:`Series.mask` with ``inplace=True`` or setting values with a boolean mask with small integer dtypes incorrectly raising (:issue:`45750`)
- Bug in :meth:`DataFrame.mask` with ``inplace=True`` and ``ExtensionDtype`` columns incorrectly raising (:issue:`45577`)
Expand Down
8 changes: 6 additions & 2 deletions pandas/core/array_algos/putmask.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,11 @@ def putmask_without_repeat(
# TODO: this prob needs some better checking for 2D cases
nlocs = mask.sum()
if nlocs > 0 and is_list_like(new) and getattr(new, "ndim", 1) == 1:
if nlocs == len(new):
shape = np.shape(new)
# np.shape compat for if setitem_datetimelike_compat
# changed arraylike to list e.g. test_where_dt64_2d

if nlocs == shape[-1]:
# GH#30567
# If length of ``new`` is less than the length of ``values``,
# `np.putmask` would first repeat the ``new`` array and then
Expand All @@ -90,7 +94,7 @@ def putmask_without_repeat(
# to place in the masked locations of ``values``
np.place(values, mask, new)
# i.e. values[mask] = new
elif mask.shape[-1] == len(new) or len(new) == 1:
elif mask.shape[-1] == shape[-1] or shape[-1] == 1:
np.putmask(values, mask, new)
else:
raise ValueError("cannot assign mismatch length to masked array")
Expand Down
44 changes: 33 additions & 11 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1478,26 +1478,48 @@ def putmask(self, mask, new) -> list[Block]:
new = self._maybe_squeeze_arg(new)
mask = self._maybe_squeeze_arg(mask)

if not mask.any():
return [self]

try:
# Caller is responsible for ensuring matching lengths
values._putmask(mask, new)
except (TypeError, ValueError) as err:
_catch_deprecated_value_error(err)

if is_interval_dtype(self.dtype):
# Discussion about what we want to support in the general
# case GH#39584
blk = self.coerce_to_target_dtype(orig_new)
return blk.putmask(orig_mask, orig_new)
if self.ndim == 1 or self.shape[0] == 1:

elif isinstance(self, NDArrayBackedExtensionBlock):
# NB: not (yet) the same as
# isinstance(values, NDArrayBackedExtensionArray)
blk = self.coerce_to_target_dtype(orig_new)
return blk.putmask(orig_mask, orig_new)
if is_interval_dtype(self.dtype):
# Discussion about what we want to support in the general
# case GH#39584
blk = self.coerce_to_target_dtype(orig_new)
return blk.putmask(orig_mask, orig_new)

elif isinstance(self, NDArrayBackedExtensionBlock):
# NB: not (yet) the same as
# isinstance(values, NDArrayBackedExtensionArray)
blk = self.coerce_to_target_dtype(orig_new)
return blk.putmask(orig_mask, orig_new)

else:
raise

else:
raise
# Same pattern we use in Block.putmask
is_array = isinstance(orig_new, (np.ndarray, ExtensionArray))

res_blocks = []
nbs = self._split()
for i, nb in enumerate(nbs):
n = orig_new
if is_array:
# we have a different value per-column
n = orig_new[:, i : i + 1]

submask = orig_mask[:, i : i + 1]
rbs = nb.putmask(submask, n)
res_blocks.extend(rbs)
return res_blocks

return [self]

Expand Down
12 changes: 10 additions & 2 deletions pandas/tests/frame/indexing/test_where.py
Original file line number Diff line number Diff line change
Expand Up @@ -988,8 +988,16 @@ def _check_where_equivalences(df, mask, other, expected):
res = df.mask(~mask, other)
tm.assert_frame_equal(res, expected)

# Note: we cannot do the same with frame.mask(~mask, other, inplace=True)
# bc that goes through Block.putmask which does *not* downcast.
# Note: frame.mask(~mask, other, inplace=True) takes some more work bc
# Block.putmask does *not* downcast. The change to 'expected' here
# is specific to the cases in test_where_dt64_2d.
df = df.copy()
df.mask(~mask, other, inplace=True)
if not mask.all():
# with mask.all(), Block.putmask is a no-op, so does not downcast
expected = expected.copy()
expected["A"] = expected["A"].astype(object)
tm.assert_frame_equal(df, expected)


def test_where_dt64_2d():
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -1625,3 +1625,18 @@ def test_setitem_bool_indexer_dont_broadcast_length1_values(size, mask, item, bo
expected = Series(np.arange(size, dtype=float))
expected[selection] = item
tm.assert_series_equal(ser, expected)


def test_setitem_empty_mask_dont_upcast_dt64():
    """
    Setting through an all-False boolean mask must be a no-op.

    GH#45967: assigning an incompatible value (here a string) into a
    datetime64 Series through a mask that selects nothing should leave
    both the values and the dtype unchanged instead of casting to object.
    """
    dti = date_range("2016-01-01", periods=3)
    ser = Series(dti)
    orig = ser.copy()
    # all-False mask: no element is selected for assignment
    mask = np.zeros(3, dtype=bool)

    # boolean-mask __setitem__ path
    ser[mask] = "foo"
    assert ser.dtype == dti.dtype  # no-op -> don't upcast
    tm.assert_series_equal(ser, orig)

    # in-place Series.mask path, checked with the same mask and value
    ser.mask(mask, "foo", inplace=True)
    assert ser.dtype == dti.dtype  # no-op -> don't upcast
    tm.assert_series_equal(ser, orig)

0 comments on commit 2541e00

Please sign in to comment.