From 396015706d6664af082ac8734bd7f4659d5b2684 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 28 Feb 2022 19:36:28 -0800 Subject: [PATCH] BUG: df.iloc[:, 0] = df.iloc[::-1, 0] not setting inplace for EAs (#45352) --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/internals/blocks.py | 14 +-------- pandas/tests/extension/base/setitem.py | 12 ++++++++ pandas/tests/frame/test_constructors.py | 39 ++++++++++++++----------- pandas/tests/indexing/test_iloc.py | 9 ++++-- 5 files changed, 42 insertions(+), 33 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 59d4ef1d9b39d..e47751245cb65 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -360,6 +360,7 @@ Indexing - Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`) - Bug in :meth:`Series.align` does not create :class:`MultiIndex` with union of levels when both MultiIndexes intersections are identical (:issue:`45224`) - Bug in setting a NA value (``None`` or ``np.nan``) into a :class:`Series` with int-based :class:`IntervalDtype` incorrectly casting to object dtype instead of a float-based :class:`IntervalDtype` (:issue:`45568`) +- Bug in indexing setting values into an ``ExtensionDtype`` column with ``df.iloc[:, i] = values`` with ``values`` having the same dtype as ``df.iloc[:, i]`` incorrectly inserting a new array instead of setting in-place (:issue:`33457`) - Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised instead of casting to a common dtype (:issue:`45070`) - Bug in :meth:`Series.__setitem__` when setting incompatible values into a ``PeriodDtype`` or ``IntervalDtype`` :class:`Series` raising when indexing with a boolean mask but coercing when indexing with otherwise-equivalent indexers; these now consistently coerce, along with :meth:`Series.mask` and :meth:`Series.where` (:issue:`45768`) - Bug in :meth:`DataFrame.where` with multiple columns with datetime-like dtypes failing to downcast results consistent with other dtypes (:issue:`45837`) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 42c6f03a3af93..3693edbae7d95 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1636,21 +1636,9 @@ def iget(self, i: int | tuple[int, int] | tuple[slice, int]): return self.values def set_inplace(self, locs, values: ArrayLike) -> None: - # NB: This is a misnomer, is supposed to be inplace but is not, - # see GH#33457 # When an ndarray, we should have locs.tolist() == [0] # When a BlockPlacement we should have list(locs) == [0] - - # error: Incompatible types in assignment (expression has type - # "Union[ExtensionArray, ndarray[Any, Any]]", variable has type - # "ExtensionArray") - self.values = values # type: ignore[assignment] - try: - # TODO(GH33457) this can be removed - self._cache.clear() - except AttributeError: - # _cache not yet initialized - pass + self.values[:] = values def _maybe_squeeze_arg(self, arg): """ diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 221710fbffca1..0c7fc61e2a7d9 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -360,6 +360,14 @@ def test_setitem_series(self, data, full_indexer): def test_setitem_frame_2d_values(self, data): # GH#44514 df = pd.DataFrame({"A": data}) + + # Avoiding using_array_manager fixture + # https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410 + using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager) + + df = pd.DataFrame({"A": data}) + blk_data = df._mgr.arrays[0] + orig = df.copy() df.iloc[:] = df @@ -370,6 +378,10 @@ def test_setitem_frame_2d_values(self, data): df.iloc[:] = df.values self.assert_frame_equal(df, orig) + if not using_array_manager: + # GH#33457 Check that this setting occurred in-place + # FIXME(ArrayManager): this should work there too + assert df._mgr.arrays[0] is blk_data df.iloc[:-1] = df.values[:-1] self.assert_frame_equal(df, orig) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 475497cf8b20b..e5b1673da1e27 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2519,6 +2519,7 @@ def test_dict_nocopy( return c = pd.array([1, 2], dtype=any_numeric_ea_dtype) + c_orig = c.copy() df = DataFrame({"a": a, "b": b, "c": c}, copy=copy) def get_base(obj): @@ -2530,9 +2531,19 @@ def get_base(obj): else: raise TypeError - def check_views(): + def check_views(c_only: bool = False): # written to work for either BlockManager or ArrayManager + + # Check that the underlying data behind df["c"] is still `c` + # after setting with iloc. Since we don't know which entry in + # df._mgr.arrays corresponds to df["c"], we just check that exactly + # one of these arrays is `c`. GH#38939 assert sum(x is c for x in df._mgr.arrays) == 1 + if c_only: + # If we ever stop consolidating in setitem_with_indexer, + # this will become unnecessary. + return + assert ( sum( get_base(x) is a @@ -2554,23 +2565,18 @@ def check_views(): # constructor preserves views check_views() + # TODO: most of the rest of this test belongs in indexing tests df.iloc[0, 0] = 0 df.iloc[0, 1] = 0 if not copy: - # Check that the underlying data behind df["c"] is still `c` - # after setting with iloc. Since we don't know which entry in - # df._mgr.arrays corresponds to df["c"], we just check that exactly - # one of these arrays is `c`. GH#38939 - assert sum(x is c for x in df._mgr.arrays) == 1 - # TODO: we can call check_views if we stop consolidating - # in setitem_with_indexer + check_views(True) # FIXME(GH#35417): until GH#35417, iloc.setitem into EA values does not preserve # view, so we have to check in the other direction - # df.iloc[0, 2] = 0 - # if not copy: - # check_views() - c[0] = 0 + df.iloc[:, 2] = pd.array([45, 46], dtype=c.dtype) + assert df.dtypes.iloc[2] == c.dtype + if not copy: + check_views(True) if copy: if a.dtype.kind == "M": @@ -2580,14 +2586,13 @@ def check_views(): assert a[0] == a.dtype.type(1) assert b[0] == b.dtype.type(3) # FIXME(GH#35417): enable after GH#35417 - # assert c[0] == 1 - assert df.iloc[0, 2] == 1 + assert c[0] == c_orig[0] # i.e. df.iloc[0, 2]=45 did *not* update c else: # TODO: we can call check_views if we stop consolidating # in setitem_with_indexer - # FIXME(GH#35417): enable after GH#35417 - # assert b[0] == 0 - assert df.iloc[0, 2] == 0 + assert c[0] == 45 # i.e. df.iloc[0, 2]=45 *did* update c + # TODO: we can check b[0] == 0 if we stop consolidating in + # setitem_with_indexer (except for datetimelike?) def test_from_series_with_name_with_columns(self): # GH 7893 diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index a7b70a48adf88..a965d32c82c61 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -875,16 +875,19 @@ def test_series_indexing_zerodim_np_array(self): result = s.iloc[np.array(0)] assert result == 1 - @pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/33457") + @td.skip_array_manager_not_yet_implemented def test_iloc_setitem_categorical_updates_inplace(self): # Mixed dtype ensures we go through take_split_path in setitem_with_indexer cat = Categorical(["A", "B", "C"]) - df = DataFrame({1: cat, 2: [1, 2, 3]}) + df = DataFrame({1: cat, 2: [1, 2, 3]}, copy=False) + + assert tm.shares_memory(df[1], cat) # This should modify our original values in-place df.iloc[:, 0] = cat[::-1] + assert tm.shares_memory(df[1], cat) - expected = Categorical(["C", "B", "A"]) + expected = Categorical(["C", "B", "A"], categories=["A", "B", "C"]) tm.assert_categorical_equal(cat, expected) def test_iloc_with_boolean_operation(self):