From e932ec966793074605dc9134a06737a895e6e022 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Sat, 26 Feb 2022 16:58:57 -0500 Subject: [PATCH] BUG: .transform(...) with "first" and "last" fail when axis=1 (#46074) --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/groupby/generic.py | 9 ----- pandas/core/groupby/groupby.py | 6 +-- .../tests/groupby/transform/test_transform.py | 40 ++++++------------- 4 files changed, 15 insertions(+), 41 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 855c4dfe257dd..5d656cde08610 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -382,6 +382,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.resample` ignoring ``closed="right"`` on :class:`TimedeltaIndex` (:issue:`45414`) - Bug in :meth:`.DataFrameGroupBy.transform` fails when ``func="size"`` and the input DataFrame has multiple columns (:issue:`27469`) - Bug in :meth:`.DataFrameGroupBy.size` and :meth:`.DataFrameGroupBy.transform` with ``func="size"`` produced incorrect results when ``axis=1`` (:issue:`45715`) +- Bug in :meth:`.DataFrameGroupBy.transform` fails when ``axis=1`` and ``func`` is ``"first"`` or ``"last"`` (:issue:`45986`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 71cef46950e12..aef82392590d3 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -472,9 +472,6 @@ def _transform_general(self, func: Callable, *args, **kwargs) -> Series: result.name = self.obj.name return result - def _can_use_transform_fast(self, func: str, result) -> bool: - return True - def filter(self, func, dropna: bool = True, *args, **kwargs): """ Return a copy of a Series excluding elements from groups that @@ -1184,12 +1181,6 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs 
) - def _can_use_transform_fast(self, func: str, result) -> bool: - return func == "size" or ( - isinstance(result, DataFrame) - and result.columns.equals(self._obj_with_exclusions.columns) - ) - def _define_paths(self, func, *args, **kwargs): if isinstance(func, str): fast_path = lambda group: getattr(group, func)(*args, **kwargs) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 5ad1df66992e9..f8106edeb5d62 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1650,11 +1650,7 @@ def _transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): with com.temp_setattr(self, "observed", True): result = getattr(self, func)(*args, **kwargs) - if self._can_use_transform_fast(func, result): - return self._wrap_transform_fast_result(result) - - # only reached for DataFrameGroupBy - return self._transform_general(func, *args, **kwargs) + return self._wrap_transform_fast_result(result) @final def _wrap_transform_fast_result(self, result: NDFrameT) -> NDFrameT: diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 4b707d255b18f..e0bcefba9cf3c 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -21,10 +21,7 @@ ) import pandas._testing as tm from pandas.core.groupby.base import maybe_normalize_deprecated_kernels -from pandas.core.groupby.generic import ( - DataFrameGroupBy, - SeriesGroupBy, -) +from pandas.core.groupby.generic import DataFrameGroupBy def assert_fp_equal(a, b): @@ -195,10 +192,8 @@ def test_transform_axis_1_reducer(request, reduction_func): # GH#45715 if reduction_func in ( "corrwith", - "first", "idxmax", "idxmin", - "last", "ngroup", "nth", ): @@ -418,45 +413,36 @@ def test_transform_select_columns(df): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("duplicates", [True, False]) -def test_transform_exclude_nuisance(df, duplicates): +def 
test_transform_exclude_nuisance(df): # case that goes through _transform_item_by_item - if duplicates: - # make sure we work with duplicate columns GH#41427 - df.columns = ["A", "C", "C", "D"] + df.columns = ["A", "B", "B", "D"] # this also tests orderings in transform between # series/frame to make sure it's consistent expected = {} grouped = df.groupby("A") - gbc = grouped["C"] - warn = FutureWarning if duplicates else None - with tm.assert_produces_warning(warn, match="Dropping invalid columns"): - expected["C"] = gbc.transform(np.mean) - if duplicates: - # squeeze 1-column DataFrame down to Series - expected["C"] = expected["C"]["C"] + gbc = grouped["B"] + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"): + expected["B"] = gbc.transform(lambda x: np.mean(x)) + # squeeze 1-column DataFrame down to Series + expected["B"] = expected["B"]["B"] - assert isinstance(gbc.obj, DataFrame) - assert isinstance(gbc, DataFrameGroupBy) - else: - assert isinstance(gbc, SeriesGroupBy) - assert isinstance(gbc.obj, Series) + assert isinstance(gbc.obj, DataFrame) + assert isinstance(gbc, DataFrameGroupBy) expected["D"] = grouped["D"].transform(np.mean) expected = DataFrame(expected) with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"): - result = df.groupby("A").transform(np.mean) + result = df.groupby("A").transform(lambda x: np.mean(x)) tm.assert_frame_equal(result, expected) def test_transform_function_aliases(df): - with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"): - result = df.groupby("A").transform("mean") - expected = df.groupby("A").transform(np.mean) + result = df.groupby("A").transform("mean") + expected = df.groupby("A").transform(np.mean) tm.assert_frame_equal(result, expected) result = df.groupby("A")["C"].transform("mean")