diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 13487c16193e8..ffd32e263aa50 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -809,6 +809,7 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.nth` failing on ``axis=1`` (:issue:`43926`) - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not respecting right bound on centered datetime-like windows, if the index contain duplicates (:issue:`3944`) - Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` when using a :class:`pandas.api.indexers.BaseIndexer` subclass that returned unequal start and end arrays would segfault instead of raising a ``ValueError`` (:issue:`44470`) +- Fixed bug where grouping by a :class:`Series` that has a categorical data type and length unequal to the axis of grouping raised ``ValueError`` (:issue:`44179`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 1e6515084d3b7..3cf56af18c076 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -887,12 +887,6 @@ def is_in_obj(gpr) -> bool: else: in_axis = False - if is_categorical_dtype(gpr) and len(gpr) != obj.shape[axis]: - raise ValueError( - f"Length of grouper ({len(gpr)}) and axis ({obj.shape[axis]}) " - "must be same length" - ) - # create the Grouping # allow us to passing the actual Grouping as the gpr ping = ( @@ -938,7 +932,7 @@ def _convert_grouper(axis: Index, grouper): return grouper.reindex(axis)._values elif isinstance(grouper, MultiIndex): return grouper._values - elif isinstance(grouper, (list, tuple, Series, Index, np.ndarray)): + elif isinstance(grouper, (list, tuple, Index, Categorical, np.ndarray)): if len(grouper) != len(axis): raise ValueError("Grouper and axis must be same length") diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 28128dee9da0f..585491f8664b3 100644 --- 
a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -664,11 +664,32 @@ def test_bins_unequal_len():
     bins = pd.cut(series.dropna().values, 4)
 
     # len(bins) != len(series) here
-    msg = r"Length of grouper \(8\) and axis \(10\) must be same length"
-    with pytest.raises(ValueError, match=msg):
+    with pytest.raises(ValueError, match="Grouper and axis must be same length"):
         series.groupby(bins).mean()
 
 
+@pytest.mark.parametrize(
+    ["series", "data"],
+    [
+        # Grouped series has the same length and index as the grouper.
+        (Series(range(4)), {"A": [0, 3], "B": [1, 2]}),
+        # Same length as the grouper but index shifted to 1..4: label 4 is absent from
+        # the grouper, so its value (3) is expected in neither group.
+        (Series(range(4)).rename(lambda idx: idx + 1), {"A": [2], "B": [0, 1]}),
+        # GH44179: the grouped series (7 rows) is longer than the grouper (4 rows).
+        (Series(range(7)), {"A": [0, 3], "B": [1, 2]}),
+    ],
+)
+def test_categorical_series(series, data):
+    # The grouper is a categorical Series with default index 0..3: labels 0 and 3 map
+    # to group A, labels 1 and 2 to group B. "data" maps each group to the values of
+    # the grouped series expected in it (rows are matched to the grouper by label).
+    groupby = series.groupby(Series(list("ABBA"), dtype="category"))
+    result = groupby.aggregate(list)
+    expected = Series(data, index=CategoricalIndex(data.keys()))
+    tm.assert_series_equal(result, expected)
+
+
 def test_as_index():
     # GH13204
     df = DataFrame(