Skip to content

Commit

Permalink
Group by a categorical Series of unequal length (pandas-dev#44180)
Browse files Browse the repository at this point in the history
  • Loading branch information
stanwest authored Dec 22, 2021
1 parent 50b4df3 commit 3a4821e
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 9 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,7 @@ Groupby/resample/rolling
- Bug in :meth:`GroupBy.nth` failing on ``axis=1`` (:issue:`43926`)
- Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not respecting right bound on centered datetime-like windows, if the index contains duplicates (:issue:`3944`)
- Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` when using a :class:`pandas.api.indexers.BaseIndexer` subclass that returned unequal start and end arrays would segfault instead of raising a ``ValueError`` (:issue:`44470`)
- Fixed bug where grouping by a :class:`Series` that has a categorical data type and length unequal to the axis of grouping raised ``ValueError`` (:issue:`44179`)

Reshaping
^^^^^^^^^
Expand Down
8 changes: 1 addition & 7 deletions pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -887,12 +887,6 @@ def is_in_obj(gpr) -> bool:
else:
in_axis = False

if is_categorical_dtype(gpr) and len(gpr) != obj.shape[axis]:
raise ValueError(
f"Length of grouper ({len(gpr)}) and axis ({obj.shape[axis]}) "
"must be same length"
)

# create the Grouping
# allow us to pass the actual Grouping as the gpr
ping = (
Expand Down Expand Up @@ -938,7 +932,7 @@ def _convert_grouper(axis: Index, grouper):
return grouper.reindex(axis)._values
elif isinstance(grouper, MultiIndex):
return grouper._values
elif isinstance(grouper, (list, tuple, Series, Index, np.ndarray)):
elif isinstance(grouper, (list, tuple, Index, Categorical, np.ndarray)):
if len(grouper) != len(axis):
raise ValueError("Grouper and axis must be same length")

Expand Down
25 changes: 23 additions & 2 deletions pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,11 +664,32 @@ def test_bins_unequal_len():
bins = pd.cut(series.dropna().values, 4)

# len(bins) != len(series) here
msg = r"Length of grouper \(8\) and axis \(10\) must be same length"
with pytest.raises(ValueError, match=msg):
with pytest.raises(ValueError, match="Grouper and axis must be same length"):
series.groupby(bins).mean()


@pytest.mark.parametrize(
    ["series", "data"],
    [
        # The grouped series has the same length and index as the grouper.
        (Series(range(4)), {"A": [0, 3], "B": [1, 2]}),
        # The grouped series has the same length as the grouper, but its index
        # is shifted by one relative to the grouper's index.
        (Series(range(4)).rename(lambda label: label + 1), {"A": [2], "B": [0, 1]}),
        # GH44179: the grouped series and the grouper differ in length.
        (Series(range(7)), {"A": [0, 3], "B": [1, 2]}),
    ],
)
def test_categorical_series(series, data):
    # The grouper is a Series of categorical dtype that assigns index labels
    # 0 and 3 to group A and index labels 1 and 2 to group B. Collecting each
    # group's values into a list must reproduce the mapping given in ``data``.
    grouper = Series(list("ABBA"), dtype="category")
    result = series.groupby(grouper).aggregate(list)

    expected_index = CategoricalIndex(data.keys())
    expected = Series(data, index=expected_index)
    tm.assert_series_equal(result, expected)


def test_as_index():
# GH13204
df = DataFrame(
Expand Down

0 comments on commit 3a4821e

Please sign in to comment.