Skip to content

Commit

Permalink
DEPR: treating object-dtype blocks as bool_only (pandas-dev#46188)
Browse files: browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Mar 1, 2022
1 parent 4acc937 commit 9eb316b
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 10 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,7 @@ of columns could result in a larger Series result. See (:issue:`37799`).
*New behavior*:

.. ipython:: python
:okwarning:
In [5]: df.all(bool_only=True)
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ Other Deprecations
- Deprecated the behavior of :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype and incompatible fill value; in a future version this will cast to a common dtype (usually object) instead of raising, matching the behavior of other dtypes (:issue:`45746`)
- Deprecated the ``warn`` parameter in :func:`infer_freq` (:issue:`45947`)
- Deprecated allowing non-keyword arguments in :meth:`ExtensionArray.argsort` (:issue:`46134`)
- Deprecated treating all-bool ``object``-dtype columns as bool-like in :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``; explicitly cast to bool dtype instead (:issue:`46188`)
-

.. ---------------------------------------------------------------------------
Expand Down
15 changes: 14 additions & 1 deletion pandas/core/dtypes/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
from numbers import Number
import re
from typing import Pattern
import warnings

import numpy as np

from pandas._libs import lib
from pandas._typing import ArrayLike
from pandas.util._exceptions import find_stack_level

is_bool = lib.is_bool

Expand Down Expand Up @@ -447,5 +449,16 @@ def is_inferred_bool_dtype(arr: ArrayLike) -> bool:
if dtype == np.dtype(bool):
return True
elif dtype == np.dtype("object"):
return lib.is_bool_array(arr)
result = lib.is_bool_array(arr)
if result:
# GH#46188
warnings.warn(
"In a future version, object-dtype columns with all-bool values "
"will not be included in reductions with bool_only=True. "
"Explicitly cast to bool dtype instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
return result

return False
23 changes: 16 additions & 7 deletions pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1219,6 +1219,8 @@ def test_any_all_object(self):
assert result is False

def test_any_all_object_bool_only(self):
msg = "object-dtype columns with all-bool values"

df = DataFrame({"A": ["foo", 2], "B": [True, False]}).astype(object)
df._consolidate_inplace()
df["C"] = Series([True, True])
Expand All @@ -1228,29 +1230,36 @@ def test_any_all_object_bool_only(self):

# The underlying bug is in DataFrame._get_bool_data, so we check
# that while we're here
res = df._get_bool_data()
with tm.assert_produces_warning(FutureWarning, match=msg):
res = df._get_bool_data()
expected = df[["B", "C"]]
tm.assert_frame_equal(res, expected)

res = df.all(bool_only=True, axis=0)
with tm.assert_produces_warning(FutureWarning, match=msg):
res = df.all(bool_only=True, axis=0)
expected = Series([False, True], index=["B", "C"])
tm.assert_series_equal(res, expected)

# operating on a subset of columns should not produce a _larger_ Series
res = df[["B", "C"]].all(bool_only=True, axis=0)
with tm.assert_produces_warning(FutureWarning, match=msg):
res = df[["B", "C"]].all(bool_only=True, axis=0)
tm.assert_series_equal(res, expected)

assert not df.all(bool_only=True, axis=None)
with tm.assert_produces_warning(FutureWarning, match=msg):
assert not df.all(bool_only=True, axis=None)

res = df.any(bool_only=True, axis=0)
with tm.assert_produces_warning(FutureWarning, match=msg):
res = df.any(bool_only=True, axis=0)
expected = Series([True, True], index=["B", "C"])
tm.assert_series_equal(res, expected)

# operating on a subset of columns should not produce a _larger_ Series
res = df[["B", "C"]].any(bool_only=True, axis=0)
with tm.assert_produces_warning(FutureWarning, match=msg):
res = df[["B", "C"]].any(bool_only=True, axis=0)
tm.assert_series_equal(res, expected)

assert df.any(bool_only=True, axis=None)
with tm.assert_produces_warning(FutureWarning, match=msg):
assert df.any(bool_only=True, axis=None)

@pytest.mark.parametrize("method", ["any", "all"])
def test_any_all_level_axis_none_raises(self, method):
Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -784,14 +784,16 @@ def test_get_numeric_data(self):
)

def test_get_bool_data(self):
msg = "object-dtype columns with all-bool values"
mgr = create_mgr(
"int: int; float: float; complex: complex;"
"str: object; bool: bool; obj: object; dt: datetime",
item_shape=(3,),
)
mgr.iset(6, np.array([True, False, True], dtype=np.object_))

bools = mgr.get_bool_data()
with tm.assert_produces_warning(FutureWarning, match=msg):
bools = mgr.get_bool_data()
tm.assert_index_equal(bools.items, Index(["bool", "dt"]))
tm.assert_almost_equal(
mgr.iget(mgr.items.get_loc("bool")).internal_values(),
Expand All @@ -805,7 +807,8 @@ def test_get_bool_data(self):
)

# Check sharing
bools2 = mgr.get_bool_data(copy=True)
with tm.assert_produces_warning(FutureWarning, match=msg):
bools2 = mgr.get_bool_data(copy=True)
bools2.iset(0, np.array([False, True, False]))
tm.assert_numpy_array_equal(
mgr.iget(mgr.items.get_loc("bool")).internal_values(),
Expand Down

0 comments on commit 9eb316b

Please sign in to comment.