Skip to content

Commit

Permalink
REF: share BooleanArray.astype+NumericArray.astype (pandas-dev#45420)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Jan 23, 2022
1 parent acd7218 commit 6cf8880
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 154 deletions.
79 changes: 2 additions & 77 deletions pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
from __future__ import annotations

import numbers
from typing import (
TYPE_CHECKING,
overload,
)
from typing import TYPE_CHECKING

import numpy as np

Expand All @@ -13,30 +10,19 @@
missing as libmissing,
)
from pandas._typing import (
ArrayLike,
AstypeArg,
Dtype,
DtypeObj,
npt,
type_t,
)

from pandas.core.dtypes.common import (
is_bool_dtype,
is_float_dtype,
is_integer_dtype,
is_list_like,
is_numeric_dtype,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import (
ExtensionDtype,
register_extension_dtype,
)
from pandas.core.dtypes.dtypes import register_extension_dtype
from pandas.core.dtypes.missing import isna

from pandas.core import ops
from pandas.core.arrays import ExtensionArray
from pandas.core.arrays.masked import (
BaseMaskedArray,
BaseMaskedDtype,
Expand Down Expand Up @@ -360,67 +346,6 @@ def _coerce_to_array(
assert dtype == "boolean"
return coerce_to_array(value, copy=copy)

@overload
def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
    ...

@overload
def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray:
    ...

@overload
def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike:
    ...

def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
    """
    Cast to a NumPy array or ExtensionArray with 'dtype'.

    Parameters
    ----------
    dtype : str or dtype
        Typecode or data-type to which the array is cast.
    copy : bool, default True
        Whether to copy the data, even if not necessary. If False,
        a copy is made only if the old dtype does not match the
        new dtype.

    Returns
    -------
    ndarray or ExtensionArray
        NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype.

    Raises
    ------
    ValueError
        If the array holds missing values and 'dtype' is a NumPy bool or
        integer dtype, neither of which can represent them.
    """
    dtype = pandas_dtype(dtype)

    if isinstance(dtype, ExtensionDtype):
        # Extension-dtype targets are delegated to the parent class.
        return super().astype(dtype, copy)

    if is_bool_dtype(dtype):
        # astype_nansafe converts np.nan to True, so refuse outright
        # rather than silently turning a missing value into True.
        if self._hasna:
            raise ValueError("cannot convert float NaN to bool")
        return self._data.astype(dtype, copy=copy)

    # for integer, error if there are missing values
    if is_integer_dtype(dtype) and self._hasna:
        raise ValueError("cannot convert NA to integer")

    # for float dtype, ensure we use np.nan before casting (numpy cannot
    # deal with pd.NA)
    na_value = np.nan if is_float_dtype(dtype) else self._na_value

    # Forward the caller's ``copy`` instead of hard-coding False so the
    # documented contract is honoured on this path as well.
    return self.to_numpy(dtype=dtype, na_value=na_value, copy=copy)

def _logical_method(self, other, op):

assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"}
Expand Down
27 changes: 26 additions & 1 deletion pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,12 @@
)
from pandas.util._validators import validate_fillna_kwargs

from pandas.core.dtypes.astype import astype_nansafe
from pandas.core.dtypes.base import ExtensionDtype
from pandas.core.dtypes.common import (
is_bool,
is_bool_dtype,
is_datetime64_dtype,
is_dtype_equal,
is_float,
is_float_dtype,
Expand Down Expand Up @@ -450,7 +452,30 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
eacls = dtype.construct_array_type()
return eacls._from_sequence(self, dtype=dtype, copy=copy)

raise NotImplementedError("subclass must implement astype to np.dtype")
na_value: float | np.datetime64 | lib.NoDefault

# coerce
if is_float_dtype(dtype):
# In astype, we consider dtype=float to also mean na_value=np.nan
na_value = np.nan
elif is_datetime64_dtype(dtype):
na_value = np.datetime64("NaT")
else:
na_value = lib.no_default

# to_numpy will also raise, but we get somewhat nicer exception messages here
if is_integer_dtype(dtype) and self._hasna:
raise ValueError("cannot convert NA to integer")
if is_bool_dtype(dtype) and self._hasna:
# careful: astype_nansafe converts np.nan to True
raise ValueError("cannot convert float NaN to bool")

data = self.to_numpy(dtype=dtype, na_value=na_value, copy=copy)
if self.dtype.kind == "f":
# TODO: make this consistent between IntegerArray/FloatingArray,
# see test_astype_str
return astype_nansafe(data, dtype, copy=False)
return data

__array_priority__ = 1000 # higher than ndarray so ops dispatch to us

Expand Down
73 changes: 1 addition & 72 deletions pandas/core/arrays/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,35 +5,25 @@
from typing import (
TYPE_CHECKING,
TypeVar,
overload,
)

import numpy as np

from pandas._libs import (
Timedelta,
lib,
missing as libmissing,
)
from pandas._typing import (
ArrayLike,
AstypeArg,
Dtype,
npt,
)
from pandas._typing import Dtype
from pandas.compat.numpy import function as nv

from pandas.core.dtypes.astype import astype_nansafe
from pandas.core.dtypes.common import (
is_datetime64_dtype,
is_float,
is_float_dtype,
is_integer,
is_integer_dtype,
is_list_like,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import ExtensionDtype

from pandas.core.arrays.masked import (
BaseMaskedArray,
Expand All @@ -43,7 +33,6 @@
if TYPE_CHECKING:
import pyarrow

from pandas.core.arrays import ExtensionArray

T = TypeVar("T", bound="NumericArray")

Expand Down Expand Up @@ -112,66 +101,6 @@ def _from_sequence_of_strings(
scalars = to_numeric(strings, errors="raise")
return cls._from_sequence(scalars, dtype=dtype, copy=copy)

@overload
def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
    ...

@overload
def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray:
    ...

@overload
def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike:
    ...

def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
    """
    Convert this array to 'dtype', returning a NumPy or extension array.

    Parameters
    ----------
    dtype : str or dtype
        Typecode or data-type to which the array is cast.
    copy : bool, default True
        Whether to copy the data, even if not necessary. If False,
        a copy is made only if the old dtype does not match the
        new dtype.

    Returns
    -------
    ndarray or ExtensionArray
        NumPy ndarray, or BooleanArray, IntegerArray or FloatingArray with
        'dtype' for its dtype.

    Raises
    ------
    ValueError
        Propagated from ``to_numpy`` when missing values cannot be
        represented in the requested NumPy dtype (for example an integer
        or bool dtype).
    """
    target = pandas_dtype(dtype)

    # Extension-dtype targets are delegated to the parent class.
    if isinstance(target, ExtensionDtype):
        return super().astype(target, copy=copy)

    # Choose the fill value handed to to_numpy for missing entries.
    fill: float | np.datetime64 | lib.NoDefault
    if is_float_dtype(target):
        # In astype, we consider dtype=float to also mean na_value=np.nan
        fill = np.nan
    elif is_datetime64_dtype(target):
        fill = np.datetime64("NaT")
    else:
        fill = lib.no_default

    converted = self.to_numpy(dtype=target, na_value=fill, copy=copy)
    if self.dtype.kind != "f":
        return converted

    # TODO: make this consistent between IntegerArray/FloatingArray,
    # see test_astype_str
    return astype_nansafe(converted, target, copy=False)

def _arith_method(self, other, op):
op_name = op.__name__
omask = None
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/arrays/floating/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ def test_astype():
# with missing values
arr = pd.array([0.1, 0.2, None], dtype="Float64")

with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype NumPy"):
with pytest.raises(ValueError, match="cannot convert NA to integer"):
arr.astype("int64")

with pytest.raises(ValueError, match="cannot convert to 'bool'-dtype NumPy"):
with pytest.raises(ValueError, match="cannot convert float NaN to bool"):
arr.astype("bool")

result = arr.astype("float64")
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/arrays/integer/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_preserve_dtypes(op):
def test_astype_nansafe():
# see gh-22343
arr = pd.array([np.nan, 1, 2], dtype="Int8")
msg = "cannot convert to 'uint32'-dtype NumPy array with missing values."
msg = "cannot convert NA to integer"

with pytest.raises(ValueError, match=msg):
arr.astype("uint32")
Expand Down Expand Up @@ -136,7 +136,7 @@ def test_astype(all_data):

# coerce to same numpy_dtype - mixed
s = pd.Series(mixed)
msg = r"cannot convert to .*-dtype NumPy array with missing values.*"
msg = "cannot convert NA to integer"
with pytest.raises(ValueError, match=msg):
s.astype(all_data.dtype.numpy_dtype)

Expand Down

0 comments on commit 6cf8880

Please sign in to comment.