diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index b5c32f7dbdda1..f05f99b5764d5 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -1,10 +1,7 @@ from __future__ import annotations import numbers -from typing import ( - TYPE_CHECKING, - overload, -) +from typing import TYPE_CHECKING import numpy as np @@ -13,30 +10,19 @@ missing as libmissing, ) from pandas._typing import ( - ArrayLike, - AstypeArg, Dtype, DtypeObj, - npt, type_t, ) from pandas.core.dtypes.common import ( - is_bool_dtype, - is_float_dtype, - is_integer_dtype, is_list_like, is_numeric_dtype, - pandas_dtype, -) -from pandas.core.dtypes.dtypes import ( - ExtensionDtype, - register_extension_dtype, ) +from pandas.core.dtypes.dtypes import register_extension_dtype from pandas.core.dtypes.missing import isna from pandas.core import ops -from pandas.core.arrays import ExtensionArray from pandas.core.arrays.masked import ( BaseMaskedArray, BaseMaskedDtype, @@ -360,67 +346,6 @@ def _coerce_to_array( assert dtype == "boolean" return coerce_to_array(value, copy=copy) - @overload - def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: - ... - - @overload - def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: - ... - - @overload - def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: - ... - - def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: - - """ - Cast to a NumPy array or ExtensionArray with 'dtype'. - - Parameters - ---------- - dtype : str or dtype - Typecode or data-type to which the array is cast. - copy : bool, default True - Whether to copy the data, even if not necessary. If False, - a copy is made only if the old dtype does not match the - new dtype. - - Returns - ------- - ndarray or ExtensionArray - NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype. - - Raises - ------ - TypeError - if incompatible type with an BooleanDtype, equivalent of same_kind - casting - """ - dtype = pandas_dtype(dtype) - - if isinstance(dtype, ExtensionDtype): - return super().astype(dtype, copy) - - if is_bool_dtype(dtype): - # astype_nansafe converts np.nan to True - if self._hasna: - raise ValueError("cannot convert float NaN to bool") - else: - return self._data.astype(dtype, copy=copy) - - # for integer, error if there are missing values - if is_integer_dtype(dtype) and self._hasna: - raise ValueError("cannot convert NA to integer") - - # for float dtype, ensure we use np.nan before casting (numpy cannot - # deal with pd.NA) - na_value = self._na_value - if is_float_dtype(dtype): - na_value = np.nan - # coerce - return self.to_numpy(dtype=dtype, na_value=na_value, copy=False) - def _logical_method(self, other, op): assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"} diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index b9f7b31e19c91..2ddbd54e23368 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -37,10 +37,12 @@ ) from pandas.util._validators import validate_fillna_kwargs +from pandas.core.dtypes.astype import astype_nansafe from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.common import ( is_bool, is_bool_dtype, + is_datetime64_dtype, is_dtype_equal, is_float, is_float_dtype, @@ -450,7 +452,30 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: eacls = dtype.construct_array_type() return eacls._from_sequence(self, dtype=dtype, copy=copy) - raise NotImplementedError("subclass must implement astype to np.dtype") + na_value: float | np.datetime64 | lib.NoDefault + + # coerce + if is_float_dtype(dtype): + # In astype, we consider dtype=float to also mean na_value=np.nan + na_value = np.nan + elif is_datetime64_dtype(dtype): + na_value = np.datetime64("NaT") + else: + na_value = lib.no_default + + # to_numpy will also raise, but we get somewhat nicer exception messages here + if is_integer_dtype(dtype) and self._hasna: + raise ValueError("cannot convert NA to integer") + if is_bool_dtype(dtype) and self._hasna: + # careful: astype_nansafe converts np.nan to True + raise ValueError("cannot convert float NaN to bool") + + data = self.to_numpy(dtype=dtype, na_value=na_value, copy=copy) + if self.dtype.kind == "f": + # TODO: make this consistent between IntegerArray/FloatingArray, + # see test_astype_str + return astype_nansafe(data, dtype, copy=False) + return data __array_priority__ = 1000 # higher than ndarray so ops dispatch to us diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py index 1956551a6961d..88bb4484909c6 100644 --- a/pandas/core/arrays/numeric.py +++ b/pandas/core/arrays/numeric.py @@ -5,27 +5,18 @@ from typing import ( TYPE_CHECKING, TypeVar, - overload, ) import numpy as np from pandas._libs import ( Timedelta, - lib, missing as libmissing, ) -from pandas._typing import ( - ArrayLike, - AstypeArg, - Dtype, - npt, -) +from pandas._typing import Dtype from pandas.compat.numpy import function as nv -from pandas.core.dtypes.astype import astype_nansafe from pandas.core.dtypes.common import ( - is_datetime64_dtype, is_float, is_float_dtype, is_integer, @@ -33,7 +24,6 @@ is_list_like, pandas_dtype, ) -from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.arrays.masked import ( BaseMaskedArray, @@ -43,7 +33,6 @@ if TYPE_CHECKING: import pyarrow - from pandas.core.arrays import ExtensionArray T = TypeVar("T", bound="NumericArray") @@ -112,66 +101,6 @@ def _from_sequence_of_strings( scalars = to_numeric(strings, errors="raise") return cls._from_sequence(scalars, dtype=dtype, copy=copy) - @overload - def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: - ... - - @overload - def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: - ... - - @overload - def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: - ... - - def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: - """ - Cast to a NumPy array or ExtensionArray with 'dtype'. - - Parameters - ---------- - dtype : str or dtype - Typecode or data-type to which the array is cast. - copy : bool, default True - Whether to copy the data, even if not necessary. If False, - a copy is made only if the old dtype does not match the - new dtype. - - Returns - ------- - ndarray or ExtensionArray - NumPy ndarray, or BooleanArray, IntegerArray or FloatingArray with - 'dtype' for its dtype. - - Raises - ------ - TypeError - if incompatible type with our dtype, equivalent of same_kind - casting - """ - dtype = pandas_dtype(dtype) - - if isinstance(dtype, ExtensionDtype): - return super().astype(dtype, copy=copy) - - na_value: float | np.datetime64 | lib.NoDefault - - # coerce - if is_float_dtype(dtype): - # In astype, we consider dtype=float to also mean na_value=np.nan - na_value = np.nan - elif is_datetime64_dtype(dtype): - na_value = np.datetime64("NaT") - else: - na_value = lib.no_default - - data = self.to_numpy(dtype=dtype, na_value=na_value, copy=copy) - if self.dtype.kind == "f": - # TODO: make this consistent between IntegerArray/FloatingArray, - # see test_astype_str - return astype_nansafe(data, dtype, copy=False) - return data - def _arith_method(self, other, op): op_name = op.__name__ omask = None diff --git a/pandas/tests/arrays/floating/test_astype.py b/pandas/tests/arrays/floating/test_astype.py index f0e91cd540f38..8f61f70fe0735 100644 --- a/pandas/tests/arrays/floating/test_astype.py +++ b/pandas/tests/arrays/floating/test_astype.py @@ -9,10 +9,10 @@ def test_astype(): # with missing values arr = pd.array([0.1, 0.2, None], dtype="Float64") - with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype NumPy"): + with pytest.raises(ValueError, match="cannot convert NA to integer"): arr.astype("int64") - with pytest.raises(ValueError, match="cannot convert to 'bool'-dtype NumPy"): + with pytest.raises(ValueError, match="cannot convert float NaN to bool"): arr.astype("bool") result = arr.astype("float64") diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py index 3911b7f9bad34..cbbb94de51bbf 100644 --- a/pandas/tests/arrays/integer/test_dtypes.py +++ b/pandas/tests/arrays/integer/test_dtypes.py @@ -54,7 +54,7 @@ def test_preserve_dtypes(op): def test_astype_nansafe(): # see gh-22343 arr = pd.array([np.nan, 1, 2], dtype="Int8") - msg = "cannot convert to 'uint32'-dtype NumPy array with missing values." + msg = "cannot convert NA to integer" with pytest.raises(ValueError, match=msg): arr.astype("uint32") @@ -136,7 +136,7 @@ def test_astype(all_data): # coerce to same numpy_dtype - mixed s = pd.Series(mixed) - msg = r"cannot convert to .*-dtype NumPy array with missing values.*" + msg = "cannot convert NA to integer" with pytest.raises(ValueError, match=msg): s.astype(all_data.dtype.numpy_dtype)