Skip to content

Commit

Permalink
REF: Share NumericArray/NumericDtype methods (pandas-dev#45997)
Browse files (browse the repository at this point in the history)
  • Loading branch information
jbrockmendel authored Feb 16, 2022
1 parent bf97db3 commit cf2dfa7
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 111 deletions.
40 changes: 4 additions & 36 deletions pandas/core/arrays/floating.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import numpy as np

from pandas._typing import DtypeObj
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.common import is_float_dtype
from pandas.core.dtypes.dtypes import register_extension_dtype

from pandas.core.arrays.numeric import (
Expand All @@ -24,13 +24,7 @@ class FloatingDtype(NumericDtype):
"""

_default_np_dtype = np.dtype(np.float64)

def __repr__(self) -> str:
return f"{self.name}Dtype()"

@property
def _is_numeric(self) -> bool:
return True
_checker = is_float_dtype

@classmethod
def construct_array_type(cls) -> type[FloatingArray]:
Expand Down Expand Up @@ -58,18 +52,8 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
return None

@classmethod
def _standardize_dtype(cls, dtype) -> FloatingDtype:
if isinstance(dtype, str) and dtype.startswith("Float"):
# Avoid DeprecationWarning from NumPy about np.dtype("Float64")
# https://github.com/numpy/numpy/pull/7476
dtype = dtype.lower()

if not issubclass(type(dtype), FloatingDtype):
try:
dtype = FLOAT_STR_TO_DTYPE[str(np.dtype(dtype))]
except KeyError as err:
raise ValueError(f"invalid dtype specified {dtype}") from err
return dtype
def _str_to_dtype_mapping(cls):
return FLOAT_STR_TO_DTYPE

@classmethod
def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
Expand Down Expand Up @@ -151,22 +135,6 @@ class FloatingArray(NumericArray):
_truthy_value = 1.0
_falsey_value = 0.0

@cache_readonly
def dtype(self) -> FloatingDtype:
return FLOAT_STR_TO_DTYPE[str(self._data.dtype)]

def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
if not (isinstance(values, np.ndarray) and values.dtype.kind == "f"):
raise TypeError(
"values should be floating numpy array. Use "
"the 'pd.array' function instead"
)
if values.dtype == np.float16:
# If we don't raise here, then accessing self.dtype would raise
raise TypeError("FloatingArray does not support np.float16 dtype.")

super().__init__(values, mask, copy=copy)


_dtype_docstring = """
An ExtensionDtype for {dtype} data.
Expand Down
71 changes: 16 additions & 55 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import numpy as np

from pandas._typing import DtypeObj
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.base import register_extension_dtype
from pandas.core.dtypes.common import is_integer_dtype

from pandas.core.arrays.masked import BaseMaskedDtype
from pandas.core.arrays.numeric import (
Expand All @@ -14,33 +14,18 @@
)


class _IntegerDtype(NumericDtype):
class IntegerDtype(NumericDtype):
"""
An ExtensionDtype to hold a single size & kind of integer dtype.
These specific implementations are subclasses of the non-public
_IntegerDtype. For example we have Int8Dtype to represent signed int 8s.
IntegerDtype. For example we have Int8Dtype to represent signed int 8s.
The attributes name & type are set when these subclasses are created.
"""

_default_np_dtype = np.dtype(np.int64)

def __repr__(self) -> str:
sign = "U" if self.is_unsigned_integer else ""
return f"{sign}Int{8 * self.itemsize}Dtype()"

@cache_readonly
def is_signed_integer(self) -> bool:
return self.kind == "i"

@cache_readonly
def is_unsigned_integer(self) -> bool:
return self.kind == "u"

@property
def _is_numeric(self) -> bool:
return True
_checker = is_integer_dtype

@classmethod
def construct_array_type(cls) -> type[IntegerArray]:
Expand Down Expand Up @@ -86,20 +71,8 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
return None

@classmethod
def _standardize_dtype(cls, dtype) -> _IntegerDtype:
if isinstance(dtype, str) and (
dtype.startswith("Int") or dtype.startswith("UInt")
):
# Avoid DeprecationWarning from NumPy about np.dtype("Int64")
# https://github.com/numpy/numpy/pull/7476
dtype = dtype.lower()

if not issubclass(type(dtype), _IntegerDtype):
try:
dtype = INT_STR_TO_DTYPE[str(np.dtype(dtype))]
except KeyError as err:
raise ValueError(f"invalid dtype specified {dtype}") from err
return dtype
def _str_to_dtype_mapping(cls):
return INT_STR_TO_DTYPE

@classmethod
def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
Expand Down Expand Up @@ -189,26 +162,14 @@ class IntegerArray(NumericArray):
Length: 3, dtype: UInt16
"""

_dtype_cls = _IntegerDtype
_dtype_cls = IntegerDtype

# The value used to fill '_data' to avoid upcasting
_internal_fill_value = 1
# Fill values used for any/all
_truthy_value = 1
_falsey_value = 0

@cache_readonly
def dtype(self) -> _IntegerDtype:
return INT_STR_TO_DTYPE[str(self._data.dtype)]

def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
if not (isinstance(values, np.ndarray) and values.dtype.kind in ["i", "u"]):
raise TypeError(
"values should be integer numpy array. Use "
"the 'pd.array' function instead"
)
super().__init__(values, mask, copy=copy)


_dtype_docstring = """
An ExtensionDtype for {dtype} integer data.
Expand All @@ -231,62 +192,62 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):


@register_extension_dtype
class Int8Dtype(_IntegerDtype):
class Int8Dtype(IntegerDtype):
type = np.int8
name = "Int8"
__doc__ = _dtype_docstring.format(dtype="int8")


@register_extension_dtype
class Int16Dtype(_IntegerDtype):
class Int16Dtype(IntegerDtype):
type = np.int16
name = "Int16"
__doc__ = _dtype_docstring.format(dtype="int16")


@register_extension_dtype
class Int32Dtype(_IntegerDtype):
class Int32Dtype(IntegerDtype):
type = np.int32
name = "Int32"
__doc__ = _dtype_docstring.format(dtype="int32")


@register_extension_dtype
class Int64Dtype(_IntegerDtype):
class Int64Dtype(IntegerDtype):
type = np.int64
name = "Int64"
__doc__ = _dtype_docstring.format(dtype="int64")


@register_extension_dtype
class UInt8Dtype(_IntegerDtype):
class UInt8Dtype(IntegerDtype):
type = np.uint8
name = "UInt8"
__doc__ = _dtype_docstring.format(dtype="uint8")


@register_extension_dtype
class UInt16Dtype(_IntegerDtype):
class UInt16Dtype(IntegerDtype):
type = np.uint16
name = "UInt16"
__doc__ = _dtype_docstring.format(dtype="uint16")


@register_extension_dtype
class UInt32Dtype(_IntegerDtype):
class UInt32Dtype(IntegerDtype):
type = np.uint32
name = "UInt32"
__doc__ = _dtype_docstring.format(dtype="uint32")


@register_extension_dtype
class UInt64Dtype(_IntegerDtype):
class UInt64Dtype(IntegerDtype):
type = np.uint64
name = "UInt64"
__doc__ = _dtype_docstring.format(dtype="uint64")


INT_STR_TO_DTYPE: dict[str, _IntegerDtype] = {
INT_STR_TO_DTYPE: dict[str, IntegerDtype] = {
"int8": Int8Dtype(),
"int16": Int16Dtype(),
"int32": Int32Dtype(),
Expand Down
64 changes: 59 additions & 5 deletions pandas/core/arrays/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numbers
from typing import (
TYPE_CHECKING,
Any,
Callable,
TypeVar,
)

Expand All @@ -17,6 +19,7 @@
DtypeObj,
)
from pandas.errors import AbstractMethodError
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.common import (
is_bool_dtype,
Expand All @@ -41,6 +44,22 @@

class NumericDtype(BaseMaskedDtype):
_default_np_dtype: np.dtype
_checker: Callable[[Any], bool] # is_foo_dtype

# Shared repr for numeric masked dtypes: the dtype's ``name`` followed by
# "Dtype()", e.g. a name of "Int8" renders as "Int8Dtype()".
def __repr__(self) -> str:
return f"{self.name}Dtype()"

# True when this dtype's ``kind`` code is "i"; cached after first access
# via ``cache_readonly``.
@cache_readonly
def is_signed_integer(self) -> bool:
return self.kind == "i"

# True when this dtype's ``kind`` code is "u"; cached after first access
# via ``cache_readonly``.
@cache_readonly
def is_unsigned_integer(self) -> bool:
return self.kind == "u"

# Every NumericDtype reports itself as numeric; always returns True.
@property
def _is_numeric(self) -> bool:
return True

def __from_arrow__(
self, array: pyarrow.Array | pyarrow.ChunkedArray
Expand Down Expand Up @@ -90,12 +109,27 @@ def __from_arrow__(
else:
return array_class._concat_same_type(results)

# Subclass hook: the base implementation only raises AbstractMethodError.
# The overrides visible in this commit return a module-level table
# (FLOAT_STR_TO_DTYPE / INT_STR_TO_DTYPE) that is indexed by
# ``str(<numpy dtype>)`` to obtain the matching masked dtype instance.
@classmethod
def _str_to_dtype_mapping(cls):
raise AbstractMethodError(cls)

@classmethod
def _standardize_dtype(cls, dtype) -> NumericDtype:
"""
Convert a string representation or a numpy dtype to NumericDtype.
"""
raise AbstractMethodError(cls)
if isinstance(dtype, str) and (dtype.startswith(("Int", "UInt", "Float"))):
# Avoid DeprecationWarning from NumPy about np.dtype("Int64")
# https://github.com/numpy/numpy/pull/7476
dtype = dtype.lower()

if not issubclass(type(dtype), cls):
mapping = cls._str_to_dtype_mapping()
try:
dtype = mapping[str(np.dtype(dtype))]
except KeyError as err:
raise ValueError(f"invalid dtype specified {dtype}") from err
return dtype

@classmethod
def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
Expand All @@ -108,10 +142,7 @@ def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarr


def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype):
if default_dtype.kind == "f":
checker = is_float_dtype
else:
checker = is_integer_dtype
checker = dtype_cls._checker

inferred_type = None

Expand Down Expand Up @@ -188,6 +219,29 @@ class NumericArray(BaseMaskedArray):

_dtype_cls: type[NumericDtype]

def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
    """
    Validate ``values`` for this array's dtype family, then defer to the parent.

    Parameters
    ----------
    values : np.ndarray
        Data buffer. Its dtype must be accepted by ``self._dtype_cls._checker``.
    mask : np.ndarray
        Passed through unchanged to the parent constructor.
    copy : bool, default False
        Passed through unchanged to the parent constructor.

    Raises
    ------
    TypeError
        If ``values`` is not an ndarray whose dtype passes the checker, or
        if its dtype is ``np.float16``.
    """
    dtype_cls = self._dtype_cls
    if not (isinstance(values, np.ndarray) and dtype_cls._checker(values.dtype)):
        # Decide the wording from the class-level default numpy dtype, which
        # is a real ``np.dtype`` and therefore has a string ``kind``.
        # NOTE(review): ``kind`` is read from dtype *instances* elsewhere
        # (``self.kind``); looked up on the class it is presumably the
        # descriptor rather than a string, so ``dtype_cls.kind == "f"`` could
        # never be true and the message always said "integer".
        descr = "floating" if dtype_cls._default_np_dtype.kind == "f" else "integer"
        raise TypeError(
            f"values should be {descr} numpy array. Use "
            "the 'pd.array' function instead"
        )
    if values.dtype == np.float16:
        # If we don't raise here, then accessing self.dtype would raise
        raise TypeError("FloatingArray does not support np.float16 dtype.")

    super().__init__(values, mask, copy=copy)

@cache_readonly
def dtype(self) -> NumericDtype:
    """
    Masked dtype corresponding to the numpy dtype of the underlying data.

    The string form of ``self._data.dtype`` is used as the key into the
    table returned by ``self._dtype_cls._str_to_dtype_mapping()``.
    """
    lookup = self._dtype_cls._str_to_dtype_mapping()
    np_dtype_name = str(self._data.dtype)
    return lookup[np_dtype_name]

@classmethod
def _coerce_to_array(
cls, value, *, dtype: DtypeObj, copy: bool = False
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
)
from pandas.core.arrays.base import ExtensionArray
from pandas.core.arrays.floating import FloatingDtype
from pandas.core.arrays.integer import _IntegerDtype
from pandas.core.arrays.integer import IntegerDtype
from pandas.core.construction import extract_array
from pandas.core.indexers import check_array_indexer
from pandas.core.missing import isna
Expand Down Expand Up @@ -432,7 +432,7 @@ def astype(self, dtype, copy: bool = True):
return self.copy()
return self

elif isinstance(dtype, _IntegerDtype):
elif isinstance(dtype, IntegerDtype):
arr = self._ndarray.copy()
mask = self.isna()
arr[mask] = 0
Expand Down
Loading

0 comments on commit cf2dfa7

Please sign in to comment.