REF: Share NumericArray/NumericDtype methods (pandas-dev#45997)

me-kbs · Feb 16, 2022 · cf2dfa7 · cf2dfa7
1 parent bf97db3
commit cf2dfa7
Show file tree

Hide file tree

Showing 7 changed files with 89 additions and 111 deletions.
diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py
@@ -3,8 +3,8 @@
 import numpy as np
 
 from pandas._typing import DtypeObj
-from pandas.util._decorators import cache_readonly
 
+from pandas.core.dtypes.common import is_float_dtype
 from pandas.core.dtypes.dtypes import register_extension_dtype
 
 from pandas.core.arrays.numeric import (
@@ -24,13 +24,7 @@ class FloatingDtype(NumericDtype):
     """
 
     _default_np_dtype = np.dtype(np.float64)
-
-    def __repr__(self) -> str:
-        return f"{self.name}Dtype()"
-
-    @property
-    def _is_numeric(self) -> bool:
-        return True
+    _checker = is_float_dtype
 
     @classmethod
     def construct_array_type(cls) -> type[FloatingArray]:
@@ -58,18 +52,8 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
         return None
 
     @classmethod
-    def _standardize_dtype(cls, dtype) -> FloatingDtype:
-        if isinstance(dtype, str) and dtype.startswith("Float"):
-            # Avoid DeprecationWarning from NumPy about np.dtype("Float64")
-            # https://github.com/numpy/numpy/pull/7476
-            dtype = dtype.lower()
-
-        if not issubclass(type(dtype), FloatingDtype):
-            try:
-                dtype = FLOAT_STR_TO_DTYPE[str(np.dtype(dtype))]
-            except KeyError as err:
-                raise ValueError(f"invalid dtype specified {dtype}") from err
-        return dtype
+    def _str_to_dtype_mapping(cls):
+        return FLOAT_STR_TO_DTYPE
 
     @classmethod
     def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
@@ -151,22 +135,6 @@ class FloatingArray(NumericArray):
     _truthy_value = 1.0
     _falsey_value = 0.0
 
-    @cache_readonly
-    def dtype(self) -> FloatingDtype:
-        return FLOAT_STR_TO_DTYPE[str(self._data.dtype)]
-
-    def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
-        if not (isinstance(values, np.ndarray) and values.dtype.kind == "f"):
-            raise TypeError(
-                "values should be floating numpy array. Use "
-                "the 'pd.array' function instead"
-            )
-        if values.dtype == np.float16:
-            # If we don't raise here, then accessing self.dtype would raise
-            raise TypeError("FloatingArray does not support np.float16 dtype.")
-
-        super().__init__(values, mask, copy=copy)
-
 
 _dtype_docstring = """
 An ExtensionDtype for {dtype} data.

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -3,9 +3,9 @@
 import numpy as np
 
 from pandas._typing import DtypeObj
-from pandas.util._decorators import cache_readonly
 
 from pandas.core.dtypes.base import register_extension_dtype
+from pandas.core.dtypes.common import is_integer_dtype
 
 from pandas.core.arrays.masked import BaseMaskedDtype
 from pandas.core.arrays.numeric import (
@@ -14,33 +14,18 @@
 )
 
 
-class _IntegerDtype(NumericDtype):
+class IntegerDtype(NumericDtype):
     """
     An ExtensionDtype to hold a single size & kind of integer dtype.
 
     These specific implementations are subclasses of the non-public
-    _IntegerDtype. For example we have Int8Dtype to represent signed int 8s.
+    IntegerDtype. For example we have Int8Dtype to represent signed int 8s.
 
     The attributes name & type are set when these subclasses are created.
     """
 
     _default_np_dtype = np.dtype(np.int64)
-
-    def __repr__(self) -> str:
-        sign = "U" if self.is_unsigned_integer else ""
-        return f"{sign}Int{8 * self.itemsize}Dtype()"
-
-    @cache_readonly
-    def is_signed_integer(self) -> bool:
-        return self.kind == "i"
-
-    @cache_readonly
-    def is_unsigned_integer(self) -> bool:
-        return self.kind == "u"
-
-    @property
-    def _is_numeric(self) -> bool:
-        return True
+    _checker = is_integer_dtype
 
     @classmethod
     def construct_array_type(cls) -> type[IntegerArray]:
@@ -86,20 +71,8 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
         return None
 
     @classmethod
-    def _standardize_dtype(cls, dtype) -> _IntegerDtype:
-        if isinstance(dtype, str) and (
-            dtype.startswith("Int") or dtype.startswith("UInt")
-        ):
-            # Avoid DeprecationWarning from NumPy about np.dtype("Int64")
-            # https://github.com/numpy/numpy/pull/7476
-            dtype = dtype.lower()
-
-        if not issubclass(type(dtype), _IntegerDtype):
-            try:
-                dtype = INT_STR_TO_DTYPE[str(np.dtype(dtype))]
-            except KeyError as err:
-                raise ValueError(f"invalid dtype specified {dtype}") from err
-        return dtype
+    def _str_to_dtype_mapping(cls):
+        return INT_STR_TO_DTYPE
 
     @classmethod
     def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
@@ -189,26 +162,14 @@ class IntegerArray(NumericArray):
     Length: 3, dtype: UInt16
     """
 
-    _dtype_cls = _IntegerDtype
+    _dtype_cls = IntegerDtype
 
     # The value used to fill '_data' to avoid upcasting
     _internal_fill_value = 1
     # Fill values used for any/all
     _truthy_value = 1
     _falsey_value = 0
 
-    @cache_readonly
-    def dtype(self) -> _IntegerDtype:
-        return INT_STR_TO_DTYPE[str(self._data.dtype)]
-
-    def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
-        if not (isinstance(values, np.ndarray) and values.dtype.kind in ["i", "u"]):
-            raise TypeError(
-                "values should be integer numpy array. Use "
-                "the 'pd.array' function instead"
-            )
-        super().__init__(values, mask, copy=copy)
-
 
 _dtype_docstring = """
 An ExtensionDtype for {dtype} integer data.
@@ -231,62 +192,62 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
 
 
 @register_extension_dtype
-class Int8Dtype(_IntegerDtype):
+class Int8Dtype(IntegerDtype):
     type = np.int8
     name = "Int8"
     __doc__ = _dtype_docstring.format(dtype="int8")
 
 
 @register_extension_dtype
-class Int16Dtype(_IntegerDtype):
+class Int16Dtype(IntegerDtype):
     type = np.int16
     name = "Int16"
     __doc__ = _dtype_docstring.format(dtype="int16")
 
 
 @register_extension_dtype
-class Int32Dtype(_IntegerDtype):
+class Int32Dtype(IntegerDtype):
     type = np.int32
     name = "Int32"
     __doc__ = _dtype_docstring.format(dtype="int32")
 
 
 @register_extension_dtype
-class Int64Dtype(_IntegerDtype):
+class Int64Dtype(IntegerDtype):
     type = np.int64
     name = "Int64"
     __doc__ = _dtype_docstring.format(dtype="int64")
 
 
 @register_extension_dtype
-class UInt8Dtype(_IntegerDtype):
+class UInt8Dtype(IntegerDtype):
     type = np.uint8
     name = "UInt8"
     __doc__ = _dtype_docstring.format(dtype="uint8")
 
 
 @register_extension_dtype
-class UInt16Dtype(_IntegerDtype):
+class UInt16Dtype(IntegerDtype):
     type = np.uint16
     name = "UInt16"
     __doc__ = _dtype_docstring.format(dtype="uint16")
 
 
 @register_extension_dtype
-class UInt32Dtype(_IntegerDtype):
+class UInt32Dtype(IntegerDtype):
     type = np.uint32
     name = "UInt32"
     __doc__ = _dtype_docstring.format(dtype="uint32")
 
 
 @register_extension_dtype
-class UInt64Dtype(_IntegerDtype):
+class UInt64Dtype(IntegerDtype):
     type = np.uint64
     name = "UInt64"
     __doc__ = _dtype_docstring.format(dtype="uint64")
 
 
-INT_STR_TO_DTYPE: dict[str, _IntegerDtype] = {
+INT_STR_TO_DTYPE: dict[str, IntegerDtype] = {
     "int8": Int8Dtype(),
     "int16": Int16Dtype(),
     "int32": Int32Dtype(),

diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py
@@ -3,6 +3,8 @@
 import numbers
 from typing import (
     TYPE_CHECKING,
+    Any,
+    Callable,
     TypeVar,
 )
 
@@ -17,6 +19,7 @@
     DtypeObj,
 )
 from pandas.errors import AbstractMethodError
+from pandas.util._decorators import cache_readonly
 
 from pandas.core.dtypes.common import (
     is_bool_dtype,
@@ -41,6 +44,22 @@
 
 class NumericDtype(BaseMaskedDtype):
     _default_np_dtype: np.dtype
+    _checker: Callable[[Any], bool]  # is_foo_dtype
+
+    def __repr__(self) -> str:
+        return f"{self.name}Dtype()"
+
+    @cache_readonly
+    def is_signed_integer(self) -> bool:
+        return self.kind == "i"
+
+    @cache_readonly
+    def is_unsigned_integer(self) -> bool:
+        return self.kind == "u"
+
+    @property
+    def _is_numeric(self) -> bool:
+        return True
 
     def __from_arrow__(
         self, array: pyarrow.Array | pyarrow.ChunkedArray
@@ -90,12 +109,27 @@ def __from_arrow__(
         else:
             return array_class._concat_same_type(results)
 
+    @classmethod
+    def _str_to_dtype_mapping(cls):
+        raise AbstractMethodError(cls)
+
     @classmethod
     def _standardize_dtype(cls, dtype) -> NumericDtype:
         """
         Convert a string representation or a numpy dtype to NumericDtype.
         """
-        raise AbstractMethodError(cls)
+        if isinstance(dtype, str) and (dtype.startswith(("Int", "UInt", "Float"))):
+            # Avoid DeprecationWarning from NumPy about np.dtype("Int64")
+            # https://github.com/numpy/numpy/pull/7476
+            dtype = dtype.lower()
+
+        if not issubclass(type(dtype), cls):
+            mapping = cls._str_to_dtype_mapping()
+            try:
+                dtype = mapping[str(np.dtype(dtype))]
+            except KeyError as err:
+                raise ValueError(f"invalid dtype specified {dtype}") from err
+        return dtype
 
     @classmethod
     def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
@@ -108,10 +142,7 @@ def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarr
 
 
 def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype):
-    if default_dtype.kind == "f":
-        checker = is_float_dtype
-    else:
-        checker = is_integer_dtype
+    checker = dtype_cls._checker
 
     inferred_type = None
 
@@ -188,6 +219,29 @@ class NumericArray(BaseMaskedArray):
 
     _dtype_cls: type[NumericDtype]
 
+    def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
+        checker = self._dtype_cls._checker
+        if not (isinstance(values, np.ndarray) and checker(values.dtype)):
+            descr = (
+                "floating"
+                if self._dtype_cls.kind == "f"  # type: ignore[comparison-overlap]
+                else "integer"
+            )
+            raise TypeError(
+                f"values should be {descr} numpy array. Use "
+                "the 'pd.array' function instead"
+            )
+        if values.dtype == np.float16:
+            # If we don't raise here, then accessing self.dtype would raise
+            raise TypeError("FloatingArray does not support np.float16 dtype.")
+
+        super().__init__(values, mask, copy=copy)
+
+    @cache_readonly
+    def dtype(self) -> NumericDtype:
+        mapping = self._dtype_cls._str_to_dtype_mapping()
+        return mapping[str(self._data.dtype)]
+
     @classmethod
     def _coerce_to_array(
         cls, value, *, dtype: DtypeObj, copy: bool = False

diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
@@ -45,7 +45,7 @@
 )
 from pandas.core.arrays.base import ExtensionArray
 from pandas.core.arrays.floating import FloatingDtype
-from pandas.core.arrays.integer import _IntegerDtype
+from pandas.core.arrays.integer import IntegerDtype
 from pandas.core.construction import extract_array
 from pandas.core.indexers import check_array_indexer
 from pandas.core.missing import isna
@@ -432,7 +432,7 @@ def astype(self, dtype, copy: bool = True):
                 return self.copy()
             return self
 
-        elif isinstance(dtype, _IntegerDtype):
+        elif isinstance(dtype, IntegerDtype):
             arr = self._ndarray.copy()
             mask = self.isna()
             arr[mask] = 0