From cf2dfa76d2736af151e429843be986884e1e74e4 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Wed, 16 Feb 2022 05:36:21 -0800
Subject: [PATCH] REF: Share NumericArray/NumericDtype methods (#45997)

---
 pandas/core/arrays/floating.py            | 40 ++-----------
 pandas/core/arrays/integer.py             | 71 +++++------------------
 pandas/core/arrays/numeric.py             | 64 ++++++++++++++++++--
 pandas/core/arrays/string_.py             |  4 +-
 pandas/core/groupby/ops.py                | 10 ++--
 pandas/io/stata.py                        |  6 +-
 pandas/tests/frame/methods/test_astype.py |  5 --
 7 files changed, 89 insertions(+), 111 deletions(-)

diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py
index d55aef953b5b5..49a71922f331b 100644
--- a/pandas/core/arrays/floating.py
+++ b/pandas/core/arrays/floating.py
@@ -3,8 +3,8 @@
 import numpy as np
 
 from pandas._typing import DtypeObj
-from pandas.util._decorators import cache_readonly
 
+from pandas.core.dtypes.common import is_float_dtype
 from pandas.core.dtypes.dtypes import register_extension_dtype
 
 from pandas.core.arrays.numeric import (
@@ -24,13 +24,7 @@ class FloatingDtype(NumericDtype):
     """
 
     _default_np_dtype = np.dtype(np.float64)
-
-    def __repr__(self) -> str:
-        return f"{self.name}Dtype()"
-
-    @property
-    def _is_numeric(self) -> bool:
-        return True
+    _checker = is_float_dtype
 
     @classmethod
     def construct_array_type(cls) -> type[FloatingArray]:
@@ -58,18 +52,8 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
         return None
 
     @classmethod
-    def _standardize_dtype(cls, dtype) -> FloatingDtype:
-        if isinstance(dtype, str) and dtype.startswith("Float"):
-            # Avoid DeprecationWarning from NumPy about np.dtype("Float64")
-            # https://github.com/numpy/numpy/pull/7476
-            dtype = dtype.lower()
-
-        if not issubclass(type(dtype), FloatingDtype):
-            try:
-                dtype = FLOAT_STR_TO_DTYPE[str(np.dtype(dtype))]
-            except KeyError as err:
-                raise ValueError(f"invalid dtype specified {dtype}") from err
-        return dtype
+    def _str_to_dtype_mapping(cls):
+        return FLOAT_STR_TO_DTYPE
 
     @classmethod
     def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
@@ -151,22 +135,6 @@ class FloatingArray(NumericArray):
     _truthy_value = 1.0
     _falsey_value = 0.0
 
-    @cache_readonly
-    def dtype(self) -> FloatingDtype:
-        return FLOAT_STR_TO_DTYPE[str(self._data.dtype)]
-
-    def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
-        if not (isinstance(values, np.ndarray) and values.dtype.kind == "f"):
-            raise TypeError(
-                "values should be floating numpy array. Use "
-                "the 'pd.array' function instead"
-            )
-        if values.dtype == np.float16:
-            # If we don't raise here, then accessing self.dtype would raise
-            raise TypeError("FloatingArray does not support np.float16 dtype.")
-
-        super().__init__(values, mask, copy=copy)
-
 
 _dtype_docstring = """
 An ExtensionDtype for {dtype} data.
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 056669f40ca87..9ef3939656ecd 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -3,9 +3,9 @@
 import numpy as np
 
 from pandas._typing import DtypeObj
-from pandas.util._decorators import cache_readonly
 
 from pandas.core.dtypes.base import register_extension_dtype
+from pandas.core.dtypes.common import is_integer_dtype
 
 from pandas.core.arrays.masked import BaseMaskedDtype
 from pandas.core.arrays.numeric import (
@@ -14,33 +14,18 @@
 )
 
 
-class _IntegerDtype(NumericDtype):
+class IntegerDtype(NumericDtype):
     """
     An ExtensionDtype to hold a single size & kind of integer dtype.
 
     These specific implementations are subclasses of the non-public
-    _IntegerDtype. For example we have Int8Dtype to represent signed int 8s.
+    IntegerDtype. For example we have Int8Dtype to represent signed int 8s.
 
     The attributes name & type are set when these subclasses are created.
     """
 
     _default_np_dtype = np.dtype(np.int64)
-
-    def __repr__(self) -> str:
-        sign = "U" if self.is_unsigned_integer else ""
-        return f"{sign}Int{8 * self.itemsize}Dtype()"
-
-    @cache_readonly
-    def is_signed_integer(self) -> bool:
-        return self.kind == "i"
-
-    @cache_readonly
-    def is_unsigned_integer(self) -> bool:
-        return self.kind == "u"
-
-    @property
-    def _is_numeric(self) -> bool:
-        return True
+    _checker = is_integer_dtype
 
     @classmethod
     def construct_array_type(cls) -> type[IntegerArray]:
@@ -86,20 +71,8 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
         return None
 
     @classmethod
-    def _standardize_dtype(cls, dtype) -> _IntegerDtype:
-        if isinstance(dtype, str) and (
-            dtype.startswith("Int") or dtype.startswith("UInt")
-        ):
-            # Avoid DeprecationWarning from NumPy about np.dtype("Int64")
-            # https://github.com/numpy/numpy/pull/7476
-            dtype = dtype.lower()
-
-        if not issubclass(type(dtype), _IntegerDtype):
-            try:
-                dtype = INT_STR_TO_DTYPE[str(np.dtype(dtype))]
-            except KeyError as err:
-                raise ValueError(f"invalid dtype specified {dtype}") from err
-        return dtype
+    def _str_to_dtype_mapping(cls):
+        return INT_STR_TO_DTYPE
 
     @classmethod
     def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
@@ -189,7 +162,7 @@ class IntegerArray(NumericArray):
     Length: 3, dtype: UInt16
     """
 
-    _dtype_cls = _IntegerDtype
+    _dtype_cls = IntegerDtype
 
     # The value used to fill '_data' to avoid upcasting
     _internal_fill_value = 1
@@ -197,18 +170,6 @@ class IntegerArray(NumericArray):
     _truthy_value = 1
     _falsey_value = 0
 
-    @cache_readonly
-    def dtype(self) -> _IntegerDtype:
-        return INT_STR_TO_DTYPE[str(self._data.dtype)]
-
-    def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
-        if not (isinstance(values, np.ndarray) and values.dtype.kind in ["i", "u"]):
-            raise TypeError(
-                "values should be integer numpy array. Use "
-                "the 'pd.array' function instead"
-            )
-        super().__init__(values, mask, copy=copy)
-
 
 _dtype_docstring = """
 An ExtensionDtype for {dtype} integer data.
@@ -231,62 +192,62 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
 
 
 @register_extension_dtype
-class Int8Dtype(_IntegerDtype):
+class Int8Dtype(IntegerDtype):
     type = np.int8
     name = "Int8"
     __doc__ = _dtype_docstring.format(dtype="int8")
 
 
 @register_extension_dtype
-class Int16Dtype(_IntegerDtype):
+class Int16Dtype(IntegerDtype):
     type = np.int16
     name = "Int16"
     __doc__ = _dtype_docstring.format(dtype="int16")
 
 
 @register_extension_dtype
-class Int32Dtype(_IntegerDtype):
+class Int32Dtype(IntegerDtype):
     type = np.int32
     name = "Int32"
     __doc__ = _dtype_docstring.format(dtype="int32")
 
 
 @register_extension_dtype
-class Int64Dtype(_IntegerDtype):
+class Int64Dtype(IntegerDtype):
     type = np.int64
     name = "Int64"
     __doc__ = _dtype_docstring.format(dtype="int64")
 
 
 @register_extension_dtype
-class UInt8Dtype(_IntegerDtype):
+class UInt8Dtype(IntegerDtype):
     type = np.uint8
     name = "UInt8"
     __doc__ = _dtype_docstring.format(dtype="uint8")
 
 
 @register_extension_dtype
-class UInt16Dtype(_IntegerDtype):
+class UInt16Dtype(IntegerDtype):
     type = np.uint16
     name = "UInt16"
     __doc__ = _dtype_docstring.format(dtype="uint16")
 
 
 @register_extension_dtype
-class UInt32Dtype(_IntegerDtype):
+class UInt32Dtype(IntegerDtype):
     type = np.uint32
     name = "UInt32"
     __doc__ = _dtype_docstring.format(dtype="uint32")
 
 
 @register_extension_dtype
-class UInt64Dtype(_IntegerDtype):
+class UInt64Dtype(IntegerDtype):
     type = np.uint64
     name = "UInt64"
     __doc__ = _dtype_docstring.format(dtype="uint64")
 
 
-INT_STR_TO_DTYPE: dict[str, _IntegerDtype] = {
+INT_STR_TO_DTYPE: dict[str, IntegerDtype] = {
     "int8": Int8Dtype(),
     "int16": Int16Dtype(),
     "int32": Int32Dtype(),
diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py
index 5ab1a9908fd02..958c9f7b0b3f1 100644
--- a/pandas/core/arrays/numeric.py
+++ b/pandas/core/arrays/numeric.py
@@ -3,6 +3,8 @@
 import numbers
 from typing import (
     TYPE_CHECKING,
+    Any,
+    Callable,
     TypeVar,
 )
 
@@ -17,6 +19,7 @@
     DtypeObj,
 )
 from pandas.errors import AbstractMethodError
+from pandas.util._decorators import cache_readonly
 
 from pandas.core.dtypes.common import (
     is_bool_dtype,
@@ -41,6 +44,22 @@
 
 class NumericDtype(BaseMaskedDtype):
     _default_np_dtype: np.dtype
+    _checker: Callable[[Any], bool]  # is_foo_dtype
+
+    def __repr__(self) -> str:
+        return f"{self.name}Dtype()"
+
+    @cache_readonly
+    def is_signed_integer(self) -> bool:
+        return self.kind == "i"
+
+    @cache_readonly
+    def is_unsigned_integer(self) -> bool:
+        return self.kind == "u"
+
+    @property
+    def _is_numeric(self) -> bool:
+        return True
 
     def __from_arrow__(
         self, array: pyarrow.Array | pyarrow.ChunkedArray
@@ -90,12 +109,27 @@ def __from_arrow__(
         else:
             return array_class._concat_same_type(results)
 
+    @classmethod
+    def _str_to_dtype_mapping(cls):
+        raise AbstractMethodError(cls)
+
     @classmethod
     def _standardize_dtype(cls, dtype) -> NumericDtype:
         """
         Convert a string representation or a numpy dtype to NumericDtype.
         """
-        raise AbstractMethodError(cls)
+        if isinstance(dtype, str) and (dtype.startswith(("Int", "UInt", "Float"))):
+            # Avoid DeprecationWarning from NumPy about np.dtype("Int64")
+            # https://github.com/numpy/numpy/pull/7476
+            dtype = dtype.lower()
+
+        if not issubclass(type(dtype), cls):
+            mapping = cls._str_to_dtype_mapping()
+            try:
+                dtype = mapping[str(np.dtype(dtype))]
+            except KeyError as err:
+                raise ValueError(f"invalid dtype specified {dtype}") from err
+        return dtype
 
     @classmethod
     def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
@@ -108,10 +142,7 @@ def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarr
 
 
 def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype):
-    if default_dtype.kind == "f":
-        checker = is_float_dtype
-    else:
-        checker = is_integer_dtype
+    checker = dtype_cls._checker
 
     inferred_type = None
 
@@ -188,6 +219,29 @@ class NumericArray(BaseMaskedArray):
 
     _dtype_cls: type[NumericDtype]
 
+    def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
+        checker = self._dtype_cls._checker
+        if not (isinstance(values, np.ndarray) and checker(values.dtype)):
+            # _dtype_cls is a dtype *class*; `kind` is only resolved on dtype
+            #  instances, so comparing the class attribute to "f" is never
+            #  True. Use the class-level default numpy dtype for the wording.
+            is_float = self._dtype_cls._default_np_dtype.kind == "f"
+            descr = "floating" if is_float else "integer"
+            raise TypeError(
+                f"values should be {descr} numpy array. Use "
+                "the 'pd.array' function instead"
+            )
+        if values.dtype == np.float16:
+            # If we don't raise here, then accessing self.dtype would raise
+            raise TypeError("FloatingArray does not support np.float16 dtype.")
+
+        super().__init__(values, mask, copy=copy)
+
+    @cache_readonly
+    def dtype(self) -> NumericDtype:
+        mapping = self._dtype_cls._str_to_dtype_mapping()
+        return mapping[str(self._data.dtype)]
+
     @classmethod
     def _coerce_to_array(
         cls, value, *, dtype: DtypeObj, copy: bool = False
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index af1756470a9c0..ca4348e3bd06a 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -45,7 +45,7 @@
 )
 from pandas.core.arrays.base import ExtensionArray
 from pandas.core.arrays.floating import FloatingDtype
-from pandas.core.arrays.integer import _IntegerDtype
+from pandas.core.arrays.integer import IntegerDtype
 from pandas.core.construction import extract_array
 from pandas.core.indexers import check_array_indexer
 from pandas.core.missing import isna
@@ -432,7 +432,7 @@ def astype(self, dtype, copy: bool = True):
                 return self.copy()
             return self
 
-        elif isinstance(dtype, _IntegerDtype):
+        elif isinstance(dtype, IntegerDtype):
             arr = self._ndarray.copy()
             mask = self.isna()
             arr[mask] = 0
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index d4aa6ae9f4059..cf046d92dd6f3 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -76,7 +76,7 @@
 )
 from pandas.core.arrays.integer import (
     Int64Dtype,
-    _IntegerDtype,
+    IntegerDtype,
 )
 from pandas.core.arrays.masked import (
     BaseMaskedArray,
@@ -300,10 +300,10 @@ def _get_result_dtype(self, dtype: DtypeObj) -> DtypeObj:
         if how in ["add", "cumsum", "sum", "prod"]:
             if dtype == np.dtype(bool):
                 return np.dtype(np.int64)
-            elif isinstance(dtype, (BooleanDtype, _IntegerDtype)):
+            elif isinstance(dtype, (BooleanDtype, IntegerDtype)):
                 return Int64Dtype()
         elif how in ["mean", "median", "var"]:
-            if isinstance(dtype, (BooleanDtype, _IntegerDtype)):
+            if isinstance(dtype, (BooleanDtype, IntegerDtype)):
                 return Float64Dtype()
             elif is_float_dtype(dtype) or is_complex_dtype(dtype):
                 return dtype
@@ -341,7 +341,7 @@ def _ea_wrap_cython_operation(
             # All of the functions implemented here are ordinal, so we can
             #  operate on the tz-naive equivalents
             npvalues = values._ndarray.view("M8[ns]")
-        elif isinstance(values.dtype, (BooleanDtype, _IntegerDtype)):
+        elif isinstance(values.dtype, (BooleanDtype, IntegerDtype)):
             # IntegerArray or BooleanArray
             npvalues = values.to_numpy("float64", na_value=np.nan)
         elif isinstance(values.dtype, FloatingDtype):
@@ -378,7 +378,7 @@ def _reconstruct_ea_result(self, values, res_values):
         # TODO: allow EAs to override this logic
 
         if isinstance(
-            values.dtype, (BooleanDtype, _IntegerDtype, FloatingDtype, StringDtype)
+            values.dtype, (BooleanDtype, IntegerDtype, FloatingDtype, StringDtype)
         ):
             dtype = self._get_result_dtype(values.dtype)
             cls = dtype.construct_array_type()
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 883cc36e4c1f1..60c4634662296 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -62,7 +62,7 @@
     to_timedelta,
 )
 from pandas.core.arrays.boolean import BooleanDtype
-from pandas.core.arrays.integer import _IntegerDtype
+from pandas.core.arrays.integer import IntegerDtype
 from pandas.core.frame import DataFrame
 from pandas.core.indexes.base import Index
 from pandas.core.series import Series
@@ -585,7 +585,7 @@ def _cast_to_stata_types(data: DataFrame) -> DataFrame:
 
     for col in data:
         # Cast from unsupported types to supported types
-        is_nullable_int = isinstance(data[col].dtype, (_IntegerDtype, BooleanDtype))
+        is_nullable_int = isinstance(data[col].dtype, (IntegerDtype, BooleanDtype))
         orig = data[col]
         # We need to find orig_missing before altering data below
         orig_missing = orig.isna()
@@ -593,7 +593,7 @@ def _cast_to_stata_types(data: DataFrame) -> DataFrame:
             missing_loc = data[col].isna()
             if missing_loc.any():
                 # Replace with always safe value
-                fv = 0 if isinstance(data[col].dtype, _IntegerDtype) else False
+                fv = 0 if isinstance(data[col].dtype, IntegerDtype) else False
                 data.loc[missing_loc, col] = fv
             # Replace with NumPy-compatible column
             data[col] = data[col].astype(data[col].dtype.numpy_dtype)
diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
index 0e7e4b537c719..6d343de9f5d3a 100644
--- a/pandas/tests/frame/methods/test_astype.py
+++ b/pandas/tests/frame/methods/test_astype.py
@@ -762,11 +762,6 @@ def test_astype_categorical_to_string_missing(self):
 class IntegerArrayNoCopy(pd.core.arrays.IntegerArray):
     # GH 42501
 
-    @classmethod
-    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
-        values, mask = cls._coerce_to_array(scalars, dtype=dtype, copy=copy)
-        return IntegerArrayNoCopy(values, mask)
-
     def copy(self):
         assert False