Typeinterval part1 (pandas-dev#46080)

me-kbs · Feb 26, 2022 · 93ba57a · 93ba57a
1 parent 7ee8ab0
commit 93ba57a
Show file tree

Hide file tree

Showing 12 changed files with 79 additions and 34 deletions.
diff --git a/pandas/_typing.py b/pandas/_typing.py
@@ -83,7 +83,7 @@
 PythonScalar = Union[str, int, float, bool]
 DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"]
 PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"]
-Scalar = Union[PythonScalar, PandasScalar]
+Scalar = Union[PythonScalar, PandasScalar, np.datetime64, np.timedelta64, datetime]
 IntStrT = TypeVar("IntStrT", int, str)
 
 
@@ -304,3 +304,7 @@ def closed(self) -> bool:
 
 # read_xml parsers
 XMLParsers = Literal["lxml", "etree"]
+
+# Interval closed type
+
+IntervalClosedType = Literal["left", "right", "both", "neither"]
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -759,7 +759,7 @@ def factorize(
     else:
         dtype = values.dtype
         values = _ensure_data(values)
-        na_value: Scalar
+        na_value: Scalar | None
 
         if original.dtype.kind in ["m", "M"]:
             # Note: factorize_array will cast NaT bc it has a __int__

diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -29,6 +29,7 @@
 from pandas._typing import (
     ArrayLike,
     Dtype,
+    IntervalClosedType,
     NpDtype,
     PositionalIndexer,
     ScalarIndexer,
@@ -200,6 +201,11 @@ class IntervalArray(IntervalMixin, ExtensionArray):
     can_hold_na = True
     _na_value = _fill_value = np.nan
 
+    # To make mypy recognize the fields
+    _left: np.ndarray
+    _right: np.ndarray
+    _dtype: IntervalDtype
+
     # ---------------------------------------------------------------------
     # Constructors
 
@@ -660,11 +666,7 @@ def __getitem__(
             if is_scalar(left) and isna(left):
                 return self._fill_value
             return Interval(left, right, self.closed)
-        # error: Argument 1 to "ndim" has incompatible type "Union[ndarray,
-        # ExtensionArray]"; expected "Union[Union[int, float, complex, str, bytes,
-        # generic], Sequence[Union[int, float, complex, str, bytes, generic]],
-        # Sequence[Sequence[Any]], _SupportsArray]"
-        if np.ndim(left) > 1:  # type: ignore[arg-type]
+        if np.ndim(left) > 1:
             # GH#30588 multi-dimensional indexer disallowed
             raise ValueError("multi-dimensional indexing not allowed")
         return self._shallow_copy(left, right)
@@ -1368,7 +1370,7 @@ def closed(self):
             ),
         }
     )
-    def set_closed(self: IntervalArrayT, closed) -> IntervalArrayT:
+    def set_closed(self: IntervalArrayT, closed: IntervalClosedType) -> IntervalArrayT:
         if closed not in VALID_CLOSED:
             msg = f"invalid option for 'closed': {closed}"
             raise ValueError(msg)
@@ -1669,8 +1671,14 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray:
 
         dtype = self._left.dtype
         if needs_i8_conversion(dtype):
-            new_left = type(self._left)._from_sequence(nc[:, 0], dtype=dtype)
-            new_right = type(self._right)._from_sequence(nc[:, 1], dtype=dtype)
+            # error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence"
+            new_left = type(self._left)._from_sequence(  # type: ignore[attr-defined]
+                nc[:, 0], dtype=dtype
+            )
+            # error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence"
+            new_right = type(self._right)._from_sequence(  # type: ignore[attr-defined]
+                nc[:, 1], dtype=dtype
+            )
         else:
             new_left = nc[:, 0].view(dtype)
             new_right = nc[:, 1].view(dtype)

diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
@@ -334,7 +334,7 @@ def to_numpy(
         self,
         dtype: npt.DTypeLike | None = None,
         copy: bool = False,
-        na_value: Scalar = lib.no_default,
+        na_value: Scalar | lib.NoDefault | libmissing.NAType = lib.no_default,
     ) -> np.ndarray:
         """
         Convert to a NumPy Array.

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
@@ -762,7 +762,7 @@ def _str_replace(
         return type(self)(result)
 
     def _str_match(
-        self, pat: str, case: bool = True, flags: int = 0, na: Scalar = None
+        self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
     ):
         if pa_version_under4p0:
             return super()._str_match(pat, case, flags, na)
@@ -771,7 +771,9 @@ def _str_match(
             pat = "^" + pat
         return self._str_contains(pat, case, flags, na, regex=True)
 
-    def _str_fullmatch(self, pat, case: bool = True, flags: int = 0, na: Scalar = None):
+    def _str_fullmatch(
+        self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
+    ):
         if pa_version_under4p0:
             return super()._str_fullmatch(pat, case, flags, na)
 

diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -35,7 +35,6 @@
     ArrayLike,
     NpDtype,
     RandomState,
-    Scalar,
     T,
 )
 from pandas.util._exceptions import find_stack_level
@@ -517,7 +516,7 @@ def f(x):
 
 
 def convert_to_list_like(
-    values: Scalar | Iterable | AnyArrayLike,
+    values: Hashable | Iterable | AnyArrayLike,
 ) -> list | AnyArrayLike:
     """
     Convert list-like or scalar input to list-like. List, numpy and pandas array-like

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -46,6 +46,7 @@
     FilePath,
     IndexKeyFunc,
     IndexLabel,
+    IntervalClosedType,
     JSONSerializable,
     Level,
     Manager,
@@ -7776,7 +7777,7 @@ def between_time(
         end_time,
         include_start: bool_t | lib.NoDefault = lib.no_default,
         include_end: bool_t | lib.NoDefault = lib.no_default,
-        inclusive: str | None = None,
+        inclusive: IntervalClosedType | None = None,
         axis=None,
     ) -> NDFrameT:
         """
@@ -7881,7 +7882,7 @@ def between_time(
             left = True if isinstance(include_start, lib.NoDefault) else include_start
             right = True if isinstance(include_end, lib.NoDefault) else include_end
 
-            inc_dict = {
+            inc_dict: dict[tuple[bool_t, bool_t], IntervalClosedType] = {
                 (True, True): "both",
                 (True, False): "left",
                 (False, True): "right",

diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
@@ -33,6 +33,7 @@
 from pandas._typing import (
     Dtype,
     DtypeObj,
+    IntervalClosedType,
     npt,
 )
 from pandas.util._decorators import (
@@ -884,8 +885,8 @@ def date_range(
     tz=None,
     normalize: bool = False,
     name: Hashable = None,
-    closed: str | None | lib.NoDefault = lib.no_default,
-    inclusive: str | None = None,
+    closed: Literal["left", "right"] | None | lib.NoDefault = lib.no_default,
+    inclusive: IntervalClosedType | None = None,
     **kwargs,
 ) -> DatetimeIndex:
     """
@@ -1091,7 +1092,7 @@ def bdate_range(
     weekmask=None,
     holidays=None,
     closed: lib.NoDefault = lib.no_default,
-    inclusive: str | None = None,
+    inclusive: IntervalClosedType | None = None,
     **kwargs,
 ) -> DatetimeIndex:
     """

diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
@@ -9,6 +9,7 @@
 from typing import (
     Any,
     Hashable,
+    Literal,
 )
 
 import numpy as np
@@ -28,6 +29,7 @@
 from pandas._typing import (
     Dtype,
     DtypeObj,
+    IntervalClosedType,
     npt,
 )
 from pandas.errors import InvalidIndexError
@@ -191,10 +193,12 @@ class IntervalIndex(ExtensionIndex):
     _typ = "intervalindex"
 
     # annotate properties pinned via inherit_names
-    closed: str
+    closed: IntervalClosedType
     is_non_overlapping_monotonic: bool
     closed_left: bool
     closed_right: bool
+    open_left: bool
+    open_right: bool
 
     _data: IntervalArray
     _values: IntervalArray
@@ -543,7 +547,7 @@ def _maybe_convert_i8(self, key):
 
         return key_i8
 
-    def _searchsorted_monotonic(self, label, side: str = "left"):
+    def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):
         if not self.is_non_overlapping_monotonic:
             raise KeyError(
                 "can only get slices from an IntervalIndex if bounds are "
@@ -941,7 +945,12 @@ def _is_type_compatible(a, b) -> bool:
 
 
 def interval_range(
-    start=None, end=None, periods=None, freq=None, name: Hashable = None, closed="right"
+    start=None,
+    end=None,
+    periods=None,
+    freq=None,
+    name: Hashable = None,
+    closed: IntervalClosedType = "right",
 ) -> IntervalIndex:
     """
     Return a fixed frequency IntervalIndex.

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
@@ -10,8 +10,8 @@
     Hashable,
     List,
     Tuple,
-    TypeVar,
     Union,
+    cast,
     overload,
 )
 import warnings
@@ -66,6 +66,7 @@
 )
 from pandas.core import algorithms
 from pandas.core.algorithms import unique
+from pandas.core.arrays.base import ExtensionArray
 from pandas.core.arrays.datetimes import (
     maybe_convert_dtype,
     objects_to_datetime64ns,
@@ -85,7 +86,8 @@
 
 ArrayConvertible = Union[List, Tuple, AnyArrayLike, "Series"]
 Scalar = Union[int, float, str]
-DatetimeScalar = TypeVar("DatetimeScalar", Scalar, datetime)
+DatetimeScalar = Union[Scalar, datetime]
+
 DatetimeScalarOrArrayConvertible = Union[DatetimeScalar, ArrayConvertible]
 start_caching_at = 50
 
@@ -638,7 +640,7 @@ def to_datetime(
     infer_datetime_format: bool = ...,
     origin=...,
     cache: bool = ...,
-) -> DatetimeScalar | NaTType:
+) -> Timestamp | NaTType:
     ...
 
 
@@ -1061,6 +1063,13 @@ def to_datetime(
             result = convert_listlike(arg, format, name=arg.name)
     elif is_list_like(arg):
         try:
+            # error: Argument 1 to "_maybe_cache" has incompatible type
+            # "Union[float, str, datetime, List[Any], Tuple[Any, ...], ExtensionArray,
+            # ndarray[Any, Any], Series]"; expected "Union[List[Any], Tuple[Any, ...],
+            # Union[Union[ExtensionArray, ndarray[Any, Any]], Index, Series], Series]"
+            arg = cast(
+                Union[list, tuple, ExtensionArray, np.ndarray, "Series", Index], arg
+            )
             cache_array = _maybe_cache(arg, format, cache, convert_listlike)
         except OutOfBoundsDatetime:
             # caching attempts to create a DatetimeIndex, which may raise

diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
@@ -1,5 +1,10 @@
 from __future__ import annotations
 
+from typing import (
+    TYPE_CHECKING,
+    cast,
+)
+
 import numpy as np
 
 from pandas._typing import (
@@ -16,6 +21,9 @@
 
 from pandas.io.excel._base import BaseExcelReader
 
+if TYPE_CHECKING:
+    from pandas._libs.tslibs.nattype import NaTType
+
 
 @doc(storage_options=_shared_docs["storage_options"])
 class ODFReader(BaseExcelReader):
@@ -81,7 +89,9 @@ def get_sheet_by_name(self, name: str):
         self.close()
         raise ValueError(f"sheet {name} not found")
 
-    def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar]]:
+    def get_sheet_data(
+        self, sheet, convert_float: bool
+    ) -> list[list[Scalar | NaTType]]:
         """
         Parse an ODF Table into a list of lists
         """
@@ -99,12 +109,12 @@ def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar]]:
         empty_rows = 0
         max_row_len = 0
 
-        table: list[list[Scalar]] = []
+        table: list[list[Scalar | NaTType]] = []
 
         for sheet_row in sheet_rows:
             sheet_cells = [x for x in sheet_row.childNodes if x.qname in cell_names]
             empty_cells = 0
-            table_row: list[Scalar] = []
+            table_row: list[Scalar | NaTType] = []
 
             for sheet_cell in sheet_cells:
                 if sheet_cell.qname == table_cell_name:
@@ -167,7 +177,7 @@ def _is_empty_row(self, row) -> bool:
 
         return True
 
-    def _get_cell_value(self, cell, convert_float: bool) -> Scalar:
+    def _get_cell_value(self, cell, convert_float: bool) -> Scalar | NaTType:
         from odf.namespaces import OFFICENS
 
         if str(cell) == "#N/A":
@@ -200,9 +210,11 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar:
             cell_value = cell.attributes.get((OFFICENS, "date-value"))
             return pd.to_datetime(cell_value)
         elif cell_type == "time":
-            stamp = pd.to_datetime(str(cell))
-            # error: Item "str" of "Union[float, str, NaTType]" has no attribute "time"
-            return stamp.time()  # type: ignore[union-attr]
+            # cast needed because `pd.to_datetime` can return NaTType,
+            # but we know this is a valid time
+            stamp = cast(pd.Timestamp, pd.to_datetime(str(cell)))
+            # cast needed here because Scalar doesn't include datetime.time
+            return cast(Scalar, stamp.time())
         else:
             self.close()
             raise ValueError(f"Unrecognized type {cell_type}")

diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
@@ -892,7 +892,7 @@ def _clear_buffer(self) -> None:
 
     def _get_index_name(
         self, columns: list[Hashable]
-    ) -> tuple[list[Hashable] | None, list[Hashable], list[Hashable]]:
+    ) -> tuple[Sequence[Hashable] | None, list[Hashable], list[Hashable]]:
         """
         Try several cases to get lines: