From 93ba57ab9c61841e1cc9017ced591c79d505ff2f Mon Sep 17 00:00:00 2001
From: Irv Lustig
Date: Sat, 26 Feb 2022 14:26:16 -0500
Subject: [PATCH] Typeinterval part1 (#46080)

---
 pandas/_typing.py                  |  6 +++++-
 pandas/core/algorithms.py          |  2 +-
 pandas/core/arrays/interval.py     | 24 ++++++++++++++++--------
 pandas/core/arrays/masked.py       |  2 +-
 pandas/core/arrays/string_arrow.py |  6 ++++--
 pandas/core/common.py              |  3 +--
 pandas/core/generic.py             |  5 +++--
 pandas/core/indexes/datetimes.py   |  7 ++++---
 pandas/core/indexes/interval.py    | 15 ++++++++++++---
 pandas/core/tools/datetimes.py     | 15 ++++++++++++---
 pandas/io/excel/_odfreader.py      | 26 +++++++++++++++++++-------
 pandas/io/parsers/python_parser.py |  2 +-
 12 files changed, 79 insertions(+), 34 deletions(-)

diff --git a/pandas/_typing.py b/pandas/_typing.py
index c0383fe50a7e7..b897a4e8fe199 100644
--- a/pandas/_typing.py
+++ b/pandas/_typing.py
@@ -83,7 +83,7 @@
 PythonScalar = Union[str, int, float, bool]
 DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"]
 PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"]
-Scalar = Union[PythonScalar, PandasScalar]
+Scalar = Union[PythonScalar, PandasScalar, np.datetime64, np.timedelta64, datetime]
 
 IntStrT = TypeVar("IntStrT", int, str)
 
@@ -304,3 +304,7 @@ def closed(self) -> bool:
 
 # read_xml parsers
 XMLParsers = Literal["lxml", "etree"]
+
+# Interval closed type
+
+IntervalClosedType = Literal["left", "right", "both", "neither"]
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 8c10b62d83f9e..c57261c810663 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -759,7 +759,7 @@ def factorize(
     else:
         dtype = values.dtype
         values = _ensure_data(values)
-        na_value: Scalar
+        na_value: Scalar | None
 
         if original.dtype.kind in ["m", "M"]:
             # Note: factorize_array will cast NaT bc it has a __int__
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
index 2098dface40f6..63f8fc0262199 100644
--- a/pandas/core/arrays/interval.py
+++ b/pandas/core/arrays/interval.py
@@ -29,6 +29,7 @@
 from pandas._typing import (
     ArrayLike,
     Dtype,
+    IntervalClosedType,
     NpDtype,
     PositionalIndexer,
     ScalarIndexer,
@@ -200,6 +201,11 @@ class IntervalArray(IntervalMixin, ExtensionArray):
     can_hold_na = True
     _na_value = _fill_value = np.nan
 
+    # To make mypy recognize the fields
+    _left: np.ndarray
+    _right: np.ndarray
+    _dtype: IntervalDtype
+
     # ---------------------------------------------------------------------
     # Constructors
 
@@ -660,11 +666,7 @@ def __getitem__(
             if is_scalar(left) and isna(left):
                 return self._fill_value
             return Interval(left, right, self.closed)
-        # error: Argument 1 to "ndim" has incompatible type "Union[ndarray,
-        # ExtensionArray]"; expected "Union[Union[int, float, complex, str, bytes,
-        # generic], Sequence[Union[int, float, complex, str, bytes, generic]],
-        # Sequence[Sequence[Any]], _SupportsArray]"
-        if np.ndim(left) > 1:  # type: ignore[arg-type]
+        if np.ndim(left) > 1:
             # GH#30588 multi-dimensional indexer disallowed
             raise ValueError("multi-dimensional indexing not allowed")
         return self._shallow_copy(left, right)
@@ -1368,7 +1370,7 @@ def closed(self):
             ),
         }
     )
-    def set_closed(self: IntervalArrayT, closed) -> IntervalArrayT:
+    def set_closed(self: IntervalArrayT, closed: IntervalClosedType) -> IntervalArrayT:
         if closed not in VALID_CLOSED:
             msg = f"invalid option for 'closed': {closed}"
             raise ValueError(msg)
@@ -1669,8 +1671,14 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray:
 
         dtype = self._left.dtype
         if needs_i8_conversion(dtype):
-            new_left = type(self._left)._from_sequence(nc[:, 0], dtype=dtype)
-            new_right = type(self._right)._from_sequence(nc[:, 1], dtype=dtype)
+            # error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence"
+            new_left = type(self._left)._from_sequence(  # type: ignore[attr-defined]
+                nc[:, 0], dtype=dtype
+            )
+            # error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence"
+            new_right = type(self._right)._from_sequence(  # type: ignore[attr-defined]
+                nc[:, 1], dtype=dtype
+            )
         else:
             new_left = nc[:, 0].view(dtype)
             new_right = nc[:, 1].view(dtype)
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index 649a9f6e8e081..76bbbfe2ebcb3 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -334,7 +334,7 @@ def to_numpy(
         self,
         dtype: npt.DTypeLike | None = None,
         copy: bool = False,
-        na_value: Scalar = lib.no_default,
+        na_value: Scalar | lib.NoDefault | libmissing.NAType = lib.no_default,
     ) -> np.ndarray:
         """
         Convert to a NumPy Array.
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 3503b54dd478a..002def4d31e72 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -762,7 +762,7 @@ def _str_replace(
         return type(self)(result)
 
     def _str_match(
-        self, pat: str, case: bool = True, flags: int = 0, na: Scalar = None
+        self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
     ):
         if pa_version_under4p0:
             return super()._str_match(pat, case, flags, na)
@@ -771,7 +771,9 @@ def _str_match(
             pat = "^" + pat
         return self._str_contains(pat, case, flags, na, regex=True)
 
-    def _str_fullmatch(self, pat, case: bool = True, flags: int = 0, na: Scalar = None):
+    def _str_fullmatch(
+        self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
+    ):
         if pa_version_under4p0:
             return super()._str_fullmatch(pat, case, flags, na)
 
diff --git a/pandas/core/common.py b/pandas/core/common.py
index 94fb09ddc79b3..62c2034505589 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -35,7 +35,6 @@
     ArrayLike,
     NpDtype,
     RandomState,
-    Scalar,
     T,
 )
 from pandas.util._exceptions import find_stack_level
@@ -517,7 +516,7 @@ def f(x):
 
 
 def convert_to_list_like(
-    values: Scalar | Iterable | AnyArrayLike,
+    values: Hashable | Iterable | AnyArrayLike,
 ) -> list | AnyArrayLike:
     """
     Convert list-like or scalar input to list-like. List, numpy and pandas array-like
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 9f9dffaaa399f..40cfeb796828b 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -46,6 +46,7 @@
     FilePath,
     IndexKeyFunc,
     IndexLabel,
+    IntervalClosedType,
     JSONSerializable,
     Level,
     Manager,
@@ -7776,7 +7777,7 @@ def between_time(
         end_time,
         include_start: bool_t | lib.NoDefault = lib.no_default,
         include_end: bool_t | lib.NoDefault = lib.no_default,
-        inclusive: str | None = None,
+        inclusive: IntervalClosedType | None = None,
         axis=None,
     ) -> NDFrameT:
         """
@@ -7881,7 +7882,7 @@ def between_time(
             left = True if isinstance(include_start, lib.NoDefault) else include_start
             right = True if isinstance(include_end, lib.NoDefault) else include_end
 
-            inc_dict = {
+            inc_dict: dict[tuple[bool_t, bool_t], IntervalClosedType] = {
                 (True, True): "both",
                 (True, False): "left",
                 (False, True): "right",
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index 004d860b20a6f..4acdc7e6c7556 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -33,6 +33,7 @@
 from pandas._typing import (
     Dtype,
     DtypeObj,
+    IntervalClosedType,
     npt,
 )
 from pandas.util._decorators import (
@@ -884,8 +885,8 @@ def date_range(
     tz=None,
     normalize: bool = False,
     name: Hashable = None,
-    closed: str | None | lib.NoDefault = lib.no_default,
-    inclusive: str | None = None,
+    closed: Literal["left", "right"] | None | lib.NoDefault = lib.no_default,
+    inclusive: IntervalClosedType | None = None,
     **kwargs,
 ) -> DatetimeIndex:
     """
@@ -1091,7 +1092,7 @@ def bdate_range(
     weekmask=None,
     holidays=None,
     closed: lib.NoDefault = lib.no_default,
-    inclusive: str | None = None,
+    inclusive: IntervalClosedType | None = None,
     **kwargs,
 ) -> DatetimeIndex:
     """
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index 1e39c1db1a73b..aea0326bed2fb 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -9,6 +9,7 @@
 from typing import (
     Any,
     Hashable,
+    Literal,
 )
 
 import numpy as np
@@ -28,6 +29,7 @@
 from pandas._typing import (
     Dtype,
     DtypeObj,
+    IntervalClosedType,
     npt,
 )
 from pandas.errors import InvalidIndexError
@@ -191,10 +193,12 @@ class IntervalIndex(ExtensionIndex):
     _typ = "intervalindex"
 
     # annotate properties pinned via inherit_names
-    closed: str
+    closed: IntervalClosedType
     is_non_overlapping_monotonic: bool
     closed_left: bool
     closed_right: bool
+    open_left: bool
+    open_right: bool
 
     _data: IntervalArray
     _values: IntervalArray
@@ -543,7 +547,7 @@ def _maybe_convert_i8(self, key):
 
         return key_i8
 
-    def _searchsorted_monotonic(self, label, side: str = "left"):
+    def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):
         if not self.is_non_overlapping_monotonic:
             raise KeyError(
                 "can only get slices from an IntervalIndex if bounds are "
@@ -941,7 +945,12 @@ def _is_type_compatible(a, b) -> bool:
 
 
 def interval_range(
-    start=None, end=None, periods=None, freq=None, name: Hashable = None, closed="right"
+    start=None,
+    end=None,
+    periods=None,
+    freq=None,
+    name: Hashable = None,
+    closed: IntervalClosedType = "right",
 ) -> IntervalIndex:
     """
     Return a fixed frequency IntervalIndex.
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index cdb2ea37d948c..21683ed100c72 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -10,8 +10,8 @@
     Hashable,
     List,
     Tuple,
-    TypeVar,
     Union,
+    cast,
     overload,
 )
 import warnings
@@ -66,6 +66,7 @@
 )
 from pandas.core import algorithms
 from pandas.core.algorithms import unique
+from pandas.core.arrays.base import ExtensionArray
 from pandas.core.arrays.datetimes import (
     maybe_convert_dtype,
     objects_to_datetime64ns,
@@ -85,7 +86,8 @@
 
 ArrayConvertible = Union[List, Tuple, AnyArrayLike, "Series"]
 Scalar = Union[int, float, str]
-DatetimeScalar = TypeVar("DatetimeScalar", Scalar, datetime)
+DatetimeScalar = Union[Scalar, datetime]
+
 DatetimeScalarOrArrayConvertible = Union[DatetimeScalar, ArrayConvertible]
 
 start_caching_at = 50
@@ -638,7 +640,7 @@ def to_datetime(
     infer_datetime_format: bool = ...,
     origin=...,
     cache: bool = ...,
-) -> DatetimeScalar | NaTType:
+) -> Timestamp | NaTType:
     ...
 
 
@@ -1061,6 +1063,13 @@ def to_datetime(
             result = convert_listlike(arg, format, name=arg.name)
     elif is_list_like(arg):
         try:
+            # error: Argument 1 to "_maybe_cache" has incompatible type
+            # "Union[float, str, datetime, List[Any], Tuple[Any, ...], ExtensionArray,
+            # ndarray[Any, Any], Series]"; expected "Union[List[Any], Tuple[Any, ...],
+            # Union[Union[ExtensionArray, ndarray[Any, Any]], Index, Series], Series]"
+            arg = cast(
+                Union[list, tuple, ExtensionArray, np.ndarray, "Series", Index], arg
+            )
             cache_array = _maybe_cache(arg, format, cache, convert_listlike)
         except OutOfBoundsDatetime:
             # caching attempts to create a DatetimeIndex, which may raise
diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
index 6adce02dc50f0..384813b6ec65d 100644
--- a/pandas/io/excel/_odfreader.py
+++ b/pandas/io/excel/_odfreader.py
@@ -1,5 +1,10 @@
 from __future__ import annotations
 
+from typing import (
+    TYPE_CHECKING,
+    cast,
+)
+
 import numpy as np
 
 from pandas._typing import (
@@ -16,6 +21,9 @@
 
 from pandas.io.excel._base import BaseExcelReader
 
+if TYPE_CHECKING:
+    from pandas._libs.tslibs.nattype import NaTType
+
 
 @doc(storage_options=_shared_docs["storage_options"])
 class ODFReader(BaseExcelReader):
@@ -81,7 +89,9 @@ def get_sheet_by_name(self, name: str):
         self.close()
         raise ValueError(f"sheet {name} not found")
 
-    def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar]]:
+    def get_sheet_data(
+        self, sheet, convert_float: bool
+    ) -> list[list[Scalar | NaTType]]:
         """
         Parse an ODF Table into a list of lists
         """
@@ -99,12 +109,12 @@ def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar]]:
         empty_rows = 0
         max_row_len = 0
 
-        table: list[list[Scalar]] = []
+        table: list[list[Scalar | NaTType]] = []
 
         for sheet_row in sheet_rows:
             sheet_cells = [x for x in sheet_row.childNodes if x.qname in cell_names]
             empty_cells = 0
-            table_row: list[Scalar] = []
+            table_row: list[Scalar | NaTType] = []
 
             for sheet_cell in sheet_cells:
                 if sheet_cell.qname == table_cell_name:
@@ -167,7 +177,7 @@ def _is_empty_row(self, row) -> bool:
 
         return True
 
-    def _get_cell_value(self, cell, convert_float: bool) -> Scalar:
+    def _get_cell_value(self, cell, convert_float: bool) -> Scalar | NaTType:
         from odf.namespaces import OFFICENS
 
         if str(cell) == "#N/A":
@@ -200,9 +210,11 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar:
             cell_value = cell.attributes.get((OFFICENS, "date-value"))
             return pd.to_datetime(cell_value)
         elif cell_type == "time":
-            stamp = pd.to_datetime(str(cell))
-            # error: Item "str" of "Union[float, str, NaTType]" has no attribute "time"
-            return stamp.time()  # type: ignore[union-attr]
+            # cast needed because `pd.to_datetime can return NaTType,
+            # but we know this is a valid time
+            stamp = cast(pd.Timestamp, pd.to_datetime(str(cell)))
+            # cast needed here because Scalar doesn't include datetime.time
+            return cast(Scalar, stamp.time())
         else:
             self.close()
             raise ValueError(f"Unrecognized type {cell_type}")
diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
index 92031cb04e768..eac8e3b08083d 100644
--- a/pandas/io/parsers/python_parser.py
+++ b/pandas/io/parsers/python_parser.py
@@ -892,7 +892,7 @@ def _clear_buffer(self) -> None:
 
     def _get_index_name(
         self, columns: list[Hashable]
-    ) -> tuple[list[Hashable] | None, list[Hashable], list[Hashable]]:
+    ) -> tuple[Sequence[Hashable] | None, list[Hashable], list[Hashable]]:
         """
         Try several cases to get lines: