Skip to content

Commit

Permalink
Typeinterval part1 (pandas-dev#46080)
Browse files Browse the repository at this point in the history
  • Loading branch information
Dr-Irv authored Feb 26, 2022
1 parent 7ee8ab0 commit 93ba57a
Show file tree
Hide file tree
Showing 12 changed files with 79 additions and 34 deletions.
6 changes: 5 additions & 1 deletion pandas/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
PythonScalar = Union[str, int, float, bool]
DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"]
PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"]
Scalar = Union[PythonScalar, PandasScalar]
Scalar = Union[PythonScalar, PandasScalar, np.datetime64, np.timedelta64, datetime]
IntStrT = TypeVar("IntStrT", int, str)


Expand Down Expand Up @@ -304,3 +304,7 @@ def closed(self) -> bool:

# read_xml parsers
XMLParsers = Literal["lxml", "etree"]

# Interval closed type

IntervalClosedType = Literal["left", "right", "both", "neither"]
2 changes: 1 addition & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,7 @@ def factorize(
else:
dtype = values.dtype
values = _ensure_data(values)
na_value: Scalar
na_value: Scalar | None

if original.dtype.kind in ["m", "M"]:
# Note: factorize_array will cast NaT bc it has a __int__
Expand Down
24 changes: 16 additions & 8 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from pandas._typing import (
ArrayLike,
Dtype,
IntervalClosedType,
NpDtype,
PositionalIndexer,
ScalarIndexer,
Expand Down Expand Up @@ -200,6 +201,11 @@ class IntervalArray(IntervalMixin, ExtensionArray):
can_hold_na = True
_na_value = _fill_value = np.nan

# To make mypy recognize the fields
_left: np.ndarray
_right: np.ndarray
_dtype: IntervalDtype

# ---------------------------------------------------------------------
# Constructors

Expand Down Expand Up @@ -660,11 +666,7 @@ def __getitem__(
if is_scalar(left) and isna(left):
return self._fill_value
return Interval(left, right, self.closed)
# error: Argument 1 to "ndim" has incompatible type "Union[ndarray,
# ExtensionArray]"; expected "Union[Union[int, float, complex, str, bytes,
# generic], Sequence[Union[int, float, complex, str, bytes, generic]],
# Sequence[Sequence[Any]], _SupportsArray]"
if np.ndim(left) > 1: # type: ignore[arg-type]
if np.ndim(left) > 1:
# GH#30588 multi-dimensional indexer disallowed
raise ValueError("multi-dimensional indexing not allowed")
return self._shallow_copy(left, right)
Expand Down Expand Up @@ -1368,7 +1370,7 @@ def closed(self):
),
}
)
def set_closed(self: IntervalArrayT, closed) -> IntervalArrayT:
def set_closed(self: IntervalArrayT, closed: IntervalClosedType) -> IntervalArrayT:
if closed not in VALID_CLOSED:
msg = f"invalid option for 'closed': {closed}"
raise ValueError(msg)
Expand Down Expand Up @@ -1669,8 +1671,14 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray:

dtype = self._left.dtype
if needs_i8_conversion(dtype):
new_left = type(self._left)._from_sequence(nc[:, 0], dtype=dtype)
new_right = type(self._right)._from_sequence(nc[:, 1], dtype=dtype)
# error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence"
new_left = type(self._left)._from_sequence( # type: ignore[attr-defined]
nc[:, 0], dtype=dtype
)
# error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence"
new_right = type(self._right)._from_sequence( # type: ignore[attr-defined]
nc[:, 1], dtype=dtype
)
else:
new_left = nc[:, 0].view(dtype)
new_right = nc[:, 1].view(dtype)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ def to_numpy(
self,
dtype: npt.DTypeLike | None = None,
copy: bool = False,
na_value: Scalar = lib.no_default,
na_value: Scalar | lib.NoDefault | libmissing.NAType = lib.no_default,
) -> np.ndarray:
"""
Convert to a NumPy Array.
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -762,7 +762,7 @@ def _str_replace(
return type(self)(result)

def _str_match(
self, pat: str, case: bool = True, flags: int = 0, na: Scalar = None
self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
):
if pa_version_under4p0:
return super()._str_match(pat, case, flags, na)
Expand All @@ -771,7 +771,9 @@ def _str_match(
pat = "^" + pat
return self._str_contains(pat, case, flags, na, regex=True)

def _str_fullmatch(self, pat, case: bool = True, flags: int = 0, na: Scalar = None):
def _str_fullmatch(
self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
):
if pa_version_under4p0:
return super()._str_fullmatch(pat, case, flags, na)

Expand Down
3 changes: 1 addition & 2 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
ArrayLike,
NpDtype,
RandomState,
Scalar,
T,
)
from pandas.util._exceptions import find_stack_level
Expand Down Expand Up @@ -517,7 +516,7 @@ def f(x):


def convert_to_list_like(
values: Scalar | Iterable | AnyArrayLike,
values: Hashable | Iterable | AnyArrayLike,
) -> list | AnyArrayLike:
"""
Convert list-like or scalar input to list-like. List, numpy and pandas array-like
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
FilePath,
IndexKeyFunc,
IndexLabel,
IntervalClosedType,
JSONSerializable,
Level,
Manager,
Expand Down Expand Up @@ -7776,7 +7777,7 @@ def between_time(
end_time,
include_start: bool_t | lib.NoDefault = lib.no_default,
include_end: bool_t | lib.NoDefault = lib.no_default,
inclusive: str | None = None,
inclusive: IntervalClosedType | None = None,
axis=None,
) -> NDFrameT:
"""
Expand Down Expand Up @@ -7881,7 +7882,7 @@ def between_time(
left = True if isinstance(include_start, lib.NoDefault) else include_start
right = True if isinstance(include_end, lib.NoDefault) else include_end

inc_dict = {
inc_dict: dict[tuple[bool_t, bool_t], IntervalClosedType] = {
(True, True): "both",
(True, False): "left",
(False, True): "right",
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from pandas._typing import (
Dtype,
DtypeObj,
IntervalClosedType,
npt,
)
from pandas.util._decorators import (
Expand Down Expand Up @@ -884,8 +885,8 @@ def date_range(
tz=None,
normalize: bool = False,
name: Hashable = None,
closed: str | None | lib.NoDefault = lib.no_default,
inclusive: str | None = None,
closed: Literal["left", "right"] | None | lib.NoDefault = lib.no_default,
inclusive: IntervalClosedType | None = None,
**kwargs,
) -> DatetimeIndex:
"""
Expand Down Expand Up @@ -1091,7 +1092,7 @@ def bdate_range(
weekmask=None,
holidays=None,
closed: lib.NoDefault = lib.no_default,
inclusive: str | None = None,
inclusive: IntervalClosedType | None = None,
**kwargs,
) -> DatetimeIndex:
"""
Expand Down
15 changes: 12 additions & 3 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from typing import (
Any,
Hashable,
Literal,
)

import numpy as np
Expand All @@ -28,6 +29,7 @@
from pandas._typing import (
Dtype,
DtypeObj,
IntervalClosedType,
npt,
)
from pandas.errors import InvalidIndexError
Expand Down Expand Up @@ -191,10 +193,12 @@ class IntervalIndex(ExtensionIndex):
_typ = "intervalindex"

# annotate properties pinned via inherit_names
closed: str
closed: IntervalClosedType
is_non_overlapping_monotonic: bool
closed_left: bool
closed_right: bool
open_left: bool
open_right: bool

_data: IntervalArray
_values: IntervalArray
Expand Down Expand Up @@ -543,7 +547,7 @@ def _maybe_convert_i8(self, key):

return key_i8

def _searchsorted_monotonic(self, label, side: str = "left"):
def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):
if not self.is_non_overlapping_monotonic:
raise KeyError(
"can only get slices from an IntervalIndex if bounds are "
Expand Down Expand Up @@ -941,7 +945,12 @@ def _is_type_compatible(a, b) -> bool:


def interval_range(
start=None, end=None, periods=None, freq=None, name: Hashable = None, closed="right"
start=None,
end=None,
periods=None,
freq=None,
name: Hashable = None,
closed: IntervalClosedType = "right",
) -> IntervalIndex:
"""
Return a fixed frequency IntervalIndex.
Expand Down
15 changes: 12 additions & 3 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
Hashable,
List,
Tuple,
TypeVar,
Union,
cast,
overload,
)
import warnings
Expand Down Expand Up @@ -66,6 +66,7 @@
)
from pandas.core import algorithms
from pandas.core.algorithms import unique
from pandas.core.arrays.base import ExtensionArray
from pandas.core.arrays.datetimes import (
maybe_convert_dtype,
objects_to_datetime64ns,
Expand All @@ -85,7 +86,8 @@

ArrayConvertible = Union[List, Tuple, AnyArrayLike, "Series"]
Scalar = Union[int, float, str]
DatetimeScalar = TypeVar("DatetimeScalar", Scalar, datetime)
DatetimeScalar = Union[Scalar, datetime]

DatetimeScalarOrArrayConvertible = Union[DatetimeScalar, ArrayConvertible]
start_caching_at = 50

Expand Down Expand Up @@ -638,7 +640,7 @@ def to_datetime(
infer_datetime_format: bool = ...,
origin=...,
cache: bool = ...,
) -> DatetimeScalar | NaTType:
) -> Timestamp | NaTType:
...


Expand Down Expand Up @@ -1061,6 +1063,13 @@ def to_datetime(
result = convert_listlike(arg, format, name=arg.name)
elif is_list_like(arg):
try:
# error: Argument 1 to "_maybe_cache" has incompatible type
# "Union[float, str, datetime, List[Any], Tuple[Any, ...], ExtensionArray,
# ndarray[Any, Any], Series]"; expected "Union[List[Any], Tuple[Any, ...],
# Union[Union[ExtensionArray, ndarray[Any, Any]], Index, Series], Series]"
arg = cast(
Union[list, tuple, ExtensionArray, np.ndarray, "Series", Index], arg
)
cache_array = _maybe_cache(arg, format, cache, convert_listlike)
except OutOfBoundsDatetime:
# caching attempts to create a DatetimeIndex, which may raise
Expand Down
26 changes: 19 additions & 7 deletions pandas/io/excel/_odfreader.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
from __future__ import annotations

from typing import (
TYPE_CHECKING,
cast,
)

import numpy as np

from pandas._typing import (
Expand All @@ -16,6 +21,9 @@

from pandas.io.excel._base import BaseExcelReader

if TYPE_CHECKING:
from pandas._libs.tslibs.nattype import NaTType


@doc(storage_options=_shared_docs["storage_options"])
class ODFReader(BaseExcelReader):
Expand Down Expand Up @@ -81,7 +89,9 @@ def get_sheet_by_name(self, name: str):
self.close()
raise ValueError(f"sheet {name} not found")

def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar]]:
def get_sheet_data(
self, sheet, convert_float: bool
) -> list[list[Scalar | NaTType]]:
"""
Parse an ODF Table into a list of lists
"""
Expand All @@ -99,12 +109,12 @@ def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar]]:
empty_rows = 0
max_row_len = 0

table: list[list[Scalar]] = []
table: list[list[Scalar | NaTType]] = []

for sheet_row in sheet_rows:
sheet_cells = [x for x in sheet_row.childNodes if x.qname in cell_names]
empty_cells = 0
table_row: list[Scalar] = []
table_row: list[Scalar | NaTType] = []

for sheet_cell in sheet_cells:
if sheet_cell.qname == table_cell_name:
Expand Down Expand Up @@ -167,7 +177,7 @@ def _is_empty_row(self, row) -> bool:

return True

def _get_cell_value(self, cell, convert_float: bool) -> Scalar:
def _get_cell_value(self, cell, convert_float: bool) -> Scalar | NaTType:
from odf.namespaces import OFFICENS

if str(cell) == "#N/A":
Expand Down Expand Up @@ -200,9 +210,11 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar:
cell_value = cell.attributes.get((OFFICENS, "date-value"))
return pd.to_datetime(cell_value)
elif cell_type == "time":
stamp = pd.to_datetime(str(cell))
# error: Item "str" of "Union[float, str, NaTType]" has no attribute "time"
return stamp.time() # type: ignore[union-attr]
# cast needed because `pd.to_datetime` can return NaTType,
# but we know this is a valid time
stamp = cast(pd.Timestamp, pd.to_datetime(str(cell)))
# cast needed here because Scalar doesn't include datetime.time
return cast(Scalar, stamp.time())
else:
self.close()
raise ValueError(f"Unrecognized type {cell_type}")
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/parsers/python_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -892,7 +892,7 @@ def _clear_buffer(self) -> None:

def _get_index_name(
self, columns: list[Hashable]
) -> tuple[list[Hashable] | None, list[Hashable], list[Hashable]]:
) -> tuple[Sequence[Hashable] | None, list[Hashable], list[Hashable]]:
"""
Try several cases to get lines:
Expand Down

0 comments on commit 93ba57a

Please sign in to comment.