Skip to content

Commit

Permalink
Type-annotate parts of the C parser (pandas-dev#44677)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored Dec 22, 2021
1 parent 9098d14 commit 9138b1d
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 14 deletions.
34 changes: 30 additions & 4 deletions pandas/io/parsers/base_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@
DefaultDict,
Hashable,
Iterable,
List,
Mapping,
Sequence,
Tuple,
cast,
final,
overload,
Expand Down Expand Up @@ -441,10 +443,15 @@ def _maybe_dedup_names(self, names: Sequence[Hashable]) -> Sequence[Hashable]:
return names

@final
def _maybe_make_multi_index_columns(self, columns, col_names=None):
def _maybe_make_multi_index_columns(
self,
columns: Sequence[Hashable],
col_names: Sequence[Hashable] | None = None,
) -> Sequence[Hashable] | MultiIndex:
# possibly create a MultiIndex for the columns here
if _is_potential_multi_index(columns):
columns = MultiIndex.from_tuples(columns, names=col_names)
list_columns = cast(List[Tuple], columns)
return MultiIndex.from_tuples(list_columns, names=col_names)
return columns

@final
Expand Down Expand Up @@ -923,7 +930,25 @@ def _check_data_length(
stacklevel=find_stack_level(),
)

def _evaluate_usecols(self, usecols, names):
@overload
def _evaluate_usecols(
self,
usecols: set[int] | Callable[[Hashable], object],
names: Sequence[Hashable],
) -> set[int]:
...

@overload
def _evaluate_usecols(
self, usecols: set[str], names: Sequence[Hashable]
) -> set[str]:
...

def _evaluate_usecols(
self,
usecols: Callable[[Hashable], object] | set[str] | set[int],
names: Sequence[Hashable],
) -> set[str] | set[int]:
"""
Check whether or not the 'usecols' parameter
is a callable. If so, enumerates the 'names'
Expand Down Expand Up @@ -1289,7 +1314,8 @@ def _get_na_values(col, na_values, na_fvalues, keep_default_na):


def _is_potential_multi_index(
columns, index_col: bool | Sequence[int] | None = None
columns: Sequence[Hashable] | MultiIndex,
index_col: bool | Sequence[int] | None = None,
) -> bool:
"""
Check whether or not the `columns` parameter
Expand Down
36 changes: 28 additions & 8 deletions pandas/io/parsers/c_parser_wrapper.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
from __future__ import annotations

from typing import (
Hashable,
Mapping,
Sequence,
)
import warnings

import numpy as np

import pandas._libs.parsers as parsers
from pandas._typing import (
ArrayLike,
DtypeArg,
DtypeObj,
FilePath,
ReadCsvBuffer,
)
Expand All @@ -20,6 +27,10 @@
from pandas.core.dtypes.concat import union_categoricals
from pandas.core.dtypes.dtypes import ExtensionDtype

from pandas import (
Index,
MultiIndex,
)
from pandas.core.indexes.api import ensure_index_from_sequences

from pandas.io.parsers.base_parser import (
Expand Down Expand Up @@ -193,7 +204,7 @@ def close(self) -> None:
except ValueError:
pass

def _set_noconvert_columns(self):
def _set_noconvert_columns(self) -> None:
"""
Set the columns that should not undergo dtype conversions.
Expand All @@ -214,7 +225,14 @@ def _set_noconvert_columns(self):
for col in noconvert_columns:
self._reader.set_noconvert(col)

def read(self, nrows=None):
def read(
self,
nrows: int | None = None,
) -> tuple[
Index | MultiIndex | None,
Sequence[Hashable] | MultiIndex,
Mapping[Hashable, ArrayLike],
]:
try:
if self.low_memory:
chunks = self._reader.read_low_memory(nrows)
Expand Down Expand Up @@ -306,11 +324,11 @@ def read(self, nrows=None):
index, names = self._make_index(date_data, alldata, names)

# maybe create a MultiIndex on the columns
names = self._maybe_make_multi_index_columns(names, self.col_names)
conv_names = self._maybe_make_multi_index_columns(names, self.col_names)

return index, names, date_data
return index, conv_names, date_data

def _filter_usecols(self, names):
def _filter_usecols(self, names: Sequence[Hashable]) -> Sequence[Hashable]:
# hackish
usecols = self._evaluate_usecols(self.usecols, names)
if usecols is not None and len(names) != len(usecols):
Expand Down Expand Up @@ -395,13 +413,15 @@ def _concatenate_chunks(chunks: list[dict[int, ArrayLike]]) -> dict:
return result


def ensure_dtype_objs(dtype):
def ensure_dtype_objs(
dtype: DtypeArg | dict[Hashable, DtypeArg] | None
) -> DtypeObj | dict[Hashable, DtypeObj] | None:
"""
Ensure we have either None, a dtype object, or a dictionary mapping to
dtype objects.
"""
if isinstance(dtype, dict):
dtype = {k: pandas_dtype(dtype[k]) for k in dtype}
return {k: pandas_dtype(dtype[k]) for k in dtype}
elif dtype is not None:
dtype = pandas_dtype(dtype)
return pandas_dtype(dtype)
return dtype
5 changes: 3 additions & 2 deletions pandas/io/parsers/python_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,8 @@ def read(self, rows: int | None = None):
self.index_names,
self.dtype,
)
columns = self._maybe_make_multi_index_columns(columns, self.col_names)
return index, columns, col_dict
conv_columns = self._maybe_make_multi_index_columns(columns, self.col_names)
return index, conv_columns, col_dict

# handle new style for names in index
count_empty_content_vals = count_empty_vals(content[0])
Expand Down Expand Up @@ -560,6 +560,7 @@ def _handle_usecols(
usecols_key is used if there are string usecols.
"""
col_indices: set[int] | list[int]
if self.usecols is not None:
if callable(self.usecols):
col_indices = self._evaluate_usecols(self.usecols, usecols_key)
Expand Down

0 comments on commit 9138b1d

Please sign in to comment.