TYP: add type annotations to parts of the C parser (pandas-dev#44677)

Author: me-kbs · Date: Dec 22, 2021 · Commit: 9138b1d · Parent: 9098d14
Show file tree

Hide file tree

Showing 3 changed files with 61 additions and 14 deletions.
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
@@ -12,8 +12,10 @@
     DefaultDict,
     Hashable,
     Iterable,
+    List,
     Mapping,
     Sequence,
+    Tuple,
     cast,
     final,
     overload,
@@ -441,10 +443,15 @@ def _maybe_dedup_names(self, names: Sequence[Hashable]) -> Sequence[Hashable]:
         return names
 
     @final
-    def _maybe_make_multi_index_columns(self, columns, col_names=None):
+    def _maybe_make_multi_index_columns(
+        self,
+        columns: Sequence[Hashable],
+        col_names: Sequence[Hashable] | None = None,
+    ) -> Sequence[Hashable] | MultiIndex:
         # possibly create a column mi here
         if _is_potential_multi_index(columns):
-            columns = MultiIndex.from_tuples(columns, names=col_names)
+            list_columns = cast(List[Tuple], columns)
+            return MultiIndex.from_tuples(list_columns, names=col_names)
         return columns
 
     @final
@@ -923,7 +930,25 @@ def _check_data_length(
                 stacklevel=find_stack_level(),
             )
 
-    def _evaluate_usecols(self, usecols, names):
+    @overload
+    def _evaluate_usecols(
+        self,
+        usecols: set[int] | Callable[[Hashable], object],
+        names: Sequence[Hashable],
+    ) -> set[int]:
+        ...
+
+    @overload
+    def _evaluate_usecols(
+        self, usecols: set[str], names: Sequence[Hashable]
+    ) -> set[str]:
+        ...
+
+    def _evaluate_usecols(
+        self,
+        usecols: Callable[[Hashable], object] | set[str] | set[int],
+        names: Sequence[Hashable],
+    ) -> set[str] | set[int]:
         """
         Check whether or not the 'usecols' parameter
         is a callable.  If so, enumerates the 'names'
@@ -1289,7 +1314,8 @@ def _get_na_values(col, na_values, na_fvalues, keep_default_na):
 
 
 def _is_potential_multi_index(
-    columns, index_col: bool | Sequence[int] | None = None
+    columns: Sequence[Hashable] | MultiIndex,
+    index_col: bool | Sequence[int] | None = None,
 ) -> bool:
     """
     Check whether or not the `columns` parameter

diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py
@@ -1,12 +1,19 @@
 from __future__ import annotations
 
+from typing import (
+    Hashable,
+    Mapping,
+    Sequence,
+)
 import warnings
 
 import numpy as np
 
 import pandas._libs.parsers as parsers
 from pandas._typing import (
     ArrayLike,
+    DtypeArg,
+    DtypeObj,
     FilePath,
     ReadCsvBuffer,
 )
@@ -20,6 +27,10 @@
 from pandas.core.dtypes.concat import union_categoricals
 from pandas.core.dtypes.dtypes import ExtensionDtype
 
+from pandas import (
+    Index,
+    MultiIndex,
+)
 from pandas.core.indexes.api import ensure_index_from_sequences
 
 from pandas.io.parsers.base_parser import (
@@ -193,7 +204,7 @@ def close(self) -> None:
         except ValueError:
             pass
 
-    def _set_noconvert_columns(self):
+    def _set_noconvert_columns(self) -> None:
         """
         Set the columns that should not undergo dtype conversions.
 
@@ -214,7 +225,14 @@ def _set_noconvert_columns(self):
         for col in noconvert_columns:
             self._reader.set_noconvert(col)
 
-    def read(self, nrows=None):
+    def read(
+        self,
+        nrows: int | None = None,
+    ) -> tuple[
+        Index | MultiIndex | None,
+        Sequence[Hashable] | MultiIndex,
+        Mapping[Hashable, ArrayLike],
+    ]:
         try:
             if self.low_memory:
                 chunks = self._reader.read_low_memory(nrows)
@@ -306,11 +324,11 @@ def read(self, nrows=None):
             index, names = self._make_index(date_data, alldata, names)
 
         # maybe create a mi on the columns
-        names = self._maybe_make_multi_index_columns(names, self.col_names)
+        conv_names = self._maybe_make_multi_index_columns(names, self.col_names)
 
-        return index, names, date_data
+        return index, conv_names, date_data
 
-    def _filter_usecols(self, names):
+    def _filter_usecols(self, names: Sequence[Hashable]) -> Sequence[Hashable]:
         # hackish
         usecols = self._evaluate_usecols(self.usecols, names)
         if usecols is not None and len(names) != len(usecols):
@@ -395,13 +413,15 @@ def _concatenate_chunks(chunks: list[dict[int, ArrayLike]]) -> dict:
     return result
 
 
-def ensure_dtype_objs(dtype):
+def ensure_dtype_objs(
+    dtype: DtypeArg | dict[Hashable, DtypeArg] | None
+) -> DtypeObj | dict[Hashable, DtypeObj] | None:
     """
     Ensure we have either None, a dtype object, or a dictionary mapping to
     dtype objects.
     """
     if isinstance(dtype, dict):
-        dtype = {k: pandas_dtype(dtype[k]) for k in dtype}
+        return {k: pandas_dtype(dtype[k]) for k in dtype}
     elif dtype is not None:
-        dtype = pandas_dtype(dtype)
+        return pandas_dtype(dtype)
     return dtype
diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
@@ -270,8 +270,8 @@ def read(self, rows: int | None = None):
                 self.index_names,
                 self.dtype,
             )
-            columns = self._maybe_make_multi_index_columns(columns, self.col_names)
-            return index, columns, col_dict
+            conv_columns = self._maybe_make_multi_index_columns(columns, self.col_names)
+            return index, conv_columns, col_dict
 
         # handle new style for names in index
         count_empty_content_vals = count_empty_vals(content[0])
@@ -560,6 +560,7 @@ def _handle_usecols(
 
         usecols_key is used if there are string usecols.
         """
+        col_indices: set[int] | list[int]
         if self.usecols is not None:
             if callable(self.usecols):
                 col_indices = self._evaluate_usecols(self.usecols, usecols_key)