diff --git a/tabula/util.py b/tabula/util.py index 7dc2453..2e49cf7 100644 --- a/tabula/util.py +++ b/tabula/util.py @@ -9,7 +9,7 @@ import shlex from dataclasses import dataclass from logging import getLogger -from typing import IO, Iterable, List, Optional, Union, cast +from typing import IO, Iterable, List, Optional, Sequence, Union, cast logger = getLogger(__name__) @@ -115,8 +115,9 @@ class TabulaOption: Password to decrypt document. Default: empty silent (bool, optional): Suppress all stderr output. - columns (iterable, optional): - X coordinates of column boundaries. + columns (Sequence, optional): + X coordinates of column boundaries. Must be sorted and of a datatype that + preserves order, e.g. tuple or list Example: ``[10.1, 20.2, 30.3]`` @@ -147,7 +148,7 @@ class TabulaOption: stream: bool = False password: Optional[str] = None silent: Optional[bool] = None - columns: Optional[Iterable[float]] = None + columns: Optional[Sequence[float]] = None relative_columns: bool = False format: Optional[str] = None batch: Optional[str] = None @@ -235,7 +236,7 @@ def build_option_list(self) -> List[str]: __options += ["--outfile", self.output_path] if self.columns: - if self.columns != sorted(self.columns): + if list(self.columns) != sorted(self.columns): raise ValueError("columns option should be sorted") __columns = _format_with_relative(self.columns, self.relative_columns) diff --git a/tests/test_read_pdf_table.py b/tests/test_read_pdf_table.py index e0510a3..e906dfc 100644 --- a/tests/test_read_pdf_table.py +++ b/tests/test_read_pdf_table.py @@ -91,6 +91,15 @@ def test_read_pdf_with_columns(self): )[0].equals(pd.read_csv(expected_csv)) ) + def test_read_pdf_with_tuple_columns(self): + pdf_path = "tests/resources/campaign_donors.pdf" + expected_csv = "tests/resources/campaign_donors.csv" + self.assertTrue( + tabula.read_pdf( + pdf_path, columns=(47, 147, 256, 310, 375, 431, 504), guess=False + )[0].equals(pd.read_csv(expected_csv)) + ) + def test_read_pdf_with_relative_columns(self): pdf_path = "tests/resources/campaign_donors.pdf" expected_csv = "tests/resources/campaign_donors.csv"