Skip to content

Commit

Permalink
feat(python): Add "drop_empty_cols" parameter for read_excel and `read_ods`
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Dec 23, 2024
1 parent d0cac9d commit 28f1bd0
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 5 deletions.
2 changes: 1 addition & 1 deletion py-polars/polars/_utils/various.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ def deduplicate_names(names: Iterable[str]) -> list[str]:
seen: MutableMapping[str, int] = Counter()
deduped = []
for nm in names:
deduped.append(f"{nm}{seen[nm]}" if nm in seen else nm)
deduped.append(f"{nm}{seen[nm] - 1}" if nm in seen else nm)
seen[nm] += 1
return deduped

Expand Down
10 changes: 6 additions & 4 deletions py-polars/polars/io/spreadsheet/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def _sources(
return sources, read_multiple_workbooks


def _standardize_duplicate_names(s: str) -> str:
def _standardize_duplicates(s: str) -> str:
"""Standardize columns with '_duplicated_n' names."""
return re.sub(r"_duplicated_(\d+)", repl=r"\1", string=s)

Expand Down Expand Up @@ -277,7 +277,8 @@ def read_excel(
Indicate whether to omit empty rows when reading data into the DataFrame.
drop_empty_cols
Indicate whether to omit empty columns (with no headers) when reading data into
the DataFrame.
the DataFrame (note that empty column identification may vary depending on the
underlying engine being used).
raise_if_empty
When there is no data in the sheet, `NoDataError` is raised. If this parameter
is set to False, an empty DataFrame (with no columns) is returned instead.
Expand Down Expand Up @@ -505,7 +506,8 @@ def read_ods(
Indicate whether to omit empty rows when reading data into the DataFrame.
drop_empty_cols
Indicate whether to omit empty columns (with no headers) when reading data into
the DataFrame.
the DataFrame (note that empty column identification may vary depending on the
underlying engine being used).
raise_if_empty
When there is no data in the sheet, `NoDataError` is raised. If this parameter
is set to False, an empty DataFrame (with no columns) is returned instead.
Expand Down Expand Up @@ -1129,5 +1131,5 @@ def _read_spreadsheet_xlsx2csv(
if cast_to_boolean:
df = df.with_columns(*cast_to_boolean)

df = df.rename(_standardize_duplicate_names)
df = df.rename(_standardize_duplicates)
return _reorder_columns(df, columns)

0 comments on commit 28f1bd0

Please sign in to comment.