Skip to content

Commit

Permalink
refactor(rust): Fix duplicate cols in new-streaming parquet prefilter
Browse files Browse the repository at this point in the history
  • Loading branch information
nameexhaustion authored Dec 23, 2024
1 parent 8df0cbe commit f5d8903
Showing 1 changed file with 7 additions and 1 deletion.
8 changes: 7 additions & 1 deletion crates/polars-stream/src/nodes/io_sources/parquet/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,12 +272,14 @@ impl ParquetSourceNode {
.collect_live_columns(&mut live_columns);
let v = (!live_columns.is_empty())
.then(|| {
- let out = live_columns
+ let mut out = live_columns
.iter()
// Can be `None` - if the column is e.g. a hive column, or the row index column.
.filter_map(|x| projected_arrow_schema.index_of(x))
.collect::<Vec<_>>();

out.sort_unstable();

// There is at least one non-predicate column, or pre-filtering was
// explicitly requested (only useful for testing).
(out.len() < projected_arrow_schema.len()
Expand Down Expand Up @@ -363,6 +365,10 @@ impl ParquetSourceNode {
/// Returns 0..len in a Vec, excluding indices in `exclude`.
/// `exclude` needs to be a sorted list of unique values.
fn filtered_range(exclude: &[usize], len: usize) -> Vec<usize> {
if cfg!(debug_assertions) {
assert!(exclude.windows(2).all(|x| x[1] > x[0]));
}

let mut j = 0;

(0..len)
Expand Down

0 comments on commit f5d8903

Please sign in to comment.