Skip to content

Commit

Permalink
chore: more cleanup of error messages
Browse files Browse the repository at this point in the history
  • Loading branch information
wiedld committed Dec 24, 2024
1 parent 810246d commit 9842d19
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 18 deletions.
4 changes: 2 additions & 2 deletions datafusion/optimizer/src/analyzer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ impl Analyzer {
{
// verify the logical plan required invariants at the start, before analyzer
plan.check_invariants(InvariantLevel::Always)
- .map_err(|e| e.context("assert_lp_invariants_before_analyzers"))?;
+ .map_err(|e| e.context("Invalid input plan passed to Analyzer"))?;

let start_time = Instant::now();
let mut new_plan = plan;
Expand Down Expand Up @@ -178,7 +178,7 @@ impl Analyzer {
// verify at the end, after the last LP analyzer pass, that the plan is executable.
new_plan
.check_invariants(InvariantLevel::Executable)
- .map_err(|e| e.context("Invalid plan after Analyzer"))?;
+ .map_err(|e| e.context("Invalid (non-executable) plan after Analyzer"))?;

log_plan("Final analyzed plan", &new_plan);
debug!("Analyzer took {} ms", start_time.elapsed().as_millis());
Expand Down
4 changes: 2 additions & 2 deletions datafusion/optimizer/src/decorrelate_predicate_subquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -835,7 +835,7 @@ mod tests {
.build()?;

// Maybe okay if the table only has a single column?
- let expected = "Invalid plan after Analyzer\
+ let expected = "Invalid (non-executable) plan after Analyzer\
\ncaused by\
\nError during planning: InSubquery should only return one column, but found 4";
assert_analyzer_check_err(vec![], plan, expected);
Expand Down Expand Up @@ -930,7 +930,7 @@ mod tests {
.project(vec![col("customer.c_custkey")])?
.build()?;

- let expected = "Invalid plan after Analyzer\
+ let expected = "Invalid (non-executable) plan after Analyzer\
\ncaused by\
\nError during planning: InSubquery should only return one column";
assert_analyzer_check_err(vec![], plan, expected);
Expand Down
14 changes: 8 additions & 6 deletions datafusion/optimizer/src/optimizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ impl Optimizer {
{
// verify LP is valid, before the first LP optimizer pass.
plan.check_invariants(InvariantLevel::Executable)
- .map_err(|e| e.context("Invalid plan before LP Optimizers"))?;
+ .map_err(|e| e.context("Invalid input plan before LP Optimizers"))?;

let start_time = Instant::now();
let options = config.options();
Expand Down Expand Up @@ -394,12 +394,12 @@ impl Optimizer {
.and_then(|tnr| {
// run optimizer-specific invariant checks, per optimizer rule applied
assert_valid_optimization(&tnr.data, &starting_schema)
- .map_err(|e| e.context(format!("check_optimizer_specific_invariants after optimizer rule: {}", rule.name())))?;
+ .map_err(|e| e.context(format!("Check optimizer-specific invariants after optimizer rule: {}", rule.name())))?;

// run LP invariant checks only in debug mode for performance reasons
#[cfg(debug_assertions)]
tnr.data.check_invariants(InvariantLevel::Executable)
- .map_err(|e| e.context(format!("check_plan_is_executable after optimizer rule: {}", rule.name())))?;
+ .map_err(|e| e.context(format!("Invalid (non-executable) plan after Optimizer rule: {}", rule.name())))?;

Ok(tnr)
});
Expand Down Expand Up @@ -462,13 +462,15 @@ impl Optimizer {

// verify that the optimizer passes only mutated what was permitted.
assert_valid_optimization(&new_plan, &starting_schema).map_err(|e| {
- e.context("check_optimizer_specific_invariants after all passes")
+ e.context("Check optimizer-specific invariants after all passes")
})?;

// verify LP is valid, after the last optimizer pass.
new_plan
.check_invariants(InvariantLevel::Executable)
- .map_err(|e| e.context("Invalid plan after LP Optimizers"))?;
+ .map_err(|e| {
+ e.context("Invalid (non-executable) plan after LP Optimizers")
+ })?;

log_plan("Final optimized plan", &new_plan);
debug!("Optimizer took {} ms", start_time.elapsed().as_millis());
Expand Down Expand Up @@ -545,7 +547,7 @@ mod tests {
assert_eq!(
"Optimizer rule 'get table_scan rule' failed\n\
caused by\n\
- check_optimizer_specific_invariants after optimizer rule: get table_scan rule\n\
+ Check optimizer-specific invariants after optimizer rule: get table_scan rule\n\
caused by\n\
Internal error: Failed due to a difference in schemas, \
original schema: DFSchema { inner: Schema { \
Expand Down
4 changes: 2 additions & 2 deletions datafusion/optimizer/src/scalar_subquery_to_join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -731,7 +731,7 @@ mod tests {
.project(vec![col("customer.c_custkey")])?
.build()?;

- let expected = "Invalid plan after Analyzer\
+ let expected = "Invalid (non-executable) plan after Analyzer\
\ncaused by\
\nError during planning: Scalar subquery should only return one column";
assert_analyzer_check_err(vec![], plan, expected);
Expand Down Expand Up @@ -793,7 +793,7 @@ mod tests {
.project(vec![col("customer.c_custkey")])?
.build()?;

- let expected = "Invalid plan after Analyzer\
+ let expected = "Invalid (non-executable) plan after Analyzer\
\ncaused by\
\nError during planning: Scalar subquery should only return one column";
assert_analyzer_check_err(vec![], plan, expected);
Expand Down
42 changes: 36 additions & 6 deletions datafusion/sqllogictest/test_files/subquery.slt
Original file line number Diff line number Diff line change
Expand Up @@ -433,17 +433,32 @@ logical_plan
08)----------TableScan: t1 projection=[t1_int]

#invalid_scalar_subquery
- statement error DataFusion error: Invalid plan after Analyzer\ncaused by\nError during planning: Scalar subquery should only return one column, but found 2: t2.t2_id, t2.t2_name
+ statement error
SELECT t1_id, t1_name, t1_int, (select t2_id, t2_name FROM t2 WHERE t2.t2_id = t1.t1_int) FROM t1
+ ----
+ DataFusion error: Invalid (non-executable) plan after Analyzer
+ caused by
+ Error during planning: Scalar subquery should only return one column, but found 2: t2.t2_id, t2.t2_name


#subquery_not_allowed
#In/Exist Subquery is not allowed in ORDER BY clause.
- statement error DataFusion error: Invalid plan after Analyzer\ncaused by\nError during planning: In/Exist subquery can only be used in Projection, Filter, TableScan, Window functions, Aggregate and Join plan nodes, but was used in \[Sort: t1.t1_int IN \(<subquery>\) ASC NULLS LAST\]
+ statement error
SELECT t1_id, t1_name, t1_int FROM t1 order by t1_int in (SELECT t2_int FROM t2 WHERE t1.t1_id > t1.t1_int)
+ ----
+ DataFusion error: Invalid (non-executable) plan after Analyzer
+ caused by
+ Error during planning: In/Exist subquery can only be used in Projection, Filter, TableScan, Window functions, Aggregate and Join plan nodes, but was used in [Sort: t1.t1_int IN (<subquery>) ASC NULLS LAST]


#non_aggregated_correlated_scalar_subquery
- statement error DataFusion error: Invalid plan after Analyzer\ncaused by\nError during planning: Correlated scalar subquery must be aggregated to return at most one row
+ statement error
SELECT t1_id, (SELECT t2_int FROM t2 WHERE t2.t2_int = t1.t1_int) as t2_int from t1
+ ----
+ DataFusion error: Invalid (non-executable) plan after Analyzer
+ caused by
+ Error during planning: Correlated scalar subquery must be aggregated to return at most one row


#non_aggregated_correlated_scalar_subquery_unique
query II rowsort
Expand All @@ -456,12 +471,22 @@ SELECT t1_id, (SELECT t3_int FROM t3 WHERE t3.t3_id = t1.t1_id) as t3_int from t


#non_aggregated_correlated_scalar_subquery
- statement error DataFusion error: Invalid plan after Analyzer\ncaused by\nError during planning: Correlated scalar subquery must be aggregated to return at most one row
+ statement error
SELECT t1_id, (SELECT t2_int FROM t2 WHERE t2.t2_int = t1_int group by t2_int) as t2_int from t1
+ ----
+ DataFusion error: Invalid (non-executable) plan after Analyzer
+ caused by
+ Error during planning: Correlated scalar subquery must be aggregated to return at most one row


#non_aggregated_correlated_scalar_subquery_with_limit
- statement error DataFusion error: Invalid plan after Analyzer\ncaused by\nError during planning: Correlated scalar subquery must be aggregated to return at most one row
+ statement error
SELECT t1_id, (SELECT t2_int FROM t2 WHERE t2.t2_int = t1.t1_int limit 2) as t2_int from t1
+ ----
+ DataFusion error: Invalid (non-executable) plan after Analyzer
+ caused by
+ Error during planning: Correlated scalar subquery must be aggregated to return at most one row


#non_aggregated_correlated_scalar_subquery_with_single_row
query TT
Expand Down Expand Up @@ -523,8 +548,13 @@ logical_plan
07)--TableScan: t1 projection=[t1_id]

#aggregated_correlated_scalar_subquery_with_extra_group_by_columns
- statement error DataFusion error: Invalid plan after Analyzer\ncaused by\nError during planning: A GROUP BY clause in a scalar correlated subquery cannot contain non-correlated columns
+ statement error
SELECT t1_id, (SELECT sum(t2_int) FROM t2 WHERE t2.t2_id = t1.t1_id group by t2_name) as t2_sum from t1
+ ----
+ DataFusion error: Invalid (non-executable) plan after Analyzer
+ caused by
+ Error during planning: A GROUP BY clause in a scalar correlated subquery cannot contain non-correlated columns


#support_agg_correlated_columns
query TT
Expand Down

0 comments on commit 9842d19

Please sign in to comment.