Skip to content

Improved error for expand wildcard rule #15287

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 27 additions & 27 deletions datafusion/sql/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,10 @@ impl CheckColumnsSatisfyExprsPurpose {
fn message_prefix(&self) -> &'static str {
match self {
CheckColumnsSatisfyExprsPurpose::ProjectionMustReferenceAggregate => {
"Projection references non-aggregate values"
"Column in SELECT must be in GROUP BY or an aggregate function"
}
CheckColumnsSatisfyExprsPurpose::HavingMustReferenceAggregate => {
"HAVING clause references non-aggregate values"
"Column in HAVING must be in GROUP BY or an aggregate function"
}
}
}
Expand Down Expand Up @@ -159,7 +159,7 @@ fn check_column_satisfies_expr(
) -> Result<()> {
if !columns.contains(expr) {
return plan_err!(
"{}: Expression {} could not be resolved from available columns: {}",
"{}: While expanding wildcard, column \"{}\" must appear in the GROUP BY clause or must be part of an aggregate function, currently only \"{}\" appears in the SELECT clause satisfies this requirement",
purpose.message_prefix(),
expr,
expr_vec_fmt!(columns)
Expand All @@ -169,7 +169,7 @@ fn check_column_satisfies_expr(
purpose.diagnostic_message(expr),
expr.spans().and_then(|spans| spans.first()),
)
.with_help(format!("add '{expr}' to GROUP BY clause"), None);
.with_help(format!("Either add '{expr}' to GROUP BY clause, or use an aggregare function like ANY_VALUE({expr})"), None);
err.with_diagnostic(diagnostic)
});
}
Expand Down Expand Up @@ -496,30 +496,30 @@ impl TreeNodeRewriter for RecursiveUnnestRewriter<'_> {
///
/// For example an expr of **unnest(unnest(column1)) + unnest(unnest(unnest(column2)))**
/// ```text
/// ┌──────────────────┐
/// │ binaryexpr │
/// │ │
/// └──────────────────┘
/// f_down / / │ │
/// / / f_up │ │
/// / / f_down│ │f_up
/// unnest │ │
/// │ │
/// f_down / / f_up(rewriting) │ │
/// / /
/// / / unnest
/// unnest
/// f_down / / f_up(rewriting)
/// f_down / /f_up / /
/// / / / /
/// / / unnest
/// column1
/// f_down / /f_up
/// / /
/// / /
/// column2
/// ┌──────────────────┐
/// │ binaryexpr │
/// │ │
/// └──────────────────┘
/// f_down / / │ │
/// / / f_up │ │
/// / / f_down│ │f_up
/// unnest │ │
/// │ │
/// f_down / / f_up(rewriting) │ │
/// / /
/// / / unnest
/// unnest
/// f_down / / f_up(rewriting)
/// f_down / /f_up / /
/// / / / /
/// / / unnest
/// column1
/// f_down / /f_up
/// / /
/// / /
/// column2
/// ```
///
///
fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
if let Expr::Unnest(ref traversing_unnest) = expr {
if traversing_unnest == self.top_most_unnest.as_ref().unwrap() {
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sql/tests/cases/diagnostic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ fn test_missing_non_aggregate_in_group_by() -> Result<()> {
assert_eq!(diag.span, Some(spans["a"]));
assert_eq!(
diag.helps[0].message,
"add 'person.first_name' to GROUP BY clause"
"Either add 'person.first_name' to GROUP BY clause, or use an aggregare function like ANY_VALUE(person.first_name)"
);
Ok(())
}
Expand Down
14 changes: 7 additions & 7 deletions datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -820,7 +820,7 @@ fn select_with_having_refers_to_invalid_column() {
HAVING first_name = 'M'";
let err = logical_plan(sql).expect_err("query should have failed");
assert_eq!(
"Error during planning: HAVING clause references non-aggregate values: Expression person.first_name could not be resolved from available columns: person.id, max(person.age)",
"Error during planning: Column in HAVING must be in GROUP BY or an aggregate function: While expanding wildcard, column \"person.first_name\" must appear in the GROUP BY clause or must be part of an aggregate function, currently only \"person.id, max(person.age)\" appears in the SELECT clause satisfies this requirement",
err.strip_backtrace()
);
}
Expand All @@ -844,7 +844,7 @@ fn select_with_having_with_aggregate_not_in_select() {
HAVING MAX(age) > 100";
let err = logical_plan(sql).expect_err("query should have failed");
assert_eq!(
"Error during planning: Projection references non-aggregate values: Expression person.first_name could not be resolved from available columns: max(person.age)",
"Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column \"person.first_name\" must appear in the GROUP BY clause or must be part of an aggregate function, currently only \"max(person.age)\" appears in the SELECT clause satisfies this requirement",
err.strip_backtrace()
);
}
Expand Down Expand Up @@ -880,7 +880,7 @@ fn select_aggregate_with_having_referencing_column_not_in_select() {
HAVING first_name = 'M'";
let err = logical_plan(sql).expect_err("query should have failed");
assert_eq!(
"Error during planning: HAVING clause references non-aggregate values: Expression person.first_name could not be resolved from available columns: count(*)",
"Error during planning: Column in HAVING must be in GROUP BY or an aggregate function: While expanding wildcard, column \"person.first_name\" must appear in the GROUP BY clause or must be part of an aggregate function, currently only \"count(*)\" appears in the SELECT clause satisfies this requirement",
err.strip_backtrace()
);
}
Expand Down Expand Up @@ -1001,7 +1001,7 @@ fn select_aggregate_with_group_by_with_having_referencing_column_not_in_group_by
HAVING MAX(age) > 10 AND last_name = 'M'";
let err = logical_plan(sql).expect_err("query should have failed");
assert_eq!(
"Error during planning: HAVING clause references non-aggregate values: Expression person.last_name could not be resolved from available columns: person.first_name, max(person.age)",
"Error during planning: Column in HAVING must be in GROUP BY or an aggregate function: While expanding wildcard, column \"person.last_name\" must appear in the GROUP BY clause or must be part of an aggregate function, currently only \"person.first_name, max(person.age)\" appears in the SELECT clause satisfies this requirement",
err.strip_backtrace()
);
}
Expand Down Expand Up @@ -1365,7 +1365,7 @@ fn select_simple_aggregate_with_groupby_non_column_expression_nested_and_not_res
let sql = "SELECT ((age + 1) / 2) * (age + 9), MIN(first_name) FROM person GROUP BY age + 1";
let err = logical_plan(sql).expect_err("query should have failed");
assert_eq!(
"Error during planning: Projection references non-aggregate values: Expression person.age could not be resolved from available columns: person.age + Int64(1), min(person.first_name)",
"Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column \"person.age\" must appear in the GROUP BY clause or must be part of an aggregate function, currently only \"person.age + Int64(1), min(person.first_name)\" appears in the SELECT clause satisfies this requirement",
err.strip_backtrace()
);
}
Expand All @@ -1375,7 +1375,7 @@ fn select_simple_aggregate_with_groupby_non_column_expression_and_its_column_sel
let sql = "SELECT age, MIN(first_name) FROM person GROUP BY age + 1";
let err = logical_plan(sql).expect_err("query should have failed");
assert_eq!(
"Error during planning: Projection references non-aggregate values: Expression person.age could not be resolved from available columns: person.age + Int64(1), min(person.first_name)",
"Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column \"person.age\" must appear in the GROUP BY clause or must be part of an aggregate function, currently only \"person.age + Int64(1), min(person.first_name)\" appears in the SELECT clause satisfies this requirement",
err.strip_backtrace()
);
}
Expand Down Expand Up @@ -1636,7 +1636,7 @@ fn select_7480_2() {
let sql = "SELECT c1, c13, MIN(c12) FROM aggregate_test_100 GROUP BY c1";
let err = logical_plan(sql).expect_err("query should have failed");
assert_eq!(
"Error during planning: Projection references non-aggregate values: Expression aggregate_test_100.c13 could not be resolved from available columns: aggregate_test_100.c1, min(aggregate_test_100.c12)",
"Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column \"aggregate_test_100.c13\" must appear in the GROUP BY clause or must be part of an aggregate function, currently only \"aggregate_test_100.c1, min(aggregate_test_100.c12)\" appears in the SELECT clause satisfies this requirement",
err.strip_backtrace()
);
}
Expand Down
10 changes: 5 additions & 5 deletions datafusion/sqllogictest/test_files/group_by.slt
Original file line number Diff line number Diff line change
Expand Up @@ -3468,7 +3468,7 @@ SELECT r.sn, SUM(l.amount), r.amount
# to associate it with other fields, the aggregate should contain all the composite columns
# if any of the composite columns is missing, we cannot use associated indices inside the select expression
# the query below should fail
statement error DataFusion error: Error during planning: Projection references non\-aggregate values: Expression r\.amount could not be resolved from available columns: r\.sn, sum\(l\.amount\)
statement error DataFusion error: Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column "r\.amount" must appear in the GROUP BY clause or must be part of an aggregate function, currently only "r\.sn, sum\(l\.amount\)" appears in the SELECT clause satisfies this requirement
SELECT r.sn, SUM(l.amount), r.amount
FROM sales_global_with_composite_pk AS l
JOIN sales_global_with_composite_pk AS r
Expand Down Expand Up @@ -3496,7 +3496,7 @@ NULL NULL NULL
# left join shouldn't propagate right side constraint,
# if right side is a unique key (unique and can contain null)
# Please note that the above query and this one are the same except for the constraint in the table.
statement error DataFusion error: Error during planning: Projection references non\-aggregate values: Expression r\.amount could not be resolved from available columns: r\.sn, sum\(r\.amount\)
statement error DataFusion error: Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column "r\.amount" must appear in the GROUP BY clause or must be part of an aggregate function, currently only "r\.sn, sum\(r\.amount\)" appears in the SELECT clause satisfies this requirement
SELECT r.sn, r.amount, SUM(r.amount)
FROM (SELECT *
FROM sales_global_with_unique as l
Expand Down Expand Up @@ -3542,7 +3542,7 @@ SELECT column1, COUNT(*) as column2 FROM (VALUES (['a', 'b'], 1), (['c', 'd', 'e


# primary key should be aware of which columns it is associated with
statement error DataFusion error: Error during planning: Projection references non\-aggregate values: Expression r\.sn could not be resolved from available columns: l\.sn, l\.zip_code, l\.country, l\.ts, l\.currency, l\.amount, sum\(l\.amount\)
statement error DataFusion error: Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column "r\.sn" must appear in the GROUP BY clause or must be part of an aggregate function, currently only "l\.sn, l\.zip_code, l\.country, l\.ts, l\.currency, l\.amount, sum\(l\.amount\)" appears in the SELECT clause satisfies this requirement
SELECT l.sn, r.sn, SUM(l.amount), r.amount
FROM sales_global_with_pk AS l
JOIN sales_global_with_pk AS r
Expand Down Expand Up @@ -3633,7 +3633,7 @@ ORDER BY r.sn
4 100 2022-01-03T10:00:00

# after join, new window expressions shouldn't be associated with primary keys
statement error DataFusion error: Error during planning: Projection references non\-aggregate values: Expression rn1 could not be resolved from available columns: r\.sn, r\.ts, r\.amount, sum\(r\.amount\)
statement error DataFusion error: Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column "rn1" must appear in the GROUP BY clause or must be part of an aggregate function, currently only "r\.sn, r\.ts, r\.amount, sum\(r\.amount\)" appears in the SELECT clause satisfies this requirement
SELECT r.sn, SUM(r.amount), rn1
FROM
(SELECT r.ts, r.sn, r.amount,
Expand Down Expand Up @@ -5135,7 +5135,7 @@ statement ok
CREATE TABLE test_case_expr(a INT, b TEXT) AS VALUES (1,'hello'), (2,'world')

query T
SELECT (CASE WHEN CONCAT(b, 'hello') = 'test' THEN 'good' ELSE 'bad' END) AS c
SELECT (CASE WHEN CONCAT(b, 'hello') = 'test' THEN 'good' ELSE 'bad' END) AS c
FROM test_case_expr GROUP BY c;
----
bad
Expand Down
14 changes: 7 additions & 7 deletions datafusion/sqllogictest/test_files/unnest.slt
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,14 @@ AS VALUES

statement ok
CREATE TABLE nested_unnest_table
AS VALUES
AS VALUES
(struct('a', 'b', struct('c')), (struct('a', 'b', [10,20])), [struct('a', 'b')]),
(struct('d', 'e', struct('f')), (struct('x', 'y', [30,40, 50])), null)
;

statement ok
CREATE TABLE recursive_unnest_table
AS VALUES
AS VALUES
(struct([1], 'a'), [[[1],[2]],[[1,1]]], [struct([1],[[1,2]])]),
(struct([2], 'b'), [[[3,4],[5]],[[null,6],null,[7,8]]], [struct([2],[[3],[4]])])
;
Expand Down Expand Up @@ -264,9 +264,9 @@ NULL NULL 17
NULL NULL 18

query IIIT
select
unnest(column1), unnest(column2) + 2,
column3 * 10, unnest(array_remove(column1, '4'))
select
unnest(column1), unnest(column2) + 2,
column3 * 10, unnest(array_remove(column1, '4'))
from unnest_table;
----
1 9 10 1
Expand Down Expand Up @@ -795,7 +795,7 @@ select unnest(unnest(column2)) c2, count(column3) from recursive_unnest_table gr
[NULL, 6] 1
NULL 1

query error DataFusion error: Error during planning: Projection references non\-aggregate values
query error DataFusion error: Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column "nested_unnest_table\.column1" must appear in the GROUP BY clause or must be part of an aggregate function, currently only "UNNEST\(nested_unnest_table\.column1\)\[c0\]" appears in the SELECT clause satisfies this requirement
select unnest(column1) c1 from nested_unnest_table group by c1.c0;

# TODO: this query should work. see issue: https://github.com/apache/datafusion/issues/12794
Expand Down Expand Up @@ -875,7 +875,7 @@ query TT
explain select * from unnest_table u, unnest(u.column1);
----
logical_plan
01)Cross Join:
01)Cross Join:
02)--SubqueryAlias: u
03)----TableScan: unnest_table projection=[column1, column2, column3, column4, column5]
04)--Subquery:
Expand Down