Skip to content

Commit c2c43ec

Browse files
authored
Improved error for expand wildcard rule (#15287)
* Improve groupby error message for new version of expand wildcard logic, also modified related CICL content * Improved error message for new expand wildcard logic, also edited related CICL test * improved syntax requested by CICL process * unified and , updated test error message
1 parent 1c38aff commit c2c43ec

File tree

5 files changed

+47
-47
lines changed

5 files changed

+47
-47
lines changed

datafusion/sql/src/utils.rs

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -102,10 +102,10 @@ impl CheckColumnsSatisfyExprsPurpose {
102102
fn message_prefix(&self) -> &'static str {
103103
match self {
104104
CheckColumnsSatisfyExprsPurpose::ProjectionMustReferenceAggregate => {
105-
"Projection references non-aggregate values"
105+
"Column in SELECT must be in GROUP BY or an aggregate function"
106106
}
107107
CheckColumnsSatisfyExprsPurpose::HavingMustReferenceAggregate => {
108-
"HAVING clause references non-aggregate values"
108+
"Column in HAVING must be in GROUP BY or an aggregate function"
109109
}
110110
}
111111
}
@@ -159,7 +159,7 @@ fn check_column_satisfies_expr(
159159
) -> Result<()> {
160160
if !columns.contains(expr) {
161161
return plan_err!(
162-
"{}: Expression {} could not be resolved from available columns: {}",
162+
"{}: While expanding wildcard, column \"{}\" must appear in the GROUP BY clause or must be part of an aggregate function, currently only \"{}\" appears in the SELECT clause satisfies this requirement",
163163
purpose.message_prefix(),
164164
expr,
165165
expr_vec_fmt!(columns)
@@ -169,7 +169,7 @@ fn check_column_satisfies_expr(
169169
purpose.diagnostic_message(expr),
170170
expr.spans().and_then(|spans| spans.first()),
171171
)
172-
.with_help(format!("add '{expr}' to GROUP BY clause"), None);
172+
.with_help(format!("Either add '{expr}' to GROUP BY clause, or use an aggregare function like ANY_VALUE({expr})"), None);
173173
err.with_diagnostic(diagnostic)
174174
});
175175
}
@@ -496,30 +496,30 @@ impl TreeNodeRewriter for RecursiveUnnestRewriter<'_> {
496496
///
497497
/// For example an expr of **unnest(unnest(column1)) + unnest(unnest(unnest(column2)))**
498498
/// ```text
499-
/// ┌──────────────────┐
500-
/// │ binaryexpr │
501-
/// │ │
502-
/// └──────────────────┘
503-
/// f_down / / │ │
504-
/// / / f_up │ │
505-
/// / / f_down│ │f_up
506-
/// unnest │ │
507-
/// │ │
508-
/// f_down / / f_up(rewriting) │ │
509-
/// / /
510-
/// / / unnest
511-
/// unnest
512-
/// f_down / / f_up(rewriting)
513-
/// f_down / /f_up / /
514-
/// / / / /
515-
/// / / unnest
516-
/// column1
517-
/// f_down / /f_up
518-
/// / /
519-
/// / /
520-
/// column2
499+
/// ┌──────────────────┐
500+
/// │ binaryexpr │
501+
/// │ │
502+
/// └──────────────────┘
503+
/// f_down / / │ │
504+
/// / / f_up │ │
505+
/// / / f_down│ │f_up
506+
/// unnest │ │
507+
/// │ │
508+
/// f_down / / f_up(rewriting) │ │
509+
/// / /
510+
/// / / unnest
511+
/// unnest
512+
/// f_down / / f_up(rewriting)
513+
/// f_down / /f_up / /
514+
/// / / / /
515+
/// / / unnest
516+
/// column1
517+
/// f_down / /f_up
518+
/// / /
519+
/// / /
520+
/// column2
521521
/// ```
522-
///
522+
///
523523
fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
524524
if let Expr::Unnest(ref traversing_unnest) = expr {
525525
if traversing_unnest == self.top_most_unnest.as_ref().unwrap() {

datafusion/sql/tests/cases/diagnostic.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ fn test_missing_non_aggregate_in_group_by() -> Result<()> {
190190
assert_eq!(diag.span, Some(spans["a"]));
191191
assert_eq!(
192192
diag.helps[0].message,
193-
"add 'person.first_name' to GROUP BY clause"
193+
"Either add 'person.first_name' to GROUP BY clause, or use an aggregare function like ANY_VALUE(person.first_name)"
194194
);
195195
Ok(())
196196
}

datafusion/sql/tests/sql_integration.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -820,7 +820,7 @@ fn select_with_having_refers_to_invalid_column() {
820820
HAVING first_name = 'M'";
821821
let err = logical_plan(sql).expect_err("query should have failed");
822822
assert_eq!(
823-
"Error during planning: HAVING clause references non-aggregate values: Expression person.first_name could not be resolved from available columns: person.id, max(person.age)",
823+
"Error during planning: Column in HAVING must be in GROUP BY or an aggregate function: While expanding wildcard, column \"person.first_name\" must appear in the GROUP BY clause or must be part of an aggregate function, currently only \"person.id, max(person.age)\" appears in the SELECT clause satisfies this requirement",
824824
err.strip_backtrace()
825825
);
826826
}
@@ -844,7 +844,7 @@ fn select_with_having_with_aggregate_not_in_select() {
844844
HAVING MAX(age) > 100";
845845
let err = logical_plan(sql).expect_err("query should have failed");
846846
assert_eq!(
847-
"Error during planning: Projection references non-aggregate values: Expression person.first_name could not be resolved from available columns: max(person.age)",
847+
"Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column \"person.first_name\" must appear in the GROUP BY clause or must be part of an aggregate function, currently only \"max(person.age)\" appears in the SELECT clause satisfies this requirement",
848848
err.strip_backtrace()
849849
);
850850
}
@@ -880,7 +880,7 @@ fn select_aggregate_with_having_referencing_column_not_in_select() {
880880
HAVING first_name = 'M'";
881881
let err = logical_plan(sql).expect_err("query should have failed");
882882
assert_eq!(
883-
"Error during planning: HAVING clause references non-aggregate values: Expression person.first_name could not be resolved from available columns: count(*)",
883+
"Error during planning: Column in HAVING must be in GROUP BY or an aggregate function: While expanding wildcard, column \"person.first_name\" must appear in the GROUP BY clause or must be part of an aggregate function, currently only \"count(*)\" appears in the SELECT clause satisfies this requirement",
884884
err.strip_backtrace()
885885
);
886886
}
@@ -1001,7 +1001,7 @@ fn select_aggregate_with_group_by_with_having_referencing_column_not_in_group_by
10011001
HAVING MAX(age) > 10 AND last_name = 'M'";
10021002
let err = logical_plan(sql).expect_err("query should have failed");
10031003
assert_eq!(
1004-
"Error during planning: HAVING clause references non-aggregate values: Expression person.last_name could not be resolved from available columns: person.first_name, max(person.age)",
1004+
"Error during planning: Column in HAVING must be in GROUP BY or an aggregate function: While expanding wildcard, column \"person.last_name\" must appear in the GROUP BY clause or must be part of an aggregate function, currently only \"person.first_name, max(person.age)\" appears in the SELECT clause satisfies this requirement",
10051005
err.strip_backtrace()
10061006
);
10071007
}
@@ -1365,7 +1365,7 @@ fn select_simple_aggregate_with_groupby_non_column_expression_nested_and_not_res
13651365
let sql = "SELECT ((age + 1) / 2) * (age + 9), MIN(first_name) FROM person GROUP BY age + 1";
13661366
let err = logical_plan(sql).expect_err("query should have failed");
13671367
assert_eq!(
1368-
"Error during planning: Projection references non-aggregate values: Expression person.age could not be resolved from available columns: person.age + Int64(1), min(person.first_name)",
1368+
"Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column \"person.age\" must appear in the GROUP BY clause or must be part of an aggregate function, currently only \"person.age + Int64(1), min(person.first_name)\" appears in the SELECT clause satisfies this requirement",
13691369
err.strip_backtrace()
13701370
);
13711371
}
@@ -1375,7 +1375,7 @@ fn select_simple_aggregate_with_groupby_non_column_expression_and_its_column_sel
13751375
let sql = "SELECT age, MIN(first_name) FROM person GROUP BY age + 1";
13761376
let err = logical_plan(sql).expect_err("query should have failed");
13771377
assert_eq!(
1378-
"Error during planning: Projection references non-aggregate values: Expression person.age could not be resolved from available columns: person.age + Int64(1), min(person.first_name)",
1378+
"Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column \"person.age\" must appear in the GROUP BY clause or must be part of an aggregate function, currently only \"person.age + Int64(1), min(person.first_name)\" appears in the SELECT clause satisfies this requirement",
13791379
err.strip_backtrace()
13801380
);
13811381
}
@@ -1636,7 +1636,7 @@ fn select_7480_2() {
16361636
let sql = "SELECT c1, c13, MIN(c12) FROM aggregate_test_100 GROUP BY c1";
16371637
let err = logical_plan(sql).expect_err("query should have failed");
16381638
assert_eq!(
1639-
"Error during planning: Projection references non-aggregate values: Expression aggregate_test_100.c13 could not be resolved from available columns: aggregate_test_100.c1, min(aggregate_test_100.c12)",
1639+
"Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column \"aggregate_test_100.c13\" must appear in the GROUP BY clause or must be part of an aggregate function, currently only \"aggregate_test_100.c1, min(aggregate_test_100.c12)\" appears in the SELECT clause satisfies this requirement",
16401640
err.strip_backtrace()
16411641
);
16421642
}

datafusion/sqllogictest/test_files/group_by.slt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3468,7 +3468,7 @@ SELECT r.sn, SUM(l.amount), r.amount
34683468
# to associate it with other fields, aggregate should contain all the composite columns
34693469
# if any of the composite column is missing, we cannot use associated indices, inside select expression
34703470
# below query should fail
3471-
statement error DataFusion error: Error during planning: Projection references non\-aggregate values: Expression r\.amount could not be resolved from available columns: r\.sn, sum\(l\.amount\)
3471+
statement error DataFusion error: Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column "r\.amount" must appear in the GROUP BY clause or must be part of an aggregate function, currently only "r\.sn, sum\(l\.amount\)" appears in the SELECT clause satisfies this requirement
34723472
SELECT r.sn, SUM(l.amount), r.amount
34733473
FROM sales_global_with_composite_pk AS l
34743474
JOIN sales_global_with_composite_pk AS r
@@ -3496,7 +3496,7 @@ NULL NULL NULL
34963496
# left join shouldn't propagate right side constraint,
34973497
# if right side is a unique key (unique and can contain null)
34983498
# Please note that, above query and this one is same except the constraint in the table.
3499-
statement error DataFusion error: Error during planning: Projection references non\-aggregate values: Expression r\.amount could not be resolved from available columns: r\.sn, sum\(r\.amount\)
3499+
statement error DataFusion error: Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column "r\.amount" must appear in the GROUP BY clause or must be part of an aggregate function, currently only "r\.sn, sum\(r\.amount\)" appears in the SELECT clause satisfies this requirement
35003500
SELECT r.sn, r.amount, SUM(r.amount)
35013501
FROM (SELECT *
35023502
FROM sales_global_with_unique as l
@@ -3542,7 +3542,7 @@ SELECT column1, COUNT(*) as column2 FROM (VALUES (['a', 'b'], 1), (['c', 'd', 'e
35423542

35433543

35443544
# primary key should be aware from which columns it is associated
3545-
statement error DataFusion error: Error during planning: Projection references non\-aggregate values: Expression r\.sn could not be resolved from available columns: l\.sn, l\.zip_code, l\.country, l\.ts, l\.currency, l\.amount, sum\(l\.amount\)
3545+
statement error DataFusion error: Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column "r\.sn" must appear in the GROUP BY clause or must be part of an aggregate function, currently only "l\.sn, l\.zip_code, l\.country, l\.ts, l\.currency, l\.amount, sum\(l\.amount\)" appears in the SELECT clause satisfies this requirement
35463546
SELECT l.sn, r.sn, SUM(l.amount), r.amount
35473547
FROM sales_global_with_pk AS l
35483548
JOIN sales_global_with_pk AS r
@@ -3633,7 +3633,7 @@ ORDER BY r.sn
36333633
4 100 2022-01-03T10:00:00
36343634

36353635
# after join, new window expressions shouldn't be associated with primary keys
3636-
statement error DataFusion error: Error during planning: Projection references non\-aggregate values: Expression rn1 could not be resolved from available columns: r\.sn, r\.ts, r\.amount, sum\(r\.amount\)
3636+
statement error DataFusion error: Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column "rn1" must appear in the GROUP BY clause or must be part of an aggregate function, currently only "r\.sn, r\.ts, r\.amount, sum\(r\.amount\)" appears in the SELECT clause satisfies this requirement
36373637
SELECT r.sn, SUM(r.amount), rn1
36383638
FROM
36393639
(SELECT r.ts, r.sn, r.amount,
@@ -5135,7 +5135,7 @@ statement ok
51355135
CREATE TABLE test_case_expr(a INT, b TEXT) AS VALUES (1,'hello'), (2,'world')
51365136

51375137
query T
5138-
SELECT (CASE WHEN CONCAT(b, 'hello') = 'test' THEN 'good' ELSE 'bad' END) AS c
5138+
SELECT (CASE WHEN CONCAT(b, 'hello') = 'test' THEN 'good' ELSE 'bad' END) AS c
51395139
FROM test_case_expr GROUP BY c;
51405140
----
51415141
bad

datafusion/sqllogictest/test_files/unnest.slt

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,14 @@ AS VALUES
3232

3333
statement ok
3434
CREATE TABLE nested_unnest_table
35-
AS VALUES
35+
AS VALUES
3636
(struct('a', 'b', struct('c')), (struct('a', 'b', [10,20])), [struct('a', 'b')]),
3737
(struct('d', 'e', struct('f')), (struct('x', 'y', [30,40, 50])), null)
3838
;
3939

4040
statement ok
4141
CREATE TABLE recursive_unnest_table
42-
AS VALUES
42+
AS VALUES
4343
(struct([1], 'a'), [[[1],[2]],[[1,1]]], [struct([1],[[1,2]])]),
4444
(struct([2], 'b'), [[[3,4],[5]],[[null,6],null,[7,8]]], [struct([2],[[3],[4]])])
4545
;
@@ -264,9 +264,9 @@ NULL NULL 17
264264
NULL NULL 18
265265

266266
query IIIT
267-
select
268-
unnest(column1), unnest(column2) + 2,
269-
column3 * 10, unnest(array_remove(column1, '4'))
267+
select
268+
unnest(column1), unnest(column2) + 2,
269+
column3 * 10, unnest(array_remove(column1, '4'))
270270
from unnest_table;
271271
----
272272
1 9 10 1
@@ -795,7 +795,7 @@ select unnest(unnest(column2)) c2, count(column3) from recursive_unnest_table gr
795795
[NULL, 6] 1
796796
NULL 1
797797

798-
query error DataFusion error: Error during planning: Projection references non\-aggregate values
798+
query error DataFusion error: Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column "nested_unnest_table\.column1" must appear in the GROUP BY clause or must be part of an aggregate function, currently only "UNNEST\(nested_unnest_table\.column1\)\[c0\]" appears in the SELECT clause satisfies this requirement
799799
select unnest(column1) c1 from nested_unnest_table group by c1.c0;
800800

801801
# TODO: this query should work. see issue: https://github.com/apache/datafusion/issues/12794
@@ -875,7 +875,7 @@ query TT
875875
explain select * from unnest_table u, unnest(u.column1);
876876
----
877877
logical_plan
878-
01)Cross Join:
878+
01)Cross Join:
879879
02)--SubqueryAlias: u
880880
03)----TableScan: unnest_table projection=[column1, column2, column3, column4, column5]
881881
04)--Subquery:

0 commit comments

Comments
 (0)