From 364e3a478cb6639ef58c995ea4a7da0de6c7a394 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Sat, 8 Mar 2025 16:33:14 +0800 Subject: [PATCH 01/12] first draft --- .../optimizer/tests/optimizer_integration.rs | 36 --- datafusion/sql/src/select.rs | 71 +++++- datafusion/sql/tests/cases/plan_to_sql.rs | 137 +--------- datafusion/sql/tests/sql_integration.rs | 239 ------------------ datafusion/sqllogictest/test_files/alias.slt | 59 +++++ datafusion/sqllogictest/test_files/cte.slt | 84 ++++++ .../sqllogictest/test_files/group_by.slt | 29 +++ datafusion/sqllogictest/test_files/joins.slt | 113 +++++++++ datafusion/sqllogictest/test_files/order.slt | 39 +-- datafusion/sqllogictest/test_files/select.slt | 3 + .../sqllogictest/test_files/wildcard.slt | 9 + 11 files changed, 378 insertions(+), 441 deletions(-) create mode 100644 datafusion/sqllogictest/test_files/alias.slt diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs index 66bd6b75123e..c109b4dabc2f 100644 --- a/datafusion/optimizer/tests/optimizer_integration.rs +++ b/datafusion/optimizer/tests/optimizer_integration.rs @@ -344,16 +344,6 @@ fn test_propagate_empty_relation_inner_join_and_unions() { assert_eq!(expected, format!("{plan}")); } -#[test] -fn select_wildcard_with_repeated_column() { - let sql = "SELECT *, col_int32 FROM test"; - let err = test_sql(sql).expect_err("query should have failed"); - assert_eq!( - "Schema error: Schema contains duplicate qualified field name test.col_int32", - err.strip_backtrace() - ); -} - #[test] fn select_wildcard_with_repeated_column_but_is_aliased() { let sql = "SELECT *, col_int32 as col_32 FROM test"; @@ -390,32 +380,6 @@ fn select_correlated_predicate_subquery_with_uppercase_ident() { assert_eq!(expected, format!("{plan}")); } -// The test should return an error -// because the wildcard didn't be expanded before type coercion -#[test] -fn test_union_coercion_with_wildcard() -> Result<()> { - let dialect = PostgreSqlDialect {}; - let context_provider = MyContextProvider::default(); - let sql = "select * from (SELECT col_int32, col_uint32 FROM test) union all select * from(SELECT col_uint32, col_int32 FROM test)"; - let statements = Parser::parse_sql(&dialect, sql)?; - let sql_to_rel = SqlToRel::new(&context_provider); - let logical_plan = sql_to_rel.sql_statement_to_plan(statements[0].clone())?; - - if let LogicalPlan::Union(union) = logical_plan { - let err = TypeCoercionRewriter::coerce_union(union) - .err() - .unwrap() - .to_string(); - assert_contains!( - err, - "Error during planning: Wildcard should be expanded before type coercion" - ); - } else { - panic!("Expected Union plan"); - } - Ok(()) -} - fn test_sql(sql: &str) -> Result { // parse the SQL let dialect = GenericDialect {}; // or AnsiDialect, or your own dialect ... diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index e21def4c3941..db488f665bce 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -27,14 +27,14 @@ use crate::utils::{ use datafusion_common::error::DataFusionErrorBuilder; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; -use datafusion_common::{not_impl_err, plan_err, Result}; +use datafusion_common::{not_impl_err, plan_err, Column, Result}; use datafusion_common::{RecursionUnnestOption, UnnestOptions}; use datafusion_expr::expr::{Alias, PlannedReplaceSelectItem, WildcardOptions}; use datafusion_expr::expr_rewriter::{ normalize_col, normalize_col_with_schemas_and_ambiguity_check, normalize_sorts, }; use datafusion_expr::utils::{ - expr_as_column_expr, expr_to_columns, find_aggregate_exprs, find_window_exprs, + expand_qualified_wildcard, expand_wildcard, expr_as_column_expr, expr_to_columns, find_aggregate_exprs, find_window_exprs }; use datafusion_expr::{ qualified_wildcard_with_options, wildcard_with_options, Aggregate, Expr, Filter, @@ -583,7 +583,7 @@ impl SqlToRel<'_, S> { let mut error_builder = DataFusionErrorBuilder::new(); for expr in projection { match self.sql_select_to_rex(expr, plan, empty_from, planner_context) { - Ok(expr) => prepared_select_exprs.push(expr), + Ok(expr) => prepared_select_exprs.extend(expr), Err(err) => error_builder.add_error(err), } } @@ -597,7 +597,7 @@ impl SqlToRel<'_, S> { plan: &LogicalPlan, empty_from: bool, planner_context: &mut PlannerContext, - ) -> Result { + ) -> Result> { match sql { SelectItem::UnnamedExpr(expr) => { let expr = self.sql_to_expr(expr, plan.schema(), planner_context)?; @@ -606,7 +606,7 @@ impl SqlToRel<'_, S> { &[&[plan.schema()]], &plan.using_columns()?, )?; - Ok(col) + Ok(vec![col]) } SelectItem::ExprWithAlias { expr, alias } => { let select_expr = @@ -622,7 +622,7 @@ impl SqlToRel<'_, S> { Expr::Column(column) if column.name.eq(&name) => col, _ => col.alias(name), }; - Ok(expr) + Ok(vec![expr]) } SelectItem::Wildcard(options) => { Self::check_wildcard_options(&options)?; @@ -635,7 +635,18 @@ impl SqlToRel<'_, S> { planner_context, options, )?; - Ok(wildcard_with_options(planned_options)) + // Ok(vec![wildcard_with_options(planned_options)]) + + let expanded = + expand_wildcard(plan.schema(), plan, Some(&planned_options))?; + + let replaced = if let Some(replace) = planned_options.replace { + replace_columns(expanded, &replace)? + } else { + expanded + }; + + Ok(replaced) } SelectItem::QualifiedWildcard(object_name, options) => { Self::check_wildcard_options(&options)?; @@ -646,7 +657,23 @@ impl SqlToRel<'_, S> { planner_context, options, )?; - Ok(qualified_wildcard_with_options(qualifier, planned_options)) + + let expanded = expand_qualified_wildcard( + &qualifier, + plan.schema(), + Some(&planned_options), + )?; + // If there is a REPLACE statement, replace that column with the given + // replace expression. Column name remains the same. + let replaced = if let Some(replace) = planned_options.replace { + replace_columns(expanded, &replace)? + } else { + expanded + }; + + Ok(replaced) + + // Ok(vec![qualified_wildcard_with_options(qualifier, planned_options)]) } } } @@ -698,7 +725,10 @@ impl SqlToRel<'_, S> { planner_context, ) }) - .collect::>>()?; + .collect::>>()? + .into_iter() + .flatten() + .collect(); let planned_replace = PlannedReplaceSelectItem { items: replace.items.into_iter().map(|i| *i).collect(), planned_expressions: replace_expr, @@ -884,3 +914,26 @@ fn match_window_definitions( } Ok(()) } + +/// If there is a REPLACE statement in the projected expression in the form of +/// "REPLACE (some_column_within_an_expr AS some_column)", this function replaces +/// that column with the given replace expression. Column name remains the same. +/// Multiple REPLACEs are also possible with comma separations. +fn replace_columns( + mut exprs: Vec, + replace: &PlannedReplaceSelectItem, +) -> Result> { + for expr in exprs.iter_mut() { + if let Expr::Column(Column { name, .. }) = expr { + if let Some((_, new_expr)) = replace + .items() + .iter() + .zip(replace.expressions().iter()) + .find(|(item, _)| item.column_name.value == *name) + { + *expr = new_expr.clone().alias(name.clone()) + } + } + } + Ok(exprs) +} \ No newline at end of file diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index 5af93a01e6c9..fb1fb6f0fbf2 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -323,13 +323,6 @@ fn roundtrip_statement_with_dialect() -> Result<()> { parser_dialect: Box::new(MySqlDialect {}), unparser_dialect: Box::new(UnparserMySqlDialect {}), }, - TestStatementWithDialect { - sql: "select * from (select * from j1 limit 10);", - expected: - "SELECT * FROM (SELECT * FROM `j1` LIMIT 10) AS `derived_limit`", - parser_dialect: Box::new(MySqlDialect {}), - unparser_dialect: Box::new(UnparserMySqlDialect {}), - }, TestStatementWithDialect { sql: "select ta.j1_id from j1 ta order by j1_id limit 10;", expected: @@ -524,96 +517,6 @@ fn roundtrip_statement_with_dialect() -> Result<()> { parser_dialect: Box::new(GenericDialect {}), unparser_dialect: Box::new(SqliteDialect {}), }, - TestStatementWithDialect { - sql: "SELECT * FROM (SELECT j1_id + 1 FROM j1) AS temp_j(id2)", - expected: r#"SELECT * FROM (SELECT (`j1`.`j1_id` + 1) AS `id2` FROM `j1`) AS `temp_j`"#, - parser_dialect: Box::new(GenericDialect {}), - unparser_dialect: Box::new(SqliteDialect {}), - }, - TestStatementWithDialect { - sql: "SELECT * FROM (SELECT j1_id FROM j1 LIMIT 1) AS temp_j(id2)", - expected: r#"SELECT * FROM (SELECT `j1`.`j1_id` AS `id2` FROM `j1` LIMIT 1) AS `temp_j`"#, - parser_dialect: Box::new(GenericDialect {}), - unparser_dialect: Box::new(SqliteDialect {}), - }, - TestStatementWithDialect { - sql: "SELECT * FROM UNNEST([1,2,3])", - expected: r#"SELECT * FROM (SELECT UNNEST([1, 2, 3]) AS "UNNEST(make_array(Int64(1),Int64(2),Int64(3)))")"#, - parser_dialect: Box::new(GenericDialect {}), - unparser_dialect: Box::new(UnparserDefaultDialect {}), - }, - TestStatementWithDialect { - sql: "SELECT * FROM UNNEST([1,2,3]) AS t1 (c1)", - expected: r#"SELECT * FROM (SELECT UNNEST([1, 2, 3]) AS "UNNEST(make_array(Int64(1),Int64(2),Int64(3)))") AS t1 (c1)"#, - parser_dialect: Box::new(GenericDialect {}), - unparser_dialect: Box::new(UnparserDefaultDialect {}), - }, - TestStatementWithDialect { - sql: "SELECT * FROM UNNEST([1,2,3]) AS t1 (c1)", - expected: r#"SELECT * FROM (SELECT UNNEST([1, 2, 3]) AS "UNNEST(make_array(Int64(1),Int64(2),Int64(3)))") AS t1 (c1)"#, - parser_dialect: Box::new(GenericDialect {}), - unparser_dialect: Box::new(UnparserDefaultDialect {}), - }, - TestStatementWithDialect { - sql: "SELECT * FROM UNNEST([1,2,3]), j1", - expected: r#"SELECT * FROM (SELECT UNNEST([1, 2, 3]) AS "UNNEST(make_array(Int64(1),Int64(2),Int64(3)))") CROSS JOIN j1"#, - parser_dialect: Box::new(GenericDialect {}), - unparser_dialect: Box::new(UnparserDefaultDialect {}), - }, - TestStatementWithDialect { - sql: "SELECT * FROM UNNEST([1,2,3]) u(c1) JOIN j1 ON u.c1 = j1.j1_id", - expected: r#"SELECT * FROM (SELECT UNNEST([1, 2, 3]) AS "UNNEST(make_array(Int64(1),Int64(2),Int64(3)))") AS u (c1) JOIN j1 ON (u.c1 = j1.j1_id)"#, - parser_dialect: Box::new(GenericDialect {}), - unparser_dialect: Box::new(UnparserDefaultDialect {}), - }, - TestStatementWithDialect { - sql: "SELECT * FROM UNNEST([1,2,3]) u(c1) UNION ALL SELECT * FROM UNNEST([4,5,6]) u(c1)", - expected: r#"SELECT * FROM (SELECT UNNEST([1, 2, 3]) AS "UNNEST(make_array(Int64(1),Int64(2),Int64(3)))") AS u (c1) UNION ALL SELECT * FROM (SELECT UNNEST([4, 5, 6]) AS "UNNEST(make_array(Int64(4),Int64(5),Int64(6)))") AS u (c1)"#, - parser_dialect: Box::new(GenericDialect {}), - unparser_dialect: Box::new(UnparserDefaultDialect {}), - }, - TestStatementWithDialect { - sql: "SELECT * FROM UNNEST([1,2,3])", - expected: r#"SELECT * FROM UNNEST([1, 2, 3])"#, - parser_dialect: Box::new(GenericDialect {}), - unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), - }, - TestStatementWithDialect { - sql: "SELECT * FROM UNNEST([1,2,3]) AS t1 (c1)", - expected: r#"SELECT * FROM UNNEST([1, 2, 3]) AS t1 (c1)"#, - parser_dialect: Box::new(GenericDialect {}), - unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), - }, - TestStatementWithDialect { - sql: "SELECT * FROM UNNEST([1,2,3]) AS t1 (c1)", - expected: r#"SELECT * FROM UNNEST([1, 2, 3]) AS t1 (c1)"#, - parser_dialect: Box::new(GenericDialect {}), - unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), - }, - TestStatementWithDialect { - sql: "SELECT * FROM UNNEST([1,2,3]), j1", - expected: r#"SELECT * FROM UNNEST([1, 2, 3]) CROSS JOIN j1"#, - parser_dialect: Box::new(GenericDialect {}), - unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), - }, - TestStatementWithDialect { - sql: "SELECT * FROM UNNEST([1,2,3]) u(c1) JOIN j1 ON u.c1 = j1.j1_id", - expected: r#"SELECT * FROM UNNEST([1, 2, 3]) AS u (c1) JOIN j1 ON (u.c1 = j1.j1_id)"#, - parser_dialect: Box::new(GenericDialect {}), - unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), - }, - TestStatementWithDialect { - sql: "SELECT * FROM UNNEST([1,2,3]) u(c1) UNION ALL SELECT * FROM UNNEST([4,5,6]) u(c1)", - expected: r#"SELECT * FROM UNNEST([1, 2, 3]) AS u (c1) UNION ALL SELECT * FROM UNNEST([4, 5, 6]) AS u (c1)"#, - parser_dialect: Box::new(GenericDialect {}), - unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), - }, - TestStatementWithDialect { - sql: "SELECT UNNEST([1,2,3])", - expected: r#"SELECT * FROM UNNEST([1, 2, 3])"#, - parser_dialect: Box::new(GenericDialect {}), - unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), - }, TestStatementWithDialect { sql: "SELECT UNNEST([1,2,3]) as c1", expected: r#"SELECT UNNEST([1, 2, 3]) AS c1"#, @@ -626,30 +529,6 @@ fn roundtrip_statement_with_dialect() -> Result<()> { parser_dialect: Box::new(GenericDialect {}), unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), }, - TestStatementWithDialect { - sql: "SELECT * FROM unnest_table u, UNNEST(u.array_col)", - expected: r#"SELECT * FROM unnest_table AS u CROSS JOIN UNNEST(u.array_col)"#, - parser_dialect: Box::new(GenericDialect {}), - unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), - }, - TestStatementWithDialect { - sql: "SELECT * FROM unnest_table u, UNNEST(u.array_col) AS t1 (c1)", - expected: r#"SELECT * FROM unnest_table AS u CROSS JOIN UNNEST(u.array_col) AS t1 (c1)"#, - parser_dialect: Box::new(GenericDialect {}), - unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), - }, - TestStatementWithDialect { - sql: "SELECT * FROM unnest_table u, UNNEST(u.array_col)", - expected: r#"SELECT * FROM unnest_table AS u CROSS JOIN LATERAL (SELECT UNNEST(u.array_col) AS "UNNEST(outer_ref(u.array_col))")"#, - parser_dialect: Box::new(GenericDialect {}), - unparser_dialect: Box::new(UnparserDefaultDialect {}), - }, - TestStatementWithDialect { - sql: "SELECT * FROM unnest_table u, UNNEST(u.array_col) AS t1 (c1)", - expected: r#"SELECT * FROM unnest_table AS u CROSS JOIN LATERAL (SELECT UNNEST(u.array_col) AS "UNNEST(outer_ref(u.array_col))") AS t1 (c1)"#, - parser_dialect: Box::new(GenericDialect {}), - unparser_dialect: Box::new(UnparserDefaultDialect {}), - }, ]; for query in tests { @@ -1456,13 +1335,13 @@ fn test_unnest_to_sql() { fn test_join_with_no_conditions() { sql_round_trip( GenericDialect {}, - "SELECT * FROM j1 JOIN j2", - "SELECT * FROM j1 CROSS JOIN j2", + "SELECT j1.j1_id FROM j1 JOIN j2", + "SELECT j1.j1_id FROM j1 CROSS JOIN j2", ); sql_round_trip( GenericDialect {}, - "SELECT * FROM j1 CROSS JOIN j2", - "SELECT * FROM j1 CROSS JOIN j2", + "SELECT j1.j1_id FROM j1 CROSS JOIN j2", + "SELECT j1.j1_id FROM j1 CROSS JOIN j2", ); } @@ -1547,7 +1426,7 @@ impl UserDefinedLogicalNodeUnparser for UnusedUnparser { fn test_unparse_extension_to_statement() -> Result<()> { let dialect = GenericDialect {}; let statement = Parser::new(&dialect) - .try_with_sql("SELECT * FROM j1")? + .try_with_sql("SELECT j1.j1_id FROM j1")? .parse_statement()?; let state = MockSessionState::default(); let context = MockContextProvider { state }; @@ -1563,7 +1442,7 @@ fn test_unparse_extension_to_statement() -> Result<()> { Arc::new(UnusedUnparser {}), ]); let sql = unparser.plan_to_sql(&extension)?; - let expected = "SELECT * FROM j1"; + let expected = "SELECT j1.j1_id FROM j1"; assert_eq!(sql.to_string(), expected); if let Some(err) = plan_to_sql(&extension).err() { @@ -1606,7 +1485,7 @@ impl UserDefinedLogicalNodeUnparser for MockSqlUnparser { fn test_unparse_extension_to_sql() -> Result<()> { let dialect = GenericDialect {}; let statement = Parser::new(&dialect) - .try_with_sql("SELECT * FROM j1")? + .try_with_sql("SELECT j1.j1_id FROM j1")? .parse_statement()?; let state = MockSessionState::default(); let context = MockContextProvider { state }; @@ -1626,7 +1505,7 @@ fn test_unparse_extension_to_sql() -> Result<()> { Arc::new(UnusedUnparser {}), ]); let sql = unparser.plan_to_sql(&plan)?; - let expected = "SELECT j1.j1_id AS user_id FROM (SELECT * FROM j1)"; + let expected = "SELECT j1.j1_id AS user_id FROM (SELECT j1.j1_id FROM j1)"; assert_eq!(sql.to_string(), expected); if let Some(err) = plan_to_sql(&plan).err() { diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 1df18302687e..f359173a6288 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -841,83 +841,6 @@ fn natural_right_join() { quick_test(sql, expected); } -#[test] -fn natural_join_no_common_becomes_cross_join() { - let sql = "SELECT * FROM person a NATURAL JOIN lineitem b"; - let expected = "Projection: *\ - \n Cross Join: \ - \n SubqueryAlias: a\ - \n TableScan: person\ - \n SubqueryAlias: b\ - \n TableScan: lineitem"; - quick_test(sql, expected); -} - -#[test] -fn using_join_multiple_keys() { - let sql = "SELECT * FROM person a join person b using (id, age)"; - let expected = "Projection: *\ - \n Inner Join: Using a.id = b.id, a.age = b.age\ - \n SubqueryAlias: a\ - \n TableScan: person\ - \n SubqueryAlias: b\ - \n TableScan: person"; - quick_test(sql, expected); -} - -#[test] -fn using_join_multiple_keys_subquery() { - let sql = - "SELECT age FROM (SELECT * FROM person a join person b using (id, age, state))"; - let expected = "Projection: a.age\ - \n Projection: *\ - \n Inner Join: Using a.id = b.id, a.age = b.age, a.state = b.state\ - \n SubqueryAlias: a\ - \n TableScan: person\ - \n SubqueryAlias: b\ - \n TableScan: person"; - quick_test(sql, expected); -} - -#[test] -fn using_join_multiple_keys_qualified_wildcard_select() { - let sql = "SELECT a.* FROM person a join person b using (id, age)"; - let expected = "Projection: a.*\ - \n Inner Join: Using a.id = b.id, a.age = b.age\ - \n SubqueryAlias: a\ - \n TableScan: person\ - \n SubqueryAlias: b\ - \n TableScan: person"; - quick_test(sql, expected); -} - -#[test] -fn using_join_multiple_keys_select_all_columns() { - let sql = "SELECT a.*, b.* FROM person a join person b using (id, age)"; - let expected = "Projection: a.*, b.*\ - \n Inner Join: Using a.id = b.id, a.age = b.age\ - \n SubqueryAlias: a\ - \n TableScan: person\ - \n SubqueryAlias: b\ - \n TableScan: person"; - quick_test(sql, expected); -} - -#[test] -fn using_join_multiple_keys_multiple_joins() { - let sql = "SELECT * FROM person a join person b using (id, age, state) join person c using (id, age, state)"; - let expected = "Projection: *\ - \n Inner Join: Using a.id = c.id, a.age = c.age, a.state = c.state\ - \n Inner Join: Using a.id = b.id, a.age = b.age, a.state = b.state\ - \n SubqueryAlias: a\ - \n TableScan: person\ - \n SubqueryAlias: b\ - \n TableScan: person\ - \n SubqueryAlias: c\ - \n TableScan: person"; - quick_test(sql, expected); -} - #[test] fn select_with_having() { let sql = "SELECT id, age @@ -1233,24 +1156,6 @@ fn select_binary_expr_nested() { quick_test(sql, expected); } -#[test] -fn select_wildcard_with_groupby() { - quick_test( - r#"SELECT * FROM person GROUP BY id, first_name, last_name, age, state, salary, birth_date, "😀""#, - "Projection: *\ - \n Aggregate: groupBy=[[person.id, person.first_name, person.last_name, person.age, person.state, person.salary, person.birth_date, person.😀]], aggr=[[]]\ - \n TableScan: person", - ); - quick_test( - "SELECT * FROM (SELECT first_name, last_name FROM person) AS a GROUP BY first_name, last_name", - "Projection: *\ - \n Aggregate: groupBy=[[a.first_name, a.last_name]], aggr=[[]]\ - \n SubqueryAlias: a\ - \n Projection: person.first_name, person.last_name\ - \n TableScan: person", - ); -} - #[test] fn select_simple_aggregate() { quick_test( @@ -1618,15 +1523,6 @@ fn select_aggregate_with_non_column_inner_expression_with_groupby() { ); } -#[test] -fn test_wildcard() { - quick_test( - "SELECT * from person", - "Projection: *\ - \n TableScan: person", - ); -} - #[test] fn select_count_one() { let sql = "SELECT count(1) FROM person"; @@ -2958,88 +2854,6 @@ fn scalar_subquery_reference_outer_field() { quick_test(sql, expected); } -#[test] -fn subquery_references_cte() { - let sql = "WITH \ - cte AS (SELECT * FROM person) \ - SELECT * FROM person WHERE EXISTS (SELECT * FROM cte WHERE id = person.id)"; - - let expected = "Projection: *\ - \n Filter: EXISTS ()\ - \n Subquery:\ - \n Projection: *\ - \n Filter: cte.id = outer_ref(person.id)\ - \n SubqueryAlias: cte\ - \n Projection: *\ - \n TableScan: person\ - \n TableScan: person"; - - quick_test(sql, expected) -} - -#[test] -fn cte_with_no_column_names() { - let sql = "WITH \ - numbers AS ( \ - SELECT 1 as a, 2 as b, 3 as c \ - ) \ - SELECT * FROM numbers;"; - - let expected = "Projection: *\ - \n SubqueryAlias: numbers\ - \n Projection: Int64(1) AS a, Int64(2) AS b, Int64(3) AS c\ - \n EmptyRelation"; - - quick_test(sql, expected) -} - -#[test] -fn cte_with_column_names() { - let sql = "WITH \ - numbers(a, b, c) AS ( \ - SELECT 1, 2, 3 \ - ) \ - SELECT * FROM numbers;"; - - let expected = "Projection: *\ - \n SubqueryAlias: numbers\ - \n Projection: Int64(1) AS a, Int64(2) AS b, Int64(3) AS c\ - \n Projection: Int64(1), Int64(2), Int64(3)\ - \n EmptyRelation"; - - quick_test(sql, expected) -} - -#[test] -fn cte_with_column_aliases_precedence() { - // The end result should always be what CTE specification says - let sql = "WITH \ - numbers(a, b, c) AS ( \ - SELECT 1 as x, 2 as y, 3 as z \ - ) \ - SELECT * FROM numbers;"; - - let expected = "Projection: *\ - \n SubqueryAlias: numbers\ - \n Projection: x AS a, y AS b, z AS c\ - \n Projection: Int64(1) AS x, Int64(2) AS y, Int64(3) AS z\ - \n EmptyRelation"; - quick_test(sql, expected) -} - -#[test] -fn cte_unbalanced_number_of_columns() { - let sql = "WITH \ - numbers(a) AS ( \ - SELECT 1, 2, 3 \ - ) \ - SELECT * FROM numbers;"; - - let expected = "Error during planning: Source table contains 3 columns but only 1 names given as column alias"; - let result = logical_plan(sql).err().unwrap(); - assert_eq!(result.strip_backtrace(), expected); -} - #[test] fn aggregate_with_rollup() { let sql = @@ -3240,21 +3054,6 @@ fn lateral_nested_left_join() { quick_test(sql, expected); } -#[test] -fn lateral_unnest() { - let sql = "SELECT * from unnest_table u, unnest(u.array_col)"; - let expected = "Projection: *\ - \n Cross Join: \ - \n SubqueryAlias: u\ - \n TableScan: unnest_table\ - \n Subquery:\ - \n Projection: __unnest_placeholder(outer_ref(u.array_col),depth=1) AS UNNEST(outer_ref(u.array_col))\ - \n Unnest: lists[__unnest_placeholder(outer_ref(u.array_col))|depth=1] structs[]\ - \n Projection: outer_ref(u.array_col) AS __unnest_placeholder(outer_ref(u.array_col))\ - \n EmptyRelation"; - quick_test(sql, expected); -} - #[test] fn hive_aggregate_with_filter() -> Result<()> { let dialect = &HiveDialect {}; @@ -4320,44 +4119,6 @@ fn test_prepare_statement_bad_list_idx() { assert_contains!(err.to_string(), "Error during planning: Failed to parse placeholder id: invalid digit found in string"); } -#[test] -fn test_table_alias() { - let sql = "select * from (\ - (select id from person) t1 \ - CROSS JOIN \ - (select age from person) t2 \ - ) as f"; - - let expected = "Projection: *\ - \n SubqueryAlias: f\ - \n Cross Join: \ - \n SubqueryAlias: t1\ - \n Projection: person.id\ - \n TableScan: person\ - \n SubqueryAlias: t2\ - \n Projection: person.age\ - \n TableScan: person"; - quick_test(sql, expected); - - let sql = "select * from (\ - (select id from person) t1 \ - CROSS JOIN \ - (select age from person) t2 \ - ) as f (c1, c2)"; - - let expected = "Projection: *\ - \n SubqueryAlias: f\ - \n Projection: t1.id AS c1, t2.age AS c2\ - \n Cross Join: \ - \n SubqueryAlias: t1\ - \n Projection: person.id\ - \n TableScan: person\ - \n SubqueryAlias: t2\ - \n Projection: person.age\ - \n TableScan: person"; - quick_test(sql, expected); -} - #[test] fn test_inner_join_with_cast_key() { let sql = "SELECT person.id, person.age diff --git a/datafusion/sqllogictest/test_files/alias.slt b/datafusion/sqllogictest/test_files/alias.slt new file mode 100644 index 000000000000..340ffb6078e4 --- /dev/null +++ b/datafusion/sqllogictest/test_files/alias.slt @@ -0,0 +1,59 @@ + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +# test table alias +statement count 0 +create table t1(id int); + +statement count 0 +create table t2(age int); + +query TT +explain select * from ((select id from t1) cross join (select age from t2)) as f; +---- +logical_plan +01)SubqueryAlias: f +02)--Cross Join: +03)----TableScan: t1 projection=[id] +04)----TableScan: t2 projection=[age] +physical_plan +01)CrossJoinExec +02)--DataSourceExec: partitions=1, partition_sizes=[0] +03)--DataSourceExec: partitions=1, partition_sizes=[0] + +query TT +explain select * from ((select id from t1) cross join (select age from t2)) as f(c1, c2); +---- +logical_plan +01)SubqueryAlias: f +02)--Projection: t1.id AS c1, t2.age AS c2 +03)----Cross Join: +04)------TableScan: t1 projection=[id] +05)------TableScan: t2 projection=[age] +physical_plan +01)ProjectionExec: expr=[id@0 as c1, age@1 as c2] +02)--CrossJoinExec +03)----DataSourceExec: partitions=1, partition_sizes=[0] +04)----DataSourceExec: partitions=1, partition_sizes=[0] + +statement count 0 +drop table t1; + +statement count 0 +drop table t2; \ No newline at end of file diff --git a/datafusion/sqllogictest/test_files/cte.slt b/datafusion/sqllogictest/test_files/cte.slt index 95b9b5a9252e..e36023e8072f 100644 --- a/datafusion/sqllogictest/test_files/cte.slt +++ b/datafusion/sqllogictest/test_files/cte.slt @@ -859,3 +859,87 @@ SELECT * FROM 400 500 1 400 500 2 400 500 3 + +query error DataFusion error: Error during planning: Source table contains 3 columns but only 1 names given as column alias +with numbers(a) as (select 1, 2, 3) select * from numbers; + +query TT +explain with numbers(a,b,c) as (select 1 as x, 2 as y, 3 as z) select * from numbers; +---- +logical_plan +01)SubqueryAlias: numbers +02)--Projection: Int64(1) AS a, Int64(2) AS b, Int64(3) AS c +03)----EmptyRelation +physical_plan +01)ProjectionExec: expr=[1 as a, 2 as b, 3 as c] +02)--PlaceholderRowExec + +query TT +explain with numbers(a,b,c) as (select 1,2,3) select * from numbers; +---- +logical_plan +01)SubqueryAlias: numbers +02)--Projection: Int64(1) AS a, Int64(2) AS b, Int64(3) AS c +03)----EmptyRelation +physical_plan +01)ProjectionExec: expr=[1 as a, 2 as b, 3 as c] +02)--PlaceholderRowExec + +query TT +explain with numbers as (select 1 as a, 2 as b, 3 as c) select * from numbers; +---- +logical_plan +01)SubqueryAlias: numbers +02)--Projection: Int64(1) AS a, Int64(2) AS b, Int64(3) AS c +03)----EmptyRelation +physical_plan +01)ProjectionExec: expr=[1 as a, 2 as b, 3 as c] +02)--PlaceholderRowExec + +statement count 0 +create table person (id int, name string, primary key(id)) + +query TT +explain with cte as (select * from person) SELECT * FROM person WHERE EXISTS (SELECT * FROM cte WHERE id = person.id); +---- +logical_plan +01)LeftSemi Join: person.id = __correlated_sq_1.id +02)--TableScan: person projection=[id, name] +03)--SubqueryAlias: __correlated_sq_1 +04)----SubqueryAlias: cte +05)------TableScan: person projection=[id] +physical_plan +01)CoalesceBatchesExec: target_batch_size=8182 +02)--HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(id@0, id@0)] +03)----DataSourceExec: partitions=1, partition_sizes=[0] +04)----DataSourceExec: partitions=1, partition_sizes=[0] + +statement count 0 +drop table person; + +statement count 0 +create table j1(a int); + +statement count 0 +create table j2(b int); + +query TT +explain SELECT * FROM j1, LATERAL (SELECT 1) AS j2; +---- +logical_plan +01)Cross Join: +02)--TableScan: j1 projection=[a] +03)--SubqueryAlias: j2 +04)----Projection: Int64(1) +05)------EmptyRelation +physical_plan +01)CrossJoinExec +02)--DataSourceExec: partitions=1, partition_sizes=[0] +03)--ProjectionExec: expr=[1 as Int64(1)] +04)----PlaceholderRowExec + +statement count 0 +drop table j1; + +statement count 0 +drop table j2; diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt index 2b3ebcda1520..0cc8045dccd0 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -5537,3 +5537,32 @@ drop view t statement ok drop table source; + + +# test select_wildcard_with_groupby +statement count 0 +create table t(a int, b int, c int, "😀" int); + +query TT +explain select * from t group by a, b, c, "😀"; +---- +logical_plan +01)Aggregate: groupBy=[[t.a, t.b, t.c, t.😀]], aggr=[[]] +02)--TableScan: t projection=[a, b, c, 😀] +physical_plan +01)AggregateExec: mode=Single, gby=[a@0 as a, b@1 as b, c@2 as c, 😀@3 as 😀], aggr=[] +02)--DataSourceExec: partitions=1, partition_sizes=[0] + +query TT +explain select * from (select a, b from t) as c group by a, b; +---- +logical_plan +01)Aggregate: groupBy=[[c.a, c.b]], aggr=[[]] +02)--SubqueryAlias: c +03)----TableScan: t projection=[a, b] +physical_plan +01)AggregateExec: mode=Single, gby=[a@0 as a, b@1 as b], aggr=[] +02)--DataSourceExec: partitions=1, partition_sizes=[0] + +statement count 0 +drop table t; diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index 0397e0c367b1..59a41ff1fa20 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -4541,3 +4541,116 @@ DROP TABLE test statement ok set datafusion.execution.target_partitions = 1; + +# test using_join_multiple_keys_subquery +statement count 0 +create table person(id int, age int, state int); + +statement count 0 +create table lineitem(c1 int); + +query TT +explain SELECT * FROM person a join person b using (id, age); +---- +logical_plan +01)Projection: a.id, a.age, a.state, b.state +02)--Inner Join: a.id = b.id, a.age = b.age +03)----SubqueryAlias: a +04)------TableScan: person projection=[id, age, state] +05)----SubqueryAlias: b +06)------TableScan: person projection=[id, age, state] +physical_plan +01)CoalesceBatchesExec: target_batch_size=3 +02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0), (age@1, age@1)], projection=[id@0, age@1, state@2, state@5] +03)----DataSourceExec: partitions=1, partition_sizes=[0] +04)----DataSourceExec: partitions=1, partition_sizes=[0] + +query TT +explain SELECT age FROM (SELECT * FROM person a join person b using (id, age, state)); +---- +logical_plan +01)Projection: a.age +02)--Inner Join: a.id = b.id, a.age = b.age, a.state = b.state +03)----SubqueryAlias: a +04)------TableScan: person projection=[id, age, state] +05)----SubqueryAlias: b +06)------TableScan: person projection=[id, age, state] +physical_plan +01)CoalesceBatchesExec: target_batch_size=3 +02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0), (age@1, age@1), (state@2, state@2)], projection=[age@1] +03)----DataSourceExec: partitions=1, partition_sizes=[0] +04)----DataSourceExec: partitions=1, partition_sizes=[0] + +query TT +explain SELECT a.* FROM person a join person b using (id, age); +---- +logical_plan +01)Projection: a.id, a.age, a.state +02)--Inner Join: a.id = b.id, a.age = b.age +03)----SubqueryAlias: a +04)------TableScan: person projection=[id, age, state] +05)----SubqueryAlias: b +06)------TableScan: person projection=[id, age] +physical_plan +01)CoalesceBatchesExec: target_batch_size=3 +02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0), (age@1, age@1)], projection=[id@0, age@1, state@2] +03)----DataSourceExec: partitions=1, partition_sizes=[0] +04)----DataSourceExec: partitions=1, partition_sizes=[0] + +query TT +explain SELECT a.*, b.* FROM person a join person b using (id, age); +---- +logical_plan +01)Inner Join: a.id = b.id, a.age = b.age +02)--SubqueryAlias: a +03)----TableScan: person projection=[id, age, state] +04)--SubqueryAlias: b +05)----TableScan: person projection=[id, age, state] +physical_plan +01)CoalesceBatchesExec: target_batch_size=3 +02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0), (age@1, age@1)] +03)----DataSourceExec: partitions=1, partition_sizes=[0] +04)----DataSourceExec: partitions=1, partition_sizes=[0] + +query TT +explain SELECT * FROM person a join person b using (id, age, state) join person c using (id, age, state); +---- +logical_plan +01)Projection: a.id, a.age, a.state +02)--Inner Join: a.id = c.id, a.age = c.age, a.state = c.state +03)----Projection: a.id, a.age, a.state +04)------Inner Join: a.id = b.id, a.age = b.age, a.state = b.state +05)--------SubqueryAlias: a +06)----------TableScan: person projection=[id, age, state] +07)--------SubqueryAlias: b +08)----------TableScan: person projection=[id, age, state] +09)----SubqueryAlias: c +10)------TableScan: person projection=[id, age, state] +physical_plan +01)CoalesceBatchesExec: target_batch_size=3 +02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0), (age@1, age@1), (state@2, state@2)], projection=[id@0, age@1, state@2] +03)----CoalesceBatchesExec: target_batch_size=3 +04)------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0), (age@1, age@1), (state@2, state@2)], projection=[id@0, age@1, state@2] +05)--------DataSourceExec: partitions=1, partition_sizes=[0] +06)--------DataSourceExec: partitions=1, partition_sizes=[0] +07)----DataSourceExec: partitions=1, partition_sizes=[0] + +query TT +explain SELECT * FROM person a NATURAL JOIN lineitem b; +---- +logical_plan +01)Cross Join: +02)--SubqueryAlias: a +03)----TableScan: person projection=[id, age, state] +04)--SubqueryAlias: b +05)----TableScan: lineitem projection=[c1] +physical_plan +01)CrossJoinExec +02)--DataSourceExec: partitions=1, partition_sizes=[0] +03)--DataSourceExec: partitions=1, partition_sizes=[0] + +statement count 0 +drop table person; + +query +drop table lineitem; diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index d7da21c58ec6..20dbeefad571 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -985,13 +985,20 @@ drop table ambiguity_test; statement ok create table t(a0 int, a int, b int, c int) as values (1, 2, 3, 4), (5, 6, 7, 8); -# expect this query to run successfully, not error -query III +# b is not selected in subquery +query error DataFusion error: Schema error: No field named b\. Valid fields are t1\.c, t1\.a, t1\.a0\. select * from (select c, a, NULL::int as a0 from t order by a, c) t1 union all select * from (select c, NULL::int as a, a0 from t order by a0, c) t2 order by c, a, a0, b limit 2; + +query III +select * from (select c, a, NULL::int as a0 from t order by a, c) t1 +union all +select * from (select c, NULL::int as a, a0 from t order by a0, c) t2 +order by c, a, a0 +limit 2; ---- 4 2 NULL 4 NULL 1 @@ -1231,44 +1238,20 @@ physical_plan # Test: inputs into union with different orderings -query TT +query error DataFusion error: Schema error: No field named d\. Valid fields are t1\.b, t1\.c, t1\.a, t1\.a0\. explain select * from (select b, c, a, NULL::int as a0 from ordered_table order by a, c) t1 union all select * from (select b, c, NULL::int as a, a0 from ordered_table order by a0, c) t2 order by d, c, a, a0, b limit 2; ----- -logical_plan -01)Projection: t1.b, t1.c, t1.a, t1.a0 -02)--Sort: t1.d ASC NULLS LAST, t1.c ASC NULLS LAST, t1.a ASC NULLS LAST, t1.a0 ASC NULLS LAST, t1.b ASC NULLS LAST, fetch=2 -03)----Union -04)------SubqueryAlias: t1 -05)--------Projection: ordered_table.b, ordered_table.c, ordered_table.a, Int32(NULL) AS a0, ordered_table.d -06)----------TableScan: ordered_table projection=[a, b, c, d] -07)------SubqueryAlias: t2 -08)--------Projection: ordered_table.b, ordered_table.c, Int32(NULL) AS a, ordered_table.a0, ordered_table.d -09)----------TableScan: ordered_table projection=[a0, b, c, d] -physical_plan -01)ProjectionExec: expr=[b@0 as b, c@1 as c, a@2 as a, a0@3 as a0] -02)--SortPreservingMergeExec: [d@4 ASC NULLS LAST, c@1 ASC NULLS LAST, a@2 ASC NULLS LAST, a0@3 ASC NULLS LAST, b@0 ASC NULLS LAST], fetch=2 -03)----UnionExec -04)------SortExec: TopK(fetch=2), expr=[d@4 ASC NULLS LAST, c@1 ASC NULLS LAST, a@2 ASC NULLS LAST, b@0 ASC NULLS LAST], preserve_partitioning=[false] -05)--------ProjectionExec: expr=[b@1 as b, c@2 as c, a@0 as a, NULL as a0, d@3 as d] -06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[c@2 ASC NULLS LAST], file_type=csv, has_header=true -07)------SortExec: TopK(fetch=2), expr=[d@4 ASC NULLS LAST, c@1 ASC NULLS LAST, a0@3 ASC NULLS LAST, b@0 ASC NULLS LAST], preserve_partitioning=[false] -08)--------ProjectionExec: expr=[b@1 as b, c@2 as c, NULL as a, a0@0 as a0, d@3 as d] -09)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, b, c, d], output_ordering=[c@2 ASC NULLS LAST], file_type=csv, has_header=true # Test: run the query from above -query IIII +query error DataFusion error: Schema error: No field named d\. Valid fields are t1\.b, t1\.c, t1\.a, t1\.a0\. select * from (select b, c, a, NULL::int as a0 from ordered_table order by a, c) t1 union all select * from (select b, c, NULL::int as a, a0 from ordered_table order by a0, c) t2 order by d, c, a, a0, b limit 2; ----- -0 0 0 NULL -0 0 NULL 1 statement ok diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index f1ac0696bff9..bedd7603201a 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -1820,6 +1820,9 @@ query I select a from t; ---- +query +drop table t; + statement ok set datafusion.optimizer.max_passes=3; diff --git a/datafusion/sqllogictest/test_files/wildcard.slt b/datafusion/sqllogictest/test_files/wildcard.slt index 7c076f040feb..ffda15a1dda1 100644 --- a/datafusion/sqllogictest/test_files/wildcard.slt +++ b/datafusion/sqllogictest/test_files/wildcard.slt @@ -145,3 +145,12 @@ DROP TABLE t2; statement ok DROP TABLE aggregate_simple; + +statement ok +create table t(a int, b int, c int) as values (1, 2, 3); + +query error DataFusion error: Error during planning: Projections require unique expression names but the expression "t\.a" at position 0 and "t\.a" at position 3 have the same name\. Consider aliasing \("AS"\) one of them\. +select *, a from t; + +statement count 0 +drop table t; \ No newline at end of file From cba1dff9dc4712aef2223a567086cce91896ebf0 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Sat, 8 Mar 2025 17:05:48 +0800 Subject: [PATCH 02/12] fix tests --- datafusion/sql/tests/sql_integration.rs | 286 ------------------ datafusion/sqllogictest/test_files/copy.slt | 17 ++ datafusion/sqllogictest/test_files/cte.slt | 62 ++++ datafusion/sqllogictest/test_files/joins.slt | 155 +++++++++- .../sqllogictest/test_files/prepare.slt | 15 + datafusion/sqllogictest/test_files/select.slt | 12 +- .../sqllogictest/test_files/subquery.slt | 20 ++ .../sqllogictest/test_files/wildcard.slt | 8 +- 8 files changed, 286 insertions(+), 289 deletions(-) diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index f359173a6288..6877e7826437 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -54,15 +54,6 @@ use sqlparser::dialect::{Dialect, GenericDialect, HiveDialect, MySqlDialect}; mod cases; mod common; -#[test] -fn test_schema_support() { - quick_test( - "SELECT * FROM s1.test", - "Projection: *\ - \n TableScan: s1.test", - ); -} - #[test] fn parse_decimals() { let test_data = [ @@ -449,19 +440,6 @@ Explain quick_test(sql, plan); } -#[test] -fn plan_copy_to_query() { - let sql = "COPY (select * from test_decimal limit 10) to 'output.csv'"; - let plan = r#" -CopyTo: format=csv output_url=output.csv options: () - Limit: skip=0, fetch=10 - Projection: * - TableScan: test_decimal - "# - .trim(); - quick_test(sql, plan); -} - #[test] fn plan_insert() { let sql = @@ -585,15 +563,6 @@ fn select_repeated_column() { ); } -#[test] -fn select_wildcard_with_repeated_column_but_is_aliased() { - quick_test( - "SELECT *, first_name AS fn from person", - "Projection: *, person.first_name AS fn\ - \n TableScan: person", - ); -} - #[test] fn select_scalar_func_with_literal_no_relation() { quick_test( @@ -793,30 +762,6 @@ fn join_with_ambiguous_column() { quick_test(sql, expected); } -#[test] -fn where_selection_with_ambiguous_column() { - let sql = "SELECT * FROM person a, person b WHERE id = id + 1"; - let err = logical_plan(sql) - .expect_err("query should have failed") - .strip_backtrace(); - assert_eq!( - "\"Schema error: Ambiguous reference to unqualified field id\"", - format!("{err:?}") - ); -} - -#[test] -fn natural_join() { - let sql = "SELECT * FROM lineitem a NATURAL JOIN lineitem b"; - let expected = "Projection: *\ - \n Inner Join: Using a.l_item_id = b.l_item_id, a.l_description = b.l_description, a.price = b.price\ - \n SubqueryAlias: a\ - \n TableScan: lineitem\ - \n SubqueryAlias: b\ - \n TableScan: lineitem"; - quick_test(sql, expected); -} - #[test] fn natural_left_join() { let sql = "SELECT l_item_id FROM lineitem a NATURAL LEFT JOIN lineitem b"; @@ -1302,56 +1247,6 @@ fn select_interval_out_of_range() { ); } -#[test] -fn recursive_ctes() { - let sql = " - WITH RECURSIVE numbers AS ( - select 1 as n - UNION ALL - select n + 1 FROM numbers WHERE N < 10 - ) - select * from numbers;"; - quick_test( - sql, - "Projection: *\ - \n SubqueryAlias: numbers\ - \n RecursiveQuery: is_distinct=false\ - \n Projection: Int64(1) AS n\ - \n EmptyRelation\ - \n Projection: numbers.n + Int64(1)\ - \n Filter: numbers.n < Int64(10)\ - \n TableScan: numbers", - ) -} - -#[test] -fn recursive_ctes_disabled() { - let sql = " - WITH RECURSIVE numbers AS ( - select 1 as n - UNION ALL - select n + 1 FROM numbers WHERE N < 10 - ) - select * from numbers;"; - - // manually setting up test here so that we can disable recursive ctes - let mut state = MockSessionState::default(); - state.config_options.execution.enable_recursive_ctes = false; - let context = MockContextProvider { state }; - - let planner = SqlToRel::new_with_options(&context, ParserOptions::default()); - let result = DFParser::parse_sql_with_dialect(sql, &GenericDialect {}); - let mut ast = result.unwrap(); - - let err = planner - .statement_to_plan(ast.pop_front().unwrap()) - .expect_err("query should have failed"); - assert_eq!( - "This feature is not implemented: Recursive CTEs are not enabled", - err.strip_backtrace() - ); -} - #[test] fn select_simple_aggregate_with_groupby_and_column_is_in_aggregate_and_groupby() { quick_test( @@ -1956,20 +1851,6 @@ fn join_with_using() { quick_test(sql, expected); } -#[test] -fn project_wildcard_on_join_with_using() { - let sql = "SELECT * \ - FROM lineitem \ - JOIN lineitem as lineitem2 \ - USING (l_item_id)"; - let expected = "Projection: *\ - \n Inner Join: Using lineitem.l_item_id = lineitem2.l_item_id\ - \n TableScan: lineitem\ - \n SubqueryAlias: lineitem2\ - \n TableScan: lineitem"; - quick_test(sql, expected); -} - #[test] fn equijoin_explicit_syntax_3_tables() { let sql = "SELECT id, order_id, l_description \ @@ -2763,24 +2644,6 @@ fn exists_subquery_schema_outer_schema_overlap() { quick_test(sql, expected); } -#[test] -fn exists_subquery_wildcard() { - let sql = "SELECT id FROM person p WHERE EXISTS \ - (SELECT * FROM person \ - WHERE last_name = p.last_name \ - AND state = p.state)"; - - let expected = "Projection: p.id\ - \n Filter: EXISTS ()\ - \n Subquery:\ - \n Projection: *\ - \n Filter: person.last_name = outer_ref(p.last_name) AND person.state = outer_ref(p.state)\ - \n TableScan: person\ - \n SubqueryAlias: p\ - \n TableScan: person"; - quick_test(sql, expected); -} - #[test] fn in_subquery_uncorrelated() { let sql = "SELECT id FROM person p WHERE id IN \ @@ -2947,113 +2810,6 @@ fn join_on_complex_condition() { quick_test(sql, expected); } -#[test] -fn lateral_constant() { - let sql = "SELECT * FROM j1, LATERAL (SELECT 1) AS j2"; - let expected = "Projection: *\ - \n Cross Join: \ - \n TableScan: j1\ - \n SubqueryAlias: j2\ - \n Projection: Int64(1)\ - \n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn lateral_comma_join() { - let sql = "SELECT j1_string, j2_string FROM - j1, \ - LATERAL (SELECT * FROM j2 WHERE j1_id < j2_id) AS j2"; - let expected = "Projection: j1.j1_string, j2.j2_string\ - \n Cross Join: \ - \n TableScan: j1\ - \n SubqueryAlias: j2\ - \n Subquery:\ - \n Projection: *\ - \n Filter: outer_ref(j1.j1_id) < j2.j2_id\ - \n TableScan: j2"; - quick_test(sql, expected); -} - -#[test] -fn lateral_comma_join_referencing_join_rhs() { - let sql = "SELECT * FROM\ - \n j1 JOIN (j2 JOIN j3 ON(j2_id = j3_id - 2)) ON(j1_id = j2_id),\ - \n LATERAL (SELECT * FROM j3 WHERE j3_string = j2_string) as j4;"; - let expected = "Projection: *\ - \n Cross Join: \ - \n Inner Join: Filter: j1.j1_id = j2.j2_id\ - \n TableScan: j1\ - \n Inner Join: Filter: j2.j2_id = j3.j3_id - Int64(2)\ - \n TableScan: j2\ - \n TableScan: j3\ - \n SubqueryAlias: j4\ - \n Subquery:\ - \n Projection: *\ - \n Filter: j3.j3_string = outer_ref(j2.j2_string)\ - \n TableScan: j3"; - quick_test(sql, expected); -} - -#[test] -fn lateral_comma_join_with_shadowing() { - // The j1_id on line 3 references the (closest) j1 definition from line 2. - let sql = "\ - SELECT * FROM j1, LATERAL (\ - SELECT * FROM j1, LATERAL (\ - SELECT * FROM j2 WHERE j1_id = j2_id\ - ) as j2\ - ) as j2;"; - let expected = "Projection: *\ - \n Cross Join: \ - \n TableScan: j1\ - \n SubqueryAlias: j2\ - \n Subquery:\ - \n Projection: *\ - \n Cross Join: \ - \n TableScan: j1\ - \n SubqueryAlias: j2\ - \n Subquery:\ - \n Projection: *\ - \n Filter: outer_ref(j1.j1_id) = j2.j2_id\ - \n TableScan: j2"; - quick_test(sql, expected); -} - -#[test] -fn lateral_left_join() { - let sql = "SELECT j1_string, j2_string FROM \ - j1 \ - LEFT JOIN LATERAL (SELECT * FROM j2 WHERE j1_id < j2_id) AS j2 ON(true);"; - let expected = "Projection: j1.j1_string, j2.j2_string\ - \n Left Join: Filter: Boolean(true)\ - \n TableScan: j1\ - \n SubqueryAlias: j2\ - \n Subquery:\ - \n Projection: *\ - \n Filter: outer_ref(j1.j1_id) < j2.j2_id\ - \n TableScan: j2"; - quick_test(sql, expected); -} - -#[test] -fn lateral_nested_left_join() { - let sql = "SELECT * FROM - j1, \ - (j2 LEFT JOIN LATERAL (SELECT * FROM j3 WHERE j1_id + j2_id = j3_id) AS j3 ON(true))"; - let expected = "Projection: *\ - \n Cross Join: \ - \n TableScan: j1\ - \n Left Join: Filter: Boolean(true)\ - \n TableScan: j2\ - \n SubqueryAlias: j3\ - \n Subquery:\ - \n Projection: *\ - \n Filter: outer_ref(j1.j1_id) + outer_ref(j2.j2_id) = j3.j3_id\ - \n TableScan: j3"; - quick_test(sql, expected); -} - #[test] fn hive_aggregate_with_filter() -> Result<()> { let dialect = &HiveDialect {}; @@ -3314,20 +3070,6 @@ fn test_one_side_constant_full_join() { quick_test(sql, expected); } -#[test] -fn test_select_all_inner_join() { - let sql = "SELECT * - FROM person \ - INNER JOIN orders \ - ON orders.customer_id * 2 = person.id + 10"; - - let expected = "Projection: *\ - \n Inner Join: Filter: orders.customer_id * Int64(2) = person.id + Int64(10)\ - \n TableScan: person\ - \n TableScan: orders"; - quick_test(sql, expected); -} - #[test] fn test_select_join_key_inner_join() { let sql = "SELECT orders.customer_id * 2, person.id + 10 @@ -4057,34 +3799,6 @@ fn test_prepare_statement_to_plan_limit() { prepare_stmt_replace_params_quick_test(plan, param_values, expected_plan); } -#[test] -fn test_prepare_statement_to_plan_value_list() { - let sql = "PREPARE my_plan(STRING, STRING) AS SELECT * FROM (VALUES(1, $1), (2, $2)) AS t (num, letter);"; - - let expected_plan = "Prepare: \"my_plan\" [Utf8, Utf8] \ - \n Projection: *\ - \n SubqueryAlias: t\ - \n Projection: column1 AS num, column2 AS letter\ - \n Values: (Int64(1), $1), (Int64(2), $2)"; - - let expected_dt = "[Utf8, Utf8]"; - - let plan = prepare_stmt_quick_test(sql, expected_plan, expected_dt); - - /////////////////// - // replace params with values - let param_values = vec![ - ScalarValue::from("a".to_string()), - ScalarValue::from("b".to_string()), - ]; - let expected_plan = "Projection: *\ - \n SubqueryAlias: t\ - \n Projection: column1 AS num, column2 AS letter\ - \n Values: (Int64(1), Utf8(\"a\") AS $1), (Int64(2), Utf8(\"b\") AS $2)"; - - prepare_stmt_replace_params_quick_test(plan, param_values, expected_plan); -} - #[test] fn test_prepare_statement_unknown_list_param() { let sql = "SELECT id from person where id = $2"; diff --git a/datafusion/sqllogictest/test_files/copy.slt b/datafusion/sqllogictest/test_files/copy.slt index f39ff56ce449..e2bb23e35732 100644 --- a/datafusion/sqllogictest/test_files/copy.slt +++ b/datafusion/sqllogictest/test_files/copy.slt @@ -631,3 +631,20 @@ COPY source_table to '/tmp/table.parquet' (row_group_size 55 + 102); # Copy using execution.keep_partition_by_columns with an invalid value query error DataFusion error: Invalid or Unsupported Configuration: provided value for 'execution.keep_partition_by_columns' was not recognized: "invalid_value" COPY source_table to '/tmp/table.parquet' OPTIONS (execution.keep_partition_by_columns invalid_value); + +statement count 0 +create table t; + +query TT +explain COPY (select * from t limit 10) to 'output.csv'; +---- +logical_plan +01)CopyTo: format=csv output_url=output.csv options: () +02)--Limit: skip=0, fetch=10 +03)----TableScan: t projection=[], fetch=10 +physical_plan +01)DataSinkExec: sink=CsvSink(file_groups=[]) +02)--DataSourceExec: partitions=1, partition_sizes=[0], fetch=10 + +statement count 0 +drop table t; diff --git a/datafusion/sqllogictest/test_files/cte.slt b/datafusion/sqllogictest/test_files/cte.slt index e36023e8072f..e019af9775a4 100644 --- a/datafusion/sqllogictest/test_files/cte.slt +++ b/datafusion/sqllogictest/test_files/cte.slt @@ -943,3 +943,65 @@ drop table j1; statement count 0 drop table j2; + +query TT +explain WITH RECURSIVE numbers AS ( + select 1 as n + UNION ALL + select n + 1 FROM numbers WHERE N < 10 +) select * from numbers; +---- +logical_plan +01)SubqueryAlias: numbers +02)--RecursiveQuery: is_distinct=false +03)----Projection: Int64(1) AS n +04)------EmptyRelation +05)----Projection: numbers.n + Int64(1) +06)------Filter: numbers.n < Int64(10) +07)--------TableScan: numbers +physical_plan +01)RecursiveQueryExec: name=numbers, is_distinct=false +02)--ProjectionExec: expr=[1 as n] +03)----PlaceholderRowExec +04)--CoalescePartitionsExec +05)----ProjectionExec: expr=[n@0 + 1 as numbers.n + Int64(1)] +06)------CoalesceBatchesExec: target_batch_size=8182 +07)--------FilterExec: n@0 < 10 +08)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +09)------------WorkTableExec: name=numbers + +query TT +explain WITH RECURSIVE numbers AS ( + select 1 as n + UNION ALL + select n + 1 FROM numbers WHERE N < 10 +) select * from numbers; +---- +logical_plan +01)SubqueryAlias: numbers +02)--RecursiveQuery: is_distinct=false +03)----Projection: Int64(1) AS n +04)------EmptyRelation +05)----Projection: numbers.n + Int64(1) +06)------Filter: numbers.n < Int64(10) +07)--------TableScan: numbers +physical_plan +01)RecursiveQueryExec: name=numbers, is_distinct=false +02)--ProjectionExec: expr=[1 as n] +03)----PlaceholderRowExec +04)--CoalescePartitionsExec +05)----ProjectionExec: expr=[n@0 + 1 as numbers.n + Int64(1)] +06)------CoalesceBatchesExec: target_batch_size=8182 +07)--------FilterExec: n@0 < 10 +08)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +09)------------WorkTableExec: name=numbers + +statement count 0 +set datafusion.execution.enable_recursive_ctes = false; + +query error DataFusion error: This feature is not implemented: Recursive CTEs are not enabled +explain WITH RECURSIVE numbers AS ( + select 1 as n + UNION ALL + select n + 1 FROM numbers WHERE N < 10 +) select * from numbers; diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index 59a41ff1fa20..50af06dc40fc 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -4649,8 +4649,161 @@ physical_plan 02)--DataSourceExec: partitions=1, partition_sizes=[0] 03)--DataSourceExec: partitions=1, partition_sizes=[0] +query TT +explain SELECT * FROM lineitem JOIN lineitem as lineitem2 USING (c1) +---- +logical_plan +01)Projection: lineitem.c1 +02)--Inner Join: lineitem.c1 = lineitem2.c1 +03)----TableScan: lineitem projection=[c1] +04)----SubqueryAlias: lineitem2 +05)------TableScan: lineitem projection=[c1] +physical_plan +01)CoalesceBatchesExec: target_batch_size=3 +02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c1@0, c1@0)], projection=[c1@0] +03)----DataSourceExec: partitions=1, partition_sizes=[0] +04)----DataSourceExec: partitions=1, partition_sizes=[0] + statement count 0 drop table person; -query +statement count 0 drop table lineitem; + +statement count 0 +create table j1(j1_string varchar, j1_id int); + +statement count 0 +create table j2(j2_string varchar, j2_id int); + +statement count 0 +create table j3(j3_string varchar, j3_id int); + +statement count 0 +create table j4(j4_string varchar, j4_id int); + +query TT +explain SELECT j1_string, j2_string FROM j1, LATERAL (SELECT * FROM j2 WHERE j1_id < j2_id) AS j2; +---- +logical_plan +01)Cross Join: +02)--TableScan: j1 projection=[j1_string] +03)--SubqueryAlias: j2 +04)----Projection: j2.j2_string +05)------Subquery: +06)--------Filter: outer_ref(j1.j1_id) < j2.j2_id +07)----------TableScan: j2 projection=[j2_string, j2_id] +physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(Int32, Column { relation: Some(Bare { table: "j1" }), name: "j1_id" }) + +query TT +explain SELECT * FROM j1 JOIN (j2 JOIN j3 ON(j2_id = j3_id - 2)) ON(j1_id = j2_id), LATERAL (SELECT * FROM j3 WHERE j3_string = j2_string) as j4 +---- +logical_plan +01)Cross Join: +02)--Inner Join: CAST(j2.j2_id AS Int64) = CAST(j3.j3_id AS Int64) - Int64(2) +03)----Inner Join: j1.j1_id = j2.j2_id +04)------TableScan: j1 projection=[j1_string, j1_id] +05)------TableScan: j2 projection=[j2_string, j2_id] +06)----TableScan: j3 projection=[j3_string, j3_id] +07)--SubqueryAlias: j4 +08)----Subquery: +09)------Filter: j3.j3_string = outer_ref(j2.j2_string) +10)--------TableScan: j3 projection=[j3_string, j3_id] +physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(Utf8, Column { relation: Some(Bare { table: "j2" }), name: "j2_string" }) + +query TT +explain SELECT * FROM j1, LATERAL (SELECT * FROM j1, LATERAL (SELECT * FROM j2 WHERE j1_id = j2_id) as j2) as j2; +---- +logical_plan +01)Cross Join: +02)--TableScan: j1 projection=[j1_string, j1_id] +03)--SubqueryAlias: j2 +04)----Subquery: +05)------Cross Join: +06)--------TableScan: j1 projection=[j1_string, j1_id] +07)--------SubqueryAlias: j2 +08)----------Subquery: +09)------------Filter: outer_ref(j1.j1_id) = j2.j2_id +10)--------------TableScan: j2 projection=[j2_string, j2_id] +physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(Int32, Column { relation: Some(Bare { table: "j1" }), name: "j1_id" }) + +query TT +explain SELECT j1_string, j2_string FROM j1 LEFT JOIN LATERAL (SELECT * FROM j2 WHERE j1_id < j2_id) AS j2 ON(true); +---- +logical_plan +01)Left Join: +02)--TableScan: j1 projection=[j1_string] +03)--SubqueryAlias: j2 +04)----Projection: j2.j2_string +05)------Subquery: +06)--------Filter: outer_ref(j1.j1_id) < j2.j2_id +07)----------TableScan: j2 projection=[j2_string, j2_id] +physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(Int32, Column { relation: Some(Bare { table: "j1" }), name: "j1_id" }) + +query TT +explain SELECT * FROM j1, (j2 LEFT JOIN LATERAL (SELECT * FROM j3 WHERE j1_id + j2_id = j3_id) AS j3 ON(true)); +---- +logical_plan +01)Cross Join: +02)--TableScan: j1 projection=[j1_string, j1_id] +03)--Left Join: +04)----TableScan: j2 projection=[j2_string, j2_id] +05)----SubqueryAlias: j3 +06)------Subquery: +07)--------Filter: outer_ref(j1.j1_id) + outer_ref(j2.j2_id) = j3.j3_id +08)----------TableScan: j3 projection=[j3_string, j3_id] +physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(Int32, Column { relation: Some(Bare { table: "j1" }), name: "j1_id" }) + +query TT +explain SELECT * FROM j1, LATERAL (SELECT 1) AS j2; +---- +logical_plan +01)Cross Join: +02)--TableScan: j1 projection=[j1_string, j1_id] +03)--SubqueryAlias: j2 +04)----Projection: Int64(1) +05)------EmptyRelation +physical_plan +01)CrossJoinExec +02)--DataSourceExec: partitions=1, partition_sizes=[0] +03)--ProjectionExec: expr=[1 as Int64(1)] +04)----PlaceholderRowExec + +statement count 0 +drop table j1; + +statement count 0 +drop table j2; + +statement count 0 +drop table j3; + +statement count 0 +drop table j4; + +statement count 0 +create table person(id int); + +statement count 0 +create table orders(customer_id int); + +query TT +explain SELECT * FROM person INNER JOIN orders ON orders.customer_id * 2 = person.id + 10 +---- +logical_plan +01)Inner Join: CAST(person.id AS Int64) + Int64(10) = CAST(orders.customer_id AS Int64) * Int64(2) +02)--TableScan: person projection=[id] +03)--TableScan: orders projection=[customer_id] +physical_plan +01)CoalesceBatchesExec: target_batch_size=3 +02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(person.id + Int64(10)@1, orders.customer_id * Int64(2)@1)], projection=[id@0, customer_id@2] +03)----ProjectionExec: expr=[id@0 as id, CAST(id@0 AS Int64) + 10 as person.id + Int64(10)] +04)------DataSourceExec: partitions=1, partition_sizes=[0] +05)----ProjectionExec: expr=[customer_id@0 as customer_id, CAST(customer_id@0 AS Int64) * 2 as orders.customer_id * Int64(2)] +06)------DataSourceExec: partitions=1, partition_sizes=[0] + +statement count 0 +drop table person; + +statement count 0 +drop table orders; diff --git a/datafusion/sqllogictest/test_files/prepare.slt b/datafusion/sqllogictest/test_files/prepare.slt index 5d0f417640ec..33df0d26f361 100644 --- a/datafusion/sqllogictest/test_files/prepare.slt +++ b/datafusion/sqllogictest/test_files/prepare.slt @@ -312,3 +312,18 @@ SET datafusion.explain.logical_plan_only=false; statement ok DROP TABLE person; + +statement ok +SET datafusion.explain.logical_plan_only=true; + +statement count 0 +PREPARE my_plan(STRING, STRING) AS SELECT * FROM (VALUES(1, $1), (2, $2)) AS t (num, letter); + +statement count 5 +explain PREPARE my_plan(STRING, STRING) AS SELECT * FROM (VALUES(1, $1), (2, $2)) AS t (num, letter); + +query IT +EXECUTE my_plan('a', 'b'); +---- +1 a +2 b diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index bedd7603201a..d5e0c449762f 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -1820,7 +1820,7 @@ query I select a from t; ---- -query +statement count 0 drop table t; statement ok @@ -1845,3 +1845,13 @@ SELECT t1.v1 FROM (SELECT 1 AS "t1.v1"); # Test issue: https://github.com/apache/datafusion/issues/14124 query error DataFusion error: Arrow error: Arithmetic overflow: Overflow happened on: 10000 \* 100000000000000000000000000000000000 SELECT ('0.54321543215432154321543215432154321'::DECIMAL(35,35) + 10000)::VARCHAR + +# where_selection_with_ambiguous_column +statement ok +CREATE TABLE t(a int, b int, id int); + +query error DataFusion error: Schema error: Ambiguous reference to unqualified field id +select * from t a, t b where id = id + 1; + +statement count 0 +drop table t; diff --git a/datafusion/sqllogictest/test_files/subquery.slt b/datafusion/sqllogictest/test_files/subquery.slt index 207bb72fd549..5a722c2288ac 100644 --- a/datafusion/sqllogictest/test_files/subquery.slt +++ b/datafusion/sqllogictest/test_files/subquery.slt @@ -1431,3 +1431,23 @@ drop table t1; statement count 0 drop table t2; + + +# test exists_subquery_wildcard +statement count 0 +create table person(id int, last_name int, state int); + +query TT +explain SELECT id FROM person p WHERE EXISTS + (SELECT * FROM person WHERE last_name = p.last_name AND state = p.state) +---- +logical_plan +01)Projection: p.id +02)--LeftSemi Join: p.last_name = __correlated_sq_1.last_name, p.state = __correlated_sq_1.state +03)----SubqueryAlias: p +04)------TableScan: person projection=[id, last_name, state] +05)----SubqueryAlias: __correlated_sq_1 +06)------TableScan: person projection=[last_name, state] + +statement count 0 +drop table person; diff --git a/datafusion/sqllogictest/test_files/wildcard.slt b/datafusion/sqllogictest/test_files/wildcard.slt index ffda15a1dda1..1a480eac0cc3 100644 --- a/datafusion/sqllogictest/test_files/wildcard.slt +++ b/datafusion/sqllogictest/test_files/wildcard.slt @@ -152,5 +152,11 @@ create table t(a int, b int, c int) as values (1, 2, 3); query error DataFusion error: Error during planning: Projections require unique expression names but the expression "t\.a" at position 0 and "t\.a" at position 3 have the same name\. Consider aliasing \("AS"\) one of them\. select *, a from t; +# a is aliased to other name so the query is valid +query IIII +select *, a as aka from t; +---- +1 2 3 1 + statement count 0 -drop table t; \ No newline at end of file +drop table t; From d195a5ad3307f43fbcf5e31dcfd5f291097feda4 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Sat, 8 Mar 2025 17:11:01 +0800 Subject: [PATCH 03/12] cleanup --- datafusion/sql/src/select.rs | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index db488f665bce..2daa1f0bec09 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -34,7 +34,8 @@ use datafusion_expr::expr_rewriter::{ normalize_col, normalize_col_with_schemas_and_ambiguity_check, normalize_sorts, }; use datafusion_expr::utils::{ - expand_qualified_wildcard, expand_wildcard, expr_as_column_expr, expr_to_columns, find_aggregate_exprs, find_window_exprs + expand_qualified_wildcard, expand_wildcard, expr_as_column_expr, expr_to_columns, + find_aggregate_exprs, find_window_exprs, }; use datafusion_expr::{ qualified_wildcard_with_options, wildcard_with_options, Aggregate, Expr, Filter, @@ -635,18 +636,15 @@ impl SqlToRel<'_, S> { planner_context, options, )?; - // Ok(vec![wildcard_with_options(planned_options)]) let expanded = - expand_wildcard(plan.schema(), plan, Some(&planned_options))?; + expand_wildcard(plan.schema(), plan, Some(&planned_options))?; - let replaced = if let Some(replace) = planned_options.replace { - replace_columns(expanded, &replace)? - } else { - expanded - }; - - Ok(replaced) + if let Some(replace) = planned_options.replace { + replace_columns(expanded, &replace) + } else { + Ok(expanded) + } } SelectItem::QualifiedWildcard(object_name, options) => { Self::check_wildcard_options(&options)?; @@ -665,15 +663,11 @@ impl SqlToRel<'_, S> { )?; // If there is a REPLACE statement, replace that column with the given // replace expression. Column name remains the same. - let replaced = if let Some(replace) = planned_options.replace { - replace_columns(expanded, &replace)? + if let Some(replace) = planned_options.replace { + replace_columns(expanded, &replace) } else { - expanded - }; - - Ok(replaced) - - // Ok(vec![qualified_wildcard_with_options(qualifier, planned_options)]) + Ok(expanded) + } } } } @@ -936,4 +930,4 @@ fn replace_columns( } } Ok(exprs) -} \ No newline at end of file +} From ff071e80faccb3fb1ca34208e40b6589a5b1dad3 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Sat, 8 Mar 2025 17:22:10 +0800 Subject: [PATCH 04/12] assert --- datafusion/sql/src/select.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 2daa1f0bec09..70567e77c079 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -38,7 +38,7 @@ use datafusion_expr::utils::{ find_aggregate_exprs, find_window_exprs, }; use datafusion_expr::{ - qualified_wildcard_with_options, wildcard_with_options, Aggregate, Expr, Filter, + Aggregate, Expr, Filter, GroupingSet, LogicalPlan, LogicalPlanBuilder, LogicalPlanBuilderOptions, Partitioning, }; @@ -93,6 +93,12 @@ impl SqlToRel<'_, S> { planner_context, )?; + // TOOD: remove this after Expr::Wildcard is removed + #[allow(deprecated)] + for expr in &select_exprs { + debug_assert!(!matches!(expr, Expr::Wildcard { .. })); + } + // Having and group by clause may reference aliases defined in select projection let projected_plan = self.project(base_plan.clone(), select_exprs.clone())?; From 59f7e98e45c7ec1b030d19184a2eb926ef181e15 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Sat, 8 Mar 2025 17:26:20 +0800 Subject: [PATCH 05/12] clippy --- datafusion/optimizer/tests/optimizer_integration.rs | 4 +--- datafusion/sql/src/select.rs | 7 ++++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs index c109b4dabc2f..5e66c7ec0313 100644 --- a/datafusion/optimizer/tests/optimizer_integration.rs +++ b/datafusion/optimizer/tests/optimizer_integration.rs @@ -22,16 +22,14 @@ use std::sync::Arc; use arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; use datafusion_common::config::ConfigOptions; -use datafusion_common::{assert_contains, plan_err, Result, TableReference}; +use datafusion_common::{plan_err, Result, TableReference}; use datafusion_expr::planner::ExprPlanner; -use datafusion_expr::sqlparser::dialect::PostgreSqlDialect; use datafusion_expr::test::function_stub::sum_udaf; use datafusion_expr::{AggregateUDF, LogicalPlan, ScalarUDF, TableSource, WindowUDF}; use datafusion_functions_aggregate::average::avg_udaf; use datafusion_functions_aggregate::count::count_udaf; use datafusion_functions_aggregate::planner::AggregateFunctionPlanner; use datafusion_functions_window::planner::WindowFunctionPlanner; -use datafusion_optimizer::analyzer::type_coercion::TypeCoercionRewriter; use datafusion_optimizer::analyzer::Analyzer; use datafusion_optimizer::optimizer::Optimizer; use datafusion_optimizer::{OptimizerConfig, OptimizerContext, OptimizerRule}; diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 70567e77c079..ce9c5d2f7ccb 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -38,9 +38,8 @@ use datafusion_expr::utils::{ find_aggregate_exprs, find_window_exprs, }; use datafusion_expr::{ - Aggregate, Expr, Filter, - GroupingSet, LogicalPlan, LogicalPlanBuilder, LogicalPlanBuilderOptions, - Partitioning, + Aggregate, Expr, Filter, GroupingSet, LogicalPlan, LogicalPlanBuilder, + LogicalPlanBuilderOptions, Partitioning, }; use indexmap::IndexMap; @@ -646,6 +645,8 @@ impl SqlToRel<'_, S> { let expanded = expand_wildcard(plan.schema(), plan, Some(&planned_options))?; + // If there is a REPLACE statement, replace that column with the given + // replace expression. Column name remains the same. if let Some(replace) = planned_options.replace { replace_columns(expanded, &replace) } else { From 271abd3e201f1434cf2d2b747d78e1113c59a9bc Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Tue, 11 Mar 2025 09:25:24 +0800 Subject: [PATCH 06/12] fix test --- datafusion/sql/tests/cases/plan_to_sql.rs | 132 ++++++++++++++++++++-- 1 file changed, 124 insertions(+), 8 deletions(-) diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index fb1fb6f0fbf2..78303653b63a 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -323,6 +323,13 @@ fn roundtrip_statement_with_dialect() -> Result<()> { parser_dialect: Box::new(MySqlDialect {}), unparser_dialect: Box::new(UnparserMySqlDialect {}), }, + TestStatementWithDialect { + sql: "select j1_id from (select j1_id from j1 limit 10);", + expected: + "SELECT `j1`.`j1_id` FROM (SELECT `j1`.`j1_id` FROM `j1` LIMIT 10) AS `derived_limit`", + parser_dialect: Box::new(MySqlDialect {}), + unparser_dialect: Box::new(UnparserMySqlDialect {}), + }, TestStatementWithDialect { sql: "select ta.j1_id from j1 ta order by j1_id limit 10;", expected: @@ -517,6 +524,90 @@ fn roundtrip_statement_with_dialect() -> Result<()> { parser_dialect: Box::new(GenericDialect {}), unparser_dialect: Box::new(SqliteDialect {}), }, + TestStatementWithDialect { + sql: "SELECT * FROM (SELECT j1_id + 1 FROM j1) AS temp_j(id2)", + expected: r#"SELECT `temp_j`.`id2` FROM (SELECT (`j1`.`j1_id` + 1) AS `id2` FROM `j1`) AS `temp_j`"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(SqliteDialect {}), + }, + TestStatementWithDialect { + sql: "SELECT * FROM (SELECT j1_id FROM j1 LIMIT 1) AS temp_j(id2)", + expected: r#"SELECT `temp_j`.`id2` FROM (SELECT `j1`.`j1_id` AS `id2` FROM `j1` LIMIT 1) AS `temp_j`"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(SqliteDialect {}), + }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3])", + expected: r#"SELECT "UNNEST(make_array(Int64(1),Int64(2),Int64(3)))" FROM (SELECT UNNEST([1, 2, 3]) AS "UNNEST(make_array(Int64(1),Int64(2),Int64(3)))")"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3]) AS t1 (c1)", + expected: r#"SELECT t1.c1 FROM (SELECT UNNEST([1, 2, 3]) AS "UNNEST(make_array(Int64(1),Int64(2),Int64(3)))") AS t1 (c1)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3]), j1", + expected: r#"SELECT "UNNEST(make_array(Int64(1),Int64(2),Int64(3)))", j1.j1_id, j1.j1_string FROM (SELECT UNNEST([1, 2, 3]) AS "UNNEST(make_array(Int64(1),Int64(2),Int64(3)))") CROSS JOIN j1"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3]) u(c1) JOIN j1 ON u.c1 = j1.j1_id", + expected: r#"SELECT u.c1, j1.j1_id, j1.j1_string FROM (SELECT UNNEST([1, 2, 3]) AS "UNNEST(make_array(Int64(1),Int64(2),Int64(3)))") AS u (c1) JOIN j1 ON (u.c1 = j1.j1_id)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3]) u(c1) UNION ALL SELECT * FROM UNNEST([4,5,6]) u(c1)", + expected: r#"SELECT u.c1 FROM (SELECT UNNEST([1, 2, 3]) AS "UNNEST(make_array(Int64(1),Int64(2),Int64(3)))") AS u (c1) UNION ALL SELECT u.c1 FROM (SELECT UNNEST([4, 5, 6]) AS "UNNEST(make_array(Int64(4),Int64(5),Int64(6)))") AS u (c1)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3])", + expected: r#"SELECT UNNEST(make_array(Int64(1),Int64(2),Int64(3))) FROM UNNEST([1, 2, 3])"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), + }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3]) AS t1 (c1)", + expected: r#"SELECT t1.c1 FROM UNNEST([1, 2, 3]) AS t1 (c1)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), + }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3]) AS t1 (c1)", + expected: r#"SELECT t1.c1 FROM UNNEST([1, 2, 3]) AS t1 (c1)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), + }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3]), j1", + expected: r#"SELECT UNNEST(make_array(Int64(1),Int64(2),Int64(3))), j1.j1_id, j1.j1_string FROM UNNEST([1, 2, 3]) CROSS JOIN j1"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), + }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3]) u(c1) JOIN j1 ON u.c1 = j1.j1_id", + expected: r#"SELECT u.c1, j1.j1_id, j1.j1_string FROM UNNEST([1, 2, 3]) AS u (c1) JOIN j1 ON (u.c1 = j1.j1_id)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), + }, + TestStatementWithDialect { + sql: "SELECT * FROM UNNEST([1,2,3]) u(c1) UNION ALL SELECT * FROM UNNEST([4,5,6]) u(c1)", + expected: r#"SELECT u.c1 FROM UNNEST([1, 2, 3]) AS u (c1) UNION ALL SELECT u.c1 FROM UNNEST([4, 5, 6]) AS u (c1)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), + }, + TestStatementWithDialect { + sql: "SELECT UNNEST([1,2,3])", + expected: r#"SELECT * FROM UNNEST([1, 2, 3])"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), + }, TestStatementWithDialect { sql: "SELECT UNNEST([1,2,3]) as c1", expected: r#"SELECT UNNEST([1, 2, 3]) AS c1"#, @@ -529,6 +620,30 @@ fn roundtrip_statement_with_dialect() -> Result<()> { parser_dialect: Box::new(GenericDialect {}), unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), }, + TestStatementWithDialect { + sql: "SELECT * FROM unnest_table u, UNNEST(u.array_col)", + expected: r#"SELECT u.array_col, u.struct_col, UNNEST(outer_ref(u.array_col)) FROM unnest_table AS u CROSS JOIN UNNEST(u.array_col)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), + }, + TestStatementWithDialect { + sql: "SELECT * FROM unnest_table u, UNNEST(u.array_col) AS t1 (c1)", + expected: r#"SELECT u.array_col, u.struct_col, t1.c1 FROM unnest_table AS u CROSS JOIN UNNEST(u.array_col) AS t1 (c1)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), + }, + TestStatementWithDialect { + sql: "SELECT * FROM unnest_table u, UNNEST(u.array_col)", + expected: r#"SELECT u.array_col, u.struct_col, "UNNEST(outer_ref(u.array_col))" FROM unnest_table AS u CROSS JOIN LATERAL (SELECT UNNEST(u.array_col) AS "UNNEST(outer_ref(u.array_col))")"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, + TestStatementWithDialect { + sql: "SELECT * FROM unnest_table u, UNNEST(u.array_col) AS t1 (c1)", + expected: r#"SELECT u.array_col, u.struct_col, t1.c1 FROM unnest_table AS u CROSS JOIN LATERAL (SELECT UNNEST(u.array_col) AS "UNNEST(outer_ref(u.array_col))") AS t1 (c1)"#, + parser_dialect: Box::new(GenericDialect {}), + unparser_dialect: Box::new(UnparserDefaultDialect {}), + }, ]; for query in tests { @@ -1335,13 +1450,13 @@ fn test_unnest_to_sql() { fn test_join_with_no_conditions() { sql_round_trip( GenericDialect {}, - "SELECT j1.j1_id FROM j1 JOIN j2", - "SELECT j1.j1_id FROM j1 CROSS JOIN j2", + "SELECT j1.j1_id, j1.j1_string FROM j1 JOIN j2", + "SELECT j1.j1_id, j1.j1_string FROM j1 CROSS JOIN j2", ); sql_round_trip( GenericDialect {}, - "SELECT j1.j1_id FROM j1 CROSS JOIN j2", - "SELECT j1.j1_id FROM j1 CROSS JOIN j2", + "SELECT j1.j1_id, j1.j1_string FROM j1 CROSS JOIN j2", + "SELECT j1.j1_id, j1.j1_string FROM j1 CROSS JOIN j2", ); } @@ -1426,7 +1541,7 @@ impl UserDefinedLogicalNodeUnparser for UnusedUnparser { fn test_unparse_extension_to_statement() -> Result<()> { let dialect = GenericDialect {}; let statement = Parser::new(&dialect) - .try_with_sql("SELECT j1.j1_id FROM j1")? + .try_with_sql("SELECT * FROM j1")? .parse_statement()?; let state = MockSessionState::default(); let context = MockContextProvider { state }; @@ -1442,7 +1557,7 @@ fn test_unparse_extension_to_statement() -> Result<()> { Arc::new(UnusedUnparser {}), ]); let sql = unparser.plan_to_sql(&extension)?; - let expected = "SELECT j1.j1_id FROM j1"; + let expected = "SELECT j1.j1_id, j1.j1_string FROM j1"; assert_eq!(sql.to_string(), expected); if let Some(err) = plan_to_sql(&extension).err() { @@ -1485,7 +1600,7 @@ impl UserDefinedLogicalNodeUnparser for MockSqlUnparser { fn test_unparse_extension_to_sql() -> Result<()> { let dialect = GenericDialect {}; let statement = Parser::new(&dialect) - .try_with_sql("SELECT j1.j1_id FROM j1")? + .try_with_sql("SELECT * FROM j1")? .parse_statement()?; let state = MockSessionState::default(); let context = MockContextProvider { state }; @@ -1505,7 +1620,8 @@ fn test_unparse_extension_to_sql() -> Result<()> { Arc::new(UnusedUnparser {}), ]); let sql = unparser.plan_to_sql(&plan)?; - let expected = "SELECT j1.j1_id AS user_id FROM (SELECT j1.j1_id FROM j1)"; + let expected = + "SELECT j1.j1_id AS user_id FROM (SELECT j1.j1_id, j1.j1_string FROM j1)"; assert_eq!(sql.to_string(), expected); if let Some(err) = plan_to_sql(&plan).err() { From 09d14dfdd41ee0782d855de82d023f294662c9d4 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Tue, 11 Mar 2025 09:29:56 +0800 Subject: [PATCH 07/12] fix test --- datafusion/sqllogictest/test_files/order.slt | 7 ------- 1 file changed, 7 deletions(-) diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index 2d319e8317c5..f088e071d7e7 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -996,13 +996,6 @@ limit 2; 4 2 NULL 3 4 NULL 1 3 -query III -select * from (select c, a, NULL::int as a0 from t order by a, c) t1 -union all -select * from (select c, NULL::int as a, a0 from t order by a0, c) t2 -order by c, a, a0 -limit 2; - query III select * from (select c, a, NULL::int as a0 from t order by a, c) t1 union all From 013c481329ac0a2c44811502e1994e9ff22f5934 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Tue, 11 Mar 2025 19:51:36 +0800 Subject: [PATCH 08/12] Refactor wildcard expansion in InlineTableScan and update related tests --- datafusion/optimizer/src/analyzer/inline_table_scan.rs | 6 ++++-- datafusion/optimizer/src/analyzer/mod.rs | 3 --- datafusion/sqllogictest/test_files/explain.slt | 1 - 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/datafusion/optimizer/src/analyzer/inline_table_scan.rs b/datafusion/optimizer/src/analyzer/inline_table_scan.rs index 95781b395f3c..4662e0d44a18 100644 --- a/datafusion/optimizer/src/analyzer/inline_table_scan.rs +++ b/datafusion/optimizer/src/analyzer/inline_table_scan.rs @@ -23,7 +23,8 @@ use crate::analyzer::AnalyzerRule; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::{Column, Result}; -use datafusion_expr::{logical_plan::LogicalPlan, wildcard, Expr, LogicalPlanBuilder}; +use datafusion_expr::utils::expand_wildcard; +use datafusion_expr::{logical_plan::LogicalPlan, Expr, LogicalPlanBuilder}; /// Analyzed rule that inlines TableScan that provide a [`LogicalPlan`] /// (DataFrame / ViewTable) @@ -92,7 +93,8 @@ fn generate_projection_expr( ))); } } else { - exprs.push(wildcard()); + let expanded = expand_wildcard(sub_plan.schema(), sub_plan, None)?; + exprs.extend(expanded); } Ok(exprs) } diff --git a/datafusion/optimizer/src/analyzer/mod.rs b/datafusion/optimizer/src/analyzer/mod.rs index c506616d142e..d2ac9575f7c3 100644 --- a/datafusion/optimizer/src/analyzer/mod.rs +++ b/datafusion/optimizer/src/analyzer/mod.rs @@ -99,9 +99,6 @@ impl Analyzer { pub fn new() -> Self { let rules: Vec> = vec![ Arc::new(InlineTableScan::new()), - // Every rule that will generate [Expr::Wildcard] should be placed in front of [ExpandWildcardRule]. - Arc::new(ExpandWildcardRule::new()), - // [Expr::Wildcard] should be expanded before [TypeCoercion] Arc::new(ResolveGroupingFunction::new()), Arc::new(TypeCoercion::new()), ]; diff --git a/datafusion/sqllogictest/test_files/explain.slt b/datafusion/sqllogictest/test_files/explain.slt index cab7308f6ff8..1d63d02bb941 100644 --- a/datafusion/sqllogictest/test_files/explain.slt +++ b/datafusion/sqllogictest/test_files/explain.slt @@ -175,7 +175,6 @@ initial_logical_plan 01)Projection: simple_explain_test.a, simple_explain_test.b, simple_explain_test.c 02)--TableScan: simple_explain_test logical_plan after inline_table_scan SAME TEXT AS ABOVE -logical_plan after expand_wildcard_rule SAME TEXT AS ABOVE logical_plan after resolve_grouping_function SAME TEXT AS ABOVE logical_plan after type_coercion SAME TEXT AS ABOVE analyzed_logical_plan SAME TEXT AS ABOVE From 78256ec4b167138a36f75449f46aa2fe74fcc2af Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Tue, 11 Mar 2025 20:02:47 +0800 Subject: [PATCH 09/12] rm rule --- .../src/analyzer/expand_wildcard_rule.rs | 333 ------------------ datafusion/optimizer/src/analyzer/mod.rs | 2 - 2 files changed, 335 deletions(-) delete mode 100644 datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs diff --git a/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs b/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs deleted file mode 100644 index 8015ebfc7534..000000000000 --- a/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs +++ /dev/null @@ -1,333 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Arc; - -use crate::AnalyzerRule; -use datafusion_common::config::ConfigOptions; -use datafusion_common::tree_node::{Transformed, TransformedResult}; -use datafusion_common::{Column, Result}; -use datafusion_expr::builder::validate_unique_names; -use datafusion_expr::expr::PlannedReplaceSelectItem; -use datafusion_expr::utils::{ - expand_qualified_wildcard, expand_wildcard, find_base_plan, -}; -use datafusion_expr::{ - Distinct, DistinctOn, Expr, LogicalPlan, Projection, SubqueryAlias, -}; - -#[derive(Default, Debug)] -pub struct ExpandWildcardRule {} - -impl ExpandWildcardRule { - pub fn new() -> Self { - Self {} - } -} - -impl AnalyzerRule for ExpandWildcardRule { - fn analyze(&self, plan: LogicalPlan, _: &ConfigOptions) -> Result { - // Because the wildcard expansion is based on the schema of the input plan, - // using `transform_up_with_subqueries` here. - plan.transform_up_with_subqueries(expand_internal).data() - } - - fn name(&self) -> &str { - "expand_wildcard_rule" - } -} - -fn expand_internal(plan: LogicalPlan) -> Result> { - match plan { - LogicalPlan::Projection(Projection { expr, input, .. }) => { - let projected_expr = expand_exprlist(&input, expr)?; - validate_unique_names("Projections", projected_expr.iter())?; - Ok(Transformed::yes( - Projection::try_new(projected_expr, Arc::clone(&input)) - .map(LogicalPlan::Projection)?, - )) - } - // The schema of the plan should also be updated if the child plan is transformed. - LogicalPlan::SubqueryAlias(SubqueryAlias { input, alias, .. }) => { - Ok(Transformed::yes( - SubqueryAlias::try_new(input, alias).map(LogicalPlan::SubqueryAlias)?, - )) - } - LogicalPlan::Distinct(Distinct::On(distinct_on)) => { - let projected_expr = - expand_exprlist(&distinct_on.input, distinct_on.select_expr)?; - validate_unique_names("Distinct", projected_expr.iter())?; - Ok(Transformed::yes(LogicalPlan::Distinct(Distinct::On( - DistinctOn::try_new( - distinct_on.on_expr, - projected_expr, - distinct_on.sort_expr, - distinct_on.input, - )?, - )))) - } - _ => Ok(Transformed::no(plan)), - } -} - -fn expand_exprlist(input: &LogicalPlan, expr: Vec) -> Result> { - let mut projected_expr = vec![]; - let input = find_base_plan(input); - for e in expr { - match e { - #[expect(deprecated)] - Expr::Wildcard { qualifier, options } => { - if let Some(qualifier) = qualifier { - let expanded = expand_qualified_wildcard( - &qualifier, - input.schema(), - Some(&options), - )?; - // If there is a REPLACE statement, replace that column with the given - // replace expression. Column name remains the same. - let replaced = if let Some(replace) = options.replace { - replace_columns(expanded, &replace)? - } else { - expanded - }; - projected_expr.extend(replaced); - } else { - let expanded = - expand_wildcard(input.schema(), input, Some(&options))?; - // If there is a REPLACE statement, replace that column with the given - // replace expression. Column name remains the same. - let replaced = if let Some(replace) = options.replace { - replace_columns(expanded, &replace)? - } else { - expanded - }; - projected_expr.extend(replaced); - } - } - // A workaround to handle the case when the column name is "*". - // We transform the expression to a Expr::Column through [Column::from_name] in many places. - // It would also convert the wildcard expression to a column expression with name "*". - Expr::Column(Column { - ref relation, - ref name, - // TODO Should we use these spans? - spans: _, - }) => { - if name.eq("*") { - if let Some(qualifier) = relation { - projected_expr.extend(expand_qualified_wildcard( - qualifier, - input.schema(), - None, - )?); - } else { - projected_expr.extend(expand_wildcard( - input.schema(), - input, - None, - )?); - } - } else { - projected_expr.push(e.clone()); - } - } - _ => projected_expr.push(e), - } - } - Ok(projected_expr) -} - -/// If there is a REPLACE statement in the projected expression in the form of -/// "REPLACE (some_column_within_an_expr AS some_column)", this function replaces -/// that column with the given replace expression. Column name remains the same. -/// Multiple REPLACEs are also possible with comma separations. -fn replace_columns( - mut exprs: Vec, - replace: &PlannedReplaceSelectItem, -) -> Result> { - for expr in exprs.iter_mut() { - if let Expr::Column(Column { name, .. }) = expr { - if let Some((_, new_expr)) = replace - .items() - .iter() - .zip(replace.expressions().iter()) - .find(|(item, _)| item.column_name.value == *name) - { - *expr = new_expr.clone().alias(name.clone()) - } - } - } - Ok(exprs) -} - -#[cfg(test)] -mod tests { - use arrow::datatypes::{DataType, Field, Schema}; - - use crate::test::{assert_analyzed_plan_eq_display_indent, test_table_scan}; - use crate::Analyzer; - use datafusion_common::{JoinType, TableReference}; - use datafusion_expr::{ - col, in_subquery, qualified_wildcard, table_scan, wildcard, LogicalPlanBuilder, - }; - - use super::*; - - fn assert_plan_eq(plan: LogicalPlan, expected: &str) -> Result<()> { - assert_analyzed_plan_eq_display_indent( - Arc::new(ExpandWildcardRule::new()), - plan, - expected, - ) - } - - #[test] - fn test_expand_wildcard() -> Result<()> { - let table_scan = test_table_scan()?; - let plan = LogicalPlanBuilder::from(table_scan) - .project(vec![wildcard()])? - .build()?; - let expected = - "Projection: test.a, test.b, test.c [a:UInt32, b:UInt32, c:UInt32]\ - \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; - assert_plan_eq(plan, expected) - } - - #[test] - fn test_expand_qualified_wildcard() -> Result<()> { - let table_scan = test_table_scan()?; - let plan = LogicalPlanBuilder::from(table_scan) - .project(vec![qualified_wildcard(TableReference::bare("test"))])? - .build()?; - let expected = - "Projection: test.a, test.b, test.c [a:UInt32, b:UInt32, c:UInt32]\ - \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; - assert_plan_eq(plan, expected) - } - - #[test] - fn test_expand_qualified_wildcard_in_subquery() -> Result<()> { - let table_scan = test_table_scan()?; - let plan = LogicalPlanBuilder::from(table_scan) - .project(vec![qualified_wildcard(TableReference::bare("test"))])? - .build()?; - let plan = LogicalPlanBuilder::from(plan) - .project(vec![wildcard()])? - .build()?; - let expected = - "Projection: test.a, test.b, test.c [a:UInt32, b:UInt32, c:UInt32]\ - \n Projection: test.a, test.b, test.c [a:UInt32, b:UInt32, c:UInt32]\ - \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; - assert_plan_eq(plan, expected) - } - - #[test] - fn test_expand_wildcard_in_subquery() -> Result<()> { - let projection_a = LogicalPlanBuilder::from(test_table_scan()?) - .project(vec![col("a")])? - .build()?; - let subquery = LogicalPlanBuilder::from(projection_a) - .project(vec![wildcard()])? - .build()?; - let plan = LogicalPlanBuilder::from(test_table_scan()?) - .filter(in_subquery(col("a"), Arc::new(subquery)))? - .project(vec![wildcard()])? - .build()?; - let expected = "\ - Projection: test.a, test.b, test.c [a:UInt32, b:UInt32, c:UInt32]\ - \n Filter: test.a IN () [a:UInt32, b:UInt32, c:UInt32]\ - \n Subquery: [a:UInt32]\ - \n Projection: test.a [a:UInt32]\ - \n Projection: test.a [a:UInt32]\ - \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]\ - \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; - assert_plan_eq(plan, expected) - } - - #[test] - fn test_expand_wildcard_in_distinct_on() -> Result<()> { - let table_scan = test_table_scan()?; - let plan = LogicalPlanBuilder::from(table_scan) - .distinct_on(vec![col("a")], vec![wildcard()], None)? - .build()?; - let expected = "\ - DistinctOn: on_expr=[[test.a]], select_expr=[[test.a, test.b, test.c]], sort_expr=[[]] [a:UInt32, b:UInt32, c:UInt32]\ - \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; - assert_plan_eq(plan, expected) - } - - #[test] - fn test_subquery_schema() -> Result<()> { - let analyzer = Analyzer::with_rules(vec![Arc::new(ExpandWildcardRule::new())]); - let options = ConfigOptions::default(); - let subquery = LogicalPlanBuilder::from(test_table_scan()?) - .project(vec![wildcard()])? - .build()?; - let plan = LogicalPlanBuilder::from(subquery) - .alias("sub")? - .project(vec![wildcard()])? - .build()?; - let analyzed_plan = analyzer.execute_and_check(plan, &options, |_, _| {})?; - for x in analyzed_plan.inputs() { - for field in x.schema().fields() { - assert_ne!(field.name(), "*"); - } - } - Ok(()) - } - - fn employee_schema() -> Schema { - Schema::new(vec![ - Field::new("id", DataType::Int32, false), - Field::new("first_name", DataType::Utf8, false), - Field::new("last_name", DataType::Utf8, false), - Field::new("state", DataType::Utf8, false), - Field::new("salary", DataType::Int32, false), - ]) - } - - #[test] - fn plan_using_join_wildcard_projection() -> Result<()> { - let t2 = table_scan(Some("t2"), &employee_schema(), None)?.build()?; - - let plan = table_scan(Some("t1"), &employee_schema(), None)? - .join_using(t2, JoinType::Inner, vec!["id"])? - .project(vec![wildcard()])? - .build()?; - - let expected = "Projection: *\ - \n Inner Join: Using t1.id = t2.id\ - \n TableScan: t1\ - \n TableScan: t2"; - - assert_eq!(expected, format!("{plan}")); - - let analyzer = Analyzer::with_rules(vec![Arc::new(ExpandWildcardRule::new())]); - let options = ConfigOptions::default(); - - let analyzed_plan = analyzer.execute_and_check(plan, &options, |_, _| {})?; - - // id column should only show up once in projection - let expected = "Projection: t1.id, t1.first_name, t1.last_name, t1.state, t1.salary, t2.first_name, t2.last_name, t2.state, t2.salary\ - \n Inner Join: Using t1.id = t2.id\ - \n TableScan: t1\ - \n TableScan: t2"; - assert_eq!(expected, format!("{analyzed_plan}")); - - Ok(()) - } -} diff --git a/datafusion/optimizer/src/analyzer/mod.rs b/datafusion/optimizer/src/analyzer/mod.rs index d2ac9575f7c3..1d199f2faafc 100644 --- a/datafusion/optimizer/src/analyzer/mod.rs +++ b/datafusion/optimizer/src/analyzer/mod.rs @@ -28,7 +28,6 @@ use datafusion_common::Result; use datafusion_expr::expr_rewriter::FunctionRewrite; use datafusion_expr::{InvariantLevel, LogicalPlan}; -use crate::analyzer::expand_wildcard_rule::ExpandWildcardRule; use crate::analyzer::inline_table_scan::InlineTableScan; use crate::analyzer::resolve_grouping_function::ResolveGroupingFunction; use crate::analyzer::type_coercion::TypeCoercion; @@ -36,7 +35,6 @@ use crate::utils::log_plan; use self::function_rewrite::ApplyFunctionRewrites; -pub mod expand_wildcard_rule; pub mod function_rewrite; pub mod inline_table_scan; pub mod resolve_grouping_function; From 63fb30b5557d2991d97752a36cd5d9432388db85 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Tue, 11 Mar 2025 20:09:56 +0800 Subject: [PATCH 10/12] Remove ExpandWildcardRule from Analyzer in ViewTable --- datafusion/core/src/datasource/view.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/datafusion/core/src/datasource/view.rs b/datafusion/core/src/datasource/view.rs index 91e9b6789fda..e4f57b0d9798 100644 --- a/datafusion/core/src/datasource/view.rs +++ b/datafusion/core/src/datasource/view.rs @@ -30,7 +30,6 @@ use datafusion_catalog::Session; use datafusion_common::config::ConfigOptions; use datafusion_common::Column; use datafusion_expr::{LogicalPlanBuilder, TableProviderFilterPushDown}; -use datafusion_optimizer::analyzer::expand_wildcard_rule::ExpandWildcardRule; use datafusion_optimizer::analyzer::type_coercion::TypeCoercion; use datafusion_optimizer::Analyzer; @@ -68,11 +67,11 @@ impl ViewTable { fn apply_required_rule(logical_plan: LogicalPlan) -> Result { let options = ConfigOptions::default(); - Analyzer::with_rules(vec![ - Arc::new(ExpandWildcardRule::new()), - Arc::new(TypeCoercion::new()), - ]) - .execute_and_check(logical_plan, &options, |_, _| {}) + Analyzer::with_rules(vec![Arc::new(TypeCoercion::new())]).execute_and_check( + logical_plan, + &options, + |_, _| {}, + ) } /// Get definition ref From 6cdf4d78796cda54d0bfe93ab56427d311b4b071 Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Tue, 11 Mar 2025 20:45:39 +0800 Subject: [PATCH 11/12] fix --- datafusion/optimizer/src/analyzer/inline_table_scan.rs | 2 +- datafusion/substrait/src/logical_plan/producer.rs | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/datafusion/optimizer/src/analyzer/inline_table_scan.rs b/datafusion/optimizer/src/analyzer/inline_table_scan.rs index 4662e0d44a18..350e65e1e329 100644 --- a/datafusion/optimizer/src/analyzer/inline_table_scan.rs +++ b/datafusion/optimizer/src/analyzer/inline_table_scan.rs @@ -183,7 +183,7 @@ mod tests { let plan = scan.filter(col("x.a").eq(lit(1)))?.build()?; let expected = "Filter: x.a = Int32(1)\ \n SubqueryAlias: x\ - \n Projection: *\ + \n Projection: y.a, y.b\ \n TableScan: y"; assert_analyzed_plan_eq(Arc::new(InlineTableScan::new()), plan, expected) diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index fc24d5bb91f0..6756fb184b91 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -15,9 +15,6 @@ // specific language governing permissions and limitations // under the License. -use datafusion::config::ConfigOptions; -use datafusion::optimizer::analyzer::expand_wildcard_rule::ExpandWildcardRule; -use datafusion::optimizer::AnalyzerRule; use std::sync::Arc; use substrait::proto::expression_reference::ExprType; @@ -434,10 +431,6 @@ pub fn to_substrait_plan(plan: &LogicalPlan, state: &SessionState) -> Result Date: Wed, 12 Mar 2025 08:13:25 +0800 Subject: [PATCH 12/12] clippy --- datafusion/substrait/src/logical_plan/producer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index bd2f7fdd75c6..3021d4e38f5b 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -435,7 +435,7 @@ pub fn to_substrait_plan(plan: &LogicalPlan, state: &SessionState) -> Result