|
| 1 | +// Licensed to the Apache Software Foundation (ASF) under one |
| 2 | +// or more contributor license agreements. See the NOTICE file |
| 3 | +// distributed with this work for additional information |
| 4 | +// regarding copyright ownership. The ASF licenses this file |
| 5 | +// to you under the Apache License, Version 2.0 (the |
| 6 | +// "License"); you may not use this file except in compliance |
| 7 | +// with the License. You may obtain a copy of the License at |
| 8 | +// |
| 9 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +// |
| 11 | +// Unless required by applicable law or agreed to in writing, |
| 12 | +// software distributed under the License is distributed on an |
| 13 | +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 14 | +// KIND, either express or implied. See the License for the |
| 15 | +// specific language governing permissions and limitations |
| 16 | +// under the License. |
| 17 | + |
| 18 | +use std::vec; |
| 19 | + |
| 20 | +use arrow_schema::*; |
| 21 | +use datafusion_common::{DFSchema, Result, TableReference}; |
| 22 | +use datafusion_expr::{col, table_scan}; |
| 23 | +use datafusion_sql::planner::{ContextProvider, PlannerContext, SqlToRel}; |
| 24 | +use datafusion_sql::unparser::dialect::{ |
| 25 | + DefaultDialect as UnparserDefaultDialect, Dialect as UnparserDialect, |
| 26 | + MySqlDialect as UnparserMySqlDialect, |
| 27 | +}; |
| 28 | +use datafusion_sql::unparser::{expr_to_sql, plan_to_sql, Unparser}; |
| 29 | + |
| 30 | +use sqlparser::dialect::{Dialect, GenericDialect, MySqlDialect}; |
| 31 | +use sqlparser::parser::Parser; |
| 32 | + |
| 33 | +use crate::common::MockContextProvider; |
| 34 | + |
| 35 | +#[test] |
| 36 | +fn roundtrip_expr() { |
| 37 | + let tests: Vec<(TableReference, &str, &str)> = vec![ |
| 38 | + (TableReference::bare("person"), "age > 35", r#"(age > 35)"#), |
| 39 | + ( |
| 40 | + TableReference::bare("person"), |
| 41 | + "id = '10'", |
| 42 | + r#"(id = '10')"#, |
| 43 | + ), |
| 44 | + ( |
| 45 | + TableReference::bare("person"), |
| 46 | + "CAST(id AS VARCHAR)", |
| 47 | + r#"CAST(id AS VARCHAR)"#, |
| 48 | + ), |
| 49 | + ( |
| 50 | + TableReference::bare("person"), |
| 51 | + "SUM((age * 2))", |
| 52 | + r#"SUM((age * 2))"#, |
| 53 | + ), |
| 54 | + ]; |
| 55 | + |
| 56 | + let roundtrip = |table, sql: &str| -> Result<String> { |
| 57 | + let dialect = GenericDialect {}; |
| 58 | + let sql_expr = Parser::new(&dialect).try_with_sql(sql)?.parse_expr()?; |
| 59 | + |
| 60 | + let context = MockContextProvider::default(); |
| 61 | + let schema = context.get_table_source(table)?.schema(); |
| 62 | + let df_schema = DFSchema::try_from(schema.as_ref().clone())?; |
| 63 | + let sql_to_rel = SqlToRel::new(&context); |
| 64 | + let expr = |
| 65 | + sql_to_rel.sql_to_expr(sql_expr, &df_schema, &mut PlannerContext::new())?; |
| 66 | + |
| 67 | + let ast = expr_to_sql(&expr)?; |
| 68 | + |
| 69 | + Ok(format!("{}", ast)) |
| 70 | + }; |
| 71 | + |
| 72 | + for (table, query, expected) in tests { |
| 73 | + let actual = roundtrip(table, query).unwrap(); |
| 74 | + assert_eq!(actual, expected); |
| 75 | + } |
| 76 | +} |
| 77 | + |
| 78 | +#[test] |
| 79 | +fn roundtrip_statement() -> Result<()> { |
| 80 | + let tests: Vec<&str> = vec![ |
| 81 | + "select ta.j1_id from j1 ta;", |
| 82 | + "select ta.j1_id from j1 ta order by ta.j1_id;", |
| 83 | + "select * from j1 ta order by ta.j1_id, ta.j1_string desc;", |
| 84 | + "select * from j1 limit 10;", |
| 85 | + "select ta.j1_id from j1 ta where ta.j1_id > 1;", |
| 86 | + "select ta.j1_id, tb.j2_string from j1 ta join j2 tb on (ta.j1_id = tb.j2_id);", |
| 87 | + "select ta.j1_id, tb.j2_string, tc.j3_string from j1 ta join j2 tb on (ta.j1_id = tb.j2_id) join j3 tc on (ta.j1_id = tc.j3_id);", |
| 88 | + "select * from (select id, first_name from person)", |
| 89 | + "select * from (select id, first_name from (select * from person))", |
| 90 | + "select id, count(*) as cnt from (select id from person) group by id", |
| 91 | + "select (id-1)/2, count(*) / (sum(id/10)-1) as agg_expr from (select (id-1) as id from person) group by id", |
| 92 | + "select CAST(id/2 as VARCHAR) NOT LIKE 'foo*' from person where NOT EXISTS (select ta.j1_id, tb.j2_string from j1 ta join j2 tb on (ta.j1_id = tb.j2_id))", |
| 93 | + r#"select "First Name" from person_quoted_cols"#, |
| 94 | + "select DISTINCT id FROM person", |
| 95 | + "select DISTINCT on (id) id, first_name from person", |
| 96 | + "select DISTINCT on (id) id, first_name from person order by id", |
| 97 | + r#"select id, count("First Name") as cnt from (select id, "First Name" from person_quoted_cols) group by id"#, |
| 98 | + "select id, count(*) as cnt from (select p1.id as id from person p1 inner join person p2 on p1.id=p2.id) group by id", |
| 99 | + "select id, count(*), first_name from person group by first_name, id", |
| 100 | + "select id, sum(age), first_name from person group by first_name, id", |
| 101 | + "select id, count(*), first_name |
| 102 | + from person |
| 103 | + where id!=3 and first_name=='test' |
| 104 | + group by first_name, id |
| 105 | + having count(*)>5 and count(*)<10 |
| 106 | + order by count(*)", |
| 107 | + r#"select id, count("First Name") as count_first_name, "Last Name" |
| 108 | + from person_quoted_cols |
| 109 | + where id!=3 and "First Name"=='test' |
| 110 | + group by "Last Name", id |
| 111 | + having count_first_name>5 and count_first_name<10 |
| 112 | + order by count_first_name, "Last Name""#, |
| 113 | + r#"select p.id, count("First Name") as count_first_name, |
| 114 | + "Last Name", sum(qp.id/p.id - (select sum(id) from person_quoted_cols) ) / (select count(*) from person) |
| 115 | + from (select id, "First Name", "Last Name" from person_quoted_cols) qp |
| 116 | + inner join (select * from person) p |
| 117 | + on p.id = qp.id |
| 118 | + where p.id!=3 and "First Name"=='test' and qp.id in |
| 119 | + (select id from (select id, count(*) from person group by id having count(*) > 0)) |
| 120 | + group by "Last Name", p.id |
| 121 | + having count_first_name>5 and count_first_name<10 |
| 122 | + order by count_first_name, "Last Name""#, |
| 123 | + r#"SELECT j1_string as string FROM j1 |
| 124 | + UNION ALL |
| 125 | + SELECT j2_string as string FROM j2"#, |
| 126 | + r#"SELECT j1_string as string FROM j1 |
| 127 | + UNION ALL |
| 128 | + SELECT j2_string as string FROM j2 |
| 129 | + ORDER BY string DESC |
| 130 | + LIMIT 10"# |
| 131 | + ]; |
| 132 | + |
| 133 | + // For each test sql string, we transform as follows: |
| 134 | + // sql -> ast::Statement (s1) -> LogicalPlan (p1) -> ast::Statement (s2) -> LogicalPlan (p2) |
| 135 | + // We test not that s1==s2, but rather p1==p2. This ensures that unparser preserves the logical |
| 136 | + // query information of the original sql string and disreguards other differences in syntax or |
| 137 | + // quoting. |
| 138 | + for query in tests { |
| 139 | + let dialect = GenericDialect {}; |
| 140 | + let statement = Parser::new(&dialect) |
| 141 | + .try_with_sql(query)? |
| 142 | + .parse_statement()?; |
| 143 | + |
| 144 | + let context = MockContextProvider::default(); |
| 145 | + let sql_to_rel = SqlToRel::new(&context); |
| 146 | + let plan = sql_to_rel.sql_statement_to_plan(statement).unwrap(); |
| 147 | + |
| 148 | + let roundtrip_statement = plan_to_sql(&plan)?; |
| 149 | + |
| 150 | + let actual = format!("{}", &roundtrip_statement); |
| 151 | + println!("roundtrip sql: {actual}"); |
| 152 | + println!("plan {}", plan.display_indent()); |
| 153 | + |
| 154 | + let plan_roundtrip = sql_to_rel |
| 155 | + .sql_statement_to_plan(roundtrip_statement.clone()) |
| 156 | + .unwrap(); |
| 157 | + |
| 158 | + assert_eq!(plan, plan_roundtrip); |
| 159 | + } |
| 160 | + |
| 161 | + Ok(()) |
| 162 | +} |
| 163 | + |
| 164 | +#[test] |
| 165 | +fn roundtrip_crossjoin() -> Result<()> { |
| 166 | + let query = "select j1.j1_id, j2.j2_string from j1, j2"; |
| 167 | + |
| 168 | + let dialect = GenericDialect {}; |
| 169 | + let statement = Parser::new(&dialect) |
| 170 | + .try_with_sql(query)? |
| 171 | + .parse_statement()?; |
| 172 | + |
| 173 | + let context = MockContextProvider::default(); |
| 174 | + let sql_to_rel = SqlToRel::new(&context); |
| 175 | + let plan = sql_to_rel.sql_statement_to_plan(statement).unwrap(); |
| 176 | + |
| 177 | + let roundtrip_statement = plan_to_sql(&plan)?; |
| 178 | + |
| 179 | + let actual = format!("{}", &roundtrip_statement); |
| 180 | + println!("roundtrip sql: {actual}"); |
| 181 | + println!("plan {}", plan.display_indent()); |
| 182 | + |
| 183 | + let plan_roundtrip = sql_to_rel |
| 184 | + .sql_statement_to_plan(roundtrip_statement.clone()) |
| 185 | + .unwrap(); |
| 186 | + |
| 187 | + let expected = "Projection: j1.j1_id, j2.j2_string\ |
| 188 | + \n Inner Join: Filter: Boolean(true)\ |
| 189 | + \n TableScan: j1\ |
| 190 | + \n TableScan: j2"; |
| 191 | + |
| 192 | + assert_eq!(format!("{plan_roundtrip:?}"), expected); |
| 193 | + |
| 194 | + Ok(()) |
| 195 | +} |
| 196 | + |
| 197 | +#[test] |
| 198 | +fn roundtrip_statement_with_dialect() -> Result<()> { |
| 199 | + struct TestStatementWithDialect { |
| 200 | + sql: &'static str, |
| 201 | + expected: &'static str, |
| 202 | + parser_dialect: Box<dyn Dialect>, |
| 203 | + unparser_dialect: Box<dyn UnparserDialect>, |
| 204 | + } |
| 205 | + let tests: Vec<TestStatementWithDialect> = vec![ |
| 206 | + TestStatementWithDialect { |
| 207 | + sql: "select ta.j1_id from j1 ta order by j1_id limit 10;", |
| 208 | + expected: |
| 209 | + "SELECT `ta`.`j1_id` FROM `j1` AS `ta` ORDER BY `ta`.`j1_id` ASC LIMIT 10", |
| 210 | + parser_dialect: Box::new(MySqlDialect {}), |
| 211 | + unparser_dialect: Box::new(UnparserMySqlDialect {}), |
| 212 | + }, |
| 213 | + TestStatementWithDialect { |
| 214 | + sql: "select ta.j1_id from j1 ta order by j1_id limit 10;", |
| 215 | + expected: r#"SELECT ta.j1_id FROM j1 AS ta ORDER BY ta.j1_id ASC NULLS LAST LIMIT 10"#, |
| 216 | + parser_dialect: Box::new(GenericDialect {}), |
| 217 | + unparser_dialect: Box::new(UnparserDefaultDialect {}), |
| 218 | + }, |
| 219 | + ]; |
| 220 | + |
| 221 | + for query in tests { |
| 222 | + let statement = Parser::new(&*query.parser_dialect) |
| 223 | + .try_with_sql(query.sql)? |
| 224 | + .parse_statement()?; |
| 225 | + |
| 226 | + let context = MockContextProvider::default(); |
| 227 | + let sql_to_rel = SqlToRel::new(&context); |
| 228 | + let plan = sql_to_rel.sql_statement_to_plan(statement).unwrap(); |
| 229 | + |
| 230 | + let unparser = Unparser::new(&*query.unparser_dialect); |
| 231 | + let roundtrip_statement = unparser.plan_to_sql(&plan)?; |
| 232 | + |
| 233 | + let actual = format!("{}", &roundtrip_statement); |
| 234 | + println!("roundtrip sql: {actual}"); |
| 235 | + println!("plan {}", plan.display_indent()); |
| 236 | + |
| 237 | + assert_eq!(query.expected, actual); |
| 238 | + } |
| 239 | + |
| 240 | + Ok(()) |
| 241 | +} |
| 242 | + |
| 243 | +#[test] |
| 244 | +fn test_unnest_logical_plan() -> Result<()> { |
| 245 | + let query = "select unnest(struct_col), unnest(array_col), struct_col, array_col from unnest_table"; |
| 246 | + |
| 247 | + let dialect = GenericDialect {}; |
| 248 | + let statement = Parser::new(&dialect) |
| 249 | + .try_with_sql(query)? |
| 250 | + .parse_statement()?; |
| 251 | + |
| 252 | + let context = MockContextProvider::default(); |
| 253 | + let sql_to_rel = SqlToRel::new(&context); |
| 254 | + let plan = sql_to_rel.sql_statement_to_plan(statement).unwrap(); |
| 255 | + |
| 256 | + let expected = "Projection: unnest(unnest_table.struct_col).field1, unnest(unnest_table.struct_col).field2, unnest(unnest_table.array_col), unnest_table.struct_col, unnest_table.array_col\ |
| 257 | + \n Unnest: lists[unnest(unnest_table.array_col)] structs[unnest(unnest_table.struct_col)]\ |
| 258 | + \n Projection: unnest_table.struct_col AS unnest(unnest_table.struct_col), unnest_table.array_col AS unnest(unnest_table.array_col), unnest_table.struct_col, unnest_table.array_col\ |
| 259 | + \n TableScan: unnest_table"; |
| 260 | + |
| 261 | + assert_eq!(format!("{plan:?}"), expected); |
| 262 | + |
| 263 | + Ok(()) |
| 264 | +} |
| 265 | + |
| 266 | +#[test] |
| 267 | +fn test_table_references_in_plan_to_sql() { |
| 268 | + fn test(table_name: &str, expected_sql: &str) { |
| 269 | + let schema = Schema::new(vec![ |
| 270 | + Field::new("id", DataType::Utf8, false), |
| 271 | + Field::new("value", DataType::Utf8, false), |
| 272 | + ]); |
| 273 | + let plan = table_scan(Some(table_name), &schema, None) |
| 274 | + .unwrap() |
| 275 | + .project(vec![col("id"), col("value")]) |
| 276 | + .unwrap() |
| 277 | + .build() |
| 278 | + .unwrap(); |
| 279 | + let sql = plan_to_sql(&plan).unwrap(); |
| 280 | + |
| 281 | + assert_eq!(format!("{}", sql), expected_sql) |
| 282 | + } |
| 283 | + |
| 284 | + test("catalog.schema.table", "SELECT catalog.\"schema\".\"table\".id, catalog.\"schema\".\"table\".\"value\" FROM catalog.\"schema\".\"table\""); |
| 285 | + test("schema.table", "SELECT \"schema\".\"table\".id, \"schema\".\"table\".\"value\" FROM \"schema\".\"table\""); |
| 286 | + test( |
| 287 | + "table", |
| 288 | + "SELECT \"table\".id, \"table\".\"value\" FROM \"table\"", |
| 289 | + ); |
| 290 | +} |
0 commit comments