Skip to content

Commit d3237b2

Browse files
jonahgao and alamb authored
fix: schema error when parsing order-by expressions (#10234)
* fix: schema error when parsing order-by expressions
* add test from issue
* improve order_by_to_sort_expr
* add test
* Update datafusion/sqllogictest/test_files/order.slt

  Co-authored-by: Andrew Lamb <[email protected]>
* fix tests
* add test

---------

Co-authored-by: Andrew Lamb <[email protected]>
1 parent 39d9323 commit d3237b2

File tree

10 files changed

+191
-50
lines changed

10 files changed

+191
-50
lines changed

datafusion/sql/src/expr/function.rs

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
150150
planner_context,
151151
// Numeric literals in window function ORDER BY are treated as constants
152152
false,
153+
None,
153154
)?;
154155

155156
let func_deps = schema.functional_dependencies();
@@ -219,8 +220,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
219220
} else {
220221
// User defined aggregate functions (UDAF) have precedence in case it has the same name as a scalar built-in function
221222
if let Some(fm) = self.context_provider.get_aggregate_meta(&name) {
222-
let order_by =
223-
self.order_by_to_sort_expr(&order_by, schema, planner_context, true)?;
223+
let order_by = self.order_by_to_sort_expr(
224+
&order_by,
225+
schema,
226+
planner_context,
227+
true,
228+
None,
229+
)?;
224230
let order_by = (!order_by.is_empty()).then_some(order_by);
225231
let args = self.function_args_to_expr(args, schema, planner_context)?;
226232
// TODO: Support filter and distinct for UDAFs
@@ -236,8 +242,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
236242

237243
// next, aggregate built-ins
238244
if let Ok(fun) = AggregateFunction::from_str(&name) {
239-
let order_by =
240-
self.order_by_to_sort_expr(&order_by, schema, planner_context, true)?;
245+
let order_by = self.order_by_to_sort_expr(
246+
&order_by,
247+
schema,
248+
planner_context,
249+
true,
250+
None,
251+
)?;
241252
let order_by = (!order_by.is_empty()).then_some(order_by);
242253
let args = self.function_args_to_expr(args, schema, planner_context)?;
243254
let filter: Option<Box<Expr>> = filter

datafusion/sql/src/expr/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
699699
input_schema,
700700
planner_context,
701701
true,
702+
None,
702703
)?)
703704
} else {
704705
None

datafusion/sql/src/expr/order_by.rs

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,39 @@ use sqlparser::ast::{Expr as SQLExpr, OrderByExpr, Value};
2424
impl<'a, S: ContextProvider> SqlToRel<'a, S> {
2525
/// Convert sql [OrderByExpr] to `Vec<Expr>`.
2626
///
27-
/// If `literal_to_column` is true, treat any numeric literals (e.g. `2`) as a 1 based index
28-
/// into the SELECT list (e.g. `SELECT a, b FROM table ORDER BY 2`).
27+
/// `input_schema` and `additional_schema` are used to resolve column references in the order-by expressions.
28+
/// `input_schema` is the schema of the input logical plan, typically derived from the SELECT list.
29+
///
30+
/// Usually order-by expressions can only reference the input plan's columns.
31+
/// But the `SELECT ... FROM ... ORDER BY ...` syntax is a special case. Besides the input schema,
32+
/// it can reference an `additional_schema` derived from the `FROM` clause.
33+
///
34+
/// If `literal_to_column` is true, treat any numeric literals (e.g. `2`) as a 1 based index into the
35+
/// SELECT list (e.g. `SELECT a, b FROM table ORDER BY 2`). Literals only reference the `input_schema`.
36+
///
2937
/// If false, interpret numeric literals as constant values.
3038
pub(crate) fn order_by_to_sort_expr(
3139
&self,
3240
exprs: &[OrderByExpr],
33-
schema: &DFSchema,
41+
input_schema: &DFSchema,
3442
planner_context: &mut PlannerContext,
3543
literal_to_column: bool,
44+
additional_schema: Option<&DFSchema>,
3645
) -> Result<Vec<Expr>> {
46+
if exprs.is_empty() {
47+
return Ok(vec![]);
48+
}
49+
50+
let mut combined_schema;
51+
let order_by_schema = match additional_schema {
52+
Some(schema) => {
53+
combined_schema = input_schema.clone();
54+
combined_schema.merge(schema);
55+
&combined_schema
56+
}
57+
None => input_schema,
58+
};
59+
3760
let mut expr_vec = vec![];
3861
for e in exprs {
3962
let OrderByExpr {
@@ -52,17 +75,23 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
5275
return plan_err!(
5376
"Order by index starts at 1 for column indexes"
5477
);
55-
} else if schema.fields().len() < field_index {
78+
} else if input_schema.fields().len() < field_index {
5679
return plan_err!(
5780
"Order by column out of bounds, specified: {}, max: {}",
5881
field_index,
59-
schema.fields().len()
82+
input_schema.fields().len()
6083
);
6184
}
6285

63-
Expr::Column(Column::from(schema.qualified_field(field_index - 1)))
86+
Expr::Column(Column::from(
87+
input_schema.qualified_field(field_index - 1),
88+
))
6489
}
65-
e => self.sql_expr_to_logical_expr(e.clone(), schema, planner_context)?,
90+
e => self.sql_expr_to_logical_expr(
91+
e.clone(),
92+
order_by_schema,
93+
planner_context,
94+
)?,
6695
};
6796
let asc = asc.unwrap_or(true);
6897
expr_vec.push(Expr::Sort(Sort::new(

datafusion/sql/src/query.rs

Lines changed: 51 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ use datafusion_expr::{
2525
Operator,
2626
};
2727
use sqlparser::ast::{
28-
Expr as SQLExpr, Offset as SQLOffset, OrderByExpr, Query, SetExpr, Value,
28+
Expr as SQLExpr, Offset as SQLOffset, Query, SelectInto, SetExpr, Value,
2929
};
3030

3131
impl<'a, S: ContextProvider> SqlToRel<'a, S> {
@@ -46,29 +46,35 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
4646
query: Query,
4747
planner_context: &mut PlannerContext,
4848
) -> Result<LogicalPlan> {
49-
let mut set_expr = query.body;
5049
if let Some(with) = query.with {
5150
self.plan_with_clause(with, planner_context)?;
5251
}
53-
// Take the `SelectInto` for later processing.
54-
let select_into = match set_expr.as_mut() {
55-
SetExpr::Select(select) => select.into.take(),
56-
_ => None,
57-
};
58-
let plan = self.set_expr_to_plan(*set_expr, planner_context)?;
59-
let plan = self.order_by(plan, query.order_by, planner_context)?;
60-
let mut plan = self.limit(plan, query.offset, query.limit)?;
61-
if let Some(into) = select_into {
62-
plan = LogicalPlan::Ddl(DdlStatement::CreateMemoryTable(CreateMemoryTable {
63-
name: self.object_name_to_table_reference(into.name)?,
64-
constraints: Constraints::empty(),
65-
input: Arc::new(plan),
66-
if_not_exists: false,
67-
or_replace: false,
68-
column_defaults: vec![],
69-
}))
52+
53+
let set_expr = *query.body;
54+
match set_expr {
55+
SetExpr::Select(mut select) => {
56+
let select_into = select.into.take();
57+
// Order-by expressions may refer to columns in the `FROM` clause,
58+
// so we need to process `SELECT` and `ORDER BY` together.
59+
let plan =
60+
self.select_to_plan(*select, query.order_by, planner_context)?;
61+
let plan = self.limit(plan, query.offset, query.limit)?;
62+
// Process the `SELECT INTO` after `LIMIT`.
63+
self.select_into(plan, select_into)
64+
}
65+
other => {
66+
let plan = self.set_expr_to_plan(other, planner_context)?;
67+
let order_by_rex = self.order_by_to_sort_expr(
68+
&query.order_by,
69+
plan.schema(),
70+
planner_context,
71+
true,
72+
None,
73+
)?;
74+
let plan = self.order_by(plan, order_by_rex)?;
75+
self.limit(plan, query.offset, query.limit)
76+
}
7077
}
71-
Ok(plan)
7278
}
7379

7480
/// Wrap a plan in a limit
@@ -114,26 +120,43 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
114120
}
115121

116122
/// Wrap the logical plan in a sort
117-
fn order_by(
123+
pub(super) fn order_by(
118124
&self,
119125
plan: LogicalPlan,
120-
order_by: Vec<OrderByExpr>,
121-
planner_context: &mut PlannerContext,
126+
order_by: Vec<Expr>,
122127
) -> Result<LogicalPlan> {
123128
if order_by.is_empty() {
124129
return Ok(plan);
125130
}
126131

127-
let order_by_rex =
128-
self.order_by_to_sort_expr(&order_by, plan.schema(), planner_context, true)?;
129-
130132
if let LogicalPlan::Distinct(Distinct::On(ref distinct_on)) = plan {
131133
// In case of `DISTINCT ON` we must capture the sort expressions since during the plan
132134
// optimization we're effectively doing a `first_value` aggregation according to them.
133-
let distinct_on = distinct_on.clone().with_sort_expr(order_by_rex)?;
135+
let distinct_on = distinct_on.clone().with_sort_expr(order_by)?;
134136
Ok(LogicalPlan::Distinct(Distinct::On(distinct_on)))
135137
} else {
136-
LogicalPlanBuilder::from(plan).sort(order_by_rex)?.build()
138+
LogicalPlanBuilder::from(plan).sort(order_by)?.build()
139+
}
140+
}
141+
142+
/// Wrap the logical plan in a `SelectInto`
143+
fn select_into(
144+
&self,
145+
plan: LogicalPlan,
146+
select_into: Option<SelectInto>,
147+
) -> Result<LogicalPlan> {
148+
match select_into {
149+
Some(into) => Ok(LogicalPlan::Ddl(DdlStatement::CreateMemoryTable(
150+
CreateMemoryTable {
151+
name: self.object_name_to_table_reference(into.name)?,
152+
constraints: Constraints::empty(),
153+
input: Arc::new(plan),
154+
if_not_exists: false,
155+
or_replace: false,
156+
column_defaults: vec![],
157+
},
158+
))),
159+
_ => Ok(plan),
137160
}
138161
}
139162
}

datafusion/sql/src/select.rs

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ use datafusion_common::{not_impl_err, plan_err, DataFusionError, Result};
2929
use datafusion_common::{Column, UnnestOptions};
3030
use datafusion_expr::expr::{Alias, Unnest};
3131
use datafusion_expr::expr_rewriter::{
32-
normalize_col, normalize_col_with_schemas_and_ambiguity_check,
32+
normalize_col, normalize_col_with_schemas_and_ambiguity_check, normalize_cols,
3333
};
3434
use datafusion_expr::utils::{
3535
expand_qualified_wildcard, expand_wildcard, expr_as_column_expr, expr_to_columns,
@@ -39,8 +39,8 @@ use datafusion_expr::{
3939
Expr, Filter, GroupingSet, LogicalPlan, LogicalPlanBuilder, Partitioning,
4040
};
4141
use sqlparser::ast::{
42-
Distinct, Expr as SQLExpr, GroupByExpr, ReplaceSelectItem, WildcardAdditionalOptions,
43-
WindowType,
42+
Distinct, Expr as SQLExpr, GroupByExpr, OrderByExpr, ReplaceSelectItem,
43+
WildcardAdditionalOptions, WindowType,
4444
};
4545
use sqlparser::ast::{NamedWindowDefinition, Select, SelectItem, TableWithJoins};
4646

@@ -49,6 +49,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
4949
pub(super) fn select_to_plan(
5050
&self,
5151
mut select: Select,
52+
order_by: Vec<OrderByExpr>,
5253
planner_context: &mut PlannerContext,
5354
) -> Result<LogicalPlan> {
5455
// check for unsupported syntax first
@@ -94,6 +95,17 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
9495
let mut combined_schema = base_plan.schema().as_ref().clone();
9596
combined_schema.merge(projected_plan.schema());
9697

98+
// Order-by expressions prioritize referencing columns from the select list,
99+
// then from the FROM clause.
100+
let order_by_rex = self.order_by_to_sort_expr(
101+
&order_by,
102+
projected_plan.schema().as_ref(),
103+
planner_context,
104+
true,
105+
Some(base_plan.schema().as_ref()),
106+
)?;
107+
let order_by_rex = normalize_cols(order_by_rex, &projected_plan)?;
108+
97109
// this alias map is resolved and looked up in both having exprs and group by exprs
98110
let alias_map = extract_aliases(&select_exprs);
99111

@@ -248,9 +260,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
248260
.collect::<Result<Vec<_>>>()?;
249261

250262
// Build the final plan
251-
return LogicalPlanBuilder::from(base_plan)
263+
LogicalPlanBuilder::from(base_plan)
252264
.distinct_on(on_expr, select_exprs, None)?
253-
.build();
265+
.build()
254266
}
255267
}?;
256268

@@ -274,6 +286,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
274286
plan
275287
};
276288

289+
let plan = self.order_by(plan, order_by_rex)?;
290+
277291
Ok(plan)
278292
}
279293

datafusion/sql/src/set_expr.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
2727
planner_context: &mut PlannerContext,
2828
) -> Result<LogicalPlan> {
2929
match set_expr {
30-
SetExpr::Select(s) => self.select_to_plan(*s, planner_context),
30+
SetExpr::Select(s) => self.select_to_plan(*s, vec![], planner_context),
3131
SetExpr::Values(v) => self.sql_values_to_plan(v, planner_context),
3232
SetExpr::SetOperation {
3333
op,

datafusion/sql/src/statement.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -942,7 +942,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
942942
for expr in order_exprs {
943943
// Convert each OrderByExpr to a SortExpr:
944944
let expr_vec =
945-
self.order_by_to_sort_expr(&expr, schema, planner_context, true)?;
945+
self.order_by_to_sort_expr(&expr, schema, planner_context, true, None)?;
946946
// Verify that columns of all SortExprs exist in the schema:
947947
for expr in expr_vec.iter() {
948948
for column in expr.to_columns()?.iter() {

datafusion/sql/tests/sql_integration.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3646,7 +3646,7 @@ fn test_select_distinct_order_by() {
36463646
let sql = "SELECT distinct '1' from person order by id";
36473647

36483648
let expected =
3649-
"Error during planning: For SELECT DISTINCT, ORDER BY expressions id must appear in select list";
3649+
"Error during planning: For SELECT DISTINCT, ORDER BY expressions person.id must appear in select list";
36503650

36513651
// It should return error.
36523652
let result = logical_plan(sql);

0 commit comments

Comments
 (0)