Skip to content

Commit 337ebb8

Browse files
jayzhan211alamb
authored andcommitted
Support DuckDB style struct syntax (apache#11214)
* struct literal Signed-off-by: jayzhan211 <[email protected]> * add nested Signed-off-by: jayzhan211 <[email protected]> * fmt Signed-off-by: jayzhan211 <[email protected]> * rm useless comment Signed-off-by: jayzhan211 <[email protected]> * switch to NYI error, derive debug/clone * improve documentation strings * Avoid stack overflow by putting code in a new function --------- Signed-off-by: jayzhan211 <[email protected]> Co-authored-by: Andrew Lamb <[email protected]>
1 parent c487aad commit 337ebb8

File tree

7 files changed

+145
-27
lines changed

7 files changed

+145
-27
lines changed

datafusion/core/src/execution/session_state.rs

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -231,26 +231,16 @@ impl SessionState {
231231
);
232232
}
233233

234-
let mut user_defined_sql_planners = vec![];
235-
236-
// register crate of array expressions (if enabled)
237-
#[cfg(feature = "array_expressions")]
238-
{
239-
let array_planner =
240-
Arc::new(functions_array::planner::ArrayFunctionPlanner) as _;
241-
242-
let field_access_planner =
243-
Arc::new(functions_array::planner::FieldAccessPlanner) as _;
244-
245-
user_defined_sql_planners.extend(vec![array_planner, field_access_planner]);
246-
}
247-
#[cfg(feature = "datetime_expressions")]
248-
{
249-
let extract_planner =
250-
Arc::new(functions::datetime::planner::ExtractPlanner::default()) as _;
251-
252-
user_defined_sql_planners.push(extract_planner);
253-
}
234+
let user_defined_sql_planners: Vec<Arc<dyn UserDefinedSQLPlanner>> = vec![
235+
Arc::new(functions::core::planner::CoreFunctionPlanner::default()),
236+
// register crate of array expressions (if enabled)
237+
#[cfg(feature = "array_expressions")]
238+
Arc::new(functions_array::planner::ArrayFunctionPlanner),
239+
#[cfg(feature = "array_expressions")]
240+
Arc::new(functions_array::planner::FieldAccessPlanner),
241+
#[cfg(feature = "datetime_expressions")]
242+
Arc::new(functions::datetime::planner::ExtractPlanner),
243+
];
254244

255245
let mut new_self = SessionState {
256246
session_id,

datafusion/expr/src/planner.rs

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,8 @@ pub trait ContextProvider {
8484

8585
/// This trait allows users to customize the behavior of the SQL planner
8686
pub trait UserDefinedSQLPlanner: Send + Sync {
87-
/// Plan the binary operation between two expressions, returns OriginalBinaryExpr if not possible
87+
/// Plan the binary operation between two expressions, returns original
88+
/// BinaryExpr if not possible
8889
fn plan_binary_op(
8990
&self,
9091
expr: RawBinaryExpr,
@@ -93,7 +94,9 @@ pub trait UserDefinedSQLPlanner: Send + Sync {
9394
Ok(PlannerResult::Original(expr))
9495
}
9596

96-
/// Plan the field access expression, returns OriginalFieldAccessExpr if not possible
97+
/// Plan the field access expression
98+
///
99+
/// Returns the original FieldAccessExpr if not possible
97100
fn plan_field_access(
98101
&self,
99102
expr: RawFieldAccessExpr,
@@ -102,7 +105,9 @@ pub trait UserDefinedSQLPlanner: Send + Sync {
102105
Ok(PlannerResult::Original(expr))
103106
}
104107

105-
// Plan the array literal, returns OriginalArray if not possible
108+
/// Plan the array literal
109+
///
110+
/// Returns the original expression arguments if not possible
106111
fn plan_array_literal(
107112
&self,
108113
exprs: Vec<Expr>,
@@ -111,8 +116,20 @@ pub trait UserDefinedSQLPlanner: Send + Sync {
111116
Ok(PlannerResult::Original(exprs))
112117
}
113118

114-
// Plan the Extract expression, e.g., EXTRACT(month FROM foo)
115-
// returns origin expression arguments if not possible
119+
/// Plan the dictionary literal `{ key: value, ...}`
120+
///
121+
/// Returns the original expression, wrapped in [`PlannerResult::Original`],
/// if planning is not possible. This default implementation always does so
/// and does not consult the schema.
122+
fn plan_dictionary_literal(
123+
&self,
124+
expr: RawDictionaryExpr,
125+
_schema: &DFSchema,
126+
) -> Result<PlannerResult<RawDictionaryExpr>> {
127+
Ok(PlannerResult::Original(expr))
128+
}
129+
130+
/// Plan an extract expression, e.g., `EXTRACT(month FROM foo)`
131+
///
132+
/// Returns the original expression arguments, wrapped in
/// [`PlannerResult::Original`], if planning is not possible. This default
/// implementation always does so.
116133
fn plan_extract(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
117134
Ok(PlannerResult::Original(args))
118135
}
@@ -142,6 +159,16 @@ pub struct RawFieldAccessExpr {
142159
pub expr: Expr,
143160
}
144161

162+
/// A dictionary literal expression `{ key: value, ...}`
163+
///
164+
/// This structure is passed to [`UserDefinedSQLPlanner::plan_dictionary_literal`]
165+
/// so that the literal can be planned into a custom expression.
166+
#[derive(Debug, Clone)]
167+
pub struct RawDictionaryExpr {
168+
/// Key expressions of the literal; `keys[i]` belongs with `values[i]`.
pub keys: Vec<Expr>,
169+
/// Value expressions of the literal, in the same order as `keys`.
pub values: Vec<Expr>,
170+
}
171+
145172
/// Result of planning a raw expr with [`UserDefinedSQLPlanner`]
146173
#[derive(Debug, Clone)]
147174
pub enum PlannerResult<T> {

datafusion/functions/src/core/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ pub mod named_struct;
2929
pub mod nullif;
3030
pub mod nvl;
3131
pub mod nvl2;
32+
pub mod planner;
3233
pub mod r#struct;
3334

3435
// create UDFs
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use datafusion_common::DFSchema;
19+
use datafusion_common::Result;
20+
use datafusion_expr::planner::{PlannerResult, RawDictionaryExpr, UserDefinedSQLPlanner};
21+
22+
use super::named_struct;
23+
24+
/// [`UserDefinedSQLPlanner`] for the core functions.
///
/// The only hook overridden here is `plan_dictionary_literal`, which plans a
/// dictionary literal `{ key: value, ...}` as a call to `named_struct`.
#[derive(Default)]
25+
pub struct CoreFunctionPlanner {}
26+
27+
impl UserDefinedSQLPlanner for CoreFunctionPlanner {
28+
/// Plan a dictionary literal as `named_struct(k1, v1, k2, v2, ...)`.
///
/// Always returns [`PlannerResult::Planned`]; the schema is not consulted.
fn plan_dictionary_literal(
29+
&self,
30+
expr: RawDictionaryExpr,
31+
_schema: &DFSchema,
32+
) -> Result<PlannerResult<RawDictionaryExpr>> {
33+
// Flatten the (key, value) pairs into one alternating argument list:
// [k1, v1, k2, v2, ...].
let mut args = vec![];
34+
// NOTE: `zip` stops at the shorter of the two vectors, so an unpaired
// trailing key or value would be dropped silently.
for (k, v) in expr.keys.into_iter().zip(expr.values.into_iter()) {
35+
args.push(k);
36+
args.push(v);
37+
}
38+
Ok(PlannerResult::Planned(named_struct().call(args)))
39+
}
40+
}

datafusion/functions/src/datetime/planner.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ use datafusion_expr::{
2525
};
2626

2727
#[derive(Default)]
28-
pub struct ExtractPlanner {}
28+
pub struct ExtractPlanner;
2929

3030
impl UserDefinedSQLPlanner for ExtractPlanner {
3131
fn plan_extract(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {

datafusion/sql/src/expr/mod.rs

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,11 @@
1818
use arrow_schema::DataType;
1919
use arrow_schema::TimeUnit;
2020
use datafusion_expr::planner::PlannerResult;
21+
use datafusion_expr::planner::RawDictionaryExpr;
2122
use datafusion_expr::planner::RawFieldAccessExpr;
22-
use sqlparser::ast::{CastKind, Expr as SQLExpr, Subscript, TrimWhereField, Value};
23+
use sqlparser::ast::{
24+
CastKind, DictionaryField, Expr as SQLExpr, Subscript, TrimWhereField, Value,
25+
};
2326

2427
use datafusion_common::{
2528
internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, Result,
@@ -619,10 +622,42 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
619622
}
620623
},
621624
))),
625+
SQLExpr::Dictionary(fields) => {
626+
self.try_plan_dictionary_literal(fields, schema, planner_context)
627+
}
622628
_ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"),
623629
}
624630
}
625631

632+
/// Plan a dictionary literal `{ key: value, ...}` by delegating to the
/// registered planners.
///
/// Each field's key becomes a literal expression and each field's value is
/// converted with `sql_expr_to_logical_expr`. The resulting
/// `RawDictionaryExpr` is offered to every planner in `self.planners` in
/// order; the first one that returns `PlannerResult::Planned` wins.
///
/// # Errors
///
/// Propagates any error from converting a value or from a planner, and
/// returns a "not implemented" error if no planner plans the literal.
///
/// NOTE(review): the commit message says this code was put in its own
/// function to avoid a stack overflow — confirm before inlining it back.
fn try_plan_dictionary_literal(
633+
&self,
634+
fields: Vec<DictionaryField>,
635+
schema: &DFSchema,
636+
planner_context: &mut PlannerContext,
637+
) -> Result<Expr> {
638+
let mut keys = vec![];
639+
let mut values = vec![];
640+
// Build the two vectors in lockstep so keys[i] pairs with values[i].
for field in fields {
641+
let key = lit(field.key.value);
642+
// Values may themselves be arbitrary SQL expressions (including
// nested dictionary literals), so convert them recursively.
let value =
643+
self.sql_expr_to_logical_expr(*field.value, schema, planner_context)?;
644+
keys.push(key);
645+
values.push(value);
646+
}
647+
648+
let mut raw_expr = RawDictionaryExpr { keys, values };
649+
650+
for planner in self.planners.iter() {
651+
match planner.plan_dictionary_literal(raw_expr, schema)? {
652+
PlannerResult::Planned(expr) => {
653+
return Ok(expr);
654+
}
655+
// The planner declined and handed the expression back; pass it
// on to the next planner.
PlannerResult::Original(expr) => raw_expr = expr,
656+
}
657+
}
658+
not_impl_err!("Unsupported dictionary literal: {raw_expr:?}")
659+
}
660+
626661
/// Parses a struct(..) expression
627662
fn parse_struct(
628663
&self,

datafusion/sqllogictest/test_files/struct.slt

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,13 @@ select named_struct('scalar', 27, 'array', values.a, 'null', NULL) from values;
162162
{scalar: 27, array: 2, null: }
163163
{scalar: 27, array: 3, null: }
164164

165+
query ?
166+
select {'scalar': 27, 'array': values.a, 'null': NULL} from values;
167+
----
168+
{scalar: 27, array: 1, null: }
169+
{scalar: 27, array: 2, null: }
170+
{scalar: 27, array: 3, null: }
171+
165172
# named_struct with mixed scalar and array values #2
166173
query ?
167174
select named_struct('array', values.a, 'scalar', 27, 'null', NULL) from values;
@@ -170,6 +177,13 @@ select named_struct('array', values.a, 'scalar', 27, 'null', NULL) from values;
170177
{array: 2, scalar: 27, null: }
171178
{array: 3, scalar: 27, null: }
172179

180+
query ?
181+
select {'array': values.a, 'scalar': 27, 'null': NULL} from values;
182+
----
183+
{array: 1, scalar: 27, null: }
184+
{array: 2, scalar: 27, null: }
185+
{array: 3, scalar: 27, null: }
186+
173187
# named_struct with mixed scalar and array values #3
174188
query ?
175189
select named_struct('null', NULL, 'array', values.a, 'scalar', 27) from values;
@@ -207,3 +221,14 @@ query T
207221
select arrow_typeof(named_struct('first', 1, 'second', 2, 'third', 3));
208222
----
209223
Struct([Field { name: "first", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "second", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "third", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }])
224+
225+
query T
226+
select arrow_typeof({'first': 1, 'second': 2, 'third': 3});
227+
----
228+
Struct([Field { name: "first", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "second", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "third", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }])
229+
230+
# test nested struct literal
231+
query ?
232+
select {'animal': {'cat': 1, 'dog': 2, 'bird': {'parrot': 3, 'canary': 1}}, 'genre': {'fiction': ['mystery', 'sci-fi', 'fantasy'], 'non-fiction': {'biography': 5, 'history': 7, 'science': {'physics': 2, 'biology': 3}}}, 'vehicle': {'car': {'sedan': 4, 'suv': 2}, 'bicycle': 3, 'boat': ['sailboat', 'motorboat']}, 'weather': {'sunny': True, 'temperature': 25.5, 'wind': {'speed': 10, 'direction': 'NW'}}};
233+
----
234+
{animal: {cat: 1, dog: 2, bird: {parrot: 3, canary: 1}}, genre: {fiction: [mystery, sci-fi, fantasy], non-fiction: {biography: 5, history: 7, science: {physics: 2, biology: 3}}}, vehicle: {car: {sedan: 4, suv: 2}, bicycle: 3, boat: [sailboat, motorboat]}, weather: {sunny: true, temperature: 25.5, wind: {speed: 10, direction: NW}}}

0 commit comments

Comments
 (0)