Skip to content

Commit a6ff1fe

Browse files
authored
Use struct instead of named_struct when there are no aliases (#9897)
* Revert "use alias (#9894)"

  This reverts commit 9487ca0.
* Use `struct` instead of `named_struct` when there are no aliases
* Update docs
* fmt
1 parent f51fda5 commit a6ff1fe

File tree

5 files changed

+68
-10
lines changed

5 files changed

+68
-10
lines changed

datafusion/sql/src/expr/mod.rs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
589589
}
590590
}
591591

592+
/// Parses a struct(..) expression
592593
fn parse_struct(
593594
&self,
594595
values: Vec<SQLExpr>,
@@ -599,6 +600,25 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
599600
if !fields.is_empty() {
600601
return not_impl_err!("Struct fields are not supported yet");
601602
}
603+
604+
if values
605+
.iter()
606+
.any(|value| matches!(value, SQLExpr::Named { .. }))
607+
{
608+
self.create_named_struct(values, input_schema, planner_context)
609+
} else {
610+
self.create_struct(values, input_schema, planner_context)
611+
}
612+
}
613+
614+
// Handles a call to `struct(...)` where at least one argument is named, for example
615+
// `struct(v as foo, v2 as bar)`, by creating a call to the `named_struct` function.
616+
fn create_named_struct(
617+
&self,
618+
values: Vec<SQLExpr>,
619+
input_schema: &DFSchema,
620+
planner_context: &mut PlannerContext,
621+
) -> Result<Expr> {
602622
let args = values
603623
.into_iter()
604624
.enumerate()
@@ -643,6 +663,34 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
643663
)))
644664
}
645665

666+
// Handles a call to struct(...) where the arguments are not named. For example
667+
// `struct (v, v2)` by creating a call to the `struct` function
668+
// which will create a struct with fields named `c0`, `c1`, etc.
669+
fn create_struct(
670+
&self,
671+
values: Vec<SQLExpr>,
672+
input_schema: &DFSchema,
673+
planner_context: &mut PlannerContext,
674+
) -> Result<Expr> {
675+
let args = values
676+
.into_iter()
677+
.map(|value| {
678+
self.sql_expr_to_logical_expr(value, input_schema, planner_context)
679+
})
680+
.collect::<Result<Vec<_>>>()?;
681+
let struct_func = self
682+
.context_provider
683+
.get_function_meta("struct")
684+
.ok_or_else(|| {
685+
internal_datafusion_err!("Unable to find expected 'struct' function")
686+
})?;
687+
688+
Ok(Expr::ScalarFunction(ScalarFunction::new_udf(
689+
struct_func,
690+
args,
691+
)))
692+
}
693+
646694
fn parse_array_agg(
647695
&self,
648696
array_agg: ArrayAgg,

datafusion/sqllogictest/test_files/explain.slt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -390,8 +390,8 @@ query TT
390390
explain select struct(1, 2.3, 'abc');
391391
----
392392
logical_plan
393-
Projection: Struct({c0:1,c1:2.3,c2:abc}) AS named_struct(Utf8("c0"),Int64(1),Utf8("c1"),Float64(2.3),Utf8("c2"),Utf8("abc"))
393+
Projection: Struct({c0:1,c1:2.3,c2:abc}) AS struct(Int64(1),Float64(2.3),Utf8("abc"))
394394
--EmptyRelation
395395
physical_plan
396-
ProjectionExec: expr=[{c0:1,c1:2.3,c2:abc} as named_struct(Utf8("c0"),Int64(1),Utf8("c1"),Float64(2.3),Utf8("c2"),Utf8("abc"))]
396+
ProjectionExec: expr=[{c0:1,c1:2.3,c2:abc} as struct(Int64(1),Float64(2.3),Utf8("abc"))]
397397
--PlaceholderRowExec

datafusion/sqllogictest/test_files/expr.slt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2288,39 +2288,39 @@ select struct(time,load1,load2,host) from t1;
22882288

22892289
# can have an aggregate function with an inner coalesce
22902290
query TR
2291-
select t2.info['c3'] as host, sum(coalesce(t2.info)['c1']) from (select struct(time,load1,load2,host) as info from t1) t2 where t2.info['c3'] IS NOT NULL group by t2.info['c3'] order by host;
2291+
select t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] as host, sum(coalesce(t2."struct(t1.time,t1.load1,t1.load2,t1.host)")['c1']) from (select struct(time,load1,load2,host) from t1) t2 where t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] IS NOT NULL group by t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] order by host;
22922292
----
22932293
host1 1.1
22942294
host2 2.2
22952295
host3 3.3
22962296

22972297
# can have an aggregate function with an inner CASE WHEN
22982298
query TR
2299-
select t2.info['c3'] as host, sum((case when t2.info['c3'] is not null then t2.info end)['c2']) from (select struct(time,load1,load2,host) as info from t1) t2 where t2.info['c3'] IS NOT NULL group by t2.info['c3'] order by host;
2299+
select t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] as host, sum((case when t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] is not null then t2."struct(t1.time,t1.load1,t1.load2,t1.host)" end)['c2']) from (select struct(time,load1,load2,host) from t1) t2 where t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] IS NOT NULL group by t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] order by host;
23002300
----
23012301
host1 101
23022302
host2 202
23032303
host3 303
23042304

23052305
# can have 2 projections with aggr(short_circuited), with different short-circuited expr
23062306
query TRR
2307-
select t2.info['c3'] as host, sum(coalesce(t2.info)['c1']), sum((case when t2.info['c3'] is not null then t2.info end)['c2']) from (select struct(time,load1,load2,host) as info from t1) t2 where t2.info['c3'] IS NOT NULL group by t2.info['c3'] order by host;
2307+
select t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] as host, sum(coalesce(t2."struct(t1.time,t1.load1,t1.load2,t1.host)")['c1']), sum((case when t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] is not null then t2."struct(t1.time,t1.load1,t1.load2,t1.host)" end)['c2']) from (select struct(time,load1,load2,host) from t1) t2 where t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] IS NOT NULL group by t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] order by host;
23082308
----
23092309
host1 1.1 101
23102310
host2 2.2 202
23112311
host3 3.3 303
23122312

23132313
# can have 2 projections with aggr(short_circuited), with the same short-circuited expr (e.g. CASE WHEN)
23142314
query TRR
2315-
select t2.info['c3'] as host, sum((case when t2.info['c3'] is not null then t2.info end)['c1']), sum((case when t2.info['c3'] is not null then t2.info end)['c2']) from (select struct(time,load1,load2,host) as info from t1) t2 where t2.info['c3'] IS NOT NULL group by t2.info['c3'] order by host;
2315+
select t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] as host, sum((case when t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] is not null then t2."struct(t1.time,t1.load1,t1.load2,t1.host)" end)['c1']), sum((case when t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] is not null then t2."struct(t1.time,t1.load1,t1.load2,t1.host)" end)['c2']) from (select struct(time,load1,load2,host) from t1) t2 where t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] IS NOT NULL group by t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] order by host;
23162316
----
23172317
host1 1.1 101
23182318
host2 2.2 202
23192319
host3 3.3 303
23202320

23212321
# can have 2 projections with aggr(short_circuited), with the same short-circuited expr (e.g. coalesce)
23222322
query TRR
2323-
select t2.info['c3'] as host, sum(coalesce(t2.info)['c1']), sum(coalesce(t2.info)['c2']) from (select struct(time,load1,load2,host) as info from t1) t2 where t2.info['c3'] IS NOT NULL group by t2.info['c3'] order by host;
2323+
select t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] as host, sum(coalesce(t2."struct(t1.time,t1.load1,t1.load2,t1.host)")['c1']), sum(coalesce(t2."struct(t1.time,t1.load1,t1.load2,t1.host)")['c2']) from (select struct(time,load1,load2,host) from t1) t2 where t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] IS NOT NULL group by t2."struct(t1.time,t1.load1,t1.load2,t1.host)"['c3'] order by host;
23242324
----
23252325
host1 1.1 101
23262326
host2 2.2 202

datafusion/sqllogictest/test_files/struct.slt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,10 @@ query TT
8585
explain select struct(a, b, c) from values;
8686
----
8787
logical_plan
88-
Projection: named_struct(Utf8("c0"), values.a, Utf8("c1"), values.b, Utf8("c2"), values.c)
88+
Projection: struct(values.a, values.b, values.c)
8989
--TableScan: values projection=[a, b, c]
9090
physical_plan
91-
ProjectionExec: expr=[named_struct(c0, a@0, c1, b@1, c2, c@2) as named_struct(Utf8("c0"),values.a,Utf8("c1"),values.b,Utf8("c2"),values.c)]
91+
ProjectionExec: expr=[struct(a@0, b@1, c@2) as struct(values.a,values.b,values.c)]
9292
--MemoryExec: partitions=1, partition_sizes=[1]
9393

9494
# error on 0 arguments
@@ -179,4 +179,4 @@ drop table values;
179179
query T
180180
select arrow_typeof(named_struct('first', 1, 'second', 2, 'third', 3));
181181
----
182-
Struct([Field { name: "first", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "second", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "third", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }])
182+
Struct([Field { name: "first", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "second", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "third", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }])

docs/source/user-guide/sql/scalar_functions.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3336,6 +3336,16 @@ select * from t;
33363336
| 3 | 4 |
33373337
+---+---+
33383338
3339+
-- use default names `c0`, `c1`
3340+
select struct(a, b) from t;
3341+
+-----------------+
3342+
| struct(t.a,t.b) |
3343+
+-----------------+
3344+
| {c0: 1, c1: 2}  |
3345+
| {c0: 3, c1: 4}  |
3346+
+-----------------+
3347+
3348+
-- name the first field `field_a`
33393349
select struct(a as field_a, b) from t;
33403350
+--------------------------------------------------+
33413351
| named_struct(Utf8("field_a"),t.a,Utf8("c1"),t.b) |

0 commit comments

Comments
 (0)