Skip to content

Commit d7721f1

Browse files
committed
fix issue where CTE could not be referenced more than 1 time
1 parent 3096c1d commit d7721f1

File tree

3 files changed

+144
-32
lines changed

3 files changed

+144
-32
lines changed

datafusion/core/src/physical_planner.rs

Lines changed: 54 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
2020
use std::collections::HashMap;
2121
use std::fmt::Write;
22-
use std::sync::Arc;
22+
use std::sync::atomic::AtomicI32;
23+
use std::sync::{Arc, OnceLock};
2324

2425
use crate::datasource::file_format::arrow::ArrowFormat;
2526
use crate::datasource::file_format::avro::AvroFormat;
@@ -444,11 +445,13 @@ impl PhysicalPlanner for DefaultPhysicalPlanner {
444445
logical_plan: &LogicalPlan,
445446
session_state: &SessionState,
446447
) -> Result<Arc<dyn ExecutionPlan>> {
448+
reset_recursive_cte_physical_plan_branch_number();
449+
447450
match self.handle_explain(logical_plan, session_state).await? {
448451
Some(plan) => Ok(plan),
449452
None => {
450453
let plan = self
451-
.create_initial_plan(logical_plan, session_state)
454+
.create_initial_plan(logical_plan, session_state, None)
452455
.await?;
453456
self.optimize_internal(plan, session_state, |_, _| {})
454457
}
@@ -479,6 +482,23 @@ impl PhysicalPlanner for DefaultPhysicalPlanner {
479482
}
480483
}
481484

485+
// atomic global incrementer
486+
487+
static RECURSIVE_CTE_PHYSICAL_PLAN_BRANCH: OnceLock<AtomicI32> = OnceLock::new();
488+
489+
fn new_recursive_cte_physical_plan_branch_number() -> u32 {
490+
let counter = RECURSIVE_CTE_PHYSICAL_PLAN_BRANCH
491+
.get_or_init(|| AtomicI32::new(0))
492+
.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
493+
counter as u32
494+
}
495+
496+
fn reset_recursive_cte_physical_plan_branch_number() {
497+
RECURSIVE_CTE_PHYSICAL_PLAN_BRANCH
498+
.get_or_init(|| AtomicI32::new(0))
499+
.store(0, std::sync::atomic::Ordering::SeqCst);
500+
}
501+
482502
impl DefaultPhysicalPlanner {
483503
/// Create a physical planner that uses `extension_planners` to
484504
/// plan user-defined logical nodes [`LogicalPlan::Extension`].
@@ -499,6 +519,7 @@ impl DefaultPhysicalPlanner {
499519
&'a self,
500520
logical_plans: impl IntoIterator<Item = &'a LogicalPlan> + Send + 'a,
501521
session_state: &'a SessionState,
522+
ctx: Option<&'a String>,
502523
) -> BoxFuture<'a, Result<Vec<Arc<dyn ExecutionPlan>>>> {
503524
async move {
504525
// First build futures with as few references as possible, then perform some stream magic.
@@ -511,7 +532,7 @@ impl DefaultPhysicalPlanner {
511532
.into_iter()
512533
.enumerate()
513534
.map(|(idx, lp)| async move {
514-
let plan = self.create_initial_plan(lp, session_state).await?;
535+
let plan = self.create_initial_plan(lp, session_state, ctx).await?;
515536
Ok((idx, plan)) as Result<_>
516537
})
517538
.collect::<Vec<_>>();
@@ -540,6 +561,7 @@ impl DefaultPhysicalPlanner {
540561
&'a self,
541562
logical_plan: &'a LogicalPlan,
542563
session_state: &'a SessionState,
564+
ctx: Option<&'a String>,
543565
) -> BoxFuture<'a, Result<Arc<dyn ExecutionPlan>>> {
544566
async move {
545567
let exec_plan: Result<Arc<dyn ExecutionPlan>> = match logical_plan {
@@ -565,7 +587,7 @@ impl DefaultPhysicalPlanner {
565587
single_file_output,
566588
copy_options,
567589
}) => {
568-
let input_exec = self.create_initial_plan(input, session_state).await?;
590+
let input_exec = self.create_initial_plan(input, session_state, ctx).await?;
569591

570592
// TODO: make this behavior configurable via options (should copy to create path/file as needed?)
571593
// TODO: add additional configurable options for if existing files should be overwritten or
@@ -618,7 +640,7 @@ impl DefaultPhysicalPlanner {
618640
let name = table_name.table();
619641
let schema = session_state.schema_for_ref(table_name)?;
620642
if let Some(provider) = schema.table(name).await {
621-
let input_exec = self.create_initial_plan(input, session_state).await?;
643+
let input_exec = self.create_initial_plan(input, session_state, ctx).await?;
622644
provider.insert_into(session_state, input_exec, false).await
623645
} else {
624646
return exec_err!(
@@ -635,7 +657,7 @@ impl DefaultPhysicalPlanner {
635657
let name = table_name.table();
636658
let schema = session_state.schema_for_ref(table_name)?;
637659
if let Some(provider) = schema.table(name).await {
638-
let input_exec = self.create_initial_plan(input, session_state).await?;
660+
let input_exec = self.create_initial_plan(input, session_state, ctx).await?;
639661
provider.insert_into(session_state, input_exec, true).await
640662
} else {
641663
return exec_err!(
@@ -676,7 +698,7 @@ impl DefaultPhysicalPlanner {
676698
);
677699
}
678700

679-
let input_exec = self.create_initial_plan(input, session_state).await?;
701+
let input_exec = self.create_initial_plan(input, session_state, ctx).await?;
680702

681703
// at this moment we are guaranteed by the logical planner
682704
// to have all the window_expr to have equal sort key
@@ -772,7 +794,7 @@ impl DefaultPhysicalPlanner {
772794
..
773795
}) => {
774796
// Initially need to perform the aggregate and then merge the partitions
775-
let input_exec = self.create_initial_plan(input, session_state).await?;
797+
let input_exec = self.create_initial_plan(input, session_state, ctx).await?;
776798
let physical_input_schema = input_exec.schema();
777799
let logical_input_schema = input.as_ref().schema();
778800

@@ -849,7 +871,7 @@ impl DefaultPhysicalPlanner {
849871
)?))
850872
}
851873
LogicalPlan::Projection(Projection { input, expr, .. }) => {
852-
let input_exec = self.create_initial_plan(input, session_state).await?;
874+
let input_exec = self.create_initial_plan(input, session_state, ctx).await?;
853875
let input_schema = input.as_ref().schema();
854876

855877
let physical_exprs = expr
@@ -901,7 +923,7 @@ impl DefaultPhysicalPlanner {
901923
)?))
902924
}
903925
LogicalPlan::Filter(filter) => {
904-
let physical_input = self.create_initial_plan(&filter.input, session_state).await?;
926+
let physical_input = self.create_initial_plan(&filter.input, session_state, ctx).await?;
905927
let input_schema = physical_input.as_ref().schema();
906928
let input_dfschema = filter.input.schema();
907929

@@ -914,7 +936,7 @@ impl DefaultPhysicalPlanner {
914936
Ok(Arc::new(FilterExec::try_new(runtime_expr, physical_input)?))
915937
}
916938
LogicalPlan::Union(Union { inputs, schema }) => {
917-
let physical_plans = self.create_initial_plan_multi(inputs.iter().map(|lp| lp.as_ref()), session_state).await?;
939+
let physical_plans = self.create_initial_plan_multi(inputs.iter().map(|lp| lp.as_ref()), session_state, ctx).await?;
918940

919941
if schema.fields().len() < physical_plans[0].schema().fields().len() {
920942
// `schema` could be a subset of the child schema. For example
@@ -929,7 +951,7 @@ impl DefaultPhysicalPlanner {
929951
input,
930952
partitioning_scheme,
931953
}) => {
932-
let physical_input = self.create_initial_plan(input, session_state).await?;
954+
let physical_input = self.create_initial_plan(input, session_state, ctx).await?;
933955
let input_schema = physical_input.schema();
934956
let input_dfschema = input.as_ref().schema();
935957
let physical_partitioning = match partitioning_scheme {
@@ -960,7 +982,7 @@ impl DefaultPhysicalPlanner {
960982
)?))
961983
}
962984
LogicalPlan::Sort(Sort { expr, input, fetch, .. }) => {
963-
let physical_input = self.create_initial_plan(input, session_state).await?;
985+
let physical_input = self.create_initial_plan(input, session_state, ctx).await?;
964986
let input_schema = physical_input.as_ref().schema();
965987
let input_dfschema = input.as_ref().schema();
966988
let sort_expr = expr
@@ -1051,12 +1073,12 @@ impl DefaultPhysicalPlanner {
10511073
};
10521074

10531075
return self
1054-
.create_initial_plan(&join_plan, session_state)
1076+
.create_initial_plan(&join_plan, session_state, ctx)
10551077
.await;
10561078
}
10571079

10581080
// All equi-join keys are columns now, create physical join plan
1059-
let left_right = self.create_initial_plan_multi([left.as_ref(), right.as_ref()], session_state).await?;
1081+
let left_right = self.create_initial_plan_multi([left.as_ref(), right.as_ref()], session_state, ctx).await?;
10601082
let [physical_left, physical_right]: [Arc<dyn ExecutionPlan>; 2] = left_right.try_into().map_err(|_| DataFusionError::Internal("`create_initial_plan_multi` is broken".to_string()))?;
10611083
let left_df_schema = left.schema();
10621084
let right_df_schema = right.schema();
@@ -1191,7 +1213,7 @@ impl DefaultPhysicalPlanner {
11911213
}
11921214
}
11931215
LogicalPlan::CrossJoin(CrossJoin { left, right, .. }) => {
1194-
let left_right = self.create_initial_plan_multi([left.as_ref(), right.as_ref()], session_state).await?;
1216+
let left_right = self.create_initial_plan_multi([left.as_ref(), right.as_ref()], session_state, ctx).await?;
11951217
let [left, right]: [Arc<dyn ExecutionPlan>; 2] = left_right.try_into().map_err(|_| DataFusionError::Internal("`create_initial_plan_multi` is broken".to_string()))?;
11961218
Ok(Arc::new(CrossJoinExec::new(left, right)))
11971219
}
@@ -1204,10 +1226,10 @@ impl DefaultPhysicalPlanner {
12041226
SchemaRef::new(schema.as_ref().to_owned().into()),
12051227
))),
12061228
LogicalPlan::SubqueryAlias(SubqueryAlias { input, .. }) => {
1207-
self.create_initial_plan(input, session_state).await
1229+
self.create_initial_plan(input, session_state, ctx).await
12081230
}
12091231
LogicalPlan::Limit(Limit { input, skip, fetch, .. }) => {
1210-
let input = self.create_initial_plan(input, session_state).await?;
1232+
let input = self.create_initial_plan(input, session_state, ctx).await?;
12111233

12121234
// GlobalLimitExec requires a single partition for input
12131235
let input = if input.output_partitioning().partition_count() == 1 {
@@ -1225,7 +1247,7 @@ impl DefaultPhysicalPlanner {
12251247
Ok(Arc::new(GlobalLimitExec::new(input, *skip, *fetch)))
12261248
}
12271249
LogicalPlan::Unnest(Unnest { input, column, schema, options }) => {
1228-
let input = self.create_initial_plan(input, session_state).await?;
1250+
let input = self.create_initial_plan(input, session_state, ctx).await?;
12291251
let column_exec = schema.index_of_column(column)
12301252
.map(|idx| Column::new(&column.name, idx))?;
12311253
let schema = SchemaRef::new(schema.as_ref().to_owned().into());
@@ -1278,7 +1300,7 @@ impl DefaultPhysicalPlanner {
12781300
"Unsupported logical plan: Analyze must be root of the plan"
12791301
),
12801302
LogicalPlan::Extension(e) => {
1281-
let physical_inputs = self.create_initial_plan_multi(e.node.inputs(), session_state).await?;
1303+
let physical_inputs = self.create_initial_plan_multi(e.node.inputs(), session_state, ctx).await?;
12821304

12831305
let mut maybe_plan = None;
12841306
for planner in &self.extension_planners {
@@ -1314,13 +1336,19 @@ impl DefaultPhysicalPlanner {
13141336
Ok(plan)
13151337
}
13161338
}
1339+
// LogicalPlan::SubqueryAlias(SubqueryAlias())
13171340
LogicalPlan::RecursiveQuery(RecursiveQuery { name, static_term, recursive_term, is_distinct }) => {
1318-
let static_term = self.create_initial_plan(static_term, session_state).await?;
1319-
let recursive_term = self.create_initial_plan(recursive_term, session_state).await?;
1341+
let name = format!("{}-{}", name, new_recursive_cte_physical_plan_branch_number());
1342+
1343+
let ctx = Some(&name);
1344+
1345+
let static_term = self.create_initial_plan(static_term, session_state, ctx).await?;
1346+
let recursive_term = self.create_initial_plan(recursive_term, session_state, ctx).await?;
13201347

13211348
Ok(Arc::new(RecursiveQueryExec::new(name.clone(), static_term, recursive_term, *is_distinct)))
13221349
}
1323-
LogicalPlan::NamedRelation(NamedRelation {name, schema}) => {
1350+
LogicalPlan::NamedRelation(NamedRelation {schema, ..}) => {
1351+
let name = ctx.expect("NamedRelation must have a context that contains the recursive query's branch name");
13241352
// Named relations are how we represent access to any sort of dynamic data provider. They
13251353
// differ from tables in the sense that they can start existing dynamically during the
13261354
// execution of a query and then disappear before it even finishes.
@@ -1895,6 +1923,8 @@ impl DefaultPhysicalPlanner {
18951923
logical_plan: &LogicalPlan,
18961924
session_state: &SessionState,
18971925
) -> Result<Option<Arc<dyn ExecutionPlan>>> {
1926+
reset_recursive_cte_physical_plan_branch_number();
1927+
18981928
if let LogicalPlan::Explain(e) = logical_plan {
18991929
use PlanType::*;
19001930
let mut stringified_plans = vec![];
@@ -1910,7 +1940,7 @@ impl DefaultPhysicalPlanner {
19101940

19111941
if !config.logical_plan_only && e.logical_optimization_succeeded {
19121942
match self
1913-
.create_initial_plan(e.plan.as_ref(), session_state)
1943+
.create_initial_plan(e.plan.as_ref(), session_state, None)
19141944
.await
19151945
{
19161946
Ok(input) => {

datafusion/sql/src/query.rs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ use datafusion_common::{
2323
plan_err, sql_err, Constraints, DFSchema, DataFusionError, Result, ScalarValue,
2424
};
2525
use datafusion_expr::{
26-
CreateMemoryTable, DdlStatement, Distinct, Expr, LogicalPlan, LogicalPlanBuilder,
26+
logical_plan, CreateMemoryTable, DdlStatement, Distinct, Expr, LogicalPlan,
27+
LogicalPlanBuilder,
2728
};
2829
use sqlparser::ast::{
2930
Expr as SQLExpr, Offset as SQLOffset, OrderByExpr, Query, SetExpr, SetOperator,
@@ -133,10 +134,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
133134
static_metadata,
134135
)?;
135136

137+
let name = cte_name.clone();
138+
136139
// Step 2.2: Create a temporary relation logical plan that will be used
137140
// as the input to the recursive term
138141
let named_relation = LogicalPlanBuilder::named_relation(
139-
cte_name.as_str(),
142+
&name,
140143
Arc::new(named_relation_schema),
141144
)
142145
.build()?;
@@ -157,14 +160,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
157160

158161
// ---------- Step 4: Create the final plan ------------------
159162
// Step 4.1: Compile the final plan
160-
let final_plan = LogicalPlanBuilder::from(static_plan)
161-
.to_recursive_query(
162-
cte_name.clone(),
163-
recursive_plan,
164-
distinct,
165-
)?
163+
let logical_plan = LogicalPlanBuilder::from(static_plan)
164+
.to_recursive_query(name, recursive_plan, distinct)?
166165
.build()?;
167166

167+
let final_plan =
168+
self.apply_table_alias(logical_plan, cte.alias)?;
169+
168170
// Step 4.2: Remove the temporary relation from the planning context and replace it
169171
// with the final plan.
170172
planner_context.insert_cte(cte_name.clone(), final_plan);

datafusion/sqllogictest/test_files/cte.slt

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,3 +131,83 @@ WITH RECURSIVE nodes AS (
131131
SELECT sum(id) FROM nodes
132132
----
133133
55
134+
135+
# setup
136+
statement ok
137+
CREATE TABLE t(a BIGINT) AS VALUES(1),(2),(3);
138+
139+
# referencing CTE multiple times does not error
140+
query II rowsort
141+
WITH RECURSIVE my_cte AS (
142+
SELECT a from t
143+
UNION ALL
144+
SELECT a+2 as a
145+
FROM my_cte
146+
WHERE a<5
147+
)
148+
SELECT * FROM my_cte t1, my_cte
149+
----
150+
1 1
151+
1 2
152+
1 3
153+
1 3
154+
1 4
155+
1 5
156+
1 5
157+
1 6
158+
2 1
159+
2 2
160+
2 3
161+
2 3
162+
2 4
163+
2 5
164+
2 5
165+
2 6
166+
3 1
167+
3 1
168+
3 2
169+
3 2
170+
3 3
171+
3 3
172+
3 3
173+
3 3
174+
3 4
175+
3 4
176+
3 5
177+
3 5
178+
3 5
179+
3 5
180+
3 6
181+
3 6
182+
4 1
183+
4 2
184+
4 3
185+
4 3
186+
4 4
187+
4 5
188+
4 5
189+
4 6
190+
5 1
191+
5 1
192+
5 2
193+
5 2
194+
5 3
195+
5 3
196+
5 3
197+
5 3
198+
5 4
199+
5 4
200+
5 5
201+
5 5
202+
5 5
203+
5 5
204+
5 6
205+
5 6
206+
6 1
207+
6 2
208+
6 3
209+
6 3
210+
6 4
211+
6 5
212+
6 5
213+
6 6

0 commit comments

Comments
 (0)