19
19
20
20
use std:: collections:: HashMap ;
21
21
use std:: fmt:: Write ;
22
- use std:: sync:: Arc ;
22
+ use std:: sync:: atomic:: AtomicI32 ;
23
+ use std:: sync:: { Arc , OnceLock } ;
23
24
24
25
use crate :: datasource:: file_format:: arrow:: ArrowFormat ;
25
26
use crate :: datasource:: file_format:: avro:: AvroFormat ;
@@ -444,11 +445,13 @@ impl PhysicalPlanner for DefaultPhysicalPlanner {
444
445
logical_plan : & LogicalPlan ,
445
446
session_state : & SessionState ,
446
447
) -> Result < Arc < dyn ExecutionPlan > > {
448
+ reset_recursive_cte_physical_plan_branch_number ( ) ;
449
+
447
450
match self . handle_explain ( logical_plan, session_state) . await ? {
448
451
Some ( plan) => Ok ( plan) ,
449
452
None => {
450
453
let plan = self
451
- . create_initial_plan ( logical_plan, session_state)
454
+ . create_initial_plan ( logical_plan, session_state, None )
452
455
. await ?;
453
456
self . optimize_internal ( plan, session_state, |_, _| { } )
454
457
}
@@ -479,6 +482,23 @@ impl PhysicalPlanner for DefaultPhysicalPlanner {
479
482
}
480
483
}
481
484
485
+ // atomic global incrmenter
486
+
487
+ static RECURSIVE_CTE_PHYSICAL_PLAN_BRANCH : OnceLock < AtomicI32 > = OnceLock :: new ( ) ;
488
+
489
+ fn new_recursive_cte_physical_plan_branch_number ( ) -> u32 {
490
+ let counter = RECURSIVE_CTE_PHYSICAL_PLAN_BRANCH
491
+ . get_or_init ( || AtomicI32 :: new ( 0 ) )
492
+ . fetch_add ( 1 , std:: sync:: atomic:: Ordering :: SeqCst ) ;
493
+ counter as u32
494
+ }
495
+
496
+ fn reset_recursive_cte_physical_plan_branch_number ( ) {
497
+ RECURSIVE_CTE_PHYSICAL_PLAN_BRANCH
498
+ . get_or_init ( || AtomicI32 :: new ( 0 ) )
499
+ . store ( 0 , std:: sync:: atomic:: Ordering :: SeqCst ) ;
500
+ }
501
+
482
502
impl DefaultPhysicalPlanner {
483
503
/// Create a physical planner that uses `extension_planners` to
484
504
/// plan user-defined logical nodes [`LogicalPlan::Extension`].
@@ -499,6 +519,7 @@ impl DefaultPhysicalPlanner {
499
519
& ' a self ,
500
520
logical_plans : impl IntoIterator < Item = & ' a LogicalPlan > + Send + ' a ,
501
521
session_state : & ' a SessionState ,
522
+ ctx : Option < & ' a String > ,
502
523
) -> BoxFuture < ' a , Result < Vec < Arc < dyn ExecutionPlan > > > > {
503
524
async move {
504
525
// First build futures with as little references as possible, then performing some stream magic.
@@ -511,7 +532,7 @@ impl DefaultPhysicalPlanner {
511
532
. into_iter ( )
512
533
. enumerate ( )
513
534
. map ( |( idx, lp) | async move {
514
- let plan = self . create_initial_plan ( lp, session_state) . await ?;
535
+ let plan = self . create_initial_plan ( lp, session_state, ctx ) . await ?;
515
536
Ok ( ( idx, plan) ) as Result < _ >
516
537
} )
517
538
. collect :: < Vec < _ > > ( ) ;
@@ -540,6 +561,7 @@ impl DefaultPhysicalPlanner {
540
561
& ' a self ,
541
562
logical_plan : & ' a LogicalPlan ,
542
563
session_state : & ' a SessionState ,
564
+ ctx : Option < & ' a String > ,
543
565
) -> BoxFuture < ' a , Result < Arc < dyn ExecutionPlan > > > {
544
566
async move {
545
567
let exec_plan: Result < Arc < dyn ExecutionPlan > > = match logical_plan {
@@ -565,7 +587,7 @@ impl DefaultPhysicalPlanner {
565
587
single_file_output,
566
588
copy_options,
567
589
} ) => {
568
- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
590
+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
569
591
570
592
// TODO: make this behavior configurable via options (should copy to create path/file as needed?)
571
593
// TODO: add additional configurable options for if existing files should be overwritten or
@@ -618,7 +640,7 @@ impl DefaultPhysicalPlanner {
618
640
let name = table_name. table ( ) ;
619
641
let schema = session_state. schema_for_ref ( table_name) ?;
620
642
if let Some ( provider) = schema. table ( name) . await {
621
- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
643
+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
622
644
provider. insert_into ( session_state, input_exec, false ) . await
623
645
} else {
624
646
return exec_err ! (
@@ -635,7 +657,7 @@ impl DefaultPhysicalPlanner {
635
657
let name = table_name. table ( ) ;
636
658
let schema = session_state. schema_for_ref ( table_name) ?;
637
659
if let Some ( provider) = schema. table ( name) . await {
638
- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
660
+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
639
661
provider. insert_into ( session_state, input_exec, true ) . await
640
662
} else {
641
663
return exec_err ! (
@@ -676,7 +698,7 @@ impl DefaultPhysicalPlanner {
676
698
) ;
677
699
}
678
700
679
- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
701
+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
680
702
681
703
// at this moment we are guaranteed by the logical planner
682
704
// to have all the window_expr to have equal sort key
@@ -772,7 +794,7 @@ impl DefaultPhysicalPlanner {
772
794
..
773
795
} ) => {
774
796
// Initially need to perform the aggregate and then merge the partitions
775
- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
797
+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
776
798
let physical_input_schema = input_exec. schema ( ) ;
777
799
let logical_input_schema = input. as_ref ( ) . schema ( ) ;
778
800
@@ -849,7 +871,7 @@ impl DefaultPhysicalPlanner {
849
871
) ?) )
850
872
}
851
873
LogicalPlan :: Projection ( Projection { input, expr, .. } ) => {
852
- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
874
+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
853
875
let input_schema = input. as_ref ( ) . schema ( ) ;
854
876
855
877
let physical_exprs = expr
@@ -901,7 +923,7 @@ impl DefaultPhysicalPlanner {
901
923
) ?) )
902
924
}
903
925
LogicalPlan :: Filter ( filter) => {
904
- let physical_input = self . create_initial_plan ( & filter. input , session_state) . await ?;
926
+ let physical_input = self . create_initial_plan ( & filter. input , session_state, ctx ) . await ?;
905
927
let input_schema = physical_input. as_ref ( ) . schema ( ) ;
906
928
let input_dfschema = filter. input . schema ( ) ;
907
929
@@ -914,7 +936,7 @@ impl DefaultPhysicalPlanner {
914
936
Ok ( Arc :: new ( FilterExec :: try_new ( runtime_expr, physical_input) ?) )
915
937
}
916
938
LogicalPlan :: Union ( Union { inputs, schema } ) => {
917
- let physical_plans = self . create_initial_plan_multi ( inputs. iter ( ) . map ( |lp| lp. as_ref ( ) ) , session_state) . await ?;
939
+ let physical_plans = self . create_initial_plan_multi ( inputs. iter ( ) . map ( |lp| lp. as_ref ( ) ) , session_state, ctx ) . await ?;
918
940
919
941
if schema. fields ( ) . len ( ) < physical_plans[ 0 ] . schema ( ) . fields ( ) . len ( ) {
920
942
// `schema` could be a subset of the child schema. For example
@@ -929,7 +951,7 @@ impl DefaultPhysicalPlanner {
929
951
input,
930
952
partitioning_scheme,
931
953
} ) => {
932
- let physical_input = self . create_initial_plan ( input, session_state) . await ?;
954
+ let physical_input = self . create_initial_plan ( input, session_state, ctx ) . await ?;
933
955
let input_schema = physical_input. schema ( ) ;
934
956
let input_dfschema = input. as_ref ( ) . schema ( ) ;
935
957
let physical_partitioning = match partitioning_scheme {
@@ -960,7 +982,7 @@ impl DefaultPhysicalPlanner {
960
982
) ?) )
961
983
}
962
984
LogicalPlan :: Sort ( Sort { expr, input, fetch, .. } ) => {
963
- let physical_input = self . create_initial_plan ( input, session_state) . await ?;
985
+ let physical_input = self . create_initial_plan ( input, session_state, ctx ) . await ?;
964
986
let input_schema = physical_input. as_ref ( ) . schema ( ) ;
965
987
let input_dfschema = input. as_ref ( ) . schema ( ) ;
966
988
let sort_expr = expr
@@ -1051,12 +1073,12 @@ impl DefaultPhysicalPlanner {
1051
1073
} ;
1052
1074
1053
1075
return self
1054
- . create_initial_plan ( & join_plan, session_state)
1076
+ . create_initial_plan ( & join_plan, session_state, ctx )
1055
1077
. await ;
1056
1078
}
1057
1079
1058
1080
// All equi-join keys are columns now, create physical join plan
1059
- let left_right = self . create_initial_plan_multi ( [ left. as_ref ( ) , right. as_ref ( ) ] , session_state) . await ?;
1081
+ let left_right = self . create_initial_plan_multi ( [ left. as_ref ( ) , right. as_ref ( ) ] , session_state, ctx ) . await ?;
1060
1082
let [ physical_left, physical_right] : [ Arc < dyn ExecutionPlan > ; 2 ] = left_right. try_into ( ) . map_err ( |_| DataFusionError :: Internal ( "`create_initial_plan_multi` is broken" . to_string ( ) ) ) ?;
1061
1083
let left_df_schema = left. schema ( ) ;
1062
1084
let right_df_schema = right. schema ( ) ;
@@ -1191,7 +1213,7 @@ impl DefaultPhysicalPlanner {
1191
1213
}
1192
1214
}
1193
1215
LogicalPlan :: CrossJoin ( CrossJoin { left, right, .. } ) => {
1194
- let left_right = self . create_initial_plan_multi ( [ left. as_ref ( ) , right. as_ref ( ) ] , session_state) . await ?;
1216
+ let left_right = self . create_initial_plan_multi ( [ left. as_ref ( ) , right. as_ref ( ) ] , session_state, ctx ) . await ?;
1195
1217
let [ left, right] : [ Arc < dyn ExecutionPlan > ; 2 ] = left_right. try_into ( ) . map_err ( |_| DataFusionError :: Internal ( "`create_initial_plan_multi` is broken" . to_string ( ) ) ) ?;
1196
1218
Ok ( Arc :: new ( CrossJoinExec :: new ( left, right) ) )
1197
1219
}
@@ -1204,10 +1226,10 @@ impl DefaultPhysicalPlanner {
1204
1226
SchemaRef :: new ( schema. as_ref ( ) . to_owned ( ) . into ( ) ) ,
1205
1227
) ) ) ,
1206
1228
LogicalPlan :: SubqueryAlias ( SubqueryAlias { input, .. } ) => {
1207
- self . create_initial_plan ( input, session_state) . await
1229
+ self . create_initial_plan ( input, session_state, ctx ) . await
1208
1230
}
1209
1231
LogicalPlan :: Limit ( Limit { input, skip, fetch, .. } ) => {
1210
- let input = self . create_initial_plan ( input, session_state) . await ?;
1232
+ let input = self . create_initial_plan ( input, session_state, ctx ) . await ?;
1211
1233
1212
1234
// GlobalLimitExec requires a single partition for input
1213
1235
let input = if input. output_partitioning ( ) . partition_count ( ) == 1 {
@@ -1225,7 +1247,7 @@ impl DefaultPhysicalPlanner {
1225
1247
Ok ( Arc :: new ( GlobalLimitExec :: new ( input, * skip, * fetch) ) )
1226
1248
}
1227
1249
LogicalPlan :: Unnest ( Unnest { input, column, schema, options } ) => {
1228
- let input = self . create_initial_plan ( input, session_state) . await ?;
1250
+ let input = self . create_initial_plan ( input, session_state, ctx ) . await ?;
1229
1251
let column_exec = schema. index_of_column ( column)
1230
1252
. map ( |idx| Column :: new ( & column. name , idx) ) ?;
1231
1253
let schema = SchemaRef :: new ( schema. as_ref ( ) . to_owned ( ) . into ( ) ) ;
@@ -1278,7 +1300,7 @@ impl DefaultPhysicalPlanner {
1278
1300
"Unsupported logical plan: Analyze must be root of the plan"
1279
1301
) ,
1280
1302
LogicalPlan :: Extension ( e) => {
1281
- let physical_inputs = self . create_initial_plan_multi ( e. node . inputs ( ) , session_state) . await ?;
1303
+ let physical_inputs = self . create_initial_plan_multi ( e. node . inputs ( ) , session_state, ctx ) . await ?;
1282
1304
1283
1305
let mut maybe_plan = None ;
1284
1306
for planner in & self . extension_planners {
@@ -1314,13 +1336,19 @@ impl DefaultPhysicalPlanner {
1314
1336
Ok ( plan)
1315
1337
}
1316
1338
}
1339
+ // LogicalPlan::SubqueryAlias(SubqueryAlias())
1317
1340
LogicalPlan :: RecursiveQuery ( RecursiveQuery { name, static_term, recursive_term, is_distinct } ) => {
1318
- let static_term = self . create_initial_plan ( static_term, session_state) . await ?;
1319
- let recursive_term = self . create_initial_plan ( recursive_term, session_state) . await ?;
1341
+ let name = format ! ( "{}-{}" , name, new_recursive_cte_physical_plan_branch_number( ) ) ;
1342
+
1343
+ let ctx = Some ( & name) ;
1344
+
1345
+ let static_term = self . create_initial_plan ( static_term, session_state, ctx) . await ?;
1346
+ let recursive_term = self . create_initial_plan ( recursive_term, session_state, ctx) . await ?;
1320
1347
1321
1348
Ok ( Arc :: new ( RecursiveQueryExec :: new ( name. clone ( ) , static_term, recursive_term, * is_distinct) ) )
1322
1349
}
1323
- LogicalPlan :: NamedRelation ( NamedRelation { name, schema} ) => {
1350
+ LogicalPlan :: NamedRelation ( NamedRelation { schema, ..} ) => {
1351
+ let name = ctx. expect ( "NamedRelation must have a context that contains the recursive query's branch name" ) ;
1324
1352
// Named relations is how we represent access to any sort of dynamic data provider. They
1325
1353
// differ from tables in the sense that they can start existing dynamically during the
1326
1354
// execution of a query and then disappear before it even finishes.
@@ -1895,6 +1923,8 @@ impl DefaultPhysicalPlanner {
1895
1923
logical_plan : & LogicalPlan ,
1896
1924
session_state : & SessionState ,
1897
1925
) -> Result < Option < Arc < dyn ExecutionPlan > > > {
1926
+ reset_recursive_cte_physical_plan_branch_number ( ) ;
1927
+
1898
1928
if let LogicalPlan :: Explain ( e) = logical_plan {
1899
1929
use PlanType :: * ;
1900
1930
let mut stringified_plans = vec ! [ ] ;
@@ -1910,7 +1940,7 @@ impl DefaultPhysicalPlanner {
1910
1940
1911
1941
if !config. logical_plan_only && e. logical_optimization_succeeded {
1912
1942
match self
1913
- . create_initial_plan ( e. plan . as_ref ( ) , session_state)
1943
+ . create_initial_plan ( e. plan . as_ref ( ) , session_state, None )
1914
1944
. await
1915
1945
{
1916
1946
Ok ( input) => {
0 commit comments