Skip to content

Commit a6cdd0d

Browse files
authored
Recursive CTEs: Stage 3 - add execution support (#8840)
* rebase all execution and preceding recursive cte work add config flag for recursive ctes update docs from script update slt test for doc change restore testing pin add sql -> logical plan support * impl cte as work table * move SharedState to continuance * impl WorkTableState wip: readying pr to implement only logical plan fix sql integration test wip: add sql test for logical plan wip: format test assertion wip: remove unnecessary with qualifier method some docs more docs Add comments to `RecursiveQuery` Update datafusion-cli Cargo.lock Fix clippy better errors and comments add sql -> logical plan support * impl cte as work table * move SharedState to continuance * impl WorkTableState wip: readying pr to implement only logical plan fix sql integration test wip: add sql test for logical plan wip: format test assertion wip: remove unnecessary with qualifier method some docs more docs impl execution support add sql -> logical plan support * impl cte as work table * move SharedState to continuance * impl WorkTableState wip: readying pr to implement only logical plan partway through porting over isidentical's work Continuing implementation with fixes and improvements Lint fixes ensure that repartitions are not added immediately after RecursiveExec in the physical-plan add trivial sqllogictest more recursive tests remove test that asserts recursive cte should fail additional cte test wip: remove tokio from physical plan dev deps format cargo tomls fix issue where CTE could not be referenced more than 1 time wip: fixes after rebase but tpcds_physical_q54 keeps overflowing its stack Impl NamedRelation as CteWorkTable * impl cte as work table * move SharedState to continuance * impl WorkTableState * upd * assign work table state * upd * upd fix min repro but still broken on larger test case set config in sql logic tests clean up cte slt tests fixes fix option add group by test case and more test case files wip add window function recursive cte example simplify stream impl for 
recursive query stream add explain to trivial test case move setting of recursive ctes to slt file and add test to ensure multiple record batches are produced each iteration remove tokio dep and remove mut lint, comments and remove tokio stream update submodule pin to feat branch that contains csvs update submodule pin to feat branch that contains csvs * error if recursive ctes are nested * error if recursive cte is referenced multiple times within the recursive term * wip * fix rebase * move testing files into main repo * update testing pin to main pin * tweaks
1 parent a7a74fa commit a6cdd0d

File tree

14 files changed

+1330
-9
lines changed

14 files changed

+1330
-9
lines changed

datafusion/core/src/datasource/cte_worktable.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,14 @@ use std::sync::Arc;
2222

2323
use arrow::datatypes::SchemaRef;
2424
use async_trait::async_trait;
25-
use datafusion_common::not_impl_err;
25+
use datafusion_physical_plan::work_table::WorkTableExec;
2626

2727
use crate::{
2828
error::Result,
2929
logical_expr::{Expr, LogicalPlan, TableProviderFilterPushDown},
3030
physical_plan::ExecutionPlan,
3131
};
3232

33-
use datafusion_common::DataFusionError;
34-
3533
use crate::datasource::{TableProvider, TableType};
3634
use crate::execution::context::SessionState;
3735

@@ -84,7 +82,11 @@ impl TableProvider for CteWorkTable {
8482
_filters: &[Expr],
8583
_limit: Option<usize>,
8684
) -> Result<Arc<dyn ExecutionPlan>> {
87-
not_impl_err!("scan not implemented for CteWorkTable yet")
85+
// TODO: pushdown filters and limits
86+
Ok(Arc::new(WorkTableExec::new(
87+
self.name.clone(),
88+
self.table_schema.clone(),
89+
)))
8890
}
8991

9092
fn supports_filter_pushdown(

datafusion/core/src/physical_optimizer/projection_pushdown.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ fn try_swapping_with_streaming_table(
248248
StreamingTableExec::try_new(
249249
streaming_table.partition_schema().clone(),
250250
streaming_table.partitions().clone(),
251-
Some(&new_projections),
251+
Some(new_projections.as_ref()),
252252
lex_orderings,
253253
streaming_table.is_infinite(),
254254
)

datafusion/core/src/physical_planner.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ use crate::physical_plan::joins::{
5858
use crate::physical_plan::limit::{GlobalLimitExec, LocalLimitExec};
5959
use crate::physical_plan::memory::MemoryExec;
6060
use crate::physical_plan::projection::ProjectionExec;
61+
use crate::physical_plan::recursive_query::RecursiveQueryExec;
6162
use crate::physical_plan::repartition::RepartitionExec;
6263
use crate::physical_plan::sorts::sort::SortExec;
6364
use crate::physical_plan::union::UnionExec;
@@ -894,7 +895,7 @@ impl DefaultPhysicalPlanner {
894895
let filter = FilterExec::try_new(runtime_expr, physical_input)?;
895896
Ok(Arc::new(filter.with_default_selectivity(selectivity)?))
896897
}
897-
LogicalPlan::Union(Union { inputs, .. }) => {
898+
LogicalPlan::Union(Union { inputs, schema: _ }) => {
898899
let physical_plans = self.create_initial_plan_multi(inputs.iter().map(|lp| lp.as_ref()), session_state).await?;
899900

900901
Ok(Arc::new(UnionExec::new(physical_plans)))
@@ -1288,8 +1289,10 @@ impl DefaultPhysicalPlanner {
12881289
Ok(plan)
12891290
}
12901291
}
1291-
LogicalPlan::RecursiveQuery(RecursiveQuery { name: _, static_term: _, recursive_term: _, is_distinct: _,.. }) => {
1292-
not_impl_err!("Physical counterpart of RecursiveQuery is not implemented yet")
1292+
LogicalPlan::RecursiveQuery(RecursiveQuery { name, static_term, recursive_term, is_distinct,.. }) => {
1293+
let static_term = self.create_initial_plan(static_term, session_state).await?;
1294+
let recursive_term = self.create_initial_plan(recursive_term, session_state).await?;
1295+
Ok(Arc::new(RecursiveQueryExec::try_new(name.clone(), static_term, recursive_term, *is_distinct)?))
12931296
}
12941297
};
12951298
exec_plan
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
time,name,account_balance
2+
1,John,100
3+
1,Tim,200
4+
2,John,300
5+
2,Tim,400
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
name,account_growth
2+
John,3
3+
Tim,20
4+
Eliza,150
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
Index,product,price,prices_row_num
2+
1,Holden,334.8,1
3+
2,Mercedes-Benz,623.22,2
4+
3,Aston Martin,363.48,3
5+
4,GMC,615.67,4
6+
5,Lincoln,521.13,5
7+
6,Mitsubishi,143.05,6
8+
7,Infiniti,861.82,7
9+
8,Ford,330.57,8
10+
9,GMC,136.87,9
11+
10,Toyota,106.29,10
12+
11,Pontiac,686.95,11
13+
12,Ford,197.48,12
14+
13,Honda,774.42,13
15+
14,Dodge,854.26,14
16+
15,Bentley,628.82,15
17+
16,Chevrolet,756.82,16
18+
17,Volkswagen,438.51,17
19+
18,Mazda,156.15,18
20+
19,Hyundai,322.43,19
21+
20,Oldsmobile,979.95,20
22+
21,Geo,359.59,21
23+
22,Ford,960.75,22
24+
23,Subaru,106.75,23
25+
24,Pontiac,13.4,24
26+
25,Mercedes-Benz,858.46,25
27+
26,Subaru,55.72,26
28+
27,BMW,316.69,27
29+
28,Chevrolet,290.32,28
30+
29,Mercury,296.8,29
31+
30,Dodge,410.78,30
32+
31,Oldsmobile,18.07,31
33+
32,Subaru,442.22,32
34+
33,Dodge,93.29,33
35+
34,Honda,282.9,34
36+
35,Chevrolet,750.87,35
37+
36,Lexus,249.82,36
38+
37,Ford,732.33,37
39+
38,Toyota,680.78,38
40+
39,Nissan,657.01,39
41+
40,Mazda,200.76,40
42+
41,Nissan,251.44,41
43+
42,Buick,714.44,42
44+
43,Ford,436.2,43
45+
44,Volvo,865.53,44
46+
45,Saab,471.52,45
47+
46,Mercedes-Benz,51.13,46
48+
47,Chrysler,943.52,47
49+
48,Lamborghini,181.6,48
50+
49,Hyundai,634.89,49
51+
50,Ford,757.58,50
52+
51,Porsche,294.64,51
53+
52,Ford,261.34,52
54+
53,Chrysler,822.01,53
55+
54,Audi,430.68,54
56+
55,Mitsubishi,69.12,55
57+
56,Mazda,723.16,56
58+
57,Mazda,711.46,57
59+
58,Land Rover,435.15,58
60+
59,Buick,189.58,59
61+
60,GMC,651.92,60
62+
61,Mazda,491.37,61
63+
62,BMW,346.18,62
64+
63,Ford,456.25,63
65+
64,Ford,10.65,64
66+
65,Mazda,985.39,65
67+
66,Mercedes-Benz,955.79,66
68+
67,Honda,550.95,67
69+
68,Mitsubishi,127.6,68
70+
69,Mercedes-Benz,840.65,69
71+
70,Infiniti,647.45,70
72+
71,Bentley,827.26,71
73+
72,Lincoln,822.22,72
74+
73,Plymouth,970.55,73
75+
74,Ford,595.05,74
76+
75,Maybach,808.46,75
77+
76,Chevrolet,341.48,76
78+
77,Jaguar,759.03,77
79+
78,Land Rover,625.01,78
80+
79,Lincoln,289.13,79
81+
80,Suzuki,285.24,80
82+
81,GMC,253.4,81
83+
82,Oldsmobile,174.76,82
84+
83,Lincoln,434.17,83
85+
84,Dodge,887.38,84
86+
85,Mercedes-Benz,308.65,85
87+
86,GMC,182.71,86
88+
87,Ford,619.62,87
89+
88,Lexus,228.63,88
90+
89,Hyundai,901.06,89
91+
90,Chevrolet,615.65,90
92+
91,GMC,460.19,91
93+
92,Mercedes-Benz,729.28,92
94+
93,Dodge,414.69,93
95+
94,Maserati,300.83,94
96+
95,Suzuki,503.64,95
97+
96,Audi,275.05,96
98+
97,Ford,303.25,97
99+
98,Lotus,101.01,98
100+
99,Lincoln,721.05,99
101+
100,Kia,833.31,100
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
region_id,salesperson_id,sale_amount
2+
101,1,1000
3+
102,2,500
4+
101,2,700
5+
103,3,800
6+
102,4,300
7+
101,4,400
8+
102,5,600
9+
103,6,500
10+
101,7,900
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
salesperson_id,manager_id
2+
1,
3+
2,1
4+
3,1
5+
4,2
6+
5,2
7+
6,3
8+
7,3
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
time,other
2+
1,foo
3+
2,bar
4+
4,baz
5+
5,qux

datafusion/physical-plan/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ pub mod metrics;
6161
mod ordering;
6262
pub mod placeholder_row;
6363
pub mod projection;
64+
pub mod recursive_query;
6465
pub mod repartition;
6566
pub mod sorts;
6667
pub mod stream;
@@ -71,6 +72,7 @@ pub mod union;
7172
pub mod unnest;
7273
pub mod values;
7374
pub mod windows;
75+
pub mod work_table;
7476

7577
pub use crate::display::{DefaultDisplay, DisplayAs, DisplayFormatType, VerboseDisplay};
7678
pub use crate::metrics::Metric;

0 commit comments

Comments
 (0)