Skip to content

Commit 37c22e4

Browse files
alambappletreeisyellow
authored andcommitted
Introduce OptimizerRule::rewrite to rewrite in place, Rewrite SimplifyExprs to avoid copies (apache#9954)
1 parent 41c7f3d commit 37c22e4

File tree

3 files changed

+139
-88
lines changed

3 files changed

+139
-88
lines changed

datafusion/core/tests/simplification.rs

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ use datafusion_expr::{
3232
LogicalPlanBuilder, ScalarUDF, Volatility,
3333
};
3434
use datafusion_functions::math;
35+
use datafusion_optimizer::optimizer::Optimizer;
3536
use datafusion_optimizer::simplify_expressions::{ExprSimplifier, SimplifyExpressions};
3637
use datafusion_optimizer::{OptimizerContext, OptimizerRule};
3738
use std::sync::Arc;
@@ -109,14 +110,14 @@ fn test_table_scan() -> LogicalPlan {
109110
.expect("building plan")
110111
}
111112

112-
fn get_optimized_plan_formatted(plan: &LogicalPlan, date_time: &DateTime<Utc>) -> String {
113+
fn get_optimized_plan_formatted(plan: LogicalPlan, date_time: &DateTime<Utc>) -> String {
113114
let config = OptimizerContext::new().with_query_execution_start_time(*date_time);
114-
let rule = SimplifyExpressions::new();
115115

116-
let optimized_plan = rule
117-
.try_optimize(plan, &config)
118-
.unwrap()
119-
.expect("failed to optimize plan");
116+
// Use Optimizer to do plan traversal
117+
fn observe(_plan: &LogicalPlan, _rule: &dyn OptimizerRule) {}
118+
let optimizer = Optimizer::with_rules(vec![Arc::new(SimplifyExpressions::new())]);
119+
let optimized_plan = optimizer.optimize(plan, &config, observe).unwrap();
120+
120121
format!("{optimized_plan:?}")
121122
}
122123

@@ -238,7 +239,7 @@ fn to_timestamp_expr_folded() -> Result<()> {
238239
let expected = "Projection: TimestampNanosecond(1599566400000000000, None) AS to_timestamp(Utf8(\"2020-09-08T12:00:00+00:00\"))\
239240
\n TableScan: test"
240241
.to_string();
241-
let actual = get_optimized_plan_formatted(&plan, &Utc::now());
242+
let actual = get_optimized_plan_formatted(plan, &Utc::now());
242243
assert_eq!(expected, actual);
243244
Ok(())
244245
}
@@ -262,7 +263,7 @@ fn now_less_than_timestamp() -> Result<()> {
262263
// expression down to a single constant (true)
263264
let expected = "Filter: Boolean(true)\
264265
\n TableScan: test";
265-
let actual = get_optimized_plan_formatted(&plan, &time);
266+
let actual = get_optimized_plan_formatted(plan, &time);
266267

267268
assert_eq!(expected, actual);
268269
Ok(())
@@ -290,7 +291,7 @@ fn select_date_plus_interval() -> Result<()> {
290291
// expression down to a single constant (true)
291292
let expected = r#"Projection: Date32("18636") AS to_timestamp(Utf8("2020-09-08T12:05:00+00:00")) + IntervalDayTime("528280977408")
292293
TableScan: test"#;
293-
let actual = get_optimized_plan_formatted(&plan, &time);
294+
let actual = get_optimized_plan_formatted(plan, &time);
294295

295296
assert_eq!(expected, actual);
296297
Ok(())
@@ -308,7 +309,7 @@ fn simplify_project_scalar_fn() -> Result<()> {
308309
// after simplify: t.f as "power(t.f, 1.0)"
309310
let expected = "Projection: test.f AS power(test.f,Float64(1))\
310311
\n TableScan: test";
311-
let actual = get_optimized_plan_formatted(&plan, &Utc::now());
312+
let actual = get_optimized_plan_formatted(plan, &Utc::now());
312313
assert_eq!(expected, actual);
313314
Ok(())
314315
}
@@ -330,7 +331,7 @@ fn simplify_scan_predicate() -> Result<()> {
330331
// before simplify: t.g = power(t.f, 1.0)
331332
// after simplify: (t.g = t.f) as "t.g = power(t.f, 1.0)"
332333
let expected = "TableScan: test, full_filters=[g = f AS g = power(f,Float64(1))]";
333-
let actual = get_optimized_plan_formatted(&plan, &Utc::now());
334+
let actual = get_optimized_plan_formatted(plan, &Utc::now());
334335
assert_eq!(expected, actual);
335336
Ok(())
336337
}
@@ -461,7 +462,7 @@ fn multiple_now() -> Result<()> {
461462
.build()?;
462463

463464
// expect the same timestamp appears in both exprs
464-
let actual = get_optimized_plan_formatted(&plan, &time);
465+
let actual = get_optimized_plan_formatted(plan, &time);
465466
let expected = format!(
466467
"Projection: TimestampNanosecond({}, Some(\"+00:00\")) AS now(), TimestampNanosecond({}, Some(\"+00:00\")) AS t2\
467468
\n TableScan: test",

datafusion/optimizer/src/optimizer.rs

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ use datafusion_common::alias::AliasGenerator;
2727
use datafusion_common::config::ConfigOptions;
2828
use datafusion_common::instant::Instant;
2929
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
30-
use datafusion_common::{DFSchema, DataFusionError, Result};
30+
use datafusion_common::{internal_err, DFSchema, DataFusionError, Result};
3131
use datafusion_expr::logical_plan::LogicalPlan;
3232

3333
use crate::common_subexpr_eliminate::CommonSubexprEliminate;
@@ -69,8 +69,12 @@ use crate::utils::log_plan;
6969
/// [`SessionState::add_optimizer_rule`]: https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionState.html#method.add_optimizer_rule
7070
7171
pub trait OptimizerRule {
72-
/// Try and rewrite `plan` to an optimized form, returning None if the plan cannot be
73-
/// optimized by this rule.
72+
/// Try and rewrite `plan` to an optimized form, returning None if the plan
73+
/// cannot be optimized by this rule.
74+
///
75+
/// Note this API will be deprecated in the future as it requires `clone`ing
76+
/// the input plan, which can be expensive. OptimizerRules should implement
77+
/// [`Self::rewrite`] instead.
7478
fn try_optimize(
7579
&self,
7680
plan: &LogicalPlan,
@@ -80,12 +84,31 @@ pub trait OptimizerRule {
8084
/// A human readable name for this optimizer rule
8185
fn name(&self) -> &str;
8286

83-
/// How should the rule be applied by the optimizer? See comments on [`ApplyOrder`] for details.
87+
/// How should the rule be applied by the optimizer? See comments on
88+
/// [`ApplyOrder`] for details.
8489
///
85-
/// If a rule use default None, it should traverse recursively plan inside itself
90+
/// If returns `None`, the default, the rule must handle recursion itself
8691
fn apply_order(&self) -> Option<ApplyOrder> {
8792
None
8893
}
94+
95+
/// Does this rule support rewriting owned plans (rather than by reference)?
96+
fn supports_rewrite(&self) -> bool {
97+
false
98+
}
99+
100+
/// Try to rewrite `plan` to an optimized form, returning `Transformed::yes`
101+
/// if the plan was rewritten and `Transformed::no` if it was not.
102+
///
103+
/// Note: this function is only called if [`Self::supports_rewrite`] returns
104+
/// true. Otherwise the Optimizer calls [`Self::try_optimize`]
105+
fn rewrite(
106+
&self,
107+
_plan: LogicalPlan,
108+
_config: &dyn OptimizerConfig,
109+
) -> Result<Transformed<LogicalPlan>, DataFusionError> {
110+
internal_err!("rewrite is not implemented for {}", self.name())
111+
}
89112
}
90113

91114
/// Options to control the DataFusion Optimizer.
@@ -298,12 +321,19 @@ fn optimize_plan_node(
298321
rule: &dyn OptimizerRule,
299322
config: &dyn OptimizerConfig,
300323
) -> Result<Transformed<LogicalPlan>> {
301-
// TODO: add API to OptimizerRule to allow rewriting by ownership
302-
rule.try_optimize(&plan, config)
303-
.map(|maybe_plan| match maybe_plan {
304-
Some(new_plan) => Transformed::yes(new_plan),
324+
if rule.supports_rewrite() {
325+
return rule.rewrite(plan, config);
326+
}
327+
328+
rule.try_optimize(&plan, config).map(|maybe_plan| {
329+
match maybe_plan {
330+
Some(new_plan) => {
331+
// if the node was rewritten by the optimizer, replace the node
332+
Transformed::yes(new_plan)
333+
}
305334
None => Transformed::no(plan),
306-
})
335+
}
336+
})
307337
}
308338

309339
impl Optimizer {

0 commit comments

Comments
 (0)