From d592275785419c5a9af2bec2437fc2396d040722 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Mon, 29 Apr 2024 17:19:18 -0700 Subject: [PATCH 1/4] First pass of https://github.com/apache/datafusion/issues/10296 --- .../optimizer/src/eliminate_nested_union.rs | 51 ++++++++++++------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/datafusion/optimizer/src/eliminate_nested_union.rs b/datafusion/optimizer/src/eliminate_nested_union.rs index da2a6a17214e..65571cb83644 100644 --- a/datafusion/optimizer/src/eliminate_nested_union.rs +++ b/datafusion/optimizer/src/eliminate_nested_union.rs @@ -18,7 +18,8 @@ //! [`EliminateNestedUnion`]: flattens nested `Union` to a single `Union` use crate::optimizer::ApplyOrder; use crate::{OptimizerConfig, OptimizerRule}; -use datafusion_common::Result; +use datafusion_common::tree_node::Transformed; +use datafusion_common::{internal_err, Result}; use datafusion_expr::expr_rewriter::coerce_plan_expr_for_schema; use datafusion_expr::{Distinct, LogicalPlan, Union}; use std::sync::Arc; @@ -37,9 +38,29 @@ impl EliminateNestedUnion { impl OptimizerRule for EliminateNestedUnion { fn try_optimize( &self, - plan: &LogicalPlan, + _plan: &LogicalPlan, _config: &dyn OptimizerConfig, ) -> Result> { + internal_err!("Should have called EliminateFilter::rewrite") + } + + fn name(&self) -> &str { + "eliminate_nested_union" + } + + fn apply_order(&self) -> Option { + Some(ApplyOrder::BottomUp) + } + + fn supports_rewrite(&self) -> bool { + true + } + + fn rewrite( + &self, + plan: LogicalPlan, + _config: &dyn OptimizerConfig, + ) -> Result> { match plan { LogicalPlan::Union(Union { inputs, schema }) => { let inputs = inputs @@ -47,12 +68,12 @@ impl OptimizerRule for EliminateNestedUnion { .flat_map(extract_plans_from_union) .collect::>(); - Ok(Some(LogicalPlan::Union(Union { + Ok(Transformed::yes(LogicalPlan::Union(Union { inputs, - schema: schema.clone(), + schema, }))) } - LogicalPlan::Distinct(Distinct::All(plan)) => match plan.as_ref() { + LogicalPlan::Distinct(Distinct::All(ref nested_plan)) => match nested_plan.as_ref() { LogicalPlan::Union(Union { inputs, schema }) => { let inputs = inputs .iter() @@ -60,26 +81,18 @@ impl OptimizerRule for EliminateNestedUnion { .flat_map(extract_plans_from_union) .collect::>(); - Ok(Some(LogicalPlan::Distinct(Distinct::All(Arc::new( - LogicalPlan::Union(Union { + Ok(Transformed::yes(LogicalPlan::Distinct(Distinct::All( + Arc::new(LogicalPlan::Union(Union { inputs, schema: schema.clone(), - }), - ))))) + })), + )))) } - _ => Ok(None), + _ => Ok(Transformed::no(plan)), }, - _ => Ok(None), + _ => Ok(Transformed::no(plan)), } } - - fn name(&self) -> &str { - "eliminate_nested_union" - } - - fn apply_order(&self) -> Option { - Some(ApplyOrder::BottomUp) - } } fn extract_plans_from_union(plan: &Arc) -> Vec> { From 1339a921e639bc1452f54ae13c2560aaa247f9e9 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Tue, 30 Apr 2024 14:34:27 -0700 Subject: [PATCH 2/4] Update datafusion/optimizer/src/eliminate_nested_union.rs Co-authored-by: Andrew Lamb --- datafusion/optimizer/src/eliminate_nested_union.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/optimizer/src/eliminate_nested_union.rs b/datafusion/optimizer/src/eliminate_nested_union.rs index 65571cb83644..a7ee0695220d 100644 --- a/datafusion/optimizer/src/eliminate_nested_union.rs +++ b/datafusion/optimizer/src/eliminate_nested_union.rs @@ -41,7 +41,7 @@ impl OptimizerRule for EliminateNestedUnion { _plan: &LogicalPlan, _config: &dyn OptimizerConfig, ) -> Result> { - internal_err!("Should have called EliminateFilter::rewrite") + internal_err!("Should have called EliminateNestedUnion::rewrite") } fn name(&self) -> &str { From 6fb1e55f071a7f8dcc4f36c405b0f0ec72a2fede Mon Sep 17 00:00:00 2001 From: Matt Green Date: Wed, 1 May 2024 12:20:23 -0700 Subject: [PATCH 3/4] fix formatting --- .../optimizer/src/eliminate_nested_union.rs | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/datafusion/optimizer/src/eliminate_nested_union.rs b/datafusion/optimizer/src/eliminate_nested_union.rs index a7ee0695220d..aa6f2b497531 100644 --- a/datafusion/optimizer/src/eliminate_nested_union.rs +++ b/datafusion/optimizer/src/eliminate_nested_union.rs @@ -73,23 +73,25 @@ impl OptimizerRule for EliminateNestedUnion { schema, }))) } - LogicalPlan::Distinct(Distinct::All(ref nested_plan)) => match nested_plan.as_ref() { - LogicalPlan::Union(Union { inputs, schema }) => { - let inputs = inputs - .iter() - .map(extract_plan_from_distinct) - .flat_map(extract_plans_from_union) - .collect::>(); - - Ok(Transformed::yes(LogicalPlan::Distinct(Distinct::All( - Arc::new(LogicalPlan::Union(Union { - inputs, - schema: schema.clone(), - })), - )))) + LogicalPlan::Distinct(Distinct::All(ref nested_plan)) => { + match nested_plan.as_ref() { + LogicalPlan::Union(Union { inputs, schema }) => { + let inputs = inputs + .iter() + .map(extract_plan_from_distinct) + .flat_map(extract_plans_from_union) + .collect::>(); + + Ok(Transformed::yes(LogicalPlan::Distinct(Distinct::All( + Arc::new(LogicalPlan::Union(Union { + inputs, + schema: schema.clone(), + })), + )))) + } + _ => Ok(Transformed::no(plan)), } - _ => Ok(Transformed::no(plan)), - }, + } _ => Ok(Transformed::no(plan)), } } From 257c4614bd06125aebfb5dc6a104f92ea528ab91 Mon Sep 17 00:00:00 2001 From: Matt Green Date: Wed, 1 May 2024 15:22:33 -0700 Subject: [PATCH 4/4] return original expr instead of clone() --- datafusion/expr/src/expr_rewriter/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index 14154189a126..700dd560ec0b 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -250,7 +250,7 @@ fn coerce_exprs_for_schema( _ => expr.cast_to(new_type, src_schema), } } else { - Ok(expr.clone()) + Ok(expr) } }) .collect::>()