Skip to content

Commit 1a002bc

Browse files
authored
Minor: Improve documentation about optimizer (#9967)
* Minor: Improve documentation about optimizer * fix unused commit
1 parent f7b4ed0 commit 1a002bc

27 files changed

+79
-48
lines changed

datafusion/optimizer/src/analyzer/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
//! [`Analyzer`] and [`AnalyzerRule`]
1819
use std::sync::Arc;
1920

2021
use log::debug;

datafusion/optimizer/src/decorrelate.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
//! [`PullUpCorrelatedExpr`] converts correlated subqueries to `Joins`
19+
1820
use std::collections::{BTreeSet, HashMap};
1921
use std::ops::Deref;
2022

@@ -31,8 +33,11 @@ use datafusion_expr::utils::{conjunction, find_join_exprs, split_conjunction};
3133
use datafusion_expr::{expr, EmptyRelation, Expr, LogicalPlan, LogicalPlanBuilder};
3234
use datafusion_physical_expr::execution_props::ExecutionProps;
3335

34-
/// This struct rewrite the sub query plan by pull up the correlated expressions(contains outer reference columns) from the inner subquery's 'Filter'.
35-
/// It adds the inner reference columns to the 'Projection' or 'Aggregate' of the subquery if they are missing, so that they can be evaluated by the parent operator as the join condition.
36+
/// This struct rewrites the subquery plan by pulling up the correlated
37+
/// expressions (containing outer reference columns) from the inner subquery's
38+
/// 'Filter'. It adds the inner reference columns to the 'Projection' or
39+
/// 'Aggregate' of the subquery if they are missing, so that they can be
40+
/// evaluated by the parent operator as the join condition.
3641
pub struct PullUpCorrelatedExpr {
3742
pub join_filters: Vec<Expr>,
3843
// mapping from the plan to its holding correlated columns
@@ -54,7 +59,9 @@ pub struct PullUpCorrelatedExpr {
5459
/// This is used to handle the Count bug
5560
pub const UN_MATCHED_ROW_INDICATOR: &str = "__always_true";
5661

57-
/// Mapping from expr display name to its evaluation result on empty record batch (for example: 'count(*)' is 'ScalarValue(0)', 'count(*) + 2' is 'ScalarValue(2)')
62+
/// Mapping from expr display name to its evaluation result on empty record
63+
/// batch (for example: 'count(*)' is 'ScalarValue(0)', 'count(*) + 2' is
64+
/// 'ScalarValue(2)')
5865
pub type ExprResultMap = HashMap<String, Expr>;
5966

6067
impl TreeNodeRewriter for PullUpCorrelatedExpr {

datafusion/optimizer/src/decorrelate_predicate_subquery.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
//! [`DecorrelatePredicateSubquery`] converts `IN`/`EXISTS` subquery predicates to `SEMI`/`ANTI` joins
1819
use std::collections::BTreeSet;
1920
use std::ops::Deref;
2021
use std::sync::Arc;

datafusion/optimizer/src/eliminate_cross_join.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! Optimizer rule to eliminate cross join to inner join if join predicates are available in filters.
18+
//! [`EliminateCrossJoin`] converts `CROSS JOIN` to `INNER JOIN` if join predicates are available.
1919
use std::collections::HashSet;
2020
use std::sync::Arc;
2121

datafusion/optimizer/src/eliminate_duplicated_expr.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
//! [`EliminateDuplicatedExpr`] removes redundant expressions
19+
1820
use crate::optimizer::ApplyOrder;
1921
use crate::{OptimizerConfig, OptimizerRule};
2022
use datafusion_common::Result;

datafusion/optimizer/src/eliminate_filter.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! Optimizer rule to replace `where false or null` on a plan with an empty relation.
19-
//! This saves time in planning and executing the query.
20-
//! Note that this rule should be applied after simplify expressions optimizer rule.
18+
//! [`EliminateFilter`] replaces `where false` or `where null` with an empty relation.
19+
2120
use crate::optimizer::ApplyOrder;
2221
use datafusion_common::{Result, ScalarValue};
2322
use datafusion_expr::{
@@ -27,7 +26,11 @@ use datafusion_expr::{
2726

2827
use crate::{OptimizerConfig, OptimizerRule};
2928

30-
/// Optimization rule that eliminate the scalar value (true/false/null) filter with an [LogicalPlan::EmptyRelation]
29+
/// Optimization rule that eliminates the scalar value (true/false/null) filter
30+
/// with an [LogicalPlan::EmptyRelation]
31+
///
32+
/// This saves time in planning and executing the query.
33+
/// Note that this rule should be applied after simplify expressions optimizer rule.
3134
#[derive(Default)]
3235
pub struct EliminateFilter;
3336

datafusion/optimizer/src/eliminate_join.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
//! [`EliminateJoin`] rewrites `INNER JOIN` with `true`/`false` join conditions
1819
use crate::optimizer::ApplyOrder;
1920
use crate::{OptimizerConfig, OptimizerRule};
2021
use datafusion_common::{Result, ScalarValue};
@@ -24,7 +25,7 @@ use datafusion_expr::{
2425
CrossJoin, Expr,
2526
};
2627

27-
/// Eliminates joins when inner join condition is false.
28+
/// Eliminates joins when join condition is false.
2829
/// Replaces joins when inner join condition is true with a cross join.
2930
#[derive(Default)]
3031
pub struct EliminateJoin;

datafusion/optimizer/src/eliminate_limit.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,19 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! Optimizer rule to replace `LIMIT 0` or
19-
//! `LIMIT whose ancestor LIMIT's skip is greater than or equal to current's fetch`
20-
//! on a plan with an empty relation.
21-
//! This rule also removes OFFSET 0 from the [LogicalPlan]
22-
//! This saves time in planning and executing the query.
18+
//! [`EliminateLimit`] eliminates `LIMIT` when possible
2319
use crate::optimizer::ApplyOrder;
2420
use crate::{OptimizerConfig, OptimizerRule};
2521
use datafusion_common::Result;
2622
use datafusion_expr::logical_plan::{EmptyRelation, LogicalPlan};
2723

28-
/// Optimization rule that eliminate LIMIT 0 or useless LIMIT(skip:0, fetch:None).
29-
/// It can cooperate with `propagate_empty_relation` and `limit_push_down`.
24+
/// Optimizer rule to replace `LIMIT 0` or `LIMIT` whose ancestor LIMIT's skip is
25+
/// greater than or equal to current's fetch on a plan with an empty relation.
26+
///
27+
/// It can cooperate with `propagate_empty_relation` and
28+
/// `limit_push_down`.
29+
///
30+
/// This rule also removes OFFSET 0 from the [LogicalPlan]
3031
#[derive(Default)]
3132
pub struct EliminateLimit;
3233

datafusion/optimizer/src/eliminate_nested_union.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! Optimizer rule to replace nested unions to single union.
18+
//! [`EliminateNestedUnion`]: flattens nested `Union` to a single `Union`
1919
use crate::optimizer::ApplyOrder;
2020
use crate::{OptimizerConfig, OptimizerRule};
2121
use datafusion_common::Result;

datafusion/optimizer/src/eliminate_one_union.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! Optimizer rule to eliminate one union.
18+
//! [`EliminateOneUnion`] eliminates single element `Union`
1919
use crate::{OptimizerConfig, OptimizerRule};
2020
use datafusion_common::Result;
2121
use datafusion_expr::logical_plan::{LogicalPlan, Union};

datafusion/optimizer/src/eliminate_outer_join.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! Optimizer rule to eliminate left/right/full join to inner join if possible.
18+
//! [`EliminateOuterJoin`] converts `LEFT/RIGHT/FULL` joins to `INNER` joins
1919
use crate::{OptimizerConfig, OptimizerRule};
2020
use datafusion_common::{Column, DFSchema, Result};
2121
use datafusion_expr::logical_plan::{Join, JoinType, LogicalPlan};

datafusion/optimizer/src/extract_equijoin_predicate.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! [`ExtractEquijoinPredicate`] rule that extracts equijoin predicates
18+
//! [`ExtractEquijoinPredicate`] identifies equality join (equijoin) predicates
1919
use crate::optimizer::ApplyOrder;
2020
use crate::{OptimizerConfig, OptimizerRule};
2121
use datafusion_common::DFSchema;

datafusion/optimizer/src/filter_null_join_keys.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! The FilterNullJoinKeys rule will identify inner joins with equi-join conditions
19-
//! where the join key is nullable on one side and non-nullable on the other side
20-
//! and then insert an `IsNotNull` filter on the nullable side since null values
21-
//! can never match.
18+
//! [`FilterNullJoinKeys`] adds `IsNotNull` filters to nullable join inputs when the other side isn't nullable
2219
2320
use crate::optimizer::ApplyOrder;
2421
use crate::{OptimizerConfig, OptimizerRule};

datafusion/optimizer/src/lib.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,19 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
//! # DataFusion Optimizer
19+
//!
20+
//! Contains rules for rewriting [`LogicalPlan`]s
21+
//!
22+
//! 1. [`Analyzer`] applies [`AnalyzerRule`]s to transform `LogicalPlan`s
23+
//! to make the plan valid prior to the rest of the DataFusion optimization
24+
//! process (for example, [`TypeCoercion`]).
25+
//!
26+
//! 2. [`Optimizer`] applies [`OptimizerRule`]s to transform `LogicalPlan`s
27+
//! into equivalent, but more efficient plans.
28+
//!
29+
//! [`LogicalPlan`]: datafusion_expr::LogicalPlan
30+
//! [`TypeCoercion`]: analyzer::type_coercion::TypeCoercion
1831
pub mod analyzer;
1932
pub mod common_subexpr_eliminate;
2033
pub mod decorrelate;
@@ -46,7 +59,8 @@ pub mod utils;
4659
#[cfg(test)]
4760
pub mod test;
4861

49-
pub use optimizer::{OptimizerConfig, OptimizerContext, OptimizerRule};
62+
pub use analyzer::{Analyzer, AnalyzerRule};
63+
pub use optimizer::{Optimizer, OptimizerConfig, OptimizerContext, OptimizerRule};
5064
pub use utils::optimize_children;
5165

5266
mod plan_signature;

datafusion/optimizer/src/optimize_projections.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! Optimizer rule to prune unnecessary columns from intermediate schemas
19-
//! inside the [`LogicalPlan`]. This rule:
20-
//! - Removes unnecessary columns that do not appear at the output and/or are
21-
//! not used during any computation step.
22-
//! - Adds projections to decrease table column size before operators that
23-
//! benefit from a smaller memory footprint at its input.
24-
//! - Removes unnecessary [`LogicalPlan::Projection`]s from the [`LogicalPlan`].
18+
//! [`OptimizeProjections`] identifies and eliminates unused columns
2519
2620
use std::collections::HashSet;
2721
use std::sync::Arc;
@@ -44,7 +38,13 @@ use datafusion_expr::utils::inspect_expr_pre;
4438
use hashbrown::HashMap;
4539
use itertools::{izip, Itertools};
4640

47-
/// A rule for optimizing logical plans by removing unused columns/fields.
41+
/// Optimizer rule to prune unnecessary columns from intermediate schemas
42+
/// inside the [`LogicalPlan`]. This rule:
43+
/// - Removes unnecessary columns that do not appear at the output and/or are
44+
/// not used during any computation step.
45+
/// - Adds projections to decrease table column size before operators that
46+
/// benefit from a smaller memory footprint at its input.
47+
/// - Removes unnecessary [`LogicalPlan::Projection`]s from the [`LogicalPlan`].
4848
///
4949
/// `OptimizeProjections` is an optimizer rule that identifies and eliminates
5050
/// columns from a logical plan that are not used by downstream operations.

datafusion/optimizer/src/optimizer.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! Query optimizer traits
18+
//! [`Optimizer`] and [`OptimizerRule`]
1919
2020
use std::collections::HashSet;
2121
use std::sync::Arc;
@@ -54,7 +54,7 @@ use datafusion_expr::logical_plan::LogicalPlan;
5454
use chrono::{DateTime, Utc};
5555
use log::{debug, warn};
5656

57-
/// `OptimizerRule` transforms one [`LogicalPlan`] into another which
57+
/// `OptimizerRule`s transform one [`LogicalPlan`] into another which
5858
/// computes the same results, but in a potentially more efficient
5959
/// way. If there are no suitable transformations for the input plan,
6060
/// the optimizer should simply return it unmodified.

datafusion/optimizer/src/propagate_empty_relation.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
//! [`PropagateEmptyRelation`] eliminates nodes fed by `EmptyRelation`
1819
use datafusion_common::{plan_err, Result};
1920
use datafusion_expr::logical_plan::LogicalPlan;
2021
use datafusion_expr::{EmptyRelation, JoinType, Projection, Union};

datafusion/optimizer/src/push_down_filter.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@
1212
// specific language governing permissions and limitations
1313
// under the License.
1414

15-
//! [`PushDownFilter`] Moves filters so they are applied as early as possible in
16-
//! the plan.
15+
//! [`PushDownFilter`] applies filters as early as possible
1716
1817
use std::collections::{HashMap, HashSet};
1918
use std::sync::Arc;

datafusion/optimizer/src/push_down_limit.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! Optimizer rule to push down LIMIT in the query plan
19-
//! It will push down through projection, limits (taking the smaller limit)
18+
//! [`PushDownLimit`] pushes `LIMIT` earlier in the query plan
2019
2120
use std::sync::Arc;
2221

@@ -29,7 +28,9 @@ use datafusion_expr::logical_plan::{
2928
};
3029
use datafusion_expr::CrossJoin;
3130

32-
/// Optimization rule that tries to push down LIMIT.
31+
/// Optimization rule that tries to push down `LIMIT`.
32+
///
33+
/// It will push down through projection, limits (taking the smaller limit)
3334
#[derive(Default)]
3435
pub struct PushDownLimit {}
3536

datafusion/optimizer/src/push_down_projection.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,6 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! Projection Push Down optimizer rule ensures that only referenced columns are
19-
//! loaded into memory
20-
2118
#[cfg(test)]
2219
mod tests {
2320
use std::collections::HashMap;

datafusion/optimizer/src/replace_distinct_aggregate.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
//! [`ReplaceDistinctWithAggregate`] replaces `DISTINCT ...` with `GROUP BY ...`
1819
use crate::optimizer::{ApplyOrder, ApplyOrder::BottomUp};
1920
use crate::{OptimizerConfig, OptimizerRule};
2021

datafusion/optimizer/src/rewrite_disjunctive_predicate.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
//! [`RewriteDisjunctivePredicate`] rewrites predicates to reduce redundancy
19+
1820
use crate::optimizer::ApplyOrder;
1921
use crate::{OptimizerConfig, OptimizerRule};
2022
use datafusion_common::Result;

datafusion/optimizer/src/scalar_subquery_to_join.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
//! [`ScalarSubqueryToJoin`] rewrites scalar subquery filters to `JOIN`s
19+
1820
use std::collections::{BTreeSet, HashMap};
1921
use std::sync::Arc;
2022

datafusion/optimizer/src/simplify_expressions/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
//! [`SimplifyExpressions`] simplifies expressions in the logical plan,
19+
//! [`ExprSimplifier`] simplifies individual `Expr`s.
20+
1821
pub mod expr_simplifier;
1922
mod guarantees;
2023
mod inlist_simplifier;

datafusion/optimizer/src/single_distinct_to_groupby.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! single distinct to group by optimizer rule
18+
//! [`SingleDistinctToGroupBy`] replaces `AGG(DISTINCT ..)` with `AGG(..) GROUP BY ..`
1919
2020
use std::sync::Arc;
2121

datafusion/optimizer/src/unwrap_cast_in_comparison.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! Unwrap-cast binary comparison rule can be used to the binary/inlist comparison expr now, and other type
19-
//! of expr can be added if needed.
20-
//! This rule can reduce adding the `Expr::Cast` the expr instead of adding the `Expr::Cast` to literal expr.
18+
//! [`UnwrapCastInComparison`] rewrites `CAST(col) = lit` to `col = CAST(lit)`
2119
2220
use std::cmp::Ordering;
2321
use std::sync::Arc;

datafusion/optimizer/src/utils.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! Collection of utility functions that are leveraged by the query optimizer rules
18+
//! Utility functions leveraged by the query optimizer rules
1919
2020
use std::collections::{BTreeSet, HashMap};
2121

0 commit comments

Comments
 (0)