Skip to content

Commit 490fdf9

Browse files
authored
Minor: format comments in filter pushdown rule (#10437)
1 parent cf0cba7 commit 490fdf9

File tree

1 file changed

+74
-73
lines changed

1 file changed

+74
-73
lines changed

datafusion/optimizer/src/push_down_filter.rs

Lines changed: 74 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -131,25 +131,25 @@ use crate::{OptimizerConfig, OptimizerRule};
131131
#[derive(Default)]
132132
pub struct PushDownFilter {}
133133

134-
// For a given JOIN logical plan, determine whether each side of the join is preserved.
135-
// We say a join side is preserved if the join returns all or a subset of the rows from
136-
// the relevant side, such that each row of the output table directly maps to a row of
137-
// the preserved input table. If a table is not preserved, it can provide extra null rows.
138-
// That is, there may be rows in the output table that don't directly map to a row in the
139-
// input table.
140-
//
141-
// For example:
142-
// - In an inner join, both sides are preserved, because each row of the output
143-
// maps directly to a row from each side.
144-
// - In a left join, the left side is preserved and the right is not, because
145-
// there may be rows in the output that don't directly map to a row in the
146-
// right input (due to nulls filling where there is no match on the right).
147-
//
148-
// This is important because we can always push down post-join filters to a preserved
149-
// side of the join, assuming the filter only references columns from that side. For the
150-
// non-preserved side it can be more tricky.
151-
//
152-
// Returns a tuple of booleans - (left_preserved, right_preserved).
134+
/// For a given JOIN logical plan, determine whether each side of the join is preserved.
135+
/// We say a join side is preserved if the join returns all or a subset of the rows from
136+
/// the relevant side, such that each row of the output table directly maps to a row of
137+
/// the preserved input table. If a table is not preserved, it can provide extra null rows.
138+
/// That is, there may be rows in the output table that don't directly map to a row in the
139+
/// input table.
140+
///
141+
/// For example:
142+
/// - In an inner join, both sides are preserved, because each row of the output
143+
/// maps directly to a row from each side.
144+
/// - In a left join, the left side is preserved and the right is not, because
145+
/// there may be rows in the output that don't directly map to a row in the
146+
/// right input (due to nulls filling where there is no match on the right).
147+
///
148+
/// This is important because we can always push down post-join filters to a preserved
149+
/// side of the join, assuming the filter only references columns from that side. For the
150+
/// non-preserved side it can be more tricky.
151+
///
152+
/// Returns a tuple of booleans - (left_preserved, right_preserved).
153153
fn lr_is_preserved(plan: &LogicalPlan) -> Result<(bool, bool)> {
154154
match plan {
155155
LogicalPlan::Join(Join { join_type, .. }) => match join_type {
@@ -169,9 +169,10 @@ fn lr_is_preserved(plan: &LogicalPlan) -> Result<(bool, bool)> {
169169
}
170170
}
171171

172-
// For a given JOIN logical plan, determine whether each side of the join is preserved
173-
// in terms on join filtering.
174-
// Predicates from join filter can only be pushed to preserved join side.
172+
/// For a given JOIN logical plan, determine whether each side of the join is preserved
173+
/// in terms of join filtering.
174+
///
175+
/// Predicates from the join filter can only be pushed to a preserved join side.
175176
fn on_lr_is_preserved(plan: &LogicalPlan) -> Result<(bool, bool)> {
176177
match plan {
177178
LogicalPlan::Join(Join { join_type, .. }) => match join_type {
@@ -190,11 +191,11 @@ fn on_lr_is_preserved(plan: &LogicalPlan) -> Result<(bool, bool)> {
190191
}
191192
}
192193

193-
// Determine which predicates in state can be pushed down to a given side of a join.
194-
// To determine this, we need to know the schema of the relevant join side and whether
195-
// or not the side's rows are preserved when joining. If the side is not preserved, we
196-
// do not push down anything. Otherwise we can push down predicates where all of the
197-
// relevant columns are contained on the relevant join side's schema.
194+
/// Determine which predicates in state can be pushed down to a given side of a join.
195+
/// To determine this, we need to know the schema of the relevant join side and whether
196+
/// or not the side's rows are preserved when joining. If the side is not preserved, we
197+
/// do not push down anything. Otherwise we can push down predicates where all of the
198+
/// relevant columns are contained on the relevant join side's schema.
198199
fn can_pushdown_join_predicate(predicate: &Expr, schema: &DFSchema) -> Result<bool> {
199200
let schema_columns = schema
200201
.iter()
@@ -215,7 +216,7 @@ fn can_pushdown_join_predicate(predicate: &Expr, schema: &DFSchema) -> Result<bo
215216
== columns.len())
216217
}
217218

218-
// Determine whether the predicate can evaluate as the join conditions
219+
/// Determine whether the predicate can be evaluated as a join condition.
219220
fn can_evaluate_as_join_condition(predicate: &Expr) -> Result<bool> {
220221
let mut is_evaluate = true;
221222
predicate.apply(|expr| match expr {
@@ -261,39 +262,39 @@ fn can_evaluate_as_join_condition(predicate: &Expr) -> Result<bool> {
261262
Ok(is_evaluate)
262263
}
263264

264-
// examine OR clause to see if any useful clauses can be extracted and push down.
265-
// extract at least one qual from each sub clauses of OR clause, then form the quals
266-
// to new OR clause as predicate.
267-
//
268-
// Filter: (a = c and a < 20) or (b = d and b > 10)
269-
// join/crossjoin:
270-
// TableScan: projection=[a, b]
271-
// TableScan: projection=[c, d]
272-
//
273-
// is optimized to
274-
//
275-
// Filter: (a = c and a < 20) or (b = d and b > 10)
276-
// join/crossjoin:
277-
// Filter: (a < 20) or (b > 10)
278-
// TableScan: projection=[a, b]
279-
// TableScan: projection=[c, d]
280-
//
281-
// In general, predicates of this form:
282-
//
283-
// (A AND B) OR (C AND D)
284-
//
285-
// will be transformed to
286-
//
287-
// ((A AND B) OR (C AND D)) AND (A OR C)
288-
//
289-
// OR
290-
//
291-
// ((A AND B) OR (C AND D)) AND ((A AND B) OR C)
292-
//
293-
// OR
294-
//
295-
// do nothing.
296-
//
265+
/// Examine an OR clause to see if any useful clauses can be extracted and pushed down.
266+
/// Extract at least one qual from each sub-clause of the OR clause, then form the quals
267+
/// into a new OR clause to use as the predicate.
268+
///
269+
/// # Example
270+
/// ```text
271+
/// Filter: (a = c and a < 20) or (b = d and b > 10)
272+
/// join/crossjoin:
273+
/// TableScan: projection=[a, b]
274+
/// TableScan: projection=[c, d]
275+
/// ```
276+
///
277+
/// is optimized to
278+
///
279+
/// ```text
280+
/// Filter: (a = c and a < 20) or (b = d and b > 10)
281+
/// join/crossjoin:
282+
/// Filter: (a < 20) or (b > 10)
283+
/// TableScan: projection=[a, b]
284+
/// TableScan: projection=[c, d]
285+
/// ```
286+
///
287+
/// In general, predicates of this form:
288+
///
289+
/// ```sql
290+
/// (A AND B) OR (C AND D)
291+
/// ```
292+
///
293+
/// will be transformed to one of:
294+
///
295+
/// * `((A AND B) OR (C AND D)) AND (A OR C)`
296+
/// * `((A AND B) OR (C AND D)) AND ((A AND B) OR C)`
297+
/// * do nothing.
297298
fn extract_or_clauses_for_join<'a>(
298299
filters: &'a [Expr],
299300
schema: &'a DFSchema,
@@ -329,17 +330,17 @@ fn extract_or_clauses_for_join<'a>(
329330
})
330331
}
331332

332-
// extract qual from OR sub-clause.
333-
//
334-
// A qual is extracted if it only contains set of column references in schema_columns.
335-
//
336-
// For AND clause, we extract from both sub-clauses, then make new AND clause by extracted
337-
// clauses if both extracted; Otherwise, use the extracted clause from any sub-clauses or None.
338-
//
339-
// For OR clause, we extract from both sub-clauses, then make new OR clause by extracted clauses if both extracted;
340-
// Otherwise, return None.
341-
//
342-
// For other clause, apply the rule above to extract clause.
333+
/// Extract a qual from an OR sub-clause.
334+
///
335+
/// A qual is extracted only if every column it references is contained in `schema_columns`.
336+
///
337+
/// For an AND clause, we extract from both sub-clauses, then make a new AND clause from the extracted
338+
/// clauses if both were extracted; otherwise, use whichever sub-clause was extracted, or None.
339+
///
340+
/// For an OR clause, we extract from both sub-clauses, then make a new OR clause from the extracted clauses if both were extracted;
341+
/// Otherwise, return None.
342+
///
343+
/// For any other clause, apply the rule above to extract the clause.
343344
fn extract_or_clause(expr: &Expr, schema_columns: &HashSet<Column>) -> Option<Expr> {
344345
let mut predicate = None;
345346

@@ -396,7 +397,7 @@ fn extract_or_clause(expr: &Expr, schema_columns: &HashSet<Column>) -> Option<Ex
396397
predicate
397398
}
398399

399-
// push down join/cross-join
400+
/// Push predicates down through a join/cross-join.
400401
fn push_down_all_join(
401402
predicates: Vec<Expr>,
402403
infer_predicates: Vec<Expr>,

0 commit comments

Comments
 (0)