Skip to content

Commit 629c6e4

Browse files
committed
New test & minor code suggestions
1 parent 2e55f59 commit 629c6e4

File tree

2 files changed

+78
-2
lines changed

2 files changed

+78
-2
lines changed

datafusion/core/src/physical_plan/filter.rs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,80 @@ mod tests {
561561
Ok(())
562562
}
563563

564+
/// Checks the statistics reported by three stacked `FilterExec` nodes whose
/// predicates touch two different columns (`a <= 25`, then `b > 45`, then
/// `a >= 10`): both the estimated row count and the per-column min/max
/// bounds of the outermost filter are asserted.
#[tokio::test]
565+
async fn test_filter_statistics_column_level_nested_multiple() -> Result<()> {
566+
// Table:
567+
// a: min=1, max=100
568+
// b: min=1, max=50
569+
let schema = Schema::new(vec![
570+
Field::new("a", DataType::Int32, false),
571+
Field::new("b", DataType::Int32, false),
572+
]);
573+
let input = Arc::new(StatisticsExec::new(
574+
Statistics {
575+
num_rows: Some(100),
576+
column_statistics: Some(vec![
577+
ColumnStatistics {
578+
min_value: Some(ScalarValue::Int32(Some(1))),
579+
max_value: Some(ScalarValue::Int32(Some(100))),
580+
..Default::default()
581+
},
582+
ColumnStatistics {
583+
min_value: Some(ScalarValue::Int32(Some(1))),
584+
max_value: Some(ScalarValue::Int32(Some(50))),
585+
..Default::default()
586+
},
587+
]),
588+
..Default::default()
589+
},
590+
schema.clone(),
591+
));
592+
593+
// WHERE a <= 25
594+
let a_lte_25: Arc<dyn ExecutionPlan> = Arc::new(FilterExec::try_new(
595+
binary(col("a", &schema)?, Operator::LtEq, lit(25i32), &schema)?,
596+
input,
597+
)?);
598+
599+
// WHERE b > 45
// NOTE(review): the binding below is named `b_gt_5`, but the predicate it
// wraps is `b > 45`; `b_gt_45` would match the other bindings' naming.
600+
let b_gt_5: Arc<dyn ExecutionPlan> = Arc::new(FilterExec::try_new(
601+
binary(col("b", &schema)?, Operator::Gt, lit(45i32), &schema)?,
602+
a_lte_25,
603+
)?);
604+
605+
// WHERE a >= 10
606+
let filter: Arc<dyn ExecutionPlan> = Arc::new(FilterExec::try_new(
607+
binary(col("a", &schema)?, Operator::GtEq, lit(10i32), &schema)?,
608+
b_gt_5,
609+
)?);
610+
611+
let statistics = filter.statistics();
612+
// Assuming a uniform distribution, the two predicates on 'a' combined
613+
// (a >= 10 AND a <= 25) keep 15/100 of its value range, and the
614+
// predicate on 'b' (b > 45) keeps 5/50 of its value range.
615+
//
616+
// This results in a selectivity of '15/100 * 5/50' or 0.015, which
617+
// means about 1.5% of the 100 input rows (rounded up to 2 rows).
618+
assert_eq!(statistics.num_rows, Some(2));
// NOTE(review): the expected lower bound for 'b' below is 45 even though
// the predicate is strict (b > 45); presumably the analysis does not
// tighten exclusive bounds -- confirm against the boundary analysis.
619+
assert_eq!(
620+
statistics.column_statistics,
621+
Some(vec![
622+
ColumnStatistics {
623+
min_value: Some(ScalarValue::Int32(Some(10))),
624+
max_value: Some(ScalarValue::Int32(Some(25))),
625+
..Default::default()
626+
},
627+
ColumnStatistics {
628+
min_value: Some(ScalarValue::Int32(Some(45))),
629+
max_value: Some(ScalarValue::Int32(Some(50))),
630+
..Default::default()
631+
}
632+
])
633+
);
634+
635+
Ok(())
636+
}
637+
564638
#[tokio::test]
565639
async fn test_filter_statistics_when_input_stats_missing() -> Result<()> {
566640
// Table:

datafusion/physical-expr/src/expressions/binary.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -719,7 +719,9 @@ impl PartialEq<dyn Any> for BinaryExpr {
719719

720720
// Analyze the comparison between an expression (on the left) and a scalar value
721721
// (on the right). The new boundaries will indicate whether it is always true, always
722-
// false, or unknown (with a probablistic selectivity value attached).
722+
// false, or unknown (with a probabilistic selectivity value attached). This operation
723+
// will also include the new upper/lower boundaries for the operand on the left if
724+
// they can be determined.
723725
fn analyze_expr_scalar_comparison(
724726
context: AnalysisContext,
725727
op: &Operator,
@@ -3180,8 +3182,8 @@ mod tests {
31803182
let analysis_ctx =
31813183
analyze_expr_scalar_comparison(context, &operator, &left, right);
31823184
let boundaries = analysis_ctx
3183-
.clone()
31843185
.boundaries
3186+
.as_ref()
31853187
.expect("Analysis must complete for this test!");
31863188

31873189
assert_eq!(

0 commit comments

Comments
 (0)