@@ -561,6 +561,80 @@ mod tests {
561
561
Ok ( ( ) )
562
562
}
563
563
564
+ #[ tokio:: test]
565
+ async fn test_filter_statistics_column_level_nested_multiple ( ) -> Result < ( ) > {
566
+ // Table:
567
+ // a: min=1, max=100
568
+ // b: min=1, max=50
569
+ let schema = Schema :: new ( vec ! [
570
+ Field :: new( "a" , DataType :: Int32 , false ) ,
571
+ Field :: new( "b" , DataType :: Int32 , false ) ,
572
+ ] ) ;
573
+ let input = Arc :: new ( StatisticsExec :: new (
574
+ Statistics {
575
+ num_rows : Some ( 100 ) ,
576
+ column_statistics : Some ( vec ! [
577
+ ColumnStatistics {
578
+ min_value: Some ( ScalarValue :: Int32 ( Some ( 1 ) ) ) ,
579
+ max_value: Some ( ScalarValue :: Int32 ( Some ( 100 ) ) ) ,
580
+ ..Default :: default ( )
581
+ } ,
582
+ ColumnStatistics {
583
+ min_value: Some ( ScalarValue :: Int32 ( Some ( 1 ) ) ) ,
584
+ max_value: Some ( ScalarValue :: Int32 ( Some ( 50 ) ) ) ,
585
+ ..Default :: default ( )
586
+ } ,
587
+ ] ) ,
588
+ ..Default :: default ( )
589
+ } ,
590
+ schema. clone ( ) ,
591
+ ) ) ;
592
+
593
+ // WHERE a <= 25
594
+ let a_lte_25: Arc < dyn ExecutionPlan > = Arc :: new ( FilterExec :: try_new (
595
+ binary ( col ( "a" , & schema) ?, Operator :: LtEq , lit ( 25i32 ) , & schema) ?,
596
+ input,
597
+ ) ?) ;
598
+
599
+ // WHERE b > 45
600
+ let b_gt_5: Arc < dyn ExecutionPlan > = Arc :: new ( FilterExec :: try_new (
601
+ binary ( col ( "b" , & schema) ?, Operator :: Gt , lit ( 45i32 ) , & schema) ?,
602
+ a_lte_25,
603
+ ) ?) ;
604
+
605
+ // WHERE a >= 10
606
+ let filter: Arc < dyn ExecutionPlan > = Arc :: new ( FilterExec :: try_new (
607
+ binary ( col ( "a" , & schema) ?, Operator :: GtEq , lit ( 10i32 ) , & schema) ?,
608
+ b_gt_5,
609
+ ) ?) ;
610
+
611
+ let statistics = filter. statistics ( ) ;
612
+ // On a uniform distribution, only fifteen rows will satisfy the
613
+ // filter that 'a' proposed (a >= 10 AND a <= 25) (15/100) and only
614
+ // 5 rows will satisfy the filter that 'b' proposed (b > 45) (5/50).
615
+ //
616
+ // Which would result with a selectivity of '15/100 * 5/50' or 0.015
617
+ // and that means about %1.5 of the all rows (rounded up to 2 rows).
618
+ assert_eq ! ( statistics. num_rows, Some ( 2 ) ) ;
619
+ assert_eq ! (
620
+ statistics. column_statistics,
621
+ Some ( vec![
622
+ ColumnStatistics {
623
+ min_value: Some ( ScalarValue :: Int32 ( Some ( 10 ) ) ) ,
624
+ max_value: Some ( ScalarValue :: Int32 ( Some ( 25 ) ) ) ,
625
+ ..Default :: default ( )
626
+ } ,
627
+ ColumnStatistics {
628
+ min_value: Some ( ScalarValue :: Int32 ( Some ( 45 ) ) ) ,
629
+ max_value: Some ( ScalarValue :: Int32 ( Some ( 50 ) ) ) ,
630
+ ..Default :: default ( )
631
+ }
632
+ ] )
633
+ ) ;
634
+
635
+ Ok ( ( ) )
636
+ }
637
+
564
638
#[ tokio:: test]
565
639
async fn test_filter_statistics_when_input_stats_missing ( ) -> Result < ( ) > {
566
640
// Table:
0 commit comments