@@ -657,10 +657,8 @@ impl PhysicalExpr for BinaryExpr {
657
657
) ) )
658
658
}
659
659
660
- /// Return the boundaries of this binary expression's result. If the expression itself
661
- /// is a comparison which changes the boundaries of one of its inputs (a = 20 would pin
662
- /// a to 20), then it might update the input's boundaries directly on the context.
663
- fn analyze ( & self , context : & mut AnalysisContext ) -> Option < ExprBoundaries > {
660
+ /// Return the boundaries of this binary expression's result.
661
+ fn boundaries ( & self , context : & AnalysisContext ) -> Option < ExprBoundaries > {
664
662
match & self . op {
665
663
Operator :: Eq
666
664
| Operator :: Gt
@@ -669,8 +667,8 @@ impl PhysicalExpr for BinaryExpr {
669
667
| Operator :: GtEq => {
670
668
// We currently only support comparison when we know at least one of the sides is
671
669
// a known value (a scalar). This includes predicates like a > 20 or 5 > a.
672
- let left_boundaries = self . left . analyze ( context) ?;
673
- let right_boundaries = self . right . analyze ( context) ?;
670
+ let left_boundaries = self . left . boundaries ( context) ?;
671
+ let right_boundaries = self . right . boundaries ( context) ?;
674
672
let ( op, left, right) = match right_boundaries. reduce ( ) {
675
673
Some ( right_value) => {
676
674
// We know the right side is a scalar, so we can use the operator as is
@@ -707,13 +705,13 @@ impl PartialEq<dyn Any> for BinaryExpr {
707
705
// false, or unknown (with a probabilistic selectivity value attached).
708
706
fn analyze_expr_scalar_comparison (
709
707
op : & Operator ,
710
- context : & mut AnalysisContext ,
708
+ context : & AnalysisContext ,
711
709
left : & Arc < dyn PhysicalExpr > ,
712
710
right : ScalarValue ,
713
711
) -> Option < ExprBoundaries > {
714
- let left_bounds = left. analyze ( context) ?;
715
- let left_min = left_bounds. min_value . clone ( ) ;
716
- let left_max = left_bounds. max_value . clone ( ) ;
712
+ let left_bounds = left. boundaries ( context) ?;
713
+ let left_min = left_bounds. min_value ;
714
+ let left_max = left_bounds. max_value ;
717
715
718
716
// Direct selectivity is applicable when we can determine that this comparison will
719
717
// always be true or false (e.g. `x > 10` where the `x`'s min value is 11 or `a < 5`
@@ -762,43 +760,6 @@ fn analyze_expr_scalar_comparison(
762
760
}
763
761
} ?;
764
762
765
- let ( left_min, left_max) = match op {
766
- // TODO: for lt/gt, we technically should shrink the possibility space
767
- // by one since a < 5 means that 5 is not a possible value for `a`. However,
768
- // it is currently tricky to do so (e.g. for floats, we can get away with 4.999
769
- // so we need a smarter logic to find out what is the closest value that is
770
- // different from the scalar_value).
771
- Operator :: Lt | Operator :: LtEq => {
772
- // We only want to update the upper bound when we know it will help us (e.g.
773
- // it is actually smaller than what we have right now) and it is a valid
774
- // value (e.g. [0, 100] < -100 would update the boundaries to [0, -100] if
775
- // there weren't the selectivity check).
776
- if right < left_max && selectivity > 0.0 {
777
- ( left_min, right)
778
- } else {
779
- ( left_min, left_max)
780
- }
781
- }
782
- Operator :: Gt | Operator :: GtEq => {
783
- // Same as above, but this time we want to limit the lower bound.
784
- if right > left_min && selectivity > 0.0 {
785
- ( right, left_max)
786
- } else {
787
- ( left_min, left_max)
788
- }
789
- }
790
- // For equality, we don't have the range problem so even if the selectivity
791
- // is 0.0, we can still update the boundaries.
792
- Operator :: Eq => ( right. clone ( ) , right) ,
793
- _ => unreachable ! ( ) ,
794
- } ;
795
-
796
- // The context represents all the knowledge we have gathered during the
797
- // analysis process, which we can now add more since the expression's upper
798
- // and lower boundaries might have changed.
799
- let left_bounds = ExprBoundaries :: new ( left_min, left_max, left_bounds. distinct_count ) ;
800
- left. apply ( context, & left_bounds) ;
801
-
802
763
// The selectivity can't be greater than 1.0.
803
764
assert ! ( selectivity <= 1.0 ) ;
804
765
@@ -3007,13 +2968,12 @@ mod tests {
3007
2968
( ( Operator :: GtEq , 200 ) , ( 0.0 , 1 , 100 ) ) ,
3008
2969
] ;
3009
2970
3010
- for ( ( operator, rhs) , ( exp_selectivity, exp_min, exp_max) ) in cases {
3011
- let mut context =
3012
- AnalysisContext :: from_statistics ( & schema, statistics. clone ( ) ) ;
2971
+ for ( ( operator, rhs) , ( exp_selectivity, _, _) ) in cases {
2972
+ let context = AnalysisContext :: from_statistics ( & schema, statistics. clone ( ) ) ;
3013
2973
let left = col ( "a" , & schema) . unwrap ( ) ;
3014
2974
let right = ScalarValue :: Int64 ( Some ( rhs) ) ;
3015
2975
let boundaries =
3016
- analyze_expr_scalar_comparison ( & operator, & mut context, & left, right)
2976
+ analyze_expr_scalar_comparison ( & operator, & context, & left, right)
3017
2977
. expect ( "this case should not return None" ) ;
3018
2978
3019
2979
assert_eq ! (
@@ -3036,12 +2996,6 @@ mod tests {
3036
2996
assert_eq ! ( boundaries. min_value, ScalarValue :: Boolean ( Some ( false ) ) ) ;
3037
2997
assert_eq ! ( boundaries. max_value, ScalarValue :: Boolean ( Some ( true ) ) ) ;
3038
2998
}
3039
-
3040
- let left_boundaries = left
3041
- . analyze ( & mut context)
3042
- . expect ( "this case should not return None" ) ;
3043
- assert_eq ! ( left_boundaries. min_value, ScalarValue :: Int64 ( Some ( exp_min) ) ) ;
3044
- assert_eq ! ( left_boundaries. max_value, ScalarValue :: Int64 ( Some ( exp_max) ) ) ;
3045
2999
}
3046
3000
Ok ( ( ) )
3047
3001
}
@@ -3083,13 +3037,12 @@ mod tests {
3083
3037
( ( Operator :: GtEq , 50.7 ) , ( 1.0 / distance, 50.7 , 50.7 ) ) ,
3084
3038
] ;
3085
3039
3086
- for ( ( operator, rhs) , ( exp_selectivity, exp_min, exp_max) ) in cases {
3087
- let mut context =
3088
- AnalysisContext :: from_statistics ( & schema, statistics. clone ( ) ) ;
3040
+ for ( ( operator, rhs) , ( exp_selectivity, _, _) ) in cases {
3041
+ let context = AnalysisContext :: from_statistics ( & schema, statistics. clone ( ) ) ;
3089
3042
let left = col ( "a" , & schema) . unwrap ( ) ;
3090
3043
let right = ScalarValue :: from ( rhs) ;
3091
3044
let boundaries =
3092
- analyze_expr_scalar_comparison ( & operator, & mut context, & left, right)
3045
+ analyze_expr_scalar_comparison ( & operator, & context, & left, right)
3093
3046
. expect ( "this case should not return None" ) ;
3094
3047
3095
3048
assert_eq ! (
@@ -3112,12 +3065,6 @@ mod tests {
3112
3065
assert_eq ! ( boundaries. min_value, ScalarValue :: from( false ) ) ;
3113
3066
assert_eq ! ( boundaries. max_value, ScalarValue :: from( true ) ) ;
3114
3067
}
3115
-
3116
- let left_boundaries = left
3117
- . analyze ( & mut context)
3118
- . expect ( "this case should not return None" ) ;
3119
- assert_eq ! ( left_boundaries. min_value, ScalarValue :: from( exp_min) ) ;
3120
- assert_eq ! ( left_boundaries. max_value, ScalarValue :: from( exp_max) ) ;
3121
3068
}
3122
3069
Ok ( ( ) )
3123
3070
}
@@ -3137,18 +3084,12 @@ mod tests {
3137
3084
& schema,
3138
3085
) ;
3139
3086
3140
- let mut context = AnalysisContext :: from_statistics ( & schema, statistics) ;
3087
+ let context = AnalysisContext :: from_statistics ( & schema, statistics) ;
3141
3088
let predicate_boundaries = gt
3142
- . analyze ( & mut context)
3089
+ . boundaries ( & context)
3143
3090
. expect ( "boundaries should not be None" ) ;
3144
3091
assert_eq ! ( predicate_boundaries. selectivity, Some ( 0.76 ) ) ;
3145
3092
3146
- let col_boundaries = a
3147
- . analyze ( & mut context)
3148
- . expect ( "boundaries should not be None" ) ;
3149
- assert_eq ! ( col_boundaries. min_value, ScalarValue :: from( 25 ) ) ;
3150
- assert_eq ! ( col_boundaries. max_value, ScalarValue :: from( 100 ) ) ;
3151
-
3152
3093
Ok ( ( ) )
3153
3094
}
3154
3095
@@ -3171,18 +3112,12 @@ mod tests {
3171
3112
& schema,
3172
3113
) ;
3173
3114
3174
- let mut context = AnalysisContext :: from_statistics ( & schema, statistics) ;
3115
+ let context = AnalysisContext :: from_statistics ( & schema, statistics) ;
3175
3116
let predicate_boundaries = gt
3176
- . analyze ( & mut context)
3117
+ . boundaries ( & context)
3177
3118
. expect ( "boundaries should not be None" ) ;
3178
3119
assert_eq ! ( predicate_boundaries. selectivity, Some ( 0.5 ) ) ;
3179
3120
3180
- let col_boundaries = a
3181
- . analyze ( & mut context)
3182
- . expect ( "boundaries should not be None" ) ;
3183
- assert_eq ! ( col_boundaries. min_value, ScalarValue :: from( 1 ) ) ;
3184
- assert_eq ! ( col_boundaries. max_value, ScalarValue :: from( 50 ) ) ;
3185
-
3186
3121
Ok ( ( ) )
3187
3122
}
3188
3123
}
0 commit comments