@@ -92,9 +92,12 @@ pub struct ExprSimplifier<S> {
92
92
/// Should expressions be canonicalized before simplification? Defaults to
93
93
/// true
94
94
canonicalize : bool ,
95
+ /// Maximum number of simplifier cycles
96
+ max_simplifier_cycles : u32 ,
95
97
}
96
98
97
99
pub const THRESHOLD_INLINE_INLIST : usize = 3 ;
100
+ pub const DEFAULT_MAX_SIMPLIFIER_CYCLES : u32 = 3 ;
98
101
99
102
impl < S : SimplifyInfo > ExprSimplifier < S > {
100
103
/// Create a new `ExprSimplifier` with the given `info` such as an
@@ -107,10 +110,11 @@ impl<S: SimplifyInfo> ExprSimplifier<S> {
107
110
info,
108
111
guarantees : vec ! [ ] ,
109
112
canonicalize : true ,
113
+ max_simplifier_cycles : DEFAULT_MAX_SIMPLIFIER_CYCLES ,
110
114
}
111
115
}
112
116
113
- /// Simplifies this [`Expr`]`s as much as possible, evaluating
117
+ /// Simplifies this [`Expr`] as much as possible, evaluating
114
118
/// constants and applying algebraic simplifications.
115
119
///
116
120
/// The types of the expression must match what operators expect,
@@ -171,7 +175,18 @@ impl<S: SimplifyInfo> ExprSimplifier<S> {
171
175
/// let expr = simplifier.simplify(expr).unwrap();
172
176
/// assert_eq!(expr, b_lt_2);
173
177
/// ```
174
- pub fn simplify ( & self , mut expr : Expr ) -> Result < Expr > {
178
+ pub fn simplify ( & self , expr : Expr ) -> Result < Expr > {
179
+ Ok ( self . simplify_with_cycle_count ( expr) ?. 0 ) // keep the simplified Expr (tuple field 0) and discard the cycle count
180
+ }
181
+
182
+ /// Like [Self::simplify], simplifies this [`Expr`] as much as possible, evaluating
183
+ /// constants and applying algebraic simplifications. Additionally returns a `u32`
184
+ /// representing the number of simplification cycles performed, which can be useful for testing
185
+ /// optimizations.
186
+ ///
187
+ /// See [Self::simplify] for details and usage examples.
188
+ ///
189
+ pub fn simplify_with_cycle_count ( & self , mut expr : Expr ) -> Result < ( Expr , u32 ) > {
175
190
let mut simplifier = Simplifier :: new ( & self . info ) ;
176
191
let mut const_evaluator = ConstEvaluator :: try_new ( self . info . execution_props ( ) ) ?;
177
192
let mut shorten_in_list_simplifier = ShortenInListSimplifier :: new ( ) ;
@@ -181,24 +196,26 @@ impl<S: SimplifyInfo> ExprSimplifier<S> {
181
196
expr = expr. rewrite ( & mut Canonicalizer :: new ( ) ) . data ( ) ?
182
197
}
183
198
184
- // TODO iterate until no changes are made during rewrite
185
- // (evaluating constants can enable new simplifications and
186
- // simplifications can enable new constant evaluation)
187
- // https://github.com/apache/datafusion/issues/1160
188
- expr. rewrite ( & mut const_evaluator)
189
- . data ( ) ?
190
- . rewrite ( & mut simplifier)
191
- . data ( ) ?
192
- . rewrite ( & mut guarantee_rewriter)
193
- . data ( ) ?
194
- // run both passes twice to try an minimize simplifications that we missed
195
- . rewrite ( & mut const_evaluator)
196
- . data ( ) ?
197
- . rewrite ( & mut simplifier)
198
- . data ( ) ?
199
- // shorten inlist should be started after other inlist rules are applied
200
- . rewrite ( & mut shorten_in_list_simplifier)
201
- . data ( )
199
+ // Evaluating constants can enable new simplifications and
200
+ // simplifications can enable new constant evaluation
201
+ // see `Self::with_max_cycles`
202
+ let mut num_cycles = 0 ;
203
+ loop {
204
+ let Transformed {
205
+ data, transformed, ..
206
+ } = expr
207
+ . rewrite ( & mut const_evaluator) ?
208
+ . transform_data ( |expr| expr. rewrite ( & mut simplifier) ) ?
209
+ . transform_data ( |expr| expr. rewrite ( & mut guarantee_rewriter) ) ?;
210
+ expr = data;
211
+ num_cycles += 1 ;
212
+ if !transformed || num_cycles >= self . max_simplifier_cycles {
213
+ break ;
214
+ }
215
+ }
216
+ // shorten inlist should be started after other inlist rules are applied
217
+ expr = expr. rewrite ( & mut shorten_in_list_simplifier) . data ( ) ?;
218
+ Ok ( ( expr, num_cycles) )
202
219
}
203
220
204
221
/// Apply type coercion to an [`Expr`] so that it can be
@@ -323,6 +340,63 @@ impl<S: SimplifyInfo> ExprSimplifier<S> {
323
340
self . canonicalize = canonicalize;
324
341
self
325
342
}
343
+
344
+ /// Specifies the maximum number of simplification cycles to run.
345
+ ///
346
+ /// The simplifier can perform multiple passes of simplification. This is
347
+ /// because the output of one simplification step can allow more optimizations
348
+ /// in another simplification step. For example, constant evaluation can allow more
349
+ /// expression simplifications, and expression simplifications can allow more constant
350
+ /// evaluations.
351
+ ///
352
+ /// This method specifies the maximum number of allowed iteration cycles before the simplifier
353
+ /// returns an [Expr] output. However, it does not always perform the maximum number of cycles.
354
+ /// The simplifier will attempt to detect when an [Expr] is unchanged by all the simplification
355
+ /// passes, and return early. This avoids wasting time on unnecessary [Expr] tree traversals.
356
+ ///
357
+ /// If no maximum is specified, the value of [DEFAULT_MAX_SIMPLIFIER_CYCLES] is used
358
+ /// instead. Note that one cycle is always performed, even if the maximum is set to 0.
359
+ ///
360
+ /// ```rust
361
+ /// use arrow::datatypes::{DataType, Field, Schema};
362
+ /// use datafusion_expr::{col, lit, Expr};
363
+ /// use datafusion_common::{Result, ScalarValue, ToDFSchema};
364
+ /// use datafusion_expr::execution_props::ExecutionProps;
365
+ /// use datafusion_expr::simplify::SimplifyContext;
366
+ /// use datafusion_optimizer::simplify_expressions::ExprSimplifier;
367
+ ///
368
+ /// let schema = Schema::new(vec![
369
+ /// Field::new("a", DataType::Int64, false),
370
+ /// ])
371
+ /// .to_dfschema_ref().unwrap();
372
+ ///
373
+ /// // Create the simplifier
374
+ /// let props = ExecutionProps::new();
375
+ /// let context = SimplifyContext::new(&props)
376
+ /// .with_schema(schema);
377
+ /// let simplifier = ExprSimplifier::new(context);
378
+ ///
379
+ /// // Expression: a IS NOT NULL
380
+ /// let expr = col("a").is_not_null();
381
+ ///
382
+ /// // When using default maximum cycles, 2 cycles will be performed.
383
+ /// let (simplified_expr, count) = simplifier.simplify_with_cycle_count(expr.clone()).unwrap();
384
+ /// assert_eq!(simplified_expr, lit(true));
385
+ /// // 2 cycles were executed, but only 1 was needed
386
+ /// assert_eq!(count, 2);
387
+ ///
388
+ /// // Only 1 simplification pass is necessary here, so we can set the maximum cycles to 1.
389
+ /// let (simplified_expr, count) = simplifier.with_max_cycles(1).simplify_with_cycle_count(expr.clone()).unwrap();
390
+ /// // Expression has still been rewritten to: true
391
+ /// assert_eq!(simplified_expr, lit(true));
392
+ /// // Only 1 cycle was executed
393
+ /// assert_eq!(count, 1);
394
+ ///
395
+ /// ```
396
+ pub fn with_max_cycles ( mut self , max_simplifier_cycles : u32 ) -> Self {
397
+ self . max_simplifier_cycles = max_simplifier_cycles;
398
+ self
399
+ }
326
400
}
327
401
328
402
/// Canonicalize any BinaryExprs that are not in canonical form
@@ -404,6 +478,8 @@ struct ConstEvaluator<'a> {
404
478
enum ConstSimplifyResult {
405
479
// Expr was simplified; contains the resulting scalar value
406
480
Simplified ( ScalarValue ) ,
481
+ // Expr was already a literal, so nothing changed; contains its original scalar value
482
+ NotSimplified ( ScalarValue ) ,
407
483
// Evaluation encountered an error; contains the error and the original expression
408
484
SimplifyRuntimeError ( DataFusionError , Expr ) ,
409
485
}
@@ -450,6 +526,9 @@ impl<'a> TreeNodeRewriter for ConstEvaluator<'a> {
450
526
ConstSimplifyResult :: Simplified ( s) => {
451
527
Ok ( Transformed :: yes ( Expr :: Literal ( s) ) )
452
528
}
529
+ ConstSimplifyResult :: NotSimplified ( s) => {
530
+ Ok ( Transformed :: no ( Expr :: Literal ( s) ) )
531
+ }
453
532
ConstSimplifyResult :: SimplifyRuntimeError ( _, expr) => {
454
533
Ok ( Transformed :: yes ( expr) )
455
534
}
@@ -548,7 +627,7 @@ impl<'a> ConstEvaluator<'a> {
548
627
/// Internal helper to evaluate an Expr
549
628
pub ( crate ) fn evaluate_to_scalar ( & mut self , expr : Expr ) -> ConstSimplifyResult {
550
629
if let Expr :: Literal ( s) = expr {
551
- return ConstSimplifyResult :: Simplified ( s) ;
630
+ return ConstSimplifyResult :: NotSimplified ( s) ;
552
631
}
553
632
554
633
let phys_expr =
@@ -1672,15 +1751,14 @@ fn inlist_except(mut l1: InList, l2: InList) -> Result<Expr> {
1672
1751
1673
1752
#[ cfg( test) ]
1674
1753
mod tests {
1754
+ use datafusion_common:: { assert_contains, DFSchemaRef , ToDFSchema } ;
1755
+ use datafusion_expr:: { interval_arithmetic:: Interval , * } ;
1675
1756
use std:: {
1676
1757
collections:: HashMap ,
1677
1758
ops:: { BitAnd , BitOr , BitXor } ,
1678
1759
sync:: Arc ,
1679
1760
} ;
1680
1761
1681
- use datafusion_common:: { assert_contains, DFSchemaRef , ToDFSchema } ;
1682
- use datafusion_expr:: { interval_arithmetic:: Interval , * } ;
1683
-
1684
1762
use crate :: simplify_expressions:: SimplifyContext ;
1685
1763
use crate :: test:: test_table_scan_with_name;
1686
1764
@@ -2868,6 +2946,19 @@ mod tests {
2868
2946
try_simplify ( expr) . unwrap ( )
2869
2947
}
2870
2948
2949
+ fn try_simplify_with_cycle_count ( expr : Expr ) -> Result < ( Expr , u32 ) > { // test helper: simplify `expr` against the test schema, returning the cycle count too
2950
+ let schema = expr_test_schema ( ) ;
2951
+ let execution_props = ExecutionProps :: new ( ) ;
2952
+ let simplifier = ExprSimplifier :: new ( // `new` leaves the cycle limit at DEFAULT_MAX_SIMPLIFIER_CYCLES
2953
+ SimplifyContext :: new ( & execution_props) . with_schema ( schema) ,
2954
+ ) ;
2955
+ simplifier. simplify_with_cycle_count ( expr)
2956
+ }
2957
+
2958
+ fn simplify_with_cycle_count ( expr : Expr ) -> ( Expr , u32 ) { // test helper: like `try_simplify_with_cycle_count`, but panics on error
2959
+ try_simplify_with_cycle_count ( expr) . unwrap ( )
2960
+ }
2961
+
2871
2962
fn simplify_with_guarantee (
2872
2963
expr : Expr ,
2873
2964
guarantees : Vec < ( Expr , NullableInterval ) > ,
@@ -3575,4 +3666,40 @@ mod tests {
3575
3666
3576
3667
assert_eq ! ( simplify( expr) , expected) ;
3577
3668
}
3669
+
3670
+ #[ test]
3671
+ fn test_simplify_cycles ( ) {
3672
+ // TRUE: already a literal, so the first cycle is expected to change nothing and the loop stops after 1 cycle
3673
+ let expr = lit ( true ) ;
3674
+ let expected = lit ( true ) ;
3675
+ let ( expr, num_iter) = simplify_with_cycle_count ( expr) ;
3676
+ assert_eq ! ( expr, expected) ;
3677
+ assert_eq ! ( num_iter, 1 ) ;
3678
+
3679
+ // (true != NULL) OR (5 > 10): expected to simplify to NULL, with a 2nd cycle that finds nothing more to change
3680
+ let expr = lit ( true ) . not_eq ( lit_bool_null ( ) ) . or ( lit ( 5 ) . gt ( lit ( 10 ) ) ) ;
3681
+ let expected = lit_bool_null ( ) ;
3682
+ let ( expr, num_iter) = simplify_with_cycle_count ( expr) ;
3683
+ assert_eq ! ( expr, expected) ;
3684
+ assert_eq ! ( num_iter, 2 ) ;
3685
+
3686
+ // NOTE: this currently does not simplify, so the expression is returned unchanged after 1 cycle
3687
+ // (((c4 - 10) + 10) *100) / 100
3688
+ let expr = ( ( ( col ( "c4" ) - lit ( 10 ) ) + lit ( 10 ) ) * lit ( 100 ) ) / lit ( 100 ) ;
3689
+ let expected = expr. clone ( ) ;
3690
+ let ( expr, num_iter) = simplify_with_cycle_count ( expr) ;
3691
+ assert_eq ! ( expr, expected) ;
3692
+ assert_eq ! ( num_iter, 1 ) ;
3693
+
3694
+ // ((c4<1 or c3<2) and c3_non_null<3) and false: expected to collapse to false, with a 2nd cycle that finds no further change
3695
+ let expr = col ( "c4" )
3696
+ . lt ( lit ( 1 ) )
3697
+ . or ( col ( "c3" ) . lt ( lit ( 2 ) ) )
3698
+ . and ( col ( "c3_non_null" ) . lt ( lit ( 3 ) ) )
3699
+ . and ( lit ( false ) ) ;
3700
+ let expected = lit ( false ) ;
3701
+ let ( expr, num_iter) = simplify_with_cycle_count ( expr) ;
3702
+ assert_eq ! ( expr, expected) ;
3703
+ assert_eq ! ( num_iter, 2 ) ;
3704
+ }
3578
3705
}
0 commit comments