@@ -23,11 +23,14 @@ use arrow::datatypes::{DataType, Field};
23
23
use arrow:: util:: pretty:: { pretty_format_batches, pretty_format_columns} ;
24
24
use datafusion:: prelude:: * ;
25
25
use datafusion_common:: { DFSchema , ScalarValue } ;
26
+ use datafusion_expr:: execution_props:: ExecutionProps ;
27
+ use datafusion_expr:: simplify:: SimplifyContext ;
26
28
use datafusion_expr:: ExprFunctionExt ;
27
29
use datafusion_functions:: core:: expr_ext:: FieldAccessor ;
28
30
use datafusion_functions_aggregate:: first_last:: first_value_udaf;
29
31
use datafusion_functions_aggregate:: sum:: sum_udaf;
30
32
use datafusion_functions_nested:: expr_ext:: { IndexAccessor , SliceAccessor } ;
33
+ use datafusion_optimizer:: simplify_expressions:: ExprSimplifier ;
31
34
use sqlparser:: ast:: NullTreatment ;
32
35
/// Tests of using and evaluating `Expr`s outside the context of a LogicalPlan
33
36
use std:: sync:: { Arc , LazyLock } ;
@@ -304,6 +307,37 @@ async fn test_aggregate_ext_null_treatment() {
304
307
. await ;
305
308
}
306
309
310
+ #[ tokio:: test]
311
+ async fn test_create_physical_expr ( ) {
312
+ // create_physical_expr does not simplify the expression
313
+ // 1 + 1
314
+ create_expr_test ( lit ( 1i32 ) + lit ( 2i32 ) , "1 + 2" ) ;
315
+ // However, you can run the simplifier before creating the physical
316
+ // expression. This mimics what delta.rs and other non-sql libraries do to
317
+ // create predicates
318
+ //
319
+ // 1 + 1
320
+ create_simplified_expr_test ( lit ( 1i32 ) + lit ( 2i32 ) , "3" ) ;
321
+ }
322
+
323
+ #[ tokio:: test]
324
+ async fn test_create_physical_expr_coercion ( ) {
325
+ // create_physical_expr does apply type coercion and unwrapping in cast
326
+ //
327
+ // expect the cast on the literals
328
+ // compare string function to int `id = 1`
329
+ create_expr_test ( col ( "id" ) . eq ( lit ( 1i32 ) ) , "id@0 = CAST(1 AS Utf8)" ) ;
330
+ create_expr_test ( lit ( 1i32 ) . eq ( col ( "id" ) ) , "CAST(1 AS Utf8) = id@0" ) ;
331
+ // compare int col to string literal `i = '202410'`
332
+ // Note this casts the column (not the field)
333
+ create_expr_test ( col ( "i" ) . eq ( lit ( "202410" ) ) , "CAST(i@1 AS Utf8) = 202410" ) ;
334
+ create_expr_test ( lit ( "202410" ) . eq ( col ( "i" ) ) , "202410 = CAST(i@1 AS Utf8)" ) ;
335
+ // however, when simplified the casts on i should removed
336
+ // https://github.com/apache/datafusion/issues/14944
337
+ create_simplified_expr_test ( col ( "i" ) . eq ( lit ( "202410" ) ) , "CAST(i@1 AS Utf8) = 202410" ) ;
338
+ create_simplified_expr_test ( lit ( "202410" ) . eq ( col ( "i" ) ) , "CAST(i@1 AS Utf8) = 202410" ) ;
339
+ }
340
+
307
341
/// Evaluates the specified expr as an aggregate and compares the result to the
308
342
/// expected result.
309
343
async fn evaluate_agg_test ( expr : Expr , expected_lines : Vec < & str > ) {
@@ -350,6 +384,38 @@ fn evaluate_expr_test(expr: Expr, expected_lines: Vec<&str>) {
350
384
) ;
351
385
}
352
386
387
+ /// Creates the physical expression from Expr and compares the Debug expression
388
+ /// to the expected result.
389
+ fn create_expr_test ( expr : Expr , expected_expr : & str ) {
390
+ let batch = & TEST_BATCH ;
391
+ let df_schema = DFSchema :: try_from ( batch. schema ( ) ) . unwrap ( ) ;
392
+ let physical_expr = SessionContext :: new ( )
393
+ . create_physical_expr ( expr, & df_schema)
394
+ . unwrap ( ) ;
395
+
396
+ assert_eq ! ( physical_expr. to_string( ) , expected_expr) ;
397
+ }
398
+
399
+ /// Creates the physical expression from Expr and runs the expr simplifier
400
+ fn create_simplified_expr_test ( expr : Expr , expected_expr : & str ) {
401
+ let batch = & TEST_BATCH ;
402
+ let df_schema = DFSchema :: try_from ( batch. schema ( ) ) . unwrap ( ) ;
403
+
404
+ // Simplify the expression first
405
+ let props = ExecutionProps :: new ( ) ;
406
+ let simplify_context =
407
+ SimplifyContext :: new ( & props) . with_schema ( df_schema. clone ( ) . into ( ) ) ;
408
+ let simplifier = ExprSimplifier :: new ( simplify_context) . with_max_cycles ( 10 ) ;
409
+ let simplified = simplifier. simplify ( expr) . unwrap ( ) ;
410
+ create_expr_test ( simplified, expected_expr) ;
411
+ }
412
+
413
+ /// Returns a Batch with 3 rows and 4 columns:
414
+ ///
415
+ /// id: Utf8
416
+ /// i: Int64
417
+ /// props: Struct
418
+ /// list: List<String>
353
419
static TEST_BATCH : LazyLock < RecordBatch > = LazyLock :: new ( || {
354
420
let string_array: ArrayRef = Arc :: new ( StringArray :: from ( vec ! [ "1" , "2" , "3" ] ) ) ;
355
421
let int_array: ArrayRef =
0 commit comments