@@ -71,13 +71,13 @@ use datafusion_common::{
71
71
config:: { ConfigExtension , TableOptions } ,
72
72
exec_err, not_impl_err, plan_datafusion_err, plan_err,
73
73
tree_node:: { TreeNodeRecursion , TreeNodeVisitor } ,
74
- SchemaReference , TableReference ,
74
+ DFSchema , SchemaReference , TableReference ,
75
75
} ;
76
76
use datafusion_execution:: registry:: SerializerRegistry ;
77
77
use datafusion_expr:: {
78
78
logical_plan:: { DdlStatement , Statement } ,
79
79
var_provider:: is_system_variables,
80
- Expr , StringifiedPlan , UserDefinedLogicalNode , WindowUDF ,
80
+ Expr , ExprSchemable , StringifiedPlan , UserDefinedLogicalNode , WindowUDF ,
81
81
} ;
82
82
use datafusion_sql:: {
83
83
parser:: { CopyToSource , CopyToStatement , DFParser } ,
@@ -87,15 +87,20 @@ use datafusion_sql::{
87
87
88
88
use async_trait:: async_trait;
89
89
use chrono:: { DateTime , Utc } ;
90
+ use datafusion_common:: tree_node:: TreeNode ;
90
91
use parking_lot:: RwLock ;
91
92
use sqlparser:: dialect:: dialect_from_str;
92
93
use url:: Url ;
93
94
use uuid:: Uuid ;
94
95
96
+ use crate :: physical_expr:: PhysicalExpr ;
95
97
pub use datafusion_execution:: config:: SessionConfig ;
96
98
pub use datafusion_execution:: TaskContext ;
97
99
pub use datafusion_expr:: execution_props:: ExecutionProps ;
98
100
use datafusion_expr:: expr_rewriter:: FunctionRewrite ;
101
+ use datafusion_expr:: simplify:: SimplifyInfo ;
102
+ use datafusion_optimizer:: simplify_expressions:: ExprSimplifier ;
103
+ use datafusion_physical_expr:: create_physical_expr;
99
104
100
105
mod avro;
101
106
mod csv;
@@ -523,6 +528,41 @@ impl SessionContext {
523
528
}
524
529
}
525
530
531
+ /// Create a [`PhysicalExpr`] from an [`Expr`] after applying type
532
+ /// coercion and function rewrites.
533
+ ///
534
+ /// Note: The expression is not [simplified] or otherwise optimized: `a = 1
535
+ /// + 2` will not be simplified to `a = 3` as this is a more involved process.
536
+ /// See the [expr_api] example for how to simplify expressions.
537
+ ///
538
+ /// # Example
539
+ /// ```
540
+ /// # use std::sync::Arc;
541
+ /// # use arrow::datatypes::{DataType, Field, Schema};
542
+ /// # use datafusion::prelude::*;
543
+ /// # use datafusion_common::DFSchema;
544
+ /// // a = 1 (i64)
545
+ /// let expr = col("a").eq(lit(1i64));
546
+ /// // provide type information that `a` is an Int32
547
+ /// let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]);
548
+ /// let df_schema = DFSchema::try_from(schema).unwrap();
549
+ /// // Create a PhysicalExpr. Note DataFusion automatically coerces (casts) `1i64` to `1i32`
550
+ /// let physical_expr = SessionContext::new()
551
+ /// .create_physical_expr(expr, &df_schema).unwrap();
552
+ /// ```
553
+ /// # See Also
554
+ /// * [`SessionState::create_physical_expr`] for a lower level API
555
+ ///
556
+ /// [simplified]: datafusion_optimizer::simplify_expressions
557
+ /// [expr_api]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/expr_api.rs
558
+ pub fn create_physical_expr (
559
+ & self ,
560
+ expr : Expr ,
561
+ df_schema : & DFSchema ,
562
+ ) -> Result < Arc < dyn PhysicalExpr > > {
563
+ self . state . read ( ) . create_physical_expr ( expr, df_schema)
564
+ }
565
+
526
566
// return an empty dataframe
527
567
fn return_empty_dataframe ( & self ) -> Result < DataFrame > {
528
568
let plan = LogicalPlanBuilder :: empty ( false ) . build ( ) ?;
@@ -1946,13 +1986,14 @@ impl SessionState {
1946
1986
}
1947
1987
}
1948
1988
1949
- /// Creates a physical plan from a logical plan .
1989
+ /// Creates a physical [`ExecutionPlan`] plan from a [`LogicalPlan`] .
1950
1990
///
1951
1991
/// Note: this first calls [`Self::optimize`] on the provided
1952
1992
/// plan.
1953
1993
///
1954
- /// This function will error for [`LogicalPlan`]s such as catalog
1955
- /// DDL `CREATE TABLE` must be handled by another layer.
1994
+ /// This function will error for [`LogicalPlan`]s such as catalog DDL like
1995
+ /// `CREATE TABLE`, which do not have corresponding physical plans and must
1996
+ /// be handled by another layer, typically [`SessionContext`].
1956
1997
pub async fn create_physical_plan (
1957
1998
& self ,
1958
1999
logical_plan : & LogicalPlan ,
@@ -1963,6 +2004,39 @@ impl SessionState {
1963
2004
. await
1964
2005
}
1965
2006
2007
+ /// Create a [`PhysicalExpr`] from an [`Expr`] after applying type
2008
+ /// coercion, and function rewrites.
2009
+ ///
2010
+ /// Note: The expression is not [simplified] or otherwise optimized: `a = 1
2011
+ /// + 2` will not be simplified to `a = 3` as this is a more involved process.
2012
+ /// See the [expr_api] example for how to simplify expressions.
2013
+ ///
2014
+ /// # See Also:
2015
+ /// * [`SessionContext::create_physical_expr`] for a higher-level API
2016
+ /// * [`create_physical_expr`] for a lower-level API
2017
+ ///
2018
+ /// [simplified]: datafusion_optimizer::simplify_expressions
2019
+ /// [expr_api]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/expr_api.rs
2020
+ pub fn create_physical_expr (
2021
+ & self ,
2022
+ expr : Expr ,
2023
+ df_schema : & DFSchema ,
2024
+ ) -> Result < Arc < dyn PhysicalExpr > > {
2025
+ let simplifier =
2026
+ ExprSimplifier :: new ( SessionSimplifyProvider :: new ( self , df_schema) ) ;
2027
+ // apply type coercion here to ensure types match
2028
+ let mut expr = simplifier. coerce ( expr, df_schema) ?;
2029
+
2030
+ // rewrite Exprs to functions if necessary
2031
+ let config_options = self . config_options ( ) ;
2032
+ for rewrite in self . analyzer . function_rewrites ( ) {
2033
+ expr = expr
2034
+ . transform_up ( |expr| rewrite. rewrite ( expr, df_schema, config_options) ) ?
2035
+ . data ;
2036
+ }
2037
+ create_physical_expr ( & expr, df_schema, self . execution_props ( ) )
2038
+ }
2039
+
1966
2040
/// Return the session ID
1967
2041
pub fn session_id ( & self ) -> & str {
1968
2042
& self . session_id
@@ -2040,6 +2114,35 @@ impl SessionState {
2040
2114
}
2041
2115
}
2042
2116
2117
+ struct SessionSimplifyProvider < ' a > {
2118
+ state : & ' a SessionState ,
2119
+ df_schema : & ' a DFSchema ,
2120
+ }
2121
+
2122
+ impl < ' a > SessionSimplifyProvider < ' a > {
2123
+ fn new ( state : & ' a SessionState , df_schema : & ' a DFSchema ) -> Self {
2124
+ Self { state, df_schema }
2125
+ }
2126
+ }
2127
+
2128
+ impl < ' a > SimplifyInfo for SessionSimplifyProvider < ' a > {
2129
+ fn is_boolean_type ( & self , expr : & Expr ) -> Result < bool > {
2130
+ Ok ( expr. get_type ( self . df_schema ) ? == DataType :: Boolean )
2131
+ }
2132
+
2133
+ fn nullable ( & self , expr : & Expr ) -> Result < bool > {
2134
+ expr. nullable ( self . df_schema )
2135
+ }
2136
+
2137
+ fn execution_props ( & self ) -> & ExecutionProps {
2138
+ self . state . execution_props ( )
2139
+ }
2140
+
2141
+ fn get_data_type ( & self , expr : & Expr ) -> Result < DataType > {
2142
+ expr. get_type ( self . df_schema )
2143
+ }
2144
+ }
2145
+
2043
2146
struct SessionContextProvider < ' a > {
2044
2147
state : & ' a SessionState ,
2045
2148
tables : HashMap < String , Arc < dyn TableSource > > ,
0 commit comments