From eca6d1b6116ec9b1e4587c1f2dfa0933d7231776 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Mon, 29 Apr 2024 11:12:54 +0800 Subject: [PATCH 1/2] Remove ScalarFunctionDefinition::Name --- datafusion/common/src/scalar/mod.rs | 2 +- datafusion/core/src/datasource/listing/helpers.rs | 5 +---- datafusion/core/src/physical_planner.rs | 9 ++------- datafusion/expr/src/expr.rs | 14 -------------- datafusion/expr/src/expr_schema.rs | 3 --- datafusion/expr/src/tree_node.rs | 3 --- datafusion/functions/src/math/log.rs | 9 ++++----- datafusion/functions/src/math/power.rs | 9 ++++----- datafusion/optimizer/src/analyzer/type_coercion.rs | 3 --- datafusion/optimizer/src/push_down_filter.rs | 1 - .../src/simplify_expressions/expr_simplifier.rs | 1 - datafusion/physical-expr/src/planner.rs | 3 --- datafusion/physical-expr/src/scalar_function.rs | 7 +------ datafusion/proto/src/logical_plan/to_proto.rs | 6 ------ datafusion/proto/src/physical_plan/to_proto.rs | 5 ----- datafusion/substrait/src/logical_plan/producer.rs | 7 +------ 16 files changed, 14 insertions(+), 73 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index e71d82fb3beb..b796358e12ba 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -4548,7 +4548,7 @@ mod tests { // thus the size of the enum appears to as well // The value may also change depending on rust version - assert_eq!(std::mem::size_of::(), 64); + assert_eq!(std::mem::size_of::(), 48); } #[test] diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs index 9dfd18f1881e..372f61b1e6d1 100644 --- a/datafusion/core/src/datasource/listing/helpers.rs +++ b/datafusion/core/src/datasource/listing/helpers.rs @@ -37,7 +37,7 @@ use futures::{stream::BoxStream, StreamExt, TryStreamExt}; use log::{debug, trace}; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; -use datafusion_common::{internal_err, Column, DFSchema, DataFusionError}; +use datafusion_common::{Column, DFSchema, DataFusionError}; use datafusion_expr::{Expr, ScalarFunctionDefinition, Volatility}; use datafusion_physical_expr::create_physical_expr; use object_store::path::Path; @@ -100,9 +100,6 @@ pub fn expr_applicable_for_cols(col_names: &[String], expr: &Expr) -> bool { } } } - ScalarFunctionDefinition::Name(_) => { - internal_err!("Function `Expr` with name should be resolved.") - } } } diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index b7b6c20b19bb..a041ab31f7cf 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -90,8 +90,8 @@ use datafusion_expr::expr_rewriter::unnormalize_cols; use datafusion_expr::expr_vec_fmt; use datafusion_expr::logical_plan::builder::wrap_projection_for_join_if_necessary; use datafusion_expr::{ - DescribeTable, DmlStatement, Extension, Filter, RecursiveQuery, - ScalarFunctionDefinition, StringifiedPlan, WindowFrame, WindowFrameBound, WriteOp, + DescribeTable, DmlStatement, Extension, Filter, RecursiveQuery, StringifiedPlan, + WindowFrame, WindowFrameBound, WriteOp, }; use datafusion_physical_expr::expressions::Literal; use datafusion_physical_expr::LexOrdering; @@ -240,11 +240,6 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { }; } Expr::ScalarFunction(fun) => { - // function should be resolved during `AnalyzerRule`s - if let ScalarFunctionDefinition::Name(_) = fun.func_def { - return internal_err!("Function `Expr` with name should be resolved."); - } - create_function_physical_name(fun.name(), false, &fun.args, None) } Expr::WindowFunction(WindowFunction { diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index e310eaa7e48f..f32fed5db5a3 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -404,9 +404,6 @@ impl Between { pub enum ScalarFunctionDefinition { /// Resolved to a user defined function UDF(Arc), - /// A scalar function constructed with name. This variant can not be executed directly - /// and instead must be resolved to one of the other variants prior to physical planning. - Name(Arc), } /// ScalarFunction expression invokes a built-in scalar function @@ -430,7 +427,6 @@ impl ScalarFunctionDefinition { pub fn name(&self) -> &str { match self { ScalarFunctionDefinition::UDF(udf) => udf.name(), - ScalarFunctionDefinition::Name(func_name) => func_name.as_ref(), } } @@ -441,11 +437,6 @@ impl ScalarFunctionDefinition { ScalarFunctionDefinition::UDF(udf) => { Ok(udf.signature().volatility == crate::Volatility::Volatile) } - ScalarFunctionDefinition::Name(func) => { - internal_err!( - "Cannot determine volatility of unresolved function: {func}" - ) - } } } } @@ -2100,11 +2091,6 @@ mod test { ), })); assert!(ScalarFunctionDefinition::UDF(udf).is_volatile().unwrap()); - - // Unresolved function - ScalarFunctionDefinition::Name(Arc::from("UnresolvedFunc")) - .is_volatile() - .expect_err("Shouldn't determine volatility of unresolved function"); } use super::*; diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index c5ae0f1b831a..f93f08574906 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -156,9 +156,6 @@ impl ExprSchemable for Expr { // expressiveness of `TypeSignature`), then infer return type Ok(fun.return_type_from_exprs(args, schema, &arg_data_types)?) } - ScalarFunctionDefinition::Name(_) => { - internal_err!("Function `Expr` with name should be resolved.") - } } } Expr::WindowFunction(WindowFunction { fun, args, .. }) => { diff --git a/datafusion/expr/src/tree_node.rs b/datafusion/expr/src/tree_node.rs index 471ed0b975b0..ae3ca9afc4f5 100644 --- a/datafusion/expr/src/tree_node.rs +++ b/datafusion/expr/src/tree_node.rs @@ -286,9 +286,6 @@ impl TreeNode for Expr { ScalarFunctionDefinition::UDF(fun) => { Ok(Expr::ScalarFunction(ScalarFunction::new_udf(fun, new_args))) } - ScalarFunctionDefinition::Name(_) => { - internal_err!("Function `Expr` with name should be resolved.") - } })? } Expr::WindowFunction(WindowFunction { diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs index 0a29e1ecfc12..b82873912647 100644 --- a/datafusion/functions/src/math/log.rs +++ b/datafusion/functions/src/math/log.rs @@ -208,14 +208,13 @@ impl ScalarUDFImpl for LogFunc { /// Returns true if the function is `PowerFunc` fn is_pow(func_def: &ScalarFunctionDefinition) -> bool { - if let ScalarFunctionDefinition::UDF(fun) = func_def { - fun.as_ref() + match func_def { + ScalarFunctionDefinition::UDF(fun) => fun + .as_ref() .inner() .as_any() .downcast_ref::() - .is_some() - } else { - false + .is_some(), } } diff --git a/datafusion/functions/src/math/power.rs b/datafusion/functions/src/math/power.rs index 29caa63a9422..8cc6b4c02aeb 100644 --- a/datafusion/functions/src/math/power.rs +++ b/datafusion/functions/src/math/power.rs @@ -153,14 +153,13 @@ impl ScalarUDFImpl for PowerFunc { /// Return true if this function call is a call to `Log` fn is_log(func_def: &ScalarFunctionDefinition) -> bool { - if let ScalarFunctionDefinition::UDF(fun) = func_def { - fun.as_ref() + match func_def { + ScalarFunctionDefinition::UDF(fun) => fun + .as_ref() .inner() .as_any() .downcast_ref::() - .is_some() - } else { - false + .is_some(), } } diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index f96a359f9d47..4c0ce1ad34bc 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -318,9 +318,6 @@ impl TreeNodeRewriter for TypeCoercionRewriter { ScalarFunction::new_udf(fun, new_expr), ))) } - ScalarFunctionDefinition::Name(_) => { - internal_err!("Function `Expr` with name should be resolved.") - } }, Expr::AggregateFunction(expr::AggregateFunction { func_def, diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index 0572dc5ea4f1..8462cf86f154 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -253,7 +253,6 @@ fn can_evaluate_as_join_condition(predicate: &Expr) -> Result { | Expr::Case(_) | Expr::Cast(_) | Expr::TryCast(_) - | Expr::ScalarFunction(..) | Expr::InList { .. } => Ok(TreeNodeRecursion::Continue), Expr::Sort(_) | Expr::AggregateFunction(_) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 2fb06e659d70..fb5125f09769 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -528,7 +528,6 @@ impl<'a> ConstEvaluator<'a> { ScalarFunctionDefinition::UDF(fun) => { Self::volatility_ok(fun.signature().volatility) } - ScalarFunctionDefinition::Name(_) => false, }, Expr::Literal(_) | Expr::Unnest(_) diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index f46e5f6ec68f..7fff55ed42d5 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -316,9 +316,6 @@ pub fn create_physical_expr( args, input_dfschema, ), - ScalarFunctionDefinition::Name(_) => { - internal_err!("Function `Expr` with name should be resolved.") - } } } Expr::Between(Between { diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs index 3b360fc20c39..e160141f6331 100644 --- a/datafusion/physical-expr/src/scalar_function.rs +++ b/datafusion/physical-expr/src/scalar_function.rs @@ -37,7 +37,7 @@ use std::sync::Arc; use arrow::datatypes::{DataType, Schema}; use arrow::record_batch::RecordBatch; -use datafusion_common::{internal_err, Result}; +use datafusion_common::Result; use datafusion_expr::{ expr_vec_fmt, ColumnarValue, FuncMonotonicity, ScalarFunctionDefinition, }; @@ -152,11 +152,6 @@ impl PhysicalExpr for ScalarFunctionExpr { fun.invoke(&inputs) } } - ScalarFunctionDefinition::Name(_) => { - internal_err!( - "Name function must be resolved to one of the other variants prior to physical planning" - ) - } } } diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 45aebc88dc63..b2236847ace8 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -786,12 +786,6 @@ pub fn serialize_expr( )), } } - ScalarFunctionDefinition::Name(_) => { - return Err(Error::NotImplemented( - "Proto serialization error: Trying to serialize a unresolved function" - .to_string(), - )); - } } } Expr::Not(expr) => { diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index aa6121bebc34..a6af5e0bb508 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -546,11 +546,6 @@ pub fn serialize_physical_expr( ScalarFunctionDefinition::UDF(udf) => { codec.try_encode_udf(udf, &mut buf)?; } - _ => { - return not_impl_err!( - "Proto serialization error: Trying to serialize a unresolved function" - ); - } } let fun_definition = if buf.is_empty() { None } else { Some(buf) }; diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index a6a38ab6145c..39b2b0aa1606 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -36,7 +36,7 @@ use datafusion::common::{substrait_err, DFSchemaRef}; use datafusion::logical_expr::aggregate_function; use datafusion::logical_expr::expr::{ AggregateFunctionDefinition, Alias, BinaryExpr, Case, Cast, GroupingSet, InList, - InSubquery, ScalarFunctionDefinition, Sort, WindowFunction, + InSubquery, Sort, WindowFunction, }; use datafusion::logical_expr::{expr, Between, JoinConstraint, LogicalPlan, Operator}; use datafusion::prelude::Expr; @@ -941,11 +941,6 @@ pub fn to_substrait_rex( }); } - // function should be resolved during `AnalyzerRule` - if let ScalarFunctionDefinition::Name(_) = fun.func_def { - return internal_err!("Function `Expr` with name should be resolved."); - } - let function_anchor = _register_function(fun.name().to_string(), extension_info); Ok(Expression { From 9d30f45c816572c5bdfab49ad8295b62868e1cd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Mon, 29 Apr 2024 11:27:11 +0800 Subject: [PATCH 2/2] Revert test case change --- datafusion/common/src/scalar/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index b796358e12ba..e71d82fb3beb 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -4548,7 +4548,7 @@ mod tests { // thus the size of the enum appears to as well // The value may also change depending on rust version - assert_eq!(std::mem::size_of::(), 48); + assert_eq!(std::mem::size_of::(), 64); } #[test]