Skip to content

chore: remove deprecated variants of UDF's invoke (invoke, invoke_no_args, invoke_batch) #15123

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ use datafusion_common::Result;
use datafusion_common::{JoinSide, JoinType, ScalarValue};
use datafusion_execution::object_store::ObjectStoreUrl;
use datafusion_execution::{SendableRecordBatchStream, TaskContext};
use datafusion_expr::{Operator, ScalarUDF, ScalarUDFImpl, Signature, Volatility};
use datafusion_expr::{
Operator, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_physical_expr::expressions::{
binary, cast, col, BinaryExpr, CaseExpr, CastExpr, Column, Literal, NegativeExpr,
};
Expand Down Expand Up @@ -57,6 +59,7 @@ use datafusion_physical_plan::streaming::StreamingTableExec;
use datafusion_physical_plan::union::UnionExec;
use datafusion_physical_plan::{get_plan_string, ExecutionPlan};

use datafusion_expr_common::columnar_value::ColumnarValue;
use itertools::Itertools;

/// Mocked UDF
Expand Down Expand Up @@ -89,6 +92,10 @@ impl ScalarUDFImpl for DummyUDF {
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
Ok(DataType::Int32)
}

fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
panic!("dummy - not implemented")
Copy link
Member

@Weijun-H Weijun-H Mar 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what about using not_impl_err instead of painc?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I checked other traits, it would prefer unimplemented!().

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW this is a test

}
}

#[test]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,10 @@ impl ScalarUDFImpl for CastToI64UDF {
Ok(DataType::Int64)
}

fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
panic!("dummy - not implemented")
}

// Demonstrate simplifying a UDF
fn simplify(
&self,
Expand Down Expand Up @@ -946,6 +950,10 @@ impl ScalarUDFImpl for ScalarFunctionWrapper {
Ok(self.return_type.clone())
}

fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
panic!("dummy - not implemented")
}

fn simplify(
&self,
args: Vec<Expr>,
Expand Down
98 changes: 2 additions & 96 deletions datafusion/expr/src/udf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,7 @@
use crate::expr::schema_name_from_exprs_comma_separated_without_space;
use crate::simplify::{ExprSimplifyResult, SimplifyInfo};
use crate::sort_properties::{ExprProperties, SortProperties};
use crate::{
ColumnarValue, Documentation, Expr, ScalarFunctionImplementation, Signature,
};
use crate::{ColumnarValue, Documentation, Expr, Signature};
use arrow::datatypes::DataType;
use datafusion_common::{not_impl_err, ExprSchema, Result, ScalarValue};
use datafusion_expr_common::interval_arithmetic::Interval;
Expand Down Expand Up @@ -198,53 +196,18 @@ impl ScalarUDF {
self.inner.simplify(args, info)
}

#[deprecated(since = "42.1.0", note = "Use `invoke_with_args` instead")]
pub fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
#[allow(deprecated)]
self.inner.invoke(args)
}

Comment on lines -201 to -206
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

#[allow(deprecated)]
pub fn is_nullable(&self, args: &[Expr], schema: &dyn ExprSchema) -> bool {
self.inner.is_nullable(args, schema)
}

#[deprecated(since = "46.0.0", note = "Use `invoke_with_args` instead")]
pub fn invoke_batch(
&self,
args: &[ColumnarValue],
number_rows: usize,
) -> Result<ColumnarValue> {
#[allow(deprecated)]
self.inner.invoke_batch(args, number_rows)
}

/// Invoke the function on `args`, returning the appropriate result.
///
/// See [`ScalarUDFImpl::invoke_with_args`] for details.
pub fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
self.inner.invoke_with_args(args)
}

/// Invoke the function without `args` but number of rows, returning the appropriate result.
///
/// Note: This method is deprecated and will be removed in future releases.
/// User defined functions should implement [`Self::invoke_with_args`] instead.
#[deprecated(since = "42.1.0", note = "Use `invoke_with_args` instead")]
pub fn invoke_no_args(&self, number_rows: usize) -> Result<ColumnarValue> {
#[allow(deprecated)]
self.inner.invoke_no_args(number_rows)
}

/// Returns a `ScalarFunctionImplementation` that can invoke the function
/// during execution
#[deprecated(since = "42.0.0", note = "Use `invoke_with_args` instead")]
pub fn fun(&self) -> ScalarFunctionImplementation {
let captured = Arc::clone(&self.inner);
#[allow(deprecated)]
Arc::new(move |args| captured.invoke(args))
}

/// Get the circuits of inner implementation
pub fn short_circuits(&self) -> bool {
self.inner.short_circuits()
Expand Down Expand Up @@ -568,47 +531,6 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
true
}

/// Invoke the function on `args`, returning the appropriate result
///
/// Note: This method is deprecated and will be removed in future releases.
/// User defined functions should implement [`Self::invoke_with_args`] instead.
#[deprecated(since = "42.1.0", note = "Use `invoke_with_args` instead")]
fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
not_impl_err!(
"Function {} does not implement invoke but called",
self.name()
)
}

/// Invoke the function with `args` and the number of rows,
/// returning the appropriate result.
///
/// Note: See notes on [`Self::invoke_with_args`]
///
/// Note: This method is deprecated and will be removed in future releases.
/// User defined functions should implement [`Self::invoke_with_args`] instead.
///
/// See <https://github.com/apache/datafusion/issues/13515> for more details.
#[deprecated(since = "46.0.0", note = "Use `invoke_with_args` instead")]
fn invoke_batch(
&self,
args: &[ColumnarValue],
number_rows: usize,
) -> Result<ColumnarValue> {
match args.is_empty() {
true =>
{
#[allow(deprecated)]
self.invoke_no_args(number_rows)
}
false =>
{
#[allow(deprecated)]
self.invoke(args)
}
}
}

/// Invoke the function returning the appropriate result.
///
/// # Performance
Expand All @@ -619,23 +541,7 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
///
/// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments
/// to arrays, which will likely be simpler code, but be slower.
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
#[allow(deprecated)]
self.invoke_batch(&args.args, args.number_rows)
}

/// Invoke the function without `args`, instead the number of rows are provided,
/// returning the appropriate result.
///
/// Note: This method is deprecated and will be removed in future releases.
/// User defined functions should implement [`Self::invoke_with_args`] instead.
#[deprecated(since = "42.1.0", note = "Use `invoke_with_args` instead")]
fn invoke_no_args(&self, _number_rows: usize) -> Result<ColumnarValue> {
not_impl_err!(
"Function {} does not implement invoke_no_args but called",
self.name()
)
}
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue>;

/// Returns any aliases (alternate names) for this function.
///
Expand Down
11 changes: 6 additions & 5 deletions datafusion/functions-nested/src/max.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ use datafusion_common::cast::as_list_array;
use datafusion_common::utils::take_function_args;
use datafusion_common::{exec_err, ScalarValue};
use datafusion_doc::Documentation;
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use datafusion_expr::{
ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_functions_aggregate::min_max;
use datafusion_macros::user_doc;
use itertools::Itertools;
Expand Down Expand Up @@ -96,12 +98,11 @@ impl ScalarUDFImpl for ArrayMax {
}
}

fn invoke_batch(
fn invoke_with_args(
&self,
args: &[ColumnarValue],
_number_rows: usize,
args: ScalarFunctionArgs,
) -> datafusion_common::Result<ColumnarValue> {
make_scalar_function(array_max_inner)(args)
make_scalar_function(array_max_inner)(&args.args)
}

fn aliases(&self) -> &[String] {
Expand Down
10 changes: 7 additions & 3 deletions datafusion/optimizer/src/common_subexpr_eliminate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -797,8 +797,8 @@ mod test {
use datafusion_expr::logical_plan::{table_scan, JoinType};
use datafusion_expr::{
grouping_set, is_null, not, AccumulatorFactoryFunction, AggregateUDF,
ColumnarValue, ScalarUDF, ScalarUDFImpl, Signature, SimpleAggregateUDF,
Volatility,
ColumnarValue, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature,
SimpleAggregateUDF, Volatility,
};
use datafusion_expr::{lit, logical_plan::builder::LogicalPlanBuilder};

Expand Down Expand Up @@ -1598,7 +1598,7 @@ mod test {
Ok(DataType::Int32)
}

fn invoke(&self, _: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
panic!("not implemented")
}
}
Expand Down Expand Up @@ -1705,5 +1705,9 @@ mod test {
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
Ok(DataType::Float64)
}

fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
panic!("dummy - not implemented")
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4307,6 +4307,10 @@ mod tests {
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
Ok(DataType::Int16)
}

fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
panic!("dummy - not implemented")
}
}

#[test]
Expand Down
18 changes: 13 additions & 5 deletions datafusion/proto/tests/cases/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,17 @@
// under the License.

use arrow::datatypes::{DataType, Field};
use std::any::Any;
use std::fmt::Debug;

use datafusion::logical_expr::ColumnarValue;
use datafusion_common::plan_err;
use datafusion_expr::function::AccumulatorArgs;
use datafusion_expr::{
Accumulator, AggregateUDFImpl, PartitionEvaluator, ScalarUDFImpl, Signature,
Volatility, WindowUDFImpl,
Accumulator, AggregateUDFImpl, PartitionEvaluator, ScalarFunctionArgs, ScalarUDFImpl,
Signature, Volatility, WindowUDFImpl,
};
use datafusion_functions_window_common::field::WindowUDFFieldArgs;
use datafusion_functions_window_common::partition::PartitionEvaluatorArgs;
use std::any::Any;
use std::fmt::Debug;

mod roundtrip_logical_plan;
mod roundtrip_physical_plan;
Expand Down Expand Up @@ -69,6 +69,14 @@ impl ScalarUDFImpl for MyRegexUdf {
plan_err!("regex_udf only accepts Utf8 arguments")
}
}

fn invoke_with_args(
&self,
_args: ScalarFunctionArgs,
) -> datafusion_common::Result<ColumnarValue> {
panic!("dummy - not implemented")
}

fn aliases(&self) -> &[String] {
&self.aliases
}
Expand Down
8 changes: 6 additions & 2 deletions datafusion/sql/src/unparser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1661,8 +1661,8 @@ mod tests {
use datafusion_expr::{
case, cast, col, cube, exists, grouping_set, interval_datetime_lit,
interval_year_month_lit, lit, not, not_exists, out_ref_col, placeholder, rollup,
table_scan, try_cast, when, ScalarUDF, ScalarUDFImpl, Signature, Volatility,
WindowFrame, WindowFunctionDefinition,
table_scan, try_cast, when, ColumnarValue, ScalarFunctionArgs, ScalarUDF,
ScalarUDFImpl, Signature, Volatility, WindowFrame, WindowFunctionDefinition,
};
use datafusion_expr::{interval_month_day_nano_lit, ExprFunctionExt};
use datafusion_functions::expr_fn::{get_field, named_struct};
Expand Down Expand Up @@ -1711,6 +1711,10 @@ mod tests {
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
Ok(DataType::Int32)
}

fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
panic!("dummy - not implemented")
}
}
// See sql::tests for E2E tests.

Expand Down
8 changes: 6 additions & 2 deletions datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ use datafusion_expr::{
col,
logical_plan::{LogicalPlan, Prepare},
test::function_stub::sum_udaf,
CreateIndex, DdlStatement, ScalarUDF, ScalarUDFImpl, Signature, Statement,
Volatility,
ColumnarValue, CreateIndex, DdlStatement, ScalarFunctionArgs, ScalarUDF,
ScalarUDFImpl, Signature, Statement, Volatility,
};
use datafusion_functions::{string, unicode};
use datafusion_sql::{
Expand Down Expand Up @@ -2477,6 +2477,10 @@ impl ScalarUDFImpl for DummyUDF {
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
Ok(self.return_type.clone())
}

fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
panic!("dummy - not implemented")
}
}

/// Create logical plan, write with formatter, compare to expected output
Expand Down