Skip to content

Commit f47ea73

Browse files
authored
fix: mark ScalarUDFImpl::invoke_batch as deprecated (#15049)
* fix: mark ScalarUDFImpl::invoke_batch as deprecated should use invoke_with_args instead See #14123 (comment) * fix deprecated usage that clippy warns about * fix another deprecated usage that clippy warns about * fix the rest of benches * fix two more implementations - now all that's left is in udf.rs * fix clippy * cleanup some leftover comments
1 parent b0d3736 commit f47ea73

24 files changed

+467
-181
lines changed

datafusion/core/tests/user_defined/user_defined_scalar_functions.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1228,12 +1228,8 @@ impl ScalarUDFImpl for MyRegexUdf {
12281228
}
12291229
}
12301230

1231-
fn invoke_batch(
1232-
&self,
1233-
args: &[ColumnarValue],
1234-
_number_rows: usize,
1235-
) -> Result<ColumnarValue> {
1236-
match args {
1231+
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
1232+
match args.args.as_slice() {
12371233
[ColumnarValue::Scalar(ScalarValue::Utf8(value))] => {
12381234
Ok(ColumnarValue::Scalar(ScalarValue::Boolean(
12391235
self.matches(value.as_deref()),

datafusion/expr/src/udf.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -225,11 +225,13 @@ impl ScalarUDF {
225225
self.inner.is_nullable(args, schema)
226226
}
227227

228+
#[deprecated(since = "46.0.0", note = "Use `invoke_with_args` instead")]
228229
pub fn invoke_batch(
229230
&self,
230231
args: &[ColumnarValue],
231232
number_rows: usize,
232233
) -> Result<ColumnarValue> {
234+
#[allow(deprecated)]
233235
self.inner.invoke_batch(args, number_rows)
234236
}
235237

@@ -244,15 +246,15 @@ impl ScalarUDF {
244246
///
245247
/// Note: This method is deprecated and will be removed in future releases.
246248
/// User defined functions should implement [`Self::invoke_with_args`] instead.
247-
#[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")]
249+
#[deprecated(since = "42.1.0", note = "Use `invoke_with_args` instead")]
248250
pub fn invoke_no_args(&self, number_rows: usize) -> Result<ColumnarValue> {
249251
#[allow(deprecated)]
250252
self.inner.invoke_no_args(number_rows)
251253
}
252254

253255
/// Returns a `ScalarFunctionImplementation` that can invoke the function
254256
/// during execution
255-
#[deprecated(since = "42.0.0", note = "Use `invoke_batch` instead")]
257+
#[deprecated(since = "42.0.0", note = "Use `invoke_with_args` instead")]
256258
pub fn fun(&self) -> ScalarFunctionImplementation {
257259
let captured = Arc::clone(&self.inner);
258260
#[allow(deprecated)]
@@ -613,6 +615,7 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
613615
/// User defined functions should implement [`Self::invoke_with_args`] instead.
614616
///
615617
/// See <https://github.com/apache/datafusion/issues/13515> for more details.
618+
#[deprecated(since = "46.0.0", note = "Use `invoke_with_args` instead")]
616619
fn invoke_batch(
617620
&self,
618621
args: &[ColumnarValue],
@@ -643,6 +646,7 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
643646
/// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments
644647
/// to arrays, which will likely be simpler code, but be slower.
645648
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
649+
#[allow(deprecated)]
646650
self.invoke_batch(&args.args, args.number_rows)
647651
}
648652

datafusion/functions-nested/benches/map.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ use std::sync::Arc;
2828

2929
use datafusion_common::ScalarValue;
3030
use datafusion_expr::planner::ExprPlanner;
31-
use datafusion_expr::{ColumnarValue, Expr};
31+
use datafusion_expr::{ColumnarValue, Expr, ScalarFunctionArgs};
3232
use datafusion_functions_nested::map::map_udf;
3333
use datafusion_functions_nested::planner::NestedFunctionPlanner;
3434

@@ -94,11 +94,18 @@ fn criterion_benchmark(c: &mut Criterion) {
9494
let keys = ColumnarValue::Scalar(ScalarValue::List(Arc::new(key_list)));
9595
let values = ColumnarValue::Scalar(ScalarValue::List(Arc::new(value_list)));
9696

97+
let return_type = &map_udf()
98+
.return_type(&[DataType::Utf8, DataType::Int32])
99+
.expect("should get return type");
100+
97101
b.iter(|| {
98102
black_box(
99-
// TODO use invoke_with_args
100103
map_udf()
101-
.invoke_batch(&[keys.clone(), values.clone()], 1)
104+
.invoke_with_args(ScalarFunctionArgs {
105+
args: vec![keys.clone(), values.clone()],
106+
number_rows: 1,
107+
return_type,
108+
})
102109
.expect("map should work on valid values"),
103110
);
104111
});

datafusion/functions/benches/character_length.rs

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717

1818
extern crate criterion;
1919

20+
use arrow::datatypes::DataType;
2021
use criterion::{black_box, criterion_group, criterion_main, Criterion};
22+
use datafusion_expr::ScalarFunctionArgs;
2123
use helper::gen_string_array;
2224

2325
mod helper;
@@ -26,6 +28,8 @@ fn criterion_benchmark(c: &mut Criterion) {
2628
// All benches are single batch run with 8192 rows
2729
let character_length = datafusion_functions::unicode::character_length();
2830

31+
let return_type = DataType::Utf8;
32+
2933
let n_rows = 8192;
3034
for str_len in [8, 32, 128, 4096] {
3135
// StringArray ASCII only
@@ -34,8 +38,11 @@ fn criterion_benchmark(c: &mut Criterion) {
3438
&format!("character_length_StringArray_ascii_str_len_{}", str_len),
3539
|b| {
3640
b.iter(|| {
37-
// TODO use invoke_with_args
38-
black_box(character_length.invoke_batch(&args_string_ascii, n_rows))
41+
black_box(character_length.invoke_with_args(ScalarFunctionArgs {
42+
args: args_string_ascii.clone(),
43+
number_rows: n_rows,
44+
return_type: &return_type,
45+
}))
3946
})
4047
},
4148
);
@@ -46,8 +53,11 @@ fn criterion_benchmark(c: &mut Criterion) {
4653
&format!("character_length_StringArray_utf8_str_len_{}", str_len),
4754
|b| {
4855
b.iter(|| {
49-
// TODO use invoke_with_args
50-
black_box(character_length.invoke_batch(&args_string_utf8, n_rows))
56+
black_box(character_length.invoke_with_args(ScalarFunctionArgs {
57+
args: args_string_utf8.clone(),
58+
number_rows: n_rows,
59+
return_type: &return_type,
60+
}))
5161
})
5262
},
5363
);
@@ -58,10 +68,11 @@ fn criterion_benchmark(c: &mut Criterion) {
5868
&format!("character_length_StringViewArray_ascii_str_len_{}", str_len),
5969
|b| {
6070
b.iter(|| {
61-
// TODO use invoke_with_args
62-
black_box(
63-
character_length.invoke_batch(&args_string_view_ascii, n_rows),
64-
)
71+
black_box(character_length.invoke_with_args(ScalarFunctionArgs {
72+
args: args_string_view_ascii.clone(),
73+
number_rows: n_rows,
74+
return_type: &return_type,
75+
}))
6576
})
6677
},
6778
);
@@ -72,10 +83,11 @@ fn criterion_benchmark(c: &mut Criterion) {
7283
&format!("character_length_StringViewArray_utf8_str_len_{}", str_len),
7384
|b| {
7485
b.iter(|| {
75-
// TODO use invoke_with_args
76-
black_box(
77-
character_length.invoke_batch(&args_string_view_utf8, n_rows),
78-
)
86+
black_box(character_length.invoke_with_args(ScalarFunctionArgs {
87+
args: args_string_view_utf8.clone(),
88+
number_rows: n_rows,
89+
return_type: &return_type,
90+
}))
7991
})
8092
},
8193
);

datafusion/functions/benches/chr.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,11 @@ extern crate criterion;
1919

2020
use arrow::{array::PrimitiveArray, datatypes::Int64Type, util::test_util::seedable_rng};
2121
use criterion::{black_box, criterion_group, criterion_main, Criterion};
22-
use datafusion_expr::ColumnarValue;
22+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
2323
use datafusion_functions::string::chr;
2424
use rand::Rng;
2525

26+
use arrow::datatypes::DataType;
2627
use std::sync::Arc;
2728

2829
fn criterion_benchmark(c: &mut Criterion) {
@@ -44,7 +45,17 @@ fn criterion_benchmark(c: &mut Criterion) {
4445
let input = Arc::new(input);
4546
let args = vec![ColumnarValue::Array(input)];
4647
c.bench_function("chr", |b| {
47-
b.iter(|| black_box(cot_fn.invoke_batch(&args, size).unwrap()))
48+
b.iter(|| {
49+
black_box(
50+
cot_fn
51+
.invoke_with_args(ScalarFunctionArgs {
52+
args: args.clone(),
53+
number_rows: size,
54+
return_type: &DataType::Utf8,
55+
})
56+
.unwrap(),
57+
)
58+
})
4859
});
4960
}
5061

datafusion/functions/benches/cot.rs

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,10 @@ use arrow::{
2222
util::bench_util::create_primitive_array,
2323
};
2424
use criterion::{black_box, criterion_group, criterion_main, Criterion};
25-
use datafusion_expr::ColumnarValue;
25+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
2626
use datafusion_functions::math::cot;
2727

28+
use arrow::datatypes::DataType;
2829
use std::sync::Arc;
2930

3031
fn criterion_benchmark(c: &mut Criterion) {
@@ -34,16 +35,30 @@ fn criterion_benchmark(c: &mut Criterion) {
3435
let f32_args = vec![ColumnarValue::Array(f32_array)];
3536
c.bench_function(&format!("cot f32 array: {}", size), |b| {
3637
b.iter(|| {
37-
// TODO use invoke_with_args
38-
black_box(cot_fn.invoke_batch(&f32_args, size).unwrap())
38+
black_box(
39+
cot_fn
40+
.invoke_with_args(ScalarFunctionArgs {
41+
args: f32_args.clone(),
42+
number_rows: size,
43+
return_type: &DataType::Float32,
44+
})
45+
.unwrap(),
46+
)
3947
})
4048
});
4149
let f64_array = Arc::new(create_primitive_array::<Float64Type>(size, 0.2));
4250
let f64_args = vec![ColumnarValue::Array(f64_array)];
4351
c.bench_function(&format!("cot f64 array: {}", size), |b| {
4452
b.iter(|| {
45-
// TODO use invoke_with_args
46-
black_box(cot_fn.invoke_batch(&f64_args, size).unwrap())
53+
black_box(
54+
cot_fn
55+
.invoke_with_args(ScalarFunctionArgs {
56+
args: f64_args.clone(),
57+
number_rows: size,
58+
return_type: &DataType::Float64,
59+
})
60+
.unwrap(),
61+
)
4762
})
4863
});
4964
}

datafusion/functions/benches/date_bin.rs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ use datafusion_common::ScalarValue;
2525
use rand::rngs::ThreadRng;
2626
use rand::Rng;
2727

28-
use datafusion_expr::ColumnarValue;
28+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
2929
use datafusion_functions::datetime::date_bin;
3030

3131
fn timestamps(rng: &mut ThreadRng) -> TimestampSecondArray {
@@ -45,12 +45,18 @@ fn criterion_benchmark(c: &mut Criterion) {
4545
let interval = ColumnarValue::Scalar(ScalarValue::new_interval_dt(0, 1_000_000));
4646
let timestamps = ColumnarValue::Array(timestamps_array);
4747
let udf = date_bin();
48+
let return_type = udf
49+
.return_type(&[interval.data_type(), timestamps.data_type()])
50+
.unwrap();
4851

4952
b.iter(|| {
50-
// TODO use invoke_with_args
5153
black_box(
52-
udf.invoke_batch(&[interval.clone(), timestamps.clone()], batch_len)
53-
.expect("date_bin should work on valid values"),
54+
udf.invoke_with_args(ScalarFunctionArgs {
55+
args: vec![interval.clone(), timestamps.clone()],
56+
number_rows: batch_len,
57+
return_type: &return_type,
58+
})
59+
.expect("date_bin should work on valid values"),
5460
)
5561
})
5662
});

datafusion/functions/benches/date_trunc.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ use datafusion_common::ScalarValue;
2525
use rand::rngs::ThreadRng;
2626
use rand::Rng;
2727

28-
use datafusion_expr::ColumnarValue;
28+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
2929
use datafusion_functions::datetime::date_trunc;
3030

3131
fn timestamps(rng: &mut ThreadRng) -> TimestampSecondArray {
@@ -46,11 +46,15 @@ fn criterion_benchmark(c: &mut Criterion) {
4646
ColumnarValue::Scalar(ScalarValue::Utf8(Some("minute".to_string())));
4747
let timestamps = ColumnarValue::Array(timestamps_array);
4848
let udf = date_trunc();
49-
49+
let return_type = &udf.return_type(&[timestamps.data_type()]).unwrap();
5050
b.iter(|| {
5151
black_box(
52-
udf.invoke_batch(&[precision.clone(), timestamps.clone()], batch_len)
53-
.expect("date_trunc should work on valid values"),
52+
udf.invoke_with_args(ScalarFunctionArgs {
53+
args: vec![precision.clone(), timestamps.clone()],
54+
number_rows: batch_len,
55+
return_type,
56+
})
57+
.expect("date_trunc should work on valid values"),
5458
)
5559
})
5660
});

datafusion/functions/benches/encoding.rs

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,10 @@
1717

1818
extern crate criterion;
1919

20+
use arrow::datatypes::DataType;
2021
use arrow::util::bench_util::create_string_array_with_len;
2122
use criterion::{black_box, criterion_group, criterion_main, Criterion};
22-
use datafusion_expr::ColumnarValue;
23+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
2324
use datafusion_functions::encoding;
2425
use std::sync::Arc;
2526

@@ -29,35 +30,49 @@ fn criterion_benchmark(c: &mut Criterion) {
2930
let str_array = Arc::new(create_string_array_with_len::<i32>(size, 0.2, 32));
3031
c.bench_function(&format!("base64_decode/{size}"), |b| {
3132
let method = ColumnarValue::Scalar("base64".into());
32-
// TODO: use invoke_with_args
3333
let encoded = encoding::encode()
34-
.invoke_batch(
35-
&[ColumnarValue::Array(str_array.clone()), method.clone()],
36-
size,
37-
)
34+
.invoke_with_args(ScalarFunctionArgs {
35+
args: vec![ColumnarValue::Array(str_array.clone()), method.clone()],
36+
number_rows: size,
37+
return_type: &DataType::Utf8,
38+
})
3839
.unwrap();
3940

4041
let args = vec![encoded, method];
4142
b.iter(|| {
42-
// TODO use invoke_with_args
43-
black_box(decode.invoke_batch(&args, size).unwrap())
43+
black_box(
44+
decode
45+
.invoke_with_args(ScalarFunctionArgs {
46+
args: args.clone(),
47+
number_rows: size,
48+
return_type: &DataType::Utf8,
49+
})
50+
.unwrap(),
51+
)
4452
})
4553
});
4654

4755
c.bench_function(&format!("hex_decode/{size}"), |b| {
4856
let method = ColumnarValue::Scalar("hex".into());
49-
// TODO use invoke_with_args
5057
let encoded = encoding::encode()
51-
.invoke_batch(
52-
&[ColumnarValue::Array(str_array.clone()), method.clone()],
53-
size,
54-
)
58+
.invoke_with_args(ScalarFunctionArgs {
59+
args: vec![ColumnarValue::Array(str_array.clone()), method.clone()],
60+
number_rows: size,
61+
return_type: &DataType::Utf8,
62+
})
5563
.unwrap();
5664

5765
let args = vec![encoded, method];
5866
b.iter(|| {
59-
// TODO use invoke_with_args
60-
black_box(decode.invoke_batch(&args, size).unwrap())
67+
black_box(
68+
decode
69+
.invoke_with_args(ScalarFunctionArgs {
70+
args: args.clone(),
71+
number_rows: size,
72+
return_type: &DataType::Utf8,
73+
})
74+
.unwrap(),
75+
)
6176
})
6277
});
6378
}

0 commit comments

Comments
 (0)