Skip to content

Commit 43ecd9b

Browse files
authored
fix: graceful NULL and type error handling in array functions (#14737)
* feat: arbitrary typed argument in array function * fix: array_sort null handling * fix: array_resize signature * test: add array_sort sqllogictest for null and invalid types * fix: don't match error message * chore: use string instead of data type * refactor: use new_null_array * fix: pass null to array argument should return null * fix: handle null argument for array in replace and resize * fix: mismatched error message * fix: incorrect number of rows returned * test: update null tests * fix: treat NULLs as lists directly to prevent extra handling * fix: incorrect null pushing in array_sort
1 parent 9a4c9d5 commit 43ecd9b

File tree

7 files changed

+149
-25
lines changed

7 files changed

+149
-25
lines changed

datafusion/expr-common/src/signature.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,8 @@ pub enum ArrayFunctionArgument {
358358
/// An argument of type List/LargeList/FixedSizeList. All Array arguments must be coercible
359359
/// to the same type.
360360
Array,
361+
// A Utf8 argument.
362+
String,
361363
}
362364

363365
impl Display for ArrayFunctionArgument {
@@ -372,6 +374,9 @@ impl Display for ArrayFunctionArgument {
372374
ArrayFunctionArgument::Array => {
373375
write!(f, "array")
374376
}
377+
ArrayFunctionArgument::String => {
378+
write!(f, "string")
379+
}
375380
}
376381
}
377382
}

datafusion/expr/src/type_coercion/functions.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use super::binary::{binary_numeric_coercion, comparison_coercion};
1919
use crate::{AggregateUDF, ScalarUDF, Signature, TypeSignature, WindowUDF};
2020
use arrow::{
2121
compute::can_cast_types,
22-
datatypes::{DataType, TimeUnit},
22+
datatypes::{DataType, Field, TimeUnit},
2323
};
2424
use datafusion_common::types::LogicalType;
2525
use datafusion_common::utils::{coerced_fixed_size_list_to_list, ListCoercion};
@@ -387,7 +387,7 @@ fn get_valid_types(
387387
new_base_type =
388388
coerce_array_types(function_name, current_type, &new_base_type)?;
389389
}
390-
ArrayFunctionArgument::Index => {}
390+
ArrayFunctionArgument::Index | ArrayFunctionArgument::String => {}
391391
}
392392
}
393393
let new_array_type = datafusion_common::utils::coerced_type_with_base_type_only(
@@ -408,6 +408,7 @@ fn get_valid_types(
408408
let valid_type = match argument_type {
409409
ArrayFunctionArgument::Element => new_elem_type.clone(),
410410
ArrayFunctionArgument::Index => DataType::Int64,
411+
ArrayFunctionArgument::String => DataType::Utf8,
411412
ArrayFunctionArgument::Array => {
412413
let Some(current_type) = array(current_type) else {
413414
return Ok(vec![vec![]]);
@@ -435,6 +436,10 @@ fn get_valid_types(
435436
match array_type {
436437
DataType::List(_) | DataType::LargeList(_) => Some(array_type.clone()),
437438
DataType::FixedSizeList(field, _) => Some(DataType::List(Arc::clone(field))),
439+
DataType::Null => Some(DataType::List(Arc::new(Field::new_list_field(
440+
DataType::Int64,
441+
true,
442+
)))),
438443
_ => None,
439444
}
440445
}

datafusion/functions-nested/src/extract.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ impl ScalarUDFImpl for ArrayElement {
166166
List(field)
167167
| LargeList(field)
168168
| FixedSizeList(field, _) => Ok(field.data_type().clone()),
169+
DataType::Null => Ok(List(Arc::new(Field::new_list_field(DataType::Int64, true)))),
169170
_ => plan_err!(
170171
"ArrayElement can only accept List, LargeList or FixedSizeList as the first argument"
171172
),

datafusion/functions-nested/src/replace.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
//! [`ScalarUDFImpl`] definitions for array_replace, array_replace_n and array_replace_all functions.
1919
2020
use arrow::array::{
21-
Array, ArrayRef, AsArray, Capacities, GenericListArray, MutableArrayData,
22-
NullBufferBuilder, OffsetSizeTrait,
21+
new_null_array, Array, ArrayRef, AsArray, Capacities, GenericListArray,
22+
MutableArrayData, NullBufferBuilder, OffsetSizeTrait,
2323
};
2424
use arrow::datatypes::{DataType, Field};
2525

@@ -429,6 +429,7 @@ pub(crate) fn array_replace_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
429429
let list_array = array.as_list::<i64>();
430430
general_replace::<i64>(list_array, from, to, arr_n)
431431
}
432+
DataType::Null => Ok(new_null_array(array.data_type(), 1)),
432433
array_type => exec_err!("array_replace does not support type '{array_type:?}'."),
433434
}
434435
}
@@ -447,6 +448,7 @@ pub(crate) fn array_replace_n_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
447448
let list_array = array.as_list::<i64>();
448449
general_replace::<i64>(list_array, from, to, arr_n)
449450
}
451+
DataType::Null => Ok(new_null_array(array.data_type(), 1)),
450452
array_type => {
451453
exec_err!("array_replace_n does not support type '{array_type:?}'.")
452454
}
@@ -467,6 +469,7 @@ pub(crate) fn array_replace_all_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
467469
let list_array = array.as_list::<i64>();
468470
general_replace::<i64>(list_array, from, to, arr_n)
469471
}
472+
DataType::Null => Ok(new_null_array(array.data_type(), 1)),
470473
array_type => {
471474
exec_err!("array_replace_all does not support type '{array_type:?}'.")
472475
}

datafusion/functions-nested/src/resize.rs

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,18 @@ use arrow::array::{
2323
MutableArrayData, NullBufferBuilder, OffsetSizeTrait,
2424
};
2525
use arrow::buffer::OffsetBuffer;
26-
use arrow::datatypes::ArrowNativeType;
2726
use arrow::datatypes::DataType;
27+
use arrow::datatypes::{ArrowNativeType, Field};
2828
use arrow::datatypes::{
2929
DataType::{FixedSizeList, LargeList, List},
3030
FieldRef,
3131
};
3232
use datafusion_common::cast::{as_int64_array, as_large_list_array, as_list_array};
33+
use datafusion_common::utils::ListCoercion;
3334
use datafusion_common::{exec_err, internal_datafusion_err, Result, ScalarValue};
3435
use datafusion_expr::{
35-
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
36+
ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Documentation,
37+
ScalarUDFImpl, Signature, TypeSignature, Volatility,
3638
};
3739
use datafusion_macros::user_doc;
3840
use std::any::Any;
@@ -83,7 +85,26 @@ impl Default for ArrayResize {
8385
impl ArrayResize {
8486
pub fn new() -> Self {
8587
Self {
86-
signature: Signature::variadic_any(Volatility::Immutable),
88+
signature: Signature::one_of(
89+
vec![
90+
TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
91+
arguments: vec![
92+
ArrayFunctionArgument::Array,
93+
ArrayFunctionArgument::Index,
94+
],
95+
array_coercion: Some(ListCoercion::FixedSizedListToList),
96+
}),
97+
TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
98+
arguments: vec![
99+
ArrayFunctionArgument::Array,
100+
ArrayFunctionArgument::Index,
101+
ArrayFunctionArgument::Element,
102+
],
103+
array_coercion: Some(ListCoercion::FixedSizedListToList),
104+
}),
105+
],
106+
Volatility::Immutable,
107+
),
87108
aliases: vec!["list_resize".to_string()],
88109
}
89110
}
@@ -106,6 +127,9 @@ impl ScalarUDFImpl for ArrayResize {
106127
match &arg_types[0] {
107128
List(field) | FixedSizeList(field, _) => Ok(List(Arc::clone(field))),
108129
LargeList(field) => Ok(LargeList(Arc::clone(field))),
130+
DataType::Null => {
131+
Ok(List(Arc::new(Field::new_list_field(DataType::Int64, true))))
132+
}
109133
_ => exec_err!(
110134
"Not reachable, data_type should be List, LargeList or FixedSizeList"
111135
),
@@ -137,7 +161,7 @@ pub(crate) fn array_resize_inner(arg: &[ArrayRef]) -> Result<ArrayRef> {
137161
let array = &arg[0];
138162

139163
// Checks if entire array is null
140-
if array.null_count() == array.len() {
164+
if array.logical_null_count() == array.len() {
141165
let return_type = match array.data_type() {
142166
List(field) => List(Arc::clone(field)),
143167
LargeList(field) => LargeList(Arc::clone(field)),

datafusion/functions-nested/src/sort.rs

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,16 @@
1818
//! [`ScalarUDFImpl`] definitions for array_sort function.
1919
2020
use crate::utils::make_scalar_function;
21-
use arrow::array::{Array, ArrayRef, ListArray, NullBufferBuilder};
21+
use arrow::array::{new_null_array, Array, ArrayRef, ListArray, NullBufferBuilder};
2222
use arrow::buffer::OffsetBuffer;
2323
use arrow::datatypes::DataType::{FixedSizeList, LargeList, List};
2424
use arrow::datatypes::{DataType, Field};
2525
use arrow::{compute, compute::SortOptions};
2626
use datafusion_common::cast::{as_list_array, as_string_array};
2727
use datafusion_common::{exec_err, Result};
2828
use datafusion_expr::{
29-
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
29+
ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Documentation,
30+
ScalarUDFImpl, Signature, TypeSignature, Volatility,
3031
};
3132
use datafusion_macros::user_doc;
3233
use std::any::Any;
@@ -87,7 +88,30 @@ impl Default for ArraySort {
8788
impl ArraySort {
8889
pub fn new() -> Self {
8990
Self {
90-
signature: Signature::variadic_any(Volatility::Immutable),
91+
signature: Signature::one_of(
92+
vec![
93+
TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
94+
arguments: vec![ArrayFunctionArgument::Array],
95+
array_coercion: None,
96+
}),
97+
TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
98+
arguments: vec![
99+
ArrayFunctionArgument::Array,
100+
ArrayFunctionArgument::String,
101+
],
102+
array_coercion: None,
103+
}),
104+
TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
105+
arguments: vec![
106+
ArrayFunctionArgument::Array,
107+
ArrayFunctionArgument::String,
108+
ArrayFunctionArgument::String,
109+
],
110+
array_coercion: None,
111+
}),
112+
],
113+
Volatility::Immutable,
114+
),
91115
aliases: vec!["list_sort".to_string()],
92116
}
93117
}
@@ -115,6 +139,7 @@ impl ScalarUDFImpl for ArraySort {
115139
field.data_type().clone(),
116140
true,
117141
)))),
142+
DataType::Null => Ok(DataType::Null),
118143
_ => exec_err!(
119144
"Not reachable, data_type should be List, LargeList or FixedSizeList"
120145
),
@@ -143,6 +168,10 @@ pub fn array_sort_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
143168
return exec_err!("array_sort expects one to three arguments");
144169
}
145170

171+
if args[1..].iter().any(|array| array.is_null(0)) {
172+
return Ok(new_null_array(args[0].data_type(), args[0].len()));
173+
}
174+
146175
let sort_option = match args.len() {
147176
1 => None,
148177
2 => {
@@ -196,12 +225,16 @@ pub fn array_sort_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
196225
.map(|a| a.as_ref())
197226
.collect::<Vec<&dyn Array>>();
198227

199-
let list_arr = ListArray::new(
200-
Arc::new(Field::new_list_field(data_type, true)),
201-
OffsetBuffer::from_lengths(array_lengths),
202-
Arc::new(compute::concat(elements.as_slice())?),
203-
buffer,
204-
);
228+
let list_arr = if elements.is_empty() {
229+
ListArray::new_null(Arc::new(Field::new_list_field(data_type, true)), row_count)
230+
} else {
231+
ListArray::new(
232+
Arc::new(Field::new_list_field(data_type, true)),
233+
OffsetBuffer::from_lengths(array_lengths),
234+
Arc::new(compute::concat(elements.as_slice())?),
235+
buffer,
236+
)
237+
};
205238
Ok(Arc::new(list_arr))
206239
}
207240

datafusion/sqllogictest/test_files/array.slt

Lines changed: 61 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1204,8 +1204,10 @@ select array_element([1, 2], NULL);
12041204
----
12051205
NULL
12061206

1207-
query error
1207+
query I
12081208
select array_element(NULL, 2);
1209+
----
1210+
NULL
12091211

12101212
# array_element scalar function #1 (with positive index)
12111213
query IT
@@ -2265,6 +2267,52 @@ select array_sort([]);
22652267
----
22662268
[]
22672269

2270+
# test with null arguments
2271+
query ?
2272+
select array_sort(NULL);
2273+
----
2274+
NULL
2275+
2276+
query ?
2277+
select array_sort(column1, NULL) from arrays_values;
2278+
----
2279+
NULL
2280+
NULL
2281+
NULL
2282+
NULL
2283+
NULL
2284+
NULL
2285+
NULL
2286+
NULL
2287+
2288+
query ??
2289+
select array_sort(column1, 'DESC', NULL), array_sort(column1, 'ASC', NULL) from arrays_values;
2290+
----
2291+
NULL NULL
2292+
NULL NULL
2293+
NULL NULL
2294+
NULL NULL
2295+
NULL NULL
2296+
NULL NULL
2297+
NULL NULL
2298+
NULL NULL
2299+
2300+
query ??
2301+
select array_sort(column1, NULL, 'NULLS FIRST'), array_sort(column1, NULL, 'NULLS LAST') from arrays_values;
2302+
----
2303+
NULL NULL
2304+
NULL NULL
2305+
NULL NULL
2306+
NULL NULL
2307+
NULL NULL
2308+
NULL NULL
2309+
NULL NULL
2310+
NULL NULL
2311+
2312+
## test with argument of incorrect types
2313+
query error DataFusion error: Execution error: the second parameter of array_sort expects DESC or ASC
2314+
select array_sort([1, 3, null, 5, NULL, -5], 1), array_sort([1, 3, null, 5, NULL, -5], 'DESC', 1), array_sort([1, 3, null, 5, NULL, -5], 1, 1);
2315+
22682316
# test with empty row, the row that does not match the condition has row count 0
22692317
statement ok
22702318
create table t1(a int, b int) as values (100, 1), (101, 2), (102, 3), (101, 2);
@@ -2290,8 +2338,10 @@ select list_sort(make_array(1, 3, null, 5, NULL, -5)), list_sort(make_array(1, 3
22902338

22912339
# array_append with NULLs
22922340

2293-
query error
2341+
query ?
22942342
select array_append(null, 1);
2343+
----
2344+
[1]
22952345

22962346
query error
22972347
select array_append(null, [2, 3]);
@@ -2539,8 +2589,10 @@ select array_append(column1, arrow_cast(make_array(1, 11, 111), 'FixedSizeList(3
25392589
# DuckDB: [4]
25402590
# ClickHouse: Null
25412591
# Since they dont have the same result, we just follow Postgres, return error
2542-
query error
2592+
query ?
25432593
select array_prepend(4, NULL);
2594+
----
2595+
[4]
25442596

25452597
query ?
25462598
select array_prepend(4, []);
@@ -2575,11 +2627,10 @@ select array_prepend(null, [[1,2,3]]);
25752627
query error
25762628
select array_prepend([], []);
25772629

2578-
# DuckDB: [null]
2579-
# ClickHouse: [null]
2580-
# TODO: We may also return [null]
2581-
query error
2630+
query ?
25822631
select array_prepend(null, null);
2632+
----
2633+
[NULL]
25832634

25842635
query ?
25852636
select array_append([], null);
@@ -5264,9 +5315,11 @@ NULL [3] [5]
52645315
# array_ndims scalar function #1
52655316

52665317
#follow PostgreSQL
5267-
query error
5318+
query I
52685319
select
52695320
array_ndims(null);
5321+
----
5322+
NULL
52705323

52715324
query I
52725325
select

0 commit comments

Comments
 (0)