diff --git a/datafusion/functions-aggregate/src/min_max.rs b/datafusion/functions-aggregate/src/min_max.rs index 90fb46883de6..83356e2f9fb4 100644 --- a/datafusion/functions-aggregate/src/min_max.rs +++ b/datafusion/functions-aggregate/src/min_max.rs @@ -573,7 +573,7 @@ fn min_batch(values: &ArrayRef) -> Result { } /// dynamically-typed max(array) -> ScalarValue -fn max_batch(values: &ArrayRef) -> Result { +pub fn max_batch(values: &ArrayRef) -> Result { Ok(match values.data_type() { DataType::Utf8 => { typed_min_max_batch_string!(values, StringArray, Utf8, max_string) diff --git a/datafusion/functions-nested/src/lib.rs b/datafusion/functions-nested/src/lib.rs index 41ebb4366cff..446cd58865c3 100644 --- a/datafusion/functions-nested/src/lib.rs +++ b/datafusion/functions-nested/src/lib.rs @@ -52,6 +52,7 @@ pub mod map; pub mod map_extract; pub mod map_keys; pub mod map_values; +pub mod max; pub mod planner; pub mod position; pub mod range; @@ -144,6 +145,7 @@ pub fn all_default_nested_functions() -> Vec> { length::array_length_udf(), distance::array_distance_udf(), flatten::flatten_udf(), + max::array_max_udf(), sort::array_sort_udf(), repeat::array_repeat_udf(), resize::array_resize_udf(), diff --git a/datafusion/functions-nested/src/max.rs b/datafusion/functions-nested/src/max.rs new file mode 100644 index 000000000000..22bd14740b5e --- /dev/null +++ b/datafusion/functions-nested/src/max.rs @@ -0,0 +1,137 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [`ScalarUDFImpl`] definitions for array_max function. +use crate::utils::make_scalar_function; +use arrow::array::ArrayRef; +use arrow::datatypes::DataType; +use arrow::datatypes::DataType::List; +use datafusion_common::cast::as_list_array; +use datafusion_common::utils::take_function_args; +use datafusion_common::{exec_err, ScalarValue}; +use datafusion_doc::Documentation; +use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use datafusion_functions_aggregate::min_max; +use datafusion_macros::user_doc; +use itertools::Itertools; +use std::any::Any; + +make_udf_expr_and_func!( + ArrayMax, + array_max, + array, + "returns the maximum value in the array.", + array_max_udf +); + +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns the maximum value in the array.", + syntax_example = "array_max(array)", + sql_example = r#"```sql +> select array_max([3,1,4,2]); ++-----------------------------------------+ +| array_max(List([3,1,4,2])) | ++-----------------------------------------+ +| 4 | ++-----------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+    )
+)]
+#[derive(Debug)]
+pub struct ArrayMax {
+    signature: Signature, // argument signature, built once in `new`
+    aliases: Vec<String>, // alternative SQL names for this function
+}
+
+impl Default for ArrayMax {
+    fn default() -> Self {
+        Self::new() // `Default` simply delegates to `new`
+    }
+}
+
+impl ArrayMax {
+    pub fn new() -> Self {
+        Self {
+            signature: Signature::array(Volatility::Immutable),
+            aliases: vec!["list_max".to_string()], // also callable as `list_max`
+        }
+    }
+}
+
+impl ScalarUDFImpl for ArrayMax {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "array_max"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
+        match arg_types.first() {
+            Some(List(field)) => Ok(field.data_type().clone()), // list element type
+            _ => exec_err!("Not reachable, data_type should be List"), // incl. no args
+        }
+    }
+
+    fn invoke_batch(
+        &self,
+        args: &[ColumnarValue],
+        _number_rows: usize, // unused: the row count is not needed here
+    ) -> datafusion_common::Result<ColumnarValue> {
+        make_scalar_function(array_max_inner)(args) // delegates to `array_max_inner`
+    }
+
+    fn aliases(&self) -> &[String] {
+        &self.aliases
+    }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        self.doc()
+    }
+}
+
+/// array_max SQL function
+///
+/// There is one argument for array_max as the array.
+/// `array_max(array)`, for example `array_max([1, 3, 2])` returns `3`.
+///
+/// Emits one value per input row: a NULL row, an empty list or an all-NULL list
+/// gives NULL, and an error from `min_max::max_batch` is returned, not dropped.
+pub fn array_max_inner(args: &[ArrayRef]) -> datafusion_common::Result<ArrayRef> {
+    let [arg1] = take_function_args("array_max", args)?;
+    let elem_type = match arg1.data_type() {
+        List(field) => field.data_type(),
+        _ => return exec_err!("array_max does not support type: {:?}", arg1.data_type()),
+    };
+    // Typed NULL for NULL rows: keeps the output row-aligned instead of panicking.
+    let null_max = ScalarValue::try_from(elem_type)?;
+    let result_vec: Vec<ScalarValue> = as_list_array(&arg1)?
+        .iter()
+        .map(|arr| arr.as_ref().map_or_else(|| Ok(null_max.clone()), min_max::max_batch))
+        .try_collect()?;
+    // NOTE(review): zero-row input leaves `result_vec` empty — confirm `iter_to_array` accepts it.
+    ScalarValue::iter_to_array(result_vec)
+}
diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 6b5b246aee51..653760e0cae3 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -1433,6 +1433,93 @@ NULL 23 NULL 43 5 NULL + +## array_max +# array_max scalar function #1 (with positive index) +query I +select array_max(make_array(5, 3, 6, 4)); +---- +6 + +query I +select array_max(make_array(5, 3, 4, NULL, 6, NULL)); +---- +6 + +query I +select array_max(make_array(NULL, NULL)); +---- +NULL + +query T +select array_max(make_array('h', 'e', 'o', 'l', 'l')); +---- +o + +query T +select array_max(make_array('h', 'e', 'l', NULL, 'l', 'o', NULL)); +---- +o + +query B +select array_max(make_array(false, true, false, true)); +---- +true + +query B +select array_max(make_array(false, true, NULL, false, true)); +---- +true + +query D +select array_max(make_array(DATE '1992-09-01', DATE '1993-03-01', DATE '1999-05-01', DATE '1985-11-01')); +---- +1999-05-01 + +query D +select array_max(make_array(DATE '1995-09-01', DATE '1999-05-01', DATE '1993-03-01', NULL)); +---- +1999-05-01 + +query P +select array_max(make_array(TIMESTAMP '1992-09-01', TIMESTAMP '1995-06-01', TIMESTAMP '1984-10-01')); +---- +1995-06-01T00:00:00 + +query P +select array_max(make_array(NULL, TIMESTAMP '1996-10-01', TIMESTAMP '1995-06-01')); +---- +1996-10-01T00:00:00 + +query R +select 
array_max(make_array(5.1, -3.2, 6.3, 4.9)); +---- +6.3 + +query ?I +select input, array_max(input) from (select make_array(d - 1, d, d + 1) input from (values (0), (10), (20), (30), (NULL)) t(d)) +---- +[-1, 0, 1] 1 +[9, 10, 11] 11 +[19, 20, 21] 21 +[29, 30, 31] 31 +[NULL, NULL, NULL] NULL + +query II +select array_max(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_max(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +---- +3 1 + +query I +select array_max(make_array()); +---- +NULL + +# Testing with empty arguments should result in an error +query error DataFusion error: Error during planning: 'array_max' does not support zero arguments +select array_max(); + + ## array_pop_back (aliases: `list_pop_back`) # array_pop_back scalar function with null diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index fb4043c33efc..60ecf7bd78d4 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -2524,6 +2524,7 @@ _Alias of [current_date](#current_date)._ - [array_intersect](#array_intersect) - [array_join](#array_join) - [array_length](#array_length) +- [array_max](#array_max) - [array_ndims](#array_ndims) - [array_pop_back](#array_pop_back) - [array_pop_front](#array_pop_front) @@ -2569,6 +2570,7 @@ _Alias of [current_date](#current_date)._ - [list_intersect](#list_intersect) - [list_join](#list_join) - [list_length](#list_length) +- [list_max](#list_max) - [list_ndims](#list_ndims) - [list_pop_back](#list_pop_back) - [list_pop_front](#list_pop_front) @@ -3002,6 +3004,33 @@ array_length(array, dimension) - list_length +### `array_max` + +Returns the maximum value in the array. + +```sql +array_max(array) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. 
+ +#### Example + +```sql +> select array_max([3,1,4,2]); ++-----------------------------------------+ +| array_max(List([3,1,4,2])) | ++-----------------------------------------+ +| 4 | ++-----------------------------------------+ +``` + +#### Aliases + +- list_max + ### `array_ndims` Returns the number of dimensions of the array. @@ -3759,6 +3788,10 @@ _Alias of [array_to_string](#array_to_string)._ _Alias of [array_length](#array_length)._ +### `list_max` + +_Alias of [array_max](#array_max)._ + ### `list_ndims` _Alias of [array_ndims](#array_ndims)._