Skip to content

Commit 639f8ce

Browse files
Issue-14416 - feat: Add array_min function
1 parent ea788c7 commit 639f8ce

File tree

4 files changed

+269
-0
lines changed

4 files changed

+269
-0
lines changed

datafusion/functions-nested/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ pub mod map;
4747
pub mod map_extract;
4848
pub mod map_keys;
4949
pub mod map_values;
50+
pub mod min;
5051
pub mod planner;
5152
pub mod position;
5253
pub mod range;
@@ -139,6 +140,7 @@ pub fn all_default_nested_functions() -> Vec<Arc<ScalarUDF>> {
139140
length::array_length_udf(),
140141
distance::array_distance_udf(),
141142
flatten::flatten_udf(),
143+
min::array_min_udf(),
142144
sort::array_sort_udf(),
143145
repeat::array_repeat_udf(),
144146
resize::array_resize_udf(),
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! [`ScalarUDFImpl`] definitions for array_min function.
19+
20+
use crate::sort::array_sort_inner;
21+
use crate::utils::make_scalar_function;
22+
use arrow_array::{Array, ArrayRef, StringArray};
23+
use arrow_schema::DataType;
24+
use arrow_schema::DataType::{FixedSizeList, LargeList, List};
25+
use datafusion_common::cast::as_list_array;
26+
use datafusion_common::exec_err;
27+
use datafusion_doc::Documentation;
28+
use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility};
29+
use datafusion_macros::user_doc;
30+
use std::any::Any;
31+
use std::sync::Arc;
32+
33+
// Invokes the crate's `make_udf_expr_and_func!` macro for `ArrayMin`. The arguments name the
// `array_min` expression function (taking `array`), its description string, and the
// `array_min_udf` accessor that `lib.rs` registers.
// NOTE(review): the macro is defined elsewhere in the crate — confirm exactly what it emits.
make_udf_expr_and_func!(
34+
ArrayMin,
35+
array_min,
36+
array,
37+
"returns the minimum value in the array.",
38+
array_min_udf
39+
);
40+
41+
// User-facing documentation for `array_min` (section, description, syntax, SQL example and
// argument description), attached through the `user_doc` attribute.
#[user_doc(
42+
doc_section(label = "Array Functions"),
43+
description = "Returns the minimum value in the array.",
44+
syntax_example = "array_min(array)",
45+
sql_example = r#"```sql
46+
> select array_min([3,1,4,2]);
47+
+-----------------------------------------+
48+
| array_min(List([3,1,4,2])) |
49+
+-----------------------------------------+
50+
| 1 |
51+
+-----------------------------------------+
52+
```"#,
53+
argument(
54+
name = "array",
55+
description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
56+
)
57+
)]
58+
#[derive(Debug)]
59+
/// Scalar UDF definition for `array_min`, which returns the minimum value in an array.
pub struct ArrayMin {
60+
/// Accepted argument signature; `new` sets it to `Signature::array(Volatility::Immutable)`.
signature: Signature,
61+
/// Alternative names for the function; `new` registers `list_min`.
aliases: Vec<String>,
62+
}
63+
64+
impl Default for ArrayMin {
65+
fn default() -> Self {
66+
Self::new()
67+
}
68+
}
69+
70+
impl ArrayMin {
71+
pub fn new() -> Self {
72+
Self {
73+
signature: Signature::array(Volatility::Immutable),
74+
aliases: vec!["list_min".to_string()],
75+
}
76+
}
77+
}
78+
79+
impl ScalarUDFImpl for ArrayMin {
80+
fn as_any(&self) -> &dyn Any {
81+
self
82+
}
83+
84+
fn name(&self) -> &str {
85+
"array_min"
86+
}
87+
88+
fn display_name(&self, args: &[Expr]) -> datafusion_common::Result<String> {
89+
let args_name = args.iter().map(ToString::to_string).collect::<Vec<_>>();
90+
if args_name.len() != 1 {
91+
return exec_err!("expects 1 arg, got {}", args_name.len());
92+
}
93+
94+
Ok(format!("{}", args_name[0]))
95+
}
96+
97+
fn schema_name(&self, args: &[Expr]) -> datafusion_common::Result<String> {
98+
let args_name = args
99+
.iter()
100+
.map(|e| e.schema_name().to_string())
101+
.collect::<Vec<_>>();
102+
if args_name.len() != 1 {
103+
return exec_err!("expects 1 arg, got {}", args_name.len());
104+
}
105+
106+
Ok(format!("{}", args_name[0]))
107+
}
108+
109+
fn signature(&self) -> &Signature {
110+
&self.signature
111+
}
112+
113+
fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
114+
match &arg_types[0] {
115+
List(field) | LargeList(field) | FixedSizeList(field, _) => {
116+
Ok(field.data_type().clone())
117+
}
118+
_ => exec_err!(
119+
"Not reachable, data_type should be List, LargeList or FixedSizeList"
120+
),
121+
}
122+
}
123+
124+
fn invoke_batch(
125+
&self,
126+
args: &[ColumnarValue],
127+
_number_rows: usize,
128+
) -> datafusion_common::Result<ColumnarValue> {
129+
make_scalar_function(array_min_inner)(args)
130+
}
131+
132+
fn aliases(&self) -> &[String] {
133+
&self.aliases
134+
}
135+
136+
fn documentation(&self) -> Option<&Documentation> {
137+
self.doc()
138+
}
139+
}
140+
141+
/// array_min SQL function
142+
///
143+
/// There is one argument for array_min as the array.
144+
/// `array_min(array)`
145+
///
146+
/// For example:
147+
/// > array_min(\[3, 1, 2]) -> 1
148+
pub fn array_min_inner(args: &[ArrayRef]) -> datafusion_common::Result<ArrayRef> {
149+
if args.len() != 1 {
150+
return exec_err!("array_min needs one argument");
151+
}
152+
153+
match &args[0].data_type() {
154+
List(_) | LargeList(_) | FixedSizeList(_, _) => {
155+
let new_args = vec![
156+
args[0].clone(),
157+
Arc::new(StringArray::from_iter(vec![Some("ASC")])),
158+
Arc::new(StringArray::from_iter(vec![Some("NULLS LAST")])),
159+
];
160+
array_min_internal(&new_args)
161+
}
162+
_ => exec_err!("array_min does not support type: {:?}", args[0].data_type()),
163+
}
164+
}
165+
166+
fn array_min_internal(args: &[ArrayRef]) -> datafusion_common::Result<ArrayRef> {
167+
let sorted_array = array_sort_inner(args)?;
168+
let result_array = as_list_array(&sorted_array)?.value(0);
169+
if result_array.is_empty() {
170+
return exec_err!("array_min needs one argument as non-empty array");
171+
}
172+
let min_result = result_array.slice(0, 1);
173+
Ok(min_result)
174+
}

datafusion/sqllogictest/test_files/array.slt

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1433,6 +1433,66 @@ NULL 23
14331433
NULL 43
14341434
5 NULL
14351435

1436+
## array_min
1437+
# array_min scalar function #1 (integer elements)
1438+
query I
1439+
select array_min(make_array(5, 3, 4, 6));
1440+
----
1441+
3
1442+
1443+
query I
1444+
select array_min(make_array(5, 3, 4, NULL, 6, NULL));
1445+
----
1446+
3
1447+
1448+
query I
1449+
select array_min(make_array(NULL, NULL));
1450+
----
1451+
NULL
1452+
1453+
query T
1454+
select array_min(make_array('h', 'e', 'l', 'l', 'o'));
1455+
----
1456+
e
1457+
1458+
query T
1459+
select array_min(make_array('h', 'e', 'l', NULL, 'l', 'o', NULL));
1460+
----
1461+
e
1462+
1463+
query B
1464+
select array_min(make_array(true, true, false, true));
1465+
----
1466+
false
1467+
1468+
query B
1469+
select array_min(make_array(true, true, NULL, false, true));
1470+
----
1471+
false
1472+
1473+
query D
1474+
select array_min(make_array(DATE '1992-09-01', DATE '1993-03-01', DATE '1985-11-01', DATE '1999-05-01'));
1475+
----
1476+
1985-11-01
1477+
1478+
query D
1479+
select array_min(make_array(DATE '1995-09-01', DATE '1993-03-01', NULL, DATE '1999-05-01'));
1480+
----
1481+
1993-03-01
1482+
1483+
query P
1484+
select array_min(make_array(TIMESTAMP '1992-09-01', TIMESTAMP '1984-10-01', TIMESTAMP '1995-06-01'));
1485+
----
1486+
1984-10-01T00:00:00
1487+
1488+
query P
1489+
select array_min(make_array(NULL, TIMESTAMP '1996-10-01', TIMESTAMP '1995-06-01'));
1490+
----
1491+
1995-06-01T00:00:00
1492+
1493+
query error Execution error: array_min needs one argument as non-empty array
1494+
select array_min(make_array());
1495+
14361496
## array_pop_back (aliases: `list_pop_back`)
14371497

14381498
# array_pop_back scalar function with null

docs/source/user-guide/sql/scalar_functions.md

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2524,6 +2524,7 @@ _Alias of [current_date](#current_date)._
25242524
- [array_intersect](#array_intersect)
25252525
- [array_join](#array_join)
25262526
- [array_length](#array_length)
2527+
- [array_min](#array_min)
25272528
- [array_ndims](#array_ndims)
25282529
- [array_pop_back](#array_pop_back)
25292530
- [array_pop_front](#array_pop_front)
@@ -2569,6 +2570,7 @@ _Alias of [current_date](#current_date)._
25692570
- [list_intersect](#list_intersect)
25702571
- [list_join](#list_join)
25712572
- [list_length](#list_length)
2573+
- [list_min](#list_min)
25722574
- [list_ndims](#list_ndims)
25732575
- [list_pop_back](#list_pop_back)
25742576
- [list_pop_front](#list_pop_front)
@@ -3002,6 +3004,33 @@ array_length(array, dimension)
30023004

30033005
- list_length
30043006

3007+
### `array_min`
3008+
3009+
Returns the minimum value in the array.
3010+
3011+
```
3012+
array_min(array)
3013+
```
3014+
3015+
#### Arguments
3016+
3017+
- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators.
3018+
3019+
#### Example
3020+
3021+
```sql
3022+
> select array_min([3,1,4,2]);
3023+
+-------------------------------------------+
3024+
| array_min(List([3,1,4,2])) |
3025+
+-------------------------------------------+
3026+
| 1 |
3027+
+-------------------------------------------+
3028+
```
3029+
3030+
#### Aliases
3031+
3032+
- list_min
3033+
30053034
### `array_ndims`
30063035

30073036
Returns the number of dimensions of the array.
@@ -3759,6 +3788,10 @@ _Alias of [array_to_string](#array_to_string)._
37593788

37603789
_Alias of [array_length](#array_length)._
37613790

3791+
### `list_min`
3792+
3793+
_Alias of [array_min](#array_min)._
3794+
37623795
### `list_ndims`
37633796

37643797
_Alias of [array_ndims](#array_ndims)._

0 commit comments

Comments
 (0)