Skip to content

Commit 7597769

Browse files
authored
replace TypeSignature::String with TypeSignature::Coercible (#14917)
* deprecated use of TypeSignature::String
* make kernel functions private
1 parent 5d08325 commit 7597769

File tree

14 files changed

+378
-139
lines changed

14 files changed

+378
-139
lines changed

datafusion/functions-nested/src/string.rs

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ use arrow::array::{
2424
UInt8Array,
2525
};
2626
use arrow::datatypes::{DataType, Field};
27-
use datafusion_expr::TypeSignature;
2827

2928
use datafusion_common::{
3029
internal_datafusion_err, not_impl_err, plan_err, DataFusionError, Result,
@@ -44,8 +43,10 @@ use arrow::datatypes::DataType::{
4443
};
4544
use datafusion_common::cast::{as_large_list_array, as_list_array};
4645
use datafusion_common::exec_err;
46+
use datafusion_common::types::logical_string;
4747
use datafusion_expr::{
48-
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
48+
Coercion, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature,
49+
TypeSignatureClass, Volatility,
4950
};
5051
use datafusion_functions::{downcast_arg, downcast_named_arg};
5152
use datafusion_macros::user_doc;
@@ -251,7 +252,17 @@ impl StringToArray {
251252
pub fn new() -> Self {
252253
Self {
253254
signature: Signature::one_of(
254-
vec![TypeSignature::String(2), TypeSignature::String(3)],
255+
vec![
256+
TypeSignature::Coercible(vec![
257+
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
258+
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
259+
]),
260+
TypeSignature::Coercible(vec![
261+
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
262+
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
263+
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
264+
]),
265+
],
255266
Volatility::Immutable,
256267
),
257268
aliases: vec![String::from("string_to_list")],

datafusion/functions/src/regex/regexplike.rs

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,15 @@ use arrow::array::{Array, ArrayRef, AsArray, GenericStringArray};
2121
use arrow::compute::kernels::regexp;
2222
use arrow::datatypes::DataType;
2323
use arrow::datatypes::DataType::{LargeUtf8, Utf8, Utf8View};
24-
use datafusion_common::exec_err;
25-
use datafusion_common::ScalarValue;
26-
use datafusion_common::{arrow_datafusion_err, plan_err};
27-
use datafusion_common::{internal_err, DataFusionError, Result};
28-
use datafusion_expr::{ColumnarValue, Documentation, TypeSignature};
29-
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
24+
use datafusion_common::types::logical_string;
25+
use datafusion_common::{
26+
arrow_datafusion_err, exec_err, internal_err, plan_err, DataFusionError, Result,
27+
ScalarValue,
28+
};
29+
use datafusion_expr::{
30+
Coercion, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature,
31+
TypeSignatureClass, Volatility,
32+
};
3033
use datafusion_macros::user_doc;
3134

3235
use std::any::Any;
@@ -79,7 +82,17 @@ impl RegexpLikeFunc {
7982
pub fn new() -> Self {
8083
Self {
8184
signature: Signature::one_of(
82-
vec![TypeSignature::String(2), TypeSignature::String(3)],
85+
vec![
86+
TypeSignature::Coercible(vec![
87+
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
88+
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
89+
]),
90+
TypeSignature::Coercible(vec![
91+
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
92+
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
93+
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
94+
]),
95+
],
8396
Volatility::Immutable,
8497
),
8598
}

datafusion/functions/src/string/bit_length.rs

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,13 @@ use arrow::datatypes::DataType;
2020
use std::any::Any;
2121

2222
use crate::utils::utf8_to_int_type;
23-
use datafusion_common::{utils::take_function_args, Result, ScalarValue};
24-
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
25-
use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature};
23+
use datafusion_common::types::logical_string;
24+
use datafusion_common::utils::take_function_args;
25+
use datafusion_common::{Result, ScalarValue};
26+
use datafusion_expr::{
27+
Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
28+
TypeSignatureClass, Volatility,
29+
};
2630
use datafusion_macros::user_doc;
2731

2832
#[user_doc(
@@ -55,7 +59,12 @@ impl Default for BitLengthFunc {
5559
impl BitLengthFunc {
5660
pub fn new() -> Self {
5761
Self {
58-
signature: Signature::string(1, Volatility::Immutable),
62+
signature: Signature::coercible(
63+
vec![Coercion::new_exact(TypeSignatureClass::Native(
64+
logical_string(),
65+
))],
66+
Volatility::Immutable,
67+
),
5968
}
6069
}
6170
}

datafusion/functions/src/string/contains.rs

Lines changed: 57 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@ use arrow::array::{Array, ArrayRef, AsArray};
2020
use arrow::compute::contains as arrow_contains;
2121
use arrow::datatypes::DataType;
2222
use arrow::datatypes::DataType::{Boolean, LargeUtf8, Utf8, Utf8View};
23-
use datafusion_common::exec_err;
24-
use datafusion_common::DataFusionError;
25-
use datafusion_common::Result;
23+
use datafusion_common::types::logical_string;
24+
use datafusion_common::{exec_err, DataFusionError, Result};
25+
use datafusion_expr::binary::{binary_to_string_coercion, string_coercion};
2626
use datafusion_expr::{
27-
ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
28-
Volatility,
27+
Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
28+
TypeSignatureClass, Volatility,
2929
};
3030
use datafusion_macros::user_doc;
3131
use std::any::Any;
@@ -60,7 +60,13 @@ impl Default for ContainsFunc {
6060
impl ContainsFunc {
6161
pub fn new() -> Self {
6262
Self {
63-
signature: Signature::string(2, Volatility::Immutable),
63+
signature: Signature::coercible(
64+
vec![
65+
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
66+
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
67+
],
68+
Volatility::Immutable,
69+
),
6470
}
6571
}
6672
}
@@ -92,29 +98,52 @@ impl ScalarUDFImpl for ContainsFunc {
9298
}
9399

94100
/// use `arrow::compute::contains` to do the calculation for contains
95-
pub fn contains(args: &[ArrayRef]) -> Result<ArrayRef, DataFusionError> {
96-
match (args[0].data_type(), args[1].data_type()) {
97-
(Utf8View, Utf8View) => {
98-
let mod_str = args[0].as_string_view();
99-
let match_str = args[1].as_string_view();
100-
let res = arrow_contains(mod_str, match_str)?;
101-
Ok(Arc::new(res) as ArrayRef)
102-
}
103-
(Utf8, Utf8) => {
104-
let mod_str = args[0].as_string::<i32>();
105-
let match_str = args[1].as_string::<i32>();
106-
let res = arrow_contains(mod_str, match_str)?;
107-
Ok(Arc::new(res) as ArrayRef)
108-
}
109-
(LargeUtf8, LargeUtf8) => {
110-
let mod_str = args[0].as_string::<i64>();
111-
let match_str = args[1].as_string::<i64>();
112-
let res = arrow_contains(mod_str, match_str)?;
113-
Ok(Arc::new(res) as ArrayRef)
114-
}
115-
other => {
116-
exec_err!("Unsupported data type {other:?} for function `contains`.")
101+
fn contains(args: &[ArrayRef]) -> Result<ArrayRef, DataFusionError> {
102+
if let Some(coercion_data_type) =
103+
string_coercion(args[0].data_type(), args[1].data_type()).or_else(|| {
104+
binary_to_string_coercion(args[0].data_type(), args[1].data_type())
105+
})
106+
{
107+
let arg0 = if args[0].data_type() == &coercion_data_type {
108+
Arc::clone(&args[0])
109+
} else {
110+
arrow::compute::kernels::cast::cast(&args[0], &coercion_data_type)?
111+
};
112+
let arg1 = if args[1].data_type() == &coercion_data_type {
113+
Arc::clone(&args[1])
114+
} else {
115+
arrow::compute::kernels::cast::cast(&args[1], &coercion_data_type)?
116+
};
117+
118+
match coercion_data_type {
119+
Utf8View => {
120+
let mod_str = arg0.as_string_view();
121+
let match_str = arg1.as_string_view();
122+
let res = arrow_contains(mod_str, match_str)?;
123+
Ok(Arc::new(res) as ArrayRef)
124+
}
125+
Utf8 => {
126+
let mod_str = arg0.as_string::<i32>();
127+
let match_str = arg1.as_string::<i32>();
128+
let res = arrow_contains(mod_str, match_str)?;
129+
Ok(Arc::new(res) as ArrayRef)
130+
}
131+
LargeUtf8 => {
132+
let mod_str = arg0.as_string::<i64>();
133+
let match_str = arg1.as_string::<i64>();
134+
let res = arrow_contains(mod_str, match_str)?;
135+
Ok(Arc::new(res) as ArrayRef)
136+
}
137+
other => {
138+
exec_err!("Unsupported data type {other:?} for function `contains`.")
139+
}
117140
}
141+
} else {
142+
exec_err!(
143+
"Unsupported data type {:?}, {:?} for function `contains`.",
144+
args[0].data_type(),
145+
args[1].data_type()
146+
)
118147
}
119148
}
120149

datafusion/functions/src/string/ends_with.rs

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,13 @@ use arrow::array::ArrayRef;
2222
use arrow::datatypes::DataType;
2323

2424
use crate::utils::make_scalar_function;
25+
use datafusion_common::types::logical_string;
2526
use datafusion_common::{internal_err, Result};
26-
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
27-
use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature};
27+
use datafusion_expr::binary::{binary_to_string_coercion, string_coercion};
28+
use datafusion_expr::{
29+
Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
30+
TypeSignatureClass, Volatility,
31+
};
2832
use datafusion_macros::user_doc;
2933

3034
#[user_doc(
@@ -62,7 +66,13 @@ impl Default for EndsWithFunc {
6266
impl EndsWithFunc {
6367
pub fn new() -> Self {
6468
Self {
65-
signature: Signature::string(2, Volatility::Immutable),
69+
signature: Signature::coercible(
70+
vec![
71+
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
72+
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
73+
],
74+
Volatility::Immutable,
75+
),
6676
}
6777
}
6878
}
@@ -102,10 +112,29 @@ impl ScalarUDFImpl for EndsWithFunc {
102112

103113
/// Returns true if string ends with suffix.
104114
/// ends_with('alphabet', 'abet') = 't'
105-
pub fn ends_with(args: &[ArrayRef]) -> Result<ArrayRef> {
106-
let result = arrow::compute::kernels::comparison::ends_with(&args[0], &args[1])?;
107-
108-
Ok(Arc::new(result) as ArrayRef)
115+
fn ends_with(args: &[ArrayRef]) -> Result<ArrayRef> {
116+
if let Some(coercion_data_type) =
117+
string_coercion(args[0].data_type(), args[1].data_type()).or_else(|| {
118+
binary_to_string_coercion(args[0].data_type(), args[1].data_type())
119+
})
120+
{
121+
let arg0 = if args[0].data_type() == &coercion_data_type {
122+
Arc::clone(&args[0])
123+
} else {
124+
arrow::compute::kernels::cast::cast(&args[0], &coercion_data_type)?
125+
};
126+
let arg1 = if args[1].data_type() == &coercion_data_type {
127+
Arc::clone(&args[1])
128+
} else {
129+
arrow::compute::kernels::cast::cast(&args[1], &coercion_data_type)?
130+
};
131+
let result = arrow::compute::kernels::comparison::ends_with(&arg0, &arg1)?;
132+
Ok(Arc::new(result) as ArrayRef)
133+
} else {
134+
internal_err!(
135+
"Unsupported data types for ends_with. Expected Utf8, LargeUtf8 or Utf8View"
136+
)
137+
}
109138
}
110139

111140
#[cfg(test)]

0 commit comments

Comments
 (0)