Skip to content

Commit 6354df6

Browse files
yyy1000alamb
andauthored
Port arrow_typeof to datafusion-function (#9524)
* Port arrowtypeof * fmt * fix test case * revert test change --------- Co-authored-by: Andrew Lamb <[email protected]>
1 parent 0d3d274 commit 6354df6

File tree

11 files changed

+75
-42
lines changed

11 files changed

+75
-42
lines changed

datafusion/expr/src/built_in_function.rs

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -232,8 +232,6 @@ pub enum BuiltinScalarFunction {
232232
Upper,
233233
/// uuid
234234
Uuid,
235-
/// arrow_typeof
236-
ArrowTypeof,
237235
/// overlay
238236
OverLay,
239237
/// levenshtein
@@ -387,7 +385,6 @@ impl BuiltinScalarFunction {
387385
BuiltinScalarFunction::Trim => Volatility::Immutable,
388386
BuiltinScalarFunction::Upper => Volatility::Immutable,
389387
BuiltinScalarFunction::FromUnixtime => Volatility::Immutable,
390-
BuiltinScalarFunction::ArrowTypeof => Volatility::Immutable,
391388
BuiltinScalarFunction::OverLay => Volatility::Immutable,
392389
BuiltinScalarFunction::Levenshtein => Volatility::Immutable,
393390
BuiltinScalarFunction::SubstrIndex => Volatility::Immutable,
@@ -612,8 +609,6 @@ impl BuiltinScalarFunction {
612609

613610
BuiltinScalarFunction::Iszero => Ok(Boolean),
614611

615-
BuiltinScalarFunction::ArrowTypeof => Ok(Utf8),
616-
617612
BuiltinScalarFunction::OverLay => {
618613
utf8_to_str_type(&input_expr_types[0], "overlay")
619614
}
@@ -898,7 +893,6 @@ impl BuiltinScalarFunction {
898893
BuiltinScalarFunction::Gcd | BuiltinScalarFunction::Lcm => {
899894
Signature::uniform(2, vec![Int64], self.volatility())
900895
}
901-
BuiltinScalarFunction::ArrowTypeof => Signature::any(1, self.volatility()),
902896
BuiltinScalarFunction::OverLay => Signature::one_of(
903897
vec![
904898
Exact(vec![Utf8, Utf8, Int64, Int64]),
@@ -1087,9 +1081,6 @@ impl BuiltinScalarFunction {
10871081
BuiltinScalarFunction::SHA384 => &["sha384"],
10881082
BuiltinScalarFunction::SHA512 => &["sha512"],
10891083

1090-
// other functions
1091-
BuiltinScalarFunction::ArrowTypeof => &["arrow_typeof"],
1092-
10931084
BuiltinScalarFunction::ArraySort => &["array_sort", "list_sort"],
10941085
BuiltinScalarFunction::ArrayDistinct => &["array_distinct", "list_distinct"],
10951086
BuiltinScalarFunction::ArrayElement => &[

datafusion/expr/src/expr_fn.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -822,7 +822,6 @@ scalar_expr!(
822822
"returns true if a given number is +0.0 or -0.0 otherwise returns false"
823823
);
824824

825-
scalar_expr!(ArrowTypeof, arrow_typeof, val, "data type");
826825
scalar_expr!(Levenshtein, levenshtein, string1 string2, "Returns the Levenshtein distance between the two given strings");
827826
scalar_expr!(SubstrIndex, substr_index, string delimiter count, "Returns the substring from str before count occurrences of the delimiter");
828827
scalar_expr!(FindInSet, find_in_set, str strlist, "Returns a value in the range of 1 to N if the string str is in the string list strlist consisting of N substrings");
@@ -1292,7 +1291,6 @@ mod test {
12921291
test_scalar_expr!(ArrayReplaceN, array_replace_n, array, from, to, max);
12931292
test_scalar_expr!(ArrayReplaceAll, array_replace_all, array, from, to);
12941293

1295-
test_unary_scalar_expr!(ArrowTypeof, arrow_typeof);
12961294
test_nary_scalar_expr!(OverLay, overlay, string, characters, position, len);
12971295
test_nary_scalar_expr!(OverLay, overlay, string, characters, position);
12981296
test_scalar_expr!(Levenshtein, levenshtein, string1, string2);
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use arrow::datatypes::DataType;
19+
use datafusion_common::{exec_err, Result, ScalarValue};
20+
use datafusion_expr::ColumnarValue;
21+
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
22+
use std::any::Any;
23+
24+
#[derive(Debug)]
25+
pub(super) struct ArrowTypeOfFunc {
26+
signature: Signature,
27+
}
28+
29+
impl ArrowTypeOfFunc {
30+
pub fn new() -> Self {
31+
Self {
32+
signature: Signature::any(1, Volatility::Immutable),
33+
}
34+
}
35+
}
36+
37+
impl ScalarUDFImpl for ArrowTypeOfFunc {
38+
fn as_any(&self) -> &dyn Any {
39+
self
40+
}
41+
fn name(&self) -> &str {
42+
"arrow_typeof"
43+
}
44+
45+
fn signature(&self) -> &Signature {
46+
&self.signature
47+
}
48+
49+
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
50+
Ok(DataType::Utf8)
51+
}
52+
53+
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
54+
if args.len() != 1 {
55+
return exec_err!(
56+
"arrow_typeof function requires 1 arguments, got {}",
57+
args.len()
58+
);
59+
}
60+
61+
let input_data_type = args[0].data_type();
62+
Ok(ColumnarValue::Scalar(ScalarValue::from(format!(
63+
"{input_data_type}"
64+
))))
65+
}
66+
}

datafusion/functions/src/core/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
//! "core" DataFusion functions
1919
20+
mod arrowtypeof;
2021
mod nullif;
2122
mod nvl;
2223
mod nvl2;
@@ -26,12 +27,14 @@ pub mod r#struct;
2627
make_udf_function!(nullif::NullIfFunc, NULLIF, nullif);
2728
make_udf_function!(nvl::NVLFunc, NVL, nvl);
2829
make_udf_function!(nvl2::NVL2Func, NVL2, nvl2);
30+
make_udf_function!(arrowtypeof::ArrowTypeOfFunc, ARROWTYPEOF, arrow_typeof);
2931
make_udf_function!(r#struct::StructFunc, STRUCT, r#struct);
3032

3133
// Export the functions out of this package, both as expr_fn as well as a list of functions
3234
export_functions!(
3335
(nullif, arg_1 arg_2, "returns NULL if value1 equals value2; otherwise it returns value1. This can be used to perform the inverse operation of the COALESCE expression."),
3436
(nvl, arg_1 arg_2, "returns value2 if value1 is NULL; otherwise it returns value1"),
3537
(nvl2, arg_1 arg_2 arg_3, "Returns value2 if value1 is not NULL; otherwise, it returns value3."),
38+
(arrow_typeof, arg_1, "Returns the Arrow type of the input expression."),
3639
(r#struct, args, "Returns a struct with the given arguments")
3740
);

datafusion/functions/src/core/nullif.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! Encoding expressions
19-
2018
use arrow::datatypes::DataType;
2119
use datafusion_common::{exec_err, Result};
2220
use datafusion_expr::ColumnarValue;

datafusion/physical-expr/src/functions.rs

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -691,19 +691,6 @@ pub fn create_physical_fun(
691691
}),
692692
BuiltinScalarFunction::Upper => Arc::new(string_expressions::upper),
693693
BuiltinScalarFunction::Uuid => Arc::new(string_expressions::uuid),
694-
BuiltinScalarFunction::ArrowTypeof => Arc::new(move |args| {
695-
if args.len() != 1 {
696-
return exec_err!(
697-
"arrow_typeof function requires 1 arguments, got {}",
698-
args.len()
699-
);
700-
}
701-
702-
let input_data_type = args[0].data_type();
703-
Ok(ColumnarValue::Scalar(ScalarValue::from(format!(
704-
"{input_data_type}"
705-
))))
706-
}),
707694
BuiltinScalarFunction::OverLay => Arc::new(|args| match args[0].data_type() {
708695
DataType::Utf8 => {
709696
make_scalar_function_inner(string_expressions::overlay::<i32>)(args)

datafusion/proto/proto/datafusion.proto

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -618,7 +618,7 @@ enum ScalarFunction {
618618
FromUnixtime = 66;
619619
Atan2 = 67;
620620
// 68 was DateBin
621-
ArrowTypeof = 69;
621+
// 69 was ArrowTypeof
622622
CurrentDate = 70;
623623
CurrentTime = 71;
624624
Uuid = 72;

datafusion/proto/src/generated/pbjson.rs

Lines changed: 0 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/proto/src/generated/prost.rs

Lines changed: 1 addition & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/proto/src/logical_plan/from_proto.rs

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,10 @@ use datafusion_expr::{
5050
acosh, array_distinct, array_element, array_except, array_intersect, array_pop_back,
5151
array_pop_front, array_position, array_positions, array_remove, array_remove_all,
5252
array_remove_n, array_repeat, array_replace, array_replace_all, array_replace_n,
53-
array_resize, array_slice, array_sort, array_union, arrow_typeof, ascii, asinh, atan,
54-
atan2, atanh, bit_length, btrim, cbrt, ceil, character_length, chr, coalesce,
55-
concat_expr, concat_ws_expr, cos, cosh, cot, current_date, current_time, degrees,
56-
digest, ends_with, exp,
53+
array_resize, array_slice, array_sort, array_union, ascii, asinh, atan, atan2, atanh,
54+
bit_length, btrim, cbrt, ceil, character_length, chr, coalesce, concat_expr,
55+
concat_ws_expr, cos, cosh, cot, current_date, current_time, degrees, digest,
56+
ends_with, exp,
5757
expr::{self, InList, Sort, WindowFunction},
5858
factorial, find_in_set, floor, from_unixtime, gcd, initcap, iszero, lcm, left,
5959
levenshtein, ln, log, log10, log2,
@@ -538,7 +538,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
538538
ScalarFunction::Atan2 => Self::Atan2,
539539
ScalarFunction::Nanvl => Self::Nanvl,
540540
ScalarFunction::Iszero => Self::Iszero,
541-
ScalarFunction::ArrowTypeof => Self::ArrowTypeof,
542541
ScalarFunction::OverLay => Self::OverLay,
543542
ScalarFunction::Levenshtein => Self::Levenshtein,
544543
ScalarFunction::SubstrIndex => Self::SubstrIndex,
@@ -1736,9 +1735,6 @@ pub fn parse_expr(
17361735
ScalarFunction::Iszero => {
17371736
Ok(iszero(parse_expr(&args[0], registry, codec)?))
17381737
}
1739-
ScalarFunction::ArrowTypeof => {
1740-
Ok(arrow_typeof(parse_expr(&args[0], registry, codec)?))
1741-
}
17421738
ScalarFunction::OverLay => Ok(overlay(
17431739
args.to_owned()
17441740
.iter()

datafusion/proto/src/logical_plan/to_proto.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1518,7 +1518,6 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
15181518
BuiltinScalarFunction::Atan2 => Self::Atan2,
15191519
BuiltinScalarFunction::Nanvl => Self::Nanvl,
15201520
BuiltinScalarFunction::Iszero => Self::Iszero,
1521-
BuiltinScalarFunction::ArrowTypeof => Self::ArrowTypeof,
15221521
BuiltinScalarFunction::OverLay => Self::OverLay,
15231522
BuiltinScalarFunction::Levenshtein => Self::Levenshtein,
15241523
BuiltinScalarFunction::SubstrIndex => Self::SubstrIndex,

0 commit comments

Comments
 (0)