Skip to content

Commit 8e5f3ad

Browse files
committed
Move nullif and isnan to datafusion-functions
move nullif
1 parent 2615d1b commit 8e5f3ad

File tree

21 files changed

+356
-104
lines changed

21 files changed

+356
-104
lines changed

.github/workflows/rust.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@ jobs:
8282
- name: Check function packages (encoding_expressions)
8383
run: cargo check --no-default-features --features=encoding_expressions -p datafusion
8484

85+
- name: Check function packages (math_expressions)
86+
run: cargo check --no-default-features --features=math_expressions -p datafusion
87+
8588
- name: Check function packages (array_expressions)
8689
run: cargo check --no-default-features --features=array_expressions -p datafusion
8790

datafusion/core/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ default = ["array_expressions", "crypto_expressions", "encoding_expressions", "r
4747
encoding_expressions = ["datafusion-functions/encoding_expressions"]
4848
# Used for testing ONLY: causes all values to hash to the same value (test for collisions)
4949
force_hash_collisions = []
50+
math_expressions = ["datafusion-functions/math_expressions"]
5051
parquet = ["datafusion-common/parquet", "dep:parquet"]
5152
pyarrow = ["datafusion-common/pyarrow", "parquet"]
5253
regex_expressions = ["datafusion-physical-expr/regex_expressions", "datafusion-optimizer/regex_expressions"]

datafusion/expr/src/built_in_function.rs

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ use std::fmt;
2323
use std::str::FromStr;
2424
use std::sync::{Arc, OnceLock};
2525

26-
use crate::nullif::SUPPORTED_NULLIF_TYPES;
2726
use crate::signature::TIMEZONE_WILDCARD;
2827
use crate::type_coercion::binary::get_wider_type;
2928
use crate::type_coercion::functions::data_types;
@@ -83,8 +82,6 @@ pub enum BuiltinScalarFunction {
8382
Gcd,
8483
/// lcm, Least common multiple
8584
Lcm,
86-
/// isnan
87-
Isnan,
8885
/// iszero
8986
Iszero,
9087
/// ln, Natural logarithm
@@ -233,8 +230,6 @@ pub enum BuiltinScalarFunction {
233230
Ltrim,
234231
/// md5
235232
MD5,
236-
/// nullif
237-
NullIf,
238233
/// octet_length
239234
OctetLength,
240235
/// random
@@ -384,7 +379,6 @@ impl BuiltinScalarFunction {
384379
BuiltinScalarFunction::Factorial => Volatility::Immutable,
385380
BuiltinScalarFunction::Floor => Volatility::Immutable,
386381
BuiltinScalarFunction::Gcd => Volatility::Immutable,
387-
BuiltinScalarFunction::Isnan => Volatility::Immutable,
388382
BuiltinScalarFunction::Iszero => Volatility::Immutable,
389383
BuiltinScalarFunction::Lcm => Volatility::Immutable,
390384
BuiltinScalarFunction::Ln => Volatility::Immutable,
@@ -456,7 +450,6 @@ impl BuiltinScalarFunction {
456450
BuiltinScalarFunction::Lower => Volatility::Immutable,
457451
BuiltinScalarFunction::Ltrim => Volatility::Immutable,
458452
BuiltinScalarFunction::MD5 => Volatility::Immutable,
459-
BuiltinScalarFunction::NullIf => Volatility::Immutable,
460453
BuiltinScalarFunction::OctetLength => Volatility::Immutable,
461454
BuiltinScalarFunction::Radians => Volatility::Immutable,
462455
BuiltinScalarFunction::RegexpLike => Volatility::Immutable,
@@ -726,11 +719,6 @@ impl BuiltinScalarFunction {
726719
utf8_to_str_type(&input_expr_types[0], "ltrim")
727720
}
728721
BuiltinScalarFunction::MD5 => utf8_to_str_type(&input_expr_types[0], "md5"),
729-
BuiltinScalarFunction::NullIf => {
730-
// NULLIF has two args and they might get coerced, get a preview of this
731-
let coerced_types = data_types(input_expr_types, &self.signature());
732-
coerced_types.map(|typs| typs[0].clone())
733-
}
734722
BuiltinScalarFunction::OctetLength => {
735723
utf8_to_int_type(&input_expr_types[0], "octet_length")
736724
}
@@ -871,7 +859,7 @@ impl BuiltinScalarFunction {
871859
_ => Ok(Float64),
872860
},
873861

874-
BuiltinScalarFunction::Isnan | BuiltinScalarFunction::Iszero => Ok(Boolean),
862+
BuiltinScalarFunction::Iszero => Ok(Boolean),
875863

876864
BuiltinScalarFunction::ArrowTypeof => Ok(Utf8),
877865

@@ -1261,9 +1249,6 @@ impl BuiltinScalarFunction {
12611249
self.volatility(),
12621250
),
12631251

1264-
BuiltinScalarFunction::NullIf => {
1265-
Signature::uniform(2, SUPPORTED_NULLIF_TYPES.to_vec(), self.volatility())
1266-
}
12671252
BuiltinScalarFunction::Pi => Signature::exact(vec![], self.volatility()),
12681253
BuiltinScalarFunction::Random => Signature::exact(vec![], self.volatility()),
12691254
BuiltinScalarFunction::Uuid => Signature::exact(vec![], self.volatility()),
@@ -1368,12 +1353,10 @@ impl BuiltinScalarFunction {
13681353
vec![Int32, Int64, UInt32, UInt64, Utf8],
13691354
self.volatility(),
13701355
),
1371-
BuiltinScalarFunction::Isnan | BuiltinScalarFunction::Iszero => {
1372-
Signature::one_of(
1373-
vec![Exact(vec![Float32]), Exact(vec![Float64])],
1374-
self.volatility(),
1375-
)
1376-
}
1356+
BuiltinScalarFunction::Iszero => Signature::one_of(
1357+
vec![Exact(vec![Float32]), Exact(vec![Float64])],
1358+
self.volatility(),
1359+
),
13771360
}
13781361
}
13791362

@@ -1439,7 +1422,6 @@ impl BuiltinScalarFunction {
14391422
BuiltinScalarFunction::Factorial => &["factorial"],
14401423
BuiltinScalarFunction::Floor => &["floor"],
14411424
BuiltinScalarFunction::Gcd => &["gcd"],
1442-
BuiltinScalarFunction::Isnan => &["isnan"],
14431425
BuiltinScalarFunction::Iszero => &["iszero"],
14441426
BuiltinScalarFunction::Lcm => &["lcm"],
14451427
BuiltinScalarFunction::Ln => &["ln"],
@@ -1462,7 +1444,6 @@ impl BuiltinScalarFunction {
14621444

14631445
// conditional functions
14641446
BuiltinScalarFunction::Coalesce => &["coalesce"],
1465-
BuiltinScalarFunction::NullIf => &["nullif"],
14661447

14671448
// string functions
14681449
BuiltinScalarFunction::Ascii => &["ascii"],

datafusion/expr/src/expr_fn.rs

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -565,7 +565,6 @@ scalar_expr!(Lcm, lcm, arg_1 arg_2, "least common multiple");
565565
scalar_expr!(Log2, log2, num, "base 2 logarithm");
566566
scalar_expr!(Log10, log10, num, "base 10 logarithm");
567567
scalar_expr!(Ln, ln, num, "natural logarithm");
568-
scalar_expr!(NullIf, nullif, arg_1 arg_2, "returns NULL if value1 equals value2; otherwise it returns value1. This can be used to perform the inverse operation of the COALESCE expression.");
569568
scalar_expr!(Power, power, base exponent, "`base` raised to the power of `exponent`");
570569
scalar_expr!(Atan2, atan2, y x, "inverse tangent of a division given in the argument");
571570
scalar_expr!(
@@ -926,12 +925,6 @@ scalar_expr!(Now, now, ,"returns current timestamp in nanoseconds, using the sam
926925
scalar_expr!(CurrentTime, current_time, , "returns current UTC time as a [`DataType::Time64`] value");
927926
scalar_expr!(MakeDate, make_date, year month day, "make a date from year, month and day component parts");
928927
scalar_expr!(Nanvl, nanvl, x y, "returns x if x is not NaN otherwise returns y");
929-
scalar_expr!(
930-
Isnan,
931-
isnan,
932-
num,
933-
"returns true if a given number is +NaN or -NaN otherwise returns false"
934-
);
935928
scalar_expr!(
936929
Iszero,
937930
iszero,
@@ -1363,7 +1356,6 @@ mod test {
13631356
test_unary_scalar_expr!(Ln, ln);
13641357
test_scalar_expr!(Atan2, atan2, y, x);
13651358
test_scalar_expr!(Nanvl, nanvl, x, y);
1366-
test_scalar_expr!(Isnan, isnan, input);
13671359
test_scalar_expr!(Iszero, iszero, input);
13681360

13691361
test_scalar_expr!(Ascii, ascii, input);

datafusion/expr/src/lib.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ mod built_in_function;
3030
mod built_in_window_function;
3131
mod columnar_value;
3232
mod literal;
33-
mod nullif;
3433
mod operator;
3534
mod partition_evaluator;
3635
mod signature;
@@ -74,7 +73,6 @@ pub use function::{
7473
pub use groups_accumulator::{EmitTo, GroupsAccumulator};
7574
pub use literal::{lit, lit_timestamp_nano, Literal, TimestampLiteral};
7675
pub use logical_plan::*;
77-
pub use nullif::SUPPORTED_NULLIF_TYPES;
7876
pub use operator::Operator;
7977
pub use partition_evaluator::PartitionEvaluator;
8078
pub use signature::{

datafusion/functions/Cargo.toml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,14 @@ authors = { workspace = true }
2929
rust-version = { workspace = true }
3030

3131
[features]
32+
# enable core functions
33+
core_expressions = []
3234
# Enable encoding by default so the doctests work. In general don't automatically enable all packages.
33-
default = ["encoding_expressions"]
34-
# enable the encode/decode functions
35+
default = ["core_expressions", "encoding_expressions", "math_expressions"]
36+
# enable encode/decode functions
3537
encoding_expressions = ["base64", "hex"]
38+
# enable math functions
39+
math_expressions = []
3640

3741

3842
[lib]

datafusion/expr/src/nullif.rs renamed to datafusion/functions/src/core/mod.rs

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,23 +15,15 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use arrow::datatypes::DataType;
18+
//! "core" DataFusion functions
19+
20+
mod nullif;
21+
22+
// create UDFs
23+
make_udf_function!(nullif::NullIfFunc, NULLIF, nullif);
24+
25+
// Export the functions out of this package, both as expr_fn as well as a list of functions
26+
export_functions!(
27+
(nullif, arg_1 arg_2, "returns NULL if value1 equals value2; otherwise it returns value1. This can be used to perform the inverse operation of the COALESCE expression.")
28+
);
1929

20-
/// Currently supported types by the nullif function.
21-
/// The order of these types correspond to the order on which coercion applies
22-
/// This should thus be from least informative to most informative
23-
pub static SUPPORTED_NULLIF_TYPES: &[DataType] = &[
24-
DataType::Boolean,
25-
DataType::UInt8,
26-
DataType::UInt16,
27-
DataType::UInt32,
28-
DataType::UInt64,
29-
DataType::Int8,
30-
DataType::Int16,
31-
DataType::Int32,
32-
DataType::Int64,
33-
DataType::Float32,
34-
DataType::Float64,
35-
DataType::Utf8,
36-
DataType::LargeUtf8,
37-
];

datafusion/physical-expr/src/expressions/nullif.rs renamed to datafusion/functions/src/core/nullif.rs

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,89 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
//! Encoding expressions
19+
20+
use arrow::{
21+
datatypes::DataType,
22+
};
23+
use datafusion_common::{internal_err, Result, DataFusionError};
24+
use datafusion_expr::{ColumnarValue};
25+
26+
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
27+
use std::any::Any;
1828
use arrow::array::Array;
1929
use arrow::compute::kernels::cmp::eq;
2030
use arrow::compute::kernels::nullif::nullif;
21-
use datafusion_common::{internal_err, DataFusionError, Result, ScalarValue};
22-
use datafusion_expr::ColumnarValue;
31+
use datafusion_common::{ ScalarValue};
32+
33+
#[derive(Debug)]
34+
pub(super) struct NullIfFunc {
35+
signature: Signature,
36+
}
37+
38+
/// Currently supported types by the nullif function.
39+
/// The order of these types correspond to the order on which coercion applies
40+
/// This should thus be from least informative to most informative
41+
static SUPPORTED_NULLIF_TYPES: &[DataType] = &[
42+
DataType::Boolean,
43+
DataType::UInt8,
44+
DataType::UInt16,
45+
DataType::UInt32,
46+
DataType::UInt64,
47+
DataType::Int8,
48+
DataType::Int16,
49+
DataType::Int32,
50+
DataType::Int64,
51+
DataType::Float32,
52+
DataType::Float64,
53+
DataType::Utf8,
54+
DataType::LargeUtf8,
55+
];
56+
57+
58+
impl NullIfFunc {
59+
pub fn new() -> Self {
60+
Self {
61+
signature:
62+
Signature::uniform(2, SUPPORTED_NULLIF_TYPES.to_vec(),
63+
Volatility::Immutable,
64+
)
65+
}
66+
}
67+
}
68+
69+
impl ScalarUDFImpl for NullIfFunc {
70+
fn as_any(&self) -> &dyn Any {
71+
self
72+
}
73+
fn name(&self) -> &str {
74+
"nullif"
75+
}
76+
77+
fn signature(&self) -> &Signature {
78+
&self.signature
79+
}
80+
81+
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
82+
// NULLIF has two args and they might get coerced, get a preview of this
83+
let coerced_types = datafusion_expr::type_coercion::functions::data_types(arg_types, &self.signature);
84+
coerced_types.map(|typs| typs[0].clone())
85+
.map_err(|e| e.context("Failed to coerce arguments for NULLIF")
86+
)
87+
}
88+
89+
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
90+
nullif_func(args)
91+
}
92+
}
93+
94+
2395

2496
/// Implements NULLIF(expr1, expr2)
2597
/// Args: 0 - left expr is any array
2698
/// 1 - if the left is equal to this expr2, then the result is NULL, otherwise left value is passed.
2799
///
28-
pub fn nullif_func(args: &[ColumnarValue]) -> Result<ColumnarValue> {
100+
fn nullif_func(args: &[ColumnarValue]) -> Result<ColumnarValue> {
29101
if args.len() != 2 {
30102
return internal_err!(
31103
"{:?} args were supplied but NULLIF takes exactly two args",

datafusion/functions/src/lib.rs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,21 +84,34 @@ use log::debug;
8484
#[macro_use]
8585
pub mod macros;
8686

87+
make_package!(core, "core_expressions", "Core datafusion expressions");
88+
8789
make_package!(
8890
encoding,
8991
"encoding_expressions",
9092
"Hex and binary `encode` and `decode` functions."
9193
);
9294

95+
make_package!(math, "math_expressions", "Mathematical functions.");
96+
9397
/// Fluent-style API for creating `Expr`s
9498
pub mod expr_fn {
99+
#[cfg(feature = "core_expressions")]
100+
pub use super::core::expr_fn::*;
95101
#[cfg(feature = "encoding_expressions")]
96102
pub use super::encoding::expr_fn::*;
103+
#[cfg(feature = "math_expressions")]
104+
pub use super::math::expr_fn::*;
97105
}
98106

99107
/// Registers all enabled packages with a [`FunctionRegistry`]
100108
pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> {
101-
encoding::functions().into_iter().try_for_each(|udf| {
109+
let mut all_functions = core::functions()
110+
.into_iter()
111+
.chain(encoding::functions())
112+
.chain(math::functions());
113+
114+
all_functions.try_for_each(|udf| {
102115
let existing_udf = registry.register_udf(udf)?;
103116
if let Some(existing_udf) = existing_udf {
104117
debug!("Overwrite existing UDF: {}", existing_udf.name());

datafusion/functions/src/macros.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,42 @@ macro_rules! make_package {
121121
}
122122
};
123123
}
124+
125+
/// Invokes a function on each element of an array and returns the result as a new array
126+
///
127+
/// $ARG: ArrayRef
128+
/// $NAME: name of the function (for error messages)
129+
/// $ARGS_TYPE: the type of array to cast the argument to
130+
/// $RETURN_TYPE: the type of array to return
131+
/// $FUNC: the function to apply to each element of $ARG
132+
///
133+
macro_rules! make_function_scalar_inputs_return_type {
134+
($ARG: expr, $NAME:expr, $ARG_TYPE:ident, $RETURN_TYPE:ident, $FUNC: block) => {{
135+
let arg = downcast_arg!($ARG, $NAME, $ARG_TYPE);
136+
137+
arg.iter()
138+
.map(|a| match a {
139+
Some(a) => Some($FUNC(a)),
140+
_ => None,
141+
})
142+
.collect::<$RETURN_TYPE>()
143+
}};
144+
}
145+
146+
/// Downcast an argument to a specific array type, returning an internal error
147+
/// if the cast fails
148+
///
149+
/// $ARG: ArrayRef
150+
/// $NAME: name of the argument (for error messages)
151+
/// $ARRAY_TYPE: the type of array to cast the argument to
152+
macro_rules! downcast_arg {
153+
($ARG:expr, $NAME:expr, $ARRAY_TYPE:ident) => {{
154+
$ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| {
155+
DataFusionError::Internal(format!(
156+
"could not cast {} to {}",
157+
$NAME,
158+
std::any::type_name::<$ARRAY_TYPE>()
159+
))
160+
})?
161+
}};
162+
}

0 commit comments

Comments
 (0)