diff --git a/Cargo.lock b/Cargo.lock index 299ea0dc4c6f..6e9d2e45de75 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2558,6 +2558,20 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-spark" +version = "47.0.0" +dependencies = [ + "arrow", + "datafusion-catalog", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-macros", + "log", +] + [[package]] name = "datafusion-sql" version = "47.0.0" @@ -2593,6 +2607,7 @@ dependencies = [ "chrono", "clap 4.5.36", "datafusion", + "datafusion-spark", "env_logger", "futures", "half", diff --git a/Cargo.toml b/Cargo.toml index 5a735666f8e7..e51081b9bf15 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,6 +48,7 @@ members = [ "datafusion/proto-common", "datafusion/proto-common/gen", "datafusion/session", + "datafusion/spark", "datafusion/sql", "datafusion/sqllogictest", "datafusion/substrait", @@ -138,6 +139,7 @@ datafusion-physical-plan = { path = "datafusion/physical-plan", version = "47.0. datafusion-proto = { path = "datafusion/proto", version = "47.0.0" } datafusion-proto-common = { path = "datafusion/proto-common", version = "47.0.0" } datafusion-session = { path = "datafusion/session", version = "47.0.0" } +datafusion-spark = { path = "datafusion/spark", version = "47.0.0" } datafusion-sql = { path = "datafusion/sql", version = "47.0.0" } doc-comment = "0.3" env_logger = "0.11" diff --git a/NOTICE.txt b/NOTICE.txt index 21be1a20d554..7f3c80d606c0 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -1,5 +1,5 @@ Apache DataFusion -Copyright 2019-2024 The Apache Software Foundation +Copyright 2019-2025 The Apache Software Foundation This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). \ No newline at end of file +The Apache Software Foundation (http://www.apache.org/). diff --git a/datafusion/functions-aggregate/src/macros.rs b/datafusion/functions-aggregate/src/macros.rs index b464dde6ccab..18f27c3c4ae3 100644 --- a/datafusion/functions-aggregate/src/macros.rs +++ b/datafusion/functions-aggregate/src/macros.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#[macro_export] macro_rules! make_udaf_expr { ($EXPR_FN:ident, $($arg:ident)*, $DOC:expr, $AGGREGATE_UDF_FN:ident) => { // "fluent expr_fn" style function @@ -34,6 +35,7 @@ macro_rules! make_udaf_expr { }; } +#[macro_export] macro_rules! make_udaf_expr_and_func { ($UDAF:ty, $EXPR_FN:ident, $($arg:ident)*, $DOC:expr, $AGGREGATE_UDF_FN:ident) => { make_udaf_expr!($EXPR_FN, $($arg)*, $DOC, $AGGREGATE_UDF_FN); @@ -59,6 +61,7 @@ macro_rules! make_udaf_expr_and_func { }; } +#[macro_export] macro_rules! create_func { ($UDAF:ty, $AGGREGATE_UDF_FN:ident) => { create_func!($UDAF, $AGGREGATE_UDF_FN, <$UDAF>::default()); diff --git a/datafusion/functions/src/macros.rs b/datafusion/functions/src/macros.rs index d2849c3abba0..30ebf8654ea0 100644 --- a/datafusion/functions/src/macros.rs +++ b/datafusion/functions/src/macros.rs @@ -40,6 +40,7 @@ /// Exported functions accept: /// - `Vec` argument (single argument followed by a comma) /// - Variable number of `Expr` arguments (zero or more arguments, must be without commas) +#[macro_export] macro_rules! export_functions { ($(($FUNC:ident, $DOC:expr, $($arg:tt)*)),*) => { $( @@ -69,6 +70,7 @@ macro_rules! export_functions { /// named `$NAME` which returns that singleton. /// /// This is used to ensure creating the list of `ScalarUDF` only happens once. +#[macro_export] macro_rules! make_udf_function { ($UDF:ty, $NAME:ident) => { #[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation of ", stringify!($NAME))] diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs index 47f3121ba2ce..33ea9fe5d28d 100644 --- a/datafusion/functions/src/utils.rs +++ b/datafusion/functions/src/utils.rs @@ -75,7 +75,7 @@ get_optimal_return_type!(utf8_to_int_type, DataType::Int64, DataType::Int32); /// Creates a scalar function implementation for the given function. /// * `inner` - the function to be executed /// * `hints` - hints to be used when expanding scalars to arrays -pub(super) fn make_scalar_function( +pub fn make_scalar_function( inner: F, hints: Vec, ) -> impl Fn(&[ColumnarValue]) -> Result diff --git a/datafusion/spark/Cargo.toml b/datafusion/spark/Cargo.toml new file mode 100644 index 000000000000..1ded8c40aa4b --- /dev/null +++ b/datafusion/spark/Cargo.toml @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "datafusion-spark" +description = "DataFusion expressions that emulate Apache Spark's behavior" +version = { workspace = true } +homepage = { workspace = true } +repository = { workspace = true } +authors = { workspace = true } +readme = { workspace = true } +license = { workspace = true } +edition = { workspace = true } + +[package.metadata.docs.rs] +all-features = true + +[lints] +workspace = true + +[lib] +name = "datafusion_spark" + +[dependencies] +arrow = { workspace = true } +datafusion-catalog = { workspace = true } +datafusion-common = { workspace = true } +datafusion-execution = { workspace = true } +datafusion-expr = { workspace = true } +datafusion-functions = { workspace = true } +datafusion-macros = { workspace = true } +log = { workspace = true } diff --git a/datafusion/spark/LICENSE.txt b/datafusion/spark/LICENSE.txt new file mode 120000 index 000000000000..1ef648f64b34 --- /dev/null +++ b/datafusion/spark/LICENSE.txt @@ -0,0 +1 @@ +../../LICENSE.txt \ No newline at end of file diff --git a/datafusion/spark/NOTICE.txt b/datafusion/spark/NOTICE.txt new file mode 120000 index 000000000000..fb051c92b10b --- /dev/null +++ b/datafusion/spark/NOTICE.txt @@ -0,0 +1 @@ +../../NOTICE.txt \ No newline at end of file diff --git a/datafusion/spark/README.md b/datafusion/spark/README.md new file mode 100644 index 000000000000..c92ada0ab477 --- /dev/null +++ b/datafusion/spark/README.md @@ -0,0 +1,40 @@ + + +# datafusion-spark: Spark-compatible Expressions + +This crate provides Apache Spark-compatible expressions for use with DataFusion. + +## Testing Guide + +When testing functions by directly invoking them (e.g., `test_scalar_function!()`), input coercion (from the `signature` +or `coerce_types`) is not applied. + +Therefore, direct invocation tests should only be used to verify that the function is correctly implemented. + +Please be sure to add additional tests beyond direct invocation. +For more detailed testing guidelines, refer to +the [Spark SQLLogicTest README](../sqllogictest/test_files/spark/README.md). + +## Implementation References + +When implementing Spark-compatible functions, you can check if there are existing implementations in +the [Sail](https://github.com/lakehq/sail) or [Comet](https://github.com/apache/datafusion-comet) projects first. +If you do port functionality from these sources, make sure to port over the corresponding tests too, to ensure +correctness and compatibility. diff --git a/datafusion/spark/src/function/aggregate/mod.rs b/datafusion/spark/src/function/aggregate/mod.rs new file mode 100644 index 000000000000..0856e2872d4f --- /dev/null +++ b/datafusion/spark/src/function/aggregate/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_expr::AggregateUDF; +use std::sync::Arc; + +pub mod expr_fn {} + +pub fn functions() -> Vec> { + vec![] +} diff --git a/datafusion/spark/src/function/array/mod.rs b/datafusion/spark/src/function/array/mod.rs new file mode 100644 index 000000000000..a87df9a2c87a --- /dev/null +++ b/datafusion/spark/src/function/array/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_expr::ScalarUDF; +use std::sync::Arc; + +pub mod expr_fn {} + +pub fn functions() -> Vec> { + vec![] +} diff --git a/datafusion/spark/src/function/bitwise/mod.rs b/datafusion/spark/src/function/bitwise/mod.rs new file mode 100644 index 000000000000..a87df9a2c87a --- /dev/null +++ b/datafusion/spark/src/function/bitwise/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_expr::ScalarUDF; +use std::sync::Arc; + +pub mod expr_fn {} + +pub fn functions() -> Vec> { + vec![] +} diff --git a/datafusion/spark/src/function/collection/mod.rs b/datafusion/spark/src/function/collection/mod.rs new file mode 100644 index 000000000000..a87df9a2c87a --- /dev/null +++ b/datafusion/spark/src/function/collection/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_expr::ScalarUDF; +use std::sync::Arc; + +pub mod expr_fn {} + +pub fn functions() -> Vec> { + vec![] +} diff --git a/datafusion/spark/src/function/conditional/mod.rs b/datafusion/spark/src/function/conditional/mod.rs new file mode 100644 index 000000000000..a87df9a2c87a --- /dev/null +++ b/datafusion/spark/src/function/conditional/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_expr::ScalarUDF; +use std::sync::Arc; + +pub mod expr_fn {} + +pub fn functions() -> Vec> { + vec![] +} diff --git a/datafusion/spark/src/function/conversion/mod.rs b/datafusion/spark/src/function/conversion/mod.rs new file mode 100644 index 000000000000..a87df9a2c87a --- /dev/null +++ b/datafusion/spark/src/function/conversion/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_expr::ScalarUDF; +use std::sync::Arc; + +pub mod expr_fn {} + +pub fn functions() -> Vec> { + vec![] +} diff --git a/datafusion/spark/src/function/csv/mod.rs b/datafusion/spark/src/function/csv/mod.rs new file mode 100644 index 000000000000..a87df9a2c87a --- /dev/null +++ b/datafusion/spark/src/function/csv/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_expr::ScalarUDF; +use std::sync::Arc; + +pub mod expr_fn {} + +pub fn functions() -> Vec> { + vec![] +} diff --git a/datafusion/spark/src/function/datetime/mod.rs b/datafusion/spark/src/function/datetime/mod.rs new file mode 100644 index 000000000000..a87df9a2c87a --- /dev/null +++ b/datafusion/spark/src/function/datetime/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_expr::ScalarUDF; +use std::sync::Arc; + +pub mod expr_fn {} + +pub fn functions() -> Vec> { + vec![] +} diff --git a/datafusion/spark/src/function/error_utils.rs b/datafusion/spark/src/function/error_utils.rs new file mode 100644 index 000000000000..b972d64ed3e9 --- /dev/null +++ b/datafusion/spark/src/function/error_utils.rs @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// TODO: https://github.com/apache/spark/tree/master/common/utils/src/main/resources/error + +use arrow::datatypes::DataType; +use datafusion_common::{exec_datafusion_err, internal_datafusion_err, DataFusionError}; + +pub fn invalid_arg_count_exec_err( + function_name: &str, + required_range: (i32, i32), + provided: usize, +) -> DataFusionError { + let (min_required, max_required) = required_range; + let required = if min_required == max_required { + format!( + "{min_required} argument{}", + if min_required == 1 { "" } else { "s" } + ) + } else { + format!("{min_required} to {max_required} arguments") + }; + exec_datafusion_err!( + "Spark `{function_name}` function requires {required}, got {provided}" + ) +} + +pub fn unsupported_data_type_exec_err( + function_name: &str, + required: &str, + provided: &DataType, +) -> DataFusionError { + exec_datafusion_err!("Unsupported Data Type: Spark `{function_name}` function expects {required}, got {provided}") +} + +pub fn unsupported_data_types_exec_err( + function_name: &str, + required: &str, + provided: &[DataType], +) -> DataFusionError { + exec_datafusion_err!( + "Unsupported Data Type: Spark `{function_name}` function expects {required}, got {}", + provided + .iter() + .map(|dt| format!("{dt}")) + .collect::>() + .join(", ") + ) +} + +pub fn generic_exec_err(function_name: &str, message: &str) -> DataFusionError { + exec_datafusion_err!("Spark `{function_name}` function: {message}") +} + +pub fn generic_internal_err(function_name: &str, message: &str) -> DataFusionError { + internal_datafusion_err!("Spark `{function_name}` function: {message}") +} diff --git a/datafusion/spark/src/function/generator/mod.rs b/datafusion/spark/src/function/generator/mod.rs new file mode 100644 index 000000000000..a87df9a2c87a --- /dev/null +++ b/datafusion/spark/src/function/generator/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_expr::ScalarUDF; +use std::sync::Arc; + +pub mod expr_fn {} + +pub fn functions() -> Vec> { + vec![] +} diff --git a/datafusion/spark/src/function/hash/mod.rs b/datafusion/spark/src/function/hash/mod.rs new file mode 100644 index 000000000000..a87df9a2c87a --- /dev/null +++ b/datafusion/spark/src/function/hash/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_expr::ScalarUDF; +use std::sync::Arc; + +pub mod expr_fn {} + +pub fn functions() -> Vec> { + vec![] +} diff --git a/datafusion/spark/src/function/json/mod.rs b/datafusion/spark/src/function/json/mod.rs new file mode 100644 index 000000000000..a87df9a2c87a --- /dev/null +++ b/datafusion/spark/src/function/json/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_expr::ScalarUDF; +use std::sync::Arc; + +pub mod expr_fn {} + +pub fn functions() -> Vec> { + vec![] +} diff --git a/datafusion/spark/src/function/lambda/mod.rs b/datafusion/spark/src/function/lambda/mod.rs new file mode 100644 index 000000000000..a87df9a2c87a --- /dev/null +++ b/datafusion/spark/src/function/lambda/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_expr::ScalarUDF; +use std::sync::Arc; + +pub mod expr_fn {} + +pub fn functions() -> Vec> { + vec![] +} diff --git a/datafusion/spark/src/function/map/mod.rs b/datafusion/spark/src/function/map/mod.rs new file mode 100644 index 000000000000..a87df9a2c87a --- /dev/null +++ b/datafusion/spark/src/function/map/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_expr::ScalarUDF; +use std::sync::Arc; + +pub mod expr_fn {} + +pub fn functions() -> Vec> { + vec![] +} diff --git a/datafusion/spark/src/function/math/expm1.rs b/datafusion/spark/src/function/math/expm1.rs new file mode 100644 index 000000000000..3a3a0c3835d3 --- /dev/null +++ b/datafusion/spark/src/function/math/expm1.rs @@ -0,0 +1,145 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::function::error_utils::{ + invalid_arg_count_exec_err, unsupported_data_type_exec_err, +}; +use arrow::array::{ArrayRef, AsArray}; +use arrow::datatypes::{DataType, Float64Type}; +use datafusion_common::{Result, ScalarValue}; +use datafusion_expr::{ + ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, +}; +use std::any::Any; +use std::sync::Arc; + +/// +#[derive(Debug)] +pub struct SparkExpm1 { + signature: Signature, + aliases: Vec, +} + +impl Default for SparkExpm1 { + fn default() -> Self { + Self::new() + } +} + +impl SparkExpm1 { + pub fn new() -> Self { + Self { + signature: Signature::user_defined(Volatility::Immutable), + aliases: vec![], + } + } +} + +impl ScalarUDFImpl for SparkExpm1 { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "expm1" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(DataType::Float64) + } + + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + if args.args.len() != 1 { + return Err(invalid_arg_count_exec_err("expm1", (1, 1), args.args.len())); + } + match &args.args[0] { + ColumnarValue::Scalar(ScalarValue::Float64(value)) => Ok( + ColumnarValue::Scalar(ScalarValue::Float64(value.map(|x| x.exp_m1()))), + ), + ColumnarValue::Array(array) => match array.data_type() { + DataType::Float64 => Ok(ColumnarValue::Array(Arc::new( + array + .as_primitive::() + .unary::<_, Float64Type>(|x| x.exp_m1()), + ) + as ArrayRef)), + other => Err(unsupported_data_type_exec_err( + "expm1", + format!("{}", DataType::Float64).as_str(), + other, + )), + }, + other => Err(unsupported_data_type_exec_err( + "expm1", + format!("{}", DataType::Float64).as_str(), + &other.data_type(), + )), + } + } + + fn aliases(&self) -> &[String] { + &self.aliases + } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + if arg_types.len() != 1 { + return Err(invalid_arg_count_exec_err("expm1", (1, 1), arg_types.len())); + } + if arg_types[0].is_numeric() { + Ok(vec![DataType::Float64]) + } else { + Err(unsupported_data_type_exec_err( + "expm1", + "Numeric Type", + &arg_types[0], + )) + } + } +} + +#[cfg(test)] +mod tests { + use crate::function::math::expm1::SparkExpm1; + use crate::function::utils::test::test_scalar_function; + use arrow::array::{Array, Float64Array}; + use arrow::datatypes::DataType::Float64; + use datafusion_common::{Result, ScalarValue}; + use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; + + macro_rules! test_expm1_float64_invoke { + ($INPUT:expr, $EXPECTED:expr) => { + test_scalar_function!( + SparkExpm1::new(), + vec![ColumnarValue::Scalar(ScalarValue::Float64($INPUT))], + $EXPECTED, + f64, + Float64, + Float64Array + ); + }; + } + + #[test] + fn test_expm1_invoke() -> Result<()> { + test_expm1_float64_invoke!(Some(0f64), Ok(Some(0.0f64))); + Ok(()) + } +} diff --git a/datafusion/spark/src/function/math/mod.rs b/datafusion/spark/src/function/math/mod.rs new file mode 100644 index 000000000000..c5b007f40d7f --- /dev/null +++ b/datafusion/spark/src/function/math/mod.rs @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +pub mod expm1; + +use datafusion_expr::ScalarUDF; +use datafusion_functions::make_udf_function; +use std::sync::Arc; + +make_udf_function!(expm1::SparkExpm1, expm1); + +pub mod expr_fn { + use datafusion_functions::export_functions; + + export_functions!((expm1, "Returns exp(expr) - 1 as a Float64.", arg1)); +} + +pub fn functions() -> Vec> { + vec![expm1()] +} diff --git a/datafusion/spark/src/function/misc/mod.rs b/datafusion/spark/src/function/misc/mod.rs new file mode 100644 index 000000000000..a87df9a2c87a --- /dev/null +++ b/datafusion/spark/src/function/misc/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_expr::ScalarUDF; +use std::sync::Arc; + +pub mod expr_fn {} + +pub fn functions() -> Vec> { + vec![] +} diff --git a/datafusion/spark/src/function/mod.rs b/datafusion/spark/src/function/mod.rs new file mode 100644 index 000000000000..dfdd94a040a9 --- /dev/null +++ b/datafusion/spark/src/function/mod.rs @@ -0,0 +1,41 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +pub mod aggregate; +pub mod array; +pub mod bitwise; +pub mod collection; +pub mod conditional; +pub mod conversion; +pub mod csv; +pub mod datetime; +pub mod error_utils; +pub mod generator; +pub mod hash; +pub mod json; +pub mod lambda; +pub mod map; +pub mod math; +pub mod misc; +pub mod predicate; +pub mod string; +pub mod r#struct; +pub mod table; +pub mod url; +pub mod utils; +pub mod window; +pub mod xml; diff --git a/datafusion/spark/src/function/predicate/mod.rs b/datafusion/spark/src/function/predicate/mod.rs new file mode 100644 index 000000000000..a87df9a2c87a --- /dev/null +++ b/datafusion/spark/src/function/predicate/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_expr::ScalarUDF; +use std::sync::Arc; + +pub mod expr_fn {} + +pub fn functions() -> Vec> { + vec![] +} diff --git a/datafusion/spark/src/function/string/ascii.rs b/datafusion/spark/src/function/string/ascii.rs new file mode 100644 index 000000000000..c05aa214ccc0 --- /dev/null +++ b/datafusion/spark/src/function/string/ascii.rs @@ -0,0 +1,174 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::{ArrayAccessor, ArrayIter, ArrayRef, AsArray, Int32Array}; +use arrow::datatypes::DataType; +use arrow::error::ArrowError; +use datafusion_common::{internal_err, plan_err, Result}; +use datafusion_expr::ColumnarValue; +use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility}; +use datafusion_functions::utils::make_scalar_function; +use std::any::Any; +use std::sync::Arc; + +/// +#[derive(Debug)] +pub struct SparkAscii { + signature: Signature, + aliases: Vec, +} + +impl Default for SparkAscii { + fn default() -> Self { + Self::new() + } +} + +impl SparkAscii { + pub fn new() -> Self { + Self { + signature: Signature::user_defined(Volatility::Immutable), + aliases: vec![], + } + } +} + +impl ScalarUDFImpl for SparkAscii { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "ascii" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(DataType::Int32) + } + + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + make_scalar_function(ascii, vec![])(&args.args) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + if arg_types.len() != 1 { + return plan_err!( + "The {} function requires 1 argument, but got {}.", + self.name(), + arg_types.len() + ); + } + Ok(vec![DataType::Utf8]) + } +} + +fn calculate_ascii<'a, V>(array: V) -> Result +where + V: ArrayAccessor, +{ + let iter = ArrayIter::new(array); + let result = iter + .map(|string| { + string.map(|s| { + let mut chars = s.chars(); + chars.next().map_or(0, |v| v as i32) + }) + }) + .collect::(); + + Ok(Arc::new(result) as ArrayRef) +} + +/// Returns the numeric code of the first character of the argument. +pub fn ascii(args: &[ArrayRef]) -> Result { + match args[0].data_type() { + DataType::Utf8 => { + let string_array = args[0].as_string::(); + Ok(calculate_ascii(string_array)?) + } + DataType::LargeUtf8 => { + let string_array = args[0].as_string::(); + Ok(calculate_ascii(string_array)?) + } + DataType::Utf8View => { + let string_array = args[0].as_string_view(); + Ok(calculate_ascii(string_array)?) + } + _ => internal_err!("Unsupported data type"), + } +} + +#[cfg(test)] +mod tests { + use crate::function::string::ascii::SparkAscii; + use crate::function::utils::test::test_scalar_function; + use arrow::array::{Array, Int32Array}; + use arrow::datatypes::DataType::Int32; + use datafusion_common::{Result, ScalarValue}; + use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; + + macro_rules! test_ascii_string_invoke { + ($INPUT:expr, $EXPECTED:expr) => { + test_scalar_function!( + SparkAscii::new(), + vec![ColumnarValue::Scalar(ScalarValue::Utf8($INPUT))], + $EXPECTED, + i32, + Int32, + Int32Array + ); + + test_scalar_function!( + SparkAscii::new(), + vec![ColumnarValue::Scalar(ScalarValue::LargeUtf8($INPUT))], + $EXPECTED, + i32, + Int32, + Int32Array + ); + + test_scalar_function!( + SparkAscii::new(), + vec![ColumnarValue::Scalar(ScalarValue::Utf8View($INPUT))], + $EXPECTED, + i32, + Int32, + Int32Array + ); + }; + } + + #[test] + fn test_ascii_invoke() -> Result<()> { + test_ascii_string_invoke!(Some(String::from("x")), Ok(Some(120))); + test_ascii_string_invoke!(Some(String::from("a")), Ok(Some(97))); + test_ascii_string_invoke!(Some(String::from("")), Ok(Some(0))); + test_ascii_string_invoke!(Some(String::from("\n")), Ok(Some(10))); + test_ascii_string_invoke!(Some(String::from("\t")), Ok(Some(9))); + test_ascii_string_invoke!(None, Ok(None)); + + Ok(()) + } +} diff --git a/datafusion/spark/src/function/string/mod.rs b/datafusion/spark/src/function/string/mod.rs new file mode 100644 index 000000000000..c01b6c45c008 --- /dev/null +++ b/datafusion/spark/src/function/string/mod.rs @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +pub mod ascii; + +use datafusion_expr::ScalarUDF; +use datafusion_functions::make_udf_function; +use std::sync::Arc; + +make_udf_function!(ascii::SparkAscii, ascii); + +pub mod expr_fn { + use datafusion_functions::export_functions; + + export_functions!(( + ascii, + "Returns the ASCII code point of the first character of string.", + arg1 + )); +} + +pub fn functions() -> Vec> { + vec![ascii()] +} diff --git a/datafusion/spark/src/function/struct/mod.rs b/datafusion/spark/src/function/struct/mod.rs new file mode 100644 index 000000000000..a87df9a2c87a --- /dev/null +++ b/datafusion/spark/src/function/struct/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_expr::ScalarUDF; +use std::sync::Arc; + +pub mod expr_fn {} + +pub fn functions() -> Vec> { + vec![] +} diff --git a/datafusion/spark/src/function/table/mod.rs b/datafusion/spark/src/function/table/mod.rs new file mode 100644 index 000000000000..aba7b7ceb78e --- /dev/null +++ b/datafusion/spark/src/function/table/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_catalog::TableFunction; +use std::sync::Arc; + +pub mod expr_fn {} + +pub fn functions() -> Vec> { + vec![] +} diff --git a/datafusion/spark/src/function/url/mod.rs b/datafusion/spark/src/function/url/mod.rs new file mode 100644 index 000000000000..a87df9a2c87a --- /dev/null +++ b/datafusion/spark/src/function/url/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_expr::ScalarUDF; +use std::sync::Arc; + +pub mod expr_fn {} + +pub fn functions() -> Vec> { + vec![] +} diff --git a/datafusion/spark/src/function/utils.rs b/datafusion/spark/src/function/utils.rs new file mode 100644 index 000000000000..b05e4acd20ae --- /dev/null +++ b/datafusion/spark/src/function/utils.rs @@ -0,0 +1,100 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#[cfg(test)] +pub mod test { + /// $FUNC ScalarUDFImpl to test + /// $ARGS arguments (vec) to pass to function + /// $EXPECTED a Result + /// $EXPECTED_TYPE is the expected value type + /// $EXPECTED_DATA_TYPE is the expected result type + /// $ARRAY_TYPE is the column type after function applied + macro_rules! test_scalar_function { + ($FUNC:expr, $ARGS:expr, $EXPECTED:expr, $EXPECTED_TYPE:ty, $EXPECTED_DATA_TYPE:expr, $ARRAY_TYPE:ident) => { + let expected: datafusion_common::Result> = $EXPECTED; + let func = $FUNC; + + let type_array = $ARGS.iter().map(|arg| arg.data_type()).collect::>(); + let cardinality = $ARGS + .iter() + .fold(Option::::None, |acc, arg| match arg { + datafusion_expr::ColumnarValue::Scalar(_) => acc, + datafusion_expr::ColumnarValue::Array(a) => Some(a.len()), + }) + .unwrap_or(1); + + let scalar_arguments = $ARGS.iter().map(|arg| match arg { + datafusion_expr::ColumnarValue::Scalar(scalar) => Some(scalar.clone()), + datafusion_expr::ColumnarValue::Array(_) => None, + }).collect::>(); + let scalar_arguments_refs = scalar_arguments.iter().map(|arg| arg.as_ref()).collect::>(); + + let nullables = $ARGS.iter().map(|arg| match arg { + datafusion_expr::ColumnarValue::Scalar(scalar) => scalar.is_null(), + datafusion_expr::ColumnarValue::Array(a) => a.null_count() > 0, + }).collect::>(); + + let return_info = func.return_type_from_args(datafusion_expr::ReturnTypeArgs { + arg_types: &type_array, + scalar_arguments: &scalar_arguments_refs, + nullables: &nullables + }); + + match expected { + Ok(expected) => { + assert_eq!(return_info.is_ok(), true); + let (return_type, _nullable) = return_info.unwrap().into_parts(); + assert_eq!(return_type, $EXPECTED_DATA_TYPE); + + let result = func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, number_rows: cardinality, return_type: &return_type}); + assert_eq!(result.is_ok(), true, "function returned an error: {}", result.unwrap_err()); + + let result = result.unwrap().to_array(cardinality).expect("Failed to convert to array"); + let result = result.as_any().downcast_ref::<$ARRAY_TYPE>().expect("Failed to convert to type"); + assert_eq!(result.data_type(), &$EXPECTED_DATA_TYPE); + + // value is correct + match expected { + Some(v) => assert_eq!(result.value(0), v), + None => assert!(result.is_null(0)), + }; + } + Err(expected_error) => { + if return_info.is_err() { + match return_info { + Ok(_) => assert!(false, "expected error"), + Err(error) => { datafusion_common::assert_contains!(expected_error.strip_backtrace(), error.strip_backtrace()); } + } + } + else { + let (return_type, _nullable) = return_info.unwrap().into_parts(); + + // invoke is expected error - cannot use .expect_err() due to Debug not being implemented + match func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, number_rows: cardinality, return_type: &return_type}) { + Ok(_) => assert!(false, "expected error"), + Err(error) => { + assert!(expected_error.strip_backtrace().starts_with(&error.strip_backtrace())); + } + } + } + } + }; + }; + } + + pub(crate) use test_scalar_function; +} diff --git a/datafusion/spark/src/function/window/mod.rs b/datafusion/spark/src/function/window/mod.rs new file mode 100644 index 000000000000..97ab4a9e3542 --- /dev/null +++ b/datafusion/spark/src/function/window/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_expr::WindowUDF; +use std::sync::Arc; + +pub mod expr_fn {} + +pub fn functions() -> Vec> { + vec![] +} diff --git a/datafusion/spark/src/function/xml/mod.rs b/datafusion/spark/src/function/xml/mod.rs new file mode 100644 index 000000000000..a87df9a2c87a --- /dev/null +++ b/datafusion/spark/src/function/xml/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_expr::ScalarUDF; +use std::sync::Arc; + +pub mod expr_fn {} + +pub fn functions() -> Vec> { + vec![] +} diff --git a/datafusion/spark/src/lib.rs b/datafusion/spark/src/lib.rs new file mode 100644 index 000000000000..1fe5b6ecac8f --- /dev/null +++ b/datafusion/spark/src/lib.rs @@ -0,0 +1,150 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#![doc( + html_logo_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg", + html_favicon_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg" +)] +#![cfg_attr(docsrs, feature(doc_auto_cfg))] +// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143 +#![deny(clippy::clone_on_ref_ptr)] + +//! Spark Expression packages for [DataFusion]. +//! +//! This crate contains a collection of various Spark expression packages for DataFusion, +//! implemented using the extension API. +//! +//! [DataFusion]: https://crates.io/crates/datafusion +//! +//! # Available Packages +//! See the list of [modules](#modules) in this crate for available packages. +//! +//! # Using A Package +//! You can register all functions in all packages using the [`register_all`] function. +//! +//! Each package also exports an `expr_fn` submodule to help create [`Expr`]s that invoke +//! functions using a fluent style. For example: +//! +//![`Expr`]: datafusion_expr::Expr + +pub mod function; + +use datafusion_catalog::TableFunction; +use datafusion_common::Result; +use datafusion_execution::FunctionRegistry; +use datafusion_expr::{AggregateUDF, ScalarUDF, WindowUDF}; +use log::debug; +use std::sync::Arc; + +/// Fluent-style API for creating `Expr`s +#[allow(unused)] +pub mod expr_fn { + pub use super::function::aggregate::expr_fn::*; + pub use super::function::array::expr_fn::*; + pub use super::function::bitwise::expr_fn::*; + pub use super::function::collection::expr_fn::*; + pub use super::function::conditional::expr_fn::*; + pub use super::function::conversion::expr_fn::*; + pub use super::function::csv::expr_fn::*; + pub use super::function::datetime::expr_fn::*; + pub use super::function::generator::expr_fn::*; + pub use super::function::hash::expr_fn::*; + pub use super::function::json::expr_fn::*; + pub use super::function::lambda::expr_fn::*; + pub use super::function::map::expr_fn::*; + pub use super::function::math::expr_fn::*; + pub use super::function::misc::expr_fn::*; + pub use super::function::predicate::expr_fn::*; + pub use super::function::r#struct::expr_fn::*; + pub use super::function::string::expr_fn::*; + pub use super::function::table::expr_fn::*; + pub use super::function::url::expr_fn::*; + pub use super::function::window::expr_fn::*; + pub use super::function::xml::expr_fn::*; +} + +/// Returns all default scalar functions +pub fn all_default_scalar_functions() -> Vec> { + function::array::functions() + .into_iter() + .chain(function::bitwise::functions()) + .chain(function::collection::functions()) + .chain(function::conditional::functions()) + .chain(function::conversion::functions()) + .chain(function::csv::functions()) + .chain(function::datetime::functions()) + .chain(function::generator::functions()) + .chain(function::hash::functions()) + .chain(function::json::functions()) + .chain(function::lambda::functions()) + .chain(function::map::functions()) + .chain(function::math::functions()) + .chain(function::misc::functions()) + .chain(function::predicate::functions()) + .chain(function::string::functions()) + .chain(function::r#struct::functions()) + .chain(function::url::functions()) + .chain(function::xml::functions()) + .collect::>() +} + +/// Returns all default aggregate functions +pub fn all_default_aggregate_functions() -> Vec> { + function::aggregate::functions() +} + +/// Returns all default window functions +pub fn all_default_window_functions() -> Vec> { + function::window::functions() +} + +/// Returns all default table functions +pub fn all_default_table_functions() -> Vec> { + function::table::functions() +} + +/// Registers all enabled packages with a [`FunctionRegistry`] +pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> { + let scalar_functions: Vec> = all_default_scalar_functions(); + scalar_functions.into_iter().try_for_each(|udf| { + let existing_udf = registry.register_udf(udf)?; + if let Some(existing_udf) = existing_udf { + debug!("Overwrite existing UDF: {}", existing_udf.name()); + } + Ok(()) as Result<()> + })?; + + let aggregate_functions: Vec> = all_default_aggregate_functions(); + aggregate_functions.into_iter().try_for_each(|udf| { + let existing_udaf = registry.register_udaf(udf)?; + if let Some(existing_udaf) = existing_udaf { + debug!("Overwrite existing UDAF: {}", existing_udaf.name()); + } + Ok(()) as Result<()> + })?; + + let window_functions: Vec> = all_default_window_functions(); + window_functions.into_iter().try_for_each(|udf| { + let existing_udwf = registry.register_udwf(udf)?; + if let Some(existing_udwf) = existing_udwf { + debug!("Overwrite existing UDWF: {}", existing_udwf.name()); + } + Ok(()) as Result<()> + })?; + + Ok(()) +} diff --git a/datafusion/sqllogictest/Cargo.toml b/datafusion/sqllogictest/Cargo.toml index b2e02d7d5dd8..ef31fcad0f48 100644 --- a/datafusion/sqllogictest/Cargo.toml +++ b/datafusion/sqllogictest/Cargo.toml @@ -44,6 +44,7 @@ bytes = { workspace = true, optional = true } chrono = { workspace = true, optional = true } clap = { version = "4.5.36", features = ["derive", "env"] } datafusion = { workspace = true, default-features = true, features = ["avro"] } +datafusion-spark = { workspace = true, default-features = true } futures = { workspace = true } half = { workspace = true, default-features = true } indicatif = "0.17" diff --git a/datafusion/sqllogictest/src/engines/conversion.rs b/datafusion/sqllogictest/src/engines/conversion.rs index 516ec69e0b07..92ab64059bbd 100644 --- a/datafusion/sqllogictest/src/engines/conversion.rs +++ b/datafusion/sqllogictest/src/engines/conversion.rs @@ -49,7 +49,7 @@ pub(crate) fn f16_to_str(value: f16) -> String { } else if value == f16::NEG_INFINITY { "-Infinity".to_string() } else { - big_decimal_to_str(BigDecimal::from_str(&value.to_string()).unwrap()) + big_decimal_to_str(BigDecimal::from_str(&value.to_string()).unwrap(), None) } } @@ -63,7 +63,7 @@ pub(crate) fn f32_to_str(value: f32) -> String { } else if value == f32::NEG_INFINITY { "-Infinity".to_string() } else { - big_decimal_to_str(BigDecimal::from_str(&value.to_string()).unwrap()) + big_decimal_to_str(BigDecimal::from_str(&value.to_string()).unwrap(), None) } } @@ -77,7 +77,21 @@ pub(crate) fn f64_to_str(value: f64) -> String { } else if value == f64::NEG_INFINITY { "-Infinity".to_string() } else { - big_decimal_to_str(BigDecimal::from_str(&value.to_string()).unwrap()) + big_decimal_to_str(BigDecimal::from_str(&value.to_string()).unwrap(), None) + } +} + +pub(crate) fn spark_f64_to_str(value: f64) -> String { + if value.is_nan() { + // The sign of NaN can be different depending on platform. + // So the string representation of NaN ignores the sign. + "NaN".to_string() + } else if value == f64::INFINITY { + "Infinity".to_string() + } else if value == f64::NEG_INFINITY { + "-Infinity".to_string() + } else { + big_decimal_to_str(BigDecimal::from_str(&value.to_string()).unwrap(), Some(15)) } } @@ -86,6 +100,7 @@ pub(crate) fn decimal_128_to_str(value: i128, scale: i8) -> String { big_decimal_to_str( BigDecimal::from_str(&Decimal128Type::format_decimal(value, precision, scale)) .unwrap(), + None, ) } @@ -94,17 +109,21 @@ pub(crate) fn decimal_256_to_str(value: i256, scale: i8) -> String { big_decimal_to_str( BigDecimal::from_str(&Decimal256Type::format_decimal(value, precision, scale)) .unwrap(), + None, ) } #[cfg(feature = "postgres")] pub(crate) fn decimal_to_str(value: Decimal) -> String { - big_decimal_to_str(BigDecimal::from_str(&value.to_string()).unwrap()) + big_decimal_to_str(BigDecimal::from_str(&value.to_string()).unwrap(), None) } -pub(crate) fn big_decimal_to_str(value: BigDecimal) -> String { +/// Converts a `BigDecimal` to its plain string representation, optionally rounding to a specified number of decimal places. +/// +/// If `round_digits` is `None`, the value is rounded to 12 decimal places by default. +pub(crate) fn big_decimal_to_str(value: BigDecimal, round_digits: Option) -> String { // Round the value to limit the number of decimal places - let value = value.round(12).normalized(); + let value = value.round(round_digits.unwrap_or(12)).normalized(); // Format the value to a string value.to_plain_string() } @@ -115,12 +134,12 @@ mod tests { use bigdecimal::{num_bigint::BigInt, BigDecimal}; macro_rules! assert_decimal_str_eq { - ($integer:expr, $scale:expr, $expected:expr) => { + ($integer:expr, $scale:expr, $round_digits:expr, $expected:expr) => { assert_eq!( - big_decimal_to_str(BigDecimal::from_bigint( - BigInt::from($integer), - $scale - )), + big_decimal_to_str( + BigDecimal::from_bigint(BigInt::from($integer), $scale), + $round_digits + ), $expected ); }; @@ -128,44 +147,51 @@ mod tests { #[test] fn test_big_decimal_to_str() { - assert_decimal_str_eq!(110, 3, "0.11"); - assert_decimal_str_eq!(11, 3, "0.011"); - assert_decimal_str_eq!(11, 2, "0.11"); - assert_decimal_str_eq!(11, 1, "1.1"); - assert_decimal_str_eq!(11, 0, "11"); - assert_decimal_str_eq!(11, -1, "110"); - assert_decimal_str_eq!(0, 0, "0"); + assert_decimal_str_eq!(110, 3, None, "0.11"); + assert_decimal_str_eq!(11, 3, None, "0.011"); + assert_decimal_str_eq!(11, 2, None, "0.11"); + assert_decimal_str_eq!(11, 1, None, "1.1"); + assert_decimal_str_eq!(11, 0, None, "11"); + assert_decimal_str_eq!(11, -1, None, "110"); + assert_decimal_str_eq!(0, 0, None, "0"); assert_decimal_str_eq!( 12345678901234567890123456789012345678_i128, 0, + None, "12345678901234567890123456789012345678" ); assert_decimal_str_eq!( 12345678901234567890123456789012345678_i128, 38, + None, "0.123456789012" ); // Negative cases - assert_decimal_str_eq!(-110, 3, "-0.11"); - assert_decimal_str_eq!(-11, 3, "-0.011"); - assert_decimal_str_eq!(-11, 2, "-0.11"); - assert_decimal_str_eq!(-11, 1, "-1.1"); - assert_decimal_str_eq!(-11, 0, "-11"); - assert_decimal_str_eq!(-11, -1, "-110"); + assert_decimal_str_eq!(-110, 3, None, "-0.11"); + assert_decimal_str_eq!(-11, 3, None, "-0.011"); + assert_decimal_str_eq!(-11, 2, None, "-0.11"); + assert_decimal_str_eq!(-11, 1, None, "-1.1"); + assert_decimal_str_eq!(-11, 0, None, "-11"); + assert_decimal_str_eq!(-11, -1, None, "-110"); assert_decimal_str_eq!( -12345678901234567890123456789012345678_i128, 0, + None, "-12345678901234567890123456789012345678" ); assert_decimal_str_eq!( -12345678901234567890123456789012345678_i128, 38, + None, "-0.123456789012" ); // Round to 12 decimal places // 1.0000000000011 -> 1.000000000001 - assert_decimal_str_eq!(10_i128.pow(13) + 11, 13, "1.000000000001"); + assert_decimal_str_eq!(10_i128.pow(13) + 11, 13, None, "1.000000000001"); + assert_decimal_str_eq!(10_i128.pow(13) + 11, 13, Some(12), "1.000000000001"); + + assert_decimal_str_eq!(10_i128.pow(13) + 11, 13, Some(13), "1.0000000000011"); } } diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs index eeb34186ea20..85bbd421827e 100644 --- a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs +++ b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs @@ -28,7 +28,10 @@ use std::path::PathBuf; use std::sync::LazyLock; /// Converts `batches` to a result as expected by sqllogictest. -pub fn convert_batches(batches: Vec) -> Result>> { +pub fn convert_batches( + batches: Vec, + is_spark_path: bool, +) -> Result>> { if batches.is_empty() { Ok(vec![]) } else { @@ -46,7 +49,16 @@ pub fn convert_batches(batches: Vec) -> Result>> { ))); } - let new_rows = convert_batch(batch)? + // Convert a single batch to a `Vec>` for comparison, flatten expanded rows, and normalize each. + let new_rows = (0..batch.num_rows()) + .map(|row| { + batch + .columns() + .iter() + .map(|col| cell_to_string(col, row, is_spark_path)) + .collect::>>() + }) + .collect::>>>()? .into_iter() .flat_map(expand_row) .map(normalize_paths); @@ -162,19 +174,6 @@ static WORKSPACE_ROOT: LazyLock = LazyLock::new(|| { object_store::path::Path::parse(sanitized_workplace_root).unwrap() }); -/// Convert a single batch to a `Vec>` for comparison -fn convert_batch(batch: RecordBatch) -> Result>> { - (0..batch.num_rows()) - .map(|row| { - batch - .columns() - .iter() - .map(|col| cell_to_string(col, row)) - .collect::>>() - }) - .collect() -} - macro_rules! get_row_value { ($array_type:ty, $column: ident, $row: ident) => {{ let array = $column.as_any().downcast_ref::<$array_type>().unwrap(); @@ -193,7 +192,7 @@ macro_rules! get_row_value { /// /// Floating numbers are rounded to have a consistent representation with the Postgres runner. /// -pub fn cell_to_string(col: &ArrayRef, row: usize) -> Result { +pub fn cell_to_string(col: &ArrayRef, row: usize, is_spark_path: bool) -> Result { if !col.is_valid(row) { // represent any null value with the string "NULL" Ok(NULL_STR.to_string()) @@ -210,7 +209,12 @@ pub fn cell_to_string(col: &ArrayRef, row: usize) -> Result { Ok(f32_to_str(get_row_value!(array::Float32Array, col, row))) } DataType::Float64 => { - Ok(f64_to_str(get_row_value!(array::Float64Array, col, row))) + let result = get_row_value!(array::Float64Array, col, row); + if is_spark_path { + Ok(spark_f64_to_str(result)) + } else { + Ok(f64_to_str(result)) + } } DataType::Decimal128(_, scale) => { let value = get_row_value!(array::Decimal128Array, col, row); @@ -236,12 +240,12 @@ pub fn cell_to_string(col: &ArrayRef, row: usize) -> Result { DataType::Dictionary(_, _) => { let dict = col.as_any_dictionary(); let key = dict.normalized_keys()[row]; - Ok(cell_to_string(dict.values(), key)?) + Ok(cell_to_string(dict.values(), key, is_spark_path)?) } _ => { let f = ArrayFormatter::try_new(col.as_ref(), &DEFAULT_CLI_FORMAT_OPTIONS); - Ok(f.unwrap().value(row).to_string()) + Ok(f?.value(row).to_string()) } } .map_err(DFSqlLogicTestError::Arrow) diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs index a3a29eda2ee9..a01ac7e2f985 100644 --- a/datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs +++ b/datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs @@ -31,6 +31,7 @@ use sqllogictest::DBOutput; use tokio::time::Instant; use crate::engines::output::{DFColumnType, DFOutput}; +use crate::is_spark_path; pub struct DataFusion { ctx: SessionContext, @@ -79,7 +80,7 @@ impl sqllogictest::AsyncDB for DataFusion { } let start = Instant::now(); - let result = run_query(&self.ctx, sql).await; + let result = run_query(&self.ctx, is_spark_path(&self.relative_path), sql).await; let duration = start.elapsed(); if duration.gt(&Duration::from_millis(500)) { @@ -115,7 +116,11 @@ impl sqllogictest::AsyncDB for DataFusion { async fn shutdown(&mut self) {} } -async fn run_query(ctx: &SessionContext, sql: impl Into) -> Result { +async fn run_query( + ctx: &SessionContext, + is_spark_path: bool, + sql: impl Into, +) -> Result { let df = ctx.sql(sql.into().as_str()).await?; let task_ctx = Arc::new(df.task_ctx()); let plan = df.create_physical_plan().await?; @@ -123,7 +128,7 @@ async fn run_query(ctx: &SessionContext, sql: impl Into) -> Result = collect(stream).await?; - let rows = normalize::convert_batches(results)?; + let rows = normalize::convert_batches(results, is_spark_path)?; if rows.is_empty() && types.is_empty() { Ok(DBOutput::StatementComplete(0)) diff --git a/datafusion/sqllogictest/src/test_context.rs b/datafusion/sqllogictest/src/test_context.rs index ce819f186454..999f8fe08d4e 100644 --- a/datafusion/sqllogictest/src/test_context.rs +++ b/datafusion/sqllogictest/src/test_context.rs @@ -40,8 +40,11 @@ use datafusion::{ prelude::{CsvReadOptions, SessionContext}, }; +use crate::is_spark_path; use async_trait::async_trait; use datafusion::common::cast::as_float64_array; +use datafusion::execution::runtime_env::RuntimeEnv; +use datafusion::execution::SessionStateBuilder; use log::info; use tempfile::TempDir; @@ -70,8 +73,20 @@ impl TestContext { let config = SessionConfig::new() // hardcode target partitions so plans are deterministic .with_target_partitions(4); + let runtime = Arc::new(RuntimeEnv::default()); + let mut state = SessionStateBuilder::new() + .with_config(config) + .with_runtime_env(runtime) + .with_default_features() + .build(); + + if is_spark_path(relative_path) { + info!("Registering Spark functions"); + datafusion_spark::register_all(&mut state) + .expect("Can not register Spark functions"); + } - let mut test_ctx = TestContext::new(SessionContext::new_with_config(config)); + let mut test_ctx = TestContext::new(SessionContext::new_with_state(state)); let file_name = relative_path.file_name().unwrap().to_str().unwrap(); match file_name { @@ -122,6 +137,7 @@ impl TestContext { info!("Using default SessionContext"); } }; + Some(test_ctx) } @@ -223,14 +239,14 @@ pub async fn register_temp_table(ctx: &SessionContext) { self } - fn table_type(&self) -> TableType { - self.0 - } - fn schema(&self) -> SchemaRef { unimplemented!() } + fn table_type(&self) -> TableType { + self.0 + } + async fn scan( &self, _state: &dyn Session, diff --git a/datafusion/sqllogictest/src/util.rs b/datafusion/sqllogictest/src/util.rs index 5ae640cc98a9..695fe463fa67 100644 --- a/datafusion/sqllogictest/src/util.rs +++ b/datafusion/sqllogictest/src/util.rs @@ -106,3 +106,7 @@ pub fn df_value_validator( normalized_actual == normalized_expected } + +pub fn is_spark_path(relative_path: &Path) -> bool { + relative_path.starts_with("spark/") +} diff --git a/datafusion/sqllogictest/test_files/spark/README.md b/datafusion/sqllogictest/test_files/spark/README.md new file mode 100644 index 000000000000..0a7bb92371b5 --- /dev/null +++ b/datafusion/sqllogictest/test_files/spark/README.md @@ -0,0 +1,57 @@ + + +# Spark Test Files + +This directory contains test files for the `spark` test suite. + +## Testing Guide + +When testing Spark functions: + +- Functions must be tested on both `Scalar` and `Array` inputs +- Test cases should only contain `SELECT` statements with the function being tested +- Add explicit casts to input values to ensure the correct data type is used (e.g., `0::INT`) + - Explicit casting is necessary because DataFusion and Spark do not infer data types in the same way + +### Finding Test Cases + +To verify and compare function behavior at a minimum, you can refer to the following documentation sources: + +1. Databricks SQL Function Reference: + https://docs.databricks.com/aws/en/sql/language-manual/functions/NAME +2. Apache Spark SQL Function Reference: + https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.NAME.html +3. PySpark SQL Function Reference: + https://spark.apache.org/docs/latest/api/sql/#NAME + +**Note:** Replace `NAME` in each URL with the actual function name (e.g., for the `ASCII` function, use `ascii` instead +of `NAME`). + +### Scalar Example: + +```sql +SELECT expm1(0::INT); +``` + +### Array Example: + +```sql +SELECT expm1(a) FROM (VALUES (0::INT), (1::INT)) AS t(a); +``` diff --git a/datafusion/sqllogictest/test_files/spark/math/expm1.slt b/datafusion/sqllogictest/test_files/spark/math/expm1.slt new file mode 100644 index 000000000000..96d4abb0414b --- /dev/null +++ b/datafusion/sqllogictest/test_files/spark/math/expm1.slt @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +query R +SELECT expm1(0::INT); +---- +0 + +query R +SELECT expm1(1::INT); +---- +1.718281828459045 + +query R +SELECT expm1(a) FROM (VALUES (0::INT), (1::INT)) AS t(a); +---- +0 +1.718281828459045 diff --git a/datafusion/sqllogictest/test_files/spark/string/ascii.slt b/datafusion/sqllogictest/test_files/spark/string/ascii.slt new file mode 100644 index 000000000000..623154ffaa7b --- /dev/null +++ b/datafusion/sqllogictest/test_files/spark/string/ascii.slt @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +query I +SELECT ascii('234'); +---- +50 + +query I +SELECT ascii(''); +---- +0 + +query I +SELECT ascii('222'); +---- +50 + +query I +SELECT ascii('😀'); +---- +128512 + +query I +SELECT ascii(2::INT); +---- +50 + +query I +SELECT ascii(a) FROM (VALUES ('Spark'), ('PySpark'), ('Pandas API')) AS t(a); +---- +83 +80 +80