Skip to content

Commit c6f0d3c

Browse files
Pass scalar to eq inside nullif (#11697)
* Properly specialize nullif for scalar (3x faster) * missed feature flag * fix test * extract * dodes -> does Co-authored-by: Oleks V <[email protected]> --------- Co-authored-by: Oleks V <[email protected]>
1 parent 011a3f3 commit c6f0d3c

File tree

3 files changed

+54
-3
lines changed

3 files changed

+54
-3
lines changed

datafusion/functions/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,11 @@ harness = false
112112
name = "make_date"
113113
required-features = ["datetime_expressions"]
114114

115+
[[bench]]
116+
harness = false
117+
name = "nullif"
118+
required-features = ["core_expressions"]
119+
115120
[[bench]]
116121
harness = false
117122
name = "date_bin"
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
extern crate criterion;
19+
20+
use arrow::util::bench_util::create_string_array_with_len;
21+
use criterion::{black_box, criterion_group, criterion_main, Criterion};
22+
use datafusion_common::ScalarValue;
23+
use datafusion_expr::ColumnarValue;
24+
use datafusion_functions::core::nullif;
25+
use std::sync::Arc;
26+
27+
fn criterion_benchmark(c: &mut Criterion) {
28+
let nullif = nullif();
29+
for size in [1024, 4096, 8192] {
30+
let array = Arc::new(create_string_array_with_len::<i32>(size, 0.2, 32));
31+
let args = vec![
32+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("abcd".to_string()))),
33+
ColumnarValue::Array(array),
34+
];
35+
c.bench_function(&format!("nullif scalar array: {}", size), |b| {
36+
b.iter(|| black_box(nullif.invoke(&args).unwrap()))
37+
});
38+
}
39+
}
40+
41+
criterion_group!(benches, criterion_benchmark);
42+
criterion_main!(benches);

datafusion/functions/src/core/nullif.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ use arrow::datatypes::DataType;
1919
use datafusion_common::{exec_err, Result};
2020
use datafusion_expr::ColumnarValue;
2121

22-
use arrow::array::Array;
2322
use arrow::compute::kernels::cmp::eq;
2423
use arrow::compute::kernels::nullif::nullif;
2524
use datafusion_common::ScalarValue;
@@ -122,8 +121,13 @@ fn nullif_func(args: &[ColumnarValue]) -> Result<ColumnarValue> {
122121
Ok(ColumnarValue::Array(array))
123122
}
124123
(ColumnarValue::Scalar(lhs), ColumnarValue::Array(rhs)) => {
125-
let lhs = lhs.to_array_of_size(rhs.len())?;
126-
let array = nullif(&lhs, &eq(&lhs, &rhs)?)?;
124+
let lhs_s = lhs.to_scalar()?;
125+
let lhs_a = lhs.to_array_of_size(rhs.len())?;
126+
let array = nullif(
127+
// nullif in arrow-select does not support Datum, so we need to convert to array
128+
lhs_a.as_ref(),
129+
&eq(&lhs_s, &rhs)?,
130+
)?;
127131
Ok(ColumnarValue::Array(array))
128132
}
129133
(ColumnarValue::Scalar(lhs), ColumnarValue::Scalar(rhs)) => {

0 commit comments

Comments
 (0)