Skip to content

Commit 53d6987

Browse files
DandandanDaniël Heres
and
Daniël Heres
authored
Support `IS [NOT] DISTINCT FROM` for binary array types (#6394)
* Support is distinct from binary * Add tests * Tests * Fix test --------- Co-authored-by: Daniël Heres <[email protected]>
1 parent 19af952 commit 53d6987

File tree

3 files changed

+76
-7
lines changed

3 files changed

+76
-7
lines changed

datafusion/core/tests/sqllogictests/test_files/select.slt

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,34 @@ select column1 is not distinct from column2 from t;
298298
false
299299

300300

301+
# Binary Expression for Binary
302+
statement ok
303+
CREATE TABLE binary_t as select arrow_cast('Bar', 'Binary') as column1, arrow_cast('B%', 'Binary') as column2;
304+
305+
query B
306+
select column1 is distinct from column2 from binary_t;
307+
----
308+
true
309+
310+
query B
311+
select column1 is not distinct from column2 from binary_t;
312+
----
313+
false
314+
315+
# Binary Expression for LargeBinary
316+
statement ok
317+
CREATE TABLE large_binary_t as select arrow_cast('Bar', 'LargeBinary') as column1, arrow_cast('B%', 'LargeBinary') as column2;
318+
319+
query B
320+
select column1 is distinct from column2 from large_binary_t;
321+
----
322+
true
323+
324+
query B
325+
select column1 is not distinct from column2 from large_binary_t;
326+
----
327+
false
328+
301329
# select all
302330
# these two queries should return the same result
303331
query R

datafusion/physical-expr/src/expressions/binary.rs

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -70,13 +70,14 @@ use kernels::{
7070
use kernels_arrow::{
7171
add_decimal_dyn_scalar, add_dyn_decimal, add_dyn_temporal, add_dyn_temporal_scalar,
7272
divide_decimal_dyn_scalar, divide_dyn_opt_decimal, is_distinct_from,
73-
is_distinct_from_bool, is_distinct_from_decimal, is_distinct_from_f32,
74-
is_distinct_from_f64, is_distinct_from_null, is_distinct_from_utf8,
75-
is_not_distinct_from, is_not_distinct_from_bool, is_not_distinct_from_decimal,
76-
is_not_distinct_from_f32, is_not_distinct_from_f64, is_not_distinct_from_null,
77-
is_not_distinct_from_utf8, modulus_decimal_dyn_scalar, modulus_dyn_decimal,
78-
multiply_decimal_dyn_scalar, multiply_dyn_decimal, subtract_decimal_dyn_scalar,
79-
subtract_dyn_decimal, subtract_dyn_temporal, subtract_dyn_temporal_scalar,
73+
is_distinct_from_binary, is_distinct_from_bool, is_distinct_from_decimal,
74+
is_distinct_from_f32, is_distinct_from_f64, is_distinct_from_null,
75+
is_distinct_from_utf8, is_not_distinct_from, is_not_distinct_from_binary,
76+
is_not_distinct_from_bool, is_not_distinct_from_decimal, is_not_distinct_from_f32,
77+
is_not_distinct_from_f64, is_not_distinct_from_null, is_not_distinct_from_utf8,
78+
modulus_decimal_dyn_scalar, modulus_dyn_decimal, multiply_decimal_dyn_scalar,
79+
multiply_dyn_decimal, subtract_decimal_dyn_scalar, subtract_dyn_decimal,
80+
subtract_dyn_temporal, subtract_dyn_temporal_scalar,
8081
};
8182

8283
use arrow::datatypes::{DataType, Schema, TimeUnit};
@@ -245,6 +246,21 @@ macro_rules! compute_utf8_op {
245246
}};
246247
}
247248

249+
/// Invoke a compute kernel on a pair of binary data arrays.
///
/// Arguments:
/// * `$LEFT`, `$RIGHT` - array expressions; each is downcast to `$DT` through
///   `as_any().downcast_ref::<$DT>()`.
/// * `$OP` - kernel name prefix; the function actually called is `<$OP>_binary`
///   (the identifier is assembled with `paste`).
/// * `$DT` - concrete array type to downcast both sides to.
///
/// Evaluates to `Ok(Arc::new(..))` wrapping the kernel's result; an error
/// returned by the kernel is propagated with `?`, so the macro must be used
/// in a context that returns a compatible `Result`.
///
/// Panics (via `expect`) if either side cannot be downcast to `$DT`.
250+
macro_rules! compute_binary_op {
251+
($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{
252+
let ll = $LEFT
253+
.as_any()
254+
.downcast_ref::<$DT>()
255+
.expect("compute_op failed to downcast left side array");
256+
let rr = $RIGHT
257+
.as_any()
258+
.downcast_ref::<$DT>()
259+
.expect("compute_op failed to downcast right side array");
// Dispatch to the `<$OP>_binary` kernel, e.g. `is_distinct_from_binary`.
260+
Ok(Arc::new(paste::expr! {[<$OP _binary>]}(&ll, &rr)?))
261+
}};
262+
}
263+
248264
/// Invoke a compute kernel on a data array and a scalar value
249265
macro_rules! compute_utf8_op_scalar {
250266
($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident, $OP_TYPE:expr) => {{
@@ -510,7 +526,10 @@ macro_rules! binary_array_op {
510526
DataType::Float32 => compute_f32_op!($LEFT, $RIGHT, $OP, Float32Array),
511527
DataType::Float64 => compute_f64_op!($LEFT, $RIGHT, $OP, Float64Array),
512528
DataType::Utf8 => compute_utf8_op!($LEFT, $RIGHT, $OP, StringArray),
529+
DataType::Binary => compute_binary_op!($LEFT, $RIGHT, $OP, BinaryArray),
530+
DataType::LargeBinary => compute_binary_op!($LEFT, $RIGHT, $OP, LargeBinaryArray),
513531
DataType::LargeUtf8 => compute_utf8_op!($LEFT, $RIGHT, $OP, LargeStringArray),
532+
514533
DataType::Timestamp(TimeUnit::Nanosecond, _) => {
515534
compute_op!($LEFT, $RIGHT, $OP, TimestampNanosecondArray)
516535
}

datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,17 @@ pub(crate) fn is_distinct_from_utf8<OffsetSize: OffsetSizeTrait>(
210210
.collect())
211211
}
212212

213+
/// Element-wise `IS DISTINCT FROM` for two binary arrays with the same offset size.
///
/// Walks `left` and `right` in lockstep and emits `Some(x != y)` for every
/// pair, so every slot of the returned `BooleanArray` is populated (the result
/// itself never contains nulls).
///
/// NOTE(review): the iterators presumably yield `Option<&[u8]>`, which would
/// make a null distinct from any value and not distinct from another null;
/// confirm against arrow's `GenericBinaryArray::iter`.
///
/// `zip` stops at the shorter input, so the output length is the minimum of
/// the two input lengths; no length check is performed here.
///
/// This implementation always returns `Ok`.
214+
left: &GenericBinaryArray<OffsetSize>,
215+
right: &GenericBinaryArray<OffsetSize>,
216+
) -> Result<BooleanArray> {
217+
Ok(left
218+
.iter()
219+
.zip(right.iter())
220+
// Wrapping in `Some` makes the comparison result always non-null.
.map(|(x, y)| Some(x != y))
221+
.collect())
222+
}
223+
213224
pub(crate) fn is_distinct_from_null(
214225
left: &NullArray,
215226
_right: &NullArray,
@@ -241,6 +252,17 @@ pub(crate) fn is_not_distinct_from_utf8<OffsetSize: OffsetSizeTrait>(
241252
.collect())
242253
}
243254

255+
/// Element-wise `IS NOT DISTINCT FROM` for two binary arrays with the same offset size.
///
/// Walks `left` and `right` in lockstep and emits `Some(x == y)` for every
/// pair, so every slot of the returned `BooleanArray` is populated (the result
/// itself never contains nulls).
///
/// NOTE(review): the iterators presumably yield `Option<&[u8]>`, which would
/// make two nulls compare as not distinct and a null distinct from any value;
/// confirm against arrow's `GenericBinaryArray::iter`.
///
/// `zip` stops at the shorter input, so the output length is the minimum of
/// the two input lengths; no length check is performed here.
///
/// This implementation always returns `Ok`.
256+
left: &GenericBinaryArray<OffsetSize>,
257+
right: &GenericBinaryArray<OffsetSize>,
258+
) -> Result<BooleanArray> {
259+
Ok(left
260+
.iter()
261+
.zip(right.iter())
262+
// Wrapping in `Some` makes the comparison result always non-null.
.map(|(x, y)| Some(x == y))
263+
.collect())
264+
}
265+
244266
pub(crate) fn is_distinct_from_decimal(
245267
left: &Decimal128Array,
246268
right: &Decimal128Array,

0 commit comments

Comments
 (0)