Skip to content

Commit ecbb8c2

Browse files
authored
Add BooleanArray::from_unary and BooleanArray::from_binary (#3258)
* Add BooleanArray::from_unary and BooleanArray::from_binary
* Add docs
* Tweak signatures
* Remove fallibility from combine_option_bitmap
* Remove unused compare_option_bitmap
* Remove fallibility
* Fix doc
1 parent 9abdb55 commit ecbb8c2

File tree

9 files changed

+250
-307
lines changed

9 files changed

+250
-307
lines changed

arrow-array/src/array/boolean_array.rs

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ use crate::iterator::BooleanIter;
2020
use crate::raw_pointer::RawPtrBox;
2121
use crate::{print_long_array, Array, ArrayAccessor};
2222
use arrow_buffer::{bit_util, Buffer, MutableBuffer};
23+
use arrow_data::bit_mask::combine_option_bitmap;
2324
use arrow_data::ArrayData;
2425
use arrow_schema::DataType;
2526
use std::any::Any;
@@ -173,6 +174,92 @@ impl BooleanArray {
173174
) -> impl Iterator<Item = Option<bool>> + 'a {
174175
indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index)))
175176
}
177+
178+
/// Create a [`BooleanArray`] by evaluating the operation for
179+
/// each element of the provided array
///
/// The validity bitmap of `left`, if any, is sliced to the array's offset and
/// length and reused for the result. `op` is invoked for every index in
/// `0..left.len()`, including indices whose slot is null.
180+
///
181+
/// ```
182+
/// # use arrow_array::{BooleanArray, Int32Array};
183+
///
184+
/// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
185+
/// let r = BooleanArray::from_unary(&array, |x| x > 2);
186+
/// assert_eq!(&r, &BooleanArray::from(vec![false, false, true, true, true]));
187+
/// ```
188+
pub fn from_unary<T: ArrayAccessor, F>(left: T, mut op: F) -> Self
189+
where
190+
F: FnMut(T::Item) -> bool,
191+
{
192+
// Carry the input's validity bitmap over, re-based to the array's own
// offset because the result below is created with offset 0.
let null_bit_buffer = left
193+
.data()
194+
.null_buffer()
195+
.map(|b| b.bit_slice(left.offset(), left.len()));
196+
197+
// Evaluate `op` once per index and pack the results into a bit buffer.
let buffer = MutableBuffer::collect_bool(left.len(), |i| unsafe {
198+
// SAFETY: i in range 0..len
199+
op(left.value_unchecked(i))
200+
});
201+
202+
// NOTE(review): relies on the values buffer holding `left.len()` bits and the
// validity bitmap having been sliced to the same length — confirm this meets
// the requirements of `ArrayData::new_unchecked`.
let data = unsafe {
203+
ArrayData::new_unchecked(
204+
DataType::Boolean,
205+
left.len(),
206+
None,
207+
null_bit_buffer,
208+
0,
209+
vec![Buffer::from(buffer)],
210+
vec![],
211+
)
212+
};
213+
Self::from(data)
214+
}
215+
216+
/// Create a [`BooleanArray`] by evaluating the binary operation for
217+
/// each element of the provided arrays
///
/// The validity bitmaps of `left` and `right` are combined with
/// `combine_option_bitmap`. `op` is invoked for every index in
/// `0..left.len()`, including indices where either input slot is null.
218+
///
219+
/// ```
220+
/// # use arrow_array::{BooleanArray, Int32Array};
221+
///
222+
/// let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
223+
/// let b = Int32Array::from(vec![1, 2, 0, 2, 5]);
224+
/// let r = BooleanArray::from_binary(&a, &b, |a, b| a == b);
225+
/// assert_eq!(&r, &BooleanArray::from(vec![true, true, false, false, true]));
226+
/// ```
227+
///
228+
/// # Panics
229+
///
230+
/// Panics if `left` and `right` do not have the same length.
231+
///
232+
pub fn from_binary<T: ArrayAccessor, S: ArrayAccessor, F>(
233+
left: T,
234+
right: S,
235+
mut op: F,
236+
) -> Self
237+
where
238+
F: FnMut(T::Item, S::Item) -> bool,
239+
{
240+
// Both inputs are indexed with the same `i` below, so their lengths must match.
assert_eq!(left.len(), right.len());
241+
242+
// Merge the validity bitmaps of both inputs into one bitmap for the result.
let null_bit_buffer =
243+
combine_option_bitmap(&[left.data_ref(), right.data_ref()], left.len());
244+
245+
// Evaluate `op` once per index and pack the results into a bit buffer.
let buffer = MutableBuffer::collect_bool(left.len(), |i| unsafe {
246+
// SAFETY: i in range 0..len
247+
op(left.value_unchecked(i), right.value_unchecked(i))
248+
});
249+
250+
// NOTE(review): relies on the values buffer and the combined validity bitmap
// both covering `left.len()` bits — confirm this meets the requirements of
// `ArrayData::new_unchecked`.
let data = unsafe {
251+
ArrayData::new_unchecked(
252+
DataType::Boolean,
253+
left.len(),
254+
None,
255+
null_bit_buffer,
256+
0,
257+
vec![Buffer::from(buffer)],
258+
vec![],
259+
)
260+
};
261+
Self::from(data)
262+
}
176263
}
177264

178265
impl Array for BooleanArray {

arrow-data/src/bit_mask.rs

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,11 @@
1717

1818
//! Utils for working with packed bit masks
1919
20+
use crate::ArrayData;
2021
use arrow_buffer::bit_chunk_iterator::BitChunks;
2122
use arrow_buffer::bit_util::{ceil, get_bit, set_bit};
23+
use arrow_buffer::buffer::buffer_bin_and;
24+
use arrow_buffer::Buffer;
2225

2326
/// Sets all bits on `write_data` in the range `[offset_write..offset_write+len]` to be equal to the
2427
/// bits in `data` in the range `[offset_read..offset_read+len]`
@@ -62,9 +65,41 @@ pub fn set_bits(
6265
null_count as usize
6366
}
6467

68+
/// Combines the null bitmaps of multiple arrays using a bitwise `and` operation.
69+
///
70+
/// This function is useful when implementing operations on higher level arrays.
///
/// Each array's bitmap is read starting at that array's offset, and the returned
/// buffer covers `len_in_bits` bits starting at bit 0.
///
/// Returns `None` when `arrays` is empty or when none of the arrays has a null
/// buffer.
71+
pub fn combine_option_bitmap(
72+
arrays: &[&ArrayData],
73+
len_in_bits: usize,
74+
) -> Option<Buffer> {
75+
let (buffer, offset) = arrays
76+
.iter()
77+
// Pair every (optional) bitmap with the offset of the array it belongs to.
.map(|array| (array.null_buffer().cloned(), array.offset()))
78+
.reduce(|acc, buffer_and_offset| match (acc, buffer_and_offset) {
79+
// Neither side has a bitmap so far.
((None, _), (None, _)) => (None, 0),
80+
// Only one side has a bitmap: keep it together with its own offset.
((Some(buffer), offset), (None, _)) | ((None, _), (Some(buffer), offset)) => {
81+
(Some(buffer), offset)
82+
}
83+
// Both sides have a bitmap: AND them, each read from its own offset;
// the combined buffer is tracked with offset 0 from here on.
((Some(buffer_left), offset_left), (Some(buffer_right), offset_right)) => (
84+
Some(buffer_bin_and(
85+
&buffer_left,
86+
offset_left,
87+
&buffer_right,
88+
offset_right,
89+
len_in_bits,
90+
)),
91+
0,
92+
),
93+
// `reduce` yields `None` for an empty `arrays` slice, which is returned as-is.
})?;
94+
95+
// Slice from the remaining offset so the result covers exactly `len_in_bits` bits.
Some(buffer?.bit_slice(offset, len_in_bits))
96+
}
97+
6598
#[cfg(test)]
6699
mod tests {
67100
use super::*;
101+
use arrow_schema::DataType;
102+
use std::sync::Arc;
68103

69104
#[test]
70105
fn test_set_bits_aligned() {
@@ -187,4 +222,110 @@ mod tests {
187222
assert_eq!(destination, expected_data);
188223
assert_eq!(result, expected_null_count);
189224
}
225+
226+
/// Test helper: builds a `UInt8` [`ArrayData`] of `len` elements starting at
/// `offset`, backed by `len + offset` values of `11` and the supplied
/// (optional) null bitmap.
fn make_data_with_null_bit_buffer(
227+
len: usize,
228+
offset: usize,
229+
null_bit_buffer: Option<Buffer>,
230+
) -> Arc<ArrayData> {
231+
// The values themselves are irrelevant; only the null bitmap is under test.
let buffer = Buffer::from(&vec![11; len + offset]);
232+
233+
Arc::new(
234+
ArrayData::try_new(
235+
DataType::UInt8,
236+
len,
237+
null_bit_buffer,
238+
offset,
239+
vec![buffer],
240+
vec![],
241+
)
242+
.unwrap(),
243+
)
244+
}
245+
246+
// Covers empty input, missing bitmaps, pairwise and three-way AND, and sliced inputs.
#[test]
247+
fn test_combine_option_bitmap() {
248+
let none_bitmap = make_data_with_null_bit_buffer(8, 0, None);
249+
let some_bitmap =
250+
make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b01001010])));
251+
let inverse_bitmap =
252+
make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10110101])));
253+
let some_other_bitmap =
254+
make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b11010111])));
255+
// No arrays at all: nothing to combine.
assert_eq!(None, combine_option_bitmap(&[], 8));
256+
// A single bitmap is returned unchanged.
assert_eq!(
257+
Some(Buffer::from([0b01001010])),
258+
combine_option_bitmap(&[&some_bitmap], 8)
259+
);
260+
// No input has a bitmap: the result has none either.
assert_eq!(
261+
None,
262+
combine_option_bitmap(&[&none_bitmap, &none_bitmap], 8)
263+
);
264+
// A missing bitmap on either side leaves the other one untouched.
assert_eq!(
265+
Some(Buffer::from([0b01001010])),
266+
combine_option_bitmap(&[&some_bitmap, &none_bitmap], 8)
267+
);
268+
assert_eq!(
269+
Some(Buffer::from([0b11010111])),
270+
combine_option_bitmap(&[&none_bitmap, &some_other_bitmap], 8)
271+
);
272+
// AND of a bitmap with itself is the bitmap.
assert_eq!(
273+
Some(Buffer::from([0b01001010])),
274+
combine_option_bitmap(&[&some_bitmap, &some_bitmap], 8,)
275+
);
276+
// AND of a bitmap with its complement clears every bit.
assert_eq!(
277+
Some(Buffer::from([0b0])),
278+
combine_option_bitmap(&[&some_bitmap, &inverse_bitmap], 8,)
279+
);
280+
// Three inputs, one of them without a bitmap.
assert_eq!(
281+
Some(Buffer::from([0b01000010])),
282+
combine_option_bitmap(&[&some_bitmap, &some_other_bitmap, &none_bitmap], 8,)
283+
);
284+
// Sliced inputs: each bitmap is read from its own offset (3, 2 and 1) for 5 bits.
assert_eq!(
285+
Some(Buffer::from([0b00001001])),
286+
combine_option_bitmap(
287+
&[
288+
&some_bitmap.slice(3, 5),
289+
&inverse_bitmap.slice(2, 5),
290+
&some_other_bitmap.slice(1, 5)
291+
],
292+
5,
293+
)
294+
);
295+
}
296+
297+
// Checks that array offsets are honoured: every combination must yield the
// same un-shifted pattern.
#[test]
298+
fn test_combine_option_bitmap_with_offsets() {
299+
let none_bitmap = make_data_with_null_bit_buffer(8, 0, None);
300+
// `bitmap1` and `bitmap2` store the same pattern as `bitmap0`, shifted up by
// their array offset (1 and 2 bits respectively).
let bitmap0 =
301+
make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10101010])));
302+
let bitmap1 =
303+
make_data_with_null_bit_buffer(8, 1, Some(Buffer::from([0b01010100, 0b1])));
304+
let bitmap2 =
305+
make_data_with_null_bit_buffer(8, 2, Some(Buffer::from([0b10101000, 0b10])));
306+
// A single offset bitmap comes back re-based to bit 0.
assert_eq!(
307+
Some(Buffer::from([0b10101010])),
308+
combine_option_bitmap(&[&bitmap1], 8)
309+
);
310+
assert_eq!(
311+
Some(Buffer::from([0b10101010])),
312+
combine_option_bitmap(&[&bitmap2], 8)
313+
);
314+
// Pairing with an array that has no bitmap keeps the offset handling intact.
assert_eq!(
315+
Some(Buffer::from([0b10101010])),
316+
combine_option_bitmap(&[&bitmap1, &none_bitmap], 8)
317+
);
318+
assert_eq!(
319+
Some(Buffer::from([0b10101010])),
320+
combine_option_bitmap(&[&none_bitmap, &bitmap2], 8)
321+
);
322+
// Two bitmaps with different offsets are aligned before being AND-ed.
assert_eq!(
323+
Some(Buffer::from([0b10101010])),
324+
combine_option_bitmap(&[&bitmap0, &bitmap1], 8)
325+
);
326+
assert_eq!(
327+
Some(Buffer::from([0b10101010])),
328+
combine_option_bitmap(&[&bitmap1, &bitmap2], 8)
329+
);
330+
}
190331
}

arrow/src/compute/kernels/arithmetic.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -310,10 +310,10 @@ where
310310
}
311311

312312
// Create the combined `Bitmap`
313-
let null_bit_buffer = crate::compute::util::combine_option_bitmap(
313+
let null_bit_buffer = arrow_data::bit_mask::combine_option_bitmap(
314314
&[left.data_ref(), right.data_ref()],
315315
left.len(),
316-
)?;
316+
);
317317

318318
let lanes = T::lanes();
319319
let buffer_size = left.len() * std::mem::size_of::<T::Native>();
@@ -660,10 +660,10 @@ where
660660
)));
661661
}
662662

663-
let null_bit_buffer = crate::compute::util::combine_option_bitmap(
663+
let null_bit_buffer = arrow_data::bit_mask::combine_option_bitmap(
664664
&[left.data_ref(), right.data_ref()],
665665
left.len(),
666-
)?;
666+
);
667667

668668
// Safety justification: Since the inputs are valid Arrow arrays, all values are
669669
// valid indexes into the dictionary (which is verified during construction)

arrow/src/compute/kernels/arity.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,12 @@ use crate::array::{
2222
PrimitiveArray,
2323
};
2424
use crate::buffer::Buffer;
25-
use crate::compute::util::combine_option_bitmap;
2625
use crate::datatypes::{ArrowNumericType, ArrowPrimitiveType};
2726
use crate::downcast_dictionary_array;
2827
use crate::error::{ArrowError, Result};
2928
use crate::util::bit_iterator::try_for_each_valid_idx;
3029
use arrow_buffer::MutableBuffer;
30+
use arrow_data::bit_mask::combine_option_bitmap;
3131
use std::sync::Arc;
3232

3333
#[inline]
@@ -215,7 +215,7 @@ where
215215
return Ok(PrimitiveArray::from(ArrayData::new_empty(&O::DATA_TYPE)));
216216
}
217217

218-
let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap();
218+
let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len);
219219
let null_count = null_buffer
220220
.as_ref()
221221
.map(|x| len - x.count_set_bits_offset(0, len))
@@ -275,7 +275,7 @@ where
275275

276276
let len = a.len();
277277

278-
let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap();
278+
let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len);
279279
let null_count = null_buffer
280280
.as_ref()
281281
.map(|x| len - x.count_set_bits_offset(0, len))
@@ -333,7 +333,7 @@ where
333333
if a.null_count() == 0 && b.null_count() == 0 {
334334
try_binary_no_nulls(len, a, b, op)
335335
} else {
336-
let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap();
336+
let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len);
337337

338338
let null_count = null_buffer
339339
.as_ref()
@@ -401,7 +401,7 @@ where
401401
if a.null_count() == 0 && b.null_count() == 0 {
402402
try_binary_no_nulls_mut(len, a, b, op)
403403
} else {
404-
let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap();
404+
let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len);
405405
let null_count = null_buffer
406406
.as_ref()
407407
.map(|x| len - x.count_set_bits_offset(0, len))

arrow/src/compute/kernels/boolean.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@ use crate::buffer::{
2929
bitwise_bin_op_helper, bitwise_quaternary_op_helper, buffer_bin_and, buffer_bin_or,
3030
buffer_unary_not, Buffer, MutableBuffer,
3131
};
32-
use crate::compute::util::combine_option_bitmap;
3332
use crate::datatypes::DataType;
3433
use crate::error::{ArrowError, Result};
3534
use crate::util::bit_util::ceil;
35+
use arrow_data::bit_mask::combine_option_bitmap;
3636

3737
/// Updates null buffer based on data buffer and null buffer of the operand at other side
3838
/// in boolean AND kernel with Kleene logic. In short, because for AND kernel, null AND false
@@ -108,7 +108,7 @@ pub(crate) fn build_null_buffer_for_and_or(
108108
len_in_bits: usize,
109109
) -> Option<Buffer> {
110110
// `combine_option_bitmap` is infallible and already returns `Option<Buffer>`, so its result is passed through as-is.
111-
combine_option_bitmap(&[left_data, right_data], len_in_bits).unwrap()
111+
combine_option_bitmap(&[left_data, right_data], len_in_bits)
112112
}
113113

114114
/// Updates null buffer based on data buffer and null buffer of the operand at other side

0 commit comments

Comments
 (0)