Skip to content

Commit b1e2bd9

Browse files
authored
more const evaluations for list array (#2327)
Signed-off-by: remzi <[email protected]>
1 parent 3ed0e28 commit b1e2bd9

File tree

8 files changed

+52
-87
lines changed

8 files changed

+52
-87
lines changed

arrow/src/array/array_binary.rs

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ impl<'a, T: OffsetSizeTrait> GenericBinaryArray<T> {
236236

237237
impl<OffsetSize: OffsetSizeTrait> fmt::Debug for GenericBinaryArray<OffsetSize> {
238238
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
239-
let prefix = if OffsetSize::IS_LARGE { "Large" } else { "" };
239+
let prefix = OffsetSize::PREFIX;
240240

241241
write!(f, "{}BinaryArray\n[\n", prefix)?;
242242
print_long_array(self, f, |array, index, f| {
@@ -608,11 +608,9 @@ mod tests {
608608
.unwrap();
609609
let binary_array1 = GenericBinaryArray::<O>::from(array_data1);
610610

611-
let data_type = if O::IS_LARGE {
612-
DataType::LargeList
613-
} else {
614-
DataType::List
615-
}(Box::new(Field::new("item", DataType::UInt8, false)));
611+
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Box::new(
612+
Field::new("item", DataType::UInt8, false),
613+
));
616614

617615
let array_data2 = ArrayData::builder(data_type)
618616
.len(3)
@@ -660,11 +658,9 @@ mod tests {
660658

661659
let offsets = [0, 5, 8, 15].map(|n| O::from_usize(n).unwrap());
662660
let null_buffer = Buffer::from_slice_ref(&[0b101]);
663-
let data_type = if O::IS_LARGE {
664-
DataType::LargeList
665-
} else {
666-
DataType::List
667-
}(Box::new(Field::new("item", DataType::UInt8, false)));
661+
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Box::new(
662+
Field::new("item", DataType::UInt8, false),
663+
));
668664

669665
// [None, Some(b"Parquet")]
670666
let array_data = ArrayData::builder(data_type)
@@ -707,11 +703,9 @@ mod tests {
707703
.unwrap();
708704

709705
let offsets = [0, 5, 10].map(|n| O::from_usize(n).unwrap());
710-
let data_type = if O::IS_LARGE {
711-
DataType::LargeList
712-
} else {
713-
DataType::List
714-
}(Box::new(Field::new("item", DataType::UInt8, false)));
706+
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Box::new(
707+
Field::new("item", DataType::UInt8, false),
708+
));
715709

716710
// [None, Some(b"Parquet")]
717711
let array_data = ArrayData::builder(data_type)

arrow/src/array/array_list.rs

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,17 @@ use crate::{
3434
/// trait declaring an offset size, relevant for i32 vs i64 array types.
3535
pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer {
3636
const IS_LARGE: bool;
37+
const PREFIX: &'static str;
3738
}
3839

3940
impl OffsetSizeTrait for i32 {
4041
const IS_LARGE: bool = false;
42+
const PREFIX: &'static str = "";
4143
}
4244

4345
impl OffsetSizeTrait for i64 {
4446
const IS_LARGE: bool = true;
47+
const PREFIX: &'static str = "Large";
4548
}
4649

4750
/// Generic struct for a variable-size list array.
@@ -57,6 +60,16 @@ pub struct GenericListArray<OffsetSize> {
5760
}
5861

5962
impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
63+
/// Constructor for the [`DataType`] of this list array.
64+
/// It takes the [`Field`] describing the child array and returns
65+
/// [`DataType::List`] or [`DataType::LargeList`], depending on the offset size.
66+
pub const DATA_TYPE_CONSTRUCTOR: fn(Box<Field>) -> DataType = if OffsetSize::IS_LARGE
67+
{
68+
DataType::LargeList
69+
} else {
70+
DataType::List
71+
};
72+
6073
/// Returns a reference to the values of this list.
6174
pub fn values(&self) -> ArrayRef {
6275
self.values.clone()
@@ -170,11 +183,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
170183
.collect();
171184

172185
let field = Box::new(Field::new("item", T::DATA_TYPE, true));
173-
let data_type = if OffsetSize::IS_LARGE {
174-
DataType::LargeList(field)
175-
} else {
176-
DataType::List(field)
177-
};
186+
let data_type = Self::DATA_TYPE_CONSTRUCTOR(field);
178187
let array_data = ArrayData::builder(data_type)
179188
.len(null_buf.len())
180189
.add_buffer(offsets.into())
@@ -274,7 +283,7 @@ impl<'a, OffsetSize: OffsetSizeTrait> ArrayAccessor for &'a GenericListArray<Off
274283

275284
impl<OffsetSize: OffsetSizeTrait> fmt::Debug for GenericListArray<OffsetSize> {
276285
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
277-
let prefix = if OffsetSize::IS_LARGE { "Large" } else { "" };
286+
let prefix = OffsetSize::PREFIX;
278287

279288
write!(f, "{}ListArray\n[\n", prefix)?;
280289
print_long_array(self, f, |array, index, f| {

arrow/src/array/array_string.rs

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ impl<'a, T: OffsetSizeTrait> GenericStringArray<T> {
294294

295295
impl<OffsetSize: OffsetSizeTrait> fmt::Debug for GenericStringArray<OffsetSize> {
296296
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
297-
let prefix = if OffsetSize::IS_LARGE { "Large" } else { "" };
297+
let prefix = OffsetSize::PREFIX;
298298

299299
write!(f, "{}StringArray\n[\n", prefix)?;
300300
print_long_array(self, f, |array, index, f| {
@@ -707,11 +707,9 @@ mod tests {
707707

708708
let offsets = [0, 5, 8, 15].map(|n| O::from_usize(n).unwrap());
709709
let null_buffer = Buffer::from_slice_ref(&[0b101]);
710-
let data_type = if O::IS_LARGE {
711-
DataType::LargeList
712-
} else {
713-
DataType::List
714-
}(Box::new(Field::new("item", DataType::UInt8, false)));
710+
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Box::new(
711+
Field::new("item", DataType::UInt8, false),
712+
));
715713

716714
// [None, Some("Parquet")]
717715
let array_data = ArrayData::builder(data_type)
@@ -754,11 +752,9 @@ mod tests {
754752
.unwrap();
755753

756754
let offsets = [0, 5, 10].map(|n| O::from_usize(n).unwrap());
757-
let data_type = if O::IS_LARGE {
758-
DataType::LargeList
759-
} else {
760-
DataType::List
761-
}(Box::new(Field::new("item", DataType::UInt8, false)));
755+
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Box::new(
756+
Field::new("item", DataType::UInt8, false),
757+
));
762758

763759
// [None, Some(b"Parquet")]
764760
let array_data = ArrayData::builder(data_type)
@@ -792,11 +788,9 @@ mod tests {
792788
.unwrap();
793789

794790
let offsets = [0, 2, 3].map(|n| O::from_usize(n).unwrap());
795-
let data_type = if O::IS_LARGE {
796-
DataType::LargeList
797-
} else {
798-
DataType::List
799-
}(Box::new(Field::new("item", DataType::UInt16, false)));
791+
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Box::new(
792+
Field::new("item", DataType::UInt16, false),
793+
));
800794

801795
let array_data = ArrayData::builder(data_type)
802796
.len(2)

arrow/src/array/builder/generic_list_builder.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ use crate::array::ArrayData;
2222
use crate::array::ArrayRef;
2323
use crate::array::GenericListArray;
2424
use crate::array::OffsetSizeTrait;
25-
use crate::datatypes::DataType;
2625
use crate::datatypes::Field;
2726

2827
use super::{ArrayBuilder, BufferBuilder, NullBufferBuilder};
@@ -135,11 +134,7 @@ where
135134
values_data.data_type().clone(),
136135
true, // TODO: find a consistent way of getting this
137136
));
138-
let data_type = if OffsetSize::IS_LARGE {
139-
DataType::LargeList(field)
140-
} else {
141-
DataType::List(field)
142-
};
137+
let data_type = GenericListArray::<OffsetSize>::DATA_TYPE_CONSTRUCTOR(field);
143138
let array_data_builder = ArrayData::builder(data_type)
144139
.len(len)
145140
.add_buffer(offset_buffer)
@@ -163,6 +158,7 @@ mod tests {
163158
use crate::array::builder::ListBuilder;
164159
use crate::array::{Array, Int32Array, Int32Builder};
165160
use crate::buffer::Buffer;
161+
use crate::datatypes::DataType;
166162

167163
fn _test_generic_list_array_builder<O: OffsetSizeTrait>() {
168164
let values_builder = Int32Builder::new(10);

arrow/src/array/transform/mod.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -313,11 +313,7 @@ fn preallocate_offset_and_binary_buffer<Offset: OffsetSizeTrait>(
313313
// offsets
314314
let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<Offset>());
315315
// safety: `unsafe` code assumes that this buffer is initialized with one element
316-
if Offset::IS_LARGE {
317-
buffer.push(0i64);
318-
} else {
319-
buffer.push(0i32)
320-
}
316+
buffer.push(Offset::zero());
321317

322318
[
323319
buffer,

arrow/src/compute/util.rs

Lines changed: 10 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -351,9 +351,7 @@ pub(super) mod tests {
351351
T: ArrowPrimitiveType,
352352
PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
353353
{
354-
use std::any::TypeId;
355-
356-
let mut offset = vec![0];
354+
let mut offset = vec![S::zero()];
357355
let mut values = vec![];
358356

359357
let list_len = data.len();
@@ -367,34 +365,18 @@ pub(super) mod tests {
367365
list_null_count += 1;
368366
bit_util::unset_bit(list_bitmap.as_slice_mut(), idx);
369367
}
370-
offset.push(values.len() as i64);
368+
offset.push(S::from_usize(values.len()).unwrap());
371369
}
372370

373371
let value_data = PrimitiveArray::<T>::from(values).into_data();
374-
let (list_data_type, value_offsets) = if TypeId::of::<S>() == TypeId::of::<i32>()
375-
{
376-
(
377-
DataType::List(Box::new(Field::new(
378-
"item",
379-
T::DATA_TYPE,
380-
list_null_count == 0,
381-
))),
382-
Buffer::from_slice_ref(
383-
&offset.into_iter().map(|x| x as i32).collect::<Vec<i32>>(),
384-
),
385-
)
386-
} else if TypeId::of::<S>() == TypeId::of::<i64>() {
387-
(
388-
DataType::LargeList(Box::new(Field::new(
389-
"item",
390-
T::DATA_TYPE,
391-
list_null_count == 0,
392-
))),
393-
Buffer::from_slice_ref(&offset),
394-
)
395-
} else {
396-
unreachable!()
397-
};
372+
let (list_data_type, value_offsets) = (
373+
GenericListArray::<S>::DATA_TYPE_CONSTRUCTOR(Box::new(Field::new(
374+
"item",
375+
T::DATA_TYPE,
376+
list_null_count == 0,
377+
))),
378+
Buffer::from_slice_ref(&offset),
379+
);
398380

399381
let list_data = ArrayData::builder(list_data_type)
400382
.len(list_len)

arrow/src/ffi.rs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1034,12 +1034,9 @@ mod tests {
10341034
.collect::<Buffer>();
10351035

10361036
// Construct a list array from the above two
1037-
let list_data_type = match std::mem::size_of::<Offset>() {
1038-
4 => DataType::List(Box::new(Field::new("item", DataType::Int32, false))),
1039-
_ => {
1040-
DataType::LargeList(Box::new(Field::new("item", DataType::Int32, false)))
1041-
}
1042-
};
1037+
let list_data_type = GenericListArray::<Offset>::DATA_TYPE_CONSTRUCTOR(Box::new(
1038+
Field::new("item", DataType::Int32, false),
1039+
));
10431040

10441041
let list_data = ArrayData::builder(list_data_type)
10451042
.len(3)

parquet/src/arrow/array_reader/list_array.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -268,10 +268,7 @@ mod tests {
268268
item_nullable: bool,
269269
) -> ArrowType {
270270
let field = Box::new(Field::new("item", data_type, item_nullable));
271-
match OffsetSize::IS_LARGE {
272-
true => ArrowType::LargeList(field),
273-
false => ArrowType::List(field),
274-
}
271+
GenericListArray::<OffsetSize>::DATA_TYPE_CONSTRUCTOR(field)
275272
}
276273

277274
fn downcast<OffsetSize: OffsetSizeTrait>(

0 commit comments

Comments
 (0)