Skip to content

Commit f8c4037

Browse files
authored
Convert rows to arrays (#2677) (#2826)
* Convert rows to arrays (#2677)
* Review feedback
* Clippy
1 parent a0a263f commit f8c4037

File tree

6 files changed

+1158
-230
lines changed

6 files changed

+1158
-230
lines changed

arrow/benches/row_format.rs

Lines changed: 37 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -20,161 +20,95 @@ extern crate criterion;
2020
extern crate core;
2121

2222
use arrow::array::ArrayRef;
23-
use arrow::datatypes::{DataType, Int64Type, UInt64Type};
23+
use arrow::datatypes::{Int64Type, UInt64Type};
2424
use arrow::row::{RowConverter, SortField};
2525
use arrow::util::bench_util::{
2626
create_primitive_array, create_string_array_with_len, create_string_dict_array,
2727
};
2828
use arrow_array::types::Int32Type;
29+
use arrow_array::Array;
2930
use criterion::{black_box, Criterion};
3031
use std::sync::Arc;
3132

32-
fn row_bench(c: &mut Criterion) {
33-
let cols = vec![Arc::new(create_primitive_array::<UInt64Type>(4096, 0.)) as ArrayRef];
33+
fn do_bench(c: &mut Criterion, name: &str, cols: Vec<ArrayRef>) {
34+
let fields: Vec<_> = cols
35+
.iter()
36+
.map(|x| SortField::new(x.data_type().clone()))
37+
.collect();
3438

35-
c.bench_function("row_batch 4096 u64(0)", |b| {
39+
c.bench_function(&format!("convert_columns {}", name), |b| {
3640
b.iter(|| {
37-
let mut converter = RowConverter::new(vec![SortField::new(DataType::UInt64)]);
38-
black_box(converter.convert_columns(&cols))
41+
let mut converter = RowConverter::new(fields.clone());
42+
black_box(converter.convert_columns(&cols).unwrap())
3943
});
4044
});
4145

42-
let cols = vec![Arc::new(create_primitive_array::<Int64Type>(4096, 0.)) as ArrayRef];
46+
let mut converter = RowConverter::new(fields);
47+
let rows = converter.convert_columns(&cols).unwrap();
4348

44-
c.bench_function("row_batch 4096 i64(0)", |b| {
45-
b.iter(|| {
46-
let mut converter = RowConverter::new(vec![SortField::new(DataType::Int64)]);
47-
black_box(converter.convert_columns(&cols))
48-
});
49+
c.bench_function(&format!("convert_rows {}", name), |b| {
50+
b.iter(|| black_box(converter.convert_rows(&rows).unwrap()));
4951
});
52+
}
53+
54+
fn row_bench(c: &mut Criterion) {
55+
let cols = vec![Arc::new(create_primitive_array::<UInt64Type>(4096, 0.)) as ArrayRef];
56+
do_bench(c, "4096 u64(0)", cols);
57+
58+
let cols = vec![Arc::new(create_primitive_array::<Int64Type>(4096, 0.)) as ArrayRef];
59+
do_bench(c, "4096 i64(0)", cols);
5060

5161
let cols =
5262
vec![Arc::new(create_string_array_with_len::<i32>(4096, 0., 10)) as ArrayRef];
53-
54-
c.bench_function("row_batch 4096 string(10, 0)", |b| {
55-
b.iter(|| {
56-
let mut converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]);
57-
black_box(converter.convert_columns(&cols))
58-
});
59-
});
63+
do_bench(c, "4096 string(10, 0)", cols);
6064

6165
let cols =
6266
vec![Arc::new(create_string_array_with_len::<i32>(4096, 0., 30)) as ArrayRef];
63-
64-
c.bench_function("row_batch 4096 string(30, 0)", |b| {
65-
b.iter(|| {
66-
let mut converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]);
67-
black_box(converter.convert_columns(&cols))
68-
});
69-
});
67+
do_bench(c, "4096 string(30, 0)", cols);
7068

7169
let cols =
7270
vec![Arc::new(create_string_array_with_len::<i32>(4096, 0., 100)) as ArrayRef];
73-
74-
c.bench_function("row_batch 4096 string(100, 0)", |b| {
75-
b.iter(|| {
76-
let mut converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]);
77-
black_box(converter.convert_columns(&cols))
78-
});
79-
});
71+
do_bench(c, "4096 string(100, 0)", cols);
8072

8173
let cols =
8274
vec![Arc::new(create_string_array_with_len::<i32>(4096, 0.5, 100)) as ArrayRef];
83-
84-
c.bench_function("row_batch 4096 string(100, 0.5)", |b| {
85-
b.iter(|| {
86-
let mut converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]);
87-
black_box(converter.convert_columns(&cols))
88-
});
89-
});
75+
do_bench(c, "4096 string(100, 0.5)", cols);
9076

9177
let cols =
9278
vec![Arc::new(create_string_dict_array::<Int32Type>(4096, 0., 10)) as ArrayRef];
93-
94-
c.bench_function("row_batch 4096 string_dictionary(10, 0)", |b| {
95-
b.iter(|| {
96-
let mut converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]);
97-
black_box(converter.convert_columns(&cols))
98-
});
99-
});
79+
do_bench(c, "4096 string_dictionary(10, 0)", cols);
10080

10181
let cols =
10282
vec![Arc::new(create_string_dict_array::<Int32Type>(4096, 0., 30)) as ArrayRef];
103-
104-
c.bench_function("row_batch 4096 string_dictionary(30, 0)", |b| {
105-
b.iter(|| {
106-
let mut converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]);
107-
black_box(converter.convert_columns(&cols))
108-
});
109-
});
83+
do_bench(c, "4096 string_dictionary(30, 0)", cols);
11084

11185
let cols =
11286
vec![Arc::new(create_string_dict_array::<Int32Type>(4096, 0., 100)) as ArrayRef];
113-
114-
c.bench_function("row_batch 4096 string_dictionary(100, 0)", |b| {
115-
b.iter(|| {
116-
let mut converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]);
117-
black_box(converter.convert_columns(&cols))
118-
});
119-
});
87+
do_bench(c, "4096 string_dictionary(100, 0)", cols);
12088

12189
let cols =
12290
vec![Arc::new(create_string_dict_array::<Int32Type>(4096, 0.5, 100)) as ArrayRef];
91+
do_bench(c, "4096 string_dictionary(100, 0.5)", cols);
12392

124-
c.bench_function("row_batch 4096 string_dictionary(100, 0.5)", |b| {
125-
b.iter(|| {
126-
let mut converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]);
127-
black_box(converter.convert_columns(&cols))
128-
});
129-
});
130-
131-
let cols = [
93+
let cols = vec![
13294
Arc::new(create_string_array_with_len::<i32>(4096, 0.5, 20)) as ArrayRef,
13395
Arc::new(create_string_array_with_len::<i32>(4096, 0., 30)) as ArrayRef,
13496
Arc::new(create_string_array_with_len::<i32>(4096, 0., 100)) as ArrayRef,
13597
Arc::new(create_primitive_array::<Int64Type>(4096, 0.)) as ArrayRef,
13698
];
137-
138-
let fields = [
139-
SortField::new(DataType::Utf8),
140-
SortField::new(DataType::Utf8),
141-
SortField::new(DataType::Utf8),
142-
SortField::new(DataType::Int64),
143-
];
144-
145-
c.bench_function(
146-
"row_batch 4096 string(20, 0.5), string(30, 0), string(100, 0), i64(0)",
147-
|b| {
148-
b.iter(|| {
149-
let mut converter = RowConverter::new(fields.to_vec());
150-
black_box(converter.convert_columns(&cols))
151-
});
152-
},
99+
do_bench(
100+
c,
101+
"4096 string(20, 0.5), string(30, 0), string(100, 0), i64(0)",
102+
cols,
153103
);
154104

155-
let cols = [
105+
let cols = vec![
156106
Arc::new(create_string_dict_array::<Int32Type>(4096, 0.5, 20)) as ArrayRef,
157107
Arc::new(create_string_dict_array::<Int32Type>(4096, 0., 30)) as ArrayRef,
158108
Arc::new(create_string_dict_array::<Int32Type>(4096, 0., 100)) as ArrayRef,
159109
Arc::new(create_primitive_array::<Int64Type>(4096, 0.)) as ArrayRef,
160110
];
161-
162-
let fields = [
163-
SortField::new(DataType::Utf8),
164-
SortField::new(DataType::Utf8),
165-
SortField::new(DataType::Utf8),
166-
SortField::new(DataType::Int64),
167-
];
168-
169-
c.bench_function(
170-
"row_batch 4096 string_dictionary(20, 0.5), string_dictionary(30, 0), string_dictionary(100, 0), i64(0)",
171-
|b| {
172-
b.iter(|| {
173-
let mut converter = RowConverter::new(fields.to_vec());
174-
black_box(converter.convert_columns(&cols))
175-
});
176-
},
177-
);
111+
do_bench(c, "4096 string_dictionary(20, 0.5), string_dictionary(30, 0), string_dictionary(100, 0), i64(0)", cols);
178112
}
179113

180114
criterion_group!(benches, row_bench);

0 commit comments

Comments
 (0)