Skip to content

Commit 1df1e57

Browse files
committed
Merge remote-tracking branch 'upstream/master' into split-out-arrow-data
2 parents 3c3faf2 + 43d912c commit 1df1e57

File tree

12 files changed

+211
-52
lines changed

12 files changed

+211
-52
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ __blobstorage__
2020

2121
# .bak files
2222
*.bak
23-
23+
*.bak2
2424
# OS-specific .gitignores
2525

2626
# Mac .gitignore

arrow-buffer/src/lib.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@
1919
2020
pub mod alloc;
2121
pub mod buffer;
22+
pub use buffer::{Buffer, MutableBuffer};
23+
2224
mod bytes;
23-
pub mod native;
24-
pub mod util;
25+
mod native;
26+
27+
pub use native::*;
28+
mod util;
29+
pub use util::*;

arrow/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,3 +239,8 @@ harness = false
239239
name = "row_format"
240240
harness = false
241241
required-features = ["test_utils"]
242+
243+
[[bench]]
244+
name = "bitwise_kernel"
245+
harness = false
246+
required-features = ["test_utils"]

arrow/benches/bitwise_kernel.rs

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#[macro_use]
19+
extern crate criterion;
20+
21+
use arrow::compute::kernels::bitwise::{
22+
bitwise_and, bitwise_and_scalar, bitwise_not, bitwise_or, bitwise_or_scalar,
23+
bitwise_xor, bitwise_xor_scalar,
24+
};
25+
use arrow::datatypes::Int64Type;
26+
use criterion::{black_box, Criterion};
27+
use rand::RngCore;
28+
29+
extern crate arrow;
30+
31+
use arrow::util::bench_util::create_primitive_array;
32+
use arrow::util::test_util::seedable_rng;
33+
34+
fn bitwise_array_benchmark(c: &mut Criterion) {
35+
let size = 64 * 1024_usize;
36+
let left_without_null = create_primitive_array::<Int64Type>(size, 0 as f32);
37+
let right_without_null = create_primitive_array::<Int64Type>(size, 0 as f32);
38+
let left_with_null = create_primitive_array::<Int64Type>(size, 0.2_f32);
39+
let right_with_null = create_primitive_array::<Int64Type>(size, 0.2_f32);
40+
// array and
41+
let mut group = c.benchmark_group("bench bitwise array: and");
42+
group.bench_function("bitwise array and, no nulls", |b| {
43+
b.iter(|| {
44+
black_box(bitwise_and(&left_without_null, &right_without_null).unwrap())
45+
})
46+
});
47+
group.bench_function("bitwise array and, 20% nulls", |b| {
48+
b.iter(|| black_box(bitwise_and(&left_with_null, &right_with_null).unwrap()))
49+
});
50+
group.finish();
51+
// array or
52+
let mut group = c.benchmark_group("bench bitwise: or");
53+
group.bench_function("bitwise array or, no nulls", |b| {
54+
b.iter(|| black_box(bitwise_or(&left_without_null, &right_without_null).unwrap()))
55+
});
56+
group.bench_function("bitwise array or, 20% nulls", |b| {
57+
b.iter(|| black_box(bitwise_or(&left_with_null, &right_with_null).unwrap()))
58+
});
59+
group.finish();
60+
// xor
61+
let mut group = c.benchmark_group("bench bitwise: xor");
62+
group.bench_function("bitwise array xor, no nulls", |b| {
63+
b.iter(|| {
64+
black_box(bitwise_xor(&left_without_null, &right_without_null).unwrap())
65+
})
66+
});
67+
group.bench_function("bitwise array xor, 20% nulls", |b| {
68+
b.iter(|| black_box(bitwise_xor(&left_with_null, &right_with_null).unwrap()))
69+
});
70+
group.finish();
71+
// not
72+
let mut group = c.benchmark_group("bench bitwise: not");
73+
group.bench_function("bitwise array not, no nulls", |b| {
74+
b.iter(|| black_box(bitwise_not(&left_without_null).unwrap()))
75+
});
76+
group.bench_function("bitwise array not, 20% nulls", |b| {
77+
b.iter(|| black_box(bitwise_not(&left_with_null).unwrap()))
78+
});
79+
group.finish();
80+
}
81+
82+
fn bitwise_array_scalar_benchmark(c: &mut Criterion) {
83+
let size = 64 * 1024_usize;
84+
let array_without_null = create_primitive_array::<Int64Type>(size, 0 as f32);
85+
let array_with_null = create_primitive_array::<Int64Type>(size, 0.2_f32);
86+
let scalar = seedable_rng().next_u64() as i64;
87+
// array scalar and
88+
let mut group = c.benchmark_group("bench bitwise array scalar: and");
89+
group.bench_function("bitwise array scalar and, no nulls", |b| {
90+
b.iter(|| black_box(bitwise_and_scalar(&array_without_null, scalar).unwrap()))
91+
});
92+
group.bench_function("bitwise array and, 20% nulls", |b| {
93+
b.iter(|| black_box(bitwise_and_scalar(&array_with_null, scalar).unwrap()))
94+
});
95+
group.finish();
96+
// array scalar or
97+
let mut group = c.benchmark_group("bench bitwise array scalar: or");
98+
group.bench_function("bitwise array scalar or, no nulls", |b| {
99+
b.iter(|| black_box(bitwise_or_scalar(&array_without_null, scalar).unwrap()))
100+
});
101+
group.bench_function("bitwise array scalar or, 20% nulls", |b| {
102+
b.iter(|| black_box(bitwise_or_scalar(&array_with_null, scalar).unwrap()))
103+
});
104+
group.finish();
105+
// array scalar xor
106+
let mut group = c.benchmark_group("bench bitwise array scalar: xor");
107+
group.bench_function("bitwise array scalar xor, no nulls", |b| {
108+
b.iter(|| black_box(bitwise_xor_scalar(&array_without_null, scalar).unwrap()))
109+
});
110+
group.bench_function("bitwise array scalar xor, 20% nulls", |b| {
111+
b.iter(|| black_box(bitwise_xor_scalar(&array_with_null, scalar).unwrap()))
112+
});
113+
group.finish();
114+
}
115+
116+
criterion_group!(
117+
benches,
118+
bitwise_array_benchmark,
119+
bitwise_array_scalar_benchmark
120+
);
121+
criterion_main!(benches);

arrow/src/bitmap.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,11 @@
1717

1818
//! Defines [Bitmap] for tracking validity bitmaps
1919
20-
use crate::buffer::Buffer;
2120
use crate::error::{ArrowError, Result};
2221
use crate::util::bit_util;
2322
use std::mem;
2423

25-
use arrow_buffer::buffer::{buffer_bin_and, buffer_bin_or};
24+
use arrow_buffer::buffer::{buffer_bin_and, buffer_bin_or, Buffer};
2625
use std::ops::{BitAnd, BitOr};
2726

2827
#[derive(Debug, Clone)]

arrow/src/datatypes/native.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use super::DataType;
19-
pub use arrow_buffer::native::{ArrowNativeType, ToByteSlice};
19+
pub use arrow_buffer::{ArrowNativeType, ToByteSlice};
2020
use half::f16;
2121

2222
/// Trait bridging the dynamic-typed nature of Arrow (via [`DataType`]) with the

arrow/src/ipc/reader.rs

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -578,10 +578,7 @@ pub fn read_record_batch(
578578
let mut node_index = 0;
579579
let mut arrays = vec![];
580580

581-
let options = RecordBatchOptions {
582-
row_count: Some(batch.length() as usize),
583-
..Default::default()
584-
};
581+
let options = RecordBatchOptions::new().with_row_count(Some(batch.length() as usize));
585582

586583
if let Some(projection) = projection {
587584
// project fields
@@ -1692,10 +1689,9 @@ mod tests {
16921689
#[test]
16931690
fn test_no_columns_batch() {
16941691
let schema = Arc::new(Schema::new(vec![]));
1695-
let options = RecordBatchOptions {
1696-
match_field_names: true,
1697-
row_count: Some(10),
1698-
};
1692+
let options = RecordBatchOptions::new()
1693+
.with_match_field_names(true)
1694+
.with_row_count(Some(10));
16991695
let input_batch =
17001696
RecordBatch::try_new_with_options(schema, vec![], &options).unwrap();
17011697
let output_batch = roundtrip_ipc_stream(&input_batch);

arrow/src/record_batch.rs

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ impl RecordBatch {
8080
/// # }
8181
/// ```
8282
pub fn try_new(schema: SchemaRef, columns: Vec<ArrayRef>) -> Result<Self> {
83-
let options = RecordBatchOptions::default();
83+
let options = RecordBatchOptions::new();
8484
Self::try_new_impl(schema, columns, &options)
8585
}
8686

@@ -413,15 +413,29 @@ pub struct RecordBatchOptions {
413413
pub row_count: Option<usize>,
414414
}
415415

416-
impl Default for RecordBatchOptions {
417-
fn default() -> Self {
416+
impl RecordBatchOptions {
417+
pub fn new() -> Self {
418418
Self {
419419
match_field_names: true,
420420
row_count: None,
421421
}
422422
}
423+
/// Sets the row_count of RecordBatchOptions and returns self
424+
pub fn with_row_count(mut self, row_count: Option<usize>) -> Self {
425+
self.row_count = row_count;
426+
self
427+
}
428+
/// Sets the match_field_names of RecordBatchOptions and returns self
429+
pub fn with_match_field_names(mut self, match_field_names: bool) -> Self {
430+
self.match_field_names = match_field_names;
431+
self
432+
}
433+
}
434+
impl Default for RecordBatchOptions {
435+
fn default() -> Self {
436+
Self::new()
437+
}
423438
}
424-
425439
impl From<&StructArray> for RecordBatch {
426440
/// Create a record batch from struct array, where each field of
427441
/// the `StructArray` becomes a `Field` in the schema.
@@ -901,10 +915,7 @@ mod tests {
901915
.to_string()
902916
.contains("must either specify a row count or at least one column"));
903917

904-
let options = RecordBatchOptions {
905-
row_count: Some(10),
906-
..Default::default()
907-
};
918+
let options = RecordBatchOptions::new().with_row_count(Some(10));
908919

909920
let ok =
910921
RecordBatch::try_new_with_options(schema.clone(), vec![], &options).unwrap();
@@ -929,4 +940,12 @@ mod tests {
929940
);
930941
assert_eq!("Invalid argument error: Column 'a' is declared as non-nullable but contains null values", format!("{}", maybe_batch.err().unwrap()));
931942
}
943+
#[test]
944+
fn test_record_batch_options() {
945+
let options = RecordBatchOptions::new()
946+
.with_match_field_names(false)
947+
.with_row_count(Some(20));
948+
assert!(!options.match_field_names);
949+
assert_eq!(options.row_count.unwrap(), 20)
950+
}
932951
}

arrow/src/util/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
pub use arrow_buffer::util::{bit_chunk_iterator, bit_util};
18+
pub use arrow_buffer::{bit_chunk_iterator, bit_util};
1919

2020
#[cfg(feature = "test_utils")]
2121
pub mod bench_util;

dev/release/update_change_log.sh

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,16 +29,45 @@
2929

3030
set -e
3131

32-
SINCE_TAG="21.0.0"
33-
FUTURE_RELEASE="22.0.0"
32+
SINCE_TAG="22.0.0"
33+
FUTURE_RELEASE="23.0.0"
3434

3535
SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
3636
SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)"
3737

3838
OUTPUT_PATH="${SOURCE_TOP_DIR}/CHANGELOG.md"
39+
OLD_OUTPUT_PATH="${SOURCE_TOP_DIR}/CHANGELOG-old.md"
3940

4041
# remove license header so github-changelog-generator has a clean base to append
41-
sed -i.bak '1,18d' "${OUTPUT_PATH}"
42+
sed -i.bak '1,21d' "${OUTPUT_PATH}"
43+
sed -i.bak '1,21d' "${OLD_OUTPUT_PATH}"
44+
# remove the github-changelog-generator footer from the old CHANGELOG.md
45+
LINE_COUNT=$(wc -l <"${OUTPUT_PATH}")
46+
sed -i.bak2 "$(( $LINE_COUNT-4+1 )),$ d" "${OUTPUT_PATH}"
47+
48+
# Copy the previous CHANGELOG.md to CHANGELOG-old.md
49+
echo '<!---
50+
Licensed to the Apache Software Foundation (ASF) under one
51+
or more contributor license agreements. See the NOTICE file
52+
distributed with this work for additional information
53+
regarding copyright ownership. The ASF licenses this file
54+
to you under the Apache License, Version 2.0 (the
55+
"License"); you may not use this file except in compliance
56+
with the License. You may obtain a copy of the License at
57+
58+
http://www.apache.org/licenses/LICENSE-2.0
59+
60+
Unless required by applicable law or agreed to in writing,
61+
software distributed under the License is distributed on an
62+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
63+
KIND, either express or implied. See the License for the
64+
specific language governing permissions and limitations
65+
under the License.
66+
-->
67+
68+
# Historical Changelog
69+
' | cat - "${OUTPUT_PATH}" "${OLD_OUTPUT_PATH}" > "${OLD_OUTPUT_PATH}".tmp
70+
mv "${OLD_OUTPUT_PATH}".tmp "${OLD_OUTPUT_PATH}"
4271

4372
# use exclude-tags-regex to filter out tags used for object_store
4473
# crates and only look at tags that DO NOT begin with `object_store_`

object_store/src/aws/client.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ impl S3Client {
411411
pub async fn create_multipart(&self, location: &Path) -> Result<MultipartId> {
412412
let credential = self.get_credential().await?;
413413
let url = format!(
414-
"{}/{}/{}?uploads",
414+
"{}/{}/{}?uploads=",
415415
self.config.endpoint,
416416
self.config.bucket,
417417
encode_path(location)

0 commit comments

Comments
 (0)