Skip to content

Commit b9cc1fc

Browse files
committed
Split out arrow-buffer crate (apache#2594)
1 parent 5ccf73e commit b9cc1fc

25 files changed

+676
-536
lines changed

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,11 @@
1818
[workspace]
1919
members = [
2020
"arrow",
21+
"arrow-buffer",
22+
"arrow-flight",
2123
"parquet",
2224
"parquet_derive",
2325
"parquet_derive_test",
24-
"arrow-flight",
2526
"integration-testing",
2627
"object_store",
2728
]

arrow-buffer/Cargo.toml

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
[package]
19+
name = "arrow-buffer"
20+
version = "22.0.0"
21+
description = "Buffer abstractions for Apache Arrow"
22+
homepage = "https://github.com/apache/arrow-rs"
23+
repository = "https://github.com/apache/arrow-rs"
24+
authors = ["Apache Arrow <[email protected]>"]
25+
license = "Apache-2.0"
26+
keywords = ["arrow"]
27+
include = [
28+
"benches/*.rs",
29+
"src/**/*.rs",
30+
"Cargo.toml",
31+
]
32+
edition = "2021"
33+
rust-version = "1.62"
34+
35+
[lib]
36+
name = "arrow_buffer"
37+
path = "src/lib.rs"
38+
bench = false
39+
40+
[dependencies]
41+
num = { version = "0.4", default-features = false, features = ["std"] }
42+
half = { version = "2.0", default-features = false }
43+
44+
[dev-dependencies]
45+
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
46+
47+
[build-dependencies]
File renamed without changes.

arrow/src/alloc/mod.rs renamed to arrow-buffer/src/alloc/mod.rs

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,34 +20,31 @@
2020
2121
use std::alloc::{handle_alloc_error, Layout};
2222
use std::fmt::{Debug, Formatter};
23-
use std::mem::size_of;
2423
use std::panic::RefUnwindSafe;
2524
use std::ptr::NonNull;
2625
use std::sync::Arc;
2726

2827
mod alignment;
29-
mod types;
3028

3129
pub use alignment::ALIGNMENT;
32-
pub use types::NativeType;
3330

3431
#[inline]
35-
unsafe fn null_pointer<T: NativeType>() -> NonNull<T> {
36-
NonNull::new_unchecked(ALIGNMENT as *mut T)
32+
unsafe fn null_pointer() -> NonNull<u8> {
33+
NonNull::new_unchecked(ALIGNMENT as *mut u8)
3734
}
3835

3936
/// Allocates a cache-aligned memory region of `size` bytes with uninitialized values.
4037
/// This is more performant than using [allocate_aligned_zeroed] when all bytes will have
4138
/// an unknown or non-zero value and is semantically similar to `malloc`.
42-
pub fn allocate_aligned<T: NativeType>(size: usize) -> NonNull<T> {
39+
pub fn allocate_aligned(size: usize) -> NonNull<u8> {
4340
unsafe {
4441
if size == 0 {
4542
null_pointer()
4643
} else {
47-
let size = size * size_of::<T>();
44+
let size = size;
4845

4946
let layout = Layout::from_size_align_unchecked(size, ALIGNMENT);
50-
let raw_ptr = std::alloc::alloc(layout) as *mut T;
47+
let raw_ptr = std::alloc::alloc(layout);
5148
NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
5249
}
5350
}
@@ -56,15 +53,13 @@ pub fn allocate_aligned<T: NativeType>(size: usize) -> NonNull<T> {
5653
/// Allocates a cache-aligned memory region of `size` bytes with `0` on all of them.
5754
/// This is more performant than using [allocate_aligned] and setting all bytes to zero
5855
/// and is semantically similar to `calloc`.
59-
pub fn allocate_aligned_zeroed<T: NativeType>(size: usize) -> NonNull<T> {
56+
pub fn allocate_aligned_zeroed(size: usize) -> NonNull<u8> {
6057
unsafe {
6158
if size == 0 {
6259
null_pointer()
6360
} else {
64-
let size = size * size_of::<T>();
65-
6661
let layout = Layout::from_size_align_unchecked(size, ALIGNMENT);
67-
let raw_ptr = std::alloc::alloc_zeroed(layout) as *mut T;
62+
let raw_ptr = std::alloc::alloc_zeroed(layout);
6863
NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
6964
}
7065
}
@@ -78,9 +73,8 @@ pub fn allocate_aligned_zeroed<T: NativeType>(size: usize) -> NonNull<T> {
7873
/// * ptr must denote a block of memory currently allocated via this allocator,
7974
///
8075
/// * size must be the same size that was used to allocate that block of memory,
81-
pub unsafe fn free_aligned<T: NativeType>(ptr: NonNull<T>, size: usize) {
76+
pub unsafe fn free_aligned(ptr: NonNull<u8>, size: usize) {
8277
if ptr != null_pointer() {
83-
let size = size * size_of::<T>();
8478
std::alloc::dealloc(
8579
ptr.as_ptr() as *mut u8,
8680
Layout::from_size_align_unchecked(size, ALIGNMENT),
@@ -99,13 +93,11 @@ pub unsafe fn free_aligned<T: NativeType>(ptr: NonNull<T>, size: usize) {
9993
///
10094
/// * new_size, when rounded up to the nearest multiple of [ALIGNMENT], must not overflow (i.e.,
10195
/// the rounded value must be less than usize::MAX).
102-
pub unsafe fn reallocate<T: NativeType>(
103-
ptr: NonNull<T>,
96+
pub unsafe fn reallocate(
97+
ptr: NonNull<u8>,
10498
old_size: usize,
10599
new_size: usize,
106-
) -> NonNull<T> {
107-
let old_size = old_size * size_of::<T>();
108-
let new_size = new_size * size_of::<T>();
100+
) -> NonNull<u8> {
109101
if ptr == null_pointer() {
110102
return allocate_aligned(new_size);
111103
}
@@ -119,7 +111,7 @@ pub unsafe fn reallocate<T: NativeType>(
119111
ptr.as_ptr() as *mut u8,
120112
Layout::from_size_align_unchecked(old_size, ALIGNMENT),
121113
new_size,
122-
) as *mut T;
114+
);
123115
NonNull::new(raw_ptr).unwrap_or_else(|| {
124116
handle_alloc_error(Layout::from_size_align_unchecked(new_size, ALIGNMENT))
125117
})

arrow/src/buffer/immutable.rs renamed to arrow-buffer/src/buffer/immutable.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use std::{convert::AsRef, usize};
2323

2424
use crate::alloc::{Allocation, Deallocation};
2525
use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk};
26-
use crate::{bytes::Bytes, datatypes::ArrowNativeType};
26+
use crate::{bytes::Bytes, native::ArrowNativeType};
2727

2828
use super::ops::bitwise_unary_op_helper;
2929
use super::MutableBuffer;
@@ -271,7 +271,7 @@ impl Buffer {
271271
/// Prefer this to `collect` whenever possible, as it is ~60% faster.
272272
/// # Example
273273
/// ```
274-
/// # use arrow::buffer::Buffer;
274+
/// # use arrow_buffer::buffer::Buffer;
275275
/// let v = vec![1u32];
276276
/// let iter = v.iter().map(|x| x * 2);
277277
/// let buffer = unsafe { Buffer::from_trusted_len_iter(iter) };

arrow-buffer/src/buffer/mod.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! This module contains two main structs: [Buffer] and [MutableBuffer]. A buffer represents
19+
//! a contiguous memory region that can be shared via `offsets`.
20+
21+
mod immutable;
22+
pub use immutable::*;
23+
mod mutable;
24+
pub use mutable::*;
25+
mod ops;
26+
mod scalar;
27+
pub use scalar::*;
28+
29+
pub use ops::*;

arrow/src/buffer/mutable.rs renamed to arrow-buffer/src/buffer/mutable.rs

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use crate::alloc::Deallocation;
2020
use crate::{
2121
alloc,
2222
bytes::Bytes,
23-
datatypes::{ArrowNativeType, ToByteSlice},
23+
native::{ArrowNativeType, ToByteSlice},
2424
util::bit_util,
2525
};
2626
use std::ptr::NonNull;
@@ -36,7 +36,7 @@ use std::ptr::NonNull;
3636
/// # Example
3737
///
3838
/// ```
39-
/// # use arrow::buffer::{Buffer, MutableBuffer};
39+
/// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
4040
/// let mut buffer = MutableBuffer::new(0);
4141
/// buffer.push(256u32);
4242
/// buffer.extend_from_slice(&[1u32]);
@@ -75,7 +75,7 @@ impl MutableBuffer {
7575
/// all bytes are guaranteed to be `0u8`.
7676
/// # Example
7777
/// ```
78-
/// # use arrow::buffer::{Buffer, MutableBuffer};
78+
/// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
7979
/// let mut buffer = MutableBuffer::from_len_zeroed(127);
8080
/// assert_eq!(buffer.len(), 127);
8181
/// assert!(buffer.capacity() >= 127);
@@ -131,7 +131,7 @@ impl MutableBuffer {
131131
/// `self.len + additional > capacity`.
132132
/// # Example
133133
/// ```
134-
/// # use arrow::buffer::{Buffer, MutableBuffer};
134+
/// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
135135
/// let mut buffer = MutableBuffer::new(0);
136136
/// buffer.reserve(253); // allocates for the first time
137137
/// (0..253u8).for_each(|i| buffer.push(i)); // no reallocation
@@ -171,7 +171,7 @@ impl MutableBuffer {
171171
/// growing it (potentially reallocating it) and writing `value` in the newly available bytes.
172172
/// # Example
173173
/// ```
174-
/// # use arrow::buffer::{Buffer, MutableBuffer};
174+
/// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
175175
/// let mut buffer = MutableBuffer::new(0);
176176
/// buffer.resize(253, 2); // allocates for the first time
177177
/// assert_eq!(buffer.as_slice()[252], 2u8);
@@ -195,7 +195,7 @@ impl MutableBuffer {
195195
///
196196
/// # Example
197197
/// ```
198-
/// # use arrow::buffer::{Buffer, MutableBuffer};
198+
/// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
199199
/// // 2 cache lines
200200
/// let mut buffer = MutableBuffer::new(128);
201201
/// assert_eq!(buffer.capacity(), 128);
@@ -322,7 +322,7 @@ impl MutableBuffer {
322322
/// Extends this buffer from a slice of items that can be represented in bytes, increasing its capacity if needed.
323323
/// # Example
324324
/// ```
325-
/// # use arrow::buffer::MutableBuffer;
325+
/// # use arrow_buffer::buffer::MutableBuffer;
326326
/// let mut buffer = MutableBuffer::new(0);
327327
/// buffer.extend_from_slice(&[2u32, 0]);
328328
/// assert_eq!(buffer.len(), 8) // u32 has 4 bytes
@@ -346,7 +346,7 @@ impl MutableBuffer {
346346
/// Extends the buffer with a new item, increasing its capacity if needed.
347347
/// # Example
348348
/// ```
349-
/// # use arrow::buffer::MutableBuffer;
349+
/// # use arrow_buffer::buffer::MutableBuffer;
350350
/// let mut buffer = MutableBuffer::new(0);
351351
/// buffer.push(256u32);
352352
/// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
@@ -384,7 +384,7 @@ impl MutableBuffer {
384384
/// # Safety
385385
/// The caller must ensure that the buffer was properly initialized up to `len`.
386386
#[inline]
387-
pub(crate) unsafe fn set_len(&mut self, len: usize) {
387+
pub unsafe fn set_len(&mut self, len: usize) {
388388
assert!(len <= self.capacity());
389389
self.len = len;
390390
}
@@ -394,7 +394,7 @@ impl MutableBuffer {
394394
/// This is similar to `from_trusted_len_iter_bool`, however, can be significantly faster
395395
/// as it eliminates the conditional `Iterator::next`
396396
#[inline]
397-
pub(crate) fn collect_bool<F: FnMut(usize) -> bool>(len: usize, mut f: F) -> Self {
397+
pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, mut f: F) -> Self {
398398
let mut buffer = Self::new(bit_util::ceil(len, 8));
399399

400400
let chunks = len / 8;
@@ -484,7 +484,7 @@ impl MutableBuffer {
484484
/// Prefer this to `collect` whenever possible, as it is faster ~60% faster.
485485
/// # Example
486486
/// ```
487-
/// # use arrow::buffer::MutableBuffer;
487+
/// # use arrow_buffer::buffer::MutableBuffer;
488488
/// let v = vec![1u32];
489489
/// let iter = v.iter().map(|x| x * 2);
490490
/// let buffer = unsafe { MutableBuffer::from_trusted_len_iter(iter) };
@@ -525,10 +525,10 @@ impl MutableBuffer {
525525
}
526526

527527
/// Creates a [`MutableBuffer`] from a boolean [`Iterator`] with a trusted (upper) length.
528-
/// # use arrow::buffer::MutableBuffer;
528+
/// # use arrow_buffer::buffer::MutableBuffer;
529529
/// # Example
530530
/// ```
531-
/// # use arrow::buffer::MutableBuffer;
531+
/// # use arrow_buffer::buffer::MutableBuffer;
532532
/// let v = vec![false, true, false];
533533
/// let iter = v.iter().map(|x| *x || true);
534534
/// let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(iter) };

arrow/src/buffer/ops.rs renamed to arrow-buffer/src/buffer/ops.rs

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,26 +20,19 @@ use crate::util::bit_util::ceil;
2020

2121
/// Apply a bitwise operation `op` to four inputs and return the result as a Buffer.
2222
/// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits.
23-
#[allow(clippy::too_many_arguments)]
24-
pub(crate) fn bitwise_quaternary_op_helper<F>(
25-
first: &Buffer,
26-
first_offset_in_bits: usize,
27-
second: &Buffer,
28-
second_offset_in_bits: usize,
29-
third: &Buffer,
30-
third_offset_in_bits: usize,
31-
fourth: &Buffer,
32-
fourth_offset_in_bits: usize,
23+
pub fn bitwise_quaternary_op_helper<F>(
24+
buffers: [&Buffer; 4],
25+
offsets: [usize; 4],
3326
len_in_bits: usize,
3427
op: F,
3528
) -> Buffer
3629
where
3730
F: Fn(u64, u64, u64, u64) -> u64,
3831
{
39-
let first_chunks = first.bit_chunks(first_offset_in_bits, len_in_bits);
40-
let second_chunks = second.bit_chunks(second_offset_in_bits, len_in_bits);
41-
let third_chunks = third.bit_chunks(third_offset_in_bits, len_in_bits);
42-
let fourth_chunks = fourth.bit_chunks(fourth_offset_in_bits, len_in_bits);
32+
let first_chunks = buffers[0].bit_chunks(offsets[0], len_in_bits);
33+
let second_chunks = buffers[1].bit_chunks(offsets[1], len_in_bits);
34+
let third_chunks = buffers[2].bit_chunks(offsets[2], len_in_bits);
35+
let fourth_chunks = buffers[3].bit_chunks(offsets[3], len_in_bits);
4336

4437
let chunks = first_chunks
4538
.iter()

arrow/src/buffer/scalar.rs renamed to arrow-buffer/src/buffer/scalar.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use crate::buffer::Buffer;
19-
use crate::datatypes::ArrowNativeType;
19+
use crate::native::ArrowNativeType;
2020
use std::ops::Deref;
2121

2222
/// Provides a safe API for interpreting a [`Buffer`] as a slice of [`ArrowNativeType`]

0 commit comments

Comments
 (0)