Skip to content

Commit 9ad9d68

Browse files
committed
Split out arrow-row (apache#2594)
1 parent c344433 commit 9ad9d68

File tree

14 files changed

+124
-68
lines changed

14 files changed

+124
-68
lines changed

.github/workflows/arrow.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ on:
3535
- arrow-ipc/**
3636
- arrow-json/**
3737
- arrow-ord/**
38+
- arrow-row/**
3839
- arrow-schema/**
3940
- arrow-select/**
4041
- arrow-string/**
@@ -76,6 +77,8 @@ jobs:
7677
run: cargo test -p arrow-string --all-features
7778
- name: Test arrow-ord with all features except SIMD
7879
run: cargo test -p arrow-ord --features dyn_cmp_dict
80+
- name: Test arrow-row with all features
81+
run: cargo test -p arrow-row --all-features
7982
- name: Test arrow-integration-test with all features
8083
run: cargo test -p arrow-integration-test --all-features
8184
- name: Test arrow with default features
@@ -196,5 +199,7 @@ jobs:
196199
run: cargo clippy -p arrow-string --all-targets --all-features -- -D warnings
197200
- name: Clippy arrow-ord with all features except SIMD
198201
run: cargo clippy -p arrow-ord --all-targets --features dyn_cmp_dict -- -D warnings
202+
- name: Clippy arrow-row with all features
203+
run: cargo clippy -p arrow-row --all-targets --all-features -- -D warnings
199204
- name: Clippy arrow
200205
run: cargo clippy -p arrow --features=prettyprint,csv,ipc,test_utils,ffi,ipc_compression,dyn_cmp_dict,dyn_arith_dict,chrono-tz --all-targets -- -D warnings

.github/workflows/integration.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,12 @@ on:
3333
- arrow-integration-test/**
3434
- arrow-integration-testing/**
3535
- arrow-ipc/**
36-
- arrow-ord/**
3736
- arrow-json/**
37+
- arrow-ord/**
3838
- arrow-pyarrow-integration-testing/**
3939
- arrow-schema/**
4040
- arrow-select/**
41+
- arrow-row/**
4142
- arrow-string/**
4243
- arrow/**
4344

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ members = [
2929
"arrow-ipc",
3030
"arrow-json",
3131
"arrow-ord",
32+
"arrow-row",
3233
"arrow-schema",
3334
"arrow-select",
3435
"arrow-string",

arrow-array/src/lib.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,25 @@ pub mod timezone;
183183
mod trusted_len;
184184
pub mod types;
185185

186+
/// Options that define how sort kernels should behave
187+
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
188+
pub struct SortOptions {
189+
/// Whether to sort in descending order
190+
pub descending: bool,
191+
/// Whether to sort nulls first
192+
pub nulls_first: bool,
193+
}
194+
195+
impl Default for SortOptions {
196+
fn default() -> Self {
197+
Self {
198+
descending: false,
199+
// default to nulls first to match spark's behavior
200+
nulls_first: true,
201+
}
202+
}
203+
}
204+
186205
#[cfg(test)]
187206
mod tests {
188207
use crate::builder::*;

arrow-ord/src/sort.rs

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ use arrow_schema::{ArrowError, DataType, IntervalUnit, TimeUnit};
2727
use arrow_select::take::take;
2828
use std::cmp::Ordering;
2929

30+
pub use arrow_array::SortOptions;
31+
3032
/// Sort the `ArrayRef` using `SortOptions`.
3133
///
3234
/// Performs a sort on values and indices. Nulls are ordered according
@@ -366,25 +368,6 @@ pub fn sort_to_indices(
366368
})
367369
}
368370

369-
/// Options that define how sort kernels should behave
370-
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
371-
pub struct SortOptions {
372-
/// Whether to sort in descending order
373-
pub descending: bool,
374-
/// Whether to sort nulls first
375-
pub nulls_first: bool,
376-
}
377-
378-
impl Default for SortOptions {
379-
fn default() -> Self {
380-
Self {
381-
descending: false,
382-
// default to nulls first to match spark's behavior
383-
nulls_first: true,
384-
}
385-
}
386-
}
387-
388371
/// Sort boolean values
389372
///
390373
/// when a limit is present, the sort is pair-comparison based as k-select might be more efficient,

arrow-row/Cargo.toml

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
[package]
19+
name = "arrow-row"
20+
version = "29.0.0"
21+
description = "Arrow row format"
22+
homepage = "https://github.com/apache/arrow-rs"
23+
repository = "https://github.com/apache/arrow-rs"
24+
authors = ["Apache Arrow <dev@arrow.apache.org>"]
25+
license = "Apache-2.0"
26+
keywords = ["arrow"]
27+
include = [
28+
"benches/*.rs",
29+
"src/**/*.rs",
30+
"Cargo.toml",
31+
]
32+
edition = "2021"
33+
rust-version = "1.62"
34+
35+
[lib]
36+
name = "arrow_row"
37+
path = "src/lib.rs"
38+
bench = false
39+
40+
[target.'cfg(target_arch = "wasm32")'.dependencies]
41+
ahash = { version = "0.8", default-features = false, features = ["compile-time-rng"] }
42+
43+
[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
44+
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
45+
46+
[dependencies]
47+
arrow-array = { version = "29.0.0", path = "../arrow-array" }
48+
arrow-buffer = { version = "29.0.0", path = "../arrow-buffer" }
49+
arrow-data = { version = "29.0.0", path = "../arrow-data" }
50+
arrow-schema = { version = "29.0.0", path = "../arrow-schema" }
51+
52+
half = { version = "2.1", default-features = false }
53+
hashbrown = { version = "0.13", default-features = false }
54+
55+
[dev-dependencies]
56+
arrow-cast = { version = "29.0.0", path = "../arrow-cast" }
57+
arrow-ord = { version = "29.0.0", path = "../arrow-ord" }
58+
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
59+
60+
[features]
61+

arrow/src/row/dictionary.rs renamed to arrow-row/src/dictionary.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,9 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::compute::SortOptions;
19-
use crate::row::fixed::{FixedLengthEncoding, FromSlice};
20-
use crate::row::interner::{Interned, OrderPreservingInterner};
21-
use crate::row::{null_sentinel, Rows};
18+
use crate::fixed::{FixedLengthEncoding, FromSlice};
19+
use crate::interner::{Interned, OrderPreservingInterner};
20+
use crate::{null_sentinel, Rows};
2221
use arrow_array::builder::*;
2322
use arrow_array::cast::*;
2423
use arrow_array::types::*;

arrow/src/row/fixed.rs renamed to arrow-row/src/fixed.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,9 @@
1616
// under the License.
1717

1818
use crate::array::PrimitiveArray;
19-
use crate::compute::SortOptions;
20-
use crate::datatypes::ArrowPrimitiveType;
21-
use crate::row::{null_sentinel, Rows};
19+
use crate::{null_sentinel, Rows};
2220
use arrow_array::builder::BufferBuilder;
23-
use arrow_array::{BooleanArray, FixedSizeBinaryArray};
21+
use arrow_array::{ArrowPrimitiveType, BooleanArray, FixedSizeBinaryArray, SortOptions};
2422
use arrow_buffer::{bit_util, i256, ArrowNativeType, Buffer, MutableBuffer};
2523
use arrow_data::{ArrayData, ArrayDataBuilder};
2624
use arrow_schema::DataType;
File renamed without changes.

arrow/src/row/mod.rs renamed to arrow-row/src/lib.rs

Lines changed: 22 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
//! # Basic Example
5151
//! ```
5252
//! # use std::sync::Arc;
53-
//! # use arrow::row::{RowConverter, SortField};
53+
//! # use arrow_row::{RowConverter, SortField};
5454
//! # use arrow_array::{ArrayRef, Int32Array, StringArray};
5555
//! # use arrow_array::cast::{as_primitive_array, as_string_array};
5656
//! # use arrow_array::types::Int32Type;
@@ -102,7 +102,7 @@
102102
//! The row format can also be used to implement a fast multi-column / lexicographic sort
103103
//!
104104
//! ```
105-
//! # use arrow::row::{RowConverter, SortField};
105+
//! # use arrow_row::{RowConverter, SortField};
106106
//! # use arrow_array::{ArrayRef, UInt32Array};
107107
//! fn lexsort_to_indices(arrays: &[ArrayRef]) -> UInt32Array {
108108
//! let fields = arrays
@@ -131,18 +131,16 @@ use std::sync::Arc;
131131

132132
use arrow_array::cast::*;
133133
use arrow_array::*;
134+
use arrow_buffer::ArrowNativeType;
134135
use arrow_data::ArrayDataBuilder;
136+
use arrow_schema::*;
135137

136-
use crate::compute::SortOptions;
137-
use crate::datatypes::*;
138-
use crate::error::{ArrowError, Result};
139-
use crate::row::dictionary::{
138+
use crate::dictionary::{
140139
compute_dictionary_mapping, decode_dictionary, encode_dictionary,
141140
};
142-
use crate::row::fixed::{decode_bool, decode_fixed_size_binary, decode_primitive};
143-
use crate::row::interner::OrderPreservingInterner;
144-
use crate::row::variable::{decode_binary, decode_string};
145-
use crate::{downcast_dictionary_array, downcast_primitive_array};
141+
use crate::fixed::{decode_bool, decode_fixed_size_binary, decode_primitive};
142+
use crate::interner::OrderPreservingInterner;
143+
use crate::variable::{decode_binary, decode_string};
146144

147145
mod dictionary;
148146
mod fixed;
@@ -437,7 +435,7 @@ enum Codec {
437435
}
438436

439437
impl Codec {
440-
fn new(sort_field: &SortField) -> Result<Self> {
438+
fn new(sort_field: &SortField) -> Result<Self, ArrowError> {
441439
match &sort_field.data_type {
442440
DataType::Dictionary(_, _) => Ok(Self::Dictionary(Default::default())),
443441
d if !d.is_nested() => Ok(Self::Stateless),
@@ -485,7 +483,7 @@ impl Codec {
485483
}
486484
}
487485

488-
fn encoder(&mut self, array: &dyn Array) -> Result<Encoder<'_>> {
486+
fn encoder(&mut self, array: &dyn Array) -> Result<Encoder<'_>, ArrowError> {
489487
match self {
490488
Codec::Stateless => Ok(Encoder::Stateless),
491489
Codec::Dictionary(interner) => {
@@ -577,15 +575,15 @@ impl SortField {
577575

578576
impl RowConverter {
579577
/// Create a new [`RowConverter`] with the provided schema
580-
pub fn new(fields: Vec<SortField>) -> Result<Self> {
578+
pub fn new(fields: Vec<SortField>) -> Result<Self, ArrowError> {
581579
if !Self::supports_fields(&fields) {
582580
return Err(ArrowError::NotYetImplemented(format!(
583581
"Row format support not yet implemented for: {:?}",
584582
fields
585583
)));
586584
}
587585

588-
let codecs = fields.iter().map(Codec::new).collect::<Result<_>>()?;
586+
let codecs = fields.iter().map(Codec::new).collect::<Result<_, _>>()?;
589587
Ok(Self {
590588
fields: fields.into(),
591589
codecs,
@@ -617,7 +615,7 @@ impl RowConverter {
617615
/// # Panics
618616
///
619617
/// Panics if the schema of `columns` does not match that provided to [`RowConverter::new`]
620-
pub fn convert_columns(&mut self, columns: &[ArrayRef]) -> Result<Rows> {
618+
pub fn convert_columns(&mut self, columns: &[ArrayRef]) -> Result<Rows, ArrowError> {
621619
if columns.len() != self.fields.len() {
622620
return Err(ArrowError::InvalidArgumentError(format!(
623621
"Incorrect number of arrays provided to RowConverter, expected {} got {}",
@@ -640,7 +638,7 @@ impl RowConverter {
640638
}
641639
codec.encoder(column.as_ref())
642640
})
643-
.collect::<Result<Vec<_>>>()?;
641+
.collect::<Result<Vec<_>, _>>()?;
644642

645643
let config = RowConfig {
646644
fields: Arc::clone(&self.fields),
@@ -671,7 +669,7 @@ impl RowConverter {
671669
/// # Panics
672670
///
673671
/// Panics if the rows were not produced by this [`RowConverter`]
674-
pub fn convert_rows<'a, I>(&self, rows: I) -> Result<Vec<ArrayRef>>
672+
pub fn convert_rows<'a, I>(&self, rows: I) -> Result<Vec<ArrayRef>, ArrowError>
675673
where
676674
I: IntoIterator<Item = Row<'a>>,
677675
{
@@ -703,7 +701,7 @@ impl RowConverter {
703701
&self,
704702
rows: &mut [&[u8]],
705703
validate_utf8: bool,
706-
) -> Result<Vec<ArrayRef>> {
704+
) -> Result<Vec<ArrayRef>, ArrowError> {
707705
self.fields
708706
.iter()
709707
.zip(&self.codecs)
@@ -1196,7 +1194,7 @@ unsafe fn decode_column(
11961194
rows: &mut [&[u8]],
11971195
codec: &Codec,
11981196
validate_utf8: bool,
1199-
) -> Result<ArrayRef> {
1197+
) -> Result<ArrayRef, ArrowError> {
12001198
let options = field.options;
12011199

12021200
let array: ArrayRef = match codec {
@@ -1255,24 +1253,18 @@ unsafe fn decode_column(
12551253
mod tests {
12561254
use std::sync::Arc;
12571255

1258-
use arrow_array::builder::{
1259-
FixedSizeBinaryBuilder, GenericListBuilder, Int32Builder,
1260-
};
12611256
use rand::distributions::uniform::SampleUniform;
12621257
use rand::distributions::{Distribution, Standard};
12631258
use rand::{thread_rng, Rng};
12641259

1265-
use arrow_array::NullArray;
1260+
use arrow_array::builder::*;
1261+
use arrow_array::types::*;
1262+
use arrow_array::*;
1263+
use arrow_buffer::i256;
12661264
use arrow_buffer::Buffer;
1265+
use arrow_cast::display::array_value_to_string;
12671266
use arrow_ord::sort::{LexicographicalComparator, SortColumn, SortOptions};
12681267

1269-
use crate::array::{
1270-
BinaryArray, BooleanArray, DictionaryArray, Float32Array, GenericStringArray,
1271-
Int16Array, Int32Array, OffsetSizeTrait, PrimitiveArray,
1272-
PrimitiveDictionaryBuilder, StringArray,
1273-
};
1274-
use crate::util::display::array_value_to_string;
1275-
12761268
use super::*;
12771269

12781270
#[test]

arrow/src/row/list.rs renamed to arrow-row/src/list.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,9 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::compute::SortOptions;
19-
use crate::row::{RowConverter, Rows, SortField};
18+
use crate::{RowConverter, Rows, SortField};
2019
use arrow_array::builder::BufferBuilder;
21-
use arrow_array::{Array, GenericListArray, OffsetSizeTrait};
20+
use arrow_array::{Array, GenericListArray, OffsetSizeTrait, SortOptions};
2221
use arrow_data::ArrayDataBuilder;
2322
use arrow_schema::ArrowError;
2423
use std::ops::Range;

arrow/src/row/variable.rs renamed to arrow-row/src/variable.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,10 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::compute::SortOptions;
19-
use crate::row::{null_sentinel, Rows};
20-
use crate::util::bit_util::ceil;
18+
use crate::{null_sentinel, Rows};
2119
use arrow_array::builder::BufferBuilder;
22-
use arrow_array::{Array, GenericBinaryArray, GenericStringArray, OffsetSizeTrait};
20+
use arrow_array::*;
21+
use arrow_buffer::bit_util::ceil;
2322
use arrow_buffer::MutableBuffer;
2423
use arrow_data::ArrayDataBuilder;
2524
use arrow_schema::DataType;

arrow/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ pub mod pyarrow;
328328
pub mod record_batch {
329329
pub use arrow_array::{RecordBatch, RecordBatchOptions, RecordBatchReader};
330330
}
331-
pub mod row;
332331
pub use arrow_array::temporal_conversions;
332+
pub use arrow_row as row;
333333
pub mod tensor;
334334
pub mod util;

0 commit comments

Comments
 (0)