Skip to content

Commit 96c7c9d

Browse files
authored
Split out arrow-string (#2594) (#3295)
* Split out arrow-string (#2594)
* Doc
* Clippy
1 parent 7b71713 commit 96c7c9d

File tree

19 files changed

+2563
-2164
lines changed

19 files changed

+2563
-2164
lines changed

.github/workflows/arrow.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ jobs:
7070
run: cargo test -p arrow-csv --all-features
7171
- name: Test arrow-json with all features
7272
run: cargo test -p arrow-json --all-features
73+
- name: Test arrow-string with all features
74+
run: cargo test -p arrow-string --all-features
7375
- name: Test arrow-integration-test with all features
7476
run: cargo test -p arrow-integration-test --all-features
7577
- name: Test arrow with default features
@@ -184,5 +186,7 @@ jobs:
184186
run: cargo clippy -p arrow-csv --all-targets --all-features -- -D warnings
185187
- name: Clippy arrow-json with all features
186188
run: cargo clippy -p arrow-json --all-targets --all-features -- -D warnings
189+
- name: Clippy arrow-string with all features
190+
run: cargo clippy -p arrow-string --all-targets --all-features -- -D warnings
187191
- name: Clippy arrow
188192
run: cargo clippy -p arrow --features=prettyprint,csv,ipc,test_utils,ffi,ipc_compression,dyn_cmp_dict,dyn_arith_dict,chrono-tz --all-targets -- -D warnings

.github/workflows/arrow_flight.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,11 @@ on:
3131
- arrow-buffer/**
3232
- arrow-cast/**
3333
- arrow-data/**
34-
- arrow-schema/**
35-
- arrow-select/**
3634
- arrow-flight/**
3735
- arrow-ipc/**
36+
- arrow-schema/**
37+
- arrow-select/**
38+
- arrow-string/**
3839
- .github/**
3940

4041
jobs:

.github/workflows/dev_pr/labeler.yml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,17 @@
1616
# under the License.
1717

1818
arrow:
19-
- arrow/**/*
2019
- arrow-array/**/*
2120
- arrow-buffer/**/*
2221
- arrow-cast/**/*
22+
- arrow-csv/**/*
2323
- arrow-data/**/*
24-
- arrow-schema/**/*
25-
- arrow-select/**/*
2624
- arrow-ipc/**/*
27-
- arrow-csv/**/*
2825
- arrow-json/**/*
26+
- arrow-schema/**/*
27+
- arrow-select/**/*
28+
- arrow-string/**/*
29+
- arrow/**/*
2930

3031
arrow-flight:
3132
- arrow-flight/**/*

.github/workflows/integration.yml

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,20 +24,21 @@ on:
2424
- master
2525
pull_request:
2626
paths:
27-
- arrow/**
27+
- .github/**
2828
- arrow-array/**
2929
- arrow-buffer/**
3030
- arrow-cast/**
31+
- arrow-csv/**
3132
- arrow-data/**
32-
- arrow-schema/**
33-
- arrow-select/**
33+
- arrow-integration-test/**
34+
- arrow-integration-testing/**
3435
- arrow-ipc/**
35-
- arrow-csv/**
3636
- arrow-json/**
3737
- arrow-pyarrow-integration-testing/**
38-
- arrow-integration-test/**
39-
- arrow-integration-testing/**
40-
- .github/**
38+
- arrow-schema/**
39+
- arrow-select/**
40+
- arrow-string/**
41+
- arrow/**
4142

4243
jobs:
4344

.github/workflows/miri.yaml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,17 +24,18 @@ on:
2424
- master
2525
pull_request:
2626
paths:
27-
- arrow/**
27+
- .github/**
2828
- arrow-array/**
2929
- arrow-buffer/**
3030
- arrow-cast/**
31+
- arrow-csv/**
3132
- arrow-data/**
32-
- arrow-schema/**
33-
- arrow-select/**
3433
- arrow-ipc/**
35-
- arrow-csv/**
3634
- arrow-json/**
37-
- .github/**
35+
- arrow-schema/**
36+
- arrow-select/**
37+
- arrow-string/**
38+
- arrow/**
3839

3940
jobs:
4041
miri-checks:

.github/workflows/parquet.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ on:
3636
- arrow-ipc/**
3737
- arrow-csv/**
3838
- arrow-json/**
39+
- arrow-string/**
3940
- parquet/**
4041
- .github/**
4142

@@ -123,7 +124,7 @@ jobs:
123124
runs-on: ubuntu-latest
124125
strategy:
125126
matrix:
126-
rust: [stable]
127+
rust: [ stable ]
127128
steps:
128129
- uses: actions/checkout@v3
129130
- name: Setup Python

Cargo.toml

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -17,23 +17,24 @@
1717

1818
[workspace]
1919
members = [
20-
"arrow",
21-
"arrow-array",
22-
"arrow-buffer",
23-
"arrow-cast",
24-
"arrow-csv",
25-
"arrow-data",
26-
"arrow-flight",
27-
"arrow-integration-test",
28-
"arrow-integration-testing",
29-
"arrow-ipc",
30-
"arrow-json",
31-
"arrow-schema",
32-
"arrow-select",
33-
"object_store",
34-
"parquet",
35-
"parquet_derive",
36-
"parquet_derive_test",
20+
"arrow",
21+
"arrow-array",
22+
"arrow-buffer",
23+
"arrow-cast",
24+
"arrow-csv",
25+
"arrow-data",
26+
"arrow-flight",
27+
"arrow-integration-test",
28+
"arrow-integration-testing",
29+
"arrow-ipc",
30+
"arrow-json",
31+
"arrow-schema",
32+
"arrow-select",
33+
"arrow-string",
34+
"object_store",
35+
"parquet",
36+
"parquet_derive",
37+
"parquet_derive_test",
3738
]
3839
# Enable the version 2 feature resolver, which avoids unifying features for targets that are not being built
3940
#

arrow-string/Cargo.toml

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
[package]
19+
name = "arrow-string"
20+
version = "28.0.0"
21+
description = "String kernels for arrow arrays"
22+
homepage = "https://github.com/apache/arrow-rs"
23+
repository = "https://github.com/apache/arrow-rs"
24+
authors = ["Apache Arrow <dev@arrow.apache.org>"]
25+
license = "Apache-2.0"
26+
keywords = ["arrow"]
27+
include = [
28+
"benches/*.rs",
29+
"src/**/*.rs",
30+
"Cargo.toml",
31+
]
32+
edition = "2021"
33+
rust-version = "1.62"
34+
35+
[lib]
36+
name = "arrow_string"
37+
path = "src/lib.rs"
38+
bench = false
39+
40+
[dependencies]
41+
arrow-buffer = { version = "28.0.0", path = "../arrow-buffer" }
42+
arrow-data = { version = "28.0.0", path = "../arrow-data" }
43+
arrow-schema = { version = "28.0.0", path = "../arrow-schema" }
44+
arrow-array = { version = "28.0.0", path = "../arrow-array" }
45+
regex = { version = "1.7.0", default-features = false, features = ["std", "unicode", "perf"] }
46+
regex-syntax = { version = "0.6.27", default-features = false, features = ["unicode"] }
47+
48+
[features]
49+
dyn_cmp_dict = []

arrow/src/compute/kernels/concat_elements.rs renamed to arrow-string/src/concat_elements.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::array::*;
19-
use crate::error::{ArrowError, Result};
18+
use arrow_array::builder::BufferBuilder;
19+
use arrow_array::*;
2020
use arrow_data::bit_mask::combine_option_bitmap;
21+
use arrow_data::ArrayDataBuilder;
22+
use arrow_schema::ArrowError;
2123

2224
/// Returns the elementwise concatenation of a [`StringArray`].
2325
///
@@ -36,7 +38,7 @@ use arrow_data::bit_mask::combine_option_bitmap;
3638
pub fn concat_elements_utf8<Offset: OffsetSizeTrait>(
3739
left: &GenericStringArray<Offset>,
3840
right: &GenericStringArray<Offset>,
39-
) -> Result<GenericStringArray<Offset>> {
41+
) -> Result<GenericStringArray<Offset>, ArrowError> {
4042
if left.len() != right.len() {
4143
return Err(ArrowError::ComputeError(format!(
4244
"Arrays must have the same length: {} != {}",
@@ -89,7 +91,7 @@ pub fn concat_elements_utf8<Offset: OffsetSizeTrait>(
8991
/// An error will be returned if the [`StringArray`] are of different lengths
9092
pub fn concat_elements_utf8_many<Offset: OffsetSizeTrait>(
9193
arrays: &[&GenericStringArray<Offset>],
92-
) -> Result<GenericStringArray<Offset>> {
94+
) -> Result<GenericStringArray<Offset>, ArrowError> {
9395
if arrays.is_empty() {
9496
return Err(ArrowError::ComputeError(
9597
"concat requires input of at least one array".to_string(),
@@ -158,6 +160,7 @@ pub fn concat_elements_utf8_many<Offset: OffsetSizeTrait>(
158160
#[cfg(test)]
159161
mod tests {
160162
use super::*;
163+
use arrow_array::StringArray;
161164
#[test]
162165
fn test_string_concat() {
163166
let left = [Some("foo"), Some("bar"), None]

0 commit comments

Comments
 (0)