Skip to content

Commit 67002a0

Browse files
authored
Upgrade to arrow 23.0.0 (#3483)
* Changes for API * Update avro code for API changes * Use `divide_opt` kernel * Update update_arrow_deps.py * Update arrow dependency to 23.0.0 * Use nicer RecordBatchOptions API * cleanups * fix: update
1 parent e873423 commit 67002a0

File tree

19 files changed

+45
-36
lines changed

19 files changed

+45
-36
lines changed

datafusion-cli/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ rust-version = "1.62"
2929
readme = "README.md"
3030

3131
[dependencies]
32-
arrow = "22.0.0"
32+
arrow = "23.0.0"
3333
clap = { version = "3", features = ["derive", "cargo"] }
3434
datafusion = { path = "../datafusion/core", version = "12.0.0" }
3535
dirs = "4.0.0"

datafusion-examples/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ path = "examples/avro_sql.rs"
3434
required-features = ["datafusion/avro"]
3535

3636
[dev-dependencies]
37-
arrow-flight = "22.0.0"
37+
arrow-flight = "23.0.0"
3838
async-trait = "0.1.41"
3939
datafusion = { path = "../datafusion/core" }
4040
futures = "0.3"

datafusion-examples/examples/flight_server.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use std::pin::Pin;
1919
use std::sync::Arc;
2020

2121
use arrow_flight::SchemaAsIpc;
22+
use datafusion::arrow::error::ArrowError;
2223
use datafusion::datasource::file_format::parquet::ParquetFormat;
2324
use datafusion::datasource::listing::{ListingOptions, ListingTableUrl};
2425
use futures::Stream;
@@ -77,7 +78,9 @@ impl FlightService for FlightServiceImpl {
7778
.unwrap();
7879

7980
let options = datafusion::arrow::ipc::writer::IpcWriteOptions::default();
80-
let schema_result = SchemaAsIpc::new(&schema, &options).into();
81+
let schema_result = SchemaAsIpc::new(&schema, &options)
82+
.try_into()
83+
.map_err(|e: ArrowError| tonic::Status::internal(e.to_string()))?;
8184

8285
Ok(Response::new(schema_result))
8386
}

datafusion/common/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,11 @@ pyarrow = ["pyo3"]
3939

4040
[dependencies]
4141
apache-avro = { version = "0.14", features = ["snappy"], optional = true }
42-
arrow = { version = "22.0.0", features = ["prettyprint"] }
42+
arrow = { version = "23.0.0", features = ["prettyprint"] }
4343
avro-rs = { version = "0.13", features = ["snappy"], optional = true }
4444
cranelift-module = { version = "0.87.0", optional = true }
4545
object_store = { version = "0.5.0", optional = true }
4646
ordered-float = "3.0"
47-
parquet = { version = "22.0.0", features = ["arrow"], optional = true }
47+
parquet = { version = "23.0.0", features = ["arrow"], optional = true }
4848
pyo3 = { version = "0.17.1", optional = true }
4949
sqlparser = "0.23"

datafusion/core/Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ unicode_expressions = ["datafusion-physical-expr/regex_expressions", "datafusion
5656
[dependencies]
5757
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
5858
apache-avro = { version = "0.14", optional = true }
59-
arrow = { version = "22.0.0", features = ["prettyprint"] }
59+
arrow = { version = "23.0.0", features = ["prettyprint"] }
6060
async-trait = "0.1.41"
6161
bytes = "1.1"
6262
chrono = { version = "0.4", default-features = false }
@@ -78,7 +78,7 @@ num_cpus = "1.13.0"
7878
object_store = "0.5.0"
7979
ordered-float = "3.0"
8080
parking_lot = "0.12"
81-
parquet = { version = "22.0.0", features = ["arrow", "async"] }
81+
parquet = { version = "23.0.0", features = ["arrow", "async"] }
8282
paste = "^1.0"
8383
pin-project-lite = "^0.2.7"
8484
pyo3 = { version = "0.17.1", optional = true }
@@ -93,7 +93,7 @@ url = "2.2"
9393
uuid = { version = "1.0", features = ["v4"] }
9494

9595
[dev-dependencies]
96-
arrow = { version = "22.0.0", features = ["prettyprint", "dyn_cmp_dict"] }
96+
arrow = { version = "23.0.0", features = ["prettyprint", "dyn_cmp_dict"] }
9797
async-trait = "0.1.53"
9898
criterion = "0.4"
9999
csv = "1.1.6"

datafusion/core/fuzz-utils/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,6 @@ edition = "2021"
2323
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
2424

2525
[dependencies]
26-
arrow = { version = "22.0.0", features = ["prettyprint"] }
26+
arrow = { version = "23.0.0", features = ["prettyprint"] }
2727
env_logger = "0.9.0"
2828
rand = "0.8"

datafusion/core/src/avro_to_arrow/arrow_array_reader.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@
2020
use crate::arrow::array::{
2121
make_array, Array, ArrayBuilder, ArrayData, ArrayDataBuilder, ArrayRef,
2222
BooleanBuilder, LargeStringArray, ListBuilder, NullArray, OffsetSizeTrait,
23-
PrimitiveArray, PrimitiveBuilder, StringArray, StringBuilder,
24-
StringDictionaryBuilder,
23+
PrimitiveArray, StringArray, StringBuilder, StringDictionaryBuilder,
2524
};
2625
use crate::arrow::buffer::{Buffer, MutableBuffer};
2726
use crate::arrow::datatypes::{
@@ -171,9 +170,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> {
171170
where
172171
T: ArrowPrimitiveType + ArrowDictionaryKeyType,
173172
{
174-
let key_builder = PrimitiveBuilder::<T>::with_capacity(row_len);
175-
let values_builder = StringBuilder::with_capacity(row_len, 5);
176-
StringDictionaryBuilder::new(key_builder, values_builder)
173+
StringDictionaryBuilder::with_capacity(row_len, row_len, row_len)
177174
}
178175

179176
fn build_wrapped_list_array(

datafusion/core/src/physical_plan/coalesce_batches.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -292,8 +292,7 @@ pub fn concat_batches(
292292
row_count
293293
);
294294

295-
let mut options = RecordBatchOptions::default();
296-
options.row_count = Some(row_count);
295+
let options = RecordBatchOptions::new().with_row_count(Some(row_count));
297296

298297
RecordBatch::try_new_with_options(schema.clone(), arrays, &options)
299298
}

datafusion/core/src/physical_plan/file_format/mod.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -286,8 +286,7 @@ impl SchemaAdapter {
286286
let projected_schema = Arc::new(self.table_schema.clone().project(projections)?);
287287

288288
// Necessary to handle empty batches
289-
let mut options = RecordBatchOptions::default();
290-
options.row_count = Some(batch.num_rows());
289+
let options = RecordBatchOptions::new().with_row_count(Some(batch.num_rows()));
291290

292291
Ok(RecordBatch::try_new_with_options(
293292
projected_schema,

datafusion/expr/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,6 @@ path = "src/lib.rs"
3636

3737
[dependencies]
3838
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
39-
arrow = { version = "22.0.0", features = ["prettyprint"] }
39+
arrow = { version = "23.0.0", features = ["prettyprint"] }
4040
datafusion-common = { path = "../common", version = "12.0.0" }
4141
sqlparser = "0.23"

datafusion/jit/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ path = "src/lib.rs"
3636
jit = []
3737

3838
[dependencies]
39-
arrow = "22.0.0"
39+
arrow = "23.0.0"
4040
cranelift = "0.87.0"
4141
cranelift-jit = "0.87.0"
4242
cranelift-module = "0.87.0"

datafusion/optimizer/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ default = ["unicode_expressions"]
3737
unicode_expressions = []
3838

3939
[dependencies]
40-
arrow = { version = "22.0.0", features = ["prettyprint"] }
40+
arrow = { version = "23.0.0", features = ["prettyprint"] }
4141
async-trait = "0.1.41"
4242
chrono = { version = "0.4", default-features = false }
4343
datafusion-common = { path = "../common", version = "12.0.0" }

datafusion/physical-expr/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ unicode_expressions = ["unicode-segmentation"]
4040

4141
[dependencies]
4242
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
43-
arrow = { version = "22.0.0", features = ["prettyprint"] }
43+
arrow = { version = "23.0.0", features = ["prettyprint"] }
4444
blake2 = { version = "^0.10.2", optional = true }
4545
blake3 = { version = "1.0", optional = true }
4646
chrono = { version = "0.4", default-features = false }

datafusion/physical-expr/src/expressions/binary.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ use std::{any::Any, sync::Arc};
2424

2525
use arrow::array::*;
2626
use arrow::compute::kernels::arithmetic::{
27-
add, add_scalar, divide, divide_scalar, modulus, modulus_scalar, multiply,
27+
add, add_scalar, divide_opt, divide_scalar, modulus, modulus_scalar, multiply,
2828
multiply_scalar, subtract, subtract_scalar,
2929
};
3030
use arrow::compute::kernels::boolean::{and_kleene, not, or_kleene};
@@ -60,7 +60,7 @@ use kernels::{
6060
bitwise_xor, bitwise_xor_scalar,
6161
};
6262
use kernels_arrow::{
63-
add_decimal, add_decimal_scalar, divide_decimal, divide_decimal_scalar,
63+
add_decimal, add_decimal_scalar, divide_decimal_scalar, divide_opt_decimal,
6464
eq_decimal_scalar, gt_decimal_scalar, gt_eq_decimal_scalar, is_distinct_from,
6565
is_distinct_from_bool, is_distinct_from_decimal, is_distinct_from_null,
6666
is_distinct_from_utf8, is_not_distinct_from, is_not_distinct_from_bool,
@@ -844,7 +844,7 @@ impl BinaryExpr {
844844
Operator::Plus => binary_primitive_array_op!(left, right, add),
845845
Operator::Minus => binary_primitive_array_op!(left, right, subtract),
846846
Operator::Multiply => binary_primitive_array_op!(left, right, multiply),
847-
Operator::Divide => binary_primitive_array_op!(left, right, divide),
847+
Operator::Divide => binary_primitive_array_op!(left, right, divide_opt),
848848
Operator::Modulo => binary_primitive_array_op!(left, right, modulus),
849849
Operator::And => {
850850
if left_data_type == &DataType::Boolean {
@@ -1326,9 +1326,7 @@ mod tests {
13261326
let string_type = DataType::Utf8;
13271327

13281328
// build dictionary
1329-
let keys_builder = PrimitiveBuilder::<Int32Type>::with_capacity(10);
1330-
let values_builder = arrow::array::StringBuilder::with_capacity(10, 1024);
1331-
let mut dict_builder = StringDictionaryBuilder::new(keys_builder, values_builder);
1329+
let mut dict_builder = StringDictionaryBuilder::<Int32Type>::new();
13321330

13331331
dict_builder.append("one")?;
13341332
dict_builder.append_null();

datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,7 @@ pub(crate) fn multiply_decimal_scalar(
372372
Ok(array)
373373
}
374374

375-
pub(crate) fn divide_decimal(
375+
pub(crate) fn divide_opt_decimal(
376376
left: &Decimal128Array,
377377
right: &Decimal128Array,
378378
) -> Result<Decimal128Array> {
@@ -636,7 +636,7 @@ mod tests {
636636
25,
637637
3,
638638
);
639-
let result = divide_decimal(&left_decimal_array, &right_decimal_array)?;
639+
let result = divide_opt_decimal(&left_decimal_array, &right_decimal_array)?;
640640
let expect = create_decimal_array(
641641
&[Some(123456700), None, Some(22446672), Some(-10037130), None],
642642
25,
@@ -674,7 +674,8 @@ mod tests {
674674
let left_decimal_array = create_decimal_array(&[Some(101)], 10, 1);
675675
let right_decimal_array = create_decimal_array(&[Some(0)], 1, 1);
676676

677-
let err = divide_decimal(&left_decimal_array, &right_decimal_array).unwrap_err();
677+
let err =
678+
divide_opt_decimal(&left_decimal_array, &right_decimal_array).unwrap_err();
678679
assert_eq!("Arrow error: Divide by zero error", err.to_string());
679680
let err = divide_decimal_scalar(&left_decimal_array, 0).unwrap_err();
680681
assert_eq!("Arrow error: Divide by zero error", err.to_string());

datafusion/proto/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ default = []
3737
json = ["pbjson", "pbjson-build", "serde", "serde_json"]
3838

3939
[dependencies]
40-
arrow = "22.0.0"
40+
arrow = "23.0.0"
4141
datafusion = { path = "../core", version = "12.0.0" }
4242
datafusion-common = { path = "../common", version = "12.0.0" }
4343
datafusion-expr = { path = "../expr", version = "12.0.0" }

datafusion/row/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ path = "src/lib.rs"
3737
jit = ["datafusion-jit"]
3838

3939
[dependencies]
40-
arrow = "22.0.0"
40+
arrow = "23.0.0"
4141
datafusion-common = { path = "../common", version = "12.0.0" }
4242
datafusion-jit = { path = "../jit", version = "12.0.0", optional = true }
4343
paste = "^1.0"

datafusion/sql/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ unicode_expressions = []
3838

3939
[dependencies]
4040
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
41-
arrow = { version = "22.0.0", features = ["prettyprint"] }
41+
arrow = { version = "23.0.0", features = ["prettyprint"] }
4242
datafusion-common = { path = "../common", version = "12.0.0" }
4343
datafusion-expr = { path = "../expr", version = "12.0.0" }
4444
hashbrown = "0.12"

dev/update_arrow_deps.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,20 @@ def update_version_cargo_toml(cargo_toml, new_version):
8888

8989
for section in ("dependencies", "dev-dependencies"):
9090
for (dep_name, constraint) in doc.get(section, {}).items():
91-
if dep_name in ("arrow", "parquet", "arrow-flight") and constraint.get("version") is not None:
92-
doc[section][dep_name]["version"] = new_version
91+
if dep_name in ("arrow", "parquet", "arrow-flight"):
92+
if type(constraint) == tomlkit.items.String:
93+
# handle constraint that is (only) a string like '12',
94+
doc[section][dep_name] = new_version
95+
elif type(constraint) == dict:
96+
# handle constraint that is itself a struct like
97+
# {'version': '12', 'features': ['prettyprint']}
98+
doc[section][dep_name]["version"] = new_version
99+
elif type(constraint) == tomlkit.items.InlineTable:
100+
# handle constraint that is itself a struct like
101+
# {'version': '12', 'features': ['prettyprint']}
102+
doc[section][dep_name]["version"] = new_version
103+
else:
104+
print("Unknown type for {} {}: {}", dep_name, constraint, type(constraint))
93105

94106
with open(cargo_toml, 'w') as f:
95107
f.write(tomlkit.dumps(doc))

0 commit comments

Comments
 (0)