Skip to content

Commit 2e806b0

Browse files
authored
fix(ffi): handle null data buffers from empty arrays (#3276)
* test(python): validate roundtripping of empty arrays * test: check more types * test: parameterize rust side * fix: also check for zero length buffers * fix: handle null data buffers * fix: only allow zero-length buffers to be null
1 parent 16484a6 commit 2e806b0

File tree

3 files changed

+57
-9
lines changed

3 files changed

+57
-9
lines changed

arrow-pyarrow-integration-testing/src/lib.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
2121
use std::sync::Arc;
2222

23+
use arrow::array::new_empty_array;
2324
use pyo3::prelude::*;
2425
use pyo3::wrap_pyfunction;
2526

@@ -70,6 +71,13 @@ fn double_py(lambda: &PyAny, py: Python) -> PyResult<bool> {
7071
Ok(array == expected)
7172
}
7273

74+
#[pyfunction]
75+
fn make_empty_array(datatype: PyArrowType<DataType>, py: Python) -> PyResult<PyObject> {
76+
let array = new_empty_array(&datatype.0);
77+
78+
array.data().to_pyarrow(py)
79+
}
80+
7381
/// Returns the substring
7482
#[pyfunction]
7583
fn substring(
@@ -134,6 +142,7 @@ fn round_trip_record_batch_reader(
134142
fn arrow_pyarrow_integration_testing(_py: Python, m: &PyModule) -> PyResult<()> {
135143
m.add_wrapped(wrap_pyfunction!(double))?;
136144
m.add_wrapped(wrap_pyfunction!(double_py))?;
145+
m.add_wrapped(wrap_pyfunction!(make_empty_array))?;
137146
m.add_wrapped(wrap_pyfunction!(substring))?;
138147
m.add_wrapped(wrap_pyfunction!(concatenate))?;
139148
m.add_wrapped(wrap_pyfunction!(round_trip_type))?;

arrow-pyarrow-integration-testing/tests/test_sql.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,38 @@ def test_time32_python():
193193
del b
194194
del expected
195195

196+
197+
@pytest.mark.parametrize("datatype", _supported_pyarrow_types, ids=str)
198+
def test_empty_array_python(datatype):
199+
"""
200+
Python -> Rust -> Python
201+
"""
202+
if datatype == pa.float16():
203+
pytest.skip("Float 16 is not implemented in Rust")
204+
205+
a = pa.array([], datatype)
206+
b = rust.round_trip_array(a)
207+
b.validate(full=True)
208+
assert a.to_pylist() == b.to_pylist()
209+
assert a.type == b.type
210+
del a
211+
del b
212+
213+
214+
@pytest.mark.parametrize("datatype", _supported_pyarrow_types, ids=str)
215+
def test_empty_array_rust(datatype):
216+
"""
217+
Rust -> Python
218+
"""
219+
a = pa.array([], type=datatype)
220+
b = rust.make_empty_array(datatype)
221+
b.validate(full=True)
222+
assert a.to_pylist() == b.to_pylist()
223+
assert a.type == b.type
224+
del a
225+
del b
226+
227+
196228
def test_binary_array():
197229
"""
198230
Python -> Rust -> Python

arrow/src/ffi.rs

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ use std::{
123123
use bitflags::bitflags;
124124

125125
use crate::array::{layout, ArrayData};
126-
use crate::buffer::Buffer;
126+
use crate::buffer::{Buffer, MutableBuffer};
127127
use crate::datatypes::DataType;
128128
use crate::error::{ArrowError, Result};
129129
use crate::util::bit_util;
@@ -578,7 +578,7 @@ unsafe fn create_buffer(
578578
index: usize,
579579
len: usize,
580580
) -> Option<Buffer> {
581-
if array.buffers.is_null() {
581+
if array.buffers.is_null() || array.n_buffers == 0 {
582582
return None;
583583
}
584584
let buffers = array.buffers as *mut *const u8;
@@ -657,13 +657,20 @@ pub trait ArrowArrayRef {
657657

658658
let len = self.buffer_len(index)?;
659659

660-
unsafe { create_buffer(self.owner().clone(), self.array(), index, len) }
661-
.ok_or_else(|| {
662-
ArrowError::CDataInterface(format!(
663-
"The external buffer at position {} is null.",
664-
index - 1
665-
))
666-
})
660+
match unsafe {
661+
create_buffer(self.owner().clone(), self.array(), index, len)
662+
} {
663+
Some(buf) => Ok(buf),
664+
None if len == 0 => {
665+
// Null data buffer, which Rust doesn't allow. So create
666+
// an empty buffer.
667+
Ok(MutableBuffer::new(0).into())
668+
}
669+
None => Err(ArrowError::CDataInterface(format!(
670+
"The external buffer at position {} is null.",
671+
index - 1
672+
))),
673+
}
667674
})
668675
.collect()
669676
}

0 commit comments

Comments
 (0)