Skip to content

Commit 7b71713

Browse files
authored
Fix ipc schema custom_metadata serialization (#3282)
* Fix ipc schema custom_metadata serialization * Fix ipc doc test * PR comments
1 parent 2e806b0 commit 7b71713

File tree

2 files changed

+64
-45
lines changed

2 files changed

+64
-45
lines changed

arrow-ipc/src/convert.rs

Lines changed: 58 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -41,29 +41,37 @@ pub fn schema_to_fb_offset<'a>(
4141
fbb: &mut FlatBufferBuilder<'a>,
4242
schema: &Schema,
4343
) -> WIPOffset<crate::Schema<'a>> {
44-
let mut fields = vec![];
45-
for field in schema.fields() {
46-
let fb_field = build_field(fbb, field);
47-
fields.push(fb_field);
48-
}
49-
50-
let mut custom_metadata = vec![];
51-
for (k, v) in schema.metadata() {
52-
let fb_key_name = fbb.create_string(k.as_str());
53-
let fb_val_name = fbb.create_string(v.as_str());
44+
let fields = schema
45+
.fields()
46+
.iter()
47+
.map(|field| build_field(fbb, field))
48+
.collect::<Vec<_>>();
49+
let fb_field_list = fbb.create_vector(&fields);
5450

55-
let mut kv_builder = crate::KeyValueBuilder::new(fbb);
56-
kv_builder.add_key(fb_key_name);
57-
kv_builder.add_value(fb_val_name);
58-
custom_metadata.push(kv_builder.finish());
59-
}
51+
let fb_metadata_list = if !schema.metadata().is_empty() {
52+
let custom_metadata = schema
53+
.metadata()
54+
.iter()
55+
.map(|(k, v)| {
56+
let fb_key_name = fbb.create_string(k);
57+
let fb_val_name = fbb.create_string(v);
6058

61-
let fb_field_list = fbb.create_vector(&fields);
62-
let fb_metadata_list = fbb.create_vector(&custom_metadata);
59+
let mut kv_builder = crate::KeyValueBuilder::new(fbb);
60+
kv_builder.add_key(fb_key_name);
61+
kv_builder.add_value(fb_val_name);
62+
kv_builder.finish()
63+
})
64+
.collect::<Vec<_>>();
65+
Some(fbb.create_vector(&custom_metadata))
66+
} else {
67+
None
68+
};
6369

6470
let mut builder = crate::SchemaBuilder::new(fbb);
6571
builder.add_fields(fb_field_list);
66-
builder.add_custom_metadata(fb_metadata_list);
72+
if let Some(fb_metadata_list) = fb_metadata_list {
73+
builder.add_custom_metadata(fb_metadata_list);
74+
}
6775
builder.finish()
6876
}
6977

@@ -1031,32 +1039,45 @@ mod tests {
10311039

10321040
#[test]
10331041
fn schema_from_bytes() {
1034-
// bytes of a schema generated from python (0.14.0), saved as an `crate::Message`.
1035-
// the schema is: Field("field1", DataType::UInt32, false)
1042+
// Bytes of a schema generated via following python code, using pyarrow 10.0.1:
1043+
//
1044+
// import pyarrow as pa
1045+
// schema = pa.schema([pa.field('field1', pa.uint32(), nullable=False)])
1046+
// sink = pa.BufferOutputStream()
1047+
// with pa.ipc.new_stream(sink, schema) as writer:
1048+
// pass
1049+
// # stripping continuation & length prefix & suffix bytes to get only schema bytes
1050+
// [x for x in sink.getvalue().to_pybytes()][8:-8]
10361051
let bytes: Vec<u8> = vec![
1037-
16, 0, 0, 0, 0, 0, 10, 0, 12, 0, 6, 0, 5, 0, 8, 0, 10, 0, 0, 0, 0, 1, 3, 0,
1052+
16, 0, 0, 0, 0, 0, 10, 0, 12, 0, 6, 0, 5, 0, 8, 0, 10, 0, 0, 0, 0, 1, 4, 0,
10381053
12, 0, 0, 0, 8, 0, 8, 0, 0, 0, 4, 0, 8, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 20,
10391054
0, 0, 0, 16, 0, 20, 0, 8, 0, 0, 0, 7, 0, 12, 0, 0, 0, 16, 0, 16, 0, 0, 0, 0,
1040-
0, 0, 2, 32, 0, 0, 0, 20, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 8, 0,
1041-
4, 0, 6, 0, 0, 0, 32, 0, 0, 0, 6, 0, 0, 0, 102, 105, 101, 108, 100, 49, 0, 0,
1042-
0, 0, 0, 0,
1055+
0, 0, 2, 16, 0, 0, 0, 32, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 102,
1056+
105, 101, 108, 100, 49, 0, 0, 0, 0, 6, 0, 8, 0, 4, 0, 6, 0, 0, 0, 32, 0, 0,
1057+
0,
10431058
];
1044-
let ipc = crate::root_as_message(&bytes[..]).unwrap();
1059+
let ipc = crate::root_as_message(&bytes).unwrap();
10451060
let schema = ipc.header_as_schema().unwrap();
10461061

1047-
// a message generated from Rust, same as the Python one
1048-
let bytes: Vec<u8> = vec![
1049-
16, 0, 0, 0, 0, 0, 10, 0, 14, 0, 12, 0, 11, 0, 4, 0, 10, 0, 0, 0, 20, 0, 0,
1050-
0, 0, 0, 0, 1, 3, 0, 10, 0, 12, 0, 0, 0, 8, 0, 4, 0, 10, 0, 0, 0, 8, 0, 0, 0,
1051-
8, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 16, 0, 0, 0, 12, 0, 18, 0, 12, 0, 0, 0,
1052-
11, 0, 4, 0, 12, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 2, 20, 0, 0, 0, 0, 0, 6, 0,
1053-
8, 0, 4, 0, 6, 0, 0, 0, 32, 0, 0, 0, 6, 0, 0, 0, 102, 105, 101, 108, 100, 49,
1054-
0, 0,
1055-
];
1056-
let ipc2 = crate::root_as_message(&bytes[..]).unwrap();
1057-
let schema2 = ipc.header_as_schema().unwrap();
1062+
// generate same message with Rust
1063+
let data_gen = crate::writer::IpcDataGenerator::default();
1064+
let arrow_schema =
1065+
Schema::new(vec![Field::new("field1", DataType::UInt32, false)]);
1066+
let bytes = data_gen
1067+
.schema_to_bytes(&arrow_schema, &crate::writer::IpcWriteOptions::default())
1068+
.ipc_message;
1069+
1070+
let ipc2 = crate::root_as_message(&bytes).unwrap();
1071+
let schema2 = ipc2.header_as_schema().unwrap();
1072+
1073+
// can't compare schema directly as it compares the underlying bytes, which can differ
1074+
assert!(schema.custom_metadata().is_none());
1075+
assert!(schema2.custom_metadata().is_none());
1076+
assert_eq!(schema.endianness(), schema2.endianness());
1077+
assert!(schema.features().is_none());
1078+
assert!(schema2.features().is_none());
1079+
assert_eq!(fb_to_schema(schema), fb_to_schema(schema2));
10581080

1059-
assert_eq!(schema, schema2);
10601081
assert_eq!(ipc.version(), ipc2.version());
10611082
assert_eq!(ipc.header_type(), ipc2.header_type());
10621083
assert_eq!(ipc.bodyLength(), ipc2.bodyLength());

arrow-ipc/src/writer.rs

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -793,15 +793,13 @@ impl<W: Write> StreamWriter<W> {
793793
/// # fn main() -> Result<(), ArrowError> {
794794
/// // The result we expect from an empty schema
795795
/// let expected = vec![
796-
/// 255, 255, 255, 255, 64, 0, 0, 0,
796+
/// 255, 255, 255, 255, 48, 0, 0, 0,
797797
/// 16, 0, 0, 0, 0, 0, 10, 0,
798-
/// 14, 0, 12, 0, 11, 0, 4, 0,
799-
/// 10, 0, 0, 0, 20, 0, 0, 0,
800-
/// 0, 0, 0, 1, 4, 0, 10, 0,
801-
/// 12, 0, 0, 0, 8, 0, 4, 0,
802-
/// 10, 0, 0, 0, 8, 0, 0, 0,
803-
/// 8, 0, 0, 0, 0, 0, 0, 0,
804-
/// 0, 0, 0, 0, 0, 0, 0, 0,
798+
/// 12, 0, 10, 0, 9, 0, 4, 0,
799+
/// 10, 0, 0, 0, 16, 0, 0, 0,
800+
/// 0, 1, 4, 0, 8, 0, 8, 0,
801+
/// 0, 0, 4, 0, 8, 0, 0, 0,
802+
/// 4, 0, 0, 0, 0, 0, 0, 0,
805803
/// 255, 255, 255, 255, 0, 0, 0, 0
806804
/// ];
807805
///

0 commit comments

Comments
 (0)