Commit ea14618

Split out integration test plumbing (apache#2594) (apache#2300)
1 parent ff81dee commit ea14618

12 files changed: +645, -760 lines


.github/workflows/arrow.yml

Lines changed: 2 additions & 0 deletions
@@ -64,6 +64,8 @@ jobs:
           cargo run --example dynamic_types
           cargo run --example read_csv
           cargo run --example read_csv_infer_schema
+      - name: Run non-archery based integration-tests
+        run: cargo test -p arrow-integration-testing
 
   # test compilaton features
   linux-features:

arrow/Cargo.toml

Lines changed: 3 additions & 4 deletions
@@ -38,10 +38,10 @@ path = "src/lib.rs"
 bench = false
 
 [target.'cfg(target_arch = "wasm32")'.dependencies]
-ahash = { version = "0.8", default-features = false, features=["compile-time-rng"] }
+ahash = { version = "0.8", default-features = false, features = ["compile-time-rng"] }
 
 [target.'cfg(not(target_arch = "wasm32"))'.dependencies]
-ahash = { version = "0.8", default-features = false, features=["runtime-rng"] }
+ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
 
 [dependencies]
 serde = { version = "1.0", default-features = false }
@@ -61,7 +61,6 @@ packed_simd = { version = "0.3", default-features = false, optional = true, pack
 chrono = { version = "0.4", default-features = false, features = ["clock"] }
 chrono-tz = { version = "0.6", default-features = false, optional = true }
 flatbuffers = { version = "2.1.2", default-features = false, features = ["thiserror"], optional = true }
-hex = { version = "0.4", default-features = false, features = ["std"] }
 comfy-table = { version = "6.0", optional = true, default-features = false }
 pyo3 = { version = "0.17", default-features = false, optional = true }
 lexical-core = { version = "^0.8", default-features = false, features = ["write-integers", "write-floats", "parse-integers", "parse-floats"] }
@@ -102,7 +101,7 @@ tempfile = { version = "3", default-features = false }
 [[example]]
 name = "dynamic_types"
 required-features = ["prettyprint"]
-path="./examples/dynamic_types.rs"
+path = "./examples/dynamic_types.rs"
 
 [[bench]]
 name = "aggregate_kernels"

arrow/src/ipc/reader.rs

Lines changed: 1 addition & 345 deletions
@@ -1173,336 +1173,8 @@ mod tests {
 
     use std::fs::File;
 
-    use flate2::read::GzDecoder;
-
+    use crate::datatypes;
     use crate::datatypes::{ArrowNativeType, Float64Type, Int32Type, Int8Type};
-    use crate::{datatypes, util::integration_util::*};
-
-    #[test]
-    #[cfg(not(feature = "force_validate"))]
-    fn read_generated_files_014() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "0.14.1";
-        // the test is repetitive, thus we can read all supported files at once
-        let paths = vec![
-            "generated_interval",
-            "generated_datetime",
-            "generated_dictionary",
-            "generated_map",
-            "generated_nested",
-            "generated_primitive_no_batches",
-            "generated_primitive_zerolength",
-            "generated_primitive",
-            "generated_decimal",
-        ];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let mut reader = FileReader::try_new(file, None).unwrap();
-
-            // read expected JSON output
-            let arrow_json = read_gzip_json(version, path);
-            assert!(arrow_json.equals_reader(&mut reader).unwrap());
-        });
-    }
-
-    #[test]
-    #[should_panic(expected = "Big Endian is not supported for Decimal!")]
-    fn read_decimal_be_file_should_panic() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let file = File::open(format!(
-            "{}/arrow-ipc-stream/integration/1.0.0-bigendian/generated_decimal.arrow_file",
-            testdata
-        ))
-        .unwrap();
-        FileReader::try_new(file, None).unwrap();
-    }
-
-    #[test]
-    #[should_panic(
-        expected = "Last offset 687865856 of Utf8 is larger than values length 41"
-    )]
-    fn read_dictionary_be_not_implemented() {
-        // The offsets are not translated for big-endian files
-        // https://github.com/apache/arrow-rs/issues/859
-        let testdata = crate::util::test_util::arrow_test_data();
-        let file = File::open(format!(
-            "{}/arrow-ipc-stream/integration/1.0.0-bigendian/generated_dictionary.arrow_file",
-            testdata
-        ))
-        .unwrap();
-        FileReader::try_new(file, None).unwrap();
-    }
-
-    #[test]
-    fn read_generated_be_files_should_work() {
-        // complementary to the previous test
-        let testdata = crate::util::test_util::arrow_test_data();
-        let paths = vec![
-            "generated_interval",
-            "generated_datetime",
-            "generated_map",
-            "generated_nested",
-            "generated_null_trivial",
-            "generated_null",
-            "generated_primitive_no_batches",
-            "generated_primitive_zerolength",
-            "generated_primitive",
-        ];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/1.0.0-bigendian/{}.arrow_file",
-                testdata, path
-            ))
-            .unwrap();
-
-            FileReader::try_new(file, None).unwrap();
-        });
-    }
-
-    #[test]
-    fn projection_should_work() {
-        // complementary to the previous test
-        let testdata = crate::util::test_util::arrow_test_data();
-        let paths = vec![
-            "generated_interval",
-            "generated_datetime",
-            "generated_map",
-            "generated_nested",
-            "generated_null_trivial",
-            "generated_null",
-            "generated_primitive_no_batches",
-            "generated_primitive_zerolength",
-            "generated_primitive",
-        ];
-        paths.iter().for_each(|path| {
-            // We must use littleendian files here.
-            // The offsets are not translated for big-endian files
-            // https://github.com/apache/arrow-rs/issues/859
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/1.0.0-littleendian/{}.arrow_file",
-                testdata, path
-            ))
-            .unwrap();
-
-            let reader = FileReader::try_new(file, Some(vec![0])).unwrap();
-            let datatype_0 = reader.schema().fields()[0].data_type().clone();
-            reader.for_each(|batch| {
-                let batch = batch.unwrap();
-                assert_eq!(batch.columns().len(), 1);
-                assert_eq!(datatype_0, batch.schema().fields()[0].data_type().clone());
-            });
-        });
-    }
-
-    #[test]
-    #[cfg(not(feature = "force_validate"))]
-    fn read_generated_streams_014() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "0.14.1";
-        // the test is repetitive, thus we can read all supported files at once
-        let paths = vec![
-            "generated_interval",
-            "generated_datetime",
-            "generated_dictionary",
-            "generated_map",
-            "generated_nested",
-            "generated_primitive_no_batches",
-            "generated_primitive_zerolength",
-            "generated_primitive",
-            "generated_decimal",
-        ];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.stream",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let mut reader = StreamReader::try_new(file, None).unwrap();
-
-            // read expected JSON output
-            let arrow_json = read_gzip_json(version, path);
-            assert!(arrow_json.equals_reader(&mut reader).unwrap());
-            // the next batch must be empty
-            assert!(reader.next().is_none());
-            // the stream must indicate that it's finished
-            assert!(reader.is_finished());
-        });
-    }
-
-    #[test]
-    fn read_generated_files_100() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "1.0.0-littleendian";
-        // the test is repetitive, thus we can read all supported files at once
-        let paths = vec![
-            "generated_interval",
-            "generated_datetime",
-            "generated_dictionary",
-            "generated_map",
-            // "generated_map_non_canonical",
-            "generated_nested",
-            "generated_null_trivial",
-            "generated_null",
-            "generated_primitive_no_batches",
-            "generated_primitive_zerolength",
-            "generated_primitive",
-        ];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let mut reader = FileReader::try_new(file, None).unwrap();
-
-            // read expected JSON output
-            let arrow_json = read_gzip_json(version, path);
-            assert!(arrow_json.equals_reader(&mut reader).unwrap());
-        });
-    }
-
-    #[test]
-    fn read_generated_streams_100() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "1.0.0-littleendian";
-        // the test is repetitive, thus we can read all supported files at once
-        let paths = vec![
-            "generated_interval",
-            "generated_datetime",
-            "generated_dictionary",
-            "generated_map",
-            // "generated_map_non_canonical",
-            "generated_nested",
-            "generated_null_trivial",
-            "generated_null",
-            "generated_primitive_no_batches",
-            "generated_primitive_zerolength",
-            "generated_primitive",
-        ];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.stream",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let mut reader = StreamReader::try_new(file, None).unwrap();
-
-            // read expected JSON output
-            let arrow_json = read_gzip_json(version, path);
-            assert!(arrow_json.equals_reader(&mut reader).unwrap());
-            // the next batch must be empty
-            assert!(reader.next().is_none());
-            // the stream must indicate that it's finished
-            assert!(reader.is_finished());
-        });
-    }
-
-    #[test]
-    #[cfg(feature = "ipc_compression")]
-    fn read_generated_streams_200() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "2.0.0-compression";
-
-        // the test is repetitive, thus we can read all supported files at once
-        let paths = vec!["generated_lz4", "generated_zstd"];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.stream",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let mut reader = StreamReader::try_new(file, None).unwrap();
-
-            // read expected JSON output
-            let arrow_json = read_gzip_json(version, path);
-            assert!(arrow_json.equals_reader(&mut reader).unwrap());
-            // the next batch must be empty
-            assert!(reader.next().is_none());
-            // the stream must indicate that it's finished
-            assert!(reader.is_finished());
-        });
-    }
-
-    #[test]
-    #[cfg(not(feature = "ipc_compression"))]
-    fn read_generated_streams_200_negative() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "2.0.0-compression";
-
-        // the test is repetitive, thus we can read all supported files at once
-        let cases = vec![("generated_lz4", "LZ4_FRAME"), ("generated_zstd", "ZSTD")];
-        cases.iter().for_each(|(path, compression_name)| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.stream",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let mut reader = StreamReader::try_new(file, None).unwrap();
-            let err = reader.next().unwrap().unwrap_err();
-            let expected_error = format!(
-                "Invalid argument error: compression type {} not supported because arrow was not compiled with the ipc_compression feature",
-                compression_name
-            );
-            assert_eq!(err.to_string(), expected_error);
-        });
-    }
-
-    #[test]
-    #[cfg(feature = "ipc_compression")]
-    fn read_generated_files_200() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "2.0.0-compression";
-        // the test is repetitive, thus we can read all supported files at once
-        let paths = vec!["generated_lz4", "generated_zstd"];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let mut reader = FileReader::try_new(file, None).unwrap();
-
-            // read expected JSON output
-            let arrow_json = read_gzip_json(version, path);
-            assert!(arrow_json.equals_reader(&mut reader).unwrap());
-        });
-    }
-
-    #[test]
-    #[cfg(not(feature = "ipc_compression"))]
-    fn read_generated_files_200_negative() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "2.0.0-compression";
-        // the test is repetitive, thus we can read all supported files at once
-        let cases = vec![("generated_lz4", "LZ4_FRAME"), ("generated_zstd", "ZSTD")];
-        cases.iter().for_each(|(path, compression_name)| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let mut reader = FileReader::try_new(file, None).unwrap();
-
-            let err = reader.next().unwrap().unwrap_err();
-            let expected_error = format!(
-                "Invalid argument error: compression type {} not supported because arrow was not compiled with the ipc_compression feature",
-                compression_name
-            );
-            assert_eq!(err.to_string(), expected_error);
-        });
-    }
 
     fn create_test_projection_schema() -> Schema {
         // define field types
@@ -1816,22 +1488,6 @@ mod tests {
         check_union_with_builder(UnionBuilder::new_sparse());
     }
 
-    /// Read gzipped JSON file
-    fn read_gzip_json(version: &str, path: &str) -> ArrowJson {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let file = File::open(format!(
-            "{}/arrow-ipc-stream/integration/{}/{}.json.gz",
-            testdata, version, path
-        ))
-        .unwrap();
-        let mut gz = GzDecoder::new(&file);
-        let mut s = String::new();
-        gz.read_to_string(&mut s).unwrap();
-        // convert to Arrow JSON
-        let arrow_json: ArrowJson = serde_json::from_str(&s).unwrap();
-        arrow_json
-    }
-
     #[test]
     fn test_roundtrip_stream_nested_dict() {
         let xs = vec!["AA", "BB", "AA", "CC", "BB"];
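
For orientation: the tests deleted above presumably continue to exist in the arrow-integration-testing crate that the new CI step exercises. Below is a minimal, hypothetical sketch of one such test stitched together from the pieces removed in this diff; the module paths (arrow::util::test_util, the assumed location of ArrowJson in arrow::util::integration_util) and the dev-dependencies (arrow, flate2, serde_json) are assumptions for illustration, not taken from this commit.

// Hypothetical standalone version of the gzip-JSON round-trip check removed above.
// Import paths are assumptions; the logic mirrors the deleted read_gzip_json helper
// and the read_generated_files_* tests.
use std::fs::File;
use std::io::Read;

use arrow::ipc::reader::FileReader;
use arrow::util::integration_util::ArrowJson; // assumed location of ArrowJson
use flate2::read::GzDecoder;

/// Read the gzipped "golden" JSON file shipped with the arrow-testing data.
fn read_gzip_json(version: &str, path: &str) -> ArrowJson {
    let testdata = arrow::util::test_util::arrow_test_data();
    let file = File::open(format!(
        "{}/arrow-ipc-stream/integration/{}/{}.json.gz",
        testdata, version, path
    ))
    .unwrap();
    let mut gz = GzDecoder::new(&file);
    let mut s = String::new();
    gz.read_to_string(&mut s).unwrap();
    serde_json::from_str(&s).unwrap()
}

#[test]
fn read_generated_file_matches_golden_json() {
    let testdata = arrow::util::test_util::arrow_test_data();
    let (version, path) = ("1.0.0-littleendian", "generated_primitive");
    let file = File::open(format!(
        "{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
        testdata, version, path
    ))
    .unwrap();

    // Decode the IPC file and compare it against its JSON representation.
    let mut reader = FileReader::try_new(file, None).unwrap();
    let arrow_json = read_gzip_json(version, path);
    assert!(arrow_json.equals_reader(&mut reader).unwrap());
}

Under such a layout, the `cargo test -p arrow-integration-testing` step added to arrow.yml would pick this up as an ordinary Rust test, with no archery harness involved.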
