@@ -1173,336 +1173,8 @@ mod tests {
 
     use std::fs::File;
 
-    use flate2::read::GzDecoder;
-
+    use crate::datatypes;
     use crate::datatypes::{ArrowNativeType, Float64Type, Int32Type, Int8Type};
-    use crate::{datatypes, util::integration_util::*};
-
-    #[test]
-    #[cfg(not(feature = "force_validate"))]
-    fn read_generated_files_014() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "0.14.1";
-        // the test is repetitive, thus we can read all supported files at once
-        let paths = vec![
-            "generated_interval",
-            "generated_datetime",
-            "generated_dictionary",
-            "generated_map",
-            "generated_nested",
-            "generated_primitive_no_batches",
-            "generated_primitive_zerolength",
-            "generated_primitive",
-            "generated_decimal",
-        ];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let mut reader = FileReader::try_new(file, None).unwrap();
-
-            // read expected JSON output
-            let arrow_json = read_gzip_json(version, path);
-            assert!(arrow_json.equals_reader(&mut reader).unwrap());
-        });
-    }
-
-    #[test]
-    #[should_panic(expected = "Big Endian is not supported for Decimal!")]
-    fn read_decimal_be_file_should_panic() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let file = File::open(format!(
-            "{}/arrow-ipc-stream/integration/1.0.0-bigendian/generated_decimal.arrow_file",
-            testdata
-        ))
-        .unwrap();
-        FileReader::try_new(file, None).unwrap();
-    }
-
-    #[test]
-    #[should_panic(
-        expected = "Last offset 687865856 of Utf8 is larger than values length 41"
-    )]
-    fn read_dictionary_be_not_implemented() {
-        // The offsets are not translated for big-endian files
-        // https://github.com/apache/arrow-rs/issues/859
-        let testdata = crate::util::test_util::arrow_test_data();
-        let file = File::open(format!(
-            "{}/arrow-ipc-stream/integration/1.0.0-bigendian/generated_dictionary.arrow_file",
-            testdata
-        ))
-        .unwrap();
-        FileReader::try_new(file, None).unwrap();
-    }
-
-    #[test]
-    fn read_generated_be_files_should_work() {
-        // complementary to the previous test
-        let testdata = crate::util::test_util::arrow_test_data();
-        let paths = vec![
-            "generated_interval",
-            "generated_datetime",
-            "generated_map",
-            "generated_nested",
-            "generated_null_trivial",
-            "generated_null",
-            "generated_primitive_no_batches",
-            "generated_primitive_zerolength",
-            "generated_primitive",
-        ];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/1.0.0-bigendian/{}.arrow_file",
-                testdata, path
-            ))
-            .unwrap();
-
-            FileReader::try_new(file, None).unwrap();
-        });
-    }
-
-    #[test]
-    fn projection_should_work() {
-        // complementary to the previous test
-        let testdata = crate::util::test_util::arrow_test_data();
-        let paths = vec![
-            "generated_interval",
-            "generated_datetime",
-            "generated_map",
-            "generated_nested",
-            "generated_null_trivial",
-            "generated_null",
-            "generated_primitive_no_batches",
-            "generated_primitive_zerolength",
-            "generated_primitive",
-        ];
-        paths.iter().for_each(|path| {
-            // We must use littleendian files here.
-            // The offsets are not translated for big-endian files
-            // https://github.com/apache/arrow-rs/issues/859
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/1.0.0-littleendian/{}.arrow_file",
-                testdata, path
-            ))
-            .unwrap();
-
-            let reader = FileReader::try_new(file, Some(vec![0])).unwrap();
-            let datatype_0 = reader.schema().fields()[0].data_type().clone();
-            reader.for_each(|batch| {
-                let batch = batch.unwrap();
-                assert_eq!(batch.columns().len(), 1);
-                assert_eq!(datatype_0, batch.schema().fields()[0].data_type().clone());
-            });
-        });
-    }
-
-    #[test]
-    #[cfg(not(feature = "force_validate"))]
-    fn read_generated_streams_014() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "0.14.1";
-        // the test is repetitive, thus we can read all supported files at once
-        let paths = vec![
-            "generated_interval",
-            "generated_datetime",
-            "generated_dictionary",
-            "generated_map",
-            "generated_nested",
-            "generated_primitive_no_batches",
-            "generated_primitive_zerolength",
-            "generated_primitive",
-            "generated_decimal",
-        ];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.stream",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let mut reader = StreamReader::try_new(file, None).unwrap();
-
-            // read expected JSON output
-            let arrow_json = read_gzip_json(version, path);
-            assert!(arrow_json.equals_reader(&mut reader).unwrap());
-            // the next batch must be empty
-            assert!(reader.next().is_none());
-            // the stream must indicate that it's finished
-            assert!(reader.is_finished());
-        });
-    }
-
-    #[test]
-    fn read_generated_files_100() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "1.0.0-littleendian";
-        // the test is repetitive, thus we can read all supported files at once
-        let paths = vec![
-            "generated_interval",
-            "generated_datetime",
-            "generated_dictionary",
-            "generated_map",
-            // "generated_map_non_canonical",
-            "generated_nested",
-            "generated_null_trivial",
-            "generated_null",
-            "generated_primitive_no_batches",
-            "generated_primitive_zerolength",
-            "generated_primitive",
-        ];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let mut reader = FileReader::try_new(file, None).unwrap();
-
-            // read expected JSON output
-            let arrow_json = read_gzip_json(version, path);
-            assert!(arrow_json.equals_reader(&mut reader).unwrap());
-        });
-    }
-
-    #[test]
-    fn read_generated_streams_100() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "1.0.0-littleendian";
-        // the test is repetitive, thus we can read all supported files at once
-        let paths = vec![
-            "generated_interval",
-            "generated_datetime",
-            "generated_dictionary",
-            "generated_map",
-            // "generated_map_non_canonical",
-            "generated_nested",
-            "generated_null_trivial",
-            "generated_null",
-            "generated_primitive_no_batches",
-            "generated_primitive_zerolength",
-            "generated_primitive",
-        ];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.stream",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let mut reader = StreamReader::try_new(file, None).unwrap();
-
-            // read expected JSON output
-            let arrow_json = read_gzip_json(version, path);
-            assert!(arrow_json.equals_reader(&mut reader).unwrap());
-            // the next batch must be empty
-            assert!(reader.next().is_none());
-            // the stream must indicate that it's finished
-            assert!(reader.is_finished());
-        });
-    }
-
-    #[test]
-    #[cfg(feature = "ipc_compression")]
-    fn read_generated_streams_200() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "2.0.0-compression";
-
-        // the test is repetitive, thus we can read all supported files at once
-        let paths = vec!["generated_lz4", "generated_zstd"];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.stream",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let mut reader = StreamReader::try_new(file, None).unwrap();
-
-            // read expected JSON output
-            let arrow_json = read_gzip_json(version, path);
-            assert!(arrow_json.equals_reader(&mut reader).unwrap());
-            // the next batch must be empty
-            assert!(reader.next().is_none());
-            // the stream must indicate that it's finished
-            assert!(reader.is_finished());
-        });
-    }
-
-    #[test]
-    #[cfg(not(feature = "ipc_compression"))]
-    fn read_generated_streams_200_negative() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "2.0.0-compression";
-
-        // the test is repetitive, thus we can read all supported files at once
-        let cases = vec![("generated_lz4", "LZ4_FRAME"), ("generated_zstd", "ZSTD")];
-        cases.iter().for_each(|(path, compression_name)| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.stream",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let mut reader = StreamReader::try_new(file, None).unwrap();
-            let err = reader.next().unwrap().unwrap_err();
-            let expected_error = format!(
-                "Invalid argument error: compression type {} not supported because arrow was not compiled with the ipc_compression feature",
-                compression_name
-            );
-            assert_eq!(err.to_string(), expected_error);
-        });
-    }
-
-    #[test]
-    #[cfg(feature = "ipc_compression")]
-    fn read_generated_files_200() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "2.0.0-compression";
-        // the test is repetitive, thus we can read all supported files at once
-        let paths = vec!["generated_lz4", "generated_zstd"];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let mut reader = FileReader::try_new(file, None).unwrap();
-
-            // read expected JSON output
-            let arrow_json = read_gzip_json(version, path);
-            assert!(arrow_json.equals_reader(&mut reader).unwrap());
-        });
-    }
-
-    #[test]
-    #[cfg(not(feature = "ipc_compression"))]
-    fn read_generated_files_200_negative() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "2.0.0-compression";
-        // the test is repetitive, thus we can read all supported files at once
-        let cases = vec![("generated_lz4", "LZ4_FRAME"), ("generated_zstd", "ZSTD")];
-        cases.iter().for_each(|(path, compression_name)| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let mut reader = FileReader::try_new(file, None).unwrap();
-
-            let err = reader.next().unwrap().unwrap_err();
-            let expected_error = format!(
-                "Invalid argument error: compression type {} not supported because arrow was not compiled with the ipc_compression feature",
-                compression_name
-            );
-            assert_eq!(err.to_string(), expected_error);
-        });
-    }
 
     fn create_test_projection_schema() -> Schema {
         // define field types
@@ -1816,22 +1488,6 @@ mod tests {
         check_union_with_builder(UnionBuilder::new_sparse());
     }
 
-    /// Read gzipped JSON file
-    fn read_gzip_json(version: &str, path: &str) -> ArrowJson {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let file = File::open(format!(
-            "{}/arrow-ipc-stream/integration/{}/{}.json.gz",
-            testdata, version, path
-        ))
-        .unwrap();
-        let mut gz = GzDecoder::new(&file);
-        let mut s = String::new();
-        gz.read_to_string(&mut s).unwrap();
-        // convert to Arrow JSON
-        let arrow_json: ArrowJson = serde_json::from_str(&s).unwrap();
-        arrow_json
-    }
-
     #[test]
     fn test_roundtrip_stream_nested_dict() {
         let xs = vec!["AA", "BB", "AA", "CC", "BB"];