@@ -337,6 +337,14 @@ impl ParquetExecStream {
337
337
file_metrics,
338
338
) ) ;
339
339
}
340
+ if let Some ( range) = & file. range {
341
+ assert ! (
342
+ range. start >= 0 && range. end > 0 && range. end > range. start,
343
+ "invalid range specified: {:?}" ,
344
+ range
345
+ ) ;
346
+ opt = opt. with_range ( range. start , range. end ) ;
347
+ }
340
348
341
349
let file_reader = SerializedFileReader :: new_with_options (
342
350
ChunkObjectReader ( object_reader) ,
@@ -649,13 +657,15 @@ mod tests {
649
657
} ;
650
658
651
659
use super :: * ;
660
+ use crate :: datasource:: listing:: FileRange ;
652
661
use crate :: execution:: options:: CsvReadOptions ;
653
662
use crate :: prelude:: { ParquetReadOptions , SessionConfig , SessionContext } ;
654
663
use arrow:: array:: Float32Array ;
655
664
use arrow:: {
656
665
array:: { Int64Array , Int8Array , StringArray } ,
657
666
datatypes:: { DataType , Field } ,
658
667
} ;
668
+ use datafusion_data_access:: object_store:: local;
659
669
use datafusion_expr:: { col, lit} ;
660
670
use futures:: StreamExt ;
661
671
use parquet:: {
@@ -1099,6 +1109,81 @@ mod tests {
1099
1109
Ok ( ( ) )
1100
1110
}
1101
1111
1112
+ #[ tokio:: test]
1113
+ async fn parquet_exec_with_range ( ) -> Result < ( ) > {
1114
+ fn file_range ( file : String , start : i64 , end : i64 ) -> PartitionedFile {
1115
+ PartitionedFile {
1116
+ file_meta : local:: local_unpartitioned_file ( file) ,
1117
+ partition_values : vec ! [ ] ,
1118
+ range : Some ( FileRange { start, end } ) ,
1119
+ }
1120
+ }
1121
+
1122
+ async fn assert_parquet_read (
1123
+ file_groups : Vec < Vec < PartitionedFile > > ,
1124
+ expected_row_num : Option < usize > ,
1125
+ task_ctx : Arc < TaskContext > ,
1126
+ file_schema : SchemaRef ,
1127
+ ) -> Result < ( ) > {
1128
+ let parquet_exec = ParquetExec :: new (
1129
+ FileScanConfig {
1130
+ object_store : Arc :: new ( LocalFileSystem { } ) ,
1131
+ file_groups,
1132
+ file_schema,
1133
+ statistics : Statistics :: default ( ) ,
1134
+ projection : None ,
1135
+ limit : None ,
1136
+ table_partition_cols : vec ! [ ] ,
1137
+ } ,
1138
+ None ,
1139
+ ) ;
1140
+ assert_eq ! ( parquet_exec. output_partitioning( ) . partition_count( ) , 1 ) ;
1141
+ let results = parquet_exec. execute ( 0 , task_ctx) . await ?. next ( ) . await ;
1142
+
1143
+ if let Some ( expected_row_num) = expected_row_num {
1144
+ let batch = results. unwrap ( ) ?;
1145
+ assert_eq ! ( expected_row_num, batch. num_rows( ) ) ;
1146
+ } else {
1147
+ assert ! ( results. is_none( ) ) ;
1148
+ }
1149
+
1150
+ Ok ( ( ) )
1151
+ }
1152
+
1153
+ let session_ctx = SessionContext :: new ( ) ;
1154
+ let testdata = crate :: test_util:: parquet_test_data ( ) ;
1155
+ let filename = format ! ( "{}/alltypes_plain.parquet" , testdata) ;
1156
+ let file_schema = ParquetFormat :: default ( )
1157
+ . infer_schema ( local_object_reader_stream ( vec ! [ filename. clone( ) ] ) )
1158
+ . await ?;
1159
+
1160
+ let group_empty = vec ! [ vec![ file_range( filename. clone( ) , 0 , 5 ) ] ] ;
1161
+ let group_contain = vec ! [ vec![ file_range( filename. clone( ) , 5 , i64 :: MAX ) ] ] ;
1162
+ let group_all = vec ! [ vec![
1163
+ file_range( filename. clone( ) , 0 , 5 ) ,
1164
+ file_range( filename. clone( ) , 5 , i64 :: MAX ) ,
1165
+ ] ] ;
1166
+
1167
+ assert_parquet_read (
1168
+ group_empty,
1169
+ None ,
1170
+ session_ctx. task_ctx ( ) ,
1171
+ file_schema. clone ( ) ,
1172
+ )
1173
+ . await ?;
1174
+ assert_parquet_read (
1175
+ group_contain,
1176
+ Some ( 8 ) ,
1177
+ session_ctx. task_ctx ( ) ,
1178
+ file_schema. clone ( ) ,
1179
+ )
1180
+ . await ?;
1181
+ assert_parquet_read ( group_all, Some ( 8 ) , session_ctx. task_ctx ( ) , file_schema)
1182
+ . await ?;
1183
+
1184
+ Ok ( ( ) )
1185
+ }
1186
+
1102
1187
#[ tokio:: test]
1103
1188
async fn parquet_exec_with_partition ( ) -> Result < ( ) > {
1104
1189
let session_ctx = SessionContext :: new ( ) ;
@@ -1171,6 +1256,7 @@ mod tests {
1171
1256
last_modified : None ,
1172
1257
} ,
1173
1258
partition_values : vec ! [ ] ,
1259
+ range : None ,
1174
1260
} ;
1175
1261
1176
1262
let parquet_exec = ParquetExec :: new (
0 commit comments