@@ -332,7 +332,10 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
332
332
// If only computing chunk-level statistics compute them here, page-level statistics
333
333
// are computed in [`Self::write_mini_batch`] and used to update chunk statistics in
334
334
// [`Self::add_data_page`]
335
- if self . statistics_enabled == EnabledStatistics :: Chunk {
335
+ if self . statistics_enabled == EnabledStatistics :: Chunk
336
+ // INTERVAL has undefined sort order, so don't write min/max stats for it
337
+ && self . descr . converted_type ( ) != ConvertedType :: INTERVAL
338
+ {
336
339
match ( min, max) {
337
340
( Some ( min) , Some ( max) ) => {
338
341
update_min ( & self . descr , min, & mut self . column_metrics . min_column_value ) ;
@@ -1093,7 +1096,6 @@ fn is_nan<T: ParquetValueType>(descr: &ColumnDescriptor, val: &T) -> bool {
1093
1096
///
1094
1097
/// If `cur` is `None`, sets `cur` to `Some(val)`, otherwise calls `should_update` with
1095
1098
/// the value of `cur`, and updates `cur` to `Some(val)` if it returns `true`
1096
-
1097
1099
fn update_stat < T : ParquetValueType , F > (
1098
1100
descr : & ColumnDescriptor ,
1099
1101
val : & T ,
@@ -3066,6 +3068,30 @@ mod tests {
3066
3068
Ok ( ( ) )
3067
3069
}
3068
3070
3071
// Test added by this change: writes three 12-byte values to a column whose
// converted type is INTERVAL and asserts that the column chunk statistics
// carry no min/max. The writer-side change in this diff skips min/max for
// INTERVAL because its sort order is undefined.
+ #[ test]
3072
+ fn test_interval_stats_should_not_have_min_max ( ) {
3073
// Three distinct 12-byte inputs, differing only in the last byte, each
// converted from a `Vec` via `ByteArray::from(..).into()`.
+ let input = [
3074
+ vec ! [ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ] ,
3075
+ vec ! [ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 ] ,
3076
+ vec ! [ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 2 ] ,
3077
+ ]
3078
+ . into_iter ( )
3079
+ . map ( |s| ByteArray :: from ( s) . into ( ) )
3080
+ . collect :: < Vec < _ > > ( ) ;
3081
+
3082
+ let page_writer = get_test_page_writer ( ) ;
3083
+ let mut writer = get_test_interval_column_writer ( page_writer) ;
3084
// No definition or repetition levels are supplied (both `None`).
+ writer. write_batch ( & input, None , None ) . unwrap ( ) ;
3085
+
3086
+ let metadata = writer. close ( ) . unwrap ( ) . metadata ;
3087
// Statistics must be present and of the FixedLenByteArray variant;
// anything else fails the test via the panic below.
+ let stats = if let Some ( Statistics :: FixedLenByteArray ( stats) ) = metadata. statistics ( ) {
3088
+ stats. clone ( )
3089
+ } else {
3090
+ panic ! ( "metadata missing statistics" ) ;
3091
+ } ;
3092
// The actual check: min/max must not be set for the INTERVAL column.
+ assert ! ( !stats. has_min_max_set( ) ) ;
3093
+ }
3094
+
3069
3095
fn write_multiple_pages < T : DataType > (
3070
3096
column_descr : & Arc < ColumnDescriptor > ,
3071
3097
pages : & [ & [ Option < T :: T > ] ] ,
@@ -3395,8 +3421,7 @@ mod tests {
3395
3421
values : & [ FixedLenByteArray ] ,
3396
3422
) -> ValueStatistics < FixedLenByteArray > {
3397
3423
let page_writer = get_test_page_writer ( ) ;
3398
- let props = Default :: default ( ) ;
3399
- let mut writer = get_test_float16_column_writer ( page_writer, 0 , 0 , props) ;
3424
+ let mut writer = get_test_float16_column_writer ( page_writer) ;
3400
3425
writer. write_batch ( values, None , None ) . unwrap ( ) ;
3401
3426
3402
3427
let metadata = writer. close ( ) . unwrap ( ) . metadata ;
@@ -3409,12 +3434,9 @@ mod tests {
3409
3434
3410
3435
// Test helper: builds a `FixedLenByteArrayType` column writer over the
// float16 test column descriptor, writing through `page_writer`.
//
// This hunk simplifies the helper's signature: the `max_def_level`,
// `max_rep_level` and `props` parameters are removed (the `-` lines), and the
// new body (the `+` lines) hard-codes levels `0, 0` and passes
// `Default::default()` as the writer properties.
fn get_test_float16_column_writer (
3411
3436
page_writer : Box < dyn PageWriter > ,
3412
- max_def_level : i16 ,
3413
- max_rep_level : i16 ,
3414
- props : WriterPropertiesPtr ,
3415
3437
) -> ColumnWriterImpl < ' static , FixedLenByteArrayType > {
3416
- let descr = Arc :: new ( get_test_float16_column_descr ( max_def_level , max_rep_level ) ) ;
3417
- let column_writer = get_column_writer ( descr, props , page_writer) ;
3438
+ let descr = Arc :: new ( get_test_float16_column_descr ( 0 , 0 ) ) ;
3439
+ let column_writer = get_column_writer ( descr, Default :: default ( ) , page_writer) ;
3418
3440
// Hand the generic column writer to the typed accessor for FixedLenByteArrayType.
get_typed_column_writer :: < FixedLenByteArrayType > ( column_writer)
3419
3441
}
3420
3442
@@ -3429,6 +3451,25 @@ mod tests {
3429
3451
ColumnDescriptor :: new ( Arc :: new ( tpe) , max_def_level, max_rep_level, path)
3430
3452
}
3431
3453
3454
// Test helper added by this change: builds a `FixedLenByteArrayType` column
// writer for the INTERVAL test column, writing through `page_writer`.
// The descriptor comes from `get_test_interval_column_descr()` and the writer
// properties are `Default::default()`.
+ fn get_test_interval_column_writer (
3455
+ page_writer : Box < dyn PageWriter > ,
3456
+ ) -> ColumnWriterImpl < ' static , FixedLenByteArrayType > {
3457
+ let descr = Arc :: new ( get_test_interval_column_descr ( ) ) ;
3458
+ let column_writer = get_column_writer ( descr, Default :: default ( ) , page_writer) ;
3459
// Hand the generic column writer to the typed accessor for FixedLenByteArrayType.
+ get_typed_column_writer :: < FixedLenByteArrayType > ( column_writer)
3460
+ }
3461
+
3462
// Test helper added by this change: returns a `ColumnDescriptor` for a
// primitive column named "col" whose physical type is taken from
// `FixedLenByteArrayType::get_physical_type()`, with length 12 and converted
// type `ConvertedType::INTERVAL`. Definition and repetition levels are both 0.
+ fn get_test_interval_column_descr ( ) -> ColumnDescriptor {
3463
+ let path = ColumnPath :: from ( "col" ) ;
3464
+ let tpe =
3465
+ SchemaType :: primitive_type_builder ( "col" , FixedLenByteArrayType :: get_physical_type ( ) )
3466
// Length 12 matches the 12-byte inputs that the INTERVAL test writes.
+ . with_length ( 12 )
3467
+ . with_converted_type ( ConvertedType :: INTERVAL )
3468
+ . build ( )
3469
// Panics if the builder rejects this type definition.
+ . unwrap ( ) ;
3470
+ ColumnDescriptor :: new ( Arc :: new ( tpe) , 0 , 0 , path)
3471
+ }
3472
+
3432
3473
/// Returns column writer for UINT32 Column provided as ConvertedType only
3433
3474
fn get_test_unsigned_int_given_as_converted_column_writer < ' a , T : DataType > (
3434
3475
page_writer : Box < dyn PageWriter + ' a > ,
0 commit comments