@@ -21,7 +21,7 @@ use std::fmt::{self, Debug, Display};
21
21
22
22
use crate :: { Result , ScalarValue } ;
23
23
24
- use arrow_schema:: { Schema , SchemaRef } ;
24
+ use arrow_schema:: { DataType , Schema , SchemaRef } ;
25
25
26
26
/// Represents a value with a degree of certainty. `Precision` is used to
27
27
/// propagate information about the precision of statistical values.
@@ -170,24 +170,63 @@ impl Precision<ScalarValue> {
170
170
pub fn add ( & self , other : & Precision < ScalarValue > ) -> Precision < ScalarValue > {
171
171
match ( self , other) {
172
172
( Precision :: Exact ( a) , Precision :: Exact ( b) ) => {
173
- if let Ok ( result) = a. add ( b) {
174
- Precision :: Exact ( result)
175
- } else {
176
- Precision :: Absent
177
- }
173
+ a. add ( b) . map ( Precision :: Exact ) . unwrap_or ( Precision :: Absent )
178
174
}
179
175
( Precision :: Inexact ( a) , Precision :: Exact ( b) )
180
176
| ( Precision :: Exact ( a) , Precision :: Inexact ( b) )
181
- | ( Precision :: Inexact ( a) , Precision :: Inexact ( b) ) => {
182
- if let Ok ( result) = a. add ( b) {
183
- Precision :: Inexact ( result)
184
- } else {
185
- Precision :: Absent
186
- }
177
+ | ( Precision :: Inexact ( a) , Precision :: Inexact ( b) ) => a
178
+ . add ( b)
179
+ . map ( Precision :: Inexact )
180
+ . unwrap_or ( Precision :: Absent ) ,
181
+ ( _, _) => Precision :: Absent ,
182
+ }
183
+ }
184
+
185
+ /// Calculates the difference of two (possibly inexact) [`ScalarValue`] values,
186
+ /// conservatively propagating exactness information. If one of the input
187
+ /// values is [`Precision::Absent`], the result is `Absent` too.
188
+ pub fn sub ( & self , other : & Precision < ScalarValue > ) -> Precision < ScalarValue > {
189
+ match ( self , other) {
190
+ ( Precision :: Exact ( a) , Precision :: Exact ( b) ) => {
191
+ a. sub ( b) . map ( Precision :: Exact ) . unwrap_or ( Precision :: Absent )
187
192
}
193
+ ( Precision :: Inexact ( a) , Precision :: Exact ( b) )
194
+ | ( Precision :: Exact ( a) , Precision :: Inexact ( b) )
195
+ | ( Precision :: Inexact ( a) , Precision :: Inexact ( b) ) => a
196
+ . sub ( b)
197
+ . map ( Precision :: Inexact )
198
+ . unwrap_or ( Precision :: Absent ) ,
199
+ ( _, _) => Precision :: Absent ,
200
+ }
201
+ }
202
+
203
+ /// Calculates the multiplication of two (possibly inexact) [`ScalarValue`] values,
204
+ /// conservatively propagating exactness information. If one of the input
205
+ /// values is [`Precision::Absent`], the result is `Absent` too.
206
+ pub fn multiply ( & self , other : & Precision < ScalarValue > ) -> Precision < ScalarValue > {
207
+ match ( self , other) {
208
+ ( Precision :: Exact ( a) , Precision :: Exact ( b) ) => a
209
+ . mul_checked ( b)
210
+ . map ( Precision :: Exact )
211
+ . unwrap_or ( Precision :: Absent ) ,
212
+ ( Precision :: Inexact ( a) , Precision :: Exact ( b) )
213
+ | ( Precision :: Exact ( a) , Precision :: Inexact ( b) )
214
+ | ( Precision :: Inexact ( a) , Precision :: Inexact ( b) ) => a
215
+ . mul_checked ( b)
216
+ . map ( Precision :: Inexact )
217
+ . unwrap_or ( Precision :: Absent ) ,
188
218
( _, _) => Precision :: Absent ,
189
219
}
190
220
}
221
+
222
+ /// Casts the value to the given data type, propagating exactness information.
223
+ pub fn cast_to ( & self , data_type : & DataType ) -> Result < Precision < ScalarValue > > {
224
+ match self {
225
+ Precision :: Exact ( value) => value. cast_to ( data_type) . map ( Precision :: Exact ) ,
226
+ Precision :: Inexact ( value) => value. cast_to ( data_type) . map ( Precision :: Inexact ) ,
227
+ Precision :: Absent => Ok ( Precision :: Absent ) ,
228
+ }
229
+ }
191
230
}
192
231
193
232
impl < T : Debug + Clone + PartialEq + Eq + PartialOrd > Debug for Precision < T > {
@@ -210,6 +249,18 @@ impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Display for Precision<T> {
210
249
}
211
250
}
212
251
252
+ impl From < Precision < usize > > for Precision < ScalarValue > {
253
+ fn from ( value : Precision < usize > ) -> Self {
254
+ match value {
255
+ Precision :: Exact ( v) => Precision :: Exact ( ScalarValue :: UInt64 ( Some ( v as u64 ) ) ) ,
256
+ Precision :: Inexact ( v) => {
257
+ Precision :: Inexact ( ScalarValue :: UInt64 ( Some ( v as u64 ) ) )
258
+ }
259
+ Precision :: Absent => Precision :: Absent ,
260
+ }
261
+ }
262
+ }
263
+
213
264
/// Statistics for a relation
214
265
/// Fields are optional and can be inexact because the sources
215
266
/// sometimes provide approximate estimates for performance reasons
@@ -401,6 +452,11 @@ impl Display for Statistics {
401
452
} else {
402
453
s
403
454
} ;
455
+ let s = if cs. sum_value != Precision :: Absent {
456
+ format ! ( "{} Sum={}" , s, cs. sum_value)
457
+ } else {
458
+ s
459
+ } ;
404
460
let s = if cs. null_count != Precision :: Absent {
405
461
format ! ( "{} Null={}" , s, cs. null_count)
406
462
} else {
@@ -436,6 +492,8 @@ pub struct ColumnStatistics {
436
492
pub max_value : Precision < ScalarValue > ,
437
493
/// Minimum value of column
438
494
pub min_value : Precision < ScalarValue > ,
495
+ /// Sum value of a column
496
+ pub sum_value : Precision < ScalarValue > ,
439
497
/// Number of distinct values
440
498
pub distinct_count : Precision < usize > ,
441
499
}
@@ -458,6 +516,7 @@ impl ColumnStatistics {
458
516
null_count : Precision :: Absent ,
459
517
max_value : Precision :: Absent ,
460
518
min_value : Precision :: Absent ,
519
+ sum_value : Precision :: Absent ,
461
520
distinct_count : Precision :: Absent ,
462
521
}
463
522
}
@@ -469,6 +528,7 @@ impl ColumnStatistics {
469
528
self . null_count = self . null_count . to_inexact ( ) ;
470
529
self . max_value = self . max_value . to_inexact ( ) ;
471
530
self . min_value = self . min_value . to_inexact ( ) ;
531
+ self . sum_value = self . sum_value . to_inexact ( ) ;
472
532
self . distinct_count = self . distinct_count . to_inexact ( ) ;
473
533
self
474
534
}
@@ -563,6 +623,26 @@ mod tests {
563
623
assert_eq ! ( precision1. add( & absent_precision) , Precision :: Absent ) ;
564
624
}
565
625
626
#[test]
fn test_add_scalar() {
    let lhs = Precision::Exact(ScalarValue::Int32(Some(42)));

    // Each entry is (right-hand operand, expected sum).
    let cases = [
        (
            Precision::Exact(ScalarValue::Int32(Some(23))),
            Precision::Exact(ScalarValue::Int32(Some(65))),
        ),
        (
            Precision::Inexact(ScalarValue::Int32(Some(23))),
            Precision::Inexact(ScalarValue::Int32(Some(65))),
        ),
        // A null operand yields a null result, following ScalarValue arithmetic.
        (
            Precision::Exact(ScalarValue::Int32(None)),
            Precision::Exact(ScalarValue::Int32(None)),
        ),
        (Precision::Absent, Precision::Absent),
    ];

    for (rhs, expected) in &cases {
        assert_eq!(lhs.add(rhs), *expected);
    }
}
645
+
566
646
#[ test]
567
647
fn test_sub ( ) {
568
648
let precision1 = Precision :: Exact ( 42 ) ;
@@ -575,6 +655,26 @@ mod tests {
575
655
assert_eq ! ( precision1. sub( & absent_precision) , Precision :: Absent ) ;
576
656
}
577
657
658
#[test]
fn test_sub_scalar() {
    let lhs = Precision::Exact(ScalarValue::Int32(Some(42)));

    // Each entry is (right-hand operand, expected difference).
    let cases = [
        (
            Precision::Exact(ScalarValue::Int32(Some(23))),
            Precision::Exact(ScalarValue::Int32(Some(19))),
        ),
        (
            Precision::Inexact(ScalarValue::Int32(Some(23))),
            Precision::Inexact(ScalarValue::Int32(Some(19))),
        ),
        // A null operand yields a null result, following ScalarValue arithmetic.
        (
            Precision::Exact(ScalarValue::Int32(None)),
            Precision::Exact(ScalarValue::Int32(None)),
        ),
        (Precision::Absent, Precision::Absent),
    ];

    for (rhs, expected) in &cases {
        assert_eq!(lhs.sub(rhs), *expected);
    }
}
677
+
578
678
#[ test]
579
679
fn test_multiply ( ) {
580
680
let precision1 = Precision :: Exact ( 6 ) ;
@@ -588,6 +688,54 @@ mod tests {
588
688
assert_eq ! ( precision1. multiply( & absent_precision) , Precision :: Absent ) ;
589
689
}
590
690
691
#[test]
fn test_multiply_scalar() {
    let lhs = Precision::Exact(ScalarValue::Int32(Some(6)));

    // Each entry is (right-hand operand, expected product).
    let cases = [
        (
            Precision::Exact(ScalarValue::Int32(Some(5))),
            Precision::Exact(ScalarValue::Int32(Some(30))),
        ),
        (
            Precision::Inexact(ScalarValue::Int32(Some(5))),
            Precision::Inexact(ScalarValue::Int32(Some(30))),
        ),
        // A null operand yields a null result, as ScalarValue::mul_checked does.
        (
            Precision::Exact(ScalarValue::Int32(None)),
            Precision::Exact(ScalarValue::Int32(None)),
        ),
        (Precision::Absent, Precision::Absent),
    ];

    for (rhs, expected) in &cases {
        assert_eq!(lhs.multiply(rhs), *expected);
    }
}
710
+
711
#[test]
fn test_cast_to() {
    // Widening Int32 -> Int64 is expected to succeed for every case below.
    let widen = |input: Precision<ScalarValue>| input.cast_to(&DataType::Int64).unwrap();

    // A successful cast converts the payload and keeps the precision kind.
    assert_eq!(
        widen(Precision::Exact(ScalarValue::Int32(Some(42)))),
        Precision::Exact(ScalarValue::Int64(Some(42))),
    );
    assert_eq!(
        widen(Precision::Inexact(ScalarValue::Int32(Some(42)))),
        Precision::Inexact(ScalarValue::Int64(Some(42))),
    );

    // A null value stays null in the target type.
    assert_eq!(
        widen(Precision::Exact(ScalarValue::Int32(None))),
        Precision::Exact(ScalarValue::Int64(None)),
    );

    // 256 does not fit into Int8, so the cast must report an error.
    let narrowed = Precision::Exact(ScalarValue::Int32(Some(256))).cast_to(&DataType::Int8);
    assert!(narrowed.is_err());
}
738
+
591
739
#[ test]
592
740
fn test_precision_cloning ( ) {
593
741
// Precision<usize> is copy
@@ -646,6 +794,7 @@ mod tests {
646
794
null_count : Precision :: Exact ( null_count) ,
647
795
max_value : Precision :: Exact ( ScalarValue :: Int64 ( Some ( 42 ) ) ) ,
648
796
min_value : Precision :: Exact ( ScalarValue :: Int64 ( Some ( 64 ) ) ) ,
797
+ sum_value : Precision :: Exact ( ScalarValue :: Int64 ( Some ( 4600 ) ) ) ,
649
798
distinct_count : Precision :: Exact ( 100 ) ,
650
799
}
651
800
}
0 commit comments