Skip to content

Commit 5a51f46

Browse files
authored
Merge pull request #34 from marshallpierce/count-saturates
Count saturates
2 parents 62e531f + 566e2e0 commit 5a51f46

File tree

5 files changed

+377
-27
lines changed

5 files changed

+377
-27
lines changed

benches/record.rs

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
#![feature(test)]
2+
3+
extern crate hdrsample;
4+
extern crate rand;
5+
extern crate test;
6+
7+
use hdrsample::*;
8+
use self::rand::Rng;
9+
use self::test::Bencher;
10+
11+
#[bench]
12+
fn record_precalc_random_values_with_1_count_u64(b: &mut Bencher) {
13+
let mut h = Histogram::<u64>::new_with_bounds(1, u64::max_value(), 3).unwrap();
14+
let mut indices = Vec::<u64>::new();
15+
let mut rng = rand::weak_rng();
16+
17+
// same value approach as record_precalc_random_values_with_max_count_u64 so that
18+
// they are comparable
19+
20+
for _ in 0..1000_000 {
21+
indices.push(rng.gen());
22+
}
23+
24+
b.iter(|| {
25+
for i in indices.iter() {
26+
// u64 counts, won't overflow
27+
h.record(*i).unwrap()
28+
}
29+
})
30+
}
31+
32+
#[bench]
33+
fn record_precalc_random_values_with_max_count_u64(b: &mut Bencher) {
34+
let mut h = Histogram::<u64>::new_with_bounds(1, u64::max_value(), 3).unwrap();
35+
let mut indices = Vec::<u64>::new();
36+
let mut rng = rand::weak_rng();
37+
38+
// store values in an array and re-use so we can be sure to hit the overflow case
39+
40+
for _ in 0..1000_000 {
41+
let r = rng.gen();
42+
indices.push(r);
43+
h.record_n(r, u64::max_value()).unwrap();
44+
}
45+
46+
b.iter(|| {
47+
for i in indices.iter() {
48+
// all values are already at u64
49+
h.record(*i).unwrap()
50+
}
51+
})
52+
}
53+
54+
#[bench]
55+
fn record_random_values_with_1_count_u64(b: &mut Bencher) {
56+
let mut h = Histogram::<u64>::new_with_bounds(1, u64::max_value(), 3).unwrap();
57+
let mut rng = rand::weak_rng();
58+
59+
// This should be *slower* than the benchmarks above where we pre-calculate the values
60+
// outside of the hot loop. If it isn't, then those measurements are likely spurious.
61+
62+
b.iter(|| {
63+
for _ in 0..1000_000 {
64+
h.record(rng.gen()).unwrap()
65+
}
66+
})
67+
}
68+
69+
#[bench]
70+
fn add_precalc_random_value_1_count_same_dimensions_u64(b: &mut Bencher) {
71+
do_add_benchmark(b, 1, || { Histogram::<u64>::new_with_bounds(1, u64::max_value(), 3).unwrap() })
72+
}
73+
74+
#[bench]
75+
fn add_precalc_random_value_max_count_same_dimensions_u64(b: &mut Bencher) {
76+
do_add_benchmark(b, u64::max_value(), || { Histogram::<u64>::new_with_bounds(1, u64::max_value(), 3).unwrap() })
77+
}
78+
79+
#[bench]
80+
fn add_precalc_random_value_1_count_different_precision_u64(b: &mut Bencher) {
81+
do_add_benchmark(b, 1, || { Histogram::<u64>::new_with_bounds(1, u64::max_value(), 2).unwrap() })
82+
}
83+
84+
#[bench]
85+
fn add_precalc_random_value_max_count_different_precision_u64(b: &mut Bencher) {
86+
do_add_benchmark(b, u64::max_value(), || { Histogram::<u64>::new_with_bounds(1, u64::max_value(), 2).unwrap() })
87+
}
88+
89+
#[bench]
90+
fn subtract_precalc_random_value_1_count_same_dimensions_u64(b: &mut Bencher) {
91+
do_subtract_benchmark(b, 1, || { Histogram::<u64>::new_with_bounds(1, u64::max_value(), 3).unwrap() })
92+
}
93+
94+
// can't do subtraction with max count because it will error after the first iteration because
95+
// subtrahend count exceeds minuend. Similarly, when subtracting a different precision, the same
96+
// issue happens because the smallest equivalent value in the lower precision can map to a different
97+
// bucket in higher precision so we cannot easily pre-populate.
98+
99+
fn do_subtract_benchmark<F: Fn() -> Histogram<u64>>(b: &mut Bencher, count_at_each_addend_value: u64, addend_factory: F) {
100+
let mut accum = Histogram::<u64>::new_with_bounds(1, u64::max_value(), 3).unwrap();
101+
let mut subtrahends = Vec::new();
102+
let mut rng = rand::weak_rng();
103+
104+
for _ in 0..1000 {
105+
let mut h = addend_factory();
106+
107+
for _ in 0..1000 {
108+
let r = rng.gen();
109+
h.record_n(r, count_at_each_addend_value).unwrap();
110+
// ensure there's a count to subtract from
111+
accum.record_n(r, u64::max_value()).unwrap();
112+
}
113+
114+
subtrahends.push(h);
115+
}
116+
117+
b.iter(|| {
118+
for h in subtrahends.iter() {
119+
accum.subtract(h).unwrap();
120+
}
121+
})
122+
}
123+
124+
fn do_add_benchmark<F: Fn() -> Histogram<u64>>(b: &mut Bencher, count_at_each_addend_value: u64, addend_factory: F) {
125+
let mut accum = Histogram::<u64>::new_with_bounds(1, u64::max_value(), 3).unwrap();
126+
let mut addends = Vec::new();
127+
let mut rng = rand::weak_rng();
128+
129+
for _ in 0..1000 {
130+
let mut h = addend_factory();
131+
132+
for _ in 0..1000 {
133+
let r = rng.gen();
134+
h.record_n(r, count_at_each_addend_value).unwrap();
135+
}
136+
137+
addends.push(h);
138+
}
139+
140+
b.iter(|| {
141+
for h in addends.iter() {
142+
accum.add(h).unwrap();
143+
}
144+
})
145+
}

src/iterators/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ impl<'a, T: 'a, P> Iterator for HistogramIterator<'a, T, P>
159159
}
160160

161161
// maintain total count so we can yield percentiles
162+
// TODO overflow
162163
self.total_count_to_index = self.total_count_to_index + count.to_u64().unwrap();
163164

164165
// make sure we don't add this index again

src/lib.rs

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -171,12 +171,14 @@ const ORIGINAL_MAX: u64 = 0;
171171
/// into an integer count. Partial ordering is used for thresholding, also usually in the context
172172
/// of percentiles.
173173
pub trait Counter
174-
: num::Num + num::ToPrimitive + num::FromPrimitive + Copy + PartialOrd<Self> {
174+
: num::Num + num::ToPrimitive + num::FromPrimitive + num::Saturating + num::CheckedSub
175+
+ num::CheckedAdd + Copy + PartialOrd<Self>{
175176
}
176177

177178
// auto-implement marker trait
178179
impl<T> Counter for T
179-
where T: num::Num + num::ToPrimitive + num::FromPrimitive + Copy + PartialOrd<T>
180+
where T: num::Num + num::ToPrimitive + num::FromPrimitive + num::Saturating + num::CheckedSub
181+
+ num::CheckedAdd + Copy + PartialOrd<T>
180182
{
181183
}
182184

@@ -455,13 +457,14 @@ impl<T: Counter> Histogram<T> {
455457
for i in 0..source.len() {
456458
let other_count = source[i];
457459
if other_count != T::zero() {
458-
self[i] = self[i] + other_count;
460+
self[i] = self[i].saturating_add(other_count);
459461
// TODO unwrapping .to_u64()
460-
observed_other_total_count = observed_other_total_count + other_count.to_u64().unwrap();
462+
observed_other_total_count = observed_other_total_count
463+
.saturating_add(other_count.to_u64().unwrap());
461464
}
462465
}
463466

464-
self.total_count = self.total_count + observed_other_total_count;
467+
self.total_count = self.total_count.saturating_add(observed_other_total_count);
465468
let mx = source.max();
466469
if mx > self.max() {
467470
self.update_max(mx);
@@ -552,6 +555,9 @@ impl<T: Counter> Histogram<T> {
552555
if other_count != T::zero() {
553556
let other_value = other.value_for(i);
554557
if self.count_at(other_value).unwrap() < other_count {
558+
// TODO Perhaps we should saturating sub here? Or expose some form of
559+
// pluggability so users could choose to error or saturate? Both seem useful.
560+
// It's also sort of inconsistent with overflow, which now saturates.
555561
return Err(SubtractionError::SubtrahendCountExceedsMinuendCount);
556562
}
557563
self.alter_n(other_value, other_count, false).expect("value should fit by now");
@@ -777,11 +783,19 @@ impl<T: Counter> Histogram<T> {
777783
}
778784

779785
fn alter_n(&mut self, value: u64, count: T, add: bool) -> Result<(), ()> {
786+
// TODO consider split out addition and subtraction cases; this isn't gaining much by
787+
// unifying since we have to test all the cases anyway, and the TODO below is marking a case
788+
// that might well be impossible but seems needed because of the (possibly false) symmetry
789+
// with addition
790+
791+
// add=false is used by subtract(), which should have already aborted if underflow was
792+
// possible
793+
780794
let success = if let Some(c) = self.mut_at(value) {
781795
if add {
782-
*c = *c + count;
796+
*c = (*c).saturating_add(count);
783797
} else {
784-
*c = *c - count;
798+
*c = (*c).checked_sub(&count).expect("count underflow on subtraction");
785799
}
786800
true
787801
} else {
@@ -798,9 +812,14 @@ impl<T: Counter> Histogram<T> {
798812
{
799813
let c = self.mut_at(value).expect("value should fit after resize");
800814
if add {
801-
*c = *c + count;
815+
// after resize, should be no possibility of overflow because this is a new slot
816+
*c = (*c).checked_add(&count).expect("count overflow after resize");
802817
} else {
803-
*c = *c - count;
818+
// TODO Not sure this code path can ever be hit: if subtraction requires minuend
819+
// count to exceed subtrahend count for a given value, we shouldn't ever need
820+
// to resize to subtract.
821+
// Anyway, at the very least, we know it shouldn't underflow.
822+
*c = (*c).checked_sub(&count).expect("count underflow after resize");
804823
}
805824
}
806825

@@ -809,9 +828,10 @@ impl<T: Counter> Histogram<T> {
809828

810829
self.update_min_max(value);
811830
if add {
812-
self.total_count = self.total_count + count.to_u64().unwrap();
831+
self.total_count = self.total_count.saturating_add(count.to_u64().unwrap());
813832
} else {
814-
self.total_count = self.total_count - count.to_u64().unwrap();
833+
self.total_count = self.total_count.checked_sub(count.to_u64().unwrap())
834+
.expect("total count underflow on subtraction");
815835
}
816836
Ok(())
817837
}
@@ -1113,6 +1133,7 @@ impl<T: Counter> Histogram<T> {
11131133

11141134
let mut total_to_current_index: u64 = 0;
11151135
for i in 0..self.len() {
1136+
// TODO overflow
11161137
total_to_current_index = total_to_current_index + self[i].to_u64().unwrap();
11171138
if total_to_current_index >= count_at_percentile {
11181139
let value_at_index = self.value_for(i);
@@ -1139,6 +1160,7 @@ impl<T: Counter> Histogram<T> {
11391160
}
11401161

11411162
let target_index = cmp::min(self.index_for(value), self.last());
1163+
// TODO overflow
11421164
let total_to_current_index =
11431165
(0..(target_index + 1)).map(|i| self[i]).fold(T::zero(), |t, v| t + v);
11441166
100.0 * total_to_current_index.to_f64().unwrap() / self.total_count as f64
@@ -1157,6 +1179,7 @@ impl<T: Counter> Histogram<T> {
11571179
pub fn count_between(&self, low: u64, high: u64) -> Result<T, ()> {
11581180
let low_index = self.index_for(low);
11591181
let high_index = cmp::min(self.index_for(high), self.last());
1182+
// TODO overflow
11601183
Ok((low_index..(high_index + 1)).map(|i| self[i]).fold(T::zero(), |t, v| t + v))
11611184
}
11621185

@@ -1229,6 +1252,7 @@ impl<T: Counter> Histogram<T> {
12291252
///
12301253
/// Note that the return value is capped at `u64::max_value()`.
12311254
pub fn median_equivalent(&self, value: u64) -> u64 {
1255+
// TODO isn't this just saturating?
12321256
match self.lowest_equivalent(value).overflowing_add(self.equivalent_range(value) >> 1) {
12331257
(_, of) if of => u64::max_value(),
12341258
(v, _) => v,
@@ -1444,8 +1468,7 @@ impl <T: Counter> RestatState<T> {
14441468
fn on_nonzero_count(&mut self, index: usize, count: T) {
14451469
// TODO don't unwrap here; weird user Counter types may not work.
14461470
// Fix Counter types to just be u8-64?
1447-
// TODO this can wrap, but not sure there's much we can do about that. Saturating add maybe?
1448-
self.total_count += count.to_u64().unwrap();
1471+
self.total_count = self.total_count.saturating_add(count.to_u64().unwrap());
14491472

14501473
self.max_index = Some(index);
14511474

@@ -1533,6 +1556,7 @@ impl<T: Counter, F: Counter> PartialEq<Histogram<F>> for Histogram<T>
15331556
if self.min_nz() != other.min_nz() {
15341557
return false;
15351558
}
1559+
// TODO may panic? Does the above guarantee that the other array is at least as long?
15361560
(0..self.len()).all(|i| self[i] == other[i])
15371561
}
15381562
}

src/serialization/tests.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@ fn serialize_roundtrip_all_zeros() {
7070

7171
assert_eq!(orig.total_count, deser.total_count);
7272
assert_eq!(orig.counts, deser.counts);
73-
7473
}
7574

7675
#[test]

0 commit comments

Comments
 (0)