Skip to content

Commit ee1243b

Browse files
committed Apr 2, 2025
Faster histogram implementation
This implementation is especially optimized for parallel updates, while the slowdown in serial updates is negligible.

```
                                          │ before.txt  │             after.txt              │
                                          │   sec/op    │   sec/op     vs base               │
CounterParallel/Histogram.Update_int-10     93.28n ± 2%   93.50n ± 1%        ~ (p=0.796 n=10)
CounterParallel/Histogram.Update_float-10   447.9n ± 5%   249.7n ± 7%  -44.25% (p=0.000 n=10)
CounterSerial/Histogram.Update_int-10       11.39n ± 3%   12.25n ± 2%   +7.60% (p=0.000 n=10)
CounterSerial/Histogram.Update_float-10     11.87n ± 3%   18.11n ± 0%  +52.68% (p=0.000 n=10)
geomean                                     48.74n        47.71n        -2.12%
```
1 parent 72bf628 commit ee1243b

File tree

6 files changed

+234
-28
lines changed

6 files changed

+234
-28
lines changed
 

‎counter_bench_test.go

+27-2
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,33 @@ func BenchmarkCounterParallel(b *testing.B) {
4848
benchmarkCounterParallel(b, "Uint64.Set", NewSet().NewUint64, (*Uint64).Set, 1)
4949
benchmarkCounterParallel(b, "Float64.Add", NewSet().NewFloat64, (*Float64).Add, 1)
5050
benchmarkCounterParallel(b, "Float64.Set", NewSet().NewFloat64, (*Float64).Set, 1)
51-
benchmarkCounterParallel(b, "Histogram.Update", NewSet().NewHistogram, (*Histogram).Update, 1)
51+
benchmarkCounterParallel(b, "Histogram.Update int", NewSet().NewHistogram, (*Histogram).Update, 1)
52+
benchmarkCounterParallel(b, "Histogram.Update float", NewSet().NewHistogram, (*Histogram).Update, 1.3)
53+
}
54+
55+
func BenchmarkCounterSerial(b *testing.B) {
56+
benchmarkCounter(b, "Uint64.Add", NewSet().NewUint64, (*Uint64).Add, 1)
57+
benchmarkCounter(b, "Uint64.Set", NewSet().NewUint64, (*Uint64).Set, 1)
58+
benchmarkCounter(b, "Float64.Add", NewSet().NewFloat64, (*Float64).Add, 1)
59+
benchmarkCounter(b, "Float64.Set", NewSet().NewFloat64, (*Float64).Set, 1)
60+
benchmarkCounter(b, "Histogram.Update int", NewSet().NewHistogram, (*Histogram).Update, 1)
61+
benchmarkCounter(b, "Histogram.Update float", NewSet().NewHistogram, (*Histogram).Update, 1.3)
62+
}
63+
64+
func benchmarkCounter[T any, V any](
65+
b *testing.B,
66+
name string,
67+
setup func(string, ...string) *T,
68+
do func(*T, V),
69+
value V,
70+
) {
71+
b.Helper()
72+
thing := setup("foo")
73+
b.Run(name, func(b *testing.B) {
74+
for b.Loop() {
75+
do(thing, value)
76+
}
77+
})
5278
}
5379

5480
func benchmarkCounterParallel[T any, V any](
@@ -61,7 +87,6 @@ func benchmarkCounterParallel[T any, V any](
6187
b.Helper()
6288
thing := setup("foo")
6389
b.Run(name, func(b *testing.B) {
64-
b.ReportAllocs()
6590
b.RunParallel(func(pb *testing.PB) {
6691
for pb.Next() {
6792
do(thing, value)

‎histogram.go

+5-18
Original file line numberDiff line numberDiff line change
@@ -104,17 +104,15 @@ type Histogram struct {
104104
upper atomic.Uint64
105105

106106
// sum is the sum of all the values put into Histogram
107-
sumInt atomic.Uint64
108-
sumFloat atomicx.Float64
107+
sum atomicx.Sum
109108
}
110109

111110
// Reset resets the given histogram.
112111
func (h *Histogram) Reset() {
113112
clear(h.buckets[:])
114113
h.lower.Store(0)
115114
h.upper.Store(0)
116-
h.sumInt.Store(0)
117-
h.sumFloat.Store(0)
115+
h.sum.Reset()
118116
}
119117

120118
// Update updates h with val.
@@ -157,13 +155,7 @@ func (h *Histogram) Update(val float64) {
157155
db[offset].Add(1)
158156
}
159157

160-
if val > 0 {
161-
if intval := uint64(val); float64(intval) == val {
162-
h.sumInt.Add(intval)
163-
} else {
164-
h.sumFloat.Add(val)
165-
}
166-
}
158+
h.sum.Add(val)
167159
}
168160

169161
// Observe updates h with val, identical to [Histogram.Update].
@@ -177,8 +169,7 @@ func (h *Histogram) Observe(val float64) {
177169
func (h *Histogram) Merge(src *Histogram) {
178170
h.lower.Add(src.lower.Load())
179171
h.upper.Add(src.upper.Load())
180-
h.sumInt.Add(src.sumInt.Load())
181-
h.sumFloat.Add(src.sumFloat.Load())
172+
h.sum.Add(src.sum.Load())
182173

183174
for i := range src.buckets {
184175
if dbSrc := src.buckets[i].Load(); dbSrc != nil {
@@ -216,7 +207,7 @@ func (h *Histogram) marshalTo(w ExpfmtWriter, name MetricName) {
216207
return
217208
}
218209

219-
sum := h.sum()
210+
sum := h.sum.Load()
220211
family := name.Family.String()
221212

222213
// 1 extra because we're always adding in the vmrange tag
@@ -329,10 +320,6 @@ func (h *Histogram) punchBuckets(c *punchCard) (total uint64, punches int) {
329320
return
330321
}
331322

332-
func (h *Histogram) sum() float64 {
333-
return float64(h.sumInt.Load()) + h.sumFloat.Load()
334-
}
335-
336323
// punchCard is used internally to track counts per bucket when computing
337324
// which histograms ranges have been hit.
338325
type punchCard [totalBuckets]uint64

‎internal/atomicx/atomicx.go

+50-4
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,30 @@ package atomicx
22

33
import (
44
"math"
5+
"math/rand/v2"
56
"sync/atomic"
67

78
"go.withmatt.com/metrics/internal/fasttime"
89
)
910

10-
// An Float64 is an atomic float64. The zero value is zero.
11+
// A Float64 is an atomic float64. The zero value is zero.
1112
type Float64 struct {
1213
v atomic.Uint64
1314
}
1415

16+
func (x *Float64) Add(val float64) {
17+
if val == 0 {
18+
return
19+
}
20+
for {
21+
oldBits := x.v.Load()
22+
newBits := math.Float64bits(math.Float64frombits(oldBits) + val)
23+
if x.v.CompareAndSwap(oldBits, newBits) {
24+
return
25+
}
26+
}
27+
}
28+
1529
func (x *Float64) Load() float64 {
1630
return math.Float64frombits(x.v.Load())
1731
}
@@ -20,19 +34,51 @@ func (x *Float64) Store(val float64) {
2034
x.v.Store(math.Float64bits(val))
2135
}
2236

23-
func (x *Float64) Add(val float64) {
37+
// A Sum is an atomic float64 that can only Add. The zero value is zero.
38+
type Sum struct {
39+
i atomic.Uint64
40+
v [2]atomic.Uint64
41+
}
42+
43+
func (x *Sum) AddUint64(val uint64) {
2444
if val == 0 {
2545
return
2646
}
47+
x.i.Add(val)
48+
}
49+
50+
func (x *Sum) Add(val float64) {
51+
if val <= 0 {
52+
return
53+
}
54+
55+
if intval := uint64(val); val == float64(intval) {
56+
x.i.Add(intval)
57+
return
58+
}
59+
60+
//nolint:gosec
61+
idx := rand.Uint64() & 1
2762
for {
28-
oldBits := x.v.Load()
63+
oldBits := x.v[idx].Load()
2964
newBits := math.Float64bits(math.Float64frombits(oldBits) + val)
30-
if x.v.CompareAndSwap(oldBits, newBits) {
65+
if x.v[idx].CompareAndSwap(oldBits, newBits) {
3166
return
3267
}
3368
}
3469
}
3570

71+
func (x *Sum) Reset() {
72+
x.i.Store(0)
73+
clear(x.v[:])
74+
}
75+
76+
func (x *Sum) Load() float64 {
77+
return float64(x.i.Load()) +
78+
math.Float64frombits(x.v[0].Load()) +
79+
math.Float64frombits(x.v[1].Load())
80+
}
81+
3682
// An Instant is an atomic fasttime.Instant. The zero value is zero.
3783
type Instant struct {
3884
v atomic.Int64
+91
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
package atomicx
2+
3+
import "testing"
4+
5+
func BenchmarkLoadParallel(b *testing.B) {
6+
b.Run("Float64", func(b *testing.B) {
7+
var f Float64
8+
b.RunParallel(func(pb *testing.PB) {
9+
for pb.Next() {
10+
f.Load()
11+
}
12+
})
13+
})
14+
15+
b.Run("Sum", func(b *testing.B) {
16+
var s Sum
17+
b.RunParallel(func(pb *testing.PB) {
18+
for pb.Next() {
19+
s.Load()
20+
}
21+
})
22+
})
23+
}
24+
25+
func BenchmarkAddParallel(b *testing.B) {
26+
b.Run("float/type=Float64", func(b *testing.B) {
27+
var f Float64
28+
b.RunParallel(func(pb *testing.PB) {
29+
for pb.Next() {
30+
f.Add(1.1)
31+
}
32+
})
33+
})
34+
35+
b.Run("integer/type=Float64", func(b *testing.B) {
36+
var f Float64
37+
b.RunParallel(func(pb *testing.PB) {
38+
for pb.Next() {
39+
f.Add(1)
40+
}
41+
})
42+
})
43+
44+
b.Run("float/type=Sum", func(b *testing.B) {
45+
var s Sum
46+
b.RunParallel(func(pb *testing.PB) {
47+
for pb.Next() {
48+
s.Add(1.1)
49+
}
50+
})
51+
})
52+
53+
b.Run("integer/type=Sum", func(b *testing.B) {
54+
var s Sum
55+
b.RunParallel(func(pb *testing.PB) {
56+
for pb.Next() {
57+
s.Add(1)
58+
}
59+
})
60+
})
61+
}
62+
63+
func BenchmarkAdd(b *testing.B) {
64+
b.Run("float/type=Float64", func(b *testing.B) {
65+
var f Float64
66+
for b.Loop() {
67+
f.Add(1.1)
68+
}
69+
})
70+
71+
b.Run("integer/type=Float64", func(b *testing.B) {
72+
var f Float64
73+
for b.Loop() {
74+
f.Add(1)
75+
}
76+
})
77+
78+
b.Run("float/type=Sum", func(b *testing.B) {
79+
var s Sum
80+
for b.Loop() {
81+
s.Add(1.1)
82+
}
83+
})
84+
85+
b.Run("integer/type=Sum", func(b *testing.B) {
86+
var s Sum
87+
for b.Loop() {
88+
s.Add(1)
89+
}
90+
})
91+
}

‎internal/atomicx/atomicx_test.go

+49-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package atomicx_test
22

33
import (
4+
"math"
45
"runtime"
56
"testing"
67

@@ -11,14 +12,25 @@ import (
1112
func TestFloat64(t *testing.T) {
1213
var f Float64
1314
v := float64(1.88)
14-
assert.Equal(t, 0, f.Load())
15+
assert.Equal(t, f.Load(), 0)
1516
f.Store(v)
16-
assert.Equal(t, v, f.Load())
17+
assert.Equal(t, f.Load(), v)
1718
f.Add(v)
18-
assert.Equal(t, v+v, f.Load())
19+
assert.Equal(t, f.Load(), v+v)
1920
}
2021

21-
func TestHammerAdd(t *testing.T) {
22+
func TestSum(t *testing.T) {
23+
var s Sum
24+
assert.Equal(t, s.Load(), 0)
25+
s.Add(1.88)
26+
assert.Equal(t, s.Load(), 1.88)
27+
s.Add(1)
28+
assert.Equal(t, s.Load(), 2.88)
29+
s.Reset()
30+
assert.Equal(t, s.Load(), 0)
31+
}
32+
33+
func TestHammerFloatAdd(t *testing.T) {
2234
const p = 4
2335
n := 100000
2436
if testing.Short() {
@@ -44,3 +56,36 @@ func TestHammerAdd(t *testing.T) {
4456
}
4557
assert.Equal(t, val.Load(), float64(n)*p)
4658
}
59+
60+
func TestHammerSumAdd(t *testing.T) {
61+
const p = 4
62+
n := 10000
63+
if testing.Short() {
64+
n = 1000
65+
}
66+
defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(p))
67+
68+
const a = 1.1
69+
const b = 1
70+
71+
c := make(chan int)
72+
var val Sum
73+
for range p {
74+
go func() {
75+
defer func() {
76+
assert.Nil(t, recover())
77+
c <- 1
78+
}()
79+
for range n {
80+
val.Add(a)
81+
val.Add(b)
82+
val.AddUint64(b)
83+
}
84+
}()
85+
}
86+
for range p {
87+
<-c
88+
}
89+
// XXX: Floating point precision, so need to round
90+
assert.Equal(t, math.Round(val.Load()), math.Round(float64(n)*(p*a+p*b*2)))
91+
}

‎internal/atomicx/benchmarks.txt

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
$ benchstat -col /type compare.txt
2+
goos: darwin
3+
goarch: arm64
4+
pkg: go.withmatt.com/metrics/internal/atomicx
5+
cpu: Apple M1 Max
6+
│ Float64 │ Sum │
7+
│ sec/op │ sec/op vs base │
8+
AddParallel/float-10 336.3n ± 7% 270.2n ± 3% -19.64% (p=0.000 n=10)
9+
AddParallel/integer-10 331.75n ± 5% 80.38n ± 2% -75.77% (p=0.000 n=10)
10+
Add/float-10 10.180n ± 1% 9.612n ± 1% -5.58% (p=0.000 n=10)
11+
Add/integer-10 10.075n ± 1% 6.883n ± 0% -31.69% (p=0.000 n=10)
12+
geomean 58.16n 34.62n -40.47%

0 commit comments

Comments
 (0)
Please sign in to comment.