Skip to content

Commit 014f217

Browse files
committed
fixes dlclark#63 add the ability for unit tests to wait on the timeout background goroutine
1 parent 31eacab commit 014f217

File tree

6 files changed

+102
-18
lines changed

6 files changed

+102
-18
lines changed

README.md

+38-16
Original file line numberDiff line numberDiff line change
@@ -113,22 +113,44 @@ deadline for the match. The performance impact is as follows.
113113
is reached. E.g., if you set a timeout of one minute the load will persist
114114
for approximately a minute even if the match finishes quickly.
115115

116-
Some alternative implementations were considered and ruled out.
117-
118-
1. **time.Now()** - This was the initial timeout implementation. It called `time.Now()`
119-
and compared the result to the deadline approximately once every 1000 matching steps.
120-
Adding a timeout to a simple match increased the cost from ~45ns to ~3000ns).
121-
2. **time.AfterFunc** - This approach entails using `time.AfterFunc` to set an `expired`
122-
atomic boolean value. However it increases the cost of handling a simple match
123-
with a timeout from ~45ns to ~360ns and was therefore ruled out.
124-
3. **counter** - In this approach an atomic variable tracks the number of live matches
125-
with timeouts. The background clock stops when the counter hits zero. The benefit
126-
of this approach is that the background load will stop more quickly (after the
127-
last match has finished as opposed to waiting until the deadline for the last
128-
match). However this approach requires more atomic variable updates and has poorer
129-
performance when multiple matches are executed concurrently. (The cost of a
130-
single match jumps from ~45ns to ~65ns, and the cost of running matches on
131-
all 12 available CPUs jumps from ~400ns to ~730ns).
116+
See [PR #58](https://github.com/dlclark/regexp2/pull/58) for more details and
117+
alternatives considered.
118+
119+
## Goroutine leak error
120+
If you're using a library during unit tests (e.g. https://github.com/uber-go/goleak) that validates all goroutines are exited then you'll likely get an error if you or any of your dependencies use regex's with a MatchTimeout.
121+
To remedy the problem you'll need to tell the unit test to wait until the backgroup timeout goroutine is exited.
122+
123+
```go
124+
func TestSomething(t *testing.T) {
125+
defer goleak.VerifyNone(t)
126+
defer regexp2.StopTimeoutClock()
127+
128+
// ... test
129+
}
130+
131+
//or
132+
133+
func TestMain(m *testing.M) {
134+
// setup
135+
// ...
136+
137+
// run
138+
m.Run()
139+
140+
//tear down
141+
regexp2.StopTimeoutClock()
142+
goleak.VerifyNone(t)
143+
}
144+
```
145+
146+
This will add ~100ms runtime to each test (or TestMain). If that's too much time you can set the clock cycle rate of the timeout goroutine in an init function in a test file. `regexp2.SetTimeoutCheckPeriod` isn't threadsafe so it must be setup before starting any regex's with Timeouts.
147+
148+
```go
149+
func init() {
150+
//speed up testing by making the timeout clock 1ms
151+
regexp2.SetTimeoutCheckPeriod(time.Millisecond)
152+
}
153+
```
132154

133155
## ECMAScript compatibility mode
134156
In this mode the engine provides compatibility with the [regex engine](https://tc39.es/ecma262/multipage/text-processing.html#sec-regexp-regular-expression-objects) described in the ECMAScript specification.

fastclock.go

+23-1
Original file line numberDiff line numberDiff line change
@@ -76,14 +76,36 @@ func extendClock(end fasttime) {
7676
}
7777
}
7878

79+
// stop the timeout clock in the background
80+
// should only used for unit tests to abandon the background goroutine
81+
func stopClock() {
82+
fast.mu.Lock()
83+
if fast.running {
84+
fast.clockEnd.write(fasttime(0))
85+
}
86+
fast.mu.Unlock()
87+
88+
// pause until not running
89+
// get and release the lock
90+
isRunning := true
91+
for isRunning {
92+
time.Sleep(clockPeriod / 2)
93+
fast.mu.Lock()
94+
isRunning = fast.running
95+
fast.mu.Unlock()
96+
}
97+
}
98+
7999
func durationToTicks(d time.Duration) fasttime {
80100
// Downscale nanoseconds to approximately a millisecond so that we can avoid
81101
// overflow even if the caller passes in math.MaxInt64.
82102
return fasttime(d) >> 20
83103
}
84104

105+
const DefaultClockPeriod = 100 * time.Millisecond
106+
85107
// clockPeriod is the approximate interval between updates of approximateClock.
86-
const clockPeriod = 100 * time.Millisecond
108+
var clockPeriod = DefaultClockPeriod
87109

88110
func runClock() {
89111
fast.mu.Lock()

fastclock_test.go

+24
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@ import (
66
"time"
77
)
88

9+
func init() {
10+
//speed up testing by making the timeout clock 1ms instead of 100ms...
11+
//bad for benchmark tests though
12+
SetTimeoutCheckPeriod(time.Millisecond)
13+
}
914
func TestDeadline(t *testing.T) {
1015
for _, delay := range []time.Duration{
1116
clockPeriod / 10,
@@ -32,3 +37,22 @@ func TestDeadline(t *testing.T) {
3237
})
3338
}
3439
}
40+
41+
func TestStopTimeoutClock(t *testing.T) {
42+
// run a quick regex with a long timeout
43+
// make sure the stop clock returns quickly
44+
r := MustCompile(".", 0)
45+
r.MatchTimeout = time.Second * 10
46+
47+
r.MatchString("a")
48+
start := time.Now()
49+
StopTimeoutClock()
50+
stop := time.Now()
51+
52+
if want, got := clockPeriod*2, stop.Sub(start); want < got {
53+
t.Errorf("Expected duration less than %v, got %v", want, got)
54+
}
55+
if want, got := false, fast.running; want != got {
56+
t.Errorf("Expected isRunning to be %v, got %v", want, got)
57+
}
58+
}

regexp.go

+13
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,19 @@ func Unescape(input string) (string, error) {
9696
return syntax.Unescape(input)
9797
}
9898

99+
// SetTimeoutPeriod is a debug function that sets the frequency of the timeout goroutine's sleep cycle.
100+
// Defaults to 100ms. The only benefit of setting this lower is that the 1 background goroutine that manages
101+
// timeouts may exit slightly sooner after all the timeouts have expired. See Github issue #63
102+
func SetTimeoutCheckPeriod(d time.Duration) {
103+
clockPeriod = d
104+
}
105+
106+
// StopTimeoutClock should only be used in unit tests to prevent the timeout clock goroutine
107+
// from appearing like a leaking goroutine
108+
func StopTimeoutClock() {
109+
stopClock()
110+
}
111+
99112
// String returns the source text used to compile the regular expression.
100113
func (re *Regexp) String() string {
101114
return re.pattern

regexp_performance_test.go

+3
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,9 @@ func BenchmarkLeading(b *testing.B) {
310310
}
311311

312312
func BenchmarkShortSearch(b *testing.B) {
313+
// set to default check period for benchmarking purposes
314+
// unit tests tend to send this low
315+
SetTimeoutCheckPeriod(DefaultClockPeriod)
313316
type benchmark struct {
314317
name string
315318
parallel bool // Run in parallel?

regexp_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ func TestBacktrack_CatastrophicTimeout(t *testing.T) {
1919
const subject = "Do you think you found the problem string!"
2020

2121
const earlyAllowance = 10 * time.Millisecond
22-
const lateAllowance = clockPeriod + 500*time.Millisecond // Large allowance in case machine is slow
22+
var lateAllowance = clockPeriod + 500*time.Millisecond // Large allowance in case machine is slow
2323

2424
for _, timeout := range []time.Duration{
2525
-1 * time.Millisecond,

0 commit comments

Comments
 (0)