1
+ // Copyright 2020-2023 The NATS Authors
2
+ // Licensed under the Apache License, Version 2.0 (the "License");
3
+ // you may not use this file except in compliance with the License.
4
+ // You may obtain a copy of the License at
5
+ //
6
+ // http://www.apache.org/licenses/LICENSE-2.0
7
+ //
8
+ // Unless required by applicable law or agreed to in writing, software
9
+ // distributed under the License is distributed on an "AS IS" BASIS,
10
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ // See the License for the specific language governing permissions and
12
+ // limitations under the License.
13
+
1
14
package natsreloader
2
15
3
16
import (
@@ -12,6 +25,7 @@ import (
12
25
"path/filepath"
13
26
"sort"
14
27
"strconv"
28
+ "syscall"
15
29
"time"
16
30
17
31
"github.com/fsnotify/fsnotify"
@@ -61,14 +75,21 @@ func (r *Reloader) waitForProcess() error {
61
75
goto WaitAndRetry
62
76
}
63
77
78
+ // On Unix systems os.FindProcess always succeeds, whether or not the process exists; the error is still checked for other platforms.
64
79
proc , err = os .FindProcess (pid )
65
80
if err != nil {
66
81
goto WaitAndRetry
67
82
}
83
+
84
+ // Check if the process is still alive: signal 0 delivers nothing and only reports whether the pid can be signaled.
85
+ err = proc .Signal (syscall .Signal (0 ))
86
+ if err != nil {
87
+ goto WaitAndRetry
88
+ }
68
89
break
69
90
70
91
WaitAndRetry:
71
- log .Printf (errorFmt , err )
92
+ log .Printf ("Error while monitoring pid %v: %v" , pid , err )
72
93
attempts ++
73
94
if attempts > r .MaxRetries {
74
95
return fmt .Errorf ("too many errors attempting to find server process" )
@@ -77,11 +98,9 @@ func (r *Reloader) waitForProcess() error {
77
98
}
78
99
79
100
if attempts > 0 {
80
- log .Printf ("found pid from pidfile %q after %v failed attempts (%v time after start )" ,
81
- r .PidFile , attempts , time .Since (startTime ))
101
+ log .Printf ("Found pid from pidfile %q after %v failed attempts (took %.3fs )" ,
102
+ r .PidFile , attempts , time .Since (startTime ). Seconds () )
82
103
}
83
-
84
- r .pid = pid
85
104
r .proc = proc
86
105
return nil
87
106
}
@@ -167,8 +186,10 @@ func handleEvents(configWatcher *fsnotify.Watcher, event fsnotify.Event, lastCon
167
186
}
168
187
}
169
188
170
- func handleDeletedFiles (deletedFiles []string , configWatcher * fsnotify.Watcher , lastConfigAppliedCache map [string ][]byte ) ([]string , []string ) {
171
- log .Printf ("Ticker is running with deletedFiles %v" , deletedFiles )
189
+ func handleDeletedFiles (deletedFiles []string , configWatcher * fsnotify.Watcher , lastConfigAppliedCache map [string ][]byte ) ([]string , []string ) {
190
+ if len (deletedFiles ) > 0 {
191
+ log .Printf ("Tracking files %v" , deletedFiles )
192
+ }
172
193
newDeletedFiles := make ([]string , 0 , len (deletedFiles ))
173
194
updated := make ([]string , 0 , len (deletedFiles ))
174
195
for _ , f := range deletedFiles {
@@ -212,7 +233,7 @@ func (r *Reloader) init() (*fsnotify.Watcher, map[string][]byte, error) {
212
233
}
213
234
214
235
// lastConfigAppliedCache is the last config update
215
- // applied by us
236
+ // applied by us.
216
237
lastConfigAppliedCache := make (map [string ][]byte )
217
238
218
239
// Preload config hashes, so we know their digests
@@ -225,13 +246,8 @@ func (r *Reloader) init() (*fsnotify.Watcher, map[string][]byte, error) {
225
246
}
226
247
lastConfigAppliedCache [configFile ] = digest
227
248
}
228
-
229
- // If the two pids don't match then os.FindProcess() has done something
230
- // rather hinkier than we expect, but log them both just in case on some
231
- // future platform there's a weird namespace issue, as a difference will
232
- // help with debugging.
233
- log .Printf ("Live, ready to kick pid %v (live, from %v spec) based on any of %v files" ,
234
- r .proc .Pid , r .pid , len (lastConfigAppliedCache ))
249
+ log .Printf ("Live, ready to kick pid %v on config changes (files=%d)" ,
250
+ r .proc .Pid , len (lastConfigAppliedCache ))
235
251
236
252
if len (lastConfigAppliedCache ) == 0 {
237
253
log .Printf ("Error: no watched config files cached; input spec was: %#v" ,
@@ -243,17 +259,26 @@ func (r *Reloader) init() (*fsnotify.Watcher, map[string][]byte, error) {
243
259
func (r * Reloader ) reload (updatedFiles []string ) error {
244
260
attempts := 0
245
261
for {
246
- log .Printf ("Sending signal '%s' to server to reload configuration due to: %s" , r .Signal .String (), updatedFiles )
247
- err := r .proc .Signal (r .Signal )
262
+ err := r .waitForProcess ()
263
+ if err != nil {
264
+ goto Retry
265
+ }
266
+
267
+ log .Printf ("Sending pid %v '%s' signal to reload changes from: %s" , r .proc .Pid , r .Signal .String (), updatedFiles )
268
+ err = r .proc .Signal (r .Signal )
248
269
if err == nil {
249
270
return nil
250
271
}
251
- log .Printf ("Error during reload: %s\n " , err )
272
+
273
+ Retry:
274
+ if err != nil {
275
+ log .Printf ("Error during reload: %s" , err )
276
+ }
252
277
if attempts > r .MaxRetries {
253
278
return fmt .Errorf ("too many errors (%v) attempting to signal server to reload: %w" , attempts , err )
254
279
}
255
280
delay := retryJitter (time .Duration (r .RetryWaitSecs ) * time .Second )
256
- log .Printf ("Wait and retrying after some time [%v] ..." , delay )
281
+ log .Printf ("Wait and retrying in %.3fs ..." , delay . Seconds () )
257
282
time .Sleep (delay )
258
283
attempts ++
259
284
}
@@ -287,23 +312,23 @@ func (r *Reloader) Run(ctx context.Context) error {
287
312
case <- t .C :
288
313
updatedFiles , deletedFiles = handleDeletedFiles (deletedFiles , configWatcher , lastConfigAppliedCache )
289
314
if len (deletedFiles ) == 0 {
290
- // No more deleted files, stop the ticker
315
+ log . Printf ( "All monitored files detected." )
291
316
t .Stop ()
292
317
tickerRunning = false
293
318
}
294
319
if len (updatedFiles ) > 0 {
295
- // Send signal to reload the config
320
+ // Send signal to reload the config.
296
321
log .Printf ("Updated files: %v" , updatedFiles )
297
322
break
298
323
}
299
324
continue
300
- // Check if the process is still alive
301
325
case event := <- configWatcher .Events :
302
326
updated , deleted := handleEvents (configWatcher , event , lastConfigAppliedCache )
303
327
updatedFiles = removeDuplicateStrings (updated )
304
328
deletedFiles = removeDuplicateStrings (append (deletedFiles , deleted ... ))
305
329
if ! tickerRunning {
306
- // Start the ticker to re-add deleted files
330
+ // Start the ticker to re-add deleted files.
331
+ log .Printf ("Starting ticker to re-add all tracked files." )
307
332
t .Reset (time .Second )
308
333
tickerRunning = true
309
334
}
0 commit comments