Skip to content

Commit 4714c3c

Browse files
authored
add lifecycle metrics (#93)
motivation: startup/shutdown metrics are important for real-life services, for example spike in start metrics indicates a crash-loop changes: * add start and shutdown counters to ComponentLifecycle * add start and shutdown timers to report duration of startup and shutdown operations
1 parent 5577147 commit 4714c3c

File tree

3 files changed

+123
-3
lines changed

3 files changed

+123
-3
lines changed

Sources/Lifecycle/Lifecycle.swift

+14-3
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@ import Darwin
1818
import Glibc
1919
#endif
2020
import Backtrace
21+
import CoreMetrics
2122
import Dispatch
2223
import Logging
23-
import Metrics
2424

2525
// MARK: - LifecycleTask
2626

@@ -473,9 +473,12 @@ public class ComponentLifecycle: LifecycleTask {
473473
guard case .idle = self.state else {
474474
preconditionFailure("invalid state, \(self.state)")
475475
}
476-
self.log("starting")
477476
self.state = .starting(queue)
478477
}
478+
479+
self.log("starting")
480+
Counter(label: "\(self.label).lifecycle.start").increment()
481+
479482
if tasks.count == 0 {
480483
self.log(level: .notice, "no tasks provided")
481484
}
@@ -517,7 +520,9 @@ public class ComponentLifecycle: LifecycleTask {
517520
return callback(index, nil)
518521
}
519522
self.log("starting tasks [\(tasks[index].label)]")
523+
let startTime = DispatchTime.now()
520524
start { error in
525+
Timer(label: "\(self.label).\(tasks[index].label).lifecycle.start").recordNanoseconds(DispatchTime.now().uptimeNanoseconds - startTime.uptimeNanoseconds)
521526
if let error = error {
522527
self.log(level: .error, "failed to start [\(tasks[index].label)]: \(error)")
523528
return callback(index, error)
@@ -532,9 +537,12 @@ public class ComponentLifecycle: LifecycleTask {
532537

533538
private func _shutdown(on queue: DispatchQueue, tasks: [LifecycleTask], callback: @escaping () -> Void) {
534539
self.stateLock.withLock {
535-
log("shutting down")
536540
self.state = .shuttingDown(queue)
537541
}
542+
543+
self.log("shutting down")
544+
Counter(label: "\(self.label).lifecycle.shutdown").increment()
545+
538546
self.shutdownTask(on: queue, tasks: tasks.reversed(), index: 0, errors: nil) { errors in
539547
self.stateLock.withLock {
540548
guard case .shuttingDown = self.state else {
@@ -555,8 +563,11 @@ public class ComponentLifecycle: LifecycleTask {
555563
if index >= tasks.count {
556564
return callback(errors)
557565
}
566+
558567
self.log("stopping tasks [\(tasks[index].label)]")
568+
let startTime = DispatchTime.now()
559569
shutdown { error in
570+
Timer(label: "\(self.label).\(tasks[index].label).lifecycle.shutdown").recordNanoseconds(DispatchTime.now().uptimeNanoseconds - startTime.uptimeNanoseconds)
560571
var errors = errors
561572
if let error = error {
562573
if errors == nil {

Tests/LifecycleTests/ComponentLifecycleTests+XCTest.swift

+1
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ extension ComponentLifecycleTests {
6666
("testStatefulNIO", testStatefulNIO),
6767
("testStatefulNIOStartFailure", testStatefulNIOStartFailure),
6868
("testStatefulNIOShutdownFailure", testStatefulNIOShutdownFailure),
69+
("testMetrics", testMetrics),
6970
]
7071
}
7172
}

Tests/LifecycleTests/ComponentLifecycleTests.swift

+108
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
@testable import Lifecycle
1616
import LifecycleNIOCompat
17+
import Metrics
1718
import NIO
1819
import XCTest
1920

@@ -1247,4 +1248,111 @@ final class ComponentLifecycleTests: XCTestCase {
12471248

12481249
XCTAssertFalse(item.shutdown, "expected item to be shutdown")
12491250
}
1251+
1252+
func testMetrics() {
1253+
let metrics = TestMetrics()
1254+
MetricsSystem.bootstrap(metrics)
1255+
1256+
let items = (0 ..< 3).map { _ in GoodItem(id: UUID().uuidString, startDelay: 0.1, shutdownDelay: 0.1) }
1257+
let lifecycle = ComponentLifecycle(label: "test")
1258+
lifecycle.register(items)
1259+
lifecycle.start { startError in
1260+
XCTAssertNil(startError, "not expecting error")
1261+
lifecycle.shutdown { shutdownErrors in
1262+
XCTAssertNil(shutdownErrors, "not expecting error")
1263+
}
1264+
}
1265+
lifecycle.wait()
1266+
XCTAssertEqual(metrics.counters["\(lifecycle.label).lifecycle.start"]?.value, 1, "expected start counter to be 1")
1267+
XCTAssertEqual(metrics.counters["\(lifecycle.label).lifecycle.shutdown"]?.value, 1, "expected shutdown counter to be 1")
1268+
items.forEach { XCTAssertGreaterThan(metrics.timers["\(lifecycle.label).\($0.label).lifecycle.start"]?.value ?? 0, 0, "expected start timer to be non-zero") }
1269+
items.forEach { XCTAssertGreaterThan(metrics.timers["\(lifecycle.label).\($0.label).lifecycle.shutdown"]?.value ?? 0, 0, "expected shutdown timer to be non-zero") }
1270+
}
1271+
}
1272+
1273+
class TestMetrics: MetricsFactory, RecorderHandler {
1274+
var counters = [String: TestCounter]()
1275+
var timers = [String: TestTimer]()
1276+
let lock = Lock()
1277+
1278+
public init() {}
1279+
1280+
public func makeCounter(label: String, dimensions: [(String, String)]) -> CounterHandler {
1281+
let counter = TestCounter(label: label)
1282+
self.lock.withLock {
1283+
self.counters[label] = counter
1284+
}
1285+
return counter
1286+
}
1287+
1288+
public func makeRecorder(label: String, dimensions: [(String, String)], aggregate: Bool) -> RecorderHandler {
1289+
return self
1290+
}
1291+
1292+
public func makeTimer(label: String, dimensions: [(String, String)]) -> TimerHandler {
1293+
let timer = TestTimer(label: label)
1294+
self.lock.withLock {
1295+
self.timers[label] = timer
1296+
}
1297+
return timer
1298+
}
1299+
1300+
public func destroyCounter(_: CounterHandler) {}
1301+
public func destroyRecorder(_: RecorderHandler) {}
1302+
public func destroyTimer(_: TimerHandler) {}
1303+
1304+
public func record(_: Int64) {}
1305+
public func record(_: Double) {}
1306+
1307+
class TestCounter: CounterHandler {
1308+
let label: String
1309+
var _value: Int64
1310+
let lock = Lock()
1311+
1312+
init(label: String) {
1313+
self.label = label
1314+
self._value = 0
1315+
}
1316+
1317+
public func increment(by: Int64) {
1318+
self.lock.withLock {
1319+
self._value += by
1320+
}
1321+
}
1322+
1323+
public func reset() {
1324+
self.lock.withLock {
1325+
self._value = 0
1326+
}
1327+
}
1328+
1329+
public var value: Int64 {
1330+
return self.lock.withLock {
1331+
return self._value
1332+
}
1333+
}
1334+
}
1335+
1336+
class TestTimer: TimerHandler {
1337+
let label: String
1338+
var _value: Int64
1339+
let lock = Lock()
1340+
1341+
init(label: String) {
1342+
self.label = label
1343+
self._value = 0
1344+
}
1345+
1346+
public func recordNanoseconds(_ value: Int64) {
1347+
self.lock.withLock {
1348+
self._value = value
1349+
}
1350+
}
1351+
1352+
public var value: Int64 {
1353+
return self.lock.withLock {
1354+
return self._value
1355+
}
1356+
}
1357+
}
12501358
}

0 commit comments

Comments
 (0)