@@ -18,7 +18,15 @@ import {
18
18
CheckpointClient ,
19
19
isKubernetesEnvironment ,
20
20
} from "@trigger.dev/core/v3/serverOnly" ;
21
- import { createK8sApi , RUNTIME_ENV } from "./clients/kubernetes.js" ;
21
+ import { createK8sApi } from "./clients/kubernetes.js" ;
22
+ import { collectDefaultMetrics } from "prom-client" ;
23
+ import { register } from "./metrics.js" ;
24
+ import { PodCleaner } from "./services/podCleaner.js" ;
25
+ import { FailedPodHandler } from "./services/failedPodHandler.js" ;
26
+
27
+ if ( env . METRICS_COLLECT_DEFAULTS ) {
28
+ collectDefaultMetrics ( { register } ) ;
29
+ }
22
30
23
31
class ManagedSupervisor {
24
32
private readonly workerSession : SupervisorSession ;
@@ -29,6 +37,9 @@ class ManagedSupervisor {
29
37
private readonly resourceMonitor : ResourceMonitor ;
30
38
private readonly checkpointClient ?: CheckpointClient ;
31
39
40
+ private readonly podCleaner ?: PodCleaner ;
41
+ private readonly failedPodHandler ?: FailedPodHandler ;
42
+
32
43
private readonly isKubernetes = isKubernetesEnvironment ( env . KUBERNETES_FORCE_ENABLED ) ;
33
44
private readonly warmStartUrl = env . TRIGGER_WARM_START_URL ;
34
45
@@ -37,6 +48,21 @@ class ManagedSupervisor {
37
48
const workloadApiDomain = env . TRIGGER_WORKLOAD_API_DOMAIN ;
38
49
const workloadApiPortExternal = env . TRIGGER_WORKLOAD_API_PORT_EXTERNAL ;
39
50
51
+ if ( env . POD_CLEANER_ENABLED ) {
52
+ this . podCleaner = new PodCleaner ( {
53
+ namespace : env . KUBERNETES_NAMESPACE ,
54
+ batchSize : env . POD_CLEANER_BATCH_SIZE ,
55
+ intervalMs : env . POD_CLEANER_INTERVAL_MS ,
56
+ } ) ;
57
+ }
58
+
59
+ if ( env . FAILED_POD_HANDLER_ENABLED ) {
60
+ this . failedPodHandler = new FailedPodHandler ( {
61
+ namespace : env . KUBERNETES_NAMESPACE ,
62
+ reconnectIntervalMs : env . FAILED_POD_HANDLER_RECONNECT_INTERVAL_MS ,
63
+ } ) ;
64
+ }
65
+
40
66
if ( this . warmStartUrl ) {
41
67
this . logger . log ( "[ManagedWorker] 🔥 Warm starts enabled" , {
42
68
warmStartUrl : this . warmStartUrl ,
@@ -273,6 +299,14 @@ class ManagedSupervisor {
273
299
async start ( ) {
274
300
this . logger . log ( "[ManagedWorker] Starting up" ) ;
275
301
302
+ if ( this . podCleaner ) {
303
+ await this . podCleaner . start ( ) ;
304
+ }
305
+
306
+ if ( this . failedPodHandler ) {
307
+ await this . failedPodHandler . start ( ) ;
308
+ }
309
+
276
310
if ( env . TRIGGER_WORKLOAD_API_ENABLED ) {
277
311
this . logger . log ( "[ManagedWorker] Workload API enabled" , {
278
312
protocol : env . TRIGGER_WORKLOAD_API_PROTOCOL ,
@@ -292,6 +326,14 @@ class ManagedSupervisor {
292
326
async stop ( ) {
293
327
this . logger . log ( "[ManagedWorker] Shutting down" ) ;
294
328
await this . httpServer . stop ( ) ;
329
+
330
+ if ( this . podCleaner ) {
331
+ await this . podCleaner . stop ( ) ;
332
+ }
333
+
334
+ if ( this . failedPodHandler ) {
335
+ await this . failedPodHandler . stop ( ) ;
336
+ }
295
337
}
296
338
}
297
339
0 commit comments