Skip to content

Commit bb2d5e8

Browse files
authored
Improve healthcheck (#33)
* feat(healthcheck): perform a check on Vault and Kubernetes connectivity
* test(healthcheck): improve healthcheck test with new method
1 parent e844aad commit bb2d5e8

File tree

6 files changed

+312
-94
lines changed

6 files changed

+312
-94
lines changed

Diff for: go.mod

+1
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,7 @@ require (
255255
github.com/tklauser/go-sysconf v0.3.10 // indirect
256256
github.com/tklauser/numcpus v0.4.0 // indirect
257257
github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c // indirect
258+
github.com/undefinedlabs/go-mpatch v1.0.7 // indirect
258259
github.com/vmware/govmomi v0.18.0 // indirect
259260
github.com/x448/float16 v0.8.4 // indirect
260261
github.com/yusufpapurcu/wmi v1.2.2 // indirect

Diff for: go.sum

+2
Original file line numberDiff line numberDiff line change
@@ -1213,6 +1213,8 @@ github.com/uber/jaeger-lib v2.4.1+incompatible h1:td4jdvLcExb4cBISKIpHuGoVXh+dVK
12131213
github.com/uber/jaeger-lib v2.4.1+incompatible/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6+uUTzImX/AauajbLI56U=
12141214
github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc=
12151215
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
1216+
github.com/undefinedlabs/go-mpatch v1.0.7 h1:943FMskd9oqfbZV0qRVKOUsXQhTLXL0bQTVbQSpzmBs=
1217+
github.com/undefinedlabs/go-mpatch v1.0.7/go.mod h1:TyJZDQ/5AgyN7FSLiBJ8RO9u2c6wbtRvK827b6AVqY4=
12161218
github.com/vmware/govmomi v0.18.0 h1:f7QxSmP7meCtoAmiKZogvVbLInT+CZx6Px6K5rYsJZo=
12171219
github.com/vmware/govmomi v0.18.0/go.mod h1:URlwyTFZX72RmxtxuaFL2Uj3fD1JTvZdx59bHWk6aFU=
12181220
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=

Diff for: pkg/controller/controller.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ func NewController(cfg *config.Config, Clientset *kubernetes.Clientset, sentrySv
4040
func (c *Controller) RunInjector(ctx context.Context, errChan chan<- error, runSuccess chan<- bool) {
4141
c.log.Info("Starting server in mode injector")
4242
is := injector.NewWebhookStartor(c.Cfg, errChan, runSuccess, c.sentry)
43-
hcService := healthcheck.NewService()
43+
hcService := healthcheck.NewService(c.Cfg)
4444
hcService.RegisterHandlers()
4545
go hcService.Start(ctx, stopChan)
4646
go is.StartWebhook(ctx, stopChan)
@@ -54,7 +54,7 @@ func (c *Controller) RunRenewer(ctx context.Context, metricsSuccess chan<- bool)
5454
go le.RunLeaderElection(ctx, stopChan)
5555
metricsService := prometheus.NewService(metricsSuccess)
5656
go metricsService.RunMetrics()
57-
hcService := healthcheck.NewService()
57+
hcService := healthcheck.NewService(c.Cfg)
5858
hcService.RegisterHandlers()
5959
go hcService.Start(ctx, stopChan)
6060
}
@@ -67,7 +67,7 @@ func (c *Controller) RunRevoker(ctx context.Context, metricsSuccess chan<- bool)
6767
go le.RunLeaderElection(ctx, stopChan)
6868
metricsService := prometheus.NewService(metricsSuccess)
6969
go metricsService.RunMetrics()
70-
hcService := healthcheck.NewService()
70+
hcService := healthcheck.NewService(c.Cfg)
7171
hcService.RegisterHandlers()
7272
go hcService.Start(ctx, stopChan)
7373
}

Diff for: pkg/healthcheck/healthcheck.go

+129-35
Original file line numberDiff line numberDiff line change
@@ -2,28 +2,45 @@ package healthcheck
22

33
import (
44
"context"
5+
"encoding/json"
56
"net/http"
67
"sync/atomic"
78
"time"
89

9-
"github.com/numberly/vault-db-injector/pkg/leadership"
10+
"github.com/numberly/vault-db-injector/pkg/config"
11+
"github.com/numberly/vault-db-injector/pkg/k8s"
1012
"github.com/numberly/vault-db-injector/pkg/logger"
13+
"github.com/numberly/vault-db-injector/pkg/vault"
1114
)
1215

16+
type HealthStatus struct {
17+
Status string `json:"status"`
18+
Kubernetes *ServiceHealth `json:"kubernetes,omitempty"`
19+
Vault *ServiceHealth `json:"vault,omitempty"`
20+
Timestamp string `json:"timestamp"`
21+
}
22+
23+
// ServiceHealth describes the outcome of probing a single dependency.
//
// Status is "healthy" or "unhealthy". Message carries the failure detail for
// an unhealthy result and is omitted from the encoded JSON when empty.
type ServiceHealth struct {
	Status  string `json:"status"`
	Message string `json:"message,omitempty"`
}
27+
1328
type HealthChecker interface {
1429
RegisterHandlers()
15-
Start() error
30+
Start(context.Context, chan struct{}) error
1631
}
1732

1833
type Service struct {
19-
isReady *atomic.Value
20-
server *http.Server
21-
log logger.Logger
34+
isReady *atomic.Value
35+
server *http.Server
36+
log logger.Logger
37+
cfg *config.Config
38+
k8sClient k8s.ClientInterface
2239
}
2340

24-
func NewService() *Service {
41+
func NewService(cfg *config.Config) *Service {
2542
isReady := &atomic.Value{}
26-
isReady.Store(true) // Initialize as ready
43+
isReady.Store(true)
2744

2845
return &Service{
2946
isReady: isReady,
@@ -32,62 +49,139 @@ func NewService() *Service {
3249
ReadTimeout: 10 * time.Second,
3350
WriteTimeout: 10 * time.Second,
3451
},
35-
log: logger.GetLogger(),
52+
log: logger.GetLogger(),
53+
cfg: cfg,
54+
k8sClient: k8s.NewClient(),
3655
}
3756
}
3857

39-
// RegisterHandlers sets up the HTTP endpoints for the health check service.
4058
func (s *Service) RegisterHandlers() {
41-
http.HandleFunc("/healthz", s.healthzHandler)
59+
http.HandleFunc("/healthz", s.healthHandler)
4260
http.HandleFunc("/readyz", s.readyzHandler())
43-
hcle := leadership.NewHealthChecker()
44-
hcle.SetupLivenessEndpoint()
4561
}
4662

47-
// Start begins listening for health check requests.
63+
func (s *Service) checkKubernetesHealth() *ServiceHealth {
64+
_, err := s.k8sClient.GetKubernetesClient()
65+
if err != nil {
66+
return &ServiceHealth{
67+
Status: "unhealthy",
68+
Message: "Failed to connect to Kubernetes: " + err.Error(),
69+
}
70+
}
71+
return &ServiceHealth{
72+
Status: "healthy",
73+
}
74+
}
75+
76+
func (s *Service) checkVaultHealth(ctx context.Context) *ServiceHealth {
77+
k8sClient := k8s.NewClient()
78+
tok, err := k8sClient.GetServiceAccountToken()
79+
if err != nil {
80+
return &ServiceHealth{
81+
Status: "unhealthy",
82+
Message: "Failed to get ServiceAccount token: " + err.Error(),
83+
}
84+
}
85+
86+
vaultConn := vault.NewConnector(s.cfg.VaultAddress, s.cfg.VaultAuthPath, s.cfg.KubeRole, "random", "random", tok, s.cfg.VaultRateLimit)
87+
88+
if err := vaultConn.CheckHealth(ctx); err != nil {
89+
return &ServiceHealth{
90+
Status: "unhealthy",
91+
Message: err.Error(),
92+
}
93+
}
94+
95+
return &ServiceHealth{
96+
Status: "healthy",
97+
}
98+
}
99+
100+
func (s *Service) healthHandler(w http.ResponseWriter, r *http.Request) {
101+
ctx := r.Context()
102+
health := HealthStatus{
103+
Timestamp: time.Now().UTC().Format(time.RFC3339),
104+
}
105+
106+
// Check both services
107+
k8sHealth := s.checkKubernetesHealth()
108+
vaultHealth := s.checkVaultHealth(ctx)
109+
110+
health.Kubernetes = k8sHealth
111+
health.Vault = vaultHealth
112+
113+
w.Header().Set("Content-Type", "application/json")
114+
115+
if k8sHealth.Status == "healthy" && vaultHealth.Status == "healthy" {
116+
health.Status = "healthy"
117+
w.WriteHeader(http.StatusOK)
118+
} else {
119+
health.Status = "unhealthy"
120+
var statusCode int
121+
122+
switch {
123+
case k8sHealth.Status != "healthy" && vaultHealth.Status != "healthy":
124+
statusCode = http.StatusServiceUnavailable
125+
case k8sHealth.Status != "healthy":
126+
statusCode = http.StatusBadGateway
127+
case vaultHealth.Status != "healthy":
128+
statusCode = http.StatusFailedDependency
129+
}
130+
131+
w.WriteHeader(statusCode)
132+
}
133+
134+
if err := json.NewEncoder(w).Encode(health); err != nil {
135+
s.log.Errorf("Failed to encode health status: %v", err)
136+
}
137+
}
138+
139+
func (s *Service) readyzHandler() http.HandlerFunc {
140+
return func(w http.ResponseWriter, _ *http.Request) {
141+
w.Header().Set("Content-Type", "application/json")
142+
143+
response := HealthStatus{
144+
Timestamp: time.Now().UTC().Format(time.RFC3339),
145+
}
146+
147+
if s.isReady == nil || !s.isReady.Load().(bool) {
148+
response.Status = "not ready"
149+
w.WriteHeader(http.StatusServiceUnavailable)
150+
json.NewEncoder(w).Encode(response)
151+
return
152+
}
153+
154+
response.Status = "ready"
155+
w.WriteHeader(http.StatusOK)
156+
json.NewEncoder(w).Encode(response)
157+
}
158+
}
159+
48160
func (s *Service) Start(ctx context.Context, doneCh chan struct{}) error {
49-
// Start the server in a separate goroutine.
50161
go func() {
51162
s.log.Info("Listening for health checks on :8888")
52163
if err := s.server.ListenAndServe(); err != http.ErrServerClosed {
53-
// Log the error if it's not ErrServerClosed, as we expect this error on shutdown.
54164
s.log.Errorf("Error serving health check: %v", err)
55165
}
56-
close(doneCh) // Signal that the server has stopped.
166+
close(doneCh)
57167
}()
58168

59-
// Wait for context cancellation or server stop signal.
60169
select {
61170
case <-ctx.Done():
62-
// Context was canceled, shut down the server.
63171
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
64172
defer cancel()
65173

66174
s.log.Info("Context canceled, shutting down health check server")
67175
if err := s.server.Shutdown(shutdownCtx); err != nil {
68176
s.log.Errorf("Error shutting down health check server: %v", err)
69-
return err // Return error if shutdown fails.
177+
return err
70178
}
71179
case <-doneCh:
72180
if err := s.server.Shutdown(ctx); err != nil {
73181
s.log.Errorf("Error shutting down health check server: %v", err)
74-
return err // Return error if shutdown fails.
182+
return err
75183
}
76184
s.log.Info("Health check server has stopped")
77185
}
78-
return nil // Return nil as the service stopped cleanly or was shutdown on context cancel.
79-
}
80-
81-
func (s *Service) healthzHandler(w http.ResponseWriter, _ *http.Request) {
82-
w.WriteHeader(http.StatusNoContent)
83-
}
84-
85-
func (s *Service) readyzHandler() http.HandlerFunc {
86-
return func(w http.ResponseWriter, _ *http.Request) {
87-
if s.isReady == nil || !s.isReady.Load().(bool) {
88-
http.Error(w, http.StatusText(http.StatusServiceUnavailable), http.StatusServiceUnavailable)
89-
return
90-
}
91-
w.WriteHeader(http.StatusNoContent)
92-
}
186+
return nil
93187
}

0 commit comments

Comments
 (0)