Skip to content

K8SPSMDB-1296: improve readiness probe #1917

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions cmd/mongodb-healthcheck/db/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,25 +29,27 @@ var (
)

func Dial(ctx context.Context, conf *Config) (mongo.Client, error) {
if err := conf.configureTLS(); err != nil {
log := logf.FromContext(ctx).WithName("Dial")
ctx = logf.IntoContext(ctx, log)

if err := conf.configureTLS(ctx); err != nil {
return nil, errors.Wrap(err, "configure TLS")
}

log := logf.FromContext(ctx)
log.V(1).Info("Connecting to mongodb", "hosts", conf.Hosts, "ssl", conf.SSL.Enabled, "ssl_insecure", conf.SSL.Insecure)

if conf.Username != "" && conf.Password != "" {
log.V(1).Info("Enabling authentication for session", "user", conf.Username)
}

cl, err := mongo.Dial(&conf.Config)
cl, err := mongo.Dial(ctx, &conf.Config)
if err != nil {
cfg := conf.Config
cfg.Direct = true
cfg.ReplSetName = ""
cl, err = mongo.Dial(&cfg)
cl, err = mongo.Dial(ctx, &cfg)
if err != nil {
return nil, errors.Wrap(err, "filed to dial mongo")
return nil, errors.Wrap(err, "failed to dial mongo")
}
}

Expand Down
7 changes: 4 additions & 3 deletions cmd/mongodb-healthcheck/db/ssl.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package db

import (
"context"
"crypto/tls"
"crypto/x509"
"os"
Expand All @@ -40,8 +41,8 @@ func (sc *SSLConfig) loadCaCertificate() (*x509.CertPool, error) {
return certificates, nil
}

func (cnf *Config) configureTLS() error {
log := logf.Log
func (cnf *Config) configureTLS(ctx context.Context) error {
log := logf.FromContext(ctx).WithName("configureTLS")

if !cnf.SSL.Enabled {
return nil
Expand Down Expand Up @@ -72,7 +73,7 @@ func (cnf *Config) configureTLS() error {
return errors.Wrapf(err, "check if file with name %s exists", cnf.SSL.CAFile)
}

log.V(1).Info("Loading SSL/TLS Certificate Authority: %s", "ca", cnf.SSL.CAFile)
log.V(1).Info("Loading SSL/TLS Certificate Authority", "ca", cnf.SSL.CAFile)
ca, err := cnf.SSL.loadCaCertificate()
if err != nil {
return errors.Wrapf(err, "load client CAs from %s", cnf.SSL.CAFile)
Expand Down
8 changes: 4 additions & 4 deletions cmd/mongodb-healthcheck/db/ssl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ func TestSSLNotEnabled(t *testing.T) {
},
}

if err := cfg.configureTLS(); err != nil {
if err := cfg.configureTLS(t.Context()); err != nil {
t.Fatalf("TLS configuration failed: %s", err)
}

Expand All @@ -32,7 +32,7 @@ func TestSSLEnabled(t *testing.T) {
},
}

if err := cfg.configureTLS(); err != nil {
if err := cfg.configureTLS(t.Context()); err != nil {
t.Fatalf("TLS configuration failed: %s", err)
}

Expand All @@ -49,7 +49,7 @@ func TestPEMKeyFileDoesNotExists(t *testing.T) {
},
}

err := cfg.configureTLS()
err := cfg.configureTLS(t.Context())
if err == nil {
t.Fatal("Expected TLS config to fail, but it returned no error")
}
Expand All @@ -71,7 +71,7 @@ func TestCAFileDoesNotExists(t *testing.T) {
},
}

err := cfg.configureTLS()
err := cfg.configureTLS(t.Context())
if err == nil {
t.Fatal("Expected TLS config to fail, but it returned no error")
}
Expand Down
49 changes: 6 additions & 43 deletions cmd/mongodb-healthcheck/healthcheck/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,9 @@ package healthcheck

import (
"context"
"encoding/json"

v "github.com/hashicorp/go-version"
"github.com/pkg/errors"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/bson/primitive"
logf "sigs.k8s.io/controller-runtime/pkg/log"

"github.com/percona/percona-server-mongodb-operator/cmd/mongodb-healthcheck/db"
Expand All @@ -32,6 +29,7 @@ var ErrNoReplsetConfigStr = "(NotYetInitialized) no replset config has been rece

func HealthCheckMongosLiveness(ctx context.Context, cnf *db.Config) (err error) {
log := logf.FromContext(ctx).WithName("HealthCheckMongosLiveness")
ctx = logf.IntoContext(ctx, log)

client, err := db.Dial(ctx, cnf)
if err != nil {
Expand All @@ -58,6 +56,7 @@ func HealthCheckMongosLiveness(ctx context.Context, cnf *db.Config) (err error)

func HealthCheckMongodLiveness(ctx context.Context, cnf *db.Config, startupDelaySeconds int64) (_ *mongo.MemberState, err error) {
log := logf.FromContext(ctx).WithName("HealthCheckMongodLiveness")
ctx = logf.IntoContext(ctx, log)

client, err := db.Dial(ctx, cnf)
if err != nil {
Expand All @@ -74,50 +73,14 @@ func HealthCheckMongodLiveness(ctx context.Context, cnf *db.Config, startupDelay
return nil, errors.Wrap(err, "get isMaster response")
}

buildInfo, err := client.RSBuildInfo(ctx)
rsStatus, err := getStatus(ctx, client)
if err != nil {
return nil, errors.Wrap(err, "get buildInfo response")
}

replSetStatusCommand := bson.D{{Key: "replSetGetStatus", Value: 1}}
mongoVersion := v.Must(v.NewVersion(buildInfo.Version))
if mongoVersion.Compare(v.Must(v.NewVersion("4.2.1"))) < 0 {
// https://docs.mongodb.com/manual/reference/command/replSetGetStatus/#syntax
replSetStatusCommand = append(replSetStatusCommand, primitive.E{Key: "initialSync", Value: 1})
}

res := client.Database("admin").RunCommand(ctx, replSetStatusCommand)
if res.Err() != nil {
// if we come this far, it means db connection was successful
// standalone mongod nodes in an unmanaged cluster doesn't need
// to die before they added to a replset
if res.Err().Error() == ErrNoReplsetConfigStr {
if err.Error() == ErrNoReplsetConfigStr {
state := mongo.MemberStateUnknown
log.V(1).Info("replSetGetStatus failed", "err", res.Err().Error(), "state", state)
log.V(1).Info("replSetGetStatus failed", "err", err.Error(), "state", state)
return &state, nil
}
return nil, errors.Wrap(res.Err(), "get replsetGetStatus response")
}

// this is a workaround to fix decoding of empty interfaces
// https://jira.mongodb.org/browse/GODRIVER-988
rsStatus := ReplSetStatus{}
tempResult := bson.M{}
err = res.Decode(&tempResult)
if err != nil {
return nil, errors.Wrap(err, "decode replsetGetStatus response")
}

if err == nil {
result, err := json.Marshal(tempResult)
if err != nil {
return nil, errors.Wrap(err, "marshal temp result")
}

err = json.Unmarshal(result, &rsStatus)
if err != nil {
return nil, errors.Wrap(err, "unmarshal temp result")
}
return nil, errors.Wrap(err, "get replSetGetStatus response")
}

oplogRs := OplogRs{}
Expand Down
34 changes: 32 additions & 2 deletions cmd/mongodb-healthcheck/healthcheck/readiness.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package healthcheck
import (
"context"
"net"
"time"

"github.com/pkg/errors"
"go.mongodb.org/mongo-driver/bson"
Expand All @@ -27,21 +28,50 @@ import (
)

// MongodReadinessCheck dials the first configured host over TCP and, when the replica set status can be fetched, validates the member state with CheckState
func MongodReadinessCheck(ctx context.Context, addr string) error {
func MongodReadinessCheck(ctx context.Context, cnf *db.Config) error {
log := logf.FromContext(ctx).WithName("MongodReadinessCheck")
ctx = logf.IntoContext(ctx, log)

var d net.Dialer

addr := cnf.Hosts[0]
log.V(1).Info("Connecting to " + addr)
conn, err := d.DialContext(ctx, "tcp", addr)
if err != nil {
return errors.Wrap(err, "dial")
}
return conn.Close()
if err := conn.Close(); err != nil {
return err
}

s, err := func() (ReplSetStatus, error) {
cnf.Timeout = time.Second
client, err := db.Dial(ctx, cnf)
if err != nil {
return ReplSetStatus{}, errors.Wrap(err, "connection error")
}
defer func() {
if derr := client.Disconnect(ctx); derr != nil && err == nil {
err = errors.Wrap(derr, "failed to disconnect")
}
}()
return getStatus(ctx, client)
}()
if err != nil {
log.Error(err, "Failed to get replset status")
return nil
}

if err := CheckState(s, 0, 0); err != nil {
return errors.Wrap(err, "check state")
}

return nil
}

func MongosReadinessCheck(ctx context.Context, cnf *db.Config) (err error) {
log := logf.FromContext(ctx).WithName("MongosReadinessCheck")
ctx = logf.IntoContext(ctx, log)

client, err := db.Dial(ctx, cnf)
if err != nil {
Expand Down
51 changes: 51 additions & 0 deletions cmd/mongodb-healthcheck/healthcheck/util.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package healthcheck

import (
"context"
"encoding/json"

v "github.com/hashicorp/go-version"
"github.com/pkg/errors"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/bson/primitive"

"github.com/percona/percona-server-mongodb-operator/pkg/psmdb/mongo"
)

// getStatus runs the replSetGetStatus command against the "admin" database
// and decodes the reply into a ReplSetStatus.
//
// The server version is read through client.RSBuildInfo first; for versions
// older than 4.2.1 the command is sent with an additional "initialSync: 1"
// field.
//
// Errors:
//   - If the command fails with a message equal to ErrNoReplsetConfigStr, the
//     returned error carries exactly that message, unwrapped, so callers that
//     compare err.Error() against ErrNoReplsetConfigStr keep matching.
//   - Every other failure (buildInfo, version parsing, command, decoding) is
//     wrapped with a short description of the failing step.
//
// On any error the returned ReplSetStatus is the zero value.
func getStatus(ctx context.Context, client mongo.Client) (ReplSetStatus, error) {
	buildInfo, err := client.RSBuildInfo(ctx)
	if err != nil {
		return ReplSetStatus{}, errors.Wrap(err, "get buildInfo response")
	}

	// The version string is reported by the server at runtime, so a parse
	// failure is returned as an error instead of panicking through v.Must.
	mongoVersion, err := v.NewVersion(buildInfo.Version)
	if err != nil {
		return ReplSetStatus{}, errors.Wrapf(err, "parse mongo version %q", buildInfo.Version)
	}

	replSetStatusCommand := bson.D{{Key: "replSetGetStatus", Value: 1}}
	// v.Must is acceptable here: the argument is a compile-time constant.
	if mongoVersion.Compare(v.Must(v.NewVersion("4.2.1"))) < 0 {
		// https://docs.mongodb.com/manual/reference/command/replSetGetStatus/#syntax
		replSetStatusCommand = append(replSetStatusCommand, primitive.E{Key: "initialSync", Value: 1})
	}

	res := client.Database("admin").RunCommand(ctx, replSetStatusCommand)
	if cmdErr := res.Err(); cmdErr != nil {
		if cmdErr.Error() == ErrNoReplsetConfigStr {
			// Deliberately not wrapped: callers match on the exact message.
			return ReplSetStatus{}, errors.New(ErrNoReplsetConfigStr)
		}
		return ReplSetStatus{}, errors.Wrap(cmdErr, "get replsetGetStatus response")
	}

	// this is a workaround to fix decoding of empty interfaces
	// https://jira.mongodb.org/browse/GODRIVER-988
	// The reply is decoded into a generic bson.M and then round-tripped
	// through JSON into the typed ReplSetStatus.
	tempResult := bson.M{}
	if err := res.Decode(&tempResult); err != nil {
		return ReplSetStatus{}, errors.Wrap(err, "decode replsetGetStatus response")
	}

	result, err := json.Marshal(tempResult)
	if err != nil {
		return ReplSetStatus{}, errors.Wrap(err, "marshal temp result")
	}

	rsStatus := ReplSetStatus{}
	if err := json.Unmarshal(result, &rsStatus); err != nil {
		return ReplSetStatus{}, errors.Wrap(err, "unmarshal temp result")
	}

	return rsStatus, nil
}
2 changes: 1 addition & 1 deletion cmd/mongodb-healthcheck/tool/tool.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ func (app *App) Run(ctx context.Context) error {
switch *component {

case "mongod":
err := healthcheck.MongodReadinessCheck(ctx, cnf.Hosts[0])
err := healthcheck.MongodReadinessCheck(ctx, cnf)
if err != nil {
return errors.Wrap(err, "member failed Kubernetes readiness check")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,12 @@ spec:
- readiness
- --component
- mongod
- --ssl
- --sslInsecure
- --sslCAFile
- /etc/mongodb-ssl/ca.crt
- --sslPEMKeyFile
- /tmp/tls.pem
failureThreshold: 8
initialDelaySeconds: 10
periodSeconds: 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,12 @@ spec:
- readiness
- --component
- mongod
- --ssl
- --sslInsecure
- --sslCAFile
- /etc/mongodb-ssl/ca.crt
- --sslPEMKeyFile
- /tmp/tls.pem
failureThreshold: 8
initialDelaySeconds: 10
periodSeconds: 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@ spec:
- readiness
- --component
- mongod
- --ssl
- --sslInsecure
- --sslCAFile
- /etc/mongodb-ssl/ca.crt
- --sslPEMKeyFile
- /tmp/tls.pem
failureThreshold: 8
initialDelaySeconds: 10
periodSeconds: 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@ spec:
- readiness
- --component
- mongod
- --ssl
- --sslInsecure
- --sslCAFile
- /etc/mongodb-ssl/ca.crt
- --sslPEMKeyFile
- /tmp/tls.pem
failureThreshold: 8
initialDelaySeconds: 10
periodSeconds: 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@ spec:
- readiness
- --component
- mongod
- --ssl
- --sslInsecure
- --sslCAFile
- /etc/mongodb-ssl/ca.crt
- --sslPEMKeyFile
- /tmp/tls.pem
failureThreshold: 3
initialDelaySeconds: 10
periodSeconds: 3
Expand Down
6 changes: 6 additions & 0 deletions e2e-tests/custom-tls/compare/statefulset_some-name-cfg.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@ spec:
- readiness
- --component
- mongod
- --ssl
- --sslInsecure
- --sslCAFile
- /etc/mongodb-ssl/ca.crt
- --sslPEMKeyFile
- /tmp/tls.pem
failureThreshold: 3
initialDelaySeconds: 10
periodSeconds: 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ spec:
- readiness
- --component
- mongod
- --ssl
- --sslInsecure
- --sslCAFile
- /etc/mongodb-ssl/ca.crt
- --sslPEMKeyFile
- /tmp/tls.pem
failureThreshold: 8
initialDelaySeconds: 10
periodSeconds: 3
Expand Down
Loading
Loading