From 067e557c9d7cb49cf5adc5504a8fd2b066fa7547 Mon Sep 17 00:00:00 2001 From: Roelof Kuijpers Date: Wed, 9 Apr 2025 09:52:35 +0200 Subject: [PATCH 1/6] fix: stuck at 'Progressing' #15317 Signed-off-by: Roelof Kuijpers --- pkg/health/health_pod.go | 34 +++++--- ...-restart-never-with-ignore-annotation.yaml | 79 +++++++++++++++++++ 2 files changed, 103 insertions(+), 10 deletions(-) create mode 100644 pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml diff --git a/pkg/health/health_pod.go b/pkg/health/health_pod.go index 9ebcef558..c106a1f5e 100644 --- a/pkg/health/health_pod.go +++ b/pkg/health/health_pod.go @@ -12,6 +12,10 @@ import ( "github.com/argoproj/gitops-engine/pkg/utils/kube" ) +const ( + AnnotationIgnoreRestartPolicy = "argocd.argoproj.io/ignore-restart-policy" +) + func getPodHealth(obj *unstructured.Unstructured) (*HealthStatus, error) { gvk := obj.GroupVersionKind() switch gvk { @@ -93,9 +97,9 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) { } return &HealthStatus{Status: HealthStatusDegraded, Message: ""}, nil + case corev1.PodRunning: - switch pod.Spec.RestartPolicy { - case corev1.RestartPolicyAlways: + getHealthStatus := func(pod *corev1.Pod) (*HealthStatus, error) { // if pod is ready, it is automatically healthy if podutils.IsPodReady(pod) { return &HealthStatus{ @@ -117,14 +121,24 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) { Status: HealthStatusProgressing, Message: pod.Status.Message, }, nil - case corev1.RestartPolicyOnFailure, corev1.RestartPolicyNever: - // pods set with a restart policy of OnFailure or Never, have a finite life. - // These pods are typically resource hooks. Thus, we consider these as Progressing - // instead of healthy. - return &HealthStatus{ - Status: HealthStatusProgressing, - Message: pod.Status.Message, - }, nil + } + if _, hook := pod.Annotations[AnnotationIgnoreRestartPolicy]; hook { + return getHealthStatus(pod) + } else { + switch pod.Spec.RestartPolicy { + case corev1.RestartPolicyAlways: + return getHealthStatus(pod) + case corev1.RestartPolicyOnFailure, corev1.RestartPolicyNever: + // Most pods set with a restart policy of OnFailure or Never, have a finite life. + // These pods are typically resource hooks. Thus, we consider these as Progressing + // instead of healthy. If this is unwanted, e.g., when the pod is managed by an + // operator and therefore has a restart policy of OnFailure or Never, then use the + // the AnnotationIgnoreRestartPolicy annotation. + return &HealthStatus{ + Status: HealthStatusProgressing, + Message: pod.Status.Message, + }, nil + } } } return &HealthStatus{ diff --git a/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml b/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml new file mode 100644 index 000000000..432473dbd --- /dev/null +++ b/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml @@ -0,0 +1,79 @@ +apiVersion: v1 +kind: Pod +metadata: + creationTimestamp: 2018-12-02T09:15:16Z + name: my-pod + namespace: argocd + resourceVersion: "151053" + selfLink: /api/v1/namespaces/argocd/pods/my-pod + uid: c86e909c-f612-11e8-a057-fe5f49266390 + annotations: + argocd.argoproj.io/ignore-restart-policy: "true" +spec: + containers: + - command: + - sh + - -c + - sleep 10 + image: alpine:latest + imagePullPolicy: Always + name: main + resources: {} + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /var/run/secrets/kubernetes.io/serviceaccount + name: default-token-f9jvj + readOnly: true + dnsPolicy: ClusterFirst + nodeName: minikube + restartPolicy: Never + schedulerName: default-scheduler + securityContext: {} + serviceAccount: default + serviceAccountName: default + terminationGracePeriodSeconds: 30 + tolerations: + - effect: NoExecute + key: node.kubernetes.io/not-ready + operator: Exists + tolerationSeconds: 300 + - effect: NoExecute + key: node.kubernetes.io/unreachable + operator: Exists + tolerationSeconds: 300 + volumes: + - name: default-token-f9jvj + secret: + defaultMode: 420 + secretName: default-token-f9jvj +status: + conditions: + - lastProbeTime: null + lastTransitionTime: 2018-12-02T09:15:16Z + status: "True" + type: Initialized + - lastProbeTime: null + lastTransitionTime: 2018-12-02T09:15:19Z + status: "True" + type: Ready + - lastProbeTime: null + lastTransitionTime: 2018-12-02T09:15:16Z + status: "True" + type: PodScheduled + containerStatuses: + - containerID: docker://acfb261d6c1fe8c543438a202de62cb06c137fa93a2d59262d764470e96f3195 + image: alpine:latest + imageID: docker-pullable://alpine@sha256:621c2f39f8133acb8e64023a94dbdf0d5ca81896102b9e57c0dc184cadaf5528 + lastState: {} + name: main + ready: true + restartCount: 0 + state: + running: + startedAt: 2018-12-02T09:15:19Z + hostIP: 192.168.64.41 + phase: Running + podIP: 172.17.0.9 + qosClass: BestEffort + startTime: 2018-12-02T09:15:16Z From 609be43f3767f44e15b8cad79e93aef54274e79b Mon Sep 17 00:00:00 2001 From: RoelofKuijpers Date: Wed, 9 Apr 2025 10:22:16 +0200 Subject: [PATCH 2/6] Update health_test.go Signed-off-by: Roelof Kuijpers --- pkg/health/health_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/health/health_test.go b/pkg/health/health_test.go index ef945eb46..20ddc5f5c 100644 --- a/pkg/health/health_test.go +++ b/pkg/health/health_test.go @@ -103,6 +103,7 @@ func TestPod(t *testing.T) { assertAppHealth(t, "./testdata/pod-error.yaml", HealthStatusDegraded) assertAppHealth(t, "./testdata/pod-running-restart-always.yaml", HealthStatusHealthy) assertAppHealth(t, "./testdata/pod-running-restart-never.yaml", HealthStatusProgressing) + assertAppHealth(t, "./testdata/pod-running-restart-never-with-ignore-annotation.yaml", HealthStatusHealthy) assertAppHealth(t, "./testdata/pod-running-restart-onfailure.yaml", HealthStatusProgressing) assertAppHealth(t, "./testdata/pod-failed.yaml", HealthStatusDegraded) assertAppHealth(t, "./testdata/pod-succeeded.yaml", HealthStatusHealthy) From 451b8e43520aeb71c072ff81e3cbadc3397113b4 Mon Sep 17 00:00:00 2001 From: RoelofKuijpers Date: Wed, 9 Apr 2025 12:06:40 +0200 Subject: [PATCH 3/6] Update pod-running-restart-never-with-ignore-annotation.yaml so it passes the checks of the Quality Gate Signed-off-by: Roelof Kuijpers --- ...ng-restart-never-with-ignore-annotation.yaml | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml b/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml index 432473dbd..4c2b3682a 100644 --- a/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml +++ b/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml @@ -15,10 +15,16 @@ spec: - sh - -c - sleep 10 - image: alpine:latest + image: alpine:3.21 imagePullPolicy: Always name: main - resources: {} + resources: + requests: + memory: "128Mi" + cpu: "250m" + limits: + memory: "256Mi" + cpu: "500m" terminationMessagePath: /dev/termination-log terminationMessagePolicy: File volumeMounts: @@ -32,6 +38,7 @@ spec: securityContext: {} serviceAccount: default serviceAccountName: default + automountServiceAccountToken: false terminationGracePeriodSeconds: 30 tolerations: - effect: NoExecute @@ -62,9 +69,9 @@ status: status: "True" type: PodScheduled containerStatuses: - - containerID: docker://acfb261d6c1fe8c543438a202de62cb06c137fa93a2d59262d764470e96f3195 - image: alpine:latest - imageID: docker-pullable://alpine@sha256:621c2f39f8133acb8e64023a94dbdf0d5ca81896102b9e57c0dc184cadaf5528 + - containerID: containerd://adc73c2c0ae3f1fd9bf294abd834e740042ee375de680c0cfcdd90d863a73b8b + image: alpine:3.21 + imageID: docker.io/library/alpine@sha256:a8560b36e8b8210634f77d9f7f9efd7ffa463e380b75e2e74aff4511df3ef88c lastState: {} name: main ready: true From 8a0b3bc6110e77111d9217c96d48e149901c5916 Mon Sep 17 00:00:00 2001 From: Roelof Kuijpers Date: Wed, 9 Apr 2025 13:57:34 +0200 Subject: [PATCH 4/6] add storage request Signed-off-by: Roelof Kuijpers --- .../pod-running-restart-never-with-ignore-annotation.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml b/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml index 4c2b3682a..1cc27e5f1 100644 --- a/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml +++ b/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml @@ -20,6 +20,7 @@ spec: name: main resources: requests: + ephemeral-storage: "100Mi" memory: "128Mi" cpu: "250m" limits: From 1d946b87ee7f33943483e5dd7360910404d42a4f Mon Sep 17 00:00:00 2001 From: RoelofKuijpers Date: Mon, 21 Apr 2025 18:13:20 +0200 Subject: [PATCH 5/6] Update pkg/health/health_pod.go improve code readability Co-authored-by: sivchari Signed-off-by: Roelof Kuijpers --- pkg/health/health_pod.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pkg/health/health_pod.go b/pkg/health/health_pod.go index c106a1f5e..7d874f284 100644 --- a/pkg/health/health_pod.go +++ b/pkg/health/health_pod.go @@ -122,13 +122,12 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) { Message: pod.Status.Message, }, nil } - if _, hook := pod.Annotations[AnnotationIgnoreRestartPolicy]; hook { + policy := pod.Spec.RestartPolicy + if _, ok := pod.Annotations[AnnotationIgnoreRestartPolicy]; ok || policy == corev1.RestartPolicyAlways { return getHealthStatus(pod) - } else { - switch pod.Spec.RestartPolicy { - case corev1.RestartPolicyAlways: - return getHealthStatus(pod) - case corev1.RestartPolicyOnFailure, corev1.RestartPolicyNever: + } + + if policy == corev1.RestartPolicyOnFailure || policy == corev1.RestartPolicyNever { // Most pods set with a restart policy of OnFailure or Never, have a finite life. // These pods are typically resource hooks. Thus, we consider these as Progressing // instead of healthy. If this is unwanted, e.g., when the pod is managed by an From 61ddfd1eebeb1e0f569d8cafd239b4ae2fbc0efa Mon Sep 17 00:00:00 2001 From: Roelof Kuijpers Date: Mon, 21 Apr 2025 18:30:38 +0200 Subject: [PATCH 6/6] Improve code readability fix Signed-off-by: Roelof Kuijpers --- pkg/health/health_pod.go | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/pkg/health/health_pod.go b/pkg/health/health_pod.go index 7d874f284..d54c52cb0 100644 --- a/pkg/health/health_pod.go +++ b/pkg/health/health_pod.go @@ -122,22 +122,21 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) { Message: pod.Status.Message, }, nil } - policy := pod.Spec.RestartPolicy + policy := pod.Spec.RestartPolicy if _, ok := pod.Annotations[AnnotationIgnoreRestartPolicy]; ok || policy == corev1.RestartPolicyAlways { return getHealthStatus(pod) } - - if policy == corev1.RestartPolicyOnFailure || policy == corev1.RestartPolicyNever { - // Most pods set with a restart policy of OnFailure or Never, have a finite life. - // These pods are typically resource hooks. Thus, we consider these as Progressing - // instead of healthy. If this is unwanted, e.g., when the pod is managed by an - // operator and therefore has a restart policy of OnFailure or Never, then use the - // the AnnotationIgnoreRestartPolicy annotation. - return &HealthStatus{ - Status: HealthStatusProgressing, - Message: pod.Status.Message, - }, nil - } + + if policy == corev1.RestartPolicyOnFailure || policy == corev1.RestartPolicyNever { + // Most pods set with a restart policy of OnFailure or Never, have a finite life. + // These pods are typically resource hooks. Thus, we consider these as Progressing + // instead of healthy. If this is unwanted, e.g., when the pod is managed by an + // operator and therefore has a restart policy of OnFailure or Never, then use the + // the AnnotationIgnoreRestartPolicy annotation. + return &HealthStatus{ + Status: HealthStatusProgressing, + Message: pod.Status.Message, + }, nil } } return &HealthStatus{