Skip to content

Commit a603100

Browse files
feat: make metrics scraping optional (#702)
* feat: make metrics scraping optional Signed-off-by: saumeya <[email protected]> fixes Signed-off-by: saumeya <[email protected]> fix tests and add monitoring by default Signed-off-by: saumeya <[email protected]> add test case Signed-off-by: saumeya <[email protected]> review comments Signed-off-by: saumeya <[email protected]> review comments Signed-off-by: saumeya <[email protected]> review comments Signed-off-by: saumeya <[email protected]> review comments Signed-off-by: saumeya <[email protected]> add e2e for the feature Signed-off-by: saumeya <[email protected]> uncommenting Signed-off-by: saumeya <[email protected]> rebase Signed-off-by: saumeya <[email protected]> changes: Signed-off-by: saumeya <[email protected]> changes Signed-off-by: saumeya <[email protected]> * bundle changes Signed-off-by: saumeya <[email protected]> * changes Signed-off-by: saumeya <[email protected]> * change Signed-off-by: saumeya <[email protected]> * change Signed-off-by: saumeya <[email protected]> * change Signed-off-by: saumeya <[email protected]> * changes Signed-off-by: saumeya <[email protected]> --------- Signed-off-by: saumeya <[email protected]> Co-authored-by: Abhishek Veeramalla <[email protected]>
1 parent 43e582c commit a603100

File tree

7 files changed

+334
-64
lines changed

7 files changed

+334
-64
lines changed

controllers/argocd_metrics_controller.go

+131-55
Original file line numberDiff line numberDiff line change
@@ -109,73 +109,135 @@ func (r *ArgoCDMetricsReconciler) Reconcile(ctx context.Context, request reconci
109109
}
110110

111111
const clusterMonitoringLabel = "openshift.io/cluster-monitoring"
112-
_, exists := namespace.Labels[clusterMonitoringLabel]
113-
if !exists {
114-
if namespace.Labels == nil {
115-
namespace.Labels = make(map[string]string)
112+
labelVal, exists := namespace.Labels[clusterMonitoringLabel]
113+
114+
if argocd.Spec.Monitoring.DisableMetrics == nil || !*argocd.Spec.Monitoring.DisableMetrics {
115+
if !exists || labelVal != "true" {
116+
if namespace.Labels == nil {
117+
namespace.Labels = make(map[string]string)
118+
}
119+
namespace.Labels[clusterMonitoringLabel] = "true"
120+
err = r.Client.Update(ctx, &namespace)
121+
if err != nil {
122+
reqLogger.Error(err, "Error updating namespace",
123+
"Namespace", namespace.Name)
124+
return reconcile.Result{}, err
125+
}
116126
}
117-
namespace.Labels[clusterMonitoringLabel] = "true"
118-
err = r.Client.Update(ctx, &namespace)
127+
128+
// Create role to grant read permission to the openshift metrics stack
129+
err = r.createReadRoleIfAbsent(request.Namespace, argocd, reqLogger)
119130
if err != nil {
120-
reqLogger.Error(err, "Error updating namespace",
121-
"Namespace", namespace.Name)
122131
return reconcile.Result{}, err
123132
}
124-
} else {
125-
reqLogger.Info("Namespace already has cluster-monitoring label",
126-
"Namespace", namespace.Name)
127-
}
128133

129-
// Create role to grant read permission to the openshift metrics stack
130-
err = r.createReadRoleIfAbsent(request.Namespace, argocd, reqLogger)
131-
if err != nil {
132-
return reconcile.Result{}, err
133-
}
134+
// Create role binding to grant read permission to the openshift metrics stack
135+
err = r.createReadRoleBindingIfAbsent(request.Namespace, argocd, reqLogger)
136+
if err != nil {
137+
return reconcile.Result{}, err
138+
}
134139

135-
// Create role binding to grant read permission to the openshift metrics stack
136-
err = r.createReadRoleBindingIfAbsent(request.Namespace, argocd, reqLogger)
137-
if err != nil {
138-
return reconcile.Result{}, err
139-
}
140+
// Create ServiceMonitor for ArgoCD application metrics
141+
serviceMonitorLabel := fmt.Sprintf("%s-metrics", request.Name)
142+
serviceMonitorName := request.Name
143+
err = r.createServiceMonitorIfAbsent(request.Namespace, argocd, serviceMonitorName, serviceMonitorLabel, reqLogger)
144+
if err != nil {
145+
return reconcile.Result{}, err
146+
}
140147

141-
// Create ServiceMonitor for ArgoCD application metrics
142-
serviceMonitorLabel := fmt.Sprintf("%s-metrics", request.Name)
143-
serviceMonitorName := request.Name
144-
err = r.createServiceMonitorIfAbsent(request.Namespace, argocd, serviceMonitorName, serviceMonitorLabel, reqLogger)
145-
if err != nil {
146-
return reconcile.Result{}, err
147-
}
148+
// Create ServiceMonitor for ArgoCD API server metrics
149+
serviceMonitorLabel = fmt.Sprintf("%s-server-metrics", request.Name)
150+
serviceMonitorName = fmt.Sprintf("%s-server", request.Name)
151+
err = r.createServiceMonitorIfAbsent(request.Namespace, argocd, serviceMonitorName, serviceMonitorLabel, reqLogger)
152+
if err != nil {
153+
return reconcile.Result{}, err
154+
}
148155

149-
// Create ServiceMonitor for ArgoCD API server metrics
150-
serviceMonitorLabel = fmt.Sprintf("%s-server-metrics", request.Name)
151-
serviceMonitorName = fmt.Sprintf("%s-server", request.Name)
152-
err = r.createServiceMonitorIfAbsent(request.Namespace, argocd, serviceMonitorName, serviceMonitorLabel, reqLogger)
153-
if err != nil {
154-
return reconcile.Result{}, err
155-
}
156+
// Create ServiceMonitor for ArgoCD repo server metrics
157+
serviceMonitorLabel = fmt.Sprintf("%s-repo-server", request.Name)
158+
serviceMonitorName = fmt.Sprintf("%s-repo-server", request.Name)
159+
err = r.createServiceMonitorIfAbsent(request.Namespace, argocd, serviceMonitorName, serviceMonitorLabel, reqLogger)
160+
if err != nil {
161+
return reconcile.Result{}, err
162+
}
156163

157-
// Create ServiceMonitor for ArgoCD repo server metrics
158-
serviceMonitorLabel = fmt.Sprintf("%s-repo-server", request.Name)
159-
serviceMonitorName = fmt.Sprintf("%s-repo-server", request.Name)
160-
err = r.createServiceMonitorIfAbsent(request.Namespace, argocd, serviceMonitorName, serviceMonitorLabel, reqLogger)
161-
if err != nil {
162-
return reconcile.Result{}, err
163-
}
164+
// Create alert rule
165+
err = r.createPrometheusRuleIfAbsent(request.Namespace, argocd, reqLogger)
166+
if err != nil {
167+
return reconcile.Result{}, err
168+
}
164169

165-
// Create alert rule
166-
err = r.createPrometheusRuleIfAbsent(request.Namespace, argocd, reqLogger)
167-
if err != nil {
168-
return reconcile.Result{}, err
169-
}
170+
err = r.reconcileDashboards(reqLogger)
171+
if err != nil {
172+
return reconcile.Result{}, err
173+
}
170174

171-
err = r.reconcileDashboards(reqLogger)
172-
if err != nil {
173-
return reconcile.Result{}, err
174-
}
175+
err = r.reconcileOperatorMetricsServiceMonitor(reqLogger)
176+
if err != nil {
177+
return reconcile.Result{}, err
178+
}
179+
} else {
180+
if exists {
181+
namespace.Labels[clusterMonitoringLabel] = "false"
182+
err = r.Client.Update(ctx, &namespace)
183+
if err != nil {
184+
reqLogger.Error(err, "Error updating namespace",
185+
"Namespace", namespace.Name)
186+
return reconcile.Result{}, err
187+
}
175188

176-
err = r.reconcileOperatorMetricsServiceMonitor(reqLogger)
177-
if err != nil {
178-
return reconcile.Result{}, err
189+
// Delete role to grant read permission to the openshift metrics stack
190+
err = r.Client.Delete(context.TODO(), &rbacv1.Role{ObjectMeta: metav1.ObjectMeta{Namespace: request.Namespace, Name: fmt.Sprintf(readRoleNameFormat, request.Namespace)}})
191+
if err != nil {
192+
if !errors.IsNotFound(err) {
193+
reqLogger.Error(err, "Error deleting role in ",
194+
"Namespace", request.Namespace)
195+
return reconcile.Result{}, err
196+
}
197+
}
198+
199+
// Delete role binding to grant read permission to the openshift metrics stack
200+
err = r.Client.Delete(context.TODO(), &rbacv1.RoleBinding{ObjectMeta: metav1.ObjectMeta{Namespace: request.Namespace, Name: fmt.Sprintf(readRoleBindingNameFormat, request.Namespace)}})
201+
if err != nil {
202+
if !errors.IsNotFound(err) {
203+
reqLogger.Error(err, "Error deleting rolebinding in ",
204+
"Namespace", request.Namespace)
205+
return reconcile.Result{}, err
206+
}
207+
}
208+
209+
// Delete ServiceMonitor for ArgoCD application metrics
210+
serviceMonitorName := request.Name
211+
err = r.deleteServiceMonitor(serviceMonitorName, request.Namespace, reqLogger)
212+
if err != nil {
213+
return reconcile.Result{}, err
214+
}
215+
216+
// Delete ServiceMonitor for ArgoCD API server metrics
217+
serviceMonitorName = fmt.Sprintf("%s-server", request.Name)
218+
err = r.deleteServiceMonitor(serviceMonitorName, request.Namespace, reqLogger)
219+
if err != nil {
220+
return reconcile.Result{}, err
221+
}
222+
223+
// Delete ServiceMonitor for ArgoCD repo server metrics
224+
serviceMonitorName = fmt.Sprintf("%s-repo-server", request.Name)
225+
err = r.deleteServiceMonitor(serviceMonitorName, request.Namespace, reqLogger)
226+
if err != nil {
227+
return reconcile.Result{}, err
228+
}
229+
230+
// Delete alert rule
231+
err = r.Client.Delete(context.TODO(), &monitoringv1.PrometheusRule{ObjectMeta: metav1.ObjectMeta{Namespace: request.Namespace, Name: alertRuleName}})
232+
if err != nil {
233+
if !errors.IsNotFound(err) {
234+
reqLogger.Error(err, "Error deleting prometheus in ",
235+
"Namespace", request.Namespace)
236+
return reconcile.Result{}, err
237+
}
238+
}
239+
240+
}
179241
}
180242

181243
return reconcile.Result{}, nil
@@ -282,6 +344,20 @@ func (r *ArgoCDMetricsReconciler) createServiceMonitorIfAbsent(namespace string,
282344
return err
283345
}
284346

347+
func (r *ArgoCDMetricsReconciler) deleteServiceMonitor(name string, namespace string, reqLogger logr.Logger) error {
348+
349+
err := r.Client.Delete(context.TODO(), &monitoringv1.ServiceMonitor{ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: name}})
350+
if err != nil {
351+
if !errors.IsNotFound(err) {
352+
reqLogger.Error(err, "Error deleting servicemonitor",
353+
"Namespace", namespace)
354+
return err
355+
}
356+
}
357+
return nil
358+
359+
}
360+
285361
func (r *ArgoCDMetricsReconciler) reconcileOperatorMetricsServiceMonitor(reqLogger logr.Logger) error {
286362

287363
data, err := os.ReadFile(operatorPodNamespacePath)

controllers/argocd_metrics_controller_test.go

+55-9
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ const (
5252
argoCDInstanceName = "openshift-gitops"
5353
)
5454

55-
func newClient(s *runtime.Scheme, namespace, name string) client.Client {
55+
func newClient(s *runtime.Scheme, namespace, name string, disableMetrics *bool) client.Client {
5656
ns := corev1.Namespace{
5757
ObjectMeta: v1.ObjectMeta{
5858
Name: namespace,
@@ -63,14 +63,19 @@ func newClient(s *runtime.Scheme, namespace, name string) client.Client {
6363
Name: name,
6464
Namespace: namespace,
6565
},
66+
Spec: argoapp.ArgoCDSpec{
67+
Monitoring: argoapp.ArgoCDMonitoringSpec{
68+
DisableMetrics: disableMetrics,
69+
},
70+
},
6671
}
6772
return fake.NewClientBuilder().WithScheme(s).WithObjects(&ns, &argocd).Build()
6873
}
6974

70-
func newMetricsReconciler(t *testing.T, namespace, name string) ArgoCDMetricsReconciler {
75+
func newMetricsReconciler(t *testing.T, namespace, name string, disableMetrics *bool) ArgoCDMetricsReconciler {
7176
t.Helper()
7277
s := newScheme()
73-
c := newClient(s, namespace, name)
78+
c := newClient(s, namespace, name, disableMetrics)
7479
r := ArgoCDMetricsReconciler{Client: c, Scheme: s}
7580
return r
7681
}
@@ -90,7 +95,7 @@ func TestReconcile_add_namespace_label(t *testing.T) {
9095
},
9196
}
9297
for _, tc := range testCases {
93-
r := newMetricsReconciler(t, tc.namespace, tc.instanceName)
98+
r := newMetricsReconciler(t, tc.namespace, tc.instanceName, nil)
9499
_, err := r.Reconcile(context.TODO(), newRequest(tc.namespace, tc.instanceName))
95100
assert.NilError(t, err)
96101

@@ -117,7 +122,7 @@ func TestReconcile_add_read_role(t *testing.T) {
117122
},
118123
}
119124
for _, tc := range testCases {
120-
r := newMetricsReconciler(t, tc.namespace, tc.instanceName)
125+
r := newMetricsReconciler(t, tc.namespace, tc.instanceName, nil)
121126
_, err := r.Reconcile(context.TODO(), newRequest(tc.namespace, tc.instanceName))
122127
assert.NilError(t, err)
123128

@@ -160,7 +165,7 @@ func TestReconcile_add_read_role_binding(t *testing.T) {
160165
},
161166
}
162167
for _, tc := range testCases {
163-
r := newMetricsReconciler(t, tc.namespace, tc.instanceName)
168+
r := newMetricsReconciler(t, tc.namespace, tc.instanceName, nil)
164169
_, err := r.Reconcile(context.TODO(), newRequest(tc.namespace, tc.instanceName))
165170
assert.NilError(t, err)
166171

@@ -204,7 +209,7 @@ func TestReconcile_add_service_monitors(t *testing.T) {
204209
},
205210
}
206211
for _, tc := range testCases {
207-
r := newMetricsReconciler(t, tc.namespace, tc.instanceName)
212+
r := newMetricsReconciler(t, tc.namespace, tc.instanceName, nil)
208213
_, err := r.Reconcile(context.TODO(), newRequest(tc.namespace, tc.instanceName))
209214
assert.NilError(t, err)
210215

@@ -255,6 +260,46 @@ func TestReconcile_add_service_monitors(t *testing.T) {
255260
}
256261
}
257262

263+
func TestReconcile_remove_service_monitors(t *testing.T) {
264+
testCases := []struct {
265+
instanceName string
266+
namespace string
267+
}{
268+
{
269+
instanceName: argoCDInstanceName,
270+
namespace: "openshift-gitops",
271+
},
272+
{
273+
instanceName: "instance-two",
274+
namespace: "namespace-two",
275+
},
276+
}
277+
flagPtr := true
278+
for _, tc := range testCases {
279+
r := newMetricsReconciler(t, tc.namespace, tc.instanceName, &flagPtr)
280+
_, err := r.Reconcile(context.TODO(), newRequest(tc.namespace, tc.instanceName))
281+
assert.NilError(t, err)
282+
283+
serviceMonitor := monitoringv1.ServiceMonitor{}
284+
serviceMonitorName := tc.instanceName
285+
286+
err = r.Client.Get(context.TODO(), types.NamespacedName{Name: serviceMonitorName, Namespace: tc.namespace}, &serviceMonitor)
287+
assert.Error(t, err, err.Error())
288+
289+
serviceMonitor = monitoringv1.ServiceMonitor{}
290+
serviceMonitorName = fmt.Sprintf("%s-server", tc.instanceName)
291+
292+
err = r.Client.Get(context.TODO(), types.NamespacedName{Name: serviceMonitorName, Namespace: tc.namespace}, &serviceMonitor)
293+
assert.Error(t, err, err.Error())
294+
295+
serviceMonitor = monitoringv1.ServiceMonitor{}
296+
serviceMonitorName = fmt.Sprintf("%s-repo-server", tc.instanceName)
297+
298+
err = r.Client.Get(context.TODO(), types.NamespacedName{Name: serviceMonitorName, Namespace: tc.namespace}, &serviceMonitor)
299+
assert.Error(t, err, err.Error())
300+
}
301+
}
302+
258303
func TestReconciler_add_prometheus_rule(t *testing.T) {
259304
testCases := []struct {
260305
instanceName string
@@ -269,8 +314,9 @@ func TestReconciler_add_prometheus_rule(t *testing.T) {
269314
namespace: "namespace-two",
270315
},
271316
}
317+
flagPtr := false
272318
for _, tc := range testCases {
273-
r := newMetricsReconciler(t, tc.namespace, tc.instanceName)
319+
r := newMetricsReconciler(t, tc.namespace, tc.instanceName, &flagPtr)
274320
_, err := r.Reconcile(context.TODO(), newRequest(tc.namespace, tc.instanceName))
275321
assert.NilError(t, err)
276322

@@ -319,7 +365,7 @@ func TestReconciler_add_dashboard(t *testing.T) {
319365
},
320366
}
321367
for _, tc := range testCases {
322-
r := newMetricsReconciler(t, tc.namespace, tc.instanceName)
368+
r := newMetricsReconciler(t, tc.namespace, tc.instanceName, nil)
323369
// Create dashboard namespace
324370
err := r.Client.Create(context.TODO(), &ns)
325371
assert.NilError(t, err)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
apiVersion: argoproj.io/v1alpha1
2+
kind: ArgoCD
3+
metadata:
4+
name: openshift-gitops
5+
namespace: openshift-gitops
6+
status:
7+
phase: Available
8+
---
9+
kind: Namespace
10+
apiVersion: v1
11+
metadata:
12+
name: openshift-gitops
13+
labels:
14+
openshift.io/cluster-monitoring: 'true'
15+
---
16+
apiVersion: monitoring.coreos.com/v1
17+
kind: ServiceMonitor
18+
metadata:
19+
name: openshift-gitops
20+
namespace: openshift-gitops
21+
---
22+
apiVersion: monitoring.coreos.com/v1
23+
kind: ServiceMonitor
24+
metadata:
25+
name: openshift-gitops-repo-server
26+
namespace: openshift-gitops
27+
---
28+
apiVersion: monitoring.coreos.com/v1
29+
kind: ServiceMonitor
30+
metadata:
31+
name: openshift-gitops-server
32+
namespace: openshift-gitops
33+
---
34+
kind: Role
35+
apiVersion: rbac.authorization.k8s.io/v1
36+
metadata:
37+
name: openshift-gitops-read
38+
namespace: openshift-gitops
39+
---
40+
kind: RoleBinding
41+
apiVersion: rbac.authorization.k8s.io/v1
42+
metadata:
43+
name: openshift-gitops-prometheus-k8s-read-binding
44+
namespace: openshift-gitops
45+
---
46+
apiVersion: monitoring.coreos.com/v1
47+
kind: PrometheusRule
48+
metadata:
49+
name: gitops-operator-argocd-alerts
50+
namespace: openshift-gitops

0 commit comments

Comments
 (0)