From e63bfa9dcbe153f85af67b0e31ef322bfd8e5574 Mon Sep 17 00:00:00 2001 From: Coleen Iona Quadros Date: Thu, 19 Oct 2023 03:56:21 +0200 Subject: [PATCH] Remove prom rules and Service Monitor from openshift monitoring namespace (#1256) * remove prom rules from os monitoring space Signed-off-by: Coleen Iona Quadros * avoid check for delete in unit test Signed-off-by: Coleen Iona Quadros * format Signed-off-by: Coleen Iona Quadros * delete service monitor in openshift monitoring namespace Signed-off-by: Coleen Iona Quadros * refactor sm controller and add tests Signed-off-by: Coleen Iona Quadros * format Signed-off-by: Coleen Iona Quadros * clean comments Signed-off-by: Coleen Iona Quadros * typo Signed-off-by: Coleen Iona Quadros * typo Signed-off-by: Coleen Iona Quadros * typo Signed-off-by: Coleen Iona Quadros --------- Signed-off-by: Coleen Iona Quadros --- .../multiclusterobservability_controller.go | 57 ++++++++++++++++++ ...lticlusterobservability_controller_test.go | 58 +++++++++++++++++++ .../base/alertmanager/prometheusrule.yaml | 2 +- .../pkg/servicemonitor/sm_controller.go | 27 ++------- 4 files changed, 121 insertions(+), 23 deletions(-) diff --git a/operators/multiclusterobservability/controllers/multiclusterobservability/multiclusterobservability_controller.go b/operators/multiclusterobservability/controllers/multiclusterobservability/multiclusterobservability_controller.go index 11ceafa44..1af3cfcf5 100644 --- a/operators/multiclusterobservability/controllers/multiclusterobservability/multiclusterobservability_controller.go +++ b/operators/multiclusterobservability/controllers/multiclusterobservability/multiclusterobservability_controller.go @@ -10,6 +10,7 @@ import ( "fmt" "os" "reflect" + "strings" "time" "github.com/go-logr/logr" @@ -38,6 +39,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/controller-runtime/pkg/source" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" mcov1beta2
"github.com/stolostron/multicluster-observability-operator/operators/multiclusterobservability/api/v1beta2" placementctrl "github.com/stolostron/multicluster-observability-operator/operators/multiclusterobservability/controllers/placementrule" certctrl "github.com/stolostron/multicluster-observability-operator/operators/multiclusterobservability/pkg/certificates" @@ -69,6 +71,7 @@ var ( isRuleStorageSizeChanged = false isReceiveStorageSizeChanged = false isStoreStorageSizeChanged = false + isLegacyResourceRemoved = false ) // MultiClusterObservabilityReconciler reconciles a MultiClusterObservability object @@ -318,6 +321,20 @@ func (r *MultiClusterObservabilityReconciler) Reconcile(ctx context.Context, req } } + if os.Getenv("UNIT_TEST") != "true" && !isLegacyResourceRemoved { + isLegacyResourceRemoved = true + // Delete PrometheusRule from openshift-monitoring namespace + if err := r.deleteSpecificPrometheusRule(ctx); err != nil { + reqLogger.Error(err, "Failed to delete the specific PrometheusRule in the openshift-monitoring namespace") + return ctrl.Result{}, err + } + // Delete ServiceMonitor from openshft-monitoring namespace + if err := r.deleteServiceMonitorInOpenshiftMonitoringNamespace(ctx); err != nil { + reqLogger.Error(err, "Failed to delete service monitor in the openshift-monitoring namespace") + return ctrl.Result{}, err + } + } + //update status requeueStatusUpdate <- struct{}{} @@ -832,3 +849,43 @@ func (r *MultiClusterObservabilityReconciler) ensureOpenShiftNamespaceLabel(ctx return reconcile.Result{}, nil } + +func (r *MultiClusterObservabilityReconciler) deleteSpecificPrometheusRule(ctx context.Context) error { + promRule := &monitoringv1.PrometheusRule{} + err := r.Client.Get(ctx, client.ObjectKey{Name: "acm-observability-alert-rules", + Namespace: "openshift-monitoring"}, promRule) + if err == nil { + err = r.Client.Delete(ctx, promRule) + if err != nil { + log.Error(err, "Failed to delete PrometheusRule in openshift-monitoring namespace") + 
return err + } + log.Info("Deleted PrometheusRule from openshift-monitoring namespace") + } else if !apierrors.IsNotFound(err) { + log.Error(err, "Failed to fetch PrometheusRule") + return err + } + + return nil +} + +func (r *MultiClusterObservabilityReconciler) deleteServiceMonitorInOpenshiftMonitoringNamespace(ctx context.Context) error { + serviceMonitorList := &monitoringv1.ServiceMonitorList{} + err := r.Client.List(ctx, serviceMonitorList, client.InNamespace("openshift-monitoring")) + if !apierrors.IsNotFound(err) && err != nil { + log.Error(err, "Failed to fetch ServiceMonitors") + return err + } + + for _, sm := range serviceMonitorList.Items { + if strings.HasPrefix(sm.Name, "observability-") { + err = r.Client.Delete(ctx, sm) + if err != nil { + log.Error(err, "Failed to delete ServiceMonitor", "ServiceMonitorName", sm.Name) + return err + } + log.Info("Deleted ServiceMonitor", "ServiceMonitorName", sm.Name) + } + } + return nil +} diff --git a/operators/multiclusterobservability/controllers/multiclusterobservability/multiclusterobservability_controller_test.go b/operators/multiclusterobservability/controllers/multiclusterobservability/multiclusterobservability_controller_test.go index 101bd292b..6faaa0c82 100644 --- a/operators/multiclusterobservability/controllers/multiclusterobservability/multiclusterobservability_controller_test.go +++ b/operators/multiclusterobservability/controllers/multiclusterobservability/multiclusterobservability_controller_test.go @@ -12,6 +12,8 @@ import ( "testing" "time" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + oauthv1 "github.com/openshift/api/oauth/v1" routev1 "github.com/openshift/api/route/v1" observatoriumv1alpha1 "github.com/stolostron/observatorium-operator/api/v1alpha1" @@ -1004,3 +1006,59 @@ func createAlertManagerConfigMap(name string) *corev1.ConfigMap { }, } } + +func TestPrometheusRulesRemovedFromOpenshiftMonitoringNamespace(t *testing.T) { + promRule := 
&monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "acm-observability-alert-rules", + Namespace: "openshift-monitoring", + }, + //Sample rules + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "test", + Rules: []monitoringv1.Rule{ + { + Alert: "test", + }, + }, + }, + }, + }, + } + s := scheme.Scheme + monitoringv1.AddToScheme(s) + objs := []runtime.Object{promRule} + c := fake.NewClientBuilder().WithRuntimeObjects(objs...).Build() + r := &MultiClusterObservabilityReconciler{Client: c, Scheme: s} + err := r.deleteSpecificPrometheusRule(context.TODO()) + if err != nil { + t.Fatalf("Failed to delete PrometheusRule: (%v)", err) + } +} + +func TestServiceMonitorRemovedFromOpenshiftMonitoringNamespace(t *testing.T) { + sm := &monitoringv1.ServiceMonitor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "observability-sm-test", + Namespace: "openshift-monitoring", + }, + Spec: monitoringv1.ServiceMonitorSpec{ + Endpoints: []monitoringv1.Endpoint{ + { + Port: "test", + }, + }, + }, + } + s := scheme.Scheme + monitoringv1.AddToScheme(s) + objs := []runtime.Object{sm} + c := fake.NewClientBuilder().WithRuntimeObjects(objs...).Build() + r := &MultiClusterObservabilityReconciler{Client: c, Scheme: s} + err := r.deleteServiceMonitorInOpenshiftMonitoringNamespace(context.TODO()) + if err != nil { + t.Fatalf("Failed to delete ServiceMonitor: (%v)", err) + } +} diff --git a/operators/multiclusterobservability/manifests/base/alertmanager/prometheusrule.yaml b/operators/multiclusterobservability/manifests/base/alertmanager/prometheusrule.yaml index 9835aa8cb..a4a58226d 100644 --- a/operators/multiclusterobservability/manifests/base/alertmanager/prometheusrule.yaml +++ b/operators/multiclusterobservability/manifests/base/alertmanager/prometheusrule.yaml @@ -4,7 +4,7 @@ metadata: annotations: update-namespace: 'false' name: acm-observability-alert-rules - namespace: openshift-monitoring + namespace: 
open-cluster-management-observability spec: groups: - name: observability.rules diff --git a/operators/multiclusterobservability/pkg/servicemonitor/sm_controller.go b/operators/multiclusterobservability/pkg/servicemonitor/sm_controller.go index a587c005a..8b32ca235 100644 --- a/operators/multiclusterobservability/pkg/servicemonitor/sm_controller.go +++ b/operators/multiclusterobservability/pkg/servicemonitor/sm_controller.go @@ -23,21 +23,21 @@ import ( ) const ( - ocpMonitoringNamespace = "openshift-monitoring" - metricsNamePrefix = "acm_" + metricsNamePrefix = "acm_" ) var ( + ocpMonitoringNamespace = config.GetDefaultNamespace() log = logf.Log.WithName("sm_controller") - isSmControllerRunnning = false + isSmControllerRunning = false ) func Start() { - if isSmControllerRunnning { + if isSmControllerRunning { return } - isSmControllerRunnning = true + isSmControllerRunning = true promClient, err := promclientset.NewForConfig(ctrl.GetConfigOrDie()) if err != nil { @@ -56,7 +56,6 @@ func Start() { time.Minute*60, cache.ResourceEventHandlerFuncs{ AddFunc: onAdd(promClient), - DeleteFunc: onDelete(promClient), UpdateFunc: onUpdate(promClient), }, ) @@ -74,22 +73,6 @@ func onAdd(promClient promclientset.Interface) func(obj interface{}) { } } -func onDelete(promClient promclientset.Interface) func(obj interface{}) { - return func(obj interface{}) { - sm := obj.(*promv1.ServiceMonitor) - if sm.ObjectMeta.OwnerReferences != nil && sm.ObjectMeta.OwnerReferences[0].Kind == "Observatorium" { - err := promClient.MonitoringV1(). - ServiceMonitors(ocpMonitoringNamespace). 
- Delete(context.TODO(), sm.Name, metav1.DeleteOptions{}) - if err != nil { - log.Error(err, "Failed to delete ServiceMonitor", "namespace", ocpMonitoringNamespace, "name", sm.Name) - } else { - log.Info("ServiceMonitor Deleted", "namespace", ocpMonitoringNamespace, "name", sm.Name) - } - } - } -} - func onUpdate(promClient promclientset.Interface) func(oldObj interface{}, newObj interface{}) { return func(oldObj interface{}, newObj interface{}) { newSm := newObj.(*promv1.ServiceMonitor)