Skip to content

Commit

Permalink
GardenerCluster states metric
Browse files Browse the repository at this point in the history
  • Loading branch information
Disper committed Mar 4, 2024
1 parent 814ea1b commit 33e1cab
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 44 deletions.
4 changes: 3 additions & 1 deletion cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package main
import (
"flag"
"fmt"
"github.com/kyma-project/infrastructure-manager/internal/controller/metrics"

Check failure on line 22 in cmd/main.go

View workflow job for this annotation

GitHub Actions / lint

File is not `gci`-ed with --skip-generated -s standard -s default (gci)
"os"
"time"

Expand Down Expand Up @@ -120,7 +121,8 @@ func main() {
}

rotationPeriod := time.Duration(minimalRotationTimeRatio*expirationTime.Minutes()) * time.Minute
if err = (controller.NewGardenerClusterController(mgr, kubeconfigProvider, logger, rotationPeriod)).SetupWithManager(mgr); err != nil {
metrics := metrics.NewMetrics()
if err = (controller.NewGardenerClusterController(mgr, kubeconfigProvider, logger, rotationPeriod, metrics)).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "GardenerCluster")
os.Exit(1)
}
Expand Down
16 changes: 6 additions & 10 deletions internal/controller/gardener_cluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,17 @@ type GardenerClusterController struct {
KubeconfigProvider KubeconfigProvider
log logr.Logger
rotationPeriod time.Duration
metrics metrics.Metrics
}

func NewGardenerClusterController(mgr ctrl.Manager, kubeconfigProvider KubeconfigProvider, logger logr.Logger, rotationPeriod time.Duration) *GardenerClusterController {
// NewGardenerClusterController assembles a GardenerClusterController that uses
// the manager's client and scheme together with the supplied kubeconfig
// provider, logger, rotation period and metrics.
func NewGardenerClusterController(mgr ctrl.Manager, kubeconfigProvider KubeconfigProvider, logger logr.Logger, rotationPeriod time.Duration, metrics metrics.Metrics) *GardenerClusterController {
	reconciler := new(GardenerClusterController)

	// Dependencies taken from the manager.
	reconciler.Client = mgr.GetClient()
	reconciler.Scheme = mgr.GetScheme()

	// Dependencies handed in by the caller.
	reconciler.KubeconfigProvider = kubeconfigProvider
	reconciler.log = logger
	reconciler.rotationPeriod = rotationPeriod
	reconciler.metrics = metrics

	return reconciler
}

Expand All @@ -86,7 +88,6 @@ func (controller *GardenerClusterController) Reconcile(ctx context.Context, req
controller.log.Info("Starting reconciliation.", loggingContext(req)...)

var cluster imv1.GardenerCluster
metrics.IncrementReconciliationLoopsStarted()

err := controller.Get(ctx, req.NamespacedName, &cluster)

Expand Down Expand Up @@ -152,12 +153,7 @@ func (controller *GardenerClusterController) Reconcile(ctx context.Context, req
}

func (controller *GardenerClusterController) unsetStateMetric(ctx context.Context, req ctrl.Request) {

Check warning on line 155 in internal/controller/gardener_cluster_controller.go

View workflow job for this annotation

GitHub Actions / lint

unused-parameter: parameter 'ctx' seems to be unused, consider removing or renaming it as _ (revive)
var secretKey = "kubeconfig-" + req.NamespacedName.Name
var secretNamespacedName = types.NamespacedName{Name: secretKey, Namespace: "kcp-system"}
var kubeconfigSecret corev1.Secret
_ = controller.Get(ctx, secretNamespacedName, &kubeconfigSecret)

metrics.UnSetGardenerClusterStates(kubeconfigSecret)
controller.metrics.UnSetGardenerClusterStates(req.NamespacedName.Name)
}

func loggingContextFromCluster(cluster *imv1.GardenerCluster) []any {
Expand All @@ -171,7 +167,7 @@ func loggingContext(req ctrl.Request) []any {
func (controller *GardenerClusterController) resultWithRequeue(cluster *imv1.GardenerCluster, requeueAfter time.Duration) ctrl.Result {
controller.log.Info("result with requeue", "RequeueAfter", requeueAfter.String())

metrics.SetGardenerClusterStates(*cluster)
controller.metrics.SetGardenerClusterStates(*cluster)

return ctrl.Result{
Requeue: true,
Expand All @@ -181,7 +177,7 @@ func (controller *GardenerClusterController) resultWithRequeue(cluster *imv1.Gar

// resultWithoutRequeue logs the outcome, records the cluster's current state
// metric and returns an empty result, i.e. one that requests no requeue.
func (controller *GardenerClusterController) resultWithoutRequeue(cluster *imv1.GardenerCluster) ctrl.Result { //nolint:unparam
	const msg = "result without requeue"
	controller.log.Info(msg)

	controller.metrics.SetGardenerClusterStates(*cluster)

	var noRequeue ctrl.Result
	return noRequeue
}

Expand Down
80 changes: 48 additions & 32 deletions internal/controller/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,51 +4,67 @@ import (
"fmt"

Check failure on line 4 in internal/controller/metrics/metrics.go

View workflow job for this annotation

GitHub Actions / lint

File is not `gci`-ed with --skip-generated -s standard -s default (gci)
v1 "github.com/kyma-project/infrastructure-manager/api/v1"
"github.com/prometheus/client_golang/prometheus"
corev1 "k8s.io/api/core/v1"
ctrlMetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
)

const (
runtimeId = "runtimeId"
state = "state"
runtimeIdKeyName = "runtimeId"

Check warning on line 11 in internal/controller/metrics/metrics.go

View workflow job for this annotation

GitHub Actions / lint

var-naming: const runtimeIdKeyName should be runtimeIDKeyName (revive)
state = "state"
reason = "reason"
runtimeIdLabel = "kyma-project.io/runtime-id"

Check warning on line 14 in internal/controller/metrics/metrics.go

View workflow job for this annotation

GitHub Actions / lint

var-naming: const runtimeIdLabel should be runtimeIDLabel (revive)
componentName = "infrastructure_manager"
)

var (

//nolint:godox //TODO: test custom metric, remove when done with https://github.com/kyma-project/infrastructure-manager/issues/11
playgroundTotalReconciliationLoopsStarted = prometheus.NewCounter( //nolint:gochecknoglobals
prometheus.CounterOpts{
Name: "im_playground_reconciliation_loops_started_total",
Help: "Number of times reconciliation loop was started",
},
)

metricGardenerClustersState = prometheus.NewGaugeVec( //nolint:gochecknoglobals
prometheus.GaugeOpts{ //nolint:gochecknoglobals
Subsystem: "infrastructure_manager",
Name: "im_gardener_clusters_state",
Help: "Indicates the Status.state for GardenerCluster CRs",
}, []string{runtimeId, state})
)
// Metrics holds the Prometheus gauge vector used to report GardenerCluster state.
type Metrics struct {
gardenerClustersStateGaugeVec *prometheus.GaugeVec
}

func init() {
ctrlMetrics.Registry.MustRegister(playgroundTotalReconciliationLoopsStarted, metricGardenerClustersState)
// NewMetrics builds the GardenerCluster state gauge vector, registers it with
// the controller-runtime metrics registry and returns it wrapped in Metrics.
//
// Registration goes through MustRegister, which panics if it fails.
func NewMetrics() Metrics {
	opts := prometheus.GaugeOpts{
		Subsystem: componentName,
		Name:      "im_gardener_clusters_state",
		Help:      "Indicates the Status.state for GardenerCluster CRs",
	}
	labelNames := []string{runtimeIdKeyName, state, reason}

	stateGauge := prometheus.NewGaugeVec(opts, labelNames)
	ctrlMetrics.Registry.MustRegister(stateGauge)

	return Metrics{gardenerClustersStateGaugeVec: stateGauge}
}

func IncrementReconciliationLoopsStarted() {
playgroundTotalReconciliationLoopsStarted.Inc()
func (m Metrics) SetGardenerClusterStates(cluster v1.GardenerCluster) {
var runtimeId = cluster.GetLabels()[runtimeIdLabel]

Check warning on line 36 in internal/controller/metrics/metrics.go

View workflow job for this annotation

GitHub Actions / lint

var-naming: var runtimeId should be runtimeID (revive)

if runtimeId != "" {
var reason = cluster.Status.Conditions[0].Reason

//first clean the old metric

Check failure on line 41 in internal/controller/metrics/metrics.go

View workflow job for this annotation

GitHub Actions / lint

commentFormatting: put a space between `//` and comment text (gocritic)
m.cleanUpGardenerClusterGauge(runtimeId)
m.gardenerClustersStateGaugeVec.WithLabelValues(runtimeId, string(cluster.Status.State), reason).Set(1)
}
}

func SetGardenerClusterStates(cluster v1.GardenerCluster) {
metricGardenerClustersState.WithLabelValues(cluster.Name, string(cluster.Status.State)).Set(1)
func (m Metrics) UnSetGardenerClusterStates(runtimeId string) {

Check warning on line 47 in internal/controller/metrics/metrics.go

View workflow job for this annotation

GitHub Actions / lint

var-naming: method parameter runtimeId should be runtimeID (revive)
m.cleanUpGardenerClusterGauge(runtimeId)
}

func UnSetGardenerClusterStates(secret corev1.Secret) {
var runtimeId = secret.GetLabels()["kyma-project.io/runtime-id"]
var deletedReady = metricGardenerClustersState.DeleteLabelValues(runtimeId, "Ready")
var deletedError = metricGardenerClustersState.DeleteLabelValues(runtimeId, "Error")
func (m Metrics) cleanUpGardenerClusterGauge(runtimeId string) {

Check warning on line 51 in internal/controller/metrics/metrics.go

View workflow job for this annotation

GitHub Actions / lint

var-naming: method parameter runtimeId should be runtimeID (revive)

var readyMetric, _ = m.gardenerClustersStateGaugeVec.GetMetricWithLabelValues(runtimeId, "Ready")
if readyMetric != nil {
readyMetric.Set(0)
}
var errorMetric, _ = m.gardenerClustersStateGaugeVec.GetMetricWithLabelValues(runtimeId, "Error")
if errorMetric != nil {
errorMetric.Set(0)
}
fmt.Printf("GardenerClusterStates set value to 0 for %v", runtimeId)

metricsDeleted := m.gardenerClustersStateGaugeVec.DeletePartialMatch(prometheus.Labels{
runtimeIdKeyName: runtimeId,
})

if deletedReady || deletedError {
fmt.Printf("GardenerClusterStates deleted value for %v", runtimeId)
if metricsDeleted > 0 {
fmt.Printf("gardenerClusterStateGauge deleted %d metrics for runtimeId %v", metricsDeleted, runtimeId)
}
}
4 changes: 3 additions & 1 deletion internal/controller/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package controller

import (
"context"
metrics "github.com/kyma-project/infrastructure-manager/internal/controller/metrics"

Check failure on line 21 in internal/controller/suite_test.go

View workflow job for this annotation

GitHub Actions / lint

File is not `gci`-ed with --skip-generated -s standard -s default (gci)
"path/filepath"
"testing"
"time"
Expand Down Expand Up @@ -81,8 +82,9 @@ var _ = BeforeSuite(func() {

kubeconfigProviderMock := &mocks.KubeconfigProvider{}
setupKubeconfigProviderMock(kubeconfigProviderMock)
metrics := metrics.NewMetrics()

controller := NewGardenerClusterController(mgr, kubeconfigProviderMock, logger, TestKubeconfigValidityTime)
controller := NewGardenerClusterController(mgr, kubeconfigProviderMock, logger, TestKubeconfigValidityTime, metrics)
Expect(controller).NotTo(BeNil())

err = controller.SetupWithManager(mgr)
Expand Down

0 comments on commit 33e1cab

Please sign in to comment.