diff --git a/README.md b/README.md
index dc3d3f68..dcd8efca 100644
--- a/README.md
+++ b/README.md
@@ -60,6 +60,9 @@ This driver allows Kubernetes to access LocalStorage on Linux node.
 test-pvc Bound pvc-2b2c809f-33b5-437f-a4b8-61906c10a3e1 1Mi RWX pixiu-localstorage 5s
 ```
 
+## Metrics
+- [metrics](./docs/metrics.md)
+
 ## Feature
 - Schedule with volume status
 - Volume metrics
diff --git a/cmd/localstorage-controller/main.go b/cmd/localstorage-controller/main.go
index e08b6a10..f09a57be 100644
--- a/cmd/localstorage-controller/main.go
+++ b/cmd/localstorage-controller/main.go
@@ -38,6 +38,7 @@ import (
 	"github.com/caoyingjunz/csi-driver-localstorage/pkg/client/clientset/versioned"
 	"github.com/caoyingjunz/csi-driver-localstorage/pkg/client/informers/externalversions"
 	"github.com/caoyingjunz/csi-driver-localstorage/pkg/controller/storage"
+	"github.com/caoyingjunz/csi-driver-localstorage/pkg/metrics"
 	"github.com/caoyingjunz/csi-driver-localstorage/pkg/runtime"
 	"github.com/caoyingjunz/csi-driver-localstorage/pkg/signals"
 	"github.com/caoyingjunz/csi-driver-localstorage/pkg/util"
@@ -79,6 +80,10 @@ var (
 	resourceNamespace = flag.String("leader-elect-resource-namespace", ResourceNamespace, "The namespace of resource object that is used for locking during leader election.")
 	leaseDuration = flag.Int("leader-elect-lease-duration", LeaseDuration, "The duration that non-leader candidates will wait")
 	renewDeadline = flag.Int("leader-elect-renew-deadline", RenewDeadline, "The interval between attempts by the acting master to renew a leadership slot before it stops leading.")
+
+	// metrics: the interval is a time.Duration flag, so values must carry a unit (e.g. "10s")
+	metricsPort = flag.Int("metrics-port", 0, "metricsPort is the port of the localhost metrics endpoint (set to 0 to disable)")
+	interval = flag.Duration("metrics-interval", 10*time.Second, "metricsInterval is the interval of the metrics collection, as a duration such as 10s or 1m")
 )
 
 func init() {
@@ -127,12 +132,13 @@ func main() {
 	if err != nil {
 		klog.Fatalf("Failed to build kube clientSet: %v", err)
 	}
 
-	run := func(ctx context.Context) {
-		lsClientSet, err := versioned.NewForConfig(kubeConfig)
-		if err != nil {
-			klog.Fatalf("Failed to new localstorage clientSet: %v", err)
-		}
+	lsClientSet, err := versioned.NewForConfig(kubeConfig)
+	if err != nil {
+		klog.Fatalf("Failed to new localstorage clientSet: %v", err)
+	}
+
+	run := func(ctx context.Context) {
 		sharedInformer := externalversions.NewSharedInformerFactory(lsClientSet, 300*time.Second)
 		sc, err := storage.NewStorageController(ctx,
 			sharedInformer.Storage().V1().LocalStorages(),
@@ -164,6 +170,17 @@ func main() {
 		}, 5*time.Second, wait.NeverStop)
 	}
 
+	if *metricsPort > 0 {
+		mux := http.NewServeMux()
+		metrics.InstallHandler(mux, "/metrics")
+		metrics.TimingAcquisition(ctx, lsClientSet, *interval) // already a time.Duration; multiplying by time.Second would overflow
+		go func() {
+			if err := http.ListenAndServe(net.JoinHostPort("", strconv.Itoa(*metricsPort)), mux); err != nil { // := keeps err local to this goroutine
+				klog.ErrorS(err, "Failed to start metrics server")
+			}
+		}()
+	}
+
 	if !*leaderElect {
 		run(ctx)
 		klog.Fatalf("unreachable")
diff --git a/deploy/latest/ls-controller.yaml b/deploy/latest/ls-controller.yaml
index 6cc08432..315bb651 100644
--- a/deploy/latest/ls-controller.yaml
+++ b/deploy/latest/ls-controller.yaml
@@ -19,6 +19,9 @@ spec:
             - -v=2
             # port's value equals 0 means health check func disabled
             - --healthz-port=10258
+            - --metrics-port=10259
+            # metrics interval is a duration and needs a unit; default is 10s
+            - --metrics-interval=10s
             - --cert-dir=/tmp/cert
             - --port=8443
           image: harbor.powerlaw.club/pixiuio/localstorage-controller:latest
diff --git a/docs/images/localstorage_controller.png b/docs/images/localstorage_controller.png
new file mode 100644
index 00000000..33081edf
Binary files /dev/null and b/docs/images/localstorage_controller.png differ
diff --git a/docs/localstorage_controller-dashboard.json b/docs/localstorage_controller-dashboard.json
new file mode 100644
index 00000000..3a205bb0
--- /dev/null
+++ b/docs/localstorage_controller-dashboard.json
@@ -0,0 +1,376 @@
+{
+  "__inputs": [
+    {
+      "name": "DS_PROMETHEUS",
+ "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "9.5.1" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "title": "Row title", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "volumes total", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.5.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(localstorage_controller_volumes_total)", + "legendFormat": "sum", + "range": true, + "refId": 
"A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "localstorage_controller_volumes_total", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "B" + } + ], + "title": "localstorage_controller_volumes_total", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "series", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 7, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 16, + "x": 8, + "y": 1 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "asc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(localstorage_controller_volume_size)", + "legendFormat": "sum", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": 
"sum(localstorage_controller_volume_size) by (instance)", + "hide": false, + "legendFormat": "sum_by({{instance}})", + "range": true, + "refId": "B" + } + ], + "title": "localstorage_controller_volume_size sum", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "localstorage_controller_volume_size", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "localstorage_controller_volume_size detail", + "type": "timeseries" + } + ], + "refresh": "", + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "localstorage_controller dashboard", 
+  "uid": "d2cf5919-dfb5-4436-8c01-6c41d7371376",
+  "version": 1,
+  "weekStart": ""
+}
\ No newline at end of file
diff --git a/docs/metrics.md b/docs/metrics.md
new file mode 100644
index 00000000..12aa6220
--- /dev/null
+++ b/docs/metrics.md
@@ -0,0 +1,21 @@
+# localstorage_controller_volumes metrics
+
+## Grafana Dashboard
+A Grafana Dashboard designed for metrics from the localstorage controller on Kubernetes is available
+at [localstorage-controller-dashboard](https://grafana.com/grafana/dashboards/19251-localstorage-controller-dashboard/).
+This dashboard's JSON source is
+at [localstorage_controller-dashboard.json](./localstorage_controller-dashboard.json).
+
+
+
+## Metrics
+Table of exported metrics:
+
+| Name                                    | Type  | Description                                                                            |
+|-----------------------------------------|-------|----------------------------------------------------------------------------------------|
+| `localstorage_controller_volumes_total` | Gauge | HELP localstorage_controller_volumes_total The total number of volumes in localstorage |
+| `localstorage_controller_volume_size`   | Gauge | HELP localstorage_controller_volume_size The size of each volume in localstorage       |
+
+## Prometheus
+The Prometheus configuration to scrape metrics from the localstorage controller on Kubernetes is available
+at [prometheus.yml](./prometheus.yml).
\ No newline at end of file
diff --git a/docs/prometheus.yml b/docs/prometheus.yml
new file mode 100644
index 00000000..5e8507d9
--- /dev/null
+++ b/docs/prometheus.yml
@@ -0,0 +1,8 @@
+global:
+  scrape_interval: 10s # sampling period
+  evaluation_interval: 10s # Alarm rule calculation period
+
+scrape_configs:
+  - job_name: 'localstorage_controller'
+    static_configs:
+      - targets: [ ':10259' ] # 10259 is metrics port
\ No newline at end of file
diff --git a/go.mod b/go.mod
index 12adff1a..f432fe68 100644
--- a/go.mod
+++ b/go.mod
@@ -8,6 +8,7 @@ require (
 	github.com/google/uuid v1.3.0
 	github.com/imdario/mergo v0.3.15 // indirect
 	github.com/julienschmidt/httprouter v1.3.0
+	github.com/prometheus/client_golang v1.12.1
 	github.com/stretchr/testify v1.8.3 // indirect
 	golang.org/x/net v0.10.0
 	google.golang.org/grpc v1.54.0
diff --git a/pkg/metrics/metric.go b/pkg/metrics/metric.go
new file mode 100644
index 00000000..22656890
--- /dev/null
+++ b/pkg/metrics/metric.go
@@ -0,0 +1,114 @@
+package metrics
+
+import (
+	"context"
+	"net/http"
+	"time"
+
+	metaV1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/klog/v2"
+
+	"github.com/caoyingjunz/csi-driver-localstorage/pkg/client/clientset/versioned"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
+)
+
+// defaultAcquisitionInterval is the collection period used when the caller
+// passes a non-positive interval to TimingAcquisition.
+const defaultAcquisitionInterval = 10 * time.Second
+
+var (
+	// volumesTotalGauge is the total number of localstorage volumes
+	volumesTotalGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: "localstorage_controller",
+		Subsystem: "volumes",
+		Name:      "total",
+		Help:      "The total number of volumes in localstorage",
+	}, []string{})
+
+	// volumeSizeGauge is the size of each localstorage volume, labelled by
+	// volume ID, name and path
+	volumeSizeGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: "localstorage_controller",
+		Subsystem: "volume",
+		Name:      "size",
+		Help:      "The size of each volume in localstorage",
+	}, []string{"volId", "volName", "volPath"})
+)
+
+func init() {
+	prometheus.MustRegister(volumesTotalGauge, volumeSizeGauge)
+}
+
+// InstallHandler registers promhttp.Handler() on mux at the given path.
+func InstallHandler(mux *http.ServeMux, path string) {
+	mux.Handle(path, promhttp.Handler())
+}
+
+// RegisterVolumeSize sets the size gauge of the volume identified by
+// volId, volName and volPath.
+func RegisterVolumeSize(volId, volName, volPath string, size float64) {
+	volumeSizeGauge.WithLabelValues(volId, volName, volPath).Set(size)
+}
+
+// RegisterVolumesGauge sets the gauge holding the total number of volumes.
+func RegisterVolumesGauge(total float64) {
+	volumesTotalGauge.WithLabelValues().Set(total)
+}
+
+// TimingAcquisition collects the volume metrics once synchronously and then
+// every t in a background goroutine until ctx is cancelled. Collection errors
+// are logged and do not stop the loop. A non-positive t falls back to
+// defaultAcquisitionInterval, because time.NewTicker panics on such values.
+func TimingAcquisition(ctx context.Context, lsClientSet *versioned.Clientset, t time.Duration) {
+	if t <= 0 {
+		t = defaultAcquisitionInterval
+	}
+
+	// The first execution
+	if err := handleLocalStoragesList(ctx, lsClientSet); err != nil {
+		klog.ErrorS(err, "Failed to get localstorage list")
+	}
+
+	go func() {
+		ticker := time.NewTicker(t)
+		defer ticker.Stop()
+		for {
+			select {
+			case <-ctx.Done():
+				// Stop collecting once the caller's context ends so the goroutine does not leak.
+				return
+			case <-ticker.C:
+				if err := handleLocalStoragesList(ctx, lsClientSet); err != nil {
+					klog.ErrorS(err, "Failed to get localstorage list")
+				}
+			}
+		}
+	}()
+}
+
+// handleLocalStoragesList lists every LocalStorage object and refreshes the
+// volume gauges from the volumes recorded in their status.
+func handleLocalStoragesList(ctx context.Context, lsClientSet *versioned.Clientset) error {
+	object, err := lsClientSet.StorageV1().LocalStorages().List(ctx, metaV1.ListOptions{})
+	if err != nil {
+		return err
+	}
+
+	// Drop the series of the previous round only after a successful list, so
+	// that deleted volumes disappear while a failed list keeps the old values.
+	volumeSizeGauge.Reset()
+
+	var total float64
+	for _, volumeItem := range object.Items {
+		volumes := volumeItem.Status.Volumes
+		total += float64(len(volumes))
+		for _, volume := range volumes {
+			RegisterVolumeSize(volume.VolID, volume.VolName, volume.VolPath, float64(volume.VolSize))
+		}
+	}
+
+	RegisterVolumesGauge(total)
+
+	return nil
+}