diff --git a/operators/multiclusterobservability/manifests/base/grafana/dash-acm-clusters-overview.yaml b/operators/multiclusterobservability/manifests/base/grafana/dash-acm-clusters-overview.yaml index 90d6089e8a..507ddad334 100644 --- a/operators/multiclusterobservability/manifests/base/grafana/dash-acm-clusters-overview.yaml +++ b/operators/multiclusterobservability/manifests/base/grafana/dash-acm-clusters-overview.yaml @@ -1,1910 +1,1901 @@ -apiVersion: v1 -data: - acm-clusters-overview.json: |- +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 1, + "iteration": 1682528664304, + "links": [ { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "asDropdown": true, + "icon": "external link", + "includeVars": false, + "keepTime": false, + "tags": [], + "targetBlank": true, + "title": "All Dashboards", + "tooltip": "", + "type": "dashboards", + "url": "" + } + ], + "panels": [ + { + "collapsed": false, + "datasource": "$datasource", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "id": 1, - "iteration": 1682528664304, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": false, - "keepTime": false, - "tags": [], - "targetBlank": true, - "title": "All Dashboards", - "tooltip": "", - "type": "dashboards", - "url": "" - } - ], - "panels": [ - { - "collapsed": false, - "datasource": "$datasource", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 + "id": 138, + "panels": [], + "title": "Control Plane Health", + "type": "row" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" }, - "id": 138, - "panels": [], - "title": "Control Plane Health", - "type": "row" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": null, - "displayMode": "auto", - "filterable": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "custom": { + "align": null, + "displayMode": "auto", + "filterable": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value #A" }, - "overrides": [ + "properties": [ { - "matcher": { - "id": "byName", - "options": "Value #A" - }, - "properties": [ - { - "id": "displayName", - "value": "Max latency (99th percentile)" - }, - { - "id": "unit", - "value": "s" - }, - { - "id": "custom.displayMode", - "value": "color-background" - }, - { - "id": "custom.align", - "value": "left" - }, - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - }, - { - "color": "red", - "value": 2 - } - ] - } - } - ] + "id": "displayName", + "value": "Max latency (99th percentile)" }, { - "matcher": { - "id": "byName", - "options": "Value #B" - }, - "properties": [ - { - "id": "displayName", - "value": "API Errors [1h]" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "custom.displayMode", - "value": "color-text" - }, - { - "id": "custom.align", - "value": "left" - }, - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - }, - { - "color": "red", - "value": 2 - } - ] + "id": "unit", + "value": "s" + }, + { + "id": "custom.displayMode", + "value": "color-background" + }, + { + "id": "custom.align", + "value": "left" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + }, + { + "color": "red", + "value": 2 } - }, - { - "id": "noValue", - "value": "0" - } - ] + ] + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #B" + }, + "properties": [ + { + "id": "displayName", + "value": "API Errors [1h]" }, { - "matcher": { - "id": "byName", - "options": "api_up" - }, - "properties": [ - { - "id": "displayName", - "value": "API servers up" - }, - { - "id": "unit", - "value": "percentunit" - }, - { - "id": "custom.displayMode", - "value": "color-text" - }, - { - "id": "custom.align", - "value": "left" - }, - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "orange", - "value": 0 - }, - { - "color": "green", - "value": 1 - } - ] + "id": "unit", + "value": "short" + }, + { + "id": "custom.displayMode", + "value": "color-text" + }, + { + "id": "custom.align", + "value": "left" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + }, + { + "color": "red", + "value": 2 } - } - ] + ] + } }, { - "matcher": { - "id": "byName", - "options": "cluster" - }, - "properties": [ - { - "id": "displayName", - "value": "Cluster" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "links", - "value": [ - { - "targetBlank": false, - "title": "Drill down to cluster", - "url": "/d/09ec8aa1e996d6ffcd6817bbaff4db1b/kubernetes-api-server?${__url_time_range}&var-cluster=${__data.fields.cluster}&var-instance=All" - } - ] - }, - { - "id": "custom.align", - "value": "left" - } - ] + "id": "noValue", + "value": "0" } ] }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 1 - }, - "id": 146, - "interval": "4m", - "options": { - "showHeader": true - }, - "pluginVersion": "8.5.20", - "targets": [ - { - "exemplar": true, - "expr": "topk(50, max(apiserver_request_duration_seconds:histogram_quantile_99{cluster=~\"$cluster\",clusterType!=\"ocp3\"}) by (cluster))\n* on(cluster) group_left(api_up) count_values without() (\"api_up\", (sum(up{cluster=~\"$cluster\",job=\"apiserver\",clusterType!=\"ocp3\"} == 1) by (cluster) / count(up{cluster=~\"$cluster\",job=\"apiserver\",clusterType!=\"ocp3\"}) by (cluster)))", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - }, - { - "exemplar": true, - "expr": "sum by (cluster)(sum:apiserver_request_total:1h{cluster=~\"$cluster\",code=~\"5..\",clusterType!=\"ocp3\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "B" - } - ], - "title": "Top 50 Max Latency API Server", - "transformations": [ - { - "id": "labelsToFields", - "options": {} + { + "matcher": { + "id": "byName", + "options": "api_up" }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": { - "Time": 0, - "Value #A": 2, - "Value #B": 4, - "api_up": 3, - "cluster": 1 - }, - "renameByName": {} - } - } - ], - "type": "table" - }, - { - "datasource": null, - "description": "Leader election changes per cluster over the time range selected for dashboard.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": null, - "displayMode": "auto", - "filterable": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] + "properties": [ + { + "id": "displayName", + "value": "API servers up" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "custom.displayMode", + "value": "color-text" + }, + { + "id": "custom.align", + "value": "left" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 0 + }, + { + "color": "green", + "value": 1 + } + ] + } } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cluster" }, - "overrides": [ + "properties": [ { - "matcher": { - "id": "byName", - "options": "cluster" - }, - "properties": [ - { - "id": "displayName", - "value": "Cluster" - }, - { - "id": "links", - "value": [ - { - "title": "Drill down to cluster", - "url": "/d/N8BxQ2jMz/kubernetes-etcd-cluster?${__url_time_range}&var-cluster=${__data.fields.cluster}" - } - ] - } - ] + "id": "displayName", + "value": "Cluster" }, { - "matcher": { - "id": "byName", - "options": "Value" - }, - "properties": [ - { - "id": "displayName", - "value": "Leader Election Changes" - }, - { - "id": "custom.align", - "value": "left" - }, - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 1 - }, - { - "color": "red", - "value": 2 - } - ] - } - }, - { - "id": "custom.displayMode", - "value": "color-text" - } - ] + "id": "unit", + "value": "short" }, { - "matcher": { - "id": "byName", - "options": "db_size" - }, - "properties": [ - { - "id": "displayName", - "value": "DB Size" - }, - { - "id": "unit", - "value": "bytes" - }, - { - "id": "custom.align", - "value": "left" - }, - { - "id": "decimals", - "value": 2 - } - ] + "id": "decimals", + "value": 2 }, { - "matcher": { - "id": "byName", - "options": "has_leader" - }, - "properties": [ - { - "id": "displayName", - "value": "Has a Leader" - }, - { - "id": "mappings", - "value": [ - { - "options": { - "0": { - "text": "No" - }, - "1": { - "text": "Yes" - } - }, - "type": "value" - } - ] - }, + "id": "links", + "value": [ { - "id": "custom.align", - "value": "left" + "targetBlank": false, + "title": "Drill down to cluster", + "url": "/d/09ec8aa1e996d6ffcd6817bbaff4db1b/kubernetes-api-server?${__url_time_range}&var-cluster=${__data.fields.cluster}&var-instance=All" } ] + }, + { + "id": "custom.align", + "value": "left" } ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 1 - }, - "id": 150, - "interval": "1m", - "options": { - "frameIndex": 2, - "showHeader": true, - "sortBy": [] - }, - "pluginVersion": "8.5.20", - "targets": [ - { - "exemplar": true, - "expr": "sum(changes(etcd_server_leader_changes_seen_total{cluster=~\"$cluster\",job=\"etcd\"}[$__range])) by (cluster)\n* on(cluster) group_left(db_size) count_values without() (\"db_size\", max(etcd_debugging_mvcc_db_total_size_in_bytes{cluster=~\"$cluster\",job=\"etcd\"}) by (cluster))\n* on(cluster) group_left(has_leader) count_values without() (\"has_leader\", max(etcd_server_has_leader{cluster=~\"$cluster\",job=\"etcd\"}) by (cluster))", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "etcd", - "transformations": [ - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "cluster", - "db_size", - "has_leader", - "Value" - ] - } - } - }, - { - "id": "organize", - "options": { - "excludeByName": {}, - "indexByName": { - "Value": 2, - "cluster": 0, - "db_size": 3, - "has_leader": 1 - }, - "renameByName": {} - } - } - ], - "type": "table" + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 146, + "interval": "4m", + "options": { + "showHeader": true + }, + "pluginVersion": "8.5.20", + "targets": [ + { + "exemplar": true, + "expr": "topk(50, max(apiserver_request_duration_seconds:histogram_quantile_99{cluster=~\"$cluster\",clusterType!=\"ocp3\"}) by (cluster))\n* on(cluster) group_left(api_up) count_values without() (\"api_up\", (sum(up{cluster=~\"$cluster\",job=\"apiserver\",clusterType!=\"ocp3\"} == 1) by (cluster) / count(up{cluster=~\"$cluster\",job=\"apiserver\",clusterType!=\"ocp3\"}) by (cluster)))", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" }, { - "collapsed": false, - "datasource": "$datasource", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 140, - "panels": [], - "title": "Optimization", - "type": "row" + "exemplar": true, + "expr": "sum by (cluster)(sum:apiserver_request_total:1h{cluster=~\"$cluster\",code=~\"5..\",clusterType!=\"ocp3\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "B" + } + ], + "title": "Top 50 Max Latency API Server", + "transformations": [ + { + "id": "labelsToFields", + "options": {} }, { - "datasource": "$datasource", - "description": "Highlights % differences between CPU requests commitments vs utilization. When this difference is large ( >20%), it means that resources are reserved but unused.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": null, - "displayMode": "auto", - "filterable": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": { + "Time": 0, + "Value #A": 2, + "Value #B": 4, + "api_up": 3, + "cluster": 1 + }, + "renameByName": {} + } + } + ], + "type": "table" + }, + { + "datasource": null, + "description": "Leader election changes per cluster over the time range selected for dashboard.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": null, + "displayMode": "auto", + "filterable": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "cluster" + }, + "properties": [ + { + "id": "displayName", + "value": "Cluster" + }, + { + "id": "links", + "value": [ { - "color": "red", - "value": 80 + "title": "Drill down to cluster", + "url": "/d/N8BxQ2jMz/kubernetes-etcd-cluster?${__url_time_range}&var-cluster=${__data.fields.cluster}" } ] } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value" }, - "overrides": [ + "properties": [ { - "matcher": { - "id": "byName", - "options": "Value" - }, - "properties": [ - { - "id": "displayName", - "value": "Overestimation" - }, - { - "id": "unit", - "value": "percentunit" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.displayMode", - "value": "color-background" - }, - { - "id": "custom.align", - "value": "left" - }, - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.2 - } - ] - } - } - ] + "id": "displayName", + "value": "Leader Election Changes" }, { - "matcher": { - "id": "byName", - "options": "cpu_requested" - }, - "properties": [ - { - "id": "displayName", - "value": "Requested" - }, - { - "id": "unit", - "value": "percentunit" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.align", - "value": "left" - } - ] + "id": "custom.align", + "value": "left" }, { - "matcher": { - "id": "byName", - "options": "cpu_utilized" - }, - "properties": [ - { - "id": "displayName", - "value": "Utilized" - }, - { - "id": "unit", - "value": "percentunit" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.align", - "value": "left" - }, - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "red", + "value": 2 } - } - ] + ] + } }, { - "matcher": { - "id": "byName", - "options": "cluster" - }, - "properties": [ - { - "id": "unit", - "value": "short" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "links", - "value": [ - { - "targetBlank": false, - "title": "Drill down to cluster", - "url": "/d/8Qvi3edMz/acm-resource-optimization-cluster?${__url_time_range}&var-cluster=${__data.fields.cluster}" - } - ] - }, - { - "id": "custom.align", - "value": "left" - }, - { - "id": "displayName", - "value": "Cluster" - } - ] + "id": "custom.displayMode", + "value": "color-text" } ] }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 9 - }, - "id": 151, - "interval": "5m", - "options": { - "showHeader": true, - "sortBy": [] - }, - "pluginVersion": "8.5.20", - "targets": [ - { - "exemplar": true, - "expr": "topk(50, cluster:cpu_requested:ratio - ignoring(usage) (1 - avg(rate(node_cpu_seconds_total{cluster=~\"$cluster\",mode=\"idle\"}[$__rate_interval])) by (cluster)))\n* on(cluster) group_left(cpu_requested) count_values without() (\"cpu_requested\", cluster:cpu_requested:ratio)\n* on(cluster) group_left(cpu_utilized) count_values without() (\"cpu_utilized\", (1 - avg(rate(node_cpu_seconds_total{cluster=~\"$cluster\",mode=\"idle\"}[$__rate_interval])) by (cluster)))", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Top 50 CPU Overestimation Clusters", - "transformations": [ - { - "id": "labelsToFields", - "options": {} - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": { - "Time": 0, - "Value": 2, - "cluster": 1, - "cpu_requested": 3, - "cpu_utilized": 4 - }, - "renameByName": {} - } - } - ], - "type": "table" - }, - { - "datasource": "$datasource", - "description": "Highlights % differences between Memory requests commitments vs utilization. When this difference is large ( >20%), it means that resources are reserved but unused.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": null, - "displayMode": "auto", - "filterable": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } + { + "matcher": { + "id": "byName", + "options": "db_size" }, - "overrides": [ + "properties": [ { - "matcher": { - "id": "byName", - "options": "cluster" - }, - "properties": [ - { - "id": "unit", - "value": "short" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "links", - "value": [ - { - "targetBlank": false, - "title": "Drill down to cluster", - "url": "/d/8Qvi3edMz/acm-resource-optimization-cluster?${__url_time_range}&var-cluster=${__data.fields.cluster}" - } - ] - }, - { - "id": "custom.align", - "value": "left" - }, - { - "id": "displayName", - "value": "Cluster" - } - ] + "id": "displayName", + "value": "DB Size" }, { - "matcher": { - "id": "byName", - "options": "Value" - }, - "properties": [ - { - "id": "displayName", - "value": "Overestimation" - }, - { - "id": "unit", - "value": "percentunit" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.displayMode", - "value": "color-background" - }, - { - "id": "custom.align", - "value": "left" - }, - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 0.2 - } - ] - } - } - ] + "id": "unit", + "value": "bytes" }, { - "matcher": { - "id": "byName", - "options": "memory_requested" - }, - "properties": [ - { - "id": "displayName", - "value": "Requested" - }, - { - "id": "unit", - "value": "percentunit" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.align", - "value": "left" - } - ] + "id": "custom.align", + "value": "left" }, { - "matcher": { - "id": "byName", - "options": "memory_utilized" - }, - "properties": [ - { - "id": "displayName", - "value": "Utilized" - }, - { - "id": "unit", - "value": "percentunit" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.align", - "value": "left" - }, - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - } - ] + "id": "decimals", + "value": 2 } ] }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 9 - }, - "id": 153, - "interval": "5m", - "options": { - "showHeader": true, - "sortBy": [] - }, - "pluginVersion": "8.5.20", - "targets": [ - { - "exemplar": true, - "expr": "topk(50, cluster:memory_requested:ratio{cluster=~\"$cluster\"} - ignoring(usage) cluster:memory_utilized:ratio{cluster=~\"$cluster\"})\n* on(cluster) group_left(memory_requested) count_values without() (\"memory_requested\", cluster:memory_requested:ratio)\n* on(cluster) group_left(memory_utilized) count_values without() (\"memory_utilized\", cluster:memory_utilized:ratio)", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Top 50 Memory Overestimation Clusters", - "transformations": [ - { - "id": "labelsToFields", - "options": {} + { + "matcher": { + "id": "byName", + "options": "has_leader" }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": { - "Time": 0, - "Value": 2, - "cluster": 1, - "memory_requested": 3, - "memory_utilized": 4 - }, - "renameByName": {} + "properties": [ + { + "id": "displayName", + "value": "Has a Leader" + }, + { + "id": "mappings", + "value": [ + { + "options": { + "0": { + "text": "No" + }, + "1": { + "text": "Yes" + } + }, + "type": "value" + } + ] + }, + { + "id": "custom.align", + "value": "left" } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 150, + "interval": "1m", + "options": { + "frameIndex": 2, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.5.20", + "targets": [ + { + "exemplar": true, + "expr": "sum(changes(etcd_server_leader_changes_seen_total{cluster=~\"$cluster\",job=\"etcd\"}[$__range])) by (cluster)\n* on(cluster) group_left(db_size) count_values without() (\"db_size\", max(etcd_debugging_mvcc_db_total_size_in_bytes{cluster=~\"$cluster\",job=\"etcd\"}) by (cluster))\n* on(cluster) group_left(has_leader) count_values without() (\"has_leader\", max(etcd_server_has_leader{cluster=~\"$cluster\",job=\"etcd\"}) by (cluster))", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "etcd", + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "cluster", + "db_size", + "has_leader", + "Value" + ] } - ], - "type": "table" + } }, { - "collapsed": false, - "datasource": "$datasource", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 17 + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Value": 2, + "cluster": 0, + "db_size": 3, + "has_leader": 1 + }, + "renameByName": {} + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "datasource": "$datasource", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 140, + "panels": [], + "title": "Optimization", + "type": "row" + }, + { + "datasource": "$datasource", + "description": "Highlights % differences between CPU requests commitments vs utilization. When this difference is large ( >20%), it means that resources are reserved but unused.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": null, + "displayMode": "auto", + "filterable": false }, - "id": 34, - "panels": [], - "repeat": null, - "title": "Capacity / Utilization", - "type": "row" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": null, - "displayMode": "auto", - "filterable": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "displayName", + "value": "Overestimation" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.displayMode", + "value": "color-background" + }, + { + "id": "custom.align", + "value": "left" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.2 + } + ] + } } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cpu_requested" }, - "overrides": [ + "properties": [ { - "matcher": { - "id": "byName", - "options": "cluster" - }, - "properties": [ - { - "id": "displayName", - "value": "Cluster" - }, - { - "id": "links", - "value": [ - { - "targetBlank": false, - "title": "Drill down to cluster", - "url": "/d/8Qvi3edMz/acm-resource-optimization-cluster?${__url_time_range}&var-cluster=${__data.fields.cluster}" - } - ] - }, - { - "id": "custom.align", - "value": "left" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "decimals", - "value": 2 - }, + "id": "displayName", + "value": "Requested" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align", + "value": "left" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cpu_utilized" + }, + "properties": [ + { + "id": "displayName", + "value": "Utilized" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align", + "value": "left" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cluster" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "links", + "value": [ { - "id": "custom.align", - "value": "left" + "targetBlank": false, + "title": "Drill down to cluster", + "url": "/d/8Qvi3edMz/acm-resource-optimization-cluster?${__url_time_range}&var-cluster=${__data.fields.cluster}" } ] }, { - "matcher": { - "id": "byName", - "options": "machine_cpu_cores_sum" - }, - "properties": [ - { - "id": "displayName", - "value": "Total Cores" - }, - { - "id": "unit", - "value": "short" - }, + "id": "custom.align", + "value": "left" + }, + { + "id": "displayName", + "value": "Cluster" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 151, + "interval": "5m", + "options": { + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.5.20", + "targets": [ + { + "exemplar": true, + "expr": "topk(50, cluster:cpu_requested:ratio - ignoring(usage) (1 - avg(rate(node_cpu_seconds_total{cluster=~\"$cluster\",mode=\"idle\"}[$__rate_interval])) by (cluster)))\n* on(cluster) group_left(cpu_requested) count_values without() (\"cpu_requested\", cluster:cpu_requested:ratio)\n* on(cluster) group_left(cpu_utilized) count_values without() (\"cpu_utilized\", (1 - avg(rate(node_cpu_seconds_total{cluster=~\"$cluster\",mode=\"idle\"}[$__rate_interval])) by (cluster)))", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Top 50 CPU Overestimation Clusters", + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": { + "Time": 0, + "Value": 2, + "cluster": 1, + "cpu_requested": 3, + "cpu_utilized": 4 + }, + "renameByName": {} + } + } + ], + "type": "table" + }, + { + "datasource": "$datasource", + "description": "Highlights % differences between Memory requests commitments vs utilization. When this difference is large ( >20%), it means that resources are reserved but unused.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": null, + "displayMode": "auto", + "filterable": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "cluster" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "links", + "value": [ { - "id": "custom.align", - "value": "left" + "targetBlank": false, + "title": "Drill down to cluster", + "url": "/d/8Qvi3edMz/acm-resource-optimization-cluster?${__url_time_range}&var-cluster=${__data.fields.cluster}" } ] }, { - "matcher": { - "id": "byName", - "options": "node_allocatable_cpu_cores_sum" - }, - "properties": [ - { - "id": "displayName", - "value": "Allocatable Cores" - }, - { - "id": "unit", - "value": "short" - }, + "id": "custom.align", + "value": "left" + }, + { + "id": "displayName", + "value": "Cluster" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "displayName", + "value": "Overestimation" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.displayMode", + "value": "color-background" + }, + { + "id": "custom.align", + "value": "left" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.2 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "memory_requested" + }, + "properties": [ + { + "id": "displayName", + "value": "Requested" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align", + "value": "left" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "memory_utilized" + }, + "properties": [ + { + "id": "displayName", + "value": "Utilized" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align", + "value": "left" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 153, + "interval": "5m", + "options": { + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.5.20", + "targets": [ + { + "exemplar": true, + "expr": "topk(50, cluster:memory_requested:ratio{cluster=~\"$cluster\"} - ignoring(usage) cluster:memory_utilized:ratio{cluster=~\"$cluster\"})\n* on(cluster) group_left(memory_requested) count_values without() (\"memory_requested\", cluster:memory_requested:ratio)\n* on(cluster) group_left(memory_utilized) count_values without() (\"memory_utilized\", cluster:memory_utilized:ratio)", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Top 50 Memory Overestimation Clusters", + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": { + "Time": 0, + "Value": 2, + "cluster": 1, + "memory_requested": 3, + "memory_utilized": 4 + }, + "renameByName": {} + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "datasource": "$datasource", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 34, + "panels": [], + "repeat": null, + "title": "Capacity / Utilization", + "type": "row" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": null, + "displayMode": "auto", + "filterable": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "cluster" + }, + "properties": [ + { + "id": "displayName", + "value": "Cluster" + }, + { + "id": "links", + "value": [ { - "id": "custom.align", - "value": "left" + "targetBlank": false, + "title": "Drill down to cluster", + "url": "/d/8Qvi3edMz/acm-resource-optimization-cluster?${__url_time_range}&var-cluster=${__data.fields.cluster}" } ] }, { - "matcher": { - "id": "byName", - "options": "cpu_requested" - }, - "properties": [ - { - "id": "displayName", - "value": "Requested" - }, - { - "id": "unit", - "value": "percentunit" - }, - { - "id": "decimals", - "value": 2 - }, + "id": "custom.align", + "value": "left" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align", + "value": "left" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "machine_cpu_cores_sum" + }, + "properties": [ + { + "id": "displayName", + "value": "Total Cores" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.align", + "value": "left" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "node_allocatable_cpu_cores_sum" + }, + "properties": [ + { + "id": "displayName", + "value": "Allocatable Cores" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.align", + "value": "left" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cpu_requested" + }, + "properties": [ + { + "id": "displayName", + "value": "Requested" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align", + "value": "left" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "displayName", + "value": "Utilized" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align", + "value": "left" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 47, + "interval": "5m", + "options": { + "showHeader": true + }, + "pluginVersion": "8.5.20", + "targets": [ + { + "exemplar": true, + "expr": "topk(50, (1 - avg(rate(node_cpu_seconds_total{cluster=~\"$cluster\",mode=\"idle\"}[$__rate_interval])) by (cluster)))\n* on(cluster) group_left(machine_cpu_cores_sum) count_values without() (\"machine_cpu_cores_sum\", cluster:cpu_cores:sum)\n* on(cluster) group_left(node_allocatable_cpu_cores_sum) count_values without() (\"node_allocatable_cpu_cores_sum\", cluster:cpu_allocatable:sum)\n* on(cluster) group_left(cpu_requested) count_values without() (\"cpu_requested\", cluster:cpu_requested:ratio)", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Top 50 CPU Utilized Clusters", + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "usage": true + }, + "indexByName": { + "Time": 0, + "Value": 5, + "cluster": 1, + "cpu_requested": 4, + "machine_cpu_cores_sum": 2, + "node_allocatable_cpu_cores_sum": 3 + }, + "renameByName": {} + } + } + ], + "type": "table" + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 18 + }, + "hiddenSeries": false, + "id": 64, + "interval": "4m", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.5.20", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(5, (1 - avg(rate(node_cpu_seconds_total{cluster=~\"$cluster\",mode=\"idle\",clusterType!=\"ocp3\"}[$__rate_interval])) by (cluster)))", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{cluster}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Top 5 Utilized Clusters (% CPU usage)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": 1 + } + }, + { + "datasource": "$datasource", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": null, + "displayMode": "auto", + "filterable": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "cluster" + }, + "properties": [ + { + "id": "displayName", + "value": "Cluster" + }, + { + "id": "links", + "value": [ { - "id": "custom.align", - "value": "left" + "targetBlank": false, + "title": "Drill down to cluster", + "url": "/d/8Qvi3edMz/acm-resource-optimization-cluster?${__url_time_range}&var-cluster=${__data.fields.cluster}" } ] }, { - "matcher": { - "id": "byName", - "options": "Value" - }, - "properties": [ - { - "id": "displayName", - "value": "Utilized" - }, - { - "id": "unit", - "value": "percentunit" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.align", - "value": "left" - } - ] + "id": "custom.align", + "value": "left" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 } ] }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 18 - }, - "id": 47, - "interval": "5m", - "options": { - "showHeader": true - }, - "pluginVersion": "8.5.20", - "targets": [ - { - "exemplar": true, - "expr": "topk(50, (1 - avg(rate(node_cpu_seconds_total{cluster=~\"$cluster\",mode=\"idle\"}[$__rate_interval])) by (cluster)))\n* on(cluster) group_left(machine_cpu_cores_sum) count_values without() (\"machine_cpu_cores_sum\", cluster:cpu_cores:sum)\n* on(cluster) group_left(node_allocatable_cpu_cores_sum) count_values without() (\"node_allocatable_cpu_cores_sum\", cluster:cpu_allocatable:sum)\n* on(cluster) group_left(cpu_requested) count_values without() (\"cpu_requested\", cluster:cpu_requested:ratio)", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Top 50 CPU Utilized Clusters", - "transformations": [ - { - "id": "labelsToFields", - "options": {} + { + "matcher": { + "id": "byName", + "options": "machine_memory_sum" }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "usage": true - }, - "indexByName": { - "Time": 0, - "Value": 5, - "cluster": 1, - "cpu_requested": 4, - "machine_cpu_cores_sum": 2, - "node_allocatable_cpu_cores_sum": 3 - }, - "renameByName": {} + "properties": [ + { + "id": "displayName", + "value": "Available Memory" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align", + "value": "left" } - } - ], - "type": "table" - }, - { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 18 - }, - "hiddenSeries": false, - "id": 64, - "interval": "4m", - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.5.20", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "topk(5, (1 - avg(rate(node_cpu_seconds_total{cluster=~\"$cluster\",mode=\"idle\",clusterType!=\"ocp3\"}[$__rate_interval])) by (cluster)))", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{cluster}}", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Top 5 Utilized Clusters (% CPU usage)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] + ] }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": "1", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": 1 - } - }, - { - "datasource": "$datasource", - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": null, - "displayMode": "auto", - "filterable": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } + { + "matcher": { + "id": "byName", + "options": "machine_memory_requested" }, - "overrides": [ + "properties": [ { - "matcher": { - "id": "byName", - "options": "cluster" - }, - "properties": [ - { - "id": "displayName", - "value": "Cluster" - }, - { - "id": "links", - "value": [ - { - "targetBlank": false, - "title": "Drill down to cluster", - "url": "/d/8Qvi3edMz/acm-resource-optimization-cluster?${__url_time_range}&var-cluster=${__data.fields.cluster}" - } - ] - }, - { - "id": "custom.align", - "value": "left" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "decimals", - "value": 2 - } - ] + "id": "displayName", + "value": "Requested" }, { - "matcher": { - "id": "byName", - "options": "machine_memory_sum" - }, - "properties": [ - { - "id": "displayName", - "value": "Available Memory" - }, - { - "id": "unit", - "value": "bytes" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.align", - "value": "left" - } - ] + "id": "unit", + "value": "percentunit" }, { - "matcher": { - "id": "byName", - "options": "machine_memory_requested" - }, - "properties": [ - { - "id": "displayName", - "value": "Requested" - }, - { - "id": "unit", - "value": "percentunit" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.align", - "value": "left" - } - ] + "id": "decimals", + "value": 2 }, { - "matcher": { - "id": "byName", - "options": "Value" - }, - "properties": [ - { - "id": "displayName", - "value": "Utilized" - }, - { - "id": "unit", - "value": "percentunit" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.align", - "value": "left" - } - ] + "id": "custom.align", + "value": "left" } ] }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 25 - }, - "id": 60, - "interval": "5m", - "options": { - "showHeader": true - }, - "pluginVersion": "8.5.20", - "targets": [ - { - "exemplar": true, - "expr": "topk(50, cluster:memory_utilized:ratio{cluster=~\"$cluster\"})\n* on(cluster) group_left(machine_memory_sum) count_values without() (\"machine_memory_sum\", cluster:machine_memory:sum)\n* on(cluster) group_left(machine_memory_requested) count_values without() (\"machine_memory_requested\", cluster:memory_requested:ratio)", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Top 50 Memory Utilized Clusters", - "transformations": [ - { - "id": "labelsToFields", - "options": {} + { + "matcher": { + "id": "byName", + "options": "Value" }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "usage": true - }, - "indexByName": { - "Time": 0, - "Value": 4, - "cluster": 1, - "machine_memory_requested": 3, - "machine_memory_sum": 2 - }, - "renameByName": {} + "properties": [ + { + "id": "displayName", + "value": "Utilized" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align", + "value": "left" } - } - ], - "type": "table" + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 60, + "interval": "5m", + "options": { + "showHeader": true + }, + "pluginVersion": "8.5.20", + "targets": [ + { + "exemplar": true, + "expr": "topk(50, cluster:memory_utilized:ratio{cluster=~\"$cluster\"})\n* on(cluster) group_left(machine_memory_sum) count_values without() (\"machine_memory_sum\", cluster:machine_memory:sum)\n* on(cluster) group_left(machine_memory_requested) count_values without() (\"machine_memory_requested\", cluster:memory_requested:ratio)", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Top 50 Memory Utilized Clusters", + "transformations": [ + { + "id": "labelsToFields", + "options": {} }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 25 - }, - "hiddenSeries": false, - "id": 65, - "interval": "4m", - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "nullPointMode": "null as zero", + "id": "organize", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.5.20", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - {} - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "topk(5, (1 - sum(:node_memory_MemAvailable_bytes:sum) by (cluster) / sum(kube_node_status_allocatable{cluster=~\"$cluster\",resource=\"memory\"}) by (cluster)))", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{cluster}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Top 5 Utilized Clusters (% Memory usage)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "percentunit", - "label": null, - "logBase": 1, - "max": "1", - "min": "0", - "show": true + "excludeByName": { + "Time": true, + "usage": true }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null + "indexByName": { + "Time": 0, + "Value": 4, + "cluster": 1, + "machine_memory_requested": 3, + "machine_memory_sum": 2 + }, + "renameByName": {} } + } + ], + "type": "table" + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "links": [] }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 25 + }, + "hiddenSeries": false, + "id": 65, + "interval": "4m", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.5.20", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + {} + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ { - "datasource": null, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": null, - "displayMode": "auto", - "filterable": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ + "expr": "topk(5, (1 - sum(:node_memory_MemAvailable_bytes:sum) by (cluster) / sum(kube_node_status_allocatable{cluster=~\"$cluster\",resource=\"memory\"}) by (cluster)))", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{cluster}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Top 5 Utilized Clusters (% Memory usage)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": null, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": null, + "displayMode": "auto", + "filterable": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ { - "matcher": { - "id": "byName", - "options": "Value" - }, - "properties": [ - { - "id": "displayName", - "value": "Current Bandwidth Received" - }, - { - "id": "unit", - "value": "Bps" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.align", - "value": "left" - } - ] + "color": "green", + "value": null }, { - "matcher": { - "id": "byName", - "options": "node_transmit" - }, - "properties": [ - { - "id": "displayName", - "value": "Current Bandwidth Transmitted" - }, - { - "id": "unit", - "value": "Bps" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.align", - "value": "left" - } - ] + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "displayName", + "value": "Current Bandwidth Received" }, { - "matcher": { - "id": "byName", - "options": "cluster" - }, - "properties": [ - { - "id": "displayName", - "value": "Cluster" - }, - { - "id": "links", - "value": [ - { - "title": "Drill down to cluster", - "url": "/d/ff635a025bcfea7bc3dd4f508990a3e9/kubernetes-networking-cluster?${__url_time_range}&var-cluster=${__data.fields.cluster}" - } - ] - } - ] + "id": "unit", + "value": "Bps" }, { - "matcher": { - "id": "byName", - "options": "node_transmit_drop" - }, - "properties": [ - { - "id": "displayName", - "value": "Rate of Transmitted Packets Dropped" - }, - { - "id": "unit", - "value": "pps" - } - ] + "id": "decimals", + "value": 2 }, { - "matcher": { - "id": "byName", - "options": "node_receive_drop" - }, - "properties": [ - { - "id": "displayName", - "value": "Rate of Received Packets Dropped" - }, - { - "id": "unit", - "value": "pps" - } - ] + "id": "custom.align", + "value": "left" } ] }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 32 - }, - "id": 148, - "options": { - "showHeader": true, - "sortBy": [ + { + "matcher": { + "id": "byName", + "options": "node_transmit" + }, + "properties": [ + { + "id": "displayName", + "value": "Current Bandwidth Transmitted" + }, + { + "id": "unit", + "value": "Bps" + }, + { + "id": "decimals", + "value": 2 + }, { - "desc": true, - "displayName": "Current Bandwidth Received" + "id": "custom.align", + "value": "left" } ] }, - "pluginVersion": "8.5.20", - "targets": [ - { - "exemplar": true, - "expr": "sum(instance:node_network_receive_bytes_excluding_lo:rate1m{cluster=~\"$cluster\",job=\"node-exporter\",clusterType!=\"ocp3\"}) by (cluster)\n* on(cluster) group_left(node_transmit) count_values without() (\"node_transmit\", sum(instance:node_network_transmit_bytes_excluding_lo:rate1m{cluster=~\"$cluster\",job=\"node-exporter\"}) by (cluster))\n* on(cluster) group_left(node_receive_drop) count_values without() (\"node_receive_drop\", sum(instance:node_network_receive_drop_excluding_lo:rate1m{cluster=~\"$cluster\",job=\"node-exporter\"}) by (cluster))\n* on(cluster) group_left(node_transmit_drop) count_values without() (\"node_transmit_drop\", sum(instance:node_network_transmit_drop_excluding_lo:rate1m{cluster=~\"$cluster\",job=\"node-exporter\"}) by (cluster))", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Bandwidth Utilization", - "transformations": [ - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "cluster", - "node_receive_drop", - "node_transmit", - "node_transmit_drop", - "Value" - ] - } - } + { + "matcher": { + "id": "byName", + "options": "cluster" }, - { - "id": "sortBy", - "options": { - "fields": {}, - "sort": [ + "properties": [ + { + "id": "displayName", + "value": "Cluster" + }, + { + "id": "links", + "value": [ { - "field": "Value #A" + "title": "Drill down to cluster", + "url": "/d/ff635a025bcfea7bc3dd4f508990a3e9/kubernetes-networking-cluster?${__url_time_range}&var-cluster=${__data.fields.cluster}" } ] } - }, - { - "id": "organize", - "options": { - "excludeByName": {}, - "indexByName": { - "Value": 1, - "cluster": 0, - "node_receive_drop": 3, - "node_transmit": 2, - "node_transmit_drop": 4 - }, - "renameByName": {} - } - } - ], - "type": "table" - } - ], - "refresh": "5m", - "schemaVersion": 30, - "style": "light", - "tags": [ - "ACM" - ], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "Observatorium", - "value": "Observatorium" - }, - "description": null, - "error": null, - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "datasource", - "options": [], - "query": "prometheus", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "allValue": null, - "current": { - "selected": false, - "text": "name", - "value": "name" - }, - "datasource": null, - "definition": "label_values(acm_label_names, label_name)", - "description": null, - "error": null, - "hide": 0, - "includeAll": false, - "label": "Label", - "multi": false, - "name": "acm_label_names", - "options": [], - "query": { - "query": "label_values(acm_label_names, label_name)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "type": "query" + ] }, { - "allValue": null, - "current": { - "selected": false, - "text": "All", - "value": "$__all" + "matcher": { + "id": "byName", + "options": "node_transmit_drop" }, - "datasource": null, - "definition": "label_values(acm_managed_cluster_labels, $acm_label_names)", - "description": null, - "error": null, - "hide": 0, - "includeAll": true, - "label": "Value", - "multi": true, - "name": "value", - "options": [], - "query": { - "query": "label_values(acm_managed_cluster_labels, $acm_label_names)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" + "properties": [ + { + "id": "displayName", + "value": "Rate of Transmitted Packets Dropped" + }, + { + "id": "unit", + "value": "pps" + } + ] }, { - "allValue": null, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "datasource": null, - "definition": "label_values(acm_managed_cluster_labels{$acm_label_names=~\"$value\"}, name)", - "description": null, - "error": null, - "hide": 2, - "includeAll": true, - "label": "Cluster", - "multi": true, - "name": "cluster", - "options": [], - "query": { - "query": "label_values(acm_managed_cluster_labels{$acm_label_names=~\"$value\"}, name)", - "refId": "StandardVariableQuery" + "matcher": { + "id": "byName", + "options": "node_receive_drop" }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" + "properties": [ + { + "id": "displayName", + "value": "Rate of Received Packets Dropped" + }, + { + "id": "unit", + "value": "pps" + } + ] } ] }, - "time": { - "from": "now-3h", - "to": "now" + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 32 }, - "timepicker": { - "refresh_intervals": [ - "1m", - "5m", - "10m", - "15m", - "30m", - "1h", - "2h", - "1d" + "id": 148, + "options": { + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Current Bandwidth Received" + } ] }, - "timezone": "browser", - "title": "ACM - Clusters Overview", - "uid": "2b679d600f3b9e7676a7c5ac3643d448", - "version": 1 + "pluginVersion": "8.5.20", + "targets": [ + { + "exemplar": true, + "expr": "sum(instance:node_network_receive_bytes_excluding_lo:rate1m{cluster=~\"$cluster\",job=\"node-exporter\",clusterType!=\"ocp3\"}) by (cluster)\n* on(cluster) group_left(node_transmit) count_values without() (\"node_transmit\", sum(instance:node_network_transmit_bytes_excluding_lo:rate1m{cluster=~\"$cluster\",job=\"node-exporter\"}) by (cluster))\n* on(cluster) group_left(node_receive_drop) count_values without() (\"node_receive_drop\", sum(instance:node_network_receive_drop_excluding_lo:rate1m{cluster=~\"$cluster\",job=\"node-exporter\"}) by (cluster))\n* on(cluster) group_left(node_transmit_drop) count_values without() (\"node_transmit_drop\", sum(instance:node_network_transmit_drop_excluding_lo:rate1m{cluster=~\"$cluster\",job=\"node-exporter\"}) by (cluster))", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Bandwidth Utilization", + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "cluster", + "node_receive_drop", + "node_transmit", + "node_transmit_drop", + "Value" + ] + } + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "Value #A" + } + ] + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Value": 1, + "cluster": 0, + "node_receive_drop": 3, + "node_transmit": 2, + "node_transmit_drop": 4 + }, + "renameByName": {} + } + } + ], + "type": "table" } -kind: ConfigMap -metadata: - name: grafana-dashboard-acm-clusters-overview - namespace: open-cluster-management-observability - labels: - general-folder: 'true' + ], + "refresh": "5m", + "schemaVersion": 30, + "style": "light", + "tags": [ + "ACM" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Observatorium", + "value": "Observatorium" + }, + "description": null, + "error": null, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "name", + "value": "name" + }, + "datasource": null, + "definition": "label_values(acm_label_names, label_name)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Label", + "multi": false, + "name": "acm_label_names", + "options": [], + "query": { + "query": "label_values(acm_label_names, label_name)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": null, + "definition": "label_values(acm_managed_cluster_labels, $acm_label_names)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "Value", + "multi": true, + "name": "value", + "options": [], + "query": { + "query": "label_values(acm_managed_cluster_labels, $acm_label_names)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": null, + "definition": "label_values(acm_managed_cluster_labels{$acm_label_names=~\"$value\"}, name)", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": "Cluster", + "multi": true, + "name": "cluster", + "options": [], + "query": { + "query": "label_values(acm_managed_cluster_labels{$acm_label_names=~\"$value\"}, name)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "1m", + "5m", + "10m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "browser", + "title": "ACM - Clusters Overview", + "uid": "2b679d600f3b9e7676a7c5ac3643d448", + "version": 1 +}