Skip to content

Commit

Permalink
grafana dashboard for incidents
Browse files Browse the repository at this point in the history
Signed-off-by: Tomáš Remeš <[email protected]>
  • Loading branch information
tremes committed Jan 30, 2025
1 parent 8e24d2d commit 9dac166
Show file tree
Hide file tree
Showing 3 changed files with 379 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ data:
- cluster_infrastructure_provider
- cluster_version
- cluster_version_payload
- console_url
- container_cpu_cfs_periods_total
- container_cpu_cfs_throttled_periods_total
- container_spec_cpu_quota
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,377 @@
apiVersion: v1
data:
acm-incidents-overview.json: |-
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Incidents overview",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 0,
"links": [],
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "000000001"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "left",
"cellOptions": {
"type": "auto",
"wrapText": false
},
"inspect": true
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "cluster"
},
"properties": [
{
"id": "links",
"value": [
{
"targetBlank": true,
"title": "Incident detail",
"url": "${__data.fields.url}"
}
]
},
{
"id": "custom.width",
"value": 152
}
]
},
{
"matcher": {
"id": "byName",
"options": "severity"
},
"properties": [
{
"id": "mappings",
"value": [
{
"options": {
"0": {
"index": 2,
"text": "info"
},
"1": {
"index": 1,
"text": "warning"
},
"2": {
"index": 0,
"text": "critical"
}
},
"type": "value"
}
]
},
{
"id": "custom.width",
"value": 92
}
]
},
{
"matcher": {
"id": "byName",
"options": "alerts"
},
"properties": [
{
"id": "custom.width",
"value": 438
}
]
},
{
"matcher": {
"id": "byName",
"options": "url"
},
"properties": [
{
"id": "custom.hidden",
"value": true
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"cellHeight": "md",
"footer": {
"countRows": false,
"enablePagination": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": [
{
"desc": true,
"displayName": "severity"
}
]
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "000000001"
},
"editorMode": "code",
"exemplar": false,
"expr": "max(cluster:health:components:map{}) by (group_id,cluster,component,src_alertname) * on (cluster) group_left(url) console_url",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "000000001"
},
"editorMode": "code",
"expr": "min_over_time(timestamp(max by (group_id) (cluster:health:components:map))[3d:1m]) * 1000",
"format": "table",
"hide": false,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "B"
}
],
"title": "Incidents",
"transformations": [
{
"id": "filterFieldsByName",
"options": {
"include": {
"names": [
"cluster",
"component",
"group_id",
"src_alertname",
"url",
"Value #B",
"Value #A"
]
}
}
},
{
"id": "joinByField",
"options": {
"byField": "group_id",
"mode": "inner"
}
},
{
"id": "groupBy",
"options": {
"fields": {
"Value #A": {
"aggregations": [
"max"
],
"operation": "aggregate"
},
"Value #B": {
"aggregations": [
"min"
],
"operation": "aggregate"
},
"cluster": {
"aggregations": [],
"operation": "groupby"
},
"component": {
"aggregations": [
"uniqueValues"
],
"operation": "aggregate"
},
"group_id": {
"aggregations": [],
"operation": "groupby"
},
"src_alertname": {
"aggregations": [
"uniqueValues"
],
"operation": "aggregate"
},
"url": {
"aggregations": [
"first"
],
"operation": "aggregate"
}
}
}
},
{
"id": "organize",
"options": {
"excludeByName": {
"group_id": true
},
"includeByName": {},
"indexByName": {
"Value #A (max)": 2,
"Value #B (min)": 5,
"cluster": 1,
"component (uniqueValues)": 3,
"group_id": 0,
"src_alertname (uniqueValues)": 4
},
"renameByName": {
"Value #A (max)": "severity",
"Value #B (max)": "start time",
"Value #B (min)": "start time",
"component (uniqueValues)": "components",
"src_alertname (uniqueValues)": "alerts",
"url (first)": "url"
}
}
},
{
"id": "convertFieldType",
"options": {
"conversions": [
{
"destinationType": "time",
"targetField": "start time"
}
],
"fields": {}
}
},
{
"id": "filterByValue",
"options": {
"filters": [
{
"config": {
"id": "equal",
"options": {
"value": 0
}
},
"fieldName": "severity"
},
{
"config": {
"id": "equal",
"options": {
"value": "Watchdog"
}
},
"fieldName": "alerts"
}
],
"match": "any",
"type": "exclude"
}
}
],
"type": "table"
}
],
"refresh": "15m",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-12h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
]
},
"timezone": "",
"title": "Incidents overview",
"uid": "",
"version": 0,
"weekStart": ""
}
kind: ConfigMap
metadata:
name: grafana-dashboard-acm-incidents-overview
namespace: open-cluster-management-observability
annotations:
observability.open-cluster-management.io/dashboard-folder: "Incidents"
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ resources:
- dash-acm-alert-analysis.yaml
- dash-acm-alerts-by-clusters.yaml
- dash-acm-cluster-by-alerts.yaml
- dash-acm-incidents-overview.yaml
- dash-acm-optimization-overview.yaml
- dash-acm-clusters-overview.yaml
- dash-acm-clusters-overview-optimized.yaml
Expand Down

0 comments on commit 9dac166

Please sign in to comment.