diff --git a/dashboards/grafana-dashboard-exhort.configmap.yaml b/dashboards/grafana-dashboard-exhort.configmap.yaml index 11eaa943..465713d8 100644 --- a/dashboards/grafana-dashboard-exhort.configmap.yaml +++ b/dashboards/grafana-dashboard-exhort.configmap.yaml @@ -28,7 +28,7 @@ data: "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 682537, + "id": 692867, "links": [ { "asDropdown": false, @@ -82,6 +82,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -95,6 +96,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 2, "pointSize": 5, @@ -138,7 +140,6 @@ data: }, "id": 29, "interval": "2m", - "links": [], "options": { "legend": { "calcs": [], @@ -182,6 +183,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -195,6 +197,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 2, "pointSize": 5, @@ -238,7 +241,6 @@ data: }, "id": 31, "interval": "2m", - "links": [], "options": { "legend": { "calcs": [], @@ -281,6 +283,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -294,6 +297,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineStyle": { "fill": "solid" @@ -336,7 +340,6 @@ data: }, "id": 42, "interval": "2m", - "links": [], "options": { "legend": { "calcs": [], @@ -379,6 +382,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -392,6 +396,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 2, "pointSize": 5, @@ -436,7 +441,6 @@ data: "y": 8 }, "id": 30, - "links": [], "options": { "legend": { "calcs": [], @@ -479,6 +483,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -492,6 +497,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -530,7 +536,6 @@ data: "y": 8 }, "id": 32, - "links": [], "options": { "legend": { "calcs": [], @@ -597,6 +602,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -610,6 +616,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -648,7 +655,6 @@ data: "y": 8 }, "id": 43, - "links": [], "options": { "legend": { "calcs": [], @@ -691,6 +697,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -704,6 +711,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineStyle": { "fill": "solid" @@ -812,6 +820,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -825,6 +834,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineStyle": { "fill": "solid" @@ -933,6 +943,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -946,6 +957,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineStyle": { "fill": "solid" @@ -1101,8 +1113,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1211,8 +1222,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "dark-red", @@ -1337,8 +1347,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -1430,8 +1439,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -1486,85 +1494,63 @@ data: "x": 0, "y": 26 }, - "id": 56, + "id": 49, "panels": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "description": "The current number of live threads including both daemon and non-daemon threads", + "description": "$slo_availability_objective percent of requests result in successful (non-5xx) response.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "mode": "thresholds" }, + "links": [], "mappings": [], + "max": 1, + "min": 0, "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 9, "w": 8, "x": 0, - "y": 3 + "y": 43 }, - "id": 58, + "id": 50, "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, + "pluginVersion": "10.4.1", "targets": [ { "datasource": { @@ -1572,92 +1558,69 @@ data: "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(jvm_threads_live_threads{job=\"exhort\", namespace=\"${namespace}\", container=\"app\", pod=~\"exhort.+\"}) by (pod)", - "legendFormat": "__auto", + "expr": "sum(rate(http_server_requests_seconds_count{namespace=~\"$namespace\", job=\"exhort\", status!~\"5..\"}[28d]))\n/\nsum(rate(http_server_requests_seconds_count{namespace=~\"$namespace\", job=\"exhort\"}[28d]))", + "interval": "", + "legendFormat": "Availability", "range": true, "refId": "A" } ], - "title": "Live threads", - "type": "timeseries" + "title": "Availability ($slo_time_window days)", + "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "description": "The current number of live daemon threads", + "description": "$slo_latency_objective percent of requests services in <5000ms.\n", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "mode": "thresholds" }, + "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", "value": 80 } ] - } + }, + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 9, "w": 8, "x": 8, - "y": 3 + "y": 43 }, - "id": 61, + "id": 51, "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, + "pluginVersion": "10.4.1", "targets": [ { "datasource": { @@ -1665,48 +1628,51 @@ data: "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(jvm_threads_daemon_threads{job=\"exhort\", namespace=\"${namespace}\", container=\"app\", pod=~\"exhort.+\"}) by (pod)", - "legendFormat": "__auto", + "expr": "histogram_quantile(0.95, sum(rate(http_server_requests_seconds_bucket{ status!~\"5..\", uri=\"/api/v4/analysis\"}[28d])) by (le))", + "interval": "", + "legendFormat": "0.95", "range": true, - "refId": "A" + "refId": "B" } ], - "title": "Daemon threads", - "type": "timeseries" + "title": "Latency ($slo_time_window days)", + "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "description": "The peak live thread count since the Java virtual machine started or peak was reset", + "description": "", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, - "lineInterpolation": "linear", - "lineWidth": 1, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, - "showPoints": "auto", - "spanNulls": false, + "showPoints": "never", + "spanNulls": true, "stacking": { "group": "A", "mode": "none" @@ -1715,30 +1681,27 @@ data: "mode": "off" } }, + "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 + "color": "green" } ] - } + }, + "unit": "short" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 9, "w": 8, "x": 16, - "y": 3 + "y": 43 }, - "id": 60, + "id": 52, "options": { "legend": { "calcs": [], @@ -1747,66 +1710,38 @@ data: "showLegend": true }, "tooltip": { - "mode": "single", + "mode": "multi", "sort": "none" } }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "sum(jvm_threads_peak_threads{job=\"exhort\", namespace=\"${namespace}\", container=\"app\", pod=~\"exhort.+\"}) by (pod)", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Peak threads", + "pluginVersion": "9.3.8", + "title": "[ TDB] Error budget ($slo_time_window days)", "type": "timeseries" - }, + } + ], + "title": "SLO", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 13, + "panels": [ { "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" }, - "description": "The total number of application threads started in the JVM", + "description": "The number of items in the cache", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "mode": "thresholds" }, "mappings": [], "thresholds": { @@ -1815,25 +1750,212 @@ data: { "color": "green", "value": null - }, - { - "color": "red", - "value": 80 } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, + "h": 4, + "w": 4, "x": 0, - "y": 11 + "y": 26 }, - "id": 59, + "id": 4, "options": { - "legend": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" + }, + "dimensions": { + "CacheClusterId": "$cacheclusterId" + }, + "expression": "", + "id": "", + "label": "", + "matchExact": true, + "metricEditorMode": 0, + "metricName": "CurrItems", + "metricQueryType": 0, + "namespace": "AWS/ElastiCache", + "period": "", + "queryMode": "Metrics", + "refId": "A", + "region": "$region", + "sqlExpression": "", + "statistic": "Average" + } + ], + "title": "CacheItems", + "type": "stat" + }, + { + "datasource": { + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" + }, + "description": "Percentage of the memory for the cluster that is in use", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 26 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" + }, + "dimensions": { + "CacheClusterId": "$cacheclusterId" + }, + "expression": "", + "id": "", + "label": "", + "matchExact": true, + "metricEditorMode": 0, + "metricName": "DatabaseMemoryUsagePercentage", + "metricQueryType": 0, + "namespace": "AWS/ElastiCache", + "period": "", + "queryMode": "Metrics", + "refId": "A", + "region": "$region", + "sqlExpression": "", + "statistic": "Average" + } + ], + "title": "DatabaseMemoryUsagePercentage", + "type": "stat" + }, + { + "datasource": { + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" + }, + "description": "Indicates the usage efficiency of the Redis instance. If the cache ratio is lower than about 0.8, it means that a significant amount of keys are evicted, expired, or don't exist", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 26 + }, + "id": 3, + "options": { + "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", @@ -1844,32 +1966,77 @@ data: "sort": "none" } }, + "pluginVersion": "9.3.8", "targets": [ { "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" }, - "editorMode": "code", - "expr": "sum(jvm_threads_started_threads_total{job=\"exhort\", namespace=\"${namespace}\", container=\"app\", pod=~\"exhort.+\"}) by (pod)\n", - "legendFormat": "__auto", - "range": true, - "refId": "A" + "dimensions": { + "CacheClusterId": "$cacheclusterId" + }, + "expression": "", + "id": "", + "label": "", + "matchExact": true, + "metricEditorMode": 0, + "metricName": "CacheHitRate", + "metricQueryType": 0, + "namespace": "AWS/ElastiCache", + "period": "", + "queryMode": "Metrics", + "refId": "A", + "region": "$region", + "sqlExpression": "", + "statistic": "Average" } ], - "title": "Started threads", + "title": "CacheHitRate", "type": "timeseries" }, { "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" }, - "description": "The current number of threads having NEW state", + "description": "The total number of bytes allocated by Redis for all purposes, including the dataset, buffers, and so on.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, "mappings": [], "thresholds": { @@ -1881,26 +2048,98 @@ data: }, { "color": "red", - "value": 800 + "value": 80 } ] }, - "unit": "none" + "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 8, "w": 8, - "x": 8, - "y": 11 + "x": 16, + "y": 26 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" + }, + "dimensions": { + "CacheClusterId": "$cacheclusterId" + }, + "expression": "", + "id": "", + "label": "", + "matchExact": true, + "metricEditorMode": 0, + "metricName": "BytesUsedForCache", + "metricQueryType": 0, + "namespace": "AWS/ElastiCache", + "period": "", + "queryMode": "Metrics", + "refId": "A", + "region": "$region", + "sqlExpression": "", + "statistic": "Average" + } + ], + "title": "BytesUsedForCache", + "type": "timeseries" + }, + { + "datasource": { + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" + }, + "description": "The number of keys that have been evicted due to the maxmemory limit", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] }, - "id": 62, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 30 + }, + "id": 8, "options": { - "displayMode": "gradient", - "minVizHeight": 10, - "minVizWidth": 0, - "orientation": "horizontal", + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" @@ -1908,30 +2147,118 @@ data: "fields": "", "values": false }, - "showUnfilled": true + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.3.8", + "pluginVersion": "10.4.1", "targets": [ { "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(jvm_threads_states_threads{job=\"exhort\", namespace=\"exhort-production\", container=\"app\"}) by (state, pod)", - "instant": false, - "interval": "", - "legendFormat": "{{pod}}: \"{{state}}\"", - "range": true, - "refId": "A" + "dimensions": { + "CacheClusterId": "$cacheclusterId" + }, + "expression": "", + "id": "", + "label": "", + "matchExact": true, + "metricEditorMode": 0, + "metricName": "Evictions", + "metricQueryType": 0, + "namespace": "AWS/ElastiCache", + "period": "", + "queryMode": "Metrics", + "refId": "A", + "region": "$region", + "sqlExpression": "", + "statistic": "Average" + } + ], + "title": "Evictions", + "type": "stat" + }, + { + "datasource": { + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" + }, + "description": "Percentage of the total data capacity for the cluster that is in use", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 30 + }, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" + }, + "dimensions": { + "CacheClusterId": "$cacheclusterId" + }, + "expression": "", + "id": "", + "label": "", + "matchExact": true, + "metricEditorMode": 0, + "metricName": "DatabaseCapacityUsagePercentage", + "metricQueryType": 0, + "namespace": "AWS/ElastiCache", + "period": "", + "queryMode": "Metrics", + "refId": "A", + "region": "$region", + "sqlExpression": "", + "statistic": "Average" } ], - "title": "Threads by state", - "type": "bargauge" + "title": "DatabaseCapacityUsagePercentage", + "type": "stat" } ], - "title": "JVM", + "title": "Elasticache", "type": "row" }, { @@ -1940,25 +2267,195 @@ data: "h": 1, "w": 24, "x": 0, - "y": 27 + "y": 28 }, - "id": 49, + "id": 15, "panels": [ { "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" + }, + "description": "The percentage of CPU utilization for the entire host", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 70 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 11 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.3.8", + "targets": [ + { + "datasource": { + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" + }, + "dimensions": {}, + "expression": "", + "id": "", + "label": "CPUUtilization", + "matchExact": true, + "metricEditorMode": 0, + "metricName": "CPUUtilization", + "metricQueryType": 1, + "namespace": "AWS/ElastiCache", + "period": "", + "queryMode": "Metrics", + "refId": "A", + "region": "$region", + "sql": { + "from": { + "property": { + "name": "AWS/ElastiCache", + "type": "string" + }, + "type": "property" + }, + "select": { + "name": "AVG", + "parameters": [ + { + "name": "CPUUtilization", + "type": "functionParameter" + } + ], + "type": "function" + }, + "where": { + "expressions": [ + { + "operator": { + "name": "=", + "value": "$cacheclusterId" + }, + "property": { + "name": "CacheClusterId", + "type": "string" + }, + "type": "operator" + } + ], + "type": "and" + } + }, + "sqlExpression": "SELECT AVG(CPUUtilization) FROM \"AWS/ElastiCache\" WHERE CacheClusterId = '$cacheclusterId'", + "statistic": "Average" + } + ], + "title": "CPUUtilization", + "type": "timeseries" + }, + { + "datasource": { + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" }, - "description": "$slo_availability_objective percent of requests result in successful (non-5xx) response.", + "description": "The amount of free memory available on the host", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "dashed" + } }, - "links": [], "mappings": [], - "max": 1, - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -1967,66 +2464,136 @@ data: }, { "color": "red", - "value": 80 + "value": 100000000 } ] }, - "unit": "percentunit" + "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 9, "w": 8, - "x": 0, - "y": 51 + "x": 8, + "y": 11 }, - "id": 50, - "links": [], + "id": 10, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "textMode": "auto" + "tooltip": { + "mode": "single", + "sort": "none" + } }, "pluginVersion": "9.3.8", "targets": [ { "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" }, - "editorMode": "code", - "expr": "sum(rate(http_server_requests_seconds_count{namespace=~\"$namespace\", job=\"exhort\", status!~\"5..\"}[28d]))\n/\nsum(rate(http_server_requests_seconds_count{namespace=~\"$namespace\", job=\"exhort\"}[28d]))", - "interval": "", - "legendFormat": "Availability", - "range": true, - "refId": "A" + "dimensions": {}, + "expression": "", + "id": "", + "label": "FreeableMemory", + "matchExact": true, + "metricEditorMode": 0, + "metricName": "", + "metricQueryType": 1, + "namespace": "AWS/ElastiCache", + "period": "", + "queryMode": "Metrics", + "refId": "A", + "region": "$region", + "sql": { + "from": { + "property": { + "name": "AWS/ElastiCache", + "type": "string" + }, + "type": "property" + }, + "select": { + "name": "AVG", + "parameters": [ + { + "name": "FreeableMemory", + "type": "functionParameter" + } + ], + "type": "function" + }, + "where": { + "expressions": [ + { + "operator": { + "name": "=", + "value": "$cacheclusterId" + }, + "property": { + "name": "CacheClusterId", + "type": "string" + }, + "type": "operator" + } + ], + "type": "and" + } + }, + "sqlExpression": "SELECT AVG(FreeableMemory) FROM \"AWS/ElastiCache\" WHERE CacheClusterId = '$cacheclusterId'", + "statistic": "Average" } ], - "title": "Availability ($slo_time_window days)", - "type": "stat" + "title": "FreeableMemory", + "type": "timeseries" }, { "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" }, - "description": "$slo_latency_objective percent of requests services in <5000ms.\n", + "description": "The amount of swap used on the host.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", @@ -2034,62 +2601,107 @@ data: { "color": "green" }, + { + "color": "#EAB839", + "value": 70 + }, { "color": "red", - "value": 80 + "value": 90 } ] }, - "unit": "ms" + "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 9, "w": 8, - "x": 8, - "y": 51 + "x": 16, + "y": 11 }, - "id": 51, - "links": [], + "id": 11, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "textMode": "auto" + "tooltip": { + "mode": "single", + "sort": "none" + } }, "pluginVersion": "9.3.8", "targets": [ { "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum(rate(http_server_requests_seconds_bucket{ status!~\"5..\", uri=\"/api/v4/analysis\"}[28d])) by (le))", - "interval": "", - "legendFormat": "0.95", - "range": true, - "refId": "B" + "dimensions": {}, + "expression": "", + "id": "", + "label": "SwapUsage", + "matchExact": true, + "metricEditorMode": 0, + "metricName": "", + "metricQueryType": 1, + "namespace": "AWS/ElastiCache", + "period": "", + "queryMode": "Metrics", + "refId": "A", + "region": "$region", + "sql": { + "from": { + "property": { + "name": "AWS/ElastiCache", + "type": "string" + }, + "type": "property" + }, + "select": { + "name": "AVG", + "parameters": [ + { + "name": "SwapUsage", + "type": "functionParameter" + } + ], + "type": "function" + }, + "where": { + "expressions": [ + { + "operator": { + "name": "=", + "value": "$cacheclusterId" + }, + "property": { + "name": "CacheClusterId", + "type": "string" + }, + "type": "operator" + } + ], + "type": "and" + } + }, + "sqlExpression": "SELECT AVG(SwapUsage) FROM \"AWS/ElastiCache\" WHERE CacheClusterId = '$cacheclusterId'", + "statistic": "Average" } ], - "title": "Latency ($slo_time_window days)", - "type": "stat" + "title": "SwapUsage", + "type": "timeseries" }, { "datasource": { - "type": "prometheus", - "uid": "${datasource}" + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" }, - "description": "", + "description": "Provides CPU utilization of the Redis engine thread", "fieldConfig": { "defaults": { "color": { @@ -2102,21 +2714,21 @@ data: "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 20, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, - "lineInterpolation": "smooth", - "lineWidth": 2, + "lineInterpolation": "linear", + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { "group": "A", "mode": "none" @@ -2125,28 +2737,30 @@ data: "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" + }, + { + "color": "red", + "value": 80 } ] }, - "unit": "short" + "unit": "percent" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 8, "w": 8, - "x": 16, - "y": 51 + "x": 0, + "y": 20 }, - "id": 52, - "links": [], + "id": 7, "options": { "legend": { "calcs": [], @@ -2155,22 +2769,78 @@ data: "showLegend": true }, "tooltip": { - "mode": "multi", + "mode": "single", "sort": "none" } }, - "pluginVersion": "9.3.8", - "title": "[ TDB] Error budget ($slo_time_window days)", + "targets": [ + { + "datasource": { + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" + }, + "dimensions": {}, + "expression": "", + "id": "", + "label": "EngineCPUUtilization", + "matchExact": true, + "metricEditorMode": 0, + "metricName": "", + "metricQueryType": 1, + "namespace": "AWS/ElastiCache", + "period": "", + "queryMode": "Metrics", + "refId": "A", + "region": "$region", + "sql": { + "from": { + "property": { + "name": "AWS/ElastiCache", + "type": "string" + }, + "type": "property" + }, + "select": { + "name": "AVG", + "parameters": [ + { + "name": "EngineCPUUtilization", + "type": "functionParameter" + } + ], + "type": "function" + }, + "where": { + "expressions": [ + { + "operator": { + "name": "=", + "value": "$cacheclusterId" + }, + "property": { + "name": "CacheClusterId", + "type": "string" + }, + "type": "operator" + } + ], + "type": "and" + } + }, + "sqlExpression": "SELECT AVG(EngineCPUUtilization) FROM \"AWS/ElastiCache\" WHERE CacheClusterId = '$cacheclusterId'", + "statistic": "Average" + } + ], + "title": "EngineCPUUtilization", "type": "timeseries" } ], - "title": "SLO", + "title": "Elasticache host", "type": "row" } ], "refresh": false, - "schemaVersion": 37, - "style": "dark", + "schemaVersion": 39, "tags": [ "exhort", "trusted-content" @@ -2179,9 +2849,9 @@ data: "list": [ { "current": { - "selected": true, + "selected": false, "text": "appsrep08ue2-prometheus", - "value": "appsrep08ue2-prometheus" + "value": "PC7027C05BC564040" }, "hide": 0, "includeAll": false, @@ -2197,7 +2867,7 @@ data: }, { "current": { - "selected": true, + "selected": false, "text": "exhort-production", "value": "exhort-production" }, @@ -2244,6 +2914,113 @@ data: "query": "95", "skipUrlSync": false, "type": "constant" + }, + { + "current": { + "selected": false, + "text": "AWS app-sre", + "value": "PCB1035EE776137AD" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "cloudwatch_datasource", + "options": [], + "query": "cloudwatch", + "refresh": 1, + "regex": "AWS app-sre-stage|AWS app-sre", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "us-east-2", + "value": "us-east-2" + }, + "datasource": { + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "Region", + "multi": false, + "name": "region", + "options": [], + "query": { + "queryType": "regions", + "refId": "CloudWatchVariableQueryEditor-VariableQuery", + "region": "default" + }, + "refresh": 1, + "regex": "us-east-[0-9]", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "exhort-elasticache-production-001", + "value": "exhort-elasticache-production-001" + }, + "datasource": { + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "CacheClusterID", + "multi": false, + "name": "cacheclusterId", + "options": [], + "query": { + "dimensionKey": "CacheClusterId", + "metricName": "CPUUtilization", + "namespace": "AWS/ElastiCache", + "queryType": "dimensionValues", + "refId": "CloudWatchVariableQueryEditor-VariableQuery", + "region": "$region" + }, + "refresh": 1, + "regex": "exhort-elasticache-.*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "0001", + "value": "0001" + }, + "datasource": { + "type": "cloudwatch", + "uid": "${cloudwatch_datasource}" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "CacheNodeId", + "multi": false, + "name": "cachenodeid", + "options": [], + "query": { + "dimensionKey": "CacheNodeId", + "metricName": "CPUUtilization", + "namespace": "AWS/ElastiCache", + "queryType": "dimensionValues", + "refId": "CloudWatchVariableQueryEditor-VariableQuery", + "region": "$region" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" } ] }, @@ -2279,7 +3056,7 @@ data: "timezone": "utc", "title": "Exhort", "uid": "trusted-content-exhort", - "version": 9, + "version": 10, "weekStart": "" } kind: ConfigMap