diff --git a/charts/charts/dashboard-api/values.yaml b/charts/charts/dashboard-api/values.yaml index fc7b144..c0c5bf8 100644 --- a/charts/charts/dashboard-api/values.yaml +++ b/charts/charts/dashboard-api/values.yaml @@ -8,7 +8,7 @@ image: repository: tkestack/kstone-api-amd64 pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. - tag: "v0.0.1-alpha.2" + tag: "v0.1.0-alpha.2" imagePullSecrets: [] nameOverride: "" diff --git a/charts/charts/dashboard/values.yaml b/charts/charts/dashboard/values.yaml index 494840c..1b3fbbe 100644 --- a/charts/charts/dashboard/values.yaml +++ b/charts/charts/dashboard/values.yaml @@ -8,7 +8,7 @@ image: repository: tkestack/kstone-dashboard pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. - tag: "v0.1.0-alpha.1" + tag: "v0.1.0-alpha.2" imagePullSecrets: [] nameOverride: "" diff --git a/charts/charts/etcd-controller/values.yaml b/charts/charts/etcd-controller/values.yaml index 2570f4c..c79cbb0 100644 --- a/charts/charts/etcd-controller/values.yaml +++ b/charts/charts/etcd-controller/values.yaml @@ -8,7 +8,7 @@ image: repository: tkestack/kstone-controller-amd64 pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. - tag: "v0.0.1-alpha.2" + tag: "v0.1.0-alpha.2" imagePullSecrets: [] nameOverride: "" diff --git a/charts/charts/grafana/dashboards/0.json b/charts/charts/grafana/dashboards/0.json index 2ffbd51..75b3db2 100644 --- a/charts/charts/grafana/dashboards/0.json +++ b/charts/charts/grafana/dashboards/0.json @@ -17,7 +17,7 @@ "gnetId": 3070, "graphTooltip": 0, "id": 1, - "iteration": 1640766483206, + "iteration": 1642043560459, "links": [], "panels": [ { @@ -512,6 +512,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", + "decimals": null, "editable": true, "error": false, "fieldConfig": { @@ -522,6 +523,7 @@ }, "fill": 0, "fillGradient": 0, + "grid": {}, "gridPos": { "h": 9, "w": 12, @@ -529,7 +531,7 @@ "y": 17 }, "hiddenSeries": false, - "id": 41, + "id": 82, "legend": { "alignAsTable": true, "avg": false, @@ -557,30 +559,33 @@ "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "etcd_debugging_mvcc_watcher_total{job=\"$job\"}", + "exemplar": true, + "expr": "etcd_debugging_mvcc_db_total_size_in_bytes{job=\"$job\"}/etcd_server_quota_backend_bytes{job=\"$job\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{job}}_{{instance}}_{{endpoint}}_mvcc_watcher_total", - "metric": "grpc_server_handled_total", + "legendFormat": "{{job}}_{{instance}}_{{endpoint}}", + "metric": "", "refId": "A", - "step": 60 + "step": 120 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "The total number of wathcher", + "title": "DB Usage Rate", "tooltip": { "msResolution": false, "shared": true, "sort": 0, - "value_type": "individual" + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -592,20 +597,20 @@ }, "yaxes": [ { - "format": "short", - "label": "", + "$$hashKey": "object:192", + "format": "percentunit", "logBase": 1, "max": null, "min": null, "show": true }, { + "$$hashKey": "object:193", "format": "short", - "label": null, "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ], "yaxis": { @@ -631,7 +636,7 @@ "fillGradient": 0, "grid": {}, "gridPos": { - "h": 8, + "h": 9, "w": 12, "x": 0, "y": 26 @@ -736,54 +741,72 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, "fillGradient": 0, "gridPos": { - "h": 8, + "h": 9, "w": 12, "x": 12, "y": 26 }, "hiddenSeries": false, - "id": 67, + "id": 41, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "max": true, "min": false, "show": true, + "sort": "current", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, - "linewidth": 1, - "nullPointMode": "null", + "linewidth": 2, + "links": [], + "nullPointMode": "connected", "options": { "alertThreshold": true }, + "paceLength": 10, "percentage": false, "pluginVersion": "8.0.3", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "etcd_debugging_mvcc_pending_events_total{job=\"$job\"}", + "expr": "etcd_debugging_mvcc_watcher_total{job=\"$job\"}", + "format": "time_series", "interval": "", - "legendFormat": "{{job}}_{{instance}}_{{endpoint}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{job}}_{{instance}}_{{endpoint}}_mvcc_watcher_total", + "metric": "grpc_server_handled_total", + "refId": "A", + "step": 60 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "The total number of pending events", + "title": "The total number of wathcher", "tooltip": { + "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -799,7 +822,7 @@ "yaxes": [ { "format": "short", - "label": null, + "label": "", "logBase": 1, "max": null, "min": null, @@ -837,7 +860,7 @@ "h": 8, "w": 12, "x": 0, - "y": 34 + "y": 35 }, "hiddenSeries": false, "id": 76, @@ -934,47 +957,34 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 0, + "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 34 + "y": 35 }, "hiddenSeries": false, - "id": 29, + "id": 67, "legend": { - "alignAsTable": true, "avg": false, - "current": true, - "max": true, + "current": false, + "max": false, "min": false, "show": true, - "sort": "current", - "sortDesc": true, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", + "linewidth": 1, + "nullPointMode": "null", "options": { "alertThreshold": true }, - "paceLength": 10, "percentage": false, "pluginVersion": "8.0.3", - "pointradius": 5, + "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -983,10 +993,8 @@ "steppedLine": false, "targets": [ { - "expr": "process_resident_memory_bytes{job=\"$job\"}", - "format": "time_series", + "expr": "etcd_debugging_mvcc_pending_events_total{job=\"$job\"}", "interval": "", - "intervalFactor": 1, "legendFormat": "{{job}}_{{instance}}_{{endpoint}}", "refId": "A" } @@ -995,9 +1003,8 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Memory", + "title": "The total number of pending events", "tooltip": { - "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -1012,7 +1019,7 @@ }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -1053,7 +1060,7 @@ "h": 8, "w": 6, "x": 0, - "y": 42 + "y": 43 }, "hiddenSeries": false, "id": 22, @@ -1157,7 +1164,7 @@ "h": 8, "w": 6, "x": 6, - "y": 42 + "y": 43 }, "hiddenSeries": false, "id": 21, @@ -1262,12 +1269,12 @@ "fillGradient": 0, "gridPos": { "h": 8, - "w": 6, + "w": 12, "x": 12, - "y": 42 + "y": 43 }, "hiddenSeries": false, - "id": 20, + "id": 29, "legend": { "alignAsTable": true, "avg": false, @@ -1275,6 +1282,8 @@ "max": true, "min": false, "show": true, + "sort": "current", + "sortDesc": true, "total": false, "values": true }, @@ -1297,21 +1306,19 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(etcd_network_peer_received_bytes_total{job=\"$job\"}[5m])) by (job,instance)", + "expr": "process_resident_memory_bytes{job=\"$job\"}", "format": "time_series", "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{job}}_{{instance}}_{{endpoint}}", - "metric": "etcd_network_peer_received_bytes_total", - "refId": "A", - "step": 120 + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Peer Traffic In", + "title": "Memory", "tooltip": { "msResolution": false, "shared": true, @@ -1328,7 +1335,7 @@ }, "yaxes": [ { - "format": "Bps", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -1355,26 +1362,23 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "decimals": null, - "editable": true, - "error": false, + "description": "etcd_debugging_mvcc_keys_total", "fieldConfig": { "defaults": { "links": [] }, "overrides": [] }, - "fill": 0, + "fill": 1, "fillGradient": 0, - "grid": {}, "gridPos": { "h": 8, - "w": 6, - "x": 18, - "y": 42 + "w": 12, + "x": 0, + "y": 51 }, "hiddenSeries": false, - "id": 16, + "id": 61, "legend": { "alignAsTable": true, "avg": false, @@ -1386,16 +1390,16 @@ "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "connected", + "nullPointMode": "null", "options": { "alertThreshold": true }, "paceLength": 10, "percentage": false, "pluginVersion": "8.0.3", - "pointradius": 5, + "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -1404,27 +1408,23 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(etcd_network_peer_sent_bytes_total{job=\"$job\"}[5m])) by (job,instance)", + "expr": "etcd_debugging_mvcc_keys_total{job=\"$job\"}", "format": "time_series", - "hide": false, "interval": "", "intervalFactor": 2, "legendFormat": "{{job}}_{{instance}}_{{endpoint}}", - "metric": "etcd_network_peer_sent_bytes_total", - "refId": "A", - "step": 120 + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Peer Traffic Out", + "title": "The total number of key", "tooltip": { - "msResolution": false, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -1436,7 +1436,8 @@ }, "yaxes": [ { - "format": "Bps", + "format": "short", + "label": null, "logBase": 1, "max": null, "min": null, @@ -1444,6 +1445,7 @@ }, { "format": "short", + "label": null, "logBase": 1, "max": null, "min": null, @@ -1461,23 +1463,24 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "etcd_debugging_mvcc_keys_total", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "links": [] }, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, "gridPos": { "h": 8, - "w": 12, - "x": 0, - "y": 50 + "w": 6, + "x": 12, + "y": 51 }, "hiddenSeries": false, - "id": 61, + "id": 20, "legend": { "alignAsTable": true, "avg": false, @@ -1489,16 +1492,16 @@ "values": true }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [], - "nullPointMode": "null", + "nullPointMode": "connected", "options": { "alertThreshold": true }, "paceLength": 10, "percentage": false, "pluginVersion": "8.0.3", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -1507,20 +1510,23 @@ "steppedLine": false, "targets": [ { - "expr": "etcd_debugging_mvcc_keys_total{job=\"$job\"}", + "expr": "sum(rate(etcd_network_peer_received_bytes_total{job=\"$job\"}[5m])) by (job,instance)", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "{{job}}_{{instance}}_{{endpoint}}", - "refId": "A" + "metric": "etcd_network_peer_received_bytes_total", + "refId": "A", + "step": 120 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "The total number of key", + "title": "Peer Traffic In", "tooltip": { + "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -1535,7 +1541,7 @@ }, "yaxes": [ { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -1562,7 +1568,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "decimals": 0, + "decimals": null, "editable": true, "error": false, "fieldConfig": { @@ -1573,24 +1579,22 @@ }, "fill": 0, "fillGradient": 0, + "grid": {}, "gridPos": { "h": 8, - "w": 12, - "x": 12, - "y": 50 + "w": 6, + "x": 18, + "y": 51 }, "hiddenSeries": false, - "id": 19, + "id": 16, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, - "rightSide": false, "show": true, - "sort": "current", - "sortDesc": true, "total": false, "values": true }, @@ -1613,25 +1617,27 @@ "steppedLine": false, "targets": [ { - "expr": "changes(etcd_server_leader_changes_seen_total{job=\"$job\"}[6h]) ", + "expr": "sum(rate(etcd_network_peer_sent_bytes_total{job=\"$job\"}[5m])) by (job,instance)", "format": "time_series", + "hide": false, + "interval": "", "intervalFactor": 2, "legendFormat": "{{job}}_{{instance}}_{{endpoint}}", - "metric": "etcd_server_leader_changes_seen_total", + "metric": "etcd_network_peer_sent_bytes_total", "refId": "A", - "step": 60 + "step": 120 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Total Leader Elections Per Day", + "title": "Peer Traffic Out", "tooltip": { "msResolution": false, "shared": true, "sort": 0, - "value_type": "individual" + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -1643,8 +1649,7 @@ }, "yaxes": [ { - "format": "short", - "label": null, + "format": "Bps", "logBase": 1, "max": null, "min": null, @@ -1652,7 +1657,6 @@ }, { "format": "short", - "label": null, "logBase": 1, "max": null, "min": null, @@ -1684,7 +1688,7 @@ "h": 8, "w": 12, "x": 0, - "y": 58 + "y": 59 }, "hiddenSeries": false, "id": 40, @@ -1802,39 +1806,42 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "indicates how many proposals are queued to commit. Rising pending proposals suggests there is a high client load or the member cannot commit proposals.", + "decimals": 0, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "links": [] }, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 58 + "y": 59 }, "hiddenSeries": false, - "id": 5, + "id": 19, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, + "rightSide": false, "show": true, - "sort": "max", + "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [], - "nullPointMode": "null", + "nullPointMode": "connected", "options": { "alertThreshold": true }, @@ -1850,10 +1857,11 @@ "steppedLine": false, "targets": [ { - "expr": "sum(etcd_server_proposals_pending{job=\"$job\"}) by (job,instance)", + "expr": "changes(etcd_server_leader_changes_seen_total{job=\"$job\"}[6h]) ", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{job}}_{{instance}}_{{endpoint}} Proposals pending", + "legendFormat": "{{job}}_{{instance}}_{{endpoint}}", + "metric": "etcd_server_leader_changes_seen_total", "refId": "A", "step": 60 } @@ -1862,8 +1870,9 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Proposals Pending", + "title": "Total Leader Elections Per Day", "tooltip": { + "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -1917,7 +1926,7 @@ "h": 9, "w": 12, "x": 0, - "y": 66 + "y": 67 }, "hiddenSeries": false, "id": 46, @@ -2007,34 +2016,46 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", + "description": "indicates how many proposals are queued to commit. Rising pending proposals suggests there is a high client load or the member cannot commit proposals.", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, - "y": 66 + "y": 67 }, "hiddenSeries": false, - "id": 71, + "id": 5, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "max": true, "min": false, "show": true, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, + "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, + "paceLength": 10, "percentage": false, "pluginVersion": "8.0.3", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -2043,18 +2064,19 @@ "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "etcd_server_slow_apply_total{job=\"$job\"}", - "interval": "", - "legendFormat": "{{job}}_{{instance}}_{{endpoint}}", - "refId": "A" + "expr": "sum(etcd_server_proposals_pending{job=\"$job\"}) by (job,instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}}_{{instance}}_{{endpoint}} Proposals pending", + "refId": "A", + "step": 60 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "The total number of slow apply", + "title": "Proposals Pending", "tooltip": { "shared": true, "sort": 0, @@ -2110,7 +2132,7 @@ "h": 9, "w": 12, "x": 0, - "y": 75 + "y": 76 }, "hiddenSeries": false, "id": 2, @@ -2217,10 +2239,10 @@ "h": 9, "w": 12, "x": 12, - "y": 75 + "y": 76 }, "hiddenSeries": false, - "id": 69, + "id": 71, "legend": { "avg": false, "current": false, @@ -2247,7 +2269,8 @@ "steppedLine": false, "targets": [ { - "expr": "etcd_debugging_mvcc_slow_watcher_total{job=\"$job\"}", + "exemplar": true, + "expr": "etcd_server_slow_apply_total{job=\"$job\"}", "interval": "", "legendFormat": "{{job}}_{{instance}}_{{endpoint}}", "refId": "A" @@ -2257,7 +2280,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "The total number of slow watcher", + "title": "The total number of slow apply", "tooltip": { "shared": true, "sort": 0, @@ -2306,7 +2329,7 @@ "h": 9, "w": 12, "x": 0, - "y": 84 + "y": 85 }, "hiddenSeries": false, "id": 65, @@ -2389,27 +2412,24 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "auth revision increase", "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, - "y": 84 + "y": 85 }, "hiddenSeries": false, - "id": 75, + "id": 69, "legend": { - "alignAsTable": true, "avg": false, - "current": true, - "max": true, + "current": false, + "max": false, "min": false, - "rightSide": false, "show": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, @@ -2428,7 +2448,7 @@ "steppedLine": false, "targets": [ { - "expr": "increase(etcd_debugging_auth_revision{job=\"$job\"}[5m])", + "expr": "etcd_debugging_mvcc_slow_watcher_total{job=\"$job\"}", "interval": "", "legendFormat": "{{job}}_{{instance}}_{{endpoint}}", "refId": "A" @@ -2438,7 +2458,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Auth Revison Increase", + "title": "The total number of slow watcher", "tooltip": { "shared": true, "sort": 0, @@ -2454,7 +2474,7 @@ }, "yaxes": [ { - "format": "count/5m", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -2493,7 +2513,7 @@ "h": 9, "w": 12, "x": 0, - "y": 93 + "y": 94 }, "hiddenSeries": false, "id": 7, @@ -2588,43 +2608,37 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "Abnormally high snapshot duration (snapshot_save_total_duration_seconds) indicates disk issues and might cause the cluster to be unstable.", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, + "description": "auth revision increase", "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, - "y": 93 + "y": 94 }, "hiddenSeries": false, - "id": 9, + "id": 75, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "max": true, "min": false, + "rightSide": false, "show": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, - "paceLength": 10, "percentage": false, "pluginVersion": "8.0.3", - "pointradius": 5, + "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -2633,19 +2647,17 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(etcd_debugging_snap_save_total_duration_seconds_sum{job=\"$job\"}[1m])) by (job)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{job}} The total latency distributions of save called by snapshot", - "refId": "A", - "step": 30 + "expr": "increase(etcd_debugging_auth_revision{job=\"$job\"}[5m])", + "interval": "", + "legendFormat": "{{job}}_{{instance}}_{{endpoint}}", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Snapshot Duration", + "title": "Auth Revison Increase", "tooltip": { "shared": true, "sort": 0, @@ -2661,7 +2673,7 @@ }, "yaxes": [ { - "format": "short", + "format": "count/5m", "label": null, "logBase": 1, "max": null, @@ -2695,7 +2707,7 @@ "h": 9, "w": 12, "x": 0, - "y": 102 + "y": 103 }, "hiddenSeries": false, "id": 74, @@ -2780,6 +2792,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", + "description": "Abnormally high snapshot duration (snapshot_save_total_duration_seconds) indicates disk issues and might cause the cluster to be unstable.", "fieldConfig": { "defaults": { "links": [] @@ -2792,10 +2805,10 @@ "h": 9, "w": 12, "x": 12, - "y": 102 + "y": 103 }, "hiddenSeries": false, - "id": 8, + "id": 9, "legend": { "avg": false, "current": false, @@ -2824,27 +2837,19 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(etcd_network_client_grpc_received_bytes_total{job=\"$job\"}[1m])) by (job) ", + "expr": "sum(rate(etcd_debugging_snap_save_total_duration_seconds_sum{job=\"$job\"}[1m])) by (job)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{job}} The total number of bytes received by grpc clients", + "legendFormat": "{{job}} The total latency distributions of save called by snapshot", "refId": "A", "step": 30 - }, - { - "expr": "sum(rate(etcd_network_client_grpc_sent_bytes_total{job=\"$job\"}[1m])) by (job) ", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{job}} The total number of bytes sent to grpc clients", - "refId": "B", - "step": 30 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Network", + "title": "Snapshot Duration", "tooltip": { "shared": true, "sort": 0, @@ -2899,7 +2904,7 @@ "h": 9, "w": 12, "x": 0, - "y": 111 + "y": 112 }, "hiddenSeries": false, "id": 50, @@ -2997,7 +3002,114 @@ "h": 9, "w": 12, "x": 12, - "y": 111 + "y": 112 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "8.0.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(etcd_network_client_grpc_received_bytes_total{job=\"$job\"}[1m])) by (job) ", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}} The total number of bytes received by grpc clients", + "refId": "A", + "step": 30 + }, + { + "expr": "sum(rate(etcd_network_client_grpc_sent_bytes_total{job=\"$job\"}[1m])) by (job) ", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}} The total number of bytes sent to grpc clients", + "refId": "B", + "step": 30 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 121 }, "hiddenSeries": false, "id": 52, @@ -3084,7 +3196,7 @@ "h": 1, "w": 24, "x": 0, - "y": 120 + "y": 130 }, "id": 78, "panels": [], @@ -3190,7 +3302,7 @@ "h": 9, "w": 12, "x": 0, - "y": 121 + "y": 131 }, "id": 63, "interval": null, @@ -3257,7 +3369,7 @@ "h": 9, "w": 12, "x": 12, - "y": 121 + "y": 131 }, "hiddenSeries": false, "id": 56, @@ -3359,7 +3471,7 @@ "h": 9, "w": 12, "x": 0, - "y": 130 + "y": 140 }, "hiddenSeries": false, "id": 58, @@ -3462,16 +3574,17 @@ "h": 9, "w": 12, "x": 12, - "y": 130 + "y": 140 }, "hiddenSeries": false, - "id": 81, + "id": 54, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, + "rightSide": false, "show": true, "sort": "current", "sortDesc": true, @@ -3497,52 +3610,19 @@ "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "kstone_inspection_etcd_node_diff_total{clusterName=\"$job\"}", + "expr": "topk(15, sum(rate(kstone_inspection_etcd_request_total{clusterName=\"$job\"}[5m])) by (clusterName,etcdPrefix,grpcMethod,resourceName))", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "key_{{clusterName}}", + "legendFormat": "{{etcdPrefix}}_{{grpcMethod}}_{{resourceName}}", "refId": "A" - }, - { - "exemplar": true, - "expr": "kstone_inspection_etcd_node_revision_diff_total{clusterName=\"$job\"}", - "hide": false, - "interval": "", - "legendFormat": "revision_{{clusterName}}", - "refId": "B" - }, - { - "exemplar": true, - "expr": "kstone_inspection_etcd_node_index_diff_total{clusterName=\"$job\"}", - "hide": false, - "interval": "", - "legendFormat": "index_{{clusterName}}", - "refId": "C" - }, - { - "exemplar": true, - "expr": "kstone_inspection_etcd_node_raft_applied_index_diff_total{clusterName=\"$job\"}", - "hide": false, - "interval": "", - "legendFormat": "raft_applied_{{clusterName}}", - "refId": "D" - }, - { - "exemplar": true, - "expr": "kstone_inspection_etcd_node_raft_index_diff_total{clusterName=\"$job\"}", - "hide": false, - "interval": "", - "legendFormat": "raft_index_{{clusterName}}", - "refId": "E" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "The consistency difference between each node", + "title": "Hot Write QPS", "tooltip": { "shared": true, "sort": 0, @@ -3596,18 +3676,17 @@ "gridPos": { "h": 9, "w": 12, - "x": 12, - "y": 139 + "x": 0, + "y": 149 }, "hiddenSeries": false, - "id": 54, + "id": 81, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, - "rightSide": false, "show": true, "sort": "current", "sortDesc": true, @@ -3633,19 +3712,52 @@ "steppedLine": false, "targets": [ { - "expr": "topk(15, sum(rate(kstone_inspection_etcd_request_total{clusterName=\"$job\"}[5m])) by (clusterName,etcdPrefix,grpcMethod,resourceName))", + "exemplar": true, + "expr": "kstone_inspection_etcd_node_diff_total{clusterName=\"$job\"}", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "{{etcdPrefix}}_{{grpcMethod}}_{{resourceName}}", + "legendFormat": "key_{{clusterName}}", "refId": "A" + }, + { + "exemplar": true, + "expr": "kstone_inspection_etcd_node_revision_diff_total{clusterName=\"$job\"}", + "hide": false, + "interval": "", + "legendFormat": "revision_{{clusterName}}", + "refId": "B" + }, + { + "exemplar": true, + "expr": "kstone_inspection_etcd_node_index_diff_total{clusterName=\"$job\"}", + "hide": false, + "interval": "", + "legendFormat": "index_{{clusterName}}", + "refId": "C" + }, + { + "exemplar": true, + "expr": "kstone_inspection_etcd_node_raft_applied_index_diff_total{clusterName=\"$job\"}", + "hide": false, + "interval": "", + "legendFormat": "raft_applied_{{clusterName}}", + "refId": "D" + }, + { + "exemplar": true, + "expr": "kstone_inspection_etcd_node_raft_index_diff_total{clusterName=\"$job\"}", + "hide": false, + "interval": "", + "legendFormat": "raft_index_{{clusterName}}", + "refId": "E" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Hot Write QPS", + "title": "The consistency difference between each node", "tooltip": { "shared": true, "sort": 0, @@ -3681,6 +3793,176 @@ "align": false, "alignLevel": null } + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 149 + }, + "id": 85, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "kstone_inspection_etcd_backup_files", + "interval": "", + "legendFormat": "{{clusterName}}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "kstone_inspection_etcd_failed_backup_files", + "hide": false, + "interval": "", + "legendFormat": "{{clusterName}}_failed", + "refId": "B" + } + ], + "title": "The number of backup file in the last day", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 158 + }, + "id": 84, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "kstone_inspection_failed_num", + "interval": "", + "legendFormat": "{{clusterName}}_{{inspectionType}}", + "refId": "A" + } + ], + "title": "The total number of failed inspection", + "type": "timeseries" } ], "refresh": false, @@ -3713,9 +3995,9 @@ { "allValue": null, "current": { - "selected": true, - "text": "demo0x00", - "value": "demo0x00" + "selected": false, + "text": "demo", + "value": "demo" }, "datasource": "$datasource", "definition": "query_result(sum by (job) (increase(etcd_server_has_leader{}[$__range])))", diff --git a/charts/charts/inspection-controller/values.yaml b/charts/charts/inspection-controller/values.yaml index ba7a3da..6cd9ceb 100644 --- a/charts/charts/inspection-controller/values.yaml +++ b/charts/charts/inspection-controller/values.yaml @@ -8,7 +8,7 @@ image: repository: tkestack/kstone-controller-amd64 pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. - tag: "v0.0.1-alpha.2" + tag: "v0.1.0-alpha.2" imagePullSecrets: [] nameOverride: "" diff --git a/charts/values.test.yaml b/charts/values.test.yaml index 4ad95cd..cb7dd41 100644 --- a/charts/values.test.yaml +++ b/charts/values.test.yaml @@ -5,7 +5,7 @@ global: env: test kstone: - tag: v0.1.0-alpha.1 + tag: v0.1.0-alpha.2 serviceAccount: # Specifies whether a service account should be created diff --git a/charts/values.yaml b/charts/values.yaml index e31aa56..cfcd005 100644 --- a/charts/values.yaml +++ b/charts/values.yaml @@ -5,7 +5,7 @@ global: env: production kstone: - tag: v0.1.0-alpha.1 + tag: v0.1.0-alpha.2 serviceAccount: # Specifies whether a service account should be created