Skip to content

Commit

Permalink
Add etcd_pod picker
Browse files Browse the repository at this point in the history
Signed-off-by: Andrew Collins <[email protected]>
  • Loading branch information
afcollins committed Sep 27, 2024
1 parent 86bf15f commit 17cda47
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 19 deletions.
12 changes: 8 additions & 4 deletions assets/etcd-on-cluster-dashboard/panels.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,19 @@ local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonn
+ options.legend.withSortBy('Max')
+ options.legend.withSortDesc(true),

rightHandOverride(title, unit, targets, gridPos, leftAxis):
self.generalUsageAgg(title, unit, targets, gridPos)
+ custom.withAxisLabel(leftAxis)
generalCounter(title, unit, targets, gridPos):
self.base(title, unit, targets, gridPos)
+ options.legend.withCalcs([
'first',
'min',
'max',
'last',
])
]),

histogramStatsRightHand(title, unit, targets, gridPos, leftAxis):
self.generalCounter(title, unit, targets, gridPos)
+ custom.withAxisLabel(leftAxis)
+ options.legend.withDisplayMode('table')
+ options.legend.withSortBy('Max')
+ standardOptions.withOverrides([
byRegexp.new('.*irate.*')
Expand Down
20 changes: 10 additions & 10 deletions assets/etcd-on-cluster-dashboard/queries.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -22,36 +22,36 @@ local generateTimeSeriesQuery(query, legend) = [

diskWalSyncDuration: {
query():
generateTimeSeriesQuery('histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket{namespace="openshift-etcd"}[5m])) by (pod, le))', '{{pod}} WAL fsync'),
generateTimeSeriesQuery('histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket{namespace="openshift-etcd",pod=~"$etcd_pod"}[5m])) by (pod, le))', '{{pod}} WAL fsync'),
},

diskWalSyncDurationSum: {
query():
generateTimeSeriesQuery('irate(etcd_disk_wal_fsync_duration_seconds_sum{namespace="openshift-etcd"}[2m])', '2m irate WAL sum {{instance}} ')
+ generateTimeSeriesQuery('etcd_disk_wal_fsync_duration_seconds_sum{namespace="openshift-etcd"}', 'WAL sum {{instance}} '),
generateTimeSeriesQuery('irate(etcd_disk_wal_fsync_duration_seconds_sum{namespace="openshift-etcd",pod=~"$etcd_pod"}[2m])', '2m irate WAL sum {{instance}} ')
+ generateTimeSeriesQuery('etcd_disk_wal_fsync_duration_seconds_sum{namespace="openshift-etcd",pod=~"$etcd_pod"}', 'WAL sum {{instance}} '),
},

diskWalSyncDurationCount: {
query():
generateTimeSeriesQuery('irate(etcd_disk_wal_fsync_duration_seconds_count{namespace="openshift-etcd"}[2m])', '2m irate WAL count {{instance}} ')
+ generateTimeSeriesQuery('etcd_disk_wal_fsync_duration_seconds_count{namespace="openshift-etcd"}', 'WAL count {{instance}} '),
generateTimeSeriesQuery('irate(etcd_disk_wal_fsync_duration_seconds_count{namespace="openshift-etcd",pod=~"$etcd_pod"}[2m])', '2m irate WAL count {{instance}} ')
+ generateTimeSeriesQuery('etcd_disk_wal_fsync_duration_seconds_count{namespace="openshift-etcd",pod=~"$etcd_pod"}', 'WAL count {{instance}} '),
},

diskBackendCommitDuration: {
query():
generateTimeSeriesQuery('histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket{namespace="openshift-etcd"}[5m])) by (pod, le))', '{{pod}} DB fsync'),
generateTimeSeriesQuery('histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket{namespace="openshift-etcd",pod=~"$etcd_pod"}[5m])) by (pod, le))', '{{pod}} DB fsync'),
},

// Targets for the "Backend Commit Duration sum" panel: a 2m irate of
// etcd_disk_backend_commit_duration_seconds_sum plus the raw series.
// The first pair of lines is the pre-commit form shown by the diff (no pod filter);
// the second pair is the post-commit form, filtered by pod=~"$etcd_pod".
// Post-commit legends say "DB" (as in diskBackendCommitDuration's '{{pod}} DB fsync')
// instead of the copy-pasted "WAL". The word "irate" is kept in the first legend
// because the panel override in panels.libsonnet matches series by '.*irate.*'.
diskBackendCommitDurationSum: {
query():
generateTimeSeriesQuery('irate(etcd_disk_backend_commit_duration_seconds_sum{namespace="openshift-etcd"}[2m])', '2m irate WAL sum {{instance}} ')
+ generateTimeSeriesQuery('etcd_disk_backend_commit_duration_seconds_sum{namespace="openshift-etcd"}', 'WAL sum {{instance}} '),
generateTimeSeriesQuery('irate(etcd_disk_backend_commit_duration_seconds_sum{namespace="openshift-etcd",pod=~"$etcd_pod"}[2m])', '2m irate DB sum {{instance}} ')
+ generateTimeSeriesQuery('etcd_disk_backend_commit_duration_seconds_sum{namespace="openshift-etcd",pod=~"$etcd_pod"}', 'DB sum {{instance}} '),
},

// Targets for the "Backend Commit Duration count" panel: a 2m irate of
// etcd_disk_backend_commit_duration_seconds_count plus the raw series.
// The first pair of lines is the pre-commit form shown by the diff (no pod filter);
// the second pair is the post-commit form, filtered by pod=~"$etcd_pod".
// Post-commit legends say "DB" (as in diskBackendCommitDuration's '{{pod}} DB fsync')
// instead of the copy-pasted "WAL". The word "irate" is kept in the first legend
// because the panel override in panels.libsonnet matches series by '.*irate.*'.
diskBackendCommitDurationCount: {
query():
generateTimeSeriesQuery('irate(etcd_disk_backend_commit_duration_seconds_count{namespace="openshift-etcd"}[2m])', '2m irate WAL count {{instance}} ')
+ generateTimeSeriesQuery('etcd_disk_backend_commit_duration_seconds_count{namespace="openshift-etcd"}', 'WAL count {{instance}} '),
generateTimeSeriesQuery('irate(etcd_disk_backend_commit_duration_seconds_count{namespace="openshift-etcd",pod=~"$etcd_pod"}[2m])', '2m irate DB count {{instance}} ')
+ generateTimeSeriesQuery('etcd_disk_backend_commit_duration_seconds_count{namespace="openshift-etcd",pod=~"$etcd_pod"}', 'DB count {{instance}} '),
},

etcdContainerDiskWrites: {
Expand Down
11 changes: 11 additions & 0 deletions assets/etcd-on-cluster-dashboard/variables.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,15 @@ local var = g.dashboard.variable;
+ var.query.withRefresh(1)
+ var.query.selectionOptions.withMulti(false)
+ var.query.selectionOptions.withIncludeAll(false),

// Dashboard query variable named 'etcd_pod'; its datasource is taken from the
// sibling `Datasource` variable of this object.
// queries.libsonnet consumes it as the label matcher pod=~"$etcd_pod".
etcd_pod:
var.query.new('etcd_pod')
+ var.query.withDatasourceFromVariable(self.Datasource)
// Options are the values of the `pod` label on the `etcd_cluster_version` metric.
+ var.query.queryTypes.withLabelValues(
'pod',
'etcd_cluster_version',
)
// NOTE(review): refresh mode 2 is presumably Grafana's "on time range change" — confirm.
+ var.query.withRefresh(2)
// NOTE(review): withMulti() with no argument presumably enables multi-select — confirm the default.
+ var.query.selectionOptions.withMulti()
// No "All" option is offered in the picker.
+ var.query.selectionOptions.withIncludeAll(false),
}
11 changes: 6 additions & 5 deletions templates/General/etcd-on-cluster-dashboard.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ g.dashboard.new('etcd-cluster-info dashoard')
+ g.dashboard.graphTooltip.withSharedCrosshair()
+ g.dashboard.withVariables([
variables.Datasource,
variables.etcd_pod,
])

+ g.dashboard.withPanels([
Expand All @@ -34,17 +35,17 @@ g.dashboard.new('etcd-cluster-info dashoard')
+ g.panel.row.withCollapsed(true)
+ g.panel.row.withPanels([
panels.timeSeries.generalUsageAgg('WAL fsync Duration p99', 's', queries.diskWalSyncDuration.query(), { x: 0, y: 0, w: 8, h: 8 }),
panels.timeSeries.rightHandOverride('WAL fsync Duration sum', 'none', queries.diskWalSyncDurationSum.query(), { x: 8, y: 0, w: 8, h: 8 }, 'sum'),
panels.timeSeries.rightHandOverride('WAL fsync Duration count', 'none', queries.diskWalSyncDurationCount.query(), { x: 16, y: 0, w: 8, h: 8 }, 'count'),
panels.timeSeries.histogramStatsRightHand('WAL fsync Duration sum', 'none', queries.diskWalSyncDurationSum.query(), { x: 8, y: 0, w: 8, h: 8 }, 'sum'),
panels.timeSeries.histogramStatsRightHand('WAL fsync Duration count', 'none', queries.diskWalSyncDurationCount.query(), { x: 16, y: 0, w: 8, h: 8 }, 'count'),
]),

g.panel.row.new('Backend Commit Duration Detailed')
+ g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 })
+ g.panel.row.withCollapsed(true)
+ g.panel.row.withPanels([
panels.timeSeries.generalUsageAgg('Backend Commit Duration', 's', queries.diskBackendCommitDuration.query(), { x: 0, y: 0, w: 8, h: 8 }),
panels.timeSeries.rightHandOverride('Backend Commit Duration sum', 'none', queries.diskBackendCommitDurationSum.query(), { x: 8, y: 0, w: 8, h: 8 }, 'sum'),
panels.timeSeries.rightHandOverride('Backend Commit Duration count', 'none', queries.diskBackendCommitDurationCount.query(), { x: 16, y: 0, w: 8, h: 8 }, 'count'),
panels.timeSeries.histogramStatsRightHand('Backend Commit Duration sum', 'none', queries.diskBackendCommitDurationSum.query(), { x: 8, y: 0, w: 8, h: 8 }, 'sum'),
panels.timeSeries.histogramStatsRightHand('Backend Commit Duration count', 'none', queries.diskBackendCommitDurationCount.query(), { x: 16, y: 0, w: 8, h: 8 }, 'count'),
]),

g.panel.row.new('Network Usage')
Expand Down Expand Up @@ -80,7 +81,7 @@ g.dashboard.new('etcd-cluster-info dashoard')
panels.timeSeries.GeneralInfo('Leader Elections Per Day', 'short', queries.leaderElectionsPerDay.query(), { x: 0, y: 12, w: 12, h: 6 }),
panels.timeSeries.GeneralInfo('Slow Operations', 'ops', queries.slowOperations.query(), { x: 0, y: 20, w: 12, h: 8 }),
panels.timeSeries.GeneralInfo('Key Operations', 'ops', queries.keyOperations.query(), { x: 12, y: 20, w: 12, h: 8 }),
panels.timeSeries.GeneralInfo('Heartbeat Failures', 'short', queries.heartBeatFailure.query(), { x: 0, y: 28, w: 12, h: 8 }),
panels.timeSeries.generalCounter('Heartbeat Failures', 'short', queries.heartBeatFailure.query(), { x: 0, y: 28, w: 12, h: 8 }),
panels.timeSeries.GeneralInfo('Compacted Keys', 'short', queries.compactedKeys.query(), { x: 12, y: 28, w: 12, h: 8 }),
]),

Expand Down

0 comments on commit 17cda47

Please sign in to comment.