Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add workload summary and compare data graphs on k8s-netperf #106

Merged
merged 1 commit into from
Jan 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 170 additions & 2 deletions assets/k8s-netperf/panels.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,144 @@ local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonn
"displayName": "ea7b29d7-8991-4752-a0d4-e26446d34915 TCP_STREAM 4096 Mb/s AWS"
}
]),

// Table panel summarising the cluster metadata of each benchmark run.
//
// Built on self.base(title, targets, gridPos) and extended with two
// transformations applied in order:
//   1. "organize" - hides the per-sample fields, fixes the column order and
//                   gives the remaining metadata fields display names;
//   2. "groupBy"  - groups rows by the renamed "UUID" column and aggregates
//                   every other displayed column.
//
// Parameters:
//   title   - panel title, forwarded to self.base.
//   targets - query target(s), forwarded to self.base.
//   gridPos - panel position object, forwarded to self.base.
workloadSummary(title, targets, gridPos):
  self.base(title, targets, gridPos)
  + table.queryOptions.withTransformations([
    {
      "id": "organize",
      "options": {
        // Fields removed from the table.
        "excludeByName": {
          "_id": true,
          "_index": true,
          "_type": true,
          "clientCPU.idleCPU": true,
          "clientCPU.ioCPU": true,
          "clientCPU.irqCPU": true,
          "clientCPU.niceCPU": true,
          "clientCPU.softCPU": true,
          "clientCPU.stealCPU": true,
          "clientCPU.systemCPU": true,
          "clientCPU.userCPU": true,
          "clientNodeLabels.beta.kubernetes.io/arch": true,
          "clientNodeLabels.beta.kubernetes.io/instance-type": true,
          "clientNodeLabels.beta.kubernetes.io/os": true,
          "clientNodeLabels.failure-domain.beta.kubernetes.io/region": true,
          "clientNodeLabels.failure-domain.beta.kubernetes.io/zone": true,
          "clientNodeLabels.hypershift.openshift.io/managed": true,
          "clientNodeLabels.hypershift.openshift.io/nodePool": true,
          "clientNodeLabels.kubernetes.io/arch": true,
          "clientNodeLabels.kubernetes.io/hostname": true,
          "clientNodeLabels.kubernetes.io/os": true,
          "clientNodeLabels.node-role.kubernetes.io/worker": true,
          "clientNodeLabels.node.kubernetes.io/instance-type": true,
          "clientNodeLabels.node.openshift.io/os_id": true,
          "clientNodeLabels.topology.ebs.csi.aws.com/zone": true,
          "clientNodeLabels.topology.kubernetes.io/region": true,
          "clientNodeLabels.topology.kubernetes.io/zone": true,
          "clientPods": true,
          "confidence": true,
          "driver": true,
          "highlight": true,
          "hostNetwork": true,
          "latency": true,
          "local": true,
          "ltcyMetric": true,
          "messageSize": true,
          "metadata.ipsec": true,
          "metadata.k8sVersion": true,
          "metadata.kernel": true,
          "metadata.masterNodesCount": true,
          "metadata.masterNodesType": true,
          "metadata.metricName": true,
          "metadata.mtu": true,
          "metadata.ocpShortVersion": true,
          "metadata.totalNodes": true,
          "parallelism": true,
          "profile": true,
          "samples": true,
          "serverCPU.idleCPU": true,
          "serverCPU.ioCPU": true,
          "serverCPU.irqCPU": true,
          "serverCPU.niceCPU": true,
          "serverCPU.softCPU": true,
          "serverCPU.stealCPU": true,
          "serverCPU.systemCPU": true,
          "serverCPU.userCPU": true,
          "serverNodeLabels.beta.kubernetes.io/arch": true,
          "serverNodeLabels.beta.kubernetes.io/instance-type": true,
          "serverNodeLabels.beta.kubernetes.io/os": true,
          "serverNodeLabels.failure-domain.beta.kubernetes.io/region": true,
          "serverNodeLabels.failure-domain.beta.kubernetes.io/zone": true,
          "serverNodeLabels.hypershift.openshift.io/managed": true,
          "serverNodeLabels.hypershift.openshift.io/nodePool": true,
          "serverNodeLabels.kubernetes.io/arch": true,
          "serverNodeLabels.kubernetes.io/hostname": true,
          "serverNodeLabels.kubernetes.io/os": true,
          "serverNodeLabels.node-role.kubernetes.io/worker": true,
          "serverNodeLabels.node.kubernetes.io/instance-type": true,
          "serverNodeLabels.node.openshift.io/os_id": true,
          "serverNodeLabels.topology.ebs.csi.aws.com/zone": true,
          "serverNodeLabels.topology.kubernetes.io/region": true,
          "serverNodeLabels.topology.kubernetes.io/zone": true,
          "serverPods": true,
          "service": true,
          "sort": true,
          "tcpRetransmits": true,
          "throughput": true,
          "tputMetric": true,
          "udpLossPercent": true
        },
        // Column order, keyed by the ORIGINAL (pre-rename) field names.
        "indexByName": {
          "uuid": 0,
          "timestamp": 1,
          "metadata.platform": 2,
          "metadata.ocpVersion": 3,
          "metadata.clusterName": 4,
          "metadata.sdnType": 5,
          "metadata.infraNodesCount": 6,
          "metadata.infraNodesType": 7,
          "metadata.workerNodesCount": 8,
          "metadata.workerNodesType": 9,
          // Keyed by the same top-level `acrossAZ` name that renameByName
          // uses below; the previous `metadata.acrossAZ` key matched no
          // renamed field, so the Multi-AZ column was never positioned.
          "acrossAZ": 10,
          "metadata.region": 11
        },
        // Display names; the groupBy step below refers to these names.
        "renameByName": {
          "acrossAZ": "Multi-AZ",
          "metadata.clusterName": "Cluster Name",
          "metadata.infraNodesCount": "Infras",
          "metadata.infraNodesType": "Infra Type",
          "metadata.ocpVersion": "Version",
          "metadata.platform": "Platform",
          "metadata.region": "Region",
          "metadata.sdnType": "SDN",
          "metadata.workerNodesCount": "Workers",
          "metadata.workerNodesType": "Workers Type",
          "timestamp": "Timestamp",
          "uuid": "UUID"
        }
      }
    },
    {
      "id": "groupBy",
      "options": {
        "fields": {
          // Grouping key: one output row per UUID.
          "UUID": {"aggregations": [], "operation": "groupby"},
          "Cluster Name": {"aggregations": ["lastNotNull"], "operation": "aggregate"},
          "Infra Type": {"aggregations": ["lastNotNull"], "operation": "aggregate"},
          "Infras": {"aggregations": ["lastNotNull"], "operation": "aggregate"},
          "Platform": {"aggregations": ["lastNotNull"], "operation": "aggregate"},
          "Region": {"aggregations": ["lastNotNull"], "operation": "aggregate"},
          "SDN": {"aggregations": ["lastNotNull"], "operation": "aggregate"},
          "Timestamp": {"aggregations": ["lastNotNull"], "operation": "aggregate"},
          "Version": {"aggregations": ["lastNotNull"], "operation": "aggregate"},
          "Workers": {"aggregations": ["lastNotNull"], "operation": "aggregate"},
          "Workers Type": {"aggregations": ["lastNotNull"], "operation": "aggregate"},
          // `duration` is neither renamed nor excluded above, so it keeps
          // its original field name here.
          "duration": {"aggregations": ["lastNotNull"], "operation": "aggregate"},
          "Multi-AZ": {"aggregations": ["last"], "operation": "aggregate"}
        }
      }
    }
  ]),
withLatencyOverrides(title, targets, gridPos):
self.base(title, targets, gridPos)
+ table.queryOptions.withTransformations([
Expand Down Expand Up @@ -168,7 +305,38 @@ local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonn
}
]),
},
// Bar gauge panel builders.
barGauge: {
  local barGauge = g.panel.barGauge,

  // Common bar gauge definition shared by the variants below.
  //
  // Parameters:
  //   title   - panel title.
  //   targets - query target(s) attached to the panel.
  //   gridPos - object with x, y, h and w fields giving the panel position.
  //
  // The panel reads from the Elasticsearch datasource selected by the
  // `$datasource` dashboard variable, reduces each series with
  // "lastNotNull", and is repeated over the `messageSize` variable.
  base(title, targets, gridPos):
    barGauge.new(title)
    + barGauge.queryOptions.withTargets(targets)
    + barGauge.datasource.withType('elasticsearch')
    + barGauge.datasource.withUid('$datasource')
    + barGauge.options.reduceOptions.withValues(false)
    + barGauge.options.reduceOptions.withCalcs(["lastNotNull"])
    + barGauge.options.reduceOptions.withFields("")
    + barGauge.options.withOrientation("horizontal")
    + barGauge.options.withDisplayMode("gradient")
    + barGauge.options.withValueMode("color")
    + barGauge.panelOptions.withRepeat("messageSize")
    // The axis minimum is a number, not the string "0".
    + barGauge.standardOptions.withMin(0)
    + barGauge.standardOptions.color.withMode("palette-classic")
    + barGauge.gridPos.withX(gridPos.x)
    + barGauge.gridPos.withY(gridPos.y)
    + barGauge.gridPos.withH(gridPos.h)
    + barGauge.gridPos.withW(gridPos.w),

  // Base bar gauge with the value unit set to "Mbits".
  withThroughput(title, targets, gridPos):
    self.base(title, targets, gridPos)
    + barGauge.standardOptions.withUnit("Mbits"),

  // Base bar gauge with the value unit set to "µs".
  withLatency(title, targets, gridPos):
    self.base(title, targets, gridPos)
    + barGauge.standardOptions.withUnit("µs"),
},
timeSeries: {
local timeSeries = g.panel.timeSeries,
local custom = timeSeries.fieldConfig.defaults.custom,
Expand Down Expand Up @@ -246,4 +414,4 @@ local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonn
}
]),
},
}
}
54 changes: 51 additions & 3 deletions assets/k8s-netperf/queries.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ local elasticsearch = g.query.elasticsearch;

{
all: {
query(metric, aggregationMetric):
query(metric, aggregationMetric):
elasticsearch.withAlias("{{metadata.ocpVersion.keyword}} hostNetwork={{hostNetwork}} procs={{parallelism}}")
+ elasticsearch.withBucketAggs([
elasticsearch.bucketAggs.Terms.withField("messageSize")
Expand Down Expand Up @@ -79,7 +79,7 @@ local elasticsearch = g.query.elasticsearch;
+ elasticsearch.withTimeField('timestamp')
},
parallelismAll: {
query(metric, aggregationMetric):
query(metric, aggregationMetric):
elasticsearch.withAlias("")
+ elasticsearch.withBucketAggs([
elasticsearch.bucketAggs.Terms.withField("uuid.keyword")
Expand Down Expand Up @@ -161,4 +161,52 @@ local elasticsearch = g.query.elasticsearch;
+ elasticsearch.withQuery('uuid: $uuid AND parallelism: $parallelism AND profile: ' + metric + ' AND messageSize: $messageSize AND driver.keyword: $driver AND metadata.platform: $platform AND hostNetwork: $hostNetwork AND service: $service')
+ elasticsearch.withTimeField('timestamp')
},
}
// Raw-document query that feeds the workload summary table.
summary: {
  // Builds an Elasticsearch target that returns raw documents (no bucket
  // aggregations, empty alias) matching the dashboard variable filters.
  //
  // Parameters:
  //   metric            - value matched against the `profile` field.
  //   aggregationMetric - accepted for signature parity with the other
  //                       queries; not used by this one.
  query(metric, aggregationMetric):
    local rawData = elasticsearch.metrics.MetricAggregationWithSettings.RawData;
    local filter =
      'uuid: $uuid AND parallelism: $parallelism AND profile: ' + metric
      + ' AND messageSize: $messageSize AND driver.keyword: $driver AND metadata.platform: $platform AND hostNetwork: $hostNetwork AND service: $service';

    elasticsearch.withAlias("")
    + elasticsearch.withBucketAggs([])
    + elasticsearch.withMetrics([
      rawData.withHide(false)
      + rawData.withId("1")
      + rawData.withType("raw_data")
      + rawData.settings.withSize("500"),
    ])
    + elasticsearch.withQuery(filter)
    + elasticsearch.withTimeField('timestamp'),
},
// Comparison query used by the bar gauge panels.
metricCompare: {
  // Builds an Elasticsearch target that averages `aggregationMetric`,
  // bucketed by `parallelism`, then by the field chosen in the
  // `$compare_by` dashboard variable, then by a date histogram on
  // `timestamp`.
  //
  // Parameters:
  //   metric            - value matched against the `profile` field.
  //   aggregationMetric - document field that is averaged.
  //   hostNetwork       - value matched against the `hostNetwork` field.
  //   service           - value matched against the `service` field.
  //
  // The filter is fixed to `acrossAZ: false`.
  query(metric, aggregationMetric, hostNetwork, service):
    local terms = elasticsearch.bucketAggs.Terms;
    local dateHistogram = elasticsearch.bucketAggs.DateHistogram;
    local average = elasticsearch.metrics.MetricAggregationWithSettings.Average;

    elasticsearch.withAlias("{{$compare_by}} Procs: {{parallelism}}")
    + elasticsearch.withBucketAggs([
      terms.withField("parallelism")
      + terms.withId("1")
      + terms.withType('terms')
      + terms.settings.withOrder('asc')
      + terms.settings.withOrderBy('_term')
      + terms.settings.withMinDocCount('1')
      + terms.settings.withSize("0"),
      terms.withField("$compare_by")
      + terms.withId("2")
      + terms.withType('terms')
      + terms.settings.withOrder('desc')
      + terms.settings.withOrderBy('_term')
      + terms.settings.withMinDocCount('1')
      + terms.settings.withSize("10"),
      dateHistogram.withField('timestamp')
      + dateHistogram.withId("3")
      + dateHistogram.withType('date_histogram')
      + dateHistogram.settings.withInterval('auto')
      + dateHistogram.settings.withMinDocCount("1")
      + dateHistogram.settings.withTimeZone("utc")
      + dateHistogram.settings.withTrimEdges(0),
    ])
    + elasticsearch.withMetrics([
      // Use the Average builder for every attribute of the average metric
      // rather than borrowing withId/withType from the RawData builder.
      average.withField(aggregationMetric)
      + average.withId("1")
      + average.withType("avg"),
    ])
    + elasticsearch.withQuery('uuid: $uuid AND parallelism: $parallelism AND profile: ' + metric + ' AND messageSize: $messageSize AND driver.keyword: $driver AND hostNetwork: ' + hostNetwork + ' AND service: ' + service + ' AND acrossAZ: false')
    + elasticsearch.withTimeField('timestamp'),
}
}
18 changes: 12 additions & 6 deletions assets/k8s-netperf/variables.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ local var = g.dashboard.variable;
var.query.new('platform', "{\"find\": \"terms\", \"field\": \"metadata.platform.keyword\"}")
+ var.query.withDatasourceFromVariable(self.datasource)
+ var.query.withRefresh(2)
+ var.query.selectionOptions.withMulti(false)
+ var.query.selectionOptions.withMulti(true)
+ var.query.selectionOptions.withIncludeAll(true)
+ var.query.generalOptions.withLabel('Platform'),

Expand All @@ -33,7 +33,7 @@ local var = g.dashboard.variable;
+ var.query.selectionOptions.withMulti(true)
+ var.query.selectionOptions.withIncludeAll(true)
+ var.query.generalOptions.withLabel('uuid'),

hostNetwork:
var.custom.new('hostNetwork', ['true', 'false'],)
+ var.custom.selectionOptions.withMulti(true)
Expand All @@ -45,15 +45,15 @@ local var = g.dashboard.variable;
+ var.custom.selectionOptions.withMulti(true)
+ var.custom.selectionOptions.withIncludeAll(true)
+ var.custom.generalOptions.withLabel('service'),

// Query variable named `parallelism` (labelled "streams"): a terms lookup on
// the `parallelism` field, restricted to the currently selected `$uuid`.
// Multi-select with an "All" option.
streams:
  local termsLookup = '{"find": "terms", "field": "parallelism", "query":"uuid: $uuid"}';
  local selection = var.query.selectionOptions;

  var.query.new('parallelism', termsLookup)
  + var.query.withDatasourceFromVariable(self.datasource)
  + var.query.withRefresh(2)
  + selection.withMulti(true)
  + selection.withIncludeAll(true)
  + var.query.generalOptions.withLabel('streams'),

throughput_profile:
var.query.new('throughput_profile', "{\"find\": \"terms\", \"field\": \"profile.keyword\", \"query\":\"uuid:$uuid\"}")
+ var.query.withDatasourceFromVariable(self.datasource)
Expand All @@ -71,7 +71,7 @@ local var = g.dashboard.variable;
+ var.query.selectionOptions.withMulti(true)
+ var.query.selectionOptions.withIncludeAll(true)
+ var.query.generalOptions.withLabel('Latency profile'),

messageSize:
var.query.new('messageSize', "{\"find\": \"terms\", \"field\": \"messageSize\",\"query\":\"uuid:$uuid\"}")
+ var.query.withDatasourceFromVariable(self.datasource)
Expand All @@ -87,4 +87,10 @@ local var = g.dashboard.variable;
+ var.query.selectionOptions.withMulti(false)
+ var.query.selectionOptions.withIncludeAll(false)
+ var.query.generalOptions.withLabel('Driver'),
}

// Custom single-select variable (labelled "Compare By") listing the document
// fields that can be chosen as `$compare_by`.
compare_by:
  local comparableFields = [
    'uuid.keyword',
    'metadata.ocpVersion.keyword',
    'metadata.clusterName.keyword',
    'metadata.ocpShortVersion.keyword',
    'metadata.platform.keyword',
  ];
  local selection = var.custom.selectionOptions;

  var.custom.new('compare_by', comparableFields)
  + selection.withMulti(false)
  + selection.withIncludeAll(false)
  + var.custom.generalOptions.withLabel('Compare By'),
}
15 changes: 15 additions & 0 deletions templates/CPT/k8s-netperf-v2.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,28 @@ g.dashboard.new('k8s-netperf')
variables.latency_profile,
variables.messageSize,
variables.driver,
variables.compare_by,
])
// Dashboard panels, in display order. Each gridPos is { x, y, w, h }.
+ g.dashboard.withPanels([
  // Workload summary: one metadata row per run. The row uses h: 1 like
  // every other row in this list (it was h: 0).
  panels.row.base('Workload Summary', '', { x: 0, y: 0, w: 24, h: 1 }),
  panels.table.workloadSummary('', queries.summary.query('$throughput_profile', 'throughput'), { x: 0, y: 0, w: 24, h: 11 }),

  // Latency and throughput time series for the selected profiles.
  panels.row.base('$latency_profile', 'latency_profile', { x: 0, y: 0, w: 24, h: 1 }),
  panels.timeSeries.base('$latency_profile - $driver - $messageSize', queries.all.query('$latency_profile', 'latency'), { x: 0, y: 0, w: 24, h: 8 }),
  panels.row.base('$throughput_profile', 'throughput_profile', { x: 0, y: 9, w: 24, h: 1 }),
  panels.timeSeries.withThroughputOverrides('$throughput_profile - $driver - $messageSize', queries.all.query('$throughput_profile', 'throughput'), { x: 0, y: 10, w: 24, h: 8 }),

  // Per-parallelism tables.
  panels.row.base('Parallelism $parallelism', 'parallelism', { x: 0, y: 18, w: 24, h: 1 }),
  panels.table.base('Throughput - Parallelism: $parallelism', queries.parallelismAll.query('$throughput_profile', 'throughput'), { x: 0, y: 19, w: 24, h: 11 }),
  panels.table.withLatencyOverrides('Latency - Parallelism: $parallelism', queries.parallelismAll.query('$latency_profile', 'latency'), { x: 0, y: 19, w: 24, h: 11 }),

  // Comparison bar gauges. The last two query arguments are passed as
  // (hostNetwork, service). In each section throughput takes the left half
  // (x: 0) and latency the right half (x: 12); both previously sat at x: 0
  // and overlapped.
  // NOTE(review): the throughput titles interpolate $throughput_profile while
  // the queries are fixed to TCP_STREAM / TCP_RR - confirm this is intended.
  panels.row.base('Node to Node', '', { x: 0, y: 20, w: 24, h: 1 }),
  panels.barGauge.withThroughput('$throughput_profile - $driver - $messageSize', queries.metricCompare.query('TCP_STREAM', 'throughput', true, false), { x: 0, y: 21, w: 12, h: 11 }),
  panels.barGauge.withLatency('Latency - $driver - $messageSize', queries.metricCompare.query('TCP_RR', 'latency', true, false), { x: 12, y: 21, w: 12, h: 11 }),

  panels.row.base('Pod to Pod', '', { x: 0, y: 22, w: 24, h: 1 }),
  panels.barGauge.withThroughput('$throughput_profile - $driver - $messageSize', queries.metricCompare.query('TCP_STREAM', 'throughput', false, false), { x: 0, y: 23, w: 12, h: 11 }),
  panels.barGauge.withLatency('Latency - $driver - $messageSize', queries.metricCompare.query('TCP_RR', 'latency', false, false), { x: 12, y: 23, w: 12, h: 11 }),

  panels.row.base('Pod to Pod via Service', '', { x: 0, y: 24, w: 24, h: 1 }),
  panels.barGauge.withThroughput('$throughput_profile - $driver - $messageSize', queries.metricCompare.query('TCP_STREAM', 'throughput', false, true), { x: 0, y: 25, w: 12, h: 11 }),
  panels.barGauge.withLatency('Latency - $driver - $messageSize', queries.metricCompare.query('TCP_RR', 'latency', false, true), { x: 12, y: 25, w: 12, h: 11 }),
])