Summary of this patch (free text before the first "diff --git" header is ignored by patch tools):
  * panels.libsonnet: the legend sort key changes from 'max' to 'Max'.
  * queries.libsonnet: adds promReplCpuUsage, metricsServerCpuUsage and metricsServerMemUsage;
    each query() concatenates two generateTimeSeriesQuery(...) results with the Jsonnet '+' operator.
    The file now ends with a trailing newline.
  * ocp-performance-v2.jsonnet: the Prometheus replica row gains CPU and metrics-server/prom-adapter
    panels, and that row plus the Stackrox row are laid out as 12-wide, 8-high side-by-side panels.
NOTE(review): leading whitespace inside the hunks was reconstructed from a whitespace-collapsed copy;
verify against the target files or apply with --ignore-whitespace.

diff --git a/assets/ocp-performance/panels.libsonnet b/assets/ocp-performance/panels.libsonnet
index 8d192c9..01aedbd 100644
--- a/assets/ocp-performance/panels.libsonnet
+++ b/assets/ocp-performance/panels.libsonnet
@@ -29,7 +29,7 @@ local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonn
           'max',
           'min',
         ])
-        + options.legend.withSortBy('max')
+        + options.legend.withSortBy('Max')
         + options.legend.withSortDesc(true)
         + options.legend.withPlacement('bottom'),
     },
diff --git a/assets/ocp-performance/queries.libsonnet b/assets/ocp-performance/queries.libsonnet
index 274c3a1..b91f303 100644
--- a/assets/ocp-performance/queries.libsonnet
+++ b/assets/ocp-performance/queries.libsonnet
@@ -103,11 +103,26 @@ local generateTimeSeriesQuery(query, legend) = [
     query():
       generateTimeSeriesQuery('topk(10, sum(container_memory_rss{pod=~"ovnkube-node-.*",namespace="openshift-ovn-kubernetes",container="ovn-controller"}) by (node))', '{{node}}')
   },
+  promReplCpuUsage: {
+    query():
+      generateTimeSeriesQuery('sum(irate(container_cpu_usage_seconds_total{pod=~"prometheus-k8s-0",namespace!="",name!="",container="prometheus"}[$interval])) by (pod,container) * 100', '{{pod}}')
+      + generateTimeSeriesQuery('sum(irate(container_cpu_usage_seconds_total{pod=~"prometheus-k8s-1",namespace!="",name!="",container="prometheus"}[$interval])) by (pod,container) * 100', '{{pod}}')
+  },
   promReplMemUsage: {
     query():
       generateTimeSeriesQuery('sum(container_memory_rss{pod="prometheus-k8s-1",namespace!="",name!="",container="prometheus"}) by (pod)', '{{pod}}')
       + generateTimeSeriesQuery('sum(container_memory_rss{pod="prometheus-k8s-0",namespace!="",name!="",container="prometheus"}) by (pod)', '{{pod}}')
   },
+  metricsServerCpuUsage: {
+    query():
+      generateTimeSeriesQuery('sum(irate(container_cpu_usage_seconds_total{pod=~"metrics-server-.*",namespace!="",name!=""}[$interval])) by (pod,container) * 100', '{{pod}}')
+      + generateTimeSeriesQuery('sum(irate(container_cpu_usage_seconds_total{pod=~"prometheus-adapter-.*",namespace="openshift-monitoring",name!=""}[$interval])) by (pod,container) * 100', '{{pod}}')
+  },
+  metricsServerMemUsage: {
+    query():
+      generateTimeSeriesQuery('sum(container_memory_rss{pod=~"metrics-server-.*",namespace!="",name!=""}) by (pod)', '{{pod}}')
+      + generateTimeSeriesQuery('sum(container_memory_rss{pod=~"prometheus-adapter-.*",namespace="openshift-monitoring",name!=""}) by (pod)', '{{pod}}')
+  },
   kubeletCPU: {
     query():
       generateTimeSeriesQuery('topk(10,irate(process_cpu_seconds_total{service="kubelet",job="kubelet"}[$interval])*100)', 'kubelet - {{node}}')
@@ -202,4 +217,4 @@ local generateTimeSeriesQuery(query, legend) = [
     query():
       generateTimeSeriesQuery('cluster_operator_conditions{condition="Degraded",name!="",reason!=""}', '{{name}} - {{reason}}')
   },
-}
\ No newline at end of file
+}
diff --git a/templates/General/ocp-performance-v2.jsonnet b/templates/General/ocp-performance-v2.jsonnet
index dbe421c..c0734d2 100644
--- a/templates/General/ocp-performance-v2.jsonnet
+++ b/templates/General/ocp-performance-v2.jsonnet
@@ -30,7 +30,7 @@ g.dashboard.new('Openshift Performance')
   + g.panel.row.withGridPos({ x: 0, y: 0, w: 24, h: 1 })
   + g.panel.row.withCollapsed(true)
   + g.panel.row.withPanels([
-    panels.timeSeries.generic('99% Pod Annotation Latency', 's', queries.ovnAnnotationLatency.query(), { x: 0, y: 1, w: 24, h: 12 }),
+    panels.timeSeries.generic('99% Pod Annotation Latency', 's', queries.ovnAnnotationLatency.query(), { x: 0, y: 1, w: 24, h: 4 }),
     panels.timeSeries.generic('99% CNI Request ADD Latency', 's', queries.ovnCNIAdd.query(), { x: 0, y: 13, w: 12, h: 8 }),
     panels.timeSeries.generic('99% CNI Request DEL Latency', 's', queries.ovnCNIDel.query(), { x: 12, y: 13, w: 12, h: 8 }),
     panels.timeSeries.genericLegend('ovnkube-master CPU Usage', 'percent', queries.ovnKubeMasterCPU.query(), { x: 0, y: 21, w: 12, h: 8 }),
@@ -42,14 +42,17 @@ g.dashboard.new('Openshift Performance')
   + g.panel.row.withGridPos({ x: 0, y: 0, w: 24, h: 1 })
   + g.panel.row.withCollapsed(true)
   + g.panel.row.withPanels([
-    panels.timeSeries.genericLegend('Prometheus Replica Memory usage', 'bytes', queries.promReplMemUsage.query(), { x: 0, y: 2, w: 24, h: 12 }),
+    panels.timeSeries.genericLegend('Prometheus Replica CPU', 'percent', queries.promReplCpuUsage.query(), { x: 0, y: 2, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Prometheus Replica RSS', 'bytes', queries.promReplMemUsage.query(), { x: 12, y: 2, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('metrics-server/prom-adapter CPU', 'percent', queries.metricsServerCpuUsage.query(), { x: 0, y: 10, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('metrics-server/prom-adapter RSS', 'bytes', queries.metricsServerMemUsage.query(), { x: 12, y: 10, w: 12, h: 8 }),
   ]),
   g.panel.row.new('Stackrox')
   + g.panel.row.withGridPos({ x: 0, y: 0, w: 24, h: 1 })
   + g.panel.row.withCollapsed(true)
   + g.panel.row.withPanels([
-    panels.timeSeries.genericLegend('Top 25 stackrox container RSS bytes', 'bytes', queries.stackroxMem.query(), { x: 0, y: 2, w: 24, h: 12 }),
-    panels.timeSeries.genericLegend('Top 25 stackrox container CPU percent', 'percent', queries.stackroxCPU.query(), { x: 0, y: 2, w: 24, h: 12 }),
+    panels.timeSeries.genericLegend('Top 25 stackrox container RSS bytes', 'bytes', queries.stackroxMem.query(), { x: 0, y: 2, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Top 25 stackrox container CPU percent', 'percent', queries.stackroxCPU.query(), { x: 12, y: 2, w: 12, h: 8 }),
   ]),
   g.panel.row.new('Cluster Kubelet')
   + g.panel.row.withGridPos({ x: 0, y: 0, w: 24, h: 1 })