From 3fb5de100eef0d3686cfef8f60ec6920a15cfb97 Mon Sep 17 00:00:00 2001
From: Krishna Harsha Voora <14876995+krishvoor@users.noreply.github.com>
Date: Tue, 19 Dec 2023 23:27:16 +0530
Subject: [PATCH] Adds Dynatrace metrics (#92)

* Adds Dynatrace metrics

This PR adds Dynatrace metrics, adds panels for
1) OneAgent CPU/Mem
2) ActiveGate CPU/Mem
3) Opentelemetry CPU/Mem

Signed-off-by: Krishna Harsha Voora

* Refactor'd change

1) HyperShift-performance dash
2) Updates to Makefile

* Updates the legend & metrics

* Minor nits update

* Revert changes to Makefile & GHA actions

* Minor nit

---------

Signed-off-by: Krishna Harsha Voora
---
 .../General/hypershift-performance.jsonnet    | 47 +++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/templates/General/hypershift-performance.jsonnet b/templates/General/hypershift-performance.jsonnet
index 81851b8..c427a73 100644
--- a/templates/General/hypershift-performance.jsonnet
+++ b/templates/General/hypershift-performance.jsonnet
@@ -86,6 +86,19 @@ local suricataMemory = genericGraphLegendPanel('Suricata Memory(Running on Servi
   )
 );
 
+local dynaoneagentMem = genericGraphLegendPanel('OneAgent Memory Usage', 'Cluster Prometheus', 'bytes').addTarget(  // RSS summed per (node, namespace, pod) for *-oneagent-* pods
+  prometheus.target(
+    'sum(container_memory_rss{namespace=~"dynatrace",pod=~".*-oneagent-.*",container!=""}) by (node, namespace, pod)',
+    legendFormat='{{ node }}: {{ namespace }} : {{ pod }}',
+  )
+);
+
+local dynaoneagentCPU = genericGraphLegendPanel('OneAgent CPU Usage', 'Cluster Prometheus', 'percent').addTarget(  // irate of CPU seconds over 2m, scaled by 100, summed per (node, namespace, pod)
+  prometheus.target(
+    'sum(irate(container_cpu_usage_seconds_total{namespace=~"dynatrace", pod=~".*-oneagent-.*", container!~"POD|"}[2m])*100) by (node, namespace, pod)',
+    legendFormat='{{ node }}: {{ namespace }} : {{ pod }}',
+  )
+);
 
 // Overall stats on the management cluster
 
@@ -398,6 +411,34 @@ local request_duration_99th_quantile_by_resource = grafana.graphPanel.new(
   )
 );
 
+// Dynatrace on the management cluster: ActiveGate and OpenTelemetry pods in the dynatrace namespace (memory as RSS, CPU as irate * 100)
+local dynaactivegateMem = genericGraphLegendPanel('Active Gate Memory Usage', 'Cluster Prometheus', 'bytes').addTarget(
+  prometheus.target(
+    'sum(container_memory_rss{namespace=~"dynatrace",pod=~".*-activegate-.*",container!=""}) by (node, namespace, pod)',
+    legendFormat='{{ node }}: {{ namespace }} : {{ pod }}',
+  )
+);
+
+local dynaactivegateCPU = genericGraphLegendPanel('Active Gate CPU Usage', 'Cluster Prometheus', 'percent').addTarget(
+  prometheus.target(
+    'sum(irate(container_cpu_usage_seconds_total{namespace=~"dynatrace", pod=~".*-activegate-.*", container!~"POD|"}[2m])*100) by (node, namespace, pod)',
+    legendFormat='{{ node }}: {{ namespace }} : {{ pod }}',
+  )
+);
+
+local opentelemetryMem = genericGraphLegendPanel('Opentelemetry Memory Usage', 'Cluster Prometheus', 'bytes').addTarget(
+  prometheus.target(
+    'sum(container_memory_rss{namespace=~"dynatrace",pod=~"opentelemetry-.*",container!=""}) by (node, namespace, pod)',
+    legendFormat='{{ node }}: {{ namespace }} : {{ pod }}',
+  )
+);
+
+local opentelemetryCPU = genericGraphLegendPanel('Opentelemetry CPU Usage', 'Cluster Prometheus', 'percent').addTarget(
+  prometheus.target(
+    'sum(irate(container_cpu_usage_seconds_total{namespace=~"dynatrace", pod=~"opentelemetry-.*", container!~"POD|"}[2m])*100) by (node, namespace, pod)',
+    legendFormat='{{ node }}: {{ namespace }} : {{ pod }}',
+  )
+);
 
 // Management cluster metrics
 
@@ -1719,6 +1760,10 @@ grafana.dashboard.new(
       clusterOperatorsDegraded { gridPos: { x: 8, y: 52, w: 8, h: 8 } },
       FailedPods { gridPos: { x: 16, y: 52, w: 8, h: 8 } },
       alerts { gridPos: { x: 0, y: 60, w: 24, h: 8 } },
+      dynaactivegateMem { gridPos: { x: 0, y: 68, w: 12, h: 8 } },  // first free grid row below alerts (y: 60, h: 8)
+      dynaactivegateCPU { gridPos: { x: 12, y: 68, w: 12, h: 8 } },
+      opentelemetryCPU { gridPos: { x: 0, y: 76, w: 12, h: 8 } },  // own grid row so it does not share cells with the ActiveGate panels
+      opentelemetryMem { gridPos: { x: 12, y: 76, w: 12, h: 8 } },
     ],
   ), { gridPos: { x: 0, y: 4, w: 24, h: 1 } })
 
@@ -1753,6 +1798,8 @@ grafana.dashboard.new(
       nodeMemory { gridPos: { x: 12, y: 2, w: 12, h: 8 } },
       suricataCPU { gridPos: { x: 0, y: 18, w: 12, h: 8 } },
       suricataMemory { gridPos: { x: 12, y: 18, w: 12, h: 8 } },
+      dynaoneagentCPU { gridPos: { x: 0, y: 26, w: 12, h: 8 } },  // below the Suricata panels (y: 18, h: 8) instead of reusing their cells
+      dynaoneagentMem { gridPos: { x: 12, y: 26, w: 12, h: 8 } },
     ]
   ), { gridPos: { x: 0, y: 4, w: 24, h: 1 } }
 )