diff --git a/Makefile b/Makefile index 71dcc65..3093b07 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,10 @@ -JSONNET := https://github.com/google/go-jsonnet/releases/download/v0.20.0/go-jsonnet_0.20.0_Linux_x86_64.tar.gz -JB = https://github.com/jsonnet-bundler/jsonnet-bundler/releases/latest/download/jb-linux-amd64 +# Asset names differ from raw uname -m output: both projects publish arm64 (not aarch64) and jb publishes amd64 (not x86_64). +ARCH := $(shell uname -m | sed 's/aarch64/arm64/') +OS_TYPE := $(shell uname) +JB_OS_TYPE := $(shell uname | tr '[:upper:]' '[:lower:]') +JB_ARCH := $(ARCH:x86_64=amd64) +JSONNET := https://github.com/google/go-jsonnet/releases/download/v0.20.0/go-jsonnet_0.20.0_$(OS_TYPE)_$(ARCH).tar.gz +JB = https://github.com/jsonnet-bundler/jsonnet-bundler/releases/latest/download/jb-$(JB_OS_TYPE)-$(JB_ARCH) BINDIR = bin TEMPLATESDIR = templates OUTPUTDIR = rendered diff --git a/templates/General/hypershift-performance.jsonnet b/templates/General/hypershift-performance.jsonnet index 81851b8..68d416b 100644 --- a/templates/General/hypershift-performance.jsonnet +++ b/templates/General/hypershift-performance.jsonnet @@ -86,6 +86,19 @@ local suricataMemory = genericGraphLegendPanel('Suricata Memory(Running on Servi ) ); +local dynaoneagentMem = genericGraphLegendPanel('OneAgent Memory Usage', 'Cluster Prometheus', 'bytes').addTarget( + prometheus.target( + 'sum(container_memory_rss{namespace=~"dynatrace",pod=~".*-oneagent-.*",container!=""}) by (node) and on (node) label_replace(cluster:nodes_roles{label_hypershift_openshift_io_cluster=~"$namespace"}, "node", "$1", "node", "(.+)")', + legendFormat='{{ node }}', + ) +); + +local dynaoneagentCPU = genericGraphLegendPanel('OneAgent CPU Usage', 'Cluster Prometheus', 'percent').addTarget( + prometheus.target( + 'sum(irate(container_cpu_usage_seconds_total{namespace=~"dynatrace", pod=~".*-oneagent-.*", container!~"POD|"}[2m])*100) by (node) and on (node) label_replace(cluster:nodes_roles{label_hypershift_openshift_io_cluster=~"$namespace"}, "node", "$1", "node", "(.+)")', + legendFormat='{{ node }}', + ) +); // Overall stats on the management cluster @@ -398,6 +411,34 @@ local 
request_duration_99th_quantile_by_resource = grafana.graphPanel.new( ) ); +// Dynatrace on the management cluster (ActiveGate and OpenTelemetry usage, one series per node) +local dynaactivegateMem = genericGraphLegendPanel('Active Gate Memory Usage', 'Cluster Prometheus', 'bytes').addTarget( + prometheus.target( + 'sum(container_memory_rss{namespace=~"dynatrace",pod=~".*-activegate-.*",container!=""}) by (node)', + legendFormat='{{ node }}', + ) +); + +local dynaactivegateCPU = genericGraphLegendPanel('Active Gate CPU Usage', 'Cluster Prometheus', 'percent').addTarget( + prometheus.target( + 'sum(irate(container_cpu_usage_seconds_total{namespace=~"dynatrace", pod=~".*-activegate-.*", container!~"POD|"}[2m])*100) by (node)', + legendFormat='{{ node }}', + ) +); + +local opentelemetryMem = genericGraphLegendPanel('Opentelemetry Memory Usage', 'Cluster Prometheus', 'bytes').addTarget( + prometheus.target( + 'sum(container_memory_rss{namespace=~"dynatrace",pod=~"opentelemetry-.*",container!=""}) by (node)', + legendFormat='{{ node }}', + ) +); + +local opentelemetryCPU = genericGraphLegendPanel('Opentelemetry CPU Usage', 'Cluster Prometheus', 'percent').addTarget( + prometheus.target( + 'sum(irate(container_cpu_usage_seconds_total{namespace=~"dynatrace", pod=~"opentelemetry-.*", container!~"POD|"}[2m])*100) by (node)', + legendFormat='{{ node }}', + ) +); // Management cluster metrics @@ -1719,6 +1760,10 @@ grafana.dashboard.new( clusterOperatorsDegraded { gridPos: { x: 8, y: 52, w: 8, h: 8 } }, FailedPods { gridPos: { x: 16, y: 52, w: 8, h: 8 } }, alerts { gridPos: { x: 0, y: 60, w: 24, h: 8 } }, + dynaactivegateMem { gridPos: { x: 0, y: 68, w: 12, h: 8 } }, + dynaactivegateCPU { gridPos: { x: 12, y: 68, w: 12, h: 8 } }, + opentelemetryCPU { gridPos: { x: 0, y: 76, w: 12, h: 8 } }, + opentelemetryMem { gridPos: { x: 12, y: 76, w: 12, h: 8 } }, ], ), { gridPos: { x: 0, y: 4, w: 24, h: 1 } }) @@ -1753,6 +1798,8 @@ grafana.dashboard.new( nodeMemory { gridPos: { x: 12, y: 2, w: 12, h: 8 } }, suricataCPU { gridPos: { x: 0, y: 18, w: 12, h: 8 } }, suricataMemory { 
gridPos: { x: 12, y: 18, w: 12, h: 8 } }, + dynaoneagentCPU { gridPos: { x: 0, y: 26, w: 12, h: 8 } }, + dynaoneagentMem { gridPos: { x: 12, y: 26, w: 12, h: 8 } }, ] ), { gridPos: { x: 0, y: 4, w: 24, h: 1 } } ) diff --git a/templates/General/ocp-performance.jsonnet b/templates/General/ocp-performance.jsonnet index f4ae26c..55f62a6 100644 --- a/templates/General/ocp-performance.jsonnet +++ b/templates/General/ocp-performance.jsonnet @@ -184,49 +184,6 @@ local stackroxMem = genericGraphLegendPanel('Top 25 stackrox container RSS bytes ) ); -// Dynatrace -local dynaoneagentMem = genericGraphLegendPanel('OneAgent Memory Usage', 'bytes').addTarget( - prometheus.target( - 'container_memory_rss{namespace="dynatrace",pod=~".*-oneagent-.*",container!=""}' , - legendFormat='{{ pod }}', - ) -); - -local dynaoneagentCPU = genericGraphLegendPanel('OneAgent CPU Usage', 'percent').addTarget( - prometheus.target( - 'irate(container_cpu_usage_seconds_total{namespace="dynatrace", pod=~".*-oneagent-.*",container!~"POD|"}[$interval])*100', - legendFormat='{{ node }} : {{ namespace }} : {{ pod }}', - ) -); - -local dynaactivegateMem = genericGraphLegendPanel('Active Gate Memory Usage', 'bytes').addTarget( - prometheus.target( - 'container_memory_rss{namespace="dynatrace",pod=~".*-activegate-.*",container!=""}' , - legendFormat='{{ node }} : {{ namespace }} : {{ pod }}', - ) -); - -local dynaactivegateCPU = genericGraphLegendPanel('Active Gate CPU Usage', 'percent').addTarget( - prometheus.target( - 'irate(container_cpu_usage_seconds_total{namespace="dynatrace", pod=~".*-activegate-.*",container!~"POD|"}[$interval])*100', - legendFormat='{{ node }} : {{ namespace }} : {{ pod }}', - ) -); - -local opentelemetryCPU = genericGraphLegendPanel('Opentelemetry CPU Usage', 'percent').addTarget( - prometheus.target( - 'irate(container_cpu_usage_seconds_total{namespace="dynatrace", pod=~"opentelemetry-.*",container!~"POD|"}[$interval])*100', - legendFormat='{{ node }} : {{ namespace }} : {{ pod 
}}', - ) -); - -local opentelemetryMem = genericGraphLegendPanel('Opentelemetry Memory Usage', 'bytes').addTarget( - prometheus.target( - 'container_memory_rss{namespace="dynatrace",pod=~"opentelemetry-.*",container!=""}' , - legendFormat='{{ node }} : {{ namespace }} : {{ pod }}', - ) -); - // OVN local ovnAnnotationLatency = genericGraphPanel('99% Pod Annotation Latency', 's').addTarget( prometheus.target( @@ -706,16 +663,6 @@ grafana.dashboard.new( ], ), { gridPos: { x: 0, y: 4, w: 24, h: 1 } }) -.addPanel(grafana.row.new(title='Dynatrace Details', collapse=true).addPanels( - [ - dynaoneagentCPU { gridPos: { x: 0, y: 4, w: 24, h: 10 } }, - dynaoneagentMem { gridPos: { x: 0, y: 4, w: 24, h: 10 } }, - dynaactivegateCPU { gridPos: { x: 0, y: 4, w: 24, h: 10 } }, - dynaactivegateMem { gridPos: { x: 0, y: 4, w: 24, h: 10 } }, - opentelemetryCPU { gridPos: { x: 0, y: 4, w: 24, h: 10 } }, - opentelemetryMem { gridPos: { x: 0, y: 4, w: 24, h: 10 } }, - ], -), {gridPos: {x: 0, y: 5, w: 24, h: 1 } }) .addPanel(grafana.row.new(title='Master: $_master_node', collapse=true, repeat='_master_node').addPanels( [