diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1e79489..db020a7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,7 +28,7 @@ jobs: run: make deps - name: Run jsonnetfmt - run: for t in templates/**/*.jsonnet; do echo "Testing template ${t}"; ./bin/jsonnetfmt --test $t; echo 'Results:' ${?}; done + run: make format build: runs-on: ubuntu-latest diff --git a/.gitignore b/.gitignore index 82eb612..03d0198 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ templates/grafonnet-lib +templates/vendor rendered tmp bin diff --git a/Dockerfile b/Dockerfile index daad306..98f1f46 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,23 +1,21 @@ -FROM ubuntu +FROM registry.access.redhat.com/ubi8/ubi-minimal +# Set the working directory WORKDIR /performance-dashboards -ARG DEBIAN_FRONTEND=noninteractive # Install necessary libraries for subsequent commands -RUN apt-get update && apt-get install -y podman dumb-init python3.6 python3-distutils python3-pip python3-apt +RUN microdnf install -y podman python3 python3-pip && \ + microdnf clean all && \ + rm -rf /var/cache/yum COPY . . 
+ +# Set permissions RUN chmod -R 775 /performance-dashboards # Install dependencies -RUN python3 -m pip install --upgrade pip -RUN pip install -r requirements.txt - -# Cleanup the installation remainings -RUN apt-get clean autoclean && \ - apt-get autoremove --yes && \ - rm -rf /var/lib/{apt,dpkg,cache,log}/ +RUN pip3 install --upgrade pip && \ + pip3 install -r requirements.txt # Start the command -ENTRYPOINT ["/usr/bin/dumb-init", "--"] CMD ["python3", "dittybopper/syncer/entrypoint.py"] \ No newline at end of file diff --git a/Makefile b/Makefile index 2b6a8ff..f504244 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -JSONNET = https://github.com/google/jsonnet/releases/download/v0.17.0/jsonnet-bin-v0.17.0-linux.tar.gz +JB = https://github.com/jsonnet-bundler/jsonnet-bundler/releases/latest/download/jb-linux-amd64 BINDIR = bin TEMPLATESDIR = templates OUTPUTDIR = rendered @@ -6,15 +6,24 @@ ALLDIRS = $(BINDIR) $(OUTPUTDIR) SYNCER_IMG_TAG ?= quay.io/cloud-bulldozer/dittybopper-syncer:latest PLATFORM = linux/amd64,linux/arm64,linux/ppc64le,linux/s390x -# Get all templates at $(TEMPLATESDIR) -TEMPLATES := $(wildcard $(TEMPLATESDIR)/**/*.jsonnet) +ifeq ($(filter v2,$(MAKECMDGOALS)),v2) + # Set variables and instructions for v2 + TEMPLATES := $(wildcard $(TEMPLATESDIR)/**/*-v2.jsonnet) + LIBRARY_PATH := $(TEMPLATESDIR)/vendor + JSONNET := https://github.com/cloud-bulldozer/utils/releases/download/v0.0.0/jsonnet-bin-v0.20.0-linux.tar.gz +else + # Get all templates at $(TEMPLATESDIR) + TEMPLATES := $(filter-out %-v2.jsonnet, $(wildcard $(TEMPLATESDIR)/**/*.jsonnet)) + LIBRARY_PATH := $(TEMPLATESDIR)/grafonnet-lib + JSONNET := https://github.com/google/jsonnet/releases/download/v0.17.0/jsonnet-bin-v0.17.0-linux.tar.gz +endif # Replace $(TEMPLATESDIR)/*.jsonnet by $(OUTPUTDIR)/*.json outputs := $(patsubst $(TEMPLATESDIR)/%.jsonnet, $(OUTPUTDIR)/%.json, $(TEMPLATES)) all: deps format build -deps: $(ALLDIRS) $(TEMPLATESDIR)/grafonnet-lib $(BINDIR)/jsonnet +deps: 
$(ALLDIRS) $(BINDIR)/jsonnet $(LIBRARY_PATH) $(ALLDIRS): mkdir -p $(ALLDIRS) @@ -22,24 +31,34 @@ $(ALLDIRS): format: deps $(BINDIR)/jsonnetfmt -i $(TEMPLATES) -build: deps $(TEMPLATESDIR)/grafonnet-lib $(outputs) +build: deps $(LIBRARY_PATH) $(outputs) clean: @echo "Cleaning up" - rm -rf $(ALLDIRS) $(TEMPLATESDIR)/grafonnet-lib - -$(TEMPLATESDIR)/grafonnet-lib: - git clone --depth 1 https://github.com/grafana/grafonnet-lib.git $(TEMPLATESDIR)/grafonnet-lib + rm -rf $(ALLDIRS) $(TEMPLATESDIR)/vendor $(TEMPLATESDIR)/grafonnet-lib $(BINDIR)/jsonnet: @echo "Downloading jsonnet binary" curl -s -L $(JSONNET) | tar xz -C $(BINDIR) + @echo "Downloading jb binary" + curl -s -L $(JB) -o $(BINDIR)/jb + chmod +x $(BINDIR)/jb + +$(TEMPLATESDIR)/grafonnet-lib: + git clone --depth 1 https://github.com/grafana/grafonnet-lib.git $(TEMPLATESDIR)/grafonnet-lib + +$(TEMPLATESDIR)/vendor: + @echo "Downloading vendor files" + cd $(TEMPLATESDIR) && ../$(BINDIR)/jb install && cd ../ # Build each template and output to $(OUTPUTDIR) $(OUTPUTDIR)/%.json: $(TEMPLATESDIR)/%.jsonnet @echo "Building template $<" mkdir -p $(dir $@) - $(BINDIR)/jsonnet $< > $@ + $(BINDIR)/jsonnet -J ./$(LIBRARY_PATH) $< > $@ + +v2: all + @echo "Rendered the v2 dashboards with latest grafonnet library" build-syncer-image: build podman build --platform=${PLATFORM} -f Dockerfile --manifest=${SYNCER_IMG_TAG} . 
diff --git a/README.md b/README.md index e30f773..03abf0d 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,28 @@ bin/jsonnet templates/ocp-performance.jsonnet > rendered/ocp-performance.json $ ls rendered ocp-ingress-controller.json ocp-performance.json ``` +Similarly, for the V2 dashboards, which are built using the latest grafonnet library, use +``` +$ make v2 +mkdir -p bin rendered +Downloading jsonnet binary +curl -s -L https://github.com/cloud-bulldozer/utils/releases/download/v0.0.0/jsonnet-bin-v0.20.0-linux.tar.gz | tar xz -C bin +Downloading jb binary +curl -s -L https://github.com/jsonnet-bundler/jsonnet-bundler/releases/latest/download/jb-linux-amd64 -o bin/jb +chmod +x bin/jb +Downloading vendor files +cd templates && ../bin/jb install && cd ../ +GET https://github.com/grafana/grafonnet/archive/f40876da40d787e9c288de0b547ac85597c781d9.tar.gz 200 +GET https://github.com/grafana/grafonnet/archive/f40876da40d787e9c288de0b547ac85597c781d9.tar.gz 200 +GET https://github.com/jsonnet-libs/docsonnet/archive/cc9df63eaca56f39e8e4e1ce192141333257b08d.tar.gz 200 +GET https://github.com/jsonnet-libs/xtd/archive/0256a910ac71f0f842696d7bca0bf01ea77eb654.tar.gz 200 +bin/jsonnetfmt -i templates/General/ocp-performance-v2.jsonnet +Building template templates/General/ocp-performance-v2.jsonnet +mkdir -p rendered/General/ +bin/jsonnet -J ./templates/vendor templates/General/ocp-performance-v2.jsonnet > rendered/General/ocp-performance-v2.json +Rendered the v2 dashboards with latest grafonnet library +``` +All other operations remain the same as before. In order to clean up the environment execute `make clean`. 
diff --git a/assets/ocp-performance/panels.libsonnet b/assets/ocp-performance/panels.libsonnet new file mode 100644 index 0000000..8d192c9 --- /dev/null +++ b/assets/ocp-performance/panels.libsonnet @@ -0,0 +1,52 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; + +{ + timeSeries: { + local timeSeries = g.panel.timeSeries, + local fieldOverride = g.panel.timeSeries.fieldOverride, + local custom = timeSeries.fieldConfig.defaults.custom, + local options = timeSeries.options, + + generic(title, unit, targets, gridPos): + timeSeries.new(title) + + timeSeries.queryOptions.withTargets(targets) + + timeSeries.datasource.withUid('$datasource') + + timeSeries.standardOptions.withUnit(unit) + + timeSeries.gridPos.withX(gridPos.x) + + timeSeries.gridPos.withY(gridPos.y) + + timeSeries.gridPos.withH(gridPos.h) + + timeSeries.gridPos.withW(gridPos.w) + + custom.withSpanNulls('false') + + options.tooltip.withMode('multi') + + options.tooltip.withSort('desc') + + options.legend.withDisplayMode('table'), + + genericLegend(title, unit, targets, gridPos): + self.generic(title, unit, targets, gridPos) + + options.legend.withShowLegend(true) + + options.legend.withCalcs([ + 'mean', + 'max', + 'min', + ]) + + options.legend.withSortBy('max') + + options.legend.withSortDesc(true) + + options.legend.withPlacement('bottom'), + }, + stat: { + local stat = g.panel.stat, + local options = stat.options, + + base(title, targets, gridPos): + stat.new(title) + + stat.datasource.withUid('$datasource') + + stat.queryOptions.withTargets(targets) + + stat.gridPos.withX(gridPos.x) + + stat.gridPos.withY(gridPos.y) + + stat.gridPos.withH(gridPos.h) + + stat.gridPos.withW(gridPos.w) + + options.reduceOptions.withCalcs([ + 'last', + ]), + }, +} \ No newline at end of file diff --git a/assets/ocp-performance/queries.libsonnet b/assets/ocp-performance/queries.libsonnet new file mode 100644 index 0000000..94300ee --- /dev/null +++ 
b/assets/ocp-performance/queries.libsonnet @@ -0,0 +1,201 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; +local variables = import './variables.libsonnet'; + +local generateTimeSeriesQuery(query, legend) = [ + local prometheusQuery = g.query.prometheus; + prometheusQuery.new( + '$' + variables.datasource.name, + query + ) + + prometheusQuery.withFormat('time_series') + + prometheusQuery.withIntervalFactor(2) + + prometheusQuery.withLegendFormat(legend), +]; + +{ + nodeMemory: { + query(nodeName): + generateTimeSeriesQuery('node_memory_Active_bytes{instance=~"' + nodeName + '"}', 'Active') + + generateTimeSeriesQuery('node_memory_MemTotal_bytes{instance=~"' + nodeName + '"}', 'Total') + + generateTimeSeriesQuery('node_memory_Cached_bytes{instance=~"' + nodeName + '"} + node_memory_Buffers_bytes{instance=~"' + nodeName + '"}', 'Cached + Buffers') + + generateTimeSeriesQuery('node_memory_MemAvailable_bytes{instance=~"' + nodeName + '"}', 'Available') + + generateTimeSeriesQuery('(node_memory_MemTotal_bytes{instance=~"' + nodeName + '"} - (node_memory_MemFree_bytes{instance=~"' + nodeName + '"} + node_memory_Buffers_bytes{instance=~"' + nodeName + '"} + node_memory_Cached_bytes{instance=~"' + nodeName + '"}))', 'Used') + }, + nodeCPU: { + query(nodeName): + generateTimeSeriesQuery('sum by (instance, mode)(irate(node_cpu_seconds_total{instance=~"' + nodeName + '",job=~".*"}[$interval])) * 100', 'Busy {{mode}}') + }, + diskThroughput: { + query(nodeName): + generateTimeSeriesQuery('rate(node_disk_read_bytes_total{device=~"$block_device",instance=~"' + nodeName + '"}[$interval])', '{{ device }} - read') + + generateTimeSeriesQuery('rate(node_disk_written_bytes_total{device=~"$block_device",instance=~"' + nodeName + '"}[$interval])', '{{ device }} - write') + }, + diskIOPS: { + query(nodeName): + generateTimeSeriesQuery('rate(node_disk_reads_completed_total{device=~"$block_device",instance=~"' + nodeName + '"}[$interval])', '{{ 
device }} - read') + + generateTimeSeriesQuery('rate(node_disk_writes_completed_total{device=~"$block_device",instance=~"' + nodeName + '"}[$interval])', '{{ device }} - write') + }, + networkUtilization: { + query(nodeName): + generateTimeSeriesQuery('rate(node_network_receive_bytes_total{instance=~"' + nodeName + '",device=~"$net_device"}[$interval]) * 8', '{{instance}} - {{device}} - RX') + + generateTimeSeriesQuery('rate(node_network_transmit_bytes_total{instance=~"' + nodeName + '",device=~"$net_device"}[$interval]) * 8', '{{instance}} - {{device}} - TX') + }, + networkPackets: { + query(nodeName): + generateTimeSeriesQuery('rate(node_network_receive_packets_total{instance=~"' + nodeName + '",device=~"$net_device"}[$interval])', '{{instance}} - {{device}} - RX') + + generateTimeSeriesQuery('rate(node_network_transmit_packets_total{instance=~"' + nodeName + '",device=~"$net_device"}[$interval])', '{{instance}} - {{device}} - TX') + }, + networkDrop: { + query(nodeName): + generateTimeSeriesQuery('topk(10, rate(node_network_receive_drop_total{instance=~"' + nodeName + '"}[$interval]))', 'rx-drop-{{ device }}') + + generateTimeSeriesQuery('topk(10,rate(node_network_transmit_drop_total{instance=~"' + nodeName + '"}[$interval]))', 'tx-drop-{{ device }}') + }, + conntrackStats: { + query(nodeName): + generateTimeSeriesQuery('node_nf_conntrack_entries{instance=~"' + nodeName + '"}', 'conntrack_entries') + + generateTimeSeriesQuery('node_nf_conntrack_entries_limit{instance=~"' + nodeName + '"}', 'conntrack_limit') + }, + top10ContainerCPU: { + query(nodeName): + generateTimeSeriesQuery('topk(10, sum(irate(container_cpu_usage_seconds_total{container!="POD",name!="",node=~"' + nodeName + '",namespace!="",namespace=~"$namespace"}[$interval])) by (pod,container,namespace,name,service) * 100)', '{{ pod }}: {{ container }}') + }, + top10ContainerRSS: { + query(nodeName): + generateTimeSeriesQuery('topk(10, container_memory_rss{container!="POD",name!="",node=~"' + nodeName + 
'",namespace!="",namespace=~"$namespace"})', '{{ pod }}: {{ container }}') + }, + containerWriteBytes: { + query(nodeName): + generateTimeSeriesQuery('sum(rate(container_fs_writes_bytes_total{device!~".+dm.+", node=~"' + nodeName + '", container!=""}[$interval])) by (device, container)', '{{ container }}: {{ device }}') + }, + stackroxCPU: { + query(): + generateTimeSeriesQuery('topk(25, sum(irate(container_cpu_usage_seconds_total{container!="POD",name!="",namespace!="",namespace=~"stackrox"}[$interval])) by (pod,container,namespace,name,service) * 100)', '{{ pod }}: {{ container }}') + }, + stackroxMem: { + query(): + generateTimeSeriesQuery('topk(25, container_memory_rss{container!="POD",name!="",namespace!="",namespace=~"stackrox"})', '{{ pod }}: {{ container }}') + }, + ovnAnnotationLatency: { + query(): + generateTimeSeriesQuery('histogram_quantile(0.99, sum(rate(ovnkube_master_pod_creation_latency_seconds_bucket[$interval])) by (pod,le)) > 0', '{{ pod }}') + }, + ovnCNIAdd: { + query(): + generateTimeSeriesQuery('histogram_quantile(0.99, sum(rate(ovnkube_node_cni_request_duration_seconds_bucket{command="ADD"}[$interval])) by (pod,le)) > 0', '{{ pod }}') + }, + ovnCNIDel: { + query(): + generateTimeSeriesQuery('histogram_quantile(0.99, sum(rate(ovnkube_node_cni_request_duration_seconds_bucket{command="DEL"}[$interval])) by (pod,le)) > 0', '{{ pod }}') + }, + ovnKubeMasterCPU: { + query(): + generateTimeSeriesQuery('irate(container_cpu_usage_seconds_total{pod=~"ovnkube-master.*",namespace="openshift-ovn-kubernetes",container!~"POD|"}[$interval])*100', '{{container}}-{{pod}}-{{node}}') + }, + ovnKubeMasterMem: { + query(): + generateTimeSeriesQuery('container_memory_rss{pod=~"ovnkube-master-.*",namespace="openshift-ovn-kubernetes",container!~"POD|"}', '{{container}}-{{pod}}-{{node}}') + }, + topOvnControllerCPU: { + query(): + generateTimeSeriesQuery('topk(10, 
irate(container_cpu_usage_seconds_total{pod=~"ovnkube-.*",namespace="openshift-ovn-kubernetes",container="ovn-controller"}[$interval])*100)', '{{node}}') + }, + topOvnControllerMem: { + query(): + generateTimeSeriesQuery('topk(10, sum(container_memory_rss{pod=~"ovnkube-node-.*",namespace="openshift-ovn-kubernetes",container="ovn-controller"}) by (node))', '{{node}}') + }, + promReplMemUsage: { + query(): + generateTimeSeriesQuery('sum(container_memory_rss{pod="prometheus-k8s-1",namespace!="",name!="",container="prometheus"}) by (pod)', '{{pod}}') + + generateTimeSeriesQuery('sum(container_memory_rss{pod="prometheus-k8s-0",namespace!="",name!="",container="prometheus"}) by (pod)', '{{pod}}') + }, + kubeletCPU: { + query(): + generateTimeSeriesQuery('topk(10,irate(process_cpu_seconds_total{service="kubelet",job="kubelet"}[$interval])*100)', 'kubelet - {{node}}') + }, + crioCPU: { + query(): + generateTimeSeriesQuery('topk(10,irate(process_cpu_seconds_total{service="kubelet",job="crio"}[$interval])*100)', 'crio - {{node}}') + }, + kubeletMemory: { + query(): + generateTimeSeriesQuery('topk(10,process_resident_memory_bytes{service="kubelet",job="kubelet"})', 'kubelet - {{node}}') + }, + crioMemory: { + query(): + generateTimeSeriesQuery('topk(10,process_resident_memory_bytes{service="kubelet",job="crio"})', 'crio - {{node}}') + }, + crioINodes: { + query(): + generateTimeSeriesQuery('(1 - node_filesystem_files_free{fstype!="",mountpoint="/run"} / node_filesystem_files{fstype!="",mountpoint="/run"}) * 100', '/var/run - {{instance}}') + }, + currentNodeCount: { + query(): + generateTimeSeriesQuery('sum(kube_node_info{})', 'Number of nodes') + + generateTimeSeriesQuery('sum(kube_node_status_condition{status="true"}) by (condition) > 0', 'Node: {{ condition }}') + }, + currentNamespaceCount: { + query(): + generateTimeSeriesQuery('sum(kube_namespace_status_phase) by (phase)', '{{ phase }}') + }, + currentPodCount: { + query(): + 
generateTimeSeriesQuery('sum(kube_pod_status_phase{}) by (phase) > 0', '{{ phase}} Pods') + }, + nsCount: { + query(): + generateTimeSeriesQuery('sum(kube_namespace_status_phase) by (phase) > 0', '{{ phase }} namespaces') + }, + podCount: { + query(): + generateTimeSeriesQuery('sum(kube_pod_status_phase{}) by (phase)', '{{phase}} pods') + }, + secretCmCount: { + query(): + generateTimeSeriesQuery('count(kube_secret_info{})', 'secrets') + + generateTimeSeriesQuery('count(kube_configmap_info{})', 'Configmaps') + }, + deployCount: { + query(): + generateTimeSeriesQuery('count(kube_deployment_labels{})', 'Deployments') + }, + servicesCount: { + query(): + generateTimeSeriesQuery('count(kube_service_info{})', 'Services') + }, + routesCount: { + query(): + generateTimeSeriesQuery('count(openshift_route_info{})', 'Routes') + }, + alerts: { + query(): + generateTimeSeriesQuery('topk(10,sum(ALERTS{severity!="none"}) by (alertname, severity))', '{{severity}}: {{alertname}}') + }, + podDistribution: { + query(): + generateTimeSeriesQuery('count(kube_pod_info{}) by (node)', '{{ node }}') + }, + top10ContMem: { + query(): + generateTimeSeriesQuery('topk(10, container_memory_rss{namespace!="",container!="POD",name!=""})', '{{ namespace }} - {{ name }}') + }, + top10ContCPU: { + query(): + generateTimeSeriesQuery('topk(10,irate(container_cpu_usage_seconds_total{namespace!="",container!="POD",name!=""}[$interval])*100)', '{{ namespace }} - {{ name }}') + }, + goroutinesCount: { + query(): + generateTimeSeriesQuery('topk(10, sum(go_goroutines{}) by (job,instance))', '{{ job }} - {{ instance }}') + }, + clusterOperatorsOverview: { + query(): + generateTimeSeriesQuery('sum by (condition)(cluster_operator_conditions{condition!=""})', '{{ condition }}') + }, + clusterOperatorsInformation: { + query(): + generateTimeSeriesQuery('cluster_operator_conditions{name!="",reason!=""}', '{{name}} - {{reason}}') + }, + clusterOperatorsDegraded: { + query(): + 
generateTimeSeriesQuery('cluster_operator_conditions{condition="Degraded",name!="",reason!=""}', '{{name}} - {{reason}}') + }, +} \ No newline at end of file diff --git a/assets/ocp-performance/variables.libsonnet b/assets/ocp-performance/variables.libsonnet new file mode 100644 index 0000000..e067d67 --- /dev/null +++ b/assets/ocp-performance/variables.libsonnet @@ -0,0 +1,87 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; +local var = g.dashboard.variable; + +{ + datasource: + var.datasource.new('datasource', 'prometheus') + + var.datasource.withRegex('/^Cluster Prometheus$/'), + + master_node: + var.query.new('_master_node') + + var.query.withDatasourceFromVariable(self.datasource) + + var.query.queryTypes.withLabelValues( + 'node', + 'kube_node_role{role="master"}', + ) + + var.query.withRefresh(2) + + var.query.selectionOptions.withMulti() + + var.query.selectionOptions.withIncludeAll(false) + + var.query.generalOptions.withLabel('Master'), + + worker_node: + var.query.new('_worker_node') + + var.query.withDatasourceFromVariable(self.datasource) + + var.query.queryTypes.withLabelValues( + 'node', + 'kube_node_role{role=~"work.*"}', + ) + + var.query.withRefresh(2) + + var.query.selectionOptions.withMulti() + + var.query.selectionOptions.withIncludeAll(false) + + var.query.generalOptions.withLabel('Worker'), + + infra_node: + var.query.new('_infra_node') + + var.query.withDatasourceFromVariable(self.datasource) + + var.query.queryTypes.withLabelValues( + 'node', + 'kube_node_role{role="infra"}', + ) + + var.query.withRefresh(2) + + var.query.selectionOptions.withMulti() + + var.query.selectionOptions.withIncludeAll(false) + + var.query.generalOptions.withLabel('Infra'), + + namespace: + var.query.new('namespace') + + var.query.withDatasourceFromVariable(self.datasource) + + var.query.queryTypes.withLabelValues( + 'namespace', + 'kube_pod_info{namespace!="(cluster-density.*|node-density-.*)"}', + ) + + 
var.query.withRefresh(2) + + var.query.withRegex('') + + var.query.selectionOptions.withMulti(false) + + var.query.selectionOptions.withIncludeAll(true) + + var.query.generalOptions.withLabel('Namespace'), + + block_device: + var.query.new('block_device') + + var.query.withDatasourceFromVariable(self.datasource) + + var.query.queryTypes.withLabelValues( + 'device', + 'node_disk_written_bytes_total', + ) + + var.query.withRefresh(2) + + var.query.withRegex('/^(?:(?!dm|rb).)*$/') + + var.query.selectionOptions.withMulti(true) + + var.query.selectionOptions.withIncludeAll(true) + + var.query.generalOptions.withLabel('Block device'), + + net_device: + var.query.new('net_device') + + var.query.withDatasourceFromVariable(self.datasource) + + var.query.queryTypes.withLabelValues( + 'device', + 'node_network_receive_bytes_total', + ) + + var.query.withRefresh(2) + + var.query.withRegex('/^((br|en|et).*)$/') + + var.query.selectionOptions.withMulti(true) + + var.query.selectionOptions.withIncludeAll(true) + + var.query.generalOptions.withLabel('Network device'), + + interval: + var.interval.new('interval', ['2m','3m','4m','5m'],) + + var.interval.generalOptions.withLabel('interval'), +} \ No newline at end of file diff --git a/dittybopper/deploy.sh b/dittybopper/deploy.sh index c90adb4..4ec6d73 100755 --- a/dittybopper/deploy.sh +++ b/dittybopper/deploy.sh @@ -41,7 +41,7 @@ END export PROMETHEUS_USER=internal export GRAFANA_ADMIN_PASSWORD=admin export GRAFANA_URL="http://admin:${GRAFANA_ADMIN_PASSWORD}@localhost:3000" -export SYNCER_IMAGE=${SYNCER_IMAGE:-"quay.io/cloud-bulldozer/syncer:latest"} # Syncer image +export SYNCER_IMAGE=${SYNCER_IMAGE:-"quay.io/cloud-bulldozer/dittybopper-syncer:latest"} # Syncer image export GRAFANA_IMAGE=${GRAFANA_IMAGE:-"quay.io/cloud-bulldozer/grafana:9.4.3"} # Syncer image # Set defaults for command options diff --git a/templates/CPT/k8s-perf.jsonnet b/templates/CPT/k8s-perf.jsonnet deleted file mode 100644 index 7308819..0000000 --- 
a/templates/CPT/k8s-perf.jsonnet +++ /dev/null @@ -1,499 +0,0 @@ -local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; -local prometheus = grafana.prometheus; - - -// Helper functions - -local genericGraphPanel(title, format) = grafana.graphPanel.new( - title=title, - datasource='$datasource', - format=format, - nullPointMode='null as zero', - sort='decreasing', - legend_alignAsTable=true, -); - -local genericGraphLegendPanel(title, format) = grafana.graphPanel.new( - title=title, - datasource='$datasource', - format=format, - legend_values=true, - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_hideEmpty=true, - legend_hideZero=true, - legend_sort='max', - nullPointMode='null as zero', - sort='decreasing', -); - - -local nodeMemory(nodeName) = genericGraphLegendPanel('System Memory: ' + nodeName, 'bytes').addTarget( - prometheus.target( - 'node_memory_Active_bytes{node=~"' + nodeName + '"}', - legendFormat='Active', - ) -).addTarget( - prometheus.target( - 'node_memory_MemTotal_bytes{node=~"' + nodeName + '"}', - legendFormat='Total', - ) -).addTarget( - prometheus.target( - 'node_memory_Cached_bytes{node=~"' + nodeName + '"} + node_memory_Buffers_bytes{node=~"' + nodeName + '"}', - legendFormat='Cached + Buffers', - ) -).addTarget( - prometheus.target( - 'node_memory_MemAvailable_bytes{node=~"' + nodeName + '"}', - legendFormat='Available', - ) -); - - -local nodeCPU(nodeName) = genericGraphLegendPanel('CPU Basic: ' + nodeName, 'percent').addTarget( - prometheus.target( - 'sum by (instance, mode)(rate(node_cpu_seconds_total{node=~"' + nodeName + '",job=~".*"}[$interval])) * 100', - legendFormat='Busy {{mode}}', - ) -); - - -local diskThroughput(nodeName) = genericGraphLegendPanel('Disk throughput: ' + nodeName, 'Bps').addTarget( - prometheus.target( - 'rate(node_disk_read_bytes_total{device=~"$block_device",node=~"' + nodeName + '"}[$interval])', - legendFormat='{{ device }} - read', - ) -).addTarget( - 
prometheus.target( - 'rate(node_disk_written_bytes_total{device=~"$block_device",node=~"' + nodeName + '"}[$interval])', - legendFormat='{{ device }} - write', - ) -); - -local diskIOPS(nodeName) = genericGraphLegendPanel('Disk IOPS: ' + nodeName, 'iops').addTarget( - prometheus.target( - 'rate(node_disk_reads_completed_total{device=~"$block_device",node=~"' + nodeName + '"}[$interval])', - legendFormat='{{ device }} - read', - ) -).addTarget( - prometheus.target( - 'rate(node_disk_writes_completed_total{device=~"$block_device",node=~"' + nodeName + '"}[$interval])', - legendFormat='{{ device }} - write', - ) -); - -local networkUtilization(nodeName) = genericGraphLegendPanel('Network Utilization: ' + nodeName, 'bps').addTarget( - prometheus.target( - 'rate(node_network_receive_bytes_total{node=~"' + nodeName + '",device=~"$net_device"}[$interval]) * 8', - legendFormat='{{instance}} - {{device}} - RX', - ) -).addTarget( - prometheus.target( - 'rate(node_network_transmit_bytes_total{node=~"' + nodeName + '",device=~"$net_device"}[$interval]) * 8', - legendFormat='{{instance}} - {{device}} - TX', - ) -); - -local networkPackets(nodeName) = genericGraphLegendPanel('Network Packets: ' + nodeName, 'pps').addTarget( - prometheus.target( - 'rate(node_network_receive_packets_total{node=~"' + nodeName + '",device=~"$net_device"}[$interval])', - legendFormat='{{instance}} - {{device}} - RX', - ) -).addTarget( - prometheus.target( - 'rate(node_network_transmit_packets_total{node=~"' + nodeName + '",device=~"$net_device"}[$interval])', - legendFormat='{{instance}} - {{device}} - TX', - ) -); - -local networkDrop(nodeName) = genericGraphLegendPanel('Network packets drop: ' + nodeName, 'pps').addTarget( - prometheus.target( - 'topk(10, rate(node_network_receive_drop_total{node=~"' + nodeName + '"}[$interval]))', - legendFormat='rx-drop-{{ device }}', - ) -).addTarget( - prometheus.target( - 'topk(10,rate(node_network_transmit_drop_total{node=~"' + nodeName + '"}[$interval]))', - 
legendFormat='tx-drop-{{ device }}', - ) -); - -local conntrackStats(nodeName) = genericGraphLegendPanel('Conntrack stats: ' + nodeName, '') - { - seriesOverrides: [{ - alias: 'conntrack_limit', - yaxis: 2, - }], - yaxes: [{ show: true }, { show: true }], -} - .addTarget( - prometheus.target( - 'node_nf_conntrack_entries{node=~"' + nodeName + '"}', - legendFormat='conntrack_entries', - ) -).addTarget( - prometheus.target( - 'node_nf_conntrack_entries_limit{node=~"' + nodeName + '"}', - legendFormat='conntrack_limit', - ) -); - -local top10ContainerCPU(nodeName) = genericGraphLegendPanel('Top 10 container CPU: ' + nodeName, 'percent').addTarget( - prometheus.target( - 'topk(10, sum(irate(container_cpu_usage_seconds_total{container!="POD",name!="",instance=~"' + nodeName + '",namespace!="",namespace=~"$namespace"}[$interval])) by (pod,container,namespace,name,service) * 100)', - legendFormat='{{ pod }}: {{ container }}', - ) -); - -local top10ContainerRSS(nodeName) = genericGraphLegendPanel('Top 10 container RSS: ' + nodeName, 'bytes').addTarget( - prometheus.target( - 'topk(10, container_memory_rss{container!="POD",name!="",instance=~"' + nodeName + '",namespace!="",namespace=~"$namespace"})', - legendFormat='{{ pod }}: {{ container }}', - ) -); - -local containerWriteBytes(nodeName) = genericGraphLegendPanel('Container fs write rate: ' + nodeName, 'Bps').addTarget( - prometheus.target( - 'sum(rate(container_fs_writes_bytes_total{device!~".+dm.+", node=~"' + nodeName + '", container!=""}[$interval])) by (device, container)', - legendFormat='{{ container }}: {{ device }}', - ) -); - -// Individual panel definitions - -// Monitoring Stack - -local promReplMemUsage = genericGraphLegendPanel('Prometheus Replica Memory usage', 'bytes').addTarget( - prometheus.target( - 'sum(container_memory_rss{pod="prometheus-k8s-1",namespace!="",name!="",container="prometheus"}) by (pod)', - legendFormat='{{pod}}', - ) -).addTarget( - prometheus.target( - 
'sum(container_memory_rss{pod="prometheus-k8s-0",namespace!="",name!="",container="prometheus"}) by (pod)', - legendFormat='{{pod}}', - ) -); - -// Kubelet - -local kubeletCPU = genericGraphLegendPanel('Top 10 Kubelet CPU usage', 'percent').addTarget( - prometheus.target( - 'topk(10,rate(process_cpu_seconds_total{service="kubelet",job="kubelet"}[$interval])*100)', - legendFormat='kubelet - {{node}}', - ) -); - -local crioCPU = genericGraphLegendPanel('Top 10 crio CPU usage', 'percent').addTarget( - prometheus.target( - 'topk(10,rate(process_cpu_seconds_total{service="kubelet",job="crio"}[$interval])*100)', - legendFormat='crio - {{node}}', - ) -); - -local kubeletMemory = genericGraphLegendPanel('Top 10 Kubelet memory usage', 'bytes').addTarget( - prometheus.target( - 'topk(10,process_resident_memory_bytes{service="kubelet",job="kubelet"})', - legendFormat='kubelet - {{node}}', - ) -); - -local crioMemory = genericGraphLegendPanel('Top 10 crio memory usage', 'bytes').addTarget( - prometheus.target( - 'topk(10,process_resident_memory_bytes{service="kubelet",job="crio"})', - legendFormat='crio - {{node}}', - ) -); - -// Cluster details - -local current_node_count = grafana.statPanel.new( - title='Current Node Count', - datasource='$datasource', - reducerFunction='last', -).addTarget( - prometheus.target( - 'sum(kube_node_info{})', - legendFormat='Number of nodes', - ) -).addTarget( - prometheus.target( - 'sum(kube_node_status_condition{status="true"}) by (condition) > 0', - legendFormat='Node: {{ condition }}', - ) -); - -local current_namespace_count = grafana.statPanel.new( - title='Current namespace Count', - datasource='$datasource', - reducerFunction='last', -).addTarget( - prometheus.target( - 'sum(kube_namespace_status_phase) by (phase)', - legendFormat='{{ phase }}', - ) -); - -local current_pod_count = grafana.statPanel.new( - title='Current Pod Count', - reducerFunction='last', - datasource='$datasource', -).addTarget( - prometheus.target( - 
'sum(kube_pod_status_phase{}) by (phase) > 0', - legendFormat='{{ phase}} Pods', - ) -); - -local nodeCount = genericGraphPanel('Number of nodes', 'none').addTarget( - prometheus.target( - 'sum(kube_node_info{})', - legendFormat='Number of nodes', - ) -).addTarget( - prometheus.target( - 'sum(kube_node_status_condition{status="true"}) by (condition) > 0', - legendFormat='Node: {{ condition }}', - ) -); - -local nsCount = genericGraphPanel('Namespace count', 'none').addTarget( - prometheus.target( - 'sum(kube_namespace_status_phase) by (phase) > 0', - legendFormat='{{ phase }} namespaces', - ) -); - -local podCount = genericGraphPanel('Pod count', 'none').addTarget( - prometheus.target( - 'sum(kube_pod_status_phase{}) by (phase)', - legendFormat='{{phase}} pods', - ) -); - -local secretCmCount = genericGraphPanel('Secret & configmap count', 'none').addTarget( - prometheus.target( - 'count(kube_secret_info{})', - legendFormat='secrets', - ) -).addTarget( - prometheus.target( - 'count(kube_configmap_info{})', - legendFormat='Configmaps', - ) -); - -local deployCount = genericGraphPanel('Deployment count', 'none').addTarget( - prometheus.target( - 'count(kube_deployment_labels{})', - legendFormat='Deployments', - ) -); - - -local servicesCount = genericGraphPanel('Services count', 'none').addTarget( - prometheus.target( - 'count(kube_service_info{})', - legendFormat='Services', - ) -); - -local alerts = genericGraphPanel('Alerts', 'none').addTarget( - prometheus.target( - 'topk(10,sum(ALERTS{severity!="none"}) by (alertname, severity))', - legendFormat='{{severity}}: {{alertname}}', - ) -); - -local top10ContMem = genericGraphLegendPanel('Top 10 container RSS', 'bytes').addTarget( - prometheus.target( - 'topk(10, container_memory_rss{namespace!="",container!="POD",name!=""})', - legendFormat='{{ namespace }} - {{ name }}', - ) -); - -local podDistribution = genericGraphLegendPanel('Pod Distribution', 'none').addTarget( - prometheus.target( - 'count(kube_pod_info{}) by 
(exported_node)', - legendFormat='{{ node }}', - ) -); - -local top10ContCPU = genericGraphLegendPanel('Top 10 container CPU', 'percent').addTarget( - prometheus.target( - 'topk(10,irate(container_cpu_usage_seconds_total{namespace!="",container!="POD",name!=""}[$interval])*100)', - legendFormat='{{ namespace }} - {{ name }}', - ) -); - - -local goroutines_count = genericGraphPanel('Goroutines count', 'none').addTarget( - prometheus.target( - 'topk(10, sum(go_goroutines{}) by (job,instance))', - legendFormat='{{ job }} - {{ instance }}', - ) -); - -// Cluster operators - -local clusterOperatorsOverview = grafana.statPanel.new( - datasource='$datasource', - title='Cluster operators overview', -).addTarget( - prometheus.target( - 'sum by (condition)(cluster_operator_conditions{condition!=""})', - legendFormat='{{ condition }}', - ) -); - -local clusterOperatorsInformation = genericGraphLegendPanel('Cluster operators information', 'none').addTarget( - prometheus.target( - 'cluster_operator_conditions{name!="",reason!=""}', - legendFormat='{{name}} - {{reason}}', - ) -); - -local clusterOperatorsDegraded = genericGraphLegendPanel('Cluster operators degraded', 'none').addTarget( - prometheus.target( - 'cluster_operator_conditions{condition="Degraded",name!="",reason!=""}', - legendFormat='{{name}} - {{reason}}', - ) -); - - -// Dashboard - -grafana.dashboard.new( - 'k8s Performance', - description='Performance dashboard for Red Hat k8s', - time_from='now-1h', - timezone='utc', - refresh='30s', - editable='true', -) - - -// Templates - -.addTemplate( - grafana.template.datasource( - 'datasource', - 'prometheus', - '', - ) -) - -.addTemplate( - grafana.template.new( - '_worker_node', - '$datasource', - 'label_values(kube_node_labels{}, exported_node)', - '', - refresh=2, - ) { - label: 'Worker', - type: 'query', - multi: true, - includeAll: false, - }, -) - -.addTemplate( - grafana.template.new( - 'namespace', - '$datasource', - 'label_values(kube_pod_info, 
exported_namespace)', - '', - refresh=2, - ) { - label: 'Namespace', - type: 'query', - multi: false, - includeAll: true, - }, -) - - -.addTemplate( - grafana.template.new( - 'block_device', - '$datasource', - 'label_values(node_disk_written_bytes_total,device)', - '', - regex='/^(?:(?!dm|rb).)*$/', - refresh=2, - ) { - label: 'Block device', - type: 'query', - multi: true, - includeAll: true, - }, -) - - -.addTemplate( - grafana.template.new( - 'net_device', - '$datasource', - 'label_values(node_network_receive_bytes_total,device)', - '', - regex='/^((br|en|et).*)$/', - refresh=2, - ) { - label: 'Network device', - type: 'query', - multi: true, - includeAll: true, - }, -) - -.addTemplate( - grafana.template.new( - 'interval', - '$datasource', - '$__auto_interval_period', - label='interval', - refresh='time', - ) { - type: 'interval', - query: '2m,3m,4m,5m', - auto: false, - }, -) - -// Dashboard definition - -.addPanel(grafana.row.new(title='Cluster Details', collapse=true).addPanels( - [ - current_node_count { gridPos: { x: 0, y: 4, w: 8, h: 3 } }, - current_namespace_count { gridPos: { x: 8, y: 4, w: 8, h: 3 } }, - current_pod_count { gridPos: { x: 16, y: 4, w: 8, h: 3 } }, - nodeCount { gridPos: { x: 0, y: 12, w: 8, h: 8 } }, - nsCount { gridPos: { x: 8, y: 12, w: 8, h: 8 } }, - podCount { gridPos: { x: 16, y: 12, w: 8, h: 8 } }, - secretCmCount { gridPos: { x: 0, y: 20, w: 8, h: 8 } }, - deployCount { gridPos: { x: 8, y: 20, w: 8, h: 8 } }, - servicesCount { gridPos: { x: 16, y: 20, w: 8, h: 8 } }, - top10ContMem { gridPos: { x: 0, y: 28, w: 24, h: 8 } }, - top10ContCPU { gridPos: { x: 0, y: 36, w: 12, h: 8 } }, - goroutines_count { gridPos: { x: 12, y: 36, w: 12, h: 8 } }, - podDistribution { gridPos: { x: 0, y: 44, w: 24, h: 8 } }, - ] -), { gridPos: { x: 0, y: 3, w: 24, h: 1 } }) - -.addPanel(grafana.row.new(title='Node: $_worker_node', collapse=true, repeat='_worker_node').addPanels( - [ - nodeCPU('$_worker_node') { gridPos: { x: 0, y: 0, w: 12, h: 8 } }, 
- nodeMemory('$_worker_node') { gridPos: { x: 12, y: 0, w: 12, h: 8 } }, - diskThroughput('$_worker_node') { gridPos: { x: 0, y: 8, w: 12, h: 8 } }, - diskIOPS('$_worker_node') { gridPos: { x: 12, y: 8, w: 12, h: 8 } }, - networkUtilization('$_worker_node') { gridPos: { x: 0, y: 16, w: 12, h: 8 } }, - networkPackets('$_worker_node') { gridPos: { x: 12, y: 16, w: 12, h: 8 } }, - networkDrop('$_worker_node') { gridPos: { x: 0, y: 24, w: 12, h: 8 } }, - conntrackStats('$_worker_node') { gridPos: { x: 12, y: 24, w: 12, h: 8 } }, - top10ContainerCPU('$_worker_node') { gridPos: { x: 0, y: 32, w: 12, h: 8 } }, - top10ContainerRSS('$_worker_node') { gridPos: { x: 12, y: 32, w: 12, h: 8 } }, - ], -), { gridPos: { x: 0, y: 1, w: 0, h: 8 } }) diff --git a/templates/CPT/kube-burner.jsonnet b/templates/CPT/kube-burner.jsonnet deleted file mode 100644 index cdb5160..0000000 --- a/templates/CPT/kube-burner.jsonnet +++ /dev/null @@ -1,4568 +0,0 @@ -local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; -local es = grafana.elasticsearch; - -local worker_count = grafana.statPanel.new( - title='Node count', - datasource='$datasource1', - justifyMode='center' -).addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "nodeRoles"', - timeField='timestamp', - metrics=[{ - field: 'coun', - id: '1', - meta: {}, - settings: {}, - type: 'count', - }], - bucketAggs=[ - { - field: 'labels.role.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -).addThresholds([ - { color: 'green', value: null }, - { color: 'red', value: 80 }, -]); - - -local metric_count_panel = grafana.statPanel.new( - datasource='$datasource1', - justifyMode='center', - title=null -).addTarget( - // Namespaces count - es.target( - 
query='uuid.keyword: $uuid AND metricName: "namespaceCount" AND labels.phase: "Active"', - alias='Namespaces', - timeField='timestamp', - metrics=[{ - field: 'value', - id: '1', - meta: {}, - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -).addTarget( - // Services count - es.target( - query='uuid.keyword: $uuid AND metricName: "serviceCount"', - alias='Services', - timeField='timestamp', - metrics=[{ - field: 'value', - id: '1', - meta: {}, - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -).addTarget( - // Deployments count - es.target( - query='uuid.keyword: $uuid AND metricName: "deploymentCount"', - alias='Services', - timeField='timestamp', - metrics=[{ - field: 'value', - id: '1', - meta: {}, - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -).addTarget( - // Secrets count - es.target( - query='uuid.keyword: $uuid AND metricName: "secretCount"', - alias='Services', - timeField='timestamp', - metrics=[{ - field: 'value', - id: '1', - meta: {}, - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -).addTarget( - // ConfigMap count - es.target( - query='uuid.keyword: $uuid AND metricName: "configmapCount"', - alias='ConfigMaps', - timeField='timestamp', - metrics=[{ - field: 'value', - id: '1', - meta: {}, - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: 
'1', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -).addThresholds([ - { color: 'green', value: null }, - { color: 'red', value: 80 }, -]); - -local openshift_version_panel = grafana.statPanel.new( - title='OpenShift version', - datasource='$datasource1', - justifyMode='center', - reducerFunction='lastNotNull', - fields='/^labels\\.version$/' -).addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "clusterVersion"', - timeField='timestamp', - metrics=[{ - id: '1', - settings: { - size: '500', - }, - type: 'raw_data', - }], - ) -); - -local etcd_version_panel = grafana.statPanel.new( - title='Etcd version', - datasource='$datasource1', - justifyMode='center', - reducerFunction='lastNotNull', - fields='labels.cluster_version' -).addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "etcdVersion"', - timeField='timestamp', - metrics=[{ - id: '1', - settings: { - size: '500', - }, - type: 'raw_data', - }], - ) -); - - -// Next line -// TODO: Convert to new table format once jsonnet supports it. -// That would fix the text wrapping problem. 
-local summary_panel_1 = grafana.tablePanel.new( - datasource='$datasource1', - title=null, - styles=[ - { - pattern: 'uuid', - alias: 'UUID', - type: 'string', - }, - { - pattern: 'jobConfig.name', - alias: 'Name', - type: 'hidden', - }, - { - pattern: 'jobConfig.qps', - alias: 'QPS', - type: 'number', - }, - { - pattern: 'jobConfig.burst', - alias: 'Burst', - type: 'number', - }, - { - pattern: 'elapsedTime', - alias: 'Elapsed time', - type: 'number', - unit: 's', - }, - { - pattern: 'jobConfig.jobIterations', - alias: 'Iterations', - type: 'number', - }, - { - pattern: 'jobConfig.jobType', - alias: 'Job Type', - type: 'string', - }, - { - pattern: 'jobConfig.podWait', - alias: 'podWait', - type: 'hidden', - }, - { - pattern: 'jobConfig.namespacedIterations', - alias: 'Namespaced iterations', - type: 'hidden', - }, - { - pattern: 'jobConfig.preLoadImages', - alias: 'Preload Images', - type: 'boolean', - }, - { - pattern: '_id', - alias: '_id', - type: 'hidden', - }, - { - pattern: '_index', - alias: '_index', - type: 'hidden', - }, - { - pattern: '_type', - alias: '_type', - type: 'hidden', - }, - { - pattern: 'highlight', - alias: 'highlight', - type: 'hidden', - }, - { - pattern: '_type', - alias: '_type', - type: 'hidden', - }, - { - pattern: 'jobConfig.cleanup', - type: 'hidden', - }, - { - pattern: 'jobConfig.errorOnVerify', - alias: 'errorOnVerify', - type: 'hidden', - }, - { - pattern: 'jobConfig.jobIterationDelay', - alias: 'jobIterationDelay', - type: 'hidden', - unit: 's', - }, - { - pattern: 'jobConfig.jobPause', - alias: 'jobPause', - type: 'hidden', - unit: 's', - }, - { - pattern: 'jobConfig.maxWaitTimeout', - alias: 'maxWaitTimeout', - type: 'hidden', - unit: 's', - }, - { - pattern: 'jobConfig.namespace', - alias: 'namespacePrefix', - type: 'hidden', - }, - { - pattern: 'jobConfig.namespaced', - alias: 'jobConfig.namespaced', - type: 'hidden', - }, - { - pattern: 'jobConfig.objects', - alias: 'jobConfig.objects', - type: 'hidden', - }, - { - 
pattern: 'jobConfig.preLoadPeriod', - alias: 'jobConfig.preLoadPeriod', - type: 'hidden', - }, - { - pattern: 'jobConfig.verifyObjects', - alias: 'jobConfig.verifyObjects', - type: 'hidden', - }, - { - pattern: 'metricName', - alias: 'metricName', - type: 'hidden', - }, - { - pattern: 'timestamp', - alias: 'timestamp', - type: 'hidden', - }, - { - pattern: 'jobConfig.waitFor', - alias: 'jobConfig.waitFor', - type: 'hidden', - }, - { - pattern: 'jobConfig.waitForDeletion', - alias: 'jobConfig.waitForDeletion', - type: 'hidden', - }, - { - pattern: 'jobConfig.waitWhenFinished', - alias: 'jobConfig.waitWhenFinished', - type: 'hidden', - }, - { - pattern: 'sort', - alias: 'sort', - type: 'hidden', - }, - ] -).addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "jobSummary"', - timeField='timestamp', - metrics=[{ - id: '1', - settings: { - size: '500', - }, - type: 'raw_data', - }], - ) -).addTransformation( - grafana.transformation.new('organize', options={ - indexByName: { - _id: 1, - _index: 2, - _type: 3, - elapsedTime: 8, - 'jobConfig.burst': 7, - 'jobConfig.cleanup': 12, - 'jobConfig.errorOnVerify': 13, - 'jobConfig.jobIterationDelay': 14, - 'jobConfig.jobIterations': 9, - 'jobConfig.jobPause': 15, - 'jobConfig.jobType': 10, - 'jobConfig.maxWaitTimeout': 16, - 'jobConfig.name': 5, - 'jobConfig.namespace': 17, - 'jobConfig.namespacedIterations': 18, - 'jobConfig.objects': 19, - 'jobConfig.podWait': 11, - 'jobConfig.qps': 6, - 'jobConfig.verifyObjects': 20, - 'jobConfig.waitFor': 21, - 'jobConfig.waitForDeletion': 22, - 'jobConfig.waitWhenFinished': 23, - metricName: 24, - timestamp: 0, - uuid: 4, - }, - }) -); - - -// TODO: Convert to new table format once jsonnet supports it. -// That would fix the text wrapping problem. 
-local summary_panel_2 = grafana.tablePanel.new( - datasource='$datasource1', - title=null, - styles=[ - { - pattern: 'k8s_version', - alias: 'k8s version', - type: 'string', - }, - { - pattern: 'result', - alias: 'Result', - type: 'string', - }, - { - pattern: 'sdn_type', - alias: 'SDN', - type: 'string', - }, - { - pattern: 'total_nodes', - alias: 'Total nodes', - type: 'number', - }, - { - pattern: 'master_nodes_count', - alias: 'Master nodes', - type: 'number', - }, - { - pattern: 'worker_nodes_count', - alias: 'Worker nodes', - type: 'number', - }, - { - pattern: 'infra_nodes_count', - alias: 'Infra nodes', - type: 'number', - }, - { - pattern: 'master_nodes_type', - alias: 'Masters flavor', - type: 'string', - }, - { - pattern: '_id', - alias: '_id', - type: 'hidden', - }, - { - pattern: '_index', - alias: '_index', - type: 'hidden', - }, - { - pattern: '_type', - alias: '_type', - type: 'hidden', - }, - { - pattern: 'benchmark', - alias: 'benchmark', - type: 'hidden', - }, - { - pattern: 'clustertype', - alias: 'clustertype', - type: 'hidden', - }, - { - pattern: 'end_date', - alias: 'end_date', - type: 'hidden', - }, - { - pattern: 'highlight', - alias: 'highlight', - type: 'hidden', - }, - { - pattern: 'jobConfig.cleanup', - alias: 'jobConfig.cleanup', - type: 'hidden', - }, - { - pattern: 'jobConfig.errorOnVerify', - alias: 'errorOnVerify', - type: 'hidden', - }, - { - pattern: 'jobConfig.jobIterationDelay', - alias: 'jobIterationDelay', - type: 'hidden', - unit: 's', - }, - { - pattern: 'jobConfig.jobPause', - alias: 'jobPause', - type: 'hidden', - unit: 's', - }, - { - pattern: 'jobConfig.maxWaitTimeout', - alias: 'maxWaitTimeout', - type: 'hidden', - unit: 's', - }, - { - pattern: 'jobConfig.namespace', - alias: 'namespacePrefix', - type: 'hidden', - }, - { - pattern: 'jobConfig.namespaced', - alias: 'jobConfig.namespaced', - type: 'hidden', - }, - { - pattern: 'jobConfig.objects', - alias: 'jobConfig.objects', - type: 'hidden', - }, - { - pattern: 
'jobConfig.preLoadPeriod', - alias: 'jobConfig.preLoadPeriod', - type: 'hidden', - }, - { - pattern: 'jobConfig.verifyObjects', - alias: 'jobConfig.verifyObjects', - type: 'hidden', - }, - { - pattern: 'jobConfig.waitFor', - alias: 'jobConfig.waitFor', - type: 'hidden', - }, - { - pattern: 'jobConfig.waitForDeletion', - alias: 'jobConfig.waitForDeletion', - type: 'hidden', - }, - { - pattern: 'jobConfig.waitWhenFinished', - alias: 'jobConfig.waitWhenFinished', - type: 'hidden', - }, - { - pattern: 'metricName', - alias: 'metricName', - type: 'hidden', - }, - { - pattern: 'ocp_version', - alias: 'ocp_version', - type: 'hidden', - }, - { - pattern: 'ocp_version', - alias: 'ocp_version', - type: 'hidden', - }, - { - pattern: 'sort', - alias: 'sort', - type: 'hidden', - }, - { - pattern: 'timestamp', - alias: 'timestamp', - type: 'hidden', - }, - { - pattern: 'uuid', - alias: 'uuid', - type: 'hidden', - }, - { - pattern: 'workload', - alias: 'workload', - type: 'hidden', - }, - { - pattern: 'worker_nodes_type', - alias: 'worker_nodes_type', - type: 'hidden', - }, - { - pattern: 'infra_nodes_type', - alias: 'infra_nodes_type', - type: 'hidden', - }, - { - pattern: 'platform', - alias: 'platform', - type: 'hidden', - }, - { - pattern: 'workload_nodes_count', - alias: 'workload_nodes_count', - type: 'hidden', - }, - { - pattern: 'workload_nodes_type', - alias: 'workload_nodes_type', - type: 'hidden', - }, - ] -).addTarget( - es.target( - query='uuid.keyword: $uuid AND result.keyword: *', - timeField='timestamp', - metrics=[{ - id: '1', - settings: { - size: '500', - }, - type: 'raw_data', - }], - ) -).addTransformation( - grafana.transformation.new('organize', options={ - indexByName: { - _id: 4, - _index: 5, - _type: 15, - benchmark: 17, - clustertype: 18, - end_date: 19, - highlight: 20, - infra_nodes_count: 9, - infra_nodes_type: 14, - k8s_version: 1, - master_nodes_count: 7, - master_nodes_type: 11, - ocp_version: 21, - platform: 22, - result: 2, - sdn_type: 3, - 
sort: 23, - timestamp: 0, - total_nodes: 6, - uuid: 16, - worker_nodes_count: 8, - worker_nodes_type: 12, - workload: 24, - workload_nodes_count: 10, - workload_nodes_type: 13, - }, - }) -); - -// First row: Cluster status -local masters_cpu = grafana.graphPanel.new( - title='Masters CPU utilization', - datasource='$datasource1', - legend_alignAsTable=true, - legend_avg=true, - legend_max=true, - percentage=true, - legend_values=true, - format='percent', -).addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeCPU-Masters" AND NOT labels.mode.keyword: idle AND NOT labels.mode.keyword: steal', - timeField='timestamp', - alias='{{labels.instance.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: { - script: '_value * 100', - }, - type: 'sum', - }], - bucketAggs=[ - { - field: 'labels.instance.keyword', - fake: true, - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -); - -local masters_memory = grafana.graphPanel.new( - title='Masters Memory utilization', - datasource='$datasource1', - legend_alignAsTable=true, - legend_avg=true, - legend_max=true, - legend_values=true, - format='bytes' -).addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryAvailable-Masters"', - timeField='timestamp', - alias='Available {{labels.instance.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'sum', - }], - bucketAggs=[ - { - field: 'labels.instance.keyword', - fake: true, - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -); - -local 
node_status_summary = grafana.graphPanel.new( - title='Node Status Summary', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_current=true, - legend_values=true, - legend_rightSide=true, -).addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeStatus"', - timeField='timestamp', - alias='{{labels.condition.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - field: 'labels.condition.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local pod_status_summary = grafana.graphPanel.new( - title='Pod Status Summary', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_current=true, - legend_values=true, -).addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "podStatusCount"', - timeField='timestamp', - alias='{{labels.phase.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - field: 'labels.phase.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local kube_api_cpu = grafana.graphPanel.new( - title='Kube-apiserver CPU', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.container.keyword: kube-apiserver', - timeField='timestamp', - 
alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - fake: true, - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU-Masters" AND labels.container.keyword: kube-apiserver', - timeField='timestamp', - alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - fake: true, - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.container.keyword: kube-apiserver', - timeField='timestamp', - 
alias='Avg CPU {{labels.container.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); -// TODO: When the feature is added to grafannet, style the average differently. - - -local kube_api_memory = grafana.graphPanel.new( - title='Kube-apiserver Memory', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.container.keyword: kube-apiserver', - timeField='timestamp', - alias='Rss {{labels.namespace.keyword}}-{{labels.pod.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - fake: true, - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -).addTarget( - es.target( - query='uuid.keyword: $uuid AND 
metricName: "containerMemory-Masters" AND labels.container.keyword: kube-apiserver', - timeField='timestamp', - alias='Rss {{labels.namespace.keyword}}-{{labels.pod.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - fake: true, - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.container.keyword: kube-apiserver', - timeField='timestamp', - alias='Avg Rss {{labels.container.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); -// TODO: When the feature is added to grafannet, style the average differently. 
- - -local active_controller_manager_cpu = grafana.graphPanel.new( - title='Active Kube-controller-manager CPU', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.container.keyword: kube-controller-manager', - timeField='timestamp', - alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '1', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - fake: true, - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU-Masters" AND labels.container.keyword: kube-controller-manager', - timeField='timestamp', - alias='{{labels.container.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '1', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - fake: true, - id: '5', - settings: { - 
min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local active_controller_manager_memory = grafana.graphPanel.new( - title='Active Kube-controller-manager memory', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.container.keyword: kube-controller-manager', - timeField='timestamp', - alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '1', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - fake: true, - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory-Masters" AND labels.container.keyword: kube-controller-manager', - timeField='timestamp', - alias='{{labels.container.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '1', - }, - type: 'terms', - }, - { - 
field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - fake: true, - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local kube_scheduler_cpu = grafana.graphPanel.new( - title='Kube-scheduler CPU', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.container.keyword: kube-scheduler', - timeField='timestamp', - alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU-Masters" AND labels.container.keyword: kube-scheduler', - timeField='timestamp', - alias='{{labels.container.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - 
bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local kube_scheduler_memory = grafana.graphPanel.new( - title='Kube-scheduler memory', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.container.keyword: kube-scheduler', - timeField='timestamp', - alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory-Masters" AND 
labels.container.keyword: kube-scheduler', - timeField='timestamp', - alias='Rss {{labels.container.keyword}}', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local hypershift_controlplane_cpu = grafana.graphPanel.new( - title='Hypershift Controlplane CPU Usage', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU-Controlplane"', - timeField='timestamp', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - field: 'labels.pod.keyword', - id: '2', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '20', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '20', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '4', - settings: { - interval: '30s', - min_doc_count: '1', - timeZone: 'utc', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -); - - -local hypershift_controlplane_memory = grafana.graphPanel.new( - title='Hypershift Controlplane RSS memory Usage', - datasource='$datasource1', - 
legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory-Controlplane"', - timeField='timestamp', - metrics=[{ - field: 'value', - id: '1', - settings: {}, - type: 'avg', - }], - bucketAggs=[ - { - field: 'labels.pod.keyword', - id: '2', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '20', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '20', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '4', - settings: { - interval: '30s', - min_doc_count: '1', - timeZone: 'utc', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -); - -// Pod latencies section -local average_pod_latency = grafana.graphPanel.new( - title='Average pod latency', - datasource='$datasource1', - legend_alignAsTable=true, - legend_min=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='ms', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: podLatencyMeasurement', - timeField='timestamp', - alias='{{field}}', - metrics=[ - { - field: 'podReadyLatency', - id: '1', - meta: {}, - settings: {}, - type: 'avg', - }, - { - field: 'schedulingLatency', - id: '3', - meta: {}, - settings: {}, - type: 'avg', - }, - { - field: 'initializedLatency', - id: '4', - meta: {}, - settings: {}, - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -); - -local pod_latencies_summary = grafana.statPanel.new( - datasource='$datasource1', - justifyMode='center', - title='Pod latencies summary $latencyPercentile', - unit='ms', - colorMode='value', // Note: There isn't currently a way to set the color palette. 
-).addTarget( - // Namespaces count - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: podLatencyQuantilesMeasurement', - alias='$latencyPercentile {{term quantileName.keyword}}', - timeField='timestamp', - metrics=[{ - field: '$latencyPercentile', - id: '1', - meta: {}, - settings: {}, - type: 'max', - }], - bucketAggs=[ - { - fake: true, - field: 'quantileName.keyword', - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '0', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -); - -local pod_conditions_latency = grafana.tablePanel.new( - title='Pod conditions latency', - datasource='$datasource1', - transform='table', - styles=[ - { - pattern: 'Average containersReadyLatency', - alias: 'ContainersReady', - type: 'number', - unit: 'ms', - }, - { - pattern: 'Average initializedLatency', - alias: 'Initialized', - type: 'number', - unit: 'ms', - }, - { - pattern: 'Average podReadyLatency', - alias: 'Ready', - type: 'number', - unit: 'ms', - }, - { - pattern: 'Average schedulingLatency', - alias: 'Scheduling', - type: 'number', - unit: 'ms', - }, - { - pattern: 'namespace.keyword', - alias: 'Namespace', - type: 'string', - }, - { - pattern: 'podName.keyword', - alias: 'Pod', - type: 'string', - }, - { - pattern: 'nodeName.keyword', - alias: 'Node', - type: 'string', - }, - ], -).addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: podLatencyMeasurement', - timeField='timestamp', - metrics=[ - { - field: 'schedulingLatency', - id: '1', - meta: {}, - settings: {}, - type: 'avg', - }, - { - field: 'initializedLatency', - id: '3', - meta: {}, - settings: {}, - type: 'avg', - }, - { - field: 'containersReadyLatency', - id: '4', - meta: {}, - settings: {}, - type: 'avg', - }, - { - field: 'podReadyLatency', - id: '5', - meta: {}, - settings: {}, - type: 'avg', - }, - 
], - bucketAggs=[ - { - fake: true, - field: 'namespace.keyword', - id: '6', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '5', - size: '100', - }, - type: 'terms', - }, - { - fake: true, - field: 'nodeName.keyword', - id: '7', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '100', - }, - type: 'terms', - }, - { - field: 'podName.keyword', - id: '2', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '5', - size: '100', - }, - type: 'terms', - }, - ], - ) -); - -local setup_latency = grafana.graphPanel.new( - title='Top 10 Container runtime network setup latency', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='µs', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: containerNetworkSetupLatency', - timeField='timestamp', - alias='{{labels.node.keyword}}', - metrics=[ - { - field: 'value', - id: '1', - meta: {}, - settings: {}, - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.node.keyword', - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local scheduling_throughput = grafana.graphPanel.new( - title='Scheduling throughput', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='reqps', -) - .addTarget( - es.target( - query='uuid: $uuid AND metricName.keyword: schedulingThroughput', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - meta: {}, - settings: {}, - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - 
) -); - -// OVN section -local ovnkube_master_cpu = grafana.graphPanel.new( - title='ovnkube-master CPU usage', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.pod.keyword: /ovnkube-master.*/', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'labels.pod.keyword', - id: '2', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '3', - settings: { - interval: '30s', - min_doc_count: '1', - timeZone: 'utc', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -); - - -local ovnkube_master_memory = grafana.graphPanel.new( - title='ovnkube-master Memory usage', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.pod.keyword: /ovnkube-master.*/', - timeField='timestamp', - alias='{{labels.pod.keyword}}', - metrics=[ - { - field: 'value', - id: '1', - type: 'sum', - }, - ], - bucketAggs=[ - { - field: 'labels.pod.keyword', - id: '2', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '3', - settings: { - interval: '30s', - min_doc_count: '1', - timeZone: 'utc', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -); - -local ovnkube_controller_cpu = grafana.graphPanel.new( - title='ovn-controller CPU usage', - datasource='$datasource1', - legend_alignAsTable=true, 
- legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.namespace.keyword: "openshift-ovn-kubernetes" AND labels.pod.keyword: /ovnkube-node.*/ AND labels.container.keyword: "ovn-controller"', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'labels.pod.keyword', - id: '2', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '3', - settings: { - interval: '30s', - min_doc_count: '1', - timeZone: 'utc', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -); - - -local ovnkube_controller_memory = grafana.graphPanel.new( - title='ovn-controller Memory usage', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.namespace.keyword: "openshift-ovn-kubernetes" AND labels.pod.keyword: /ovnkube-node.*/ AND labels.container.keyword: "ovn-controller"', - timeField='timestamp', - alias='{{labels.pod.keyword}}', - metrics=[ - { - field: 'value', - id: '1', - type: 'sum', - }, - ], - bucketAggs=[ - { - field: 'labels.pod.keyword', - id: '2', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '3', - settings: { - interval: '30s', - min_doc_count: '1', - timeZone: 'utc', - trimEdges: '0', - }, - type: 'date_histogram', - }, - ], - ) -); - - -// ETCD section -local etcd_fsync_latency = grafana.graphPanel.new( - title='etcd 99th disk WAL fsync latency', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='s', -) - .addTarget( - es.target( 
- query='uuid.keyword: $uuid AND metricName: "99thEtcdDiskWalFsyncDurationSeconds"', - timeField='timestamp', - alias='{{labels.pod.keyword}}', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'labels.pod.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local etcd_commit_latency = grafana.graphPanel.new( - title='etcd 99th disk backend commit latency', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='s', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "99thEtcdDiskBackendCommitDurationSeconds"', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'labels.pod.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local etcd_leader_changes = grafana.graphPanel.new( - title='Etcd leader changes', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_values=true, - min=0, - format='s', -) - .addTarget( - es.target( - query='uuid: $uuid AND metricName.keyword: etcdLeaderChangesRate', - alias='Etcd leader changes', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '1', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local etcd_peer_roundtrip_time = 
grafana.graphPanel.new( - title='Etcd 99th network peer roundtrip time', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='s', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: 99thEtcdRoundTripTimeSeconds', - alias='{{labels.pod.keyword}} to {{labels.To.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'labels.pod.keyword', - fake: true, - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - fake: true, - field: 'labels.To.keyword', - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local etcd_cpu = grafana.graphPanel.new( - title='Etcd CPU utilization', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.container.keyword: etcd', - alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - fake: true, - field: 'labels.container.keyword', - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 
'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local etcd_memory = grafana.graphPanel.new( - title='Etcd memory utilization', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.container.keyword: etcd', - alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - fake: true, - field: 'labels.container.keyword', - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.namespace.keyword', - id: '5', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -// API an Kubeproxy section - -local api_latency_read_only_resource = grafana.graphPanel.new( - title='Read Only API request P99 latency - resource scoped', - datasource='$datasource1', - legend_alignAsTable=true, - format='s', - legend_max=true, - legend_avg=true, - legend_values=true, -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: readOnlyAPICallsLatency AND labels.scope.keyword: resource', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.verb.keyword', - id: '3', - settings: { - min_doc_count: 0, - 
order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'labels.resource.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local api_latency_read_only_namespace = grafana.graphPanel.new( - title='Read Only API request P99 latency - namespace scoped', - datasource='$datasource1', - legend_alignAsTable=true, - format='s', - legend_max=true, - legend_avg=true, - legend_values=true, -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: readOnlyAPICallsLatency AND labels.scope.keyword: namespace', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.verb.keyword', - id: '3', - settings: { - min_doc_count: 0, - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local api_latency_read_only_cluster = grafana.graphPanel.new( - title='Read Only API request P99 latency - cluster scoped', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='s', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: readOnlyAPICallsLatency AND labels.scope.keyword: cluster', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.verb.keyword', - id: '3', - settings: { - min_doc_count: 0, - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', 
- min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local api_latency_mutating = grafana.graphPanel.new( - title='Mutating API request P99 latency', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='s', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: mutatingAPICallsLatency', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.verb.keyword', - id: '3', - settings: { - min_doc_count: 0, - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local api_request_rate = grafana.graphPanel.new( - title='API request rate', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='s', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: APIRequestRate', - alias='{{labels.verb.keyword}} {{labels.resource.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.resource.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '0', - }, - type: 'terms', - }, - { - fake: true, - field: 'labels.verb.keyword', - id: '3', - settings: { - min_doc_count: 0, - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local service_sync_latency = grafana.graphPanel.new( - title='Service sync latency', - datasource='$datasource1', - 
legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='s', -) - .addTarget( - es.target( - query='uuid: $uuid AND metricName.keyword: kubeproxyP99ProgrammingLatency', - alias='Latency', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'labels.instance.keyword', - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid: $uuid AND metricName.keyword: serviceSyncLatency', - alias='Latency', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -// Cluster Kubelet & CRI-O section -local kubelet_process_cpu = grafana.graphPanel.new( - title='Kubelet process CPU usage', - datasource='$datasource1', - legend_alignAsTable=true, - legend_rightSide=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: kubeletCPU', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'labels.node.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local kubelet_process_memory = grafana.graphPanel.new( - title='Kubelet process RSS memory usage', - datasource='$datasource1', - 
legend_alignAsTable=true, - legend_rightSide=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: kubeletMemory', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'labels.node.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local cri_o_process_cpu = grafana.graphPanel.new( - title='CRI-O process CPU usage', - datasource='$datasource1', - legend_alignAsTable=true, - legend_rightSide=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: crioCPU', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'labels.node.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local cri_o_process_memory = grafana.graphPanel.new( - title='CRI-O RSS memory usage', - datasource='$datasource1', - legend_alignAsTable=true, - legend_rightSide=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: crioMemory', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'labels.node.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: 
'1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -// Master Node section - -local container_cpu_master = grafana.graphPanel.new( - title='Container CPU usage $master', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.node.keyword: $master AND labels.namespace.keyword: $namespace', - alias='{{labels.pod.keyword}} {{labels.container.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local container_memory_master = grafana.graphPanel.new( - title='Container RSS memory $master', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.node.keyword: $master AND labels.namespace.keyword: $namespace', - alias='{{labels.pod.keyword}} {{labels.container.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: 
'1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local cpu_master = grafana.graphPanel.new( - title='CPU $master', - datasource='$datasource1', - legend_alignAsTable=true, - legend_rightSide=true, - legend_max=true, - legend_min=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeCPU-Masters" AND labels.instance.keyword: $master', - alias='{{labels.mode.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - settings: { - script: { - inline: '_value*100', - }, - }, - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.mode.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local memory_master = grafana.graphPanel.new( - title='Memory $master', - datasource='$datasource1', - legend_alignAsTable=true, - legend_rightSide=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryAvailable-Masters" AND labels.instance.keyword: $master', - alias='Available', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - 
], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryTotal-Masters" AND labels.instance.keyword: $master', - alias='Total', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryUtilization-Masters" AND labels.instance.keyword: $master', - alias='Utilization', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -// Worker Node section - -local container_cpu_worker = grafana.graphPanel.new( - title='Container CPU usage $worker', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.node.keyword: $worker AND labels.namespace.keyword: $namespace', - alias='{{labels.pod.keyword}} {{labels.container.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local 
container_memory_worker = grafana.graphPanel.new( - title='Container RSS memory $worker', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.node.keyword: $worker AND labels.namespace.keyword: $namespace', - alias='{{labels.pod.keyword}} {{labels.container.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local cpu_worker = grafana.graphPanel.new( - title='CPU $worker', - datasource='$datasource1', - legend_alignAsTable=true, - legend_rightSide=true, - legend_max=true, - legend_min=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeCPU-Workers" AND labels.instance.keyword: $worker', - alias='{{labels.mode.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - settings: { - script: { - inline: '_value*100', - }, - }, - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.mode.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - 
-local memory_worker = grafana.graphPanel.new( - title='Memory $worker', - datasource='$datasource1', - legend_alignAsTable=true, - legend_rightSide=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryAvailable-Workers" AND labels.instance.keyword: $worker', - alias='Available', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryTotal-Workers" AND labels.instance.keyword: $worker', - alias='Total', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryUtilization-Workers" AND labels.instance.keyword: $worker', - alias='Utilization', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -// Infra Node section - -local container_cpu_infra = grafana.graphPanel.new( - title='Container CPU usage $infra', - datasource='$datasource1', - legend_alignAsTable=true, - legend_avg=true, - legend_max=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.node.keyword: $infra AND labels.namespace.keyword: $namespace', - alias='{{labels.pod.keyword}} 
{{labels.container.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local container_memory_infra = grafana.graphPanel.new( - title='Container RSS memory $infra', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.node.keyword: $infra AND labels.namespace.keyword: $namespace', - alias='{{labels.pod.keyword}} {{labels.container.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'labels.container.keyword', - fake: true, - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '0', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local cpu_infra = grafana.graphPanel.new( - title='CPU $infra', - datasource='$datasource1', - legend_alignAsTable=true, - legend_rightSide=true, - legend_max=true, - legend_min=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - 
query='uuid.keyword: $uuid AND metricName.keyword: "nodeCPU-Infra" AND labels.instance.keyword: $infra', - alias='{{labels.mode.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - settings: { - script: { - inline: '_value*100', - }, - }, - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.mode.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local memory_infra = grafana.graphPanel.new( - title='Memory $infra', - datasource='$datasource1', - legend_alignAsTable=true, - legend_rightSide=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryAvailable-Infra" AND labels.instance.keyword: $infra', - alias='Available', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryTotal-Infra" AND labels.instance.keyword: $infra', - alias='Total', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryUtilization-Infra" AND labels.instance.keyword: $infra', - alias='Utilization', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], 
- bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: '30s', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -// Aggregated worker node usage section -local agg_avg_cpu = grafana.graphPanel.new( - title='Avg CPU usage', - datasource='$datasource1', - legend_alignAsTable=true, - legend_rightSide=true, - legend_avg=true, - legend_max=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeCPU-AggregatedWorkers"', - alias='{{labels.mode.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - settings: { - script: { - inline: '_value*100', - }, - }, - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.mode.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local agg_avg_mem = grafana.graphPanel.new( - title='Avg Memory', - datasource='$datasource1', - legend_alignAsTable=true, - legend_rightSide=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryAvailable-AggregatedWorkers"', - alias='Available', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryTotal-AggregatedWorkers"', - alias='Total', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - field: 
'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -local agg_container_cpu = grafana.graphPanel.new( - title='Container CPU usage', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='percent', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "containerCPU-AggregatedWorkers" AND labels.namespace.keyword: $namespace', - alias='{{labels.pod.keyword}}: {{labels.container.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.container.keyword', - id: '3', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '10', - }, - type: 'terms', - }, - { - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '_term', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - -local agg_container_mem = grafana.graphPanel.new( - title='Container memory RSS', - datasource='$datasource1', - legend_alignAsTable=true, - legend_max=true, - legend_avg=true, - legend_values=true, - format='bytes', -) - .addTarget( - es.target( - query='uuid.keyword: $uuid AND metricName.keyword: "containerMemory-AggregatedWorkers" AND labels.namespace.keyword: $namespace', - alias='{{labels.pod.keyword}}: {{labels.container.keyword}}', - timeField='timestamp', - metrics=[ - { - field: 'value', - id: '1', - type: 'avg', - }, - ], - bucketAggs=[ - { - fake: true, - field: 'labels.pod.keyword', - id: '4', - settings: { - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '10', - }, - type: 'terms', - }, - { - fake: true, - field: 'labels.container.keyword', - id: '3', - settings: 
{ - min_doc_count: '1', - order: 'desc', - orderBy: '1', - size: '10', - }, - type: 'terms', - }, - { - field: 'timestamp', - id: '2', - settings: { - interval: 'auto', - min_doc_count: '1', - trimEdges: 0, - }, - type: 'date_histogram', - }, - ], - ) -); - - -//Dashboard & Templates - -grafana.dashboard.new( - 'Kube-burner report v2', - description='', - editable='true', - time_from='now/y', - time_to='now', - timezone='utc', -) -.addTemplate( - grafana.template.datasource( - 'datasource1', - 'elasticsearch', - 'AWS Dev - ripsaw-kube-burner', - label='Datasource', - regex='/.*kube-burner.*/' - ) -) -.addTemplate( - grafana.template.new( - label='Platform', - name='platform', - current='All', - query='{"find": "terms", "field": "platform.keyword"}', - refresh=2, - multi=true, - includeAll=true, - datasource='$datasource1', - ) -) -.addTemplate( - grafana.template.new( - label='SDN type', - name='sdn', - current='All', - query='{"find": "terms", "field": "sdn_type.keyword"}', - refresh=2, - multi=true, - includeAll=true, - datasource='$datasource1', - ) -) -.addTemplate( - grafana.template.new( - label='Workload', - multi=true, - query='{"find": "terms", "field": "workload.keyword", "query": "platform.keyword: $platform AND sdn_type.keyword: $sdn"}', - refresh=1, - name='workload', - includeAll=false, - datasource='$datasource1' - ) -) -.addTemplate( - grafana.template.new( - label='Worker count', - multi=true, - query='{"find": "terms", "field": "worker_nodes_count", "query": "platform.keyword: $platform AND sdn_type.keyword: $sdn AND workload.keyword: $workload"}', - refresh=1, - name='worker_count', - includeAll=true, - datasource='$datasource1' - ) -) -.addTemplate( - grafana.template.new( - label='UUID', - multi=false, - query='{"find": "terms", "field": "uuid.keyword", "query": "platform.keyword: $platform AND sdn_type.keyword: $sdn AND workload.keyword: $workload AND worker_nodes_count: $worker_count"}', - refresh=2, - name='uuid', - includeAll=false, - 
datasource='$datasource1' - ) -) -.addTemplate( - grafana.template.new( - label='Master nodes', - multi=true, - query='{ "find" : "terms", "field": "labels.node.keyword", "query": "metricName.keyword: nodeRoles AND labels.role.keyword: master AND uuid.keyword: $uuid"}', - refresh=2, - name='master', - includeAll=false, - datasource='$datasource1' - ) -) -.addTemplate( - grafana.template.new( - label='Worker nodes', - multi=true, - query='{ "find" : "terms", "field": "labels.node.keyword", "query": "metricName.keyword: nodeRoles AND labels.role.keyword: worker AND uuid.keyword: $uuid"}', - refresh=2, - name='worker', - includeAll=false, - datasource='$datasource1' - ) -) -.addTemplate( - grafana.template.new( - label='Infra nodes', - multi=true, - query='{ "find" : "terms", "field": "labels.node.keyword", "query": "metricName.keyword: nodeRoles AND labels.role.keyword: infra AND uuid.keyword: $uuid"}', - refresh=2, - name='infra', - includeAll=false, - datasource='$datasource1' - ) -) -.addTemplate( - grafana.template.new( - label='Namespace', - multi=true, - query='{ "find" : "terms", "field": "labels.namespace.keyword", "query": "labels.namespace.keyword: /openshift-.*/ AND uuid.keyword: $uuid"}', - refresh=2, - name='namespace', - includeAll=true, - datasource='$datasource1' - ) -) -.addTemplate( - grafana.template.custom( - label='Latency percentile', - name='latencyPercentile', - current='P99', - query='P99, P95, P50', - multi=false, - includeAll=false, - ) -) -.addPanels( - [ - worker_count { gridPos: { x: 0, y: 0, w: 4, h: 3 } }, - metric_count_panel { gridPos: { x: 4, y: 0, w: 12, h: 3 } }, - openshift_version_panel { gridPos: { x: 16, y: 0, w: 6, h: 3 } }, - etcd_version_panel { gridPos: { x: 22, y: 0, w: 2, h: 3 } }, - summary_panel_1 { gridPos: { x: 0, y: 3, h: 2, w: 24 } }, - summary_panel_2 { gridPos: { x: 0, y: 5, h: 2, w: 24 } }, - ], -) -.addPanel( - grafana.row.new(title='Cluster status', collapse=true).addPanels( - [ - masters_cpu { gridPos: { x: 
0, y: 8, w: 12, h: 9 } }, - masters_memory { gridPos: { x: 12, y: 8, w: 12, h: 9 } }, - node_status_summary { gridPos: { x: 0, y: 17, w: 12, h: 8 } }, - pod_status_summary { gridPos: { x: 12, y: 17, w: 12, h: 8 } }, - kube_api_cpu { gridPos: { x: 0, y: 25, w: 12, h: 9 } }, - kube_api_memory { gridPos: { x: 12, y: 25, w: 12, h: 9 } }, - active_controller_manager_cpu { gridPos: { x: 0, y: 34, w: 12, h: 9 } }, - active_controller_manager_memory { gridPos: { x: 12, y: 34, w: 12, h: 9 } }, - kube_scheduler_cpu { gridPos: { x: 0, y: 43, w: 12, h: 9 } }, - kube_scheduler_memory { gridPos: { x: 12, y: 43, w: 12, h: 9 } }, - hypershift_controlplane_cpu { gridPos: { x: 0, y: 52, w: 12, h: 9 } }, - hypershift_controlplane_memory { gridPos: { x: 12, y: 52, w: 12, h: 9 } }, - ] - ), { x: 0, y: 7, w: 24, h: 1 } -) -.addPanel( - // Panels below for uncollapsed row. - grafana.row.new(title='Pod latency stats', collapse=false), { x: 0, y: 8, w: 24, h: 1 } -) -.addPanels( - [ - average_pod_latency { gridPos: { x: 0, y: 9, w: 12, h: 8 } }, - pod_latencies_summary { gridPos: { x: 12, y: 9, w: 12, h: 8 } }, - pod_conditions_latency { gridPos: { x: 0, y: 17, w: 24, h: 10 } }, - setup_latency { gridPos: { x: 0, y: 27, w: 12, h: 9 } }, - scheduling_throughput { gridPos: { x: 12, y: 27, w: 12, h: 9 } }, - ] -) -.addPanel( - grafana.row.new(title='OVNKubernetes', collapse=true).addPanels( - [ - ovnkube_master_cpu { gridPos: { x: 0, y: 80, w: 12, h: 8 } }, - ovnkube_master_memory { gridPos: { x: 12, y: 80, w: 12, h: 8 } }, - ovnkube_controller_cpu { gridPos: { x: 0, y: 88, w: 12, h: 8 } }, - ovnkube_controller_memory { gridPos: { x: 12, y: 88, w: 12, h: 8 } }, - ] - ), { x: 0, y: 36, w: 24, h: 1 } -) -.addPanel( - grafana.row.new(title='etcd', collapse=false), { x: 0, y: 37, w: 24, h: 1 } -) -.addPanels( - [ - etcd_fsync_latency { gridPos: { x: 0, y: 38, w: 12, h: 9 } }, - etcd_commit_latency { gridPos: { x: 12, y: 38, w: 12, h: 9 } }, - etcd_leader_changes { gridPos: { x: 0, y: 47, w: 12, 
h: 9 } }, - etcd_peer_roundtrip_time { gridPos: { x: 12, y: 47, w: 12, h: 9 } }, - etcd_cpu { gridPos: { x: 0, y: 56, w: 12, h: 9 } }, - etcd_memory { gridPos: { x: 12, y: 56, w: 12, h: 9 } }, - ], -) -.addPanel( - grafana.row.new(title='API and Kubeproxy', collapse=false), { x: 0, y: 65, w: 24, h: 1 } -) -.addPanels( - [ - api_latency_read_only_resource { gridPos: { x: 0, y: 66, w: 12, h: 9 } }, - api_latency_read_only_namespace { gridPos: { x: 12, y: 66, w: 12, h: 9 } }, - api_latency_read_only_cluster { gridPos: { x: 0, y: 75, w: 12, h: 9 } }, - api_latency_mutating { gridPos: { x: 12, y: 75, w: 12, h: 9 } }, - api_request_rate { gridPos: { x: 0, y: 84, w: 12, h: 9 } }, - service_sync_latency { gridPos: { x: 12, y: 84, w: 12, h: 9 } }, - ], -) - -.addPanel( - grafana.row.new(title='Cluster Kubelet & CRI-O', collapse=false), { x: 0, y: 93, w: 24, h: 1 } -) -.addPanels( - [ - kubelet_process_cpu { gridPos: { x: 0, y: 94, w: 12, h: 8 } }, - kubelet_process_memory { gridPos: { x: 12, y: 94, w: 12, h: 8 } }, - cri_o_process_cpu { gridPos: { x: 0, y: 103, w: 12, h: 8 } }, - cri_o_process_memory { gridPos: { x: 12, y: 103, w: 12, h: 8 } }, - ], -) - -.addPanel( - grafana.row.new(title='Master: $master', collapse=true, repeat='$master').addPanels( - [ - container_cpu_master { gridPos: { x: 0, y: 112, w: 12, h: 9 } }, - container_memory_master { gridPos: { x: 12, y: 112, w: 12, h: 9 } }, - cpu_master { gridPos: { x: 0, y: 121, w: 12, h: 9 } }, - memory_master { gridPos: { x: 12, y: 121, w: 12, h: 9 } }, - ] - ), { x: 0, y: 111, w: 24, h: 1 } -) - -.addPanel( - grafana.row.new(title='Worker: $worker', collapse=true, repeat='$worker').addPanels( - [ - container_cpu_worker { gridPos: { x: 0, y: 112, w: 12, h: 9 } }, - container_memory_worker { gridPos: { x: 12, y: 112, w: 12, h: 9 } }, - cpu_worker { gridPos: { x: 0, y: 121, w: 12, h: 9 } }, - memory_worker { gridPos: { x: 12, y: 121, w: 12, h: 9 } }, - ] - ), { x: 0, y: 111, w: 24, h: 1 } -) - -.addPanel( - 
grafana.row.new(title='Infra: $infra', collapse=true, repeat='$infra').addPanels(
-    [
-      container_cpu_infra { gridPos: { x: 0, y: 131, w: 12, h: 9 } },
-      container_memory_infra { gridPos: { x: 12, y: 131, w: 12, h: 9 } },
-      cpu_infra { gridPos: { x: 0, y: 140, w: 12, h: 9 } },
-      memory_infra { gridPos: { x: 12, y: 140, w: 12, h: 9 } },
-    ]
-  ), { x: 0, y: 130, w: 24, h: 1 }
-)
-
-.addPanel(
-  grafana.row.new(title='Aggregated worker nodes usage (only in aggregated metrics profile)', collapse=true).addPanels(
-    [
-      agg_avg_cpu { gridPos: { x: 0, y: 150, w: 12, h: 9 } },
-      agg_avg_mem { gridPos: { x: 12, y: 150, w: 12, h: 9 } },
-      agg_container_cpu { gridPos: { x: 0, y: 159, w: 12, h: 9 } },
-      agg_container_mem { gridPos: { x: 12, y: 159, w: 12, h: 9 } },
-    ]
-  ), { x: 0, y: 149, w: 24, h: 1 }
-)
diff --git a/templates/General/ocp-performance-v2.jsonnet b/templates/General/ocp-performance-v2.jsonnet
new file mode 100644
index 0000000..f43765d
--- /dev/null
+++ b/templates/General/ocp-performance-v2.jsonnet
@@ -0,0 +1,149 @@
+// Openshift Performance dashboard (grafonnet template).
+// Panel builders, queries and template variables come from the
+// assets/ocp-performance libraries imported below.
+//
+// Layout note: every panel is given a gridPos of { x, y, w, h }. Panels that share
+// a row must not overlap, so a panel's y is at least y + h of the panel above it.
+local panels = import '../../assets/ocp-performance/panels.libsonnet';
+local queries = import '../../assets/ocp-performance/queries.libsonnet';
+local variables = import '../../assets/ocp-performance/variables.libsonnet';
+local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';
+
+g.dashboard.new('Openshift Performance')
++ g.dashboard.withDescription(|||
+  Performance dashboard for Red Hat Openshift
+|||)
++ g.dashboard.time.withFrom('now-1h')
++ g.dashboard.time.withTo('now')
++ g.dashboard.withTimezone('utc')
++ g.dashboard.timepicker.withRefreshIntervals(['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'])
++ g.dashboard.timepicker.withTimeOptions(['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d'])
++ g.dashboard.withRefresh('30s')
++ g.dashboard.withEditable(true)
++ g.dashboard.graphTooltip.withSharedCrosshair()
++ g.dashboard.templating.withList([
+  variables.datasource,
+  variables.master_node,
+  variables.worker_node,
+  variables.infra_node,
+  variables.namespace,
+  variables.block_device,
+  variables.net_device,
+  variables.interval,
+])
++ g.dashboard.withPanels([
+  g.panel.row.new('OVN')
+  + g.panel.row.withGridPos({ x: 0, y: 0, w: 24, h: 1 })
+  + g.panel.row.withCollapsed(true)
+  + g.panel.row.withPanels([
+    panels.timeSeries.generic('99% Pod Annotation Latency', 's', queries.ovnAnnotationLatency.query(), { x: 0, y: 1, w: 24, h: 12 }),
+    panels.timeSeries.generic('99% CNI Request ADD Latency', 's', queries.ovnCNIAdd.query(), { x: 0, y: 13, w: 12, h: 8 }),
+    panels.timeSeries.generic('99% CNI Request DEL Latency', 's', queries.ovnCNIDel.query(), { x: 12, y: 13, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('ovnkube-master CPU Usage', 'percent', queries.ovnKubeMasterCPU.query(), { x: 0, y: 21, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('ovnkube-master Memory Usage', 'bytes', queries.ovnKubeMasterMem.query(), { x: 12, y: 21, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Top 10 ovn-controller CPU Usage', 'percent', queries.topOvnControllerCPU.query(), { x: 0, y: 29, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Top 10 ovn-controller Memory Usage', 'bytes', queries.topOvnControllerMem.query(), { x: 12, y: 29, w: 12, h: 8 }),
+  ]),
+  g.panel.row.new('Monitoring stack')
+  + g.panel.row.withGridPos({ x: 0, y: 0, w: 24, h: 1 })
+  + g.panel.row.withCollapsed(true)
+  + g.panel.row.withPanels([
+    panels.timeSeries.genericLegend('Prometheus Replica Memory usage', 'bytes', queries.promReplMemUsage.query(), { x: 0, y: 2, w: 24, h: 12 }),
+  ]),
+  g.panel.row.new('Stackrox')
+  + g.panel.row.withGridPos({ x: 0, y: 0, w: 24, h: 1 })
+  + g.panel.row.withCollapsed(true)
+  + g.panel.row.withPanels([
+    panels.timeSeries.genericLegend('Top 25 stackrox container RSS bytes', 'bytes', queries.stackroxMem.query(), { x: 0, y: 2, w: 24, h: 12 }),
+    panels.timeSeries.genericLegend('Top 25 stackrox container CPU percent', 'percent', queries.stackroxCPU.query(), { x: 0, y: 14, w: 24, h: 12 }),
+  ]),
+  g.panel.row.new('Cluster Kubelet')
+  + g.panel.row.withGridPos({ x: 0, y: 0, w: 24, h: 1 })
+  + g.panel.row.withCollapsed(true)
+  + g.panel.row.withPanels([
+    panels.timeSeries.genericLegend('Top 10 Kubelet CPU usage', 'percent', queries.kubeletCPU.query(), { x: 0, y: 3, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Top 10 crio CPU usage', 'percent', queries.crioCPU.query(), { x: 12, y: 3, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Top 10 Kubelet memory usage', 'bytes', queries.kubeletMemory.query(), { x: 0, y: 11, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Top 10 crio memory usage', 'bytes', queries.crioMemory.query(), { x: 12, y: 11, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('inodes usage in /var/run', 'percent', queries.crioINodes.query(), { x: 0, y: 19, w: 24, h: 8 }),
+  ]),
+  g.panel.row.new('Cluster Details')
+  + g.panel.row.withGridPos({ x: 0, y: 0, w: 24, h: 1 })
+  + g.panel.row.withCollapsed(true)
+  + g.panel.row.withPanels([
+    panels.stat.base('Current Node Count', queries.currentNodeCount.query(), { x: 0, y: 4, w: 8, h: 3 }),
+    panels.stat.base('Current Namespace Count', queries.currentNamespaceCount.query(), { x: 8, y: 4, w: 8, h: 3 }),
+    panels.stat.base('Current Pod Count', queries.currentPodCount.query(), { x: 16, y: 4, w: 8, h: 3 }),
+    // NOTE(review): same query as the 'Current Node Count' stat above - confirm this is intended.
+    panels.timeSeries.generic('Number of nodes', 'none', queries.currentNodeCount.query(), { x: 0, y: 12, w: 8, h: 8 }),
+    panels.timeSeries.generic('Namespace count', 'none', queries.nsCount.query(), { x: 8, y: 12, w: 8, h: 8 }),
+    panels.timeSeries.generic('Pod count', 'none', queries.podCount.query(), { x: 16, y: 12, w: 8, h: 8 }),
+    panels.timeSeries.generic('Secret & configmap count', 'none', queries.secretCmCount.query(), { x: 0, y: 20, w: 8, h: 8 }),
+    panels.timeSeries.generic('Deployment count', 'none', queries.deployCount.query(), { x: 8, y: 20, w: 8, h: 8 }),
+    panels.timeSeries.generic('Services count', 'none', queries.servicesCount.query(), { x: 16, y: 20, w: 8, h: 8 }),
+    panels.timeSeries.generic('Routes count', 'none', queries.routesCount.query(), { x: 0, y: 28, w: 8, h: 8 }),
+    panels.timeSeries.generic('Alerts', 'none', queries.alerts.query(), { x: 8, y: 28, w: 8, h: 8 }),
+    panels.timeSeries.genericLegend('Pod Distribution', 'none', queries.podDistribution.query(), { x: 16, y: 28, w: 8, h: 8 }),
+    panels.timeSeries.genericLegend('Top 10 container RSS', 'bytes', queries.top10ContMem.query(), { x: 0, y: 36, w: 24, h: 8 }),
+    panels.timeSeries.genericLegend('Top 10 container CPU', 'percent', queries.top10ContCPU.query(), { x: 0, y: 44, w: 12, h: 8 }),
+    panels.timeSeries.generic('Goroutines count', 'none', queries.goroutinesCount.query(), { x: 12, y: 44, w: 12, h: 8 }),
+  ]),
+  g.panel.row.new('Cluster Operators Details')
+  + g.panel.row.withGridPos({ x: 0, y: 0, w: 24, h: 1 })
+  + g.panel.row.withCollapsed(true)
+  + g.panel.row.withPanels([
+    panels.stat.base('Cluster operators overview', queries.clusterOperatorsOverview.query(), { x: 0, y: 4, w: 24, h: 3 }),
+    panels.timeSeries.genericLegend('Cluster operators information', 'none', queries.clusterOperatorsInformation.query(), { x: 0, y: 7, w: 8, h: 8 }),
+    panels.timeSeries.genericLegend('Cluster operators degraded', 'none', queries.clusterOperatorsDegraded.query(), { x: 8, y: 7, w: 8, h: 8 }),
+  ]),
+  // Node rows: each one sets withRepeat on its node template variable.
+  g.panel.row.new('Master: $_master_node')
+  + g.panel.row.withGridPos({ x: 0, y: 0, w: 24, h: 1 })
+  + g.panel.row.withCollapsed(true)
+  + g.panel.row.withRepeat('_master_node')
+  + g.panel.row.withPanels([
+    panels.timeSeries.genericLegend('CPU Basic: $_master_node', 'percent', queries.nodeCPU.query('$_master_node'), { x: 0, y: 0, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('System Memory: $_master_node', 'bytes', queries.nodeMemory.query('$_master_node'), { x: 12, y: 0, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Disk throughput: $_master_node', 'Bps', queries.diskThroughput.query('$_master_node'), { x: 0, y: 8, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Disk IOPS: $_master_node', 'iops', queries.diskIOPS.query('$_master_node'), { x: 12, y: 8, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Network Utilization: $_master_node', 'bps', queries.networkUtilization.query('$_master_node'), { x: 0, y: 16, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Network Packets: $_master_node', 'pps', queries.networkPackets.query('$_master_node'), { x: 12, y: 16, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Network packets drop: $_master_node', 'pps', queries.networkDrop.query('$_master_node'), { x: 0, y: 24, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Conntrack stats: $_master_node', '', queries.conntrackStats.query('$_master_node'), { x: 12, y: 24, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Top 10 container CPU: $_master_node', 'percent', queries.top10ContainerCPU.query('$_master_node'), { x: 0, y: 32, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Top 10 container RSS: $_master_node', 'bytes', queries.top10ContainerRSS.query('$_master_node'), { x: 12, y: 32, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Container fs write rate: $_master_node', 'Bps', queries.containerWriteBytes.query('$_master_node'), { x: 0, y: 40, w: 12, h: 8 }),
+  ]),
+  g.panel.row.new('Worker: $_worker_node')
+  + g.panel.row.withGridPos({ x: 0, y: 0, w: 24, h: 1 })
+  + g.panel.row.withCollapsed(true)
+  + g.panel.row.withRepeat('_worker_node')
+  + g.panel.row.withPanels([
+    panels.timeSeries.genericLegend('CPU Basic: $_worker_node', 'percent', queries.nodeCPU.query('$_worker_node'), { x: 0, y: 0, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('System Memory: $_worker_node', 'bytes', queries.nodeMemory.query('$_worker_node'), { x: 12, y: 0, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Disk throughput: $_worker_node', 'Bps', queries.diskThroughput.query('$_worker_node'), { x: 0, y: 8, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Disk IOPS: $_worker_node', 'iops', queries.diskIOPS.query('$_worker_node'), { x: 12, y: 8, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Network Utilization: $_worker_node', 'bps', queries.networkUtilization.query('$_worker_node'), { x: 0, y: 16, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Network Packets: $_worker_node', 'pps', queries.networkPackets.query('$_worker_node'), { x: 12, y: 16, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Network packets drop: $_worker_node', 'pps', queries.networkDrop.query('$_worker_node'), { x: 0, y: 24, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Conntrack stats: $_worker_node', '', queries.conntrackStats.query('$_worker_node'), { x: 12, y: 24, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Top 10 container CPU: $_worker_node', 'percent', queries.top10ContainerCPU.query('$_worker_node'), { x: 0, y: 32, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Top 10 container RSS: $_worker_node', 'bytes', queries.top10ContainerRSS.query('$_worker_node'), { x: 12, y: 32, w: 12, h: 8 }),
+  ]),
+  g.panel.row.new('Infra: $_infra_node')
+  + g.panel.row.withGridPos({ x: 0, y: 0, w: 24, h: 1 })
+  + g.panel.row.withCollapsed(true)
+  + g.panel.row.withRepeat('_infra_node')
+  + g.panel.row.withPanels([
+    panels.timeSeries.genericLegend('CPU Basic: $_infra_node', 'percent', queries.nodeCPU.query('$_infra_node'), { x: 0, y: 0, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('System Memory: $_infra_node', 'bytes', queries.nodeMemory.query('$_infra_node'), { x: 12, y: 0, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Disk throughput: $_infra_node', 'Bps', queries.diskThroughput.query('$_infra_node'), { x: 0, y: 8, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Disk IOPS: $_infra_node', 'iops', queries.diskIOPS.query('$_infra_node'), { x: 12, y: 8, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Network Utilization: $_infra_node', 'bps', queries.networkUtilization.query('$_infra_node'), { x: 0, y: 16, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Network Packets: $_infra_node', 'pps', queries.networkPackets.query('$_infra_node'), { x: 12, y: 16, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Network packets drop: $_infra_node', 'pps', queries.networkDrop.query('$_infra_node'), { x: 0, y: 24, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Conntrack stats: $_infra_node', '', queries.conntrackStats.query('$_infra_node'), { x: 12, y: 24, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Top 10 container CPU: $_infra_node', 'percent', queries.top10ContainerCPU.query('$_infra_node'), { x: 0, y: 32, w: 12, h: 8 }),
+    panels.timeSeries.genericLegend('Top 10 container RSS: $_infra_node', 'bytes', queries.top10ContainerRSS.query('$_infra_node'), { x: 12, y: 32, w: 12, h: 8 }),
+  ]),
+])
diff --git a/templates/jsonnetfile.json b/templates/jsonnetfile.json
new file mode 100644
index 0000000..2414c86
--- /dev/null
+++ b/templates/jsonnetfile.json
@@ -0,0 +1,15 @@
+{
+  "version": 1,
+  "dependencies": [
+    {
+      "source": {
+        "git": {
+          "remote": "https://github.com/grafana/grafonnet.git",
+          "subdir": "gen/grafonnet-latest"
+        }
+      },
+      "version": "main"
+    }
+  ],
+  "legacyImports": true
+}
diff --git a/templates/jsonnetfile.lock.json b/templates/jsonnetfile.lock.json
new file mode 100644
index 0000000..de917ca
--- /dev/null
+++ b/templates/jsonnetfile.lock.json
@@ -0,0 +1,46 @@
+{
+  "version": 1,
+  "dependencies": [
+    {
+      "source": {
+        "git": {
+          "remote": "https://github.com/grafana/grafonnet.git",
+          "subdir": "gen/grafonnet-latest"
+        }
+      },
+      "version": "f40876da40d787e9c288de0b547ac85597c781d9",
+      "sum": "sVzVlSLbxPkAurwO19YERigLMmRfVsViMcWC0gkTTqU="
+    },
+    {
+      "source": {
+        "git": {
+          "remote": "https://github.com/grafana/grafonnet.git",
+          "subdir": "gen/grafonnet-v10.0.0"
+        }
+      },
+      "version": "f40876da40d787e9c288de0b547ac85597c781d9",
+      "sum": "VHHuBN+bM1v2KtzfsgE7JIXND8fVJTDQlYDWQ4NhG7s="
+    },
+    {
+      "source": {
+        "git": {
+          "remote": "https://github.com/jsonnet-libs/docsonnet.git",
+          "subdir": "doc-util"
+        }
+      },
+      "version": "cc9df63eaca56f39e8e4e1ce192141333257b08d",
+      "sum": "f6smzUo/9K2+iOLpDEbKPiU+W1eWjpIQo6drXu6kJ7U="
+    },
+    {
+      "source": {
+        "git": {
+          "remote": "https://github.com/jsonnet-libs/xtd.git",
+          "subdir": ""
+        }
+      },
+      "version": "0256a910ac71f0f842696d7bca0bf01ea77eb654",
+      "sum": "zBOpb1oTNvXdq9RF6yzTHill5r1YTJLBBoqyx4JYtAg="
+    }
+  ],
+  "legacyImports": false
+}