From 1cac2561c8172a230d1e5a729aca1e77bf8b3962 Mon Sep 17 00:00:00 2001
From: Vishnu Challa
Date: Fri, 15 Sep 2023 20:12:37 -0400
Subject: [PATCH] Added: Support for multiple folders, Python client

Modified: Internal logic for creating/updating dashboards
Deleted: Removed old logic as it is not flexible for automation
---
 .github/workflows/ci.yml                        |   30 +-
 .github/workflows/grafana.yml                   |   15 +-
 Dockerfile                                      |   23 +
 Makefile                                        |    9 +-
 dittybopper/README.md                           |    4 +-
 dittybopper/deploy.sh                           |    4 +-
 dittybopper/k8s-deploy.sh                       |    1 +
 dittybopper/syncer/Dockerfile                   |    7 -
 dittybopper/syncer/entrypoint.py                |  129 +
 dittybopper/syncer/entrypoint.sh                |   22 -
 .../templates/dittybopper.yaml.template         |    8 +-
 .../templates/k8s-dittybopper.yaml.template     |    8 +-
 requirements.txt                                |    2 +
 templates/{ => CPT}/k8s-perf.jsonnet            |    2 +-
 templates/{ => CPT}/kube-burner.jsonnet         |    2 +-
 .../api-performance-overview.jsonnet            |    2 +-
 templates/{ => General}/cilium-k8s-perf.jsonnet |    2 +-
 .../etcd-on-cluster-dashboard.jsonnet           |    2 +-
 .../hypershift-performance.jsonnet              |    2 +-
 templates/General/k8s-perf.jsonnet              |  499 ++
 templates/General/kube-burner.jsonnet           | 4568 +++++++++++++++++
 .../{ => General}/ocp-performance.jsonnet       |    2 +-
 templates/{ => General}/ovn-dashboard.jsonnet   |    2 +-
 .../{ => General}/pgbench-dashboard.jsonnet     |    2 +-
 templates/{ => General}/uperf-perf.jsonnet      |    2 +-
 .../{ => General}/vegeta-wrapper.jsonnet        |    2 +-
 templates/{ => General}/ycsb.jsonnet            |    2 +-
 27 files changed, 5270 insertions(+), 83 deletions(-)
 create mode 100644 Dockerfile
 delete mode 100644 dittybopper/syncer/Dockerfile
 create mode 100644 dittybopper/syncer/entrypoint.py
 delete mode 100755 dittybopper/syncer/entrypoint.sh
 create mode 100644 requirements.txt
 rename templates/{ => CPT}/k8s-perf.jsonnet (99%)
 rename templates/{ => CPT}/kube-burner.jsonnet (99%)
 rename templates/{ => General}/api-performance-overview.jsonnet (99%)
 rename templates/{ => General}/cilium-k8s-perf.jsonnet (99%)
 rename templates/{ => General}/etcd-on-cluster-dashboard.jsonnet (99%)
 rename templates/{ => General}/hypershift-performance.jsonnet (99%)
 create mode 100644 templates/General/k8s-perf.jsonnet
 create mode 100644 templates/General/kube-burner.jsonnet
 rename templates/{ => General}/ocp-performance.jsonnet (99%)
 rename templates/{ => General}/ovn-dashboard.jsonnet (99%)
 rename templates/{ => General}/pgbench-dashboard.jsonnet (98%)
 rename templates/{ => General}/uperf-perf.jsonnet (99%)
 rename templates/{ => General}/vegeta-wrapper.jsonnet (98%)
 rename templates/{ => General}/ycsb.jsonnet (99%)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 48d7fa1..1e79489 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,6 +17,19 @@ on:
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
+  lint:
+    runs-on: ubuntu-latest
+
+    steps:
+      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
+      - uses: actions/checkout@v2
+
+      - name: Get dependencies
+        run: make deps
+
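+      # Note: bash expands '**' like '*' unless 'shopt -s globstar' is enabled,
+      # so the glob below matches exactly one directory level
+      # (templates/<folder>/*.jsonnet), which is all the new CPT/ and General/
+      # layout needs.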
"http://admin:admin@localhost:3000/api/dashboards/db" -o /dev/null; - done - - lint: - runs-on: ubuntu-latest - - steps: - # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - - uses: actions/checkout@v2 - - - name: Get dependencies - run: make deps - - - name: Run jsonnetfmt - run: for t in templates/*.jsonnet; do echo "Testing template ${t}"; ./bin/jsonnetfmt --test $t; echo 'Results:' ${?}; done + done \ No newline at end of file diff --git a/.github/workflows/grafana.yml b/.github/workflows/grafana.yml index f87ed2b..0618d03 100644 --- a/.github/workflows/grafana.yml +++ b/.github/workflows/grafana.yml @@ -3,10 +3,6 @@ defaults: run: shell: bash -env: - # Space separated list as a string of all dashboard json files in "rendered" to load - DASHBOARDS: "kube-burner.json" - on: push: branches: [ master ] @@ -25,13 +21,10 @@ jobs: # The secret GRAFANA_URL must be set with the format http://username:password@url.org without a trailing / - name: Import dashboards to grafana run: > - dashboard_list=($(echo $DASHBOARDS)); - for path in "${dashboard_list[@]}"; do - full_path="rendered/${path}"; - echo "Importing ${full_path}"; - dashboard=$(cat ${full_path}); + for t in rendered/**/*.json; do + echo "Importing ${t}"; + dashboard=$(cat ${t}); echo "{\"dashboard\": ${dashboard}, \"overwrite\": true}" | curl -k -Ss -XPOST -H "Content-Type: application/json" -H "Accept: application/json" -d@- "${{ secrets.GRAFANA_URL }}/api/dashboards/db" -o /dev/null; - done - + done \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..daad306 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,23 @@ +FROM ubuntu + +WORKDIR /performance-dashboards +ARG DEBIAN_FRONTEND=noninteractive + +# Install necessary libraries for subsequent commands +RUN apt-get update && apt-get install -y podman dumb-init python3.6 python3-distutils python3-pip python3-apt + +COPY . . +RUN chmod -R 775 /performance-dashboards + +# Install dependencies +RUN python3 -m pip install --upgrade pip +RUN pip install -r requirements.txt + +# Cleanup the installation remainings +RUN apt-get clean autoclean && \ + apt-get autoremove --yes && \ + rm -rf /var/lib/{apt,dpkg,cache,log}/ + +# Start the command +ENTRYPOINT ["/usr/bin/dumb-init", "--"] +CMD ["python3", "dittybopper/syncer/entrypoint.py"] \ No newline at end of file diff --git a/Makefile b/Makefile index 479a16b..2b6a8ff 100644 --- a/Makefile +++ b/Makefile @@ -7,10 +7,10 @@ SYNCER_IMG_TAG ?= quay.io/cloud-bulldozer/dittybopper-syncer:latest PLATFORM = linux/amd64,linux/arm64,linux/ppc64le,linux/s390x # Get all templates at $(TEMPLATESDIR) -TEMPLATES = $(wildcard $(TEMPLATESDIR)/*.jsonnet) +TEMPLATES := $(wildcard $(TEMPLATESDIR)/**/*.jsonnet) # Replace $(TEMPLATESDIR)/*.jsonnet by $(OUTPUTDIR)/*.json -outputs = $(patsubst $(TEMPLATESDIR)/%.jsonnet, $(OUTPUTDIR)/%.json, $(TEMPLATES)) +outputs := $(patsubst $(TEMPLATESDIR)/%.jsonnet, $(OUTPUTDIR)/%.json, $(TEMPLATES)) all: deps format build @@ -38,10 +38,11 @@ $(BINDIR)/jsonnet: # Build each template and output to $(OUTPUTDIR) $(OUTPUTDIR)/%.json: $(TEMPLATESDIR)/%.jsonnet @echo "Building template $<" + mkdir -p $(dir $@) $(BINDIR)/jsonnet $< > $@ build-syncer-image: build - podman build --platform=${PLATFORM} -f dittybopper/syncer/Dockerfile --manifest=${SYNCER_IMG_TAG} . + podman build --platform=${PLATFORM} -f Dockerfile --manifest=${SYNCER_IMG_TAG} . 
 
 push-syncer-image:
-	podman manifest push ${SYNCER_IMG_TAG} ${SYNCER_IMG_TAG}
+	podman manifest push ${SYNCER_IMG_TAG} ${SYNCER_IMG_TAG}
\ No newline at end of file
diff --git a/dittybopper/README.md b/dittybopper/README.md
index ee9fed2..3975a3a 100644
--- a/dittybopper/README.md
+++ b/dittybopper/README.md
@@ -27,9 +27,9 @@ If using disconnected, you need to sync the cloud-bulldozer grafana image (shown
 dittybopper/templates/dittybopper.yaml.template file) and your chosen syncer image (defaults
 to quay.io/cloud-bulldozer/dittybopper-syncer:latest).
 
-The syncer image is built with the context at the root of the repository, and the image in the dittybopper/syncer directory.
+The syncer image is built with the context at the root of the repository, and the Dockerfile in the root directory.
 You can build it with `make build-syncer-image SYNCER_IMG_TAG=container.registry.org/organization/syncer:latest`
-Alternatively, you can run the following command form the root folder of this repository: `podman build -f dittybopper/syncer/Dockerfile -t=container.registry.org/organization/syncer:latest .`
+Alternatively, you can run the following command from the root folder of this repository: `podman build -f Dockerfile -t=container.registry.org/organization/syncer:latest .`
 
 ## Contribute
diff --git a/dittybopper/deploy.sh b/dittybopper/deploy.sh
index 09d0cde..c90adb4 100755
--- a/dittybopper/deploy.sh
+++ b/dittybopper/deploy.sh
@@ -40,8 +40,8 @@ END
 
 export PROMETHEUS_USER=internal
 export GRAFANA_ADMIN_PASSWORD=admin
-export DASHBOARDS="ocp-performance.json api-performance-overview.json etcd-on-cluster-dashboard.json hypershift-performance.json ovn-dashboard.json"
+export GRAFANA_URL="http://admin:${GRAFANA_ADMIN_PASSWORD}@localhost:3000"
 export SYNCER_IMAGE=${SYNCER_IMAGE:-"quay.io/cloud-bulldozer/dittybopper-syncer:latest"} # Syncer image
 export GRAFANA_IMAGE=${GRAFANA_IMAGE:-"quay.io/cloud-bulldozer/grafana:9.4.3"} # Grafana image
 
 # Set defaults for command options
diff --git a/dittybopper/k8s-deploy.sh b/dittybopper/k8s-deploy.sh
index 47eb7aa..7ba6817 100755
--- a/dittybopper/k8s-deploy.sh
+++ b/dittybopper/k8s-deploy.sh
@@ -38,6 +38,7 @@ END
 
 export PROMETHEUS_USER=internal
 export GRAFANA_ADMIN_PASSWORD=admin
+export GRAFANA_URL="http://admin:${GRAFANA_ADMIN_PASSWORD}@localhost:3000"
 export DASHBOARDS="k8s-performance.json"
 export SYNCER_IMAGE=${SYNCER_IMAGE:-"quay.io/cloud-bulldozer/dittybopper-syncer:latest"} # Syncer image
 export GRAFANA_IMAGE=${GRAFANA_IMAGE:-"quay.io/cloud-bulldozer/grafana:9.4.3"} # Grafana image
diff --git a/dittybopper/syncer/Dockerfile b/dittybopper/syncer/Dockerfile
deleted file mode 100644
index 851b462..0000000
--- a/dittybopper/syncer/Dockerfile
+++ /dev/null
@@ -1,7 +0,0 @@
-FROM registry.access.redhat.com/ubi8/ubi-minimal
-
-WORKDIR /performance-dashboards
-COPY dittybopper/syncer/entrypoint.sh /bin/entrypoint.sh
-COPY rendered/*.json /performance-dashboards/
-RUN chmod -R 775 /performance-dashboards
-ENTRYPOINT ["entrypoint.sh"]
diff --git a/dittybopper/syncer/entrypoint.py b/dittybopper/syncer/entrypoint.py
new file mode 100644
index 0000000..e9d607a
--- /dev/null
+++ b/dittybopper/syncer/entrypoint.py
@@ -0,0 +1,129 @@
+import json
+import logging
+import os
+import requests
+import uuid
+import time
+from collections import defaultdict
+
+logging.basicConfig(level=logging.INFO)
+
+
+class GrafanaOperations:
+    """
+    This class is responsible for Grafana operations
+    """
+    def __init__(self, grafana_url: str, input_directory: str):
+        self.grafana_url = grafana_url
+        self.input_directory = input_directory
+        self.dashboards = defaultdict(list)
+        self.folder_map = dict()
+        self.logger = logging.getLogger(__name__)
+
+    def fetch_all_dashboards(self):
+        """
+        This method fetches all rendered dashboards
+        :return:
+        """
+        self.get_all_folders()
+        self.folder_map['General'] = None
+        for root, _, files in os.walk(self.input_directory):
+            folder_name = os.path.basename(root)
+            json_files = [os.path.join(root, filename) for filename in files if filename.endswith(".json")]
+            folder_name = "General" if (folder_name == "") else folder_name
+            if folder_name in self.folder_map:
+                folder_id = self.folder_map[folder_name]
+            else:
+                folder_id = self.create_folder(folder_name)
+            self.dashboards[folder_id].extend(json_files)
+
+    def get_all_folders(self):
+        """
+        This method gets the entire list of folders in grafana
+        :return:
+        """
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+        }
+        try:
+            response = requests.get(
+                f"{self.grafana_url}/api/folders",
+                headers=headers,
+            )
+            response_json = response.json()
+            self.folder_map = {each_folder['title']: each_folder['id'] for each_folder in response_json}
+        except requests.exceptions.RequestException as e:
+            raise Exception(f"Error listing folders. Message: {e}")
+
+    def create_folder(self, folder_name):
+        """
+        This method creates a folder in grafana
+        :return:
+        """
+        uid = str(uuid.uuid4())
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+        }
+        try:
+            response = requests.post(
+                f"{self.grafana_url}/api/folders",
+                headers=headers,
+                json={
+                    "title": folder_name,
+                    "uid": uid,
+                },
+            )
+            response_json = response.json()
+            self.folder_map[folder_name] = response_json['id']
+            return response_json['id']
+
+        except requests.exceptions.RequestException as e:
+            raise Exception(f"Error creating folder with name:'{folder_name}' and uid:'{uid}'. Message: {e}")
+
+    def read_dashboard_json(self, json_file):
+        """
+        This method reads dashboard from json file
+        :return:
+        """
+        with open(json_file, 'r') as f:
+            return json.load(f)
+
+    def create_dashboards(self):
+        """
+        This method creates/updates dashboard with new json
+        :return:
+        """
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+        }
+        for folder_id, files in self.dashboards.items():
+            for json_file in set(files):
+                dashboard_json = self.read_dashboard_json(json_file)
+                try:
+                    response = requests.post(
+                        f"{self.grafana_url}/api/dashboards/db",
+                        headers=headers,
+                        json={
+                            "dashboard": dashboard_json,
+                            "folderId": folder_id,
+                            "overwrite": True,
+                        },
+                    )
+                    if response.status_code == 200:
+                        self.logger.info(f"Dashboard '{dashboard_json['title']}' created successfully in folder '{folder_id}'")
+                    else:
+                        raise Exception(
+                            f"Failed to create dashboard '{dashboard_json['title']}' in folder '{folder_id}'. Status code: {response.status_code}. Message: {response.text}")
+
+                except requests.exceptions.RequestException as e:
+                    raise Exception(f"Error creating dashboard '{dashboard_json['title']}' in folder '{folder_id}'. Message: {e}")
+
+if __name__ == '__main__':
+    grafana_operations = GrafanaOperations(os.environ.get("GRAFANA_URL"), os.environ.get("INPUT_DIR"))
+    grafana_operations.fetch_all_dashboards()
+    grafana_operations.create_dashboards()
+    while True:
+        time.sleep(60)
diff --git a/dittybopper/syncer/entrypoint.sh b/dittybopper/syncer/entrypoint.sh
deleted file mode 100755
index d674d90..0000000
--- a/dittybopper/syncer/entrypoint.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-
-while [[ $(curl -s -o /dev/null -w '%{http_code}' http://localhost:3000/api/health) != "200" ]]; do
-  echo "Grafana still not ready, waiting 5 seconds"
-  sleep 5
-done
-
-for d in ${DASHBOARDS}; do
-  if [[ ! -f $d ]]; then
-    echo "Dashboard ${d} not found"
-    continue
-  else
-    echo "Importing dashboard $d"
-    dashboard=$(cat ${d})
-    echo "{\"dashboard\": ${dashboard}, \"overwrite\": true}" | \
-      curl -Ss -XPOST -H "Content-Type: application/json" -H "Accept: application/json" -d@- \
-      "http://admin:${GRAFANA_ADMIN_PASSWORD}@localhost:3000/api/dashboards/db" -o /dev/null
-  fi
-done
-
-echo "Dittybopper ready"
-exec sleep inf
diff --git a/dittybopper/templates/dittybopper.yaml.template b/dittybopper/templates/dittybopper.yaml.template
index 725cbc0..b9f8945 100644
--- a/dittybopper/templates/dittybopper.yaml.template
+++ b/dittybopper/templates/dittybopper.yaml.template
@@ -60,10 +60,10 @@ spec:
       - name: dittybopper-syncer
         imagePullPolicy: Always
         env:
-          - name: GRAFANA_ADMIN_PASSWORD
-            value: ${GRAFANA_ADMIN_PASSWORD}
-          - name: DASHBOARDS
-            value: ${DASHBOARDS}
+          - name: GRAFANA_URL
+            value: ${GRAFANA_URL}
+          - name: INPUT_DIR
+            value: "/performance-dashboards/rendered/"
         image: ${SYNCER_IMAGE}
       volumes:
       - name: sc-grafana-config
diff --git a/dittybopper/templates/k8s-dittybopper.yaml.template b/dittybopper/templates/k8s-dittybopper.yaml.template
index 282cf69..cffb85a 100644
--- a/dittybopper/templates/k8s-dittybopper.yaml.template
+++ b/dittybopper/templates/k8s-dittybopper.yaml.template
@@ -48,10 +48,10 @@ spec:
       - name: dittybopper-syncer
         imagePullPolicy: Always
         env:
-          - name: GRAFANA_ADMIN_PASSWORD
-            value: ${GRAFANA_ADMIN_PASSWORD}
-          - name: DASHBOARDS
-            value: ${DASHBOARDS}
+          - name: GRAFANA_URL
+            value: ${GRAFANA_URL}
+          - name: INPUT_DIR
+            value: "/performance-dashboards/rendered/"
         image: ${SYNCER_IMAGE}
       volumes:
       - name: sc-grafana-config
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..138d722
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+requests==2.26.0
+
diff --git a/templates/k8s-perf.jsonnet b/templates/CPT/k8s-perf.jsonnet
similarity index 99%
rename from templates/k8s-perf.jsonnet
rename to templates/CPT/k8s-perf.jsonnet
index d00dcb1..7308819 100644
--- a/templates/k8s-perf.jsonnet
+++ b/templates/CPT/k8s-perf.jsonnet
@@ -1,4 +1,4 @@
-local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet';
+local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet';
 
 local prometheus = grafana.prometheus;
 
diff --git a/templates/kube-burner.jsonnet b/templates/CPT/kube-burner.jsonnet
similarity index 99%
rename from templates/kube-burner.jsonnet
rename to templates/CPT/kube-burner.jsonnet
index 5e32c18..cdb5160 100644
--- a/templates/kube-burner.jsonnet
+++ b/templates/CPT/kube-burner.jsonnet
@@ -1,4 +1,4 @@
-local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet';
+local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet';
 local es = grafana.elasticsearch;
 
 local worker_count = grafana.statPanel.new(
diff --git 
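+
+# Usage sketch (an illustrative example, assuming a reachable Grafana with
+# basic-auth credentials embedded in GRAFANA_URL):
+#
+#   GRAFANA_URL="http://admin:admin@localhost:3000" INPUT_DIR="rendered/" \
+#       python3 dittybopper/syncer/entrypoint.py
+#
+# Dashboards at the top level of INPUT_DIR land in Grafana's built-in
+# "General" folder (folderId None); each subdirectory becomes a Grafana
+# folder of the same name.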
a/templates/api-performance-overview.jsonnet b/templates/General/api-performance-overview.jsonnet similarity index 99% rename from templates/api-performance-overview.jsonnet rename to templates/General/api-performance-overview.jsonnet index 77f4db2..246e9ff 100644 --- a/templates/api-performance-overview.jsonnet +++ b/templates/General/api-performance-overview.jsonnet @@ -1,4 +1,4 @@ -local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; local prometheus = grafana.prometheus; //Panel definitions diff --git a/templates/cilium-k8s-perf.jsonnet b/templates/General/cilium-k8s-perf.jsonnet similarity index 99% rename from templates/cilium-k8s-perf.jsonnet rename to templates/General/cilium-k8s-perf.jsonnet index 3bcef8b..90c21f0 100644 --- a/templates/cilium-k8s-perf.jsonnet +++ b/templates/General/cilium-k8s-perf.jsonnet @@ -1,4 +1,4 @@ -local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; local prometheus = grafana.prometheus; diff --git a/templates/etcd-on-cluster-dashboard.jsonnet b/templates/General/etcd-on-cluster-dashboard.jsonnet similarity index 99% rename from templates/etcd-on-cluster-dashboard.jsonnet rename to templates/General/etcd-on-cluster-dashboard.jsonnet index ca52c2e..68bbc9d 100644 --- a/templates/etcd-on-cluster-dashboard.jsonnet +++ b/templates/General/etcd-on-cluster-dashboard.jsonnet @@ -1,4 +1,4 @@ -local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; local prometheus = grafana.prometheus; // Panel definitions diff --git a/templates/hypershift-performance.jsonnet b/templates/General/hypershift-performance.jsonnet similarity index 99% rename from templates/hypershift-performance.jsonnet rename to templates/General/hypershift-performance.jsonnet index bd321e4..8416234 100644 --- a/templates/hypershift-performance.jsonnet +++ b/templates/General/hypershift-performance.jsonnet @@ -1,4 +1,4 @@ -local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; local prometheus = grafana.prometheus; local stat = grafana.statPanel; diff --git a/templates/General/k8s-perf.jsonnet b/templates/General/k8s-perf.jsonnet new file mode 100644 index 0000000..7308819 --- /dev/null +++ b/templates/General/k8s-perf.jsonnet @@ -0,0 +1,499 @@ +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; +local prometheus = grafana.prometheus; + + +// Helper functions + +local genericGraphPanel(title, format) = grafana.graphPanel.new( + title=title, + datasource='$datasource', + format=format, + nullPointMode='null as zero', + sort='decreasing', + legend_alignAsTable=true, +); + +local genericGraphLegendPanel(title, format) = grafana.graphPanel.new( + title=title, + datasource='$datasource', + format=format, + legend_values=true, + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_hideEmpty=true, + legend_hideZero=true, + legend_sort='max', + nullPointMode='null as zero', + sort='decreasing', +); + + +local nodeMemory(nodeName) = genericGraphLegendPanel('System Memory: ' + nodeName, 'bytes').addTarget( + prometheus.target( + 'node_memory_Active_bytes{node=~"' + nodeName + '"}', + legendFormat='Active', + ) +).addTarget( + prometheus.target( + 'node_memory_MemTotal_bytes{node=~"' + nodeName + '"}', + legendFormat='Total', + ) 
+).addTarget( + prometheus.target( + 'node_memory_Cached_bytes{node=~"' + nodeName + '"} + node_memory_Buffers_bytes{node=~"' + nodeName + '"}', + legendFormat='Cached + Buffers', + ) +).addTarget( + prometheus.target( + 'node_memory_MemAvailable_bytes{node=~"' + nodeName + '"}', + legendFormat='Available', + ) +); + + +local nodeCPU(nodeName) = genericGraphLegendPanel('CPU Basic: ' + nodeName, 'percent').addTarget( + prometheus.target( + 'sum by (instance, mode)(rate(node_cpu_seconds_total{node=~"' + nodeName + '",job=~".*"}[$interval])) * 100', + legendFormat='Busy {{mode}}', + ) +); + + +local diskThroughput(nodeName) = genericGraphLegendPanel('Disk throughput: ' + nodeName, 'Bps').addTarget( + prometheus.target( + 'rate(node_disk_read_bytes_total{device=~"$block_device",node=~"' + nodeName + '"}[$interval])', + legendFormat='{{ device }} - read', + ) +).addTarget( + prometheus.target( + 'rate(node_disk_written_bytes_total{device=~"$block_device",node=~"' + nodeName + '"}[$interval])', + legendFormat='{{ device }} - write', + ) +); + +local diskIOPS(nodeName) = genericGraphLegendPanel('Disk IOPS: ' + nodeName, 'iops').addTarget( + prometheus.target( + 'rate(node_disk_reads_completed_total{device=~"$block_device",node=~"' + nodeName + '"}[$interval])', + legendFormat='{{ device }} - read', + ) +).addTarget( + prometheus.target( + 'rate(node_disk_writes_completed_total{device=~"$block_device",node=~"' + nodeName + '"}[$interval])', + legendFormat='{{ device }} - write', + ) +); + +local networkUtilization(nodeName) = genericGraphLegendPanel('Network Utilization: ' + nodeName, 'bps').addTarget( + prometheus.target( + 'rate(node_network_receive_bytes_total{node=~"' + nodeName + '",device=~"$net_device"}[$interval]) * 8', + legendFormat='{{instance}} - {{device}} - RX', + ) +).addTarget( + prometheus.target( + 'rate(node_network_transmit_bytes_total{node=~"' + nodeName + '",device=~"$net_device"}[$interval]) * 8', + legendFormat='{{instance}} - {{device}} - TX', + ) +); + +local networkPackets(nodeName) = genericGraphLegendPanel('Network Packets: ' + nodeName, 'pps').addTarget( + prometheus.target( + 'rate(node_network_receive_packets_total{node=~"' + nodeName + '",device=~"$net_device"}[$interval])', + legendFormat='{{instance}} - {{device}} - RX', + ) +).addTarget( + prometheus.target( + 'rate(node_network_transmit_packets_total{node=~"' + nodeName + '",device=~"$net_device"}[$interval])', + legendFormat='{{instance}} - {{device}} - TX', + ) +); + +local networkDrop(nodeName) = genericGraphLegendPanel('Network packets drop: ' + nodeName, 'pps').addTarget( + prometheus.target( + 'topk(10, rate(node_network_receive_drop_total{node=~"' + nodeName + '"}[$interval]))', + legendFormat='rx-drop-{{ device }}', + ) +).addTarget( + prometheus.target( + 'topk(10,rate(node_network_transmit_drop_total{node=~"' + nodeName + '"}[$interval]))', + legendFormat='tx-drop-{{ device }}', + ) +); + +local conntrackStats(nodeName) = genericGraphLegendPanel('Conntrack stats: ' + nodeName, '') + { + seriesOverrides: [{ + alias: 'conntrack_limit', + yaxis: 2, + }], + yaxes: [{ show: true }, { show: true }], +} + .addTarget( + prometheus.target( + 'node_nf_conntrack_entries{node=~"' + nodeName + '"}', + legendFormat='conntrack_entries', + ) +).addTarget( + prometheus.target( + 'node_nf_conntrack_entries_limit{node=~"' + nodeName + '"}', + legendFormat='conntrack_limit', + ) +); + +local top10ContainerCPU(nodeName) = genericGraphLegendPanel('Top 10 container CPU: ' + nodeName, 'percent').addTarget( + 
prometheus.target( + 'topk(10, sum(irate(container_cpu_usage_seconds_total{container!="POD",name!="",instance=~"' + nodeName + '",namespace!="",namespace=~"$namespace"}[$interval])) by (pod,container,namespace,name,service) * 100)', + legendFormat='{{ pod }}: {{ container }}', + ) +); + +local top10ContainerRSS(nodeName) = genericGraphLegendPanel('Top 10 container RSS: ' + nodeName, 'bytes').addTarget( + prometheus.target( + 'topk(10, container_memory_rss{container!="POD",name!="",instance=~"' + nodeName + '",namespace!="",namespace=~"$namespace"})', + legendFormat='{{ pod }}: {{ container }}', + ) +); + +local containerWriteBytes(nodeName) = genericGraphLegendPanel('Container fs write rate: ' + nodeName, 'Bps').addTarget( + prometheus.target( + 'sum(rate(container_fs_writes_bytes_total{device!~".+dm.+", node=~"' + nodeName + '", container!=""}[$interval])) by (device, container)', + legendFormat='{{ container }}: {{ device }}', + ) +); + +// Individual panel definitions + +// Monitoring Stack + +local promReplMemUsage = genericGraphLegendPanel('Prometheus Replica Memory usage', 'bytes').addTarget( + prometheus.target( + 'sum(container_memory_rss{pod="prometheus-k8s-1",namespace!="",name!="",container="prometheus"}) by (pod)', + legendFormat='{{pod}}', + ) +).addTarget( + prometheus.target( + 'sum(container_memory_rss{pod="prometheus-k8s-0",namespace!="",name!="",container="prometheus"}) by (pod)', + legendFormat='{{pod}}', + ) +); + +// Kubelet + +local kubeletCPU = genericGraphLegendPanel('Top 10 Kubelet CPU usage', 'percent').addTarget( + prometheus.target( + 'topk(10,rate(process_cpu_seconds_total{service="kubelet",job="kubelet"}[$interval])*100)', + legendFormat='kubelet - {{node}}', + ) +); + +local crioCPU = genericGraphLegendPanel('Top 10 crio CPU usage', 'percent').addTarget( + prometheus.target( + 'topk(10,rate(process_cpu_seconds_total{service="kubelet",job="crio"}[$interval])*100)', + legendFormat='crio - {{node}}', + ) +); + +local kubeletMemory = genericGraphLegendPanel('Top 10 Kubelet memory usage', 'bytes').addTarget( + prometheus.target( + 'topk(10,process_resident_memory_bytes{service="kubelet",job="kubelet"})', + legendFormat='kubelet - {{node}}', + ) +); + +local crioMemory = genericGraphLegendPanel('Top 10 crio memory usage', 'bytes').addTarget( + prometheus.target( + 'topk(10,process_resident_memory_bytes{service="kubelet",job="crio"})', + legendFormat='crio - {{node}}', + ) +); + +// Cluster details + +local current_node_count = grafana.statPanel.new( + title='Current Node Count', + datasource='$datasource', + reducerFunction='last', +).addTarget( + prometheus.target( + 'sum(kube_node_info{})', + legendFormat='Number of nodes', + ) +).addTarget( + prometheus.target( + 'sum(kube_node_status_condition{status="true"}) by (condition) > 0', + legendFormat='Node: {{ condition }}', + ) +); + +local current_namespace_count = grafana.statPanel.new( + title='Current namespace Count', + datasource='$datasource', + reducerFunction='last', +).addTarget( + prometheus.target( + 'sum(kube_namespace_status_phase) by (phase)', + legendFormat='{{ phase }}', + ) +); + +local current_pod_count = grafana.statPanel.new( + title='Current Pod Count', + reducerFunction='last', + datasource='$datasource', +).addTarget( + prometheus.target( + 'sum(kube_pod_status_phase{}) by (phase) > 0', + legendFormat='{{ phase}} Pods', + ) +); + +local nodeCount = genericGraphPanel('Number of nodes', 'none').addTarget( + prometheus.target( + 'sum(kube_node_info{})', + legendFormat='Number of nodes', + ) 
+).addTarget(
+  prometheus.target(
+    'sum(kube_node_status_condition{status="true"}) by (condition) > 0',
+    legendFormat='Node: {{ condition }}',
+  )
+);
+
+local nsCount = genericGraphPanel('Namespace count', 'none').addTarget(
+  prometheus.target(
+    'sum(kube_namespace_status_phase) by (phase) > 0',
+    legendFormat='{{ phase }} namespaces',
+  )
+);
+
+local podCount = genericGraphPanel('Pod count', 'none').addTarget(
+  prometheus.target(
+    'sum(kube_pod_status_phase{}) by (phase)',
+    legendFormat='{{phase}} pods',
+  )
+);
+
+local secretCmCount = genericGraphPanel('Secret & configmap count', 'none').addTarget(
+  prometheus.target(
+    'count(kube_secret_info{})',
+    legendFormat='secrets',
+  )
+).addTarget(
+  prometheus.target(
+    'count(kube_configmap_info{})',
+    legendFormat='Configmaps',
+  )
+);
+
+local deployCount = genericGraphPanel('Deployment count', 'none').addTarget(
+  prometheus.target(
+    'count(kube_deployment_labels{})',
+    legendFormat='Deployments',
+  )
+);
+
+
+local servicesCount = genericGraphPanel('Services count', 'none').addTarget(
+  prometheus.target(
+    'count(kube_service_info{})',
+    legendFormat='Services',
+  )
+);
+
+local alerts = genericGraphPanel('Alerts', 'none').addTarget(
+  prometheus.target(
+    'topk(10,sum(ALERTS{severity!="none"}) by (alertname, severity))',
+    legendFormat='{{severity}}: {{alertname}}',
+  )
+);
+
+local top10ContMem = genericGraphLegendPanel('Top 10 container RSS', 'bytes').addTarget(
+  prometheus.target(
+    'topk(10, container_memory_rss{namespace!="",container!="POD",name!=""})',
+    legendFormat='{{ namespace }} - {{ name }}',
+  )
+);
+
+local podDistribution = genericGraphLegendPanel('Pod Distribution', 'none').addTarget(
+  prometheus.target(
+    'count(kube_pod_info{}) by (exported_node)',
+    legendFormat='{{ exported_node }}',
+  )
+);
+
+local top10ContCPU = genericGraphLegendPanel('Top 10 container CPU', 'percent').addTarget(
+  prometheus.target(
+    'topk(10,irate(container_cpu_usage_seconds_total{namespace!="",container!="POD",name!=""}[$interval])*100)',
+    legendFormat='{{ namespace }} - {{ name }}',
+  )
+);
+
+
+local goroutines_count = genericGraphPanel('Goroutines count', 'none').addTarget(
+  prometheus.target(
+    'topk(10, sum(go_goroutines{}) by (job,instance))',
+    legendFormat='{{ job }} - {{ instance }}',
+  )
+);
+
+// Cluster operators
+
+local clusterOperatorsOverview = grafana.statPanel.new(
+  datasource='$datasource',
+  title='Cluster operators overview',
+).addTarget(
+  prometheus.target(
+    'sum by (condition)(cluster_operator_conditions{condition!=""})',
+    legendFormat='{{ condition }}',
+  )
+);
+
+local clusterOperatorsInformation = genericGraphLegendPanel('Cluster operators information', 'none').addTarget(
+  prometheus.target(
+    'cluster_operator_conditions{name!="",reason!=""}',
+    legendFormat='{{name}} - {{reason}}',
+  )
+);
+
+local clusterOperatorsDegraded = genericGraphLegendPanel('Cluster operators degraded', 'none').addTarget(
+  prometheus.target(
+    'cluster_operator_conditions{condition="Degraded",name!="",reason!=""}',
+    legendFormat='{{name}} - {{reason}}',
+  )
+);
+
+
+// Dashboard
+
+grafana.dashboard.new(
+  'k8s Performance',
+  description='Performance dashboard for Red Hat k8s',
+  time_from='now-1h',
+  timezone='utc',
+  refresh='30s',
+  editable='true',
+)
+
+
+// Templates
+
+.addTemplate(
+  grafana.template.datasource(
+    'datasource',
+    'prometheus',
+    '',
+  )
+)
+
+.addTemplate(
+  grafana.template.new(
+    '_worker_node',
+    '$datasource',
+    'label_values(kube_node_labels{}, exported_node)',
+    '',
+    refresh=2,
+  ) {
+    label: 'Worker',
+    type: 'query',
+    multi: true,
+    includeAll: false,
+  },
+)
+
+.addTemplate(
+  grafana.template.new(
+    'namespace',
+    '$datasource',
+    'label_values(kube_pod_info, exported_namespace)',
+    '',
+    refresh=2,
+  ) {
+    label: 'Namespace',
+    type: 'query',
+    multi: false,
+    includeAll: true,
+  },
+)
+
+
+.addTemplate(
+  grafana.template.new(
+    'block_device',
+    '$datasource',
+    'label_values(node_disk_written_bytes_total,device)',
+    '',
+    regex='/^(?:(?!dm|rb).)*$/',
+    refresh=2,
+  ) {
+    label: 'Block device',
+    type: 'query',
+    multi: true,
+    includeAll: true,
+  },
+)
+
+
+.addTemplate(
+  grafana.template.new(
+    'net_device',
+    '$datasource',
+    'label_values(node_network_receive_bytes_total,device)',
+    '',
+    regex='/^((br|en|et).*)$/',
+    refresh=2,
+  ) {
+    label: 'Network device',
+    type: 'query',
+    multi: true,
+    includeAll: true,
+  },
+)
+
+.addTemplate(
+  grafana.template.new(
+    'interval',
+    '$datasource',
+    '$__auto_interval_period',
+    label='interval',
+    refresh='time',
+  ) {
+    type: 'interval',
+    query: '2m,3m,4m,5m',
+    auto: false,
+  },
+)
+
+// Dashboard definition
+
+.addPanel(grafana.row.new(title='Cluster Details', collapse=true).addPanels(
+  [
+    current_node_count { gridPos: { x: 0, y: 4, w: 8, h: 3 } },
+    current_namespace_count { gridPos: { x: 8, y: 4, w: 8, h: 3 } },
+    current_pod_count { gridPos: { x: 16, y: 4, w: 8, h: 3 } },
+    nodeCount { gridPos: { x: 0, y: 12, w: 8, h: 8 } },
+    nsCount { gridPos: { x: 8, y: 12, w: 8, h: 8 } },
+    podCount { gridPos: { x: 16, y: 12, w: 8, h: 8 } },
+    secretCmCount { gridPos: { x: 0, y: 20, w: 8, h: 8 } },
+    deployCount { gridPos: { x: 8, y: 20, w: 8, h: 8 } },
+    servicesCount { gridPos: { x: 16, y: 20, w: 8, h: 8 } },
+    top10ContMem { gridPos: { x: 0, y: 28, w: 24, h: 8 } },
+    top10ContCPU { gridPos: { x: 0, y: 36, w: 12, h: 8 } },
+    goroutines_count { gridPos: { x: 12, y: 36, w: 12, h: 8 } },
+    podDistribution { gridPos: { x: 0, y: 44, w: 24, h: 8 } },
+  ]
+), { gridPos: { x: 0, y: 3, w: 24, h: 1 } })
+
+.addPanel(grafana.row.new(title='Node: $_worker_node', collapse=true, repeat='_worker_node').addPanels(
+  [
+    nodeCPU('$_worker_node') { gridPos: { x: 0, y: 0, w: 12, h: 8 } },
+    nodeMemory('$_worker_node') { gridPos: { x: 12, y: 0, w: 12, h: 8 } },
+    diskThroughput('$_worker_node') { gridPos: { x: 0, y: 8, w: 12, h: 8 } },
+    diskIOPS('$_worker_node') { gridPos: { x: 12, y: 8, w: 12, h: 8 } },
+    networkUtilization('$_worker_node') { gridPos: { x: 0, y: 16, w: 12, h: 8 } },
+    networkPackets('$_worker_node') { gridPos: { x: 12, y: 16, w: 12, h: 8 } },
+    networkDrop('$_worker_node') { gridPos: { x: 0, y: 24, w: 12, h: 8 } },
+    conntrackStats('$_worker_node') { gridPos: { x: 12, y: 24, w: 12, h: 8 } },
+    top10ContainerCPU('$_worker_node') { gridPos: { x: 0, y: 32, w: 12, h: 8 } },
+    top10ContainerRSS('$_worker_node') { gridPos: { x: 12, y: 32, w: 12, h: 8 } },
+  ],
+), { gridPos: { x: 0, y: 1, w: 0, h: 8 } })
diff --git a/templates/General/kube-burner.jsonnet b/templates/General/kube-burner.jsonnet
new file mode 100644
index 0000000..cdb5160
--- /dev/null
+++ b/templates/General/kube-burner.jsonnet
@@ -0,0 +1,4568 @@
+local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet';
+local es = grafana.elasticsearch;
+
+local worker_count = grafana.statPanel.new(
+  title='Node count',
+  datasource='$datasource1',
+  justifyMode='center'
+).addTarget(
+  es.target(
+    query='uuid.keyword: $uuid AND metricName: "nodeRoles"',
+    timeField='timestamp',
+    metrics=[{
+      field: 'count',
+      id: '1',
+      meta: {},
+      settings: {},
+      type: 'count',
+    }],
+    bucketAggs=[
+      {
+        field: 'labels.role.keyword',
+        fake: true,
+        id: '3',
+        settings: {
+          min_doc_count: '1',
+          order: 'desc',
+          orderBy: '_term',
+          size: '10',
+        },
+        type: 'terms',
+      },
+      {
+        field: 'timestamp',
+        id: '2',
+        settings: {
+          interval: '30s',
+          min_doc_count: '1',
+          trimEdges: 0,
+        },
+        type: 'date_histogram',
+      },
+    ],
+  )
+).addThresholds([
+  { color: 'green', value: null },
+  { color: 'red', value: 80 },
+]);
+
+
+local metric_count_panel = grafana.statPanel.new(
+  datasource='$datasource1',
+  justifyMode='center',
+  title=null
+).addTarget(
+  // Namespaces count
+  es.target(
+    query='uuid.keyword: $uuid AND metricName: "namespaceCount" AND labels.phase: "Active"',
+    alias='Namespaces',
+    timeField='timestamp',
+    metrics=[{
+      field: 'value',
+      id: '1',
+      meta: {},
+      settings: {},
+      type: 'avg',
+    }],
+    bucketAggs=[
+      {
+        field: 'timestamp',
+        id: '2',
+        settings: {
+          interval: '30s',
+          min_doc_count: '1',
+          trimEdges: '0',
+        },
+        type: 'date_histogram',
+      },
+    ],
+  )
+).addTarget(
+  // Services count
+  es.target(
+    query='uuid.keyword: $uuid AND metricName: "serviceCount"',
+    alias='Services',
+    timeField='timestamp',
+    metrics=[{
+      field: 'value',
+      id: '1',
+      meta: {},
+      settings: {},
+      type: 'avg',
+    }],
+    bucketAggs=[
+      {
+        field: 'timestamp',
+        id: '2',
+        settings: {
+          interval: '30s',
+          min_doc_count: '1',
+          trimEdges: '0',
+        },
+        type: 'date_histogram',
+      },
+    ],
+  )
+).addTarget(
+  // Deployments count
+  es.target(
+    query='uuid.keyword: $uuid AND metricName: "deploymentCount"',
+    alias='Deployments',
+    timeField='timestamp',
+    metrics=[{
+      field: 'value',
+      id: '1',
+      meta: {},
+      settings: {},
+      type: 'avg',
+    }],
+    bucketAggs=[
+      {
+        field: 'timestamp',
+        id: '2',
+        settings: {
+          interval: '30s',
+          min_doc_count: '1',
+          trimEdges: '0',
+        },
+        type: 'date_histogram',
+      },
+    ],
+  )
+).addTarget(
+  // Secrets count
+  es.target(
+    query='uuid.keyword: $uuid AND metricName: "secretCount"',
+    alias='Secrets',
+    timeField='timestamp',
+    metrics=[{
+      field: 'value',
+      id: '1',
+      meta: {},
+      settings: {},
+      type: 'avg',
+    }],
+    bucketAggs=[
+      {
+        field: 'timestamp',
+        id: '2',
+        settings: {
+          interval: '30s',
+          min_doc_count: '1',
+          trimEdges: '0',
+        },
+        type: 'date_histogram',
+      },
+    ],
+  )
+).addTarget(
+  // ConfigMap count
+  es.target(
+    query='uuid.keyword: $uuid AND metricName: "configmapCount"',
+    alias='ConfigMaps',
+    timeField='timestamp',
+    metrics=[{
+      field: 'value',
+      id: '1',
+      meta: {},
+      settings: {},
+      type: 'avg',
+    }],
+    bucketAggs=[
+      {
+        field: 'timestamp',
+        id: '2',
+        settings: {
+          interval: '30s',
+          min_doc_count: '1',
+          trimEdges: '0',
+        },
+        type: 'date_histogram',
+      },
+    ],
+  )
+).addThresholds([
+  { color: 'green', value: null },
+  { color: 'red', value: 80 },
+]);
+
+local openshift_version_panel = grafana.statPanel.new(
+  title='OpenShift version',
+  datasource='$datasource1',
+  justifyMode='center',
+  reducerFunction='lastNotNull',
+  fields='/^labels\\.version$/'
+).addTarget(
+  es.target(
+    query='uuid.keyword: $uuid AND metricName: "clusterVersion"',
+    timeField='timestamp',
+    metrics=[{
+      id: '1',
+      settings: {
+        size: '500',
+      },
+      type: 'raw_data',
+    }],
+  )
+);
+
+local etcd_version_panel = grafana.statPanel.new(
+  title='Etcd version',
+  datasource='$datasource1',
+  justifyMode='center',
+  reducerFunction='lastNotNull',
+  fields='labels.cluster_version'
+).addTarget(
+  es.target(
+    query='uuid.keyword: $uuid AND metricName: "etcdVersion"',
+    timeField='timestamp',
+    metrics=[{
+      id: '1',
+      settings: {
+        size: '500',
+      },
+      type: 'raw_data',
+    }],
+  )
+);
+
+
+// TODO: Convert to new table format once jsonnet supports it.
+// That would fix the text wrapping problem.
+local summary_panel_1 = grafana.tablePanel.new(
+  datasource='$datasource1',
+  title=null,
+  styles=[
+    {
+      pattern: 'uuid',
+      alias: 'UUID',
+      type: 'string',
+    },
+    {
+      pattern: 'jobConfig.name',
+      alias: 'Name',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.qps',
+      alias: 'QPS',
+      type: 'number',
+    },
+    {
+      pattern: 'jobConfig.burst',
+      alias: 'Burst',
+      type: 'number',
+    },
+    {
+      pattern: 'elapsedTime',
+      alias: 'Elapsed time',
+      type: 'number',
+      unit: 's',
+    },
+    {
+      pattern: 'jobConfig.jobIterations',
+      alias: 'Iterations',
+      type: 'number',
+    },
+    {
+      pattern: 'jobConfig.jobType',
+      alias: 'Job Type',
+      type: 'string',
+    },
+    {
+      pattern: 'jobConfig.podWait',
+      alias: 'podWait',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.namespacedIterations',
+      alias: 'Namespaced iterations',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.preLoadImages',
+      alias: 'Preload Images',
+      type: 'boolean',
+    },
+    {
+      pattern: '_id',
+      alias: '_id',
+      type: 'hidden',
+    },
+    {
+      pattern: '_index',
+      alias: '_index',
+      type: 'hidden',
+    },
+    {
+      pattern: '_type',
+      alias: '_type',
+      type: 'hidden',
+    },
+    {
+      pattern: 'highlight',
+      alias: 'highlight',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.cleanup',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.errorOnVerify',
+      alias: 'errorOnVerify',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.jobIterationDelay',
+      alias: 'jobIterationDelay',
+      type: 'hidden',
+      unit: 's',
+    },
+    {
+      pattern: 'jobConfig.jobPause',
+      alias: 'jobPause',
+      type: 'hidden',
+      unit: 's',
+    },
+    {
+      pattern: 'jobConfig.maxWaitTimeout',
+      alias: 'maxWaitTimeout',
+      type: 'hidden',
+      unit: 's',
+    },
+    {
+      pattern: 'jobConfig.namespace',
+      alias: 'namespacePrefix',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.namespaced',
+      alias: 'jobConfig.namespaced',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.objects',
+      alias: 'jobConfig.objects',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.preLoadPeriod',
+      alias: 'jobConfig.preLoadPeriod',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.verifyObjects',
+      alias: 'jobConfig.verifyObjects',
+      type: 'hidden',
+    },
+    {
+      pattern: 'metricName',
+      alias: 'metricName',
+      type: 'hidden',
+    },
+    {
+      pattern: 'timestamp',
+      alias: 'timestamp',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.waitFor',
+      alias: 'jobConfig.waitFor',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.waitForDeletion',
+      alias: 'jobConfig.waitForDeletion',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.waitWhenFinished',
+      alias: 'jobConfig.waitWhenFinished',
+      type: 'hidden',
+    },
+    {
+      pattern: 'sort',
+      alias: 'sort',
+      type: 'hidden',
+    },
+  ]
+).addTarget(
+  es.target(
+    query='uuid.keyword: $uuid AND metricName: "jobSummary"',
+    timeField='timestamp',
+    metrics=[{
+      id: '1',
+      settings: {
+        size: '500',
+      },
+      type: 'raw_data',
+    }],
+  )
+).addTransformation(
+  grafana.transformation.new('organize', options={
+    indexByName: {
+      _id: 1,
+      _index: 2,
+      _type: 3,
+      elapsedTime: 8,
+      'jobConfig.burst': 7,
+      'jobConfig.cleanup': 12,
+      'jobConfig.errorOnVerify': 13,
+      'jobConfig.jobIterationDelay': 14,
+      'jobConfig.jobIterations': 9,
+      'jobConfig.jobPause': 15,
+      'jobConfig.jobType': 10,
+      'jobConfig.maxWaitTimeout': 16,
+      'jobConfig.name': 5,
+      'jobConfig.namespace': 17,
+      'jobConfig.namespacedIterations': 18,
+      'jobConfig.objects': 19,
+      'jobConfig.podWait': 11,
+      'jobConfig.qps': 6,
+      'jobConfig.verifyObjects': 20,
+      'jobConfig.waitFor': 21,
+      'jobConfig.waitForDeletion': 22,
+      'jobConfig.waitWhenFinished': 23,
+      metricName: 24,
+      timestamp: 0,
+      uuid: 4,
+    },
+  })
+);
+
+
+// TODO: Convert to new table format once jsonnet supports it.
+// That would fix the text wrapping problem.
+local summary_panel_2 = grafana.tablePanel.new(
+  datasource='$datasource1',
+  title=null,
+  styles=[
+    {
+      pattern: 'k8s_version',
+      alias: 'k8s version',
+      type: 'string',
+    },
+    {
+      pattern: 'result',
+      alias: 'Result',
+      type: 'string',
+    },
+    {
+      pattern: 'sdn_type',
+      alias: 'SDN',
+      type: 'string',
+    },
+    {
+      pattern: 'total_nodes',
+      alias: 'Total nodes',
+      type: 'number',
+    },
+    {
+      pattern: 'master_nodes_count',
+      alias: 'Master nodes',
+      type: 'number',
+    },
+    {
+      pattern: 'worker_nodes_count',
+      alias: 'Worker nodes',
+      type: 'number',
+    },
+    {
+      pattern: 'infra_nodes_count',
+      alias: 'Infra nodes',
+      type: 'number',
+    },
+    {
+      pattern: 'master_nodes_type',
+      alias: 'Masters flavor',
+      type: 'string',
+    },
+    {
+      pattern: '_id',
+      alias: '_id',
+      type: 'hidden',
+    },
+    {
+      pattern: '_index',
+      alias: '_index',
+      type: 'hidden',
+    },
+    {
+      pattern: '_type',
+      alias: '_type',
+      type: 'hidden',
+    },
+    {
+      pattern: 'benchmark',
+      alias: 'benchmark',
+      type: 'hidden',
+    },
+    {
+      pattern: 'clustertype',
+      alias: 'clustertype',
+      type: 'hidden',
+    },
+    {
+      pattern: 'end_date',
+      alias: 'end_date',
+      type: 'hidden',
+    },
+    {
+      pattern: 'highlight',
+      alias: 'highlight',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.cleanup',
+      alias: 'jobConfig.cleanup',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.errorOnVerify',
+      alias: 'errorOnVerify',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.jobIterationDelay',
+      alias: 'jobIterationDelay',
+      type: 'hidden',
+      unit: 's',
+    },
+    {
+      pattern: 'jobConfig.jobPause',
+      alias: 'jobPause',
+      type: 'hidden',
+      unit: 's',
+    },
+    {
+      pattern: 'jobConfig.maxWaitTimeout',
+      alias: 'maxWaitTimeout',
+      type: 'hidden',
+      unit: 's',
+    },
+    {
+      pattern: 'jobConfig.namespace',
+      alias: 'namespacePrefix',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.namespaced',
+      alias: 'jobConfig.namespaced',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.objects',
+      alias: 'jobConfig.objects',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.preLoadPeriod',
+      alias: 'jobConfig.preLoadPeriod',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.verifyObjects',
+      alias: 'jobConfig.verifyObjects',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.waitFor',
+      alias: 'jobConfig.waitFor',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.waitForDeletion',
+      alias: 'jobConfig.waitForDeletion',
+      type: 'hidden',
+    },
+    {
+      pattern: 'jobConfig.waitWhenFinished',
+      alias: 'jobConfig.waitWhenFinished',
+      type: 'hidden',
+    },
+    {
+      pattern: 'metricName',
+      alias: 'metricName',
+      type: 'hidden',
+    },
+    {
+      pattern: 'ocp_version',
+      alias: 'ocp_version',
+      type: 'hidden',
+    },
+    {
+      pattern: 'sort',
+      alias: 'sort',
+      type: 'hidden',
+    },
+    {
+      pattern: 'timestamp',
+      alias: 'timestamp',
+      type: 'hidden',
+    },
+    {
+      pattern: 'uuid',
+      alias: 'uuid',
+      type: 'hidden',
+    },
+    {
+      pattern: 'workload',
+      alias: 'workload',
+      type: 'hidden',
+    },
+    {
+      pattern: 'worker_nodes_type',
+      alias: 'worker_nodes_type',
+      type: 'hidden',
+    },
+    {
+      pattern: 'infra_nodes_type',
+      alias: 'infra_nodes_type',
+      type: 'hidden',
+    },
+    {
+      pattern: 
'platform', + alias: 'platform', + type: 'hidden', + }, + { + pattern: 'workload_nodes_count', + alias: 'workload_nodes_count', + type: 'hidden', + }, + { + pattern: 'workload_nodes_type', + alias: 'workload_nodes_type', + type: 'hidden', + }, + ] +).addTarget( + es.target( + query='uuid.keyword: $uuid AND result.keyword: *', + timeField='timestamp', + metrics=[{ + id: '1', + settings: { + size: '500', + }, + type: 'raw_data', + }], + ) +).addTransformation( + grafana.transformation.new('organize', options={ + indexByName: { + _id: 4, + _index: 5, + _type: 15, + benchmark: 17, + clustertype: 18, + end_date: 19, + highlight: 20, + infra_nodes_count: 9, + infra_nodes_type: 14, + k8s_version: 1, + master_nodes_count: 7, + master_nodes_type: 11, + ocp_version: 21, + platform: 22, + result: 2, + sdn_type: 3, + sort: 23, + timestamp: 0, + total_nodes: 6, + uuid: 16, + worker_nodes_count: 8, + worker_nodes_type: 12, + workload: 24, + workload_nodes_count: 10, + workload_nodes_type: 13, + }, + }) +); + +// First row: Cluster status +local masters_cpu = grafana.graphPanel.new( + title='Masters CPU utilization', + datasource='$datasource1', + legend_alignAsTable=true, + legend_avg=true, + legend_max=true, + percentage=true, + legend_values=true, + format='percent', +).addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeCPU-Masters" AND NOT labels.mode.keyword: idle AND NOT labels.mode.keyword: steal', + timeField='timestamp', + alias='{{labels.instance.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: { + script: '_value * 100', + }, + type: 'sum', + }], + bucketAggs=[ + { + field: 'labels.instance.keyword', + fake: true, + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: '0', + }, + type: 'date_histogram', + }, + ], + ) +); + +local masters_memory = grafana.graphPanel.new( + title='Masters Memory utilization', + datasource='$datasource1', + legend_alignAsTable=true, + legend_avg=true, + legend_max=true, + legend_values=true, + format='bytes' +).addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryAvailable-Masters"', + timeField='timestamp', + alias='Available {{labels.instance.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'sum', + }], + bucketAggs=[ + { + field: 'labels.instance.keyword', + fake: true, + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: '0', + }, + type: 'date_histogram', + }, + ], + ) +); + +local node_status_summary = grafana.graphPanel.new( + title='Node Status Summary', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_current=true, + legend_values=true, + legend_rightSide=true, +).addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeStatus"', + timeField='timestamp', + alias='{{labels.condition.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + field: 'labels.condition.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + 
min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +local pod_status_summary = grafana.graphPanel.new( + title='Pod Status Summary', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_current=true, + legend_values=true, +).addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "podStatusCount"', + timeField='timestamp', + alias='{{labels.phase.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + field: 'labels.phase.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local kube_api_cpu = grafana.graphPanel.new( + title='Kube-apiserver CPU', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.container.keyword: kube-apiserver', + timeField='timestamp', + alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + fake: true, + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU-Masters" AND labels.container.keyword: kube-apiserver', + timeField='timestamp', + alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + fake: true, + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.container.keyword: kube-apiserver', + timeField='timestamp', + alias='Avg CPU {{labels.container.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + 
type: 'terms',
+        },
+        {
+          field: 'labels.namespace.keyword',
+          id: '4',
+          settings: {
+            min_doc_count: '1',
+            order: 'desc',
+            orderBy: '1',
+            size: '0',
+          },
+          type: 'terms',
+        },
+        {
+          field: 'timestamp',
+          id: '2',
+          settings: {
+            interval: '30s',
+            min_doc_count: '1',
+            trimEdges: 0,
+          },
+          type: 'date_histogram',
+        },
+      ],
+    )
+);
+// TODO: When the feature is added to grafonnet, style the average differently.
+
+
+local kube_api_memory = grafana.graphPanel.new(
+  title='Kube-apiserver Memory',
+  datasource='$datasource1',
+  legend_alignAsTable=true,
+  legend_max=true,
+  legend_avg=true,
+  legend_values=true,
+  format='bytes',
+)
+  .addTarget(
+    es.target(
+      query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.container.keyword: kube-apiserver',
+      timeField='timestamp',
+      alias='Rss {{labels.namespace.keyword}}-{{labels.pod.keyword}}',
+      metrics=[{
+        field: 'value',
+        id: '1',
+        settings: {},
+        type: 'avg',
+      }],
+      bucketAggs=[
+        {
+          fake: true,
+          field: 'labels.pod.keyword',
+          id: '4',
+          settings: {
+            min_doc_count: '1',
+            order: 'desc',
+            orderBy: '1',
+            size: '0',
+          },
+          type: 'terms',
+        },
+        {
+          field: 'labels.container.keyword',
+          fake: true,
+          id: '3',
+          settings: {
+            min_doc_count: '1',
+            order: 'desc',
+            orderBy: '_term',
+            size: '10',
+          },
+          type: 'terms',
+        },
+        {
+          field: 'labels.namespace.keyword',
+          fake: true,
+          id: '5',
+          settings: {
+            min_doc_count: '1',
+            order: 'desc',
+            orderBy: '1',
+            size: '0',
+          },
+          type: 'terms',
+        },
+        {
+          field: 'timestamp',
+          id: '2',
+          settings: {
+            interval: '30s',
+            min_doc_count: '1',
+            trimEdges: 0,
+          },
+          type: 'date_histogram',
+        },
+      ],
+    )
+).addTarget(
+  es.target(
+    query='uuid.keyword: $uuid AND metricName: "containerMemory-Masters" AND labels.container.keyword: kube-apiserver',
+    timeField='timestamp',
+    alias='Rss {{labels.namespace.keyword}}-{{labels.pod.keyword}}',
+    metrics=[{
+      field: 'value',
+      id: '1',
+      settings: {},
+      type: 'avg',
+    }],
+    bucketAggs=[
+      {
+        fake: true,
+        field: 'labels.pod.keyword',
+        id: '4',
+        settings: {
+          min_doc_count: '1',
+          order: 'desc',
+          orderBy: '1',
+          size: '0',
+        },
+        type: 'terms',
+      },
+      {
+        field: 'labels.container.keyword',
+        fake: true,
+        id: '3',
+        settings: {
+          min_doc_count: '1',
+          order: 'desc',
+          orderBy: '_term',
+          size: '10',
+        },
+        type: 'terms',
+      },
+      {
+        field: 'labels.namespace.keyword',
+        fake: true,
+        id: '5',
+        settings: {
+          min_doc_count: '1',
+          order: 'desc',
+          orderBy: '1',
+          size: '0',
+        },
+        type: 'terms',
+      },
+      {
+        field: 'timestamp',
+        id: '2',
+        settings: {
+          interval: '30s',
+          min_doc_count: '1',
+          trimEdges: 0,
+        },
+        type: 'date_histogram',
+      },
+    ],
+  )
+)
+  .addTarget(
+    es.target(
+      query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.container.keyword: kube-apiserver',
+      timeField='timestamp',
+      alias='Avg Rss {{labels.container.keyword}}',
+      metrics=[{
+        field: 'value',
+        id: '1',
+        settings: {},
+        type: 'avg',
+      }],
+      bucketAggs=[
+        {
+          field: 'labels.container.keyword',
+          fake: true,
+          id: '3',
+          settings: {
+            min_doc_count: '1',
+            order: 'desc',
+            orderBy: '1',
+            size: '0',
+          },
+          type: 'terms',
+        },
+        {
+          field: 'labels.namespace.keyword',
+          id: '4',
+          settings: {
+            min_doc_count: '1',
+            order: 'desc',
+            orderBy: '1',
+            size: '0',
+          },
+          type: 'terms',
+        },
+        {
+          field: 'timestamp',
+          id: '2',
+          settings: {
+            interval: '30s',
+            min_doc_count: '1',
+            trimEdges: 0,
+          },
+          type: 'date_histogram',
+        },
+      ],
+    )
+);
+// TODO: When the feature is added to grafonnet, style the average differently.
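+// Note: the three-target pattern above (per-pod series from "containerMemory",
+// the masters-scoped "containerMemory-Masters" variant, and a per-container
+// average) repeats, with only alias changes, in the control-plane panels below.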
+ + +local active_controller_manager_cpu = grafana.graphPanel.new( + title='Active Kube-controller-manager CPU', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.container.keyword: kube-controller-manager', + timeField='timestamp', + alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '1', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + fake: true, + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU-Masters" AND labels.container.keyword: kube-controller-manager', + timeField='timestamp', + alias='{{labels.container.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '1', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + fake: true, + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +local active_controller_manager_memory = grafana.graphPanel.new( + title='Active Kube-controller-manager memory', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.container.keyword: kube-controller-manager', + timeField='timestamp', + alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '1', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + fake: true, + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid 
AND metricName: "containerMemory-Masters" AND labels.container.keyword: kube-controller-manager', + timeField='timestamp', + alias='{{labels.container.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '1', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + fake: true, + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +local kube_scheduler_cpu = grafana.graphPanel.new( + title='Kube-scheduler CPU', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.container.keyword: kube-scheduler', + timeField='timestamp', + alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU-Masters" AND labels.container.keyword: kube-scheduler', + timeField='timestamp', + alias='{{labels.container.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +local kube_scheduler_memory = grafana.graphPanel.new( + title='Kube-scheduler memory', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.container.keyword: kube-scheduler', + timeField='timestamp', + alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', 
+ metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerMemory-Masters" AND labels.container.keyword: kube-scheduler', + timeField='timestamp', + alias='Rss {{labels.container.keyword}}', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +local hypershift_controlplane_cpu = grafana.graphPanel.new( + title='Hypershift Controlplane CPU Usage', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU-Controlplane"', + timeField='timestamp', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + field: 'labels.pod.keyword', + id: '2', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '20', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '20', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '4', + settings: { + interval: '30s', + min_doc_count: '1', + timeZone: 'utc', + trimEdges: '0', + }, + type: 'date_histogram', + }, + ], + ) +); + + +local hypershift_controlplane_memory = grafana.graphPanel.new( + title='Hypershift Controlplane RSS memory Usage', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerMemory-Controlplane"', + timeField='timestamp', + metrics=[{ + field: 'value', + id: '1', + settings: {}, + type: 'avg', + }], + bucketAggs=[ + { + field: 'labels.pod.keyword', + id: '2', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '20', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '20', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '4', + settings: { + interval: 
'30s',
+            min_doc_count: '1',
+            timeZone: 'utc',
+            trimEdges: '0',
+          },
+          type: 'date_histogram',
+        },
+      ],
+    )
+);
+
+// Pod latencies section
+local average_pod_latency = grafana.graphPanel.new(
+  title='Average pod latency',
+  datasource='$datasource1',
+  legend_alignAsTable=true,
+  legend_min=true,
+  legend_max=true,
+  legend_avg=true,
+  legend_values=true,
+  format='ms',
+)
+  .addTarget(
+    es.target(
+      query='uuid.keyword: $uuid AND metricName.keyword: podLatencyMeasurement',
+      timeField='timestamp',
+      alias='{{field}}',
+      metrics=[
+        {
+          field: 'podReadyLatency',
+          id: '1',
+          meta: {},
+          settings: {},
+          type: 'avg',
+        },
+        {
+          field: 'schedulingLatency',
+          id: '3',
+          meta: {},
+          settings: {},
+          type: 'avg',
+        },
+        {
+          field: 'initializedLatency',
+          id: '4',
+          meta: {},
+          settings: {},
+          type: 'avg',
+        },
+      ],
+      bucketAggs=[
+        {
+          field: 'timestamp',
+          id: '2',
+          settings: {
+            interval: 'auto',
+            min_doc_count: '1',
+            trimEdges: '0',
+          },
+          type: 'date_histogram',
+        },
+      ],
+    )
+);
+
+local pod_latencies_summary = grafana.statPanel.new(
+  datasource='$datasource1',
+  justifyMode='center',
+  title='Pod latencies summary $latencyPercentile',
+  unit='ms',
+  colorMode='value',  // Note: There isn't currently a way to set the color palette.
+).addTarget(
+  // Pod latency quantiles
+  es.target(
+    query='uuid.keyword: $uuid AND metricName.keyword: podLatencyQuantilesMeasurement',
+    alias='$latencyPercentile {{term quantileName.keyword}}',
+    timeField='timestamp',
+    metrics=[{
+      field: '$latencyPercentile',
+      id: '1',
+      meta: {},
+      settings: {},
+      type: 'max',
+    }],
+    bucketAggs=[
+      {
+        fake: true,
+        field: 'quantileName.keyword',
+        id: '5',
+        settings: {
+          min_doc_count: '1',
+          order: 'desc',
+          orderBy: '1',
+          size: '10',
+        },
+        type: 'terms',
+      },
+      {
+        field: 'timestamp',
+        id: '2',
+        settings: {
+          interval: 'auto',
+          min_doc_count: '0',
+          trimEdges: '0',
+        },
+        type: 'date_histogram',
+      },
+    ],
+  )
+);
+
+local pod_conditions_latency = grafana.tablePanel.new(
+  title='Pod conditions latency',
+  datasource='$datasource1',
+  transform='table',
+  styles=[
+    {
+      pattern: 'Average containersReadyLatency',
+      alias: 'ContainersReady',
+      type: 'number',
+      unit: 'ms',
+    },
+    {
+      pattern: 'Average initializedLatency',
+      alias: 'Initialized',
+      type: 'number',
+      unit: 'ms',
+    },
+    {
+      pattern: 'Average podReadyLatency',
+      alias: 'Ready',
+      type: 'number',
+      unit: 'ms',
+    },
+    {
+      pattern: 'Average schedulingLatency',
+      alias: 'Scheduling',
+      type: 'number',
+      unit: 'ms',
+    },
+    {
+      pattern: 'namespace.keyword',
+      alias: 'Namespace',
+      type: 'string',
+    },
+    {
+      pattern: 'podName.keyword',
+      alias: 'Pod',
+      type: 'string',
+    },
+    {
+      pattern: 'nodeName.keyword',
+      alias: 'Node',
+      type: 'string',
+    },
+  ],
+).addTarget(
+  es.target(
+    query='uuid.keyword: $uuid AND metricName.keyword: podLatencyMeasurement',
+    timeField='timestamp',
+    metrics=[
+      {
+        field: 'schedulingLatency',
+        id: '1',
+        meta: {},
+        settings: {},
+        type: 'avg',
+      },
+      {
+        field: 'initializedLatency',
+        id: '3',
+        meta: {},
+        settings: {},
+        type: 'avg',
+      },
+      {
+        field: 'containersReadyLatency',
+        id: '4',
+        meta: {},
+        settings: {},
+        type: 'avg',
+      },
+      {
+        field: 'podReadyLatency',
+        id: '5',
+        meta: {},
+        settings: {},
+        type: 'avg',
+      },
+    ],
+    bucketAggs=[
+      {
+        fake: true,
+        field: 'namespace.keyword',
+        id: '6',
+        settings: {
+          min_doc_count: '1',
+          order: 'desc',
+          orderBy: '5',
+          size: '100',
+        },
+        type: 'terms',
+      },
+      {
+        fake: true,
+        field: 'nodeName.keyword',
+        id: '7',
+        settings: {
+          min_doc_count: '1',
+          order:
'desc', + orderBy: '_term', + size: '100', + }, + type: 'terms', + }, + { + field: 'podName.keyword', + id: '2', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '5', + size: '100', + }, + type: 'terms', + }, + ], + ) +); + +local setup_latency = grafana.graphPanel.new( + title='Top 10 Container runtime network setup latency', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='µs', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: containerNetworkSetupLatency', + timeField='timestamp', + alias='{{labels.node.keyword}}', + metrics=[ + { + field: 'value', + id: '1', + meta: {}, + settings: {}, + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.node.keyword', + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local scheduling_throughput = grafana.graphPanel.new( + title='Scheduling throughput', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='reqps', +) + .addTarget( + es.target( + query='uuid: $uuid AND metricName.keyword: schedulingThroughput', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + meta: {}, + settings: {}, + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +// OVN section +local ovnkube_master_cpu = grafana.graphPanel.new( + title='ovnkube-master CPU usage', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.pod.keyword: /ovnkube-master.*/', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'labels.pod.keyword', + id: '2', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '3', + settings: { + interval: '30s', + min_doc_count: '1', + timeZone: 'utc', + trimEdges: '0', + }, + type: 'date_histogram', + }, + ], + ) +); + + +local ovnkube_master_memory = grafana.graphPanel.new( + title='ovnkube-master Memory usage', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.pod.keyword: /ovnkube-master.*/', + timeField='timestamp', + alias='{{labels.pod.keyword}}', + metrics=[ + { + field: 'value', + id: '1', + type: 'sum', + }, + ], + bucketAggs=[ + { + field: 'labels.pod.keyword', + id: '2', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '3', + settings: { + interval: '30s', + min_doc_count: '1', + timeZone: 'utc', + trimEdges: '0', + }, + type: 'date_histogram', + }, + ], + ) 
+); + +local ovnkube_controller_cpu = grafana.graphPanel.new( + title='ovn-controller CPU usage', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.namespace.keyword: "openshift-ovn-kubernetes" AND labels.pod.keyword: /ovnkube-node.*/ AND labels.container.keyword: "ovn-controller"', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'labels.pod.keyword', + id: '2', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '3', + settings: { + interval: '30s', + min_doc_count: '1', + timeZone: 'utc', + trimEdges: '0', + }, + type: 'date_histogram', + }, + ], + ) +); + + +local ovnkube_controller_memory = grafana.graphPanel.new( + title='ovn-controller Memory usage', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.namespace.keyword: "openshift-ovn-kubernetes" AND labels.pod.keyword: /ovnkube-node.*/ AND labels.container.keyword: "ovn-controller"', + timeField='timestamp', + alias='{{labels.pod.keyword}}', + metrics=[ + { + field: 'value', + id: '1', + type: 'sum', + }, + ], + bucketAggs=[ + { + field: 'labels.pod.keyword', + id: '2', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '3', + settings: { + interval: '30s', + min_doc_count: '1', + timeZone: 'utc', + trimEdges: '0', + }, + type: 'date_histogram', + }, + ], + ) +); + + +// ETCD section +local etcd_fsync_latency = grafana.graphPanel.new( + title='etcd 99th disk WAL fsync latency', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='s', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "99thEtcdDiskWalFsyncDurationSeconds"', + timeField='timestamp', + alias='{{labels.pod.keyword}}', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'labels.pod.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local etcd_commit_latency = grafana.graphPanel.new( + title='etcd 99th disk backend commit latency', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='s', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "99thEtcdDiskBackendCommitDurationSeconds"', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'labels.pod.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +local etcd_leader_changes = 
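+  // Counts etcd leader elections observed during the run (etcdLeaderChangesRate); anything above zero usually warrants a closer look at control-plane stability.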
grafana.graphPanel.new( + title='Etcd leader changes', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_values=true, + min=0, + format='s', +) + .addTarget( + es.target( + query='uuid: $uuid AND metricName.keyword: etcdLeaderChangesRate', + alias='Etcd leader changes', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '1', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +local etcd_peer_roundtrip_time = grafana.graphPanel.new( + title='Etcd 99th network peer roundtrip time', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='s', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: 99thEtcdRoundTripTimeSeconds', + alias='{{labels.pod.keyword}} to {{labels.To.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'labels.pod.keyword', + fake: true, + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + fake: true, + field: 'labels.To.keyword', + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local etcd_cpu = grafana.graphPanel.new( + title='Etcd CPU utilization', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.container.keyword: etcd', + alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + fake: true, + field: 'labels.container.keyword', + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.namespace.keyword', + id: '5', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +local etcd_memory = grafana.graphPanel.new( + title='Etcd memory utilization', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.container.keyword: etcd', + alias='{{labels.namespace.keyword}}-{{labels.pod.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + fake: true, + field: 'labels.container.keyword', + id: '3', + settings: { + 
min_doc_count: '1',
+            order: 'desc',
+            orderBy: '1',
+            size: '0',
+          },
+          type: 'terms',
+        },
+        {
+          field: 'labels.namespace.keyword',
+          id: '5',
+          settings: {
+            min_doc_count: '1',
+            order: 'desc',
+            orderBy: '1',
+            size: '0',
+          },
+          type: 'terms',
+        },
+        {
+          field: 'timestamp',
+          id: '2',
+          settings: {
+            interval: '30s',
+            min_doc_count: '1',
+            trimEdges: 0,
+          },
+          type: 'date_histogram',
+        },
+      ],
+    )
+);
+
+// API and Kubeproxy section
+
+local api_latency_read_only_resource = grafana.graphPanel.new(
+  title='Read Only API request P99 latency - resource scoped',
+  datasource='$datasource1',
+  legend_alignAsTable=true,
+  format='s',
+  legend_max=true,
+  legend_avg=true,
+  legend_values=true,
+)
+  .addTarget(
+    es.target(
+      query='uuid.keyword: $uuid AND metricName.keyword: readOnlyAPICallsLatency AND labels.scope.keyword: resource',
+      timeField='timestamp',
+      metrics=[
+        {
+          field: 'value',
+          id: '1',
+          type: 'avg',
+        },
+      ],
+      bucketAggs=[
+        {
+          fake: true,
+          field: 'labels.verb.keyword',
+          id: '3',
+          settings: {
+            min_doc_count: 0,
+            order: 'desc',
+            orderBy: '_term',
+            size: '10',
+          },
+          type: 'terms',
+        },
+        {
+          field: 'labels.resource.keyword',
+          id: '4',
+          settings: {
+            min_doc_count: '1',
+            order: 'desc',
+            orderBy: '_term',
+            size: '10',
+          },
+          type: 'terms',
+        },
+        {
+          field: 'timestamp',
+          id: '2',
+          settings: {
+            interval: '30s',
+            min_doc_count: '1',
+            trimEdges: 0,
+          },
+          type: 'date_histogram',
+        },
+      ],
+    )
+);
+
+local api_latency_read_only_namespace = grafana.graphPanel.new(
+  title='Read Only API request P99 latency - namespace scoped',
+  datasource='$datasource1',
+  legend_alignAsTable=true,
+  format='s',
+  legend_max=true,
+  legend_avg=true,
+  legend_values=true,
+)
+  .addTarget(
+    es.target(
+      query='uuid.keyword: $uuid AND metricName.keyword: readOnlyAPICallsLatency AND labels.scope.keyword: namespace',
+      timeField='timestamp',
+      metrics=[
+        {
+          field: 'value',
+          id: '1',
+          type: 'avg',
+        },
+      ],
+      bucketAggs=[
+        {
+          fake: true,
+          field: 'labels.verb.keyword',
+          id: '3',
+          settings: {
+            min_doc_count: 0,
+            order: 'desc',
+            orderBy: '_term',
+            size: '10',
+          },
+          type: 'terms',
+        },
+        {
+          field: 'timestamp',
+          id: '2',
+          settings: {
+            interval: '30s',
+            min_doc_count: '1',
+            trimEdges: 0,
+          },
+          type: 'date_histogram',
+        },
+      ],
+    )
+);
+
+local api_latency_read_only_cluster = grafana.graphPanel.new(
+  title='Read Only API request P99 latency - cluster scoped',
+  datasource='$datasource1',
+  legend_alignAsTable=true,
+  legend_max=true,
+  legend_avg=true,
+  legend_values=true,
+  format='s',
+)
+  .addTarget(
+    es.target(
+      query='uuid.keyword: $uuid AND metricName.keyword: readOnlyAPICallsLatency AND labels.scope.keyword: cluster',
+      timeField='timestamp',
+      metrics=[
+        {
+          field: 'value',
+          id: '1',
+          type: 'avg',
+        },
+      ],
+      bucketAggs=[
+        {
+          fake: true,
+          field: 'labels.verb.keyword',
+          id: '3',
+          settings: {
+            min_doc_count: 0,
+            order: 'desc',
+            orderBy: '_term',
+            size: '10',
+          },
+          type: 'terms',
+        },
+        {
+          field: 'timestamp',
+          id: '2',
+          settings: {
+            interval: '30s',
+            min_doc_count: '1',
+            trimEdges: 0,
+          },
+          type: 'date_histogram',
+        },
+      ],
+    )
+);
+
+local api_latency_mutating = grafana.graphPanel.new(
+  title='Mutating API request P99 latency',
+  datasource='$datasource1',
+  legend_alignAsTable=true,
+  legend_max=true,
+  legend_avg=true,
+  legend_values=true,
+  format='s',
+)
+  .addTarget(
+    es.target(
+      query='uuid.keyword: $uuid AND metricName.keyword: mutatingAPICallsLatency',
+      timeField='timestamp',
+      metrics=[
+        {
+          field: 'value',
+          id: '1',
+          type: 'avg',
+        },
+      ],
+      bucketAggs=[
+        {
+          fake: true,
+          field: 'labels.verb.keyword',
+          id: '3',
+          settings: {
+            min_doc_count: 0,
+            order: 'desc',
+            orderBy: '_term',
+            size: '10',
+          },
+          type: 'terms',
+        },
+        {
+          field: 'timestamp',
+          id: '2',
+          settings: {
+            interval: '30s',
+            min_doc_count: '1',
+            trimEdges: 0,
+          },
+          type: 'date_histogram',
+        },
+      ],
+    )
+);
+
+
+local api_request_rate = grafana.graphPanel.new(
+  title='API request rate',
+  datasource='$datasource1',
+  legend_alignAsTable=true,
+  legend_max=true,
+  legend_avg=true,
+  legend_values=true,
+  format='reqps',
+)
+  .addTarget(
+    es.target(
+      query='uuid.keyword: $uuid AND metricName.keyword: APIRequestRate',
+      alias='{{labels.verb.keyword}} {{labels.resource.keyword}}',
+      timeField='timestamp',
+      metrics=[
+        {
+          field: 'value',
+          id: '1',
+          type: 'avg',
+        },
+      ],
+      bucketAggs=[
+        {
+          fake: true,
+          field: 'labels.resource.keyword',
+          id: '4',
+          settings: {
+            min_doc_count: '1',
+            order: 'desc',
+            orderBy: '_term',
+            size: '0',
+          },
+          type: 'terms',
+        },
+        {
+          fake: true,
+          field: 'labels.verb.keyword',
+          id: '3',
+          settings: {
+            min_doc_count: 0,
+            order: 'desc',
+            orderBy: '_term',
+            size: '10',
+          },
+          type: 'terms',
+        },
+        {
+          field: 'timestamp',
+          id: '2',
+          settings: {
+            interval: '30s',
+            min_doc_count: '1',
+            trimEdges: 0,
+          },
+          type: 'date_histogram',
+        },
+      ],
+    )
+);
+
+
+local service_sync_latency = grafana.graphPanel.new(
+  title='Service sync latency',
+  datasource='$datasource1',
+  legend_alignAsTable=true,
+  legend_max=true,
+  legend_avg=true,
+  legend_values=true,
+  format='s',
+)
+  .addTarget(
+    es.target(
+      query='uuid: $uuid AND metricName.keyword: kubeproxyP99ProgrammingLatency',
+      alias='Latency',
+      timeField='timestamp',
+      metrics=[
+        {
+          field: 'value',
+          id: '1',
+          type: 'avg',
+        },
+      ],
+      bucketAggs=[
+        {
+          field: 'labels.instance.keyword',
+          id: '3',
+          settings: {
+            min_doc_count: '1',
+            order: 'desc',
+            orderBy: '_term',
+            size: '10',
+          },
+          type: 'terms',
+        },
+        {
+          field: 'timestamp',
+          id: '2',
+          settings: {
+            interval: 'auto',
+            min_doc_count: '1',
+            trimEdges: 0,
+          },
+          type: 'date_histogram',
+        },
+      ],
+    )
+)
+  .addTarget(
+    es.target(
+      query='uuid: $uuid AND metricName.keyword: serviceSyncLatency',
+      alias='Latency',
+      timeField='timestamp',
+      metrics=[
+        {
+          field: 'value',
+          id: '1',
+          type: 'avg',
+        },
+      ],
+      bucketAggs=[
+        {
+          field: 'timestamp',
+          id: '2',
+          settings: {
+            interval: 'auto',
+            min_doc_count: '1',
+            trimEdges: 0,
+          },
+          type: 'date_histogram',
+        },
+      ],
+    )
+);
+
+// Cluster Kubelet & CRI-O section
+local kubelet_process_cpu = grafana.graphPanel.new(
+  title='Kubelet process CPU usage',
+  datasource='$datasource1',
+  legend_alignAsTable=true,
+  legend_rightSide=true,
+  legend_max=true,
+  legend_avg=true,
+  legend_values=true,
+  format='percent',
+)
+  .addTarget(
+    es.target(
+      query='uuid.keyword: $uuid AND metricName.keyword: kubeletCPU',
+      timeField='timestamp',
+      metrics=[
+        {
+          field: 'value',
+          id: '1',
+          type: 'avg',
+        },
+      ],
+      bucketAggs=[
+        {
+          field: 'labels.node.keyword',
+          fake: true,
+          id: '3',
+          settings: {
+            min_doc_count: '1',
+            order: 'desc',
+            orderBy: '1',
+            size: '0',
+          },
+          type: 'terms',
+        },
+        {
+          field: 'timestamp',
+          id: '2',
+          settings: {
+            interval: '30s',
+            min_doc_count: '1',
+            trimEdges: 0,
+          },
+          type: 'date_histogram',
+        },
+      ],
+    )
+);
+
+local kubelet_process_memory = grafana.graphPanel.new(
+  title='Kubelet process RSS memory usage',
+  datasource='$datasource1',
+  legend_alignAsTable=true,
+  legend_rightSide=true,
+  legend_max=true,
+  legend_avg=true,
+  legend_values=true,
+  format='bytes',
+)
+  .addTarget(
+    es.target(
+      query='uuid.keyword: $uuid AND metricName.keyword: kubeletMemory',
+      timeField='timestamp',
+      metrics=[
+        {
+          field: 'value',
+          id: '1',
+          type: 'avg',
+        },
+      ],
+      bucketAggs=[
+        {
+          field: 'labels.node.keyword',
+          fake: true,
+          id: '3',
+          settings: {
+            min_doc_count: '1',
+            order: 'desc',
+            orderBy: '1',
+            size: '0',
+          },
+          type: 'terms',
+        },
+        {
+          field: 'timestamp',
+          id: '2',
+          settings: {
+            interval: '30s',
+            min_doc_count: '1',
+            trimEdges: 0,
+          },
+          type: 'date_histogram',
+        },
+      ],
+    )
+);
+
+local cri_o_process_cpu = grafana.graphPanel.new(
+  title='CRI-O process CPU usage',
+  datasource='$datasource1',
+  legend_alignAsTable=true,
+  legend_rightSide=true,
+  legend_max=true,
+  legend_avg=true,
+  legend_values=true,
+  format='percent',
+)
+  .addTarget(
+    es.target(
+      query='uuid.keyword: $uuid AND metricName.keyword: crioCPU',
+      timeField='timestamp',
+      metrics=[
+        {
+          field: 'value',
+          id: '1',
+          type: 'avg',
+        },
+      ],
+      bucketAggs=[
+        {
+          field: 'labels.node.keyword',
+          fake: true,
+          id: '3',
+          settings: {
+            min_doc_count: '1',
+            order: 'desc',
+            orderBy: '1',
+            size: '0',
+          },
+          type: 'terms',
+        },
+        {
+          field: 'timestamp',
+          id: '2',
+          settings: {
+            interval: '30s',
+            min_doc_count: '1',
+            trimEdges: 0,
+          },
+          type: 'date_histogram',
+        },
+      ],
+    )
+);
+
+
+local cri_o_process_memory = grafana.graphPanel.new(
+  title='CRI-O RSS memory usage',
+  datasource='$datasource1',
+  legend_alignAsTable=true,
+  legend_rightSide=true,
+  legend_max=true,
+  legend_avg=true,
+  legend_values=true,
+  format='bytes',
+)
+  .addTarget(
+    es.target(
+      query='uuid.keyword: $uuid AND metricName.keyword: crioMemory',
+      timeField='timestamp',
+      metrics=[
+        {
+          field: 'value',
+          id: '1',
+          type: 'avg',
+        },
+      ],
+      bucketAggs=[
+        {
+          field: 'labels.node.keyword',
+          fake: true,
+          id: '3',
+          settings: {
+            min_doc_count: '1',
+            order: 'desc',
+            orderBy: '1',
+            size: '0',
+          },
+          type: 'terms',
+        },
+        {
+          field: 'timestamp',
+          id: '2',
+          settings: {
+            interval: '30s',
+            min_doc_count: '1',
+            trimEdges: 0,
+          },
+          type: 'date_histogram',
+        },
+      ],
+    )
+);
+
+// Master Node section
+
+local container_cpu_master = grafana.graphPanel.new(
+  title='Container CPU usage $master',
+  datasource='$datasource1',
+  legend_alignAsTable=true,
+  legend_max=true,
+  legend_avg=true,
+  legend_values=true,
+  format='percent',
+)
+  .addTarget(
+    es.target(
+      query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.node.keyword: $master AND labels.namespace.keyword: $namespace',
+      alias='{{labels.pod.keyword}} {{labels.container.keyword}}',
+      timeField='timestamp',
+      metrics=[
+        {
+          field: 'value',
+          id: '1',
+          type: 'avg',
+        },
+      ],
+      bucketAggs=[
+        {
+          fake: true,
+          field: 'labels.pod.keyword',
+          id: '4',
+          settings: {
+            min_doc_count: '1',
+            order: 'desc',
+            orderBy: '1',
+            size: '0',
+          },
+          type: 'terms',
+        },
+        {
+          field: 'labels.container.keyword',
+          fake: true,
+          id: '3',
+          settings: {
+            min_doc_count: '1',
+            order: 'desc',
+            orderBy: '1',
+            size: '0',
+          },
+          type: 'terms',
+        },
+        {
+          field: 'timestamp',
+          id: '2',
+          settings: {
+            interval: '30s',
+            min_doc_count: '1',
+            trimEdges: 0,
+          },
+          type: 'date_histogram',
+        },
+      ],
+    )
+);
+
+local container_memory_master = grafana.graphPanel.new(
+  title='Container RSS memory $master',
+  datasource='$datasource1',
+  legend_alignAsTable=true,
+  legend_max=true,
+  legend_avg=true,
+  legend_values=true,
+  format='bytes',
+)
+  .addTarget(
+    es.target(
+      query='uuid.keyword: $uuid AND
metricName: "containerMemory" AND labels.node.keyword: $master AND labels.namespace.keyword: $namespace', + alias='{{labels.pod.keyword}} {{labels.container.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local cpu_master = grafana.graphPanel.new( + title='CPU $master', + datasource='$datasource1', + legend_alignAsTable=true, + legend_rightSide=true, + legend_max=true, + legend_min=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeCPU-Masters" AND labels.instance.keyword: $master', + alias='{{labels.mode.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + settings: { + script: { + inline: '_value*100', + }, + }, + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.mode.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local memory_master = grafana.graphPanel.new( + title='Memory $master', + datasource='$datasource1', + legend_alignAsTable=true, + legend_rightSide=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryAvailable-Masters" AND labels.instance.keyword: $master', + alias='Available', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryTotal-Masters" AND labels.instance.keyword: $master', + alias='Total', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryUtilization-Masters" AND labels.instance.keyword: $master', + alias='Utilization', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +// Worker Node section + +local container_cpu_worker = grafana.graphPanel.new( + title='Container CPU usage $worker', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + 
query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.node.keyword: $worker AND labels.namespace.keyword: $namespace', + alias='{{labels.pod.keyword}} {{labels.container.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local container_memory_worker = grafana.graphPanel.new( + title='Container RSS memory $worker', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.node.keyword: $worker AND labels.namespace.keyword: $namespace', + alias='{{labels.pod.keyword}} {{labels.container.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local cpu_worker = grafana.graphPanel.new( + title='CPU $worker', + datasource='$datasource1', + legend_alignAsTable=true, + legend_rightSide=true, + legend_max=true, + legend_min=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeCPU-Workers" AND labels.instance.keyword: $worker', + alias='{{labels.mode.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + settings: { + script: { + inline: '_value*100', + }, + }, + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.mode.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local memory_worker = grafana.graphPanel.new( + title='Memory $worker', + datasource='$datasource1', + legend_alignAsTable=true, + legend_rightSide=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryAvailable-Workers" AND labels.instance.keyword: $worker', + alias='Available', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND 
metricName.keyword: "nodeMemoryTotal-Workers" AND labels.instance.keyword: $worker', + alias='Total', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryUtilization-Workers" AND labels.instance.keyword: $worker', + alias='Utilization', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +// Infra Node section + +local container_cpu_infra = grafana.graphPanel.new( + title='Container CPU usage $infra', + datasource='$datasource1', + legend_alignAsTable=true, + legend_avg=true, + legend_max=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerCPU" AND labels.node.keyword: $infra AND labels.namespace.keyword: $namespace', + alias='{{labels.pod.keyword}} {{labels.container.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local container_memory_infra = grafana.graphPanel.new( + title='Container RSS memory $infra', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName: "containerMemory" AND labels.node.keyword: $infra AND labels.namespace.keyword: $namespace', + alias='{{labels.pod.keyword}} {{labels.container.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'labels.container.keyword', + fake: true, + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '0', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local cpu_infra = grafana.graphPanel.new( + title='CPU $infra', + datasource='$datasource1', + legend_alignAsTable=true, + legend_rightSide=true, + legend_max=true, + legend_min=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeCPU-Infra" AND labels.instance.keyword: $infra', + alias='{{labels.mode.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + settings: { + script: { + inline: '_value*100', + }, + }, + }, + ], 
+ bucketAggs=[ + { + fake: true, + field: 'labels.mode.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local memory_infra = grafana.graphPanel.new( + title='Memory $infra', + datasource='$datasource1', + legend_alignAsTable=true, + legend_rightSide=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryAvailable-Infra" AND labels.instance.keyword: $infra', + alias='Available', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryTotal-Infra" AND labels.instance.keyword: $infra', + alias='Total', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryUtilization-Infra" AND labels.instance.keyword: $infra', + alias='Utilization', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: '30s', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +// Aggregated worker node usage section +local agg_avg_cpu = grafana.graphPanel.new( + title='Avg CPU usage', + datasource='$datasource1', + legend_alignAsTable=true, + legend_rightSide=true, + legend_avg=true, + legend_max=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeCPU-AggregatedWorkers"', + alias='{{labels.mode.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + settings: { + script: { + inline: '_value*100', + }, + }, + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.mode.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local agg_avg_mem = grafana.graphPanel.new( + title='Avg Memory', + datasource='$datasource1', + legend_alignAsTable=true, + legend_rightSide=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "nodeMemoryAvailable-AggregatedWorkers"', + alias='Available', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: 
"nodeMemoryTotal-AggregatedWorkers"', + alias='Total', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +local agg_container_cpu = grafana.graphPanel.new( + title='Container CPU usage', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='percent', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "containerCPU-AggregatedWorkers" AND labels.namespace.keyword: $namespace', + alias='{{labels.pod.keyword}}: {{labels.container.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.container.keyword', + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '10', + }, + type: 'terms', + }, + { + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '_term', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + +local agg_container_mem = grafana.graphPanel.new( + title='Container memory RSS', + datasource='$datasource1', + legend_alignAsTable=true, + legend_max=true, + legend_avg=true, + legend_values=true, + format='bytes', +) + .addTarget( + es.target( + query='uuid.keyword: $uuid AND metricName.keyword: "containerMemory-AggregatedWorkers" AND labels.namespace.keyword: $namespace', + alias='{{labels.pod.keyword}}: {{labels.container.keyword}}', + timeField='timestamp', + metrics=[ + { + field: 'value', + id: '1', + type: 'avg', + }, + ], + bucketAggs=[ + { + fake: true, + field: 'labels.pod.keyword', + id: '4', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '10', + }, + type: 'terms', + }, + { + fake: true, + field: 'labels.container.keyword', + id: '3', + settings: { + min_doc_count: '1', + order: 'desc', + orderBy: '1', + size: '10', + }, + type: 'terms', + }, + { + field: 'timestamp', + id: '2', + settings: { + interval: 'auto', + min_doc_count: '1', + trimEdges: 0, + }, + type: 'date_histogram', + }, + ], + ) +); + + +//Dashboard & Templates + +grafana.dashboard.new( + 'Kube-burner report v2', + description='', + editable='true', + time_from='now/y', + time_to='now', + timezone='utc', +) +.addTemplate( + grafana.template.datasource( + 'datasource1', + 'elasticsearch', + 'AWS Dev - ripsaw-kube-burner', + label='Datasource', + regex='/.*kube-burner.*/' + ) +) +.addTemplate( + grafana.template.new( + label='Platform', + name='platform', + current='All', + query='{"find": "terms", "field": "platform.keyword"}', + refresh=2, + multi=true, + includeAll=true, + datasource='$datasource1', + ) +) +.addTemplate( + grafana.template.new( + label='SDN type', + name='sdn', + current='All', + query='{"find": "terms", "field": "sdn_type.keyword"}', + refresh=2, + multi=true, + includeAll=true, + datasource='$datasource1', + ) +) +.addTemplate( + grafana.template.new( + label='Workload', + multi=true, + query='{"find": "terms", "field": "workload.keyword", "query": "platform.keyword: $platform AND sdn_type.keyword: $sdn"}', + refresh=1, + name='workload', + includeAll=false, + datasource='$datasource1' + ) +) +.addTemplate( + 
grafana.template.new( + label='Worker count', + multi=true, + query='{"find": "terms", "field": "worker_nodes_count", "query": "platform.keyword: $platform AND sdn_type.keyword: $sdn AND workload.keyword: $workload"}', + refresh=1, + name='worker_count', + includeAll=true, + datasource='$datasource1' + ) +) +.addTemplate( + grafana.template.new( + label='UUID', + multi=false, + query='{"find": "terms", "field": "uuid.keyword", "query": "platform.keyword: $platform AND sdn_type.keyword: $sdn AND workload.keyword: $workload AND worker_nodes_count: $worker_count"}', + refresh=2, + name='uuid', + includeAll=false, + datasource='$datasource1' + ) +) +.addTemplate( + grafana.template.new( + label='Master nodes', + multi=true, + query='{ "find" : "terms", "field": "labels.node.keyword", "query": "metricName.keyword: nodeRoles AND labels.role.keyword: master AND uuid.keyword: $uuid"}', + refresh=2, + name='master', + includeAll=false, + datasource='$datasource1' + ) +) +.addTemplate( + grafana.template.new( + label='Worker nodes', + multi=true, + query='{ "find" : "terms", "field": "labels.node.keyword", "query": "metricName.keyword: nodeRoles AND labels.role.keyword: worker AND uuid.keyword: $uuid"}', + refresh=2, + name='worker', + includeAll=false, + datasource='$datasource1' + ) +) +.addTemplate( + grafana.template.new( + label='Infra nodes', + multi=true, + query='{ "find" : "terms", "field": "labels.node.keyword", "query": "metricName.keyword: nodeRoles AND labels.role.keyword: infra AND uuid.keyword: $uuid"}', + refresh=2, + name='infra', + includeAll=false, + datasource='$datasource1' + ) +) +.addTemplate( + grafana.template.new( + label='Namespace', + multi=true, + query='{ "find" : "terms", "field": "labels.namespace.keyword", "query": "labels.namespace.keyword: /openshift-.*/ AND uuid.keyword: $uuid"}', + refresh=2, + name='namespace', + includeAll=true, + datasource='$datasource1' + ) +) +.addTemplate( + grafana.template.custom( + label='Latency percentile', + name='latencyPercentile', + current='P99', + query='P99, P95, P50', + multi=false, + includeAll=false, + ) +) +.addPanels( + [ + worker_count { gridPos: { x: 0, y: 0, w: 4, h: 3 } }, + metric_count_panel { gridPos: { x: 4, y: 0, w: 12, h: 3 } }, + openshift_version_panel { gridPos: { x: 16, y: 0, w: 6, h: 3 } }, + etcd_version_panel { gridPos: { x: 22, y: 0, w: 2, h: 3 } }, + summary_panel_1 { gridPos: { x: 0, y: 3, h: 2, w: 24 } }, + summary_panel_2 { gridPos: { x: 0, y: 5, h: 2, w: 24 } }, + ], +) +.addPanel( + grafana.row.new(title='Cluster status', collapse=true).addPanels( + [ + masters_cpu { gridPos: { x: 0, y: 8, w: 12, h: 9 } }, + masters_memory { gridPos: { x: 12, y: 8, w: 12, h: 9 } }, + node_status_summary { gridPos: { x: 0, y: 17, w: 12, h: 8 } }, + pod_status_summary { gridPos: { x: 12, y: 17, w: 12, h: 8 } }, + kube_api_cpu { gridPos: { x: 0, y: 25, w: 12, h: 9 } }, + kube_api_memory { gridPos: { x: 12, y: 25, w: 12, h: 9 } }, + active_controller_manager_cpu { gridPos: { x: 0, y: 34, w: 12, h: 9 } }, + active_controller_manager_memory { gridPos: { x: 12, y: 34, w: 12, h: 9 } }, + kube_scheduler_cpu { gridPos: { x: 0, y: 43, w: 12, h: 9 } }, + kube_scheduler_memory { gridPos: { x: 12, y: 43, w: 12, h: 9 } }, + hypershift_controlplane_cpu { gridPos: { x: 0, y: 52, w: 12, h: 9 } }, + hypershift_controlplane_memory { gridPos: { x: 12, y: 52, w: 12, h: 9 } }, + ] + ), { x: 0, y: 7, w: 24, h: 1 } +) +.addPanel( + // Panels below for uncollapsed row. 
+ grafana.row.new(title='Pod latency stats', collapse=false), { x: 0, y: 8, w: 24, h: 1 } +) +.addPanels( + [ + average_pod_latency { gridPos: { x: 0, y: 9, w: 12, h: 8 } }, + pod_latencies_summary { gridPos: { x: 12, y: 9, w: 12, h: 8 } }, + pod_conditions_latency { gridPos: { x: 0, y: 17, w: 24, h: 10 } }, + setup_latency { gridPos: { x: 0, y: 27, w: 12, h: 9 } }, + scheduling_throughput { gridPos: { x: 12, y: 27, w: 12, h: 9 } }, + ] +) +.addPanel( + grafana.row.new(title='OVNKubernetes', collapse=true).addPanels( + [ + ovnkube_master_cpu { gridPos: { x: 0, y: 80, w: 12, h: 8 } }, + ovnkube_master_memory { gridPos: { x: 12, y: 80, w: 12, h: 8 } }, + ovnkube_controller_cpu { gridPos: { x: 0, y: 88, w: 12, h: 8 } }, + ovnkube_controller_memory { gridPos: { x: 12, y: 88, w: 12, h: 8 } }, + ] + ), { x: 0, y: 36, w: 24, h: 1 } +) +.addPanel( + grafana.row.new(title='etcd', collapse=false), { x: 0, y: 37, w: 24, h: 1 } +) +.addPanels( + [ + etcd_fsync_latency { gridPos: { x: 0, y: 38, w: 12, h: 9 } }, + etcd_commit_latency { gridPos: { x: 12, y: 38, w: 12, h: 9 } }, + etcd_leader_changes { gridPos: { x: 0, y: 47, w: 12, h: 9 } }, + etcd_peer_roundtrip_time { gridPos: { x: 12, y: 47, w: 12, h: 9 } }, + etcd_cpu { gridPos: { x: 0, y: 56, w: 12, h: 9 } }, + etcd_memory { gridPos: { x: 12, y: 56, w: 12, h: 9 } }, + ], +) +.addPanel( + grafana.row.new(title='API and Kubeproxy', collapse=false), { x: 0, y: 65, w: 24, h: 1 } +) +.addPanels( + [ + api_latency_read_only_resource { gridPos: { x: 0, y: 66, w: 12, h: 9 } }, + api_latency_read_only_namespace { gridPos: { x: 12, y: 66, w: 12, h: 9 } }, + api_latency_read_only_cluster { gridPos: { x: 0, y: 75, w: 12, h: 9 } }, + api_latency_mutating { gridPos: { x: 12, y: 75, w: 12, h: 9 } }, + api_request_rate { gridPos: { x: 0, y: 84, w: 12, h: 9 } }, + service_sync_latency { gridPos: { x: 12, y: 84, w: 12, h: 9 } }, + ], +) + +.addPanel( + grafana.row.new(title='Cluster Kubelet & CRI-O', collapse=false), { x: 0, y: 93, w: 24, h: 1 } +) +.addPanels( + [ + kubelet_process_cpu { gridPos: { x: 0, y: 94, w: 12, h: 8 } }, + kubelet_process_memory { gridPos: { x: 12, y: 94, w: 12, h: 8 } }, + cri_o_process_cpu { gridPos: { x: 0, y: 103, w: 12, h: 8 } }, + cri_o_process_memory { gridPos: { x: 12, y: 103, w: 12, h: 8 } }, + ], +) + +.addPanel( + grafana.row.new(title='Master: $master', collapse=true, repeat='$master').addPanels( + [ + container_cpu_master { gridPos: { x: 0, y: 112, w: 12, h: 9 } }, + container_memory_master { gridPos: { x: 12, y: 112, w: 12, h: 9 } }, + cpu_master { gridPos: { x: 0, y: 121, w: 12, h: 9 } }, + memory_master { gridPos: { x: 12, y: 121, w: 12, h: 9 } }, + ] + ), { x: 0, y: 111, w: 24, h: 1 } +) + +.addPanel( + grafana.row.new(title='Worker: $worker', collapse=true, repeat='$worker').addPanels( + [ + container_cpu_worker { gridPos: { x: 0, y: 112, w: 12, h: 9 } }, + container_memory_worker { gridPos: { x: 12, y: 112, w: 12, h: 9 } }, + cpu_worker { gridPos: { x: 0, y: 121, w: 12, h: 9 } }, + memory_worker { gridPos: { x: 12, y: 121, w: 12, h: 9 } }, + ] + ), { x: 0, y: 111, w: 24, h: 1 } +) + +.addPanel( + grafana.row.new(title='Infra: $infra', collapse=true, repeat='$infra').addPanels( + [ + container_cpu_infra { gridPos: { x: 0, y: 131, w: 12, h: 9 } }, + container_memory_infra { gridPos: { x: 12, y: 131, w: 12, h: 9 } }, + cpu_infra { gridPos: { x: 0, y: 140, w: 12, h: 9 } }, + memory_infra { gridPos: { x: 12, y: 140, w: 12, h: 9 } }, + ] + ), { x: 0, y: 130, w: 24, h: 1 } +) + +.addPanel( + grafana.row.new(title='Aggregated 
worker nodes usage (only in aggregated metrics profile)', collapse=true).addPanels( + [ + agg_avg_cpu { gridPos: { x: 0, y: 150, w: 12, h: 9 } }, + agg_avg_mem { gridPos: { x: 12, y: 150, w: 12, h: 9 } }, + agg_container_cpu { gridPos: { x: 0, y: 159, w: 12, h: 9 } }, + agg_container_mem { gridPos: { x: 12, y: 159, w: 12, h: 9 } }, + ] + ), { x: 0, y: 149, w: 24, h: 1 } +) diff --git a/templates/ocp-performance.jsonnet b/templates/General/ocp-performance.jsonnet similarity index 99% rename from templates/ocp-performance.jsonnet rename to templates/General/ocp-performance.jsonnet index bd9c7b3..49a7a42 100644 --- a/templates/ocp-performance.jsonnet +++ b/templates/General/ocp-performance.jsonnet @@ -1,4 +1,4 @@ -local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; local prometheus = grafana.prometheus; diff --git a/templates/ovn-dashboard.jsonnet b/templates/General/ovn-dashboard.jsonnet similarity index 99% rename from templates/ovn-dashboard.jsonnet rename to templates/General/ovn-dashboard.jsonnet index d9abada..2d1a3db 100644 --- a/templates/ovn-dashboard.jsonnet +++ b/templates/General/ovn-dashboard.jsonnet @@ -1,4 +1,4 @@ -local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; local prometheus = grafana.prometheus; local stat = grafana.statPanel; diff --git a/templates/pgbench-dashboard.jsonnet b/templates/General/pgbench-dashboard.jsonnet similarity index 98% rename from templates/pgbench-dashboard.jsonnet rename to templates/General/pgbench-dashboard.jsonnet index bd7d7c5..1f39d0f 100644 --- a/templates/pgbench-dashboard.jsonnet +++ b/templates/General/pgbench-dashboard.jsonnet @@ -1,4 +1,4 @@ -local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; local es = grafana.elasticsearch; local tps_report = grafana.graphPanel.new( diff --git a/templates/uperf-perf.jsonnet b/templates/General/uperf-perf.jsonnet similarity index 99% rename from templates/uperf-perf.jsonnet rename to templates/General/uperf-perf.jsonnet index 1ecd3b4..d70b3ab 100644 --- a/templates/uperf-perf.jsonnet +++ b/templates/General/uperf-perf.jsonnet @@ -1,4 +1,4 @@ -local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; local es = grafana.elasticsearch; diff --git a/templates/vegeta-wrapper.jsonnet b/templates/General/vegeta-wrapper.jsonnet similarity index 98% rename from templates/vegeta-wrapper.jsonnet rename to templates/General/vegeta-wrapper.jsonnet index 338bb95..eed3278 100644 --- a/templates/vegeta-wrapper.jsonnet +++ b/templates/General/vegeta-wrapper.jsonnet @@ -1,4 +1,4 @@ -local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; local es = grafana.elasticsearch; // Panels diff --git a/templates/ycsb.jsonnet b/templates/General/ycsb.jsonnet similarity index 99% rename from templates/ycsb.jsonnet rename to templates/General/ycsb.jsonnet index 893cefb..e6fa8c6 100644 --- a/templates/ycsb.jsonnet +++ b/templates/General/ycsb.jsonnet @@ -1,4 +1,4 @@ -local grafana = import 'grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import '../grafonnet-lib/grafonnet/grafana.libsonnet'; local es = grafana.elasticsearch; //Panel definitions