Skip to content

Commit

Permalink
label and scrape metrics from 10 workers
Browse files Browse the repository at this point in the history
Label workers with ovnic and then scrape metrics from only these
workers.

node-density-cni on 500 nodes runs for 2 hours 15 minutes.
Scraping metrics from 500 nodes for the duration of 2 hours
15 minutes is overkill.
So we scrape from only 10 worker nodes if the worker node count is
more than 120.
  • Loading branch information
venkataanil committed Oct 25, 2023
1 parent 70c0e9a commit e67478a
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 0 deletions.
9 changes: 9 additions & 0 deletions workloads/kube-burner-ocp-wrapper/metrics.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,17 @@
# Resident memory per container in the listed openshift-* namespaces, kept only
# for nodes that kube_node_role reports with role="infra".
- query: (sum(container_memory_rss{name!="",container!="POD",namespace=~"openshift-(sdn|ovn-kubernetes|multus|ingress|monitoring|image-registry)"}) by (container, pod, namespace, node) and on (node) kube_node_role{role="infra"}) > 0
metricName: containerMemory-Infra

# Per-container CPU (irate over a 2m window, scaled by 100) for the
# sdn / ovn-kubernetes / multus namespaces, kept only for nodes whose
# kube_node_role is "ovnic". run.sh applies the node-role.kubernetes.io/ovnic
# label to a limited number of workers, so this samples workers instead of
# scraping all of them.
- query: (sum(irate(container_cpu_usage_seconds_total{name!="",container!="POD",namespace=~"openshift-(sdn|ovn-kubernetes|multus)"}[2m]) * 100) by (container, pod, namespace, node) and on (node) kube_node_role{role="ovnic"}) > 0
metricName: containerCPU-Workers

# Per-container resident memory for the same namespaces and the same
# role="ovnic" node filter as containerCPU-Workers.
- query: (sum(container_memory_rss{name!="",container!="POD",namespace=~"openshift-(sdn|ovn-kubernetes|multus)"}) by (container, pod, namespace, node) and on (node) kube_node_role{role="ovnic"}) > 0
metricName: containerMemory-Workers

# Node metrics: CPU & Memory

# Node CPU by mode for role="ovnic" nodes. label_replace copies the "node"
# label of kube_node_role into "instance" so the series can be matched
# `on (instance)` against node_cpu_seconds_total.
- query: (sum(irate(node_cpu_seconds_total[2m])) by (mode,instance) and on (instance) label_replace(kube_node_role{role="ovnic"}, "instance", "$1", "node", "(.+)")) > 0
metricName: nodeCPU-Workers

# Same node CPU query, filtered to role="master" nodes.
- query: (sum(irate(node_cpu_seconds_total[2m])) by (mode,instance) and on (instance) label_replace(kube_node_role{role="master"}, "instance", "$1", "node", "(.+)")) > 0
metricName: nodeCPU-Masters

Expand Down
18 changes: 18 additions & 0 deletions workloads/kube-burner-ocp-wrapper/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,24 @@ fi
# Capture the exit code of the run, but don't exit the script if it fails.
set +e

#######################################
# Label a limited number of worker nodes with the "ovnic" role. The *-Workers
# queries in metrics.yml filter on kube_node_role{role="ovnic"}, so metrics
# are pulled only from the nodes labeled here.
#
# Why: node-density-cni on 500 nodes runs for 2 hours 15 minutes; scraping
# metrics from every worker for that whole duration is overkill.
#
# NOTE(review): the commit message describes the policy as "scrape from only
# 10 worker nodes if the worker node count is more than 120", but the code
# shipped with both values set to 2. The defaults below preserve the shipped
# behaviour; export OVNIC_WORKER_THRESHOLD=120 and OVNIC_WORKER_SAMPLE=10 to
# get the documented policy. Confirm which one is intended.
#
# Globals:
#   OVNIC_WORKER_THRESHOLD (read)  - worker count above which sampling starts
#                                    (default 2)
#   OVNIC_WORKER_SAMPLE    (read)  - workers to label once above the threshold
#                                    (default 2)
#   workers_to_label       (written) - number of workers selected for labeling
#   count                  (written) - number of label attempts made
# Arguments: none
# Outputs:   `oc label` output on stdout; warnings on stderr
# Returns:   0; a failed `oc` call is reported but never aborts the run
#######################################
label_ovnic_workers() {
  local threshold=${OVNIC_WORKER_THRESHOLD:-2}
  local sample=${OVNIC_WORKER_SAMPLE:-2}
  local -a workers=()
  local name total

  # Fall back to the defaults on a non-numeric override instead of letting a
  # typo in the environment break the comparisons below.
  [[ "$threshold" =~ ^[0-9]+$ ]] || threshold=2
  [[ "$sample" =~ ^[0-9]+$ ]] || sample=2

  # A single API call provides both the worker names and the worker count.
  while IFS= read -r name; do
    if [[ -n "$name" ]]; then
      workers+=("$name")
    fi
  done < <(oc get nodes --ignore-not-found -l node-role.kubernetes.io/worker \
    --no-headers -o custom-columns=":.metadata.name")

  total=${#workers[@]}
  workers_to_label=$total
  if [ "$total" -gt "$threshold" ]; then
    workers_to_label=$sample
  fi

  count=0
  # Nothing to do (and nothing to expand) when no workers were found.
  if [ "$total" -eq 0 ]; then
    return 0
  fi

  for name in "${workers[@]}"; do
    if [ "$count" -ge "$workers_to_label" ]; then
      break
    fi
    oc label nodes "$name" 'node-role.kubernetes.io/ovnic=' \
      || printf 'warning: failed to label node %s with the ovnic role\n' "$name" >&2
    # Plain assignment: unlike ((count++)), it never returns non-zero.
    count=$((count + 1))
  done
  return 0
}

label_ovnic_workers


# Print the command line held in $cmd (built earlier in the script, not shown
# in this hunk) so it appears in the run log.
echo $cmd
# UTC start timestamp formatted as YYYY-MM-DDTHH:MM:SSZ.
JOB_START=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
# Left unquoted so the shell word-splits $cmd into a command and its arguments.
# `set +e` is in effect above, so a non-zero exit here does not stop the script.
$cmd
Expand Down

0 comments on commit e67478a

Please sign in to comment.