forked from cloud-bulldozer/benchmark-operator
-
Notifications
You must be signed in to change notification settings - Fork 0
/
metrics.yaml
111 lines (76 loc) · 4.27 KB
/
metrics.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
metrics:
- query: sum(irate(container_cpu_usage_seconds_total{name!="",namespace=~"openshift-(etcd|oauth-apiserver|.*apiserver|ovn-kubernetes|sdn|ingress|authentication|.*controller-manager|.*scheduler)"}[2m]) * 100) by (pod, namespace, node)
metricName: podCPU
- query: histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{verb!="WATCH"}[5m])) by (le, verb)) > 0
metricName: API99thLatency
- query: sum(container_memory_rss{name!="",namespace=~"openshift-(etcd|oauth-apiserver|.*apiserver|ovn-kubernetes|sdn|ingress|authentication|.*controller-manager|.*scheduler)"}) by (pod, namespace, node)
metricName: podMemory
- query: sum(irate(process_cpu_seconds_total{service="kubelet",job="kubelet"}[2m]) * 100) by (node) and on (node) kube_node_role{role="worker"}
metricName: kubeletCPU
- query: sum(process_resident_memory_bytes{service="kubelet",job="kubelet"}) by (node) and on (node) kube_node_role{role="worker"}
metricName: kubeletMemory
- query: sum(irate(process_cpu_seconds_total{service="kubelet",job="crio"}[2m]) * 100) by (node) and on (node) kube_node_role{role="worker"}
metricName: crioCPU
- query: sum(process_resident_memory_bytes{service="kubelet",job="crio"}) by (node) and on (node) kube_node_role{role="worker"}
metricName: crioMemory
- query: sum(irate(node_cpu_seconds_total[2m])) by (mode,instance) > 0
metricName: nodeCPU
- query: avg(node_memory_MemAvailable_bytes) by (instance)
metricName: nodeMemoryAvailable
- query: avg(node_memory_Active_bytes) by (instance)
metricName: nodeMemoryActive
- query: avg(node_memory_Cached_bytes) by (instance) + avg(node_memory_Buffers_bytes) by (instance)
metricName: nodeMemoryCached+nodeMemoryBuffers
- query: irate(node_network_receive_bytes_total{device=~"^(ens|eth|bond|team).*"}[2m])
metricName: rxNetworkBytes
- query: irate(node_network_transmit_bytes_total{device=~"^(ens|eth|bond|team).*"}[2m])
metricName: txNetworkBytes
- query: irate(node_network_receive_drop_total{device=~"^(ens|eth|bond|team).*"}[2m])
metricName: rxDroppedPackets
- query: irate(node_network_transmit_drop_total{device=~"^(ens|eth|bond|team).*"}[2m])
metricName: txDroppedPackets
- query: rate(node_disk_written_bytes_total{device!~"^(dm|rb).*"}[2m])
metricName: nodeDiskWrittenBytes
- query: rate(node_disk_read_bytes_total{device!~"^(dm|rb).*"}[2m])
metricName: nodeDiskReadBytes
- query: sum(rate(etcd_server_leader_changes_seen_total[2m]))
metricName: etcdLeaderChangesRate
- query: etcd_server_is_leader > 0
metricName: etcdServerIsLeader
- query: histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket[2m]))
metricName: 99thEtcdDiskBackendCommitDurationSeconds
- query: histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket[2m]))
metricName: 99thEtcdDiskWalFsyncDurationSeconds
- query: histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket[5m]))
metricName: 99thEtcdRoundTripTimeSeconds
- query: etcd_mvcc_db_total_size_in_bytes
metricName: etcdDBPhysicalSizeBytes
- query: etcd_mvcc_db_total_size_in_use_in_bytes
metricName: etcdDBLogicalSizeBytes
- query: count(kube_namespace_created)
metricName: namespaceCount
- query: sum(kube_pod_status_phase{}) by (phase)
metricName: podStatusCount
- query: count(kube_secret_info{})
metricName: secretCount
- query: count(kube_deployment_labels{})
metricName: deploymentCount
- query: count(kube_configmap_info{})
metricName: configmapCount
- query: count(kube_service_info{})
metricName: serviceCount
- query: kube_node_role
metricName: nodeRoles
instant: true
- query: sum(kube_node_status_condition{status="true"}) by (condition)
metricName: nodeStatus
- query: (sum(rate(container_fs_writes_bytes_total{container!="",device!~".+dm.+"}[5m])) by (device, container, node) and on (node) kube_node_role{role="master"}) > 0
metricName: containerDiskUsage
- query: sum(rate(etcd_object_counts{}[5m])) by (resource) > 0
metricName: etcdObjectCount
- query: cluster_version{type="completed"}
metricName: clusterVersion
instant: true
- query: sum by (cluster_version)(etcd_cluster_version)
metricName: etcdVersion
instant: true