forked from bvis/docker-prometheus-swarm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathalert.rules_nodes
48 lines (42 loc) · 2.31 KB
/
alert.rules_nodes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Fires when a node's overall CPU utilisation stays above 70% for 5 minutes.
#
# Utilisation is derived from the node_exporter idle counter: the per-instance
# average idle fraction over 5m, converted to a "busy" percentage.
# process_cpu_seconds_total was used here before, but that counter only tracks
# the CPU consumed by each scraped exporter process, not by the host.
# NOTE(review): `node_cpu` is the pre-0.16 node_exporter metric name, chosen to
# match the node_memory_MemTotal / node_filesystem_size naming used by the other
# rules in this file - confirm against the deployed exporter version.
#
# The two joins against `host` copy its `host` and `node` labels onto the
# result so the annotations can reference them.
# NOTE(review): $value is multiplied by the `host` sample twice; this presumes
# `host` is always 1 - confirm.
ALERT high_cpu_usage_on_node
  IF ((100 - (avg(rate(node_cpu{mode="idle"}[5m])) by (instance) * 100)) > 70) * ON(instance) GROUP_LEFT(host) host * ON(instance) GROUP_LEFT(node) host
  FOR 5m
  ANNOTATIONS {
    summary = "HIGH CPU USAGE WARNING ON '{{ $labels.host }}'",
    description = "{{ $labels.host }} ({{ $labels.node }}) is using a LOT of CPU. CPU usage is {{ humanize $value}}%.",
  }
# Fires when a node's used memory, (MemTotal - MemAvailable) / MemTotal,
# stays above 80% for 5 minutes.
#
# The two joins against `host` copy its `host` and `node` labels onto the
# result so the annotations can reference them.
# NOTE(review): $value is multiplied by the `host` sample twice; this presumes
# `host` is always 1 - confirm.
#
# The summary previously read "WARNING TASK ON", which does not apply to a
# node-level alert; it now matches the wording of the CPU and load alerts.
ALERT high_memory_usage_on_node
  IF (((node_memory_MemTotal-node_memory_MemAvailable)/node_memory_MemTotal)*100 > 80) * ON(instance) GROUP_LEFT(host) host * ON(instance) GROUP_LEFT(node) host
  FOR 5m
  ANNOTATIONS {
    summary = "HIGH MEMORY USAGE WARNING ON '{{ $labels.host }}'",
    description = "{{ $labels.host }} ({{ $labels.node }}) is using a LOT of MEMORY. MEMORY usage is over {{ humanize $value}}%.",
  }
# Fires when the 5-minute load average (node_load5) stays above 5 for
# 5 minutes. The threshold is absolute; it is not scaled by CPU count.
#
# Both joins match on `instance` and pull one label each (`host`, then `node`)
# from the `host` series so the annotations can reference them.
# NOTE(review): $value is multiplied by the `host` sample twice; this presumes
# `host` is always 1 - confirm.
ALERT high_la_usage_on_node
  IF
      (node_load5 > 5)
    * on(instance) group_left(host) host
    * on(instance) group_left(node) host
  FOR 5m
  ANNOTATIONS {
    summary     = "HIGH LOAD AVERAGE WARNING ON '{{ $labels.host }}'",
    description = "{{ $labels.host }} ({{ $labels.node }}) has a high load average. Load Average 5m is {{ humanize $value}}.",
  }
# Fires when any scrape target has reported up == 0 for 5 minutes.
#
# The first operand enriches the down target with the `host` and `node` labels
# taken from the `host` series that shares its `instance`.
# That join alone drops every target without a matching `host` series. This
# includes the target that exports `host` itself, whose series goes stale once
# it stops being scraped, so the alert would vanish exactly when it should fire.
# The `or on(instance, job)` operand therefore falls back to the bare
# `up == 0` series for any (instance, job) the join did not produce; those
# alerts carry empty `host` / `node` labels, so the description also names the
# instance.
# NOTE(review): presumes `host` is exported by one of the monitored targets
# and that `up` is unique per (instance, job) - confirm for this deployment.
ALERT monitoring_service_down
  IF ((up == 0) * ON(instance) GROUP_LEFT(host) host * ON(instance) GROUP_LEFT(node) host) OR ON(instance, job) (up == 0)
  FOR 5m
  ANNOTATIONS {
    summary = "MONITORING SERVICE DOWN WARNING: NODE '{{ $labels.host }}'",
    description = "The monitoring service '{{ $labels.job }}' is down in host {{ $labels.host }} ({{ $labels.node }}), instance {{ $labels.instance }}.",
  }
# Fires when the root filesystem (mountpoint="/") has been more than 80% full
# for 5 minutes. Usage is (size - free) * 100 / size.
#
# The two joins against `host` copy its `host` and `node` labels onto the
# result so the annotations can reference them.
# NOTE(review): $value is multiplied by the `host` sample twice; this presumes
# `host` is always 1 - confirm.
#
# The summary previously misspelled "WARNING" as "WARING".
ALERT node_running_out_of_disk_space
  IF ((node_filesystem_size{mountpoint="/"} - node_filesystem_free{mountpoint="/"}) * 100/ node_filesystem_size{mountpoint="/"} > 80) * ON(instance) GROUP_LEFT(host) host * ON(instance) GROUP_LEFT(node) host
  FOR 5m
  ANNOTATIONS {
    summary = "LOW DISK SPACE WARNING: NODE '{{ $labels.host }}'",
    description = "More than 80% of disk used in {{ $labels.host }} ({{ $labels.node }}). Disk usage {{ humanize $value }}%.",
  }
# Fires when a linear extrapolation of the last hour of free space on the root
# filesystem (mountpoint="/") predicts a value below zero 8 hours (8*3600 s)
# from now, and that prediction has held for 5 minutes.
#
# Both joins match on `instance` and pull one label each (`host`, then `node`)
# from the `host` series so the annotations can reference them.
ALERT disk_will_fill_in_8_hours
  IF
      (predict_linear(node_filesystem_free{mountpoint="/"}[1h], 8 * 3600) < 0)
    * on(instance) group_left(host) host
    * on(instance) group_left(node) host
  FOR 5m
  ANNOTATIONS {
    summary     = "DISK SPACE FULL IN 8 HOURS: NODE '{{ $labels.host }}'",
    description = "{{ $labels.host }} ({{ $labels.node }}) is writing a lot.",
  }