Skip to content

Commit

Permalink
fix(alerting): Remove alert rules for detecting high disk read/write (#…
Browse files Browse the repository at this point in the history
…149)

* Remove alert rules for detecting high disk read/write 

This check does not make much sense as a canned alert rule, as it is far too context-dependent.

* Remove the HostReadonlyFilesystem alert

---------

Co-authored-by: Leon <[email protected]>
  • Loading branch information
simskij and sed-i authored Nov 28, 2024
1 parent 5691548 commit 151460b
Showing 1 changed file with 0 additions and 41 deletions.
41 changes: 0 additions & 41 deletions src/prometheus_alert_rules/disk.rules
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,6 @@ groups:
Host filesystem '{{ $labels.mountpoint }}' is using {{ $value | printf "%.0f" }}% of the total space.
VALUE = {{ $value }}
LABELS = {{ $labels }}
- alert: HostReadonlyFilesystem
# Raised for any filesystem whose node_filesystem_readonly metric is above 0.
# Mountpoints under /snap/ and /sys/fs/cgroup/ are excluded by the regex matcher.
# `for: 0m` means no pending period: the alert is raised on the first
# evaluation where the expression holds.
expr: node_filesystem_readonly{mountpoint!~"/snap/.*|/sys/fs/cgroup/.*"} > 0
for: 0m
labels:
severity: warning
annotations:
summary: Host filesystem '{{ $labels.mountpoint }}' is readonly (instance {{ $labels.instance }})
description: >-
Host filesystem '{{ $labels.mountpoint }}' is readonly.
VALUE = {{ $value }}
LABELS = {{ $labels }}
- alert: HostXFSError
expr: node_filesystem_device_error{fstype="xfs"} > 0
for: 0m
Expand All @@ -51,33 +40,3 @@ groups:
XFS error found for device '{{ $labels.device }}'.
VALUE = {{ $value }}
LABELS = {{ $labels }}
- alert: HostHighDiskReadRate
# Takes irate() of node_disk_read_bytes_total over a 2m range and divides it by
# 1024 twice before comparing against a fixed threshold of 50; the condition
# must hold for 5m before the alert is raised.
# NOTE(review): dividing by 1024 * 1024 gives MiB/s, while the description text
# below says "MB/s" - confirm which unit is intended.
expr: irate(node_disk_read_bytes_total[2m]) / 1024 / 1024 > 50
for: 5m
labels:
severity: warning
annotations:
summary: Host high disk '{{ $labels.device }}' read rate (instance {{ $labels.instance }})
description: >-
Host disk '{{ $labels.device }}' is probably reading too much data ({{ $value | printf "%.0f" }} > 50 MB/s) for last 5m.
VALUE = {{ $value }}
LABELS = {{ $labels }}
- alert: HostHighDiskWriteRate
# Takes irate() of node_disk_written_bytes_total over a 2m range and divides it
# by 1024 twice before comparing against a fixed threshold of 50; the condition
# must hold for 5m before the alert is raised.
# NOTE(review): dividing by 1024 * 1024 gives MiB/s, while the description text
# below says "MB/s" - confirm which unit is intended.
expr: irate(node_disk_written_bytes_total[2m]) / 1024 / 1024 > 50
for: 5m
labels:
severity: warning
annotations:
summary: Host high disk '{{ $labels.device }}' write rate (instance {{ $labels.instance }})
description: >-
Host disk '{{ $labels.device }}' is probably writing too much data ({{ $value | printf "%.0f" }} > 50 MB/s) for last 5m.
VALUE = {{ $value }}
LABELS = {{ $labels }}
- alert: DiskRemoval
# Compares the current count of node_disk_info series (restricted to series with
# non-empty model, serial and wwn labels), grouped by (instance, device), against
# the same count taken with `offset 30d`. The alert is raised when the current
# count is lower and the condition has held for 5m.
# NOTE(review): a `<` comparison with on(instance, device) only returns results
# for label sets present on both sides, so a device whose series has disappeared
# entirely may never match here - confirm this fires on real removals (an
# `unless`-based expression may be what is intended).
expr: count(node_disk_info{model!="", serial!="", wwn!=""}) by (instance, device) < on(instance, device) group_left() (count(node_disk_info{model!="", serial!="", wwn!=""} offset 30d) by (instance, device))
for: 5m
labels:
severity: warning
annotations:
summary: "Disk removal detected on {{ $labels.instance }}"
description: "Device {{ $labels.device }} on {{ $labels.instance }} has been removed which can be a hardware failure."

0 comments on commit 151460b

Please sign in to comment.