From f825da3e1fceebff4a51d4f89c886e213a45b270 Mon Sep 17 00:00:00 2001
From: Vlad Zolotarov
Date: Thu, 26 Oct 2023 16:55:50 -0400
Subject: [PATCH] prometheus.rules.yml: make InstanceDown alerts 'error'

InstanceDown is definitely not a warning - it's an error that needs to be
treated immediately.
---
 prometheus/prom_rules/prometheus.rules.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/prometheus/prom_rules/prometheus.rules.yml b/prometheus/prom_rules/prometheus.rules.yml
index 4f3dec12e..28cdce674 100644
--- a/prometheus/prom_rules/prometheus.rules.yml
+++ b/prometheus/prom_rules/prometheus.rules.yml
@@ -133,7 +133,7 @@ groups:
     expr: up{job="scylla"} == 0
     for: 30s
     labels:
-      severity: "warn"
+      severity: "error"
     annotations:
       description: '{{ $labels.instance }} has been down for more than 30 seconds.'
       summary: Instance {{ $labels.instance }} down
@@ -141,7 +141,7 @@ groups:
     expr: sum(up{job="scylla"}>0)by(instance) unless sum(scylla_transport_requests_served{shard="0"}) by(instance)
     for: 1m
     labels:
-      severity: "warn"
+      severity: "error"
     annotations:
       description: '{{ $labels.instance }} instance is shutting down.'
       summary: Instance {{ $labels.instance }} down
@@ -149,7 +149,7 @@ groups:
     expr: scylla_node_operation_mode > 3
     for: 30s
     labels:
-      severity: "warn"
+      severity: "error"
     annotations:
       description: '{{ $labels.instance }} instance is shutting down.'
       summary: Instance {{ $labels.instance }} down