Skip to content

Commit

Permalink
Merge pull request #193 from ministryofjustice/velero-backup-failure
Browse files (browse the repository at this point in the history)
chore: 🤖 velero alerts too sensitive
  • Loading branch information
jaskaransarkaria authored Nov 21, 2023
2 parents 2d38d7a + 880e4a7 commit cbe0df6
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions resources/prometheusrule-alerts/application-alerts.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -308,25 +308,25 @@ spec:
severity: warning
- alert: VeleroBackupPartialFailure-velero-allnamespacebackup
annotations:
message: A Velero backup partial failure in past 3 hours - velero-allnamespacebackup
message: A Velero backup partial failure in past 6 hours - velero-allnamespacebackup
runbook_url: https://runbooks.cloud-platform.service.justice.gov.uk/disaster-recovery-scenarios.html#resolving-a-partiallyfailed-backup-alert
expr: sum(increase(velero_backup_partial_failure_total{schedule="velero-allnamespacebackup"}[3h])) > 0
expr: sum(increase(velero_backup_partial_failure_total{schedule="velero-allnamespacebackup"}[3h])) > 1
for: 1m
labels:
severity: warning
- alert: VeleroBackupFailure-velero-allnamespacebackup
annotations:
message: A Velero backup failure in past 3 hours - velero-allnamespacebackup
message: A Velero backup failure in past 6 hours - velero-allnamespacebackup
runbook_url: https://runbooks.cloud-platform.service.justice.gov.uk/disaster-recovery-scenarios.html#resolving-a-partiallyfailed-backup-alert
expr: sum(increase(velero_backup_failure_total{schedule="velero-allnamespacebackup"}[3h])) > 0
expr: sum(increase(velero_backup_failure_total{schedule="velero-allnamespacebackup"}[3h])) > 1
for: 1m
labels:
severity: warning
- alert: VeleroBackupNotSuccessfulForOverFourHours-velero-allnamespacebackup
- alert: VeleroBackupNotSuccessfulForOverEightHours-velero-allnamespacebackup
annotations:
message: The Velero backup schedule for AllNamespaceBackup does not have a successful timestamp for over 4 hours
message: The Velero backup schedule for AllNamespaceBackup does not have a successful timestamp for over 8 hours
runbook_url: https://runbooks.cloud-platform.service.justice.gov.uk/disaster-recovery-scenarios.html#resolving-a-partiallyfailed-backup-alert
expr: (time() - velero_backup_last_successful_timestamp{schedule="velero-allnamespacebackup"}) / 60 / 60 > 4
expr: (time() - velero_backup_last_successful_timestamp{schedule="velero-allnamespacebackup"}) / 60 / 60 > 8
for: 1m
labels:
severity: warning
Expand Down

0 comments on commit cbe0df6

Please sign in to comment.