Skip to content

Commit

Permalink
Merge pull request 2i2c-org#2348 from 2i2c-org/fix-uptime-check-ci
Browse files Browse the repository at this point in the history
Fix uptime-check terraform for prometheus
  • Loading branch information
pnasrat authored Mar 14, 2023
2 parents d8f3aa6 + 81f08c9 commit b0b55c6
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions terraform/uptime-checks/checks.tf
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,9 @@ resource "google_monitoring_uptime_check_config" "prometheus_simple_uptime_check
display_name = "${each.value.domain} on ${each.value.cluster}"
timeout = "30s"

# Check every 5 minutes
period = "300s"
# Check every 15 minutes
period = "900s"
selected_regions = ["USA",]

http_check {
path = "/"
Expand Down Expand Up @@ -145,14 +146,14 @@ resource "google_monitoring_alert_policy" "prometheus_simple_uptime_alert" {
# every 5 minutes, this means we alert if two checks have failed. This should
# prevent alerts if the hub is momentarily down during a deployment. All alerts
# *must* be actionable, so we trade-off some latency here for resiliency.
duration = "660s"
duration = "1860s"
threshold_value = 1 # 1 means 'a check failed', 0 means 'a check succeeded'
comparison = "COMPARISON_GT"
aggregations {
group_by_fields = ["resource.label.host"]
# https://cloud.google.com/monitoring/alerts/concepts-indepth#duration has
# more info on alignment
alignment_period = "300s"
alignment_period = "900s"
per_series_aligner = "ALIGN_NEXT_OLDER"
# Count each failure as a "1"
cross_series_reducer = "REDUCE_COUNT_FALSE"
Expand Down

0 comments on commit b0b55c6

Please sign in to comment.