From e389e973dd975ec1c58d49927678ba706c74e7e6 Mon Sep 17 00:00:00 2001 From: adinhodovic Date: Thu, 19 Oct 2023 11:29:11 +0200 Subject: [PATCH] fix: Add job to alert descriptions --- celery-mixin/alerts/alerts.libsonnet | 7 ++++--- celery-mixin/prometheus-alerts.yaml | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/celery-mixin/alerts/alerts.libsonnet b/celery-mixin/alerts/alerts.libsonnet index 6919975..f70aade 100644 --- a/celery-mixin/alerts/alerts.libsonnet +++ b/celery-mixin/alerts/alerts.libsonnet @@ -42,7 +42,7 @@ ||| % $._config, annotations: { summary: 'Celery high task fail rate.', - description: 'More than %(celeryTaskFailedThreshold)s%% tasks failed for the task {{ $labels.queue_name }}/{{ $labels.name }} the past %(celeryTaskFailedInterval)s.' % $._config, + description: 'More than %(celeryTaskFailedThreshold)s%% tasks failed for the task {{ $labels.job }}/{{ $labels.queue_name }}/{{ $labels.name }} the past %(celeryTaskFailedInterval)s.' % $._config, dashboard_url: $._config.celeryTasksByTaskUrl + '?var-job={{ $labels.job }}&var-queue_name={{ $labels.queue_name }}&var-task={{ $labels.name }}', }, 'for': '1m', @@ -67,7 +67,7 @@ }, annotations: { summary: 'Celery high queue length.', - description: 'More than %(celeryHighQueueLengthThreshold)s tasks in the queue {{ $labels.queue_name }} the past %(celeryHighQueueLengthInterval)s.' % $._config, + description: 'More than %(celeryHighQueueLengthThreshold)s tasks in the queue {{ $labels.job }}/{{ $labels.queue_name }} the past %(celeryHighQueueLengthInterval)s.' 
% $._config, dashboard_url: $._config.celeryTasksOverviewUrl + '?&var-job={{ $labels.job }}&var-queue_name={{ $labels.queue_name }}', }, }, @@ -82,7 +82,8 @@ }, annotations: { summary: 'A Celery worker is offline.', - description: 'The Celery worker {{ $labels.hostname }} is offline.', + description: 'The Celery worker {{ $labels.job }}/{{ $labels.hostname }} is offline.', + dashboard_url: $._config.celeryTasksOverviewUrl + '?&var-job={{ $labels.job }}&var-queue_name={{ $labels.queue_name }}', }, }, ]), diff --git a/celery-mixin/prometheus-alerts.yaml b/celery-mixin/prometheus-alerts.yaml index dd0e5f4..bf62120 100644 --- a/celery-mixin/prometheus-alerts.yaml +++ b/celery-mixin/prometheus-alerts.yaml @@ -4,7 +4,7 @@ - "alert": "CeleryTaskHighFailRate" "annotations": "dashboard_url": "https://grafana.com/d/celery-tasks-by-task-32s3/celery-tasks-by-task?var-job={{ $labels.job }}&var-queue_name={{ $labels.queue_name }}&var-task={{ $labels.name }}" - "description": "More than 5% tasks failed for the task {{ $labels.queue_name }}/{{ $labels.name }} the past 10m." + "description": "More than 5% tasks failed for the task {{ $labels.job }}/{{ $labels.queue_name }}/{{ $labels.name }} the past 10m." "summary": "Celery high task fail rate." "expr": | sum( @@ -45,7 +45,7 @@ - "alert": "CeleryHighQueueLength" "annotations": "dashboard_url": "https://grafana.com/d/celery-tasks-overview-32s3/celery-tasks-overview?&var-job={{ $labels.job }}&var-queue_name={{ $labels.queue_name }}" - "description": "More than 100 tasks in the queue {{ $labels.queue_name }} the past 20m." + "description": "More than 100 tasks in the queue {{ $labels.job }}/{{ $labels.queue_name }} the past 20m." "summary": "Celery high queue length." "expr": | sum( @@ -60,7 +60,8 @@ "severity": "warning" - "alert": "CeleryWorkerDown" "annotations": - "description": "The Celery worker {{ $labels.hostname }} is offline." 
+ "dashboard_url": "https://grafana.com/d/celery-tasks-overview-32s3/celery-tasks-overview?&var-job={{ $labels.job }}&var-queue_name={{ $labels.queue_name }}" + "description": "The Celery worker {{ $labels.job }}/{{ $labels.hostname }} is offline." "summary": "A Celery worker is offline." "expr": | celery_worker_up{job=~"celery|celery-exporter"} == 0