From 4128a3a31a9d99349c86bce7d6c306622b0c161f Mon Sep 17 00:00:00 2001 From: Hanno Hecker <hanno@zalando.de> Date: Wed, 9 Jan 2019 08:16:54 +0100 Subject: [PATCH 1/5] cleanup entities task see also https://github.com/zalando-zmon/zmon-scheduler/pull/89 --- zmon_worker_monitor/zmon_worker/tasks/main.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/zmon_worker_monitor/zmon_worker/tasks/main.py b/zmon_worker_monitor/zmon_worker/tasks/main.py index 34d5ba2f..cf4cf941 100755 --- a/zmon_worker_monitor/zmon_worker/tasks/main.py +++ b/zmon_worker_monitor/zmon_worker/tasks/main.py @@ -1158,6 +1158,24 @@ def cleanup(self, *args, **kwargs): else: self._cleanup_alert(p, alert_id) + span = extract_span_from_kwargs(**kwargs) + span.log_kv('cleanup_entities', kwargs.get('cleanup_entities', [])) + for entity_id in kwargs.get('cleanup_entities', []): + alert_ids = [a.replace('zmon:alerts:', '').replace(':{}'.format(entity_id), '') + for a in self.con.keys('zmon:alerts:*:{}'.format(entity_id))] + for alert_id in alert_ids: + self._cleanup_common(p, 'alerts', alert_id, set(entity_id)) + # All entities matching given alert definition. + self.logger.info('Removing entity %s from hash %s', entity_id, + 'zmon:alerts:{}:entities'.format(alert_id)) + p.hdel('zmon:alerts:{}:entities'.format(alert_id), entity_id) + p.delete('zmon:notifications:{}:{}'.format(alert_id, entity_id)) + + check_ids = [c.replace('zmon:checks:', '').replace(':{}'.format(entity_id), '') + for c in self.con.keys('zmon:checks:*:{}'.format(entity_id))] + for check_id in check_ids: + self._cleanup_common(p, 'checks', check_id, set(entity_id)) + p.execute() def _cleanup_check(self, pipeline, check_id): From 7e9ac7f4d3a0e0b8f86070abf33ca24064f2a868 Mon Sep 17 00:00:00 2001 From: Hanno Hecker <hanno@zalando.de> Date: Wed, 9 Jan 2019 08:41:37 +0100 Subject: [PATCH 2/5] finish span... --- zmon_worker_monitor/zmon_worker/tasks/main.py | 135 +++++++++--------- 1 file changed, 68 insertions(+), 67 deletions(-) diff --git a/zmon_worker_monitor/zmon_worker/tasks/main.py b/zmon_worker_monitor/zmon_worker/tasks/main.py index cf4cf941..8c1a9ccc 100755 --- a/zmon_worker_monitor/zmon_worker/tasks/main.py +++ b/zmon_worker_monitor/zmon_worker/tasks/main.py @@ -1103,80 +1103,81 @@ def trial_run(self, req, alerts, task_context=None, **kwargs): @trace() def cleanup(self, *args, **kwargs): - self.task_context = kwargs.get('task_context') - p = self.con.pipeline() - p.smembers('zmon:checks') - p.smembers('zmon:alerts') - check_ids, alert_ids = p.execute() - - for check_id in kwargs.get('disabled_checks', {}): - self._cleanup_check(p, check_id) - - for alert_id in kwargs.get('disabled_alerts', {}): - self._cleanup_alert(p, alert_id) - - for check_id in check_ids: - if check_id in kwargs.get('check_entities', {}): - redis_entities = self.con.smembers('zmon:checks:{}'.format(check_id)) - check_entities = set(kwargs['check_entities'][check_id]) - - # If it happens that we remove all entities for given check, we should remove all the things. - if not check_entities: - p.srem('zmon:checks', check_id) - p.delete('zmon:checks:{}'.format(check_id)) - for entity in redis_entities: - p.delete('zmon:checks:{}:{}'.format(check_id, entity)) - else: - self._cleanup_common(p, 'checks', check_id, redis_entities - check_entities) - else: + span = extract_span_from_kwargs(**kwargs) + with span: + self.task_context = kwargs.get('task_context') + p = self.con.pipeline() + p.smembers('zmon:checks') + p.smembers('zmon:alerts') + check_ids, alert_ids = p.execute() + for check_id in kwargs.get('disabled_checks', {}): self._cleanup_check(p, check_id) - for alert_id in alert_ids: - if alert_id in kwargs.get('alert_entities', {}): - # Entities that are in the alert state. - redis_entities = self.con.smembers('zmon:alerts:{}'.format(alert_id)) - alert_entities = set(kwargs['alert_entities'][alert_id]) - - # If it happens that we remove all entities for given alert, we should remove all the things. - if not alert_entities: - p.srem('zmon:alerts', alert_id) - p.delete('zmon:alerts:{}'.format(alert_id)) - p.delete('zmon:alerts:{}:entities'.format(alert_id)) - for entity in redis_entities: - p.delete('zmon:alerts:{}:{}'.format(alert_id, entity)) - p.delete('zmon:notifications:{}:{}'.format(alert_id, entity)) - else: - self._cleanup_common(p, 'alerts', alert_id, redis_entities - alert_entities) - # All entities matching given alert definition. - all_entities = set(self.con.hkeys('zmon:alerts:{}:entities'.format(alert_id))) - for entity in all_entities - alert_entities: - self.logger.info('Removing entity %s from hash %s', entity, - 'zmon:alerts:{}:entities'.format(alert_id)) - p.hdel('zmon:alerts:{}:entities'.format(alert_id), entity) - p.delete('zmon:notifications:{}:{}'.format(alert_id, entity)) - else: + for alert_id in kwargs.get('disabled_alerts', {}): self._cleanup_alert(p, alert_id) - span = extract_span_from_kwargs(**kwargs) - span.log_kv('cleanup_entities', kwargs.get('cleanup_entities', [])) - for entity_id in kwargs.get('cleanup_entities', []): - alert_ids = [a.replace('zmon:alerts:', '').replace(':{}'.format(entity_id), '') - for a in self.con.keys('zmon:alerts:*:{}'.format(entity_id))] - for alert_id in alert_ids: - self._cleanup_common(p, 'alerts', alert_id, set(entity_id)) - # All entities matching given alert definition. - self.logger.info('Removing entity %s from hash %s', entity_id, - 'zmon:alerts:{}:entities'.format(alert_id)) - p.hdel('zmon:alerts:{}:entities'.format(alert_id), entity_id) - p.delete('zmon:notifications:{}:{}'.format(alert_id, entity_id)) - - check_ids = [c.replace('zmon:checks:', '').replace(':{}'.format(entity_id), '') - for c in self.con.keys('zmon:checks:*:{}'.format(entity_id))] for check_id in check_ids: - self._cleanup_common(p, 'checks', check_id, set(entity_id)) + if check_id in kwargs.get('check_entities', {}): + redis_entities = self.con.smembers('zmon:checks:{}'.format(check_id)) + check_entities = set(kwargs['check_entities'][check_id]) + + # If it happens that we remove all entities for given check, we should remove all the things. + if not check_entities: + p.srem('zmon:checks', check_id) + p.delete('zmon:checks:{}'.format(check_id)) + for entity in redis_entities: + p.delete('zmon:checks:{}:{}'.format(check_id, entity)) + else: + self._cleanup_common(p, 'checks', check_id, redis_entities - check_entities) + else: + + self._cleanup_check(p, check_id) + + for alert_id in alert_ids: + if alert_id in kwargs.get('alert_entities', {}): + # Entities that are in the alert state. + redis_entities = self.con.smembers('zmon:alerts:{}'.format(alert_id)) + alert_entities = set(kwargs['alert_entities'][alert_id]) + + # If it happens that we remove all entities for given alert, we should remove all the things. + if not alert_entities: + p.srem('zmon:alerts', alert_id) + p.delete('zmon:alerts:{}'.format(alert_id)) + p.delete('zmon:alerts:{}:entities'.format(alert_id)) + for entity in redis_entities: + p.delete('zmon:alerts:{}:{}'.format(alert_id, entity)) + p.delete('zmon:notifications:{}:{}'.format(alert_id, entity)) + else: + self._cleanup_common(p, 'alerts', alert_id, redis_entities - alert_entities) + # All entities matching given alert definition. + all_entities = set(self.con.hkeys('zmon:alerts:{}:entities'.format(alert_id))) + for entity in all_entities - alert_entities: + self.logger.info('Removing entity %s from hash %s', entity, + 'zmon:alerts:{}:entities'.format(alert_id)) + p.hdel('zmon:alerts:{}:entities'.format(alert_id), entity) + p.delete('zmon:notifications:{}:{}'.format(alert_id, entity)) + else: + self._cleanup_alert(p, alert_id) + + span.log_kv('cleanup_entities', kwargs.get('cleanup_entities', [])) + for entity_id in kwargs.get('cleanup_entities', []): + alert_ids = [a.replace('zmon:alerts:', '').replace(':{}'.format(entity_id), '') + for a in self.con.keys('zmon:alerts:*:{}'.format(entity_id))] + for alert_id in alert_ids: + self._cleanup_common(p, 'alerts', alert_id, set(entity_id)) + # All entities matching given alert definition. + self.logger.info('Removing entity %s from hash %s', entity_id, + 'zmon:alerts:{}:entities'.format(alert_id)) + p.hdel('zmon:alerts:{}:entities'.format(alert_id), entity_id) + p.delete('zmon:notifications:{}:{}'.format(alert_id, entity_id)) - p.execute() + check_ids = [c.replace('zmon:checks:', '').replace(':{}'.format(entity_id), '') + for c in self.con.keys('zmon:checks:*:{}'.format(entity_id))] + for check_id in check_ids: + self._cleanup_common(p, 'checks', check_id, set(entity_id)) + + p.execute() def _cleanup_check(self, pipeline, check_id): self.logger.info('Removing check with id %s from zmon:checks set', check_id) From 5c68bd1cbd1ea81e2e2939ee33ff5b1548d1f7b4 Mon Sep 17 00:00:00 2001 From: Hanno Hecker <hanno@zalando.de> Date: Wed, 9 Jan 2019 09:06:36 +0100 Subject: [PATCH 3/5] fix span? --- zmon_worker_monitor/zmon_worker/tasks/main.py | 133 +++++++++--------- 1 file changed, 66 insertions(+), 67 deletions(-) diff --git a/zmon_worker_monitor/zmon_worker/tasks/main.py b/zmon_worker_monitor/zmon_worker/tasks/main.py index 8c1a9ccc..fc9163da 100755 --- a/zmon_worker_monitor/zmon_worker/tasks/main.py +++ b/zmon_worker_monitor/zmon_worker/tasks/main.py @@ -1104,80 +1104,79 @@ def trial_run(self, req, alerts, task_context=None, **kwargs): @trace() def cleanup(self, *args, **kwargs): span = extract_span_from_kwargs(**kwargs) - with span: - self.task_context = kwargs.get('task_context') - p = self.con.pipeline() - p.smembers('zmon:checks') - p.smembers('zmon:alerts') - check_ids, alert_ids = p.execute() - - for check_id in kwargs.get('disabled_checks', {}): - self._cleanup_check(p, check_id) - - for alert_id in kwargs.get('disabled_alerts', {}): - self._cleanup_alert(p, alert_id) - - for check_id in check_ids: - if check_id in kwargs.get('check_entities', {}): - redis_entities = self.con.smembers('zmon:checks:{}'.format(check_id)) - check_entities = set(kwargs['check_entities'][check_id]) - - # If it happens that we remove all entities for given check, we should remove all the things. - if not check_entities: - p.srem('zmon:checks', check_id) - p.delete('zmon:checks:{}'.format(check_id)) - for entity in redis_entities: - p.delete('zmon:checks:{}:{}'.format(check_id, entity)) - else: - self._cleanup_common(p, 'checks', check_id, redis_entities - check_entities) + self.task_context = kwargs.get('task_context') + p = self.con.pipeline() + p.smembers('zmon:checks') + p.smembers('zmon:alerts') + check_ids, alert_ids = p.execute() + + for check_id in kwargs.get('disabled_checks', {}): + self._cleanup_check(p, check_id) + + for alert_id in kwargs.get('disabled_alerts', {}): + self._cleanup_alert(p, alert_id) + + for check_id in check_ids: + if check_id in kwargs.get('check_entities', {}): + redis_entities = self.con.smembers('zmon:checks:{}'.format(check_id)) + check_entities = set(kwargs['check_entities'][check_id]) + + # If it happens that we remove all entities for given check, we should remove all the things. + if not check_entities: + p.srem('zmon:checks', check_id) + p.delete('zmon:checks:{}'.format(check_id)) + for entity in redis_entities: + p.delete('zmon:checks:{}:{}'.format(check_id, entity)) else: + self._cleanup_common(p, 'checks', check_id, redis_entities - check_entities) + else: - self._cleanup_check(p, check_id) + self._cleanup_check(p, check_id) - for alert_id in alert_ids: - if alert_id in kwargs.get('alert_entities', {}): - # Entities that are in the alert state. - redis_entities = self.con.smembers('zmon:alerts:{}'.format(alert_id)) - alert_entities = set(kwargs['alert_entities'][alert_id]) - - # If it happens that we remove all entities for given alert, we should remove all the things. - if not alert_entities: - p.srem('zmon:alerts', alert_id) - p.delete('zmon:alerts:{}'.format(alert_id)) - p.delete('zmon:alerts:{}:entities'.format(alert_id)) - for entity in redis_entities: - p.delete('zmon:alerts:{}:{}'.format(alert_id, entity)) - p.delete('zmon:notifications:{}:{}'.format(alert_id, entity)) - else: - self._cleanup_common(p, 'alerts', alert_id, redis_entities - alert_entities) - # All entities matching given alert definition. - all_entities = set(self.con.hkeys('zmon:alerts:{}:entities'.format(alert_id))) - for entity in all_entities - alert_entities: - self.logger.info('Removing entity %s from hash %s', entity, - 'zmon:alerts:{}:entities'.format(alert_id)) - p.hdel('zmon:alerts:{}:entities'.format(alert_id), entity) - p.delete('zmon:notifications:{}:{}'.format(alert_id, entity)) + for alert_id in alert_ids: + if alert_id in kwargs.get('alert_entities', {}): + # Entities that are in the alert state. + redis_entities = self.con.smembers('zmon:alerts:{}'.format(alert_id)) + alert_entities = set(kwargs['alert_entities'][alert_id]) + + # If it happens that we remove all entities for given alert, we should remove all the things. + if not alert_entities: + p.srem('zmon:alerts', alert_id) + p.delete('zmon:alerts:{}'.format(alert_id)) + p.delete('zmon:alerts:{}:entities'.format(alert_id)) + for entity in redis_entities: + p.delete('zmon:alerts:{}:{}'.format(alert_id, entity)) + p.delete('zmon:notifications:{}:{}'.format(alert_id, entity)) else: - self._cleanup_alert(p, alert_id) - - span.log_kv('cleanup_entities', kwargs.get('cleanup_entities', [])) - for entity_id in kwargs.get('cleanup_entities', []): - alert_ids = [a.replace('zmon:alerts:', '').replace(':{}'.format(entity_id), '') - for a in self.con.keys('zmon:alerts:*:{}'.format(entity_id))] - for alert_id in alert_ids: - self._cleanup_common(p, 'alerts', alert_id, set(entity_id)) + self._cleanup_common(p, 'alerts', alert_id, redis_entities - alert_entities) # All entities matching given alert definition. - self.logger.info('Removing entity %s from hash %s', entity_id, - 'zmon:alerts:{}:entities'.format(alert_id)) - p.hdel('zmon:alerts:{}:entities'.format(alert_id), entity_id) - p.delete('zmon:notifications:{}:{}'.format(alert_id, entity_id)) + all_entities = set(self.con.hkeys('zmon:alerts:{}:entities'.format(alert_id))) + for entity in all_entities - alert_entities: + self.logger.info('Removing entity %s from hash %s', entity, + 'zmon:alerts:{}:entities'.format(alert_id)) + p.hdel('zmon:alerts:{}:entities'.format(alert_id), entity) + p.delete('zmon:notifications:{}:{}'.format(alert_id, entity)) + else: + self._cleanup_alert(p, alert_id) - check_ids = [c.replace('zmon:checks:', '').replace(':{}'.format(entity_id), '') - for c in self.con.keys('zmon:checks:*:{}'.format(entity_id))] - for check_id in check_ids: - self._cleanup_common(p, 'checks', check_id, set(entity_id)) + span.log_kv('cleanup_entities', kwargs.get('cleanup_entities', [])) + for entity_id in kwargs.get('cleanup_entities', []): + alert_ids = [a.replace('zmon:alerts:', '').replace(':{}'.format(entity_id), '') + for a in self.con.keys('zmon:alerts:*:{}'.format(entity_id))] + for alert_id in alert_ids: + self._cleanup_common(p, 'alerts', alert_id, set(entity_id)) + # All entities matching given alert definition. + self.logger.info('Removing entity %s from hash %s', entity_id, + 'zmon:alerts:{}:entities'.format(alert_id)) + p.hdel('zmon:alerts:{}:entities'.format(alert_id), entity_id) + p.delete('zmon:notifications:{}:{}'.format(alert_id, entity_id)) + + check_ids = [c.replace('zmon:checks:', '').replace(':{}'.format(entity_id), '') + for c in self.con.keys('zmon:checks:*:{}'.format(entity_id))] + for check_id in check_ids: + self._cleanup_common(p, 'checks', check_id, set(entity_id)) - p.execute() + p.execute() def _cleanup_check(self, pipeline, check_id): self.logger.info('Removing check with id %s from zmon:checks set', check_id) From 1ae1bd027f1ad4fbb5a26bed42df043f2917966c Mon Sep 17 00:00:00 2001 From: Hanno Hecker <hanno@zalando.de> Date: Wed, 9 Jan 2019 09:17:25 +0100 Subject: [PATCH 4/5] fix span? --- zmon_worker_monitor/zmon_worker/tasks/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/zmon_worker_monitor/zmon_worker/tasks/main.py b/zmon_worker_monitor/zmon_worker/tasks/main.py index fc9163da..5319d0a1 100755 --- a/zmon_worker_monitor/zmon_worker/tasks/main.py +++ b/zmon_worker_monitor/zmon_worker/tasks/main.py @@ -1101,9 +1101,9 @@ def trial_run(self, req, alerts, task_context=None, **kwargs): else: self.notify_for_trial_run(val, req, alerts) - @trace() + @trace(pass_span=True) def cleanup(self, *args, **kwargs): - span = extract_span_from_kwargs(**kwargs) + current_span = extract_span_from_kwargs(**kwargs) self.task_context = kwargs.get('task_context') p = self.con.pipeline() p.smembers('zmon:checks') @@ -1159,7 +1159,7 @@ def cleanup(self, *args, **kwargs): else: self._cleanup_alert(p, alert_id) - span.log_kv('cleanup_entities', kwargs.get('cleanup_entities', [])) + current_span.log_kv('cleanup_entities', kwargs.get('cleanup_entities', [])) for entity_id in kwargs.get('cleanup_entities', []): alert_ids = [a.replace('zmon:alerts:', '').replace(':{}'.format(entity_id), '') for a in self.con.keys('zmon:alerts:*:{}'.format(entity_id))] From cbda0566c0b9393ac6ea71702490e51aad17f702 Mon Sep 17 00:00:00 2001 From: Hanno Hecker <hanno@zalando.de> Date: Wed, 9 Jan 2019 09:33:22 +0100 Subject: [PATCH 5/5] log_kv requires dict --- zmon_worker_monitor/zmon_worker/tasks/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zmon_worker_monitor/zmon_worker/tasks/main.py b/zmon_worker_monitor/zmon_worker/tasks/main.py index 5319d0a1..f3a37a8c 100755 --- a/zmon_worker_monitor/zmon_worker/tasks/main.py +++ b/zmon_worker_monitor/zmon_worker/tasks/main.py @@ -1159,7 +1159,7 @@ def cleanup(self, *args, **kwargs): else: self._cleanup_alert(p, alert_id) - current_span.log_kv('cleanup_entities', kwargs.get('cleanup_entities', [])) + current_span.log_kv({'cleanup_entities': kwargs.get('cleanup_entities', [])}) for entity_id in kwargs.get('cleanup_entities', []): alert_ids = [a.replace('zmon:alerts:', '').replace(':{}'.format(entity_id), '') for a in self.con.keys('zmon:alerts:*:{}'.format(entity_id))]