Skip to content

Commit

Permalink
Add tests for history-based rules
Browse files Browse the repository at this point in the history
  • Loading branch information
VladimirFilonov committed Dec 10, 2024
1 parent bf72a98 commit dc59681
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 17 deletions.
2 changes: 1 addition & 1 deletion keep/api/core/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -4281,7 +4281,7 @@ def is_all_alerts_in_status(
session: Optional[Session] = None
):

if incident.alerts_count == 0:
if incident and incident.alerts_count == 0:
return False

with existed_or_new_session(session) as session:
Expand Down
2 changes: 1 addition & 1 deletion keep/api/models/db/rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,4 @@ def add_alert(self, condition, fingerprint):
flag_modified(self, "state")

def get_all_alerts(self):
return list(set(chain(*self.state.values())))
return list(set(chain(*self.state.values())))
23 changes: 10 additions & 13 deletions keep/rulesengine/rulesengine.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,14 @@ def run_rules(
rule_fingerprint,
session=session,
)

if not incident:
if incident:
incident = assign_alert_to_incident(
fingerprint=event.fingerprint,
incident=incident,
tenant_id=self.tenant_id,
session=session,
)
else:

self.logger.info(
f"No existing incidents for rule {rule.name}. Checking incident creation conditions"
Expand All @@ -100,22 +106,13 @@ def run_rules(
incident = self._create_incident_with_alerts(
rule, rule_fingerprint, [event.fingerprint], session=session
)
incidents_dto[incident.id] = IncidentDto.from_db_incident(incident)

elif rule.create_on == "all":
incident = self._process_event_for_history_based_rule(
event, rule, sub_rule, rule_groups, rule_fingerprint, session
)
if incident:
incidents_dto[incident.id] = IncidentDto.from_db_incident(incident)

else:
incident = assign_alert_to_incident(
fingerprint=event.fingerprint,
incident=incident,
tenant_id=self.tenant_id,
session=session,
)
if incident:

incident = self._resolve_incident_if_require(rule, incident, session)
incidents_dto[incident.id] = IncidentDto.from_db_incident(incident)

Expand Down
148 changes: 146 additions & 2 deletions tests/test_rules_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
import json
import os
import uuid
from time import sleep

import pytest

from boom import fingerprint
from keep.api.core.db import create_rule as create_rule_db
from keep.api.core.db import get_incident_alerts_by_incident_id, get_last_incidents, set_last_alert
from keep.api.core.db import get_rules as get_rules_db
Expand All @@ -17,8 +19,8 @@
IncidentSeverity,
IncidentStatus,
)
from keep.api.models.db.alert import Alert
from keep.api.models.db.rule import ResolveOn
from keep.api.models.db.alert import Alert, Incident
from keep.api.models.db.rule import ResolveOn, CreateIncidentOn, RuleEventGroup
from keep.rulesengine.rulesengine import RulesEngine


Expand Down Expand Up @@ -582,6 +584,148 @@ def test_incident_resolution_on_edge(
assert incident.status == IncidentStatus.RESOLVED.value


def test_rule_event_groups(db_session, create_alert):
    """Check that a ``create_on=ALL`` rule buffers alerts before opening an incident.

    The rule's CEL has two sub-conditions (critical OR high). Alerts matching
    only the first condition are expected to accumulate in a single
    ``RuleEventGroup`` without an incident; once an alert matching the second
    condition arrives, the group is expected to be removed and one incident
    holding all three alerts to exist.
    """
    critical_key = 'severity == "critical"'

    def fire(name, severity):
        # Send a firing alert stamped "now" carrying only a severity field.
        create_alert(
            name,
            AlertStatus.FIRING,
            datetime.datetime.utcnow(),
            {"severity": severity.value},
        )

    def count_of(model):
        return db_session.query(model).count()

    def newest_alert():
        return db_session.query(Alert).order_by(Alert.timestamp.desc()).first()

    create_rule_db(
        tenant_id=SINGLE_TENANT_UUID,
        name="test-rule",
        # The SQL definition is not used anymore; only the CEL matters.
        definition={"sql": "N/A", "params": {}},
        timeframe=600,
        timeunit="seconds",
        definition_cel='(severity == "critical") || (severity == "high")',
        created_by="[email protected]",
        create_on=CreateIncidentOn.ALL.value,
    )

    # --- first critical alert: no incident, but one event group ---
    fire("Critical Alert", AlertSeverity.CRITICAL)

    assert count_of(Incident) == 0
    assert count_of(RuleEventGroup) == 1
    event_group = db_session.query(RuleEventGroup).first()
    alert_1 = newest_alert()

    assert isinstance(event_group.state, dict)
    assert critical_key in event_group.state
    assert len(event_group.state[critical_key]) == 1
    assert event_group.state[critical_key][0] == alert_1.fingerprint

    # --- second critical alert: same group grows, still no incident ---
    fire("Critical Alert 2", AlertSeverity.CRITICAL)

    db_session.refresh(event_group)
    alert_2 = newest_alert()

    assert count_of(Incident) == 0
    assert count_of(RuleEventGroup) == 1

    assert isinstance(event_group.state, dict)
    assert critical_key in event_group.state
    assert len(event_group.state[critical_key]) == 2
    assert event_group.state[critical_key][0] == alert_1.fingerprint
    assert event_group.state[critical_key][1] == alert_2.fingerprint

    # --- high alert: all conditions met, group replaced by an incident ---
    fire("High Alert", AlertSeverity.HIGH)
    alert_3 = newest_alert()

    assert count_of(RuleEventGroup) == 0
    assert count_of(Incident) == 1

    incident = db_session.query(Incident).first()
    assert incident.alerts_count == 3

    alerts, alert_count = get_incident_alerts_by_incident_id(
        tenant_id=SINGLE_TENANT_UUID,
        incident_id=str(incident.id),
        session=db_session,
    )
    assert alert_count == 3
    assert len(alerts) == 3

    fingerprints = [a.fingerprint for a in alerts]

    for expected in (alert_1, alert_2, alert_3):
        assert expected.fingerprint in fingerprints


def test_rule_event_groups_expires(db_session, create_alert):
    """Check that an event group older than the rule timeframe is not reused.

    With a one-second timeframe, a critical alert followed (after a
    one-second pause) by a high alert is expected to leave two separate
    ``RuleEventGroup`` rows and no incident.
    """

    def fire(name, severity):
        # Send a firing alert stamped "now" carrying only a severity field.
        create_alert(
            name,
            AlertStatus.FIRING,
            datetime.datetime.utcnow(),
            {"severity": severity.value},
        )

    def count_of(model):
        return db_session.query(model).count()

    create_rule_db(
        tenant_id=SINGLE_TENANT_UUID,
        name="test-rule",
        # The SQL definition is not used anymore; only the CEL matters.
        definition={"sql": "N/A", "params": {}},
        timeframe=1,
        timeunit="seconds",
        definition_cel='(severity == "critical") || (severity == "high")',
        created_by="[email protected]",
        create_on=CreateIncidentOn.ALL.value,
    )

    fire("Critical Alert", AlertSeverity.CRITICAL)

    # First alert only opens an event group; no incident yet.
    assert count_of(Incident) == 0
    assert count_of(RuleEventGroup) == 1

    # Wait out the rule's one-second timeframe before the next alert.
    sleep(1)

    fire("High Alert", AlertSeverity.HIGH)

    # Still no incident, and now two groups: the first one was expired.
    assert count_of(Incident) == 0
    assert count_of(RuleEventGroup) == 2



# Next steps:
# - test that alerts in the same group are being updated correctly
# - test that groups are being updated correctly
Expand Down

0 comments on commit dc59681

Please sign in to comment.