Skip to content

Commit

Permalink
fix: incident.alerts_count should return count of unique fingerprints…
Browse files Browse the repository at this point in the history
… instead of alerts (#2217)
  • Loading branch information
VladimirFilonov authored Oct 28, 2024
1 parent ebbb889 commit d101552
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 20 deletions.
30 changes: 23 additions & 7 deletions keep/api/core/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -2950,7 +2950,9 @@ def get_all_same_alert_ids(


def get_alerts_data_for_incident(
alert_ids: List[str | UUID], session: Optional[Session] = None
alert_ids: List[str | UUID],
existed_fingerprints: Optional[List[str]] = None,
session: Optional[Session] = None
) -> dict:
"""
Function to prepare aggregated data for incidents from the given list of alert_ids
Expand All @@ -2962,12 +2964,14 @@ def get_alerts_data_for_incident(
Returns: dict {sources: list[str], services: list[str], count: int}
"""
existed_fingerprints = existed_fingerprints or []

with existed_or_new_session(session) as session:

fields = (
get_json_extract_field(session, Alert.event, "service"),
Alert.provider_type,
Alert.fingerprint,
get_json_extract_field(session, Alert.event, "severity"),
)

Expand All @@ -2980,8 +2984,9 @@ def get_alerts_data_for_incident(
sources = []
services = []
severities = []
fingerprints = set()

for service, source, severity in alerts_data:
for service, source, fingerprint, severity in alerts_data:
if source:
sources.append(source)
if service:
Expand All @@ -2991,12 +2996,14 @@ def get_alerts_data_for_incident(
severities.append(IncidentSeverity.from_number(severity))
else:
severities.append(IncidentSeverity(severity))
if fingerprint and fingerprint not in existed_fingerprints:
fingerprints.add(fingerprint)

return {
"sources": set(sources),
"services": set(services),
"max_severity": max(severities),
"count": len(alerts_data),
"count": len(fingerprints),
}


Expand Down Expand Up @@ -3047,6 +3054,17 @@ def add_alerts_to_incident(
)
).all()
)
existing_fingerprints = set(
session.exec(
select(Alert.fingerprint)
.join(AlertToIncident, AlertToIncident.alert_id == Alert.id)
.where(
AlertToIncident.deleted_at == NULL_FOR_DELETED_AT,
AlertToIncident.tenant_id == tenant_id,
AlertToIncident.incident_id == incident.id,
)
).all()
)

new_alert_ids = [
alert_id for alert_id in all_alert_ids if alert_id not in existing_alert_ids
Expand All @@ -3055,9 +3073,7 @@ def add_alerts_to_incident(
if not new_alert_ids:
return incident

alerts_data_for_incident = get_alerts_data_for_incident(
new_alert_ids, session
)
alerts_data_for_incident = get_alerts_data_for_incident(new_alert_ids, existing_fingerprints, session)

incident.sources = list(
set(incident.sources if incident.sources else []) | set(alerts_data_for_incident["sources"])
Expand Down Expand Up @@ -3177,7 +3193,7 @@ def remove_alerts_to_incident_by_incident_id(
session.commit()

# Getting aggregated data for incidents for alerts which just was removed
alerts_data_for_incident = get_alerts_data_for_incident(all_alert_ids, session)
alerts_data_for_incident = get_alerts_data_for_incident(all_alert_ids, session=session)

service_field = get_json_extract_field(session, Alert.event, "service")

Expand Down
5 changes: 4 additions & 1 deletion keep/api/tasks/process_event_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,10 @@ def process_event(
and isinstance(event, dict)
or isinstance(event, FormData)
):
provider_class = ProvidersFactory.get_provider_class(provider_type)
try:
provider_class = ProvidersFactory.get_provider_class(provider_type)
except Exception:
provider_class = ProvidersFactory.get_provider_class("keep")
event = provider_class.format_alert(
tenant_id=tenant_id,
event=event,
Expand Down
59 changes: 47 additions & 12 deletions tests/test_incidents.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from itertools import cycle

import pytest
from sqlalchemy import func
from sqlalchemy import func, distinct
from sqlalchemy.orm.exc import DetachedInstanceError

from keep.api.core.db import (
Expand All @@ -23,24 +23,40 @@
IncidentSeverity,
IncidentStatus,
)
from keep.api.models.db.alert import Alert
from keep.api.models.db.alert import Alert, AlertToIncident
from keep.api.utils.enrichment_helpers import convert_db_alerts_to_dto_alerts
from tests.fixtures.client import client, test_app # noqa


def test_get_alerts_data_for_incident(db_session, create_alert):
for i in range(100):
create_alert(
f"alert-test-{i % 10}",
AlertStatus.FIRING,
datetime.utcnow(),
{
"source": [f"source_{i % 10}"],
"service": f"service_{i % 10}",
}
)

alerts = db_session.query(Alert).all()

unique_fingerprints = db_session.query(func.count(distinct(Alert.fingerprint))).scalar()

def test_get_alerts_data_for_incident(db_session, setup_stress_alerts_no_elastic):
alerts = setup_stress_alerts_no_elastic(100)
assert 100 == db_session.query(func.count(Alert.id)).scalar()
assert 10 == unique_fingerprints

data = get_alerts_data_for_incident([a.id for a in alerts])
assert data["sources"] == set(["source_{}".format(i) for i in range(10)])
assert data["services"] == set(["service_{}".format(i) for i in range(10)])
assert data["count"] == 100
assert data["sources"] == set([f"source_{i}" for i in range(10)])
assert data["services"] == set([f"service_{i}" for i in range(10)])
assert data["count"] == unique_fingerprints


def test_add_remove_alert_to_incidents(db_session, setup_stress_alerts_no_elastic):
alerts = setup_stress_alerts_no_elastic(100)
# Adding 10 non-unique fingerprints
alerts.extend(setup_stress_alerts_no_elastic(10))
incident = create_incident_from_dict(
SINGLE_TENANT_UUID, {"user_generated_name": "test", "user_summary": "test"}
)
Expand All @@ -53,7 +69,10 @@ def test_add_remove_alert_to_incidents(db_session, setup_stress_alerts_no_elasti

incident = get_incident_by_id(SINGLE_TENANT_UUID, incident.id)

assert len(incident.alerts) == 100
# 110 alerts
assert len(incident.alerts) == 110
# But 100 unique fingerprints
assert incident.alerts_count == 100

assert sorted(incident.affected_services) == sorted(
["service_{}".format(i) for i in range(10)]
Expand All @@ -66,6 +85,18 @@ def test_add_remove_alert_to_incidents(db_session, setup_stress_alerts_no_elasti

service_0 = db_session.query(Alert.id).filter(service_field == "service_0").all()

# Testing unique fingerprints
more_alerts_with_same_fingerprints = setup_stress_alerts_no_elastic(10)

add_alerts_to_incident_by_incident_id(
SINGLE_TENANT_UUID, incident.id, [a.id for a in more_alerts_with_same_fingerprints]
)

incident = get_incident_by_id(SINGLE_TENANT_UUID, incident.id)

assert incident.alerts_count == 100
assert db_session.query(func.count(AlertToIncident.alert_id)).scalar() == 120

remove_alerts_to_incident_by_incident_id(
SINGLE_TENANT_UUID,
incident.id,
Expand All @@ -76,7 +107,8 @@ def test_add_remove_alert_to_incidents(db_session, setup_stress_alerts_no_elasti

incident = get_incident_by_id(SINGLE_TENANT_UUID, incident.id)

assert len(incident.alerts) == 99
# 117 because we removed multiple alerts with service_0
assert len(incident.alerts) == 117
assert "service_0" in incident.affected_services
assert len(incident.affected_services) == 10
assert sorted(incident.affected_services) == sorted(
Expand All @@ -92,11 +124,12 @@ def test_add_remove_alert_to_incidents(db_session, setup_stress_alerts_no_elasti
incident_id=incident.id,
tenant_id=incident.tenant_id,
include_unlinked=True
)[0]) == 100
)[0]) == 120

incident = get_incident_by_id(SINGLE_TENANT_UUID, incident.id)

assert len(incident.alerts) == 90
# 108 because we removed multiple alert with same fingerprints
assert len(incident.alerts) == 108
assert "service_0" not in incident.affected_services
assert len(incident.affected_services) == 9
assert sorted(incident.affected_services) == sorted(
Expand All @@ -117,7 +150,7 @@ def test_add_remove_alert_to_incidents(db_session, setup_stress_alerts_no_elasti

incident = get_incident_by_id(SINGLE_TENANT_UUID, incident.id)

assert len(incident.alerts) == 89
assert len(incident.alerts) == 105
assert "source_1" in incident.sources
# source_0 was removed together with service_0
assert len(incident.sources) == 9
Expand All @@ -137,6 +170,8 @@ def test_add_remove_alert_to_incidents(db_session, setup_stress_alerts_no_elasti
)




def test_get_last_incidents(db_session, create_alert):

severity_cycle = cycle([s.order for s in IncidentSeverity])
Expand Down

0 comments on commit d101552

Please sign in to comment.