Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: BaseIncidentProvider, PagerDuty incidents and process_incident #2394

Merged
merged 38 commits into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
ad1be89
feat: pagerduty oauth
35C4n0r Nov 4, 2024
9ef71c1
fix: uncomment needed changes
35C4n0r Nov 4, 2024
5c60897
fix: type checks
35C4n0r Nov 4, 2024
7e01ab5
fix: CI pass
35C4n0r Nov 4, 2024
091c7ba
fix: update tests
35C4n0r Nov 4, 2024
a5da877
fix: update tests
35C4n0r Nov 4, 2024
02970fe
Merge branch 'main' into feat-pd-app
35C4n0r Nov 4, 2024
aee8a92
fix: imports
35C4n0r Nov 4, 2024
4a5edad
fix: update typescript
35C4n0r Nov 4, 2024
f2ddfd5
Merge branch 'main' into feat-pd-app
35C4n0r Nov 4, 2024
2c84a40
fix: resolve merge conflicts
35C4n0r Nov 4, 2024
bda66a1
fix: minor fixes
35C4n0r Nov 4, 2024
6a80845
fix: CI pass
35C4n0r Nov 4, 2024
a1daa04
fix: typos
35C4n0r Nov 4, 2024
4200317
chore: minor refactors
35C4n0r Nov 4, 2024
5120384
fix: extra flags for ai incident creation
35C4n0r Nov 5, 2024
b17b146
Merge branch 'main' into feat-pd-app
35C4n0r Nov 5, 2024
e2bff6d
Merge branch 'main' into feat-pd-app
talboren Nov 6, 2024
08b263c
fix(alertdto): something with url
talboren Nov 6, 2024
1cf1802
fix: improvements
talboren Nov 6, 2024
b017990
chore: add docstrings
35C4n0r Nov 7, 2024
c561a0c
Merge remote-tracking branch 'origin/feat-pd-app' into feat-pd-app
35C4n0r Nov 7, 2024
48f37ea
Merge branch 'main' into feat-pd-app
talboren Nov 10, 2024
9dc47e4
fix: wip
talboren Nov 10, 2024
e7112cf
fix: wip
talboren Nov 10, 2024
b9b5e04
fix: wip
talboren Nov 10, 2024
1f407ad
Merge branch 'main' into feat-pd-app
talboren Nov 10, 2024
66acb18
fix: providers
talboren Nov 10, 2024
cba0006
fix: fix
talboren Nov 10, 2024
ce075c3
fix: fix
talboren Nov 10, 2024
8b02198
fix: fix
talboren Nov 10, 2024
b339aed
fix: improvements
talboren Nov 10, 2024
00b9111
docs: pagerduty oauth in self hosted
talboren Nov 11, 2024
595cbee
Merge branch 'main' into feat-pd-app
talboren Nov 11, 2024
7573aac
fix: reverting a wrong description
35C4n0r Nov 11, 2024
5a5b19a
Merge branch 'main' into feat-pd-app
35C4n0r Nov 11, 2024
8010791
fix: migration
talboren Nov 12, 2024
6c0a4be
Merge branch 'main' into feat-pd-app
talboren Nov 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions keep-ui/app/providers/filter-context/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export const PROVIDER_LABELS: Record<TProviderLabels, string> = {
ticketing: 'Ticketing',
data: 'Data',
queue: 'Queue',
incident: 'Incident'
}

export const PROVIDER_LABELS_KEYS = Object.keys(PROVIDER_LABELS);
3 changes: 3 additions & 0 deletions keep-ui/app/providers/provider-tile.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
BellAlertIcon,
ChatBubbleBottomCenterIcon,
CircleStackIcon,
ExclamationTriangleIcon,
QueueListIcon,
TicketIcon,
MapIcon,
Expand Down Expand Up @@ -101,6 +102,8 @@ function getIconForTag(tag: TProviderLabels) {
return QueueListIcon;
case "topology":
return MapIcon;
case "incident":
return ExclamationTriangleIcon;
default:
return ChatBubbleBottomCenterIcon;
}
Expand Down
1 change: 1 addition & 0 deletions keep-ui/app/providers/providers.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ interface AlertDistritbuionData {

export type TProviderLabels =
| "alert"
| "incident"
| "topology"
| "messaging"
| "ticketing"
Expand Down
2 changes: 1 addition & 1 deletion keep/api/bl/ai_suggestion_bl.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ async def commit_incidents(
try:
# Create the incident
incident_dto = IncidentDto.parse_obj(incident_with_feedback["incident"])
created_incident = incident_bl.create_incident(incident_dto)
created_incident = incident_bl.create_incident(incident_dto, generated_from_ai=True)
talboren marked this conversation as resolved.
Show resolved Hide resolved

# Add alerts to the created incident
alert_ids = [
Expand Down
7 changes: 3 additions & 4 deletions keep/api/bl/incidents_bl.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,12 @@
from keep.api.arq_pool import get_pool
from keep.api.core.db import (
add_alerts_to_incident_by_incident_id,
create_incident_from_dto,
delete_incident_by_id,
get_incident_alerts_by_incident_id,
get_incident_by_id,
get_incident_unique_fingerprint_count,
remove_alerts_to_incident_by_incident_id,
update_incident_from_dto_by_id,
update_incident_from_dto_by_id, create_incident_from_dto,
)
from keep.api.core.elastic import ElasticClient
from keep.api.models.alert import IncidentDto, IncidentDtoIn
Expand Down Expand Up @@ -52,12 +51,12 @@ def __init__(
self.ee_enabled = os.environ.get("EE_ENABLED", "false").lower() == "true"
self.redis = os.environ.get("REDIS", "false") == "true"

def create_incident(self, incident_dto: IncidentDtoIn) -> IncidentDto:
def create_incident(self, incident_dto: IncidentDtoIn, generated_from_ai: bool = False) -> IncidentDto:
self.logger.info(
"Creating incident",
extra={"incident_dto": incident_dto.dict(), "tenant_id": self.tenant_id},
)
incident = create_incident_from_dto(self.tenant_id, incident_dto)
incident = create_incident_from_dto(self.tenant_id, incident_dto, generated_from_ai=generated_from_ai)
talboren marked this conversation as resolved.
Show resolved Hide resolved
self.logger.info(
"Incident created",
extra={"incident_id": incident.id, "tenant_id": self.tenant_id},
Expand Down
40 changes: 27 additions & 13 deletions keep/api/core/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -3175,12 +3175,10 @@ def get_incident_by_id(


def create_incident_from_dto(
tenant_id: str, incident_dto: IncidentDtoIn | IncidentDto
tenant_id: str, incident_dto: IncidentDtoIn | IncidentDto, generated_from_ai: bool = False
) -> Optional[Incident]:
# from AI
if isinstance(incident_dto, IncidentDto):
# get all the fields from the DTO

if issubclass(type(incident_dto), IncidentDto) and generated_from_ai:
talboren marked this conversation as resolved.
Show resolved Hide resolved
# NOTE: we do not use dto's alerts, alert count, start time etc
# because we want to re-use the BL of creating incidents
# where all of these are calculated inside add_alerts_to_incident
Expand All @@ -3193,10 +3191,16 @@ def create_incident_from_dto(
"is_predicted": False, # its not a prediction, but an AI generation
"is_confirmed": True, # confirmed by the user :)
}
return create_incident_from_dict(tenant_id, incident_dict)
# from user

elif issubclass(type(incident_dto), IncidentDto):
# we will reach this block when incident is pulled from a provider
incident_dict = incident_dto.to_db_incident().dict()

else:
return create_incident_from_dict(tenant_id, incident_dto.dict())
# We'll reach this block when a user creates an incident
incident_dict = incident_dto.dict()

return create_incident_from_dict(tenant_id, incident_dict)


def create_incident_from_dict(
Expand All @@ -3217,7 +3221,7 @@ def create_incident_from_dict(
def update_incident_from_dto_by_id(
tenant_id: str,
incident_id: str,
updated_incident_dto: IncidentDtoIn,
updated_incident_dto: IncidentDtoIn | IncidentDto,
generated_by_ai: bool = False,
) -> Optional[Incident]:
with Session(engine) as session:
Expand All @@ -3233,11 +3237,21 @@ def update_incident_from_dto_by_id(
if not incident:
return None

incident.user_generated_name = updated_incident_dto.user_generated_name
incident.assignee = updated_incident_dto.assignee
incident.same_incident_in_the_past_id = (
updated_incident_dto.same_incident_in_the_past_id
)
if issubclass(type(updated_incident_dto), IncidentDto):
# We execute this when we update an incident received from the provider
updated_data = updated_incident_dto.to_db_incident().dict()
else:
# When a user updates an Incident
updated_data = updated_incident_dto.dict()

for key, value in updated_data.items():
# Update only if the new value is different from the current one
if hasattr(incident, key) and getattr(incident, key) != value:
if isinstance(value, Enum):
setattr(incident, key, value.value)

else:
setattr(incident, key, value)

if generated_by_ai:
incident.generated_summary = updated_incident_dto.user_summary
Expand Down
31 changes: 31 additions & 0 deletions keep/api/models/alert.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,7 @@ class IncidentDto(IncidentDtoIn):
start_time: datetime.datetime | None
last_seen_time: datetime.datetime | None
end_time: datetime.datetime | None
creation_time: datetime.datetime | None

alerts_count: int
alert_sources: list[str]
Expand Down Expand Up @@ -515,6 +516,36 @@ def from_db_incident(cls, db_incident: "Incident"):
dto._tenant_id = db_incident.tenant_id
return dto

def to_db_incident(self) -> "Incident":
    """Build an ``Incident`` database model from this DTO.

    Most fields are copied one-to-one. The exceptions are:

    * ``severity`` is stored as ``self.severity.order``.
    * ``status`` is stored as ``self.status.value``.
    * ``services`` is written to ``affected_services``.
    * ``alert_sources`` is written to ``sources``.
    * ``creation_time`` falls back to ``datetime.datetime.utcnow()`` when
      the DTO does not carry one.

    Returns:
        A new ``Incident`` instance populated from this DTO.
    """
    # Imported here rather than at module level, as in the original code
    # (presumably to avoid a circular import — TODO confirm).
    from keep.api.models.db.alert import Incident

    column_values = {
        # Identity and descriptive text
        "id": self.id,
        "user_generated_name": self.user_generated_name,
        "ai_generated_name": self.ai_generated_name,
        "user_summary": self.user_summary,
        "generated_summary": self.generated_summary,
        "assignee": self.assignee,
        # Enum-like values are flattened to their stored representation
        "severity": self.severity.order,
        "status": self.status.value,
        # Timestamps
        "creation_time": self.creation_time or datetime.datetime.utcnow(),
        "start_time": self.start_time,
        "end_time": self.end_time,
        "last_seen_time": self.last_seen_time,
        # Alert aggregates (DTO names differ from the DB column names)
        "alerts_count": self.alerts_count,
        "affected_services": self.services,
        "sources": self.alert_sources,
        # Prediction / correlation metadata
        "is_predicted": self.is_predicted,
        "is_confirmed": self.is_confirmed,
        "rule_fingerprint": self.rule_fingerprint,
        "same_incident_in_the_past_id": self.same_incident_in_the_past_id,
        # Merge bookkeeping
        "merged_into_incident_id": self.merged_into_incident_id,
        "merged_by": self.merged_by,
        "merged_at": self.merged_at,
    }
    return Incident(**column_values)


class MergeIncidentsRequestDto(BaseModel):
source_incident_ids: list[UUID]
Expand Down
3 changes: 2 additions & 1 deletion keep/api/models/db/topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ class TopologyService(SQLModel, table=True):
dependencies: List["TopologyServiceDependency"] = Relationship(
back_populates="service",
sa_relationship_kwargs={
"foreign_keys": "[TopologyServiceDependency.service_id]"
"foreign_keys": "[TopologyServiceDependency.service_id]",
"cascade": "all, delete-orphan"
talboren marked this conversation as resolved.
Show resolved Hide resolved
},
)

Expand Down
2 changes: 1 addition & 1 deletion keep/api/models/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class Provider(BaseModel):
last_pull_time: datetime | None = None
docs: str | None = None
tags: list[
Literal["alert", "ticketing", "messaging", "data", "queue", "topology"]
Literal["alert", "ticketing", "messaging", "data", "queue", "topology", "incident"]
] = []
alertsDistribution: dict[str, int] | None = None
alertExample: dict | None = None
Expand Down
84 changes: 82 additions & 2 deletions keep/api/routes/incidents.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,17 @@
from datetime import datetime
from typing import List

from fastapi import APIRouter, Depends, HTTPException, Query, Response
from arq import ArqRedis
from fastapi import APIRouter, Depends, HTTPException, Query, Response, BackgroundTasks, Request
from pusher import Pusher
from pydantic import BaseModel, Field # noqa
from pydantic.types import UUID
from sqlmodel import Session

from keep.api.bl.ai_suggestion_bl import AISuggestionBl
from keep.api.bl.incidents_bl import IncidentBl
from keep.api.arq_pool import get_pool
from keep.api.consts import REDIS, KEEP_ARQ_QUEUE_BASIC
from keep.api.core.db import (
DestinationIncidentNotFound,
add_audit,
Expand All @@ -26,7 +29,7 @@
get_workflow_executions_for_incident_or_alert,
merge_incidents_to_id,
)
from keep.api.core.dependencies import get_pusher_client
from keep.api.core.dependencies import get_pusher_client, extract_generic_body
from keep.api.models.alert import (
AlertDto,
EnrichAlertRequestBody,
Expand All @@ -44,6 +47,7 @@
)
from keep.api.models.db.alert import AlertActionType, AlertAudit
from keep.api.routes.alerts import _enrich_alert
from keep.api.tasks.process_event_task import process_event
from keep.api.utils.enrichment_helpers import convert_db_alerts_to_dto_alerts
from keep.api.utils.import_ee import mine_incidents_and_create_objects
from keep.api.utils.pagination import (
Expand All @@ -54,6 +58,7 @@
from keep.api.utils.pluralize import pluralize
from keep.identitymanager.authenticatedentity import AuthenticatedEntity
from keep.identitymanager.identitymanagerfactory import IdentityManagerFactory
from keep.providers.providers_factory import ProvidersFactory
from keep.topologies.topologies_service import TopologiesService # noqa

router = APIRouter()
Expand Down Expand Up @@ -473,6 +478,81 @@ def delete_alerts_from_incident(
return Response(status_code=202)


@router.post(
    "/event/{provider_type}",
    description="Receive an incident event from a provider",
    status_code=202,
)
async def receive_event(
    provider_type: str,
    bg_tasks: BackgroundTasks,
    request: Request,
    provider_id: str | None = None,
    event=Depends(extract_generic_body),
    authenticated_entity: AuthenticatedEntity = Depends(
        IdentityManagerFactory.get_auth_verifier(["write:incident"])
    ),
    pusher_client: Pusher = Depends(get_pusher_client),
) -> Response:
    """Accept an incident event from a provider and queue it for processing.

    The raw body is parsed by the provider class resolved from
    ``provider_type``. The parsed event is then enqueued on the ARQ queue
    when ``REDIS`` is truthy, or scheduled as a FastAPI background task
    otherwise.

    Args:
        provider_type: Provider type used to look up the provider class.
        bg_tasks: FastAPI background task collector (used when Redis is off).
        request: Incoming request; ``request.state.trace_id`` is forwarded.
        provider_id: Optional provider instance id, forwarded to the task.
        event: Raw request body extracted by ``extract_generic_body``.
        authenticated_entity: Caller identity; requires ``write:incident``.
        pusher_client: Injected Pusher client (not used in this handler).

    Returns:
        An empty ``202 Accepted`` response.

    Raises:
        HTTPException: 400 when no provider class exists for
            ``provider_type``.
    """
    trace_id = request.state.trace_id

    # A missing provider module and an empty lookup result are reported the
    # same way, so fold both into a single "not found" check.
    try:
        provider_class = ProvidersFactory.get_provider_class(provider_type)
    except ModuleNotFoundError:
        provider_class = None
    if not provider_class:
        raise HTTPException(
            status_code=400, detail=f"Provider {provider_type} not found"
        )

    # Parse the raw body
    event = provider_class.parse_event_raw_body(event)

    # Both dispatch paths pass the same positional arguments; building them
    # once keeps the two branches from drifting apart.
    # NOTE(review): the meaning of the None / True / None slots is defined by
    # process_event's signature, which is not visible here — confirm there.
    task_args = (
        authenticated_entity.tenant_id,
        provider_type,
        provider_id,
        None,
        authenticated_entity.api_key_name,
        trace_id,
        event,
        True,
        None,
        "incident",
    )

    if REDIS:
        redis: ArqRedis = await get_pool()
        job = await redis.enqueue_job(
            "async_process_event",
            *task_args,
            _queue_name=KEEP_ARQ_QUEUE_BASIC,
        )
        logger.info(
            "Enqueued job",
            extra={
                "job_id": job.job_id,
                "tenant_id": authenticated_entity.tenant_id,
                "queue": KEEP_ARQ_QUEUE_BASIC,
            },
        )
    else:
        # The leading {} fills process_event's first positional parameter
        # (presumably the worker context that ARQ would supply — TODO confirm).
        bg_tasks.add_task(process_event, {}, *task_args)
    return Response(status_code=202)
talboren marked this conversation as resolved.
Show resolved Hide resolved


@router.post(
"/{incident_id}/status",
description="Change incident status",
Expand Down
Loading
Loading