Skip to content

Commit

Permalink
feat: pd service level ops
Browse files Browse the repository at this point in the history
Signed-off-by: 35C4n0r <[email protected]>
  • Loading branch information
35C4n0r committed Nov 18, 2024
1 parent e121f23 commit ceab51b
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 50 deletions.
1 change: 1 addition & 0 deletions docs/providers/documentation/pagerduty-provider.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ To connect Keep to PagerDuty:

- **Routing Key**: Use for event posting via the PagerDuty Events API.
- **API Key**: Use for incident creation and management through the PagerDuty Incidents API.
- **Service Id** (Optional): If provided, Keep operates only within that service's scope.
- **OAuth2**: Token management handled automatically by Keep.

<Frame>
Expand Down
2 changes: 2 additions & 0 deletions keep/api/models/alert.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,7 @@ def from_db_incident(cls, db_incident: "Incident"):
assignee=db_incident.assignee,
services=db_incident.affected_services or [],
rule_fingerprint=db_incident.rule_fingerprint,
fingerprint=db_incident.fingerprint,
same_incident_in_the_past_id=db_incident.same_incident_in_the_past_id,
merged_into_incident_id=db_incident.merged_into_incident_id,
merged_by=db_incident.merged_by,
Expand Down Expand Up @@ -550,6 +551,7 @@ def to_db_incident(self) -> "Incident":
is_predicted=self.is_predicted,
is_confirmed=self.is_confirmed,
rule_fingerprint=self.rule_fingerprint,
fingerprint=self.fingerprint,
same_incident_in_the_past_id=self.same_incident_in_the_past_id,
merged_into_incident_id=self.merged_into_incident_id,
merged_by=self.merged_by,
Expand Down
119 changes: 69 additions & 50 deletions keep/providers/pagerduty_provider/pagerduty_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from keep.providers.models.provider_config import ProviderConfig, ProviderScope
from keep.providers.providers_factory import ProvidersFactory


# TODO: consider splitting into PagerdutyIncidentsProvider and PagerdutyAlertsProvider
# Read this: https://community.pagerduty.com/forum/t/create-incident-using-python/3596/3

Expand Down Expand Up @@ -59,6 +60,14 @@ class PagerdutyProviderAuthConfig:
},
default="",
)
service_id: str | None = dataclasses.field(
metadata={
"required": False,
"description": "Service Id (if provided, keep will only operate on this service)",
"sensitive": False,
},
default=None,
)


class PagerdutyProvider(BaseTopologyProvider, BaseIncidentProvider):
Expand Down Expand Up @@ -134,7 +143,7 @@ class PagerdutyProvider(BaseTopologyProvider, BaseIncidentProvider):
FINGERPRINT_FIELDS = ["alert_key"]

def __init__(
self, context_manager: ContextManager, provider_id: str, config: ProviderConfig
self, context_manager: ContextManager, provider_id: str, config: ProviderConfig
):
super().__init__(context_manager, provider_id, config)

Expand All @@ -155,7 +164,7 @@ def __init__(
self.logger.info("Refreshing access token")
self.__refresh_token()
elif (
self.authentication_config.api_key or self.authentication_config.routing_key
self.authentication_config.api_key or self.authentication_config.routing_key
):
# No need to do anything
return
Expand Down Expand Up @@ -197,9 +206,9 @@ def validate_config(self):
**self.config.authentication
)
if (
not self.authentication_config.routing_key
and not self.authentication_config.api_key
and not self.authentication_config.oauth_data
not self.authentication_config.routing_key
and not self.authentication_config.api_key
and not self.authentication_config.oauth_data
):
raise ProviderConfigException(
"PagerdutyProvider requires either routing_key or api_key or OAuth configuration",
Expand Down Expand Up @@ -325,13 +334,13 @@ def validate_scopes(self):
return scopes

def _build_alert(
self,
title: str,
alert_body: str,
dedup: str | None = None,
severity: typing.Literal["critical", "error", "warning", "info"] | None = None,
event_type: typing.Literal["trigger", "acknowledge", "resolve"] | None = None,
source: str = "custom_event",
self,
title: str,
alert_body: str,
dedup: str | None = None,
severity: typing.Literal["critical", "error", "warning", "info"] | None = None,
event_type: typing.Literal["trigger", "acknowledge", "resolve"] | None = None,
source: str = "custom_event",
) -> typing.Dict[str, typing.Any]:
"""
Builds the payload for an event alert.
Expand Down Expand Up @@ -383,13 +392,13 @@ def _build_alert(
}

def _send_alert(
self,
title: str,
body: str,
dedup: str | None = None,
severity: typing.Literal["critical", "error", "warning", "info"] | None = None,
event_type: typing.Literal["trigger", "acknowledge", "resolve"] | None = None,
source: str = "custom_event",
self,
title: str,
body: str,
dedup: str | None = None,
severity: typing.Literal["critical", "error", "warning", "info"] | None = None,
event_type: typing.Literal["trigger", "acknowledge", "resolve"] | None = None,
source: str = "custom_event",
):
"""
Sends PagerDuty Alert
Expand Down Expand Up @@ -417,12 +426,12 @@ def _send_alert(
return result.json()

def _trigger_incident(
self,
service_id: str,
title: str,
body: dict,
requester: str,
incident_key: str | None = None,
self,
service_id: str,
title: str,
body: dict,
requester: str,
incident_key: str | None = None,
):
"""Triggers an incident via the V2 REST API using sample data."""

Expand Down Expand Up @@ -455,11 +464,11 @@ def dispose(self):
pass

def setup_incident_webhook(
self,
tenant_id: str,
keep_api_url: str,
api_key: str,
setup_alerts: bool = True,
self,
tenant_id: str,
keep_api_url: str,
api_key: str,
setup_alerts: bool = True,
):
self.logger.info("Setting up Pagerduty webhook")

Expand Down Expand Up @@ -505,7 +514,14 @@ def setup_incident_webhook(
"incident.triggered",
"incident.unacknowledged",
],
"filter": {"type": "account_reference"},
"filter": (
{
"type": "service_reference",
"id": self.authentication_config.service_id,
}
if self.authentication_config.service_id
else {"type": "account_reference"}
),
},
}
if webhook_exists:
Expand All @@ -530,17 +546,17 @@ def setup_incident_webhook(
self.logger.info("Webhook created")

def _notify(
self,
title: str = "",
alert_body: str = "",
dedup: str = "",
service_id: str = "",
requester: str = "",
incident_id: str = "",
event_type: typing.Literal["trigger", "acknowledge", "resolve"] | None = None,
severity: typing.Literal["critical", "error", "warning", "info"] | None = None,
source: str = "custom_event",
**kwargs: dict,
self,
title: str = "",
alert_body: str = "",
dedup: str = "",
service_id: str = "",
requester: str = "",
incident_id: str = "",
event_type: typing.Literal["trigger", "acknowledge", "resolve"] | None = None,
severity: typing.Literal["critical", "error", "warning", "info"] | None = None,
source: str = "custom_event",
**kwargs: dict,
):
"""
Create a PagerDuty alert.
Expand Down Expand Up @@ -576,7 +592,7 @@ def _query(self, incident_id: str = None):
)

def _format_alert(
event: dict, provider_instance: "BaseProvider" = None
event: dict, provider_instance: "BaseProvider" = None
) -> AlertDto:
# If somebody connected the provider before we refactored it
old_format_event = event.get("event", {})
Expand Down Expand Up @@ -678,14 +694,17 @@ def __get_all_incidents_or_alerts(self, incident_id: str = None):
url += f"/{incident_id}/alerts"
include = ["teams", "services"]
resource = "alerts"
params = {
"include[]": include,
"offset": offset,
"limit": 100,
}
if not incident_id and self.authentication_config.service_id:
params["service_ids[]"] = [self.authentication_config.service_id]
response = requests.get(
url=url,
headers=self.__get_headers(),
params={
"include[]": include,
"offset": offset,
"limit": 100,
},
params=params,
)
response.raise_for_status()
response = response.json()
Expand All @@ -696,7 +715,7 @@ def __get_all_incidents_or_alerts(self, incident_id: str = None):
paginated_response.extend(response.get(resource, []))
self.logger.info("Fetched incidents or alerts", extra={"offset": offset})
# No more results
if response.get("more", False) == False:
if not response.get("more", False):
self.logger.info("No more incidents or alerts")
break
self.logger.info(
Expand Down Expand Up @@ -817,7 +836,7 @@ def _get_incident_id(incident_id: str) -> str:

@staticmethod
def _format_incident(
event: dict, provider_instance: "BaseProvider" = None
event: dict, provider_instance: "BaseProvider" = None
) -> IncidentDto | list[IncidentDto]:

event = event["event"]["data"]
Expand Down

0 comments on commit ceab51b

Please sign in to comment.