From 94e696caccb513033ef35f58a10b603437cea6e2 Mon Sep 17 00:00:00 2001 From: Varad Gupta <114755221+vr-varad@users.noreply.github.com> Date: Fri, 20 Sep 2024 18:46:56 +0530 Subject: [PATCH 1/3] fix: Using standard OTLP env convention on backend service (#1973) --- keep/api/observability.py | 46 ++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/keep/api/observability.py b/keep/api/observability.py index d16f894b2..b5aa3e0fa 100644 --- a/keep/api/observability.py +++ b/keep/api/observability.py @@ -1,5 +1,6 @@ import logging import os +from urllib.parse import urlparse from fastapi import FastAPI, Request from opentelemetry import metrics, trace @@ -22,37 +23,52 @@ from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export import BatchSpanProcessor +def get_protocol_from_endpoint(endpoint): + parsed_url = urlparse(endpoint) + if parsed_url.scheme == "http": + return HTTPOTLPSpanExporter + elif parsed_url.scheme == "grpc": + return GRPCOTLPSpanExporter + else: + raise ValueError(f"Unsupported protocol: {parsed_url.scheme}") def setup(app: FastAPI): logger = logging.getLogger(__name__) # Configure the OpenTelemetry SDK - service_name = os.environ.get("SERVICE_NAME", "keep-api") - otlp_collector_endpoint = os.environ.get("OTLP_ENDPOINT", False) + service_name = os.environ.get("OTEL_SERVICE_NAME", os.environ.get("SERVICE_NAME", "keep-api")) + otlp_collector_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT", os.environ.get("OTLP_ENDPOINT", False)) + otlp_traces_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", None) + otlp_logs_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_LOGS_ENDPOINT", None) + otlp_metrics_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", None) enable_cloud_trace_exporeter = os.environ.get("CLOUD_TRACE_ENABLED", False) metrics_enabled = os.environ.get("METRIC_OTEL_ENABLED", "") - # to support both grpc and http - for example dynatrace doesn't support grpc - http_or_grpc = os.environ.get("OTLP_SPAN_EXPORTER", "grpc") - if http_or_grpc == "grpc": - OTLPSpanExporter = GRPCOTLPSpanExporter - else: - OTLPSpanExporter = HTTPOTLPSpanExporter resource = Resource.create({"service.name": service_name}) provider = TracerProvider(resource=resource) + if otlp_collector_endpoint: + logger.info(f"OTLP endpoint set to {otlp_collector_endpoint}") - processor = BatchSpanProcessor( - OTLPSpanExporter(endpoint=otlp_collector_endpoint) - ) - provider.add_span_processor(processor) - if metrics_enabled.lower() == "true": - logger.info("Metrics enabled.") + + if otlp_traces_endpoint: + logger.info(f"OTLP Traces endpoint set to {otlp_traces_endpoint}") + SpanExporter = get_protocol_from_endpoint(otlp_traces_endpoint) + processor = BatchSpanProcessor( + SpanExporter(endpoint=otlp_traces_endpoint) + ) + provider.add_span_processor(processor) + + if metrics_enabled.lower() == "true" and otlp_metrics_endpoint: + logger.info(f"Metrics enabled. OTLP Metrics endpoint set to {otlp_metrics_endpoint}") reader = PeriodicExportingMetricReader( - OTLPMetricExporter(endpoint=otlp_collector_endpoint) + OTLPMetricExporter(endpoint=otlp_metrics_endpoint) ) metric_provider = MeterProvider(resource=resource, metric_readers=[reader]) metrics.set_meter_provider(metric_provider) + if otlp_logs_endpoint: + logger.info(f"OTLP Logs endpoint set to {otlp_logs_endpoint}") + if enable_cloud_trace_exporeter: logger.info("Cloud Trace exporter enabled.") processor = BatchSpanProcessor(CloudTraceSpanExporter()) From 1fb5e546ef6c5a9dac15834bffb6e7f41d800a38 Mon Sep 17 00:00:00 2001 From: Tal Date: Sun, 22 Sep 2024 10:59:14 +0300 Subject: [PATCH 2/3] chore(ui): better empty states in incident view (#1979) --- .../app/incidents/[id]/incident-timeline.tsx | 19 ++++++++++++++++++- keep-ui/app/incidents/[id]/incident.tsx | 12 +++++++++++- keep-ui/next-env.d.ts | 2 +- keep-ui/package-lock.json | 8 ++++---- keep-ui/package.json | 2 +- 5 files changed, 35 insertions(+), 8 deletions(-) diff --git a/keep-ui/app/incidents/[id]/incident-timeline.tsx b/keep-ui/app/incidents/[id]/incident-timeline.tsx index 7259e46b3..d2400f6d4 100644 --- a/keep-ui/app/incidents/[id]/incident-timeline.tsx +++ b/keep-ui/app/incidents/[id]/incident-timeline.tsx @@ -8,6 +8,8 @@ import { useIncidentAlerts } from "utils/hooks/useIncidents"; import { IncidentDto } from "../models"; import Image from "next/image"; import AlertSeverity from "app/alerts/alert-severity"; +import { EmptyStateCard } from "@/components/ui/EmptyStateCard"; +import { useRouter } from "next/navigation"; const severityColors = { critical: "bg-red-300", @@ -237,6 +239,20 @@ const AlertBar: React.FC = ({ ); }; +const IncidentTimelineNoAlerts: React.FC = () => { + const router = useRouter(); + return ( +
+ router.push("/alerts/feed")} + /> +
+ ); +}; + export default function IncidentTimeline({ incident, }: { @@ -313,7 +329,8 @@ export default function IncidentTimeline({ return {}; }, [auditEvents, alerts]); - if (auditEventsLoading || !auditEvents || alertsLoading) return <>No Data; + if (auditEventsLoading || !auditEvents || alertsLoading) + return ; const { startTime, diff --git a/keep-ui/app/incidents/[id]/incident.tsx b/keep-ui/app/incidents/[id]/incident.tsx index 416bead04..2c546f16d 100644 --- a/keep-ui/app/incidents/[id]/incident.tsx +++ b/keep-ui/app/incidents/[id]/incident.tsx @@ -19,6 +19,7 @@ import { useRouter } from "next/navigation"; import IncidentTimeline from "./incident-timeline"; import { CiBellOn, CiViewTimeline } from "react-icons/ci"; import { IoIosGitNetwork } from "react-icons/io"; +import { EmptyStateCard } from "@/components/ui/EmptyStateCard"; interface Props { incidentId: string; @@ -72,7 +73,16 @@ export default function IncidentView({ incidentId }: Props) { - Coming Soon... + +
+ router.push("/topology")} + /> +
+
diff --git a/keep-ui/next-env.d.ts b/keep-ui/next-env.d.ts index fd36f9494..725dd6f24 100644 --- a/keep-ui/next-env.d.ts +++ b/keep-ui/next-env.d.ts @@ -3,4 +3,4 @@ /// // NOTE: This file should not be edited -// see https://nextjs.org/docs/basic-features/typescript for more information. +// see https://nextjs.org/docs/app/building-your-application/configuring/typescript for more information. diff --git a/keep-ui/package-lock.json b/keep-ui/package-lock.json index ebe5dba09..20cbd7923 100644 --- a/keep-ui/package-lock.json +++ b/keep-ui/package-lock.json @@ -272,7 +272,7 @@ "postcss-nested": "^6.0.1", "postcss-selector-parser": "^6.0.12", "postcss-value-parser": "^4.2.0", - "posthog-js": "^1.161.6", + "posthog-js": "^1.163.0", "posthog-node": "^3.1.1", "preact-render-to-string": "^5.2.6", "prelude-ls": "^1.2.1", @@ -11598,9 +11598,9 @@ "integrity": "sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==" }, "node_modules/posthog-js": { - "version": "1.161.6", - "resolved": "https://registry.npmjs.org/posthog-js/-/posthog-js-1.161.6.tgz", - "integrity": "sha512-UO0z/YTuan55Kl5Yg9Xs5x1PKUkm2zGKUNPioznb4GLRcxFnLBkWoeKQXNro2YZsYJvK+MY8jlF3cdGa8BZ8/Q==", + "version": "1.163.0", + "resolved": "https://registry.npmjs.org/posthog-js/-/posthog-js-1.163.0.tgz", + "integrity": "sha512-gpLbxZkOm06oOWg0uvCxBIVIHrhX3A5hxf9eAi/Z+aFP9DvWxwHQdGUkIWjnYUyxXilIbLxBPvWmiM98dYsAHA==", "dependencies": { "fflate": "^0.4.8", "preact": "^10.19.3", diff --git a/keep-ui/package.json b/keep-ui/package.json index e1c722b95..7690720c9 100644 --- a/keep-ui/package.json +++ b/keep-ui/package.json @@ -273,7 +273,7 @@ "postcss-nested": "^6.0.1", "postcss-selector-parser": "^6.0.12", "postcss-value-parser": "^4.2.0", - "posthog-js": "^1.161.6", + "posthog-js": "^1.163.0", "posthog-node": "^3.1.1", "preact-render-to-string": "^5.2.6", "prelude-ls": "^1.2.1", From dda0ac7dfa308a3a99a3779d3e8d9e422155563b Mon Sep 17 00:00:00 2001 From: Tal Borenstein Date: Sun, 22 Sep 2024 14:49:36 +0300 Subject: [PATCH 3/3] fix: cannot update incident name --- .../incidents/create-or-update-incident.tsx | 60 ++++++++------- keep/api/core/db.py | 74 +++++++++++-------- 2 files changed, 71 insertions(+), 63 deletions(-) diff --git a/keep-ui/app/incidents/create-or-update-incident.tsx b/keep-ui/app/incidents/create-or-update-incident.tsx index fcfae9934..d5c5be2c4 100644 --- a/keep-ui/app/incidents/create-or-update-incident.tsx +++ b/keep-ui/app/incidents/create-or-update-incident.tsx @@ -17,14 +17,14 @@ import { useIncidents } from "utils/hooks/useIncidents"; interface Props { incidentToEdit: IncidentDto | null; - createCallback?: (id: string) => void - exitCallback?: () => void + createCallback?: (id: string) => void; + exitCallback?: () => void; } export default function CreateOrUpdateIncident({ incidentToEdit, createCallback, - exitCallback + exitCallback, }: Props) { const { data: session } = useSession(); const { mutate } = useIncidents(true, 20); @@ -34,12 +34,18 @@ export default function CreateOrUpdateIncident({ const editMode = incidentToEdit !== null; // Display cancel btn if editing or we need to cancel for another reason (eg. going one step back in the modal etc.) - const cancellable = editMode || exitCallback + const cancellable = editMode || exitCallback; useEffect(() => { if (incidentToEdit) { - setIncidentName(incidentToEdit.user_generated_name ?? incidentToEdit.ai_generated_name ?? ""); - setIncidentUserSummary(incidentToEdit.user_summary ?? incidentToEdit.generated_summary ?? "" ); + setIncidentName( + incidentToEdit.user_generated_name ?? + incidentToEdit.ai_generated_name ?? + "" + ); + setIncidentUserSummary( + incidentToEdit.user_summary ?? incidentToEdit.generated_summary ?? "" + ); setIncidentAssignee(incidentToEdit.assignee ?? ""); } }, [incidentToEdit]); @@ -70,8 +76,8 @@ export default function CreateOrUpdateIncident({ await mutate(); toast.success("Incident created successfully"); - const created = await response.json() - createCallback?.(created.id) // close the modal and associate the alert incident + const created = await response.json(); + createCallback?.(created.id); // close the modal and associate the alert incident } else { toast.error( "Failed to create incident, please contact us if this issue persists." @@ -83,21 +89,18 @@ export default function CreateOrUpdateIncident({ const updateIncident = async (e: FormEvent) => { e.preventDefault(); const apiUrl = getApiURL(); - const response = await fetch( - `${apiUrl}/incidents/${incidentToEdit?.id}`, - { - method: "PUT", - headers: { - Authorization: `Bearer ${session?.accessToken}`, - "Content-Type": "application/json", - }, - body: JSON.stringify({ - name: incidentName, - user_summary: incidentUserSummary, - assignee: incidentAssignee, - }), - } - ); + const response = await fetch(`${apiUrl}/incidents/${incidentToEdit?.id}`, { + method: "PUT", + headers: { + Authorization: `Bearer ${session?.accessToken}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + user_generated_name: incidentName, + user_summary: incidentUserSummary, + assignee: incidentAssignee, + }), + }); if (response.ok) { exitEditMode(); await mutate(); @@ -111,21 +114,16 @@ export default function CreateOrUpdateIncident({ // If the Incident is successfully updated or the user cancels the update we exit the editMode and set the editRule in the incident.tsx to null. const exitEditMode = () => { - exitCallback?.() + exitCallback?.(); clearForm(); }; const submitEnabled = (): boolean => { - return ( - !!incidentName - ); + return !!incidentName; }; return ( -
+ Incident Metadata
diff --git a/keep/api/core/db.py b/keep/api/core/db.py index 89c550c6d..b53931bcb 100644 --- a/keep/api/core/db.py +++ b/keep/api/core/db.py @@ -696,8 +696,9 @@ def get_workflow_executions( ).scalar() avgDuration = avgDuration if avgDuration else 0.0 - query = (query.order_by(desc(WorkflowExecution.started)).limit(limit).offset(offset) -) + query = ( + query.order_by(desc(WorkflowExecution.started)).limit(limit).offset(offset) + ) # Execute the query workflow_executions = query.all() @@ -1750,6 +1751,7 @@ def update_key_last_used( session.add(tenant_api_key_entry) session.commit() + def get_linked_providers(tenant_id: str) -> List[Tuple[str, str, datetime]]: with Session(engine) as session: providers = ( @@ -2260,7 +2262,9 @@ def get_last_incidents( return incidents, total_count -def get_incident_by_id(tenant_id: str, incident_id: str | UUID, with_alerts: bool = False) -> Optional[Incident]: +def get_incident_by_id( + tenant_id: str, incident_id: str | UUID, with_alerts: bool = False +) -> Optional[Incident]: with Session(engine) as session: query = session.query( Incident, @@ -2269,7 +2273,7 @@ def get_incident_by_id(tenant_id: str, incident_id: str | UUID, with_alerts: boo Incident.id == incident_id, ) if with_alerts: - query= query.options(joinedload(Incident.alerts)) + query = query.options(joinedload(Incident.alerts)) return query.first() @@ -2313,16 +2317,9 @@ def update_incident_from_dto_by_id( if not incident: return None - session.query(Incident).filter( - Incident.tenant_id == tenant_id, - Incident.id == incident_id, - ).update( - { - "user_generated_name": updated_incident_dto.user_generated_name, - "user_summary": updated_incident_dto.user_summary, - "assignee": updated_incident_dto.assignee, - } - ) + incident.user_generated_name = updated_incident_dto.user_generated_name + incident.user_summary = updated_incident_dto.user_summary + incident.assignee = updated_incident_dto.assignee session.commit() session.refresh(incident) @@ -2374,7 +2371,10 @@ def get_incidents_count( def get_incident_alerts_by_incident_id( - tenant_id: str, incident_id: str, limit: Optional[int] = None, offset: Optional[int] = None + tenant_id: str, + incident_id: str, + limit: Optional[int] = None, + offset: Optional[int] = None, ) -> (List[Alert], int): with Session(engine) as session: query = ( @@ -2458,8 +2458,10 @@ def inner(db_session: Session): def add_alerts_to_incident_by_incident_id( tenant_id: str, incident_id: str | UUID, alert_ids: List[UUID] ) -> Optional[Incident]: - logger.info(f"Adding alerts to incident {incident_id} in database, total {len(alert_ids)} alerts", - extra={"tags": {"tenant_id": tenant_id, "incident_id": incident_id}}) + logger.info( + f"Adding alerts to incident {incident_id} in database, total {len(alert_ids)} alerts", + extra={"tags": {"tenant_id": tenant_id, "incident_id": incident_id}}, + ) with Session(engine) as session: query = select(Incident).where( @@ -2482,27 +2484,39 @@ def add_alerts_to_incident_by_incident_id( ).all() ) - new_alert_ids = [alert_id for alert_id in alert_ids if alert_id not in existing_alert_ids] + new_alert_ids = [ + alert_id for alert_id in alert_ids if alert_id not in existing_alert_ids + ] if not new_alert_ids: return incident alerts_data_for_incident = get_alerts_data_for_incident(new_alert_ids, session) - incident.sources = list(set(incident.sources) | set(alerts_data_for_incident["sources"])) - incident.affected_services = list(set(incident.affected_services) | set(alerts_data_for_incident["services"])) + incident.sources = list( + set(incident.sources) | set(alerts_data_for_incident["sources"]) + ) + incident.affected_services = list( + set(incident.affected_services) | set(alerts_data_for_incident["services"]) + ) incident.alerts_count += alerts_data_for_incident["count"] alert_to_incident_entries = [ - AlertToIncident(alert_id=alert_id, incident_id=incident.id, tenant_id=tenant_id) + AlertToIncident( + alert_id=alert_id, incident_id=incident.id, tenant_id=tenant_id + ) for alert_id in new_alert_ids ] for idx, entry in enumerate(alert_to_incident_entries): session.add(entry) if (idx + 1) % 100 == 0: - logger.info(f"Added {idx + 1}/{len(alert_to_incident_entries)} alerts to incident {incident.id} in database", - extra={"tags": {"tenant_id": tenant_id, "incident_id": incident.id}}) + logger.info( + f"Added {idx + 1}/{len(alert_to_incident_entries)} alerts to incident {incident.id} in database", + extra={ + "tags": {"tenant_id": tenant_id, "incident_id": incident.id} + }, + ) session.commit() session.flush() @@ -2717,19 +2731,13 @@ def get_pmi_values_from_temp_file(temp_dir: str) -> Tuple[np.array, Dict[str, in def get_tenant_config(tenant_id: str) -> dict: with Session(engine) as session: - tenant_data = session.exec( - select(Tenant) - .where(Tenant.id == tenant_id) - ).first() + tenant_data = session.exec(select(Tenant).where(Tenant.id == tenant_id)).first() return tenant_data.configuration if tenant_data else {} def write_tenant_config(tenant_id: str, config: dict) -> None: with Session(engine) as session: - tenant_data = session.exec( - select(Tenant) - .where(Tenant.id == tenant_id) - ).first() + tenant_data = session.exec(select(Tenant).where(Tenant.id == tenant_id)).first() tenant_data.configuration = config session.commit() session.refresh(tenant_data) @@ -2880,7 +2888,9 @@ def get_provider_by_name(tenant_id: str, provider_name: str) -> Provider: return provider -def change_incident_status_by_id(tenant_id: str, incident_id: UUID | str, status: IncidentStatus) -> bool: +def change_incident_status_by_id( + tenant_id: str, incident_id: UUID | str, status: IncidentStatus +) -> bool: with Session(engine) as session: stmt = ( update(Incident)