From 5db7c6e21604a67ef08da46a69f817e6ef454c5c Mon Sep 17 00:00:00 2001 From: Tal Date: Thu, 19 Sep 2024 12:44:57 +0300 Subject: [PATCH 1/4] chore(logs): move some topology logs to debug (#1970) --- keep/api/bl/enrichments_bl.py | 5 ++++- keep/api/routes/preset.py | 9 +++++---- keep/api/tasks/process_topology_task.py | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/keep/api/bl/enrichments_bl.py b/keep/api/bl/enrichments_bl.py index 3cec23e0e..d8f18691c 100644 --- a/keep/api/bl/enrichments_bl.py +++ b/keep/api/bl/enrichments_bl.py @@ -278,7 +278,10 @@ def _check_alert_matches_rule(self, alert: AlertDto, rule: MappingRule) -> bool: ) if not topology_service: - self.logger.warning("No topology service found to match on") + self.logger.debug( + "No topology service found to match on", + extra={"matcher_value": matcher_value}, + ) else: enrichments = topology_service.dict(exclude_none=True) # Remove redundant fields diff --git a/keep/api/routes/preset.py b/keep/api/routes/preset.py index 53fa09f85..45f46c983 100644 --- a/keep/api/routes/preset.py +++ b/keep/api/routes/preset.py @@ -102,6 +102,9 @@ def pull_data_from_providers( f"Pulling alerts from provider {provider.type} ({provider.id})", extra=extra, ) + # Even if we failed at processing some event, lets save the last pull time to not iterate this process over and over again. + update_provider_last_pull_time(tenant_id=tenant_id, provider_id=provider.id) + provider_class = ProvidersFactory.get_provider( context_manager=context_manager, provider_id=provider.id, @@ -121,7 +124,8 @@ def pull_data_from_providers( logger.info("Pulling topology data", extra=extra) topology_data = provider_class.pull_topology() logger.info( - "Pulling topology data finished, processing", extra=extra + "Pulling topology data finished, processing", + extra={**extra, "topology_length": len(topology_data)}, ) process_topology( tenant_id, topology_data, provider.id, provider.type @@ -155,9 +159,6 @@ def pull_data_from_providers( f"Unknown error pulling from provider {provider.type} ({provider.id})", extra=extra, ) - finally: - # Even if we failed at processing some event, lets save the last pull time to not iterate this process over and over again. - update_provider_last_pull_time(tenant_id=tenant_id, provider_id=provider.id) logger.info( "Pulling data from providers completed", extra={ diff --git a/keep/api/tasks/process_topology_task.py b/keep/api/tasks/process_topology_task.py index 5d65de035..b681dcd54 100644 --- a/keep/api/tasks/process_topology_task.py +++ b/keep/api/tasks/process_topology_task.py @@ -72,7 +72,7 @@ def process_topology( service_id = service_to_keep_service_id_map.get(service.service) depends_on_service_id = service_to_keep_service_id_map.get(dependency) if not service_id or not depends_on_service_id: - logger.warning( + logger.debug( "Found a dangling service, skipping", extra={"service": service.service, "dependency": dependency}, ) From 879ce157c514315202dcf8213b9d757f54558150 Mon Sep 17 00:00:00 2001 From: Rajesh Jonnalagadda <38752904+rajeshj11@users.noreply.github.com> Date: Thu, 19 Sep 2024 17:57:44 +0530 Subject: [PATCH 2/4] chore: fixed the disabled input issue and auto deploy issue (#1972) --- keep-ui/app/workflows/builder/builder-store.tsx | 4 ++-- keep-ui/app/workflows/builder/editors.tsx | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/keep-ui/app/workflows/builder/builder-store.tsx b/keep-ui/app/workflows/builder/builder-store.tsx index 3bc6fa0f3..202c32c19 100644 --- a/keep-ui/app/workflows/builder/builder-store.tsx +++ b/keep-ui/app/workflows/builder/builder-store.tsx @@ -303,10 +303,10 @@ const useStore = create((set, get) => ({ }); } }, - setV2Properties: (properties) => set({ v2Properties: properties }), + setV2Properties: (properties) => set({ v2Properties: properties, canDeploy:false }), updateV2Properties: (properties) => { const updatedProperties = { ...get().v2Properties, ...properties }; - set({ v2Properties: updatedProperties, changes: get().changes + 1 }); + set({ v2Properties: updatedProperties, changes: get().changes + 1, canDeploy:false }); }, setSelectedNode: (id) => { set({ diff --git a/keep-ui/app/workflows/builder/editors.tsx b/keep-ui/app/workflows/builder/editors.tsx index 1df59f1f4..cb34b2c51 100644 --- a/keep-ui/app/workflows/builder/editors.tsx +++ b/keep-ui/app/workflows/builder/editors.tsx @@ -413,7 +413,7 @@ function WorkflowEditorV2({ } value={properties[key] || ""as string} />); - case "isabled": + case "disabled": return (
Date: Fri, 20 Sep 2024 18:46:56 +0530 Subject: [PATCH 3/4] fix: Using standard OTLP env convention on backend service (#1973) --- keep/api/observability.py | 46 ++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/keep/api/observability.py b/keep/api/observability.py index d16f894b2..b5aa3e0fa 100644 --- a/keep/api/observability.py +++ b/keep/api/observability.py @@ -1,5 +1,6 @@ import logging import os +from urllib.parse import urlparse from fastapi import FastAPI, Request from opentelemetry import metrics, trace @@ -22,37 +23,52 @@ from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export import BatchSpanProcessor +def get_protocol_from_endpoint(endpoint): + parsed_url = urlparse(endpoint) + if parsed_url.scheme == "http": + return HTTPOTLPSpanExporter + elif parsed_url.scheme == "grpc": + return GRPCOTLPSpanExporter + else: + raise ValueError(f"Unsupported protocol: {parsed_url.scheme}") def setup(app: FastAPI): logger = logging.getLogger(__name__) # Configure the OpenTelemetry SDK - service_name = os.environ.get("SERVICE_NAME", "keep-api") - otlp_collector_endpoint = os.environ.get("OTLP_ENDPOINT", False) + service_name = os.environ.get("OTEL_SERVICE_NAME", os.environ.get("SERVICE_NAME", "keep-api")) + otlp_collector_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT", os.environ.get("OTLP_ENDPOINT", False)) + otlp_traces_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", None) + otlp_logs_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_LOGS_ENDPOINT", None) + otlp_metrics_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", None) enable_cloud_trace_exporeter = os.environ.get("CLOUD_TRACE_ENABLED", False) metrics_enabled = os.environ.get("METRIC_OTEL_ENABLED", "") - # to support both grpc and http - for example dynatrace doesn't support grpc - http_or_grpc = os.environ.get("OTLP_SPAN_EXPORTER", "grpc") - if http_or_grpc == "grpc": - OTLPSpanExporter = GRPCOTLPSpanExporter - else: - OTLPSpanExporter = HTTPOTLPSpanExporter resource = Resource.create({"service.name": service_name}) provider = TracerProvider(resource=resource) + if otlp_collector_endpoint: + logger.info(f"OTLP endpoint set to {otlp_collector_endpoint}") - processor = BatchSpanProcessor( - OTLPSpanExporter(endpoint=otlp_collector_endpoint) - ) - provider.add_span_processor(processor) - if metrics_enabled.lower() == "true": - logger.info("Metrics enabled.") + + if otlp_traces_endpoint: + logger.info(f"OTLP Traces endpoint set to {otlp_traces_endpoint}") + SpanExporter = get_protocol_from_endpoint(otlp_traces_endpoint) + processor = BatchSpanProcessor( + SpanExporter(endpoint=otlp_traces_endpoint) + ) + provider.add_span_processor(processor) + + if metrics_enabled.lower() == "true" and otlp_metrics_endpoint: + logger.info(f"Metrics enabled. OTLP Metrics endpoint set to {otlp_metrics_endpoint}") reader = PeriodicExportingMetricReader( - OTLPMetricExporter(endpoint=otlp_collector_endpoint) + OTLPMetricExporter(endpoint=otlp_metrics_endpoint) ) metric_provider = MeterProvider(resource=resource, metric_readers=[reader]) metrics.set_meter_provider(metric_provider) + if otlp_logs_endpoint: + logger.info(f"OTLP Logs endpoint set to {otlp_logs_endpoint}") + if enable_cloud_trace_exporeter: logger.info("Cloud Trace exporter enabled.") processor = BatchSpanProcessor(CloudTraceSpanExporter()) From 1fb5e546ef6c5a9dac15834bffb6e7f41d800a38 Mon Sep 17 00:00:00 2001 From: Tal Date: Sun, 22 Sep 2024 10:59:14 +0300 Subject: [PATCH 4/4] chore(ui): better empty states in incident view (#1979) --- .../app/incidents/[id]/incident-timeline.tsx | 19 ++++++++++++++++++- keep-ui/app/incidents/[id]/incident.tsx | 12 +++++++++++- keep-ui/next-env.d.ts | 2 +- keep-ui/package-lock.json | 8 ++++---- keep-ui/package.json | 2 +- 5 files changed, 35 insertions(+), 8 deletions(-) diff --git a/keep-ui/app/incidents/[id]/incident-timeline.tsx b/keep-ui/app/incidents/[id]/incident-timeline.tsx index 7259e46b3..d2400f6d4 100644 --- a/keep-ui/app/incidents/[id]/incident-timeline.tsx +++ b/keep-ui/app/incidents/[id]/incident-timeline.tsx @@ -8,6 +8,8 @@ import { useIncidentAlerts } from "utils/hooks/useIncidents"; import { IncidentDto } from "../models"; import Image from "next/image"; import AlertSeverity from "app/alerts/alert-severity"; +import { EmptyStateCard } from "@/components/ui/EmptyStateCard"; +import { useRouter } from "next/navigation"; const severityColors = { critical: "bg-red-300", @@ -237,6 +239,20 @@ const AlertBar: React.FC = ({ ); }; +const IncidentTimelineNoAlerts: React.FC = () => { + const router = useRouter(); + return ( +
+ router.push("/alerts/feed")} + /> +
+ ); +}; + export default function IncidentTimeline({ incident, }: { @@ -313,7 +329,8 @@ export default function IncidentTimeline({ return {}; }, [auditEvents, alerts]); - if (auditEventsLoading || !auditEvents || alertsLoading) return <>No Data; + if (auditEventsLoading || !auditEvents || alertsLoading) + return ; const { startTime, diff --git a/keep-ui/app/incidents/[id]/incident.tsx b/keep-ui/app/incidents/[id]/incident.tsx index 416bead04..2c546f16d 100644 --- a/keep-ui/app/incidents/[id]/incident.tsx +++ b/keep-ui/app/incidents/[id]/incident.tsx @@ -19,6 +19,7 @@ import { useRouter } from "next/navigation"; import IncidentTimeline from "./incident-timeline"; import { CiBellOn, CiViewTimeline } from "react-icons/ci"; import { IoIosGitNetwork } from "react-icons/io"; +import { EmptyStateCard } from "@/components/ui/EmptyStateCard"; interface Props { incidentId: string; @@ -72,7 +73,16 @@ export default function IncidentView({ incidentId }: Props) { - Coming Soon... + +
+ router.push("/topology")} + /> +
+
diff --git a/keep-ui/next-env.d.ts b/keep-ui/next-env.d.ts index fd36f9494..725dd6f24 100644 --- a/keep-ui/next-env.d.ts +++ b/keep-ui/next-env.d.ts @@ -3,4 +3,4 @@ /// // NOTE: This file should not be edited -// see https://nextjs.org/docs/basic-features/typescript for more information. +// see https://nextjs.org/docs/app/building-your-application/configuring/typescript for more information. diff --git a/keep-ui/package-lock.json b/keep-ui/package-lock.json index ebe5dba09..20cbd7923 100644 --- a/keep-ui/package-lock.json +++ b/keep-ui/package-lock.json @@ -272,7 +272,7 @@ "postcss-nested": "^6.0.1", "postcss-selector-parser": "^6.0.12", "postcss-value-parser": "^4.2.0", - "posthog-js": "^1.161.6", + "posthog-js": "^1.163.0", "posthog-node": "^3.1.1", "preact-render-to-string": "^5.2.6", "prelude-ls": "^1.2.1", @@ -11598,9 +11598,9 @@ "integrity": "sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==" }, "node_modules/posthog-js": { - "version": "1.161.6", - "resolved": "https://registry.npmjs.org/posthog-js/-/posthog-js-1.161.6.tgz", - "integrity": "sha512-UO0z/YTuan55Kl5Yg9Xs5x1PKUkm2zGKUNPioznb4GLRcxFnLBkWoeKQXNro2YZsYJvK+MY8jlF3cdGa8BZ8/Q==", + "version": "1.163.0", + "resolved": "https://registry.npmjs.org/posthog-js/-/posthog-js-1.163.0.tgz", + "integrity": "sha512-gpLbxZkOm06oOWg0uvCxBIVIHrhX3A5hxf9eAi/Z+aFP9DvWxwHQdGUkIWjnYUyxXilIbLxBPvWmiM98dYsAHA==", "dependencies": { "fflate": "^0.4.8", "preact": "^10.19.3", diff --git a/keep-ui/package.json b/keep-ui/package.json index e1c722b95..7690720c9 100644 --- a/keep-ui/package.json +++ b/keep-ui/package.json @@ -273,7 +273,7 @@ "postcss-nested": "^6.0.1", "postcss-selector-parser": "^6.0.12", "postcss-value-parser": "^4.2.0", - "posthog-js": "^1.161.6", + "posthog-js": "^1.163.0", "posthog-node": "^3.1.1", "preact-render-to-string": "^5.2.6", "prelude-ls": "^1.2.1",