From c646294a5ea9fc7bf9e74860db4e16236b8b80d6 Mon Sep 17 00:00:00 2001 From: shahargl Date: Wed, 4 Sep 2024 13:30:54 +0300 Subject: [PATCH 01/36] feat: wip --- .gitignore | 4 +- .../DeduplicationPlaceholder.tsx | 37 +++ .../app/deduplication/DeduplicationSankey.tsx | 88 +++++++ .../deduplication/DeduplicationSidebar.tsx | 219 +++++++++++++++++ .../app/deduplication/DeduplicationTable.tsx | 224 ++++++++++++++++++ keep-ui/app/deduplication/client.tsx | 21 ++ keep-ui/app/deduplication/models.tsx | 14 ++ keep-ui/app/deduplication/page.tsx | 10 + .../components/navbar/NoiseReductionLinks.tsx | 6 + keep-ui/utils/hooks/useDeduplicationRules.ts | 17 ++ .../alert_deduplicator/alert_deduplicator.py | 54 ++++- keep/api/api.py | 5 +- keep/api/core/db.py | 2 +- keep/api/models/db/alert.py | 4 +- keep/api/models/provider.py | 1 + keep/api/routes/deduplications.py | 44 ++++ .../grafana_provider/grafana_provider.py | 2 + .../prometheus_provider.py | 1 + keep/providers/providers_factory.py | 38 +++ 19 files changed, 785 insertions(+), 6 deletions(-) create mode 100644 keep-ui/app/deduplication/DeduplicationPlaceholder.tsx create mode 100644 keep-ui/app/deduplication/DeduplicationSankey.tsx create mode 100644 keep-ui/app/deduplication/DeduplicationSidebar.tsx create mode 100644 keep-ui/app/deduplication/DeduplicationTable.tsx create mode 100644 keep-ui/app/deduplication/client.tsx create mode 100644 keep-ui/app/deduplication/models.tsx create mode 100644 keep-ui/app/deduplication/page.tsx create mode 100644 keep-ui/utils/hooks/useDeduplicationRules.ts create mode 100644 keep/api/routes/deduplications.py diff --git a/.gitignore b/.gitignore index ecc7b8450..45b06baf1 100644 --- a/.gitignore +++ b/.gitignore @@ -205,4 +205,6 @@ playwright_dump_*.html playwright_dump_*.png ee/experimental/ai_temp/* -!ee/experimental/ai_temp/.gitkeep +,e!ee/experimental/ai_temp/.gitkeep + +oauth2.cfg diff --git a/keep-ui/app/deduplication/DeduplicationPlaceholder.tsx 
b/keep-ui/app/deduplication/DeduplicationPlaceholder.tsx new file mode 100644 index 000000000..afb8830f2 --- /dev/null +++ b/keep-ui/app/deduplication/DeduplicationPlaceholder.tsx @@ -0,0 +1,37 @@ +import { Fragment, useState } from "react"; +import { Button, Card, Subtitle, Title } from "@tremor/react"; +// import { CorrelationSidebar } from "./CorrelationSidebar"; +import { DeduplicationSankey } from "./DeduplicationSankey"; + +export const DeduplicationPlaceholder = () => { + const [isSidebarOpen, setIsSidebarOpen] = useState(false); + + const onCorrelationClick = () => { + setIsSidebarOpen(true); + }; + + return ( + + +
+ No Deduplications Yet + + Reduce noise by creating deduplications. + +
+ + +
+ {/* setIsSidebarOpen(!isSidebarOpen)} + /> */} +
+ ); +}; diff --git a/keep-ui/app/deduplication/DeduplicationSankey.tsx b/keep-ui/app/deduplication/DeduplicationSankey.tsx new file mode 100644 index 000000000..a1c2ba23b --- /dev/null +++ b/keep-ui/app/deduplication/DeduplicationSankey.tsx @@ -0,0 +1,88 @@ +import {SVGProps} from "react"; + +export const DeduplicationSankey = (props: SVGProps) => ( + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +); diff --git a/keep-ui/app/deduplication/DeduplicationSidebar.tsx b/keep-ui/app/deduplication/DeduplicationSidebar.tsx new file mode 100644 index 000000000..e6eedf7eb --- /dev/null +++ b/keep-ui/app/deduplication/DeduplicationSidebar.tsx @@ -0,0 +1,219 @@ +import React, { Fragment, useEffect, useState } from "react"; +import { Dialog, Transition } from "@headlessui/react"; +import { useForm, Controller, SubmitHandler } from "react-hook-form"; +import { Text, Button, TextInput, Callout, Badge, MultiSelect, MultiSelectItem } from "@tremor/react"; +import { IoMdClose } from "react-icons/io"; +import { DeduplicationRule } from "app/deduplication/models"; +import { useProviders } from "utils/hooks/useProviders"; + +interface DeduplicationSidebarProps { + isOpen: boolean; + toggle: VoidFunction; + defaultValue?: Partial; + onSubmit: (data: Partial) => Promise; +} + +const DeduplicationSidebar: React.FC = ({ + isOpen, + toggle, + defaultValue, + onSubmit, +}) => { + const { control, handleSubmit, setValue, reset, setError, formState: { errors }, clearErrors } = useForm>({ + defaultValues: defaultValue || { + name: "", + description: "", + sources: [], + fingerprint_fields: [], + }, + }); + + const [isSubmitting, setIsSubmitting] = useState(false); + const { data: providers = { installed_providers: [], linked_providers: [] } } = useProviders(); + + const alertProviders = [...providers.installed_providers, ...providers.linked_providers].filter( + provider => 
provider.labels?.includes("alert") + ); + + useEffect(() => { + if (isOpen && defaultValue) { + reset(defaultValue); + } else if (isOpen) { + reset({ + name: "", + description: "", + sources: [], + fingerprint_fields: [], + }); + } + }, [isOpen, defaultValue, reset]); + + const handleToggle = () => { + if (isOpen) { + clearErrors(); + } + toggle(); + }; + + const onFormSubmit: SubmitHandler> = async (data) => { + setIsSubmitting(true); + clearErrors(); + try { + await onSubmit(data); + handleToggle(); + } catch (error) { + setError("root.serverError", { type: "manual", message: "Failed to save deduplication rule" }); + } finally { + setIsSubmitting(false); + } + }; + + return ( + + + + + + ); +}; + +export default DeduplicationSidebar; diff --git a/keep-ui/app/deduplication/DeduplicationTable.tsx b/keep-ui/app/deduplication/DeduplicationTable.tsx new file mode 100644 index 000000000..0692a9b27 --- /dev/null +++ b/keep-ui/app/deduplication/DeduplicationTable.tsx @@ -0,0 +1,224 @@ +import React, { useEffect, useMemo, useState } from 'react'; +import { + Button, + Card, + Subtitle, + Table, + TableBody, + TableCell, + TableHead, + TableHeaderCell, + TableRow, + Title, + Badge, + SparkAreaChart, +} from "@tremor/react"; +import { useRouter, useSearchParams } from "next/navigation"; +import { + createColumnHelper, + flexRender, + getCoreRowModel, + useReactTable, +} from "@tanstack/react-table"; +import { DeduplicationRule } from "app/deduplication/models"; +import DeduplicationSidebar from "app/deduplication/DeduplicationSidebar"; +import { TrashIcon, PauseIcon, PlusIcon } from "@heroicons/react/24/outline"; +import Image from "next/image"; + +const columnHelper = createColumnHelper(); + +type DeduplicationTableProps = { + deduplicationRules: DeduplicationRule[]; +}; + +export const DeduplicationTable: React.FC = ({ deduplicationRules }) => { + const router = useRouter(); + const searchParams = useSearchParams(); + + const selectedId = searchParams ? 
searchParams.get("id") : null; + const selectedRule = deduplicationRules.find((rule) => rule.id === selectedId); + const [isSidebarOpen, setIsSidebarOpen] = useState(false); + const [selectedDeduplicationRule, setSelectedDeduplicationRule] = useState(null); + + const deduplicationFormFromRule = useMemo(() => { + if (selectedDeduplicationRule) { + return { + name: selectedDeduplicationRule.name, + description: selectedDeduplicationRule.description, + timeUnit: "seconds", + }; + } + + return {}; + }, [selectedDeduplicationRule]); + + const onDeduplicationClick = (rule: DeduplicationRule) => { + setSelectedDeduplicationRule(rule); + setIsSidebarOpen(true); + }; + + const onCloseDeduplication = () => { + setIsSidebarOpen(false); + setSelectedDeduplicationRule(null); + }; + + useEffect(() => { + if (selectedRule) { + onDeduplicationClick(selectedRule); + } + }, [selectedRule]); + + const DEDUPLICATION_TABLE_COLS = useMemo( + () => [ + columnHelper.accessor("provider_type", { + header: "", + cell: (info) => ( +
+ {info.getValue()} +
+ ), + }), + columnHelper.accessor("description", { + header: "Name", + cell: (info) => ( +
+ {info.getValue()} + {info.row.original.default && ( + Default + )} +
+ ), + }), + columnHelper.accessor("alertsDigested", { + header: "Digested", + cell: (info) => {info.getValue() || 0}, + }), + columnHelper.accessor("dedupRatio", { + header: "Dedup Ratio", + cell: (info) => {info.getValue() || "N/A"}, + }), + columnHelper.accessor("distribution", { + header: "Distribution", + cell: (info) => ( + + ), + }), + columnHelper.accessor("default_fingerprint_fields", { + header: "Fields", + cell: (info) => ( +
+ {info.getValue().map((field: string, index: number) => ( + + {index > 0 && } + {field} + + ))} +
+ ), + }), + columnHelper.display({ + id: "actions", + cell: (info) => ( +
+
+ ), + }), + ], + [] + ); + + const table = useReactTable({ + data: deduplicationRules, + columns: DEDUPLICATION_TABLE_COLS, + getCoreRowModel: getCoreRowModel(), + }) + + const handleSubmitDeduplicationRule = async (data: Partial) => { + // Implement the logic to submit the deduplication rule + // This is a placeholder function, replace with actual implementation + console.log("Submitting deduplication rule:", data); + // Add API call or state update logic here + }; + + return ( +
+
+
+ + Deduplication Rules <span className="text-gray-400">({deduplicationRules.length})</span> + + + Set up rules to deduplicate similar alerts + +
+ +
+ + + + {table.getHeaderGroups().map((headerGroup) => ( + + {headerGroup.headers.map((header) => ( + + {flexRender( + header.column.columnDef.header, + header.getContext() + )} + + ))} + + ))} + + + {table.getRowModel().rows.map((row) => ( + onDeduplicationClick(row.original)} + > + {row.getVisibleCells().map((cell) => ( + + {flexRender(cell.column.columnDef.cell, cell.getContext())} + + ))} + + ))} + +
+
+ +
+ ); +}; diff --git a/keep-ui/app/deduplication/client.tsx b/keep-ui/app/deduplication/client.tsx new file mode 100644 index 000000000..ce65a3ca9 --- /dev/null +++ b/keep-ui/app/deduplication/client.tsx @@ -0,0 +1,21 @@ +"use client"; + +import { useDeduplicationRules } from "utils/hooks/useDeduplicationRules"; +import { DeduplicationPlaceholder } from "./DeduplicationPlaceholder"; +import { DeduplicationTable } from "./DeduplicationTable"; +import Loading from "app/loading"; + +export const Client = () => { + + const { data: deduplicationRules = [], isLoading } = useDeduplicationRules(); + + if (isLoading) { + return ; + } + + if (deduplicationRules.length === 0) { + return ; + } + + return ; +}; diff --git a/keep-ui/app/deduplication/models.tsx b/keep-ui/app/deduplication/models.tsx new file mode 100644 index 000000000..a3a4e4edc --- /dev/null +++ b/keep-ui/app/deduplication/models.tsx @@ -0,0 +1,14 @@ +export interface DeduplicationRule { + id: string; + name: string; + description: string; + default: boolean; + distribution: Record; + provider_type: string; + last_updated: string; + last_updated_by: string; + created_at: string; + created_by: string; + enabled: boolean; + default_fingerprint_fields: string[]; +} diff --git a/keep-ui/app/deduplication/page.tsx b/keep-ui/app/deduplication/page.tsx new file mode 100644 index 000000000..01a99a74d --- /dev/null +++ b/keep-ui/app/deduplication/page.tsx @@ -0,0 +1,10 @@ +import { Client } from "./client"; + +export default function Page() { + return ; +} + +export const metadata = { + title: "Keep - Deduplication", + description: "Create and manage Keep Deduplication.", +}; diff --git a/keep-ui/components/navbar/NoiseReductionLinks.tsx b/keep-ui/components/navbar/NoiseReductionLinks.tsx index 3533cc2d8..05a3577ed 100644 --- a/keep-ui/components/navbar/NoiseReductionLinks.tsx +++ b/keep-ui/components/navbar/NoiseReductionLinks.tsx @@ -10,6 +10,7 @@ import classNames from "classnames"; import { AILink } from "./AILink"; 
import { TbTopologyRing } from "react-icons/tb"; import { FaVolumeMute } from "react-icons/fa"; +import { IoMdGitMerge } from "react-icons/io"; type NoiseReductionLinksProps = { session: Session | null }; @@ -39,6 +40,11 @@ export const NoiseReductionLinks = ({ session }: NoiseReductionLinksProps) => { +
  • + + Deduplication + +
  • Correlations diff --git a/keep-ui/utils/hooks/useDeduplicationRules.ts b/keep-ui/utils/hooks/useDeduplicationRules.ts new file mode 100644 index 000000000..b90b4aca6 --- /dev/null +++ b/keep-ui/utils/hooks/useDeduplicationRules.ts @@ -0,0 +1,17 @@ +import { DeduplicationRule } from "app/deduplication/models"; +import { useSession } from "next-auth/react"; +import { SWRConfiguration } from "swr"; +import useSWRImmutable from "swr/immutable"; +import { getApiURL } from "utils/apiUrl"; +import { fetcher } from "utils/fetcher"; + +export const useDeduplicationRules = (options: SWRConfiguration = {}) => { + const apiUrl = getApiURL(); + const { data: session } = useSession(); + + return useSWRImmutable( + () => (session ? `${apiUrl}/deduplications` : null), + (url) => fetcher(url, session?.accessToken), + options + ); +}; diff --git a/keep/api/alert_deduplicator/alert_deduplicator.py b/keep/api/alert_deduplicator/alert_deduplicator.py index a16cf20af..5328ad550 100644 --- a/keep/api/alert_deduplicator/alert_deduplicator.py +++ b/keep/api/alert_deduplicator/alert_deduplicator.py @@ -5,8 +5,12 @@ import celpy -from keep.api.core.db import get_all_filters, get_last_alert_hash_by_fingerprint +from keep.api.core.db import ( + get_all_deduplication_rules, + get_last_alert_hash_by_fingerprint, +) from keep.api.models.alert import AlertDto +from keep.providers.providers_factory import ProvidersFactory # decide whether this should be a singleton so that we can keep the filters in memory @@ -16,7 +20,7 @@ class AlertDeduplicator: DEFAULT_FIELDS = ["lastReceived"] def __init__(self, tenant_id): - self.filters = get_all_filters(tenant_id) + self.filters = get_all_deduplication_rules(tenant_id) self.logger = logging.getLogger(__name__) self.tenant_id = tenant_id @@ -101,3 +105,49 @@ def _remove_field(self, field, alert: AlertDto) -> AlertDto: del d[field_parts[-1]] setattr(alert, field_parts[0], d) return alert + + def get_deduplications(self): + installed_providers = 
ProvidersFactory.get_installed_providers(self.tenant_id) + # filter out the providers that are not "alert" in tags + installed_providers = [ + provider for provider in installed_providers if "alert" in provider.tags + ] + linked_providers = ProvidersFactory.get_linked_providers(self.tenant_id) + providers = [*installed_providers, *linked_providers] + + default_deduplications = ProvidersFactory.get_default_deduplications() + default_deduplications_dict = { + dd["provider_type"]: dd for dd in default_deduplications + } + + custom_deduplications = get_all_deduplication_rules(self.tenant_id) + custom_deduplications_dict = { + filt.provider_id: filt for filt in custom_deduplications + } + + final_deduplications = [] + # if provider doesn't have custom deduplication, use the default one + for provider in providers: + if provider.id not in custom_deduplications_dict: + if provider.type not in default_deduplications_dict: + self.logger.warning( + f"Provider {provider.type} does not have a default deduplication" + ) + continue + + # copy the default deduplication and set the provider id + default_deduplication = copy.copy( + default_deduplications_dict[provider.type] + ) + if provider.id: + default_deduplication["description"] = ( + f"{default_deduplication['description']} - {provider.id}" + ) + + final_deduplications.append(default_deduplication) + else: + final_deduplications.append(custom_deduplications_dict[provider.id]) + + # compression = get_deduplication_ratio(self.tenant_id) + # combine lists + return final_deduplications diff --git a/keep/api/api.py b/keep/api/api.py index f12699494..26b8879eb 100644 --- a/keep/api/api.py +++ b/keep/api/api.py @@ -29,6 +29,7 @@ ai, alerts, dashboard, + deduplications, extraction, healthcheck, incidents, @@ -222,7 +223,9 @@ def get_app( app.include_router(tags.router, prefix="/tags", tags=["tags"]) app.include_router(maintenance.router, prefix="/maintenance", tags=["maintenance"]) app.include_router(topology.router, 
prefix="/topology", tags=["topology"]) - + app.include_router( + deduplications.router, prefix="/deduplications", tags=["deduplications"] + ) # if its single tenant with authentication, add signin endpoint logger.info(f"Starting Keep with authentication type: {AUTH_TYPE}") # If we run Keep with SINGLE_TENANT auth type, we want to add the signin endpoint diff --git a/keep/api/core/db.py b/keep/api/core/db.py index 379285d6c..1664238d4 100644 --- a/keep/api/core/db.py +++ b/keep/api/core/db.py @@ -1531,7 +1531,7 @@ def get_rule_distribution(tenant_id, minute=False): return rule_distribution -def get_all_filters(tenant_id): +def get_all_deduplication_rules(tenant_id): with Session(engine) as session: filters = session.exec( select(AlertDeduplicationFilter).where( diff --git a/keep/api/models/db/alert.py b/keep/api/models/db/alert.py index 475ba323a..53a4ecd8d 100644 --- a/keep/api/models/db/alert.py +++ b/keep/api/models/db/alert.py @@ -60,7 +60,7 @@ class Incident(SQLModel, table=True): id: UUID = Field(default_factory=uuid4, primary_key=True) tenant_id: str = Field(foreign_key="tenant.id") tenant: Tenant = Relationship() - + user_generated_name: str | None ai_generated_name: str | None @@ -176,6 +176,8 @@ class AlertDeduplicationFilter(SQLModel, table=True): fields: list = Field(sa_column=Column(JSON), default=[]) # a CEL expression to match the alert matcher_cel: str + # the provider id to use for this deduplication - None for linked providers + provider_id: str | None class Config: arbitrary_types_allowed = True diff --git a/keep/api/models/provider.py b/keep/api/models/provider.py index 78df4eb62..15629d5e4 100644 --- a/keep/api/models/provider.py +++ b/keep/api/models/provider.py @@ -44,3 +44,4 @@ class Provider(BaseModel): ] = [] alertsDistribution: dict[str, int] | None = None alertExample: dict | None = None + default_fingerprint_fields: list[str] | None = None diff --git a/keep/api/routes/deduplications.py b/keep/api/routes/deduplications.py new file mode 
100644 index 000000000..8a71d7492 --- /dev/null +++ b/keep/api/routes/deduplications.py @@ -0,0 +1,44 @@ +import logging + +from fastapi import APIRouter, Depends +from pydantic import BaseModel + +from keep.api.alert_deduplicator.alert_deduplicator import AlertDeduplicator +from keep.identitymanager.authenticatedentity import AuthenticatedEntity +from keep.identitymanager.identitymanagerfactory import IdentityManagerFactory + +router = APIRouter() + +logger = logging.getLogger(__name__) + + +class DeduplicationDto(BaseModel): + name: str + description: str + default: bool + distribution: dict + sources: list[str] + last_updated: str + last_updated_by: str + created_at: str + created_by: str + enabled: bool + + +@router.get( + "", + description="Get Deduplications", +) +def get_deduplications( + authenticated_entity: AuthenticatedEntity = Depends( + IdentityManagerFactory.get_auth_verifier(["read:deduplications"]) + ), +): + tenant_id = authenticated_entity.tenant_id + logger.info("Getting deduplications") + + alert_deduplicator = AlertDeduplicator(tenant_id) + deduplications = alert_deduplicator.get_deduplications() + + logger.info(deduplications) + return deduplications diff --git a/keep/providers/grafana_provider/grafana_provider.py b/keep/providers/grafana_provider/grafana_provider.py index 86c0a4104..80dbea5b4 100644 --- a/keep/providers/grafana_provider/grafana_provider.py +++ b/keep/providers/grafana_provider/grafana_provider.py @@ -50,6 +50,8 @@ class GrafanaProvider(BaseProvider): """Pull/Push alerts from Grafana.""" KEEP_GRAFANA_WEBHOOK_INTEGRATION_NAME = "keep-grafana-webhook-integration" + FINGERPRINT_FIELDS = ["fingerprint"] + PROVIDER_SCOPES = [ ProviderScope( name="alert.rules:read", diff --git a/keep/providers/prometheus_provider/prometheus_provider.py b/keep/providers/prometheus_provider/prometheus_provider.py index 7f4ecf0c8..a816794a4 100644 --- a/keep/providers/prometheus_provider/prometheus_provider.py +++ 
b/keep/providers/prometheus_provider/prometheus_provider.py @@ -82,6 +82,7 @@ class PrometheusProvider(BaseProvider): name="connectivity", description="Connectivity Test", mandatory=True ) ] + FINGERPRINT_FIELDS = ["fingerprint"] def __init__( self, context_manager: ContextManager, provider_id: str, config: ProviderConfig diff --git a/keep/providers/providers_factory.py b/keep/providers/providers_factory.py index 6e9f6a70e..083223071 100644 --- a/keep/providers/providers_factory.py +++ b/keep/providers/providers_factory.py @@ -341,6 +341,13 @@ def get_all_providers() -> list[Provider]: # not all providers have this method (yet ^^) except Exception: alert_example = None + + # Add default fingerprint fields if available + if hasattr(provider_class, "FINGERPRINT_FIELDS"): + default_fingerprint_fields = provider_class.FINGERPRINT_FIELDS + else: + default_fingerprint_fields = [] + providers.append( Provider( type=provider_type, @@ -359,6 +366,7 @@ def get_all_providers() -> list[Provider]: methods=provider_methods, tags=provider_tags, alertExample=alert_example, + default_fingerprint_fields=default_fingerprint_fields, ) ) except ModuleNotFoundError: @@ -511,3 +519,33 @@ def get_linked_providers(tenant_id: str) -> list[Provider]: _linked_providers.append(provider) return _linked_providers + + @staticmethod + def get_default_deduplications() -> list[dict]: + """ + Get the default deduplications for all providers with FINGERPRINT_FIELDS. + + Returns: + list: The default deduplications for each provider. 
+ """ + default_deduplications = [] + all_providers = ProvidersFactory.get_all_providers() + + for provider in all_providers: + if provider.default_fingerprint_fields: + deduplication = { + "name": f"{provider.type}_default", + "description": f"Default deduplication for {provider.display_name}", + "default": True, + "distribution": {}, + "provider_type": provider.type, + "last_updated": "", + "last_updated_by": "", + "created_at": "", + "created_by": "", + "enabled": True, + "default_fingerprint_fields": provider.default_fingerprint_fields, + } + default_deduplications.append(deduplication) + + return default_deduplications From 1aa54a66fe34d0ecbf7da4f7f2736a1eb3da42e2 Mon Sep 17 00:00:00 2001 From: shahargl Date: Wed, 4 Sep 2024 16:46:38 +0300 Subject: [PATCH 02/36] feat: wip --- .../app/deduplication/DeduplicationTable.tsx | 53 +++++++++++++------ keep-ui/app/deduplication/models.tsx | 6 ++- .../alert_deduplicator/alert_deduplicator.py | 47 ++++++++++++---- keep/api/core/db.py | 48 ++++++++++++++++- keep/api/models/alert.py | 25 +++++++-- keep/api/models/db/alert.py | 35 +++++++++++- keep/api/routes/deduplications.py | 14 ----- keep/providers/providers_factory.py | 31 ++++++----- scripts/simulate_alerts.py | 7 +-- 9 files changed, 201 insertions(+), 65 deletions(-) diff --git a/keep-ui/app/deduplication/DeduplicationTable.tsx b/keep-ui/app/deduplication/DeduplicationTable.tsx index 0692a9b27..30ecce411 100644 --- a/keep-ui/app/deduplication/DeduplicationTable.tsx +++ b/keep-ui/app/deduplication/DeduplicationTable.tsx @@ -73,7 +73,7 @@ export const DeduplicationTable: React.FC = ({ deduplic columnHelper.accessor("provider_type", { header: "", cell: (info) => ( -
    +
    = ({ deduplic columnHelper.accessor("description", { header: "Name", cell: (info) => ( -
    +
    {info.getValue()} {info.row.original.default && ( Default @@ -97,29 +97,48 @@ export const DeduplicationTable: React.FC = ({ deduplic
    ), }), - columnHelper.accessor("alertsDigested", { - header: "Digested", + columnHelper.accessor("ingested", { + header: "Ingested", cell: (info) => {info.getValue() || 0}, }), - columnHelper.accessor("dedupRatio", { + columnHelper.accessor("dedup_ratio", { header: "Dedup Ratio", - cell: (info) => {info.getValue() || "N/A"}, + cell: (info) => { + const value = info.getValue() || 0; + const formattedValue = Number(value).toFixed(1); + return {formattedValue}%; + }, }), columnHelper.accessor("distribution", { header: "Distribution", - cell: (info) => ( - - ), + cell: (info) => { + const rawData = info.getValue(); + const maxNumber = Math.max(...rawData.map(item => item.number)); + const allZero = rawData.every(item => item.number === 0); + const data = rawData.map(item => ({ + ...item, + number: maxNumber > 0 ? (item.number / maxNumber) + 1 : 0.5 + })); + const colors = ["orange"]; + const showGradient = true; + return ( + + ); + }, }), - columnHelper.accessor("default_fingerprint_fields", { + columnHelper.accessor("fingerprint_fields", { header: "Fields", cell: (info) => ( -
    +
    {info.getValue().map((field: string, index: number) => ( {index > 0 && } @@ -132,7 +151,7 @@ export const DeduplicationTable: React.FC = ({ deduplic columnHelper.display({ id: "actions", cell: (info) => ( -
    +
    ( {alertProviders.map((provider) => ( - + {provider.type} ))} @@ -185,6 +191,46 @@ const DeduplicationSidebar: React.FC = ({ )} />
    +
    + +
    + {fullDeduplication && ( +
    + + ( + + {/* Replace this with actual ignore field options */} + Title + Description + Severity + Source + + )} + /> +
    + )} {errors.root?.serverError && ( {errors.root.serverError.message} diff --git a/keep-ui/app/deduplication/DeduplicationTable.tsx b/keep-ui/app/deduplication/DeduplicationTable.tsx index 30ecce411..9acda1836 100644 --- a/keep-ui/app/deduplication/DeduplicationTable.tsx +++ b/keep-ui/app/deduplication/DeduplicationTable.tsx @@ -89,11 +89,14 @@ export const DeduplicationTable: React.FC = ({ deduplic columnHelper.accessor("description", { header: "Name", cell: (info) => ( -
    +
    {info.getValue()} {info.row.original.default && ( Default )} + {info.row.original.full_deduplication && ( + Full Deduplication + )}
    ), }), diff --git a/keep/api/alert_deduplicator/alert_deduplicator.py b/keep/api/alert_deduplicator/alert_deduplicator.py index f0f629ae1..0620b1743 100644 --- a/keep/api/alert_deduplicator/alert_deduplicator.py +++ b/keep/api/alert_deduplicator/alert_deduplicator.py @@ -13,8 +13,8 @@ get_provider_distribution, ) from keep.api.models.alert import AlertDto, DeduplicationRuleDto -from keep.api.models.db.alert import AlertDeduplicationRule from keep.providers.providers_factory import ProvidersFactory +from keep.searchengine.searchengine import SearchEngine class AlertDeduplicator: @@ -25,6 +25,7 @@ def __init__(self, tenant_id): self.provider_distribution_enabled = config( "PROVIDER_DISTRIBUTION_ENABLED", cast=bool, default=True ) + self.search_engine = SearchEngine(self.tenant_id) def _apply_deduplication_rule( self, alert: AlertDto, rule: DeduplicationRuleDto @@ -151,12 +152,24 @@ def get_full_deduplication_rule( def _get_default_full_deduplication_rule(self) -> DeduplicationRuleDto: # just return a default deduplication rule with lastReceived field - return AlertDeduplicationRule( + return DeduplicationRuleDto( + name="Keep Full Deduplication Rule", + description="Keep Full Deduplication Rule", + default=True, + distribution=[], fingerprint_fields=[], + provider_type="keep", provider_id=None, full_deduplication=True, ignore_fields=["lastReceived"], priority=0, + last_updated=None, + last_updated_by=None, + created_at=None, + created_by=None, + ingested=0, + dedup_ratio=0.0, + enabled=True, ) def get_deduplications(self) -> list[DeduplicationRuleDto]: @@ -179,11 +192,13 @@ def get_deduplications(self) -> list[DeduplicationRuleDto]: custom_deduplications_dict = { rule.provider_id: rule for rule in custom_deduplications } + # get the "catch all" full deduplication rule + catch_all_full_deduplication = self._get_default_full_deduplication_rule() # calculate the deduplciations # if a provider has custom deduplication rule, use it # else, use the default 
deduplication rule of the provider - final_deduplications = [] + final_deduplications = [catch_all_full_deduplication] for provider in providers: # if the provider doesn't have a deduplication rule, use the default one if provider.id not in custom_deduplications_dict: @@ -235,3 +250,9 @@ def get_deduplications(self) -> list[DeduplicationRuleDto]: break return result + + def get_deduplication_fields(self) -> list[str]: + # SHAHAR: this could be improved by saving the fields on ingestion time + # SHAHAR: it may be broken + fields = self.search_engine.search_alerts_by_cel() + return fields diff --git a/keep/api/models/alert.py b/keep/api/models/alert.py index 238180b44..89d26b853 100644 --- a/keep/api/models/alert.py +++ b/keep/api/models/alert.py @@ -426,10 +426,10 @@ class DeduplicationRuleDto(BaseModel): distribution: list[dict] # list of {hour: int, count: int} provider_id: str | None # None for default rules provider_type: str - last_updated: str - last_updated_by: str - created_at: str - created_by: str + last_updated: str | None + last_updated_by: str | None + created_at: str | None + created_by: str | None ingested: int dedup_ratio: float enabled: bool diff --git a/keep/api/models/db/alert.py b/keep/api/models/db/alert.py index 3722792f2..74d6a802e 100644 --- a/keep/api/models/db/alert.py +++ b/keep/api/models/db/alert.py @@ -207,6 +207,7 @@ class AlertDeduplicationEvent(SQLModel, table=True): # these are only soft reference since it could be linked provider provider_id: str | None = Field() provider_type: str | None = Field() + priority: int = Field(default=0) # for future use __table_args__ = ( Index( diff --git a/keep/api/models/db/migrations/versions/2024-09-11-10-03_e066535d7d76.py b/keep/api/models/db/migrations/versions/2024-09-11-10-03_e066535d7d76.py new file mode 100644 index 000000000..aef7800b6 --- /dev/null +++ b/keep/api/models/db/migrations/versions/2024-09-11-10-03_e066535d7d76.py @@ -0,0 +1,100 @@ +"""Deduplication + +Revision ID: e066535d7d76 
+Revises: 1aacee84447e +Create Date: 2024-09-11 10:03:54.782474 + +""" + +import sqlalchemy as sa +import sqlmodel +from alembic import op +from sqlalchemy.dialects import sqlite + +# revision identifiers, used by Alembic. +revision = "e066535d7d76" +down_revision = "1aacee84447e" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("alertdeduplicationrule", schema=None) as batch_op: + batch_op.add_column( + sa.Column("name", sqlmodel.sql.sqltypes.AutoString(), nullable=False) + ) + batch_op.add_column( + sa.Column("description", sqlmodel.sql.sqltypes.AutoString(), nullable=False) + ) + batch_op.add_column(sa.Column("last_updated", sa.DateTime(), nullable=False)) + batch_op.add_column( + sa.Column( + "last_updated_by", sqlmodel.sql.sqltypes.AutoString(), nullable=False + ) + ) + batch_op.add_column(sa.Column("created_at", sa.DateTime(), nullable=False)) + batch_op.add_column( + sa.Column("created_by", sqlmodel.sql.sqltypes.AutoString(), nullable=False) + ) + batch_op.add_column(sa.Column("enabled", sa.Boolean(), nullable=False)) + batch_op.alter_column( + "provider_type", + existing_type=sa.TEXT(), + type_=sqlmodel.sql.sqltypes.AutoString(), + nullable=False, + ) + batch_op.create_index( + batch_op.f("ix_alertdeduplicationrule_name"), ["name"], unique=False + ) + + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + with op.batch_alter_table("preset", schema=None) as batch_op: + batch_op.drop_constraint(None, type_="foreignkey") + batch_op.drop_constraint(None, type_="unique") + batch_op.drop_constraint(None, type_="unique") + batch_op.drop_index(batch_op.f("ix_preset_tenant_id")) + batch_op.drop_index(batch_op.f("ix_preset_created_by")) + batch_op.alter_column("id", existing_type=sa.CHAR(length=32), nullable=True) + batch_op.alter_column("options", existing_type=sqlite.JSON(), nullable=False) + + with op.batch_alter_table("alertdeduplicationrule", schema=None) as batch_op: + batch_op.add_column(sa.Column("priority", sa.INTEGER(), nullable=False)) + batch_op.drop_index(batch_op.f("ix_alertdeduplicationrule_name")) + batch_op.alter_column( + "provider_type", + existing_type=sqlmodel.sql.sqltypes.AutoString(), + type_=sa.TEXT(), + nullable=True, + ) + batch_op.drop_column("enabled") + batch_op.drop_column("created_by") + batch_op.drop_column("created_at") + batch_op.drop_column("last_updated_by") + batch_op.drop_column("last_updated") + batch_op.drop_column("description") + batch_op.drop_column("name") + + with op.batch_alter_table("alertaudit", schema=None) as batch_op: + batch_op.alter_column("description", existing_type=sa.TEXT(), nullable=False) + + op.create_table( + "_alembic_tmp_alertaudit", + sa.Column("id", sa.CHAR(length=32), nullable=False), + sa.Column("fingerprint", sa.VARCHAR(), nullable=False), + sa.Column("tenant_id", sa.VARCHAR(), nullable=False), + sa.Column("timestamp", sa.DATETIME(), nullable=False), + sa.Column("user_id", sa.VARCHAR(), nullable=False), + sa.Column("action", sa.VARCHAR(), nullable=False), + sa.Column("description", sa.TEXT(), nullable=True), + sa.ForeignKeyConstraint( + ["tenant_id"], + ["tenant.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + # ### end Alembic commands ### diff --git a/keep/api/routes/deduplications.py b/keep/api/routes/deduplications.py index f241ea44a..1b9925ee9 100644 --- a/keep/api/routes/deduplications.py +++ 
b/keep/api/routes/deduplications.py @@ -28,3 +28,22 @@ def get_deduplications( logger.info(deduplications) return deduplications + + +@router.get( + "/fields", + description="Get Optional Fields For Deduplications", +) +def get_deduplication_fields( + authenticated_entity: AuthenticatedEntity = Depends( + IdentityManagerFactory.get_auth_verifier(["read:deduplications"]) + ), +): + tenant_id = authenticated_entity.tenant_id + logger.info("Getting deduplication fields") + + alert_deduplicator = AlertDeduplicator(tenant_id) + fields = alert_deduplicator.get_deduplication_fields() + + logger.info("Got deduplication fields") + return fields From 109d58b411b4a7593f55d6dfde6e97f6e3dc110d Mon Sep 17 00:00:00 2001 From: shahargl Date: Wed, 11 Sep 2024 11:39:33 +0300 Subject: [PATCH 08/36] feat: wip --- keep/api/core/db.py | 12 +- keep/rulesengine/rulesengine.py | 275 +++++++++++++++++++++++++++----- 2 files changed, 245 insertions(+), 42 deletions(-) diff --git a/keep/api/core/db.py b/keep/api/core/db.py index 885998fb3..3f36ad61e 100644 --- a/keep/api/core/db.py +++ b/keep/api/core/db.py @@ -13,8 +13,8 @@ from typing import Any, Dict, List, Tuple, Union from uuid import uuid4 -import numpy as np -import pandas as pd +# import numpy as np +# import pandas as pd import validators from dotenv import find_dotenv, load_dotenv from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor @@ -1537,10 +1537,7 @@ def get_rule_incidents_count_db(tenant_id): query = ( session.query(Incident.rule_id, func.count(Incident.id)) .select_from(Incident) - .filter( - Incident.tenant_id == tenant_id, - col(Incident.rule_id).isnot(None) - ) + .filter(Incident.tenant_id == tenant_id, col(Incident.rule_id).isnot(None)) .group_by(Incident.rule_id) ) return dict(query.all()) @@ -2651,6 +2648,7 @@ def confirm_predicted_incident_by_id( return incident +""" def write_pmi_matrix_to_temp_file( tenant_id: str, pmi_matrix: np.array, fingerprints: List, temp_dir: str ) -> bool: @@ -2732,6 
+2730,7 @@ def get_pmi_value( return pmi_entry.pmi if pmi_entry else None + def get_pmi_values_from_temp_file(temp_dir: str) -> Tuple[np.array, Dict[str, int]]: npzfile = np.load(f"{temp_dir}/pmi_matrix.npz", allow_pickle=True) pmi_matrix = npzfile["pmi_matrix"] @@ -2754,6 +2753,7 @@ def get_pmi_values( (entry.fingerprint_i, entry.fingerprint_j): entry.pmi for entry in pmi_entries } return pmi_values +""" def update_incident_summary( diff --git a/keep/rulesengine/rulesengine.py b/keep/rulesengine/rulesengine.py index 9cdacc1b8..3c95d30c2 100644 --- a/keep/rulesengine/rulesengine.py +++ b/keep/rulesengine/rulesengine.py @@ -1,13 +1,17 @@ +import itertools import json import logging +import re import celpy +import chevron from keep.api.consts import STATIC_PRESETS -from keep.api.core.db import get_incident_for_grouping_rule, assign_alert_to_incident +from keep.api.core.db import assign_alert_to_group as assign_alert_to_group_db +from keep.api.core.db import create_alert as create_alert_db from keep.api.core.db import get_rules as get_rules_db -from keep.api.models.alert import AlertDto, AlertSeverity, IncidentDto -from keep.api.utils.cel_utils import preprocess_cel_expression +from keep.api.models.alert import AlertDto, AlertSeverity, AlertStatus +from keep.api.models.group import GroupDto class RulesEngine: @@ -15,11 +19,36 @@ def __init__(self, tenant_id=None): self.tenant_id = tenant_id self.logger = logging.getLogger(__name__) - def run_rules(self, events: list[AlertDto]) -> list[IncidentDto]: + def _calc_max_severity(self, alerts): + if not alerts: + # should not happen + self.logger.info( + "Could not calculate max severity from empty list - fallbacking to info" + ) + return str(AlertSeverity.INFO) + + alerts_by_fingerprint = {} + for alert in alerts: + if alert.fingerprint not in alerts_by_fingerprint: + alerts_by_fingerprint[alert.fingerprint] = [alert] + else: + alerts_by_fingerprint[alert.fingerprint].append(alert) + + # now take the latest (by timestamp) 
for each fingerprint: + alerts = [ + max(alerts, key=lambda alert: alert.event["lastReceived"]) + for alerts in alerts_by_fingerprint.values() + ] + # if all alerts are with the same status, just use it + severities = [AlertSeverity(alert.event["severity"]) for alert in alerts] + max_severity = max(severities, key=lambda severity: severity.order) + return str(max_severity) + + def run_rules(self, events: list[AlertDto]): self.logger.info("Running rules") rules = get_rules_db(tenant_id=self.tenant_id) - incidents_dto = {} + groups = [] for rule in rules: self.logger.info(f"Evaluating rule {rule.name}") for event in events: @@ -37,37 +66,105 @@ def run_rules(self, events: list[AlertDto]) -> list[IncidentDto]: self.logger.info( f"Rule {rule.name} on event {event.id} is relevant" ) - - rule_fingerprint = self._calc_rule_fingerprint(event, rule) - - incident = get_incident_for_grouping_rule(self.tenant_id, rule, rule.timeframe, rule_fingerprint) - - incident = assign_alert_to_incident( + group_fingerprint = self._calc_group_fingerprint(event, rule) + # Add relation between this event and the group + updated_group = assign_alert_to_group_db( + tenant_id=self.tenant_id, alert_id=event.event_id, - incident_id=incident.id, - tenant_id=self.tenant_id + rule_id=str(rule.id), + timeframe=rule.timeframe, + group_fingerprint=group_fingerprint, ) - - incidents_dto[incident.id] = IncidentDto.from_db_incident(incident) + groups.append(updated_group) else: self.logger.info( f"Rule {rule.name} on event {event.id} is not relevant" ) self.logger.info("Rules ran successfully") # if we don't have any updated groups, we don't need to create any alerts - if not incidents_dto: - return [] - - self.logger.info(f"Rules ran, {len(incidents_dto)} incidents created") + if not groups: + return + # get the rules of the groups + updated_group_rule_ids = [group.rule_id for group in groups] + updated_rules = get_rules_db( + tenant_id=self.tenant_id, ids=updated_group_rule_ids + ) + # more convenient 
to work with a dict + updated_rules_dict = {str(rule.id): rule for rule in updated_rules} + # Now let's create a new alert for each group + grouped_alerts = [] + for group in groups: + rule = updated_rules_dict.get(str(group.rule_id)) + group_fingerprint = group.calculate_fingerprint() + try: + group_attributes = GroupDto.get_group_attributes(group.alerts) + except Exception: + # should not happen since I fixed the assign_alert_to_group_db + self.logger.exception( + f"Failed to calculate group attributes for group {group.id}" + ) + continue + context = { + "group_attributes": group_attributes, + # Shahar: first, group have at least one alert. + # second, the only supported {{ }} are the ones in the group + # attributes, so we can use the first alert because they are the same for any other alert in the group + **group.alerts[0].event, + } + group_description = chevron.render(rule.group_description, context) + group_severity = self._calc_max_severity(group.alerts) + # group all the sources from all the alerts + group_source = list( + set( + itertools.chain.from_iterable( + [alert.event["source"] for alert in group.alerts] + ) + ) + ) + # inert "keep" as the first source to emphasize that this alert was generated by keep + group_source.insert(0, "keep") + # if the group has "group by", add it to the group name + if rule.grouping_criteria: + group_name = f"Alert group genereted by rule {rule.name} | group:{group.group_fingerprint}" + else: + group_name = f"Alert group genereted by rule {rule.name}" - return list(incidents_dto.values()) + group_status = self._calc_group_status(group.alerts) + # get the payload of the group + # todo: this is not scaling, needs to find another solution + # group_payload = self._generate_group_payload(group.alerts) + # create the alert + group_alert = create_alert_db( + tenant_id=self.tenant_id, + provider_type="group", + provider_id=rule.id, + # todo: event should support list? 
+ event={ + "name": group_name, + "id": group_fingerprint, + "description": group_description, + "lastReceived": group_attributes.get("last_update_time"), + "severity": group_severity, + "source": group_source, + "status": group_status, + "pushed": True, + "group": True, + # "groupPayload": group_payload, + "fingerprint": group_fingerprint, + **group_attributes, + }, + fingerprint=group_fingerprint, + ) + grouped_alerts.append(group_alert) + self.logger.info(f"Created alert {group_alert.id} for group {group.id}") + self.logger.info(f"Rules ran, {len(grouped_alerts)} alerts created") + alerts_dto = [AlertDto(**alert.event) for alert in grouped_alerts] + return alerts_dto def _extract_subrules(self, expression): # CEL rules looks like '(source == "sentry") && (source == "grafana" && severity == "critical")' # and we need to extract the subrules sub_rules = expression.split(") && (") - if len(sub_rules) == 1: - return sub_rules # the first and the last rules will have a ( or ) at the beginning or the end # e.g. for the example of: # (source == "sentry") && (source == "grafana" && severity == "critical") @@ -106,7 +203,7 @@ def _check_if_rule_apply(self, rule, event: AlertDto): # no subrules matched return False - def _calc_rule_fingerprint(self, event: AlertDto, rule): + def _calc_group_fingerprint(self, event: AlertDto, rule): # extract all the grouping criteria from the event # e.g. 
if the grouping criteria is ["event.labels.queue", "event.labels.cluster"] # and the event is: @@ -117,13 +214,13 @@ def _calc_rule_fingerprint(self, event: AlertDto, rule): # "foo": "bar" # } # } - # than the rule_fingerprint will be "queue1,cluster1" + # than the group_fingerprint will be "queue1,cluster1" - # note: rule_fingerprint is not a unique id, since different rules can lead to the same rule_fingerprint - # hence, the actual fingerprint is composed of the rule_fingerprint and the incident id + # note: group_fingerprint is not a unique id, since different rules can lead to the same group_fingerprint + # hence, the actual fingerprint is composed of the group_fingerprint and the group id event_payload = event.dict() grouping_criteria = rule.grouping_criteria or [] - rule_fingerprint = [] + group_fingerprint = [] for criteria in grouping_criteria: # we need to extract the value from the event # e.g. if the criteria is "event.labels.queue" @@ -132,23 +229,129 @@ def _calc_rule_fingerprint(self, event: AlertDto, rule): value = event_payload for part in criteria_parts: value = value.get(part) - if isinstance(value, list): - value = ",".join(value) - rule_fingerprint.append(value) + group_fingerprint.append(value) # if, for example, the event should have labels.X but it doesn't, - # than we will have None in the rule_fingerprint - if not rule_fingerprint: + # than we will have None in the group_fingerprint + if not group_fingerprint: self.logger.warning( - f"Failed to calculate rule fingerprint for event {event.id} and rule {rule.name}" + f"Failed to calculate group fingerprint for event {event.id} and rule {rule.name}" ) return "none" # if any of the values is None, we will return "none" - if any([fingerprint is None for fingerprint in rule_fingerprint]): + if any([fingerprint is None for fingerprint in group_fingerprint]): self.logger.warning( f"Failed to fetch the appropriate labels from the event {event.id} and rule {rule.name}" ) return "none" - return 
",".join(rule_fingerprint) + return ",".join(group_fingerprint) + + def _calc_group_status(self, alerts): + """This function calculates the status of a group of alerts according to the following logic: + 1. If the last alert of each fingerprint is resolved, the group is resolved + 2. If at least one of the alerts is firing, the group is firing + + + Args: + alerts (list[Alert]): list of alerts related to the group + + Returns: + AlertStatus: the alert status (enum) + """ + # take the last alert from each fingerprint + # if all of them are resolved, the group is resolved + alerts_by_fingerprint = {} + for alert in alerts: + if alert.fingerprint not in alerts_by_fingerprint: + alerts_by_fingerprint[alert.fingerprint] = [alert] + else: + alerts_by_fingerprint[alert.fingerprint].append(alert) + + # now take the latest (by timestamp) for each fingerprint: + alerts = [ + max(alerts, key=lambda alert: alert.event["lastReceived"]) + for alerts in alerts_by_fingerprint.values() + ] + # 1. if all alerts are with the same status, just use it + if len(set(alert.event["status"] for alert in alerts)) == 1: + return alerts[0].event["status"] + # 2. Else, if at least one of them is firing, the group is firing + if any(alert.event["status"] == AlertStatus.FIRING for alert in alerts): + return AlertStatus.FIRING + # 3. Last, just return the last status + return alerts[-1].event["status"] + + def _generate_group_payload(self, alerts): + # todo: group payload should be configurable + """This function generates the payload of the group alert. 
+ + Args: + alerts (list[Alert]): list of alerts related to the group + + Returns: + dict: the payload of the group alert + """ + + # first, group by fingerprints + alerts_by_fingerprint = {} + for alert in alerts: + if alert.fingerprint not in alerts_by_fingerprint: + alerts_by_fingerprint[alert.fingerprint] = [alert] + else: + alerts_by_fingerprint[alert.fingerprint].append(alert) + + group_payload = {} + for fingerprint, alerts in alerts_by_fingerprint.items(): + # take the latest (by timestamp) for each fingerprint: + alert = max(alerts, key=lambda alert: alert.event["lastReceived"]) + group_payload[fingerprint] = { + "name": alert.event["name"], + "number_of_alerts": len(alerts), + "fingerprint": fingerprint, + "last_status": alert.event["status"], + "last_severity": alert.event["severity"], + } + + return group_payload + + @staticmethod + def preprocess_cel_expression(cel_expression: str) -> str: + """Preprocess CEL expressions to replace string-based comparisons with numeric values where applicable.""" + + # Construct a regex pattern that matches any severity level or other comparisons + # and accounts for both single and double quotes as well as optional spaces around the operator + severities = "|".join( + [f"\"{severity.value}\"|'{severity.value}'" for severity in AlertSeverity] + ) + pattern = rf"(\w+)\s*([=> Date: Wed, 11 Sep 2024 13:11:13 +0300 Subject: [PATCH 09/36] feat: wip --- keep/api/models/db/migrations/env.py | 8 +- .../versions/2024-09-11-10-03_e066535d7d76.py | 100 ------- keep/rulesengine/rulesengine.py | 277 +++--------------- 3 files changed, 39 insertions(+), 346 deletions(-) delete mode 100644 keep/api/models/db/migrations/versions/2024-09-11-10-03_e066535d7d76.py diff --git a/keep/api/models/db/migrations/env.py b/keep/api/models/db/migrations/env.py index 6aab4afad..f655e0a9c 100644 --- a/keep/api/models/db/migrations/env.py +++ b/keep/api/models/db/migrations/env.py @@ -11,14 +11,11 @@ from keep.api.models.db.alert import * from 
keep.api.models.db.dashboard import * from keep.api.models.db.extraction import * -from keep.api.models.db.maintenance_window import * from keep.api.models.db.mapping import * from keep.api.models.db.preset import * from keep.api.models.db.provider import * from keep.api.models.db.rule import * -from keep.api.models.db.statistics import * from keep.api.models.db.tenant import * -from keep.api.models.db.topology import * from keep.api.models.db.user import * from keep.api.models.db.workflow import * @@ -55,7 +52,6 @@ async def run_migrations_offline() -> None: target_metadata=target_metadata, literal_binds=True, dialect_opts={"paramstyle": "named"}, - render_as_batch=True, ) with context.begin_transaction(): @@ -68,9 +64,7 @@ def do_run_migrations(connection: Connection) -> None: :param connection: connection to the database. """ - context.configure( - connection=connection, target_metadata=target_metadata, render_as_batch=True - ) + context.configure(connection=connection, target_metadata=target_metadata) with context.begin_transaction(): context.run_migrations() diff --git a/keep/api/models/db/migrations/versions/2024-09-11-10-03_e066535d7d76.py b/keep/api/models/db/migrations/versions/2024-09-11-10-03_e066535d7d76.py deleted file mode 100644 index aef7800b6..000000000 --- a/keep/api/models/db/migrations/versions/2024-09-11-10-03_e066535d7d76.py +++ /dev/null @@ -1,100 +0,0 @@ -"""Deduplication - -Revision ID: e066535d7d76 -Revises: 1aacee84447e -Create Date: 2024-09-11 10:03:54.782474 - -""" - -import sqlalchemy as sa -import sqlmodel -from alembic import op -from sqlalchemy.dialects import sqlite - -# revision identifiers, used by Alembic. -revision = "e066535d7d76" -down_revision = "1aacee84447e" -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! 
### - with op.batch_alter_table("alertdeduplicationrule", schema=None) as batch_op: - batch_op.add_column( - sa.Column("name", sqlmodel.sql.sqltypes.AutoString(), nullable=False) - ) - batch_op.add_column( - sa.Column("description", sqlmodel.sql.sqltypes.AutoString(), nullable=False) - ) - batch_op.add_column(sa.Column("last_updated", sa.DateTime(), nullable=False)) - batch_op.add_column( - sa.Column( - "last_updated_by", sqlmodel.sql.sqltypes.AutoString(), nullable=False - ) - ) - batch_op.add_column(sa.Column("created_at", sa.DateTime(), nullable=False)) - batch_op.add_column( - sa.Column("created_by", sqlmodel.sql.sqltypes.AutoString(), nullable=False) - ) - batch_op.add_column(sa.Column("enabled", sa.Boolean(), nullable=False)) - batch_op.alter_column( - "provider_type", - existing_type=sa.TEXT(), - type_=sqlmodel.sql.sqltypes.AutoString(), - nullable=False, - ) - batch_op.create_index( - batch_op.f("ix_alertdeduplicationrule_name"), ["name"], unique=False - ) - - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! 
### - with op.batch_alter_table("preset", schema=None) as batch_op: - batch_op.drop_constraint(None, type_="foreignkey") - batch_op.drop_constraint(None, type_="unique") - batch_op.drop_constraint(None, type_="unique") - batch_op.drop_index(batch_op.f("ix_preset_tenant_id")) - batch_op.drop_index(batch_op.f("ix_preset_created_by")) - batch_op.alter_column("id", existing_type=sa.CHAR(length=32), nullable=True) - batch_op.alter_column("options", existing_type=sqlite.JSON(), nullable=False) - - with op.batch_alter_table("alertdeduplicationrule", schema=None) as batch_op: - batch_op.add_column(sa.Column("priority", sa.INTEGER(), nullable=False)) - batch_op.drop_index(batch_op.f("ix_alertdeduplicationrule_name")) - batch_op.alter_column( - "provider_type", - existing_type=sqlmodel.sql.sqltypes.AutoString(), - type_=sa.TEXT(), - nullable=True, - ) - batch_op.drop_column("enabled") - batch_op.drop_column("created_by") - batch_op.drop_column("created_at") - batch_op.drop_column("last_updated_by") - batch_op.drop_column("last_updated") - batch_op.drop_column("description") - batch_op.drop_column("name") - - with op.batch_alter_table("alertaudit", schema=None) as batch_op: - batch_op.alter_column("description", existing_type=sa.TEXT(), nullable=False) - - op.create_table( - "_alembic_tmp_alertaudit", - sa.Column("id", sa.CHAR(length=32), nullable=False), - sa.Column("fingerprint", sa.VARCHAR(), nullable=False), - sa.Column("tenant_id", sa.VARCHAR(), nullable=False), - sa.Column("timestamp", sa.DATETIME(), nullable=False), - sa.Column("user_id", sa.VARCHAR(), nullable=False), - sa.Column("action", sa.VARCHAR(), nullable=False), - sa.Column("description", sa.TEXT(), nullable=True), - sa.ForeignKeyConstraint( - ["tenant_id"], - ["tenant.id"], - ), - sa.PrimaryKeyConstraint("id"), - ) - # ### end Alembic commands ### diff --git a/keep/rulesengine/rulesengine.py b/keep/rulesengine/rulesengine.py index 3c95d30c2..095077645 100644 --- a/keep/rulesengine/rulesengine.py +++ 
b/keep/rulesengine/rulesengine.py @@ -1,17 +1,13 @@ -import itertools import json import logging -import re import celpy -import chevron from keep.api.consts import STATIC_PRESETS -from keep.api.core.db import assign_alert_to_group as assign_alert_to_group_db -from keep.api.core.db import create_alert as create_alert_db +from keep.api.core.db import assign_alert_to_incident, get_incident_for_grouping_rule from keep.api.core.db import get_rules as get_rules_db -from keep.api.models.alert import AlertDto, AlertSeverity, AlertStatus -from keep.api.models.group import GroupDto +from keep.api.models.alert import AlertDto, AlertSeverity, IncidentDto +from keep.api.utils.cel_utils import preprocess_cel_expression class RulesEngine: @@ -19,36 +15,11 @@ def __init__(self, tenant_id=None): self.tenant_id = tenant_id self.logger = logging.getLogger(__name__) - def _calc_max_severity(self, alerts): - if not alerts: - # should not happen - self.logger.info( - "Could not calculate max severity from empty list - fallbacking to info" - ) - return str(AlertSeverity.INFO) - - alerts_by_fingerprint = {} - for alert in alerts: - if alert.fingerprint not in alerts_by_fingerprint: - alerts_by_fingerprint[alert.fingerprint] = [alert] - else: - alerts_by_fingerprint[alert.fingerprint].append(alert) - - # now take the latest (by timestamp) for each fingerprint: - alerts = [ - max(alerts, key=lambda alert: alert.event["lastReceived"]) - for alerts in alerts_by_fingerprint.values() - ] - # if all alerts are with the same status, just use it - severities = [AlertSeverity(alert.event["severity"]) for alert in alerts] - max_severity = max(severities, key=lambda severity: severity.order) - return str(max_severity) - - def run_rules(self, events: list[AlertDto]): + def run_rules(self, events: list[AlertDto]) -> list[IncidentDto]: self.logger.info("Running rules") rules = get_rules_db(tenant_id=self.tenant_id) - groups = [] + incidents_dto = {} for rule in rules: self.logger.info(f"Evaluating rule 
{rule.name}") for event in events: @@ -66,105 +37,39 @@ def run_rules(self, events: list[AlertDto]): self.logger.info( f"Rule {rule.name} on event {event.id} is relevant" ) - group_fingerprint = self._calc_group_fingerprint(event, rule) - # Add relation between this event and the group - updated_group = assign_alert_to_group_db( - tenant_id=self.tenant_id, + + rule_fingerprint = self._calc_rule_fingerprint(event, rule) + + incident = get_incident_for_grouping_rule( + self.tenant_id, rule, rule.timeframe, rule_fingerprint + ) + + incident = assign_alert_to_incident( alert_id=event.event_id, - rule_id=str(rule.id), - timeframe=rule.timeframe, - group_fingerprint=group_fingerprint, + incident_id=incident.id, + tenant_id=self.tenant_id, ) - groups.append(updated_group) + + incidents_dto[incident.id] = IncidentDto.from_db_incident(incident) else: self.logger.info( f"Rule {rule.name} on event {event.id} is not relevant" ) self.logger.info("Rules ran successfully") # if we don't have any updated groups, we don't need to create any alerts - if not groups: - return - # get the rules of the groups - updated_group_rule_ids = [group.rule_id for group in groups] - updated_rules = get_rules_db( - tenant_id=self.tenant_id, ids=updated_group_rule_ids - ) - # more convenient to work with a dict - updated_rules_dict = {str(rule.id): rule for rule in updated_rules} - # Now let's create a new alert for each group - grouped_alerts = [] - for group in groups: - rule = updated_rules_dict.get(str(group.rule_id)) - group_fingerprint = group.calculate_fingerprint() - try: - group_attributes = GroupDto.get_group_attributes(group.alerts) - except Exception: - # should not happen since I fixed the assign_alert_to_group_db - self.logger.exception( - f"Failed to calculate group attributes for group {group.id}" - ) - continue - context = { - "group_attributes": group_attributes, - # Shahar: first, group have at least one alert. 
- # second, the only supported {{ }} are the ones in the group - # attributes, so we can use the first alert because they are the same for any other alert in the group - **group.alerts[0].event, - } - group_description = chevron.render(rule.group_description, context) - group_severity = self._calc_max_severity(group.alerts) - # group all the sources from all the alerts - group_source = list( - set( - itertools.chain.from_iterable( - [alert.event["source"] for alert in group.alerts] - ) - ) - ) - # inert "keep" as the first source to emphasize that this alert was generated by keep - group_source.insert(0, "keep") - # if the group has "group by", add it to the group name - if rule.grouping_criteria: - group_name = f"Alert group genereted by rule {rule.name} | group:{group.group_fingerprint}" - else: - group_name = f"Alert group genereted by rule {rule.name}" + if not incidents_dto: + return [] - group_status = self._calc_group_status(group.alerts) - # get the payload of the group - # todo: this is not scaling, needs to find another solution - # group_payload = self._generate_group_payload(group.alerts) - # create the alert - group_alert = create_alert_db( - tenant_id=self.tenant_id, - provider_type="group", - provider_id=rule.id, - # todo: event should support list? 
- event={ - "name": group_name, - "id": group_fingerprint, - "description": group_description, - "lastReceived": group_attributes.get("last_update_time"), - "severity": group_severity, - "source": group_source, - "status": group_status, - "pushed": True, - "group": True, - # "groupPayload": group_payload, - "fingerprint": group_fingerprint, - **group_attributes, - }, - fingerprint=group_fingerprint, - ) - grouped_alerts.append(group_alert) - self.logger.info(f"Created alert {group_alert.id} for group {group.id}") - self.logger.info(f"Rules ran, {len(grouped_alerts)} alerts created") - alerts_dto = [AlertDto(**alert.event) for alert in grouped_alerts] - return alerts_dto + self.logger.info(f"Rules ran, {len(incidents_dto)} incidents created") + + return list(incidents_dto.values()) def _extract_subrules(self, expression): # CEL rules looks like '(source == "sentry") && (source == "grafana" && severity == "critical")' # and we need to extract the subrules sub_rules = expression.split(") && (") + if len(sub_rules) == 1: + return sub_rules # the first and the last rules will have a ( or ) at the beginning or the end # e.g. for the example of: # (source == "sentry") && (source == "grafana" && severity == "critical") @@ -203,7 +108,7 @@ def _check_if_rule_apply(self, rule, event: AlertDto): # no subrules matched return False - def _calc_group_fingerprint(self, event: AlertDto, rule): + def _calc_rule_fingerprint(self, event: AlertDto, rule): # extract all the grouping criteria from the event # e.g. 
if the grouping criteria is ["event.labels.queue", "event.labels.cluster"] # and the event is: @@ -214,13 +119,13 @@ def _calc_group_fingerprint(self, event: AlertDto, rule): # "foo": "bar" # } # } - # than the group_fingerprint will be "queue1,cluster1" + # than the rule_fingerprint will be "queue1,cluster1" - # note: group_fingerprint is not a unique id, since different rules can lead to the same group_fingerprint - # hence, the actual fingerprint is composed of the group_fingerprint and the group id + # note: rule_fingerprint is not a unique id, since different rules can lead to the same rule_fingerprint + # hence, the actual fingerprint is composed of the rule_fingerprint and the incident id event_payload = event.dict() grouping_criteria = rule.grouping_criteria or [] - group_fingerprint = [] + rule_fingerprint = [] for criteria in grouping_criteria: # we need to extract the value from the event # e.g. if the criteria is "event.labels.queue" @@ -229,129 +134,23 @@ def _calc_group_fingerprint(self, event: AlertDto, rule): value = event_payload for part in criteria_parts: value = value.get(part) - group_fingerprint.append(value) + if isinstance(value, list): + value = ",".join(value) + rule_fingerprint.append(value) # if, for example, the event should have labels.X but it doesn't, - # than we will have None in the group_fingerprint - if not group_fingerprint: + # than we will have None in the rule_fingerprint + if not rule_fingerprint: self.logger.warning( - f"Failed to calculate group fingerprint for event {event.id} and rule {rule.name}" + f"Failed to calculate rule fingerprint for event {event.id} and rule {rule.name}" ) return "none" # if any of the values is None, we will return "none" - if any([fingerprint is None for fingerprint in group_fingerprint]): + if any([fingerprint is None for fingerprint in rule_fingerprint]): self.logger.warning( f"Failed to fetch the appropriate labels from the event {event.id} and rule {rule.name}" ) return "none" - return 
",".join(group_fingerprint) - - def _calc_group_status(self, alerts): - """This function calculates the status of a group of alerts according to the following logic: - 1. If the last alert of each fingerprint is resolved, the group is resolved - 2. If at least one of the alerts is firing, the group is firing - - - Args: - alerts (list[Alert]): list of alerts related to the group - - Returns: - AlertStatus: the alert status (enum) - """ - # take the last alert from each fingerprint - # if all of them are resolved, the group is resolved - alerts_by_fingerprint = {} - for alert in alerts: - if alert.fingerprint not in alerts_by_fingerprint: - alerts_by_fingerprint[alert.fingerprint] = [alert] - else: - alerts_by_fingerprint[alert.fingerprint].append(alert) - - # now take the latest (by timestamp) for each fingerprint: - alerts = [ - max(alerts, key=lambda alert: alert.event["lastReceived"]) - for alerts in alerts_by_fingerprint.values() - ] - # 1. if all alerts are with the same status, just use it - if len(set(alert.event["status"] for alert in alerts)) == 1: - return alerts[0].event["status"] - # 2. Else, if at least one of them is firing, the group is firing - if any(alert.event["status"] == AlertStatus.FIRING for alert in alerts): - return AlertStatus.FIRING - # 3. Last, just return the last status - return alerts[-1].event["status"] - - def _generate_group_payload(self, alerts): - # todo: group payload should be configurable - """This function generates the payload of the group alert. 
- - Args: - alerts (list[Alert]): list of alerts related to the group - - Returns: - dict: the payload of the group alert - """ - - # first, group by fingerprints - alerts_by_fingerprint = {} - for alert in alerts: - if alert.fingerprint not in alerts_by_fingerprint: - alerts_by_fingerprint[alert.fingerprint] = [alert] - else: - alerts_by_fingerprint[alert.fingerprint].append(alert) - - group_payload = {} - for fingerprint, alerts in alerts_by_fingerprint.items(): - # take the latest (by timestamp) for each fingerprint: - alert = max(alerts, key=lambda alert: alert.event["lastReceived"]) - group_payload[fingerprint] = { - "name": alert.event["name"], - "number_of_alerts": len(alerts), - "fingerprint": fingerprint, - "last_status": alert.event["status"], - "last_severity": alert.event["severity"], - } - - return group_payload - - @staticmethod - def preprocess_cel_expression(cel_expression: str) -> str: - """Preprocess CEL expressions to replace string-based comparisons with numeric values where applicable.""" - - # Construct a regex pattern that matches any severity level or other comparisons - # and accounts for both single and double quotes as well as optional spaces around the operator - severities = "|".join( - [f"\"{severity.value}\"|'{severity.value}'" for severity in AlertSeverity] - ) - pattern = rf"(\w+)\s*([=> Date: Wed, 11 Sep 2024 16:21:07 +0300 Subject: [PATCH 10/36] feat: wip" --- .../deduplication/DeduplicationSidebar.tsx | 36 +++--- keep-ui/utils/hooks/useDeduplicationRules.ts | 11 ++ .../alert_deduplicator/alert_deduplicator.py | 36 +++++- keep/api/core/db.py | 100 +++++++++++++++- keep/api/logging.py | 4 +- keep/api/models/alert.py | 6 +- keep/api/models/db/alert.py | 24 +++- .../versions/2024-09-11-15-17_74eff4617402.py | 110 ++++++++++++++++++ keep/api/routes/deduplications.py | 2 +- keep/api/tasks/process_event_task.py | 38 +++++- keep/providers/base/base_provider.py | 14 ++- keep/providers/providers_service.py | 22 ++-- 12 files changed, 356 
insertions(+), 47 deletions(-) create mode 100644 keep/api/models/db/migrations/versions/2024-09-11-15-17_74eff4617402.py diff --git a/keep-ui/app/deduplication/DeduplicationSidebar.tsx b/keep-ui/app/deduplication/DeduplicationSidebar.tsx index 7d2f1042d..227c14d72 100644 --- a/keep-ui/app/deduplication/DeduplicationSidebar.tsx +++ b/keep-ui/app/deduplication/DeduplicationSidebar.tsx @@ -1,10 +1,11 @@ import React, { Fragment, useEffect, useState } from "react"; import { Dialog, Transition } from "@headlessui/react"; import { useForm, Controller, SubmitHandler } from "react-hook-form"; -import { Text, Button, TextInput, Callout, Badge, MultiSelect, MultiSelectItem, Switch } from "@tremor/react"; +import { Text, Button, TextInput, Callout, Badge, Select, SelectItem, MultiSelect, MultiSelectItem, Switch } from "@tremor/react"; import { IoMdClose } from "react-icons/io"; import { DeduplicationRule } from "app/deduplication/models"; import { useProviders } from "utils/hooks/useProviders"; +import { useDeduplicationFields } from "utils/hooks/useDeduplicationRules"; interface DeduplicationSidebarProps { isOpen: boolean; @@ -32,9 +33,10 @@ const DeduplicationSidebar: React.FC = ({ const [isSubmitting, setIsSubmitting] = useState(false); const { data: providers = { installed_providers: [], linked_providers: [] } } = useProviders(); + const { data: deduplicationFields = [] } = useDeduplicationFields(); const alertProviders = [...providers.installed_providers, ...providers.linked_providers].filter( - provider => provider.labels?.includes("alert") + provider => provider.tags?.includes("alert") ); const fullDeduplication = watch("full_deduplication"); @@ -152,18 +154,18 @@ const DeduplicationSidebar: React.FC = ({ control={control} rules={{ required: "Provider type is required" }} render={({ field }) => ( - {alertProviders.map((provider) => ( - - {provider.type} - + + {provider.type} {provider.name || provider.id} + ))} - + )} />
    @@ -182,11 +184,11 @@ const DeduplicationSidebar: React.FC = ({ error={!!errors.fingerprint_fields} errorMessage={errors.fingerprint_fields?.message} > - {/* Replace this with actual fingerprint field options */} - Title - Description - Severity - Source + {deduplicationFields.map((fieldName) => ( + + {fieldName} + + ))} )} /> @@ -221,11 +223,11 @@ const DeduplicationSidebar: React.FC = ({ error={!!errors.ignore_fields} errorMessage={errors.ignore_fields?.message} > - {/* Replace this with actual ignore field options */} - Title - Description - Severity - Source + {deduplicationFields.map((fieldName) => ( + + {fieldName} + + ))} )} /> diff --git a/keep-ui/utils/hooks/useDeduplicationRules.ts b/keep-ui/utils/hooks/useDeduplicationRules.ts index b90b4aca6..2ebb73c2e 100644 --- a/keep-ui/utils/hooks/useDeduplicationRules.ts +++ b/keep-ui/utils/hooks/useDeduplicationRules.ts @@ -15,3 +15,14 @@ export const useDeduplicationRules = (options: SWRConfiguration = {}) => { options ); }; + +export const useDeduplicationFields = (options: SWRConfiguration = {}) => { + const apiUrl = getApiURL(); + const { data: session } = useSession(); + + return useSWRImmutable( + () => (session ? 
`${apiUrl}/deduplications/fields` : null), + (url) => fetcher(url, session?.accessToken), + options + ); +}; diff --git a/keep/api/alert_deduplicator/alert_deduplicator.py b/keep/api/alert_deduplicator/alert_deduplicator.py index 0620b1743..e87e1ad49 100644 --- a/keep/api/alert_deduplicator/alert_deduplicator.py +++ b/keep/api/alert_deduplicator/alert_deduplicator.py @@ -6,6 +6,7 @@ from keep.api.core.config import config from keep.api.core.db import ( create_deduplication_event, + get_alerts_fields, get_all_dedup_ratio, get_all_deduplication_rules, get_custom_full_deduplication_rules, @@ -16,6 +17,8 @@ from keep.providers.providers_factory import ProvidersFactory from keep.searchengine.searchengine import SearchEngine +DEFAULT_RULE_UUID = "00000000-0000-0000-0000-000000000000" + class AlertDeduplicator: @@ -91,9 +94,23 @@ def apply_deduplication(self, alert: AlertDto) -> bool: rule = self.get_full_deduplication_rule( self.tenant_id, alert.providerId, alert.providerType ) - self.logger.debug(f"Applying deduplication rule {rule.id} to alert {alert.id}") + self.logger.debug( + "Applying deduplication rule to alert", + extra={ + "rule_id": rule.id, + "alert_id": alert.id, + }, + ) alert = self._apply_deduplication_rule(alert, rule) - self.logger.debug(f"Alert after deduplication rule {rule.id}: {alert}") + self.logger.debug( + "Alert after deduplication rule applied", + extra={ + "rule_id": rule.id, + "alert_id": alert.id, + "is_full_duplicate": alert.isFullDuplicate, + "is_partial_duplicate": alert.isPartialDuplicate, + }, + ) if alert.isFullDuplicate or alert.isPartialDuplicate: # create deduplication event create_deduplication_event( @@ -153,6 +170,7 @@ def get_full_deduplication_rule( def _get_default_full_deduplication_rule(self) -> DeduplicationRuleDto: # just return a default deduplication rule with lastReceived field return DeduplicationRuleDto( + id=DEFAULT_RULE_UUID, name="Keep Full Deduplication Rule", description="Keep Full Deduplication Rule", 
default=True, @@ -252,7 +270,13 @@ def get_deduplications(self) -> list[DeduplicationRuleDto]: return result def get_deduplication_fields(self) -> list[str]: - # SHAHAR: this could be improved by saving the fields on ingestion time - # SHAHAR: it may be broken - fields = self.search_engine.search_alerts_by_cel() - return fields + fields = get_alerts_fields(self.tenant_id) + + fields_per_provider = {} + for field in fields: + key = f"{field.provider_type}_{field.provider_id}" + if key not in fields_per_provider: + fields_per_provider[key] = [] + fields_per_provider[key].append(field.field_name) + + return fields_per_provider diff --git a/keep/api/core/db.py b/keep/api/core/db.py index 089699637..ad7272cd3 100644 --- a/keep/api/core/db.py +++ b/keep/api/core/db.py @@ -19,10 +19,13 @@ from dotenv import find_dotenv, load_dotenv from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor from sqlalchemy import and_, desc, null, update +from sqlalchemy.dialects.mysql import insert as mysql_insert +from sqlalchemy.dialects.postgresql import insert as pg_insert +from sqlalchemy.dialects.sqlite import insert as sqlite_insert from sqlalchemy.exc import IntegrityError, OperationalError from sqlalchemy.orm import joinedload, selectinload, subqueryload from sqlalchemy.sql import expression -from sqlmodel import Session, col, or_, select +from sqlmodel import Session, col, or_, select, text from keep.api.core.db_utils import create_db_engine, get_json_extract_field @@ -301,7 +304,7 @@ def add_or_update_workflow( existing_workflow.revision += 1 # Increment the revision existing_workflow.last_updated = datetime.now() # Update last_updated existing_workflow.is_deleted = False - existing_workflow.is_disabled= is_disabled + existing_workflow.is_disabled = is_disabled else: # Create a new workflow @@ -313,7 +316,7 @@ def add_or_update_workflow( created_by=created_by, updated_by=updated_by, # Set updated_by to the provided value interval=interval, - is_disabled 
=is_disabled, + is_disabled=is_disabled, workflow_raw=workflow_raw, ) session.add(workflow) @@ -499,6 +502,7 @@ def get_raw_workflow(tenant_id: str, workflow_id: str) -> str: return None return workflow.workflow_raw + def update_provider_last_pull_time(tenant_id: str, provider_id: str): extra = {"tenant_id": tenant_id, "provider_id": provider_id} logger.info("Updating provider last pull time", extra=extra) @@ -2894,3 +2898,93 @@ def get_provider_by_name(tenant_id: str, provider_name: str) -> Provider: .where(Provider.name == provider_name) ).first() return provider + + +def bulk_upsert_alert_fields( + tenant_id: str, fields: List[str], provider_id: str, provider_type: str +): + with Session(engine) as session: + try: + # Prepare the data for bulk insert + data = [ + { + "tenant_id": tenant_id, + "field_name": field, + "provider_id": provider_id, + "provider_type": provider_type, + } + for field in fields + ] + + if engine.dialect.name == "postgresql": + stmt = pg_insert(AlertField).values(data) + stmt = stmt.on_conflict_do_update( + index_elements=[ + "tenant_id", + "field_name", + ], # Unique constraint columns + set_={ + "provider_id": stmt.excluded.provider_id, + "provider_type": stmt.excluded.provider_type, + }, + ) + elif engine.dialect.name == "mysql": + stmt = mysql_insert(AlertField).values(data) + stmt = stmt.on_duplicate_key_update( + provider_id=stmt.inserted.provider_id, + provider_type=stmt.inserted.provider_type, + ) + elif engine.dialect.name == "sqlite": + stmt = sqlite_insert(AlertField).values(data) + stmt = stmt.on_conflict_do_update( + index_elements=[ + "tenant_id", + "field_name", + ], # Unique constraint columns + set_={ + "provider_id": stmt.excluded.provider_id, + "provider_type": stmt.excluded.provider_type, + }, + ) + elif engine.dialect.name == "mssql": + # SQL Server requires a raw query with a MERGE statement + values = ", ".join( + f"('{tenant_id}', '{field}', '{provider_id}', '{provider_type}')" + for field in fields + ) + + 
merge_query = text( + f""" + MERGE INTO AlertField AS target + USING (VALUES {values}) AS source (tenant_id, field_name, provider_id, provider_type) + ON target.tenant_id = source.tenant_id AND target.field_name = source.field_name + WHEN MATCHED THEN + UPDATE SET provider_id = source.provider_id, provider_type = source.provider_type + WHEN NOT MATCHED THEN + INSERT (tenant_id, field_name, provider_id, provider_type) + VALUES (source.tenant_id, source.field_name, source.provider_id, source.provider_type); + """ + ) + + session.execute(merge_query) + else: + raise NotImplementedError( + f"Upsert not supported for {engine.dialect.name}" + ) + + # Execute the statement + if engine.dialect.name != "mssql": # Already executed for SQL Server + session.execute(stmt) + session.commit() + + except IntegrityError: + # Handle any potential race conditions + session.rollback() + + +def get_alerts_fields(tenant_id: str) -> List[AlertField]: + with Session(engine) as session: + fields = session.exec( + select(AlertField).where(AlertField.tenant_id == tenant_id) + ).all() + return fields diff --git a/keep/api/logging.py b/keep/api/logging.py index d453c852c..4fbaac2d7 100644 --- a/keep/api/logging.py +++ b/keep/api/logging.py @@ -105,7 +105,9 @@ def format(self, record): # Extract extra from the _log function's local variables extra = frame.f_locals.get("extra", {}) if extra: - extra_info = " ".join([f"[{k}: {v}]" for k, v in extra.items()]) + extra_info = " ".join( + [f"[{k}: {v}]" for k, v in extra.items() if k != "raw_event"] + ) else: extra_info = "" break diff --git a/keep/api/models/alert.py b/keep/api/models/alert.py index 89d26b853..cc01b846a 100644 --- a/keep/api/models/alert.py +++ b/keep/api/models/alert.py @@ -108,8 +108,8 @@ class AlertDto(BaseModel): lastReceived: str firingStartTime: str | None = None environment: str = "undefined" - isFullDuplicate: bool | None = None - isPartialDuplicate: bool | None = None + isFullDuplicate: bool | None = False + 
isPartialDuplicate: bool | None = False duplicateReason: str | None = None service: str | None = None source: list[str] | None = [] @@ -290,7 +290,6 @@ class Config: "status": "firing", "lastReceived": "2021-01-01T00:00:00.000Z", "environment": "production", - "isDuplicate": False, "duplicateReason": None, "service": "backend", "source": ["keep"], @@ -420,6 +419,7 @@ def from_db_incident(cls, db_incident): class DeduplicationRuleDto(BaseModel): + id: str | None # UUID name: str description: str default: bool diff --git a/keep/api/models/db/alert.py b/keep/api/models/db/alert.py index 74d6a802e..6774e4494 100644 --- a/keep/api/models/db/alert.py +++ b/keep/api/models/db/alert.py @@ -4,7 +4,7 @@ from typing import List from uuid import UUID, uuid4 -from sqlalchemy import ForeignKey +from sqlalchemy import ForeignKey, UniqueConstraint from sqlalchemy.dialects.mssql import DATETIME2 as MSSQL_DATETIME2 from sqlalchemy.dialects.mysql import DATETIME as MySQL_DATETIME from sqlalchemy.engine.url import make_url @@ -184,6 +184,7 @@ class AlertDeduplicationRule(SQLModel, table=True): fingerprint_fields: list[str] = Field(sa_column=Column(JSON), default=[]) full_deduplication: bool = Field(default=False) ignore_fields: list[str] = Field(sa_column=Column(JSON), default=[]) + priority: int = Field(default=0) # for future use class Config: arbitrary_types_allowed = True @@ -207,7 +208,6 @@ class AlertDeduplicationEvent(SQLModel, table=True): # these are only soft reference since it could be linked provider provider_id: str | None = Field() provider_type: str | None = Field() - priority: int = Field(default=0) # for future use __table_args__ = ( Index( @@ -234,6 +234,26 @@ class Config: arbitrary_types_allowed = True +class AlertField(SQLModel, table=True): + id: UUID = Field(default_factory=uuid4, primary_key=True) + tenant_id: str = Field(foreign_key="tenant.id", index=True) + field_name: str = Field(index=True) + provider_id: str | None = Field(index=True) + provider_type: str 
| None = Field(index=True) + + __table_args__ = ( + UniqueConstraint("tenant_id", "field_name", name="uq_tenant_field"), + Index("ix_alert_field_tenant_id", "tenant_id"), + Index("ix_alert_field_tenant_id_field_name", "tenant_id", "field_name"), + Index( + "ix_alert_field_provider_id_provider_type", "provider_id", "provider_type" + ), + ) + + class Config: + arbitrary_types_allowed = True + + class AlertRaw(SQLModel, table=True): id: UUID = Field(default_factory=uuid4, primary_key=True) tenant_id: str = Field(foreign_key="tenant.id") diff --git a/keep/api/models/db/migrations/versions/2024-09-11-15-17_74eff4617402.py b/keep/api/models/db/migrations/versions/2024-09-11-15-17_74eff4617402.py new file mode 100644 index 000000000..f0d8df7ff --- /dev/null +++ b/keep/api/models/db/migrations/versions/2024-09-11-15-17_74eff4617402.py @@ -0,0 +1,110 @@ +"""Dedup + +Revision ID: 74eff4617402 +Revises: 710b4ff1d19e +Create Date: 2024-09-11 15:17:38.762175 + +""" + +import sqlalchemy as sa +import sqlmodel +from alembic import op +from sqlalchemy.dialects import sqlite + +# revision identifiers, used by Alembic. +revision = "74eff4617402" +down_revision = "710b4ff1d19e" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table( + "alertfield", + sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column("field_name", sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column("provider_id", sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.Column("provider_type", sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.ForeignKeyConstraint( + ["tenant_id"], + ["tenant.id"], + ), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("tenant_id", "field_name", name="uq_tenant_field"), + ) + op.create_index( + "ix_alert_field_provider_id_provider_type", + "alertfield", + ["provider_id", "provider_type"], + unique=False, + ) + op.create_index( + "ix_alert_field_tenant_id", "alertfield", ["tenant_id"], unique=False + ) + op.create_index( + "ix_alert_field_tenant_id_field_name", + "alertfield", + ["tenant_id", "field_name"], + unique=False, + ) + op.create_index( + op.f("ix_alertfield_field_name"), "alertfield", ["field_name"], unique=False + ) + op.create_index( + op.f("ix_alertfield_provider_id"), "alertfield", ["provider_id"], unique=False + ) + op.create_index( + op.f("ix_alertfield_provider_type"), + "alertfield", + ["provider_type"], + unique=False, + ) + op.create_index( + op.f("ix_alertfield_tenant_id"), "alertfield", ["tenant_id"], unique=False + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column("workflow", "is_disabled") + op.drop_constraint(None, "preset", type_="foreignkey") + op.drop_constraint(None, "preset", type_="unique") + op.drop_constraint(None, "preset", type_="unique") + op.drop_index(op.f("ix_preset_tenant_id"), table_name="preset") + op.drop_index(op.f("ix_preset_created_by"), table_name="preset") + op.alter_column("preset", "id", existing_type=sa.CHAR(length=32), nullable=True) + op.alter_column("preset", "options", existing_type=sqlite.JSON(), nullable=False) + op.add_column( + "alertdeduplicationrule", sa.Column("priority", sa.INTEGER(), nullable=False) + ) + op.drop_column("alertdeduplicationevent", "priority") + op.alter_column( + "alertaudit", "description", existing_type=sa.TEXT(), nullable=False + ) + op.create_table( + "_alembic_tmp_alertaudit", + sa.Column("id", sa.CHAR(length=32), nullable=False), + sa.Column("fingerprint", sa.VARCHAR(), nullable=False), + sa.Column("tenant_id", sa.VARCHAR(), nullable=False), + sa.Column("timestamp", sa.DATETIME(), nullable=False), + sa.Column("user_id", sa.VARCHAR(), nullable=False), + sa.Column("action", sa.VARCHAR(), nullable=False), + sa.Column("description", sa.TEXT(), nullable=True), + sa.ForeignKeyConstraint( + ["tenant_id"], + ["tenant.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + op.drop_index(op.f("ix_alertfield_tenant_id"), table_name="alertfield") + op.drop_index(op.f("ix_alertfield_provider_type"), table_name="alertfield") + op.drop_index(op.f("ix_alertfield_provider_id"), table_name="alertfield") + op.drop_index(op.f("ix_alertfield_field_name"), table_name="alertfield") + op.drop_index("ix_alert_field_tenant_id_field_name", table_name="alertfield") + op.drop_index("ix_alert_field_tenant_id", table_name="alertfield") + op.drop_index("ix_alert_field_provider_id_provider_type", table_name="alertfield") + op.drop_table("alertfield") + # ### end Alembic commands ### diff --git a/keep/api/routes/deduplications.py b/keep/api/routes/deduplications.py index 
1b9925ee9..1aecb7f66 100644 --- a/keep/api/routes/deduplications.py +++ b/keep/api/routes/deduplications.py @@ -38,7 +38,7 @@ def get_deduplication_fields( authenticated_entity: AuthenticatedEntity = Depends( IdentityManagerFactory.get_auth_verifier(["read:deduplications"]) ), -): +) -> dict[str, list[str]]: tenant_id = authenticated_entity.tenant_id logger.info("Getting deduplication fields") diff --git a/keep/api/tasks/process_event_task.py b/keep/api/tasks/process_event_task.py index 6925a14e4..c9cbcf383 100644 --- a/keep/api/tasks/process_event_task.py +++ b/keep/api/tasks/process_event_task.py @@ -17,6 +17,7 @@ from keep.api.bl.enrichments_bl import EnrichmentsBl from keep.api.bl.maintenance_windows_bl import MaintenanceWindowsBl from keep.api.core.db import ( + bulk_upsert_alert_fields, get_alerts_by_fingerprint, get_all_presets, get_enrichment_with_session, @@ -283,10 +284,10 @@ def __handle_formatted_events( # filter out the deduplicated events deduplicated_events = list( - filter(lambda event: event.isDuplicate, formatted_events) + filter(lambda event: event.isFullDuplicate, formatted_events) ) formatted_events = list( - filter(lambda event: not event.isDuplicate, formatted_events) + filter(lambda event: not event.isFullDuplicate, formatted_events) ) # save to db @@ -301,6 +302,39 @@ def __handle_formatted_events( timestamp_forced, ) + # let's save all fields to the DB so that we can use them in the future such in deduplication fields suggestions + # todo: also use it on correlation rules suggestions + for enriched_formatted_event in enriched_formatted_events: + logger.debug( + "Bulk upserting alert fields", + extra={ + "alert_event_id": enriched_formatted_event.event_id, + "alert_fingerprint": enriched_formatted_event.fingerprint, + }, + ) + fields = [] + for key, value in enriched_formatted_event.dict().items(): + if isinstance(value, dict): + for nested_key in value.keys(): + fields.append(f"{key}_{nested_key}") + else: + fields.append(key) + + 
bulk_upsert_alert_fields( + tenant_id=tenant_id, + fields=fields, + provider_id=enriched_formatted_event.providerId, + provider_type=enriched_formatted_event.providerType, + ) + + logger.debug( + "Bulk upserted alert fields", + extra={ + "alert_event_id": enriched_formatted_event.event_id, + "alert_fingerprint": enriched_formatted_event.fingerprint, + }, + ) + # after the alert enriched and mapped, lets send it to the elasticsearch elastic_client = ElasticClient(tenant_id=tenant_id) for alert in enriched_formatted_events: diff --git a/keep/providers/base/base_provider.py b/keep/providers/base/base_provider.py index 600c2309b..c6806f624 100644 --- a/keep/providers/base/base_provider.py +++ b/keep/providers/base/base_provider.py @@ -321,12 +321,21 @@ def format_alert( provider_id=provider_id, provider_type=provider_type, ) + + if not isinstance(formatted_alert, list): + formatted_alert.providerId = provider_id + formatted_alert.providerType = provider_type + formatted_alert = [formatted_alert] + + else: + for alert in formatted_alert: + alert.providerId = provider_id + alert.providerType = provider_type + # if there is no custom deduplication rule, return the formatted alert if not custom_deduplication_rule: return formatted_alert # if there is a custom deduplication rule, apply it - if not isinstance(formatted_alert, list): - formatted_alert = [formatted_alert] # apply the custom deduplication rule to calculate the fingerprint for alert in formatted_alert: logger.info( @@ -340,7 +349,6 @@ def format_alert( alert.fingerprint = cls.get_alert_fingerprint( alert, custom_deduplication_rule.deduplication_fields ) - return formatted_alert @staticmethod diff --git a/keep/providers/providers_service.py b/keep/providers/providers_service.py index 804fd17dc..b59b986d9 100644 --- a/keep/providers/providers_service.py +++ b/keep/providers/providers_service.py @@ -238,12 +238,16 @@ def provision_providers_from_env(tenant_id: str): logger.info(f"Provider {provider_name} already 
installed") continue logger.info(f"Installing provider {provider_name}") - ProvidersService.install_provider( - tenant_id=tenant_id, - installed_by="system", - provider_id=provider_config["type"], - provider_name=provider_name, - provider_type=provider_config["type"], - provider_config=provider_config["authentication"], - ) - logger.info(f"Provider {provider_name} provisioned") + try: + ProvidersService.install_provider( + tenant_id=tenant_id, + installed_by="system", + provider_id=provider_config["type"], + provider_name=provider_name, + provider_type=provider_config["type"], + provider_config=provider_config["authentication"], + ) + logger.info(f"Provider {provider_name} provisioned") + except Exception: + logger.exception(f"Failed to provision provider {provider_name}") + continue From a3d8efbde79d02461563ee0f88f05f570ab033b4 Mon Sep 17 00:00:00 2001 From: shahargl Date: Wed, 11 Sep 2024 18:58:46 +0300 Subject: [PATCH 11/36] feat: wip --- .../deduplication/DeduplicationSidebar.tsx | 148 ++++++++++++------ .../app/deduplication/DeduplicationTable.tsx | 34 ++-- keep-ui/app/deduplication/models.tsx | 1 + keep-ui/components/ui/MultiSelect.tsx | 93 +++++++++++ keep-ui/components/ui/Select.tsx | 79 +++++++--- keep-ui/utils/hooks/useDeduplicationRules.ts | 2 +- .../alert_deduplicator/alert_deduplicator.py | 4 +- 7 files changed, 271 insertions(+), 90 deletions(-) create mode 100644 keep-ui/components/ui/MultiSelect.tsx diff --git a/keep-ui/app/deduplication/DeduplicationSidebar.tsx b/keep-ui/app/deduplication/DeduplicationSidebar.tsx index 227c14d72..dff0ab963 100644 --- a/keep-ui/app/deduplication/DeduplicationSidebar.tsx +++ b/keep-ui/app/deduplication/DeduplicationSidebar.tsx @@ -1,30 +1,41 @@ -import React, { Fragment, useEffect, useState } from "react"; +import React, { Fragment, useEffect, useState, useMemo } from "react"; import { Dialog, Transition } from "@headlessui/react"; import { useForm, Controller, SubmitHandler } from "react-hook-form"; -import { Text, 
Button, TextInput, Callout, Badge, Select, SelectItem, MultiSelect, MultiSelectItem, Switch } from "@tremor/react"; +import { Text, Button, TextInput, Callout, Badge, Switch } from "@tremor/react"; import { IoMdClose } from "react-icons/io"; import { DeduplicationRule } from "app/deduplication/models"; import { useProviders } from "utils/hooks/useProviders"; import { useDeduplicationFields } from "utils/hooks/useDeduplicationRules"; +import { GroupBase } from "react-select"; +import Select from "@/components/ui/Select"; +import MultiSelect from "@/components/ui/MultiSelect"; + + +interface ProviderOption { + value: string; + label: string; + logoUrl: string; +} interface DeduplicationSidebarProps { isOpen: boolean; toggle: VoidFunction; - defaultValue?: Partial; + selectedDeduplicationRule: DeduplicationRule | null; onSubmit: (data: Partial) => Promise; } const DeduplicationSidebar: React.FC = ({ isOpen, toggle, - defaultValue, + selectedDeduplicationRule, onSubmit, }) => { const { control, handleSubmit, setValue, reset, setError, watch, formState: { errors }, clearErrors } = useForm>({ - defaultValues: defaultValue || { + defaultValues: selectedDeduplicationRule || { name: "", description: "", provider_type: "", + provider_id: "", fingerprint_fields: [], full_deduplication: false, ignore_fields: [], @@ -33,28 +44,40 @@ const DeduplicationSidebar: React.FC = ({ const [isSubmitting, setIsSubmitting] = useState(false); const { data: providers = { installed_providers: [], linked_providers: [] } } = useProviders(); - const { data: deduplicationFields = [] } = useDeduplicationFields(); - - const alertProviders = [...providers.installed_providers, ...providers.linked_providers].filter( - provider => provider.tags?.includes("alert") - ); + const { data: deduplicationFields = {} } = useDeduplicationFields(); + const alertProviders = useMemo(() => [ + { id: null, "type": "keep", "details": { name: "Keep" }, tags: ["alert"] }, + ...providers.installed_providers, + 
...providers.linked_providers + ].filter(provider => provider.tags?.includes("alert")), [providers]); const fullDeduplication = watch("full_deduplication"); + const selectedProviderType = watch("provider_type"); + const selectedProviderId = watch("provider_id"); + + const availableFields = useMemo(() => { + if (selectedProviderType && selectedProviderId) { + const key = `${selectedProviderType}_${selectedProviderId}`; + return deduplicationFields[key] || []; + } + return []; + }, [selectedProviderType, selectedProviderId, deduplicationFields]); useEffect(() => { - if (isOpen && defaultValue) { - reset(defaultValue); + if (isOpen && selectedDeduplicationRule) { + reset(selectedDeduplicationRule); } else if (isOpen) { reset({ name: "", description: "", provider_type: "", + provider_id: "", fingerprint_fields: [], full_deduplication: false, ignore_fields: [], }); } - }, [isOpen, defaultValue, reset]); + }, [isOpen, selectedDeduplicationRule, reset]); const handleToggle = () => { if (isOpen) { @@ -102,7 +125,7 @@ const DeduplicationSidebar: React.FC = ({
    - {defaultValue ? "Edit Deduplication Rule" : "Add Deduplication Rule"} + {selectedDeduplicationRule ? "Edit Deduplication Rule" : "Add Deduplication Rule"} Beta
    ( - + > + {...field} + options={alertProviders.map((provider) => ({ + value: `${provider.type}_${provider.id}`, + label: provider.details?.name || provider.id || "main", + logoUrl: `/icons/${provider.type}-icon.png` + }))} + placeholder="Select provider" + onChange={(selectedOption) => { + if (selectedOption) { + const [providerType, providerId] = selectedOption.value.split('_'); + setValue("provider_type", providerType); + setValue("provider_id", providerId as any); + } + }} + value={alertProviders.find( + (provider) => `${provider.type}_${provider.id}` === `${selectedProviderType}_${selectedProviderId}` + ) ? { + value: `${selectedProviderType}_${selectedProviderId}`, + label: alertProviders.find( + (provider) => `${provider.type}_${provider.id}` === `${selectedProviderType}_${selectedProviderId}` + )?.details?.name || selectedProviderId || "main", + logoUrl: `/icons/${selectedProviderType}-icon.png` + } : null} + /> )} /> + {errors.provider_type && ( +

    {errors.provider_type.message}

    + )}
    )} {errors.root?.serverError && ( diff --git a/keep-ui/app/deduplication/DeduplicationTable.tsx b/keep-ui/app/deduplication/DeduplicationTable.tsx index 9acda1836..0d34cd064 100644 --- a/keep-ui/app/deduplication/DeduplicationTable.tsx +++ b/keep-ui/app/deduplication/DeduplicationTable.tsx @@ -35,38 +35,38 @@ export const DeduplicationTable: React.FC = ({ deduplic const router = useRouter(); const searchParams = useSearchParams(); - const selectedId = searchParams ? searchParams.get("id") : null; + let selectedId = searchParams ? searchParams.get("id") : null; const selectedRule = deduplicationRules.find((rule) => rule.id === selectedId); const [isSidebarOpen, setIsSidebarOpen] = useState(false); const [selectedDeduplicationRule, setSelectedDeduplicationRule] = useState(null); - const deduplicationFormFromRule = useMemo(() => { - if (selectedDeduplicationRule) { - return { - name: selectedDeduplicationRule.name, - description: selectedDeduplicationRule.description, - timeUnit: "seconds", - }; - } - - return {}; - }, [selectedDeduplicationRule]); - const onDeduplicationClick = (rule: DeduplicationRule) => { setSelectedDeduplicationRule(rule); setIsSidebarOpen(true); + router.push(`/deduplication?id=${rule.id}`); }; const onCloseDeduplication = () => { setIsSidebarOpen(false); setSelectedDeduplicationRule(null); + router.push('/deduplication'); }; useEffect(() => { - if (selectedRule) { - onDeduplicationClick(selectedRule); + if (selectedId && !isSidebarOpen) { + const rule = deduplicationRules.find((r) => r.id === selectedId); + if (rule) { + setSelectedDeduplicationRule(rule); + setIsSidebarOpen(true); + } + } + }, [selectedId, deduplicationRules]); + + useEffect(() => { + if (!isSidebarOpen && selectedId) { + router.push('/deduplication'); } - }, [selectedRule]); + }, [isSidebarOpen, selectedId, router]); const DEDUPLICATION_TABLE_COLS = useMemo( () => [ @@ -238,7 +238,7 @@ export const DeduplicationTable: React.FC = ({ deduplic
    diff --git a/keep-ui/app/deduplication/models.tsx b/keep-ui/app/deduplication/models.tsx index fd14a26d6..8e5e0d3ec 100644 --- a/keep-ui/app/deduplication/models.tsx +++ b/keep-ui/app/deduplication/models.tsx @@ -5,6 +5,7 @@ export interface DeduplicationRule { default: boolean; distribution: { hour: number; number: number }[]; provider_type: string; + provider_id: string; last_updated: string; last_updated_by: string; created_at: string; diff --git a/keep-ui/components/ui/MultiSelect.tsx b/keep-ui/components/ui/MultiSelect.tsx new file mode 100644 index 000000000..bc45cfc00 --- /dev/null +++ b/keep-ui/components/ui/MultiSelect.tsx @@ -0,0 +1,93 @@ +import React from "react"; +import Select from "react-select"; +import { components, Props as SelectProps, GroupBase, StylesConfig } from "react-select"; +import { Badge } from "@tremor/react"; + +type OptionType = { value: string; label: string }; + +const customStyles: StylesConfig = { + control: (provided, state) => ({ + ...provided, + borderColor: state.isFocused ? 'orange' : '#ccc', + '&:hover': { + borderColor: 'orange', + }, + boxShadow: state.isFocused ? '0 0 0 1px orange' : null, + backgroundColor: 'transparent', + }), + option: (provided, state) => ({ + ...provided, + backgroundColor: state.isSelected ? 'orange' : state.isFocused ? 'rgba(255, 165, 0, 0.1)' : 'transparent', + color: state.isSelected ? 
'white' : 'black', + '&:hover': { + backgroundColor: 'rgba(255, 165, 0, 0.3)', + }, + }), + multiValue: (provided) => ({ + ...provided, + backgroundColor: 'default', + }), + multiValueLabel: (provided) => ({ + ...provided, + color: 'black', + }), + multiValueRemove: (provided) => ({ + ...provided, + color: 'orange', + '&:hover': { + backgroundColor: 'orange', + color: 'white', + }, + }), + menuPortal: (base) => ({ + ...base, + zIndex: 9999, + }), + menu: (provided) => ({ + ...provided, + zIndex: 9999, + }), +}; + +type CustomSelectProps = SelectProps> & { + components?: { + Option?: typeof components.Option; + MultiValue?: typeof components.MultiValue; + }; +}; + +const customComponents: CustomSelectProps['components'] = { + Option: ({ children, ...props }) => ( + + + {children} + + + ), + MultiValue: ({ children, ...props }) => ( + + + {children} + + + ), +}; + +type MultiSelectProps = SelectProps>; + +const MultiSelect: React.FC = ({ value, onChange, options, placeholder, ...rest }) => ( + > value={value} onChange={onChange} options={options} placeholder={placeholder} styles={customStyles} components={customComponents} - menuPortalTarget={document.body} // Render the menu in a portal + menuPortalTarget={document.body} menuPosition="fixed" - getOptionLabel={getOptionLabel} // Support custom getOptionLabel - getOptionValue={getOptionValue} // Support custom getOptionValue + getOptionLabel={getOptionLabel} + getOptionValue={getOptionValue} /> ); diff --git a/keep-ui/utils/hooks/useDeduplicationRules.ts b/keep-ui/utils/hooks/useDeduplicationRules.ts index 2ebb73c2e..1d6d43c54 100644 --- a/keep-ui/utils/hooks/useDeduplicationRules.ts +++ b/keep-ui/utils/hooks/useDeduplicationRules.ts @@ -20,7 +20,7 @@ export const useDeduplicationFields = (options: SWRConfiguration = {}) => { const apiUrl = getApiURL(); const { data: session } = useSession(); - return useSWRImmutable( + return useSWRImmutable>( () => (session ? 
`${apiUrl}/deduplications/fields` : null), (url) => fetcher(url, session?.accessToken), options diff --git a/keep/api/alert_deduplicator/alert_deduplicator.py b/keep/api/alert_deduplicator/alert_deduplicator.py index e87e1ad49..8d6cc2465 100644 --- a/keep/api/alert_deduplicator/alert_deduplicator.py +++ b/keep/api/alert_deduplicator/alert_deduplicator.py @@ -274,7 +274,9 @@ def get_deduplication_fields(self) -> list[str]: fields_per_provider = {} for field in fields: - key = f"{field.provider_type}_{field.provider_id}" + provider_type = field.provider_type if field.provider_type else "null" + provider_id = field.provider_id if field.provider_id else "null" + key = f"{provider_type}_{provider_id}" if key not in fields_per_provider: fields_per_provider[key] = [] fields_per_provider[key].append(field.field_name) From cbd12e5a65c27299ef10dcccad27c75406b2b880 Mon Sep 17 00:00:00 2001 From: shahargl Date: Sun, 15 Sep 2024 14:11:42 +0300 Subject: [PATCH 12/36] feat: wip --- .../alert_deduplicator/alert_deduplicator.py | 4 +- .../versions/2024-09-11-15-17_74eff4617402.py | 110 ------------------ 2 files changed, 3 insertions(+), 111 deletions(-) delete mode 100644 keep/api/models/db/migrations/versions/2024-09-11-15-17_74eff4617402.py diff --git a/keep/api/alert_deduplicator/alert_deduplicator.py b/keep/api/alert_deduplicator/alert_deduplicator.py index 8d6cc2465..947b4485c 100644 --- a/keep/api/alert_deduplicator/alert_deduplicator.py +++ b/keep/api/alert_deduplicator/alert_deduplicator.py @@ -272,13 +272,15 @@ def get_deduplications(self) -> list[DeduplicationRuleDto]: def get_deduplication_fields(self) -> list[str]: fields = get_alerts_fields(self.tenant_id) + default_fields = ["source", "service", "description"] + fields_per_provider = {} for field in fields: provider_type = field.provider_type if field.provider_type else "null" provider_id = field.provider_id if field.provider_id else "null" key = f"{provider_type}_{provider_id}" if key not in fields_per_provider: - 
fields_per_provider[key] = [] + fields_per_provider[key] = copy.copy(default_fields) fields_per_provider[key].append(field.field_name) return fields_per_provider diff --git a/keep/api/models/db/migrations/versions/2024-09-11-15-17_74eff4617402.py b/keep/api/models/db/migrations/versions/2024-09-11-15-17_74eff4617402.py deleted file mode 100644 index f0d8df7ff..000000000 --- a/keep/api/models/db/migrations/versions/2024-09-11-15-17_74eff4617402.py +++ /dev/null @@ -1,110 +0,0 @@ -"""Dedup - -Revision ID: 74eff4617402 -Revises: 710b4ff1d19e -Create Date: 2024-09-11 15:17:38.762175 - -""" - -import sqlalchemy as sa -import sqlmodel -from alembic import op -from sqlalchemy.dialects import sqlite - -# revision identifiers, used by Alembic. -revision = "74eff4617402" -down_revision = "710b4ff1d19e" -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.create_table( - "alertfield", - sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), - sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), - sa.Column("field_name", sqlmodel.sql.sqltypes.AutoString(), nullable=False), - sa.Column("provider_id", sqlmodel.sql.sqltypes.AutoString(), nullable=True), - sa.Column("provider_type", sqlmodel.sql.sqltypes.AutoString(), nullable=True), - sa.ForeignKeyConstraint( - ["tenant_id"], - ["tenant.id"], - ), - sa.PrimaryKeyConstraint("id"), - sa.UniqueConstraint("tenant_id", "field_name", name="uq_tenant_field"), - ) - op.create_index( - "ix_alert_field_provider_id_provider_type", - "alertfield", - ["provider_id", "provider_type"], - unique=False, - ) - op.create_index( - "ix_alert_field_tenant_id", "alertfield", ["tenant_id"], unique=False - ) - op.create_index( - "ix_alert_field_tenant_id_field_name", - "alertfield", - ["tenant_id", "field_name"], - unique=False, - ) - op.create_index( - op.f("ix_alertfield_field_name"), "alertfield", ["field_name"], unique=False - ) - 
op.create_index( - op.f("ix_alertfield_provider_id"), "alertfield", ["provider_id"], unique=False - ) - op.create_index( - op.f("ix_alertfield_provider_type"), - "alertfield", - ["provider_type"], - unique=False, - ) - op.create_index( - op.f("ix_alertfield_tenant_id"), "alertfield", ["tenant_id"], unique=False - ) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.drop_column("workflow", "is_disabled") - op.drop_constraint(None, "preset", type_="foreignkey") - op.drop_constraint(None, "preset", type_="unique") - op.drop_constraint(None, "preset", type_="unique") - op.drop_index(op.f("ix_preset_tenant_id"), table_name="preset") - op.drop_index(op.f("ix_preset_created_by"), table_name="preset") - op.alter_column("preset", "id", existing_type=sa.CHAR(length=32), nullable=True) - op.alter_column("preset", "options", existing_type=sqlite.JSON(), nullable=False) - op.add_column( - "alertdeduplicationrule", sa.Column("priority", sa.INTEGER(), nullable=False) - ) - op.drop_column("alertdeduplicationevent", "priority") - op.alter_column( - "alertaudit", "description", existing_type=sa.TEXT(), nullable=False - ) - op.create_table( - "_alembic_tmp_alertaudit", - sa.Column("id", sa.CHAR(length=32), nullable=False), - sa.Column("fingerprint", sa.VARCHAR(), nullable=False), - sa.Column("tenant_id", sa.VARCHAR(), nullable=False), - sa.Column("timestamp", sa.DATETIME(), nullable=False), - sa.Column("user_id", sa.VARCHAR(), nullable=False), - sa.Column("action", sa.VARCHAR(), nullable=False), - sa.Column("description", sa.TEXT(), nullable=True), - sa.ForeignKeyConstraint( - ["tenant_id"], - ["tenant.id"], - ), - sa.PrimaryKeyConstraint("id"), - ) - op.drop_index(op.f("ix_alertfield_tenant_id"), table_name="alertfield") - op.drop_index(op.f("ix_alertfield_provider_type"), table_name="alertfield") - op.drop_index(op.f("ix_alertfield_provider_id"), table_name="alertfield") - 
op.drop_index(op.f("ix_alertfield_field_name"), table_name="alertfield") - op.drop_index("ix_alert_field_tenant_id_field_name", table_name="alertfield") - op.drop_index("ix_alert_field_tenant_id", table_name="alertfield") - op.drop_index("ix_alert_field_provider_id_provider_type", table_name="alertfield") - op.drop_table("alertfield") - # ### end Alembic commands ### From 728a72a9661560b8a54543eb5d40ba68ad4af4d4 Mon Sep 17 00:00:00 2001 From: shahargl Date: Sun, 15 Sep 2024 17:41:32 +0300 Subject: [PATCH 13/36] feat: wip --- .../deduplication/DeduplicationSidebar.tsx | 104 ++++++++++++++---- .../app/deduplication/DeduplicationTable.tsx | 5 +- keep-ui/components/ui/Select.tsx | 7 +- .../alert_deduplicator/alert_deduplicator.py | 60 +++++++++- keep/api/core/db.py | 90 +++++++++++++++ keep/api/models/alert.py | 12 +- keep/api/routes/deduplications.py | 81 +++++++++++++- 7 files changed, 327 insertions(+), 32 deletions(-) diff --git a/keep-ui/app/deduplication/DeduplicationSidebar.tsx b/keep-ui/app/deduplication/DeduplicationSidebar.tsx index dff0ab963..d9b448cb3 100644 --- a/keep-ui/app/deduplication/DeduplicationSidebar.tsx +++ b/keep-ui/app/deduplication/DeduplicationSidebar.tsx @@ -9,7 +9,9 @@ import { useDeduplicationFields } from "utils/hooks/useDeduplicationRules"; import { GroupBase } from "react-select"; import Select from "@/components/ui/Select"; import MultiSelect from "@/components/ui/MultiSelect"; - +import { ExclamationTriangleIcon } from "@heroicons/react/24/outline"; +import { getApiURL } from "utils/apiUrl"; +import { useSession } from "next-auth/react"; interface ProviderOption { value: string; @@ -22,6 +24,7 @@ interface DeduplicationSidebarProps { toggle: VoidFunction; selectedDeduplicationRule: DeduplicationRule | null; onSubmit: (data: Partial) => Promise; + mutateDeduplicationRules: () => Promise; } const DeduplicationSidebar: React.FC = ({ @@ -29,6 +32,7 @@ const DeduplicationSidebar: React.FC = ({ toggle, selectedDeduplicationRule, onSubmit, + 
mutateDeduplicationRules, }) => { const { control, handleSubmit, setValue, reset, setError, watch, formState: { errors }, clearErrors } = useForm>({ defaultValues: selectedDeduplicationRule || { @@ -45,6 +49,7 @@ const DeduplicationSidebar: React.FC = ({ const [isSubmitting, setIsSubmitting] = useState(false); const { data: providers = { installed_providers: [], linked_providers: [] } } = useProviders(); const { data: deduplicationFields = {} } = useDeduplicationFields(); + const { data: session } = useSession(); const alertProviders = useMemo(() => [ { id: null, "type": "keep", "details": { name: "Keep" }, tags: ["alert"] }, @@ -54,14 +59,20 @@ const DeduplicationSidebar: React.FC = ({ const fullDeduplication = watch("full_deduplication"); const selectedProviderType = watch("provider_type"); const selectedProviderId = watch("provider_id"); + const fingerprintFields = watch("fingerprint_fields"); + const ignoreFields = watch("ignore_fields"); + const availableFields = useMemo(() => { - if (selectedProviderType && selectedProviderId) { - const key = `${selectedProviderType}_${selectedProviderId}`; - return deduplicationFields[key] || []; + // todo: add default fields for each provider from the backend + const defaultFields = ["source", "service", "description", "fingerprint", "name", "lastReceived"]; + if (selectedProviderType) { + const key = `${selectedProviderType}_${selectedProviderId || 'null'}`; + const providerFields = deduplicationFields[key] || []; + return [...new Set([...defaultFields, ...providerFields, ...(fingerprintFields ?? []), ...(ignoreFields ?? [])])]; } - return []; - }, [selectedProviderType, selectedProviderId, deduplicationFields]); + return [...new Set([...defaultFields, ...(fingerprintFields ?? 
[])])]; + }, [selectedProviderType, selectedProviderId, deduplicationFields, fingerprintFields, ignoreFields]); useEffect(() => { if (isOpen && selectedDeduplicationRule) { @@ -90,10 +101,37 @@ const DeduplicationSidebar: React.FC = ({ setIsSubmitting(true); clearErrors(); try { - await onSubmit(data); - handleToggle(); + const apiUrl = getApiURL(); + let url = `${apiUrl}/deduplication`; + + if (selectedDeduplicationRule && selectedDeduplicationRule.id) { + url += `/${selectedDeduplicationRule.id}`; + } + + // Use POST if there's no selectedDeduplicationRule.id (it's a default rule or new rule) + // This ensures we always create a new rule for default rules + const method = (!selectedDeduplicationRule || !selectedDeduplicationRule.id) ? "POST" : "PUT"; + + const response = await fetch(url, { + method: method, + headers: { + Authorization: `Bearer ${session?.accessToken}`, + "Content-Type": "application/json", + }, + body: JSON.stringify(data), + }); + + if (response.ok) { + console.log("Deduplication rule saved:", data); + reset(); + handleToggle(); + await mutateDeduplicationRules(); + } else { + const errorData = await response.json(); + setError("root.serverError", { type: "manual", message: errorData.message || "Failed to save deduplication rule" }); + } } catch (error) { - setError("root.serverError", { type: "manual", message: "Failed to save deduplication rule" }); + setError("root.serverError", { type: "manual", message: "An unexpected error occurred" }); } finally { setIsSubmitting(false); } @@ -127,17 +165,36 @@ const DeduplicationSidebar: React.FC = ({ {selectedDeduplicationRule ? "Edit Deduplication Rule" : "Add Deduplication Rule"} Beta + {selectedDeduplicationRule?.default && Default Rule}
    + + {selectedDeduplicationRule?.default && ( +
    + + + Editing a default deduplication rule requires advanced knowledge. Default rules are carefully designed to provide optimal deduplication for specific alert types. Modifying these rules may impact the efficiency of your alert processing. If you're unsure about making changes, we recommend creating a new custom rule instead of modifying the default one. + +

    + Learn more about deduplication rules +
    +
    + )} +
    - + = ({ />
    - + = ({ />
    - + = ({ render={({ field }) => ( > {...field} + isDisabled={!!selectedDeduplicationRule?.default} options={alertProviders.map((provider) => ({ value: `${provider.type}_${provider.id}`, label: provider.details?.name || provider.id || "main", @@ -198,9 +256,9 @@ const DeduplicationSidebar: React.FC = ({ value: `${selectedProviderType}_${selectedProviderId}`, label: alertProviders.find( (provider) => `${provider.type}_${provider.id}` === `${selectedProviderType}_${selectedProviderId}` - )?.details?.name || selectedProviderId || "main", + )?.details?.name || (selectedProviderId !== "null" && selectedProviderId !== null ? selectedProviderId : "main"), logoUrl: `/icons/${selectedProviderType}-icon.png` - } : null} + } as ProviderOption : null} /> )} /> @@ -209,9 +267,9 @@ const DeduplicationSidebar: React.FC = ({ )}
    - + = ({ )}
    - +
    {fullDeduplication && (
    - + = ({ deduplic Set up rules to deduplicate similar alerts
    -
    diff --git a/keep-ui/components/ui/Select.tsx b/keep-ui/components/ui/Select.tsx index 0fecae97c..208c3654d 100644 --- a/keep-ui/components/ui/Select.tsx +++ b/keep-ui/components/ui/Select.tsx @@ -13,7 +13,7 @@ const customStyles: StylesConfig = { borderColor: 'orange', }, boxShadow: state.isFocused ? '0 0 0 1px orange' : 'none', - backgroundColor: 'transparent', + backgroundColor: state.isDisabled ? 'rgba(255, 165, 0, 0.1)' : 'transparent', }), option: (provided, state) => ({ ...provided, @@ -23,9 +23,10 @@ const customStyles: StylesConfig = { backgroundColor: 'rgba(255, 165, 0, 0.3)', }, }), - singleValue: (provided) => ({ + singleValue: (provided, state) => ({ ...provided, color: 'black', + backgroundColor: state.isDisabled ? 'rgba(255, 165, 0, 0.1)' : 'transparent', }), menuPortal: (base) => ({ ...base, @@ -105,6 +106,7 @@ const StyledSelect: React.FC = ({ placeholder, getOptionLabel, getOptionValue, + ...rest }) => ( > value={value} @@ -117,6 +119,7 @@ const StyledSelect: React.FC = ({ menuPosition="fixed" getOptionLabel={getOptionLabel} getOptionValue={getOptionValue} + {...rest} /> ); diff --git a/keep/api/alert_deduplicator/alert_deduplicator.py b/keep/api/alert_deduplicator/alert_deduplicator.py index 947b4485c..5b0b324b6 100644 --- a/keep/api/alert_deduplicator/alert_deduplicator.py +++ b/keep/api/alert_deduplicator/alert_deduplicator.py @@ -6,14 +6,21 @@ from keep.api.core.config import config from keep.api.core.db import ( create_deduplication_event, + create_deduplication_rule, + delete_deduplication_rule, get_alerts_fields, get_all_dedup_ratio, get_all_deduplication_rules, get_custom_full_deduplication_rules, get_last_alert_hash_by_fingerprint, get_provider_distribution, + update_deduplication_rule, +) +from keep.api.models.alert import ( + AlertDto, + DeduplicationRuleDto, + DeduplicationRuleRequestDto, ) -from keep.api.models.alert import AlertDto, DeduplicationRuleDto from keep.providers.providers_factory import ProvidersFactory from 
keep.searchengine.searchengine import SearchEngine @@ -272,15 +279,60 @@ def get_deduplications(self) -> list[DeduplicationRuleDto]: def get_deduplication_fields(self) -> list[str]: fields = get_alerts_fields(self.tenant_id) - default_fields = ["source", "service", "description"] - fields_per_provider = {} for field in fields: provider_type = field.provider_type if field.provider_type else "null" provider_id = field.provider_id if field.provider_id else "null" key = f"{provider_type}_{provider_id}" if key not in fields_per_provider: - fields_per_provider[key] = copy.copy(default_fields) + fields_per_provider[key] = [] fields_per_provider[key].append(field.field_name) return fields_per_provider + + def create_deduplication_rule( + self, rule: DeduplicationRuleRequestDto, created_by: str + ) -> DeduplicationRuleDto: + # Use the db function to create a new deduplication rule + new_rule = create_deduplication_rule( + tenant_id=self.tenant_id, + name=rule.name, + description=rule.description, + provider_id=rule.provider_id, + provider_type=rule.provider_type, + created_by=created_by, + enabled=True, + fingerprint_fields=rule.fingerprint_fields, + full_deduplication=rule.full_deduplication, + ignore_fields=rule.ignore_fields or [], + priority=0, + ) + + return new_rule + + def update_deduplication_rule( + self, rule_id: str, rule: DeduplicationRuleRequestDto, updated_by: str + ) -> DeduplicationRuleDto: + # Use the db function to update an existing deduplication rule + updated_rule = update_deduplication_rule( + rule_id=rule_id, + tenant_id=self.tenant_id, + name=rule.name, + description=rule.description, + provider_id=rule.provider_id, + provider_type=rule.provider_type, + last_updated_by=updated_by, + enabled=True, + fingerprint_fields=rule.fingerprint_fields, + full_deduplication=rule.full_deduplication, + ignore_fields=rule.ignore_fields or [], + priority=0, + ) + + return updated_rule + + def delete_deduplication_rule(self, rule_id: str) -> bool: + # Use the db 
function to delete a deduplication rule + success = delete_deduplication_rule(rule_id=rule_id, tenant_id=self.tenant_id) + + return success diff --git a/keep/api/core/db.py b/keep/api/core/db.py index 48c14c09a..96f3176f8 100644 --- a/keep/api/core/db.py +++ b/keep/api/core/db.py @@ -1671,6 +1671,96 @@ def get_custom_deduplication_rule(tenant_id, provider_id, provider_type): return rule +def create_deduplication_rule( + tenant_id: str, + name: str, + description: str, + provider_id: str | None, + provider_type: str, + created_by: str, + last_updated_by: str | None = None, + enabled: bool = True, + fingerprint_fields: list[str] = [], + full_deduplication: bool = False, + ignore_fields: list[str] = [], + priority: int = 0, +): + with Session(engine) as session: + new_rule = AlertDeduplicationRule( + tenant_id=tenant_id, + name=name, + description=description, + provider_id=provider_id, + provider_type=provider_type, + last_updated_by=last_updated_by, + created_by=created_by, + enabled=enabled, + fingerprint_fields=fingerprint_fields, + full_deduplication=full_deduplication, + ignore_fields=ignore_fields, + priority=priority, + ) + session.add(new_rule) + session.commit() + session.refresh(new_rule) + return new_rule + + +def update_deduplication_rule( + rule_id: str, + tenant_id: str, + name: str, + description: str, + provider_id: str | None, + provider_type: str, + last_updated_by: str, + enabled: bool = True, + fingerprint_fields: list[str] = [], + full_deduplication: bool = False, + ignore_fields: list[str] = [], + priority: int = 0, +): + with Session(engine) as session: + rule = session.exec( + select(AlertDeduplicationRule) + .where(AlertDeduplicationRule.id == rule_id) + .where(AlertDeduplicationRule.tenant_id == tenant_id) + ).first() + if not rule: + raise ValueError(f"No deduplication rule found with id {rule_id}") + + rule.name = name + rule.description = description + rule.provider_id = provider_id + rule.provider_type = provider_type + 
rule.last_updated_by = last_updated_by + rule.enabled = enabled + rule.fingerprint_fields = fingerprint_fields + rule.full_deduplication = full_deduplication + rule.ignore_fields = ignore_fields + rule.priority = priority + + session.add(rule) + session.commit() + session.refresh(rule) + return rule + + +def delete_deduplication_rule(rule_id: str, tenant_id: str) -> bool: + with Session(engine) as session: + rule = session.exec( + select(AlertDeduplicationRule) + .where(AlertDeduplicationRule.id == rule_id) + .where(AlertDeduplicationRule.tenant_id == tenant_id) + ).first() + if not rule: + return False + + session.delete(rule) + session.commit() + return True + + def get_custom_full_deduplication_rules(tenant_id, provider_id, provider_type): with Session(engine) as session: rules = session.exec( diff --git a/keep/api/models/alert.py b/keep/api/models/alert.py index cc01b846a..ab1a6363d 100644 --- a/keep/api/models/alert.py +++ b/keep/api/models/alert.py @@ -4,7 +4,7 @@ import logging import uuid from enum import Enum -from typing import Any, Dict +from typing import Any, Dict, Optional from uuid import UUID import pytz @@ -436,3 +436,13 @@ class DeduplicationRuleDto(BaseModel): fingerprint_fields: list[str] full_deduplication: bool ignore_fields: list[str] + + +class DeduplicationRuleRequestDto(BaseModel): + name: str + description: Optional[str] = None + provider_type: str + provider_id: Optional[str] = None + fingerprint_fields: list[str] + full_deduplication: bool = False + ignore_fields: Optional[list[str]] = None diff --git a/keep/api/routes/deduplications.py b/keep/api/routes/deduplications.py index 1aecb7f66..64b19ece5 100644 --- a/keep/api/routes/deduplications.py +++ b/keep/api/routes/deduplications.py @@ -1,8 +1,9 @@ import logging -from fastapi import APIRouter, Depends +from fastapi import APIRouter, Depends, HTTPException from keep.api.alert_deduplicator.alert_deduplicator import AlertDeduplicator +from keep.api.models.alert import 
DeduplicationRuleRequestDto as DeduplicationRule from keep.identitymanager.authenticatedentity import AuthenticatedEntity from keep.identitymanager.identitymanagerfactory import IdentityManagerFactory @@ -47,3 +48,81 @@ def get_deduplication_fields( logger.info("Got deduplication fields") return fields + + +@router.post( + "", + description="Create Deduplication Rule", +) +def create_deduplication_rule( + rule: DeduplicationRule, + authenticated_entity: AuthenticatedEntity = Depends( + IdentityManagerFactory.get_auth_verifier(["write:deduplications"]) + ), +): + tenant_id = authenticated_entity.tenant_id + logger.info( + "Creating deduplication rule", + extra={"tenant_id": tenant_id, "rule": rule.dict()}, + ) + alert_deduplicator = AlertDeduplicator(tenant_id) + try: + # This is a custom rule + created_rule = alert_deduplicator.create_deduplication_rule( + rule=rule, created_by=authenticated_entity.email + ) + logger.info("Created deduplication rule") + return created_rule + except Exception as e: + logger.exception("Error creating deduplication rule") + raise HTTPException(status_code=400, detail=str(e)) + + +@router.put( + "/{rule_id}", + description="Update Deduplication Rule", +) +def update_deduplication_rule( + rule_id: str, + rule: DeduplicationRule, + authenticated_entity: AuthenticatedEntity = Depends( + IdentityManagerFactory.get_auth_verifier(["write:deduplications"]) + ), +): + tenant_id = authenticated_entity.tenant_id + logger.info("Updating deduplication rule", extra={"rule_id": rule_id}) + alert_deduplicator = AlertDeduplicator(tenant_id) + try: + updated_rule = alert_deduplicator.update_deduplication_rule( + rule_id, rule, authenticated_entity.email + ) + logger.info("Updated deduplication rule") + return updated_rule + except Exception as e: + logger.exception("Error updating deduplication rule") + raise HTTPException(status_code=400, detail=str(e)) + + +@router.delete( + "/{rule_id}", + description="Delete Deduplication Rule", +) +def 
delete_deduplication_rule( + rule_id: str, + authenticated_entity: AuthenticatedEntity = Depends( + IdentityManagerFactory.get_auth_verifier(["write:deduplications"]) + ), +): + tenant_id = authenticated_entity.tenant_id + logger.info("Deleting deduplication rule", extra={"rule_id": rule_id}) + alert_deduplicator = AlertDeduplicator(tenant_id) + try: + success = alert_deduplicator.delete_deduplication_rule(rule_id) + if success: + logger.info("Deleted deduplication rule") + return {"message": "Deduplication rule deleted successfully"} + else: + raise HTTPException(status_code=404, detail="Deduplication rule not found") + except Exception as e: + logger.exception("Error deleting deduplication rule") + raise HTTPException(status_code=400, detail=str(e)) From 45dfbd51425c1835fce52ebb49e3beffdd984ae6 Mon Sep 17 00:00:00 2001 From: shahargl Date: Mon, 16 Sep 2024 10:30:07 +0300 Subject: [PATCH 14/36] feat: wip --- .../deduplication/DeduplicationSidebar.tsx | 2 +- .../alert_deduplicator/alert_deduplicator.py | 56 +++++++++++--- keep/api/core/db.py | 76 +++++++++++++------ 3 files changed, 98 insertions(+), 36 deletions(-) diff --git a/keep-ui/app/deduplication/DeduplicationSidebar.tsx b/keep-ui/app/deduplication/DeduplicationSidebar.tsx index d9b448cb3..7dab1afa4 100644 --- a/keep-ui/app/deduplication/DeduplicationSidebar.tsx +++ b/keep-ui/app/deduplication/DeduplicationSidebar.tsx @@ -102,7 +102,7 @@ const DeduplicationSidebar: React.FC = ({ clearErrors(); try { const apiUrl = getApiURL(); - let url = `${apiUrl}/deduplication`; + let url = `${apiUrl}/deduplications`; if (selectedDeduplicationRule && selectedDeduplicationRule.id) { url += `/${selectedDeduplicationRule.id}`; diff --git a/keep/api/alert_deduplicator/alert_deduplicator.py b/keep/api/alert_deduplicator/alert_deduplicator.py index 5b0b324b6..1daaa207b 100644 --- a/keep/api/alert_deduplicator/alert_deduplicator.py +++ b/keep/api/alert_deduplicator/alert_deduplicator.py @@ -9,11 +9,9 @@ 
create_deduplication_rule, delete_deduplication_rule, get_alerts_fields, - get_all_dedup_ratio, get_all_deduplication_rules, get_custom_full_deduplication_rules, get_last_alert_hash_by_fingerprint, - get_provider_distribution, update_deduplication_rule, ) from keep.api.models.alert import ( @@ -214,9 +212,38 @@ def get_deduplications(self) -> list[DeduplicationRuleDto]: } # get custom deduplication rules custom_deduplications = get_all_deduplication_rules(self.tenant_id) - custom_deduplications_dict = { - rule.provider_id: rule for rule in custom_deduplications - } + # cast to dto + custom_deduplications_dto = [ + DeduplicationRuleDto( + id=str(rule.id), + name=rule.name, + description=rule.description, + default=False, + distribution=[], + fingerprint_fields=rule.fingerprint_fields, + provider_type=rule.provider_type, + provider_id=rule.provider_id, + full_deduplication=rule.full_deduplication, + ignore_fields=rule.ignore_fields, + priority=rule.priority, + last_updated=str(rule.last_updated), + last_updated_by=rule.last_updated_by, + created_at=str(rule.created_at), + created_by=rule.created_by, + ingested=0, + dedup_ratio=0.0, + enabled=rule.enabled, + ) + for rule in custom_deduplications + ] + + custom_deduplications_dict = {} + for rule in custom_deduplications_dto: + key = f"{rule.provider_type}_{rule.provider_id}" + if key not in custom_deduplications_dict: + custom_deduplications_dict[key] = [] + custom_deduplications_dict[key].append(rule) + # get the "catch all" full deduplication rule catch_all_full_deduplication = self._get_default_full_deduplication_rule() @@ -226,7 +253,8 @@ def get_deduplications(self) -> list[DeduplicationRuleDto]: final_deduplications = [catch_all_full_deduplication] for provider in providers: # if the provider doesn't have a deduplication rule, use the default one - if provider.id not in custom_deduplications_dict: + key = f"{provider.type}_{provider.id}" + if key not in custom_deduplications_dict: # no default deduplication rule 
found [if provider doesn't have FINGERPRINT_FIELDS] if provider.type not in default_deduplications_dict: self.logger.warning( @@ -248,16 +276,16 @@ def get_deduplications(self) -> list[DeduplicationRuleDto]: final_deduplications.append(default_deduplication) # else, just use the custom deduplication rule else: - final_deduplications.append(custom_deduplications_dict[provider.id]) + final_deduplications += custom_deduplications_dict[key] # now calculate some statistics - dedup_ratio = get_all_dedup_ratio(self.tenant_id) - + # alerts_by_provider_stats = get_all_alerts_by_providers(self.tenant_id) + # deduplication_stats = get_all_deduplication_stats(self.tenant_id) + """ result = [] for dedup in final_deduplications: - dedup.ingested = dedup_ratio.get( - (dedup.provider_id, dedup.provider_type), {} - ).get("num_alerts", 0.0) + key = f"{dedup.provider_type}_{dedup.provider_id}" + dedup.ingested = alerts_by_provider_stats[key] dedup.dedup_ratio = dedup_ratio.get( (dedup.provider_id, dedup.provider_type), {} ).get("ratio", 0.0) @@ -273,6 +301,10 @@ def get_deduplications(self) -> list[DeduplicationRuleDto]: ) dedup.distribution = distribution break + """ + # sort providers to have enabled first + result = [] + result = sorted(result, key=lambda x: x.default, reverse=True) return result diff --git a/keep/api/core/db.py b/keep/api/core/db.py index 96f3176f8..be9dcd787 100644 --- a/keep/api/core/db.py +++ b/keep/api/core/db.py @@ -1678,7 +1678,6 @@ def create_deduplication_rule( provider_id: str | None, provider_type: str, created_by: str, - last_updated_by: str | None = None, enabled: bool = True, fingerprint_fields: list[str] = [], full_deduplication: bool = False, @@ -1692,7 +1691,7 @@ def create_deduplication_rule( description=description, provider_id=provider_id, provider_type=provider_type, - last_updated_by=last_updated_by, + last_updated_by=created_by, # on creation, last_updated_by is the same as created_by created_by=created_by, enabled=enabled, 
fingerprint_fields=fingerprint_fields, @@ -1786,15 +1785,14 @@ def create_deduplication_event(tenant_id, deduplication_rule_id, deduplication_t session.commit() -def get_all_dedup_ratio(tenant_id): +def get_all_alerts_by_providers(tenant_id): with Session(engine) as session: - # Query to get the count of alerts and unique fingerprints per provider_id and provider_type + # Query to get the count of alerts per provider_id and provider_type query = ( select( Alert.provider_id, Alert.provider_type, func.count(Alert.id).label("num_alerts"), - func.count(func.distinct(Alert.fingerprint)).label("num_fingerprints"), ) .where(Alert.tenant_id == tenant_id) .group_by(Alert.provider_id, Alert.provider_type) @@ -1802,37 +1800,68 @@ def get_all_dedup_ratio(tenant_id): results = session.exec(query).all() - # Calculate the ratio for each provider + # Create a dictionary with the number of alerts for each provider stats = {} - total_alerts = 0 for result in results: provider_id = result.provider_id provider_type = result.provider_type num_alerts = result.num_alerts - num_fingerprints = result.num_fingerprints - ratio = ( - (1 - (num_fingerprints / num_alerts)) * 100 - if num_fingerprints > 0 - else 0 - ) - key = (provider_id, provider_type) + key = f"{provider_type}_{provider_id}" stats[key] = { "num_alerts": num_alerts, - "num_fingerprints": num_fingerprints, - "ratio": ratio, } - total_alerts += num_alerts - # Add total number of alerts to the stats - stats["total_alerts"] = total_alerts + return stats + + +def get_all_deduplication_stats(tenant_id): + with Session(engine) as session: + # Query to get deduplication stats + query = ( + select( + AlertDeduplicationEvent.provider_id, + AlertDeduplicationEvent.provider_type, + AlertDeduplicationEvent.deduplication_type, + func.count(AlertDeduplicationEvent.id).label("dedup_count"), + ) + .where(AlertDeduplicationEvent.tenant_id == tenant_id) + .group_by( + AlertDeduplicationEvent.provider_id, + AlertDeduplicationEvent.provider_type, 
+ AlertDeduplicationEvent.deduplication_type, + ) + ) + + results = session.exec(query).all() + + # Create a dictionary with deduplication stats for each provider + stats = {} + for result in results: + provider_id = result.provider_id + provider_type = result.provider_type + dedup_type = result.deduplication_type + dedup_count = result.dedup_count + + key = (provider_id, provider_type) + if key not in stats: + stats[key] = {"full": 0, "partial": 0} + + stats[key][dedup_type] = dedup_count + + # Calculate deduplication ratio + for key, counts in stats.items(): + total_dedups = counts["full"] + counts["partial"] + if total_dedups > 0: + ratio = counts["full"] / total_dedups + else: + ratio = 0 + stats[key]["ratio"] = ratio return stats def get_last_alert_hash_by_fingerprint(tenant_id, fingerprint): - from sqlalchemy.dialects import mssql - # get the last alert for a given fingerprint # to check deduplication with Session(engine) as session: @@ -1843,13 +1872,14 @@ def get_last_alert_hash_by_fingerprint(tenant_id, fingerprint): .order_by(Alert.timestamp.desc()) .limit(1) # Add LIMIT 1 for MSSQL ) - + """ + from sqlalchemy.dialects import mssql # Compile the query and log it compiled_query = query.compile( dialect=mssql.dialect(), compile_kwargs={"literal_binds": True} ) logger.info(f"Compiled query: {compiled_query}") - + """ alert_hash = session.exec(query).first() return alert_hash From 15cfa6ecc82261dc06697e696fc113ddfc6ef0ae Mon Sep 17 00:00:00 2001 From: shahargl Date: Mon, 16 Sep 2024 19:25:46 +0300 Subject: [PATCH 15/36] feat: wip --- .../app/deduplication/DeduplicationTable.tsx | 34 ++-- .../alert_deduplicator/alert_deduplicator.py | 168 ++++++++++++------ keep/api/core/db.py | 84 ++++++--- scripts/simulate_alerts.py | 45 +++-- 4 files changed, 219 insertions(+), 112 deletions(-) diff --git a/keep-ui/app/deduplication/DeduplicationTable.tsx b/keep-ui/app/deduplication/DeduplicationTable.tsx index 9b2de1a46..dfc2b45fc 100644 --- 
a/keep-ui/app/deduplication/DeduplicationTable.tsx +++ b/keep-ui/app/deduplication/DeduplicationTable.tsx @@ -91,8 +91,10 @@ export const DeduplicationTable: React.FC = ({ deduplic cell: (info) => (
    {info.getValue()} - {info.row.original.default && ( + {info.row.original.default ? ( Default + ) : ( + Custom )} {info.row.original.full_deduplication && ( Full Deduplication @@ -140,16 +142,26 @@ export const DeduplicationTable: React.FC = ({ deduplic }), columnHelper.accessor("fingerprint_fields", { header: "Fields", - cell: (info) => ( -
    - {info.getValue().map((field: string, index: number) => ( - - {index > 0 && } - {field} - - ))} -
    - ), + cell: (info) => { + const fields = info.getValue(); + if (!fields || fields.length === 0) { + return ( +
    + N/A +
    + ); + } + return ( +
    + {fields.map((field: string, index: number) => ( + + {index > 0 && } + {field} + + ))} +
    + ); + }, }), columnHelper.display({ id: "actions", diff --git a/keep/api/alert_deduplicator/alert_deduplicator.py b/keep/api/alert_deduplicator/alert_deduplicator.py index 1daaa207b..38ea00e78 100644 --- a/keep/api/alert_deduplicator/alert_deduplicator.py +++ b/keep/api/alert_deduplicator/alert_deduplicator.py @@ -2,6 +2,7 @@ import hashlib import json import logging +import uuid from keep.api.core.config import config from keep.api.core.db import ( @@ -9,8 +10,10 @@ create_deduplication_rule, delete_deduplication_rule, get_alerts_fields, + get_all_alerts_by_providers, get_all_deduplication_rules, - get_custom_full_deduplication_rules, + get_all_deduplication_stats, + get_custom_deduplication_rules, get_last_alert_hash_by_fingerprint, update_deduplication_rule, ) @@ -96,33 +99,39 @@ def apply_deduplication(self, alert: AlertDto) -> bool: # you can also safe to assume that alert.fingerprint is set by the provider itself # get only relevant rules - rule = self.get_full_deduplication_rule( + rules = self.get_deduplication_rules( self.tenant_id, alert.providerId, alert.providerType ) - self.logger.debug( - "Applying deduplication rule to alert", - extra={ - "rule_id": rule.id, - "alert_id": alert.id, - }, - ) - alert = self._apply_deduplication_rule(alert, rule) - self.logger.debug( - "Alert after deduplication rule applied", - extra={ - "rule_id": rule.id, - "alert_id": alert.id, - "is_full_duplicate": alert.isFullDuplicate, - "is_partial_duplicate": alert.isPartialDuplicate, - }, - ) - if alert.isFullDuplicate or alert.isPartialDuplicate: - # create deduplication event - create_deduplication_event( - tenant_id=self.tenant_id, - deduplication_rule_id=rule.id, - deduplication_type="full" if alert.isFullDuplicate else "partial", + + for rule in rules: + self.logger.debug( + "Applying deduplication rule to alert", + extra={ + "rule_id": rule.id, + "alert_id": alert.id, + }, + ) + alert = self._apply_deduplication_rule(alert, rule) + self.logger.debug( + "Alert after 
deduplication rule applied", + extra={ + "rule_id": rule.id, + "alert_id": alert.id, + "is_full_duplicate": alert.isFullDuplicate, + "is_partial_duplicate": alert.isPartialDuplicate, + }, ) + if alert.isFullDuplicate or alert.isPartialDuplicate: + # create deduplication event + create_deduplication_event( + tenant_id=self.tenant_id, + deduplication_rule_id=rule.id, + deduplication_type="full" if alert.isFullDuplicate else "partial", + provider_id=alert.providerId, + provider_type=alert.providerType, + ) + # we don't need to check the other rules + break return alert def _remove_field(self, field, alert: AlertDto) -> AlertDto: @@ -142,47 +151,77 @@ def _remove_field(self, field, alert: AlertDto) -> AlertDto: setattr(alert, field_parts[0], d) return alert - def get_full_deduplication_rule( + def get_deduplication_rules( self, tenant_id, provider_id, provider_type ) -> DeduplicationRuleDto: # try to get the rule from the database - rule = get_custom_full_deduplication_rules( - tenant_id, provider_id, provider_type - ) - if rule: + rules = get_custom_deduplication_rules(tenant_id, provider_id, provider_type) + + if not rules: self.logger.debug( - "Using custom deduplication rule", + "No custom deduplication rules found, using deafult full deduplication rule", extra={ "provider_id": provider_id, "provider_type": provider_type, "tenant_id": tenant_id, }, ) - return rule + rule = self._get_default_full_deduplication_rule(provider_id, provider_type) + return [rule] - # no custom rule found, let's try to use the default one + # else, return the custom rules self.logger.debug( - "Using default full deduplication rule", + "Using custom deduplication rules", extra={ "provider_id": provider_id, "provider_type": provider_type, "tenant_id": tenant_id, }, ) - rule = self._get_default_full_deduplication_rule() - return rule + # + # check that at least one of them is full deduplication rule + full_deduplication_rules = [rule for rule in rules if rule.full_deduplication] + # if full 
deduplication rule found, return the rules + if full_deduplication_rules: + return rules + + # if not, assign them the default full deduplication rule ignore fields + self.logger.info( + "No full deduplication rule found, assigning default full deduplication rule ignore fields" + ) + default_full_dedup_rule = self._get_default_full_deduplication_rule( + provider_id=provider_id, provider_type=provider_type + ) + for rule in rules: + if not rule.full_deduplication: + self.logger.debug( + "Assigning default full deduplication rule ignore fields", + ) + rule.ignore_fields = default_full_dedup_rule.ignore_fields + return rules + + def _get_default_full_deduplication_rule( + self, provider_id, provider_type + ) -> DeduplicationRuleDto: + # this is a way to generate a unique uuid for the default deduplication rule per (provider_id, provider_type) + namespace_uuid = uuid.uuid5(uuid.NAMESPACE_DNS, "keephq.dev") + generated_uuid = str( + uuid.uuid5(namespace_uuid, f"{provider_id}_{provider_type}") + ) - def _get_default_full_deduplication_rule(self) -> DeduplicationRuleDto: # just return a default deduplication rule with lastReceived field + if not provider_type: + provider_type = "keep" + return DeduplicationRuleDto( - id=DEFAULT_RULE_UUID, - name="Keep Full Deduplication Rule", - description="Keep Full Deduplication Rule", + id=generated_uuid, + name=f"{provider_type} default deduplication rule", + description=f"{provider_type} default deduplication rule", default=True, distribution=[], - fingerprint_fields=[], - provider_type="keep", - provider_id=None, + fingerprint_fields=[], # ["fingerprint"], # this is fallback + provider_type=provider_type or "keep", + provider_id=provider_id, full_deduplication=True, ignore_fields=["lastReceived"], priority=0, @@ -219,7 +258,7 @@ def get_deduplications(self) -> list[DeduplicationRuleDto]: name=rule.name, description=rule.description, default=False, - distribution=[], + distribution=[{"hour": i, "number": 0} for i in range(24)], 
fingerprint_fields=rule.fingerprint_fields, provider_type=rule.provider_type, provider_id=rule.provider_id, @@ -245,7 +284,9 @@ def get_deduplications(self) -> list[DeduplicationRuleDto]: custom_deduplications_dict[key].append(rule) # get the "catch all" full deduplication rule - catch_all_full_deduplication = self._get_default_full_deduplication_rule() + catch_all_full_deduplication = self._get_default_full_deduplication_rule( + provider_id=None, provider_type=None + ) # calculate the deduplciations # if a provider has custom deduplication rule, use it @@ -279,31 +320,40 @@ def get_deduplications(self) -> list[DeduplicationRuleDto]: final_deduplications += custom_deduplications_dict[key] # now calculate some statistics - # alerts_by_provider_stats = get_all_alerts_by_providers(self.tenant_id) - # deduplication_stats = get_all_deduplication_stats(self.tenant_id) - """ + alerts_by_provider_stats = get_all_alerts_by_providers(self.tenant_id) + deduplication_stats = get_all_deduplication_stats(self.tenant_id) + result = [] for dedup in final_deduplications: key = f"{dedup.provider_type}_{dedup.provider_id}" - dedup.ingested = alerts_by_provider_stats[key] - dedup.dedup_ratio = dedup_ratio.get( - (dedup.provider_id, dedup.provider_type), {} - ).get("ratio", 0.0) + dedup.ingested = alerts_by_provider_stats[key].get("num_alerts", 0) + if dedup.ingested == 0: + dedup.dedup_ratio = 0.0 + # this shouldn't happen, only in backward compatibility or some bug that dedup events are not created + elif key not in deduplication_stats: + self.logger.warning(f"Provider {key} does not have deduplication stats") + dedup.dedup_ratio = 0.0 + elif deduplication_stats[key].get("dedup_count", 0) == 0: + dedup.dedup_ratio = 0.0 + else: + dedup.dedup_ratio = ( + deduplication_stats[key].get("dedup_count") + / (deduplication_stats[key].get("dedup_count") + dedup.ingested) + ) * 100 + dedup.distribution = deduplication_stats[key].get( + "alerts_last_24_hours" + ) result.append(dedup) if 
self.provider_distribution_enabled: - providers_distribution = get_provider_distribution(self.tenant_id) for dedup in result: - for pd in providers_distribution: + for pd, stats in deduplication_stats.items(): if pd == f"{dedup.provider_id}_{dedup.provider_type}": - distribution = providers_distribution[pd].get( - "alert_last_24_hours" - ) + distribution = stats.get("alert_last_24_hours") dedup.distribution = distribution break - """ + # sort providers to have enabled first - result = [] result = sorted(result, key=lambda x: x.default, reverse=True) return result diff --git a/keep/api/core/db.py b/keep/api/core/db.py index be9dcd787..a69fbaa92 100644 --- a/keep/api/core/db.py +++ b/keep/api/core/db.py @@ -930,7 +930,6 @@ def count_alerts( def get_enrichment(tenant_id, fingerprint, refresh=False): with Session(engine) as session: return get_enrichment_with_session(session, tenant_id, fingerprint, refresh) - return alert_enrichment def get_enrichments( @@ -958,7 +957,7 @@ def get_enrichment_with_session(session, tenant_id, fingerprint, refresh=False): .where(AlertEnrichment.tenant_id == tenant_id) .where(AlertEnrichment.alert_fingerprint == fingerprint) ).first() - if refresh: + if refresh and alert_enrichment: try: session.refresh(alert_enrichment) except Exception: @@ -1760,24 +1759,27 @@ def delete_deduplication_rule(rule_id: str, tenant_id: str) -> bool: return True -def get_custom_full_deduplication_rules(tenant_id, provider_id, provider_type): +def get_custom_deduplication_rules(tenant_id, provider_id, provider_type): with Session(engine) as session: rules = session.exec( select(AlertDeduplicationRule) .where(AlertDeduplicationRule.tenant_id == tenant_id) .where(AlertDeduplicationRule.provider_id == provider_id) .where(AlertDeduplicationRule.provider_type == provider_type) - .where(AlertDeduplicationRule.full_deduplication == True) ).all() return rules -def create_deduplication_event(tenant_id, deduplication_rule_id, deduplication_type): +def 
create_deduplication_event( + tenant_id, deduplication_rule_id, deduplication_type, provider_id, provider_type +): with Session(engine) as session: deduplication_event = AlertDeduplicationEvent( tenant_id=tenant_id, deduplication_rule_id=deduplication_rule_id, deduplication_type=deduplication_type, + provider_id=provider_id, + provider_type=provider_type, timestamp=datetime.utcnow(), date_hour=datetime.utcnow().replace(minute=0, second=0, microsecond=0), ) @@ -1817,46 +1819,82 @@ def get_all_alerts_by_providers(tenant_id): def get_all_deduplication_stats(tenant_id): with Session(engine) as session: - # Query to get deduplication stats - query = ( + # Query to get all-time deduplication stats + all_time_query = ( select( AlertDeduplicationEvent.provider_id, AlertDeduplicationEvent.provider_type, - AlertDeduplicationEvent.deduplication_type, func.count(AlertDeduplicationEvent.id).label("dedup_count"), ) .where(AlertDeduplicationEvent.tenant_id == tenant_id) .group_by( AlertDeduplicationEvent.provider_id, AlertDeduplicationEvent.provider_type, - AlertDeduplicationEvent.deduplication_type, ) ) - results = session.exec(query).all() + all_time_results = session.exec(all_time_query).all() + + # Query to get alerts distribution in the last 24 hours + twenty_four_hours_ago = datetime.utcnow() - timedelta(hours=24) + alerts_last_24_hours_query = ( + select( + AlertDeduplicationEvent.provider_id, + AlertDeduplicationEvent.provider_type, + AlertDeduplicationEvent.date_hour, + func.count(AlertDeduplicationEvent.id).label("hourly_count"), + ) + .where(AlertDeduplicationEvent.tenant_id == tenant_id) + .where(AlertDeduplicationEvent.date_hour >= twenty_four_hours_ago) + .group_by( + AlertDeduplicationEvent.provider_id, + AlertDeduplicationEvent.provider_type, + AlertDeduplicationEvent.date_hour, + ) + ) + + alerts_last_24_hours_results = session.exec(alerts_last_24_hours_query).all() # Create a dictionary with deduplication stats for each provider stats = {} - for result in 
results: + current_hour = datetime.utcnow().replace(minute=0, second=0, microsecond=0) + for result in all_time_results: provider_id = result.provider_id provider_type = result.provider_type - dedup_type = result.deduplication_type dedup_count = result.dedup_count - key = (provider_id, provider_type) + # alerts without provider_id and provider_type are considered as "keep" + if not provider_type: + provider_type = "keep" + + key = f"{provider_type}_{provider_id}" if key not in stats: - stats[key] = {"full": 0, "partial": 0} + stats[key] = { + "dedup_count": 0, + "alerts_last_24_hours": [ + {"hour": (current_hour - timedelta(hours=i)).hour, "number": 0} + for i in range(0, 24) + ], + } - stats[key][dedup_type] = dedup_count + stats[key]["dedup_count"] = dedup_count - # Calculate deduplication ratio - for key, counts in stats.items(): - total_dedups = counts["full"] + counts["partial"] - if total_dedups > 0: - ratio = counts["full"] / total_dedups - else: - ratio = 0 - stats[key]["ratio"] = ratio + # Add alerts distribution from the last 24 hours + for result in alerts_last_24_hours_results: + provider_id = result.provider_id + provider_type = result.provider_type + date_hour = result.date_hour + hourly_count = result.hourly_count + + if not provider_type: + provider_type = "keep" + key = f"{provider_type}_{provider_id}" + if key in stats: + hours_ago = int((current_hour - date_hour).total_seconds() / 3600) + if 0 <= hours_ago < 24: + stats[key]["alerts_last_24_hours"][23 - hours_ago][ + "number" + ] = hourly_count return stats diff --git a/scripts/simulate_alerts.py b/scripts/simulate_alerts.py index 9380f3153..62669643a 100644 --- a/scripts/simulate_alerts.py +++ b/scripts/simulate_alerts.py @@ -17,6 +17,7 @@ def main(): + GENERATE_DEDUPLICATIONS = True keep_api_key = ( "f228aabc-17d4-4e12-a918-48bd90742afc" # os.environ.get("KEEP_API_KEY") ) @@ -36,25 +37,31 @@ def main(): provider = provider_classes[provider_type] alert = provider.simulate_alert() - 
logger.info("Sending alert: {}".format(alert)) - try: - env = random.choice(["production", "staging", "development"]) - response = requests.post( - send_alert_url + f"?provider_id={provider_type}-{env}", - headers={"x-api-key": keep_api_key}, - json=alert, - ) - except Exception as e: - logger.error("Failed to send alert: {}".format(e)) - time.sleep(0.2) - continue - - if response.status_code != 202: - logger.error("Failed to send alert: {}".format(response.text)) - else: - logger.info("Alert sent successfully") - - time.sleep(0.2) # Wait for 10 seconds before sending the next alert + # Determine number of times to send the same alert + num_iterations = 1 + if GENERATE_DEDUPLICATIONS: + num_iterations = random.randint(1, 3) + + for _ in range(num_iterations): + logger.info("Sending alert: {}".format(alert)) + try: + env = random.choice(["production", "staging", "development"]) + response = requests.post( + send_alert_url + f"?provider_id={provider_type}-{env}", + headers={"x-api-key": keep_api_key}, + json=alert, + ) + except Exception as e: + logger.error("Failed to send alert: {}".format(e)) + time.sleep(0.2) + continue + + if response.status_code != 202: + logger.error("Failed to send alert: {}".format(response.text)) + else: + logger.info("Alert sent successfully") + + time.sleep(0.2) # Wait for 0.2 seconds before sending the next alert if __name__ == "__main__": From 7276c9ebec8009cc64d53190f003a5dd107212c6 Mon Sep 17 00:00:00 2001 From: shahargl Date: Tue, 17 Sep 2024 09:36:09 +0300 Subject: [PATCH 16/36] feat: wip --- .../app/deduplication/DeduplicationTable.tsx | 53 +++++++++++++++---- keep-ui/app/deduplication/client.tsx | 7 ++- keep-ui/tailwind.config.js | 2 +- keep/providers/providers_factory.py | 2 +- 4 files changed, 49 insertions(+), 15 deletions(-) diff --git a/keep-ui/app/deduplication/DeduplicationTable.tsx b/keep-ui/app/deduplication/DeduplicationTable.tsx index dfc2b45fc..b7a494d52 100644 --- a/keep-ui/app/deduplication/DeduplicationTable.tsx +++ 
b/keep-ui/app/deduplication/DeduplicationTable.tsx @@ -24,19 +24,22 @@ import { DeduplicationRule } from "app/deduplication/models"; import DeduplicationSidebar from "app/deduplication/DeduplicationSidebar"; import { TrashIcon, PauseIcon, PlusIcon } from "@heroicons/react/24/outline"; import Image from "next/image"; +import { getApiURL } from "utils/apiUrl"; +import { useSession } from "next-auth/react"; const columnHelper = createColumnHelper(); type DeduplicationTableProps = { deduplicationRules: DeduplicationRule[]; + mutateDeduplicationRules: () => Promise; }; -export const DeduplicationTable: React.FC = ({ deduplicationRules }) => { +export const DeduplicationTable: React.FC = ({ deduplicationRules, mutateDeduplicationRules }) => { const router = useRouter(); + const { data: session } = useSession(); const searchParams = useSearchParams(); let selectedId = searchParams ? searchParams.get("id") : null; - const selectedRule = deduplicationRules.find((rule) => rule.id === selectedId); const [isSidebarOpen, setIsSidebarOpen] = useState(false); const [selectedDeduplicationRule, setSelectedDeduplicationRule] = useState(null); @@ -52,6 +55,31 @@ export const DeduplicationTable: React.FC = ({ deduplic router.push('/deduplication'); }; + const handleDeleteRule = async (rule: DeduplicationRule, event: React.MouseEvent) => { + event.stopPropagation(); + if (rule.default) return; // Don't delete default rules + + if (window.confirm("Are you sure you want to delete this deduplication rule?")) { + try { + const url = `${getApiURL()}/deduplications/${rule.id}`; + const response = await fetch(url, { + method: 'DELETE', + headers: { + Authorization: `Bearer ${session?.accessToken}`, + }, + }); + + if (response.ok) { + await mutateDeduplicationRules(); + } else { + console.error("Failed to delete deduplication rule"); + } + } catch (error) { + console.error("Error deleting deduplication rule:", error); + } + } + }; + useEffect(() => { if (selectedId && !isSidebarOpen) { const 
rule = deduplicationRules.find((r) => r.id === selectedId); @@ -144,16 +172,20 @@ export const DeduplicationTable: React.FC = ({ deduplic header: "Fields", cell: (info) => { const fields = info.getValue(); - if (!fields || fields.length === 0) { + const ignoreFields = info.row.original.ignore_fields; + const displayFields = fields && fields.length > 0 ? fields : ignoreFields; + + if (!displayFields || displayFields.length === 0) { return (
    - N/A + N/A
    ); } + return (
    - {fields.map((field: string, index: number) => ( + {displayFields.map((field: string, index: number) => ( {index > 0 && } {field} @@ -167,23 +199,25 @@ export const DeduplicationTable: React.FC = ({ deduplic id: "actions", cell: (info) => (
    -
    ), }), ], - [] + [handleDeleteRule] ); const table = useReactTable({ @@ -251,6 +285,7 @@ export const DeduplicationTable: React.FC = ({ deduplic { - - const { data: deduplicationRules = [], isLoading } = useDeduplicationRules(); + const { data: deduplicationRules = [], isLoading, mutate: mutateDeduplicationRules } = useDeduplicationRules(); if (isLoading) { return ; @@ -15,7 +14,7 @@ export const Client = () => { if (deduplicationRules.length === 0) { return ; - } + } - return ; + return ; }; diff --git a/keep-ui/tailwind.config.js b/keep-ui/tailwind.config.js index aae009bf3..858e263c4 100644 --- a/keep-ui/tailwind.config.js +++ b/keep-ui/tailwind.config.js @@ -19,7 +19,7 @@ module.exports = { muted: "rgb(255 237 213)", // orange-200 subtle: "rgb(251 146 60)", // orange-400 DEFAULT: "rgb(249 115 22)", // orange-500 - emphasis: "#1d4ed8", // blue-700 + emphasis: "#374151", // gray-700 inverted: "#ffffff", // white }, background: { diff --git a/keep/providers/providers_factory.py b/keep/providers/providers_factory.py index a449b85b5..c0143eae5 100644 --- a/keep/providers/providers_factory.py +++ b/keep/providers/providers_factory.py @@ -537,7 +537,7 @@ def get_default_deduplication_rules() -> list[DeduplicationRuleDto]: if provider.default_fingerprint_fields: deduplication_dto = DeduplicationRuleDto( name=f"{provider.type}_default", - description=f"Default deduplication for {provider.display_name}", + description=f"{provider.display_name} default deduplication rule", default=True, distribution=[{"hour": i, "number": 0} for i in range(24)], provider_type=provider.type, From fbeb910d99318dd4c16725813f2dcf4c14405a45 Mon Sep 17 00:00:00 2001 From: shahargl Date: Tue, 17 Sep 2024 09:41:41 +0300 Subject: [PATCH 17/36] feat: add migration --- .../versions/2024-09-17-09-39_05292e5e1455.py | 198 ++++++++++++++++++ 1 file changed, 198 insertions(+) create mode 100644 keep/api/models/db/migrations/versions/2024-09-17-09-39_05292e5e1455.py diff --git 
a/keep/api/models/db/migrations/versions/2024-09-17-09-39_05292e5e1455.py b/keep/api/models/db/migrations/versions/2024-09-17-09-39_05292e5e1455.py new file mode 100644 index 000000000..a8b3ed936 --- /dev/null +++ b/keep/api/models/db/migrations/versions/2024-09-17-09-39_05292e5e1455.py @@ -0,0 +1,198 @@ +"""deduplications v2 + +Revision ID: 05292e5e1455 +Revises: 938b1aa62d5c +Create Date: 2024-09-17 09:39:51.160143 + +""" + +import sqlalchemy as sa +import sqlmodel +from alembic import op +from sqlalchemy.dialects import sqlite + +# revision identifiers, used by Alembic. +revision = "05292e5e1455" +down_revision = "938b1aa62d5c" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "alertdeduplicationevent", + sa.Column("timestamp", sa.DateTime(), nullable=False), + sa.Column("date_hour", sa.DateTime(), nullable=True), + sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column( + "deduplication_rule_id", sqlmodel.sql.sqltypes.GUID(), nullable=False + ), + sa.Column( + "deduplication_type", sqlmodel.sql.sqltypes.AutoString(), nullable=False + ), + sa.Column("provider_id", sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.Column("provider_type", sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.ForeignKeyConstraint( + ["tenant_id"], + ["tenant.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index( + "ix_alert_deduplication_event_provider_id", + "alertdeduplicationevent", + ["provider_id"], + unique=False, + ) + op.create_index( + "ix_alert_deduplication_event_provider_id_date_hour", + "alertdeduplicationevent", + ["provider_id", "date_hour"], + unique=False, + ) + op.create_index( + "ix_alert_deduplication_event_provider_type", + "alertdeduplicationevent", + ["provider_type"], + unique=False, + ) + op.create_index( + 
"ix_alert_deduplication_event_provider_type_date_hour", + "alertdeduplicationevent", + ["provider_type", "date_hour"], + unique=False, + ) + op.create_index( + op.f("ix_alertdeduplicationevent_tenant_id"), + "alertdeduplicationevent", + ["tenant_id"], + unique=False, + ) + op.create_table( + "alertdeduplicationrule", + sa.Column("fingerprint_fields", sa.JSON(), nullable=True), + sa.Column("ignore_fields", sa.JSON(), nullable=True), + sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column("name", sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column("description", sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column("provider_id", sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.Column("provider_type", sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column("last_updated", sa.DateTime(), nullable=False), + sa.Column( + "last_updated_by", sqlmodel.sql.sqltypes.AutoString(), nullable=False + ), + sa.Column("created_at", sa.DateTime(), nullable=False), + sa.Column("created_by", sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column("enabled", sa.Boolean(), nullable=False), + sa.Column("full_deduplication", sa.Boolean(), nullable=False), + sa.Column("priority", sa.Integer(), nullable=False), + sa.ForeignKeyConstraint( + ["tenant_id"], + ["tenant.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index( + op.f("ix_alertdeduplicationrule_name"), + "alertdeduplicationrule", + ["name"], + unique=False, + ) + op.create_table( + "alertfield", + sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column("field_name", sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column("provider_id", sqlmodel.sql.sqltypes.AutoString(), nullable=True), + sa.Column("provider_type", sqlmodel.sql.sqltypes.AutoString(), nullable=True), + 
sa.ForeignKeyConstraint( + ["tenant_id"], + ["tenant.id"], + ), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("tenant_id", "field_name", name="uq_tenant_field"), + ) + op.create_index( + "ix_alert_field_provider_id_provider_type", + "alertfield", + ["provider_id", "provider_type"], + unique=False, + ) + op.create_index( + "ix_alert_field_tenant_id", "alertfield", ["tenant_id"], unique=False + ) + op.create_index( + "ix_alert_field_tenant_id_field_name", + "alertfield", + ["tenant_id", "field_name"], + unique=False, + ) + op.create_index( + op.f("ix_alertfield_field_name"), "alertfield", ["field_name"], unique=False + ) + op.create_index( + op.f("ix_alertfield_provider_id"), "alertfield", ["provider_id"], unique=False + ) + op.create_index( + op.f("ix_alertfield_provider_type"), + "alertfield", + ["provider_type"], + unique=False, + ) + op.create_index( + op.f("ix_alertfield_tenant_id"), "alertfield", ["tenant_id"], unique=False + ) + op.drop_table("alertdeduplicationfilter") + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table( + "alertdeduplicationfilter", + sa.Column("fields", sqlite.JSON(), nullable=True), + sa.Column("id", sa.CHAR(length=32), nullable=False), + sa.Column("tenant_id", sa.VARCHAR(), nullable=False), + sa.Column("matcher_cel", sa.VARCHAR(), nullable=False), + sa.ForeignKeyConstraint( + ["tenant_id"], + ["tenant.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + op.drop_index(op.f("ix_alertfield_tenant_id"), table_name="alertfield") + op.drop_index(op.f("ix_alertfield_provider_type"), table_name="alertfield") + op.drop_index(op.f("ix_alertfield_provider_id"), table_name="alertfield") + op.drop_index(op.f("ix_alertfield_field_name"), table_name="alertfield") + op.drop_index("ix_alert_field_tenant_id_field_name", table_name="alertfield") + op.drop_index("ix_alert_field_tenant_id", table_name="alertfield") + op.drop_index("ix_alert_field_provider_id_provider_type", table_name="alertfield") + op.drop_table("alertfield") + op.drop_index( + op.f("ix_alertdeduplicationrule_name"), table_name="alertdeduplicationrule" + ) + op.drop_table("alertdeduplicationrule") + op.drop_index( + op.f("ix_alertdeduplicationevent_tenant_id"), + table_name="alertdeduplicationevent", + ) + op.drop_index( + "ix_alert_deduplication_event_provider_type_date_hour", + table_name="alertdeduplicationevent", + ) + op.drop_index( + "ix_alert_deduplication_event_provider_type", + table_name="alertdeduplicationevent", + ) + op.drop_index( + "ix_alert_deduplication_event_provider_id_date_hour", + table_name="alertdeduplicationevent", + ) + op.drop_index( + "ix_alert_deduplication_event_provider_id", table_name="alertdeduplicationevent" + ) + op.drop_table("alertdeduplicationevent") + # ### end Alembic commands ### From 14b8f066ecc837cdeb4cd18454f72be48a2aa0e6 Mon Sep 17 00:00:00 2001 From: shahargl Date: Tue, 17 Sep 2024 19:46:02 +0300 Subject: [PATCH 18/36] feat: wip --- .../alert_deduplicator/alert_deduplicator.py | 23 +- keep/api/core/db.py | 11 +- tests/fixtures/client.py | 5 +- 
tests/test_deduplications.py | 538 ++++++++++++++++++ tests/test_parser.py | 28 +- 5 files changed, 579 insertions(+), 26 deletions(-) create mode 100644 tests/test_deduplications.py diff --git a/keep/api/alert_deduplicator/alert_deduplicator.py b/keep/api/alert_deduplicator/alert_deduplicator.py index 38ea00e78..a52940bda 100644 --- a/keep/api/alert_deduplicator/alert_deduplicator.py +++ b/keep/api/alert_deduplicator/alert_deduplicator.py @@ -218,7 +218,7 @@ def _get_default_full_deduplication_rule( name=f"{provider_type} default deduplication rule", description=f"{provider_type} default deduplication rule", default=True, - distribution=[], + distribution=[{"hour": i, "number": 0} for i in range(24)], fingerprint_fields=[], # ["fingerprint"], # this is fallback provider_type=provider_type or "keep", provider_id=provider_id, @@ -326,20 +326,29 @@ def get_deduplications(self) -> list[DeduplicationRuleDto]: result = [] for dedup in final_deduplications: key = f"{dedup.provider_type}_{dedup.provider_id}" - dedup.ingested = alerts_by_provider_stats[key].get("num_alerts", 0) + dedup.ingested = alerts_by_provider_stats.get(key, {"num_alerts": 0}).get( + "num_alerts", 0 + ) + # full deduplication is also counted as ingested + dedup.ingested += deduplication_stats.get(key, {"full_dedup_count": 0}).get( + "full_dedup_count", 0 + ) + # total dedup count is the sum of full and partial dedup count + dedup_count = deduplication_stats.get(key, {"full_dedup_count": 0}).get( + "full_dedup_count", 0 + ) + deduplication_stats.get(key, {"partial_dedup_count": 0}).get( + "partial_dedup_count", 0 + ) if dedup.ingested == 0: dedup.dedup_ratio = 0.0 # this shouldn't happen, only in backward compatibility or some bug that dedup events are not created elif key not in deduplication_stats: self.logger.warning(f"Provider {key} does not have deduplication stats") dedup.dedup_ratio = 0.0 - elif deduplication_stats[key].get("dedup_count", 0) == 0: + elif dedup_count == 0: dedup.dedup_ratio = 0.0 
else: - dedup.dedup_ratio = ( - deduplication_stats[key].get("dedup_count") - / (deduplication_stats[key].get("dedup_count") + dedup.ingested) - ) * 100 + dedup.dedup_ratio = (dedup_count / dedup.ingested) * 100 dedup.distribution = deduplication_stats[key].get( "alerts_last_24_hours" ) diff --git a/keep/api/core/db.py b/keep/api/core/db.py index a69fbaa92..29a87939c 100644 --- a/keep/api/core/db.py +++ b/keep/api/core/db.py @@ -1824,12 +1824,14 @@ def get_all_deduplication_stats(tenant_id): select( AlertDeduplicationEvent.provider_id, AlertDeduplicationEvent.provider_type, + AlertDeduplicationEvent.deduplication_type, func.count(AlertDeduplicationEvent.id).label("dedup_count"), ) .where(AlertDeduplicationEvent.tenant_id == tenant_id) .group_by( AlertDeduplicationEvent.provider_id, AlertDeduplicationEvent.provider_type, + AlertDeduplicationEvent.deduplication_type, ) ) @@ -1862,6 +1864,7 @@ def get_all_deduplication_stats(tenant_id): provider_id = result.provider_id provider_type = result.provider_type dedup_count = result.dedup_count + dedup_type = result.deduplication_type # alerts without provider_id and provider_type are considered as "keep" if not provider_type: @@ -1870,14 +1873,18 @@ def get_all_deduplication_stats(tenant_id): key = f"{provider_type}_{provider_id}" if key not in stats: stats[key] = { - "dedup_count": 0, + "full_dedup_count": 0, + "partial_dedup_count": 0, "alerts_last_24_hours": [ {"hour": (current_hour - timedelta(hours=i)).hour, "number": 0} for i in range(0, 24) ], } - stats[key]["dedup_count"] = dedup_count + if dedup_type == "full": + stats[key]["full_dedup_count"] += dedup_count + elif dedup_type == "partial": + stats[key]["partial_dedup_count"] += dedup_count # Add alerts distribution from the last 24 hours for result in alerts_last_24_hours_results: diff --git a/tests/fixtures/client.py b/tests/fixtures/client.py index c49b6ece5..e71a3a2c1 100644 --- a/tests/fixtures/client.py +++ b/tests/fixtures/client.py @@ -47,7 +47,10 @@ def 
test_app(monkeypatch, request): for event_handler in app.router.on_startup: asyncio.run(event_handler()) - return app + yield app + + for event_handler in app.router.on_shutdown: + asyncio.run(event_handler()) # Fixture for TestClient using the test_app fixture diff --git a/tests/test_deduplications.py b/tests/test_deduplications.py new file mode 100644 index 000000000..e9a628983 --- /dev/null +++ b/tests/test_deduplications.py @@ -0,0 +1,538 @@ +import pytest + +from keep.providers.providers_factory import ProvidersFactory +from tests.fixtures.client import client, setup_api_key, test_app # noqa + + +@pytest.mark.parametrize( + "test_app", + [ + { + "AUTH_TYPE": "NOAUTH", + }, + ], + indirect=True, +) +def test_default_deduplication_rule(db_session, client, test_app): + # insert an alert with some provider_id and make sure that the default deduplication rule is working + provider_classes = { + provider: ProvidersFactory.get_provider_class(provider) + for provider in ["datadog", "prometheus"] + } + for provider_type, provider in provider_classes.items(): + alert = provider.simulate_alert() + client.post( + f"/alerts/event/{provider_type}?", + json=alert, + headers={"x-api-key": "some-api-key"}, + ) + + deduplication_rules = client.get( + "/deduplications", headers={"x-api-key": "some-api-key"} + ).json() + assert len(deduplication_rules) == 3 # default + datadog + prometheus + + for dedup_rule in deduplication_rules: + # check that the default deduplication rule is working + if dedup_rule.get("provider_type") == "keep": + assert dedup_rule.get("ingested") == 0 + assert dedup_rule.get("default") + # check how many times the alert was deduplicated in the last 24 hours + assert dedup_rule.get("distribution") == [ + {"hour": i, "number": 0} for i in range(24) + ] + # check that the datadog/prometheus deduplication rule is working + else: + assert dedup_rule.get("ingested") == 1 + # the deduplication ratio is zero since the alert was not deduplicated + assert 
dedup_rule.get("dedup_ratio") == 0 + assert dedup_rule.get("default") + + +@pytest.mark.parametrize( + "test_app", + [ + { + "AUTH_TYPE": "NOAUTH", + }, + ], + indirect=True, +) +def test_deduplication_sanity(db_session, client, test_app): + # insert the same alert twice and make sure that the default deduplication rule is working + # insert an alert with some provider_id and make sure that the default deduplication rule is working + provider = ProvidersFactory.get_provider_class("datadog") + alert = provider.simulate_alert() + for i in range(2): + client.post( + "/alerts/event/datadog", json=alert, headers={"x-api-key": "some-api-key"} + ) + + deduplication_rules = client.get( + "/deduplications", headers={"x-api-key": "some-api-key"} + ).json() + + assert len(deduplication_rules) == 2 # default + datadog + + for dedup_rule in deduplication_rules: + # check that the default deduplication rule is working + if dedup_rule.get("provider_type") == "keep": + assert dedup_rule.get("ingested") == 0 + assert dedup_rule.get("default") + # check how many times the alert was deduplicated in the last 24 hours + assert dedup_rule.get("distribution") == [ + {"hour": i, "number": 0} for i in range(24) + ] + # check that the datadog/prometheus deduplication rule is working + else: + assert dedup_rule.get("ingested") == 2 + # the deduplication ratio is zero since the alert was not deduplicated + assert dedup_rule.get("dedup_ratio") == 50.0 + assert dedup_rule.get("default") + + +@pytest.mark.parametrize( + "test_app", + [ + { + "AUTH_TYPE": "NOAUTH", + }, + ], + indirect=True, +) +def test_deduplication_sanity_2(db_session, client, test_app): + # insert two different alerts, twice each, and make sure that the default deduplication rule is working + provider = ProvidersFactory.get_provider_class("datadog") + alert1 = provider.simulate_alert() + alert2 = provider.simulate_alert() + + for alert in [alert1, alert2]: + for _ in range(2): + client.post( + "/alerts/event/datadog", + 
json=alert, + headers={"x-api-key": "some-api-key"}, + ) + + deduplication_rules = client.get( + "/deduplications", headers={"x-api-key": "some-api-key"} + ).json() + + assert len(deduplication_rules) == 2 # default + datadog + + for dedup_rule in deduplication_rules: + if dedup_rule.get("provider_type") == "datadog": + assert dedup_rule.get("ingested") == 4 + assert dedup_rule.get("dedup_ratio") == 50.0 + assert dedup_rule.get("default") + + +@pytest.mark.parametrize( + "test_app", + [ + { + "AUTH_TYPE": "NOAUTH", + }, + ], + indirect=True, +) +def test_deduplication_sanity_3(db_session, client, test_app): + # insert many alerts and make sure that the default deduplication rule is working + provider = ProvidersFactory.get_provider_class("datadog") + alerts = [provider.simulate_alert() for _ in range(10)] + + for alert in alerts: + client.post( + "/alerts/event/datadog", json=alert, headers={"x-api-key": "some-api-key"} + ) + + deduplication_rules = client.get( + "/deduplications", headers={"x-api-key": "some-api-key"} + ).json() + + assert len(deduplication_rules) == 2 # default + datadog + + for dedup_rule in deduplication_rules: + if dedup_rule.get("provider_type") == "datadog": + assert dedup_rule.get("ingested") == 10 + assert dedup_rule.get("dedup_ratio") == 0 + assert dedup_rule.get("default") + + +@pytest.mark.parametrize( + "test_app", + [ + { + "AUTH_TYPE": "NOAUTH", + }, + ], + indirect=True, +) +def test_custom_deduplication_rule(db_session, client, test_app): + # create a custom deduplication rule and insert alerts that should be deduplicated by this + custom_rule = { + "description": "Custom Rule", + "provider_type": "datadog", + "deduplication_fields": ["title", "message"], + "default": False, + } + + client.post( + "/deduplications", json=custom_rule, headers={"x-api-key": "some-api-key"} + ) + + provider = ProvidersFactory.get_provider_class("datadog") + alert = provider.simulate_alert() + + for _ in range(2): + client.post( + 
"/alerts/event/datadog", json=alert, headers={"x-api-key": "some-api-key"} + ) + + deduplication_rules = client.get( + "/deduplications", headers={"x-api-key": "some-api-key"} + ).json() + + custom_rule_found = False + for dedup_rule in deduplication_rules: + if dedup_rule.get("description") == "Custom Rule": + custom_rule_found = True + assert dedup_rule.get("ingested") == 2 + assert dedup_rule.get("dedup_ratio") == 50.0 + assert not dedup_rule.get("default") + + assert custom_rule_found + + +@pytest.mark.parametrize( + "test_app", + [ + { + "AUTH_TYPE": "NOAUTH", + }, + ], + indirect=True, +) +def test_custom_deduplication_rule_2(db_session, client, test_app): + # create a custom deduplication rule and insert alerts that should not be deduplicated by this + custom_rule = { + "description": "Custom Rule", + "provider_type": "datadog", + "deduplication_fields": ["title", "message"], + "default": False, + } + + client.post( + "/deduplications", json=custom_rule, headers={"x-api-key": "some-api-key"} + ) + + provider = ProvidersFactory.get_provider_class("datadog") + alert1 = provider.simulate_alert() + alert2 = provider.simulate_alert() + + client.post( + "/alerts/event/datadog", json=alert1, headers={"x-api-key": "some-api-key"} + ) + client.post( + "/alerts/event/datadog", json=alert2, headers={"x-api-key": "some-api-key"} + ) + + deduplication_rules = client.get( + "/deduplications", headers={"x-api-key": "some-api-key"} + ).json() + + custom_rule_found = False + for dedup_rule in deduplication_rules: + if dedup_rule.get("description") == "Custom Rule": + custom_rule_found = True + assert dedup_rule.get("ingested") == 2 + assert dedup_rule.get("dedup_ratio") == 0 + assert not dedup_rule.get("default") + + assert custom_rule_found + + +@pytest.mark.parametrize( + "test_app", + [ + { + "AUTH_TYPE": "NOAUTH", + }, + ], + indirect=True, +) +def test_update_deduplication_rule(db_session, client, test_app): + # create a custom deduplication rule and update it + 
custom_rule = { + "description": "Custom Rule", + "provider_type": "datadog", + "deduplication_fields": ["title", "message"], + "default": False, + } + + response = client.post( + "/deduplications", json=custom_rule, headers={"x-api-key": "some-api-key"} + ) + rule_id = response.json().get("id") + + updated_rule = { + "description": "Updated Custom Rule", + "provider_type": "datadog", + "deduplication_fields": ["title"], + "default": False, + } + + client.put( + f"/deduplications/{rule_id}", + json=updated_rule, + headers={"x-api-key": "some-api-key"}, + ) + + deduplication_rules = client.get( + "/deduplications", headers={"x-api-key": "some-api-key"} + ).json() + + updated_rule_found = False + for dedup_rule in deduplication_rules: + if dedup_rule.get("id") == rule_id: + updated_rule_found = True + assert dedup_rule.get("description") == "Updated Custom Rule" + assert dedup_rule.get("deduplication_fields") == ["title"] + + assert updated_rule_found + + +@pytest.mark.parametrize( + "test_app", + [ + { + "AUTH_TYPE": "NOAUTH", + }, + ], + indirect=True, +) +def test_delete_deduplication_rule_sanity(db_session, client, test_app): + # create a custom deduplication rule and delete it + custom_rule = { + "description": "Custom Rule", + "provider_type": "datadog", + "deduplication_fields": ["title", "message"], + "default": False, + } + + response = client.post( + "/deduplications", json=custom_rule, headers={"x-api-key": "some-api-key"} + ) + rule_id = response.json().get("id") + + client.delete(f"/deduplications/{rule_id}", headers={"x-api-key": "some-api-key"}) + + deduplication_rules = client.get( + "/deduplications", headers={"x-api-key": "some-api-key"} + ).json() + + assert all(rule.get("id") != rule_id for rule in deduplication_rules) + + +@pytest.mark.parametrize( + "test_app", + [ + { + "AUTH_TYPE": "NOAUTH", + }, + ], + indirect=True, +) +def test_delete_deduplication_rule_invalid(db_session, client, test_app): + # try to delete a deduplication rule that does 
not exist + response = client.delete( + "/deduplications/non-existent-id", headers={"x-api-key": "some-api-key"} + ) + + assert response.status_code == 404 + + +@pytest.mark.parametrize( + "test_app", + [ + { + "AUTH_TYPE": "NOAUTH", + }, + ], + indirect=True, +) +def test_delete_deduplication_rule_default(db_session, client, test_app): + # try to delete a default deduplication rule + deduplication_rules = client.get( + "/deduplications", headers={"x-api-key": "some-api-key"} + ).json() + + default_rule_id = next( + rule["id"] for rule in deduplication_rules if rule["default"] + ) + + response = client.delete( + f"/deduplications/{default_rule_id}", headers={"x-api-key": "some-api-key"} + ) + + assert response.status_code == 400 + + +@pytest.mark.parametrize( + "test_app", + [ + { + "AUTH_TYPE": "NOAUTH", + }, + ], + indirect=True, +) +def test_full_deduplication(db_session, client, test_app): + # create a custom deduplication rule with full deduplication and insert alerts that should be deduplicated by this + custom_rule = { + "description": "Full Deduplication Rule", + "provider_type": "datadog", + "deduplication_fields": ["title", "message", "source"], + "default": False, + } + + client.post( + "/deduplications", json=custom_rule, headers={"x-api-key": "some-api-key"} + ) + + provider = ProvidersFactory.get_provider_class("datadog") + alert = provider.simulate_alert() + + for _ in range(3): + client.post( + "/alerts/event/datadog", json=alert, headers={"x-api-key": "some-api-key"} + ) + + deduplication_rules = client.get( + "/deduplications", headers={"x-api-key": "some-api-key"} + ).json() + + full_dedup_rule_found = False + for dedup_rule in deduplication_rules: + if dedup_rule.get("description") == "Full Deduplication Rule": + full_dedup_rule_found = True + assert dedup_rule.get("ingested") == 3 + assert dedup_rule.get("dedup_ratio") == 66.67 + + assert full_dedup_rule_found + + +@pytest.mark.parametrize( + "test_app", + [ + { + "AUTH_TYPE": "NOAUTH", + }, + 
], + indirect=True, +) +def test_partial_deduplication(db_session, client, test_app): + # insert a datadog alert with the same incident_id, group and title and make sure that the datadog default deduplication rule is working + provider = ProvidersFactory.get_provider_class("datadog") + base_alert = provider.simulate_alert() + + alerts = [ + base_alert, + {**base_alert, "message": "Different message"}, + {**base_alert, "source": "Different source"}, + ] + + for alert in alerts: + client.post( + "/alerts/event/datadog", json=alert, headers={"x-api-key": "some-api-key"} + ) + + deduplication_rules = client.get( + "/deduplications", headers={"x-api-key": "some-api-key"} + ).json() + + datadog_rule_found = False + for dedup_rule in deduplication_rules: + if dedup_rule.get("provider_type") == "datadog" and dedup_rule.get("default"): + datadog_rule_found = True + assert dedup_rule.get("ingested") == 3 + assert ( + dedup_rule.get("dedup_ratio") > 0 + and dedup_rule.get("dedup_ratio") < 100 + ) + + assert datadog_rule_found + + +@pytest.mark.parametrize( + "test_app", + [ + { + "AUTH_TYPE": "NOAUTH", + }, + ], + indirect=True, +) +def test_ingesting_alert_without_fingerprint_fields(db_session, client, test_app): + # insert a datadog alert without the required fingerprint fields and make sure that it is not deduplicated + provider = ProvidersFactory.get_provider_class("datadog") + alert = provider.simulate_alert() + alert.pop("incident_id") + alert.pop("group") + alert.pop("title") + + client.post( + "/alerts/event/datadog", json=alert, headers={"x-api-key": "some-api-key"} + ) + + deduplication_rules = client.get( + "/deduplications", headers={"x-api-key": "some-api-key"} + ).json() + + datadog_rule_found = False + for dedup_rule in deduplication_rules: + if dedup_rule.get("provider_type") == "datadog" and dedup_rule.get("default"): + datadog_rule_found = True + assert dedup_rule.get("ingested") == 1 + assert dedup_rule.get("dedup_ratio") == 0 + + assert datadog_rule_found 
+ + +@pytest.mark.parametrize( + "test_app", + [ + { + "AUTH_TYPE": "NOAUTH", + }, + ], + indirect=True, +) +def test_deduplication_fields(db_session, client, test_app): + # insert a datadog alert with the same incident_id and make sure that the datadog default deduplication rule is working + provider = ProvidersFactory.get_provider_class("datadog") + base_alert = provider.simulate_alert() + + alerts = [ + base_alert, + {**base_alert, "group": "Different group"}, + {**base_alert, "title": "Different title"}, + ] + + for alert in alerts: + client.post( + "/alerts/event/datadog", json=alert, headers={"x-api-key": "some-api-key"} + ) + + deduplication_rules = client.get( + "/deduplications", headers={"x-api-key": "some-api-key"} + ).json() + + datadog_rule_found = False + for dedup_rule in deduplication_rules: + if dedup_rule.get("provider_type") == "datadog" and dedup_rule.get("default"): + datadog_rule_found = True + assert dedup_rule.get("ingested") == 3 + assert dedup_rule.get("dedup_ratio") == 66.67 + + assert datadog_rule_found diff --git a/tests/test_parser.py b/tests/test_parser.py index 54c0a4a78..6e607fe37 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,8 +1,8 @@ # here we are going to create all needed tests for the parser.py parse function -import uuid import builtins import json import time +import uuid from pathlib import Path import pytest @@ -11,13 +11,13 @@ from fastapi import HTTPException from keep.api.core.dependencies import SINGLE_TENANT_UUID +from keep.api.models.db.action import Action from keep.contextmanager.contextmanager import ContextManager from keep.parser.parser import Parser, ParserUtils from keep.providers.mock_provider.mock_provider import MockProvider from keep.providers.models.provider_config import ProviderConfig from keep.step.step import Step from keep.workflowmanager.workflowstore import WorkflowStore -from keep.api.models.db.action import Action def test_parse_with_nonexistent_file(db_session): @@ -86,14 
+86,6 @@ def test_parse_all_alerts(db_session): # You can add more specific assertions based on the content of mock_files and how they are parsed into alerts. -# This test depends on the previous one because of global providers configuration -@pytest.mark.xfail -def test_parse_with_alert_source_with_no_providers_file(): - parser = Parser() - with pytest.raises(TypeError): - parser.parse(str(workflow_path)) - - def parse_env_setup(context_manager): parser = Parser() parser._parse_providers_from_env(context_manager=context_manager) @@ -301,7 +293,9 @@ def test_parse_alert_steps(self): ## Test Case for reusable actions path_to_test_reusable_resources = Path(__file__).parent / "workflows" reusable_workflow_path = str(path_to_test_resources / "reusable_alert_for_testing.yml") -reusable_workflow_with_action_path = str(path_to_test_resources / "reusable_alert_with_actions_for_testing.yml") +reusable_workflow_with_action_path = str( + path_to_test_resources / "reusable_alert_with_actions_for_testing.yml" +) reusable_providers_path = str(path_to_test_resources / "providers_for_testing.yaml") reusable_actions_path = str(path_to_test_resources / "reusable_actions_for_testing.yml") @@ -397,7 +391,7 @@ def test_load_actions_config(self, db_session): class TestParserUtils: - + def test_deep_merge_dict(self): """Dictionary: if the merge combines recursively and prioritize values of source""" source = {"1": {"s11": "s11", "s12": "s12"}, "2": {"s21": "s21"}} @@ -405,16 +399,18 @@ def test_deep_merge_dict(self): expected_results = { "1": {"s11": "s11", "s12": "s12", "d11": "d11", "d12": "d12"}, "2": {"s21": "s21"}, - "3": {"d31": "d31"} + "3": {"d31": "d31"}, } results = ParserUtils.deep_merge(source, dest) assert expected_results == results def test_deep_merge_list(self): """List: if the merge combines recursively and prioritize values of source""" - source = {"data": [{"s1": "s1"}, {"s2": "s2"}]} - dest = {"data": [{"d1": "d1"}, {"d2": "d2"}, {"d3": "d3"}]} - expected_results = 
{"data": [{"s1": "s1", "d1": "d1"}, {"s2": "s2", "d2": "d2"}, {"d3": "d3"}]} + source = {"data": [{"s1": "s1"}, {"s2": "s2"}]} + dest = {"data": [{"d1": "d1"}, {"d2": "d2"}, {"d3": "d3"}]} + expected_results = { + "data": [{"s1": "s1", "d1": "d1"}, {"s2": "s2", "d2": "d2"}, {"d3": "d3"}] + } results = ParserUtils.deep_merge(source, dest) assert expected_results == results From 1eb7e1ec305047b38e9293ba957d4e85a1b17a45 Mon Sep 17 00:00:00 2001 From: shahargl Date: Thu, 19 Sep 2024 14:59:36 +0100 Subject: [PATCH 19/36] feat: tests --- .../alert_deduplicator/alert_deduplicator.py | 71 +++-- keep/api/api.py | 13 +- keep/api/core/db.py | 32 ++- keep/api/routes/deduplications.py | 14 + keep/providers/base/base_provider.py | 2 +- tests/test_deduplications.py | 270 +++++++++++++++--- 6 files changed, 340 insertions(+), 62 deletions(-) diff --git a/keep/api/alert_deduplicator/alert_deduplicator.py b/keep/api/alert_deduplicator/alert_deduplicator.py index a52940bda..9dd72fc7e 100644 --- a/keep/api/alert_deduplicator/alert_deduplicator.py +++ b/keep/api/alert_deduplicator/alert_deduplicator.py @@ -4,13 +4,14 @@ import logging import uuid +from fastapi import HTTPException + from keep.api.core.config import config from keep.api.core.db import ( create_deduplication_event, create_deduplication_rule, delete_deduplication_rule, get_alerts_fields, - get_all_alerts_by_providers, get_all_deduplication_rules, get_all_deduplication_stats, get_custom_deduplication_rules, @@ -132,6 +133,16 @@ def apply_deduplication(self, alert: AlertDto) -> bool: ) # we don't need to check the other rules break + else: + # create none deduplication event, for statistics + create_deduplication_event( + tenant_id=self.tenant_id, + deduplication_rule_id=rule.id, + deduplication_type="none", + provider_id=alert.providerId, + provider_type=alert.providerType, + ) + return alert def _remove_field(self, field, alert: AlertDto) -> AlertDto: @@ -200,14 +211,19 @@ def get_deduplication_rules( rule.ignore_fields = 
default_full_dedup_rule.ignore_fields return rules - def _get_default_full_deduplication_rule( - self, provider_id, provider_type - ) -> DeduplicationRuleDto: + def _generate_uuid(self, provider_id, provider_type): # this is a way to generate a unique uuid for the default deduplication rule per (provider_id, provider_type) namespace_uuid = uuid.uuid5(uuid.NAMESPACE_DNS, "keephq.dev") generated_uuid = str( uuid.uuid5(namespace_uuid, f"{provider_id}_{provider_type}") ) + return generated_uuid + + def _get_default_full_deduplication_rule( + self, provider_id, provider_type + ) -> DeduplicationRuleDto: + # this is a way to generate a unique uuid for the default deduplication rule per (provider_id, provider_type) + generated_uuid = self._generate_uuid(provider_id, provider_type) # just return a default deduplication rule with lastReceived field if not provider_type: @@ -249,6 +265,9 @@ def get_deduplications(self) -> list[DeduplicationRuleDto]: default_deduplications_dict = { dd.provider_type: dd for dd in default_deduplications } + for dd in default_deduplications: + provider_id, provider_type = dd.provider_id, dd.provider_type + dd.id = self._generate_uuid(provider_id, provider_type) # get custom deduplication rules custom_deduplications = get_all_deduplication_rules(self.tenant_id) # cast to dto @@ -320,25 +339,26 @@ def get_deduplications(self) -> list[DeduplicationRuleDto]: final_deduplications += custom_deduplications_dict[key] # now calculate some statistics - alerts_by_provider_stats = get_all_alerts_by_providers(self.tenant_id) + # alerts_by_provider_stats = get_all_alerts_by_providers(self.tenant_id) deduplication_stats = get_all_deduplication_stats(self.tenant_id) result = [] for dedup in final_deduplications: - key = f"{dedup.provider_type}_{dedup.provider_id}" - dedup.ingested = alerts_by_provider_stats.get(key, {"num_alerts": 0}).get( - "num_alerts", 0 - ) - # full deduplication is also counted as ingested - dedup.ingested += deduplication_stats.get(key, 
{"full_dedup_count": 0}).get( + key = dedup.id + full_dedup = deduplication_stats.get(key, {"full_dedup_count": 0}).get( "full_dedup_count", 0 ) - # total dedup count is the sum of full and partial dedup count - dedup_count = deduplication_stats.get(key, {"full_dedup_count": 0}).get( - "full_dedup_count", 0 - ) + deduplication_stats.get(key, {"partial_dedup_count": 0}).get( - "partial_dedup_count", 0 + partial_dedup = deduplication_stats.get( + key, {"partial_dedup_count": 0} + ).get("partial_dedup_count", 0) + none_dedup = deduplication_stats.get(key, {"none_dedup_count": 0}).get( + "none_dedup_count", 0 ) + + dedup.ingested = full_dedup + partial_dedup + none_dedup + # total dedup count is the sum of full and partial dedup count + dedup_count = full_dedup + partial_dedup + if dedup.ingested == 0: dedup.dedup_ratio = 0.0 # this shouldn't happen, only in backward compatibility or some bug that dedup events are not created @@ -384,6 +404,25 @@ def get_deduplication_fields(self) -> list[str]: def create_deduplication_rule( self, rule: DeduplicationRuleRequestDto, created_by: str ) -> DeduplicationRuleDto: + # check that provider installed (cannot create deduplication rule for uninstalled provider) + provider = None + installed_providers = ProvidersFactory.get_installed_providers(self.tenant_id) + linked_providers = ProvidersFactory.get_linked_providers(self.tenant_id) + provider_key = f"{rule.provider_type}_{rule.provider_id}" + for p in installed_providers + linked_providers: + if provider_key == f"{p.type}_{p.id}": + provider = p + break + + if not provider: + message = f"Provider {rule.provider_type} not found" + if rule.provider_id: + message += f" with id {rule.provider_id}" + raise HTTPException( + status_code=404, + detail=message, + ) + # Use the db function to create a new deduplication rule new_rule = create_deduplication_rule( tenant_id=self.tenant_id, diff --git a/keep/api/api.py b/keep/api/api.py index 6634c0308..5bcc0ddcc 100644 --- a/keep/api/api.py 
+++ b/keep/api/api.py @@ -301,12 +301,21 @@ async def on_shutdown(): if SCHEDULER: logger.info("Stopping the scheduler") wf_manager = WorkflowManager.get_instance() - await wf_manager.stop() + # stop the scheduler + try: + await wf_manager.stop() + # in pytest, there could be race condition + except TypeError: + pass logger.info("Scheduler stopped successfully") if CONSUMER: logger.info("Stopping the consumer") event_subscriber = EventSubscriber.get_instance() - await event_subscriber.stop() + try: + await event_subscriber.stop() + # in pytest, there could be race condition + except TypeError: + pass logger.info("Consumer stopped successfully") # ARQ workers stops themselves? see "shutdown on SIGTERM" in logs logger.info("Keep shutdown complete") diff --git a/keep/api/core/db.py b/keep/api/core/db.py index 673c9c558..19c661bcd 100644 --- a/keep/api/core/db.py +++ b/keep/api/core/db.py @@ -1689,8 +1689,6 @@ def get_all_deduplication_rules(tenant_id): AlertDeduplicationRule.tenant_id == tenant_id ) ).all() - # cast to dto - return rules @@ -1857,6 +1855,7 @@ def get_all_deduplication_stats(tenant_id): # Query to get all-time deduplication stats all_time_query = ( select( + AlertDeduplicationEvent.deduplication_rule_id, AlertDeduplicationEvent.provider_id, AlertDeduplicationEvent.provider_type, AlertDeduplicationEvent.deduplication_type, @@ -1864,6 +1863,7 @@ def get_all_deduplication_stats(tenant_id): ) .where(AlertDeduplicationEvent.tenant_id == tenant_id) .group_by( + AlertDeduplicationEvent.deduplication_rule_id, AlertDeduplicationEvent.provider_id, AlertDeduplicationEvent.provider_type, AlertDeduplicationEvent.deduplication_type, @@ -1876,6 +1876,7 @@ def get_all_deduplication_stats(tenant_id): twenty_four_hours_ago = datetime.utcnow() - timedelta(hours=24) alerts_last_24_hours_query = ( select( + AlertDeduplicationEvent.deduplication_rule_id, AlertDeduplicationEvent.provider_id, AlertDeduplicationEvent.provider_type, AlertDeduplicationEvent.date_hour, @@ -1884,6 
+1885,7 @@ def get_all_deduplication_stats(tenant_id): .where(AlertDeduplicationEvent.tenant_id == tenant_id) .where(AlertDeduplicationEvent.date_hour >= twenty_four_hours_ago) .group_by( + AlertDeduplicationEvent.deduplication_rule_id, AlertDeduplicationEvent.provider_id, AlertDeduplicationEvent.provider_type, AlertDeduplicationEvent.date_hour, @@ -1892,7 +1894,7 @@ def get_all_deduplication_stats(tenant_id): alerts_last_24_hours_results = session.exec(alerts_last_24_hours_query).all() - # Create a dictionary with deduplication stats for each provider + # Create a dictionary with deduplication stats for each rule stats = {} current_hour = datetime.utcnow().replace(minute=0, second=0, microsecond=0) for result in all_time_results: @@ -1905,21 +1907,27 @@ def get_all_deduplication_stats(tenant_id): if not provider_type: provider_type = "keep" - key = f"{provider_type}_{provider_id}" + key = str(result.deduplication_rule_id) if key not in stats: + # initialize the stats for the deduplication rule stats[key] = { "full_dedup_count": 0, "partial_dedup_count": 0, + "none_dedup_count": 0, "alerts_last_24_hours": [ {"hour": (current_hour - timedelta(hours=i)).hour, "number": 0} for i in range(0, 24) ], + "provider_id": provider_id, + "provider_type": provider_type, } if dedup_type == "full": stats[key]["full_dedup_count"] += dedup_count elif dedup_type == "partial": stats[key]["partial_dedup_count"] += dedup_count + elif dedup_type == "none": + stats[key]["none_dedup_count"] += dedup_count # Add alerts distribution from the last 24 hours for result in alerts_last_24_hours_results: @@ -1927,10 +1935,11 @@ def get_all_deduplication_stats(tenant_id): provider_type = result.provider_type date_hour = result.date_hour hourly_count = result.hourly_count + key = str(result.deduplication_rule_id) if not provider_type: provider_type = "keep" - key = f"{provider_type}_{provider_id}" + if key in stats: hours_ago = int((current_hour - date_hour).total_seconds() / 3600) if 0 <= 
hours_ago < 24: @@ -3138,6 +3147,19 @@ def get_provider_by_name(tenant_id: str, provider_name: str) -> Provider: return provider +def get_provider_by_type_and_id( + tenant_id: str, provider_type: str, provider_id: Optional[str] +) -> Provider: + with Session(engine) as session: + query = select(Provider).where( + Provider.tenant_id == tenant_id, + Provider.type == provider_type, + Provider.id == provider_id, + ) + provider = session.exec(query).first() + return provider + + def bulk_upsert_alert_fields( tenant_id: str, fields: List[str], provider_id: str, provider_type: str ): diff --git a/keep/api/routes/deduplications.py b/keep/api/routes/deduplications.py index 64b19ece5..0e8b45c54 100644 --- a/keep/api/routes/deduplications.py +++ b/keep/api/routes/deduplications.py @@ -1,4 +1,5 @@ import logging +import uuid from fastapi import APIRouter, Depends, HTTPException @@ -73,6 +74,8 @@ def create_deduplication_rule( ) logger.info("Created deduplication rule") return created_rule + except HTTPException as e: + raise e except Exception as e: logger.exception("Error creating deduplication rule") raise HTTPException(status_code=400, detail=str(e)) @@ -116,6 +119,13 @@ def delete_deduplication_rule( tenant_id = authenticated_entity.tenant_id logger.info("Deleting deduplication rule", extra={"rule_id": rule_id}) alert_deduplicator = AlertDeduplicator(tenant_id) + + # verify rule id is uuid + try: + uuid.UUID(rule_id) + except ValueError: + raise HTTPException(status_code=400, detail="Invalid rule id") + try: success = alert_deduplicator.delete_deduplication_rule(rule_id) if success: @@ -123,6 +133,10 @@ def delete_deduplication_rule( return {"message": "Deduplication rule deleted successfully"} else: raise HTTPException(status_code=404, detail="Deduplication rule not found") + except HTTPException as e: + logger.exception("Error deleting deduplication rule") + # keep the same status code + raise e except Exception as e: logger.exception("Error deleting deduplication rule") 
raise HTTPException(status_code=400, detail=str(e)) diff --git a/keep/providers/base/base_provider.py b/keep/providers/base/base_provider.py index 5c4994c54..ba968837b 100644 --- a/keep/providers/base/base_provider.py +++ b/keep/providers/base/base_provider.py @@ -347,7 +347,7 @@ def format_alert( }, ) alert.fingerprint = cls.get_alert_fingerprint( - alert, custom_deduplication_rule.deduplication_fields + alert, custom_deduplication_rule.fingerprint_fields ) return formatted_alert diff --git a/tests/test_deduplications.py b/tests/test_deduplications.py index e9a628983..2db53ea04 100644 --- a/tests/test_deduplications.py +++ b/tests/test_deduplications.py @@ -1,3 +1,6 @@ +import random +import uuid + import pytest from keep.providers.providers_factory import ProvidersFactory @@ -104,7 +107,10 @@ def test_deduplication_sanity_2(db_session, client, test_app): # insert two different alerts, twice each, and make sure that the default deduplication rule is working provider = ProvidersFactory.get_provider_class("datadog") alert1 = provider.simulate_alert() - alert2 = provider.simulate_alert() + alert2 = alert1 + # datadog deduplicated by monitor_id + while alert2.get("monitor_id") == alert1.get("monitor_id"): + alert2 = provider.simulate_alert() for alert in [alert1, alert2]: for _ in range(2): @@ -141,7 +147,12 @@ def test_deduplication_sanity_3(db_session, client, test_app): provider = ProvidersFactory.get_provider_class("datadog") alerts = [provider.simulate_alert() for _ in range(10)] + monitor_ids = set() for alert in alerts: + # lets make it not deduplicated by randomizing the monitor_id + while alert["monitor_id"] in monitor_ids: + alert["monitor_id"] = random.randint(0, 10**10) + monitor_ids.add(alert["monitor_id"]) client.post( "/alerts/event/datadog", json=alert, headers={"x-api-key": "some-api-key"} ) @@ -169,22 +180,87 @@ def test_deduplication_sanity_3(db_session, client, test_app): indirect=True, ) def test_custom_deduplication_rule(db_session, client, 
test_app): + provider = ProvidersFactory.get_provider_class("datadog") + alert1 = provider.simulate_alert() + client.post( + "/alerts/event/datadog", json=alert1, headers={"x-api-key": "some-api-key"} + ) + # create a custom deduplication rule and insert alerts that should be deduplicated by this custom_rule = { - "description": "Custom Rule", + "name": "Custom Rule", + "description": "Custom Rule Description", "provider_type": "datadog", - "deduplication_fields": ["title", "message"], - "default": False, + "fingerprint_fields": ["title", "message"], + "full_deduplication": False, + "ignore_fields": None, } + resp = client.post( + "/deduplications", json=custom_rule, headers={"x-api-key": "some-api-key"} + ) + assert resp.status_code == 200 + + provider = ProvidersFactory.get_provider_class("datadog") + alert = provider.simulate_alert() + + for _ in range(2): + client.post( + "/alerts/event/datadog", json=alert, headers={"x-api-key": "some-api-key"} + ) + + deduplication_rules = client.get( + "/deduplications", headers={"x-api-key": "some-api-key"} + ).json() + + custom_rule_found = False + for dedup_rule in deduplication_rules: + if dedup_rule.get("name") == "Custom Rule": + custom_rule_found = True + assert dedup_rule.get("ingested") == 2 + assert dedup_rule.get("dedup_ratio") == 50.0 + assert not dedup_rule.get("default") + + assert custom_rule_found + + +@pytest.mark.parametrize( + "test_app", + [ + { + "AUTH_TYPE": "NOAUTH", + }, + ], + indirect=True, +) +def test_custom_deduplication_rule_behaviour(db_session, client, test_app): + # create a custom deduplication rule and insert alerts that should be deduplicated by this + provider = ProvidersFactory.get_provider_class("datadog") + alert1 = provider.simulate_alert() client.post( + "/alerts/event/datadog", json=alert1, headers={"x-api-key": "some-api-key"} + ) + custom_rule = { + "name": "Custom Rule", + "description": "Custom Rule Description", + "provider_type": "datadog", + "fingerprint_fields": ["title", 
"message"], + "full_deduplication": False, + "ignore_fields": None, + } + + resp = client.post( "/deduplications", json=custom_rule, headers={"x-api-key": "some-api-key"} ) + assert resp.status_code == 200 provider = ProvidersFactory.get_provider_class("datadog") alert = provider.simulate_alert() for _ in range(2): + # the default rule should deduplicate the alert by monitor_id so let's randomize it - + # if the custom rule is working, the alert should be deduplicated by title and message + alert["monitor_id"] = random.randint(0, 10**10) client.post( "/alerts/event/datadog", json=alert, headers={"x-api-key": "some-api-key"} ) @@ -195,7 +271,7 @@ def test_custom_deduplication_rule(db_session, client, test_app): custom_rule_found = False for dedup_rule in deduplication_rules: - if dedup_rule.get("description") == "Custom Rule": + if dedup_rule.get("name") == "Custom Rule": custom_rule_found = True assert dedup_rule.get("ingested") == 2 assert dedup_rule.get("dedup_ratio") == 50.0 @@ -209,32 +285,51 @@ def test_custom_deduplication_rule(db_session, client, test_app): [ { "AUTH_TYPE": "NOAUTH", + "KEEP_PROVIDERS": '{"keepDatadog":{"type":"datadog","authentication":{"api_key":"1234","app_key": "1234"}}}', }, ], indirect=True, ) def test_custom_deduplication_rule_2(db_session, client, test_app): - # create a custom deduplication rule and insert alerts that should not be deduplicated by this + # create a custom full deduplication rule and insert alerts that should not be deduplicated by this + providers = client.get("/providers", headers={"x-api-key": "some-api-key"}).json() + datadog_provider_id = next( + provider["id"] + for provider in providers.get("installed_providers") + if provider["type"] == "datadog" + ) + custom_rule = { - "description": "Custom Rule", + "name": "Custom Rule", + "description": "Custom Rule Description", "provider_type": "datadog", - "deduplication_fields": ["title", "message"], - "default": False, + "provider_id": datadog_provider_id, + 
"fingerprint_fields": [ + "name", + "message", + ], # title in datadog mapped to name in keep + "full_deduplication": False, + "ignore_fields": ["field_that_never_exists"], } - client.post( + response = client.post( "/deduplications", json=custom_rule, headers={"x-api-key": "some-api-key"} ) + assert response.status_code == 200 provider = ProvidersFactory.get_provider_class("datadog") alert1 = provider.simulate_alert() - alert2 = provider.simulate_alert() client.post( - "/alerts/event/datadog", json=alert1, headers={"x-api-key": "some-api-key"} + f"/alerts/event/datadog?provider_id={datadog_provider_id}", + json=alert1, + headers={"x-api-key": "some-api-key"}, ) + alert1["title"] = "Different title" client.post( - "/alerts/event/datadog", json=alert2, headers={"x-api-key": "some-api-key"} + f"/alerts/event/datadog?provider_id={datadog_provider_id}", + json=alert1, + headers={"x-api-key": "some-api-key"}, ) deduplication_rules = client.get( @@ -243,7 +338,7 @@ def test_custom_deduplication_rule_2(db_session, client, test_app): custom_rule_found = False for dedup_rule in deduplication_rules: - if dedup_rule.get("description") == "Custom Rule": + if dedup_rule.get("name") == "Custom Rule": custom_rule_found = True assert dedup_rule.get("ingested") == 2 assert dedup_rule.get("dedup_ratio") == 0 @@ -257,36 +352,53 @@ def test_custom_deduplication_rule_2(db_session, client, test_app): [ { "AUTH_TYPE": "NOAUTH", + "KEEP_PROVIDERS": '{"keepDatadog":{"type":"datadog","authentication":{"api_key":"1234","app_key": "1234"}}}', }, ], indirect=True, ) def test_update_deduplication_rule(db_session, client, test_app): # create a custom deduplication rule and update it + response = client.get("/providers", headers={"x-api-key": "some-api-key"}) + assert response.status_code == 200 + datadog_provider_id = next( + provider["id"] + for provider in response.json().get("installed_providers") + if provider["type"] == "datadog" + ) + custom_rule = { - "description": "Custom Rule", + 
"name": "Custom Rule", + "description": "Custom Rule Description", "provider_type": "datadog", - "deduplication_fields": ["title", "message"], - "default": False, + "provider_id": datadog_provider_id, + "fingerprint_fields": ["title", "message"], + "full_deduplication": False, + "ignore_fields": None, } response = client.post( "/deduplications", json=custom_rule, headers={"x-api-key": "some-api-key"} ) - rule_id = response.json().get("id") + assert response.status_code == 200 + rule_id = response.json().get("id") updated_rule = { + "name": "Updated Custom Rule", "description": "Updated Custom Rule", "provider_type": "datadog", - "deduplication_fields": ["title"], - "default": False, + "provider_id": datadog_provider_id, + "fingerprint_fields": ["title"], + "full_deduplication": False, + "ignore_fields": None, } - client.put( + response = client.put( f"/deduplications/{rule_id}", json=updated_rule, headers={"x-api-key": "some-api-key"}, ) + assert response.status_code == 200 deduplication_rules = client.get( "/deduplications", headers={"x-api-key": "some-api-key"} @@ -297,7 +409,7 @@ def test_update_deduplication_rule(db_session, client, test_app): if dedup_rule.get("id") == rule_id: updated_rule_found = True assert dedup_rule.get("description") == "Updated Custom Rule" - assert dedup_rule.get("deduplication_fields") == ["title"] + assert dedup_rule.get("fingerprint_fields") == ["title"] assert updated_rule_found @@ -311,20 +423,88 @@ def test_update_deduplication_rule(db_session, client, test_app): ], indirect=True, ) +def test_update_deduplication_rule_non_exist_provider(db_session, client, test_app): + # create a custom deduplication rule and update it + custom_rule = { + "name": "Custom Rule", + "description": "Custom Rule Description", + "provider_type": "datadog", + "fingerprint_fields": ["title", "message"], + "full_deduplication": False, + "ignore_fields": None, + } + response = client.post( + "/deduplications", json=custom_rule, headers={"x-api-key": 
"some-api-key"} + ) + assert response.status_code == 404 + assert response.json() == {"detail": "Provider datadog not found"} + + +@pytest.mark.parametrize( + "test_app", + [ + { + "AUTH_TYPE": "NOAUTH", + }, + ], + indirect=True, +) +def test_update_deduplication_rule_linked_provider(db_session, client, test_app): + provider = ProvidersFactory.get_provider_class("datadog") + alert1 = provider.simulate_alert() + response = client.post( + "/alerts/event/datadog", json=alert1, headers={"x-api-key": "some-api-key"} + ) + custom_rule = { + "name": "Custom Rule", + "description": "Custom Rule Description", + "provider_type": "datadog", + "fingerprint_fields": ["title", "message"], + "full_deduplication": False, + "ignore_fields": None, + } + response = client.post( + "/deduplications", json=custom_rule, headers={"x-api-key": "some-api-key"} + ) + # once a linked provider is created, a customization should be allowed + assert response.status_code == 200 + + +@pytest.mark.parametrize( + "test_app", + [ + { + "AUTH_TYPE": "NOAUTH", + "KEEP_PROVIDERS": '{"keepDatadog":{"type":"datadog","authentication":{"api_key":"1234","app_key": "1234"}}}', + }, + ], + indirect=True, +) def test_delete_deduplication_rule_sanity(db_session, client, test_app): + response = client.get("/providers", headers={"x-api-key": "some-api-key"}) + assert response.status_code == 200 + datadog_provider_id = next( + provider["id"] + for provider in response.json().get("installed_providers") + if provider["type"] == "datadog" + ) # create a custom deduplication rule and delete it custom_rule = { - "description": "Custom Rule", + "name": "Custom Rule", + "description": "Custom Rule Description", "provider_type": "datadog", - "deduplication_fields": ["title", "message"], - "default": False, + "provider_id": datadog_provider_id, + "fingerprint_fields": ["title", "message"], + "full_deduplication": False, + "ignore_fields": None, } response = client.post( "/deduplications", json=custom_rule, 
headers={"x-api-key": "some-api-key"} ) - rule_id = response.json().get("id") + assert response.status_code == 200 + rule_id = response.json().get("id") client.delete(f"/deduplications/{rule_id}", headers={"x-api-key": "some-api-key"}) deduplication_rules = client.get( @@ -349,6 +529,14 @@ def test_delete_deduplication_rule_invalid(db_session, client, test_app): "/deduplications/non-existent-id", headers={"x-api-key": "some-api-key"} ) + assert response.status_code == 400 + assert response.json() == {"detail": "Invalid rule id"} + + # now use UUID + some_uuid = str(uuid.uuid4()) + response = client.delete( + f"/deduplications/{some_uuid}", headers={"x-api-key": "some-api-key"} + ) assert response.status_code == 404 @@ -375,7 +563,7 @@ def test_delete_deduplication_rule_default(db_session, client, test_app): f"/deduplications/{default_rule_id}", headers={"x-api-key": "some-api-key"} ) - assert response.status_code == 400 + assert response.status_code == 404 @pytest.mark.parametrize( @@ -389,19 +577,25 @@ def test_delete_deduplication_rule_default(db_session, client, test_app): ) def test_full_deduplication(db_session, client, test_app): # create a custom deduplication rule with full deduplication and insert alerts that should be deduplicated by this + provider = ProvidersFactory.get_provider_class("datadog") + alert = provider.simulate_alert() + # send the alert so a linked provider is created + response = client.post( + "/alerts/event/datadog", json=alert, headers={"x-api-key": "some-api-key"} + ) custom_rule = { + "name": "Full Deduplication Rule", "description": "Full Deduplication Rule", "provider_type": "datadog", - "deduplication_fields": ["title", "message", "source"], - "default": False, + "fingerprint_fields": ["title", "message", "source"], + "full_deduplication": True, + "ignore_fields": list(alert.keys()), # ignore all fields } - client.post( + response = client.post( "/deduplications", json=custom_rule, headers={"x-api-key": "some-api-key"} ) - - 
provider = ProvidersFactory.get_provider_class("datadog") - alert = provider.simulate_alert() + assert response.status_code == 200 for _ in range(3): client.post( @@ -417,7 +611,7 @@ def test_full_deduplication(db_session, client, test_app): if dedup_rule.get("description") == "Full Deduplication Rule": full_dedup_rule_found = True assert dedup_rule.get("ingested") == 3 - assert dedup_rule.get("dedup_ratio") == 66.67 + assert 66.667 - dedup_rule.get("dedup_ratio") < 0.1 # 0.66666666....7 assert full_dedup_rule_found @@ -477,9 +671,9 @@ def test_ingesting_alert_without_fingerprint_fields(db_session, client, test_app # insert a datadog alert without the required fingerprint fields and make sure that it is not deduplicated provider = ProvidersFactory.get_provider_class("datadog") alert = provider.simulate_alert() - alert.pop("incident_id") - alert.pop("group") - alert.pop("title") + alert.pop("incident_id", None) + alert.pop("group", None) + alert["title"] = str(random.randint(0, 10**10)) client.post( "/alerts/event/datadog", json=alert, headers={"x-api-key": "some-api-key"} @@ -533,6 +727,6 @@ def test_deduplication_fields(db_session, client, test_app): if dedup_rule.get("provider_type") == "datadog" and dedup_rule.get("default"): datadog_rule_found = True assert dedup_rule.get("ingested") == 3 - assert dedup_rule.get("dedup_ratio") == 66.67 + assert 66.667 - dedup_rule.get("dedup_ratio") < 0.1 # 0.66666666....7 assert datadog_rule_found From 3cfe0bad424622933bf366b0ed92a9eb41c53390 Mon Sep 17 00:00:00 2001 From: shahargl Date: Thu, 19 Sep 2024 16:13:36 +0100 Subject: [PATCH 20/36] feat: done --- .../DeduplicationPlaceholder.tsx | 11 ++++------- .../alert_deduplicator/alert_deduplicator.py | 19 ++++++++++++++++++- ...55.py => 2024-09-19-15-26_493f217af6b6.py} | 12 ++++++------ scripts/simulate_alerts.py | 2 +- 4 files changed, 29 insertions(+), 15 deletions(-) rename keep/api/models/db/migrations/versions/{2024-09-17-09-39_05292e5e1455.py => 
2024-09-19-15-26_493f217af6b6.py} (97%) diff --git a/keep-ui/app/deduplication/DeduplicationPlaceholder.tsx b/keep-ui/app/deduplication/DeduplicationPlaceholder.tsx index afb8830f2..8c5d49ecf 100644 --- a/keep-ui/app/deduplication/DeduplicationPlaceholder.tsx +++ b/keep-ui/app/deduplication/DeduplicationPlaceholder.tsx @@ -18,14 +18,11 @@ export const DeduplicationPlaceholder = () => { Reduce noise by creatiing deduplications. + + Start sending alerts or connect providers to create deduplication + rules. +
    - {/* list[DeduplicationRuleDto]: default_deduplication = copy.deepcopy( default_deduplications_dict[provider.type] ) + default_deduplication.id = self._generate_uuid( + provider.id, provider.type + ) # copy the provider id to the description if provider.id: default_deduplication.description = ( @@ -385,6 +388,11 @@ def get_deduplications(self) -> list[DeduplicationRuleDto]: # sort providers to have enabled first result = sorted(result, key=lambda x: x.default, reverse=True) + # if the default is empty, remove it + if len(result) == 1 and result[0].ingested == 0: + # empty states, no alerts + return [] + return result def get_deduplication_fields(self) -> list[str]: @@ -443,7 +451,16 @@ def create_deduplication_rule( def update_deduplication_rule( self, rule_id: str, rule: DeduplicationRuleRequestDto, updated_by: str ) -> DeduplicationRuleDto: - # Use the db function to update an existing deduplication rule + # check if this is a default rule + default_rule_id = self._generate_uuid(rule.provider_id, rule.provider_type) + # if its a default, we need to override and create a new rule + if rule_id == default_rule_id: + self.logger.info("Default rule update, creating a new rule") + rule_dto = self.create_deduplication_rule(rule, updated_by) + self.logger.info("Default rule updated") + return rule_dto + + # else, use the db function to update an existing deduplication rule updated_rule = update_deduplication_rule( rule_id=rule_id, tenant_id=self.tenant_id, diff --git a/keep/api/models/db/migrations/versions/2024-09-17-09-39_05292e5e1455.py b/keep/api/models/db/migrations/versions/2024-09-19-15-26_493f217af6b6.py similarity index 97% rename from keep/api/models/db/migrations/versions/2024-09-17-09-39_05292e5e1455.py rename to keep/api/models/db/migrations/versions/2024-09-19-15-26_493f217af6b6.py index a8b3ed936..39bc26942 100644 --- a/keep/api/models/db/migrations/versions/2024-09-17-09-39_05292e5e1455.py +++ 
b/keep/api/models/db/migrations/versions/2024-09-19-15-26_493f217af6b6.py @@ -1,8 +1,8 @@ -"""deduplications v2 +"""Dedup -Revision ID: 05292e5e1455 -Revises: 938b1aa62d5c -Create Date: 2024-09-17 09:39:51.160143 +Revision ID: 493f217af6b6 +Revises: 5d7ae55efc6a +Create Date: 2024-09-19 15:26:21.564118 """ @@ -12,8 +12,8 @@ from sqlalchemy.dialects import sqlite # revision identifiers, used by Alembic. -revision = "05292e5e1455" -down_revision = "938b1aa62d5c" +revision = "493f217af6b6" +down_revision = "5d7ae55efc6a" branch_labels = None depends_on = None diff --git a/scripts/simulate_alerts.py b/scripts/simulate_alerts.py index 62669643a..7cbda6ef7 100644 --- a/scripts/simulate_alerts.py +++ b/scripts/simulate_alerts.py @@ -19,7 +19,7 @@ def main(): GENERATE_DEDUPLICATIONS = True keep_api_key = ( - "f228aabc-17d4-4e12-a918-48bd90742afc" # os.environ.get("KEEP_API_KEY") + "ba8fa324-9047-480d-b611-6e446ec75215" # os.environ.get("KEEP_API_KEY") ) keep_api_url = "http://localhost:8080" # os.environ.get("KEEP_API_URL") if keep_api_key is None or keep_api_url is None: From 849343614596ae563e1eedc23035e98d6c8cd189 Mon Sep 17 00:00:00 2001 From: shahargl Date: Mon, 23 Sep 2024 14:36:48 +0100 Subject: [PATCH 21/36] feat: merge from main --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 45b06baf1..57a86b596 100644 --- a/.gitignore +++ b/.gitignore @@ -208,3 +208,4 @@ ee/experimental/ai_temp/* ,e!ee/experimental/ai_temp/.gitkeep oauth2.cfg +scripts/keep_slack_bot.py From d5da7f54c13c35633421f3f47f8f68322dc8e2a7 Mon Sep 17 00:00:00 2001 From: shahargl Date: Mon, 23 Sep 2024 14:47:27 +0100 Subject: [PATCH 22/36] feat: make ui build pass --- .../deduplication/DeduplicationSidebar.tsx | 245 +++++++++++++----- .../app/deduplication/DeduplicationTable.tsx | 102 +++++--- keep-ui/components/ui/MultiSelect.tsx | 59 +++-- keep-ui/next-env.d.ts | 2 +- 4 files changed, 287 insertions(+), 121 deletions(-) diff --git 
a/keep-ui/app/deduplication/DeduplicationSidebar.tsx b/keep-ui/app/deduplication/DeduplicationSidebar.tsx index 7dab1afa4..fed236c89 100644 --- a/keep-ui/app/deduplication/DeduplicationSidebar.tsx +++ b/keep-ui/app/deduplication/DeduplicationSidebar.tsx @@ -12,6 +12,7 @@ import MultiSelect from "@/components/ui/MultiSelect"; import { ExclamationTriangleIcon } from "@heroicons/react/24/outline"; import { getApiURL } from "utils/apiUrl"; import { useSession } from "next-auth/react"; +import { KeyedMutator } from "swr"; interface ProviderOption { value: string; @@ -24,7 +25,7 @@ interface DeduplicationSidebarProps { toggle: VoidFunction; selectedDeduplicationRule: DeduplicationRule | null; onSubmit: (data: Partial) => Promise; - mutateDeduplicationRules: () => Promise; + mutateDeduplicationRules: KeyedMutator; } const DeduplicationSidebar: React.FC = ({ @@ -34,7 +35,16 @@ const DeduplicationSidebar: React.FC = ({ onSubmit, mutateDeduplicationRules, }) => { - const { control, handleSubmit, setValue, reset, setError, watch, formState: { errors }, clearErrors } = useForm>({ + const { + control, + handleSubmit, + setValue, + reset, + setError, + watch, + formState: { errors }, + clearErrors, + } = useForm>({ defaultValues: selectedDeduplicationRule || { name: "", description: "", @@ -47,32 +57,57 @@ const DeduplicationSidebar: React.FC = ({ }); const [isSubmitting, setIsSubmitting] = useState(false); - const { data: providers = { installed_providers: [], linked_providers: [] } } = useProviders(); + const { + data: providers = { installed_providers: [], linked_providers: [] }, + } = useProviders(); const { data: deduplicationFields = {} } = useDeduplicationFields(); const { data: session } = useSession(); - const alertProviders = useMemo(() => [ - { id: null, "type": "keep", "details": { name: "Keep" }, tags: ["alert"] }, - ...providers.installed_providers, - ...providers.linked_providers - ].filter(provider => provider.tags?.includes("alert")), [providers]); + const 
alertProviders = useMemo( + () => + [ + { id: null, type: "keep", details: { name: "Keep" }, tags: ["alert"] }, + ...providers.installed_providers, + ...providers.linked_providers, + ].filter((provider) => provider.tags?.includes("alert")), + [providers] + ); const fullDeduplication = watch("full_deduplication"); const selectedProviderType = watch("provider_type"); const selectedProviderId = watch("provider_id"); const fingerprintFields = watch("fingerprint_fields"); const ignoreFields = watch("ignore_fields"); - const availableFields = useMemo(() => { // todo: add default fields for each provider from the backend - const defaultFields = ["source", "service", "description", "fingerprint", "name", "lastReceived"]; + const defaultFields = [ + "source", + "service", + "description", + "fingerprint", + "name", + "lastReceived", + ]; if (selectedProviderType) { - const key = `${selectedProviderType}_${selectedProviderId || 'null'}`; + const key = `${selectedProviderType}_${selectedProviderId || "null"}`; const providerFields = deduplicationFields[key] || []; - return [...new Set([...defaultFields, ...providerFields, ...(fingerprintFields ?? []), ...(ignoreFields ?? [])])]; + return [ + ...new Set([ + ...defaultFields, + ...providerFields, + ...(fingerprintFields ?? []), + ...(ignoreFields ?? []), + ]), + ]; } return [...new Set([...defaultFields, ...(fingerprintFields ?? 
[])])]; - }, [selectedProviderType, selectedProviderId, deduplicationFields, fingerprintFields, ignoreFields]); + }, [ + selectedProviderType, + selectedProviderId, + deduplicationFields, + fingerprintFields, + ignoreFields, + ]); useEffect(() => { if (isOpen && selectedDeduplicationRule) { @@ -97,7 +132,9 @@ const DeduplicationSidebar: React.FC = ({ toggle(); }; - const onFormSubmit: SubmitHandler> = async (data) => { + const onFormSubmit: SubmitHandler> = async ( + data + ) => { setIsSubmitting(true); clearErrors(); try { @@ -110,7 +147,10 @@ const DeduplicationSidebar: React.FC = ({ // Use POST if there's no selectedDeduplicationRule.id (it's a default rule or new rule) // This ensures we always create a new rule for default rules - const method = (!selectedDeduplicationRule || !selectedDeduplicationRule.id) ? "POST" : "PUT"; + const method = + !selectedDeduplicationRule || !selectedDeduplicationRule.id + ? "POST" + : "PUT"; const response = await fetch(url, { method: method, @@ -128,10 +168,16 @@ const DeduplicationSidebar: React.FC = ({ await mutateDeduplicationRules(); } else { const errorData = await response.json(); - setError("root.serverError", { type: "manual", message: errorData.message || "Failed to save deduplication rule" }); + setError("root.serverError", { + type: "manual", + message: errorData.message || "Failed to save deduplication rule", + }); } } catch (error) { - setError("root.serverError", { type: "manual", message: "An unexpected error occurred" }); + setError("root.serverError", { + type: "manual", + message: "An unexpected error occurred", + }); } finally { setIsSubmitting(false); } @@ -163,9 +209,17 @@ const DeduplicationSidebar: React.FC = ({
    - {selectedDeduplicationRule ? "Edit Deduplication Rule" : "Add Deduplication Rule"} - Beta - {selectedDeduplicationRule?.default && Default Rule} + {selectedDeduplicationRule + ? "Edit Deduplication Rule" + : "Add Deduplication Rule"} + + Beta + + {selectedDeduplicationRule?.default && ( + + Default Rule + + )}
    )} - +
    @@ -234,36 +302,53 @@ const DeduplicationSidebar: React.FC = ({ control={control} rules={{ required: "Provider is required" }} render={({ field }) => ( - > - {...field} - isDisabled={!!selectedDeduplicationRule?.default} - options={alertProviders.map((provider) => ({ - value: `${provider.type}_${provider.id}`, - label: provider.details?.name || provider.id || "main", - logoUrl: `/icons/${provider.type}-icon.png` - }))} - placeholder="Select provider" - onChange={(selectedOption) => { - if (selectedOption) { - const [providerType, providerId] = selectedOption.value.split('_'); - setValue("provider_type", providerType); - setValue("provider_id", providerId as any); - } - }} - value={alertProviders.find( - (provider) => `${provider.type}_${provider.id}` === `${selectedProviderType}_${selectedProviderId}` - ) ? { - value: `${selectedProviderType}_${selectedProviderId}`, - label: alertProviders.find( - (provider) => `${provider.type}_${provider.id}` === `${selectedProviderType}_${selectedProviderId}` - )?.details?.name || (selectedProviderId !== "null" && selectedProviderId !== null ? selectedProviderId : "main"), - logoUrl: `/icons/${selectedProviderType}-icon.png` - } as ProviderOption : null} - /> + > + {...field} + isDisabled={!!selectedDeduplicationRule?.default} + options={alertProviders.map((provider) => ({ + value: `${provider.type}_${provider.id}`, + label: + provider.details?.name || provider.id || "main", + logoUrl: `/icons/${provider.type}-icon.png`, + }))} + placeholder="Select provider" + onChange={(selectedOption) => { + if (selectedOption) { + const [providerType, providerId] = + selectedOption.value.split("_"); + setValue("provider_type", providerType); + setValue("provider_id", providerId as any); + } + }} + value={ + alertProviders.find( + (provider) => + `${provider.type}_${provider.id}` === + `${selectedProviderType}_${selectedProviderId}` + ) + ? 
({ + value: `${selectedProviderType}_${selectedProviderId}`, + label: + alertProviders.find( + (provider) => + `${provider.type}_${provider.id}` === + `${selectedProviderType}_${selectedProviderId}` + )?.details?.name || + (selectedProviderId !== "null" && + selectedProviderId !== null + ? selectedProviderId + : "main"), + logoUrl: `/icons/${selectedProviderType}-icon.png`, + } as ProviderOption) + : null + } + /> )} /> {errors.provider_type && ( -

    {errors.provider_type.message}

    +

    + {errors.provider_type.message} +

    )}
    @@ -273,28 +358,40 @@ const DeduplicationSidebar: React.FC = ({ ( - ({ value: fieldName, - label: fieldName + label: fieldName, }))} placeholder="Select fingerprint fields" value={field.value?.map((value: string) => ({ value, - label: value + label: value, }))} onChange={(selectedOptions) => { - field.onChange(selectedOptions.map((option: { value: string }) => option.value)); + field.onChange( + selectedOptions.map( + (option: { value: string }) => option.value + ) + ); }} - noOptionsMessage={() => selectedProviderType ? "No options" : "Please choose provider to see available fields"} - /> + noOptionsMessage={() => + selectedProviderType + ? "No options" + : "Please choose provider to see available fields" + } + /> )} - /> + /> {errors.fingerprint_fields && ( -

    {errors.fingerprint_fields.message}

    +

    + {errors.fingerprint_fields.message} +

    )}
    @@ -309,7 +406,9 @@ const DeduplicationSidebar: React.FC = ({ /> )} /> - Full Deduplication + + Full Deduplication +
    {fullDeduplication && ( @@ -325,26 +424,36 @@ const DeduplicationSidebar: React.FC = ({ {...field} options={availableFields.map((fieldName) => ({ value: fieldName, - label: fieldName + label: fieldName, }))} placeholder="Select ignore fields" value={field.value?.map((value: string) => ({ value, - label: value + label: value, }))} onChange={(selectedOptions) => { - field.onChange(selectedOptions.map((option: { value: string }) => option.value)); + field.onChange( + selectedOptions.map( + (option: { value: string }) => option.value + ) + ); }} /> )} /> {errors.ignore_fields && ( -

    {errors.ignore_fields.message}

    +

    + {errors.ignore_fields.message} +

    )}
    )} {errors.root?.serverError && ( - + {errors.root.serverError.message} )} @@ -358,11 +467,7 @@ const DeduplicationSidebar: React.FC = ({ > Cancel -
    diff --git a/keep-ui/app/deduplication/DeduplicationTable.tsx b/keep-ui/app/deduplication/DeduplicationTable.tsx index b7a494d52..42532e3a6 100644 --- a/keep-ui/app/deduplication/DeduplicationTable.tsx +++ b/keep-ui/app/deduplication/DeduplicationTable.tsx @@ -1,4 +1,4 @@ -import React, { useEffect, useMemo, useState } from 'react'; +import React, { useEffect, useMemo, useState } from "react"; import { Button, Card, @@ -29,19 +29,25 @@ import { useSession } from "next-auth/react"; const columnHelper = createColumnHelper(); +import { KeyedMutator } from "swr"; + type DeduplicationTableProps = { deduplicationRules: DeduplicationRule[]; - mutateDeduplicationRules: () => Promise; + mutateDeduplicationRules: KeyedMutator; }; -export const DeduplicationTable: React.FC = ({ deduplicationRules, mutateDeduplicationRules }) => { +export const DeduplicationTable: React.FC = ({ + deduplicationRules, + mutateDeduplicationRules, +}) => { const router = useRouter(); const { data: session } = useSession(); const searchParams = useSearchParams(); let selectedId = searchParams ? 
searchParams.get("id") : null; const [isSidebarOpen, setIsSidebarOpen] = useState(false); - const [selectedDeduplicationRule, setSelectedDeduplicationRule] = useState(null); + const [selectedDeduplicationRule, setSelectedDeduplicationRule] = + useState(null); const onDeduplicationClick = (rule: DeduplicationRule) => { setSelectedDeduplicationRule(rule); @@ -52,18 +58,23 @@ export const DeduplicationTable: React.FC = ({ deduplic const onCloseDeduplication = () => { setIsSidebarOpen(false); setSelectedDeduplicationRule(null); - router.push('/deduplication'); + router.push("/deduplication"); }; - const handleDeleteRule = async (rule: DeduplicationRule, event: React.MouseEvent) => { + const handleDeleteRule = async ( + rule: DeduplicationRule, + event: React.MouseEvent + ) => { event.stopPropagation(); if (rule.default) return; // Don't delete default rules - if (window.confirm("Are you sure you want to delete this deduplication rule?")) { + if ( + window.confirm("Are you sure you want to delete this deduplication rule?") + ) { try { const url = `${getApiURL()}/deduplications/${rule.id}`; const response = await fetch(url, { - method: 'DELETE', + method: "DELETE", headers: { Authorization: `Bearer ${session?.accessToken}`, }, @@ -92,7 +103,7 @@ export const DeduplicationTable: React.FC = ({ deduplic useEffect(() => { if (!isSidebarOpen && selectedId) { - router.push('/deduplication'); + router.push("/deduplication"); } }, [isSidebarOpen, selectedId, router]); @@ -118,39 +129,55 @@ export const DeduplicationTable: React.FC = ({ deduplic header: "Name", cell: (info) => (
    - {info.getValue()} + + {info.getValue()} + {info.row.original.default ? ( - Default + + Default + ) : ( - Custom + + Custom + )} {info.row.original.full_deduplication && ( - Full Deduplication + + Full Deduplication + )}
    ), }), columnHelper.accessor("ingested", { header: "Ingested", - cell: (info) => {info.getValue() || 0}, + cell: (info) => ( + + {info.getValue() || 0} + + ), }), columnHelper.accessor("dedup_ratio", { header: "Dedup Ratio", cell: (info) => { const value = info.getValue() || 0; const formattedValue = Number(value).toFixed(1); - return {formattedValue}%; + return ( + + {formattedValue}% + + ); }, }), columnHelper.accessor("distribution", { header: "Distribution", cell: (info) => { const rawData = info.getValue(); - const maxNumber = Math.max(...rawData.map(item => item.number)); - const allZero = rawData.every(item => item.number === 0); - const data = rawData.map(item => ({ + const maxNumber = Math.max(...rawData.map((item) => item.number)); + const allZero = rawData.every((item) => item.number === 0); + const data = rawData.map((item) => ({ ...item, - number: maxNumber > 0 ? (item.number / maxNumber) + 1 : 0.5 + number: maxNumber > 0 ? item.number / maxNumber + 1 : 0.5, })); const colors = ["orange"]; const showGradient = true; @@ -173,12 +200,15 @@ export const DeduplicationTable: React.FC = ({ deduplic cell: (info) => { const fields = info.getValue(); const ignoreFields = info.row.original.ignore_fields; - const displayFields = fields && fields.length > 0 ? fields : ignoreFields; + const displayFields = + fields && fields.length > 0 ? fields : ignoreFields; if (!displayFields || displayFields.length === 0) { return (
    - N/A + + N/A +
    ); } @@ -188,7 +218,9 @@ export const DeduplicationTable: React.FC = ({ deduplic {displayFields.map((field: string, index: number) => ( {index > 0 && } - {field} + + {field} + ))}
    @@ -209,7 +241,11 @@ export const DeduplicationTable: React.FC = ({ deduplic size="xs" variant="secondary" icon={TrashIcon} - tooltip={info.row.original.default ? "Cannot delete default rule" : "Delete Rule"} + tooltip={ + info.row.original.default + ? "Cannot delete default rule" + : "Delete Rule" + } disabled={info.row.original.default} onClick={(e) => handleDeleteRule(info.row.original, e)} /> @@ -224,9 +260,11 @@ export const DeduplicationTable: React.FC = ({ deduplic data: deduplicationRules, columns: DEDUPLICATION_TABLE_COLS, getCoreRowModel: getCoreRowModel(), - }) + }); - const handleSubmitDeduplicationRule = async (data: Partial) => { + const handleSubmitDeduplicationRule = async ( + data: Partial + ) => { // Implement the logic to submit the deduplication rule // This is a placeholder function, replace with actual implementation console.log("Submitting deduplication rule:", data); @@ -238,16 +276,20 @@ export const DeduplicationTable: React.FC = ({ deduplic
    - Deduplication Rules <span className="text-gray-400">({deduplicationRules.length})</span> + Deduplication Rules{" "} + <span className="text-gray-400">({deduplicationRules.length})</span> Set up rules to deduplicate similar alerts
    -
    diff --git a/keep-ui/components/ui/MultiSelect.tsx b/keep-ui/components/ui/MultiSelect.tsx index bc45cfc00..eb22a710d 100644 --- a/keep-ui/components/ui/MultiSelect.tsx +++ b/keep-ui/components/ui/MultiSelect.tsx @@ -1,42 +1,51 @@ import React from "react"; import Select from "react-select"; -import { components, Props as SelectProps, GroupBase, StylesConfig } from "react-select"; +import { + components, + Props as SelectProps, + GroupBase, + StylesConfig, +} from "react-select"; import { Badge } from "@tremor/react"; type OptionType = { value: string; label: string }; const customStyles: StylesConfig = { - control: (provided, state) => ({ + control: (provided: any, state: any) => ({ ...provided, - borderColor: state.isFocused ? 'orange' : '#ccc', - '&:hover': { - borderColor: 'orange', + borderColor: state.isFocused ? "orange" : "#ccc", + "&:hover": { + borderColor: "orange", }, - boxShadow: state.isFocused ? '0 0 0 1px orange' : null, - backgroundColor: 'transparent', + boxShadow: state.isFocused ? "0 0 0 1px orange" : null, + backgroundColor: "transparent", }), option: (provided, state) => ({ ...provided, - backgroundColor: state.isSelected ? 'orange' : state.isFocused ? 'rgba(255, 165, 0, 0.1)' : 'transparent', - color: state.isSelected ? 'white' : 'black', - '&:hover': { - backgroundColor: 'rgba(255, 165, 0, 0.3)', + backgroundColor: state.isSelected + ? "orange" + : state.isFocused + ? "rgba(255, 165, 0, 0.1)" + : "transparent", + color: state.isSelected ? 
"white" : "black", + "&:hover": { + backgroundColor: "rgba(255, 165, 0, 0.3)", }, }), multiValue: (provided) => ({ ...provided, - backgroundColor: 'default', + backgroundColor: "default", }), multiValueLabel: (provided) => ({ ...provided, - color: 'black', + color: "black", }), multiValueRemove: (provided) => ({ ...provided, - color: 'orange', - '&:hover': { - backgroundColor: 'orange', - color: 'white', + color: "orange", + "&:hover": { + backgroundColor: "orange", + color: "white", }, }), menuPortal: (base) => ({ @@ -49,14 +58,18 @@ const customStyles: StylesConfig = { }), }; -type CustomSelectProps = SelectProps> & { +type CustomSelectProps = SelectProps< + OptionType, + true, + GroupBase +> & { components?: { Option?: typeof components.Option; MultiValue?: typeof components.MultiValue; }; }; -const customComponents: CustomSelectProps['components'] = { +const customComponents: CustomSelectProps["components"] = { Option: ({ children, ...props }) => ( @@ -75,7 +88,13 @@ const customComponents: CustomSelectProps['components'] = { type MultiSelectProps = SelectProps>; -const MultiSelect: React.FC = ({ value, onChange, options, placeholder, ...rest }) => ( +const MultiSelect: React.FC = ({ + value, + onChange, + options, + placeholder, + ...rest +}) => (