diff --git a/.github/workflows/test-pr-e2e.yml b/.github/workflows/test-pr-e2e.yml
index ba7cb43e5..95fff2ebc 100644
--- a/.github/workflows/test-pr-e2e.yml
+++ b/.github/workflows/test-pr-e2e.yml
@@ -4,9 +4,9 @@ on:
   workflow_dispatch:
   pull_request:
     paths:
-      - 'keep/**'
-      - 'keep-ui/**'
-      - 'tests/**'
+      - "keep/**"
+      - "keep-ui/**"
+      - "tests/**"
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref }}
@@ -123,7 +123,7 @@ jobs:
 
       # create the state directory
       # mkdir -p ./state && chown -R root:root ./state && chmod -R 777 ./state
-
+
       - name: Run e2e tests and report coverage
         run: |
           poetry run coverage run --branch -m pytest -s tests/e2e_tests/
@@ -147,9 +147,9 @@ jobs:
 
       - name: Upload test artifacts on failure
         if: always()
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4.4.3
         with:
-          name: test-artifacts
+          name: test-artifacts-my-artifacts-${{ matrix.db_type }}
           path: |
             playwright_dump_*.html
             playwright_dump_*.png
diff --git a/.github/workflows/test-pr.yml b/.github/workflows/test-pr.yml
index 6709f7c21..e07f3ebc0 100644
--- a/.github/workflows/test-pr.yml
+++ b/.github/workflows/test-pr.yml
@@ -91,9 +91,11 @@ jobs:
         run: poetry install --no-interaction --no-root --with dev
 
       - name: Run unit tests and report coverage
+        env:
+          LOG_LEVEL: DEBUG
+          SQLALCHEMY_WARN_20: 1
         run: |
-          poetry run coverage run --branch -m pytest -n auto --non-integration --ignore=tests/e2e_tests/
-
+          poetry run coverage run --branch -m pytest --timeout 20 -n auto --non-integration --ignore=tests/e2e_tests/
 
       - name: Run integration tests and report coverage
         run: |
diff --git a/keep-ui/app/(keep)/alerts/alert-name.tsx b/keep-ui/app/(keep)/alerts/alert-name.tsx
index d65780d22..2d4fcd088 100644
--- a/keep-ui/app/(keep)/alerts/alert-name.tsx
+++ b/keep-ui/app/(keep)/alerts/alert-name.tsx
@@ -4,11 +4,11 @@ import {
   TicketIcon,
   TrashIcon,
   PencilSquareIcon,
-  Cog8ToothIcon,
+  // Cog8ToothIcon,
 } from "@heroicons/react/24/outline";
 import { Icon } from "@tremor/react";
 import { AlertDto, AlertToWorkflowExecution } from "./models";
-import { useWorkflowExecutions } from "utils/hooks/useWorkflowExecutions";
+// import { useWorkflowExecutions } from "utils/hooks/useWorkflowExecutions";
 import { useRouter } from "next/navigation";
 
 interface Props {
@@ -22,7 +22,8 @@ export default function AlertName({
   setTicketModalAlert,
 }: Props) {
   const router = useRouter();
-  const { data: executions } = useWorkflowExecutions();
+  // TODO: fix this so we can show which alert had workflow execution
+  // const { data: executions } = useWorkflowExecutions();
 
   const handleNoteClick = () => {
     if (setNoteModalAlert) {
@@ -38,9 +39,9 @@
     }
   };
 
-  const relevantWorkflowExecution =
-    executions?.find((wf) => wf.alert_fingerprint === alert.fingerprint) ??
-    null;
+  const relevantWorkflowExecution: AlertToWorkflowExecution | null = null;
+  //   executions?.find((wf) => wf.alert_fingerprint === alert.fingerprint) ??
+ // null; const { name, @@ -131,7 +132,7 @@ export default function AlertName({ variant="solid" /> )} - {relevantWorkflowExecution && ( + {/* {relevantWorkflowExecution && ( handleWorkflowClick(relevantWorkflowExecution)} className="ml-1 cursor-pointer" variant="solid" /> - )} + )} */} ); diff --git a/keep/api/alert_deduplicator/alert_deduplicator.py b/keep/api/alert_deduplicator/alert_deduplicator.py index 08fb5c243..0ed44c4af 100644 --- a/keep/api/alert_deduplicator/alert_deduplicator.py +++ b/keep/api/alert_deduplicator/alert_deduplicator.py @@ -14,7 +14,7 @@ get_alerts_fields, get_all_deduplication_rules, get_all_deduplication_stats, - get_custom_deduplication_rules, + get_custom_deduplication_rule, get_last_alert_hash_by_fingerprint, update_deduplication_rule, ) @@ -31,12 +31,16 @@ class AlertDeduplicator: + DEDUPLICATION_DISTRIBUTION_ENABLED = config( + "KEEP_DEDUPLICATION_DISTRIBUTION_ENABLED", cast=bool, default=True + ) + CUSTOM_DEDUPLICATION_DISTRIBUTION_ENABLED = config( + "KEEP_CUSTOM_DEDUPLICATION_ENABLED", cast=bool, default=True + ) + def __init__(self, tenant_id): self.logger = logging.getLogger(__name__) self.tenant_id = tenant_id - self.provider_distribution_enabled = config( - "PROVIDER_DISTRIBUTION_ENABLED", cast=bool, default=True - ) self.search_engine = SearchEngine(self.tenant_id) def _apply_deduplication_rule( @@ -91,13 +95,23 @@ def _apply_deduplication_rule( }, ) alert.isPartialDuplicate = True + else: + self.logger.info( + "Alert is not deduplicated", + extra={ + "alert_id": alert.id, + "fingerprint": alert.fingerprint, + "tenant_id": self.tenant_id, + "last_alert_hash_by_fingerprint": last_alert_hash_by_fingerprint, + }, + ) return alert def apply_deduplication(self, alert: AlertDto) -> bool: # IMPOTRANT NOTE TO SOMEONE WORKING ON THIS CODE: # apply_deduplication runs AFTER _format_alert, so you can assume that alert fields are in the expected format. 
- # you can also safe to assume that alert.fingerprint is set by the provider itself + # you are also safe to assume that alert.fingerprint is set by the provider itself # get only relevant rules rules = self.get_deduplication_rules( @@ -122,26 +136,30 @@ def apply_deduplication(self, alert: AlertDto) -> bool: "is_partial_duplicate": alert.isPartialDuplicate, }, ) - if alert.isFullDuplicate or alert.isPartialDuplicate: - # create deduplication event - create_deduplication_event( - tenant_id=self.tenant_id, - deduplication_rule_id=rule.id, - deduplication_type="full" if alert.isFullDuplicate else "partial", - provider_id=alert.providerId, - provider_type=alert.providerType, - ) - # we don't need to check the other rules - break - else: - # create none deduplication event, for statistics - create_deduplication_event( - tenant_id=self.tenant_id, - deduplication_rule_id=rule.id, - deduplication_type="none", - provider_id=alert.providerId, - provider_type=alert.providerType, - ) + + if AlertDeduplicator.DEDUPLICATION_DISTRIBUTION_ENABLED: + if alert.isFullDuplicate or alert.isPartialDuplicate: + # create deduplication event + create_deduplication_event( + tenant_id=self.tenant_id, + deduplication_rule_id=rule.id, + deduplication_type=( + "full" if alert.isFullDuplicate else "partial" + ), + provider_id=alert.providerId, + provider_type=alert.providerType, + ) + # we don't need to check the other rules + break + else: + # create none deduplication event, for statistics + create_deduplication_event( + tenant_id=self.tenant_id, + deduplication_rule_id=rule.id, + deduplication_type="none", + provider_id=alert.providerId, + provider_type=alert.providerType, + ) return alert @@ -166,11 +184,15 @@ def get_deduplication_rules( self, tenant_id, provider_id, provider_type ) -> DeduplicationRuleDto: # try to get the rule from the database - rules = get_custom_deduplication_rules(tenant_id, provider_id, provider_type) + rule = ( + get_custom_deduplication_rule(tenant_id, provider_id, provider_type) + if AlertDeduplicator.CUSTOM_DEDUPLICATION_DISTRIBUTION_ENABLED + else None + ) - if not rules: + if not rule: self.logger.debug( - "No custom deduplication rules found, using deafult full deduplication rule", + "No custom deduplication rule found, using deafult full deduplication rule", extra={ "provider_id": provider_id, "provider_type": provider_type, @@ -189,12 +211,10 @@ def get_deduplication_rules( "tenant_id": tenant_id, }, ) - # - # check that at least one of them is full deduplication rule - full_deduplication_rules = [rule for rule in rules if rule.full_deduplication] + # if full deduplication rule found, return the rules - if full_deduplication_rules: - return rules + if rule.full_deduplication: + return [rule] # if not, assign them the default full deduplication rule ignore fields self.logger.info( @@ -203,13 +223,8 @@ def get_deduplication_rules( default_full_dedup_rule = self._get_default_full_deduplication_rule( provider_id=provider_id, provider_type=provider_type ) - for rule in rules: - if not rule.full_deduplication: - self.logger.debug( - "Assigning default full deduplication rule ignore fields", - ) - rule.ignore_fields = default_full_dedup_rule.ignore_fields - return rules + rule.ignore_fields = default_full_dedup_rule.ignore_fields + return [rule] def _generate_uuid(self, provider_id, provider_type): # this is a way to generate a unique uuid for the default deduplication rule per (provider_id, provider_type) @@ -269,7 +284,11 @@ def get_deduplications(self) -> list[DeduplicationRuleDto]: 
provider_id, provider_type = dd.provider_id, dd.provider_type dd.id = self._generate_uuid(provider_id, provider_type) # get custom deduplication rules - custom_deduplications = get_all_deduplication_rules(self.tenant_id) + custom_deduplications = ( + get_all_deduplication_rules(self.tenant_id) + if AlertDeduplicator.CUSTOM_DEDUPLICATION_DISTRIBUTION_ENABLED + else [] + ) # cast to dto custom_deduplications_dto = [ DeduplicationRuleDto( @@ -347,6 +366,14 @@ def get_deduplications(self) -> list[DeduplicationRuleDto]: result = [] for dedup in final_deduplications: + self.logger.debug( + "Calculating deduplication stats", + extra={ + "deduplication_rule_id": dedup.id, + "tenant_id": self.tenant_id, + "deduplication_stats": deduplication_stats, + }, + ) key = dedup.id full_dedup = deduplication_stats.get(key, {"full_dedup_count": 0}).get( "full_dedup_count", 0 @@ -377,7 +404,7 @@ def get_deduplications(self) -> list[DeduplicationRuleDto]: ) result.append(dedup) - if self.provider_distribution_enabled: + if AlertDeduplicator.DEDUPLICATION_DISTRIBUTION_ENABLED: for dedup in result: for pd, stats in deduplication_stats.items(): if pd == f"{dedup.provider_id}_{dedup.provider_type}": diff --git a/keep/api/api.py b/keep/api/api.py index 69a5141a6..fb2b3845b 100644 --- a/keep/api/api.py +++ b/keep/api/api.py @@ -1,6 +1,7 @@ import asyncio import logging import os +from contextlib import asynccontextmanager from importlib import metadata import requests @@ -24,6 +25,8 @@ KEEP_ARQ_TASK_POOL_BASIC_PROCESSING, KEEP_ARQ_TASK_POOL_NONE, ) +from keep.api.core.config import config +from keep.api.core.db import dispose_session from keep.api.core.dependencies import SINGLE_TENANT_UUID from keep.api.logging import CONFIG as logging_config from keep.api.middlewares import LoggingMiddleware @@ -59,23 +62,23 @@ ) # load all providers into cache -from keep.providers.providers_factory import ProvidersFactory from keep.workflowmanager.workflowmanager import WorkflowManager load_dotenv(find_dotenv()) keep.api.logging.setup_logging() logger = logging.getLogger(__name__) -HOST = os.environ.get("KEEP_HOST", "0.0.0.0") -PORT = int(os.environ.get("PORT", 8080)) -SCHEDULER = os.environ.get("SCHEDULER", "true") == "true" -CONSUMER = os.environ.get("CONSUMER", "true") == "true" +HOST = config("KEEP_HOST", default="0.0.0.0") +PORT = config("PORT", default=8080, cast=int) +SCHEDULER = config("SCHEDULER", default="true", cast=bool) +CONSUMER = config("CONSUMER", default="true", cast=bool) +KEEP_DEBUG_TASKS = config("KEEP_DEBUG_TASKS", default="false", cast=bool) -AUTH_TYPE = os.environ.get("AUTH_TYPE", IdentityManagerTypes.NOAUTH.value).lower() +AUTH_TYPE = config("AUTH_TYPE", default=IdentityManagerTypes.NOAUTH.value).lower() try: KEEP_VERSION = metadata.version("keep") except Exception: - KEEP_VERSION = os.environ.get("KEEP_VERSION", "unknown") + KEEP_VERSION = config("KEEP_VERSION", default="unknown") # Monkey patch requests to disable redirects original_request = requests.Session.request @@ -89,10 +92,126 @@ def no_redirect_request(self, method, url, **kwargs): requests.Session.request = no_redirect_request +async def check_pending_tasks(background_tasks: set): + while True: + events_in_queue = len(background_tasks) + if events_in_queue > 0: + logger.info( + f"{events_in_queue} background tasks pending", + extra={ + "pending_tasks": events_in_queue, + }, + ) + await asyncio.sleep(1) + + +async def startup(): + """ + This runs for every worker on startup. 
+    Read more about lifespan here: https://fastapi.tiangolo.com/advanced/events/#lifespan
+    """
+    logger.info("Disposing existing DB connections")
+    # psycopg2.DatabaseError: error with status PGRES_TUPLES_OK and no message from the libpq
+    # https://stackoverflow.com/questions/43944787/sqlalchemy-celery-with-scoped-session-error/54751019#54751019
+    dispose_session()
+
+    logger.info("Starting the services")
+
+    # Start the scheduler
+    if SCHEDULER:
+        try:
+            logger.info("Starting the scheduler")
+            wf_manager = WorkflowManager.get_instance()
+            await wf_manager.start()
+            logger.info("Scheduler started successfully")
+        except Exception:
+            logger.exception("Failed to start the scheduler")
+
+    # Start the consumer
+    if CONSUMER:
+        try:
+            logger.info("Starting the consumer")
+            event_subscriber = EventSubscriber.get_instance()
+            # TODO: there is some "race condition" since if the consumer starts before the server,
+            # and starts getting events, it will fail since the server is not ready yet
+            # we should add a "wait" here to make sure the server is ready
+            await event_subscriber.start()
+            logger.info("Consumer started successfully")
+        except Exception:
+            logger.exception("Failed to start the consumer")
+
+    if KEEP_ARQ_TASK_POOL != KEEP_ARQ_TASK_POOL_NONE:
+        event_loop = asyncio.get_event_loop()
+        if KEEP_ARQ_TASK_POOL == KEEP_ARQ_TASK_POOL_ALL:
+            logger.info("Starting all task pools")
+            basic_worker = get_arq_worker(KEEP_ARQ_QUEUE_BASIC)
+            event_loop.create_task(basic_worker.async_run())
+        elif KEEP_ARQ_TASK_POOL == KEEP_ARQ_TASK_POOL_BASIC_PROCESSING:
+            logger.info("Starting Basic Processing task pool")
+            arq_worker = get_arq_worker(KEEP_ARQ_QUEUE_BASIC)
+            event_loop.create_task(arq_worker.async_run())
+        else:
+            raise ValueError(f"Invalid task pool: {KEEP_ARQ_TASK_POOL}")
+
+    logger.info("Services started successfully")
+
+
+async def shutdown():
+    """
+    This runs for every worker on shutdown.
+    Read more about lifespan here: https://fastapi.tiangolo.com/advanced/events/#lifespan
+    """
+    logger.info("Shutting down Keep")
+    if SCHEDULER:
+        logger.info("Stopping the scheduler")
+        wf_manager = WorkflowManager.get_instance()
+        # stop the scheduler
+        try:
+            await wf_manager.stop()
+        # in pytest, there could be race condition
+        except TypeError:
+            pass
+        logger.info("Scheduler stopped successfully")
+    if CONSUMER:
+        logger.info("Stopping the consumer")
+        event_subscriber = EventSubscriber.get_instance()
+        try:
+            await event_subscriber.stop()
+        # in pytest, there could be race condition
+        except TypeError:
+            pass
+        logger.info("Consumer stopped successfully")
+    # ARQ workers stop themselves? see "shutdown on SIGTERM" in logs
+    logger.info("Keep shutdown complete")
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """
+    This runs for every worker on startup and shutdown.
+ Read more about lifespan here: https://fastapi.tiangolo.com/advanced/events/#lifespan + """ + # create a set of background tasks + background_tasks = set() + # if debug tasks are enabled, create a task to check for pending tasks + if KEEP_DEBUG_TASKS: + asyncio.create_task(check_pending_tasks(background_tasks)) + + # Startup + await startup() + + # yield the background tasks, this is available for the app to use in request context + yield {"background_tasks": background_tasks} + + # Shutdown + await shutdown() + + def get_app( auth_type: IdentityManagerTypes = IdentityManagerTypes.NOAUTH.value, ) -> FastAPI: - if not os.environ.get("KEEP_API_URL", None): + keep_api_url = config("KEEP_API_URL", default=None) + if not keep_api_url: logger.info( "KEEP_API_URL is not set, setting it to default", extra={"keep_api_url": f"http://{HOST}:{PORT}"}, @@ -103,7 +222,7 @@ def get_app( f"Starting Keep with {os.environ['KEEP_API_URL']} as URL and version {KEEP_VERSION}", extra={ "keep_version": KEEP_VERSION, - "keep_api_url": os.environ.get("KEEP_API_URL"), + "keep_api_url": keep_api_url, }, ) @@ -111,6 +230,7 @@ def get_app( title="Keep API", description="Rest API powering https://platform.keephq.dev and friends 🏄‍♀️", version=KEEP_VERSION, + lifespan=lifespan, ) @app.get("/") @@ -177,66 +297,6 @@ async def root(): # if any endpoints needed, add them on_start identity_manager.on_start(app) - @app.on_event("startup") - async def on_startup(): - logger.info("Loading providers into cache") - ProvidersFactory.get_all_providers() - # Start the services - logger.info("Starting the services") - # Start the scheduler - if SCHEDULER: - logger.info("Starting the scheduler") - wf_manager = WorkflowManager.get_instance() - await wf_manager.start() - logger.info("Scheduler started successfully") - # Start the consumer - if CONSUMER: - logger.info("Starting the consumer") - event_subscriber = EventSubscriber.get_instance() - # TODO: there is some "race condition" since if the consumer starts before the server, - # and start getting events, it will fail since the server is not ready yet - # we should add a "wait" here to make sure the server is ready - await event_subscriber.start() - logger.info("Consumer started successfully") - if KEEP_ARQ_TASK_POOL != KEEP_ARQ_TASK_POOL_NONE: - event_loop = asyncio.get_event_loop() - if KEEP_ARQ_TASK_POOL == KEEP_ARQ_TASK_POOL_ALL: - logger.info("Starting all task pools") - basic_worker = get_arq_worker(KEEP_ARQ_QUEUE_BASIC) - event_loop.create_task(basic_worker.async_run()) - elif KEEP_ARQ_TASK_POOL == KEEP_ARQ_TASK_POOL_BASIC_PROCESSING: - logger.info("Starting Basic Processing task pool") - arq_worker = get_arq_worker(KEEP_ARQ_QUEUE_BASIC) - event_loop.create_task(arq_worker.async_run()) - else: - raise ValueError(f"Invalid task pool: {KEEP_ARQ_TASK_POOL}") - logger.info("Services started successfully") - - @app.on_event("shutdown") - async def on_shutdown(): - logger.info("Shutting down Keep") - if SCHEDULER: - logger.info("Stopping the scheduler") - wf_manager = WorkflowManager.get_instance() - # stop the scheduler - try: - await wf_manager.stop() - # in pytest, there could be race condition - except TypeError: - pass - logger.info("Scheduler stopped successfully") - if CONSUMER: - logger.info("Stopping the consumer") - event_subscriber = EventSubscriber.get_instance() - try: - await event_subscriber.stop() - # in pytest, there could be race condition - except TypeError: - pass - logger.info("Consumer stopped successfully") - # ARQ workers stops themselves? 
see "shutdown on SIGTERM" in logs - logger.info("Keep shutdown complete") - @app.exception_handler(Exception) async def catch_exception(request: Request, exc: Exception): logging.error( @@ -266,9 +326,15 @@ def run(app: FastAPI): keep.api.config.on_starting() # run the server - uvicorn.run( - app, - host=HOST, - port=PORT, - log_config=logging_config, - ) + workers = config("KEEP_WORKERS", default=None, cast=int) + if workers: + uvicorn.run( + "keep.api.api:get_app", + host=HOST, + port=PORT, + log_config=logging_config, + lifespan="on", + workers=workers, + ) + else: + uvicorn.run(app, host=HOST, port=PORT, log_config=logging_config, lifespan="on") diff --git a/keep/api/bl/enrichments_bl.py b/keep/api/bl/enrichments_bl.py index 858579f33..08ab95855 100644 --- a/keep/api/bl/enrichments_bl.py +++ b/keep/api/bl/enrichments_bl.py @@ -7,10 +7,12 @@ import chevron from sqlmodel import Session +from keep.api.core.config import config from keep.api.core.db import enrich_alert as enrich_alert_db from keep.api.core.db import ( get_enrichment_with_session, get_mapping_rule_by_id, + get_session_sync, get_topology_data_by_dynamic_matcher, ) from keep.api.core.elastic import ElasticClient @@ -49,11 +51,15 @@ def get_nested_attribute(obj: AlertDto, attr_path: str): class EnrichmentsBl: + + ENRICHMENT_DISABLED = config("KEEP_ENRICHMENT_DISABLED", default="false", cast=bool) + def __init__(self, tenant_id: str, db: Session | None = None): self.logger = logging.getLogger(__name__) self.tenant_id = tenant_id - self.db_session = db - self.elastic_client = ElasticClient(tenant_id=tenant_id) + if not EnrichmentsBl.ENRICHMENT_DISABLED: + self.db_session = db or get_session_sync() + self.elastic_client = ElasticClient(tenant_id=tenant_id) def run_extraction_rules( self, event: AlertDto | dict, pre=False @@ -61,6 +67,10 @@ def run_extraction_rules( """ Run the extraction rules for the event """ + if EnrichmentsBl.ENRICHMENT_DISABLED: + self.logger.debug("Enrichment is disabled, skipping extraction rules") + return event + fingerprint = ( event.get("fingerprint") if isinstance(event, dict) @@ -189,7 +199,7 @@ def run_mapping_rule_by_id( ) return result - def run_mapping_rules(self, alert: AlertDto): + def run_mapping_rules(self, alert: AlertDto) -> AlertDto: """ Run the mapping rules for the alert. @@ -199,6 +209,10 @@ def run_mapping_rules(self, alert: AlertDto): Returns: - AlertDto: The enriched alert after applying mapping rules. 
""" + if EnrichmentsBl.ENRICHMENT_DISABLED: + self.logger.debug("Enrichment is disabled, skipping mapping rules") + return alert + self.logger.info( "Running mapping rules for incoming alert", extra={"fingerprint": alert.fingerprint, "tenant_id": self.tenant_id}, @@ -452,6 +466,10 @@ def dispose_enrichments(self, fingerprint: str): """ Dispose of enrichments from the alert """ + if EnrichmentsBl.ENRICHMENT_DISABLED: + self.logger.debug("Enrichment is disabled, skipping dispose enrichments") + return + self.logger.debug("disposing enrichments", extra={"fingerprint": fingerprint}) enrichments = get_enrichment_with_session( self.db_session, self.tenant_id, fingerprint diff --git a/keep/api/config.py b/keep/api/config.py index 38f19b696..e567bb930 100644 --- a/keep/api/config.py +++ b/keep/api/config.py @@ -5,6 +5,7 @@ from keep.api.api import AUTH_TYPE from keep.api.core.db_on_start import migrate_db, try_create_single_tenant from keep.api.core.dependencies import SINGLE_TENANT_UUID +from keep.api.core.tenant_configuration import TenantConfiguration from keep.api.routes.dashboard import provision_dashboards from keep.identitymanager.identitymanagerfactory import IdentityManagerTypes from keep.providers.providers_factory import ProvidersFactory @@ -20,6 +21,7 @@ def provision_resources(): if PROVISION_RESOURCES: + logger.info("Loading providers into cache") # provision providers from env. relevant only on single tenant. logger.info("Provisioning providers and workflows") ProvidersService.provision_providers_from_env(SINGLE_TENANT_UUID) @@ -43,6 +45,8 @@ def on_starting(server=None): # https://www.joelsleppy.com/blog/gunicorn-application-preloading/ # @tb: 👏 @Matvey-Kuk ProvidersFactory.get_all_providers() + # Load tenant configuration early + TenantConfiguration() # Create single tenant if it doesn't exist if AUTH_TYPE in [ diff --git a/keep/api/core/db.py b/keep/api/core/db.py index bf544cb17..6ca42f25f 100644 --- a/keep/api/core/db.py +++ b/keep/api/core/db.py @@ -13,12 +13,13 @@ from contextlib import contextmanager from datetime import datetime, timedelta, timezone from typing import Any, Callable, Dict, List, Tuple, Type, Union -from uuid import uuid4 +from uuid import UUID, uuid4 import validators from dateutil.tz import tz from dotenv import find_dotenv, load_dotenv from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor +from psycopg2.errors import NoActiveSqlTransaction from sqlalchemy import ( String, and_, @@ -42,6 +43,7 @@ from keep.api.consts import STATIC_PRESETS from keep.api.core.db_utils import create_db_engine, get_json_extract_field +from keep.api.core.dependencies import SINGLE_TENANT_UUID # This import is required to create the tables from keep.api.models.ai_external import ( @@ -54,8 +56,8 @@ IncidentDtoIn, IncidentSorting, ) -from keep.api.models.time_stamp import TimeStampFilter from keep.api.models.db.action import Action +from keep.api.models.db.ai_external import * # pylint: disable=unused-wildcard-import from keep.api.models.db.alert import * # pylint: disable=unused-wildcard-import from keep.api.models.db.dashboard import * # pylint: disable=unused-wildcard-import from keep.api.models.db.extraction import * # pylint: disable=unused-wildcard-import @@ -68,7 +70,7 @@ from keep.api.models.db.tenant import * # pylint: disable=unused-wildcard-import from keep.api.models.db.topology import * # pylint: disable=unused-wildcard-import from keep.api.models.db.workflow import * # pylint: disable=unused-wildcard-import -from keep.api.models.db.ai_external 
import * # pylint: disable=unused-wildcard-import +from keep.api.models.time_stamp import TimeStampFilter logger = logging.getLogger(__name__) @@ -91,6 +93,15 @@ ] +def dispose_session(): + logger.info("Disposing engine pool") + if engine.dialect.name != "sqlite": + engine.dispose(close=False) + logger.info("Engine pool disposed") + else: + logger.info("Engine pool is sqlite, not disposing") + + @contextmanager def existed_or_new_session(session: Optional[Session] = None) -> Session: if session: @@ -125,6 +136,13 @@ def get_session_sync() -> Session: return Session(engine) +def __convert_to_uuid(value: str) -> UUID: + try: + return UUID(value) + except ValueError: + return None + + def create_workflow_execution( workflow_id: str, tenant_id: str, @@ -208,14 +226,19 @@ def get_last_completed_execution( def get_workflows_that_should_run(): with Session(engine) as session: logger.debug("Checking for workflows that should run") - workflows_with_interval = ( - session.query(Workflow) - .filter(Workflow.is_deleted == False) - .filter(Workflow.is_disabled == False) - .filter(Workflow.interval != None) - .filter(Workflow.interval > 0) - .all() - ) + workflows_with_interval = [] + try: + result = session.exec( + select(Workflow) + .filter(Workflow.is_deleted == False) + .filter(Workflow.is_disabled == False) + .filter(Workflow.interval != None) + .filter(Workflow.interval > 0) + ) + workflows_with_interval = result.all() if result else [] + except Exception: + logger.exception("Failed to get workflows with interval") + logger.debug(f"Found {len(workflows_with_interval)} workflows with interval") workflows_to_run = [] # for each workflow: @@ -1062,8 +1085,7 @@ def get_alerts_with_filters( with Session(engine) as session: # Create the query query = ( - session - .query(Alert) + session.query(Alert) .select_from(LastAlert) .join(Alert, LastAlert.alert_id == Alert.id) ) @@ -1227,35 +1249,21 @@ def get_last_alerts( with_incidents=False, fingerprints=None, ) -> list[Alert]: - """ - Get the last alert for each fingerprint along with the first time the alert was triggered. - Supports MySQL, PostgreSQL, and SQLite databases. - - Args: - tenant_id (_type_): The tenant_id to filter the alerts by. - provider_id (_type_, optional): The provider id to filter by. Defaults to None. - limit (int, optional): The maximum number of alerts to return. Defaults to 1000. - timeframe (int, optional): The number of days to look back. Defaults to None. - upper_timestamp (datetime, optional): The upper bound for the timestamp filter. Defaults to None. - lower_timestamp (datetime, optional): The lower bound for the timestamp filter. Defaults to None. - fingerprints (List[str], optional): List of fingerprints to filter by. Defaults to None. - Returns: - List[Alert]: A list of Alert objects including the first time the alert was triggered. 
- """ with Session(engine) as session: dialect_name = session.bind.dialect.name - query = ( - session - .query(Alert, LastAlert.first_timestamp.label("startedAt")) + # Build the base query using select() + stmt = ( + select(Alert, LastAlert.first_timestamp.label("startedAt")) .select_from(LastAlert) .join(Alert, LastAlert.alert_id == Alert.id) ) if timeframe: - query = query.filter( - LastAlert.timestamp >= datetime.now(tz=timezone.utc) - timedelta(days=timeframe) + stmt = stmt.where( + LastAlert.timestamp + >= datetime.now(tz=timezone.utc) - timedelta(days=timeframe) ) # Apply additional filters @@ -1273,24 +1281,26 @@ def get_last_alerts( logger.info(f"filter_conditions: {filter_conditions}") if filter_conditions: - query = query.filter(*filter_conditions) + stmt = stmt.where(*filter_conditions) # Main query for alerts - query = query.filter(Alert.tenant_id == tenant_id).options(subqueryload(Alert.alert_enrichment)) + stmt = stmt.where(Alert.tenant_id == tenant_id).options( + subqueryload(Alert.alert_enrichment) + ) if with_incidents: if dialect_name == "sqlite": # SQLite version - using JSON incidents_subquery = ( - session.query( + select( LastAlertToIncident.fingerprint, func.json_group_array( cast(LastAlertToIncident.incident_id, String) ).label("incidents"), ) - .filter( + .where( LastAlertToIncident.tenant_id == tenant_id, - LastAlertToIncident.deleted_at == NULL_FOR_DELETED_AT + LastAlertToIncident.deleted_at == NULL_FOR_DELETED_AT, ) .group_by(LastAlertToIncident.fingerprint) .subquery() @@ -1299,15 +1309,15 @@ def get_last_alerts( elif dialect_name == "mysql": # MySQL version - using GROUP_CONCAT incidents_subquery = ( - session.query( + select( LastAlertToIncident.fingerprint, func.group_concat( cast(LastAlertToIncident.incident_id, String) ).label("incidents"), ) - .filter( + .where( LastAlertToIncident.tenant_id == tenant_id, - LastAlertToIncident.deleted_at == NULL_FOR_DELETED_AT + LastAlertToIncident.deleted_at == NULL_FOR_DELETED_AT, ) .group_by(LastAlertToIncident.fingerprint) .subquery() @@ -1316,16 +1326,16 @@ def get_last_alerts( elif dialect_name == "postgresql": # PostgreSQL version - using string_agg incidents_subquery = ( - session.query( + select( LastAlertToIncident.fingerprint, func.string_agg( cast(LastAlertToIncident.incident_id, String), ",", ).label("incidents"), ) - .filter( + .where( LastAlertToIncident.tenant_id == tenant_id, - LastAlertToIncident.deleted_at == NULL_FOR_DELETED_AT + LastAlertToIncident.deleted_at == NULL_FOR_DELETED_AT, ) .group_by(LastAlertToIncident.fingerprint) .subquery() @@ -1333,19 +1343,20 @@ def get_last_alerts( else: raise ValueError(f"Unsupported dialect: {dialect_name}") - query = query.add_columns(incidents_subquery.c.incidents) - query = query.outerjoin( - incidents_subquery, Alert.fingerprint == incidents_subquery.c.fingerprint + stmt = stmt.add_columns(incidents_subquery.c.incidents) + stmt = stmt.outerjoin( + incidents_subquery, + Alert.fingerprint == incidents_subquery.c.fingerprint, ) if provider_id: - query = query.filter(Alert.provider_id == provider_id) + stmt = stmt.where(Alert.provider_id == provider_id) # Order by timestamp in descending order and limit the results - query = query.order_by(desc(Alert.timestamp)).limit(limit) + stmt = stmt.order_by(desc(Alert.timestamp)).limit(limit) # Execute the query - alerts_with_start = query.all() + alerts_with_start = session.execute(stmt).all() # Process results based on dialect alerts = [] @@ -1456,7 +1467,6 @@ def get_user_by_api_key(api_key: str): # this is only for 
single tenant def get_user(username, password, update_sign_in=True): - from keep.api.core.dependencies import SINGLE_TENANT_UUID from keep.api.models.db.user import User password_hash = hashlib.sha256(password.encode()).hexdigest() @@ -1475,7 +1485,6 @@ def get_user(username, password, update_sign_in=True): def get_users(tenant_id=None): - from keep.api.core.dependencies import SINGLE_TENANT_UUID from keep.api.models.db.user import User tenant_id = tenant_id or SINGLE_TENANT_UUID @@ -1486,7 +1495,6 @@ def get_users(tenant_id=None): def delete_user(username): - from keep.api.core.dependencies import SINGLE_TENANT_UUID from keep.api.models.db.user import User with Session(engine) as session: @@ -1649,9 +1657,13 @@ def update_rule( require_approve, resolve_on, ): + rule_uuid = __convert_to_uuid(rule_id) + if not rule_uuid: + return False + with Session(engine) as session: rule = session.exec( - select(Rule).where(Rule.tenant_id == tenant_id).where(Rule.id == rule_id) + select(Rule).where(Rule.tenant_id == tenant_id).where(Rule.id == rule_uuid) ).first() if rule: @@ -1703,8 +1715,12 @@ def create_alert(tenant_id, provider_type, provider_id, event, fingerprint): def delete_rule(tenant_id, rule_id): with Session(engine) as session: + rule_uuid = __convert_to_uuid(rule_id) + if not rule_uuid: + return False + rule = session.exec( - select(Rule).where(Rule.tenant_id == tenant_id).where(Rule.id == rule_id) + select(Rule).where(Rule.tenant_id == tenant_id).where(Rule.id == rule_uuid) ).first() if rule: @@ -1783,7 +1799,9 @@ def get_rule_distribution(tenant_id, minute=False): # Check the dialect if session.bind.dialect.name == "mysql": time_format = "%Y-%m-%d %H:%i" if minute else "%Y-%m-%d %H" - timestamp_format = func.date_format(LastAlertToIncident.timestamp, time_format) + timestamp_format = func.date_format( + LastAlertToIncident.timestamp, time_format + ) elif session.bind.dialect.name == "postgresql": time_format = "YYYY-MM-DD HH:MI" if minute else "YYYY-MM-DD HH" timestamp_format = func.to_char(LastAlertToIncident.timestamp, time_format) @@ -1905,10 +1923,14 @@ def update_deduplication_rule( ignore_fields: list[str] = [], priority: int = 0, ): + rule_uuid = __convert_to_uuid(rule_id) + if not rule_uuid: + return False + with Session(engine) as session: rule = session.exec( select(AlertDeduplicationRule) - .where(AlertDeduplicationRule.id == rule_id) + .where(AlertDeduplicationRule.id == rule_uuid) .where(AlertDeduplicationRule.tenant_id == tenant_id) ).first() if not rule: @@ -1932,10 +1954,14 @@ def update_deduplication_rule( def delete_deduplication_rule(rule_id: str, tenant_id: str) -> bool: + rule_uuid = __convert_to_uuid(rule_id) + if not rule_uuid: + return False + with Session(engine) as session: rule = session.exec( select(AlertDeduplicationRule) - .where(AlertDeduplicationRule.id == rule_id) + .where(AlertDeduplicationRule.id == rule_uuid) .where(AlertDeduplicationRule.tenant_id == tenant_id) ).first() if not rule: @@ -1946,20 +1972,30 @@ def delete_deduplication_rule(rule_id: str, tenant_id: str) -> bool: return True -def get_custom_deduplication_rules(tenant_id, provider_id, provider_type): - with Session(engine) as session: - rules = session.exec( - select(AlertDeduplicationRule) - .where(AlertDeduplicationRule.tenant_id == tenant_id) - .where(AlertDeduplicationRule.provider_id == provider_id) - .where(AlertDeduplicationRule.provider_type == provider_type) - ).all() - return rules - - def create_deduplication_event( tenant_id, deduplication_rule_id, deduplication_type, 
provider_id, provider_type ): + logger.debug( + "Adding deduplication event", + extra={ + "deduplication_rule_id": deduplication_rule_id, + "deduplication_type": deduplication_type, + "provider_id": provider_id, + "provider_type": provider_type, + "tenant_id": tenant_id, + }, + ) + if isinstance(deduplication_rule_id, str): + deduplication_rule_id = __convert_to_uuid(deduplication_rule_id) + if not deduplication_rule_id: + logger.debug( + "Deduplication rule id is not a valid uuid", + extra={ + "deduplication_rule_id": deduplication_rule_id, + "tenant_id": tenant_id, + }, + ) + return False with Session(engine) as session: deduplication_event = AlertDeduplicationEvent( tenant_id=tenant_id, @@ -1967,11 +2003,20 @@ def create_deduplication_event( deduplication_type=deduplication_type, provider_id=provider_id, provider_type=provider_type, - timestamp=datetime.utcnow(), - date_hour=datetime.utcnow().replace(minute=0, second=0, microsecond=0), + timestamp=datetime.now(tz=timezone.utc), + date_hour=datetime.now(tz=timezone.utc).replace( + minute=0, second=0, microsecond=0 + ), ) session.add(deduplication_event) session.commit() + logger.debug( + "Deduplication event added", + extra={ + "deduplication_event_id": deduplication_event.id, + "tenant_id": tenant_id, + }, + ) def get_all_deduplication_stats(tenant_id): @@ -2074,19 +2119,18 @@ def get_all_deduplication_stats(tenant_id): return stats -def get_last_alert_hash_by_fingerprint(tenant_id, fingerprint): +def get_last_alert_hash_by_fingerprint(tenant_id, fingerprint) -> str | None: # get the last alert for a given fingerprint # to check deduplication with Session(engine) as session: query = ( - select(Alert.alert_hash) - .where(Alert.tenant_id == tenant_id) - .where(Alert.fingerprint == fingerprint) - .order_by(Alert.timestamp.desc()) - .limit(1) # Add LIMIT 1 for MSSQL + select(LastAlert.alert_hash) + .where(LastAlert.tenant_id == tenant_id) + .where(LastAlert.fingerprint == fingerprint) + .limit(1) ) - alert_hash = session.exec(query).first() + alert_hash: str | None = session.scalars(query).first() return alert_hash @@ -2140,7 +2184,8 @@ def get_linked_providers(tenant_id: str) -> List[Tuple[str, str, datetime]]: .filter( Alert.tenant_id == tenant_id, Alert.provider_type != "group", - Provider.id == None, # Filters for alerts with a provider_id not in Provider table + Provider.id + == None, # Filters for alerts with a provider_id not in Provider table ) .group_by(Alert.provider_type, Alert.provider_id) .all() @@ -2284,6 +2329,7 @@ def get_provider_distribution( "last_alert_received": last_alert_timestamp, } else: + provider_distribution[provider_key]["last_alert_received"] = max( provider_distribution[provider_key]["last_alert_received"], last_alert_timestamp, @@ -2589,6 +2635,7 @@ def get_db_preset_by_name(tenant_id: str, preset_name: str) -> Preset | None: ).first() return preset + def get_db_presets(tenant_id: str) -> List[Preset]: with Session(engine) as session: presets = ( @@ -2598,11 +2645,13 @@ def get_db_presets(tenant_id: str) -> List[Preset]: ) return presets + def get_all_presets_dtos(tenant_id: str) -> List[PresetDto]: presets = get_db_presets(tenant_id) static_presets_dtos = list(STATIC_PRESETS.values()) return [PresetDto(**preset.to_dict()) for preset in presets] + static_presets_dtos + def get_dashboards(tenant_id: str, email=None) -> List[Dict[str, Any]]: with Session(engine) as session: statement = ( @@ -2933,8 +2982,10 @@ def get_incidents_meta_for_tenant(tenant_id: str) -> dict: ).label("affected_services"), ) 
.select_from(Incident) - .outerjoin(sources_join, True) - .outerjoin(affected_services_join, True) + .outerjoin(sources_join, sources_join.c.value.isnot(None)) + .outerjoin( + affected_services_join, affected_services_join.c.value.isnot(None) + ) .filter(Incident.tenant_id == tenant_id, Incident.is_confirmed == True) ) results = session.exec(query).one_or_none() @@ -2968,8 +3019,10 @@ def get_incidents_meta_for_tenant(tenant_id: str) -> dict: ), ) .select_from(Incident) - .outerjoin(sources_join, True) - .outerjoin(affected_services_join, True) + .outerjoin(sources_join, sources_join.c.value.isnot(None)) + .outerjoin( + affected_services_join, affected_services_join.c.value.isnot(None) + ) .filter(Incident.tenant_id == tenant_id, Incident.is_confirmed == True) ) @@ -3005,8 +3058,10 @@ def get_incidents_meta_for_tenant(tenant_id: str) -> dict: ), ) .select_from(Incident) - .outerjoin(sources_join, True) - .outerjoin(affected_services_join, True) + .outerjoin(sources_join, sources_join.c.value.isnot(None)) + .outerjoin( + affected_services_join, affected_services_join.c.value.isnot(None) + ) .filter(Incident.tenant_id == tenant_id, Incident.is_confirmed == True) ) @@ -3019,7 +3074,9 @@ def get_incidents_meta_for_tenant(tenant_id: str) -> dict: return { "assignees": list(filter(bool, assignees)) if assignees else [], "sources": list(filter(bool, sources)) if sources else [], - "services": list(filter(bool, affected_services)) if affected_services else [], + "services": ( + list(filter(bool, affected_services)) if affected_services else [] + ), } return {} @@ -3068,20 +3125,27 @@ def filter_query(session: Session, query, field, value): return query -def enrich_incidents_with_alerts(tenant_id: str, incidents: List[Incident], session: Optional[Session]=None): +def enrich_incidents_with_alerts( + tenant_id: str, incidents: List[Incident], session: Optional[Session] = None +): with existed_or_new_session(session) as session: incident_alerts = session.exec( select(LastAlertToIncident.incident_id, Alert) .select_from(LastAlert) - .join(LastAlertToIncident, and_( - LastAlertToIncident.tenant_id == LastAlert.tenant_id, - LastAlertToIncident.fingerprint == LastAlert.fingerprint, - LastAlertToIncident.deleted_at == NULL_FOR_DELETED_AT, - )) + .join( + LastAlertToIncident, + and_( + LastAlertToIncident.tenant_id == LastAlert.tenant_id, + LastAlertToIncident.fingerprint == LastAlert.fingerprint, + LastAlertToIncident.deleted_at == NULL_FOR_DELETED_AT, + ), + ) .join(Alert, LastAlert.alert_id == Alert.id) .where( LastAlert.tenant_id == tenant_id, - LastAlertToIncident.incident_id.in_([incident.id for incident in incidents]) + LastAlertToIncident.incident_id.in_( + [incident.id for incident in incidents] + ), ) ).all() @@ -3095,20 +3159,27 @@ def enrich_incidents_with_alerts(tenant_id: str, incidents: List[Incident], sess return incidents -def enrich_alerts_with_incidents(tenant_id: str, alerts: List[Alert], session: Optional[Session]=None): +def enrich_alerts_with_incidents( + tenant_id: str, alerts: List[Alert], session: Optional[Session] = None +): with existed_or_new_session(session) as session: alert_incidents = session.exec( select(LastAlertToIncident.fingerprint, Incident) .select_from(LastAlert) - .join(LastAlertToIncident, and_( - LastAlertToIncident.tenant_id == LastAlert.tenant_id, - LastAlertToIncident.fingerprint == LastAlert.fingerprint, - LastAlertToIncident.deleted_at == NULL_FOR_DELETED_AT, - )) + .join( + LastAlertToIncident, + and_( + LastAlertToIncident.tenant_id == LastAlert.tenant_id, 
+ LastAlertToIncident.fingerprint == LastAlert.fingerprint, + LastAlertToIncident.deleted_at == NULL_FOR_DELETED_AT, + ), + ) .join(Incident, LastAlertToIncident.incident_id == Incident.id) .where( LastAlert.tenant_id == tenant_id, - LastAlertToIncident.fingerprint.in_([alert.fingerprint for alert in alerts]) + LastAlertToIncident.fingerprint.in_( + [alert.fingerprint for alert in alerts] + ), ) ).all() @@ -3201,7 +3272,9 @@ def get_last_incidents( def get_incident_by_id( - tenant_id: str, incident_id: str | UUID, with_alerts: bool = False, + tenant_id: str, + incident_id: str | UUID, + with_alerts: bool = False, session: Optional[Session] = None, ) -> Optional[Incident]: with existed_or_new_session(session) as session: @@ -3214,7 +3287,9 @@ def get_incident_by_id( incident = query.first() if with_alerts: enrich_incidents_with_alerts( - tenant_id, [incident], session, + tenant_id, + [incident], + session, ) return incident @@ -3285,8 +3360,7 @@ def update_incident_from_dto_by_id( ) -> Optional[Incident]: with Session(engine) as session: incident = session.exec( - select(Incident) - .where( + select(Incident).where( Incident.tenant_id == tenant_id, Incident.id == incident_id, ) @@ -3390,10 +3464,12 @@ def get_incident_alerts_and_links_by_incident_id( LastAlertToIncident, ) .select_from(LastAlertToIncident) - .join(LastAlert, and_( - LastAlert.tenant_id == LastAlertToIncident.tenant_id, - LastAlert.fingerprint == LastAlertToIncident.fingerprint - ) + .join( + LastAlert, + and_( + LastAlert.tenant_id == LastAlertToIncident.tenant_id, + LastAlert.fingerprint == LastAlertToIncident.fingerprint, + ), ) .join(Alert, LastAlert.alert_id == Alert.id) .filter( @@ -3475,10 +3551,13 @@ def get_alerts_data_for_incident( alerts_data = session.exec( select(*fields) .select_from(LastAlert) - .join(Alert, and_( - LastAlert.tenant_id == Alert.tenant_id, - LastAlert.alert_id == Alert.id, - )) + .join( + Alert, + and_( + LastAlert.tenant_id == Alert.tenant_id, + LastAlert.alert_id == Alert.id, + ), + ) .where( LastAlert.tenant_id == tenant_id, col(LastAlert.fingerprint).in_(fingerprints), @@ -3551,10 +3630,13 @@ def add_alerts_to_incident( existing_fingerprints = set( session.exec( select(LastAlert.fingerprint) - .join(LastAlertToIncident, and_( - LastAlertToIncident.tenant_id == LastAlert.tenant_id, - LastAlertToIncident.fingerprint == LastAlert.fingerprint - )) + .join( + LastAlertToIncident, + and_( + LastAlertToIncident.tenant_id == LastAlert.tenant_id, + LastAlertToIncident.fingerprint == LastAlert.fingerprint, + ), + ) .where( LastAlertToIncident.deleted_at == NULL_FOR_DELETED_AT, LastAlertToIncident.tenant_id == tenant_id, @@ -3618,10 +3700,13 @@ def add_alerts_to_incident( started_at, last_seen_at = session.exec( select(func.min(Alert.timestamp), func.max(Alert.timestamp)) - .join(LastAlertToIncident, and_( - LastAlertToIncident.tenant_id == Alert.tenant_id, - LastAlertToIncident.fingerprint == Alert.fingerprint - )) + .join( + LastAlertToIncident, + and_( + LastAlertToIncident.tenant_id == Alert.tenant_id, + LastAlertToIncident.fingerprint == Alert.fingerprint, + ), + ) .where( LastAlertToIncident.deleted_at == NULL_FOR_DELETED_AT, LastAlertToIncident.tenant_id == tenant_id, @@ -3661,10 +3746,13 @@ def get_last_alerts_for_incidents( LastAlertToIncident.incident_id, ) .select_from(LastAlert) - .join(LastAlertToIncident, and_( - LastAlert.tenant_id == LastAlertToIncident.tenant_id, - LastAlert.fingerprint == LastAlertToIncident.fingerprint - )) + .join( + LastAlertToIncident, + and_( + 
LastAlert.tenant_id == LastAlertToIncident.tenant_id, + LastAlert.fingerprint == LastAlertToIncident.fingerprint, + ), + ) .join(Alert, LastAlert.alert_id == Alert.id) .filter( LastAlertToIncident.deleted_at == NULL_FOR_DELETED_AT, @@ -3725,10 +3813,13 @@ def remove_alerts_to_incident_by_incident_id( existed_services_query = ( select(func.distinct(service_field)) .select_from(LastAlert) - .join(LastAlertToIncident, and_( - LastAlert.tenant_id == LastAlertToIncident.tenant_id, - LastAlert.fingerprint == LastAlertToIncident.fingerprint - )) + .join( + LastAlertToIncident, + and_( + LastAlert.tenant_id == LastAlertToIncident.tenant_id, + LastAlert.fingerprint == LastAlertToIncident.fingerprint, + ), + ) .join(Alert, LastAlert.alert_id == Alert.id) .filter( LastAlertToIncident.deleted_at == NULL_FOR_DELETED_AT, @@ -3743,10 +3834,13 @@ def remove_alerts_to_incident_by_incident_id( existed_sources_query = ( select(col(Alert.provider_type).distinct()) .select_from(LastAlert) - .join(LastAlertToIncident, and_( - LastAlert.tenant_id == LastAlertToIncident.tenant_id, - LastAlert.fingerprint == LastAlertToIncident.fingerprint - )) + .join( + LastAlertToIncident, + and_( + LastAlert.tenant_id == LastAlertToIncident.tenant_id, + LastAlert.fingerprint == LastAlertToIncident.fingerprint, + ), + ) .join(Alert, LastAlert.alert_id == Alert.id) .filter( LastAlertToIncident.deleted_at == NULL_FOR_DELETED_AT, @@ -3771,10 +3865,13 @@ def remove_alerts_to_incident_by_incident_id( started_at, last_seen_at = session.exec( select(func.min(Alert.timestamp), func.max(Alert.timestamp)) .select_from(LastAlert) - .join(LastAlertToIncident, and_( - LastAlert.tenant_id == LastAlertToIncident.tenant_id, - LastAlert.fingerprint == LastAlertToIncident.fingerprint, - )) + .join( + LastAlertToIncident, + and_( + LastAlert.tenant_id == LastAlertToIncident.tenant_id, + LastAlert.fingerprint == LastAlertToIncident.fingerprint, + ), + ) .join(Alert, LastAlert.alert_id == Alert.id) .where( LastAlertToIncident.tenant_id == tenant_id, @@ -3816,8 +3913,7 @@ def merge_incidents_to_id( ) -> Tuple[List[UUID], List[UUID], List[UUID]]: with Session(engine) as session: destination_incident = session.exec( - select(Incident) - .where( + select(Incident).where( Incident.tenant_id == tenant_id, Incident.id == destination_incident_id ) ).first() @@ -3840,7 +3936,9 @@ def merge_incidents_to_id( skipped_incident_ids = [] failed_incident_ids = [] for source_incident in source_incidents: - source_incident_alerts_fingerprints = [alert.fingerprint for alert in source_incident._alerts] + source_incident_alerts_fingerprints = [ + alert.fingerprint for alert in source_incident._alerts + ] if not source_incident_alerts_fingerprints: logger.info(f"Source incident {source_incident.id} doesn't have alerts") skipped_incident_ids.append(source_incident.id) @@ -4221,13 +4319,17 @@ def get_workflow_executions_for_incident_or_alert( # Query for workflow executions associated with alerts tied to the incident alert_query = ( base_query.join( - LastAlert, WorkflowToAlertExecution.alert_fingerprint == LastAlert.fingerprint + LastAlert, + WorkflowToAlertExecution.alert_fingerprint == LastAlert.fingerprint, ) .join(Alert, LastAlert.alert_id == Alert.id) - .join(LastAlertToIncident, and_( - LastAlert.tenant_id == LastAlertToIncident.tenant_id, - LastAlert.fingerprint == LastAlertToIncident.fingerprint - )) + .join( + LastAlertToIncident, + and_( + LastAlert.tenant_id == LastAlertToIncident.tenant_id, + LastAlert.fingerprint == LastAlertToIncident.fingerprint, + ), + ) 
.where( LastAlertToIncident.deleted_at == NULL_FOR_DELETED_AT, LastAlertToIncident.incident_id == incident_id, @@ -4277,15 +4379,19 @@ def is_all_incident_alerts_resolved( .select_from(LastAlert) .join(Alert, LastAlert.alert_id == Alert.id) .outerjoin( - AlertEnrichment, and_( + AlertEnrichment, + and_( Alert.tenant_id == AlertEnrichment.tenant_id, - Alert.fingerprint == AlertEnrichment.alert_fingerprint + Alert.fingerprint == AlertEnrichment.alert_fingerprint, + ), + ) + .join( + LastAlertToIncident, + and_( + LastAlertToIncident.tenant_id == LastAlert.tenant_id, + LastAlertToIncident.fingerprint == LastAlert.fingerprint, ), ) - .join(LastAlertToIncident, and_( - LastAlertToIncident.tenant_id == LastAlert.tenant_id, - LastAlertToIncident.fingerprint == LastAlert.fingerprint - )) .where( LastAlertToIncident.deleted_at == NULL_FOR_DELETED_AT, LastAlertToIncident.incident_id == incident.id, @@ -4344,15 +4450,19 @@ def is_edge_incident_alert_resolved( select(Alert.fingerprint, enriched_status_field, status_field) .select_from(Alert) .outerjoin( - AlertEnrichment, and_( + AlertEnrichment, + and_( Alert.tenant_id == AlertEnrichment.tenant_id, - Alert.fingerprint == AlertEnrichment.alert_fingerprint - ) + Alert.fingerprint == AlertEnrichment.alert_fingerprint, + ), + ) + .join( + LastAlertToIncident, + and_( + LastAlertToIncident.tenant_id == Alert.tenant_id, + LastAlertToIncident.fingerprint == Alert.fingerprint, + ), ) - .join(LastAlertToIncident, and_( - LastAlertToIncident.tenant_id == Alert.tenant_id, - LastAlertToIncident.fingerprint == Alert.fingerprint - )) .where(LastAlertToIncident.incident_id == incident.id) .group_by(Alert.fingerprint) .having(func.max(Alert.timestamp)) @@ -4374,13 +4484,11 @@ def get_alerts_metrics_by_provider( dynamic_field_sums = [ func.sum( case( - [ - ( - func.json_extract(Alert.event, f"$.{field}").isnot(None) - & (func.json_extract(Alert.event, f"$.{field}") != False), - 1, - ) - ], + ( + (func.json_extract(Alert.event, f"$.{field}").isnot(None)) + & (func.json_extract(Alert.event, f"$.{field}") != False), + 1, + ), else_=0, ) ).label(f"{field}_count") @@ -4394,15 +4502,18 @@ def get_alerts_metrics_by_provider( Alert.provider_id, func.count(Alert.id).label("total_alerts"), func.sum( - case([(LastAlertToIncident.fingerprint.isnot(None), 1)], else_=0) + case((LastAlertToIncident.fingerprint.isnot(None), 1), else_=0) ).label("correlated_alerts"), *dynamic_field_sums, ) .join(LastAlert, Alert.id == LastAlert.alert_id) - .outerjoin(LastAlertToIncident, and_( - LastAlert.tenant_id == LastAlertToIncident.tenant_id, - LastAlert.fingerprint == LastAlertToIncident.fingerprint - )) + .outerjoin( + LastAlertToIncident, + and_( + LastAlert.tenant_id == LastAlertToIncident.tenant_id, + LastAlert.fingerprint == LastAlertToIncident.fingerprint, + ), + ) .filter( Alert.tenant_id == tenant_id, ) @@ -4416,50 +4527,68 @@ def get_alerts_metrics_by_provider( results = query.group_by(Alert.provider_id, Alert.provider_type).all() - return { - f"{row.provider_id}_{row.provider_type}": { + metrics = {} + for row in results: + key = f"{row.provider_id}_{row.provider_type}" + metrics[key] = { "total_alerts": row.total_alerts, "correlated_alerts": row.correlated_alerts, "provider_type": row.provider_type, - **{ - f"{field}_count": getattr(row, f"{field}_count") for field in fields - }, # Add field-specific counts } - for row in results - } + for field in fields: + metrics[key][f"{field}_count"] = getattr(row, f"{field}_count", 0) + + return metrics -def 
get_or_create_external_ai_settings(tenant_id: str) -> List[ExternalAIConfigAndMetadataDto]: + +def get_or_create_external_ai_settings( + tenant_id: str, +) -> List[ExternalAIConfigAndMetadataDto]: with Session(engine) as session: algorithm_configs = session.exec( - select(ExternalAIConfigAndMetadata).where(ExternalAIConfigAndMetadata.tenant_id == tenant_id) + select(ExternalAIConfigAndMetadata).where( + ExternalAIConfigAndMetadata.tenant_id == tenant_id + ) ).all() if len(algorithm_configs) == 0: if os.environ.get("KEEP_EXTERNAL_AI_TRANSFORMERS_URL") is not None: algorithm_config = ExternalAIConfigAndMetadata.from_external_ai( - tenant_id=tenant_id, - algorithm=external_ai_transformers + tenant_id=tenant_id, algorithm=external_ai_transformers ) session.add(algorithm_config) session.commit() algorithm_configs = [algorithm_config] - return [ExternalAIConfigAndMetadataDto.from_orm(algorithm_config) for algorithm_config in algorithm_configs] + return [ + ExternalAIConfigAndMetadataDto.from_orm(algorithm_config) + for algorithm_config in algorithm_configs + ] + -def update_extrnal_ai_settings(tenant_id: str, ai_settings: ExternalAIConfigAndMetadata) -> ExternalAIConfigAndMetadataDto: +def update_extrnal_ai_settings( + tenant_id: str, ai_settings: ExternalAIConfigAndMetadata +) -> ExternalAIConfigAndMetadataDto: with Session(engine) as session: - setting = session.query(ExternalAIConfigAndMetadata).filter( - ExternalAIConfigAndMetadata.tenant_id == tenant_id, - ExternalAIConfigAndMetadata.id == ai_settings.id, - ).first() + setting = ( + session.query(ExternalAIConfigAndMetadata) + .filter( + ExternalAIConfigAndMetadata.tenant_id == tenant_id, + ExternalAIConfigAndMetadata.id == ai_settings.id, + ) + .first() + ) setting.settings = json.dumps(ai_settings.settings) setting.feedback_logs = ai_settings.feedback_logs if ai_settings.settings_proposed_by_algorithm is not None: - setting.settings_proposed_by_algorithm = json.dumps(ai_settings.settings_proposed_by_algorithm) + setting.settings_proposed_by_algorithm = json.dumps( + ai_settings.settings_proposed_by_algorithm + ) else: setting.settings_proposed_by_algorithm = None session.add(setting) session.commit() return setting + def get_table_class(table_name: str) -> Type[SQLModel]: """ Get the SQLModel table class dynamically based on table name. 
@@ -4523,28 +4652,28 @@ def get_resource_ids_by_resource_type(
     result = session.exec(query)
     return result.all()
 
 
-def get_or_creat_posthog_instance_id(
-        session: Optional[Session] = None
-    ):
-    POSTHOG_INSTANCE_ID_KEY = "posthog_instance_id"
-    with Session(engine) as session:
-        system = session.exec(select(System).where(System.name == POSTHOG_INSTANCE_ID_KEY)).first()
-        if system:
-            return system.value
-        system = System(
-            id=str(uuid4()),
-            name=POSTHOG_INSTANCE_ID_KEY,
-            value=str(uuid4()),
-        )
-        session.add(system)
-        session.commit()
-        session.refresh(system)
+def get_or_creat_posthog_instance_id(session: Optional[Session] = None):
+    POSTHOG_INSTANCE_ID_KEY = "posthog_instance_id"
+    with Session(engine) as session:
+        system = session.exec(
+            select(System).where(System.name == POSTHOG_INSTANCE_ID_KEY)
+        ).first()
+        if system:
             return system.value
 
-def get_activity_report(
-        session: Optional[Session] = None
-    ):
+        system = System(
+            id=str(uuid4()),
+            name=POSTHOG_INSTANCE_ID_KEY,
+            value=str(uuid4()),
+        )
+        session.add(system)
+        session.commit()
+        session.refresh(system)
+        return system.value
+
+
+def get_activity_report(session: Optional[Session] = None):
     from keep.api.models.db.user import User
 
     last_24_hours = datetime.utcnow() - timedelta(hours=24)
@@ -4570,24 +4699,25 @@ def get_activity_report(
         .filter(Workflow.creation_time >= last_24_hours)
         .count()
     )
-    activity_report["last_24_hours_workflows_executed"] = (session.query(WorkflowExecution).filter(
-        WorkflowExecution.started >= last_24_hours).count()
-)
+    activity_report["last_24_hours_workflows_executed"] = (
+        session.query(WorkflowExecution)
+        .filter(WorkflowExecution.started >= last_24_hours)
+        .count()
+    )
 
     return activity_report
 
 def get_last_alert_by_fingerprint(
-    tenant_id: str, fingerprint: str, session: Optional[Session] = None,
-    for_update: bool = False
+    tenant_id: str,
+    fingerprint: str,
+    session: Optional[Session] = None,
+    for_update: bool = False,
 ) -> Optional[LastAlert]:
     with existed_or_new_session(session) as session:
-        query = (
-            select(LastAlert)
-            .where(
-                and_(
-                    LastAlert.tenant_id == tenant_id,
-                    LastAlert.fingerprint == fingerprint,
-                )
+        query = select(LastAlert).where(
+            and_(
+                LastAlert.tenant_id == tenant_id,
+                LastAlert.fingerprint == fingerprint,
             )
         )
         if for_update:
@@ -4598,46 +4728,81 @@ def get_last_alert_by_fingerprint(
 def set_last_alert(
     tenant_id: str, alert: Alert, session: Optional[Session] = None, max_retries=3
 ) -> None:
-    logger.info(
-        f"Set last alert for `{alert.fingerprint}`"
-    )
+    logger.info(f"Setting last alert for `{alert.fingerprint}`")
     with existed_or_new_session(session) as session:
         for attempt in range(max_retries):
-            with session.begin_nested() as transaction:
-                try:
-                    last_alert = get_last_alert_by_fingerprint(tenant_id, alert.fingerprint, session, for_update=True)
+            logger.debug(
+                f"Attempt {attempt} to set last alert for `{alert.fingerprint}`",
+                extra={
+                    "alert_id": alert.id,
+                    "tenant_id": tenant_id,
+                    "fingerprint": alert.fingerprint,
+                },
+            )
+            try:
+                last_alert = get_last_alert_by_fingerprint(
+                    tenant_id, alert.fingerprint, session, for_update=True
+                )
 
-                    # To prevent rare, but possible race condition
-                    # For example if older alert failed to process
-                    # and retried after new one
-                    if last_alert and last_alert.timestamp.replace(tzinfo=tz.UTC) < alert.timestamp.replace(tzinfo=tz.UTC):
+                # To prevent rare, but possible race condition
+                # For example if older alert failed to process
+                # and retried after new one
+                if last_alert and last_alert.timestamp.replace(
+                    tzinfo=tz.UTC
+                ) < alert.timestamp.replace(tzinfo=tz.UTC):
 
-                        logger.info(
-                            f"Update last alert for `{alert.fingerprint}`: {last_alert.alert_id} -> {alert.id}"
-                        )
-                        last_alert.timestamp = alert.timestamp
-                        last_alert.alert_id = alert.id
-                        session.add(last_alert)
+                    logger.info(
+                        f"Update last alert for `{alert.fingerprint}`: {last_alert.alert_id} -> {alert.id}"
+                    )
+                    last_alert.timestamp = alert.timestamp
+                    last_alert.alert_id = alert.id
+                    session.add(last_alert)
 
-                    elif not last_alert:
-                        logger.info(
-                            f"No last alert for `{alert.fingerprint}`, creating new"
-                        )
-                        last_alert = LastAlert(
-                            tenant_id=tenant_id,
-                            fingerprint=alert.fingerprint,
-                            timestamp=alert.timestamp,
-                            first_timestamp=alert.timestamp,alert_id=alert.id,
-                        )
+                elif not last_alert:
+                    logger.info(
+                        f"No last alert for `{alert.fingerprint}`, creating new"
+                    )
+                    last_alert = LastAlert(
+                        tenant_id=tenant_id,
+                        fingerprint=alert.fingerprint,
+                        timestamp=alert.timestamp,
+                        first_timestamp=alert.timestamp,
+                        alert_id=alert.id,
+                        alert_hash=alert.alert_hash,
+                    )
 
-                        session.add(last_alert)
-                    transaction.commit()
-                except OperationalError as ex:
-                    if "Deadlock found" in ex.args[0]:
+                session.add(last_alert)
+                session.commit()
+            except OperationalError as ex:
+                if "no such savepoint" in ex.args[0]:
+                    logger.info(
+                        f"No such savepoint while updating lastalert for `{alert.fingerprint}`, retry #{attempt}"
+                    )
+                    if attempt >= max_retries:
+                        raise ex
 
-                        logger.info(
-                            f"Deadlock found while updating lastalert for `{alert.fingerprint}`, retry #{attempt}"
-                        )
-                        transaction.rollback()
-                        if attempt >= max_retries:
-                            raise ex
+                if "Deadlock found" in ex.args[0]:
+                    logger.info(
+                        f"Deadlock found while updating lastalert for `{alert.fingerprint}`, retry #{attempt}"
+                    )
+                    if attempt >= max_retries:
+                        raise ex
+            except NoActiveSqlTransaction:
+                logger.exception(
+                    f"No active sql transaction while updating lastalert for `{alert.fingerprint}`, retry #{attempt}",
+                    extra={
+                        "alert_id": alert.id,
+                        "tenant_id": tenant_id,
+                        "fingerprint": alert.fingerprint,
+                    },
+                )
+            logger.debug(
+                f"Successfully updated lastalert for `{alert.fingerprint}`",
+                extra={
+                    "alert_id": alert.id,
+                    "tenant_id": tenant_id,
+                    "fingerprint": alert.fingerprint,
+                },
+            )
+            # break the retry loop
+            break
diff --git a/keep/api/core/db_on_start.py b/keep/api/core/db_on_start.py
index 8081ce31b..defd4ecbd 100644
--- a/keep/api/core/db_on_start.py
+++ b/keep/api/core/db_on_start.py
@@ -46,15 +46,9 @@ def try_create_single_tenant(tenant_id: str, create_default_user=True) -> None:
     """
     Creates the single tenant and the default user if they don't exist.
""" - try: - # if Keep is not multitenant, let's import the User table too: - from keep.api.models.db.user import ( # pylint: disable=import-outside-toplevel - User, - ) - - migrate_db() - except Exception: - pass + # if Keep is not multitenant, let's import the User table too: + from keep.api.models.db.user import User # pylint: disable=import-outside-toplevel + with Session(engine) as session: try: # check if the tenant exist: diff --git a/keep/api/core/db_utils.py b/keep/api/core/db_utils.py index 1c4fd0e14..97ffa3100 100644 --- a/keep/api/core/db_utils.py +++ b/keep/api/core/db_utils.py @@ -101,6 +101,12 @@ def __get_conn_impersonate() -> pymysql.connections.Connection: DB_ECHO = config( "DATABASE_ECHO", default=False, cast=bool ) # pylint: disable=invalid-name +KEEP_FORCE_CONNECTION_STRING = config( + "KEEP_FORCE_CONNECTION_STRING", default=False, cast=bool +) # pylint: disable=invalid-name +KEEP_DB_PRE_PING_ENABLED = config( + "KEEP_DB_PRE_PING_ENABLED", default=False, cast=bool +) # pylint: disable=invalid-name def dumps(_json) -> str: @@ -122,7 +128,7 @@ def create_db_engine(): """ Creates a database engine based on the environment variables. """ - if RUNNING_IN_CLOUD_RUN: + if RUNNING_IN_CLOUD_RUN and not KEEP_FORCE_CONNECTION_STRING: engine = create_engine( "mysql+pymysql://", creator=__get_conn, @@ -147,6 +153,7 @@ def create_db_engine(): max_overflow=DB_MAX_OVERFLOW, json_serializer=dumps, echo=DB_ECHO, + pool_pre_ping=True if KEEP_DB_PRE_PING_ENABLED else False, ) # SQLite does not support pool_size except TypeError: diff --git a/keep/api/core/demo_mode.py b/keep/api/core/demo_mode.py index a94571e63..e438355b2 100644 --- a/keep/api/core/demo_mode.py +++ b/keep/api/core/demo_mode.py @@ -1,3 +1,4 @@ +import asyncio import datetime import logging import os @@ -11,7 +12,6 @@ import requests from dateutil import parser from requests.models import PreparedRequest -from sqlalchemy.util import asyncio from keep.api.core.db import get_session_sync from keep.api.core.dependencies import SINGLE_TENANT_UUID @@ -318,7 +318,7 @@ def perform_demo_ai(keep_api_key, keep_api_url): incidents_existing = requests.get( f"{keep_api_url}/incidents", headers={"x-api-key": keep_api_key}, - ) + ) incidents_existing.raise_for_status() incidents_existing = incidents_existing.json()["items"] @@ -326,7 +326,7 @@ def perform_demo_ai(keep_api_key, keep_api_url): incident_exists = None - # Create incident if it doesn't exist + # Create incident if it doesn't exist for incident in incidents_existing: if incident["user_generated_name"] == MANUAL_INCIDENT_NAME: @@ -393,8 +393,7 @@ def perform_demo_ai(keep_api_key, keep_api_url): alerts_in_incident.raise_for_status() alerts_in_incident = alerts_in_incident.json() - - if len(alerts_in_incident['items']) < 20: + if len(alerts_in_incident["items"]) < 20: alerts_existing = requests.get( f"{keep_api_url}/alerts", headers={"x-api-key": keep_api_key}, @@ -408,7 +407,7 @@ def perform_demo_ai(keep_api_key, keep_api_url): if len(fingerprints_to_add) > 0: fingerprints_to_add = fingerprints_to_add[:10] - + response = requests.post( f"{keep_api_url}/incidents/{incident_exists['id']}/alerts", headers={"x-api-key": keep_api_key}, @@ -430,7 +429,7 @@ async def simulate_alerts_async( demo_topology=False, clean_old_incidents=False, demo_ai=False, - target_rps=0 + target_rps=0, ): logger.info("Simulating alerts...") @@ -505,13 +504,17 @@ async def simulate_alerts_async( send_alert_url_params = {} # choose provider based on weights - provider_type = random.choices(providers, 
weights=normalized_weights, k=1)[0] - send_alert_url = "{}/alerts/event/{}".format(keep_api_url, provider_type) + provider_type = random.choices( + providers, weights=normalized_weights, k=1 + )[0] + send_alert_url = "{}/alerts/event/{}".format( + keep_api_url, provider_type + ) if provider_type in existing_providers_to_their_ids: - send_alert_url_params["provider_id"] = existing_providers_to_their_ids[ - provider_type - ] + send_alert_url_params["provider_id"] = ( + existing_providers_to_their_ids[provider_type] + ) logger.info( f"Provider type: {provider_type}, send_alert_url_params now are: {send_alert_url_params}" ) @@ -618,11 +621,15 @@ async def simulate_alerts_worker(worker_id, keep_api_key, rps=1): logger.info("Alert sent successfully") if rps: - delay = 1/rps - (time.time() - start) + delay = 1 / rps - (time.time() - start) if delay > 0: - logger.debug('worker %d sleeps, %f', worker_id, delay) + logger.debug("worker %d sleeps, %f", worker_id, delay) await asyncio.sleep(delay) - logger.info('Worker %d RPS: %.2f', worker_id, total_requests / (time.time() - total_start)) + logger.info( + "Worker %d RPS: %.2f", + worker_id, + total_requests / (time.time() - total_start), + ) if __name__ == "__main__": diff --git a/keep/api/core/metrics.py b/keep/api/core/metrics.py new file mode 100644 index 000000000..6147c5f88 --- /dev/null +++ b/keep/api/core/metrics.py @@ -0,0 +1,42 @@ +import os + +from prometheus_client import CollectorRegistry, Counter, Gauge, Summary, multiprocess + +PROMETHEUS_MULTIPROC_DIR = os.environ.get("PROMETHEUS_MULTIPROC_DIR", "/tmp/prometheus") +os.makedirs(PROMETHEUS_MULTIPROC_DIR, exist_ok=True) + + +# Create a single registry for all metrics +registry = CollectorRegistry() +multiprocess.MultiProcessCollector(registry, path=PROMETHEUS_MULTIPROC_DIR) + +# Process event metrics +events_in_counter = Counter( + "events_in_total", "Total number of events received", registry=registry +) +events_out_counter = Counter( + "events_out_total", "Total number of events processed", registry=registry +) +events_error_counter = Counter( + "events_error_total", "Total number of events with error", registry=registry +) +processing_time_summary = Summary( + "processing_time_seconds", "Average time spent processing events", registry=registry +) + +# Running tasks metrics +running_tasks_gauge = Gauge( + "running_tasks_current", + "Current number of running tasks", + registry=registry, + multiprocess_mode="livesum", +) + +# Per-process running tasks metrics +running_tasks_by_process_gauge = Gauge( + "running_tasks_by_process", + "Current number of running tasks per process", + labelnames=["pid"], + registry=registry, + multiprocess_mode="livesum", +) diff --git a/keep/api/custom_worker.py b/keep/api/custom_worker.py new file mode 100644 index 000000000..79a59fc47 --- /dev/null +++ b/keep/api/custom_worker.py @@ -0,0 +1,5 @@ +from uvicorn.workers import UvicornWorker + + +class CustomUvicornWorker(UvicornWorker): + CONFIG_KWARGS = {"lifespan": "on"} diff --git a/keep/api/models/db/ai_external.py b/keep/api/models/db/ai_external.py index 50c1398e1..074853f37 100644 --- a/keep/api/models/db/ai_external.py +++ b/keep/api/models/db/ai_external.py @@ -1,17 +1,17 @@ -import os import json - +import os from uuid import uuid4 -from sqlalchemy import ForeignKey, Column, JSON, Text -from sqlmodel import Field, SQLModel from pydantic import BaseModel, Json +from sqlalchemy import JSON, Column, ForeignKey, Text +from sqlmodel import Field, SQLModel class ExternalAI(BaseModel): """ Base model for 
external algorithms. """ + name: str = None description: str = None version: int = None @@ -22,57 +22,88 @@ class ExternalAI(BaseModel): @property def unique_id(self): return self.name + "_" + str(self.version) - -# Not sure if we'll need to move algorithm objects to the DB, + +# Not sure if we'll need to move algorithm objects to the DB, # for now, it's ok to keep them as code. external_ai_transformers = ExternalAI( name="Transformers Correlation", - description="""A transformer-based alert-to-incident correlation algorithm, -tailored for each tenant by training on their specific alert and incident data. -The system will automatically associate new alerts with existing incidents if they are -sufficiently similar; otherwise, it will create new incidents. In essence, it behaves like a human, + description="""A transformer-based alert-to-incident correlation algorithm, +tailored for each tenant by training on their specific alert and incident data. +The system will automatically associate new alerts with existing incidents if they are +sufficiently similar; otherwise, it will create new incidents. In essence, it behaves like a human, analyzing the alert feed and making decisions for each incoming alert.""", version=1, api_url=os.environ.get("KEEP_EXTERNAL_AI_TRANSFORMERS_URL", None), api_key=os.environ.get("KEEP_EXTERNAL_AI_TRANSFORMERS_API_KEY", None), config_default=json.dumps( [ - {"min": 0.3, "max": 0.99, "value": 0.9, "type": "float", "name": "Model Accuracy Threshold", "description": "The trained model accuracy will be evaluated using 30 percent of alerts-to-incident correlations as a validation dataset. If the accuracy is below this threshold, the correlation won't be launched."}, - {"min": 0.3, "max": 0.99, "value": 0.9, "type": "float", "name": "Correlation Threshold", "description": "The minimum correlation value to consider two alerts belonging to an incident."}, - {"min": 1, "max": 20, "value": 1, "type": "int", "name": "Train Epochs", "description": "The amount of epochs to train the model for. The less the better to avoid over-fitting."}, - {"value": True, "type": "bool", "name": "Create New Incidents", "description": "Do you want AI to issue new incident if correlation is detected and the incnident alerts are related to is resolved?"}, - {"value": True, "type": "bool", "name": "Enabled", "description": "Enable or disable the algorithm."}, + { + "min": 0.3, + "max": 0.99, + "value": 0.9, + "type": "float", + "name": "Model Accuracy Threshold", + "description": "The trained model accuracy will be evaluated using 30 percent of alerts-to-incident correlations as a validation dataset. If the accuracy is below this threshold, the correlation won't be launched.", + }, + { + "min": 0.3, + "max": 0.99, + "value": 0.9, + "type": "float", + "name": "Correlation Threshold", + "description": "The minimum correlation value to consider two alerts belonging to an incident.", + }, + { + "min": 1, + "max": 20, + "value": 1, + "type": "int", + "name": "Train Epochs", + "description": "The amount of epochs to train the model for. 
The less the better to avoid over-fitting.", + }, + { + "value": True, + "type": "bool", + "name": "Create New Incidents", + "description": "Do you want AI to issue new incident if correlation is detected and the incnident alerts are related to is resolved?", + }, + { + "value": True, + "type": "bool", + "name": "Enabled", + "description": "Enable or disable the algorithm.", + }, ] - ) + ), ) -EXTERNAL_AIS = [ - external_ai_transformers -] +EXTERNAL_AIS = [external_ai_transformers] + class ExternalAIConfigAndMetadata(SQLModel, table=True): """ Dynamic per-tenant algo settings and metadata """ + id: str = Field(default_factory=lambda: str(uuid4()), primary_key=True) algorithm_id: str = Field(nullable=False) tenant_id: str = Field(ForeignKey("tenant.id"), nullable=False) settings: str = Field( - nullable=False, sa_column=Column(JSON), ) settings_proposed_by_algorithm: str = Field( - nullable=True, sa_column=Column(JSON), ) - feedback_logs: str = Field(nullable=True, sa_column=Column(Text)) + feedback_logs: str = Field(sa_column=Column(Text)) @property def algorithm(self) -> ExternalAI: - matching_algos = [algo for algo in EXTERNAL_AIS if algo.unique_id == self.algorithm_id] + matching_algos = [ + algo for algo in EXTERNAL_AIS if algo.unique_id == self.algorithm_id + ] return matching_algos[0] if len(matching_algos) > 0 else None - + def from_external_ai(tenant_id: str, algorithm: ExternalAI): external_ai = ExternalAIConfigAndMetadata( algorithm_id=algorithm.unique_id, diff --git a/keep/api/models/db/alert.py b/keep/api/models/db/alert.py index 10d1dab71..1e2cb8d5c 100644 --- a/keep/api/models/db/alert.py +++ b/keep/api/models/db/alert.py @@ -5,7 +5,7 @@ from uuid import UUID, uuid4 from pydantic import PrivateAttr -from sqlalchemy import ForeignKey, UniqueConstraint, ForeignKeyConstraint +from sqlalchemy import ForeignKey, ForeignKeyConstraint, UniqueConstraint from sqlalchemy.dialects.mssql import DATETIME2 as MSSQL_DATETIME2 from sqlalchemy.dialects.mysql import DATETIME as MySQL_DATETIME from sqlalchemy.engine.url import make_url @@ -71,6 +71,7 @@ class AlertToIncident(SQLModel, table=True): default=NULL_FOR_DELETED_AT, ) + class LastAlert(SQLModel, table=True): tenant_id: str = Field(foreign_key="tenant.id", nullable=False, primary_key=True) @@ -78,6 +79,7 @@ class LastAlert(SQLModel, table=True): alert_id: UUID = Field(foreign_key="alert.id") timestamp: datetime = Field(nullable=False, index=True) first_timestamp: datetime = Field(nullable=False, index=True) + alert_hash: str | None = Field(nullable=True, index=True) class LastAlertToIncident(SQLModel, table=True): @@ -105,8 +107,9 @@ class LastAlertToIncident(SQLModel, table=True): __table_args__ = ( ForeignKeyConstraint( ["tenant_id", "fingerprint"], - ["lastalert.tenant_id", "lastalert.fingerprint"]), - {} + ["lastalert.tenant_id", "lastalert.fingerprint"], + ), + {}, ) @@ -118,8 +121,8 @@ class Incident(SQLModel, table=True): user_generated_name: str | None ai_generated_name: str | None - user_summary: str = Field(sa_column=Column(TEXT), nullable=True) - generated_summary: str = Field(sa_column=Column(TEXT), nullable=True) + user_summary: str = Field(sa_column=Column(TEXT)) + generated_summary: str = Field(sa_column=Column(TEXT)) assignee: str | None severity: int = Field(default=IncidentSeverity.CRITICAL.order) @@ -152,7 +155,7 @@ class Incident(SQLModel, table=True): rule_fingerprint: str = Field(default="", sa_column=Column(TEXT)) # This is the fingerprint of the incident generated by the underlying tool # It's not a unique 
identifier in the DB (constraint), but when we have the same incident from some tools, we can use it to detect duplicates - fingerprint: str | None = Field(default=None, sa_column=Column(TEXT), nullable=True) + fingerprint: str | None = Field(default=None, sa_column=Column(TEXT)) same_incident_in_the_past_id: UUID | None = Field( sa_column=Column( diff --git a/keep/api/models/db/mapping.py b/keep/api/models/db/mapping.py index 64970c75a..74618cb9a 100644 --- a/keep/api/models/db/mapping.py +++ b/keep/api/models/db/mapping.py @@ -29,11 +29,10 @@ class MappingRule(SQLModel, table=True): max_length=255, ) # The attributes to match against (e.g. ["service","region"]) - matchers: list[str] = Field(sa_column=Column(JSON), nullable=False) + matchers: list[str] = Field(sa_column=Column(JSON)) # The rows of the CSV file [{service: "service1", region: "region1", ...}, ...] rows: Optional[list[dict]] = Field( sa_column=Column(JSON), - nullable=True, ) # max_length=204800) updated_by: Optional[str] = Field(max_length=255, default=None) last_updated_at: datetime = Field(default_factory=datetime.utcnow) diff --git a/keep/api/models/db/migrations/versions/2024-07-11-17-10_54c1252b2c8a.py b/keep/api/models/db/migrations/versions/2024-07-11-17-10_54c1252b2c8a.py index 75de2e9c3..95fd2a70e 100644 --- a/keep/api/models/db/migrations/versions/2024-07-11-17-10_54c1252b2c8a.py +++ b/keep/api/models/db/migrations/versions/2024-07-11-17-10_54c1252b2c8a.py @@ -1,16 +1,17 @@ """First migration Revision ID: 54c1252b2c8a -Revises: +Revises: Create Date: 2024-07-11 17:10:10.815182 """ +import logging + import sqlalchemy as sa +import sqlalchemy_utils import sqlmodel from alembic import op -import sqlalchemy_utils -import logging # revision identifiers, used by Alembic. revision = "54c1252b2c8a" @@ -64,7 +65,7 @@ def _upgrade() -> None: "alert", sa.Column("timestamp", sa.DateTime(), nullable=False), sa.Column("event", sa.JSON(), nullable=True), - sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False), sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("provider_type", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("provider_id", sqlmodel.sql.sqltypes.AutoString(), nullable=True), @@ -83,7 +84,7 @@ def _upgrade() -> None: op.create_table( "alertdeduplicationfilter", sa.Column("fields", sa.JSON(), nullable=True), - sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False), sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("matcher_cel", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.ForeignKeyConstraint( @@ -95,7 +96,7 @@ def _upgrade() -> None: op.create_table( "alertenrichment", sa.Column("enrichments", sa.JSON(), nullable=True), - sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False), sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("timestamp", sa.DateTime(), nullable=False), sa.Column( @@ -111,7 +112,7 @@ def _upgrade() -> None: op.create_table( "alertraw", sa.Column("raw_alert", sa.JSON(), nullable=True), - sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False), sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.ForeignKeyConstraint( ["tenant_id"], @@ 
-223,7 +224,7 @@ def _upgrade() -> None: op.create_table( "preset", sa.Column("options", sa.JSON(), nullable=True), - sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False), sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("created_by", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("is_private", sa.Boolean(), nullable=True), @@ -266,7 +267,7 @@ def _upgrade() -> None: "rule", sa.Column("definition", sa.JSON(), nullable=True), sa.Column("grouping_criteria", sa.JSON(), nullable=True), - sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False), sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("name", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("definition_cel", sqlmodel.sql.sqltypes.AutoString(), nullable=False), @@ -309,7 +310,7 @@ def _upgrade() -> None: ) op.create_table( "tenantinstallation", - sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False), sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("bot_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("installed", sa.Boolean(), nullable=False), @@ -344,7 +345,7 @@ def _upgrade() -> None: sa.Column( "rule_id", sqlalchemy_utils.types.uuid.UUIDType(binary=False), nullable=True ), - sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False), sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("creation_time", sa.DateTime(), nullable=False), sa.Column( @@ -395,7 +396,7 @@ def _upgrade() -> None: ), sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("timestamp", sa.DateTime(), nullable=False), - sa.Column("alert_id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("alert_id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False), sa.ForeignKeyConstraint( ["alert_id"], ["alert.id"], @@ -441,6 +442,7 @@ def _upgrade() -> None: ) # ### end Alembic commands ### + def upgrade() -> None: """ This migration is special because it creates the tables from scratch, @@ -451,7 +453,9 @@ def upgrade() -> None: except Exception as e: if "already exists" in str(e): logging.warning(str(e)) - logging.warning("Table already exists, which most likely means that tables has already been created before the migration mechanism was introduced. It's ok!") + logging.warning( + "Table already exists, which most likely means that tables has already been created before the migration mechanism was introduced. It's ok!" + ) else: raise e diff --git a/keep/api/models/db/migrations/versions/2024-07-15-15-10_c37ec8f6db3e.py b/keep/api/models/db/migrations/versions/2024-07-15-15-10_c37ec8f6db3e.py index 375936b21..95104b4f4 100644 --- a/keep/api/models/db/migrations/versions/2024-07-15-15-10_c37ec8f6db3e.py +++ b/keep/api/models/db/migrations/versions/2024-07-15-15-10_c37ec8f6db3e.py @@ -21,7 +21,7 @@ def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! 
### op.create_table( "alertaudit", - sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False), sa.Column("fingerprint", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("timestamp", sa.DateTime(), nullable=False), diff --git a/keep/api/models/db/migrations/versions/2024-07-16-12-16_37019ca3eb2e.py b/keep/api/models/db/migrations/versions/2024-07-16-12-16_37019ca3eb2e.py index 43b57dd8f..09b4c3c0c 100644 --- a/keep/api/models/db/migrations/versions/2024-07-16-12-16_37019ca3eb2e.py +++ b/keep/api/models/db/migrations/versions/2024-07-16-12-16_37019ca3eb2e.py @@ -22,7 +22,7 @@ def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### op.create_table( "incident", - sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False), sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("name", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("description", sqlmodel.sql.sqltypes.AutoString(), nullable=False), @@ -44,7 +44,7 @@ def upgrade() -> None: nullable=False, ), sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), - sa.Column("alert_id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("alert_id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False), sa.ForeignKeyConstraint( ["alert_id"], ["alert.id"], diff --git a/keep/api/models/db/migrations/versions/2024-08-11-17-38_9453855f3ba0.py b/keep/api/models/db/migrations/versions/2024-08-11-17-38_9453855f3ba0.py index 929635927..09d9f0c1d 100644 --- a/keep/api/models/db/migrations/versions/2024-08-11-17-38_9453855f3ba0.py +++ b/keep/api/models/db/migrations/versions/2024-08-11-17-38_9453855f3ba0.py @@ -34,7 +34,7 @@ def upgrade() -> None: op.create_table( "presettaglink", sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), - sa.Column("preset_id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("preset_id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False), sa.Column("tag_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.ForeignKeyConstraint( ["preset_id"], diff --git a/keep/api/models/db/migrations/versions/2024-09-19-15-26_493f217af6b6.py b/keep/api/models/db/migrations/versions/2024-09-19-15-26_493f217af6b6.py index 39bc26942..fd24b6bcd 100644 --- a/keep/api/models/db/migrations/versions/2024-09-19-15-26_493f217af6b6.py +++ b/keep/api/models/db/migrations/versions/2024-09-19-15-26_493f217af6b6.py @@ -24,10 +24,10 @@ def upgrade() -> None: "alertdeduplicationevent", sa.Column("timestamp", sa.DateTime(), nullable=False), sa.Column("date_hour", sa.DateTime(), nullable=True), - sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False), sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column( - "deduplication_rule_id", sqlmodel.sql.sqltypes.GUID(), nullable=False + "deduplication_rule_id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False ), sa.Column( "deduplication_type", sqlmodel.sql.sqltypes.AutoString(), nullable=False @@ -74,7 +74,7 @@ def upgrade() -> None: "alertdeduplicationrule", sa.Column("fingerprint_fields", sa.JSON(), nullable=True), sa.Column("ignore_fields", sa.JSON(), nullable=True), - sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), 
+ sa.Column("id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False), sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("name", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("description", sqlmodel.sql.sqltypes.AutoString(), nullable=False), @@ -103,7 +103,7 @@ def upgrade() -> None: ) op.create_table( "alertfield", - sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False), sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("field_name", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("provider_id", sqlmodel.sql.sqltypes.AutoString(), nullable=True), diff --git a/keep/api/models/db/migrations/versions/2024-09-22-14-16_01ebe17218c0.py b/keep/api/models/db/migrations/versions/2024-09-22-14-16_01ebe17218c0.py index 0f0875651..6236751c7 100644 --- a/keep/api/models/db/migrations/versions/2024-09-22-14-16_01ebe17218c0.py +++ b/keep/api/models/db/migrations/versions/2024-09-22-14-16_01ebe17218c0.py @@ -22,7 +22,7 @@ def upgrade() -> None: op.create_table( "topologyapplication", sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=True), - sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False), sa.Column("name", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("description", sqlmodel.sql.sqltypes.AutoString(), nullable=True), sa.ForeignKeyConstraint( @@ -34,7 +34,7 @@ def upgrade() -> None: op.create_table( "topologyserviceapplication", sa.Column("service_id", sa.Integer(), nullable=False), - sa.Column("application_id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("application_id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False), sa.ForeignKeyConstraint( ["application_id"], ["topologyapplication.id"], diff --git a/keep/api/models/db/migrations/versions/2024-10-31-18-01_273b29f368b7.py b/keep/api/models/db/migrations/versions/2024-10-31-18-01_273b29f368b7.py index fd5e5166a..ef12441a8 100644 --- a/keep/api/models/db/migrations/versions/2024-10-31-18-01_273b29f368b7.py +++ b/keep/api/models/db/migrations/versions/2024-10-31-18-01_273b29f368b7.py @@ -23,7 +23,7 @@ def upgrade() -> None: "aisuggestion", sa.Column("suggestion_input", sa.JSON(), nullable=True), sa.Column("suggestion_content", sa.JSON(), nullable=True), - sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False), sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("user_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column( @@ -68,8 +68,8 @@ def upgrade() -> None: op.create_table( "aifeedback", sa.Column("feedback_content", sa.JSON(), nullable=True), - sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), - sa.Column("suggestion_id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False), + sa.Column("suggestion_id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False), sa.Column("user_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("rating", sa.Integer(), nullable=True), sa.Column("comment", sqlmodel.sql.sqltypes.AutoString(), nullable=True), diff --git a/keep/api/models/db/migrations/versions/2024-12-02-13-36_bdae8684d0b4.py b/keep/api/models/db/migrations/versions/2024-12-02-13-36_bdae8684d0b4.py index 905748deb..2d755ce7d 
100644 --- a/keep/api/models/db/migrations/versions/2024-12-02-13-36_bdae8684d0b4.py +++ b/keep/api/models/db/migrations/versions/2024-12-02-13-36_bdae8684d0b4.py @@ -10,6 +10,7 @@ import sqlalchemy_utils import sqlmodel from alembic import op +from sqlalchemy import text from sqlalchemy.orm import Session # revision identifiers, used by Alembic. @@ -25,7 +26,8 @@ def populate_db(): session = Session(op.get_bind()) if session.bind.dialect.name == "postgresql": - migrate_lastalert_query = """ + migrate_lastalert_query = text( + """ insert into lastalert (tenant_id, fingerprint, alert_id, timestamp) select alert.tenant_id, alert.fingerprint, alert.id as alert_id, alert.timestamp from alert @@ -38,8 +40,10 @@ def populate_db(): on conflict do nothing """ + ) - migrate_lastalerttoincident_query = """ + migrate_lastalerttoincident_query = text( + """ insert into lastalerttoincident (incident_id, tenant_id, timestamp, fingerprint, is_created_by_ai, deleted_at) select ati.incident_id, ati.tenant_id, ati.timestamp, lf.fingerprint, ati.is_created_by_ai, ati.deleted_at from alerttoincident as ati @@ -57,9 +61,11 @@ def populate_db(): on conflict do nothing """ + ) else: - migrate_lastalert_query = """ + migrate_lastalert_query = text( + """ INSERT INTO lastalert (tenant_id, fingerprint, alert_id, timestamp) SELECT grouped_alerts.tenant_id, @@ -80,8 +86,10 @@ def populate_db(): ) as grouped_alerts GROUP BY grouped_alerts.tenant_id, grouped_alerts.fingerprint, grouped_alerts.timestamp; """ + ) - migrate_lastalerttoincident_query = """ + migrate_lastalerttoincident_query = text( + """ REPLACE INTO lastalerttoincident (incident_id, tenant_id, timestamp, fingerprint, is_created_by_ai, deleted_at) select ati.incident_id, ati.tenant_id, ati.timestamp, lf.fingerprint, ati.is_created_by_ai, ati.deleted_at from alerttoincident as ati @@ -96,7 +104,8 @@ def populate_db(): group by fingerprint, tenant_id ) as a on alert.fingerprint = a.fingerprint and alert.timestamp = a.last_received and alert.tenant_id = a.tenant_id ) as lf on ati.alert_id = lf.id; - """ + """ + ) session.execute(migrate_lastalert_query) session.execute(migrate_lastalerttoincident_query) @@ -107,7 +116,7 @@ def upgrade() -> None: "lastalert", sa.Column("tenant_id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), sa.Column("fingerprint", sqlmodel.sql.sqltypes.AutoString(), nullable=False), - sa.Column("alert_id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("alert_id", sqlmodel.sql.sqltypes.types.Uuid(), nullable=False), sa.Column("timestamp", sa.DateTime(), nullable=False), sa.ForeignKeyConstraint( ["alert_id"], @@ -147,7 +156,9 @@ def upgrade() -> None: ["tenant_id"], ["tenant.id"], ), - sa.PrimaryKeyConstraint("tenant_id", "incident_id", "fingerprint", "deleted_at"), + sa.PrimaryKeyConstraint( + "tenant_id", "incident_id", "fingerprint", "deleted_at" + ), ) populate_db() @@ -158,4 +169,4 @@ def downgrade() -> None: with op.batch_alter_table("lastalert", schema=None) as batch_op: batch_op.drop_index(batch_op.f("ix_lastalert_timestamp")) - op.drop_table("lastalert") \ No newline at end of file + op.drop_table("lastalert") diff --git a/keep/api/models/db/migrations/versions/2024-12-02-20-42_c6e5594c99f8.py b/keep/api/models/db/migrations/versions/2024-12-02-20-42_c6e5594c99f8.py index 1ea7ab342..70fdd9def 100644 --- a/keep/api/models/db/migrations/versions/2024-12-02-20-42_c6e5594c99f8.py +++ b/keep/api/models/db/migrations/versions/2024-12-02-20-42_c6e5594c99f8.py @@ -8,6 +8,7 @@ import sqlalchemy as sa from alembic import 
op
+from sqlalchemy import text
 from sqlalchemy.dialects import mysql
 from sqlalchemy.orm import Session
 
@@ -21,15 +22,19 @@
 def populate_db():
     session = Session(op.get_bind())
 
-    session.execute("""
+    session.execute(
+        text(
+            """
         UPDATE lastalert
         SET first_timestamp = (
             SELECT MIN(alert.timestamp)
             FROM alert
-            WHERE alert.fingerprint = lastalert.fingerprint 
+            WHERE alert.fingerprint = lastalert.fingerprint
             AND alert.tenant_id = lastalert.tenant_id
         )
-    """)
+    """
+        )
+    )
 
 
 def upgrade() -> None:
diff --git a/keep/api/models/db/migrations/versions/2024-12-08-16-24_55cc64020f6d.py b/keep/api/models/db/migrations/versions/2024-12-08-16-24_55cc64020f6d.py
new file mode 100644
index 000000000..24a2c7c03
--- /dev/null
+++ b/keep/api/models/db/migrations/versions/2024-12-08-16-24_55cc64020f6d.py
@@ -0,0 +1,37 @@
+"""Add Alert Hash to LastAlert
+
+Revision ID: 55cc64020f6d
+Revises: c6e5594c99f8
+Create Date: 2024-12-08 16:24:01.808208
+
+"""
+
+import sqlalchemy as sa
+import sqlmodel
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "55cc64020f6d"
+down_revision = "c6e5594c99f8"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("lastalert", schema=None) as batch_op:
+        batch_op.add_column(
+            sa.Column("alert_hash", sqlmodel.sql.sqltypes.AutoString(), nullable=True)
+        )
+        batch_op.create_index(
+            batch_op.f("ix_lastalert_alert_hash"), ["alert_hash"], unique=False
+        )
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("lastalert", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_lastalert_alert_hash"))
+        batch_op.drop_column("alert_hash")
+    # ### end Alembic commands ###
diff --git a/keep/api/routes/alerts.py b/keep/api/routes/alerts.py
index 6d656c02c..8f3870328 100644
--- a/keep/api/routes/alerts.py
+++ b/keep/api/routes/alerts.py
@@ -1,3 +1,4 @@
+import asyncio
 import base64
 import hashlib
 import hmac
@@ -9,15 +10,8 @@
 import celpy
 from arq import ArqRedis
-from fastapi import (
-    APIRouter,
-    BackgroundTasks,
-    Depends,
-    HTTPException,
-    Query,
-    Request,
-    Response,
-)
+from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Query, Request
+from fastapi.concurrency import run_in_threadpool
 from fastapi.responses import JSONResponse
 from pusher import Pusher
@@ -34,6 +28,7 @@
 )
 from keep.api.core.dependencies import extract_generic_body, get_pusher_client
 from keep.api.core.elastic import ElasticClient
+from keep.api.core.metrics import running_tasks_by_process_gauge, running_tasks_gauge
 from keep.api.models.alert import (
     AlertDto,
     DeleteRequestBody,
@@ -261,6 +256,88 @@ def assign_alert(
     return {"status": "ok"}
 
 
+def discard_task(
+    trace_id: str,
+    task: asyncio.Task,
+    running_tasks: set,
+    started_time: float,
+):
+    try:
+        running_tasks.discard(task)
+        running_tasks_gauge.dec()  # Decrease total counter
+        running_tasks_by_process_gauge.labels(
+            pid=os.getpid()
+        ).dec()  # Decrease process counter
+
+        # Log any exception that occurred in the task
+        if task.exception():
+            logger.error(
+                "Task failed with exception",
+                extra={
+                    "trace_id": trace_id,
+                    "error": str(task.exception()),
+                    "processing_time": time.time() - started_time,
+                },
+            )
+        else:
+            logger.info(
+                "Task completed",
+                extra={
+                    "processing_time": time.time() - started_time,
+                    "trace_id": trace_id,
+                },
+            )
+    except Exception:
+        # Make sure we always
decrement both counters even if something goes wrong + running_tasks_gauge.dec() + running_tasks_by_process_gauge.labels(pid=os.getpid()).dec() + logger.exception( + "Error in discard_task callback", + extra={ + "trace_id": trace_id, + }, + ) + + +def create_process_event_task( + bg_tasks: BackgroundTasks, + tenant_id: str, + provider_type: str | None, + provider_id: str | None, + fingerprint: str, + api_key_name: str | None, + trace_id: str, + event: AlertDto | list[AlertDto] | dict, + running_tasks: set, +) -> str: + logger.info("Adding task", extra={"trace_id": trace_id}) + started_time = time.time() + running_tasks_gauge.inc() # Increase total counter + running_tasks_by_process_gauge.labels( + pid=os.getpid() + ).inc() # Increase process counter + task = asyncio.create_task( + run_in_threadpool( + process_event, + {}, + tenant_id, + provider_type, + provider_id, + fingerprint, + api_key_name, + trace_id, + event, + ) + ) + task.add_done_callback( + lambda task: discard_task(trace_id, task, running_tasks, started_time) + ) + bg_tasks.add_task(task) + running_tasks.add(task) + logger.info("Task added", extra={"trace_id": trace_id}) + return task.get_name() + + @router.post( "/event", description="Receive a generic alert event", @@ -285,6 +362,7 @@ async def receive_generic_event( bg_tasks (BackgroundTasks): Background tasks handler. tenant_id (str, optional): Defaults to Depends(verify_api_key). """ + running_tasks: set = request.state.background_tasks if REDIS: redis: ArqRedis = await get_pool() job = await redis.enqueue_job( @@ -306,10 +384,10 @@ async def receive_generic_event( "queue": KEEP_ARQ_QUEUE_BASIC, }, ) + task_name = job.job_id else: - bg_tasks.add_task( - process_event, - {}, + task_name = create_process_event_task( + bg_tasks, authenticated_entity.tenant_id, None, None, @@ -317,8 +395,9 @@ async def receive_generic_event( authenticated_entity.api_key_name, request.state.trace_id, event, + running_tasks, ) - return Response(status_code=202) + return JSONResponse(content={"task_name": task_name}, status_code=202) # https://learn.netdata.cloud/docs/alerts-&-notifications/notifications/centralized-cloud-notifications/webhook#challenge-secret @@ -366,7 +445,7 @@ async def receive_event( pusher_client: Pusher = Depends(get_pusher_client), ) -> dict[str, str]: trace_id = request.state.trace_id - + running_tasks: set = request.state.background_tasks provider_class = None try: t = time.time() @@ -414,12 +493,10 @@ async def receive_event( "queue": KEEP_ARQ_QUEUE_BASIC, }, ) + task_name = job.job_id else: - t = time.time() - logger.debug("Adding task to process event") - bg_tasks.add_task( - process_event, - {}, + task_name = create_process_event_task( + bg_tasks, authenticated_entity.tenant_id, provider_type, provider_id, @@ -427,9 +504,9 @@ async def receive_event( authenticated_entity.api_key_name, trace_id, event, + running_tasks, ) - logger.debug("Added task to process event", extra={"time": time.time() - t}) - return Response(status_code=202) + return JSONResponse(content={"task_name": task_name}, status_code=202) @router.get( diff --git a/keep/api/routes/metrics.py b/keep/api/routes/metrics.py index 750774ff9..4f7f121c3 100644 --- a/keep/api/routes/metrics.py +++ b/keep/api/routes/metrics.py @@ -1,11 +1,16 @@ -import chevron - -from fastapi import Query from typing import List -from fastapi import APIRouter, Depends, Response +import chevron +from fastapi import APIRouter, Depends, Query, Request, Response +from prometheus_client import CONTENT_TYPE_LATEST, generate_latest + 
+from keep.api.core.db import ( + get_last_alerts_for_incidents, + get_last_incidents, + get_workflow_executions_count, +) +from keep.api.core.metrics import registry from keep.api.models.alert import AlertDto -from keep.api.core.db import get_last_incidents, get_last_alerts_for_incidents, get_workflow_executions_count from keep.identitymanager.authenticatedentity import AuthenticatedEntity from keep.identitymanager.identitymanagerfactory import IdentityManagerFactory @@ -14,6 +19,13 @@ CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8" +@router.get("/processing", include_in_schema=False) +async def get_processing_metrics(request: Request): + # Generate all metrics from the single registry + metrics = generate_latest(registry) + return Response(content=metrics, media_type=CONTENT_TYPE_LATEST) + + @router.get("") def get_metrics( labels: List[str] = Query(None), @@ -40,7 +52,7 @@ def get_metrics( type: Bearer credentials: "{Your API Key}" - # Optional, you can add labels to exported incidents. + # Optional, you can add labels to exported incidents. # Label values will be equal to the last incident's alert payload value matching the label. # Attention! Don't add "flaky" labels which could change from alert to alert within the same incident. # Good labels: ['labels.department', 'labels.team'], bad labels: ['labels.severity', 'labels.pod_id'] @@ -52,7 +64,7 @@ def get_metrics( ``` """ # We don't use im-memory metrics countrs here which is typical for prometheus exporters, - # they would make us expose our app's pod id's. This is a customer-facing endpoing + # they would make us expose our app's pod id's. This is a customer-facing endpoint # we're deploying to SaaS, and we want to hide our internal infra. tenant_id = authenticated_entity.tenant_id @@ -68,17 +80,23 @@ def get_metrics( is_confirmed=True, ) - last_alerts_for_incidents = get_last_alerts_for_incidents([incident.id for incident in incidents]) - + last_alerts_for_incidents = get_last_alerts_for_incidents( + [incident.id for incident in incidents] + ) + for incident in incidents: - incident_name = incident.user_generated_name if incident.user_generated_name else incident.ai_generated_name + incident_name = ( + incident.user_generated_name + if incident.user_generated_name + else incident.ai_generated_name + ) extra_labels = "" try: last_alert = last_alerts_for_incidents[str(incident.id)][0] last_alert_dto = AlertDto(**last_alert.event) except IndexError: last_alert_dto = None - + if labels is not None: for label in labels: label_value = chevron.render("{{ " + label + " }}", last_alert_dto) @@ -86,7 +104,7 @@ def get_metrics( extra_labels += f' {label}="{label_value}"' export += f'alerts_total{{incident_name="{incident_name}" incident_id="{incident.id}"{extra_labels}}} {incident.alerts_count}\n' - + # Exporting stats about open incidents export += "\n\n" export += "# HELP open_incidents_total The total number of open incidents.\r\n" diff --git a/keep/api/routes/providers.py b/keep/api/routes/providers.py index 41b2bb480..7547e67c0 100644 --- a/keep/api/routes/providers.py +++ b/keep/api/routes/providers.py @@ -35,7 +35,7 @@ READ_ONLY = config("KEEP_READ_ONLY", default="false") == "true" PROVIDER_DISTRIBUTION_ENABLED = config( - "PROVIDER_DISTRIBUTION_ENABLED", cast=bool, default=True + "KEEP_PROVIDER_DISTRIBUTION_ENABLED", cast=bool, default=True ) diff --git a/keep/api/tasks/process_event_task.py b/keep/api/tasks/process_event_task.py index ec2cf276a..66c8fa035 100644 --- a/keep/api/tasks/process_event_task.py +++ 
b/keep/api/tasks/process_event_task.py @@ -4,13 +4,14 @@ import json import logging import os +import time from typing import List -import dateutil - # third-parties +import dateutil from arq import Retry from fastapi.datastructures import FormData +from opentelemetry import trace from sqlmodel import Session # internals @@ -27,6 +28,12 @@ ) from keep.api.core.dependencies import get_pusher_client from keep.api.core.elastic import ElasticClient +from keep.api.core.metrics import ( + events_error_counter, + events_in_counter, + events_out_counter, + processing_time_summary, +) from keep.api.models.alert import AlertDto, AlertStatus, IncidentDto from keep.api.models.db.alert import Alert, AlertActionType, AlertAudit, AlertRaw from keep.api.tasks.notification_cache import get_notification_cache @@ -39,10 +46,20 @@ from keep.workflowmanager.workflowmanager import WorkflowManager TIMES_TO_RETRY_JOB = 5 # the number of times to retry the job in case of failure +# Opt-outs/ins KEEP_STORE_RAW_ALERTS = os.environ.get("KEEP_STORE_RAW_ALERTS", "false") == "true" KEEP_CORRELATION_ENABLED = os.environ.get("KEEP_CORRELATION_ENABLED", "true") == "true" KEEP_ALERT_FIELDS_ENABLED = ( - os.environ.get("KEEP_ALERT_FIELDS_ENABLED", "false") == "true" + os.environ.get("KEEP_ALERT_FIELDS_ENABLED", "true") == "true" +) +KEEP_MAINTENANCE_WINDOWS_ENABLED = ( + os.environ.get("KEEP_MAINTENANCE_WINDOWS_ENABLED", "true") == "true" +) +KEEP_AUDIT_EVENTS_ENABLED = ( + os.environ.get("KEEP_AUDIT_EVENTS_ENABLED", "true") == "true" +) +KEEP_CALCULATE_START_FIRING_TIME_ENABLED = ( + os.environ.get("KEEP_CALCULATE_START_FIRING_TIME_ENABLED", "true") == "true" ) logger = logging.getLogger(__name__) @@ -94,28 +111,37 @@ def __save_to_db( provider_type=provider_type, ) session.add(alert) + # add audit to the deduplicated events - for event in deduplicated_events: - audit = AlertAudit( - tenant_id=tenant_id, - fingerprint=event.fingerprint, - status=event.status, - action=AlertActionType.DEDUPLICATED.value, - user_id="system", - description="Alert was deduplicated", - ) - session.add(audit) + # TODO: move this to the alert deduplicator + if KEEP_AUDIT_EVENTS_ENABLED: + for event in deduplicated_events: + audit = AlertAudit( + tenant_id=tenant_id, + fingerprint=event.fingerprint, + status=event.status, + action=AlertActionType.DEDUPLICATED.value, + user_id="system", + description="Alert was deduplicated", + ) + session.add(audit) + enriched_formatted_events = [] + for formatted_event in formatted_events: formatted_event.pushed = True - # calculate startFiring time - previous_alert = get_alerts_by_fingerprint( - tenant_id=tenant_id, fingerprint=formatted_event.fingerprint, limit=1 - ) - previous_alert = convert_db_alerts_to_dto_alerts(previous_alert) - formatted_event.firingStartTime = calculated_start_firing_time( - formatted_event, previous_alert - ) + + if KEEP_CALCULATE_START_FIRING_TIME_ENABLED: + # calculate startFiring time + previous_alert = get_alerts_by_fingerprint( + tenant_id=tenant_id, + fingerprint=formatted_event.fingerprint, + limit=1, + ) + previous_alert = convert_db_alerts_to_dto_alerts(previous_alert) + formatted_event.firingStartTime = calculated_start_firing_time( + formatted_event, previous_alert + ) enrichments_bl = EnrichmentsBl(tenant_id, session) # Dispose enrichments that needs to be disposed @@ -176,20 +202,22 @@ def __save_to_db( session.flush() alert_id = alert.id formatted_event.event_id = str(alert_id) - audit = AlertAudit( - tenant_id=tenant_id, - fingerprint=formatted_event.fingerprint, - 
action=( - AlertActionType.AUTOMATIC_RESOLVE.value - if formatted_event.status == AlertStatus.RESOLVED.value - else AlertActionType.TIGGERED.value - ), - user_id="system", - description=f"Alert recieved from provider with status {formatted_event.status}", - ) - session.add(audit) - alert_dto = AlertDto(**formatted_event.dict()) + if KEEP_AUDIT_EVENTS_ENABLED: + audit = AlertAudit( + tenant_id=tenant_id, + fingerprint=formatted_event.fingerprint, + action=( + AlertActionType.AUTOMATIC_RESOLVE.value + if formatted_event.status == AlertStatus.RESOLVED.value + else AlertActionType.TIGGERED.value + ), + user_id="system", + description=f"Alert recieved from provider with status {formatted_event.status}", + ) + session.add(audit) + + alert_dto = AlertDto(**formatted_event.dict()) set_last_alert(tenant_id, alert, session=session) # Mapping @@ -210,8 +238,9 @@ def __save_to_db( setattr(alert_dto, enrichment, value) enriched_formatted_events.append(alert_dto) session.commit() + logger.info( - "Asyncronusly added new alerts to the DB", + "Added new alerts to the DB", extra={ "provider_type": provider_type, "num_of_alerts": len(formatted_events), @@ -222,7 +251,7 @@ def __save_to_db( return enriched_formatted_events except Exception: logger.exception( - "Failed to push alerts to the DB", + "Failed to add new alerts to the DB", extra={ "provider_type": provider_type, "num_of_alerts": len(formatted_events), @@ -239,9 +268,11 @@ def __handle_formatted_events( session: Session, raw_events: list[dict], formatted_events: list[AlertDto], + tracer: trace.Tracer, provider_id: str | None = None, notify_client: bool = True, timestamp_forced: datetime.datetime | None = None, + job_id: str | None = None, ): """ this is super important function and does five things: @@ -256,117 +287,149 @@ def __handle_formatted_events( """ logger.info( - "Asyncronusly adding new alerts to the DB", + "Adding new alerts to the DB", extra={ "provider_type": provider_type, "num_of_alerts": len(formatted_events), "provider_id": provider_id, "tenant_id": tenant_id, + "job_id": job_id, }, ) # first, check for maintenance windows - maintenance_windows_bl = MaintenanceWindowsBl(tenant_id=tenant_id, session=session) - if maintenance_windows_bl.maintenance_rules: - formatted_events = [ - event - for event in formatted_events - if maintenance_windows_bl.check_if_alert_in_maintenance_windows(event) - is False - ] - else: - logger.debug( - "No maintenance windows configured for this tenant", - extra={"tenant_id": tenant_id}, - ) + if KEEP_MAINTENANCE_WINDOWS_ENABLED: + with tracer.start_as_current_span("process_event_maintenance_windows_check"): + maintenance_windows_bl = MaintenanceWindowsBl( + tenant_id=tenant_id, session=session + ) + if maintenance_windows_bl.maintenance_rules: + formatted_events = [ + event + for event in formatted_events + if maintenance_windows_bl.check_if_alert_in_maintenance_windows( + event + ) + is False + ] + else: + logger.debug( + "No maintenance windows configured for this tenant", + extra={"tenant_id": tenant_id}, + ) - if not formatted_events: - logger.info( - "No alerts to process after running maintenance windows check", - extra={"tenant_id": tenant_id}, - ) - return + if not formatted_events: + logger.info( + "No alerts to process after running maintenance windows check", + extra={"tenant_id": tenant_id}, + ) + return - # second, filter out any deduplicated events - alert_deduplicator = AlertDeduplicator(tenant_id) + with tracer.start_as_current_span("process_event_deduplication"): + # second, filter out any 
deduplicated events + alert_deduplicator = AlertDeduplicator(tenant_id) - for event in formatted_events: - # apply deduplication - # apply_deduplication set alert_hash and isDuplicate on event - event = alert_deduplicator.apply_deduplication(event) + for event in formatted_events: + # apply_deduplication set alert_hash and isDuplicate on event + event = alert_deduplicator.apply_deduplication(event) - # filter out the deduplicated events - deduplicated_events = list( - filter(lambda event: event.isFullDuplicate, formatted_events) - ) - formatted_events = list( - filter(lambda event: not event.isFullDuplicate, formatted_events) - ) + # filter out the deduplicated events + deduplicated_events = list( + filter(lambda event: event.isFullDuplicate, formatted_events) + ) + formatted_events = list( + filter(lambda event: not event.isFullDuplicate, formatted_events) + ) - # save to db - enriched_formatted_events = __save_to_db( - tenant_id, - provider_type, - session, - raw_events, - formatted_events, - deduplicated_events, - provider_id, - timestamp_forced, - ) + with tracer.start_as_current_span("process_event_save_to_db"): + # save to db + enriched_formatted_events = __save_to_db( + tenant_id, + provider_type, + session, + raw_events, + formatted_events, + deduplicated_events, + provider_id, + timestamp_forced, + ) # let's save all fields to the DB so that we can use them in the future such in deduplication fields suggestions # todo: also use it on correlation rules suggestions if KEEP_ALERT_FIELDS_ENABLED: - for enriched_formatted_event in enriched_formatted_events: - logger.debug( - "Bulk upserting alert fields", - extra={ - "alert_event_id": enriched_formatted_event.event_id, - "alert_fingerprint": enriched_formatted_event.fingerprint, - }, - ) - fields = [] - for key, value in enriched_formatted_event.dict().items(): - if isinstance(value, dict): - for nested_key in value.keys(): - fields.append(f"{key}.{nested_key}") - else: - fields.append(key) - - bulk_upsert_alert_fields( - tenant_id=tenant_id, - fields=fields, - provider_id=enriched_formatted_event.providerId, - provider_type=enriched_formatted_event.providerType, - session=session, - ) + with tracer.start_as_current_span("process_event_bulk_upsert_alert_fields"): + for enriched_formatted_event in enriched_formatted_events: + logger.debug( + "Bulk upserting alert fields", + extra={ + "alert_event_id": enriched_formatted_event.event_id, + "alert_fingerprint": enriched_formatted_event.fingerprint, + }, + ) + fields = [] + for key, value in enriched_formatted_event.dict().items(): + if isinstance(value, dict): + for nested_key in value.keys(): + fields.append(f"{key}.{nested_key}") + else: + fields.append(key) + + bulk_upsert_alert_fields( + tenant_id=tenant_id, + fields=fields, + provider_id=enriched_formatted_event.providerId, + provider_type=enriched_formatted_event.providerType, + session=session, + ) - logger.debug( - "Bulk upserted alert fields", - extra={ - "alert_event_id": enriched_formatted_event.event_id, - "alert_fingerprint": enriched_formatted_event.fingerprint, - }, - ) + logger.debug( + "Bulk upserted alert fields", + extra={ + "alert_event_id": enriched_formatted_event.event_id, + "alert_fingerprint": enriched_formatted_event.fingerprint, + }, + ) # after the alert enriched and mapped, lets send it to the elasticsearch - elastic_client = ElasticClient(tenant_id=tenant_id) - for alert in enriched_formatted_events: + with tracer.start_as_current_span("process_event_push_to_elasticsearch"): + elastic_client = 
ElasticClient(tenant_id=tenant_id) + if elastic_client.enabled: + for alert in enriched_formatted_events: + try: + logger.debug( + "Pushing alert to elasticsearch", + extra={ + "alert_event_id": alert.event_id, + "alert_fingerprint": alert.fingerprint, + }, + ) + elastic_client.index_alert( + alert=alert, + ) + except Exception: + logger.exception( + "Failed to push alerts to elasticsearch", + extra={ + "provider_type": provider_type, + "num_of_alerts": len(formatted_events), + "provider_id": provider_id, + "tenant_id": tenant_id, + }, + ) + continue + + with tracer.start_as_current_span("process_event_push_to_workflows"): try: - logger.debug( - "Pushing alert to elasticsearch", - extra={ - "alert_event_id": alert.event_id, - "alert_fingerprint": alert.fingerprint, - }, - ) - elastic_client.index_alert( - alert=alert, - ) + # Now run any workflow that should run based on this alert + # TODO: this should publish event + workflow_manager = WorkflowManager.get_instance() + # insert the events to the workflow manager process queue + logger.info("Adding events to the workflow manager queue") + workflow_manager.insert_events(tenant_id, enriched_formatted_events) + logger.info("Added events to the workflow manager queue") except Exception: logger.exception( - "Failed to push alerts to elasticsearch", + "Failed to run workflows based on alerts", extra={ "provider_type": provider_type, "num_of_alerts": len(formatted_events), @@ -374,45 +437,116 @@ def __handle_formatted_events( "tenant_id": tenant_id, }, ) - continue - - try: - # Now run any workflow that should run based on this alert - # TODO: this should publish event - workflow_manager = WorkflowManager.get_instance() - # insert the events to the workflow manager process queue - logger.info("Adding events to the workflow manager queue") - workflow_manager.insert_events(tenant_id, enriched_formatted_events) - logger.info("Added events to the workflow manager queue") - except Exception: - logger.exception( - "Failed to run workflows based on alerts", - extra={ - "provider_type": provider_type, - "num_of_alerts": len(formatted_events), - "provider_id": provider_id, - "tenant_id": tenant_id, - }, - ) incidents = [] - # Now we need to run the rules engine - if KEEP_CORRELATION_ENABLED: + with tracer.start_as_current_span("process_event_run_rules_engine"): + # Now we need to run the rules engine + if KEEP_CORRELATION_ENABLED: + try: + rules_engine = RulesEngine(tenant_id=tenant_id) + incidents: List[IncidentDto] = rules_engine.run_rules( + enriched_formatted_events, session=session + ) + + # TODO: Replace with incidents workflow triggers. 
+                # if new grouped incidents were created, we need to push them to the client
+                # if incidents:
+                #     logger.info("Adding group alerts to the workflow manager queue")
+                #     workflow_manager.insert_events(tenant_id, grouped_alerts)
+                #     logger.info("Added group alerts to the workflow manager queue")
+            except Exception:
+                logger.exception(
+                    "Failed to run rules engine",
+                    extra={
+                        "provider_type": provider_type,
+                        "num_of_alerts": len(formatted_events),
+                        "provider_id": provider_id,
+                        "tenant_id": tenant_id,
+                    },
+                )
+
+    with tracer.start_as_current_span("process_event_notify_client"):
+        pusher_client = get_pusher_client() if notify_client else None
+        # Get the notification cache
+        pusher_cache = get_notification_cache()
+
+        # Tell the client to poll alerts
+        if pusher_client and pusher_cache.should_notify(tenant_id, "poll-alerts"):
+            try:
+                pusher_client.trigger(
+                    f"private-{tenant_id}",
+                    "poll-alerts",
+                    "{}",
+                )
+                logger.info("Told client to poll alerts")
+            except Exception:
+                logger.exception("Failed to tell client to poll alerts")
+                pass
+
+        if (
+            incidents
+            and pusher_client
+            and pusher_cache.should_notify(tenant_id, "incident-change")
+        ):
+            try:
+                pusher_client.trigger(
+                    f"private-{tenant_id}",
+                    "incident-change",
+                    {},
+                )
+            except Exception:
+                logger.exception("Failed to tell the client to pull incidents")
+
+        # Now we need to update the presets
+        # send with pusher
+        if not pusher_client:
+            return
+        try:
+            presets = get_all_presets_dtos(tenant_id)
             rules_engine = RulesEngine(tenant_id=tenant_id)
-            incidents: List[IncidentDto] = rules_engine.run_rules(
-                enriched_formatted_events, session=session
-            )
-
-            # TODO: Replace with incidents workflow triggers. Ticket: https://github.com/keephq/keep/issues/1527
-            # if new grouped incidents were created, we need to push them to the client
-            # if incidents:
-            #     logger.info("Adding group alerts to the workflow manager queue")
-            #     workflow_manager.insert_events(tenant_id, grouped_alerts)
-            #     logger.info("Added group alerts to the workflow manager queue")
+            presets_do_update = []
+            for preset_dto in presets:
+                # filter the alerts based on the search query
+                filtered_alerts = rules_engine.filter_alerts(
+                    enriched_formatted_events, preset_dto.cel_query
+                )
+                # if not related alerts, no need to update
+                if not filtered_alerts:
+                    continue
+                presets_do_update.append(preset_dto)
+                preset_dto.alerts_count = len(filtered_alerts)
+                # update noisy
+                if preset_dto.is_noisy:
+                    firing_filtered_alerts = list(
+                        filter(
+                            lambda alert: alert.status == AlertStatus.FIRING.value,
+                            filtered_alerts,
+                        )
+                    )
+                    # if there are firing alerts, then do noise
+                    if firing_filtered_alerts:
+                        logger.info("Noisy preset is noisy")
+                        preset_dto.should_do_noise_now = True
+                    # else if at least one of the alerts has isNoisy and should fire:
+                    elif any(
+                        alert.isNoisy and alert.status == AlertStatus.FIRING.value
+                        for alert in filtered_alerts
+                        if hasattr(alert, "isNoisy")
+                    ):
+                        logger.info("Noisy preset is noisy")
+                        preset_dto.should_do_noise_now = True
+            try:
+                pusher_client.trigger(
+                    f"private-{tenant_id}",
+                    "async-presets",
+                    json.dumps([p.dict() for p in presets_do_update], default=str),
+                )
+            except Exception:
+                logger.exception("Failed to send presets via pusher")
         except Exception:
             logger.exception(
-                "Failed to run rules engine",
+                "Failed to send presets via pusher",
                 extra={
                     "provider_type": provider_type,
                     "num_of_alerts": len(formatted_events),
@@ -420,98 +554,10 @@ def __handle_formatted_events(
                     "tenant_id": tenant_id,
                 },
             )
-
-    pusher_client = get_pusher_client() if notify_client else None
-    # Get the notification cache
-    pusher_cache = get_notification_cache()
-
-    # Tell the client to poll alerts
-    if pusher_client and pusher_cache.should_notify(tenant_id, "poll-alerts"):
-        try:
-            pusher_client.trigger(
-                f"private-{tenant_id}",
-                "poll-alerts",
-                "{}",
-            )
-            logger.info("Told client to poll alerts")
-        except Exception:
-            logger.exception("Failed to tell client to poll alerts")
-            pass
-
-    if (
-        incidents
-        and pusher_client
-        and pusher_cache.should_notify(tenant_id, "incident-change")
-    ):
-        try:
-            pusher_client.trigger(
-                f"private-{tenant_id}",
-                "incident-change",
-                {},
-            )
-        except Exception:
-            logger.exception("Failed to tell the client to pull incidents")
-
-    # Now we need to update the presets
-    # send with pusher
-    if not pusher_client:
-        return
-
-    try:
-        presets = get_all_presets_dtos(tenant_id)
-        rules_engine = RulesEngine(tenant_id=tenant_id)
-        presets_do_update = []
-        for preset_dto in presets:
-            # filter the alerts based on the search query
-            filtered_alerts = rules_engine.filter_alerts(
-                enriched_formatted_events, preset_dto.cel_query
-            )
-            # if not related alerts, no need to update
-            if not filtered_alerts:
-                continue
-            presets_do_update.append(preset_dto)
-            preset_dto.alerts_count = len(filtered_alerts)
-            # update noisy
-            if preset_dto.is_noisy:
-                firing_filtered_alerts = list(
-                    filter(
-                        lambda alert: alert.status == AlertStatus.FIRING.value,
-                        filtered_alerts,
-                    )
-                )
-                # if there are firing alerts, then do noise
-                if firing_filtered_alerts:
-                    logger.info("Noisy preset is noisy")
-                    preset_dto.should_do_noise_now = True
-                # else if at least one of the alerts has isNoisy and should fire:
-                elif any(
-                    alert.isNoisy and alert.status == AlertStatus.FIRING.value
-                    for alert in filtered_alerts
-                    if hasattr(alert, "isNoisy")
-                ):
-                    logger.info("Noisy preset is noisy")
-                    preset_dto.should_do_noise_now = True
-        try:
-            pusher_client.trigger(
-                f"private-{tenant_id}",
-                "async-presets",
-                json.dumps([p.dict() for p in presets_do_update], default=str),
-            )
-        except Exception:
-            logger.exception("Failed to send presets via pusher")
-    except Exception:
-        logger.exception(
-            "Failed to send presets via pusher",
-            extra={
-                "provider_type": provider_type,
-                "num_of_alerts": len(formatted_events),
-                "provider_id": provider_id,
-                "tenant_id": tenant_id,
-            },
-        )
     return enriched_formatted_events


+@processing_time_summary.time()
 def process_event(
     ctx: dict,  # arq context
     tenant_id: str,
@@ -526,6 +572,9 @@ def process_event(
     notify_client: bool = True,
     timestamp_forced: datetime.datetime | None = None,
 ) -> list[Alert]:
+    start_time = time.time()
+    job_id = ctx.get("job_id")
+
     extra_dict = {
         "tenant_id": tenant_id,
         "provider_type": provider_type,
@@ -533,95 +582,114 @@ def process_event(
         "fingerprint": fingerprint,
         "event_type": str(type(event)),
         "trace_id": trace_id,
-        "job_id": ctx.get("job_id"),
+        "job_id": job_id,
         "raw_event": (
             event if KEEP_STORE_RAW_ALERTS else None
         ),  # Let's log the events if we store it for debugging
     }
     logger.info("Processing event", extra=extra_dict)
+    tracer = trace.get_tracer(__name__)
+    raw_event = copy.deepcopy(event)
+    events_in_counter.inc()
     try:
-        session = get_session_sync()
-        # Pre alert formatting extraction rules
-        enrichments_bl = EnrichmentsBl(tenant_id, session)
-        try:
-            event = enrichments_bl.run_extraction_rules(event, pre=True)
-        except Exception:
-            logger.exception("Failed to run pre-formatting extraction rules")
+        with tracer.start_as_current_span("process_event_get_db_session"):
+            # Create a session to be used across the processing task
+            session = get_session_sync()
-        if (
-            provider_type is not None
-            and isinstance(event, dict)
-            or isinstance(event, FormData)
-            or isinstance(event, list)
-        ):
+        # Pre alert formatting extraction rules
+        with tracer.start_as_current_span("process_event_pre_alert_formatting"):
+            enrichments_bl = EnrichmentsBl(tenant_id, session)
             try:
-                provider_class = ProvidersFactory.get_provider_class(provider_type)
+                event = enrichments_bl.run_extraction_rules(event, pre=True)
             except Exception:
-                provider_class = ProvidersFactory.get_provider_class("keep")
+                logger.exception("Failed to run pre-formatting extraction rules")
+
+        with tracer.start_as_current_span("process_event_provider_formatting"):
+            if (
+                provider_type is not None
+                and isinstance(event, dict)
+                or isinstance(event, FormData)
+                or isinstance(event, list)
+            ):
+                try:
+                    provider_class = ProvidersFactory.get_provider_class(provider_type)
+                except Exception:
+                    provider_class = ProvidersFactory.get_provider_class("keep")
-            event = provider_class.format_alert(
-                tenant_id=tenant_id,
-                event=event,
-                provider_id=provider_id,
-                provider_type=provider_type,
-            )
-            # SHAHAR: for aws cloudwatch, we get a subscription notification message that we should skip
-            # todo: move it to be generic
-            if event is None and provider_type == "cloudwatch":
-                logger.info(
-                    "This is a subscription notification message from AWS - skipping processing"
-                )
-                return
-            elif event is None:
-                logger.info(
-                    "Provider returned None (failed silently), skipping processing"
+                event = provider_class.format_alert(
+                    tenant_id=tenant_id,
+                    event=event,
+                    provider_id=provider_id,
+                    provider_type=provider_type,
                 )
-                return
+                # SHAHAR: for aws cloudwatch, we get a subscription notification message that we should skip
+                # todo: move it to be generic
+                if event is None and provider_type == "cloudwatch":
+                    logger.info(
+                        "This is a subscription notification message from AWS - skipping processing"
+                    )
+                    return
+                elif event is None:
+                    logger.info(
+                        "Provider returned None (failed silently), skipping processing"
+                    )
-        if isinstance(event, str):
-            extra_dict["raw_event"] = event
-            logger.error(
-                "Event is a string (malformed json?), skipping processing",
-                extra=extra_dict,
+        if event:
+            if isinstance(event, str):
+                extra_dict["raw_event"] = event
+                logger.error(
+                    "Event is a string (malformed json?), skipping processing",
+                    extra=extra_dict,
+                )
+                return None
+
+            # In case when provider_type is not set
+            if isinstance(event, dict):
+                event = [AlertDto(**event)]
+                raw_event = [raw_event]
+
+            # Prepare the event for the digest
+            if isinstance(event, AlertDto):
+                event = [event]
+                raw_event = [raw_event]
+
+            with tracer.start_as_current_span("process_event_internal_preparation"):
+                __internal_prepartion(event, fingerprint, api_key_name)
+
+            formatted_events = __handle_formatted_events(
+                tenant_id,
+                provider_type,
+                session,
+                raw_event,
+                event,
+                tracer,
+                provider_id,
+                notify_client,
+                timestamp_forced,
+                job_id,
            )
-            return None
-
-        # In case when provider_type is not set
-        if isinstance(event, dict):
-            event = [AlertDto(**event)]
-            raw_event = [raw_event]
-        # Prepare the event for the digest
-        if isinstance(event, AlertDto):
-            event = [event]
-            raw_event = [raw_event]
-
-        __internal_prepartion(event, fingerprint, api_key_name)
-        return __handle_formatted_events(
-            tenant_id,
-            provider_type,
-            session,
-            raw_event,
-            event,
-            provider_id,
-            notify_client,
-            timestamp_forced,
-        )
+        logger.info(
+            "Event processed",
+            extra={**extra_dict, "processing_time": time.time() - start_time},
+        )
+        events_out_counter.inc()
+        return formatted_events
     except Exception:
         logger.exception(
             "Error processing event",
-            extra=extra_dict,
+            extra={**extra_dict, "processing_time": time.time() - start_time},
         )
         # In case of exception, add the alerts to the defect table
         __save_error_alerts(tenant_id, provider_type, raw_event)
+        events_error_counter.inc()
         # Retrying only if context is present (running the job in arq worker)
         if bool(ctx):
             raise Retry(defer=ctx["job_try"] * TIMES_TO_RETRY_JOB)
     finally:
         session.close()
-        logger.info("Event processed", extra=extra_dict)


 def __save_error_alerts(
diff --git a/keep/identitymanager/authverifierbase.py b/keep/identitymanager/authverifierbase.py
index da96bdc14..3978a1b33 100644
--- a/keep/identitymanager/authverifierbase.py
+++ b/keep/identitymanager/authverifierbase.py
@@ -198,7 +198,6 @@ def authorize(self, authenticated_entity: AuthenticatedEntity) -> None:
         """
         self.logger.debug(f"Authorizing entity: {authenticated_entity}")
         self._authorize(authenticated_entity)
-        self.logger.debug("Authorization successful")

     def _authorize(self, authenticated_entity: AuthenticatedEntity) -> None:
         """
@@ -218,7 +217,6 @@ def _authorize(self, authenticated_entity: AuthenticatedEntity) -> None:
                 status_code=403,
                 detail=f"You don't have the required scopes to access this resource [required scopes: {self.scopes}]",
             )
-        self.logger.debug("Authorization successful")

     def _extract_api_key(
         self,
diff --git a/keep/providers/opsgenie_provider/opsgenie_provider.py b/keep/providers/opsgenie_provider/opsgenie_provider.py
index 4403dc119..3beb8d72e 100644
--- a/keep/providers/opsgenie_provider/opsgenie_provider.py
+++ b/keep/providers/opsgenie_provider/opsgenie_provider.py
@@ -72,11 +72,12 @@ def validate_scopes(self):
         scopes = {}
         self.logger.info("Validating scopes")
         try:
-            self._create_alert(
+            alert = self._create_alert(
                 user="John Doe",
                 note="Simple alert",
                 message="Simple alert showing context with name: John Doe",
             )
+            self._delete_alert(alert["id"])
             scopes["opsgenie:create"] = True
         except ApiException as e:
             self.logger.exception("Failed to create OpsGenie alert")
@@ -91,6 +92,10 @@ def validate_config(self):
             **self.config.authentication
         )

+    def _delete_alert(self, alert_id: str):
+        api_instance = opsgenie_sdk.AlertApi(opsgenie_sdk.ApiClient(self.configuration))
+        return api_instance.delete_alert(alert_id)
+
     # https://github.com/opsgenie/opsgenie-python-sdk/blob/master/docs/CreateAlertPayload.md
     def _create_alert(
         self,
@@ -129,7 +134,10 @@ def _create_alert(
             priority=priority,
         )
         try:
-            api_instance.create_alert(create_alert_payload)
+            alert = api_instance.create_alert(create_alert_payload)
+            alert_dict = alert.to_dict()
+            alert_dict["id"] = alert.id
+            return alert_dict
         except ApiException:
             self.logger.exception("Failed to create OpsGenie alert")
             raise
@@ -206,7 +214,7 @@ def _notify(
             Args:
                 kwargs (dict): The providers with context
         """
-        self._create_alert(
+        return self._create_alert(
             user,
             note,
             source,
diff --git a/keep/providers/providers_factory.py b/keep/providers/providers_factory.py
index 1f610b8bc..9e80b4df3 100644
--- a/keep/providers/providers_factory.py
+++ b/keep/providers/providers_factory.py
@@ -44,6 +44,7 @@ class ProviderConfigurationException(Exception):

 class ProvidersFactory:
     _loaded_providers_cache = None
+    _loaded_deduplication_rules_cache = None

     @staticmethod
     def get_provider_class(
@@ -598,6 +599,9 @@ def get_default_deduplication_rules() -> list[DeduplicationRuleDto]:
         Returns:
             list: The default deduplications for each provider.
         """
+        if ProvidersFactory._loaded_deduplication_rules_cache:
+            return ProvidersFactory._loaded_deduplication_rules_cache
+
         default_deduplications = []

         all_providers = ProvidersFactory.get_all_providers()
@@ -624,4 +628,5 @@ def get_default_deduplication_rules() -> list[DeduplicationRuleDto]:
             )
             default_deduplications.append(deduplication_dto)

+        ProvidersFactory._loaded_deduplication_rules_cache = default_deduplications
         return default_deduplications
diff --git a/keep/providers/slack_provider/slack_provider.py b/keep/providers/slack_provider/slack_provider.py
index 003c34b76..4e0040061 100644
--- a/keep/providers/slack_provider/slack_provider.py
+++ b/keep/providers/slack_provider/slack_provider.py
@@ -162,7 +162,7 @@ def _notify(
             # Also, do not encode the payload as json, but as x-www-form-urlencoded
             # Only reference I found for it is: https://getkeep.slack.com/services/B082F60L9GX?added=1 and
             # https://stackoverflow.com/questions/42993602/slack-chat-postmessage-attachment-gives-no-text
-            if payload["attachments"]:
+            if payload.get("attachments", None):
                 payload["attachments"] = attachments
             response = requests.post(
                 self.authentication_config.webhook_url,
diff --git a/poetry.lock b/poetry.lock
index 3bcbc2d9c..9d66c1c4e 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -3245,6 +3245,20 @@ wcwidth = "*"
 [package.extras]
 tests = ["pytest", "pytest-cov", "pytest-lazy-fixtures"]

+[[package]]
+name = "prometheus-client"
+version = "0.21.1"
+description = "Python client for the Prometheus monitoring system."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "prometheus_client-0.21.1-py3-none-any.whl", hash = "sha256:594b45c410d6f4f8888940fe80b5cc2521b305a1fafe1c58609ef715a001f301"},
+    {file = "prometheus_client-0.21.1.tar.gz", hash = "sha256:252505a722ac04b0456be05c05f75f45d760c2911ffc45f2a06bcaed9f3ae3fb"},
+]
+
+[package.extras]
+twisted = ["twisted"]
+
 [[package]]
 name = "propcache"
 version = "0.2.0"
@@ -3389,6 +3403,102 @@ files = [
     {file = "protobuf-4.25.3.tar.gz", hash = "sha256:25b5d0b42fd000320bd7830b349e3b696435f3b329810427a6bcce6a5492cc5c"},
 ]

+[[package]]
+name = "psycopg"
+version = "3.2.3"
+description = "PostgreSQL database adapter for Python"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "psycopg-3.2.3-py3-none-any.whl", hash = "sha256:644d3973fe26908c73d4be746074f6e5224b03c1101d302d9a53bf565ad64907"},
+    {file = "psycopg-3.2.3.tar.gz", hash = "sha256:a5764f67c27bec8bfac85764d23c534af2c27b893550377e37ce59c12aac47a2"},
+]
+
+[package.dependencies]
+typing-extensions = {version = ">=4.6", markers = "python_version < \"3.13\""}
+tzdata = {version = "*", markers = "sys_platform == \"win32\""}
+
+[package.extras]
+binary = ["psycopg-binary (==3.2.3)"]
+c = ["psycopg-c (==3.2.3)"]
+dev = ["ast-comments (>=1.1.2)", "black (>=24.1.0)", "codespell (>=2.2)", "dnspython (>=2.1)", "flake8 (>=4.0)", "mypy (>=1.11)", "types-setuptools (>=57.4)", "wheel (>=0.37)"]
+docs = ["Sphinx (>=5.0)", "furo (==2022.6.21)", "sphinx-autobuild (>=2021.3.14)", "sphinx-autodoc-typehints (>=1.12)"]
+pool = ["psycopg-pool"]
+test = ["anyio (>=4.0)", "mypy (>=1.11)", "pproxy (>=2.7)", "pytest (>=6.2.5)", "pytest-cov (>=3.0)", "pytest-randomly (>=3.5)"]
+
+[[package]]
+name = "psycopg-binary"
+version = "3.2.3"
+description = "PostgreSQL database adapter for Python -- C optimisation distribution"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file =
"psycopg_binary-3.2.3-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:965455eac8547f32b3181d5ec9ad8b9be500c10fe06193543efaaebe3e4ce70c"}, + {file = "psycopg_binary-3.2.3-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:71adcc8bc80a65b776510bc39992edf942ace35b153ed7a9c6c573a6849ce308"}, + {file = "psycopg_binary-3.2.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f73adc05452fb85e7a12ed3f69c81540a8875960739082e6ea5e28c373a30774"}, + {file = "psycopg_binary-3.2.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e8630943143c6d6ca9aefc88bbe5e76c90553f4e1a3b2dc339e67dc34aa86f7e"}, + {file = "psycopg_binary-3.2.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3bffb61e198a91f712cc3d7f2d176a697cb05b284b2ad150fb8edb308eba9002"}, + {file = "psycopg_binary-3.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc4fa2240c9fceddaa815a58f29212826fafe43ce80ff666d38c4a03fb036955"}, + {file = "psycopg_binary-3.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:192a5f8496e6e1243fdd9ac20e117e667c0712f148c5f9343483b84435854c78"}, + {file = "psycopg_binary-3.2.3-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:64dc6e9ec64f592f19dc01a784e87267a64a743d34f68488924251253da3c818"}, + {file = "psycopg_binary-3.2.3-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:79498df398970abcee3d326edd1d4655de7d77aa9aecd578154f8af35ce7bbd2"}, + {file = "psycopg_binary-3.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:949551752930d5e478817e0b49956350d866b26578ced0042a61967e3fcccdea"}, + {file = "psycopg_binary-3.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:80a2337e2dfb26950894c8301358961430a0304f7bfe729d34cc036474e9c9b1"}, + {file = "psycopg_binary-3.2.3-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:6d8f2144e0d5808c2e2aed40fbebe13869cd00c2ae745aca4b3b16a435edb056"}, + {file = "psycopg_binary-3.2.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:94253be2b57ef2fea7ffe08996067aabf56a1eb9648342c9e3bad9e10c46e045"}, + {file = "psycopg_binary-3.2.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fda0162b0dbfa5eaed6cdc708179fa27e148cb8490c7d62e5cf30713909658ea"}, + {file = "psycopg_binary-3.2.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2c0419cdad8c70eaeb3116bb28e7b42d546f91baf5179d7556f230d40942dc78"}, + {file = "psycopg_binary-3.2.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:74fbf5dd3ef09beafd3557631e282f00f8af4e7a78fbfce8ab06d9cd5a789aae"}, + {file = "psycopg_binary-3.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d784f614e4d53050cbe8abf2ae9d1aaacf8ed31ce57b42ce3bf2a48a66c3a5c"}, + {file = "psycopg_binary-3.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4e76ce2475ed4885fe13b8254058be710ec0de74ebd8ef8224cf44a9a3358e5f"}, + {file = "psycopg_binary-3.2.3-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5938b257b04c851c2d1e6cb2f8c18318f06017f35be9a5fe761ee1e2e344dfb7"}, + {file = "psycopg_binary-3.2.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:257c4aea6f70a9aef39b2a77d0658a41bf05c243e2bf41895eb02220ac6306f3"}, + {file = "psycopg_binary-3.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:06b5cc915e57621eebf2393f4173793ed7e3387295f07fed93ed3fb6a6ccf585"}, + {file = "psycopg_binary-3.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:09baa041856b35598d335b1a74e19a49da8500acedf78164600694c0ba8ce21b"}, + {file = 
"psycopg_binary-3.2.3-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:48f8ca6ee8939bab760225b2ab82934d54330eec10afe4394a92d3f2a0c37dd6"}, + {file = "psycopg_binary-3.2.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:5361ea13c241d4f0ec3f95e0bf976c15e2e451e9cc7ef2e5ccfc9d170b197a40"}, + {file = "psycopg_binary-3.2.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb987f14af7da7c24f803111dbc7392f5070fd350146af3345103f76ea82e339"}, + {file = "psycopg_binary-3.2.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0463a11b1cace5a6aeffaf167920707b912b8986a9c7920341c75e3686277920"}, + {file = "psycopg_binary-3.2.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8b7be9a6c06518967b641fb15032b1ed682fd3b0443f64078899c61034a0bca6"}, + {file = "psycopg_binary-3.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64a607e630d9f4b2797f641884e52b9f8e239d35943f51bef817a384ec1678fe"}, + {file = "psycopg_binary-3.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:fa33ead69ed133210d96af0c63448b1385df48b9c0247eda735c5896b9e6dbbf"}, + {file = "psycopg_binary-3.2.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:1f8b0d0e99d8e19923e6e07379fa00570be5182c201a8c0b5aaa9a4d4a4ea20b"}, + {file = "psycopg_binary-3.2.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:709447bd7203b0b2debab1acec23123eb80b386f6c29e7604a5d4326a11e5bd6"}, + {file = "psycopg_binary-3.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5e37d5027e297a627da3551a1e962316d0f88ee4ada74c768f6c9234e26346d9"}, + {file = "psycopg_binary-3.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:261f0031ee6074765096a19b27ed0f75498a8338c3dcd7f4f0d831e38adf12d1"}, + {file = "psycopg_binary-3.2.3-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:41fdec0182efac66b27478ac15ef54c9ebcecf0e26ed467eb7d6f262a913318b"}, + {file = "psycopg_binary-3.2.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:07d019a786eb020c0f984691aa1b994cb79430061065a694cf6f94056c603d26"}, + {file = "psycopg_binary-3.2.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4c57615791a337378fe5381143259a6c432cdcbb1d3e6428bfb7ce59fff3fb5c"}, + {file = "psycopg_binary-3.2.3-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e8eb9a4e394926b93ad919cad1b0a918e9b4c846609e8c1cfb6b743683f64da0"}, + {file = "psycopg_binary-3.2.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5905729668ef1418bd36fbe876322dcb0f90b46811bba96d505af89e6fbdce2f"}, + {file = "psycopg_binary-3.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd65774ed7d65101b314808b6893e1a75b7664f680c3ef18d2e5c84d570fa393"}, + {file = "psycopg_binary-3.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:700679c02f9348a0d0a2adcd33a0275717cd0d0aee9d4482b47d935023629505"}, + {file = "psycopg_binary-3.2.3-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:96334bb64d054e36fed346c50c4190bad9d7c586376204f50bede21a913bf942"}, + {file = "psycopg_binary-3.2.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:9099e443d4cc24ac6872e6a05f93205ba1a231b1a8917317b07c9ef2b955f1f4"}, + {file = "psycopg_binary-3.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1985ab05e9abebfbdf3163a16ebb37fbc5d49aff2bf5b3d7375ff0920bbb54cd"}, + {file = "psycopg_binary-3.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:e90352d7b610b4693fad0feea48549d4315d10f1eba5605421c92bb834e90170"}, + {file = 
"psycopg_binary-3.2.3-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:69320f05de8cdf4077ecd7fefdec223890eea232af0d58f2530cbda2871244a0"}, + {file = "psycopg_binary-3.2.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4926ea5c46da30bec4a85907aa3f7e4ea6313145b2aa9469fdb861798daf1502"}, + {file = "psycopg_binary-3.2.3-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c64c4cd0d50d5b2288ab1bcb26c7126c772bbdebdfadcd77225a77df01c4a57e"}, + {file = "psycopg_binary-3.2.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05a1bdce30356e70a05428928717765f4a9229999421013f41338d9680d03a63"}, + {file = "psycopg_binary-3.2.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ad357e426b0ea5c3043b8ec905546fa44b734bf11d33b3da3959f6e4447d350"}, + {file = "psycopg_binary-3.2.3-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:967b47a0fd237aa17c2748fdb7425015c394a6fb57cdad1562e46a6eb070f96d"}, + {file = "psycopg_binary-3.2.3-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:71db8896b942770ed7ab4efa59b22eee5203be2dfdee3c5258d60e57605d688c"}, + {file = "psycopg_binary-3.2.3-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:2773f850a778575dd7158a6dd072f7925b67f3ba305e2003538e8831fec77a1d"}, + {file = "psycopg_binary-3.2.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:aeddf7b3b3f6e24ccf7d0edfe2d94094ea76b40e831c16eff5230e040ce3b76b"}, + {file = "psycopg_binary-3.2.3-cp38-cp38-win_amd64.whl", hash = "sha256:824c867a38521d61d62b60aca7db7ca013a2b479e428a0db47d25d8ca5067410"}, + {file = "psycopg_binary-3.2.3-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:9994f7db390c17fc2bd4c09dca722fd792ff8a49bb3bdace0c50a83f22f1767d"}, + {file = "psycopg_binary-3.2.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1303bf8347d6be7ad26d1362af2c38b3a90b8293e8d56244296488ee8591058e"}, + {file = "psycopg_binary-3.2.3-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:842da42a63ecb32612bb7f5b9e9f8617eab9bc23bd58679a441f4150fcc51c96"}, + {file = "psycopg_binary-3.2.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2bb342a01c76f38a12432848e6013c57eb630103e7556cf79b705b53814c3949"}, + {file = "psycopg_binary-3.2.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd40af959173ea0d087b6b232b855cfeaa6738f47cb2a0fd10a7f4fa8b74293f"}, + {file = "psycopg_binary-3.2.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:9b60b465773a52c7d4705b0a751f7f1cdccf81dd12aee3b921b31a6e76b07b0e"}, + {file = "psycopg_binary-3.2.3-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:fc6d87a1c44df8d493ef44988a3ded751e284e02cdf785f746c2d357e99782a6"}, + {file = "psycopg_binary-3.2.3-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:f0b018e37608c3bfc6039a1dc4eb461e89334465a19916be0153c757a78ea426"}, + {file = "psycopg_binary-3.2.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2a29f5294b0b6360bfda69653697eff70aaf2908f58d1073b0acd6f6ab5b5a4f"}, + {file = "psycopg_binary-3.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:e56b1fd529e5dde2d1452a7d72907b37ed1b4f07fdced5d8fb1e963acfff6749"}, +] + [[package]] name = "psycopg2-binary" version = "2.9.10" @@ -3878,6 +3988,20 @@ pytest = ">=6.2.5" [package.extras] dev = ["pre-commit", "pytest-asyncio", "tox"] +[[package]] +name = "pytest-timeout" +version = "2.3.1" +description = "pytest plugin to abort hanging tests" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-timeout-2.3.1.tar.gz", 
hash = "sha256:12397729125c6ecbdaca01035b9e5239d4db97352320af155b3f5de1ba5165d9"}, + {file = "pytest_timeout-2.3.1-py3-none-any.whl", hash = "sha256:68188cb703edfc6a18fad98dc25a3c61e9f24d644b0b70f33af545219fc7813e"}, +] + +[package.dependencies] +pytest = ">=7.0.0" + [[package]] name = "pytest-xdist" version = "3.6.1" @@ -4575,77 +4699,98 @@ files = [ [[package]] name = "sqlalchemy" -version = "1.4.41" +version = "2.0.36" description = "Database Abstraction Library" optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" -files = [ - {file = "SQLAlchemy-1.4.41-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:13e397a9371ecd25573a7b90bd037db604331cf403f5318038c46ee44908c44d"}, - {file = "SQLAlchemy-1.4.41-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2d6495f84c4fd11584f34e62f9feec81bf373787b3942270487074e35cbe5330"}, - {file = "SQLAlchemy-1.4.41-cp27-cp27m-win32.whl", hash = "sha256:e570cfc40a29d6ad46c9aeaddbdcee687880940a3a327f2c668dd0e4ef0a441d"}, - {file = "SQLAlchemy-1.4.41-cp27-cp27m-win_amd64.whl", hash = "sha256:5facb7fd6fa8a7353bbe88b95695e555338fb038ad19ceb29c82d94f62775a05"}, - {file = "SQLAlchemy-1.4.41-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:f37fa70d95658763254941ddd30ecb23fc4ec0c5a788a7c21034fc2305dab7cc"}, - {file = "SQLAlchemy-1.4.41-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:361f6b5e3f659e3c56ea3518cf85fbdae1b9e788ade0219a67eeaaea8a4e4d2a"}, - {file = "SQLAlchemy-1.4.41-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0990932f7cca97fece8017414f57fdd80db506a045869d7ddf2dda1d7cf69ecc"}, - {file = "SQLAlchemy-1.4.41-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cd767cf5d7252b1c88fcfb58426a32d7bd14a7e4942497e15b68ff5d822b41ad"}, - {file = "SQLAlchemy-1.4.41-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5102fb9ee2c258a2218281adcb3e1918b793c51d6c2b4666ce38c35101bb940e"}, - {file = "SQLAlchemy-1.4.41-cp310-cp310-win32.whl", hash = "sha256:2082a2d2fca363a3ce21cfa3d068c5a1ce4bf720cf6497fb3a9fc643a8ee4ddd"}, - {file = "SQLAlchemy-1.4.41-cp310-cp310-win_amd64.whl", hash = "sha256:e4b12e3d88a8fffd0b4ca559f6d4957ed91bd4c0613a4e13846ab8729dc5c251"}, - {file = "SQLAlchemy-1.4.41-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:90484a2b00baedad361402c257895b13faa3f01780f18f4a104a2f5c413e4536"}, - {file = "SQLAlchemy-1.4.41-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b67fc780cfe2b306180e56daaa411dd3186bf979d50a6a7c2a5b5036575cbdbb"}, - {file = "SQLAlchemy-1.4.41-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ad2b727fc41c7f8757098903f85fafb4bf587ca6605f82d9bf5604bd9c7cded"}, - {file = "SQLAlchemy-1.4.41-cp311-cp311-win32.whl", hash = "sha256:59bdc291165b6119fc6cdbc287c36f7f2859e6051dd923bdf47b4c55fd2f8bd0"}, - {file = "SQLAlchemy-1.4.41-cp311-cp311-win_amd64.whl", hash = "sha256:d2e054aed4645f9b755db85bc69fc4ed2c9020c19c8027976f66576b906a74f1"}, - {file = "SQLAlchemy-1.4.41-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:4ba7e122510bbc07258dc42be6ed45997efdf38129bde3e3f12649be70683546"}, - {file = "SQLAlchemy-1.4.41-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0dcf127bb99458a9d211e6e1f0f3edb96c874dd12f2503d4d8e4f1fd103790b"}, - {file = 
"SQLAlchemy-1.4.41-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e16c2be5cb19e2c08da7bd3a87fed2a0d4e90065ee553a940c4fc1a0fb1ab72b"}, - {file = "SQLAlchemy-1.4.41-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5ebeeec5c14533221eb30bad716bc1fd32f509196318fb9caa7002c4a364e4c"}, - {file = "SQLAlchemy-1.4.41-cp36-cp36m-win32.whl", hash = "sha256:3e2ef592ac3693c65210f8b53d0edcf9f4405925adcfc031ff495e8d18169682"}, - {file = "SQLAlchemy-1.4.41-cp36-cp36m-win_amd64.whl", hash = "sha256:eb30cf008850c0a26b72bd1b9be6730830165ce049d239cfdccd906f2685f892"}, - {file = "SQLAlchemy-1.4.41-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:c23d64a0b28fc78c96289ffbd0d9d1abd48d267269b27f2d34e430ea73ce4b26"}, - {file = "SQLAlchemy-1.4.41-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8eb8897367a21b578b26f5713833836f886817ee2ffba1177d446fa3f77e67c8"}, - {file = "SQLAlchemy-1.4.41-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:14576238a5f89bcf504c5f0a388d0ca78df61fb42cb2af0efe239dc965d4f5c9"}, - {file = "SQLAlchemy-1.4.41-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:639e1ae8d48b3c86ffe59c0daa9a02e2bfe17ca3d2b41611b30a0073937d4497"}, - {file = "SQLAlchemy-1.4.41-cp37-cp37m-win32.whl", hash = "sha256:0005bd73026cd239fc1e8ccdf54db58b6193be9a02b3f0c5983808f84862c767"}, - {file = "SQLAlchemy-1.4.41-cp37-cp37m-win_amd64.whl", hash = "sha256:5323252be2bd261e0aa3f33cb3a64c45d76829989fa3ce90652838397d84197d"}, - {file = "SQLAlchemy-1.4.41-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:05f0de3a1dc3810a776275763764bb0015a02ae0f698a794646ebc5fb06fad33"}, - {file = "SQLAlchemy-1.4.41-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0002e829142b2af00b4eaa26c51728f3ea68235f232a2e72a9508a3116bd6ed0"}, - {file = "SQLAlchemy-1.4.41-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:22ff16cedab5b16a0db79f1bc99e46a6ddececb60c396562e50aab58ddb2871c"}, - {file = "SQLAlchemy-1.4.41-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ccfd238f766a5bb5ee5545a62dd03f316ac67966a6a658efb63eeff8158a4bbf"}, - {file = "SQLAlchemy-1.4.41-cp38-cp38-win32.whl", hash = "sha256:58bb65b3274b0c8a02cea9f91d6f44d0da79abc993b33bdedbfec98c8440175a"}, - {file = "SQLAlchemy-1.4.41-cp38-cp38-win_amd64.whl", hash = "sha256:ce8feaa52c1640de9541eeaaa8b5fb632d9d66249c947bb0d89dd01f87c7c288"}, - {file = "SQLAlchemy-1.4.41-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:199a73c31ac8ea59937cc0bf3dfc04392e81afe2ec8a74f26f489d268867846c"}, - {file = "SQLAlchemy-1.4.41-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4676d51c9f6f6226ae8f26dc83ec291c088fe7633269757d333978df78d931ab"}, - {file = "SQLAlchemy-1.4.41-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:036d8472356e1d5f096c5e0e1a7e0f9182140ada3602f8fff6b7329e9e7cfbcd"}, - {file = "SQLAlchemy-1.4.41-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2307495d9e0ea00d0c726be97a5b96615035854972cc538f6e7eaed23a35886c"}, - {file = "SQLAlchemy-1.4.41-cp39-cp39-win32.whl", hash = 
"sha256:9c56e19780cd1344fcd362fd6265a15f48aa8d365996a37fab1495cae8fcd97d"}, - {file = "SQLAlchemy-1.4.41-cp39-cp39-win_amd64.whl", hash = "sha256:f5fa526d027d804b1f85cdda1eb091f70bde6fb7d87892f6dd5a48925bc88898"}, - {file = "SQLAlchemy-1.4.41.tar.gz", hash = "sha256:0292f70d1797e3c54e862e6f30ae474014648bc9c723e14a2fda730adb0a9791"}, -] - -[package.dependencies] -greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} +python-versions = ">=3.7" +files = [ + {file = "SQLAlchemy-2.0.36-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:59b8f3adb3971929a3e660337f5dacc5942c2cdb760afcabb2614ffbda9f9f72"}, + {file = "SQLAlchemy-2.0.36-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:37350015056a553e442ff672c2d20e6f4b6d0b2495691fa239d8aa18bb3bc908"}, + {file = "SQLAlchemy-2.0.36-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8318f4776c85abc3f40ab185e388bee7a6ea99e7fa3a30686580b209eaa35c08"}, + {file = "SQLAlchemy-2.0.36-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c245b1fbade9c35e5bd3b64270ab49ce990369018289ecfde3f9c318411aaa07"}, + {file = "SQLAlchemy-2.0.36-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:69f93723edbca7342624d09f6704e7126b152eaed3cdbb634cb657a54332a3c5"}, + {file = "SQLAlchemy-2.0.36-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f9511d8dd4a6e9271d07d150fb2f81874a3c8c95e11ff9af3a2dfc35fe42ee44"}, + {file = "SQLAlchemy-2.0.36-cp310-cp310-win32.whl", hash = "sha256:c3f3631693003d8e585d4200730616b78fafd5a01ef8b698f6967da5c605b3fa"}, + {file = "SQLAlchemy-2.0.36-cp310-cp310-win_amd64.whl", hash = "sha256:a86bfab2ef46d63300c0f06936bd6e6c0105faa11d509083ba8f2f9d237fb5b5"}, + {file = "SQLAlchemy-2.0.36-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fd3a55deef00f689ce931d4d1b23fa9f04c880a48ee97af488fd215cf24e2a6c"}, + {file = "SQLAlchemy-2.0.36-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4f5e9cd989b45b73bd359f693b935364f7e1f79486e29015813c338450aa5a71"}, + {file = "SQLAlchemy-2.0.36-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0ddd9db6e59c44875211bc4c7953a9f6638b937b0a88ae6d09eb46cced54eff"}, + {file = "SQLAlchemy-2.0.36-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2519f3a5d0517fc159afab1015e54bb81b4406c278749779be57a569d8d1bb0d"}, + {file = "SQLAlchemy-2.0.36-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:59b1ee96617135f6e1d6f275bbe988f419c5178016f3d41d3c0abb0c819f75bb"}, + {file = "SQLAlchemy-2.0.36-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:39769a115f730d683b0eb7b694db9789267bcd027326cccc3125e862eb03bfd8"}, + {file = "SQLAlchemy-2.0.36-cp311-cp311-win32.whl", hash = "sha256:66bffbad8d6271bb1cc2f9a4ea4f86f80fe5e2e3e501a5ae2a3dc6a76e604e6f"}, + {file = "SQLAlchemy-2.0.36-cp311-cp311-win_amd64.whl", hash = "sha256:23623166bfefe1487d81b698c423f8678e80df8b54614c2bf4b4cfcd7c711959"}, + {file = "SQLAlchemy-2.0.36-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f7b64e6ec3f02c35647be6b4851008b26cff592a95ecb13b6788a54ef80bbdd4"}, + {file = "SQLAlchemy-2.0.36-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:46331b00096a6db1fdc052d55b101dbbfc99155a548e20a0e4a8e5e4d1362855"}, + {file = "SQLAlchemy-2.0.36-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", 
hash = "sha256:fdf3386a801ea5aba17c6410dd1dc8d39cf454ca2565541b5ac42a84e1e28f53"}, + {file = "SQLAlchemy-2.0.36-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac9dfa18ff2a67b09b372d5db8743c27966abf0e5344c555d86cc7199f7ad83a"}, + {file = "SQLAlchemy-2.0.36-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:90812a8933df713fdf748b355527e3af257a11e415b613dd794512461eb8a686"}, + {file = "SQLAlchemy-2.0.36-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1bc330d9d29c7f06f003ab10e1eaced295e87940405afe1b110f2eb93a233588"}, + {file = "SQLAlchemy-2.0.36-cp312-cp312-win32.whl", hash = "sha256:79d2e78abc26d871875b419e1fd3c0bca31a1cb0043277d0d850014599626c2e"}, + {file = "SQLAlchemy-2.0.36-cp312-cp312-win_amd64.whl", hash = "sha256:b544ad1935a8541d177cb402948b94e871067656b3a0b9e91dbec136b06a2ff5"}, + {file = "SQLAlchemy-2.0.36-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b5cc79df7f4bc3d11e4b542596c03826063092611e481fcf1c9dfee3c94355ef"}, + {file = "SQLAlchemy-2.0.36-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3c01117dd36800f2ecaa238c65365b7b16497adc1522bf84906e5710ee9ba0e8"}, + {file = "SQLAlchemy-2.0.36-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9bc633f4ee4b4c46e7adcb3a9b5ec083bf1d9a97c1d3854b92749d935de40b9b"}, + {file = "SQLAlchemy-2.0.36-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e46ed38affdfc95d2c958de328d037d87801cfcbea6d421000859e9789e61c2"}, + {file = "SQLAlchemy-2.0.36-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b2985c0b06e989c043f1dc09d4fe89e1616aadd35392aea2844f0458a989eacf"}, + {file = "SQLAlchemy-2.0.36-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4a121d62ebe7d26fec9155f83f8be5189ef1405f5973ea4874a26fab9f1e262c"}, + {file = "SQLAlchemy-2.0.36-cp313-cp313-win32.whl", hash = "sha256:0572f4bd6f94752167adfd7c1bed84f4b240ee6203a95e05d1e208d488d0d436"}, + {file = "SQLAlchemy-2.0.36-cp313-cp313-win_amd64.whl", hash = "sha256:8c78ac40bde930c60e0f78b3cd184c580f89456dd87fc08f9e3ee3ce8765ce88"}, + {file = "SQLAlchemy-2.0.36-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:be9812b766cad94a25bc63bec11f88c4ad3629a0cec1cd5d4ba48dc23860486b"}, + {file = "SQLAlchemy-2.0.36-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50aae840ebbd6cdd41af1c14590e5741665e5272d2fee999306673a1bb1fdb4d"}, + {file = "SQLAlchemy-2.0.36-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4557e1f11c5f653ebfdd924f3f9d5ebfc718283b0b9beebaa5dd6b77ec290971"}, + {file = "SQLAlchemy-2.0.36-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:07b441f7d03b9a66299ce7ccf3ef2900abc81c0db434f42a5694a37bd73870f2"}, + {file = "SQLAlchemy-2.0.36-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:28120ef39c92c2dd60f2721af9328479516844c6b550b077ca450c7d7dc68575"}, + {file = "SQLAlchemy-2.0.36-cp37-cp37m-win32.whl", hash = "sha256:b81ee3d84803fd42d0b154cb6892ae57ea6b7c55d8359a02379965706c7efe6c"}, + {file = "SQLAlchemy-2.0.36-cp37-cp37m-win_amd64.whl", hash = "sha256:f942a799516184c855e1a32fbc7b29d7e571b52612647866d4ec1c3242578fcb"}, + {file = "SQLAlchemy-2.0.36-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3d6718667da04294d7df1670d70eeddd414f313738d20a6f1d1f379e3139a545"}, + {file = "SQLAlchemy-2.0.36-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:72c28b84b174ce8af8504ca28ae9347d317f9dba3999e5981a3cd441f3712e24"}, + {file = "SQLAlchemy-2.0.36-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:b11d0cfdd2b095e7b0686cf5fabeb9c67fae5b06d265d8180715b8cfa86522e3"}, + {file = "SQLAlchemy-2.0.36-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e32092c47011d113dc01ab3e1d3ce9f006a47223b18422c5c0d150af13a00687"}, + {file = "SQLAlchemy-2.0.36-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:6a440293d802d3011028e14e4226da1434b373cbaf4a4bbb63f845761a708346"}, + {file = "SQLAlchemy-2.0.36-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c54a1e53a0c308a8e8a7dffb59097bff7facda27c70c286f005327f21b2bd6b1"}, + {file = "SQLAlchemy-2.0.36-cp38-cp38-win32.whl", hash = "sha256:1e0d612a17581b6616ff03c8e3d5eff7452f34655c901f75d62bd86449d9750e"}, + {file = "SQLAlchemy-2.0.36-cp38-cp38-win_amd64.whl", hash = "sha256:8958b10490125124463095bbdadda5aa22ec799f91958e410438ad6c97a7b793"}, + {file = "SQLAlchemy-2.0.36-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:dc022184d3e5cacc9579e41805a681187650e170eb2fd70e28b86192a479dcaa"}, + {file = "SQLAlchemy-2.0.36-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b817d41d692bf286abc181f8af476c4fbef3fd05e798777492618378448ee689"}, + {file = "SQLAlchemy-2.0.36-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4e46a888b54be23d03a89be510f24a7652fe6ff660787b96cd0e57a4ebcb46d"}, + {file = "SQLAlchemy-2.0.36-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4ae3005ed83f5967f961fd091f2f8c5329161f69ce8480aa8168b2d7fe37f06"}, + {file = "SQLAlchemy-2.0.36-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:03e08af7a5f9386a43919eda9de33ffda16b44eb11f3b313e6822243770e9763"}, + {file = "SQLAlchemy-2.0.36-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:3dbb986bad3ed5ceaf090200eba750b5245150bd97d3e67343a3cfed06feecf7"}, + {file = "SQLAlchemy-2.0.36-cp39-cp39-win32.whl", hash = "sha256:9fe53b404f24789b5ea9003fc25b9a3988feddebd7e7b369c8fac27ad6f52f28"}, + {file = "SQLAlchemy-2.0.36-cp39-cp39-win_amd64.whl", hash = "sha256:af148a33ff0349f53512a049c6406923e4e02bf2f26c5fb285f143faf4f0e46a"}, + {file = "SQLAlchemy-2.0.36-py3-none-any.whl", hash = "sha256:fddbe92b4760c6f5d48162aef14824add991aeda8ddadb3c31d56eb15ca69f8e"}, + {file = "sqlalchemy-2.0.36.tar.gz", hash = "sha256:7f2767680b6d2398aea7082e45a774b2b0767b5c8d8ffb9c8b683088ea9b29c5"}, +] + +[package.dependencies] +greenlet = {version = "!=0.4.17", markers = "python_version < \"3.13\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} +typing-extensions = ">=4.6.0" [package.extras] -aiomysql = ["aiomysql", "greenlet (!=0.4.17)"] -aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing-extensions (!=3.10.0.1)"] +aiomysql = ["aiomysql (>=0.2.0)", "greenlet (!=0.4.17)"] +aioodbc = ["aioodbc", "greenlet (!=0.4.17)"] +aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing_extensions (!=3.10.0.1)"] asyncio = ["greenlet (!=0.4.17)"] -asyncmy = ["asyncmy (>=0.2.3,!=0.2.4)", "greenlet (!=0.4.17)"] -mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2)"] +asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (!=0.4.17)"] +mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2,!=1.1.5,!=1.1.10)"] mssql = ["pyodbc"] mssql-pymssql = ["pymssql"] mssql-pyodbc = ["pyodbc"] -mypy = ["mypy (>=0.910)", "sqlalchemy2-stubs"] -mysql = ["mysqlclient (>=1.4.0)", "mysqlclient (>=1.4.0,<2)"] +mypy = ["mypy (>=0.910)"] +mysql = ["mysqlclient (>=1.4.0)"] mysql-connector = 
["mysql-connector-python"] -oracle = ["cx-oracle (>=7)", "cx-oracle (>=7,<8)"] +oracle = ["cx_oracle (>=8)"] +oracle-oracledb = ["oracledb (>=1.0.1)"] postgresql = ["psycopg2 (>=2.7)"] postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] -postgresql-pg8000 = ["pg8000 (>=1.16.6,!=1.29.0)"] +postgresql-pg8000 = ["pg8000 (>=1.29.1)"] +postgresql-psycopg = ["psycopg (>=3.0.7)"] postgresql-psycopg2binary = ["psycopg2-binary"] postgresql-psycopg2cffi = ["psycopg2cffi"] -pymysql = ["pymysql", "pymysql (<1)"] -sqlcipher = ["sqlcipher3-binary"] +postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] +pymysql = ["pymysql"] +sqlcipher = ["sqlcipher3_binary"] [[package]] name = "sqlalchemy-utils" @@ -4675,35 +4820,20 @@ test-all = ["Babel (>=1.3)", "Jinja2 (>=2.3)", "Pygments (>=1.2)", "arrow (>=0.3 timezone = ["python-dateutil"] url = ["furl (>=0.4.1)"] -[[package]] -name = "sqlalchemy2-stubs" -version = "0.0.2a38" -description = "Typing Stubs for SQLAlchemy 1.4" -optional = false -python-versions = ">=3.6" -files = [ - {file = "sqlalchemy2-stubs-0.0.2a38.tar.gz", hash = "sha256:861d722abeb12f13eacd775a9f09379b11a5a9076f469ccd4099961b95800f9e"}, - {file = "sqlalchemy2_stubs-0.0.2a38-py3-none-any.whl", hash = "sha256:b62aa46943807287550e2033dafe07564b33b6a815fbaa3c144e396f9cc53bcb"}, -] - -[package.dependencies] -typing-extensions = ">=3.7.4" - [[package]] name = "sqlmodel" -version = "0.0.8" +version = "0.0.22" description = "SQLModel, SQL databases in Python, designed for simplicity, compatibility, and robustness." optional = false -python-versions = ">=3.6.1,<4.0.0" +python-versions = ">=3.7" files = [ - {file = "sqlmodel-0.0.8-py3-none-any.whl", hash = "sha256:0fd805719e0c5d4f22be32eb3ffc856eca3f7f20e8c7aa3e117ad91684b518ee"}, - {file = "sqlmodel-0.0.8.tar.gz", hash = "sha256:3371b4d1ad59d2ffd0c530582c2140b6c06b090b32af9b9c6412986d7b117036"}, + {file = "sqlmodel-0.0.22-py3-none-any.whl", hash = "sha256:a1ed13e28a1f4057cbf4ff6cdb4fc09e85702621d3259ba17b3c230bfb2f941b"}, + {file = "sqlmodel-0.0.22.tar.gz", hash = "sha256:7d37c882a30c43464d143e35e9ecaf945d88035e20117bf5ec2834a23cbe505e"}, ] [package.dependencies] -pydantic = ">=1.8.2,<2.0.0" -SQLAlchemy = ">=1.4.17,<=1.4.41" -sqlalchemy2-stubs = "*" +pydantic = ">=1.10.13,<3.0.0" +SQLAlchemy = ">=2.0.14,<2.1.0" [[package]] name = "starkbank-ecdsa" @@ -4881,13 +5011,13 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "uvicorn" -version = "0.20.0" +version = "0.32.1" description = "The lightning-fast ASGI server." 
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 files = [
-    {file = "uvicorn-0.20.0-py3-none-any.whl", hash = "sha256:c3ed1598a5668208723f2bb49336f4509424ad198d6ab2615b7783db58d919fd"},
-    {file = "uvicorn-0.20.0.tar.gz", hash = "sha256:a4e12017b940247f836bc90b72e725d7dfd0c8ed1c51eb365f5ba30d9f5127d8"},
+    {file = "uvicorn-0.32.1-py3-none-any.whl", hash = "sha256:82ad92fd58da0d12af7482ecdb5f2470a04c9c9a53ced65b9bbb4a205377602e"},
+    {file = "uvicorn-0.32.1.tar.gz", hash = "sha256:ee9519c246a72b1c084cea8d3b44ed6026e78a4a309cbedae9c37e4cb9fbb175"},
 ]

 [package.dependencies]
@@ -4895,7 +5025,7 @@ click = ">=7.0"
 h11 = ">=0.8"

 [package.extras]
-standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"]
+standard = ["colorama (>=0.4)", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"]

 [[package]]
 name = "validators"
@@ -5199,4 +5329,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools",
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.11,<3.12"
-content-hash = "e866f8f4cf8210e17e03248ad91f473c777ecdd3405773f0d23a7c93210c9196"
+content-hash = "a3319e110409281b9a3fa4c09300681fb82ef73bd05bbe1f93081a669b7635d7"
diff --git a/pyproject.toml b/pyproject.toml
index fc2b44046..97541c9ec 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "keep"
-version = "0.31.8"
+version = "0.32.0"
 description = "Alerting. for developers, by developers."
 authors = ["Keep Alerting LTD"]
 packages = [{include = "keep"}]
@@ -25,16 +25,16 @@ boto3 = "^1.26.72"
 validators = "^0.20.0"
 python-telegram-bot = "^20.1"
 fastapi = "^0.109.1"
-uvicorn = "^0.20.0"
+uvicorn = "0.32.1"
 opsgenie-sdk = "^2.1.5"
 starlette-context = "^0.3.6"
 datadog-api-client = "^2.12.0"
-sqlmodel = "^0.0.8"
+sqlmodel = "^0.0.22"
 cloud-sql-python-connector = "1.12.0"
 pymysql = "^1.1.1"
 google-cloud-secret-manager = "^2.16.1"
 python-jose = "^3.3.0"
-sqlalchemy = "1.4.41"
+sqlalchemy = "^2.0.14"
 snowflake-connector-python = "3.12.3"
 openai = "1.37.1"
 opentelemetry-sdk = ">=1.20.0,<1.22"
@@ -86,6 +86,9 @@ google-cloud-logging = "^3.11.3"
 json5 = "^0.9.28"
 pytest-xdist = "^3.6.1"
+psycopg-binary = "^3.2.3"
+psycopg = "^3.2.3"
+prometheus-client = "^0.21.1"
 psycopg2-binary = "^2.9.10"

 [tool.poetry.group.dev.dependencies]
 pre-commit = "^3.0.4"
@@ -103,6 +106,7 @@ pytest-docker = "^2.0.1"
 playwright = "^1.44.0"
 freezegun = "^1.5.1"
+pytest-timeout = "^2.3.1"

 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
diff --git a/scripts/simulate_alerts.sh b/scripts/simulate_alerts.sh
index 8361fa80a..7c4600b62 100755
--- a/scripts/simulate_alerts.sh
+++ b/scripts/simulate_alerts.sh
@@ -14,7 +14,7 @@ ROOT="$(dirname $0)/.."

 # Function to start the processes
 start_processes() {
for ((i=0; i