Skip to content

Commit

Permalink
feat: better control over background workers (#1620)
Browse files Browse the repository at this point in the history
  • Loading branch information
Matvey-Kuk authored Aug 15, 2024
1 parent 2502b2d commit e7af854
Show file tree
Hide file tree
Showing 5 changed files with 105 additions and 70 deletions.
2 changes: 1 addition & 1 deletion ee/experimental/incident_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@

logger = logging.getLogger(__name__)

ALGORITHM_VERBOSE_NAME = "Basic correlation algorithm v0.2"
ALGORITHM_VERBOSE_NAME = "Correlation algorithm v0.2"
USE_N_HISTORICAL_ALERTS = 10e10
USE_N_HISTORICAL_INCIDENTS = 10e10

Expand Down
104 changes: 59 additions & 45 deletions keep-ui/app/ai/ai.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -74,52 +74,55 @@ export default function Ai() {
</div>
<Card className="mt-10 p-4 md:p-10 mx-auto">
<div>
<div className="prose-2xl">👋 You are almost there!</div>
AI Correlation is coming soon. Make sure you have enough data
collected to prepare.
<div className="max-w-md mt-10 flex justify-items-start justify-start">
<List>
<ListItem>
<span>
Connect an incident source to dump incidents, or create 10
incidents manually
</span>
<span>
{aistats?.incidents_count &&
aistats?.incidents_count >= 10 ? (
<div></div>
) : (
<div></div>
)}
</span>
</ListItem>
<ListItem>
<span>Collect 100 alerts</span>
<span>
{aistats?.alerts_count && aistats?.alerts_count >= 100 ? (
<div></div>
) : (
<div></div>
)}
</span>
</ListItem>
<ListItem>
<span>Collect alerts for more than 3 days</span>
<span>
{aistats?.first_alert_datetime &&
new Date(aistats.first_alert_datetime) <
new Date(Date.now() - 3 * 24 * 60 * 60 * 1000) ? (
<div></div>
) : (
<div></div>
)}
</span>
</ListItem>
</List>
</div>
{aistats?.is_mining_enabled == false && (
<div>
<div className="prose-2xl">👋 You are almost there!</div>
AI Correlation is coming soon. Make sure you have enough data
collected to prepare.
<div className="max-w-md mt-10 flex justify-items-start justify-start">
<List>
<ListItem>
<span>
Connect an incident source to dump incidents, or create 10
incidents manually
</span>
<span>
{aistats?.incidents_count &&
aistats?.incidents_count >= 10 ? (
<div></div>
) : (
<div></div>
)}
</span>
</ListItem>
<ListItem>
<span>Collect 100 alerts</span>
<span>
{aistats?.alerts_count && aistats?.alerts_count >= 100 ? (
<div></div>
) : (
<div></div>
)}
</span>
</ListItem>
<ListItem>
<span>Collect alerts for more than 3 days</span>
<span>
{aistats?.first_alert_datetime &&
new Date(aistats.first_alert_datetime) <
new Date(Date.now() - 3 * 24 * 60 * 60 * 1000) ? (
<div></div>
) : (
<div></div>
)}
</span>
</ListItem>
</List>
</div>
</div>
)}
{aistats?.is_mining_enabled && (
<div>
<div className="prose-2xl mt-10">AI:</div>
<div className="grid grid-cols-2 gap-4 mt-6">
<Card
className={
Expand All @@ -137,7 +140,8 @@ export default function Ai() {

<div className="mt-4">
<Subtitle>Log:</Subtitle>
{!basicAlgorithmLog && (<p>No recent logs found.</p>)}{basicAlgorithmLog}
{!basicAlgorithmLog && <p>No recent logs found.</p>}
{basicAlgorithmLog}
</div>

<button
Expand Down Expand Up @@ -192,6 +196,16 @@ export default function Ai() {
</div>
</button>
</Card>
<Card
className={"p-4 flex flex-col w-full border-white border-2"}
>
<h3 className="text-lg sm:text-xl font-semibold line-clamp-2">
Summorization v0.1
</h3>
<p className="text-sm top-0">
Using LLMs to provide a human-readable incident summary.
</p>
</Card>
</div>
</div>
)}
Expand Down
5 changes: 2 additions & 3 deletions keep/api/routes/ai.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
import logging

from fastapi import (
Expand All @@ -8,7 +7,7 @@

from keep.api.core.dependencies import AuthenticatedEntity, AuthVerifier
from keep.api.core.db import get_incidents_count, get_alerts_count, get_first_alert_datetime
from keep.api.utils.import_ee import ALGORITHM_VERBOSE_NAME
from keep.api.utils.import_ee import ALGORITHM_VERBOSE_NAME, is_ee_enabled_for_tenant


router = APIRouter()
Expand All @@ -27,6 +26,6 @@ def get_stats(
"alerts_count": get_alerts_count(tenant_id),
"first_alert_datetime": get_first_alert_datetime(tenant_id),
"incidents_count": get_incidents_count(tenant_id),
"is_mining_enabled": os.environ.get("EE_ENABLED", "false") == "true",
"is_mining_enabled": is_ee_enabled_for_tenant(tenant_id),
"algorithm_verbose_name": str(ALGORITHM_VERBOSE_NAME)
}
47 changes: 26 additions & 21 deletions keep/api/tasks/process_background_ai_task.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import time
import asyncio
import logging
import datetime

from keep.api.utils.import_ee import mine_incidents_and_create_objects, ALGORITHM_VERBOSE_NAME
from keep.api.core.tenant_configuration import TenantConfiguration
from keep.api.utils.import_ee import mine_incidents_and_create_objects, ALGORITHM_VERBOSE_NAME, is_ee_enabled_for_tenant
from keep.api.core.db import get_tenants_configurations

logger = logging.getLogger(__name__)


async def process_correlation(ctx, tenant_id:str):
await asyncio.sleep(180)
logger.info(
f"Background AI task started, {ALGORITHM_VERBOSE_NAME}",
extra={"algorithm": ALGORITHM_VERBOSE_NAME, "tenant_id": tenant_id},
Expand Down Expand Up @@ -45,27 +44,33 @@ async def process_background_ai_task(
logger.error(f"Error getting queued jobs, happens sometimes with unknown reason: {e}")
return None

tenant_configuration = TenantConfiguration()

if mine_incidents_and_create_objects is not NotImplemented:
for tenant in get_tenants_configurations():

# Because of https://github.com/python-arq/arq/issues/432 we need to check if the job is already running
# The other option would be to twick "keep_result" but it will make debugging harder
job_prefix = 'process_correlation_tenant_id_' + str(tenant)
jobs_with_same_prefix = [job for job in all_jobs if job.job_id.startswith(job_prefix)]
if len(jobs_with_same_prefix) > 0:
logger.info(
f"No {ALGORITHM_VERBOSE_NAME} for tenant {tenant} scheduled because there is already one running",
extra={"algorithm": ALGORITHM_VERBOSE_NAME, "tenant_id": tenant},
)
if is_ee_enabled_for_tenant(tenant, tenant_configuration=tenant_configuration):
# Because of https://github.com/python-arq/arq/issues/432 we need to check if the job is already running
# The other option would be to twick "keep_result" but it will make debugging harder
job_prefix = 'process_correlation_tenant_id_' + str(tenant)
jobs_with_same_prefix = [job for job in all_jobs if job.job_id.startswith(job_prefix)]
if len(jobs_with_same_prefix) > 0:
logger.info(
f"No {ALGORITHM_VERBOSE_NAME} for tenant {tenant} scheduled because there is already one running",
extra={"algorithm": ALGORITHM_VERBOSE_NAME, "tenant_id": tenant},
)
else:
job = await pool.enqueue_job(
"process_correlation",
tenant_id=tenant,
_job_id=job_prefix + ":" + str(time.time()), # Strict ID ensures uniqueness
_job_try=1
)
logger.info(
f"{ALGORITHM_VERBOSE_NAME} for tenant {tenant} scheduled, job: {job}",
extra={"algorithm": ALGORITHM_VERBOSE_NAME, "tenant_id": tenant},
)
else:
job = await pool.enqueue_job(
"process_correlation",
tenant_id=tenant,
_job_id=job_prefix + ":" + str(time.time()), # Strict ID ensures uniqueness
_job_try=1
)
logger.info(
f"{ALGORITHM_VERBOSE_NAME} for tenant {tenant} scheduled, job: {job}",
f"No {ALGORITHM_VERBOSE_NAME} for tenant {tenant} scheduled because EE is disabled for this tenant",
extra={"algorithm": ALGORITHM_VERBOSE_NAME, "tenant_id": tenant},
)

17 changes: 17 additions & 0 deletions keep/api/utils/import_ee.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import sys
import pathlib

from keep.api.core.tenant_configuration import TenantConfiguration

EE_ENABLED = os.environ.get("EE_ENABLED", "false") == "true"
EE_PATH = os.environ.get("EE_PATH", "../ee") # Path related to the fastapi root directory

Expand All @@ -19,3 +21,18 @@
else:
mine_incidents_and_create_objects = NotImplemented
ALGORITHM_VERBOSE_NAME = NotImplemented

def is_ee_enabled_for_tenant(tenant_id: str, tenant_configuration=None) -> bool:
if not EE_ENABLED:
return False

if tenant_configuration is None:
tenant_configuration = TenantConfiguration()

config = tenant_configuration.get_configuration(
tenant_id, "ee_enabled"
)
if config is None:
return False

return bool(config)

0 comments on commit e7af854

Please sign in to comment.