From 2a453f09073cd844c748312fb34ca06bc53084e1 Mon Sep 17 00:00:00 2001 From: Vladimir Filonov Date: Sun, 19 Jan 2025 12:04:50 +0400 Subject: [PATCH] fix: yaml.safe_load is slow as hell (#3056) Co-authored-by: Tal --- keep/actions/actions_factory.py | 10 ++++----- keep/api/models/workflow.py | 11 +++++----- keep/api/routes/actions.py | 6 ++--- keep/api/routes/workflows.py | 18 +++++++-------- keep/cli/cli.py | 6 ++--- keep/functions/cyaml.py | 14 ++++++++++++ keep/parser/parser.py | 15 ++++++------- keep/providers/aks_provider/aks_provider.py | 4 ++-- .../sendgrid_provider/sendgrid_provider.py | 4 ++-- keep/workflowmanager/workflowstore.py | 22 +++++++++---------- tests/test_parser.py | 5 +++-- 11 files changed, 65 insertions(+), 50 deletions(-) create mode 100644 keep/functions/cyaml.py diff --git a/keep/actions/actions_factory.py b/keep/actions/actions_factory.py index 59dde1a67..5177acee4 100644 --- a/keep/actions/actions_factory.py +++ b/keep/actions/actions_factory.py @@ -1,6 +1,5 @@ import time import logging -import yaml from io import StringIO from uuid import uuid4 from typing import List, Union @@ -9,6 +8,7 @@ from keep.api.models.db.action import Action from keep.api.core.db import get_all_actions, create_actions, delete_action, get_action, update_action from keep.actions.actions_exception import ActionsCRUDException +from keep.functions import cyaml logger = logging.getLogger(__name__) @@ -25,7 +25,7 @@ def _convert_models_to_dtos(models: List[Action]) -> List[ActionDTO]: results: List[ActionDTO] = [] for model in models: try: - dto = ActionDTO(id=model.id, use=model.use, name=model.name, details=yaml.safe_load(StringIO(model.action_raw))) + dto = ActionDTO(id=model.id, use=model.use, name=model.name, details=cyaml.safe_load(StringIO(model.action_raw))) results.append(dto) except ValidationError: logger.warning("Unmatched Action model and the coresponding DTO", exc_info=True, extra={ @@ -45,7 +45,7 @@ def add_actions(tenant_id: str, installed_by: str, action_dtos: List[dict]): installation_time=time.time(), name=action_dto.get("name"), use=action_dto.get("use") or action_dto.get("name"), # if there is no `use` tag, use `name` instead - action_raw=yaml.dump(action_dto) + action_raw=cyaml.dump(action_dto) ) actions.append(action) create_actions(actions) @@ -76,7 +76,7 @@ def update_action(tenant_id: str, action_id: str, payload: dict) -> Union[Action action_payload = Action( name=payload.get("name"), use=payload.get("use") or payload.get("name"), - action_raw=yaml.dump(payload) + action_raw=cyaml.dump(payload) ) updated_action = update_action(tenant_id, action_id, action_payload) if updated_action: @@ -84,4 +84,4 @@ def update_action(tenant_id: str, action_id: str, payload: dict) -> Union[Action raise ActionsCRUDException(status_code=422, detail="No action matched to be updated") except Exception: logger.exception("Uknown exception when update an action on database") - raise ActionsCRUDException(status_code=400, detail="Unable to update an action") \ No newline at end of file + raise ActionsCRUDException(status_code=400, detail="Unable to update an action") diff --git a/keep/api/models/workflow.py b/keep/api/models/workflow.py index b4d5c3285..bd9eb1f35 100644 --- a/keep/api/models/workflow.py +++ b/keep/api/models/workflow.py @@ -2,16 +2,17 @@ from datetime import datetime from typing import List, Literal, Optional -import yaml from pydantic import BaseModel, validator +from keep.functions import cyaml + def represent_ordered_dict(dumper, data): filtered_data = {k: v for k, v in data.items() if v is not None} return dumper.represent_mapping("tag:yaml.org,2002:map", filtered_data.items()) -yaml.add_representer(OrderedDict, represent_ordered_dict) +cyaml.add_representer(OrderedDict, represent_ordered_dict) class ProviderDTO(BaseModel): @@ -44,7 +45,7 @@ class WorkflowDTO(BaseModel): @property def workflow_raw_id(self): - id = yaml.safe_load(self.workflow_raw).get("id") + id = cyaml.safe_load(self.workflow_raw).get("id") return id @validator("workflow_raw", pre=False, always=True) @@ -64,7 +65,7 @@ def manipulate_raw(cls, raw, values): _type_: _description_ """ ordered_raw = OrderedDict() - d = yaml.safe_load(raw) + d = cyaml.safe_load(raw) # id desc and triggers ordered_raw["id"] = d.get("id") values["workflow_raw_id"] = d.get("id") @@ -85,7 +86,7 @@ def manipulate_raw(cls, raw, values): ordered_raw["steps"] = d.get("steps") # last, actions ordered_raw["actions"] = d.get("actions") - return yaml.dump(ordered_raw, width=99999) + return cyaml.dump(ordered_raw, width=99999) class WorkflowExecutionLogsDTO(BaseModel): diff --git a/keep/api/routes/actions.py b/keep/api/routes/actions.py index fc1fbc5ac..3e0e85b60 100644 --- a/keep/api/routes/actions.py +++ b/keep/api/routes/actions.py @@ -1,10 +1,10 @@ import logging -import yaml from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile, status from fastapi.responses import JSONResponse from keep.actions.actions_factory import ActionsCRUD +from keep.functions import cyaml from keep.identitymanager.authenticatedentity import AuthenticatedEntity from keep.identitymanager.identitymanagerfactory import IdentityManagerFactory @@ -39,8 +39,8 @@ async def _get_action_info(request: Request, file: UploadFile) -> dict: action_inforaw = await file.read() else: action_inforaw = await request.body() - action_info = yaml.safe_load(action_inforaw) - except yaml.YAMLError: + action_info = cyaml.safe_load(action_inforaw) + except cyaml.YAMLError: logger.exception("Invalid YAML format when parsing actions file") raise HTTPException(status_code=400, detail="Invalid yaml format") return action_info diff --git a/keep/api/routes/workflows.py b/keep/api/routes/workflows.py index 363d45b16..3a2a098d1 100644 --- a/keep/api/routes/workflows.py +++ b/keep/api/routes/workflows.py @@ -4,7 +4,6 @@ from typing import Any, Dict, List, Optional import validators -import yaml from fastapi import ( APIRouter, Body, @@ -38,6 +37,7 @@ ) from keep.api.utils.enrichment_helpers import convert_db_alerts_to_dto_alerts from keep.api.utils.pagination import WorkflowExecutionsPaginatedResultsDto +from keep.functions import cyaml from keep.identitymanager.authenticatedentity import AuthenticatedEntity from keep.identitymanager.identitymanagerfactory import IdentityManagerFactory from keep.parser.parser import Parser @@ -118,9 +118,9 @@ def get_workflows( # create the workflow DTO try: - workflow_raw = yaml.safe_load(workflow.workflow_raw) + workflow_raw = cyaml.safe_load(workflow.workflow_raw) # very big width to avoid line breaks - workflow_raw = yaml.dump(workflow_raw, width=99999) + workflow_raw = cyaml.dump(workflow_raw, width=99999) workflow_dto = WorkflowDTO( id=workflow.id, name=workflow.name, @@ -313,7 +313,7 @@ async def __get_workflow_raw_data(request: Request, file: UploadFile | None) -> workflow_raw_data = await file.read() else: workflow_raw_data = await request.body() - workflow_data = yaml.safe_load(workflow_raw_data) + workflow_data = cyaml.safe_load(workflow_raw_data) # backward comptability if "alert" in workflow_data: workflow_data = workflow_data.pop("alert") @@ -321,7 +321,7 @@ async def __get_workflow_raw_data(request: Request, file: UploadFile | None) -> elif "workflow" in workflow_data: workflow_data = workflow_data.pop("workflow") - except yaml.YAMLError: + except cyaml.YAMLError: logger.exception("Invalid YAML format") raise HTTPException(status_code=400, detail="Invalid YAML format") return workflow_data @@ -518,7 +518,7 @@ async def update_workflow_by_id( workflow["name"] = workflow_from_db.name workflow_from_db.description = workflow.get("description") workflow_from_db.interval = workflow_interval - workflow_from_db.workflow_raw = yaml.dump(workflow, width=99999) + workflow_from_db.workflow_raw = cyaml.dump(workflow, width=99999) workflow_from_db.last_updated = datetime.datetime.now() session.add(workflow_from_db) session.commit() @@ -587,9 +587,9 @@ def get_workflow_by_id( providers_dto, triggers = [], [] # Default in case of failure try: - workflow_yaml = yaml.safe_load(workflow.workflow_raw) + workflow_yaml = cyaml.safe_load(workflow.workflow_raw) valid_workflow_yaml = {"workflow": workflow_yaml} - final_workflow_raw = yaml.dump(valid_workflow_yaml, width=99999) + final_workflow_raw = cyaml.dump(valid_workflow_yaml, width=99999) workflow_dto = WorkflowDTO( id=workflow.id, name=workflow.name, @@ -604,7 +604,7 @@ def get_workflow_by_id( disabled=workflow.is_disabled, ) return workflow_dto - except yaml.YAMLError: + except cyaml.YAMLError: logger.exception("Invalid YAML format") raise HTTPException(status_code=500, detail="Error fetching workflow meta data") diff --git a/keep/cli/cli.py b/keep/cli/cli.py index 81a29c777..ed19b1826 100644 --- a/keep/cli/cli.py +++ b/keep/cli/cli.py @@ -11,11 +11,11 @@ import click import requests -import yaml from dotenv import find_dotenv, load_dotenv from prettytable import PrettyTable from keep.api.core.posthog import posthog_client +from keep.functions import cyaml from keep.providers.models.provider_config import ProviderScope from keep.providers.providers_factory import ProvidersFactory @@ -103,7 +103,7 @@ def set_config(self, keep_config: str): try: with open(file=keep_config, mode="r") as f: self.logger.debug("Loading configuration file.") - self.config = yaml.safe_load(f) or {} + self.config = cyaml.safe_load(f) or {} self.logger.debug("Configuration file loaded.") except FileNotFoundError: @@ -123,7 +123,7 @@ def set_config(self, keep_config: str): self.random_user_id = str(uuid.uuid4()) self.config["random_user_id"] = self.random_user_id with open(file=keep_config, mode="w") as f: - yaml.dump(self.config, f) + cyaml.dump(self.config, f) arguments = sys.argv diff --git a/keep/functions/cyaml.py b/keep/functions/cyaml.py new file mode 100644 index 000000000..4daa0d075 --- /dev/null +++ b/keep/functions/cyaml.py @@ -0,0 +1,14 @@ +import yaml +from yaml import * + + +def safe_load(stream): + return yaml.load(stream, Loader=yaml.CSafeLoader) + +def dump(data, stream=None, Dumper=None, **kwds): + Dumper = Dumper or yaml.CDumper + return yaml.dump(data, stream, Dumper=Dumper, **kwds) + +def add_representer(data_type, representer, Dumper=None): + Dumper = Dumper or yaml.CDumper + Dumper.add_representer(data_type, representer) diff --git a/keep/parser/parser.py b/keep/parser/parser.py index 72c30c7ad..f9c931dc6 100644 --- a/keep/parser/parser.py +++ b/keep/parser/parser.py @@ -5,11 +5,10 @@ import re import typing -import yaml - from keep.actions.actions_factory import ActionsCRUD from keep.api.core.db import get_workflow_id from keep.contextmanager.contextmanager import ContextManager +from keep.functions import cyaml from keep.providers.providers_factory import ProvidersFactory from keep.step.step import Step, StepType from keep.step.step_provider_parameter import StepProviderParameter @@ -332,8 +331,8 @@ def _parse_providers_from_file( ): with open(providers_file, "r") as file: try: - providers = yaml.safe_load(file) - except yaml.YAMLError: + providers = cyaml.safe_load(file) + except cyaml.YAMLError: self.logger.exception(f"Error parsing providers file {providers_file}") raise context_manager.providers_context.update(providers) @@ -465,8 +464,8 @@ def _parse_actions_from_file( if actions_file and os.path.isfile(actions_file): with open(actions_file, "r") as file: try: - actions_content = yaml.safe_load(file) - except yaml.YAMLError: + actions_content = cyaml.safe_load(file) + except cyaml.YAMLError: self.logger.exception(f"Error parsing actions file {actions_file}") raise # create a hashmap -> action @@ -553,8 +552,8 @@ def _load_actions_from_file( actions = [] with open(actions_file, "r") as file: try: - actions = yaml.safe_load(file) - except yaml.YAMLError: + actions = cyaml.safe_load(file) + except cyaml.YAMLError: self.logger.exception(f"Error parsing actions file {actions_file}") raise # convert actions into dictionary of unique object by id diff --git a/keep/providers/aks_provider/aks_provider.py b/keep/providers/aks_provider/aks_provider.py index c0199977b..e9f539972 100644 --- a/keep/providers/aks_provider/aks_provider.py +++ b/keep/providers/aks_provider/aks_provider.py @@ -2,13 +2,13 @@ import logging import pydantic -import yaml from azure.identity import ClientSecretCredential from azure.mgmt.containerservice import ContainerServiceClient from kubernetes import client, config from keep.contextmanager.contextmanager import ContextManager from keep.exceptions.provider_exception import ProviderException +from keep.functions import cyaml from keep.providers.base.base_provider import BaseProvider from keep.providers.models.provider_config import ProviderConfig from keep.providers.providers_factory import ProvidersFactory @@ -114,7 +114,7 @@ def __generate_client(self): ) # parse the kubeconfig (parsed as yml string) - kubeconfig = yaml.safe_load( + kubeconfig = cyaml.safe_load( cluster_creds.kubeconfigs[0].value.decode("utf-8") ) diff --git a/keep/providers/sendgrid_provider/sendgrid_provider.py b/keep/providers/sendgrid_provider/sendgrid_provider.py index 3ed4e90b6..36abe963f 100644 --- a/keep/providers/sendgrid_provider/sendgrid_provider.py +++ b/keep/providers/sendgrid_provider/sendgrid_provider.py @@ -11,6 +11,7 @@ from sendgrid.helpers.mail import Mail from keep.contextmanager.contextmanager import ContextManager +from keep.functions import cyaml from keep.providers.base.base_provider import BaseProvider from keep.providers.models.provider_config import ProviderConfig, ProviderScope from keep.providers.providers_factory import ProvidersFactory @@ -209,9 +210,8 @@ def dispose(self): ) scopes = provider.validate_scopes() print(scopes) - import yaml - mail = yaml.safe_load( + mail = cyaml.safe_load( """to: - "youremail@gmail.com" - "youranotheremail@gmail.com" diff --git a/keep/workflowmanager/workflowstore.py b/keep/workflowmanager/workflowstore.py index 109a4d2b2..fd7926f3a 100644 --- a/keep/workflowmanager/workflowstore.py +++ b/keep/workflowmanager/workflowstore.py @@ -6,7 +6,6 @@ import requests import validators -import yaml from fastapi import HTTPException from keep.api.core.db import ( @@ -24,6 +23,7 @@ ) from keep.api.models.db.workflow import Workflow as WorkflowModel from keep.api.models.workflow import ProviderDTO +from keep.functions import cyaml from keep.parser.parser import Parser from keep.providers.providers_factory import ProvidersFactory from keep.workflowmanager.workflow import Workflow @@ -56,7 +56,7 @@ def create_workflow(self, tenant_id: str, created_by, workflow: dict): created_by=created_by, interval=interval, is_disabled=Parser.parse_disabled(workflow), - workflow_raw=yaml.dump(workflow, width=99999), + workflow_raw=cyaml.dump(workflow, width=99999), ) self.logger.info(f"Workflow {workflow_id} created successfully") return workflow @@ -100,9 +100,9 @@ def _parse_workflow_to_dict(self, workflow_path: str) -> dict: def get_raw_workflow(self, tenant_id: str, workflow_id: str) -> str: raw_workflow = get_raw_workflow(tenant_id, workflow_id) - workflow_yaml = yaml.safe_load(raw_workflow) + workflow_yaml = cyaml.safe_load(raw_workflow) valid_workflow_yaml = {"workflow": workflow_yaml} - return yaml.dump(valid_workflow_yaml, width=99999) + return cyaml.dump(valid_workflow_yaml, width=99999) def get_workflow(self, tenant_id: str, workflow_id: str) -> Workflow: workflow = get_raw_workflow(tenant_id, workflow_id) @@ -111,7 +111,7 @@ def get_workflow(self, tenant_id: str, workflow_id: str) -> Workflow: status_code=404, detail=f"Workflow {workflow_id} not found", ) - workflow_yaml = yaml.safe_load(workflow) + workflow_yaml = cyaml.safe_load(workflow) workflow = self.parser.parse(tenant_id, workflow_yaml) if len(workflow) > 1: raise HTTPException( @@ -293,7 +293,7 @@ def provision_workflows_from_directory( try: with open(workflow_path, "r") as yaml_file: - workflow_yaml = yaml.safe_load(yaml_file) + workflow_yaml = cyaml.safe_load(yaml_file) if "workflow" in workflow_yaml: workflow_yaml = workflow_yaml["workflow"] # backward compatibility @@ -317,7 +317,7 @@ def provision_workflows_from_directory( created_by="system", interval=workflow_interval, is_disabled=workflow_disabled, - workflow_raw=yaml.dump(workflow_yaml, width=99999), + workflow_raw=cyaml.dump(workflow_yaml, width=99999), provisioned=True, provisioned_file=workflow_path, ) @@ -347,8 +347,8 @@ def _read_workflow_from_stream(self, stream) -> dict: """ self.logger.debug("Parsing workflow") try: - workflow = yaml.safe_load(stream) - except yaml.YAMLError as e: + workflow = cyaml.safe_load(stream) + except cyaml.YAMLError as e: self.logger.error(f"Error parsing workflow: {e}") raise e return workflow @@ -386,7 +386,7 @@ def get_random_workflow_templates( workflow_yaml = self._parse_workflow_to_dict(file_path) if "workflow" in workflow_yaml: workflow_yaml["name"] = workflow_yaml["workflow"]["id"] - workflow_yaml["workflow_raw"] = yaml.dump(workflow_yaml) + workflow_yaml["workflow_raw"] = cyaml.dump(workflow_yaml) workflow_yaml["workflow_raw_id"] = workflow_yaml["workflow"]["id"] workflows.append(workflow_yaml) count += 1 @@ -466,7 +466,7 @@ def get_workflow_meta_data( return providers_dto, triggers # Parse the workflow YAML safely - workflow_yaml = yaml.safe_load(workflow_raw_data) + workflow_yaml = cyaml.safe_load(workflow_raw_data) if not workflow_yaml: self.logger.error( f"Parsed workflow_yaml is empty or invalid: {workflow_raw_data}" diff --git a/tests/test_parser.py b/tests/test_parser.py index 6e607fe37..fcf08f19f 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -13,6 +13,7 @@ from keep.api.core.dependencies import SINGLE_TENANT_UUID from keep.api.models.db.action import Action from keep.contextmanager.contextmanager import ContextManager +from keep.functions import cyaml from keep.parser.parser import Parser, ParserUtils from keep.providers.mock_provider.mock_provider import MockProvider from keep.providers.models.provider_config import ProviderConfig @@ -203,7 +204,7 @@ def mock_safeload(*args, **kwargs): monkeypatch.setattr( builtins, "open", mocker.mock_open(read_data="does not matter") ) - monkeypatch.setattr(yaml, "safe_load", mock_safeload) + monkeypatch.setattr(cyaml, "safe_load", mock_safeload) # ACT parse_file_setup(context_manager) @@ -223,7 +224,7 @@ def mock_safeload(*args, **kwargs): monkeypatch.setattr( builtins, "open", mocker.mock_open(read_data="does not matter") ) - monkeypatch.setattr(yaml, "safe_load", mock_safeload) + monkeypatch.setattr(cyaml, "safe_load", mock_safeload) # ACT/ASSERT with pytest.raises(yaml.YAMLError):