diff --git a/kairon/actions/definitions/vector_action.py b/kairon/actions/definitions/vector_action.py index c44dc3459..860dcfef4 100644 --- a/kairon/actions/definitions/vector_action.py +++ b/kairon/actions/definitions/vector_action.py @@ -76,7 +76,7 @@ async def execute(self, dispatcher: CollectingDispatcher, tracker: Tracker, doma else payload_type.get('value') msg_logger.append(request_body) tracker_data = ActionUtility.build_context(tracker, True) - response = vector_db.perform_operation(operation_type.get('value'), request_body) + response = vector_db.perform_operation(operation_type, request_body) logger.info("response: " + str(response)) response_context = self.__add_user_context_to_http_response(response, tracker_data) bot_response, bot_resp_log = ActionUtility.compose_response(vector_action_config['response'], response_context) diff --git a/kairon/api/app/routers/bot/data.py b/kairon/api/app/routers/bot/data.py index af3ce336a..403b2dbad 100644 --- a/kairon/api/app/routers/bot/data.py +++ b/kairon/api/app/routers/bot/data.py @@ -4,9 +4,10 @@ from starlette.requests import Request from starlette.responses import FileResponse -from kairon.api.models import Response, TextData, CognitiveDataRequest +from kairon.api.models import Response, CognitiveDataRequest, CognitionSchemaRequest from kairon.events.definitions.faq_importer import FaqDataImporterEvent from kairon.shared.auth import Authentication +from kairon.shared.cognition.processor import CognitionDataProcessor from kairon.shared.constants import DESIGNER_ACCESS from kairon.shared.data.processor import MongoProcessor from kairon.shared.models import User @@ -14,6 +15,7 @@ router = APIRouter() processor = MongoProcessor() +cognition_processor = CognitionDataProcessor() @router.post("/faq/upload", response_model=Response) @@ -51,86 +53,92 @@ async def download_faq_files( return response -@router.post("/text/faq", response_model=Response) -async def save_bot_text( - text: TextData, +@router.get("/text/faq", response_model=Response) +async def get_text( + request: Request, + current_user: User = Security(Authentication.get_current_user_and_bot, scopes=DESIGNER_ACCESS), +): + """ + Fetches text content of the bot + """ + kwargs = request.query_params._dict.copy() + return {"data": list(cognition_processor.get_content(current_user.get_bot(), **kwargs))} + + +@router.get("/text/faq/collection", response_model=Response) +async def list_collection( + current_user: User = Security(Authentication.get_current_user_and_bot, scopes=DESIGNER_ACCESS), +): + """ + Fetches text content of the bot + """ + return {"data": cognition_processor.list_cognition_collections(current_user.get_bot())} + + +@router.post("/cognition/schema", response_model=Response) +async def save_cognition_schema( + metadata: CognitionSchemaRequest, current_user: User = Security(Authentication.get_current_user_and_bot, scopes=DESIGNER_ACCESS), - collection: str = None ): """ - Saves text content into the bot + Saves and updates cognition metadata into the bot """ return { - "message": "Text saved!", + "message": "Schema saved!", "data": { - "_id": processor.save_content( - text.data, + "_id": cognition_processor.save_cognition_schema( + metadata.dict(), current_user.get_user(), current_user.get_bot(), - collection ) } } -@router.put("/text/faq/{text_id}", response_model=Response) -async def update_bot_text( - text_id: str, - text: TextData, +@router.put("/cognition/schema/{metadata_id}", response_model=Response) +async def update_cognition_schema( + metadata_id: str, + metadata: CognitionSchemaRequest, current_user: User = Security(Authentication.get_current_user_and_bot, scopes=DESIGNER_ACCESS), - collection: str = None, ): """ - Updates text content into the bot + Saves and updates cognition metadata into the bot """ return { - "message": "Text updated!", + "message": "Schema updated!", "data": { - "_id": processor.update_content( - text_id, - text.data, - current_user.get_user(), - current_user.get_bot(), - collection + "_id": cognition_processor.update_cognition_schema( + metadata_id, + metadata.dict(), + current_user.get_user(), + current_user.get_bot(), ) } } -@router.delete("/text/faq/{text_id}", response_model=Response) -async def delete_bot_text( - text_id: str, +@router.delete("/cognition/schema/{metadata_id}", response_model=Response) +async def delete_cognition_schema( + metadata_id: str, current_user: User = Security(Authentication.get_current_user_and_bot, scopes=DESIGNER_ACCESS), ): """ - Deletes text content of the bot + Deletes cognition content of the bot """ - processor.delete_content(text_id, current_user.get_user(), current_user.get_bot()) + cognition_processor.delete_cognition_schema(metadata_id, current_user.get_bot()) return { - "message": "Text deleted!" + "message": "Schema deleted!" } -@router.get("/text/faq", response_model=Response) -async def get_text( - request: Request, - current_user: User = Security(Authentication.get_current_user_and_bot, scopes=DESIGNER_ACCESS), -): - """ - Fetches text content of the bot - """ - kwargs = request.query_params._dict.copy() - return {"data": list(processor.get_content(current_user.get_bot(), **kwargs))} - - -@router.get("/text/faq/collection", response_model=Response) -async def list_collection( +@router.get("/cognition/schema", response_model=Response) +async def list_cognition_schema( current_user: User = Security(Authentication.get_current_user_and_bot, scopes=DESIGNER_ACCESS), ): """ - Fetches text content of the bot + Fetches cognition content of the bot """ - return {"data": processor.list_collection(current_user.get_bot())} + return {"data": list(cognition_processor.list_cognition_schema(current_user.get_bot()))} @router.post("/cognition", response_model=Response) @@ -144,7 +152,7 @@ async def save_cognition_data( return { "message": "Record saved!", "data": { - "_id": processor.save_cognition_data( + "_id": cognition_processor.save_cognition_data( cognition.dict(), current_user.get_user(), current_user.get_bot(), @@ -165,7 +173,7 @@ async def update_cognition_data( return { "message": "Record updated!", "data": { - "_id": processor.update_cognition_data( + "_id": cognition_processor.update_cognition_data( cognition_id, cognition.dict(), current_user.get_user(), @@ -183,7 +191,7 @@ async def delete_cognition_data( """ Deletes cognition content of the bot """ - processor.delete_cognition_data(cognition_id, current_user.get_bot()) + cognition_processor.delete_cognition_data(cognition_id, current_user.get_bot()) return { "message": "Record deleted!" } @@ -196,4 +204,4 @@ async def list_cognition_data( """ Fetches cognition content of the bot """ - return {"data": list(processor.list_cognition_data(current_user.get_bot()))} + return {"data": list(cognition_processor.list_cognition_data(current_user.get_bot()))} diff --git a/kairon/api/models.py b/kairon/api/models.py index c11d8d967..921a7a547 100644 --- a/kairon/api/models.py +++ b/kairon/api/models.py @@ -3,9 +3,9 @@ import validators from fastapi.param_functions import Form from fastapi.security import OAuth2PasswordRequestForm +from rasa.shared.constants import DEFAULT_NLU_FALLBACK_INTENT_NAME from kairon.exceptions import AppException -from rasa.shared.constants import DEFAULT_NLU_FALLBACK_INTENT_NAME from kairon.shared.data.constant import EVENT_STATUS, SLOT_MAPPING_TYPE, SLOT_TYPE, ACCESS_ROLES, ACTIVITY_STATUS, \ INTEGRATION_STATUS, FALLBACK_MESSAGE, DEFAULT_NLU_FALLBACK_RESPONSE from ..shared.actions.models import ActionParameterType, EvaluationType, DispatchType, DbQueryValueType, \ @@ -364,23 +364,6 @@ def validate_request_method(cls, v, values, **kwargs): return v.upper() -class QueryConfig(BaseModel): - type: DbQueryValueType - value: DbActionOperationType - - @root_validator - def check(cls, values): - from kairon.shared.utils import Utility - - if Utility.check_empty_string(values.get('type')): - raise ValueError("type cannot be empty") - - if Utility.check_empty_string(values.get('value')): - raise ValueError("value cannot be empty") - - return values - - class PayloadConfig(BaseModel): type: DbQueryValueType value: Any @@ -422,7 +405,7 @@ def validate_source_code(cls, v, values, **kwargs): class DatabaseActionRequest(BaseModel): name: constr(to_lower=True, strip_whitespace=True) - query: QueryConfig + query: DbActionOperationType payload: PayloadConfig response: ActionResponseEvaluation = None set_slots: List[SetSlotsUsingActionResponse] = [] @@ -947,17 +930,31 @@ def check(cls, values): return values -class Metadata(BaseModel): +class ColumnMetadata(BaseModel): column_name: str data_type: CognitionMetadataType enable_search: bool = True create_embeddings: bool = True + @root_validator + def check(cls, values): + from kairon.shared.utils import Utility + + if values.get('data_type') not in [CognitionMetadataType.str.value, CognitionMetadataType.int.value]: + raise ValueError("Only str and int data types are supported") + if Utility.check_empty_string(values.get('column_name')): + raise ValueError("Column name cannot be empty") + return values + + +class CognitionSchemaRequest(BaseModel): + metadata: List[ColumnMetadata] = None + collection_name: str + class CognitiveDataRequest(BaseModel): data: Any - content_type: CognitionDataType - metadata: List[Metadata] = None + content_type: CognitionDataType = CognitionDataType.text.value collection: str = None @root_validator @@ -965,10 +962,8 @@ def check(cls, values): from kairon.shared.utils import Utility data = values.get("data") - metadata = values.get("metadata", []) - if metadata: - for metadata_item in metadata: - Utility.retrieve_data(data, metadata_item.dict()) + if not data or (isinstance(data, str) and Utility.check_empty_string(data)): + raise ValueError("data cannot be empty") return values diff --git a/kairon/shared/actions/data_objects.py b/kairon/shared/actions/data_objects.py index d3c0b0266..d4675d199 100644 --- a/kairon/shared/actions/data_objects.py +++ b/kairon/shared/actions/data_objects.py @@ -162,15 +162,15 @@ def pre_save_post_validation(cls, sender, document, **kwargs): param.value = Utility.encrypt_message(param.value) -class DbOperation(EmbeddedDocument): - type = StringField(required=True, choices=[op_type.value for op_type in DbQueryValueType]) - value = StringField(required=True, choices=[payload.value for payload in DbActionOperationType]) - - def validate(self, clean=True): - if Utility.check_empty_string(self.type): - raise ValidationError("query type is required") - if not self.value or self.value is None: - raise ValidationError("query value is required") +# class DbOperation(EmbeddedDocument): +# type = StringField(required=True, choices=[op_type.value for op_type in DbQueryValueType]) +# value = StringField(required=True, choices=[payload.value for payload in DbActionOperationType]) +# +# def validate(self, clean=True): +# if Utility.check_empty_string(self.type): +# raise ValidationError("query type is required") +# if not self.value or self.value is None: +# raise ValidationError("query value is required") class DbQuery(EmbeddedDocument): @@ -189,7 +189,7 @@ def validate(self, clean=True): class DatabaseAction(Auditlog): name = StringField(required=True) collection = StringField(required=True) - query = EmbeddedDocumentField(DbOperation, required=True) + query = StringField(required=True, choices=[payload.value for payload in DbActionOperationType]) payload = EmbeddedDocumentField(DbQuery, default=DbQuery()) response = EmbeddedDocumentField(HttpActionResponse, default=HttpActionResponse()) set_slots = ListField(EmbeddedDocumentField(SetSlotsFromResponse)) @@ -206,9 +206,10 @@ def validate(self, clean=True): if self.name is None or not self.name.strip(): raise ValidationError("Action name cannot be empty") + if not self.query or self.query is None: + raise ValidationError("query value is required") self.response.validate() self.payload.validate() - self.query.validate() def clean(self): self.name = self.name.strip().lower() diff --git a/kairon/shared/cognition/__init__.py b/kairon/shared/cognition/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/kairon/shared/cognition/data_objects.py b/kairon/shared/cognition/data_objects.py new file mode 100644 index 000000000..2d587f4ac --- /dev/null +++ b/kairon/shared/cognition/data_objects.py @@ -0,0 +1,71 @@ +from datetime import datetime + +from mongoengine import EmbeddedDocument, StringField, BooleanField, ValidationError, ListField, EmbeddedDocumentField, \ + DateTimeField, SequenceField, DynamicField + +from kairon import Utility +from kairon.shared.data.audit.data_objects import Auditlog +from kairon.shared.data.signals import push_notification, auditlogger +from kairon.shared.models import CognitionMetadataType, CognitionDataType + + +class ColumnMetadata(EmbeddedDocument): + column_name = StringField(required=True) + data_type = StringField(required=True, default=CognitionMetadataType.str.value, + choices=[CognitionMetadataType.str.value, CognitionMetadataType.int.value]) + enable_search = BooleanField(default=True) + create_embeddings = BooleanField(default=True) + + def validate(self, clean=True): + if clean: + self.clean() + if self.data_type not in [CognitionMetadataType.str.value, CognitionMetadataType.int.value]: + raise ValidationError("Only str and int data types are supported") + if Utility.check_empty_string(self.column_name): + raise ValidationError("Column name cannot be empty") + + def clean(self): + if not Utility.check_empty_string(self.column_name): + self.column_name = self.column_name.strip().lower() + + +@auditlogger.log +@push_notification.apply +class CognitionSchema(Auditlog): + metadata = ListField(EmbeddedDocumentField(ColumnMetadata, default=None)) + collection_name = StringField(required=True) + user = StringField(required=True) + bot = StringField(required=True) + timestamp = DateTimeField(default=datetime.utcnow) + + meta = {"indexes": [{"fields": ["bot"]}]} + + def validate(self, clean=True): + if clean: + self.clean() + + if self.metadata: + for metadata_dict in self.metadata: + metadata_dict.validate() + + +@auditlogger.log +@push_notification.apply +class CognitionData(Auditlog): + vector_id = SequenceField(required=True) + data = DynamicField(required=True) + content_type = StringField(default=CognitionDataType.text.value, choices=[CognitionDataType.text.value, + CognitionDataType.json.value]) + collection = StringField(default=None) + user = StringField(required=True) + bot = StringField(required=True) + timestamp = DateTimeField(default=datetime.utcnow) + + meta = {"indexes": [{"fields": ["$data", "bot"]}]} + + def validate(self, clean=True): + if clean: + self.clean() + + if not self.data or (isinstance(self.data, str) and Utility.check_empty_string(self.data)): + raise ValidationError("data cannot be empty") diff --git a/kairon/shared/cognition/processor.py b/kairon/shared/cognition/processor.py new file mode 100644 index 000000000..e20b073ac --- /dev/null +++ b/kairon/shared/cognition/processor.py @@ -0,0 +1,258 @@ +import json +from datetime import datetime +from typing import Text, Dict, Any + +from mongoengine import DoesNotExist, Q + +from kairon import Utility +from kairon.exceptions import AppException +from kairon.shared.cognition.data_objects import CognitionData, CognitionSchema, ColumnMetadata +from kairon.shared.data.data_objects import BotSettings +from kairon.shared.data.processor import MongoProcessor +from kairon.shared.models import CognitionDataType, CognitionMetadataType + + +class CognitionDataProcessor: + """ + Class contains logic for saves, updates and deletes bot content and cognition content + """ + + def is_collection_limit_exceeded(self, bot, collection): + """ + checks if collection limit is exhausted + + :param bot: bot id + :param collection: Name of collection + :return: boolean + :raises: AppException + """ + + collections = list(CognitionSchema.objects(bot=bot).distinct(field='collection_name')) + if collection not in collections and len(collections) >= BotSettings.objects( + bot=bot).get().cognition_collections_limit: + return True + else: + return False + + def is_column_collection_limit_exceeded(self, bot, metadata): + """ + checks if columns in collection limit is exhausted + + :param bot: bot id + :param metadata: schema + :return: boolean + """ + return len(metadata) >= BotSettings.objects(bot=bot).get().cognition_columns_per_collection_limit + + def is_same_column_in_metadata(self, metadata): + """ + checks if there are same columns in metadata + + :param bot: bot id + :param metadata: schema + :return: boolean + """ + if len(metadata) < 2: + return False + reference_value = metadata[0].get('column_name') + return all(d.get('column_name') == reference_value for d in metadata) + + def get_content(self, bot: Text, **kwargs): + """ + fetches content + + :param bot: bot id + :return: yield dict + """ + kwargs["bot"] = bot + search = kwargs.pop('data', None) + start_idx = kwargs.pop('start_idx', 0) + page_size = kwargs.pop('page_size', 10) + cognition_data = CognitionData.objects(**kwargs) + if search: + cognition_data = cognition_data.search_text(search) + for value in cognition_data.skip(start_idx).limit(page_size): + item = value.to_mongo().to_dict() + item.pop('timestamp') + item["_id"] = item["_id"].__str__() + yield item + + def list_cognition_collections(self, bot: Text): + """ + Retrieve cognition data. + :param bot: bot id + """ + collections = list(CognitionData.objects(bot=bot).distinct(field='collection')) + return collections + + def save_cognition_schema(self, metadata: Dict, user: Text, bot: Text): + if self.is_collection_limit_exceeded(bot, metadata.get('collection_name')): + raise AppException('Collection limit exceeded!') + if metadata.get('metadata') and self.is_column_collection_limit_exceeded(bot, metadata.get('metadata')): + raise AppException('Column limit exceeded for collection!') + if metadata.get('metadata') and self.is_same_column_in_metadata(metadata.get('metadata')): + raise AppException('Columns cannot be same in the schema!') + metadata_obj = CognitionSchema(bot=bot, user=user) + if metadata.get('metadata'): + metadata_obj.metadata = [ColumnMetadata(**meta) for meta in metadata.get('metadata')] + metadata_obj.collection_name = metadata.get('collection_name') + metadata_id = metadata_obj.save().to_mongo().to_dict()["_id"].__str__() + return metadata_id + + def update_cognition_schema(self, metadata_id: str, metadata: Dict, user: Text, bot: Text): + try: + metadata_obj = CognitionSchema.objects(bot=bot, id=metadata_id).get() + if metadata_obj.collection_name != metadata.get('collection_name'): + raise AppException('Collection name cannot be updated!') + metadata_obj.metadata = [ColumnMetadata(**meta) for meta in metadata.get('metadata')] + metadata_obj.bot = bot + metadata_obj.user = user + metadata_obj.timestamp = datetime.utcnow() + metadata_obj.save() + except DoesNotExist: + raise AppException("Schema with given id not found!") + + def delete_cognition_schema(self, metadata_id: str, bot: Text): + try: + metadata = CognitionSchema.objects(bot=bot, id=metadata_id).get() + metadata.delete() + except DoesNotExist: + raise AppException("Schema does not exists!") + + def list_cognition_schema(self, bot: Text): + """ + fetches metadata + + :param bot: bot id + :return: yield dict + """ + for value in CognitionSchema.objects(bot=bot): + final_data = {} + item = value.to_mongo().to_dict() + metadata = item.pop("metadata") + collection = item.pop('collection_name', None) + final_data["_id"] = item["_id"].__str__() + final_data['metadata'] = metadata + final_data['collection_name'] = collection + yield final_data + + def __validate_metadata_and_payload(self, bot, payload): + data = payload.get('data') + collection = payload.get('collection', None) + matched_metadata = self.find_matching_metadata(bot, data, collection) + for metadata_dict in matched_metadata['metadata']: + self.retrieve_data(data, metadata_dict) + + def save_cognition_data(self, payload: Dict, user: Text, bot: Text): + bot_settings = MongoProcessor.get_bot_settings(bot=bot, user=user) + if not bot_settings["llm_settings"]['enable_faq']: + raise AppException('Faq feature is disabled for the bot! Please contact support.') + + if payload.get('content_type') == CognitionDataType.text.value and len(payload.get('data').split()) < 10: + raise AppException("Content should contain atleast 10 words.") + + if payload.get('collection'): + if not Utility.is_exist(CognitionSchema, bot=bot, collection_name=payload.get('collection'), raise_error=False): + raise AppException('Collection does not exist!') + if payload.get('content_type') == CognitionDataType.text.value and \ + not Utility.is_exist(CognitionSchema, bot=bot, metadata=[], + collection_name=payload.get('collection'), raise_error=False): + raise AppException('Content type text cannot have metadata!') + if payload.get('content_type') == CognitionDataType.json.value: + self.__validate_metadata_and_payload(bot, payload) + + payload_obj = CognitionData() + payload_obj.data = payload.get('data') + payload_obj.content_type = payload.get('content_type') + payload_obj.collection = payload.get('collection', None) + payload_obj.user = user + payload_obj.bot = bot + payload_id = payload_obj.save().to_mongo().to_dict()["_id"].__str__() + return payload_id + + def update_cognition_data(self, payload_id: str, payload: Dict, user: Text, bot: Text): + data = payload['data'] + content_type = payload['content_type'] + if payload.get('content_type') == CognitionDataType.text.value and len(payload.get('data').split()) < 10: + raise AppException("Content should contain atleast 10 words.") + Utility.is_exist(CognitionData, bot=bot, id__ne=payload_id, data=data, + exp_message="Payload data already exists!") + if payload.get('collection') and not Utility.is_exist(CognitionSchema, bot=bot, collection_name=payload.get('collection'), raise_error=False): + raise AppException('Collection does not exist!') + try: + payload_obj = CognitionData.objects(bot=bot, id=payload_id).get() + if content_type == CognitionDataType.json.value: + self.__validate_metadata_and_payload(bot, payload) + payload_obj.data = data + payload_obj.content_type = content_type + payload_obj.collection = payload.get('collection', None) + payload_obj.user = user + payload_obj.timestamp = datetime.utcnow() + payload_obj.save() + except DoesNotExist: + raise AppException("Payload with given id not found!") + + def delete_cognition_data(self, payload_id: str, bot: Text): + try: + payload = CognitionData.objects(bot=bot, id=payload_id).get() + payload.delete() + except DoesNotExist: + raise AppException("Payload does not exists!") + + def list_cognition_data(self, bot: Text): + """ + fetches content + + :param bot: bot id + :return: yield dict + """ + for value in CognitionData.objects(bot=bot): + final_data = {} + item = value.to_mongo().to_dict() + data = item.pop("data") + data_type = item.pop("content_type") + final_data["_id"] = item["_id"].__str__() + final_data['content'] = data + final_data['content_type'] = data_type + final_data['collection'] = item.get('collection', None) + yield final_data + + @staticmethod + def retrieve_data(data: Any, schema: Dict): + if schema and isinstance(data, dict): + data_type = schema['data_type'] + column_name = schema['column_name'] + if column_name in data and data[column_name] and data_type == CognitionMetadataType.int.value: + try: + return int(data[column_name]) + except ValueError: + raise AppException("Invalid data type!") + else: + return data[column_name] + + @staticmethod + def find_matching_metadata(bot: Text, data: Any, collection: Text = None): + data_keys = list(data.keys()) + try: + matching_metadata = CognitionSchema.objects(Q(metadata__column_name__in=data_keys) & + Q(collection_name=collection) & + Q(bot=bot)).get() + return matching_metadata + except DoesNotExist as e: + raise AppException("Metadata related to payload not found!") + + + @staticmethod + def get_embeddings_and_payload_data(data: Any, metadata: CognitionSchema): + search_payload = {} + create_embedding_data = {} + for metadata_item in metadata['metadata']: + column_name = metadata_item["column_name"] + if column_name in data.keys(): + converted_value = CognitionDataProcessor.retrieve_data(data, metadata_item) + if converted_value and metadata_item["enable_search"]: + search_payload[column_name] = converted_value + if converted_value and metadata_item["create_embeddings"]: + create_embedding_data[column_name] = converted_value + create_embedding_data = json.dumps(create_embedding_data) + return search_payload, create_embedding_data diff --git a/kairon/shared/data/data_objects.py b/kairon/shared/data/data_objects.py index 12ba9b971..344946f65 100644 --- a/kairon/shared/data/data_objects.py +++ b/kairon/shared/data/data_objects.py @@ -15,8 +15,7 @@ DictField, DynamicField, IntField, - FloatField, - SequenceField + FloatField ) from rasa.shared.constants import DEFAULT_NLU_FALLBACK_INTENT_NAME from rasa.shared.core.slots import ( @@ -33,7 +32,7 @@ from kairon.exceptions import AppException from kairon.shared.data.audit.data_objects import Auditlog from kairon.shared.data.signals import push_notification, auditlogger -from kairon.shared.models import TemplateType, StoryStepType, StoryType, CognitionDataType, CognitionMetadataType +from kairon.shared.models import TemplateType, StoryStepType, StoryType from kairon.shared.utils import Utility from .constant import EVENT_STATUS, SLOT_MAPPING_TYPE, TrainingDataSourceType from ..constants import WhatsappBSPTypes, LLMResourceProvider @@ -696,51 +695,6 @@ def validate(self, clean=True): DataUtility.validate_flow_events(self.events, "RULE", self.block_name) -class CognitionMetadata(EmbeddedDocument): - column_name = StringField(required=True) - data_type = StringField(required=True, default=CognitionMetadataType.str.value, - choices=[CognitionMetadataType.str.value, CognitionMetadataType.int.value]) - enable_search = BooleanField(default=True) - create_embeddings = BooleanField(default=True) - - def validate(self, clean=True): - if clean: - self.clean() - if self.data_type not in [CognitionMetadataType.str.value, CognitionMetadataType.int.value]: - raise ValidationError("Only str and int data types are supported") - if Utility.check_empty_string(self.column_name): - raise ValidationError("Column name cannot be empty") - - def clean(self): - if not Utility.check_empty_string(self.column_name): - self.column_name = self.column_name.strip().lower() - - -@auditlogger.log -@push_notification.apply -class CognitionData(Auditlog): - vector_id = SequenceField(required=True) - data = DynamicField(required=True) - content_type = StringField(default=CognitionDataType.text.value, choices=[CognitionDataType.text.value, - CognitionDataType.json.value]) - metadata = ListField(EmbeddedDocumentField(CognitionMetadata), default=None) - collection = StringField(default=None) - user = StringField(required=True) - bot = StringField(required=True) - timestamp = DateTimeField(default=datetime.utcnow) - - meta = {"indexes": [{"fields": ["$data", "bot"]}]} - - def validate(self, clean=True): - if clean: - self.clean() - - if self.metadata: - for metadata_item in self.metadata or []: - metadata_item.validate() - Utility.retrieve_data(self.data, metadata_item.to_mongo().to_dict()) - - @auditlogger.log @push_notification.apply class Configs(Auditlog): @@ -895,6 +849,8 @@ class BotSettings(Auditlog): data_importer_limit_per_day = IntField(default=5) multilingual_limit_per_day = IntField(default=2) data_generation_limit_per_day = IntField(default=3) + cognition_collections_limit = IntField(default=3) + cognition_columns_per_collection_limit = IntField(default=5) meta = {"indexes": [{"fields": ["bot", ("bot", "status")]}]} diff --git a/kairon/shared/data/processor.py b/kairon/shared/data/processor.py index 661b5cdbc..45606d82c 100644 --- a/kairon/shared/data/processor.py +++ b/kairon/shared/data/processor.py @@ -43,14 +43,14 @@ SlotSetAction, FormValidationAction, EmailActionConfig, GoogleSearchAction, JiraAction, ZendeskAction, \ PipedriveLeadsAction, SetSlots, HubspotFormsAction, HttpActionResponse, SetSlotsFromResponse, \ CustomActionRequestParameters, KaironTwoStageFallbackAction, QuickReplies, RazorpayAction, PromptAction, \ - LlmPrompt, FormSlotSet, DatabaseAction, DbOperation, DbQuery, PyscriptActionConfig, WebSearchAction, UserQuestion + LlmPrompt, FormSlotSet, DatabaseAction, DbQuery, PyscriptActionConfig, WebSearchAction, UserQuestion from kairon.shared.actions.models import ActionType, HttpRequestContentType, ActionParameterType, DbQueryValueType from kairon.shared.data.audit.data_objects import AuditLogData from kairon.shared.importer.processor import DataImporterLogProcessor from kairon.shared.metering.constants import MetricType from kairon.shared.metering.metering_processor import MeteringProcessor from kairon.shared.models import StoryEventType, TemplateType, StoryStepType, HttpContentType, StoryType, \ - LlmPromptSource, CognitionDataType + LlmPromptSource from kairon.shared.plugins.factory import PluginFactory from kairon.shared.utils import Utility, StoryValidator from .constant import ( @@ -89,8 +89,8 @@ ModelDeployment, Rules, Utterances, BotSettings, ChatClientConfig, SlotMapping, KeyVault, EventConfig, TrainingDataGenerator, - MultiflowStories, MultiflowStoryEvents, CognitionData, MultiFlowStoryMetadata, - Synonyms, Lookup, CognitionMetadata, LLMSettings, Analytics + MultiflowStories, MultiflowStoryEvents, MultiFlowStoryMetadata, + Synonyms, Lookup, Analytics ) from .utils import DataUtility from ..constants import KaironSystemSlots, PluginTypes @@ -3207,7 +3207,7 @@ def update_db_action(self, request_data: Dict, user: str, bot: str): raise AppException(f'Action with name "{request_data.get("name")}" not found') self.__validate_payload(request_data.get('payload'), bot) action = DatabaseAction.objects(name=request_data.get('name'), bot=bot, status=True).get() - action.query = DbOperation(**request_data['query']) + action.query = request_data['query'] action.payload = DbQuery(**request_data['payload']) action.response = HttpActionResponse(**request_data.get('response', {})) action.set_slots = [SetSlotsFromResponse(**slot).to_mongo().to_dict() for slot in @@ -3230,7 +3230,7 @@ def add_db_action(self, vector_db_action_config: Dict, user: str, bot: str): set_slots = [SetSlotsFromResponse(**slot) for slot in vector_db_action_config.get('set_slots')] action_id = DatabaseAction( name=vector_db_action_config['name'], - query=DbOperation(**vector_db_action_config.get('query')), + query=vector_db_action_config.get('query'), payload=DbQuery(**vector_db_action_config.get('payload')), response=HttpActionResponse(**vector_db_action_config.get('response', {})), set_slots=set_slots, @@ -5757,130 +5757,3 @@ def get_razorpay_action_config(self, bot: Text, with_doc_id: bool = True): action.pop('user') yield action - - def save_content(self, content: Text, user: Text, bot: Text, collection: Text = None): - bot_settings = self.get_bot_settings(bot=bot, user=user) - if not bot_settings["llm_settings"]['enable_faq']: - raise AppException('Faq feature is disabled for the bot! Please contact support.') - if len(content.split()) < 10: - raise AppException("Content should contain atleast 10 words.") - - content_obj = CognitionData() - content_obj.data = content - content_obj.collection = collection - content_obj.user = user - content_obj.bot = bot - id = ( - content_obj.save().id.__str__() - ) - return id - - def update_content(self, content_id: str, content: Text, user: Text, bot: Text, collection: Text = None): - if len(content.split()) < 10: - raise AppException("Content should contain atleast 10 words.") - - Utility.is_exist(CognitionData, bot=bot, id__ne=content_id, data=content, content_type__ne=CognitionDataType.json.value, - exp_message="Text already exists!") - - try: - content_obj = CognitionData.objects(bot=bot, id=content_id).get() - content_obj.data = content - content_obj.collection = collection - content_obj.user = user - content_obj.timestamp = datetime.utcnow() - content_obj.save() - except DoesNotExist: - raise AppException("Content with given id not found!") - - def delete_content(self, content_id: str, user: Text, bot: Text): - try: - content = CognitionData.objects(bot=bot, id=content_id).get() - content.delete() - except DoesNotExist: - raise AppException("Text does not exists!") - - def get_content(self, bot: Text, **kwargs): - """ - fetches content - - :param bot: bot id - :return: yield dict - """ - kwargs["bot"] = bot - search = kwargs.pop('data', None) - start_idx = kwargs.pop('start_idx', 0) - page_size = kwargs.pop('page_size', 10) - cognition_data = CognitionData.objects(**kwargs) - if search: - cognition_data = cognition_data.search_text(search) - for value in cognition_data.skip(start_idx).limit(page_size): - item = value.to_mongo().to_dict() - item.pop('timestamp') - item["_id"] = item["_id"].__str__() - yield item - - def list_collection(self, bot: Text): - """ - Retrieve cognition data. - :param bot: bot id - """ - collections = list(CognitionData.objects(bot=bot).distinct(field='collection')) - return collections - - def save_cognition_data(self, payload: Dict, user: Text, bot: Text): - bot_settings = self.get_bot_settings(bot=bot, user=user) - if not bot_settings["llm_settings"]['enable_faq']: - raise AppException('Faq feature is disabled for the bot! Please contact support.') - payload_obj = CognitionData() - payload_obj.data = payload.get('data') - payload_obj.content_type = payload.get('content_type') - payload_obj.metadata = [CognitionMetadata(**meta) for meta in payload.get('metadata', [])] - payload_obj.collection = payload.get('collection', None) - payload_obj.user = user - payload_obj.bot = bot - payload_id = payload_obj.save().to_mongo().to_dict()["_id"].__str__() - return payload_id - - def update_cognition_data(self, payload_id: str, payload: Dict, user: Text, bot: Text): - data = payload['data'] - content_type = payload['content_type'] - Utility.is_exist(CognitionData, bot=bot, id__ne=payload_id, data=data, content_type__ne=CognitionDataType.json.value, - exp_message="Payload data already exists!") - - try: - payload_obj = CognitionData.objects(bot=bot, id=payload_id).get() - payload_obj.data = data - payload_obj.content_type = content_type - payload_obj.collection = payload.get('collection', None) - payload_obj.user = user - payload_obj.timestamp = datetime.utcnow() - payload_obj.save() - except DoesNotExist: - raise AppException("Payload with given id not found!") - - def delete_cognition_data(self, payload_id: str, bot: Text): - try: - payload = CognitionData.objects(bot=bot, id=payload_id).get() - payload.delete() - except DoesNotExist: - raise AppException("Payload does not exists!") - - def list_cognition_data(self, bot: Text): - """ - fetches content - - :param bot: bot id - :return: yield dict - """ - for value in CognitionData.objects(bot=bot): - final_data = {} - item = value.to_mongo().to_dict() - data = item.pop("data") - data_type = item.pop("content_type") - metadata = item.pop("metadata") - final_data["_id"] = item["_id"].__str__() - final_data['content'] = data - final_data['content_type'] = data_type - final_data['metadata'] = metadata - final_data['collection'] = item.get('collection', None) - yield final_data diff --git a/kairon/shared/llm/gpt3.py b/kairon/shared/llm/gpt3.py index 5b0592d96..dda85e816 100644 --- a/kairon/shared/llm/gpt3.py +++ b/kairon/shared/llm/gpt3.py @@ -1,4 +1,3 @@ -import json from typing import Text, Dict, List from urllib.parse import urljoin @@ -10,9 +9,10 @@ from kairon.exceptions import AppException from kairon.shared.admin.constants import BotSecretType from kairon.shared.admin.processor import Sysadmin +from kairon.shared.cognition.data_objects import CognitionData +from kairon.shared.cognition.processor import CognitionDataProcessor from kairon.shared.constants import GPT3ResourceTypes from kairon.shared.data.constant import DEFAULT_SYSTEM_PROMPT, DEFAULT_CONTEXT_PROMPT -from kairon.shared.data.data_objects import CognitionData from kairon.shared.llm.base import LLMBase from kairon.shared.llm.clients.factory import LLMClientFactory from kairon.shared.models import CognitionDataType @@ -42,6 +42,7 @@ def __init__(self, bot: Text, llm_settings: dict): async def train(self, *args, **kwargs) -> Dict: await self.__delete_collections() count = 0 + processor = CognitionDataProcessor() collection_groups = list(CognitionData.objects.aggregate([ {'$match': {'bot': self.bot}}, {'$group': {'_id': "$collection", 'content': {'$push': "$$ROOT"}}}, @@ -52,10 +53,8 @@ async def train(self, *args, **kwargs) -> Dict: await self.__create_collection__(collection) for content in tqdm(collections['content'], desc="Training FAQ"): if content['content_type'] == CognitionDataType.json.value: - if not content['metadata'] or []: - search_payload, vector_embeddings = content['data'], json.dumps(content['data']) - else: - search_payload, vector_embeddings = Utility.get_embeddings_and_payload_data(content['data'], content['metadata']) + metadata = processor.find_matching_metadata(self.bot, content['data'], content.get('collection')) + search_payload, vector_embeddings = processor.get_embeddings_and_payload_data(content['data'], metadata) else: search_payload, vector_embeddings = {'content': content["data"]}, content["data"] search_payload['collection_name'] = collection diff --git a/kairon/shared/utils.py b/kairon/shared/utils.py index 5d2a031c7..dcfdab5cd 100644 --- a/kairon/shared/utils.py +++ b/kairon/shared/utils.py @@ -19,7 +19,7 @@ from pathlib import Path from secrets import choice from smtplib import SMTP -from typing import Text, List, Dict, Union, Any +from typing import Text, List, Dict, Union from urllib.parse import unquote_plus from urllib.parse import urljoin @@ -66,7 +66,7 @@ from .constants import MaskingStrategy, SYSTEM_TRIGGERED_UTTERANCES, ChannelTypes, PluginTypes from .data.constant import TOKEN_TYPE, KAIRON_TWO_STAGE_FALLBACK, SLOT_TYPE from .data.dto import KaironStoryStep -from .models import StoryStepType, LlmPromptType, LlmPromptSource, CognitionMetadataType +from .models import StoryStepType, LlmPromptType, LlmPromptSource from ..exceptions import AppException @@ -136,33 +136,6 @@ def check_empty_string(value: str): else: return False - @staticmethod - def retrieve_data(data: Any, metadata: Dict): - if metadata and isinstance(data, dict): - data_type = metadata["data_type"] - column_name = metadata["column_name"] - if column_name in data and data[column_name] and data_type == CognitionMetadataType.int.value: - try: - return int(data[column_name]) - except ValueError: - raise AppException("Invalid data type") - else: - return data[column_name] - - @staticmethod - def get_embeddings_and_payload_data(data: Any, metadata: Dict): - search_payload = {} - create_embedding_data = {} - for metadata_item in metadata: - column_name = metadata_item["column_name"] - converted_value = Utility.retrieve_data(data, metadata_item) - if converted_value and metadata_item["enable_search"]: - search_payload[column_name] = converted_value - if converted_value and metadata_item["create_embeddings"]: - create_embedding_data[column_name] = converted_value - create_embedding_data = json.dumps(create_embedding_data) - return search_payload, create_embedding_data - @staticmethod def validate_slot_initial_value_and_values(slot_value: Dict): initial_value = slot_value.get('initial_value') diff --git a/tests/integration_test/action_service_test.py b/tests/integration_test/action_service_test.py index 2b217e1c6..f1567ed44 100644 --- a/tests/integration_test/action_service_test.py +++ b/tests/integration_test/action_service_test.py @@ -16,8 +16,8 @@ EmailActionConfig, ActionServerLogs, GoogleSearchAction, JiraAction, ZendeskAction, PipedriveLeadsAction, SetSlots, \ HubspotFormsAction, HttpActionResponse, HttpActionRequestBody, SetSlotsFromResponse, CustomActionRequestParameters, \ KaironTwoStageFallbackAction, TwoStageFallbackTextualRecommendations, RazorpayAction, PromptAction, FormSlotSet, \ - DatabaseAction, DbOperation, DbQuery, PyscriptActionConfig, WebSearchAction, UserQuestion -from kairon.shared.actions.models import ActionType, ActionParameterType, DispatchType + DatabaseAction, DbQuery, PyscriptActionConfig, WebSearchAction, UserQuestion +from kairon.shared.actions.models import ActionType, ActionParameterType, DispatchType, DbActionOperationType from kairon.shared.actions.utils import ActionUtility from kairon.shared.admin.constants import BotSecretType from kairon.shared.admin.data_objects import BotSecrets @@ -2916,7 +2916,7 @@ def test_vectordb_action_execution_embedding_search_from_value(): payload_body = {"ids": [0], "with_payload": True, "with_vector": True} DatabaseAction( name=action_name, - query=DbOperation(type="from_value", value="embedding_search"), + query=DbActionOperationType.embedding_search.value, payload=DbQuery(type="from_value", value=payload_body), response=HttpActionResponse(value="The value of ${data.result.0.id} is ${data.result.0.vector}"), set_slots=[SetSlotsFromResponse(name="vector_value", value="${data.result.0.vector}")], @@ -3007,7 +3007,7 @@ def test_vectordb_action_execution_payload_search_from_value(): } DatabaseAction( name=action_name, - query=DbOperation(type="from_value", value="payload_search"), + query=DbActionOperationType.payload_search.value, payload=DbQuery(type="from_value", value=payload_body), response=HttpActionResponse(value="The value of ${data.0.city} with color ${data.0.color} is ${data.0.id}"), set_slots=[SetSlotsFromResponse(name="city_value", value="${data.0.id}")], @@ -3082,7 +3082,7 @@ def test_vectordb_action_execution_embedding_search_from_slot(): DatabaseAction( name=action_name, - query=DbOperation(type="from_value", value="embedding_search"), + query=DbActionOperationType.embedding_search.value, payload=DbQuery(type="from_slot", value='name'), response=HttpActionResponse(value="The value of ${data.result.0.id} is ${data.result.0.vector}"), set_slots=[SetSlotsFromResponse(name="vector_value", value="${data.result.0.vector}")], @@ -3167,7 +3167,7 @@ def test_vectordb_action_execution_payload_search_from_slot(): DatabaseAction( name=action_name, - query=DbOperation(type="from_value", value="payload_search"), + query=DbActionOperationType.payload_search.value, payload=DbQuery(type="from_slot", value='color'), response=HttpActionResponse(value="The name of the city with id ${data.0.id} is ${data.0.city}"), set_slots=[SetSlotsFromResponse(name="city_name", value="${data.0.city}")], @@ -3239,7 +3239,7 @@ def test_vectordb_action_execution_no_response_dispatch(): payload_body = {"ids": [0], "with_payload": True, "with_vector": True} DatabaseAction( name=action_name, - query=DbOperation(type="from_value", value="embedding_search"), + query=DbActionOperationType.embedding_search.value, payload=DbQuery(type="from_value", value=payload_body), response=HttpActionResponse(value="The value of ${data.result.0.id} is ${data.result.0.vector}", dispatch=False), @@ -3323,7 +3323,7 @@ def test_vectordb_action_execution_invalid_operation_type(): payload_body = {"ids": [0], "with_payload": True, "with_vector": True} DatabaseAction( name=action_name, - query=DbOperation(type="from_value", value="vector_search"), + query="vector_search", payload=DbQuery(type="from_value", value=payload_body), response=HttpActionResponse(value="The value of ${data.result.0.id} is ${data.result.0.vector}", dispatch=False), @@ -3383,7 +3383,7 @@ def test_vectordb_action_failed_execution(mock_action_config, mock_action): user="user") action_config = DatabaseAction( name=action_name, - query=DbOperation(type="from_value", value="embedding_search"), + query=DbActionOperationType.embedding_search.value, payload=DbQuery(type="from_value", value=payload_body), response=HttpActionResponse(value="The value of ${data.result.0.id} is ${data.result.0.vector"), bot="5f50fd0a56b697ca10d35d2e", diff --git a/tests/integration_test/services_test.py b/tests/integration_test/services_test.py index 1a4638c5b..64ff966f6 100644 --- a/tests/integration_test/services_test.py +++ b/tests/integration_test/services_test.py @@ -981,22 +981,6 @@ def test_list_bots(): assert response['data']['shared'] == [] -def test_content_upload_api_with_gpt_feature_disabled(): - response = client.post( - url=f"/api/bot/{pytest.bot}/data/text/faq?collection=data_details", - json={ - "data": "Data refers to any collection of facts, statistics, or information that can be analyzed or " - "used to inform decision-making. Data can take many forms, including text, numbers, images, " - "audio, and video." - }, - headers={"Authorization": pytest.token_type + " " + pytest.access_token} - ) - actual = response.json() - assert actual["message"] == "Faq feature is disabled for the bot! Please contact support." - assert not actual["data"] - assert actual["error_code"] == 422 - - def test_add_pyscript_action_empty_name(): script = """ data = [1, 2, 3, 4, 5] @@ -1267,90 +1251,478 @@ def test_get_client_config_url_with_ip_info(monkeypatch): assert actual["data"]["logs"][0]['bot'] == pytest.bot -def test_content_upload_api(monkeypatch): - def _mock_get_bot_settings(*args, **kwargs): - return BotSettings(bot=pytest.bot, user="integration@demo.ai", llm_settings=LLMSettings(enable_faq=True)) +def test_metadata_upload_api(): + response = client.post( + url=f"/api/bot/{pytest.bot}/data/cognition/schema", + json={ + "metadata": [{"column_name": "details", "data_type": "str", "enable_search": True, "create_embeddings": True}], + "collection_name": "details" + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual = response.json() + print(actual) + pytest.metadata_id = actual["data"]["_id"] + assert actual["message"] == "Schema saved!" + assert actual["data"]["_id"] + assert actual["error_code"] == 0 - monkeypatch.setattr(MongoProcessor, 'get_bot_settings', _mock_get_bot_settings) + response_one = client.post( + url=f"/api/bot/{pytest.bot}/data/cognition/schema", + json={ + "metadata": [ + {"column_name": "details_one", "data_type": "str", "enable_search": True, "create_embeddings": True}], + "collection_name": "details_one" + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual_one = response_one.json() + print(actual_one) + pytest.metadata_id_one = actual_one["data"]["_id"] + assert actual_one["message"] == "Schema saved!" + assert actual_one["data"]["_id"] + assert actual_one["error_code"] == 0 + + response_two = client.post( + url=f"/api/bot/{pytest.bot}/data/cognition/schema", + json={ + "metadata": [ + {"column_name": "details_two", "data_type": "str", "enable_search": True, "create_embeddings": True}], + "collection_name": "details_two" + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual_two = response_two.json() + print(actual_two) + pytest.metadata_id_two = actual_two["data"]["_id"] + assert actual_two["message"] == "Schema saved!" + assert actual_two["data"]["_id"] + assert actual_two["error_code"] == 0 + + response_three = client.post( + url=f"/api/bot/{pytest.bot}/data/cognition/schema", + json={ + "metadata": [ + {"column_name": "details_three", "data_type": "str", "enable_search": True, "create_embeddings": True}], + "collection_name": "details_three" + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual_three = response_three.json() + assert actual_three["message"] == "Collection limit exceeded!" + assert not actual_three["data"] + assert actual_three["error_code"] == 422 + + response = client.delete( + url=f"/api/bot/{pytest.bot}/data/cognition/schema/{pytest.metadata_id_one}", + json={ + "metadata_id": pytest.metadata_id_one, + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + response = client.delete( + url=f"/api/bot/{pytest.bot}/data/cognition/schema/{pytest.metadata_id_two}", + json={ + "metadata_id": pytest.metadata_id_two, + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + + +def test_metadata_upload_api_column_limit_exceeded(): + response = client.post( + url=f"/api/bot/{pytest.bot}/data/cognition/schema", + json={ + "metadata": [ + {"column_name": "tech", "data_type": "str", "enable_search": True, "create_embeddings": True}, + {"column_name": "age", "data_type": "int", "enable_search": True, "create_embeddings": False}, + {"column_name": "color", "data_type": "str", "enable_search": True, "create_embeddings": True}, + {"column_name": "name", "data_type": "str", "enable_search": True, "create_embeddings": True}, + {"column_name": "gender", "data_type": "str", "enable_search": True, "create_embeddings": True} + ], + "collection_name": "test_metadata_upload_api_column_limit_exceeded" + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual = response.json() + assert not actual["success"] + assert actual["message"] == "Column limit exceeded for collection!" + assert actual["data"] is None + assert actual["error_code"] == 422 + + +def test_metadata_upload_api_same_column_in_schema(): + response = client.post( + url=f"/api/bot/{pytest.bot}/data/cognition/schema", + json={ + "metadata": [ + {"column_name": "tech", "data_type": "str", "enable_search": True, "create_embeddings": True}, + {"column_name": "tech", "data_type": "int", "enable_search": True, "create_embeddings": False}], + "collection_name": "test_metadata_upload_api_same_column_in_schema" + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual = response.json() + assert not actual["success"] + assert actual["message"] == "Columns cannot be same in the schema!" + assert actual["data"] is None + assert actual["error_code"] == 422 + + +def test_metadata_upload_invalid_data_type(): + response = client.post( + url=f"/api/bot/{pytest.bot}/data/cognition/schema", + json={ + "metadata": [ + {"column_name": "test_metadata_upload_invalid_data_type", "data_type": "bool", "enable_search": True, "create_embeddings": True}], + "collection_name": "test_metadata_upload_invalid_data_type" + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual = response.json() + print(actual) + assert actual["message"] == [{'loc': ['body', 'metadata', 0, 'data_type'], 'msg': "value is not a valid enumeration member; permitted: 'str', 'int'", 'type': 'type_error.enum', 'ctx': {'enum_values': ['str', 'int']}}, {'loc': ['body', 'metadata', 0, '__root__'], 'msg': 'Only str and int data types are supported', 'type': 'value_error'}] + assert not actual["data"] + assert actual["error_code"] == 422 + + +def test_metadata_upload_column_name_empty(): + response = client.post( + url=f"/api/bot/{pytest.bot}/data/cognition/schema", + json={ + "metadata": [ + {"column_name": "", "data_type": "int", "enable_search": True, "create_embeddings": True}], + "collection_name": "test_metadata_upload_column_name_empty" + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual = response.json() + print(actual) + assert actual["message"] == [{'loc': ['body', 'metadata', 0, '__root__'], 'msg': 'Column name cannot be empty', 'type': 'value_error'}] + assert not actual["data"] + assert actual["error_code"] == 422 + + +def test_metadata_update_api(): + metadata = { + "metadata": [{"column_name": "language", "data_type": "str", "enable_search": True, "create_embeddings": True}], + "collection_name": "details" + } + response = client.put( + url=f"/api/bot/{pytest.bot}/data/cognition/schema/{pytest.metadata_id}", + json=metadata, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual = response.json() + print(actual) + assert actual["message"] == "Schema updated!" + assert actual["error_code"] == 0 + + +def test_metadata_update_api_different_collection(): response = client.post( - url=f"/api/bot/{pytest.bot}/data/text/faq?collection=data_details", + url=f"/api/bot/{pytest.bot}/data/cognition/schema", json={ - "data": "Data refers to any collection of facts, statistics, or information that can be analyzed or " - "used to inform decision-making. Data can take many forms, including text, numbers, images, " - "audio, and video." + "metadata": [ + {"column_name": "color", "data_type": "str", "enable_search": True, "create_embeddings": True}], + "collection_name": "different_collection" }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) actual = response.json() - pytest.content_id = actual["data"]["_id"] - assert actual["message"] == "Text saved!" + print(actual) + pytest.metadata_id_two = actual["data"]["_id"] + assert actual["message"] == "Schema saved!" assert actual["data"]["_id"] assert actual["error_code"] == 0 + updated_metadata = { + "metadata": [{"column_name": "language", "data_type": "str", "enable_search": True, "create_embeddings": True}], + "collection_name": "details" + } + response_one = client.put( + url=f"/api/bot/{pytest.bot}/data/cognition/schema/{pytest.metadata_id_two}", + json=updated_metadata, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual_one = response_one.json() + print(actual_one) + assert actual_one["message"] == "Collection name cannot be updated!" + assert actual_one["error_code"] == 422 -def test_content_upload_api_without_collection(monkeypatch): + response = client.delete( + url=f"/api/bot/{pytest.bot}/data/cognition/schema/{pytest.metadata_id_two}", + json={ + "metadata_id": pytest.metadata_id_two, + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + + +def test_metadata_update_api_not_found(): + metadata_id = '594ced02ef345b2b049222c5' + metadata = { + "metadata": [{"column_name": "age", "data_type": "int", "enable_search": True, "create_embeddings": True}], + "collection_name": "age" + } + response = client.put( + url=f"/api/bot/{pytest.bot}/data/cognition/schema/{metadata_id}", + json=metadata, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual = response.json() + print(actual) + assert not actual["success"] + assert actual["message"] == "Schema with given id not found!" + assert actual["data"] is None + assert actual["error_code"] == 422 + + +def test_get_payload_metadata(): + response = client.get( + url=f"/api/bot/{pytest.bot}/data/cognition/schema", + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual = response.json() + print(actual) + assert actual["success"] + assert actual["error_code"] == 0 + assert actual['data'][0]['metadata'][0] == {'column_name': 'language', 'data_type': 'str', 'enable_search': True, 'create_embeddings': True} + + +def test_delete_payload_content_metadata(): + response = client.delete( + url=f"/api/bot/{pytest.bot}/data/cognition/schema/{pytest.metadata_id}", + json={ + "metadata_id": pytest.metadata_id, + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual = response.json() + print(actual) + assert actual["success"] + assert actual["message"] == "Schema deleted!" + assert actual["data"] is None + assert actual["error_code"] == 0 + + +def test_delete_payload_content_metadata_does_not_exist(): + metadata_id = '61f3a2c0aef98d5b4c58e90f' + response = client.delete( + url=f"/api/bot/{pytest.bot}/data/cognition/schema/{metadata_id}", + json={ + "metadata_id": metadata_id, + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual = response.json() + assert not actual["success"] + assert actual["message"] == "Schema does not exists!" + assert actual["data"] is None + assert actual["error_code"] == 422 + + +def test_get_payload_content_metadata_not_exists(): + response = client.get( + url=f"/api/bot/{pytest.bot}/data/cognition/schema", + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual = response.json() + assert actual["success"] + assert actual["message"] is None + assert actual["error_code"] == 0 + assert actual["data"] == [] + + +def test_content_upload_api_with_gpt_feature_disabled(): + payload = { + "data": "Data refers to any collection of facts, statistics, or information that can be analyzed or " + "used to inform decision-making. Data can take many forms, including text, numbers, images, " + "audio, and video.", + "content_type": "text", + "collection": "data_details"} + response = client.post( + url=f"/api/bot/{pytest.bot}/data/cognition", + json=payload, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual = response.json() + assert actual["message"] == "Faq feature is disabled for the bot! Please contact support." + assert not actual["data"] + assert actual["error_code"] == 422 + + +def test_content_upload_api(monkeypatch): def _mock_get_bot_settings(*args, **kwargs): return BotSettings(bot=pytest.bot, user="integration@demo.ai", llm_settings=LLMSettings(enable_faq=True)) monkeypatch.setattr(MongoProcessor, 'get_bot_settings', _mock_get_bot_settings) - response = client.post( - url=f"/api/bot/{pytest.bot}/data/text/faq", + response_one = client.post( + url=f"/api/bot/{pytest.bot}/data/cognition/schema", json={ - "data": "Blockchain technology is an advanced database mechanism that allows transparent information sharing within a business network." + "collection_name": "details" }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) + payload = { + "data": "Data refers to any collection of facts, statistics, or information that can be analyzed or " + "used to inform decision-making. Data can take many forms, including text, numbers, images, " + "audio, and video.", + "content_type": "text", + "collection": "details"} + response = client.post( + url=f"/api/bot/{pytest.bot}/data/cognition", + json=payload, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) actual = response.json() - pytest.content_id_two = actual["data"]["_id"] - assert actual["message"] == "Text saved!" + pytest.content_id_text = actual["data"]["_id"] + assert actual["message"] == "Record saved!" assert actual["data"]["_id"] assert actual["error_code"] == 0 + payload_two = { + "data": "Data refers to any collection of facts, statistics, or information that can be analyzed.", + "content_type": "text", + "collection": "payload_two"} + response_two = client.post( + url=f"/api/bot/{pytest.bot}/data/cognition", + json=payload_two, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual = response_two.json() + assert actual["message"] == "Collection does not exist!" + assert not actual["data"] + assert actual["error_code"] == 422 -def test_content_upload_api_invalid(monkeypatch): + response_three = client.post( + url=f"/api/bot/{pytest.bot}/data/cognition/schema", + json={ + "metadata": [ + {"column_name": "color", "data_type": "str", "enable_search": True, "create_embeddings": True}], + "collection_name": "response_two" + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + payload_three = { + "data": "Data can take many forms, including text, numbers, images, audio, and video.", + "content_type": "text", + "collection": "response_two"} + response_four = client.post( + url=f"/api/bot/{pytest.bot}/data/cognition", + json=payload_three, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual = response_four.json() + assert actual["message"] == "Content type text cannot have metadata!" + assert not actual["data"] + assert actual["error_code"] == 422 + + +def test_content_upload_api_invalid_atleast_ten_words(monkeypatch): def _mock_get_bot_settings(*args, **kwargs): return BotSettings(bot=pytest.bot, user="integration@demo.ai", llm_settings=LLMSettings(enable_faq=True)) monkeypatch.setattr(MongoProcessor, 'get_bot_settings', _mock_get_bot_settings) + payload = { + "data": "Blockchain technology is an advanced"} response = client.post( - url=f"/api/bot/{pytest.bot}/data/text/faq?collection=data", - json={ - "data": "Data" - }, + url=f"/api/bot/{pytest.bot}/data/cognition", + json=payload, headers={"Authorization": pytest.token_type + " " + pytest.access_token} - ) actual = response.json() + print(actual) assert actual["message"] == "Content should contain atleast 10 words." assert not actual["success"] assert actual["data"] is None assert actual["error_code"] == 422 -def test_content_updated_api(): +def test_content_upload_empty_data_text(monkeypatch): + def _mock_get_bot_settings(*args, **kwargs): + return BotSettings(bot=pytest.bot, user="integration@demo.ai", llm_settings=LLMSettings(enable_faq=True)) + + monkeypatch.setattr(MongoProcessor, 'get_bot_settings', _mock_get_bot_settings) + payload = { + "data": "", + "content_type": "text", + } + response = client.post( + url=f"/api/bot/{pytest.bot}/data/cognition", + json=payload, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual = response.json() + assert not actual["success"] + assert actual["message"] == [{'loc': ['body', '__root__'], 'msg': 'data cannot be empty', 'type': 'value_error'}] + + +def test_content_upload_api_without_collection(monkeypatch): + def _mock_get_bot_settings(*args, **kwargs): + return BotSettings(bot=pytest.bot, user="integration@demo.ai", llm_settings=LLMSettings(enable_faq=True)) + + monkeypatch.setattr(MongoProcessor, 'get_bot_settings', _mock_get_bot_settings) + payload = { + "data": "Blockchain technology is an advanced database mechanism that allows transparent information sharing within a business network."} + response = client.post( + url=f"/api/bot/{pytest.bot}/data/cognition", + json=payload, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual = response.json() + pytest.content_id_no_collection = actual["data"]["_id"] + assert actual["message"] == "Record saved!" + assert actual["data"]["_id"] + assert actual["error_code"] == 0 + + +def test_content_update_api(): response = client.put( - url=f"/api/bot/{pytest.bot}/data/text/faq/{pytest.content_id}?collection=aws", + url=f"/api/bot/{pytest.bot}/data/cognition/{pytest.content_id_text}", json={ - "text_id": pytest.content_id, + "cognition_id": pytest.content_id_text, "data": "AWS Fargate is a serverless compute engine for containers that allows you to run " "Docker containers without having to manage the underlying EC2 instances. With Fargate, " "you can focus on developing and deploying your applications rather than managing the infrastructure.", + "collection": "details", + "content_type": "text" }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) actual = response.json() assert actual["success"] - assert actual["message"] == "Text updated!" + assert actual["message"] == "Record updated!" assert actual["error_code"] == 0 +def test_content_update_api_collection_does_not_exist(): + response = client.put( + url=f"/api/bot/{pytest.bot}/data/cognition/{pytest.content_id_text}", + json={ + "cognition_id": pytest.content_id_text, + "data": "Docker containers without having to manage the underlying EC2 instances.", + "collection": "test_content_update_api_collection_does_not_exist", + "content_type": "text" + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + + ) + actual = response.json() + assert not actual["success"] + assert actual["message"] == "Collection does not exist!" + assert actual["error_code"] == 422 + + def test_content_update_api_invalid(): response = client.put( - url=f"/api/bot/{pytest.bot}/data/text/faq/{pytest.content_id}", + url=f"/api/bot/{pytest.bot}/data/cognition/{pytest.content_id_text}", json={ - "text_id": pytest.content_id, - "data": "Data" + "cognition_id": pytest.content_id_text, + "data": "AWS Fargate is a serverless compute engine.", + "collection": "details", + "content_type": "text" }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} @@ -1365,19 +1737,21 @@ def test_content_update_api_invalid(): def test_content_update_api_already_exist(): content_id = '6009cb85e65f6dce28fb3e51' response = client.put( - url=f"/api/bot/{pytest.bot}/data/text/faq/{content_id}", + url=f"/api/bot/{pytest.bot}/data/cognition/{content_id}", json={ - "text_id": content_id, + "cognition_id": content_id, "data": "AWS Fargate is a serverless compute engine for containers that allows you to run " "Docker containers without having to manage the underlying EC2 instances. With Fargate, " - "you can focus on developing and deploying your applications rather than managing the infrastructure." + "you can focus on developing and deploying your applications rather than managing the infrastructure.", + "collection": "details", + "content_type": "text" }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) actual = response.json() assert not actual["success"] - assert actual["message"] == "Text already exists!" + assert actual["message"] == "Payload data already exists!" assert actual["data"] is None assert actual["error_code"] == 422 @@ -1385,24 +1759,26 @@ def test_content_update_api_already_exist(): def test_content_update_api_id_not_found(): content_id = '594ced02ed345b2b049222c5' response = client.put( - url=f"/api/bot/{pytest.bot}/data/text/faq/{content_id}", + url=f"/api/bot/{pytest.bot}/data/cognition/{content_id}", json={ - "text_id": content_id, + "cognition_id": content_id, "data": "Artificial intelligence (AI) involves using computers to do things that traditionally require human " "intelligence. AI can process large amounts of data in ways that humans cannot. The goal for AI is " - "to be able to do things like recognize patterns, make decisions, and judge like humans." + "to be able to do things like recognize patterns, make decisions, and judge like humans.", + "collection": "details", + "content_type": "text" }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) actual = response.json() assert not actual["success"] - assert actual["message"] == "Content with given id not found!" + assert actual["message"] == "Payload with given id not found!" assert actual["data"] is None assert actual["error_code"] == 422 -@mock.patch('kairon.shared.data.processor.MongoProcessor.get_content', autospec=True) +@mock.patch('kairon.shared.cognition.processor.CognitionDataProcessor.get_content', autospec=True) def test_get_content(mock_get_content): def _get_content(*args, **kwargs): return [{'vector_id': 1, @@ -1410,7 +1786,7 @@ def _get_content(*args, **kwargs): 'data': 'AWS Fargate is a serverless compute engine for containers that allows you to run Docker containers without having to manage the underlying EC2 instances. With Fargate, you can focus on developing and deploying your applications rather than managing the infrastructure.', 'user': '"integration@demo.ai"', 'bot': pytest.bot, 'content_type': 'text', - 'metadata': [], + 'metadata': None, 'collection': 'aws'}] mock_get_content.return_value = _get_content() @@ -1437,7 +1813,7 @@ def test_get_content_without_data(): assert actual["success"] assert actual["error_code"] == 0 assert actual["data"] - assert actual["data"][0]['collection'] == 'aws' + assert actual["data"][0]['collection'] == 'details' assert actual["data"][0]['data'] == 'AWS Fargate is a serverless compute engine for containers that allows you to run Docker containers without having to manage the underlying EC2 instances. With Fargate, you can focus on developing and deploying your applications rather than managing the infrastructure.' assert actual["data"][1]['data'] == 'Blockchain technology is an advanced database mechanism that allows transparent information sharing within a business network.' @@ -1448,51 +1824,52 @@ def test_list_collection(): headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) actual = response.json() - print(actual) + print(set(actual["data"])) assert actual["success"] assert actual["error_code"] == 0 - assert set(actual["data"]) == {'aws'} + assert set(actual["data"]) == {'details'} def test_delete_content(): + response_one = client.delete( + url=f"/api/bot/{pytest.bot}/data/cognition/{pytest.content_id_no_collection}", + json={ + "cognition_id": pytest.content_id_no_collection, + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual_one = response_one.json() + assert actual_one["success"] + assert actual_one["message"] == "Record deleted!" + assert actual_one["data"] is None + assert actual_one["error_code"] == 0 + response = client.delete( - url=f"/api/bot/{pytest.bot}/data/text/faq/{pytest.content_id}", + url=f"/api/bot/{pytest.bot}/data/cognition/{pytest.content_id_text}", json={ - "text_id": pytest.content_id, + "cognition_id": pytest.content_id_text, }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) actual = response.json() assert actual["success"] - assert actual["message"] == "Text deleted!" + assert actual["message"] == "Record deleted!" assert actual["data"] is None assert actual["error_code"] == 0 - response_two = client.delete( - url=f"/api/bot/{pytest.bot}/data/text/faq/{pytest.content_id_two}", - json={ - "text_id": pytest.content_id_two, - }, - headers={"Authorization": pytest.token_type + " " + pytest.access_token} - ) - actual_two = response_two.json() - assert actual_two["success"] - assert actual_two["message"] == "Text deleted!" - assert actual_two["data"] is None - assert actual_two["error_code"] == 0 def test_delete_content_does_not_exist(): content_id = '635981f6e40f61599e000064' response = client.delete( - url=f"/api/bot/{pytest.bot}/data/text/faq/{content_id}", + url=f"/api/bot/{pytest.bot}/data/cognition/{content_id}", json={ - "text_id": content_id, + "cognition_id": content_id, }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) actual = response.json() assert not actual["success"] - assert actual["message"] == "Text does not exists!" + assert actual["message"] == "Payload does not exists!" assert actual["data"] is None assert actual["error_code"] == 422 @@ -1513,10 +1890,7 @@ def test_get_content_not_exists(): def test_payload_upload_api_with_gpt_feature_disabled(): payload = { "data": {"name": "Nupur", "age": 25, "city": "Bengaluru"}, - "content_type": "json", - "metadata": [{"column_name": "name", "data_type": "str", "enable_search": True, "create_embeddings": True}, - {"column_name": "age", "data_type": "int", "enable_search": True, "create_embeddings": False}, - {"column_name": "city", "data_type": "str", "enable_search": False, "create_embeddings": True}]} + "content_type": "json"} response = client.post( url=f"/api/bot/{pytest.bot}/data/cognition", json=payload, @@ -1533,11 +1907,19 @@ def _mock_get_bot_settings(*args, **kwargs): return BotSettings(bot=pytest.bot, user="integration@demo.ai", llm_settings=LLMSettings(enable_faq=True)) monkeypatch.setattr(MongoProcessor, 'get_bot_settings', _mock_get_bot_settings) + metadata = { + "metadata": [{"column_name": "details", "data_type": "str", "enable_search": True, "create_embeddings": True}], + "collection_name": "Details", + } + response = client.post( + url=f"/api/bot/{pytest.bot}/data/cognition/schema", + json=metadata, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) payload = { "data": {"details": "AWS"}, "content_type": "json", - "collection": "Details", - "metadata": [{"column_name": "details", "data_type": "str", "enable_search": True, "create_embeddings": True}] + "collection": "Details" } response = client.post( url=f"/api/bot/{pytest.bot}/data/cognition", @@ -1551,19 +1933,15 @@ def _mock_get_bot_settings(*args, **kwargs): assert actual["error_code"] == 0 -def test_payload_upload_invalid_data_type(monkeypatch): +def test_payload_upload_collection_does_not_exists(monkeypatch): def _mock_get_bot_settings(*args, **kwargs): return BotSettings(bot=pytest.bot, user="integration@demo.ai", llm_settings=LLMSettings(enable_faq=True)) monkeypatch.setattr(MongoProcessor, 'get_bot_settings', _mock_get_bot_settings) payload = { - "data": {"name": "Ram", "age": "Twenty-Three", "color": "red"}, + "data": {"name": "Ram", "age": 23, "color": "red"}, "content_type": "json", - "metadata": [ - {"column_name": "name", "data_type": "str", "enable_search": True, "create_embeddings": True}, - {"column_name": "age", "data_type": "int", "enable_search": True, "create_embeddings": False}, - {"column_name": "color", "data_type": "str", "enable_search": False, "create_embeddings": True} - ] + "collection": "test_payload_upload_collection_does_not_exists" } response = client.post( url=f"/api/bot/{pytest.bot}/data/cognition", @@ -1572,17 +1950,84 @@ def _mock_get_bot_settings(*args, **kwargs): ) actual = response.json() assert not actual["success"] - assert actual["message"] == 'Invalid data type' + assert actual["message"] == 'Collection does not exist!' + + +def test_payload_upload_metadata_missing(monkeypatch): + def _mock_get_bot_settings(*args, **kwargs): + return BotSettings(bot=pytest.bot, user="integration@demo.ai", llm_settings=LLMSettings(enable_faq=True)) + + monkeypatch.setattr(MongoProcessor, 'get_bot_settings', _mock_get_bot_settings) + payload = { + "data": {"city": "Pune", "color": "red"}, + "content_type": "json", + "collection": "Details" + } + response = client.post( + url=f"/api/bot/{pytest.bot}/data/cognition", + json=payload, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual = response.json() + assert not actual["success"] + assert actual["message"] == 'Metadata related to payload not found!' + + +def test_payload_upload_metadata_invalid_data_type(monkeypatch): + def _mock_get_bot_settings(*args, **kwargs): + return BotSettings(bot=pytest.bot, user="integration@demo.ai", llm_settings=LLMSettings(enable_faq=True)) + + monkeypatch.setattr(MongoProcessor, 'get_bot_settings', _mock_get_bot_settings) + metadata = { + "metadata": [{"column_name": "age", "data_type": "int", "enable_search": True, "create_embeddings": True}], + "collection_name": "Details" + } + response = client.post( + url=f"/api/bot/{pytest.bot}/data/cognition/schema", + json=metadata, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + payload = { + "data": {"age": "Twenty-Three"}, + "content_type": "json", + "collection": "Details" + } + response = client.post( + url=f"/api/bot/{pytest.bot}/data/cognition", + json=payload, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual = response.json() + assert not actual["success"] + assert actual["message"] == 'Invalid data type!' + + +def test_payload_updated_api_collection_does_not_exists(): + response = client.put( + url=f"/api/bot/{pytest.bot}/data/cognition/{pytest.payload_id}", + json={ + "cognition_id": pytest.payload_id, + "data": {"details": "data science"}, + "collection": "test_payload_updated_api_collection_does_not_exists", + "content_type": "json" + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + + ) + actual = response.json() + assert not actual["success"] + assert actual["message"] == "Collection does not exist!" + assert actual["error_code"] == 422 def test_payload_updated_api(): response = client.put( url=f"/api/bot/{pytest.bot}/data/cognition/{pytest.payload_id}", json={ - "payload_id": pytest.payload_id, - "data": 'Data Collection means gathering relevant data from various sources, which can include databases, APIs, websites, sensors, social media, and more.', - "collection": "Collection", - "content_type": "text" + "cognition_id": pytest.payload_id, + "data": {"details": "data science"}, + "collection": "Details", + "content_type": "json" }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} @@ -1602,9 +2047,9 @@ def _mock_get_bot_settings(*args, **kwargs): response = client.put( url=f"/api/bot/{pytest.bot}/data/cognition/{payload_id}", json={ - "payload_id": payload_id, - "data": 'Data Collection means gathering relevant data from various sources, which can include databases, APIs, websites, sensors, social media, and more.', - "content_type": "text", + "cognition_id": payload_id, + "data": {"details": "data science"}, + "content_type": "json", }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} @@ -1622,9 +2067,9 @@ def test_payload_content_update_api_id_not_found(): response = client.put( url=f"/api/bot/{pytest.bot}/data/cognition/{payload_id}", json={ - "text_id": payload_id, - "data": 'Data Science is an emerging field.', - "content_type": "text", + "cognition_id": payload_id, + "data": {"details": "data"}, + "content_type": "json", }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} @@ -1652,7 +2097,7 @@ def test_delete_payload_content(): response = client.delete( url=f"/api/bot/{pytest.bot}/data/cognition/{pytest.payload_id}", json={ - "text_id": pytest.payload_id, + "cognition_id": pytest.payload_id, }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) @@ -1669,7 +2114,7 @@ def test_delete_payload_content_does_not_exist(): response = client.delete( url=f"/api/bot/{pytest.bot}/data/cognition/{payload_id}", json={ - "text_id": payload_id, + "cognition_id": payload_id, }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) @@ -6698,7 +7143,7 @@ def test_get_secret_2(): def test_add_vectordb_action_empty_name(): request_body = { "name": '', - "query": {"type": "from_value", "value": "embedding_search"}, + "query": "embedding_search", "payload": {"type": "from_value", "value": {"ids": [0], "with_payload": True, "with_vector": True}}, "response": {"value": "0"} } @@ -6718,7 +7163,7 @@ def test_add_vectordb_action_empty_name(): def test_add_vectordb_action_empty_operation_value(): request_body = { "name": 'action_test_empty_operation_value', - "query": {"type": "from_value", "value": ""}, + "query": "", "payload": {"type": "from_value", "value": {"ids": [0], "with_payload": True, "with_vector": True}}, "response": {"value": "0"} } @@ -6731,19 +7176,16 @@ def test_add_vectordb_action_empty_operation_value(): actual = response.json() print(actual) assert actual["error_code"] == 422 - assert actual["message"] == [{'loc': ['body', 'query', 'value'], + assert actual["message"] == [{'loc': ['body', 'query'], 'msg': "value is not a valid enumeration member; permitted: 'payload_search', 'embedding_search'", - 'type': 'type_error.enum', - 'ctx': {'enum_values': ['payload_search', 'embedding_search']}}, - {'loc': ['body', 'query', '__root__'], 'msg': 'value cannot be empty', - 'type': 'value_error'}] + 'type': 'type_error.enum', 'ctx': {'enum_values': ['payload_search', 'embedding_search']}}] assert not actual["success"] def test_add_vectordb_action_empty_operation_type(): request_body = { "name": 'action_test_empty_operation_type', - "query": {"type": "", "value": "embedding_search"}, + "query": "embedding_search", "payload": {"type": "from_value", "value": {"ids": [0], "with_payload": True, "with_vector": True}}, "response": {"value": "0"} } @@ -6755,19 +7197,15 @@ def test_add_vectordb_action_empty_operation_type(): actual = response.json() print(actual) - assert actual["error_code"] == 422 - assert actual["message"] == [{'loc': ['body', 'query', 'type'], - 'msg': "value is not a valid enumeration member; permitted: 'from_value', 'from_slot'", - 'type': 'type_error.enum', 'ctx': {'enum_values': ['from_value', 'from_slot']}}, - {'loc': ['body', 'query', '__root__'], 'msg': 'type cannot be empty', - 'type': 'value_error'}] - assert not actual["success"] + assert actual["error_code"] == 0 + assert actual["message"] == "Action added!" + assert actual["success"] def test_add_vectordb_action_empty_payload_type(): request_body = { "name": 'test_add_vectordb_action_empty_payload_type', - "query": {"type": "from_value", "value": "payload_search"}, + "query": "payload_search", "payload": {"type": "", "value": {"ids": [0], "with_payload": True, "with_vector": True}}, "response": {"value": "0"} } @@ -6791,7 +7229,7 @@ def test_add_vectordb_action_empty_payload_type(): def test_add_vectordb_action_empty_payload_value(): request_body = { "name": 'action_test_empty_value', - "query": {"type": "from_value", "value": "payload_search"}, + "query": "payload_search", "payload": {"type": "from_value", "value": ''}, "response": {"value": "0"} } @@ -6811,7 +7249,7 @@ def test_add_vectordb_action_empty_payload_value(): def test_add_vectordb_action(): request_body = { "name": 'vectordb_action_test', - "query": {"type": "from_value", "value": "payload_search"}, + "query": "payload_search", "payload": {"type": "from_value", "value": { "filter": { "should": [ @@ -6838,7 +7276,7 @@ def test_add_vectordb_action(): def test_add_vectordb_action_case_insensitivity(): request_body = { "name": 'VECTORDB_ACTION_CASE_INSENSITIVE', - "query": {"type": "from_value", "value": "payload_search"}, + "query": "payload_search", "payload": {"type": "from_value", "value": { "filter": { "should": [ @@ -6883,7 +7321,7 @@ def test_add_vectordb_action_case_insensitivity(): def test_add_vectordb_action_existing(): request_body = { "name": 'test_add_vectordb_action_existing', - "query": {"type": "from_value", "value": "embedding_search"}, + "query": "embedding_search", "payload": {"type": "from_value", "value": {"ids": [0], "with_payload": True, "with_vector": True}}, "response": {"value": "0"}, "set_slots": [{"name": "bot", "value": "${RESPONSE}", "evaluation_type": "expression"}] @@ -6926,7 +7364,7 @@ def test_add_vectordb_action_with_slots(): request_body = { "name": 'test_add_vectordb_action_with_slots', - "query": {"type": "from_value", "value": "payload_search"}, + "query": "payload_search", "payload": {"type": "from_slot", "value": "vectordb"}, "response": {"value": "0"} } @@ -6943,39 +7381,10 @@ def test_add_vectordb_action_with_slots(): assert actual["success"] -def test_add_vectordb_action_with_invalid_operation_type(): - request_body = { - "name": 'test_add_vectordb_action_with_invalid_operation_type', - "query": {"type": "from_val", "value": "payload_search"}, - "payload": {"type": "from_value", "value": { - "filter": { - "should": [ - {"key": "city", "match": {"value": "London"}}, - {"key": "color", "match": {"value": "red"}} - ] - } - }}, - "response": {"value": "0"} - } - - response = client.post( - url=f"/api/bot/{pytest.bot}/action/db", - json=request_body, - headers={"Authorization": pytest.token_type + " " + pytest.access_token}, - ) - - actual = response.json() - print(actual) - assert not actual["success"] - assert str(actual['message']).__contains__("value is not a valid enumeration member") - assert actual["data"] is None - assert actual["error_code"] == 422 - - def test_update_vectordb_action(): request_body = { "name": 'test_update_vectordb_action', - "query": {"type": "from_value", "value": "payload_search"}, + "query": "payload_search", "payload": {"type": "from_value", "value": { "filter": { "should": [ @@ -6997,7 +7406,7 @@ def test_update_vectordb_action(): request_body = { "name": 'test_update_vectordb_action', - "query": {"type": "from_value", "value": "embedding_search"}, + "query": "embedding_search", "payload": {"type": "from_value", "value": {"ids": [0], "with_payload": True, "with_vector": True}}, "response": {"value": "0"}, "set_slots": [{"name": "bot", "value": "${RESPONSE}", "evaluation_type": "script"}] @@ -7024,7 +7433,7 @@ def test_update_vectordb_action(): def test_update_vectordb_action_non_existing(): request_body = { "name": 'test_update_vectordb_action_non_existing', - "query": {"type": "from_value", "value": "embedding_search"}, + "query": "embedding_search", "payload": {"type": "from_value", "value": {"ids": [6], "with_payload": True, "with_vector": True}}, "response": {"value": "15"}, "set_slots": [{"name": "age", "value": "${RESPONSE}", "evaluation_type": "script"}] @@ -7038,7 +7447,7 @@ def test_update_vectordb_action_non_existing(): request_body = { "name": "test_update_vectordb_action_non_existing_new", - "query": {"type": "from_value", "value": "embedding_search"}, + "query": "embedding_search", "payload": {"type": "from_value", "value": {"ids": [6], "with_payload": True, "with_vector": True}}, "response": {"value": "15"}, "set_slots": [{"name": "age", "value": "${RESPONSE}", "evaluation_type": "script"}] @@ -7058,7 +7467,7 @@ def test_update_vectordb_action_non_existing(): def test_update_vector_action_wrong_parameter(): request_body = { "name": "test_update_vector_action_1", - "query": {"type": "from_value", "value": "embedding_search"}, + "query": "embedding_search", "payload": {"type": "from_value", "value": {"ids": [8], "with_payload": True, "with_vector": True}}, "response": {"value": "15"}, "set_slots": [{"name": "bot", "value": "${RESPONSE}", "evaluation_type": "expression"}] @@ -7074,7 +7483,7 @@ def test_update_vector_action_wrong_parameter(): request_body = { "name": "test_update_vector_action_1", - "query": {"type": "from_value", "value": "embedding_search"}, + "query": "embedding_search", "payload": {"type": "from_val", "value": {"ids": [81], "with_payload": True, "with_vector": True}}, "response": {"value": "nupur"}, "set_slots": [{"name": "bot", "value": "${RESPONSE}", "evaluation_type": "expression"}] @@ -7118,15 +7527,16 @@ def test_list_vector_db_action(): headers={"Authorization": pytest.token_type + " " + pytest.access_token}, ) actual = response.json() + print(actual) assert actual["error_code"] == 0 assert actual["success"] - assert actual['data'][0]['name'] == 'vectordb_action_test' + assert actual['data'][0]['name'] == 'action_test_empty_operation_type' def test_delete_vectordb_action(): request_body = { "name": "test_delete_vectordb_action", - "query": {"type": "from_value", "value": "payload_search"}, + "query": "payload_search", "payload": {"type": "from_value", "value": { "filter": { "should": [ @@ -7158,7 +7568,7 @@ def test_delete_vectordb_action(): def test_delete_vectordb_action_non_existing(): request_body = { "name": "test_delete_vectordb_action_non_existing", - "query": {"type": "from_value", "value": "payload_search"}, + "query": "payload_search", "payload": {"type": "from_value", "value": { "filter": { "should": [ @@ -8038,10 +8448,11 @@ def test_list_actions(): assert Utility.check_empty_string(actual["message"]) print(actual['data']) assert actual['data'] == {'actions': ['action_greet'], - 'database_action': ['vectordb_action_test', 'vectordb_action_case_insensitive', - 'test_add_vectordb_action_existing', 'test_add_vectordb_action_with_slots', - 'test_update_vectordb_action', 'test_update_vectordb_action_non_existing', - 'test_update_vector_action_1', 'test_delete_vectordb_action_non_existing'], + 'database_action': ['action_test_empty_operation_type', 'vectordb_action_test', + 'vectordb_action_case_insensitive', 'test_add_vectordb_action_existing', + 'test_add_vectordb_action_with_slots', 'test_update_vectordb_action', + 'test_update_vectordb_action_non_existing', 'test_update_vector_action_1', + 'test_delete_vectordb_action_non_existing'], 'http_action': ['test_add_http_action_no_token', 'test_add_http_action_with_valid_dispatch_type', 'test_add_http_action_with_dynamic_params', 'test_update_http_action_with_dynamic_params', 'test_add_http_action_with_sender_id_parameter_type', @@ -13903,7 +14314,9 @@ def test_get_bot_settings(): 'test_limit_per_day': 5, 'training_limit_per_day': 5, 'website_data_generator_depth_search_limit': 2, - 'whatsapp': 'meta'} + 'whatsapp': 'meta', + 'cognition_collections_limit': 3, + 'cognition_columns_per_collection_limit': 5} def test_update_analytics_settings_with_empty_value(): @@ -13977,7 +14390,9 @@ def test_update_analytics_settings(): 'test_limit_per_day': 5, 'training_limit_per_day': 5, 'website_data_generator_depth_search_limit': 2, - 'whatsapp': 'meta'} + 'whatsapp': 'meta', + 'cognition_collections_limit': 3, + 'cognition_columns_per_collection_limit': 5} def test_delete_channels_config(): diff --git a/tests/unit_test/action/action_test.py b/tests/unit_test/action/action_test.py index 62f5527e1..a71742e4c 100644 --- a/tests/unit_test/action/action_test.py +++ b/tests/unit_test/action/action_test.py @@ -1,9 +1,9 @@ import json import os -import mock import re from unittest import mock +import mock from googleapiclient.http import HttpRequest from pipedrive.exceptions import UnauthorizedError, BadRequestError @@ -21,7 +21,6 @@ from kairon.actions.definitions.two_stage_fallback import ActionTwoStageFallback from kairon.actions.definitions.web_search import ActionWebSearch from kairon.actions.definitions.zendesk import ActionZendeskTicket -from kairon.exceptions import AppException from kairon.shared.constants import KAIRON_USER_MSG_ENTITY from kairon.shared.data.constant import KAIRON_TWO_STAGE_FALLBACK from kairon.shared.data.data_objects import Slots, KeyVault, BotSettings, LLMSettings @@ -2721,7 +2720,9 @@ def test_get_prompt_action_config(self): 'training_limit_per_day': 5, 'user': 'test_user', 'website_data_generator_depth_search_limit': 2, - 'whatsapp': 'meta'} + 'whatsapp': 'meta', + 'cognition_collections_limit': 3, + 'cognition_columns_per_collection_limit': 5} def test_prompt_action_not_exists(self): with pytest.raises(ActionFailure, match="Faq feature is disabled for the bot! Please contact support."): @@ -3963,7 +3964,9 @@ def test_get_bot_settings(self): 'test_limit_per_day': 5, 'training_limit_per_day': 5, 'website_data_generator_depth_search_limit': 2, - 'whatsapp': 'meta'} + 'whatsapp': 'meta', + 'cognition_collections_limit': 3, + 'cognition_columns_per_collection_limit': 5} def test_get_prompt_action_config_2(self): bot = "test_bot_action_test" diff --git a/tests/unit_test/data_processor/data_processor_test.py b/tests/unit_test/data_processor/data_processor_test.py index 6524dfdd1..78fe00e73 100644 --- a/tests/unit_test/data_processor/data_processor_test.py +++ b/tests/unit_test/data_processor/data_processor_test.py @@ -49,6 +49,8 @@ from kairon.shared.admin.data_objects import BotSecrets from kairon.shared.auth import Authentication from kairon.shared.chat.data_objects import Channels +from kairon.shared.cognition.data_objects import CognitionData, CognitionSchema +from kairon.shared.cognition.processor import CognitionDataProcessor from kairon.shared.constants import SLOT_SET_TYPE from kairon.shared.data.audit.data_objects import AuditLogData from kairon.shared.data.constant import ENDPOINT_TYPE @@ -65,7 +67,7 @@ TrainingDataGenerator, TrainingDataGeneratorResponse, TrainingExamplesTrainingDataGenerator, Rules, Configs, Utterances, BotSettings, ChatClientConfig, LookupTables, Forms, - SlotMapping, KeyVault, MultiflowStories, CognitionData, LLMSettings, + SlotMapping, KeyVault, MultiflowStories, LLMSettings, MultiflowStoryEvents, Synonyms, Lookup ) @@ -9520,7 +9522,7 @@ def test_add_vector_embedding_action_config_op_embedding_search(self): user = 'test_vector_user' action = 'test_vectordb_action_op_embedding_search' response = '0' - query = {'type': 'from_value', 'value': 'embedding_search'} + query = 'embedding_search' payload_body = { "ids": [ 0 @@ -9542,8 +9544,7 @@ def test_add_vector_embedding_action_config_op_embedding_search(self): assert actual_vectordb_action['name'] == action assert actual_vectordb_action['payload']['type'] == 'from_value' assert actual_vectordb_action['payload']['value'] == {'ids': [0], 'with_payload': True, 'with_vector': True} - assert actual_vectordb_action['query']['type'] == 'from_value' - assert actual_vectordb_action['query']['value'] == 'embedding_search' + assert actual_vectordb_action['query'] == 'embedding_search' assert actual_vectordb_action['response']['value'] == '0' def test_add_vector_embedding_action_config_op_payload_search(self): @@ -9552,7 +9553,7 @@ def test_add_vector_embedding_action_config_op_payload_search(self): user = 'test_vector_user' action = 'test_vectordb_action_op_payload_search' response = '1' - query = {'type': 'from_value', 'value': 'payload_search'} + query = 'payload_search' payload_body = { "filter": { "should": [ @@ -9575,8 +9576,7 @@ def test_add_vector_embedding_action_config_op_payload_search(self): assert actual_vectordb_action['name'] == action assert actual_vectordb_action['payload']['type'] == 'from_value' assert actual_vectordb_action['payload']['value'] == payload_body - assert actual_vectordb_action['query']['type'] == 'from_value' - assert actual_vectordb_action['query']['value'] == 'payload_search' + assert actual_vectordb_action['query'] == 'payload_search' assert actual_vectordb_action['response']['value'] == '1' def test_add_vector_embedding_action_config_op_embedding_search_from_slot(self): @@ -9585,7 +9585,7 @@ def test_add_vector_embedding_action_config_op_embedding_search_from_slot(self): user = 'test_vector_user' action = 'test_vectordb_action_op_embedding_search_from_slot' response = 'nupur.khare' - query = {'type': 'from_value', 'value': 'embedding_search'} + query = 'embedding_search' payload = {'type': 'from_slot', 'value': 'email'} processor.add_slot({"name": "email", "type": "text", "initial_value": "nupur.khare@digite.com", "influence_conversation": True}, bot, user, raise_exception_if_exists=False) @@ -9603,8 +9603,7 @@ def test_add_vector_embedding_action_config_op_embedding_search_from_slot(self): assert actual_vectordb_action['name'] == action assert actual_vectordb_action['payload']['type'] == 'from_slot' assert actual_vectordb_action['payload']['value'] == 'email' - assert actual_vectordb_action['query']['type'] == 'from_value' - assert actual_vectordb_action['query']['value'] == 'embedding_search' + assert actual_vectordb_action['query'] == 'embedding_search' assert actual_vectordb_action['response']['value'] == 'nupur.khare' def test_add_vector_embedding_action_config_op_embedding_search_from_slot_does_not_exists(self): @@ -9613,7 +9612,7 @@ def test_add_vector_embedding_action_config_op_embedding_search_from_slot_does_n user = 'test_vector_user_slot' action = 'test_vectordb_action_slot' response = 'nupur.khare' - query = {'type': 'from_value', 'value': 'embedding_search'} + query = 'embedding_search' payload = {'type': 'from_slot', 'value': 'cuisine'} vectordb_action_config = DatabaseActionRequest( name=action, @@ -9631,7 +9630,7 @@ def test_add_vector_embedding_action_config_existing_name(self): user = 'test_vector_user' action = 'test_vectordb_action_op_embedding_search' response = '0' - query = {'type': 'from_value', 'value': 'embedding_search'} + query = 'embedding_search' payload_body = { "ids": [ 0 @@ -9655,7 +9654,7 @@ def test_add_vector_embedding_action_config_empty_payload_values(self): user = 'test_vector_user_empty_name' action = 'test_add_vectordb_action_config_empty_name' response = '0' - query = {'type': 'from_value', 'value': 'embedding_search'} + query = 'embedding_search' payload_body = { "ids": [ 0 @@ -9701,7 +9700,7 @@ def test_add_vector_embedding_action_config_empty_operation_values(self): user = 'test_vector_user_empty_operation_values' action = 'test_add_vector_embedding_action_config_empty_operation_values' response = '0' - query = {'type': 'from_value', 'value': 'payload_search'} + query = 'payload_search' payload_body = { "ids": [ 0 @@ -9727,19 +9726,9 @@ def test_add_vector_embedding_action_config_empty_operation_values(self): response=ActionResponseEvaluation(value=response) ) vectordb_action_two = vectordb_action_config_two.dict() - vectordb_action_two['query']['value'] = '' + vectordb_action_two['query'] = '' with pytest.raises(ValidationError, match="query value is required"): processor.add_db_action(vectordb_action_two, user, bot) - vectordb_action_config_three = DatabaseActionRequest( - name=action, - query=query, - payload=payload, - response=ActionResponseEvaluation(value=response) - ) - vectordb_action_three = vectordb_action_config_three.dict() - vectordb_action_three['query']['type'] = '' - with pytest.raises(ValidationError, match="query type is required"): - processor.add_db_action(vectordb_action_three, user, bot) def test_get_vector_embedding_action(self): processor = MongoProcessor() @@ -9748,7 +9737,7 @@ def test_get_vector_embedding_action(self): action = 'test_get_vectordb_action' response = 'nupur.khare' - query = {'type': 'from_value', 'value': 'embedding_search'} + query = 'embedding_search' payload = {'type': 'from_slot', 'value': 'email'} DatabaseAction( name=action, @@ -9762,7 +9751,7 @@ def test_get_vector_embedding_action(self): actual = processor.get_db_action_config(bot=bot, action=action) assert actual is not None assert actual['name'] == action - assert actual['query'] == {'type': 'from_value', 'value': 'embedding_search'} + assert actual['query'] == 'embedding_search' assert actual['payload'] == {'type': 'from_slot', 'value': 'email'} assert actual['collection'] == 'test_vector_bot_get_faq_embd' assert actual['response'] == {'value': 'nupur.khare', 'dispatch': True, 'evaluation_type': 'expression', 'dispatch_type': 'text'} @@ -9776,7 +9765,7 @@ def test_get_vector_embedding_action_does_not_exists(self): action = 'test_get_vectordb_action' response = 'nupur.khare' - query = {'type': 'from_value', 'value': 'embedding_search'} + query = 'embedding_search' payload = {'type': 'from_slot', 'value': 'email'} DatabaseAction( name=action, @@ -9806,7 +9795,7 @@ def test_update_vector_embedding_action(self): user = 'test_update_vectordb_action_user' action = 'test_update_vectordb_action' response = '15' - query = {'type': 'from_value', 'value': 'payload_search'} + query = 'payload_search' payload_body = { "filter": { "should": [ @@ -9853,7 +9842,7 @@ def test_update_vector_embedding_action_does_not_exists(self): user = 'test_update_vectordb_action_user' action = 'test_update_vectordb_action_does_not_exists' response = 'Digite' - query = {'type': 'from_value', 'value': 'embedding_search'} + query = 'embedding_search' payload_body = { "ids": [ 0 @@ -9877,7 +9866,7 @@ def test_delete_vector_embedding_action_config(self): user = 'test_vector_user' action = 'test_delete_vector_embedding_action_config' response = '0' - query = {'type': 'from_value', 'value': 'embedding_search'} + query = 'embedding_search' payload_body = { "ids": [ 0 @@ -9909,7 +9898,7 @@ def test_delete_vector_embedding_action_config_non_existing(self): user = 'test_vector_user' action = 'test_delete_vector_embedding_action_config_non_existing' response = '0' - query = {'type': 'from_value', 'value': 'embedding_search'} + query = 'embedding_search' payload_body = { "ids": [ 0 @@ -14268,8 +14257,181 @@ def _mock_aggregation(*args, **kwargs): assert Utility.is_exist(Utterances, raise_error=False, name='utter_ask', bot='test') assert Utility.is_exist(Responses, raise_error=False, name='utter_ask', bot='test') + def test_save_payload_metadata(self): + processor = CognitionDataProcessor() + bot = 'test' + user = 'testUser' + metadata = { + "metadata": [ + {"column_name": "details", "data_type": "str", "enable_search": True, "create_embeddings": True}], + "collection_name": "details_collection", + "bot": bot, + "user": user + } + pytest.metadata_id = processor.save_cognition_schema(metadata, user, bot) + + metadata_one = { + "metadata": [ + {"column_name": "metadata_one", "data_type": "str", "enable_search": True, "create_embeddings": True}], + "collection_name": "metadata_one", + "bot": bot, + "user": user + } + pytest.metadata_id_one = processor.save_cognition_schema(metadata_one, user, bot) + + metadata_two = { + "metadata": [ + {"column_name": "metadata_two", "data_type": "str", "enable_search": True, "create_embeddings": True}], + "collection_name": "metadata_two", + "bot": bot, + "user": user + } + pytest.metadata_id_two = processor.save_cognition_schema(metadata_two, user, bot) + + metadata_three = { + "metadata": [ + {"column_name": "metadata_three", "data_type": "str", "enable_search": True, "create_embeddings": True}], + "collection_name": "metadata_three", + "bot": bot, + "user": user + } + with pytest.raises(AppException, match="Collection limit exceeded!"): + processor.save_cognition_schema(metadata_three, user, bot) + processor.delete_cognition_schema(pytest.metadata_id_one, bot) + processor.delete_cognition_schema(pytest.metadata_id_two, bot) + + def test_save_payload_metadata_column_limit_exceeded(self): + processor = CognitionDataProcessor() + bot = 'test' + user = 'testUser' + metadata = { + "metadata": [ + {"column_name": "tech", "data_type": "str", "enable_search": True, "create_embeddings": True}, + {"column_name": "age", "data_type": "int", "enable_search": True, "create_embeddings": False}, + {"column_name": "color", "data_type": "str", "enable_search": True, "create_embeddings": True}, + {"column_name": "name", "data_type": "str", "enable_search": True, "create_embeddings": True}, + {"column_name": "gender", "data_type": "str", "enable_search": True, "create_embeddings": True} + ], + "collection_name": "test_save_payload_metadata_column_limit_exceeded", + "bot": bot, + "user": user + } + with pytest.raises(AppException, match="Column limit exceeded for collection!"): + processor.save_cognition_schema(metadata, user, bot) + + def test_save_payload_metadata_same_columns(self): + processor = CognitionDataProcessor() + bot = 'test' + user = 'testUser' + metadata = { + "metadata": [ + {"column_name": "tech", "data_type": "str", "enable_search": True, "create_embeddings": True}, + {"column_name": "tech", "data_type": "int", "enable_search": True, "create_embeddings": False}], + "collection_name": "details_collection", + "bot": bot, + "user": user + } + with pytest.raises(AppException, match="Columns cannot be same in the schema!"): + processor.save_cognition_schema(metadata, user, bot) + + def test_save_payload_metadata_column_name_empty(self): + processor = CognitionDataProcessor() + bot = 'test' + user = 'testUser' + metadata = { + "metadata": [{"column_name": "", "data_type": "int", "enable_search": True, + "create_embeddings": True}], + "collection_name": "column_name_empty", + "bot": bot, + "user": user} + with pytest.raises(ValidationError, match="Column name cannot be empty"): + CognitionSchema(**metadata).save() + + def test_save_payload_metadata_data_type_invalid(self): + processor = CognitionDataProcessor() + bot = 'test' + user = 'testUser' + metadata = { + "metadata": [{"column_name": "name", "data_type": "bool", "enable_search": True, + "create_embeddings": True}], + "bot": bot, + "user": user + } + with pytest.raises(ValidationError, match="Only str and int data types are supported"): + CognitionSchema(**metadata).save() + + def test_update_payload_metadata_different_collection_name(self): + processor = CognitionDataProcessor() + bot = 'test' + user = 'testUser' + metadata = { + "metadata": [ + {"column_name": "birthday", "data_type": "int", "enable_search": True, "create_embeddings": True}], + "collection_name": "test_update_payload_metadata_different_collection_name", + "bot": bot, + "user": user + } + with pytest.raises(AppException, match="Collection name cannot be updated!"): + processor.update_cognition_schema(pytest.metadata_id, metadata, user, bot) + + def test_update_payload_metadata(self): + processor = CognitionDataProcessor() + bot = 'test' + user = 'testUser' + metadata = { + "metadata": [ + {"column_name": "birthday", "data_type": "int", "enable_search": True, "create_embeddings": True}], + "collection_name": "details_collection", + "bot": bot, + "user": user + } + processor.update_cognition_schema(pytest.metadata_id, metadata, user, bot) + + def test_update_payload_metadata_not_found(self): + processor = CognitionDataProcessor() + bot = 'test' + user = 'testUser' + metadata_id = '5349b4ddd2919d08c09890f3' + metadata = { + "metadata": [ + {"column_name": "month", "data_type": "str", "enable_search": True, "create_embeddings": True}], + "collection_name": "age", + "bot": bot, + "user": user + } + with pytest.raises(AppException, match="Schema with given id not found!"): + processor.update_cognition_schema(metadata_id, metadata, user, bot) + + def test_get_payload_metadata(self): + processor = CognitionDataProcessor() + bot = 'test' + user = 'testUser' + data = list(processor.list_cognition_schema(bot)) + print(data) + assert data[0] + assert data[0]['_id'] + + def test_delete_payload_metadata(self): + processor = CognitionDataProcessor() + bot = 'test' + user = 'testUser' + processor.delete_cognition_schema(pytest.metadata_id, bot) + + def test_delete_payload_metadata_does_not_exists(self): + processor = CognitionDataProcessor() + bot = 'test' + user = 'testUser' + with pytest.raises(AppException, match="Schema does not exists!"): + processor.delete_cognition_schema("507f191e050c19729de760ea", bot) + + def test_get_payload_metadata_not_exists(self): + processor = CognitionDataProcessor() + bot = 'testing' + assert list(processor.list_cognition_schema(bot)) == [] + + def test_save_content_with_gpt_feature_disabled(self): - processor = MongoProcessor() + processor = CognitionDataProcessor() bot = 'test' user = 'testUser' collection = "Bot" @@ -14278,15 +14440,66 @@ def test_save_content_with_gpt_feature_disabled(self): ' to perform a wide range of tasks, from simple tasks like answering basic questions or sending ' \ 'automated messages to complex tasks like performing data analysis, playing games, or even controlling ' \ 'physical machines.' + payload = { + "data": content, + "content_type": "text", + "collection": collection} with pytest.raises(AppException, match="Faq feature is disabled for the bot! Please contact support."): - processor.save_content(content, user, bot, collection) + processor.save_cognition_data(payload, user, bot) settings = BotSettings.objects(bot=bot).get() settings.llm_settings = LLMSettings(enable_faq=True) settings.save() + def test_save_content_atleast_ten_words(self): + processor = CognitionDataProcessor() + bot = 'test' + user = 'testUser' + collection = 'Bot' + content = 'A bot, short for robot, is a program.' + payload = { + "data": content, + "content_type": "text", + "collection": collection} + with pytest.raises(AppException, match="Content should contain atleast 10 words."): + processor.save_cognition_data(payload, user, bot) + + def test_save_content_collection_does_not_exist(self): + processor = CognitionDataProcessor() + bot = 'test' + user = 'testUser' + collection = 'Bot' + content = 'A bot, short for robot, is a program. Bots can be programmed to perform a wide range of tasks.' + payload = { + "data": content, + "content_type": "text", + "collection": collection} + with pytest.raises(AppException, match="Collection does not exist!"): + processor.save_cognition_data(payload, user, bot) + + def test_save_content_text_with_metadata_invalid(self): + processor = CognitionDataProcessor() + bot = 'test' + user = 'testUser' + collection = "test_save_content_text_with_metadata_invalid" + content = 'A large language model is a type of artificial intelligence system designed to understand and generate human language.' + payload = { + "data": content, + "content_type": "text", + "collection": collection} + metadata = { + "metadata": [ + {"column_name": "LLM", "data_type": "str", "enable_search": True, "create_embeddings": True}], + "collection_name": collection, + "bot": bot, + "user": user + } + processor.save_cognition_schema(metadata, user, bot) + with pytest.raises(AppException, match="Content type text cannot have metadata!"): + processor.save_cognition_data(payload, user, bot) + def test_save_content(self): - processor = MongoProcessor() + processor = CognitionDataProcessor() bot = 'test' user = 'testUser' collection = "Bot" @@ -14295,77 +14508,104 @@ def test_save_content(self): ' to perform a wide range of tasks, from simple tasks like answering basic questions or sending ' \ 'automated messages to complex tasks like performing data analysis, playing games, or even controlling ' \ 'physical machines.' - pytest.content_id = processor.save_content(content, user, bot, collection) + payload = { + "data": content, + "content_type": "text", + "collection": collection} + metadata = { + "metadata": None, + "collection_name": collection, + "bot": bot, + "user": user + } + processor.save_cognition_schema(metadata, user, bot) + pytest.content_id = processor.save_cognition_data(payload, user, bot) content_id = '5349b4ddd2791d08c09890f3' - with pytest.raises(AppException, match="Text already exists!"): - processor.update_content(content_id, content, user, bot, None) + with pytest.raises(AppException, match="Payload data already exists!"): + processor.update_cognition_data(content_id, payload, user, bot) - def test_save_content_invalid(self): - processor = MongoProcessor() + def test_update_content_atleast_ten_words(self): + processor = CognitionDataProcessor() bot = 'test' user = 'testUser' - collection = 'example' - content = 'A bot, short for robot, is a program.' + collection = 'Bot' + content = 'Bots are commonly used in various industries.' + payload = { + "data": content, + "content_type": "text", + "collection": collection} with pytest.raises(AppException, match="Content should contain atleast 10 words."): - processor.save_content(content, user, bot, collection) + processor.update_cognition_data(pytest.content_id, payload, user, bot) + + def test_update_content_collection_does_not_exists(self): + processor = CognitionDataProcessor() + bot = 'test' + user = 'testUser' + collection = 'test_update_content_collection_does_not_exists' + content = 'LLMs can be used for a wide range of applications, including chatbots, language translation, content generation, sentiment analysis, and many other natural language processing tasks. ' + payload = { + "data": content, + "content_type": "text", + "collection": collection} + with pytest.raises(AppException, match="Collection does not exist!"): + processor.update_cognition_data(pytest.content_id, payload, user, bot) def test_update_content(self): - processor = MongoProcessor() + processor = CognitionDataProcessor() bot = 'test' user = 'testUser' - collection = 'Bot_details' + collection = 'Bot' content = 'Bots are commonly used in various industries, such as e-commerce, customer service, gaming, ' \ 'and social media. Some bots are designed to interact with humans in a conversational manner and are ' \ 'called chatbots or virtual assistants.' - processor.update_content(pytest.content_id, content, user, bot, collection) - - def test_update_content_invalid(self): - processor = MongoProcessor() - bot = 'test' - user = 'testUser' - collection = 'example_one' - content = 'Bots are commonly used in various industries.' - with pytest.raises(AppException, match="Content should contain atleast 10 words."): - content_id = processor.save_content(content, user, bot, collection) - processor.update_content(content_id, content, user, bot, collection) + payload = { + "data": content, + "content_type": "text", + "collection": collection} + processor.update_cognition_data(pytest.content_id, payload, user, bot) def test_update_content_not_found(self): - processor = MongoProcessor() + processor = CognitionDataProcessor() bot = 'test' user = 'testUser' content_id = '5349b4ddd2781d08c09890f3' + collection = 'Bot' content = 'MongoDB is a source-available cross-platform document-oriented database program. ' \ 'Classified as a NoSQL database program, MongoDB uses JSON-like documents with optional schemas. ' \ 'MongoDB is developed by MongoDB Inc. and licensed under the Server Side Public License which is ' \ 'deemed non-free by several distributions.' - with pytest.raises(AppException, match="Content with given id not found!"): - processor.update_content(content_id, content, user, bot, None) + payload = { + "data": content, + "content_type": "text", + "collection": collection} + with pytest.raises(AppException, match="Payload with given id not found!"): + processor.update_cognition_data(content_id, payload, user, bot) def test_delete_content(self): - processor = MongoProcessor() + processor = CognitionDataProcessor() bot = 'test' user = 'testUser' - processor.delete_content(pytest.content_id, user, bot) + processor.delete_cognition_data(pytest.content_id, bot) def test_delete_content_does_not_exists(self): - processor = MongoProcessor() + processor = CognitionDataProcessor() bot = 'test' user = 'testUser' - with pytest.raises(AppException, match="Text does not exists!"): - processor.delete_content("507f191e810c19729de860ea", user, bot) + with pytest.raises(AppException, match="Payload does not exists!"): + processor.delete_cognition_data("507f191e810c19729de860ea", bot) - @patch("kairon.shared.data.processor.MongoProcessor.get_content", autospec=True) + @patch("kairon.shared.cognition.processor.CognitionDataProcessor.get_content", autospec=True) def test_get_content_not_exists(self, mock_get_content): def _get_content(*args, **kwargs): return [] mock_get_content.return_value = _get_content() kwargs = {} - processor = MongoProcessor() + processor = CognitionDataProcessor() bot = 'test' assert list(processor.get_content(bot, **kwargs)) == [] - @patch("kairon.shared.data.processor.MongoProcessor.get_content", autospec=True) + @patch("kairon.shared.cognition.processor.CognitionDataProcessor.get_content", autospec=True) def test_get_content(self, mock_get_content): def _get_content(*args, **kwargs): return [{'vector_id': 1, @@ -14373,17 +14613,21 @@ def _get_content(*args, **kwargs): 'data': 'Unit testing is a software testing technique in which individual units or components of a software application are tested in isolation to ensure that each unit functions as expected. ', 'user': 'testUser', 'bot': 'test', 'content_type': 'text', - 'metadata': [], + 'metadata': None, 'collection': 'testing'}] mock_get_content.return_value = _get_content() - processor = MongoProcessor() + processor = CognitionDataProcessor() bot = 'test' user = 'testUser' - collection = 'testing' + collection = 'Bot' content = 'Unit testing is a software testing technique in which individual units or components of a software ' \ 'application are tested in isolation to ensure that each unit functions as expected. ' - pytest.content_id = processor.save_content(content, user, bot, collection) + payload = { + "data": content, + "content_type": "text", + "collection": collection} + pytest.content_id_unit = processor.save_cognition_data(payload, user, bot) kwargs = {'data': 'Unit testing'} data = list(processor.get_content(bot, **kwargs)) assert data[0][ @@ -14402,30 +14646,30 @@ def _get_content(*args, **kwargs): def test_list_content(self): bot = 'test' user = 'testUser' - processor = MongoProcessor() - contents = processor.list_collection(bot) - assert contents == ['testing'] + processor = CognitionDataProcessor() + contents = processor.list_cognition_collections(bot) + print(contents) + assert contents == ['Bot'] def test_delete_content_for_action(self): - processor = MongoProcessor() + processor = CognitionDataProcessor() bot = 'test' - user = 'testUser' - processor.delete_content(pytest.content_id, user, bot) + processor.delete_cognition_data(pytest.content_id_unit, bot) def test_save_payload_content_with_gpt_feature_disabled(self): - processor = MongoProcessor() - bot = 'test_bot_payload' + processor = CognitionDataProcessor() + bot = 'test' user = 'testUser' payload = { "data": {"name": "Sita", "engineer": "yes"}, "content_type": "json", - "metadata": [{"column_name": "name", "data_type": "str", "enable_search": True, - "create_embeddings": True}, {"column_name": "engineer", "data_type": "str", "enable_search": True, - "create_embeddings": True}], + "collection": "test_save_payload_content_with_gpt_feature_disabled", "bot": bot, "user": user } - + settings = BotSettings.objects(bot=bot).get() + settings.llm_settings = LLMSettings(enable_faq=False) + settings.save() with pytest.raises(AppException, match="Faq feature is disabled for the bot! Please contact support."): processor.save_cognition_data(payload, user, bot) @@ -14433,166 +14677,152 @@ def test_save_payload_content_with_gpt_feature_disabled(self): settings.llm_settings = LLMSettings(enable_faq=True) settings.save() - def test_save_payload_content(self): - processor = MongoProcessor() - bot = 'test_bot_payload' + def test_save_payload_content_collection_does_not_exists(self): + processor = CognitionDataProcessor() + bot = 'test' user = 'testUser' payload = { "data": {"name": "Nupur", "city": "Pune"}, - "content_type": "json", - "metadata": [{"column_name": "name", "data_type": "str", "enable_search": True, "create_embeddings": True}, - {"column_name": "city", "data_type": "str", "enable_search": False, "create_embeddings": True}]} - pytest.payload_id = processor.save_cognition_data(payload, user, bot) + "collection": "test_save_payload_content_collection_does_not_exists", + "content_type": "json"} + with pytest.raises(AppException, match="Collection does not exist!"): + processor.save_cognition_data(payload, user, bot) - def test_save_payload_content_with_update(self): - processor = MongoProcessor() - bot = 'test_bot_payload' + def test_save_payload_content(self): + processor = CognitionDataProcessor() + bot = 'test' user = 'testUser' - payload = { - "data": 'Data science is an interdisciplinary field that involves extracting knowledge and insights from data using various scientific methods, algorithms, processes, and systems.', - "content_type": "text", + metadata = { + "metadata": [ + {"column_name": "name", "data_type": "str", "enable_search": True, "create_embeddings": True}, + {"column_name": "city", "data_type": "str", "enable_search": True, "create_embeddings": True}], + "collection_name": "test_save_payload_content", "bot": bot, "user": user } - pytest.payload_id_two = processor.save_cognition_data(payload, user, bot) + processor.save_cognition_schema(metadata, user, bot) + + payload = { + "data": {"name": "Nupur", "city": "Pune"}, + "collection": "test_save_payload_content", + "content_type": "json"} + pytest.payload_id = processor.save_cognition_data(payload, user, bot) payload_id = '64b0f2e66707e9282a13f6cd' with pytest.raises(AppException, match="Payload data already exists!"): processor.update_cognition_data(payload_id, payload, user, bot) - def test_save_payload_content_metadata_int(self): - processor = MongoProcessor() - bot = 'test_bot_payload' + def test_save_payload_content_metadata_does_not_exists(self): + processor = CognitionDataProcessor() + bot = 'test' user = 'testUser' payload = { - "data": {"name": "Ram", "age": 23, "color": "red"}, - "content_type": "json", - "metadata": [{"column_name": "name", "data_type": "str", "enable_search": True, "create_embeddings": True}, - {"column_name": "age", "data_type": "int", "enable_search": True, "create_embeddings": False}, - {"column_name": "color", "data_type": "str", "enable_search": False, "create_embeddings": True}]} - actual = processor.save_cognition_data(payload, user, bot) - assert actual + "data": {"number": 15, "group": "a"}, + "collection": "test_save_payload_content", + "content_type": "json"} + with pytest.raises(AppException, match="Metadata related to payload not found!"): + processor.save_cognition_data(payload, user, bot) - def test_save_payload_content_as_json(self): - processor = MongoProcessor() - bot = 'test_bot_payload' + def test_save_payload_content_invalid_data_type(self): + processor = CognitionDataProcessor() + bot = 'test' user = 'testUser' + metadata = { + "metadata": [ + {"column_name": "number", "data_type": "int", "enable_search": True, "create_embeddings": True}], + "collection_name": "Bot", + "bot": bot, + "user": user + } + processor.save_cognition_schema(metadata, user, bot) payload = { - "data": {"name": "Nupur", "age": 25, "city": "Bengaluru"}, + "data": {"number": "Twenty-three"}, "content_type": "json", - "metadata": [{"column_name": "name", "data_type": "str", "enable_search": True, "create_embeddings": True}, - {"column_name": "age", "data_type": "int", "enable_search": True, "create_embeddings": False}, - {"column_name": "city", "data_type": "str", "enable_search": False, "create_embeddings": True}]} - actual = processor.save_cognition_data(payload, user, bot) - assert actual + "collection": "Bot"} + with pytest.raises(AppException, match="Invalid data type!"): + processor.save_cognition_data(payload, user, bot) - def test_save_payload_content_column_name_empty(self): - processor = MongoProcessor() - bot = 'test_bot_payload' + def test_save_payload_content_data_empty(self): + processor = CognitionDataProcessor() + bot = 'test' user = 'testUser' payload = { - "data": {"name": "Nupur"}, + "data": {}, "content_type": "json", - "metadata": [{"column_name": "", "data_type": "int", "enable_search": True, - "create_embeddings": True}], "bot": bot, - "user": user} - with pytest.raises(ValidationError, match="Column name cannot be empty"): + "user": user + } + with pytest.raises(ValidationError, match="data cannot be empty"): CognitionData(**payload).save() - def test_save_payload_content_data_type_invalid(self): - processor = MongoProcessor() - bot = 'test_bot_payload' + def test_update_payload_content_not_found(self): + processor = CognitionDataProcessor() + bot = 'test' user = 'testUser' + payload_id = '5349b4ddd2719d08c09890f3' payload = { - "data": {"name": "Nupur"}, + "data": {"city": "Pune", "color": "red"}, "content_type": "json", - "metadata": [{"column_name": "name", "data_type": "bool", "enable_search": True, - "create_embeddings": True}], "bot": bot, "user": user } - with pytest.raises(ValidationError, match="Only str and int data types are supported"): - CognitionData(**payload).save() + with pytest.raises(AppException, match="Payload with given id not found!"): + processor.update_cognition_data(payload_id, payload, user, bot) - def test_save_payload_content_invalid_metadata_data_type(self): - processor = MongoProcessor() - bot = 'test_bot_payload' + def test_update_payload_content_collection_does_not_exists(self): + processor = CognitionDataProcessor() + bot = 'test' user = 'testUser' payload = { - "data": {"name": "Ram", "age": "Twenty-Three", "color": "red"}, + "data": {"city": "Pune", "color": "red"}, "content_type": "json", - "metadata": [ - {"column_name": "name", "data_type": "str", "enable_search": True, "create_embeddings": True}, - {"column_name": "age", "data_type": "int", "enable_search": True, "create_embeddings": False}, - {"column_name": "color", "data_type": "str", "enable_search": False, "create_embeddings": True} - ] + "collection": "test_update_payload_content_collection_does_not_exists", + "bot": bot, + "user": user } - with pytest.raises(AppException, match="Invalid data type"): - actual = processor.save_cognition_data(payload, user, bot) + with pytest.raises(AppException, match="Collection does not exist!"): + processor.update_cognition_data(pytest.payload_id, payload, user, bot) def test_update_payload_content(self): - processor = MongoProcessor() - bot = 'test_bot_payload' + processor = CognitionDataProcessor() + bot = 'test' user = 'testUser' payload = { - "data": 'AI can process large amounts of data in ways that humans cannot.', - "content_type": "text", + "data": {"name": "Digite", "city": "Mumbai"}, + "content_type": "json", + "collection": "test_save_payload_content", "bot": bot, "user": user } processor.update_cognition_data(pytest.payload_id, payload, user, bot) - def test_update_payload_content_not_found(self): - processor = MongoProcessor() - bot = 'test_bot_payload' + def test_get_payload_content(self): + processor = CognitionDataProcessor() + bot = 'test' user = 'testUser' - payload_id = '5349b4ddd2719d08c09890f3' - payload = { - "data": 'Data science plays a crucial role in various industries, including finance, healthcare, marketing, e-commerce, and many others.', - "content_type": "text", - "bot": bot, - "user": user - } - with pytest.raises(AppException, match="Payload with given id not found!"): - processor.update_cognition_data(payload_id, payload, user, bot) + data = list(processor.list_cognition_data(bot)) + print(data) + assert data[0][ + 'content'] == {"name": "Digite", "city": "Mumbai"} + assert data[0]['_id'] def test_delete_payload_content(self): - processor = MongoProcessor() - bot = 'test_bot_payload' + processor = CognitionDataProcessor() + bot = 'test' user = 'testUser' processor.delete_cognition_data(pytest.payload_id, bot) def test_delete_payload_content_does_not_exists(self): - processor = MongoProcessor() - bot = 'test_bot_payload' + processor = CognitionDataProcessor() + bot = 'test' user = 'testUser' with pytest.raises(AppException, match="Payload does not exists!"): processor.delete_cognition_data("507f191e050c19729de860ea", bot) def test_get_payload_content_not_exists(self): - processor = MongoProcessor() + processor = CognitionDataProcessor() bot = 'testing' assert list(processor.list_cognition_data(bot)) == [] - def test_get_payload_content(self): - processor = MongoProcessor() - bot = 'test_bot_payload' - user = 'testUser' - payload = { - "data": {"subject": "DBMS", "year": "2"}, - "content_type": "json", - "metadata": [{"column_name": "subject", "data_type": "str", "enable_search": True, - "create_embeddings": True}, {"column_name": "year", "data_type": "str", "enable_search": True, - "create_embeddings": True}], - "bot": bot, - "user": user - } - pytest.content_id = processor.save_cognition_data(payload, user, bot) - data = list(processor.list_cognition_data(bot)) - assert data[0][ - 'content'] == 'Data science is an interdisciplinary field that involves extracting knowledge and insights from data using various scientific methods, algorithms, processes, and systems.' - assert data[0]['_id'] - class TestTrainingDataProcessor: diff --git a/tests/unit_test/llm_test.py b/tests/unit_test/llm_test.py index 543254942..007393116 100644 --- a/tests/unit_test/llm_test.py +++ b/tests/unit_test/llm_test.py @@ -10,8 +10,9 @@ from kairon.exceptions import AppException from kairon.shared.admin.constants import BotSecretType from kairon.shared.admin.data_objects import BotSecrets +from kairon.shared.cognition.data_objects import CognitionData, CognitionSchema from kairon.shared.data.constant import DEFAULT_SYSTEM_PROMPT -from kairon.shared.data.data_objects import CognitionData, LLMSettings +from kairon.shared.data.data_objects import LLMSettings from kairon.shared.llm.factory import LLMFactory from kairon.shared.llm.gpt3 import GPT3FAQEmbedding, LLMBase from kairon.shared.utils import Utility @@ -118,25 +119,31 @@ async def test_gpt3_faq_embedding_train_payload_text(self, aioresponses): bot = "test_embed_faq_text" user = "test" value = "nupurkhare" + CognitionSchema( + metadata=[{"column_name": "name", "data_type": "str", "enable_search": True, "create_embeddings": True}, + {"column_name": "city", "data_type": "str", "enable_search": False, "create_embeddings": True}], + collection_name="User_details", + bot=bot, user=user + ).save() + CognitionSchema( + metadata=[{"column_name": "country", "data_type": "str", "enable_search": True, "create_embeddings": True}, + {"column_name": "lang", "data_type": "str", "enable_search": False, "create_embeddings": True}, + {"column_name": "role", "data_type": "str", "enable_search": True, "create_embeddings": True}], + collection_name="Country_details", + bot=bot, user=user).save() test_content = CognitionData( data={"name": "Nupur", "city": "Pune"}, content_type="json", - metadata=[{"column_name": "name", "data_type": "str", "enable_search": True, "create_embeddings": True}, - {"column_name": "city", "data_type": "str", "enable_search": False, "create_embeddings": True}], collection="User_details", bot=bot, user=user).save() test_content_two = CognitionData( data={"country": "Spain", "lang": "spanish"}, content_type="json", - metadata=[{"column_name": "country", "data_type": "str", "enable_search": True, "create_embeddings": True}, - {"column_name": "lang", "data_type": "str", "enable_search": False, "create_embeddings": True}], collection="Country_details", bot=bot, user=user).save() test_content_three = CognitionData( data={"role": "ds", "lang": "spanish"}, content_type="json", - metadata=[{"column_name": "role", "data_type": "str", "enable_search": True, "create_embeddings": True}, - {"column_name": "lang", "data_type": "str", "enable_search": False, "create_embeddings": True}], collection="Country_details", bot=bot, user=user).save() secret = BotSecrets(secret_type=BotSecretType.gpt_key.value, value=value, bot=bot, user=user).save() @@ -207,7 +214,7 @@ async def test_gpt3_faq_embedding_train_payload_text(self, aioresponses): "input": '{"country": "Spain", "lang": "spanish"}'} assert list(aioresponses.requests.values())[3][0].kwargs['headers'] == request_header assert list(aioresponses.requests.values())[3][1].kwargs['json'] == {"model": "text-embedding-ada-002", - "input": '{"role": "ds", "lang": "spanish"}'} + "input": '{"lang": "spanish", "role": "ds"}'} assert list(aioresponses.requests.values())[3][1].kwargs['headers'] == request_header assert list(aioresponses.requests.values())[3][2].kwargs['json'] == {"model": "text-embedding-ada-002", "input": '{"name": "Nupur", "city": "Pune"}'} @@ -233,12 +240,16 @@ async def test_gpt3_faq_embedding_train_payload_with_int(self, aioresponses): bot = "test_embed_faq_json" user = "test" value = "nupurkhare" - test_content = CognitionData( - data={"name": "Ram", "age": "23", "color": "red"}, - content_type="json", + CognitionSchema( metadata=[{"column_name": "name", "data_type": "str", "enable_search": True, "create_embeddings": True}, {"column_name": "age", "data_type": "int", "enable_search": True, "create_embeddings": False}, {"column_name": "color", "data_type": "str", "enable_search": True, "create_embeddings": True}], + collection_name="payload_with_int", + bot=bot, user=user).save() + test_content = CognitionData( + data={"name": "Ram", "age": "23", "color": "red"}, + content_type="json", + collection="payload_with_int", bot=bot, user=user).save() secret = BotSecrets(secret_type=BotSecretType.gpt_key.value, value=value, bot=bot, user=user).save() @@ -254,7 +265,7 @@ async def test_gpt3_faq_embedding_train_payload_with_int(self, aioresponses): gpt3 = GPT3FAQEmbedding(test_content.bot, LLMSettings(provider="openai").to_mongo().to_dict()) aioresponses.add( - url=urljoin(Utility.environment['vector']['db'], f"/collections/{gpt3.bot}{gpt3.suffix}"), + url=urljoin(Utility.environment['vector']['db'], f"/collections/test_embed_faq_json_payload_with_int_faq_embd"), method="PUT", status=200 ) @@ -264,7 +275,7 @@ async def test_gpt3_faq_embedding_train_payload_with_int(self, aioresponses): payload={"time": 0, "status": "ok", "result": {"collections": []}}) aioresponses.add( - url=urljoin(Utility.environment['vector']['db'], f"/collections/{gpt3.bot}{gpt3.suffix}/points"), + url=urljoin(Utility.environment['vector']['db'], f"/collections/test_embed_faq_json_payload_with_int_faq_embd/points"), method="PUT", payload={"result": {"operation_id": 0, "status": "acknowledged"}, "status": "ok", "time": 0.003612634} ) @@ -273,27 +284,31 @@ async def test_gpt3_faq_embedding_train_payload_with_int(self, aioresponses): response = await gpt3.train() assert response['faq'] == 1 - assert list(aioresponses.requests.values())[1][0].kwargs['json'] == {'name': gpt3.bot + gpt3.suffix, + assert list(aioresponses.requests.values())[1][0].kwargs['json'] == {'name': 'test_embed_faq_json_payload_with_int_faq_embd', 'vectors': gpt3.vector_config} assert list(aioresponses.requests.values())[2][0].kwargs['json'] == {"model": "text-embedding-ada-002", "input": json.dumps(input)} assert list(aioresponses.requests.values())[2][0].kwargs['headers'] == request_header assert list(aioresponses.requests.values())[3][0].kwargs['json'] == {'points': [{'id': test_content.vector_id, 'vector': embedding, - 'payload': {'name': 'Ram', 'age': 23, 'color': 'red', "collection_name": f"{gpt3.bot}{gpt3.suffix}"} + 'payload': {'name': 'Ram', 'age': 23, 'color': 'red', "collection_name": "test_embed_faq_json_payload_with_int_faq_embd"} }]} @pytest.mark.asyncio async def test_gpt3_faq_embedding_train_int(self, aioresponses): - bot = "test_embed_faq_int" + bot = "test_int" user = "test" value = "nupurkhare" - test_content = CognitionData( - data={"name": "Ram", "age": 23, "color": "red"}, - content_type="json", + CognitionSchema( metadata=[{"column_name": "name", "data_type": "str", "enable_search": True, "create_embeddings": True}, {"column_name": "age", "data_type": "int", "enable_search": True, "create_embeddings": False}, {"column_name": "color", "data_type": "str", "enable_search": True, "create_embeddings": True}], + collection_name="embd_int", + bot=bot, user=user).save() + test_content = CognitionData( + data={"name": "Ram", "age": 23, "color": "red"}, + content_type="json", + collection="embd_int", bot=bot, user=user).save() secret = BotSecrets(secret_type=BotSecretType.gpt_key.value, value=value, bot=bot, user=user).save() @@ -317,23 +332,23 @@ async def test_gpt3_faq_embedding_train_int(self, aioresponses): aioresponses.add( method="DELETE", - url=urljoin(Utility.environment['vector']['db'], f"/collections/{gpt3.bot}{gpt3.suffix}"), + url=urljoin(Utility.environment['vector']['db'], f"/collections/test_int_embd_int_faq_embd"), ) aioresponses.add( - url=urljoin(Utility.environment['vector']['db'], f"/collections/{gpt3.bot}{gpt3.suffix}"), + url=urljoin(Utility.environment['vector']['db'], f"/collections/test_int_embd_int_faq_embd"), method="PUT", status=200 ) aioresponses.add( - url=urljoin(Utility.environment['vector']['db'], f"/collections/{gpt3.bot}{gpt3.suffix}"), + url=urljoin(Utility.environment['vector']['db'], f"/collections/test_int_embd_int_faq_embd"), method="PUT", status=200 ) aioresponses.add( - url=urljoin(Utility.environment['vector']['db'], f"/collections/{gpt3.bot}{gpt3.suffix}/points"), + url=urljoin(Utility.environment['vector']['db'], f"/collections/test_int_embd_int_faq_embd/points"), method="PUT", payload={"result": {"operation_id": 0, "status": "acknowledged"}, "status": "ok", "time": 0.003612634} ) @@ -341,77 +356,13 @@ async def test_gpt3_faq_embedding_train_int(self, aioresponses): response = await gpt3.train() assert response['faq'] == 1 - assert list(aioresponses.requests.values())[1][0].kwargs['json'] == {'name': gpt3.bot + gpt3.suffix, + assert list(aioresponses.requests.values())[1][0].kwargs['json'] == {'name': 'test_int_embd_int_faq_embd', 'vectors': gpt3.vector_config} assert list(aioresponses.requests.values())[2][0].kwargs['json'] == {"model": "text-embedding-ada-002", "input": json.dumps(input)} assert list(aioresponses.requests.values())[2][0].kwargs['headers'] == request_header expected_payload = test_content.data - expected_payload['collection_name'] = f"{gpt3.bot}{gpt3.suffix}" - assert list(aioresponses.requests.values())[3][0].kwargs['json'] == { - 'points': [{'id': test_content.vector_id, - 'vector': embedding, - 'payload': expected_payload - }]} - - @pytest.mark.asyncio - async def test_gpt3_faq_embedding_train_payload_json_no_metadata(self, aioresponses): - bot = "test_embed_faq_json_no_metadata" - user = "test" - value = "nupurkhare" - test_content = CognitionData( - data={"name": "Nupur", "age": 25, "city": "Bengaluru"}, - content_type="json", - metadata=[], - bot=bot, user=user).save() - secret = BotSecrets(secret_type=BotSecretType.gpt_key.value, value=value, bot=bot, user=user).save() - - embedding = list(np.random.random(GPT3FAQEmbedding.__embedding__)) - request_header = {"Authorization": "Bearer nupurkhare"} - - aioresponses.add( - url="https://api.openai.com/v1/embeddings", - method="POST", - status=200, - payload={'data': [{'embedding': embedding}]} - ) - - with mock.patch.dict(Utility.environment, {'llm': {"faq": "GPT3_FAQ_EMBED", 'api_key': secret}}): - gpt3 = GPT3FAQEmbedding(test_content.bot, LLMSettings(provider="openai").to_mongo().to_dict()) - - aioresponses.add( - url=urljoin(Utility.environment['vector']['db'], f"/collections"), - method="GET", - payload={"time": 0, "status": "ok", "result": {"collections": []}}) - - - aioresponses.add( - method="DELETE", - url=urljoin(Utility.environment['vector']['db'], f"/collections/{gpt3.bot}{gpt3.suffix}"), - ) - - aioresponses.add( - url=urljoin(Utility.environment['vector']['db'], f"/collections/{gpt3.bot}{gpt3.suffix}"), - method="PUT", - status=200 - ) - - aioresponses.add( - url=urljoin(Utility.environment['vector']['db'], f"/collections/{gpt3.bot}{gpt3.suffix}/points"), - method="PUT", - payload={"result": {"operation_id": 0, "status": "acknowledged"}, "status": "ok", "time": 0.003612634} - ) - - response = await gpt3.train() - assert response['faq'] == 1 - - assert list(aioresponses.requests.values())[1][0].kwargs['json'] == {'name': gpt3.bot + gpt3.suffix, - 'vectors': gpt3.vector_config} - assert list(aioresponses.requests.values())[2][0].kwargs['json'] == {"model": "text-embedding-ada-002", - "input": json.dumps(test_content.data)} - assert list(aioresponses.requests.values())[2][0].kwargs['headers'] == request_header - expected_payload = test_content.data - expected_payload['collection_name'] = f"{gpt3.bot}{gpt3.suffix}" + expected_payload['collection_name'] = 'test_int_embd_int_faq_embd' assert list(aioresponses.requests.values())[3][0].kwargs['json'] == { 'points': [{'id': test_content.vector_id, 'vector': embedding, @@ -479,23 +430,21 @@ async def test_gpt3_faq_embedding_train_upsert_error(self, aioresponses): assert list(aioresponses.requests.values())[3][0].kwargs['json'] == {'points': [{'id': test_content.vector_id, 'vector': embedding, 'payload': {'collection_name': f"{bot}{gpt3.suffix}",'content': test_content.data}}]} - @pytest.mark.asyncio async def test_gpt3_faq_embedding_train_payload_upsert_error_json(self, aioresponses): - bot = "test_embed_faq_payload_upsert_error" + bot = "payload_upsert_error" user = "test" value = "nupurk" + CognitionSchema( + metadata=[{"column_name": "city", "data_type": "str", "enable_search": True, "create_embeddings": True}, + {"column_name": "color", "data_type": "str", "enable_search": True, "create_embeddings": True}], + collection_name="error_json", + bot=bot, user=user + ).save() test_content = CognitionData( - data={ - "filter": { - "should": [ - {"key": "city", "match": {"value": "London"}}, - {"key": "color", "match": {"value": "red"}} - ] - } - }, + data={'city': 'London', 'color': 'red'}, content_type="json", - metadata=[], + collection="error_json", bot=bot, user=user).save() secret = BotSecrets(secret_type=BotSecretType.gpt_key.value, value=value, bot=bot, user=user).save() @@ -520,17 +469,17 @@ async def test_gpt3_faq_embedding_train_payload_upsert_error_json(self, aiorespo aioresponses.add( method="DELETE", - url=urljoin(Utility.environment['vector']['db'], f"/collections/{gpt3.bot}{gpt3.suffix}"), + url=urljoin(Utility.environment['vector']['db'], f"/collections/payload_upsert_error_error_json_faq_embd"), ) aioresponses.add( - url=urljoin(Utility.environment['vector']['db'], f"/collections/{gpt3.bot}{gpt3.suffix}"), + url=urljoin(Utility.environment['vector']['db'], f"/collections/payload_upsert_error_error_json_faq_embd"), method="PUT", status=200 ) aioresponses.add( - url=urljoin(Utility.environment['vector']['db'], f"/collections/{gpt3.bot}{gpt3.suffix}/points"), + url=urljoin(Utility.environment['vector']['db'], f"/collections/payload_upsert_error_error_json_faq_embd/points"), method="PUT", payload={"result": None, 'status': {'error': 'Json deserialize error: missing field `vectors` at line 1 column 34779'}, @@ -540,11 +489,11 @@ async def test_gpt3_faq_embedding_train_payload_upsert_error_json(self, aiorespo with pytest.raises(AppException, match="Unable to train FAQ! Contact support"): await gpt3.train() - assert list(aioresponses.requests.values())[1][0].kwargs['json'] == {'name': gpt3.bot + gpt3.suffix, 'vectors': gpt3.vector_config} + assert list(aioresponses.requests.values())[1][0].kwargs['json'] == {'name': 'payload_upsert_error_error_json_faq_embd', 'vectors': gpt3.vector_config} assert list(aioresponses.requests.values())[2][0].kwargs['json'] == {"model": "text-embedding-ada-002", "input": json.dumps(test_content.data)} assert list(aioresponses.requests.values())[2][0].kwargs['headers'] == request_header expected_payload = test_content.data - expected_payload['collection_name'] = f"{gpt3.bot}{gpt3.suffix}" + expected_payload['collection_name'] = 'payload_upsert_error_error_json_faq_embd' assert list(aioresponses.requests.values())[3][0].kwargs['json'] == {'points': [{'id': test_content.vector_id, 'vector': embedding, 'payload': expected_payload