From 32eadb276d9a2c535d1ac91a1a35333e07bc0e3a Mon Sep 17 00:00:00 2001 From: Nupur Khare Date: Tue, 31 Oct 2023 19:38:25 +0530 Subject: [PATCH 1/8] Fixed - Collection is getting deleted even after it is attached to prompt action. 1. Added unit and integration test cases. 2. Fixed test cases. --- kairon/shared/cognition/processor.py | 8 ++ tests/integration_test/services_test.py | 79 ++++++++++++++++++- .../data_processor/data_processor_test.py | 42 ++++++++++ 3 files changed, 128 insertions(+), 1 deletion(-) diff --git a/kairon/shared/cognition/processor.py b/kairon/shared/cognition/processor.py index 3a7cf6596..6f6ebae63 100644 --- a/kairon/shared/cognition/processor.py +++ b/kairon/shared/cognition/processor.py @@ -5,6 +5,7 @@ from mongoengine import DoesNotExist, Q from kairon.exceptions import AppException +from kairon.shared.actions.data_objects import PromptAction from kairon.shared.cognition.data_objects import CognitionData, CognitionSchema, ColumnMetadata from kairon.shared.data.processor import MongoProcessor from kairon.shared.models import CognitionDataType, CognitionMetadataType @@ -79,6 +80,7 @@ def save_cognition_schema(self, schema: Dict, user: Text, bot: Text): def delete_cognition_schema(self, schema_id: str, bot: Text): try: metadata = CognitionSchema.objects(bot=bot, id=schema_id).get() + CognitionDataProcessor.get_attached_collection(bot, metadata['collection_name']) cognition_data = list(CognitionData.objects(Q(collection=metadata['collection_name']) & Q(bot=bot))) if cognition_data: @@ -234,3 +236,9 @@ def find_matching_metadata(bot: Text, data: Any, collection: Text = None): except DoesNotExist as e: logger.exception(e) raise AppException("Columns do not exist in the schema!") + + @staticmethod + def get_attached_collection(bot: Text, collection: Text): + prompt_action = list(PromptAction.objects(bot=bot, collection__exact=collection)) + if prompt_action: + raise AppException(f'Cannot remove collection {collection} linked to action "{prompt_action[0].name}"!') diff --git a/tests/integration_test/services_test.py b/tests/integration_test/services_test.py index d2fb0a1a1..387cf98d9 100644 --- a/tests/integration_test/services_test.py +++ b/tests/integration_test/services_test.py @@ -1515,7 +1515,84 @@ def test_get_payload_content_metadata_not_exists(): assert actual["data"] == [] -def test_content_upload_api_with_gpt_feature_disabled(): +def test_delete_schema_attached_to_prompt_action(monkeypatch): + def _mock_get_bot_settings(*args, **kwargs): + return BotSettings(bot=pytest.bot, user="integration@demo.ai", llm_settings=LLMSettings(enable_faq=True)) + + monkeypatch.setattr(MongoProcessor, 'get_bot_settings', _mock_get_bot_settings) + action = {'name': 'test_delete_schema_attached_to_prompt_action', + 'llm_prompts': [{'name': 'System Prompt', 'data': 'You are a personal assistant.', 'type': 'system', + 'source': 'static', 'is_enabled': True}, + {'name': 'Similarity Prompt', + 'instructions': 'Answer question based on the context above, if answer is not in the context go check previous logs.', + 'type': 'user', 'source': 'bot_content', 'is_enabled': True}, + {'name': 'Query Prompt', + 'data': 'A programming language is a system of notation for writing computer programs.[1] Most programming languages are text-based formal languages, but they may also be graphical. They are a kind of computer language.', + 'instructions': 'Answer according to the context', 'type': 'query', + 'source': 'static', 'is_enabled': True}, + {'name': 'Query Prompt', + 'data': 'If there is no specific query, assume that user is aking about java programming.', + 'instructions': 'Answer according to the context', 'type': 'query', + 'source': 'static', 'is_enabled': True}], + 'instructions': ['Answer in a short manner.', 'Keep it simple.'], + 'collection': 'Python', + 'num_bot_responses': 5, + "failure_message": DEFAULT_NLU_FALLBACK_RESPONSE, "top_results": 10, "similarity_threshold": 0.70} + response = client.post( + f"/api/bot/{pytest.bot}/action/prompt", + json=action, + headers={"Authorization": pytest.token_type + " " + pytest.access_token}, + ) + actual = response.json() + + response_one = client.post( + url=f"/api/bot/{pytest.bot}/data/cognition/schema", + json={ + "metadata": None, + "collection_name": "Python" + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual = response_one.json() + print(actual) + pytest.delete_schema_id = actual["data"]["_id"] + + response_two = client.delete( + url=f"/api/bot/{pytest.bot}/data/cognition/schema/{pytest.delete_schema_id}", + json={ + "schema_id": pytest.delete_schema_id, + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual_two = response_two.json() + print(actual_two) + assert not actual_two["success"] + assert actual_two["message"] == 'Cannot remove collection Python linked to action "test_delete_schema_attached_to_prompt_action"!' + assert actual_two["data"] is None + assert actual_two["error_code"] == 422 + + response_three = client.delete( + f"/api/bot/{pytest.bot}/action/test_delete_schema_attached_to_prompt_action", + headers={"Authorization": pytest.token_type + " " + pytest.access_token}, + ) + actual_three = response_three.json() + assert actual_three['message'] == 'Action deleted' + + response_four = client.delete( + url=f"/api/bot/{pytest.bot}/data/cognition/schema/{pytest.delete_schema_id}", + json={ + "schema_id": pytest.delete_schema_id, + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual_four = response_four.json() + assert actual_four['message'] == 'Schema deleted!' + +def test_content_upload_api_with_gpt_feature_disabled(monkeypatch): + def _mock_get_bot_settings(*args, **kwargs): + return BotSettings(bot=pytest.bot, user="integration@demo.ai", llm_settings=LLMSettings(enable_faq=False)) + + monkeypatch.setattr(MongoProcessor, 'get_bot_settings', _mock_get_bot_settings) payload = { "data": "Data refers to any collection of facts, statistics, or information that can be analyzed or " "used to inform decision-making. Data can take many forms, including text, numbers, images, " diff --git a/tests/unit_test/data_processor/data_processor_test.py b/tests/unit_test/data_processor/data_processor_test.py index 976b36b17..682284ec7 100644 --- a/tests/unit_test/data_processor/data_processor_test.py +++ b/tests/unit_test/data_processor/data_processor_test.py @@ -14415,6 +14415,48 @@ def test_get_payload_metadata_not_exists(self): bot = 'testing' assert list(processor.list_cognition_schema(bot)) == [] + def test_delete_schema_attached_to_prompt_action(self): + processor = CognitionDataProcessor() + processor_two = MongoProcessor() + bot = 'test' + user = 'testUser' + settings = BotSettings.objects(bot=bot).get() + settings.llm_settings = LLMSettings(enable_faq=True) + settings.save() + schema = { + "metadata": None, + "collection_name": "Python", + "bot": bot, + "user": user + } + pytest.delete_schema_id = processor.save_cognition_schema(schema, user, bot) + request = {'name': 'test_delete_schema_attached_to_prompt_action', + 'user_question': {'type': 'from_slot', 'value': 'prompt_question'}, + 'llm_prompts': [ + {'name': 'System Prompt', + 'data': 'You are a personal assistant. Answer question based on the context below.', + 'type': 'system', 'source': 'static', 'is_enabled': True}, + {'name': 'History Prompt', 'type': 'user', 'source': 'history', 'is_enabled': True}, + {'name': 'Query Prompt', 'data': "What kind of language is python?", + 'instructions': 'Rephrase the query.', + 'type': 'query', 'source': 'static', 'is_enabled': False}, + {'name': 'Similarity Prompt', + 'instructions': 'Answer question based on the context above, if answer is not in the context go check previous logs.', + 'type': 'user', 'source': 'bot_content', + 'is_enabled': True} + ], + 'instructions': ['Answer in a short manner.', 'Keep it simple.'], + 'collection': 'Python', + "set_slots": [{"name": "gpt_result", "value": "${data}", "evaluation_type": "expression"}, + {"name": "gpt_result_type", "value": "${data.type}", "evaluation_type": "script"}], + "dispatch_response": False + } + processor_two.add_prompt_action(request, bot, user) + with pytest.raises(AppException, match='Cannot remove collection Python linked to action "test_delete_schema_attached_to_prompt_action"!'): + processor.delete_cognition_schema(pytest.delete_schema_id, bot) + processor_two.delete_action('test_delete_schema_attached_to_prompt_action', bot, user) + processor.delete_cognition_schema(pytest.delete_schema_id, bot) + def test_save_content_with_gpt_feature_disabled(self): processor = CognitionDataProcessor() bot = 'test' From 40a04ce1dd16eac4041ac82bdc8e37c5eecdec02 Mon Sep 17 00:00:00 2001 From: Nupur Khare Date: Wed, 1 Nov 2023 00:05:02 +0530 Subject: [PATCH 2/8] Fixed - Collection is getting deleted even after it is attached to prompt action. 1. Added unit and integration test cases. 2. Fixed test cases. --- kairon/api/models.py | 2 +- kairon/shared/cognition/data_objects.py | 3 ++ kairon/shared/cognition/processor.py | 16 +++--- tests/integration_test/services_test.py | 53 +++++++++++++++---- .../data_processor/data_processor_test.py | 51 +++++++++++------- 5 files changed, 91 insertions(+), 34 deletions(-) diff --git a/kairon/api/models.py b/kairon/api/models.py index 56ce81241..9fb0bd633 100644 --- a/kairon/api/models.py +++ b/kairon/api/models.py @@ -949,7 +949,7 @@ def check(cls, values): class CognitionSchemaRequest(BaseModel): metadata: List[ColumnMetadata] = None - collection_name: str + collection_name: constr(to_lower=True, strip_whitespace=True) class CognitiveDataRequest(BaseModel): diff --git a/kairon/shared/cognition/data_objects.py b/kairon/shared/cognition/data_objects.py index a0a0309d7..1df71c5c5 100644 --- a/kairon/shared/cognition/data_objects.py +++ b/kairon/shared/cognition/data_objects.py @@ -51,6 +51,9 @@ def validate(self, clean=True): for metadata_dict in self.metadata: metadata_dict.validate() + def clean(self): + self.collection_name = self.collection_name.strip().lower() + @auditlogger.log @push_notification.apply diff --git a/kairon/shared/cognition/processor.py b/kairon/shared/cognition/processor.py index 6f6ebae63..285661939 100644 --- a/kairon/shared/cognition/processor.py +++ b/kairon/shared/cognition/processor.py @@ -4,6 +4,7 @@ from loguru import logger from mongoengine import DoesNotExist, Q +from kairon import Utility from kairon.exceptions import AppException from kairon.shared.actions.data_objects import PromptAction from kairon.shared.cognition.data_objects import CognitionData, CognitionSchema, ColumnMetadata @@ -65,6 +66,9 @@ def is_same_column_in_metadata(metadata): return len(unique_column_names) < len(column_names) def save_cognition_schema(self, schema: Dict, user: Text, bot: Text): + Utility.is_exist( + CognitionSchema, exp_message="Collection already exists!", + collection_name__iexact=schema.get('collection_name'), bot=bot) if CognitionDataProcessor.is_collection_limit_exceeded(bot, user, schema.get('collection_name')): raise AppException('Collection limit exceeded!') if schema.get('metadata') and CognitionDataProcessor.is_column_collection_limit_exceeded(bot, user, schema.get('metadata')): @@ -80,7 +84,7 @@ def save_cognition_schema(self, schema: Dict, user: Text, bot: Text): def delete_cognition_schema(self, schema_id: str, bot: Text): try: metadata = CognitionSchema.objects(bot=bot, id=schema_id).get() - CognitionDataProcessor.get_attached_collection(bot, metadata['collection_name']) + CognitionDataProcessor.validate_collection_name(bot, metadata['collection_name']) cognition_data = list(CognitionData.objects(Q(collection=metadata['collection_name']) & Q(bot=bot))) if cognition_data: @@ -126,11 +130,11 @@ def save_cognition_data(self, payload: Dict, user: Text, bot: Text): raise AppException("Content should contain atleast 10 words.") if payload.get('collection'): - if not Utility.is_exist(CognitionSchema, bot=bot, collection_name=payload.get('collection'), raise_error=False): + if not Utility.is_exist(CognitionSchema, bot=bot, collection_name__iexact=payload.get('collection'), raise_error=False): raise AppException('Collection does not exist!') if payload.get('content_type') == CognitionDataType.text.value and \ not Utility.is_exist(CognitionSchema, bot=bot, metadata=[], - collection_name=payload.get('collection'), raise_error=False): + collection_name__iexact=payload.get('collection'), raise_error=False): raise AppException('Text content type does not have schema!') if payload.get('content_type') == CognitionDataType.json.value: CognitionDataProcessor.validate_metadata_and_payload(bot, payload) @@ -153,7 +157,7 @@ def update_cognition_data(self, row_id: str, payload: Dict, user: Text, bot: Tex raise AppException("Content should contain atleast 10 words.") Utility.is_exist(CognitionData, bot=bot, id__ne=row_id, data=data, exp_message="Payload data already exists!") - if payload.get('collection') and not Utility.is_exist(CognitionSchema, bot=bot, collection_name=payload.get('collection'), raise_error=False): + if payload.get('collection') and not Utility.is_exist(CognitionSchema, bot=bot, collection_name__iexact=payload.get('collection'), raise_error=False): raise AppException('Collection does not exist!') try: payload_obj = CognitionData.objects(bot=bot, id=row_id).get() @@ -238,7 +242,7 @@ def find_matching_metadata(bot: Text, data: Any, collection: Text = None): raise AppException("Columns do not exist in the schema!") @staticmethod - def get_attached_collection(bot: Text, collection: Text): - prompt_action = list(PromptAction.objects(bot=bot, collection__exact=collection)) + def validate_collection_name(bot: Text, collection: Text): + prompt_action = list(PromptAction.objects(bot=bot, collection__iexact=collection)) if prompt_action: raise AppException(f'Cannot remove collection {collection} linked to action "{prompt_action[0].name}"!') diff --git a/tests/integration_test/services_test.py b/tests/integration_test/services_test.py index 387cf98d9..aa7f25482 100644 --- a/tests/integration_test/services_test.py +++ b/tests/integration_test/services_test.py @@ -1346,6 +1346,22 @@ def _mock_get_bot_settings(*args, **kwargs): headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) + response_four = client.post( + url=f"/api/bot/{pytest.bot}/data/cognition/schema", + json={ + "metadata": [ + {"column_name": "details", "data_type": "str", "enable_search": True, "create_embeddings": True}], + "collection_name": "details" + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual_four = response_four.json() + print(actual) + assert not actual_four["success"] + assert actual_four["message"] == "Collection already exists!" + assert actual_four["data"] is None + assert actual_four["error_code"] == 422 + def test_metadata_upload_api_column_limit_exceeded(): response = client.post( @@ -1535,7 +1551,7 @@ def _mock_get_bot_settings(*args, **kwargs): 'instructions': 'Answer according to the context', 'type': 'query', 'source': 'static', 'is_enabled': True}], 'instructions': ['Answer in a short manner.', 'Keep it simple.'], - 'collection': 'Python', + 'collection': 'python', 'num_bot_responses': 5, "failure_message": DEFAULT_NLU_FALLBACK_RESPONSE, "top_results": 10, "similarity_threshold": 0.70} response = client.post( @@ -1567,7 +1583,7 @@ def _mock_get_bot_settings(*args, **kwargs): actual_two = response_two.json() print(actual_two) assert not actual_two["success"] - assert actual_two["message"] == 'Cannot remove collection Python linked to action "test_delete_schema_attached_to_prompt_action"!' + assert actual_two["message"] == 'Cannot remove collection python linked to action "test_delete_schema_attached_to_prompt_action"!' assert actual_two["data"] is None assert actual_two["error_code"] == 422 @@ -1622,6 +1638,8 @@ def _mock_get_bot_settings(*args, **kwargs): }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) + actual_one = response_one.json() + pytest.content_collection_id = actual_one["data"]["_id"] payload = { "data": "Data refers to any collection of facts, statistics, or information that can be analyzed or " "used to inform decision-making. Data can take many forms, including text, numbers, images, " @@ -1938,6 +1956,22 @@ def test_get_content_not_exists(): assert actual["data"]['row_count'] == 0 +def test_delete_payload_content_collection(): + response = client.delete( + url=f"/api/bot/{pytest.bot}/data/cognition/schema/{pytest.content_collection_id}", + json={ + "metadata_id": pytest.content_collection_id, + }, + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual = response.json() + print(actual) + assert actual["success"] + assert actual["message"] == "Schema deleted!" + assert actual["data"] is None + assert actual["error_code"] == 0 + + def test_payload_upload_api_with_gpt_feature_disabled(): payload = { "data": {"name": "Nupur", "age": 25, "city": "Bengaluru"}, @@ -1970,7 +2004,7 @@ def _mock_get_bot_settings(*args, **kwargs): payload = { "data": {"details": "AWS"}, "content_type": "json", - "collection": "Details" + "collection": "details" } response = client.post( url=f"/api/bot/{pytest.bot}/data/cognition", @@ -1978,6 +2012,7 @@ def _mock_get_bot_settings(*args, **kwargs): headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) actual = response.json() + print(actual) pytest.payload_id = actual["data"]["_id"] assert actual["message"] == "Record saved!" assert actual["data"]["_id"] @@ -2012,7 +2047,7 @@ def _mock_get_bot_settings(*args, **kwargs): payload = { "data": {"city": "Pune", "color": "red"}, "content_type": "json", - "collection": "Details" + "collection": "details" } response = client.post( url=f"/api/bot/{pytest.bot}/data/cognition", @@ -2031,7 +2066,7 @@ def _mock_get_bot_settings(*args, **kwargs): monkeypatch.setattr(MongoProcessor, 'get_bot_settings', _mock_get_bot_settings) metadata = { "metadata": [{"column_name": "age", "data_type": "int", "enable_search": True, "create_embeddings": True}], - "collection_name": "Details" + "collection_name": "test_payload_upload_metadata_invalid_data_type" } response = client.post( url=f"/api/bot/{pytest.bot}/data/cognition/schema", @@ -2041,7 +2076,7 @@ def _mock_get_bot_settings(*args, **kwargs): payload = { "data": {"age": "Twenty-Three"}, "content_type": "json", - "collection": "Details" + "collection": "test_payload_upload_metadata_invalid_data_type" } response = client.post( url=f"/api/bot/{pytest.bot}/data/cognition", @@ -2096,7 +2131,7 @@ def test_payload_updated_api(): json={ "row_id": pytest.payload_id, "data": {"details": "data science"}, - "collection": "Details", + "collection": "details", "content_type": "json" }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} @@ -2154,7 +2189,7 @@ def test_payload_content_update_api_id_not_found(): def test_get_payload_content(): response = client.get( - url=f"/api/bot/{pytest.bot}/data/cognition?collection=Details", + url=f"/api/bot/{pytest.bot}/data/cognition?collection=details", headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) actual = response.json() @@ -2162,7 +2197,7 @@ def test_get_payload_content(): assert actual["success"] assert actual["error_code"] == 0 assert actual["data"]['data'][0]['data'] == {'details': 'data science'} - assert actual["data"]['data'][0]['collection'] == 'Details' + assert actual["data"]['data'][0]['collection'] == 'details' assert actual["data"]['row_count'] == 1 diff --git a/tests/unit_test/data_processor/data_processor_test.py b/tests/unit_test/data_processor/data_processor_test.py index 682284ec7..76b97cb73 100644 --- a/tests/unit_test/data_processor/data_processor_test.py +++ b/tests/unit_test/data_processor/data_processor_test.py @@ -14308,6 +14308,15 @@ def test_save_payload_metadata(self): processor.save_cognition_schema(schema_three, user, bot) processor.delete_cognition_schema(pytest.schema_id_one, bot) processor.delete_cognition_schema(pytest.schema_id_two, bot) + schema_four = { + "metadata": [ + {"column_name": "details", "data_type": "str", "enable_search": True, "create_embeddings": True}], + "collection_name": "details_collection", + "bot": bot, + "user": user + } + with pytest.raises(AppException, match="Collection already exists!"): + processor.save_cognition_schema(schema, user, bot) settings = BotSettings.objects(bot=bot).get() settings.llm_settings = LLMSettings(enable_faq=False) settings.save() @@ -14339,7 +14348,7 @@ def test_save_payload_metadata_same_columns(self): "metadata": [ {"column_name": "tech", "data_type": "str", "enable_search": True, "create_embeddings": True}, {"column_name": "tech", "data_type": "int", "enable_search": True, "create_embeddings": False}], - "collection_name": "details_collection", + "collection_name": "details_collect", "bot": bot, "user": user } @@ -14366,6 +14375,7 @@ def test_save_payload_metadata_data_type_invalid(self): schema = { "metadata": [{"column_name": "name", "data_type": "bool", "enable_search": True, "create_embeddings": True}], + "collection_name": "test_save_payload_metadata_data_type_invalid", "bot": bot, "user": user } @@ -14446,13 +14456,13 @@ def test_delete_schema_attached_to_prompt_action(self): 'is_enabled': True} ], 'instructions': ['Answer in a short manner.', 'Keep it simple.'], - 'collection': 'Python', + 'collection': 'python', "set_slots": [{"name": "gpt_result", "value": "${data}", "evaluation_type": "expression"}, {"name": "gpt_result_type", "value": "${data.type}", "evaluation_type": "script"}], "dispatch_response": False } processor_two.add_prompt_action(request, bot, user) - with pytest.raises(AppException, match='Cannot remove collection Python linked to action "test_delete_schema_attached_to_prompt_action"!'): + with pytest.raises(AppException, match='Cannot remove collection python linked to action "test_delete_schema_attached_to_prompt_action"!'): processor.delete_cognition_schema(pytest.delete_schema_id, bot) processor_two.delete_action('test_delete_schema_attached_to_prompt_action', bot, user) processor.delete_cognition_schema(pytest.delete_schema_id, bot) @@ -14541,14 +14551,14 @@ def test_save_content(self): payload = { "data": content, "content_type": "text", - "collection": collection} + "collection": 'bot'} metadata = { "metadata": None, "collection_name": collection, "bot": bot, "user": user } - processor.save_cognition_schema(metadata, user, bot) + pytest.save_content_collection = processor.save_cognition_schema(metadata, user, bot) pytest.content_id = processor.save_cognition_data(payload, user, bot) content_id = '5349b4ddd2791d08c09890f3' with pytest.raises(AppException, match="Payload data already exists!"): @@ -14558,7 +14568,7 @@ def test_update_content_atleast_ten_words(self): processor = CognitionDataProcessor() bot = 'test' user = 'testUser' - collection = 'Bot' + collection = 'bot' content = 'Bots are commonly used in various industries.' payload = { "data": content, @@ -14584,7 +14594,7 @@ def test_update_content(self): processor = CognitionDataProcessor() bot = 'test' user = 'testUser' - collection = 'Bot' + collection = 'bot' content = 'Bots are commonly used in various industries, such as e-commerce, customer service, gaming, ' \ 'and social media. Some bots are designed to interact with humans in a conversational manner and are ' \ 'called chatbots or virtual assistants.' @@ -14599,7 +14609,7 @@ def test_update_content_not_found(self): bot = 'test' user = 'testUser' content_id = '5349b4ddd2781d08c09890f3' - collection = 'Bot' + collection = 'bot' content = 'MongoDB is a source-available cross-platform document-oriented database program. ' \ 'Classified as a NoSQL database program, MongoDB uses JSON-like documents with optional schemas. ' \ 'MongoDB is developed by MongoDB Inc. and licensed under the Server Side Public License which is ' \ @@ -14642,7 +14652,7 @@ def test_list_cognition_data(self, mock_get_cognition_data, mock_list_cognition_ 'row_id': '65266ff16f0190ca4fd09898', 'data': 'Unit testing is a software testing technique in which individual units or components of a software application are tested in isolation to ensure that each unit functions as expected. ', 'content_type': 'text', - 'collection': 'testing', 'user': 'testUser', 'bot': 'test'}] + 'collection': 'bot', 'user': 'testUser', 'bot': 'test'}] row_count = 1 def _list_cognition_data(*args, **kwargs): return cognition_data @@ -14656,28 +14666,27 @@ def _get_cognition_data(*args, **kwargs): processor = CognitionDataProcessor() bot = 'test' user = 'testUser' - collection = 'Bot' content = 'Unit testing is a software testing technique in which individual units or components of a software ' \ 'application are tested in isolation to ensure that each unit functions as expected. ' payload = { "data": content, "content_type": "text", - "collection": collection} + "collection": "bot"} pytest.content_id_unit = processor.save_cognition_data(payload, user, bot) - kwargs = {'collection': 'testing', 'data': 'Unit testing'} + kwargs = {'collection': 'bot', 'data': 'Unit testing'} data = list(processor.list_cognition_data(bot, **kwargs)) print(data) assert data[0][ 'data'] == 'Unit testing is a software testing technique in which individual units or components of a ' \ 'software application are tested in isolation to ensure that each unit functions as expected. ' assert data[0]['row_id'] - assert data[0]['collection'] == 'testing' + assert data[0]['collection'] == 'bot' log, count = processor.get_cognition_data(bot, **kwargs) assert log[0][ 'data'] == 'Unit testing is a software testing technique in which individual units or components of a ' \ 'software application are tested in isolation to ensure that each unit functions as expected. ' assert log[0]['row_id'] - assert log[0]['collection'] == 'testing' + assert log[0]['collection'] == 'bot' assert count == 1 kwargs = {} actual = list(processor.list_cognition_data(bot, **kwargs)) @@ -14686,13 +14695,13 @@ def _get_cognition_data(*args, **kwargs): 'data'] == 'Unit testing is a software testing technique in which individual units or components of a ' \ 'software application are tested in isolation to ensure that each unit functions as expected. ' assert actual[0]['row_id'] - assert actual[0]['collection'] == 'testing' + assert actual[0]['collection'] == 'bot' cognition_data, row_count = processor.get_cognition_data(bot, **kwargs) assert cognition_data[0][ 'data'] == 'Unit testing is a software testing technique in which individual units or components of a ' \ 'software application are tested in isolation to ensure that each unit functions as expected. ' assert cognition_data[0]['row_id'] - assert cognition_data[0]['collection'] == 'testing' + assert cognition_data[0]['collection'] == 'bot' assert row_count == 1 def test_delete_content_for_action(self): @@ -14700,6 +14709,12 @@ def test_delete_content_for_action(self): bot = 'test' processor.delete_cognition_data(pytest.content_id_unit, bot) + def test_delete_payload_content_collection(self): + processor = CognitionDataProcessor() + bot = 'test' + user = 'testUser' + processor.delete_cognition_schema(pytest.save_content_collection, bot) + def test_save_payload_content_with_gpt_feature_disabled(self): processor = CognitionDataProcessor() bot = 'test' @@ -14773,7 +14788,7 @@ def test_save_payload_content_invalid_data_type(self): metadata = { "metadata": [ {"column_name": "number", "data_type": "int", "enable_search": True, "create_embeddings": True}], - "collection_name": "Bot", + "collection_name": "test_save_payload_content_invalid_data_type", "bot": bot, "user": user } @@ -14781,7 +14796,7 @@ def test_save_payload_content_invalid_data_type(self): payload = { "data": {"number": "Twenty-three"}, "content_type": "json", - "collection": "Bot"} + "collection": "test_save_payload_content_invalid_data_type"} with pytest.raises(AppException, match="Invalid data type!"): processor.save_cognition_data(payload, user, bot) From 88d8b41a11e61edf6add759839efee04c9aad169 Mon Sep 17 00:00:00 2001 From: Nupur Khare Date: Wed, 1 Nov 2023 08:03:13 +0530 Subject: [PATCH 3/8] Fixed - Collection is getting deleted even after it is attached to prompt action. 1. Added unit and integration test cases. 2. Fixed test cases. --- kairon/api/models.py | 2 +- kairon/shared/cognition/data_objects.py | 4 ++++ kairon/shared/cognition/processor.py | 1 + tests/integration_test/services_test.py | 8 +++---- .../data_processor/data_processor_test.py | 11 +++++---- tests/unit_test/llm_test.py | 24 +++++++++---------- 6 files changed, 28 insertions(+), 22 deletions(-) diff --git a/kairon/api/models.py b/kairon/api/models.py index 9fb0bd633..9609e67a7 100644 --- a/kairon/api/models.py +++ b/kairon/api/models.py @@ -955,7 +955,7 @@ class CognitionSchemaRequest(BaseModel): class CognitiveDataRequest(BaseModel): data: Any content_type: CognitionDataType = CognitionDataType.text.value - collection: str = None + collection: constr(to_lower=True, strip_whitespace=True) = None @root_validator def check(cls, values): diff --git a/kairon/shared/cognition/data_objects.py b/kairon/shared/cognition/data_objects.py index 1df71c5c5..3d8b1b9b8 100644 --- a/kairon/shared/cognition/data_objects.py +++ b/kairon/shared/cognition/data_objects.py @@ -79,3 +79,7 @@ def validate(self, clean=True): raise ValidationError("content type and type of data do not match!") if not self.data or (isinstance(self.data, str) and Utility.check_empty_string(self.data)): raise ValidationError("data cannot be empty") + + def clean(self): + if self.collection: + self.collection = self.collection.strip().lower() diff --git a/kairon/shared/cognition/processor.py b/kairon/shared/cognition/processor.py index 285661939..4d506a8ec 100644 --- a/kairon/shared/cognition/processor.py +++ b/kairon/shared/cognition/processor.py @@ -211,6 +211,7 @@ def list_cognition_data(self, bot: Text, start_idx: int = 0, page_size: int = 10 def get_cognition_data(self, bot: Text, start_idx: int = 0, page_size: int = 10, **kwargs): processor = MongoProcessor() collection = kwargs.pop('collection', None) + collection = collection.lower() if collection else None kwargs['collection'] = collection cognition_data = list(self.list_cognition_data(bot, start_idx, page_size, **kwargs)) row_cnt = processor.get_row_count(CognitionData, bot, **kwargs) diff --git a/tests/integration_test/services_test.py b/tests/integration_test/services_test.py index aa7f25482..ff5ab08b3 100644 --- a/tests/integration_test/services_test.py +++ b/tests/integration_test/services_test.py @@ -2004,7 +2004,7 @@ def _mock_get_bot_settings(*args, **kwargs): payload = { "data": {"details": "AWS"}, "content_type": "json", - "collection": "details" + "collection": "Details" } response = client.post( url=f"/api/bot/{pytest.bot}/data/cognition", @@ -2047,7 +2047,7 @@ def _mock_get_bot_settings(*args, **kwargs): payload = { "data": {"city": "Pune", "color": "red"}, "content_type": "json", - "collection": "details" + "collection": "Details" } response = client.post( url=f"/api/bot/{pytest.bot}/data/cognition", @@ -2131,7 +2131,7 @@ def test_payload_updated_api(): json={ "row_id": pytest.payload_id, "data": {"details": "data science"}, - "collection": "details", + "collection": "Details", "content_type": "json" }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} @@ -2189,7 +2189,7 @@ def test_payload_content_update_api_id_not_found(): def test_get_payload_content(): response = client.get( - url=f"/api/bot/{pytest.bot}/data/cognition?collection=details", + url=f"/api/bot/{pytest.bot}/data/cognition?collection=Details", headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) actual = response.json() diff --git a/tests/unit_test/data_processor/data_processor_test.py b/tests/unit_test/data_processor/data_processor_test.py index 76b97cb73..4ace82452 100644 --- a/tests/unit_test/data_processor/data_processor_test.py +++ b/tests/unit_test/data_processor/data_processor_test.py @@ -14551,7 +14551,7 @@ def test_save_content(self): payload = { "data": content, "content_type": "text", - "collection": 'bot'} + "collection": collection} metadata = { "metadata": None, "collection_name": collection, @@ -14568,7 +14568,7 @@ def test_update_content_atleast_ten_words(self): processor = CognitionDataProcessor() bot = 'test' user = 'testUser' - collection = 'bot' + collection = 'Bot' content = 'Bots are commonly used in various industries.' payload = { "data": content, @@ -14594,7 +14594,7 @@ def test_update_content(self): processor = CognitionDataProcessor() bot = 'test' user = 'testUser' - collection = 'bot' + collection = 'Bot' content = 'Bots are commonly used in various industries, such as e-commerce, customer service, gaming, ' \ 'and social media. Some bots are designed to interact with humans in a conversational manner and are ' \ 'called chatbots or virtual assistants.' @@ -14609,7 +14609,7 @@ def test_update_content_not_found(self): bot = 'test' user = 'testUser' content_id = '5349b4ddd2781d08c09890f3' - collection = 'bot' + collection = 'Bot' content = 'MongoDB is a source-available cross-platform document-oriented database program. ' \ 'Classified as a NoSQL database program, MongoDB uses JSON-like documents with optional schemas. ' \ 'MongoDB is developed by MongoDB Inc. and licensed under the Server Side Public License which is ' \ @@ -14671,7 +14671,7 @@ def _get_cognition_data(*args, **kwargs): payload = { "data": content, "content_type": "text", - "collection": "bot"} + "collection": "Bot"} pytest.content_id_unit = processor.save_cognition_data(payload, user, bot) kwargs = {'collection': 'bot', 'data': 'Unit testing'} data = list(processor.list_cognition_data(bot, **kwargs)) @@ -14681,6 +14681,7 @@ def _get_cognition_data(*args, **kwargs): 'software application are tested in isolation to ensure that each unit functions as expected. ' assert data[0]['row_id'] assert data[0]['collection'] == 'bot' + kwargs = {'collection': 'Bot', 'data': 'Unit testing'} log, count = processor.get_cognition_data(bot, **kwargs) assert log[0][ 'data'] == 'Unit testing is a software testing technique in which individual units or components of a ' \ diff --git a/tests/unit_test/llm_test.py b/tests/unit_test/llm_test.py index f9f5b717d..e03f35c63 100644 --- a/tests/unit_test/llm_test.py +++ b/tests/unit_test/llm_test.py @@ -164,42 +164,42 @@ async def test_gpt3_faq_embedding_train_payload_text(self, aioresponses): aioresponses.add( url=urljoin(Utility.environment['vector']['db'], f"/collections"), method="GET", - payload={"time": 0, "status": "ok", "result": {"collections": [{"name": "test_embed_faq_text_Swift_faq_embd"}, + payload={"time": 0, "status": "ok", "result": {"collections": [{"name": "test_embed_faq_text_swift_faq_embd"}, {"name": "example_bot_Swift_faq_embd"}]}} ) aioresponses.add( method="DELETE", - url=urljoin(Utility.environment['vector']['db'], f"/collections/{gpt3.bot}_Swift{gpt3.suffix}"), + url=urljoin(Utility.environment['vector']['db'], f"/collections/{gpt3.bot}_swift{gpt3.suffix}"), ) aioresponses.add( - url=urljoin(Utility.environment['vector']['db'], f"/collections/{gpt3.bot}_User_details{gpt3.suffix}"), + url=urljoin(Utility.environment['vector']['db'], f"/collections/{gpt3.bot}_user_details{gpt3.suffix}"), method="PUT", status=200 ) aioresponses.add( - url=urljoin(Utility.environment['vector']['db'], f"/collections/{gpt3.bot}_User_details{gpt3.suffix}/points"), + url=urljoin(Utility.environment['vector']['db'], f"/collections/{gpt3.bot}_user_details{gpt3.suffix}/points"), method="PUT", payload={"result": {"operation_id": 0, "status": "acknowledged"}, "status": "ok", "time": 0.003612634} ) aioresponses.add( - url=urljoin(Utility.environment['vector']['db'], f"/collections/{gpt3.bot}_Country_details{gpt3.suffix}"), + url=urljoin(Utility.environment['vector']['db'], f"/collections/{gpt3.bot}_country_details{gpt3.suffix}"), method="PUT", status=200 ) aioresponses.add( - url=urljoin(Utility.environment['vector']['db'], f"/collections/{gpt3.bot}_Country_details{gpt3.suffix}/points"), + url=urljoin(Utility.environment['vector']['db'], f"/collections/{gpt3.bot}_country_details{gpt3.suffix}/points"), method="PUT", payload={"result": {"operation_id": 0, "status": "acknowledged"}, "status": "ok", "time": 0.003612634} ) aioresponses.add( url=urljoin(Utility.environment['vector']['db'], - f"/collections/test_embed_faq_text_Country_details_faq_embd/points"), + f"/collections/test_embed_faq_text_country_details_faq_embd/points"), method="PUT", payload={"result": {"operation_id": 0, "status": "acknowledged"}, "status": "ok", "time": 0.003612634} ) @@ -207,7 +207,7 @@ async def test_gpt3_faq_embedding_train_payload_text(self, aioresponses): response = await gpt3.train() assert response['faq'] == 3 - assert list(aioresponses.requests.values())[2][0].kwargs['json'] == {'name': f"{gpt3.bot}_Country_details{gpt3.suffix}", + assert list(aioresponses.requests.values())[2][0].kwargs['json'] == {'name': f"{gpt3.bot}_country_details{gpt3.suffix}", 'vectors': gpt3.vector_config} assert list(aioresponses.requests.values())[3][0].kwargs['json'] == {"model": "text-embedding-ada-002", @@ -221,17 +221,17 @@ async def test_gpt3_faq_embedding_train_payload_text(self, aioresponses): assert list(aioresponses.requests.values())[3][2].kwargs['headers'] == request_header assert list(aioresponses.requests.values())[4][0].kwargs['json'] == {'points': [{'id': test_content_two.vector_id, 'vector': embedding, - 'payload': {'collection_name': f"{gpt3.bot}_Country_details{gpt3.suffix}", + 'payload': {'collection_name': f"{gpt3.bot}_country_details{gpt3.suffix}", 'country': 'Spain'}}]} assert list(aioresponses.requests.values())[4][1].kwargs['json'] == {'points': [{'id': test_content_three.vector_id, 'vector': embedding, - 'payload': {'collection_name': f"{gpt3.bot}_Country_details{gpt3.suffix}", 'role': 'ds'}}]} + 'payload': {'collection_name': f"{gpt3.bot}_country_details{gpt3.suffix}", 'role': 'ds'}}]} - assert list(aioresponses.requests.values())[5][0].kwargs['json'] == {'name': f"{gpt3.bot}_User_details{gpt3.suffix}", + assert list(aioresponses.requests.values())[5][0].kwargs['json'] == {'name': f"{gpt3.bot}_user_details{gpt3.suffix}", 'vectors': gpt3.vector_config} assert list(aioresponses.requests.values())[6][0].kwargs['json'] == {'points': [{'id': test_content.vector_id, 'vector': embedding, - 'payload': {'collection_name': f"{gpt3.bot}_User_details{gpt3.suffix}", + 'payload': {'collection_name': f"{gpt3.bot}_user_details{gpt3.suffix}", 'name': 'Nupur'}}]} assert response['faq'] == 3 From 84b03842cc221f017f9d4dd82ca630720e3236d8 Mon Sep 17 00:00:00 2001 From: Nupur Khare Date: Wed, 1 Nov 2023 08:35:48 +0530 Subject: [PATCH 4/8] Fixed - Collection is getting deleted even after it is attached to prompt action. 1. Added unit and integration test cases. 2. Fixed test cases. --- tests/unit_test/data_processor/data_processor_test.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/unit_test/data_processor/data_processor_test.py b/tests/unit_test/data_processor/data_processor_test.py index 4ace82452..836e6a05e 100644 --- a/tests/unit_test/data_processor/data_processor_test.py +++ b/tests/unit_test/data_processor/data_processor_test.py @@ -14675,7 +14675,6 @@ def _get_cognition_data(*args, **kwargs): pytest.content_id_unit = processor.save_cognition_data(payload, user, bot) kwargs = {'collection': 'bot', 'data': 'Unit testing'} data = list(processor.list_cognition_data(bot, **kwargs)) - print(data) assert data[0][ 'data'] == 'Unit testing is a software testing technique in which individual units or components of a ' \ 'software application are tested in isolation to ensure that each unit functions as expected. ' @@ -14691,7 +14690,6 @@ def _get_cognition_data(*args, **kwargs): assert count == 1 kwargs = {} actual = list(processor.list_cognition_data(bot, **kwargs)) - print(actual) assert actual[0][ 'data'] == 'Unit testing is a software testing technique in which individual units or components of a ' \ 'software application are tested in isolation to ensure that each unit functions as expected. ' From 7a24663235ed8102e4335ae84772c8f8e86f23e9 Mon Sep 17 00:00:00 2001 From: Nupur Khare Date: Wed, 1 Nov 2023 09:58:11 +0530 Subject: [PATCH 5/8] Fixed - Collection is getting deleted even after it is attached to prompt action. 1. Added unit and integration test cases. 2. Fixed test cases. --- kairon/shared/cognition/processor.py | 2 +- tests/integration_test/services_test.py | 42 ++++++++++++------- .../data_processor/data_processor_test.py | 18 ++++++-- 3 files changed, 42 insertions(+), 20 deletions(-) diff --git a/kairon/shared/cognition/processor.py b/kairon/shared/cognition/processor.py index 4d506a8ec..2157aaa06 100644 --- a/kairon/shared/cognition/processor.py +++ b/kairon/shared/cognition/processor.py @@ -235,7 +235,7 @@ def find_matching_metadata(bot: Text, data: Any, collection: Text = None): columns = list(data.keys()) try: matching_metadata = CognitionSchema.objects(Q(metadata__column_name__in=columns) & - Q(collection_name=collection) & + Q(collection_name__iexact=collection) & Q(bot=bot)).get() return matching_metadata except DoesNotExist as e: diff --git a/tests/integration_test/services_test.py b/tests/integration_test/services_test.py index ff5ab08b3..84a35471f 100644 --- a/tests/integration_test/services_test.py +++ b/tests/integration_test/services_test.py @@ -34,6 +34,7 @@ from kairon.shared.actions.utils import ActionUtility from kairon.shared.auth import Authentication from kairon.shared.cloud.utils import CloudUtility +from kairon.shared.cognition.data_objects import CognitionSchema, CognitionData from kairon.shared.constants import EventClass from kairon.shared.data.audit.data_objects import AuditLogData from kairon.shared.data.constant import UTTERANCE_TYPE, EVENT_STATUS, TOKEN_TYPE, AuditlogActions, \ @@ -45,7 +46,7 @@ from kairon.shared.data.training_data_generation_processor import TrainingDataGenerationProcessor from kairon.shared.data.utils import DataUtility from kairon.shared.metering.constants import MetricType -from kairon.shared.models import StoryEventType +from kairon.shared.models import StoryEventType, CognitionDataType from kairon.shared.models import User from kairon.shared.multilingual.processor import MultilingualLogProcessor from kairon.shared.multilingual.utils.translator import Translator @@ -1261,7 +1262,7 @@ def _mock_get_bot_settings(*args, **kwargs): url=f"/api/bot/{pytest.bot}/data/cognition/schema", json={ "metadata": [{"column_name": "details", "data_type": "str", "enable_search": True, "create_embeddings": True}], - "collection_name": "details" + "collection_name": "Details" }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) @@ -1272,9 +1273,14 @@ def _mock_get_bot_settings(*args, **kwargs): assert actual["data"]["_id"] assert actual["error_code"] == 0 + cognition_schema = CognitionSchema.objects(bot=pytest.bot, id=pytest.schema_id).get() + assert cognition_schema['collection_name'] == 'details' + assert cognition_schema['metadata'][0]['column_name'] == 'details' + assert cognition_schema['metadata'][0]['data_type'] == "str" + payload = { "data": {"details": "Nupur"}, - "collection": "details", + "collection": "Details", "content_type": "json"} payload_response = client.post( url=f"/api/bot/{pytest.bot}/data/cognition", @@ -1282,15 +1288,21 @@ def _mock_get_bot_settings(*args, **kwargs): headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) payload_actual = payload_response.json() + pytest.cognition_id = payload_actual["data"]["_id"] assert payload_actual["message"] == "Record saved!" assert payload_actual["error_code"] == 0 + cognition_data = CognitionData.objects(bot=pytest.bot, id=pytest.cognition_id).get() + assert cognition_data['data'] == {"details": "Nupur"} + assert cognition_data['collection'] == 'details' + assert cognition_data['content_type'] == CognitionDataType.json.value + response_one = client.post( url=f"/api/bot/{pytest.bot}/data/cognition/schema", json={ "metadata": [ {"column_name": "details_one", "data_type": "str", "enable_search": True, "create_embeddings": True}], - "collection_name": "details_one" + "collection_name": "Details_one" }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) @@ -1306,7 +1318,7 @@ def _mock_get_bot_settings(*args, **kwargs): json={ "metadata": [ {"column_name": "details_two", "data_type": "str", "enable_search": True, "create_embeddings": True}], - "collection_name": "details_two" + "collection_name": "Details_two" }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) @@ -1322,7 +1334,7 @@ def _mock_get_bot_settings(*args, **kwargs): json={ "metadata": [ {"column_name": "details_three", "data_type": "str", "enable_search": True, "create_embeddings": True}], - "collection_name": "details_three" + "collection_name": "Details_three" }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) @@ -1351,7 +1363,7 @@ def _mock_get_bot_settings(*args, **kwargs): json={ "metadata": [ {"column_name": "details", "data_type": "str", "enable_search": True, "create_embeddings": True}], - "collection_name": "details" + "collection_name": "Details" }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) @@ -1478,7 +1490,7 @@ def _mock_get_bot_settings(*args, **kwargs): url=f"/api/bot/{pytest.bot}/data/cognition/schema", json={ "metadata": [{"column_name": "country", "data_type": "str", "enable_search": True, "create_embeddings": True}], - "collection_name": "details" + "collection_name": "Details" }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) @@ -1614,7 +1626,7 @@ def _mock_get_bot_settings(*args, **kwargs): "used to inform decision-making. Data can take many forms, including text, numbers, images, " "audio, and video.", "content_type": "text", - "collection": "data_details"} + "collection": "Data_details"} response = client.post( url=f"/api/bot/{pytest.bot}/data/cognition", json=payload, @@ -1634,7 +1646,7 @@ def _mock_get_bot_settings(*args, **kwargs): response_one = client.post( url=f"/api/bot/{pytest.bot}/data/cognition/schema", json={ - "collection_name": "details" + "collection_name": "Details" }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) @@ -1645,7 +1657,7 @@ def _mock_get_bot_settings(*args, **kwargs): "used to inform decision-making. Data can take many forms, including text, numbers, images, " "audio, and video.", "content_type": "text", - "collection": "details"} + "collection": "Details"} response = client.post( url=f"/api/bot/{pytest.bot}/data/cognition", json=payload, @@ -1761,7 +1773,7 @@ def test_content_update_api(): "data": "AWS Fargate is a serverless compute engine for containers that allows you to run " "Docker containers without having to manage the underlying EC2 instances. With Fargate, " "you can focus on developing and deploying your applications rather than managing the infrastructure.", - "collection": "details", + "collection": "Details", "content_type": "text" }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} @@ -1797,7 +1809,7 @@ def test_content_update_api_invalid(): json={ "row_id": pytest.content_id_text, "data": "AWS Fargate is a serverless compute engine.", - "collection": "details", + "collection": "Details", "content_type": "text" }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} @@ -1819,7 +1831,7 @@ def test_content_update_api_already_exist(): "data": "AWS Fargate is a serverless compute engine for containers that allows you to run " "Docker containers without having to manage the underlying EC2 instances. With Fargate, " "you can focus on developing and deploying your applications rather than managing the infrastructure.", - "collection": "details", + "collection": "Details", "content_type": "text" }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} @@ -1841,7 +1853,7 @@ def test_content_update_api_id_not_found(): "data": "Artificial intelligence (AI) involves using computers to do things that traditionally require human " "intelligence. AI can process large amounts of data in ways that humans cannot. The goal for AI is " "to be able to do things like recognize patterns, make decisions, and judge like humans.", - "collection": "details", + "collection": "Details", "content_type": "text" }, headers={"Authorization": pytest.token_type + " " + pytest.access_token} diff --git a/tests/unit_test/data_processor/data_processor_test.py b/tests/unit_test/data_processor/data_processor_test.py index 836e6a05e..5c2c38ece 100644 --- a/tests/unit_test/data_processor/data_processor_test.py +++ b/tests/unit_test/data_processor/data_processor_test.py @@ -80,7 +80,7 @@ from kairon.shared.llm.gpt3 import GPT3FAQEmbedding from kairon.shared.metering.constants import MetricType from kairon.shared.metering.data_object import Metering -from kairon.shared.models import StoryEventType, HttpContentType +from kairon.shared.models import StoryEventType, HttpContentType, CognitionDataType from kairon.shared.multilingual.processor import MultilingualLogProcessor from kairon.shared.test.data_objects import ModelTestingLogs from kairon.shared.test.processor import ModelTestingLogProcessor @@ -14267,17 +14267,27 @@ def test_save_payload_metadata(self): schema = { "metadata": [ {"column_name": "details", "data_type": "str", "enable_search": True, "create_embeddings": True}], - "collection_name": "details_collection", + "collection_name": "Details_collection", "bot": bot, "user": user } pytest.schema_id = processor.save_cognition_schema(schema, user, bot) + cognition_schema = CognitionSchema.objects(bot=bot, id=pytest.schema_id).get() + assert cognition_schema['collection_name'] == 'details_collection' + assert cognition_schema['metadata'][0]['column_name'] == 'details' + assert cognition_schema['metadata'][0]['data_type'] == "str" + payload = { "data": {"details": "Pune"}, - "collection": "details_collection", + "collection": "Details_collection", "content_type": "json"} - processor.save_cognition_data(payload, user, bot) + pytest.cognition_id = processor.save_cognition_data(payload, user, bot) + + cognition_data = CognitionData.objects(bot=bot, id=pytest.cognition_id).get() + assert cognition_data['data'] == {"details": "Pune"} + assert cognition_data['collection'] == 'details_collection' + assert cognition_data['content_type'] == CognitionDataType.json.value schema_one = { "metadata": [ From 3fa8705f1a63ca0188dd0f1310109836c1f332ca Mon Sep 17 00:00:00 2001 From: Nupur Khare Date: Wed, 1 Nov 2023 10:37:55 +0530 Subject: [PATCH 6/8] Fixed - Collection is getting deleted even after it is attached to prompt action. 1. Added unit and integration test cases. 2. Fixed test cases. --- tests/integration_test/services_test.py | 32 +++++++++++++++++-------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/tests/integration_test/services_test.py b/tests/integration_test/services_test.py index 84a35471f..40c90f978 100644 --- a/tests/integration_test/services_test.py +++ b/tests/integration_test/services_test.py @@ -34,7 +34,6 @@ from kairon.shared.actions.utils import ActionUtility from kairon.shared.auth import Authentication from kairon.shared.cloud.utils import CloudUtility -from kairon.shared.cognition.data_objects import CognitionSchema, CognitionData from kairon.shared.constants import EventClass from kairon.shared.data.audit.data_objects import AuditLogData from kairon.shared.data.constant import UTTERANCE_TYPE, EVENT_STATUS, TOKEN_TYPE, AuditlogActions, \ @@ -46,7 +45,7 @@ from kairon.shared.data.training_data_generation_processor import TrainingDataGenerationProcessor from kairon.shared.data.utils import DataUtility from kairon.shared.metering.constants import MetricType -from kairon.shared.models import StoryEventType, CognitionDataType +from kairon.shared.models import StoryEventType from kairon.shared.models import User from kairon.shared.multilingual.processor import MultilingualLogProcessor from kairon.shared.multilingual.utils.translator import Translator @@ -1273,10 +1272,15 @@ def _mock_get_bot_settings(*args, **kwargs): assert actual["data"]["_id"] assert actual["error_code"] == 0 - cognition_schema = CognitionSchema.objects(bot=pytest.bot, id=pytest.schema_id).get() - assert cognition_schema['collection_name'] == 'details' - assert cognition_schema['metadata'][0]['column_name'] == 'details' - assert cognition_schema['metadata'][0]['data_type'] == "str" + response_schema = client.get( + url=f"/api/bot/{pytest.bot}/data/cognition/schema", + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual_schema = response_schema.json() + print(actual_schema) + assert actual_schema["data"][0]['collection_name'] == 'details' + assert actual_schema["data"][0]['metadata'][0] == {'column_name': 'details', 'data_type': 'str', 'enable_search': True, 'create_embeddings': True} + assert actual_schema["error_code"] == 0 payload = { "data": {"details": "Nupur"}, @@ -1292,10 +1296,18 @@ def _mock_get_bot_settings(*args, **kwargs): assert payload_actual["message"] == "Record saved!" assert payload_actual["error_code"] == 0 - cognition_data = CognitionData.objects(bot=pytest.bot, id=pytest.cognition_id).get() - assert cognition_data['data'] == {"details": "Nupur"} - assert cognition_data['collection'] == 'details' - assert cognition_data['content_type'] == CognitionDataType.json.value + response_payload = client.get( + url=f"/api/bot/{pytest.bot}/data/cognition?collection=Details", + headers={"Authorization": pytest.token_type + " " + pytest.access_token} + ) + actual_payload = response_payload.json() + print(actual_payload) + assert actual_payload["success"] + assert actual_payload["message"] is None + assert actual_payload["error_code"] == 0 + assert actual_payload["data"]['data'][0]['collection'] == 'details' + assert actual_payload["data"]['data'][0]['data'] == {'details': 'Nupur'} + assert actual_payload["data"]['row_count'] == 1 response_one = client.post( url=f"/api/bot/{pytest.bot}/data/cognition/schema", From 421940121c2e385f707e817ebbe631ed2e1ae834 Mon Sep 17 00:00:00 2001 From: maheshsattala <59285563+maheshsattala@users.noreply.github.com> Date: Wed, 1 Nov 2023 10:50:52 +0530 Subject: [PATCH 7/8] log webhook response (#1069) * Added changes to log whatsapp webhook response in db. * Added test cases to fix coverage issue. * Added test cases to fix coverage issue. * Added test cases to fix coverage issue. * Added test cases in the chat_service_test. * Added test cases in the chat_service_test. --------- Co-authored-by: Mahesh --- kairon/chat/handlers/channels/whatsapp.py | 5 + kairon/shared/chat/data_objects.py | 13 + kairon/shared/chat/processor.py | 20 +- tests/integration_test/chat_service_test.py | 253 +++++++++++++++++++- 4 files changed, 289 insertions(+), 2 deletions(-) diff --git a/kairon/chat/handlers/channels/whatsapp.py b/kairon/chat/handlers/channels/whatsapp.py index c8d91e52b..02195e286 100644 --- a/kairon/chat/handlers/channels/whatsapp.py +++ b/kairon/chat/handlers/channels/whatsapp.py @@ -70,6 +70,11 @@ async def __handle_meta_payload(self, payload: Dict, metadata: Optional[Dict[Tex msg_metadata = changes.get("value", {}).get("metadata", {}) metadata.update(msg_metadata) messages = changes.get("value", {}).get("messages") + if not messages: + statuses = changes.get("value", {}).get("statuses") + user = metadata.get('display_phone_number') + for status_data in statuses: + ChatDataProcessor.save_whatsapp_audit_log(status_data, bot, user) for message in messages or []: await self.message(message, metadata, bot) diff --git a/kairon/shared/chat/data_objects.py b/kairon/shared/chat/data_objects.py index ecdb725da..c4eb4b672 100644 --- a/kairon/shared/chat/data_objects.py +++ b/kairon/shared/chat/data_objects.py @@ -5,6 +5,7 @@ from kairon.shared.data.audit.data_objects import Auditlog from kairon.shared.data.signals import push_notification, auditlogger from kairon.shared.utils import Utility +from mongoengine import ListField @auditlogger.log @@ -36,3 +37,15 @@ def validate(self, clean=True): }) Utility.register_telegram_webhook(Utility.decrypt_message(self.config['access_token']), webhook_url) + +@auditlogger.log +@push_notification.apply +class WhatsappAuditLog(Auditlog): + data = DictField(default=None) + status = StringField(required=True) + message_id = StringField(required=True) + errors = ListField(DictField(default=[])) + initiator = StringField(default=None) + bot = StringField(required=True) + user = StringField(required=True) + timestamp = DateTimeField(default=datetime.utcnow) diff --git a/kairon/shared/chat/processor.py b/kairon/shared/chat/processor.py index 10f064bb6..52732e244 100644 --- a/kairon/shared/chat/processor.py +++ b/kairon/shared/chat/processor.py @@ -2,7 +2,7 @@ from mongoengine import DoesNotExist from loguru import logger -from .data_objects import Channels +from .data_objects import Channels, WhatsappAuditLog from datetime import datetime from kairon.shared.utils import Utility from ..constants import ChannelTypes @@ -125,3 +125,21 @@ def get_channel_endpoint(connector_type: Text, bot: Text): except DoesNotExist: raise AppException('Channel not configured') + @staticmethod + def save_whatsapp_audit_log(status_data: Dict, bot: Text, user: Text): + """ + save or updates channel configuration + :param status_data: status_data dict + :param bot: bot id + :param user: user id + :return: None + """ + WhatsappAuditLog( + status=status_data.get('status'), + data=status_data.get('conversation'), + initiator=status_data.get('conversation', {}).get('origin', {}).get('type'), + message_id=status_data.get('id'), + errors=status_data.get('errors', []), + bot=bot, + user=user + ).save() diff --git a/tests/integration_test/chat_service_test.py b/tests/integration_test/chat_service_test.py index 0784e470e..53d8a81cd 100644 --- a/tests/integration_test/chat_service_test.py +++ b/tests/integration_test/chat_service_test.py @@ -27,7 +27,6 @@ from kairon.shared.auth import Authentication from kairon.shared.chat.processor import ChatDataProcessor from kairon.shared.constants import UserActivityType -from kairon.shared.concurrency.actors.factory import ActorFactory from kairon.shared.data.constant import INTEGRATION_STATUS from kairon.shared.data.constant import TOKEN_TYPE from kairon.shared.data.data_objects import BotSettings @@ -1415,6 +1414,258 @@ def _mock_validate_hub_signature(*args, **kwargs): assert whatsapp_msg_handler.call_args[0][4] == bot +@responses.activate +def test_whatsapp_valid_statuses_with_sent_request(): + from kairon.shared.chat.data_objects import WhatsappAuditLog + + def _mock_validate_hub_signature(*args, **kwargs): + return True + + with patch.object(MessengerHandler, "validate_hub_signature", _mock_validate_hub_signature): + response = client.post( + f"/api/bot/whatsapp/{bot}/{token}", + headers={"hub.verify_token": "valid"}, + json={ + "object": "whatsapp_business_account", + "entry": [{ + "id": "108103872212677", + "changes": [{ + "value": { + "messaging_product": "whatsapp", + "metadata": { + "display_phone_number": "919876543210", + "phone_number_id": "108578266683441" + }, + "contacts": [{ + "profile": { + "name": "Hitesh" + }, + "wa_id": "919876543210" + }], + "statuses": [{ + "id": "wamid.HBgLMTIxMTU1NTc5NDcVAgARGBIyRkQxREUxRDJFQUJGMkQ3NDIA", + "recipient_id": "91551234567", + "status": "sent", + "timestamp": "1691548112", + "conversation": { + "id": "CONVERSATION_ID", + "expiration_timestamp": "1691598412", + "origin": { + "type": "business_initated" + } + }, + "pricing": { + "pricing_model": "CBP", + "billable": "True", + "category": "business_initated" + } + }] + }, + "field": "messages" + }] + }] + }) + actual = response.json() + assert actual == 'success' + log = WhatsappAuditLog.objects( + bot=bot, message_id='wamid.HBgLMTIxMTU1NTc5NDcVAgARGBIyRkQxREUxRDJFQUJGMkQ3NDIA').get().to_mongo().to_dict() + assert log['data'] == { + 'id': 'CONVERSATION_ID', 'expiration_timestamp': '1691598412', 'origin': {'type': 'business_initated'} + } + assert log['initiator'] == 'business_initated' + assert log['status'] == 'sent' + + +@responses.activate +def test_whatsapp_valid_statuses_with_delivered_request(): + from kairon.shared.chat.data_objects import WhatsappAuditLog + + def _mock_validate_hub_signature(*args, **kwargs): + return True + + with patch.object(MessengerHandler, "validate_hub_signature", _mock_validate_hub_signature): + response = client.post( + f"/api/bot/whatsapp/{bot}/{token}", + headers={"hub.verify_token": "valid"}, + json={ + "object": "whatsapp_business_account", + "entry": [{ + "id": "108103872212677", + "changes": [{ + "value": { + "messaging_product": "whatsapp", + "metadata": { + "display_phone_number": "919876543210", + "phone_number_id": "108578266683441" + }, + "contacts": [{ + "profile": { + "name": "Hitesh" + }, + "wa_id": "919876543210" + }], + "statuses": [{ + "id": "wamid.HBgLMTIxMTU1NTc5NDcVAgARGBIyRkQxREUxRDJFQUJGMkQ3NDIB", + "recipient_id": "91551234567", + "status": "delivered", + "timestamp": "1691548112", + "conversation": { + "id": "CONVERSATION_ID", + "expiration_timestamp": "1691598412", + "origin": { + "type": "user_initiated" + } + }, + "pricing": { + "pricing_model": "CBP", + "billable": "True", + "category": "service" + } + }] + }, + "field": "messages" + }] + }] + }) + actual = response.json() + assert actual == 'success' + log = WhatsappAuditLog.objects( + bot=bot, message_id='wamid.HBgLMTIxMTU1NTc5NDcVAgARGBIyRkQxREUxRDJFQUJGMkQ3NDIB').get().to_mongo().to_dict() + assert log['data'] == { + 'id': 'CONVERSATION_ID', 'expiration_timestamp': '1691598412', 'origin': {'type': 'user_initiated'} + } + assert log['initiator'] == 'user_initiated' + assert log['status'] == 'delivered' + + +@responses.activate +def test_whatsapp_valid_statuses_with_read_request(): + from kairon.shared.chat.data_objects import WhatsappAuditLog + + def _mock_validate_hub_signature(*args, **kwargs): + return True + + with patch.object(MessengerHandler, "validate_hub_signature", _mock_validate_hub_signature): + response = client.post( + f"/api/bot/whatsapp/{bot}/{token}", + headers={"hub.verify_token": "valid"}, + json={ + "object": "whatsapp_business_account", + "entry": [{ + "id": "108103872212677", + "changes": [{ + "value": { + "messaging_product": "whatsapp", + "metadata": { + "display_phone_number": "919876543210", + "phone_number_id": "108578266683441" + }, + "contacts": [{ + "profile": { + "name": "Hitesh" + }, + "wa_id": "919876543210" + }], + "statuses": [{ + "id": "wamid.HBgLMTIxMTU1NTc5NDcVAgARGBIyRkQxREUxRDJFQUJGMkQ3NDIC", + "recipient_id": "91551234567", + "status": "read", + "timestamp": "1691548112" + }] + }, + "field": "messages" + }] + }] + }) + actual = response.json() + assert actual == 'success' + log = WhatsappAuditLog.objects( + bot=bot, message_id='wamid.HBgLMTIxMTU1NTc5NDcVAgARGBIyRkQxREUxRDJFQUJGMkQ3NDIC').get().to_mongo().to_dict() + assert log.get('data') is None + assert log.get('initiator') is None + assert log.get('status') == 'read' + + logs = WhatsappAuditLog.objects(bot=bot, user='919876543210') + assert len(WhatsappAuditLog.objects(bot=bot, user='919876543210')) == 3 + assert logs[0]['data'] == { + 'id': 'CONVERSATION_ID', 'expiration_timestamp': '1691598412', 'origin': {'type': 'business_initated'} + } + assert logs[0]['initiator'] == 'business_initated' + assert logs[0]['status'] == 'sent' + assert logs[1]['data'] == { + 'id': 'CONVERSATION_ID', 'expiration_timestamp': '1691598412', 'origin': {'type': 'user_initiated'} + } + assert logs[1]['initiator'] == 'user_initiated' + assert logs[1]['status'] == 'delivered' + assert logs[2]['status'] == 'read' + + +@responses.activate +def test_whatsapp_valid_statuses_with_errors_request(): + from kairon.shared.chat.data_objects import WhatsappAuditLog + + def _mock_validate_hub_signature(*args, **kwargs): + return True + + with patch.object(MessengerHandler, "validate_hub_signature", _mock_validate_hub_signature): + response = client.post( + f"/api/bot/whatsapp/{bot}/{token}", + headers={"hub.verify_token": "valid"}, + json={ + "object": "whatsapp_business_account", + "entry": [{ + "id": "108103872212677", + "changes": [{ + "value": { + "messaging_product": "whatsapp", + "metadata": { + "display_phone_number": "919876543219", + "phone_number_id": "108578266683441" + }, + "contacts": [{ + "profile": { + "name": "Hitesh" + }, + "wa_id": "919876543210" + }], + "statuses": [ + { + "id": "wamid.HBgLMTIxMTU1NTc5NDcVAgARGBIyRkQxREUxRDJFQUJGMkQ3NDIZ", + "status": "failed", + "timestamp": "1689380458", + "recipient_id": "15551234567", + "errors": [ + { + "code": 130472, + "title": "User's number is part of an experiment", + "message": "User's number is part of an experiment", + "error_data": { + "details": "Failed to send message because this user's phone number is part of an experiment" + }, + "href": "https://developers.facebook.com/docs/whatsapp/cloud-api/support/error-codes/" + } + ] + } + ] + }, + "field": "messages" + }] + }] + }) + actual = response.json() + assert actual == 'success' + assert WhatsappAuditLog.objects(bot=bot, message_id='wamid.HBgLMTIxMTU1NTc5NDcVAgARGBIyRkQxREUxRDJFQUJGMkQ3NDIZ') + log = WhatsappAuditLog.objects(bot=bot, user='919876543219').get().to_mongo().to_dict() + assert log.get('status') == 'failed' + assert log.get('data') is None + assert log.get('errors') == [{ + 'code': 130472, 'title': "User's number is part of an experiment", + 'message': "User's number is part of an experiment", + 'error_data': {'details': "Failed to send message because this user's phone number is part of an experiment"}, + 'href': 'https://developers.facebook.com/docs/whatsapp/cloud-api/support/error-codes/' + }] + + @responses.activate def test_whatsapp_valid_unsupported_message_request(): def _mock_validate_hub_signature(*args, **kwargs): From d5efff6e465e6a8bbffb011fb042e9a2e898de74 Mon Sep 17 00:00:00 2001 From: Nupur Khare Date: Wed, 1 Nov 2023 12:14:29 +0530 Subject: [PATCH 8/8] Fixed - Collection is getting deleted even after it is attached to prompt action. 1. Added unit and integration test cases. 2. Fixed test cases. --- tests/integration_test/services_test.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/integration_test/services_test.py b/tests/integration_test/services_test.py index 40c90f978..55e8c238d 100644 --- a/tests/integration_test/services_test.py +++ b/tests/integration_test/services_test.py @@ -1266,7 +1266,6 @@ def _mock_get_bot_settings(*args, **kwargs): headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) actual = response.json() - print(actual) pytest.schema_id = actual["data"]["_id"] assert actual["message"] == "Schema saved!" assert actual["data"]["_id"] @@ -1277,7 +1276,6 @@ def _mock_get_bot_settings(*args, **kwargs): headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) actual_schema = response_schema.json() - print(actual_schema) assert actual_schema["data"][0]['collection_name'] == 'details' assert actual_schema["data"][0]['metadata'][0] == {'column_name': 'details', 'data_type': 'str', 'enable_search': True, 'create_embeddings': True} assert actual_schema["error_code"] == 0 @@ -1301,7 +1299,6 @@ def _mock_get_bot_settings(*args, **kwargs): headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) actual_payload = response_payload.json() - print(actual_payload) assert actual_payload["success"] assert actual_payload["message"] is None assert actual_payload["error_code"] == 0 @@ -1319,7 +1316,6 @@ def _mock_get_bot_settings(*args, **kwargs): headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) actual_one = response_one.json() - print(actual_one) pytest.schema_id_one = actual_one["data"]["_id"] assert actual_one["message"] == "Schema saved!" assert actual_one["data"]["_id"] @@ -1335,7 +1331,6 @@ def _mock_get_bot_settings(*args, **kwargs): headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) actual_two = response_two.json() - print(actual_two) pytest.schema_id_two = actual_two["data"]["_id"] assert actual_two["message"] == "Schema saved!" assert actual_two["data"]["_id"] @@ -1380,7 +1375,6 @@ def _mock_get_bot_settings(*args, **kwargs): headers={"Authorization": pytest.token_type + " " + pytest.access_token} ) actual_four = response_four.json() - print(actual) assert not actual_four["success"] assert actual_four["message"] == "Collection already exists!" assert actual_four["data"] is None