From b850f959d6739c2575b86bca89c13d5561208667 Mon Sep 17 00:00:00 2001 From: spandan_mondal Date: Thu, 19 Dec 2024 16:52:19 +0530 Subject: [PATCH] cognition upload fix --- kairon/importer/validator/file_validator.py | 6 + kairon/shared/cognition/processor.py | 23 +++ kairon/shared/data/processor.py | 5 +- .../data_processor/data_processor2_test.py | 149 ++++++++++++++++++ .../validator/training_data_validator_test.py | 15 +- 5 files changed, 194 insertions(+), 4 deletions(-) diff --git a/kairon/importer/validator/file_validator.py b/kairon/importer/validator/file_validator.py index 9cdeed4c4..a941f679d 100644 --- a/kairon/importer/validator/file_validator.py +++ b/kairon/importer/validator/file_validator.py @@ -1058,6 +1058,12 @@ def validate_content(bot: Text, user: Text, bot_content: List, save_data: bool = current_dir = os.path.dirname(os.path.realpath(__file__)) bot_content_schema_file_path = os.path.join(current_dir, "bot_content_schema.yaml") schema_validator = Core(source_data=bot_content, schema_files=[bot_content_schema_file_path]) + + from kairon.shared.cognition.processor import CognitionDataProcessor + new_collection_names = [data_item.get('collection') for data_item in bot_content] + if CognitionDataProcessor.is_collection_limit_exceeded_for_mass_uploading(bot, user, new_collection_names, overwrite): + bot_content_errors.append('Collection limit exceeded!') + try: schema_validator.validate(raise_exception=True) logger.info("Validation successful!") diff --git a/kairon/shared/cognition/processor.py b/kairon/shared/cognition/processor.py index 9228eaf63..dbbe349d6 100644 --- a/kairon/shared/cognition/processor.py +++ b/kairon/shared/cognition/processor.py @@ -39,6 +39,29 @@ def is_collection_limit_exceeded(bot, user, collection): else: return False + @staticmethod + def is_collection_limit_exceeded_for_mass_uploading(bot:str, user:str, collection_names:List[str], overwrite:bool = False): + """ + checks if collection limit is exhausted + + :param bot: bot id + :param user: user + :param collection_names: List of names of collection + :return: boolean + :raises: AppException + """ + + bot_settings = MongoProcessor.get_bot_settings(bot, user) + bot_settings = bot_settings.to_mongo().to_dict() + if overwrite: + return len(collection_names) > bot_settings["cognition_collections_limit"] + else: + collections = list(CognitionSchema.objects(bot=bot).distinct(field='collection_name')) + new_to_add = [collection for collection in collection_names if collection not in collections] + return len(new_to_add) + len(collections) > bot_settings["cognition_collections_limit"] + + + @staticmethod def is_column_collection_limit_exceeded(bot, user, metadata): """ diff --git a/kairon/shared/data/processor.py b/kairon/shared/data/processor.py index 1b21875e2..f5edb999c 100644 --- a/kairon/shared/data/processor.py +++ b/kairon/shared/data/processor.py @@ -54,7 +54,6 @@ from rasa.shared.nlu.training_data.message import Message from rasa.shared.nlu.training_data.training_data import TrainingData from rasa.shared.utils.io import read_config_file -from uuid6 import uuid7 from werkzeug.utils import secure_filename from kairon.api import models @@ -237,7 +236,6 @@ def download_files(self, bot: Text, user: Text, download_multiflow: bool = False stories = stories.merge(multiflow_stories[0]) rules = rules.merge(multiflow_stories[1]) multiflow_stories = self.load_multiflow_stories_yaml(bot) - #actions = self.load_action_configurations(bot) bot_content = self.load_bot_content(bot) actions, other_collections = ActionSerializer.serialize(bot) return Utility.create_zip_file( @@ -692,7 +690,8 @@ def __prepare_cognition_data_for_bot(self, bot: Text) -> List[Dict[str, Any]]: data_results = CognitionData.objects(bot=bot, collection=collection_name).only("content_type", "data") entries = [d.data for d in data_results] - entries = MongoProcessor.data_format_correction_cognition_data(entries, metadata) + if type_value == "json": + entries = MongoProcessor.data_format_correction_cognition_data(entries, metadata) collection_data["data"] = entries formatted_result.append(collection_data) diff --git a/tests/unit_test/data_processor/data_processor2_test.py b/tests/unit_test/data_processor/data_processor2_test.py index 0a18cb22e..acac94fbc 100644 --- a/tests/unit_test/data_processor/data_processor2_test.py +++ b/tests/unit_test/data_processor/data_processor2_test.py @@ -1,4 +1,5 @@ import os +from unittest.mock import patch, MagicMock import pytest @@ -151,3 +152,151 @@ def test_validate_metadata_and_payload_missing_column(): with pytest.raises(AppException, match="Column 'quantity' does not exist or has no value."): CognitionDataProcessor.validate_column_values(data, schema) + + + +@patch('kairon.shared.cognition.processor.MongoProcessor') +@patch('kairon.shared.cognition.processor.CognitionSchema') +def test_is_collection_limit_exceeded_for_mass_uploading_exceeded(mock_cognition_schema, mock_mongo_processor): + bot = "test_bot" + user = "test_user" + collection_names = ["collection1", "collection2", "collection3"] + + mock_mongo_processor.get_bot_settings.return_value.to_mongo.return_value.to_dict.return_value = { + "cognition_collections_limit": 5 + } + mock_cognition_schema.objects.return_value.distinct.return_value = ["collection_a", "collection_b", "collection_c"] + + result = CognitionDataProcessor.is_collection_limit_exceeded_for_mass_uploading(bot, user, collection_names) + assert result is True + +@patch('kairon.shared.cognition.processor.MongoProcessor') +@patch('kairon.shared.cognition.processor.CognitionSchema') +def test_is_collection_limit_exceeded_for_mass_uploading_exceeded_overwrite(mock_cognition_schema, mock_mongo_processor): + bot = "test_bot" + user = "test_user" + collection_names = ["collection1", "collection2", "collection3", "collection_4", "collection_5", "collection_6"] + + mock_mongo_processor.get_bot_settings.return_value.to_mongo.return_value.to_dict.return_value = { + "cognition_collections_limit": 5 + } + mock_cognition_schema.objects.return_value.distinct.return_value = ["collection_a", "collection_b", "collection_c"] + + result = CognitionDataProcessor.is_collection_limit_exceeded_for_mass_uploading(bot, user, collection_names, True) + assert result is True + +@patch('kairon.shared.cognition.processor.MongoProcessor') +@patch('kairon.shared.cognition.processor.CognitionSchema') +def test_is_collection_limit_exceeded_for_mass_uploading_not_exceeded_overwrite(mock_cognition_schema, mock_mongo_processor): + bot = "test_bot" + user = "test_user" + collection_names = ["collection1", "collection2", "collection3", "collection_4"] + + mock_mongo_processor.get_bot_settings.return_value.to_mongo.return_value.to_dict.return_value = { + "cognition_collections_limit": 5 + } + mock_cognition_schema.objects.return_value.distinct.return_value = ["collection_a", "collection_b", "collection_c"] + + result = CognitionDataProcessor.is_collection_limit_exceeded_for_mass_uploading(bot, user, collection_names, True) + assert result is False + +@patch('kairon.shared.cognition.processor.MongoProcessor') +@patch('kairon.shared.cognition.processor.CognitionSchema') +def test_is_collection_limit_exceeded_for_mass_uploading_not_exceeded(mock_cognition_schema, mock_mongo_processor): + bot = "test_bot" + user = "test_user" + collection_names = ["collection1", "collection2"] + + mock_mongo_processor.get_bot_settings.return_value.to_mongo.return_value.to_dict.return_value = { + "cognition_collections_limit": 5 + } + mock_cognition_schema.objects.return_value.distinct.return_value = ["collection1"] + + result = CognitionDataProcessor.is_collection_limit_exceeded_for_mass_uploading(bot, user, collection_names) + assert result is False + + +@patch.object(MongoProcessor, '_MongoProcessor__save_cognition_schema') +@patch.object(MongoProcessor, '_MongoProcessor__save_cognition_data') +def test_save_bot_content(mock_save_cognition_data, mock_save_cognition_schema): + bot_content = [ + { + 'collection': 'collection1', + 'type': 'json', + 'metadata': [ + {'column_name': 'column1', 'data_type': 'str', 'enable_search': True, 'create_embeddings': False} + ], + 'data': [ + {'column1': 'value1'} + ] + } + ] + bot = 'test_bot' + user = 'test_user' + processor = MongoProcessor() + + processor.save_bot_content(bot_content, bot, user) + + mock_save_cognition_schema.assert_called_once_with(bot_content, bot, user) + mock_save_cognition_data.assert_called_once_with(bot_content, bot, user) + +@patch.object(MongoProcessor, '_MongoProcessor__save_cognition_schema') +@patch.object(MongoProcessor, '_MongoProcessor__save_cognition_data') +def test_save_bot_content_empty(mock_save_cognition_data, mock_save_cognition_schema): + bot_content = [] + bot = 'test_bot' + user = 'test_user' + processor = MongoProcessor() + + processor.save_bot_content(bot_content, bot, user) + + mock_save_cognition_schema.assert_not_called() + mock_save_cognition_data.assert_not_called() + + + + +@patch.object(MongoProcessor, 'data_format_correction_cognition_data') +@patch('kairon.shared.data.processor.CognitionSchema.objects') +@patch('kairon.shared.data.processor.CognitionData.objects') +def test_prepare_cognition_data_for_bot_json(mock_cognition_data_objects, mock_cognition_schema_objects, mock_data_format_correction): + bot = 'test_bot' + schema_result = MagicMock() + schema_result.collection_name = 'collection1' + schema_result.metadata = [ + MagicMock(column_name='column1', data_type='str', enable_search=True, create_embeddings=False) + ] + mock_cognition_schema_objects.return_value.only.return_value = [schema_result] + + data_result = MagicMock() + data_result.data = {'column1': 'value1'} + mock_cognition_data_objects.return_value.only.return_value = [data_result] + + mock_data_format_correction.return_value = [{'column1': 'value1'}] + + processor = MongoProcessor() + + processor._MongoProcessor__prepare_cognition_data_for_bot(bot) + + mock_data_format_correction.assert_called_once() + + +@patch.object(MongoProcessor, 'data_format_correction_cognition_data') +@patch('kairon.shared.data.processor.CognitionSchema.objects') +@patch('kairon.shared.data.processor.CognitionData.objects') +def test_prepare_cognition_data_for_bot_text_format_not_called(mock_cognition_data_objects, mock_cognition_schema_objects, mock_data_format_correction): + bot = 'test_bot' + schema_result = MagicMock() + schema_result.collection_name = 'collection1' + schema_result.metadata = [] + mock_cognition_schema_objects.return_value.only.return_value = [schema_result] + + data_result = MagicMock() + data_result.data = 'text data' + mock_cognition_data_objects.return_value.only.return_value = [data_result] + + processor = MongoProcessor() + + processor._MongoProcessor__prepare_cognition_data_for_bot(bot) + + mock_data_format_correction.assert_not_called() diff --git a/tests/unit_test/validator/training_data_validator_test.py b/tests/unit_test/validator/training_data_validator_test.py index a77f02514..2a7d189eb 100644 --- a/tests/unit_test/validator/training_data_validator_test.py +++ b/tests/unit_test/validator/training_data_validator_test.py @@ -1,5 +1,6 @@ +from unittest.mock import patch + import ujson as json -import re import pytest import yaml from mongoengine import connect @@ -910,3 +911,15 @@ def _mock_get_bot_settings(*args, **kwargs): errors = TrainingDataValidator.validate_content("your_bot_name", "integration@demo.ai", bot_content) assert errors + + @patch('kairon.shared.cognition.processor.CognitionDataProcessor.is_collection_limit_exceeded_for_mass_uploading') + def test_validate_content_collection_limit_exceeded(self, mock_is_collection_limit_exceeded): + bot = 'test_bot' + user = 'test_user' + bot_content = [{'collection': 'collection1'}, {'collection': 'collection2'}] + mock_is_collection_limit_exceeded.return_value = True + + errors = TrainingDataValidator.validate_content(bot, user, bot_content) + + assert 'Collection limit exceeded!' in errors + mock_is_collection_limit_exceeded.assert_called_once_with(bot, user, ['collection1', 'collection2'], True) \ No newline at end of file