Skip to content

Commit

Permalink
cognition upload fix (#1674)
Browse files Browse the repository at this point in the history
Co-authored-by: spandan_mondal <[email protected]>
  • Loading branch information
hasinaxp and spandan_mondal authored Dec 20, 2024
1 parent 45a5000 commit c529677
Show file tree
Hide file tree
Showing 5 changed files with 194 additions and 4 deletions.
6 changes: 6 additions & 0 deletions kairon/importer/validator/file_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -1058,6 +1058,12 @@ def validate_content(bot: Text, user: Text, bot_content: List, save_data: bool =
current_dir = os.path.dirname(os.path.realpath(__file__))
bot_content_schema_file_path = os.path.join(current_dir, "bot_content_schema.yaml")
schema_validator = Core(source_data=bot_content, schema_files=[bot_content_schema_file_path])

from kairon.shared.cognition.processor import CognitionDataProcessor
new_collection_names = [data_item.get('collection') for data_item in bot_content]
if CognitionDataProcessor.is_collection_limit_exceeded_for_mass_uploading(bot, user, new_collection_names, overwrite):
bot_content_errors.append('Collection limit exceeded!')

try:
schema_validator.validate(raise_exception=True)
logger.info("Validation successful!")
Expand Down
23 changes: 23 additions & 0 deletions kairon/shared/cognition/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,29 @@ def is_collection_limit_exceeded(bot, user, collection):
else:
return False

@staticmethod
def is_collection_limit_exceeded_for_mass_uploading(bot: str, user: str, collection_names: List[str], overwrite: bool = False):
    """
    Checks whether a bulk upload would take the bot past its
    "cognition_collections_limit" setting.

    Every collection name is counted once, no matter how many times it
    appears in `collection_names` or whether it already exists for the bot.

    :param bot: bot id
    :param user: user
    :param collection_names: names of the collections contained in the upload
    :param overwrite: if True, only the uploaded collections are counted;
        if False, they are counted together with the collections already
        stored for the bot
    :return: True if the resulting number of distinct collections is greater
        than the limit, False otherwise
    """
    bot_settings = MongoProcessor.get_bot_settings(bot, user).to_mongo().to_dict()
    limit = bot_settings["cognition_collections_limit"]

    # The existing collections are only looked up when they will be kept,
    # i.e. when the upload does not overwrite them.
    resulting = [] if overwrite else list(CognitionSchema.objects(bot=bot).distinct(field='collection_name'))

    # List membership (rather than a set) keeps this tolerant of unhashable
    # values in the uploaded names while still counting each name only once.
    for name in collection_names:
        if name not in resulting:
            resulting.append(name)

    return len(resulting) > limit



@staticmethod
def is_column_collection_limit_exceeded(bot, user, metadata):
"""
Expand Down
5 changes: 2 additions & 3 deletions kairon/shared/data/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@
from rasa.shared.nlu.training_data.message import Message
from rasa.shared.nlu.training_data.training_data import TrainingData
from rasa.shared.utils.io import read_config_file
from uuid6 import uuid7
from werkzeug.utils import secure_filename

from kairon.api import models
Expand Down Expand Up @@ -237,7 +236,6 @@ def download_files(self, bot: Text, user: Text, download_multiflow: bool = False
stories = stories.merge(multiflow_stories[0])
rules = rules.merge(multiflow_stories[1])
multiflow_stories = self.load_multiflow_stories_yaml(bot)
#actions = self.load_action_configurations(bot)
bot_content = self.load_bot_content(bot)
actions, other_collections = ActionSerializer.serialize(bot)
return Utility.create_zip_file(
Expand Down Expand Up @@ -692,7 +690,8 @@ def __prepare_cognition_data_for_bot(self, bot: Text) -> List[Dict[str, Any]]:

data_results = CognitionData.objects(bot=bot, collection=collection_name).only("content_type", "data")
entries = [d.data for d in data_results]
entries = MongoProcessor.data_format_correction_cognition_data(entries, metadata)
if type_value == "json":
entries = MongoProcessor.data_format_correction_cognition_data(entries, metadata)
collection_data["data"] = entries

formatted_result.append(collection_data)
Expand Down
149 changes: 149 additions & 0 deletions tests/unit_test/data_processor/data_processor2_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from unittest.mock import patch, MagicMock

import pytest

Expand Down Expand Up @@ -151,3 +152,151 @@ def test_validate_metadata_and_payload_missing_column():

with pytest.raises(AppException, match="Column 'quantity' does not exist or has no value."):
CognitionDataProcessor.validate_column_values(data, schema)



@patch('kairon.shared.cognition.processor.MongoProcessor')
@patch('kairon.shared.cognition.processor.CognitionSchema')
def test_is_collection_limit_exceeded_for_mass_uploading_exceeded(mock_cognition_schema, mock_mongo_processor):
    """Three new collections added to three existing ones exceed a limit of five."""
    # Bot settings returned by the patched MongoProcessor.
    settings_doc = mock_mongo_processor.get_bot_settings.return_value
    settings_doc.to_mongo.return_value.to_dict.return_value = {
        "cognition_collections_limit": 5
    }
    # Collections already stored for the bot.
    stored_query = mock_cognition_schema.objects.return_value
    stored_query.distinct.return_value = ["collection_a", "collection_b", "collection_c"]

    exceeded = CognitionDataProcessor.is_collection_limit_exceeded_for_mass_uploading(
        "test_bot", "test_user", ["collection1", "collection2", "collection3"]
    )

    assert exceeded is True

@patch('kairon.shared.cognition.processor.MongoProcessor')
@patch('kairon.shared.cognition.processor.CognitionSchema')
def test_is_collection_limit_exceeded_for_mass_uploading_exceeded_overwrite(mock_cognition_schema, mock_mongo_processor):
    """With overwrite enabled, uploading six collections exceeds a limit of five."""
    # Bot settings returned by the patched MongoProcessor.
    settings_doc = mock_mongo_processor.get_bot_settings.return_value
    settings_doc.to_mongo.return_value.to_dict.return_value = {
        "cognition_collections_limit": 5
    }
    # Collections already stored for the bot.
    stored_query = mock_cognition_schema.objects.return_value
    stored_query.distinct.return_value = ["collection_a", "collection_b", "collection_c"]

    uploaded_names = [
        "collection1", "collection2", "collection3",
        "collection_4", "collection_5", "collection_6",
    ]
    exceeded = CognitionDataProcessor.is_collection_limit_exceeded_for_mass_uploading(
        "test_bot", "test_user", uploaded_names, True
    )

    assert exceeded is True

@patch('kairon.shared.cognition.processor.MongoProcessor')
@patch('kairon.shared.cognition.processor.CognitionSchema')
def test_is_collection_limit_exceeded_for_mass_uploading_not_exceeded_overwrite(mock_cognition_schema, mock_mongo_processor):
    """With overwrite enabled, uploading four collections stays within a limit of five."""
    # Bot settings returned by the patched MongoProcessor.
    settings_doc = mock_mongo_processor.get_bot_settings.return_value
    settings_doc.to_mongo.return_value.to_dict.return_value = {
        "cognition_collections_limit": 5
    }
    # Collections already stored for the bot.
    stored_query = mock_cognition_schema.objects.return_value
    stored_query.distinct.return_value = ["collection_a", "collection_b", "collection_c"]

    uploaded_names = ["collection1", "collection2", "collection3", "collection_4"]
    exceeded = CognitionDataProcessor.is_collection_limit_exceeded_for_mass_uploading(
        "test_bot", "test_user", uploaded_names, True
    )

    assert exceeded is False

@patch('kairon.shared.cognition.processor.MongoProcessor')
@patch('kairon.shared.cognition.processor.CognitionSchema')
def test_is_collection_limit_exceeded_for_mass_uploading_not_exceeded(mock_cognition_schema, mock_mongo_processor):
    """One new collection plus one already stored stays within a limit of five."""
    # Bot settings returned by the patched MongoProcessor.
    settings_doc = mock_mongo_processor.get_bot_settings.return_value
    settings_doc.to_mongo.return_value.to_dict.return_value = {
        "cognition_collections_limit": 5
    }
    # "collection1" is already stored, so only "collection2" is new.
    stored_query = mock_cognition_schema.objects.return_value
    stored_query.distinct.return_value = ["collection1"]

    exceeded = CognitionDataProcessor.is_collection_limit_exceeded_for_mass_uploading(
        "test_bot", "test_user", ["collection1", "collection2"]
    )

    assert exceeded is False


@patch.object(MongoProcessor, '_MongoProcessor__save_cognition_schema')
@patch.object(MongoProcessor, '_MongoProcessor__save_cognition_data')
def test_save_bot_content(mock_save_cognition_data, mock_save_cognition_schema):
    """save_bot_content passes the content, bot and user to both private save helpers."""
    column_spec = {
        'column_name': 'column1',
        'data_type': 'str',
        'enable_search': True,
        'create_embeddings': False,
    }
    collection_entry = {
        'collection': 'collection1',
        'type': 'json',
        'metadata': [column_spec],
        'data': [{'column1': 'value1'}],
    }
    content = [collection_entry]

    MongoProcessor().save_bot_content(content, 'test_bot', 'test_user')

    # Each helper must be invoked exactly once with the untouched arguments.
    for helper in (mock_save_cognition_schema, mock_save_cognition_data):
        helper.assert_called_once_with(content, 'test_bot', 'test_user')

@patch.object(MongoProcessor, '_MongoProcessor__save_cognition_schema')
@patch.object(MongoProcessor, '_MongoProcessor__save_cognition_data')
def test_save_bot_content_empty(mock_save_cognition_data, mock_save_cognition_schema):
    """save_bot_content calls neither private save helper when the content is empty."""
    MongoProcessor().save_bot_content([], 'test_bot', 'test_user')

    for helper in (mock_save_cognition_schema, mock_save_cognition_data):
        helper.assert_not_called()




@patch.object(MongoProcessor, 'data_format_correction_cognition_data')
@patch('kairon.shared.data.processor.CognitionSchema.objects')
@patch('kairon.shared.data.processor.CognitionData.objects')
def test_prepare_cognition_data_for_bot_json(mock_cognition_data_objects, mock_cognition_schema_objects, mock_data_format_correction):
    """Format correction runs exactly once for a collection whose schema carries column metadata."""
    # Schema document with a single column definition.
    column = MagicMock(column_name='column1', data_type='str', enable_search=True, create_embeddings=False)
    schema_doc = MagicMock()
    schema_doc.collection_name = 'collection1'
    schema_doc.metadata = [column]
    mock_cognition_schema_objects.return_value.only.return_value = [schema_doc]

    # One stored data row for that collection.
    row = MagicMock()
    row.data = {'column1': 'value1'}
    mock_cognition_data_objects.return_value.only.return_value = [row]

    mock_data_format_correction.return_value = [{'column1': 'value1'}]

    MongoProcessor()._MongoProcessor__prepare_cognition_data_for_bot('test_bot')

    mock_data_format_correction.assert_called_once()


@patch.object(MongoProcessor, 'data_format_correction_cognition_data')
@patch('kairon.shared.data.processor.CognitionSchema.objects')
@patch('kairon.shared.data.processor.CognitionData.objects')
def test_prepare_cognition_data_for_bot_text_format_not_called(mock_cognition_data_objects, mock_cognition_schema_objects, mock_data_format_correction):
    """Format correction is skipped for a collection whose schema has no column metadata."""
    # Schema document without any column definitions.
    schema_doc = MagicMock()
    schema_doc.collection_name = 'collection1'
    schema_doc.metadata = []
    mock_cognition_schema_objects.return_value.only.return_value = [schema_doc]

    # One stored plain-text row for that collection.
    row = MagicMock()
    row.data = 'text data'
    mock_cognition_data_objects.return_value.only.return_value = [row]

    MongoProcessor()._MongoProcessor__prepare_cognition_data_for_bot('test_bot')

    mock_data_format_correction.assert_not_called()
15 changes: 14 additions & 1 deletion tests/unit_test/validator/training_data_validator_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from unittest.mock import patch

import ujson as json
import re
import pytest
import yaml
from mongoengine import connect
Expand Down Expand Up @@ -910,3 +911,15 @@ def _mock_get_bot_settings(*args, **kwargs):
errors = TrainingDataValidator.validate_content("your_bot_name", "[email protected]", bot_content)

assert errors

@patch('kairon.shared.cognition.processor.CognitionDataProcessor.is_collection_limit_exceeded_for_mass_uploading')
def test_validate_content_collection_limit_exceeded(self, mock_is_collection_limit_exceeded):
    """validate_content reports 'Collection limit exceeded!' when the limit check returns True."""
    mock_is_collection_limit_exceeded.return_value = True
    uploaded_content = [{'collection': name} for name in ('collection1', 'collection2')]

    errors = TrainingDataValidator.validate_content('test_bot', 'test_user', uploaded_content)

    assert 'Collection limit exceeded!' in errors
    # The limit check must receive the extracted collection names and overwrite=True.
    mock_is_collection_limit_exceeded.assert_called_once_with(
        'test_bot', 'test_user', ['collection1', 'collection2'], True
    )

0 comments on commit c529677

Please sign in to comment.