cognition upload fix #1674

Merged
merged 1 commit on Dec 20, 2024
6 changes: 6 additions & 0 deletions kairon/importer/validator/file_validator.py
@@ -1058,6 +1058,12 @@ def validate_content(bot: Text, user: Text, bot_content: List, save_data: bool =
current_dir = os.path.dirname(os.path.realpath(__file__))
bot_content_schema_file_path = os.path.join(current_dir, "bot_content_schema.yaml")
schema_validator = Core(source_data=bot_content, schema_files=[bot_content_schema_file_path])

from kairon.shared.cognition.processor import CognitionDataProcessor
new_collection_names = [data_item.get('collection') for data_item in bot_content]
if CognitionDataProcessor.is_collection_limit_exceeded_for_mass_uploading(bot, user, new_collection_names, overwrite):
bot_content_errors.append('Collection limit exceeded!')

try:
schema_validator.validate(raise_exception=True)
logger.info("Validation successful!")
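For context, a minimal sketch of what the new guard does from the caller's point of view (payload shape assumed from the hunk above and the tests further down, not taken from kairon itself): the validator pulls one name per uploaded collection, asks the cognition processor whether those names fit within the bot's cognition_collections_limit, and appends 'Collection limit exceeded!' to the error list when they do not. The import is done inside the function, presumably to avoid a circular import at module load time.

# Illustrative sketch only; it does not import kairon, it just mirrors the guard's flow.
bot_content = [
    {"collection": "faq", "type": "text", "data": ["How do I reset my password?"]},
    {"collection": "products", "type": "json", "metadata": [], "data": []},
]
bot_content_errors = []

# One name per uploaded collection, exactly as in the hunk above.
new_collection_names = [item.get("collection") for item in bot_content]  # ["faq", "products"]

# Stand-in for CognitionDataProcessor.is_collection_limit_exceeded_for_mass_uploading:
# pretend the bot's cognition_collections_limit is already used up.
limit_exceeded = True
if limit_exceeded:
    bot_content_errors.append("Collection limit exceeded!")

print(bot_content_errors)  # ['Collection limit exceeded!']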
23 changes: 23 additions & 0 deletions kairon/shared/cognition/processor.py
@@ -39,6 +39,29 @@ def is_collection_limit_exceeded(bot, user, collection):
else:
return False

@staticmethod
def is_collection_limit_exceeded_for_mass_uploading(bot: str, user: str, collection_names: List[str], overwrite: bool = False):
"""
Checks whether the uploaded collections would exceed the bot's cognition collection limit.

:param bot: bot id
:param user: user
:param collection_names: list of collection names in the upload
:param overwrite: whether the upload replaces the bot's existing collections
:return: boolean
:raises: AppException
"""

bot_settings = MongoProcessor.get_bot_settings(bot, user)
bot_settings = bot_settings.to_mongo().to_dict()
if overwrite:
return len(collection_names) > bot_settings["cognition_collections_limit"]
else:
collections = list(CognitionSchema.objects(bot=bot).distinct(field='collection_name'))
new_to_add = [collection for collection in collection_names if collection not in collections]
return len(new_to_add) + len(collections) > bot_settings["cognition_collections_limit"]



@staticmethod
def is_column_collection_limit_exceeded(bot, user, metadata):
"""
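A standalone illustration of the arithmetic in is_collection_limit_exceeded_for_mass_uploading (values assumed, not kairon code): with overwrite=True the upload replaces everything, so only the incoming names are counted against the limit; with overwrite=False only names not already stored for the bot add to the existing count.

# Toy re-implementation of the limit check, for illustration only.
from typing import List

def limit_exceeded(existing: List[str], incoming: List[str], limit: int, overwrite: bool) -> bool:
    if overwrite:
        # Overwrite replaces all stored collections, so only incoming names count.
        return len(incoming) > limit
    # Otherwise only genuinely new names add to the existing total.
    new_to_add = [name for name in incoming if name not in existing]
    return len(new_to_add) + len(existing) > limit

existing = ["faq", "pricing", "support"]   # already stored for the bot
incoming = ["faq", "returns"]              # names in the upload
print(limit_exceeded(existing, incoming, limit=5, overwrite=False))  # False: 1 new + 3 existing = 4 <= 5
print(limit_exceeded(existing, incoming, limit=3, overwrite=False))  # True: 4 > 3
print(limit_exceeded(existing, incoming, limit=5, overwrite=True))   # False: only the 2 incoming names count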
5 changes: 2 additions & 3 deletions kairon/shared/data/processor.py
@@ -54,7 +54,6 @@
from rasa.shared.nlu.training_data.message import Message
from rasa.shared.nlu.training_data.training_data import TrainingData
from rasa.shared.utils.io import read_config_file
from uuid6 import uuid7
from werkzeug.utils import secure_filename

from kairon.api import models
@@ -237,7 +236,6 @@ def download_files(self, bot: Text, user: Text, download_multiflow: bool = False
stories = stories.merge(multiflow_stories[0])
rules = rules.merge(multiflow_stories[1])
multiflow_stories = self.load_multiflow_stories_yaml(bot)
#actions = self.load_action_configurations(bot)
bot_content = self.load_bot_content(bot)
actions, other_collections = ActionSerializer.serialize(bot)
return Utility.create_zip_file(
@@ -692,7 +690,8 @@ def __prepare_cognition_data_for_bot(self, bot: Text) -> List[Dict[str, Any]]:

data_results = CognitionData.objects(bot=bot, collection=collection_name).only("content_type", "data")
entries = [d.data for d in data_results]
entries = MongoProcessor.data_format_correction_cognition_data(entries, metadata)
if type_value == "json":
entries = MongoProcessor.data_format_correction_cognition_data(entries, metadata)
collection_data["data"] = entries

formatted_result.append(collection_data)
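A hedged sketch of why the format correction is now gated on the collection type (example payloads assumed): JSON collections hold dict rows whose values need to be normalised against the schema metadata before export, while text collections hold plain strings the correction routine should not touch. The stand-in below is not the real MongoProcessor.data_format_correction_cognition_data, just a toy version of the idea.

# Illustration only; payloads and the correction function are assumed.
def data_format_correction(entries, metadata):
    # Toy version: coerce string values of int columns back to int.
    int_columns = {m["column_name"] for m in metadata if m["data_type"] == "int"}
    return [{k: int(v) if k in int_columns else v for k, v in row.items()} for row in entries]

collections = [
    {"collection": "products", "type": "json",
     "metadata": [{"column_name": "price", "data_type": "int"}],
     "data": [{"price": "10", "name": "mug"}]},
    {"collection": "faq", "type": "text", "metadata": [],
     "data": ["How do I reset my password?"]},
]

for coll in collections:
    entries = coll["data"]
    if coll["type"] == "json":                      # mirrors the guard added above
        entries = data_format_correction(entries, coll["metadata"])
    coll["data"] = entries

print(collections[0]["data"])  # [{'price': 10, 'name': 'mug'}]
print(collections[1]["data"])  # ['How do I reset my password?'] -- left untouched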
149 changes: 149 additions & 0 deletions tests/unit_test/data_processor/data_processor2_test.py
@@ -1,4 +1,5 @@
import os
from unittest.mock import patch, MagicMock

import pytest

@@ -151,3 +152,151 @@ def test_validate_metadata_and_payload_missing_column():

with pytest.raises(AppException, match="Column 'quantity' does not exist or has no value."):
CognitionDataProcessor.validate_column_values(data, schema)



@patch('kairon.shared.cognition.processor.MongoProcessor')
@patch('kairon.shared.cognition.processor.CognitionSchema')
def test_is_collection_limit_exceeded_for_mass_uploading_exceeded(mock_cognition_schema, mock_mongo_processor):
bot = "test_bot"
user = "test_user"
collection_names = ["collection1", "collection2", "collection3"]

mock_mongo_processor.get_bot_settings.return_value.to_mongo.return_value.to_dict.return_value = {
"cognition_collections_limit": 5
}
mock_cognition_schema.objects.return_value.distinct.return_value = ["collection_a", "collection_b", "collection_c"]

result = CognitionDataProcessor.is_collection_limit_exceeded_for_mass_uploading(bot, user, collection_names)
assert result is True

@patch('kairon.shared.cognition.processor.MongoProcessor')
@patch('kairon.shared.cognition.processor.CognitionSchema')
def test_is_collection_limit_exceeded_for_mass_uploading_exceeded_overwrite(mock_cognition_schema, mock_mongo_processor):
bot = "test_bot"
user = "test_user"
collection_names = ["collection1", "collection2", "collection3", "collection_4", "collection_5", "collection_6"]

mock_mongo_processor.get_bot_settings.return_value.to_mongo.return_value.to_dict.return_value = {
"cognition_collections_limit": 5
}
mock_cognition_schema.objects.return_value.distinct.return_value = ["collection_a", "collection_b", "collection_c"]

result = CognitionDataProcessor.is_collection_limit_exceeded_for_mass_uploading(bot, user, collection_names, True)
assert result is True

@patch('kairon.shared.cognition.processor.MongoProcessor')
@patch('kairon.shared.cognition.processor.CognitionSchema')
def test_is_collection_limit_exceeded_for_mass_uploading_not_exceeded_overwrite(mock_cognition_schema, mock_mongo_processor):
bot = "test_bot"
user = "test_user"
collection_names = ["collection1", "collection2", "collection3", "collection_4"]

mock_mongo_processor.get_bot_settings.return_value.to_mongo.return_value.to_dict.return_value = {
"cognition_collections_limit": 5
}
mock_cognition_schema.objects.return_value.distinct.return_value = ["collection_a", "collection_b", "collection_c"]

result = CognitionDataProcessor.is_collection_limit_exceeded_for_mass_uploading(bot, user, collection_names, True)
assert result is False

@patch('kairon.shared.cognition.processor.MongoProcessor')
@patch('kairon.shared.cognition.processor.CognitionSchema')
def test_is_collection_limit_exceeded_for_mass_uploading_not_exceeded(mock_cognition_schema, mock_mongo_processor):
bot = "test_bot"
user = "test_user"
collection_names = ["collection1", "collection2"]

mock_mongo_processor.get_bot_settings.return_value.to_mongo.return_value.to_dict.return_value = {
"cognition_collections_limit": 5
}
mock_cognition_schema.objects.return_value.distinct.return_value = ["collection1"]

result = CognitionDataProcessor.is_collection_limit_exceeded_for_mass_uploading(bot, user, collection_names)
assert result is False


@patch.object(MongoProcessor, '_MongoProcessor__save_cognition_schema')
@patch.object(MongoProcessor, '_MongoProcessor__save_cognition_data')
def test_save_bot_content(mock_save_cognition_data, mock_save_cognition_schema):
bot_content = [
{
'collection': 'collection1',
'type': 'json',
'metadata': [
{'column_name': 'column1', 'data_type': 'str', 'enable_search': True, 'create_embeddings': False}
],
'data': [
{'column1': 'value1'}
]
}
]
bot = 'test_bot'
user = 'test_user'
processor = MongoProcessor()

processor.save_bot_content(bot_content, bot, user)

mock_save_cognition_schema.assert_called_once_with(bot_content, bot, user)
mock_save_cognition_data.assert_called_once_with(bot_content, bot, user)

@patch.object(MongoProcessor, '_MongoProcessor__save_cognition_schema')
@patch.object(MongoProcessor, '_MongoProcessor__save_cognition_data')
def test_save_bot_content_empty(mock_save_cognition_data, mock_save_cognition_schema):
bot_content = []
bot = 'test_bot'
user = 'test_user'
processor = MongoProcessor()

processor.save_bot_content(bot_content, bot, user)

mock_save_cognition_schema.assert_not_called()
mock_save_cognition_data.assert_not_called()




@patch.object(MongoProcessor, 'data_format_correction_cognition_data')
@patch('kairon.shared.data.processor.CognitionSchema.objects')
@patch('kairon.shared.data.processor.CognitionData.objects')
def test_prepare_cognition_data_for_bot_json(mock_cognition_data_objects, mock_cognition_schema_objects, mock_data_format_correction):
bot = 'test_bot'
schema_result = MagicMock()
schema_result.collection_name = 'collection1'
schema_result.metadata = [
MagicMock(column_name='column1', data_type='str', enable_search=True, create_embeddings=False)
]
mock_cognition_schema_objects.return_value.only.return_value = [schema_result]

data_result = MagicMock()
data_result.data = {'column1': 'value1'}
mock_cognition_data_objects.return_value.only.return_value = [data_result]

mock_data_format_correction.return_value = [{'column1': 'value1'}]

processor = MongoProcessor()

processor._MongoProcessor__prepare_cognition_data_for_bot(bot)

mock_data_format_correction.assert_called_once()


@patch.object(MongoProcessor, 'data_format_correction_cognition_data')
@patch('kairon.shared.data.processor.CognitionSchema.objects')
@patch('kairon.shared.data.processor.CognitionData.objects')
def test_prepare_cognition_data_for_bot_text_format_not_called(mock_cognition_data_objects, mock_cognition_schema_objects, mock_data_format_correction):
bot = 'test_bot'
schema_result = MagicMock()
schema_result.collection_name = 'collection1'
schema_result.metadata = []
mock_cognition_schema_objects.return_value.only.return_value = [schema_result]

data_result = MagicMock()
data_result.data = 'text data'
mock_cognition_data_objects.return_value.only.return_value = [data_result]

processor = MongoProcessor()

processor._MongoProcessor__prepare_cognition_data_for_bot(bot)

mock_data_format_correction.assert_not_called()
15 changes: 14 additions & 1 deletion tests/unit_test/validator/training_data_validator_test.py
@@ -1,5 +1,6 @@
from unittest.mock import patch

import ujson as json
import re
import pytest
import yaml
from mongoengine import connect
@@ -910,3 +911,15 @@ def _mock_get_bot_settings(*args, **kwargs):
errors = TrainingDataValidator.validate_content("your_bot_name", "[email protected]", bot_content)

assert errors

@patch('kairon.shared.cognition.processor.CognitionDataProcessor.is_collection_limit_exceeded_for_mass_uploading')
def test_validate_content_collection_limit_exceeded(self, mock_is_collection_limit_exceeded):
bot = 'test_bot'
user = 'test_user'
bot_content = [{'collection': 'collection1'}, {'collection': 'collection2'}]
mock_is_collection_limit_exceeded.return_value = True

errors = TrainingDataValidator.validate_content(bot, user, bot_content)

assert 'Collection limit exceeded!' in errors
mock_is_collection_limit_exceeded.assert_called_once_with(bot, user, ['collection1', 'collection2'], True)