Skip to content

Commit

Permalink
added forced conversion for congition_data to strictly follow cogniti…
Browse files Browse the repository at this point in the history
…on_schema for upload download of bot_content (#1587)

* added forced conversion for cognition_data to strictly follow cognition_schema for upload/download of bot_content

* test fix

---------

Co-authored-by: spandan.mondal <[email protected]>
  • Loading branch information
hasinaxp and spandan.mondal authored Oct 28, 2024
1 parent b674b39 commit 9a7e824
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 5 deletions.
35 changes: 32 additions & 3 deletions kairon/shared/data/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,8 +654,9 @@ def __prepare_cognition_data_for_bot(self, bot: Text) -> List[Dict[str, Any]]:
}

data_results = CognitionData.objects(bot=bot, collection=collection_name).only("content_type", "data")
for data_result in data_results:
collection_data["data"].append(data_result.data)
entries = [d.data for d in data_results]
entries = MongoProcessor.data_format_correction_cognition_data(entries, metadata)
collection_data["data"] = entries

formatted_result.append(collection_data)

Expand Down Expand Up @@ -918,7 +919,10 @@ def __save_cognition_data(self, bot_content: list, bot: Text, user: Text):
)
cognition_data.save()
elif data_item['type'] == 'json':
for json_data in data_item['data']:
data_entries = data_item['data']
metadata = data_item['metadata']
data_entries = MongoProcessor.data_format_correction_cognition_data(data_entries, metadata)
for json_data in data_entries:
cognition_data = CognitionData(
data=json_data,
content_type='json',
Expand All @@ -928,6 +932,31 @@ def __save_cognition_data(self, bot_content: list, bot: Text, user: Text):
)
cognition_data.save()

@staticmethod
def data_format_correction_cognition_data(data_entries, metadata):
convs = {
m['column_name']: (
int if m['data_type'] == 'int' else
float if m['data_type'] == 'float' else
str if m['data_type'] == 'str' else
None
)
for m in metadata
}
return [
{
**e,
**{
cname: (
convs[cname](e[cname][0] if isinstance(e[cname], list) and e[cname] else e[cname])
if e[cname] is not None and convs[cname] is not None else e[cname]
)
for cname in convs
}
}
for e in data_entries
]

def save_bot_content(self, bot_content: list, bot: Text, user: Text):
"""
saves bot content data
Expand Down
4 changes: 2 additions & 2 deletions tests/integration_test/services_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2124,7 +2124,7 @@ def test_upload_with_bot_content_valifdate_payload_data():
assert len(actual["data"]["rows"]) == 3
assert all(row["content_type"] == "json" for row in actual["data"]["rows"])
assert actual["data"]["rows"][0]["data"]["city"] == "City 3"
assert actual["data"]["rows"][1]["data"]["population"] == "200"
assert actual["data"]["rows"][1]["data"]["population"] == 200
assert all(row["collection"] == "test_payload_collection" for row in actual["data"]["rows"])

CognitionData.objects(bot=pytest.bot).delete()
Expand Down Expand Up @@ -2344,7 +2344,7 @@ def test_upload_with_bot_content_event_append_validate_payload_data():
assert len(actual["data"]["rows"]) == 6
assert all(row["content_type"] == "json" for row in actual["data"]["rows"])
assert actual["data"]["rows"][4]["data"]["city"] == "City 2"
assert actual["data"]["rows"][1]["data"]["population"] == "200"
assert actual["data"]["rows"][1]["data"]["population"] == 200
assert all(row["collection"] == "test_payload_collection" for row in actual["data"]["rows"])

CognitionData.objects(bot=pytest.bot).delete()
Expand Down
49 changes: 49 additions & 0 deletions tests/unit_test/data_processor/data_processor2_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import os
from kairon.shared.utils import Utility
os.environ["system_file"] = "./tests/testing_data/system.yaml"
Utility.load_environment()
Utility.load_system_metadata()

from kairon.shared.data.processor import MongoProcessor

# NOTE: use this file for adding new tests for the data_processor module


def test_data_format_correction():
    """Values are coerced to their declared metadata types; unsupported types pass through."""
    metadata = [
        {'column_name': 'age', 'data_type': 'int'},
        {'column_name': 'height', 'data_type': 'float'},
        {'column_name': 'name', 'data_type': 'str'},
        {'column_name': 'extra', 'data_type': 'dict'},  # Unsupported type
    ]

    raw_rows = [
        {'age': '25', 'height': '175.5', 'name': 'Alice', 'extra': {'key': 'value'}},
        {'age': ['30'], 'height': [180.0], 'name': ['Bob'], 'extra': [1, 2, 3]},
        {'age': None, 'height': '165.2', 'name': 'Charlie', 'extra': None},
        {'age': '40', 'height': None, 'name': None, 'extra': 'not a dict'}
    ]

    expected_output = [
        {'age': 25, 'height': 175.5, 'name': 'Alice', 'extra': {'key': 'value'}},
        {'age': 30, 'height': 180.0, 'name': 'Bob', 'extra': [1, 2, 3]},
        {'age': None, 'height': 165.2, 'name': 'Charlie', 'extra': None},
        {'age': 40, 'height': None, 'name': None, 'extra': 'not a dict'}
    ]

    result = MongoProcessor.data_format_correction_cognition_data(raw_rows, metadata)
    assert result == expected_output, f"Expected {expected_output}, but got {result}"

def test_empty_entries():
    """An empty entry list yields an empty result regardless of metadata."""
    result = MongoProcessor.data_format_correction_cognition_data(
        [], [{'column_name': 'age', 'data_type': 'int'}]
    )
    assert result == [], f"Expected [], but got {result}"


def test_non_list_non_string_values():
    """Columns absent from metadata keep their original values untouched."""
    metadata = [{'column_name': 'age', 'data_type': 'int'}]
    rows = [{'age': '22', 'height': 5.7, 'name': 'Tom'}]
    expected_output = [{'age': 22, 'height': 5.7, 'name': 'Tom'}]

    result = MongoProcessor.data_format_correction_cognition_data(rows, metadata)
    assert result == expected_output, f"Expected {expected_output}, but got {result}"

0 comments on commit 9a7e824

Please sign in to comment.