Skip to content

Commit

Permalink
added forced conversion for congition_data to strictly follow cogniti…
Browse files Browse the repository at this point in the history
…on_schema for upload download of bot_content (#1587)

* added forced conversion for cognition_data to strictly follow cognition_schema for upload/download of bot_content

* test fix

---------

Co-authored-by: spandan.mondal <[email protected]>
  • Loading branch information
hasinaxp and spandan.mondal authored Oct 28, 2024
1 parent b674b39 commit 9a7e824
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 5 deletions.
35 changes: 32 additions & 3 deletions kairon/shared/data/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,8 +654,9 @@ def __prepare_cognition_data_for_bot(self, bot: Text) -> List[Dict[str, Any]]:
}

data_results = CognitionData.objects(bot=bot, collection=collection_name).only("content_type", "data")
for data_result in data_results:
collection_data["data"].append(data_result.data)
entries = [d.data for d in data_results]
entries = MongoProcessor.data_format_correction_cognition_data(entries, metadata)
collection_data["data"] = entries

formatted_result.append(collection_data)

Expand Down Expand Up @@ -918,7 +919,10 @@ def __save_cognition_data(self, bot_content: list, bot: Text, user: Text):
)
cognition_data.save()
elif data_item['type'] == 'json':
for json_data in data_item['data']:
data_entries = data_item['data']
metadata = data_item['metadata']
data_entries = MongoProcessor.data_format_correction_cognition_data(data_entries, metadata)
for json_data in data_entries:
cognition_data = CognitionData(
data=json_data,
content_type='json',
Expand All @@ -928,6 +932,31 @@ def __save_cognition_data(self, bot_content: list, bot: Text, user: Text):
)
cognition_data.save()

@staticmethod
def data_format_correction_cognition_data(data_entries, metadata):
convs = {
m['column_name']: (
int if m['data_type'] == 'int' else
float if m['data_type'] == 'float' else
str if m['data_type'] == 'str' else
None
)
for m in metadata
}
return [
{
**e,
**{
cname: (
convs[cname](e[cname][0] if isinstance(e[cname], list) and e[cname] else e[cname])
if e[cname] is not None and convs[cname] is not None else e[cname]
)
for cname in convs
}
}
for e in data_entries
]

def save_bot_content(self, bot_content: list, bot: Text, user: Text):
"""
saves bot content data
Expand Down
4 changes: 2 additions & 2 deletions tests/integration_test/services_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2124,7 +2124,7 @@ def test_upload_with_bot_content_valifdate_payload_data():
assert len(actual["data"]["rows"]) == 3
assert all(row["content_type"] == "json" for row in actual["data"]["rows"])
assert actual["data"]["rows"][0]["data"]["city"] == "City 3"
assert actual["data"]["rows"][1]["data"]["population"] == "200"
assert actual["data"]["rows"][1]["data"]["population"] == 200
assert all(row["collection"] == "test_payload_collection" for row in actual["data"]["rows"])

CognitionData.objects(bot=pytest.bot).delete()
Expand Down Expand Up @@ -2344,7 +2344,7 @@ def test_upload_with_bot_content_event_append_validate_payload_data():
assert len(actual["data"]["rows"]) == 6
assert all(row["content_type"] == "json" for row in actual["data"]["rows"])
assert actual["data"]["rows"][4]["data"]["city"] == "City 2"
assert actual["data"]["rows"][1]["data"]["population"] == "200"
assert actual["data"]["rows"][1]["data"]["population"] == 200
assert all(row["collection"] == "test_payload_collection" for row in actual["data"]["rows"])

CognitionData.objects(bot=pytest.bot).delete()
Expand Down
49 changes: 49 additions & 0 deletions tests/unit_test/data_processor/data_processor2_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import os
from kairon.shared.utils import Utility
os.environ["system_file"] = "./tests/testing_data/system.yaml"
Utility.load_environment()
Utility.load_system_metadata()

from kairon.shared.data.processor import MongoProcessor

# NOTE: use this file for adding new tests for the data_processor module


def test_data_format_correction():
    """Values are coerced to their declared metadata types; unsupported types pass through."""
    metadata = [
        {'column_name': 'age', 'data_type': 'int'},
        {'column_name': 'height', 'data_type': 'float'},
        {'column_name': 'name', 'data_type': 'str'},
        {'column_name': 'extra', 'data_type': 'dict'},  # Unsupported type
    ]

    raw_rows = [
        {'age': '25', 'height': '175.5', 'name': 'Alice', 'extra': {'key': 'value'}},
        {'age': ['30'], 'height': [180.0], 'name': ['Bob'], 'extra': [1, 2, 3]},
        {'age': None, 'height': '165.2', 'name': 'Charlie', 'extra': None},
        {'age': '40', 'height': None, 'name': None, 'extra': 'not a dict'}
    ]

    expected_output = [
        {'age': 25, 'height': 175.5, 'name': 'Alice', 'extra': {'key': 'value'}},
        {'age': 30, 'height': 180.0, 'name': 'Bob', 'extra': [1, 2, 3]},
        {'age': None, 'height': 165.2, 'name': 'Charlie', 'extra': None},
        {'age': 40, 'height': None, 'name': None, 'extra': 'not a dict'}
    ]

    result = MongoProcessor.data_format_correction_cognition_data(raw_rows, metadata)
    assert result == expected_output, f"Expected {expected_output}, but got {result}"

def test_empty_entries():
    """An empty entry list yields an empty result regardless of metadata."""
    result = MongoProcessor.data_format_correction_cognition_data(
        [], [{'column_name': 'age', 'data_type': 'int'}]
    )
    assert result == [], f"Expected [], but got {result}"


def test_non_list_non_string_values():
    """Columns absent from metadata keep their original values untouched."""
    metadata = [{'column_name': 'age', 'data_type': 'int'}]
    rows = [{'age': '22', 'height': 5.7, 'name': 'Tom'}]
    expected_output = [{'age': 22, 'height': 5.7, 'name': 'Tom'}]

    result = MongoProcessor.data_format_correction_cognition_data(rows, metadata)
    assert result == expected_output, f"Expected {expected_output}, but got {result}"

0 comments on commit 9a7e824

Please sign in to comment.