digiteinfotech · sfahad1414 · Dec 10, 2024 · Dec 9, 2024 · Dec 9, 2024 · Dec 10, 2024
diff --git a/kairon/shared/cognition/processor.py b/kairon/shared/cognition/processor.py
@@ -383,18 +383,20 @@ def validate_column_values(data: Any, schema: Dict):
         if schema and isinstance(data, dict):
             data_type = schema['data_type']
             column_name = schema['column_name']
-            if column_name in data and data[column_name] and data_type == CognitionMetadataType.int.value:
-                try:
-                    return int(data[column_name])
-                except ValueError:
-                    raise AppException("Invalid data type!")
-            elif column_name in data and data[column_name] and data_type == CognitionMetadataType.float.value:
-                try:
-                    return float(data[column_name])
-                except ValueError:
-                    raise AppException("Invalid data type!")
+            if column_name in data and data[column_name] is not None:
+                value = data[column_name]
+
+                if data_type == CognitionMetadataType.int.value and not isinstance(value, int):
+                    raise AppException(
+                        f"Invalid data type for '{column_name}': Expected integer value")
+
+                if data_type == CognitionMetadataType.float.value and not isinstance(value, float):
+                    raise AppException(
+                        f"Invalid data type for '{column_name}': Expected float value")
+
+                return value
             else:
-                return data[column_name]
+                raise AppException(f"Column '{column_name}' does not exist or has no value.")
 
     @staticmethod
     def find_matching_metadata(bot: Text, data: Any, collection: Text = None):
@@ -484,7 +486,7 @@ def validate_data(self, primary_key_col: str, collection_name: str, event_type:
                     })
 
             if "document_non_existence" in event_validations:
-                if str(row_key) not in existing_document_map:
+                if row_key not in existing_document_map:
                     row_errors.append({
                         "status": "Document does not exist",
                         "primary_key": row_key,
@@ -550,7 +552,6 @@ async def upsert_data(self, primary_key_col: str, collection_name: str, event_ty
         }
 
         for row in data:
-            row = {str(key): str(value) for key, value in row.items()}
             primary_key_value = row.get(primary_key_col)
 
             existing_document = existing_document_map.get(primary_key_value)

diff --git a/tests/integration_test/services_test.py b/tests/integration_test/services_test.py
@@ -1452,10 +1452,10 @@ def test_knowledge_vault_sync_push_menu(mock_embedding, mock_collection_exists,
     assert schema_response["error_code"] == 0
 
     dummy_data = {
-        "id": "1",
+        "id": 1,
         "item": "Juice",
-        "price": "2.00",
-        "quantity": "9"
+        "price": 2.00,
+        "quantity": 9
     }
     dummy_doc = CognitionData(
         data=dummy_data,
@@ -1471,8 +1471,8 @@ def test_knowledge_vault_sync_push_menu(mock_embedding, mock_collection_exists,
     assert cognition_data.count() == 1
 
     sync_data = [
-        {"id": 1, "item": "Juice", "price": "2.50", "quantity": "10"},
-        {"id": 2, "item": "Apples", "price": "1.20", "quantity": "20"}
+        {"id": 1, "item": "Juice", "price": 2.50, "quantity": 10},
+        {"id": 2, "item": "Apples", "price": 1.20, "quantity": 20}
     ]
 
     response = client.post(
@@ -1490,8 +1490,8 @@ def test_knowledge_vault_sync_push_menu(mock_embedding, mock_collection_exists,
     assert cognition_data.count() == 2
 
     expected_data = [
-        {"id": "1", "item": "Juice", "price": "2.50", "quantity": "10"},
-        {"id": "2", "item": "Apples", "price": "1.20", "quantity": "20"}
+        {"id": 1, "item": "Juice", "price": 2.50, "quantity": 10},
+        {"id": 2, "item": "Apples", "price": 1.20, "quantity": 20}
     ]
 
     for index, doc in enumerate(cognition_data):
@@ -1575,10 +1575,10 @@ def test_knowledge_vault_sync_field_update(mock_embedding, mock_collection_exist
     assert schema_response["error_code"] == 0
 
     dummy_data_one = {
-        "id": "1",
+        "id": 1,
         "item": "Juice",
-        "price": "2.80",
-        "quantity": "56"
+        "price": 2.80,
+        "quantity": 56
     }
     dummy_doc = CognitionData(
         data=dummy_data_one,
@@ -1590,10 +1590,10 @@ def test_knowledge_vault_sync_field_update(mock_embedding, mock_collection_exist
     )
     dummy_doc.save()
     dummy_data_two = {
-        "id": "2",
+        "id": 2,
         "item": "Milk",
-        "price": "2.80",
-        "quantity": "12"
+        "price": 2.80,
+        "quantity": 12
     }
     dummy_doc = CognitionData(
         data=dummy_data_two,
@@ -1609,8 +1609,8 @@ def test_knowledge_vault_sync_field_update(mock_embedding, mock_collection_exist
     assert cognition_data.count() == 2
 
     sync_data = [
-        {"id": 1, "price": "80.50"},
-        {"id": 2, "price": "27.00"}
+        {"id": 1, "price": 80.50},
+        {"id": 2, "price": 27.00}
     ]
 
     response = client.post(
@@ -1628,8 +1628,8 @@ def test_knowledge_vault_sync_field_update(mock_embedding, mock_collection_exist
     assert cognition_data.count() == 2
 
     expected_data = [
-        {"id": "1", "item": "Juice", "price": "80.50", "quantity": "56"},
-        {"id": "2", "item": "Milk", "price": "27.00", "quantity": "12"}
+        {"id": 1, "item": "Juice", "price": 80.50, "quantity": 56},
+        {"id": 2, "item": "Milk", "price": 27.00, "quantity": 12}
     ]
 
     for index, doc in enumerate(cognition_data):
@@ -1871,7 +1871,6 @@ def test_knowledge_vault_sync_column_length_mismatch(mock_embedding):
         json=sync_data,
         headers={"Authorization": pytest.token_type + " " + pytest.access_token}
     )
-
     actual = response.json()
     assert not actual["success"]
     assert actual["message"] == "Validation failed"
@@ -1928,10 +1927,10 @@ def test_knowledge_vault_sync_invalid_columns(mock_embedding):
     assert schema_response.json()["error_code"] == 0
 
     dummy_data = {
-        "id": "2",
+        "id": 2,
         "item": "Milk",
-        "price": "2.80",
-        "quantity": "12"
+        "price": 2.80,
+        "quantity": 12
     }
     dummy_doc = CognitionData(
         data=dummy_data,
@@ -1962,7 +1961,7 @@ def test_knowledge_vault_sync_invalid_columns(mock_embedding):
     cognition_data = CognitionData.objects(bot=pytest.bot, collection="groceries")
     assert cognition_data.count() == 1
     expected_data = [
-        {"id": "2", "item": "Milk", "price": "2.80", "quantity": "12"}
+        {"id": 2, "item": "Milk", "price": 2.80, "quantity": 12}
     ]
     for index, doc in enumerate(cognition_data):
         doc_data = doc.to_mongo().to_dict()["data"]
@@ -1972,7 +1971,6 @@ def test_knowledge_vault_sync_invalid_columns(mock_embedding):
     CognitionData.objects(bot=pytest.bot, collection="groceries").delete()
     LLMSecret.objects.delete()
 
-
 @pytest.mark.asyncio
 @responses.activate
 @mock.patch.object(litellm, "aembedding", autospec=True)
@@ -2017,10 +2015,10 @@ def test_knowledge_vault_sync_document_non_existence(mock_embedding):
     assert schema_response.json()["error_code"] == 0
 
     dummy_data = {
-        "id": "1",
+        "id": 1,
         "item": "Juice",
-        "price": "2.80",
-        "quantity": "5"
+        "price": 2.80,
+        "quantity": 5
     }
     dummy_doc = CognitionData(
         data=dummy_data,
@@ -2033,7 +2031,7 @@ def test_knowledge_vault_sync_document_non_existence(mock_embedding):
     dummy_doc.save()
 
     sync_data = [
-        {"id": "2", "price": 27.0}
+        {"id": 2, "price": 27.0}
     ]
 
     response = client.post(
@@ -2046,16 +2044,16 @@ def test_knowledge_vault_sync_document_non_existence(mock_embedding):
     assert not actual["success"]
     assert actual["message"] == "Validation failed"
     assert actual["error_code"] == 400
-    assert actual["data"] == {'2': [{'status': 'Document does not exist', 'primary_key': '2', 'message': "No document found for 'id': 2"}]}
+    assert actual["data"] == {'2': [{'status': 'Document does not exist', 'primary_key': 2, 'message': "No document found for 'id': 2"}]}
     cognition_data = CognitionData.objects(bot=pytest.bot, collection="groceries")
     assert cognition_data.count() == 1
 
     expected_data = [
         {
-            "id": "1",
+            "id": 1,
             "item": "Juice",
-            "price": "2.80",
-            "quantity": "5"
+            "price": 2.80,
+            "quantity": 5
         }
     ]