Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated code to save data in cognition data in respective data types #1618

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 18 additions & 13 deletions kairon/shared/cognition/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,18 +383,24 @@ def validate_column_values(data: Any, schema: Dict):
if schema and isinstance(data, dict):
data_type = schema['data_type']
column_name = schema['column_name']
if column_name in data and data[column_name] and data_type == CognitionMetadataType.int.value:
try:
return int(data[column_name])
except ValueError:
raise AppException("Invalid data type!")
elif column_name in data and data[column_name] and data_type == CognitionMetadataType.float.value:
try:
return float(data[column_name])
except ValueError:
raise AppException("Invalid data type!")
if column_name in data and data[column_name] is not None:
value = data[column_name]

if data_type == CognitionMetadataType.int.value and not isinstance(value, int):
raise AppException(
f"Invalid data type for '{column_name}': Expected integer value")

if data_type == CognitionMetadataType.float.value and not isinstance(value, float):
raise AppException(
f"Invalid data type for '{column_name}': Expected float value")

if data_type == CognitionMetadataType.str.value and not isinstance(value, str):
raise AppException(
f"Invalid data type for '{column_name}': Expected string value")

return value
else:
return data[column_name]
raise AppException(f"Column '{column_name}' does not exist or has no value.")

@staticmethod
def find_matching_metadata(bot: Text, data: Any, collection: Text = None):
Expand Down Expand Up @@ -484,7 +490,7 @@ def validate_data(self, primary_key_col: str, collection_name: str, event_type:
})

if "document_non_existence" in event_validations:
if str(row_key) not in existing_document_map:
if row_key not in existing_document_map:
row_errors.append({
"status": "Document does not exist",
"primary_key": row_key,
Expand Down Expand Up @@ -550,7 +556,6 @@ async def upsert_data(self, primary_key_col: str, collection_name: str, event_ty
}

for row in data:
row = {str(key): str(value) for key, value in row.items()}
primary_key_value = row.get(primary_key_col)

existing_document = existing_document_map.get(primary_key_value)
Expand Down
16 changes: 16 additions & 0 deletions kairon/shared/data/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8717,10 +8717,26 @@ def save_doc_content(self, bot: Text, user: Text, doc_content, table_name: Text,
from ..cognition.processor import CognitionDataProcessor
cognition_processor = CognitionDataProcessor()

schema = CognitionSchema.objects(bot=bot, collection_name=table_name).first()

metadata_map = {meta['column_name']: meta['data_type'] for meta in schema.metadata}

Comment on lines +8720 to +8723
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Handle the case where the schema is not found

In the save_doc_content method, if no schema is found for the given table_name, schema will be None, leading to an AttributeError when accessing schema.metadata. Please add a check to handle the case where the schema does not exist and raise an appropriate exception.

Apply this diff to fix the issue:

            schema = CognitionSchema.objects(bot=bot, collection_name=table_name).first()
+           if schema is None:
+               raise ValueError(f"No schema found for table '{table_name}'")

            metadata_map = {meta['column_name']: meta['data_type'] for meta in schema.metadata}
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
schema = CognitionSchema.objects(bot=bot, collection_name=table_name).first()
metadata_map = {meta['column_name']: meta['data_type'] for meta in schema.metadata}
schema = CognitionSchema.objects(bot=bot, collection_name=table_name).first()
if schema is None:
raise ValueError(f"No schema found for table '{table_name}'")
metadata_map = {meta['column_name']: meta['data_type'] for meta in schema.metadata}

if overwrite:
cognition_processor.delete_all_cognition_data_by_collection(table_name.lower(), bot)

for row in reversed(doc_content):
for column, value in row.items():
if column in metadata_map:
data_type = metadata_map[column]
try:
if data_type == 'int':
row[column] = int(value) if value else None
elif data_type == 'float':
row[column] = float(value) if value else None
except (ValueError, TypeError):
raise ValueError(
f"Error converting column '{column}' with value '{value}' to type '{data_type}'")
Comment on lines +8737 to +8738
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Use raise ... from ... in except clause to preserve original traceback

When re-raising an exception in the except block, include the original exception using from err to preserve the traceback and help distinguish it from errors in exception handling.

Apply this diff to fix the issue:

-                        except (ValueError, TypeError):
-                            raise ValueError(
-                                f"Error converting column '{column}' with value '{value}' to type '{data_type}'")
+                        except (ValueError, TypeError) as err:
+                            raise ValueError(
+                                f"Error converting column '{column}' with value '{value}' to type '{data_type}'") from err
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
raise ValueError(
f"Error converting column '{column}' with value '{value}' to type '{data_type}'")
except (ValueError, TypeError) as err:
raise ValueError(
f"Error converting column '{column}' with value '{value}' to type '{data_type}'") from err
🧰 Tools
🪛 Ruff (0.8.0)

8737-8738: Within an except clause, raise exceptions with raise ... from err or raise ... from None to distinguish them from errors in exception handling

(B904)


payload = {
'collection': table_name,
'content_type': CognitionDataType.json.value,
Expand Down
88 changes: 43 additions & 45 deletions tests/integration_test/services_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1452,10 +1452,10 @@ def test_knowledge_vault_sync_push_menu(mock_embedding, mock_collection_exists,
assert schema_response["error_code"] == 0

dummy_data = {
"id": "1",
"id": 1,
"item": "Juice",
"price": "2.00",
"quantity": "9"
"price": 2.00,
"quantity": 9
}
dummy_doc = CognitionData(
data=dummy_data,
Expand All @@ -1471,8 +1471,8 @@ def test_knowledge_vault_sync_push_menu(mock_embedding, mock_collection_exists,
assert cognition_data.count() == 1

sync_data = [
{"id": 1, "item": "Juice", "price": "2.50", "quantity": "10"},
{"id": 2, "item": "Apples", "price": "1.20", "quantity": "20"}
{"id": 1, "item": "Juice", "price": 2.50, "quantity": 10},
{"id": 2, "item": "Apples", "price": 1.20, "quantity": 20}
]

response = client.post(
Expand All @@ -1490,8 +1490,8 @@ def test_knowledge_vault_sync_push_menu(mock_embedding, mock_collection_exists,
assert cognition_data.count() == 2

expected_data = [
{"id": "1", "item": "Juice", "price": "2.50", "quantity": "10"},
{"id": "2", "item": "Apples", "price": "1.20", "quantity": "20"}
{"id": 1, "item": "Juice", "price": 2.50, "quantity": 10},
{"id": 2, "item": "Apples", "price": 1.20, "quantity": 20}
]

for index, doc in enumerate(cognition_data):
Expand Down Expand Up @@ -1575,10 +1575,10 @@ def test_knowledge_vault_sync_field_update(mock_embedding, mock_collection_exist
assert schema_response["error_code"] == 0

dummy_data_one = {
"id": "1",
"id": 1,
"item": "Juice",
"price": "2.80",
"quantity": "56"
"price": 2.80,
"quantity": 56
}
dummy_doc = CognitionData(
data=dummy_data_one,
Expand All @@ -1590,10 +1590,10 @@ def test_knowledge_vault_sync_field_update(mock_embedding, mock_collection_exist
)
dummy_doc.save()
dummy_data_two = {
"id": "2",
"id": 2,
"item": "Milk",
"price": "2.80",
"quantity": "12"
"price": 2.80,
"quantity": 12
}
dummy_doc = CognitionData(
data=dummy_data_two,
Expand All @@ -1609,8 +1609,8 @@ def test_knowledge_vault_sync_field_update(mock_embedding, mock_collection_exist
assert cognition_data.count() == 2

sync_data = [
{"id": 1, "price": "80.50"},
{"id": 2, "price": "27.00"}
{"id": 1, "price": 80.50},
{"id": 2, "price": 27.00}
]

response = client.post(
Expand All @@ -1628,8 +1628,8 @@ def test_knowledge_vault_sync_field_update(mock_embedding, mock_collection_exist
assert cognition_data.count() == 2

expected_data = [
{"id": "1", "item": "Juice", "price": "80.50", "quantity": "56"},
{"id": "2", "item": "Milk", "price": "27.00", "quantity": "12"}
{"id": 1, "item": "Juice", "price": 80.50, "quantity": 56},
{"id": 2, "item": "Milk", "price": 27.00, "quantity": 12}
]

for index, doc in enumerate(cognition_data):
Expand Down Expand Up @@ -1871,7 +1871,6 @@ def test_knowledge_vault_sync_column_length_mismatch(mock_embedding):
json=sync_data,
headers={"Authorization": pytest.token_type + " " + pytest.access_token}
)

actual = response.json()
assert not actual["success"]
assert actual["message"] == "Validation failed"
Expand Down Expand Up @@ -1928,10 +1927,10 @@ def test_knowledge_vault_sync_invalid_columns(mock_embedding):
assert schema_response.json()["error_code"] == 0

dummy_data = {
"id": "2",
"id": 2,
"item": "Milk",
"price": "2.80",
"quantity": "12"
"price": 2.80,
"quantity": 12
}
dummy_doc = CognitionData(
data=dummy_data,
Expand Down Expand Up @@ -1962,7 +1961,7 @@ def test_knowledge_vault_sync_invalid_columns(mock_embedding):
cognition_data = CognitionData.objects(bot=pytest.bot, collection="groceries")
assert cognition_data.count() == 1
expected_data = [
{"id": "2", "item": "Milk", "price": "2.80", "quantity": "12"}
{"id": 2, "item": "Milk", "price": 2.80, "quantity": 12}
]
for index, doc in enumerate(cognition_data):
doc_data = doc.to_mongo().to_dict()["data"]
Expand All @@ -1972,7 +1971,6 @@ def test_knowledge_vault_sync_invalid_columns(mock_embedding):
CognitionData.objects(bot=pytest.bot, collection="groceries").delete()
LLMSecret.objects.delete()


@pytest.mark.asyncio
@responses.activate
@mock.patch.object(litellm, "aembedding", autospec=True)
Expand Down Expand Up @@ -2017,10 +2015,10 @@ def test_knowledge_vault_sync_document_non_existence(mock_embedding):
assert schema_response.json()["error_code"] == 0

dummy_data = {
"id": "1",
"id": 1,
"item": "Juice",
"price": "2.80",
"quantity": "5"
"price": 2.80,
"quantity": 5
}
dummy_doc = CognitionData(
data=dummy_data,
Expand All @@ -2033,7 +2031,7 @@ def test_knowledge_vault_sync_document_non_existence(mock_embedding):
dummy_doc.save()

sync_data = [
{"id": "2", "price": 27.0}
{"id": 2, "price": 27.0}
]

response = client.post(
Expand All @@ -2046,16 +2044,16 @@ def test_knowledge_vault_sync_document_non_existence(mock_embedding):
assert not actual["success"]
assert actual["message"] == "Validation failed"
assert actual["error_code"] == 400
assert actual["data"] == {'2': [{'status': 'Document does not exist', 'primary_key': '2', 'message': "No document found for 'id': 2"}]}
assert actual["data"] == {'2': [{'status': 'Document does not exist', 'primary_key': 2, 'message': "No document found for 'id': 2"}]}
cognition_data = CognitionData.objects(bot=pytest.bot, collection="groceries")
assert cognition_data.count() == 1

expected_data = [
{
"id": "1",
"id": 1,
"item": "Juice",
"price": "2.80",
"quantity": "5"
"price": 2.80,
"quantity": 5
}
]

Expand Down Expand Up @@ -2165,10 +2163,10 @@ def test_upload_doc_content():

last_row = cognition_data.order_by('-_id').first()
assert last_row["data"] == {
'order_id': '67',
'order_id': 67,
'order_priority': 'Low',
'sales': '12.34',
'profit': '54.98'
'sales': 12.34,
'profit': 54.98
}
CognitionData.objects(bot=pytest.bot, collection="test_doc_content_upload").delete()

Expand Down Expand Up @@ -2271,10 +2269,10 @@ def test_upload_doc_content_append():
}
last_row = cognition_data.order_by('-_id').first()
assert last_row["data"] == {
'order_id': '67',
'order_priority': 'Low',
'sales': '12.34',
'profit': '54.98'
'order_id': 67,
'order_priority': "Low",
'sales': 12.34,
'profit': 54.98
}
CognitionData.objects(bot=pytest.bot, collection="test_doc_content_upload_append").delete()

Expand Down Expand Up @@ -2493,18 +2491,18 @@ def test_upload_doc_content_datatype_validation_failure():

third_last_row = cognition_data[-3]
assert third_last_row["data"] == {
"order_id": "33",
"order_id": 33,
"order_priority": "Low",
"sales": "905.94",
"profit": "-4.19"
"sales": 905.94,
"profit": -4.19
}

fourth_last_row = cognition_data[-4]
assert fourth_last_row["data"] == {
"order_id": "657",
"order_id": 657,
"order_priority": "Not Specified",
"sales": "237.28",
"profit": "-2088.68"
"sales": 237.28,
"profit": -2088.68
}

CognitionData.objects(bot=pytest.bot, collection="test_doc_content_upload_datatype_validation_failure").delete()
Expand Down Expand Up @@ -6493,7 +6491,7 @@ def _mock_get_bot_settings(*args, **kwargs):
)
actual = response.json()
assert not actual["success"]
assert actual["message"] == 'Invalid data type!'
assert actual["message"] == "Invalid data type for 'age': Expected integer value"


def test_payload_updated_api_collection_does_not_exists():
Expand Down
Loading
Loading