Skip to content

Commit

Permalink
Updated code to save data in cognition data in respective data types (#…
Browse files Browse the repository at this point in the history
…1618)

* Updated code to save data in cognition data in respective data types rather than strings and updated corresponding test cases

* Updated code and test cases

* Updated test case
  • Loading branch information
himanshugt16 authored Dec 10, 2024
1 parent 95a009e commit cfbf50e
Show file tree
Hide file tree
Showing 7 changed files with 240 additions and 131 deletions.
31 changes: 18 additions & 13 deletions kairon/shared/cognition/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,18 +383,24 @@ def validate_column_values(data: Any, schema: Dict):
if schema and isinstance(data, dict):
data_type = schema['data_type']
column_name = schema['column_name']
if column_name in data and data[column_name] and data_type == CognitionMetadataType.int.value:
try:
return int(data[column_name])
except ValueError:
raise AppException("Invalid data type!")
elif column_name in data and data[column_name] and data_type == CognitionMetadataType.float.value:
try:
return float(data[column_name])
except ValueError:
raise AppException("Invalid data type!")
if column_name in data and data[column_name] is not None:
value = data[column_name]

if data_type == CognitionMetadataType.int.value and not isinstance(value, int):
raise AppException(
f"Invalid data type for '{column_name}': Expected integer value")

if data_type == CognitionMetadataType.float.value and not isinstance(value, float):
raise AppException(
f"Invalid data type for '{column_name}': Expected float value")

if data_type == CognitionMetadataType.str.value and not isinstance(value, str):
raise AppException(
f"Invalid data type for '{column_name}': Expected string value")

return value
else:
return data[column_name]
raise AppException(f"Column '{column_name}' does not exist or has no value.")

@staticmethod
def find_matching_metadata(bot: Text, data: Any, collection: Text = None):
Expand Down Expand Up @@ -484,7 +490,7 @@ def validate_data(self, primary_key_col: str, collection_name: str, event_type:
})

if "document_non_existence" in event_validations:
if str(row_key) not in existing_document_map:
if row_key not in existing_document_map:
row_errors.append({
"status": "Document does not exist",
"primary_key": row_key,
Expand Down Expand Up @@ -550,7 +556,6 @@ async def upsert_data(self, primary_key_col: str, collection_name: str, event_ty
}

for row in data:
row = {str(key): str(value) for key, value in row.items()}
primary_key_value = row.get(primary_key_col)

existing_document = existing_document_map.get(primary_key_value)
Expand Down
16 changes: 16 additions & 0 deletions kairon/shared/data/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8717,10 +8717,26 @@ def save_doc_content(self, bot: Text, user: Text, doc_content, table_name: Text,
from ..cognition.processor import CognitionDataProcessor
cognition_processor = CognitionDataProcessor()

schema = CognitionSchema.objects(bot=bot, collection_name=table_name).first()

metadata_map = {meta['column_name']: meta['data_type'] for meta in schema.metadata}

if overwrite:
cognition_processor.delete_all_cognition_data_by_collection(table_name.lower(), bot)

for row in reversed(doc_content):
for column, value in row.items():
if column in metadata_map:
data_type = metadata_map[column]
try:
if data_type == 'int':
row[column] = int(value) if value else None
elif data_type == 'float':
row[column] = float(value) if value else None
except (ValueError, TypeError):
raise ValueError(
f"Error converting column '{column}' with value '{value}' to type '{data_type}'")

payload = {
'collection': table_name,
'content_type': CognitionDataType.json.value,
Expand Down
88 changes: 43 additions & 45 deletions tests/integration_test/services_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1452,10 +1452,10 @@ def test_knowledge_vault_sync_push_menu(mock_embedding, mock_collection_exists,
assert schema_response["error_code"] == 0

dummy_data = {
"id": "1",
"id": 1,
"item": "Juice",
"price": "2.00",
"quantity": "9"
"price": 2.00,
"quantity": 9
}
dummy_doc = CognitionData(
data=dummy_data,
Expand All @@ -1471,8 +1471,8 @@ def test_knowledge_vault_sync_push_menu(mock_embedding, mock_collection_exists,
assert cognition_data.count() == 1

sync_data = [
{"id": 1, "item": "Juice", "price": "2.50", "quantity": "10"},
{"id": 2, "item": "Apples", "price": "1.20", "quantity": "20"}
{"id": 1, "item": "Juice", "price": 2.50, "quantity": 10},
{"id": 2, "item": "Apples", "price": 1.20, "quantity": 20}
]

response = client.post(
Expand All @@ -1490,8 +1490,8 @@ def test_knowledge_vault_sync_push_menu(mock_embedding, mock_collection_exists,
assert cognition_data.count() == 2

expected_data = [
{"id": "1", "item": "Juice", "price": "2.50", "quantity": "10"},
{"id": "2", "item": "Apples", "price": "1.20", "quantity": "20"}
{"id": 1, "item": "Juice", "price": 2.50, "quantity": 10},
{"id": 2, "item": "Apples", "price": 1.20, "quantity": 20}
]

for index, doc in enumerate(cognition_data):
Expand Down Expand Up @@ -1575,10 +1575,10 @@ def test_knowledge_vault_sync_field_update(mock_embedding, mock_collection_exist
assert schema_response["error_code"] == 0

dummy_data_one = {
"id": "1",
"id": 1,
"item": "Juice",
"price": "2.80",
"quantity": "56"
"price": 2.80,
"quantity": 56
}
dummy_doc = CognitionData(
data=dummy_data_one,
Expand All @@ -1590,10 +1590,10 @@ def test_knowledge_vault_sync_field_update(mock_embedding, mock_collection_exist
)
dummy_doc.save()
dummy_data_two = {
"id": "2",
"id": 2,
"item": "Milk",
"price": "2.80",
"quantity": "12"
"price": 2.80,
"quantity": 12
}
dummy_doc = CognitionData(
data=dummy_data_two,
Expand All @@ -1609,8 +1609,8 @@ def test_knowledge_vault_sync_field_update(mock_embedding, mock_collection_exist
assert cognition_data.count() == 2

sync_data = [
{"id": 1, "price": "80.50"},
{"id": 2, "price": "27.00"}
{"id": 1, "price": 80.50},
{"id": 2, "price": 27.00}
]

response = client.post(
Expand All @@ -1628,8 +1628,8 @@ def test_knowledge_vault_sync_field_update(mock_embedding, mock_collection_exist
assert cognition_data.count() == 2

expected_data = [
{"id": "1", "item": "Juice", "price": "80.50", "quantity": "56"},
{"id": "2", "item": "Milk", "price": "27.00", "quantity": "12"}
{"id": 1, "item": "Juice", "price": 80.50, "quantity": 56},
{"id": 2, "item": "Milk", "price": 27.00, "quantity": 12}
]

for index, doc in enumerate(cognition_data):
Expand Down Expand Up @@ -1871,7 +1871,6 @@ def test_knowledge_vault_sync_column_length_mismatch(mock_embedding):
json=sync_data,
headers={"Authorization": pytest.token_type + " " + pytest.access_token}
)

actual = response.json()
assert not actual["success"]
assert actual["message"] == "Validation failed"
Expand Down Expand Up @@ -1928,10 +1927,10 @@ def test_knowledge_vault_sync_invalid_columns(mock_embedding):
assert schema_response.json()["error_code"] == 0

dummy_data = {
"id": "2",
"id": 2,
"item": "Milk",
"price": "2.80",
"quantity": "12"
"price": 2.80,
"quantity": 12
}
dummy_doc = CognitionData(
data=dummy_data,
Expand Down Expand Up @@ -1962,7 +1961,7 @@ def test_knowledge_vault_sync_invalid_columns(mock_embedding):
cognition_data = CognitionData.objects(bot=pytest.bot, collection="groceries")
assert cognition_data.count() == 1
expected_data = [
{"id": "2", "item": "Milk", "price": "2.80", "quantity": "12"}
{"id": 2, "item": "Milk", "price": 2.80, "quantity": 12}
]
for index, doc in enumerate(cognition_data):
doc_data = doc.to_mongo().to_dict()["data"]
Expand All @@ -1972,7 +1971,6 @@ def test_knowledge_vault_sync_invalid_columns(mock_embedding):
CognitionData.objects(bot=pytest.bot, collection="groceries").delete()
LLMSecret.objects.delete()


@pytest.mark.asyncio
@responses.activate
@mock.patch.object(litellm, "aembedding", autospec=True)
Expand Down Expand Up @@ -2017,10 +2015,10 @@ def test_knowledge_vault_sync_document_non_existence(mock_embedding):
assert schema_response.json()["error_code"] == 0

dummy_data = {
"id": "1",
"id": 1,
"item": "Juice",
"price": "2.80",
"quantity": "5"
"price": 2.80,
"quantity": 5
}
dummy_doc = CognitionData(
data=dummy_data,
Expand All @@ -2033,7 +2031,7 @@ def test_knowledge_vault_sync_document_non_existence(mock_embedding):
dummy_doc.save()

sync_data = [
{"id": "2", "price": 27.0}
{"id": 2, "price": 27.0}
]

response = client.post(
Expand All @@ -2046,16 +2044,16 @@ def test_knowledge_vault_sync_document_non_existence(mock_embedding):
assert not actual["success"]
assert actual["message"] == "Validation failed"
assert actual["error_code"] == 400
assert actual["data"] == {'2': [{'status': 'Document does not exist', 'primary_key': '2', 'message': "No document found for 'id': 2"}]}
assert actual["data"] == {'2': [{'status': 'Document does not exist', 'primary_key': 2, 'message': "No document found for 'id': 2"}]}
cognition_data = CognitionData.objects(bot=pytest.bot, collection="groceries")
assert cognition_data.count() == 1

expected_data = [
{
"id": "1",
"id": 1,
"item": "Juice",
"price": "2.80",
"quantity": "5"
"price": 2.80,
"quantity": 5
}
]

Expand Down Expand Up @@ -2165,10 +2163,10 @@ def test_upload_doc_content():

last_row = cognition_data.order_by('-_id').first()
assert last_row["data"] == {
'order_id': '67',
'order_id': 67,
'order_priority': 'Low',
'sales': '12.34',
'profit': '54.98'
'sales': 12.34,
'profit': 54.98
}
CognitionData.objects(bot=pytest.bot, collection="test_doc_content_upload").delete()

Expand Down Expand Up @@ -2271,10 +2269,10 @@ def test_upload_doc_content_append():
}
last_row = cognition_data.order_by('-_id').first()
assert last_row["data"] == {
'order_id': '67',
'order_priority': 'Low',
'sales': '12.34',
'profit': '54.98'
'order_id': 67,
'order_priority': "Low",
'sales': 12.34,
'profit': 54.98
}
CognitionData.objects(bot=pytest.bot, collection="test_doc_content_upload_append").delete()

Expand Down Expand Up @@ -2493,18 +2491,18 @@ def test_upload_doc_content_datatype_validation_failure():

third_last_row = cognition_data[-3]
assert third_last_row["data"] == {
"order_id": "33",
"order_id": 33,
"order_priority": "Low",
"sales": "905.94",
"profit": "-4.19"
"sales": 905.94,
"profit": -4.19
}

fourth_last_row = cognition_data[-4]
assert fourth_last_row["data"] == {
"order_id": "657",
"order_id": 657,
"order_priority": "Not Specified",
"sales": "237.28",
"profit": "-2088.68"
"sales": 237.28,
"profit": -2088.68
}

CognitionData.objects(bot=pytest.bot, collection="test_doc_content_upload_datatype_validation_failure").delete()
Expand Down Expand Up @@ -6496,7 +6494,7 @@ def _mock_get_bot_settings(*args, **kwargs):
)
actual = response.json()
assert not actual["success"]
assert actual["message"] == 'Invalid data type!'
assert actual["message"] == "Invalid data type for 'age': Expected integer value"


def test_payload_updated_api_collection_does_not_exists():
Expand Down
Loading

0 comments on commit cfbf50e

Please sign in to comment.