Skip to content

Commit

Permalink
Fix(wren-ai-service): Clear all documents before deploying the new MDL (#199)
Browse files Browse the repository at this point in the history

* chore: experiment code

* chore: experiment code

* feat: a mechanism to avoid the past MDL items in the vector store

* fix: check view includes properties

* chore: remove the experiment code

* feat: add test case to validate clearing the documents

* fix: check ids is not empty
  • Loading branch information
paopa authored May 6, 2024
1 parent 3b0cecb commit e28f9c5
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 1 deletion.
9 changes: 8 additions & 1 deletion wren-ai-service/src/pipelines/ask/indexing_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def __init__(
embedding_model_name: str = EMBEDDING_MODEL_NAME,
embedding_model_dim: int = EMBEDDING_MODEL_DIMENSION,
) -> None:
self._document_store = document_store
self._pipeline = Pipeline()
# TODO: add a component to remove existing documents to fully delete old documents
self._pipeline.add_component(
Expand All @@ -47,10 +48,16 @@ def __init__(
super().__init__(self._pipeline)

def run(self, mdl_str: str) -> Dict[str, Any]:
    """Replace the indexed documents with the ones derived from ``mdl_str``.

    The new documents are built *before* the store is cleared, so an MDL
    string that cannot be converted (for example malformed JSON, which
    ``_get_documents`` parses with ``json.loads``) raises without first
    wiping the documents that are already indexed.

    Args:
        mdl_str: The MDL serialized as a JSON string.

    Returns:
        The result of running the underlying pipeline with the new
        documents supplied to its ``writer`` component.

    Raises:
        Whatever ``_get_documents`` raises for an unusable ``mdl_str``; in
        that case the document store is left untouched.
    """
    # Build first: a failure here must not leave the store empty.
    documents = self._get_documents(mdl_str)
    self._clear_documents()
    return self._pipeline.run({"writer": {"documents": documents}})

def _clear_documents(self) -> None:
    """Delete every document currently held in the document store.

    The ids to delete are read back from the store itself instead of being
    reconstructed as ``"0" .. str(count - 1)``; this way documents are
    removed regardless of how their ids were generated.

    NOTE(review): ``filter_documents()`` with no filters is expected to
    return all documents (Haystack ``DocumentStore`` protocol) -- confirm
    for the concrete store in use, and mind the cost on very large stores.
    """
    ids = [document.id for document in self._document_store.filter_documents()]
    # Only issue the delete when there is something to remove.
    if ids:
        self._document_store.delete_documents(ids)

def _get_documents(self, mdl_str: str) -> List[Document]:
mdl_json = json.loads(mdl_str)

Expand Down Expand Up @@ -118,7 +125,7 @@ def _get_documents(self, mdl_str: str) -> List[Document]:

def _convert_views(self, views: List[Dict[str, Any]]) -> List[str]:
    """Render each view definition as a commented ``CREATE VIEW`` statement.

    Args:
        views: View definitions; each must provide ``"name"`` and
            ``"statement"`` and may provide ``"properties"``.

    Returns:
        One string per view, in input order, of the form
        ``/* <properties> */``, ``CREATE VIEW <name>``, ``AS (<statement>)``
        on three lines.

    Raises:
        KeyError: If a view lacks ``"name"`` or ``"statement"``.
    """

    def _format(view: Dict[str, Any]) -> str:
        # "properties" is optional: a single guarded lookup replaces the
        # stale unconditional view["properties"] access, which raised
        # KeyError for views that omit the key.
        properties = view.get("properties", "")
        return f"/* {properties} */\nCREATE VIEW {view['name']}\nAS ({view['statement']})"

    return [_format(view) for view in views]
Expand Down
28 changes: 28 additions & 0 deletions wren-ai-service/tests/pipelines/test_ask.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Any

import pytest
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

from src.pipelines.ask.components.document_store import init_document_store
from src.pipelines.ask.components.embedder import init_embedder
Expand Down Expand Up @@ -41,6 +42,33 @@ def test_indexing_pipeline(mdl_str: str, document_store: Any):
assert document_store.count_documents() == 2


def test_clear_documents(mdl_str: str):
    """Check that a second indexing run replaces the stored documents.

    The pipeline is run once with the ``mdl_str`` fixture and once with an
    MDL holding a single view; the store's document count is asserted after
    each run (2, then 1).
    """
    # Dedicated store for this test (":memory:" location, index recreated).
    qdrant_store = QdrantDocumentStore(
        ":memory:",
        index="test_clear_documents",
        embedding_dim=3072,
        recreate_index=True,
        return_embedding=True,
        wait_result_from_api=True,
    )
    pipeline = Indexing(document_store=qdrant_store)

    # First deployment: the fixture MDL is expected to yield two documents.
    pipeline.run(mdl_str)
    assert qdrant_store.count_documents() == 2

    # Second deployment: only one view, so only one document should remain.
    single_view_mdl = """
{"models": [], "relationships": [], "views": [
{"name": "book", "statement": "SELECT * FROM book", "properties": {}}
]}
"""
    pipeline.run(single_view_mdl)
    assert qdrant_store.count_documents() == 1


def test_query_understanding_pipeline():
query_understanding_pipeline = QueryUnderstanding(
generator=init_generator(),
Expand Down

0 comments on commit e28f9c5

Please sign in to comment.