diff --git a/integrations/qdrant/tests/conftest.py b/integrations/qdrant/tests/conftest.py new file mode 100644 index 000000000..4bdf0bad2 --- /dev/null +++ b/integrations/qdrant/tests/conftest.py @@ -0,0 +1,18 @@ +import numpy as np +import pytest +from haystack.dataclasses import SparseEmbedding + + +@pytest.fixture +def generate_sparse_embedding(): + """ + This fixture returns a factory function; each call to that factory generates a new random SparseEmbedding object. + """ + + def _generate_random_sparse_embedding(): + random_indice_length = np.random.randint(3, 15) + indices = list(range(random_indice_length)) + values = [np.random.random_sample() for _ in range(random_indice_length)] + return SparseEmbedding(indices=indices, values=values) + + return _generate_random_sparse_embedding diff --git a/integrations/qdrant/tests/test_document_store.py b/integrations/qdrant/tests/test_document_store.py index acc09b11f..bc42a11a5 100644 --- a/integrations/qdrant/tests/test_document_store.py +++ b/integrations/qdrant/tests/test_document_store.py @@ -1,6 +1,5 @@ from typing import List -import numpy as np import pytest from haystack import Document from haystack.dataclasses import SparseEmbedding @@ -15,18 +14,6 @@ from haystack_integrations.document_stores.qdrant.document_store import QdrantDocumentStore, QdrantStoreError -def _generate_mocked_sparse_embedding(n): - list_of_sparse_vectors = [] - for _ in range(n): - random_indice_length = np.random.randint(3, 15) - data = { - "indices": list(range(random_indice_length)), - "values": [np.random.random_sample() for _ in range(random_indice_length)], - } - list_of_sparse_vectors.append(data) - return list_of_sparse_vectors - - class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest): @pytest.fixture def document_store(self) -> QdrantDocumentStore: @@ -56,14 +43,15 @@ def test_write_documents(self, document_store: QdrantDocumentStore): with pytest.raises(DuplicateDocumentError): document_store.write_documents(docs, 
DuplicatePolicy.FAIL) - def test_query_hybrid(self): + def test_query_hybrid(self, generate_sparse_embedding): document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True) docs = [] for i in range(20): - sparse_embedding = SparseEmbedding.from_dict(_generate_mocked_sparse_embedding(1)[0]) docs.append( - Document(content=f"doc {i}", sparse_embedding=sparse_embedding, embedding=_random_embeddings(768)) + Document( + content=f"doc {i}", sparse_embedding=generate_sparse_embedding(), embedding=_random_embeddings(768) + ) ) document_store.write_documents(docs) diff --git a/integrations/qdrant/tests/test_retriever.py b/integrations/qdrant/tests/test_retriever.py index a2aa2b090..47fec5968 100644 --- a/integrations/qdrant/tests/test_retriever.py +++ b/integrations/qdrant/tests/test_retriever.py @@ -13,8 +13,6 @@ ) from haystack_integrations.document_stores.qdrant import QdrantDocumentStore -from .test_document_store import _generate_mocked_sparse_embedding - class TestQdrantRetriever(FilterableDocsFixtureMixin): def test_init_default(self): @@ -225,12 +223,12 @@ def test_from_dict(self): assert retriever._scale_score is False assert retriever._return_embedding is True - def test_run(self, filterable_docs: List[Document]): + def test_run(self, filterable_docs: List[Document], generate_sparse_embedding): document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=True) # Add fake sparse embedding to documents for doc in filterable_docs: - doc.sparse_embedding = SparseEmbedding.from_dict(_generate_mocked_sparse_embedding(1)[0]) + doc.sparse_embedding = generate_sparse_embedding() document_store.write_documents(filterable_docs) retriever = QdrantSparseEmbeddingRetriever(document_store=document_store)