diff --git a/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/converters.py b/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/converters.py index 96bd4f37a..01645a999 100644 --- a/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/converters.py +++ b/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/converters.py @@ -17,7 +17,6 @@ def convert_haystack_documents_to_qdrant_points( documents: List[Document], *, - embedding_field: str, use_sparse_embeddings: bool, ) -> List[rest.PointStruct]: points = [] @@ -26,7 +25,7 @@ def convert_haystack_documents_to_qdrant_points( if use_sparse_embeddings: vector = {} - dense_vector = payload.pop(embedding_field, None) + dense_vector = payload.pop("embedding", None) if dense_vector is not None: vector[DENSE_VECTORS_NAME] = dense_vector @@ -36,7 +35,7 @@ def convert_haystack_documents_to_qdrant_points( vector[SPARSE_VECTORS_NAME] = sparse_vector_instance else: - vector = payload.pop(embedding_field) or {} + vector = payload.pop("embedding") or {} _id = convert_id(payload.get("id")) point = rest.PointStruct( diff --git a/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/document_store.py b/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/document_store.py index 51d64e5e3..f9b2190ad 100644 --- a/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/document_store.py +++ b/integrations/qdrant/src/haystack_integrations/document_stores/qdrant/document_store.py @@ -110,14 +110,10 @@ def __init__( index: str = "Document", embedding_dim: int = 768, on_disk: bool = False, - content_field: str = "content", - name_field: str = "name", - embedding_field: str = "embedding", use_sparse_embeddings: bool = False, similarity: str = "cosine", return_embedding: bool = False, progress_bar: bool = True, - duplicate_documents: str = "overwrite", recreate_index: bool = False, shard_number: Optional[int] = None, replication_factor: Optional[int] = None, @@ -170,12 +166,6 @@ def __init__( Dimension of the embeddings. :param on_disk: Whether to store the collection on disk. - :param content_field: - The field for the document content. - :param name_field: - The field for the document name. - :param embedding_field: - The field for the document embeddings. :param use_sparse_embedding: If set to `True`, enables support for sparse embeddings. :param similarity: @@ -184,8 +174,6 @@ def __init__( Whether to return embeddings in the search results. :param progress_bar: Whether to show a progress bar or not. - :param duplicate_documents: - The parameter is not used and will be removed in future release. :param recreate_index: Whether to recreate the index. :param shard_number: @@ -260,14 +248,10 @@ def __init__( self.use_sparse_embeddings = use_sparse_embeddings self.embedding_dim = embedding_dim self.on_disk = on_disk - self.content_field = content_field - self.name_field = name_field - self.embedding_field = embedding_field self.similarity = similarity self.index = index self.return_embedding = return_embedding self.progress_bar = progress_bar - self.duplicate_documents = duplicate_documents self.write_batch_size = write_batch_size self.scroll_size = scroll_size @@ -380,7 +364,6 @@ def write_documents( for document_batch in batched_documents: batch = convert_haystack_documents_to_qdrant_points( document_batch, - embedding_field=self.embedding_field, use_sparse_embeddings=self.use_sparse_embeddings, ) @@ -891,12 +874,7 @@ def _handle_duplicate_documents( :param documents: A list of Haystack Document objects. :param index: name of the index - :param duplicate_documents: Handle duplicate documents based on parameter options. - Parameter options : ( 'skip','overwrite','fail') - skip (default option): Ignore the duplicates documents. - overwrite: Update any existing documents with the same ID when adding documents. - fail: An error is raised if the document ID of the document being added already - exists. + :param policy: The duplicate policy to use when writing documents. :returns: A list of Haystack Document objects. """ diff --git a/integrations/qdrant/tests/test_dict_converters.py b/integrations/qdrant/tests/test_dict_converters.py index dd54df4c4..9fc8779f7 100644 --- a/integrations/qdrant/tests/test_dict_converters.py +++ b/integrations/qdrant/tests/test_dict_converters.py @@ -22,15 +22,11 @@ def test_to_dict(): "index": "test", "embedding_dim": 768, "on_disk": False, - "content_field": "content", - "name_field": "name", - "embedding_field": "embedding", "force_disable_check_same_thread": False, "use_sparse_embeddings": False, "similarity": "cosine", "return_embedding": False, "progress_bar": True, - "duplicate_documents": "overwrite", "recreate_index": False, "shard_number": None, "replication_factor": None, @@ -62,15 +58,11 @@ def test_from_dict(): "index": "test", "embedding_dim": 768, "on_disk": False, - "content_field": "content", - "name_field": "name", - "embedding_field": "embedding", "force_disable_check_same_thread": False, "use_sparse_embeddings": True, "similarity": "cosine", "return_embedding": False, "progress_bar": True, - "duplicate_documents": "overwrite", "recreate_index": True, "shard_number": None, "quantization_config": None, @@ -87,16 +79,12 @@ def test_from_dict(): assert all( [ document_store.index == "test", - document_store.content_field == "content", - document_store.name_field == "name", - document_store.embedding_field == "embedding", document_store.force_disable_check_same_thread is False, document_store.use_sparse_embeddings is True, document_store.on_disk is False, document_store.similarity == "cosine", document_store.return_embedding is False, document_store.progress_bar, - document_store.duplicate_documents == "overwrite", document_store.recreate_index is True, document_store.shard_number is None, document_store.replication_factor is None, diff --git a/integrations/qdrant/tests/test_retriever.py b/integrations/qdrant/tests/test_retriever.py index 2eb0f6a34..3bfb9e62e 100644 --- a/integrations/qdrant/tests/test_retriever.py +++ b/integrations/qdrant/tests/test_retriever.py @@ -47,15 +47,11 @@ def test_to_dict(self): "index": "test", "embedding_dim": 768, "on_disk": False, - "content_field": "content", - "name_field": "name", "force_disable_check_same_thread": False, - "embedding_field": "embedding", "use_sparse_embeddings": False, "similarity": "cosine", "return_embedding": False, "progress_bar": True, - "duplicate_documents": "overwrite", "recreate_index": False, "shard_number": None, "replication_factor": None, @@ -170,15 +166,11 @@ def test_to_dict(self): "index": "test", "embedding_dim": 768, "on_disk": False, - "content_field": "content", - "name_field": "name", - "embedding_field": "embedding", "force_disable_check_same_thread": False, "use_sparse_embeddings": False, "similarity": "cosine", "return_embedding": False, "progress_bar": True, - "duplicate_documents": "overwrite", "recreate_index": False, "shard_number": None, "replication_factor": None, @@ -280,15 +272,11 @@ def test_to_dict(self): "index": "test", "embedding_dim": 768, "on_disk": False, - "content_field": "content", - "name_field": "name", - "embedding_field": "embedding", "force_disable_check_same_thread": False, "use_sparse_embeddings": False, "similarity": "cosine", "return_embedding": False, "progress_bar": True, - "duplicate_documents": "overwrite", "recreate_index": False, "shard_number": None, "replication_factor": None,