diff --git a/integrations/chroma/tests/test_document_store.py b/integrations/chroma/tests/test_document_store.py index 8d61e63ed..5b827a984 100644 --- a/integrations/chroma/tests/test_document_store.py +++ b/integrations/chroma/tests/test_document_store.py @@ -60,7 +60,6 @@ def assert_documents_are_equal(self, received: List[Document], expected: List[Do assert doc_received.content == doc_expected.content assert doc_received.meta == doc_expected.meta - @pytest.mark.unit def test_ne_filter(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): """ We customize this test because Chroma consider "not equal" true when @@ -72,14 +71,12 @@ def test_ne_filter(self, document_store: ChromaDocumentStore, filterable_docs: L result, [doc for doc in filterable_docs if doc.meta.get("page", "100") != "100"] ) - @pytest.mark.unit def test_delete_empty(self, document_store: ChromaDocumentStore): """ Deleting a non-existing document should not raise with Chroma """ document_store.delete_documents(["test"]) - @pytest.mark.unit def test_delete_not_empty_nonexisting(self, document_store: ChromaDocumentStore): """ Deleting a non-existing document should not raise with Chroma @@ -131,144 +128,117 @@ def test_same_collection_name_reinitialization(self): ChromaDocumentStore("test_name") @pytest.mark.skip(reason="Filter on array contents is not supported.") - @pytest.mark.unit def test_filter_document_array(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter on dataframe contents is not supported.") - @pytest.mark.unit def test_filter_document_dataframe(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter on table contents is not supported.") - @pytest.mark.unit def test_eq_filter_table(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter on embedding value is not supported.") - @pytest.mark.unit def test_eq_filter_embedding(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="$in operator is not supported.") - @pytest.mark.unit def test_in_filter_explicit(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="$in operator is not supported. Filter on table contents is not supported.") - @pytest.mark.unit def test_in_filter_table(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="$in operator is not supported.") - @pytest.mark.unit def test_in_filter_embedding(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter on table contents is not supported.") - @pytest.mark.unit def test_ne_filter_table(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter on embedding value is not supported.") - @pytest.mark.unit def test_ne_filter_embedding(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="$nin operator is not supported. Filter on table contents is not supported.") - @pytest.mark.unit def test_nin_filter_table(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="$nin operator is not supported. Filter on embedding value is not supported.") - @pytest.mark.unit def test_nin_filter_embedding(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="$nin operator is not supported.") - @pytest.mark.unit def test_nin_filter(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_simple_implicit_and_with_multi_key_dict( self, document_store: ChromaDocumentStore, filterable_docs: List[Document] ): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_simple_explicit_and_with_multikey_dict( self, document_store: ChromaDocumentStore, filterable_docs: List[Document] ): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_simple_explicit_and_with_list( self, document_store: ChromaDocumentStore, filterable_docs: List[Document] ): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_simple_implicit_and(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_nested_explicit_and(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_nested_implicit_and(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_simple_or(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_nested_or(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter on table contents is not supported.") - @pytest.mark.unit def test_filter_nested_and_or_explicit(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_nested_and_or_implicit(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_nested_or_and(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): pass @pytest.mark.skip(reason="Filter syntax not supported.") - @pytest.mark.unit def test_filter_nested_multiple_identical_operators_same_level( self, document_store: ChromaDocumentStore, filterable_docs: List[Document] ): pass @pytest.mark.skip(reason="Duplicate policy not supported.") - @pytest.mark.unit def test_write_duplicate_fail(self, document_store: ChromaDocumentStore): pass @pytest.mark.skip(reason="Duplicate policy not supported.") - @pytest.mark.unit def test_write_duplicate_skip(self, document_store: ChromaDocumentStore): pass @pytest.mark.skip(reason="Duplicate policy not supported.") - @pytest.mark.unit def test_write_duplicate_overwrite(self, document_store: ChromaDocumentStore): pass diff --git a/integrations/cohere/tests/test_cohere_chat_generator.py b/integrations/cohere/tests/test_cohere_chat_generator.py index 7fd588fec..9a822856e 100644 --- a/integrations/cohere/tests/test_cohere_chat_generator.py +++ b/integrations/cohere/tests/test_cohere_chat_generator.py @@ -53,7 +53,6 @@ def chat_messages(): class TestCohereChatGenerator: - @pytest.mark.unit def test_init_default(self, monkeypatch): monkeypatch.setenv("COHERE_API_KEY", "test-api-key") @@ -64,14 +63,12 @@ def test_init_default(self, monkeypatch): assert component.api_base_url == cohere.COHERE_API_URL assert not component.generation_kwargs - @pytest.mark.unit def test_init_fail_wo_api_key(self, monkeypatch): monkeypatch.delenv("COHERE_API_KEY", raising=False) monkeypatch.delenv("CO_API_KEY", raising=False) with pytest.raises(ValueError): CohereChatGenerator() - @pytest.mark.unit def test_init_with_parameters(self): component = CohereChatGenerator( api_key=Secret.from_token("test-api-key"), @@ -86,7 +83,6 @@ def test_init_with_parameters(self): assert component.api_base_url == "test-base-url" assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"} - @pytest.mark.unit def test_to_dict_default(self, monkeypatch): monkeypatch.setenv("COHERE_API_KEY", "test-api-key") component = CohereChatGenerator() @@ -102,7 +98,6 @@ def test_to_dict_default(self, monkeypatch): }, } - @pytest.mark.unit def test_to_dict_with_parameters(self, monkeypatch): monkeypatch.setenv("COHERE_API_KEY", "test-api-key") monkeypatch.setenv("CO_API_KEY", "fake-api-key") @@ -125,7 +120,6 @@ def test_to_dict_with_parameters(self, monkeypatch): }, } - @pytest.mark.unit def test_to_dict_with_lambda_streaming_callback(self, monkeypatch): monkeypatch.setenv("COHERE_API_KEY", "test-api-key") component = CohereChatGenerator( @@ -146,7 +140,6 @@ def test_to_dict_with_lambda_streaming_callback(self, monkeypatch): }, } - @pytest.mark.unit def test_from_dict(self, monkeypatch): monkeypatch.setenv("COHERE_API_KEY", "fake-api-key") monkeypatch.setenv("CO_API_KEY", "fake-api-key") @@ -166,7 +159,6 @@ def test_from_dict(self, monkeypatch): assert component.api_base_url == "test-base-url" assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"} - @pytest.mark.unit def test_from_dict_fail_wo_env_var(self, monkeypatch): monkeypatch.delenv("COHERE_API_KEY", raising=False) monkeypatch.delenv("CO_API_KEY", raising=False) @@ -183,7 +175,6 @@ def test_from_dict_fail_wo_env_var(self, monkeypatch): with pytest.raises(ValueError): CohereChatGenerator.from_dict(data) - @pytest.mark.unit def test_run(self, chat_messages, mock_chat_response): # noqa: ARG002 component = CohereChatGenerator(api_key=Secret.from_token("test-api-key")) response = component.run(chat_messages) @@ -195,13 +186,11 @@ def test_run(self, chat_messages, mock_chat_response): # noqa: ARG002 assert len(response["replies"]) == 1 assert [isinstance(reply, ChatMessage) for reply in response["replies"]] - @pytest.mark.unit def test_message_to_dict(self, chat_messages): obj = CohereChatGenerator(api_key=Secret.from_token("test-api-key")) dictionary = [obj._message_to_dict(message) for message in chat_messages] assert dictionary == [{"user_name": "Chatbot", "text": "What's the capital of France"}] - @pytest.mark.unit def test_run_with_params(self, chat_messages, mock_chat_response): component = CohereChatGenerator( api_key=Secret.from_token("test-api-key"), generation_kwargs={"max_tokens": 10, "temperature": 0.5} @@ -220,7 +209,6 @@ def test_run_with_params(self, chat_messages, mock_chat_response): assert len(response["replies"]) == 1 assert [isinstance(reply, ChatMessage) for reply in response["replies"]] - @pytest.mark.unit def test_run_streaming(self, chat_messages, mock_chat_response): streaming_call_count = 0 diff --git a/integrations/deepeval/tests/test_evaluator.py b/integrations/deepeval/tests/test_evaluator.py index 8534ef687..7d1946185 100644 --- a/integrations/deepeval/tests/test_evaluator.py +++ b/integrations/deepeval/tests/test_evaluator.py @@ -270,6 +270,7 @@ def test_evaluator_outputs(metric, inputs, expected_outputs, metric_params, monk # OpenAI API. It is parameterized by the metric, the inputs to the evalutor # and the metric parameters. @pytest.mark.skipif("OPENAI_API_KEY" not in os.environ, reason="OPENAI_API_KEY not set") +@pytest.mark.integration @pytest.mark.parametrize( "metric, inputs, metric_params", [ diff --git a/integrations/elasticsearch/tests/test_document_store.py b/integrations/elasticsearch/tests/test_document_store.py index e46e76ed2..308486a78 100644 --- a/integrations/elasticsearch/tests/test_document_store.py +++ b/integrations/elasticsearch/tests/test_document_store.py @@ -15,6 +15,36 @@ from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore +@patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch") +def test_to_dict(_mock_elasticsearch_client): + document_store = ElasticsearchDocumentStore(hosts="some hosts") + res = document_store.to_dict() + assert res == { + "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore", + "init_parameters": { + "hosts": "some hosts", + "index": "default", + "embedding_similarity_function": "cosine", + }, + } + + +@patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch") +def test_from_dict(_mock_elasticsearch_client): + data = { + "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore", + "init_parameters": { + "hosts": "some hosts", + "index": "default", + "embedding_similarity_function": "cosine", + }, + } + document_store = ElasticsearchDocumentStore.from_dict(data) + assert document_store._hosts == "some hosts" + assert document_store._index == "default" + assert document_store._embedding_similarity_function == "cosine" + + @pytest.mark.integration class TestDocumentStore(DocumentStoreBaseTests): """ @@ -67,34 +97,6 @@ def assert_documents_are_equal(self, received: List[Document], expected: List[Do super().assert_documents_are_equal(received, expected) - @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch") - def test_to_dict(self, _mock_elasticsearch_client): - document_store = ElasticsearchDocumentStore(hosts="some hosts") - res = document_store.to_dict() - assert res == { - "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore", - "init_parameters": { - "hosts": "some hosts", - "index": "default", - "embedding_similarity_function": "cosine", - }, - } - - @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch") - def test_from_dict(self, _mock_elasticsearch_client): - data = { - "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore", - "init_parameters": { - "hosts": "some hosts", - "index": "default", - "embedding_similarity_function": "cosine", - }, - } - document_store = ElasticsearchDocumentStore.from_dict(data) - assert document_store._hosts == "some hosts" - assert document_store._index == "default" - assert document_store._embedding_similarity_function == "cosine" - def test_user_agent_header(self, document_store: ElasticsearchDocumentStore): assert document_store._client._headers["user-agent"].startswith("haystack-py-ds/") diff --git a/integrations/mongodb_atlas/tests/test_retriever.py b/integrations/mongodb_atlas/tests/test_retriever.py index ec44513e2..4ef5222ce 100644 --- a/integrations/mongodb_atlas/tests/test_retriever.py +++ b/integrations/mongodb_atlas/tests/test_retriever.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from unittest.mock import Mock +from unittest.mock import MagicMock, Mock, patch import pytest from haystack.dataclasses import Document @@ -10,34 +10,48 @@ from haystack_integrations.document_stores.mongodb_atlas import MongoDBAtlasDocumentStore -@pytest.fixture -def document_store(): - store = MongoDBAtlasDocumentStore( - database_name="haystack_integration_test", - collection_name="test_embeddings_collection", - vector_search_index="cosine_index", - ) - return store +class TestRetriever: + @pytest.fixture + def mock_client(self): + with patch( + "haystack_integrations.document_stores.mongodb_atlas.document_store.MongoClient" + ) as mock_mongo_client: + mock_connection = MagicMock() + mock_database = MagicMock() + mock_collection_names = MagicMock(return_value=["test_embeddings_collection"]) + mock_database.list_collection_names = mock_collection_names + mock_connection.__getitem__.return_value = mock_database + mock_mongo_client.return_value = mock_connection + yield mock_mongo_client -class TestRetriever: - def test_init_default(self, document_store: MongoDBAtlasDocumentStore): - retriever = MongoDBAtlasEmbeddingRetriever(document_store=document_store) - assert retriever.document_store == document_store + def test_init_default(self): + mock_store = Mock(spec=MongoDBAtlasDocumentStore) + retriever = MongoDBAtlasEmbeddingRetriever(document_store=mock_store) + assert retriever.document_store == mock_store assert retriever.filters == {} assert retriever.top_k == 10 - def test_init(self, document_store: MongoDBAtlasDocumentStore): + def test_init(self): + mock_store = Mock(spec=MongoDBAtlasDocumentStore) retriever = MongoDBAtlasEmbeddingRetriever( - document_store=document_store, + document_store=mock_store, filters={"field": "value"}, top_k=5, ) - assert retriever.document_store == document_store + assert retriever.document_store == mock_store assert retriever.filters == {"field": "value"} assert retriever.top_k == 5 - def test_to_dict(self, document_store: MongoDBAtlasDocumentStore): + def test_to_dict(self, mock_client, monkeypatch): # noqa: ARG002 mock_client appears unused but is required + monkeypatch.setenv("MONGO_CONNECTION_STRING", "test_conn_str") + + document_store = MongoDBAtlasDocumentStore( + database_name="haystack_integration_test", + collection_name="test_embeddings_collection", + vector_search_index="cosine_index", + ) + retriever = MongoDBAtlasEmbeddingRetriever(document_store=document_store, filters={"field": "value"}, top_k=5) res = retriever.to_dict() assert res == { @@ -61,7 +75,9 @@ def test_to_dict(self, document_store: MongoDBAtlasDocumentStore): }, } - def test_from_dict(self): + def test_from_dict(self, mock_client, monkeypatch): # noqa: ARG002 mock_client appears unused but is required + monkeypatch.setenv("MONGO_CONNECTION_STRING", "test_conn_str") + data = { "type": "haystack_integrations.components.retrievers.mongodb_atlas.embedding_retriever.MongoDBAtlasEmbeddingRetriever", # noqa: E501 "init_parameters": { diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py index e3a314141..bc0d1c434 100644 --- a/integrations/opensearch/tests/test_document_store.py +++ b/integrations/opensearch/tests/test_document_store.py @@ -14,6 +14,34 @@ from opensearchpy.exceptions import RequestError +@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") +def test_to_dict(_mock_opensearch_client): + document_store = OpenSearchDocumentStore(hosts="some hosts") + res = document_store.to_dict() + assert res == { + "type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore", + "init_parameters": { + "hosts": "some hosts", + "index": "default", + }, + } + + +@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") +def test_from_dict(_mock_opensearch_client): + data = { + "type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore", + "init_parameters": { + "hosts": "some hosts", + "index": "default", + }, + } + document_store = OpenSearchDocumentStore.from_dict(data) + assert document_store._hosts == "some hosts" + assert document_store._index == "default" + + +@pytest.mark.integration class TestDocumentStore(DocumentStoreBaseTests): """ Common test cases will be provided by `DocumentStoreBaseTests` but @@ -87,31 +115,6 @@ def assert_documents_are_equal(self, received: List[Document], expected: List[Do super().assert_documents_are_equal(received, expected) - @patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") - def test_to_dict(self, _mock_opensearch_client): - document_store = OpenSearchDocumentStore(hosts="some hosts") - res = document_store.to_dict() - assert res == { - "type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore", - "init_parameters": { - "hosts": "some hosts", - "index": "default", - }, - } - - @patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch") - def test_from_dict(self, _mock_opensearch_client): - data = { - "type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore", - "init_parameters": { - "hosts": "some hosts", - "index": "default", - }, - } - document_store = OpenSearchDocumentStore.from_dict(data) - assert document_store._hosts == "some hosts" - assert document_store._index == "default" - def test_write_documents(self, document_store: OpenSearchDocumentStore): docs = [Document(id="1")] assert document_store.write_documents(docs) == 1 diff --git a/integrations/pgvector/tests/conftest.py b/integrations/pgvector/tests/conftest.py index 068f2ac54..94b35a04d 100644 --- a/integrations/pgvector/tests/conftest.py +++ b/integrations/pgvector/tests/conftest.py @@ -1,4 +1,5 @@ import os +from unittest.mock import patch import pytest from haystack_integrations.document_stores.pgvector import PgvectorDocumentStore @@ -24,3 +25,38 @@ def document_store(request): yield store store.delete_table() + + +@pytest.fixture +def patches_for_unit_tests(): + with patch("haystack_integrations.document_stores.pgvector.document_store.connect") as mock_connect, patch( + "haystack_integrations.document_stores.pgvector.document_store.register_vector" + ) as mock_register, patch( + "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore.delete_table" + ) as mock_delete, patch( + "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore._create_table_if_not_exists" + ) as mock_create, patch( + "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore._handle_hnsw" + ) as mock_hnsw: + + yield mock_connect, mock_register, mock_delete, mock_create, mock_hnsw + + +@pytest.fixture +def mock_store(patches_for_unit_tests, monkeypatch): # noqa: ARG001 patches are not explicitly called but necessary + monkeypatch.setenv("PG_CONN_STR", "some-connection-string") + table_name = "haystack" + embedding_dimension = 768 + vector_function = "cosine_similarity" + recreate_table = True + search_strategy = "exact_nearest_neighbor" + + store = PgvectorDocumentStore( + table_name=table_name, + embedding_dimension=embedding_dimension, + vector_function=vector_function, + recreate_table=recreate_table, + search_strategy=search_strategy, + ) + + yield store diff --git a/integrations/pgvector/tests/test_document_store.py b/integrations/pgvector/tests/test_document_store.py index 1e158f134..bf5ccd5d4 100644 --- a/integrations/pgvector/tests/test_document_store.py +++ b/integrations/pgvector/tests/test_document_store.py @@ -13,6 +13,7 @@ from pandas import DataFrame +@pytest.mark.integration class TestDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest): def test_write_documents(self, document_store: PgvectorDocumentStore): docs = [Document(id="1")] @@ -25,7 +26,6 @@ def test_write_blob(self, document_store: PgvectorDocumentStore): docs = [Document(id="1", blob=bytestream)] document_store.write_documents(docs) - # TODO: update when filters are implemented retrieved_docs = document_store.filter_documents() assert retrieved_docs == docs @@ -35,185 +35,194 @@ def test_write_dataframe(self, document_store: PgvectorDocumentStore): document_store.write_documents(docs) - # TODO: update when filters are implemented retrieved_docs = document_store.filter_documents() assert retrieved_docs == docs - def test_init(self): - document_store = PgvectorDocumentStore( - table_name="my_table", - embedding_dimension=512, - vector_function="l2_distance", - recreate_table=True, - search_strategy="hnsw", - hnsw_recreate_index_if_exists=True, - hnsw_index_creation_kwargs={"m": 32, "ef_construction": 128}, - hnsw_ef_search=50, - ) - - assert document_store.table_name == "my_table" - assert document_store.embedding_dimension == 512 - assert document_store.vector_function == "l2_distance" - assert document_store.recreate_table - assert document_store.search_strategy == "hnsw" - assert document_store.hnsw_recreate_index_if_exists - assert document_store.hnsw_index_creation_kwargs == {"m": 32, "ef_construction": 128} - assert document_store.hnsw_ef_search == 50 - - def test_to_dict(self): - document_store = PgvectorDocumentStore( - table_name="my_table", - embedding_dimension=512, - vector_function="l2_distance", - recreate_table=True, - search_strategy="hnsw", - hnsw_recreate_index_if_exists=True, - hnsw_index_creation_kwargs={"m": 32, "ef_construction": 128}, - hnsw_ef_search=50, - ) - - assert document_store.to_dict() == { - "type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore", - "init_parameters": { - "connection_string": {"env_vars": ["PG_CONN_STR"], "strict": True, "type": "env_var"}, - "table_name": "my_table", - "embedding_dimension": 512, - "vector_function": "l2_distance", - "recreate_table": True, - "search_strategy": "hnsw", - "hnsw_recreate_index_if_exists": True, - "hnsw_index_creation_kwargs": {"m": 32, "ef_construction": 128}, - "hnsw_ef_search": 50, - }, - } - - def test_from_haystack_to_pg_documents(self): - haystack_docs = [ - Document( - id="1", - content="This is a text", - meta={"meta_key": "meta_value"}, - embedding=[0.1, 0.2, 0.3], - score=0.5, - ), - Document( - id="2", - dataframe=DataFrame({"col1": [1, 2], "col2": [3, 4]}), - meta={"meta_key": "meta_value"}, - embedding=[0.4, 0.5, 0.6], - score=0.6, - ), - Document( - id="3", - blob=ByteStream(b"test", meta={"blob_meta_key": "blob_meta_value"}, mime_type="mime_type"), - meta={"meta_key": "meta_value"}, - embedding=[0.7, 0.8, 0.9], - score=0.7, - ), - ] - - with patch( - "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore.__init__" - ) as mock_init: - mock_init.return_value = None - ds = PgvectorDocumentStore(connection_string="test") - - pg_docs = ds._from_haystack_to_pg_documents(haystack_docs) - - assert pg_docs[0]["id"] == "1" - assert pg_docs[0]["content"] == "This is a text" - assert pg_docs[0]["dataframe"] is None - assert pg_docs[0]["blob_data"] is None - assert pg_docs[0]["blob_meta"] is None - assert pg_docs[0]["blob_mime_type"] is None - assert pg_docs[0]["meta"].obj == {"meta_key": "meta_value"} - assert pg_docs[0]["embedding"] == [0.1, 0.2, 0.3] - assert "score" not in pg_docs[0] - - assert pg_docs[1]["id"] == "2" - assert pg_docs[1]["content"] is None - assert pg_docs[1]["dataframe"].obj == DataFrame({"col1": [1, 2], "col2": [3, 4]}).to_json() - assert pg_docs[1]["blob_data"] is None - assert pg_docs[1]["blob_meta"] is None - assert pg_docs[1]["blob_mime_type"] is None - assert pg_docs[1]["meta"].obj == {"meta_key": "meta_value"} - assert pg_docs[1]["embedding"] == [0.4, 0.5, 0.6] - assert "score" not in pg_docs[1] - - assert pg_docs[2]["id"] == "3" - assert pg_docs[2]["content"] is None - assert pg_docs[2]["dataframe"] is None - assert pg_docs[2]["blob_data"] == b"test" - assert pg_docs[2]["blob_meta"].obj == {"blob_meta_key": "blob_meta_value"} - assert pg_docs[2]["blob_mime_type"] == "mime_type" - assert pg_docs[2]["meta"].obj == {"meta_key": "meta_value"} - assert pg_docs[2]["embedding"] == [0.7, 0.8, 0.9] - assert "score" not in pg_docs[2] - - def test_from_pg_to_haystack_documents(self): - pg_docs = [ - { - "id": "1", - "content": "This is a text", - "dataframe": None, - "blob_data": None, - "blob_meta": None, - "blob_mime_type": None, - "meta": {"meta_key": "meta_value"}, - "embedding": "[0.1, 0.2, 0.3]", - }, - { - "id": "2", - "content": None, - "dataframe": DataFrame({"col1": [1, 2], "col2": [3, 4]}).to_json(), - "blob_data": None, - "blob_meta": None, - "blob_mime_type": None, - "meta": {"meta_key": "meta_value"}, - "embedding": "[0.4, 0.5, 0.6]", - }, - { - "id": "3", - "content": None, - "dataframe": None, - "blob_data": b"test", - "blob_meta": {"blob_meta_key": "blob_meta_value"}, - "blob_mime_type": "mime_type", - "meta": {"meta_key": "meta_value"}, - "embedding": "[0.7, 0.8, 0.9]", - }, - ] - - with patch( - "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore.__init__" - ) as mock_init: - mock_init.return_value = None - ds = PgvectorDocumentStore(connection_string="test") - - haystack_docs = ds._from_pg_to_haystack_documents(pg_docs) - - assert haystack_docs[0].id == "1" - assert haystack_docs[0].content == "This is a text" - assert haystack_docs[0].dataframe is None - assert haystack_docs[0].blob is None - assert haystack_docs[0].meta == {"meta_key": "meta_value"} - assert haystack_docs[0].embedding == [0.1, 0.2, 0.3] - assert haystack_docs[0].score is None - - assert haystack_docs[1].id == "2" - assert haystack_docs[1].content is None - assert haystack_docs[1].dataframe.equals(DataFrame({"col1": [1, 2], "col2": [3, 4]})) - assert haystack_docs[1].blob is None - assert haystack_docs[1].meta == {"meta_key": "meta_value"} - assert haystack_docs[1].embedding == [0.4, 0.5, 0.6] - assert haystack_docs[1].score is None - - assert haystack_docs[2].id == "3" - assert haystack_docs[2].content is None - assert haystack_docs[2].dataframe is None - assert haystack_docs[2].blob.data == b"test" - assert haystack_docs[2].blob.meta == {"blob_meta_key": "blob_meta_value"} - assert haystack_docs[2].blob.mime_type == "mime_type" - assert haystack_docs[2].meta == {"meta_key": "meta_value"} - assert haystack_docs[2].embedding == [0.7, 0.8, 0.9] - assert haystack_docs[2].score is None + +@pytest.mark.usefixtures("patches_for_unit_tests") +def test_init(monkeypatch): + monkeypatch.setenv("PG_CONN_STR", "some_connection_string") + + document_store = PgvectorDocumentStore( + table_name="my_table", + embedding_dimension=512, + vector_function="l2_distance", + recreate_table=True, + search_strategy="hnsw", + hnsw_recreate_index_if_exists=True, + hnsw_index_creation_kwargs={"m": 32, "ef_construction": 128}, + hnsw_ef_search=50, + ) + + assert document_store.table_name == "my_table" + assert document_store.embedding_dimension == 512 + assert document_store.vector_function == "l2_distance" + assert document_store.recreate_table + assert document_store.search_strategy == "hnsw" + assert document_store.hnsw_recreate_index_if_exists + assert document_store.hnsw_index_creation_kwargs == {"m": 32, "ef_construction": 128} + assert document_store.hnsw_ef_search == 50 + + +@pytest.mark.usefixtures("patches_for_unit_tests") +def test_to_dict(monkeypatch): + monkeypatch.setenv("PG_CONN_STR", "some_connection_string") + + document_store = PgvectorDocumentStore( + table_name="my_table", + embedding_dimension=512, + vector_function="l2_distance", + recreate_table=True, + search_strategy="hnsw", + hnsw_recreate_index_if_exists=True, + hnsw_index_creation_kwargs={"m": 32, "ef_construction": 128}, + hnsw_ef_search=50, + ) + + assert document_store.to_dict() == { + "type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore", + "init_parameters": { + "connection_string": {"env_vars": ["PG_CONN_STR"], "strict": True, "type": "env_var"}, + "table_name": "my_table", + "embedding_dimension": 512, + "vector_function": "l2_distance", + "recreate_table": True, + "search_strategy": "hnsw", + "hnsw_recreate_index_if_exists": True, + "hnsw_index_creation_kwargs": {"m": 32, "ef_construction": 128}, + "hnsw_ef_search": 50, + }, + } + + +def test_from_haystack_to_pg_documents(): + haystack_docs = [ + Document( + id="1", + content="This is a text", + meta={"meta_key": "meta_value"}, + embedding=[0.1, 0.2, 0.3], + score=0.5, + ), + Document( + id="2", + dataframe=DataFrame({"col1": [1, 2], "col2": [3, 4]}), + meta={"meta_key": "meta_value"}, + embedding=[0.4, 0.5, 0.6], + score=0.6, + ), + Document( + id="3", + blob=ByteStream(b"test", meta={"blob_meta_key": "blob_meta_value"}, mime_type="mime_type"), + meta={"meta_key": "meta_value"}, + embedding=[0.7, 0.8, 0.9], + score=0.7, + ), + ] + + with patch( + "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore.__init__" + ) as mock_init: + mock_init.return_value = None + ds = PgvectorDocumentStore(connection_string="test") + + pg_docs = ds._from_haystack_to_pg_documents(haystack_docs) + + assert pg_docs[0]["id"] == "1" + assert pg_docs[0]["content"] == "This is a text" + assert pg_docs[0]["dataframe"] is None + assert pg_docs[0]["blob_data"] is None + assert pg_docs[0]["blob_meta"] is None + assert pg_docs[0]["blob_mime_type"] is None + assert pg_docs[0]["meta"].obj == {"meta_key": "meta_value"} + assert pg_docs[0]["embedding"] == [0.1, 0.2, 0.3] + assert "score" not in pg_docs[0] + + assert pg_docs[1]["id"] == "2" + assert pg_docs[1]["content"] is None + assert pg_docs[1]["dataframe"].obj == DataFrame({"col1": [1, 2], "col2": [3, 4]}).to_json() + assert pg_docs[1]["blob_data"] is None + assert pg_docs[1]["blob_meta"] is None + assert pg_docs[1]["blob_mime_type"] is None + assert pg_docs[1]["meta"].obj == {"meta_key": "meta_value"} + assert pg_docs[1]["embedding"] == [0.4, 0.5, 0.6] + assert "score" not in pg_docs[1] + + assert pg_docs[2]["id"] == "3" + assert pg_docs[2]["content"] is None + assert pg_docs[2]["dataframe"] is None + assert pg_docs[2]["blob_data"] == b"test" + assert pg_docs[2]["blob_meta"].obj == {"blob_meta_key": "blob_meta_value"} + assert pg_docs[2]["blob_mime_type"] == "mime_type" + assert pg_docs[2]["meta"].obj == {"meta_key": "meta_value"} + assert pg_docs[2]["embedding"] == [0.7, 0.8, 0.9] + assert "score" not in pg_docs[2] + + +def test_from_pg_to_haystack_documents(): + pg_docs = [ + { + "id": "1", + "content": "This is a text", + "dataframe": None, + "blob_data": None, + "blob_meta": None, + "blob_mime_type": None, + "meta": {"meta_key": "meta_value"}, + "embedding": "[0.1, 0.2, 0.3]", + }, + { + "id": "2", + "content": None, + "dataframe": DataFrame({"col1": [1, 2], "col2": [3, 4]}).to_json(), + "blob_data": None, + "blob_meta": None, + "blob_mime_type": None, + "meta": {"meta_key": "meta_value"}, + "embedding": "[0.4, 0.5, 0.6]", + }, + { + "id": "3", + "content": None, + "dataframe": None, + "blob_data": b"test", + "blob_meta": {"blob_meta_key": "blob_meta_value"}, + "blob_mime_type": "mime_type", + "meta": {"meta_key": "meta_value"}, + "embedding": "[0.7, 0.8, 0.9]", + }, + ] + + with patch( + "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore.__init__" + ) as mock_init: + mock_init.return_value = None + ds = PgvectorDocumentStore(connection_string="test") + + haystack_docs = ds._from_pg_to_haystack_documents(pg_docs) + + assert haystack_docs[0].id == "1" + assert haystack_docs[0].content == "This is a text" + assert haystack_docs[0].dataframe is None + assert haystack_docs[0].blob is None + assert haystack_docs[0].meta == {"meta_key": "meta_value"} + assert haystack_docs[0].embedding == [0.1, 0.2, 0.3] + assert haystack_docs[0].score is None + + assert haystack_docs[1].id == "2" + assert haystack_docs[1].content is None + assert haystack_docs[1].dataframe.equals(DataFrame({"col1": [1, 2], "col2": [3, 4]})) + assert haystack_docs[1].blob is None + assert haystack_docs[1].meta == {"meta_key": "meta_value"} + assert haystack_docs[1].embedding == [0.4, 0.5, 0.6] + assert haystack_docs[1].score is None + + assert haystack_docs[2].id == "3" + assert haystack_docs[2].content is None + assert haystack_docs[2].dataframe is None + assert haystack_docs[2].blob.data == b"test" + assert haystack_docs[2].blob.meta == {"blob_meta_key": "blob_meta_value"} + assert haystack_docs[2].blob.mime_type == "mime_type" + assert haystack_docs[2].meta == {"meta_key": "meta_value"} + assert haystack_docs[2].embedding == [0.7, 0.8, 0.9] + assert haystack_docs[2].score is None diff --git a/integrations/pgvector/tests/test_embedding_retrieval.py b/integrations/pgvector/tests/test_embedding_retrieval.py index 1d5e8e297..2c384f57c 100644 --- a/integrations/pgvector/tests/test_embedding_retrieval.py +++ b/integrations/pgvector/tests/test_embedding_retrieval.py @@ -10,6 +10,7 @@ from numpy.random import rand +@pytest.mark.integration class TestEmbeddingRetrieval: @pytest.fixture def document_store_w_hnsw_index(self, request): diff --git a/integrations/pgvector/tests/test_filters.py b/integrations/pgvector/tests/test_filters.py index 8b2dc8ec9..bda10e3c0 100644 --- a/integrations/pgvector/tests/test_filters.py +++ b/integrations/pgvector/tests/test_filters.py @@ -15,6 +15,7 @@ from psycopg.types.json import Jsonb +@pytest.mark.integration class TestFilters(FilterDocumentsTest): def assert_documents_are_equal(self, received: List[Document], expected: List[Document]): """ @@ -35,6 +36,9 @@ def assert_documents_are_equal(self, received: List[Document], expected: List[Do received_doc.embedding, expected_doc.embedding = None, None assert received_doc == expected_doc + @pytest.mark.skip(reason="NOT operator is not supported in PgvectorDocumentStore") + def test_not_operator(self, document_store, filterable_docs): ... + def test_complex_filter(self, document_store, filterable_docs): document_store.write_documents(filterable_docs) filters = { @@ -69,111 +73,119 @@ def test_complex_filter(self, document_store, filterable_docs): ], ) - @pytest.mark.skip(reason="NOT operator is not supported in PgvectorDocumentStore") - def test_not_operator(self, document_store, filterable_docs): ... - def test_treat_meta_field(self): - assert _treat_meta_field(field="meta.number", value=9) == "(meta->>'number')::integer" - assert _treat_meta_field(field="meta.number", value=[1, 2, 3]) == "(meta->>'number')::integer" - assert _treat_meta_field(field="meta.name", value="my_name") == "meta->>'name'" - assert _treat_meta_field(field="meta.name", value=["my_name"]) == "meta->>'name'" - assert _treat_meta_field(field="meta.number", value=1.1) == "(meta->>'number')::real" - assert _treat_meta_field(field="meta.number", value=[1.1, 2.2, 3.3]) == "(meta->>'number')::real" - assert _treat_meta_field(field="meta.bool", value=True) == "(meta->>'bool')::boolean" - assert _treat_meta_field(field="meta.bool", value=[True, False, True]) == "(meta->>'bool')::boolean" - - # do not cast the field if its value is not one of the known types, an empty list or None - assert _treat_meta_field(field="meta.other", value={"a": 3, "b": "example"}) == "meta->>'other'" - assert _treat_meta_field(field="meta.empty_list", value=[]) == "meta->>'empty_list'" - assert _treat_meta_field(field="meta.name", value=None) == "meta->>'name'" - - def test_comparison_condition_dataframe_jsonb_conversion(self): - dataframe = DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) - condition = {"field": "meta.df", "operator": "==", "value": dataframe} - field, values = _parse_comparison_condition(condition) - assert field == "(meta.df)::jsonb = %s" - - # we check each slot of the Jsonb object because it does not implement __eq__ - assert values[0].obj == Jsonb(dataframe.to_json()).obj - assert values[0].dumps == Jsonb(dataframe.to_json()).dumps - - def test_comparison_condition_missing_operator(self): - condition = {"field": "meta.type", "value": "article"} - with pytest.raises(FilterError): - _parse_comparison_condition(condition) - - def test_comparison_condition_missing_value(self): - condition = {"field": "meta.type", "operator": "=="} - with pytest.raises(FilterError): - _parse_comparison_condition(condition) - - def test_comparison_condition_unknown_operator(self): - condition = {"field": "meta.type", "operator": "unknown", "value": "article"} - with pytest.raises(FilterError): - _parse_comparison_condition(condition) - - def test_logical_condition_missing_operator(self): - condition = {"conditions": []} - with pytest.raises(FilterError): - _parse_logical_condition(condition) - - def test_logical_condition_missing_conditions(self): - condition = {"operator": "AND"} - with pytest.raises(FilterError): - _parse_logical_condition(condition) - - def test_logical_condition_unknown_operator(self): - condition = {"operator": "unknown", "conditions": []} - with pytest.raises(FilterError): - _parse_logical_condition(condition) - - def test_logical_condition_nested(self): - condition = { - "operator": "AND", - "conditions": [ - { - "operator": "OR", - "conditions": [ - {"field": "meta.domain", "operator": "!=", "value": "science"}, - {"field": "meta.chapter", "operator": "in", "value": ["intro", "conclusion"]}, - ], - }, - { - "operator": "OR", - "conditions": [ - {"field": "meta.number", "operator": ">=", "value": 90}, - {"field": "meta.author", "operator": "not in", "value": ["John", "Jane"]}, - ], - }, - ], - } - query, values = _parse_logical_condition(condition) - assert query == ( - "((meta->>'domain' IS DISTINCT FROM %s OR meta->>'chapter' = ANY(%s)) " - "AND ((meta->>'number')::integer >= %s OR meta->>'author' IS NULL OR meta->>'author' != ALL(%s)))" - ) - assert values == ["science", [["intro", "conclusion"]], 90, [["John", "Jane"]]] - - def test_convert_filters_to_where_clause_and_params(self): - filters = { - "operator": "AND", - "conditions": [ - {"field": "meta.number", "operator": "==", "value": 100}, - {"field": "meta.chapter", "operator": "==", "value": "intro"}, - ], - } - where_clause, params = _convert_filters_to_where_clause_and_params(filters) - assert where_clause == SQL(" WHERE ") + SQL("((meta->>'number')::integer = %s AND meta->>'chapter' = %s)") - assert params == (100, "intro") - - def test_convert_filters_to_where_clause_and_params_handle_null(self): - filters = { - "operator": "AND", - "conditions": [ - {"field": "meta.number", "operator": "==", "value": None}, - {"field": "meta.chapter", "operator": "==", "value": "intro"}, - ], - } - where_clause, params = _convert_filters_to_where_clause_and_params(filters) - assert where_clause == SQL(" WHERE ") + SQL("(meta->>'number' IS NULL AND meta->>'chapter' = %s)") - assert params == ("intro",) +def test_treat_meta_field(): + assert _treat_meta_field(field="meta.number", value=9) == "(meta->>'number')::integer" + assert _treat_meta_field(field="meta.number", value=[1, 2, 3]) == "(meta->>'number')::integer" + assert _treat_meta_field(field="meta.name", value="my_name") == "meta->>'name'" + assert _treat_meta_field(field="meta.name", value=["my_name"]) == "meta->>'name'" + assert _treat_meta_field(field="meta.number", value=1.1) == "(meta->>'number')::real" + assert _treat_meta_field(field="meta.number", value=[1.1, 2.2, 3.3]) == "(meta->>'number')::real" + assert _treat_meta_field(field="meta.bool", value=True) == "(meta->>'bool')::boolean" + assert _treat_meta_field(field="meta.bool", value=[True, False, True]) == "(meta->>'bool')::boolean" + + # do not cast the field if its value is not one of the known types, an empty list or None + assert _treat_meta_field(field="meta.other", value={"a": 3, "b": "example"}) == "meta->>'other'" + assert _treat_meta_field(field="meta.empty_list", value=[]) == "meta->>'empty_list'" + assert _treat_meta_field(field="meta.name", value=None) == "meta->>'name'" + + +def test_comparison_condition_dataframe_jsonb_conversion(): + dataframe = DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) + condition = {"field": "meta.df", "operator": "==", "value": dataframe} + field, values = _parse_comparison_condition(condition) + assert field == "(meta.df)::jsonb = %s" + + # we check each slot of the Jsonb object because it does not implement __eq__ + assert values[0].obj == Jsonb(dataframe.to_json()).obj + assert values[0].dumps == Jsonb(dataframe.to_json()).dumps + + +def test_comparison_condition_missing_operator(): + condition = {"field": "meta.type", "value": "article"} + with pytest.raises(FilterError): + _parse_comparison_condition(condition) + + +def test_comparison_condition_missing_value(): + condition = {"field": "meta.type", "operator": "=="} + with pytest.raises(FilterError): + _parse_comparison_condition(condition) + + +def test_comparison_condition_unknown_operator(): + condition = {"field": "meta.type", "operator": "unknown", "value": "article"} + with pytest.raises(FilterError): + _parse_comparison_condition(condition) + + +def test_logical_condition_missing_operator(): + condition = {"conditions": []} + with pytest.raises(FilterError): + _parse_logical_condition(condition) + + +def test_logical_condition_missing_conditions(): + condition = {"operator": "AND"} + with pytest.raises(FilterError): + _parse_logical_condition(condition) + + +def test_logical_condition_unknown_operator(): + condition = {"operator": "unknown", "conditions": []} + with pytest.raises(FilterError): + _parse_logical_condition(condition) + + +def test_logical_condition_nested(): + condition = { + "operator": "AND", + "conditions": [ + { + "operator": "OR", + "conditions": [ + {"field": "meta.domain", "operator": "!=", "value": "science"}, + {"field": "meta.chapter", "operator": "in", "value": ["intro", "conclusion"]}, + ], + }, + { + "operator": "OR", + "conditions": [ + {"field": "meta.number", "operator": ">=", "value": 90}, + {"field": "meta.author", "operator": "not in", "value": ["John", "Jane"]}, + ], + }, + ], + } + query, values = _parse_logical_condition(condition) + assert query == ( + "((meta->>'domain' IS DISTINCT FROM %s OR meta->>'chapter' = ANY(%s)) " + "AND ((meta->>'number')::integer >= %s OR meta->>'author' IS NULL OR meta->>'author' != ALL(%s)))" + ) + assert values == ["science", [["intro", "conclusion"]], 90, [["John", "Jane"]]] + + +def test_convert_filters_to_where_clause_and_params(): + filters = { + "operator": "AND", + "conditions": [ + {"field": "meta.number", "operator": "==", "value": 100}, + {"field": "meta.chapter", "operator": "==", "value": "intro"}, + ], + } + where_clause, params = _convert_filters_to_where_clause_and_params(filters) + assert where_clause == SQL(" WHERE ") + SQL("((meta->>'number')::integer = %s AND meta->>'chapter' = %s)") + assert params == (100, "intro") + + +def test_convert_filters_to_where_clause_and_params_handle_null(): + filters = { + "operator": "AND", + "conditions": [ + {"field": "meta.number", "operator": "==", "value": None}, + {"field": "meta.chapter", "operator": "==", "value": "intro"}, + ], + } + where_clause, params = _convert_filters_to_where_clause_and_params(filters) + assert where_clause == SQL(" WHERE ") + SQL("(meta->>'number' IS NULL AND meta->>'chapter' = %s)") + assert params == ("intro",) diff --git a/integrations/pgvector/tests/test_retriever.py b/integrations/pgvector/tests/test_retriever.py index 8eab10de5..61381c24e 100644 --- a/integrations/pgvector/tests/test_retriever.py +++ b/integrations/pgvector/tests/test_retriever.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from unittest.mock import Mock +import pytest from haystack.dataclasses import Document from haystack.utils.auth import EnvVarSecret from haystack_integrations.components.retrievers.pgvector import PgvectorEmbeddingRetriever @@ -10,25 +11,25 @@ class TestRetriever: - def test_init_default(self, document_store: PgvectorDocumentStore): - retriever = PgvectorEmbeddingRetriever(document_store=document_store) - assert retriever.document_store == document_store + def test_init_default(self, mock_store): + retriever = PgvectorEmbeddingRetriever(document_store=mock_store) + assert retriever.document_store == mock_store assert retriever.filters == {} assert retriever.top_k == 10 - assert retriever.vector_function == document_store.vector_function + assert retriever.vector_function == mock_store.vector_function - def test_init(self, document_store: PgvectorDocumentStore): + def test_init(self, mock_store): retriever = PgvectorEmbeddingRetriever( - document_store=document_store, filters={"field": "value"}, top_k=5, vector_function="l2_distance" + document_store=mock_store, filters={"field": "value"}, top_k=5, vector_function="l2_distance" ) - assert retriever.document_store == document_store + assert retriever.document_store == mock_store assert retriever.filters == {"field": "value"} assert retriever.top_k == 5 assert retriever.vector_function == "l2_distance" - def test_to_dict(self, document_store: PgvectorDocumentStore): + def test_to_dict(self, mock_store): retriever = PgvectorEmbeddingRetriever( - document_store=document_store, filters={"field": "value"}, top_k=5, vector_function="l2_distance" + document_store=mock_store, filters={"field": "value"}, top_k=5, vector_function="l2_distance" ) res = retriever.to_dict() t = "haystack_integrations.components.retrievers.pgvector.embedding_retriever.PgvectorEmbeddingRetriever" @@ -39,7 +40,7 @@ def test_to_dict(self, document_store: PgvectorDocumentStore): "type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore", "init_parameters": { "connection_string": {"env_vars": ["PG_CONN_STR"], "strict": True, "type": "env_var"}, - "table_name": "haystack_test_to_dict", + "table_name": "haystack", "embedding_dimension": 768, "vector_function": "cosine_similarity", "recreate_table": True, @@ -55,7 +56,9 @@ def test_to_dict(self, document_store: PgvectorDocumentStore): }, } - def test_from_dict(self): + @pytest.mark.usefixtures("patches_for_unit_tests") + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("PG_CONN_STR", "some-connection-string") t = "haystack_integrations.components.retrievers.pgvector.embedding_retriever.PgvectorEmbeddingRetriever" data = { "type": t, diff --git a/integrations/weaviate/tests/test_document_store.py b/integrations/weaviate/tests/test_document_store.py index a2b32d578..801751483 100644 --- a/integrations/weaviate/tests/test_document_store.py +++ b/integrations/weaviate/tests/test_document_store.py @@ -35,6 +35,7 @@ ) +@pytest.mark.integration class TestWeaviateDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest, FilterDocumentsTest): @pytest.fixture def document_store(self, request) -> WeaviateDocumentStore: