From a05da361c6918c78f36be462f79103d616a1d368 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Mon, 12 Feb 2024 15:19:33 +0100 Subject: [PATCH 01/11] initial import --- .../document_stores/pgvector/document_store.py | 12 ++++++------ integrations/pgvector/tests/test_document_store.py | 3 ++- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py index 097e86c7e..dfdf94576 100644 --- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py +++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py @@ -8,6 +8,7 @@ from haystack.dataclasses.document import ByteStream, Document from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError from haystack.document_stores.types import DuplicatePolicy +from haystack.utils.auth import Secret from haystack.utils.filters import convert from psycopg import Error, IntegrityError, connect from psycopg.abc import Query @@ -69,7 +70,7 @@ class PgvectorDocumentStore: def __init__( self, *, - connection_string: str, + connection_string: Secret = Secret.from_env_var("PG_CONN_STR"), table_name: str = "haystack_documents", embedding_dimension: int = 768, vector_function: Literal["cosine_similarity", "inner_product", "l2_distance"] = "cosine_similarity", @@ -84,8 +85,8 @@ def __init__( It is meant to be connected to a PostgreSQL database with the pgvector extension installed. A specific table to store Haystack documents will be created if it doesn't exist yet. - :param connection_string: The connection string to use to connect to the PostgreSQL database. - e.g. "postgresql://USER:PASSWORD@HOST:PORT/DB_NAME" + :param connection_string: The connection string to use to connect to the PostgreSQL database, defined as an + environment variable, e.g.: PG_CONN_STR="postgresql://USER:PASSWORD@HOST:PORT/DB_NAME" :param table_name: The name of the table to use to store Haystack documents. Defaults to "haystack_documents". :param embedding_dimension: The dimension of the embedding. Defaults to 768. :param vector_function: The similarity function to use when searching for similar embeddings. @@ -116,8 +117,7 @@ def __init__( "hnsw". You can find more information about this parameter in the pgvector documentation: https://github.com/pgvector/pgvector?tab=readme-ov-file#hnsw """ - - self.connection_string = connection_string + self.connection_string = connection_string.resolve_value() self.table_name = table_name self.embedding_dimension = embedding_dimension if vector_function not in VALID_VECTOR_FUNCTIONS: @@ -130,7 +130,7 @@ def __init__( self.hnsw_index_creation_kwargs = hnsw_index_creation_kwargs or {} self.hnsw_ef_search = hnsw_ef_search - connection = connect(connection_string) + connection = connect(self.connection_string) connection.autocommit = True self._connection = connection diff --git a/integrations/pgvector/tests/test_document_store.py b/integrations/pgvector/tests/test_document_store.py index e8d9107d7..0325844a7 100644 --- a/integrations/pgvector/tests/test_document_store.py +++ b/integrations/pgvector/tests/test_document_store.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 +import os from unittest.mock import patch import pytest @@ -40,8 +41,8 @@ def test_write_dataframe(self, document_store: PgvectorDocumentStore): assert retrieved_docs == docs def test_init(self): + os.environ["PG_CONN_STR"] = "postgresql://postgres:postgres@localhost:5432/postgres" document_store = PgvectorDocumentStore( - connection_string="postgresql://postgres:postgres@localhost:5432/postgres", table_name="my_table", embedding_dimension=512, vector_function="l2_distance", From 90bfa1cd38117f4038bffef0928b7b7ab6becace Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Mon, 12 Feb 2024 18:37:40 +0100 Subject: [PATCH 02/11] adding Secret support and fixing tests --- integrations/pgvector/examples/example.py | 3 +-- .../document_stores/pgvector/document_store.py | 3 ++- integrations/pgvector/tests/conftest.py | 7 ++++--- integrations/pgvector/tests/test_document_store.py | 2 -- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/integrations/pgvector/examples/example.py b/integrations/pgvector/examples/example.py index 14c2cba60..dbbe495a1 100644 --- a/integrations/pgvector/examples/example.py +++ b/integrations/pgvector/examples/example.py @@ -1,7 +1,6 @@ # Before running this example, ensure you have PostgreSQL installed with the pgvector extension. # For a quick setup using Docker: -# docker run -d -p 5432:5432 -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres -# -e POSTGRES_DB=postgres ankane/pgvector +# docker run -d -p 5432:5432 -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres -e POSTGRES_DB=postgres ankane/pgvector # Install required packages for this example, including pgvector-haystack and other libraries needed # for Markdown conversion and embeddings generation. Use the following command: diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py index dfdf94576..57a7b44e2 100644 --- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py +++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py @@ -117,7 +117,8 @@ def __init__( "hnsw". You can find more information about this parameter in the pgvector documentation: https://github.com/pgvector/pgvector?tab=readme-ov-file#hnsw """ - self.connection_string = connection_string.resolve_value() + + self.connection_string = connection_string if isinstance(connection_string, str) else connection_string.resolve_value() self.table_name = table_name self.embedding_dimension = embedding_dimension if vector_function not in VALID_VECTOR_FUNCTIONS: diff --git a/integrations/pgvector/tests/conftest.py b/integrations/pgvector/tests/conftest.py index 743e8de14..982ba95e3 100644 --- a/integrations/pgvector/tests/conftest.py +++ b/integrations/pgvector/tests/conftest.py @@ -1,10 +1,11 @@ +import os import pytest from haystack_integrations.document_stores.pgvector import PgvectorDocumentStore @pytest.fixture def document_store(request): - connection_string = "postgresql://postgres:postgres@localhost:5432/postgres" + os.environ["PG_CONN_STR"] = "postgresql://postgres:postgres@localhost:5432/postgres" table_name = f"haystack_{request.node.name}" embedding_dimension = 768 vector_function = "cosine_similarity" @@ -12,13 +13,13 @@ def document_store(request): search_strategy = "exact_nearest_neighbor" store = PgvectorDocumentStore( - connection_string=connection_string, table_name=table_name, embedding_dimension=embedding_dimension, vector_function=vector_function, recreate_table=recreate_table, - search_strategy=search_strategy, + search_strategy=search_strategy ) + yield store store.delete_table() diff --git a/integrations/pgvector/tests/test_document_store.py b/integrations/pgvector/tests/test_document_store.py index 0325844a7..199577df7 100644 --- a/integrations/pgvector/tests/test_document_store.py +++ b/integrations/pgvector/tests/test_document_store.py @@ -41,7 +41,6 @@ def test_write_dataframe(self, document_store: PgvectorDocumentStore): assert retrieved_docs == docs def test_init(self): - os.environ["PG_CONN_STR"] = "postgresql://postgres:postgres@localhost:5432/postgres" document_store = PgvectorDocumentStore( table_name="my_table", embedding_dimension=512, @@ -65,7 +64,6 @@ def test_init(self): def test_to_dict(self): document_store = PgvectorDocumentStore( - connection_string="postgresql://postgres:postgres@localhost:5432/postgres", table_name="my_table", embedding_dimension=512, vector_function="l2_distance", From 63a9601aa6cd4ec76c08473d2a4e2c9597eea5cf Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Tue, 13 Feb 2024 09:22:39 +0100 Subject: [PATCH 03/11] completing docs --- integrations/pgvector/README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/integrations/pgvector/README.md b/integrations/pgvector/README.md index 277c732f4..5a41afc43 100644 --- a/integrations/pgvector/README.md +++ b/integrations/pgvector/README.md @@ -20,7 +20,11 @@ pip install pgvector-haystack ## Testing -TODO +Ensure you have PostgreSQL installed with the `pgvector` extension, for a quick setup using Docker: +``` +docker run -d -p 5432:5432 -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres -e POSTGRES_DB=postgres ankane/pgvector +``` + ```console hatch run test From 359a7b8adcabe7d18ed3077b508d7d473424dd87 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Tue, 13 Feb 2024 09:27:26 +0100 Subject: [PATCH 04/11] nit --- integrations/pgvector/examples/example.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/integrations/pgvector/examples/example.py b/integrations/pgvector/examples/example.py index dbbe495a1..8dbe45913 100644 --- a/integrations/pgvector/examples/example.py +++ b/integrations/pgvector/examples/example.py @@ -1,6 +1,7 @@ # Before running this example, ensure you have PostgreSQL installed with the pgvector extension. # For a quick setup using Docker: -# docker run -d -p 5432:5432 -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres -e POSTGRES_DB=postgres ankane/pgvector +# docker run -d -p 5432:5432 -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres -e POSTGRES_DB=postgres +# ankane/pgvector # Install required packages for this example, including pgvector-haystack and other libraries needed # for Markdown conversion and embeddings generation. Use the following command: From 696c5b963712dcbe66a409e454e2bb04580afaff Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Tue, 13 Feb 2024 09:27:49 +0100 Subject: [PATCH 05/11] nit --- integrations/pgvector/examples/example.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integrations/pgvector/examples/example.py b/integrations/pgvector/examples/example.py index 8dbe45913..14c2cba60 100644 --- a/integrations/pgvector/examples/example.py +++ b/integrations/pgvector/examples/example.py @@ -1,7 +1,7 @@ # Before running this example, ensure you have PostgreSQL installed with the pgvector extension. # For a quick setup using Docker: -# docker run -d -p 5432:5432 -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres -e POSTGRES_DB=postgres -# ankane/pgvector +# docker run -d -p 5432:5432 -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres +# -e POSTGRES_DB=postgres ankane/pgvector # Install required packages for this example, including pgvector-haystack and other libraries needed # for Markdown conversion and embeddings generation. Use the following command: From 88b4725e2ebd15b6605d073d26fc9089654762b7 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Tue, 13 Feb 2024 09:33:11 +0100 Subject: [PATCH 06/11] code formating --- integrations/pgvector/examples/example.py | 4 +++- .../document_stores/pgvector/document_store.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/integrations/pgvector/examples/example.py b/integrations/pgvector/examples/example.py index 14c2cba60..91da91d19 100644 --- a/integrations/pgvector/examples/example.py +++ b/integrations/pgvector/examples/example.py @@ -10,6 +10,7 @@ # Download some Markdown files to index. # git clone https://github.com/anakin87/neural-search-pills +import os import glob from haystack import Pipeline @@ -20,9 +21,10 @@ from haystack_integrations.components.retrievers.pgvector import PgvectorEmbeddingRetriever from haystack_integrations.document_stores.pgvector import PgvectorDocumentStore +os.environ["PG_CONN_STR"] = "postgresql://postgres:postgres@localhost:5432/postgres" + # Initialize PgvectorDocumentStore document_store = PgvectorDocumentStore( - connection_string="postgresql://postgres:postgres@localhost:5432/postgres", table_name="haystack_test", embedding_dimension=768, vector_function="cosine_similarity", diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py index 57a7b44e2..c8d619763 100644 --- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py +++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py @@ -118,7 +118,9 @@ def __init__( https://github.com/pgvector/pgvector?tab=readme-ov-file#hnsw """ - self.connection_string = connection_string if isinstance(connection_string, str) else connection_string.resolve_value() + self.connection_string = ( + connection_string if isinstance(connection_string, str) else connection_string.resolve_value() + ) self.table_name = table_name self.embedding_dimension = embedding_dimension if vector_function not in VALID_VECTOR_FUNCTIONS: From cdeb8b17db634b8e1aa653476e15e502dd0f1a5b Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Tue, 13 Feb 2024 10:17:02 +0100 Subject: [PATCH 07/11] linting and typing --- integrations/pgvector/examples/example.py | 2 +- integrations/pgvector/pyproject.toml | 2 ++ integrations/pgvector/tests/conftest.py | 3 ++- integrations/pgvector/tests/test_document_store.py | 1 - 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/integrations/pgvector/examples/example.py b/integrations/pgvector/examples/example.py index 91da91d19..764c915d1 100644 --- a/integrations/pgvector/examples/example.py +++ b/integrations/pgvector/examples/example.py @@ -10,8 +10,8 @@ # Download some Markdown files to index. # git clone https://github.com/anakin87/neural-search-pills -import os import glob +import os from haystack import Pipeline from haystack.components.converters import MarkdownToDocument diff --git a/integrations/pgvector/pyproject.toml b/integrations/pgvector/pyproject.toml index 65ded967f..f88937fe7 100644 --- a/integrations/pgvector/pyproject.toml +++ b/integrations/pgvector/pyproject.toml @@ -138,6 +138,8 @@ ignore = [ "S105", "S106", "S107", # Ignore complexity "C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915", + # ignore function-call-in-default-argument + "B008", ] unfixable = [ # Don't touch unused imports diff --git a/integrations/pgvector/tests/conftest.py b/integrations/pgvector/tests/conftest.py index 982ba95e3..068f2ac54 100644 --- a/integrations/pgvector/tests/conftest.py +++ b/integrations/pgvector/tests/conftest.py @@ -1,4 +1,5 @@ import os + import pytest from haystack_integrations.document_stores.pgvector import PgvectorDocumentStore @@ -17,7 +18,7 @@ def document_store(request): embedding_dimension=embedding_dimension, vector_function=vector_function, recreate_table=recreate_table, - search_strategy=search_strategy + search_strategy=search_strategy, ) yield store diff --git a/integrations/pgvector/tests/test_document_store.py b/integrations/pgvector/tests/test_document_store.py index 199577df7..732f0fcbc 100644 --- a/integrations/pgvector/tests/test_document_store.py +++ b/integrations/pgvector/tests/test_document_store.py @@ -2,7 +2,6 @@ # # SPDX-License-Identifier: Apache-2.0 -import os from unittest.mock import patch import pytest From 7b87fe096ded72681115e06457f5e7ebbd9b8f9e Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Tue, 13 Feb 2024 16:29:24 +0100 Subject: [PATCH 08/11] fixing tests --- .../document_stores/pgvector/document_store.py | 8 +++----- integrations/pgvector/tests/test_document_store.py | 3 +-- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py index c8d619763..498413c40 100644 --- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py +++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py @@ -118,9 +118,7 @@ def __init__( https://github.com/pgvector/pgvector?tab=readme-ov-file#hnsw """ - self.connection_string = ( - connection_string if isinstance(connection_string, str) else connection_string.resolve_value() - ) + self.connection_string = connection_string self.table_name = table_name self.embedding_dimension = embedding_dimension if vector_function not in VALID_VECTOR_FUNCTIONS: @@ -133,7 +131,7 @@ def __init__( self.hnsw_index_creation_kwargs = hnsw_index_creation_kwargs or {} self.hnsw_ef_search = hnsw_ef_search - connection = connect(self.connection_string) + connection = connect(self.connection_string.resolve_value()) connection.autocommit = True self._connection = connection @@ -154,7 +152,7 @@ def __init__( def to_dict(self) -> Dict[str, Any]: return default_to_dict( self, - connection_string=self.connection_string, + connection_string=self.connection_string.to_dict(), table_name=self.table_name, embedding_dimension=self.embedding_dimension, vector_function=self.vector_function, diff --git a/integrations/pgvector/tests/test_document_store.py b/integrations/pgvector/tests/test_document_store.py index 732f0fcbc..b4ad2f03c 100644 --- a/integrations/pgvector/tests/test_document_store.py +++ b/integrations/pgvector/tests/test_document_store.py @@ -51,7 +51,6 @@ def test_init(self): hnsw_ef_search=50, ) - assert document_store.connection_string == "postgresql://postgres:postgres@localhost:5432/postgres" assert document_store.table_name == "my_table" assert document_store.embedding_dimension == 512 assert document_store.vector_function == "l2_distance" @@ -76,7 +75,7 @@ def test_to_dict(self): assert document_store.to_dict() == { "type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore", "init_parameters": { - "connection_string": "postgresql://postgres:postgres@localhost:5432/postgres", + "connection_string": {'env_vars': ['PG_CONN_STR'], 'strict': True, 'type': 'env_var'}, "table_name": "my_table", "embedding_dimension": 512, "vector_function": "l2_distance", From cc92e1b24ec734a10fc2f6a92e995181fe240fc6 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Tue, 13 Feb 2024 17:33:31 +0100 Subject: [PATCH 09/11] adding custom from_dict --- integrations/pgvector/README.md | 3 ++- .../pgvector/embedding_retriever.py | 5 ++--- .../pgvector/document_store.py | 19 ++++++++++++++----- integrations/pgvector/tests/test_retriever.py | 9 ++++++--- 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/integrations/pgvector/README.md b/integrations/pgvector/README.md index 5a41afc43..ed6772d5c 100644 --- a/integrations/pgvector/README.md +++ b/integrations/pgvector/README.md @@ -20,11 +20,12 @@ pip install pgvector-haystack ## Testing -Ensure you have PostgreSQL installed with the `pgvector` extension, for a quick setup using Docker: +Ensure that you have a PostgreSQL running with the `pgvector` extension. For a quick setup using Docker, run: ``` docker run -d -p 5432:5432 -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres -e POSTGRES_DB=postgres ankane/pgvector ``` +then run the tests: ```console hatch run test diff --git a/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/embedding_retriever.py b/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/embedding_retriever.py index 26807e9bd..4b8df868b 100644 --- a/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/embedding_retriever.py +++ b/integrations/pgvector/src/haystack_integrations/components/retrievers/pgvector/embedding_retriever.py @@ -68,9 +68,8 @@ def to_dict(self) -> Dict[str, Any]: @classmethod def from_dict(cls, data: Dict[str, Any]) -> "PgvectorEmbeddingRetriever": - data["init_parameters"]["document_store"] = default_from_dict( - PgvectorDocumentStore, data["init_parameters"]["document_store"] - ) + doc_store_params = data["init_parameters"]["document_store"] + data["init_parameters"]["document_store"] = PgvectorDocumentStore.from_dict(doc_store_params) return default_from_dict(cls, data) @component.output_types(documents=List[Document]) diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py index 498413c40..1ac725d98 100644 --- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py +++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py @@ -4,7 +4,7 @@ import logging from typing import Any, Dict, List, Literal, Optional -from haystack import default_to_dict +from haystack import default_to_dict, default_from_dict from haystack.dataclasses.document import ByteStream, Document from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError from haystack.document_stores.types import DuplicatePolicy @@ -163,6 +163,13 @@ def to_dict(self) -> Dict[str, Any]: hnsw_ef_search=self.hnsw_ef_search, ) + @classmethod + def from_dict(cls, init_parameters: Dict[str, Any]) -> "PgvectorDocumentStore": + conn_str_data = init_parameters['init_parameters']["connection_string"] + conn_str = Secret.from_dict(conn_str_data) if conn_str_data is not None else None + init_parameters['init_parameters']["connection_string"] = conn_str + return default_from_dict(cls, init_parameters) + def _execute_sql( self, sql_query: Query, params: Optional[tuple] = None, error_msg: str = "", cursor: Optional[Cursor] = None ): @@ -222,7 +229,7 @@ def _handle_hnsw(self): ) self._execute_sql(sql_set_hnsw_ef_search, error_msg="Could not set hnsw.ef_search") - index_esists = bool( + index_exists = bool( self._execute_sql( "SELECT 1 FROM pg_indexes WHERE tablename = %s AND indexname = %s", (self.table_name, HNSW_INDEX_NAME), @@ -230,7 +237,7 @@ def _handle_hnsw(self): ).fetchone() ) - if index_esists and not self.hnsw_recreate_index_if_exists: + if index_exists and not self.hnsw_recreate_index_if_exists: logger.warning( "HNSW index already exists and won't be recreated. " "If you want to recreate it, pass 'hnsw_recreate_index_if_exists=True' to the " @@ -374,7 +381,8 @@ def write_documents(self, documents: List[Document], policy: DuplicatePolicy = D return written_docs - def _from_haystack_to_pg_documents(self, documents: List[Document]) -> List[Dict[str, Any]]: + @staticmethod + def _from_haystack_to_pg_documents(documents: List[Document]) -> List[Dict[str, Any]]: """ Internal method to convert a list of Haystack Documents to a list of dictionaries that can be used to insert documents into the PgvectorDocumentStore. @@ -396,7 +404,8 @@ def _from_haystack_to_pg_documents(self, documents: List[Document]) -> List[Dict return db_documents - def _from_pg_to_haystack_documents(self, documents: List[Dict[str, Any]]) -> List[Document]: + @staticmethod + def _from_pg_to_haystack_documents(documents: List[Dict[str, Any]]) -> List[Document]: """ Internal method to convert a list of dictionaries from pgvector to a list of Haystack Documents. """ diff --git a/integrations/pgvector/tests/test_retriever.py b/integrations/pgvector/tests/test_retriever.py index cca6bbc9f..36f9f6f66 100644 --- a/integrations/pgvector/tests/test_retriever.py +++ b/integrations/pgvector/tests/test_retriever.py @@ -1,12 +1,15 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 +import os from unittest.mock import Mock from haystack.dataclasses import Document from haystack_integrations.components.retrievers.pgvector import PgvectorEmbeddingRetriever from haystack_integrations.document_stores.pgvector import PgvectorDocumentStore +from haystack.utils.auth import EnvVarSecret + class TestRetriever: def test_init_default(self, document_store: PgvectorDocumentStore): @@ -37,7 +40,7 @@ def test_to_dict(self, document_store: PgvectorDocumentStore): "document_store": { "type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore", "init_parameters": { - "connection_string": "postgresql://postgres:postgres@localhost:5432/postgres", + "connection_string": {'env_vars': ['PG_CONN_STR'], 'strict': True, 'type': 'env_var'}, "table_name": "haystack_test_to_dict", "embedding_dimension": 768, "vector_function": "cosine_similarity", @@ -62,7 +65,7 @@ def test_from_dict(self): "document_store": { "type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore", "init_parameters": { - "connection_string": "postgresql://postgres:postgres@localhost:5432/postgres", + "connection_string": {'env_vars': ['PG_CONN_STR'], 'strict': True, 'type': 'env_var'}, "table_name": "haystack_test_to_dict", "embedding_dimension": 768, "vector_function": "cosine_similarity", @@ -83,7 +86,7 @@ def test_from_dict(self): document_store = retriever.document_store assert isinstance(document_store, PgvectorDocumentStore) - assert document_store.connection_string == "postgresql://postgres:postgres@localhost:5432/postgres" + assert isinstance(document_store.connection_string, EnvVarSecret) assert document_store.table_name == "haystack_test_to_dict" assert document_store.embedding_dimension == 768 assert document_store.vector_function == "cosine_similarity" From 622c50c1aba04cc0ff7b2e7e550a2f4e6455ecd1 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Tue, 13 Feb 2024 20:30:32 +0100 Subject: [PATCH 10/11] adding test coverage --- integrations/pgvector/README.md | 6 ++++++ integrations/pgvector/pyproject.toml | 11 +++++------ .../document_stores/pgvector/document_store.py | 6 +++--- integrations/pgvector/tests/test_document_store.py | 2 +- integrations/pgvector/tests/test_retriever.py | 8 +++----- 5 files changed, 18 insertions(+), 15 deletions(-) diff --git a/integrations/pgvector/README.md b/integrations/pgvector/README.md index ed6772d5c..a2d325c54 100644 --- a/integrations/pgvector/README.md +++ b/integrations/pgvector/README.md @@ -31,6 +31,12 @@ then run the tests: hatch run test ``` +To run the coverage report: + +```console +hatch run cov +``` + ## License `pgvector-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license. diff --git a/integrations/pgvector/pyproject.toml b/integrations/pgvector/pyproject.toml index f88937fe7..178d9f7e8 100644 --- a/integrations/pgvector/pyproject.toml +++ b/integrations/pgvector/pyproject.toml @@ -158,23 +158,22 @@ ban-relative-imports = "parents" # examples can contain "print" commands "examples/**/*" = ["T201"] + [tool.coverage.run] -source_pkgs = ["src", "tests"] +source = ["haystack_integrations"] branch = true parallel = true - -[tool.coverage.paths] -weaviate_haystack = ["src/haystack_integrations", "*/pgvector-haystack/src"] -tests = ["tests", "*/pgvector-haystack/tests"] - [tool.coverage.report] +omit = ["*/tests/*", "*/__init__.py"] +show_missing=true exclude_lines = [ "no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:", ] + [[tool.mypy.overrides]] module = [ "haystack.*", diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py index 1ac725d98..2210c401b 100644 --- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py +++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py @@ -4,7 +4,7 @@ import logging from typing import Any, Dict, List, Literal, Optional -from haystack import default_to_dict, default_from_dict +from haystack import default_from_dict, default_to_dict from haystack.dataclasses.document import ByteStream, Document from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError from haystack.document_stores.types import DuplicatePolicy @@ -165,9 +165,9 @@ def to_dict(self) -> Dict[str, Any]: @classmethod def from_dict(cls, init_parameters: Dict[str, Any]) -> "PgvectorDocumentStore": - conn_str_data = init_parameters['init_parameters']["connection_string"] + conn_str_data = init_parameters["init_parameters"]["connection_string"] conn_str = Secret.from_dict(conn_str_data) if conn_str_data is not None else None - init_parameters['init_parameters']["connection_string"] = conn_str + init_parameters["init_parameters"]["connection_string"] = conn_str return default_from_dict(cls, init_parameters) def _execute_sql( diff --git a/integrations/pgvector/tests/test_document_store.py b/integrations/pgvector/tests/test_document_store.py index b4ad2f03c..1e158f134 100644 --- a/integrations/pgvector/tests/test_document_store.py +++ b/integrations/pgvector/tests/test_document_store.py @@ -75,7 +75,7 @@ def test_to_dict(self): assert document_store.to_dict() == { "type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore", "init_parameters": { - "connection_string": {'env_vars': ['PG_CONN_STR'], 'strict': True, 'type': 'env_var'}, + "connection_string": {"env_vars": ["PG_CONN_STR"], "strict": True, "type": "env_var"}, "table_name": "my_table", "embedding_dimension": 512, "vector_function": "l2_distance", diff --git a/integrations/pgvector/tests/test_retriever.py b/integrations/pgvector/tests/test_retriever.py index 36f9f6f66..8eab10de5 100644 --- a/integrations/pgvector/tests/test_retriever.py +++ b/integrations/pgvector/tests/test_retriever.py @@ -1,15 +1,13 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -import os from unittest.mock import Mock from haystack.dataclasses import Document +from haystack.utils.auth import EnvVarSecret from haystack_integrations.components.retrievers.pgvector import PgvectorEmbeddingRetriever from haystack_integrations.document_stores.pgvector import PgvectorDocumentStore -from haystack.utils.auth import EnvVarSecret - class TestRetriever: def test_init_default(self, document_store: PgvectorDocumentStore): @@ -40,7 +38,7 @@ def test_to_dict(self, document_store: PgvectorDocumentStore): "document_store": { "type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore", "init_parameters": { - "connection_string": {'env_vars': ['PG_CONN_STR'], 'strict': True, 'type': 'env_var'}, + "connection_string": {"env_vars": ["PG_CONN_STR"], "strict": True, "type": "env_var"}, "table_name": "haystack_test_to_dict", "embedding_dimension": 768, "vector_function": "cosine_similarity", @@ -65,7 +63,7 @@ def test_from_dict(self): "document_store": { "type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore", "init_parameters": { - "connection_string": {'env_vars': ['PG_CONN_STR'], 'strict': True, 'type': 'env_var'}, + "connection_string": {"env_vars": ["PG_CONN_STR"], "strict": True, "type": "env_var"}, "table_name": "haystack_test_to_dict", "embedding_dimension": 768, "vector_function": "cosine_similarity", From daee33ce544afb203ec87f840e509a9b05943e61 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Wed, 14 Feb 2024 10:35:30 +0100 Subject: [PATCH 11/11] use deserialize_secrets_inplace() --- .../document_stores/pgvector/document_store.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py index 2210c401b..798c75276 100644 --- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py +++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py @@ -8,7 +8,7 @@ from haystack.dataclasses.document import ByteStream, Document from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError from haystack.document_stores.types import DuplicatePolicy -from haystack.utils.auth import Secret +from haystack.utils.auth import Secret, deserialize_secrets_inplace from haystack.utils.filters import convert from psycopg import Error, IntegrityError, connect from psycopg.abc import Query @@ -164,11 +164,9 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, init_parameters: Dict[str, Any]) -> "PgvectorDocumentStore": - conn_str_data = init_parameters["init_parameters"]["connection_string"] - conn_str = Secret.from_dict(conn_str_data) if conn_str_data is not None else None - init_parameters["init_parameters"]["connection_string"] = conn_str - return default_from_dict(cls, init_parameters) + def from_dict(cls, data: Dict[str, Any]) -> "PgvectorDocumentStore": + deserialize_secrets_inplace(data["init_parameters"], ["connection_string"]) + return default_from_dict(cls, data) def _execute_sql( self, sql_query: Query, params: Optional[tuple] = None, error_msg: str = "", cursor: Optional[Cursor] = None