Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

docs: explain different connection string formats in the docstring #1132

Merged
merged 9 commits into from
Oct 15, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def __init__(
self,
*,
connection_string: Secret = Secret.from_env_var("PG_CONN_STR"),
connection_param_kwargs: Optional[Dict[str, Secret]] = None,
table_name: str = "haystack_documents",
language: str = "english",
embedding_dimension: int = 768,
Expand All @@ -97,6 +98,12 @@ def __init__(

:param connection_string: The connection string to use to connect to the PostgreSQL database, defined as an
environment variable, e.g.: `PG_CONN_STR="postgresql://USER:PASSWORD@HOST:PORT/DB_NAME"`
:param connection_param_kwargs: A dictionary of parameters for the PostgreSQL connection.
You can specify individual connection parameters here instead of a `connection_string`.
Common parameters include 'user', 'password', 'host', 'port', & 'dbname'. For a complete list, refer to the
[PostgreSQL documentation](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS).
Use the `Secret.from_env_var()` method to securely load parameters from environment variables.
Note that when this dictionary is non-empty, the connection is created from these parameters only and the `connection_string` is not used.
:param table_name: The name of the table to use to store Haystack documents.
:param language: The language to be used to parse query and document content in keyword retrieval.
To see the list of available languages, you can run the following SQL query in your PostgreSQL database:
Expand Down Expand Up @@ -132,6 +139,7 @@ def __init__(
"""

self.connection_string = connection_string
self.connection_param_kwargs = connection_param_kwargs or {}
self.table_name = table_name
self.embedding_dimension = embedding_dimension
if vector_function not in VALID_VECTOR_FUNCTIONS:
Expand Down Expand Up @@ -172,8 +180,15 @@ def connection(self):
return self._connection

def _create_connection(self):
conn_str = self.connection_string.resolve_value() or ""
connection = connect(conn_str)
# if connection_param_kwargs are provided use them
if self.connection_param_kwargs:
params = {key: value.resolve_value() for key, value in self.connection_param_kwargs.items()}
connection = connect(**params)
# otherwise, use the connection string
else:
conn_str = self.connection_string.resolve_value() or ""
connection = connect(conn_str)

connection.autocommit = True
connection.execute("CREATE EXTENSION IF NOT EXISTS vector")
register_vector(connection) # Note: this must be called before creating the cursors.
Expand Down Expand Up @@ -214,6 +229,7 @@ def to_dict(self) -> Dict[str, Any]:
hnsw_ef_search=self.hnsw_ef_search,
keyword_index_name=self.keyword_index_name,
language=self.language,
connection_param_kwargs={key: value.to_dict() for key, value in self.connection_param_kwargs.items()},
)

@classmethod
Expand All @@ -226,6 +242,9 @@ def from_dict(cls, data: Dict[str, Any]) -> "PgvectorDocumentStore":
:returns:
Deserialized component.
"""
connection_params = data["init_parameters"]["connection_param_kwargs"]
deserialize_secrets_inplace(connection_params, connection_params.keys())

deserialize_secrets_inplace(data["init_parameters"], ["connection_string"])
return default_from_dict(cls, data)

Expand Down
182 changes: 182 additions & 0 deletions integrations/pgvector/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,50 @@ def test_init(monkeypatch):
assert document_store.keyword_index_name == "my_keyword_index"


@pytest.mark.usefixtures("patches_for_unit_tests")
def test_init_with_connection_param_kwargs(monkeypatch):
    """Check that init arguments, including `connection_param_kwargs`, are stored on the instance."""
    env_values = (
        ("PG_PASSWORD", "postgres_password"),
        ("PG_USER", "postgres_user"),
        ("PG_HOST", "postgres_host"),
        ("PG_PORT", "postgres_port"),
    )
    for env_name, env_value in env_values:
        monkeypatch.setenv(env_name, env_value)

    # Connection parameter name -> environment variable backing its Secret.
    param_env_vars = {"user": "PG_USER", "host": "PG_HOST", "password": "PG_PASSWORD", "port": "PG_PORT"}

    store = PgvectorDocumentStore(
        table_name="my_table",
        embedding_dimension=512,
        vector_function="l2_distance",
        recreate_table=True,
        search_strategy="hnsw",
        hnsw_recreate_index_if_exists=True,
        hnsw_index_creation_kwargs={"m": 32, "ef_construction": 128},
        hnsw_index_name="my_hnsw_index",
        hnsw_ef_search=50,
        keyword_index_name="my_keyword_index",
        connection_param_kwargs={param: Secret.from_env_var(env) for param, env in param_env_vars.items()},
    )

    # Built separately from the dict handed to the constructor, so the comparison
    # is between distinct Secret objects rather than the very same instances.
    expected_connection_params = {param: Secret.from_env_var(env) for param, env in param_env_vars.items()}

    assert store.table_name == "my_table"
    assert store.embedding_dimension == 512
    assert store.vector_function == "l2_distance"
    assert store.recreate_table
    assert store.search_strategy == "hnsw"
    assert store.hnsw_recreate_index_if_exists
    assert store.hnsw_index_creation_kwargs == {"m": 32, "ef_construction": 128}
    assert store.hnsw_index_name == "my_hnsw_index"
    assert store.hnsw_ef_search == 50
    assert store.keyword_index_name == "my_keyword_index"
    assert store.connection_param_kwargs == expected_connection_params


@pytest.mark.usefixtures("patches_for_unit_tests")
def test_to_dict(monkeypatch):
monkeypatch.setenv("PG_CONN_STR", "some_connection_string")
Expand Down Expand Up @@ -103,6 +147,144 @@ def test_to_dict(monkeypatch):
"hnsw_index_name": "my_hnsw_index",
"hnsw_ef_search": 50,
"keyword_index_name": "my_keyword_index",
"connection_param_kwargs": {},
},
}


def test_from_dict(monkeypatch):
    """Check that `from_dict` rebuilds a store from serialized data with empty `connection_param_kwargs`."""
    monkeypatch.setenv("PG_CONN_STR", "some_connection_string")

    init_parameters = {
        "connection_string": {"env_vars": ["PG_CONN_STR"], "strict": True, "type": "env_var"},
        "table_name": "my_table",
        "embedding_dimension": 512,
        "vector_function": "l2_distance",
        "recreate_table": True,
        "search_strategy": "hnsw",
        "hnsw_recreate_index_if_exists": True,
        "language": "english",
        "hnsw_index_creation_kwargs": {"m": 32, "ef_construction": 128},
        "hnsw_index_name": "my_hnsw_index",
        "hnsw_ef_search": 50,
        "keyword_index_name": "my_keyword_index",
        "connection_param_kwargs": {},
    }
    serialized = {
        "type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore",
        "init_parameters": init_parameters,
    }

    store = PgvectorDocumentStore.from_dict(serialized)

    assert store.table_name == "my_table"
    assert store.embedding_dimension == 512
    assert store.vector_function == "l2_distance"
    assert store.recreate_table
    assert store.search_strategy == "hnsw"
    assert store.hnsw_recreate_index_if_exists
    assert store.hnsw_index_creation_kwargs == {"m": 32, "ef_construction": 128}
    assert store.hnsw_index_name == "my_hnsw_index"
    assert store.hnsw_ef_search == 50
    assert store.keyword_index_name == "my_keyword_index"
    assert store.connection_param_kwargs == {}


def test_from_dict_with_connection_param_kwargs(monkeypatch):
    """Check that `from_dict` turns serialized `connection_param_kwargs` entries back into Secrets."""
    env_values = (
        ("PG_PASSWORD", "postgres_password"),
        ("PG_USER", "postgres_user"),
        ("PG_HOST", "postgres_host"),
        ("PG_PORT", "postgres_port"),
    )
    for env_name, env_value in env_values:
        monkeypatch.setenv(env_name, env_value)

    def serialized_env_secret(env_name):
        # Serialized form of an env-var Secret as used throughout this test's input data.
        return {"env_vars": [env_name], "strict": True, "type": "env_var"}

    # Connection parameter name -> environment variable backing its Secret.
    param_env_vars = {"user": "PG_USER", "host": "PG_HOST", "password": "PG_PASSWORD", "port": "PG_PORT"}

    init_parameters = {
        "connection_string": serialized_env_secret("PG_CONN_STR"),
        "table_name": "my_table",
        "embedding_dimension": 512,
        "vector_function": "l2_distance",
        "recreate_table": True,
        "search_strategy": "hnsw",
        "hnsw_recreate_index_if_exists": True,
        "language": "english",
        "hnsw_index_creation_kwargs": {"m": 32, "ef_construction": 128},
        "hnsw_index_name": "my_hnsw_index",
        "hnsw_ef_search": 50,
        "keyword_index_name": "my_keyword_index",
        "connection_param_kwargs": {param: serialized_env_secret(env) for param, env in param_env_vars.items()},
    }
    serialized = {
        "type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore",
        "init_parameters": init_parameters,
    }

    store = PgvectorDocumentStore.from_dict(serialized)

    expected_connection_params = {param: Secret.from_env_var(env) for param, env in param_env_vars.items()}

    assert store.table_name == "my_table"
    assert store.embedding_dimension == 512
    assert store.vector_function == "l2_distance"
    assert store.recreate_table
    assert store.search_strategy == "hnsw"
    assert store.hnsw_recreate_index_if_exists
    assert store.hnsw_index_creation_kwargs == {"m": 32, "ef_construction": 128}
    assert store.hnsw_index_name == "my_hnsw_index"
    assert store.hnsw_ef_search == 50
    assert store.keyword_index_name == "my_keyword_index"
    assert store.connection_param_kwargs == expected_connection_params


def test_to_dict_with_connection_param_kwargs(monkeypatch):
    """Check that `to_dict` serializes every `connection_param_kwargs` Secret alongside the other init parameters."""
    env_values = (
        ("PG_PASSWORD", "postgres_password"),
        ("PG_USER", "postgres_user"),
        ("PG_HOST", "postgres_host"),
        ("PG_PORT", "postgres_port"),
    )
    for env_name, env_value in env_values:
        monkeypatch.setenv(env_name, env_value)

    def serialized_env_secret(env_name):
        # Serialized form of an env-var Secret that the expected output below is built from.
        return {"env_vars": [env_name], "strict": True, "type": "env_var"}

    # Connection parameter name -> environment variable backing its Secret.
    param_env_vars = {"user": "PG_USER", "host": "PG_HOST", "password": "PG_PASSWORD", "port": "PG_PORT"}

    store = PgvectorDocumentStore(
        table_name="my_table",
        embedding_dimension=512,
        vector_function="l2_distance",
        recreate_table=True,
        search_strategy="hnsw",
        hnsw_recreate_index_if_exists=True,
        hnsw_index_creation_kwargs={"m": 32, "ef_construction": 128},
        hnsw_index_name="my_hnsw_index",
        hnsw_ef_search=50,
        keyword_index_name="my_keyword_index",
        connection_param_kwargs={param: Secret.from_env_var(env) for param, env in param_env_vars.items()},
    )

    expected_init_parameters = {
        "connection_string": serialized_env_secret("PG_CONN_STR"),
        "table_name": "my_table",
        "embedding_dimension": 512,
        "vector_function": "l2_distance",
        "recreate_table": True,
        "search_strategy": "hnsw",
        "hnsw_recreate_index_if_exists": True,
        "language": "english",
        "hnsw_index_creation_kwargs": {"m": 32, "ef_construction": 128},
        "hnsw_index_name": "my_hnsw_index",
        "hnsw_ef_search": 50,
        "keyword_index_name": "my_keyword_index",
        "connection_param_kwargs": {param: serialized_env_secret(env) for param, env in param_env_vars.items()},
    }

    assert store.to_dict() == {
        "type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore",
        "init_parameters": expected_init_parameters,
    }

Expand Down
5 changes: 5 additions & 0 deletions integrations/pgvector/tests/test_retrievers.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def test_to_dict(self, mock_store):
"hnsw_index_name": "haystack_hnsw_index",
"hnsw_ef_search": None,
"keyword_index_name": "haystack_keyword_index",
"connection_param_kwargs": {},
},
},
"filters": {"field": "value"},
Expand Down Expand Up @@ -91,6 +92,7 @@ def test_from_dict(self, monkeypatch):
"hnsw_index_name": "haystack_hnsw_index",
"hnsw_ef_search": None,
"keyword_index_name": "haystack_keyword_index",
"connection_param_kwargs": {},
},
},
"filters": {"field": "value"},
Expand Down Expand Up @@ -186,6 +188,7 @@ def test_to_dict(self, mock_store):
"hnsw_index_name": "haystack_hnsw_index",
"hnsw_ef_search": None,
"keyword_index_name": "haystack_keyword_index",
"connection_param_kwargs": {},
},
},
"filters": {"field": "value"},
Expand Down Expand Up @@ -215,6 +218,7 @@ def test_from_dict(self, monkeypatch):
"hnsw_index_name": "haystack_hnsw_index",
"hnsw_ef_search": None,
"keyword_index_name": "haystack_keyword_index",
"connection_param_kwargs": {},
},
},
"filters": {"field": "value"},
Expand Down Expand Up @@ -263,6 +267,7 @@ def test_from_dict_without_filter_policy(self, monkeypatch):
"hnsw_index_name": "haystack_hnsw_index",
"hnsw_ef_search": None,
"keyword_index_name": "haystack_keyword_index",
"connection_param_kwargs": {},
},
},
"filters": {"field": "value"},
Expand Down