From 648793a3724940d8b4a0bbbad9398d7c80a807c1 Mon Sep 17 00:00:00 2001 From: AnesBenmerzoug Date: Wed, 6 Nov 2024 10:19:14 +0100 Subject: [PATCH 1/9] Allow passing boto3 config to AmazonBedrockChatGenerator --- .../amazon_bedrock/chat/chat_generator.py | 14 +++++++++++++- .../amazon_bedrock/tests/test_chat_generator.py | 5 +++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py index 6bb3cc301..44aba9b37 100644 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py @@ -3,6 +3,7 @@ import re from typing import Any, Callable, ClassVar, Dict, List, Optional, Type +from botocore.config import Config from botocore.exceptions import ClientError from haystack import component, default_from_dict, default_to_dict from haystack.dataclasses import ChatMessage, StreamingChunk @@ -77,6 +78,7 @@ def __init__( stop_words: Optional[List[str]] = None, streaming_callback: Optional[Callable[[StreamingChunk], None]] = None, truncate: Optional[bool] = True, + boto3_config: Optional[Dict[str, Any]] = None, ): """ Initializes the `AmazonBedrockChatGenerator` with the provided parameters. The parameters are passed to the @@ -110,6 +112,11 @@ def __init__( [StreamingChunk](https://docs.haystack.deepset.ai/docs/data-classes#streamingchunk) object and switches the streaming mode on. :param truncate: Whether to truncate the prompt messages or not. + :param boto3_config: The configuration for the boto3 client. + + :raises ValueError: If the model name is empty or None. + :raises AmazonBedrockConfigurationError: If the AWS environment is not configured correctly or the model is + not supported. """ if not model: msg = "'model' cannot be None or empty string" @@ -121,6 +128,7 @@ def __init__( self.aws_region_name = aws_region_name self.aws_profile_name = aws_profile_name self.truncate = truncate + self.boto3_config = boto3_config # get the model adapter for the given model model_adapter_cls = self.get_model_adapter(model=model) @@ -141,7 +149,10 @@ def resolve_secret(secret: Optional[Secret]) -> Optional[str]: aws_region_name=resolve_secret(aws_region_name), aws_profile_name=resolve_secret(aws_profile_name), ) - self.client = session.client("bedrock-runtime") + config: Optional[Config] = None + if self.boto3_config: + config = Config(**self.boto3_config) + self.client = session.client("bedrock-runtime", config=config) except Exception as exception: msg = ( "Could not connect to Amazon Bedrock. Make sure the AWS environment is configured correctly. " @@ -256,6 +267,7 @@ def to_dict(self) -> Dict[str, Any]: generation_kwargs=self.model_adapter.generation_kwargs, streaming_callback=callback_name, truncate=self.truncate, + boto3_config=self.boto3_config, ) @classmethod diff --git a/integrations/amazon_bedrock/tests/test_chat_generator.py b/integrations/amazon_bedrock/tests/test_chat_generator.py index 571e03eb2..02b75bab6 100644 --- a/integrations/amazon_bedrock/tests/test_chat_generator.py +++ b/integrations/amazon_bedrock/tests/test_chat_generator.py @@ -48,6 +48,7 @@ def test_to_dict(mock_boto3_session): "stop_words": [], "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk", "truncate": True, + "boto3_config": None, }, } @@ -71,12 +72,16 @@ def test_from_dict(mock_boto3_session): "generation_kwargs": {"temperature": 0.7}, "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk", "truncate": True, + "boto3_config": { + "read_timeout": 1000, + }, }, } ) assert generator.model == "anthropic.claude-v2" assert generator.model_adapter.generation_kwargs == {"temperature": 0.7} assert generator.streaming_callback == print_streaming_chunk + assert generator.boto3_config == {"read_timeout": 1000} def test_default_constructor(mock_boto3_session, set_env_variables): From fea2ee7cd44932989f5966471b62effe4cb321aa Mon Sep 17 00:00:00 2001 From: AnesBenmerzoug Date: Wed, 6 Nov 2024 10:23:10 +0100 Subject: [PATCH 2/9] Allow passing boto3 config to AmazonBedrockDocumentEmbedder --- .../embedders/amazon_bedrock/document_embedder.py | 11 ++++++++++- .../amazon_bedrock/tests/test_document_embedder.py | 5 +++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/embedders/amazon_bedrock/document_embedder.py b/integrations/amazon_bedrock/src/haystack_integrations/components/embedders/amazon_bedrock/document_embedder.py index 1b8fde124..c4c216e69 100755 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/embedders/amazon_bedrock/document_embedder.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/embedders/amazon_bedrock/document_embedder.py @@ -2,6 +2,7 @@ import logging from typing import Any, Dict, List, Literal, Optional +from botocore.config import Config from botocore.exceptions import ClientError from haystack import component, default_from_dict, default_to_dict from haystack.dataclasses import Document @@ -73,6 +74,7 @@ def __init__( progress_bar: bool = True, meta_fields_to_embed: Optional[List[str]] = None, embedding_separator: str = "\n", + boto3_config: Optional[Dict[str, Any]] = None, **kwargs, ): """ @@ -98,6 +100,7 @@ def __init__( to keep the logs clean. :param meta_fields_to_embed: List of meta fields that should be embedded along with the Document text. :param embedding_separator: Separator used to concatenate the meta fields to the Document text. + :param boto3_config: The configuration for the boto3 client. :param kwargs: Additional parameters to pass for model inference. For example, `input_type` and `truncate` for Cohere models. :raises ValueError: If the model is not supported. @@ -110,6 +113,8 @@ def __init__( ) raise ValueError(msg) + self.boto3_config = boto3_config + def resolve_secret(secret: Optional[Secret]) -> Optional[str]: return secret.resolve_value() if secret else None @@ -121,7 +126,10 @@ def resolve_secret(secret: Optional[Secret]) -> Optional[str]: aws_region_name=resolve_secret(aws_region_name), aws_profile_name=resolve_secret(aws_profile_name), ) - self._client = session.client("bedrock-runtime") + config: Optional[Config] = None + if self.boto3_config: + config = Config(**self.boto3_config) + self._client = session.client("bedrock-runtime", config=config) except Exception as exception: msg = ( "Could not connect to Amazon Bedrock. Make sure the AWS environment is configured correctly. " @@ -269,6 +277,7 @@ def to_dict(self) -> Dict[str, Any]: progress_bar=self.progress_bar, meta_fields_to_embed=self.meta_fields_to_embed, embedding_separator=self.embedding_separator, + boto3_config=self.boto3_config, **self.kwargs, ) diff --git a/integrations/amazon_bedrock/tests/test_document_embedder.py b/integrations/amazon_bedrock/tests/test_document_embedder.py index 9856c97bb..a405a0af6 100644 --- a/integrations/amazon_bedrock/tests/test_document_embedder.py +++ b/integrations/amazon_bedrock/tests/test_document_embedder.py @@ -86,6 +86,7 @@ def test_to_dict(self, mock_boto3_session): "progress_bar": True, "meta_fields_to_embed": [], "embedding_separator": "\n", + "boto3_config": None, }, } @@ -106,6 +107,9 @@ def test_from_dict(self, mock_boto3_session): "progress_bar": True, "meta_fields_to_embed": [], "embedding_separator": "\n", + "boto3_config": { + "read_timeout": 1000, + }, }, } @@ -117,6 +121,7 @@ def test_from_dict(self, mock_boto3_session): assert embedder.progress_bar assert embedder.meta_fields_to_embed == [] assert embedder.embedding_separator == "\n" + assert embedder.boto3_config == {"read_timeout": 1000} def test_init_invalid_model(self): with pytest.raises(ValueError): From c9e6df9790e664671755e3b7ce85665411729f25 Mon Sep 17 00:00:00 2001 From: AnesBenmerzoug Date: Wed, 6 Nov 2024 10:27:31 +0100 Subject: [PATCH 3/9] Allow passing boto3 config to AmazonBedrockTextEmbedder --- .../amazon_bedrock/document_embedder.py | 23 ++++++++-------- .../embedders/amazon_bedrock/text_embedder.py | 26 ++++++++++++------- .../tests/test_text_embedder.py | 5 ++++ 3 files changed, 33 insertions(+), 21 deletions(-) diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/embedders/amazon_bedrock/document_embedder.py b/integrations/amazon_bedrock/src/haystack_integrations/components/embedders/amazon_bedrock/document_embedder.py index c4c216e69..9dc8cbcc5 100755 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/embedders/amazon_bedrock/document_embedder.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/embedders/amazon_bedrock/document_embedder.py @@ -113,7 +113,18 @@ def __init__( ) raise ValueError(msg) + self.model = model + self.aws_access_key_id = aws_access_key_id + self.aws_secret_access_key = aws_secret_access_key + self.aws_session_token = aws_session_token + self.aws_region_name = aws_region_name + self.aws_profile_name = aws_profile_name + self.batch_size = batch_size + self.progress_bar = progress_bar + self.meta_fields_to_embed = meta_fields_to_embed or [] + self.embedding_separator = embedding_separator self.boto3_config = boto3_config + self.kwargs = kwargs def resolve_secret(secret: Optional[Secret]) -> Optional[str]: return secret.resolve_value() if secret else None @@ -137,18 +148,6 @@ def resolve_secret(secret: Optional[Secret]) -> Optional[str]: ) raise AmazonBedrockConfigurationError(msg) from exception - self.model = model - self.aws_access_key_id = aws_access_key_id - self.aws_secret_access_key = aws_secret_access_key - self.aws_session_token = aws_session_token - self.aws_region_name = aws_region_name - self.aws_profile_name = aws_profile_name - self.batch_size = batch_size - self.progress_bar = progress_bar - self.meta_fields_to_embed = meta_fields_to_embed or [] - self.embedding_separator = embedding_separator - self.kwargs = kwargs - def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]: """ Prepare the texts to embed by concatenating the Document text with the metadata fields to embed. diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/embedders/amazon_bedrock/text_embedder.py b/integrations/amazon_bedrock/src/haystack_integrations/components/embedders/amazon_bedrock/text_embedder.py index 0cceda92f..0acd51da5 100755 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/embedders/amazon_bedrock/text_embedder.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/embedders/amazon_bedrock/text_embedder.py @@ -2,6 +2,7 @@ import logging from typing import Any, Dict, List, Literal, Optional +from botocore.config import Config from botocore.exceptions import ClientError from haystack import component, default_from_dict, default_to_dict from haystack.utils.auth import Secret, deserialize_secrets_inplace @@ -62,6 +63,7 @@ def __init__( aws_session_token: Optional[Secret] = Secret.from_env_var("AWS_SESSION_TOKEN", strict=False), # noqa: B008 aws_region_name: Optional[Secret] = Secret.from_env_var("AWS_DEFAULT_REGION", strict=False), # noqa: B008 aws_profile_name: Optional[Secret] = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008 + boto3_config: Optional[Dict[str, Any]] = None, **kwargs, ): """ @@ -81,6 +83,7 @@ def __init__( :param aws_session_token: AWS session token. :param aws_region_name: AWS region name. :param aws_profile_name: AWS profile name. + :param boto3_config: The configuration for the boto3 client. :param kwargs: Additional parameters to pass for model inference. For example, `input_type` and `truncate` for Cohere models. :raises ValueError: If the model is not supported. @@ -92,6 +95,15 @@ def __init__( ) raise ValueError(msg) + self.model = model + self.aws_access_key_id = aws_access_key_id + self.aws_secret_access_key = aws_secret_access_key + self.aws_session_token = aws_session_token + self.aws_region_name = aws_region_name + self.aws_profile_name = aws_profile_name + self.boto3_config = boto3_config + self.kwargs = kwargs + def resolve_secret(secret: Optional[Secret]) -> Optional[str]: return secret.resolve_value() if secret else None @@ -103,7 +115,10 @@ def resolve_secret(secret: Optional[Secret]) -> Optional[str]: aws_region_name=resolve_secret(aws_region_name), aws_profile_name=resolve_secret(aws_profile_name), ) - self._client = session.client("bedrock-runtime") + config: Optional[Config] = None + if self.boto3_config: + config = Config(**self.boto3_config) + self._client = session.client("bedrock-runtime", config=config) except Exception as exception: msg = ( "Could not connect to Amazon Bedrock. Make sure the AWS environment is configured correctly. " @@ -111,14 +126,6 @@ def resolve_secret(secret: Optional[Secret]) -> Optional[str]: ) raise AmazonBedrockConfigurationError(msg) from exception - self.model = model - self.aws_access_key_id = aws_access_key_id - self.aws_secret_access_key = aws_secret_access_key - self.aws_session_token = aws_session_token - self.aws_region_name = aws_region_name - self.aws_profile_name = aws_profile_name - self.kwargs = kwargs - @component.output_types(embedding=List[float]) def run(self, text: str): """Embeds the input text using the Amazon Bedrock model. @@ -185,6 +192,7 @@ def to_dict(self) -> Dict[str, Any]: aws_region_name=self.aws_region_name.to_dict() if self.aws_region_name else None, aws_profile_name=self.aws_profile_name.to_dict() if self.aws_profile_name else None, model=self.model, + boto3_config=self.boto3_config, **self.kwargs, ) diff --git a/integrations/amazon_bedrock/tests/test_text_embedder.py b/integrations/amazon_bedrock/tests/test_text_embedder.py index 4f4e92448..2518b5c5f 100644 --- a/integrations/amazon_bedrock/tests/test_text_embedder.py +++ b/integrations/amazon_bedrock/tests/test_text_embedder.py @@ -59,6 +59,7 @@ def test_to_dict(self, mock_boto3_session): "aws_profile_name": {"type": "env_var", "env_vars": ["AWS_PROFILE"], "strict": False}, "model": "cohere.embed-english-v3", "input_type": "search_query", + "boto3_config": None, }, } @@ -76,6 +77,9 @@ def test_from_dict(self, mock_boto3_session): "aws_profile_name": {"type": "env_var", "env_vars": ["AWS_PROFILE"], "strict": False}, "model": "cohere.embed-english-v3", "input_type": "search_query", + "boto3_config": { + "read_timeout": 1000, + }, }, } @@ -83,6 +87,7 @@ def test_from_dict(self, mock_boto3_session): assert embedder.model == "cohere.embed-english-v3" assert embedder.kwargs == {"input_type": "search_query"} + assert embedder.boto3_config == {"read_timeout": 1000} def test_init_invalid_model(self): with pytest.raises(ValueError): From 8dd906b5f555871a7c4a821724290a56651f06af Mon Sep 17 00:00:00 2001 From: AnesBenmerzoug Date: Wed, 6 Nov 2024 10:54:10 +0100 Subject: [PATCH 4/9] Remove whitespace from blank line --- .../components/generators/amazon_bedrock/chat/chat_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py index 44aba9b37..3abf01069 100644 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py @@ -113,7 +113,7 @@ def __init__( switches the streaming mode on. :param truncate: Whether to truncate the prompt messages or not. :param boto3_config: The configuration for the boto3 client. - + :raises ValueError: If the model name is empty or None. :raises AmazonBedrockConfigurationError: If the AWS environment is not configured correctly or the model is not supported. From e90884248c36af9d003629dec2ac6d3fdbafae91 Mon Sep 17 00:00:00 2001 From: Anes Benmerzoug Date: Wed, 6 Nov 2024 11:12:07 +0100 Subject: [PATCH 5/9] Reorder setting attributes for readability --- .../generators/amazon_bedrock/chat/chat_generator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py index 3abf01069..6e0012b1a 100644 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py @@ -127,9 +127,12 @@ def __init__( self.aws_session_token = aws_session_token self.aws_region_name = aws_region_name self.aws_profile_name = aws_profile_name + self.stop_words = stop_words or [] + self.streaming_callback = streaming_callback self.truncate = truncate self.boto3_config = boto3_config + # get the model adapter for the given model model_adapter_cls = self.get_model_adapter(model=model) if not model_adapter_cls: @@ -160,9 +163,6 @@ def resolve_secret(secret: Optional[Secret]) -> Optional[str]: ) raise AmazonBedrockConfigurationError(msg) from exception - self.stop_words = stop_words or [] - self.streaming_callback = streaming_callback - @component.output_types(replies=List[ChatMessage]) def run( self, From 4648f73b89f9065e03239727959d7ba7f09e26e0 Mon Sep 17 00:00:00 2001 From: Anes Benmerzoug Date: Wed, 6 Nov 2024 11:13:38 +0100 Subject: [PATCH 6/9] Remove blank line --- .../components/generators/amazon_bedrock/chat/chat_generator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py index 6e0012b1a..183198bce 100644 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py @@ -132,7 +132,6 @@ def __init__( self.truncate = truncate self.boto3_config = boto3_config - # get the model adapter for the given model model_adapter_cls = self.get_model_adapter(model=model) if not model_adapter_cls: From 05f04085d4c8575bf59296737f806fa279d82e3b Mon Sep 17 00:00:00 2001 From: Stefano Fiorucci Date: Wed, 6 Nov 2024 10:28:00 +0100 Subject: [PATCH 7/9] fix: adapt our implementation to breaking changes in Chroma 0.5.17 (#1165) * fix chroma breaking changes * improve warning * better warning --- integrations/chroma/pyproject.toml | 2 +- .../document_stores/chroma/document_store.py | 7 +- .../document_stores/chroma/filters.py | 8 +-- .../chroma/tests/test_document_store.py | 65 ++++++++++++++++++- 4 files changed, 74 insertions(+), 8 deletions(-) diff --git a/integrations/chroma/pyproject.toml b/integrations/chroma/pyproject.toml index 7f0943a30..cfe7a606e 100644 --- a/integrations/chroma/pyproject.toml +++ b/integrations/chroma/pyproject.toml @@ -22,7 +22,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] -dependencies = ["haystack-ai", "chromadb>=0.5.0", "typing_extensions>=4.8.0"] +dependencies = ["haystack-ai", "chromadb>=0.5.17", "typing_extensions>=4.8.0"] [project.urls] Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/chroma#readme" diff --git a/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py b/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py index 6a83937a4..439e4b144 100644 --- a/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py +++ b/integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py @@ -248,9 +248,12 @@ def write_documents(self, documents: List[Document], policy: DuplicatePolicy = D if doc.content is None: logger.warning( - "ChromaDocumentStore can only store the text field of Documents: " - "'array', 'dataframe' and 'blob' will be dropped." + "ChromaDocumentStore cannot store documents with `content=None`. " + "`array`, `dataframe` and `blob` are not supported. " + "Document with id %s will be skipped.", + doc.id, ) + continue data = {"ids": [doc.id], "documents": [doc.content]} if doc.meta: diff --git a/integrations/chroma/src/haystack_integrations/document_stores/chroma/filters.py b/integrations/chroma/src/haystack_integrations/document_stores/chroma/filters.py index 60046b6ad..df49da673 100644 --- a/integrations/chroma/src/haystack_integrations/document_stores/chroma/filters.py +++ b/integrations/chroma/src/haystack_integrations/document_stores/chroma/filters.py @@ -1,6 +1,6 @@ from collections import defaultdict from dataclasses import dataclass -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional from chromadb.api.types import validate_where, validate_where_document @@ -34,8 +34,8 @@ class ChromaFilter: """ ids: List[str] - where: Dict[str, Any] - where_document: Dict[str, Any] + where: Optional[Dict[str, Any]] + where_document: Optional[Dict[str, Any]] def _convert_filters(filters: Dict[str, Any]) -> ChromaFilter: @@ -80,7 +80,7 @@ def _convert_filters(filters: Dict[str, Any]) -> ChromaFilter: msg = f"Invalid '{test_clause}' : {e}" raise ChromaDocumentStoreFilterError(msg) from e - return ChromaFilter(ids=ids, where=where, where_document=where_document) + return ChromaFilter(ids=ids, where=where or None, where_document=where_document or None) def _convert_filter_clause(filters: Dict[str, Any]) -> Dict[str, Any]: diff --git a/integrations/chroma/tests/test_document_store.py b/integrations/chroma/tests/test_document_store.py index 987f6d8b7..ed815251e 100644 --- a/integrations/chroma/tests/test_document_store.py +++ b/integrations/chroma/tests/test_document_store.py @@ -13,9 +13,12 @@ from chromadb.api.types import Documents, EmbeddingFunction, Embeddings from haystack import Document from haystack.testing.document_store import ( + TEST_EMBEDDING_1, + TEST_EMBEDDING_2, CountDocumentsTest, DeleteDocumentsTest, FilterDocumentsTest, + _random_embeddings, ) from haystack_integrations.document_stores.chroma import ChromaDocumentStore @@ -51,6 +54,67 @@ def document_store(self) -> ChromaDocumentStore: get_func.return_value = _TestEmbeddingFunction() return ChromaDocumentStore(embedding_function="test_function", collection_name=str(uuid.uuid1())) + @pytest.fixture + def filterable_docs(self) -> List[Document]: + """ + This fixture has been copied from haystack/testing/document_store.py and modified to + remove the documents that don't have textual content, as Chroma does not support writing them. + """ + documents = [] + for i in range(3): + documents.append( + Document( + content=f"A Foo Document {i}", + meta={ + "name": f"name_{i}", + "page": "100", + "chapter": "intro", + "number": 2, + "date": "1969-07-21T20:17:40", + }, + embedding=_random_embeddings(768), + ) + ) + documents.append( + Document( + content=f"A Bar Document {i}", + meta={ + "name": f"name_{i}", + "page": "123", + "chapter": "abstract", + "number": -2, + "date": "1972-12-11T19:54:58", + }, + embedding=_random_embeddings(768), + ) + ) + documents.append( + Document( + content=f"A Foobar Document {i}", + meta={ + "name": f"name_{i}", + "page": "90", + "chapter": "conclusion", + "number": -10, + "date": "1989-11-09T17:53:00", + }, + embedding=_random_embeddings(768), + ) + ) + documents.append( + Document( + content=f"Document {i} without embedding", + meta={"name": f"name_{i}", "no_embedding": True, "chapter": "conclusion"}, + ) + ) + documents.append( + Document(content=f"Doc {i} with zeros emb", meta={"name": "zeros_doc"}, embedding=TEST_EMBEDDING_1) + ) + documents.append( + Document(content=f"Doc {i} with ones emb", meta={"name": "ones_doc"}, embedding=TEST_EMBEDDING_2) + ) + return documents + def assert_documents_are_equal(self, received: List[Document], expected: List[Document]): """ Assert that two lists of Documents are equal. @@ -283,7 +347,6 @@ def test_contains(self, document_store: ChromaDocumentStore, filterable_docs: Li ) def test_multiple_contains(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): - filterable_docs = [doc for doc in filterable_docs if doc.content] # remove documents without content document_store.write_documents(filterable_docs) filters = { "operator": "OR", From f08beea52686c160edb435568d4ba2fa4ed9c774 Mon Sep 17 00:00:00 2001 From: HaystackBot Date: Wed, 6 Nov 2024 09:32:25 +0000 Subject: [PATCH 8/9] Update the changelog --- integrations/chroma/CHANGELOG.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/integrations/chroma/CHANGELOG.md b/integrations/chroma/CHANGELOG.md index c129d00ae..591c0ec39 100644 --- a/integrations/chroma/CHANGELOG.md +++ b/integrations/chroma/CHANGELOG.md @@ -1,5 +1,16 @@ # Changelog +## [integrations/chroma-v1.0.0] - 2024-11-06 + +### 🐛 Bug Fixes + +- Fixing Chroma tests due `chromadb` update behaviour change (#1148) +- Adapt our implementation to breaking changes in Chroma 0.5.17 (#1165) + +### ⚙️ Miscellaneous Tasks + +- Adopt uv as installer (#1142) + ## [integrations/chroma-v0.22.1] - 2024-09-30 ### Chroma From acad2c098e4e475f51e451341f8918dab90b856b Mon Sep 17 00:00:00 2001 From: Anes Benmerzoug Date: Thu, 14 Nov 2024 02:57:11 +0100 Subject: [PATCH 9/9] Parametrize to_dict and from_dict tests with boto3_config --- .../tests/test_chat_generator.py | 33 ++++++++++++----- .../tests/test_document_embedder.py | 32 +++++++++++++---- .../amazon_bedrock/tests/test_generator.py | 36 ++++++++++++++----- 3 files changed, 77 insertions(+), 24 deletions(-) diff --git a/integrations/amazon_bedrock/tests/test_chat_generator.py b/integrations/amazon_bedrock/tests/test_chat_generator.py index 02b75bab6..da783979f 100644 --- a/integrations/amazon_bedrock/tests/test_chat_generator.py +++ b/integrations/amazon_bedrock/tests/test_chat_generator.py @@ -1,7 +1,7 @@ import json import logging import os -from typing import Optional, Type +from typing import Any, Dict, Optional, Type from unittest.mock import MagicMock, patch import pytest @@ -26,7 +26,16 @@ ] -def test_to_dict(mock_boto3_session): +@pytest.mark.parametrize( + "boto3_config", + [ + None, + { + "read_timeout": 1000, + }, + ], +) +def test_to_dict(mock_boto3_session: Any, boto3_config: Optional[Dict[str, Any]]): """ Test that the to_dict method returns the correct dictionary without aws credentials """ @@ -34,6 +43,7 @@ def test_to_dict(mock_boto3_session): model="anthropic.claude-v2", generation_kwargs={"temperature": 0.7}, streaming_callback=print_streaming_chunk, + boto3_config=boto3_config, ) expected_dict = { "type": KLASS, @@ -48,14 +58,23 @@ def test_to_dict(mock_boto3_session): "stop_words": [], "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk", "truncate": True, - "boto3_config": None, + "boto3_config": boto3_config, }, } assert generator.to_dict() == expected_dict -def test_from_dict(mock_boto3_session): +@pytest.mark.parametrize( + "boto3_config", + [ + None, + { + "read_timeout": 1000, + }, + ], +) +def test_from_dict(mock_boto3_session: Any, boto3_config: Optional[Dict[str, Any]]): """ Test that the from_dict method returns the correct object """ @@ -72,16 +91,14 @@ def test_from_dict(mock_boto3_session): "generation_kwargs": {"temperature": 0.7}, "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk", "truncate": True, - "boto3_config": { - "read_timeout": 1000, - }, + "boto3_config": boto3_config, }, } ) assert generator.model == "anthropic.claude-v2" assert generator.model_adapter.generation_kwargs == {"temperature": 0.7} assert generator.streaming_callback == print_streaming_chunk - assert generator.boto3_config == {"read_timeout": 1000} + assert generator.boto3_config == boto3_config def test_default_constructor(mock_boto3_session, set_env_variables): diff --git a/integrations/amazon_bedrock/tests/test_document_embedder.py b/integrations/amazon_bedrock/tests/test_document_embedder.py index a405a0af6..05672e9c7 100644 --- a/integrations/amazon_bedrock/tests/test_document_embedder.py +++ b/integrations/amazon_bedrock/tests/test_document_embedder.py @@ -1,4 +1,5 @@ import io +from typing import Any, Dict, Optional from unittest.mock import patch import pytest @@ -66,10 +67,20 @@ def test_connection_error(self, mock_boto3_session): input_type="fake_input_type", ) - def test_to_dict(self, mock_boto3_session): + @pytest.mark.parametrize( + "boto3_config", + [ + None, + { + "read_timeout": 1000, + }, + ], + ) + def test_to_dict(self, mock_boto3_session: Any, boto3_config: Optional[Dict[str, Any]]): embedder = AmazonBedrockDocumentEmbedder( model="cohere.embed-english-v3", input_type="search_document", + boto3_config=boto3_config, ) expected_dict = { @@ -86,13 +97,22 @@ def test_to_dict(self, mock_boto3_session): "progress_bar": True, "meta_fields_to_embed": [], "embedding_separator": "\n", - "boto3_config": None, + "boto3_config": boto3_config, }, } assert embedder.to_dict() == expected_dict - def test_from_dict(self, mock_boto3_session): + @pytest.mark.parametrize( + "boto3_config", + [ + None, + { + "read_timeout": 1000, + }, + ], + ) + def test_from_dict(self, mock_boto3_session: Any, boto3_config: Optional[Dict[str, Any]]): data = { "type": TYPE, "init_parameters": { @@ -107,9 +127,7 @@ def test_from_dict(self, mock_boto3_session): "progress_bar": True, "meta_fields_to_embed": [], "embedding_separator": "\n", - "boto3_config": { - "read_timeout": 1000, - }, + "boto3_config": boto3_config, }, } @@ -121,7 +139,7 @@ def test_from_dict(self, mock_boto3_session): assert embedder.progress_bar assert embedder.meta_fields_to_embed == [] assert embedder.embedding_separator == "\n" - assert embedder.boto3_config == {"read_timeout": 1000} + assert embedder.boto3_config == boto3_config def test_init_invalid_model(self): with pytest.raises(ValueError): diff --git a/integrations/amazon_bedrock/tests/test_generator.py b/integrations/amazon_bedrock/tests/test_generator.py index be645218e..54b185da5 100644 --- a/integrations/amazon_bedrock/tests/test_generator.py +++ b/integrations/amazon_bedrock/tests/test_generator.py @@ -1,4 +1,4 @@ -from typing import Optional, Type +from typing import Any, Dict, Optional, Type from unittest.mock import MagicMock, call, patch import pytest @@ -17,11 +17,22 @@ ) -def test_to_dict(mock_boto3_session): +@pytest.mark.parametrize( + "boto3_config", + [ + None, + { + "read_timeout": 1000, + }, + ], +) +def test_to_dict(mock_boto3_session: Any, boto3_config: Optional[Dict[str, Any]]): """ Test that the to_dict method returns the correct dictionary without aws credentials """ - generator = AmazonBedrockGenerator(model="anthropic.claude-v2", max_length=99, truncate=False, temperature=10) + generator = AmazonBedrockGenerator( + model="anthropic.claude-v2", max_length=99, truncate=False, temperature=10, boto3_config=boto3_config + ) expected_dict = { "type": "haystack_integrations.components.generators.amazon_bedrock.generator.AmazonBedrockGenerator", @@ -36,14 +47,23 @@ def test_to_dict(mock_boto3_session): "truncate": False, "temperature": 10, "streaming_callback": None, - "boto3_config": None, + "boto3_config": boto3_config, }, } assert generator.to_dict() == expected_dict -def test_from_dict(mock_boto3_session): +@pytest.mark.parametrize( + "boto3_config", + [ + None, + { + "read_timeout": 1000, + }, + ], +) +def test_from_dict(mock_boto3_session: Any, boto3_config: Optional[Dict[str, Any]]): """ Test that the from_dict method returns the correct object """ @@ -58,16 +78,14 @@ def test_from_dict(mock_boto3_session): "aws_profile_name": {"type": "env_var", "env_vars": ["AWS_PROFILE"], "strict": False}, "model": "anthropic.claude-v2", "max_length": 99, - "boto3_config": { - "read_timeout": 1000, - }, + "boto3_config": boto3_config, }, } ) assert generator.max_length == 99 assert generator.model == "anthropic.claude-v2" - assert generator.boto3_config == {"read_timeout": 1000} + assert generator.boto3_config == boto3_config def test_default_constructor(mock_boto3_session, set_env_variables):