diff --git a/integrations/amazon_bedrock/src/haystack_integrations/common/amazon_bedrock/errors.py b/integrations/amazon_bedrock/src/haystack_integrations/common/amazon_bedrock/errors.py
index aa8a3f6e4..130c7d5e6 100644
--- a/integrations/amazon_bedrock/src/haystack_integrations/common/amazon_bedrock/errors.py
+++ b/integrations/amazon_bedrock/src/haystack_integrations/common/amazon_bedrock/errors.py
@@ -1,6 +1,3 @@
-from typing import Optional
-
-
 class AmazonBedrockError(Exception):
     """
     Any error generated by the Amazon Bedrock integration.
@@ -10,41 +7,14 @@ class AmazonBedrockError(Exception):
     `AmazonBedrockError.message` will exist and have the expected content.
     """
 
-    def __init__(
-        self,
-        message: Optional[str] = None,
-    ):
-        super().__init__()
-        if message:
-            self.message = message
-
-    def __getattr__(self, attr):
-        # If self.__cause__ is None, it will raise the expected AttributeError
-        getattr(self.__cause__, attr)
-
-    def __str__(self):
-        return self.message
-
-    def __repr__(self):
-        return str(self)
-
 
 class AWSConfigurationError(AmazonBedrockError):
     """Exception raised when AWS is not configured correctly"""
 
-    def __init__(self, message: Optional[str] = None):
-        super().__init__(message=message)
-
 
 class AmazonBedrockConfigurationError(AmazonBedrockError):
     """Exception raised when AmazonBedrock node is not configured correctly"""
 
-    def __init__(self, message: Optional[str] = None):
-        super().__init__(message=message)
-
 
 class AmazonBedrockInferenceError(AmazonBedrockError):
     """Exception for issues that occur in the Bedrock inference node"""
-
-    def __init__(self, message: Optional[str] = None):
-        super().__init__(message=message)
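With the custom `__init__`, `__getattr__`, `__str__`, and `__repr__` overrides removed, these classes fall back to the standard `Exception` machinery. A minimal sketch of what calling code can still rely on (class names and import path are from the diff; the triggering scenario is hypothetical):

```python
from haystack_integrations.common.amazon_bedrock.errors import (
    AmazonBedrockError,
    AmazonBedrockInferenceError,
)

try:
    # Hypothetical failure path: every Bedrock-specific error derives from
    # AmazonBedrockError, so callers can catch the shared base class.
    raise AmazonBedrockInferenceError("Could not invoke the model")
except AmazonBedrockError as exc:
    # Exception.__str__ now renders the message; no custom `message`
    # attribute or __cause__ forwarding is involved anymore.
    print(f"Bedrock error: {exc}")
```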
""" if not model or model not in SUPPORTED_EMBEDDING_MODELS: @@ -218,6 +219,13 @@ def _embed_titan(self, documents: List[Document]) -> List[Document]: @component.output_types(documents=List[Document]) def run(self, documents: List[Document]): + """Embed the provided `Document`s using the specified model. + + :param documents: The `Document`s to embed. + :returns: A dictionary with the following keys: + - `documents`: The `Document`s with the `embedding` field populated. + :raises AmazonBedrockInferenceError: If the inference fails. + """ if not isinstance(documents, list) or documents and not isinstance(documents[0], Document): msg = ( "AmazonBedrockDocumentEmbedder expects a list of Documents as input." @@ -234,8 +242,10 @@ def run(self, documents: List[Document]): def to_dict(self) -> Dict[str, Any]: """ - Serialize this component to a dictionary. - :returns: The serialized component as a dictionary. + Serializes the component to a dictionary. + + :returns: + Dictionary with serialized data. """ return default_to_dict( self, @@ -255,7 +265,12 @@ def to_dict(self) -> Dict[str, Any]: @classmethod def from_dict(cls, data: Dict[str, Any]) -> "AmazonBedrockDocumentEmbedder": """ - Deserialize this component from a dictionary. + Deserializes the component from a dictionary. + + :param data: + Dictionary to deserialize from. + :returns: + Deserialized component. """ deserialize_secrets_inplace( data["init_parameters"], diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/embedders/amazon_bedrock/text_embedder.py b/integrations/amazon_bedrock/src/haystack_integrations/components/embedders/amazon_bedrock/text_embedder.py index ed6768737..91a9e3b72 100644 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/embedders/amazon_bedrock/text_embedder.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/embedders/amazon_bedrock/text_embedder.py @@ -66,7 +66,6 @@ def __init__( :param model: The embedding model to use. The model has to be specified in the format outlined in the Amazon Bedrock [documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html). - :type model: Literal["amazon.titan-embed-text-v1", "cohere.embed-english-v3", "cohere.embed-multilingual-v3"] :param aws_access_key_id: AWS access key ID. :param aws_secret_access_key: AWS secret access key. :param aws_session_token: AWS session token. @@ -74,6 +73,8 @@ def __init__( :param aws_profile_name: AWS profile name. :param kwargs: Additional parameters to pass for model inference. For example, `input_type` and `truncate` for Cohere models. + :raises ValueError: If the model is not supported. + :raises AmazonBedrockConfigurationError: If the AWS environment is not configured correctly. """ if not model or model not in SUPPORTED_EMBEDDING_MODELS: msg = "Please provide a valid model from the list of supported models: " + ", ".join( @@ -110,6 +111,14 @@ def resolve_secret(secret: Optional[Secret]) -> Optional[str]: @component.output_types(embedding=List[float]) def run(self, text: str): + """Embeds the input text using the Amazon Bedrock model. + + :param text: The input text to embed. + :returns: A dictionary with the following keys: + - `embedding`: The embedding of the input text. + :raises TypeError: If the input text is not a string. + :raises AmazonBedrockInferenceError: If the model inference fails. + """ if not isinstance(text, str): msg = ( "AmazonBedrockTextEmbedder expects a string as an input." 
diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/embedders/amazon_bedrock/text_embedder.py b/integrations/amazon_bedrock/src/haystack_integrations/components/embedders/amazon_bedrock/text_embedder.py
index ed6768737..91a9e3b72 100644
--- a/integrations/amazon_bedrock/src/haystack_integrations/components/embedders/amazon_bedrock/text_embedder.py
+++ b/integrations/amazon_bedrock/src/haystack_integrations/components/embedders/amazon_bedrock/text_embedder.py
@@ -66,7 +66,6 @@ def __init__(
         :param model: The embedding model to use.
             The model has to be specified in the format outlined in the Amazon Bedrock
             [documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html).
-        :type model: Literal["amazon.titan-embed-text-v1", "cohere.embed-english-v3", "cohere.embed-multilingual-v3"]
         :param aws_access_key_id: AWS access key ID.
         :param aws_secret_access_key: AWS secret access key.
         :param aws_session_token: AWS session token.
@@ -74,6 +73,8 @@ def __init__(
         :param aws_profile_name: AWS profile name.
         :param kwargs: Additional parameters to pass for model inference. For example, `input_type` and `truncate` for
             Cohere models.
+        :raises ValueError: If the model is not supported.
+        :raises AmazonBedrockConfigurationError: If the AWS environment is not configured correctly.
         """
         if not model or model not in SUPPORTED_EMBEDDING_MODELS:
             msg = "Please provide a valid model from the list of supported models: " + ", ".join(
@@ -110,6 +111,14 @@ def resolve_secret(secret: Optional[Secret]) -> Optional[str]:
 
     @component.output_types(embedding=List[float])
     def run(self, text: str):
+        """Embeds the input text using the Amazon Bedrock model.
+
+        :param text: The input text to embed.
+        :returns: A dictionary with the following keys:
+            - `embedding`: The embedding of the input text.
+        :raises TypeError: If the input text is not a string.
+        :raises AmazonBedrockInferenceError: If the model inference fails.
+        """
         if not isinstance(text, str):
             msg = (
                 "AmazonBedrockTextEmbedder expects a string as an input."
@@ -153,8 +162,10 @@ def run(self, text: str):
 
     def to_dict(self) -> Dict[str, Any]:
         """
-        Serialize this component to a dictionary.
-        :returns: The serialized component as a dictionary.
+        Serializes the component to a dictionary.
+
+        :returns:
+            Dictionary with serialized data.
         """
         return default_to_dict(
             self,
@@ -170,9 +181,12 @@ def to_dict(self) -> Dict[str, Any]:
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "AmazonBedrockTextEmbedder":
         """
-        Deserialize this component from a dictionary.
-        :param data: The dictionary representation of this component.
-        :returns: The deserialized component instance.
+        Deserializes the component from a dictionary.
+
+        :param data:
+            Dictionary to deserialize from.
+        :returns:
+            Deserialized component.
         """
         deserialize_secrets_inplace(
             data["init_parameters"],
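The corresponding sketch for the text embedder's `run()`, `to_dict()`, and `from_dict()` contracts, again assuming AWS credentials are available to the underlying session:

```python
from haystack_integrations.components.embedders.amazon_bedrock import (
    AmazonBedrockTextEmbedder,
)

embedder = AmazonBedrockTextEmbedder(model="amazon.titan-embed-text-v1")

out = embedder.run(text="What is Amazon Bedrock?")
print(len(out["embedding"]))  # a single List[float] vector

# Round-trip serialization, e.g. when saving and loading a pipeline.
data = embedder.to_dict()
restored = AmazonBedrockTextEmbedder.from_dict(data)
```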
diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/adapters.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/adapters.py
index f842f0ef5..0fbf04de7 100644
--- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/adapters.py
+++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/adapters.py
@@ -103,6 +103,10 @@ def prepare_body(self, prompt: str, **inference_kwargs) -> Dict[str, Any]:
         Prepares the body for the Claude model
 
         :param prompt: The prompt to be sent to the model.
+        :param inference_kwargs: Additional keyword arguments passed to the handler.
+        :returns: A dictionary with the following keys:
+            - `prompt`: The prompt to be sent to the model.
+            - specified inference parameters.
         """
         default_params = {
             "max_tokens_to_sample": self.max_length,
@@ -146,7 +150,9 @@ def prepare_body(self, prompt: str, **inference_kwargs) -> Dict[str, Any]:
         :param prompt: The prompt to be sent to the model.
         :param inference_kwargs: Additional keyword arguments passed to the handler.
-        :returns: A dictionary containing the body for the request.
+        :returns: A dictionary with the following keys:
+            - `prompt`: The prompt to be sent to the model.
+            - specified inference parameters.
         """
         default_params = {
             "max_tokens": self.max_length,
@@ -191,6 +197,14 @@ class AI21LabsJurassic2Adapter(BedrockModelAdapter):
     """
 
     def prepare_body(self, prompt: str, **inference_kwargs) -> Dict[str, Any]:
+        """Prepares the body for the Jurassic 2 model.
+
+        :param prompt: The prompt to be sent to the model.
+        :param inference_kwargs: Additional keyword arguments passed to the handler.
+        :returns: A dictionary with the following keys:
+            - `prompt`: The prompt to be sent to the model.
+            - specified inference parameters.
+        """
         default_params = {
             "maxTokens": self.max_length,
             "stopSequences": None,
@@ -226,7 +240,9 @@ def prepare_body(self, prompt: str, **inference_kwargs) -> Dict[str, Any]:
         :param prompt: The prompt to be sent to the model.
         :param inference_kwargs: Additional keyword arguments passed to the handler.
-        :returns: A dictionary containing the body for the request.
+        :returns: A dictionary with the following keys:
+            - `inputText`: The prompt to be sent to the model.
+            - specified inference parameters.
         """
         default_params = {
             "maxTokenCount": self.max_length,
@@ -270,7 +286,9 @@ def prepare_body(self, prompt: str, **inference_kwargs) -> Dict[str, Any]:
         :param prompt: The prompt to be sent to the model.
         :param inference_kwargs: Additional keyword arguments passed to the handler.
-        :returns: A dictionary containing the body for the request.
+        :returns: A dictionary with the following keys:
+            - `prompt`: The prompt to be sent to the model.
+            - specified inference parameters.
         """
         default_params = {
             "max_gen_len": self.max_length,
diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/generator.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/generator.py
index f6af48ae1..0b1b73812 100644
--- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/generator.py
+++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/generator.py
@@ -84,7 +84,9 @@ def __init__(
         :param aws_profile_name: The AWS profile name.
         :param max_length: The maximum length of the generated text.
         :param kwargs: Additional keyword arguments to be passed to the model.
-
+        :raises ValueError: If the model name is empty or None.
+        :raises AmazonBedrockConfigurationError: If the AWS environment is not configured correctly or the model is
+            not supported.
         """
         if not model:
             msg = "'model' cannot be None or empty string"
@@ -226,7 +228,9 @@ def run(self, prompt: str, generation_kwargs: Optional[Dict[str, Any]] = None):
         :param prompt: The prompt to generate a response for.
         :param generation_kwargs: Additional keyword arguments passed to the generator.
         :returns: A dictionary with the following keys:
-            - `replies`: A list of generated responses (strings).
+            - `replies`: A list of generated responses.
+        :raises ValueError: If the prompt is empty or None.
+        :raises AmazonBedrockInferenceError: If the model cannot be invoked.
         """
         return {"replies": self.invoke(prompt=prompt, **(generation_kwargs or {}))}
@@ -269,7 +273,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "AmazonBedrockGenerator":
         :param data:
             Dictionary to deserialize from.
         :returns:
-        Deserialized component.
+            Deserialized component.
         """
         deserialize_secrets_inplace(
             data["init_parameters"],
diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/handlers.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/handlers.py
index ddc276264..f4dc1aa4f 100644
--- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/handlers.py
+++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/handlers.py
@@ -15,6 +15,8 @@ def __init__(self, tokenizer: Union[str, PreTrainedTokenizerBase], model_max_len
         :param tokenizer: The tokenizer to be used to tokenize the prompt.
         :param model_max_length: The maximum length of the prompt and answer tokens combined.
         :param max_length: The maximum length of the answer tokens.
+        :raises ValueError: If the tokenizer is not a string or a `PreTrainedTokenizer` or `PreTrainedTokenizerFast`
+            instance.
         """
         if isinstance(tokenizer, str):
             self.tokenizer = AutoTokenizer.from_pretrained(tokenizer)
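Finally, a sketch of the generator contract these docstrings describe. The adapter chooses the request-body keys (`prompt`, `inputText`, ...) based on the model family; callers only see the `replies` list. The model ID and generation kwargs below are illustrative, and AWS credentials are assumed to be configured:

```python
from haystack_integrations.components.generators.amazon_bedrock import (
    AmazonBedrockGenerator,
)

generator = AmazonBedrockGenerator(
    model="anthropic.claude-v2",
    max_length=256,  # upper bound for the generated answer tokens
)

result = generator.run(
    prompt="Briefly explain what Amazon Bedrock is.",
    generation_kwargs={"temperature": 0.3},  # forwarded to the model adapter
)
print(result["replies"][0])  # `replies` is a list of generated strings
```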