docs: review integrations bedrock #550

Merged 5 commits on Mar 6, 2024
@@ -1,6 +1,3 @@
-from typing import Optional
Author's comment on the removed code: that's all just duplicate code; simply inheriting is enough.



class AmazonBedrockError(Exception):
"""
Any error generated by the Amazon Bedrock integration.
@@ -10,41 +7,14 @@ class AmazonBedrockError(Exception):
`AmazonBedrockError.message` will exist and have the expected content.
"""

-def __init__(
-self,
-message: Optional[str] = None,
-):
-super().__init__()
-if message:
-self.message = message
-
-def __getattr__(self, attr):
-# If self.__cause__ is None, it will raise the expected AttributeError
-getattr(self.__cause__, attr)
-
-def __str__(self):
-return self.message
-
-def __repr__(self):
-return str(self)


class AWSConfigurationError(AmazonBedrockError):
"""Exception raised when AWS is not configured correctly"""

-def __init__(self, message: Optional[str] = None):
-super().__init__(message=message)


class AmazonBedrockConfigurationError(AmazonBedrockError):
"""Exception raised when AmazonBedrock node is not configured correctly"""

-def __init__(self, message: Optional[str] = None):
-super().__init__(message=message)


class AmazonBedrockInferenceError(AmazonBedrockError):
"""Exception for issues that occur in the Bedrock inference node"""

-def __init__(self, message: Optional[str] = None):
-super().__init__(message=message)
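
For context, here is a hedged sketch of what the simplified module looks like after this change, relying on plain inheritance; the class names come from the diff above, but the exact final file contents are an assumption:

# Sketch only: subclasses inherit Exception.__init__ via AmazonBedrockError,
# so no per-class constructor boilerplate is needed.
class AmazonBedrockError(Exception):
    """Any error generated by the Amazon Bedrock integration."""


class AWSConfigurationError(AmazonBedrockError):
    """Exception raised when AWS is not configured correctly."""


class AmazonBedrockConfigurationError(AmazonBedrockError):
    """Exception raised when AmazonBedrock node is not configured correctly."""


class AmazonBedrockInferenceError(AmazonBedrockError):
    """Exception for issues that occur in the Bedrock inference node."""


# Exception already stores and renders the message:
try:
    raise AmazonBedrockInferenceError("Could not invoke the model")
except AmazonBedrockError as exc:
    print(exc)  # -> Could not invoke the model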
@@ -77,20 +77,21 @@ def __init__(

:param model: The embedding model to use. The model has to be specified in the format outlined in the Amazon
Bedrock [documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html).
-:type model: Literal["amazon.titan-embed-text-v1", "cohere.embed-english-v3", "cohere.embed-multilingual-v3"]
:param aws_access_key_id: AWS access key ID.
:param aws_secret_access_key: AWS secret access key.
:param aws_session_token: AWS session token.
:param aws_region_name: AWS region name.
:param aws_profile_name: AWS profile name.
-:param batch_size: Number of Documents to encode at once. Default is 32.
+:param batch_size: Number of Documents to encode at once.
+Only Cohere models support batch inference. This parameter is ignored for Amazon Titan models.
:param progress_bar: Whether to show a progress bar or not. Can be helpful to disable in production deployments
to keep the logs clean.
:param meta_fields_to_embed: List of meta fields that should be embedded along with the Document text.
:param embedding_separator: Separator used to concatenate the meta fields to the Document text.
:param kwargs: Additional parameters to pass for model inference. For example, `input_type` and `truncate` for
Cohere models.
:raises ValueError: If the model is not supported.
:raises AmazonBedrockConfigurationError: If the AWS environment is not configured correctly.
"""

if not model or model not in SUPPORTED_EMBEDDING_MODELS:
@@ -218,6 +219,13 @@ def _embed_titan(self, documents: List[Document]) -> List[Document]:

@component.output_types(documents=List[Document])
def run(self, documents: List[Document]):
"""Embed the provided `Document`s using the specified model.

:param documents: The `Document`s to embed.
:returns: A dictionary with the following keys:
- `documents`: The `Document`s with the `embedding` field populated.
:raises AmazonBedrockInferenceError: If the inference fails.
"""
if not isinstance(documents, list) or documents and not isinstance(documents[0], Document):
msg = (
"AmazonBedrockDocumentEmbedder expects a list of Documents as input."
@@ -234,8 +242,10 @@ def run(self, documents: List[Document]):

def to_dict(self) -> Dict[str, Any]:
"""
-Serialize this component to a dictionary.
-:returns: The serialized component as a dictionary.
+Serializes the component to a dictionary.
+
+:returns:
+Dictionary with serialized data.
"""
return default_to_dict(
self,
@@ -255,7 +265,12 @@ def to_dict(self) -> Dict[str, Any]:
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "AmazonBedrockDocumentEmbedder":
"""
-Deserialize this component from a dictionary.
+Deserializes the component from a dictionary.
+
+:param data:
+Dictionary to deserialize from.
+:returns:
+Deserialized component.
"""
deserialize_secrets_inplace(
data["init_parameters"],
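
As a quick illustration of the documented `run` contract, a minimal usage sketch follows; the import path assumes the Amazon Bedrock Haystack integration's package layout, and AWS credentials are assumed to be resolvable from the environment:

from haystack import Document
from haystack_integrations.components.embedders.amazon_bedrock import (
    AmazonBedrockDocumentEmbedder,
)

# Assumes AWS credentials are available via the default environment variables.
embedder = AmazonBedrockDocumentEmbedder(model="cohere.embed-english-v3")
result = embedder.run(documents=[Document(content="Bedrock hosts foundation models.")])
# The returned Documents carry a populated `embedding` field.
print(len(result["documents"][0].embedding))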
@@ -66,14 +66,15 @@ def __init__(

:param model: The embedding model to use. The model has to be specified in the format outlined in the Amazon
Bedrock [documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html).
-:type model: Literal["amazon.titan-embed-text-v1", "cohere.embed-english-v3", "cohere.embed-multilingual-v3"]
:param aws_access_key_id: AWS access key ID.
:param aws_secret_access_key: AWS secret access key.
:param aws_session_token: AWS session token.
:param aws_region_name: AWS region name.
:param aws_profile_name: AWS profile name.
:param kwargs: Additional parameters to pass for model inference. For example, `input_type` and `truncate` for
Cohere models.
+:raises ValueError: If the model is not supported.
+:raises AmazonBedrockConfigurationError: If the AWS environment is not configured correctly.
"""
if not model or model not in SUPPORTED_EMBEDDING_MODELS:
msg = "Please provide a valid model from the list of supported models: " + ", ".join(
@@ -110,6 +111,14 @@ def resolve_secret(secret: Optional[Secret]) -> Optional[str]:

@component.output_types(embedding=List[float])
def run(self, text: str):
"""Embeds the input text using the Amazon Bedrock model.

:param text: The input text to embed.
:returns: A dictionary with the following keys:
- `embedding`: The embedding of the input text.
:raises TypeError: If the input text is not a string.
:raises AmazonBedrockInferenceError: If the model inference fails.
"""
if not isinstance(text, str):
msg = (
"AmazonBedrockTextEmbedder expects a string as an input."
@@ -153,8 +162,10 @@ def run(self, text: str):

def to_dict(self) -> Dict[str, Any]:
"""
-Serialize this component to a dictionary.
-:returns: The serialized component as a dictionary.
+Serializes the component to a dictionary.
+
+:returns:
+Dictionary with serialized data.
"""
return default_to_dict(
self,
@@ -170,9 +181,12 @@ def to_dict(self) -> Dict[str, Any]:
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "AmazonBedrockTextEmbedder":
"""
-Deserialize this component from a dictionary.
-:param data: The dictionary representation of this component.
-:returns: The deserialized component instance.
+Deserializes the component from a dictionary.
+
+:param data:
+Dictionary to deserialize from.
+:returns:
+Deserialized component.
"""
deserialize_secrets_inplace(
data["init_parameters"],
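
The text embedder's `run` method takes a single string and returns a dictionary with an `embedding` list, as documented above. A hedged sketch under the same import-path and credential assumptions as before:

from haystack_integrations.components.embedders.amazon_bedrock import (
    AmazonBedrockTextEmbedder,
)

text_embedder = AmazonBedrockTextEmbedder(model="amazon.titan-embed-text-v1")
out = text_embedder.run(text="What is Amazon Bedrock?")
print(len(out["embedding"]))  # dimensionality of the returned vector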
@@ -103,6 +103,10 @@ def prepare_body(self, prompt: str, **inference_kwargs) -> Dict[str, Any]:
Prepares the body for the Claude model.

:param prompt: The prompt to be sent to the model.
+:param inference_kwargs: Additional keyword arguments passed to the handler.
+:returns: A dictionary with the following keys:
+- `prompt`: The prompt to be sent to the model.
+- specified inference parameters.
"""
default_params = {
"max_tokens_to_sample": self.max_length,
@@ -146,7 +150,9 @@ def prepare_body(self, prompt: str, **inference_kwargs) -> Dict[str, Any]:

:param prompt: The prompt to be sent to the model.
:param inference_kwargs: Additional keyword arguments passed to the handler.
-:returns: A dictionary containing the body for the request.
+:returns: A dictionary with the following keys:
+- `prompt`: The prompt to be sent to the model.
+- specified inference parameters.
"""
default_params = {
"max_tokens": self.max_length,
@@ -191,6 +197,14 @@ class AI21LabsJurassic2Adapter(BedrockModelAdapter):
"""

def prepare_body(self, prompt: str, **inference_kwargs) -> Dict[str, Any]:
"""Prepares the body for the Jurassic 2 model.

:param prompt: The prompt to be sent to the model.
:param inference_kwargs: Additional keyword arguments passed to the handler.
:returns: A dictionary with the following keys:
- `prompt`: The prompt to be sent to the model.
- specified inference parameters.
"""
default_params = {
"maxTokens": self.max_length,
"stopSequences": None,
@@ -226,7 +240,9 @@ def prepare_body(self, prompt: str, **inference_kwargs) -> Dict[str, Any]:

:param prompt: The prompt to be sent to the model.
:param inference_kwargs: Additional keyword arguments passed to the handler.
-:returns: A dictionary containing the body for the request.
+:returns: A dictionary with the following keys:
+- `inputText`: The prompt to be sent to the model.
+- specified inference parameters.
"""
default_params = {
"maxTokenCount": self.max_length,
@@ -270,7 +286,9 @@ def prepare_body(self, prompt: str, **inference_kwargs) -> Dict[str, Any]:

:param prompt: The prompt to be sent to the model.
:param inference_kwargs: Additional keyword arguments passed to the handler.
-:returns: A dictionary containing the body for the request.
+:returns: A dictionary with the following keys:
+- `prompt`: The prompt to be sent to the model.
+- specified inference parameters.
"""
default_params = {
"max_gen_len": self.max_length,
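
Each adapter's `prepare_body` follows the same pattern: model-specific defaults are merged with the caller's inference kwargs, unset values are dropped, and the prompt is added under the key the model expects (`prompt` or `inputText`). A simplified standalone sketch of that merge logic, not the adapters' actual helper methods:

from typing import Any, Dict


def prepare_body_sketch(prompt: str, max_length: int, **inference_kwargs) -> Dict[str, Any]:
    # Defaults first; caller-supplied kwargs override them.
    default_params: Dict[str, Any] = {
        "max_tokens_to_sample": max_length,  # Claude-style parameter name
        "stop_sequences": None,
    }
    merged = {**default_params, **inference_kwargs}
    # Drop unset parameters so the request body stays minimal.
    params = {k: v for k, v in merged.items() if v is not None}
    return {"prompt": prompt, **params}


# prepare_body_sketch("Hello", 256, temperature=0.5)
# -> {"prompt": "Hello", "max_tokens_to_sample": 256, "temperature": 0.5}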
@@ -84,7 +84,9 @@ def __init__(
:param aws_profile_name: The AWS profile name.
:param max_length: The maximum length of the generated text.
:param kwargs: Additional keyword arguments to be passed to the model.

+:raises ValueError: If the model name is empty or None.
+:raises AmazonBedrockConfigurationError: If the AWS environment is not configured correctly or the model is
+not supported.
"""
if not model:
msg = "'model' cannot be None or empty string"
@@ -226,7 +228,9 @@ def run(self, prompt: str, generation_kwargs: Optional[Dict[str, Any]] = None):
:param prompt: The prompt to generate a response for.
:param generation_kwargs: Additional keyword arguments passed to the generator.
:returns: A dictionary with the following keys:
-- `replies`: A list of generated responses (strings).
+- `replies`: A list of generated responses.
+:raises ValueError: If the prompt is empty or None.
+:raises AmazonBedrockInferenceError: If the model cannot be invoked.
"""
return {"replies": self.invoke(prompt=prompt, **(generation_kwargs or {}))}

@@ -269,7 +273,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "AmazonBedrockGenerator":
:param data:
Dictionary to deserialize from.
:returns:
Deserialized component.
"""
deserialize_secrets_inplace(
data["init_parameters"],
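
Tying the generator docs together, a hedged end-to-end sketch of `run`; the model ID, import path, and ambient AWS credentials are assumptions rather than anything stated in this diff:

from haystack_integrations.components.generators.amazon_bedrock import (
    AmazonBedrockGenerator,
)

generator = AmazonBedrockGenerator(model="anthropic.claude-v2")  # assumed model ID
result = generator.run(prompt="In one sentence, what is Amazon Bedrock?")
print(result["replies"][0])  # first generated reply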
@@ -15,6 +15,8 @@ def __init__(self, tokenizer: Union[str, PreTrainedTokenizerBase], model_max_len
:param tokenizer: The tokenizer to be used to tokenize the prompt.
:param model_max_length: The maximum length of the prompt and answer tokens combined.
:param max_length: The maximum length of the answer tokens.
+:raises ValueError: If the tokenizer is not a string or a `PreTrainedTokenizer` or `PreTrainedTokenizerFast`
+instance.
"""
if isinstance(tokenizer, str):
self.tokenizer = AutoTokenizer.from_pretrained(tokenizer)
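
In essence, this handler keeps `prompt tokens + answer tokens` within `model_max_length`. A minimal sketch of that truncation idea using Hugging Face `transformers` (the tokenizer choice here is illustrative, not the integration's default):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # illustrative tokenizer
model_max_length, max_length = 1024, 100  # total budget vs. tokens reserved for the answer

prompt = "a very long prompt " * 500
tokens = tokenizer.tokenize(prompt)
# Keep only as many prompt tokens as fit alongside the reserved answer tokens.
resized_prompt = tokenizer.convert_tokens_to_string(tokens[: model_max_length - max_length])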