diff --git a/integrations/google_ai/src/haystack_integrations/components/generators/google_ai/chat/gemini.py b/integrations/google_ai/src/haystack_integrations/components/generators/google_ai/chat/gemini.py index 3c84b4081..c55fcab67 100644 --- a/integrations/google_ai/src/haystack_integrations/components/generators/google_ai/chat/gemini.py +++ b/integrations/google_ai/src/haystack_integrations/components/generators/google_ai/chat/gemini.py @@ -17,9 +17,10 @@ @component class GoogleAIGeminiChatGenerator: """ - GoogleAIGeminiGenerator is a multi modal generator supporting Gemini via Google Makersuite. + `GoogleAIGeminiChatGenerator` is a multimodal generator supporting Gemini via Google AI Studio. + It uses the `ChatMessage` dataclass to interact with the model. - Sample usage: + Usage example: ```python from haystack.utils import Secret from haystack.dataclasses.chat_message import ChatMessage @@ -40,7 +41,7 @@ class GoogleAIGeminiChatGenerator: ``` - This is a more advanced usage that also uses function calls: + Usage example with function calling: ```python from haystack.utils import Secret from haystack.dataclasses.chat_message import ChatMessage @@ -53,7 +54,7 @@ def get_current_weather(location: str, unit: str = "celsius") -> str: # Call a weather API and return some text ... - # Define the function interface so that Gemini can call it + # Define the function interface get_current_weather_func = FunctionDeclaration( name="get_current_weather", description="Get the current weather in a given location", @@ -88,12 +89,6 @@ def get_current_weather(location: str, unit: str = "celsius") -> str: for reply in res["replies"]: print(reply.content) ``` - - Input: - - **messages** A list of ChatMessage objects. - - Output: - - **replies** A list of ChatMessage objects containing the one or more replies from the model. """ def __init__( @@ -106,7 +101,7 @@ def __init__( tools: Optional[List[Tool]] = None, ): """ - Initialize a GoogleAIGeminiChatGenerator instance. + Initializes a `GoogleAIGeminiChatGenerator` instance. To get an API key, visit: https://makersuite.google.com @@ -115,24 +110,18 @@ def __init__( * `gemini-pro-vision` * `gemini-ultra` - :param api_key: Google Makersuite API key. - :param model: Name of the model to use, defaults to "gemini-pro-vision" - :param generation_config: The generation config to use, defaults to None. - Can either be a GenerationConfig object or a dictionary of parameters. - Accepted parameters are: - - temperature - - top_p - - top_k - - candidate_count - - max_output_tokens - - stop_sequences - :param safety_settings: The safety settings to use, defaults to None. - A dictionary of HarmCategory to HarmBlockThreshold. - :param tools: The tools to use, defaults to None. - A list of Tool objects that can be used to modify the generation process. + :param api_key: Google AI Studio API key. + :param model: Name of the model to use. + :param generation_config: The generation config to use. + Can either be a `GenerationConfig` object or a dictionary of parameters. + For the available parameters, see + [the `GenerationConfig` API reference](https://ai.google.dev/api/python/google/generativeai/GenerationConfig). + :param safety_settings: The safety settings to use. + A dictionary with `HarmCategory` as keys and `HarmBlockThreshold` as values. + For more information, see [the API reference](https://ai.google.dev/api) + :param tools: A list of Tool objects that can be used for [Function calling](https://ai.google.dev/docs/function_calling). 
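+
+        For example, a dictionary-based `generation_config` might look like this
+        (a minimal sketch; the values are illustrative only):
+        ```python
+        generation_config = {
+            "temperature": 0.7,
+            "top_p": 0.95,
+            "max_output_tokens": 512,
+        }
+        ```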
""" - # Authenticate, if api_key is None it will use the GOOGLE_API_KEY env variable genai.configure(api_key=api_key.resolve_value()) self._api_key = api_key @@ -155,6 +144,12 @@ def _generation_config_to_dict(self, config: Union[GenerationConfig, Dict[str, A } def to_dict(self) -> Dict[str, Any]: + """ + Serializes the component to a dictionary. + + :returns: + Dictionary with serialized data. + """ data = default_to_dict( self, api_key=self._api_key.to_dict(), @@ -173,6 +168,14 @@ def to_dict(self) -> Dict[str, Any]: @classmethod def from_dict(cls, data: Dict[str, Any]) -> "GoogleAIGeminiChatGenerator": + """ + Deserializes the component from a dictionary. + + :param data: + Dictionary to deserialize from. + :returns: + Deserialized component. + """ deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) if (tools := data["init_parameters"].get("tools")) is not None: @@ -247,6 +250,15 @@ def _message_to_content(self, message: ChatMessage) -> Content: @component.output_types(replies=List[ChatMessage]) def run(self, messages: List[ChatMessage]): + """ + Generates text based on the provided messages. + + :param messages: + A list of `ChatMessage` instances, representing the input messages. + :returns: + A dictionary containing the following key: + - `replies`: A list containing the generated responses as `ChatMessage` instances. + """ history = [self._message_to_content(m) for m in messages[:-1]] session = self._model.start_chat(history=history) diff --git a/integrations/google_ai/src/haystack_integrations/components/generators/google_ai/gemini.py b/integrations/google_ai/src/haystack_integrations/components/generators/google_ai/gemini.py index 3929e7d5e..319c1dbf0 100644 --- a/integrations/google_ai/src/haystack_integrations/components/generators/google_ai/gemini.py +++ b/integrations/google_ai/src/haystack_integrations/components/generators/google_ai/gemini.py @@ -17,9 +17,9 @@ @component class GoogleAIGeminiGenerator: """ - GoogleAIGeminiGenerator is a multi modal generator supporting Gemini via Google Makersuite. + `GoogleAIGeminiGenerator` is a multimodal generator supporting Gemini via Google AI Studio. - Sample usage: + Usage example: ```python from haystack.utils import Secret from haystack_integrations.components.generators.google_ai import GoogleAIGeminiGenerator @@ -30,7 +30,7 @@ class GoogleAIGeminiGenerator: print(answer) ``` - This is a more advanced usage that also uses text and images as input: + Multimodal usage example: ```python import requests from haystack.utils import Secret @@ -58,12 +58,6 @@ class GoogleAIGeminiGenerator: for answer in result["answers"]: print(answer) ``` - - Input: - - **parts** A eterogeneous list of strings, ByteStream or Part objects. - - Output: - - **answers** A list of strings or dictionaries with function calls. """ def __init__( @@ -76,7 +70,7 @@ def __init__( tools: Optional[List[Tool]] = None, ): """ - Initialize a GoogleAIGeminiGenerator instance. + Initializes a `GoogleAIGeminiGenerator` instance. To get an API key, visit: https://makersuite.google.com @@ -85,21 +79,16 @@ def __init__( * `gemini-pro-vision` * `gemini-ultra` - :param api_key: Google Makersuite API key. - :param model: Name of the model to use, defaults to "gemini-pro-vision" - :param generation_config: The generation config to use, defaults to None. - Can either be a GenerationConfig object or a dictionary of parameters. 
- Accepted parameters are: - - temperature - - top_p - - top_k - - candidate_count - - max_output_tokens - - stop_sequences - :param safety_settings: The safety settings to use, defaults to None. - A dictionary of HarmCategory to HarmBlockThreshold. - :param tools: The tools to use, defaults to None. - A list of Tool objects that can be used to modify the generation process. + :param api_key: Google AI Studio API key. + :param model: Name of the model to use. + :param generation_config: The generation config to use. + Can either be a `GenerationConfig` object or a dictionary of parameters. + For the available parameters, see + [the `GenerationConfig` API reference](https://ai.google.dev/api/python/google/generativeai/GenerationConfig). + :param safety_settings: The safety settings to use. + A dictionary with `HarmCategory` as keys and `HarmBlockThreshold` as values. + For more information, see [the API reference](https://ai.google.dev/api) + :param tools: A list of Tool objects that can be used for [Function calling](https://ai.google.dev/docs/function_calling). """ genai.configure(api_key=api_key.resolve_value()) @@ -123,6 +112,12 @@ def _generation_config_to_dict(self, config: Union[GenerationConfig, Dict[str, A } def to_dict(self) -> Dict[str, Any]: + """ + Serializes the component to a dictionary. + + :returns: + Dictionary with serialized data. + """ data = default_to_dict( self, api_key=self._api_key.to_dict(), @@ -141,6 +136,14 @@ def to_dict(self) -> Dict[str, Any]: @classmethod def from_dict(cls, data: Dict[str, Any]) -> "GoogleAIGeminiGenerator": + """ + Deserializes the component from a dictionary. + + :param data: + Dictionary to deserialize from. + :returns: + Deserialized component. + """ deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) if (tools := data["init_parameters"].get("tools")) is not None: @@ -172,6 +175,16 @@ def _convert_part(self, part: Union[str, ByteStream, Part]) -> Part: @component.output_types(answers=List[Union[str, Dict[str, str]]]) def run(self, parts: Variadic[Union[str, ByteStream, Part]]): + """ + Generates text based on the given input parts. + + :param parts: + A heterogeneous list of strings, `ByteStream` or `Part` objects. + :returns: + A dictionary containing the following key: + - `answers`: A list of strings or dictionaries with function calls. + """ + converted_parts = [self._convert_part(p) for p in parts] contents = [Content(parts=converted_parts, role="user")] diff --git a/integrations/google_vertex/example_assets/robot1.jpg b/integrations/google_vertex/example_assets/robot1.jpg new file mode 100644 index 000000000..a3962db1b Binary files /dev/null and b/integrations/google_vertex/example_assets/robot1.jpg differ diff --git a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/captioner.py b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/captioner.py index 83322b33b..14102eb4b 100644 --- a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/captioner.py +++ b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/captioner.py @@ -12,16 +12,44 @@ @component class VertexAIImageCaptioner: + """ + `VertexAIImageCaptioner` enables text generation using Google Vertex AI imagetext generative model. + + Authenticates using Google Cloud Application Default Credentials (ADCs). 
+ For more information see the official [Google documentation](https://cloud.google.com/docs/authentication/provide-credentials-adc). + + Usage example: + ```python + import requests + + from haystack.dataclasses.byte_stream import ByteStream + from haystack_integrations.components.generators.google_vertex import VertexAIImageCaptioner + + captioner = VertexAIImageCaptioner(project_id=project_id) + + image = ByteStream( + data=requests.get( + "https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/main/integrations/google_vertex/example_assets/robot1.jpg" + ).content + ) + result = captioner.run(image=image) + + for caption in result["captions"]: + print(caption) + + >>> two gold robots are standing next to each other in the desert + ``` + """ + def __init__(self, *, model: str = "imagetext", project_id: str, location: Optional[str] = None, **kwargs): """ Generate image captions using a Google Vertex AI model. Authenticates using Google Cloud Application Default Credentials (ADCs). - For more information see the official Google documentation: - https://cloud.google.com/docs/authentication/provide-credentials-adc + For more information see the official [Google documentation](https://cloud.google.com/docs/authentication/provide-credentials-adc). :param project_id: ID of the GCP project to use. - :param model: Name of the model to use, defaults to "imagetext". + :param model: Name of the model to use. :param location: The default location to use when making API calls, if not set uses us-central-1. Defaults to None. :param kwargs: Additional keyword arguments to pass to the model. @@ -39,15 +67,35 @@ def __init__(self, *, model: str = "imagetext", project_id: str, location: Optio self._model = ImageTextModel.from_pretrained(self._model_name) def to_dict(self) -> Dict[str, Any]: + """ + Serializes the component to a dictionary. + + :returns: + Dictionary with serialized data. + """ return default_to_dict( self, model=self._model_name, project_id=self._project_id, location=self._location, **self._kwargs ) @classmethod def from_dict(cls, data: Dict[str, Any]) -> "VertexAIImageCaptioner": + """ + Deserializes the component from a dictionary. + + :param data: + Dictionary to deserialize from. + :returns: + Deserialized component. + """ return default_from_dict(cls, data) @component.output_types(captions=List[str]) def run(self, image: ByteStream): + """Prompts the model to generate captions for the given image. + + :param image: The image to generate captions for. + :returns: A dictionary with the following keys: + - `captions`: A list of captions generated by the model. + """ captions = self._model.get_captions(image=Image(image.data), **self._kwargs) return {"captions": captions} diff --git a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/chat/gemini.py b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/chat/gemini.py index 5a6137765..f08a69b5f 100644 --- a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/chat/gemini.py +++ b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/chat/gemini.py @@ -22,6 +22,30 @@ @component class VertexAIGeminiChatGenerator: + """ + `VertexAIGeminiChatGenerator` enables chat completion using Google Gemini models. + + `VertexAIGeminiChatGenerator` supports both `gemini-pro` and `gemini-pro-vision` models. + Prompting with images requires `gemini-pro-vision`. 
Function calling, instead, requires `gemini-pro`.
+
+    Authenticates using Google Cloud Application Default Credentials (ADCs).
+    For more information see the official [Google documentation](https://cloud.google.com/docs/authentication/provide-credentials-adc).
+
+    Usage example:
+    ```python
+    from haystack.dataclasses import ChatMessage
+    from haystack_integrations.components.generators.google_vertex import VertexAIGeminiChatGenerator
+
+    gemini_chat = VertexAIGeminiChatGenerator(project_id=project_id)
+
+    messages = [ChatMessage.from_user("Tell me the name of a movie")]
+    res = gemini_chat.run(messages)
+
+    print(res["replies"][0].content)
+    >>> The Shawshank Redemption
+    ```
+    """
+
    def __init__(
        self,
        *,
@@ -33,18 +57,25 @@ def __init__(
        tools: Optional[List[Tool]] = None,
    ):
        """
-        Multi modal generator using Gemini model via Google Vertex AI.
+        `VertexAIGeminiChatGenerator` enables chat completion using Google Gemini models.

        Authenticates using Google Cloud Application Default Credentials (ADCs).
-        For more information see the official Google documentation:
-        https://cloud.google.com/docs/authentication/provide-credentials-adc
+        For more information see the official [Google documentation](https://cloud.google.com/docs/authentication/provide-credentials-adc).

        :param project_id: ID of the GCP project to use.
        :param model: Name of the model to use, defaults to "gemini-pro-vision".
        :param location: The default location to use when making API calls, if not set uses us-central-1.
            Defaults to None.
-        :param kwargs: Additional keyword arguments to pass to the model.
-            For a list of supported arguments see the `GenerativeModel.generate_content()` documentation.
+        :param generation_config: Configuration for the generation process.
+            See the [GenerationConfig documentation](https://cloud.google.com/python/docs/reference/aiplatform/latest/vertexai.preview.generative_models.GenerationConfig)
+            for a list of supported arguments.
+        :param safety_settings: Safety settings to use when generating content. See the documentation
+            for [HarmBlockThreshold](https://cloud.google.com/python/docs/reference/aiplatform/latest/vertexai.preview.generative_models.HarmBlockThreshold)
+            and [HarmCategory](https://cloud.google.com/python/docs/reference/aiplatform/latest/vertexai.preview.generative_models.HarmCategory)
+            for more details.
+        :param tools: List of tools to use when generating content. See the documentation for
+            [Tool](https://cloud.google.com/python/docs/reference/aiplatform/latest/vertexai.preview.generative_models.Tool)
+            for the list of supported arguments.
        """

        # Login to GCP. This will fail if user has not set up their gcloud SDK
@@ -84,6 +115,12 @@ def _generation_config_to_dict(self, config: Union[GenerationConfig, Dict[str, A
        }

    def to_dict(self) -> Dict[str, Any]:
+        """
+        Serializes the component to a dictionary.
+
+        :returns:
+            Dictionary with serialized data.
+        """
        data = default_to_dict(
            self,
            model=self._model_name,
@@ -101,6 +138,14 @@ def to_dict(self) -> Dict[str, Any]:

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "VertexAIGeminiChatGenerator":
+        """
+        Deserializes the component from a dictionary.
+
+        :param data:
+            Dictionary to deserialize from.
+        :returns:
+            Deserialized component.
+ """ if (tools := data["init_parameters"].get("tools")) is not None: data["init_parameters"]["tools"] = [Tool.from_dict(t) for t in tools] if (generation_config := data["init_parameters"].get("generation_config")) is not None: @@ -151,6 +196,12 @@ def _message_to_content(self, message: ChatMessage) -> Content: @component.output_types(replies=List[ChatMessage]) def run(self, messages: List[ChatMessage]): + """Prompts Google Vertex AI Gemini model to generate a response to a list of messages. + + :param messages: The last message is the prompt, the rest are the history. + :returns: A dictionary with the following keys: + - `replies`: A list of ChatMessage objects representing the model's replies. + """ history = [self._message_to_content(m) for m in messages[:-1]] session = self._model.start_chat(history=history) diff --git a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/code_generator.py b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/code_generator.py index 1914af289..f8889373c 100644 --- a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/code_generator.py +++ b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/code_generator.py @@ -11,18 +11,50 @@ @component class VertexAICodeGenerator: + """ + This component enables code generation using Google Vertex AI generative model. + + `VertexAICodeGenerator` supports `code-bison`, `code-bison-32k`, and `code-gecko`. + + Usage example: + ```python + from haystack_integrations.components.generators.google_vertex import VertexAICodeGenerator + + generator = VertexAICodeGenerator(project_id=project_id) + + result = generator.run(prefix="def to_json(data):") + + for answer in result["answers"]: + print(answer) + + >>> ```python + >>> import json + >>> + >>> def to_json(data): + >>> \"\"\"Converts a Python object to a JSON string. + >>> + >>> Args: + >>> data: The Python object to convert. + >>> + >>> Returns: + >>> A JSON string representing the Python object. + >>> \"\"\" + >>> + >>> return json.dumps(data) + >>> ``` + ``` + """ + def __init__(self, *, model: str = "code-bison", project_id: str, location: Optional[str] = None, **kwargs): """ Generate code using a Google Vertex AI model. Authenticates using Google Cloud Application Default Credentials (ADCs). - For more information see the official Google documentation: - https://cloud.google.com/docs/authentication/provide-credentials-adc + For more information see the official [Google documentation](https://cloud.google.com/docs/authentication/provide-credentials-adc). :param project_id: ID of the GCP project to use. - :param model: Name of the model to use, defaults to "text-bison". + :param model: Name of the model to use. :param location: The default location to use when making API calls, if not set uses us-central-1. - Defaults to None. :param kwargs: Additional keyword arguments to pass to the model. For a list of supported arguments see the `TextGenerationModel.predict()` documentation. """ @@ -38,16 +70,38 @@ def __init__(self, *, model: str = "code-bison", project_id: str, location: Opti self._model = CodeGenerationModel.from_pretrained(self._model_name) def to_dict(self) -> Dict[str, Any]: + """ + Serializes the component to a dictionary. + + :returns: + Dictionary with serialized data. 
+ """ return default_to_dict( self, model=self._model_name, project_id=self._project_id, location=self._location, **self._kwargs ) @classmethod def from_dict(cls, data: Dict[str, Any]) -> "VertexAICodeGenerator": + """ + Deserializes the component from a dictionary. + + :param data: + Dictionary to deserialize from. + :returns: + Deserialized component. + """ return default_from_dict(cls, data) @component.output_types(answers=List[str]) def run(self, prefix: str, suffix: Optional[str] = None): + """ + Generate code using a Google Vertex AI model. + + :param prefix: Code before the current point. + :param suffix: Code after the current point. + :returns: A dictionary with the following keys: + - `answers`: A list of generated code snippets. + """ res = self._model.predict(prefix=prefix, suffix=suffix, **self._kwargs) # Handle the case where the model returns multiple candidates answers = [c.text for c in res.candidates] if hasattr(res, "candidates") else [res.text] diff --git a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/gemini.py b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/gemini.py index 698b07b01..1383f176d 100644 --- a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/gemini.py +++ b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/gemini.py @@ -22,6 +22,35 @@ @component class VertexAIGeminiGenerator: + """ + `VertexAIGeminiGenerator` enables text generation using Google Gemini models. + + `VertexAIGeminiGenerator` supports both `gemini-pro` and `gemini-pro-vision` models. + Prompting with images requires `gemini-pro-vision`. Function calling, instead, requires `gemini-pro`. + + Usage example: + ```python + from haystack_integrations.components.generators.google_vertex import VertexAIGeminiGenerator + + + gemini = VertexAIGeminiGenerator(project_id=project_id) + result = gemini.run(parts = ["What is the most interesting thing you know?"]) + for answer in result["answers"]: + print(answer) + + >>> 1. **The Origin of Life:** How and where did life begin? The answers to this ... + >>> 2. **The Unseen Universe:** The vast majority of the universe is ... + >>> 3. **Quantum Entanglement:** This eerie phenomenon in quantum mechanics allows ... + >>> 4. **Time Dilation:** Einstein's theory of relativity revealed that time can ... + >>> 5. **The Fermi Paradox:** Despite the vastness of the universe and the ... + >>> 6. **Biological Evolution:** The idea that life evolves over time through natural ... + >>> 7. **Neuroplasticity:** The brain's ability to adapt and change throughout life, ... + >>> 8. **The Goldilocks Zone:** The concept of the habitable zone, or the Goldilocks zone, ... + >>> 9. **String Theory:** This theoretical framework in physics aims to unify all ... + >>> 10. **Consciousness:** The nature of human consciousness and how it arises ... + ``` + """ + def __init__( self, *, @@ -33,18 +62,17 @@ def __init__( tools: Optional[List[Tool]] = None, ): """ - Multi modal generator using Gemini model via Google Vertex AI. + Multi-modal generator using Gemini model via Google Vertex AI. Authenticates using Google Cloud Application Default Credentials (ADCs). - For more information see the official Google documentation: - https://cloud.google.com/docs/authentication/provide-credentials-adc + For more information see the official [Google documentation](https://cloud.google.com/docs/authentication/provide-credentials-adc). 
:param project_id: ID of the GCP project to use.
-        :param model: Name of the model to use, defaults to "gemini-pro-vision"
+        :param model: Name of the model to use.
        :param location: The default location to use when making API calls, if not set uses us-central-1.
-            Defaults to None.
-        :param generation_config: The generation config to use, defaults to None.
-            Can either be a GenerationConfig object or a dictionary of parameters.
+        :param generation_config: The generation config to use.
+            Can either be a [`GenerationConfig`](https://cloud.google.com/python/docs/reference/aiplatform/latest/vertexai.preview.generative_models.GenerationConfig)
+            object or a dictionary of parameters.
            Accepted fields are:
            - temperature
            - top_p
            - top_k
            - candidate_count
            - max_output_tokens
            - stop_sequences
-        :param safety_settings: The safety settings to use, defaults to None.
-            A dictionary of HarmCategory to HarmBlockThreshold.
-        :param tools: The tools to use, defaults to None.
-            A list of Tool objects that can be used to modify the generation process.
+        :param safety_settings: The safety settings to use. See the documentation
+            for [HarmBlockThreshold](https://cloud.google.com/python/docs/reference/aiplatform/latest/vertexai.preview.generative_models.HarmBlockThreshold)
+            and [HarmCategory](https://cloud.google.com/python/docs/reference/aiplatform/latest/vertexai.preview.generative_models.HarmCategory)
+            for more details.
+        :param tools: List of tools to use when generating content. See the documentation for
+            [Tool](https://cloud.google.com/python/docs/reference/aiplatform/latest/vertexai.preview.generative_models.Tool)
+            for the list of supported arguments.
        """

        # Login to GCP. This will fail if user has not set up their gcloud SDK
@@ -95,6 +126,12 @@ def _generation_config_to_dict(self, config: Union[GenerationConfig, Dict[str, A
        }

    def to_dict(self) -> Dict[str, Any]:
+        """
+        Serializes the component to a dictionary.
+
+        :returns:
+            Dictionary with serialized data.
+        """
        data = default_to_dict(
            self,
            model=self._model_name,
@@ -112,6 +149,14 @@ def to_dict(self) -> Dict[str, Any]:

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "VertexAIGeminiGenerator":
+        """
+        Deserializes the component from a dictionary.
+
+        :param data:
+            Dictionary to deserialize from.
+        :returns:
+            Deserialized component.
+        """
        if (tools := data["init_parameters"].get("tools")) is not None:
            data["init_parameters"]["tools"] = [Tool.from_dict(t) for t in tools]
        if (generation_config := data["init_parameters"].get("generation_config")) is not None:
@@ -132,6 +177,13 @@ def _convert_part(self, part: Union[str, ByteStream, Part]) -> Part:

    @component.output_types(answers=List[Union[str, Dict[str, str]]])
    def run(self, parts: Variadic[Union[str, ByteStream, Part]]):
+        """
+        Generates content using the Gemini model.
+
+        :param parts: Prompt for the model.
+        :returns: A dictionary with the following keys:
+            - `answers`: A list of generated content.
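+
+        A minimal sketch of a multimodal call (a sketch only: the image file name is
+        illustrative, and `gemini` is a `VertexAIGeminiGenerator` instance as in the
+        class docstring above):
+        ```python
+        from haystack.dataclasses.byte_stream import ByteStream
+
+        # Mix a text part and an image part in a single call
+        image = ByteStream.from_file_path("robot1.jpg")
+        result = gemini.run(parts=["Describe this image", image])
+        ```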
+ """ converted_parts = [self._convert_part(p) for p in parts] contents = [Content(parts=converted_parts, role="user")] diff --git a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/image_generator.py b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/image_generator.py index c81c88fe8..422e1cfe6 100644 --- a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/image_generator.py +++ b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/image_generator.py @@ -12,18 +12,34 @@ @component class VertexAIImageGenerator: + """ + This component enables image generation using Google Vertex AI generative model. + + Authenticates using Google Cloud Application Default Credentials (ADCs). + For more information see the official [Google documentation](https://cloud.google.com/docs/authentication/provide-credentials-adc). + + Usage example: + ```python + from pathlib import Path + + from haystack_integrations.components.generators.google_vertex import VertexAIImageGenerator + + generator = VertexAIImageGenerator(project_id=project_id) + result = generator.run(prompt="Generate an image of a cute cat") + result["images"][0].to_file(Path("my_image.png")) + ``` + """ + def __init__(self, *, model: str = "imagegeneration", project_id: str, location: Optional[str] = None, **kwargs): """ Generates images using a Google Vertex AI model. Authenticates using Google Cloud Application Default Credentials (ADCs). - For more information see the official Google documentation: - https://cloud.google.com/docs/authentication/provide-credentials-adc + For more information see the official [Google documentation](https://cloud.google.com/docs/authentication/provide-credentials-adc). :param project_id: ID of the GCP project to use. - :param model: Name of the model to use, defaults to "imagegeneration". + :param model: Name of the model to use. :param location: The default location to use when making API calls, if not set uses us-central-1. - Defaults to None. :param kwargs: Additional keyword arguments to pass to the model. For a list of supported arguments see the `ImageGenerationModel.generate_images()` documentation. """ @@ -39,16 +55,38 @@ def __init__(self, *, model: str = "imagegeneration", project_id: str, location: self._model = ImageGenerationModel.from_pretrained(self._model_name) def to_dict(self) -> Dict[str, Any]: + """ + Serializes the component to a dictionary. + + :returns: + Dictionary with serialized data. + """ return default_to_dict( self, model=self._model_name, project_id=self._project_id, location=self._location, **self._kwargs ) @classmethod def from_dict(cls, data: Dict[str, Any]) -> "VertexAIImageGenerator": + """ + Deserializes the component from a dictionary. + + :param data: + Dictionary to deserialize from. + :returns: + Deserialized component. + """ return default_from_dict(cls, data) @component.output_types(images=List[ByteStream]) def run(self, prompt: str, negative_prompt: Optional[str] = None): + """Produces images based on the given prompt. + + :param prompt: The prompt to generate images from. + :param negative_prompt: A description of what you want to omit in + the generated images. + :returns: A dictionary with the following keys: + - images: A list of ByteStream objects, each containing an image. 
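+
+        A minimal sketch with a negative prompt (the prompt strings are illustrative,
+        and `generator` is a `VertexAIImageGenerator` instance as in the class
+        docstring above):
+        ```python
+        # Ask for an image while steering the model away from unwanted traits
+        result = generator.run(
+            prompt="Generate an image of a cute cat",
+            negative_prompt="low quality, blurry",
+        )
+        ```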
+ """ negative_prompt = negative_prompt or self._kwargs.get("negative_prompt") res = self._model.generate_images(prompt=prompt, negative_prompt=negative_prompt, **self._kwargs) images = [ByteStream(data=i._image_bytes, meta=i.generation_parameters) for i in res.images] diff --git a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/question_answering.py b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/question_answering.py index 276364227..79c343b02 100644 --- a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/question_answering.py +++ b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/question_answering.py @@ -12,18 +12,39 @@ @component class VertexAIImageQA: + """ + This component enables text generation (image captioning) using Google Vertex AI generative models. + + Authenticates using Google Cloud Application Default Credentials (ADCs). + For more information see the official [Google documentation](https://cloud.google.com/docs/authentication/provide-credentials-adc). + + Usage example: + ```python + from haystack.dataclasses.byte_stream import ByteStream + from haystack_integrations.components.generators.google_vertex import VertexAIImageQA + + qa = VertexAIImageQA(project_id=project_id) + + image = ByteStream.from_file_path("dog.jpg") + + res = qa.run(image=image, question="What color is this dog") + + print(res["answers"][0]) + + >>> white + ``` + """ + def __init__(self, *, model: str = "imagetext", project_id: str, location: Optional[str] = None, **kwargs): """ Answers questions about an image using a Google Vertex AI model. Authenticates using Google Cloud Application Default Credentials (ADCs). - For more information see the official Google documentation: - https://cloud.google.com/docs/authentication/provide-credentials-adc + For more information see the official [Google documentation](https://cloud.google.com/docs/authentication/provide-credentials-adc). :param project_id: ID of the GCP project to use. - :param model: Name of the model to use, defaults to "imagetext". + :param model: Name of the model to use. :param location: The default location to use when making API calls, if not set uses us-central-1. - Defaults to None. :param kwargs: Additional keyword arguments to pass to the model. For a list of supported arguments see the `ImageTextModel.ask_question()` documentation. """ @@ -39,15 +60,36 @@ def __init__(self, *, model: str = "imagetext", project_id: str, location: Optio self._model = ImageTextModel.from_pretrained(self._model_name) def to_dict(self) -> Dict[str, Any]: + """ + Serializes the component to a dictionary. + + :returns: + Dictionary with serialized data. + """ return default_to_dict( self, model=self._model_name, project_id=self._project_id, location=self._location, **self._kwargs ) @classmethod def from_dict(cls, data: Dict[str, Any]) -> "VertexAIImageQA": + """ + Deserializes the component from a dictionary. + + :param data: + Dictionary to deserialize from. + :returns: + Deserialized component. + """ return default_from_dict(cls, data) @component.output_types(answers=List[str]) def run(self, image: ByteStream, question: str): + """Prompts model to answer a question about an image. + + :param image: The image to ask the question about. + :param question: The question to ask. + :returns: A dictionary with the following keys: + - answers: A list of answers to the question. 
+ """ answers = self._model.ask_question(image=Image(image.data), question=question, **self._kwargs) return {"answers": answers} diff --git a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/text_generator.py b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/text_generator.py index 6022bcf4f..e16954f8f 100644 --- a/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/text_generator.py +++ b/integrations/google_vertex/src/haystack_integrations/components/generators/google_vertex/text_generator.py @@ -13,18 +13,48 @@ @component class VertexAITextGenerator: + """ + This component enables text generation using Google Vertex AI generative models. + + `VertexAITextGenerator` supports `text-bison`, `text-unicorn` and `text-bison-32k` models. + + Authenticates using Google Cloud Application Default Credentials (ADCs). + For more information see the official [Google documentation](https://cloud.google.com/docs/authentication/provide-credentials-adc). + + Usage example: + ```python + from haystack_integrations.components.generators.google_vertex import VertexAITextGenerator + + generator = VertexAITextGenerator(project_id=project_id) + res = generator.run("Tell me a good interview question for a software engineer.") + + print(res["answers"][0]) + + >>> **Question:** + >>> You are given a list of integers and a target sum. + >>> Find all unique combinations of numbers in the list that add up to the target sum. + >>> + >>> **Example:** + >>> + >>> ``` + >>> Input: [1, 2, 3, 4, 5], target = 7 + >>> Output: [[1, 2, 4], [3, 4]] + >>> ``` + >>> + >>> **Follow-up:** What if the list contains duplicate numbers? + ``` + """ + def __init__(self, *, model: str = "text-bison", project_id: str, location: Optional[str] = None, **kwargs): """ Generate text using a Google Vertex AI model. Authenticates using Google Cloud Application Default Credentials (ADCs). - For more information see the official Google documentation: - https://cloud.google.com/docs/authentication/provide-credentials-adc + For more information see the official [Google documentation](https://cloud.google.com/docs/authentication/provide-credentials-adc). :param project_id: ID of the GCP project to use. - :param model: Name of the model to use, defaults to "text-bison". + :param model: Name of the model to use. :param location: The default location to use when making API calls, if not set uses us-central-1. - Defaults to None. :param kwargs: Additional keyword arguments to pass to the model. For a list of supported arguments see the `TextGenerationModel.predict()` documentation. """ @@ -40,6 +70,12 @@ def __init__(self, *, model: str = "text-bison", project_id: str, location: Opti self._model = TextGenerationModel.from_pretrained(self._model_name) def to_dict(self) -> Dict[str, Any]: + """ + Serializes the component to a dictionary. + + :returns: + Dictionary with serialized data. + """ data = default_to_dict( self, model=self._model_name, project_id=self._project_id, location=self._location, **self._kwargs ) @@ -57,6 +93,14 @@ def to_dict(self) -> Dict[str, Any]: @classmethod def from_dict(cls, data: Dict[str, Any]) -> "VertexAITextGenerator": + """ + Deserializes the component from a dictionary. + + :param data: + Dictionary to deserialize from. + :returns: + Deserialized component. 
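+
+        A minimal round-trip sketch (assumes `project_id` holds a valid GCP project ID):
+        ```python
+        generator = VertexAITextGenerator(project_id=project_id)
+        # Serialize to a dictionary, then rebuild an equivalent component
+        data = generator.to_dict()
+        same_generator = VertexAITextGenerator.from_dict(data)
+        ```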
+        """
        if (grounding_source := data["init_parameters"].get("grounding_source")) is not None:
            module_name, class_name = grounding_source["type"].rsplit(".", 1)
            module = importlib.import_module(module_name)
@@ -67,6 +111,15 @@ def from_dict(cls, data: Dict[str, Any]) -> "VertexAITextGenerator":

    @component.output_types(answers=List[str], safety_attributes=Dict[str, float], citations=List[Dict[str, Any]])
    def run(self, prompt: str):
+        """Prompts the model to generate text.
+
+        :param prompt: The prompt to use for text generation.
+        :returns: A dictionary with the following keys:
+            - answers: A list of generated answers.
+            - safety_attributes: A dictionary with the [safety scores](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/responsible-ai#safety_attribute_descriptions)
+              of each answer.
+            - citations: A list of citations for each answer.
+        """
        res = self._model.predict(prompt=prompt, **self._kwargs)

        answers = []
diff --git a/integrations/gradient/src/haystack_integrations/components/embedders/gradient/gradient_document_embedder.py b/integrations/gradient/src/haystack_integrations/components/embedders/gradient/gradient_document_embedder.py
index 4ccfb9da5..a868c6c1b 100644
--- a/integrations/gradient/src/haystack_integrations/components/embedders/gradient/gradient_document_embedder.py
+++ b/integrations/gradient/src/haystack_integrations/components/embedders/gradient/gradient_document_embedder.py
@@ -23,16 +23,32 @@ def _alt_progress_bar(x: Any) -> Any:
 class GradientDocumentEmbedder:
    """
    A component for computing Document embeddings using Gradient AI API.
+    The embedding of each Document is stored in the `embedding` field of the Document.
+
+    Usage example:
    ```python
-    embedder = GradientDocumentEmbedder(model="bge_large")
-    p = Pipeline()
-    p.add_component(embedder, name="document_embedder")
-    p.add_component(instance=GradientDocumentEmbedder(
-    p.add_component(instance=DocumentWriter(document_store=InMemoryDocumentStore()), name="document_writer")
-    p.connect("document_embedder", "document_writer")
-    p.run({"document_embedder": {"documents": documents}})
+    from haystack import Pipeline
+    from haystack.document_stores.in_memory import InMemoryDocumentStore
+    from haystack.components.writers import DocumentWriter
+    from haystack import Document
+
+    from haystack_integrations.components.embedders.gradient import GradientDocumentEmbedder
+
+    documents = [
+        Document(content="My name is Jean and I live in Paris."),
+        Document(content="My name is Mark and I live in Berlin."),
+        Document(content="My name is Giorgio and I live in Rome."),
+    ]
+
+    indexing_pipeline = Pipeline()
+    indexing_pipeline.add_component(instance=GradientDocumentEmbedder(), name="document_embedder")
+    indexing_pipeline.add_component(
+        instance=DocumentWriter(document_store=InMemoryDocumentStore()), name="document_writer"
+    )
+    indexing_pipeline.connect("document_embedder", "document_writer")
+    indexing_pipeline.run({"document_embedder": {"documents": documents}})
+    >>> {'document_writer': {'documents_written': 3}}
    ```
    """
@@ -53,7 +69,7 @@ def __init__(
        :param batch_size: Update cycle for tqdm progress bar, default is to update every 32_768 docs.
        :param access_token: The Gradient access token.
        :param workspace_id: The Gradient workspace ID.
-        :param host: The Gradient host. By default it uses https://api.gradient.ai/.
+        :param host: The Gradient host. By default, it uses [Gradient AI](https://api.gradient.ai/).
        :param progress_bar: Whether to show a progress bar while embedding the documents.
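+
+        For example, a minimal sketch passing the credentials explicitly (assumes the
+        `GRADIENT_ACCESS_TOKEN` and `GRADIENT_WORKSPACE_ID` environment variables are
+        set, as described for the other Gradient components in this package):
+        ```python
+        from haystack.utils import Secret
+
+        embedder = GradientDocumentEmbedder(
+            access_token=Secret.from_env_var("GRADIENT_ACCESS_TOKEN"),
+            workspace_id=Secret.from_env_var("GRADIENT_WORKSPACE_ID"),
+        )
+        ```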
""" self._batch_size = batch_size @@ -75,8 +91,12 @@ def _get_telemetry_data(self) -> Dict[str, Any]: def to_dict(self) -> dict: """ - Serialize the component to a Python dictionary. + Serialize this component to a dictionary. + + :returns: + The serialized component as a dictionary. """ + return default_to_dict( self, model=self._model_name, @@ -91,13 +111,17 @@ def to_dict(self) -> dict: def from_dict(cls, data: Dict[str, Any]) -> "GradientDocumentEmbedder": """ Deserialize this component from a dictionary. + + :param data: The dictionary representation of this component. + :returns: + The deserialized component instance. """ deserialize_secrets_inplace(data["init_parameters"], keys=["access_token", "workspace_id"]) return default_from_dict(cls, data) def warm_up(self) -> None: """ - Load the embedding model. + Initializes the component. """ if not hasattr(self, "_embedding_model"): self._embedding_model = self._gradient.get_embeddings_model(slug=self._model_name) @@ -125,9 +149,14 @@ def _generate_embeddings(self, documents: List[Document], batch_size: int) -> Li def run(self, documents: List[Document]): """ Embed a list of Documents. + The embedding of each Document is stored in the `embedding` field of the Document. :param documents: A list of Documents to embed. + :returns: + A dictionary with the following keys: + - `documents`: The embedded Documents. + """ if not isinstance(documents, list) or documents and any(not isinstance(doc, Document) for doc in documents): msg = "GradientDocumentEmbedder expects a list of Documents as input.\ diff --git a/integrations/gradient/src/haystack_integrations/components/embedders/gradient/gradient_text_embedder.py b/integrations/gradient/src/haystack_integrations/components/embedders/gradient/gradient_text_embedder.py index 029d5c52f..77b2d6250 100644 --- a/integrations/gradient/src/haystack_integrations/components/embedders/gradient/gradient_text_embedder.py +++ b/integrations/gradient/src/haystack_integrations/components/embedders/gradient/gradient_text_embedder.py @@ -8,15 +8,23 @@ @component class GradientTextEmbedder: """ - A component for embedding strings using models hosted on Gradient AI (https://gradient.ai). + A component for embedding strings using models hosted on [Gradient AI](https://gradient.ai). + Usage example: ```python - embedder = GradientTextEmbedder(model="bge_large") + from haystack_integrations.components.embedders.gradient import GradientTextEmbedder + from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever + from haystack.document_stores.in_memory import InMemoryDocumentStore + from haystack import Pipeline + + embedder = p = Pipeline() - p.add_component(instance=embedder, name="text_embedder") - p.add_component(instance=InMemoryEmbeddingRetriever(document_store=InMemoryDocumentStore()), name="retriever") + p.add_component("text_embedder", GradientTextEmbedder(model="bge-large")) + p.add_component("retriever", InMemoryEmbeddingRetriever(document_store=InMemoryDocumentStore())) p.connect("text_embedder", "retriever") - p.run("embed me!!!") + p.run(data={"text_embedder": {"text":"You can embed me put I'll return no matching documents"}}) + >>> No Documents found with embeddings. Returning empty list. To generate embeddings, use a DocumentEmbedder. + >>> {'retriever': {'documents': []}} ``` """ @@ -34,7 +42,7 @@ def __init__( :param model: The name of the model to use. :param access_token: The Gradient access token. :param workspace_id: The Gradient workspace ID. - :param host: The Gradient host. 
By default it uses https://api.gradient.ai/. + :param host: The Gradient host. By default, it uses [Gradient AI](https://api.gradient.ai/). """ self._host = host self._model_name = model @@ -53,7 +61,10 @@ def _get_telemetry_data(self) -> Dict[str, Any]: def to_dict(self) -> dict: """ - Serialize the component to a Python dictionary. + Serialize this component to a dictionary. + + :returns: + The serialized component as a dictionary. """ return default_to_dict( self, @@ -67,13 +78,17 @@ def to_dict(self) -> dict: def from_dict(cls, data: Dict[str, Any]) -> "GradientTextEmbedder": """ Deserialize this component from a dictionary. + + :param data: The dictionary representation of this component. + :returns: + The deserialized component instance. """ deserialize_secrets_inplace(data["init_parameters"], keys=["access_token", "workspace_id"]) return default_from_dict(cls, data) def warm_up(self) -> None: """ - Load the embedding model. + Initializes the component. """ if not hasattr(self, "_embedding_model"): self._embedding_model = self._gradient.get_embeddings_model(slug=self._model_name) diff --git a/integrations/gradient/src/haystack_integrations/components/generators/gradient/base.py b/integrations/gradient/src/haystack_integrations/components/generators/gradient/base.py index 9176c3e4b..71b39d309 100644 --- a/integrations/gradient/src/haystack_integrations/components/generators/gradient/base.py +++ b/integrations/gradient/src/haystack_integrations/components/generators/gradient/base.py @@ -16,7 +16,10 @@ class GradientGenerator: Queries the LLM using Gradient AI's SDK ('gradientai' package). See [Gradient AI API](https://docs.gradient.ai/docs/sdk-quickstart) for more details. + Usage example: ```python + from haystack_integrations.components.generators.gradient import GradientGenerator + llm = GradientGenerator(base_model_slug="llama2-7b-chat") llm.warm_up() print(llm.run(prompt="What is the meaning of life?")) @@ -40,17 +43,17 @@ def __init__( """ Create a GradientGenerator component. - :param access_token: The Gradient access token. If not provided it's read from the environment - variable GRADIENT_ACCESS_TOKEN. + :param access_token: The Gradient access token as a `Secret`. If not provided it's read from the environment + variable `GRADIENT_ACCESS_TOKEN`. :param base_model_slug: The base model slug to use. - :param host: The Gradient host. By default it uses https://api.gradient.ai/. + :param host: The Gradient host. By default, it uses [Gradient AI](https://api.gradient.ai/). :param max_generated_token_count: The maximum number of tokens to generate. :param model_adapter_id: The model adapter ID to use. :param temperature: The temperature to use. :param top_k: The top k to use. :param top_p: The top p to use. - :param workspace_id: The Gradient workspace ID. If not provided it's read from the environment - variable GRADIENT_WORKSPACE_ID. + :param workspace_id: The Gradient workspace ID as a `Secret`. If not provided it's read from the environment + variable `GRADIENT_WORKSPACE_ID`. """ self._access_token = access_token self._base_model_slug = base_model_slug @@ -84,6 +87,9 @@ def __init__( def to_dict(self) -> Dict[str, Any]: """ Serialize this component to a dictionary. + + :returns: + The serialized component as a dictionary. """ return default_to_dict( self, @@ -102,7 +108,12 @@ def to_dict(self) -> Dict[str, Any]: def from_dict(cls, data: Dict[str, Any]) -> "GradientGenerator": """ Deserialize this component from a dictionary. 
+ + :param data: The dictionary representation of this component. + :returns: + The deserialized component instance. """ + deserialize_secrets_inplace(data["init_parameters"], keys=["access_token", "workspace_id"]) return default_from_dict(cls, data) diff --git a/integrations/unstructured/src/haystack_integrations/components/converters/unstructured/converter.py b/integrations/unstructured/src/haystack_integrations/components/converters/unstructured/converter.py index 0eff7bc82..84865cee5 100644 --- a/integrations/unstructured/src/haystack_integrations/components/converters/unstructured/converter.py +++ b/integrations/unstructured/src/haystack_integrations/components/converters/unstructured/converter.py @@ -8,9 +8,9 @@ from pathlib import Path from typing import Any, Dict, List, Literal, Optional, Union -from haystack import Document, component, default_to_dict +from haystack import Document, component, default_from_dict, default_to_dict from haystack.components.converters.utils import normalize_metadata -from haystack.utils import Secret +from haystack.utils import Secret, deserialize_secrets_inplace from tqdm import tqdm from unstructured.documents.elements import Element @@ -108,6 +108,18 @@ def to_dict(self) -> Dict[str, Any]: progress_bar=self.progress_bar, ) + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "UnstructuredFileConverter": + """ + Deserializes the component from a dictionary. + :param data: + Dictionary to deserialize from. + :returns: + Deserialized component. + """ + deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) + return default_from_dict(cls, data) + @component.output_types(documents=List[Document]) def run( self, diff --git a/integrations/unstructured/tests/test_converter.py b/integrations/unstructured/tests/test_converter.py index 7a5e135ac..5d1a6c091 100644 --- a/integrations/unstructured/tests/test_converter.py +++ b/integrations/unstructured/tests/test_converter.py @@ -52,6 +52,27 @@ def test_to_dict(self): }, } + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("UNSTRUCTURED_API_KEY", "test-api-key") + converter_dict = { + "type": "haystack_integrations.components.converters.unstructured.converter.UnstructuredFileConverter", + "init_parameters": { + "api_url": "http://custom-url:8000/general", + "api_key": {"env_vars": ["UNSTRUCTURED_API_KEY"], "strict": False, "type": "env_var"}, + "document_creation_mode": "one-doc-per-element", + "separator": "|", + "unstructured_kwargs": {"foo": "bar"}, + "progress_bar": False, + }, + } + converter = UnstructuredFileConverter.from_dict(converter_dict) + assert converter.api_url == "http://custom-url:8000/general" + assert converter.api_key.resolve_value() == "test-api-key" + assert converter.document_creation_mode == "one-doc-per-element" + assert converter.separator == "|" + assert converter.unstructured_kwargs == {"foo": "bar"} + assert not converter.progress_bar + @pytest.mark.integration def test_run_one_doc_per_file(self, samples_path): pdf_path = samples_path / "sample_pdf.pdf" diff --git a/integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain/evaluator.py b/integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain/evaluator.py index 3699d50f6..877706786 100644 --- a/integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain/evaluator.py +++ b/integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain/evaluator.py @@ -18,11 +18,33 @@ @component class UpTrainEvaluator: """ - A component that uses 
the UpTrain framework to evaluate inputs against a specific metric.
-
-    The supported metrics are defined by :class:`UpTrainMetric`. The inputs of the component
-    metric-dependent. The output is a nested list of evaluation results where each inner list
-    contains the results for a single input.
+    A component that uses the [UpTrain framework](https://docs.uptrain.ai/getting-started/introduction)
+    to evaluate inputs against a specific metric. Supported metrics are defined by `UpTrainMetric`.
+
+    Usage example:
+    ```python
+    from haystack_integrations.components.evaluators.uptrain import UpTrainEvaluator, UpTrainMetric
+    from haystack.utils import Secret
+
+    evaluator = UpTrainEvaluator(
+        metric=UpTrainMetric.FACTUAL_ACCURACY,
+        api="openai",
+        api_key=Secret.from_env_var("OPENAI_API_KEY"),
+    )
+    output = evaluator.run(
+        questions=["Which is the most popular global sport?"],
+        contexts=[
+            [
+                "Football is undoubtedly the world's most popular sport with "
+                "major events like the FIFA World Cup and sports personalities "
+                "like Ronaldo and Messi, drawing a followership of more than 4 "
+                "billion people."
+            ]
+        ],
+        responses=["Football is the most popular sport with around 4 billion " "followers worldwide"],
+    )
+    print(output["results"])
+    ```
    """

    _backend_metric: Union[Evals, ParametricEval]
@@ -44,15 +66,15 @@ def __init__(
            The metric to use for evaluation.
        :param metric_params:
            Parameters to pass to the metric's constructor.
+            Refer to the `UpTrainMetric` class for more details
+            on required parameters.
        :param api:
-            The API to use for evaluation.
-
-            Supported APIs: "openai", "uptrain".
+            The API to use for evaluation. Supported APIs:
+            `openai`, `uptrain`.
        :param api_key:
            The API key to use.
        :param api_params:
            Additional parameters to pass to the API client.
-            Required parameters for the UpTrain API: `project_name`.
        """
        self.metric = metric if isinstance(metric, UpTrainMetric) else UpTrainMetric.from_str(metric)
@@ -69,38 +91,20 @@ def __init__(
    @component.output_types(results=List[List[Dict[str, Any]]])
    def run(self, **inputs) -> Dict[str, Any]:
        """
-        Run the UpTrain evaluator.
-
-        Example:
-        ```python
-        pipeline = Pipeline()
-        evaluator = UpTrainEvaluator(
-            metric=UpTrainMetric.FACTUAL_ACCURACY,
-            api="openai",
-            api_key=Secret.from_env_var("OPENAI_API_KEY"),
-        )
-        pipeline.add_component("evaluator", evaluator)
-
-        # Each metric expects a specific set of parameters as input. Refer to the
-        # UpTrainMetric class' documentation for more details.
-        output = pipeline.run({"evaluator": {
-            "questions": ["question],
-            "contexts": [["context", "another context"]],
-            "responses": ["response"]
-        }})
-        ```
+        Run the UpTrain evaluator on the provided inputs.

        :param inputs:
            The inputs to evaluate. These are determined by the
            metric being calculated. See `UpTrainMetric` for more
            information.
        :returns:
-            A nested list of metric results. Each input can have one or more
+            A dictionary with a single `results` entry that contains
+            a nested list of metric results. Each input can have one or more
            results, depending on the metric. Each result is a dictionary containing
            the following keys and values:
-            * `name` - The name of the metric.
-            * `score` - The score of the metric.
-            * `explanation` - An optional explanation of the score.
+            - `name` - The name of the metric.
+            - `score` - The score of the metric.
+            - `explanation` - An optional explanation of the score.
        """
        # The backend requires random access to the data, so we can't stream it.
InputConverters.validate_input_parameters(self.metric, self.descriptor.input_parameters, inputs) @@ -125,7 +129,12 @@ def run(self, **inputs) -> Dict[str, Any]: def to_dict(self) -> Dict[str, Any]: """ - Serialize this component to a dictionary. + Serializes the component to a dictionary. + + :returns: + Dictionary with serialized data. + :raises DeserializationError: + If the component cannot be serialized. """ def check_serializable(obj: Any): @@ -151,18 +160,17 @@ def check_serializable(obj: Any): @classmethod def from_dict(cls, data: Dict[str, Any]) -> "UpTrainEvaluator": """ - Deserialize a component from a dictionary. + Deserializes the component from a dictionary. :param data: - The dictionary to deserialize from. + Dictionary to deserialize from. + :returns: + Deserialized component. """ deserialize_secrets_inplace(data["init_parameters"], ["api_key"]) return default_from_dict(cls, data) def _init_backend(self): - """ - Initialize the UpTrain backend. - """ if isinstance(self.descriptor.backend, Evals): if self.metric_params is not None: msg = ( diff --git a/integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain/metrics.py b/integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain/metrics.py index 6f7854aee..a13843d4a 100644 --- a/integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain/metrics.py +++ b/integrations/uptrain/src/haystack_integrations/components/evaluators/uptrain/metrics.py @@ -14,48 +14,51 @@ class UpTrainMetric(Enum): Metrics supported by UpTrain. """ - #: Context relevance. + #: Context relevance.\ #: Inputs - `questions: List[str], contexts: List[List[str]]` CONTEXT_RELEVANCE = "context_relevance" - #: Factual accuracy. + #: Factual accuracy.\ #: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str]` FACTUAL_ACCURACY = "factual_accuracy" - #: Response relevance. + #: Response relevance.\ #: Inputs - `questions: List[str], responses: List[str]` RESPONSE_RELEVANCE = "response_relevance" - #: Response completeness. + #: Response completeness.\ #: Inputs - `questions: List[str], responses: List[str]` RESPONSE_COMPLETENESS = "response_completeness" - #: Response completeness with respect to context. + #: Response completeness with respect to context.\ #: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str]` RESPONSE_COMPLETENESS_WRT_CONTEXT = "response_completeness_wrt_context" - #: Response consistency. + #: Response consistency.\ #: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str]` RESPONSE_CONSISTENCY = "response_consistency" - #: Response conciseness. + #: Response conciseness.\ #: Inputs - `questions: List[str], responses: List[str]` RESPONSE_CONCISENESS = "response_conciseness" - #: Language critique. + #: Language critique.\ #: Inputs - `responses: List[str]` CRITIQUE_LANGUAGE = "critique_language" - #: Tone critique. - #: Inputs - `responses: List[str]` + #: Tone critique.\ + #: Inputs - `responses: List[str]`\ + #: Parameters - `llm_persona: str` CRITIQUE_TONE = "critique_tone" - #: Guideline adherence. - #: Inputs - `questions: List[str], responses: List[str]` + #: Guideline adherence.\ + #: Inputs - `questions: List[str], responses: List[str]`\ + #: Parameters - `guideline: str`, `guideline_name: str`, `response_schema: Optional[str]` GUIDELINE_ADHERENCE = "guideline_adherence" - #: Response matching. 
- #: Inputs - `responses: List[str], ground_truths: List[str]` + #: Response matching.\ + #: Inputs - `responses: List[str], ground_truths: List[str]`\ + #: Parameters - `method: str` RESPONSE_MATCHING = "response_matching" def __str__(self):
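
As a closing note on the parametric metrics documented above, here is a minimal sketch of configuring one of them through `metric_params` (the persona string and response text are illustrative only):

```python
from haystack.utils import Secret
from haystack_integrations.components.evaluators.uptrain import UpTrainEvaluator, UpTrainMetric

# CRITIQUE_TONE takes `responses` as input and an `llm_persona` parameter,
# as documented in the UpTrainMetric docstrings above.
evaluator = UpTrainEvaluator(
    metric=UpTrainMetric.CRITIQUE_TONE,
    metric_params={"llm_persona": "a patient teacher"},
    api="openai",
    api_key=Secret.from_env_var("OPENAI_API_KEY"),
)
output = evaluator.run(responses=["Life is what happens when you're busy making other plans."])
print(output["results"])
```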