Add NvidiaGenerator (#557)

* NvidiaGenerator first draft * Refine generator * Move function to get model id in client * Simplify invocation to keep it in line with embedders * Rename nvidia_generator.py to generator.py * Export NvidiaGenerator at package level * Add NvidiaGenerator tests * Fix embedders tests * Update docstring * Remove some unnecessary logic, add usage output, fix docstrings * Update integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py * Fix linting --------- Co-authored-by: Madeesh Kannan <[email protected]>
deepset-ai · Mar 7, 2024 · ead381e · ead381e
1 parent 24c06fb
commit ead381e
Show file tree

Hide file tree

Showing 12 changed files with 465 additions and 27 deletions.
diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_schema.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_schema.py
@@ -1,31 +1,10 @@
 from dataclasses import asdict, dataclass
 from typing import Any, Dict, List, Literal, Union
 
-from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient
-
-from .models import NvidiaEmbeddingModel
-
 MAX_INPUT_STRING_LENGTH = 2048
 MAX_INPUTS = 50
 
 
-def get_model_nvcf_id(model: NvidiaEmbeddingModel, client: NvidiaCloudFunctionsClient) -> str:
-    """
-    Returns the Nvidia Cloud Functions UUID for the given model.
-    """
-
-    available_functions = client.available_functions()
-    func = available_functions.get(str(model))
-    if func is None:
-        msg = f"Model '{model}' was not found on the Nvidia Cloud Functions backend"
-        raise ValueError(msg)
-    elif func.status != "ACTIVE":
-        msg = f"Model '{model}' is not currently active/usable on the Nvidia Cloud Functions backend"
-        raise ValueError(msg)
-
-    return func.id
-
-
 @dataclass
 class EmbeddingsRequest:
     input: Union[str, List[str]]

diff --git a/...rations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py b/...rations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py
@@ -5,7 +5,7 @@
 from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient
 from tqdm import tqdm
 
-from ._schema import MAX_INPUTS, EmbeddingsRequest, EmbeddingsResponse, Usage, get_model_nvcf_id
+from ._schema import MAX_INPUTS, EmbeddingsRequest, EmbeddingsResponse, Usage
 from .models import NvidiaEmbeddingModel
 
 
@@ -96,7 +96,7 @@ def warm_up(self):
         if self._initialized:
             return
 
-        self.nvcf_id = get_model_nvcf_id(self.model, self.client)
+        self.nvcf_id = self.client.get_model_nvcf_id(str(self.model))
         self._initialized = True
 
     def to_dict(self) -> Dict[str, Any]:

diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py
@@ -4,7 +4,7 @@
 from haystack.utils import Secret, deserialize_secrets_inplace
 from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient
 
-from ._schema import EmbeddingsRequest, EmbeddingsResponse, get_model_nvcf_id
+from ._schema import EmbeddingsRequest, EmbeddingsResponse
 from .models import NvidiaEmbeddingModel
 
 
@@ -74,7 +74,7 @@ def warm_up(self):
         if self._initialized:
             return
 
-        self.nvcf_id = get_model_nvcf_id(self.model, self.client)
+        self.nvcf_id = self.client.get_model_nvcf_id(str(self.model))
         self._initialized = True
 
     def to_dict(self) -> Dict[str, Any]:

diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/__init__.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/__init__.py
@@ -1,3 +1,6 @@
-# SPDX-FileCopyrightText: 2023-present deepset GmbH <[email protected]>
+# SPDX-FileCopyrightText: 2024-present deepset GmbH <[email protected]>
 #
 # SPDX-License-Identifier: Apache-2.0
+from .generator import NvidiaGenerator
+
+__all__ = ["NvidiaGenerator"]
diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_schema.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_schema.py
@@ -0,0 +1,69 @@
+# SPDX-FileCopyrightText: 2024-present deepset GmbH <[email protected]>
+#
+# SPDX-License-Identifier: Apache-2.0
+from dataclasses import asdict, dataclass
+from typing import Any, Dict, List, Optional
+
+
+@dataclass
+class Message:
+    """
+    A single chat message exchanged with the generation endpoint.
+    """
+
+    # Text of the message.
+    content: str
+    # Author of the message; `NvidiaGenerator.run` sends the prompt with the role "user".
+    role: str
+
+
+@dataclass
+class GenerationRequest:
+    """
+    Payload of a generation call.
+
+    `NvidiaGenerator.run` builds it from the prompt message plus the user supplied
+    model arguments, which override the defaults declared here.
+    """
+
+    messages: List[Message]
+    temperature: float = 0.2
+    top_p: float = 0.7
+    max_tokens: int = 1024
+    seed: Optional[int] = None
+    bad: Optional[List[str]] = None
+    stop: Optional[List[str]] = None
+
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Converts the request to a plain dictionary.
+
+        :returns:
+            Dictionary with one key per field; nested messages are converted too
+            and fields left unset keep their `None` value.
+        """
+        payload: Dict[str, Any] = asdict(self)
+        return payload
+
+
+@dataclass
+class Choice:
+    """
+    One entry of the `choices` list of a generation response.
+    """
+
+    # Value of the entry's `index` key.
+    index: int
+    # Message built from the entry's `message` key (`content` and `role`).
+    message: Message
+    # Value of the entry's `finish_reason` key.
+    finish_reason: str
+
+
+@dataclass
+class Usage:
+    """
+    Token counters copied from the `usage` object of a generation response.
+    """
+
+    # Value of `usage["completion_tokens"]`.
+    completion_tokens: int
+    # Value of `usage["prompt_tokens"]`.
+    prompt_tokens: int
+    # Value of `usage["total_tokens"]`.
+    total_tokens: int
+
+
+@dataclass
+class GenerationResponse:
+    """
+    Parsed form of the JSON document returned by a generation call.
+    """
+
+    id: str
+    choices: List[Choice]
+    usage: Usage
+
+    @classmethod
+    def from_dict(cls, data: dict) -> "GenerationResponse":
+        """
+        Builds a response from the decoded JSON payload.
+
+        :param data:
+            Decoded payload; must provide the `id`, `choices` and `usage` keys.
+        :returns:
+            The parsed response.
+        :raises ValueError:
+            If a key is missing or a value does not have the expected structure.
+        """
+        try:
+            response_id = data["id"]
+
+            parsed_choices = []
+            for raw_choice in data["choices"]:
+                raw_message = raw_choice["message"]
+                parsed_message = Message(content=raw_message["content"], role=raw_message["role"])
+                parsed_choices.append(
+                    Choice(
+                        index=raw_choice["index"],
+                        message=parsed_message,
+                        finish_reason=raw_choice["finish_reason"],
+                    )
+                )
+
+            raw_usage = data["usage"]
+            parsed_usage = Usage(
+                completion_tokens=raw_usage["completion_tokens"],
+                prompt_tokens=raw_usage["prompt_tokens"],
+                total_tokens=raw_usage["total_tokens"],
+            )
+
+            return cls(id=response_id, choices=parsed_choices, usage=parsed_usage)
+        except (KeyError, TypeError) as e:
+            # Missing keys and wrongly shaped values are reported the same way.
+            msg = f"Failed to parse {cls.__name__} from data: {data}"
+            raise ValueError(msg) from e
diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/chat/__init__.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/chat/__init__.py
@@ -1,3 +1,3 @@
-# SPDX-FileCopyrightText: 2023-present deepset GmbH <[email protected]>
+# SPDX-FileCopyrightText: 2024-present deepset GmbH <[email protected]>
 #
 # SPDX-License-Identifier: Apache-2.0
diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py
@@ -0,0 +1,154 @@
+# SPDX-FileCopyrightText: 2024-present deepset GmbH <[email protected]>
+#
+# SPDX-License-Identifier: Apache-2.0
+from typing import Any, Dict, List, Optional, Union
+
+from haystack import component, default_from_dict, default_to_dict
+from haystack.utils.auth import Secret, deserialize_secrets_inplace
+from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient
+
+from ._schema import GenerationRequest, GenerationResponse, Message
+from .models import NvidiaGeneratorModel
+
+
+@component
+class NvidiaGenerator:
+    """
+    A component for generating text using generative models provided by
+    [NVIDIA AI Foundation Endpoints](https://www.nvidia.com/en-us/ai-data-science/foundation-models/).
+
+    Usage example:
+    ```python
+    from haystack_integrations.components.generators.nvidia import NvidiaGenerator
+    from haystack_integrations.components.generators.nvidia.models import NvidiaGeneratorModel
+
+    generator = NvidiaGenerator(
+        model=NvidiaGeneratorModel.NV_LLAMA2_RLHF_70B,
+        model_arguments={
+            "temperature": 0.2,
+            "top_p": 0.7,
+            "max_tokens": 1024,
+            "seed": None,
+            "bad": None,
+            "stop": None,
+        },
+    )
+    generator.warm_up()
+
+    result = generator.run(prompt="What is the answer?")
+    print(result["replies"])
+    print(result["meta"])
+    print(result["usage"])
+    ```
+    """
+
+    def __init__(
+        self,
+        model: Union[str, NvidiaGeneratorModel],
+        api_key: Secret = Secret.from_env_var("NVIDIA_API_KEY"),
+        model_arguments: Optional[Dict[str, Any]] = None,
+    ):
+        """
+        Create a NvidiaGenerator component.
+
+        :param model:
+            Name of the model to use for text generation.
+            See the [Nvidia catalog](https://catalog.ngc.nvidia.com/ai-foundation-models)
+            for more information on the supported models.
+        :param api_key:
+            Nvidia API key to use for authentication.
+        :param model_arguments:
+            Additional arguments to pass to the model provider. Different models accept different arguments.
+            Look up your model in the [Nvidia catalog](https://catalog.ngc.nvidia.com/ai-foundation-models)
+            to find out which arguments it supports.
+
+        :raises ValueError: If `model` is not supported.
+        """
+        if isinstance(model, str):
+            model = NvidiaGeneratorModel.from_str(model)
+
+        self._model = model
+        self._api_key = api_key
+        self._model_arguments = model_arguments or {}
+        # This is initialized in warm_up
+        self._model_id = None
+
+        self._client = NvidiaCloudFunctionsClient(
+            api_key=api_key,
+            headers={
+                "Content-Type": "application/json",
+                "Accept": "application/json",
+            },
+        )
+
+    def warm_up(self):
+        """
+        Initializes the component by asking the client for the Nvidia Cloud Functions id of the model.
+
+        Does nothing if the id has already been resolved.
+        """
+        if self._model_id is not None:
+            return
+        self._model_id = self._client.get_model_nvcf_id(str(self._model))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Serializes the component to a dictionary.
+
+        :returns:
+            Dictionary with serialized data.
+        """
+        return default_to_dict(
+            self, model=str(self._model), api_key=self._api_key.to_dict(), model_arguments=self._model_arguments
+        )
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "NvidiaGenerator":
+        """
+        Deserializes the component from a dictionary.
+
+        :param data:
+            Dictionary to deserialize from.
+        :returns:
+           Deserialized component.
+        """
+        init_params = data.get("init_parameters", {})
+        deserialize_secrets_inplace(init_params, ["api_key"])
+        return default_from_dict(cls, data)
+
+    @component.output_types(replies=List[str], meta=List[Dict[str, Any]], usage=Dict[str, int])
+    def run(self, prompt: str):
+        """
+        Queries the model with the provided prompt.
+
+        :param prompt:
+            Text to be sent to the generative model.
+        :returns:
+            A dictionary with the following keys:
+            - `replies` - Replies generated by the model.
+            - `meta` - Metadata for each reply.
+            - `usage` - Usage statistics for the model.
+
+        :raises RuntimeError: If `warm_up()` has not been called before running.
+        :raises ValueError: If the response of the model cannot be parsed.
+        """
+        if self._model_id is None:
+            msg = "The generation model has not been loaded. Call warm_up() before running."
+            raise RuntimeError(msg)
+
+        # The prompt is sent as a single message with the "user" role.
+        messages = [Message(role="user", content=prompt)]
+        request = GenerationRequest(messages=messages, **self._model_arguments).to_dict()
+        json_response = self._client.query_function(self._model_id, request)
+
+        data = GenerationResponse.from_dict(json_response)
+        # `replies` and `meta` are parallel lists: one entry per returned choice, in the same order.
+        replies = [choice.message.content for choice in data.choices]
+        meta = [
+            {
+                "role": choice.message.role,
+                "finish_reason": choice.finish_reason,
+            }
+            for choice in data.choices
+        ]
+
+        usage = {
+            "completion_tokens": data.usage.completion_tokens,
+            "prompt_tokens": data.usage.prompt_tokens,
+            "total_tokens": data.usage.total_tokens,
+        }
+
+        return {"replies": replies, "meta": meta, "usage": usage}
diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/models.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/models.py
@@ -0,0 +1,35 @@
+# SPDX-FileCopyrightText: 2024-present deepset GmbH <[email protected]>
+#
+# SPDX-License-Identifier: Apache-2.0
+from enum import Enum
+
+
+class NvidiaGeneratorModel(Enum):
+    """
+    Generator models supported by NvidiaGenerator and NvidiaChatGenerator.
+
+    The value of each member is the model name; `str()` of a member returns that name.
+    """
+
+    NV_LLAMA2_RLHF_70B = "playground_nv_llama2_rlhf_70b"
+    STEERLM_LLAMA_70B = "playground_steerlm_llama_70b"
+    NEMOTRON_STEERLM_8B = "playground_nemotron_steerlm_8b"
+    NEMOTRON_QA_8B = "playground_nemotron_qa_8b"
+
+    def __str__(self):
+        return self.value
+
+    @classmethod
+    def from_str(cls, string: str) -> "NvidiaGeneratorModel":
+        """
+        Looks up the generator model whose value equals the given string.
+
+        :param string:
+            Model name to look up.
+        :returns:
+            The matching generator model.
+        :raises ValueError:
+            If no model has that name.
+        """
+        by_name = {member.value: member for member in cls}
+        if string not in by_name:
+            msg = f"Unknown model '{string}'. Supported models are: {list(by_name.keys())}"
+            raise ValueError(msg)
+        return by_name[string]
diff --git a/integrations/nvidia/src/haystack_integrations/utils/nvidia/client.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/client.py
@@ -64,3 +64,19 @@ def available_functions(self) -> Dict[str, AvailableNvidiaCloudFunctions]:
             )
             for f in response.json()["functions"]
         }
+
+    def get_model_nvcf_id(self, model: str) -> str:
+        """
+        Looks up the Nvidia Cloud Functions UUID of a model.
+
+        :param model:
+            Name of the model, as keyed in the result of `available_functions()`.
+        :returns:
+            The id of the matching function.
+        :raises ValueError:
+            If the model is not listed, or is listed with a status other than "ACTIVE".
+        """
+        function = self.available_functions().get(model)
+
+        if function is None:
+            msg = f"Model '{model}' was not found on the Nvidia Cloud Functions backend"
+            raise ValueError(msg)
+
+        if function.status != "ACTIVE":
+            msg = f"Model '{model}' is not currently active/usable on the Nvidia Cloud Functions backend"
+            raise ValueError(msg)
+
+        return function.id
diff --git a/integrations/nvidia/tests/test_document_embedder.py b/integrations/nvidia/tests/test_document_embedder.py
@@ -20,6 +20,9 @@ def available_functions(self):
             )
         }
 
+    def get_model_nvcf_id(self, model):
+        """
+        Test double for the client lookup: every model resolves to the same fake id.
+        """
+        fake_id = "fake-id"
+        return fake_id
+
 
 class TestNvidiaDocumentEmbedder:
     def test_init_default(self, monkeypatch):