diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_schema.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_schema.py index a0598be86..fc4e0e5bf 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_schema.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_schema.py @@ -1,31 +1,10 @@ from dataclasses import asdict, dataclass from typing import Any, Dict, List, Literal, Union -from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient - -from .models import NvidiaEmbeddingModel - MAX_INPUT_STRING_LENGTH = 2048 MAX_INPUTS = 50 -def get_model_nvcf_id(model: NvidiaEmbeddingModel, client: NvidiaCloudFunctionsClient) -> str: - """ - Returns the Nvidia Cloud Functions UUID for the given model. - """ - - available_functions = client.available_functions() - func = available_functions.get(str(model)) - if func is None: - msg = f"Model '{model}' was not found on the Nvidia Cloud Functions backend" - raise ValueError(msg) - elif func.status != "ACTIVE": - msg = f"Model '{model}' is not currently active/usable on the Nvidia Cloud Functions backend" - raise ValueError(msg) - - return func.id - - @dataclass class EmbeddingsRequest: input: Union[str, List[str]] diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py index bbc68b492..25c104b97 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py @@ -5,7 +5,7 @@ from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient from tqdm import tqdm -from ._schema import MAX_INPUTS, EmbeddingsRequest, EmbeddingsResponse, Usage, get_model_nvcf_id +from ._schema import MAX_INPUTS, 
EmbeddingsRequest, EmbeddingsResponse, Usage from .models import NvidiaEmbeddingModel @@ -96,7 +96,7 @@ def warm_up(self): if self._initialized: return - self.nvcf_id = get_model_nvcf_id(self.model, self.client) + self.nvcf_id = self.client.get_model_nvcf_id(str(self.model)) self._initialized = True def to_dict(self) -> Dict[str, Any]: diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py index a2636b4b8..a377934e3 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py @@ -4,7 +4,7 @@ from haystack.utils import Secret, deserialize_secrets_inplace from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient -from ._schema import EmbeddingsRequest, EmbeddingsResponse, get_model_nvcf_id +from ._schema import EmbeddingsRequest, EmbeddingsResponse from .models import NvidiaEmbeddingModel @@ -74,7 +74,7 @@ def warm_up(self): if self._initialized: return - self.nvcf_id = get_model_nvcf_id(self.model, self.client) + self.nvcf_id = self.client.get_model_nvcf_id(str(self.model)) self._initialized = True def to_dict(self) -> Dict[str, Any]: diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/__init__.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/__init__.py index e873bc332..18354ea17 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/__init__.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/__init__.py @@ -1,3 +1,6 @@ -# SPDX-FileCopyrightText: 2023-present deepset GmbH +# SPDX-FileCopyrightText: 2024-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 +from .generator import NvidiaGenerator + +__all__ = ["NvidiaGenerator"] diff 
@dataclass
class Message:
    """
    A single chat message, either sent to the model or returned by it.
    """

    # Text of the message.
    content: str
    # Author of the message; NvidiaGenerator sends its prompt with the role "user".
    role: str


@dataclass
class GenerationRequest:
    """
    Payload of a text generation request.
    """

    # Messages to send to the model.
    messages: List[Message]
    # Generation parameters; the defaults below are used for any field the caller does not set.
    temperature: float = 0.2
    top_p: float = 0.7
    max_tokens: int = 1024
    seed: Optional[int] = None
    bad: Optional[List[str]] = None
    stop: Optional[List[str]] = None

    def to_dict(self) -> Dict[str, Any]:
        """
        Converts the request to a plain dictionary.

        Nested messages are converted to dictionaries too, and fields that are
        `None` are kept in the output.

        :returns:
            Dictionary with one entry per field.
        """
        return asdict(self)


@dataclass
class Choice:
    """
    One completion returned by the model.
    """

    # Position of the choice as reported in the response.
    index: int
    # Message generated for this choice.
    message: Message
    # Value of the `finish_reason` key of the response, stored as-is.
    finish_reason: str


@dataclass
class Usage:
    """
    Token counts reported in a generation response.
    """

    completion_tokens: int
    prompt_tokens: int
    total_tokens: int


@dataclass
class GenerationResponse:
    """
    Parsed generation response.
    """

    id: str
    choices: List[Choice]
    usage: Usage

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "GenerationResponse":
        """
        Builds a response from a decoded JSON payload.

        Only the keys read below are used; any other key in `data` is ignored.

        :param data:
            Dictionary with the keys `id`, `choices` (each item holding `index`,
            `message` and `finish_reason`) and `usage`.
        :returns:
            The parsed response.
        :raises ValueError:
            If a required key is missing or a value cannot be indexed as expected.
            The original `KeyError` or `TypeError` is attached as the cause.
        """
        try:
            return cls(
                id=data["id"],
                choices=[
                    Choice(
                        index=choice["index"],
                        message=Message(content=choice["message"]["content"], role=choice["message"]["role"]),
                        finish_reason=choice["finish_reason"],
                    )
                    for choice in data["choices"]
                ],
                usage=Usage(
                    completion_tokens=data["usage"]["completion_tokens"],
                    prompt_tokens=data["usage"]["prompt_tokens"],
                    total_tokens=data["usage"]["total_tokens"],
                ),
            )
        except (KeyError, TypeError) as e:
            # Surface malformed payloads as a single, predictable error type.
            msg = f"Failed to parse {cls.__name__} from data: {data}"
            raise ValueError(msg) from e
class NvidiaGeneratorModel(Enum):
    """
    Generator models supported by NvidiaGenerator and NvidiaChatGenerator.
    """

    NV_LLAMA2_RLHF_70B = "playground_nv_llama2_rlhf_70b"
    STEERLM_LLAMA_70B = "playground_steerlm_llama_70b"
    NEMOTRON_STEERLM_8B = "playground_nemotron_steerlm_8b"
    NEMOTRON_QA_8B = "playground_nemotron_qa_8b"

    def __str__(self):
        return self.value

    @classmethod
    def from_str(cls, string: str) -> "NvidiaGeneratorModel":
        """
        Create a generator model from a string.

        :param string:
            String to convert. It must equal the value of one of the members.
        :returns:
            A generator model.
        :raises ValueError:
            If `string` does not match any member value.
        """
        try:
            # Enum lookup by value; no need to build a value -> member map on every call.
            return cls(string)
        except ValueError:
            supported = [member.value for member in cls]
            msg = f"Unknown model '{string}'. Supported models are: {supported}"
            raise ValueError(msg) from None


@component
class NvidiaGenerator:
    """
    A component for generating text using generative models provided by
    [NVIDIA AI Foundation Endpoints](https://www.nvidia.com/en-us/ai-data-science/foundation-models/).

    Usage example:
    ```python
    from haystack_integrations.components.generators.nvidia import NvidiaGenerator
    from haystack_integrations.components.generators.nvidia.models import NvidiaGeneratorModel

    generator = NvidiaGenerator(
        model=NvidiaGeneratorModel.NV_LLAMA2_RLHF_70B,
        model_arguments={
            "temperature": 0.2,
            "top_p": 0.7,
            "max_tokens": 1024,
            "seed": None,
            "bad": None,
            "stop": None,
        },
    )
    generator.warm_up()

    result = generator.run(prompt="What is the answer?")
    print(result["replies"])
    print(result["meta"])
    ```
    """

    def __init__(
        self,
        model: Union[str, NvidiaGeneratorModel],
        api_key: Secret = Secret.from_env_var("NVIDIA_API_KEY"),
        model_arguments: Optional[Dict[str, Any]] = None,
    ):
        """
        Create a NvidiaGenerator component.

        :param model:
            Name of the model to use for text generation.
            See the [Nvidia catalog](https://catalog.ngc.nvidia.com/ai-foundation-models)
            for more information on the supported models.
        :param api_key:
            Nvidia API key to use for authentication.
        :param model_arguments:
            Additional arguments to pass to the model provider. Different models accept different arguments.
            Search your model in the [Nvidia catalog](https://catalog.ngc.nvidia.com/ai-foundation-models)
            to know the supported arguments.
            They are forwarded as keyword arguments, next to `messages`, when the request is built in `run`.

        :raises ValueError: If `model` is not supported.
        """
        if isinstance(model, str):
            model = NvidiaGeneratorModel.from_str(model)

        self._model = model
        self._api_key = api_key
        self._model_arguments = model_arguments or {}
        # This is initialized in warm_up
        self._model_id = None

        self._client = NvidiaCloudFunctionsClient(
            api_key=api_key,
            headers={
                "Content-Type": "application/json",
                "Accept": "application/json",
            },
        )

    def warm_up(self):
        """
        Initializes the component.

        Asks the client for the Nvidia Cloud Functions ID of the model and stores it.
        Does nothing if the ID has already been stored.
        """
        if self._model_id is not None:
            return
        self._model_id = self._client.get_model_nvcf_id(str(self._model))

    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes the component to a dictionary.

        :returns:
            Dictionary with serialized data.
        """
        return default_to_dict(
            self, model=str(self._model), api_key=self._api_key.to_dict(), model_arguments=self._model_arguments
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "NvidiaGenerator":
        """
        Deserializes the component from a dictionary.

        :param data:
            Dictionary to deserialize from.
        :returns:
            Deserialized component.
        """
        init_params = data.get("init_parameters", {})
        deserialize_secrets_inplace(init_params, ["api_key"])
        return default_from_dict(cls, data)

    @component.output_types(replies=List[str], meta=List[Dict[str, Any]], usage=Dict[str, int])
    def run(self, prompt: str):
        """
        Queries the model with the provided prompt.

        :param prompt:
            Text to be sent to the generative model.
        :returns:
            A dictionary with the following keys:
            - `replies` - Replies generated by the model.
            - `meta` - Metadata for each reply.
            - `usage` - Usage statistics for the model.
        :raises RuntimeError:
            If `warm_up()` has not been called before running.
        """
        if self._model_id is None:
            msg = "The generation model has not been loaded. Call warm_up() before running."
            raise RuntimeError(msg)

        # The prompt is sent as a single message with the "user" role.
        messages = [Message(role="user", content=prompt)]
        request = GenerationRequest(messages=messages, **self._model_arguments).to_dict()
        json_response = self._client.query_function(self._model_id, request)

        data = GenerationResponse.from_dict(json_response)
        # `replies` and `meta` are parallel lists: entry i of each describes choice i.
        replies = [choice.message.content for choice in data.choices]
        meta = [
            {
                "role": choice.message.role,
                "finish_reason": choice.finish_reason,
            }
            for choice in data.choices
        ]
        usage = {
            "completion_tokens": data.usage.completion_tokens,
            "prompt_tokens": data.usage.prompt_tokens,
            "total_tokens": data.usage.total_tokens,
        }

        return {"replies": replies, "meta": meta, "usage": usage}
def get_model_nvcf_id(self, model: str) -> str:
    """
    Returns the Nvidia Cloud Functions UUID for the given model.

    :param model:
        Name of the model, used as the key into the mapping returned by `available_functions()`.
    :returns:
        The `id` of the function registered under that name.
    :raises ValueError:
        If no function is registered under `model`, or if its status is not "ACTIVE".
    """
    func = self.available_functions().get(model)
    if func is None:
        msg = f"Model '{model}' was not found on the Nvidia Cloud Functions backend"
        raise ValueError(msg)
    # Guard clause instead of `elif`: the branch above always raises.
    if func.status != "ACTIVE":
        msg = f"Model '{model}' is not currently active/usable on the Nvidia Cloud Functions backend"
        raise ValueError(msg)

    return func.id
class TestNvidiaGenerator:
    """
    Unit tests for NvidiaGenerator, plus one integration test that only runs
    when the NVIDIA_API_KEY environment variable is set.
    """

    @staticmethod
    def _model_arguments():
        # Fresh copy per call so no test can leak changes into another one.
        return {
            "temperature": 0.2,
            "top_p": 0.7,
            "max_tokens": 1024,
            "seed": None,
            "bad": None,
            "stop": None,
        }

    def test_init_default(self, monkeypatch):
        monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
        expected_model = NvidiaGeneratorModel.NV_LLAMA2_RLHF_70B
        generator = NvidiaGenerator(expected_model)

        assert generator._api_key == Secret.from_env_var("NVIDIA_API_KEY")
        assert generator._model == expected_model
        assert generator._model_arguments == {}

    def test_init_with_parameters(self):
        token = Secret.from_token("fake-api-key")
        generator = NvidiaGenerator(
            api_key=token,
            model="playground_nemotron_steerlm_8b",
            model_arguments=self._model_arguments(),
        )
        assert generator._api_key == Secret.from_token("fake-api-key")
        # The string name is converted to the matching enum member.
        assert generator._model == NvidiaGeneratorModel.NEMOTRON_STEERLM_8B
        assert generator._model_arguments == self._model_arguments()

    def test_init_fail_wo_api_key(self, monkeypatch):
        monkeypatch.delenv("NVIDIA_API_KEY", raising=False)
        with pytest.raises(ValueError):
            NvidiaGenerator("playground_nemotron_steerlm_8b")

    def test_to_dict(self, monkeypatch):
        monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
        serialized = NvidiaGenerator(NvidiaGeneratorModel.NEMOTRON_STEERLM_8B).to_dict()
        expected_init_parameters = {
            "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"},
            "model": "playground_nemotron_steerlm_8b",
            "model_arguments": {},
        }
        assert serialized == {
            "type": "haystack_integrations.components.generators.nvidia.generator.NvidiaGenerator",
            "init_parameters": expected_init_parameters,
        }

    def test_to_dict_with_custom_init_parameters(self, monkeypatch):
        monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")
        serialized = NvidiaGenerator(
            model=NvidiaGeneratorModel.NEMOTRON_STEERLM_8B,
            model_arguments=self._model_arguments(),
        ).to_dict()
        expected_init_parameters = {
            "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"},
            "model": "playground_nemotron_steerlm_8b",
            "model_arguments": self._model_arguments(),
        }
        assert serialized == {
            "type": "haystack_integrations.components.generators.nvidia.generator.NvidiaGenerator",
            "init_parameters": expected_init_parameters,
        }

    @patch("haystack_integrations.components.generators.nvidia.generator.NvidiaCloudFunctionsClient")
    def test_run(self, mock_client):
        generator = NvidiaGenerator(
            model=NvidiaGeneratorModel.NEMOTRON_STEERLM_8B,
            api_key=Secret.from_token("fake-api-key"),
            model_arguments=self._model_arguments(),
        )
        # Swap in the mock so warm_up() and run() never reach the network.
        mock_client.get_model_nvcf_id.return_value = "some_id"
        generator._client = mock_client
        generator.warm_up()
        mock_client.get_model_nvcf_id.assert_called_once_with("playground_nemotron_steerlm_8b")

        canned_response = {
            "id": "some_id",
            "choices": [
                {
                    "index": 0,
                    "message": {"content": "42", "role": "assistant"},
                    "finish_reason": "stop",
                }
            ],
            "usage": {"total_tokens": 21, "prompt_tokens": 19, "completion_tokens": 2},
        }
        mock_client.query_function.return_value = canned_response

        result = generator.run(prompt="What is the answer?")

        expected_request = {
            "messages": [
                {"content": "What is the answer?", "role": "user"},
            ],
            **self._model_arguments(),
        }
        mock_client.query_function.assert_called_once_with("some_id", expected_request)
        assert result == {
            "replies": ["42"],
            "meta": [
                {
                    "finish_reason": "stop",
                    "role": "assistant",
                },
            ],
            "usage": {
                "total_tokens": 21,
                "prompt_tokens": 19,
                "completion_tokens": 2,
            },
        }

    @pytest.mark.skipif(
        not os.environ.get("NVIDIA_API_KEY", None),
        reason="Export an env var called NVIDIA_API_KEY containing the Nvidia API key to run this test.",
    )
    @pytest.mark.integration
    def test_run_integration(self):
        generator = NvidiaGenerator(
            model=NvidiaGeneratorModel.NV_LLAMA2_RLHF_70B,
            model_arguments=self._model_arguments(),
        )
        generator.warm_up()
        result = generator.run(prompt="What is the answer?")

        # Only check that every output is non-empty; the actual text is model-dependent.
        for key in ("replies", "meta", "usage"):
            assert result[key]