From 31df85ab882b8154f2b1f42d010b61f0b543f947 Mon Sep 17 00:00:00 2001 From: shadeMe Date: Mon, 10 Jun 2024 17:22:47 +0200 Subject: [PATCH] refactor: Remove deprecated Nvidia Cloud Functions backend and related code. --- .../embedders/nvidia/_nvcf_backend.py | 111 ---------------- .../embedders/nvidia/document_embedder.py | 39 +++--- .../embedders/nvidia/text_embedder.py | 39 +++--- .../generators/nvidia/_nvcf_backend.py | 119 ------------------ .../components/generators/nvidia/_schema.py | 69 ---------- .../components/generators/nvidia/generator.py | 31 ++--- .../utils/nvidia/__init__.py | 3 - .../utils/nvidia/client.py | 82 ------------ .../nvidia/tests/test_document_embedder.py | 112 +++++------------ integrations/nvidia/tests/test_generator.py | 91 -------------- .../nvidia/tests/test_text_embedder.py | 74 ++++------- 11 files changed, 97 insertions(+), 673 deletions(-) delete mode 100644 integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py delete mode 100644 integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py delete mode 100644 integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_schema.py delete mode 100644 integrations/nvidia/src/haystack_integrations/utils/nvidia/__init__.py delete mode 100644 integrations/nvidia/src/haystack_integrations/utils/nvidia/client.py diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py deleted file mode 100644 index 65371de54..000000000 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py +++ /dev/null @@ -1,111 +0,0 @@ -import warnings -from dataclasses import asdict, dataclass -from typing import Any, Dict, List, Literal, Optional, Tuple, Union - -from haystack.utils.auth import Secret -from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient - -from .backend import EmbedderBackend - -MAX_INPUT_STRING_LENGTH = 2048 -MAX_INPUTS = 50 - - -class NvcfBackend(EmbedderBackend): - def __init__( - self, - model: str, - api_key: Secret, - model_kwargs: Optional[Dict[str, Any]] = None, - ): - warnings.warn("Nvidia NGC is deprecated, use Nvidia NIM instead.", DeprecationWarning, stacklevel=2) - if not model.startswith("playground_"): - model = f"playground_{model}" - - super().__init__(model=model, model_kwargs=model_kwargs) - - self.api_key = api_key - self.client = NvidiaCloudFunctionsClient( - api_key=api_key, - headers={ - "Content-Type": "application/json", - "Accept": "application/json", - }, - ) - self.nvcf_id = self.client.get_model_nvcf_id(self.model_name) - - def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: - request = EmbeddingsRequest(input=texts, **self.model_kwargs).to_dict() - json_response = self.client.query_function(self.nvcf_id, request) - response = EmbeddingsResponse.from_dict(json_response) - - # Sort resulting embeddings by index - assert all(isinstance(r.embedding, list) for r in response.data) - sorted_embeddings: List[List[float]] = [r.embedding for r in sorted(response.data, key=lambda e: e.index)] # type: ignore - metadata = {"usage": response.usage.to_dict()} - return sorted_embeddings, metadata - - -@dataclass -class EmbeddingsRequest: - input: Union[str, List[str]] - model: Literal["query", "passage"] - encoding_format: Literal["float", "base64"] = "float" - - def __post_init__(self): - if isinstance(self.input, list): - if len(self.input) > MAX_INPUTS: - msg = f"The number of inputs should not exceed {MAX_INPUTS}" - raise ValueError(msg) - else: - self.input = [self.input] - - if len(self.input) == 0: - msg = "The number of inputs should not be 0" - raise ValueError(msg) - - if any(len(x) > MAX_INPUT_STRING_LENGTH for x in self.input): - msg = f"The length of each input should not exceed {MAX_INPUT_STRING_LENGTH} characters" - raise ValueError(msg) - - if self.encoding_format not in ["float", "base64"]: - msg = "encoding_format should be either 'float' or 'base64'" - raise ValueError(msg) - - if self.model not in ["query", "passage"]: - msg = "model should be either 'query' or 'passage'" - raise ValueError(msg) - - def to_dict(self) -> Dict[str, Any]: - return asdict(self) - - -@dataclass -class Usage: - prompt_tokens: int - total_tokens: int - - def to_dict(self) -> Dict[str, Any]: - return asdict(self) - - -@dataclass -class Embeddings: - index: int - embedding: Union[List[float], str] - - -@dataclass -class EmbeddingsResponse: - data: List[Embeddings] - usage: Usage - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "EmbeddingsResponse": - try: - embeddings = [Embeddings(**x) for x in data["data"]] - usage = Usage(**data["usage"]) - return cls(data=embeddings, usage=usage) - except (KeyError, TypeError) as e: - msg = f"Failed to parse EmbeddingsResponse from data: {data}" - raise ValueError(msg) from e diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py index 45680acce..fad264a46 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py @@ -5,7 +5,6 @@ from tqdm import tqdm from ._nim_backend import NimBackend -from ._nvcf_backend import NvcfBackend from .backend import EmbedderBackend from .truncate import EmbeddingTruncateMode @@ -14,8 +13,7 @@ class NvidiaDocumentEmbedder: """ A component for embedding documents using embedding models provided by - [NVIDIA AI Foundation Endpoints](https://www.nvidia.com/en-us/ai-data-science/foundation-models/) - and NVIDIA Inference Microservices. + [NVIDIA NIMs](https://ai.nvidia.com). Usage example: ```python @@ -23,7 +21,7 @@ class NvidiaDocumentEmbedder: doc = Document(content="I love pizza!") - text_embedder = NvidiaDocumentEmbedder(model="nvolveqa_40k") + text_embedder = NvidiaDocumentEmbedder(model="NV-Embed-QA", api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia") text_embedder.warm_up() result = document_embedder.run([doc]) @@ -33,9 +31,9 @@ class NvidiaDocumentEmbedder: def __init__( self, - model: str, + model: str = "NV-Embed-QA", api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), - api_url: Optional[str] = None, + api_url: str = "https://ai.api.nvidia.com/v1/retrieval/nvidia", prefix: str = "", suffix: str = "", batch_size: int = 32, @@ -50,9 +48,9 @@ def __init__( :param model: Embedding model to use. :param api_key: - API key for the NVIDIA AI Foundation Endpoints. + API key for the NVIDIA NIM. :param api_url: - Custom API URL for the NVIDIA Inference Microservices. + Custom API URL for the NVIDIA NIM. :param prefix: A string to add to the beginning of each text. :param suffix: @@ -95,22 +93,15 @@ def warm_up(self): if self._initialized: return - if self.api_url is None: - if self.api_key is None: - msg = "API key is required for NVIDIA AI Foundation Endpoints." - raise ValueError(msg) - - self.backend = NvcfBackend(self.model, api_key=self.api_key, model_kwargs={"model": "passage"}) - else: - model_kwargs = {"input_type": "passage"} - if self.truncate is not None: - model_kwargs["truncate"] = str(self.truncate) - self.backend = NimBackend( - self.model, - api_url=self.api_url, - api_key=self.api_key, - model_kwargs=model_kwargs, - ) + model_kwargs = {"input_type": "passage"} + if self.truncate is not None: + model_kwargs["truncate"] = str(self.truncate) + self.backend = NimBackend( + self.model, + api_url=self.api_url, + api_key=self.api_key, + model_kwargs=model_kwargs, + ) self._initialized = True diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py index b3ad4544e..8923f8c81 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py @@ -4,7 +4,6 @@ from haystack.utils import Secret, deserialize_secrets_inplace from ._nim_backend import NimBackend -from ._nvcf_backend import NvcfBackend from .backend import EmbedderBackend from .truncate import EmbeddingTruncateMode @@ -13,8 +12,7 @@ class NvidiaTextEmbedder: """ A component for embedding strings using embedding models provided by - [NVIDIA AI Foundation Endpoints](https://www.nvidia.com/en-us/ai-data-science/foundation-models/) - and NVIDIA Inference Microservices. + [NVIDIA NIMs](https://ai.nvidia.com). For models that differentiate between query and document inputs, this component embeds the input string as a query. @@ -25,7 +23,7 @@ class NvidiaTextEmbedder: text_to_embed = "I love pizza!" - text_embedder = NvidiaTextEmbedder(model="nvolveqa_40k") + text_embedder = NvidiaTextEmbedder(model="NV-Embed-QA", api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia") text_embedder.warm_up() print(text_embedder.run(text_to_embed)) @@ -34,9 +32,9 @@ class NvidiaTextEmbedder: def __init__( self, - model: str, + model: str = "NV-Embed-QA", api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), - api_url: Optional[str] = None, + api_url: str = "https://ai.api.nvidia.com/v1/retrieval/nvidia", prefix: str = "", suffix: str = "", truncate: Optional[Union[EmbeddingTruncateMode, str]] = None, @@ -47,9 +45,9 @@ def __init__( :param model: Embedding model to use. :param api_key: - API key for the NVIDIA AI Foundation Endpoints. + API key for the NVIDIA NIM. :param api_url: - Custom API URL for the NVIDIA Inference Microservices. + Custom API URL for the NVIDIA NIM. :param prefix: A string to add to the beginning of each text. :param suffix: @@ -79,22 +77,15 @@ def warm_up(self): if self._initialized: return - if self.api_url is None: - if self.api_key is None: - msg = "API key is required for NVIDIA AI Foundation Endpoints." - raise ValueError(msg) - - self.backend = NvcfBackend(self.model, api_key=self.api_key, model_kwargs={"model": "query"}) - else: - model_kwargs = {"input_type": "query"} - if self.truncate is not None: - model_kwargs["truncate"] = str(self.truncate) - self.backend = NimBackend( - self.model, - api_url=self.api_url, - api_key=self.api_key, - model_kwargs=model_kwargs, - ) + model_kwargs = {"input_type": "query"} + if self.truncate is not None: + model_kwargs["truncate"] = str(self.truncate) + self.backend = NimBackend( + self.model, + api_url=self.api_url, + api_key=self.api_key, + model_kwargs=model_kwargs, + ) self._initialized = True diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py deleted file mode 100644 index 95d024fb8..000000000 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py +++ /dev/null @@ -1,119 +0,0 @@ -import warnings -from dataclasses import asdict, dataclass -from typing import Any, Dict, List, Optional, Tuple - -from haystack.utils.auth import Secret -from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient - -from .backend import GeneratorBackend - - -class NvcfBackend(GeneratorBackend): - def __init__( - self, - model: str, - api_key: Secret, - model_kwargs: Optional[Dict[str, Any]] = None, - ): - warnings.warn("Nvidia NGC is deprecated, use Nvidia NIM instead.", DeprecationWarning, stacklevel=2) - if not model.startswith("playground_"): - model = f"playground_{model}" - - super().__init__(model=model, model_kwargs=model_kwargs) - - self.api_key = api_key - self.client = NvidiaCloudFunctionsClient( - api_key=api_key, - headers={ - "Content-Type": "application/json", - "Accept": "application/json", - }, - ) - self.nvcf_id = self.client.get_model_nvcf_id(self.model_name) - - def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: - messages = [Message(role="user", content=prompt)] - request = GenerationRequest(messages=messages, **self.model_kwargs).to_dict() - json_response = self.client.query_function(self.nvcf_id, request) - response = GenerationResponse.from_dict(json_response) - - replies = [] - meta = [] - for choice in response.choices: - replies.append(choice.message.content) - meta.append( - { - "role": choice.message.role, - "finish_reason": choice.finish_reason, - "usage": { - "completion_tokens": response.usage.completion_tokens, - "prompt_tokens": response.usage.prompt_tokens, - "total_tokens": response.usage.total_tokens, - }, - } - ) - return replies, meta - - -@dataclass -class Message: - content: str - role: str - - -@dataclass -class GenerationRequest: - messages: List[Message] - temperature: float = 0.2 - top_p: float = 0.7 - max_tokens: int = 1024 - seed: Optional[int] = None - bad: Optional[List[str]] = None - stop: Optional[List[str]] = None - - def to_dict(self) -> Dict[str, Any]: - return asdict(self) - - -@dataclass -class Choice: - index: int - message: Message - finish_reason: str - - -@dataclass -class Usage: - completion_tokens: int - prompt_tokens: int - total_tokens: int - - -@dataclass -class GenerationResponse: - id: str - choices: List[Choice] - usage: Usage - - @classmethod - def from_dict(cls, data: dict) -> "GenerationResponse": - try: - return cls( - id=data["id"], - choices=[ - Choice( - index=choice["index"], - message=Message(content=choice["message"]["content"], role=choice["message"]["role"]), - finish_reason=choice["finish_reason"], - ) - for choice in data["choices"] - ], - usage=Usage( - completion_tokens=data["usage"]["completion_tokens"], - prompt_tokens=data["usage"]["prompt_tokens"], - total_tokens=data["usage"]["total_tokens"], - ), - ) - except (KeyError, TypeError) as e: - msg = f"Failed to parse {cls.__name__} from data: {data}" - raise ValueError(msg) from e diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_schema.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_schema.py deleted file mode 100644 index 4e19d05ac..000000000 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_schema.py +++ /dev/null @@ -1,69 +0,0 @@ -# SPDX-FileCopyrightText: 2024-present deepset GmbH -# -# SPDX-License-Identifier: Apache-2.0 -from dataclasses import asdict, dataclass -from typing import Any, Dict, List, Optional - - -@dataclass -class Message: - content: str - role: str - - -@dataclass -class GenerationRequest: - messages: List[Message] - temperature: float = 0.2 - top_p: float = 0.7 - max_tokens: int = 1024 - seed: Optional[int] = None - bad: Optional[List[str]] = None - stop: Optional[List[str]] = None - - def to_dict(self) -> Dict[str, Any]: - return asdict(self) - - -@dataclass -class Choice: - index: int - message: Message - finish_reason: str - - -@dataclass -class Usage: - completion_tokens: int - prompt_tokens: int - total_tokens: int - - -@dataclass -class GenerationResponse: - id: str - choices: List[Choice] - usage: Usage - - @classmethod - def from_dict(cls, data: dict) -> "GenerationResponse": - try: - return cls( - id=data["id"], - choices=[ - Choice( - index=choice["index"], - message=Message(content=choice["message"]["content"], role=choice["message"]["role"]), - finish_reason=choice["finish_reason"], - ) - for choice in data["choices"] - ], - usage=Usage( - completion_tokens=data["usage"]["completion_tokens"], - prompt_tokens=data["usage"]["prompt_tokens"], - total_tokens=data["usage"]["total_tokens"], - ), - ) - except (KeyError, TypeError) as e: - msg = f"Failed to parse {cls.__name__} from data: {data}" - raise ValueError(msg) from e diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py index 827706172..7038e6251 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py @@ -7,7 +7,6 @@ from haystack.utils.auth import Secret, deserialize_secrets_inplace from ._nim_backend import NimBackend -from ._nvcf_backend import NvcfBackend from .backend import GeneratorBackend _DEFAULT_API_URL = "https://integrate.api.nvidia.com/v1" @@ -43,7 +42,7 @@ class NvidiaGenerator: def __init__( self, model: str, - api_url: Optional[str] = _DEFAULT_API_URL, + api_url: str = _DEFAULT_API_URL, api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), model_arguments: Optional[Dict[str, Any]] = None, ): @@ -55,9 +54,9 @@ def __init__( See the [NVIDIA NIMs](https://ai.nvidia.com) for more information on the supported models. :param api_key: - API key for the NVIDIA AI Foundation Endpoints. + API key for the NVIDIA NIM. :param api_url: - Custom API URL for the NVIDIA Inference Microservices. + Custom API URL for the NVIDIA NIM. :param model_arguments: Additional arguments to pass to the model provider. Different models accept different arguments. Search your model in the [NVIDIA NIMs](https://ai.nvidia.com) @@ -77,21 +76,15 @@ def warm_up(self): if self._backend is not None: return - if self._api_url is None: - if self._api_key is None: - msg = "API key is required for NVIDIA AI Foundation Endpoints." - raise ValueError(msg) - self._backend = NvcfBackend(self._model, api_key=self._api_key, model_kwargs=self._model_arguments) - else: - if self._api_url == _DEFAULT_API_URL and self._api_key is None: - msg = "API key is required for hosted NVIDIA NIMs." - raise ValueError(msg) - self._backend = NimBackend( - self._model, - api_url=self._api_url, - api_key=self._api_key, - model_kwargs=self._model_arguments, - ) + if self._api_url == _DEFAULT_API_URL and self._api_key is None: + msg = "API key is required for hosted NVIDIA NIMs." + raise ValueError(msg) + self._backend = NimBackend( + self._model, + api_url=self._api_url, + api_key=self._api_key, + model_kwargs=self._model_arguments, + ) def to_dict(self) -> Dict[str, Any]: """ diff --git a/integrations/nvidia/src/haystack_integrations/utils/nvidia/__init__.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/__init__.py deleted file mode 100644 index b8015cfda..000000000 --- a/integrations/nvidia/src/haystack_integrations/utils/nvidia/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .client import NvidiaCloudFunctionsClient - -__all__ = ["NvidiaCloudFunctionsClient"] diff --git a/integrations/nvidia/src/haystack_integrations/utils/nvidia/client.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/client.py deleted file mode 100644 index b486f05b3..000000000 --- a/integrations/nvidia/src/haystack_integrations/utils/nvidia/client.py +++ /dev/null @@ -1,82 +0,0 @@ -import copy -from dataclasses import dataclass -from typing import Dict, Optional - -import requests -from haystack.utils import Secret - -FUNCTIONS_ENDPOINT = "https://api.nvcf.nvidia.com/v2/nvcf/functions" -INVOKE_ENDPOINT = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions" -STATUS_ENDPOINT = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status" - -ACCEPTED_STATUS_CODE = 202 - - -@dataclass -class AvailableNvidiaCloudFunctions: - name: str - id: str - status: Optional[str] = None - - -class NvidiaCloudFunctionsClient: - def __init__(self, *, api_key: Secret, headers: Dict[str, str], timeout: int = 60): - self.api_key = api_key.resolve_value() - if self.api_key is None: - msg = "Nvidia Cloud Functions API key is not set." - raise ValueError(msg) - - self.fetch_url_format = STATUS_ENDPOINT - self.headers = copy.deepcopy(headers) - self.headers.update( - { - "Authorization": f"Bearer {self.api_key}", - } - ) - self.timeout = timeout - self.session = requests.Session() - - def query_function(self, func_id: str, payload: Dict[str, str]) -> Dict[str, str]: - invoke_url = f"{INVOKE_ENDPOINT}/{func_id}" - - response = self.session.post(invoke_url, headers=self.headers, json=payload, timeout=self.timeout) - request_id = response.headers.get("NVCF-REQID") - if request_id is None: - msg = "NVCF-REQID header not found in response" - raise ValueError(msg) - - while response.status_code == ACCEPTED_STATUS_CODE: - fetch_url = f"{self.fetch_url_format}/{request_id}" - response = self.session.get(fetch_url, headers=self.headers, timeout=self.timeout) - - response.raise_for_status() - return response.json() - - def available_functions(self) -> Dict[str, AvailableNvidiaCloudFunctions]: - response = self.session.get(FUNCTIONS_ENDPOINT, headers=self.headers, timeout=self.timeout) - response.raise_for_status() - - return { - f["name"]: AvailableNvidiaCloudFunctions( - name=f["name"], - id=f["id"], - status=f.get("status"), - ) - for f in response.json()["functions"] - } - - def get_model_nvcf_id(self, model: str) -> str: - """ - Returns the Nvidia Cloud Functions UUID for the given model. - """ - - available_functions = self.available_functions() - func = available_functions.get(model) - if func is None: - msg = f"Model '{model}' was not found on the Nvidia Cloud Functions backend" - raise ValueError(msg) - elif func.status != "ACTIVE": - msg = f"Model '{model}' is not currently active/usable on the Nvidia Cloud Functions backend" - raise ValueError(msg) - - return func.id diff --git a/integrations/nvidia/tests/test_document_embedder.py b/integrations/nvidia/tests/test_document_embedder.py index ed31df65e..06587cd78 100644 --- a/integrations/nvidia/tests/test_document_embedder.py +++ b/integrations/nvidia/tests/test_document_embedder.py @@ -1,19 +1,30 @@ import os -from unittest.mock import Mock, patch import pytest from haystack import Document from haystack.utils import Secret from haystack_integrations.components.embedders.nvidia import EmbeddingTruncateMode, NvidiaDocumentEmbedder +from haystack_integrations.components.embedders.nvidia.backend import EmbedderBackend + + +class MockBackend(EmbedderBackend): + def __init__(self, model, model_kwargs): + super().__init__(model, model_kwargs) + + def embed(self, texts): + inputs = texts + data = [[0.1, 0.2, 0.3] for i in range(len(inputs))] + return data, {"usage": {"total_tokens": 4, "prompt_tokens": 4}} class TestNvidiaDocumentEmbedder: def test_init_default(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") - embedder = NvidiaDocumentEmbedder("nvolveqa_40k") + embedder = NvidiaDocumentEmbedder() assert embedder.api_key == Secret.from_env_var("NVIDIA_API_KEY") - assert embedder.model == "nvolveqa_40k" + assert embedder.model == "NV-Embed-QA" + assert embedder.api_url == "https://ai.api.nvidia.com/v1/retrieval/nvidia" assert embedder.prefix == "" assert embedder.suffix == "" assert embedder.batch_size == 32 @@ -25,6 +36,7 @@ def test_init_with_parameters(self): embedder = NvidiaDocumentEmbedder( api_key=Secret.from_token("fake-api-key"), model="nvolveqa_40k", + api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia/test", prefix="prefix", suffix="suffix", batch_size=30, @@ -35,6 +47,7 @@ def test_init_with_parameters(self): assert embedder.api_key == Secret.from_token("fake-api-key") assert embedder.model == "nvolveqa_40k" + assert embedder.api_url == "https://ai.api.nvidia.com/v1/retrieval/nvidia/test" assert embedder.prefix == "prefix" assert embedder.suffix == "suffix" assert embedder.batch_size == 30 @@ -56,7 +69,7 @@ def test_to_dict(self, monkeypatch): "type": "haystack_integrations.components.embedders.nvidia.document_embedder.NvidiaDocumentEmbedder", "init_parameters": { "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"}, - "api_url": None, + "api_url": "https://ai.api.nvidia.com/v1/retrieval/nvidia", "model": "playground_nvolveqa_40k", "prefix": "", "suffix": "", @@ -117,7 +130,7 @@ def from_dict(self, monkeypatch): } component = NvidiaDocumentEmbedder.from_dict(data) assert component.model == "nvolveqa_40k" - assert component.api_url is None + assert component.api_url == "https://example.com" assert component.prefix == "prefix" assert component.suffix == "suffix" assert component.batch_size == 32 @@ -169,8 +182,7 @@ def test_prepare_texts_to_embed_w_suffix(self): "my_prefix document number 4 my_suffix", ] - @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") - def test_embed_batch(self, mock_client_class): + def test_embed_batch(self): texts = ["text 1", "text 2", "text 3", "text 4", "text 5"] embedder = NvidiaDocumentEmbedder( @@ -178,17 +190,8 @@ def test_embed_batch(self, mock_client_class): api_key=Secret.from_token("fake-api-key"), ) - def mock_query_function(_, payload): - inputs = payload["input"] - data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))] - return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}} - - mock_client = Mock( - get_model_nvcf_id=Mock(return_value="some_id"), - query_function=mock_query_function, - ) - mock_client_class.return_value = mock_client embedder.warm_up() + embedder.backend = MockBackend("aa", None) embeddings, metadata = embedder._embed_batch(texts_to_embed=texts, batch_size=2) @@ -201,8 +204,7 @@ def mock_query_function(_, payload): assert metadata == {"usage": {"prompt_tokens": 3 * 4, "total_tokens": 3 * 4}} - @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") - def test_run(self, mock_client_class): + def test_run(self): docs = [ Document(content="I love cheese", meta={"topic": "Cuisine"}), Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}), @@ -218,17 +220,8 @@ def test_run(self, mock_client_class): embedding_separator=" | ", ) - def mock_query_function(_, payload): - inputs = payload["input"] - data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))] - return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}} - - mock_client = Mock( - get_model_nvcf_id=Mock(return_value="some_id"), - query_function=mock_query_function, - ) - mock_client_class.return_value = mock_client embedder.warm_up() + embedder.backend = MockBackend("aa", None) result = embedder.run(documents=docs) @@ -244,8 +237,7 @@ def mock_query_function(_, payload): assert all(isinstance(x, float) for x in doc.embedding) assert metadata == {"usage": {"prompt_tokens": 4, "total_tokens": 4}} - @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") - def test_run_custom_batch_size(self, mock_client_class): + def test_run_custom_batch_size(self): docs = [ Document(content="I love cheese", meta={"topic": "Cuisine"}), Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}), @@ -261,17 +253,8 @@ def test_run_custom_batch_size(self, mock_client_class): batch_size=1, ) - def mock_query_function(_, payload): - inputs = payload["input"] - data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))] - return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}} - - mock_client = Mock( - get_model_nvcf_id=Mock(return_value="some_id"), - query_function=mock_query_function, - ) - mock_client_class.return_value = mock_client embedder.warm_up() + embedder.backend = MockBackend("aa", None) result = embedder.run(documents=docs) @@ -288,21 +271,11 @@ def mock_query_function(_, payload): assert metadata == {"usage": {"prompt_tokens": 2 * 4, "total_tokens": 2 * 4}} - @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") - def test_run_wrong_input_format(self, mock_client_class): + def test_run_wrong_input_format(self): embedder = NvidiaDocumentEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key")) - def mock_query_function(_, payload): - inputs = payload["input"] - data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))] - return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}} - - mock_client = Mock( - get_model_nvcf_id=Mock(return_value="some_id"), - query_function=mock_query_function, - ) - mock_client_class.return_value = mock_client embedder.warm_up() + embedder.backend = MockBackend("aa", None) string_input = "text" list_integers_input = [1, 2, 3] @@ -313,21 +286,11 @@ def mock_query_function(_, payload): with pytest.raises(TypeError, match="NvidiaDocumentEmbedder expects a list of Documents as input"): embedder.run(documents=list_integers_input) - @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") - def test_run_on_empty_list(self, mock_client_class): + def test_run_on_empty_list(self): embedder = NvidiaDocumentEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key")) - def mock_query_function(_, payload): - inputs = payload["input"] - data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))] - return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}} - - mock_client = Mock( - get_model_nvcf_id=Mock(return_value="some_id"), - query_function=mock_query_function, - ) - mock_client_class.return_value = mock_client embedder.warm_up() + embedder.backend = MockBackend("aa", None) empty_list_input = [] result = embedder.run(documents=empty_list_input) @@ -339,25 +302,6 @@ def mock_query_function(_, payload): not os.environ.get("NVIDIA_API_KEY", None), reason="Export an env var called NVIDIA_API_KEY containing the Nvidia API key to run this test.", ) - @pytest.mark.integration - def test_run_integration(self): - embedder = NvidiaDocumentEmbedder("playground_nvolveqa_40k") - embedder.warm_up() - - docs = [ - Document(content="I love cheese", meta={"topic": "Cuisine"}), - Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}), - ] - - result = embedder.run(docs) - docs_with_embeddings = result["documents"] - - assert isinstance(docs_with_embeddings, list) - assert len(docs_with_embeddings) == len(docs) - for doc in docs_with_embeddings: - assert isinstance(doc.embedding, list) - assert isinstance(doc.embedding[0], float) - @pytest.mark.skipif( not os.environ.get("NVIDIA_NIM_EMBEDDER_MODEL", None) or not os.environ.get("NVIDIA_NIM_ENDPOINT_URL", None), reason="Export an env var called NVIDIA_NIM_EMBEDDER_MODEL containing the hosted model name and " diff --git a/integrations/nvidia/tests/test_generator.py b/integrations/nvidia/tests/test_generator.py index 882bcdc88..60f83dc43 100644 --- a/integrations/nvidia/tests/test_generator.py +++ b/integrations/nvidia/tests/test_generator.py @@ -2,7 +2,6 @@ # # SPDX-License-Identifier: Apache-2.0 import os -from unittest.mock import Mock, patch import pytest from haystack.utils import Secret @@ -94,96 +93,6 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): }, } - @patch("haystack_integrations.components.generators.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") - def test_run_deprecated_nvcf(self, mock_client_class): - generator = NvidiaGenerator( - api_url=None, # force use of deprecated NVCF backend - model="playground_nemotron_steerlm_8b", - api_key=Secret.from_token("fake-api-key"), - model_arguments={ - "temperature": 0.2, - "top_p": 0.7, - "max_tokens": 1024, - "seed": None, - "bad": None, - "stop": None, - }, - ) - mock_client = Mock( - get_model_nvcf_id=Mock(return_value="some_id"), - query_function=Mock( - return_value={ - "id": "some_id", - "choices": [ - { - "index": 0, - "message": {"content": "42", "role": "assistant"}, - "finish_reason": "stop", - } - ], - "usage": {"total_tokens": 21, "prompt_tokens": 19, "completion_tokens": 2}, - } - ), - ) - mock_client_class.return_value = mock_client - with pytest.warns(DeprecationWarning): - generator.warm_up() - - result = generator.run(prompt="What is the answer?") - mock_client.query_function.assert_called_once_with( - "some_id", - { - "messages": [ - {"content": "What is the answer?", "role": "user"}, - ], - "temperature": 0.2, - "top_p": 0.7, - "max_tokens": 1024, - "seed": None, - "bad": None, - "stop": None, - }, - ) - assert result == { - "replies": ["42"], - "meta": [ - { - "finish_reason": "stop", - "role": "assistant", - "usage": { - "total_tokens": 21, - "prompt_tokens": 19, - "completion_tokens": 2, - }, - }, - ], - } - - @pytest.mark.skipif( - not os.environ.get("NVIDIA_API_KEY", None), - reason="Export an env var called NVIDIA_API_KEY containing the Nvidia API key to run this test.", - ) - @pytest.mark.integration - def test_run_integration_with_nvcf_backend(self): - generator = NvidiaGenerator( - api_url=None, # force use of deprecated NVCF backend - model="playground_nv_llama2_rlhf_70b", - model_arguments={ - "temperature": 0.2, - "top_p": 0.7, - "max_tokens": 1024, - "seed": None, - "bad": None, - "stop": None, - }, - ) - with pytest.warns(DeprecationWarning): - generator.warm_up() - result = generator.run(prompt="What is the answer?") - - assert result["replies"] - assert result["meta"] - @pytest.mark.skipif( not os.environ.get("NVIDIA_NIM_GENERATOR_MODEL", None) or not os.environ.get("NVIDIA_NIM_ENDPOINT_URL", None), reason="Export an env var called NVIDIA_NIM_GENERATOR_MODEL containing the hosted model name and " diff --git a/integrations/nvidia/tests/test_text_embedder.py b/integrations/nvidia/tests/test_text_embedder.py index 206f79c10..30f529534 100644 --- a/integrations/nvidia/tests/test_text_embedder.py +++ b/integrations/nvidia/tests/test_text_embedder.py @@ -1,18 +1,29 @@ import os -from unittest.mock import Mock, patch import pytest from haystack.utils import Secret from haystack_integrations.components.embedders.nvidia import EmbeddingTruncateMode, NvidiaTextEmbedder +from haystack_integrations.components.embedders.nvidia.backend import EmbedderBackend + + +class MockBackend(EmbedderBackend): + def __init__(self, model, model_kwargs): + super().__init__(model, model_kwargs) + + def embed(self, texts): + inputs = texts + data = [[0.1, 0.2, 0.3] for i in range(len(inputs))] + return data, {"usage": {"total_tokens": 4, "prompt_tokens": 4}} class TestNvidiaTextEmbedder: def test_init_default(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") - embedder = NvidiaTextEmbedder("nvolveqa_40k") + embedder = NvidiaTextEmbedder() assert embedder.api_key == Secret.from_env_var("NVIDIA_API_KEY") - assert embedder.model == "nvolveqa_40k" + assert embedder.model == "NV-Embed-QA" + assert embedder.api_url == "https://ai.api.nvidia.com/v1/retrieval/nvidia" assert embedder.prefix == "" assert embedder.suffix == "" @@ -20,11 +31,13 @@ def test_init_with_parameters(self): embedder = NvidiaTextEmbedder( api_key=Secret.from_token("fake-api-key"), model="nvolveqa_40k", + api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia/test", prefix="prefix", suffix="suffix", ) assert embedder.api_key == Secret.from_token("fake-api-key") assert embedder.model == "nvolveqa_40k" + assert embedder.api_url == "https://ai.api.nvidia.com/v1/retrieval/nvidia/test" assert embedder.prefix == "prefix" assert embedder.suffix == "suffix" @@ -42,7 +55,7 @@ def test_to_dict(self, monkeypatch): "type": "haystack_integrations.components.embedders.nvidia.text_embedder.NvidiaTextEmbedder", "init_parameters": { "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"}, - "api_url": None, + "api_url": "https://ai.api.nvidia.com/v1/retrieval/nvidia", "model": "nvolveqa_40k", "prefix": "", "suffix": "", @@ -54,6 +67,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") component = NvidiaTextEmbedder( model="nvolveqa_40k", + api_url="https://example.com", prefix="prefix", suffix="suffix", truncate=EmbeddingTruncateMode.START, @@ -63,7 +77,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): "type": "haystack_integrations.components.embedders.nvidia.text_embedder.NvidiaTextEmbedder", "init_parameters": { "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"}, - "api_url": None, + "api_url": "https://example.com", "model": "nvolveqa_40k", "prefix": "prefix", "suffix": "suffix", @@ -77,7 +91,7 @@ def from_dict(self, monkeypatch): "type": "haystack_integrations.components.embedders.nvidia.text_embedder.NvidiaTextEmbedder", "init_parameters": { "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"}, - "api_url": None, + "api_url": "https://example.com", "model": "nvolveqa_40k", "prefix": "prefix", "suffix": "suffix", @@ -86,27 +100,19 @@ def from_dict(self, monkeypatch): } component = NvidiaTextEmbedder.from_dict(data) assert component.model == "nvolveqa_40k" - assert component.api_url is None + assert component.api_url == "https://example.com" assert component.prefix == "prefix" assert component.suffix == "suffix" assert component.truncate == "START" - @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") - def test_run(self, mock_client_class): + def test_run(self): embedder = NvidiaTextEmbedder( "playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key"), prefix="prefix ", suffix=" suffix" ) - mock_client = Mock( - get_model_nvcf_id=Mock(return_value="some_id"), - query_function=Mock( - return_value={ - "data": [{"index": 0, "embedding": [0.1, 0.2, 0.3]}], - "usage": {"total_tokens": 4, "prompt_tokens": 4}, - } - ), - ) - mock_client_class.return_value = mock_client + embedder.warm_up() + embedder.backend = MockBackend("aa", None) + result = embedder.run(text="The food was delicious") assert len(result["embedding"]) == 3 @@ -115,42 +121,16 @@ def test_run(self, mock_client_class): "usage": {"prompt_tokens": 4, "total_tokens": 4}, } - @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") - def test_run_wrong_input_format(self, mock_client_class): + def test_run_wrong_input_format(self): embedder = NvidiaTextEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key")) - mock_client = Mock( - get_model_nvcf_id=Mock(return_value="some_id"), - query_function=Mock( - return_value={ - "data": [{"index": 0, "embedding": [0.1, 0.2, 0.3]}], - "usage": {"total_tokens": 4, "prompt_tokens": 4}, - } - ), - ) - mock_client_class.return_value = mock_client embedder.warm_up() + embedder.backend = MockBackend("aa", None) list_integers_input = [1, 2, 3] with pytest.raises(TypeError, match="NvidiaTextEmbedder expects a string as an input"): embedder.run(text=list_integers_input) - @pytest.mark.skipif( - not os.environ.get("NVIDIA_API_KEY", None), - reason="Export an env var called NVIDIA_API_KEY containing the Nvidia API key to run this test.", - ) - @pytest.mark.integration - def test_run_integration_with_nvcf_backend(self): - embedder = NvidiaTextEmbedder("playground_nvolveqa_40k") - embedder.warm_up() - - result = embedder.run("A transformer is a deep learning architecture") - embedding = result["embedding"] - meta = result["meta"] - - assert all(isinstance(x, float) for x in embedding) - assert "usage" in meta - @pytest.mark.skipif( not os.environ.get("NVIDIA_NIM_EMBEDDER_MODEL", None) or not os.environ.get("NVIDIA_NIM_ENDPOINT_URL", None), reason="Export an env var called NVIDIA_NIM_EMBEDDER_MODEL containing the hosted model name and "