From 00520de74f20f61beeca173070d3edae96e0e508 Mon Sep 17 00:00:00 2001 From: shadeMe Date: Wed, 13 Mar 2024 13:22:38 +0100 Subject: [PATCH 01/20] Abstract service endpoint backend --- .../embedders/nvidia/_nim_backend.py | 14 ++++ .../nvidia/{_schema.py => _nvcf_backend.py} | 46 ++++++++++- .../components/embedders/nvidia/backend.py | 29 +++++++ .../embedders/nvidia/document_embedder.py | 76 +++++++++---------- .../components/embedders/nvidia/models.py | 31 -------- .../embedders/nvidia/text_embedder.py | 58 +++++++------- 6 files changed, 154 insertions(+), 100 deletions(-) create mode 100644 integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py rename integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/{_schema.py => _nvcf_backend.py} (53%) create mode 100644 integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py delete mode 100644 integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/models.py diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py new file mode 100644 index 000000000..01ce13ece --- /dev/null +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py @@ -0,0 +1,14 @@ +from typing import Any, Dict, Optional + +from .backend import EmbedderBackend + + +class NimBackend(EmbedderBackend): + def __init__( + self, + model: str, + api_url: str, + batch_size: int, + model_kwargs: Optional[Dict[str, Any]] = None, + ): + pass diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_schema.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py similarity index 53% rename from integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_schema.py rename to integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py index fc4e0e5bf..2711a71f8 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_schema.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py @@ -1,10 +1,54 @@ from dataclasses import asdict, dataclass -from typing import Any, Dict, List, Literal, Union +from typing import Any, Dict, List, Literal, Optional, Tuple, Union + +from haystack.utils.auth import Secret +from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient + +from .backend import EmbedderBackend MAX_INPUT_STRING_LENGTH = 2048 MAX_INPUTS = 50 +class NvcfBackend(EmbedderBackend): + def __init__( + self, + model: str, + api_key: Secret, + batch_size: int, + model_kwargs: Optional[Dict[str, Any]] = None, + ): + if not model.startswith("playground_"): + model = f"playground_{model}" + + super().__init__(model=model, model_kwargs=model_kwargs) + + if batch_size > MAX_INPUTS: + msg = f"NVIDIA Cloud Functions currently support a maximum batch size of {MAX_INPUTS}." 
+ raise ValueError(msg) + + self.api_key = api_key + self.client = NvidiaCloudFunctionsClient( + api_key=api_key, + headers={ + "Content-Type": "application/json", + "Accept": "application/json", + }, + ) + self.nvcf_id = self.client.get_model_nvcf_id(self.model_name) + + def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: + request = EmbeddingsRequest(input=texts, **self.model_kwargs).to_dict() + json_response = self.client.query_function(self.nvcf_id, request) + response = EmbeddingsResponse.from_dict(json_response) + + # Sort resulting embeddings by index + assert all(isinstance(r.embedding, list) for r in response.data) + sorted_embeddings: List[List[float]] = [r.embedding for r in sorted(response.data, key=lambda e: e.index)] # type: ignore + metadata = {"usage": response.usage.to_dict()} + return sorted_embeddings, metadata + + @dataclass class EmbeddingsRequest: input: Union[str, List[str]] diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py new file mode 100644 index 000000000..09e9b7c80 --- /dev/null +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py @@ -0,0 +1,29 @@ +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional, Tuple + + +class EmbedderBackend(ABC): + def __init__(self, model: str, model_kwargs: Optional[Dict[str, Any]] = None): + """ + Initialize the backend. + + :param model: + The name of the model to use. + :param model_kwargs: + Additional keyword arguments to pass to the model. + """ + self.model_name = model + self.model_kwargs = model_kwargs or {} + + @abstractmethod + def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: + """ + Invoke the backend and embed the given texts. + + :param texts: + Texts to embed. + :return: + Vector representation of the texts and + metadata returned by the service. + """ + pass diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py index 25c104b97..4c0424943 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py @@ -1,19 +1,20 @@ -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple from haystack import Document, component, default_from_dict, default_to_dict from haystack.utils import Secret, deserialize_secrets_inplace -from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient from tqdm import tqdm -from ._schema import MAX_INPUTS, EmbeddingsRequest, EmbeddingsResponse, Usage -from .models import NvidiaEmbeddingModel +from .backend import EmbedderBackend +from ._nvcf_backend import NvcfBackend +from ._nim_backend import NimBackend @component class NvidiaDocumentEmbedder: """ A component for embedding documents using embedding models provided by - [NVIDIA AI Foundation Endpoints](https://www.nvidia.com/en-us/ai-data-science/foundation-models/). + [NVIDIA AI Foundation Endpoints](https://www.nvidia.com/en-us/ai-data-science/foundation-models/) + and NVIDIA NeMo Inference Microservices. 
Usage example:
     ```python
@@ -21,7 +22,7 @@ class NvidiaDocumentEmbedder:
 
     doc = Document(content="I love pizza!")
 
-    text_embedder = NvidiaDocumentEmbedder(model=NvidiaEmbeddingModel.NVOLVE_40K)
+    text_embedder = NvidiaDocumentEmbedder(model="nvolveqa_40k")
     text_embedder.warm_up()
 
     result = text_embedder.run([doc])
@@ -31,8 +32,9 @@ class NvidiaDocumentEmbedder:
 
     def __init__(
         self,
-        model: Union[str, NvidiaEmbeddingModel],
-        api_key: Secret = Secret.from_env_var("NVIDIA_API_KEY"),
+        model: str,
+        api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"),
+        api_url: Optional[str] = None,
         prefix: str = "",
         suffix: str = "",
         batch_size: int = 32,
@@ -47,6 +49,8 @@ def __init__(
             Embedding model to use.
         :param api_key:
             API key for the NVIDIA AI Foundation Endpoints.
+        :param api_url:
+            Custom API URL for the NVIDIA NeMo Inference Microservices.
         :param prefix:
             A string to add to the beginning of each text.
         :param suffix:
@@ -62,16 +66,9 @@ def __init__(
             Separator used to concatenate the meta fields to the Document text.
 
         """
-        if isinstance(model, str):
-            model = NvidiaEmbeddingModel.from_str(model)
-
-        # Upper-limit for the endpoint.
-        if batch_size > MAX_INPUTS:
-            msg = f"NVIDIA Cloud Functions currently support a maximum batch size of {MAX_INPUTS}."
-            raise ValueError(msg)
-
         self.api_key = api_key
         self.model = model
+        self.api_url = api_url
         self.prefix = prefix
         self.suffix = suffix
         self.batch_size = batch_size
@@ -79,14 +76,7 @@ def __init__(
         self.meta_fields_to_embed = meta_fields_to_embed or []
         self.embedding_separator = embedding_separator
 
-        self.client = NvidiaCloudFunctionsClient(
-            api_key=api_key,
-            headers={
-                "Content-Type": "application/json",
-                "Accept": "application/json",
-            },
-        )
-        self.nvcf_id = None
+        self.backend: Optional[EmbedderBackend] = None
         self._initialized = False
 
     def warm_up(self):
@@ -96,7 +86,17 @@ def warm_up(self):
         if self._initialized:
             return
 
-        self.nvcf_id = self.client.get_model_nvcf_id(str(self.model))
+        if self.api_url is None:
+            if self.api_key is None:
+                msg = "API key is required for NVIDIA AI Foundation Endpoints."
+                raise ValueError(msg)
+
+            self.backend = NvcfBackend(
+                self.model, api_key=self.api_key, batch_size=self.batch_size, model_kwargs={"model": "passage"}
+            )
+        else:
+            self.backend = NimBackend(self.model, api_url=self.api_url, batch_size=self.batch_size)
+
         self._initialized = True
 
     def to_dict(self) -> Dict[str, Any]:
@@ -108,8 +108,9 @@ def to_dict(self) -> Dict[str, Any]:
         """
         return default_to_dict(
             self,
-            api_key=self.api_key.to_dict(),
-            model=str(self.model),
+            api_key=self.api_key.to_dict() if self.api_key else None,
+            model=self.model,
+            api_url=self.api_url,
             prefix=self.prefix,
             suffix=self.suffix,
             batch_size=self.batch_size,
@@ -128,7 +129,6 @@ def from_dict(cls, data: Dict[str, Any]) -> "NvidiaDocumentEmbedder":
         :returns:
             The deserialized component.
""" - data["init_parameters"]["model"] = NvidiaEmbeddingModel.from_str(data["init_parameters"]["model"]) deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) return default_from_dict(cls, data) @@ -147,27 +147,23 @@ def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]: def _embed_batch(self, texts_to_embed: List[str], batch_size: int) -> Tuple[List[List[float]], Dict[str, Any]]: all_embeddings: List[List[float]] = [] - usage = Usage(prompt_tokens=0, total_tokens=0) - assert self.nvcf_id is not None + usage_prompt_tokens = 0 + usage_total_tokens = 0 + + assert self.backend is not None for i in tqdm( range(0, len(texts_to_embed), batch_size), disable=not self.progress_bar, desc="Calculating embeddings" ): batch = texts_to_embed[i : i + batch_size] - request = EmbeddingsRequest(input=batch, model="passage").to_dict() - json_response = self.client.query_function(self.nvcf_id, request) - response = EmbeddingsResponse.from_dict(json_response) - - # Sort resulting embeddings by index - assert all(isinstance(r.embedding, list) for r in response.data) - sorted_embeddings: List[List[float]] = [r.embedding for r in sorted(response.data, key=lambda e: e.index)] # type: ignore + sorted_embeddings, meta = self.backend.embed(batch) all_embeddings.extend(sorted_embeddings) - usage.prompt_tokens += response.usage.prompt_tokens - usage.total_tokens += response.usage.total_tokens + usage_prompt_tokens += meta.get("usage", {}).get("prompt_tokens", 0) + usage_total_tokens += meta.get("usage", {}).get("total_tokens", 0) - return all_embeddings, {"usage": usage.to_dict()} + return all_embeddings, {"usage": {"prompt_tokens": usage_prompt_tokens, "total_tokens": usage_total_tokens}} @component.output_types(documents=List[Document], meta=Dict[str, Any]) def run(self, documents: List[Document]): diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/models.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/models.py deleted file mode 100644 index dd11ac727..000000000 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/models.py +++ /dev/null @@ -1,31 +0,0 @@ -from enum import Enum - - -class NvidiaEmbeddingModel(Enum): - """ - [NVIDIA AI Foundation models](https://catalog.ngc.nvidia.com/ai-foundation-models) - used for generating embeddings. - """ - - #: [Retrieval QA Embedding Model](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/ai-foundation/models/nvolve-40k). - NVOLVE_40K = "playground_nvolveqa_40k" - - def __str__(self): - return self.value - - @classmethod - def from_str(cls, string: str) -> "NvidiaEmbeddingModel": - """ - Create an embedding model from a string. - - :param string: - String to convert. - :returns: - Embedding model. - """ - enum_map = {e.value: e for e in NvidiaEmbeddingModel} - emb_model = enum_map.get(string) - if emb_model is None: - msg = f"Unknown embedding model '{string}'. 
Supported modes are: {list(enum_map.keys())}" - raise ValueError(msg) - return emb_model diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py index a377934e3..c6dfaa6b3 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py @@ -1,18 +1,19 @@ -from typing import Any, Dict, List, Union +from typing import Any, Dict, List, Optional, Tuple from haystack import component, default_from_dict, default_to_dict from haystack.utils import Secret, deserialize_secrets_inplace -from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient -from ._schema import EmbeddingsRequest, EmbeddingsResponse -from .models import NvidiaEmbeddingModel +from .backend import EmbedderBackend +from ._nvcf_backend import NvcfBackend +from ._nim_backend import NimBackend @component class NvidiaTextEmbedder: """ A component for embedding strings using embedding models provided by - [NVIDIA AI Foundation Endpoints](https://www.nvidia.com/en-us/ai-data-science/foundation-models/). + [NVIDIA AI Foundation Endpoints](https://www.nvidia.com/en-us/ai-data-science/foundation-models/) + and NVIDIA NeMo Inference Microservices. For models that differentiate between query and document inputs, this component embeds the input string as a query. @@ -23,7 +24,7 @@ class NvidiaTextEmbedder: text_to_embed = "I love pizza!" - text_embedder = NvidiaTextEmbedder(model=NvidiaEmbeddingModel.NVOLVE_40K) + text_embedder = NvidiaTextEmbedder(model="nvolveqa_40k") text_embedder.warm_up() print(text_embedder.run(text_to_embed)) @@ -32,8 +33,9 @@ class NvidiaTextEmbedder: def __init__( self, - model: Union[str, NvidiaEmbeddingModel], - api_key: Secret = Secret.from_env_var("NVIDIA_API_KEY"), + model: str, + api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), + api_url: Optional[str] = None, prefix: str = "", suffix: str = "", ): @@ -44,27 +46,21 @@ def __init__( Embedding model to use. :param api_key: API key for the NVIDIA AI Foundation Endpoints. + :param api_url: + Custom API URL for the NVIDIA NeMo Inference Microservices. :param prefix: A string to add to the beginning of each text. :param suffix: A string to add to the end of each text. """ - if isinstance(model, str): - model = NvidiaEmbeddingModel.from_str(model) - self.api_key = api_key self.model = model + self.api_url = api_url self.prefix = prefix self.suffix = suffix - self.client = NvidiaCloudFunctionsClient( - api_key=api_key, - headers={ - "Content-Type": "application/json", - "Accept": "application/json", - }, - ) - self.nvcf_id = None + + self.backend: Optional[EmbedderBackend] = None self._initialized = False def warm_up(self): @@ -74,7 +70,15 @@ def warm_up(self): if self._initialized: return - self.nvcf_id = self.client.get_model_nvcf_id(str(self.model)) + if self.api_url is None: + if self.api_key is None: + msg = "API key is required for NVIDIA AI Foundation Endpoints." 
+ raise ValueError(msg) + + self.backend = NvcfBackend(self.model, api_key=self.api_key, batch_size=1, model_kwargs={"model": "query"}) + else: + self.backend = NimBackend(self.model, api_url=self.api_url, batch_size=1) + self._initialized = True def to_dict(self) -> Dict[str, Any]: @@ -86,8 +90,9 @@ def to_dict(self) -> Dict[str, Any]: """ return default_to_dict( self, - api_key=self.api_key.to_dict(), - model=str(self.model), + api_key=self.api_key.to_dict() if self.api_key else None, + model=self.model, + api_url=self.api_url, prefix=self.prefix, suffix=self.suffix, ) @@ -102,7 +107,6 @@ def from_dict(cls, data: Dict[str, Any]) -> "NvidiaTextEmbedder": :returns: The deserialized component. """ - data["init_parameters"]["model"] = NvidiaEmbeddingModel.from_str(data["init_parameters"]["model"]) deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) return default_from_dict(cls, data) @@ -132,10 +136,8 @@ def run(self, text: str): ) raise TypeError(msg) - assert self.nvcf_id is not None + assert self.backend is not None text_to_embed = self.prefix + text + self.suffix - request = EmbeddingsRequest(input=text_to_embed, model="query").to_dict() - json_response = self.client.query_function(self.nvcf_id, request) - response = EmbeddingsResponse.from_dict(json_response) + sorted_embeddings, meta = self.backend.embed([text_to_embed]) - return {"embedding": response.data[0].embedding, "meta": {"usage": response.usage.to_dict()}} + return {"embedding": sorted_embeddings[0], "meta": meta} From 3fea5946ab4db95d915bbf202df391e5e2ce673c Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Wed, 13 Mar 2024 17:38:09 +0100 Subject: [PATCH 02/20] Abstract generators backend --- .../components/generators/nvidia/__init__.py | 3 +- .../generators/nvidia/_nim_backend.py | 16 +++ .../generators/nvidia/_nvcf_backend.py | 120 ++++++++++++++++++ .../components/generators/nvidia/backend.py | 29 +++++ .../components/generators/nvidia/generator.py | 71 ++++------- .../components/generators/nvidia/models.py | 35 ----- integrations/nvidia/tests/test_generator.py | 55 ++++---- 7 files changed, 222 insertions(+), 107 deletions(-) create mode 100644 integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py create mode 100644 integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py create mode 100644 integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py delete mode 100644 integrations/nvidia/src/haystack_integrations/components/generators/nvidia/models.py diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/__init__.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/__init__.py index 3a315843d..18354ea17 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/__init__.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/__init__.py @@ -2,6 +2,5 @@ # # SPDX-License-Identifier: Apache-2.0 from .generator import NvidiaGenerator -from .models import NvidiaGeneratorModel -__all__ = ["NvidiaGenerator", "NvidiaGeneratorModel"] +__all__ = ["NvidiaGenerator"] diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py new file mode 100644 index 000000000..4c46e09b7 --- /dev/null +++ 
b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py @@ -0,0 +1,16 @@ +from typing import Any, Dict, List, Optional, Tuple + +from .backend import GeneratorBackend + + +class NimBackend(GeneratorBackend): + def __init__( + self, + model: str, + api_url: str, + model_kwargs: Optional[Dict[str, Any]] = None, + ): + pass + + def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]], Dict[str, Any]]: + return [], [], {} diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py new file mode 100644 index 000000000..d53ce9cce --- /dev/null +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py @@ -0,0 +1,120 @@ +from dataclasses import asdict, dataclass +from typing import Any, Dict, List, Optional, Tuple + +from haystack.utils.auth import Secret +from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient + +from .backend import GeneratorBackend + +MAX_INPUT_STRING_LENGTH = 2048 +MAX_INPUTS = 50 + + +class NvcfBackend(GeneratorBackend): + def __init__( + self, + model: str, + api_key: Secret, + model_kwargs: Optional[Dict[str, Any]] = None, + ): + if not model.startswith("playground_"): + model = f"playground_{model}" + + super().__init__(model=model, model_kwargs=model_kwargs) + + self.api_key = api_key + self.client = NvidiaCloudFunctionsClient( + api_key=api_key, + headers={ + "Content-Type": "application/json", + "Accept": "application/json", + }, + ) + self.nvcf_id = self.client.get_model_nvcf_id(self.model_name) + + def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]], Dict[str, Any]]: + messages = [Message(role="user", content=prompt)] + request = GenerationRequest(messages=messages, **self.model_kwargs).to_dict() + json_response = self.client.query_function(self.nvcf_id, request) + response = GenerationResponse.from_dict(json_response) + + replies = [] + meta = [] + for choice in response.choices: + replies.append(choice.message.content) + meta.append( + { + "role": choice.message.role, + "finish_reason": choice.finish_reason, + } + ) + usage = { + "completion_tokens": response.usage.completion_tokens, + "prompt_tokens": response.usage.prompt_tokens, + "total_tokens": response.usage.total_tokens, + } + return replies, meta, usage + + +@dataclass +class Message: + content: str + role: str + + +@dataclass +class GenerationRequest: + messages: List[Message] + temperature: float = 0.2 + top_p: float = 0.7 + max_tokens: int = 1024 + seed: Optional[int] = None + bad: Optional[List[str]] = None + stop: Optional[List[str]] = None + + def to_dict(self) -> Dict[str, Any]: + return asdict(self) + + +@dataclass +class Choice: + index: int + message: Message + finish_reason: str + + +@dataclass +class Usage: + completion_tokens: int + prompt_tokens: int + total_tokens: int + + +@dataclass +class GenerationResponse: + id: str + choices: List[Choice] + usage: Usage + + @classmethod + def from_dict(cls, data: dict) -> "GenerationResponse": + try: + return cls( + id=data["id"], + choices=[ + Choice( + index=choice["index"], + message=Message(content=choice["message"]["content"], role=choice["message"]["role"]), + finish_reason=choice["finish_reason"], + ) + for choice in data["choices"] + ], + usage=Usage( + completion_tokens=data["usage"]["completion_tokens"], + prompt_tokens=data["usage"]["prompt_tokens"], + 
total_tokens=data["usage"]["total_tokens"],
+                ),
+            )
+        except (KeyError, TypeError) as e:
+            msg = f"Failed to parse {cls.__name__} from data: {data}"
+            raise ValueError(msg) from e
diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py
new file mode 100644
index 000000000..793402c50
--- /dev/null
+++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py
@@ -0,0 +1,29 @@
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List, Optional, Tuple
+
+
+class GeneratorBackend(ABC):
+    def __init__(self, model: str, model_kwargs: Optional[Dict[str, Any]] = None):
+        """
+        Initialize the backend.
+
+        :param model:
+            The name of the model to use.
+        :param model_kwargs:
+            Additional keyword arguments to pass to the model.
+        """
+        self.model_name = model
+        self.model_kwargs = model_kwargs or {}
+
+    @abstractmethod
+    def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]], Dict[str, Any]]:
+        """
+        Invoke the backend and prompt the model.
+
+        :param prompt:
+            Prompt text.
+        :return:
+            Replies generated by the model, their related
+            metadata, and usage statistics returned by the service.
+        """
+        pass
diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py
index 46550baab..1ff79385a 100644
--- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py
+++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py
@@ -1,14 +1,14 @@
 # SPDX-FileCopyrightText: 2024-present deepset GmbH
 #
 # SPDX-License-Identifier: Apache-2.0
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional
 
 from haystack import component, default_from_dict, default_to_dict
 from haystack.utils.auth import Secret, deserialize_secrets_inplace
-from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient
 
-from ._schema import GenerationRequest, GenerationResponse, Message
-from .models import NvidiaGeneratorModel
+from ._nim_backend import NimBackend
+from ._nvcf_backend import NvcfBackend
+from .backend import GeneratorBackend
 
 
 @component
@@ -42,7 +42,8 @@ class NvidiaGenerator:
 
     def __init__(
         self,
-        model: Union[str, NvidiaGeneratorModel],
+        model: str,
+        api_url: Optional[str] = None,
         api_key: Secret = Secret.from_env_var("NVIDIA_API_KEY"),
         model_arguments: Optional[Dict[str, Any]] = None,
     ):
@@ -59,33 +60,28 @@ def __init__(
             Additional arguments to pass to the model provider. Different models accept different arguments.
             Search your model in the [Nvidia catalog](https://catalog.ngc.nvidia.com/ai-foundation-models)
             to know the supported arguments.
-
-        :raises ValueError: If `model` is not supported.
         """
-        if isinstance(model, str):
-            model = NvidiaGeneratorModel.from_str(model)
-
         self._model = model
+        self._api_url = api_url
         self._api_key = api_key
         self._model_arguments = model_arguments or {}
-        # This is initialized in warm_up
-        self._model_id = None
-
-        self._client = NvidiaCloudFunctionsClient(
-            api_key=api_key,
-            headers={
-                "Content-Type": "application/json",
-                "Accept": "application/json",
-            },
-        )
+
+        self._backend: Optional[GeneratorBackend] = None
 
     def warm_up(self):
         """
         Initializes the component.
""" - if self._model_id is not None: + if self._backend is not None: return - self._model_id = self._client.get_model_nvcf_id(str(self._model)) + + if self._api_url is None: + if self._api_key is None: + msg = "API key is required for NVIDIA AI Foundation Endpoints." + raise ValueError(msg) + self._backend = NvcfBackend(self._model, api_key=self._api_key, model_kwargs=self._model_arguments) + else: + self._backend = NimBackend(self._model, api_url=self._api_url, model_kwargs=self._model_arguments) def to_dict(self) -> Dict[str, Any]: """ @@ -95,7 +91,11 @@ def to_dict(self) -> Dict[str, Any]: Dictionary with serialized data. """ return default_to_dict( - self, model=str(self._model), api_key=self._api_key.to_dict(), model_arguments=self._model_arguments + self, + model=self._model, + api_url=self._api_url, + api_key=self._api_key.to_dict(), + model_arguments=self._model_arguments, ) @classmethod @@ -125,30 +125,11 @@ def run(self, prompt: str): - `meta` - Metadata for each reply. - `usage` - Usage statistics for the model. """ - if self._model_id is None: + if self._backend is None: msg = "The generation model has not been loaded. Call warm_up() before running." raise RuntimeError(msg) - messages = [Message(role="user", content=prompt)] - request = GenerationRequest(messages=messages, **self._model_arguments).to_dict() - json_response = self._client.query_function(self._model_id, request) - - replies = [] - meta = [] - data = GenerationResponse.from_dict(json_response) - for choice in data.choices: - replies.append(choice.message.content) - meta.append( - { - "role": choice.message.role, - "finish_reason": choice.finish_reason, - } - ) - - usage = { - "completion_tokens": data.usage.completion_tokens, - "prompt_tokens": data.usage.prompt_tokens, - "total_tokens": data.usage.total_tokens, - } + assert self._backend is not None + replies, meta, usage = self._backend.generate(prompt=prompt) return {"replies": replies, "meta": meta, "usage": usage} diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/models.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/models.py deleted file mode 100644 index 448fb7aec..000000000 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/models.py +++ /dev/null @@ -1,35 +0,0 @@ -# SPDX-FileCopyrightText: 2024-present deepset GmbH -# -# SPDX-License-Identifier: Apache-2.0 -from enum import Enum - - -class NvidiaGeneratorModel(Enum): - """ - Generator models supported by NvidiaGenerator and NvidiaChatGenerator. - """ - - NV_LLAMA2_RLHF_70B = "playground_nv_llama2_rlhf_70b" - STEERLM_LLAMA_70B = "playground_steerlm_llama_70b" - NEMOTRON_STEERLM_8B = "playground_nemotron_steerlm_8b" - NEMOTRON_QA_8B = "playground_nemotron_qa_8b" - - def __str__(self): - return self.value - - @classmethod - def from_str(cls, string: str) -> "NvidiaGeneratorModel": - """ - Create a generator model from a string. - - :param string: - String to convert. - :returns: - A generator model. - """ - enum_map = {e.value: e for e in NvidiaGeneratorModel} - models = enum_map.get(string) - if models is None: - msg = f"Unknown model '{string}'. 
Supported models are: {list(enum_map.keys())}" - raise ValueError(msg) - return models diff --git a/integrations/nvidia/tests/test_generator.py b/integrations/nvidia/tests/test_generator.py index b10b60951..fa81d323c 100644 --- a/integrations/nvidia/tests/test_generator.py +++ b/integrations/nvidia/tests/test_generator.py @@ -2,21 +2,20 @@ # # SPDX-License-Identifier: Apache-2.0 import os -from unittest.mock import patch +from unittest.mock import Mock, patch import pytest from haystack.utils import Secret from haystack_integrations.components.generators.nvidia import NvidiaGenerator -from haystack_integrations.components.generators.nvidia.models import NvidiaGeneratorModel class TestNvidiaGenerator: def test_init_default(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") - generator = NvidiaGenerator(NvidiaGeneratorModel.NV_LLAMA2_RLHF_70B) + generator = NvidiaGenerator("playground_nv_llama2_rlhf_70b") assert generator._api_key == Secret.from_env_var("NVIDIA_API_KEY") - assert generator._model == NvidiaGeneratorModel.NV_LLAMA2_RLHF_70B + assert generator._model == "playground_nv_llama2_rlhf_70b" assert generator._model_arguments == {} def test_init_with_parameters(self): @@ -33,7 +32,7 @@ def test_init_with_parameters(self): }, ) assert generator._api_key == Secret.from_token("fake-api-key") - assert generator._model == NvidiaGeneratorModel.NEMOTRON_STEERLM_8B + assert generator._model == "playground_nemotron_steerlm_8b" assert generator._model_arguments == { "temperature": 0.2, "top_p": 0.7, @@ -50,11 +49,12 @@ def test_init_fail_wo_api_key(self, monkeypatch): def test_to_dict(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") - generator = NvidiaGenerator(NvidiaGeneratorModel.NEMOTRON_STEERLM_8B) + generator = NvidiaGenerator("playground_nemotron_steerlm_8b") data = generator.to_dict() assert data == { "type": "haystack_integrations.components.generators.nvidia.generator.NvidiaGenerator", "init_parameters": { + "api_url": None, "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"}, "model": "playground_nemotron_steerlm_8b", "model_arguments": {}, @@ -64,7 +64,8 @@ def test_to_dict(self, monkeypatch): def test_to_dict_with_custom_init_parameters(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") generator = NvidiaGenerator( - model=NvidiaGeneratorModel.NEMOTRON_STEERLM_8B, + model="playground_nemotron_steerlm_8b", + api_url="https://my.url.com", model_arguments={ "temperature": 0.2, "top_p": 0.7, @@ -79,6 +80,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): "type": "haystack_integrations.components.generators.nvidia.generator.NvidiaGenerator", "init_parameters": { "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"}, + "api_url": "https://my.url.com", "model": "playground_nemotron_steerlm_8b", "model_arguments": { "temperature": 0.2, @@ -91,10 +93,10 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): }, } - @patch("haystack_integrations.components.generators.nvidia.generator.NvidiaCloudFunctionsClient") - def test_run(self, mock_client): + @patch("haystack_integrations.components.generators.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") + def test_run(self, mock_client_class): generator = NvidiaGenerator( - model=NvidiaGeneratorModel.NEMOTRON_STEERLM_8B, + model="playground_nemotron_steerlm_8b", api_key=Secret.from_token("fake-api-key"), model_arguments={ "temperature": 0.2, @@ -105,22 +107,25 @@ def test_run(self, mock_client): 
"stop": None, }, ) - mock_client.get_model_nvcf_id.return_value = "some_id" - generator._client = mock_client + mock_client = Mock( + get_model_nvcf_id=Mock(return_value="some_id"), + query_function=Mock( + return_value={ + "id": "some_id", + "choices": [ + { + "index": 0, + "message": {"content": "42", "role": "assistant"}, + "finish_reason": "stop", + } + ], + "usage": {"total_tokens": 21, "prompt_tokens": 19, "completion_tokens": 2}, + } + ), + ) + mock_client_class.return_value = mock_client generator.warm_up() - mock_client.get_model_nvcf_id.assert_called_once_with("playground_nemotron_steerlm_8b") - mock_client.query_function.return_value = { - "id": "some_id", - "choices": [ - { - "index": 0, - "message": {"content": "42", "role": "assistant"}, - "finish_reason": "stop", - } - ], - "usage": {"total_tokens": 21, "prompt_tokens": 19, "completion_tokens": 2}, - } result = generator.run(prompt="What is the answer?") mock_client.query_function.assert_called_once_with( "some_id", @@ -158,7 +163,7 @@ def test_run(self, mock_client): @pytest.mark.integration def test_run_integration(self): generator = NvidiaGenerator( - model=NvidiaGeneratorModel.NV_LLAMA2_RLHF_70B, + model="playground_nv_llama2_rlhf_70b", model_arguments={ "temperature": 0.2, "top_p": 0.7, From 3e89885fe1c06e63fec02f1864d2c4a5c5912fd2 Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Thu, 14 Mar 2024 17:09:01 +0100 Subject: [PATCH 03/20] Implement NimBackend --- .github/workflows/nvidia.yml | 1 + integrations/nvidia/pyproject.toml | 2 +- .../generators/nvidia/_nim_backend.py | 59 ++++++++++++++++++- .../components/generators/nvidia/generator.py | 7 ++- integrations/nvidia/tests/test_generator.py | 33 ++++++++++- 5 files changed, 97 insertions(+), 5 deletions(-) diff --git a/.github/workflows/nvidia.yml b/.github/workflows/nvidia.yml index 6e7562c17..fcbbdcdeb 100644 --- a/.github/workflows/nvidia.yml +++ b/.github/workflows/nvidia.yml @@ -22,6 +22,7 @@ env: PYTHONUNBUFFERED: "1" FORCE_COLOR: "1" NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} jobs: run: diff --git a/integrations/nvidia/pyproject.toml b/integrations/nvidia/pyproject.toml index f443e91f9..05830e350 100644 --- a/integrations/nvidia/pyproject.toml +++ b/integrations/nvidia/pyproject.toml @@ -22,7 +22,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] -dependencies = ["haystack-ai>=2.0.0b6"] +dependencies = ["haystack-ai", "requests"] [project.urls] Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/nvidia#readme" diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py index 4c46e09b7..92e9ab5ba 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py @@ -1,7 +1,12 @@ from typing import Any, Dict, List, Optional, Tuple +import requests +from haystack.utils.auth import Secret + from .backend import GeneratorBackend +REQUEST_TIMEOUT = 60 + class NimBackend(GeneratorBackend): def __init__( @@ -9,8 +14,58 @@ def __init__( model: str, api_url: str, model_kwargs: Optional[Dict[str, Any]] = None, + api_key: Optional[Secret] = None, ): - pass + headers = { + "Content-Type": "application/json", + } + if 
api_key:
+            headers["Authorization"] = f"Bearer {api_key.resolve_value()}"
+        self.session = requests.Session()
+        self.session.headers.update(headers)
+
+        self.model = model
+        self.api_url = api_url
+        self.model_kwargs = model_kwargs or {}
 
     def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]], Dict[str, Any]]:
-        return [], [], {}
+        url = f"{self.api_url}/chat/completions"
+
+        res = self.session.post(
+            url,
+            json={
+                "model": self.model,
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": prompt,
+                    },
+                ],
+                **self.model_kwargs,
+            },
+            timeout=REQUEST_TIMEOUT,
+        )
+        res.raise_for_status()
+
+        completions = res.json()
+        choices = completions["choices"]
+        # Sort the choices by index, we don't know whether they're out of order or not
+        choices.sort(key=lambda c: c["index"])
+        replies = []
+        meta = []
+        for choice in choices:
+            message = choice["message"]
+            replies.append(message["content"])
+            choice_meta = {
+                "role": message["role"],
+                "finish_reason": choice["finish_reason"],
+            }
+            meta.append(choice_meta)
+
+        usage = {
+            "prompt_tokens": completions["usage"]["prompt_tokens"],
+            "completion_tokens": completions["usage"]["completion_tokens"],
+            "total_tokens": completions["usage"]["total_tokens"],
+        }
+
+        return replies, meta, usage
diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py
index 1ff79385a..206c2779a 100644
--- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py
+++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py
@@ -81,7 +81,12 @@ def warm_up(self):
                 raise ValueError(msg)
             self._backend = NvcfBackend(self._model, api_key=self._api_key, model_kwargs=self._model_arguments)
         else:
-            self._backend = NimBackend(self._model, api_url=self._api_url, model_kwargs=self._model_arguments)
+            self._backend = NimBackend(
+                self._model,
+                api_url=self._api_url,
+                model_kwargs=self._model_arguments,
+                api_key=self._api_key,
+            )
 
     def to_dict(self) -> Dict[str, Any]:
         """
diff --git a/integrations/nvidia/tests/test_generator.py b/integrations/nvidia/tests/test_generator.py
index fa81d323c..c095ab538 100644
--- a/integrations/nvidia/tests/test_generator.py
+++ b/integrations/nvidia/tests/test_generator.py
@@ -161,7 +161,7 @@ def test_run(self, mock_client_class):
         reason="Export an env var called NVIDIA_API_KEY containing the Nvidia API key to run this test.",
     )
     @pytest.mark.integration
-    def test_run_integration(self):
+    def test_run_integration_with_nvcf_backend(self):
         generator = NvidiaGenerator(
             model="playground_nv_llama2_rlhf_70b",
             model_arguments={
@@ -179,3 +179,34 @@ def test_run_integration(self):
         assert result["replies"]
         assert result["meta"]
         assert result["usage"]
+
+    @pytest.mark.skipif(
+        not os.environ.get("OPENAI_API_KEY", None),
+        reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
+    )
+    @pytest.mark.integration
+    def test_run_integration_with_nim_backend(self):
+        """
+        This test requires an OpenAI API key to run.
+        We use OpenAI in this case as the NimBackend must communicate with an OpenAI-compatible
+        API endpoint and this is the easiest way to test it.
+ """ + generator = NvidiaGenerator( + model="gpt-3.5-turbo", + api_url="https://api.openai.com/v1", + api_key=Secret.from_env_var(["OPENAI_API"]), + model_arguments={ + "temperature": 0.2, + "top_p": 0.7, + "max_tokens": 1024, + "seed": None, + "bad": None, + "stop": None, + }, + ) + generator.warm_up() + result = generator.run(prompt="What is the answer?") + + assert result["replies"] + assert result["meta"] + assert result["usage"] From 8a42f852ce44a609e878244a1354bab2dfcb29dd Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Thu, 14 Mar 2024 17:41:18 +0100 Subject: [PATCH 04/20] Implement NimBackend for embedders --- .../components/embedders/nvidia/__init__.py | 2 - .../embedders/nvidia/_nim_backend.py | 38 +++++++++++++++++-- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/__init__.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/__init__.py index 6ad2f9f6b..588aca2e6 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/__init__.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/__init__.py @@ -1,9 +1,7 @@ from .document_embedder import NvidiaDocumentEmbedder -from .models import NvidiaEmbeddingModel from .text_embedder import NvidiaTextEmbedder __all__ = [ "NvidiaDocumentEmbedder", - "NvidiaEmbeddingModel", "NvidiaTextEmbedder", ] diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py index 01ce13ece..dd9b85958 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py @@ -1,4 +1,7 @@ -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional, Tuple + +import requests +from haystack.utils.auth import Secret from .backend import EmbedderBackend @@ -8,7 +11,36 @@ def __init__( self, model: str, api_url: str, - batch_size: int, model_kwargs: Optional[Dict[str, Any]] = None, + api_key: Optional[Secret] = None, ): - pass + headers = { + "Content-Type": "application/json", + } + if api_key: + headers["Authorization"] = f"Bearer {api_key.resolve_value()}" + self.session = requests.Session() + self.session.headers.update(headers) + + self.model = model + self.api_url = api_url + self.model_kwargs = model_kwargs or {} + + def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: + url = f"{self.api_url}/embeddings" + + res = self.session.post( + url, + json={ + "model": self.model, + "input": texts, + **self.model_kwargs, + }, + ) + res.raise_for_status() + + data = res.json() + # Sort the embeddings by index, we don't know whether they're out of order or not + embeddings = [e["embedding"] for e in sorted(data["data"], key=lambda e: e["index"])] + + return embeddings, data["usage"] From 8f17a5808eff10cf06c732d8de99e5ac62628573 Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Thu, 14 Mar 2024 17:45:07 +0100 Subject: [PATCH 05/20] Fix embedders backends arguments --- .../components/embedders/nvidia/_nvcf_backend.py | 12 +----------- .../components/embedders/nvidia/document_embedder.py | 10 ++++------ 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py 
b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py index 2711a71f8..dbd039e65 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py @@ -7,7 +7,6 @@ from .backend import EmbedderBackend MAX_INPUT_STRING_LENGTH = 2048 -MAX_INPUTS = 50 class NvcfBackend(EmbedderBackend): @@ -15,7 +14,6 @@ def __init__( self, model: str, api_key: Secret, - batch_size: int, model_kwargs: Optional[Dict[str, Any]] = None, ): if not model.startswith("playground_"): @@ -23,10 +21,6 @@ def __init__( super().__init__(model=model, model_kwargs=model_kwargs) - if batch_size > MAX_INPUTS: - msg = f"NVIDIA Cloud Functions currently support a maximum batch size of {MAX_INPUTS}." - raise ValueError(msg) - self.api_key = api_key self.client = NvidiaCloudFunctionsClient( api_key=api_key, @@ -56,11 +50,7 @@ class EmbeddingsRequest: encoding_format: Literal["float", "base64"] = "float" def __post_init__(self): - if isinstance(self.input, list): - if len(self.input) > MAX_INPUTS: - msg = f"The number of inputs should not exceed {MAX_INPUTS}" - raise ValueError(msg) - else: + if not isinstance(self.input, list): self.input = [self.input] if len(self.input) == 0: diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py index 4c0424943..24c5add31 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py @@ -4,9 +4,9 @@ from haystack.utils import Secret, deserialize_secrets_inplace from tqdm import tqdm -from .backend import EmbedderBackend -from ._nvcf_backend import NvcfBackend from ._nim_backend import NimBackend +from ._nvcf_backend import NvcfBackend +from .backend import EmbedderBackend @component @@ -91,11 +91,9 @@ def warm_up(self): msg = "API key is required for NVIDIA AI Foundation Endpoints." 
raise ValueError(msg) - self.backend = NvcfBackend( - self.model, api_key=self.api_key, batch_size=self.batch_size, model_kwargs={"model": "passage"} - ) + self.backend = NvcfBackend(self.model, api_key=self.api_key, model_kwargs={"model": "passage"}) else: - self.backend = NimBackend(self.model, api_url=self.api_url, batch_size=self.batch_size) + self.backend = NimBackend(self.model, api_url=self.api_url, api_key=self.api_key) self._initialized = True From f25434854520ef44b96a85979fc7ab5a240a00f5 Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Thu, 14 Mar 2024 17:51:56 +0100 Subject: [PATCH 06/20] Fix text embedder backend arguments --- .../components/embedders/nvidia/text_embedder.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py index c6dfaa6b3..e0645e0bf 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py @@ -3,9 +3,9 @@ from haystack import component, default_from_dict, default_to_dict from haystack.utils import Secret, deserialize_secrets_inplace -from .backend import EmbedderBackend -from ._nvcf_backend import NvcfBackend from ._nim_backend import NimBackend +from ._nvcf_backend import NvcfBackend +from .backend import EmbedderBackend @component @@ -75,9 +75,9 @@ def warm_up(self): msg = "API key is required for NVIDIA AI Foundation Endpoints." raise ValueError(msg) - self.backend = NvcfBackend(self.model, api_key=self.api_key, batch_size=1, model_kwargs={"model": "query"}) + self.backend = NvcfBackend(self.model, api_key=self.api_key, model_kwargs={"model": "query"}) else: - self.backend = NimBackend(self.model, api_url=self.api_url, batch_size=1) + self.backend = NimBackend(self.model, api_url=self.api_url, api_key=self.api_key) self._initialized = True From 5cdb5eb60ac14789a667d5207db861946e1c4fb1 Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Thu, 14 Mar 2024 18:03:02 +0100 Subject: [PATCH 07/20] Make embedders nim backend consistent with nvcf one --- .../components/embedders/nvidia/_nim_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py index dd9b85958..d42719d7d 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py @@ -43,4 +43,4 @@ def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: # Sort the embeddings by index, we don't know whether they're out of order or not embeddings = [e["embedding"] for e in sorted(data["data"], key=lambda e: e["index"])] - return embeddings, data["usage"] + return embeddings, {"usage": data["usage"]} From 24eb1bdfa43816b45d49852268b763bc7dec0aae Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Thu, 14 Mar 2024 18:14:58 +0100 Subject: [PATCH 08/20] Fix tests --- .../nvidia/tests/test_document_embedder.py | 114 ++++++++++++------ integrations/nvidia/tests/test_generator.py | 8 +- .../nvidia/tests/test_text_embedder.py | 91 +++++++++----- 3 files changed, 138 insertions(+), 75 deletions(-) diff --git 
a/integrations/nvidia/tests/test_document_embedder.py b/integrations/nvidia/tests/test_document_embedder.py index ed8af93c9..501be205d 100644 --- a/integrations/nvidia/tests/test_document_embedder.py +++ b/integrations/nvidia/tests/test_document_embedder.py @@ -1,36 +1,19 @@ import os +from unittest.mock import Mock, patch import pytest from haystack import Document from haystack.utils import Secret -from haystack_integrations.components.embedders.nvidia import NvidiaDocumentEmbedder, NvidiaEmbeddingModel -from haystack_integrations.utils.nvidia.client import AvailableNvidiaCloudFunctions - - -class MockClient: - def query_function(self, func_id, payload): - inputs = payload["input"] - data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))] - return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}} - - def available_functions(self): - return { - NvidiaEmbeddingModel.NVOLVE_40K.value: AvailableNvidiaCloudFunctions( - name=NvidiaEmbeddingModel.NVOLVE_40K.value, id="fake-id", status="ACTIVE" - ) - } - - def get_model_nvcf_id(self, model): - return "fake-id" +from haystack_integrations.components.embedders.nvidia import NvidiaDocumentEmbedder class TestNvidiaDocumentEmbedder: def test_init_default(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") - embedder = NvidiaDocumentEmbedder(NvidiaEmbeddingModel.NVOLVE_40K) + embedder = NvidiaDocumentEmbedder("nvolveqa_40k") assert embedder.api_key == Secret.from_env_var("NVIDIA_API_KEY") - assert embedder.model == NvidiaEmbeddingModel.NVOLVE_40K + assert embedder.model == "nvolveqa_40k" assert embedder.prefix == "" assert embedder.suffix == "" assert embedder.batch_size == 32 @@ -41,7 +24,7 @@ def test_init_default(self, monkeypatch): def test_init_with_parameters(self): embedder = NvidiaDocumentEmbedder( api_key=Secret.from_token("fake-api-key"), - model="playground_nvolveqa_40k", + model="nvolveqa_40k", prefix="prefix", suffix="suffix", batch_size=30, @@ -51,7 +34,7 @@ def test_init_with_parameters(self): ) assert embedder.api_key == Secret.from_token("fake-api-key") - assert embedder.model == NvidiaEmbeddingModel.NVOLVE_40K + assert embedder.model == "nvolveqa_40k" assert embedder.prefix == "prefix" assert embedder.suffix == "suffix" assert embedder.batch_size == 30 @@ -61,12 +44,9 @@ def test_init_with_parameters(self): def test_init_fail_wo_api_key(self, monkeypatch): monkeypatch.delenv("NVIDIA_API_KEY", raising=False) + embedder = NvidiaDocumentEmbedder("nvolveqa_40k") with pytest.raises(ValueError): - NvidiaDocumentEmbedder(NvidiaEmbeddingModel.NVOLVE_40K) - - def test_init_fail_batch_size(self, monkeypatch): - with pytest.raises(ValueError): - NvidiaDocumentEmbedder(model="playground_nvolveqa_40k", batch_size=55) + embedder.warm_up() def test_to_dict(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") @@ -76,6 +56,7 @@ def test_to_dict(self, monkeypatch): "type": "haystack_integrations.components.embedders.nvidia.document_embedder.NvidiaDocumentEmbedder", "init_parameters": { "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"}, + "api_url": None, "model": "playground_nvolveqa_40k", "prefix": "", "suffix": "", @@ -90,6 +71,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") component = NvidiaDocumentEmbedder( model="playground_nvolveqa_40k", + api_url="https://example.com", prefix="prefix", suffix="suffix", batch_size=10, @@ -102,6 +84,7 @@ def 
test_to_dict_with_custom_init_parameters(self, monkeypatch): "type": "haystack_integrations.components.embedders.nvidia.document_embedder.NvidiaDocumentEmbedder", "init_parameters": { "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"}, + "api_url": "https://example.com", "model": "playground_nvolveqa_40k", "prefix": "prefix", "suffix": "suffix", @@ -155,14 +138,25 @@ def test_prepare_texts_to_embed_w_suffix(self): "my_prefix document number 4 my_suffix", ] - def test_embed_batch(self): + @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") + def test_embed_batch(self, mock_client_class): texts = ["text 1", "text 2", "text 3", "text 4", "text 5"] embedder = NvidiaDocumentEmbedder( "playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key"), ) - embedder.client = MockClient() + + def mock_query_function(_, payload): + inputs = payload["input"] + data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))] + return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}} + + mock_client = Mock( + get_model_nvcf_id=Mock(return_value="some_id"), + query_function=mock_query_function, + ) + mock_client_class.return_value = mock_client embedder.warm_up() embeddings, metadata = embedder._embed_batch(texts_to_embed=texts, batch_size=2) @@ -176,7 +170,8 @@ def test_embed_batch(self): assert metadata == {"usage": {"prompt_tokens": 3 * 4, "total_tokens": 3 * 4}} - def test_run(self): + @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") + def test_run(self, mock_client_class): docs = [ Document(content="I love cheese", meta={"topic": "Cuisine"}), Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}), @@ -191,7 +186,17 @@ def test_run(self): meta_fields_to_embed=["topic"], embedding_separator=" | ", ) - embedder.client = MockClient() + + def mock_query_function(_, payload): + inputs = payload["input"] + data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))] + return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}} + + mock_client = Mock( + get_model_nvcf_id=Mock(return_value="some_id"), + query_function=mock_query_function, + ) + mock_client_class.return_value = mock_client embedder.warm_up() result = embedder.run(documents=docs) @@ -208,7 +213,8 @@ def test_run(self): assert all(isinstance(x, float) for x in doc.embedding) assert metadata == {"usage": {"prompt_tokens": 4, "total_tokens": 4}} - def test_run_custom_batch_size(self): + @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") + def test_run_custom_batch_size(self, mock_client_class): docs = [ Document(content="I love cheese", meta={"topic": "Cuisine"}), Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}), @@ -223,7 +229,17 @@ def test_run_custom_batch_size(self): embedding_separator=" | ", batch_size=1, ) - embedder.client = MockClient() + + def mock_query_function(_, payload): + inputs = payload["input"] + data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))] + return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}} + + mock_client = Mock( + get_model_nvcf_id=Mock(return_value="some_id"), + query_function=mock_query_function, + ) + mock_client_class.return_value = mock_client embedder.warm_up() result = embedder.run(documents=docs) @@ -241,9 +257,20 @@ def 
test_run_custom_batch_size(self): assert metadata == {"usage": {"prompt_tokens": 2 * 4, "total_tokens": 2 * 4}} - def test_run_wrong_input_format(self): + @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") + def test_run_wrong_input_format(self, mock_client_class): embedder = NvidiaDocumentEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key")) - embedder.client = MockClient() + + def mock_query_function(_, payload): + inputs = payload["input"] + data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))] + return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}} + + mock_client = Mock( + get_model_nvcf_id=Mock(return_value="some_id"), + query_function=mock_query_function, + ) + mock_client_class.return_value = mock_client embedder.warm_up() string_input = "text" @@ -255,9 +282,20 @@ def test_run_wrong_input_format(self): with pytest.raises(TypeError, match="NvidiaDocumentEmbedder expects a list of Documents as input"): embedder.run(documents=list_integers_input) - def test_run_on_empty_list(self): + @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") + def test_run_on_empty_list(self, mock_client_class): embedder = NvidiaDocumentEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key")) - embedder.client = MockClient() + + def mock_query_function(_, payload): + inputs = payload["input"] + data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))] + return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}} + + mock_client = Mock( + get_model_nvcf_id=Mock(return_value="some_id"), + query_function=mock_query_function, + ) + mock_client_class.return_value = mock_client embedder.warm_up() empty_list_input = [] diff --git a/integrations/nvidia/tests/test_generator.py b/integrations/nvidia/tests/test_generator.py index c095ab538..44fc31f3c 100644 --- a/integrations/nvidia/tests/test_generator.py +++ b/integrations/nvidia/tests/test_generator.py @@ -44,8 +44,9 @@ def test_init_with_parameters(self): def test_init_fail_wo_api_key(self, monkeypatch): monkeypatch.delenv("NVIDIA_API_KEY", raising=False) + generator = NvidiaGenerator("playground_nemotron_steerlm_8b") with pytest.raises(ValueError): - NvidiaGenerator("playground_nemotron_steerlm_8b") + generator.warm_up() def test_to_dict(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") @@ -194,14 +195,11 @@ def test_run_integration_with_nim_backend(self): generator = NvidiaGenerator( model="gpt-3.5-turbo", api_url="https://api.openai.com/v1", - api_key=Secret.from_env_var(["OPENAI_API"]), + api_key=Secret.from_env_var(["OPENAI_API_KEY"]), model_arguments={ "temperature": 0.2, "top_p": 0.7, "max_tokens": 1024, - "seed": None, - "bad": None, - "stop": None, }, ) generator.warm_up() diff --git a/integrations/nvidia/tests/test_text_embedder.py b/integrations/nvidia/tests/test_text_embedder.py index 8ba2f6783..6865277fe 100644 --- a/integrations/nvidia/tests/test_text_embedder.py +++ b/integrations/nvidia/tests/test_text_embedder.py @@ -1,63 +1,49 @@ import os +from unittest.mock import Mock, patch import pytest from haystack.utils import Secret -from haystack_integrations.components.embedders.nvidia import NvidiaEmbeddingModel, NvidiaTextEmbedder -from haystack_integrations.utils.nvidia.client import AvailableNvidiaCloudFunctions - - -class MockClient: - def query_function(self, func_id, payload): - data = [{"index": 0, 
"embedding": [0.1, 0.2, 0.3]}] - return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}} - - def available_functions(self): - return { - NvidiaEmbeddingModel.NVOLVE_40K.value: AvailableNvidiaCloudFunctions( - name=NvidiaEmbeddingModel.NVOLVE_40K.value, id="fake-id", status="ACTIVE" - ) - } - - def get_model_nvcf_id(self, model): - return "fake-id" +from haystack_integrations.components.embedders.nvidia import NvidiaTextEmbedder class TestNvidiaTextEmbedder: def test_init_default(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") - embedder = NvidiaTextEmbedder(NvidiaEmbeddingModel.NVOLVE_40K) + embedder = NvidiaTextEmbedder("nvolveqa_40k") assert embedder.api_key == Secret.from_env_var("NVIDIA_API_KEY") - assert embedder.model == NvidiaEmbeddingModel.NVOLVE_40K + assert embedder.model == "nvolveqa_40k" assert embedder.prefix == "" assert embedder.suffix == "" def test_init_with_parameters(self): embedder = NvidiaTextEmbedder( api_key=Secret.from_token("fake-api-key"), - model="playground_nvolveqa_40k", + model="nvolveqa_40k", prefix="prefix", suffix="suffix", ) assert embedder.api_key == Secret.from_token("fake-api-key") - assert embedder.model == NvidiaEmbeddingModel.NVOLVE_40K + assert embedder.model == "nvolveqa_40k" assert embedder.prefix == "prefix" assert embedder.suffix == "suffix" def test_init_fail_wo_api_key(self, monkeypatch): monkeypatch.delenv("NVIDIA_API_KEY", raising=False) + embedder = NvidiaTextEmbedder("nvolveqa_40k") with pytest.raises(ValueError): - NvidiaTextEmbedder(NvidiaEmbeddingModel.NVOLVE_40K) + embedder.warm_up() def test_to_dict(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") - component = NvidiaTextEmbedder(NvidiaEmbeddingModel.NVOLVE_40K) + component = NvidiaTextEmbedder("nvolveqa_40k") data = component.to_dict() assert data == { "type": "haystack_integrations.components.embedders.nvidia.text_embedder.NvidiaTextEmbedder", "init_parameters": { "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"}, - "model": "playground_nvolveqa_40k", + "api_url": None, + "model": "nvolveqa_40k", "prefix": "", "suffix": "", }, @@ -66,7 +52,7 @@ def test_to_dict(self, monkeypatch): def test_to_dict_with_custom_init_parameters(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") component = NvidiaTextEmbedder( - model=NvidiaEmbeddingModel.NVOLVE_40K, + model="nvolveqa_40k", prefix="prefix", suffix="suffix", ) @@ -75,17 +61,28 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): "type": "haystack_integrations.components.embedders.nvidia.text_embedder.NvidiaTextEmbedder", "init_parameters": { "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"}, - "model": "playground_nvolveqa_40k", + "api_url": None, + "model": "nvolveqa_40k", "prefix": "prefix", "suffix": "suffix", }, } - def test_run(self): + @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") + def test_run(self, mock_client_class): embedder = NvidiaTextEmbedder( "playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key"), prefix="prefix ", suffix=" suffix" ) - embedder.client = MockClient() + mock_client = Mock( + get_model_nvcf_id=Mock(return_value="some_id"), + query_function=Mock( + return_value={ + "data": [{"index": 0, "embedding": [0.1, 0.2, 0.3]}], + "usage": {"total_tokens": 4, "prompt_tokens": 4}, + } + ), + ) + mock_client_class.return_value = mock_client embedder.warm_up() result = embedder.run(text="The food 
was delicious") @@ -95,9 +92,19 @@ def test_run(self): "usage": {"prompt_tokens": 4, "total_tokens": 4}, } - def test_run_wrong_input_format(self): + @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") + def test_run_wrong_input_format(self, mock_client_class): embedder = NvidiaTextEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key")) - embedder.client = MockClient() + mock_client = Mock( + get_model_nvcf_id=Mock(return_value="some_id"), + query_function=Mock( + return_value={ + "data": [{"index": 0, "embedding": [0.1, 0.2, 0.3]}], + "usage": {"total_tokens": 4, "prompt_tokens": 4}, + } + ), + ) + mock_client_class.return_value = mock_client embedder.warm_up() list_integers_input = [1, 2, 3] @@ -110,7 +117,7 @@ def test_run_wrong_input_format(self): reason="Export an env var called NVIDIA_API_KEY containing the Nvidia API key to run this test.", ) @pytest.mark.integration - def test_run_integration(self): + def test_run_integration_with_nvcf_backend(self): embedder = NvidiaTextEmbedder("playground_nvolveqa_40k") embedder.warm_up() @@ -120,3 +127,23 @@ def test_run_integration(self): assert all(isinstance(x, float) for x in embedding) assert "usage" in meta + + @pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY", None), + reason="Export an env var called OPENAI_API_KEY containing the Nvidia API key to run this test.", + ) + @pytest.mark.integration + def test_run_integration_with_nim_backend(self): + embedder = NvidiaTextEmbedder( + model="text-embedding-ada-002", + api_url="https://api.openai.com/v1", + api_key=Secret.from_env_var(["OPENAI_API_KEY"]), + ) + embedder.warm_up() + + result = embedder.run("A transformer is a deep learning architecture") + embedding = result["embedding"] + meta = result["meta"] + + assert all(isinstance(x, float) for x in embedding) + assert "usage" in meta From 89ca47004b5124b7d58e83489628fa1290863629 Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Fri, 15 Mar 2024 17:44:09 +0100 Subject: [PATCH 09/20] Update headers, the generator endpoint, and the embedders input_type param --- .../components/embedders/nvidia/_nim_backend.py | 1 + .../components/embedders/nvidia/document_embedder.py | 4 +++- .../components/embedders/nvidia/text_embedder.py | 4 +++- .../components/generators/nvidia/_nim_backend.py | 1 + 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py index d42719d7d..a5312d3f9 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py @@ -16,6 +16,7 @@ def __init__( ): headers = { "Content-Type": "application/json", + "accept": "application/json", } if api_key: headers["Authorization"] = f"Bearer {api_key.resolve_value()}" diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py index 24c5add31..1f6db6a41 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py @@ -93,7 +93,9 @@ def warm_up(self): self.backend = NvcfBackend(self.model, 
api_key=self.api_key, model_kwargs={"model": "passage"}) else: - self.backend = NimBackend(self.model, api_url=self.api_url, api_key=self.api_key) + self.backend = NimBackend( + self.model, api_url=self.api_url, api_key=self.api_key, model_kwargs={"input_type": "passage"} + ) self._initialized = True diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py index e0645e0bf..bb1122545 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py @@ -77,7 +77,9 @@ def warm_up(self): self.backend = NvcfBackend(self.model, api_key=self.api_key, model_kwargs={"model": "query"}) else: - self.backend = NimBackend(self.model, api_url=self.api_url, api_key=self.api_key) + self.backend = NimBackend( + self.model, api_url=self.api_url, api_key=self.api_key, model_kwargs={"input_type": "query"} + ) self._initialized = True diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py index 92e9ab5ba..9d5123c8a 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py @@ -18,6 +18,7 @@ def __init__( ): headers = { "Content-Type": "application/json", + "accept": "application/json", } if api_key: headers["Authorization"] = f"Bearer {api_key.resolve_value()}" From 2b654ccd249274ac7e9b7371b49352a707665688 Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Mon, 18 Mar 2024 15:17:55 +0100 Subject: [PATCH 10/20] Update docstrings --- .../components/generators/nvidia/generator.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py index 206c2779a..10c9b969a 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py @@ -22,14 +22,11 @@ class NvidiaGenerator: from haystack_integrations.components.generators.nvidia import NvidiaGenerator, NvidiaGeneratorModel generator = NvidiaGenerator( - model=NvidiaGeneratorModel.NV_LLAMA2_RLHF_70B, + model="nv_llama2_rlhf_70b", model_arguments={ "temperature": 0.2, "top_p": 0.7, "max_tokens": 1024, - "seed": None, - "bad": None, - "stop": None, }, ) generator.warm_up() @@ -37,6 +34,7 @@ class NvidiaGenerator: result = generator.run(prompt="What is the answer?") print(result["replies"]) print(result["meta"]) + print(result["usage"]) ``` """ @@ -55,7 +53,9 @@ def __init__( See the [Nvidia catalog](https://catalog.ngc.nvidia.com/ai-foundation-models) for more information on the supported models. :param api_key: - Nvidia API key to use for authentication. + API key for the NVIDIA AI Foundation Endpoints. + :param api_url: + Custom API URL for the NVIDIA NeMo Inference Microservices. :param model_arguments: Additional arguments to pass to the model provider. Different models accept different arguments. 
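Because these arguments are merged verbatim into the request body, the mapping from `model_arguments` to the wire format is mechanical. A minimal sketch, assuming the OpenAI-compatible `/chat/completions` payload that the NIM backend targets — the body construction, endpoint URL, and model name below are assumptions for illustration; only the route and headers appear in this series:

```python
import requests

# Illustrative only: how model_arguments plausibly lands in the request body.
api_url = "http://localhost:9999/v1"  # hypothetical self-hosted NIM deployment
model_arguments = {"temperature": 0.2, "top_p": 0.7, "max_tokens": 1024}

payload = {
    "model": "llama2-13b-chat",  # placeholder model name
    "messages": [{"role": "user", "content": "What is the answer?"}],
    **model_arguments,  # forwarded as-is; the component does not validate the keys
}
response = requests.post(
    f"{api_url}/chat/completions",
    headers={"Content-Type": "application/json", "accept": "application/json"},
    json=payload,
    timeout=60,
)
response.raise_for_status()
replies = [choice["message"]["content"] for choice in response.json()["choices"]]
```

The same pass-through applies on the NVCF side, where the arguments become fields of the `GenerationRequest`.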
Search your model in the [Nvidia catalog](https://catalog.ngc.nvidia.com/ai-foundation-models) From 038480bb9cbab0f34c70e284808f5831ccaaaed4 Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Mon, 18 Mar 2024 15:38:32 +0100 Subject: [PATCH 11/20] Make api_key optional in generator --- .../components/generators/nvidia/generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py index 10c9b969a..4e5b8d968 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py @@ -42,7 +42,7 @@ def __init__( self, model: str, api_url: Optional[str] = None, - api_key: Secret = Secret.from_env_var("NVIDIA_API_KEY"), + api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), model_arguments: Optional[Dict[str, Any]] = None, ): """ From 05e80962fb83f21441aec22c841c5d093e8c0fe3 Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Mon, 18 Mar 2024 16:15:59 +0100 Subject: [PATCH 12/20] Remove api_key from NIM backend --- .../components/embedders/nvidia/_nim_backend.py | 7 +++---- .../components/embedders/nvidia/document_embedder.py | 4 +--- .../components/embedders/nvidia/text_embedder.py | 4 +--- .../components/generators/nvidia/_nim_backend.py | 4 ---- .../components/generators/nvidia/generator.py | 3 +-- 5 files changed, 6 insertions(+), 16 deletions(-) diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py index a5312d3f9..27e0dbeac 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py @@ -1,10 +1,11 @@ from typing import Any, Dict, List, Optional, Tuple import requests -from haystack.utils.auth import Secret from .backend import EmbedderBackend +REQUEST_TIMEOUT = 60 + class NimBackend(EmbedderBackend): def __init__( @@ -12,14 +13,11 @@ def __init__( model: str, api_url: str, model_kwargs: Optional[Dict[str, Any]] = None, - api_key: Optional[Secret] = None, ): headers = { "Content-Type": "application/json", "accept": "application/json", } - if api_key: - headers["Authorization"] = f"Bearer {api_key.resolve_value()}" self.session = requests.Session() self.session.headers.update(headers) @@ -37,6 +35,7 @@ def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: "input": texts, **self.model_kwargs, }, + timeout=REQUEST_TIMEOUT, ) res.raise_for_status() diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py index 1f6db6a41..5b62da87b 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py @@ -93,9 +93,7 @@ def warm_up(self): self.backend = NvcfBackend(self.model, api_key=self.api_key, model_kwargs={"model": "passage"}) else: - self.backend = NimBackend( - self.model, api_url=self.api_url, api_key=self.api_key, model_kwargs={"input_type": "passage"} - ) + self.backend = 
NimBackend(self.model, api_url=self.api_url, model_kwargs={"input_type": "passage"}) self._initialized = True diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py index bb1122545..97ead9c2c 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py @@ -77,9 +77,7 @@ def warm_up(self): self.backend = NvcfBackend(self.model, api_key=self.api_key, model_kwargs={"model": "query"}) else: - self.backend = NimBackend( - self.model, api_url=self.api_url, api_key=self.api_key, model_kwargs={"input_type": "query"} - ) + self.backend = NimBackend(self.model, api_url=self.api_url, model_kwargs={"input_type": "query"}) self._initialized = True diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py index 9d5123c8a..f0b9711ac 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py @@ -1,7 +1,6 @@ from typing import Any, Dict, List, Optional, Tuple import requests -from haystack.utils.auth import Secret from .backend import GeneratorBackend @@ -14,14 +13,11 @@ def __init__( model: str, api_url: str, model_kwargs: Optional[Dict[str, Any]] = None, - api_key: Optional[Secret] = None, ): headers = { "Content-Type": "application/json", "accept": "application/json", } - if api_key: - headers["Authorization"] = f"Bearer {api_key.resolve_value()}" self.session = requests.Session() self.session.headers.update(headers) diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py index 4e5b8d968..a7cb8ad11 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py @@ -85,7 +85,6 @@ def warm_up(self): self._model, api_url=self._api_url, model_kwargs=self._model_arguments, - api_key=self._api_key, ) def to_dict(self) -> Dict[str, Any]: @@ -99,7 +98,7 @@ def to_dict(self) -> Dict[str, Any]: self, model=self._model, api_url=self._api_url, - api_key=self._api_key.to_dict(), + api_key=self._api_key.to_dict() if self._api_key else None, model_arguments=self._model_arguments, ) From 2a7e350279cb53e53de5f1ec93bdfca07670e015 Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Mon, 18 Mar 2024 16:16:49 +0100 Subject: [PATCH 13/20] Move usage in metadata in generator --- .../generators/nvidia/_nim_backend.py | 17 +++++++++-------- .../generators/nvidia/_nvcf_backend.py | 14 +++++++------- .../components/generators/nvidia/backend.py | 4 ++-- .../components/generators/nvidia/generator.py | 7 +++---- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py index f0b9711ac..499a60b78 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py +++ 
b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py @@ -25,7 +25,9 @@ def __init__( self.api_url = api_url self.model_kwargs = model_kwargs or {} - def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]], Dict[str, Any]]: + def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: + # We're using the chat completion endpoint as the local containers don't support + # the /completions endpoint. So both the non-chat and chat generator will use this. url = f"{self.api_url}/chat/completions" res = self.session.post( @@ -56,13 +58,12 @@ def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]], Dict[s choice_meta = { "role": message["role"], "finish_reason": choice["finish_reason"], + "usage": { + "prompt_tokens": completions["usage"]["prompt_tokens"], + "completion_tokens": completions["usage"]["completion_tokens"], + "total_tokens": completions["usage"]["total_tokens"], + }, } meta.append(choice_meta) - usage = { - "prompt_tokens": completions["usage"]["prompt_tokens"], - "completion_tokens": completions["usage"]["completion_tokens"], - "total_tokens": completions["usage"]["total_tokens"], - } - - return replies, meta, usage + return replies, meta diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py index d53ce9cce..244000230 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py @@ -32,7 +32,7 @@ def __init__( ) self.nvcf_id = self.client.get_model_nvcf_id(self.model_name) - def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]], Dict[str, Any]]: + def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: messages = [Message(role="user", content=prompt)] request = GenerationRequest(messages=messages, **self.model_kwargs).to_dict() json_response = self.client.query_function(self.nvcf_id, request) @@ -46,14 +46,14 @@ def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]], Dict[s { "role": choice.message.role, "finish_reason": choice.finish_reason, + "usage": { + "completion_tokens": response.usage.completion_tokens, + "prompt_tokens": response.usage.prompt_tokens, + "total_tokens": response.usage.total_tokens, + }, } ) - usage = { - "completion_tokens": response.usage.completion_tokens, - "prompt_tokens": response.usage.prompt_tokens, - "total_tokens": response.usage.total_tokens, - } - return replies, meta, usage + return replies, meta @dataclass diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py index 793402c50..d14199daf 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py @@ -16,7 +16,7 @@ def __init__(self, model: str, model_kwargs: Optional[Dict[str, Any]] = None): self.model_kwargs = model_kwargs or {} @abstractmethod - def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]], Dict[str, Any]]: + def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: """ Invoke the backend and prompt the model. 
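In practice, this change moves token accounting from a separate `usage` output into each reply's `meta` entry. A small sketch of consuming the new `(replies, meta)` return shape — the counts mirror the updated tests below, and the `total_tokens` helper is hypothetical:

```python
from typing import Any, Dict, List, Tuple

# Shape produced by the backends after this change: usage is nested
# per-reply inside `meta` instead of being returned separately.
replies: List[str] = ["Hello!"]
meta: List[Dict[str, Any]] = [
    {
        "role": "assistant",
        "finish_reason": "stop",
        "usage": {"prompt_tokens": 19, "completion_tokens": 2, "total_tokens": 21},
    }
]

def total_tokens(result: Tuple[List[str], List[Dict[str, Any]]]) -> int:
    # Sum token usage across all replies in a generation result.
    _, metadata = result
    return sum(entry["usage"]["total_tokens"] for entry in metadata)

assert total_tokens((replies, meta)) == 21
```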
@@ -24,6 +24,6 @@ def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]], Dict[s Prompt text. :return: Vector representation of the generated texts related - metadata and usage statistics returned by the service. + metadata returned by the service. """ pass diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py index a7cb8ad11..f2f94c3a2 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py @@ -116,7 +116,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "NvidiaGenerator": deserialize_secrets_inplace(init_params, ["api_key"]) return default_from_dict(cls, data) - @component.output_types(replies=List[str], meta=List[Dict[str, Any]], usage=Dict[str, int]) + @component.output_types(replies=List[str], meta=List[Dict[str, Any]]) def run(self, prompt: str): """ Queries the model with the provided prompt. @@ -127,13 +127,12 @@ def run(self, prompt: str): A dictionary with the following keys: - `replies` - Replies generated by the model. - `meta` - Metadata for each reply. - - `usage` - Usage statistics for the model. """ if self._backend is None: msg = "The generation model has not been loaded. Call warm_up() before running." raise RuntimeError(msg) assert self._backend is not None - replies, meta, usage = self._backend.generate(prompt=prompt) + replies, meta = self._backend.generate(prompt=prompt) - return {"replies": replies, "meta": meta, "usage": usage} + return {"replies": replies, "meta": meta} From 14b9a91ee431656c343305225d04be1b413e80ed Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Mon, 18 Mar 2024 16:17:04 +0100 Subject: [PATCH 14/20] Update tests --- .../nvidia/tests/test_document_embedder.py | 30 +++++++++++++++++++ integrations/nvidia/tests/test_generator.py | 30 ++++++++----------- .../nvidia/tests/test_text_embedder.py | 13 ++++---- 3 files changed, 51 insertions(+), 22 deletions(-) diff --git a/integrations/nvidia/tests/test_document_embedder.py b/integrations/nvidia/tests/test_document_embedder.py index 501be205d..faab92d43 100644 --- a/integrations/nvidia/tests/test_document_embedder.py +++ b/integrations/nvidia/tests/test_document_embedder.py @@ -326,3 +326,33 @@ def test_run_integration(self): for doc in docs_with_embeddings: assert isinstance(doc.embedding, list) assert isinstance(doc.embedding[0], float) + + + @pytest.mark.skipif( + not os.environ.get("NVIDIA_NIM_EMBEDDER_MODEL", None) or not os.environ.get("NVIDIA_NIM_ENDPOINT_URL", None), + reason="Export an env var called NVIDIA_NIM_EMBEDDER_MODEL containing the hosted model name and " + "NVIDIA_NIM_ENDPOINT_URL containing the local URL to call.", + ) + @pytest.mark.integration + def test_run_integration_with_nim_backend(self): + model = os.environ["NVIDIA_NIM_EMBEDDER_MODEL"] + url = os.environ["NVIDIA_NIM_ENDPOINT_URL"] + embedder = NvidiaDocumentEmbedder( + model=model, + api_url=url, + api_key=None, + ) + embedder.warm_up() + docs = [ + Document(content="I love cheese", meta={"topic": "Cuisine"}), + Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}), + ] + + result = embedder.run(docs) + docs_with_embeddings = result["documents"] + + assert isinstance(docs_with_embeddings, list) + assert len(docs_with_embeddings) == len(docs) + for doc in docs_with_embeddings: + assert 
isinstance(doc.embedding, list) + assert isinstance(doc.embedding[0], float) diff --git a/integrations/nvidia/tests/test_generator.py b/integrations/nvidia/tests/test_generator.py index 44fc31f3c..b21ea1314 100644 --- a/integrations/nvidia/tests/test_generator.py +++ b/integrations/nvidia/tests/test_generator.py @@ -148,13 +148,13 @@ def test_run(self, mock_client_class): { "finish_reason": "stop", "role": "assistant", + "usage": { + "total_tokens": 21, + "prompt_tokens": 19, + "completion_tokens": 2, + }, }, ], - "usage": { - "total_tokens": 21, - "prompt_tokens": 19, - "completion_tokens": 2, - }, } @pytest.mark.skipif( @@ -179,23 +179,20 @@ def test_run_integration_with_nvcf_backend(self): assert result["replies"] assert result["meta"] - assert result["usage"] @pytest.mark.skipif( - not os.environ.get("OPENAI_API_KEY", None), - reason="Export an env var called OPENAI_API_KEY containing the Nvidia API key to run this test.", + not os.environ.get("NVIDIA_NIM_GENERATOR_MODEL", None) or not os.environ.get("NVIDIA_NIM_ENDPOINT_URL", None), + reason="Export an env var called NVIDIA_NIM_GENERATOR_MODEL containing the hosted model name and " + "NVIDIA_NIM_ENDPOINT_URL containing the local URL to call.", ) @pytest.mark.integration def test_run_integration_with_nim_backend(self): - """ - This test requires an OpenAI API key to run. - We use OpenAI in this case as the NinBackend must communicate with an OpenAI compatible - API endpoint and this is the easiest way to test it. - """ + model = os.environ["NVIDIA_NIM_GENERATOR_MODEL"] + url = os.environ["NVIDIA_NIM_ENDPOINT_URL"] generator = NvidiaGenerator( - model="gpt-3.5-turbo", - api_url="https://api.openai.com/v1", - api_key=Secret.from_env_var(["OPENAI_API_KEY"]), + model=model, + api_url=url, + api_key=None, model_arguments={ "temperature": 0.2, "top_p": 0.7, @@ -207,4 +204,3 @@ def test_run_integration_with_nim_backend(self): assert result["replies"] assert result["meta"] - assert result["usage"] diff --git a/integrations/nvidia/tests/test_text_embedder.py b/integrations/nvidia/tests/test_text_embedder.py index 6865277fe..39ee02206 100644 --- a/integrations/nvidia/tests/test_text_embedder.py +++ b/integrations/nvidia/tests/test_text_embedder.py @@ -129,15 +129,18 @@ def test_run_integration_with_nvcf_backend(self): assert "usage" in meta @pytest.mark.skipif( - not os.environ.get("OPENAI_API_KEY", None), - reason="Export an env var called OPENAI_API_KEY containing the Nvidia API key to run this test.", + not os.environ.get("NVIDIA_NIM_EMBEDDER_MODEL", None) or not os.environ.get("NVIDIA_NIM_ENDPOINT_URL", None), + reason="Export an env var called NVIDIA_NIM_EMBEDDER_MODEL containing the hosted model name and " + "NVIDIA_NIM_ENDPOINT_URL containing the local URL to call.", ) @pytest.mark.integration def test_run_integration_with_nim_backend(self): + model = os.environ["NVIDIA_NIM_EMBEDDER_MODEL"] + url = os.environ["NVIDIA_NIM_ENDPOINT_URL"] embedder = NvidiaTextEmbedder( - model="text-embedding-ada-002", - api_url="https://api.openai.com/v1", - api_key=Secret.from_env_var(["OPENAI_API_KEY"]), + model=model, + api_url=url, + api_key=None, ) embedder.warm_up() From a79e362420994b95232ef6058a47f109458b3eda Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Mon, 18 Mar 2024 16:17:54 +0100 Subject: [PATCH 15/20] Remove OPENAI_API_KEY env var from workflow --- .github/workflows/nvidia.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/nvidia.yml b/.github/workflows/nvidia.yml index fcbbdcdeb..6e7562c17 100644 --- 
a/.github/workflows/nvidia.yml +++ b/.github/workflows/nvidia.yml @@ -22,7 +22,6 @@ env: PYTHONUNBUFFERED: "1" FORCE_COLOR: "1" NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} jobs: run: From 96289b2a2fa9e03bb70d150732a24215368ba90f Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Mon, 18 Mar 2024 16:59:23 +0100 Subject: [PATCH 16/20] Fix integration tests --- integrations/nvidia/tests/test_generator.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/integrations/nvidia/tests/test_generator.py b/integrations/nvidia/tests/test_generator.py index b21ea1314..9a157a9d1 100644 --- a/integrations/nvidia/tests/test_generator.py +++ b/integrations/nvidia/tests/test_generator.py @@ -195,8 +195,6 @@ def test_run_integration_with_nim_backend(self): api_key=None, model_arguments={ "temperature": 0.2, - "top_p": 0.7, - "max_tokens": 1024, }, ) generator.warm_up() From 9003c671b5f9752aa598c7451e1a52dbce4de03b Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Mon, 18 Mar 2024 17:35:40 +0100 Subject: [PATCH 17/20] Fix linting --- .../components/embedders/nvidia/text_embedder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py index 97ead9c2c..79dda0d81 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional from haystack import component, default_from_dict, default_to_dict from haystack.utils import Secret, deserialize_secrets_inplace From 5326432231fce30e037689adb106e5d8198f7671 Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Mon, 18 Mar 2024 17:42:25 +0100 Subject: [PATCH 18/20] Fix linting again --- integrations/nvidia/tests/test_document_embedder.py | 1 - 1 file changed, 1 deletion(-) diff --git a/integrations/nvidia/tests/test_document_embedder.py b/integrations/nvidia/tests/test_document_embedder.py index faab92d43..7ac89d5e2 100644 --- a/integrations/nvidia/tests/test_document_embedder.py +++ b/integrations/nvidia/tests/test_document_embedder.py @@ -327,7 +327,6 @@ def test_run_integration(self): assert isinstance(doc.embedding, list) assert isinstance(doc.embedding[0], float) - @pytest.mark.skipif( not os.environ.get("NVIDIA_NIM_EMBEDDER_MODEL", None) or not os.environ.get("NVIDIA_NIM_ENDPOINT_URL", None), reason="Export an env var called NVIDIA_NIM_EMBEDDER_MODEL containing the hosted model name and " From a5d6bc1c718e414e7f71f70d5ac2dd3366338990 Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Mon, 18 Mar 2024 18:04:26 +0100 Subject: [PATCH 19/20] Address PR comments --- .../components/embedders/nvidia/_nvcf_backend.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py index dbd039e65..d3e0d4a15 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py @@ -6,7 +6,7 @@ from .backend import EmbedderBackend -MAX_INPUT_STRING_LENGTH = 2048 
+MAX_INPUTS = 50 class NvcfBackend(EmbedderBackend): @@ -50,7 +50,11 @@ class EmbeddingsRequest: encoding_format: Literal["float", "base64"] = "float" def __post_init__(self): - if not isinstance(self.input, list): + if isinstance(self.input, list): + if len(self.input) > MAX_INPUTS: + msg = f"The number of inputs should not exceed {MAX_INPUTS}" + raise ValueError(msg) + else: self.input = [self.input] if len(self.input) == 0: From 875773f57b267906e3764e87ce5070bcc7bf6649 Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Mon, 18 Mar 2024 18:12:48 +0100 Subject: [PATCH 20/20] Fix NVCF backend --- .../components/embedders/nvidia/_nvcf_backend.py | 1 + .../components/generators/nvidia/_nvcf_backend.py | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py index d3e0d4a15..7d4b07dca 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py @@ -6,6 +6,7 @@ from .backend import EmbedderBackend +MAX_INPUT_STRING_LENGTH = 2048 MAX_INPUTS = 50 diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py index 244000230..c0686c132 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py @@ -6,9 +6,6 @@ from .backend import GeneratorBackend -MAX_INPUT_STRING_LENGTH = 2048 -MAX_INPUTS = 50 - class NvcfBackend(GeneratorBackend): def __init__(
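To close, here is how the embedder-side request validation reads once patches 19 and 20 are both applied — a self-contained sketch for reference. The class is trimmed to the validated field, and the empty-input error message is illustrative (the guard appears in the diff, its message does not):

```python
from dataclasses import dataclass
from typing import List, Union

MAX_INPUT_STRING_LENGTH = 2048  # restored by PATCH 20
MAX_INPUTS = 50

@dataclass
class EmbeddingsRequestSketch:
    input: Union[str, List[str]]

    def __post_init__(self):
        # Lists are capped at the NVCF batch limit; bare strings are
        # wrapped so downstream code always sees a list.
        if isinstance(self.input, list):
            if len(self.input) > MAX_INPUTS:
                msg = f"The number of inputs should not exceed {MAX_INPUTS}"
                raise ValueError(msg)
        else:
            self.input = [self.input]

        if len(self.input) == 0:
            msg = "Empty input"  # illustrative message, not shown in the diff
            raise ValueError(msg)

# A single string is normalized to a one-element list:
req = EmbeddingsRequestSketch(input="hello")
assert req.input == ["hello"]
```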