feat: Improvements to NvidiaRanker and adding user input timeout (#1193)

* Lots of fixes
* Remove unused import
* Fix readme
* linting
* Add more logging
* Follow same private/public attribute as other components
* Add tests
* Linting
* Add another test
* Add timeout to to_dict
* Update integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py

Co-authored-by: David S. Batista <[email protected]>

---------

Co-authored-by: David S. Batista <[email protected]>
deepset-ai · Nov 21, 2024 · 16bc80f · 16bc80f
1 parent 96e3951
commit 16bc80f
Show file tree

Hide file tree

Showing 10 changed files with 279 additions and 66 deletions.
diff --git a/integrations/nvidia/README.md b/integrations/nvidia/README.md
@@ -38,7 +38,7 @@ hatch run test
 To only run unit tests:
 
 ```
-hatch run test -m"not integration"
+hatch run test -m "not integration"
 ```
 
 To run the linters `ruff` and `mypy`:

diff --git a/...rations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py b/...rations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py
@@ -2,16 +2,19 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+import os
 import warnings
 from typing import Any, Dict, List, Optional, Tuple, Union
 
-from haystack import Document, component, default_from_dict, default_to_dict
+from haystack import Document, component, default_from_dict, default_to_dict, logging
 from haystack.utils import Secret, deserialize_secrets_inplace
 from tqdm import tqdm
 
 from haystack_integrations.components.embedders.nvidia.truncate import EmbeddingTruncateMode
 from haystack_integrations.utils.nvidia import NimBackend, is_hosted, url_validation
 
+logger = logging.getLogger(__name__)
+
 _DEFAULT_API_URL = "https://ai.api.nvidia.com/v1/retrieval/nvidia"
 
 
@@ -47,6 +50,7 @@ def __init__(
         meta_fields_to_embed: Optional[List[str]] = None,
         embedding_separator: str = "\n",
         truncate: Optional[Union[EmbeddingTruncateMode, str]] = None,
+        timeout: Optional[float] = None,
     ):
         """
         Create a NvidiaTextEmbedder component.
@@ -74,8 +78,11 @@ def __init__(
         :param embedding_separator:
             Separator used to concatenate the meta fields to the Document text.
         :param truncate:
-            Specifies how inputs longer that the maximum token length should be truncated.
+            Specifies how inputs longer than the maximum token length should be truncated.
             If None the behavior is model-dependent, see the official documentation for more information.
+        :param timeout:
+            Timeout for request calls, if not set it is inferred from the `NVIDIA_TIMEOUT` environment variable
+            or set to 60 by default.
         """
 
         self.api_key = api_key
@@ -98,6 +105,10 @@ def __init__(
         if is_hosted(api_url) and not self.model:  # manually set default model
             self.model = "nvidia/nv-embedqa-e5-v5"
 
+        if timeout is None:
+            timeout = float(os.environ.get("NVIDIA_TIMEOUT", 60.0))
+        self.timeout = timeout
+
     def default_model(self):
         """Set default model in local NIM mode."""
         valid_models = [
@@ -128,10 +139,11 @@ def warm_up(self):
         if self.truncate is not None:
             model_kwargs["truncate"] = str(self.truncate)
         self.backend = NimBackend(
-            self.model,
+            model=self.model,
             api_url=self.api_url,
             api_key=self.api_key,
             model_kwargs=model_kwargs,
+            timeout=self.timeout,
         )
 
         self._initialized = True
@@ -158,6 +170,7 @@ def to_dict(self) -> Dict[str, Any]:
             meta_fields_to_embed=self.meta_fields_to_embed,
             embedding_separator=self.embedding_separator,
             truncate=str(self.truncate) if self.truncate is not None else None,
+            timeout=self.timeout,
         )
 
     @classmethod
@@ -238,8 +251,7 @@ def run(self, documents: List[Document]):
 
         for doc in documents:
             if not doc.content:
-                msg = f"Document '{doc.id}' has no content to embed."
-                raise ValueError(msg)
+                logger.warning(f"Document '{doc.id}' has no content to embed.")
 
         texts_to_embed = self._prepare_texts_to_embed(documents)
         embeddings, metadata = self._embed_batch(texts_to_embed, self.batch_size)

diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py
@@ -2,15 +2,18 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+import os
 import warnings
 from typing import Any, Dict, List, Optional, Union
 
-from haystack import component, default_from_dict, default_to_dict
+from haystack import component, default_from_dict, default_to_dict, logging
 from haystack.utils import Secret, deserialize_secrets_inplace
 
 from haystack_integrations.components.embedders.nvidia.truncate import EmbeddingTruncateMode
 from haystack_integrations.utils.nvidia import NimBackend, is_hosted, url_validation
 
+logger = logging.getLogger(__name__)
+
 _DEFAULT_API_URL = "https://ai.api.nvidia.com/v1/retrieval/nvidia"
 
 
@@ -44,6 +47,7 @@ def __init__(
         prefix: str = "",
         suffix: str = "",
         truncate: Optional[Union[EmbeddingTruncateMode, str]] = None,
+        timeout: Optional[float] = None,
     ):
         """
         Create a NvidiaTextEmbedder component.
@@ -64,6 +68,9 @@ def __init__(
         :param truncate:
             Specifies how inputs longer than the maximum token length should be truncated.
             If None the behavior is model-dependent, see the official documentation for more information.
+        :param timeout:
+            Timeout for request calls, if not set it is inferred from the `NVIDIA_TIMEOUT` environment variable
+            or set to 60 by default.
         """
 
         self.api_key = api_key
@@ -82,13 +89,23 @@ def __init__(
         if is_hosted(api_url) and not self.model:  # manually set default model
             self.model = "nvidia/nv-embedqa-e5-v5"
 
+        if timeout is None:
+            timeout = float(os.environ.get("NVIDIA_TIMEOUT", 60.0))
+        self.timeout = timeout
+
     def default_model(self):
         """Set default model in local NIM mode."""
         valid_models = [
             model.id for model in self.backend.models() if not model.base_model or model.base_model == model.id
         ]
         name = next(iter(valid_models), None)
         if name:
+            logger.warning(
+                "Default model is set as: {model_name}. \n"
+                "Set model using model parameter. \n"
+                "To get available models use available_models property.",
+                model_name=name,
+            )
             warnings.warn(
                 f"Default model is set as: {name}. \n"
                 "Set model using model parameter. \n"
@@ -112,10 +129,11 @@ def warm_up(self):
         if self.truncate is not None:
             model_kwargs["truncate"] = str(self.truncate)
         self.backend = NimBackend(
-            self.model,
+            model=self.model,
             api_url=self.api_url,
             api_key=self.api_key,
             model_kwargs=model_kwargs,
+            timeout=self.timeout,
         )
 
         self._initialized = True
@@ -138,6 +156,7 @@ def to_dict(self) -> Dict[str, Any]:
             prefix=self.prefix,
             suffix=self.suffix,
             truncate=str(self.truncate) if self.truncate is not None else None,
+            timeout=self.timeout,
         )
 
     @classmethod
@@ -150,7 +169,9 @@ def from_dict(cls, data: Dict[str, Any]) -> "NvidiaTextEmbedder":
         :returns:
             The deserialized component.
         """
-        deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
+        init_parameters = data.get("init_parameters", {})
+        if init_parameters:
+            deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
         return default_from_dict(cls, data)
 
     @component.output_types(embedding=List[float], meta=Dict[str, Any])
@@ -162,7 +183,7 @@ def run(self, text: str):
             The text to embed.
         :returns:
             A dictionary with the following keys and values:
-            - `embedding` - Embeddng of the text.
+            - `embedding` - Embedding of the text.
             - `meta` - Metadata on usage statistics, etc.
         :raises RuntimeError:
             If the component was not initialized.

diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py
@@ -2,6 +2,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+import os
 import warnings
 from typing import Any, Dict, List, Optional
 
@@ -49,6 +50,7 @@ def __init__(
         api_url: str = _DEFAULT_API_URL,
         api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"),
         model_arguments: Optional[Dict[str, Any]] = None,
+        timeout: Optional[float] = None,
     ):
         """
         Create a NvidiaGenerator component.
@@ -70,6 +72,9 @@ def __init__(
             specific to a model.
             Search your model in the [NVIDIA NIM](https://ai.nvidia.com)
             to find the arguments it accepts.
+        :param timeout:
+            Timeout for request calls, if not set it is inferred from the `NVIDIA_TIMEOUT` environment variable
+            or set to 60 by default.
         """
         self._model = model
         self._api_url = url_validation(api_url, _DEFAULT_API_URL, ["v1/chat/completions"])
@@ -79,6 +84,9 @@ def __init__(
         self._backend: Optional[Any] = None
 
         self.is_hosted = is_hosted(api_url)
+        if timeout is None:
+            timeout = float(os.environ.get("NVIDIA_TIMEOUT", 60.0))
+        self.timeout = timeout
 
     def default_model(self):
         """Set default model in local NIM mode."""
@@ -110,10 +118,11 @@ def warm_up(self):
             msg = "API key is required for hosted NVIDIA NIMs."
             raise ValueError(msg)
         self._backend = NimBackend(
-            self._model,
+            model=self._model,
             api_url=self._api_url,
             api_key=self._api_key,
             model_kwargs=self._model_arguments,
+            timeout=self.timeout,
         )
 
         if not self.is_hosted and not self._model: