Skip to content

Commit

Permalink
feat: Improvements to NvidiaRanker and adding user input timeout (#1193)
Browse files Browse the repository at this point in the history
* Lots of fixes

* Remove unused import

* Fix readme

* linting

* Add more logging

* Follow same private/public attribute as other components

* Add tests

* Linting

* Add another test

* Add timeout to to_dict

* Update integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py

Co-authored-by: David S. Batista <[email protected]>

---------

Co-authored-by: David S. Batista <[email protected]>
  • Loading branch information
sjrl and davidsbatista authored Nov 21, 2024
1 parent 96e3951 commit 16bc80f
Show file tree
Hide file tree
Showing 10 changed files with 279 additions and 66 deletions.
2 changes: 1 addition & 1 deletion integrations/nvidia/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ hatch run test
To only run unit tests:

```
hatch run test -m"not integration"
hatch run test -m "not integration"
```

To run the linters `ruff` and `mypy`:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,19 @@
#
# SPDX-License-Identifier: Apache-2.0

import os
import warnings
from typing import Any, Dict, List, Optional, Tuple, Union

from haystack import Document, component, default_from_dict, default_to_dict
from haystack import Document, component, default_from_dict, default_to_dict, logging
from haystack.utils import Secret, deserialize_secrets_inplace
from tqdm import tqdm

from haystack_integrations.components.embedders.nvidia.truncate import EmbeddingTruncateMode
from haystack_integrations.utils.nvidia import NimBackend, is_hosted, url_validation

logger = logging.getLogger(__name__)

_DEFAULT_API_URL = "https://ai.api.nvidia.com/v1/retrieval/nvidia"


Expand Down Expand Up @@ -47,6 +50,7 @@ def __init__(
meta_fields_to_embed: Optional[List[str]] = None,
embedding_separator: str = "\n",
truncate: Optional[Union[EmbeddingTruncateMode, str]] = None,
timeout: Optional[float] = None,
):
"""
Create a NvidiaDocumentEmbedder component.
Expand Down Expand Up @@ -74,8 +78,11 @@ def __init__(
:param embedding_separator:
Separator used to concatenate the meta fields to the Document text.
:param truncate:
Specifies how inputs longer that the maximum token length should be truncated.
Specifies how inputs longer than the maximum token length should be truncated.
If None the behavior is model-dependent, see the official documentation for more information.
:param timeout:
Timeout for request calls. If not set, it is inferred from the `NVIDIA_TIMEOUT` environment variable
or set to 60 by default.
"""

self.api_key = api_key
Expand All @@ -98,6 +105,10 @@ def __init__(
if is_hosted(api_url) and not self.model: # manually set default model
self.model = "nvidia/nv-embedqa-e5-v5"

if timeout is None:
timeout = float(os.environ.get("NVIDIA_TIMEOUT", 60.0))
self.timeout = timeout

def default_model(self):
"""Set default model in local NIM mode."""
valid_models = [
Expand Down Expand Up @@ -128,10 +139,11 @@ def warm_up(self):
if self.truncate is not None:
model_kwargs["truncate"] = str(self.truncate)
self.backend = NimBackend(
self.model,
model=self.model,
api_url=self.api_url,
api_key=self.api_key,
model_kwargs=model_kwargs,
timeout=self.timeout,
)

self._initialized = True
Expand All @@ -158,6 +170,7 @@ def to_dict(self) -> Dict[str, Any]:
meta_fields_to_embed=self.meta_fields_to_embed,
embedding_separator=self.embedding_separator,
truncate=str(self.truncate) if self.truncate is not None else None,
timeout=self.timeout,
)

@classmethod
Expand Down Expand Up @@ -238,8 +251,7 @@ def run(self, documents: List[Document]):

for doc in documents:
if not doc.content:
msg = f"Document '{doc.id}' has no content to embed."
raise ValueError(msg)
logger.warning(f"Document '{doc.id}' has no content to embed.")

texts_to_embed = self._prepare_texts_to_embed(documents)
embeddings, metadata = self._embed_batch(texts_to_embed, self.batch_size)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,18 @@
#
# SPDX-License-Identifier: Apache-2.0

import os
import warnings
from typing import Any, Dict, List, Optional, Union

from haystack import component, default_from_dict, default_to_dict
from haystack import component, default_from_dict, default_to_dict, logging
from haystack.utils import Secret, deserialize_secrets_inplace

from haystack_integrations.components.embedders.nvidia.truncate import EmbeddingTruncateMode
from haystack_integrations.utils.nvidia import NimBackend, is_hosted, url_validation

logger = logging.getLogger(__name__)

_DEFAULT_API_URL = "https://ai.api.nvidia.com/v1/retrieval/nvidia"


Expand Down Expand Up @@ -44,6 +47,7 @@ def __init__(
prefix: str = "",
suffix: str = "",
truncate: Optional[Union[EmbeddingTruncateMode, str]] = None,
timeout: Optional[float] = None,
):
"""
Create a NvidiaTextEmbedder component.
Expand All @@ -64,6 +68,9 @@ def __init__(
:param truncate:
Specifies how inputs longer than the maximum token length should be truncated.
If None the behavior is model-dependent, see the official documentation for more information.
:param timeout:
Timeout for request calls. If not set, it is inferred from the `NVIDIA_TIMEOUT` environment variable
or set to 60 by default.
"""

self.api_key = api_key
Expand All @@ -82,13 +89,23 @@ def __init__(
if is_hosted(api_url) and not self.model: # manually set default model
self.model = "nvidia/nv-embedqa-e5-v5"

if timeout is None:
timeout = float(os.environ.get("NVIDIA_TIMEOUT", 60.0))
self.timeout = timeout

def default_model(self):
"""Set default model in local NIM mode."""
valid_models = [
model.id for model in self.backend.models() if not model.base_model or model.base_model == model.id
]
name = next(iter(valid_models), None)
if name:
logger.warning(
"Default model is set as: {model_name}. \n"
"Set model using model parameter. \n"
"To get available models use available_models property.",
model_name=name,
)
warnings.warn(
f"Default model is set as: {name}. \n"
"Set model using model parameter. \n"
Expand All @@ -112,10 +129,11 @@ def warm_up(self):
if self.truncate is not None:
model_kwargs["truncate"] = str(self.truncate)
self.backend = NimBackend(
self.model,
model=self.model,
api_url=self.api_url,
api_key=self.api_key,
model_kwargs=model_kwargs,
timeout=self.timeout,
)

self._initialized = True
Expand All @@ -138,6 +156,7 @@ def to_dict(self) -> Dict[str, Any]:
prefix=self.prefix,
suffix=self.suffix,
truncate=str(self.truncate) if self.truncate is not None else None,
timeout=self.timeout,
)

@classmethod
Expand All @@ -150,7 +169,9 @@ def from_dict(cls, data: Dict[str, Any]) -> "NvidiaTextEmbedder":
:returns:
The deserialized component.
"""
deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
init_parameters = data.get("init_parameters", {})
if init_parameters:
deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
return default_from_dict(cls, data)

@component.output_types(embedding=List[float], meta=Dict[str, Any])
Expand All @@ -162,7 +183,7 @@ def run(self, text: str):
The text to embed.
:returns:
A dictionary with the following keys and values:
- `embedding` - Embeddng of the text.
- `embedding` - Embedding of the text.
- `meta` - Metadata on usage statistics, etc.
:raises RuntimeError:
If the component was not initialized.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#
# SPDX-License-Identifier: Apache-2.0

import os
import warnings
from typing import Any, Dict, List, Optional

Expand Down Expand Up @@ -49,6 +50,7 @@ def __init__(
api_url: str = _DEFAULT_API_URL,
api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"),
model_arguments: Optional[Dict[str, Any]] = None,
timeout: Optional[float] = None,
):
"""
Create a NvidiaGenerator component.
Expand All @@ -70,6 +72,9 @@ def __init__(
specific to a model.
Search your model in the [NVIDIA NIM](https://ai.nvidia.com)
to find the arguments it accepts.
:param timeout:
Timeout for request calls. If not set, it is inferred from the `NVIDIA_TIMEOUT` environment variable
or set to 60 by default.
"""
self._model = model
self._api_url = url_validation(api_url, _DEFAULT_API_URL, ["v1/chat/completions"])
Expand All @@ -79,6 +84,9 @@ def __init__(
self._backend: Optional[Any] = None

self.is_hosted = is_hosted(api_url)
if timeout is None:
timeout = float(os.environ.get("NVIDIA_TIMEOUT", 60.0))
self.timeout = timeout

def default_model(self):
"""Set default model in local NIM mode."""
Expand Down Expand Up @@ -110,10 +118,11 @@ def warm_up(self):
msg = "API key is required for hosted NVIDIA NIMs."
raise ValueError(msg)
self._backend = NimBackend(
self._model,
model=self._model,
api_url=self._api_url,
api_key=self._api_key,
model_kwargs=self._model_arguments,
timeout=self.timeout,
)

if not self.is_hosted and not self._model:
Expand Down
Loading

0 comments on commit 16bc80f

Please sign in to comment.