diff --git a/integrations/nvidia/README.md b/integrations/nvidia/README.md index e28f0ede9..558c34d28 100644 --- a/integrations/nvidia/README.md +++ b/integrations/nvidia/README.md @@ -38,7 +38,7 @@ hatch run test To only run unit tests: ``` -hatch run test -m"not integration" +hatch run test -m "not integration" ``` To run the linters `ruff` and `mypy`: diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py index 606ec78fd..6519efbab 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py @@ -2,16 +2,19 @@ # # SPDX-License-Identifier: Apache-2.0 +import os import warnings from typing import Any, Dict, List, Optional, Tuple, Union -from haystack import Document, component, default_from_dict, default_to_dict +from haystack import Document, component, default_from_dict, default_to_dict, logging from haystack.utils import Secret, deserialize_secrets_inplace from tqdm import tqdm from haystack_integrations.components.embedders.nvidia.truncate import EmbeddingTruncateMode from haystack_integrations.utils.nvidia import NimBackend, is_hosted, url_validation +logger = logging.getLogger(__name__) + _DEFAULT_API_URL = "https://ai.api.nvidia.com/v1/retrieval/nvidia" @@ -47,6 +50,7 @@ def __init__( meta_fields_to_embed: Optional[List[str]] = None, embedding_separator: str = "\n", truncate: Optional[Union[EmbeddingTruncateMode, str]] = None, + timeout: Optional[float] = None, ): """ Create a NvidiaTextEmbedder component. @@ -74,8 +78,11 @@ def __init__( :param embedding_separator: Separator used to concatenate the meta fields to the Document text. :param truncate: - Specifies how inputs longer that the maximum token length should be truncated. 
+ Specifies how inputs longer than the maximum token length should be truncated. If None the behavior is model-dependent, see the official documentation for more information. + :param timeout: + Timeout for request calls, if not set it is inferred from the `NVIDIA_TIMEOUT` environment variable + or set to 60 by default. """ self.api_key = api_key @@ -98,6 +105,10 @@ def __init__( if is_hosted(api_url) and not self.model: # manually set default model self.model = "nvidia/nv-embedqa-e5-v5" + if timeout is None: + timeout = float(os.environ.get("NVIDIA_TIMEOUT", 60.0)) + self.timeout = timeout + def default_model(self): """Set default model in local NIM mode.""" valid_models = [ @@ -128,10 +139,11 @@ def warm_up(self): if self.truncate is not None: model_kwargs["truncate"] = str(self.truncate) self.backend = NimBackend( - self.model, + model=self.model, api_url=self.api_url, api_key=self.api_key, model_kwargs=model_kwargs, + timeout=self.timeout, ) self._initialized = True @@ -158,6 +170,7 @@ def to_dict(self) -> Dict[str, Any]: meta_fields_to_embed=self.meta_fields_to_embed, embedding_separator=self.embedding_separator, truncate=str(self.truncate) if self.truncate is not None else None, + timeout=self.timeout, ) @classmethod @@ -238,8 +251,7 @@ def run(self, documents: List[Document]): for doc in documents: if not doc.content: - msg = f"Document '{doc.id}' has no content to embed." 
- raise ValueError(msg) + logger.warning(f"Document '{doc.id}' has no content to embed.") texts_to_embed = self._prepare_texts_to_embed(documents) embeddings, metadata = self._embed_batch(texts_to_embed, self.batch_size) diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py index 4b7072f33..a93aa8caa 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py @@ -2,15 +2,18 @@ # # SPDX-License-Identifier: Apache-2.0 +import os import warnings from typing import Any, Dict, List, Optional, Union -from haystack import component, default_from_dict, default_to_dict +from haystack import component, default_from_dict, default_to_dict, logging from haystack.utils import Secret, deserialize_secrets_inplace from haystack_integrations.components.embedders.nvidia.truncate import EmbeddingTruncateMode from haystack_integrations.utils.nvidia import NimBackend, is_hosted, url_validation +logger = logging.getLogger(__name__) + _DEFAULT_API_URL = "https://ai.api.nvidia.com/v1/retrieval/nvidia" @@ -44,6 +47,7 @@ def __init__( prefix: str = "", suffix: str = "", truncate: Optional[Union[EmbeddingTruncateMode, str]] = None, + timeout: Optional[float] = None, ): """ Create a NvidiaTextEmbedder component. @@ -64,6 +68,9 @@ def __init__( :param truncate: Specifies how inputs longer that the maximum token length should be truncated. If None the behavior is model-dependent, see the official documentation for more information. + :param timeout: + Timeout for request calls, if not set it is inferred from the `NVIDIA_TIMEOUT` environment variable + or set to 60 by default. 
""" self.api_key = api_key @@ -82,6 +89,10 @@ def __init__( if is_hosted(api_url) and not self.model: # manually set default model self.model = "nvidia/nv-embedqa-e5-v5" + if timeout is None: + timeout = float(os.environ.get("NVIDIA_TIMEOUT", 60.0)) + self.timeout = timeout + def default_model(self): """Set default model in local NIM mode.""" valid_models = [ @@ -89,6 +100,12 @@ def default_model(self): ] name = next(iter(valid_models), None) if name: + logger.warning( + "Default model is set as: {model_name}. \n" + "Set model using model parameter. \n" + "To get available models use available_models property.", + model_name=name, + ) warnings.warn( f"Default model is set as: {name}. \n" "Set model using model parameter. \n" @@ -112,10 +129,11 @@ def warm_up(self): if self.truncate is not None: model_kwargs["truncate"] = str(self.truncate) self.backend = NimBackend( - self.model, + model=self.model, api_url=self.api_url, api_key=self.api_key, model_kwargs=model_kwargs, + timeout=self.timeout, ) self._initialized = True @@ -138,6 +156,7 @@ def to_dict(self) -> Dict[str, Any]: prefix=self.prefix, suffix=self.suffix, truncate=str(self.truncate) if self.truncate is not None else None, + timeout=self.timeout, ) @classmethod @@ -150,7 +169,9 @@ def from_dict(cls, data: Dict[str, Any]) -> "NvidiaTextEmbedder": :returns: The deserialized component. """ - deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) + init_parameters = data.get("init_parameters", {}) + if init_parameters: + deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) return default_from_dict(cls, data) @component.output_types(embedding=List[float], meta=Dict[str, Any]) @@ -162,7 +183,7 @@ def run(self, text: str): The text to embed. :returns: A dictionary with the following keys and values: - - `embedding` - Embeddng of the text. + - `embedding` - Embedding of the text. - `meta` - Metadata on usage statistics, etc. 
:raises RuntimeError: If the component was not initialized. diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py index 5bf71a9e1..5047d0682 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 +import os import warnings from typing import Any, Dict, List, Optional @@ -49,6 +50,7 @@ def __init__( api_url: str = _DEFAULT_API_URL, api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), model_arguments: Optional[Dict[str, Any]] = None, + timeout: Optional[float] = None, ): """ Create a NvidiaGenerator component. @@ -70,6 +72,9 @@ def __init__( specific to a model. Search your model in the [NVIDIA NIM](https://ai.nvidia.com) to find the arguments it accepts. + :param timeout: + Timeout for request calls, if not set it is inferred from the `NVIDIA_TIMEOUT` environment variable + or set to 60 by default. """ self._model = model self._api_url = url_validation(api_url, _DEFAULT_API_URL, ["v1/chat/completions"]) @@ -79,6 +84,9 @@ def __init__( self._backend: Optional[Any] = None self.is_hosted = is_hosted(api_url) + if timeout is None: + timeout = float(os.environ.get("NVIDIA_TIMEOUT", 60.0)) + self.timeout = timeout def default_model(self): """Set default model in local NIM mode.""" @@ -110,10 +118,11 @@ def warm_up(self): msg = "API key is required for hosted NVIDIA NIMs." 
raise ValueError(msg) self._backend = NimBackend( - self._model, + model=self._model, api_url=self._api_url, api_key=self._api_key, model_kwargs=self._model_arguments, + timeout=self.timeout, ) if not self.is_hosted and not self._model: diff --git a/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py b/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py index 9938b37d1..66203a490 100644 --- a/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py +++ b/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 +import os import warnings from typing import Any, Dict, List, Optional, Union @@ -58,6 +59,11 @@ def __init__( api_url: Optional[str] = None, api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), top_k: int = 5, + query_prefix: str = "", + document_prefix: str = "", + meta_fields_to_embed: Optional[List[str]] = None, + embedding_separator: str = "\n", + timeout: Optional[float] = None, ): """ Create a NvidiaRanker component. @@ -72,6 +78,19 @@ def __init__( Custom API URL for the NVIDIA NIM. :param top_k: Number of documents to return. + :param query_prefix: + A string to add at the beginning of the query text before ranking. + Use it to prepend the text with an instruction, as required by reranking models like `bge`. + :param document_prefix: + A string to add at the beginning of each document before ranking. You can use it to prepend the document + with an instruction, as required by reranking models like `bge`. + :param meta_fields_to_embed: + List of metadata fields to embed with the document. + :param embedding_separator: + Separator to concatenate metadata fields to the document. + :param timeout: + Timeout for request calls, if not set it is inferred from the `NVIDIA_TIMEOUT` environment variable + or set to 60 by default. 
""" if model is not None and not isinstance(model, str): msg = "Ranker expects the `model` parameter to be a string." @@ -86,27 +105,35 @@ def __init__( raise TypeError(msg) # todo: detect default in non-hosted case (when api_url is provided) - self._model = model or _DEFAULT_MODEL - self._truncate = truncate - self._api_key = api_key + self.model = model or _DEFAULT_MODEL + self.truncate = truncate + self.api_key = api_key # if no api_url is provided, we're using a hosted model and can # - assume the default url will work, because there's only one model # - assume we won't call backend.models() if api_url is not None: - self._api_url = url_validation(api_url, None, ["v1/ranking"]) - self._endpoint = None # we let backend.rank() handle the endpoint + self.api_url = url_validation(api_url, None, ["v1/ranking"]) + self.endpoint = None # we let backend.rank() handle the endpoint else: - if self._model not in _MODEL_ENDPOINT_MAP: + if self.model not in _MODEL_ENDPOINT_MAP: msg = f"Model '{model}' is unknown. Please provide an api_url to access it." raise ValueError(msg) - self._api_url = None # we handle the endpoint - self._endpoint = _MODEL_ENDPOINT_MAP[self._model] + self.api_url = None # we handle the endpoint + self.endpoint = _MODEL_ENDPOINT_MAP[self.model] if api_key is None: self._api_key = Secret.from_env_var("NVIDIA_API_KEY") - self._top_k = top_k + self.top_k = top_k self._initialized = False self._backend: Optional[Any] = None + self.query_prefix = query_prefix + self.document_prefix = document_prefix + self.meta_fields_to_embed = meta_fields_to_embed or [] + self.embedding_separator = embedding_separator + if timeout is None: + timeout = float(os.environ.get("NVIDIA_TIMEOUT", 60.0)) + self.timeout = timeout + def to_dict(self) -> Dict[str, Any]: """ Serialize the ranker to a dictionary. 
@@ -115,11 +142,16 @@ def to_dict(self) -> Dict[str, Any]: """ return default_to_dict( self, - model=self._model, - top_k=self._top_k, - truncate=self._truncate, - api_url=self._api_url, - api_key=self._api_key.to_dict() if self._api_key else None, + model=self.model, + top_k=self.top_k, + truncate=self.truncate, + api_url=self.api_url, + api_key=self.api_key.to_dict() if self.api_key else None, + query_prefix=self.query_prefix, + document_prefix=self.document_prefix, + meta_fields_to_embed=self.meta_fields_to_embed, + embedding_separator=self.embedding_separator, + timeout=self.timeout, ) @classmethod @@ -143,18 +175,31 @@ def warm_up(self): """ if not self._initialized: model_kwargs = {} - if self._truncate is not None: - model_kwargs.update(truncate=str(self._truncate)) + if self.truncate is not None: + model_kwargs.update(truncate=str(self.truncate)) self._backend = NimBackend( - self._model, - api_url=self._api_url, - api_key=self._api_key, + model=self.model, + api_url=self.api_url, + api_key=self.api_key, model_kwargs=model_kwargs, + timeout=self.timeout, ) - if not self._model: - self._model = _DEFAULT_MODEL + if not self.model: + self.model = _DEFAULT_MODEL self._initialized = True + def _prepare_documents_to_embed(self, documents: List[Document]) -> List[str]: + document_texts = [] + for doc in documents: + meta_values_to_embed = [ + str(doc.meta[key]) + for key in self.meta_fields_to_embed + if key in doc.meta and doc.meta[key] # noqa: RUF019 + ] + text_to_embed = self.embedding_separator.join([*meta_values_to_embed, doc.content or ""]) + document_texts.append(self.document_prefix + text_to_embed) + return document_texts + @component.output_types(documents=List[Document]) def run( self, @@ -193,18 +238,22 @@ def run( if len(documents) == 0: return {"documents": []} - top_k = top_k if top_k is not None else self._top_k + top_k = top_k if top_k is not None else self.top_k if top_k < 1: logger.warning("top_k should be at least 1, returning nothing") 
warnings.warn("top_k should be at least 1, returning nothing", stacklevel=2) return {"documents": []} assert self._backend is not None + + query_text = self.query_prefix + query + document_texts = self._prepare_documents_to_embed(documents=documents) + # rank result is list[{index: int, logit: float}] sorted by logit sorted_indexes_and_scores = self._backend.rank( - query, - documents, - endpoint=self._endpoint, + query_text=query_text, + document_texts=document_texts, + endpoint=self.endpoint, ) sorted_documents = [] for item in sorted_indexes_and_scores[:top_k]: diff --git a/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py index 0279cf608..15b35e4b2 100644 --- a/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py +++ b/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py @@ -2,14 +2,17 @@ # # SPDX-License-Identifier: Apache-2.0 +import os from dataclasses import dataclass, field from typing import Any, Dict, List, Optional, Tuple import requests -from haystack import Document +from haystack import logging from haystack.utils import Secret -REQUEST_TIMEOUT = 60 +logger = logging.getLogger(__name__) + +REQUEST_TIMEOUT = 60.0 @dataclass @@ -35,6 +38,7 @@ def __init__( api_url: str, api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), model_kwargs: Optional[Dict[str, Any]] = None, + timeout: Optional[float] = None, ): headers = { "Content-Type": "application/json", @@ -50,6 +54,9 @@ def __init__( self.model = model self.api_url = api_url self.model_kwargs = model_kwargs or {} + if timeout is None: + timeout = float(os.environ.get("NVIDIA_TIMEOUT", REQUEST_TIMEOUT)) + self.timeout = timeout def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: url = f"{self.api_url}/embeddings" @@ -62,10 +69,11 @@ def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: "input": 
texts, **self.model_kwargs, }, - timeout=REQUEST_TIMEOUT, + timeout=self.timeout, ) res.raise_for_status() except requests.HTTPError as e: + logger.error("Error when calling NIM embedding endpoint: Error - {error}", error=e.response.text) msg = f"Failed to query embedding endpoint: Error - {e.response.text}" raise ValueError(msg) from e @@ -94,10 +102,11 @@ def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: ], **self.model_kwargs, }, - timeout=REQUEST_TIMEOUT, + timeout=self.timeout, ) res.raise_for_status() except requests.HTTPError as e: + logger.error("Error when calling NIM chat completion endpoint: Error - {error}", error=e.response.text) msg = f"Failed to query chat completion endpoint: Error - {e.response.text}" raise ValueError(msg) from e @@ -132,21 +141,22 @@ def models(self) -> List[Model]: res = self.session.get( url, - timeout=REQUEST_TIMEOUT, + timeout=self.timeout, ) res.raise_for_status() data = res.json()["data"] models = [Model(element["id"]) for element in data if "id" in element] if not models: + logger.error("No hosted model were found at URL '{u}'.", u=url) msg = f"No hosted model were found at URL '{url}'." 
raise ValueError(msg) return models def rank( self, - query: str, - documents: List[Document], + query_text: str, + document_texts: List[str], endpoint: Optional[str] = None, ) -> List[Dict[str, Any]]: url = endpoint or f"{self.api_url}/ranking" @@ -156,18 +166,22 @@ def rank( url, json={ "model": self.model, - "query": {"text": query}, - "passages": [{"text": doc.content} for doc in documents], + "query": {"text": query_text}, + "passages": [{"text": text} for text in document_texts], **self.model_kwargs, }, - timeout=REQUEST_TIMEOUT, + timeout=self.timeout, ) res.raise_for_status() except requests.HTTPError as e: + logger.error("Error when calling NIM ranking endpoint: Error - {error}", error=e.response.text) msg = f"Failed to rank endpoint: Error - {e.response.text}" raise ValueError(msg) from e data = res.json() - assert "rankings" in data, f"Expected 'rankings' in response, got {data}" + if "rankings" not in data: + logger.error("Expected 'rankings' in response, got {d}", d=data) + msg = f"Expected 'rankings' in response, got {data}" + raise ValueError(msg) return data["rankings"] diff --git a/integrations/nvidia/tests/test_document_embedder.py b/integrations/nvidia/tests/test_document_embedder.py index 7e0e02f3d..8c01f0759 100644 --- a/integrations/nvidia/tests/test_document_embedder.py +++ b/integrations/nvidia/tests/test_document_embedder.py @@ -75,6 +75,7 @@ def test_to_dict(self, monkeypatch): "meta_fields_to_embed": [], "embedding_separator": "\n", "truncate": None, + "timeout": 60.0, }, } @@ -90,6 +91,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): meta_fields_to_embed=["test_field"], embedding_separator=" | ", truncate=EmbeddingTruncateMode.END, + timeout=45.0, ) data = component.to_dict() assert data == { @@ -105,6 +107,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): "meta_fields_to_embed": ["test_field"], "embedding_separator": " | ", "truncate": "END", + "timeout": 45.0, }, } @@ -123,6 +126,7 @@ def 
test_from_dict(self, monkeypatch): "meta_fields_to_embed": ["test_field"], "embedding_separator": " | ", "truncate": "START", + "timeout": 45.0, }, } component = NvidiaDocumentEmbedder.from_dict(data) @@ -135,6 +139,7 @@ def test_from_dict(self, monkeypatch): assert component.meta_fields_to_embed == ["test_field"] assert component.embedding_separator == " | " assert component.truncate == EmbeddingTruncateMode.START + assert component.timeout == 45.0 def test_from_dict_defaults(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") @@ -152,6 +157,7 @@ def test_from_dict_defaults(self, monkeypatch): assert component.meta_fields_to_embed == [] assert component.embedding_separator == "\n" assert component.truncate is None + assert component.timeout == 60.0 def test_prepare_texts_to_embed_w_metadata(self): documents = [ @@ -347,7 +353,7 @@ def test_run_wrong_input_format(self): with pytest.raises(TypeError, match="NvidiaDocumentEmbedder expects a list of Documents as input"): embedder.run(documents=list_integers_input) - def test_run_empty_document(self): + def test_run_empty_document(self, caplog): model = "playground_nvolveqa_40k" api_key = Secret.from_token("fake-api-key") embedder = NvidiaDocumentEmbedder(model, api_key=api_key) @@ -355,8 +361,10 @@ def test_run_empty_document(self): embedder.warm_up() embedder.backend = MockBackend(model=model, api_key=api_key) - with pytest.raises(ValueError, match="no content to embed"): + # Use caplog to check that a warning is logged for a document with no content + with caplog.at_level("WARNING"): embedder.run(documents=[Document(content="")]) + assert "has no content to embed." 
in caplog.text def test_run_on_empty_list(self): model = "playground_nvolveqa_40k" @@ -372,6 +380,19 @@ def test_run_on_empty_list(self): assert result["documents"] is not None assert not result["documents"] # empty list + def test_setting_timeout(self, monkeypatch): + monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") + embedder = NvidiaDocumentEmbedder(timeout=10.0) + embedder.warm_up() + assert embedder.backend.timeout == 10.0 + + def test_setting_timeout_env(self, monkeypatch): + monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") + monkeypatch.setenv("NVIDIA_TIMEOUT", "45") + embedder = NvidiaDocumentEmbedder() + embedder.warm_up() + assert embedder.backend.timeout == 45.0 + @pytest.mark.skipif( not os.environ.get("NVIDIA_API_KEY", None), reason="Export an env var called NVIDIA_API_KEY containing the Nvidia API key to run this test.", diff --git a/integrations/nvidia/tests/test_generator.py b/integrations/nvidia/tests/test_generator.py index 055830ae5..414de4884 100644 --- a/integrations/nvidia/tests/test_generator.py +++ b/integrations/nvidia/tests/test_generator.py @@ -124,6 +124,19 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): }, } + def test_setting_timeout(self, monkeypatch): + monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") + generator = NvidiaGenerator(timeout=10.0) + generator.warm_up() + assert generator._backend.timeout == 10.0 + + def test_setting_timeout_env(self, monkeypatch): + monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") + monkeypatch.setenv("NVIDIA_TIMEOUT", "45") + generator = NvidiaGenerator() + generator.warm_up() + assert generator._backend.timeout == 45.0 + @pytest.mark.skipif( not os.environ.get("NVIDIA_NIM_GENERATOR_MODEL", None) or not os.environ.get("NVIDIA_NIM_ENDPOINT_URL", None), reason="Export an env var called NVIDIA_NIM_GENERATOR_MODEL containing the hosted model name and " diff --git a/integrations/nvidia/tests/test_ranker.py b/integrations/nvidia/tests/test_ranker.py index 
d66bb0f65..3d93dc028 100644 --- a/integrations/nvidia/tests/test_ranker.py +++ b/integrations/nvidia/tests/test_ranker.py @@ -19,8 +19,8 @@ class TestNvidiaRanker: def test_init_default(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") client = NvidiaRanker() - assert client._model == _DEFAULT_MODEL - assert client._api_key == Secret.from_env_var("NVIDIA_API_KEY") + assert client.model == _DEFAULT_MODEL + assert client.api_key == Secret.from_env_var("NVIDIA_API_KEY") def test_init_with_parameters(self): client = NvidiaRanker( @@ -29,10 +29,10 @@ def test_init_with_parameters(self): top_k=3, truncate="END", ) - assert client._api_key == Secret.from_token("fake-api-key") - assert client._model == _DEFAULT_MODEL - assert client._top_k == 3 - assert client._truncate == RankerTruncateMode.END + assert client.api_key == Secret.from_token("fake-api-key") + assert client.model == _DEFAULT_MODEL + assert client.top_k == 3 + assert client.truncate == RankerTruncateMode.END def test_init_fail_wo_api_key(self, monkeypatch): monkeypatch.delenv("NVIDIA_API_KEY", raising=False) @@ -43,7 +43,7 @@ def test_init_fail_wo_api_key(self, monkeypatch): def test_init_pass_wo_api_key_w_api_url(self): url = "https://url.bogus/v1" client = NvidiaRanker(api_url=url) - assert client._api_url == url + assert client.api_url == url def test_warm_up_required(self): client = NvidiaRanker() @@ -271,6 +271,11 @@ def test_to_dict(self) -> None: "truncate": None, "api_url": None, "api_key": {"type": "env_var", "env_vars": ["NVIDIA_API_KEY"], "strict": True}, + "query_prefix": "", + "document_prefix": "", + "meta_fields_to_embed": [], + "embedding_separator": "\n", + "timeout": 60.0, }, } @@ -284,14 +289,24 @@ def test_from_dict(self) -> None: "truncate": None, "api_url": None, "api_key": {"type": "env_var", "env_vars": ["NVIDIA_API_KEY"], "strict": True}, + "query_prefix": "", + "document_prefix": "", + "meta_fields_to_embed": [], + "embedding_separator": "\n", + "timeout": 45.0, 
}, } ) - assert client._model == "nvidia/nv-rerankqa-mistral-4b-v3" - assert client._top_k == 5 - assert client._truncate is None - assert client._api_url is None - assert client._api_key == Secret.from_env_var("NVIDIA_API_KEY") + assert client.model == "nvidia/nv-rerankqa-mistral-4b-v3" + assert client.top_k == 5 + assert client.truncate is None + assert client.api_url is None + assert client.api_key == Secret.from_env_var("NVIDIA_API_KEY") + assert client.query_prefix == "" + assert client.document_prefix == "" + assert client.meta_fields_to_embed == [] + assert client.embedding_separator == "\n" + assert client.timeout == 45.0 def test_from_dict_defaults(self) -> None: client = NvidiaRanker.from_dict( @@ -300,8 +315,49 @@ def test_from_dict_defaults(self) -> None: "init_parameters": {}, } ) - assert client._model == "nvidia/nv-rerankqa-mistral-4b-v3" - assert client._top_k == 5 - assert client._truncate is None - assert client._api_url is None - assert client._api_key == Secret.from_env_var("NVIDIA_API_KEY") + assert client.model == "nvidia/nv-rerankqa-mistral-4b-v3" + assert client.top_k == 5 + assert client.truncate is None + assert client.api_url is None + assert client.api_key == Secret.from_env_var("NVIDIA_API_KEY") + assert client.query_prefix == "" + assert client.document_prefix == "" + assert client.meta_fields_to_embed == [] + assert client.embedding_separator == "\n" + assert client.timeout == 60.0 + + def test_setting_timeout(self, monkeypatch): + monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") + client = NvidiaRanker(timeout=10.0) + client.warm_up() + assert client._backend.timeout == 10.0 + + def test_setting_timeout_env(self, monkeypatch): + monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") + monkeypatch.setenv("NVIDIA_TIMEOUT", "45") + client = NvidiaRanker() + client.warm_up() + assert client._backend.timeout == 45.0 + + def test_prepare_texts_to_embed_w_metadata(self): + documents = [ + Document(content=f"document number {i}:\ncontent", 
meta={"meta_field": f"meta_value {i}"}) for i in range(5) + ] + + ranker = NvidiaRanker( + model=None, + api_key=Secret.from_token("fake-api-key"), + meta_fields_to_embed=["meta_field"], + embedding_separator=" | ", + ) + + prepared_texts = ranker._prepare_documents_to_embed(documents) + + # note that the newline inside the document content is preserved + assert prepared_texts == [ + "meta_value 0 | document number 0:\ncontent", + "meta_value 1 | document number 1:\ncontent", + "meta_value 2 | document number 2:\ncontent", + "meta_value 3 | document number 3:\ncontent", + "meta_value 4 | document number 4:\ncontent", + ] diff --git a/integrations/nvidia/tests/test_text_embedder.py b/integrations/nvidia/tests/test_text_embedder.py index 278fa5191..b572cc046 100644 --- a/integrations/nvidia/tests/test_text_embedder.py +++ b/integrations/nvidia/tests/test_text_embedder.py @@ -56,6 +56,7 @@ def test_to_dict(self, monkeypatch): "prefix": "", "suffix": "", "truncate": None, + "timeout": 60.0, }, } @@ -67,6 +68,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): prefix="prefix", suffix="suffix", truncate=EmbeddingTruncateMode.START, + timeout=10.0, ) data = component.to_dict() assert data == { @@ -78,6 +80,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): "prefix": "prefix", "suffix": "suffix", "truncate": "START", + "timeout": 10.0, }, } @@ -92,6 +95,7 @@ def test_from_dict(self, monkeypatch): "prefix": "prefix", "suffix": "suffix", "truncate": "START", + "timeout": 10.0, }, } component = NvidiaTextEmbedder.from_dict(data) @@ -100,6 +104,7 @@ def test_from_dict(self, monkeypatch): assert component.prefix == "prefix" assert component.suffix == "suffix" assert component.truncate == EmbeddingTruncateMode.START + assert component.timeout == 10.0 def test_from_dict_defaults(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") @@ -175,6 +180,19 @@ def test_run_empty_string(self): with pytest.raises(ValueError, match="empty string"): 
embedder.run(text="") + def test_setting_timeout(self, monkeypatch): + monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") + embedder = NvidiaTextEmbedder(timeout=10.0) + embedder.warm_up() + assert embedder.backend.timeout == 10.0 + + def test_setting_timeout_env(self, monkeypatch): + monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") + monkeypatch.setenv("NVIDIA_TIMEOUT", "45") + embedder = NvidiaTextEmbedder() + embedder.warm_up() + assert embedder.backend.timeout == 45.0 + @pytest.mark.skipif( not os.environ.get("NVIDIA_NIM_EMBEDDER_MODEL", None) or not os.environ.get("NVIDIA_NIM_ENDPOINT_URL", None), reason="Export an env var called NVIDIA_NIM_EMBEDDER_MODEL containing the hosted model name and "