From 3c14c52fc38903ec907d28da384b0a36119a3892 Mon Sep 17 00:00:00 2001 From: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> Date: Tue, 7 May 2024 17:03:50 +0200 Subject: [PATCH] Update Nvidia integration to support new endpoints (#701) * Add support for Nvidia catalog API for generator * Add support for Nvidia catalog API for embedders * Add NVIDIA_CATALOG_API_KEY in Nvidia integration workflow * Enable ruff auto formatting for tests * Fix linting * Simplify Secret import and enhance docstring Co-authored-by: Madeesh Kannan * Add deprecation warnings for NvcfBackend * Add truncate parameter for embedders * Fix linting * Use enum for truncate mode in embedders * Change how truncate argument is handled * Fix truncate conversion * Update truncate docstring --------- Co-authored-by: Madeesh Kannan --- .github/workflows/nvidia.yml | 3 +- integrations/nvidia/pyproject.toml | 1 - .../components/embedders/nvidia/__init__.py | 6 +- .../embedders/nvidia/_nim_backend.py | 6 ++ .../embedders/nvidia/_nvcf_backend.py | 2 + .../embedders/nvidia/document_embedder.py | 22 ++++++- .../embedders/nvidia/text_embedder.py | 22 ++++++- .../components/embedders/nvidia/truncate.py | 32 ++++++++++ .../generators/nvidia/_nim_backend.py | 17 +++++- .../generators/nvidia/_nvcf_backend.py | 2 + .../components/generators/nvidia/generator.py | 1 + .../nvidia/tests/test_document_embedder.py | 60 ++++++++++++++++++- integrations/nvidia/tests/test_generator.py | 20 +++++++ .../nvidia/tests/test_text_embedder.py | 45 +++++++++++++- 14 files changed, 224 insertions(+), 15 deletions(-) create mode 100644 integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/truncate.py diff --git a/.github/workflows/nvidia.yml b/.github/workflows/nvidia.yml index 316e509be..34e6a3c0e 100644 --- a/.github/workflows/nvidia.yml +++ b/.github/workflows/nvidia.yml @@ -22,6 +22,7 @@ env: PYTHONUNBUFFERED: "1" FORCE_COLOR: "1" NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }} + 
NVIDIA_CATALOG_API_KEY: ${{ secrets.NVIDIA_CATALOG_API_KEY }} jobs: run: @@ -73,7 +74,7 @@ jobs: uses: ./.github/actions/send_failure with: title: | - core-integrations failure: + core-integrations failure: ${{ (steps.tests.conclusion == 'nightly-haystack-main') && 'nightly-haystack-main' || 'tests' }} - ${{ github.workflow }} api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/integrations/nvidia/pyproject.toml b/integrations/nvidia/pyproject.toml index 753f4f938..6d823407b 100644 --- a/integrations/nvidia/pyproject.toml +++ b/integrations/nvidia/pyproject.toml @@ -117,7 +117,6 @@ unfixable = [ # Don't touch unused imports "F401", ] -extend-exclude = ["tests", "example"] [tool.ruff.isort] known-first-party = ["src"] diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/__init__.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/__init__.py index 588aca2e6..bc2d9372c 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/__init__.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/__init__.py @@ -1,7 +1,5 @@ from .document_embedder import NvidiaDocumentEmbedder from .text_embedder import NvidiaTextEmbedder +from .truncate import EmbeddingTruncateMode -__all__ = [ - "NvidiaDocumentEmbedder", - "NvidiaTextEmbedder", -] +__all__ = ["NvidiaDocumentEmbedder", "NvidiaTextEmbedder", "EmbeddingTruncateMode"] diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py index 27e0dbeac..26ba33c71 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py @@ -1,6 +1,7 @@ from typing import Any, Dict, List, Optional, Tuple import requests +from haystack.utils import Secret from 
.backend import EmbedderBackend @@ -12,12 +13,17 @@ def __init__( self, model: str, api_url: str, + api_key: Optional[Secret] = None, model_kwargs: Optional[Dict[str, Any]] = None, ): headers = { "Content-Type": "application/json", "accept": "application/json", } + + if api_key: + headers["authorization"] = f"Bearer {api_key.resolve_value()}" + self.session = requests.Session() self.session.headers.update(headers) diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py index 7d4b07dca..65371de54 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py @@ -1,3 +1,4 @@ +import warnings from dataclasses import asdict, dataclass from typing import Any, Dict, List, Literal, Optional, Tuple, Union @@ -17,6 +18,7 @@ def __init__( api_key: Secret, model_kwargs: Optional[Dict[str, Any]] = None, ): + warnings.warn("Nvidia NGC is deprecated, use Nvidia NIM instead.", DeprecationWarning, stacklevel=2) if not model.startswith("playground_"): model = f"playground_{model}" diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py index da181bd22..45680acce 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, Union from haystack import Document, component, default_from_dict, default_to_dict from haystack.utils import Secret, deserialize_secrets_inplace @@ -7,6 +7,7 @@ from 
._nim_backend import NimBackend from ._nvcf_backend import NvcfBackend from .backend import EmbedderBackend +from .truncate import EmbeddingTruncateMode @component @@ -41,6 +42,7 @@ def __init__( progress_bar: bool = True, meta_fields_to_embed: Optional[List[str]] = None, embedding_separator: str = "\n", + truncate: Optional[Union[EmbeddingTruncateMode, str]] = None, ): """ Create a NvidiaTextEmbedder component. @@ -64,6 +66,9 @@ def __init__( List of meta fields that should be embedded along with the Document text. :param embedding_separator: Separator used to concatenate the meta fields to the Document text. + :param truncate: + Specifies how inputs longer than the maximum token length should be truncated. + If None the behavior is model-dependent, see the official documentation for more information. """ self.api_key = api_key @@ -76,6 +81,10 @@ def __init__( self.meta_fields_to_embed = meta_fields_to_embed or [] self.embedding_separator = embedding_separator + if isinstance(truncate, str): + truncate = EmbeddingTruncateMode.from_str(truncate) + self.truncate = truncate + self.backend: Optional[EmbedderBackend] = None self._initialized = False @@ -93,7 +102,15 @@ def warm_up(self): self.backend = NvcfBackend(self.model, api_key=self.api_key, model_kwargs={"model": "passage"}) else: - self.backend = NimBackend(self.model, api_url=self.api_url, model_kwargs={"input_type": "passage"}) + model_kwargs = {"input_type": "passage"} + if self.truncate is not None: + model_kwargs["truncate"] = str(self.truncate) + self.backend = NimBackend( + self.model, + api_url=self.api_url, + api_key=self.api_key, + model_kwargs=model_kwargs, + ) self._initialized = True @@ -115,6 +132,7 @@ def to_dict(self) -> Dict[str, Any]: progress_bar=self.progress_bar, meta_fields_to_embed=self.meta_fields_to_embed, embedding_separator=self.embedding_separator, + truncate=str(self.truncate) if self.truncate is not None else None, ) @classmethod diff --git 
a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py index 6af5ba25f..b3ad4544e 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Union from haystack import component, default_from_dict, default_to_dict from haystack.utils import Secret, deserialize_secrets_inplace @@ -6,6 +6,7 @@ from ._nim_backend import NimBackend from ._nvcf_backend import NvcfBackend from .backend import EmbedderBackend +from .truncate import EmbeddingTruncateMode @component @@ -38,6 +39,7 @@ def __init__( api_url: Optional[str] = None, prefix: str = "", suffix: str = "", + truncate: Optional[Union[EmbeddingTruncateMode, str]] = None, ): """ Create a NvidiaTextEmbedder component. @@ -52,6 +54,9 @@ def __init__( A string to add to the beginning of each text. :param suffix: A string to add to the end of each text. + :param truncate: + Specifies how inputs longer than the maximum token length should be truncated. + If None the behavior is model-dependent, see the official documentation for more information. 
""" self.api_key = api_key @@ -60,6 +65,10 @@ def __init__( self.prefix = prefix self.suffix = suffix + if isinstance(truncate, str): + truncate = EmbeddingTruncateMode.from_str(truncate) + self.truncate = truncate + self.backend: Optional[EmbedderBackend] = None self._initialized = False @@ -77,7 +86,15 @@ def warm_up(self): self.backend = NvcfBackend(self.model, api_key=self.api_key, model_kwargs={"model": "query"}) else: - self.backend = NimBackend(self.model, api_url=self.api_url, model_kwargs={"input_type": "query"}) + model_kwargs = {"input_type": "query"} + if self.truncate is not None: + model_kwargs["truncate"] = str(self.truncate) + self.backend = NimBackend( + self.model, + api_url=self.api_url, + api_key=self.api_key, + model_kwargs=model_kwargs, + ) self._initialized = True @@ -95,6 +112,7 @@ def to_dict(self) -> Dict[str, Any]: api_url=self.api_url, prefix=self.prefix, suffix=self.suffix, + truncate=str(self.truncate) if self.truncate is not None else None, ) @classmethod diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/truncate.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/truncate.py new file mode 100644 index 000000000..2c32eabb1 --- /dev/null +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/truncate.py @@ -0,0 +1,32 @@ +from enum import Enum + + +class EmbeddingTruncateMode(Enum): + """ + Specifies how inputs to the NVIDIA embedding components are truncated. + If START, the input will be truncated from the start. + If END, the input will be truncated from the end. + """ + + START = "START" + END = "END" + + def __str__(self): + return self.value + + @classmethod + def from_str(cls, string: str) -> "EmbeddingTruncateMode": + """ + Create a truncate mode from a string. + + :param string: + String to convert. + :returns: + Truncate mode. 
+ """ + enum_map = {e.value: e for e in EmbeddingTruncateMode} + opt_mode = enum_map.get(string) + if opt_mode is None: + msg = f"Unknown truncate mode '{string}'. Supported modes are: {list(enum_map.keys())}" + raise ValueError(msg) + return opt_mode diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py index 499a60b78..879fe8a6b 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py @@ -1,6 +1,7 @@ from typing import Any, Dict, List, Optional, Tuple import requests +from haystack.utils import Secret from .backend import GeneratorBackend @@ -12,12 +13,17 @@ def __init__( self, model: str, api_url: str, + api_key: Optional[Secret] = None, model_kwargs: Optional[Dict[str, Any]] = None, ): headers = { "Content-Type": "application/json", "accept": "application/json", } + + if api_key: + headers["authorization"] = f"Bearer {api_key.resolve_value()}" + self.session = requests.Session() self.session.headers.update(headers) @@ -26,8 +32,9 @@ def __init__( self.model_kwargs = model_kwargs or {} def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: - # We're using the chat completion endpoint as the local containers don't support + # We're using the chat completion endpoint as the NIM API doesn't support # the /completions endpoint. So both the non-chat and chat generator will use this. + # This is the same for local containers and the cloud API. 
url = f"{self.api_url}/chat/completions" res = self.session.post( @@ -57,13 +64,17 @@ def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: replies.append(message["content"]) choice_meta = { "role": message["role"], - "finish_reason": choice["finish_reason"], "usage": { "prompt_tokens": completions["usage"]["prompt_tokens"], - "completion_tokens": completions["usage"]["completion_tokens"], "total_tokens": completions["usage"]["total_tokens"], }, } + # These fields could be null, the others will always be present + if "finish_reason" in choice: + choice_meta["finish_reason"] = choice["finish_reason"] + if "completion_tokens" in completions["usage"]: + choice_meta["usage"]["completion_tokens"] = completions["usage"]["completion_tokens"] + meta.append(choice_meta) return replies, meta diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py index c0686c132..95d024fb8 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py @@ -1,3 +1,4 @@ +import warnings from dataclasses import asdict, dataclass from typing import Any, Dict, List, Optional, Tuple @@ -14,6 +15,7 @@ def __init__( api_key: Secret, model_kwargs: Optional[Dict[str, Any]] = None, ): + warnings.warn("Nvidia NGC is deprecated, use Nvidia NIM instead.", DeprecationWarning, stacklevel=2) if not model.startswith("playground_"): model = f"playground_{model}" diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py index b6db399e6..d20478d93 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py +++ 
b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py @@ -85,6 +85,7 @@ def warm_up(self): self._backend = NimBackend( self._model, api_url=self._api_url, + api_key=self._api_key, model_kwargs=self._model_arguments, ) diff --git a/integrations/nvidia/tests/test_document_embedder.py b/integrations/nvidia/tests/test_document_embedder.py index 7ac89d5e2..28858dcf7 100644 --- a/integrations/nvidia/tests/test_document_embedder.py +++ b/integrations/nvidia/tests/test_document_embedder.py @@ -4,7 +4,7 @@ import pytest from haystack import Document from haystack.utils import Secret -from haystack_integrations.components.embedders.nvidia import NvidiaDocumentEmbedder +from haystack_integrations.components.embedders.nvidia import EmbeddingTruncateMode, NvidiaDocumentEmbedder class TestNvidiaDocumentEmbedder: @@ -64,6 +64,7 @@ def test_to_dict(self, monkeypatch): "progress_bar": True, "meta_fields_to_embed": [], "embedding_separator": "\n", + "truncate": None, }, } @@ -78,6 +79,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): progress_bar=False, meta_fields_to_embed=["test_field"], embedding_separator=" | ", + truncate=EmbeddingTruncateMode.END, ) data = component.to_dict() assert data == { @@ -92,9 +94,38 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): "progress_bar": False, "meta_fields_to_embed": ["test_field"], "embedding_separator": " | ", + "truncate": "END", }, } + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") + data = { + "type": "haystack_integrations.components.embedders.nvidia.document_embedder.NvidiaDocumentEmbedder", + "init_parameters": { + "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"}, + "api_url": "https://example.com", + "model": "playground_nvolveqa_40k", + "prefix": "prefix", + "suffix": "suffix", + "batch_size": 10, + "progress_bar": False, + "meta_fields_to_embed": ["test_field"], + "embedding_separator": 
" | ", + "truncate": "START", + }, + } + component = NvidiaDocumentEmbedder.from_dict(data) + assert component.model == "playground_nvolveqa_40k" + assert component.api_url == "https://example.com" + assert component.prefix == "prefix" + assert component.suffix == "suffix" + assert component.batch_size == 10 + assert not component.progress_bar + assert component.meta_fields_to_embed == ["test_field"] + assert component.embedding_separator == " | " + assert component.truncate == EmbeddingTruncateMode.START + def test_prepare_texts_to_embed_w_metadata(self): documents = [ Document(content=f"document number {i}:\ncontent", meta={"meta_field": f"meta_value {i}"}) for i in range(5) @@ -355,3 +386,30 @@ def test_run_integration_with_nim_backend(self): for doc in docs_with_embeddings: assert isinstance(doc.embedding, list) assert isinstance(doc.embedding[0], float) + + @pytest.mark.skipif( + not os.environ.get("NVIDIA_CATALOG_API_KEY", None), + reason="Export an env var called NVIDIA_CATALOG_API_KEY containing the Nvidia API key to run this test.", + ) + @pytest.mark.integration + def test_run_integration_with_api_catalog(self): + embedder = NvidiaDocumentEmbedder( + model="NV-Embed-QA", + api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia", + api_key=Secret.from_env_var("NVIDIA_CATALOG_API_KEY"), + ) + embedder.warm_up() + + docs = [ + Document(content="I love cheese", meta={"topic": "Cuisine"}), + Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}), + ] + + result = embedder.run(docs) + docs_with_embeddings = result["documents"] + + assert isinstance(docs_with_embeddings, list) + assert len(docs_with_embeddings) == len(docs) + for doc in docs_with_embeddings: + assert isinstance(doc.embedding, list) + assert isinstance(doc.embedding[0], float) diff --git a/integrations/nvidia/tests/test_generator.py b/integrations/nvidia/tests/test_generator.py index 9a157a9d1..102ef3508 100644 --- a/integrations/nvidia/tests/test_generator.py +++ 
b/integrations/nvidia/tests/test_generator.py @@ -202,3 +202,23 @@ def test_run_integration_with_nim_backend(self): assert result["replies"] assert result["meta"] + + @pytest.mark.skipif( + not os.environ.get("NVIDIA_CATALOG_API_KEY", None), + reason="Export an env var called NVIDIA_CATALOG_API_KEY containing the Nvidia API key to run this test.", + ) + @pytest.mark.integration + def test_run_integration_with_api_catalog(self): + generator = NvidiaGenerator( + model="meta/llama3-70b-instruct", + api_url="https://integrate.api.nvidia.com/v1", + api_key=Secret.from_env_var("NVIDIA_CATALOG_API_KEY"), + model_arguments={ + "temperature": 0.2, + }, + ) + generator.warm_up() + result = generator.run(prompt="What is the answer?") + + assert result["replies"] + assert result["meta"] diff --git a/integrations/nvidia/tests/test_text_embedder.py b/integrations/nvidia/tests/test_text_embedder.py index 39ee02206..b85ac39bf 100644 --- a/integrations/nvidia/tests/test_text_embedder.py +++ b/integrations/nvidia/tests/test_text_embedder.py @@ -3,7 +3,7 @@ import pytest from haystack.utils import Secret -from haystack_integrations.components.embedders.nvidia import NvidiaTextEmbedder +from haystack_integrations.components.embedders.nvidia import EmbeddingTruncateMode, NvidiaTextEmbedder class TestNvidiaTextEmbedder: @@ -46,6 +46,7 @@ def test_to_dict(self, monkeypatch): "model": "nvolveqa_40k", "prefix": "", "suffix": "", + "truncate": None, }, } @@ -55,6 +56,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): model="nvolveqa_40k", prefix="prefix", suffix="suffix", + truncate=EmbeddingTruncateMode.START, ) data = component.to_dict() assert data == { @@ -65,9 +67,30 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): "model": "nvolveqa_40k", "prefix": "prefix", "suffix": "suffix", + "truncate": "START", }, } + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") + data = { + "type": 
"haystack_integrations.components.embedders.nvidia.text_embedder.NvidiaTextEmbedder", + "init_parameters": { + "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"}, + "api_url": None, + "model": "nvolveqa_40k", + "prefix": "prefix", + "suffix": "suffix", + "truncate": "START", + }, + } + component = NvidiaTextEmbedder.from_dict(data) + assert component.model == "nvolveqa_40k" + assert component.api_url is None + assert component.prefix == "prefix" + assert component.suffix == "suffix" + assert component.truncate == EmbeddingTruncateMode.START + @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") def test_run(self, mock_client_class): embedder = NvidiaTextEmbedder( @@ -150,3 +173,23 @@ def test_run_integration_with_nim_backend(self): assert all(isinstance(x, float) for x in embedding) assert "usage" in meta + + @pytest.mark.skipif( + not os.environ.get("NVIDIA_CATALOG_API_KEY", None), + reason="Export an env var called NVIDIA_CATALOG_API_KEY containing the Nvidia API key to run this test.", + ) + @pytest.mark.integration + def test_run_integration_with_api_catalog(self): + embedder = NvidiaTextEmbedder( + model="NV-Embed-QA", + api_url="https://ai.api.nvidia.com/v1/retrieval/nvidia", + api_key=Secret.from_env_var("NVIDIA_CATALOG_API_KEY"), + ) + embedder.warm_up() + + result = embedder.run("A transformer is a deep learning architecture") + embedding = result["embedding"] + meta = result["meta"] + + assert all(isinstance(x, float) for x in embedding) + assert "usage" in meta