Skip to content

Commit

Permalink
Update Nvidia integration to support new endpoints (#701)
Browse files Browse the repository at this point in the history
* Add support for Nvidia catalog API for generator

* Add support for Nvidia catalog API for embedders

* Add NVIDIA_CATALOG_API_KEY in Nvidia integration workflow

* Enable ruff auto formatting for tests

* Fix linting

* Simplify Secret import and enhance docstring

Co-authored-by: Madeesh Kannan <[email protected]>

* Add deprecation warnings for NvcfBackend

* Add truncate parameter for embedders

* Fix linting

* Use enum for truncate mode in embedders

* Change how truncate argument is handled

* Fix truncate conversion

* Update truncate docstring

---------

Co-authored-by: Madeesh Kannan <[email protected]>
  • Loading branch information
silvanocerza and shadeMe authored May 7, 2024
1 parent 975e0e5 commit 3c14c52
Show file tree
Hide file tree
Showing 14 changed files with 224 additions and 15 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/nvidia.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ env:
PYTHONUNBUFFERED: "1"
FORCE_COLOR: "1"
NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
NVIDIA_CATALOG_API_KEY: ${{ secrets.NVIDIA_CATALOG_API_KEY }}

jobs:
run:
Expand Down Expand Up @@ -73,7 +74,7 @@ jobs:
uses: ./.github/actions/send_failure
with:
title: |
core-integrations failure:
core-integrations failure:
${{ (steps.tests.conclusion == 'nightly-haystack-main') && 'nightly-haystack-main' || 'tests' }}
- ${{ github.workflow }}
api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
1 change: 0 additions & 1 deletion integrations/nvidia/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ unfixable = [
# Don't touch unused imports
"F401",
]
extend-exclude = ["tests", "example"]

[tool.ruff.isort]
known-first-party = ["src"]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from .document_embedder import NvidiaDocumentEmbedder
from .text_embedder import NvidiaTextEmbedder
from .truncate import EmbeddingTruncateMode

__all__ = [
"NvidiaDocumentEmbedder",
"NvidiaTextEmbedder",
]
__all__ = ["NvidiaDocumentEmbedder", "NvidiaTextEmbedder", "EmbeddingTruncateMode"]
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Any, Dict, List, Optional, Tuple

import requests
from haystack.utils import Secret

from .backend import EmbedderBackend

Expand All @@ -12,12 +13,17 @@ def __init__(
self,
model: str,
api_url: str,
api_key: Optional[Secret] = None,
model_kwargs: Optional[Dict[str, Any]] = None,
):
headers = {
"Content-Type": "application/json",
"accept": "application/json",
}

if api_key:
headers["authorization"] = f"Bearer {api_key.resolve_value()}"

self.session = requests.Session()
self.session.headers.update(headers)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
from dataclasses import asdict, dataclass
from typing import Any, Dict, List, Literal, Optional, Tuple, Union

Expand All @@ -17,6 +18,7 @@ def __init__(
api_key: Secret,
model_kwargs: Optional[Dict[str, Any]] = None,
):
warnings.warn("Nvidia NGC is deprecated, use Nvidia NIM instead.", DeprecationWarning, stacklevel=2)
if not model.startswith("playground_"):
model = f"playground_{model}"

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple, Union

from haystack import Document, component, default_from_dict, default_to_dict
from haystack.utils import Secret, deserialize_secrets_inplace
Expand All @@ -7,6 +7,7 @@
from ._nim_backend import NimBackend
from ._nvcf_backend import NvcfBackend
from .backend import EmbedderBackend
from .truncate import EmbeddingTruncateMode


@component
Expand Down Expand Up @@ -41,6 +42,7 @@ def __init__(
progress_bar: bool = True,
meta_fields_to_embed: Optional[List[str]] = None,
embedding_separator: str = "\n",
truncate: Optional[Union[EmbeddingTruncateMode, str]] = None,
):
"""
Create a NvidiaTextEmbedder component.
Expand All @@ -64,6 +66,9 @@ def __init__(
List of meta fields that should be embedded along with the Document text.
:param embedding_separator:
Separator used to concatenate the meta fields to the Document text.
:param truncate:
Specifies how inputs longer than the maximum token length should be truncated.
If None the behavior is model-dependent, see the official documentation for more information.
"""

self.api_key = api_key
Expand All @@ -76,6 +81,10 @@ def __init__(
self.meta_fields_to_embed = meta_fields_to_embed or []
self.embedding_separator = embedding_separator

if isinstance(truncate, str):
truncate = EmbeddingTruncateMode.from_str(truncate)
self.truncate = truncate

self.backend: Optional[EmbedderBackend] = None
self._initialized = False

Expand All @@ -93,7 +102,15 @@ def warm_up(self):

self.backend = NvcfBackend(self.model, api_key=self.api_key, model_kwargs={"model": "passage"})
else:
self.backend = NimBackend(self.model, api_url=self.api_url, model_kwargs={"input_type": "passage"})
model_kwargs = {"input_type": "passage"}
if self.truncate is not None:
model_kwargs["truncate"] = str(self.truncate)
self.backend = NimBackend(
self.model,
api_url=self.api_url,
api_key=self.api_key,
model_kwargs=model_kwargs,
)

self._initialized = True

Expand All @@ -115,6 +132,7 @@ def to_dict(self) -> Dict[str, Any]:
progress_bar=self.progress_bar,
meta_fields_to_embed=self.meta_fields_to_embed,
embedding_separator=self.embedding_separator,
truncate=str(self.truncate) if self.truncate is not None else None,
)

@classmethod
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Union

from haystack import component, default_from_dict, default_to_dict
from haystack.utils import Secret, deserialize_secrets_inplace

from ._nim_backend import NimBackend
from ._nvcf_backend import NvcfBackend
from .backend import EmbedderBackend
from .truncate import EmbeddingTruncateMode


@component
Expand Down Expand Up @@ -38,6 +39,7 @@ def __init__(
api_url: Optional[str] = None,
prefix: str = "",
suffix: str = "",
truncate: Optional[Union[EmbeddingTruncateMode, str]] = None,
):
"""
Create a NvidiaTextEmbedder component.
Expand All @@ -52,6 +54,9 @@ def __init__(
A string to add to the beginning of each text.
:param suffix:
A string to add to the end of each text.
:param truncate:
Specifies how inputs longer than the maximum token length should be truncated.
If None the behavior is model-dependent, see the official documentation for more information.
"""

self.api_key = api_key
Expand All @@ -60,6 +65,10 @@ def __init__(
self.prefix = prefix
self.suffix = suffix

if isinstance(truncate, str):
truncate = EmbeddingTruncateMode.from_str(truncate)
self.truncate = truncate

self.backend: Optional[EmbedderBackend] = None
self._initialized = False

Expand All @@ -77,7 +86,15 @@ def warm_up(self):

self.backend = NvcfBackend(self.model, api_key=self.api_key, model_kwargs={"model": "query"})
else:
self.backend = NimBackend(self.model, api_url=self.api_url, model_kwargs={"input_type": "query"})
model_kwargs = {"input_type": "query"}
if self.truncate is not None:
model_kwargs["truncate"] = str(self.truncate)
self.backend = NimBackend(
self.model,
api_url=self.api_url,
api_key=self.api_key,
model_kwargs=model_kwargs,
)

self._initialized = True

Expand All @@ -95,6 +112,7 @@ def to_dict(self) -> Dict[str, Any]:
api_url=self.api_url,
prefix=self.prefix,
suffix=self.suffix,
truncate=str(self.truncate) if self.truncate is not None else None,
)

@classmethod
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from enum import Enum


class EmbeddingTruncateMode(Enum):
    """
    Specifies how inputs to the NVIDIA embedding components are truncated.
    If START, the input will be truncated from the start.
    If END, the input will be truncated from the end.
    """

    START = "START"
    END = "END"

    def __str__(self):
        # Serialized form sent to the embeddings API as the "truncate" value.
        return self.value

    @classmethod
    def from_str(cls, string: str) -> "EmbeddingTruncateMode":
        """
        Create a truncate mode from a string.

        :param string:
            String to convert.
        :returns:
            Truncate mode.
        :raises ValueError:
            If the string does not correspond to a known truncate mode.
        """
        # Use cls rather than the hard-coded class name so the lookup
        # stays correct under subclassing/renaming.
        enum_map = {e.value: e for e in cls}
        opt_mode = enum_map.get(string)
        if opt_mode is None:
            msg = f"Unknown truncate mode '{string}'. Supported modes are: {list(enum_map.keys())}"
            raise ValueError(msg)
        return opt_mode
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Any, Dict, List, Optional, Tuple

import requests
from haystack.utils import Secret

from .backend import GeneratorBackend

Expand All @@ -12,12 +13,17 @@ def __init__(
self,
model: str,
api_url: str,
api_key: Optional[Secret] = None,
model_kwargs: Optional[Dict[str, Any]] = None,
):
headers = {
"Content-Type": "application/json",
"accept": "application/json",
}

if api_key:
headers["authorization"] = f"Bearer {api_key.resolve_value()}"

self.session = requests.Session()
self.session.headers.update(headers)

Expand All @@ -26,8 +32,9 @@ def __init__(
self.model_kwargs = model_kwargs or {}

def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]:
# We're using the chat completion endpoint as the local containers don't support
# We're using the chat completion endpoint as the NIM API doesn't support
# the /completions endpoint. So both the non-chat and chat generator will use this.
# This is the same for local containers and the cloud API.
url = f"{self.api_url}/chat/completions"

res = self.session.post(
Expand Down Expand Up @@ -57,13 +64,17 @@ def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]:
replies.append(message["content"])
choice_meta = {
"role": message["role"],
"finish_reason": choice["finish_reason"],
"usage": {
"prompt_tokens": completions["usage"]["prompt_tokens"],
"completion_tokens": completions["usage"]["completion_tokens"],
"total_tokens": completions["usage"]["total_tokens"],
},
}
# These fields could be null, the others will always be present
if "finish_reason" in choice:
choice_meta["finish_reason"] = choice["finish_reason"]
if "completion_tokens" in completions["usage"]:
choice_meta["usage"]["completion_tokens"] = completions["usage"]["completion_tokens"]

meta.append(choice_meta)

return replies, meta
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
from dataclasses import asdict, dataclass
from typing import Any, Dict, List, Optional, Tuple

Expand All @@ -14,6 +15,7 @@ def __init__(
api_key: Secret,
model_kwargs: Optional[Dict[str, Any]] = None,
):
warnings.warn("Nvidia NGC is deprecated, use Nvidia NIM instead.", DeprecationWarning, stacklevel=2)
if not model.startswith("playground_"):
model = f"playground_{model}"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ def warm_up(self):
self._backend = NimBackend(
self._model,
api_url=self._api_url,
api_key=self._api_key,
model_kwargs=self._model_arguments,
)

Expand Down
Loading

0 comments on commit 3c14c52

Please sign in to comment.