add support for nvidia/llama-3.2-nv-embedqa-1b-v2's dimensions param
mattf committed Dec 17, 2024
1 parent 6dd747a commit d09cece
Showing 2 changed files with 74 additions and 2 deletions.
16 changes: 14 additions & 2 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py
@@ -1,5 +1,3 @@
"""Embeddings Components Derived from NVEModel/Embeddings"""

from typing import Any, List, Literal, Optional

from langchain_core.embeddings import Embeddings
@@ -28,6 +26,8 @@ class NVIDIAEmbeddings(BaseModel, Embeddings):
    - truncate: "NONE", "START", "END", truncate input text if it exceeds the model's
      maximum token length. Default is "NONE", which raises an error if an input is
      too long.
    - dimensions: int, the number of dimensions for the embeddings. This parameter is
      not supported by all models.
    """

    model_config = ConfigDict(
@@ -47,6 +47,13 @@ class NVIDIAEmbeddings(BaseModel, Embeddings):
"Default is 'NONE', which raises an error if an input is too long."
),
)
dimensions: Optional[int] = Field(
default=None,
description=(
"The number of dimensions for the embeddings. This parameter is not "
"supported by all models."
),
)
max_batch_size: int = Field(default=_DEFAULT_BATCH_SIZE)

def __init__(self, **kwargs: Any):
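A minimal usage sketch of the new `dimensions` field, for readers skimming the diff. The model name comes from the commit title; a valid `NVIDIA_API_KEY` (or an equivalent self-hosted endpoint) is assumed, and 512 is an arbitrary example value:

```python
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

# Ask the model for 512-dimensional vectors instead of its default size.
embedder = NVIDIAEmbeddings(
    model="nvidia/llama-3.2-nv-embedqa-1b-v2",
    dimensions=512,
)

vector = embedder.embed_query("What is LangChain?")
print(len(vector))  # expected: 512, for models that honor the parameter
```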
@@ -67,6 +74,8 @@ def __init__(self, **kwargs: Any):
            truncate (str): "NONE", "START", "END", truncate input text if it exceeds
                the model's context length. Default is "NONE", which raises
                an error if an input is too long.
            dimensions (int): The number of dimensions for the embeddings. This
                parameter is not supported by all models.

        API Key:
        - The recommended way to provide the API key is through the `NVIDIA_API_KEY`
@@ -125,6 +134,7 @@ def _embed(
        # user: str -- ignored
        # truncate: "NONE" | "START" | "END" -- default "NONE", error raised if
        #   an input is too long
        # dimensions: int -- not supported by all models
        payload = {
            "input": texts,
            "model": self.model,
@@ -133,6 +143,8 @@
        }
        if self.truncate:
            payload["truncate"] = self.truncate
        if self.dimensions:
            payload["dimensions"] = self.dimensions

        response = self._client.get_req(
            payload=payload,
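Putting the `_embed` changes together: when `dimensions` is set, the request body passed to `get_req` gains one extra key. A hedged sketch of that payload for a two-document call (the `encoding_format` entry is assumed, since that line falls outside the hunks shown above):

```python
# Hypothetical payload for NVIDIAEmbeddings(..., truncate="END", dimensions=512)
payload = {
    "input": ["foo bar", "bar foo"],
    "model": "nvidia/llama-3.2-nv-embedqa-1b-v2",
    "encoding_format": "float",  # assumed; not visible in the hunks shown here
    "truncate": "END",           # only included when truncate is set
    "dimensions": 512,           # only included when dimensions is set (new in this commit)
}
```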
60 changes: 60 additions & 0 deletions libs/ai-endpoints/tests/integration_tests/test_embeddings.py
@@ -97,5 +97,65 @@ def test_embed_documents_truncate(
    assert len(output) == count


@pytest.mark.parametrize("dimensions", [32, 64, 128, 2048])
def test_embed_query_with_dimensions(
    embedding_model: str, mode: dict, dimensions: int
) -> None:
    if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2":
        pytest.skip("Model does not support custom dimensions.")
    query = "foo bar"
    embedding = NVIDIAEmbeddings(model=embedding_model, dimensions=dimensions, **mode)
    assert len(embedding.embed_query(query)) == dimensions


@pytest.mark.parametrize("dimensions", [32, 64, 128, 2048])
def test_embed_documents_with_dimensions(
    embedding_model: str, mode: dict, dimensions: int
) -> None:
    if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2":
        pytest.skip("Model does not support custom dimensions.")
    documents = ["foo bar", "bar foo"]
    embedding = NVIDIAEmbeddings(model=embedding_model, dimensions=dimensions, **mode)
    output = embedding.embed_documents(documents)
    assert len(output) == len(documents)
    assert all(len(doc) == dimensions for doc in output)


@pytest.mark.parametrize("dimensions", [102400])
def test_embed_query_with_large_dimensions(
    embedding_model: str, mode: dict, dimensions: int
) -> None:
    if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2":
        pytest.skip("Model does not support custom dimensions.")
    query = "foo bar"
    embedding = NVIDIAEmbeddings(model=embedding_model, dimensions=dimensions, **mode)
    assert 2048 <= len(embedding.embed_query(query)) < dimensions


@pytest.mark.parametrize("dimensions", [102400])
def test_embed_documents_with_large_dimensions(
    embedding_model: str, mode: dict, dimensions: int
) -> None:
    if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2":
        pytest.skip("Model does not support custom dimensions.")
    documents = ["foo bar", "bar foo"]
    embedding = NVIDIAEmbeddings(model=embedding_model, dimensions=dimensions, **mode)
    output = embedding.embed_documents(documents)
    assert len(output) == len(documents)
    assert all(2048 <= len(doc) < dimensions for doc in output)
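The two large-dimensions tests encode an observed server-side behavior rather than client-side validation: asking for far more dimensions than the model supports is not rejected, and the returned vectors fall back to something between 2048 and the requested size. A quick interactive check along the same lines (network access and a valid key assumed):

```python
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

embedder = NVIDIAEmbeddings(
    model="nvidia/llama-3.2-nv-embedqa-1b-v2", dimensions=102400
)
vector = embedder.embed_query("foo bar")
# Per the assertions above: at least 2048 dimensions, but fewer than 102400.
print(len(vector))
```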


@pytest.mark.parametrize("dimensions", [-1])
def test_embed_query_invalid_dimensions(
    embedding_model: str, mode: dict, dimensions: int
) -> None:
    if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2":
        pytest.skip("Model does not support custom dimensions.")
    query = "foo bar"
    with pytest.raises(Exception) as exc:
        NVIDIAEmbeddings(
            model=embedding_model, dimensions=dimensions, **mode
        ).embed_query(query)
    assert "400" in str(exc.value)


@pytest.mark.parametrize("dimensions", [-1])
def test_embed_documents_invalid_dimensions(
    embedding_model: str, mode: dict, dimensions: int
) -> None:
    if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2":
        pytest.skip("Model does not support custom dimensions.")
    documents = ["foo bar", "bar foo"]
    with pytest.raises(Exception) as exc:
        NVIDIAEmbeddings(
            model=embedding_model, dimensions=dimensions, **mode
        ).embed_documents(documents)
    assert "400" in str(exc.value)


# todo: test max_length > max length accepted by the model
# todo: test max_batch_size > max batch size accepted by the model
