vanity model name support #35

Merged: 7 commits, May 7, 2024
46 changes: 24 additions & 22 deletions libs/ai-endpoints/README.md
@@ -34,7 +34,7 @@ if not os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"):
## Core LC Chat Interface
from langchain_nvidia_ai_endpoints import ChatNVIDIA

llm = ChatNVIDIA(model="ai-llama3-70b", max_tokens=419)
llm = ChatNVIDIA(model="meta/llama3-70b-instruct", max_tokens=419)
result = llm.invoke("Write a ballad about LangChain.")
print(result.content)
```
@@ -67,23 +67,25 @@ Querying `available_models` will still give you all of the other models offered
```python
[model.id for model in llm.available_models if model.model_type]

#['ai-codegemma-7b',
# 'ai-codellama-70b',
# 'ai-fuyu-8b',
# 'ai-gemma-2b',
# 'ai-gemma-7b',
# 'ai-google-deplot',
# 'ai-llama2-70b',
# 'ai-llama3-70b',
# 'ai-llama3-8b',
# 'ai-microsoft-kosmos-2',
# 'ai-mistral-7b-instruct-v2',
# 'ai-mistral-large',
# 'ai-mixtral-8x22b-instruct',
# 'ai-mixtral-8x7b-instruct',
# 'ai-neva-22b',
# 'ai-recurrentgemma-2b',
# ]
#[
# ...
# 'databricks/dbrx-instruct',
# 'google/codegemma-7b',
# 'google/gemma-2b',
# 'google/gemma-7b',
# 'google/recurrentgemma-2b',
# 'meta/codellama-70b',
# 'meta/llama2-70b',
# 'meta/llama3-70b-instruct',
# 'meta/llama3-8b-instruct',
# 'microsoft/phi-3-mini-128k-instruct',
# 'mistralai/mistral-7b-instruct-v0.2',
# 'mistralai/mistral-large',
# 'mistralai/mixtral-8x22b-instruct-v0.1',
# 'mistralai/mixtral-8x7b-instruct-v0.1',
# 'snowflake/arctic',
# ...
#]
```

## Model types
@@ -96,7 +98,7 @@ Some model types support unique prompting techniques and chat messages. We will

### General Chat

Models such as `ai-llama3-70b` and `ai-mixtral-8x22b-instruct` are good all-around models that you can use with any LangChain chat messages. Example below.
Models such as `meta/llama3-8b-instruct` and `mistralai/mixtral-8x22b-instruct-v0.1` are good all-around models that you can use with any LangChain chat messages. Example below.

```python
from langchain_nvidia_ai_endpoints import ChatNVIDIA
@@ -111,7 +113,7 @@ prompt = ChatPromptTemplate.from_messages(
)
chain = (
prompt
| ChatNVIDIA(model="ai-llama3-70b")
| ChatNVIDIA(model="meta/llama3-8b-instruct")
| StrOutputParser()
)

@@ -121,7 +123,7 @@ for txt in chain.stream({"input": "What's your name?"}):

### Code Generation

These models accept the same arguments and input structure as regular chat models, but they tend to perform better on code-generation and structured code tasks. Examples of this are `ai-codellama-70b` and `ai-codegemma-7b`.
These models accept the same arguments and input structure as regular chat models, but they tend to perform better on code-generation and structured code tasks. Examples of this are `meta/codellama-70b` and `google/codegemma-7b`.

```python
prompt = ChatPromptTemplate.from_messages(
@@ -132,7 +134,7 @@ prompt = ChatPromptTemplate.from_messages(
)
chain = (
prompt
| ChatNVIDIA(model="ai-codellama-70b", max_tokens=419)
| ChatNVIDIA(model="meta/codellama-70b", max_tokens=419)
| StrOutputParser()
)

16 changes: 8 additions & 8 deletions libs/ai-endpoints/docs/chat/nvidia_ai_endpoints.ipynb
@@ -153,7 +153,7 @@
"## Core LC Chat Interface\n",
"from langchain_nvidia_ai_endpoints import ChatNVIDIA\n",
"\n",
"llm = ChatNVIDIA(model=\"mixtral_8x7b\")\n",
"llm = ChatNVIDIA(model=\"mistralai/mixtral-8x7b-instruct-v0.1\")\n",
"result = llm.invoke(\"Write a ballad about LangChain.\")\n",
"print(result.content)"
]
@@ -321,7 +321,7 @@
"source": [
"### General Chat\n",
"\n",
"Models such as `llama2_13b` and `mixtral_8x7b` are good all-around models that you can use for with any LangChain chat messages. Example below."
"Models such as `meta/llama3-8b-instruct` and `mistralai/mixtral-8x22b-instruct-v0.1` are good all-around models that you can use for with any LangChain chat messages. Example below."
]
},
{
@@ -346,7 +346,7 @@
"prompt = ChatPromptTemplate.from_messages(\n",
" [(\"system\", \"You are a helpful AI assistant named Fred.\"), (\"user\", \"{input}\")]\n",
")\n",
"chain = prompt | ChatNVIDIA(model=\"llama2_13b\") | StrOutputParser()\n",
"chain = prompt | ChatNVIDIA(model=\"meta/llama3-8b-instruct\") | StrOutputParser()\n",
"\n",
"for txt in chain.stream({\"input\": \"What's your name?\"}):\n",
" print(txt, end=\"\")"
@@ -359,7 +359,7 @@
"source": [
"### Code Generation\n",
"\n",
"These models accept the same arguments and input structure as regular chat models, but they tend to perform better on code-genreation and structured code tasks. An example of this is `llama2_code_70b`."
"These models accept the same arguments and input structure as regular chat models, but they tend to perform better on code-genreation and structured code tasks. An example of this is `meta/codellama-70b`."
]
},
{
@@ -397,7 +397,7 @@
" (\"user\", \"{input}\"),\n",
" ]\n",
")\n",
"chain = prompt | ChatNVIDIA(model=\"llama2_code_70b\") | StrOutputParser()\n",
"chain = prompt | ChatNVIDIA(model=\"meta/codellama-70b\") | StrOutputParser()\n",
"\n",
"for txt in chain.stream({\"input\": \"How do I solve this fizz buzz problem?\"}):\n",
" print(txt, end=\"\")"
@@ -1005,7 +1005,7 @@
"id": "79efa62d"
},
"source": [
"Like any other integration, ChatNVIDIA is fine to support chat utilities like conversation buffers by default. Below, we show the [LangChain ConversationBufferMemory](https://python.langchain.com/docs/modules/memory/types/buffer) example applied to the `mixtral_8x7b` model."
"Like any other integration, ChatNVIDIA is fine to support chat utilities like conversation buffers by default. Below, we show the [LangChain ConversationBufferMemory](https://python.langchain.com/docs/modules/memory/types/buffer) example applied to the `mistralai/mixtral-8x22b-instruct-v0.1` model."
]
},
{
@@ -1038,7 +1038,7 @@
"from langchain.chains import ConversationChain\n",
"from langchain.memory import ConversationBufferMemory\n",
"\n",
"chat = ChatNVIDIA(model=\"mixtral_8x7b\", temperature=0.1, max_tokens=100, top_p=1.0)\n",
"chat = ChatNVIDIA(model=\"mistralai/mixtral-8x22b-instruct-v0.1\", temperature=0.1, max_tokens=100, top_p=1.0)\n",
"\n",
"conversation = ConversationChain(llm=chat, memory=ConversationBufferMemory())"
]
@@ -1149,7 +1149,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
"version": "3.10.14"
}
},
"nbformat": 4,
2 changes: 1 addition & 1 deletion libs/ai-endpoints/docs/providers/nvidia.mdx
@@ -25,7 +25,7 @@ export NVIDIA_API_KEY=nvapi-XXXXXXXXXXXXXXXXXXXXXXXXXX
```python
from langchain_nvidia_ai_endpoints import ChatNVIDIA

llm = ChatNVIDIA(model="mixtral_8x7b")
llm = ChatNVIDIA(model="mistralai/mixtral-8x22b-instruct-v0.1")
result = llm.invoke("Write a ballad about LangChain.")
print(result.content)
```
10 changes: 10 additions & 0 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py
@@ -183,6 +183,16 @@ def available_functions(self) -> list:
raise ValueError(
f"Unexpected response when querying {invoke_url}\n{query_res}"
)
# if there's an alias / model name for the function, add it as well
# this lets users work with ai-gemma-2b and google/gemma-2b
aliases = []
for function in output:
name = function["name"]
if name in MODEL_SPECS and "model_name" in MODEL_SPECS[name]:
alias = function.copy()
alias.update(name=MODEL_SPECS[name]["model_name"])
aliases.append(alias)
output.extend(aliases)
self._available_functions = output
return self._available_functions

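To make the effect of this aliasing concrete, here is a minimal, self-contained sketch of the same logic with made-up function entries and spec data (the real values come from the service response and the package's `MODEL_SPECS` table):

```python
# Illustrative only: toy spec table and function list, not real API data.
MODEL_SPECS = {"ai-gemma-2b": {"model_name": "google/gemma-2b"}}

# Pretend this is the list of function dicts returned by the endpoint.
output = [{"name": "ai-gemma-2b", "status": "ACTIVE"}]

aliases = []
for function in output:
    name = function["name"]
    if name in MODEL_SPECS and "model_name" in MODEL_SPECS[name]:
        # Duplicate the entry under its vanity model name.
        alias = function.copy()
        alias.update(name=MODEL_SPECS[name]["model_name"])
        aliases.append(alias)
output.extend(aliases)

print([f["name"] for f in output])
# ['ai-gemma-2b', 'google/gemma-2b']
```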
34 changes: 23 additions & 11 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py
@@ -18,22 +18,30 @@ class Model(BaseModel):
"playground_llama2_70b": {
"model_type": "chat",
"api_type": "aifm",
"alternative": "ai-llama2-70b",
"alternative": "meta/llama2-70b",
},
"playground_nvolveqa_40k": {
"model_type": "embedding",
"api_type": "aifm",
"alternative": "NV-Embed-QA",
},
"playground_nvolveqa_40k": {"model_type": "embedding", "api_type": "aifm"},
"playground_nemotron_qa_8b": {"model_type": "qa", "api_type": "aifm"},
"playground_gemma_7b": {
"model_type": "chat",
"api_type": "aifm",
"alternative": "ai-gemma-7b",
"alternative": "google/gemma-7b",
},
"playground_mistral_7b": {
"model_type": "chat",
"api_type": "aifm",
"alternative": "ai-mistral-7b-instruct-v2",
"alternative": "mistralai/mistral-7b-instruct-v0.2",
},
"playground_mamba_chat": {"model_type": "chat", "api_type": "aifm"},
"playground_phi2": {"model_type": "chat", "api_type": "aifm"},
"playground_phi2": {
"model_type": "chat",
"api_type": "aifm",
"alternative": "microsoft/phi-3-mini-128k-instruct",
},
"playground_sdxl": {"model_type": "image_out", "api_type": "aifm"},
"playground_nv_llama2_rlhf_70b": {"model_type": "chat", "api_type": "aifm"},
"playground_neva_22b": {
@@ -54,18 +62,18 @@
"playground_llama2_code_70b": {
"model_type": "chat",
"api_type": "aifm",
"alternative": "ai-codellama-70b",
"alternative": "meta/codelama-70b",
},
"playground_gemma_2b": {
"model_type": "chat",
"api_type": "aifm",
"alternative": "ai-gemma-2b",
"alternative": "google/gemma-2b",
},
"playground_seamless": {"model_type": "translation", "api_type": "aifm"},
"playground_mixtral_8x7b": {
"model_type": "chat",
"api_type": "aifm",
"alternative": "ai-mixtral-8x7b-instruct",
"alternative": "mistralai/mixtral-8x7b-instruct-v0.1",
},
"playground_fuyu_8b": {
"model_type": "image_in",
@@ -75,19 +83,19 @@
"playground_llama2_code_34b": {
"model_type": "chat",
"api_type": "aifm",
"alternative": "ai-codellama-70b",
"alternative": "meta/codellama-70b",
},
"playground_llama2_code_13b": {
"model_type": "chat",
"api_type": "aifm",
"alternative": "ai-codellama-70b",
"alternative": "meta/codellama-70b",
},
"playground_steerlm_llama_70b": {"model_type": "chat", "api_type": "aifm"},
"playground_clip": {"model_type": "similarity", "api_type": "aifm"},
"playground_llama2_13b": {
"model_type": "chat",
"api_type": "aifm",
"alternative": "ai-llama2-70b",
"alternative": "meta/llama2-70b",
},
}

@@ -202,3 +210,7 @@
MODEL_SPECS = {
k: {**v, "client": client_map[v["model_type"]]} for k, v in MODEL_SPECS.items()
}
# The MODEL_SPECS database should have both function and model names
MODEL_SPECS.update(
{v["model_name"]: v for v in MODEL_SPECS.values() if "model_name" in v}
)
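A short sketch of what this final update does, using made-up entries rather than the real `MODEL_SPECS` table: every spec that carries a `model_name` becomes reachable under both its function id and its vanity name.

```python
# Illustrative only: a toy MODEL_SPECS with one aliased entry.
MODEL_SPECS = {
    "ai-gemma-2b": {"model_type": "chat", "model_name": "google/gemma-2b"},
    "ai-embed-qa-4": {"model_type": "embedding"},  # no vanity name registered
}

# Register each aliased spec under its vanity model name as well.
MODEL_SPECS.update(
    {v["model_name"]: v for v in MODEL_SPECS.values() if "model_name" in v}
)

# Both keys now resolve to the same spec dict.
assert MODEL_SPECS["google/gemma-2b"] is MODEL_SPECS["ai-gemma-2b"]
```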
libs/ai-endpoints/langchain_nvidia_ai_endpoints/chat_models.py
@@ -130,7 +130,7 @@ class ChatNVIDIA(nvidia_ai_endpoints._NVIDIAClient, BaseChatModel):
response = model.invoke("Hello")
"""

_default_model: str = "ai-mixtral-8x7b-instruct"
_default_model: str = "mistralai/mixtral-8x7b-instruct-v0.1"
infer_endpoint: str = Field("{base_url}/chat/completions")
model: str = Field(_default_model, description="Name of the model to invoke")
temperature: Optional[float] = Field(description="Sampling temperature in [0, 1]")
20 changes: 12 additions & 8 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py
@@ -24,7 +24,7 @@ class NVIDIAEmbeddings(_NVIDIAClient, Embeddings):
too long.
"""

_default_model: str = "ai-embed-qa-4"
_default_model: str = "NV-Embed-QA"
_default_max_batch_size: int = 50
infer_endpoint: str = Field("{base_url}/embeddings")
model: str = Field(_default_model, description="Name of the model to invoke")
@@ -56,13 +56,17 @@ def deprecated_max_length(cls, value: int) -> int:

# todo: fix _NVIDIAClient.validate_client and enable Config.validate_assignment
@validator("model")
def deprecated_nvolveqa_40k(cls, value: str) -> str:
"""Deprecate the nvolveqa_40k model."""
if value == "nvolveqa_40k" or value == "playground_nvolveqa_40k":
warnings.warn(
"nvolveqa_40k is deprecated. Use ai-embed-qa-4 instead.",
DeprecationWarning,
)
def aifm_deprecated(cls, value: str) -> str:
"""All AI Foundataion Models are deprecate, use API Catalog models instead."""
for model in [value, f"playground_{value}"]:
if model in MODEL_SPECS and MODEL_SPECS[model].get("api_type") == "aifm":
alternative = MODEL_SPECS[model].get(
"alternative", NVIDIAEmbeddings._default_model
)
warnings.warn(
f"{value} is deprecated. Try {alternative} instead.",
DeprecationWarning,
)
return value

def _embed(
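A hedged usage sketch of the new validator, assuming `NVIDIA_API_KEY` is set in the environment; the exact warning text depends on the `alternative` recorded for the model in `MODEL_SPECS`:

```python
import warnings

from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Deprecated AI Foundation Model name; should trigger the validator above.
    embedder = NVIDIAEmbeddings(model="nvolveqa_40k")

print([str(w.message) for w in caught])
# e.g. ['nvolveqa_40k is deprecated. Try NV-Embed-QA instead.']
```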
17 changes: 13 additions & 4 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/reranking.py
@@ -27,11 +27,13 @@ class Config:
_client: _NVIDIAClient = PrivateAttr(_NVIDIAClient)

_default_batch_size: int = 32
_default_model: str = "ai-rerank-qa-mistral-4b"
_deprecated_model: str = "ai-rerank-qa-mistral-4b"
_default_model_name: str = "nv-rerank-qa-mistral-4b:1"

top_n: int = Field(5, ge=0, description="The number of documents to return.")
model: str = Field(_default_model, description="The model to use for reranking.")
model: str = Field(
_default_model_name, description="The model to use for reranking."
)
max_batch_size: int = Field(
_default_batch_size, ge=1, description="The maximum batch size."
)
@@ -62,12 +64,19 @@ def available_models(self) -> List[Model]:
# local NIM supports a single model and no /models endpoint
models = [
Model(
id=NVIDIARerank._default_model,
id=NVIDIARerank._default_model_name,
model_name=NVIDIARerank._default_model_name,
model_type="ranking",
client="NVIDIARerank",
path="magic",
),
Model(
id=NVIDIARerank._deprecated_model,
model_name=NVIDIARerank._default_model_name,
model_type="ranking",
client="NVIDIARerank",
path="magic",
)
),
]
else:
models = self._client.get_available_models(
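As a hedged sketch of the renaming above (again assuming `NVIDIA_API_KEY` is set): the reranker now defaults to the catalog model name rather than the old `ai-rerank-qa-mistral-4b` function id, which the nim-mode model list keeps only as a deprecated alias.

```python
from langchain_nvidia_ai_endpoints import NVIDIARerank

reranker = NVIDIARerank()
print(reranker.model)
# nv-rerank-qa-mistral-4b:1
```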
12 changes: 5 additions & 7 deletions libs/ai-endpoints/tests/integration_tests/conftest.py
@@ -1,9 +1,8 @@
import warnings
from typing import List

import pytest

from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings, NVIDIARerank
from langchain_nvidia_ai_endpoints._common import Model


@@ -60,13 +59,12 @@ def get_all_models() -> List[Model]:
models = ["ai-rerank-qa-mistral-4b"]
if model := metafunc.config.getoption("rerank_model_id"):
models = [model]
# nim-mode reranking does not support model listing
# nim-mode reranking does not support model listing via /v1/models endpoint
if metafunc.config.getoption("all_models"):
if mode.get("mode", None) == "nim":
warnings.warn(
"Skipping model listing for Rerank "
"with --nim-endpoint, not supported"
)
models = [
model.id for model in NVIDIARerank().mode(**mode).available_models
]
else:
models = [
model.id
Expand Down