vanity model name support #35

Merged: 7 commits, May 7, 2024
46 changes: 24 additions & 22 deletions libs/ai-endpoints/README.md
@@ -34,7 +34,7 @@ if not os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"):
## Core LC Chat Interface
from langchain_nvidia_ai_endpoints import ChatNVIDIA

llm = ChatNVIDIA(model="ai-llama3-70b", max_tokens=419)
llm = ChatNVIDIA(model="meta/llama3-70b-instruct", max_tokens=419)
result = llm.invoke("Write a ballad about LangChain.")
print(result.content)
```
@@ -67,23 +67,25 @@ Querying `available_models` will still give you all of the other models offered
```python
[model.id for model in llm.available_models if model.model_type]

#['ai-codegemma-7b',
# 'ai-codellama-70b',
# 'ai-fuyu-8b',
# 'ai-gemma-2b',
# 'ai-gemma-7b',
# 'ai-google-deplot',
# 'ai-llama2-70b',
# 'ai-llama3-70b',
# 'ai-llama3-8b',
# 'ai-microsoft-kosmos-2',
# 'ai-mistral-7b-instruct-v2',
# 'ai-mistral-large',
# 'ai-mixtral-8x22b-instruct',
# 'ai-mixtral-8x7b-instruct',
# 'ai-neva-22b',
# 'ai-recurrentgemma-2b',
# ]
#[
# ...
# 'databricks/dbrx-instruct',
# 'google/codegemma-7b',
# 'google/gemma-2b',
# 'google/gemma-7b',
# 'google/recurrentgemma-2b',
# 'meta/codellama-70b',
# 'meta/llama2-70b',
# 'meta/llama3-70b-instruct',
# 'meta/llama3-8b-instruct',
# 'microsoft/phi-3-mini-128k-instruct',
# 'mistralai/mistral-7b-instruct-v0.2',
# 'mistralai/mistral-large',
# 'mistralai/mixtral-8x22b-instruct-v0.1',
# 'mistralai/mixtral-8x7b-instruct-v0.1',
# 'snowflake/arctic',
# ...
#]
```

## Model types
@@ -96,7 +98,7 @@ Some model types support unique prompting techniques and chat messages. We will

### General Chat

Models such as `ai-llama3-70b` and `ai-mixtral-8x22b-instruct` are good all-around models that you can use with any LangChain chat messages. Example below.
Models such as `meta/llama3-8b-instruct` and `mistralai/mixtral-8x22b-instruct-v0.1` are good all-around models that you can use with any LangChain chat messages. Example below.

```python
from langchain_nvidia_ai_endpoints import ChatNVIDIA
@@ -111,7 +113,7 @@ prompt = ChatPromptTemplate.from_messages(
)
chain = (
prompt
| ChatNVIDIA(model="ai-llama3-70b")
| ChatNVIDIA(model="meta/llama3-8b-instruct")
| StrOutputParser()
)

@@ -121,7 +123,7 @@ for txt in chain.stream({"input": "What's your name?"}):

### Code Generation

These models accept the same arguments and input structure as regular chat models, but they tend to perform better on code-generation and structured code tasks. Examples of this are `ai-codellama-70b` and `ai-codegemma-7b`.
These models accept the same arguments and input structure as regular chat models, but they tend to perform better on code-generation and structured code tasks. Examples of this are `meta/codellama-70b` and `google/codegemma-7b`.

```python
prompt = ChatPromptTemplate.from_messages(
@@ -132,7 +134,7 @@ prompt = ChatPromptTemplate.from_messages(
)
chain = (
prompt
| ChatNVIDIA(model="ai-codellama-70b", max_tokens=419)
| ChatNVIDIA(model="meta/codellama-70b", max_tokens=419)
| StrOutputParser()
)

16 changes: 8 additions & 8 deletions libs/ai-endpoints/docs/chat/nvidia_ai_endpoints.ipynb
@@ -153,7 +153,7 @@
"## Core LC Chat Interface\n",
"from langchain_nvidia_ai_endpoints import ChatNVIDIA\n",
"\n",
"llm = ChatNVIDIA(model=\"mixtral_8x7b\")\n",
"llm = ChatNVIDIA(model=\"mistralai/mixtral-8x7b-instruct-v0.1\")\n",
"result = llm.invoke(\"Write a ballad about LangChain.\")\n",
"print(result.content)"
]
@@ -321,7 +321,7 @@
"source": [
"### General Chat\n",
"\n",
"Models such as `llama2_13b` and `mixtral_8x7b` are good all-around models that you can use for with any LangChain chat messages. Example below."
"Models such as `meta/llama3-8b-instruct` and `mistralai/mixtral-8x22b-instruct-v0.1` are good all-around models that you can use for with any LangChain chat messages. Example below."
]
},
{
@@ -346,7 +346,7 @@
"prompt = ChatPromptTemplate.from_messages(\n",
" [(\"system\", \"You are a helpful AI assistant named Fred.\"), (\"user\", \"{input}\")]\n",
")\n",
"chain = prompt | ChatNVIDIA(model=\"llama2_13b\") | StrOutputParser()\n",
"chain = prompt | ChatNVIDIA(model=\"meta/llama3-8b-instruct\") | StrOutputParser()\n",
"\n",
"for txt in chain.stream({\"input\": \"What's your name?\"}):\n",
" print(txt, end=\"\")"
@@ -359,7 +359,7 @@
"source": [
"### Code Generation\n",
"\n",
"These models accept the same arguments and input structure as regular chat models, but they tend to perform better on code-genreation and structured code tasks. An example of this is `llama2_code_70b`."
"These models accept the same arguments and input structure as regular chat models, but they tend to perform better on code-genreation and structured code tasks. An example of this is `meta/codellama-70b`."
]
},
{
@@ -397,7 +397,7 @@
" (\"user\", \"{input}\"),\n",
" ]\n",
")\n",
"chain = prompt | ChatNVIDIA(model=\"llama2_code_70b\") | StrOutputParser()\n",
"chain = prompt | ChatNVIDIA(model=\"meta/codellama-70b\") | StrOutputParser()\n",
"\n",
"for txt in chain.stream({\"input\": \"How do I solve this fizz buzz problem?\"}):\n",
" print(txt, end=\"\")"
@@ -1005,7 +1005,7 @@
"id": "79efa62d"
},
"source": [
"Like any other integration, ChatNVIDIA is fine to support chat utilities like conversation buffers by default. Below, we show the [LangChain ConversationBufferMemory](https://python.langchain.com/docs/modules/memory/types/buffer) example applied to the `mixtral_8x7b` model."
"Like any other integration, ChatNVIDIA is fine to support chat utilities like conversation buffers by default. Below, we show the [LangChain ConversationBufferMemory](https://python.langchain.com/docs/modules/memory/types/buffer) example applied to the `mistralai/mixtral-8x22b-instruct-v0.1` model."
]
},
{
@@ -1038,7 +1038,7 @@
"from langchain.chains import ConversationChain\n",
"from langchain.memory import ConversationBufferMemory\n",
"\n",
"chat = ChatNVIDIA(model=\"mixtral_8x7b\", temperature=0.1, max_tokens=100, top_p=1.0)\n",
"chat = ChatNVIDIA(model=\"mistralai/mixtral-8x22b-instruct-v0.1\", temperature=0.1, max_tokens=100, top_p=1.0)\n",
"\n",
"conversation = ConversationChain(llm=chat, memory=ConversationBufferMemory())"
]
@@ -1149,7 +1149,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
"version": "3.10.14"
}
},
"nbformat": 4,
2 changes: 1 addition & 1 deletion libs/ai-endpoints/docs/providers/nvidia.mdx
@@ -25,7 +25,7 @@ export NVIDIA_API_KEY=nvapi-XXXXXXXXXXXXXXXXXXXXXXXXXX
```python
from langchain_nvidia_ai_endpoints import ChatNVIDIA

llm = ChatNVIDIA(model="mixtral_8x7b")
llm = ChatNVIDIA(model="mistralai/mixtral-8x22b-instruct-v0.1")
result = llm.invoke("Write a ballad about LangChain.")
print(result.content)
```
10 changes: 10 additions & 0 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py
@@ -183,6 +183,16 @@ def available_functions(self) -> list:
raise ValueError(
f"Unexpected response when querying {invoke_url}\n{query_res}"
)
# if there's an alias / model name for the function, add it as well
# this lets users work with ai-gemma-2b and google/gemma-2b
aliases = []
for function in output:
name = function["name"]
if name in MODEL_SPECS and "model_name" in MODEL_SPECS[name]:
alias = function.copy()
alias.update(name=MODEL_SPECS[name]["model_name"])
aliases.append(alias)
output.extend(aliases)
self._available_functions = output
return self._available_functions

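To make the effect of this aliasing concrete, here is a minimal, self-contained sketch of the same logic with made-up function entries and spec data (the real values come from the service response and the package's `MODEL_SPECS` table):

```python
# Illustrative only: toy spec table and function list, not real API data.
MODEL_SPECS = {"ai-gemma-2b": {"model_name": "google/gemma-2b"}}

# Pretend this is the list of function dicts returned by the endpoint.
output = [{"name": "ai-gemma-2b", "status": "ACTIVE"}]

aliases = []
for function in output:
    name = function["name"]
    if name in MODEL_SPECS and "model_name" in MODEL_SPECS[name]:
        # Duplicate the entry under its vanity model name.
        alias = function.copy()
        alias.update(name=MODEL_SPECS[name]["model_name"])
        aliases.append(alias)
output.extend(aliases)

print([f["name"] for f in output])
# ['ai-gemma-2b', 'google/gemma-2b']
```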
34 changes: 23 additions & 11 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py
@@ -18,22 +18,30 @@ class Model(BaseModel):
"playground_llama2_70b": {
"model_type": "chat",
"api_type": "aifm",
"alternative": "ai-llama2-70b",
"alternative": "meta/llama2-70b",
},
"playground_nvolveqa_40k": {
"model_type": "embedding",
"api_type": "aifm",
"alternative": "NV-Embed-QA",
},
"playground_nvolveqa_40k": {"model_type": "embedding", "api_type": "aifm"},
"playground_nemotron_qa_8b": {"model_type": "qa", "api_type": "aifm"},
"playground_gemma_7b": {
"model_type": "chat",
"api_type": "aifm",
"alternative": "ai-gemma-7b",
"alternative": "google/gemma-7b",
},
"playground_mistral_7b": {
"model_type": "chat",
"api_type": "aifm",
"alternative": "ai-mistral-7b-instruct-v2",
"alternative": "mistralai/mistral-7b-instruct-v0.2",
},
"playground_mamba_chat": {"model_type": "chat", "api_type": "aifm"},
"playground_phi2": {"model_type": "chat", "api_type": "aifm"},
"playground_phi2": {
"model_type": "chat",
"api_type": "aifm",
"alternative": "microsoft/phi-3-mini-128k-instruct",
},
"playground_sdxl": {"model_type": "image_out", "api_type": "aifm"},
"playground_nv_llama2_rlhf_70b": {"model_type": "chat", "api_type": "aifm"},
"playground_neva_22b": {
@@ -54,18 +62,18 @@
"playground_llama2_code_70b": {
"model_type": "chat",
"api_type": "aifm",
"alternative": "ai-codellama-70b",
"alternative": "meta/codelama-70b",
},
"playground_gemma_2b": {
"model_type": "chat",
"api_type": "aifm",
"alternative": "ai-gemma-2b",
"alternative": "google/gemma-2b",
},
"playground_seamless": {"model_type": "translation", "api_type": "aifm"},
"playground_mixtral_8x7b": {
"model_type": "chat",
"api_type": "aifm",
"alternative": "ai-mixtral-8x7b-instruct",
"alternative": "mistralai/mixtral-8x7b-instruct-v0.1",
},
"playground_fuyu_8b": {
"model_type": "image_in",
@@ -75,19 +83,19 @@
"playground_llama2_code_34b": {
"model_type": "chat",
"api_type": "aifm",
"alternative": "ai-codellama-70b",
"alternative": "meta/codellama-70b",
},
"playground_llama2_code_13b": {
"model_type": "chat",
"api_type": "aifm",
"alternative": "ai-codellama-70b",
"alternative": "meta/codellama-70b",
},
"playground_steerlm_llama_70b": {"model_type": "chat", "api_type": "aifm"},
"playground_clip": {"model_type": "similarity", "api_type": "aifm"},
"playground_llama2_13b": {
"model_type": "chat",
"api_type": "aifm",
"alternative": "ai-llama2-70b",
"alternative": "meta/llama2-70b",
},
}

@@ -202,3 +210,7 @@
MODEL_SPECS = {
k: {**v, "client": client_map[v["model_type"]]} for k, v in MODEL_SPECS.items()
}
# The MODEL_SPECS database should have both function and model names
MODEL_SPECS.update(
{v["model_name"]: v for v in MODEL_SPECS.values() if "model_name" in v}
)
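A short sketch of what this final update does, using made-up entries rather than the real `MODEL_SPECS` table: every spec that carries a `model_name` becomes reachable under both its function id and its vanity name.

```python
# Illustrative only: a toy MODEL_SPECS with one aliased entry.
MODEL_SPECS = {
    "ai-gemma-2b": {"model_type": "chat", "model_name": "google/gemma-2b"},
    "ai-embed-qa-4": {"model_type": "embedding"},  # no vanity name registered
}

# Register each aliased spec under its vanity model name as well.
MODEL_SPECS.update(
    {v["model_name"]: v for v in MODEL_SPECS.values() if "model_name" in v}
)

# Both keys now resolve to the same spec dict.
assert MODEL_SPECS["google/gemma-2b"] is MODEL_SPECS["ai-gemma-2b"]
```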
libs/ai-endpoints/langchain_nvidia_ai_endpoints/chat_models.py
@@ -130,7 +130,7 @@ class ChatNVIDIA(nvidia_ai_endpoints._NVIDIAClient, BaseChatModel):
response = model.invoke("Hello")
"""

_default_model: str = "ai-mixtral-8x7b-instruct"
_default_model: str = "mistralai/mixtral-8x7b-instruct-v0.1"
infer_endpoint: str = Field("{base_url}/chat/completions")
model: str = Field(_default_model, description="Name of the model to invoke")
temperature: Optional[float] = Field(description="Sampling temperature in [0, 1]")
20 changes: 12 additions & 8 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py
@@ -24,7 +24,7 @@ class NVIDIAEmbeddings(_NVIDIAClient, Embeddings):
too long.
"""

_default_model: str = "ai-embed-qa-4"
_default_model: str = "NV-Embed-QA"
_default_max_batch_size: int = 50
infer_endpoint: str = Field("{base_url}/embeddings")
model: str = Field(_default_model, description="Name of the model to invoke")
@@ -56,13 +56,17 @@ def deprecated_max_length(cls, value: int) -> int:

# todo: fix _NVIDIAClient.validate_client and enable Config.validate_assignment
@validator("model")
def deprecated_nvolveqa_40k(cls, value: str) -> str:
"""Deprecate the nvolveqa_40k model."""
if value == "nvolveqa_40k" or value == "playground_nvolveqa_40k":
warnings.warn(
"nvolveqa_40k is deprecated. Use ai-embed-qa-4 instead.",
DeprecationWarning,
)
def aifm_deprecated(cls, value: str) -> str:
"""All AI Foundataion Models are deprecate, use API Catalog models instead."""
for model in [value, f"playground_{value}"]:
if model in MODEL_SPECS and MODEL_SPECS[model].get("api_type") == "aifm":
alternative = MODEL_SPECS[model].get(
"alternative", NVIDIAEmbeddings._default_model
)
warnings.warn(
f"{value} is deprecated. Try {alternative} instead.",
DeprecationWarning,
)
return value

def _embed(
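A hedged usage sketch of the new validator, assuming `NVIDIA_API_KEY` is set in the environment; the exact warning text depends on the `alternative` recorded for the model in `MODEL_SPECS`:

```python
import warnings

from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Deprecated AI Foundation Model name; should trigger the validator above.
    embedder = NVIDIAEmbeddings(model="nvolveqa_40k")

print([str(w.message) for w in caught])
# e.g. ['nvolveqa_40k is deprecated. Try NV-Embed-QA instead.']
```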
17 changes: 13 additions & 4 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/reranking.py
@@ -27,11 +27,13 @@ class Config:
_client: _NVIDIAClient = PrivateAttr(_NVIDIAClient)

_default_batch_size: int = 32
_default_model: str = "ai-rerank-qa-mistral-4b"
_deprecated_model: str = "ai-rerank-qa-mistral-4b"
_default_model_name: str = "nv-rerank-qa-mistral-4b:1"

top_n: int = Field(5, ge=0, description="The number of documents to return.")
model: str = Field(_default_model, description="The model to use for reranking.")
model: str = Field(
_default_model_name, description="The model to use for reranking."
)
max_batch_size: int = Field(
_default_batch_size, ge=1, description="The maximum batch size."
)
@@ -62,12 +64,19 @@ def available_models(self) -> List[Model]:
# local NIM supports a single model and no /models endpoint
models = [
Model(
id=NVIDIARerank._default_model,
id=NVIDIARerank._default_model_name,
model_name=NVIDIARerank._default_model_name,
model_type="ranking",
client="NVIDIARerank",
path="magic",
),
Model(
id=NVIDIARerank._deprecated_model,
model_name=NVIDIARerank._default_model_name,
model_type="ranking",
client="NVIDIARerank",
path="magic",
)
),
]
else:
models = self._client.get_available_models(
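As a hedged sketch of the renaming above (again assuming `NVIDIA_API_KEY` is set): the reranker now defaults to the catalog model name rather than the old `ai-rerank-qa-mistral-4b` function id, which the nim-mode model list keeps only as a deprecated alias.

```python
from langchain_nvidia_ai_endpoints import NVIDIARerank

reranker = NVIDIARerank()
print(reranker.model)
# nv-rerank-qa-mistral-4b:1
```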
12 changes: 5 additions & 7 deletions libs/ai-endpoints/tests/integration_tests/conftest.py
@@ -1,9 +1,8 @@
import warnings
from typing import List

import pytest

from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings, NVIDIARerank
from langchain_nvidia_ai_endpoints._common import Model


@@ -60,13 +59,12 @@ def get_all_models() -> List[Model]:
models = ["ai-rerank-qa-mistral-4b"]
if model := metafunc.config.getoption("rerank_model_id"):
models = [model]
# nim-mode reranking does not support model listing
# nim-mode reranking does not support model listing via /v1/models endpoint
if metafunc.config.getoption("all_models"):
if mode.get("mode", None) == "nim":
warnings.warn(
"Skipping model listing for Rerank "
"with --nim-endpoint, not supported"
)
models = [
model.id for model in NVIDIARerank().mode(**mode).available_models
]
else:
models = [
model.id
Expand Down