partners/ollama: Enabled Token Level Streaming when Using Bind Tools for ChatOllama (#27689)

**Description:** The issue concerns unexpected behavior when using the
bind_tools method of LangChain's ChatOllama. When no tools are bound,
llm.stream() works as expected, returning incremental chunks of content,
which is crucial for real-time applications such as conversational
agents and live feedback systems. However, once bind_tools([]) is
called, even with an empty tool list, the output is delivered as one
full chunk rather than incrementally. This breaks the real-time nature
of the streaming mechanism and degrades the user experience.
**Issue:** #26971
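
For reference, a minimal sketch of the behavior described above (the model name and prompt are illustrative, not part of this change):

from langchain_ollama import ChatOllama

llm = ChatOllama(model="llama3.1")

# Works as expected: content arrives as incremental chunks.
for chunk in llm.stream("Tell me a short story"):
    print(chunk.content, end="", flush=True)

# Before this fix, binding tools (even an empty list) disabled
# token-level streaming and the output arrived as one full chunk.
llm_with_tools = llm.bind_tools([])
for chunk in llm_with_tools.stream("Tell me a short story"):
    print(chunk.content, end="", flush=True)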

---------

Co-authored-by: 4meyDam1e <[email protected]>
Co-authored-by: Chester Curme <[email protected]>
3 people authored Nov 15, 2024
1 parent 776e327 commit d696728
Showing 1 changed file with 40 additions and 38 deletions: libs/partners/ollama/langchain_ollama/chat_models.py
@@ -327,7 +327,7 @@ class Multiply(BaseModel):
     """Base url the model is hosted under."""

     client_kwargs: Optional[dict] = {}
-    """Additional kwargs to pass to the httpx Client. 
+    """Additional kwargs to pass to the httpx Client.
     For a full list of the params, see [this link](https://pydoc.dev/httpx/latest/httpx.Client.html)
     """

@@ -475,26 +475,27 @@ async def _acreate_chat_stream(
             params[key] = kwargs[key]

         params["options"]["stop"] = stop
-        if "tools" in kwargs:
-            yield await self._async_client.chat(
-                model=params["model"],
-                messages=ollama_messages,
-                stream=False,
-                options=Options(**params["options"]),
-                keep_alive=params["keep_alive"],
-                format=params["format"],
-                tools=kwargs["tools"],
-            )  # type:ignore
-        else:
-            async for part in await self._async_client.chat(
-                model=params["model"],
-                messages=ollama_messages,
-                stream=True,
-                options=Options(**params["options"]),
-                keep_alive=params["keep_alive"],
-                format=params["format"],
-            ):  # type:ignore
+
+        tools = kwargs.get("tools", None)
+        stream = tools is None or len(tools) == 0
+
+        chat_params = {
+            "model": params["model"],
+            "messages": ollama_messages,
+            "stream": stream,
+            "options": Options(**params["options"]),
+            "keep_alive": params["keep_alive"],
+            "format": params["format"],
+        }
+
+        if tools is not None:
+            chat_params["tools"] = tools
+
+        if stream:
+            async for part in await self._async_client.chat(**chat_params):
                 yield part
+        else:
+            yield await self._async_client.chat(**chat_params)

     def _create_chat_stream(
         self,
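
The heart of the change is the stream gate introduced above. A self-contained sketch of that predicate (should_stream is a hypothetical name for the inline expression):

# Streaming stays on when no tools are supplied or the bound tool list
# is empty; binding real tools switches to a single non-streamed call.
def should_stream(tools):
    return tools is None or len(tools) == 0

assert should_stream(None)                        # tools never passed
assert should_stream([])                          # bind_tools([]) keeps streaming
assert not should_stream([{"type": "function"}])  # real tools disable it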
@@ -513,25 +514,26 @@ def _create_chat_stream(
             params[key] = kwargs[key]

         params["options"]["stop"] = stop
-        if "tools" in kwargs:
-            yield self._client.chat(
-                model=params["model"],
-                messages=ollama_messages,
-                stream=False,
-                options=Options(**params["options"]),
-                keep_alive=params["keep_alive"],
-                format=params["format"],
-                tools=kwargs["tools"],
-            )
+
+        tools = kwargs.get("tools", None)
+        stream = tools is None or len(tools) == 0
+
+        chat_params = {
+            "model": params["model"],
+            "messages": ollama_messages,
+            "stream": stream,
+            "options": Options(**params["options"]),
+            "keep_alive": params["keep_alive"],
+            "format": params["format"],
+        }
+
+        if tools is not None:
+            chat_params["tools"] = tools
+
+        if stream:
+            yield from self._client.chat(**chat_params)
         else:
-            yield from self._client.chat(
-                model=params["model"],
-                messages=ollama_messages,
-                stream=True,
-                options=Options(**params["options"]),
-                keep_alive=params["keep_alive"],
-                format=params["format"],
-            )
+            yield self._client.chat(**chat_params)

     def _chat_stream_with_aggregation(
         self,
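
Note the asymmetry in the sync version: with stream=True the client returns an iterator of parts, so yield from re-yields each one, while with stream=False it returns a single response, which is yielded as one item. A toy illustration with plain Python (fake_chat is hypothetical, standing in for self._client.chat):

def fake_chat(stream):
    # Stand-in for the Ollama client: an iterator of parts when
    # streaming, a single mapping otherwise.
    if stream:
        return iter([{"content": "Hel"}, {"content": "lo"}])
    return {"content": "Hello"}

def create_chat_stream(stream):
    if stream:
        yield from fake_chat(stream)  # re-yield each streamed part
    else:
        yield fake_chat(stream)       # emit the whole response once

assert list(create_chat_stream(True)) == [{"content": "Hel"}, {"content": "lo"}]
assert list(create_chat_stream(False)) == [{"content": "Hello"}]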
