From d6967282788307f973fbf1f448e509900c8c6e19 Mon Sep 17 00:00:00 2001
From: Elham Badri <48000928+ElhamBadri2411@users.noreply.github.com>
Date: Fri, 15 Nov 2024 11:36:27 -0500
Subject: [PATCH] partners/ollama: Enabled Token Level Streaming when Using
 Bind Tools for ChatOllama (#27689)

**Description:** This change fixes unexpected behavior in the `bind_tools`
method of LangChain's `ChatOllama`. When no tools are bound, `llm.stream()`
works as expected, returning incremental chunks of content, which is crucial
for real-time applications such as conversational agents and live feedback
systems. However, once `bind_tools([])` is called, streaming breaks: the
output is delivered as one full chunk rather than incrementally, negatively
impacting the user experience by defeating the real-time nature of the
streaming mechanism.

**Issue:** #26971
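A minimal sketch of the reported behavior (the model name and prompt are
illustrative assumptions, not taken from the original issue):

```python
from langchain_ollama import ChatOllama

# Assumes a local Ollama server with a chat model pulled; "llama3.1" is an
# illustrative choice, not part of the original report.
llm = ChatOllama(model="llama3.1")

# Without bound tools, chunks arrive incrementally (token-level streaming).
for chunk in llm.stream("Tell me a short story."):
    print(chunk.content, end="", flush=True)

# Before this patch, binding tools (even an empty list) caused the whole
# response to arrive as a single chunk; with this patch, an empty tool list
# streams incrementally again.
llm_with_tools = llm.bind_tools([])
for chunk in llm_with_tools.stream("Tell me a short story."):
    print(chunk.content, end="", flush=True)
```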
---------

Co-authored-by: 4meyDam1e
Co-authored-by: Chester Curme
---
 .../ollama/langchain_ollama/chat_models.py | 78 ++++++++++---------
 1 file changed, 40 insertions(+), 38 deletions(-)

diff --git a/libs/partners/ollama/langchain_ollama/chat_models.py b/libs/partners/ollama/langchain_ollama/chat_models.py
index 7f887280b000d..d6ece0d66a854 100644
--- a/libs/partners/ollama/langchain_ollama/chat_models.py
+++ b/libs/partners/ollama/langchain_ollama/chat_models.py
@@ -327,7 +327,7 @@ class Multiply(BaseModel):
     """Base url the model is hosted under."""
 
     client_kwargs: Optional[dict] = {}
-    """Additional kwargs to pass to the httpx Client. 
+    """Additional kwargs to pass to the httpx Client.
     For a full list of the params, see [this link](https://pydoc.dev/httpx/latest/httpx.Client.html)
     """
 
@@ -475,26 +475,27 @@ async def _acreate_chat_stream(
                 params[key] = kwargs[key]
 
         params["options"]["stop"] = stop
-        if "tools" in kwargs:
-            yield await self._async_client.chat(
-                model=params["model"],
-                messages=ollama_messages,
-                stream=False,
-                options=Options(**params["options"]),
-                keep_alive=params["keep_alive"],
-                format=params["format"],
-                tools=kwargs["tools"],
-            )  # type:ignore
-        else:
-            async for part in await self._async_client.chat(
-                model=params["model"],
-                messages=ollama_messages,
-                stream=True,
-                options=Options(**params["options"]),
-                keep_alive=params["keep_alive"],
-                format=params["format"],
-            ):  # type:ignore
+
+        tools = kwargs.get("tools", None)
+        stream = tools is None or len(tools) == 0
+
+        chat_params = {
+            "model": params["model"],
+            "messages": ollama_messages,
+            "stream": stream,
+            "options": Options(**params["options"]),
+            "keep_alive": params["keep_alive"],
+            "format": params["format"],
+        }
+
+        if tools is not None:
+            chat_params["tools"] = tools
+
+        if stream:
+            async for part in await self._async_client.chat(**chat_params):
                 yield part
+        else:
+            yield await self._async_client.chat(**chat_params)
 
     def _create_chat_stream(
         self,
@@ -513,25 +514,26 @@ def _create_chat_stream(
                 params[key] = kwargs[key]
 
         params["options"]["stop"] = stop
-        if "tools" in kwargs:
-            yield self._client.chat(
-                model=params["model"],
-                messages=ollama_messages,
-                stream=False,
-                options=Options(**params["options"]),
-                keep_alive=params["keep_alive"],
-                format=params["format"],
-                tools=kwargs["tools"],
-            )
+
+        tools = kwargs.get("tools", None)
+        stream = tools is None or len(tools) == 0
+
+        chat_params = {
+            "model": params["model"],
+            "messages": ollama_messages,
+            "stream": stream,
+            "options": Options(**params["options"]),
+            "keep_alive": params["keep_alive"],
+            "format": params["format"],
+        }
+
+        if tools is not None:
+            chat_params["tools"] = tools
+
+        if stream:
+            yield from self._client.chat(**chat_params)
         else:
-            yield from self._client.chat(
-                model=params["model"],
-                messages=ollama_messages,
-                stream=True,
-                options=Options(**params["options"]),
-                keep_alive=params["keep_alive"],
-                format=params["format"],
-            )
+            yield self._client.chat(**chat_params)
 
     def _chat_stream_with_aggregation(
         self,