From d6967282788307f973fbf1f448e509900c8c6e19 Mon Sep 17 00:00:00 2001
From: Elham Badri <48000928+ElhamBadri2411@users.noreply.github.com>
Date: Fri, 15 Nov 2024 11:36:27 -0500
Subject: [PATCH] partners/ollama: Enabled Token Level Streaming when Using
 Bind Tools for ChatOllama (#27689)

**Description:** This change fixes unexpected behavior in the `bind_tools`
method of LangChain's `ChatOllama`. When no tools are bound, `llm.stream()`
works as expected, returning incremental chunks of content, which is crucial
for real-time applications such as conversational agents and live feedback
systems. However, once `bind_tools([])` is called, streaming breaks: the
output is delivered as one full chunk rather than incrementally, negatively
impacting the user experience by defeating the real-time nature of the
streaming mechanism.

**Issue:** #26971
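A minimal sketch of the reported behavior (the model name and prompt are
illustrative assumptions, not taken from the original issue):

```python
from langchain_ollama import ChatOllama

# Assumes a local Ollama server with a chat model pulled; "llama3.1" is an
# illustrative choice, not part of the original report.
llm = ChatOllama(model="llama3.1")

# Without bound tools, chunks arrive incrementally (token-level streaming).
for chunk in llm.stream("Tell me a short story."):
    print(chunk.content, end="", flush=True)

# Before this patch, binding tools (even an empty list) caused the whole
# response to arrive as a single chunk; with this patch, an empty tool list
# streams incrementally again.
llm_with_tools = llm.bind_tools([])
for chunk in llm_with_tools.stream("Tell me a short story."):
    print(chunk.content, end="", flush=True)
```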
---------

Co-authored-by: 4meyDam1e
Co-authored-by: Chester Curme
---
 .../ollama/langchain_ollama/chat_models.py | 78 ++++++++++---------
 1 file changed, 40 insertions(+), 38 deletions(-)

diff --git a/libs/partners/ollama/langchain_ollama/chat_models.py b/libs/partners/ollama/langchain_ollama/chat_models.py
index 7f887280b000d..d6ece0d66a854 100644
--- a/libs/partners/ollama/langchain_ollama/chat_models.py
+++ b/libs/partners/ollama/langchain_ollama/chat_models.py
@@ -327,7 +327,7 @@ class Multiply(BaseModel):
     """Base url the model is hosted under."""
 
     client_kwargs: Optional[dict] = {}
-    """Additional kwargs to pass to the httpx Client. 
+    """Additional kwargs to pass to the httpx Client.
     For a full list of the params, see [this link](https://pydoc.dev/httpx/latest/httpx.Client.html)
     """
 
@@ -475,26 +475,27 @@ async def _acreate_chat_stream(
                 params[key] = kwargs[key]
 
         params["options"]["stop"] = stop
-        if "tools" in kwargs:
-            yield await self._async_client.chat(
-                model=params["model"],
-                messages=ollama_messages,
-                stream=False,
-                options=Options(**params["options"]),
-                keep_alive=params["keep_alive"],
-                format=params["format"],
-                tools=kwargs["tools"],
-            )  # type:ignore
-        else:
-            async for part in await self._async_client.chat(
-                model=params["model"],
-                messages=ollama_messages,
-                stream=True,
-                options=Options(**params["options"]),
-                keep_alive=params["keep_alive"],
-                format=params["format"],
-            ):  # type:ignore
+
+        tools = kwargs.get("tools", None)
+        stream = tools is None or len(tools) == 0
+
+        chat_params = {
+            "model": params["model"],
+            "messages": ollama_messages,
+            "stream": stream,
+            "options": Options(**params["options"]),
+            "keep_alive": params["keep_alive"],
+            "format": params["format"],
+        }
+
+        if tools is not None:
+            chat_params["tools"] = tools
+
+        if stream:
+            async for part in await self._async_client.chat(**chat_params):
                 yield part
+        else:
+            yield await self._async_client.chat(**chat_params)
 
     def _create_chat_stream(
         self,
@@ -513,25 +514,26 @@ def _create_chat_stream(
                 params[key] = kwargs[key]
 
         params["options"]["stop"] = stop
-        if "tools" in kwargs:
-            yield self._client.chat(
-                model=params["model"],
-                messages=ollama_messages,
-                stream=False,
-                options=Options(**params["options"]),
-                keep_alive=params["keep_alive"],
-                format=params["format"],
-                tools=kwargs["tools"],
-            )
+
+        tools = kwargs.get("tools", None)
+        stream = tools is None or len(tools) == 0
+
+        chat_params = {
+            "model": params["model"],
+            "messages": ollama_messages,
+            "stream": stream,
+            "options": Options(**params["options"]),
+            "keep_alive": params["keep_alive"],
+            "format": params["format"],
+        }
+
+        if tools is not None:
+            chat_params["tools"] = tools
+
+        if stream:
+            yield from self._client.chat(**chat_params)
         else:
-            yield from self._client.chat(
-                model=params["model"],
-                messages=ollama_messages,
-                stream=True,
-                options=Options(**params["options"]),
-                keep_alive=params["keep_alive"],
-                format=params["format"],
-            )
+            yield self._client.chat(**chat_params)
 
     def _chat_stream_with_aggregation(
         self,