partners/ollama: Enabled Token Level Streaming when Using Bind Tools for ChatOllama (#27689)

**Description:** The issue concerns unexpected behavior when using the
bind_tools method of LangChain's ChatOllama. When no tools are bound,
llm.stream() works as expected, returning incremental chunks of content,
which is crucial for real-time applications such as conversational
agents and live feedback systems. However, once bind_tools([]) is
called, even with an empty tool list, the output is delivered as one
full chunk rather than incrementally. This breaks the real-time nature
of the streaming mechanism and degrades the user experience.
**Issue:** #26971
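
For reference, a minimal sketch of the behavior described above (the model name and prompt are illustrative, not part of this change):

from langchain_ollama import ChatOllama

llm = ChatOllama(model="llama3.1")

# Works as expected: content arrives as incremental chunks.
for chunk in llm.stream("Tell me a short story"):
    print(chunk.content, end="", flush=True)

# Before this fix, binding tools (even an empty list) disabled
# token-level streaming and the output arrived as one full chunk.
llm_with_tools = llm.bind_tools([])
for chunk in llm_with_tools.stream("Tell me a short story"):
    print(chunk.content, end="", flush=True)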

---------

Co-authored-by: 4meyDam1e <[email protected]>
Co-authored-by: Chester Curme <[email protected]>
3 people authored Nov 15, 2024
1 parent 776e327 commit d696728
Showing 1 changed file with 40 additions and 38 deletions: libs/partners/ollama/langchain_ollama/chat_models.py
@@ -327,7 +327,7 @@ class Multiply(BaseModel):
     """Base url the model is hosted under."""

     client_kwargs: Optional[dict] = {}
-    """Additional kwargs to pass to the httpx Client. 
+    """Additional kwargs to pass to the httpx Client.
     For a full list of the params, see [this link](https://pydoc.dev/httpx/latest/httpx.Client.html)
     """

@@ -475,26 +475,27 @@ async def _acreate_chat_stream(
             params[key] = kwargs[key]

         params["options"]["stop"] = stop
-        if "tools" in kwargs:
-            yield await self._async_client.chat(
-                model=params["model"],
-                messages=ollama_messages,
-                stream=False,
-                options=Options(**params["options"]),
-                keep_alive=params["keep_alive"],
-                format=params["format"],
-                tools=kwargs["tools"],
-            )  # type:ignore
-        else:
-            async for part in await self._async_client.chat(
-                model=params["model"],
-                messages=ollama_messages,
-                stream=True,
-                options=Options(**params["options"]),
-                keep_alive=params["keep_alive"],
-                format=params["format"],
-            ):  # type:ignore
+
+        tools = kwargs.get("tools", None)
+        stream = tools is None or len(tools) == 0
+
+        chat_params = {
+            "model": params["model"],
+            "messages": ollama_messages,
+            "stream": stream,
+            "options": Options(**params["options"]),
+            "keep_alive": params["keep_alive"],
+            "format": params["format"],
+        }
+
+        if tools is not None:
+            chat_params["tools"] = tools
+
+        if stream:
+            async for part in await self._async_client.chat(**chat_params):
                 yield part
+        else:
+            yield await self._async_client.chat(**chat_params)

     def _create_chat_stream(
         self,
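
The heart of the change is the stream gate introduced above. A self-contained sketch of that predicate (should_stream is a hypothetical name for the inline expression):

# Streaming stays on when no tools are supplied or the bound tool list
# is empty; binding real tools switches to a single non-streamed call.
def should_stream(tools):
    return tools is None or len(tools) == 0

assert should_stream(None)                        # tools never passed
assert should_stream([])                          # bind_tools([]) keeps streaming
assert not should_stream([{"type": "function"}])  # real tools disable it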
@@ -513,25 +514,26 @@ def _create_chat_stream(
             params[key] = kwargs[key]

         params["options"]["stop"] = stop
-        if "tools" in kwargs:
-            yield self._client.chat(
-                model=params["model"],
-                messages=ollama_messages,
-                stream=False,
-                options=Options(**params["options"]),
-                keep_alive=params["keep_alive"],
-                format=params["format"],
-                tools=kwargs["tools"],
-            )
+
+        tools = kwargs.get("tools", None)
+        stream = tools is None or len(tools) == 0
+
+        chat_params = {
+            "model": params["model"],
+            "messages": ollama_messages,
+            "stream": stream,
+            "options": Options(**params["options"]),
+            "keep_alive": params["keep_alive"],
+            "format": params["format"],
+        }
+
+        if tools is not None:
+            chat_params["tools"] = tools
+
+        if stream:
+            yield from self._client.chat(**chat_params)
         else:
-            yield from self._client.chat(
-                model=params["model"],
-                messages=ollama_messages,
-                stream=True,
-                options=Options(**params["options"]),
-                keep_alive=params["keep_alive"],
-                format=params["format"],
-            )
+            yield self._client.chat(**chat_params)

     def _chat_stream_with_aggregation(
         self,
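
Note the asymmetry in the sync version: with stream=True the client returns an iterator of parts, so yield from re-yields each one, while with stream=False it returns a single response, which is yielded as one item. A toy illustration with plain Python (fake_chat is hypothetical, standing in for self._client.chat):

def fake_chat(stream):
    # Stand-in for the Ollama client: an iterator of parts when
    # streaming, a single mapping otherwise.
    if stream:
        return iter([{"content": "Hel"}, {"content": "lo"}])
    return {"content": "Hello"}

def create_chat_stream(stream):
    if stream:
        yield from fake_chat(stream)  # re-yield each streamed part
    else:
        yield fake_chat(stream)       # emit the whole response once

assert list(create_chat_stream(True)) == [{"content": "Hel"}, {"content": "lo"}]
assert list(create_chat_stream(False)) == [{"content": "Hello"}]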
