feat: Added support for streaming in chat as a run_manager (#9)
MateuszOssGit authored Aug 8, 2024
1 parent 951b6b8 commit e9a7c79
Showing 6 changed files with 187 additions and 75 deletions.
11 changes: 2 additions & 9 deletions libs/ibm/langchain_ibm/chat_models.py
@@ -669,18 +669,13 @@ def _stream(
             message_chunk = _convert_delta_to_message_chunk(choice, default_chunk_class)
             generation_info = {}
-            if finish_reason := choice.get("stop_reason"):
+            if (finish_reason := choice.get("stop_reason")) != "not_finished":
                 generation_info["finish_reason"] = finish_reason
-            logprobs = choice.get("logprobs")
-            if logprobs:
-                generation_info["logprobs"] = logprobs
             chunk = ChatGenerationChunk(
                 message=message_chunk, generation_info=generation_info or None
             )
             if run_manager:
-                run_manager.on_llm_new_token(
-                    chunk.content, chunk=chunk, logprobs=logprobs
-                )
+                run_manager.on_llm_new_token(chunk.text, chunk=chunk)

             yield chunk

@@ -751,8 +746,6 @@ def _create_chat_result(self, response: Union[dict]) -> ChatResult:
         for res in response["results"]:
             message = _convert_dict_to_message(res, call_id)
             generation_info = dict(finish_reason=res.get("stop_reason"))
-            if "logprobs" in res:
-                generation_info["logprobs"] = res["logprobs"]
             if "generated_token_count" in res:
                 sum_of_total_generated_tokens += res["generated_token_count"]
             if "input_token_count" in res:
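With these changes, _stream reports each chunk to the callback manager via run_manager.on_llm_new_token(chunk.text, chunk=chunk), and a finish reason is only surfaced once watsonx stops reporting "not_finished". Below is a minimal sketch of how a caller might observe the streamed tokens through a callback handler; the model ID, URL, project ID, and prompt are placeholders rather than part of this commit, and credentials are assumed to come from the usual WATSONX_APIKEY environment variable.

from langchain_core.callbacks import BaseCallbackHandler
from langchain_ibm import ChatWatsonx


class PrintTokenHandler(BaseCallbackHandler):
    """Receives the tokens emitted via run_manager.on_llm_new_token."""

    def on_llm_new_token(self, token, **kwargs):
        # After this commit, token is chunk.text and kwargs["chunk"] is the
        # ChatGenerationChunk itself.
        print(token, end="", flush=True)


# Placeholder watsonx.ai settings -- substitute real values.
chat = ChatWatsonx(
    model_id="ibm/granite-13b-chat-v2",
    url="https://us-south.ml.cloud.ibm.com",
    project_id="YOUR_PROJECT_ID",
    callbacks=[PrintTokenHandler()],
)

for _ in chat.stream("Write one sentence about token streaming."):
    # The handler above prints each token as it arrives; intermediate events
    # carry stop_reason == "not_finished" upstream, so no finish_reason is
    # attached until the stream actually ends.
    pass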
7 changes: 6 additions & 1 deletion libs/ibm/langchain_ibm/llms.py
@@ -355,10 +355,15 @@ def _stream_response_to_generation_chunk(
"""Convert a stream response to a generation chunk."""
if not stream_response["results"]:
return GenerationChunk(text="")

finish_reason = stream_response["results"][0].get("stop_reason", None)

return GenerationChunk(
text=stream_response["results"][0]["generated_text"],
generation_info=dict(
finish_reason=stream_response["results"][0].get("stop_reason", None),
finish_reason=None
if finish_reason == "not_finished"
else finish_reason,
llm_output={
"model_id": self.model_id,
"deployment_id": self.deployment_id,
Expand Down
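The llms.py change applies the same normalization at the WatsonxLLM level: intermediate stream events carry stop_reason == "not_finished", which is now mapped to None instead of being reported as a finish reason. A small standalone sketch of that rule follows, using hypothetical event dicts shaped like entries of a watsonx "results" list; the values below are illustrative, not taken from this commit.

from typing import Optional


def normalize_finish_reason(stop_reason: Optional[str]) -> Optional[str]:
    # Mirrors the diff above: "not_finished" marks an intermediate event and
    # should not be surfaced as a finish reason.
    return None if stop_reason == "not_finished" else stop_reason


# Hypothetical events shaped like entries of a watsonx "results" list.
events = [
    {"generated_text": "Hello", "stop_reason": "not_finished"},
    {"generated_text": ", world", "stop_reason": "not_finished"},
    {"generated_text": "!", "stop_reason": "eos_token"},
]

for event in events:
    print(repr(event["generated_text"]), normalize_finish_reason(event.get("stop_reason")))
# Prints None for the first two events and 'eos_token' only for the last one.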
