From 5485897ff23c89d13087fc270ecf810a1638b303 Mon Sep 17 00:00:00 2001
From: Bohan Qu
Date: Tue, 8 Oct 2024 17:48:23 +0800
Subject: [PATCH 1/3] feat: Add TTFT support in OpenAI generators

---
 haystack/components/generators/chat/openai.py        | 12 +++++++++++-
 releasenotes/notes/openai-ttft-42b1ad551b542930.yaml |  6 ++++++
 test/components/generators/chat/test_openai.py       | 11 ++++++++++-
 3 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100644 releasenotes/notes/openai-ttft-42b1ad551b542930.yaml

diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py
index 8f54b7ad36..2de4e5b1ba 100644
--- a/haystack/components/generators/chat/openai.py
+++ b/haystack/components/generators/chat/openai.py
@@ -5,6 +5,7 @@
 import copy
 import json
 import os
+from datetime import datetime
 from typing import Any, Callable, Dict, List, Optional, Union
 
 from openai import OpenAI, Stream
@@ -222,11 +223,15 @@ def run(
             raise ValueError("Cannot stream multiple responses, please set n=1.")
         chunks: List[StreamingChunk] = []
         chunk = None
+        _first_token = True
 
         # pylint: disable=not-an-iterable
         for chunk in chat_completion:
             if chunk.choices and streaming_callback:
                 chunk_delta: StreamingChunk = self._build_chunk(chunk)
+                if _first_token:
+                    _first_token = False
+                    chunk_delta.meta["completion_start_time"] = datetime.now().isoformat()
                 chunks.append(chunk_delta)
                 streaming_callback(chunk_delta)  # invoke callback with the chunk_delta
         completions = [self._connect_chunks(chunk, chunks)]
@@ -280,7 +285,12 @@ def _connect_chunks(self, chunk: Any, chunks: List[StreamingChunk]) -> ChatMessa
                 payload["function"]["arguments"] += delta.arguments or ""
             complete_response = ChatMessage.from_assistant(json.dumps(payloads))
         else:
-            complete_response = ChatMessage.from_assistant("".join([chunk.content for chunk in chunks]))
+            total_content = ""
+            total_meta = {}
+            for streaming_chunk in chunks:
+                total_content += streaming_chunk.content
+                total_meta.update(streaming_chunk.meta)
+            complete_response = ChatMessage.from_assistant(total_content, meta=total_meta)
         complete_response.meta.update(
             {
                 "model": chunk.model,
diff --git a/releasenotes/notes/openai-ttft-42b1ad551b542930.yaml b/releasenotes/notes/openai-ttft-42b1ad551b542930.yaml
new file mode 100644
index 0000000000..bc39b96f7f
--- /dev/null
+++ b/releasenotes/notes/openai-ttft-42b1ad551b542930.yaml
@@ -0,0 +1,6 @@
+---
+features:
+  - |
+    Add TTFT (Time-to-First-Token) support for OpenAI generators. This
+    captures the time taken to generate the first token from the model and
+    can be used to analyze the latency of the application.
diff --git a/test/components/generators/chat/test_openai.py b/test/components/generators/chat/test_openai.py
index c3332aae97..9d5122d069 100644
--- a/test/components/generators/chat/test_openai.py
+++ b/test/components/generators/chat/test_openai.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 import logging
 import os
+from unittest.mock import patch
 
 import pytest
 from openai import OpenAIError
@@ -219,7 +220,8 @@ def streaming_callback(chunk: StreamingChunk) -> None:
         assert [isinstance(reply, ChatMessage) for reply in response["replies"]]
         assert "Hello" in response["replies"][0].content  # see mock_chat_completion_chunk
 
-    def test_run_with_streaming_callback_in_run_method(self, chat_messages, mock_chat_completion_chunk):
+    @patch("haystack.components.generators.chat.openai.datetime")
+    def test_run_with_streaming_callback_in_run_method(self, mock_datetime, chat_messages, mock_chat_completion_chunk):
         streaming_callback_called = False
 
         def streaming_callback(chunk: StreamingChunk) -> None:
@@ -240,6 +242,13 @@ def streaming_callback(chunk: StreamingChunk) -> None:
         assert [isinstance(reply, ChatMessage) for reply in response["replies"]]
         assert "Hello" in response["replies"][0].content  # see mock_chat_completion_chunk
 
+        assert hasattr(response["replies"][0], "meta")
+        assert isinstance(response["replies"][0].meta, dict)
+        assert (
+            response["replies"][0].meta["completion_start_time"]
+            == mock_datetime.now.return_value.isoformat.return_value
+        )
+
     def test_check_abnormal_completions(self, caplog):
         caplog.set_level(logging.INFO)
         component = OpenAIChatGenerator(api_key=Secret.from_token("test-api-key"))

From 4fd2723185c9adec975f3e40fcbacc9ffde787ae Mon Sep 17 00:00:00 2001
From: Vladimir Blagojevic
Date: Thu, 31 Oct 2024 15:51:23 +0100
Subject: [PATCH 2/3] pylint fixes

---
 haystack/components/generators/chat/openai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py
index 2de4e5b1ba..42664d857f 100644
--- a/haystack/components/generators/chat/openai.py
+++ b/haystack/components/generators/chat/openai.py
@@ -64,7 +64,7 @@ class OpenAIChatGenerator:
     ```
     """
 
-    def __init__(
+    def __init__(  # pylint: disable=too-many-arguments
         self,
         api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
         model: str = "gpt-4o-mini",

From 7b9bdf95e773117de0bdf10180bbf8a2c3463bcf Mon Sep 17 00:00:00 2001
From: Vladimir Blagojevic
Date: Thu, 31 Oct 2024 16:02:57 +0100
Subject: [PATCH 3/3] correct disable

---
 haystack/components/generators/chat/openai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py
index 42664d857f..6013880e74 100644
--- a/haystack/components/generators/chat/openai.py
+++ b/haystack/components/generators/chat/openai.py
@@ -64,7 +64,7 @@ class OpenAIChatGenerator:
     ```
     """
 
-    def __init__(  # pylint: disable=too-many-arguments
+    def __init__(  # pylint: disable=too-many-positional-arguments
         self,
         api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
         model: str = "gpt-4o-mini",
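
For reviewers, a minimal usage sketch of the feature this series adds (not part of any patch above): it shows how a caller could read the new `completion_start_time` meta field to compute time-to-first-token. It assumes a valid `OPENAI_API_KEY` in the environment; the prompt, the model name, and the `request_start` variable are illustrative bookkeeping introduced here, not something the generator provides.

```python
from datetime import datetime

from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.dataclasses import ChatMessage, StreamingChunk


def streaming_callback(chunk: StreamingChunk) -> None:
    # Per the patch, only the first streamed chunk carries "completion_start_time".
    if "completion_start_time" in chunk.meta:
        print("first token received at:", chunk.meta["completion_start_time"])


generator = OpenAIChatGenerator(model="gpt-4o-mini", streaming_callback=streaming_callback)

request_start = datetime.now()  # our own start-of-request timestamp
result = generator.run(messages=[ChatMessage.from_user("Say hello.")])
reply = result["replies"][0]

# Because _connect_chunks now merges chunk meta into the final reply, the
# timestamp is also available after the run completes.
first_token_time = datetime.fromisoformat(reply.meta["completion_start_time"])
print("TTFT:", (first_token_time - request_start).total_seconds(), "seconds")
```

Note that `completion_start_time` is an ISO-8601 timestamp of the first streamed chunk, not a duration, so callers subtract their own request start time to obtain the TTFT latency.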