Update TranscriptProcessor to handle both simple string content and content lists

pipecat-ai · Dec 18, 2024 · 4211664 · 4211664
1 parent 1f8a217
commit 4211664
Showing 1 changed file with 14 additions and 4 deletions.
diff --git a/src/pipecat/processors/transcript_processor.py b/src/pipecat/processors/transcript_processor.py
@@ -71,7 +71,9 @@ def _extract_messages(self, messages: List[dict]) -> List[TranscriptionMessage]:
         """Extract conversation messages from standard format.
 
         Args:
-            messages: List of messages in standard format with structured content
+            messages: List of messages in OpenAI format, which can be either:
+                - Simple format: {"role": "user", "content": "Hello"}
+                - Content list: {"role": "user", "content": [{"type": "text", "text": "Hello"}]}
 
         Returns:
             List[TranscriptionMessage]: Normalized conversation messages
@@ -82,9 +84,17 @@ def _extract_messages(self, messages: List[dict]) -> List[TranscriptionMessage]:
             if msg["role"] not in ("user", "assistant"):
                 continue
 
-            content = msg.get("content", [])
-            if isinstance(content, list):
-                # Extract text from structured content
+            if "content" not in msg:
+                logger.warning(f"Message missing content field: {msg}")
+                continue
+
+            content = msg.get("content")
+            if isinstance(content, str):
+                # Handle simple string content
+                if content:
+                    result.append(TranscriptionMessage(role=msg["role"], content=content))
+            elif isinstance(content, list):
+                # Handle structured content
                 text_parts = []
                 for part in content:
                     if isinstance(part, dict) and part.get("type") == "text":