Skip to content

Commit

Permalink
TranscriptProcessor to handle simple and list content
Browse files (browse the repository at this point in the history)
  • Loading branch information
markbackman committed Dec 18, 2024
1 parent 1f8a217 commit 4211664
Showing 1 changed file with 14 additions and 4 deletions.
18 changes: 14 additions & 4 deletions src/pipecat/processors/transcript_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,9 @@ def _extract_messages(self, messages: List[dict]) -> List[TranscriptionMessage]:
"""Extract conversation messages from standard format.
Args:
- messages: List of messages in standard format with structured content
+ messages: List of messages in OpenAI format, which can be either:
+ - Simple format: {"role": "user", "content": "Hello"}
+ - Content list: {"role": "user", "content": [{"type": "text", "text": "Hello"}]}
Returns:
List[TranscriptionMessage]: Normalized conversation messages
Expand All @@ -82,9 +84,17 @@ def _extract_messages(self, messages: List[dict]) -> List[TranscriptionMessage]:
if msg["role"] not in ("user", "assistant"):
continue

- content = msg.get("content", [])
- if isinstance(content, list):
- # Extract text from structured content
+ if "content" not in msg:
+ logger.warning(f"Message missing content field: {msg}")
+ continue
+
+ content = msg.get("content")
+ if isinstance(content, str):
+ # Handle simple string content
+ if content:
+ result.append(TranscriptionMessage(role=msg["role"], content=content))
+ elif isinstance(content, list):
+ # Handle structured content
text_parts = []
for part in content:
if isinstance(part, dict) and part.get("type") == "text":
Expand Down

0 comments on commit 4211664

Please sign in to comment.