Skip to content

Commit

Permalink
Code review changes
Browse files Browse the repository at this point in the history
  • Loading branch information
markbackman committed Dec 18, 2024
1 parent b5bd662 commit 1f8a217
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 79 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
format.
- New examples: `28a-transcription-processor-openai.py`,
`28b-transcription-processor-anthropic.py`, and
`28c-transcription-processor-gemini.py`
`28c-transcription-processor-gemini.py`.

- Add support for more languages to ElevenLabs (Arabic, Croatian, Filipino,
Tamil) and PlayHT (Afrikaans, Albanian, Amharic, Arabic, Bengali, Croatian,
Expand Down
42 changes: 6 additions & 36 deletions examples/foundational/07a-interruptible-anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,19 @@
import asyncio
import os
import sys
from typing import List

import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure

from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import Frame, LLMMessagesFrame
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.frame_processor import FrameDirection
from pipecat.services.anthropic import AnthropicLLMContext, AnthropicLLMService
from pipecat.services.anthropic import AnthropicLLMService
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport

Expand All @@ -31,28 +29,6 @@
logger.add(sys.stderr, level="DEBUG")


class TestAnthropicLLMService(AnthropicLLMService):
    """Debug wrapper around AnthropicLLMService that logs message-format round-trips.

    For every LLMMessagesFrame it logs the messages three times: as received
    (OpenAI standard format), after conversion to Anthropic format via
    AnthropicLLMContext.from_messages, and after converting each Anthropic
    message back to the standard format via to_standard_messages. All other
    frames pass through untouched. Processing is then delegated to the parent
    class, so LLM behavior is unchanged — this subclass only adds logging.
    """

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        # Only message frames are inspected; other frame types fall through
        # to the superclass below.
        if isinstance(frame, LLMMessagesFrame):
            logger.info("Original OpenAI format messages:")
            logger.info(frame.messages)

            # Convert to Anthropic format
            context = AnthropicLLMContext.from_messages(frame.messages)
            logger.info("Converted to Anthropic format:")
            logger.info(context.messages)

            # Convert back to OpenAI format.
            # to_standard_messages returns a list per input message (a single
            # Anthropic message may map to multiple standard messages), so the
            # results are flattened with extend.
            openai_messages = []
            for msg in context.messages:
                converted = context.to_standard_messages(msg)
                openai_messages.extend(converted)
            logger.info("Converted back to OpenAI format:")
            logger.info(openai_messages)

        # Always defer to the base implementation for the actual LLM work.
        await super().process_frame(frame, direction)


async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
Expand All @@ -74,24 +50,18 @@ async def main():
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
)

llm = TestAnthropicLLMService(
llm = AnthropicLLMService(
api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-3-opus-20240229"
)

# Test messages including various formats
# todo: think more about how to handle system prompts in a more general way. OpenAI,
# Google, and Anthropic all have slightly different approaches to providing a system
# prompt.
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative, helpful, and brief way. Say hello.",
},
{
"role": "assistant",
"content": [
{"type": "text", "text": "Hello! How can I help you today?"},
{"type": "text", "text": "I'm ready to assist."},
],
},
{"role": "user", "content": "Hi there!"},
]

context = OpenAILLMContext(messages)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ async def on_transcript_update(processor, frame):
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
await task.queue_frames([LLMMessagesFrame(messages)])
await task.queue_frames([context_aggregator.user().get_context_frame()])

runner = PipelineRunner()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ async def on_transcript_update(processor, frame):
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
await task.queue_frames([LLMMessagesFrame(messages)])
await task.queue_frames([context_aggregator.user().get_context_frame()])

runner = PipelineRunner()

Expand Down
30 changes: 29 additions & 1 deletion src/pipecat/frames/frames.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#

from dataclasses import dataclass, field
from typing import Any, List, Literal, Mapping, Optional, Tuple, TypeAlias
from typing import Any, List, Literal, Mapping, Optional, Tuple

from pipecat.audio.vad.vad_analyzer import VADParams
from pipecat.clocks.base_clock import BaseClock
Expand Down Expand Up @@ -240,6 +240,34 @@ class TranscriptionUpdateFrame(DataFrame):
This frame is emitted when new messages are added to the conversation history,
containing only the newly added messages rather than the full transcript.
Messages have normalized roles (user/assistant) regardless of the LLM service used.
Messages are always in the OpenAI standard message format, which supports both:
Simple format:
[
{
"role": "user",
"content": "Hi, how are you?"
},
{
"role": "assistant",
"content": "Great! And you?"
}
]
Content list format:
[
{
"role": "user",
"content": [{"type": "text", "text": "Hi, how are you?"}]
},
{
"role": "assistant",
"content": [{"type": "text", "text": "Great! And you?"}]
}
]
OpenAI supports both formats. Anthropic and Google messages are converted to the
content list format.
"""

messages: List[TranscriptionMessage]
Expand Down
58 changes: 19 additions & 39 deletions src/pipecat/processors/aggregators/openai_llm_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,59 +112,39 @@ def get_messages_for_logging(self) -> str:
msgs.append(msg)
return json.dumps(msgs)

def from_standard_message(self, message) -> dict:
"""Convert standard format message to OpenAI format.
def from_standard_message(self, message):
"""Convert from OpenAI message format to OpenAI message format (passthrough).
Converts structured content back to OpenAI's simple string format.
OpenAI's format allows both simple string content and structured content:
- Simple: {"role": "user", "content": "Hello"}
- Structured: {"role": "user", "content": [{"type": "text", "text": "Hello"}]}
Since OpenAI is our standard format, this is a passthrough function.
Args:
message: Message in standard format:
{
"role": "user/assistant",
"content": [{"type": "text", "text": str}]
}
message (dict): Message in OpenAI format
Returns:
Message in OpenAI format:
{
"role": "user/assistant",
"content": str
}
dict: Same message, unchanged
"""
# If content is already a string, return as-is
if isinstance(message.get("content"), str):
return message

# Convert structured content to string
if isinstance(message.get("content"), list):
text_parts = []
for part in message["content"]:
if part.get("type") == "text":
text_parts.append(part["text"])

return {"role": message["role"], "content": " ".join(text_parts) if text_parts else ""}

return message

def to_standard_messages(self, obj) -> list:
"""Convert OpenAI message to standard structured format.
"""Convert from OpenAI message format to OpenAI message format (passthrough).
OpenAI's format is our standard format throughout Pipecat. This function
returns a list containing the original message to maintain consistency with
other LLM services that may need to return multiple messages.
Args:
obj: Message in OpenAI format {"role": "user", "content": "text"}
obj (dict): Message in OpenAI format with either:
- Simple content: {"role": "user", "content": "Hello"}
- List content: {"role": "user", "content": [{"type": "text", "text": "Hello"}]}
Returns:
List containing message with structured content:
[{"role": "user", "content": [{"type": "text", "text": "message"}]}]
list: List containing the original messages, preserving whether
the content was in simple string or structured list format
"""
# Skip messages without content
if not obj.get("content"):
return []

# Convert simple string content to structured format
if isinstance(obj["content"], str):
return [{"role": obj["role"], "content": [{"type": "text", "text": obj["content"]}]}]

# Return original message if content is already structured
return [obj]

def get_messages_for_initializing_history(self):
Expand Down

0 comments on commit 1f8a217

Please sign in to comment.