Skip to content

Commit

Permalink
Merge pull request #853 from pipecat-ai/revert-849-aleix/no-need-for-…
Browse files Browse the repository at this point in the history
…super-process-frame

Revert "no longer necessary to call super().process_frame(frame, direction)"
  • Loading branch information
aconchillo authored Dec 13, 2024
2 parents 10f854a + 6d11911 commit f8e69cf
Show file tree
Hide file tree
Showing 57 changed files with 212 additions and 56 deletions.
6 changes: 0 additions & 6 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
Tamil) and PlayHT (Afrikaans, Albanian, Amharic, Arabic, Bengali, Croatian,
Galician, Hebrew, Mandarin, Serbian, Tagalog, Urdu, Xhosa).

### Changed

- It's no longer necessary to call `super().process_frame(frame, direction)` if
you subclass and implement `FrameProcessor.process_frame()`. This is all now
done internally and will avoid possible issues if you forget to add it.

### Deprecated

- `AWSTTSService` is now deprecated, use `PollyTTSService` instead.
Expand Down
2 changes: 2 additions & 0 deletions examples/foundational/05-sync-speech-and-image.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ def __init__(self):
self.prepend_to_next_text_frame = False

async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)

if isinstance(frame, MonthFrame):
self.most_recent_month = frame.month
elif self.prepend_to_next_text_frame and isinstance(frame, TextFrame):
Expand Down
6 changes: 6 additions & 0 deletions examples/foundational/05a-local-sync-speech-and-image.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ def __init__(self):
self.text = ""

async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)

if isinstance(frame, TextFrame):
self.text = frame.text
await self.push_frame(frame, direction)
Expand All @@ -73,6 +75,8 @@ def __init__(self):
self.frame = None

async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)

if isinstance(frame, TTSAudioRawFrame):
self.audio.extend(frame.audio)
self.frame = OutputAudioRawFrame(
Expand All @@ -86,6 +90,8 @@ def __init__(self):
self.frame = None

async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)

if isinstance(frame, URLImageRawFrame):
self.frame = frame
await self.push_frame(frame, direction)
Expand Down
2 changes: 2 additions & 0 deletions examples/foundational/06a-image-sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ def __init__(self, speaking_path: str, waiting_path: str):
self._waiting_image_bytes = self._waiting_image.tobytes()

async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)

if not isinstance(frame, SystemFrame) and direction == FrameDirection.DOWNSTREAM:
await self.push_frame(
OutputImageRawFrame(
Expand Down
5 changes: 5 additions & 0 deletions examples/foundational/07s-interruptible-google-audio-in.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ def __init__(self, context, user_context_aggregator):
self._user_speaking = False

async def process_frame(self, frame, direction):
await super().process_frame(frame, direction)

if isinstance(frame, TranscriptionFrame):
# We could gracefully handle both audio input and text/transcription input ...
# but let's leave that as an exercise to the reader. :-)
Expand Down Expand Up @@ -124,6 +126,7 @@ def reset(self):
self._accumulating_transcript = False

async def process_frame(self, frame, direction):
await super().process_frame(frame, direction)
if isinstance(frame, LLMFullResponseStartFrame):
self._processing_llm_response = True
self._accumulating_transcript = True
Expand Down Expand Up @@ -177,6 +180,8 @@ def add_transcript_back_to_inference_output(self):
self._context.messages[-1].parts[-1].text += f"\n\n{marker}\n{self._transcript}\n"

async def process_frame(self, frame, direction):
await super().process_frame(frame, direction)

if isinstance(frame, MagicDemoTranscriptionFrame):
self._transcript = frame.text
elif isinstance(frame, LLMFullResponseEndFrame) or isinstance(
Expand Down
2 changes: 2 additions & 0 deletions examples/foundational/09-mirror.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@

class MirrorProcessor(FrameProcessor):
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)

if isinstance(frame, InputAudioRawFrame):
await self.push_frame(
OutputAudioRawFrame(
Expand Down
2 changes: 2 additions & 0 deletions examples/foundational/09a-local-mirror.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@

class MirrorProcessor(FrameProcessor):
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)

if isinstance(frame, InputAudioRawFrame):
await self.push_frame(
OutputAudioRawFrame(
Expand Down
4 changes: 4 additions & 0 deletions examples/foundational/11-sound-effects.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@

class OutboundSoundEffectWrapper(FrameProcessor):
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)

if isinstance(frame, LLMFullResponseEndFrame):
await self.push_frame(sounds["ding1.wav"])
# In case anything else downstream needs it
Expand All @@ -70,6 +72,8 @@ async def process_frame(self, frame: Frame, direction: FrameDirection):

class InboundSoundEffectWrapper(FrameProcessor):
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)

if isinstance(frame, OpenAILLMContextFrame):
await self.push_frame(sounds["ding2.wav"])
# In case anything else downstream needs it
Expand Down
2 changes: 2 additions & 0 deletions examples/foundational/12-describe-video.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ def set_participant_id(self, participant_id: str):
self._participant_id = participant_id

async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)

if self._participant_id and isinstance(frame, TextFrame):
await self.push_frame(
UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
Expand Down
2 changes: 2 additions & 0 deletions examples/foundational/12a-describe-video-gemini-flash.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ def set_participant_id(self, participant_id: str):
self._participant_id = participant_id

async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)

if self._participant_id and isinstance(frame, TextFrame):
await self.push_frame(
UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
Expand Down
2 changes: 2 additions & 0 deletions examples/foundational/12b-describe-video-gpt-4o.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ def set_participant_id(self, participant_id: str):
self._participant_id = participant_id

async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)

if self._participant_id and isinstance(frame, TextFrame):
await self.push_frame(
UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
Expand Down
2 changes: 2 additions & 0 deletions examples/foundational/12c-describe-video-anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ def set_participant_id(self, participant_id: str):
self._participant_id = participant_id

async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)

if self._participant_id and isinstance(frame, TextFrame):
await self.push_frame(
UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
Expand Down
2 changes: 2 additions & 0 deletions examples/foundational/13-whisper-transcription.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@

class TranscriptionLogger(FrameProcessor):
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)

if isinstance(frame, TranscriptionFrame):
print(f"Transcription: {frame.text}")

Expand Down
2 changes: 2 additions & 0 deletions examples/foundational/13a-whisper-local.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@

class TranscriptionLogger(FrameProcessor):
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)

if isinstance(frame, TranscriptionFrame):
print(f"Transcription: {frame.text}")

Expand Down
2 changes: 2 additions & 0 deletions examples/foundational/13b-deepgram-transcription.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@

class TranscriptionLogger(FrameProcessor):
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)

if isinstance(frame, TranscriptionFrame):
print(f"Transcription: {frame.text}")

Expand Down
2 changes: 2 additions & 0 deletions examples/foundational/13c-gladia-transcription.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@

class TranscriptionLogger(FrameProcessor):
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)

if isinstance(frame, TranscriptionFrame):
print(f"Transcription: {frame.text}")

Expand Down
2 changes: 2 additions & 0 deletions examples/foundational/13d-assemblyai-transcription.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@

class TranscriptionLogger(FrameProcessor):
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)

if isinstance(frame, TranscriptionFrame):
print(f"Transcription: {frame.text}")

Expand Down
4 changes: 4 additions & 0 deletions examples/foundational/22b-natural-conversation-proposal.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def __init__(self, notifier: BaseNotifier, **kwargs):
self._notifier = notifier

async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)
# We must not block system frames.
if isinstance(frame, SystemFrame):
await self.push_frame(frame, direction)
Expand Down Expand Up @@ -117,6 +118,7 @@ def __init__(self, notifier: BaseNotifier):
self._notifier = notifier

async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)
if isinstance(frame, TextFrame) and frame.text == "YES":
logger.debug("Completeness check YES")
await self.push_frame(UserStoppedSpeakingFrame())
Expand All @@ -139,6 +141,8 @@ def open_gate(self):
self._gate_open = True

async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)

# We must not block system frames.
if isinstance(frame, SystemFrame):
if isinstance(frame, StartFrame):
Expand Down
41 changes: 23 additions & 18 deletions examples/foundational/22c-natural-conversation-mixed-llms.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,12 @@
Examples:
# Complete Wh-question
[{"role": "assistant", "content": "I can help you learn."},
[{"role": "assistant", "content": "I can help you learn."},
{"role": "user", "content": "What's the fastest way to learn Spanish"}]
Output: YES
# Complete Yes/No question despite STT error
[{"role": "assistant", "content": "I know about planets."},
[{"role": "assistant", "content": "I know about planets."},
{"role": "user", "content": "Is is Jupiter the biggest planet"}]
Output: YES
Expand All @@ -118,12 +118,12 @@
Examples:
# Direct instruction
[{"role": "assistant", "content": "I can explain many topics."},
[{"role": "assistant", "content": "I can explain many topics."},
{"role": "user", "content": "Tell me about black holes"}]
Output: YES
# Action demand
[{"role": "assistant", "content": "I can help with math."},
[{"role": "assistant", "content": "I can help with math."},
{"role": "user", "content": "Solve this equation x plus 5 equals 12"}]
Output: YES
Expand All @@ -134,12 +134,12 @@
Examples:
# Specific answer
[{"role": "assistant", "content": "What's your favorite color?"},
[{"role": "assistant", "content": "What's your favorite color?"},
{"role": "user", "content": "I really like blue"}]
Output: YES
# Option selection
[{"role": "assistant", "content": "Would you prefer morning or evening?"},
[{"role": "assistant", "content": "Would you prefer morning or evening?"},
{"role": "user", "content": "Morning"}]
Output: YES
Expand All @@ -153,17 +153,17 @@
Examples:
# Self-correction reaching completion
[{"role": "assistant", "content": "What would you like to know?"},
[{"role": "assistant", "content": "What would you like to know?"},
{"role": "user", "content": "Tell me about... no wait, explain how rainbows form"}]
Output: YES
# Topic change with complete thought
[{"role": "assistant", "content": "The weather is nice today."},
[{"role": "assistant", "content": "The weather is nice today."},
{"role": "user", "content": "Actually can you tell me who invented the telephone"}]
Output: YES
# Mid-sentence completion
[{"role": "assistant", "content": "Hello I'm ready."},
[{"role": "assistant", "content": "Hello I'm ready."},
{"role": "user", "content": "What's the capital of? France"}]
Output: YES
Expand All @@ -175,12 +175,12 @@
Examples:
# Acknowledgment
[{"role": "assistant", "content": "Should we talk about history?"},
[{"role": "assistant", "content": "Should we talk about history?"},
{"role": "user", "content": "Sure"}]
Output: YES
# Disagreement with completion
[{"role": "assistant", "content": "Is that what you meant?"},
[{"role": "assistant", "content": "Is that what you meant?"},
{"role": "user", "content": "No not really"}]
Output: YES
Expand All @@ -194,12 +194,12 @@
Examples:
# Word repetition but complete
[{"role": "assistant", "content": "I can help with that."},
[{"role": "assistant", "content": "I can help with that."},
{"role": "user", "content": "What what is the time right now"}]
Output: YES
# Missing punctuation but complete
[{"role": "assistant", "content": "I can explain that."},
[{"role": "assistant", "content": "I can explain that."},
{"role": "user", "content": "Please tell me how computers work"}]
Output: YES
Expand All @@ -211,12 +211,12 @@
Examples:
# Filler words but complete
[{"role": "assistant", "content": "What would you like to know?"},
[{"role": "assistant", "content": "What would you like to know?"},
{"role": "user", "content": "Um uh how do airplanes fly"}]
Output: YES
# Thinking pause but incomplete
[{"role": "assistant", "content": "I can explain anything."},
[{"role": "assistant", "content": "I can explain anything."},
{"role": "user", "content": "Well um I want to know about the"}]
Output: NO
Expand All @@ -241,17 +241,17 @@
Examples:
# Incomplete despite corrections
[{"role": "assistant", "content": "What would you like to know about?"},
[{"role": "assistant", "content": "What would you like to know about?"},
{"role": "user", "content": "Can you tell me about"}]
Output: NO
# Complete despite multiple artifacts
[{"role": "assistant", "content": "I can help you learn."},
[{"role": "assistant", "content": "I can help you learn."},
{"role": "user", "content": "How do you I mean what's the best way to learn programming"}]
Output: YES
# Trailing off incomplete
[{"role": "assistant", "content": "I can explain anything."},
[{"role": "assistant", "content": "I can explain anything."},
{"role": "user", "content": "I was wondering if you could tell me why"}]
Output: NO
"""
Expand All @@ -268,6 +268,7 @@ def __init__(self, notifier: BaseNotifier, **kwargs):
self._notifier = notifier

async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)
# We must not block system frames.
if isinstance(frame, SystemFrame):
await self.push_frame(frame, direction)
Expand Down Expand Up @@ -319,6 +320,8 @@ def __init__(self, notifier: BaseNotifier):
self._notifier = notifier

async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)

if isinstance(frame, TextFrame) and frame.text == "YES":
logger.debug("!!! Completeness check YES")
await self.push_frame(UserStoppedSpeakingFrame())
Expand All @@ -341,6 +344,8 @@ def open_gate(self):
self._gate_open = True

async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)

# We must not block system frames.
if isinstance(frame, SystemFrame):
if isinstance(frame, StartFrame):
Expand Down
Loading

0 comments on commit f8e69cf

Please sign in to comment.