Skip to content

Commit

Permalink
Merge pull request #595 from pipecat-ai/aleix/bot-speaking-system-frames
Browse files Browse the repository at this point in the history
bot speaking system frames
  • Loading branch information
aconchillo authored Oct 15, 2024
2 parents 79b52d4 + 4a71eac commit 0c4a513
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 82 deletions.
152 changes: 76 additions & 76 deletions src/pipecat/frames/frames.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,17 @@ def __str__(self):
return f"{self.name}(message: {self.message})"


@dataclass
class FunctionCallResultFrame(DataFrame):
    """A frame containing the result of an LLM function (tool) call.

    Carries the outcome of an application-side function handler back into
    the pipeline so the LLM context can be updated with it.
    """

    function_name: str  # name of the function the LLM asked to call
    tool_call_id: str  # id correlating this result with the originating call
    arguments: str  # the call's arguments (serialized; presumably JSON — TODO confirm)
    result: Any  # whatever the application's function handler returned
    run_llm: bool = True  # NOTE(review): presumably triggers another LLM inference after the result lands — confirm with consumers


#
# App frames. Application user-defined frames.
#
Expand Down Expand Up @@ -393,6 +404,25 @@ class StopInterruptionFrame(SystemFrame):
pass


@dataclass
class UserStartedSpeakingFrame(SystemFrame):
    """Signals that the VAD has detected the start of user speech.

    Useful for interruption handling, or whenever knowing *that* someone is
    speaking matters more than knowing *what* they are saying (the latter
    arrives later as a TranscriptionFrame).
    """


@dataclass
class UserStoppedSpeakingFrame(SystemFrame):
    """Signals that the VAD has detected the end of user speech."""


@dataclass
class BotInterruptionFrame(SystemFrame):
"""Emitted by when the bot should be interrupted. This will mainly cause the
Expand All @@ -404,6 +434,52 @@ class BotInterruptionFrame(SystemFrame):
pass


@dataclass
class BotStartedSpeakingFrame(SystemFrame):
    """Sent upstream by transport outputs when the bot begins speaking."""


@dataclass
class BotStoppedSpeakingFrame(SystemFrame):
    """Sent upstream by transport outputs when the bot finishes speaking."""


@dataclass
class BotSpeakingFrame(SystemFrame):
    """Sent upstream by transport outputs for as long as the bot keeps
    speaking.

    One use is idle detection: while the bot holds the floor we should not
    fire a user-idle timeout, since the user may simply be listening.
    """


@dataclass
class UserImageRequestFrame(SystemFrame):
    """A frame used to request an image from the given user.

    `context` may carry arbitrary requester-defined data that travels with
    the request.
    """

    user_id: str
    context: Optional[Any] = None

    def __str__(self):
        return "{}, user: {}".format(self.name, self.user_id)


@dataclass
class FunctionCallInProgressFrame(SystemFrame):
    """A frame signaling that a function call is in progress.

    Emitted when the LLM has requested a tool call and the application-side
    handler has started but not yet produced a result.
    """

    function_name: str  # name of the function being executed
    tool_call_id: str  # id correlating this in-progress marker with the call
    arguments: str  # the call's arguments (serialized; presumably JSON — TODO confirm)


@dataclass
class TransportMessageUrgentFrame(SystemFrame):
message: Any
Expand Down Expand Up @@ -457,51 +533,6 @@ class LLMFullResponseEndFrame(ControlFrame):
pass


@dataclass
class UserStartedSpeakingFrame(ControlFrame):
    """Signals that the VAD has detected the start of user speech.

    Useful for interruption handling, or whenever knowing *that* someone is
    speaking matters more than knowing *what* they are saying (the latter
    arrives later as a TranscriptionFrame).
    """


@dataclass
class UserStoppedSpeakingFrame(ControlFrame):
    """Signals that the VAD has detected the end of user speech."""


@dataclass
class BotStartedSpeakingFrame(ControlFrame):
    """Sent upstream by transport outputs when the bot begins speaking."""


@dataclass
class BotStoppedSpeakingFrame(ControlFrame):
    """Sent upstream by transport outputs when the bot finishes speaking."""


@dataclass
class BotSpeakingFrame(ControlFrame):
    """Sent upstream by transport outputs for as long as the bot keeps
    speaking.

    One use is idle detection: while the bot holds the floor we should not
    fire a user-idle timeout, since the user may simply be listening.
    """


@dataclass
class TTSStartedFrame(ControlFrame):
"""Used to indicate the beginning of a TTS response. Following
Expand All @@ -522,17 +553,6 @@ class TTSStoppedFrame(ControlFrame):
pass


@dataclass
class UserImageRequestFrame(ControlFrame):
    """A frame used to request an image from the given user.

    `context` may carry arbitrary requester-defined data that travels with
    the request.
    """

    user_id: str
    context: Optional[Any] = None

    def __str__(self):
        return "{}, user: {}".format(self.name, self.user_id)


@dataclass
class ServiceUpdateSettingsFrame(ControlFrame):
"""A control frame containing a request to update service settings."""
Expand All @@ -555,26 +575,6 @@ class STTUpdateSettingsFrame(ServiceUpdateSettingsFrame):
pass


@dataclass
class FunctionCallInProgressFrame(SystemFrame):
    """A frame signaling that a function call is in progress.

    Emitted when the LLM has requested a tool call and the application-side
    handler has started but not yet produced a result.
    """

    function_name: str  # name of the function being executed
    tool_call_id: str  # id correlating this in-progress marker with the call
    arguments: str  # the call's arguments (serialized; presumably JSON — TODO confirm)


@dataclass
class FunctionCallResultFrame(DataFrame):
    """A frame containing the result of an LLM function (tool) call.

    Carries the outcome of an application-side function handler back into
    the pipeline so the LLM context can be updated with it.
    """

    function_name: str  # name of the function the LLM asked to call
    tool_call_id: str  # id correlating this result with the originating call
    arguments: str  # the call's arguments (serialized; presumably JSON — TODO confirm)
    result: Any  # whatever the application's function handler returned
    run_llm: bool = True  # NOTE(review): presumably triggers another LLM inference after the result lands — confirm with consumers


@dataclass
class VADParamsUpdateFrame(ControlFrame):
"""A control frame containing a request to update VAD params. Intended
Expand Down
16 changes: 10 additions & 6 deletions src/pipecat/processors/frameworks/rtvi.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,14 +459,18 @@ async def process_frame(self, frame: Frame, direction: FrameDirection):

await self.push_frame(frame, direction)

if isinstance(frame, TextFrame):
if isinstance(frame, UserStartedSpeakingFrame):
await self._push_aggregation()
elif isinstance(frame, TextFrame):
self._aggregation += frame.text
if match_endofsentence(self._aggregation):
message = RTVIBotTranscriptionMessage(
data=RTVITextMessageData(text=self._aggregation)
)
await self._push_transport_message_urgent(message)
self._aggregation = ""
await self._push_aggregation()

async def _push_aggregation(self):
    """Flush any aggregated bot-transcription text to the transport.

    Sends the buffered text as an urgent RTVI bot-transcription message and
    clears the buffer. No-op when nothing has been aggregated.
    """
    if not self._aggregation:
        return
    message = RTVIBotTranscriptionMessage(
        data=RTVITextMessageData(text=self._aggregation)
    )
    await self._push_transport_message_urgent(message)
    # Clear only after a successful send, matching the original ordering.
    self._aggregation = ""


class RTVIBotLLMProcessor(RTVIFrameProcessor):
Expand Down

0 comments on commit 0c4a513

Please sign in to comment.