From 250458579eee018201005d30e457a919f6bcc490 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Wed, 15 May 2024 23:35:52 -0700 Subject: [PATCH] examples: update 07-interruptible --- .../foundational/06-listen-and-respond.py | 2 +- examples/foundational/06a-image-sync.py | 2 +- examples/foundational/07-interruptible.py | 81 +++++++++++-------- examples/moondream-chatbot/bot.py | 2 +- examples/simple-chatbot/bot.py | 2 +- 5 files changed, 50 insertions(+), 39 deletions(-) diff --git a/examples/foundational/06-listen-and-respond.py b/examples/foundational/06-listen-and-respond.py index 4e5d0758f..3ba220912 100644 --- a/examples/foundational/06-listen-and-respond.py +++ b/examples/foundational/06-listen-and-respond.py @@ -65,7 +65,7 @@ async def main(room_url: str, token): messages = [ { "role": "system", - "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so it should not contain special characters. Respond to what the user said in a creative and helpful way.", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so never use special characters in your answers. Respond to what the user said in a creative and helpful way.", }, ] tma_in = LLMUserResponseAggregator(messages) diff --git a/examples/foundational/06a-image-sync.py b/examples/foundational/06a-image-sync.py index 73878976f..77278f21d 100644 --- a/examples/foundational/06a-image-sync.py +++ b/examples/foundational/06a-image-sync.py @@ -83,7 +83,7 @@ async def main(room_url: str, token): messages = [ { "role": "system", - "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so it should not contain special characters. Respond to what the user said in a creative and helpful way.", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so never use special characters in your answers. Respond to what the user said in a creative and helpful way.", }, ] diff --git a/examples/foundational/07-interruptible.py b/examples/foundational/07-interruptible.py index fd0c2f842..de7ceb8c5 100644 --- a/examples/foundational/07-interruptible.py +++ b/examples/foundational/07-interruptible.py @@ -1,26 +1,33 @@ +# +# Copyright (c) 2024, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + import asyncio import aiohttp -import logging import os -from pipecat.pipeline.aggregators import ( - LLMAssistantResponseAggregator, - LLMUserResponseAggregator, -) +import sys +from pipecat.frames.frames import LLMMessagesFrame from pipecat.pipeline.pipeline import Pipeline -from pipecat.services.ai_services import FrameLogger -from pipecat.transports.daily_transport import DailyTransport -from pipecat.services.open_ai_services import OpenAILLMService -from pipecat.services.elevenlabs_ai_services import ElevenLabsTTSService +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineTask +from pipecat.processors.aggregators.llm_response import ( + LLMAssistantResponseAggregator, LLMUserResponseAggregator) +from pipecat.services.elevenlabs import ElevenLabsTTSService +from pipecat.services.openai import OpenAILLMService +from pipecat.transports.services.daily import DailyParams, DailyTransport from runner import configure +from loguru import logger + from dotenv import load_dotenv load_dotenv(override=True) -logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s") -logger = logging.getLogger("pipecat") -logger.setLevel(logging.DEBUG) +logger.remove(0) +logger.add(sys.stderr, level="TRACE") async def main(room_url: str, token): @@ -29,12 +36,12 @@ async def main(room_url: str, token): room_url, token, "Respond bot", - duration_minutes=5, - start_transcription=True, - mic_enabled=True, - mic_sample_rate=16000, - camera_enabled=False, - vad_enabled=True, + DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + transcription_enabled=True, + vad_enabled=True, + ) ) tts = ElevenLabsTTSService( @@ -47,27 +54,31 @@ async def main(room_url: str, token): api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4-turbo-preview") - pipeline = Pipeline([FrameLogger(), llm, FrameLogger(), tts]) + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so never use special characters. Respond to what the user said in a creative and helpful way.", + }, + ] - @transport.event_handler("on_first_other_participant_joined") - async def on_first_other_participant_joined(transport, participant): - await transport.say("Hi, I'm listening!", tts) + tma_in = LLMUserResponseAggregator(messages) + tma_out = LLMAssistantResponseAggregator(messages) - async def run_conversation(): - messages = [ - { - "role": "system", - "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio. Respond to what the user said in a creative and helpful way.", - }, - ] + pipeline = Pipeline([transport.input(), tma_in, llm, tts, tma_out, transport.output()]) - await transport.run_interruptible_pipeline( - pipeline, - post_processor=LLMAssistantResponseAggregator(messages), - pre_processor=LLMUserResponseAggregator(messages), - ) + task = PipelineTask(pipeline, allow_interruptions=True) + + @transport.event_handler("on_first_participant_joined") + async def on_first_participant_joined(transport, participant): + transport.capture_participant_transcription(participant["id"]) + # Kick off the conversation. + messages.append( + {"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMMessagesFrame(messages)]) + + runner = PipelineRunner() - await asyncio.gather(transport.run(), run_conversation()) + await runner.run(task) if __name__ == "__main__": diff --git a/examples/moondream-chatbot/bot.py b/examples/moondream-chatbot/bot.py index 238a05f67..4a731d379 100644 --- a/examples/moondream-chatbot/bot.py +++ b/examples/moondream-chatbot/bot.py @@ -163,7 +163,7 @@ async def main(room_url: str, token): messages = [ { "role": "system", - "content": f"You are Chatbot, a friendly, helpful robot. Let the user know that you are capable of chatting or describing what you see. Your goal is to demonstrate your capabilities in a succinct way. Reply with only '{user_request_answer}' if the user asks you to describe what you see. Your output will be converted to audio so never include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by introducing yourself.", + "content": f"You are Chatbot, a friendly, helpful robot. Let the user know that you are capable of chatting or describing what you see. Your goal is to demonstrate your capabilities in a succinct way. Reply with only '{user_request_answer}' if the user asks you to describe what you see. Your output will be converted to audio so never use special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by introducing yourself.", }, ] diff --git a/examples/simple-chatbot/bot.py b/examples/simple-chatbot/bot.py index e7be4732d..bde15aee8 100644 --- a/examples/simple-chatbot/bot.py +++ b/examples/simple-chatbot/bot.py @@ -126,7 +126,7 @@ async def main(room_url: str, token): # # English # - "content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by introducing yourself.", + "content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so never use special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by introducing yourself.", # # Spanish