Commit 14f309c (1 parent: 9e6fabf) · 6 changed files with 269 additions and 26 deletions
@@ -0,0 +1,188 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import asyncio
import os
import sys

import aiohttp
from dotenv import load_dotenv
from loguru import logger
from PIL import Image
from runner import configure

from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.audio.vad.vad_analyzer import VADParams
from pipecat.frames.frames import (
    BotStartedSpeakingFrame,
    BotStoppedSpeakingFrame,
    EndFrame,
    Frame,
    LLMMessagesFrame,
    OutputImageRawFrame,
    SpriteFrame,
)
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.processors.frameworks.rtvi import (
    RTVIBotTranscriptionProcessor,
    RTVIMetricsProcessor,
    RTVISpeakingProcessor,
    RTVIUserTranscriptionProcessor,
)
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport

load_dotenv(override=True)

logger.remove(0)
logger.add(sys.stderr, level="DEBUG")

sprites = []

script_dir = os.path.dirname(__file__)
for i in range(1, 26):
    # Build the full path to the image file
    full_path = os.path.join(script_dir, f"assets/robot0{i}.png")
    # Open the image and convert it to raw bytes for the sprite frame
    with Image.open(full_path) as img:
        sprites.append(OutputImageRawFrame(image=img.tobytes(), size=img.size, format=img.format))
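# Append the sprites in reverse order so the talking animation cycles forward and then back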
flipped = sprites[::-1]
sprites.extend(flipped)

# When the bot isn't talking, show a static image of the robot listening
quiet_frame = sprites[0]
talking_frame = SpriteFrame(images=sprites)


class TalkingAnimation(FrameProcessor):
    """Switches to the talking sprite animation when a BotStartedSpeakingFrame arrives
    and back to the quiet sprite when a BotStoppedSpeakingFrame arrives.
    """
    def __init__(self):
        super().__init__()
        self._is_talking = False

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        if isinstance(frame, BotStartedSpeakingFrame):
            if not self._is_talking:
                await self.push_frame(talking_frame)
                self._is_talking = True
        elif isinstance(frame, BotStoppedSpeakingFrame):
            await self.push_frame(quiet_frame)
            self._is_talking = False

        await self.push_frame(frame, direction)


async def main():
    async with aiohttp.ClientSession() as session:
        (room_url, token) = await configure(session)
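        # Transport configuration: 16 kHz audio in and 24 kHz audio out (the sample
        # rates Gemini Multimodal Live works with), 1024x576 video out for the sprite
        # animation, and Silero VAD with a 0.5 second stop threshold.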
        transport = DailyTransport(
            room_url,
            token,
            "Chatbot",
            DailyParams(
                audio_in_sample_rate=16000,
                audio_out_sample_rate=24000,
                audio_out_enabled=True,
                camera_out_enabled=True,
                camera_out_width=1024,
                camera_out_height=576,
                vad_enabled=True,
                vad_audio_passthrough=True,
                vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
            ),
        )
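        # Gemini Multimodal Live handles speech in and speech out directly, so no
        # separate STT/TTS services are used in this pipeline. Transcription of both
        # the user and the model audio is enabled so the RTVI transcription
        # processors below have text to forward.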
        llm = GeminiMultimodalLiveLLMService(
            api_key=os.getenv("GEMINI_API_KEY"),
            voice_id="Puck",  # Aoede, Charon, Fenrir, Kore, Puck
            transcribe_user_audio=True,
            transcribe_model_audio=True,
        )
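        # The bot's instructions are provided as the first user message in the context.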
        messages = [
            {
                "role": "user",
                "content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by introducing yourself.",
            },
        ]

        context = OpenAILLMContext(messages)
        context_aggregator = llm.create_context_aggregator(context)

        ta = TalkingAnimation()

        # RTVI event processors

        # This will send `user-*-speaking` and `bot-*-speaking` messages.
        rtvi_speaking = RTVISpeakingProcessor()

        # This will emit UserTranscript events.
        rtvi_user_transcription = RTVIUserTranscriptionProcessor()

        # This will emit BotTranscript events.
        rtvi_bot_transcription = RTVIBotTranscriptionProcessor()

        # This will send `metrics` messages.
        rtvi_metrics = RTVIMetricsProcessor()
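        # Frames flow through the processors in list order: user audio enters from the
        # transport, passes through the user context aggregator into the LLM, the RTVI
        # processors report speaking, transcription, and metrics events, the talking
        # animation swaps sprites on bot speaking frames, and the transport output
        # sends the bot's audio and video into the Daily room.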
        pipeline = Pipeline(
            [
                transport.input(),
                context_aggregator.user(),
                llm,
                rtvi_speaking,
                rtvi_user_transcription,
                rtvi_bot_transcription,
                ta,
                rtvi_metrics,
                transport.output(),
                context_aggregator.assistant(),
            ]
        )
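        # Interruptions are allowed and metrics are enabled so the RTVI metrics
        # processor has data to report. The quiet frame is queued up front so the
        # robot shows its idle sprite before anyone speaks.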
        task = PipelineTask(
            pipeline,
            PipelineParams(
                allow_interruptions=True,
                enable_metrics=True,
                enable_usage_metrics=True,
            ),
        )
        await task.queue_frame(quiet_frame)
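        # Kick off the conversation when the first participant joins: capture their
        # transcription and push the initial context to the LLM so the bot introduces itself.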
        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            await transport.capture_participant_transcription(participant["id"])
            await task.queue_frames([context_aggregator.user().get_context_frame()])

        @transport.event_handler("on_participant_left")
        async def on_participant_left(transport, participant, reason):
            print(f"Participant left: {participant}")
            await task.queue_frame(EndFrame())

        runner = PipelineRunner()

        await runner.run(task)


if __name__ == "__main__":
    asyncio.run(main())
@@ -1,4 +1,6 @@
DAILY_SAMPLE_ROOM_URL=https://yourdomain.daily.co/yourroom # (for joining the bot to the same room repeatedly for local dev)
DAILY_API_KEY=7df...
OPENAI_API_KEY=sk-PL...
GEMINI_API_KEY=AIza...
ELEVENLABS_API_KEY=aeb...
BOT_IMPLEMENTATION= # Options: 'openai' or 'gemini'