diff --git a/examples/simple-chatbot/README.md b/examples/simple-chatbot/README.md
index 9254138e8..e9b06855d 100644
--- a/examples/simple-chatbot/README.md
+++ b/examples/simple-chatbot/README.md
@@ -2,7 +2,18 @@
-This repository demonstrates a simple AI chatbot with real-time audio/video interaction, implemented in three different ways. The bot server remains the same, but you can connect to it using three different client approaches.
+This repository demonstrates a simple AI chatbot with real-time audio/video interaction. The bot server supports multiple AI backends, and you can connect to it using any of three client approaches.
+
+## Two Bot Options
+
+1. **OpenAI Bot** (Default)
+
+ - Uses gpt-4o for conversation
+ - Requires OpenAI API key
+
+2. **Gemini Bot**
+ - Uses Google's Gemini Multimodal Live model
+ - Requires Gemini API key
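+
+The server chooses between them based on the `BOT_IMPLEMENTATION` variable in `.env` (see Quick Start below). A hypothetical sketch of that dispatch, for illustration only:
+
+```python
+import os
+
+# Illustrative sketch only; the actual dispatch lives in server.py.
+# BOT_IMPLEMENTATION defaults to the OpenAI bot when unset.
+bot_file = (
+    "bot-gemini.py"
+    if os.getenv("BOT_IMPLEMENTATION", "openai").strip().lower() == "gemini"
+    else "bot-openai.py"
+)
+```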
## Three Ways to Connect
@@ -13,13 +24,13 @@ This repository demonstrates a simple AI chatbot with real-time audio/video inte
2. **JavaScript**
- - Basic implementation using RTVI JavaScript SDK
+ - Basic implementation using [Pipecat JavaScript SDK](https://docs.pipecat.ai/client/reference/js/introduction)
- No framework dependencies
- Good for learning the fundamentals
3. **React**
- - Basic impelmentation using RTVI React SDK
- - Demonstrates the basic client principles with RTVI React
+   - Basic implementation using [Pipecat React SDK](https://docs.pipecat.ai/client/reference/react/introduction)
+ - Demonstrates the basic client principles with Pipecat React
## Quick Start
@@ -38,8 +49,12 @@ This repository demonstrates a simple AI chatbot with real-time audio/video inte
```bash
pip install -r requirements.txt
```
-4. Copy env.example to .env and add your credentials
-
+4. Copy `env.example` to `.env` and configure:
+ - Add your API keys
+ - Choose your bot implementation:
+ ```ini
+ BOT_IMPLEMENTATION= # Options: 'openai' (default) or 'gemini'
+ ```
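+   A filled-in `.env` might look like this (values are placeholders; the `DAILY_API_KEY` name is an assumption based on the runner):
+   ```ini
+   DAILY_API_KEY=your_daily_api_key
+   OPENAI_API_KEY=your_openai_api_key      # OpenAI bot
+   GEMINI_API_KEY=your_gemini_api_key      # Gemini bot
+   ELEVENLABS_API_KEY=your_elevenlabs_key  # TTS for the OpenAI bot
+   BOT_IMPLEMENTATION=openai
+   ```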
5. Start the server:
```bash
python server.py
@@ -48,7 +63,7 @@ This repository demonstrates a simple AI chatbot with real-time audio/video inte
### Next, connect using your preferred client app:
- [Daily Prebuilt](examples/prebuilt/README.md)
-- [Vanilla JavaScript Guide](examples/javascript/README.md)
+- [JavaScript Guide](examples/javascript/README.md)
- [React Guide](examples/react/README.md)
## Important Note
@@ -60,21 +75,23 @@ The bot server must be running for any of the client implementations to work. St
- Python 3.10+
- Node.js 16+ (for JavaScript and React implementations)
- Daily API key
-- OpenAI API key
-- Cartesia API key
+- OpenAI API key (for OpenAI bot)
+- Gemini API key (for Gemini bot)
+- ElevenLabs API key (for OpenAI bot)
- Modern web browser with WebRTC support
## Project Structure
```
-simple-chatbot-full-stack/
-├── server/ # Bot server implementation
-│ ├── bot.py # Bot logic and media handling
-│ ├── runner.py # Server runner utilities
-│ ├── server.py # FastAPI server
+simple-chatbot/
+├── server/ # Bot server implementation
+│ ├── bot-openai.py # OpenAI bot implementation
+│ ├── bot-gemini.py # Gemini bot implementation
+│ ├── runner.py # Server runner utilities
+│ ├── server.py # FastAPI server
│ └── requirements.txt
-└── examples/ # Client implementations
- ├── prebuilt/ # Daily Prebuilt connection
- ├── javascript/ # JavaScript RTVI client
- └── react/ # React RTVI client
+└── examples/ # Client implementations
+ ├── prebuilt/ # Daily Prebuilt connection
+ ├── javascript/ # Pipecat JavaScript client
+ └── react/ # Pipecat React client
```
diff --git a/examples/simple-chatbot/examples/javascript/README.md b/examples/simple-chatbot/examples/javascript/README.md
index f07c9d243..74525c1c1 100644
--- a/examples/simple-chatbot/examples/javascript/README.md
+++ b/examples/simple-chatbot/examples/javascript/README.md
@@ -1,10 +1,10 @@
# JavaScript Implementation
-Basic implementation using the RTVI JavaScript SDK.
+Basic implementation using the [Pipecat JavaScript SDK](https://docs.pipecat.ai/client/reference/js/introduction).
## Setup
-1. Run the bot server; see [README](../../README).
+1. Run the bot server. See the [server README](../../README.md).
2. Navigate to the `examples/javascript` directory:
diff --git a/examples/simple-chatbot/examples/react/README.md b/examples/simple-chatbot/examples/react/README.md
index a7ff4be09..44775a083 100644
--- a/examples/simple-chatbot/examples/react/README.md
+++ b/examples/simple-chatbot/examples/react/README.md
@@ -1,6 +1,6 @@
# React Implementation
-Basic implementation using the RTVI React SDK.
+Basic implementation using the [Pipecat React SDK](https://docs.pipecat.ai/client/reference/react/introduction).
## Setup
diff --git a/examples/simple-chatbot/examples/react/index.html b/examples/simple-chatbot/examples/react/index.html
index 38ce1ffd7..154e0a75a 100644
--- a/examples/simple-chatbot/examples/react/index.html
+++ b/examples/simple-chatbot/examples/react/index.html
@@ -4,7 +4,7 @@
-    <title>RTVI React Client</title>
+    <title>Pipecat React Client</title>
diff --git a/examples/simple-chatbot/server/bot-gemini.py b/examples/simple-chatbot/server/bot-gemini.py
index a81e213c9..991df1cd1 100644
--- a/examples/simple-chatbot/server/bot-gemini.py
+++ b/examples/simple-chatbot/server/bot-gemini.py
@@ -4,6 +4,18 @@
# SPDX-License-Identifier: BSD 2-Clause License
#
+"""Gemini Bot Implementation.
+
+This module implements a chatbot using Google's Gemini Multimodal Live model.
+It includes:
+- Real-time audio/video interaction through Daily
+- Animated robot avatar
+- Speech-to-speech conversation (no separate TTS service needed)
+
+The bot runs as part of a pipeline that processes audio/video frames and manages
+the conversation flow using Gemini's streaming capabilities.
+"""
+
import asyncio
import os
import sys
@@ -21,7 +33,6 @@
BotStoppedSpeakingFrame,
EndFrame,
Frame,
- LLMMessagesFrame,
OutputImageRawFrame,
SpriteFrame,
)
@@ -47,7 +58,6 @@
logger.add(sys.stderr, level="DEBUG")
sprites = []
-
script_dir = os.path.dirname(__file__)
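+# Load sequential animation frames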
for i in range(1, 26):
@@ -58,18 +68,20 @@
with Image.open(full_path) as img:
sprites.append(OutputImageRawFrame(image=img.tobytes(), size=img.size, format=img.format))
+# Create a smooth animation by adding reversed frames
flipped = sprites[::-1]
sprites.extend(flipped)
-# When the bot isn't talking, show a static image of the cat listening
-quiet_frame = sprites[0]
-talking_frame = SpriteFrame(images=sprites)
+# Define static and animated states
+quiet_frame = sprites[0] # Static frame for when bot is listening
+talking_frame = SpriteFrame(images=sprites) # Animation sequence for when bot is talking
class TalkingAnimation(FrameProcessor):
- """This class starts a talking animation when it receives an first AudioFrame.
+ """Manages the bot's visual animation states.
- It then returns to a "quiet" sprite when it sees a TTSStoppedFrame.
+ Switches between static (listening) and animated (talking) states based on
+ the bot's current speaking status.
"""
def __init__(self):
@@ -77,12 +89,20 @@ def __init__(self):
self._is_talking = False
async def process_frame(self, frame: Frame, direction: FrameDirection):
+ """Process incoming frames and update animation state.
+
+ Args:
+ frame: The incoming frame to process
+ direction: The direction of frame flow in the pipeline
+ """
await super().process_frame(frame, direction)
+ # Switch to talking animation when bot starts speaking
if isinstance(frame, BotStartedSpeakingFrame):
if not self._is_talking:
await self.push_frame(talking_frame)
self._is_talking = True
+ # Return to static frame when bot stops speaking
elif isinstance(frame, BotStoppedSpeakingFrame):
await self.push_frame(quiet_frame)
self._is_talking = False
@@ -91,9 +111,19 @@ async def process_frame(self, frame: Frame, direction: FrameDirection):
async def main():
+ """Main bot execution function.
+
+ Sets up and runs the bot pipeline including:
+ - Daily video transport with specific audio parameters
+ - Gemini Live multimodal model integration
+ - Voice activity detection
+ - Animation processing
+ - RTVI event handling
+ """
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
+ # Set up Daily transport with specific audio/video parameters for Gemini
transport = DailyTransport(
room_url,
token,
@@ -111,6 +141,7 @@ async def main():
),
)
+ # Initialize the Gemini Multimodal Live model
llm = GeminiMultimodalLiveLLMService(
api_key=os.getenv("GEMINI_API_KEY"),
voice_id="Puck", # Aoede, Charon, Fenrir, Kore, Puck
@@ -125,12 +156,16 @@ async def main():
},
]
+ # Set up conversation context and management
+ # The context_aggregator will automatically collect conversation context
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
ta = TalkingAnimation()
- # RTVI
+ #
+ # RTVI events for Pipecat client UI
+ #
# This will send `user-*-speaking` and `bot-*-speaking` messages.
rtvi_speaking = RTVISpeakingProcessor()
diff --git a/examples/simple-chatbot/server/bot-openai.py b/examples/simple-chatbot/server/bot-openai.py
index 4db90c932..a3a68c839 100644
--- a/examples/simple-chatbot/server/bot-openai.py
+++ b/examples/simple-chatbot/server/bot-openai.py
@@ -4,6 +4,19 @@
# SPDX-License-Identifier: BSD 2-Clause License
#
+"""OpenAI Bot Implementation.
+
+This module implements a chatbot using OpenAI's GPT-4o model for natural language
+processing. It includes:
+- Real-time audio/video interaction through Daily
+- Animated robot avatar
+- Text-to-speech using ElevenLabs
+- Support for both English and Spanish
+
+The bot runs as part of a pipeline that processes audio/video frames and manages
+the conversation flow.
+"""
+
import asyncio
import os
import sys
@@ -40,14 +53,13 @@
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
-
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
sprites = []
-
script_dir = os.path.dirname(__file__)
+# Load sequential animation frames
for i in range(1, 26):
# Build the full path to the image file
full_path = os.path.join(script_dir, f"assets/robot0{i}.png")
@@ -56,18 +68,20 @@
with Image.open(full_path) as img:
sprites.append(OutputImageRawFrame(image=img.tobytes(), size=img.size, format=img.format))
+# Create a smooth animation by adding reversed frames
flipped = sprites[::-1]
sprites.extend(flipped)
-# When the bot isn't talking, show a static image of the cat listening
-quiet_frame = sprites[0]
-talking_frame = SpriteFrame(images=sprites)
+# Define static and animated states
+quiet_frame = sprites[0] # Static frame for when bot is listening
+talking_frame = SpriteFrame(images=sprites) # Animation sequence for when bot is talking
class TalkingAnimation(FrameProcessor):
- """This class starts a talking animation when it receives an first AudioFrame.
+ """Manages the bot's visual animation states.
- It then returns to a "quiet" sprite when it sees a TTSStoppedFrame.
+ Switches between static (listening) and animated (talking) states based on
+ the bot's current speaking status.
"""
def __init__(self):
@@ -75,12 +89,20 @@ def __init__(self):
self._is_talking = False
async def process_frame(self, frame: Frame, direction: FrameDirection):
+ """Process incoming frames and update animation state.
+
+ Args:
+ frame: The incoming frame to process
+ direction: The direction of frame flow in the pipeline
+ """
await super().process_frame(frame, direction)
+ # Switch to talking animation when bot starts speaking
if isinstance(frame, BotStartedSpeakingFrame):
if not self._is_talking:
await self.push_frame(talking_frame)
self._is_talking = True
+ # Return to static frame when bot stops speaking
elif isinstance(frame, BotStoppedSpeakingFrame):
await self.push_frame(quiet_frame)
self._is_talking = False
@@ -89,9 +111,19 @@ async def process_frame(self, frame: Frame, direction: FrameDirection):
async def main():
+ """Main bot execution function.
+
+ Sets up and runs the bot pipeline including:
+ - Daily video transport
+ - Speech-to-text and text-to-speech services
+ - Language model integration
+ - Animation processing
+ - RTVI event handling
+ """
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
+ # Set up Daily transport with video/audio parameters
transport = DailyTransport(
room_url,
token,
@@ -115,6 +147,7 @@ async def main():
),
)
+ # Initialize text-to-speech service
tts = ElevenLabsTTSService(
api_key=os.getenv("ELEVENLABS_API_KEY"),
#
@@ -128,6 +161,7 @@ async def main():
# voice_id="gD1IexrzCvsXPHUuT0s3",
)
+ # Initialize LLM service
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
messages = [
@@ -144,12 +178,16 @@ async def main():
},
]
+ # Set up conversation context and management
+ # The context_aggregator will automatically collect conversation context
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
ta = TalkingAnimation()
- # RTVI
+ #
+ # RTVI events for Pipecat client UI
+ #
# This will send `user-*-speaking` and `bot-*-speaking` messages.
rtvi_speaking = RTVISpeakingProcessor()