diff --git a/examples/foundational/26d-gemini-multimodal-live-text.py b/examples/foundational/26d-gemini-multimodal-live-text.py index 760af39ce..493c2983a 100644 --- a/examples/foundational/26d-gemini-multimodal-live-text.py +++ b/examples/foundational/26d-gemini-multimodal-live-text.py @@ -9,7 +9,6 @@ import sys import aiohttp -from agent.services.tts.cartesia_multilingual import CartesiaMultiLingualTTSService from dotenv import load_dotenv from loguru import logger from runner import configure @@ -19,7 +18,11 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask -from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService +from pipecat.services.cartesia import CartesiaTTSService +from pipecat.services.gemini_multimodal_live.gemini import ( + GeminiMultimodalLiveLLMService, + GeminiMultimodalModalities, +) from pipecat.transports.services.daily import DailyParams, DailyTransport load_dotenv(override=True) @@ -53,10 +56,16 @@ async def main(): llm = GeminiMultimodalLiveLLMService( api_key=os.getenv("GOOGLE_API_KEY"), # system_instruction="Talk like a pirate." + transcribe_user_audio=True, + transcribe_model_audio=True, ) - llm.set_model_only_text() # This forces model to produce text only responses + llm.set_model_modalities( + GeminiMultimodalModalities.TEXT + ) # This forces model to produce text only responses - tts = CartesiaMultiLingualTTSService(api_key=os.getenv("CARTESIA_API_KEY")) + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22" + ) pipeline = Pipeline( [