From 3fa00c4db8acee91918978d57eb1ef15c731912d Mon Sep 17 00:00:00 2001
From: Moishe Lettvin
Date: Thu, 8 Feb 2024 09:41:51 -0500
Subject: [PATCH] Cleanup constructor args in examples

---
 src/samples/foundational/01-say-one-thing.py     |  2 +-
 src/samples/foundational/02-llm-say-one-thing.py |  3 +--
 src/samples/foundational/03-still-frame.py       |  9 ++++-----
 .../foundational/04-utterance-and-speech.py      |  8 +++-----
 .../foundational/05-sync-speech-and-image.py     | 11 +++++------
 .../foundational/06-listen-and-respond.py        |  4 ----
 src/samples/foundational/08-bots-arguing.py      | 16 ++++++----------
 src/samples/foundational/10-wake-word.py         | 11 ++++++-----
 src/samples/foundational/11-sound-effects.py     | 11 +++--------
 .../foundational/13-whisper-transcription.py     | 12 +++++-------
 10 files changed, 34 insertions(+), 53 deletions(-)

diff --git a/src/samples/foundational/01-say-one-thing.py b/src/samples/foundational/01-say-one-thing.py
index a6e3f820b..fc21b11e4 100644
--- a/src/samples/foundational/01-say-one-thing.py
+++ b/src/samples/foundational/01-say-one-thing.py
@@ -23,8 +23,8 @@ async def main(room_url):
             None,
             "Say One Thing",
             meeting_duration_minutes,
+            mic_enabled=True
         )
-        transport._mic_enabled = True
         tts = ElevenLabsTTSService(aiohttp_session=session, api_key=os.getenv("ELEVENLABS_API_KEY"), voice_id=os.getenv("ELEVENLABS_VOICE_ID"))
 
         # Register an event handler so we can play the audio when the participant joins.
diff --git a/src/samples/foundational/02-llm-say-one-thing.py b/src/samples/foundational/02-llm-say-one-thing.py
index b27bd7205..4f9e4c37c 100644
--- a/src/samples/foundational/02-llm-say-one-thing.py
+++ b/src/samples/foundational/02-llm-say-one-thing.py
@@ -1,4 +1,3 @@
-import argparse
 import asyncio
 import os
 
@@ -20,8 +19,8 @@ async def main(room_url):
             None,
             "Say One Thing From an LLM",
             duration_minutes=meeting_duration_minutes,
+            mic_enabled=True
         )
-        transport._mic_enabled = True
         tts = ElevenLabsTTSService(aiohttp_session=session, api_key=os.getenv("ELEVENLABS_API_KEY"), voice_id=os.getenv("ELEVENLABS_VOICE_ID"))
         # tts = AzureTTSService(api_key=os.getenv("AZURE_SPEECH_API_KEY"), region=os.getenv("AZURE_SPEECH_REGION"))
 
diff --git a/src/samples/foundational/03-still-frame.py b/src/samples/foundational/03-still-frame.py
index e18f20bb6..654f30aef 100644
--- a/src/samples/foundational/03-still-frame.py
+++ b/src/samples/foundational/03-still-frame.py
@@ -1,4 +1,3 @@
-import argparse
 import asyncio
 import aiohttp
 import os
@@ -23,11 +22,11 @@ async def main(room_url):
             None,
             "Show a still frame image",
             duration_minutes=meeting_duration_minutes,
+            mic_enabled=False,
+            camera_enabled=True,
+            camera_width=1024,
+            camera_height=1024
         )
-        transport._mic_enabled = False
-        transport._camera_enabled = True
-        transport._camera_width = 1024
-        transport._camera_height = 1024
         imagegen = FalImageGenService(image_size="1024x1024", aiohttp_session=session, key_id=os.getenv("FAL_KEY_ID"), key_secret=os.getenv("FAL_KEY_SECRET"))
         # imagegen = OpenAIImageGenService(aiohttp_session=session, api_key=os.getenv("OPENAI_DALLE_API_KEY"), image_size="1024x1024")
 
diff --git a/src/samples/foundational/04-utterance-and-speech.py b/src/samples/foundational/04-utterance-and-speech.py
index 9ac8a8f81..9da73688c 100644
--- a/src/samples/foundational/04-utterance-and-speech.py
+++ b/src/samples/foundational/04-utterance-and-speech.py
@@ -1,7 +1,5 @@
-import argparse
 import asyncio
 import os
-import re
 
 import aiohttp
 
@@ -19,10 +17,10 @@ async def main(room_url: str):
             None,
             "Static And Dynamic Speech",
             duration_minutes=1,
+            mic_enabled=True,
+            mic_sample_rate=16000,
+            camera_enabled=False
         )
-        transport._mic_enabled = True
-        transport._mic_sample_rate = 16000
-        transport._camera_enabled = False
         llm = AzureLLMService(api_key=os.getenv("AZURE_CHATGPT_API_KEY"), endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"), model=os.getenv("AZURE_CHATGPT_MODEL"))
         azure_tts = AzureTTSService(api_key=os.getenv("AZURE_SPEECH_API_KEY"), region=os.getenv("AZURE_SPEECH_REGION"))
 
diff --git a/src/samples/foundational/05-sync-speech-and-image.py b/src/samples/foundational/05-sync-speech-and-image.py
index af4e2d71d..7d660a8c3 100644
--- a/src/samples/foundational/05-sync-speech-and-image.py
+++ b/src/samples/foundational/05-sync-speech-and-image.py
@@ -1,4 +1,3 @@
-import argparse
 import asyncio
 import aiohttp
 import os
@@ -20,12 +19,12 @@ async def main(room_url):
             None,
             "Month Narration Bot",
             duration_minutes=meeting_duration_minutes,
+            mic_enabled=True,
+            camera_enabled=True,
+            mic_sample_rate=16000,
+            camera_width=1024,
+            camera_height=1024
         )
-        transport._mic_enabled = True
-        transport._camera_enabled = True
-        transport._mic_sample_rate = 16000
-        transport._camera_width = 1024
-        transport._camera_height = 1024
         llm = AzureLLMService(api_key=os.getenv("AZURE_CHATGPT_API_KEY"), endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"), model=os.getenv("AZURE_CHATGPT_MODEL"))
         tts = ElevenLabsTTSService(aiohttp_session=session, api_key=os.getenv("ELEVENLABS_API_KEY"), voice_id="ErXwobaYiN019PkySvjV")
 
diff --git a/src/samples/foundational/06-listen-and-respond.py b/src/samples/foundational/06-listen-and-respond.py
index a54880559..2ec9db755 100644
--- a/src/samples/foundational/06-listen-and-respond.py
+++ b/src/samples/foundational/06-listen-and-respond.py
@@ -7,10 +7,6 @@ from samples.foundational.support.runner import configure
 
 
 async def main(room_url: str, token):
-    global transport
-    global llm
-    global tts
-
     transport = DailyTransportService(
         room_url,
         token,
diff --git a/src/samples/foundational/08-bots-arguing.py b/src/samples/foundational/08-bots-arguing.py
index 3fd040578..c897953cd 100644
--- a/src/samples/foundational/08-bots-arguing.py
+++ b/src/samples/foundational/08-bots-arguing.py
@@ -12,21 +12,17 @@ async def main(room_url:str):
 
     async with aiohttp.ClientSession() as session:
 
-        global transport
-        global llm
-        global tts
-
         transport = DailyTransportService(
             room_url,
             None,
             "Respond bot",
-            duration_minutes=10
+            duration_minutes=10,
+            mic_enabled=True,
+            mic_sample_rate=16000,
+            camera_enabled=True,
+            camera_width=1024,
+            camera_height=1024
         )
-        transport._mic_enabled = True
-        transport._mic_sample_rate = 16000
-        transport._camera_enabled = True
-        transport._camera_width = 1024
-        transport._camera_height = 1024
         llm = AzureLLMService(api_key=os.getenv("AZURE_CHATGPT_API_KEY"), endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"), model=os.getenv("AZURE_CHATGPT_MODEL"))
         tts1 = AzureTTSService(api_key=os.getenv("AZURE_SPEECH_API_KEY"), region=os.getenv("AZURE_SPEECH_REGION"))
 
diff --git a/src/samples/foundational/10-wake-word.py b/src/samples/foundational/10-wake-word.py
index 8bd99e2e6..4f02a6537 100644
--- a/src/samples/foundational/10-wake-word.py
+++ b/src/samples/foundational/10-wake-word.py
@@ -105,16 +105,17 @@ async def process_frame(self, frame: QueueFrame) -> AsyncGenerator[QueueFrame, N
 
 async def main(room_url: str, token):
     async with aiohttp.ClientSession() as session:
-        global transport
-        global llm
-        global tts
-
         transport = DailyTransportService(
             room_url,
             token,
             "Santa Cat",
             duration_minutes=3,
-            start_transcription=True
+            start_transcription=True,
+            mic_enabled=True,
+            mic_sample_rate=16000,
+            camera_enabled=True,
+            camera_width=720,
+            camera_height=1280
         )
         transport._mic_enabled = True
         transport._mic_sample_rate = 16000
diff --git a/src/samples/foundational/11-sound-effects.py b/src/samples/foundational/11-sound-effects.py
index a454780e2..af74f1694 100644
--- a/src/samples/foundational/11-sound-effects.py
+++ b/src/samples/foundational/11-sound-effects.py
@@ -65,20 +65,15 @@ async def process_frame(self, frame: QueueFrame) -> AsyncGenerator[QueueFrame, N
 
 async def main(room_url: str, token):
     async with aiohttp.ClientSession() as session:
-
-        global transport
-        global llm
-        global tts
-
         transport = DailyTransportService(
             room_url,
             token,
             "Respond bot",
             duration_minutes=5,
+            mic_enabled=True,
+            mic_sample_rate=16000,
+            camera_enabled=False
         )
-        transport._mic_enabled = True
-        transport._mic_sample_rate = 16000
-        transport._camera_enabled = False
         llm = AzureLLMService(api_key=os.getenv("AZURE_CHATGPT_API_KEY"), endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"), model=os.getenv("AZURE_CHATGPT_MODEL"))
         tts = ElevenLabsTTSService(aiohttp_session=session, api_key=os.getenv("ELEVENLABS_API_KEY"), voice_id="ErXwobaYiN019PkySvjV")
 
diff --git a/src/samples/foundational/13-whisper-transcription.py b/src/samples/foundational/13-whisper-transcription.py
index 001666563..031be8c7d 100644
--- a/src/samples/foundational/13-whisper-transcription.py
+++ b/src/samples/foundational/13-whisper-transcription.py
@@ -6,18 +6,16 @@
 from samples.foundational.support.runner import configure
 
 async def main(room_url: str):
-    global transport
-    global stt
-
     transport = DailyTransportService(
         room_url,
         None,
         "Transcription bot",
-        start_transcription=True
+        start_transcription=True,
+        mic_enabled=False,
+        camera_enabled=False,
+        speaker_enabled=True
     )
-    transport._mic_enabled = False
-    transport._camera_enabled = False
-    transport._speaker_enabled = True
+
     stt = WhisperSTTService()
 
     transcription_output_queue = asyncio.Queue()
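
Usage note: the pattern this patch standardizes on is passing transport configuration as constructor arguments instead of assigning underscore-prefixed attributes after construction. A minimal sketch, using only argument names that appear in the hunks above; the bot name and duration values are illustrative, the import path is assumed from the samples' package layout, and DailyTransportService may accept more parameters than shown:

    # Assumed import path, following the dailyai package layout these samples use.
    from dailyai.services.daily_transport_service import DailyTransportService

    transport = DailyTransportService(
        room_url,               # Daily room URL, positional as in the samples
        None,                   # meeting token; the samples pass None or a token
        "Example Bot",          # bot display name (illustrative)
        duration_minutes=5,
        mic_enabled=True,       # was: transport._mic_enabled = True
        mic_sample_rate=16000,  # was: transport._mic_sample_rate = 16000
        camera_enabled=False,   # was: transport._camera_enabled = False
    )

Routing these values through the constructor means a transport is fully configured at the point it is created, and the samples no longer reach into private attributes.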