changed default services (#47)
chadbailey59 authored Mar 8, 2024
1 parent 8241dc0 commit 5d46302
Showing 8 changed files with 472 additions and 63 deletions.
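
Across the example scripts below, the default LLM switches from AzureLLMService to OpenAILLMService, the default TTS switches from AzureTTSService to ElevenLabsTTSService, and several examples replace the hard-coded ElevenLabs voice id with an ELEVENLABS_VOICE_ID environment variable. A minimal sketch of the new default setup, assembled only from the constructor calls visible in the hunks below (the make_default_services wrapper is illustrative, not part of the commit):

import os

import aiohttp

from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
from dailyai.services.open_ai_services import OpenAILLMService


def make_default_services(session: aiohttp.ClientSession):
    # New defaults used across the updated examples (see the diffs below).
    llm = OpenAILLMService(
        api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview"
    )
    tts = ElevenLabsTTSService(
        aiohttp_session=session,
        api_key=os.getenv("ELEVENLABS_API_KEY"),
        voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
    )
    return llm, tts

Per the hunks, the updated examples expect OPENAI_CHATGPT_API_KEY, ELEVENLABS_API_KEY, and ELEVENLABS_VOICE_ID in the environment (plus FAL_KEY_ID / FAL_KEY_SECRET for the Fal image-generation examples).
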
src/examples/foundational/03-still-frame.py (2 changes: 0 additions & 2 deletions)

@@ -38,8 +38,6 @@ async def main(room_url):
         key_id=os.getenv("FAL_KEY_ID"),
         key_secret=os.getenv("FAL_KEY_SECRET"),
     )
-    # imagegen = OpenAIImageGenService(aiohttp_session=session, api_key=os.getenv("OPENAI_DALLE_API_KEY"), image_size="1024x1024")
-    # imagegen = AzureImageGenServiceREST(image_size="1024x1024", aiohttp_session=session, api_key=os.getenv("AZURE_DALLE_API_KEY"), endpoint=os.getenv("AZURE_DALLE_ENDPOINT"), model=os.getenv("AZURE_DALLE_MODEL"))

     image_task = asyncio.create_task(
         imagegen.run_to_queue(

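For 03-still-frame.py the only change is dropping the two commented-out image-generation alternatives (OpenAI DALL-E and Azure); FalImageGenService remains the image generator. For reference, a sketch of the Fal setup assembled from the hunks in this commit (the make_imagegen wrapper itself is illustrative):

import os

import aiohttp

from dailyai.services.fal_ai_services import FalImageGenService


def make_imagegen(session: aiohttp.ClientSession) -> FalImageGenService:
    # Argument names as they appear in the diffs; FAL_KEY_ID and FAL_KEY_SECRET
    # must be set in the environment.
    return FalImageGenService(
        image_size="1024x1024",
        aiohttp_session=session,
        key_id=os.getenv("FAL_KEY_ID"),
        key_secret=os.getenv("FAL_KEY_SECRET"),
    )
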
src/examples/foundational/05-sync-speech-and-image.py (18 changes: 6 additions & 12 deletions)

@@ -18,15 +18,10 @@
     LLMResponseStartFrame,
 )
 from dailyai.pipeline.pipeline import Pipeline
-from dailyai.services.azure_ai_services import (
-    AzureLLMService,
-    AzureImageGenServiceREST,
-    AzureTTSService,
-)
 from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
 from dailyai.services.daily_transport_service import DailyTransportService
 from dailyai.services.fal_ai_services import FalImageGenService
-from dailyai.services.open_ai_services import OpenAIImageGenService
+from dailyai.services.open_ai_services import OpenAILLMService

 from examples.support.runner import configure

@@ -50,15 +45,14 @@ async def main(room_url):
         camera_height=1024,
     )

-    llm = AzureLLMService(
-        api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
-        endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
-        model=os.getenv("AZURE_CHATGPT_MODEL"),
-    )
     tts = ElevenLabsTTSService(
         aiohttp_session=session,
         api_key=os.getenv("ELEVENLABS_API_KEY"),
-        voice_id="ErXwobaYiN019PkySvjV",
+        voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
     )

+    llm = OpenAILLMService(
+        api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview"
+    )
+
     dalle = FalImageGenService(

src/examples/foundational/05a-local-sync-speech-and-text.py (14 changes: 7 additions & 7 deletions)

@@ -6,7 +6,7 @@
 import os

 from dailyai.pipeline.frames import AudioFrame, ImageFrame
-from dailyai.services.azure_ai_services import AzureLLMService
+from dailyai.services.open_ai_services import OpenAILLMService
 from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
 from dailyai.services.fal_ai_services import FalImageGenService
 from dailyai.services.local_transport_service import LocalTransportService

@@ -31,16 +31,16 @@ async def main(room_url):
         tk_root=tk_root,
     )

-    llm = AzureLLMService(
-        api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
-        endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
-        model=os.getenv("AZURE_CHATGPT_MODEL"),
-    )
     tts = ElevenLabsTTSService(
         aiohttp_session=session,
         api_key=os.getenv("ELEVENLABS_API_KEY"),
-        voice_id="ErXwobaYiN019PkySvjV",
+        voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
     )
+
+    llm = OpenAILLMService(
+        api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview"
+    )
+
     dalle = FalImageGenService(
         image_size="1024x1024",
         aiohttp_session=session,

src/examples/foundational/06a-image-sync.py (18 changes: 10 additions & 8 deletions)

@@ -11,12 +11,13 @@

 from dailyai.pipeline.frames import ImageFrame, Frame
 from dailyai.services.daily_transport_service import DailyTransportService
-from dailyai.services.azure_ai_services import AzureLLMService, AzureTTSService
 from dailyai.services.ai_services import AIService
 from dailyai.pipeline.aggregators import (
     LLMAssistantContextAggregator,
     LLMUserContextAggregator,
 )
+from dailyai.services.open_ai_services import OpenAILLMService
+from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
 from dailyai.services.fal_ai_services import FalImageGenService
 from examples.support.runner import configure

@@ -53,15 +54,16 @@ async def main(room_url: str, token):
     transport._mic_enabled = True
     transport._mic_sample_rate = 16000

-    llm = AzureLLMService(
-        api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
-        endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
-        model=os.getenv("AZURE_CHATGPT_MODEL"),
+    tts = ElevenLabsTTSService(
+        aiohttp_session=session,
+        api_key=os.getenv("ELEVENLABS_API_KEY"),
+        voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
     )
-    tts = AzureTTSService(
-        api_key=os.getenv("AZURE_SPEECH_API_KEY"),
-        region=os.getenv("AZURE_SPEECH_REGION"),
+
+    llm = OpenAILLMService(
+        api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview"
     )
+
     img = FalImageGenService(
         image_size="1024x1024",
         aiohttp_session=session,

src/examples/foundational/07-interruptible.py (17 changes: 9 additions & 8 deletions)

@@ -12,7 +12,8 @@
 from dailyai.pipeline.pipeline import Pipeline
 from dailyai.services.ai_services import FrameLogger
 from dailyai.services.daily_transport_service import DailyTransportService
-from dailyai.services.azure_ai_services import AzureLLMService, AzureTTSService
+from dailyai.services.open_ai_services import OpenAILLMService
+from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
 from examples.support.runner import configure

 logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s")

@@ -34,14 +35,14 @@ async def main(room_url: str, token):
         vad_enabled=True,
     )

-    llm = AzureLLMService(
-        api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
-        endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
-        model=os.getenv("AZURE_CHATGPT_MODEL"),
+    tts = ElevenLabsTTSService(
+        aiohttp_session=session,
+        api_key=os.getenv("ELEVENLABS_API_KEY"),
+        voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
     )
-    tts = AzureTTSService(
-        api_key=os.getenv("AZURE_SPEECH_API_KEY"),
-        region=os.getenv("AZURE_SPEECH_REGION"),
+
+    llm = OpenAILLMService(
+        api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview"
     )

     pipeline = Pipeline([FrameLogger(), llm, FrameLogger(), tts])

src/examples/foundational/10-wake-word.py (9 changes: 4 additions & 5 deletions)

@@ -7,7 +7,7 @@
 from PIL import Image

 from dailyai.services.daily_transport_service import DailyTransportService
-from dailyai.services.azure_ai_services import AzureLLMService
+from dailyai.services.open_ai_services import OpenAILLMService
 from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
 from dailyai.pipeline.aggregators import (
     LLMUserContextAggregator,

@@ -129,11 +129,10 @@ async def main(room_url: str, token):
     transport._camera_width = 720
     transport._camera_height = 1280

-    llm = AzureLLMService(
-        api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
-        endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
-        model=os.getenv("AZURE_CHATGPT_MODEL"),
+    llm = OpenAILLMService(
+        api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview"
     )
+
     tts = ElevenLabsTTSService(
         aiohttp_session=session,
         api_key=os.getenv("ELEVENLABS_API_KEY"),

src/examples/foundational/11-sound-effects.py (48 changes: 27 additions & 21 deletions)

@@ -5,11 +5,20 @@
 import wave

 from dailyai.services.daily_transport_service import DailyTransportService
-from dailyai.services.azure_ai_services import AzureLLMService, AzureTTSService
+from dailyai.services.open_ai_services import OpenAILLMService
 from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
-from dailyai.pipeline.aggregators import LLMContextAggregator, LLMUserContextAggregator, LLMAssistantContextAggregator
+from dailyai.pipeline.aggregators import (
+    LLMContextAggregator,
+    LLMUserContextAggregator,
+    LLMAssistantContextAggregator,
+)
 from dailyai.services.ai_services import AIService, FrameLogger
-from dailyai.pipeline.frames import Frame, AudioFrame, LLMResponseEndFrame, LLMMessagesQueueFrame
+from dailyai.pipeline.frames import (
+    Frame,
+    AudioFrame,
+    LLMResponseEndFrame,
+    LLMMessagesQueueFrame,
+)
 from typing import AsyncGenerator

 from examples.support.runner import configure

@@ -19,10 +28,7 @@
 logger.setLevel(logging.DEBUG)

 sounds = {}
-sound_files = [
-    'ding1.wav',
-    'ding2.wav'
-]
+sound_files = ["ding1.wav", "ding2.wav"]

 script_dir = os.path.dirname(__file__)

@@ -71,17 +77,18 @@ async def main(room_url: str, token):
         duration_minutes=5,
         mic_enabled=True,
         mic_sample_rate=16000,
-        camera_enabled=False
+        camera_enabled=False,
     )

+    llm = OpenAILLMService(
+        api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview"
+    )
+
-    llm = AzureLLMService(
-        api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
-        endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
-        model=os.getenv("AZURE_CHATGPT_MODEL"))
     tts = ElevenLabsTTSService(
         aiohttp_session=session,
         api_key=os.getenv("ELEVENLABS_API_KEY"),
-        voice_id="ErXwobaYiN019PkySvjV")
+        voice_id="ErXwobaYiN019PkySvjV",
+    )

     @transport.event_handler("on_first_other_participant_joined")
     async def on_first_other_participant_joined(transport):

@@ -90,12 +97,13 @@ async def on_first_other_participant_joined(transport):

     async def handle_transcriptions():
         messages = [
-            {"role": "system", "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio. Respond to what the user said in a creative and helpful way."},
+            {
+                "role": "system",
+                "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio. Respond to what the user said in a creative and helpful way.",
+            },
         ]

-        tma_in = LLMUserContextAggregator(
-            messages, transport._my_participant_id
-        )
+        tma_in = LLMUserContextAggregator(messages, transport._my_participant_id)
         tma_out = LLMAssistantContextAggregator(
             messages, transport._my_participant_id
         )

@@ -111,15 +119,13 @@ async def handle_transcriptions():
                         llm.run(
                             fl2.run(
                                 in_sound.run(
-                                    tma_in.run(
-                                        transport.get_receive_frames()
-                                    )
+                                    tma_in.run(transport.get_receive_frames())
                                 )
                             )
                         )
                     )
                 )
-            )
+            ),
         )

     transport.transcription_settings["extra"]["punctuate"] = True

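The 11-sound-effects.py hunks reference a sounds dict, a sound_files list, and the wave module, but the loop that loads the wav data sits outside the visible hunks. A plausible sketch of that loading step (the file locations and readframes usage here are assumptions, not part of this diff):

import os
import wave

sounds = {}
sound_files = ["ding1.wav", "ding2.wav"]

script_dir = os.path.dirname(__file__)

for file in sound_files:
    # Assumption: the wav files sit next to the example script; the repository
    # may keep them in an assets directory instead.
    filename = os.path.splitext(os.path.basename(file))[0]
    with wave.open(os.path.join(script_dir, file), "rb") as audio:
        # Raw frames, presumably wrapped in AudioFrame objects later on.
        sounds[filename] = audio.readframes(audio.getnframes())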