pipecat-ai · aconchillo · Apr 5, 2024 · Apr 4, 2024 · Apr 4, 2024 · Apr 4, 2024
diff --git a/examples/foundational/01a-local-transport.py b/examples/foundational/01a-local-transport.py
@@ -6,6 +6,9 @@
 from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
 from dailyai.transports.local_transport import LocalTransport
 
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
 logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s")
 logger = logging.getLogger("dailyai")
 logger.setLevel(logging.DEBUG)
@@ -25,10 +28,7 @@ async def main():
 
         async def say_something():
             await asyncio.sleep(1)
-            await tts.say(
-                "Hello there.",
-                transport.send_queue,
-            )
+            await transport.say("Hello there.", tts)
             await transport.stop_when_done()
 
         await asyncio.gather(transport.run(), say_something())

diff --git a/examples/foundational/03a-image-local.py b/examples/foundational/03a-image-local.py
@@ -5,50 +5,52 @@
 
 import tkinter as tk
 
-from dailyai.pipeline.frames import TextFrame
+from dailyai.pipeline.frames import TextFrame, EndFrame
+from dailyai.pipeline.pipeline import Pipeline
 from dailyai.services.fal_ai_services import FalImageGenService
 from dailyai.transports.local_transport import LocalTransport
 
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
 logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s")
 logger = logging.getLogger("dailyai")
 logger.setLevel(logging.DEBUG)
 
-local_joined = False
-participant_joined = False
-
 
 async def main():
     async with aiohttp.ClientSession() as session:
         meeting_duration_minutes = 2
+
         tk_root = tk.Tk()
-        tk_root.title("Calendar")
+        tk_root.title("dailyai")
+
         transport = LocalTransport(
             tk_root=tk_root,
-            mic_enabled=True,
+            mic_enabled=False,
             camera_enabled=True,
             camera_width=1024,
             camera_height=1024,
             duration_minutes=meeting_duration_minutes,
         )
 
         imagegen = FalImageGenService(
-            image_size="1024x1024",
+            image_size="square_hd",
             aiohttp_session=session,
             key_id=os.getenv("FAL_KEY_ID"),
             key_secret=os.getenv("FAL_KEY_SECRET"),
         )
-        image_task = asyncio.create_task(
-            imagegen.run_to_queue(
-                transport.send_queue, [
-                    TextFrame("a cat in the style of picasso")]))
+
+        pipeline = Pipeline([imagegen])
+        await pipeline.queue_frames([TextFrame("a cat in the style of picasso")])
 
         async def run_tk():
             while not transport._stop_threads.is_set():
                 tk_root.update()
                 tk_root.update_idletasks()
                 await asyncio.sleep(0.1)
 
-        await asyncio.gather(transport.run(), image_task, run_tk())
+        await asyncio.gather(transport.run(pipeline, override_pipeline_source_queue=False), run_tk())
 
 
 if __name__ == "__main__":

diff --git a/examples/foundational/13-whisper-transcription.py b/examples/foundational/13-whisper-transcription.py
@@ -3,12 +3,10 @@
 
 from dailyai.transports.daily_transport import DailyTransport
 from dailyai.services.whisper_ai_services import WhisperSTTService
+from dailyai.pipeline.pipeline import Pipeline
 
 from runner import configure
 
-from dotenv import load_dotenv
-load_dotenv(override=True)
-
 logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s")
 logger = logging.getLogger("dailyai")
 logger.setLevel(logging.DEBUG)
@@ -19,27 +17,26 @@ async def main(room_url: str):
         room_url,
         None,
         "Transcription bot",
-        start_transcription=True,
+        start_transcription=False,
         mic_enabled=False,
         camera_enabled=False,
         speaker_enabled=True,
     )
 
     stt = WhisperSTTService()
+
     transcription_output_queue = asyncio.Queue()
 
+    pipeline = Pipeline([stt])
+    pipeline.set_sink(transcription_output_queue)
+
     async def handle_transcription():
         print("`````````TRANSCRIPTION`````````")
         while True:
             item = await transcription_output_queue.get()
             print(item.text)
 
-    async def handle_speaker():
-        await stt.run_to_queue(
-            transcription_output_queue, transport.get_receive_frames()
-        )
-
-    await asyncio.gather(transport.run(), handle_speaker(), handle_transcription())
+    await asyncio.gather(transport.run(pipeline), handle_transcription())
 
 
 if __name__ == "__main__":

diff --git a/examples/foundational/13a-whisper-local.py b/examples/foundational/13a-whisper-local.py
@@ -1,32 +1,35 @@
-import argparse
 import asyncio
 import logging
-from dailyai.pipeline.frames import EndFrame, TranscriptionFrame
 
+from dailyai.pipeline.frames import EndFrame, TranscriptionFrame
 from dailyai.transports.local_transport import LocalTransport
 from dailyai.services.whisper_ai_services import WhisperSTTService
+from dailyai.pipeline.pipeline import Pipeline
 
 logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s")
 logger = logging.getLogger("dailyai")
 logger.setLevel(logging.DEBUG)
 
 
-async def main(room_url: str):
-    global transport
-    global stt
-
+async def main():
     meeting_duration_minutes = 1
+
     transport = LocalTransport(
-        mic_enabled=True,
+        mic_enabled=False,
         camera_enabled=False,
         speaker_enabled=True,
         duration_minutes=meeting_duration_minutes,
-        start_transcription=True,
+        start_transcription=False,
     )
+
     stt = WhisperSTTService()
+
     transcription_output_queue = asyncio.Queue()
     transport_done = asyncio.Event()
 
+    pipeline = Pipeline([stt])
+    pipeline.set_sink(transcription_output_queue)
+
     async def handle_transcription():
         print("`````````TRANSCRIPTION`````````")
         while not transport_done.is_set():
@@ -38,29 +41,13 @@ async def handle_transcription():
                 break
         print("handle_transcription done")
 
-    async def handle_speaker():
-        await stt.run_to_queue(
-            transcription_output_queue, transport.get_receive_frames()
-        )
-        await transcription_output_queue.put(EndFrame())
-        print("handle speaker done.")
-
     async def run_until_done():
-        await transport.run()
+        await transport.run(pipeline)
         transport_done.set()
         print("run_until_done done")
 
-    await asyncio.gather(run_until_done(), handle_speaker(), handle_transcription())
+    await asyncio.gather(run_until_done(), handle_transcription())
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Simple Daily Bot Sample")
-    parser.add_argument(
-        "-u",
-        "--url",
-        type=str,
-        required=True,
-        help="URL of the Daily room to join")
-
-    args, unknown = parser.parse_known_args()
-    asyncio.run(main(args.url))
+    asyncio.run(main())
diff --git a/...ational/05a-local-sync-speech-and-text.py → ...updated/05a-local-sync-speech-and-text.py b/...ational/05a-local-sync-speech-and-text.py → ...updated/05a-local-sync-speech-and-text.py
@@ -1,5 +1,4 @@
 import aiohttp
-import argparse
 import asyncio
 import logging
 import tkinter as tk
@@ -11,12 +10,15 @@
 from dailyai.services.fal_ai_services import FalImageGenService
 from dailyai.transports.local_transport import LocalTransport
 
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
 logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s")
 logger = logging.getLogger("dailyai")
 logger.setLevel(logging.DEBUG)
 
 
-async def main(room_url):
+async def main():
     async with aiohttp.ClientSession() as session:
         meeting_duration_minutes = 5
         tk_root = tk.Tk()
@@ -59,12 +61,8 @@ async def get_all_audio(text):
             return all_audio
 
         async def get_month_data(month):
-            messages = [
-                {
-                    "role": "system",
-                    "content": f"Describe a nature photograph suitable for use in a calendar, for the month of {month}. Include only the image description with no preamble. Limit the description to one sentence, please.",
-                }
-            ]
+            messages = [{"role": "system", "content": f"Describe a nature photograph suitable for use in a calendar, for the month of {month}. "
+                "Include only the image description with no preamble. Limit the description to one sentence, please.", }]  # split between literals: a replacement field cannot span lines before Python 3.12
 
             image_description = await llm.run_llm(messages)
             if not image_description:
@@ -133,14 +131,4 @@ async def run_tk():
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Simple Daily Bot Sample")
-    parser.add_argument(
-        "-u",
-        "--url",
-        type=str,
-        required=True,
-        help="URL of the Daily room to join")
-
-    args, unknown = parser.parse_known_args()
-
-    asyncio.run(main(args.url))
+    asyncio.run(main())
diff --git a/examples/foundational/06a-image-sync.py → ...ndational/to_be_updated/06a-image-sync.py b/examples/foundational/06a-image-sync.py → ...ndational/to_be_updated/06a-image-sync.py
diff --git a/examples/foundational/10-wake-word.py → ...oundational/to_be_updated/10-wake-word.py b/examples/foundational/10-wake-word.py → ...oundational/to_be_updated/10-wake-word.py
diff --git a/examples/foundational/11-sound-effects.py → ...ational/to_be_updated/11-sound-effects.py b/examples/foundational/11-sound-effects.py → ...ational/to_be_updated/11-sound-effects.py
diff --git a/pyproject.toml b/pyproject.toml
@@ -27,7 +27,7 @@ dependencies = [
 ]
 
 [project.urls]
-Source = "https://github.com/daily-co/daily-ai-sdk"
+Source = "https://github.com/daily-co/dailyai"
 Website = "https://daily.co"
 
 [project.optional-dependencies]

diff --git a/src/dailyai/services/fal_ai_services.py b/src/dailyai/services/fal_ai_services.py
@@ -4,9 +4,6 @@
 import os
 from PIL import Image
 
-from dailyai.services.ai_services import ImageGenService
-
-
 from dailyai.services.ai_services import ImageGenService
 
 try:

diff --git a/src/dailyai/transports/daily_transport.py b/src/dailyai/transports/daily_transport.py
@@ -5,7 +5,6 @@
 import threading
 import types
 
-from enum import Enum
 from functools import partial
 from typing import Any
 
@@ -120,8 +119,7 @@ def _patch_method(self, event_name, *args, **kwargs):
             raise e
 
     def _webrtc_vad_analyze(self):
-        buffer = self.read_audio_frames(
-            int(self._vad_samples))
+        buffer = self.read_audio_frames(int(self._vad_samples))
         if len(buffer) > 0:
             confidence = self.webrtc_vad.analyze_frames(buffer)
             # yeses = int(confidence * 20.0)

diff --git a/src/dailyai/transports/local_transport.py b/src/dailyai/transports/local_transport.py
@@ -48,13 +48,16 @@ def write_frame_to_camera(self, frame: bytes):
             )
 
     def write_frame_to_mic(self, frame: bytes):
-        self._audio_stream.write(frame)
+        if self._mic_enabled:
+            self._audio_stream.write(frame)
 
     def read_frames(self, desired_frame_count):
-        bytes = self._speaker_stream.read(
-            desired_frame_count,
-            exception_on_overflow=False,
-        )
+        bytes = b""
+        if self._speaker_enabled:
+            bytes = self._speaker_stream.read(
+                desired_frame_count,
+                exception_on_overflow=False,
+            )
         return bytes
 
     def _prerun(self):

diff --git a/src/dailyai/transports/threaded_transport.py b/src/dailyai/transports/threaded_transport.py
@@ -81,12 +81,13 @@ def __init__(
 
             except ModuleNotFoundError as e:
                 if self._has_webrtc_vad:
-                    self._logger.debug(f"Couldn't load torch; using webrtc VAD")
+                    self._logger.debug(
+                        f"Couldn't load torch; using webrtc VAD")
                     self._vad_samples = int(self._speaker_sample_rate / 100.0)
                 else:
                     self._logger.error(f"Exception: {e}")
                     self._logger.error(
-                        "In order to use VAD, you'll need to install the `torch` and `torchaudio` modules.")
+                        "In order to use Silero VAD, you'll need to `pip install dailyai[silero]`.")
                     raise Exception(f"Missing module(s): {e}")
 
         vad_frame_s = self._vad_samples / self._speaker_sample_rate
@@ -181,7 +182,6 @@ async def run_interruptible_pipeline(
         pipeline.set_sink(self.send_queue)
         source_queue = asyncio.Queue()
         pipeline.set_source(source_queue)
-        pipeline.set_sink(self.send_queue)
         pipeline_task = asyncio.create_task(pipeline.run_pipeline())
 
         async def yield_frame(frame: Frame) -> AsyncGenerator[Frame, None]: