From 982c0a0749b1de78dd1abde09470a32564c8d960 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 4 Apr 2024 14:04:53 -0700 Subject: [PATCH 1/5] examples: move non-working examples to to_be_updated --- examples/foundational/01a-local-transport.py | 8 ++-- .../foundational/13-whisper-transcription.py | 17 ++++---- examples/foundational/13a-whisper-local.py | 41 +++++++------------ .../{ => to_be_updated}/03a-image-local.py | 0 .../05a-local-sync-speech-and-text.py | 26 ++++-------- .../{ => to_be_updated}/06a-image-sync.py | 0 .../{ => to_be_updated}/10-wake-word.py | 0 .../{ => to_be_updated}/11-sound-effects.py | 0 8 files changed, 32 insertions(+), 60 deletions(-) rename examples/foundational/{ => to_be_updated}/03a-image-local.py (100%) rename examples/foundational/{ => to_be_updated}/05a-local-sync-speech-and-text.py (85%) rename examples/foundational/{ => to_be_updated}/06a-image-sync.py (100%) rename examples/foundational/{ => to_be_updated}/10-wake-word.py (100%) rename examples/foundational/{ => to_be_updated}/11-sound-effects.py (100%) diff --git a/examples/foundational/01a-local-transport.py b/examples/foundational/01a-local-transport.py index d653a684c..617459590 100644 --- a/examples/foundational/01a-local-transport.py +++ b/examples/foundational/01a-local-transport.py @@ -6,6 +6,9 @@ from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService from dailyai.transports.local_transport import LocalTransport +from dotenv import load_dotenv +load_dotenv(override=True) + logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s") logger = logging.getLogger("dailyai") logger.setLevel(logging.DEBUG) @@ -25,10 +28,7 @@ async def main(): async def say_something(): await asyncio.sleep(1) - await tts.say( - "Hello there.", - transport.send_queue, - ) + await transport.say("Hello there.", tts) await transport.stop_when_done() await asyncio.gather(transport.run(), say_something()) diff --git a/examples/foundational/13-whisper-transcription.py b/examples/foundational/13-whisper-transcription.py index d579724de..c634a9c0d 100644 --- a/examples/foundational/13-whisper-transcription.py +++ b/examples/foundational/13-whisper-transcription.py @@ -3,12 +3,10 @@ from dailyai.transports.daily_transport import DailyTransport from dailyai.services.whisper_ai_services import WhisperSTTService +from dailyai.pipeline.pipeline import Pipeline from runner import configure -from dotenv import load_dotenv -load_dotenv(override=True) - logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s") logger = logging.getLogger("dailyai") logger.setLevel(logging.DEBUG) @@ -19,27 +17,26 @@ async def main(room_url: str): room_url, None, "Transcription bot", - start_transcription=True, + start_transcription=False, mic_enabled=False, camera_enabled=False, speaker_enabled=True, ) stt = WhisperSTTService() + transcription_output_queue = asyncio.Queue() + pipeline = Pipeline([stt]) + pipeline.set_sink(transcription_output_queue) + async def handle_transcription(): print("`````````TRANSCRIPTION`````````") while True: item = await transcription_output_queue.get() print(item.text) - async def handle_speaker(): - await stt.run_to_queue( - transcription_output_queue, transport.get_receive_frames() - ) - - await asyncio.gather(transport.run(), handle_speaker(), handle_transcription()) + await asyncio.gather(transport.run(pipeline), handle_transcription()) if __name__ == "__main__": diff --git a/examples/foundational/13a-whisper-local.py b/examples/foundational/13a-whisper-local.py index 598e000ae..00562b402 100644 --- a/examples/foundational/13a-whisper-local.py +++ b/examples/foundational/13a-whisper-local.py @@ -1,32 +1,35 @@ -import argparse import asyncio import logging -from dailyai.pipeline.frames import EndFrame, TranscriptionFrame +from dailyai.pipeline.frames import EndFrame, TranscriptionFrame from dailyai.transports.local_transport import LocalTransport from dailyai.services.whisper_ai_services import WhisperSTTService +from dailyai.pipeline.pipeline import Pipeline logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s") logger = logging.getLogger("dailyai") logger.setLevel(logging.DEBUG) -async def main(room_url: str): - global transport - global stt - +async def main(): meeting_duration_minutes = 1 + transport = LocalTransport( - mic_enabled=True, + mic_enabled=False, camera_enabled=False, speaker_enabled=True, duration_minutes=meeting_duration_minutes, - start_transcription=True, + start_transcription=False, ) + stt = WhisperSTTService() + transcription_output_queue = asyncio.Queue() transport_done = asyncio.Event() + pipeline = Pipeline([stt]) + pipeline.set_sink(transcription_output_queue) + async def handle_transcription(): print("`````````TRANSCRIPTION`````````") while not transport_done.is_set(): @@ -38,29 +41,13 @@ async def handle_transcription(): break print("handle_transcription done") - async def handle_speaker(): - await stt.run_to_queue( - transcription_output_queue, transport.get_receive_frames() - ) - await transcription_output_queue.put(EndFrame()) - print("handle speaker done.") - async def run_until_done(): - await transport.run() + await transport.run(pipeline) transport_done.set() print("run_until_done done") - await asyncio.gather(run_until_done(), handle_speaker(), handle_transcription()) + await asyncio.gather(run_until_done(), handle_transcription()) if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Simple Daily Bot Sample") - parser.add_argument( - "-u", - "--url", - type=str, - required=True, - help="URL of the Daily room to join") - - args, unknown = parser.parse_known_args() - asyncio.run(main(args.url)) + asyncio.run(main()) diff --git a/examples/foundational/03a-image-local.py b/examples/foundational/to_be_updated/03a-image-local.py similarity index 100% rename from examples/foundational/03a-image-local.py rename to examples/foundational/to_be_updated/03a-image-local.py diff --git a/examples/foundational/05a-local-sync-speech-and-text.py b/examples/foundational/to_be_updated/05a-local-sync-speech-and-text.py similarity index 85% rename from examples/foundational/05a-local-sync-speech-and-text.py rename to examples/foundational/to_be_updated/05a-local-sync-speech-and-text.py index fd1076ff5..85c2dc204 100644 --- a/examples/foundational/05a-local-sync-speech-and-text.py +++ b/examples/foundational/to_be_updated/05a-local-sync-speech-and-text.py @@ -1,5 +1,4 @@ import aiohttp -import argparse import asyncio import logging import tkinter as tk @@ -11,12 +10,15 @@ from dailyai.services.fal_ai_services import FalImageGenService from dailyai.transports.local_transport import LocalTransport +from dotenv import load_dotenv +load_dotenv(override=True) + logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s") logger = logging.getLogger("dailyai") logger.setLevel(logging.DEBUG) -async def main(room_url): +async def main(): async with aiohttp.ClientSession() as session: meeting_duration_minutes = 5 tk_root = tk.Tk() @@ -59,12 +61,8 @@ async def get_all_audio(text): return all_audio async def get_month_data(month): - messages = [ - { - "role": "system", - "content": f"Describe a nature photograph suitable for use in a calendar, for the month of {month}. Include only the image description with no preamble. Limit the description to one sentence, please.", - } - ] + messages = [{"role": "system", "content": f"Describe a nature photograph suitable for use in a calendar, for the month of { + month}. Include only the image description with no preamble. Limit the description to one sentence, please.", }] image_description = await llm.run_llm(messages) if not image_description: @@ -133,14 +131,4 @@ async def run_tk(): if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Simple Daily Bot Sample") - parser.add_argument( - "-u", - "--url", - type=str, - required=True, - help="URL of the Daily room to join") - - args, unknown = parser.parse_known_args() - - asyncio.run(main(args.url)) + asyncio.run(main()) diff --git a/examples/foundational/06a-image-sync.py b/examples/foundational/to_be_updated/06a-image-sync.py similarity index 100% rename from examples/foundational/06a-image-sync.py rename to examples/foundational/to_be_updated/06a-image-sync.py diff --git a/examples/foundational/10-wake-word.py b/examples/foundational/to_be_updated/10-wake-word.py similarity index 100% rename from examples/foundational/10-wake-word.py rename to examples/foundational/to_be_updated/10-wake-word.py diff --git a/examples/foundational/11-sound-effects.py b/examples/foundational/to_be_updated/11-sound-effects.py similarity index 100% rename from examples/foundational/11-sound-effects.py rename to examples/foundational/to_be_updated/11-sound-effects.py From 951f20c788979d62bd2cfd8af910f9d866e269b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 4 Apr 2024 14:05:15 -0700 Subject: [PATCH 2/5] transports: don't write/read if microphone/speaker not enabled --- src/dailyai/transports/daily_transport.py | 3 +-- src/dailyai/transports/local_transport.py | 13 ++++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/dailyai/transports/daily_transport.py b/src/dailyai/transports/daily_transport.py index 506255a94..254b35629 100644 --- a/src/dailyai/transports/daily_transport.py +++ b/src/dailyai/transports/daily_transport.py @@ -120,8 +120,7 @@ def _patch_method(self, event_name, *args, **kwargs): raise e def _webrtc_vad_analyze(self): - buffer = self.read_audio_frames( - int(self._vad_samples)) + buffer = self.read_audio_frames(int(self._vad_samples)) if len(buffer) > 0: confidence = self.webrtc_vad.analyze_frames(buffer) # yeses = int(confidence * 20.0) diff --git a/src/dailyai/transports/local_transport.py b/src/dailyai/transports/local_transport.py index e79b147a8..cf66345b2 100644 --- a/src/dailyai/transports/local_transport.py +++ b/src/dailyai/transports/local_transport.py @@ -48,13 +48,16 @@ def write_frame_to_camera(self, frame: bytes): ) def write_frame_to_mic(self, frame: bytes): - self._audio_stream.write(frame) + if self._mic_enabled: + self._audio_stream.write(frame) def read_frames(self, desired_frame_count): - bytes = self._speaker_stream.read( - desired_frame_count, - exception_on_overflow=False, - ) + bytes = b"" + if self._speaker_enabled: + bytes = self._speaker_stream.read( + desired_frame_count, + exception_on_overflow=False, + ) return bytes def _prerun(self): From d687c8cdeba6d291f4724ddf3e7275e0d3e131be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 4 Apr 2024 14:05:40 -0700 Subject: [PATCH 3/5] transports: updated silero vad not found message --- src/dailyai/transports/threaded_transport.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/dailyai/transports/threaded_transport.py b/src/dailyai/transports/threaded_transport.py index 037ba8f10..9b2530efd 100644 --- a/src/dailyai/transports/threaded_transport.py +++ b/src/dailyai/transports/threaded_transport.py @@ -81,12 +81,13 @@ def __init__( except ModuleNotFoundError as e: if self._has_webrtc_vad: - self._logger.debug(f"Couldn't load torch; using webrtc VAD") + self._logger.debug( + f"Couldn't load torch; using webrtc VAD") self._vad_samples = int(self._speaker_sample_rate / 100.0) else: self._logger.error(f"Exception: {e}") self._logger.error( - "In order to use VAD, you'll need to install the `torch` and `torchaudio` modules.") + "In order to use Silero VAD, you'll need to `pip install dailyai[silero].") raise Exception(f"Missing module(s): {e}") vad_frame_s = self._vad_samples / self._speaker_sample_rate From 1ea503c1e61423446711cef10ecb21ac8774b3a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 4 Apr 2024 15:35:58 -0700 Subject: [PATCH 4/5] examples: fix 03a-image-local --- .../{to_be_updated => }/03a-image-local.py | 26 ++++++++++--------- src/dailyai/services/fal_ai_services.py | 3 --- src/dailyai/transports/daily_transport.py | 1 - src/dailyai/transports/threaded_transport.py | 1 - 4 files changed, 14 insertions(+), 17 deletions(-) rename examples/foundational/{to_be_updated => }/03a-image-local.py (70%) diff --git a/examples/foundational/to_be_updated/03a-image-local.py b/examples/foundational/03a-image-local.py similarity index 70% rename from examples/foundational/to_be_updated/03a-image-local.py rename to examples/foundational/03a-image-local.py index eba208970..46f811471 100644 --- a/examples/foundational/to_be_updated/03a-image-local.py +++ b/examples/foundational/03a-image-local.py @@ -5,26 +5,29 @@ import tkinter as tk -from dailyai.pipeline.frames import TextFrame +from dailyai.pipeline.frames import TextFrame, EndFrame +from dailyai.pipeline.pipeline import Pipeline from dailyai.services.fal_ai_services import FalImageGenService from dailyai.transports.local_transport import LocalTransport +from dotenv import load_dotenv +load_dotenv(override=True) + logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s") logger = logging.getLogger("dailyai") logger.setLevel(logging.DEBUG) -local_joined = False -participant_joined = False - async def main(): async with aiohttp.ClientSession() as session: meeting_duration_minutes = 2 + tk_root = tk.Tk() - tk_root.title("Calendar") + tk_root.title("dailyai") + transport = LocalTransport( tk_root=tk_root, - mic_enabled=True, + mic_enabled=False, camera_enabled=True, camera_width=1024, camera_height=1024, @@ -32,15 +35,14 @@ async def main(): ) imagegen = FalImageGenService( - image_size="1024x1024", + image_size="square_hd", aiohttp_session=session, key_id=os.getenv("FAL_KEY_ID"), key_secret=os.getenv("FAL_KEY_SECRET"), ) - image_task = asyncio.create_task( - imagegen.run_to_queue( - transport.send_queue, [ - TextFrame("a cat in the style of picasso")])) + + pipeline = Pipeline([imagegen]) + await pipeline.queue_frames([TextFrame("a cat in the style of picasso")]) async def run_tk(): while not transport._stop_threads.is_set(): @@ -48,7 +50,7 @@ async def run_tk(): tk_root.update_idletasks() await asyncio.sleep(0.1) - await asyncio.gather(transport.run(), image_task, run_tk()) + await asyncio.gather(transport.run(pipeline, override_pipeline_source_queue=False), run_tk()) if __name__ == "__main__": diff --git a/src/dailyai/services/fal_ai_services.py b/src/dailyai/services/fal_ai_services.py index 9130b062b..1f97db598 100644 --- a/src/dailyai/services/fal_ai_services.py +++ b/src/dailyai/services/fal_ai_services.py @@ -4,9 +4,6 @@ import os from PIL import Image -from dailyai.services.ai_services import ImageGenService - - from dailyai.services.ai_services import ImageGenService try: diff --git a/src/dailyai/transports/daily_transport.py b/src/dailyai/transports/daily_transport.py index 254b35629..50a2af124 100644 --- a/src/dailyai/transports/daily_transport.py +++ b/src/dailyai/transports/daily_transport.py @@ -5,7 +5,6 @@ import threading import types -from enum import Enum from functools import partial from typing import Any diff --git a/src/dailyai/transports/threaded_transport.py b/src/dailyai/transports/threaded_transport.py index 9b2530efd..361e56f40 100644 --- a/src/dailyai/transports/threaded_transport.py +++ b/src/dailyai/transports/threaded_transport.py @@ -182,7 +182,6 @@ async def run_interruptible_pipeline( pipeline.set_sink(self.send_queue) source_queue = asyncio.Queue() pipeline.set_source(source_queue) - pipeline.set_sink(self.send_queue) pipeline_task = asyncio.create_task(pipeline.run_pipeline()) async def yield_frame(frame: Frame) -> AsyncGenerator[Frame, None]: From 46d265514eace067f7190167d103b6a77317ba87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 4 Apr 2024 15:52:28 -0700 Subject: [PATCH 5/5] pyproject: update github url --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ebffca741..46ebc7660 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dependencies = [ ] [project.urls] -Source = "https://github.com/daily-co/daily-ai-sdk" +Source = "https://github.com/daily-co/dailyai" Website = "https://daily.co" [project.optional-dependencies]