From fd2fa23e9cd410367b4bbbf6e61364565ccc7fae Mon Sep 17 00:00:00 2001 From: Moishe Lettvin Date: Mon, 11 Mar 2024 13:00:29 -0400 Subject: [PATCH 1/4] Fix example 2 --- src/examples/foundational/02-llm-say-one-thing.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/examples/foundational/02-llm-say-one-thing.py b/src/examples/foundational/02-llm-say-one-thing.py index 55a6863e8..fc596407b 100644 --- a/src/examples/foundational/02-llm-say-one-thing.py +++ b/src/examples/foundational/02-llm-say-one-thing.py @@ -42,15 +42,21 @@ async def main(room_url): } ] - @transport.event_handler("on_first_other_participant_joined") - async def on_first_other_participant_joined(transport): + other_joined_event = asyncio.Event() + async def speak_from_llm(): + await other_joined_event.wait() await tts.run_to_queue( transport.send_queue, llm.run([LLMMessagesQueueFrame(messages)]), + add_end_of_stream=True ) await transport.stop_when_done() - await transport.run() + @transport.event_handler("on_first_other_participant_joined") + async def on_first_other_participant_joined(transport): + other_joined_event.set() + + await asyncio.gather(transport.run(), speak_from_llm()) if __name__ == "__main__": From 61c55d2f474f0f01f9ba60f11659cece780c18c2 Mon Sep 17 00:00:00 2001 From: Moishe Lettvin Date: Mon, 11 Mar 2024 13:17:31 -0400 Subject: [PATCH 2/4] Fix up other examples --- src/dailyai/services/fal_ai_services.py | 4 +-- src/examples/foundational/01-say-one-thing.py | 27 +++++++++++++------ .../foundational/02-llm-say-one-thing.py | 3 +-- src/examples/foundational/03-still-frame.py | 13 ++++++--- .../foundational/04-utterance-and-speech.py | 10 +++++-- .../foundational/05-sync-speech-and-image.py | 8 +++++- .../foundational/06-listen-and-respond.py | 4 +-- 7 files changed, 49 insertions(+), 20 deletions(-) diff --git a/src/dailyai/services/fal_ai_services.py b/src/dailyai/services/fal_ai_services.py index 4d7b0bb20..60226dd52 100644 --- a/src/dailyai/services/fal_ai_services.py +++ b/src/dailyai/services/fal_ai_services.py @@ -32,8 +32,8 @@ def __init__( async def run_image_gen(self, sentence) -> tuple[str, bytes]: def get_image_url(sentence, size): handler = fal.apps.submit( - # "110602490-fast-sdxl", - "fal-ai/fast-sdxl", + "110602490-fast-sdxl", + #"fal-ai/fast-sdxl", arguments={"prompt": sentence}, ) for event in handler.iter_events(): diff --git a/src/examples/foundational/01-say-one-thing.py b/src/examples/foundational/01-say-one-thing.py index 388aabc6a..7b196e789 100644 --- a/src/examples/foundational/01-say-one-thing.py +++ b/src/examples/foundational/01-say-one-thing.py @@ -28,21 +28,32 @@ async def main(room_url): voice_id=os.getenv("ELEVENLABS_VOICE_ID"), ) + other_joined_event = asyncio.Event() + participant_name = '' + + async def say_hello(): + nonlocal tts + nonlocal participant_name + + await other_joined_event.wait() + print("done waiting") + await tts.say( + "Hello there, " + participant_name + "!", + transport.send_queue, + ) + await transport.stop_when_done() + # Register an event handler so we can play the audio when the participant joins. @transport.event_handler("on_participant_joined") async def on_participant_joined(transport, participant): if participant["info"]["isLocal"]: return - await tts.say( - "Hello there, " + participant["info"]["userName"] + "!", - transport.send_queue, - ) - - # wait for the output queue to be empty, then leave the meeting - await transport.stop_when_done() + nonlocal participant_name + participant_name = participant["info"]["userName"] or '' + other_joined_event.set() - await transport.run() + await asyncio.gather(transport.run(), say_hello()) del tts diff --git a/src/examples/foundational/02-llm-say-one-thing.py b/src/examples/foundational/02-llm-say-one-thing.py index fc596407b..370ab7d77 100644 --- a/src/examples/foundational/02-llm-say-one-thing.py +++ b/src/examples/foundational/02-llm-say-one-thing.py @@ -47,8 +47,7 @@ async def speak_from_llm(): await other_joined_event.wait() await tts.run_to_queue( transport.send_queue, - llm.run([LLMMessagesQueueFrame(messages)]), - add_end_of_stream=True + llm.run([LLMMessagesQueueFrame(messages)]) ) await transport.stop_when_done() diff --git a/src/examples/foundational/03-still-frame.py b/src/examples/foundational/03-still-frame.py index 8099e97a0..11274ea66 100644 --- a/src/examples/foundational/03-still-frame.py +++ b/src/examples/foundational/03-still-frame.py @@ -23,6 +23,7 @@ async def main(room_url): camera_enabled=True, camera_width=1024, camera_height=1024, + duration_minutes=1 ) imagegen = FalImageGenService( @@ -32,13 +33,19 @@ async def main(room_url): key_secret=os.getenv("FAL_KEY_SECRET"), ) - @transport.event_handler("on_first_other_participant_joined") - async def on_first_other_participant_joined(transport): + other_joined_event = asyncio.Event() + + async def show_image(): + await other_joined_event.wait() await imagegen.run_to_queue( transport.send_queue, [TextFrame("a cat in the style of picasso")] ) - await transport.run() + @transport.event_handler("on_first_other_participant_joined") + async def on_first_other_participant_joined(transport): + other_joined_event.set() + + await asyncio.gather(transport.run(), show_image()) if __name__ == "__main__": diff --git a/src/examples/foundational/04-utterance-and-speech.py b/src/examples/foundational/04-utterance-and-speech.py index 1d25a6c1a..3bfeb5972 100644 --- a/src/examples/foundational/04-utterance-and-speech.py +++ b/src/examples/foundational/04-utterance-and-speech.py @@ -62,8 +62,15 @@ async def main(room_url: str): await source_queue.put(EndFrame()) pipeline_run_task = pipeline.run_pipeline() + other_participant_joined = asyncio.Event() + @transport.event_handler("on_first_other_participant_joined") async def on_first_other_participant_joined(transport): + other_participant_joined.set() + + async def say_something(): + await other_participant_joined.wait() + await azure_tts.say( "My friend the LLM is now going to tell a joke about llamas.", transport.send_queue, @@ -87,9 +94,8 @@ async def buffer_to_send_queue(): break await asyncio.gather(pipeline_run_task, buffer_to_send_queue()) - await transport.stop_when_done() - await transport.run() + await asyncio.gather(transport.run(), say_something()) if __name__ == "__main__": diff --git a/src/examples/foundational/05-sync-speech-and-image.py b/src/examples/foundational/05-sync-speech-and-image.py index b7c76af6e..2c2dbdee6 100644 --- a/src/examples/foundational/05-sync-speech-and-image.py +++ b/src/examples/foundational/05-sync-speech-and-image.py @@ -140,12 +140,18 @@ async def main(room_url): ) pipeline_task = pipeline.run_pipeline() + other_joined = asyncio.Event() + @transport.event_handler("on_first_other_participant_joined") async def on_first_other_participant_joined(transport): + other_joined.set() + + async def show_calendar(): + await other_joined.wait() await pipeline_task await transport.stop_when_done() - await transport.run() + await asyncio.gather(transport.run(), show_calendar()) if __name__ == "__main__": diff --git a/src/examples/foundational/06-listen-and-respond.py b/src/examples/foundational/06-listen-and-respond.py index 94cf0f55c..9c3099712 100644 --- a/src/examples/foundational/06-listen-and-respond.py +++ b/src/examples/foundational/06-listen-and-respond.py @@ -49,7 +49,7 @@ async def main(room_url: str, token): async def on_first_other_participant_joined(transport): await tts.say("Hi, I'm listening!", transport.send_queue) - async def handle_transcriptions(): + async def have_conversation(): messages = [ { "role": "system", @@ -75,7 +75,7 @@ async def handle_transcriptions(): transport.transcription_settings["extra"]["endpointing"] = True transport.transcription_settings["extra"]["punctuate"] = True - await asyncio.gather(transport.run(), handle_transcriptions()) + await asyncio.gather(transport.run(), have_conversation()) if __name__ == "__main__": From 977c12d530958123bc6ffd6ec986ebf7aa708bd9 Mon Sep 17 00:00:00 2001 From: Moishe Lettvin Date: Mon, 11 Mar 2024 13:19:47 -0400 Subject: [PATCH 3/4] undo fal change --- src/dailyai/services/fal_ai_services.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dailyai/services/fal_ai_services.py b/src/dailyai/services/fal_ai_services.py index 60226dd52..0ba6436b6 100644 --- a/src/dailyai/services/fal_ai_services.py +++ b/src/dailyai/services/fal_ai_services.py @@ -32,8 +32,8 @@ def __init__( async def run_image_gen(self, sentence) -> tuple[str, bytes]: def get_image_url(sentence, size): handler = fal.apps.submit( - "110602490-fast-sdxl", - #"fal-ai/fast-sdxl", + #"110602490-fast-sdxl", + "fal-ai/fast-sdxl", arguments={"prompt": sentence}, ) for event in handler.iter_events(): From f8ae264957cc88f441ed941dc91b6a60c7967e3d Mon Sep 17 00:00:00 2001 From: Moishe Lettvin Date: Mon, 11 Mar 2024 13:20:28 -0400 Subject: [PATCH 4/4] remove unnecessary print --- src/examples/foundational/01-say-one-thing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/examples/foundational/01-say-one-thing.py b/src/examples/foundational/01-say-one-thing.py index 7b196e789..45896269c 100644 --- a/src/examples/foundational/01-say-one-thing.py +++ b/src/examples/foundational/01-say-one-thing.py @@ -36,7 +36,6 @@ async def say_hello(): nonlocal participant_name await other_joined_event.wait() - print("done waiting") await tts.say( "Hello there, " + participant_name + "!", transport.send_queue,