Merge pull request #106 from daily-co/updated-to-be-updated-examples
examples: updated to_be_updated examples
aconchillo authored Apr 6, 2024
2 parents 172a142 + 02b5c3d commit 88404e4
Showing 6 changed files with 100 additions and 130 deletions.
12 changes: 10 additions & 2 deletions dot-env.template
@@ -2,8 +2,16 @@
 ANTHROPIC_API_KEY=...
 
 # Azure
-SPEECH_KEY=...
-SPEECH_REGION=...
+AZURE_SPEECH_REGION=...
+AZURE_SPEECH_API_KEY=...
+
+AZURE_CHATGPT_API_KEY=...
+AZURE_CHATGPT_ENDPOINT=https://...
+AZURE_CHATGPT_MODEL=...
+
+AZURE_DALLE_API_KEY=...
+AZURE_DALLE_ENDPOINT=https://...
+AZURE_DALLE_MODEL=...
 
 # Daily
 DAILY_API_KEY=...
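The renamed Azure variables group one key/region (or key/endpoint/model) set per service. A minimal sketch of reading and validating the speech pair at startup (the variable names come from this diff; the guard itself is illustrative, not dailyai code):

```python
import os

# Names match dot-env.template above; the check is a sketch.
speech_key = os.getenv("AZURE_SPEECH_API_KEY")
speech_region = os.getenv("AZURE_SPEECH_REGION")

if not speech_key or not speech_region:
    raise RuntimeError(
        "Set AZURE_SPEECH_API_KEY and AZURE_SPEECH_REGION (see dot-env.template)."
    )
```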
@@ -3,8 +3,9 @@
 import logging
 import tkinter as tk
 import os
+from dailyai.pipeline.aggregators import LLMFullResponseAggregator
 
-from dailyai.pipeline.frames import AudioFrame, ImageFrame
+from dailyai.pipeline.frames import AudioFrame, ImageFrame, LLMMessagesFrame, TextFrame
 from dailyai.services.open_ai_services import OpenAILLMService
 from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
 from dailyai.services.fal_ai_services import FalImageGenService
@@ -22,7 +23,7 @@ async def main():
     async with aiohttp.ClientSession() as session:
         meeting_duration_minutes = 5
         tk_root = tk.Tk()
-        tk_root.title("Calendar")
+        tk_root.title("dailyai")
 
         transport = LocalTransport(
             mic_enabled=True,
@@ -43,7 +44,7 @@ async def main():
             api_key=os.getenv("OPENAI_API_KEY"),
             model="gpt-4-turbo-preview")
 
-        dalle = FalImageGenService(
+        imagegen = FalImageGenService(
             image_size="1024x1024",
             aiohttp_session=session,
             key_id=os.getenv("FAL_KEY_ID"),
@@ -60,18 +61,33 @@ async def get_all_audio(text):
 
             return all_audio
 
+        async def get_month_description(aggregator, frame):
+            async for frame in aggregator.process_frame(frame):
+                if isinstance(frame, TextFrame):
+                    return frame.text
+
         async def get_month_data(month):
             messages = [{"role": "system", "content": f"Describe a nature photograph suitable for use in a calendar, for the month of {month}. Include only the image description with no preamble. Limit the description to one sentence, please.", }]
 
-            image_description = await llm.run_llm(messages)
+            messages_frame = LLMMessagesFrame(messages)
+
+            llm_full_response_aggregator = LLMFullResponseAggregator()
+
+            image_description = None
+            async for frame in llm.process_frame(messages_frame):
+                result = await get_month_description(llm_full_response_aggregator, frame)
+                if result:
+                    image_description = result
+                    break
+
+            if not image_description:
+                return
 
             to_speak = f"{month}: {image_description}"
             audio_task = asyncio.create_task(get_all_audio(to_speak))
             image_task = asyncio.create_task(
-                dalle.run_image_gen(image_description))
+                imagegen.run_image_gen(image_description))
             (audio, image_data) = await asyncio.gather(audio_task, image_task)
 
             return {
@@ -82,19 +98,14 @@ async def get_month_data(month):
                 "audio": audio,
             }
 
+        # We only specify 5 months as we create tasks all at once and we might
+        # get rate limited otherwise.
         months: list[str] = [
             "January",
             "February",
             "March",
             "April",
             "May",
-            "June",
-            "July",
-            "August",
-            "September",
-            "October",
-            "November",
-            "December",
         ]
 
         async def show_images():
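The calendar example above stops calling `llm.run_llm(messages)` directly: it wraps the chat messages in an `LLMMessagesFrame`, streams frames out of `llm.process_frame`, and lets `LLMFullResponseAggregator` stitch the streamed chunks back into one complete description. A rough sketch of that aggregation idea with hypothetical frame classes (this is not dailyai's implementation; the end-of-response sentinel in particular is assumed):

```python
from dataclasses import dataclass


@dataclass
class TextFrame:
    text: str


class ResponseEndFrame:
    """Hypothetical sentinel marking the end of one LLM response."""


class FullResponseAggregator:
    """Buffers streamed text chunks and emits a single combined TextFrame
    when the end-of-response sentinel arrives (illustrative sketch)."""

    def __init__(self):
        self._chunks: list[str] = []

    async def process_frame(self, frame):
        if isinstance(frame, TextFrame):
            self._chunks.append(frame.text)  # buffer this chunk
        elif isinstance(frame, ResponseEndFrame):
            yield TextFrame("".join(self._chunks))  # the full response
            self._chunks = []
```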
@@ -5,7 +5,8 @@
 import aiohttp
 from PIL import Image
 
-from dailyai.pipeline.frames import ImageFrame, Frame
+from dailyai.pipeline.frames import ImageFrame, Frame, TextFrame
+from dailyai.pipeline.pipeline import Pipeline
 from dailyai.transports.daily_transport import DailyTransport
 from dailyai.services.ai_services import AIService
 from dailyai.pipeline.aggregators import (
@@ -14,7 +15,6 @@
 )
 from dailyai.services.open_ai_services import OpenAILLMService
 from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
-from dailyai.services.fal_ai_services import FalImageGenService
 
 from runner import configure
 
@@ -53,6 +53,7 @@ async def main(room_url: str, token):
         transport._camera_height = 1024
         transport._mic_enabled = True
         transport._mic_sample_rate = 16000
+        transport.transcription_settings["extra"]["punctuate"] = True
 
         tts = ElevenLabsTTSService(
             aiohttp_session=session,
@@ -64,57 +65,30 @@ async def main(room_url: str, token):
             api_key=os.getenv("OPENAI_API_KEY"),
             model="gpt-4-turbo-preview")
 
-        img = FalImageGenService(
-            image_size="1024x1024",
-            aiohttp_session=session,
-            key_id=os.getenv("FAL_KEY_ID"),
-            key_secret=os.getenv("FAL_KEY_SECRET"),
-        )
+        messages = [
+            {
+                "role": "system",
+                "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so it should not include any special characters. Respond to what the user said in a creative and helpful way.",
+            },
+        ]
+
+        tma_in = LLMUserContextAggregator(
+            messages, transport._my_participant_id)
+        tma_out = LLMAssistantContextAggregator(
+            messages, transport._my_participant_id
+        )
+        image_sync_aggregator = ImageSyncAggregator(
+            os.path.join(os.path.dirname(__file__), "assets", "speaking.png"),
+            os.path.join(os.path.dirname(__file__), "assets", "waiting.png"),
+        )
 
-        async def get_images():
-            get_speaking_task = asyncio.create_task(
-                img.run_image_gen("An image of a cat speaking")
-            )
-            get_waiting_task = asyncio.create_task(
-                img.run_image_gen("An image of a cat waiting")
-            )
-
-            (speaking_data, waiting_data) = await asyncio.gather(
-                get_speaking_task, get_waiting_task
-            )
-
-            return speaking_data, waiting_data
+        pipeline = Pipeline([image_sync_aggregator, tma_in, llm, tma_out, tts])
 
         @transport.event_handler("on_first_other_participant_joined")
         async def on_first_other_participant_joined(transport):
-            await tts.say("Hi, I'm listening!", transport.send_queue)
-
-        async def handle_transcriptions():
-            messages = [
-                {
-                    "role": "system",
-                    "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio. Respond to what the user said in a creative and helpful way.",
-                },
-            ]
-
-            tma_in = LLMUserContextAggregator(
-                messages, transport._my_participant_id)
-            tma_out = LLMAssistantContextAggregator(
-                messages, transport._my_participant_id
-            )
-            image_sync_aggregator = ImageSyncAggregator(
-                os.path.join(
-                    os.path.dirname(__file__), "assets", "speaking.png"), os.path.join(
-                    os.path.dirname(__file__), "assets", "waiting.png"), )
-            await tts.run_to_queue(
-                transport.send_queue,
-                image_sync_aggregator.run(
-                    tma_out.run(llm.run(tma_in.run(transport.get_receive_frames())))
-                ),
-            )
+            await pipeline.queue_frames([TextFrame("Hi, I'm listening!")])
 
-        transport.transcription_settings["extra"]["punctuate"] = True
-        await asyncio.gather(transport.run(), handle_transcriptions())
+        await transport.run(pipeline)
 
 
 if __name__ == "__main__":
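The structural change in this file recurs through the rest of the commit: the hand-built generator chain (`tts.run_to_queue(transport.send_queue, image_sync_aggregator.run(tma_out.run(llm.run(tma_in.run(...)))))`) becomes a flat `Pipeline([...])` handed to `transport.run(pipeline)`. A toy model of what such a pipeline does, composing async-generator stages left to right (a sketch of the concept, not dailyai's actual `Pipeline`):

```python
import asyncio
from typing import Any, AsyncGenerator


class Stage:
    """Toy frame processor: receives a stream, yields a stream."""

    async def run(self, frames: AsyncGenerator[Any, None]) -> AsyncGenerator[Any, None]:
        async for frame in frames:
            yield frame  # a real stage would transform or filter here


class ToyPipeline:
    """Composes stages so frames flow left to right, replacing the
    nested x.run(y.run(z.run(source))) chaining this diff removes."""

    def __init__(self, stages: list[Stage]):
        self._stages = stages

    def run(self, source: AsyncGenerator[Any, None]) -> AsyncGenerator[Any, None]:
        stream = source
        for stage in self._stages:
            stream = stage.run(stream)  # each stage wraps the previous one
        return stream


async def demo():
    async def source():
        for i in range(3):
            yield i

    async for frame in ToyPipeline([Stage(), Stage()]).run(source()):
        print(frame)


asyncio.run(demo())
```

Flattening the chain into a list also makes the processing order obvious at a glance, which matters once filters and aggregators multiply, as in the next two examples.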
@@ -5,6 +5,7 @@
 import random
 from typing import AsyncGenerator
 from PIL import Image
+from dailyai.pipeline.pipeline import Pipeline
 
 from dailyai.transports.daily_transport import DailyTransport
 from dailyai.services.open_ai_services import OpenAILLMService
@@ -133,6 +134,7 @@ async def main(room_url: str, token):
         transport._camera_enabled = True
         transport._camera_width = 720
         transport._camera_height = 1280
+        transport.transcription_settings["extra"]["punctuate"] = True
 
         llm = OpenAILLMService(
             api_key=os.getenv("OPENAI_API_KEY"),
@@ -145,45 +147,34 @@ async def main(room_url: str, token):
         )
         isa = ImageSyncAggregator()
 
+        messages = [
+            {
+                "role": "system",
+                "content": "You are Santa Cat, a cat that lives in Santa's workshop at the North Pole. You should be clever, and a bit sarcastic. You should also tell jokes every once in a while. Your responses should only be a few sentences long.",
+            },
+        ]
+
+        tma_in = LLMUserContextAggregator(
+            messages, transport._my_participant_id)
+        tma_out = LLMAssistantContextAggregator(
+            messages, transport._my_participant_id
+        )
+        tf = TranscriptFilter(transport._my_participant_id)
+        ncf = NameCheckFilter(["Santa Cat", "Santa"])
+
+        pipeline = Pipeline([isa, tf, ncf, tma_in, llm, tma_out, tts])
+
         @transport.event_handler("on_first_other_participant_joined")
         async def on_first_other_participant_joined(transport):
-            await tts.say(
+            await transport.say(
                 "Hi! If you want to talk to me, just say 'hey Santa Cat'.",
-                transport.send_queue,
+                tts,
             )
 
-        async def handle_transcriptions():
-            messages = [
-                {
-                    "role": "system",
-                    "content": "You are Santa Cat, a cat that lives in Santa's workshop at the North Pole. You should be clever, and a bit sarcastic. You should also tell jokes every once in a while. Your responses should only be a few sentences long.",
-                },
-            ]
-
-            tma_in = LLMUserContextAggregator(
-                messages, transport._my_participant_id)
-            tma_out = LLMAssistantContextAggregator(
-                messages, transport._my_participant_id
-            )
-            tf = TranscriptFilter(transport._my_participant_id)
-            ncf = NameCheckFilter(["Santa Cat", "Santa"])
-            await tts.run_to_queue(
-                transport.send_queue,
-                isa.run(
-                    tma_out.run(
-                        llm.run(
-                            tma_in.run(
-                                ncf.run(tf.run(transport.get_receive_frames())))
-                        )
-                    )
-                ),
-            )
-
         async def starting_image():
             await transport.send_queue.put(quiet_frame)
 
-        transport.transcription_settings["extra"]["punctuate"] = True
-        await asyncio.gather(transport.run(), handle_transcriptions(), starting_image())
+        await asyncio.gather(transport.run(pipeline), starting_image())
 
 
 if __name__ == "__main__":
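In the Santa Cat pipeline, `Pipeline([isa, tf, ncf, tma_in, llm, tma_out, tts])`, frames flow left to right, so `TranscriptFilter` and `NameCheckFilter` discard frames before they ever reach the LLM: the bot only answers when addressed by name. A hypothetical name-gating filter in the same spirit (dailyai's `NameCheckFilter` may work differently):

```python
import re
from typing import AsyncGenerator


class NameGateFilter:
    """Forwards a transcript line only if it mentions one of the
    configured names; everything else is dropped (sketch only)."""

    def __init__(self, names: list[str]):
        # Word boundaries so "Santa" matches but "Santana" does not.
        self._pattern = re.compile(
            r"\b(" + "|".join(re.escape(n) for n in names) + r")\b",
            re.IGNORECASE,
        )

    async def run(self, lines: AsyncGenerator[str, None]) -> AsyncGenerator[str, None]:
        async for line in lines:
            if self._pattern.search(line):
                yield line  # addressed to the bot: pass downstream
```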
@@ -3,6 +3,7 @@
 import logging
 import os
 import wave
+from dailyai.pipeline.pipeline import Pipeline
 
 from dailyai.transports.daily_transport import DailyTransport
 from dailyai.services.open_ai_services import OpenAILLMService
@@ -81,6 +82,7 @@ async def main(room_url: str, token):
             mic_sample_rate=16000,
             camera_enabled=False,
         )
+        transport.transcription_settings["extra"]["punctuate"] = True
 
         llm = OpenAILLMService(
             api_key=os.getenv("OPENAI_API_KEY"),
@@ -92,47 +94,31 @@ async def main(room_url: str, token):
             voice_id="ErXwobaYiN019PkySvjV",
         )
 
+        messages = [
+            {
+                "role": "system",
+                "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio. Respond to what the user said in a creative and helpful way.",
+            },
+        ]
+
+        tma_in = LLMUserContextAggregator(
+            messages, transport._my_participant_id)
+        tma_out = LLMAssistantContextAggregator(
+            messages, transport._my_participant_id
+        )
+        out_sound = OutboundSoundEffectWrapper()
+        in_sound = InboundSoundEffectWrapper()
+        fl = FrameLogger("LLM Out")
+        fl2 = FrameLogger("Transcription In")
+
+        pipeline = Pipeline([tma_in, in_sound, fl2, llm, tma_out, fl, tts, out_sound])
+
         @transport.event_handler("on_first_other_participant_joined")
         async def on_first_other_participant_joined(transport):
-            await tts.say("Hi, I'm listening!", transport.send_queue)
+            await transport.say("Hi, I'm listening!", tts)
             await transport.send_queue.put(AudioFrame(sounds["ding1.wav"]))
 
-        async def handle_transcriptions():
-            messages = [
-                {
-                    "role": "system",
-                    "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio. Respond to what the user said in a creative and helpful way.",
-                },
-            ]
-
-            tma_in = LLMUserContextAggregator(
-                messages, transport._my_participant_id)
-            tma_out = LLMAssistantContextAggregator(
-                messages, transport._my_participant_id
-            )
-            out_sound = OutboundSoundEffectWrapper()
-            in_sound = InboundSoundEffectWrapper()
-            fl = FrameLogger("LLM Out")
-            fl2 = FrameLogger("Transcription In")
-            await out_sound.run_to_queue(
-                transport.send_queue,
-                tts.run(
-                    fl.run(
-                        tma_out.run(
-                            llm.run(
-                                fl2.run(
-                                    in_sound.run(
-                                        tma_in.run(transport.get_receive_frames())
-                                    )
-                                )
-                            )
-                        )
-                    )
-                ),
-            )
-
-        transport.transcription_settings["extra"]["punctuate"] = True
-        await asyncio.gather(transport.run(), handle_transcriptions())
+        await asyncio.gather(transport.run(pipeline))
 
 
 if __name__ == "__main__":
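The sound-effects pipeline interleaves two `FrameLogger` stages: `fl2` ("Transcription In") sits before the LLM and `fl` ("LLM Out") sits after it, so both sides of the model can be inspected without disturbing the stream. A pass-through logger of that shape might look like this (illustrative; not dailyai's `FrameLogger`):

```python
from typing import Any, AsyncGenerator


class PassthroughLogger:
    """Prints each frame with a label and forwards it unchanged,
    mirroring how the FrameLogger stages are used above (sketch)."""

    def __init__(self, label: str):
        self._label = label

    async def run(self, frames: AsyncGenerator[Any, None]) -> AsyncGenerator[Any, None]:
        async for frame in frames:
            print(f"[{self._label}] {type(frame).__name__}: {frame!r}")
            yield frame  # pass the frame through untouched
```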
2 changes: 1 addition & 1 deletion src/dailyai/services/azure_ai_services.py
@@ -19,7 +19,7 @@
 except ModuleNotFoundError as e:
     print(f"Exception: {e}")
     print(
-        "In order to use Azure TTS, you need to `pip install dailyai[azure]`. Also, set `SPEECH_KEY` and `SPEECH_REGION` environment variables.")
+        "In order to use Azure TTS, you need to `pip install dailyai[azure]`. Also, set `AZURE_SPEECH_API_KEY` and `AZURE_SPEECH_REGION` environment variables.")
     raise Exception(f"Missing module: {e}")
 
 from dailyai.services.openai_api_llm_service import BaseOpenAILLMService
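This file uses the common optional-dependency guard: attempt the import, and on `ModuleNotFoundError` print an actionable install-and-configure hint before re-raising. The pattern in miniature (the hint text and env var names come from the diff; the specific symbol imported here is an assumption about what the guard protects):

```python
try:
    # azure-cognitiveservices-speech is only installed with dailyai[azure].
    from azure.cognitiveservices.speech import SpeechConfig  # assumed symbol
except ModuleNotFoundError as e:
    print(f"Exception: {e}")
    print(
        "In order to use Azure TTS, you need to `pip install dailyai[azure]`. "
        "Also, set `AZURE_SPEECH_API_KEY` and `AZURE_SPEECH_REGION` environment variables.")
    raise Exception(f"Missing module: {e}")
```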
