examples: updated to_be_updated examples #106

Merged · 2 commits · Apr 6, 2024
dot-env.template (12 changes: 10 additions & 2 deletions)
@@ -2,8 +2,16 @@
 ANTHROPIC_API_KEY=...

 # Azure
-SPEECH_KEY=...
-SPEECH_REGION=...
+AZURE_SPEECH_REGION=...
+AZURE_SPEECH_API_KEY=...
+
+AZURE_CHATGPT_API_KEY=...
+AZURE_CHATGPT_ENDPOINT=https://...
+AZURE_CHATGPT_MODEL=...
+
+AZURE_DALLE_API_KEY=...
+AZURE_DALLE_ENDPOINT=https://...
+AZURE_DALLE_MODEL=...

 # Daily
 DAILY_API_KEY=...
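For context, a minimal sketch of how example code might pick up the renamed Azure variables. The `python-dotenv` call here is an assumption (the examples themselves read plain environment variables via `os.getenv`):

    import os

    from dotenv import load_dotenv  # assumes python-dotenv; copy dot-env.template to .env first

    load_dotenv()  # populate os.environ from the .env file

    # The renamed speech variables; os.getenv returns None if a variable is unset.
    speech_key = os.getenv("AZURE_SPEECH_API_KEY")
    speech_region = os.getenv("AZURE_SPEECH_REGION")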
@@ -3,8 +3,9 @@
 import logging
 import tkinter as tk
 import os
+from dailyai.pipeline.aggregators import LLMFullResponseAggregator

-from dailyai.pipeline.frames import AudioFrame, ImageFrame
+from dailyai.pipeline.frames import AudioFrame, ImageFrame, LLMMessagesFrame, TextFrame
 from dailyai.services.open_ai_services import OpenAILLMService
 from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
 from dailyai.services.fal_ai_services import FalImageGenService
@@ -22,7 +23,7 @@ async def main():
     async with aiohttp.ClientSession() as session:
         meeting_duration_minutes = 5
         tk_root = tk.Tk()
-        tk_root.title("Calendar")
+        tk_root.title("dailyai")

         transport = LocalTransport(
             mic_enabled=True,
@@ -43,7 +44,7 @@ async def main():
             api_key=os.getenv("OPENAI_API_KEY"),
             model="gpt-4-turbo-preview")

-        dalle = FalImageGenService(
+        imagegen = FalImageGenService(
             image_size="1024x1024",
             aiohttp_session=session,
             key_id=os.getenv("FAL_KEY_ID"),
@@ -60,18 +61,33 @@ async def get_all_audio(text):

         return all_audio

+        async def get_month_description(aggregator, frame):
+            async for frame in aggregator.process_frame(frame):
+                if isinstance(frame, TextFrame):
+                    return frame.text
+
         async def get_month_data(month):
             messages = [{"role": "system", "content": f"Describe a nature photograph suitable for use in a calendar, for the month of {
                 month}. Include only the image description with no preamble. Limit the description to one sentence, please.", }]

-            image_description = await llm.run_llm(messages)
+            messages_frame = LLMMessagesFrame(messages)
+
+            llm_full_response_aggregator = LLMFullResponseAggregator()
+
+            image_description = None
+            async for frame in llm.process_frame(messages_frame):
+                result = await get_month_description(llm_full_response_aggregator, frame)
+                if result:
+                    image_description = result
+                    break
+
+            if not image_description:
+                return

             to_speak = f"{month}: {image_description}"
             audio_task = asyncio.create_task(get_all_audio(to_speak))
             image_task = asyncio.create_task(
-                dalle.run_image_gen(image_description))
+                imagegen.run_image_gen(image_description))
             (audio, image_data) = await asyncio.gather(audio_task, image_task)

             return {
@@ -82,19 +98,14 @@ async def get_month_data(month):
                 "audio": audio,
             }

+        # We only specify 5 months as we create tasks all at once and we might
+        # get rate limited otherwise.
         months: list[str] = [
             "January",
             "February",
             "March",
             "April",
             "May",
-            "June",
-            "July",
-            "August",
-            "September",
-            "October",
-            "November",
-            "December",
         ]

         async def show_images():
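The change above replaces the one-shot `llm.run_llm(messages)` call with the frame-based API: the messages are wrapped in an `LLMMessagesFrame`, the LLM streams its response as frames, and `LLMFullResponseAggregator` collapses them into a single `TextFrame`. A condensed sketch of that flow, using names taken from the diff (illustrative, not the library's documented API):

    messages_frame = LLMMessagesFrame(messages)  # wrap the chat messages in a frame
    aggregator = LLMFullResponseAggregator()     # buffers streamed text chunks

    image_description = None
    async for chunk in llm.process_frame(messages_frame):  # chunks arrive as frames
        async for out in aggregator.process_frame(chunk):
            if isinstance(out, TextFrame):  # emitted once the full response is assembled
                image_description = out.text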
@@ -5,7 +5,8 @@
 import aiohttp
 from PIL import Image

-from dailyai.pipeline.frames import ImageFrame, Frame
+from dailyai.pipeline.frames import ImageFrame, Frame, TextFrame
+from dailyai.pipeline.pipeline import Pipeline
 from dailyai.transports.daily_transport import DailyTransport
 from dailyai.services.ai_services import AIService
 from dailyai.pipeline.aggregators import (
@@ -14,7 +15,6 @@
 )
 from dailyai.services.open_ai_services import OpenAILLMService
 from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
-from dailyai.services.fal_ai_services import FalImageGenService

 from runner import configure
@@ -53,6 +53,7 @@ async def main(room_url: str, token):
         transport._camera_height = 1024
         transport._mic_enabled = True
         transport._mic_sample_rate = 16000
+        transport.transcription_settings["extra"]["punctuate"] = True

         tts = ElevenLabsTTSService(
             aiohttp_session=session,
@@ -64,57 +65,30 @@ async def main(room_url: str, token):
             api_key=os.getenv("OPENAI_API_KEY"),
             model="gpt-4-turbo-preview")

-        img = FalImageGenService(
-            image_size="1024x1024",
-            aiohttp_session=session,
-            key_id=os.getenv("FAL_KEY_ID"),
-            key_secret=os.getenv("FAL_KEY_SECRET"),
-        )
+        messages = [
+            {
+                "role": "system",
+                "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so it should not include any special characters. Respond to what the user said in a creative and helpful way.",
+            },
+        ]
+
+        tma_in = LLMUserContextAggregator(
+            messages, transport._my_participant_id)
+        tma_out = LLMAssistantContextAggregator(
+            messages, transport._my_participant_id
+        )
+        image_sync_aggregator = ImageSyncAggregator(
+            os.path.join(os.path.dirname(__file__), "assets", "speaking.png"),
+            os.path.join(os.path.dirname(__file__), "assets", "waiting.png"),
+        )

-        async def get_images():
-            get_speaking_task = asyncio.create_task(
-                img.run_image_gen("An image of a cat speaking")
-            )
-            get_waiting_task = asyncio.create_task(
-                img.run_image_gen("An image of a cat waiting")
-            )
-
-            (speaking_data, waiting_data) = await asyncio.gather(
-                get_speaking_task, get_waiting_task
-            )
-
-            return speaking_data, waiting_data
+        pipeline = Pipeline([image_sync_aggregator, tma_in, llm, tma_out, tts])

         @transport.event_handler("on_first_other_participant_joined")
         async def on_first_other_participant_joined(transport):
-            await tts.say("Hi, I'm listening!", transport.send_queue)
-
-        async def handle_transcriptions():
-            messages = [
-                {
-                    "role": "system",
-                    "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio. Respond to what the user said in a creative and helpful way.",
-                },
-            ]
-
-            tma_in = LLMUserContextAggregator(
-                messages, transport._my_participant_id)
-            tma_out = LLMAssistantContextAggregator(
-                messages, transport._my_participant_id
-            )
-            image_sync_aggregator = ImageSyncAggregator(
-                os.path.join(
-                    os.path.dirname(__file__), "assets", "speaking.png"), os.path.join(
-                    os.path.dirname(__file__), "assets", "waiting.png"), )
-            await tts.run_to_queue(
-                transport.send_queue,
-                image_sync_aggregator.run(
-                    tma_out.run(llm.run(tma_in.run(transport.get_receive_frames())))
-                ),
-            )
+            await pipeline.queue_frames([TextFrame("Hi, I'm listening!")])

-        transport.transcription_settings["extra"]["punctuate"] = True
-        await asyncio.gather(transport.run(), handle_transcriptions())
+        await transport.run(pipeline)


if __name__ == "__main__":
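The structural change in this file is the move from manually nested generator composition to the new `Pipeline` abstraction. A minimal before/after sketch, using only names that appear in the diff:

    # Before: services composed by nesting run() generators, innermost first
    await tts.run_to_queue(
        transport.send_queue,
        image_sync_aggregator.run(
            tma_out.run(llm.run(tma_in.run(transport.get_receive_frames())))),
    )

    # After: the same processing order as a flat list, driven by the transport
    pipeline = Pipeline([image_sync_aggregator, tma_in, llm, tma_out, tts])
    await transport.run(pipeline)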
@@ -5,6 +5,7 @@
 import random
 from typing import AsyncGenerator
 from PIL import Image
+from dailyai.pipeline.pipeline import Pipeline

 from dailyai.transports.daily_transport import DailyTransport
 from dailyai.services.open_ai_services import OpenAILLMService
@@ -133,6 +134,7 @@ async def main(room_url: str, token):
         transport._camera_enabled = True
         transport._camera_width = 720
         transport._camera_height = 1280
+        transport.transcription_settings["extra"]["punctuate"] = True

         llm = OpenAILLMService(
             api_key=os.getenv("OPENAI_API_KEY"),
@@ -145,45 +147,34 @@ async def main(room_url: str, token):
         )
         isa = ImageSyncAggregator()

+        messages = [
+            {
+                "role": "system",
+                "content": "You are Santa Cat, a cat that lives in Santa's workshop at the North Pole. You should be clever, and a bit sarcastic. You should also tell jokes every once in a while. Your responses should only be a few sentences long.",
+            },
+        ]
+
+        tma_in = LLMUserContextAggregator(
+            messages, transport._my_participant_id)
+        tma_out = LLMAssistantContextAggregator(
+            messages, transport._my_participant_id
+        )
+        tf = TranscriptFilter(transport._my_participant_id)
+        ncf = NameCheckFilter(["Santa Cat", "Santa"])
+
+        pipeline = Pipeline([isa, tf, ncf, tma_in, llm, tma_out, tts])

         @transport.event_handler("on_first_other_participant_joined")
         async def on_first_other_participant_joined(transport):
-            await tts.say(
+            await transport.say(
                 "Hi! If you want to talk to me, just say 'hey Santa Cat'.",
-                transport.send_queue,
+                tts,
             )

-        async def handle_transcriptions():
-            messages = [
-                {
-                    "role": "system",
-                    "content": "You are Santa Cat, a cat that lives in Santa's workshop at the North Pole. You should be clever, and a bit sarcastic. You should also tell jokes every once in a while. Your responses should only be a few sentences long.",
-                },
-            ]
-
-            tma_in = LLMUserContextAggregator(
-                messages, transport._my_participant_id)
-            tma_out = LLMAssistantContextAggregator(
-                messages, transport._my_participant_id
-            )
-            tf = TranscriptFilter(transport._my_participant_id)
-            ncf = NameCheckFilter(["Santa Cat", "Santa"])
-            await tts.run_to_queue(
-                transport.send_queue,
-                isa.run(
-                    tma_out.run(
-                        llm.run(
-                            tma_in.run(
-                                ncf.run(tf.run(transport.get_receive_frames())))
-                        )
-                    )
-                ),
-            )
-
         async def starting_image():
             await transport.send_queue.put(quiet_frame)

-        transport.transcription_settings["extra"]["punctuate"] = True
-        await asyncio.gather(transport.run(), handle_transcriptions(), starting_image())
+        await asyncio.gather(transport.run(pipeline), starting_image())


if __name__ == "__main__":
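Worth noting in the Santa Cat pipeline: the filters sit ahead of the context aggregator and LLM, so transcription frames only reach the model when the bot is addressed by name. A sketch of that ordering, where the comments state assumptions about each processor's role rather than documented behavior:

    pipeline = Pipeline([
        isa,      # ImageSyncAggregator: swaps speaking/waiting images around responses
        tf,       # TranscriptFilter: drops the bot's own transcriptions
        ncf,      # NameCheckFilter: passes frames only when "Santa" is mentioned
        tma_in,   # appends user text to the shared message context
        llm,      # generates the response
        tma_out,  # appends the assistant's response to the context
        tts,      # converts response text to audio frames
    ])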
@@ -3,6 +3,7 @@
 import logging
 import os
 import wave
+from dailyai.pipeline.pipeline import Pipeline

 from dailyai.transports.daily_transport import DailyTransport
 from dailyai.services.open_ai_services import OpenAILLMService
@@ -81,6 +82,7 @@ async def main(room_url: str, token):
         mic_sample_rate=16000,
         camera_enabled=False,
     )
+    transport.transcription_settings["extra"]["punctuate"] = True

     llm = OpenAILLMService(
         api_key=os.getenv("OPENAI_API_KEY"),
@@ -92,47 +94,31 @@ async def main(room_url: str, token):
         voice_id="ErXwobaYiN019PkySvjV",
     )

+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio. Respond to what the user said in a creative and helpful way.",
+        },
+    ]
+
+    tma_in = LLMUserContextAggregator(
+        messages, transport._my_participant_id)
+    tma_out = LLMAssistantContextAggregator(
+        messages, transport._my_participant_id
+    )
+    out_sound = OutboundSoundEffectWrapper()
+    in_sound = InboundSoundEffectWrapper()
+    fl = FrameLogger("LLM Out")
+    fl2 = FrameLogger("Transcription In")
+
+    pipeline = Pipeline([tma_in, in_sound, fl2, llm, tma_out, fl, tts, out_sound])

     @transport.event_handler("on_first_other_participant_joined")
     async def on_first_other_participant_joined(transport):
-        await tts.say("Hi, I'm listening!", transport.send_queue)
+        await transport.say("Hi, I'm listening!", tts)
         await transport.send_queue.put(AudioFrame(sounds["ding1.wav"]))

-    async def handle_transcriptions():
-        messages = [
-            {
-                "role": "system",
-                "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio. Respond to what the user said in a creative and helpful way.",
-            },
-        ]
-
-        tma_in = LLMUserContextAggregator(
-            messages, transport._my_participant_id)
-        tma_out = LLMAssistantContextAggregator(
-            messages, transport._my_participant_id
-        )
-        out_sound = OutboundSoundEffectWrapper()
-        in_sound = InboundSoundEffectWrapper()
-        fl = FrameLogger("LLM Out")
-        fl2 = FrameLogger("Transcription In")
-        await out_sound.run_to_queue(
-            transport.send_queue,
-            tts.run(
-                fl.run(
-                    tma_out.run(
-                        llm.run(
-                            fl2.run(
-                                in_sound.run(
-                                    tma_in.run(transport.get_receive_frames())
-                                )
-                            )
-                        )
-                    )
-                )
-            ),
-        )
-
-    transport.transcription_settings["extra"]["punctuate"] = True
-    await asyncio.gather(transport.run(), handle_transcriptions())
+    await asyncio.gather(transport.run(pipeline))


if __name__ == "__main__":
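One detail in this pipeline: the two `FrameLogger` instances bracket the LLM, which makes them a cheap debugging tool; each presumably logs frames passing through under its label. A sketch of the placement:

    fl2 = FrameLogger("Transcription In")  # inspect frames entering the LLM
    fl = FrameLogger("LLM Out")            # inspect frames leaving the LLM
    pipeline = Pipeline([tma_in, in_sound, fl2, llm, tma_out, fl, tts, out_sound])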
src/dailyai/services/azure_ai_services.py (2 changes: 1 addition & 1 deletion)
@@ -19,7 +19,7 @@
 except ModuleNotFoundError as e:
     print(f"Exception: {e}")
     print(
-        "In order to use Azure TTS, you need to `pip install dailyai[azure]`. Also, set `SPEECH_KEY` and `SPEECH_REGION` environment variables.")
+        "In order to use Azure TTS, you need to `pip install dailyai[azure]`. Also, set `AZURE_SPEECH_API_KEY` and `AZURE_SPEECH_REGION` environment variables.")
     raise Exception(f"Missing module: {e}")

 from dailyai.services.openai_api_llm_service import BaseOpenAILLMService
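Since the guard above only fires on a missing module, a caller could also fail fast on the renamed variables themselves. A small defensive sketch, not part of this PR's changes, assuming the Azure TTS service reads these variables at construction time:

    import os

    # Raise early with a pointer to the template instead of failing deep in the SDK.
    for var in ("AZURE_SPEECH_API_KEY", "AZURE_SPEECH_REGION"):
        if not os.getenv(var):
            raise RuntimeError(f"Set {var} before using Azure TTS (see dot-env.template).")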