Skip to content

Commit

Permalink
Added sound effect example (#18)
Browse files Browse the repository at this point in the history
* added sound effect example

* added dialout to this branch too

* fixup

* fixup for more dialout testing

* cleanup
  • Loading branch information
chadbailey59 authored Feb 1, 2024
1 parent 4e95865 commit 0d96f91
Show file tree
Hide file tree
Showing 20 changed files with 354 additions and 6 deletions.
2 changes: 2 additions & 0 deletions src/dailyai/queue_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ class StartStreamQueueFrame(ControlQueueFrame):
class EndStreamQueueFrame(ControlQueueFrame):
    """Control frame that declares no fields of its own.

    NOTE(review): by its name this presumably marks the end of a stream;
    confirm against the code that consumes it.
    """

class LLMResponseEndQueueFrame(QueueFrame):
    """Payload-free marker frame for the end of one LLM response.

    The LLM service yields one of these after the last text chunk it
    produces for an ``LLMMessagesQueueFrame``.
    """

@dataclass()
class AudioQueueFrame(QueueFrame):
Expand Down
16 changes: 16 additions & 0 deletions src/dailyai/services/ai_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
EndStreamQueueFrame,
ImageQueueFrame,
LLMMessagesQueueFrame,
LLMResponseEndQueueFrame,
QueueFrame,
TextQueueFrame,
)
Expand Down Expand Up @@ -89,6 +90,9 @@ async def process_frame(self, frame: QueueFrame) -> AsyncGenerator[QueueFrame, N
if isinstance(frame, LLMMessagesQueueFrame):
async for text_chunk in self.run_llm_async(frame.messages):
yield TextQueueFrame(text_chunk)
yield LLMResponseEndQueueFrame()
else:
yield frame


class TTSService(AIService):
Expand Down Expand Up @@ -186,6 +190,18 @@ async def process_frame(self, frame: QueueFrame) -> AsyncGenerator[QueueFrame, N
text = await self.run_stt(content)
yield TextQueueFrame(text)

class FrameLogger(AIService):
    """Pass-through service that logs every frame it processes.

    Audio and image frames are logged by type only (presumably to keep their
    payloads out of the log); every other frame is logged in full. All output
    goes through ``self.logger`` so that it follows the logging configuration
    instead of being written straight to stdout.
    """

    def __init__(self, prefix="Frame", **kwargs):
        """Store ``prefix`` for log lines; other kwargs go to ``AIService``."""
        super().__init__(**kwargs)
        self.prefix = prefix

    async def process_frame(self, frame: QueueFrame) -> AsyncGenerator[QueueFrame, None]:
        """Log ``frame`` and then yield it unchanged."""
        if isinstance(frame, (AudioQueueFrame, ImageQueueFrame)):
            # Media frames: log the frame type only.
            self.logger.info(f"{self.prefix}: {type(frame)}")
        else:
            self.logger.info(f"{self.prefix}: {frame}")

        yield frame

@dataclass
class AIServiceConfig:
Expand Down
6 changes: 6 additions & 0 deletions src/dailyai/services/daily_transport_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,12 @@ def call_joined(self, join_data, client_error):
t = Thread(target=self._receive_audio, daemon=True)
t.start()

def dialout(self, number):
    """Start a dial-out to ``number`` through the underlying client."""
    settings = {"phoneNumber": number}
    self.client.start_dialout(settings)

def start_recording(self):
    """Ask the underlying client to start recording."""
    client = self.client
    client.start_recording()

def on_error(self, error):
self._logger.error(f"on_error: {error}")

Expand Down
4 changes: 2 additions & 2 deletions src/samples/foundational/06a-image-sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ async def handle_transcriptions():
messages, transport.my_participant_id
)
image_sync_aggregator = ImageSyncAggregator(
os.path.join(os.path.dirname(__file__), "images", "speaking.png"),
os.path.join(os.path.dirname(__file__), "images", "waiting.png"),
os.path.join(os.path.dirname(__file__), "assets", "speaking.png"),
os.path.join(os.path.dirname(__file__), "assets", "waiting.png"),
)
await tts.run_to_queue(
transport.send_queue,
Expand Down
6 changes: 3 additions & 3 deletions src/samples/foundational/08b-debate-generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ async def main(room_url:str):
affirmative = "A woman dressed as a cowboy, outside on a ranch"
negative = "Pikachu in a business suit"

topic = "Is a hot dog a sandwich?"
affirmative = "A woman conservatively dressed as a librarian in a library surrounded by books"
negative = "A cat dressed in a hot dog costume"
# topic = "Is a hot dog a sandwich?"
# affirmative = "A woman conservatively dressed as a librarian in a library surrounded by books"
# negative = "A cat dressed in a hot dog costume"



Expand Down
2 changes: 1 addition & 1 deletion src/samples/foundational/10-wake-word.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@

for file in image_files:
# Build the full path to the image file
full_path = os.path.join(script_dir, "images", file)
full_path = os.path.join(script_dir, "assets", file)
# Get the filename without the extension to use as the dictionary key
filename = os.path.splitext(os.path.basename(full_path))[0]
# Open the image and convert it to bytes
Expand Down
159 changes: 159 additions & 0 deletions src/samples/foundational/11-sound-effects.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import argparse
import asyncio
import logging
import os
import wave
import requests
import time
import urllib.parse

from dailyai.services.daily_transport_service import DailyTransportService
from dailyai.services.azure_ai_services import AzureLLMService, AzureTTSService
from dailyai.queue_aggregators import LLMContextAggregator, LLMUserContextAggregator, LLMAssistantContextAggregator
from dailyai.services.ai_services import AIService, FrameLogger
from dailyai.queue_frame import QueueFrame, AudioQueueFrame, LLMResponseEndQueueFrame, LLMMessagesQueueFrame
from typing import AsyncGenerator

# Configure root logging output as "<level number> <timestamp> <message>" and
# turn on DEBUG output for the "dailyai" logger.
logging.basicConfig(format="%(levelno)s %(asctime)s %(message)s")
logger = logging.getLogger("dailyai")
logger.setLevel(logging.DEBUG)

# Raw audio frames of every sound effect, keyed by file name including the
# extension (e.g. "ding1.wav").
sounds = {}
sound_files = [
    'ding1.wav',
    'ding2.wav'
]

script_dir = os.path.dirname(__file__)

for file in sound_files:
    # Build the full path to the sound file
    full_path = os.path.join(script_dir, "assets", file)
    # Read all frames of the WAV file into memory as bytes
    with wave.open(full_path, "rb") as audio_file:
        sounds[file] = audio_file.readframes(audio_file.getnframes())




class OutboundSoundEffectWrapper(AIService):
    """Insert the "ding1.wav" sound ahead of each end-of-LLM-response frame.

    Every incoming frame is forwarded unchanged; an
    ``LLMResponseEndQueueFrame`` is additionally preceded by an
    ``AudioQueueFrame`` holding the "ding1.wav" audio from ``sounds``.
    """

    def __init__(self):
        # Run the base initializer so that whatever state AIService sets up
        # also exists on this instance.
        super().__init__()

    async def process_frame(self, frame: QueueFrame) -> AsyncGenerator[QueueFrame, None]:
        """Yield the sound effect (when applicable) and then ``frame``."""
        if isinstance(frame, LLMResponseEndQueueFrame):
            yield AudioQueueFrame(sounds["ding1.wav"])
        # Always pass the original frame on, in case anything else up the
        # stack needs it.
        yield frame

class InboundSoundEffectWrapper(AIService):
    """Insert the "ding2.wav" sound ahead of each LLM messages frame.

    Every incoming frame is forwarded unchanged; an
    ``LLMMessagesQueueFrame`` is additionally preceded by an
    ``AudioQueueFrame`` holding the "ding2.wav" audio from ``sounds``.
    """

    def __init__(self):
        # Run the base initializer so that whatever state AIService sets up
        # also exists on this instance.
        super().__init__()

    async def process_frame(self, frame: QueueFrame) -> AsyncGenerator[QueueFrame, None]:
        """Yield the sound effect (when applicable) and then ``frame``."""
        if isinstance(frame, LLMMessagesQueueFrame):
            yield AudioQueueFrame(sounds["ding2.wav"])
        # Always pass the original frame on, in case anything else up the
        # stack needs it.
        yield frame


# Entry coroutine of the sample: builds a DailyTransportService for
# `room_url` using `token`, creates the Azure LLM and TTS services, and then
# runs the transport together with the frame-processing pipeline.
# NOTE(review): the indentation of this block was lost in this view, so the
# nesting described in the comments below is inferred — confirm against the
# real file.
async def main(room_url: str, token):
# The three services are bound as module-level names, not locals of main().
global transport
global llm
global tts

transport = DailyTransportService(
room_url,
token,
"Respond bot",
# NOTE(review): the meaning of this positional 5 is not visible here — confirm
# against the DailyTransportService signature.
5,
)
# Microphone on (sample rate set to 16000), camera off.
transport.mic_enabled = True
transport.mic_sample_rate = 16000
transport.camera_enabled = False

llm = AzureLLMService()
tts = AzureTTSService()

# Handler registered for the "on_first_other_participant_joined" transport
# event: says a greeting through TTS, then queues the "ding1.wav" sound.
# NOTE(review): the second await presumably belongs to this handler rather
# than to main() itself — confirm.
@transport.event_handler("on_first_other_participant_joined")
async def on_first_other_participant_joined(transport):
await tts.say("Hi, I'm listening!", transport.send_queue)
await transport.send_queue.put(AudioQueueFrame(sounds["ding1.wav"]))
# Builds the processing chain and drains it into transport.send_queue.
async def handle_transcriptions():
# Conversation history, seeded with the system prompt. The same list object
# is handed to both context aggregators below.
messages = [
{"role": "system", "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio. Respond to what the user said in a creative and helpful way."},
]

tma_in = LLMUserContextAggregator(
messages, transport.my_participant_id
)
tma_out = LLMAssistantContextAggregator(
messages, transport.my_participant_id
)
out_sound = OutboundSoundEffectWrapper()
in_sound = InboundSoundEffectWrapper()
fl = FrameLogger("LLM Out")
fl2 = FrameLogger("Transcription In")
# The chain reads inside-out: received frames -> tma_in -> in_sound -> fl2
# -> llm -> tma_out -> fl -> tts -> out_sound, whose output goes to
# transport.send_queue.
# NOTE(review): presumably each run() wraps the inner frame stream lazily;
# run()/run_to_queue() are defined outside this file — confirm.
await out_sound.run_to_queue(
transport.send_queue,
tts.run(
fl.run(
tma_out.run(
llm.run(
fl2.run(
in_sound.run(
tma_in.run(
transport.get_receive_frames()
)
)
)
)
)
)
)
)


# Turn on the "punctuate" transcription option, then run the transport and the
# pipeline concurrently until both finish.
transport.transcription_settings["extra"]["punctuate"] = True
await asyncio.gather(transport.run(), handle_transcriptions())


# Script entry point: parse the room URL and Daily API key, request a one-hour
# owner meeting token for that room from the Daily REST API, then run main().
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Simple Daily Bot Sample")
    parser.add_argument(
        "-u", "--url", type=str, required=True, help="URL of the Daily room to join"
    )
    parser.add_argument(
        "-k",
        "--apikey",
        type=str,
        required=True,
        help="Daily API Key (needed to create token)",
    )

    # parse_known_args() is used so unrecognized extra arguments are ignored
    # instead of aborting the script.
    args, unknown = parser.parse_known_args()

    # Create a meeting token for the given room with an expiration 1 hour in the future.
    # The room name is the URL path without its leading "/".
    room_name: str = urllib.parse.urlparse(args.url).path[1:]
    expiration: float = time.time() + 60 * 60

    res: requests.Response = requests.post(
        "https://api.daily.co/v1/meeting-tokens",
        headers={"Authorization": f"Bearer {args.apikey}"},
        json={
            "properties": {"room_name": room_name, "is_owner": True, "exp": expiration}
        },
        # Without a timeout, requests waits indefinitely on an unresponsive
        # server; fail after 30 seconds instead.
        timeout=30,
    )

    if res.status_code != 200:
        raise Exception(f"Failed to create meeting token: {res.status_code} {res.text}")

    token: str = res.json()["token"]

    asyncio.run(main(args.url, token))
Loading

0 comments on commit 0d96f91

Please sign in to comment.