Merge pull request #24 from daily-co/another-formatting-pass
Another autopep8 formatting pass
Moishe authored Feb 10, 2024
Commit 08144fc (2 parents: 560c98f + 815aa2b)
Showing 21 changed files with 152 additions and 52 deletions.
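
Every change in this commit is mechanical output from autopep8. The commit does not record the exact invocation, so the command below is an assumption, not the confirmed one:

    # Hypothetical invocation; the flags actually used for this pass are not recorded.
    autopep8 --in-place --recursive src/

With autopep8's default --max-line-length of 79, a pass like this wraps long constructor calls to one argument per line and inserts the two blank lines PEP 8 expects between top-level definitions, which is exactly the pattern in the diffs below.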
14 changes: 9 additions & 5 deletions src/dailyai/queue_aggregators.py
@@ -61,13 +61,17 @@ async def process_frame(self, frame: QueueFrame) -> AsyncGenerator[QueueFrame, None]:
 
         # TODO: split up transcription by participant
         if self.complete_sentences:
-            self.sentence += frame.text # type: ignore -- the linter thinks this isn't a TextQueueFrame, even though we check it above
+            # type: ignore -- the linter thinks this isn't a TextQueueFrame, even
+            # though we check it above
+            self.sentence += frame.text
             if self.sentence.endswith((".", "?", "!")):
                 self.messages.append({"role": self.role, "content": self.sentence})
                 self.sentence = ""
                 yield LLMMessagesQueueFrame(self.messages)
         else:
-            self.messages.append({"role": self.role, "content": frame.text}) # type: ignore -- the linter thinks this isn't a TextQueueFrame, even though we check it above
+            # type: ignore -- the linter thinks this isn't a TextQueueFrame, even
+            # though we check it above
+            self.messages.append({"role": self.role, "content": frame.text})
             yield LLMMessagesQueueFrame(self.messages)
 
     async def finalize(self) -> AsyncGenerator[QueueFrame, None]:
@@ -79,9 +83,9 @@ async def finalize(self) -> AsyncGenerator[QueueFrame, None]:
 
 class LLMUserContextAggregator(LLMContextAggregator):
     def __init__(self,
-        messages: list[dict],
-        bot_participant_id=None,
-        complete_sentences=True):
+                 messages: list[dict],
+                 bot_participant_id=None,
+                 complete_sentences=True):
         super().__init__(messages, "user", bot_participant_id, complete_sentences, pass_through=False)
 
 
2 changes: 2 additions & 0 deletions src/dailyai/queue_frame.py
@@ -18,9 +18,11 @@ class StartStreamQueueFrame(ControlQueueFrame):
 class EndStreamQueueFrame(ControlQueueFrame):
     pass
 
+
 class LLMResponseEndQueueFrame(QueueFrame):
     pass
 
+
 @dataclass()
 class AudioQueueFrame(QueueFrame):
     data: bytes
1 change: 1 addition & 0 deletions src/dailyai/services/base_transport_service.py
@@ -16,6 +16,7 @@
     StartStreamQueueFrame,
 )
 
+
 class BaseTransportService():
 
     def __init__(
2 changes: 1 addition & 1 deletion src/dailyai/services/daily_transport_service.py
@@ -45,7 +45,7 @@ def __init__(
         start_transcription: bool = False,
         **kwargs,
     ):
-        super().__init__(**kwargs) # This will call BaseTransportService.__init__ method, not EventHandler
+        super().__init__(**kwargs)  # This will call BaseTransportService.__init__ method, not EventHandler
 
         self._room_url: str = room_url
         self._bot_name: str = bot_name
8 changes: 7 additions & 1 deletion src/dailyai/services/fal_ai_services.py
@@ -13,7 +13,13 @@
 
 
 class FalImageGenService(ImageGenService):
-    def __init__(self, *, image_size, aiohttp_session: aiohttp.ClientSession, key_id=None, key_secret=None):
+    def __init__(
+            self,
+            *,
+            image_size,
+            aiohttp_session: aiohttp.ClientSession,
+            key_id=None,
+            key_secret=None):
         super().__init__(image_size)
         self._aiohttp_session = aiohttp_session
         if key_id:
8 changes: 6 additions & 2 deletions src/dailyai/services/local_transport_service.py
@@ -22,11 +22,15 @@ def __init__(self, **kwargs):
 
     async def _write_frame_to_tkinter(self, frame: bytes):
         data = f"P6 {self._camera_width} {self._camera_height} 255 ".encode() + frame
-        photo = tk.PhotoImage(width=self._camera_width, height=self._camera_height, data=data, format="PPM")
+        photo = tk.PhotoImage(
+            width=self._camera_width,
+            height=self._camera_height,
+            data=data,
+            format="PPM")
         self._image_label.config(image=photo)
 
         # This holds a reference to the photo, preventing it from being garbage collected.
-        self._image_label.image = photo # type: ignore
+        self._image_label.image = photo  # type: ignore
 
     def write_frame_to_camera(self, frame: bytes):
         if self._camera_enabled and self._loop:
6 changes: 5 additions & 1 deletion src/examples/foundational/01-say-one-thing.py
@@ -7,6 +7,7 @@
 
 from examples.foundational.support.runner import configure
 
+
 async def main(room_url):
     async with aiohttp.ClientSession() as session:
         # create a transport service object using environment variables for
@@ -25,7 +26,10 @@ async def main(room_url):
             meeting_duration_minutes,
             mic_enabled=True
         )
-        tts = ElevenLabsTTSService(aiohttp_session=session, api_key=os.getenv("ELEVENLABS_API_KEY"), voice_id=os.getenv("ELEVENLABS_VOICE_ID"))
+        tts = ElevenLabsTTSService(
+            aiohttp_session=session,
+            api_key=os.getenv("ELEVENLABS_API_KEY"),
+            voice_id=os.getenv("ELEVENLABS_VOICE_ID"))
 
         # Register an event handler so we can play the audio when the participant joins.
         @transport.event_handler("on_participant_joined")
13 changes: 10 additions & 3 deletions src/examples/foundational/02-llm-say-one-thing.py
@@ -11,6 +11,7 @@
 from dailyai.services.open_ai_services import OpenAILLMService
 from examples.foundational.support.runner import configure
 
+
 async def main(room_url):
     async with aiohttp.ClientSession() as session:
         meeting_duration_minutes = 1
@@ -22,12 +23,18 @@ async def main(room_url):
             mic_enabled=True
         )
 
-        tts = ElevenLabsTTSService(aiohttp_session=session, api_key=os.getenv("ELEVENLABS_API_KEY"), voice_id=os.getenv("ELEVENLABS_VOICE_ID"))
+        tts = ElevenLabsTTSService(
+            aiohttp_session=session,
+            api_key=os.getenv("ELEVENLABS_API_KEY"),
+            voice_id=os.getenv("ELEVENLABS_VOICE_ID"))
         # tts = AzureTTSService(api_key=os.getenv("AZURE_SPEECH_API_KEY"), region=os.getenv("AZURE_SPEECH_REGION"))
         # tts = DeepgramTTSService(aiohttp_session=session, api_key=os.getenv("DEEPGRAM_API_KEY"), voice=os.getenv("DEEPGRAM_VOICE"))
 
-        llm = AzureLLMService(api_key=os.getenv("AZURE_CHATGPT_API_KEY"), endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"), model=os.getenv("AZURE_CHATGPT_MODEL"))
-        #llm = OpenAILLMService(api_key=os.getenv("OPENAI_CHATGPT_API_KEY"))
+        llm = AzureLLMService(
+            api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
+            endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
+            model=os.getenv("AZURE_CHATGPT_MODEL"))
+        # llm = OpenAILLMService(api_key=os.getenv("OPENAI_CHATGPT_API_KEY"))
         messages = [{
             "role": "system",
             "content": "You are an LLM in a WebRTC session, and this is a 'hello world' demo. Say hello to the world."
6 changes: 5 additions & 1 deletion src/examples/foundational/03-still-frame.py
@@ -28,7 +28,11 @@ async def main(room_url):
             camera_height=1024
         )
 
-        imagegen = FalImageGenService(image_size="1024x1024", aiohttp_session=session, key_id=os.getenv("FAL_KEY_ID"), key_secret=os.getenv("FAL_KEY_SECRET"))
+        imagegen = FalImageGenService(
+            image_size="1024x1024",
+            aiohttp_session=session,
+            key_id=os.getenv("FAL_KEY_ID"),
+            key_secret=os.getenv("FAL_KEY_SECRET"))
         # imagegen = OpenAIImageGenService(aiohttp_session=session, api_key=os.getenv("OPENAI_DALLE_API_KEY"), image_size="1024x1024")
         # imagegen = AzureImageGenServiceREST(image_size="1024x1024", aiohttp_session=session, api_key=os.getenv("AZURE_DALLE_API_KEY"), endpoint=os.getenv("AZURE_DALLE_ENDPOINT"), model=os.getenv("AZURE_DALLE_MODEL"))
 
15 changes: 12 additions & 3 deletions src/examples/foundational/04-utterance-and-speech.py
@@ -10,6 +10,7 @@
 
 from examples.foundational.support.runner import configure
 
+
 async def main(room_url: str):
     async with aiohttp.ClientSession() as session:
         transport = DailyTransportService(
@@ -22,9 +23,17 @@ async def main(room_url: str):
             camera_enabled=False
         )
 
-        llm = AzureLLMService(api_key=os.getenv("AZURE_CHATGPT_API_KEY"), endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"), model=os.getenv("AZURE_CHATGPT_MODEL"))
-        azure_tts = AzureTTSService(api_key=os.getenv("AZURE_SPEECH_API_KEY"), region=os.getenv("AZURE_SPEECH_REGION"))
-        elevenlabs_tts = ElevenLabsTTSService(aiohttp_session=session, api_key=os.getenv("ELEVENLABS_API_KEY"), voice_id=os.getenv("ELEVENLABS_VOICE_ID"))
+        llm = AzureLLMService(
+            api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
+            endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
+            model=os.getenv("AZURE_CHATGPT_MODEL"))
+        azure_tts = AzureTTSService(
+            api_key=os.getenv("AZURE_SPEECH_API_KEY"),
+            region=os.getenv("AZURE_SPEECH_REGION"))
+        elevenlabs_tts = ElevenLabsTTSService(
+            aiohttp_session=session,
+            api_key=os.getenv("ELEVENLABS_API_KEY"),
+            voice_id=os.getenv("ELEVENLABS_VOICE_ID"))
 
         messages = [{"role": "system", "content": "tell the user a joke about llamas"}]
 
17 changes: 14 additions & 3 deletions src/examples/foundational/05-sync-speech-and-image.py
@@ -11,6 +11,7 @@
 
 from examples.foundational.support.runner import configure
 
+
 async def main(room_url):
     async with aiohttp.ClientSession() as session:
         meeting_duration_minutes = 5
@@ -26,11 +27,21 @@ async def main(room_url):
             camera_height=1024
         )
 
-        llm = AzureLLMService(api_key=os.getenv("AZURE_CHATGPT_API_KEY"), endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"), model=os.getenv("AZURE_CHATGPT_MODEL"))
-        tts = ElevenLabsTTSService(aiohttp_session=session, api_key=os.getenv("ELEVENLABS_API_KEY"), voice_id="ErXwobaYiN019PkySvjV")
+        llm = AzureLLMService(
+            api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
+            endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
+            model=os.getenv("AZURE_CHATGPT_MODEL"))
+        tts = ElevenLabsTTSService(
+            aiohttp_session=session,
+            api_key=os.getenv("ELEVENLABS_API_KEY"),
+            voice_id="ErXwobaYiN019PkySvjV")
         # tts = AzureTTSService(api_key=os.getenv("AZURE_SPEECH_API_KEY"), region=os.getenv("AZURE_SPEECH_REGION"))
 
-        dalle = FalImageGenService(image_size="1024x1024", aiohttp_session=session, key_id=os.getenv("FAL_KEY_ID"), key_secret=os.getenv("FAL_KEY_SECRET"))
+        dalle = FalImageGenService(
+            image_size="1024x1024",
+            aiohttp_session=session,
+            key_id=os.getenv("FAL_KEY_ID"),
+            key_secret=os.getenv("FAL_KEY_SECRET"))
         # dalle = OpenAIImageGenService(aiohttp_session=session, api_key=os.getenv("OPENAI_DALLE_API_KEY"), image_size="1024x1024")
         # dalle = AzureImageGenServiceREST(image_size="1024x1024", aiohttp_session=session, api_key=os.getenv("AZURE_DALLE_API_KEY"), endpoint=os.getenv("AZURE_DALLE_ENDPOINT"), model=os.getenv("AZURE_DALLE_MODEL"))
 
12 changes: 9 additions & 3 deletions src/examples/foundational/06-listen-and-respond.py
@@ -6,6 +6,7 @@
 from dailyai.queue_aggregators import LLMAssistantContextAggregator, LLMContextAggregator, LLMUserContextAggregator
 from examples.foundational.support.runner import configure
 
+
 async def main(room_url: str, token):
     transport = DailyTransportService(
         room_url,
@@ -15,11 +16,16 @@ async def main(room_url: str, token):
         start_transcription=True,
         mic_enabled=True,
         mic_sample_rate=16000,
-        camera_enabled = False
+        camera_enabled=False
     )
 
-    llm = AzureLLMService(api_key=os.getenv("AZURE_CHATGPT_API_KEY"), endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"), model=os.getenv("AZURE_CHATGPT_MODEL"))
-    tts = AzureTTSService(api_key=os.getenv("AZURE_SPEECH_API_KEY"), region=os.getenv("AZURE_SPEECH_REGION"))
+    llm = AzureLLMService(
+        api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
+        endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
+        model=os.getenv("AZURE_CHATGPT_MODEL"))
+    tts = AzureTTSService(
+        api_key=os.getenv("AZURE_SPEECH_API_KEY"),
+        region=os.getenv("AZURE_SPEECH_REGION"))
 
     @transport.event_handler("on_first_other_participant_joined")
     async def on_first_other_participant_joined(transport):
16 changes: 13 additions & 3 deletions src/examples/foundational/06a-image-sync.py
@@ -18,6 +18,7 @@
 
 from examples.foundational.support.runner import configure
 
+
 class ImageSyncAggregator(AIService):
     def __init__(self, speaking_path: str, waiting_path: str):
         self._speaking_image = Image.open(speaking_path)
@@ -46,9 +47,18 @@ async def main(room_url: str, token):
         transport._mic_enabled = True
         transport._mic_sample_rate = 16000
 
-        llm = AzureLLMService(api_key=os.getenv("AZURE_CHATGPT_API_KEY"), endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"), model=os.getenv("AZURE_CHATGPT_MODEL"))
-        tts = AzureTTSService(api_key=os.getenv("AZURE_SPEECH_API_KEY"), region=os.getenv("AZURE_SPEECH_REGION"))
-        img = FalImageGenService(image_size="1024x1024", aiohttp_session=session, key_id=os.getenv("FAL_KEY_ID"), key_secret=os.getenv("FAL_KEY_SECRET"))
+        llm = AzureLLMService(
+            api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
+            endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
+            model=os.getenv("AZURE_CHATGPT_MODEL"))
+        tts = AzureTTSService(
+            api_key=os.getenv("AZURE_SPEECH_API_KEY"),
+            region=os.getenv("AZURE_SPEECH_REGION"))
+        img = FalImageGenService(
+            image_size="1024x1024",
+            aiohttp_session=session,
+            key_id=os.getenv("FAL_KEY_ID"),
+            key_secret=os.getenv("FAL_KEY_SECRET"))
 
         async def get_images():
             get_speaking_task = asyncio.create_task(
10 changes: 8 additions & 2 deletions src/examples/foundational/07-interruptible.py
@@ -10,6 +10,7 @@
 
 from examples.foundational.support.runner import configure
 
+
 async def main(room_url: str, token):
     async with aiohttp.ClientSession() as session:
         transport = DailyTransportService(
@@ -23,8 +24,13 @@ async def main(room_url: str, token):
             camera_enabled=False,
         )
 
-        llm = AzureLLMService(api_key=os.getenv("AZURE_CHATGPT_API_KEY"), endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"), model=os.getenv("AZURE_CHATGPT_MODEL"))
-        tts = AzureTTSService(api_key=os.getenv("AZURE_SPEECH_API_KEY"), region=os.getenv("AZURE_SPEECH_REGION"))
+        llm = AzureLLMService(
+            api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
+            endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
+            model=os.getenv("AZURE_CHATGPT_MODEL"))
+        tts = AzureTTSService(
+            api_key=os.getenv("AZURE_SPEECH_API_KEY"),
+            region=os.getenv("AZURE_SPEECH_REGION"))
 
         async def run_response(user_speech, tma_in, tma_out):
             await tts.run_to_queue(
27 changes: 21 additions & 6 deletions src/examples/foundational/08-bots-arguing.py
@@ -10,7 +10,8 @@
 
 from examples.foundational.support.runner import configure
 
-async def main(room_url:str):
+
+async def main(room_url: str):
     async with aiohttp.ClientSession() as session:
         transport = DailyTransportService(
             room_url,
@@ -24,16 +25,30 @@ async def main(room_url: str):
             camera_height=1024
         )
 
-        llm = AzureLLMService(api_key=os.getenv("AZURE_CHATGPT_API_KEY"), endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"), model=os.getenv("AZURE_CHATGPT_MODEL"))
-        tts1 = AzureTTSService(api_key=os.getenv("AZURE_SPEECH_API_KEY"), region=os.getenv("AZURE_SPEECH_REGION"))
-        tts2 = ElevenLabsTTSService(aiohttp_session=session, api_key=os.getenv("ELEVENLABS_API_KEY"), voice_id="jBpfuIE2acCO8z3wKNLl")
-        dalle = FalImageGenService(image_size="1024x1024", aiohttp_session=session, key_id=os.getenv("FAL_KEY_ID"), key_secret=os.getenv("FAL_KEY_SECRET"))
+        llm = AzureLLMService(
+            api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
+            endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
+            model=os.getenv("AZURE_CHATGPT_MODEL"))
+        tts1 = AzureTTSService(
+            api_key=os.getenv("AZURE_SPEECH_API_KEY"),
+            region=os.getenv("AZURE_SPEECH_REGION"))
+        tts2 = ElevenLabsTTSService(
+            aiohttp_session=session,
+            api_key=os.getenv("ELEVENLABS_API_KEY"),
+            voice_id="jBpfuIE2acCO8z3wKNLl")
+        dalle = FalImageGenService(
+            image_size="1024x1024",
+            aiohttp_session=session,
+            key_id=os.getenv("FAL_KEY_ID"),
+            key_secret=os.getenv("FAL_KEY_SECRET"))
 
         bot1_messages = [
             {"role": "system", "content": "You are a stern librarian. You strongly believe that a hot dog is a sandwich. Start by stating this fact in a few sentences, then be prepared to debate this with the user. You shouldn't ever compromise on the fundamental truth that a hot dog is a sandwich. Your responses should only be a few sentences long."},
         ]
         bot2_messages = [
-            {"role": "system", "content": "You are a silly cat, and you strongly believe that a hot dog is not a sandwich. Debate this with the user, only responding with a few sentences. Don't ever accept that a hot dog is a sandwich."},
+            {
+                "role": "system",
+                "content": "You are a silly cat, and you strongly believe that a hot dog is not a sandwich. Debate this with the user, only responding with a few sentences. Don't ever accept that a hot dog is a sandwich."},
         ]
 
         async def get_bot1_statement():
12 changes: 9 additions & 3 deletions src/examples/foundational/10-wake-word.py
@@ -71,7 +71,7 @@ async def process_frame(self, frame: QueueFrame) -> AsyncGenerator[QueueFrame, None]:
 
 
 class NameCheckFilter(AIService):
-    def __init__(self, names:list[str]):
+    def __init__(self, names: list[str]):
         self.names = names
         self.sentence = ""
 
@@ -123,8 +123,14 @@ async def main(room_url: str, token):
         transport._camera_width = 720
         transport._camera_height = 1280
 
-        llm = AzureLLMService(api_key=os.getenv("AZURE_CHATGPT_API_KEY"), endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"), model=os.getenv("AZURE_CHATGPT_MODEL"))
-        tts = ElevenLabsTTSService(aiohttp_session=session, api_key=os.getenv("ELEVENLABS_API_KEY"), voice_id="jBpfuIE2acCO8z3wKNLl")
+        llm = AzureLLMService(
+            api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
+            endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
+            model=os.getenv("AZURE_CHATGPT_MODEL"))
+        tts = ElevenLabsTTSService(
+            aiohttp_session=session,
+            api_key=os.getenv("ELEVENLABS_API_KEY"),
+            voice_id="jBpfuIE2acCO8z3wKNLl")
         isa = ImageSyncAggregator()
 
         @transport.event_handler("on_first_other_participant_joined")
[Diff truncated by the page: the remaining 5 of 21 changed files are not shown.]
