diff --git a/src/dailyai/pipeline/aggregators.py b/src/dailyai/pipeline/aggregators.py index 63cc0f30b..f9bcaca13 100644 --- a/src/dailyai/pipeline/aggregators.py +++ b/src/dailyai/pipeline/aggregators.py @@ -57,7 +57,8 @@ async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]: # Sometimes VAD triggers quickly on and off. If we don't get any transcription, # it creates empty LLM message queue frames if len(self.aggregation) > 0: - self.messages.append({"role": self._role, "content": self.aggregation}) + self.messages.append( + {"role": self._role, "content": self.aggregation}) self.aggregation = "" yield self._end_frame() yield LLMMessagesQueueFrame(self.messages) @@ -110,7 +111,8 @@ def __init__( self.pass_through = pass_through async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]: - # We don't do anything with non-text frames, pass it along to next in the pipeline. + # We don't do anything with non-text frames, pass it along to next in + # the pipeline. if not isinstance(frame, TextFrame): yield frame return @@ -132,7 +134,8 @@ async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]: # though we check it above self.sentence += frame.text if self.sentence.endswith((".", "?", "!")): - self.messages.append({"role": self.role, "content": self.sentence}) + self.messages.append( + {"role": self.role, "content": self.sentence}) self.sentence = "" yield LLMMessagesQueueFrame(self.messages) else: @@ -144,17 +147,24 @@ async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]: class LLMUserContextAggregator(LLMContextAggregator): def __init__( - self, messages: list[dict], bot_participant_id=None, complete_sentences=True - ): + self, + messages: list[dict], + bot_participant_id=None, + complete_sentences=True): super().__init__( - messages, "user", bot_participant_id, complete_sentences, pass_through=False - ) + messages, + "user", + bot_participant_id, + complete_sentences, + pass_through=False) class LLMAssistantContextAggregator(LLMContextAggregator): def __init__( - self, messages: list[dict], bot_participant_id=None, complete_sentences=True - ): + self, + messages: list[dict], + bot_participant_id=None, + complete_sentences=True): super().__init__( messages, "assistant", @@ -328,7 +338,8 @@ async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]: continue seen_ids.add(id(frame)) - # Skip passing along EndParallelPipeQueueFrame, because we use them for our own flow control. + # Skip passing along EndParallelPipeQueueFrame, because we use them + # for our own flow control. if not isinstance(frame, EndPipeFrame): yield frame diff --git a/src/dailyai/pipeline/frames.py b/src/dailyai/pipeline/frames.py index 6dcff3d2b..6c39fb2c7 100644 --- a/src/dailyai/pipeline/frames.py +++ b/src/dailyai/pipeline/frames.py @@ -13,7 +13,7 @@ class ControlFrame(Frame): # Control frames should contain no instance data, so # equality is based solely on the class. def __eq__(self, other): - return type(other) == self.__class__ + return isinstance(other, self.__class__) class StartFrame(ControlFrame): diff --git a/src/dailyai/pipeline/merge_pipeline.py b/src/dailyai/pipeline/merge_pipeline.py index 51178aead..736903e9d 100644 --- a/src/dailyai/pipeline/merge_pipeline.py +++ b/src/dailyai/pipeline/merge_pipeline.py @@ -6,7 +6,8 @@ class SequentialMergePipeline(Pipeline): """This class merges the sink queues from a list of pipelines. Frames from each pipeline's sink are merged in the order of pipelines in the list.""" - def __init__(self, pipelines:List[Pipeline]): + + def __init__(self, pipelines: List[Pipeline]): super().__init__([]) self.pipelines = pipelines @@ -14,7 +15,9 @@ async def run_pipeline(self): for pipeline in self.pipelines: while True: frame = await pipeline.sink.get() - if isinstance(frame, EndFrame) or isinstance(frame, EndPipeFrame): + if isinstance( + frame, EndFrame) or isinstance( + frame, EndPipeFrame): break await self.sink.put(frame) diff --git a/src/dailyai/pipeline/opeanai_llm_aggregator.py b/src/dailyai/pipeline/opeanai_llm_aggregator.py index b38b7f050..a9d95f0e3 100644 --- a/src/dailyai/pipeline/opeanai_llm_aggregator.py +++ b/src/dailyai/pipeline/opeanai_llm_aggregator.py @@ -69,7 +69,10 @@ async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]: else: yield frame - def string_aggregator(self, frame: Frame, aggregation: str | None) -> str | None: + def string_aggregator( + self, + frame: Frame, + aggregation: str | None) -> str | None: if not isinstance(frame, TextFrame): raise TypeError( "Frame must be a TextFrame instance to be aggregated by a string aggregator." @@ -94,7 +97,7 @@ def __init__(self, context: OpenAILLMContext): class OpenAIAssistantContextAggregator(OpenAIContextAggregator): - def __init__(self, context:OpenAILLMContext): + def __init__(self, context: OpenAILLMContext): super().__init__( context, aggregator=self.string_aggregator, diff --git a/src/dailyai/pipeline/pipeline.py b/src/dailyai/pipeline/pipeline.py index b62458f3a..b055f3d81 100644 --- a/src/dailyai/pipeline/pipeline.py +++ b/src/dailyai/pipeline/pipeline.py @@ -89,7 +89,8 @@ async def run_pipeline(self): ): break except asyncio.CancelledError: - # this means there's been an interruption, do any cleanup necessary here. + # this means there's been an interruption, do any cleanup necessary + # here. for processor in self.processors: await processor.interrupted() pass @@ -107,4 +108,3 @@ async def _run_pipeline_recursively( yield final_frame else: yield initial_frame - diff --git a/src/dailyai/services/ai_services.py b/src/dailyai/services/ai_services.py index 7300a6c1c..0afe8412f 100644 --- a/src/dailyai/services/ai_services.py +++ b/src/dailyai/services/ai_services.py @@ -28,6 +28,7 @@ class AIService(FrameProcessor): def __init__(self): self.logger = logging.getLogger("dailyai") + class LLMService(AIService): """This class is a no-op but serves as a base class for LLM services.""" @@ -76,7 +77,8 @@ async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]: async for audio_chunk in self.run_tts(text): yield AudioFrame(audio_chunk) - # note we pass along the text frame *after* the audio, so the text frame is completed after the audio is processed. + # note we pass along the text frame *after* the audio, so the text + # frame is completed after the audio is processed. yield TextFrame(text) diff --git a/src/dailyai/services/anthropic_llm_service.py b/src/dailyai/services/anthropic_llm_service.py index a58c610d7..9e8ce0db4 100644 --- a/src/dailyai/services/anthropic_llm_service.py +++ b/src/dailyai/services/anthropic_llm_service.py @@ -9,7 +9,11 @@ class AnthropicLLMService(LLMService): - def __init__(self, api_key, model="claude-3-opus-20240229", max_tokens=1024): + def __init__( + self, + api_key, + model="claude-3-opus-20240229", + max_tokens=1024): super().__init__() self.client = AsyncAnthropic(api_key=api_key) self.model = model diff --git a/src/dailyai/services/azure_ai_services.py b/src/dailyai/services/azure_ai_services.py index a401e2e79..c677ffff5 100644 --- a/src/dailyai/services/azure_ai_services.py +++ b/src/dailyai/services/azure_ai_services.py @@ -44,8 +44,7 @@ async def run_tts(self, sentence) -> AsyncGenerator[bytes, None]: "" "" f"{sentence}" - " " - ) + " ") result = await asyncio.to_thread(self.speech_synthesizer.speak_ssml, (ssml)) self.logger.info("Got azure tts result") if result.reason == ResultReason.SynthesizingAudioCompleted: @@ -55,16 +54,22 @@ async def run_tts(self, sentence) -> AsyncGenerator[bytes, None]: elif result.reason == ResultReason.Canceled: cancellation_details = result.cancellation_details self.logger.info( - "Speech synthesis canceled: {}".format(cancellation_details.reason) - ) + "Speech synthesis canceled: {}".format( + cancellation_details.reason)) if cancellation_details.reason == CancellationReason.Error: self.logger.info( - "Error details: {}".format(cancellation_details.error_details) - ) + "Error details: {}".format( + cancellation_details.error_details)) class AzureLLMService(BaseOpenAILLMService): - def __init__(self, *, api_key, endpoint, api_version="2023-12-01-preview", model): + def __init__( + self, + *, + api_key, + endpoint, + api_version="2023-12-01-preview", + model): self._endpoint = endpoint self._api_version = api_version @@ -101,7 +106,9 @@ def __init__( async def run_image_gen(self, sentence) -> tuple[str, bytes]: url = f"{self._azure_endpoint}openai/images/generations:submit?api-version={self._api_version}" - headers = {"api-key": self._api_key, "Content-Type": "application/json"} + headers = { + "api-key": self._api_key, + "Content-Type": "application/json"} body = { # Enter your prompt text here "prompt": sentence, @@ -112,7 +119,8 @@ async def run_image_gen(self, sentence) -> tuple[str, bytes]: url, headers=headers, json=body ) as submission: # We never get past this line, because this header isn't - # defined on a 429 response, but something is eating our exceptions! + # defined on a 429 response, but something is eating our + # exceptions! operation_location = submission.headers["operation-location"] status = "" attempts_left = 120 @@ -130,8 +138,7 @@ async def run_image_gen(self, sentence) -> tuple[str, bytes]: status = json_response["status"] image_url = ( - json_response["result"]["data"][0]["url"] if json_response else None - ) + json_response["result"]["data"][0]["url"] if json_response else None) if not image_url: raise Exception("Image generation failed") # Load the image from the url diff --git a/src/dailyai/services/base_transport_service.py b/src/dailyai/services/base_transport_service.py index b05c7b1f3..c4d963007 100644 --- a/src/dailyai/services/base_transport_service.py +++ b/src/dailyai/services/base_transport_service.py @@ -127,12 +127,14 @@ def __init__( self._logger: logging.Logger = logging.getLogger() - async def run(self, pipeline:Pipeline | None=None, override_pipeline_source_queue=True): + async def run(self, pipeline: Pipeline | None = None, override_pipeline_source_queue=True): self._prerun() - async_output_queue_marshal_task = asyncio.create_task(self._marshal_frames()) + async_output_queue_marshal_task = asyncio.create_task( + self._marshal_frames()) - self._camera_thread = threading.Thread(target=self._run_camera, daemon=True) + self._camera_thread = threading.Thread( + target=self._run_camera, daemon=True) self._camera_thread.start() self._frame_consumer_thread = threading.Thread( @@ -182,7 +184,7 @@ async def run(self, pipeline:Pipeline | None=None, override_pipeline_source_queu if self._vad_enabled: self._vad_thread.join() - async def run_pipeline(self, pipeline:Pipeline, override_pipeline_source_queue=True): + async def run_pipeline(self, pipeline: Pipeline, override_pipeline_source_queue=True): pipeline.set_sink(self.send_queue) if override_pipeline_source_queue: pipeline.set_source(self.receive_queue) @@ -217,7 +219,8 @@ async def post_process(post_processor: FrameProcessor): break if post_processor: - post_process_task = asyncio.create_task(post_process(post_processor)) + post_process_task = asyncio.create_task( + post_process(post_processor)) started = False @@ -244,7 +247,7 @@ async def post_process(post_processor: FrameProcessor): await asyncio.gather(pipeline_task, post_process_task) - async def say(self, text:str, tts:TTSService): + async def say(self, text: str, tts: TTSService): """Say a phrase. Use with caution; this bypasses any running pipelines.""" async for frame in tts.process_frame(TextFrame(text)): await self.send_queue.put(frame) @@ -290,7 +293,8 @@ def _vad(self): audio_chunk = self.read_audio_frames(self._vad_samples) audio_int16 = np.frombuffer(audio_chunk, np.int16) audio_float32 = int2float(audio_int16) - new_confidence = model(torch.from_numpy(audio_float32), 16000).item() + new_confidence = model( + torch.from_numpy(audio_float32), 16000).item() speaking = new_confidence > 0.5 if speaking: @@ -320,8 +324,8 @@ def _vad(self): ): if self._loop: asyncio.run_coroutine_threadsafe( - self.receive_queue.put(UserStartedSpeakingFrame()), self._loop - ) + self.receive_queue.put( + UserStartedSpeakingFrame()), self._loop) # self.interrupt() self._vad_state = VADState.SPEAKING self._vad_starting_count = 0 @@ -331,8 +335,8 @@ def _vad(self): ): if self._loop: asyncio.run_coroutine_threadsafe( - self.receive_queue.put(UserStoppedSpeakingFrame()), self._loop - ) + self.receive_queue.put( + UserStoppedSpeakingFrame()), self._loop) self._vad_state = VADState.QUIET self._vad_stopping_count = 0 @@ -370,7 +374,9 @@ def _receive_audio(self): self.receive_queue.put(frame), self._loop ) - asyncio.run_coroutine_threadsafe(self.receive_queue.put(EndFrame()), self._loop) + asyncio.run_coroutine_threadsafe( + self.receive_queue.put( + EndFrame()), self._loop) def _set_image(self, image: bytes): self._images = itertools.cycle([image]) @@ -378,7 +384,7 @@ def _set_image(self, image: bytes): def _set_images(self, images: list[bytes], start_frame=0): self._images = itertools.cycle(images) - def send_app_message(self, message: Any, participantId:str|None): + def send_app_message(self, message: Any, participantId: str | None): """ Child classes should override this to send a custom message to the room. """ pass @@ -401,17 +407,18 @@ def _frame_consumer(self): largest_write_size = 8000 while True: try: - frames_or_frame: Frame | list[Frame] = self._threadsafe_send_queue.get() + frames_or_frame: Frame | list[Frame] = self._threadsafe_send_queue.get( + ) if ( isinstance(frames_or_frame, AudioFrame) and len(frames_or_frame.data) > largest_write_size ): # subdivide large audio frames to enable interruption frames = [] - for i in range(0, len(frames_or_frame.data), largest_write_size): - frames.append( - AudioFrame(frames_or_frame.data[i : i + largest_write_size]) - ) + for i in range(0, len(frames_or_frame.data), + largest_write_size): + frames.append(AudioFrame( + frames_or_frame.data[i: i + largest_write_size])) elif isinstance(frames_or_frame, Frame): frames: list[Frame] = [frames_or_frame] elif isinstance(frames_or_frame, list): @@ -430,7 +437,8 @@ def _frame_consumer(self): ) return - # if interrupted, we just pull frames off the queue and discard them + # if interrupted, we just pull frames off the queue and + # discard them if not self._is_interrupted.is_set(): if frame: if isinstance(frame, AudioFrame): @@ -441,14 +449,16 @@ def _frame_consumer(self): len(b) % smallest_write_size ) if truncated_length: - self.write_frame_to_mic(bytes(b[:truncated_length])) + self.write_frame_to_mic( + bytes(b[:truncated_length])) b = b[truncated_length:] elif isinstance(frame, ImageFrame): self._set_image(frame.image) elif isinstance(frame, SpriteFrame): self._set_images(frame.images) elif isinstance(frame, SendAppMessageFrame): - self.send_app_message(frame.message, frame.participantId) + self.send_app_message( + frame.message, frame.participantId) elif len(b): self.write_frame_to_mic(bytes(b)) b = bytearray() @@ -457,7 +467,8 @@ def _frame_consumer(self): # can cause static in the audio stream. if len(b): truncated_length = len(b) - (len(b) % 160) - self.write_frame_to_mic(bytes(b[:truncated_length])) + self.write_frame_to_mic( + bytes(b[:truncated_length])) b = bytearray() if isinstance(frame, StartFrame): @@ -479,5 +490,6 @@ def _frame_consumer(self): b = bytearray() except Exception as e: - self._logger.error(f"Exception in frame_consumer: {e}, {len(b)}") + self._logger.error( + f"Exception in frame_consumer: {e}, {len(b)}") raise e diff --git a/src/dailyai/services/daily_transport_service.py b/src/dailyai/services/daily_transport_service.py index 94c0078f5..9c8aa5e78 100644 --- a/src/dailyai/services/daily_transport_service.py +++ b/src/dailyai/services/daily_transport_service.py @@ -48,7 +48,8 @@ def __init__( start_transcription: bool = False, **kwargs, ): - super().__init__(**kwargs) # This will call BaseTransportService.__init__ method, not EventHandler + # This will call BaseTransportService.__init__ method, not EventHandler + super().__init__(**kwargs) self._room_url: str = room_url self._bot_name: str = bot_name @@ -83,9 +84,11 @@ def _patch_method(self, event_name, *args, **kwargs): for handler in self._event_handlers[event_name]: if inspect.iscoroutinefunction(handler): if self._loop: - future = asyncio.run_coroutine_threadsafe(handler(*args, **kwargs), self._loop) + future = asyncio.run_coroutine_threadsafe( + handler(*args, **kwargs), self._loop) - # wait for the coroutine to finish. This will also raise any exceptions raised by the coroutine. + # wait for the coroutine to finish. This will also + # raise any exceptions raised by the coroutine. future.result() else: raise Exception( @@ -98,7 +101,8 @@ def _patch_method(self, event_name, *args, **kwargs): def add_event_handler(self, event_name: str, handler): if not event_name.startswith("on_"): - raise Exception(f"Event handler {event_name} must start with 'on_'") + raise Exception( + f"Event handler {event_name} must start with 'on_'") methods = inspect.getmembers(self, predicate=inspect.ismethod) if event_name not in [method[0] for method in methods]: @@ -111,7 +115,8 @@ def add_event_handler(self, event_name: str, handler): handler, self)] setattr(self, event_name, partial(self._patch_method, event_name)) else: - self._event_handlers[event_name].append(types.MethodType(handler, self)) + self._event_handlers[event_name].append( + types.MethodType(handler, self)) def event_handler(self, event_name: str): def decorator(handler): @@ -148,8 +153,7 @@ def _prerun(self): if self._camera_enabled: self.camera: VirtualCameraDevice = Daily.create_camera_device( - "camera", width=self._camera_width, height=self._camera_height, color_format="RGB" - ) + "camera", width=self._camera_width, height=self._camera_height, color_format="RGB") if self._speaker_enabled or self._vad_enabled: self._speaker: VirtualSpeakerDevice = Daily.create_speaker_device( @@ -249,7 +253,7 @@ def on_participant_left(self, participant, reason): if len(self.client.participants()) < self._min_others_count + 1: self._stop_threads.set() - def on_app_message(self, message:Any, sender:str): + def on_app_message(self, message: Any, sender: str): if self._loop: frame = ReceivedAppMessageFrame(message, sender) print(frame) @@ -265,8 +269,10 @@ def on_transcription_message(self, message: dict): elif "session_id" in message: participantId = message["session_id"] if self._my_participant_id and participantId != self._my_participant_id: - frame = TranscriptionQueueFrame(message["text"], participantId, message["timestamp"]) - asyncio.run_coroutine_threadsafe(self.receive_queue.put(frame), self._loop) + frame = TranscriptionQueueFrame( + message["text"], participantId, message["timestamp"]) + asyncio.run_coroutine_threadsafe( + self.receive_queue.put(frame), self._loop) def on_transcription_error(self, message): self._logger.error(f"Transcription error: {message}") diff --git a/src/dailyai/services/deepgram_ai_service.py b/src/dailyai/services/deepgram_ai_service.py index 1ede1c35d..a8c58e734 100644 --- a/src/dailyai/services/deepgram_ai_service.py +++ b/src/dailyai/services/deepgram_ai_service.py @@ -25,7 +25,9 @@ async def run_tts(self, sentence): self.logger.info(f"Running deepgram tts for {sentence}") base_url = "https://api.beta.deepgram.com/v1/speak" request_url = f"{base_url}?model={self._voice}&encoding=linear16&container=none&sample_rate={self._sample_rate}" - headers = {"authorization": f"token {self._api_key}", "Content-Type": "application/json"} + headers = { + "authorization": f"token {self._api_key}", + "Content-Type": "application/json"} data = {"text": sentence} async with self._aiohttp_session.post( diff --git a/src/dailyai/services/deepgram_ai_services.py b/src/dailyai/services/deepgram_ai_services.py index ff6563023..f04dec01c 100644 --- a/src/dailyai/services/deepgram_ai_services.py +++ b/src/dailyai/services/deepgram_ai_services.py @@ -9,7 +9,12 @@ class DeepgramTTSService(TTSService): - def __init__(self, *, aiohttp_session, api_key, voice="alpha-asteria-en-v2"): + def __init__( + self, + *, + aiohttp_session, + api_key, + voice="alpha-asteria-en-v2"): super().__init__() self._voice = voice diff --git a/src/dailyai/services/elevenlabs_ai_service.py b/src/dailyai/services/elevenlabs_ai_service.py index 07068b9dd..3edaa71c5 100644 --- a/src/dailyai/services/elevenlabs_ai_service.py +++ b/src/dailyai/services/elevenlabs_ai_service.py @@ -28,7 +28,9 @@ def __init__( async def run_tts(self, sentence) -> AsyncGenerator[bytes, None]: url = f"https://api.elevenlabs.io/v1/text-to-speech/{self._voice_id}/stream" payload = {"text": sentence, "model_id": self._model} - querystring = {"output_format": "pcm_16000", "optimize_streaming_latency": 2} + querystring = { + "output_format": "pcm_16000", + "optimize_streaming_latency": 2} headers = { "xi-api-key": self._api_key, "Content-Type": "application/json", diff --git a/src/dailyai/services/fal_ai_services.py b/src/dailyai/services/fal_ai_services.py index 60226dd52..10343f97c 100644 --- a/src/dailyai/services/fal_ai_services.py +++ b/src/dailyai/services/fal_ai_services.py @@ -33,7 +33,7 @@ async def run_image_gen(self, sentence) -> tuple[str, bytes]: def get_image_url(sentence, size): handler = fal.apps.submit( "110602490-fast-sdxl", - #"fal-ai/fast-sdxl", + # "fal-ai/fast-sdxl", arguments={"prompt": sentence}, ) for event in handler.iter_events(): diff --git a/src/dailyai/services/local_transport_service.py b/src/dailyai/services/local_transport_service.py index 7538d3b06..f6ab43da7 100644 --- a/src/dailyai/services/local_transport_service.py +++ b/src/dailyai/services/local_transport_service.py @@ -15,13 +15,15 @@ def __init__(self, **kwargs): self._tk_root = kwargs.get("tk_root") or None if self._camera_enabled and not self._tk_root: - raise ValueError("If camera is enabled, a tkinter root must be provided") + raise ValueError( + "If camera is enabled, a tkinter root must be provided") if self._speaker_enabled: self._speaker_buffer_pending = bytearray() async def _write_frame_to_tkinter(self, frame: bytes): - data = f"P6 {self._camera_width} {self._camera_height} 255 ".encode() + frame + data = f"P6 {self._camera_width} {self._camera_height} 255 ".encode() + \ + frame photo = tk.PhotoImage( width=self._camera_width, height=self._camera_height, @@ -29,7 +31,8 @@ async def _write_frame_to_tkinter(self, frame: bytes): format="PPM") self._image_label.config(image=photo) - # This holds a reference to the photo, preventing it from being garbage collected. + # This holds a reference to the photo, preventing it from being garbage + # collected. self._image_label.image = photo # type: ignore def write_frame_to_camera(self, frame: bytes): @@ -61,8 +64,13 @@ def _prerun(self): if self._camera_enabled: # Start with a neutral gray background. array = np.ones((1024, 1024, 3)) * 128 - data = f"P5 {1024} {1024} 255 ".encode() + array.astype(np.uint8).tobytes() - photo = tk.PhotoImage(width=1024, height=1024, data=data, format="PPM") + data = f"P5 {1024} {1024} 255 ".encode( + ) + array.astype(np.uint8).tobytes() + photo = tk.PhotoImage( + width=1024, + height=1024, + data=data, + format="PPM") self._image_label = tk.Label(self._tk_root, image=photo) self._image_label.pack() diff --git a/src/dailyai/services/openai_api_llm_service.py b/src/dailyai/services/openai_api_llm_service.py index 681582c15..47439da13 100644 --- a/src/dailyai/services/openai_api_llm_service.py +++ b/src/dailyai/services/openai_api_llm_service.py @@ -110,7 +110,8 @@ async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]: yield LLMFunctionStartFrame(function_name=tool_call.function.name) if tool_call.function and tool_call.function.arguments: # Keep iterating through the response to collect all the argument fragments and - # yield a complete LLMFunctionCallFrame after run_llm_async completes + # yield a complete LLMFunctionCallFrame after run_llm_async + # completes arguments += tool_call.function.arguments elif chunk.choices[0].delta.content: yield TextFrame(chunk.choices[0].delta.content) diff --git a/src/dailyai/services/openai_llm_context.py b/src/dailyai/services/openai_llm_context.py index 52f56c8ba..ce705f547 100644 --- a/src/dailyai/services/openai_llm_context.py +++ b/src/dailyai/services/openai_llm_context.py @@ -16,7 +16,8 @@ def __init__( tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN ): - self.messages: List[ChatCompletionMessageParam] = messages if messages else [] + self.messages: List[ChatCompletionMessageParam] = messages if messages else [ + ] self.tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = tool_choice self.tools: List[ChatCompletionToolParam] | NotGiven = tools @@ -25,13 +26,13 @@ def from_messages(messages: List[dict]) -> "OpenAILLMContext": context = OpenAILLMContext() for message in messages: context.add_message({ - "content":message["content"], - "role":message["role"], - "name":message["name"] if "name" in message else message["role"] + "content": message["content"], + "role": message["role"], + "name": message["name"] if "name" in message else message["role"] }) return context - #def __deepcopy__(self, memo): + # def __deepcopy__(self, memo): def add_message(self, message: ChatCompletionMessageParam): self.messages.append(message) @@ -44,9 +45,10 @@ def set_tool_choice( ): self.tool_choice = tool_choice - def set_tools(self, tools:List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN): + def set_tools( + self, + tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN): if tools != NOT_GIVEN and len(tools) == 0: tools = NOT_GIVEN self.tools = tools - diff --git a/src/dailyai/services/to_be_updated/cloudflare_ai_service.py b/src/dailyai/services/to_be_updated/cloudflare_ai_service.py index b4a810bd5..058e2212c 100644 --- a/src/dailyai/services/to_be_updated/cloudflare_ai_service.py +++ b/src/dailyai/services/to_be_updated/cloudflare_ai_service.py @@ -17,7 +17,10 @@ def __init__(self): # base endpoint, used by the others def run(self, model, input): - response = requests.post(f"{self.api_base_url}{model}", headers=self.headers, json=input) + response = requests.post( + f"{self.api_base_url}{model}", + headers=self.headers, + json=input) return response.json() # https://developers.cloudflare.com/workers-ai/models/llm/ @@ -41,7 +44,8 @@ def run_text_translation(self, sentence, source_language, target_language): # https://developers.cloudflare.com/workers-ai/models/sentiment-analysis/ def run_text_sentiment(self, sentence): - return self.run("@cf/huggingface/distilbert-sst-2-int8", {"text": sentence}) + return self.run("@cf/huggingface/distilbert-sst-2-int8", + {"text": sentence}) # https://developers.cloudflare.com/workers-ai/models/image-classification/ def run_image_classification(self, image_url): diff --git a/src/dailyai/tests/integration/integration_azure_llm.py b/src/dailyai/tests/integration/integration_azure_llm.py index a55791ba5..41cb8ee6c 100644 --- a/src/dailyai/tests/integration/integration_azure_llm.py +++ b/src/dailyai/tests/integration/integration_azure_llm.py @@ -10,7 +10,7 @@ ChatCompletionSystemMessageParam, ) -if __name__=="__main__": +if __name__ == "__main__": async def test_chat(): llm = AzureLLMService( api_key=os.getenv("AZURE_CHATGPT_API_KEY"), @@ -19,8 +19,7 @@ async def test_chat(): ) context = OpenAILLMContext() message: ChatCompletionSystemMessageParam = ChatCompletionSystemMessageParam( - content="Please tell the world hello.", name="system", role="system" - ) + content="Please tell the world hello.", name="system", role="system") context.add_message(message) frame = OpenAILLMContextFrame(context) async for s in llm.process_frame(frame): diff --git a/src/dailyai/tests/integration/integration_ollama_llm.py b/src/dailyai/tests/integration/integration_ollama_llm.py index e33d40966..527a20e98 100644 --- a/src/dailyai/tests/integration/integration_ollama_llm.py +++ b/src/dailyai/tests/integration/integration_ollama_llm.py @@ -9,13 +9,12 @@ ) from dailyai.services.ollama_ai_services import OLLamaLLMService -if __name__=="__main__": +if __name__ == "__main__": async def test_chat(): llm = OLLamaLLMService() context = OpenAILLMContext() message: ChatCompletionSystemMessageParam = ChatCompletionSystemMessageParam( - content="Please tell the world hello.", name="system", role="system" - ) + content="Please tell the world hello.", name="system", role="system") context.add_message(message) frame = OpenAILLMContextFrame(context) async for s in llm.process_frame(frame): diff --git a/src/dailyai/tests/integration/integration_openai_llm.py b/src/dailyai/tests/integration/integration_openai_llm.py index a0eb21831..baea80d00 100644 --- a/src/dailyai/tests/integration/integration_openai_llm.py +++ b/src/dailyai/tests/integration/integration_openai_llm.py @@ -18,7 +18,7 @@ async def test_functions(): tools = [ ChatCompletionToolParam( type="function", - function= { + function={ "name": "get_current_weather", "description": "Get the current weather", "parameters": { @@ -30,15 +30,17 @@ async def test_functions(): }, "format": { "type": "string", - "enum": ["celsius", "fahrenheit"], + "enum": [ + "celsius", + "fahrenheit"], "description": "The temperature unit to use. Infer this from the users location.", }, }, - "required": ["location", "format"], + "required": [ + "location", + "format"], }, - } - ) - ] + })] api_key = os.getenv("OPENAI_API_KEY") @@ -70,8 +72,7 @@ async def test_chat(): ) context = OpenAILLMContext() message: ChatCompletionSystemMessageParam = ChatCompletionSystemMessageParam( - content="Please tell the world hello.", name="system", role="system" - ) + content="Please tell the world hello.", name="system", role="system") context.add_message(message) frame = OpenAILLMContextFrame(context) async for s in llm.process_frame(frame): diff --git a/src/dailyai/tests/test_aggregators.py b/src/dailyai/tests/test_aggregators.py index 800066969..d232349e3 100644 --- a/src/dailyai/tests/test_aggregators.py +++ b/src/dailyai/tests/test_aggregators.py @@ -45,10 +45,9 @@ async def test_sentence_aggregator(self): async def test_gated_accumulator(self): gated_aggregator = GatedAggregator( - gate_open_fn=lambda frame: isinstance(frame, ImageFrame), - gate_close_fn=lambda frame: isinstance(frame, LLMResponseStartFrame), - start_open=False, - ) + gate_open_fn=lambda frame: isinstance( + frame, ImageFrame), gate_close_fn=lambda frame: isinstance( + frame, LLMResponseStartFrame), start_open=False, ) frames = [ LLMResponseStartFrame(), @@ -76,12 +75,14 @@ async def test_gated_accumulator(self): async def test_parallel_pipeline(self): - async def slow_add(sleep_time:float, name:str, x: str): + async def slow_add(sleep_time: float, name: str, x: str): await asyncio.sleep(sleep_time) return ":".join([x, name]) - pipe1_annotation = StatelessTextTransformer(functools.partial(slow_add, 0.1, 'pipe1')) - pipe2_annotation = StatelessTextTransformer(functools.partial(slow_add, 0.2, 'pipe2')) + pipe1_annotation = StatelessTextTransformer( + functools.partial(slow_add, 0.1, 'pipe1')) + pipe2_annotation = StatelessTextTransformer( + functools.partial(slow_add, 0.2, 'pipe2')) sentence_aggregator = SentenceAggregator() add_dots = StatelessTextTransformer(lambda x: x + ".") diff --git a/src/examples/foundational/01-say-one-thing.py b/src/examples/foundational/01-say-one-thing.py index 6a0765095..e9531ad9a 100644 --- a/src/examples/foundational/01-say-one-thing.py +++ b/src/examples/foundational/01-say-one-thing.py @@ -32,7 +32,8 @@ async def main(room_url): pipeline = Pipeline([tts]) - # Register an event handler so we can play the audio when the participant joins. + # Register an event handler so we can play the audio when the + # participant joins. @transport.event_handler("on_participant_joined") async def on_participant_joined(transport, participant): if participant["info"]["isLocal"]: diff --git a/src/examples/foundational/01a-local-transport.py b/src/examples/foundational/01a-local-transport.py index 6fa28f2dd..b54ecedbd 100644 --- a/src/examples/foundational/01a-local-transport.py +++ b/src/examples/foundational/01a-local-transport.py @@ -10,6 +10,7 @@ logger = logging.getLogger("dailyai") logger.setLevel(logging.DEBUG) + async def main(): async with aiohttp.ClientSession() as session: meeting_duration_minutes = 1 diff --git a/src/examples/foundational/02-llm-say-one-thing.py b/src/examples/foundational/02-llm-say-one-thing.py index 0737703e8..40113404e 100644 --- a/src/examples/foundational/02-llm-say-one-thing.py +++ b/src/examples/foundational/02-llm-say-one-thing.py @@ -33,17 +33,16 @@ async def main(room_url): ) llm = OpenAILLMService( - api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview" - ) + api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), + model="gpt-4-turbo-preview") messages = [ { "role": "system", "content": "You are an LLM in a WebRTC session, and this is a 'hello world' demo. Say hello to the world.", - } - ] + }] - pipeline= Pipeline([llm, tts]) + pipeline = Pipeline([llm, tts]) @transport.event_handler("on_first_other_participant_joined") async def on_first_other_participant_joined(transport): diff --git a/src/examples/foundational/03-still-frame.py b/src/examples/foundational/03-still-frame.py index d5b0badb5..3a58f5bb8 100644 --- a/src/examples/foundational/03-still-frame.py +++ b/src/examples/foundational/03-still-frame.py @@ -40,7 +40,8 @@ async def main(room_url): async def on_first_other_participant_joined(transport): # Note that we do not put an EndFrame() item in the pipeline for this demo. # This means that the bot will stay in the channel until it times out. - # An EndFrame() in the pipeline would cause the transport to shut down. + # An EndFrame() in the pipeline would cause the transport to shut + # down. await pipeline.queue_frames( [TextFrame("a cat in the style of picasso")] ) diff --git a/src/examples/foundational/03a-image-local.py b/src/examples/foundational/03a-image-local.py index a5b594756..d3e6b5797 100644 --- a/src/examples/foundational/03a-image-local.py +++ b/src/examples/foundational/03a-image-local.py @@ -39,9 +39,8 @@ async def main(): ) image_task = asyncio.create_task( imagegen.run_to_queue( - transport.send_queue, [TextFrame("a cat in the style of picasso")] - ) - ) + transport.send_queue, [ + TextFrame("a cat in the style of picasso")])) async def run_tk(): while not transport._stop_threads.is_set(): diff --git a/src/examples/foundational/04-utterance-and-speech.py b/src/examples/foundational/04-utterance-and-speech.py index f056005d6..b35bc0436 100644 --- a/src/examples/foundational/04-utterance-and-speech.py +++ b/src/examples/foundational/04-utterance-and-speech.py @@ -49,7 +49,8 @@ async def main(room_url: str): voice_id=os.getenv("ELEVENLABS_VOICE_ID"), ) - messages = [{"role": "system", "content": "tell the user a joke about llamas"}] + messages = [{"role": "system", + "content": "tell the user a joke about llamas"}] # Start a task to run the LLM to create a joke, and convert the LLM output to audio frames. This task # will run in parallel with generating and speaking the audio for static text, so there's no delay to @@ -65,7 +66,8 @@ async def main(room_url: str): ] ) - merge_pipeline = SequentialMergePipeline([simple_tts_pipeline, llm_pipeline]) + merge_pipeline = SequentialMergePipeline( + [simple_tts_pipeline, llm_pipeline]) await asyncio.gather( transport.run(merge_pipeline), diff --git a/src/examples/foundational/05-sync-speech-and-image.py b/src/examples/foundational/05-sync-speech-and-image.py index 85b6311c8..f62dd6c00 100644 --- a/src/examples/foundational/05-sync-speech-and-image.py +++ b/src/examples/foundational/05-sync-speech-and-image.py @@ -79,8 +79,8 @@ async def main(room_url): ) llm = OpenAILLMService( - api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview" - ) + api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), + model="gpt-4-turbo-preview") imagegen = FalImageGenService( image_size="square_hd", @@ -90,10 +90,9 @@ async def main(room_url): ) gated_aggregator = GatedAggregator( - gate_open_fn=lambda frame: isinstance(frame, ImageFrame), - gate_close_fn=lambda frame: isinstance(frame, LLMResponseStartFrame), - start_open=False, - ) + gate_open_fn=lambda frame: isinstance( + frame, ImageFrame), gate_close_fn=lambda frame: isinstance( + frame, LLMResponseStartFrame), start_open=False, ) sentence_aggregator = SentenceAggregator() month_prepender = MonthPrepender() diff --git a/src/examples/foundational/05a-local-sync-speech-and-text.py b/src/examples/foundational/05a-local-sync-speech-and-text.py index f3c63eefe..5a59effe7 100644 --- a/src/examples/foundational/05a-local-sync-speech-and-text.py +++ b/src/examples/foundational/05a-local-sync-speech-and-text.py @@ -38,8 +38,8 @@ async def main(room_url): ) llm = OpenAILLMService( - api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview" - ) + api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), + model="gpt-4-turbo-preview") dalle = FalImageGenService( image_size="1024x1024", @@ -49,7 +49,8 @@ async def main(room_url): ) # Get a complete audio chunk from the given text. Splitting this into its own - # coroutine lets us ensure proper ordering of the audio chunks on the send queue. + # coroutine lets us ensure proper ordering of the audio chunks on the + # send queue. async def get_all_audio(text): all_audio = bytearray() async for audio in tts.run_tts(text): @@ -71,7 +72,8 @@ async def get_month_data(month): to_speak = f"{month}: {image_description}" audio_task = asyncio.create_task(get_all_audio(to_speak)) - image_task = asyncio.create_task(dalle.run_image_gen(image_description)) + image_task = asyncio.create_task( + dalle.run_image_gen(image_description)) (audio, image_data) = await asyncio.gather(audio_task, image_task) return { @@ -100,7 +102,8 @@ async def get_month_data(month): async def show_images(): # This will play the months in the order they're completed. The benefit # is we'll have as little delay as possible before the first month, and - # likely no delay between months, but the months won't display in order. + # likely no delay between months, but the months won't display in + # order. for month_data_task in asyncio.as_completed(month_tasks): data = await month_data_task if data: @@ -122,7 +125,9 @@ async def run_tk(): tk_root.update_idletasks() await asyncio.sleep(0.1) - month_tasks = [asyncio.create_task(get_month_data(month)) for month in months] + month_tasks = [ + asyncio.create_task( + get_month_data(month)) for month in months] await asyncio.gather(transport.run(), show_images(), run_tk()) @@ -130,8 +135,11 @@ async def run_tk(): if __name__ == "__main__": parser = argparse.ArgumentParser(description="Simple Daily Bot Sample") parser.add_argument( - "-u", "--url", type=str, required=True, help="URL of the Daily room to join" - ) + "-u", + "--url", + type=str, + required=True, + help="URL of the Daily room to join") args, unknown = parser.parse_known_args() diff --git a/src/examples/foundational/06-listen-and-respond.py b/src/examples/foundational/06-listen-and-respond.py index a5c3ce13c..3dd2fc2fa 100644 --- a/src/examples/foundational/06-listen-and-respond.py +++ b/src/examples/foundational/06-listen-and-respond.py @@ -41,8 +41,8 @@ async def main(room_url: str, token): ) llm = OpenAILLMService( - api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview" - ) + api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), + model="gpt-4-turbo-preview") fl = FrameLogger("Inner") fl2 = FrameLogger("Outer") messages = [ @@ -52,7 +52,8 @@ async def main(room_url: str, token): }, ] - tma_in = LLMUserContextAggregator(messages, transport._my_participant_id) + tma_in = LLMUserContextAggregator( + messages, transport._my_participant_id) tma_out = LLMAssistantContextAggregator( messages, transport._my_participant_id ) @@ -70,7 +71,8 @@ async def main(room_url: str, token): @transport.event_handler("on_first_other_participant_joined") async def on_first_other_participant_joined(transport): # Kick off the conversation. - messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + messages.append( + {"role": "system", "content": "Please introduce yourself to the user."}) await pipeline.queue_frames([LLMMessagesQueueFrame(messages)]) transport.transcription_settings["extra"]["endpointing"] = True diff --git a/src/examples/foundational/06a-image-sync.py b/src/examples/foundational/06a-image-sync.py index db68aad88..80a4b8929 100644 --- a/src/examples/foundational/06a-image-sync.py +++ b/src/examples/foundational/06a-image-sync.py @@ -61,8 +61,8 @@ async def main(room_url: str, token): ) llm = OpenAILLMService( - api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview" - ) + api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), + model="gpt-4-turbo-preview") img = FalImageGenService( image_size="1024x1024", @@ -97,14 +97,15 @@ async def handle_transcriptions(): }, ] - tma_in = LLMUserContextAggregator(messages, transport._my_participant_id) + tma_in = LLMUserContextAggregator( + messages, transport._my_participant_id) tma_out = LLMAssistantContextAggregator( messages, transport._my_participant_id ) image_sync_aggregator = ImageSyncAggregator( - os.path.join(os.path.dirname(__file__), "assets", "speaking.png"), - os.path.join(os.path.dirname(__file__), "assets", "waiting.png"), - ) + os.path.join( + os.path.dirname(__file__), "assets", "speaking.png"), os.path.join( + os.path.dirname(__file__), "assets", "waiting.png"), ) await tts.run_to_queue( transport.send_queue, image_sync_aggregator.run( diff --git a/src/examples/foundational/07-interruptible.py b/src/examples/foundational/07-interruptible.py index 7fb3759f9..af6adb275 100644 --- a/src/examples/foundational/07-interruptible.py +++ b/src/examples/foundational/07-interruptible.py @@ -42,8 +42,8 @@ async def main(room_url: str, token): ) llm = OpenAILLMService( - api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview" - ) + api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), + model="gpt-4-turbo-preview") pipeline = Pipeline([FrameLogger(), llm, FrameLogger(), tts]) diff --git a/src/examples/foundational/10-wake-word.py b/src/examples/foundational/10-wake-word.py index 70801d1be..169d3fbdc 100644 --- a/src/examples/foundational/10-wake-word.py +++ b/src/examples/foundational/10-wake-word.py @@ -56,7 +56,8 @@ talking = [random.choice(talking_list) for x in range(30)] talking_frame = SpriteFrame(images=talking) -# TODO: Support "thinking" as soon as we get a valid transcript, while LLM is processing +# TODO: Support "thinking" as soon as we get a valid transcript, while LLM +# is processing thinking_list = [ sprites["sc-think-1.png"], sprites["sc-think-2.png"], @@ -130,8 +131,8 @@ async def main(room_url: str, token): transport._camera_height = 1280 llm = OpenAILLMService( - api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview" - ) + api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), + model="gpt-4-turbo-preview") tts = ElevenLabsTTSService( aiohttp_session=session, @@ -155,7 +156,8 @@ async def handle_transcriptions(): }, ] - tma_in = LLMUserContextAggregator(messages, transport._my_participant_id) + tma_in = LLMUserContextAggregator( + messages, transport._my_participant_id) tma_out = LLMAssistantContextAggregator( messages, transport._my_participant_id ) diff --git a/src/examples/foundational/11-sound-effects.py b/src/examples/foundational/11-sound-effects.py index a4a54ce52..5459878d1 100644 --- a/src/examples/foundational/11-sound-effects.py +++ b/src/examples/foundational/11-sound-effects.py @@ -81,8 +81,8 @@ async def main(room_url: str, token): ) llm = OpenAILLMService( - api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview" - ) + api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), + model="gpt-4-turbo-preview") tts = ElevenLabsTTSService( aiohttp_session=session, @@ -103,7 +103,8 @@ async def handle_transcriptions(): }, ] - tma_in = LLMUserContextAggregator(messages, transport._my_participant_id) + tma_in = LLMUserContextAggregator( + messages, transport._my_participant_id) tma_out = LLMAssistantContextAggregator( messages, transport._my_participant_id ) diff --git a/src/examples/foundational/13a-whisper-local.py b/src/examples/foundational/13a-whisper-local.py index 8efba0535..d86c7483c 100644 --- a/src/examples/foundational/13a-whisper-local.py +++ b/src/examples/foundational/13a-whisper-local.py @@ -57,8 +57,11 @@ async def run_until_done(): if __name__ == "__main__": parser = argparse.ArgumentParser(description="Simple Daily Bot Sample") parser.add_argument( - "-u", "--url", type=str, required=True, help="URL of the Daily room to join" - ) + "-u", + "--url", + type=str, + required=True, + help="URL of the Daily room to join") args, unknown = parser.parse_known_args() asyncio.run(main(args.url)) diff --git a/src/examples/image-gen.py b/src/examples/image-gen.py index 3ef5b5a28..e1cedfcca 100644 --- a/src/examples/image-gen.py +++ b/src/examples/image-gen.py @@ -45,14 +45,17 @@ async def handle_transcriptions(): print(f"finder: {finder}") if finder >= 0: async for audio in tts.run_tts(f"Resetting."): - transport.output_queue.put(Frame(FrameType.AUDIO_FRAME, audio)) + transport.output_queue.put( + Frame(FrameType.AUDIO_FRAME, audio)) sentence = "" continue - # todo: we could differentiate between transcriptions from different participants + # todo: we could differentiate between transcriptions from + # different participants sentence += f" {message['text']}" print(f"sentence is now: {sentence}") # TODO: Cache this audio - phrase = random.choice(["OK.", "Got it.", "Sure.", "You bet.", "Sure thing."]) + phrase = random.choice( + ["OK.", "Got it.", "Sure.", "You bet.", "Sure thing."]) async for audio in tts.run_tts(phrase): transport.output_queue.put(Frame(FrameType.AUDIO_FRAME, audio)) img_result = img.run_image_gen(sentence, "1024x1024") @@ -82,8 +85,11 @@ async def on_participant_joined(transport, participant): if __name__ == "__main__": parser = argparse.ArgumentParser(description="Simple Daily Bot Sample") parser.add_argument( - "-u", "--url", type=str, required=True, help="URL of the Daily room to join" - ) + "-u", + "--url", + type=str, + required=True, + help="URL of the Daily room to join") parser.add_argument( "-k", "--apikey", @@ -94,20 +100,25 @@ async def on_participant_joined(transport, participant): args, unknown = parser.parse_known_args() - # Create a meeting token for the given room with an expiration 1 hour in the future. + # Create a meeting token for the given room with an expiration 1 hour in + # the future. room_name: str = urllib.parse.urlparse(args.url).path[1:] expiration: float = time.time() + 60 * 60 res: requests.Response = requests.post( f"https://api.daily.co/v1/meeting-tokens", - headers={"Authorization": f"Bearer {args.apikey}"}, + headers={ + "Authorization": f"Bearer {args.apikey}"}, json={ - "properties": {"room_name": room_name, "is_owner": True, "exp": expiration} - }, + "properties": { + "room_name": room_name, + "is_owner": True, + "exp": expiration}}, ) if res.status_code != 200: - raise Exception(f"Failed to create meeting token: {res.status_code} {res.text}") + raise Exception( + f"Failed to create meeting token: {res.status_code} {res.text}") token: str = res.json()["token"] diff --git a/src/examples/server/auth.py b/src/examples/server/auth.py index 6eba905e3..fb80dcc04 100644 --- a/src/examples/server/auth.py +++ b/src/examples/server/auth.py @@ -24,7 +24,8 @@ def get_meeting_token(room_name, daily_api_key, token_expiry): 'is_owner': True, 'exp': token_expiry}}) if res.status_code != 200: - return jsonify({'error': 'Unable to create meeting token', 'detail': res.text}), 500 + return jsonify( + {'error': 'Unable to create meeting token', 'detail': res.text}), 500 meeting_token = res.json()['token'] return meeting_token diff --git a/src/examples/server/daily-bot-manager.py b/src/examples/server/daily-bot-manager.py index a25256b6c..f59d7fc67 100644 --- a/src/examples/server/daily-bot-manager.py +++ b/src/examples/server/daily-bot-manager.py @@ -14,14 +14,16 @@ app = Flask(__name__) CORS(app) -print(f"I loaded an environment, and my FAL_KEY_ID is {os.getenv('FAL_KEY_ID')}") +print( + f"I loaded an environment, and my FAL_KEY_ID is {os.getenv('FAL_KEY_ID')}") def start_bot(bot_path, args=None): daily_api_key = os.getenv("DAILY_API_KEY") api_path = os.getenv("DAILY_API_PATH") or "https://api.daily.co/v1" - timeout = int(os.getenv("DAILY_ROOM_TIMEOUT") or os.getenv("DAILY_BOT_MAX_DURATION") or 300) + timeout = int(os.getenv("DAILY_ROOM_TIMEOUT") + or os.getenv("DAILY_BOT_MAX_DURATION") or 300) exp = time.time() + timeout res = requests.post( f"{api_path}/rooms", @@ -59,14 +61,13 @@ def start_bot(bot_path, args=None): extra_args = "" proc = subprocess.Popen( - [ - f"python {bot_path} -u {room_url} -t {meeting_token} -k {daily_api_key} {extra_args}" - ], + [f"python {bot_path} -u {room_url} -t {meeting_token} -k {daily_api_key} {extra_args}"], shell=True, bufsize=1, ) - # Don't return until the bot has joined the room, but wait for at most 2 seconds. + # Don't return until the bot has joined the room, but wait for at most 2 + # seconds. attempts = 0 while attempts < 20: time.sleep(0.1) @@ -82,11 +83,13 @@ def start_bot(bot_path, args=None): # Additional client config config = {} if os.getenv("CLIENT_VAD_TIMEOUT_SEC"): - config['vad_timeout_sec'] = float(os.getenv("DAILY_CLIENT_VAD_TIMEOUT_SEC")) + config['vad_timeout_sec'] = float( + os.getenv("DAILY_CLIENT_VAD_TIMEOUT_SEC")) else: config['vad_timeout_sec'] = 1.5 - # return jsonify({"room_url": room_url, "token": meeting_token, "config": config}), 200 + # return jsonify({"room_url": room_url, "token": meeting_token, "config": + # config}), 200 return redirect(room_url, code=301) diff --git a/src/examples/starter-apps/chatbot.py b/src/examples/starter-apps/chatbot.py index a78b821ce..2b3b419b5 100644 --- a/src/examples/starter-apps/chatbot.py +++ b/src/examples/starter-apps/chatbot.py @@ -109,8 +109,8 @@ async def main(room_url: str, token): ) llm = OpenAILLMService( - api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview" - ) + api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), + model="gpt-4-turbo-preview") ta = TalkingAnimation() ai = AnimationInitializer() diff --git a/src/examples/starter-apps/patient-intake.py b/src/examples/starter-apps/patient-intake.py index 1f3f61548..536faab4b 100644 --- a/src/examples/starter-apps/patient-intake.py +++ b/src/examples/starter-apps/patient-intake.py @@ -61,161 +61,101 @@ sounds[file] = audio_file.readframes(-1) -steps = [ - { - "prompt": "Start by introducing yourself. Then, ask the user to confirm their identity by telling you their birthday, including the year. When they answer with their birthday, call the verify_birthday function.", - "run_async": False, - "failed": "The user provided an incorrect birthday. Ask them for their birthday again. When they answer, call the verify_birthday function.", - "tools": [ - { - "type": "function", - "function": { - "name": "verify_birthday", - "description": "Use this function to verify the user has provided their correct birthday.", - "parameters": { - "type": "object", - "properties": { - "birthday": { - "type": "string", - "description": "The user's birthdate, including the year. The user can provide it in any format, but convert it to YYYY-MM-DD format to call this function.", - } - }, - }, - }, - } - ], - }, - { - "prompt": "Next, thank the user for confirming their identity, then ask the user to list their current prescriptions. Each prescription needs to have a medication name and a dosage. Do not call the list_prescriptions function with any unknown dosages.", - "run_async": True, - "tools": [ - { - "type": "function", - "function": { - "name": "list_prescriptions", - "description": "Once the user has provided a list of their prescription medications, call this function.", - "parameters": { - "type": "object", - "properties": { - "prescriptions": { - "type": "array", - "items": { - "type": "object", - "properties": { - "medication": { - "type": "string", - "description": "The medication's name", - }, - "dosage": { - "type": "string", - "description": "The prescription's dosage", - }, - }, - }, - } - }, - }, - }, - } - ], - }, - { - "prompt": "Next, ask the user if they have any allergies. Once they have listed their allergies or confirmed they don't have any, call the list_allergies function.", - "run_async": True, - "tools": [ - { - "type": "function", - "function": { - "name": "list_allergies", - "description": "Once the user has provided a list of their allergies, call this function.", - "parameters": { - "type": "object", - "properties": { - "allergies": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "What the user is allergic to", - } - }, - }, - } - }, - }, - }, - } - ], - }, - { - "prompt": "Now ask the user if they have any medical conditions the doctor should know about. Once they've answered the question, call the list_conditions function.", - "run_async": True, - "tools": [ - { - "type": "function", - "function": { - "name": "list_conditions", - "description": "Once the user has provided a list of their medical conditions, call this function.", - "parameters": { - "type": "object", - "properties": { - "conditions": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The user's medical condition", - } - }, - }, - } - }, - }, - }, - }, - ], - }, - { - "prompt": "Finally, ask the user the reason for their doctor visit today. Once they answer, call the list_visit_reasons function.", - "run_async": True, - "tools": [ - { - "type": "function", - "function": { - "name": "list_visit_reasons", - "description": "Once the user has provided a list of the reasons they are visiting a doctor today, call this function.", - "parameters": { - "type": "object", - "properties": { - "visit_reasons": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The user's reason for visiting the doctor", - } - }, - }, - } - }, - }, - }, - } - ], - }, - { - "prompt": "Now, thank the user and end the conversation.", - "run_async": True, - "tools": [], - }, - {"prompt": "", "run_async": True, "tools": []}, -] +steps = [{"prompt": "Start by introducing yourself. Then, ask the user to confirm their identity by telling you their birthday, including the year. When they answer with their birthday, call the verify_birthday function.", + "run_async": False, + "failed": "The user provided an incorrect birthday. Ask them for their birthday again. When they answer, call the verify_birthday function.", + "tools": [{"type": "function", + "function": {"name": "verify_birthday", + "description": "Use this function to verify the user has provided their correct birthday.", + "parameters": {"type": "object", + "properties": {"birthday": {"type": "string", + "description": "The user's birthdate, including the year. The user can provide it in any format, but convert it to YYYY-MM-DD format to call this function.", + }}, + }, + }, + }], + }, + {"prompt": "Next, thank the user for confirming their identity, then ask the user to list their current prescriptions. Each prescription needs to have a medication name and a dosage. Do not call the list_prescriptions function with any unknown dosages.", + "run_async": True, + "tools": [{"type": "function", + "function": {"name": "list_prescriptions", + "description": "Once the user has provided a list of their prescription medications, call this function.", + "parameters": {"type": "object", + "properties": {"prescriptions": {"type": "array", + "items": {"type": "object", + "properties": {"medication": {"type": "string", + "description": "The medication's name", + }, + "dosage": {"type": "string", + "description": "The prescription's dosage", + }, + }, + }, + }}, + }, + }, + }], + }, + {"prompt": "Next, ask the user if they have any allergies. Once they have listed their allergies or confirmed they don't have any, call the list_allergies function.", + "run_async": True, + "tools": [{"type": "function", + "function": {"name": "list_allergies", + "description": "Once the user has provided a list of their allergies, call this function.", + "parameters": {"type": "object", + "properties": {"allergies": {"type": "array", + "items": {"type": "object", + "properties": {"name": {"type": "string", + "description": "What the user is allergic to", + }}, + }, + }}, + }, + }, + }], + }, + {"prompt": "Now ask the user if they have any medical conditions the doctor should know about. Once they've answered the question, call the list_conditions function.", + "run_async": True, + "tools": [{"type": "function", + "function": {"name": "list_conditions", + "description": "Once the user has provided a list of their medical conditions, call this function.", + "parameters": {"type": "object", + "properties": {"conditions": {"type": "array", + "items": {"type": "object", + "properties": {"name": {"type": "string", + "description": "The user's medical condition", + }}, + }, + }}, + }, + }, + }, + ], + }, + {"prompt": "Finally, ask the user the reason for their doctor visit today. Once they answer, call the list_visit_reasons function.", + "run_async": True, + "tools": [{"type": "function", + "function": {"name": "list_visit_reasons", + "description": "Once the user has provided a list of the reasons they are visiting a doctor today, call this function.", + "parameters": {"type": "object", + "properties": {"visit_reasons": {"type": "array", + "items": {"type": "object", + "properties": {"name": {"type": "string", + "description": "The user's reason for visiting the doctor", + }}, + }, + }}, + }, + }, + }], + }, + {"prompt": "Now, thank the user and end the conversation.", + "run_async": True, + "tools": [], + }, + {"prompt": "", + "run_async": True, + "tools": []}, + ] current_step = 0 @@ -299,10 +239,12 @@ async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]: elif isinstance(frame, LLMFunctionCallFrame): if frame.function_name and frame.arguments: - print(f"--> Calling function: {frame.function_name} with arguments:") + print( + f"--> Calling function: {frame.function_name} with arguments:") pretty_json = re.sub( - "\n", "\n ", json.dumps(json.loads(frame.arguments), indent=2) - ) + "\n", "\n ", json.dumps( + json.loads( + frame.arguments), indent=2)) print(f"--> {pretty_json}\n") if frame.function_name not in self._functions: raise Exception( diff --git a/src/examples/starter-apps/storybot.py b/src/examples/starter-apps/storybot.py index f9fe3797d..e529ef3d4 100644 --- a/src/examples/starter-apps/storybot.py +++ b/src/examples/starter-apps/storybot.py @@ -146,7 +146,8 @@ async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]: self._story.append(self._text) yield StoryPageFrame(self._text) else: - # After the prompt thing, we'll catch an LLM end to get the last bit + # After the prompt thing, we'll catch an LLM end to get the + # last bit pass elif isinstance(frame, LLMResponseEndFrame): yield ImageFrame(None, images["grandma-writing.png"]) @@ -251,7 +252,8 @@ async def storytime(): } ] lca = LLMAssistantContextAggregator(messages) - local_pipeline = Pipeline([llm, lca, tts], sink=transport.send_queue) + local_pipeline = Pipeline( + [llm, lca, tts], sink=transport.send_queue) await local_pipeline.queue_frames( [ ImageFrame(None, images["grandma-listening.png"]), diff --git a/src/examples/starter-apps/translator.py b/src/examples/starter-apps/translator.py index 0e1ae5af3..9b8c6410f 100644 --- a/src/examples/starter-apps/translator.py +++ b/src/examples/starter-apps/translator.py @@ -30,7 +30,8 @@ """ -# We need to use a custom service here to yield LLM frames without saving any context +# We need to use a custom service here to yield LLM frames without saving +# any context class TranslationProcessor(FrameProcessor): def __init__(self, language): self._language = language @@ -68,8 +69,8 @@ async def main(room_url: str, token): voice="es-ES-AlvaroNeural", ) llm = OpenAILLMService( - api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview" - ) + api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), + model="gpt-4-turbo-preview") sa = SentenceAggregator() tp = TranslationProcessor("Spanish") pipeline = Pipeline([sa, tp, llm, tts]) diff --git a/src/examples/support/runner.py b/src/examples/support/runner.py index b4b7d4862..abdbaeee4 100644 --- a/src/examples/support/runner.py +++ b/src/examples/support/runner.py @@ -11,8 +11,11 @@ def configure(): parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample") parser.add_argument( - "-u", "--url", type=str, required=False, help="URL of the Daily room to join" - ) + "-u", + "--url", + type=str, + required=False, + help="URL of the Daily room to join") parser.add_argument( "-k", "--apikey", @@ -33,20 +36,25 @@ def configure(): if not key: raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.") - # Create a meeting token for the given room with an expiration 1 hour in the future. + # Create a meeting token for the given room with an expiration 1 hour in + # the future. room_name: str = urllib.parse.urlparse(url).path[1:] expiration: float = time.time() + 60 * 60 res: requests.Response = requests.post( f"https://api.daily.co/v1/meeting-tokens", - headers={"Authorization": f"Bearer {key}"}, + headers={ + "Authorization": f"Bearer {key}"}, json={ - "properties": {"room_name": room_name, "is_owner": True, "exp": expiration} - }, + "properties": { + "room_name": room_name, + "is_owner": True, + "exp": expiration}}, ) if res.status_code != 200: - raise Exception(f"Failed to create meeting token: {res.status_code} {res.text}") + raise Exception( + f"Failed to create meeting token: {res.status_code} {res.text}") token: str = res.json()["token"]