From 9bda09b1a8c81ade2a014a4839f66bb4a9955083 Mon Sep 17 00:00:00 2001 From: nulyang Date: Sun, 18 Aug 2024 23:40:32 +0800 Subject: [PATCH 1/5] serializers(livekit): Add audio serializers --- src/pipecat/serializers/livekit.py | 37 ++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 src/pipecat/serializers/livekit.py diff --git a/src/pipecat/serializers/livekit.py b/src/pipecat/serializers/livekit.py new file mode 100644 index 000000000..5d8cdb4bb --- /dev/null +++ b/src/pipecat/serializers/livekit.py @@ -0,0 +1,37 @@ +# +# Copyright (c) 2024, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import ctypes +import pickle + +from livekit.rtc import AudioFrame +from pipecat.frames.frames import AudioRawFrame, Frame +from pipecat.serializers.base_serializer import FrameSerializer + + +class LivekitFrameSerializer(FrameSerializer): + SERIALIZABLE_TYPES = { + AudioRawFrame: "audio", + } + + def serialize(self, frame: Frame) -> str | bytes | None: + if not isinstance(frame, AudioRawFrame): + return None + audio_frame = AudioFrame( + data=frame.audio, + sample_rate=frame.sample_rate, + num_channels=frame.num_channels, + samples_per_channel=len(frame.audio) // ctypes.sizeof(ctypes.c_int16), + ) + return pickle.dumps(audio_frame) + + def deserialize(self, data: str | bytes) -> Frame | None: + audio_frame: AudioFrame = pickle.loads(data)['frame'] + return AudioRawFrame( + audio=bytes(audio_frame.data), + sample_rate=audio_frame.sample_rate, + num_channels=audio_frame.num_channels, + ) From 0eb189ce7fe0854ce71511349a7cd9b0a953c369 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 22 Aug 2024 08:50:03 -0700 Subject: [PATCH 2/5] transports(fastapi): fix variable name clash --- CHANGELOG.md | 6 ++++++ src/pipecat/transports/network/fastapi_websocket.py | 10 +++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 639e91af6..07047b640 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to **pipecat** will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Fixed + +- Fix `FastAPIWebsocketOutputTransport` variable name clash with subclass. + ## [0.0.40] - 2024-08-20 ### Added diff --git a/src/pipecat/transports/network/fastapi_websocket.py b/src/pipecat/transports/network/fastapi_websocket.py index 32857a696..914870114 100644 --- a/src/pipecat/transports/network/fastapi_websocket.py +++ b/src/pipecat/transports/network/fastapi_websocket.py @@ -91,13 +91,13 @@ def __init__(self, websocket: WebSocket, params: FastAPIWebsocketParams, **kwarg self._websocket = websocket self._params = params - self._audio_buffer = bytes() + self._websocket_audio_buffer = bytes() async def write_raw_audio_frames(self, frames: bytes): - self._audio_buffer += frames - while len(self._audio_buffer) >= self._params.audio_frame_size: + self._websocket_audio_buffer += frames + while len(self._websocket_audio_buffer) >= self._params.audio_frame_size: frame = AudioRawFrame( - audio=self._audio_buffer[:self._params.audio_frame_size], + audio=self._websocket_audio_buffer[:self._params.audio_frame_size], sample_rate=self._params.audio_out_sample_rate, num_channels=self._params.audio_out_channels ) @@ -121,7 +121,7 @@ async def write_raw_audio_frames(self, frames: bytes): if payload and self._websocket.client_state == WebSocketState.CONNECTED: await self._websocket.send_text(payload) - self._audio_buffer = self._audio_buffer[self._params.audio_frame_size:] + self._websocket_audio_buffer = self._websocket_audio_buffer[self._params.audio_frame_size:] class FastAPIWebsocketTransport(BaseTransport): From 873d5dc23fdca8d25b0e26a00835c1318fcb1892 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 22 Aug 2024 10:54:18 -0700 Subject: [PATCH 3/5] added livekit dependency --- CHANGELOG.md | 4 ++++ pyproject.toml | 1 + src/pipecat/serializers/livekit.py | 11 ++++++++++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 07047b640..de97fd6cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Added `LivekitFrameSerializer` audio frame serializer. + ### Fixed - Fix `FastAPIWebsocketOutputTransport` variable name clash with subclass. diff --git a/pyproject.toml b/pyproject.toml index ea6e4eb9b..712dcb276 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,7 @@ google = [ "google-generativeai~=0.7.2" ] gstreamer = [ "pygobject~=3.48.2" ] fireworks = [ "openai~=1.37.2" ] langchain = [ "langchain~=0.2.14", "langchain-community~=0.2.12", "langchain-openai~=0.1.20" ] +livekit = [ "livekit~=0.13.1" ] local = [ "pyaudio~=0.2.14" ] moondream = [ "einops~=0.8.0", "timm~=1.0.8", "transformers~=4.44.0" ] openai = [ "openai~=1.37.2" ] diff --git a/src/pipecat/serializers/livekit.py b/src/pipecat/serializers/livekit.py index 5d8cdb4bb..7a0e8afd1 100644 --- a/src/pipecat/serializers/livekit.py +++ b/src/pipecat/serializers/livekit.py @@ -7,10 +7,19 @@ import ctypes import pickle -from livekit.rtc import AudioFrame from pipecat.frames.frames import AudioRawFrame, Frame from pipecat.serializers.base_serializer import FrameSerializer +from loguru import logger + +try: + from livekit.rtc import AudioFrame +except ModuleNotFoundError as e: + logger.error(f"Exception: {e}") + logger.error( + "In order to use LiveKit, you need to `pip install pipecat-ai[livekit]`.") + raise Exception(f"Missing module: {e}") + class LivekitFrameSerializer(FrameSerializer): SERIALIZABLE_TYPES = { From d42f072ff553bbfd416907967d1e5cc47f01665e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 22 Aug 2024 11:36:16 -0700 Subject: [PATCH 4/5] examples: fix studypal errors and update requirements --- CHANGELOG.md | 6 ++++++ examples/studypal/requirements.txt | 6 +++--- examples/studypal/runner.py | 6 +++--- examples/studypal/studypal.py | 2 +- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index de97fd6cd..218f16c1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fix `FastAPIWebsocketOutputTransport` variable name clash with subclass. +- Fix an `AnthropicLLMService` issue with empty arguments in function calling. + +### Other + +- Fixed `studypal` example errors. + ## [0.0.40] - 2024-08-20 ### Added diff --git a/examples/studypal/requirements.txt b/examples/studypal/requirements.txt index 210c59ebc..2d6e21042 100644 --- a/examples/studypal/requirements.txt +++ b/examples/studypal/requirements.txt @@ -1,5 +1,5 @@ -beautifulsoup4==4.12.2 -PyPDF2==3.0.1 +beautifulsoup4==4.12.3 +pypdf==4.3.1 tiktoken==0.7.0 -pipecat-ai[daily,cartesia,openai,silero]==0.0.39 +pipecat-ai[daily,cartesia,openai,silero]==0.0.40 python-dotenv==1.0.1 diff --git a/examples/studypal/runner.py b/examples/studypal/runner.py index 949e46b59..068174eec 100644 --- a/examples/studypal/runner.py +++ b/examples/studypal/runner.py @@ -50,12 +50,12 @@ async def configure_with_args( daily_rest_helper = DailyRESTHelper( daily_api_key=key, daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"), - ) + aiohttp_session=aiohttp_session) # Create a meeting token for the given room with an expiration 1 hour in # the future. expiry_time: float = 60 * 60 - token = daily_rest_helper.get_token(url, expiry_time) + token = await daily_rest_helper.get_token(url, expiry_time) - return (url, token, args) \ No newline at end of file + return (url, token, args) diff --git a/examples/studypal/studypal.py b/examples/studypal/studypal.py index 67b02a9fb..8adfe2954 100644 --- a/examples/studypal/studypal.py +++ b/examples/studypal/studypal.py @@ -5,7 +5,7 @@ import io from bs4 import BeautifulSoup -from PyPDF2 import PdfReader +from pypdf import PdfReader import tiktoken from pipecat.frames.frames import LLMMessagesFrame From 0c46b3e481b218264ba7ee17a75f7f165cb30d1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 22 Aug 2024 10:40:34 -0700 Subject: [PATCH 5/5] prepare pipecat 0.0.41 --- CHANGELOG.md | 2 +- README.md | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 218f16c1a..4dc4273be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ All notable changes to **pipecat** will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [0.0.41] - 2024-08-22 ### Added diff --git a/README.md b/README.md index bd1da578e..40f96636c 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,7 @@ # Pipecat -[![PyPI](https://img.shields.io/pypi/v/pipecat-ai)](https://pypi.org/project/pipecat-ai) [![Discord](https://img.shields.io/discord/1239284677165056021 -)](https://discord.gg/pipecat) +[![PyPI](https://img.shields.io/pypi/v/pipecat-ai)](https://pypi.org/project/pipecat-ai) [![Discord](https://img.shields.io/discord/1239284677165056021)](https://discord.gg/pipecat) `pipecat` is a framework for building voice (and multimodal) conversational agents. Things like personal coaches, meeting assistants, [story-telling toys for kids](https://storytelling-chatbot.fly.dev/), customer support bots, [intake flows](https://www.youtube.com/watch?v=lDevgsp9vn0), and snarky social companions.