Merge branch 'main' into pr/add-lmnt

ThibaultMardinli · Aug 24, 2024 · 1d92fc3 · 1d92fc3
2 parents 8ac7fb1 + e038767
commit 1d92fc3
Show file tree

Hide file tree

Showing 8 changed files with 76 additions and 14 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,22 @@ All notable changes to **pipecat** will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.0.41] - 2024-08-22
+
+### Added
+
+- Added `LivekitFrameSerializer` audio frame serializer.
+
+### Fixed
+
+- Fix `FastAPIWebsocketOutputTransport` variable name clash with subclass.
+
+- Fix an `AnthropicLLMService` issue with empty arguments in function calling.
+
+### Other
+
+- Fixed `studypal` example errors.
+
 ## [0.0.40] - 2024-08-20
 
 ### Added

diff --git a/README.md b/README.md
@@ -4,8 +4,7 @@
 
 # Pipecat
 
-[![PyPI](https://img.shields.io/pypi/v/pipecat-ai)](https://pypi.org/project/pipecat-ai) [![Discord](https://img.shields.io/discord/1239284677165056021
-)](https://discord.gg/pipecat)
+[![PyPI](https://img.shields.io/pypi/v/pipecat-ai)](https://pypi.org/project/pipecat-ai) [![Discord](https://img.shields.io/discord/1239284677165056021)](https://discord.gg/pipecat) <a href="https://app.commanddash.io/agent/github_pipecat-ai_pipecat"><img src="https://img.shields.io/badge/AI-Code%20Agent-EB9FDA"></a>
 
 `pipecat` is a framework for building voice (and multimodal) conversational agents. Things like personal coaches, meeting assistants, [story-telling toys for kids](https://storytelling-chatbot.fly.dev/), customer support bots, [intake flows](https://www.youtube.com/watch?v=lDevgsp9vn0), and snarky social companions.
 

diff --git a/examples/studypal/requirements.txt b/examples/studypal/requirements.txt
@@ -1,5 +1,5 @@
-beautifulsoup4==4.12.2
-PyPDF2==3.0.1
+beautifulsoup4==4.12.3
+pypdf==4.3.1
 tiktoken==0.7.0
-pipecat-ai[daily,cartesia,openai,silero]==0.0.39
+pipecat-ai[daily,cartesia,openai,silero]==0.0.40
 python-dotenv==1.0.1
diff --git a/examples/studypal/runner.py b/examples/studypal/runner.py
@@ -50,12 +50,12 @@ async def configure_with_args(
     daily_rest_helper = DailyRESTHelper(
         daily_api_key=key,
         daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
-    )
+        aiohttp_session=aiohttp_session)
 
     # Create a meeting token for the given room with an expiration 1 hour in
     # the future.
     expiry_time: float = 60 * 60
 
-    token = daily_rest_helper.get_token(url, expiry_time)
+    token = await daily_rest_helper.get_token(url, expiry_time)
 
-    return (url, token, args)
+    return (url, token, args)
diff --git a/examples/studypal/studypal.py b/examples/studypal/studypal.py
@@ -5,7 +5,7 @@
 import io
 
 from bs4 import BeautifulSoup
-from PyPDF2 import PdfReader
+from pypdf import PdfReader
 import tiktoken
 
 from pipecat.frames.frames import LLMMessagesFrame

diff --git a/pyproject.toml b/pyproject.toml
@@ -46,6 +46,7 @@ google = [ "google-generativeai~=0.7.2" ]
 gstreamer = [ "pygobject~=3.48.2" ]
 fireworks = [ "openai~=1.37.2" ]
 langchain = [ "langchain~=0.2.14", "langchain-community~=0.2.12", "langchain-openai~=0.1.20" ]
+livekit = [ "livekit~=0.13.1" ]
 lmnt = [ "lmnt~=1.1.4" ]
 local = [ "pyaudio~=0.2.14" ]
 moondream = [ "einops~=0.8.0", "timm~=1.0.8", "transformers~=4.44.0" ]

diff --git a/src/pipecat/serializers/livekit.py b/src/pipecat/serializers/livekit.py
@@ -0,0 +1,46 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import ctypes
+import pickle
+
+from pipecat.frames.frames import AudioRawFrame, Frame
+from pipecat.serializers.base_serializer import FrameSerializer
+
+from loguru import logger
+
+# `livekit` is an optional dependency: if it is not installed, log an install
+# hint and abort the import of this module with an exception.
+try:
+    from livekit.rtc import AudioFrame
+except ModuleNotFoundError as e:
+    logger.error(f"Exception: {e}")
+    logger.error(
+        "In order to use LiveKit, you need to `pip install pipecat-ai[livekit]`.")
+    raise Exception(f"Missing module: {e}")
+
+
+class LivekitFrameSerializer(FrameSerializer):
+    """Convert pipecat audio frames to and from pickled LiveKit audio frames.
+
+    Only `AudioRawFrame` is handled. Outgoing frames are wrapped in a
+    `livekit.rtc.AudioFrame` and pickled; incoming payloads are unpickled and
+    the object stored under their ``'frame'`` key is converted back into an
+    `AudioRawFrame`.
+    """
+
+    SERIALIZABLE_TYPES = {
+        AudioRawFrame: "audio",
+    }
+
+    def serialize(self, frame: Frame) -> str | bytes | None:
+        """Pickle `frame` as a LiveKit `AudioFrame`.
+
+        Args:
+            frame: The frame to serialize.
+
+        Returns:
+            The pickled `AudioFrame` bytes, or None when `frame` is not an
+            `AudioRawFrame`.
+        """
+        if not isinstance(frame, AudioRawFrame):
+            return None
+        # The buffer holds 16-bit samples for every channel, so the
+        # per-channel count is the byte length divided by both the sample
+        # width and the channel count. Dividing by the sample width alone
+        # over-reports the count for multi-channel audio. `max(..., 1)` avoids
+        # a division by zero on a degenerate zero-channel frame.
+        bytes_per_sample = ctypes.sizeof(ctypes.c_int16)
+        channels = max(frame.num_channels, 1)
+        audio_frame = AudioFrame(
+            data=frame.audio,
+            sample_rate=frame.sample_rate,
+            num_channels=frame.num_channels,
+            samples_per_channel=len(frame.audio) // (bytes_per_sample * channels),
+        )
+        return pickle.dumps(audio_frame)
+
+    def deserialize(self, data: str | bytes) -> Frame | None:
+        """Rebuild an `AudioRawFrame` from a pickled payload.
+
+        Args:
+            data: Pickled object that supports ``['frame']`` lookup; the value
+                found there must expose `data`, `sample_rate` and
+                `num_channels`.
+
+        Returns:
+            An `AudioRawFrame` carrying a copy of the audio bytes.
+        """
+        # NOTE(security): pickle.loads can execute arbitrary code embedded in
+        # the payload; only pass data that comes from a trusted source.
+        # NOTE(review): serialize() pickles a bare AudioFrame, whereas this
+        # method reads a 'frame' key, so serialize() output cannot be fed back
+        # in here directly -- confirm this asymmetry is intended.
+        audio_frame: AudioFrame = pickle.loads(data)['frame']
+        return AudioRawFrame(
+            audio=bytes(audio_frame.data),
+            sample_rate=audio_frame.sample_rate,
+            num_channels=audio_frame.num_channels,
+        )
diff --git a/src/pipecat/transports/network/fastapi_websocket.py b/src/pipecat/transports/network/fastapi_websocket.py
@@ -91,13 +91,13 @@ def __init__(self, websocket: WebSocket, params: FastAPIWebsocketParams, **kwarg
 
         self._websocket = websocket
         self._params = params
-        self._audio_buffer = bytes()
+        self._websocket_audio_buffer = bytes()
 
     async def write_raw_audio_frames(self, frames: bytes):
-        self._audio_buffer += frames
-        while len(self._audio_buffer) >= self._params.audio_frame_size:
+        self._websocket_audio_buffer += frames
+        while len(self._websocket_audio_buffer) >= self._params.audio_frame_size:
             frame = AudioRawFrame(
-                audio=self._audio_buffer[:self._params.audio_frame_size],
+                audio=self._websocket_audio_buffer[:self._params.audio_frame_size],
                 sample_rate=self._params.audio_out_sample_rate,
                 num_channels=self._params.audio_out_channels
             )
@@ -121,7 +121,7 @@ async def write_raw_audio_frames(self, frames: bytes):
             if payload and self._websocket.client_state == WebSocketState.CONNECTED:
                 await self._websocket.send_text(payload)
 
-            self._audio_buffer = self._audio_buffer[self._params.audio_frame_size:]
+            self._websocket_audio_buffer = self._websocket_audio_buffer[self._params.audio_frame_size:]
 
 
 class FastAPIWebsocketTransport(BaseTransport):