diff --git a/CHANGELOG.md b/CHANGELOG.md
index 78e069fc7..200875dbe 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+- Added `KoalaFilter` which implements on-device noise reduction using Koala
+  Noise Suppression.
+  (see https://picovoice.ai/platform/koala/)
+
 - Pipecat now supports Python 3.13. We had a dependency on the `audioop` package
   which was deprecated and now removed on Python 3.13. We are now using
   `audioop-lts` (https://github.com/AbstractUmbra/audioop) to provide the same
diff --git a/pyproject.toml b/pyproject.toml
index 8b8026a8e..4cf1de72c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,6 +56,7 @@ groq = [ "openai~=1.57.2" ]
 gstreamer = [ "pygobject~=3.48.2" ]
 fireworks = [ "openai~=1.57.2" ]
 krisp = [ "pipecat-ai-krisp~=0.3.0" ]
+koala = [ "pvkoala~=2.0.2" ]
 langchain = [ "langchain~=0.2.14", "langchain-community~=0.2.12", "langchain-openai~=0.1.20" ]
 livekit = [ "livekit~=0.17.5", "livekit-api~=0.7.1" ]
 lmnt = [ "lmnt~=1.1.4" ]
diff --git a/src/pipecat/audio/filters/koala_filter.py b/src/pipecat/audio/filters/koala_filter.py
new file mode 100644
index 000000000..416e4e9fb
--- /dev/null
+++ b/src/pipecat/audio/filters/koala_filter.py
@@ -0,0 +1,114 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+from typing import Sequence
+
+import numpy as np
+from loguru import logger
+
+from pipecat.audio.filters.base_audio_filter import BaseAudioFilter
+from pipecat.frames.frames import FilterControlFrame, FilterEnableFrame
+
+try:
+    import pvkoala
+except ModuleNotFoundError as e:
+    logger.error(f"Exception: {e}")
+    logger.error("In order to use the Koala filter, you need to `pip install pipecat-ai[koala]`.")
+    raise Exception(f"Missing module: {e}")
+
+
+class KoalaFilter(BaseAudioFilter):
+    """Audio filter that applies Koala Noise Suppression (from Picovoice).
+
+    Incoming audio is treated as 16-bit PCM and is buffered until at least one
+    complete Koala frame (`frame_length` samples) is available, so `filter()`
+    may return fewer bytes than it was given, or none at all. When the sample
+    rate passed to `start()` differs from the one Koala requires, audio is
+    passed through unmodified.
+
+    """
+
+    # Koala is fed 16-bit samples, i.e. two bytes each.
+    _BYTES_PER_SAMPLE = 2
+
+    def __init__(self, *, access_key: str) -> None:
+        """Create the Koala engine.
+
+        Args:
+            access_key: Access key handed to `pvkoala.create()`.
+        """
+        self._access_key = access_key
+
+        self._filtering = True
+        self._sample_rate = 0
+        self._koala = pvkoala.create(access_key=self._access_key)
+        self._koala_ready = True
+        self._audio_buffer = bytearray()
+
+    async def start(self, sample_rate: int):
+        """Enable filtering if `sample_rate` matches the rate Koala requires.
+
+        On a mismatch a warning is logged and `filter()` passes audio through.
+        """
+        self._sample_rate = sample_rate
+        # Re-evaluate on every start so that an earlier mismatch does not leave
+        # the filter disabled for good.
+        self._koala_ready = self._sample_rate == self._koala.sample_rate
+        if not self._koala_ready:
+            logger.warning(
+                f"Koala filter needs sample rate {self._koala.sample_rate} (got {self._sample_rate})"
+            )
+
+    async def stop(self):
+        """Reset Koala and discard any buffered audio."""
+        self._koala.reset()
+        # A leftover partial frame must not be prepended to the next session.
+        self._audio_buffer.clear()
+
+    async def process_frame(self, frame: FilterControlFrame):
+        """Handle a control frame; `FilterEnableFrame` toggles filtering."""
+        if isinstance(frame, FilterEnableFrame):
+            self._filtering = frame.enable
+            if not self._filtering:
+                # Audio bypasses the buffer while disabled, so a partial frame
+                # kept here would be stale once filtering resumes.
+                self._audio_buffer.clear()
+
+    async def filter(self, audio: bytes) -> bytes:
+        """Run every complete buffered frame through Koala.
+
+        Args:
+            audio: Raw 16-bit PCM audio.
+
+        Returns:
+            The filtered audio for all complete frames, possibly empty while a
+            frame is still accumulating. `audio` itself is returned when
+            filtering is disabled or the sample rate is unsupported.
+        """
+        if not self._koala_ready or not self._filtering:
+            return audio
+
+        self._audio_buffer.extend(audio)
+
+        frame_length = self._koala.frame_length
+        frame_bytes = frame_length * self._BYTES_PER_SAMPLE
+        num_bytes = len(self._audio_buffer) // frame_bytes * frame_bytes
+        if num_bytes == 0:
+            return b""
+
+        # Take all complete frames out of the buffer in one step rather than
+        # re-copying the remainder after each frame; the tail waits for more
+        # audio.
+        pending = bytes(self._audio_buffer[:num_bytes])
+        del self._audio_buffer[:num_bytes]
+
+        samples = np.frombuffer(pending, dtype=np.int16)
+        filtered_data: list[int] = []
+        for offset in range(0, len(samples), frame_length):
+            data: Sequence[int] = samples[offset : offset + frame_length].tolist()
+            filtered_data.extend(self._koala.process(data))
+
+        return np.array(filtered_data, dtype=np.int16).tobytes()