From 4b9d7634e7a06f1d65821093b7685b0a71fcd0f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?=
Date: Tue, 17 Dec 2024 18:14:04 -0800
Subject: [PATCH] audio(koala): add new audio filter KoalaFilter

---
 CHANGELOG.md                              |   4 ++
 src/pipecat/audio/filters/koala_filter.py | 103 ++++++++++++++++++++++
 2 files changed, 107 insertions(+)
 create mode 100644 src/pipecat/audio/filters/koala_filter.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 78e069fc7..200875dbe 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+- Added `KoalaFilter` which implements on-device noise reduction using Koala
+  Noise Suppression.
+  (see https://picovoice.ai/platform/koala/)
+
 - Pipecat now supports Python 3.13. We had a dependency on the `audioop` package
   which was deprecated and now removed on Python 3.13. We are now using
   `audioop-lts` (https://github.com/AbstractUmbra/audioop) to provide the same
diff --git a/src/pipecat/audio/filters/koala_filter.py b/src/pipecat/audio/filters/koala_filter.py
new file mode 100644
index 000000000..416e4e9fb
--- /dev/null
+++ b/src/pipecat/audio/filters/koala_filter.py
@@ -0,0 +1,103 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+from typing import Sequence
+
+import numpy as np
+from loguru import logger
+
+from pipecat.audio.filters.base_audio_filter import BaseAudioFilter
+from pipecat.frames.frames import FilterControlFrame, FilterEnableFrame
+
+try:
+    import pvkoala
+except ModuleNotFoundError as e:
+    logger.error(f"Exception: {e}")
+    logger.error("In order to use the Koala filter, you need to `pip install pipecat-ai[koala]`.")
+    raise Exception(f"Missing module: {e}") from e
+
+
+class KoalaFilter(BaseAudioFilter):
+    """Audio filter that applies Koala Noise Suppression (from PicoVoice).
+
+    Incoming audio (interpreted as 16-bit samples) is buffered and handed to
+    Koala in chunks of `frame_length` samples. If the sample rate given to
+    `start()` differs from the one Koala reports, the filter stays inactive
+    and audio passes through untouched.
+
+    """
+
+    def __init__(self, *, access_key: str) -> None:
+        """Create the Koala engine.
+
+        Args:
+            access_key: Access key passed to `pvkoala.create()`.
+        """
+        self._access_key = access_key
+
+        self._filtering = True
+        self._sample_rate = 0
+        self._koala = pvkoala.create(access_key=f"{self._access_key}")
+        self._koala_ready = True
+        # Holds audio bytes that do not yet add up to a full Koala frame.
+        self._audio_buffer = bytearray()
+
+    async def start(self, sample_rate: int):
+        """Record the stream sample rate and check it against Koala's.
+
+        Args:
+            sample_rate: Sample rate of the audio that will be filtered.
+        """
+        self._sample_rate = sample_rate
+        # Re-evaluate on every start so that a later start with a supported
+        # sample rate re-enables the filter.
+        self._koala_ready = self._sample_rate == self._koala.sample_rate
+        if not self._koala_ready:
+            logger.warning(
+                f"Koala filter needs sample rate {self._koala.sample_rate} (got {self._sample_rate})"
+            )
+
+    async def stop(self):
+        """Reset Koala and drop any buffered audio."""
+        self._koala.reset()
+        # Leftover samples must not leak into the audio of a later start.
+        self._audio_buffer.clear()
+
+    async def process_frame(self, frame: FilterControlFrame):
+        """Handle control frames; `FilterEnableFrame` toggles filtering."""
+        if isinstance(frame, FilterEnableFrame):
+            self._filtering = frame.enable
+
+    async def filter(self, audio: bytes) -> bytes:
+        """Run the given audio through Koala.
+
+        Args:
+            audio: Raw audio bytes, interpreted as 16-bit samples.
+
+        Returns:
+            The unmodified input if the filter is disabled or not ready.
+            Otherwise the filtered bytes of every complete Koala frame
+            available so far, which is empty while less than one frame is
+            buffered.
+        """
+        if not self._koala_ready or not self._filtering:
+            return audio
+
+        self._audio_buffer.extend(audio)
+
+        # Two bytes per 16-bit sample.
+        num_bytes = self._koala.frame_length * 2
+
+        filtered_data: list[int] = []
+        while len(self._audio_buffer) >= num_bytes:
+            # Grab the number of samples required by Koala.
+            frame = bytes(self._audio_buffer[:num_bytes])
+            pcm: Sequence[int] = np.frombuffer(frame, dtype=np.int16).tolist()
+            filtered_data.extend(self._koala.process(pcm))
+            # Drop the consumed bytes in place and check again.
+            del self._audio_buffer[:num_bytes]
+
+        return np.array(filtered_data, dtype=np.int16).tobytes()