Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

audio(koala): add new audio filter KoalaFilter #886

Merged
merged 1 commit into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- Added `KoalaFilter`, which implements on-device noise reduction using Koala
Noise Suppression.
(see https://picovoice.ai/platform/koala/)

- Pipecat now supports Python 3.13. We had a dependency on the `audioop` package
which was deprecated and now removed on Python 3.13. We are now using
`audioop-lts` (https://github.com/AbstractUmbra/audioop) to provide the same
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ groq = [ "openai~=1.57.2" ]
gstreamer = [ "pygobject~=3.48.2" ]
fireworks = [ "openai~=1.57.2" ]
krisp = [ "pipecat-ai-krisp~=0.3.0" ]
koala = [ "pvkoala~=2.0.2" ]
langchain = [ "langchain~=0.2.14", "langchain-community~=0.2.12", "langchain-openai~=0.1.20" ]
livekit = [ "livekit~=0.17.5", "livekit-api~=0.7.1" ]
lmnt = [ "lmnt~=1.1.4" ]
Expand Down
75 changes: 75 additions & 0 deletions src/pipecat/audio/filters/koala_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

from typing import Sequence

import numpy as np
from loguru import logger

from pipecat.audio.filters.base_audio_filter import BaseAudioFilter
from pipecat.frames.frames import FilterControlFrame, FilterEnableFrame

# pvkoala is an optional dependency (installed via the `koala` extra). Fail at
# import time with an actionable message rather than later at first use.
try:
    import pvkoala
except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
    logger.error("In order to use the Koala filter, you need to `pip install pipecat-ai[koala]`.")
    # Chain explicitly so the traceback reports the failed import as the
    # direct cause of this error.
    raise Exception(f"Missing module: {e}") from e


class KoalaFilter(BaseAudioFilter):
    """Audio filter that applies Koala Noise Suppression (from PicoVoice).

    Audio is handled as 16-bit samples (2 bytes each). Koala only processes
    fixed-size frames of ``frame_length`` samples, so incoming audio is
    accumulated in an internal buffer and only complete frames are filtered;
    any remainder is kept for the next call to :meth:`filter`.
    """

    def __init__(self, *, access_key: str) -> None:
        """Create the filter and the underlying Koala engine.

        Args:
            access_key: Access key handed to ``pvkoala.create``.
        """
        self._access_key = access_key

        self._filtering = True
        self._sample_rate = 0
        self._koala = pvkoala.create(access_key=self._access_key)
        self._koala_ready = True
        self._audio_buffer = bytearray()

    async def start(self, sample_rate: int):
        """Record the stream sample rate and check it against Koala's.

        Koala works at a single sample rate. When ``sample_rate`` differs, a
        warning is logged and the filter passes audio through untouched.

        Args:
            sample_rate: Sample rate of the audio that will be filtered.
        """
        self._sample_rate = sample_rate
        # Recompute readiness on every start so that a later start() with a
        # supported rate re-enables a filter that was previously bypassed.
        self._koala_ready = self._sample_rate == self._koala.sample_rate
        if not self._koala_ready:
            logger.warning(
                f"Koala filter needs sample rate {self._koala.sample_rate} (got {self._sample_rate})"
            )

    async def stop(self):
        """Reset Koala's internal state and drop any buffered partial frame."""
        self._koala.reset()
        # Leftover samples belong to the session that just ended; do not let
        # them leak into the start of the next one.
        self._audio_buffer.clear()

    async def process_frame(self, frame: FilterControlFrame):
        """Handle a filter control frame.

        Args:
            frame: Control frame; a ``FilterEnableFrame`` turns filtering on
                or off, other frame types are ignored.
        """
        if isinstance(frame, FilterEnableFrame):
            self._filtering = frame.enable
            if not self._filtering:
                # While disabled, audio bypasses the buffer. Discard what is
                # pending so stale samples are not prepended on re-enable.
                self._audio_buffer.clear()

    async def filter(self, audio: bytes) -> bytes:
        """Run noise suppression over ``audio``.

        Args:
            audio: Raw 16-bit audio bytes.

        Returns:
            The filtered audio for every complete Koala frame currently
            buffered (``b""`` when less than one frame is available), or
            ``audio`` unchanged when the filter is disabled or the sample
            rate is not supported.
        """
        if not self._koala_ready or not self._filtering:
            return audio

        self._audio_buffer.extend(audio)

        frame_length = self._koala.frame_length
        frame_bytes = frame_length * 2  # 2 bytes per int16 sample
        usable_bytes = (len(self._audio_buffer) // frame_bytes) * frame_bytes
        if not usable_bytes:
            return b""

        # Take every complete frame in one go and remove it from the buffer in
        # place, instead of re-copying the remaining buffer once per frame.
        samples = np.frombuffer(bytes(self._audio_buffer[:usable_bytes]), dtype=np.int16)
        del self._audio_buffer[:usable_bytes]

        filtered_data: list[int] = []
        for chunk in samples.reshape(-1, frame_length):
            filtered_data.extend(self._koala.process(chunk.tolist()))

        return np.array(filtered_data, dtype=np.int16).tobytes()
Loading