Skip to content

Commit

Permalink
audio(koala): add new audio filter KoalaFilter
Browse files Browse the repository at this point in the history
  • Loading branch information
aconchillo committed Dec 18, 2024
1 parent fb9f72d commit 7322bad
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 0 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- Added `KoalaFilter` which implements on-device noise reduction using Koala
Noise Suppression.
(see https://picovoice.ai/platform/koala/)

- Pipecat now supports Python 3.13. We had a dependency on the `audioop` package
which was deprecated and now removed on Python 3.13. We are now using
`audioop-lts` (https://github.com/AbstractUmbra/audioop) to provide the same
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ groq = [ "openai~=1.57.2" ]
gstreamer = [ "pygobject~=3.48.2" ]
fireworks = [ "openai~=1.57.2" ]
krisp = [ "pipecat-ai-krisp~=0.3.0" ]
koala = [ "pvkoala~=2.0.2" ]
langchain = [ "langchain~=0.2.14", "langchain-community~=0.2.12", "langchain-openai~=0.1.20" ]
livekit = [ "livekit~=0.17.5", "livekit-api~=0.7.1" ]
lmnt = [ "lmnt~=1.1.4" ]
Expand Down
75 changes: 75 additions & 0 deletions src/pipecat/audio/filters/koala_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

from typing import Sequence

import numpy as np
from loguru import logger

from pipecat.audio.filters.base_audio_filter import BaseAudioFilter
from pipecat.frames.frames import FilterControlFrame, FilterEnableFrame

# `pvkoala` is only installed through the optional `koala` extra, so turn a
# missing module into an actionable message before failing the import.
try:
    import pvkoala
except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
    logger.error("In order to use the Koala filter, you need to `pip install pipecat-ai[koala]`.")
    # Chain explicitly so the traceback marks the import failure as the cause.
    raise Exception(f"Missing module: {e}") from e


class KoalaFilter(BaseAudioFilter):
    """Audio filter that applies Koala Noise Suppression (from Picovoice).

    Incoming audio is decoded as 16-bit samples and handed to Koala in chunks
    of exactly ``frame_length`` samples. Bytes that do not yet fill a whole
    chunk stay in an internal buffer until more audio arrives, so ``filter()``
    may return fewer bytes than it was given (possibly none).
    """

    # Size in bytes of one sample; audio is decoded as int16.
    _BYTES_PER_SAMPLE = 2

    def __init__(self, *, access_key: str) -> None:
        """Create the Koala engine.

        Args:
            access_key: Access key passed to ``pvkoala.create``.
        """
        self._access_key = access_key

        self._filtering = True
        self._sample_rate = 0
        # NOTE(review): the engine is created here and only ever reset, never
        # released; confirm whether its lifecycle needs an explicit delete.
        self._koala = pvkoala.create(access_key=f"{self._access_key}")
        self._koala_ready = True
        self._audio_buffer = bytearray()

    async def start(self, sample_rate: int):
        """Record the stream sample rate and check it against Koala's.

        When the rates differ a warning is logged and ``filter()`` passes
        audio through untouched. The readiness flag is recomputed on every
        call so a later start with a matching rate re-enables filtering.

        Args:
            sample_rate: Sample rate of the audio that will be filtered.
        """
        self._sample_rate = sample_rate
        self._koala_ready = self._sample_rate == self._koala.sample_rate
        if not self._koala_ready:
            logger.warning(
                f"Koala filter needs sample rate {self._koala.sample_rate} (got {self._sample_rate})"
            )

    async def stop(self):
        """Reset the Koala engine and drop any buffered partial frame."""
        self._koala.reset()
        # Leftover bytes belong to the stream that just ended; keeping them
        # would prepend stale audio to the next stream.
        self._audio_buffer.clear()

    async def process_frame(self, frame: FilterControlFrame):
        """Handle a filter control frame.

        Only ``FilterEnableFrame`` is acted upon: it switches filtering on or
        off. Other control frames are ignored.

        Args:
            frame: The control frame to process.
        """
        if isinstance(frame, FilterEnableFrame):
            self._filtering = frame.enable
            if not self._filtering:
                # While disabled, audio bypasses the buffer, so whatever is
                # buffered now would be out of order once re-enabled.
                self._audio_buffer.clear()

    async def filter(self, audio: bytes) -> bytes:
        """Run noise suppression over ``audio``.

        Args:
            audio: Raw audio bytes, decoded as 16-bit samples.

        Returns:
            The input unchanged when filtering is disabled or the sample rate
            does not match Koala's. Otherwise the filtered bytes for every
            complete Koala frame available so far; this is ``b""`` when less
            than one frame has been buffered.
        """
        if not self._koala_ready or not self._filtering:
            return audio

        self._audio_buffer.extend(audio)

        samples_per_frame = self._koala.frame_length
        frame_bytes = samples_per_frame * self._BYTES_PER_SAMPLE
        complete_bytes = (len(self._audio_buffer) // frame_bytes) * frame_bytes

        # Take every complete frame out of the buffer in one step instead of
        # re-copying the remainder once per frame. The bytes() copy is needed
        # because the bytearray cannot be resized while numpy holds a view.
        samples = np.frombuffer(bytes(self._audio_buffer[:complete_bytes]), dtype=np.int16)
        del self._audio_buffer[:complete_bytes]

        filtered_samples: list[int] = []
        for offset in range(0, len(samples), samples_per_frame):
            frame_samples = samples[offset : offset + samples_per_frame].tolist()
            filtered_samples.extend(self._koala.process(frame_samples))

        return np.array(filtered_samples, dtype=np.int16).tobytes()

0 comments on commit 7322bad

Please sign in to comment.