diff --git a/CHANGELOG.md b/CHANGELOG.md index 032e7d59b..135bc8db7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,19 @@ All notable changes to **pipecat** will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.0.35] - 2024-06-28 + +### Changed + +- `FastAPIWebsocketParams` now requires a serializer. + +- `TwilioFrameSerializer` now requires a `streamSid`. + +### Fixed + +- Silero VAD number of frames needs to be 512 for 16000 sample rate or 256 for + 8000 sample rate. + ## [0.0.34] - 2024-06-25 ### Fixed diff --git a/src/pipecat/vad/silero.py b/src/pipecat/vad/silero.py index 52ea159b1..99bc71ee6 100644 --- a/src/pipecat/vad/silero.py +++ b/src/pipecat/vad/silero.py @@ -36,6 +36,9 @@ class SileroVADAnalyzer(VADAnalyzer): def __init__(self, sample_rate=16000, params: VADParams = VADParams()): super().__init__(sample_rate=sample_rate, num_channels=1, params=params) + if sample_rate != 16000 and sample_rate != 8000: + raise Exception("Silero VAD sample rate needs to be 16000 or 8000") + logger.debug("Loading Silero VAD model...") (self._model, utils) = torch.hub.load( @@ -51,7 +54,7 @@ def __init__(self, sample_rate=16000, params: VADParams = VADParams()): # def num_frames_required(self) -> int: - return int(self.sample_rate / 100) * 4 # 40ms + return 512 if self.sample_rate == 16000 else 256 def voice_confidence(self, buffer) -> float: try: