Skip to content

Commit

Permalink
webrtc VAD needs a different sample size
Browse files Browse the repository at this point in the history
  • Loading branch information
chadbailey59 committed Apr 3, 2024
1 parent 3d03547 commit 02c5328
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 10 deletions.
10 changes: 5 additions & 5 deletions src/dailyai/transports/daily_transport.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@

from dailyai.transports.threaded_transport import ThreadedTransport

SAMPLE_RATE = 16000
NUM_CHANNELS = 1

SPEECH_THRESHOLD = 0.90
Expand Down Expand Up @@ -91,7 +90,7 @@ def __init__(

self.webrtc_vad = Daily.create_native_vad(
reset_period_ms=VAD_RESET_PERIOD_MS,
sample_rate=SAMPLE_RATE,
sample_rate=self._speaker_sample_rate,
channels=NUM_CHANNELS
)

Expand All @@ -117,14 +116,15 @@ def _patch_method(self, event_name, *args, **kwargs):

def _webrtc_vad_analyze(self):
buffer = self.read_audio_frames(
int(self._speaker_sample_rate / 100))
int(self._vad_samples))
if len(buffer) > 0:
confidence = self.webrtc_vad.analyze_frames(buffer)
# yeses = int(confidence * 20.0)
# nos = 20 - yeses
# out = "!" * yeses + "." * nos
# print(f"!!! confidence: {out}")
return confidence > 0.90
# print(f"!!! confidence: {out} {confidence}")
talking = confidence > 0.90
return talking

def add_event_handler(self, event_name: str, handler):
if not event_name.startswith("on_"):
Expand Down
11 changes: 6 additions & 5 deletions src/dailyai/transports/threaded_transport.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,6 @@ def int2float(sound):
return sound


SAMPLE_RATE = 16000


class VADState(Enum):
QUIET = 1
STARTING = 2
Expand Down Expand Up @@ -81,17 +78,18 @@ def __init__(
repo_or_dir="snakers4/silero-vad", model="silero_vad", force_reload=False
)
print(f"!!! Loaded Silero VAD")
self._vad_samples = 1536

except ModuleNotFoundError as e:
if self._has_webrtc_vad:
print(f"Couldn't load torch; using webrtc VAD")
self._vad_samples = int(self._speaker_sample_rate / 100.0)
else:
print(f"Exception: {e}")
print("In order to use VAD, you'll need to install the `torch` and `torchaudio` modules.")
raise Exception(f"Missing module(s): {e}")

self._vad_samples = 1536
vad_frame_s = self._vad_samples / SAMPLE_RATE
vad_frame_s = self._vad_samples / self._speaker_sample_rate
self._vad_start_frames = round(self._vad_start_s / vad_frame_s)
self._vad_stop_frames = round(self._vad_stop_s / vad_frame_s)
self._vad_starting_count = 0
Expand Down Expand Up @@ -314,6 +312,7 @@ def _vad(self):
self._vad_state == VADState.STARTING
and self._vad_starting_count >= self._vad_start_frames
):
print(f"!!! !!! STARTED SPEAKING")
if self._loop:
asyncio.run_coroutine_threadsafe(
self.receive_queue.put(
Expand All @@ -325,6 +324,8 @@ def _vad(self):
self._vad_state == VADState.STOPPING
and self._vad_stopping_count >= self._vad_stop_frames
):
print(f"!!! !!! STOPPED SPEAKING")

if self._loop:
asyncio.run_coroutine_threadsafe(
self.receive_queue.put(
Expand Down

0 comments on commit 02c5328

Please sign in to comment.