From 73904efcdc98299e93faca696f3a19468430ee18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20Mach=C3=A1=C4=8Dek?= Date: Mon, 19 Aug 2024 10:43:29 +0200 Subject: [PATCH] small code review --- whisper_online.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/whisper_online.py b/whisper_online.py index d3b1e4c..d3e4a5c 100644 --- a/whisper_online.py +++ b/whisper_online.py @@ -558,7 +558,6 @@ def clear_buffer(self): def insert_audio_chunk(self, audio): res = self.vac(audio) - print(res) self.audio_buffer = np.append(self.audio_buffer, audio) if res is not None: @@ -584,11 +583,10 @@ def insert_audio_chunk(self, audio): if self.status == 'voice': self.online.insert_audio_chunk(self.audio_buffer) self.current_online_chunk_buffer_size += len(self.audio_buffer) - if self.status is not None: self.clear_buffer() - else: # we are at the beginning of process, no voice has ever been detected - # We keep the 1s because VAD may later find start of voice in it. - # But trimming it to prevent OOM. + else: + # We keep 1 second because VAD may later find start of voice in it. + # But we trim it to prevent OOM. self.buffer_offset += max(0,len(self.audio_buffer)-self.SAMPLING_RATE) self.audio_buffer = self.audio_buffer[-self.SAMPLING_RATE:]