Skip to content
This repository was archived by the owner on Dec 20, 2023. It is now read-only.

Commit 5ce8b0e

Browse files
committed
fix AVS audio streaming
* Audio is now recorded separately, in real time, into a queue; requests read their data from this queue instead of controlling the recording themselves.
1 parent a0aca4d commit 5ce8b0e

File tree

1 file changed

+81
-63
lines changed

1 file changed

+81
-63
lines changed

src/alexapi/capture.py

+81-63
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import time
33
import threading
44
import os
5+
import queue
56

67
import webrtcvad
78

@@ -57,6 +58,9 @@ class Capture(object):
5758
_handle_chunk_size = None
5859

5960
_device_info = None
61+
_stream = None
62+
_callback_data = None
63+
_queue = None
6064
_vad = None
6165
_config = None
6266
_tmp_path = None
@@ -69,6 +73,7 @@ def __init__(self, config, tmp_path):
6973
self._tmp_path = tmp_path
7074

7175
self._pa = pyaudio.PyAudio()
76+
self._queue = queue.Queue()
7277
self._device_info = DeviceInfo()
7378

7479
self._recording_lock_inverted = threading.Event()
@@ -116,100 +121,113 @@ def handle_read(self):
116121
def handle_release(self):
117122
self._handle.close()
118123

119-
def silence_listener(self, throwaway_frames=None, force_record=None):
124+
def _callback(self, in_data, frame_count, time_info, status): # pylint: disable=unused-argument
120125

121-
self._recording_lock_inverted.clear()
126+
debug = logging.getLogger('alexapi').getEffectiveLevel() == logging.DEBUG
122127

123-
throwaway_frames = throwaway_frames or self.VAD_THROWAWAY_FRAMES
128+
if not in_data:
129+
self._queue.put(False)
130+
return None, pyaudio.paAbort
124131

125-
logger.debug("Setting up recording")
132+
do_VAD = True
133+
if self._callback_data['force_record'] and not self._callback_data['force_record'][1]:
134+
do_VAD = False
126135

127-
stream = self._pa.open(
128-
input=True,
129-
input_device_index=self._device_info.get_device_index(self._config['sound']['input_device']),
130-
format=pyaudio.paInt16,
131-
channels=1,
132-
rate=self.VAD_SAMPLERATE,
133-
frames_per_buffer=self.VAD_PERIOD
134-
)
136+
# do not count first 10 frames when doing VAD
137+
if do_VAD and (self._callback_data['frames'] < self._callback_data['throwaway_frames']):
138+
self._callback_data['frames'] += 1
135139

136-
debug = logging.getLogger('alexapi').getEffectiveLevel() == logging.DEBUG
140+
# now do VAD
141+
elif (self._callback_data['force_record'] and self._callback_data['force_record'][0]()) \
142+
or (do_VAD and (self._callback_data['thresholdSilenceMet'] is False)
143+
and ((time.time() - self._callback_data['start']) < self.MAX_RECORDING_LENGTH)):
137144

138-
logger.debug("Start recording")
145+
if do_VAD:
139146

140-
if self._state_callback:
141-
self._state_callback()
147+
if int(len(in_data) / 2) == self.VAD_PERIOD:
148+
isSpeech = self._vad.is_speech(in_data, self.VAD_SAMPLERATE)
142149

143-
def _listen():
144-
start = time.time()
150+
if not isSpeech:
151+
self._callback_data['silenceRun'] += 1
152+
else:
153+
self._callback_data['silenceRun'] = 0
154+
self._callback_data['numSilenceRuns'] += 1
145155

146-
do_VAD = True
147-
if force_record and not force_record[1]:
148-
do_VAD = False
156+
# only count silence runs after the first one
157+
# (allow user to speak for total of max recording length if they haven't said anything yet)
158+
if (self._callback_data['numSilenceRuns'] != 0) \
159+
and ((self._callback_data['silenceRun'] * self.VAD_FRAME_MS) > self.VAD_SILENCE_TIMEOUT):
160+
self._callback_data['thresholdSilenceMet'] = True
149161

150-
# Buffer as long as we haven't heard enough silence or the total size is within max size
151-
thresholdSilenceMet = False
152-
frames = 0
153-
numSilenceRuns = 0
154-
silenceRun = 0
162+
else:
163+
self._queue.put(False)
164+
return None, pyaudio.paComplete
155165

156-
if debug:
157-
audio = b''
166+
self._queue.put(in_data)
167+
if debug:
168+
self._callback_data['audio'] += in_data
158169

159-
if do_VAD:
160-
# do not count first 10 frames when doing VAD
161-
while frames < throwaway_frames:
170+
return None, pyaudio.paContinue
162171

163-
if self._interrupt:
164-
break
172+
def silence_listener(self, throwaway_frames=None, force_record=None):
165173

166-
data = stream.read(self.VAD_PERIOD, exception_on_overflow=self._pa_exception_on_overflow)
167-
frames += 1
168-
if data:
169-
yield data
174+
logger.debug("Recording: Setting up")
170175

171-
if debug:
172-
audio += data
176+
self._recording_lock_inverted.clear()
173177

174-
# now do VAD
175-
while (force_record and force_record[0]()) \
176-
or (do_VAD and (thresholdSilenceMet is False) and ((time.time() - start) < self.MAX_RECORDING_LENGTH)):
178+
debug = logging.getLogger('alexapi').getEffectiveLevel() == logging.DEBUG
177179

178-
if self._interrupt:
179-
break
180+
if self._state_callback:
181+
self._state_callback()
180182

181-
data = stream.read(self.VAD_PERIOD, exception_on_overflow=self._pa_exception_on_overflow)
182-
if data:
183-
yield data
183+
self._queue.queue.clear()
184184

185-
if debug:
186-
audio += data
185+
self._callback_data = {
186+
'start': time.time(),
187+
'thresholdSilenceMet': False, # Buffer as long as we haven't heard enough silence or the total size is within max size
188+
'frames': 0,
189+
'throwaway_frames': throwaway_frames or self.VAD_THROWAWAY_FRAMES,
190+
'numSilenceRuns': 0,
191+
'silenceRun': 0,
192+
'force_record': force_record,
193+
'audio': b'' if debug else False,
194+
}
187195

188-
if do_VAD and (int(len(data)/2) == self.VAD_PERIOD):
189-
isSpeech = self._vad.is_speech(data, self.VAD_SAMPLERATE)
196+
stream = self._pa.open(
197+
input=True,
198+
input_device_index=self._device_info.get_device_index(self._config['sound']['input_device']),
199+
format=pyaudio.paInt16,
200+
channels=1,
201+
rate=self.VAD_SAMPLERATE,
202+
frames_per_buffer=self.VAD_PERIOD,
203+
stream_callback=self._callback,
204+
start=False
205+
)
190206

191-
if not isSpeech:
192-
silenceRun += 1
193-
else:
194-
silenceRun = 0
195-
numSilenceRuns += 1
207+
logger.debug("Recording: Start")
208+
stream.start_stream()
196209

197-
if do_VAD:
198-
# only count silence runs after the first one
199-
# (allow user to speak for total of max recording length if they haven't said anything yet)
200-
if (numSilenceRuns != 0) and ((silenceRun * self.VAD_FRAME_MS) > self.VAD_SILENCE_TIMEOUT):
201-
thresholdSilenceMet = True
210+
def _listen():
211+
while True:
212+
try:
213+
data = self._queue.get(block=True, timeout=2)
214+
if not data or self._interrupt:
215+
break
202216

203-
logger.debug("End recording")
217+
yield data
218+
except queue.Empty:
219+
break
204220

221+
stream.stop_stream()
222+
logger.debug("Recording: End")
205223
stream.close()
206224

207225
if self._state_callback:
208226
self._state_callback(False)
209227

210228
if debug:
211229
with open(self._tmp_path + 'recording.wav', 'wb') as rf:
212-
rf.write(audio)
230+
rf.write(self._callback_data['audio'])
213231

214232
self._recording_lock_inverted.set()
215233

0 commit comments

Comments
 (0)