2
2
import time
3
3
import threading
4
4
import os
5
+ import queue
5
6
6
7
import webrtcvad
7
8
@@ -57,6 +58,9 @@ class Capture(object):
57
58
_handle_chunk_size = None
58
59
59
60
_device_info = None
61
+ _stream = None
62
+ _callback_data = None
63
+ _queue = None
60
64
_vad = None
61
65
_config = None
62
66
_tmp_path = None
@@ -69,6 +73,7 @@ def __init__(self, config, tmp_path):
69
73
self ._tmp_path = tmp_path
70
74
71
75
self ._pa = pyaudio .PyAudio ()
76
+ self ._queue = queue .Queue ()
72
77
self ._device_info = DeviceInfo ()
73
78
74
79
self ._recording_lock_inverted = threading .Event ()
@@ -116,100 +121,113 @@ def handle_read(self):
116
121
def handle_release(self):
    """Close the capture handle held in ``self._handle``."""
    handle = self._handle
    handle.close()
118
123
119
- def silence_listener (self , throwaway_frames = None , force_record = None ):
124
def _callback(self, in_data, frame_count, time_info, status):  # pylint: disable=unused-argument
    """Stream callback: apply voice-activity detection to one buffer and queue it.

    Passed as ``stream_callback`` to the input stream opened in
    ``silence_listener``. Per-recording state lives in ``self._callback_data``
    (a dict prepared by ``silence_listener``); captured buffers are handed to
    the consumer through ``self._queue``. A ``False`` item on the queue tells
    the consumer that the recording is over.

    Args:
        in_data: Raw captured audio for this buffer. An empty/None buffer
            aborts the stream.
        frame_count: Unused.
        time_info: Unused.
        status: Unused.

    Returns:
        A ``(None, flag)`` tuple where ``flag`` is ``pyaudio.paAbort`` when no
        data arrived, ``pyaudio.paComplete`` when recording should stop, and
        ``pyaudio.paContinue`` otherwise.
    """
    state = self._callback_data

    if not in_data:
        self._queue.put(False)
        return None, pyaudio.paAbort

    # force_record is used as a pair: [0] is a callable polled to decide
    # whether to keep recording, [1] says whether VAD should still run.
    force_record = state['force_record']
    do_VAD = True
    if force_record and not force_record[1]:
        do_VAD = False

    if do_VAD and state['frames'] < state['throwaway_frames']:
        # Do not run VAD on the first few frames; just count and pass them on.
        state['frames'] += 1

    elif (force_record and force_record[0]()) or (
            do_VAD
            and state['thresholdSilenceMet'] is False
            and (time.time() - state['start']) < self.MAX_RECORDING_LENGTH):

        if do_VAD:
            # Only buffers of exactly VAD_PERIOD 16-bit samples are classified;
            # a short buffer leaves the counters untouched.
            if len(in_data) // 2 == self.VAD_PERIOD:
                if self._vad.is_speech(in_data, self.VAD_SAMPLERATE):
                    state['silenceRun'] = 0
                    state['numSilenceRuns'] += 1
                else:
                    state['silenceRun'] += 1

            # Only count silence after speech has been heard at least once, so
            # the user gets the full max recording length to start talking.
            silence_ms = state['silenceRun'] * self.VAD_FRAME_MS
            if state['numSilenceRuns'] != 0 and silence_ms > self.VAD_SILENCE_TIMEOUT:
                state['thresholdSilenceMet'] = True

    else:
        self._queue.put(False)
        return None, pyaudio.paComplete

    self._queue.put(in_data)

    # The debug capture buffer is bytes when enabled and False when disabled
    # (decided once when the recording is set up). Test the buffer itself
    # instead of re-reading the log level on every callback, so a level change
    # mid-recording can never lead to ``False += bytes``.
    if state['audio'] is not False:
        state['audio'] += in_data

    return None, pyaudio.paContinue
162
171
163
- if self ._interrupt :
164
- break
172
+ def silence_listener (self , throwaway_frames = None , force_record = None ):
165
173
166
- data = stream .read (self .VAD_PERIOD , exception_on_overflow = self ._pa_exception_on_overflow )
167
- frames += 1
168
- if data :
169
- yield data
174
+ logger .debug ("Recording: Setting up" )
170
175
171
- if debug :
172
- audio += data
176
+ self ._recording_lock_inverted .clear ()
173
177
174
- # now do VAD
175
- while (force_record and force_record [0 ]()) \
176
- or (do_VAD and (thresholdSilenceMet is False ) and ((time .time () - start ) < self .MAX_RECORDING_LENGTH )):
178
+ debug = logging .getLogger ('alexapi' ).getEffectiveLevel () == logging .DEBUG
177
179
178
- if self ._interrupt :
179
- break
180
+ if self ._state_callback :
181
+ self . _state_callback ()
180
182
181
- data = stream .read (self .VAD_PERIOD , exception_on_overflow = self ._pa_exception_on_overflow )
182
- if data :
183
- yield data
183
+ self ._queue .queue .clear ()
184
184
185
- if debug :
186
- audio += data
185
+ self ._callback_data = {
186
+ 'start' : time .time (),
187
+ 'thresholdSilenceMet' : False , # Buffer as long as we haven't heard enough silence or the total size is within max size
188
+ 'frames' : 0 ,
189
+ 'throwaway_frames' : throwaway_frames or self .VAD_THROWAWAY_FRAMES ,
190
+ 'numSilenceRuns' : 0 ,
191
+ 'silenceRun' : 0 ,
192
+ 'force_record' : force_record ,
193
+ 'audio' : b'' if debug else False ,
194
+ }
187
195
188
- if do_VAD and (int (len (data )/ 2 ) == self .VAD_PERIOD ):
189
- isSpeech = self ._vad .is_speech (data , self .VAD_SAMPLERATE )
196
+ stream = self ._pa .open (
197
+ input = True ,
198
+ input_device_index = self ._device_info .get_device_index (self ._config ['sound' ]['input_device' ]),
199
+ format = pyaudio .paInt16 ,
200
+ channels = 1 ,
201
+ rate = self .VAD_SAMPLERATE ,
202
+ frames_per_buffer = self .VAD_PERIOD ,
203
+ stream_callback = self ._callback ,
204
+ start = False
205
+ )
190
206
191
- if not isSpeech :
192
- silenceRun += 1
193
- else :
194
- silenceRun = 0
195
- numSilenceRuns += 1
207
+ logger .debug ("Recording: Start" )
208
+ stream .start_stream ()
196
209
197
- if do_VAD :
198
- # only count silence runs after the first one
199
- # (allow user to speak for total of max recording length if they haven't said anything yet)
200
- if (numSilenceRuns != 0 ) and ((silenceRun * self .VAD_FRAME_MS ) > self .VAD_SILENCE_TIMEOUT ):
201
- thresholdSilenceMet = True
210
+ def _listen ():
211
+ while True :
212
+ try :
213
+ data = self ._queue .get (block = True , timeout = 2 )
214
+ if not data or self ._interrupt :
215
+ break
202
216
203
- logger .debug ("End recording" )
217
+ yield data
218
+ except queue .Empty :
219
+ break
204
220
221
+ stream .stop_stream ()
222
+ logger .debug ("Recording: End" )
205
223
stream .close ()
206
224
207
225
if self ._state_callback :
208
226
self ._state_callback (False )
209
227
210
228
if debug :
211
229
with open (self ._tmp_path + 'recording.wav' , 'wb' ) as rf :
212
- rf .write (audio )
230
+ rf .write (self . _callback_data [ ' audio' ] )
213
231
214
232
self ._recording_lock_inverted .set ()
215
233
0 commit comments