# pocket_sphinx_listener.py
# Forked from bynds/makevoicedemo.
from pocketsphinx import *
import pyaudio
import gevent
class PocketSphinxListener(object):
    """Listen on the microphone and return what PocketSphinx recognised.

    The decoder is configured once in ``__init__``; every call to
    ``getCommand`` opens a fresh microphone stream, feeds audio to the
    decoder until one spoken utterance has been recognised, and returns
    the recognised text.
    """

    def __init__(self, debug=False):
        """Configure the PocketSphinx decoder and create the PyAudio handle.

        Args:
            debug: When true, decoder logging is left enabled and
                ``getCommand`` prints partial hypotheses while listening.
        """
        # Local import so this class does not rely on a file-level
        # ``import os``; only needed for the portable null device below.
        import os

        self.hmm = 'cmusphinx-5prealpha-en-us-ptm-2.0/'
        self.dic = 'dictionary.dic'
        self.lm = 'language_model.lm'
        self.grammar = 'grammar.jsgf'
        # Number of frames requested from the microphone per read.
        self.bitesize = 512
        self.debug = debug

        self.config = Decoder.default_config()
        self.config.set_string('-hmm', self.hmm)
        # A statistical language model (self.lm) could be passed with '-lm'
        # in place of the predetermined grammar file; it is intentionally
        # not loaded here because the JSGF grammar is used instead.
        self.config.set_string('-dict', self.dic)
        self.config.set_string('-jsgf', self.grammar)
        # Outside debug mode the decoder log is discarded. Keep the log
        # (debug=True) when the program fails with an error such as
        # "argument 1 of type 'Decoder *'". os.devnull is '/dev/null' on
        # POSIX, so behaviour there is unchanged.
        if not self.debug:
            self.config.set_string('-logfn', os.devnull)
        # NOTE(review): assumed to apply regardless of debug mode; the
        # original indentation of this line was not recoverable.
        self.config.set_boolean("-allphone_ci", True)

        self.decoder = Decoder(self.config)
        self.pyAudio = pyaudio.PyAudio()

    def getCommand(self, debug=False):
        """Block until one utterance is recognised and return its text.

        Args:
            debug: Print partial hypotheses while listening. Debugging is
                also active when the instance was created with debug=True.

        Returns:
            The hypothesis string PocketSphinx produced for the utterance.

        Any exception raised while listening propagates to the caller, but
        the microphone stream is always closed and an utterance that is
        still open is always ended first.
        """
        # Debug mode can be requested per call or for the whole instance.
        debug = bool(self.debug or debug)

        # Stream that delivers the user's speech from the microphone.
        self.stream = self.pyAudio.open(format=pyaudio.paInt16,
                                        channels=1,
                                        rate=16000,
                                        input=True,
                                        frames_per_buffer=self.bitesize)
        # True while the decoder has an utterance open that must be ended.
        utteranceOpen = False
        # True once speech has been detected in the current utterance.
        utteranceStarted = False
        try:
            # Let the user know that we're ready for them to speak.
            print('Need more input: ')

            self.decoder.start_utt()
            utteranceOpen = True

            # Loop until the decoder gives us a hypothesis for an utterance.
            while True:
                try:
                    soundBite = self.stream.read(self.bitesize)
                except IOError as e:
                    # A failed read (e.g. an input overflow) only costs us
                    # this chunk. Skip it rather than re-feeding stale
                    # audio or touching an unassigned variable.
                    if debug:
                        print('Audio read failed: {}'.format(e))
                    continue

                if not soundBite:
                    continue

                self.decoder.process_raw(soundBite, False, False)

                if self.decoder.get_in_speech():
                    # Silence -> speech: the user has started talking.
                    utteranceStarted = True
                elif utteranceStarted:
                    # Speech -> silence: ask the decoder for its best guess.
                    self.decoder.end_utt()
                    utteranceOpen = False
                    self.hypothesis = self.decoder.hyp()
                    if self.hypothesis is not None:
                        bestGuess = self.hypothesis.hypstr
                        print('I just heard you say:"{}"'.format(bestGuess))
                        return bestGuess
                    # Nothing was recognised. The utterance is closed, so a
                    # new one must be opened before more audio is fed in.
                    self.decoder.start_utt()
                    utteranceOpen = True
                    utteranceStarted = False
                    continue

                # Show what the decoder thinks we're saying as we go.
                if debug:
                    partial = self.decoder.hyp()
                    if partial is not None:
                        print(partial.hypstr)
        finally:
            # Leave the decoder and the microphone in a clean state on
            # every exit path so that getCommand can be called again.
            try:
                if utteranceOpen:
                    self.decoder.end_utt()
            finally:
                self.stream.stop_stream()
                self.stream.close()