Skip to content

Commit

Permalink
Improved state management
Browse files Browse the repository at this point in the history
  • Loading branch information
matt200-ok committed Dec 27, 2024
1 parent 3c00157 commit e5d6617
Showing 1 changed file with 22 additions and 16 deletions.
38 changes: 22 additions & 16 deletions recipes/llm-voice-assistant/python/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@


class Commands:
INIT = 'init'
CLOSE = 'close'
START = 'start'
INTERRUPT = 'interrupt'
Expand All @@ -41,7 +42,10 @@ def tock(self, audio: Optional[Sequence[int]] = None) -> None:
self._audio_sec += (len(audio) / self._sample_rate) if audio is not None else 0.

def rtf(self) -> float:
    """Return the real-time factor for the current window and reset it.

    The value is `_compute_sec` divided by `_audio_sec`. If no audio
    duration has been accumulated yet (`_audio_sec` is not positive), the
    ratio is undefined, so 0.0 is returned instead of raising
    `ZeroDivisionError`.

    Returns:
        The compute-to-audio time ratio, or 0.0 when `_audio_sec` is not
        positive.
    """
    # Check before dividing: an unguarded division here would raise before
    # the fallback could ever be reached.
    if self._audio_sec > 0:
        rtf = self._compute_sec / self._audio_sec
    else:
        rtf = 0.
    # Both accumulators are cleared so the next call measures a fresh window.
    self._compute_sec = 0.
    self._audio_sec = 0.
    return rtf
Expand Down Expand Up @@ -113,24 +117,23 @@ def handler(_, __) -> None:
porcupine = pvporcupine.create(access_key=access_key, keyword_paths=[keyword_model_path])
porcupine_profiler = RTFProfiler(porcupine.sample_rate)

main_queue.put({'command': 'init', 'name': 'Porcupine', 'version': porcupine.version})
main_queue.put({'command': Commands.INIT, 'name': 'Porcupine', 'version': porcupine.version})

cheetah = pvcheetah.create(
access_key=access_key,
endpoint_duration_sec=cheetah_endpoint_duration_sec,
enable_automatic_punctuation=True)
cheetah_profiler = RTFProfiler(cheetah.sample_rate)

main_queue.put({'command': 'init', 'name': 'Cheetah', 'version': cheetah.version})
main_queue.put({'command': Commands.INIT, 'name': 'Cheetah', 'version': cheetah.version})

mic = PvRecorder(frame_length=porcupine.frame_length)
mic.start()

main_queue.put({'command': 'init', 'name': 'PvRecorder', 'version': mic.version})
main_queue.put({'command': Commands.INIT, 'name': 'PvRecorder', 'version': mic.version})

while listen_queue.empty():
time.sleep(0.01)
listen_queue.get()

try:
close = False
Expand Down Expand Up @@ -158,8 +161,9 @@ def handler(_, __) -> None:
cheetah_profiler.tick()
partial_transcript, endpoint_reached = cheetah.process(pcm)
cheetah_profiler.tock(pcm)
user_request += partial_transcript
main_queue.put({'command': Commands.TEXT, 'text': partial_transcript})
if len(partial_transcript) > 0:
user_request += partial_transcript
main_queue.put({'command': Commands.TEXT, 'text': partial_transcript})
if endpoint_reached:
utterance_end_sec = time.perf_counter()
cheetah_profiler.tick()
Expand Down Expand Up @@ -188,7 +192,7 @@ def handler(_, __) -> None:
dialog = pllm.get_dialog()
generating = False

main_queue.put({'command': 'init', 'name': 'picoLLM', 'version': f"{pllm.version} <{pllm.model}>"})
main_queue.put({'command': Commands.INIT, 'name': 'picoLLM', 'version': f"{pllm.version} <{pllm.model}>"})

stop_phrases = {
'</s>', # Llama-2, Mistral, and Mixtral
Expand Down Expand Up @@ -238,7 +242,6 @@ def llm_task(user_request, utterance_end_sec):

while generate_queue.empty():
time.sleep(0.01)
generate_queue.get()

try:
close = False
Expand Down Expand Up @@ -284,15 +287,14 @@ def handler(_, __) -> None:
orca_profiler = RTFProfiler(orca.sample_rate)
warmup_size = int(warmup_sec * orca.sample_rate)

main_queue.put({'command': 'init', 'name': 'Orca', 'version': orca.version})
main_queue.put({'command': Commands.INIT, 'name': 'Orca', 'version': orca.version})

speaker = PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=20)
speaker = PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=1)

main_queue.put({'command': 'init', 'name': 'PvSpeaker', 'version': speaker.version})
main_queue.put({'command': Commands.INIT, 'name': 'PvSpeaker', 'version': speaker.version})

while speak_queue.empty():
time.sleep(0.01)
speak_queue.get()

try:
close = False
Expand Down Expand Up @@ -350,7 +352,7 @@ def handler(_, __) -> None:
synthesizing = False
if pcm is not None:
pcm_queue.extend(pcm)
main_queue.put({'command': Commands.PROFILE, 'text': f"[Orca RTF: {orca_profiler.rtf():.2f}]\n[Delay: {delay_sec:.2f} sec]"})
main_queue.put({'command': Commands.PROFILE, 'text': f"[Orca RTF: {orca_profiler.rtf():.2f}]\n[Delay: {delay_sec:.2f} sec]"})

if not speaking and len(pcm_queue) > warmup_size:
speaker.start()
Expand Down Expand Up @@ -508,6 +510,7 @@ def handler(_, __) -> None:

try:
close = False
listening = False
generating = False
while not close:
while main_queue.empty():
Expand All @@ -516,7 +519,7 @@ def handler(_, __) -> None:
message = main_queue.get(block=True)
if message['command'] == Commands.CLOSE:
close = True
elif message['command'] == 'init':
elif message['command'] == Commands.INIT:
print(f"→ {message['name']} v{message['version']}")
modules.remove(message['name'])
if len(modules) == 0:
Expand All @@ -525,7 +528,8 @@ def handler(_, __) -> None:
generate_queue.put({'command': Commands.START})
speak_queue.put({'command': Commands.START})
elif message['command'] == Commands.START:
print(f"$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...")
if not listening:
print(f"$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...")
elif message['command'] == Commands.INTERRUPT:
if generating:
print()
Expand All @@ -534,11 +538,13 @@ def handler(_, __) -> None:
print("User > ", end='', flush=True)
generate_queue.put(message)
speak_queue.put(message)
listening = True
elif message['command'] == Commands.TEXT:
print(message['text'], end='', flush=True)
elif message['command'] == Commands.GENERATE:
print()
generate_queue.put(message)
listening = False
elif message['command'] == Commands.SYNTHESIZE_START:
print(f"LLM (say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} to interrupt) > ", end='', flush=True)
speak_queue.put(message)
Expand Down

0 comments on commit e5d6617

Please sign in to comment.