Add NeonVoiceClient class for minimal remote audio client (#23)

* Add NeonVoiceClient class for minimal remote audio client * Add CLI entrypoint for listener; add WW (wake word) confirmation sound; update logging and fix bugs around MQ event handling
NeonGeckoCom · Nov 3, 2023 · 326f5fe · 326f5fe
1 parent a83adac
commit 326f5fe
Show file tree

Hide file tree

Showing 4 changed files with 165 additions and 2 deletions.
diff --git a/neon_iris/cli.py b/neon_iris/cli.py
@@ -40,6 +40,7 @@
 
 environ.setdefault("OVOS_CONFIG_BASE_FOLDER", "neon")
 environ.setdefault("OVOS_CONFIG_FILENAME", "diana.yaml")
+# TODO: Define default config file from this package
 
 
 def _print_config():
@@ -80,7 +81,7 @@ def start_client(mq_config, user_config, lang, audio):
     if user_config:
         user_config = load_config_file(expanduser(user_config))
     client = CLIClient(mq_config, user_config)
-    LOG.init({"level": logging.WARNING})
+    LOG.init({"level": logging.WARNING})  # TODO: Debug flag?
 
     client.audio_enabled = audio
     click.echo("Enter '!{lang}' to change language\n"
@@ -118,6 +119,16 @@ def start_client(mq_config, user_config, lang, audio):
     client.shutdown()
 
 
+@neon_iris_cli.command(help="Create an MQ listener session")
+def start_listener():
+    """
+    Start a `NeonVoiceClient`, call `_print_config` and block until an exit
+    signal is received. The client is shut down on the way out.
+    """
+    from neon_iris.voice_client import NeonVoiceClient
+    from ovos_utils import wait_for_exit_signal
+    client = NeonVoiceClient()
+    try:
+        _print_config()
+        wait_for_exit_signal()
+    finally:
+        # Always release the client, even if printing or waiting raises
+        client.shutdown()
+
+
 @neon_iris_cli.command(help="Transcribe an audio file")
 @click.option('--lang', '-l', default='en-us',
               help="language of input audio")

diff --git a/neon_iris/client.py b/neon_iris/client.py
@@ -136,6 +136,8 @@ def handle_neon_response(self, channel, method, _, body):
             self._handle_profile_update(message)
         elif message.msg_type == "neon.clear_data":
             self._handle_clear_data(message)
+        elif message.msg_type == "klat.error":
+            self.handle_error_response(message)
         elif message.msg_type.endswith(".response"):
             self.handle_api_response(message)
         else:
@@ -248,12 +250,14 @@ def _build_message(self, msg_type: str, data: dict,
                        username: Optional[str] = None,
                        user_profiles: Optional[list] = None,
                        ident: str = None) -> Message:
+        user_profiles = user_profiles or [self.user_config]
+        username = username or user_profiles[0]['user']['username']
         return Message(msg_type, data,
                        {"client_name": self.client_name,
                         "client": self._client,
                         "ident": ident or str(time()),
                         "username": username,
-                        "user_profiles": user_profiles or list(),
+                        "user_profiles": user_profiles,
                         "mq": {"routing_key": self.uid,
                                "message_id": self.connection.create_unique_id()}
                         })

diff --git a/neon_iris/res/start_listening.wav b/neon_iris/res/start_listening.wav
diff --git a/neon_iris/voice_client.py b/neon_iris/voice_client.py
@@ -0,0 +1,148 @@
+# NEON AI (TM) SOFTWARE, Software Development Kit & Application Development System
+# All trademark and other rights reserved by their respective owners
+# Copyright 2008-2021 Neongecko.com Inc.
+# BSD-3
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+# 3. Neither the name of the copyright holder nor the names of its
+#    contributors may be used to endorse or promote products derived from this
+#    software without specific prior written permission.
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+# OR PROFITS;  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import wave
+
+from threading import Event, Thread
+from time import time
+from unittest.mock import Mock
+from os.path import join, isdir, dirname
+from os import makedirs
+
+from ovos_plugin_manager.microphone import OVOSMicrophoneFactory
+from ovos_plugin_manager.vad import OVOSVADFactory
+from ovos_dinkum_listener.voice_loop.voice_loop import DinkumVoiceLoop
+from ovos_dinkum_listener.voice_loop.hotwords import HotwordContainer
+from ovos_config.config import Configuration
+from ovos_utils.messagebus import FakeBus
+from ovos_utils.log import LOG
+from ovos_utils.xdg_utils import xdg_data_home
+from ovos_utils.sound import play_wav
+from ovos_bus_client.message import Message
+from neon_utils.file_utils import decode_base64_string_to_file
+from neon_iris.client import NeonAIClient
+
+
+class MockTransformers(Mock):
+    """Mock audio transformers object whose `transform` changes nothing."""
+
+    def transform(self, chunk):
+        """
+        Return the audio chunk untouched with an empty context.
+        :param chunk: audio chunk passed in by the caller
+        :returns: tuple of the same `chunk` and a new empty dict
+        """
+        context = {}
+        return chunk, context
+
+
+class NeonVoiceClient(NeonAIClient):
+    """
+    Minimal remote audio client. Audio captured by a local voice loop is
+    written to WAV files and sent via `send_audio`; audio in responses is
+    saved under a local TTS directory and played back.
+    """
+
+    def __init__(self, bus=None):
+        """
+        Load configuration, start the microphone and voice loop, create the
+        STT/TTS audio directories and start the voice loop thread.
+        :param bus: optional bus object used to emit wake word events; a
+            `FakeBus` is created when none is given
+        """
+        self.config = Configuration()
+        NeonAIClient.__init__(self, self.config.get("MQ"))
+        self.bus = bus or FakeBus()
+        self._mic = OVOSMicrophoneFactory.create(self.config)
+        self._mic.start()
+        self._hotwords = HotwordContainer(self.bus)
+        self._hotwords.load_hotword_engines()
+        self._vad = OVOSVADFactory.create(self.config)
+
+        # STT and transformers are mocked; captured audio is handed to
+        # `on_stt_audio` rather than being transcribed by the loop's `stt`
+        self._voice_loop = DinkumVoiceLoop(mic=self._mic,
+                                           hotwords=self._hotwords,
+                                           stt=Mock(),
+                                           fallback_stt=Mock(),
+                                           vad=self._vad,
+                                           transformers=MockTransformers(),
+                                           stt_audio_callback=self.on_stt_audio,
+                                           listenword_audio_callback=self.on_hotword_audio)
+        self._voice_loop.start()
+        self._voice_thread = None
+
+        self._stt_audio_path = join(xdg_data_home(), "iris", "stt")
+        self._tts_audio_path = join(xdg_data_home(), "iris", "tts")
+        # `exist_ok` avoids a check-then-create race on the directories
+        makedirs(self._stt_audio_path, exist_ok=True)
+        makedirs(self._tts_audio_path, exist_ok=True)
+
+        self._listening_sound = join(dirname(__file__), "res",
+                                     "start_listening.wav")
+
+        self.run()
+
+    def run(self):
+        """
+        Run the voice loop in a daemon thread.
+        """
+        self._voice_thread = Thread(target=self._voice_loop.run, daemon=True)
+        self._voice_thread.start()
+
+    def on_stt_audio(self, audio_bytes: bytes, context: dict):
+        """
+        Write captured audio to a timestamped WAV file and send it.
+        :param audio_bytes: raw audio frames to write
+        :param context: context passed by the voice loop (unused here)
+        """
+        LOG.info(f"Got {len(audio_bytes)} bytes of audio")
+        wav_path = join(self._stt_audio_path, f"{time()}.wav")
+        with open(wav_path, "wb") as wav_io, \
+                wave.open(wav_io, "wb") as wav_file:
+            # WAV header parameters are taken from the microphone
+            wav_file.setframerate(self._mic.sample_rate)
+            wav_file.setsampwidth(self._mic.sample_width)
+            wav_file.setnchannels(self._mic.sample_channels)
+            wav_file.writeframes(audio_bytes)
+
+        self.send_audio(wav_path)
+        LOG.debug("Sent Audio to MQ")
+
+    def on_hotword_audio(self, audio: bytes, context: dict):
+        """
+        Play the listening sound and emit a wake word event on the bus.
+        :param audio: hotword audio passed by the voice loop (unused here)
+        :param context: used as both the data and context of the message
+        """
+        payload = context
+        msg_type = "recognizer_loop:wakeword"
+        play_wav(self._listening_sound)
+        LOG.info(f"Emitting hotword event: {msg_type}")
+        # emit ww event
+        self.bus.emit(Message(msg_type, payload, context))
+
+    def handle_klat_response(self, message: Message):
+        """
+        Log each response sentence, save its audio under the TTS directory
+        and play it.
+        :param message: response Message; `data['responses']` is read as a
+            dict of language to response data
+        """
+        from hashlib import sha256
+
+        # A message without 'responses' has nothing to play
+        responses = message.data.get('responses') or {}
+        for lang, data in responses.items():
+            text = data.get('sentence')
+            LOG.info(text)
+            # The builtin `hash()` is salted per process for strings, so it
+            # cannot name files that should be found again on a later run
+            digest = sha256(str(text).encode("utf-8")).hexdigest()
+            file_basename = f"{digest}.wav"
+            audio = data.get('audio') or {}
+            for gender in data.get('genders', []):
+                audio_data = audio.get(gender)
+                if not audio_data:
+                    LOG.warning(f"No {gender} audio in {lang} response")
+                    continue
+                audio_file = join(self._tts_audio_path, lang, gender,
+                                  file_basename)
+                # Make sure the per-language/gender directory exists
+                makedirs(dirname(audio_file), exist_ok=True)
+                try:
+                    decode_base64_string_to_file(audio_data, audio_file)
+                except FileExistsError:
+                    # Presumably raised when this audio was already saved;
+                    # the existing file is played instead
+                    pass
+                play_wav(audio_file)
+
+    def handle_complete_intent_failure(self, message: Message):
+        """
+        Log the data of an intent failure message.
+        """
+        LOG.info(f"{message.data}")
+
+    def handle_api_response(self, message: Message):
+        """
+        Log the data of an API response message.
+        """
+        LOG.info(f"{message.data}")
+
+    def handle_error_response(self, message: Message):
+        """
+        Log the data of an error response message at error level.
+        """
+        LOG.error(f"Got error response: {message.data}")
+
+    def clear_caches(self, message: Message):
+        """
+        No-op; this client takes no action on this request.
+        """
+        pass
+
+    def clear_media(self, message: Message):
+        """
+        No-op; this client takes no action on this request.
+        """
+        pass
+
+    def shutdown(self):
+        """
+        Stop the voice loop, wait up to 30 seconds for its thread, stop the
+        microphone and shut down the base client.
+        """
+        self._voice_loop.stop()
+        if self._voice_thread is not None:
+            self._voice_thread.join(30)
+        # The microphone was started in `__init__`; release it here
+        self._mic.stop()
+        NeonAIClient.shutdown(self)