@neon_minerva_cli.command
@click.option('-l', '--lang', default="en-us",
              help="Language of test_file inputs")
@click.option('-a', '--audio', is_flag=True, default=False,
              help="Test input as audio")
@click.argument("test_file")
def test_utterances(lang, audio, test_file):
    """
    Send every prompt in TEST_FILE for handling and print timing results.

    Prompts are read one per line from TEST_FILE; the aggregated timing
    results returned by `UtteranceTests.run_test` are echoed as YAML.
    """
    # Imported lazily so the other CLI commands do not pay for these imports
    from neon_utils.file_utils import load_commented_file
    from neon_minerva.integration.user_utterance import UtteranceTests

    test_file = _get_test_file(test_file)
    # Skip blank lines; an empty string is not a meaningful prompt and would
    # only be sent for handling and then waited on until the timeouts elapse
    prompts = [line for line in load_commented_file(test_file).split('\n')
               if line.strip()]
    if not prompts:
        # Fail with a readable CLI error instead of aggregating zero results
        raise click.ClickException(f"No prompts found in {test_file}")
    click.echo(f"Testing {len(prompts)} prompts")
    runner = UtteranceTests(prompts, lang=lang, audio=audio)
    results = runner.run_test()
    click.echo(yaml.safe_dump(results))
trademark and other rights reserved by their respective owners +# Copyright 2008-2021 Neongecko.com Inc. +# BSD-3 +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, +# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/neon_minerva/integration/user_utterance.py b/neon_minerva/integration/user_utterance.py new file mode 100644 index 0000000..e4343f6 --- /dev/null +++ b/neon_minerva/integration/user_utterance.py @@ -0,0 +1,210 @@ +# NEON AI (TM) SOFTWARE, Software Development Kit & Application Development System +# All trademark and other rights reserved by their respective owners +# Copyright 2008-2021 Neongecko.com Inc. +# BSD-3 +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, +# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
class UtteranceTests:
    """
    Send text (or synthesized audio) prompts over the messagebus and collect
    timing metrics reported in the context of the resulting messages.
    """

    def __init__(self, prompts: List[str], lang: str = "en-us",
                 bus_config: dict = None, user_config: dict = None,
                 audio: bool = False, tts: "TTS" = None):
        """
        Connect to the messagebus and prepare to test the given prompts.
        @param prompts: list of string prompts to send, in order
        @param lang: language code sent with every prompt
        @param bus_config: optional kwargs passed to `MessageBusClient`
        @param user_config: optional user profile sent in message context;
            when omitted, the default Neon user config is loaded and its
            username is set to "minerva"
        @param audio: if True, send prompts as audio instead of text
        @param tts: optional TTS instance used to synthesize audio prompts;
            when omitted, audio is requested over the bus (`neon.get_tts`)
        """
        if not user_config:
            from neon_utils.configuration_utils import get_neon_user_config
            user_config = get_neon_user_config().content
            user_config['user']['username'] = "minerva"
        self._user_config = user_config
        bus_config = bus_config or dict()
        self.core_bus = MessageBusClient(**bus_config)
        self.core_bus.run_in_thread()
        self.lang = lang
        self.test_audio = audio
        self._tts = tts
        # TODO: Handle prompt metadata for longer timeouts
        self._prompts = prompts
        self._stt_timeout = 60  # Time to transcribe + audio parsers
        self._intent_timeout = 60  # Time to match AND handle intent
        self._speak_timeout = 60  # Time after intent handling for TTS playback

        self._results = list()
        self._audio_output_done = Event()
        self._prompt_handled = Event()
        self._prompt_lock = Lock()

        self._last_message = None
        # No audio is expected to be playing before the first prompt
        self._audio_output_done.set()
        self.register_bus_events()

    def run_test(self) -> dict:
        """
        Run tests and return dict timing results
        @return: dict of metric name to a dict with "average", "minimum" and
            "maximum" values rounded to 6 places; all three are None for a
            metric that has no valid samples
        """
        self._results.clear()
        for prompt in self._prompts:
            self.handle_prompt(prompt)

        metrics = ["save_transcript", "text_parsers", "get_tts",
                   "intent_handler", "total"]
        if self.test_audio:
            metrics.append("get_stt")
        aggregated_results = {metric: [] for metric in metrics}

        for result in self._results:
            try:
                # Build the complete sample first so a result that is missing
                # any timestamp contributes to no metric at all
                sample = {
                    "save_transcript": result['save_transcript'],
                    "text_parsers": result['text_parsers'],
                    "get_tts": result['get_tts'],
                    "intent_handler": (result['speech_start'] -
                                       result['handle_utterance']),
                    "total": result['finished'] - result['transcribed']}
                if self.test_audio:
                    sample['get_stt'] = result['get_stt']
            except KeyError:
                LOG.error(result)
                continue
            for metric, value in sample.items():
                aggregated_results[metric].append(value)

        formatted_results = dict()
        for key, values in aggregated_results.items():
            if not values:
                # Nothing was measured (e.g. every prompt timed out); report
                # that explicitly rather than dividing by zero
                formatted_results[key] = {"average": None,
                                          "minimum": None,
                                          "maximum": None}
                continue
            formatted_results[key] = {"average": round(sum(values) /
                                                       len(values), 6),
                                      "minimum": round(min(values), 6),
                                      "maximum": round(max(values), 6)}
        return formatted_results

    def register_bus_events(self):
        """
        Register listeners to track audio and skill module states
        """
        self.core_bus.on("recognizer_loop:audio_output_start",
                         self._audio_started)
        self.core_bus.on("recognizer_loop:audio_output_end",
                         self._audio_stopped)
        self.core_bus.on("mycroft.mic.listen", self._mic_listen)
        self.core_bus.on("mycroft.skill.handler.complete",
                         self._handler_complete)

    def _audio_started(self, _):
        """
        Handle audio output started
        """
        self._audio_output_done.clear()

    def _audio_stopped(self, message):
        """
        Handle audio output finished
        @param message: Message associated with completed audio playback
        """
        LOG.debug("audio finished")
        # Store the message before setting the event so a waiter that wakes
        # on the event always sees it
        self._last_message = message
        self._audio_output_done.set()

    def _mic_listen(self, message):
        """
        Handle start listening (for prompts that trigger `get_response`)
        @param message: Message associated with the listen request (unused)
        """
        LOG.debug("`get_response` call")
        self._prompt_handled.set()

    def _handler_complete(self, _):
        """
        Handle skill execution complete (audio output may not be complete)
        """
        LOG.debug("Skill Handler Complete")
        self._prompt_handled.set()

    def _get_prompt_audio(self, prompt: str):
        """
        Synthesize `prompt` and return the audio file contents as encoded by
        `encode_file_to_base64_string`.
        @param prompt: string prompt to synthesize
        @raises TimeoutError: if no `neon.get_tts` response is received
        """
        if not self._tts:
            resp = self.core_bus.wait_for_response(
                Message("neon.get_tts", {'text': prompt,
                                         'speaker': {'language': self.lang,
                                                     'gender': 'female'}}),
                timeout=self._stt_timeout)
            if resp is None:
                raise TimeoutError(f"No `neon.get_tts` response after "
                                   f"{self._stt_timeout}s")
            return encode_file_to_base64_string(
                resp.data[self.lang]['female'])

        from os import close, remove
        handle, file_path = mkstemp()
        # `mkstemp` returns an open descriptor; only the path is needed here
        close(handle)
        try:
            self._tts.get_tts(prompt, file_path, lang=self.lang)
            return encode_file_to_base64_string(file_path)
        finally:
            try:
                remove(file_path)
            except OSError:
                # Best-effort cleanup; the file may already be gone
                pass

    def send_prompt(self, prompt: str):
        """
        Send a prompt to core for intent handling
        @param prompt: string prompt to send as text or synthesized audio
        @raises TimeoutError: if audio is tested and a required bus response
            is not received within the STT timeout
        """
        context = {"neon_should_respond": True,
                   "source": ["minerva"],
                   "destination": ["skills"],
                   "timing": {"transcribed": time()},
                   "username": "minerva",
                   "user_profiles": [self._user_config]}
        if not self.test_audio:
            self.core_bus.emit(Message("recognizer_loop:utterance",
                                       {"utterances": [prompt],
                                        "lang": self.lang}, context))
            return

        audio_data = self._get_prompt_audio(prompt)
        resp = self.core_bus.wait_for_response(
            Message("neon.audio_input",
                    {"audio_data": audio_data,
                     "lang": self.lang}, context),
            timeout=self._stt_timeout)
        if resp is None:
            raise TimeoutError(f"No `neon.audio_input` response after "
                               f"{self._stt_timeout}s")
        LOG.info(resp.data)
        if prompt.lower() not in (t.lower() for t
                                  in resp.data['transcripts']):
            LOG.warning(f"Invalid transcription for '{prompt}': "
                        f"{resp.data['transcripts']}")

    def handle_prompt(self, prompt: str):
        """
        Send a prompt (text or audio) and collect timing results.
        A prompt that is not handled in time is logged and adds no result.
        @param prompt: string prompt to send for intent (and optionally STT)
            processing
        """
        with self._prompt_lock:
            # Ensure event state matches expectation
            if not self._audio_output_done.is_set():
                LOG.warning("Audio output not finished when expected!")
                self._audio_output_done.wait(self._speak_timeout)
            self._audio_output_done.clear()
            self._prompt_handled.clear()
            self._last_message = None

            try:
                # Send prompt
                self.send_prompt(prompt)
                # Explicit checks instead of `assert`, which is removed
                # (together with the waits) when Python runs with `-O`
                if not self._prompt_handled.wait(self._intent_timeout):
                    raise TimeoutError(f"Intent not handled after "
                                       f"{self._intent_timeout}s")
                if not self._audio_output_done.wait(self._speak_timeout):
                    raise TimeoutError(f"Audio output not finished after "
                                       f"{self._speak_timeout}s")
                if self._last_message is None:
                    raise TimeoutError("No audio output message received")
            except TimeoutError as e:
                LOG.error(f"{prompt}: {e}")
            else:
                timing = self._last_message.context["timing"]
                if "speech_start" not in timing:
                    LOG.warning(f"Missing speech_start timestamp for {prompt}")
                    if "handle_utterance" in timing:
                        timing["speech_start"] = timing["handle_utterance"]
                self._results.append({**timing, **{'finished': time()}})
        LOG.debug(f"Handled {prompt}")