Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add integration tests #3

Merged
merged 29 commits into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
e57fafd
Implement text input tests
NeonDaniel Oct 2, 2023
e9a6902
Resolve class name conflict
NeonDaniel Oct 2, 2023
1d76271
Troubleshooting prompt handling
NeonDaniel Oct 2, 2023
f1964f4
Troubleshooting prompt handling
NeonDaniel Oct 2, 2023
fbfe9c6
Update result data handling
NeonDaniel Oct 2, 2023
df0b33a
Add timing context
NeonDaniel Oct 2, 2023
3a7cf9a
Troubleshooting message context
NeonDaniel Oct 2, 2023
ea391bd
Troubleshooting test failures
NeonDaniel Oct 2, 2023
ae24b1a
Format returned results data
NeonDaniel Oct 2, 2023
b4bc17c
Troubleshoot result handling
NeonDaniel Oct 3, 2023
ee966ec
Add support for audio tests
NeonDaniel Oct 3, 2023
faa453a
Troubleshoot `get_tts` compatible signature
NeonDaniel Oct 3, 2023
75f7819
Fix typo in audio input tests
NeonDaniel Oct 3, 2023
87fed73
Troubleshooting timeouts
NeonDaniel Oct 3, 2023
c3c27b5
Troubleshooting RuntimeExceptions
NeonDaniel Oct 3, 2023
d692b20
Add stt metrics to output
NeonDaniel Oct 3, 2023
734bf00
Refactor module/class names to match actual implementation
NeonDaniel Oct 4, 2023
32950fa
Remove invalid kwarg from argument spec
NeonDaniel Oct 4, 2023
d49f7eb
Add default user profile to utterance tests
NeonDaniel Oct 4, 2023
2e97011
Fix profile data type
NeonDaniel Oct 4, 2023
1cd6436
Add exception handling in test result generation
NeonDaniel Oct 10, 2023
aff0af7
Troubleshooting test errors around prompts
NeonDaniel Oct 10, 2023
973f05e
Add docstrings
NeonDaniel Oct 10, 2023
038a961
Troubleshoot performance on Pi4/Mark2
NeonDaniel Oct 10, 2023
4057cda
Extend default timeout params for Mark2 compat.
NeonDaniel Oct 10, 2023
58bd1e6
Round printed result times
NeonDaniel Oct 10, 2023
a4eed87
Increase logged precision to avoid `0.0` durations
NeonDaniel Oct 10, 2023
4af9cdd
Log formatted json instead of pprint for consistency
NeonDaniel Oct 10, 2023
5b150a3
Troubleshoot logged errors parsing test timing results
NeonDaniel Oct 10, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion neon_minerva/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,10 @@
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import logging

import os
import click
import yaml

from os.path import expanduser, relpath, isfile, isdir
from click_default_group import DefaultGroup
Expand Down Expand Up @@ -53,6 +54,7 @@ def _init_tests(debug: bool = False):
if debug:
os.environ["OVOS_DEFAULT_LOG_LEVEL"] = "DEBUG"


def _get_test_file(test_file: str) -> str:
"""
Parse an input path to locate a test file that may be relative to `~` or the
Expand Down Expand Up @@ -128,3 +130,21 @@ def test_intents(skill_entrypoint, test_file, debug, padacioso):
os.environ["INTENT_TEST_FILE"] = test_file
from neon_minerva.tests.test_skill_intents import TestSkillIntentMatching
TextTestRunner().run(makeSuite(TestSkillIntentMatching))


@neon_minerva_cli.command
@click.option('-l', '--lang', default="en-us",
              help="Language of test_file inputs")
@click.option('-a', '--audio', is_flag=True, default=False,
              help="Test input as audio")
@click.argument("test_file")
def test_utterances(lang, audio, test_file):
    """
    Send each prompt in TEST_FILE over the messagebus and print timing results
    """
    # Imported here so these modules are only loaded when this command runs
    from neon_utils.file_utils import load_commented_file
    from neon_minerva.integration.user_utterance import UtteranceTests

    test_file = _get_test_file(test_file)
    # Drop blank lines (e.g. from a trailing newline) and stray whitespace so
    # that empty prompts are never sent for intent handling
    prompts = [line.strip() for line
               in load_commented_file(test_file).split('\n')
               if line.strip()]
    if not prompts:
        # Nothing to time; exit with a clear error instead of empty results
        raise click.ClickException(f"No prompts found in {test_file}")
    click.echo(f"Testing {len(prompts)} prompts")
    runner = UtteranceTests(prompts, lang=lang, audio=audio)
    results = runner.run_test()
    click.echo(yaml.safe_dump(results))
25 changes: 25 additions & 0 deletions neon_minerva/integration/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# NEON AI (TM) SOFTWARE, Software Development Kit & Application Development System
# All trademark and other rights reserved by their respective owners
# Copyright 2008-2021 Neongecko.com Inc.
# BSD-3
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
210 changes: 210 additions & 0 deletions neon_minerva/integration/user_utterance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
# NEON AI (TM) SOFTWARE, Software Development Kit & Application Development System
# All trademark and other rights reserved by their respective owners
# Copyright 2008-2021 Neongecko.com Inc.
# BSD-3
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from tempfile import mkstemp
from threading import Event, Lock
from time import time
from typing import List

from neon_utils.file_utils import encode_file_to_base64_string
from ovos_utils.log import LOG
from ovos_bus_client.client import MessageBusClient
from ovos_bus_client.message import Message
from ovos_plugin_manager.tts import TTS


class UtteranceTests:
    """
    Send text or audio prompts over the messagebus and collect the timing
    metrics reported in the context of the resulting messages.
    """

    def __init__(self, prompts: List[str], lang: str = "en-us",
                 bus_config: dict = None, user_config: dict = None,
                 audio: bool = False, tts: "TTS" = None):
        """
        Connect to the messagebus and prepare to run utterance tests.
        @param prompts: list of string prompts to test
        @param lang: language of `prompts`
        @param bus_config: optional kwargs passed to `MessageBusClient`
        @param user_config: optional user profile to send with each prompt;
            read from `get_neon_user_config` if not specified
        @param audio: if True, send prompts as audio input instead of text
        @param tts: optional TTS used to generate audio inputs; if unset, audio
            is requested over the messagebus (`neon.get_tts`)
        """
        if not user_config:
            from neon_utils.configuration_utils import get_neon_user_config
            user_config = get_neon_user_config().content
        # Copy before overriding the username so the passed dict is not mutated
        self._user_config = {**user_config,
                             "user": {**(user_config.get("user") or {}),
                                      "username": "minerva"}}
        bus_config = bus_config or dict()
        self.core_bus = MessageBusClient(**bus_config)
        self.core_bus.run_in_thread()
        self.lang = lang
        self.test_audio = audio
        self._tts = tts
        # TODO: Handle prompt metadata for longer timeouts
        self._prompts = prompts
        self._stt_timeout = 60  # Time to transcribe + audio parsers
        self._intent_timeout = 60  # Time to match AND handle intent
        self._speak_timeout = 60  # Time after intent handling for TTS playback

        self._results = list()
        self._audio_output_done = Event()
        self._prompt_handled = Event()
        self._prompt_lock = Lock()

        self._last_message = None
        # No audio is playing yet, so the first prompt may be sent immediately
        self._audio_output_done.set()
        self.register_bus_events()

    @staticmethod
    def _extract_metrics(result: dict, include_stt: bool = False) -> dict:
        """
        Get the reported metrics for a single prompt's timing results
        @param result: timing dict collected in `handle_prompt`
        @param include_stt: if True, include the `get_stt` metric
        @return: dict of metric name to value in `result`
        @raises KeyError: if `result` is missing any required timing value
        """
        metrics = {
            "save_transcript": result['save_transcript'],
            "text_parsers": result['text_parsers'],
            "get_tts": result['get_tts'],
            "intent_handler": (result['speech_start'] -
                               result['handle_utterance']),
            "total": result['finished'] - result['transcribed']}
        if include_stt:
            metrics['get_stt'] = result['get_stt']
        return metrics

    @staticmethod
    def _summarize(values: list) -> dict:
        """
        Get the average, minimum, and maximum of a list of values
        @param values: list of numeric samples (may be empty)
        @return: dict of `average`, `minimum`, and `maximum` rounded to 6
            places; all values are None if there are no samples
        """
        if not values:
            return {"average": None, "minimum": None, "maximum": None}
        return {"average": round(sum(values) / len(values), 6),
                "minimum": round(min(values), 6),
                "maximum": round(max(values), 6)}

    def run_test(self) -> dict:
        """
        Run tests and return dict timing results
        @return: dict of metric name to `average`, `minimum`, and `maximum`;
            a metric with no valid samples has None for each value
        """
        self._results.clear()
        for prompt in self._prompts:
            self.handle_prompt(prompt)
        metric_names = ["save_transcript", "text_parsers", "get_tts",
                        "intent_handler", "total"]
        if self.test_audio:
            metric_names.append("get_stt")
        aggregated_results = {name: [] for name in metric_names}
        for result in self._results:
            try:
                # Extract every metric before appending any of them, so an
                # incomplete result does not skew the sample counts
                sample = self._extract_metrics(result, self.test_audio)
            except KeyError:
                LOG.error(result)
                continue
            for name, value in sample.items():
                aggregated_results[name].append(value)
        return {name: self._summarize(values)
                for name, values in aggregated_results.items()}

    def register_bus_events(self):
        """
        Register listeners to track audio and skill module states
        """
        self.core_bus.on("recognizer_loop:audio_output_start",
                         self._audio_started)
        self.core_bus.on("recognizer_loop:audio_output_end",
                         self._audio_stopped)
        self.core_bus.on("mycroft.mic.listen", self._mic_listen)
        self.core_bus.on("mycroft.skill.handler.complete",
                         self._handler_complete)

    def _audio_started(self, _):
        """
        Handle audio output started
        """
        self._audio_output_done.clear()

    def _audio_stopped(self, message):
        """
        Handle audio output finished
        @param message: Message associated with completed audio playback
        """
        LOG.debug("audio finished")
        # Timing results are read from the context of this message
        self._last_message = message
        self._audio_output_done.set()

    def _mic_listen(self, message):
        """
        Handle start listening (for prompts that trigger `get_response`)
        @param message: Message associated with the listen request
        """
        LOG.debug("`get_response` call")
        # `message` is not saved as `_last_message`; only the audio output end
        # message is used for timing results
        self._prompt_handled.set()

    def _handler_complete(self, _):
        """
        Handle skill execution complete (audio output may not be complete)
        """
        LOG.debug("Skill Handler Complete")
        self._prompt_handled.set()

    def _get_prompt_audio(self, prompt: str) -> str:
        """
        Generate audio for a prompt and encode it for messagebus transport
        @param prompt: string prompt to synthesize
        @return: result of `encode_file_to_base64_string` for the audio file
        @raises TimeoutError: if no `neon.get_tts` response is received
        """
        if self._tts:
            import os
            fd, file_path = mkstemp()
            # `mkstemp` returns an open descriptor; only the path is needed
            os.close(fd)
            try:
                self._tts.get_tts(prompt, file_path, lang=self.lang)
                return encode_file_to_base64_string(file_path)
            finally:
                # The encoded audio is in memory, so the temp file can go
                try:
                    os.remove(file_path)
                except OSError as e:
                    LOG.debug(f"Could not remove {file_path}: {e}")
        resp = self.core_bus.wait_for_response(
            Message("neon.get_tts", {'text': prompt,
                                     'speaker': {'language': self.lang,
                                                 'gender': 'female'}}),
            timeout=self._stt_timeout)
        if resp is None:
            raise TimeoutError(f"No `neon.get_tts` response for '{prompt}'")
        return encode_file_to_base64_string(resp.data[self.lang]['female'])

    def send_prompt(self, prompt: str):
        """
        Send a prompt to core for intent handling
        @param prompt: string prompt to send as text or audio input
        @raises TimeoutError: if testing audio and no response is received to
            the TTS or audio input request
        """
        context = {"neon_should_respond": True,
                   "source": ["minerva"],
                   "destination": ["skills"],
                   "timing": {"transcribed": time()},
                   "username": "minerva",
                   "user_profiles": [self._user_config]}
        if not self.test_audio:
            self.core_bus.emit(Message("recognizer_loop:utterance",
                                       {"utterances": [prompt],
                                        "lang": self.lang}, context))
            return

        audio_data = self._get_prompt_audio(prompt)
        resp = self.core_bus.wait_for_response(
            Message("neon.audio_input",
                    {"audio_data": audio_data,
                     "lang": self.lang}, context),
            timeout=self._stt_timeout)
        if resp is None:
            raise TimeoutError(f"No `neon.audio_input` response for "
                               f"'{prompt}'")
        LOG.info(resp.data)
        if prompt.lower() not in (t.lower() for t
                                  in resp.data['transcripts']):
            LOG.warning(f"Invalid transcription for '{prompt}': "
                        f"{resp.data['transcripts']}")

    def _wait_for_timing(self, prompt: str):
        """
        Wait for a sent prompt to be handled and spoken and get its timing
        @param prompt: string prompt that was sent (used for logging)
        @return: copy of the `timing` context of the audio output end message,
            or None if handling or audio output did not complete in time
        """
        if not self._prompt_handled.wait(self._intent_timeout):
            LOG.error(f"{prompt}: intent not handled in "
                      f"{self._intent_timeout}s")
            return None
        if not self._audio_output_done.wait(self._speak_timeout):
            LOG.error(f"{prompt}: audio output not finished in "
                      f"{self._speak_timeout}s")
            return None
        # Read once; bus handlers may replace this from another thread
        message = self._last_message
        if message is None:
            LOG.error(f"{prompt}: no audio output message received")
            return None
        timing = dict(message.context.get("timing") or {})
        if "speech_start" not in timing:
            LOG.warning(f"Missing speech_start timestamp for {prompt}")
            if "handle_utterance" in timing:
                # Fall back to a zero-length intent handler duration
                timing["speech_start"] = timing["handle_utterance"]
        return timing

    def handle_prompt(self, prompt: str):
        """
        Send a prompt (text or audio) and collect timing results.
        @param prompt: string prompt to send for intent (and optionally STT)
            processing
        """
        with self._prompt_lock:
            # Ensure event state matches expectation
            if not self._audio_output_done.is_set():
                LOG.warning("Audio output not finished when expected!")
                self._audio_output_done.wait(self._speak_timeout)
            self._audio_output_done.clear()
            self._prompt_handled.clear()
            self._last_message = None

            # Send prompt
            try:
                self.send_prompt(prompt)
            except TimeoutError as e:
                LOG.error(f"{prompt}: {e}")
            else:
                timing = self._wait_for_timing(prompt)
                if timing is not None:
                    self._results.append({**timing, 'finished': time()})
            LOG.debug(f"Handled {prompt}")
3 changes: 2 additions & 1 deletion requirements/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ ovos-utils~=0.0.35
ovos-workshop~=0.0.12
fann2==1.0.7
padatious~=0.4.8
padacioso~=0.2
padacioso~=0.2
pyyaml~=6.0
Loading