diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..78d0f1a --- /dev/null +++ b/.dockerignore @@ -0,0 +1,5 @@ +.git/ +tests/ +*.egg-info +build/ +dist/ diff --git a/.github/workflows/publish_release.yml b/.github/workflows/publish_release.yml index ff73f4a..2740690 100644 --- a/.github/workflows/publish_release.yml +++ b/.github/workflows/publish_release.yml @@ -10,7 +10,16 @@ jobs: build_and_publish_pypi_and_release: uses: neongeckocom/.github/.github/workflows/publish_stable_release.yml@master secrets: inherit - build_and_publish_docker: + build_and_publish_docker_gradio: needs: build_and_publish_pypi_and_release uses: neongeckocom/.github/.github/workflows/publish_docker.yml@master - secrets: inherit \ No newline at end of file + secrets: inherit + with: + build_args: EXTRAS=gradio + build_and_publish_docker_websat: + needs: build_and_publish_pypi_and_release + uses: neongeckocom/.github/.github/workflows/publish_docker.yml@master + secrets: inherit + with: + build_args: EXTRAS=web_sat + image_name: ${{ github.repository }}-websat \ No newline at end of file diff --git a/.github/workflows/publish_test_build.yml b/.github/workflows/publish_test_build.yml index 5d9d572..2af420a 100644 --- a/.github/workflows/publish_test_build.yml +++ b/.github/workflows/publish_test_build.yml @@ -16,7 +16,16 @@ jobs: version_file: "neon_iris/version.py" setup_py: "setup.py" publish_prerelease: true - build_and_publish_docker: + build_and_publish_docker_gradio: needs: publish_alpha_release uses: neongeckocom/.github/.github/workflows/publish_docker.yml@master - secrets: inherit \ No newline at end of file + secrets: inherit + with: + build_args: EXTRAS=gradio + build_and_publish_docker_websat: + needs: publish_alpha_release + uses: neongeckocom/.github/.github/workflows/publish_docker.yml@master + secrets: inherit + with: + build_args: EXTRAS=web_sat + image_name: ${{ github.repository }}-websat \ No newline at end of file diff --git a/Dockerfile 
b/Dockerfile index 139a09a..3ee3a8b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,21 +1,55 @@ +# Stage 1: Use a base image to install ffmpeg +FROM jrottenberg/ffmpeg:4.1 as ffmpeg-base + +# Stage 2: Build the final image FROM python:3.8-slim +# Label for vendor LABEL vendor=neon.ai \ ai.neon.name="neon-iris" -ENV OVOS_CONFIG_BASE_FOLDER neon -ENV OVOS_CONFIG_FILENAME neon.yaml -ENV XDG_CONFIG_HOME /config +# Build argument for specifying extras +ARG EXTRAS -RUN apt update && \ - apt install -y ffmpeg +ENV OVOS_CONFIG_BASE_FOLDER=neon \ + OVOS_CONFIG_FILENAME=neon.yaml \ + XDG_CONFIG_HOME=/config -ADD . /neon_iris -WORKDIR /neon_iris +# Copy ffmpeg binaries from the ffmpeg-base stage +COPY --from=ffmpeg-base /usr/local/bin/ /usr/local/bin/ +COPY --from=ffmpeg-base /usr/local/lib/ /usr/local/lib/ -RUN pip install wheel && \ - pip install .[gradio] +RUN mkdir -p /neon_iris/requirements +COPY ./requirements/* /neon_iris/requirements + +RUN pip install wheel && pip install -r /neon_iris/requirements/requirements.txt +RUN if [ "$EXTRAS" = "gradio" ]; then \ + pip install -r /neon_iris/requirements/gradio.txt; \ + elif [ "$EXTRAS" = "web_sat" ]; then \ + pip install -r /neon_iris/requirements/web_sat.txt; \ + else \ + pip install -r /neon_iris/requirements/requirements.txt; \ + fi + +WORKDIR /neon_iris +ADD . /neon_iris +RUN pip install . 
COPY docker_overlay/ / -CMD ["iris", "start-gradio"] \ No newline at end of file +# Expose port 8000 for websat +EXPOSE 8000 + +# Set the ARG value as an environment variable +ENV EXTRAS=${EXTRAS} + +# Create a non-root user with a home directory and change ownership of necessary directories + +RUN groupadd -r neon && useradd -r -m -g neon neon \ + && mkdir -p /config/neon \ + && chown -R neon:neon /neon_iris /usr/local/bin /config + +# Use the non-root user to run the container +USER neon + +ENTRYPOINT ["/neon_iris/entrypoint.sh"] diff --git a/README.md b/README.md index 16a537e..90182f5 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # Neon Iris + Neon Iris (Interactive Relay for Intelligence Systems) provides tools for interacting with Neon systems remotely, via [MQ](https://github.com/NeonGeckoCom/chat_api_mq_proxy). @@ -6,15 +7,18 @@ Install the Iris Python package with: `pip install neon-iris` The `iris` entrypoint is available to interact with a bus via CLI. Help is available via `iris --help`. ## Configuration -Configuration files can be specified via environment variables. By default, -`Iris` will read configuration from `~/.config/neon/diana.yaml` where + +Configuration files can be specified via environment variables. By default, +`Iris` will read configuration from `~/.config/neon/diana.yaml` where `XDG_CONFIG_HOME` is set to the default `~/.config`. -More information about configuration handling can be found +More information about configuration handling can be found [in the docs](https://neongeckocom.github.io/neon-docs/quick_reference/configuration/). -> *Note:* The neon-iris Docker image uses `neon.yaml` by default because the + +> _Note:_ The neon-iris Docker image uses `neon.yaml` by default because the > `iris` web UI is often deployed with neon-core. 
A default configuration might look like: + ```yaml MQ: server: neonaialpha.com @@ -34,22 +38,123 @@ iris: ``` ### Language Support + For Neon Core deployments that support language support queries via MQ, `languages` may be removed and `enable_lang_api: True` added to configuration. This will use the reported STT/TTS supported languages in place of any `iris` configuration. ## Interfacing with a Diana installation + The `iris` CLI includes utilities for interacting with a `Diana` backend. Use `iris --help` to get a current list of available commands. ### `iris start-listener` -This will start a local wake word recognizer and use a remote Neon + +This will start a local wake word recognizer and use a remote Neon instance connected to MQ for processing audio and providing responses. ### `iris start-gradio` + This will start a local webserver and serve a Gradio UI to interact with a Neon instance connected to MQ. ### `iris start-client` -This starts a CLI client for typing inputs and receiving responses from a Neon + +This starts a CLI client for typing inputs and receiving responses from a Neon instance connected via MQ. + +### `iris start-websat` + +This starts a local webserver and serves a web UI for interacting with a Neon +instance connected to MQ. + +## Docker + +### Building + +To build the Docker image, run: + +```bash +docker build -t ghcr.io/neongeckocom/neon-iris:latest . +``` + +To build the Docker image with gradio extras, run: + +```bash +docker build --build-arg EXTRAS=gradio -t ghcr.io/neongeckocom/neon-iris:latest . +``` + +To build the Docker image with websat extras, run: + +```bash +docker build --build-arg EXTRAS=web_sat -t ghcr.io/neongeckocom/neon-iris:latest . +``` + +### Running + +The Docker image that is built for this service runs the `iris` CLI with the +`-h` argument by default. In order to use the container to run different services, +you must override the entrypoint.
For example, to run the `start-websat` service, +you would run: + +```bash +docker run --rm -p 8000:8000 --entrypoint iris ghcr.io/neongeckocom/neon-iris:latest start-websat +``` + +Running the container without any arguments gives you a list of commands that +can be run. You can choose to run any of these commands by replacing `start-websat` +in the above command with the command you want to run. + +## websat + +### Configuration + +The `websat` web UI is a simple web UI for interacting with a Neon instance. It +accepts special configuration items prefixed with `webui_` to customize the UI. + +| parameter | description | default | +| ----------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | ---------------------- | +| webui_description | The header text for the web UI | Chat with Neon | +| webui_title | The title text for the web UI in the browser | Neon AI | +| webui_input_placeholder | The placeholder text for the input box | Ask me something | +| webui_ws_url | The websocket URL to connect to, which must be accessible from the browser you're running in. Note that the default will usually fail. | ws://localhost:8000/ws | + +Iris uses the `Configuration()` class from OVOS to handle configuration. This +means that you can specify configuration in a `neon.yaml` file in the +`~/.config/neon`. When using a container, you can mount a volume to +`/home/neon/.config/neon` to provide a configuration file.
+ +Example configuration block: + +```yaml +iris: + webui_title: Neon AI + webui_description: Chat with Neon + webui_input_placeholder: Ask me something + webui_ws_url: wss://neonaialpha.com/ws +``` + +### Customization + +The websat web UI reads in the following items from `neon_iris/static/custom`: + +- `error.mp3` - Used for error responses +- `wake.mp3` - Used for wake word responses +- `favicon.ico` - The favicon for the web UI +- `logo.webp` - The logo for the web UI + +To customize these items, you can replace them in the `neon_iris/static/custom` folder and rebuild the image. + +### Websocket endpoint + +The websat web UI uses a websocket to communicate with OpenWakeWord, which can +load `.tflite` or `.onnx` models. The websocket endpoint is `/ws`, but since it +is served with FastAPI, it also supports `wss` for secure connections. To +use `wss`, you must provide a certificate and key file. + +### Chat history + +The websat web UI stores chat history in the browser's [local storage](https://developer.mozilla.org/en-US/docs/Web/API/Window/localStorage). +This allows chat history to persist between browser sessions. However, it also +means that if you clear your browser's local storage, you will lose your chat +history. This is a feature, not a bug. 
diff --git a/docker_overlay/etc/neon/neon.yaml b/docker_overlay/etc/neon/neon.yaml index f3f8e79..cac3cf0 100644 --- a/docker_overlay/etc/neon/neon.yaml +++ b/docker_overlay/etc/neon/neon.yaml @@ -12,6 +12,7 @@ iris: webui_chatbot_label: Chat History webui_mic_label: Speak to Neon webui_text_label: Text with Neon + webui_ws_url: ws://localhost:8000/ws # Override, as this needs to be reachable by the browser server_address: "0.0.0.0" server_port: 7860 default_lang: en-us @@ -43,4 +44,4 @@ logs: error: - pika warning: - - filelock \ No newline at end of file + - filelock diff --git a/docker_overlay/neon_iris/entrypoint.sh b/docker_overlay/neon_iris/entrypoint.sh new file mode 100755 index 0000000..e5af320 --- /dev/null +++ b/docker_overlay/neon_iris/entrypoint.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# NEON AI (TM) SOFTWARE, Software Development Kit & Application Development System +# All trademark and other rights reserved by their respective owners +# Copyright 2008-2024 Neongecko.com Inc. +# BSD-3 +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, +# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +set -e + +if [ "$EXTRAS" = "gradio" ]; then + exec iris start-gradio +elif [ "$EXTRAS" = "web_sat" ]; then + exec iris start-websat +else + echo "No extras specified, showing help. To execute a command, use 'docker run iris <command>'" + exec iris -h +fi diff --git a/neon_iris/cli.py b/neon_iris/cli.py index 2539846..5b7fd59 100644 --- a/neon_iris/cli.py +++ b/neon_iris/cli.py @@ -138,6 +138,19 @@ def start_gradio(): click.echo("Unable to connect to MQ server") + +@neon_iris_cli.command(help="Create a Web Voice Satellite session") +@click.option("--port", "-p", default=8000, help="Port to run on, defaults to 8000") +@click.option("--host", default="0.0.0.0", help="Host to run on, defaults to 0.0.0.0") +def start_websat(port, host): + from neon_iris.web_sat_client import app + _print_config() + try: + import uvicorn + uvicorn.run(app, host=host, port=port) + except OSError: + click.echo("Unable to connect to MQ server") + + @neon_iris_cli.command(help="Query Neon Core for supported languages") def get_languages(): from neon_iris.util import query_neon diff --git a/neon_iris/client.py b/neon_iris/client.py index a79984b..62f8db7 100644 --- a/neon_iris/client.py +++ b/neon_iris/client.py @@ -333,7 +333,7 @@ def _send_utterance(self, utterance: str, lang: str, self._send_serialized_message(serialized) def _send_audio(self, audio_file: str, lang: str, - username: str, user_profiles: list, + username: Optional[str], user_profiles: Optional[list],
context: Optional[dict] = None): context = context or dict() audio_data = encode_file_to_base64_string(audio_file) diff --git a/neon_iris/models/__init__.py b/neon_iris/models/__init__.py new file mode 100644 index 0000000..2886d6b --- /dev/null +++ b/neon_iris/models/__init__.py @@ -0,0 +1,27 @@ +# NEON AI (TM) SOFTWARE, Software Development Kit & Application Development System +# All trademark and other rights reserved by their respective owners +# Copyright 2008-2024 Neongecko.com Inc. +# BSD-3 +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, +# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +from .web_sat import UserInput, UserInputResponse # noqa \ No newline at end of file diff --git a/neon_iris/models/web_sat.py b/neon_iris/models/web_sat.py new file mode 100644 index 0000000..08ab5ef --- /dev/null +++ b/neon_iris/models/web_sat.py @@ -0,0 +1,42 @@ +"""API data models for the WebSAT API.""" +# NEON AI (TM) SOFTWARE, Software Development Kit & Application Development System +# All trademark and other rights reserved by their respective owners +# Copyright 2008-2024 Neongecko.com Inc. +# BSD-3 +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, +# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from typing import Optional +from pydantic import BaseModel + +class UserInput(BaseModel): + """UserInput is the input data model for the WebSAT API.""" + utterance: Optional[str] = "" + audio_input: Optional[str] = "" + session_id: str = "websat0000" + +class UserInputResponse(BaseModel): + """UserInputResponse is the response data model for the WebSAT API.""" + utterance: Optional[str] = "" + audio_output: Optional[str] = "" + session_id: str = "websat0000" + transcription: str diff --git a/neon_iris/static/custom/error.mp3 b/neon_iris/static/custom/error.mp3 new file mode 100644 index 0000000..c6404ce Binary files /dev/null and b/neon_iris/static/custom/error.mp3 differ diff --git a/neon_iris/static/custom/favicon.ico b/neon_iris/static/custom/favicon.ico new file mode 100644 index 0000000..844ecce Binary files /dev/null and b/neon_iris/static/custom/favicon.ico differ diff --git a/neon_iris/static/custom/logo.webp b/neon_iris/static/custom/logo.webp new file mode 100644 index 0000000..844ecce Binary files /dev/null and b/neon_iris/static/custom/logo.webp differ diff --git a/neon_iris/static/custom/wake.mp3 b/neon_iris/static/custom/wake.mp3 new file mode 100644 index 0000000..efb06b4 Binary files /dev/null and b/neon_iris/static/custom/wake.mp3 differ diff --git a/neon_iris/static/scripts/audio.js b/neon_iris/static/scripts/audio.js new file mode 100644 index 0000000..d573217 --- /dev/null +++ b/neon_iris/static/scripts/audio.js @@ 
-0,0 +1,110 @@ +// Manages audio capture and processing +const AudioHandler = (() => { + let audioStream; + let audioContext; + let recorder; + let volume; + let sampleRate; + let isRecording = false; + + // Ensure the getUserMedia is correctly referenced + const getUserMedia = + navigator.getUserMedia || + navigator.webkitGetUserMedia || + navigator.mozGetUserMedia || + navigator.msGetUserMedia; + + const startAudio = () => { + if (getUserMedia) { + getUserMedia.call( + navigator, + { audio: true }, + (stream) => { + audioStream = stream; + const AudioContext = window.AudioContext || window.webkitAudioContext; + audioContext = new AudioContext(); + sampleRate = audioContext.sampleRate; + volume = audioContext.createGain(); + const audioInput = audioContext.createMediaStreamSource(audioStream); + audioInput.connect(volume); + + const bufferSize = 4096; + // Use the audio context to create the script processor + recorder = audioContext.createScriptProcessor(bufferSize, 1, 1); + + recorder.onaudioprocess = (event) => { + const samples = event.inputBuffer.getChannelData(0); + const PCM16iSamples = convertFloat32ToInt16(samples); + WebSocketHandler.send( + new Blob([PCM16iSamples], { type: "application/octet-stream" }) + ); + }; + + volume.connect(recorder); + recorder.connect(audioContext.destination); + WebSocketHandler.setSampleRate(sampleRate); + isRecording = true; + }, + (error) => { + console.error("Error capturing audio.", error); + } + ); + } else { + console.error("getUserMedia not supported in this browser."); + } + }; + + const stopAudio = () => { + if (isRecording) { + if (recorder) { + recorder.disconnect(); + volume.disconnect(); + // Disconnecting the audio context might not be necessary + // audioContext.close(); + } + if (audioStream) { + const tracks = audioStream.getTracks(); + tracks.forEach((track) => track.stop()); + } + } + }; + + const toggle = () => { + if (!isRecording) { + startAudio(); + } else { + stopAudio(); + } + isRecording = 
!isRecording; // Toggle the recording state + }; + + const isCurrentlyRecording = () => isRecording; + + const convertFloat32ToInt16 = (buffer) => { + let l = buffer.length; + let buf = new Int16Array(l); + while (l--) { + buf[l] = Math.max(-1, Math.min(1, buffer[l])) * 0x7fff; + } + return buf.buffer; + }; + + return { + toggle, + isRecording: isCurrentlyRecording, + }; +})(); + +const startButton = document.getElementById("startButton"); +startButton.addEventListener("click", function () { + AudioHandler.toggle(); + + // Update the button's text and class based on the recording state + if (AudioHandler.isRecording()) { + startButton.classList.add("listening"); + startButton.textContent = "Listening..."; + } else { + startButton.classList.remove("listening"); + startButton.textContent = "Start Listening"; + } +}); diff --git a/neon_iris/static/scripts/sprite.js b/neon_iris/static/scripts/sprite.js new file mode 100644 index 0000000..303e72b --- /dev/null +++ b/neon_iris/static/scripts/sprite.js @@ -0,0 +1,20 @@ +const sprite = document.querySelector(".sprite"); + +function triggerWake() { + sprite.classList.remove("record", "waiting"); + sprite.classList.add("wake"); +} + +function triggerRecord() { + sprite.classList.remove("wake", "waiting"); + sprite.classList.add("record"); +} + +function triggerWaiting() { + sprite.classList.remove("wake", "record"); + sprite.classList.add("waiting"); +} + +function triggerDone() { + sprite.classList.remove("wake", "record", "waiting"); +} diff --git a/neon_iris/static/scripts/ui.js b/neon_iris/static/scripts/ui.js new file mode 100644 index 0000000..7e55d77 --- /dev/null +++ b/neon_iris/static/scripts/ui.js @@ -0,0 +1,150 @@ +function submitMessage() { + const inputElement = document.getElementById("chatInput"); + const userMessage = inputElement.value.trim(); + + if (userMessage !== "") { + const userMessageDiv = createMessageDiv("user", userMessage); + appendMessageToHistory(userMessageDiv); + + // Save the message to localStorage +
saveMessageToLocalStorage("user", userMessage); + + inputElement.value = ""; + + // Get AI response and update the chat history + getAIResponse(userMessage); // Pass the user message to the function + } +} + +async function getAIResponse(text = "", recording = "") { + try { + triggerWaiting(); // Trigger waiting animation + const payload = + text !== "" && recording === "" + ? { utterance: text } + : { audio_input: recording }; + // Make the POST request to the server + const response = await fetch("/user_input", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify(payload), // Send the user message in the body + }); + + // Check if the response is okay + if (!response.ok) { + throw new Error("Network response was not ok: " + response.statusText); + } + + // Convert the response payload into JSON + const data = await response.json(); + console.debug(data, null, 4); + + // Assuming 'data' contains the AI response in a property named 'reply' + const aiMessage = data.transcription; + + triggerDone(); // Trigger done animation + // Add in the user's transcription if STT + if (text === "" && recording !== "") { + const userMessage = createMessageDiv("user", data.utterance); + appendMessageToHistory(userMessage); + saveMessageToLocalStorage("user", data.utterance); + } + + // Create the AI message div and append it to the history + const aiMessageDiv = createMessageDiv("ai", aiMessage); + appendMessageToHistory(aiMessageDiv); + + // Save the AI message to localStorage + saveMessageToLocalStorage("ai", aiMessage); + + // Play the TTS audio + const audioBlob = base64ToBlob(data.audio_output, "audio/wav"); + const audioUrl = URL.createObjectURL(audioBlob); + const audio = new Audio(audioUrl); + audio.type = "audio/wav"; + await audio.play(); + audio.onended = () => { + if (shouldListen && myVad) { + myVad.start(); + } else { + myVad.pause(); + } + }; + } catch (error) { + console.error("Error fetching AI response:", error); + // 
Handle the error, such as showing a message to the user + } +} + +function simulateAIResponse() { + setTimeout(() => { + const aiMessage = "This is a sample AI response."; + const aiMessageDiv = createMessageDiv("ai", aiMessage); + appendMessageToHistory(aiMessageDiv); + + // Save the AI response to localStorage + saveMessageToLocalStorage("ai", aiMessage); + }, 1000); // Simulated delay of 1 second +} + +function createMessageDiv(sender, message) { + const messageDiv = document.createElement("div"); + messageDiv.className = `${sender}-message`; + messageDiv.textContent = message; + return messageDiv; +} + +function appendMessageToHistory(messageDiv) { + const messageContainer = document.getElementById("chatHistory"); + messageContainer.appendChild(messageDiv); + setTimeout(() => { + messageContainer.scrollTop = messageContainer.scrollHeight; + }, 0); +} + +function saveMessageToLocalStorage(sender, message) { + // Retrieve existing chat history from localStorage + const chatHistory = JSON.parse(localStorage.getItem("chatHistory")) || []; + + // Add the new message to the chat history + chatHistory.push({ sender, message }); + + // Store the updated chat history back in localStorage + localStorage.setItem("chatHistory", JSON.stringify(chatHistory)); +} + +function base64ToBlob(base64, mimeType) { + const byteCharacters = atob(base64.replace(/^data:audio\/wav;base64,/, "")); + const byteNumbers = new Array(byteCharacters.length); + for (let i = 0; i < byteCharacters.length; i++) { + byteNumbers[i] = byteCharacters.charCodeAt(i); + } + const byteArray = new Uint8Array(byteNumbers); + return new Blob([byteArray], { type: mimeType }); +} + +// Load chat history from localStorage when the page loads +window.addEventListener("load", () => { + const chatHistory = JSON.parse(localStorage.getItem("chatHistory")) || []; + + for (const { sender, message } of chatHistory) { + const messageDiv = createMessageDiv(sender, message); + appendMessageToHistory(messageDiv); + } +}); + 
+document.addEventListener("DOMContentLoaded", function () { + // Get the input element + const inputElement = document.getElementById("chatInput"); + + // Add the keydown event listener to the input element + inputElement.addEventListener("keydown", function (event) { + // Check if Enter was pressed, or Ctrl+Enter + if (event.key === "Enter" && (event.ctrlKey || !event.shiftKey)) { + event.preventDefault(); + submitMessage(); + } + }); +}); diff --git a/neon_iris/static/scripts/websocket.js b/neon_iris/static/scripts/websocket.js new file mode 100644 index 0000000..0415a36 --- /dev/null +++ b/neon_iris/static/scripts/websocket.js @@ -0,0 +1,162 @@ +function float32ArrayToWavBlob(float32Array, sampleRate = 16000) { + const buffer = new ArrayBuffer(44 + float32Array.length * 2); + const view = new DataView(buffer); + + // Write WAV header to the buffer + // RIFF chunk descriptor + writeString(view, 0, "RIFF"); + view.setUint32(4, 36 + float32Array.length * 2, true); + writeString(view, 8, "WAVE"); + // FMT sub-chunk + writeString(view, 12, "fmt "); + view.setUint32(16, 16, true); // Subchunk1Size (16 for PCM) + view.setUint16(20, 1, true); // AudioFormat (PCM = 1) + view.setUint16(22, 1, true); // NumChannels (Mono = 1, Stereo = 2) + view.setUint32(24, sampleRate, true); // SampleRate + view.setUint32(28, sampleRate * 2, true); // ByteRate (SampleRate * NumChannels * BitsPerSample/8) + view.setUint16(32, 2, true); // BlockAlign (NumChannels * BitsPerSample/8) + view.setUint16(34, 16, true); // BitsPerSample + // Data sub-chunk + writeString(view, 36, "data"); + view.setUint32(40, float32Array.length * 2, true); + + // Write the audio data + float32To16BitPCM(view, 44, float32Array); + + return new Blob([view], { type: "audio/wav" }); +} + +function writeString(view, offset, string) { + for (let i = 0; i < string.length; i++) { + view.setUint8(offset + i, string.charCodeAt(i)); + } +} + +function float32To16BitPCM(output, offset, input) { + for (let i = 0; i < 
input.length; i++, offset += 2) { + const s = Math.max(-1, Math.min(1, input[i])); + output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true); + } +} + +function wavBlobToBase64(blob) { + return new Promise((resolve, reject) => { + const reader = new FileReader(); + reader.readAsDataURL(blob); + reader.onloadend = () => { + const base64data = reader.result; + // Extract the base64 part + const base64String = base64data.split(",")[1]; + resolve(base64String); + }; + reader.onerror = (error) => { + reject(error); + }; + }); +} + +let shouldListen = false; // Global state flag for controlling VAD listening state +let myVad; // VAD instance +let isVadRunning = false; + +async function initializeVad() { + myVad = await vad.MicVAD.new({ + onSpeechEnd: handleSpeechEnd, + }); + if (shouldListen && !isVadRunning) { + myVad.start(); + isVadRunning = true; + } +} + +async function handleSpeechEnd(audio) { + const wavBlob = float32ArrayToWavBlob(audio); + const audioUrl = URL.createObjectURL(wavBlob); + const audioOutput = await wavBlobToBase64(wavBlob); + + // Save the spoken audio as a downloadable file + const downloadArea = document.getElementById("download-area"); + if (downloadArea) { + downloadArea.innerHTML = ""; // Clear the download area + const downloadButton = document.createElement("a"); + downloadButton.href = audioUrl; + downloadButton.download = "recorded_audio.wav"; + downloadButton.textContent = "Download Recorded Audio"; + downloadButton.className = "download-button"; // Add a class for styling + downloadButton.setAttribute("role", "button"); // Accessibility improvement + downloadArea.appendChild(downloadButton); + triggerWaiting(); // Trigger waiting animation + } else { + console.error("Download area not found"); + } + if (myVad && isVadRunning) { + myVad.pause(); + isVadRunning = false; + shouldListen = false; + } + + // Send audio to STT + getAIResponse("", audioOutput); +} + +function toggleListeningState() { + shouldListen = !shouldListen; + if 
(shouldListen && !isVadRunning) { + myVad.start(); + isVadRunning = true; + } else { + myVad.pause(); + isVadRunning = false; + } +} + +// Handles WebSocket connection and message events +const WebSocketHandler = (() => { + let lastActivationTime = 0; + const activationCooldown = 3000; // 3 seconds cooldown + const ws = new WebSocket(WS_URL); + const audio = new Audio("/static/custom/wake.mp3"); // Wakeword acknowledgment sound + + ws.onopen = () => { + console.info("WebSocket connection is open"); + }; + + ws.onmessage = async (event) => { + console.log(event.data); + const model_payload = JSON.parse(event.data); + const currentTime = Date.now(); + if ("activations" in model_payload) { + if ( + model_payload.activations.includes("hey_neon_high") && + currentTime - lastActivationTime > activationCooldown + ) { + shouldListen = true; + audio.onended = () => { + console.log("Activation sound is done playing"); + if (myVad && !isVadRunning) { + triggerRecord(); // Trigger recording animation + myVad.start(); + isVadRunning = true; + } else if (!shouldListen && isVadRunning) { + myVad.pause(); + isVadRunning = false; + } + }; + triggerWake(); // Trigger wake animation + audio.play(); + lastActivationTime = currentTime; + } + } + }; + + return { + send: (data) => ws.send(data), + setSampleRate: (rate) => ws.send(rate), + }; +})(); + +// Initialize VAD when the page is ready +window.addEventListener("DOMContentLoaded", (event) => { + initializeVad(); +}); +WebSocketHandler; diff --git a/neon_iris/static/sprite.css b/neon_iris/static/sprite.css new file mode 100644 index 0000000..73d5d1e --- /dev/null +++ b/neon_iris/static/sprite.css @@ -0,0 +1,61 @@ +/* Base styles for the split sprite */ +.sprite { + width: 40px; /* sprite width adjusted to 40px */ + height: 40px; /* sprite height adjusted to 40px */ + border-radius: 50%; + background-image: linear-gradient(to right, #000 50%, #fff 50%); + box-shadow: 0 0 0 2px #000; /* Adjusted border thickness for smaller size */ +
transition: transform 0.3s ease, opacity 0.3s ease; + opacity: 0; /* sprite is invisible by default */ + position: relative; /* Required for absolute positioning of pseudo-elements */ + display: flex; /* Center content */ + justify-content: center; + align-items: center; + margin: 20px; +} + +/* Pulse animation while recording */ +@keyframes pulse { + 0%, + 100% { + transform: scale(1); + } + 50% { + transform: scale(1.1); + } +} + +.sprite.record { + animation: pulse 1s infinite ease-in-out; + opacity: 1; /* sprite is visible while recording */ +} + +/* Spin animation while waiting for a response */ +@keyframes spin { + 0% { + transform: rotate(0deg); + } + 100% { + transform: rotate(360deg); + } +} + +.sprite.waiting { + animation: spin 2s infinite linear; + opacity: 1; /* sprite is visible while waiting */ +} + +/* Appear animation for wake word activation */ +@keyframes appear { + 0% { + opacity: 0; + } + 100% { + opacity: 1; + } +} + +.sprite.wake { + animation: appear 1s forwards; + opacity: 1; /* sprite is visible when awake */ +} diff --git a/neon_iris/static/styles.css b/neon_iris/static/styles.css new file mode 100644 index 0000000..a048b26 --- /dev/null +++ b/neon_iris/static/styles.css @@ -0,0 +1,196 @@ +body, +html { + text-align: center; + font-family: "Roboto", sans-serif; + background-color: #f4f4f4; + height: 100%; + margin: 0; + padding: 0; +} +img.logo { + width: 40px; + height: 40px; +} +a { + text-decoration: none; +} +a:link { + color: #fff; + border-bottom: 1px solid #ff0000; +} +a:visited { + color: #e600e6; + border-bottom: 1px solid #b3b3b3; +} +a:hover { + color: black; + border-bottom: 1px solid #000099; +} +.button-container { + display: flex; + justify-content: center; /* This centers the button in the container */ + padding: 20px; + background-color: #333; /* Match the header background */ +} +#startButton { + padding: 15px 30px; + font-size: 18px; + border: none; + border-radius: 4px; + color: white; + cursor: pointer; + outline: 
none; + margin-bottom: 10px; + margin-top: 10px; + transition: background-color 0.3s; + background-color: #4caf50; + max-width: 50%; + align-content: center; + align-self: center; +} +#startButton.listening { + background-color: #03a9f4; +} +.content { + display: flex; + align-items: center; + justify-content: space-between; + padding: 20px; + background-color: #333; + margin: 0; +} +.chat-header { + padding: 20px; + background-color: #333; + color: #fff; + text-align: center; + font-size: 2em; + text-shadow: 2px 2px 4px #000000; + transition: transform 0.3s ease; +} +.chat-header:hover { + transform: scale( + 1.05 + ); /* Slight increase in size on hover for dynamic effect */ +} +.chat-window { + display: flex; + flex-direction: column; + padding: 20px; + overflow: auto; +} +#chatHistory { + display: flex; + flex-direction: column; + align-items: flex-start; /* Align items to the start by default */ + height: 100%; + overflow-y: auto; /* Allows scrolling if content overflows */ + background-color: #1a1a1a; /* Dark background for the chat container */ +} +.input-area { + display: flex; + padding: 20px; +} +.input-area input { + flex: 1; + padding: 10px; + margin-right: 10px; + font-size: 16px; +} +.input-area button { + padding: 10px 20px; + background-color: #f90; + border: none; + font-size: 16px; + cursor: pointer; +} +.input-area button:hover { + background-color: #e80; +} +.input-field { + flex-grow: 1; + padding: 10px; + font-size: 16px; + margin-right: 10px; /* Spacing between input field and submit button */ +} +#download-area { + padding: 20px; + background-color: #333; + color: #fff; + text-align: center; + font-size: 1.5em; +} +.download-button { + display: inline-block; + padding: 10px 20px; + margin: 10px 0; + background-color: #03a9f4; + color: #ffffff; + text-align: center; + text-decoration: none; + border-radius: 5px; + transition: background-color 0.3s; +} + +/* Style for user messages */ +.user-message { + background-color: #007bff; /* Blue 
background for user messages */ + color: #fff; /* White text color for user messages */ + padding: 5px 10px; + margin: 5px 0; + border-radius: 10px; + align-self: flex-end; /* Right-align user messages */ + max-width: 60%; + word-wrap: break-word; /* Wrap long words if needed */ +} + +/* Style for AI messages */ +.ai-message { + background-color: #e0e0e0; /* Gray background for AI messages */ + color: #000; /* Black text color for AI messages */ + padding: 5px 10px; + margin: 5px 0; + border-radius: 10px; + align-self: flex-start; /* Left-align AI messages */ + max-width: 60%; + word-wrap: break-word; /* Wrap long words if needed */ +} + +.chat-container { + display: flex; + flex-direction: column; + height: 100%; + background-color: #1a1a1a; +} + +/* Responsive design adjustments */ +@media (max-width: 768px) { + .content { + flex-direction: column; + align-items: center; + } + + .button-container, + .input-area { + flex-direction: column; + align-items: center; + } + + .input-field, + .submit-button, + #startButton { + width: 100%; /* Full width on smaller screens */ + margin: 10px 0; /* Add vertical spacing */ + } + + .chat-header { + font-size: 1.5em; /* Smaller text size for smaller screens */ + } +} + +/* Further responsive adjustments for even smaller screens */ +@media (max-width: 480px) { + .chat-header { + font-size: 1.2em; + } +} diff --git a/neon_iris/templates/index.html b/neon_iris/templates/index.html new file mode 100644 index 0000000..3d008f7 --- /dev/null +++ b/neon_iris/templates/index.html @@ -0,0 +1,42 @@ + + + + + + + {{ title }} + + + + +
+
+ {{ description }} +
+ +
+
+ +
+
+
+
+ + +
+
+
+ + + + + + + + + + + + diff --git a/neon_iris/version.py b/neon_iris/version.py index 4488dcb..e8144d3 100644 --- a/neon_iris/version.py +++ b/neon_iris/version.py @@ -1,8 +1,9 @@ # NEON AI (TM) SOFTWARE, Software Development Kit & Application Framework # All trademark and other rights reserved by their respective owners -# Copyright 2008-2022 Neongecko.com Inc. +# Copyright 2008-2024 Neongecko.com Inc. # Contributors: Daniel McKnight, Guy Daniels, Elon Gasper, Richard Leeds, # Regina Bloomstine, Casimiro Ferreira, Andrii Pernatii, Kirill Hrymailo +# Mike Gray, David Scripka # BSD-3 License # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: diff --git a/neon_iris/wakeword_models/hey_neon/hey_neon.onnx b/neon_iris/wakeword_models/hey_neon/hey_neon.onnx new file mode 100644 index 0000000..c66a46d Binary files /dev/null and b/neon_iris/wakeword_models/hey_neon/hey_neon.onnx differ diff --git a/neon_iris/wakeword_models/hey_neon/hey_neon.tflite b/neon_iris/wakeword_models/hey_neon/hey_neon.tflite new file mode 100644 index 0000000..048ab6c Binary files /dev/null and b/neon_iris/wakeword_models/hey_neon/hey_neon.tflite differ diff --git a/neon_iris/wakeword_models/hey_neon/hey_neon_high.onnx b/neon_iris/wakeword_models/hey_neon/hey_neon_high.onnx new file mode 100644 index 0000000..3dd5d65 Binary files /dev/null and b/neon_iris/wakeword_models/hey_neon/hey_neon_high.onnx differ diff --git a/neon_iris/wakeword_models/hey_neon/hey_neon_high.tflite b/neon_iris/wakeword_models/hey_neon/hey_neon_high.tflite new file mode 100644 index 0000000..790eca4 Binary files /dev/null and b/neon_iris/wakeword_models/hey_neon/hey_neon_high.tflite differ diff --git a/neon_iris/web_client.py b/neon_iris/web_client.py index b33fd66..d2bfc55 100644 --- a/neon_iris/web_client.py +++ b/neon_iris/web_client.py @@ -118,14 +118,14 @@ def update_profile(self, stt_lang: str, tts_lang: str, tts_lang_2: 
str, def on_user_input(self, utterance: str, chat_history: List[Tuple[str, str]], audio_input: str, - client_session: str) -> (List[Tuple[str, str]], str, str, None, str): + client_session: str):# -> tuple[List[Tuple[str, str]], str, Literal[''], None, Any]: """ Callback to handle textual user input @param utterance: String utterance submitted by the user @returns: Input box contents, Updated chat history, Gradio session ID, audio input, audio output """ input_time = time() - LOG.debug(f"Input received") + LOG.debug("Input received") if not self._await_response.wait(30): LOG.error("Previous response not completed after 30 seconds") in_queue = time() - input_time diff --git a/neon_iris/web_sat_client.py b/neon_iris/web_sat_client.py new file mode 100644 index 0000000..db9f40c --- /dev/null +++ b/neon_iris/web_sat_client.py @@ -0,0 +1,325 @@ +"""Runs a web server that serves the Neon AI Web UI and Voice Satellite.""" +# NEON AI (TM) SOFTWARE, Software Development Kit & Application Development System +# All trademark and other rights reserved by their respective owners +# Copyright 2008-2024 Neongecko.com Inc. +# BSD-3 +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, +# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import json +from os import makedirs +from os.path import isdir, join +from threading import Event +from time import time +from typing import Dict, Optional, Sequence +from uuid import uuid4 + +import numpy as np +import resampy +from fastapi import APIRouter, FastAPI, Request, WebSocket +from fastapi.staticfiles import StaticFiles +from fastapi.templating import Jinja2Templates +from neon_utils.file_utils import decode_base64_string_to_file +from openwakeword import Model +from ovos_bus_client import Message +from ovos_config import Configuration +from ovos_utils import LOG +from ovos_utils.xdg_utils import xdg_data_home + +from neon_iris.client import NeonAIClient +from neon_iris.models.web_sat import UserInput, UserInputResponse + + +class WebSatNeonClient(NeonAIClient): + """Neon AI Web UI and Voice Satellite client.""" + + def __init__(self, lang: str = ""): + config = Configuration() + self.config = config.get("iris") or dict() + self.mq_config = config.get("MQ") + if not self.mq_config: + raise ValueError( + "Missing MQ configuration, please set it in ~/.config/neon/neon.yaml" + ) + NeonAIClient.__init__(self, self.mq_config) + self.router = APIRouter() + self._await_response = 
Event() + self._response = None + self._transcribed = None + self._current_tts = dict() + self._profiles: Dict[str, dict] = dict() + self._audio_path = join( + xdg_data_home(), "iris", "stt" + ) # TODO: Clear periodically, or have persistent storage + if not isdir(self._audio_path): + makedirs(self._audio_path) + self.default_lang = lang or self.config.get("default_lang", "") + LOG.name = "iris" + LOG.init(self.config.get("logs")) + # OpenWW + # TODO: Allow for arbitrary models, or pre-existing OpenWW models + self.oww_model = Model( + wakeword_models=["neon_iris/wakeword_models/hey_neon/hey_neon_high.tflite"], + inference_framework="tflite", + ) + # FastAPI + self.templates = Jinja2Templates(directory="neon_iris/templates") + self.build_routes() + + def get_lang(self, session_id: str): + """Get the language for a session.""" + if session_id and session_id in self._profiles: + return self._profiles[session_id]["speech"]["stt_language"] + return self.user_config["speech"]["stt_language"] or self.default_lang + + def handle_api_response(self, message: Message): + """ + Catch-all handler for `.response` messages routed to this client that + are not explicitly handled (i.e. get_stt, get_tts) + @param message: Response message to something emitted by this client + """ + LOG.debug(f"Got {message.msg_type}: {message.data}") + if message.msg_type == "neon.audio_input.response": + self._transcribed = message.data.get("transcripts", [""])[0] + + def handle_klat_response(self, message: Message): + """ + Handle a valid response from Neon. This includes text and base64-encoded + audio in all requested languages. 
+ @param message: Neon response message + """ + LOG.debug(f"gradio context={message.context['gradio']}") + resp_data = message.data["responses"] + sentences = [] + session = message.context["gradio"]["session"] + for _, response in resp_data.items(): # lang, response + sentences.append(response.get("sentence")) + if response.get("audio"): + for _, data in response["audio"].items(): + self._current_tts[session] = data + self._response = "\n".join(sentences) + self._await_response.set() + + def send_audio( # pylint: disable=arguments-renamed + self, + audio_b64_string: str, + lang: str = "en-us", + username: Optional[str] = None, + user_profiles: Optional[list] = None, + context: Optional[dict] = None, + ): + """ + Optionally override this to queue audio inputs or do any pre-parsing + :param audio_file: path to audio file to send to speech module + :param lang: language code associated with request + :param username: username associated with request + :param user_profiles: user profiles expecting a response + :param context: Optional dict context to add to emitted message + """ + audio_path = decode_base64_string_to_file( + audio_b64_string, + join(f"{self._audio_path}/{time()}.wav"), + ) + self._send_audio( + audio_file=audio_path, + lang=lang, + username=username, + user_profiles=user_profiles, + context=context, + ) + + @property + def supported_languages(self) -> Sequence[str]: + """ + Get a list of supported languages from configuration + @returns: list of BCP-47 language codes + """ + languages = self.config.get("languages") + if languages is None: + return [self.default_lang] + if not isinstance(languages, list): + raise TypeError("Expected a list of languages in the configuration") + return languages + + def _start_session(self): + sid = uuid4().hex + self._current_tts[sid] = None + self._profiles[sid] = self.user_config + self._profiles[sid]["user"]["username"] = sid + return sid + + def build_routes(self): + """Build the FastAPI routes.""" + + 
@self.router.get("/") + async def read_root(request: Request): + """Render the Neon AI Web UI and Voice Satellite.""" + description = self.config.get("webui_description", "Chat With Neon") + title = self.config.get("webui_title", "Neon AI") + placeholder = self.config.get("webui_input_placeholder", "Ask me something") + ws_url = self.config.get("webui_ws_url", "ws://localhost:8000/ws") + + context = { + "request": request, + "title": title, + "description": description, + "placeholder": placeholder, + "ws_url": ws_url + } + return self.templates.TemplateResponse("index.html", context) + + @self.router.websocket("/ws") + async def websocket_endpoint(websocket: WebSocket): + """Handles websocket connections to OpenWakeWord, which runs as part of this service.""" + await websocket.accept() + # Send loaded models + await websocket.send_text( + json.dumps({"loaded_models": list(self.oww_model.models.keys())}) + ) + sample_rate = None + + while True: + message = await websocket.receive() + + if message["type"] == "websocket.disconnect": + break + + if message["type"] == "websocket.receive": + if "text" in message: + # Process text message + sample_rate = int(message["text"]) + elif "bytes" in message: + # Process bytes message + audio_bytes = message["bytes"] + + # Add extra bytes of silence if needed + if len(audio_bytes) % 2 == 1: + audio_bytes += b"\x00" + + # Convert audio to correct format and sample rate + audio_data = np.frombuffer(audio_bytes, dtype=np.int16) + if sample_rate and sample_rate != 16000: + audio_data = resampy.resample( + audio_data, sample_rate, 16000 + ) + + # Get openWakeWord predictions and send to browser client + predictions = self.oww_model.predict(audio_data) + + activations = [ + key for key, value in predictions.items() if value >= 0.5 + ] + + if activations: + await websocket.send_text( + json.dumps({"activations": activations}) + ) + + @self.router.post("/user_input") + async def on_user_input_worker( + req: UserInput, + ): + """ + 
Callback to handle textual user input + @param utterance: String utterance submitted by the user + @returns: Session ID, audio input, audio output + """ + utterance = req.utterance or "" + audio_input = req.audio_input or "" + session_id = req.session_id or "websat0000" + + chat_history = [] + input_time = time() + LOG.debug("Input received") + if not self._profiles.get("session_id"): + self._profiles[session_id] = { + "speech": {"stt_language": self.default_lang} + } + self._current_tts[session_id] = None + if not self._await_response.wait(30): + LOG.error("Previous response not completed after 30 seconds") + in_queue = time() - input_time + self._await_response.clear() + self._response = None + self._transcribed = None + lang = self.get_lang(session_id) + if utterance: + LOG.info(f"Sending utterance: {utterance} with lang: {lang}") + self.send_utterance( + utterance, + lang or "en-us", + username=session_id, + user_profiles=[self._profiles[session_id]], + context={ + "gradio": {"session": session_id}, + "timing": {"wait_in_queue": in_queue, "gradio_sent": time()}, + }, + ) + else: + LOG.info(f"Sending audio with length of {len(audio_input)} with lang: {lang}") + self.send_audio( + audio_input, + lang or "en-us", + username=session_id, + user_profiles=[self._profiles[session_id]], + context={ + "gradio": {"session": session_id}, + "timing": {"wait_in_queue": in_queue, "gradio_sent": time()}, + }, + ) + chat_history.append(((audio_input, None), None)) + if not self._await_response.wait(30): + LOG.error("No response received after 30s") + self._await_response.set() + self._response = self._response or "ERROR" + LOG.info(f"Got response={self._response}") + if utterance: + chat_history.append((utterance, self._response)) + elif isinstance(self._transcribed, str): + LOG.info(f"Got transcript: {self._transcribed}") + chat_history.append((self._transcribed, self._response)) + utterance = self._transcribed + resp = UserInputResponse( + **{ + "utterance": utterance, + 
"audio_output": self._current_tts[session_id], + "session_id": session_id, + "transcription": self._response, + } + ) + return resp + + +app = FastAPI() +neon_client = WebSatNeonClient() +app.mount( + "/static", + StaticFiles(directory="neon_iris/static"), + name="Neon Web Voice Satellite", +) +app.include_router(neon_client.router) + + +if __name__ == "__main__": + import uvicorn + + uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/requirements/web_sat.txt b/requirements/web_sat.txt new file mode 100644 index 0000000..99132f3 --- /dev/null +++ b/requirements/web_sat.txt @@ -0,0 +1,8 @@ +fastapi~=0.104.1 +uvicorn[standard]~=0.24.0.post1 +aiohttp~=3.8.6 +resampy~=0.4.2 +openwakeword~=0.5.1 +tflite~=2.10.0 +onnxruntime~=1.16.3 +jinja2~=3.1.2 diff --git a/setup.py b/setup.py index 99fc4f4..94403a4 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,9 @@ # NEON AI (TM) SOFTWARE, Software Development Kit & Application Framework # All trademark and other rights reserved by their respective owners -# Copyright 2008-2022 Neongecko.com Inc. +# Copyright 2008-2024 Neongecko.com Inc. 
# Contributors: Daniel McKnight, Guy Daniels, Elon Gasper, Richard Leeds, # Regina Bloomstine, Casimiro Ferreira, Andrii Pernatii, Kirill Hrymailo +# Mike Gray, David Scripka # BSD-3 License # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: @@ -69,10 +70,13 @@ def get_requirements(requirements_filename: str): "Programming Language :: Python :: 3", "Operating System :: OS Independent" ], - python_requires='>=3.6', + python_requires='>=3.7', install_requires=get_requirements("requirements.txt"), - extras_require={"gradio": get_requirements("gradio.txt")}, + extras_require={"gradio": get_requirements("gradio.txt"), "web_sat": get_requirements("web_sat.txt")}, entry_points={ 'console_scripts': ['iris=neon_iris.cli:neon_iris_cli'] + }, + package_data={ + "neon_iris": ["static/*", "templates/*", "res/*", "wakeword_models/*"] } )