diff --git a/.github/workflows/publish_test_websat_build.yml b/.github/workflows/publish_test_websat_build.yml
index 065f54c..0f5e3b6 100644
--- a/.github/workflows/publish_test_websat_build.yml
+++ b/.github/workflows/publish_test_websat_build.yml
@@ -1,8 +1,9 @@
 name: Publish Docker Containers
 on:
-  registry:
-    type: string
-    default: ghcr.io
+  pull_request:
+    branches:
+      - dev
+      - main
 
 env:
   REGISTRY: ghcr.io
@@ -12,7 +13,7 @@ jobs:
   build_and_publish_docker:
     runs-on: ubuntu-latest
     outputs:
-      version: ${{ steps.version.version }}
+      version: "${{ steps.version.outputs.version }}"
     permissions:
       contents: read
       packages: write
@@ -33,7 +34,7 @@ jobs:
         id: version
         run: |
          VERSION=$(sed "s/a/-a./" <<< $(python setup.py --version))
-          echo ::set-output name=version::${VERSION}
+          echo "version=${VERSION}" >> $GITHUB_OUTPUT
 
         env:
           image_name: ${{ env.IMAGE_NAME }}
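The `version` step's `sed "s/a/-a./"` call rewrites a PEP 440 pre-release such as `2.0a1` into the Docker-tag-friendly `2.0-a.1`. A minimal Python sketch of the same transform (the sample version string is illustrative):

```python
# Mirrors the shell step above: sed "s/a/-a./" replaces the first "a" in
# a PEP 440 alpha version with "-a.", turning e.g. "2.0a1" into "2.0-a.1".
import re

def docker_tag(pep440_version: str) -> str:
    # count=1 matches sed's replace-first-occurrence behavior
    return re.sub("a", "-a.", pep440_version, count=1)

assert docker_tag("2.0a1") == "2.0-a.1"
```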
diff --git a/README.md b/README.md
index 16a537e..fb04db4 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
 # Neon Iris
+
 Neon Iris (Interactive Relay for Intelligence Systems) provides tools for
 interacting with Neon systems remotely, via [MQ](https://github.com/NeonGeckoCom/chat_api_mq_proxy).
 
@@ -6,15 +7,18 @@ Install the Iris Python package with: `pip install neon-iris`
 The `iris` entrypoint is available to interact with a bus via CLI. Help is
 available via `iris --help`.
 ## Configuration
-Configuration files can be specified via environment variables. By default,
-`Iris` will read configuration from `~/.config/neon/diana.yaml` where
+
+Configuration files can be specified via environment variables. By default,
+`Iris` will read configuration from `~/.config/neon/diana.yaml` where
 `XDG_CONFIG_HOME` is set to the default `~/.config`.
-More information about configuration handling can be found
+More information about configuration handling can be found
 [in the docs](https://neongeckocom.github.io/neon-docs/quick_reference/configuration/).
-> *Note:* The neon-iris Docker image uses `neon.yaml` by default because the
+
+> _Note:_ The neon-iris Docker image uses `neon.yaml` by default because the
 > `iris` web UI is often deployed with neon-core.
 
 A default configuration might look like:
+
 ```yaml
 MQ:
   server: neonaialpha.com
@@ -34,22 +38,81 @@ iris:
 ```
 
 ### Language Support
+
 For Neon Core deployments that support language support queries via MQ,
 `languages` may be removed and `enable_lang_api: True` added to configuration.
 This will use the reported STT/TTS supported languages in place of any `iris`
 configuration.
 
 ## Interfacing with a Diana installation
+
 The `iris` CLI includes utilities for interacting with a `Diana` backend. Use
 `iris --help` to get a current list of available commands.
 
 ### `iris start-listener`
-This will start a local wake word recognizer and use a remote Neon
+
+This will start a local wake word recognizer and use a remote Neon
 instance connected to MQ for processing audio and providing responses.
 
 ### `iris start-gradio`
+
 This will start a local webserver and serve a Gradio UI to interact with a
 Neon instance connected to MQ.
 
 ### `iris start-client`
-This starts a CLI client for typing inputs and receiving responses from a Neon
+
+This starts a CLI client for typing inputs and receiving responses from a Neon
 instance connected via MQ.
+
+### `iris start-websat`
+
+This starts a local webserver and serves a web UI for interacting with a Neon
+instance connected to MQ.
+
+## websat
+
+### Configuration
+
+The `websat` web UI is a simple browser interface for chatting with a Neon
+instance. It accepts configuration items prefixed with `webui_` to customize the UI.
+
+| parameter               | description                                                                                                          | default                |
+| ----------------------- | -------------------------------------------------------------------------------------------------------------------- | ---------------------- |
+| webui_description       | The header text for the web UI                                                                                       | Chat with Neon         |
+| webui_title             | The title text for the web UI in the browser                                                                         | Neon AI                |
+| webui_input_placeholder | The placeholder text for the input box                                                                               | Ask me something       |
+| webui_ws_url            | The websocket URL the browser connects to; it must be reachable from the browser, so the `localhost` default only works when iris runs on the same machine as the browser | ws://localhost:8000/ws |
+
+Example configuration:
+
+```yaml
+iris:
+  webui_title: Neon AI
+  webui_description: Chat with Neon
+  webui_input_placeholder: Ask me something
+  webui_ws_url: wss://neonaialpha.com:8000/ws
+```
+
+### Customization
+
+The websat web UI reads the following assets from `neon_iris/static`:
+
+- `error.mp3` - Used for error responses
+- `wake.mp3` - Used for wake word responses
+- `favicon.ico` - The favicon for the web UI
+- `logo.webp` - The logo for the web UI
+
+To customize these items, replace the files in the `neon_iris/static` folder.
+
+### Websocket endpoint
+
+The websat web UI uses a websocket to communicate with OpenWakeWord, which can
+load `.tflite` or `.onnx` models. The websocket endpoint is `/ws`; because it
+is served with FastAPI, it can also be exposed over `wss` for secure
+connections, provided you supply a certificate and key file.
+
+### Chat history
+
+The websat web UI stores chat history in the browser's [local storage](https://developer.mozilla.org/en-US/docs/Web/API/Window/localStorage).
+This allows chat history to persist between browser sessions. However, it also
+means that if you clear your browser's local storage, you will lose your chat
+history. This is a feature, not a bug.
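The `wss` requirement in the new "Websocket endpoint" section amounts to serving the FastAPI app over TLS. A minimal sketch with uvicorn; the app object, the `/ws` handler body, and the certificate paths are placeholders, not the actual neon-iris entrypoint:

```python
# Minimal sketch: expose a FastAPI websocket route over TLS so clients can
# connect via wss://. The app object and cert/key paths are hypothetical.
import uvicorn
from fastapi import FastAPI, WebSocket

app = FastAPI()  # stand-in for the websat FastAPI app

@app.websocket("/ws")
async def ws_endpoint(websocket: WebSocket):
    await websocket.accept()
    data = await websocket.receive_bytes()  # e.g. raw PCM audio frames
    await websocket.send_text(f"received {len(data)} bytes")

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000,
                ssl_certfile="cert.pem", ssl_keyfile="key.pem")
```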
diff --git a/neon_iris/static/scripts/audio.js b/neon_iris/static/scripts/audio.js
index e5f7fc8..d573217 100644
--- a/neon_iris/static/scripts/audio.js
+++ b/neon_iris/static/scripts/audio.js
@@ -7,15 +7,17 @@ const AudioHandler = (() => {
   let sampleRate;
   let isRecording = false;
 
-  // Ensure the getUserMedia is correctly referenced
-  const getUserMedia = navigator.getUserMedia ||
-    navigator.webkitGetUserMedia ||
-    navigator.mozGetUserMedia ||
-    navigator.msGetUserMedia;
+  // Ensure the getUserMedia is correctly referenced
+  const getUserMedia =
+    navigator.getUserMedia ||
+    navigator.webkitGetUserMedia ||
+    navigator.mozGetUserMedia ||
+    navigator.msGetUserMedia;
 
   const startAudio = () => {
     if (getUserMedia) {
-      getUserMedia.call(navigator,
+      getUserMedia.call(
+        navigator,
         { audio: true },
         (stream) => {
           audioStream = stream;
@@ -57,12 +59,12 @@ const AudioHandler = (() => {
       if (recorder) {
         recorder.disconnect();
         volume.disconnect();
-        // Disconnecting the audio context might not be necessary; depends on your use case.
+        // Disconnecting the audio context might not be necessary
        // audioContext.close();
      }
      if (audioStream) {
        const tracks = audioStream.getTracks();
-        tracks.forEach(track => track.stop());
+        tracks.forEach((track) => track.stop());
      }
    }
  };
@@ -82,7 +84,7 @@ const AudioHandler = (() => {
    let l = buffer.length;
    let buf = new Int16Array(l);
    while (l--) {
-      buf[l] = Math.min(1, buffer[l]) * 0x7FFF;
+      buf[l] = Math.min(1, buffer[l]) * 0x7fff;
    }
    return buf.buffer;
  };
@@ -93,16 +95,16 @@ const AudioHandler = (() => {
  };
})();

-const startButton = document.getElementById('startButton');
-startButton.addEventListener('click', function() {
-  AudioHandler.toggle(); // Call the toggle method
+const startButton = document.getElementById("startButton");
+startButton.addEventListener("click", function () {
+  AudioHandler.toggle();

  // Update the button's text and class based on the recording state
  if (AudioHandler.isRecording()) {
-    startButton.classList.add('listening');
-    startButton.textContent = 'Listening...';
+    startButton.classList.add("listening");
+    startButton.textContent = "Listening...";
  } else {
-    startButton.classList.remove('listening');
-    startButton.textContent = 'Start Listening';
+    startButton.classList.remove("listening");
+    startButton.textContent = "Start Listening";
  }
});
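The `while (l--)` loop above converts Web Audio float samples to 16-bit PCM before they leave the browser. A rough Python equivalent, assuming float32 input in [-1.0, 1.0] (note the JS clamps only the positive side, mirrored here):

```python
# Rough Python equivalent of the Int16Array conversion in audio.js:
# clamp samples above 1.0 (the JS does not clamp the negative side) and
# scale into the signed 16-bit range.
import numpy as np

def float32_to_int16(samples: np.ndarray) -> bytes:
    clamped = np.minimum(1.0, samples.astype(np.float32))
    return (clamped * 0x7FFF).astype(np.int16).tobytes()
```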
diff --git a/neon_iris/wakeword_models/hey_neon/hey_neon_high.onnx b/neon_iris/wakeword_models/hey_neon/hey_neon_high.onnx
new file mode 100644
index 0000000..3dd5d65
Binary files /dev/null and b/neon_iris/wakeword_models/hey_neon/hey_neon_high.onnx differ
diff --git a/neon_iris/wakeword_models/hey_neon/hey_neon_high.tflite b/neon_iris/wakeword_models/hey_neon/hey_neon_high.tflite
new file mode 100644
index 0000000..790eca4
Binary files /dev/null and b/neon_iris/wakeword_models/hey_neon/hey_neon_high.tflite differ
diff --git a/neon_iris/web_sat_client.py b/neon_iris/web_sat_client.py
index 9dadc78..91c8ef9 100644
--- a/neon_iris/web_sat_client.py
+++ b/neon_iris/web_sat_client.py
@@ -52,7 +52,7 @@ def __init__(self, lang: str = None):
        # OpenWW
        # TODO: Allow for arbitrary models, or pre-existing OpenWW models
        self.oww_model = Model(
-            wakeword_models=["neon_iris/wakeword_models/hey_neon/hey_neon.tflite"],
+            wakeword_models=["neon_iris/wakeword_models/hey_neon/hey_neon_high.tflite"],
            inference_framework="tflite",
        )
        # FastAPI
@@ -83,26 +83,17 @@ def handle_klat_response(self, message: Message):
        """
        LOG.debug(f"gradio context={message.context['gradio']}")
        resp_data = message.data["responses"]
-        files = []
        sentences = []
        session = message.context["gradio"]["session"]
        for _, response in resp_data.items():  # lang, response
            sentences.append(response.get("sentence"))
            if response.get("audio"):
                for _, data in response["audio"].items():
-                    # filepath = "/".join(
-                    #     [self.audio_cache_dir] + response[gender].split("/")[-4:]
-                    # )
-                    # TODO: This only plays the most recent, so it doesn't
-                    # support multiple languages or multi-utterance responses
                    self._current_tts[session] = data
-                    # files.append(filepath)
-                    # if not isfile(filepath):
-                    #     decode_base64_string_to_file(data, filepath)
        self._response = "\n".join(sentences)
        self._await_response.set()

-    def send_audio(
+    def send_audio(  # pylint: disable=arguments-renamed
        self,
        audio_b64_string: str,
        lang: str = "en-us",
diff --git a/requirements/web_sat.txt b/requirements/web_sat.txt
index cfb9778..a5f6432 100644
--- a/requirements/web_sat.txt
+++ b/requirements/web_sat.txt
@@ -1,8 +1,8 @@
-fastapi
-uvicorn
+fastapi~=0.104.1
+uvicorn~=0.24.0.post1
 aiohttp~=3.8.6
 resampy~=0.4.2
 openwakeword~=0.5.1
 tflite~=2.10.0
-onnxruntime
-jinja2
+onnxruntime~=1.16.3
+jinja2~=3.1.2
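With the dependencies now pinned above and the new `hey_neon_high.tflite` model wired into `web_sat_client.py`, wake word scoring follows OpenWakeWord's frame-based API. A minimal sketch, assuming 16 kHz mono int16 PCM in 1280-sample (80 ms) frames; the 0.5 threshold is illustrative, not a value taken from neon-iris:

```python
# Sketch of scoring one audio frame with the bundled wake word model,
# assuming 16 kHz mono int16 PCM in 1280-sample (80 ms) chunks.
import numpy as np
from openwakeword.model import Model

oww_model = Model(
    wakeword_models=["neon_iris/wakeword_models/hey_neon/hey_neon_high.tflite"],
    inference_framework="tflite",
)

frame = np.zeros(1280, dtype=np.int16)  # one 80 ms frame (silence here)
scores = oww_model.predict(frame)  # maps model name -> confidence in [0, 1]
if any(score >= 0.5 for score in scores.values()):  # illustrative threshold
    print("Wake word detected")
```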