NeonGeckoCom · NeonDaniel · Dec 28, 2023 · Dec 13, 2023 · Dec 13, 2023 · Dec 16, 2023
diff --git a/.github/workflows/publish_test_websat_build.yml b/.github/workflows/publish_test_websat_build.yml
@@ -0,0 +1,62 @@
+# Builds the websat Docker image and publishes it to the GitHub Container
+# Registry whenever `dev` or `main` is pushed.
+name: Publish Docker Containers
+on:
+  push:
+    branches:
+      - dev
+      - main
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}-websat
+
+jobs:
+  build_and_publish_docker:
+    runs-on: ubuntu-latest
+    outputs:
+      # Step outputs are addressed as `steps.<id>.outputs.<name>`
+      version: "${{ steps.version.outputs.version }}"
+    permissions:
+      contents: read
+      packages: write
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.ref }}
+
+      - name: Log in to the Container registry
+        uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      # The sed expression rewrites the first `a` in the package version to
+      # `-a.` (e.g. `1.0.0a1` -> `1.0.0-a.1`) before it is used as the semver
+      # tag value in the metadata step below.
+      - name: Get Version
+        id: version
+        run: |
+          VERSION=$(sed "s/a/-a./" <<< "$(python setup.py --version)")
+          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
+        env:
+          image_name: ${{ env.IMAGE_NAME }}
+
+      # QEMU + Buildx are required for the multi-platform build below
+      - name: Setup QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Setup Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      # Produces one tag from the package version and one from the branch name
+      - name: Extract metadata for base Docker
+        id: base_meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: |
+            type=semver,pattern={{version}},value=${{ steps.version.outputs.version }}
+            type=ref,event=branch
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: ./Dockerfile.websat
+          push: true
+          tags: ${{ steps.base_meta.outputs.tags }}
+          labels: ${{ steps.base_meta.outputs.labels }}
+          platforms: linux/amd64,linux/arm64,linux/arm/v7
diff --git a/Dockerfile.websat b/Dockerfile.websat
@@ -0,0 +1,24 @@
+# Container image for the Neon Iris web voice satellite (`iris start-websat`).
+FROM python:3.8-slim
+
+LABEL vendor=neon.ai \
+    ai.neon.name="neon-iris-websat"
+
+# Configuration lookup settings, written in the `key=value` form
+# (the space-separated `ENV key value` form is legacy syntax).
+ENV OVOS_CONFIG_BASE_FOLDER=neon \
+    OVOS_CONFIG_FILENAME=neon.yaml \
+    XDG_CONFIG_HOME=/config
+
+# `apt-get` is the script-stable front end; package lists are removed in the
+# same layer so they do not add to the image size.
+RUN apt-get update && \
+    apt-get install -y ffmpeg && \
+    rm -rf /var/lib/apt/lists/*
+
+# COPY is sufficient for plain local files (no archive extraction or URL fetch)
+COPY . /neon_iris
+WORKDIR /neon_iris
+
+# The extras spec is quoted so the shell cannot glob-expand the brackets, and
+# the pip download cache is not kept in the image layer.
+RUN pip install --no-cache-dir wheel && \
+    pip install --no-cache-dir ".[web_sat]"
+
+COPY docker_overlay/ /
+EXPOSE 8000
+
+CMD ["iris", "start-websat"]
diff --git a/README.md b/README.md
@@ -1,20 +1,24 @@
 # Neon Iris
+
 Neon Iris (Interactive Relay for Intelligence Systems) provides tools for
 interacting with Neon systems remotely, via [MQ](https://github.com/NeonGeckoCom/chat_api_mq_proxy).
 
 Install the Iris Python package with: `pip install neon-iris`
 The `iris` entrypoint is available to interact with a bus via CLI. Help is available via `iris --help`.
 
 ## Configuration
-Configuration files can be specified via environment variables. By default, 
-`Iris` will read configuration from `~/.config/neon/diana.yaml` where 
+
+Configuration files can be specified via environment variables. By default,
+`Iris` will read configuration from `~/.config/neon/diana.yaml` where
 `XDG_CONFIG_HOME` is set to the default `~/.config`.
-More information about configuration handling can be found 
+More information about configuration handling can be found
 [in the docs](https://neongeckocom.github.io/neon-docs/quick_reference/configuration/).
-> *Note:* The neon-iris Docker image uses `neon.yaml` by default because the
+
+> _Note:_ The neon-iris Docker image uses `neon.yaml` by default because the
 > `iris` web UI is often deployed with neon-core.
 
 A default configuration might look like:
+
 ```yaml
 MQ:
   server: neonaialpha.com
@@ -34,22 +38,81 @@ iris:
 ```
 
 ### Language Support
+
 For Neon Core deployments that support language support queries via MQ, `languages`
 may be removed and `enable_lang_api: True` added to configuration. This will use
 the reported STT/TTS supported languages in place of any `iris` configuration.
 
 ## Interfacing with a Diana installation
+
 The `iris` CLI includes utilities for interacting with a `Diana` backend. Use
 `iris --help` to get a current list of available commands.
 
 ### `iris start-listener`
-This will start a local wake word recognizer and use a remote Neon 
+
+This will start a local wake word recognizer and use a remote Neon
 instance connected to MQ for processing audio and providing responses.
 
 ### `iris start-gradio`
+
 This will start a local webserver and serve a Gradio UI to interact with a Neon
 instance connected to MQ.
 
 ### `iris start-client`
-This starts a CLI client for typing inputs and receiving responses from a Neon 
+
+This starts a CLI client for typing inputs and receiving responses from a Neon
 instance connected via MQ.
+
+### `iris start-websat`
+
+This starts a local webserver and serves a web UI for interacting with a Neon
+instance connected to MQ.
+
+## websat
+
+### Configuration
+
+The `websat` web UI is a simple web UI for interacting with a Neon instance. It
+accepts special configuration items prefixed with `webui_` to customize the UI.
+
+| parameter               | description                                                                                                                            | default                |
+| ----------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | ---------------------- |
+| webui_description       | The header text for the web UI                                                                                                         | Chat with Neon         |
+| webui_title             | The title text for the web UI in the browser                                                                                           | Neon AI                |
+| webui_input_placeholder | The placeholder text for the input box                                                                                                 | Ask me something       |
+| webui_ws_url            | The websocket URL to connect to, which must be accessible from the browser you're running in. Note that the default will usually fail. | ws://localhost:8000/ws |
+
+Example configuration:
+
+```yaml
+iris:
+  webui_title: Neon AI
+  webui_description: Chat with Neon
+  webui_input_placeholder: Ask me something
+  webui_ws_url: wss://neonaialpha.com:8000/ws
+```
+
+### Customization
+
+The websat web UI reads in the following items from `neon_iris/static`:
+
+- `error.mp3` - Used for error responses
+- `wake.mp3` - Used for wake word responses
+- `favicon.ico` - The favicon for the web UI
+- `logo.webp` - The logo for the web UI
+
+To customize these items, you can replace them in the `neon_iris/static` folder.
+
+### Websocket endpoint
+
+The websat web UI uses a websocket to communicate with OpenWakeWord, which can
+load `.tflite` or `.onnx` models. The websocket endpoint is `/ws`. Because it
+is served with FastAPI, it can also be reached over `wss` for secure
+connections; to use `wss`, you must provide a certificate and key file.
+
+### Chat history
+
+The websat web UI stores chat history in the browser's [local storage](https://developer.mozilla.org/en-US/docs/Web/API/Window/localStorage).
+This allows chat history to persist between browser sessions. However, it also
+means that if you clear your browser's local storage, you will lose your chat
+history. This is a feature, not a bug.
diff --git a/docker_overlay/etc/neon/neon.yaml b/docker_overlay/etc/neon/neon.yaml
@@ -12,6 +12,7 @@ iris:
   webui_chatbot_label: Chat History
   webui_mic_label: Speak to Neon
   webui_text_label: Text with Neon
+  webui_ws_url: ws://localhost:8000/ws # Override, as this needs to be reachable by the browser
   server_address: "0.0.0.0"
   server_port: 7860
   default_lang: en-us
@@ -43,4 +44,4 @@ logs:
     error:
       - pika
     warning:
-      - filelock
+      - filelock
diff --git a/neon_iris/cli.py b/neon_iris/cli.py
@@ -138,6 +138,19 @@ def start_gradio():
         click.echo("Unable to connect to MQ server")
 
 
+@neon_iris_cli.command(help="Create a Web Voice Satellite session")
+@click.option("--port", "-p", default=8000, help="Port to run on, defaults to 8000")
+@click.option("--host", default="0.0.0.0", help="Host to run on, defaults to 0.0.0.0")
+def start_websat(port, host):
+    """
+    Serve the Web Voice Satellite app with uvicorn.
+    :param port: port passed to `uvicorn.run` (CLI default 8000)
+    :param host: host passed to `uvicorn.run` (CLI default "0.0.0.0")
+    """
+    # The app is imported inside the command rather than at module import time
+    from neon_iris.web_sat_client import app
+    _print_config()
+    try:
+        import uvicorn
+        uvicorn.run(app, host=host, port=port)
+    except OSError:
+        # NOTE(review): only the uvicorn import/run is guarded here, yet the
+        # message blames the MQ connection - confirm this wording is intended
+        click.echo("Unable to connect to MQ server")
+
+
 @neon_iris_cli.command(help="Query Neon Core for supported languages")
 def get_languages():
     from neon_iris.util import query_neon

diff --git a/neon_iris/client.py b/neon_iris/client.py
@@ -333,7 +333,7 @@ def _send_utterance(self, utterance: str, lang: str,
         self._send_serialized_message(serialized)
 
     def _send_audio(self, audio_file: str, lang: str,
-                    username: str, user_profiles: list,
+                    username: Optional[str], user_profiles: Optional[list],
                     context: Optional[dict] = None):
         context = context or dict()
         audio_data = encode_file_to_base64_string(audio_file)

diff --git a/neon_iris/models/__init__.py b/neon_iris/models/__init__.py
@@ -0,0 +1 @@
+from .web_sat import UserInput, UserInputResponse  # noqa
diff --git a/neon_iris/models/web_sat.py b/neon_iris/models/web_sat.py
@@ -0,0 +1,16 @@
+"""API data models for the WebSAT API."""
+from typing import Optional
+from pydantic import BaseModel
+
+class UserInput(BaseModel):
+    """UserInput is the input data model for the WebSAT API.
+
+    Every field has a default, so no field is required when building an
+    instance.
+    """
+    # Optional string, defaults to ""; presumably the text of the user's
+    # request - TODO confirm against the API handler
+    utterance: Optional[str] = ""
+    # Optional string, defaults to ""; presumably encoded audio of the
+    # request (encoding is not shown here) - TODO confirm
+    audio_input: Optional[str] = ""
+    # Session identifier string; defaults to the placeholder "websat0000"
+    session_id: str = "websat0000"
+
+class UserInputResponse(BaseModel):
+    """UserInputResponse is the response data model for the WebSAT API.
+
+    `transcription` is the only field without a default and therefore the
+    only one that must be supplied.
+    """
+    # Optional string, defaults to ""; presumably the text of the response
+    # - TODO confirm against the API handler
+    utterance: Optional[str] = ""
+    # Optional string, defaults to ""; presumably encoded response audio
+    # (encoding is not shown here) - TODO confirm
+    audio_output: Optional[str] = ""
+    # Session identifier string; defaults to the placeholder "websat0000"
+    session_id: str = "websat0000"
+    # Required string; presumably the transcript of the user's input
+    # - TODO confirm
+    transcription: str
diff --git a/neon_iris/static/error.mp3 b/neon_iris/static/error.mp3
diff --git a/neon_iris/static/favicon.ico b/neon_iris/static/favicon.ico
diff --git a/neon_iris/static/logo.webp b/neon_iris/static/logo.webp
diff --git a/neon_iris/static/scripts/audio.js b/neon_iris/static/scripts/audio.js
@@ -0,0 +1,110 @@
+// Manages audio capture and processing
+const AudioHandler = (() => {
+  let audioStream;
+  let audioContext;
+  let recorder;
+  let volume;
+  let sampleRate;
+  let isRecording = false;
+
+  // Ensure the getUserMedia is correctly referenced
+  const getUserMedia =
+    navigator.getUserMedia ||
+    navigator.webkitGetUserMedia ||
+    navigator.mozGetUserMedia ||
+    navigator.msGetUserMedia;
+
+  const startAudio = () => {
+    if (getUserMedia) {
+      getUserMedia.call(
+        navigator,
+        { audio: true },
+        (stream) => {
+          audioStream = stream;
+          const AudioContext = window.AudioContext || window.webkitAudioContext;
+          audioContext = new AudioContext();
+          sampleRate = audioContext.sampleRate;
+          volume = audioContext.createGain();
+          const audioInput = audioContext.createMediaStreamSource(audioStream);
+          audioInput.connect(volume);
+
+          const bufferSize = 4096;
+          // Use the audio context to create the script processor
+          recorder = audioContext.createScriptProcessor(bufferSize, 1, 1);
+
+          recorder.onaudioprocess = (event) => {
+            const samples = event.inputBuffer.getChannelData(0);
+            const PCM16iSamples = convertFloat32ToInt16(samples);
+            WebSocketHandler.send(
+              new Blob([PCM16iSamples], { type: "application/octet-stream" })
+            );
+          };
+
+          volume.connect(recorder);
+          recorder.connect(audioContext.destination);
+          WebSocketHandler.setSampleRate(sampleRate);
+          isRecording = true;
+        },
+        (error) => {
+          console.error("Error capturing audio.", error);
+        }
+      );
+    } else {
+      console.error("getUserMedia not supported in this browser.");
+    }
+  };
+
+  const stopAudio = () => {
+    if (isRecording) {
+      if (recorder) {
+        recorder.disconnect();
+        volume.disconnect();
+        // Disconnecting the audio context might not be necessary
+        // audioContext.close();
+      }
+      if (audioStream) {
+        const tracks = audioStream.getTracks();
+        tracks.forEach((track) => track.stop());
+      }
+    }
+  };
+
+  const toggle = () => {
+    if (!isRecording) {
+      startAudio();
+    } else {
+      stopAudio();
+    }
+    isRecording = !isRecording; // Toggle the recording state
+  };
+
+  const isCurrentlyRecording = () => isRecording;
+
+  const convertFloat32ToInt16 = (buffer) => {
+    let l = buffer.length;
+    let buf = new Int16Array(l);
+    while (l--) {
+      buf[l] = Math.min(1, buffer[l]) * 0x7fff;
+    }
+    return buf.buffer;
+  };
+
+  return {
+    toggle,
+    isRecording: isCurrentlyRecording,
+  };
+})();
+
+const startButton = document.getElementById("startButton");
+startButton.addEventListener("click", function () {
+  AudioHandler.toggle();
+
+  // Update the button's text and class based on the recording state
+  if (AudioHandler.isRecording()) {
+    startButton.classList.add("listening");
+    startButton.textContent = "Listening...";
+  } else {
+    startButton.classList.remove("listening");
+    startButton.textContent = "Start Listening";
+  }
+});
diff --git a/neon_iris/static/scripts/main.js b/neon_iris/static/scripts/main.js
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		from .web_sat import UserInput, UserInputResponse # noqa
mikejgray marked this conversation as resolved. Show resolved Hide resolved