detect silence #1837

ffaerber · 2024-12-13T10:14:11Z

ffaerber
Dec 13, 2024

Hello Everyone,
I am building a voice chatbot similar to the ChatGPT mobile app.
Currently I start the recording/stream by touching the touch pad and stop it by releasing the touch pad.
Now I would like to stop the recording/stream via silence detection instead. I do this with a VolumeMeter.
I store the last 5 volume readings in a volumeBuffer:

// Copy one chunk from the microphone into the VolumeMeter.
copierMicToVolumeMeter.copy();
// Read the volume the meter currently reports.
int currentVolume = volumeMeter.volume();
// Store the reading in the ring buffer of recent volumes.
updateVolumeBuffer(currentVolume);

With calculateVolumeVariance() I can check whether the volume has varied a lot recently.
If it has, I assume the user is still talking; if not, I assume the user is silent.
It works, but not great, so if someone has a better idea,
I would be happy to receive ideas or suggestions for improvement.

here is the full code

#include "audio.h"
#include <Arduino.h>
#include <SPI.h>
#include <WiFi.h>
#include <Wire.h>
#include "AudioTools.h"
#include "AudioTools/AudioCodecs/CodecMP3Helix.h"
#include "AudioTools/AudioCodecs/CodecWAV.h"
#include "AudioTools/AudioLibs/MemoryManager.h"
#include "AudioTools/Concurrency/All.h"

namespace audio {
    // FreeRTOS task handles. Both start as NULL and are not assigned anywhere
    // in this listing (presumably set by code outside it — confirm).
    TaskHandle_t xControllerHandle = NULL;
    TaskHandle_t xSpeechDetectHandle = NULL;

    // Network client that carries the HTTP upload.
    WiFiClient client;

    // Audio format shared by the microphone, volume stream, encoder and meter.
    // Presumably (sample rate, channels, bits per sample) = 16 kHz mono 16-bit — confirm.
    AudioInfo info(16000, 1, 16);

    // Recording chain: microphone -> volumeStream -> encoder (WAV) -> queue,
    // where queue is backed by a 200 KiB buffer.
    BufferRTOS<uint8_t> buffer(200 * 1024);
    QueueStream<uint8_t> queue(buffer);
    EncodedAudioStream encoder(&queue, new WAVEncoder());
    VolumeStream volumeStream(encoder);
    I2SStream microphone;
    StreamCopy copierMicToQueue(volumeStream, microphone);

    // Upload chain: queue -> http.
    HttpRequest http(client);
    StreamCopy copierQueueToHttp(http, queue);

    // Metering chain: microphone -> volumeMeter.
    // NOTE(review): this copier reads from the same `microphone` object as
    // copierMicToQueue above; presumably data consumed by one is not seen by
    // the other — confirm.
    VolumeMeter volumeMeter;
    StreamCopy copierMicToVolumeMeter(volumeMeter, microphone);

    // Worker tasks. The numeric arguments are presumably stack size, priority
    // and core — confirm against the Task class.
    Task writeTask("write", 7 * 1024, 1, 1);
    Task readTask("read", 7 * 1024, 1, 1);
    Task monitorTask("monitor", 7 * 1024, 1, 1);

    // One-shot timer created in controller(); its callback stops the recording.
    TimerHandle_t silenceTimer = nullptr;
    // Timer period in milliseconds (converted with pdMS_TO_TICKS in controller()).
    const int silenceTimeout = 300;

    // Ring buffer holding the most recent volume readings.
    const int volumeBufferSize = 5;  // Adjust based on your desired time window
    int volumeBuffer[volumeBufferSize] = {0};
    // Next slot of volumeBuffer to be written.
    int volumeBufferIndex = 0;
    // Average volume measured by calibrateTask(); not read anywhere in this listing.
    volatile int32_t initialVolume = 0;

    // A variance above this value is treated as "user is speaking".
    int varianceThreshold = 1000;  // Adjust this threshold for sensitivity
    // Speaking/silent state tracked by the monitor task.
    volatile bool isSpeaking = false;

    /**
     * One-shot FreeRTOS task that averages the volume meter over a fixed window.
     *
     * Reads volumeMeter.volume() roughly every 100 ms for 3 seconds, logs each
     * reading, stores the integer average in initialVolume and then deletes
     * itself.
     *
     * @param pvParameters task argument; not used.
     */
    void calibrateTask(void *pvParameters) {
        const int windowMs = 3000;  // Total calibration time in milliseconds
        const int stepMs = 100;     // Pause between two readings in milliseconds
        int32_t total = 0;
        int readings = 0;

        Serial.println("Calibration started...");

        for (uint32_t begunAt = millis(); millis() - begunAt < windowMs; ++readings) {
            int level = volumeMeter.volume();
            total += level;
            Serial.print("Calibrating... Volume: ");
            Serial.println(level);

            vTaskDelay(stepMs / portTICK_PERIOD_MS);
        }

        // Keep the previous value when no reading was taken (avoids dividing by zero).
        if (readings > 0) {
            initialVolume = total / readings;
        }

        Serial.print("Calibration complete. Initial Volume: ");
        Serial.println(initialVolume);

        vTaskDelete(NULL);  // A FreeRTOS task must delete itself instead of returning
    }

    // Clears the volume history: every slot becomes 0 and the write position
    // goes back to the first slot.
    void resetVolumeBuffer() {
        volumeBufferIndex = 0;
        for (int &slot : volumeBuffer) {
            slot = 0;
        }
    }

    // Writes currentVolume into the ring buffer at the current write position
    // and advances that position, wrapping to slot 0 after the last slot.
    void updateVolumeBuffer(int currentVolume) {
        volumeBuffer[volumeBufferIndex] = currentVolume;

        const int next = volumeBufferIndex + 1;
        volumeBufferIndex = (next == volumeBufferSize) ? 0 : next;
    }

    /**
     * Population variance of the readings currently held in volumeBuffer.
     *
     * Accumulates in 64 bits: with readings in the 16-bit sample range a single
     * square can reach about 1e9, so a 32-bit sum of volumeBufferSize squares
     * overflows. The variance is computed as
     *     (n * sum(x^2) - (sum x)^2) / n^2
     * which needs one integer division only, instead of truncating the mean
     * before squaring it.
     *
     * Slots that were never written (or were cleared by resetVolumeBuffer())
     * hold 0 and take part in the calculation like any other reading.
     *
     * @return the variance, clamped to the range [0, INT32_MAX].
     */
    int32_t calculateVolumeVariance() {
        int64_t sum = 0;
        int64_t sumOfSquares = 0;

        // Accumulate sum and sum of squares in 64-bit to avoid overflow.
        for (int i = 0; i < volumeBufferSize; i++) {
            const int64_t reading = volumeBuffer[i];
            sum += reading;
            sumOfSquares += reading * reading;
        }

        const int64_t n = volumeBufferSize;
        const int64_t variance = (n * sumOfSquares - sum * sum) / (n * n);

        if (variance <= 0) {
            return 0;
        }
        // Clamp so the narrowing conversion below can never wrap.
        return variance > INT32_MAX ? INT32_MAX : static_cast<int32_t>(variance);
    }

    // Starts one recording session: resets the buffer/encoder/queue, launches
    // the reader task, opens a chunked HTTP POST, then launches the uploader
    // task and the silence-monitor task.
    // The lambdas handed to Task::begin are presumably invoked repeatedly by
    // the task — confirm against the Task class.
    void startRecord() {
        Serial.println("start");
        // Begin in the "speaking" state so that the first quiet reading in the
        // monitor task is what arms the silence timer.
        isSpeaking = true;
        buffer.clear();
        encoder.begin(info);
        queue.begin();

        // Reader: microphone -> volumeStream (-> encoder -> queue).
        readTask.begin([]() {
            // Serial.print(".");
            copierMicToQueue.copy();
        });

        Url url("http://ffaerber-ubuntu:8000/stream?chatId=xxxxx");
        http.addRequestHeader("Authorization", "Bearer xxxx");
        http.header().put(TRANSFER_ENCODING, CHUNKED);
        if (!http.processBegin(POST, url, "audio/wav")) {
            Serial.println("post failed");
            // NOTE(review): stop() is not defined in this listing; presumably a
            // halt helper from an included header — confirm. Execution falls
            // through to the code below if it returns.
            stop();
        }

        // Uploader: queue -> http.
        writeTask.begin([]() {
            Serial.println("writeTask start");
            copierQueueToHttp.copyAll();
            Serial.println("writeTask end");
        });

        // Silence monitor: take a volume reading, update the ring buffer and
        // switch between speaking/silent based on the variance of the buffer.
        monitorTask.begin([]() {
            // NOTE(review): this reads from the same `microphone` as the reader
            // task above; presumably audio consumed here never reaches the
            // recording — confirm.
            copierMicToVolumeMeter.copy();
            int currentVolume = volumeMeter.volume();
            updateVolumeBuffer(currentVolume);
            int volumeVariance = calculateVolumeVariance();

            if (volumeVariance > varianceThreshold) {
                // Silent -> speaking: cancel the pending stop.
                if (!isSpeaking) {
                    isSpeaking = true;
                    xTimerStop(silenceTimer, 0);  // Stop the silence timer
                }
            } else {
                // Speaking -> silent: the recording is stopped by the timer
                // callback unless speech is detected again first.
                if (isSpeaking) {
                    isSpeaking = false;
                    xTimerStart(silenceTimer, 0);  // Start the silence timer
                }
            }

            Serial.print("Current Volume: ");
            Serial.print(currentVolume);
            Serial.print(", Variance: ");
            Serial.println(volumeVariance);

            // Pause 100 ms between two readings.
            vTaskDelay(100 / portTICK_PERIOD_MS);
        });
    }

    // Ends the current recording session: stops producing audio, waits until
    // the buffered audio has been drained, finishes the HTTP request and shuts
    // down the streams and tasks.
    // NOTE(review): silenceTimerCallback() calls this function directly, and it
    // blocks in vTaskDelay() below; FreeRTOS timer callbacks are documented as
    // "must not block" — confirm whether this should run in another task.
    void stopRecord() {
        Serial.println("stop");
        monitorTask.suspend();

        readTask.suspend();
        // Poll every 100 ms until the buffer is empty.
        // NOTE(review): there is no timeout, so this loops for as long as the
        // buffer is not emptied.
        while (!buffer.isEmpty()) {
            vTaskDelay(100 / portTICK_PERIOD_MS);  // Wait and check again
            Serial.print('>');
        }
        http.processEnd();
        writeTask.suspend();
        // Start the next session with an all-zero volume history.
        resetVolumeBuffer();

        http.end();
        encoder.end();
        queue.end();
        writeTask.end();
        readTask.end();
        monitorTask.end();
    }

    /**
     * Callback registered for the silence timer: ends the recording session.
     *
     * @param xTimer handle of the timer that expired; not used.
     */
    void silenceTimerCallback(TimerHandle_t xTimer) {
        (void)xTimer;  // the handle is not needed
        stopRecord();
    }

    // Task entry point: configures logging, the microphone, the volume stream
    // and the volume meter, creates the silence timer, starts the calibration
    // task and then waits for task notifications that carry an EventType.
    // pvParameter is not used.
    void controller(void *pvParameter) {
        uint32_t ulNotificationValue;
        // Block without timeout while waiting for a notification.
        const TickType_t xTicksToWait = portMAX_DELAY;

        AudioLogger::instance().begin(Serial, AudioLogger::Warning);

        // xiao esp32s3 sense microphone Configuration
        auto micConfig = microphone.defaultConfig(RX_MODE);
        micConfig.copyFrom(info);
        micConfig.signal_type = PDM;
        micConfig.i2s_format = I2S_PCM;
        micConfig.pin_bck = I2S_PIN_NO_CHANGE;
        micConfig.pin_ws = 42;
        micConfig.pin_data = 41;
        microphone.begin(micConfig);

        // Volume stream in front of the encoder; boost is allowed and the
        // volume is set to 12 (presumably a gain factor — confirm).
        auto volumeStream_cfg = volumeStream.defaultConfig();
        volumeStream_cfg.copyFrom(info);
        volumeStream_cfg.allow_boost = true;
        volumeStream.begin(volumeStream_cfg);
        volumeStream.setVolume(12);

        volumeMeter.begin(info);

        // Timer with auto-reload disabled (pdFALSE); silenceTimerCallback runs
        // silenceTimeout ms after the timer was started.
        silenceTimer = xTimerCreate("SilenceTimer", pdMS_TO_TICKS(silenceTimeout), pdFALSE, (void *)0, silenceTimerCallback);
        if (silenceTimer == nullptr) {
            Serial.println("Failed to create silence timer");
            vTaskDelete(NULL);
            return;
        }

        // Measure the average volume once in a separate task.
        xTaskCreatePinnedToCore(calibrateTask, "Calibrate", 4 * 1024, NULL, 1, NULL, APP_CPU_NUM);

        while (true) {
            // Second argument ULONG_MAX: presumably clears the whole
            // notification value on exit — confirm against the FreeRTOS API.
            if (xTaskNotifyWait(pdFALSE, ULONG_MAX, &ulNotificationValue, xTicksToWait) == pdPASS) {
                EventType currentEvent = static_cast<EventType>(ulNotificationValue);

                switch (currentEvent) {
                    case START_RECORD:
                        startRecord();
                        break;

                    // Intentionally empty here: stopping is done by the silence timer.
                    case STOP_RECORD:

                        break;

                    default:
                        break;
                }
            }
            // Pause one second before waiting for the next notification.
            vTaskDelay(1000 / portTICK_PERIOD_MS);
        }
    }

}  // namespace audio

Answered by pschatzmann

Dec 13, 2024

I think the following logic would make more sense to me:

starting: start immediately when a defined sound level (= active level) is exceeded, and update last_active_time_ms = millis();
stopping: use a timeout value (e.g. 1000 ms) and stop when millis() > last_active_time_ms + timeout value

Regarding your approach: with your current settings you are generating 32000 bytes/second. If you use the default copy size (which is 1024 bytes), each volume reading covers only the last 32 ms. If you look at 5 readings, you are still only looking at the last 160 ms.

View full answer

pschatzmann · 2024-12-13T10:34:39Z

pschatzmann
Dec 13, 2024
Maintainer

I think the following logic would make more sense to me:

starting: start immediately when a defined sound level (= active level) is exceeded, and update last_active_time_ms = millis();
stopping: use a timeout value (e.g. 1000 ms) and stop when millis() > last_active_time_ms + timeout value

Regarding your approach: with your current settings you are generating 32000 bytes/second. If you use the default copy size (which is 1024 bytes), each volume reading covers only the last 32 ms. If you look at 5 readings, you are still only looking at the last 160 ms.

0 replies

ffaerber · 2024-12-16T11:32:42Z

ffaerber
Dec 16, 2024
Author

Yes, you are right. Here is a much simpler approach.
I think I only need to start the recording once. After the audio has been sent to the server and the AI's audio answer has been played, I start the recording again.
Thank you for your help.

#include "audio.h"

#include <Arduino.h>
#include <SPI.h>
#include <WiFi.h>
#include <Wire.h>

#include "AudioTools.h"
#include "AudioTools/AudioCodecs/CodecMP3Helix.h"
#include "AudioTools/AudioCodecs/CodecWAV.h"
#include "AudioTools/AudioLibs/MemoryManager.h"
#include "AudioTools/Concurrency/All.h"

namespace audio {
    // Handle of the controller task; starts as NULL and is not assigned in this
    // listing (presumably set by code outside it — confirm).
    TaskHandle_t xControllerHandle = NULL;
    // One-shot timer created in controller(); its callback stops the recording.
    TimerHandle_t silenceTimer = nullptr;

    // copier1 = microphone > multiOutput
    //                                  > volumeStream > encoder > queue
    //                                  > volumeMeter

    // copier2 = queue > http

    // Network client that carries the HTTP upload.
    WiFiClient client;
    // Audio format shared by all streams.
    // Presumably (sample rate, channels, bits per sample) = 16 kHz mono 16-bit — confirm.
    AudioInfo info(16000, 1, 16);
    // 200 KiB buffer behind the queue that sits between recording and upload.
    BufferRTOS<uint8_t> buffer(200 * 1024);
    QueueStream<uint8_t> queue(buffer);
    EncodedAudioStream encoder(&queue, new WAVEncoder());
    VolumeStream volumeStream(encoder);
    VolumeMeter volumeMeter;
    I2SStream microphone;
    // Fan-out target of copier1; volumeMeter and volumeStream are added to it
    // in controller().
    MultiOutput multiOutput;
    StreamCopy copier1(multiOutput, microphone);

    HttpRequest http(client);
    StreamCopy copier2(http, queue);

    // Worker tasks. The numeric arguments are presumably stack size, priority
    // and core — confirm against the Task class.
    Task writeTask("write", 7 * 1024, 1, 1);
    Task readTask("read", 7 * 1024, 1, 1);

    // A reading above volumeThreshold restarts the silence timer (see the read
    // task in controller()).
    const int volumeThreshold = 1500;   // Volume level below which silence is considered
    const int silenceTimeoutMs = 1000;  // Timeout in milliseconds for stopping recording

    void startRecord() {
        Serial.println("startRecord");
        xTimerStart(silenceTimer, 0);
        buffer.clear();
        encoder.begin(info);
        queue.begin();
        readTask.resume();
        Url url("http://ffaerber-ubuntu:8000/stream?chatId=xxxx");
        http.addRequestHeader("Authorization", "Bearer xxxx");
        http.header().put(TRANSFER_ENCODING, CHUNKED);
        http.processBegin(POST, url, "audio/wav");
        writeTask.resume();
    }

    // Ends the current recording session: stops the silence timer and the read
    // task, waits until the buffered audio has been drained, finishes the HTTP
    // request and closes the streams. Both tasks are left suspended so that
    // startRecord() can resume them.
    // NOTE(review): silenceTimerCallback() calls this function directly, and it
    // blocks in vTaskDelay() below; FreeRTOS timer callbacks are documented as
    // "must not block" — confirm whether this should run in another task.
    void stopRecord() {
        Serial.println("stopRecord");
        xTimerStop(silenceTimer, 0);
        readTask.suspend();
        // Poll every 100 ms until the buffer is empty.
        // NOTE(review): there is no timeout, so this loops for as long as the
        // buffer is not emptied.
        while (!buffer.isEmpty()) {
            vTaskDelay(100 / portTICK_PERIOD_MS);  // Wait and check again
            Serial.print('.');
        }
        http.processEnd();
        writeTask.suspend();
        http.end();
        encoder.end();
        queue.end();
    }

    /**
     * Callback registered for the silence timer: logs the timeout and ends the
     * recording session.
     *
     * @param xTimer handle of the timer that expired; not used.
     */
    void silenceTimerCallback(TimerHandle_t xTimer) {
        (void)xTimer;  // the handle is not needed
        Serial.println("Silence timeout exceeded. Stopping recording...");
        stopRecord();
    }

    void controller(void *pvParameter) {
        uint32_t ulNotificationValue;
        const TickType_t xTicksToWait = portMAX_DELAY;

        AudioLogger::instance().begin(Serial, AudioLogger::Warning);

        multiOutput.add(volumeMeter);
        multiOutput.add(volumeStream);
        multiOutput.begin(info);

        // xiao esp32s3 sense microphone Configuration
        auto micConfig = microphone.defaultConfig(RX_MODE);
        micConfig.copyFrom(info);
        micConfig.signal_type = PDM;
        micConfig.i2s_format = I2S_PCM;
        micConfig.pin_bck = I2S_PIN_NO_CHANGE;
        micConfig.pin_ws = 42;
        micConfig.pin_data = 41;
        microphone.begin(micConfig);

        auto volumeStream_cfg = volumeStream.defaultConfig();
        volumeStream_cfg.copyFrom(info);
        volumeStream_cfg.allow_boost = true;
        volumeStream.begin(volumeStream_cfg);
        volumeStream.setVolume(12);

        readTask.begin([]() {
            copier1.copy();
            int currentVolume = volumeMeter.volume();
            if (currentVolume > volumeThreshold) {
                xTimerReset(silenceTimer, 0);
            }
        });
        readTask.suspend();

        writeTask.begin([]() {
            copier2.copyAll();
        });
        writeTask.suspend();

        silenceTimer = xTimerCreate("SilenceTimer", pdMS_TO_TICKS(silenceTimeoutMs), pdFALSE, (void *)0, silenceTimerCallback);
        if (silenceTimer == nullptr) {
            Serial.println("Failed to create silence timer");
            vTaskDelete(NULL);
            return;
        }

        while (true) {
            if (xTaskNotifyWait(pdFALSE, ULONG_MAX, &ulNotificationValue, xTicksToWait) == pdPASS) {
                EventType currentEvent = static_cast<EventType>(ulNotificationValue);

                switch (currentEvent) {
                    case START_RECORD:
                        startRecord();
                        break;

                    default:
                        break;
                }
            }
            vTaskDelay(1000 / portTICK_PERIOD_MS);
        }
    }

}  // namespace audio

0 replies

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

detect silence #1837

{{title}}

Replies: 2 comments

{{title}}

{{editor}}'s edit

{{editor}}'s edit

{{title}}

Select a reply

detect silence #1837

ffaerber Dec 13, 2024

Replies: 2 comments

pschatzmann Dec 13, 2024 Maintainer

ffaerber Dec 16, 2024 Author

ffaerber
Dec 13, 2024

pschatzmann
Dec 13, 2024
Maintainer

ffaerber
Dec 16, 2024
Author