Skip to content

Commit

Permalink
Microphone and mWW Improvements (#43)
Browse files Browse the repository at this point in the history
* support 48kHz hacked decimation and secondary microphone stream

* use task notifications to manage mic

* don't use all of a core for mic reading

* cleanup and simplify logic

* add available function to secondary mic

* move mWW into tasks

* use event group to manage tasks

* simplify component logic

* update some TODOs

* use quantized probability cutoff for comparison

* move ignore_windows into each model

* pass more information about detected wake word

* use a queue to send wake word info from task to loop

* enable/disable particular wake word models

* disable task debug logging

* add enabled wake word selector

* yamllint fix

* revert removing a mWW start; you'll get a warning

* add short delay after stopping ble
  • Loading branch information
kahrendt authored Aug 8, 2024
1 parent 10f8d93 commit 8b1d697
Show file tree
Hide file tree
Showing 11 changed files with 851 additions and 675 deletions.
360 changes: 169 additions & 191 deletions esphome/components/i2s_audio/microphone/i2s_audio_microphone.cpp

Large diffs are not rendered by default.

33 changes: 16 additions & 17 deletions esphome/components/i2s_audio/microphone/i2s_audio_microphone.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@
namespace esphome {
namespace i2s_audio {




// Microphone component declared on top of I2SAudioIn, microphone::Microphone and Component.
//
// NOTE(review): this listing looks like a rendered diff: removed and added lines appear interleaved
// without +/- markers, and the "Expand All @@ ..." lines are hunk separators, not code. Declarations
// that appear twice below (set_din_pin, set_pdm, pdm_, adc_channel_, bits_per_sample_, use_apll_) are
// presumably one removed and one added copy each -- confirm against the actual header before relying
// on member order or on which helper methods still exist.
class I2SAudioMicrophone : public I2SAudioIn, public microphone::Microphone, public Component {
public:
// Component lifecycle hooks; implementations are not visible in this listing.
void setup() override;
Expand All @@ -25,10 +22,10 @@ class I2SAudioMicrophone : public I2SAudioIn, public microphone::Microphone, pub

void loop() override;

// Simple setters that store the given value in the matching member.
void set_din_pin(int8_t pin) { this->din_pin_ = pin; }
void set_pdm(bool pdm) { this->pdm_ = pdm; }

// Read audio into buf. Whether len counts bytes or samples is not visible here -- TODO confirm
// against the implementation file.
size_t read(int16_t *buf, size_t len) override;
size_t read_secondary(int16_t *buf, size_t len) override;

// Forwards to comm_ring_buffer_->available().
// NOTE(review): the pointer is dereferenced unconditionally; this assumes comm_ring_buffer_ has been
// allocated before the first call -- confirm.
size_t available_secondary() override { return this->comm_ring_buffer_->available(); }

// ADC-related API is only compiled when SOC_I2S_SUPPORTS_ADC is set.
#if SOC_I2S_SUPPORTS_ADC
void set_adc_channel(adc1_channel_t channel) {
Expand All @@ -42,29 +39,31 @@ class I2SAudioMicrophone : public I2SAudioIn, public microphone::Microphone, pub
void set_bits_per_sample(i2s_bits_per_sample_t bits_per_sample) { this->bits_per_sample_ = bits_per_sample; }
// Takes a uint32_t but stores into a bool member, so any non-zero value becomes true.
void set_use_apll(uint32_t use_apll) { this->use_apll_ = use_apll; }

void set_din_pin(int8_t pin) { this->din_pin_ = pin; }
void set_pdm(bool pdm) { this->pdm_ = pdm; }

protected:
esp_err_t start_i2s_driver_();

// Static entry point taking an opaque pointer; presumably a FreeRTOS task function whose params is
// the component instance -- TODO confirm in the implementation file.
static void read_task_(void *params);
void watch_();

// Task handle starts out null; the two queue handles have no in-class initializer.
TaskHandle_t read_task_handle_{nullptr};
QueueHandle_t event_queue_;
QueueHandle_t command_queue_;
std::unique_ptr<RingBuffer> output_ring_buffer_;

void start_();
void stop_();
void read_();
// comm_ring_buffer_ is the buffer queried by available_secondary().
std::unique_ptr<RingBuffer> asr_ring_buffer_;
std::unique_ptr<RingBuffer> comm_ring_buffer_;

// Configuration members. Only pdm_, din_pin_, adc_ and adc_channel_ carry default initializers.
bool use_apll_;
bool pdm_{false};
int8_t din_pin_{I2S_PIN_NO_CHANGE};

#if SOC_I2S_SUPPORTS_ADC
adc1_channel_t adc_channel_{ADC1_CHANNEL_MAX};
bool adc_{false};
adc1_channel_t adc_channel_{ADC1_CHANNEL_MAX};
#endif
bool pdm_{false};

i2s_bits_per_sample_t bits_per_sample_;
i2s_channel_fmt_t channel_;
uint32_t sample_rate_;
i2s_bits_per_sample_t bits_per_sample_;
bool use_apll_;
};

} // namespace i2s_audio
Expand Down
32 changes: 21 additions & 11 deletions esphome/components/micro_wake_word/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@
"IsRunningCondition", automation.Condition
)

# Handle for the class named "WakeWordModel" obtained from micro_wake_word_ns; used below as the
# declared ID type for each model entry and as the type constructed in to_code.
WakeWordModel_ = micro_wake_word_ns.class_("WakeWordModel")


def _validate_json_filename(value):
value = cv.string(value)
Expand Down Expand Up @@ -324,14 +326,15 @@ def _validate_source_shorthand(value):

# Schema for a single model entry. Every user-facing key is optional; to_code falls back to the
# model manifest for the probability cutoff and sliding window size when they are not given.
MODEL_SCHEMA = cv.Schema(
{
# ID declared as a WakeWordModel_ so the entry can be referenced as its own object.
cv.GenerateID(CONF_ID): cv.declare_id(WakeWordModel_),
cv.Optional(CONF_MODEL): MODEL_SOURCE_SCHEMA,
# Validated with cv.percentage; to_code multiplies the value by 255 and truncates to int
# before passing it on.
cv.Optional(CONF_PROBABILITY_CUTOFF): cv.percentage,
cv.Optional(CONF_SLIDING_WINDOW_SIZE): cv.positive_int,
# ID declared as cg.uint8; presumably names the embedded model data array -- TODO confirm.
cv.GenerateID(CONF_RAW_DATA_ID): cv.declare_id(cg.uint8),
}
)

# Provide a default VAD model that could be overridden
# Provides a default VAD model that could be overridden
VAD_MODEL_SCHEMA = MODEL_SCHEMA.extend(
cv.Schema(
{
Expand Down Expand Up @@ -476,6 +479,8 @@ async def to_code(config):
probability_cutoff = model_parameters.get(
CONF_PROBABILITY_CUTOFF, manifest[KEY_MICRO][CONF_PROBABILITY_CUTOFF]
)
quantized_probability_cutoff = int(probability_cutoff * 255)

sliding_window_size = model_parameters.get(
CONF_SLIDING_WINDOW_SIZE,
manifest[KEY_MICRO][CONF_SLIDING_WINDOW_SIZE],
Expand All @@ -485,24 +490,29 @@ async def to_code(config):
cg.add(
var.add_vad_model(
prog_arr,
probability_cutoff,
quantized_probability_cutoff,
sliding_window_size,
manifest[KEY_MICRO][CONF_TENSOR_ARENA_SIZE],
)
)
else:
cg.add(
var.add_wake_word_model(
prog_arr,
probability_cutoff,
sliding_window_size,
manifest[KEY_WAKE_WORD],
manifest[KEY_MICRO][CONF_TENSOR_ARENA_SIZE],
)
wake_word_model = cg.new_Pvariable(
model_parameters[CONF_ID],
prog_arr,
quantized_probability_cutoff,
sliding_window_size,
manifest[KEY_WAKE_WORD],
manifest[KEY_MICRO][CONF_TENSOR_ARENA_SIZE],
)

cg.add(var.add_wake_word_model(wake_word_model))

cg.add(var.set_features_step_size(manifest[KEY_MICRO][CONF_FEATURE_STEP_SIZE]))
cg.add_library(None,None,"https://github.com/kahrendt/ESPMicroSpeechFeatures.git#psram-allocations")
cg.add_library(
None,
None,
"https://github.com/kahrendt/ESPMicroSpeechFeatures.git#psram-allocations",
)
# cg.add_library("kahrendt/ESPMicroSpeechFeatures", "1.0.0")


Expand Down
Loading

0 comments on commit 8b1d697

Please sign in to comment.