Skip to content

Commit

Permalink
Bugfixes/Improvements for media player (#37)
Browse files Browse the repository at this point in the history
* quickly disable new special case fir filter

* fix pipeline stopped vs playing logic

* improve robustness of flac decoding

* Increase http buffer size and allow setting output sample rate

* fix flac sync errors after running out of data

* limit http transfer size per loop

* disable task debug logging

* revert min http transfer size

* always reset input_buffer_current

* update TODOs and describe media player framework

* read mute status from dac at setup

* raise error if bits per sample is too high for our optimized version

* block unprocessable streams

* clear appropriate mixer buffer when stopping a pipeline

* log which pipeline element has an error

* never transfer more input samples than can be processed in 1 step

* correctly account for mono to stereo adjustment

* check for upsampling before scaling max input samples

* fix memory leak
  • Loading branch information
kahrendt authored Aug 5, 2024
1 parent f5b159b commit dd7fd38
Show file tree
Hide file tree
Showing 10 changed files with 302 additions and 113 deletions.
35 changes: 20 additions & 15 deletions esphome/components/nabu/audio_decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,21 +110,26 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) {
}
} else {
// Try to decode more data
size_t bytes_available = this->input_ring_buffer_->available();
size_t bytes_to_read = std::min(bytes_available, this->internal_buffer_size_ - this->input_buffer_length_);

if ((this->potentially_failed_count_ > 0) && (bytes_to_read == 0)) {
// We didn't have enough data last time, and we have no new data, so just return
return AudioDecoderState::DECODING;
}

// Shift unread data in input buffer to start
if ((this->input_buffer_length_ > 0) && (this->input_buffer_length_ < this->internal_buffer_size_)) {
if (this->input_buffer_length_ > 0) {
memmove(this->input_buffer_, this->input_buffer_current_, this->input_buffer_length_);
}
this->input_buffer_current_ = this->input_buffer_;

// read in new ring buffer data to fill the remaining input buffer
size_t bytes_available = this->input_ring_buffer_->available();
size_t bytes_to_read = std::min(bytes_available, this->internal_buffer_size_ - this->input_buffer_length_);
size_t bytes_read = 0;

if (bytes_to_read > 0) {
uint8_t *new_mp3_data = this->input_buffer_ + this->input_buffer_length_;
bytes_read = this->input_ring_buffer_->read((void *) new_mp3_data, bytes_to_read);
uint8_t *new_audio_data = this->input_buffer_ + this->input_buffer_length_;
bytes_read = this->input_ring_buffer_->read((void *) new_audio_data, bytes_to_read);

this->input_buffer_length_ += bytes_read;
}
Expand Down Expand Up @@ -153,6 +158,8 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) {
++this->potentially_failed_count_;
} else if (state == FileDecoderState::END_OF_FILE) {
this->end_of_file_ = true;
} else if (state == FileDecoderState::FAILED) {
return AudioDecoderState::FAILED;
} else {
this->potentially_failed_count_ = 0;
}
Expand Down Expand Up @@ -191,9 +198,7 @@ FileDecoderState AudioDecoder::decode_wav_() {

printf("sample channels: %d\n", this->channels_.value());
printf("sample rate: %" PRId32 "\n", this->sample_rate_.value());
// printf("number of samples: %d\n",
// this->wav_decoder_->chunk_bytes_left() / (this->channels_.value() * (this->bits_per_sample.value()
// / 8)));
printf("bits per sample: %d\n", this->sample_depth_.value());
this->wav_bytes_left_ = this->wav_decoder_->chunk_bytes_left();
header_finished = true;
} else if (result == wav_decoder::WAV_DECODER_SUCCESS_NEXT) {
Expand Down Expand Up @@ -226,11 +231,10 @@ FileDecoderState AudioDecoder::decode_wav_() {
this->wav_bytes_left_ -= bytes_to_write;
}

return FileDecoderState::MORE_TO_PROCESS;
return FileDecoderState::IDLE;
}

return FileDecoderState::END_OF_FILE;
// return DecoderState::FINISHED;
}

FileDecoderState AudioDecoder::decode_mp3_() {
Expand Down Expand Up @@ -280,14 +284,14 @@ FileDecoderState AudioDecoder::decode_flac_() {
// Header hasn't been read
auto result = this->flac_decoder_->read_header(this->input_buffer_length_);

if (result == flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
return FileDecoderState::POTENTIALLY_FAILED;
}

size_t bytes_consumed = this->flac_decoder_->get_bytes_index();
this->input_buffer_current_ += bytes_consumed;
this->input_buffer_length_ = this->flac_decoder_->get_bytes_left();

if (result == flac::FLAC_DECODER_HEADER_OUT_OF_DATA) {
return FileDecoderState::POTENTIALLY_FAILED;
}

if (result != flac::FLAC_DECODER_SUCCESS) {
printf("failed to read flac header. Error: %d\n", result);
return FileDecoderState::FAILED;
Expand All @@ -311,10 +315,11 @@ FileDecoderState AudioDecoder::decode_flac_() {
this->flac_decoder_->decode_frame(this->input_buffer_length_, (int16_t *) this->output_buffer_, &output_samples);

if (result == flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
// not an issue, just needs more data!
// Not an issue, just needs more data that we'll get next time.
return FileDecoderState::POTENTIALLY_FAILED;
} else if (result > flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
// Serious error, can't recover
printf("FLAC Decoder Error %d\n", result);
return FileDecoderState::FAILED;
}

Expand Down
56 changes: 46 additions & 10 deletions esphome/components/nabu/audio_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace nabu {

static const size_t QUEUE_COUNT = 10;

static const size_t HTTP_BUFFER_SIZE = 32 * 1024;
static const size_t HTTP_BUFFER_SIZE = 64 * 1024;
static const size_t BUFFER_SIZE_SAMPLES = 32768;
static const size_t BUFFER_SIZE_BYTES = BUFFER_SIZE_SAMPLES * sizeof(int16_t);

Expand Down Expand Up @@ -50,21 +50,23 @@ AudioPipeline::AudioPipeline(AudioMixer *mixer, AudioPipelineType pipeline_type)
this->event_group_ = xEventGroupCreate();
}

void AudioPipeline::start(const std::string &uri, const std::string &task_name, UBaseType_t priority) {
this->common_start_(task_name, priority);
void AudioPipeline::start(const std::string &uri, uint32_t target_sample_rate, const std::string &task_name,
UBaseType_t priority) {
this->common_start_(target_sample_rate, task_name, priority);

this->current_uri_ = uri;
xEventGroupSetBits(this->event_group_, READER_COMMAND_INIT_HTTP);
}

void AudioPipeline::start(media_player::MediaFile *media_file, const std::string &task_name, UBaseType_t priority) {
this->common_start_(task_name, priority);
void AudioPipeline::start(media_player::MediaFile *media_file, uint32_t target_sample_rate,
const std::string &task_name, UBaseType_t priority) {
this->common_start_(target_sample_rate, task_name, priority);

this->current_media_file_ = media_file;
xEventGroupSetBits(this->event_group_, READER_COMMAND_INIT_FILE);
}

void AudioPipeline::common_start_(const std::string &task_name, UBaseType_t priority) {
void AudioPipeline::common_start_(uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority) {
if (this->read_task_handle_ == nullptr) {
this->read_task_handle_ =
xTaskCreateStatic(AudioPipeline::read_task_, (task_name + "_read").c_str(), 8192, (void *) this, priority,
Expand All @@ -82,13 +84,33 @@ void AudioPipeline::common_start_(const std::string &task_name, UBaseType_t prio
}

this->stop();

this->target_sample_rate_ = target_sample_rate;
}

AudioPipelineState AudioPipeline::get_state() {
EventBits_t event_bits = xEventGroupGetBits(this->event_group_);
if (!this->read_task_handle_ && !this->decode_task_handle_ && !this->resample_task_handle_) {
return AudioPipelineState::STOPPED;
} else if (event_bits & (READER_MESSAGE_FINISHED | DECODER_MESSAGE_FINISHED | RESAMPLER_MESSAGE_FINISHED)) {
}

if ((event_bits & READER_MESSAGE_ERROR)) {
xEventGroupClearBits(this->event_group_, READER_MESSAGE_ERROR);
return AudioPipelineState::ERROR_READING;
}

if ((event_bits & DECODER_MESSAGE_ERROR)) {
xEventGroupClearBits(this->event_group_, DECODER_MESSAGE_ERROR);
return AudioPipelineState::ERROR_DECODING;
}

if ((event_bits & RESAMPLER_MESSAGE_ERROR)) {
xEventGroupClearBits(this->event_group_, RESAMPLER_MESSAGE_ERROR);
return AudioPipelineState::ERROR_RESAMPLING;
}

if ((event_bits & READER_MESSAGE_FINISHED) && (event_bits & DECODER_MESSAGE_FINISHED) &&
(event_bits & RESAMPLER_MESSAGE_FINISHED)) {
return AudioPipelineState::STOPPED;
}

Expand All @@ -105,6 +127,15 @@ void AudioPipeline::stop() {
true, // Wait for all the bits,
pdMS_TO_TICKS(200)); // Block temporarily before deleting each task

// Clear the ring buffer in the mixer; avoids playing incorrect audio when starting a new file while paused
CommandEvent command_event;
if (this->pipeline_type_ == AudioPipelineType::MEDIA) {
command_event.command = CommandEventType::CLEAR_MEDIA;
} else {
command_event.command = CommandEventType::CLEAR_ANNOUNCEMENT;
}
this->mixer_->send_command(&command_event);

xEventGroupClearBits(this->event_group_, ALL_BITS);
this->reset_ring_buffers();
}
Expand Down Expand Up @@ -187,8 +218,9 @@ void AudioPipeline::decode_task_(void *params) {
xEventGroupClearBits(this_pipeline->event_group_, EventGroupBits::DECODER_MESSAGE_FINISHED);

{
AudioDecoder decoder = AudioDecoder(this_pipeline->raw_file_ring_buffer_.get(),
this_pipeline->decoded_ring_buffer_.get(), BUFFER_SIZE_BYTES);
AudioDecoder decoder =
AudioDecoder(this_pipeline->raw_file_ring_buffer_.get(), this_pipeline->decoded_ring_buffer_.get(),
HTTP_BUFFER_SIZE); // BUFFER_SIZE_BYTES);
decoder.start(this_pipeline->current_media_file_type_);

bool has_stream_info = false;
Expand Down Expand Up @@ -256,7 +288,11 @@ void AudioPipeline::resample_task_(void *params) {
AudioResampler resampler =
AudioResampler(this_pipeline->decoded_ring_buffer_.get(), output_ring_buffer, BUFFER_SIZE_SAMPLES);

resampler.start(this_pipeline->current_stream_info_);
if (!resampler.start(this_pipeline->current_stream_info_, this_pipeline->target_sample_rate_)) {
// Unsupported incoming audio stream
xEventGroupSetBits(this_pipeline->event_group_,
EventGroupBits::RESAMPLER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP);
}

while (true) {
event_bits = xEventGroupGetBits(this_pipeline->event_group_);
Expand Down
15 changes: 8 additions & 7 deletions esphome/components/nabu/audio_pipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,19 @@ enum class AudioPipelineType : uint8_t {
};

enum class AudioPipelineState : uint8_t {
STARTING,
STARTED,
PLAYING,
PAUSED,
STOPPING,
STOPPED,
ERROR_READING,
ERROR_DECODING,
ERROR_RESAMPLING,
};

class AudioPipeline {
public:
AudioPipeline(AudioMixer *mixer, AudioPipelineType pipeline_type);

void start(const std::string &uri, const std::string &task_name, UBaseType_t priority = 1);
void start(media_player::MediaFile *media_file, const std::string &task_name, UBaseType_t priority = 1);
void start(const std::string &uri, uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority = 1);
void start(media_player::MediaFile *media_file, uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority = 1);

void stop();

Expand All @@ -50,7 +49,9 @@ class AudioPipeline {
void reset_ring_buffers();

protected:
void common_start_(const std::string &task_name, UBaseType_t priority);
void common_start_(uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority);

uint32_t target_sample_rate_;

AudioMixer *mixer_;

Expand Down
72 changes: 47 additions & 25 deletions esphome/components/nabu/audio_resampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,11 @@ AudioResampler::~AudioResampler() {
resampleFree(this->resampler_);
this->resampler_ = nullptr;
}

// dsps_fird_s16_aexx_free(&this->fir_filter_);
}

void AudioResampler::start(media_player::StreamInfo &stream_info) {
bool AudioResampler::start(media_player::StreamInfo &stream_info, uint32_t target_sample_rate) {
this->stream_info_ = stream_info;

this->input_buffer_current_ = this->input_buffer_;
Expand All @@ -63,33 +65,40 @@ void AudioResampler::start(media_player::StreamInfo &stream_info) {

this->needs_mono_to_stereo_ = (stream_info.channels != 2);

if ((stream_info.channels > 2) || (stream_info_.bits_per_sample != 16)) {
// TODO: Make these values configurable
return false;
}

if (stream_info.channels > 0) {
this->channel_factor_ = 2 / stream_info.channels;
printf("Converting %d channels to 2 channels\n", stream_info.channels);
}
constexpr float resample_rate = 16000.0f;
if (stream_info.sample_rate != 16000) {
if (stream_info.sample_rate == 48000) {
// Special case, we can do this a lot faster with esp-dsp code!
const uint8_t decimation = 48000 / 16000;
const float fir_out_offset = 0; //((FIR_FILTER_LENGTH / decimation / 2) - 1);

int8_t shift = this->generate_q15_fir_coefficients_(this->fir_filter_coeffecients_, (uint32_t) FIR_FILTER_LENGTH,
(float) 0.5 / decimation);
// dsps_16_array_rev(this->fir_filter_coeffecients_, (uint32_t) FIR_FILTER_LENGTH);
dsps_fird_init_s16(&this->fir_filter_, this->fir_filter_coeffecients_, this->fir_delay_, FIR_FILTER_LENGTH,
decimation, fir_out_offset, -shift);
this->decimation_filter_ = true;
this->needs_resampling_ = true;
// memset(this->fir_delay_, 0, FIR_FILTER_LENGTH*sizeof(int16_t));
} else {

if (stream_info.sample_rate != target_sample_rate) {
// if (stream_info.sample_rate == 48000) {
// // Special case, we can do this a lot faster with esp-dsp code!
// const uint8_t decimation = 48000 / 16000;
// const float fir_out_offset = 0; //((FIR_FILTER_LENGTH / decimation / 2) - 1);

// int8_t shift = this->generate_q15_fir_coefficients_(this->fir_filter_coeffecients_, (uint32_t)
// FIR_FILTER_LENGTH,
// (float) 0.5 / decimation);
// // dsps_16_array_rev(this->fir_filter_coeffecients_, (uint32_t) FIR_FILTER_LENGTH);
// dsps_fird_init_s16(&this->fir_filter_, this->fir_filter_coeffecients_, this->fir_delay_, FIR_FILTER_LENGTH,
// decimation, fir_out_offset, -shift);
// this->decimation_filter_ = true;
// this->needs_resampling_ = true;
// // memset(this->fir_delay_, 0, FIR_FILTER_LENGTH*sizeof(int16_t));
// } else
{
int flags = 0;

this->needs_resampling_ = true;

this->sample_ratio_ = resample_rate / static_cast<float>(stream_info.sample_rate);
this->sample_ratio_ = static_cast<float>(target_sample_rate) / static_cast<float>(stream_info.sample_rate);

printf("Resampling from %d Hz to 16000 Hz\n", stream_info.sample_rate);
printf("Resampling from %d Hz to %d Hz\n", stream_info.sample_rate, target_sample_rate);

if (this->sample_ratio_ < 1.0) {
this->lowpass_ratio_ -= (10.24 / 16);
Expand Down Expand Up @@ -137,6 +146,8 @@ void AudioResampler::start(media_player::StreamInfo &stream_info) {
} else {
this->needs_resampling_ = false;
}

return true;
}

AudioResamplerState AudioResampler::resample(bool stop_gracefully) {
Expand Down Expand Up @@ -165,6 +176,19 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) {
// Refill input buffer
//////

// Depending on whether we are converting mono to stereo or upsampling, we may need to restrict how many input
// samples we transfer
size_t max_input_samples = this->internal_buffer_samples_;

// Mono to stereo -> cut in half
max_input_samples /= (2 / this->stream_info_.channels);

if (this->sample_ratio_ > 1.0) {
// Upsampling -> reduce by a factor of the ceiling of sample_ratio_
uint32_t upsampling_factor = std::ceil(this->sample_ratio_);
max_input_samples /= upsampling_factor;
}

// Move old data to the start of the buffer
if (this->input_buffer_length_ > 0) {
memmove((void *) this->input_buffer_, (void *) this->input_buffer_current_, this->input_buffer_length_);
Expand All @@ -173,8 +197,7 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) {

// Copy new data to the end of the buffer
size_t bytes_available = this->input_ring_buffer_->available();
size_t bytes_to_read =
std::min(bytes_available, this->internal_buffer_samples_ * sizeof(int16_t) - this->input_buffer_length_);
size_t bytes_to_read = std::min(bytes_available, max_input_samples * sizeof(int16_t) - this->input_buffer_length_);

if (bytes_to_read > 0) {
int16_t *new_input_buffer_data = this->input_buffer_ + this->input_buffer_length_ / sizeof(int16_t);
Expand Down Expand Up @@ -207,7 +230,8 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) {
}
} else {
// Interleaved stereo samples
// TODO: This doesn't sound correct! I need to use separate filters for each channel so the delay line isn't mixed
// TODO: This doesn't sound correct! I need to use separate filters for each channel so the delay line isn't
// mixed
size_t available_samples = this->input_buffer_length_ / sizeof(int16_t);
for (int i = 0; i < available_samples / 2; ++i) {
// split interleaved samples into two separate streams
Expand Down Expand Up @@ -244,15 +268,12 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully) {

size_t samples_read = this->input_buffer_length_ / sizeof(int16_t);

// This is inefficient! It reconverts any samples that weren't used in the previous resampling run
for (int i = 0; i < samples_read; ++i) {
this->float_input_buffer_[i] = static_cast<float>(this->input_buffer_[i]) / 32768.0f;
}

size_t frames_read = samples_read / this->stream_info_.channels;

// The low pass filter seems to be causing glitches... probably because samples are repeated due to the above
// inefficiency!
if (this->pre_filter_) {
for (int i = 0; i < this->stream_info_.channels; ++i) {
biquad_apply_buffer(&this->lowpass_[i][0], this->float_input_buffer_ + i, frames_read,
Expand Down Expand Up @@ -363,6 +384,7 @@ int8_t AudioResampler::generate_q15_fir_coefficients_(int16_t *fir_coeffs, const
}

free(fir_window);
free(float_coeffs);

return shift;
}
Expand Down
Loading

0 comments on commit dd7fd38

Please sign in to comment.