From 259edac8c4b52014c9d30f4507b79d1a0cd31580 Mon Sep 17 00:00:00 2001 From: Andy Ross Date: Sun, 10 Dec 2023 21:21:05 -0800 Subject: [PATCH] google_rtc_audio_processing: Fixes for MTL branch Squashed fixups to this code from https://github.com/thesofproject/sof/pull/8571 Signed-off-by: Andy Ross --- src/audio/google/Kconfig | 22 +- src/audio/google/google_hotword_detect.c | 4 + .../google/google_rtc_audio_processing.c | 681 ++++++++++-------- 3 files changed, 400 insertions(+), 307 deletions(-) diff --git a/src/audio/google/Kconfig b/src/audio/google/Kconfig index cfb2f7cbbc97..9d61a11064b5 100644 --- a/src/audio/google/Kconfig +++ b/src/audio/google/Kconfig @@ -16,7 +16,6 @@ config COMP_GOOGLE_RTC_AUDIO_PROCESSING bool "Google Real Time Communication Audio processing" select COMP_BLOB select GOOGLE_RTC_AUDIO_PROCESSING_MOCK if COMP_STUBS - depends on IPC_MAJOR_4 default n help Select for Google real-time communication audio processing. It @@ -36,10 +35,11 @@ config COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ config COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_CHANNELS depends on COMP_GOOGLE_RTC_AUDIO_PROCESSING int "Number of channels to process for Google Real Time Communication Audio processing" - default 4 + default 2 if MT8195 + default 1 help - Sets the number of channels to process in the Google real-time - communication audio processing. + Sets the number of input/mic channels to process in the + Google real-time communication audio processing. config COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_AEC_REFERENCE_CHANNELS depends on COMP_GOOGLE_RTC_AUDIO_PROCESSING @@ -81,4 +81,18 @@ config GOOGLE_RTC_AUDIO_PROCESSING_MOCK Mock Google real-time communication audio processing. It allows for compilation check and basic audio flow checking. +config GOOGLE_RTC_AUDIO_PROCESSING_MIC_BITS + int "Sample format for capture input/output" + default 16 + help + Bits in the signed integer sample format used for the + capture stream. 16 or 32. 
+ +config GOOGLE_RTC_AUDIO_PROCESSING_REF_BITS + int "Sample format for reference input" + default 16 + help + Bits in the signed integer sample format used for playback + reference. 16 or 32. + endmenu diff --git a/src/audio/google/google_hotword_detect.c b/src/audio/google/google_hotword_detect.c index cf2c4ea3f00c..be45cd9d2d08 100644 --- a/src/audio/google/google_hotword_detect.c +++ b/src/audio/google/google_hotword_detect.c @@ -224,6 +224,7 @@ static int ghd_setup_model(struct comp_dev *dev) return 0; } +#if CONFIG_IPC_MAJOR_3 static int ghd_ctrl_set_bin_data(struct comp_dev *dev, struct sof_ipc_ctrl_data *cdata) { @@ -309,6 +310,7 @@ static int ghd_cmd(struct comp_dev *dev, int cmd, void *data, return -EINVAL; } } +#endif static int ghd_trigger(struct comp_dev *dev, int cmd) { @@ -459,7 +461,9 @@ static const struct comp_driver ghd_driver = { .create = ghd_create, .free = ghd_free, .params = ghd_params, +#if CONFIG_IPC_MAJOR_3 .cmd = ghd_cmd, +#endif .trigger = ghd_trigger, .copy = ghd_copy, .prepare = ghd_prepare, diff --git a/src/audio/google/google_rtc_audio_processing.c b/src/audio/google/google_rtc_audio_processing.c index f628d1433a2b..c06f13eb4471 100644 --- a/src/audio/google/google_rtc_audio_processing.c +++ b/src/audio/google/google_rtc_audio_processing.c @@ -41,7 +41,6 @@ #define GOOGLE_RTC_AUDIO_PROCESSING_FREQENCY_TO_PERIOD_FRAMES 100 #define GOOGLE_RTC_NUM_INPUT_PINS 2 -#define GOOGLE_RTC_NUM_OUTPUT_PINS 1 LOG_MODULE_REGISTER(google_rtc_audio_processing, CONFIG_SOF_LOG_LEVEL); @@ -53,55 +52,88 @@ DECLARE_SOF_RT_UUID("google-rtc-audio-processing", google_rtc_audio_processing_u DECLARE_TR_CTX(google_rtc_audio_processing_tr, SOF_UUID(google_rtc_audio_processing_uuid), LOG_LEVEL_INFO); +#ifndef __ZEPHYR__ +/* Zephyr provides uncached memory for static variables on SMP, but we + * are single-core component and know we can safely use the cache for + * AEC work. XTOS SOF is cached by default, so stub the Zephyr API. 
+ */ +#define arch_xtensa_cached_ptr(p) (p) +#endif + +static __aligned(PLATFORM_DCACHE_ALIGN) +uint8_t aec_mem_blob[CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_MEMORY_BUFFER_SIZE_BYTES]; + +#define NUM_FRAMES (CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ \ + / GOOGLE_RTC_AUDIO_PROCESSING_FREQENCY_TO_PERIOD_FRAMES) +#define REF_CHAN_MAX CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_AEC_REFERENCE_CHANNELS +#define MIC_CHAN_MAX CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_CHANNELS +#define REFOUT_CHAN MAX(REF_CHAN_MAX, MIC_CHAN_MAX) + +static __aligned(PLATFORM_DCACHE_ALIGN) +float refoutbuf[REFOUT_CHAN][NUM_FRAMES]; + +static __aligned(PLATFORM_DCACHE_ALIGN) +float micbuf[MIC_CHAN_MAX][NUM_FRAMES]; + struct google_rtc_audio_processing_comp_data { +#if CONFIG_IPC_MAJOR_4 struct sof_ipc4_aec_config config; +#endif uint32_t num_frames; int num_aec_reference_channels; int num_capture_channels; GoogleRtcAudioProcessingState *state; - float *aec_reference_buffer; - float *aec_reference_buffer_ptrs[SOF_IPC_MAX_CHANNELS]; - float *process_buffer; - float *process_buffer_ptrs[SOF_IPC_MAX_CHANNELS]; - uint8_t *memory_buffer; + float *raw_mic_buffers[MIC_CHAN_MAX]; + float *refout_buffers[REFOUT_CHAN]; + int buffered_frames; struct comp_data_blob_handler *tuning_handler; bool reconfigure; int aec_reference_source; int raw_microphone_source; + struct comp_buffer *ref_comp_buffer; + int ref_frame_bytes; + int out_frame_bytes; }; -void *GoogleRtcMalloc(size_t size) +#if CONFIG_GOOGLE_RTC_AUDIO_PROCESSING_MIC_BITS == 16 +typedef int16_t mic_sample_t; +#define MIC_SCALE ((float)SHRT_MAX) +#else +typedef int32_t mic_sample_t; +#define MIC_SCALE ((float)INT_MAX) +#endif + +#if CONFIG_GOOGLE_RTC_AUDIO_PROCESSING_REF_BITS == 16 +typedef int16_t ref_sample_t; +#define REF_SCALE ((float)SHRT_MAX) +#else +typedef int32_t ref_sample_t; +#define REF_SCALE ((float)INT_MAX) +#endif + +static inline float mic_to_float(mic_sample_t x) { - return rballoc(0, SOF_MEM_CAPS_RAM, size); + return 
(1.0f / MIC_SCALE) * (float)x; } -void GoogleRtcFree(void *ptr) +static inline mic_sample_t float_to_mic(float x) { - return rfree(ptr); + return (mic_sample_t)(MIC_SCALE * x); } -static void google_rtc_audio_processing_params(struct processing_module *mod, - struct sof_source *ref, - struct sof_source *mic, - struct sof_sink *out) +static inline float ref_to_float(ref_sample_t x) { - struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod); - struct sof_ipc_stream_params *params = mod->stream_params; - struct comp_buffer *sinkb, *sourceb; - struct list_item *source_list; - struct comp_dev *dev = mod->dev; + return (1.0f / REF_SCALE) * (float)x; +} - ipc4_base_module_cfg_to_stream_params(&mod->priv.cfg.base_cfg, params); - component_set_nearest_period_frames(dev, params->rate); - /* TODO - it does not work, to be checked before merging!! - * ipc4_update_source_format(ref, &cd->config.reference_fmt); - */ - source_set_channels(ref, - CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_AEC_REFERENCE_CHANNELS); - /* ipc4_update_source_format(mic, &mod->priv.cfg.base_cfg.audio_fmt); */ - source_set_channels(mic, CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_CHANNELS); - /* ipc4_update_sink_format(out, &mod->priv.cfg.base_cfg.audio_fmt); */ - sink_set_channels(out, CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_CHANNELS); +void *GoogleRtcMalloc(size_t size) +{ + return rballoc(0, SOF_MEM_CAPS_RAM, size); +} + +void GoogleRtcFree(void *ptr) +{ + return rfree(ptr); } static int google_rtc_audio_processing_reconfigure(struct processing_module *mod) @@ -251,6 +283,60 @@ static int google_rtc_audio_processing_reconfigure(struct processing_module *mod return 0; } +#if CONFIG_IPC_MAJOR_3 +static int google_rtc_audio_processing_cmd_set_data(struct processing_module *mod, + struct sof_ipc_ctrl_data *cdata) +{ + struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod); + int ret; + + switch (cdata->cmd) { + case SOF_CTRL_CMD_BINARY: + ret = 
comp_data_blob_set_cmd(cd->tuning_handler, cdata); + if (ret) + return ret; + /* Accept the new blob immediately so that userspace can write + * the control in quick succession without error. + * This ensures the last successful control write from userspace + * before prepare/copy is applied. + * The config blob is not referenced after reconfigure() returns + * so it is safe to call comp_get_data_blob here which frees the + * old blob. This assumes cmd() and prepare()/copy() cannot run + * concurrently which is the case when there is no preemption. + */ + if (comp_is_new_data_blob_available(cd->tuning_handler)) { + comp_get_data_blob(cd->tuning_handler, NULL, NULL); + cd->reconfigure = true; + } + return 0; + default: + comp_err(mod->dev, + "google_rtc_audio_processing_ctrl_set_data(): Only binary controls supported %d", + cdata->cmd); + return -EINVAL; + } +} + +static int google_rtc_audio_processing_cmd_get_data(struct processing_module *mod, + struct sof_ipc_ctrl_data *cdata, + size_t max_data_size) +{ + struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod); + + comp_info(mod->dev, "google_rtc_audio_processing_ctrl_get_data(): %u", cdata->cmd); + + switch (cdata->cmd) { + case SOF_CTRL_CMD_BINARY: + return comp_data_blob_get_cmd(cd->tuning_handler, cdata, max_data_size); + default: + comp_err(mod->dev, + "google_rtc_audio_processing_ctrl_get_data(): Only binary controls supported %d", + cdata->cmd); + return -EINVAL; + } +} +#endif + static int google_rtc_audio_processing_set_config(struct processing_module *mod, uint32_t param_id, enum module_cfg_fragment_position pos, uint32_t data_offset_size, @@ -258,6 +344,7 @@ static int google_rtc_audio_processing_set_config(struct processing_module *mod, size_t fragment_size, uint8_t *response, size_t response_size) { +#if CONFIG_IPC_MAJOR_4 struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod); int ret; @@ -291,14 +378,25 @@ static int 
google_rtc_audio_processing_set_config(struct processing_module *mod, } return 0; +#elif CONFIG_IPC_MAJOR_3 + struct sof_ipc_ctrl_data *cdata = (struct sof_ipc_ctrl_data *)fragment; + + return google_rtc_audio_processing_cmd_set_data(mod, cdata); +#endif } static int google_rtc_audio_processing_get_config(struct processing_module *mod, uint32_t param_id, uint32_t *data_offset_size, uint8_t *fragment, size_t fragment_size) { +#if CONFIG_IPC_MAJOR_4 comp_err(mod->dev, "google_rtc_audio_processing_ctrl_get_config(): Not supported"); return -EINVAL; +#elif CONFIG_IPC_MAJOR_3 + struct sof_ipc_ctrl_data *cdata = (struct sof_ipc_ctrl_data *)fragment; + + return google_rtc_audio_processing_cmd_get_data(mod, cdata, fragment_size); +#endif } static int google_rtc_audio_processing_init(struct processing_module *mod) @@ -306,9 +404,7 @@ static int google_rtc_audio_processing_init(struct processing_module *mod) struct module_data *md = &mod->priv; struct comp_dev *dev = mod->dev; struct google_rtc_audio_processing_comp_data *cd; - int ret; - int channel; - size_t buf_size; + int ret, i; comp_info(dev, "google_rtc_audio_processing_init()"); @@ -321,7 +417,7 @@ static int google_rtc_audio_processing_init(struct processing_module *mod) md->private = cd; - struct module_config *cfg = &md->cfg; +#if CONFIG_IPC_MAJOR_4 const struct ipc4_base_module_extended_cfg *base_cfg = md->cfg.init_data; struct ipc4_input_pin_format reference_fmt, output_fmt; const size_t size = sizeof(struct ipc4_input_pin_format); @@ -336,7 +432,7 @@ static int google_rtc_audio_processing_init(struct processing_module *mod) cd->config.reference_fmt = reference_fmt.audio_fmt; cd->config.output_fmt = output_fmt.audio_fmt; - cd->config = *(const struct sof_ipc4_aec_config *)cfg->init_data; +#endif cd->tuning_handler = comp_data_blob_handler_new(dev); if (!cd->tuning_handler) { @@ -346,21 +442,11 @@ static int google_rtc_audio_processing_init(struct processing_module *mod) cd->num_aec_reference_channels = 
CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_AEC_REFERENCE_CHANNELS; cd->num_capture_channels = CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_CHANNELS; - cd->num_frames = CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ / - GOOGLE_RTC_AUDIO_PROCESSING_FREQENCY_TO_PERIOD_FRAMES; - - if (CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_MEMORY_BUFFER_SIZE_BYTES > 0) { - cd->memory_buffer = rballoc(0, SOF_MEM_CAPS_RAM, - CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_MEMORY_BUFFER_SIZE_BYTES * - sizeof(cd->memory_buffer[0])); - if (!cd->memory_buffer) { - comp_err(dev, "google_rtc_audio_processing_init: failed to allocate memory buffer"); - ret = -ENOMEM; - goto fail; - } + cd->num_frames = NUM_FRAMES; - GoogleRtcAudioProcessingAttachMemoryBuffer(cd->memory_buffer, CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_MEMORY_BUFFER_SIZE_BYTES); - } + /* Giant blob of scratch memory. */ + GoogleRtcAudioProcessingAttachMemoryBuffer(arch_xtensa_cached_ptr(&aec_mem_blob[0]), + sizeof(aec_mem_blob)); cd->state = GoogleRtcAudioProcessingCreateWithConfig(CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ, cd->num_capture_channels, @@ -386,31 +472,12 @@ static int google_rtc_audio_processing_init(struct processing_module *mod) goto fail; } - buf_size = cd->num_frames * cd->num_capture_channels * sizeof(cd->process_buffer[0]); - comp_dbg(dev, "Allocating process_buffer of size %u", buf_size); - cd->process_buffer = rballoc(0, SOF_MEM_CAPS_RAM, buf_size); - if (!cd->process_buffer) { - comp_err(dev, "Allocating process_buffer failure"); - ret = -EINVAL; - goto fail; - } - bzero(cd->process_buffer, buf_size); - for (channel = 0; channel < cd->num_capture_channels; channel++) - cd->process_buffer_ptrs[channel] = &cd->process_buffer[channel * cd->num_frames]; - - buf_size = cd->num_frames * sizeof(cd->aec_reference_buffer[0]) * - cd->num_aec_reference_channels; - comp_dbg(dev, "Allocating aec_reference_buffer of size %u", buf_size); - cd->aec_reference_buffer = rballoc(0, SOF_MEM_CAPS_RAM, buf_size); - 
if (!cd->aec_reference_buffer) { - comp_err(dev, "Allocating aec_reference_buffer failure"); - ret = -ENOMEM; - goto fail; - } - bzero(cd->aec_reference_buffer, buf_size); - for (channel = 0; channel < cd->num_aec_reference_channels; channel++) - cd->aec_reference_buffer_ptrs[channel] = - &cd->aec_reference_buffer[channel * cd->num_frames]; + for (i = 0; i < MIC_CHAN_MAX; i++) + cd->raw_mic_buffers[i] = arch_xtensa_cached_ptr(&micbuf[i][0]); + for (i = 0; i < REFOUT_CHAN; i++) + cd->refout_buffers[i] = arch_xtensa_cached_ptr(&refoutbuf[i][0]); + + cd->buffered_frames = 0; /* comp_is_new_data_blob_available always returns false for the first * control write with non-empty config. The first non-empty write may @@ -428,13 +495,10 @@ static int google_rtc_audio_processing_init(struct processing_module *mod) fail: comp_err(dev, "google_rtc_audio_processing_init(): Failed"); if (cd) { - rfree(cd->aec_reference_buffer); if (cd->state) { GoogleRtcAudioProcessingFree(cd->state); } GoogleRtcAudioProcessingDetachMemoryBuffer(); - rfree(cd->memory_buffer); - rfree(cd->process_buffer); comp_data_blob_handler_free(cd->tuning_handler); rfree(cd); } @@ -450,15 +514,28 @@ static int google_rtc_audio_processing_free(struct processing_module *mod) GoogleRtcAudioProcessingFree(cd->state); cd->state = NULL; - rfree(cd->aec_reference_buffer); GoogleRtcAudioProcessingDetachMemoryBuffer(); - rfree(cd->memory_buffer); - rfree(cd->process_buffer); comp_data_blob_handler_free(cd->tuning_handler); rfree(cd); return 0; } +static bool is_ref_buffer(struct comp_dev *dev, struct comp_buffer *b) +{ +#if CONFIG_IPC_MAJOR_4 + return IPC4_SINK_QUEUE_ID(buf_get_id(b)) == SOF_AEC_FEEDBACK_QUEUE_ID; +#else + return b->source->pipeline->pipeline_id != dev->pipeline->pipeline_id; +#endif +} + +static enum sof_ipc_frame bits_fmt(int bits) +{ + if (bits == 32) + return SOF_IPC_FRAME_S32_LE; + return SOF_IPC_FRAME_S16_LE; +} + static int google_rtc_audio_processing_prepare(struct processing_module *mod, 
struct sof_source **sources, int num_of_sources, @@ -468,300 +545,297 @@ static int google_rtc_audio_processing_prepare(struct processing_module *mod, struct comp_dev *dev = mod->dev; struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod); struct list_item *source_buffer_list_item; - unsigned int aec_channels = 0, frame_fmt, rate; - int microphone_stream_channels = 0; - int output_stream_channels; - int ret; - int i = 0; + struct comp_buffer *output; + int ret = 0, i = 0; comp_info(dev, "google_rtc_audio_processing_prepare()"); - if (num_of_sources != GOOGLE_RTC_NUM_INPUT_PINS) { - comp_err(dev, "Expecting 2 sources - ref and mic, got %u", num_of_sources); - return -EINVAL; - } - - if (num_of_sinks != GOOGLE_RTC_NUM_OUTPUT_PINS) { - comp_err(dev, "Expecting 1 sink, got %u", num_of_sinks); - return -EINVAL; - } - /* searching for stream and feedback source buffers */ - for (i = 0; i < num_of_sources; i++) { - - if (IPC4_SINK_QUEUE_ID(source_get_id(sources[i])) == SOF_AEC_FEEDBACK_QUEUE_ID) { - + list_for_item(source_buffer_list_item, &dev->bsource_list) { + struct comp_buffer *source = container_of(source_buffer_list_item, + struct comp_buffer, sink_list); + if (is_ref_buffer(dev, source)) { cd->aec_reference_source = i; - aec_channels = source_get_channels(sources[i]); - comp_dbg(dev, "reference index = %d, channels = %d", i, aec_channels); + cd->ref_comp_buffer = source; } else { cd->raw_microphone_source = i; - microphone_stream_channels = source_get_channels(sources[i]); - comp_dbg(dev, "microphone index = %d, channels = %d", i, - microphone_stream_channels); } - source_set_alignment_constants(sources[i], 1, 1); + i++; } + output = list_first_item(&dev->bsink_list, struct comp_buffer, source_list); - google_rtc_audio_processing_params(mod, - sources[cd->aec_reference_source], - sources[cd->raw_microphone_source], - sinks[0]); - - - /* On some platform the playback output is left right left right due to a crossover - * later on the signal 
processing chain. That makes the aec_reference be 4 channels - * and the AEC should only use the 2 first. + /* Validate channel, format and rate on each of our three + * inputs. All must match our build-time configuration, AEC + * does not handle dynamic stream formats. */ - if (cd->num_aec_reference_channels > aec_channels) { - comp_err(dev, "unsupported number of AEC reference channels: %d", - aec_channels); - return -EINVAL; + int ref_fmt = source_get_frm_fmt(sources[cd->aec_reference_source]); + int ref_chan = source_get_channels(sources[cd->aec_reference_source]); + int ref_rate = source_get_rate(sources[cd->aec_reference_source]); + + int mic_fmt = source_get_frm_fmt(sources[cd->raw_microphone_source]); + int mic_chan = source_get_channels(sources[cd->raw_microphone_source]); + int mic_rate = source_get_rate(sources[cd->raw_microphone_source]); + + int out_fmt = sink_get_frm_fmt(sinks[0]); + int out_chan = sink_get_channels(sinks[0]); + int out_rate = sink_get_rate(sinks[0]); + + /* Too many channels is a soft failure, AEC treats only the first N */ + if (ref_chan > REF_CHAN_MAX) + comp_warn(dev, "Too many ref channels: %d, truncating to %d", + ref_chan, REF_CHAN_MAX); + if (mic_chan > MIC_CHAN_MAX) + comp_warn(dev, "Too many mic channels: %d, truncating to %d", + mic_chan, MIC_CHAN_MAX); + + if (out_chan != mic_chan) { + comp_err(dev, "Input/output mic channel mismatch"); + ret = -EINVAL; } - sink_set_alignment_constants(sinks[0], 1, 1); - frame_fmt = sink_get_frm_fmt(sinks[0]); - rate = sink_get_rate(sinks[0]); - output_stream_channels = sink_get_channels(sinks[0]); - - if (cd->num_capture_channels > microphone_stream_channels) { - comp_err(dev, "unsupported number of microphone channels: %d", - microphone_stream_channels); - return -EINVAL; + if (ref_rate != mic_rate || ref_rate != out_rate || + ref_rate != CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ) { + comp_err(dev, "Incorrect source/sink sample rate, expect %d\n", 
CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ); + ret = -EINVAL; } - if (cd->num_capture_channels > output_stream_channels) { - comp_err(dev, "unsupported number of output channels: %d", - output_stream_channels); - return -EINVAL; + if (ref_fmt != bits_fmt(CONFIG_GOOGLE_RTC_AUDIO_PROCESSING_REF_BITS)) { + comp_err(dev, "Reference stream must be %d bit samples\n", + CONFIG_GOOGLE_RTC_AUDIO_PROCESSING_REF_BITS); + ret = -EINVAL; } - switch (frame_fmt) { -#if CONFIG_FORMAT_S16LE - case SOF_IPC_FRAME_S16_LE: - break; -#endif /* CONFIG_FORMAT_S16LE */ - default: - comp_err(dev, "unsupported data format: %d", frame_fmt); - return -EINVAL; - } + if (mic_fmt != bits_fmt(CONFIG_GOOGLE_RTC_AUDIO_PROCESSING_MIC_BITS)) { + comp_err(dev, "Mic stream must be %d bit samples\n", + CONFIG_GOOGLE_RTC_AUDIO_PROCESSING_MIC_BITS); - if (rate != CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ) { - comp_err(dev, "unsupported samplerate: %d", rate); - return -EINVAL; + // FIXME: squash for now, the streams lie. See below. 
+ //ret = -EINVAL; } - /* check IBS/OBS in streams */ - if (cd->num_frames * source_get_frame_bytes(sources[cd->raw_microphone_source]) != - source_get_min_available(sources[cd->raw_microphone_source])) { - comp_warn(dev, "Incorrect IBS on microphone source: %d, expected %u", - source_get_min_available(sources[cd->raw_microphone_source]), - cd->num_frames * - source_get_frame_bytes(sources[cd->raw_microphone_source])); - } - if (cd->num_frames * sink_get_frame_bytes(sinks[0]) != - sink_get_min_free_space(sinks[0])) { - comp_warn(dev, "Incorrect OBS on sink :%d, expected %u", - sink_get_min_free_space(sinks[0]), - cd->num_frames * sink_get_frame_bytes(sinks[0])); - } - if (cd->num_frames * source_get_frame_bytes(sources[cd->aec_reference_source]) != - source_get_min_available(sources[cd->aec_reference_source])) { - comp_warn(dev, "Incorrect IBS on reference source: %d, expected %u", - source_get_min_available(sources[cd->aec_reference_source]), - cd->num_frames * - source_get_frame_bytes(sources[cd->aec_reference_source])); + if (mic_fmt != out_fmt) { + comp_err(dev, "Mismatched in/out frame format"); + ret = -EINVAL; } + // FIXME: the streams have a bad format on MTL, so we can't + // use this API. Compute by hand. + // + //cd->ref_frame_bytes = source_get_frame_bytes(sources[cd->aec_reference_source]); + //cd->out_frame_bytes = sink_get_frame_bytes(sinks[0]); + cd->ref_frame_bytes = sizeof(mic_sample_t) * source_get_channels(sources[cd->aec_reference_source]); + cd->out_frame_bytes = cd->ref_frame_bytes; + /* Blobs sent during COMP_STATE_READY is assigned to blob_handler->data * directly, so comp_is_new_data_blob_available always returns false. 
*/ - ret = google_rtc_audio_processing_reconfigure(mod); - if (ret) - return ret; + if (ret == 0) + ret = google_rtc_audio_processing_reconfigure(mod); - comp_dbg(dev, "google_rtc_audio_processing_prepare() success"); - return 0; + return ret; +} + +static int trigger_handler(struct processing_module *mod, int cmd) +{ + struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod); + + /* Ignore and halt propagation if we get a trigger from the + * playback pipeline: not for us. + */ + if (cd->ref_comp_buffer->walking) + return PPL_STATUS_PATH_STOP; + + /* Note: not module_adapter_set_state(). With IPC4 those are + * identical, but IPC3 has some odd-looking logic that + * validates that no sources are active when receiving a + * PRE_START command, which obviously breaks for our reference + * stream if playback was already running when our pipeline + * started + */ + return comp_set_state(mod->dev, cmd); } static int google_rtc_audio_processing_reset(struct processing_module *mod) { comp_dbg(mod->dev, "google_rtc_audio_processing_reset()"); - return 0; } -static int16_t convert_float_to_uint16_hifi(float data) +/* FunctionMostlyExistsToKeepLineLengthsUnderControl */ +static inline void execute_aec(struct google_rtc_audio_processing_comp_data *cd) { - const xtfloat ratio = 2 << 14; - xtfloat x0 = data; - xtfloat x1; - int16_t x; - - x1 = XT_MUL_S(x0, ratio); - x = XT_TRUNC_S(x1, 0); - - return x; + /* Note that reference input and mic output share the same + * buffer for efficiency + */ + GoogleRtcAudioProcessingAnalyzeRender_float32(cd->state, + (const float **)cd->refout_buffers); + GoogleRtcAudioProcessingProcessCapture_float32(cd->state, + (const float **)cd->raw_mic_buffers, + cd->refout_buffers); + cd->buffered_frames = 0; } -static float convert_uint16_to_float_hifi(int16_t data) +static void mic_in_copy(struct sof_source *src, int frames, float **dst_bufs, int frame0) { - const xtfloat ratio = 2 << 14; - xtfloat x0 = data; - float x; - - x = 
XT_DIV_S(x0, ratio); - - return x; + size_t chan = MIN(MIC_CHAN_MAX, source_get_channels(src)); + size_t samples = frames * chan; + size_t bytes = samples * sizeof(mic_sample_t); + const mic_sample_t *buf, *bufstart, *bufend; + float *dst[MIC_CHAN_MAX]; + int i, c, err; + size_t bufsz; + + for (i = 0; i < chan; i++) + dst[i] = &dst_bufs[i][frame0]; + + err = source_get_data(src, bytes, (void *)&buf, (void *)&bufstart, &bufsz); + assert(err == 0); + bufend = &bufstart[bufsz]; + + for (i = 0; i < frames; i++) { + for (c = 0; c < chan; c++) { + *dst[c]++ = mic_to_float(*buf++); + if (buf >= bufend) + buf = bufstart; + } + } + source_release_data(src, bytes); } -/* todo CONFIG_FORMAT_S32LE */ -static int google_rtc_audio_processing_process(struct processing_module *mod, - struct sof_source **sources, int num_of_sources, - struct sof_sink **sinks, int num_of_sinks) +/* Nearly verbatim except for types. Needs macro/inlining attention */ +static void ref_copy(struct sof_source *src, int frames, float **dst_bufs, int frame0) { + size_t chan = MIN(REF_CHAN_MAX, source_get_channels(src)); + size_t samples = frames * chan; + size_t bytes = samples * sizeof(ref_sample_t); + const ref_sample_t *buf, *bufstart, *bufend; + float *dst[REF_CHAN_MAX]; + int i, c, err; + size_t bufsz; + + for (i = 0; i < chan; i++) + dst[i] = &dst_bufs[i][frame0]; + + err = source_get_data(src, bytes, (void *)&buf, (void *)&bufstart, &bufsz); + assert(err == 0); + bufend = &bufstart[bufsz]; + + for (i = 0; i < frames; i++) { + for (c = 0; c < chan; c++) { + *dst[c]++ = ref_to_float(*buf++); + if (buf >= bufend) + buf = bufstart; + } + } + source_release_data(src, bytes); +} - int ret; - uint16_t const *src; - uint8_t const *src_buf_start; - uint8_t const *src_buf_end; - size_t src_buf_size; - - uint16_t const *ref; - uint8_t const *ref_buf_start; - uint8_t const *ref_buf_end; - size_t ref_buf_size; - - uint16_t *dst; - uint8_t *dst_buf_start; - uint8_t *dst_buf_end; - size_t dst_buf_size; - - size_t 
num_of_bytes_to_process; - int num_samples_remaining; - int num_frames_remaining; - int channel; - int nmax; - - struct sof_source *ref_stream, *src_stream; - struct sof_sink *dst_stream; +static void mic_out_copy(struct sof_sink *sink, int frames, float **src_bufs) +{ + size_t chan = MIN(MIC_CHAN_MAX, sink_get_channels(sink)); + size_t samples = frames * chan; + size_t bytes = samples * sizeof(mic_sample_t); + mic_sample_t *buf, *bufstart, *bufend; + int i, c, err; + size_t bufsz; + float *src[MIC_CHAN_MAX]; + + for (i = 0; i < chan; i++) + src[i] = src_bufs[i]; + + err = sink_get_buffer(sink, bytes, (void *)&buf, (void *)&bufstart, &bufsz); + assert(err == 0); + bufend = &bufstart[bufsz]; + + for (i = 0; i < frames; i++) { + for (c = 0; c < chan; c++) { + *buf++ = float_to_mic(*src[c]++); + if (buf >= bufend) + buf = bufstart; + } + } + sink_commit_buffer(sink, bytes); +} +static int mod_process(struct processing_module *mod, struct sof_source **sources, + int num_of_sources, struct sof_sink **sinks, int num_of_sinks) +{ struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod); - if (cd->reconfigure) { - ret = google_rtc_audio_processing_reconfigure(mod); - if (ret) - return ret; - } + if (cd->reconfigure) + google_rtc_audio_processing_reconfigure(mod); - src_stream = sources[cd->raw_microphone_source]; - ref_stream = sources[cd->aec_reference_source]; - dst_stream = sinks[0]; + struct sof_source *mic = sources[cd->raw_microphone_source]; + struct sof_source *ref = sources[cd->aec_reference_source]; + struct sof_sink *out = sinks[0]; - num_of_bytes_to_process = cd->num_frames * source_get_frame_bytes(ref_stream); - ret = source_get_data(ref_stream, num_of_bytes_to_process, (const void **)&ref, - (const void **)&ref_buf_start, &ref_buf_size); + bool ref_ok = cd->ref_comp_buffer->source->state == COMP_STATE_ACTIVE; - /* problems here are extremely unlikely, as it has been checked that - * the buffer contains enough data + /* Would be cleaner 
to store a bit of state to elide a bzero + * we already did, but we'd be doing the copy of real data in + * the ref_ok state anyway. */ - assert(!ret); - ref_buf_end = ref_buf_start + ref_buf_size; + if (!ref_ok) + bzero(refoutbuf, sizeof(refoutbuf)); + int fmic = source_get_data_frames_available(mic); + int fref = source_get_data_frames_available(ref); + int frames = ref_ok ? MIN(fmic, fref) : fmic; + int n, frames_rem; - /* can't use source_get_data_frames_available as number of available data may have changed - * other processes may put some data to the buffer + /* If fref > fmic (common at pipeline startup if + * playback was already active), we should consume the early + * samples so AEC compares the most recent values. */ - num_samples_remaining = num_of_bytes_to_process * source_get_channels(ref_stream) / - source_get_frame_bytes(ref_stream); - - /* de-interlace ref buffer, convert it to float */ - for (int i = 0; i < cd->num_frames; i++) { - for (channel = 0; channel < cd->num_aec_reference_channels; ++channel) { - cd->aec_reference_buffer_ptrs[channel][i] = - convert_uint16_to_float_hifi(ref[channel]); - } - ref += cd->num_aec_reference_channels; - if ((void *)ref >= (void *)ref_buf_end) - ref = (void *)ref_buf_start; - } + if (ref_ok && fref > fmic) + source_release_data(ref, (fref - fmic) * cd->ref_frame_bytes); - GoogleRtcAudioProcessingAnalyzeRender_float32( - cd->state, - (const float **)cd->aec_reference_buffer_ptrs); + for (frames_rem = frames; frames_rem; frames_rem -= n) { + n = MIN(frames_rem, cd->num_frames - cd->buffered_frames); - source_release_data(ref_stream, num_of_bytes_to_process); + int si = cd->buffered_frames * source_get_channels(mic); - /* process main stream - de interlace and convert */ - num_of_bytes_to_process = cd->num_frames * source_get_frame_bytes(src_stream); - ret = source_get_data(src_stream, num_of_bytes_to_process, (const void **)&src, - (const void **)&src_buf_start, &src_buf_size); - assert(!ret); - src_buf_end = 
src_buf_start + src_buf_size; + mic_in_copy(mic, n, cd->raw_mic_buffers, si); - for (int i = 0; i < cd->num_frames; i++) { - for (channel = 0; channel < cd->num_capture_channels; channel++) - cd->process_buffer_ptrs[channel][i] = - convert_uint16_to_float_hifi(src[channel]); + if (ref_ok) + ref_copy(ref, n, cd->refout_buffers, si); - src += cd->num_capture_channels; - if ((void *)src >= (void *)src_buf_end) - src = (void *)src_buf_start; - } - - source_release_data(src_stream, num_of_bytes_to_process); + cd->buffered_frames += n; - /* call the library, use same in/out buffers */ - GoogleRtcAudioProcessingProcessCapture_float32(cd->state, - (const float **)cd->process_buffer_ptrs, - cd->process_buffer_ptrs); - - /* same numnber of bytes to process for output stream as for mic stream */ - ret = sink_get_buffer(dst_stream, num_of_bytes_to_process, (void **)&dst, - (void **)&dst_buf_start, &dst_buf_size); - assert(!ret); - dst_buf_end = dst_buf_start + dst_buf_size; - - for (int i = 0; i < cd->num_frames; i++) { - for (channel = 0; channel < cd->num_capture_channels; channel++) - dst[channel] = - convert_float_to_uint16_hifi(cd->process_buffer_ptrs[channel][i]); - dst += cd->num_capture_channels; - if ((void *)dst >= (void *)dst_buf_end) - dst = (void *)dst_buf_start; + if (cd->buffered_frames >= cd->num_frames) { + execute_aec(cd); + mic_out_copy(out, n, cd->refout_buffers); + } } - sink_commit_buffer(dst_stream, num_of_bytes_to_process); - return 0; } -bool google_rtc_audio_is_ready_to_process(struct processing_module *mod, - struct sof_source **sources, int num_of_sources, - struct sof_sink **sinks, int num_of_sinks) +static bool mod_is_ready_to_process(struct processing_module *mod, + struct sof_source **sources, int num_of_sources, + struct sof_sink **sinks, int num_of_sinks) { struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod); - struct sof_source *ref_stream, *mic_stream; - struct sof_sink *out_stream; - size_t min_ref_bytes; - - - 
/* check if both input streams and output stream have enough data/space */ - mic_stream = sources[cd->raw_microphone_source]; - ref_stream = sources[cd->aec_reference_source]; - out_stream = sinks[0]; + struct sof_source *mic = sources[cd->raw_microphone_source]; + struct sof_source *ref = sources[cd->aec_reference_source]; + struct sof_sink *out = sinks[0]; + bool ref_ok = cd->ref_comp_buffer->source->state == COMP_STATE_ACTIVE; /* this should source_get_min_available(ref_stream)!!! * Currently the topology sets IBS incorrectly */ - if (source_get_data_available(ref_stream) < cd->num_frames * - source_get_frame_bytes(ref_stream)) + if (ref_ok && (source_get_data_available(ref) + < cd->num_frames * cd->ref_frame_bytes)) return false; - if (source_get_data_available(mic_stream) < source_get_min_available(mic_stream)) + if (source_get_data_available(mic) < source_get_min_available(mic)) return false; - if (sink_get_free_size(out_stream) < sink_get_min_free_space(out_stream)) + /* Output comes out all at once, the output sink must have + * space for the full block + */ + if (sink_get_free_size(out) < cd->num_frames * cd->out_frame_bytes) return false; return true; @@ -770,12 +844,13 @@ bool google_rtc_audio_is_ready_to_process(struct processing_module *mod, static struct module_interface google_rtc_audio_processing_interface = { .init = google_rtc_audio_processing_init, .free = google_rtc_audio_processing_free, - .process = google_rtc_audio_processing_process, + .process = mod_process, .prepare = google_rtc_audio_processing_prepare, .set_configuration = google_rtc_audio_processing_set_config, .get_configuration = google_rtc_audio_processing_get_config, + .trigger = trigger_handler, .reset = google_rtc_audio_processing_reset, - .is_ready_to_process = google_rtc_audio_is_ready_to_process, + .is_ready_to_process = mod_is_ready_to_process, }; DECLARE_MODULE_ADAPTER(google_rtc_audio_processing_interface,