From 720c38a742799cf8926046c6d3f51e13b20676fa Mon Sep 17 00:00:00 2001 From: Andy Ross Date: Tue, 12 Dec 2023 10:31:47 -0800 Subject: [PATCH] google_rtc_audio_processing: Major rework Lots of work on AEC, with an eye to getting it working on main, where it had bitrotten, and pulling in various features (in particular IPC3 pipeline state management and DP scheduling) that had merged in other branches. Dynamically configure stream formats (sample format/rate and channel count) from the connected streams at prepare() time instead of relying on build-time tuning. Port the code to use the source/sink API, in as sophisticated a manner as I can find. Copies unroll cleanly into just a few instructions per sample, including integer/float conversions and de-/interleaving. Support both 16 and 32 bit sample formats, with a fairly clever inlining scheme to share as much code as possible between them. The component will select "copy" function pointers at prepare() time. Support an is_ready_to_process() method to enable AEC's use as an async component in a DP scheduler. The large buffers required by this component (input and output staging and an internally-managed pool/heap block) are now static symbols instead of dynamic memory from the heap. These are very large, taking up about half of what is available to the linker on MTL. Relying on heap allocation is just dangerous in this context. This fully decouples AEC from the playback stream. It will run without an active reference happily, feeding zeros to the processing, and pick up in stride when the pipeline starts. This requires adding a trigger handler for pipeline control in IPC3, which will propagate certain triggers across pipeline boundaries when shutting down playback streams, breaking active capture. Fixes a few bugs and misfeatures also: + Chunk the copies by full buffer strides between AEC processing calls instead of testing at each copied frame.
+ Copy the reference and mic streams in tandem, preventing them from becoming out of sync if the devices weren't themselves synchronized. + Copy the AEC results to the output stream after the call to ProcessCapture() instead of before. This was a hidden latency bug in the original code, I think. + Cleans up the Kconfig to remove stale variables and guard all the component-specific tunables under the top-level component variable. Also uses a default instead of a select to couple to CONFIG_STUBS, allowing AEC to be manually tuned. Signed-off-by: Andy Ross --- app/stub_build_all_ipc3.conf | 1 + src/arch/host/configs/library_defconfig | 1 + src/audio/google/Kconfig | 28 +- .../google/google_rtc_audio_processing.c | 557 ++++++++++-------- .../google/google_rtc_audio_processing_mock.c | 60 +- 5 files changed, 363 insertions(+), 284 deletions(-) diff --git a/app/stub_build_all_ipc3.conf b/app/stub_build_all_ipc3.conf index 77c47241cde5..951cbb0a6c51 100644 --- a/app/stub_build_all_ipc3.conf +++ b/app/stub_build_all_ipc3.conf @@ -1,5 +1,6 @@ CONFIG_COMP_STUBS=y CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING=y +CONFIG_GOOGLE_RTC_AUDIO_PROCESSING_MOCK=y CONFIG_COMP_TONE=n CONFIG_COMP_CROSSOVER=y CONFIG_COMP_DRC=y diff --git a/src/arch/host/configs/library_defconfig b/src/arch/host/configs/library_defconfig index 071e30808f3c..b6839e25f250 100644 --- a/src/arch/host/configs/library_defconfig +++ b/src/arch/host/configs/library_defconfig @@ -4,6 +4,7 @@ CONFIG_COMP_DCBLOCK=y CONFIG_COMP_DRC=y CONFIG_COMP_FIR=y CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING=y +CONFIG_GOOGLE_RTC_AUDIO_PROCESSING_MOCK=y CONFIG_COMP_IIR=y CONFIG_COMP_MFCC=y CONFIG_COMP_MODULE_ADAPTER=y diff --git a/src/audio/google/Kconfig b/src/audio/google/Kconfig index 8ca513dc3b6c..ac323716a7f8 100644 --- a/src/audio/google/Kconfig +++ b/src/audio/google/Kconfig @@ -15,7 +15,6 @@ config COMP_GOOGLE_HOTWORD_DETECT config COMP_GOOGLE_RTC_AUDIO_PROCESSING bool "Google Real Time Communication Audio processing" select COMP_BLOB 
- select GOOGLE_RTC_AUDIO_PROCESSING_MOCK if COMP_STUBS default n help Select for Google real-time communication audio processing. It @@ -24,6 +23,8 @@ config COMP_GOOGLE_RTC_AUDIO_PROCESSING This component takes raw microphones input and playback reference and outputs an echo-free microphone signal. +if COMP_GOOGLE_RTC_AUDIO_PROCESSING + config COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ depends on COMP_GOOGLE_RTC_AUDIO_PROCESSING int "Sample rate for Google Real Time Communication Audio processing" @@ -32,22 +33,15 @@ config COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ Sets the sample rate for the memory buffer for the Google real-time communication audio processing. -config COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_CHANNELS - depends on COMP_GOOGLE_RTC_AUDIO_PROCESSING - int "Number of channels to process for Google Real Time Communication Audio processing" - default 2 if MT8195 - default 1 - help - Sets the number of input/mic channels to process in the - Google real-time communication audio processing. - -config COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_AEC_REFERENCE_CHANNELS - depends on COMP_GOOGLE_RTC_AUDIO_PROCESSING - int "Number of AEC reference channels for Google Real Time Communication Audio processing" +config COMP_GOOGLE_RTC_AUDIO_PROCESSING_CHANNEL_MAX + int "Max number of AEC channels" default 2 help - Sets the number AEC reference channels in the Google real-time - communication audio processing. + Sets the maximum number of source/sink channels Google Real + Time Communication Audio Processing will use. This is a + computation and memory budget tunable. Channel counts are + retrieved at runtime, but channels higher than this number + are ignored (on input) or cleared (on output).
config COMP_GOOGLE_RTC_AUDIO_PROCESSING_MEMORY_BUFFER_SIZE_BYTES depends on COMP_GOOGLE_RTC_AUDIO_PROCESSING @@ -75,10 +69,12 @@ config COMP_GOOGLE_RTC_AUDIO_PROCESSING_MIC_HEADROOM_LINEAR config GOOGLE_RTC_AUDIO_PROCESSING_MOCK bool "Google Real Time Communication Audio processing mock" - default n + default y if COMP_STUBS depends on COMP_GOOGLE_RTC_AUDIO_PROCESSING help Mock Google real-time communication audio processing. It allows for compilation check and basic audio flow checking. +endif # COMP_GOOGLE_RTC_AUDIO_PROCESSING + endmenu diff --git a/src/audio/google/google_rtc_audio_processing.c b/src/audio/google/google_rtc_audio_processing.c index 3834278ba42e..b55a464ef9d6 100644 --- a/src/audio/google/google_rtc_audio_processing.c +++ b/src/audio/google/google_rtc_audio_processing.c @@ -52,6 +52,31 @@ DECLARE_SOF_RT_UUID("google-rtc-audio-processing", google_rtc_audio_processing_u DECLARE_TR_CTX(google_rtc_audio_processing_tr, SOF_UUID(google_rtc_audio_processing_uuid), LOG_LEVEL_INFO); +#if !(defined(__ZEPHYR__) && defined(CONFIG_XTENSA)) +/* Zephyr provides uncached memory for static variables on SMP, but we + * are a single-core component and know we can safely use the cache for + * AEC work. XTOS SOF is cached by default, so stub the Zephyr API.
+ */ +#define arch_xtensa_cached_ptr(p) (p) +#endif + +#ifndef __ZEPHYR__ +#define ALWAYS_INLINE inline __attribute__((always_inline)) +#endif + +static __aligned(PLATFORM_DCACHE_ALIGN) +uint8_t aec_mem_blob[CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_MEMORY_BUFFER_SIZE_BYTES]; + +#define NUM_FRAMES (CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ \ + / GOOGLE_RTC_AUDIO_PROCESSING_FREQENCY_TO_PERIOD_FRAMES) +#define CHAN_MAX CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_CHANNEL_MAX + +static __aligned(PLATFORM_DCACHE_ALIGN) +float refoutbuf[CHAN_MAX][NUM_FRAMES]; + +static __aligned(PLATFORM_DCACHE_ALIGN) +float micbuf[CHAN_MAX][NUM_FRAMES]; + struct google_rtc_audio_processing_comp_data { #if CONFIG_IPC_MAJOR_4 struct sof_ipc4_aec_config config; @@ -60,17 +85,19 @@ struct google_rtc_audio_processing_comp_data { int num_aec_reference_channels; int num_capture_channels; GoogleRtcAudioProcessingState *state; - int16_t *aec_reference_buffer; - int aec_reference_frame_index; - int16_t *raw_mic_buffer; - int raw_mic_buffer_frame_index; - int16_t *output_buffer; - int output_buffer_frame_index; - uint8_t *memory_buffer; + float *raw_mic_buffers[CHAN_MAX]; + float *refout_buffers[CHAN_MAX]; + int buffered_frames; struct comp_data_blob_handler *tuning_handler; bool reconfigure; int aec_reference_source; int raw_microphone_source; + struct comp_buffer *ref_comp_buffer; + int ref_framesz; + int cap_framesz; + void (*mic_copy)(struct sof_source *src, int frames, float **dst_bufs, int frame0); + void (*ref_copy)(struct sof_source *src, int frames, float **dst_bufs, int frame0); + void (*out_copy)(struct sof_sink *dst, int frames, float **src_bufs); }; void *GoogleRtcMalloc(size_t size) @@ -83,30 +110,131 @@ void GoogleRtcFree(void *ptr) return rfree(ptr); } -#if CONFIG_IPC_MAJOR_4 -static void google_rtc_audio_processing_params(struct processing_module *mod) +static ALWAYS_INLINE float s16_to_float(const char *ptr) { - struct google_rtc_audio_processing_comp_data *cd = 
module_get_private_data(mod); - struct sof_ipc_stream_params *params = mod->stream_params; - struct comp_buffer *sinkb, *sourceb; - struct list_item *source_list; - struct comp_dev *dev = mod->dev; + float scale = -(float)SHRT_MIN; + float x = *(int16_t *)ptr; - ipc4_base_module_cfg_to_stream_params(&mod->priv.cfg.base_cfg, params); - component_set_nearest_period_frames(dev, params->rate); + return (1.0f / scale) * x; +} + +static ALWAYS_INLINE void float_to_s16(float x, char *dst) +{ + float scale = -(float)SHRT_MIN; + float min = -1.0f; + float max = 1.0f - 1.0f / scale; + int16_t i = (int16_t)(scale * (x < min ? min : (x > max ? max : x))); + + *(int16_t *)dst = i; +} + +static ALWAYS_INLINE float s32_to_float(const char *ptr) +{ + float scale = -(float)INT_MIN; + float x = *(int32_t *)ptr; - list_for_item(source_list, &dev->bsource_list) { - sourceb = container_of(source_list, struct comp_buffer, sink_list); - if (IPC4_SINK_QUEUE_ID(buf_get_id(sourceb)) == SOF_AEC_FEEDBACK_QUEUE_ID) - ipc4_update_buffer_format(sourceb, &cd->config.reference_fmt); - else - ipc4_update_buffer_format(sourceb, &mod->priv.cfg.base_cfg.audio_fmt); + return (1.0f / scale) * x; +} + +static ALWAYS_INLINE void float_to_s32(float x, char *dst) +{ + float scale = -(float)INT_MIN; + float min = -1.0f; + float max = 1.0f - 1.0f / scale; + int32_t i = (int32_t)(scale * (x < min ? min : (x > max ? 
max : x))); + + *(int32_t *)dst = i; +} + +static ALWAYS_INLINE void source_to_float(struct sof_source *src, float **dst_bufs, + float (*cvt_fn)(const char *), + int sample_sz, int frame0, int frames) +{ + size_t chan = source_get_channels(src); + size_t bytes = frames * chan * sample_sz; + int i, c, err, ndst = MIN(chan, CHAN_MAX); + const char *buf, *bufstart, *bufend; + float *dst[CHAN_MAX]; + size_t bufsz; + + for (i = 0; i < ndst; i++) + dst[i] = &dst_bufs[i][frame0]; + + err = source_get_data(src, bytes, (void *)&buf, (void *)&bufstart, &bufsz); + assert(err == 0); + bufend = &bufstart[bufsz]; + + while (frames) { + size_t n = MIN(frames, bufsz - (buf - bufstart)); + + for (i = 0; i < n; i++) { + for (c = 0; c < ndst; c++) { + *dst[c]++ = cvt_fn(buf); + buf += sample_sz; + } + buf += sample_sz * (chan - ndst); /* skip unused channels */ + } + frames -= n; + if (buf >= bufend) + buf = bufstart; } + source_release_data(src, bytes); +} + +static ALWAYS_INLINE void float_to_sink(struct sof_sink *dst, float **src_bufs, + void (*cvt_fn)(float, char *), + int sample_sz, int frames) +{ + size_t chan = sink_get_channels(dst); + size_t bytes = frames * chan * sample_sz; + int i, c, err, nsrc = MIN(chan, CHAN_MAX); + char *buf, *bufstart, *bufend; + float *src[CHAN_MAX]; + size_t bufsz; - sinkb = list_first_item(&dev->bsink_list, struct comp_buffer, source_list); - ipc4_update_buffer_format(sinkb, &mod->priv.cfg.base_cfg.audio_fmt); + for (i = 0; i < nsrc; i++) + src[i] = &src_bufs[i][0]; + + err = sink_get_buffer(dst, bytes, (void *)&buf, (void *)&bufstart, &bufsz); + assert(err == 0); + bufend = &bufstart[bufsz]; + + while (frames) { + size_t n = MIN(frames, bufsz - (buf - bufstart)); + + for (i = 0; i < n; i++) { + for (c = 0; c < nsrc; c++) { + cvt_fn(*src[c]++, buf); + buf += sample_sz; + } + buf += sample_sz * (chan - nsrc); /* skip unused channels */ + } + frames -= n; + if (buf >= bufend) + buf = bufstart; + } + sink_commit_buffer(dst, bytes); +} + +static void 
source_copy16(struct sof_source *src, int frames, float **dst_bufs, int frame0) +{ + source_to_float(src, dst_bufs, s16_to_float, sizeof(int16_t), frame0, frames); +} + +static void source_copy32(struct sof_source *src, int frames, float **dst_bufs, int frame0) +{ + source_to_float(src, dst_bufs, s32_to_float, sizeof(int32_t), frame0, frames); +} + +static void sink_copy16(struct sof_sink *dst, int frames, float **src_bufs) +{ + float_to_sink(dst, src_bufs, float_to_s16, sizeof(int16_t), frames); +} + +static void sink_copy32(struct sof_sink *dst, int frames, float **src_bufs) +{ + float_to_sink(dst, src_bufs, float_to_s32, sizeof(int32_t), frames); } -#endif static int google_rtc_audio_processing_reconfigure(struct processing_module *mod) { @@ -376,7 +504,7 @@ static int google_rtc_audio_processing_init(struct processing_module *mod) struct module_data *md = &mod->priv; struct comp_dev *dev = mod->dev; struct google_rtc_audio_processing_comp_data *cd; - int ret; + int ret, i; comp_info(dev, "google_rtc_audio_processing_init()"); @@ -412,23 +540,13 @@ static int google_rtc_audio_processing_init(struct processing_module *mod) goto fail; } - cd->num_aec_reference_channels = CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_AEC_REFERENCE_CHANNELS; - cd->num_capture_channels = CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_CHANNELS; - cd->num_frames = CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ / - GOOGLE_RTC_AUDIO_PROCESSING_FREQENCY_TO_PERIOD_FRAMES; - - if (CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_MEMORY_BUFFER_SIZE_BYTES > 0) { - cd->memory_buffer = rballoc(0, SOF_MEM_CAPS_RAM, - CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_MEMORY_BUFFER_SIZE_BYTES * - sizeof(cd->memory_buffer[0])); - if (!cd->memory_buffer) { - comp_err(dev, "google_rtc_audio_processing_init: failed to allocate memory buffer"); - ret = -ENOMEM; - goto fail; - } + cd->num_aec_reference_channels = CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_CHANNEL_MAX; + cd->num_capture_channels = 
CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_CHANNEL_MAX; + cd->num_frames = NUM_FRAMES; - GoogleRtcAudioProcessingAttachMemoryBuffer(cd->memory_buffer, CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_MEMORY_BUFFER_SIZE_BYTES); - } + /* Giant blob of scratch memory. */ + GoogleRtcAudioProcessingAttachMemoryBuffer(arch_xtensa_cached_ptr(&aec_mem_blob[0]), + sizeof(aec_mem_blob)); cd->state = GoogleRtcAudioProcessingCreateWithConfig(CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ, cd->num_capture_channels, @@ -454,36 +572,12 @@ static int google_rtc_audio_processing_init(struct processing_module *mod) goto fail; } - cd->raw_mic_buffer = rballoc( - 0, SOF_MEM_CAPS_RAM, - cd->num_frames * cd->num_capture_channels * sizeof(cd->raw_mic_buffer[0])); - if (!cd->raw_mic_buffer) { - ret = -EINVAL; - goto fail; + for (i = 0; i < CHAN_MAX; i++) { + cd->raw_mic_buffers[i] = arch_xtensa_cached_ptr(&micbuf[i][0]); + cd->refout_buffers[i] = arch_xtensa_cached_ptr(&refoutbuf[i][0]); } - bzero(cd->raw_mic_buffer, cd->num_frames * cd->num_capture_channels * sizeof(cd->raw_mic_buffer[0])); - cd->raw_mic_buffer_frame_index = 0; - - cd->aec_reference_buffer = rballoc( - 0, SOF_MEM_CAPS_RAM, - cd->num_frames * sizeof(cd->aec_reference_buffer[0]) * - cd->num_aec_reference_channels); - if (!cd->aec_reference_buffer) { - ret = -ENOMEM; - goto fail; - } - bzero(cd->aec_reference_buffer, cd->num_frames * cd->num_aec_reference_channels * sizeof(cd->aec_reference_buffer[0])); - cd->aec_reference_frame_index = 0; - cd->output_buffer = rballoc( - 0, SOF_MEM_CAPS_RAM, - cd->num_frames * cd->num_capture_channels * sizeof(cd->output_buffer[0])); - if (!cd->output_buffer) { - ret = -ENOMEM; - goto fail; - } - bzero(cd->output_buffer, cd->num_frames * sizeof(cd->output_buffer[0])); - cd->output_buffer_frame_index = 0; + cd->buffered_frames = 0; /* comp_is_new_data_blob_available always returns false for the first * control write with non-empty config. 
The first non-empty write may @@ -501,14 +595,10 @@ static int google_rtc_audio_processing_init(struct processing_module *mod) fail: comp_err(dev, "google_rtc_audio_processing_init(): Failed"); if (cd) { - rfree(cd->output_buffer); - rfree(cd->aec_reference_buffer); if (cd->state) { GoogleRtcAudioProcessingFree(cd->state); } GoogleRtcAudioProcessingDetachMemoryBuffer(); - rfree(cd->memory_buffer); - rfree(cd->raw_mic_buffer); comp_data_blob_handler_free(cd->tuning_handler); rfree(cd); } @@ -524,16 +614,21 @@ static int google_rtc_audio_processing_free(struct processing_module *mod) GoogleRtcAudioProcessingFree(cd->state); cd->state = NULL; - rfree(cd->output_buffer); - rfree(cd->aec_reference_buffer); GoogleRtcAudioProcessingDetachMemoryBuffer(); - rfree(cd->memory_buffer); - rfree(cd->raw_mic_buffer); comp_data_blob_handler_free(cd->tuning_handler); rfree(cd); return 0; } +static bool is_ref_buffer(struct comp_dev *dev, struct comp_buffer *b) +{ +#if CONFIG_IPC_MAJOR_4 + return IPC4_SINK_QUEUE_ID(buf_get_id(b)) == SOF_AEC_FEEDBACK_QUEUE_ID; +#else + return b->source->pipeline->pipeline_id != dev->pipeline->pipeline_id; +#endif +} + static int google_rtc_audio_processing_prepare(struct processing_module *mod, struct sof_source **sources, int num_of_sources, @@ -543,223 +638,217 @@ static int google_rtc_audio_processing_prepare(struct processing_module *mod, struct comp_dev *dev = mod->dev; struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod); struct list_item *source_buffer_list_item; - struct comp_buffer *output; - unsigned int aec_channels = 0, frame_fmt, rate; - int microphone_stream_channels = 0; - int output_stream_channels; - int ret; - int i = 0; + int ret = 0, i = 0; comp_info(dev, "google_rtc_audio_processing_prepare()"); -#if CONFIG_IPC_MAJOR_4 - google_rtc_audio_processing_params(mod); -#endif - /* searching for stream and feedback source buffers */ list_for_item(source_buffer_list_item, &dev->bsource_list) { struct comp_buffer 
*source = container_of(source_buffer_list_item, struct comp_buffer, sink_list); -#if CONFIG_IPC_MAJOR_4 - if (IPC4_SINK_QUEUE_ID(buf_get_id(source)) == - SOF_AEC_FEEDBACK_QUEUE_ID) { -#else - if (source->source->pipeline->pipeline_id != dev->pipeline->pipeline_id) { -#endif + if (is_ref_buffer(dev, source)) { cd->aec_reference_source = i; - aec_channels = audio_stream_get_channels(&source->stream); - comp_dbg(dev, "reference index = %d, channels = %d", i, aec_channels); + cd->ref_comp_buffer = source; } else { cd->raw_microphone_source = i; - microphone_stream_channels = audio_stream_get_channels(&source->stream); - comp_dbg(dev, "microphone index = %d, channels = %d", i, - microphone_stream_channels); } - - audio_stream_init_alignment_constants(1, 1, &source->stream); i++; } - output = list_first_item(&dev->bsink_list, struct comp_buffer, source_list); - - /* On some platform the playback output is left right left right due to a crossover - * later on the signal processing chain. That makes the aec_reference be 4 channels - * and the AEC should only use the 2 first. + /* Validate channel, format and rate on each of our three + * inputs. All must match our build-time configuration, AEC + * does not handle dynamic stream formats.
*/ - if (cd->num_aec_reference_channels > aec_channels) { - comp_err(dev, "unsupported number of AEC reference channels: %d", - aec_channels); - return -EINVAL; - } + int ref_fmt = source_get_frm_fmt(sources[cd->aec_reference_source]); + int ref_chan = source_get_channels(sources[cd->aec_reference_source]); + int ref_rate = source_get_rate(sources[cd->aec_reference_source]); - audio_stream_init_alignment_constants(1, 1, &output->stream); - frame_fmt = audio_stream_get_frm_fmt(&output->stream); - rate = audio_stream_get_rate(&output->stream); - output_stream_channels = audio_stream_get_channels(&output->stream); + int mic_fmt = source_get_frm_fmt(sources[cd->raw_microphone_source]); + int mic_chan = source_get_channels(sources[cd->raw_microphone_source]); + int mic_rate = source_get_rate(sources[cd->raw_microphone_source]); - if (cd->num_capture_channels > microphone_stream_channels) { - comp_err(dev, "unsupported number of microphone channels: %d", - microphone_stream_channels); - return -EINVAL; + int out_fmt = sink_get_frm_fmt(sinks[0]); + int out_chan = sink_get_channels(sinks[0]); + int out_rate = sink_get_rate(sinks[0]); + + /* Too many channels is a soft failure, AEC treats only the first N */ + if (mic_chan > CHAN_MAX) + comp_warn(dev, "Too many mic channels: %d, truncating to %d", + mic_chan, CHAN_MAX); + + if (out_chan != mic_chan) { + comp_err(dev, "Input/output mic channel mismatch"); + ret = -EINVAL; } - if (cd->num_capture_channels > output_stream_channels) { - comp_err(dev, "unsupported number of output channels: %d", - output_stream_channels); - return -EINVAL; + cd->num_aec_reference_channels = MIN(ref_chan, CHAN_MAX); + cd->num_capture_channels = MIN(mic_chan, CHAN_MAX); + + if (ref_rate != mic_rate || ref_rate != out_rate || + ref_rate != CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ) { + comp_err(dev, "Incorrect source/sink sample rate, expect %d\n", + CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ); + ret = -EINVAL; } - switch 
(frame_fmt) { -#if CONFIG_FORMAT_S16LE - case SOF_IPC_FRAME_S16_LE: - break; -#endif /* CONFIG_FORMAT_S16LE */ - default: - comp_err(dev, "unsupported data format: %d", frame_fmt); - return -EINVAL; + if (mic_fmt != out_fmt) { + comp_err(dev, "Mismatched in/out frame format"); + ret = -EINVAL; } - if (rate != CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ) { - comp_err(dev, "unsupported samplerate: %d", rate); - return -EINVAL; + if ((mic_fmt != SOF_IPC_FRAME_S32_LE && mic_fmt != SOF_IPC_FRAME_S16_LE) || + (ref_fmt != SOF_IPC_FRAME_S32_LE && ref_fmt != SOF_IPC_FRAME_S16_LE)) { + comp_err(dev, "Unsupported sample format"); + ret = -EINVAL; } + cd->mic_copy = mic_fmt == SOF_IPC_FRAME_S16_LE ? source_copy16 : source_copy32; + cd->ref_copy = ref_fmt == SOF_IPC_FRAME_S16_LE ? source_copy16 : source_copy32; + cd->out_copy = out_fmt == SOF_IPC_FRAME_S16_LE ? sink_copy16 : sink_copy32; + + cd->ref_framesz = source_get_frame_bytes(sources[cd->aec_reference_source]); + cd->cap_framesz = sink_get_frame_bytes(sinks[0]); + + ret = GoogleRtcAudioProcessingSetStreamFormats(cd->state, mic_rate, + cd->num_capture_channels, + cd->num_capture_channels, + ref_rate, cd->num_aec_reference_channels); + /* Blobs sent during COMP_STATE_READY is assigned to blob_handler->data * directly, so comp_is_new_data_blob_available always returns false. */ - ret = google_rtc_audio_processing_reconfigure(mod); - if (ret) - return ret; + if (ret == 0) + ret = google_rtc_audio_processing_reconfigure(mod); - return 0; + return ret; +} + +static int trigger_handler(struct processing_module *mod, int cmd) +{ + struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod); + + /* Ignore and halt propagation if we get a trigger from the + * playback pipeline: not for us. + */ + if (cd->ref_comp_buffer->walking) + return PPL_STATUS_PATH_STOP; + + /* Note: not module_adapter_set_state(). 
With IPC4 those are + * identical, but IPC3 has some odd-looking logic that + * validates that no sources are active when receiving a + * PRE_START command, which obviously breaks for our reference + * stream if playback was already running when our pipeline + * started + */ + return comp_set_state(mod->dev, cmd); } static int google_rtc_audio_processing_reset(struct processing_module *mod) { comp_dbg(mod->dev, "google_rtc_audio_processing_reset()"); - return 0; } -static int google_rtc_audio_processing_process(struct processing_module *mod, - struct input_stream_buffer *input_buffers, - int num_input_buffers, - struct output_stream_buffer *output_buffers, - int num_output_buffers) +static inline void execute_aec(struct google_rtc_audio_processing_comp_data *cd) +{ + /* Note that reference input and mic output share the same + * buffer for efficiency + */ + GoogleRtcAudioProcessingAnalyzeRender_float32(cd->state, + (const float **)cd->refout_buffers); + GoogleRtcAudioProcessingProcessCapture_float32(cd->state, + (const float **)cd->raw_mic_buffers, + cd->refout_buffers); + cd->buffered_frames = 0; +} + +static bool ref_stream_active(struct google_rtc_audio_processing_comp_data *cd) +{ + return cd->ref_comp_buffer->source && + cd->ref_comp_buffer->source->state == COMP_STATE_ACTIVE; +} + +static int mod_process(struct processing_module *mod, struct sof_source **sources, + int num_of_sources, struct sof_sink **sinks, int num_of_sinks) { struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod); - int16_t *src, *dst, *ref; - uint32_t num_aec_reference_frames; - uint32_t num_aec_reference_bytes; - int num_samples_remaining; - int num_frames_remaining; - int channel; - int frames; - int nmax; - int ret; - int i, j, n; - struct input_stream_buffer *ref_streamb, *mic_streamb; - struct output_stream_buffer *out_streamb; - struct audio_stream *ref_stream, *mic_stream, *out_stream; + if (cd->reconfigure) + google_rtc_audio_processing_reconfigure(mod); - 
if (cd->reconfigure) { - ret = google_rtc_audio_processing_reconfigure(mod); - if (ret) - return ret; - } + struct sof_source *mic = sources[cd->raw_microphone_source]; + struct sof_source *ref = sources[cd->aec_reference_source]; + struct sof_sink *out = sinks[0]; + bool ref_ok = ref_stream_active(cd); - ref_streamb = &input_buffers[cd->aec_reference_source]; - ref_stream = ref_streamb->data; - ref = audio_stream_get_rptr(ref_stream); - - num_aec_reference_frames = input_buffers[cd->aec_reference_source].size; - num_aec_reference_bytes = audio_stream_frame_bytes(ref_stream) * num_aec_reference_frames; - - num_samples_remaining = num_aec_reference_frames * audio_stream_get_channels(ref_stream); - while (num_samples_remaining) { - nmax = audio_stream_samples_without_wrap_s16(ref_stream, ref); - n = MIN(num_samples_remaining, nmax); - for (i = 0; i < n; i += cd->num_aec_reference_channels) { - j = cd->num_aec_reference_channels * cd->aec_reference_frame_index; - for (channel = 0; channel < cd->num_aec_reference_channels; ++channel) - cd->aec_reference_buffer[j++] = ref[channel]; - - ref += audio_stream_get_channels(ref_stream); - ++cd->aec_reference_frame_index; - - if (cd->aec_reference_frame_index == cd->num_frames) { - GoogleRtcAudioProcessingAnalyzeRender_int16(cd->state, - cd->aec_reference_buffer); - cd->aec_reference_frame_index = 0; - } - } - num_samples_remaining -= n; - ref = audio_stream_wrap(ref_stream, ref); - } - input_buffers[cd->aec_reference_source].consumed = num_aec_reference_bytes; + /* Would be cleaner to store a bit of state to elide a bzero + * we already did, but we'd be doing the copy of real data in + * the ref_ok state anyway. + */ + if (!ref_ok) + bzero(arch_xtensa_cached_ptr(refoutbuf), sizeof(refoutbuf)); + + int fmic = source_get_data_frames_available(mic); + int fref = source_get_data_frames_available(ref); + int frames = ref_ok ? 
MIN(fmic, fref) : fmic; + int n, frames_rem; - mic_streamb = &input_buffers[cd->raw_microphone_source]; - mic_stream = mic_streamb->data; - out_streamb = &output_buffers[0]; - out_stream = out_streamb->data; + /* If fref > fmic (common at pipeline startup if + * playback was already active), we should consume the early + * samples so AEC compares the most recent values. + */ + if (ref_ok && fref > fmic) + source_release_data(ref, (fref - fmic) * cd->ref_framesz); - src = audio_stream_get_rptr(mic_stream); - dst = audio_stream_get_wptr(out_stream); + for (frames_rem = frames; frames_rem; frames_rem -= n) { + n = MIN(frames_rem, cd->num_frames - cd->buffered_frames); - frames = input_buffers[cd->raw_microphone_source].size; - num_frames_remaining = frames; + cd->mic_copy(mic, n, cd->raw_mic_buffers, cd->buffered_frames); - while (num_frames_remaining) { - nmax = audio_stream_frames_without_wrap(mic_stream, src); - n = MIN(num_frames_remaining, nmax); - nmax = audio_stream_frames_without_wrap(out_stream, dst); - n = MIN(n, nmax); - for (i = 0; i < n; i++) { - memcpy_s(&(cd->raw_mic_buffer[cd->raw_mic_buffer_frame_index * - cd->num_capture_channels]), - cd->num_frames * cd->num_capture_channels * - sizeof(cd->raw_mic_buffer[0]), src, - sizeof(int16_t) * cd->num_capture_channels); - ++cd->raw_mic_buffer_frame_index; - - memcpy_s(dst, cd->num_frames * cd->num_capture_channels * - sizeof(cd->output_buffer[0]), - &(cd->output_buffer[cd->output_buffer_frame_index * - cd->num_capture_channels]), - sizeof(int16_t) * cd->num_capture_channels); - ++cd->output_buffer_frame_index; - - if (cd->raw_mic_buffer_frame_index == cd->num_frames) { - GoogleRtcAudioProcessingProcessCapture_int16(cd->state, - cd->raw_mic_buffer, - cd->output_buffer); - cd->output_buffer_frame_index = 0; - cd->raw_mic_buffer_frame_index = 0; + if (ref_ok) + cd->ref_copy(ref, n, cd->refout_buffers, cd->buffered_frames); + + cd->buffered_frames += n; + + if (cd->buffered_frames >= cd->num_frames) { + /* Safety 
valve; is_ready() only guarantees us space for one block */ + if (sink_get_free_size(out) < cd->num_frames * cd->cap_framesz) { + comp_dbg(mod->dev, "AEC sink backed up!"); + break; } - src += audio_stream_get_channels(mic_stream); - dst += audio_stream_get_channels(out_stream); + execute_aec(cd); + cd->out_copy(out, cd->num_frames, cd->refout_buffers); } - num_frames_remaining -= n; - src = audio_stream_wrap(mic_stream, src); - dst = audio_stream_wrap(out_stream, dst); } + return 0; +} - module_update_buffer_position(&input_buffers[cd->raw_microphone_source], - &output_buffers[0], frames); +static bool mod_is_ready_to_process(struct processing_module *mod, + struct sof_source **sources, int num_of_sources, + struct sof_sink **sinks, int num_of_sinks) +{ + /* AEC produces its output in a single 10ms chunk, so we need + * at least that much space in the output buffer. We're + * otherwise happy to process any amount of input; it's + * accumulated in a relatively cheap copy, so frontload that + * as much as possible. 
+ */ + struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod); - return 0; + return sink_get_free_size(sinks[0]) >= cd->num_frames * cd->cap_framesz; } static struct module_interface google_rtc_audio_processing_interface = { .init = google_rtc_audio_processing_init, .free = google_rtc_audio_processing_free, - .process_audio_stream = google_rtc_audio_processing_process, + .process = mod_process, .prepare = google_rtc_audio_processing_prepare, .set_configuration = google_rtc_audio_processing_set_config, .get_configuration = google_rtc_audio_processing_get_config, + .trigger = trigger_handler, .reset = google_rtc_audio_processing_reset, + .is_ready_to_process = mod_is_ready_to_process, }; DECLARE_MODULE_ADAPTER(google_rtc_audio_processing_interface, diff --git a/src/audio/google/google_rtc_audio_processing_mock.c b/src/audio/google/google_rtc_audio_processing_mock.c index a6c55c641270..681d003d3592 100644 --- a/src/audio/google/google_rtc_audio_processing_mock.c +++ b/src/audio/google/google_rtc_audio_processing_mock.c @@ -10,8 +10,6 @@ #include #include -#include -#include #include #include "ipc/topology.h" @@ -23,7 +21,7 @@ struct GoogleRtcAudioProcessingState { int num_aec_reference_channels; int num_output_channels; int num_frames; - int16_t *aec_reference; + float *aec_reference; }; static void SetFormats(GoogleRtcAudioProcessingState *const state, @@ -140,46 +138,40 @@ int GoogleRtcAudioProcessingReconfigure(GoogleRtcAudioProcessingState *const sta return 0; } -int GoogleRtcAudioProcessingProcessCapture_int16(GoogleRtcAudioProcessingState *const state, - const int16_t *const src, - int16_t *const dest) +int GoogleRtcAudioProcessingProcessCapture_float32(GoogleRtcAudioProcessingState * const state, + const float * const *src, + float * const *dest) { - int16_t *ref = state->aec_reference; - int16_t *mic = (int16_t *) src; - int16_t *out = dest; - int n, io, im, ir; - - /* Mix input and reference channels to output. 
The matching channels numbers - * are mixed. If e.g. microphone and output channels count is 4, and reference - * has 2 channels, output channels 3 and 4 are copy of microphone channels 3 and 4, - * and output channels 1 and 2 are sum of microphone and reference. - */ - memset(dest, 0, sizeof(int16_t) * state->num_output_channels * state->num_frames); - for (n = 0; n < state->num_frames; ++n) { - im = 0; - ir = 0; - for (io = 0; io < state->num_output_channels; io++) { - out[io] = sat_int16( - (im < state->num_capture_channels ? (int32_t)mic[im++] : 0) + - (ir < state->num_aec_reference_channels ? (int32_t)ref[ir++] : 0)); + float *ref = state->aec_reference; + float **mic = (float **)src; + int n, chan; + + for (chan = 0; chan < state->num_output_channels; chan++) { + for (n = 0; n < state->num_frames; ++n) { + float mic_save = mic[chan][n]; /* allow same in/out buffer */ + + if (chan < state->num_aec_reference_channels) + dest[chan][n] = mic_save + ref[n + (chan * state->num_frames)]; + else + dest[chan][n] = mic_save; } - - ref += state->num_aec_reference_channels; - out += state->num_output_channels; - mic += state->num_capture_channels; } return 0; } -int GoogleRtcAudioProcessingAnalyzeRender_int16(GoogleRtcAudioProcessingState *const state, - const int16_t *const data) +int GoogleRtcAudioProcessingAnalyzeRender_float32(GoogleRtcAudioProcessingState * const state, + const float * const *data) { const size_t buffer_size = sizeof(state->aec_reference[0]) - * state->num_frames - * state->num_aec_reference_channels; - memcpy_s(state->aec_reference, buffer_size, - data, buffer_size); + * state->num_frames; + int channel; + + for (channel = 0; channel < state->num_aec_reference_channels; channel++) { + memcpy_s(&state->aec_reference[channel * state->num_frames], buffer_size, + data[channel], buffer_size); + } + return 0; }