From 142549e6faa94d08ebb31cdb0760376aee1d9ef0 Mon Sep 17 00:00:00 2001 From: Andy Ross Date: Thu, 7 Mar 2024 08:56:08 -0800 Subject: [PATCH] google_aec: Rework prepare/process() to support dynamic formats Big rewrite of the core processing code of AEC: Support both S32 and S16 input and output formats, dynamically selected at prepare() time based on stream configuration. Copy/convert data in maximally inlined/unrolled loops, using cleanly-generated (no duplication!) custom conversion utilities for each format variant. Orthogonalize and elaborate the validation code in prepare(). Check all state for all input/output streams. Decouple AEC operation from the input stream, filling zeros on underflow and allowing AEC to run in circumstances where no playback data exists and to recover when it starts/stops. IPC3 setups can exploit this now, unfortunately IPC4 always starts connected pipelines from the host kernel so sees no benefit. Fix a latency bug with the original code where it would copy the processed results to the output stream before the call to ProcessCapture() instead of after, leading to a needless delay. Copy the results as soon as they are available, if the output buffer backs up, we'll continue at the next call to process() Signed-off-by: Andy Ross --- .../google/google_rtc_audio_processing.c | 598 +++++++++--------- 1 file changed, 311 insertions(+), 287 deletions(-) diff --git a/src/audio/google/google_rtc_audio_processing.c b/src/audio/google/google_rtc_audio_processing.c index 9979f58488af..0e070a87d496 100644 --- a/src/audio/google/google_rtc_audio_processing.c +++ b/src/audio/google/google_rtc_audio_processing.c @@ -35,13 +35,23 @@ #include #include +/* Zephyr provides uncached memory for static variables on SMP, but we + * are single-core component and know we can safely use the cache for + * AEC work. XTOS SOF is cached by default, so stub the Zephyr API. 
+ */ +#ifdef __ZEPHYR__ +#include +#else +#define sys_cache_cached_ptr_get(p) (p) +#define ALWAYS_INLINE inline __attribute__((always_inline)) +#endif + #include #include #include #define GOOGLE_RTC_AUDIO_PROCESSING_FREQENCY_TO_PERIOD_FRAMES 100 #define GOOGLE_RTC_NUM_INPUT_PINS 2 -#define GOOGLE_RTC_NUM_OUTPUT_PINS 1 LOG_MODULE_REGISTER(google_rtc_audio_processing, CONFIG_SOF_LOG_LEVEL); @@ -53,34 +63,41 @@ DECLARE_SOF_RT_UUID("google-rtc-audio-processing", google_rtc_audio_processing_u DECLARE_TR_CTX(google_rtc_audio_processing_tr, SOF_UUID(google_rtc_audio_processing_uuid), LOG_LEVEL_INFO); -#if !(defined(__ZEPHYR__) && defined(CONFIG_XTENSA)) -/* Zephyr provides uncached memory for static variables on SMP, but we - * are single-core component and know we can safely use the cache for - * AEC work. XTOS SOF is cached by default, so stub the Zephyr API. - */ -#define arch_xtensa_cached_ptr(p) (p) -#endif static __aligned(PLATFORM_DCACHE_ALIGN) uint8_t aec_mem_blob[CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_MEMORY_BUFFER_SIZE_KB * 1024]; +#define NUM_FRAMES (CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ \ + / GOOGLE_RTC_AUDIO_PROCESSING_FREQENCY_TO_PERIOD_FRAMES) +#define CHAN_MAX CONFIG_COMP_GOOGLE_RTC_AUDIO_REFERENCE_CHANNEL_MAX + +static __aligned(PLATFORM_DCACHE_ALIGN) +float refoutbuf[CHAN_MAX][NUM_FRAMES]; + +static __aligned(PLATFORM_DCACHE_ALIGN) +float micbuf[CHAN_MAX][NUM_FRAMES]; + struct google_rtc_audio_processing_comp_data { -#if CONFIG_IPC_MAJOR_4 - struct sof_ipc4_aec_config config; -#endif - float *aec_reference_buffer; - float *process_buffer; - float *aec_reference_buffer_ptrs[SOF_IPC_MAX_CHANNELS]; - float *process_buffer_ptrs[SOF_IPC_MAX_CHANNELS]; uint32_t num_frames; int num_aec_reference_channels; int num_capture_channels; GoogleRtcAudioProcessingState *state; - + float *raw_mic_buffers[CHAN_MAX]; + float *refout_buffers[CHAN_MAX]; + int buffered_frames; struct comp_data_blob_handler *tuning_handler; bool reconfigure; + bool 
+ last_ref_ok; int aec_reference_source; int raw_microphone_source; +#ifdef CONFIG_IPC_MAJOR_3 + struct comp_buffer *ref_comp_buffer; +#endif + int ref_framesz; + int cap_framesz; + void (*mic_copy)(struct sof_source *src, int frames, float **dst_bufs, int frame0); + void (*ref_copy)(struct sof_source *src, int frames, float **dst_bufs, int frame0); + void (*out_copy)(struct sof_sink *dst, int frames, float **src_bufs); }; void *GoogleRtcMalloc(size_t size) @@ -93,6 +110,130 @@ void GoogleRtcFree(void *ptr) return rfree(ptr); } +static ALWAYS_INLINE float clamp_rescale(float max_val, float x) +{ + float min = -1.0f; + float max = 1.0f - 1.0f / max_val; + + return max_val * (x < min ? min : (x > max ? max : x)); +} + +static ALWAYS_INLINE float s16_to_float(const char *ptr) +{ + float scale = -(float)SHRT_MIN; + float x = *(int16_t *)ptr; + + return (1.0f / scale) * x; +} + +static ALWAYS_INLINE void float_to_s16(float x, char *dst) +{ + *(int16_t *)dst = (int16_t)clamp_rescale(-(float)SHRT_MIN, x); +} + +static ALWAYS_INLINE float s32_to_float(const char *ptr) +{ + float scale = -(float)INT_MIN; + float x = *(int32_t *)ptr; + + return (1.0f / scale) * x; +} + +static ALWAYS_INLINE void float_to_s32(float x, char *dst) +{ + *(int32_t *)dst = (int32_t)clamp_rescale(-(float)INT_MIN, x); +} + +static ALWAYS_INLINE void source_to_float(struct sof_source *src, float **dst_bufs, + float (*cvt_fn)(const char *), + int sample_sz, int frame0, int frames) +{ + size_t chan = source_get_channels(src); + size_t bytes = frames * chan * sample_sz; + int i, c, err, ndst = MIN(chan, CHAN_MAX); + const char *buf, *bufstart, *bufend; + float *dst[CHAN_MAX]; + size_t bufsz; + + for (i = 0; i < ndst; i++) + dst[i] = &dst_bufs[i][frame0]; + + err = source_get_data(src, bytes, (void *)&buf, (void *)&bufstart, &bufsz); + assert(err == 0); + bufend = &bufstart[bufsz]; + + while (frames) { + size_t n = MIN(frames, (bufsz - (buf - bufstart)) / (chan * sample_sz)); + + for (i = 0; i < n; i++) { 
+ for (c = 0; c < ndst; c++) { + *dst[c]++ = cvt_fn(buf); + buf += sample_sz; + } + buf += sample_sz * (chan - ndst); /* skip unused channels */ + } + frames -= n; + if (buf >= bufend) + buf = bufstart; + } + source_release_data(src, bytes); +} + +static ALWAYS_INLINE void float_to_sink(struct sof_sink *dst, float **src_bufs, + void (*cvt_fn)(float, char *), + int sample_sz, int frames) +{ + size_t chan = sink_get_channels(dst); + size_t bytes = frames * chan * sample_sz; + int i, c, err, nsrc = MIN(chan, CHAN_MAX); + char *buf, *bufstart, *bufend; + float *src[CHAN_MAX]; + size_t bufsz; + + for (i = 0; i < nsrc; i++) + src[i] = &src_bufs[i][0]; + + err = sink_get_buffer(dst, bytes, (void *)&buf, (void *)&bufstart, &bufsz); + assert(err == 0); + bufend = &bufstart[bufsz]; + + while (frames) { + size_t n = MIN(frames, (bufsz - (buf - bufstart)) / (chan * sample_sz)); + + for (i = 0; i < n; i++) { + for (c = 0; c < nsrc; c++) { + cvt_fn(*src[c]++, buf); + buf += sample_sz; + } + buf += sample_sz * (chan - nsrc); /* skip unused channels */ + } + frames -= n; + if (buf >= bufend) + buf = bufstart; + } + sink_commit_buffer(dst, bytes); +} + +static void source_copy16(struct sof_source *src, int frames, float **dst_bufs, int frame0) +{ + source_to_float(src, dst_bufs, s16_to_float, sizeof(int16_t), frame0, frames); +} + +static void source_copy32(struct sof_source *src, int frames, float **dst_bufs, int frame0) +{ + source_to_float(src, dst_bufs, s32_to_float, sizeof(int32_t), frame0, frames); +} + +static void sink_copy16(struct sof_sink *dst, int frames, float **src_bufs) +{ + float_to_sink(dst, src_bufs, float_to_s16, sizeof(int16_t), frames); +} + +static void sink_copy32(struct sof_sink *dst, int frames, float **src_bufs) +{ + float_to_sink(dst, src_bufs, float_to_s32, sizeof(int32_t), frames); +} + static int google_rtc_audio_processing_reconfigure(struct processing_module *mod) { struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod); @@ 
-361,7 +502,8 @@ static int google_rtc_audio_processing_init(struct processing_module *mod) struct module_data *md = &mod->priv; struct comp_dev *dev = mod->dev; struct google_rtc_audio_processing_comp_data *cd; - int ret; + int ret, i; + comp_info(dev, "google_rtc_audio_processing_init()"); /* Create private component data */ @@ -373,35 +515,18 @@ static int google_rtc_audio_processing_init(struct processing_module *mod) md->private = cd; - if (mod->priv.cfg.nb_input_pins != GOOGLE_RTC_NUM_INPUT_PINS) { - comp_err(dev, "Expecting %u sources, got %u", - GOOGLE_RTC_NUM_INPUT_PINS, mod->priv.cfg.nb_input_pins); - return -EINVAL; - } - if (mod->priv.cfg.nb_output_pins != GOOGLE_RTC_NUM_OUTPUT_PINS) { - comp_err(dev, "Expecting %u sink, got %u", - GOOGLE_RTC_NUM_OUTPUT_PINS, mod->priv.cfg.nb_output_pins); - return -EINVAL; - } - - cd->num_aec_reference_channels = cd->config.reference_fmt.channels_count; - cd->num_capture_channels = cd->config.output_fmt.channels_count; - if (cd->num_capture_channels > CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_CHANNEL_MAX) - cd->num_capture_channels = CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_CHANNEL_MAX; - if (cd->num_aec_reference_channels > CONFIG_COMP_GOOGLE_RTC_AUDIO_REFERENCE_CHANNEL_MAX) - cd->num_aec_reference_channels = CONFIG_COMP_GOOGLE_RTC_AUDIO_REFERENCE_CHANNEL_MAX; - cd->tuning_handler = comp_data_blob_handler_new(dev); if (!cd->tuning_handler) { ret = -ENOMEM; goto fail; } - cd->num_frames = CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ / - GOOGLE_RTC_AUDIO_PROCESSING_FREQENCY_TO_PERIOD_FRAMES; + cd->num_aec_reference_channels = CONFIG_COMP_GOOGLE_RTC_AUDIO_REFERENCE_CHANNEL_MAX; + cd->num_capture_channels = CONFIG_COMP_GOOGLE_RTC_AUDIO_REFERENCE_CHANNEL_MAX; + cd->num_frames = NUM_FRAMES; /* Giant blob of scratch memory. 
*/ - GoogleRtcAudioProcessingAttachMemoryBuffer(arch_xtensa_cached_ptr(&aec_mem_blob[0]), + GoogleRtcAudioProcessingAttachMemoryBuffer(sys_cache_cached_ptr_get(&aec_mem_blob[0]), sizeof(aec_mem_blob)); cd->state = GoogleRtcAudioProcessingCreateWithConfig(CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ, @@ -428,33 +553,12 @@ static int google_rtc_audio_processing_init(struct processing_module *mod) goto fail; } - size_t buf_size = cd->num_frames * cd->num_capture_channels * sizeof(cd->process_buffer[0]); - - comp_dbg(dev, "Allocating process_buffer of size %u", buf_size); - cd->process_buffer = rballoc(0, SOF_MEM_CAPS_RAM, buf_size); - if (!cd->process_buffer) { - comp_err(dev, "Allocating process_buffer failure"); - ret = -EINVAL; - goto fail; + for (i = 0; i < CHAN_MAX; i++) { + cd->raw_mic_buffers[i] = sys_cache_cached_ptr_get(&micbuf[i][0]); + cd->refout_buffers[i] = sys_cache_cached_ptr_get(&refoutbuf[i][0]); } - bzero(cd->process_buffer, buf_size); - buf_size = cd->num_frames * sizeof(cd->aec_reference_buffer[0]) * - cd->num_aec_reference_channels; - comp_dbg(dev, "Allocating aec_reference_buffer of size %u", buf_size); - cd->aec_reference_buffer = rballoc(0, SOF_MEM_CAPS_RAM, buf_size); - if (!cd->aec_reference_buffer) { - comp_err(dev, "Allocating aec_reference_buffer failure"); - ret = -ENOMEM; - goto fail; - } - bzero(cd->aec_reference_buffer, buf_size); - - for (size_t channel = 0; channel < cd->num_capture_channels; channel++) - cd->process_buffer_ptrs[channel] = &cd->process_buffer[channel * cd->num_frames]; - for (size_t channel = 0; channel < cd->num_aec_reference_channels; channel++) - cd->aec_reference_buffer_ptrs[channel] = - &cd->aec_reference_buffer[channel * cd->num_frames]; + cd->buffered_frames = 0; /* comp_is_new_data_blob_available always returns false for the first * control write with non-empty config. 
The first non-empty write may @@ -472,13 +576,10 @@ static int google_rtc_audio_processing_init(struct processing_module *mod) fail: comp_err(dev, "google_rtc_audio_processing_init(): Failed"); if (cd) { - rfree(cd->aec_reference_buffer); - if (cd->state) { GoogleRtcAudioProcessingFree(cd->state); } GoogleRtcAudioProcessingDetachMemoryBuffer(); - rfree(cd->process_buffer); comp_data_blob_handler_free(cd->tuning_handler); rfree(cd); } @@ -494,9 +595,7 @@ static int google_rtc_audio_processing_free(struct processing_module *mod) GoogleRtcAudioProcessingFree(cd->state); cd->state = NULL; - rfree(cd->aec_reference_buffer); GoogleRtcAudioProcessingDetachMemoryBuffer(); - rfree(cd->process_buffer); comp_data_blob_handler_free(cd->tuning_handler); rfree(cd); return 0; @@ -510,22 +609,12 @@ static int google_rtc_audio_processing_prepare(struct processing_module *mod, { struct comp_dev *dev = mod->dev; struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod); - unsigned int aec_channels = 0, frame_fmt, rate; - int microphone_stream_channels = 0; - int output_stream_channels; - int ret; - int i = 0; + int ret = 0; comp_info(dev, "google_rtc_audio_processing_prepare()"); - if (num_of_sources != GOOGLE_RTC_NUM_INPUT_PINS) { - comp_err(dev, "Expecting %u sources, got %u", - GOOGLE_RTC_NUM_INPUT_PINS, num_of_sources); - return -EINVAL; - } - if (num_of_sinks != GOOGLE_RTC_NUM_OUTPUT_PINS) { - comp_err(dev, "Expecting %u sink, got %u", - GOOGLE_RTC_NUM_OUTPUT_PINS, num_of_sinks); + if (num_of_sources != 2 || num_of_sinks != 1) { + comp_err(dev, "Invalid source/sink count"); return -EINVAL; } @@ -534,108 +623,132 @@ static int google_rtc_audio_processing_prepare(struct processing_module *mod, source_get_pipeline_id(sources[0]) == sink_get_pipeline_id(sinks[0]); cd->raw_microphone_source = cd->aec_reference_source ? 
0 : 1; - /* searching for stream and feedback source buffers */ - for (i = 0; i < num_of_sources; i++) { - source_set_alignment_constants(sources[i], 1, 1); - } +#ifdef CONFIG_IPC_MAJOR_3 + /* Don't need the ref buffer on IPC4 as pipelines are always + * activated in tandem; also the API is deprecated + */ + cd->ref_comp_buffer = list_first_item(&dev->bsource_list, + struct comp_buffer, sink_list); + if (cd->aec_reference_source == 1) + cd->ref_comp_buffer = list_next_item(cd->ref_comp_buffer, sink_list); +#endif #ifdef CONFIG_IPC_MAJOR_4 - /* enforce format on pins */ - ipc4_update_source_format(sources[cd->aec_reference_source], &cd->config.reference_fmt); - ipc4_update_source_format(sources[cd->raw_microphone_source], &cd->config.output_fmt); - ipc4_update_sink_format(sinks[0], &cd->config.output_fmt); + /* Workaround: nothing in the framework sets up the stream for + * the reference source correctly from topology input, so we + * have to do it here. Input pin "1" is just a magic number + * that must match the input_pin_index token in a format + * record from our topology. + */ + ipc4_update_source_format(sources[cd->aec_reference_source], + &mod->priv.cfg.input_pins[1].audio_fmt); #endif - output = list_first_item(&dev->bsink_list, struct comp_buffer, source_list); + /* Validate channel, format and rate on each of our three inputs */ + int ref_fmt = source_get_frm_fmt(sources[cd->aec_reference_source]); + int ref_chan = source_get_channels(sources[cd->aec_reference_source]); + int ref_rate = source_get_rate(sources[cd->aec_reference_source]); - /* On some platform the playback output is left right left right due to a crossover - * later on the signal processing chain. That makes the aec_reference be 4 channels - * and the AEC should only use the 2 first. 
- */ - if (cd->num_aec_reference_channels > aec_channels) { - comp_err(dev, "unsupported number of AEC reference channels: %d", - aec_channels); - return -EINVAL; - } + int mic_fmt = source_get_frm_fmt(sources[cd->raw_microphone_source]); + int mic_chan = source_get_channels(sources[cd->raw_microphone_source]); + int mic_rate = source_get_rate(sources[cd->raw_microphone_source]); - sink_set_alignment_constants(sinks[0], 1, 1); - frame_fmt = sink_get_frm_fmt(sinks[0]); - rate = sink_get_rate(sinks[0]); - output_stream_channels = sink_get_channels(sinks[0]); + int out_fmt = sink_get_frm_fmt(sinks[0]); + int out_chan = sink_get_channels(sinks[0]); + int out_rate = sink_get_rate(sinks[0]); - if (cd->num_capture_channels > microphone_stream_channels) { - comp_err(dev, "unsupported number of microphone channels: %d", - microphone_stream_channels); - return -EINVAL; + cd->ref_framesz = source_get_frame_bytes(sources[cd->aec_reference_source]); + cd->cap_framesz = sink_get_frame_bytes(sinks[0]); + + cd->num_aec_reference_channels = MIN(ref_chan, CHAN_MAX); + cd->num_capture_channels = MIN(mic_chan, CHAN_MAX); + + /* Too many channels is a soft failure, AEC treats only the first N */ + if (mic_chan > CHAN_MAX) + comp_warn(dev, "Too many mic channels: %d, truncating to %d", + mic_chan, CHAN_MAX); + if (ref_chan > CHAN_MAX) + comp_warn(dev, "Too many ref channels: %d, truncating to %d", + ref_chan, CHAN_MAX); + + if (out_chan != mic_chan) { + comp_err(dev, "Input/output mic channel mismatch"); + ret = -EINVAL; } - if (cd->num_capture_channels > output_stream_channels) { - comp_err(dev, "unsupported number of output channels: %d", - output_stream_channels); - return -EINVAL; + if (ref_rate != mic_rate || ref_rate != out_rate || + ref_rate != CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ) { + comp_err(dev, "Incorrect source/sink sample rate, expect %d\n", + CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ); + ret = -EINVAL; } - switch (frame_fmt) { -#if 
CONFIG_FORMAT_S16LE - case SOF_IPC_FRAME_S16_LE: - break; -#endif /* CONFIG_FORMAT_S16LE */ - default: - comp_err(dev, "unsupported data format: %d", frame_fmt); - return -EINVAL; + if (mic_fmt != out_fmt) { + comp_err(dev, "Mismatched in/out frame format"); + ret = -EINVAL; } - if (rate != CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_SAMPLE_RATE_HZ) { - comp_err(dev, "unsupported samplerate: %d", rate); - return -EINVAL; + if ((mic_fmt != SOF_IPC_FRAME_S32_LE && mic_fmt != SOF_IPC_FRAME_S16_LE) || + (ref_fmt != SOF_IPC_FRAME_S32_LE && ref_fmt != SOF_IPC_FRAME_S16_LE)) { + comp_err(dev, "Unsupported sample format"); + ret = -EINVAL; } -#if CONFIG_IPC_MAJOR_4 - /* check IBS/OBS in streams */ - if (cd->num_frames * source_get_frame_bytes(sources[cd->raw_microphone_source]) != - source_get_min_available(sources[cd->raw_microphone_source])) { - comp_err(dev, "Incorrect IBS on microphone source: %d, expected %u", - source_get_min_available(sources[cd->raw_microphone_source]), - cd->num_frames * - source_get_frame_bytes(sources[cd->raw_microphone_source])); - return -EINVAL; +#ifdef CONFIG_IPC_MAJOR_4 + int ref_bufsz = source_get_min_available(sources[cd->aec_reference_source]); + int mic_bufsz = source_get_min_available(sources[cd->raw_microphone_source]); + int out_bufsz = sink_get_min_free_space(sinks[0]); + + if (mic_bufsz > cd->num_frames * cd->cap_framesz) { + comp_err(dev, "Mic IBS %d >1 AEC block, needless delay!", mic_bufsz); + ret = -EINVAL; } - if (cd->num_frames * sink_get_frame_bytes(sinks[0]) != - sink_get_min_free_space(sinks[0])) { - comp_err(dev, "Incorrect OBS on sink :%d, expected %u", - sink_get_min_free_space(sinks[0]), - cd->num_frames * sink_get_frame_bytes(sinks[0])); - return -EINVAL; + + if (ref_bufsz > cd->num_frames * cd->ref_framesz) { + comp_err(dev, "Ref IBS %d >1 one AEC block, needless delay!", ref_bufsz); + ret = -EINVAL; } - if (cd->num_frames * source_get_frame_bytes(sources[cd->aec_reference_source]) != - 
source_get_min_available(sources[cd->aec_reference_source])) { - comp_err(dev, "Incorrect IBS on reference source: %d, expected %u", - source_get_min_available(sources[cd->aec_reference_source]), - cd->num_frames * - source_get_frame_bytes(sources[cd->aec_reference_source])); - return -EINVAL; + + if (out_bufsz < cd->num_frames * cd->cap_framesz) { + comp_err(dev, "Capture OBS %d too small, must fit 1 AEC block", out_bufsz); + ret = -EINVAL; } -#endif /* CONFIG_IPC_MAJOR_4 */ +#endif + + if (ret < 0) + return ret; + + cd->mic_copy = mic_fmt == SOF_IPC_FRAME_S16_LE ? source_copy16 : source_copy32; + cd->ref_copy = ref_fmt == SOF_IPC_FRAME_S16_LE ? source_copy16 : source_copy32; + cd->out_copy = out_fmt == SOF_IPC_FRAME_S16_LE ? sink_copy16 : sink_copy32; + + cd->last_ref_ok = false; + + ret = GoogleRtcAudioProcessingSetStreamFormats(cd->state, mic_rate, + cd->num_capture_channels, + cd->num_capture_channels, + ref_rate, cd->num_aec_reference_channels); /* Blobs sent during COMP_STATE_READY is assigned to blob_handler->data * directly, so comp_is_new_data_blob_available always returns false. */ - ret = google_rtc_audio_processing_reconfigure(mod); - if (ret) - return ret; + if (ret == 0) + ret = google_rtc_audio_processing_reconfigure(mod); - return 0; + return ret; } static int trigger_handler(struct processing_module *mod, int cmd) { struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod); +#ifdef CONFIG_IPC_MAJOR_3 /* Ignore and halt propagation if we get a trigger from the - * playback pipeline: not for us. + * playback pipeline: not for us. (Never happens on IPC4) */ if (cd->ref_comp_buffer->walking) return PPL_STATUS_PATH_STOP; +#endif /* Note: not module_adapter_set_state(). 
With IPC4 those are * identical, but IPC3 has some odd-looking logic that @@ -650,171 +763,82 @@ static int trigger_handler(struct processing_module *mod, int cmd) static int google_rtc_audio_processing_reset(struct processing_module *mod) { comp_dbg(mod->dev, "google_rtc_audio_processing_reset()"); - return 0; } -static inline int16_t convert_float_to_int16(float data) +static inline void execute_aec(struct google_rtc_audio_processing_comp_data *cd) { -#if XCHAL_HAVE_HIFI3 - const xtfloat ratio = 2 << 15; - xtfloat x0 = data; - xtfloat x1; - int16_t x; - - x1 = XT_MUL_S(x0, ratio); - x = XT_TRUNC_S(x1, 0); - - return x; -#else /* XCHAL_HAVE_HIFI3 */ - return Q_CONVERT_FLOAT(data, 15); -#endif /* XCHAL_HAVE_HIFI3 */ + /* Note that reference input and mic output share the same + * buffer for efficiency + */ + GoogleRtcAudioProcessingAnalyzeRender_float32(cd->state, + (const float **)cd->refout_buffers); + GoogleRtcAudioProcessingProcessCapture_float32(cd->state, + (const float **)cd->raw_mic_buffers, + cd->refout_buffers); + cd->buffered_frames = 0; } -static inline float convert_int16_to_float(int16_t data) +static bool ref_stream_active(struct google_rtc_audio_processing_comp_data *cd) { -#if XCHAL_HAVE_HIFI3 - const xtfloat ratio = 2 << 15; - xtfloat x0 = data; - float x; - - x = XT_DIV_S(x0, ratio); - - return x; -#else /* XCHAL_HAVE_HIFI3 */ - return Q_CONVERT_QTOF(data, 15); -#endif /* XCHAL_HAVE_HIFI3 */ +#ifdef CONFIG_IPC_MAJOR_3 + return cd->ref_comp_buffer->source && + cd->ref_comp_buffer->source->state == COMP_STATE_ACTIVE; +#else + return true; +#endif } -/* todo CONFIG_FORMAT_S32LE */ -static int google_rtc_audio_processing_process(struct processing_module *mod, - struct sof_source **sources, int num_of_sources, - struct sof_sink **sinks, int num_of_sinks) +static int mod_process(struct processing_module *mod, struct sof_source **sources, + int num_of_sources, struct sof_sink **sinks, int num_of_sinks) { - int ret; - int16_t const *src; - int8_t const 
*src_buf_start; - int8_t const *src_buf_end; - size_t src_buf_size; - - int16_t const *ref; - int8_t const *ref_buf_start; - int8_t const *ref_buf_end; - size_t ref_buf_size; - - int16_t *dst; - int8_t *dst_buf_start; - int8_t *dst_buf_end; - size_t dst_buf_size; - - size_t num_of_bytes_to_process; - size_t channel; - size_t buffer_offset; - - struct sof_source *ref_stream, *src_stream; - struct sof_sink *dst_stream; - struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod); - if (cd->reconfigure) { - ret = google_rtc_audio_processing_reconfigure(mod); - if (ret) - return ret; - } + if (cd->reconfigure) + google_rtc_audio_processing_reconfigure(mod); - src_stream = sources[cd->raw_microphone_source]; - ref_stream = sources[cd->aec_reference_source]; - dst_stream = sinks[0]; + struct sof_source *mic = sources[cd->raw_microphone_source]; + struct sof_source *ref = sources[cd->aec_reference_source]; + struct sof_sink *out = sinks[0]; + bool ref_ok = ref_stream_active(cd); - num_of_bytes_to_process = cd->num_frames * source_get_frame_bytes(ref_stream); - ret = source_get_data(ref_stream, num_of_bytes_to_process, (const void **)&ref, - (const void **)&ref_buf_start, &ref_buf_size); + /* Clear the buffer if the reference pipeline shuts off */ + if (!ref_ok && cd->last_ref_ok) + bzero(sys_cache_cached_ptr_get(refoutbuf), sizeof(refoutbuf)); - /* problems here are extremely unlikely, as it has been checked that - * the buffer contains enough data - */ - assert(!ret); - ref_buf_end = ref_buf_start + ref_buf_size; + int fmic = source_get_data_frames_available(mic); + int fref = source_get_data_frames_available(ref); + int frames = ref_ok ? 
MIN(fmic, fref) : fmic; + int n, frames_rem; - /* 32float: de-interlace ref buffer, convert it to float, skip channels if > Max - * 16int: linearize buffer, skip channels if > Max - */ - buffer_offset = 0; - for (int i = 0; i < cd->num_frames; i++) { - for (channel = 0; channel < cd->num_aec_reference_channels; ++channel) { - cd->aec_reference_buffer_ptrs[channel][i] = - convert_int16_to_float(ref[channel]); - } + for (frames_rem = frames; frames_rem; frames_rem -= n) { + n = MIN(frames_rem, cd->num_frames - cd->buffered_frames); - ref += cd->num_aec_reference_channels; - if ((void *)ref >= (void *)ref_buf_end) - ref = (void *)ref_buf_start; - } + cd->mic_copy(mic, n, cd->raw_mic_buffers, cd->buffered_frames); - GoogleRtcAudioProcessingAnalyzeRender_float32(cd->state, - (const float **) - cd->aec_reference_buffer_ptrs); - source_release_data(ref_stream, num_of_bytes_to_process); - - /* process main stream - same as reference */ - num_of_bytes_to_process = cd->num_frames * source_get_frame_bytes(src_stream); - ret = source_get_data(src_stream, num_of_bytes_to_process, (const void **)&src, - (const void **)&src_buf_start, &src_buf_size); - assert(!ret); - src_buf_end = src_buf_start + src_buf_size; - - buffer_offset = 0; - for (int i = 0; i < cd->num_frames; i++) { - for (channel = 0; channel < cd->num_capture_channels; channel++) - cd->process_buffer_ptrs[channel][i] = convert_int16_to_float(src[channel]); - - /* move pointer to next frame - * number of incoming channels may be < cd->num_capture_channels - */ - src += cd->config.output_fmt.channels_count; - if ((void *)src >= (void *)src_buf_end) - src = (void *)src_buf_start; - } + if (ref_ok) + cd->ref_copy(ref, n, cd->refout_buffers, cd->buffered_frames); - source_release_data(src_stream, num_of_bytes_to_process); + cd->buffered_frames += n; - /* call the library, use same in/out buffers */ - GoogleRtcAudioProcessingProcessCapture_float32(cd->state, - (const float **)cd->process_buffer_ptrs, - 
cd->process_buffer_ptrs); - - /* same number of bytes to process for output stream as for mic stream */ - ret = sink_get_buffer(dst_stream, num_of_bytes_to_process, (void **)&dst, - (void **)&dst_buf_start, &dst_buf_size); - assert(!ret); - dst_buf_end = dst_buf_start + dst_buf_size; - - /* process all channels in output stream */ - buffer_offset = 0; - for (int i = 0; i < cd->num_frames; i++) { - for (channel = 0; channel < cd->config.output_fmt.channels_count; channel++) { - /* set data in processed channels, zeroize not processed */ - if (channel < cd->num_capture_channels) - dst[channel] = convert_float_to_int16( - cd->process_buffer_ptrs[channel][i]); - else - dst[channel] = 0; - } + if (cd->buffered_frames >= cd->num_frames) { + if (sink_get_free_size(out) < cd->num_frames * cd->cap_framesz) { + comp_warn(mod->dev, "AEC sink backed up!"); + break; + } - dst += cd->config.output_fmt.channels_count; - if ((void *)dst >= (void *)dst_buf_end) - dst = (void *)dst_buf_start; + execute_aec(cd); + cd->out_copy(out, cd->num_frames, cd->refout_buffers); + } } - - sink_commit_buffer(dst_stream, num_of_bytes_to_process); - + cd->last_ref_ok = ref_ok; return 0; } static struct module_interface google_rtc_audio_processing_interface = { .init = google_rtc_audio_processing_init, .free = google_rtc_audio_processing_free, - .process = google_rtc_audio_processing_process, + .process = mod_process, .prepare = google_rtc_audio_processing_prepare, .set_configuration = google_rtc_audio_processing_set_config, .get_configuration = google_rtc_audio_processing_get_config,