Skip to content

Commit

Permalink
Hacky fixups
Browse files Browse the repository at this point in the history
There were multiple bugs with the buffer copying and interpretation.
Turns out that the microphone input was NOT lying: it really is giving
us 4 channels of S16_LE, contra what the downstream ALSA endpoint has
(S32_LE).  That seems like a topology bug?

But the reference input WAS lying.  It was claiming 4 channels but
providing only two, leading to underruns.

Combined with buffer copying bugs, the end result was that we were
moving the right amount of bytes but in the wrong format.

I think this is right now, but it needs a bunch of cleanup still.
Commit so I have a reference to fall back on.
  • Loading branch information
andyross committed Dec 15, 2023
1 parent feb3d4b commit 7e9b0db
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 62 deletions.
1 change: 0 additions & 1 deletion app/boards/intel_adsp_ace15_mtpm.conf
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,5 @@ CONFIG_PROBE_DMA_MAX=2
CONFIG_KCPS_DYNAMIC_CLOCK_CONTROL=y
CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING=y
CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_NUM_CHANNELS=2
CONFIG_GOOGLE_RTC_AUDIO_PROCESSING_MIC_BITS=32

CONFIG_COMP_STUBS=y
168 changes: 107 additions & 61 deletions src/audio/google/google_rtc_audio_processing.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
#include <google_rtc_audio_processing_platform.h>
#include <google_rtc_audio_processing_sof_message_reader.h>

#define BYPASS_PROCESS
//#define BYPASS_AEC

#define GOOGLE_RTC_AUDIO_PROCESSING_FREQENCY_TO_PERIOD_FRAMES 100
#define GOOGLE_RTC_NUM_INPUT_PINS 2

Expand Down Expand Up @@ -91,8 +94,8 @@ struct google_rtc_audio_processing_comp_data {
int aec_reference_source;
int raw_microphone_source;
struct comp_buffer *ref_comp_buffer;
int ref_frame_bytes;
int out_frame_bytes;
int ref_framesz;
int cap_framesz;
};

#if CONFIG_GOOGLE_RTC_AUDIO_PROCESSING_MIC_BITS == 16
Expand Down Expand Up @@ -604,6 +607,16 @@ static int google_rtc_audio_processing_prepare(struct processing_module *mod,
ret = -EINVAL;
}

/* Bug workaround: the ref source on MTL claims 4 channels,
* but only provides two. Use our kconfig-derived vlaue
* instead, but yell about it.
*/
if (ref_chan != REF_CHAN_MAX) {
comp_err(dev, "Incorrect ref channels %d, setting %d\n",
ref_chan, REF_CHAN_MAX);
ref_chan = REF_CHAN_MAX;
}

cd->num_aec_reference_channels = MIN(ref_chan, REF_CHAN_MAX);
cd->num_capture_channels = MIN(mic_chan, MIC_CHAN_MAX);

Expand All @@ -624,8 +637,7 @@ static int google_rtc_audio_processing_prepare(struct processing_module *mod,
comp_err(dev, "Mic stream must be %d bit samples\n",
CONFIG_GOOGLE_RTC_AUDIO_PROCESSING_MIC_BITS);

// FIXME: squash for now, the streams lie. See below.
//ret = -EINVAL;
ret = -EINVAL;
}

if (mic_fmt != out_fmt) {
Expand All @@ -636,10 +648,10 @@ static int google_rtc_audio_processing_prepare(struct processing_module *mod,
// FIXME: the streams have a bad format on MTL, so we can't
// use this API. Compute by hand.
//
//cd->ref_frame_bytes = source_get_frame_bytes(sources[cd->aec_reference_source]);
//cd->out_frame_bytes = sink_get_frame_bytes(sinks[0]);
cd->ref_frame_bytes = sizeof(mic_sample_t) * source_get_channels(sources[cd->aec_reference_source]);
cd->out_frame_bytes = cd->ref_frame_bytes;
//cd->ref_framesz = source_get_frame_bytes(sources[cd->aec_reference_source]);
//cd->cap_framesz = sink_get_frame_bytes(sinks[0]);
cd->ref_framesz = sizeof(ref_sample_t) * ref_chan;
cd->cap_framesz = sizeof(mic_sample_t) * mic_chan;

ret = GoogleRtcAudioProcessingSetStreamFormats(cd->state, mic_rate,
cd->num_capture_channels,
Expand Down Expand Up @@ -687,92 +699,103 @@ static inline void execute_aec(struct google_rtc_audio_processing_comp_data *cd)
/* Note that reference input and mic output share the same
* buffer for efficiency
*/
#ifndef BYPASS_AEC
GoogleRtcAudioProcessingAnalyzeRender_float32(cd->state,
(const float **)cd->refout_buffers);
GoogleRtcAudioProcessingProcessCapture_float32(cd->state,
(const float **)cd->raw_mic_buffers,
cd->refout_buffers);
#else
int f, c;
// Cheap bypass
for (c = 0; c < cd->num_capture_channels; c++)
for (f = 0; f < cd->num_frames; f++)
cd->refout_buffers[c][f] = cd->raw_mic_buffers[c][f];
#endif
cd->buffered_frames = 0;
}

static void mic_in_copy(struct sof_source *src, int frames, float **dst_bufs, int frame0)
{
size_t chan = MIN(MIC_CHAN_MAX, source_get_channels(src));
size_t chan = source_get_channels(src);
//size_t chan = MIC_CHAN_MAX;
size_t samples = frames * chan;
size_t bytes = samples * sizeof(mic_sample_t);
const mic_sample_t *buf, *bufstart, *bufend;
float *dst[MIC_CHAN_MAX];
int i, c, err;
size_t bufsz;

for (i = 0; i < chan; i++)
for (i = 0; i < MIC_CHAN_MAX; i++)
dst[i] = &dst_bufs[i][frame0];

err = source_get_data(src, bytes, (void *)&buf, (void *)&bufstart, &bufsz);
assert(err == 0);
bufend = &bufstart[bufsz];
bufend = (void *)(bufsz + (char *)bufstart);

for (i = 0; i < frames; i++) {
for (c = 0; c < chan; c++) {
for (c = 0; c < MIC_CHAN_MAX; c++)
*dst[c]++ = mic_to_float(*buf++);
if (buf >= bufend)
buf = bufstart;
}
buf += chan - MIC_CHAN_MAX; /* skip unused channels */
if (buf >= bufend)
buf = bufstart;
}
source_release_data(src, bytes);
}

/* Nearly verbatim except for types. Needs macro/inlining attention */
static void ref_copy(struct sof_source *src, int frames, float **dst_bufs, int frame0)
{
size_t chan = MIN(REF_CHAN_MAX, source_get_channels(src));
// FIXME: MTL passes 4 channels, not 2
//size_t chan = source_get_channels(src);
size_t chan = REF_CHAN_MAX;
size_t samples = frames * chan;
size_t bytes = samples * sizeof(ref_sample_t);
const ref_sample_t *buf, *bufstart, *bufend;
float *dst[REF_CHAN_MAX];
int i, c, err;
size_t bufsz;

for (i = 0; i < chan; i++)
for (i = 0; i < REF_CHAN_MAX; i++)
dst[i] = &dst_bufs[i][frame0];

err = source_get_data(src, bytes, (void *)&buf, (void *)&bufstart, &bufsz);
assert(err == 0);
bufend = &bufstart[bufsz];
bufend = (void *)(bufsz + (char *)bufstart);

for (i = 0; i < frames; i++) {
for (c = 0; c < chan; c++) {
for (c = 0; c < chan; c++)
*dst[c]++ = ref_to_float(*buf++);
if (buf >= bufend)
buf = bufstart;
}
if (buf >= bufend)
buf = bufstart;
}
source_release_data(src, bytes);
}

static void mic_out_copy(struct sof_sink *sink, int frames, float **src_bufs)
{
size_t chan = MIN(MIC_CHAN_MAX, sink_get_channels(sink));
size_t chan = sink_get_channels(sink);
size_t samples = frames * chan;
size_t bytes = samples * sizeof(mic_sample_t);
mic_sample_t *buf, *bufstart, *bufend;
int i, c, err;
size_t bufsz;
float *src[MIC_CHAN_MAX];

for (i = 0; i < chan; i++)
for (i = 0; i < MIC_CHAN_MAX; i++)
src[i] = src_bufs[i];

err = sink_get_buffer(sink, bytes, (void *)&buf, (void *)&bufstart, &bufsz);
assert(err == 0);
bufend = &bufstart[bufsz];
bufend = (void *)(bufsz + (char *)bufstart);

for (i = 0; i < frames; i++) {
for (c = 0; c < chan; c++) {
for (c = 0; c < MIC_CHAN_MAX; c++)
*buf++ = float_to_mic(*src[c]++);
if (buf >= bufend)
buf = bufstart;
}
for (/**/; c < chan; c++)
*buf++ = 0; /* clear unused channels */
if (buf >= bufend)
buf = bufstart;
}
sink_commit_buffer(sink, bytes);
}
Expand All @@ -796,15 +819,48 @@ static int mod_process(struct processing_module *mod, struct sof_source **source
struct sof_sink *out = sinks[0];
bool ref_ok = ref_stream_active(cd);

#ifdef BYPASS_PROCESS
char *src, *src0, *srcend, *dst, *dst0, *dstend;
size_t bytes, srcsz, dstsz;
int l, b, err;

bytes = MIN(source_get_data_available(mic), sink_get_free_size(out));
err = source_get_data(mic, bytes, (void *)&src, (void *)&src0, &srcsz);
assert(err == 0);
srcend = src0 + srcsz;
err = sink_get_buffer(out, bytes, (void *)&dst, (void *)&dst0, &dstsz);
assert(err == 0);
dstend = dst0 + dstsz;

for (b = bytes; b > 0; b -= l) {
l = MIN(bytes, MIN(srcend - src, dstend - dst));
memcpy(dst, src, l);
src += l;
dst += l;
if (src >= srcend)
src = src0;
if (dst >= dstend)
dst = dst0;
}
source_release_data(ref, source_get_data_available(ref));
source_release_data(mic, bytes);
sink_commit_buffer(out, bytes);

return 0;
#endif

/* Would be cleaner to store a bit of state to elide a bzero
* we already did, but we'd be doing the copy of real data in
* the ref_ok state anyway.
*/
if (!ref_ok)
bzero(refoutbuf, sizeof(refoutbuf));
bzero(arch_xtensa_cached_ptr(refoutbuf), sizeof(refoutbuf));

int fmic = source_get_data_frames_available(mic);
int fref = source_get_data_frames_available(ref);
// FIXME: SOF lies about these values
//int fmic = source_get_data_frames_available(mic);
//int fref = source_get_data_frames_available(ref);
int fmic = source_get_data_available(mic) / cd->cap_framesz;
int fref = source_get_data_available(ref) / cd->ref_framesz;
int frames = ref_ok ? MIN(fmic, fref) : fmic;
int n, frames_rem;

Expand All @@ -813,56 +869,46 @@ static int mod_process(struct processing_module *mod, struct sof_source **source
* samples so AEC compares the most recent values.
*/
if (ref_ok && fref > fmic)
source_release_data(ref, (fref - fmic) * cd->ref_frame_bytes);
source_release_data(ref, (fref - fmic) * cd->ref_framesz);

for (frames_rem = frames; frames_rem; frames_rem -= n) {
n = MIN(frames_rem, cd->num_frames - cd->buffered_frames);

int si = cd->buffered_frames * source_get_channels(mic);

mic_in_copy(mic, n, cd->raw_mic_buffers, si);
mic_in_copy(mic, n, cd->raw_mic_buffers, cd->buffered_frames);

if (ref_ok)
ref_copy(ref, n, cd->refout_buffers, si);
ref_copy(ref, n, cd->refout_buffers, cd->buffered_frames);

cd->buffered_frames += n;

if (cd->buffered_frames >= cd->num_frames) {
execute_aec(cd);
mic_out_copy(out, n, cd->refout_buffers);
mic_out_copy(out, cd->num_frames, cd->refout_buffers);

/* Safety valve, in case downstream backs up. */
if (sink_get_free_size(out) < cd->num_frames * cd->cap_framesz) {
break;
}
}
}

return 0;
}

static bool mod_is_ready_to_process(struct processing_module *mod,
struct sof_source **sources, int num_of_sources,
struct sof_sink **sinks, int num_of_sinks)
{
struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod);
struct sof_source *mic = sources[cd->raw_microphone_source];
struct sof_source *ref = sources[cd->aec_reference_source];
struct sof_sink *out = sinks[0];
bool ref_ok = ref_stream_active(cd);

/* this should source_get_min_available(ref_stream)!!!
* Currently the topology sets IBS incorrectly
*/
if (ref_ok && (source_get_data_available(ref)
< cd->num_frames * cd->ref_frame_bytes))
return false;

if (source_get_data_available(mic) < source_get_min_available(mic))
return false;

/* Output comes out all at once, the output sink much have
* space for the full block
#ifdef BYPASS_PROCESS
return sink_get_free_size(sinks[0]);
#endif
/* AEC produces its output in a single 10ms chunk, so we need
* at least that much space in the output buffer. We're
* otherwise happy to process any amount of input; it's
* accumulated in a relatively cheap copy, so frontload that
* as much as possible.
*/
if (sink_get_free_size(out) < cd->num_frames * cd->out_frame_bytes)
return false;

return true;
struct google_rtc_audio_processing_comp_data *cd = module_get_private_data(mod);
return sink_get_free_size(sinks[0]) >= cd->num_frames * cd->cap_framesz;
}

static struct module_interface google_rtc_audio_processing_interface = {
Expand Down

0 comments on commit 7e9b0db

Please sign in to comment.