From 7295b22d59ebd9e9d1fad45c361a89ab9a87f53c Mon Sep 17 00:00:00 2001 From: Seppo Ingalsuo Date: Wed, 30 Oct 2024 16:34:59 +0200 Subject: [PATCH] Audio: DRC: Optimize HiFi4 drc_update_detector_average() Use 64-bit SIMD for load/store and maximum absolute value search. This saves about 0.1 MCPS in MTL simulation in sof-testbench4. Signed-off-by: Seppo Ingalsuo --- src/audio/drc/drc_hifi4.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/audio/drc/drc_hifi4.c b/src/audio/drc/drc_hifi4.c index d8c92af60cca..b2252058e885 100644 --- a/src/audio/drc/drc_hifi4.c +++ b/src/audio/drc/drc_hifi4.c @@ -143,23 +143,29 @@ void drc_update_detector_average(struct drc_state *state, for (ch = 0; ch < nch; ch++) { abs_input_array_p = abs_input_array; sample16_p = (ae_int16 *)state->pre_delay_buffers[ch] + div_start; - for (i = 0; i < DRC_DIVISION_FRAMES; i++) { - AE_L16_XP(sample16, sample16_p, nbyte); + for (i = 0; i < DRC_DIVISION_FRAMES / 4; i++) { + AE_L16X4_XP(sample16, sample16_p, sizeof(ae_int16x4)); + + sample32 = AE_CVT32X2F16_32(sample16); + temp = AE_L32X2_I(abs_input_array_p, 0); + sample32 = AE_MAXABS32S(sample32, temp); + AE_S32X2_IP(sample32, abs_input_array_p, sizeof(ae_int32x2)); + sample32 = AE_CVT32X2F16_10(sample16); - temp = AE_L32_I(abs_input_array_p, 0); + temp = AE_L32X2_I(abs_input_array_p, 0); sample32 = AE_MAXABS32S(sample32, temp); - AE_S32_L_XP(sample32, abs_input_array_p, 4); + AE_S32X2_IP(sample32, abs_input_array_p, sizeof(ae_int32x2)); } } } else { /* 4 bytes per sample */ for (ch = 0; ch < nch; ch++) { abs_input_array_p = abs_input_array; sample32_p = (ae_int32 *)state->pre_delay_buffers[ch] + div_start; - for (i = 0; i < DRC_DIVISION_FRAMES; i++) { - AE_L32_XP(sample32, sample32_p, nbyte); - temp = AE_L32_I(abs_input_array_p, 0); + for (i = 0; i < DRC_DIVISION_FRAMES / 2; i++) { + AE_L32X2_IP(sample32, sample32_p, sizeof(ae_int32x2)); + temp = AE_L32X2_I(abs_input_array_p, 0); sample32 = 
AE_MAXABS32S(sample32, temp); - AE_S32_L_XP(sample32, abs_input_array_p, nbyte); + AE_S32X2_IP(sample32, abs_input_array_p, sizeof(ae_int32x2)); } } }