Skip to content

Commit

Permalink
Math: IIR DF1: Optimize IIR core for Xtensa HiFi4
Browse files Browse the repository at this point in the history
This patch adds iir_df1_hifi4.c, which is a modified version of
iir_df1_hifi3.c. The IIR calculation uses the 32x32 dual MAC.
The update of the IIR delay lines is improved with a delay shift,
round and pack instruction.

The iir->delay address must be aligned to 64 bits / 8 bytes due
to the use of the fastest, non-aligning 64-bit load/store.

In a sof-testbench4 run for the MTL build
(scripts/sof-testbench-helper.sh -x -m eqiir), the updated version
saves 0.8 MCPS, from 10.6 to 9.8 MCPS, for a 10th order filter.

On a real MTL device with a 2nd order high-pass filter, the saving is
0.4 MCPS, from 7.8 to 7.4 MCPS.

Signed-off-by: Seppo Ingalsuo <[email protected]>
  • Loading branch information
singalsu committed Nov 27, 2024
1 parent 5249d28 commit 80f73c1
Show file tree
Hide file tree
Showing 6 changed files with 121 additions and 3 deletions.
2 changes: 1 addition & 1 deletion src/include/sof/math/iir_df1.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ void iir_reset_df1(struct iir_state_df1 *iir);
int32_t iir_df1(struct iir_state_df1 *iir, int32_t x);

/* Inline functions */
#if SOF_USE_HIFI(3, FILTER) || SOF_USE_HIFI(4, FILTER)
#if SOF_USE_MIN_HIFI(3, FILTER)
#include "iir_df1_hifi3.h"
#else
#include "iir_df1_generic.h"
Expand Down
2 changes: 1 addition & 1 deletion src/math/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ add_local_sources_ifdef(CONFIG_MATH_IIR_DF2T sof
iir_df2t_generic.c iir_df2t_hifi3.c iir_df2t.c)

add_local_sources_ifdef(CONFIG_MATH_IIR_DF1 sof
iir_df1_generic.c iir_df1_hifi3.c iir_df1.c)
iir_df1_generic.c iir_df1_hifi3.c iir_df1_hifi4.c iir_df1.c)

if(CONFIG_MATH_WINDOW)
add_local_sources(sof window.c)
Expand Down
2 changes: 1 addition & 1 deletion src/math/iir_df1_hifi3.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

#include <rtos/symbol.h>

#if SOF_USE_MIN_HIFI(3, FILTER)
#if SOF_USE_HIFI(3, FILTER)

/*
* Direct form I second order filter block (biquad)
Expand Down
116 changes: 116 additions & 0 deletions src/math/iir_df1_hifi4.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// SPDX-License-Identifier: BSD-3-Clause
//
// Copyright(c) 2022-2024 Intel Corporation.
//
// Author: Seppo Ingalsuo <[email protected]>

#include <stdint.h>
#include <stddef.h>
#include <errno.h>
#include <sof/audio/format.h>
#include <sof/math/iir_df1.h>
#include <user/eq.h>
#include <sof/common.h>

#include <rtos/symbol.h>

#if SOF_USE_MIN_HIFI(4, FILTER)

/*
 * Direct form I second order filter block (biquad)
 *
 *              +----+                             +---+    +-------+
 * X(z) ---o--->| b0 |---> + --+--------------o--->| g |--->| shift |---> Y(z)
 *         |    +----+     ^   ^              |    +---+    +-------+
 *         |               |   |              |
 *      +------+           |   |           +------+
 *      | z^-1 |           |   |           | z^-1 |
 *      +------+           |   |           +------+
 *         |    +----+     |   |     +----+   |
 *         o--->| b1 |---> +   + <---| a1 |---o
 *         |    +----+     ^   ^     +----+   |
 *         |               |   |              |
 *      +------+           |   |           +------+
 *      | z^-1 |           |   |           | z^-1 |
 *      +------+           |   |           +------+
 *         |               ^   ^              |
 *         |    +----+     |   |     +----+   |
 *         o--->| b2 |---> +   +<--- | a2 |---o
 *              +----+               +----+
 *
 * y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
 * The a1 and a2 coefficients have been negated when they were calculated,
 * therefore the feedback terms are added in the equation above.
 */

/* Series DF1 IIR */

/* 32 bit data, 32 bit coefficients and 32 bit state variables */

/*
 * iir_df1() - Process one sample with the direct form I IIR, HiFi4 version.
 * @iir: Filter state. The function reads iir->biquads,
 *	 iir->biquads_in_series and iir->coef, and it reads and updates
 *	 iir->delay.
 * @x: Input sample.
 *
 * The iir->biquads biquads are processed in groups of
 * iir->biquads_in_series. Inside a group the biquads are chained, the
 * output of a biquad is the input of the next one. Every group starts
 * from the same input x and the outputs of the groups are summed with
 * AE_F32_ADDS_F32() into the returned value.
 *
 * The coefficient and delay pointers are set only once before the loops
 * and are never rewound, so the coefficients and delay lines of all
 * biquads are consumed in the order they are stored.
 *
 * The delay lines are accessed with the 64 bit AE_L32X2_I() and
 * AE_S32X2_IP() without alignment handling, so the iir->delay address
 * must be aligned to 64 bits / 8 bytes.
 *
 * NOTE(review): the outer loop advances by iir->biquads_in_series, so a
 * zero value together with a non-zero iir->biquads would never terminate.
 * Presumably the filter initialization prevents this, to be confirmed.
 *
 * Return: x unchanged when iir->biquads is zero (bypass), otherwise the
 * sum of the outputs of the biquad groups.
 */
int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
{
	ae_valign coef_align;
	ae_f64 acc;
	ae_int32x2 coef_a2a1;
	ae_int32x2 coef_b2b1;
	ae_int32x2 coef_b0;
	ae_int32x2 gain;
	ae_int32x2 delay_y2y1;
	ae_int32x2 delay_x2x1;
	ae_int32x2 shift;
	ae_int32 in;
	ae_int32 out = 0;
	ae_int32x2 *coefp = (ae_int32x2 *)iir->coef;
	ae_int32x2 *delay = (ae_int32x2 *)iir->delay;
	int i;
	int j;
	int nseries = iir->biquads_in_series;

	/* Bypass is set with number of biquads set to zero. */
	if (!iir->biquads)
		return x;

	/* Coefficients order in iir->coef is {a2, a1, b2, b1, b0, shift, gain} */
	/* Delay order in iir->delay is {y(n - 2), y(n - 1), x(n - 2), x(n - 1)} */
	for (j = 0; j < iir->biquads; j += nseries) {
		/* Every group of nseries biquads starts from the input sample */
		in = x;
		for (i = 0; i < nseries; i++) {
			/* Load the y and x delay pairs of this biquad. The
			 * delay pointer itself is advanced by the stores below.
			 */
			delay_y2y1 = AE_L32X2_I(delay, 0);
			delay_x2x1 = AE_L32X2_I(delay, sizeof(ae_int32x2));

			/* Load coefficients: two pairs with the aligning load
			 * sequence, then b0, shift and gain as single 32 bit
			 * words.
			 * NOTE(review): the aligning load is presumably used
			 * because seven 32 bit words per biquad leave every
			 * second biquad's coefficients only 32 bit aligned,
			 * to be confirmed.
			 */
			coef_align = AE_LA64_PP(coefp);
			AE_LA32X2_IP(coef_a2a1, coef_align, coefp);
			AE_LA32X2_IP(coef_b2b1, coef_align, coefp);
			AE_L32_IP(coef_b0, (ae_int32 *)coefp, 4);
			AE_L32_IP(shift, (ae_int32 *)coefp, 4);
			AE_L32_IP(gain, (ae_int32 *)coefp, 4);

			/* Biquad sum, the a1 and a2 terms are added since the
			 * coefficients are stored negated.
			 */
			acc = AE_MULF32RA_HH(coef_b0, in); /* acc = b0 * in */
			AE_MULAAFD32RA_HH_LL(acc, coef_a2a1, delay_y2y1); /* + a2 * y2 + a1 * y1 */
			AE_MULAAFD32RA_HH_LL(acc, coef_b2b1, delay_x2x1); /* + b2 * x2 + b1 * x1 */

			/* Update both delay lines in registers before the store */
			AE_PKSR32(delay_y2y1, acc, 1); /* y2 = y1, y1 = acc(q1.31) */
			delay_x2x1 = AE_SEL32_LL(delay_x2x1, in); /* x2 = x1, x1 = in */

			/* Store data, the two stores are expected to leave the
			 * delay pointer at the delay lines of the next biquad.
			 */
			AE_S32X2_IP(delay_y2y1, delay, sizeof(ae_int32x2));
			AE_S32X2_IP(delay_x2x1, delay, sizeof(ae_int32x2));

			/* Apply gain to the new y1 that was packed above */
			acc = AE_MULF32R_LL(gain, delay_y2y1); /* acc = gain * y1 */
			acc = AE_SLAI64S(acc, 17); /* Convert to Q17.47 */

			/* Apply biquad output shift right parameter and then
			 * round and saturate to 32 bits Q1.31.
			 */
			acc = AE_SRAA64(acc, shift);
			in = AE_ROUND32F48SSYM(acc);
		}
		/* Output of previous section is in variable in, add it to
		 * the sum of the group outputs.
		 */
		out = AE_F32_ADDS_F32(out, in);
	}
	return out;
}
EXPORT_SYMBOL(iir_df1);

#endif
1 change: 1 addition & 0 deletions test/cmocka/src/audio/eq_iir/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ add_library(audio_for_eq_iir STATIC
${PROJECT_SOURCE_DIR}/src/math/iir_df1.c
${PROJECT_SOURCE_DIR}/src/math/iir_df1_generic.c
${PROJECT_SOURCE_DIR}/src/math/iir_df1_hifi3.c
${PROJECT_SOURCE_DIR}/src/math/iir_df1_hifi4.c
${PROJECT_SOURCE_DIR}/src/math/iir_df2t.c
${PROJECT_SOURCE_DIR}/src/math/iir_df2t_generic.c
${PROJECT_SOURCE_DIR}/src/math/iir_df2t_hifi3.c
Expand Down
1 change: 1 addition & 0 deletions zephyr/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,7 @@ zephyr_library_sources_ifdef(CONFIG_MATH_FIR
zephyr_library_sources_ifdef(CONFIG_MATH_IIR_DF1
${SOF_MATH_PATH}/iir_df1_generic.c
${SOF_MATH_PATH}/iir_df1_hifi3.c
${SOF_MATH_PATH}/iir_df1_hifi4.c
${SOF_MATH_PATH}/iir_df1.c
)

Expand Down

0 comments on commit 80f73c1

Please sign in to comment.