Skip to content

Commit

Permalink
Enhancements for 4:1 eSBR configuration for USAC encoder
Browse files Browse the repository at this point in the history
- Fix for the spectral misalignment issue observed between HF and LF for 4:1 eSBR
- Delay synchronization between SBR encoder and the core coder
- Delay handling for decoded files using pre-roll frames in the encoder
- Bug fixes

Testing: Smoke-test
  • Loading branch information
Akshay Ragir committed Dec 3, 2024
1 parent 2aea9e7 commit 7a66832
Show file tree
Hide file tree
Showing 18 changed files with 361 additions and 246 deletions.
1 change: 1 addition & 0 deletions encoder/iusace_cnst.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,3 +210,4 @@
#define MAX_PREROLL_CONFIG_SIZE (1024)
#define CC_NUM_PREROLL_FRAMES (1)
#define USAC_FIRST_FRAME_FLAG_DEFAULT_VALUE (1)
#define USAC_DEFAULT_DELAY_ADJUSTMENT_VALUE (1)
1 change: 1 addition & 0 deletions encoder/iusace_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ typedef struct {
ia_drc_internal_config str_internal_drc_cfg;
WORD32 use_measured_loudness;
UWORD16 stream_id;
FLAG use_delay_adjustment;
} ia_usac_encoder_config_struct;

typedef struct {
Expand Down
241 changes: 108 additions & 133 deletions encoder/ixheaace_api.c

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion encoder/ixheaace_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ typedef struct {
UWORD32 measurement_system;
FLOAT32 sample_peak_level;
UWORD16 stream_id;
FLAG use_delay_adjustment;
} ixheaace_input_config;

typedef struct {
Expand All @@ -140,7 +141,7 @@ typedef struct {
WORD32 header_samp_freq;
WORD32 audio_profile;
FLOAT32 down_sampling_ratio;
pWORD32 pb_inp_buf_32;
WORD32 expected_frame_count;
} ixheaace_output_config;

typedef struct {
Expand Down
17 changes: 13 additions & 4 deletions encoder/ixheaace_loudness_measurement.c
Original file line number Diff line number Diff line change
Expand Up @@ -354,8 +354,12 @@ FLOAT64 ixheaace_measure_integrated_loudness(pVOID loudness_handle) {
pstr_loudness_hdl->no_of_mf_passing_rel_gate = 0;
pstr_loudness_hdl->tot_int_val_mf_passing_rel_gate = 0;

avg = (pstr_loudness_hdl->tot_int_val_mf_passing_abs_gate /
pstr_loudness_hdl->no_of_mf_passing_abs_gate);
if (pstr_loudness_hdl->no_of_mf_passing_abs_gate) {
avg = (pstr_loudness_hdl->tot_int_val_mf_passing_abs_gate /
pstr_loudness_hdl->no_of_mf_passing_abs_gate);
} else {
avg = IXHEAACE_SUM_SQUARE_EPS / pstr_loudness_hdl->num_samples_per_ch;
}
pstr_loudness_hdl->rel_gate = -0.691 + 10 * log10(avg) - 10;

while (count < pstr_loudness_hdl->ml_count_fn_call) {
Expand All @@ -368,8 +372,13 @@ FLOAT64 ixheaace_measure_integrated_loudness(pVOID loudness_handle) {
count++;
}

loudness = -0.691 + 10 * log10((pstr_loudness_hdl->tot_int_val_mf_passing_rel_gate /
(FLOAT64)pstr_loudness_hdl->no_of_mf_passing_rel_gate));
if (pstr_loudness_hdl->no_of_mf_passing_rel_gate) {
loudness = -0.691 + 10 * log10((pstr_loudness_hdl->tot_int_val_mf_passing_rel_gate /
(FLOAT64)pstr_loudness_hdl->no_of_mf_passing_rel_gate));
} else {
loudness =
-0.691 + 10 * log10(IXHEAACE_SUM_SQUARE_EPS / pstr_loudness_hdl->num_samples_per_ch);
}

return loudness;
}
Expand Down
1 change: 1 addition & 0 deletions encoder/ixheaace_loudness_measurement.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#define IXHEAACE_DEFAULT_SHORT_TERM_LOUDENSS (-1000)
#define IXHEAACE_DEFAULT_MOMENTARY_LOUDENSS (-1000)
#define IXHEAACE_SEC_TO_100MS_FACTOR (60 * 10)
/* Square of 1/32768, i.e. 2^-30, written as an explicit product of two reciprocals */
#define IXHEAACE_SUM_SQUARE_EPS ((1.0f / 32768.0f) * (1.0f / 32768.0f))

typedef struct {
BOOL passes_abs_gate;
Expand Down
5 changes: 5 additions & 0 deletions encoder/ixheaace_rom.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,11 @@ input buffer (1ch)
/* For 1:3 resampler -> max phase delay * resamp_fac */
#define MAXIMUM_DS_1_3_FILTER_DELAY (36)

/* Delay adjustment constants: one CC_ value and one per SBR ratio (2:1, 4:1, 8:3).
 * NOTE(review): "CC" presumably means core coder; units (presumably samples) and
 * sign convention are not visible here - confirm against the users of these macros. */
#define CC_DELAY_ADJUSTMENT (448)
#define SBR_2_1_DELAY_ADJUSTMENT (-70)
#define SBR_4_1_DELAY_ADJUSTMENT (218)
#define SBR_8_3_DELAY_ADJUSTMENT (-74)

extern const FLOAT32 ixheaace_fd_quant_table[257];
extern const FLOAT32 ixheaace_fd_inv_quant_table[257];
extern const FLOAT32 ixheaace_pow_4_3_table[64];
Expand Down
69 changes: 45 additions & 24 deletions encoder/ixheaace_sbr_env_est.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,8 @@ static IA_ERRORCODE ixheaace_calculate_sbr_envelope(

i = 0;
while (i < n_envelopes) {
start_pos = time_step * pstr_const_frame_info->borders[i];
stop_pos = time_step * pstr_const_frame_info->borders[i + 1];
start_pos = pstr_const_frame_info->borders[i];
stop_pos = pstr_const_frame_info->borders[i + 1];
freq_res = pstr_const_frame_info->freq_res[i];
num_bands = pstr_sbr_cfg->num_scf[freq_res];

Expand All @@ -216,7 +216,7 @@ static IA_ERRORCODE ixheaace_calculate_sbr_envelope(
stop_pos = stop_pos - temp;
}
} else {
stop_pos = stop_pos - time_step;
stop_pos = stop_pos - 1;
}
}
for (j = 0; j < num_bands; j++) {
Expand Down Expand Up @@ -265,14 +265,22 @@ static IA_ERRORCODE ixheaace_calculate_sbr_envelope(
if (missing_harmonic) {
count = stop_pos - start_pos;
for (l = start_pos; l < stop_pos; l++) {
energy_left += ptr_y_buf_left[l / 2][li];
if (pstr_sbr_cfg->is_ld_sbr) {
energy_left += ptr_y_buf_left[l >> 1][li];
} else {
energy_left += ptr_y_buf_left[l][li];
}
}

k = li + 1;
while (k < ui) {
tmp_ene_l = 0.0f;
for (l = start_pos; l < stop_pos; l++) {
tmp_ene_l += ptr_y_buf_left[l / 2][k];
if (pstr_sbr_cfg->is_ld_sbr) {
tmp_ene_l += ptr_y_buf_left[l >> 1][k];
} else {
tmp_ene_l += ptr_y_buf_left[l][k];
}
}

if (tmp_ene_l > energy_left) {
Expand All @@ -291,14 +299,22 @@ static IA_ERRORCODE ixheaace_calculate_sbr_envelope(

if (stereo_mode == SBR_COUPLING) {
for (l = start_pos; l < stop_pos; l++) {
energy_right += ptr_y_buf_right[l / 2][li];
if (pstr_sbr_cfg->is_ld_sbr) {
energy_right += ptr_y_buf_right[l >> 1][li];
} else {
energy_right += ptr_y_buf_right[l][li];
}
}

k = li + 1;
while (k < ui) {
tmp_ene_r = 0.0f;
for (l = start_pos; l < stop_pos; l++) {
tmp_ene_r += ptr_y_buf_right[l / 2][k];
if (pstr_sbr_cfg->is_ld_sbr) {
tmp_ene_r += ptr_y_buf_right[l >> 1][k];
} else {
tmp_ene_r += ptr_y_buf_right[l][k];
}
}

if (tmp_ene_r > energy_right) {
Expand All @@ -314,22 +330,17 @@ static IA_ERRORCODE ixheaace_calculate_sbr_envelope(
energy_right = energy_right * 0.5f;
}
}

tmp_ene_l = energy_left;
energy_left = (energy_left + energy_right) * 0.5f;
energy_right = (tmp_ene_l + 1) / (energy_right + 1);
energy_right = ((tmp_ene_l * time_step) + 1) / ((energy_right * time_step) + 1);
}
} else {
count = (stop_pos - start_pos) * (ui - li);

k = li;
while (k < ui) {
for (l = start_pos; l < stop_pos; l++) {
if (pstr_sbr_cfg->is_ld_sbr) {
energy_left += ptr_y_buf_left[l][k];
} else {
energy_left += ptr_y_buf_left[l / 2][k];
}
energy_left += ptr_y_buf_left[l][k];
}
k++;
}
Expand All @@ -338,17 +349,21 @@ static IA_ERRORCODE ixheaace_calculate_sbr_envelope(
k = li;
while (k < ui) {
for (l = start_pos; l < stop_pos; l++) {
energy_right += ptr_y_buf_right[l / 2][k];
if (pstr_sbr_cfg->is_ld_sbr) {
energy_right += ptr_y_buf_right[l >> 1][k];
} else {
energy_right += ptr_y_buf_right[l][k];
}
}
k++;
}
tmp_ene_l = energy_left;
energy_left = (energy_left + energy_right) * 0.5f;
energy_right = (tmp_ene_l + 1) / (energy_right + 1);
energy_right = ((tmp_ene_l * time_step) + 1) / ((energy_right * time_step) + 1);
}
}

energy_left = (FLOAT32)(log(energy_left / (count * 64) + EPS) * SBR_INV_LOG_2);
energy_left = (FLOAT32)(log((energy_left / (count * 64)) + EPS) * SBR_INV_LOG_2);

if (energy_left < 0.0f) {
energy_left = 0.0f;
Expand All @@ -374,8 +389,9 @@ static IA_ERRORCODE ixheaace_calculate_sbr_envelope(

for (j = 0; j < num_bands; j++) {
if (freq_res == FREQ_RES_HIGH && pstr_sbr->str_sbr_extract_env.envelope_compensation[j]) {
ptr_sfb_ene_l[m] -= (WORD32)(
ca * ixheaac_abs32(pstr_sbr->str_sbr_extract_env.envelope_compensation[j]));
ptr_sfb_ene_l[m] -=
(WORD32)(ca *
ixheaac_abs32(pstr_sbr->str_sbr_extract_env.envelope_compensation[j]));
}

if (ptr_sfb_ene_l[m] < 0) {
Expand Down Expand Up @@ -2124,17 +2140,22 @@ IA_ERRORCODE ixheaace_extract_sbr_envelope(FLOAT32 *ptr_in_time, FLOAT32 *ptr_co
pstr_sbr_extract_env->ptr_y_buffer + pstr_sbr_extract_env->y_buffer_write_offset,
pstr_sbr_extract_env->ptr_r_buffer, pstr_sbr_extract_env->ptr_i_buffer,
pstr_sbr_cfg->is_ld_sbr, pstr_env_ch[ch]->str_sbr_qmf.num_time_slots, samp_ratio_fac,
pstr_hbe_enc, (IXHEAACE_OP_DELAY_OFFSET + IXHEAACE_ESBR_HBE_DELAY_OFFSET +
IXHEAACE_SBR_HF_ADJ_OFFSET), pstr_sbr_hdr->sbr_harmonic);
pstr_hbe_enc,
(IXHEAACE_OP_DELAY_OFFSET + IXHEAACE_ESBR_HBE_DELAY_OFFSET + IXHEAACE_SBR_HF_ADJ_OFFSET),
pstr_sbr_hdr->sbr_harmonic);

ixheaace_calculate_tonality_quotas(
&pstr_env_ch[ch]->str_ton_corr, pstr_sbr_extract_env->ptr_r_buffer,
pstr_sbr_extract_env->ptr_i_buffer,
pstr_sbr_cfg->ptr_freq_band_tab[HI][pstr_sbr_cfg->num_scf[HI]],
pstr_env_ch[ch]->str_sbr_qmf.num_time_slots, pstr_sbr_cfg->is_ld_sbr);
pstr_env_ch[ch]->str_sbr_qmf.num_time_slots, pstr_sbr_extract_env->time_step);
if (pstr_sbr_cfg->is_ld_sbr) {
ixheaace_detect_transient_eld(pstr_sbr_extract_env->ptr_y_buffer,
&pstr_env_ch[ch]->str_sbr_trans_detector, transient_info[ch]);
} else if (pstr_sbr_extract_env->time_step == 4) {
ixheaace_detect_transient_4_1(pstr_sbr_extract_env->ptr_y_buffer,
&pstr_env_ch[ch]->str_sbr_trans_detector, transient_info[ch],
pstr_sbr_extract_env->time_step, pstr_sbr_cfg->sbr_codec);
} else {
ixheaace_detect_transient(pstr_sbr_extract_env->ptr_y_buffer,
&pstr_env_ch[ch]->str_sbr_trans_detector, transient_info[ch],
Expand Down Expand Up @@ -2913,8 +2934,8 @@ IA_ERRORCODE ixheaace_extract_sbr_envelope(FLOAT32 *ptr_in_time, FLOAT32 *ptr_co
FLOAT32 *ptr_tmp;
ptr_tmp = pstr_sbr_extract_env->ptr_y_buffer[i];
pstr_sbr_extract_env->ptr_y_buffer[i] =
pstr_sbr_extract_env->ptr_y_buffer[i + (pstr_sbr_extract_env->no_cols >> 1)];
pstr_sbr_extract_env->ptr_y_buffer[i + (pstr_sbr_extract_env->no_cols >> 1)] = ptr_tmp;
pstr_sbr_extract_env->ptr_y_buffer[i + pstr_sbr_extract_env->time_slots];
pstr_sbr_extract_env->ptr_y_buffer[i + pstr_sbr_extract_env->time_slots] = ptr_tmp;
}

pstr_sbr_extract_env->buffer_flag ^= 1;
Expand Down
7 changes: 2 additions & 5 deletions encoder/ixheaace_sbr_env_est_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,9 @@ ixheaace_create_extract_sbr_envelope(WORD32 ch,
if ((sbr_codec == USAC_SBR) && (USAC_SBR_RATIO_INDEX_4_1 == sbr_ratio_idx)) {
qmf_time_slots = QMF_TIME_SLOTS_USAC_4_1;
y_buffer_write_offset = QMF_TIME_SLOTS_USAC_4_1;
no_cols = qmf_time_slots;
}
if (is_ld_sbr && frame_flag_480) {
y_buffer_write_offset = 30;
no_cols = 30;
time_slots = 15;
}

pstr_sbr_ext_env->y_buffer_write_offset = y_buffer_write_offset;

y_buffer_length = pstr_sbr_ext_env->y_buffer_write_offset + y_buffer_write_offset;
Expand Down
7 changes: 6 additions & 1 deletion encoder/ixheaace_sbr_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ UWORD32 ixheaace_sbr_limit_bitrate(UWORD32 bit_rate, UWORD32 num_ch, UWORD32 cor
VOID ixheaace_adjust_sbr_settings(const ixheaace_pstr_sbr_cfg pstr_config, UWORD32 bit_rate,
UWORD32 num_ch, UWORD32 fs_core, UWORD32 trans_fac,
UWORD32 std_br, ixheaace_str_qmf_tabs *pstr_qmf_tab,
WORD32 aot) {
WORD32 aot, WORD32 is_esbr_4_1) {
FLAG table_found = IXHEAACE_TABLE_IDX_NOT_FOUND;
WORD32 idx_sr = 0;
WORD32 idx_ch = 0;
Expand Down Expand Up @@ -538,6 +538,11 @@ VOID ixheaace_adjust_sbr_settings(const ixheaace_pstr_sbr_cfg pstr_config, UWORD
pstr_config->ps_mode = ixheaace_get_ps_mode(bit_rate);
}
}

if (is_esbr_4_1) {
pstr_config->start_freq = 10;
pstr_config->stop_freq = 11;
}
}

VOID ixheaace_initialize_sbr_defaults(ixheaace_pstr_sbr_cfg pstr_config) {
Expand Down
2 changes: 1 addition & 1 deletion encoder/ixheaace_sbr_main.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ UWORD32 ixheaace_sbr_limit_bitrate(UWORD32 bit_rate, UWORD32 num_channels,
VOID ixheaace_adjust_sbr_settings(const ixheaace_pstr_sbr_cfg pstr_config, UWORD32 bit_rate,
UWORD32 num_channels, UWORD32 fs_core, UWORD32 trans_fac,
UWORD32 standard_bitrate, ixheaace_str_qmf_tabs *ptr_qmf_tab,
WORD32 aot);
WORD32 aot, WORD32 is_esbr_4_1);

VOID ixheaace_initialize_sbr_defaults(ixheaace_pstr_sbr_cfg pstr_config);

Expand Down
24 changes: 8 additions & 16 deletions encoder/ixheaace_sbr_qmf_enc.c
Original file line number Diff line number Diff line change
Expand Up @@ -961,17 +961,14 @@ VOID ixheaace_get_energy_from_cplx_qmf(
}
if (0 == is_ld_sbr) {
FLOAT32 *ptr_energy_val = &ptr_energy_vals[0][0];
FLOAT32 *ptr_real = &ptr_real_values[0][0];
FLOAT32 *ptr_imag = &ptr_imag_values[0][0];
FLOAT32 *ptr_hbe_real = NULL;
FLOAT32 *ptr_hbe_imag = NULL;
if (harmonic_sbr == 1) {
ptr_hbe_real = &pstr_hbe_enc->qmf_buf_real[op_delay][0];
ptr_hbe_imag = &pstr_hbe_enc->qmf_buf_imag[op_delay][0];
}
k = (num_time_slots - 1);
while (k >= 0) {
for (j = 63; j >= 0; j--) {
for (k = 0; k < num_time_slots; k++) {
for (j = 0; j < IXHEAACE_QMF_CHANNELS; j++) {
FLOAT32 tmp = 0.0f;
if (harmonic_sbr == 1) {
FLOAT32 real_hbe, imag_hbe;
Expand All @@ -981,28 +978,23 @@ VOID ixheaace_get_energy_from_cplx_qmf(
*ptr_energy_val = tmp;
ptr_hbe_real++;
ptr_hbe_imag++;
ptr_energy_val++;
} else {
FLOAT32 real, imag;
WORD32 i;
WORD32 i, subband;
subband = samp_ratio_fac * k;
for (i = 0; i < samp_ratio_fac; i++) {
real = *(ptr_real + i * IXHEAACE_QMF_CHANNELS);
imag = *(ptr_imag + i * IXHEAACE_QMF_CHANNELS);
real = ptr_real_values[subband + i][j];
imag = ptr_imag_values[subband + i][j];
tmp += (real * real) + (imag * imag);
}
*ptr_energy_val = tmp * avg_fac;
ptr_real++;
ptr_imag++;
ptr_energy_vals[k][j] = tmp * avg_fac;
}
ptr_energy_val++;
}
if (harmonic_sbr == 1) {
ptr_hbe_real += 64;
ptr_hbe_imag += 64;
} else {
ptr_real += 64;
ptr_imag += 64;
}
k--;
}
} else {
FLOAT32 *ptr_real = &ptr_real_values[0][0];
Expand Down
15 changes: 6 additions & 9 deletions encoder/ixheaace_sbr_ton_corr_hp.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ static VOID ixheaace_calc_auto_corr_second_order(ixheaace_acorr_coeffs *pstr_ac,

VOID ixheaace_calculate_tonality_quotas(ixheaace_pstr_sbr_ton_corr_est pstr_ton_corr,
FLOAT32 **ptr_real, FLOAT32 **ptr_imag, WORD32 usb,
WORD32 num_time_slots, WORD32 is_ld_sbr) {
WORD32 num_time_slots, WORD32 time_step) {
WORD32 i, k, r, time_index;
FLOAT32 alphar[2], alphai[2], r01r, r02r, r11r, r12r, r01i, r02i, r12i, det, r00r;
ixheaace_acorr_coeffs ac;
Expand All @@ -129,7 +129,7 @@ VOID ixheaace_calculate_tonality_quotas(ixheaace_pstr_sbr_ton_corr_est pstr_ton_
WORD32 no_est_per_frame = pstr_ton_corr->est_cnt_per_frame;
WORD32 move = pstr_ton_corr->move;
WORD32 num_qmf_ch = pstr_ton_corr->num_qmf_ch;
WORD32 len = num_time_slots;
WORD32 len;
WORD32 qm_len;
for (i = 0; i < move; i++) {
memcpy(ptr_quota_mtx[i], ptr_quota_mtx[i + no_est_per_frame],
Expand All @@ -139,12 +139,9 @@ VOID ixheaace_calculate_tonality_quotas(ixheaace_pstr_sbr_ton_corr_est pstr_ton_
memmove(ptr_energy_vec, ptr_energy_vec + no_est_per_frame, move * sizeof(ptr_energy_vec[0]));
memset(ptr_energy_vec + start_index_matrix, 0,
(tot_no_est - start_index_matrix) * sizeof(ptr_energy_vec[0]));
if (is_ld_sbr) {
len = num_time_slots / 2;
qm_len = 2 + len;
} else {
qm_len = 18;
}

len = (num_time_slots * time_step) / 2;
qm_len = 2 + len;

for (r = 0; r < usb; r++) {
k = 2;
Expand Down Expand Up @@ -185,7 +182,7 @@ VOID ixheaace_calculate_tonality_quotas(ixheaace_pstr_sbr_ton_corr_est pstr_ton_
}
ptr_energy_vec[time_index] += r00r;

k += is_ld_sbr ? len : 16;
k += len;

time_index++;
}
Expand Down
Loading

0 comments on commit 7a66832

Please sign in to comment.