From 51268b3a71741d6f71ca6b9b980b548b49a20679 Mon Sep 17 00:00:00 2001
From: Ashwin Natesan <ashwin.natesan@ittiam.com>
Date: Fri, 27 Oct 2023 16:57:35 +0530
Subject: [PATCH] svcenc: Redundant code removed

The following redundant code has been removed to improve code coverage:
  [x] Functions -
      - isvc_interleaved_copy
      - isvc_16bit_interleaved_copy
      - isvc_16bit_interleaved_memset
      - isvc_iquant_itrans_recon_chroma_4x4_neon
      - isvc_iquant_itrans_recon_chroma_4x4_sse42
      - isvc_iquant_itrans_recon_4x4_dc_with_res_output_neon
      - isvc_iquant_itrans_recon_res_dc_4x4_sse42
      - isvc_iquant_itrans_recon_4x4_dc_with_res_accumulate_neon
      - isvc_iquant_itrans_recon_res_dc_with_res_acc_4x4_sse42
      - isvc_iquant_itrans_recon_chroma_4x4_dc_neon
      - isvce_wait_for_thread
  [x] Function pointer initialisations for the functions above
  [x] Extern declarations removed from isvc_mem_fns.h and
      isvc_trans_quant_itrans_iquant.h

Test: svc_enc_fuzzer
---
 .../arm/svc/isvc_iquant_itrans_recon_neon.c   | 452 --------
 common/svc/isvc_mem_fns.c                     | 158 ---
 common/svc/isvc_mem_fns.h                     |   8 -
 common/svc/isvc_trans_quant_itrans_iquant.h   |   8 -
 .../x86/svc/isvc_iquant_itrans_recon_sse42.c  | 967 +++---------------
 encoder/arm/svc/isvce_function_selector_a9q.c |  14 -
 encoder/arm/svc/isvce_function_selector_av8.c |  16 +-
 encoder/svc/isvce_api.c                       | 265 ++---
 encoder/svc/isvce_encode.c                    |  33 -
 encoder/svc/isvce_function_selector_generic.c |   4 -
 encoder/svc/isvce_ilp_mv.c                    |   2 +-
 encoder/svc/isvce_structs.h                   |   8 -
 .../x86/svc/isvce_function_selector_sse42.c   |   7 -
 13 files changed, 205 insertions(+), 1737 deletions(-)

diff --git a/common/arm/svc/isvc_iquant_itrans_recon_neon.c b/common/arm/svc/isvc_iquant_itrans_recon_neon.c
index 270adde4..8a97fbc5 100644
--- a/common/arm/svc/isvc_iquant_itrans_recon_neon.c
+++ b/common/arm/svc/isvc_iquant_itrans_recon_neon.c
@@ -587,193 +587,6 @@ void isvc_iquant_itrans_recon_4x4_with_res_accumulate_neon(
                   vreinterpret_u32_u8(pred23_un), 1);
 }
 
-void isvc_iquant_itrans_recon_chroma_4x4_neon(
-    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
-    buffer_container_t *ps_res, buffer_container_t *ps_rec,
-    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
-    WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
-{
-    WORD16 *pi2_src = (WORD16 *) ps_src->pv_data;
-    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
-    UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
-    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
-    WORD32 i4_out_stride = ps_rec->i4_data_stride;
-    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
-    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
-    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
-
-    WORD16 i2_rnd_factor = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
-
-    int16x4x4_t src_16x4x2;
-    int16x4x4_t iscal_16x4x2;
-    int16x4x4_t weigh_16x4x2;
-
-    int16x4_t q0_16x4, q1_16x4, q2_16x4, q3_16x4;
-    int32x4_t q0_32x4, q1_32x4, q2_32x4, q3_32x4;
-    int16x4_t rq1_16x4, rq3_16x4;
-    int16x4_t x0_16x4, x1_16x4, x2_16x4, x3_16x4;
-    int16x8_t x0_16x8, x1_16x8, x2_16x8, x3_16x8;
-    int16x4_t xx0_16x4, xx1_16x4, xx2_16x4, xx3_16x4;
-    int16x4x2_t xx0_16x4x2, xx1_16x4x2;
-    int32x2x2_t x0_32x2x2, x1_32x2x2;
-    int16x4_t weigh0_16x4, weigh1_16x4, weigh2_16x4, weigh3_16x4;
-
-    uint8x8_t pred0_in, pred1_in, pred2_in, pred3_in;
-    int16x8_t pred0, pred1, pred2, pred3;
-    int16x8_t rec0, rec1, rec2, rec3;
-    uint8x8_t rec0_un, rec1_un, rec2_un, rec3_un;
-    uint8x8_t out0, out1, out2, out3;
-
-    uint8x8_t chroma_mask_8x8 = vreinterpret_u8_u16(vdup_n_u16(0x00ff));
-
-    int16x4_t pos_255_16x4 = vdup_n_s16(((WORD16) UINT8_MAX));
-    int16x4_t neg_255_16x4 = vdup_n_s16(-((WORD16) UINT8_MAX));
-    int32x4_t qp_div_6_32x4 = vdupq_n_s32(u4_qp_div_6);
-    int32x4_t rnd_fact = vdupq_n_s32(i2_rnd_factor);
-
-    UNUSED(i4_iq_start_idx);
-    UNUSED(ps_res);
-    UNUSED(ps_res_pred);
-    UNUSED(u1_res_accumulate);
-
-    src_16x4x2 = vld4_s16(pi2_src);
-    iscal_16x4x2 = vld4_s16((const int16_t *) pu2_iscal_mat);
-    weigh_16x4x2 = vld4_s16((const int16_t *) pu2_weigh_mat);
-
-    weigh0_16x4 = vmul_s16(weigh_16x4x2.val[0], iscal_16x4x2.val[0]);
-    weigh1_16x4 = vmul_s16(weigh_16x4x2.val[1], iscal_16x4x2.val[1]);
-    weigh2_16x4 = vmul_s16(weigh_16x4x2.val[2], iscal_16x4x2.val[2]);
-    weigh3_16x4 = vmul_s16(weigh_16x4x2.val[3], iscal_16x4x2.val[3]);
-
-    q0_32x4 = vmull_s16(weigh0_16x4, src_16x4x2.val[0]);
-    q1_32x4 = vmull_s16(weigh1_16x4, src_16x4x2.val[1]);
-    q2_32x4 = vmull_s16(weigh2_16x4, src_16x4x2.val[2]);
-    q3_32x4 = vmull_s16(weigh3_16x4, src_16x4x2.val[3]);
-
-    q0_32x4 = vaddq_s32(q0_32x4, rnd_fact);
-    q1_32x4 = vaddq_s32(q1_32x4, rnd_fact);
-    q2_32x4 = vaddq_s32(q2_32x4, rnd_fact);
-    q3_32x4 = vaddq_s32(q3_32x4, rnd_fact);
-
-    q0_32x4 = vshlq_s32(q0_32x4, qp_div_6_32x4);
-    q1_32x4 = vshlq_s32(q1_32x4, qp_div_6_32x4);
-    q2_32x4 = vshlq_s32(q2_32x4, qp_div_6_32x4);
-    q3_32x4 = vshlq_s32(q3_32x4, qp_div_6_32x4);
-
-    q0_16x4 = vqshrn_n_s32(q0_32x4, 4);
-    q1_16x4 = vqshrn_n_s32(q1_32x4, 4);
-    q2_16x4 = vqshrn_n_s32(q2_32x4, 4);
-    q3_16x4 = vqshrn_n_s32(q3_32x4, 4);
-
-    q0_16x4 = vset_lane_s16(pi2_dc_src[0], q0_16x4, 0);
-
-    rq1_16x4 = vshr_n_s16(q1_16x4, 1);
-    rq3_16x4 = vshr_n_s16(q3_16x4, 1);
-
-    x0_16x4 = vadd_s16(q0_16x4, q2_16x4);
-    x1_16x4 = vsub_s16(q0_16x4, q2_16x4);
-    x2_16x4 = vsub_s16(rq1_16x4, q3_16x4);
-    x3_16x4 = vadd_s16(q1_16x4, rq3_16x4);
-
-    xx0_16x4 = vadd_s16(x0_16x4, x3_16x4);
-    xx1_16x4 = vadd_s16(x1_16x4, x2_16x4);
-    xx2_16x4 = vsub_s16(x1_16x4, x2_16x4);
-    xx3_16x4 = vsub_s16(x0_16x4, x3_16x4);
-
-    /* row 0 to row 3 */
-    xx0_16x4x2 = vtrn_s16(xx0_16x4, xx1_16x4);
-    xx1_16x4x2 = vtrn_s16(xx2_16x4, xx3_16x4);
-    x0_32x2x2 =
-        vzip_s32(vreinterpret_s32_s16(xx0_16x4x2.val[0]), vreinterpret_s32_s16(xx1_16x4x2.val[0]));
-    x1_32x2x2 =
-        vzip_s32(vreinterpret_s32_s16(xx0_16x4x2.val[1]), vreinterpret_s32_s16(xx1_16x4x2.val[1]));
-
-    x0_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[0]);
-    x1_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[0]);
-    x2_16x4 = vreinterpret_s16_s32(x0_32x2x2.val[1]);
-    x3_16x4 = vreinterpret_s16_s32(x1_32x2x2.val[1]);
-
-    /* Store Horz transform output into temp */
-    vst1_s16(pi2_tmp, x0_16x4);
-    vst1_s16(pi2_tmp + 4, x1_16x4);
-    vst1_s16(pi2_tmp + 8, x2_16x4);
-    vst1_s16(pi2_tmp + 12, x3_16x4);
-
-    /* vertical inverse transform */
-    rq1_16x4 = vshr_n_s16(x1_16x4, 1);
-    rq3_16x4 = vshr_n_s16(x3_16x4, 1);
-
-    xx0_16x4 = vadd_s16(x0_16x4, x2_16x4);
-    xx1_16x4 = vsub_s16(x0_16x4, x2_16x4);
-    xx2_16x4 = vsub_s16(rq1_16x4, x3_16x4);
-    xx3_16x4 = vadd_s16(x1_16x4, rq3_16x4);
-
-    x0_16x4 = vadd_s16(xx0_16x4, xx3_16x4);
-    x1_16x4 = vadd_s16(xx1_16x4, xx2_16x4);
-    x2_16x4 = vsub_s16(xx1_16x4, xx2_16x4);
-    x3_16x4 = vsub_s16(xx0_16x4, xx3_16x4);
-
-    x0_16x4 = vrshr_n_s16(x0_16x4, 6);
-    x1_16x4 = vrshr_n_s16(x1_16x4, 6);
-    x2_16x4 = vrshr_n_s16(x2_16x4, 6);
-    x3_16x4 = vrshr_n_s16(x3_16x4, 6);
-
-    /* Saturate all values < -255 to -255 and retain the rest as it is */
-    x0_16x4 = vmax_s16(x0_16x4, neg_255_16x4);
-    x1_16x4 = vmax_s16(x1_16x4, neg_255_16x4);
-    x2_16x4 = vmax_s16(x2_16x4, neg_255_16x4);
-    x3_16x4 = vmax_s16(x3_16x4, neg_255_16x4);
-
-    /* Saturate all values > 255 to 255 and retain the rest as it is */
-    x0_16x4 = vmin_s16(x0_16x4, pos_255_16x4);
-    x1_16x4 = vmin_s16(x1_16x4, pos_255_16x4);
-    x2_16x4 = vmin_s16(x2_16x4, pos_255_16x4);
-    x3_16x4 = vmin_s16(x3_16x4, pos_255_16x4);
-
-    x0_16x8 = vreinterpretq_s16_s32(vmovl_s16(x0_16x4));
-    x1_16x8 = vreinterpretq_s16_s32(vmovl_s16(x1_16x4));
-    x2_16x8 = vreinterpretq_s16_s32(vmovl_s16(x2_16x4));
-    x3_16x8 = vreinterpretq_s16_s32(vmovl_s16(x3_16x4));
-
-    pred0_in = vld1_u8((uint8_t *) pu1_pred);
-    pred1_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride));
-    pred2_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride << 1));
-    pred3_in = vld1_u8((uint8_t *) pu1_pred + (i4_pred_stride * 3));
-
-    pred0 = vreinterpretq_s16_u16(vmovl_u8(pred0_in));
-    pred1 = vreinterpretq_s16_u16(vmovl_u8(pred1_in));
-    pred2 = vreinterpretq_s16_u16(vmovl_u8(pred2_in));
-    pred3 = vreinterpretq_s16_u16(vmovl_u8(pred3_in));
-
-    /* Out pixel = pred + res */
-    rec0 = vaddq_s16(pred0, x0_16x8);
-    rec1 = vaddq_s16(pred1, x1_16x8);
-    rec2 = vaddq_s16(pred2, x2_16x8);
-    rec3 = vaddq_s16(pred3, x3_16x8);
-
-    out0 = vld1_u8(pu1_out);
-    out1 = vld1_u8(pu1_out + i4_out_stride);
-    out2 = vld1_u8(pu1_out + i4_out_stride * 2);
-    out3 = vld1_u8(pu1_out + i4_out_stride * 3);
-
-    /* Convert to 8 bit unsigned with saturation */
-    rec0_un = vqmovun_s16(rec0);
-    rec1_un = vqmovun_s16(rec1);
-    rec2_un = vqmovun_s16(rec2);
-    rec3_un = vqmovun_s16(rec3);
-
-    /* Store in alternate postions */
-    out0 = vbsl_u8(chroma_mask_8x8, rec0_un, out0);
-    out1 = vbsl_u8(chroma_mask_8x8, rec1_un, out1);
-    out2 = vbsl_u8(chroma_mask_8x8, rec2_un, out2);
-    out3 = vbsl_u8(chroma_mask_8x8, rec3_un, out3);
-
-    vst1_u8((pu1_out), out0);
-    vst1_u8((pu1_out + i4_out_stride), out1);
-    vst1_u8((pu1_out + (i4_out_stride << 1)), out2);
-    vst1_u8((pu1_out + ((i4_out_stride << 1) + i4_out_stride)), out3);
-}
-
 void isvc_iquant_itrans_recon_chroma_4x4_with_res_output_neon(
     buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
     buffer_container_t *ps_res, buffer_container_t *ps_rec,
@@ -1280,271 +1093,6 @@ void isvc_iquant_itrans_recon_4x4_dc_neon(buffer_container_t *ps_src, buffer_con
     vst1_lane_u32((uint32_t *) (pu1_out + i4_out_stride * 3), vreinterpret_u32_u8(pred3_in), 0);
 }
 
-void isvc_iquant_itrans_recon_4x4_dc_with_res_output_neon(
-    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
-    buffer_container_t *ps_res, buffer_container_t *ps_rec,
-    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
-    WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
-{
-    WORD16 *pi2_src = (WORD16 *) ps_src->pv_data;
-    WORD16 *pi2_res = (WORD16 *) ps_res->pv_data;
-    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
-    UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
-    WORD32 i4_res_stride = ps_res->i4_data_stride;
-    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
-    WORD32 i4_out_stride = ps_rec->i4_data_stride;
-    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
-    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
-    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
-    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
-
-    WORD16 i2_it_out;
-    WORD32 i4_iq_out_temp;
-    int16x8_t temp_0;
-    int16x4_t residue_res;
-    uint8x8_t pred0_in, pred1_in, pred2_in, pred3_in;
-    int16x8_t pred0, pred1, pred2, pred3;
-
-    UNUSED(pi2_tmp);
-    UNUSED(ps_res_pred);
-    UNUSED(u1_res_accumulate);
-
-    if(i4_iq_start_idx == 0)
-    {
-        i4_iq_out_temp = pi2_src[0];
-        INV_QUANT(i4_iq_out_temp, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
-    }
-    else
-    {
-        i4_iq_out_temp = pi2_dc_src[0];
-    }
-
-    i2_it_out = ((i4_iq_out_temp + 32) >> 6);
-    temp_0 = vdupq_n_s16(i2_it_out);
-    residue_res = vdup_n_s16(isvc_get_residue(i2_it_out, 0, 0));
-
-    vst1_s16(pi2_res, residue_res);
-    vst1_s16(pi2_res + i4_res_stride, residue_res);
-    vst1_s16(pi2_res + (i4_res_stride << 1), residue_res);
-    vst1_s16(pi2_res + (i4_res_stride << 1) + i4_res_stride, residue_res);
-
-    pred0_in = vld1_u8(pu1_pred);
-    pu1_pred = pu1_pred + i4_pred_stride;
-    pred1_in = vld1_u8(pu1_pred);
-    pu1_pred = pu1_pred + i4_pred_stride;
-    pred2_in = vld1_u8(pu1_pred);
-    pu1_pred = pu1_pred + i4_pred_stride;
-    pred3_in = vld1_u8(pu1_pred);
-
-    pred0 = vreinterpretq_s16_u16(vmovl_u8(pred0_in));
-    pred1 = vreinterpretq_s16_u16(vmovl_u8(pred1_in));
-    pred2 = vreinterpretq_s16_u16(vmovl_u8(pred2_in));
-    pred3 = vreinterpretq_s16_u16(vmovl_u8(pred3_in));
-
-    /* Out pixel = Res + pred */
-    pred0 = vaddq_s16(pred0, temp_0);
-    pred1 = vaddq_s16(pred1, temp_0);
-    pred2 = vaddq_s16(pred2, temp_0);
-    pred3 = vaddq_s16(pred3, temp_0);
-
-    /* Convert to unsigned 8 bit with saturation */
-    pred0_in = vqmovun_s16(pred0);
-    pred1_in = vqmovun_s16(pred1);
-    pred2_in = vqmovun_s16(pred2);
-    pred3_in = vqmovun_s16(pred3);
-
-    vst1_lane_u32((uint32_t *) (pu1_out), vreinterpret_u32_u8(pred0_in), 0);
-    vst1_lane_u32((uint32_t *) (pu1_out + i4_out_stride), vreinterpret_u32_u8(pred1_in), 0);
-    vst1_lane_u32((uint32_t *) (pu1_out + i4_out_stride * 2), vreinterpret_u32_u8(pred2_in), 0);
-    vst1_lane_u32((uint32_t *) (pu1_out + i4_out_stride * 3), vreinterpret_u32_u8(pred3_in), 0);
-}
-
-void isvc_iquant_itrans_recon_4x4_dc_with_res_accumulate_neon(
-    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
-    buffer_container_t *ps_res, buffer_container_t *ps_rec,
-    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
-    WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
-{
-    WORD16 *pi2_src = (WORD16 *) ps_src->pv_data;
-    WORD16 *pi2_res = (WORD16 *) ps_res->pv_data;
-    WORD16 *pi2_res_pred = (WORD16 *) ps_res_pred->pv_data;
-    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
-    UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
-    WORD32 i4_res_stride = ps_res->i4_data_stride;
-    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
-    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
-    WORD32 i4_out_stride = ps_rec->i4_data_stride;
-    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
-    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
-    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
-    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
-
-    WORD32 i4_iq_out_temp;
-    int16x4_t temp_0;
-    uint8x8_t pred0_in, pred1_in, pred2_in, pred3_in;
-    int16x8_t pred0, pred1, pred2, pred3;
-    int16x8_t pred01_in, pred23_in;
-    uint8x8_t pred01_un, pred23_un;
-
-    int16x4_t resd0_in, resd1_in, resd2_in, resd3_in;
-    int16x8_t resd01_in, resd23_in;
-    int16x4_t pos_255 = vdup_n_s16(((WORD16) UINT8_MAX));
-    int16x4_t neg_255 = vdup_n_s16(-((WORD16) UINT8_MAX));
-
-    UNUSED(pi2_tmp);
-    UNUSED(u1_res_accumulate);
-
-    if(i4_iq_start_idx == 0)
-    {
-        i4_iq_out_temp = pi2_src[0];
-        INV_QUANT(i4_iq_out_temp, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
-    }
-    else
-    {
-        i4_iq_out_temp = pi2_dc_src[0];
-    }
-
-    temp_0 = vdup_n_s16((i4_iq_out_temp + 32) >> 6);
-
-    resd0_in = vld1_s16((int16_t *) pi2_res_pred);
-    resd1_in = vld1_s16((int16_t *) pi2_res_pred + i4_res_pred_stride);
-    resd2_in = vld1_s16((int16_t *) pi2_res_pred + (i4_res_pred_stride * 2));
-    resd3_in = vld1_s16((int16_t *) pi2_res_pred + (i4_res_pred_stride * 3));
-
-    /* Add res pred to the res obtained */
-    resd0_in = vadd_s16(resd0_in, temp_0);
-    resd1_in = vadd_s16(resd1_in, temp_0);
-    resd2_in = vadd_s16(resd2_in, temp_0);
-    resd3_in = vadd_s16(resd3_in, temp_0);
-
-    /* Saturate all values < -255 to -255 and retain the rest as it is */
-    resd0_in = vmax_s16(resd0_in, neg_255);
-    resd1_in = vmax_s16(resd1_in, neg_255);
-    resd2_in = vmax_s16(resd2_in, neg_255);
-    resd3_in = vmax_s16(resd3_in, neg_255);
-
-    /* Saturate all values > 255 to 255 and retain the rest as it is */
-    resd0_in = vmin_s16(resd0_in, pos_255);
-    resd1_in = vmin_s16(resd1_in, pos_255);
-    resd2_in = vmin_s16(resd2_in, pos_255);
-    resd3_in = vmin_s16(resd3_in, pos_255);
-
-    vst1_s16(pi2_res, resd0_in);
-    vst1_s16(pi2_res + i4_res_stride, resd1_in);
-    vst1_s16(pi2_res + (i4_res_stride << 1), resd2_in);
-    vst1_s16(pi2_res + (i4_res_stride << 1) + i4_res_stride, resd3_in);
-
-    resd01_in = vcombine_s16(resd0_in, resd1_in);
-    resd23_in = vcombine_s16(resd2_in, resd3_in);
-
-    pred0_in = vld1_u8(pu1_pred);
-    pu1_pred = pu1_pred + i4_pred_stride;
-    pred1_in = vld1_u8(pu1_pred);
-    pu1_pred = pu1_pred + i4_pred_stride;
-    pred2_in = vld1_u8(pu1_pred);
-    pu1_pred = pu1_pred + i4_pred_stride;
-    pred3_in = vld1_u8(pu1_pred);
-
-    pred0 = vreinterpretq_s16_u16(vmovl_u8(pred0_in));
-    pred1 = vreinterpretq_s16_u16(vmovl_u8(pred1_in));
-    pred2 = vreinterpretq_s16_u16(vmovl_u8(pred2_in));
-    pred3 = vreinterpretq_s16_u16(vmovl_u8(pred3_in));
-
-    pred01_in = vcombine_s16(vget_low_s16(pred0), vget_low_s16(pred1));
-    pred23_in = vcombine_s16(vget_low_s16(pred2), vget_low_s16(pred3));
-
-    /* Out pixel = Res + pred */
-    pred01_in = vaddq_s16(pred01_in, resd01_in);
-    pred23_in = vaddq_s16(pred23_in, resd23_in);
-
-    /* Convert to unsigned 8 bit with saturation */
-    pred01_un = vqmovun_s16(pred01_in);
-    pred23_un = vqmovun_s16(pred23_in);
-
-    vst1_lane_u32((uint32_t *) (pu1_out), vreinterpret_u32_u8(pred01_un), 0);
-    vst1_lane_u32((uint32_t *) (pu1_out + i4_out_stride), vreinterpret_u32_u8(pred01_un), 1);
-    vst1_lane_u32((uint32_t *) (pu1_out + (i4_out_stride << 1)), vreinterpret_u32_u8(pred23_un), 0);
-    vst1_lane_u32((uint32_t *) (pu1_out + ((i4_out_stride << 1) + i4_out_stride)),
-                  vreinterpret_u32_u8(pred23_un), 1);
-}
-
-void isvc_iquant_itrans_recon_chroma_4x4_dc_neon(
-    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
-    buffer_container_t *ps_res, buffer_container_t *ps_rec,
-    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
-    WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
-{
-    WORD16 *pi2_src = (WORD16 *) ps_src->pv_data;
-    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
-    UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
-    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
-    WORD32 i4_out_stride = ps_rec->i4_data_stride;
-    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
-    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
-    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
-
-    WORD32 i4_iq_out_temp;
-    int16x8_t temp_0;
-    uint8x8_t pred0_in, pred1_in, pred2_in, pred3_in;
-    int16x8_t pred0, pred1, pred2, pred3;
-    uint8x8_t i4_out_horz_8x8_r0, i4_out_horz_8x8_r1, i4_out_horz_8x8_r2, i4_out_horz_8x8_r3;
-    uint8x8_t chroma_mask_8x8 = vreinterpret_u8_u16(vdup_n_u16(0x00ff));
-
-    UNUSED(pi2_src);
-    UNUSED(pu2_iscal_mat);
-    UNUSED(pu2_weigh_mat);
-    UNUSED(u4_qp_div_6);
-    UNUSED(pi2_tmp);
-    UNUSED(i4_iq_start_idx);
-    UNUSED(ps_res);
-    UNUSED(ps_res_pred);
-    UNUSED(u1_res_accumulate);
-
-    i4_iq_out_temp = pi2_dc_src[0];
-    temp_0 = vdupq_n_s16((i4_iq_out_temp + 32) >> 6);
-
-    pred0_in = vld1_u8(pu1_pred);
-    pu1_pred = pu1_pred + i4_pred_stride;
-    pred1_in = vld1_u8(pu1_pred);
-    pu1_pred = pu1_pred + i4_pred_stride;
-    pred2_in = vld1_u8(pu1_pred);
-    pu1_pred = pu1_pred + i4_pred_stride;
-    pred3_in = vld1_u8(pu1_pred);
-
-    pred0 = vreinterpretq_s16_u16(vmovl_u8(pred0_in));
-    pred1 = vreinterpretq_s16_u16(vmovl_u8(pred1_in));
-    pred2 = vreinterpretq_s16_u16(vmovl_u8(pred2_in));
-    pred3 = vreinterpretq_s16_u16(vmovl_u8(pred3_in));
-
-    /* Out pixel = Res + pred */
-    pred0 = vaddq_s16(pred0, temp_0);
-    pred1 = vaddq_s16(pred1, temp_0);
-    pred2 = vaddq_s16(pred2, temp_0);
-    pred3 = vaddq_s16(pred3, temp_0);
-
-    /* Convert to unsigned 8 bit with saturation */
-    pred0_in = vqmovun_s16(pred0);
-    pred1_in = vqmovun_s16(pred1);
-    pred2_in = vqmovun_s16(pred2);
-    pred3_in = vqmovun_s16(pred3);
-
-    i4_out_horz_8x8_r0 = vld1_u8(pu1_out);
-    i4_out_horz_8x8_r1 = vld1_u8(pu1_out + i4_out_stride);
-    i4_out_horz_8x8_r2 = vld1_u8(pu1_out + i4_out_stride * 2);
-    i4_out_horz_8x8_r3 = vld1_u8(pu1_out + i4_out_stride * 3);
-
-    /* Store out pixels in alternate positions */
-    i4_out_horz_8x8_r0 = vbsl_u8(chroma_mask_8x8, pred0_in, i4_out_horz_8x8_r0);
-    i4_out_horz_8x8_r1 = vbsl_u8(chroma_mask_8x8, pred1_in, i4_out_horz_8x8_r1);
-    i4_out_horz_8x8_r2 = vbsl_u8(chroma_mask_8x8, pred2_in, i4_out_horz_8x8_r2);
-    i4_out_horz_8x8_r3 = vbsl_u8(chroma_mask_8x8, pred3_in, i4_out_horz_8x8_r3);
-
-    vst1_u8((uint8_t *) (pu1_out), i4_out_horz_8x8_r0);
-    vst1_u8((uint8_t *) (pu1_out + i4_out_stride), i4_out_horz_8x8_r1);
-    vst1_u8((uint8_t *) (pu1_out + i4_out_stride * 2), i4_out_horz_8x8_r2);
-    vst1_u8((uint8_t *) (pu1_out + i4_out_stride * 3), i4_out_horz_8x8_r3);
-}
-
 void isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_output_neon(
     buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
     buffer_container_t *ps_res, buffer_container_t *ps_rec,
diff --git a/common/svc/isvc_mem_fns.c b/common/svc/isvc_mem_fns.c
index 345715af..35a4c662 100644
--- a/common/svc/isvc_mem_fns.c
+++ b/common/svc/isvc_mem_fns.c
@@ -120,164 +120,6 @@ void isvc_memset_2d(UWORD8 *pu1_dst, WORD32 i4_dst_stride, UWORD8 u1_val, WORD32
     }
 }
 
-/**
- *******************************************************************************
- *
- * @brief
- * Function for copying to an interleaved destination
- *
- * @par Description:
- *    Copies the array of width 'wd' and height 'ht' from the  location pointed
- *    by 'src' to the location pointed by 'dst'
- *
- * @param[in] pu1_src
- *  UWORD8 pointer to the source
- *
- * @param[out] pu1_dst
- *  UWORD8 pointer to the destination
- *
- * @param[in] src_strd
- *  integer source stride
- *
- * @param[in] dst_strd
- *  integer destination stride
- *
- * @param[in] ht
- *  integer height of the array
- *
- * @param[in] wd
- *  integer width of the array
- *
- * @returns
- *
- * @remarks
- *  The alternate elements of src will be copied to alternate locations in dsr
- *  Other locations are not touched
- *
- *******************************************************************************
- */
-void isvc_interleaved_copy(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, WORD32 dst_strd,
-                           WORD32 ht, WORD32 wd)
-{
-    WORD32 row, col;
-    wd *= 2;
-
-    for(row = 0; row < ht; row++)
-    {
-        for(col = 0; col < wd; col += 2)
-        {
-            pu1_dst[col] = pu1_src[col];
-        }
-
-        pu1_src += src_strd;
-        pu1_dst += dst_strd;
-    }
-}
-
-/**
- *******************************************************************************
- *
- * @brief
- * Function for copying to an interleaved destination
- *
- * @par Description:
- *    Copies the array of width 'wd' and height 'ht' from the  location pointed
- *    by 'src' to the location pointed by 'dst'
- *
- * @param[in] pu1_src
- *  UWORD8 pointer to the source
- *
- * @param[out] pu1_dst
- *  UWORD8 pointer to the destination
- *
- * @param[in] src_strd
- *  integer source stride
- *
- * @param[in] dst_strd
- *  integer destination stride
- *
- * @param[in] ht
- *  integer height of the array
- *
- * @param[in] wd
- *  integer width of the array
- *
- * @returns
- *
- * @remarks
- *  The alternate elements of src will be copied to alternate locations in dsr
- *  Other locations are not touched
- *
- *******************************************************************************
- */
-void isvc_16bit_interleaved_copy(WORD16 *pi2_src, WORD16 *pi2_dst, WORD32 src_strd, WORD32 dst_strd,
-                                 WORD32 ht, WORD32 wd)
-{
-    WORD32 row, col;
-    wd *= 2;
-
-    for(row = 0; row < ht; row++)
-    {
-        for(col = 0; col < wd; col += 2)
-        {
-            pi2_dst[col] = pi2_src[col];
-        }
-
-        pi2_src += src_strd;
-        pi2_dst += dst_strd;
-    }
-}
-
-/**
- *******************************************************************************
- *
- * @brief
- * Function for memsetting to an interleaved destination
- *
- * @par Description:
- *    Memsets the array of width 'wd' and height 'ht' pointed by 'src'
- *
- * @param[in] pu1_src
- *  UWORD8 pointer to the source
- *
- * @param[in] src_strd
- *  integer source stride
- *
- * @param[in] value
- *  Value to set
- *
- * @param[in] ht
- *  integer height of the array
- *
- * @param[in] wd
- *  integer width of the array
- *
- * @returns
- *
- * @remarks
- *  The alternate elements of src will be copied to alternate locations in dsr
- *  Other locations are not touched
- *
- *******************************************************************************
- */
-void isvc_16bit_interleaved_memset(WORD16 *pi2_src, WORD32 i4_src_strd, WORD16 i2_value,
-                                   WORD32 i4_wd, WORD32 i4_ht)
-{
-    WORD32 row, col;
-
-    i4_wd *= 2;
-
-    for(row = 0; row < i4_ht; row++)
-    {
-        for(col = 0; col < i4_wd; col += 2)
-        {
-            pi2_src[col] = i2_value;
-        }
-
-        pi2_src += i4_src_strd;
-    }
-}
-
 /**
  *******************************************************************************
  *
diff --git a/common/svc/isvc_mem_fns.h b/common/svc/isvc_mem_fns.h
index a4d95f71..581e478b 100644
--- a/common/svc/isvc_mem_fns.h
+++ b/common/svc/isvc_mem_fns.h
@@ -64,32 +64,24 @@ typedef UWORD8 FT_NONZERO_CHECKER(UWORD8 *pu1_data, WORD32 i4_data_strd, UWORD32
                                   UWORD32 u4_ht);
 
 /* C function declarations */
-extern FT_MEMCPY ih264_memcpy;
 extern FT_MEMCPY ih264_memcpy_mul_8;
-extern FT_MEMSET ih264_memset;
 extern FT_MEMSET ih264_memset_mul_8;
 extern FT_MEMSET_16BIT ih264_memset_16bit;
 extern FT_MEMSET_16BIT ih264_memset_16bit_mul_8;
 extern FT_COPY_2D isvc_copy_2d;
 extern FT_MEMSET_2D isvc_memset_2d;
-extern FT_16BIT_INTERLEAVED_COPY isvc_16bit_interleaved_copy;
-extern FT_16BIT_INTERLEAVED_MEMSET isvc_16bit_interleaved_memset;
 extern FT_NONZERO_CHECKER isvc_is_nonzero_blk;
 extern FT_MEM_ALLOC isvc_memory_alloc;
 extern FT_MEM_FREE isvc_memory_free;
 
 /* A9 Q function declarations */
-extern FT_MEMCPY isvc_memcpy_a9q;
 extern FT_MEMCPY ih264_memcpy_mul_8_a9q;
-extern FT_MEMSET ih264_memset_a9q;
 extern FT_MEMSET ih264_memset_mul_8_a9q;
 extern FT_MEMSET_16BIT ih264_memset_16bit_a9q;
 extern FT_MEMSET_16BIT ih264_memset_16bit_mul_8_a9q;
 
 /* AV8 function declarations */
-extern FT_MEMCPY ih264_memcpy_av8;
 extern FT_MEMCPY ih264_memcpy_mul_8_av8;
-extern FT_MEMSET ih264_memset_av8;
 extern FT_MEMSET ih264_memset_mul_8_av8;
 extern FT_MEMSET_16BIT ih264_memset_16bit_av8;
 extern FT_MEMSET_16BIT ih264_memset_16bit_mul_8_av8;
diff --git a/common/svc/isvc_trans_quant_itrans_iquant.h b/common/svc/isvc_trans_quant_itrans_iquant.h
index fd15dccd..7ded8112 100644
--- a/common/svc/isvc_trans_quant_itrans_iquant.h
+++ b/common/svc/isvc_trans_quant_itrans_iquant.h
@@ -195,15 +195,11 @@ extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_sse42;
 extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_4x4_sse42;
 extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_4x4_with_res_acc_sse42;
 
-extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_sse42;
 extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_chroma_4x4_sse42;
 extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_chroma_4x4_with_res_acc_sse42;
 
 extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_dc_4x4_sse42;
-extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_dc_4x4_sse42;
-extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_dc_with_res_acc_4x4_sse42;
 
-extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_sse42;
 extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_chroma_4x4_dc_sse42;
 extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_res_chroma_4x4_dc_with_res_acc_sse42;
 
@@ -217,15 +213,11 @@ extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_neon;
 extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_with_res_output_neon;
 extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_with_res_accumulate_neon;
 
-extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_neon;
 extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_with_res_output_neon;
 extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_with_res_accumulate_neon;
 
 extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_neon;
-extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_with_res_output_neon;
-extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_4x4_dc_with_res_accumulate_neon;
 
-extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_neon;
 extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_output_neon;
 extern FT_IQ_IT_RECON isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_accumulate_neon;
 
diff --git a/common/x86/svc/isvc_iquant_itrans_recon_sse42.c b/common/x86/svc/isvc_iquant_itrans_recon_sse42.c
index 829952b2..f86ebc70 100644
--- a/common/x86/svc/isvc_iquant_itrans_recon_sse42.c
+++ b/common/x86/svc/isvc_iquant_itrans_recon_sse42.c
@@ -1034,63 +1034,19 @@ void isvc_iquant_itrans_recon_res_4x4_with_res_acc_sse42(
     *(pu4_out) = _mm_cvtsi128_si32(resq_r3);
 }
 
-/*
- ********************************************************************************
- *
- * @brief This function reconstructs a 4x4 sub block from quantized chroma
- *resiude and prediction buffer
- *
- * @par Description:
- *  The quantized residue is first inverse quantized, then inverse transformed.
- *  This inverse transformed content is added to the prediction buffer to recon-
- *  struct the end output
- *
- * @param[in] pi2_src
- *  quantized 4x4 block
- *
- * @param[in] pu1_pred
- *  prediction 4x4 block
- *
- * @param[out] pu1_out
- *  reconstructed 4x4 block
- *
- * @param[in] src_strd
- *  quantization buffer stride
- *
- * @param[in] i4_pred_stride,
- *  Prediction buffer stride
- *
- * @param[in] i4_out_stride
- *  recon buffer Stride
- *
- * @param[in] pu2_scaling_list
- *  pointer to scaling list
- *
- * @param[in] pu2_norm_adjust
- *  pointer to inverse scale matrix
- *
- * @param[in] u4_qp_div_6
- *  Floor (qp/6)
- *
- * @param[in] pi4_tmp
- * temporary buffer of size 1*16
- *
- * @returns none
- *
- * @remarks none
- *
- *******************************************************************************
- */
-void isvc_iquant_itrans_recon_chroma_4x4_sse42(
+void isvc_iquant_itrans_recon_res_chroma_4x4_sse42(
     buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
     buffer_container_t *ps_res, buffer_container_t *ps_rec,
     iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
     WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
 {
     WORD16 *pi2_src = (WORD16 *) ps_src->pv_data;
+    WORD16 *pi2_res = (WORD16 *) ps_res->pv_data;
+    WORD16 *pi2_res_ptr = pi2_res;
     UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
     UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
     WORD32 i4_src_stride = ps_src->i4_data_stride;
+    WORD32 i4_res_stride = ps_res->i4_data_stride;
     WORD32 i4_pred_stride = ps_pred->i4_data_stride;
     WORD32 i4_out_stride = ps_rec->i4_data_stride;
     const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
@@ -1111,13 +1067,13 @@ void isvc_iquant_itrans_recon_chroma_4x4_sse42(
     __m128i value_32 = _mm_set1_epi32(32);
     __m128i chroma_mask = _mm_set1_epi16(0xFF);
     __m128i out_r0, out_r1, out_r2, out_r3;
+    __m128i res_r0, res_r1, res_r2, res_r3;
 
     ASSERT(4 == i4_src_stride);
     ASSERT(0 == u1_res_accumulate);
 
     UNUSED(i4_src_stride);
     UNUSED(u1_res_accumulate);
-    UNUSED(ps_res);
     UNUSED(ps_res_pred);
     UNUSED(i4_iq_start_idx);
 
@@ -1126,27 +1082,27 @@ void isvc_iquant_itrans_recon_chroma_4x4_sse42(
     /* operations on platform                                    */
     /*************************************************************/
     /* a00 a01 a02 a03 a10 a11 a12 a13 -- the source
-     matrix 0th,1st row */
+    matrix 0th,1st row */
     src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src));
 
     /* a20 a21 a22 a23 a30 a31 a32 a33 -- the
-      source matrix 2nd,3rd row */
+    source matrix 2nd,3rd row */
     src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8));
 
     /* b00 b01 b02 b03 b10 b11 b12 b13 -- the
-     scaling matrix 0th,1st row */
+    scaling matrix 0th,1st row */
     scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat));
 
     /* b20 b21 b22 b23 b30 b31 b32 b33 --b12 b13 -- the
-     the scaling matrix 2nd,3rd row */
+    scaling matrix 2nd,3rd row */
     scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8));
 
     /* q00 q01 q02 q03 q10 q11
-     q12 q13 -- all 16 bits */
+    q12 q13 -- all 16 bits */
     dequant_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat));
 
     /* q20 q21 q22 q23 q30 q31
-     q32 q33 -- all 16 bits */
+    q32 q33 -- all 16 bits */
     dequant_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat + 8));
 
     temp0 = _mm_mullo_epi16(scalemat_r0_r1,
@@ -1239,7 +1195,7 @@ void isvc_iquant_itrans_recon_chroma_4x4_sse42(
     temp2 = _mm_srai_epi32(resq_r1, 1);
     temp2 = _mm_sub_epi32(temp2, resq_r3);
     /* z3 = w1 + (w3 >> 1)                                      */
-    temp3 = _mm_srai_epi32(resq_r3, 1);  //(w3>>1) + w1
+    temp3 = _mm_srai_epi32(resq_r3, 1);
     temp3 = _mm_add_epi32(temp3, resq_r1);
     /*----------------------------------------------------------*/
     /* x0 = z0 + z3                                             */
@@ -1292,8 +1248,10 @@ void isvc_iquant_itrans_recon_chroma_4x4_sse42(
     pred_r2 = _mm_and_si128(pred_r2, chroma_mask);
     pred_r3 = _mm_and_si128(pred_r3, chroma_mask);
 
-    pred_r0 = _mm_unpacklo_epi64(pred_r0, pred_r1);
-    pred_r1 = _mm_unpacklo_epi64(pred_r2, pred_r3);
+    pred_r0 = _mm_cvtepu16_epi32(pred_r0);
+    pred_r1 = _mm_cvtepu16_epi32(pred_r1);
+    pred_r2 = _mm_cvtepu16_epi32(pred_r2);
+    pred_r3 = _mm_cvtepu16_epi32(pred_r3);
 
     /*--------------------------------------------------------------*/
     /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6      */
@@ -1333,17 +1291,50 @@ void isvc_iquant_itrans_recon_chroma_4x4_sse42(
     temp1 = _mm_packs_epi32(temp6, temp7);
 
     /* Saturate all values < -255 to -255 and retain the rest as it is */
-    temp4 = _mm_max_epi16(temp0, neg_255_8x16b);
+    temp0 = _mm_max_epi16(temp0, neg_255_8x16b);
     /* Saturate all values > 255 to 255 and retain the rest as it is */
-    temp4 = _mm_min_epi16(temp4, pos_255_8x16b);
+    temp0 = _mm_min_epi16(temp0, pos_255_8x16b);
 
     /* Saturate all values < -255 to -255 and retain the rest as it is */
-    temp5 = _mm_max_epi16(temp1, neg_255_8x16b);
+    temp1 = _mm_max_epi16(temp1, neg_255_8x16b);
     /* Saturate all values > 255 to 255 and retain the rest as it is */
-    temp5 = _mm_min_epi16(temp5, pos_255_8x16b);
+    temp1 = _mm_min_epi16(temp1, pos_255_8x16b);
 
-    temp0 = _mm_add_epi16(temp4, pred_r0);
-    temp1 = _mm_add_epi16(temp5, pred_r1);
+    chroma_mask = _mm_set1_epi32(0xffff0000);
+    out_r0 = _mm_loadu_si128((__m128i *) (&pi2_res_ptr[0 * i4_res_stride]));
+    out_r1 = _mm_loadu_si128((__m128i *) (&pi2_res_ptr[1 * i4_res_stride]));
+    out_r2 = _mm_loadu_si128((__m128i *) (&pi2_res_ptr[2 * i4_res_stride]));
+    out_r3 = _mm_loadu_si128((__m128i *) (&pi2_res_ptr[3 * i4_res_stride]));
+
+    out_r0 = _mm_and_si128(out_r0, chroma_mask);
+    out_r1 = _mm_and_si128(out_r1, chroma_mask);
+    out_r2 = _mm_and_si128(out_r2, chroma_mask);
+    out_r3 = _mm_and_si128(out_r3, chroma_mask);
+
+    res_r0 = _mm_cvtepu16_epi32(temp0);
+    res_r2 = _mm_cvtepu16_epi32(temp1);
+    res_r1 = _mm_srli_si128(temp0, 8);
+    res_r3 = _mm_srli_si128(temp1, 8);
+    res_r1 = _mm_cvtepu16_epi32(res_r1);
+    res_r3 = _mm_cvtepu16_epi32(res_r3);
+
+    out_r0 = _mm_add_epi16(out_r0, res_r0);
+    out_r1 = _mm_add_epi16(out_r1, res_r1);
+    out_r2 = _mm_add_epi16(out_r2, res_r2);
+    out_r3 = _mm_add_epi16(out_r3, res_r3);
+
+    _mm_storeu_si128((__m128i *) (&pi2_res_ptr[0 * i4_res_stride]), out_r0);
+    _mm_storeu_si128((__m128i *) (&pi2_res_ptr[1 * i4_res_stride]), out_r1);
+    _mm_storeu_si128((__m128i *) (&pi2_res_ptr[2 * i4_res_stride]), out_r2);
+    _mm_storeu_si128((__m128i *) (&pi2_res_ptr[3 * i4_res_stride]), out_r3);
+
+    resq_r0 = _mm_add_epi16(pred_r0, res_r0);
+    resq_r1 = _mm_add_epi16(pred_r1, res_r1);
+    resq_r2 = _mm_add_epi16(pred_r2, res_r2);
+    resq_r3 = _mm_add_epi16(pred_r3, res_r3);
+
+    temp0 = _mm_packus_epi32(resq_r0, resq_r1);
+    temp1 = _mm_packus_epi32(resq_r2, resq_r3);
 
     /*------------------------------------------------------------------*/
     /* Clipping the results to 8 bits */
@@ -1362,7 +1353,7 @@ void isvc_iquant_itrans_recon_chroma_4x4_sse42(
     resq_r2 = _mm_cvtepu8_epi16(resq_r2);
     resq_r3 = _mm_cvtepu8_epi16(resq_r3);
 
-    chroma_mask = _mm_set1_epi16(0xFF00);
+    chroma_mask = _mm_set1_epi16(0xff00);
     out_r0 = _mm_loadl_epi64((__m128i *) (&pu1_out[0]));
     out_r1 = _mm_loadl_epi64((__m128i *) (&pu1_out[i4_out_stride]));
     out_r2 = _mm_loadl_epi64((__m128i *) (&pu1_out[2 * i4_out_stride]));
@@ -1384,7 +1375,7 @@ void isvc_iquant_itrans_recon_chroma_4x4_sse42(
     _mm_storel_epi64((__m128i *) (&pu1_out[3 * i4_out_stride]), out_r3);
 }
 
-void isvc_iquant_itrans_recon_res_chroma_4x4_sse42(
+void isvc_iquant_itrans_recon_res_chroma_4x4_with_res_acc_sse42(
     buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
     buffer_container_t *ps_res, buffer_container_t *ps_rec,
     iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
@@ -1392,11 +1383,12 @@ void isvc_iquant_itrans_recon_res_chroma_4x4_sse42(
 {
     WORD16 *pi2_src = (WORD16 *) ps_src->pv_data;
     WORD16 *pi2_res = (WORD16 *) ps_res->pv_data;
-    WORD16 *pi2_res_ptr = pi2_res;
+    WORD16 *pi2_res_pred = (WORD16 *) ps_res_pred->pv_data;
     UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
     UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
     WORD32 i4_src_stride = ps_src->i4_data_stride;
     WORD32 i4_res_stride = ps_res->i4_data_stride;
+    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
     WORD32 i4_pred_stride = ps_pred->i4_data_stride;
     WORD32 i4_out_stride = ps_rec->i4_data_stride;
     const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
@@ -1406,9 +1398,12 @@ void isvc_iquant_itrans_recon_res_chroma_4x4_sse42(
     __m128i src_r0, src_r1, src_r2, src_r3;
     __m128i scalemat_r0_r1, scalemat_r2_r3;
     __m128i pred_r0, pred_r1, pred_r2, pred_r3;
-    __m128i sign_reg, dequant_r0_r1, dequant_r2_r3;
+    __m128i res_pred_r0, res_pred_r1, res_pred_r2, res_pred_r3;
+    __m128i res_r0, res_r1, res_r2, res_r3;
+    __m128i dequant_r0_r1, dequant_r2_r3;
     /* all bits reset to zero */
     __m128i zero_8x16b = _mm_setzero_si128();
+    __m128i reg_chroma = _mm_set1_epi32(0xFFFF);
     __m128i neg_255_8x16b = _mm_set1_epi16(-((WORD16) UINT8_MAX));
     __m128i pos_255_8x16b = _mm_set1_epi16(((WORD16) UINT8_MAX));
     __m128i temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
@@ -1417,14 +1412,13 @@ void isvc_iquant_itrans_recon_res_chroma_4x4_sse42(
     __m128i value_32 = _mm_set1_epi32(32);
     __m128i chroma_mask = _mm_set1_epi16(0xFF);
     __m128i out_r0, out_r1, out_r2, out_r3;
-    __m128i res_r0, res_r1, res_r2, res_r3;
+    __m128i mask_r0;
 
     ASSERT(4 == i4_src_stride);
-    ASSERT(0 == u1_res_accumulate);
+    ASSERT(1 == u1_res_accumulate);
 
     UNUSED(i4_src_stride);
     UNUSED(u1_res_accumulate);
-    UNUSED(ps_res_pred);
     UNUSED(i4_iq_start_idx);
 
     /*************************************************************/
@@ -1545,7 +1539,7 @@ void isvc_iquant_itrans_recon_res_chroma_4x4_sse42(
     temp2 = _mm_srai_epi32(resq_r1, 1);
     temp2 = _mm_sub_epi32(temp2, resq_r3);
     /* z3 = w1 + (w3 >> 1)                                      */
-    temp3 = _mm_srai_epi32(resq_r3, 1);
+    temp3 = _mm_srai_epi32(resq_r3, 1);  //(w3>>1) + w1
     temp3 = _mm_add_epi32(temp3, resq_r1);
     /*----------------------------------------------------------*/
     /* x0 = z0 + z3                                             */
@@ -1598,11 +1592,6 @@ void isvc_iquant_itrans_recon_res_chroma_4x4_sse42(
     pred_r2 = _mm_and_si128(pred_r2, chroma_mask);
     pred_r3 = _mm_and_si128(pred_r3, chroma_mask);
 
-    pred_r0 = _mm_cvtepu16_epi32(pred_r0);
-    pred_r1 = _mm_cvtepu16_epi32(pred_r1);
-    pred_r2 = _mm_cvtepu16_epi32(pred_r2);
-    pred_r3 = _mm_cvtepu16_epi32(pred_r3);
-
     /*--------------------------------------------------------------*/
     /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6      */
     /*                                                              */
@@ -1623,22 +1612,50 @@ void isvc_iquant_itrans_recon_res_chroma_4x4_sse42(
     temp4 = _mm_add_epi32(temp0, temp3);
     temp4 = _mm_add_epi32(temp4, value_32);
     temp4 = _mm_srai_epi32(temp4, 6);
+    res_r0 = temp4;
     /* x1j = z1j + z2j                                                        */
     temp5 = _mm_add_epi32(temp1, temp2);
     temp5 = _mm_add_epi32(temp5, value_32);
     temp5 = _mm_srai_epi32(temp5, 6);
+    res_r1 = temp5;
     /* x2j = z1j - z2j                                                        */
     temp6 = _mm_sub_epi32(temp1, temp2);
     temp6 = _mm_add_epi32(temp6, value_32);
     temp6 = _mm_srai_epi32(temp6, 6);
+    res_r2 = temp6;
     /* x3j = z0j - z3j                                                        */
     temp7 = _mm_sub_epi32(temp0, temp3);
     temp7 = _mm_add_epi32(temp7, value_32);
     temp7 = _mm_srai_epi32(temp7, 6);
+    res_r3 = temp7;
 
-    /* 32-bit to 16-bit conversion */
-    temp0 = _mm_packs_epi32(temp4, temp5);
-    temp1 = _mm_packs_epi32(temp6, temp7);
+    res_pred_r0 = _mm_loadu_si128((__m128i *) &pi2_res_pred[0 * i4_res_pred_stride]);
+    res_pred_r1 = _mm_loadu_si128((__m128i *) &pi2_res_pred[1 * i4_res_pred_stride]);
+    res_pred_r2 = _mm_loadu_si128((__m128i *) &pi2_res_pred[2 * i4_res_pred_stride]);
+    res_pred_r3 = _mm_loadu_si128((__m128i *) &pi2_res_pred[3 * i4_res_pred_stride]);
+
+    res_pred_r0 = _mm_and_si128(res_pred_r0, reg_chroma);
+    res_pred_r1 = _mm_and_si128(res_pred_r1, reg_chroma);
+    res_pred_r2 = _mm_and_si128(res_pred_r2, reg_chroma);
+    res_pred_r3 = _mm_and_si128(res_pred_r3, reg_chroma);
+
+    temp0 = _mm_packs_epi32(res_r0, res_r1);
+    temp1 = _mm_packs_epi32(res_r2, res_r3);
+
+    res_r0 = _mm_cvtepu16_epi32(temp0);
+    res_r2 = _mm_cvtepu16_epi32(temp1);
+    res_r1 = _mm_srli_si128(temp0, 8);
+    res_r3 = _mm_srli_si128(temp1, 8);
+    res_r1 = _mm_cvtepu16_epi32(res_r1);
+    res_r3 = _mm_cvtepu16_epi32(res_r3);
+
+    res_r0 = _mm_add_epi16(res_pred_r0, res_r0);
+    res_r1 = _mm_add_epi16(res_pred_r1, res_r1);
+    res_r2 = _mm_add_epi16(res_pred_r2, res_r2);
+    res_r3 = _mm_add_epi16(res_pred_r3, res_r3);
+
+    temp0 = _mm_packus_epi32(res_r0, res_r1);
+    temp1 = _mm_packus_epi32(res_r2, res_r3);
 
     /* Saturate all values < -255 to -255 and retain the rest as it is */
     temp0 = _mm_max_epi16(temp0, neg_255_8x16b);
@@ -1650,33 +1667,39 @@ void isvc_iquant_itrans_recon_res_chroma_4x4_sse42(
     /* Saturate all values > 255 to 255 and retain the rest as it is */
     temp1 = _mm_min_epi16(temp1, pos_255_8x16b);
 
+    res_r0 = _mm_cvtepu16_epi32(temp0);
+    res_r1 = _mm_srli_si128(temp0, 8);
+    res_r1 = _mm_cvtepu16_epi32(res_r1);
+
+    res_r2 = _mm_cvtepu16_epi32(temp1);
+    res_r3 = _mm_srli_si128(temp1, 8);
+    res_r3 = _mm_cvtepu16_epi32(res_r3);
+
     chroma_mask = _mm_set1_epi32(0xffff0000);
-    out_r0 = _mm_loadu_si128((__m128i *) (&pi2_res_ptr[0 * i4_res_stride]));
-    out_r1 = _mm_loadu_si128((__m128i *) (&pi2_res_ptr[1 * i4_res_stride]));
-    out_r2 = _mm_loadu_si128((__m128i *) (&pi2_res_ptr[2 * i4_res_stride]));
-    out_r3 = _mm_loadu_si128((__m128i *) (&pi2_res_ptr[3 * i4_res_stride]));
+    out_r0 = _mm_loadu_si128((__m128i *) (&pi2_res[0 * i4_res_stride]));
+    out_r1 = _mm_loadu_si128((__m128i *) (&pi2_res[1 * i4_res_stride]));
+    out_r2 = _mm_loadu_si128((__m128i *) (&pi2_res[2 * i4_res_stride]));
+    out_r3 = _mm_loadu_si128((__m128i *) (&pi2_res[3 * i4_res_stride]));
 
     out_r0 = _mm_and_si128(out_r0, chroma_mask);
     out_r1 = _mm_and_si128(out_r1, chroma_mask);
     out_r2 = _mm_and_si128(out_r2, chroma_mask);
     out_r3 = _mm_and_si128(out_r3, chroma_mask);
 
-    res_r0 = _mm_cvtepu16_epi32(temp0);
-    res_r2 = _mm_cvtepu16_epi32(temp1);
-    res_r1 = _mm_srli_si128(temp0, 8);
-    res_r3 = _mm_srli_si128(temp1, 8);
-    res_r1 = _mm_cvtepu16_epi32(res_r1);
-    res_r3 = _mm_cvtepu16_epi32(res_r3);
-
     out_r0 = _mm_add_epi16(out_r0, res_r0);
     out_r1 = _mm_add_epi16(out_r1, res_r1);
     out_r2 = _mm_add_epi16(out_r2, res_r2);
     out_r3 = _mm_add_epi16(out_r3, res_r3);
 
-    _mm_storeu_si128((__m128i *) (&pi2_res_ptr[0 * i4_res_stride]), out_r0);
-    _mm_storeu_si128((__m128i *) (&pi2_res_ptr[1 * i4_res_stride]), out_r1);
-    _mm_storeu_si128((__m128i *) (&pi2_res_ptr[2 * i4_res_stride]), out_r2);
-    _mm_storeu_si128((__m128i *) (&pi2_res_ptr[3 * i4_res_stride]), out_r3);
+    _mm_storeu_si128((__m128i *) (&pi2_res[0 * i4_res_stride]), out_r0);
+    _mm_storeu_si128((__m128i *) (&pi2_res[1 * i4_res_stride]), out_r1);
+    _mm_storeu_si128((__m128i *) (&pi2_res[2 * i4_res_stride]), out_r2);
+    _mm_storeu_si128((__m128i *) (&pi2_res[3 * i4_res_stride]), out_r3);
+
+    pred_r0 = _mm_cvtepu16_epi32(pred_r0);
+    pred_r1 = _mm_cvtepu16_epi32(pred_r1);
+    pred_r2 = _mm_cvtepu16_epi32(pred_r2);
+    pred_r3 = _mm_cvtepu16_epi32(pred_r3);
 
     resq_r0 = _mm_add_epi16(pred_r0, res_r0);
     resq_r1 = _mm_add_epi16(pred_r1, res_r1);
@@ -1686,12 +1709,11 @@ void isvc_iquant_itrans_recon_res_chroma_4x4_sse42(
     temp0 = _mm_packus_epi32(resq_r0, resq_r1);
     temp1 = _mm_packus_epi32(resq_r2, resq_r3);
 
-    /*------------------------------------------------------------------*/
     /* Clipping the results to 8 bits */
-    sign_reg = _mm_cmpgt_epi16(temp0, zero_8x16b);
-    temp0 = _mm_and_si128(temp0, sign_reg);
-    sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b);
-    temp1 = _mm_and_si128(temp1, sign_reg);
+    mask_r0 = _mm_cmpgt_epi16(temp0, zero_8x16b);
+    temp0 = _mm_and_si128(temp0, mask_r0);
+    mask_r0 = _mm_cmpgt_epi16(temp1, zero_8x16b);
+    temp1 = _mm_and_si128(temp1, mask_r0);
 
     resq_r0 = _mm_packus_epi16(temp0, temp1);
     resq_r1 = _mm_srli_si128(resq_r0, 4);
@@ -1703,9 +1725,9 @@ void isvc_iquant_itrans_recon_res_chroma_4x4_sse42(
     resq_r2 = _mm_cvtepu8_epi16(resq_r2);
     resq_r3 = _mm_cvtepu8_epi16(resq_r3);
 
-    chroma_mask = _mm_set1_epi16(0xff00);
-    out_r0 = _mm_loadl_epi64((__m128i *) (&pu1_out[0]));
-    out_r1 = _mm_loadl_epi64((__m128i *) (&pu1_out[i4_out_stride]));
+    chroma_mask = _mm_set1_epi16(0xFF00);
+    out_r0 = _mm_loadl_epi64((__m128i *) (&pu1_out[0 * i4_out_stride]));
+    out_r1 = _mm_loadl_epi64((__m128i *) (&pu1_out[1 * i4_out_stride]));
     out_r2 = _mm_loadl_epi64((__m128i *) (&pu1_out[2 * i4_out_stride]));
     out_r3 = _mm_loadl_epi64((__m128i *) (&pu1_out[3 * i4_out_stride]));
 
@@ -1719,498 +1741,30 @@ void isvc_iquant_itrans_recon_res_chroma_4x4_sse42(
     out_r2 = _mm_add_epi8(out_r2, resq_r2);
     out_r3 = _mm_add_epi8(out_r3, resq_r3);
 
-    _mm_storel_epi64((__m128i *) (&pu1_out[0]), out_r0);
-    _mm_storel_epi64((__m128i *) (&pu1_out[i4_out_stride]), out_r1);
+    _mm_storel_epi64((__m128i *) (&pu1_out[0 * i4_out_stride]), out_r0);
+    _mm_storel_epi64((__m128i *) (&pu1_out[1 * i4_out_stride]), out_r1);
     _mm_storel_epi64((__m128i *) (&pu1_out[2 * i4_out_stride]), out_r2);
     _mm_storel_epi64((__m128i *) (&pu1_out[3 * i4_out_stride]), out_r3);
 }
 
-void isvc_iquant_itrans_recon_res_chroma_4x4_with_res_acc_sse42(
-    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
-    buffer_container_t *ps_res, buffer_container_t *ps_rec,
-    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
-    WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
+void isvc_iquant_itrans_recon_dc_4x4_sse42(buffer_container_t *ps_src, buffer_container_t *ps_pred,
+                                           buffer_container_t *ps_res_pred,
+                                           buffer_container_t *ps_res, buffer_container_t *ps_rec,
+                                           iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
+                                           WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
+                                           WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
 {
-    WORD16 *pi2_src = (WORD16 *) ps_src->pv_data;
-    WORD16 *pi2_res = (WORD16 *) ps_res->pv_data;
-    WORD16 *pi2_res_pred = (WORD16 *) ps_res_pred->pv_data;
     UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
     UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
-    WORD32 i4_src_stride = ps_src->i4_data_stride;
-    WORD32 i4_res_stride = ps_res->i4_data_stride;
-    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
     WORD32 i4_pred_stride = ps_pred->i4_data_stride;
     WORD32 i4_out_stride = ps_rec->i4_data_stride;
     const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
     const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
     UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
-    __m128i src_r0_r1, src_r2_r3;
-    __m128i src_r0, src_r1, src_r2, src_r3;
-    __m128i scalemat_r0_r1, scalemat_r2_r3;
-    __m128i pred_r0, pred_r1, pred_r2, pred_r3;
-    __m128i res_pred_r0, res_pred_r1, res_pred_r2, res_pred_r3;
-    __m128i res_r0, res_r1, res_r2, res_r3;
-    __m128i dequant_r0_r1, dequant_r2_r3;
-    /* all bits reset to zero */
-    __m128i zero_8x16b = _mm_setzero_si128();
-    __m128i reg_chroma = _mm_set1_epi32(0xFFFF);
-    __m128i neg_255_8x16b = _mm_set1_epi16(-((WORD16) UINT8_MAX));
-    __m128i pos_255_8x16b = _mm_set1_epi16(((WORD16) UINT8_MAX));
-    __m128i temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
-    __m128i resq_r0, resq_r1, resq_r2, resq_r3;
-    __m128i add_rshift = _mm_set1_epi32((u4_qp_div_6 < 4) ? (1 << (3 - u4_qp_div_6)) : 0);
-    __m128i value_32 = _mm_set1_epi32(32);
-    __m128i chroma_mask = _mm_set1_epi16(0xFF);
-    __m128i out_r0, out_r1, out_r2, out_r3;
-    __m128i mask_r0;
-
-    ASSERT(4 == i4_src_stride);
-    ASSERT(1 == u1_res_accumulate);
-
-    UNUSED(i4_src_stride);
-    UNUSED(u1_res_accumulate);
-    UNUSED(i4_iq_start_idx);
-
-    /*************************************************************/
-    /* Dequantization of coefficients. Will be replaced by SIMD  */
-    /* operations on platform                                    */
-    /*************************************************************/
-    /* a00 a01 a02 a03 a10 a11 a12 a13 -- the source
-    matrix 0th,1st row */
-    src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src));
-
-    /* a20 a21 a22 a23 a30 a31 a32 a33 -- the
-    source matrix 2nd,3rd row */
-    src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8));
-
-    /* b00 b01 b02 b03 b10 b11 b12 b13 -- the
-    scaling matrix 0th,1st row */
-    scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat));
-
-    /* b20 b21 b22 b23 b30 b31 b32 b33 --b12 b13 -- the
-    the scaling matrix 2nd,3rd row */
-    scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8));
-
-    /* q00 q01 q02 q03 q10 q11
-    q12 q13 -- all 16 bits */
-    dequant_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat));
-
-    /* q20 q21 q22 q23 q30 q31
-    q32 q33 -- all 16 bits */
-    dequant_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat + 8));
-
-    temp0 = _mm_mullo_epi16(scalemat_r0_r1,
-                            dequant_r0_r1);  // b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11
-                                             // b12*q12 b13*q13 -- 16 bit result
-
-    temp1 = _mm_mullo_epi16(scalemat_r2_r3, dequant_r2_r3);
-
-    /* b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long */
-    temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b);
-
-    /* b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long */
-    temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b);
-
-    /* b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long */
-    temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b);
-
-    /* b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long */
-    temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b);
-
-    /* a00 0 a01 0 a02 0 a03 0 -- 16 bit long */
-    src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b);
-    /* a10 0 a11 0 a12 0 a13 0 -- 16 bit long */
-    src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b);
-    /* a20 0 a21 0 a22 0 a23 0 -- 16 bit long */
-    src_r2 = _mm_unpacklo_epi16(src_r2_r3, zero_8x16b);
-    /* a30 0 a31 0 a32 0 a33 0 -- 16 bit long */
-    src_r3 = _mm_unpackhi_epi16(src_r2_r3, zero_8x16b);
-
-    temp4 = _mm_madd_epi16(src_r0, temp4);
-    temp5 = _mm_madd_epi16(src_r1, temp5);
-    temp6 = _mm_madd_epi16(src_r2, temp6);
-    temp7 = _mm_madd_epi16(src_r3, temp7);
-
-    if(u4_qp_div_6 >= 4)
-    {
-        resq_r0 = _mm_slli_epi32(temp4, u4_qp_div_6 - 4);
-        resq_r1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 4);
-        resq_r2 = _mm_slli_epi32(temp6, u4_qp_div_6 - 4);
-        resq_r3 = _mm_slli_epi32(temp7, u4_qp_div_6 - 4);
-    }
-    else
-    {
-        temp4 = _mm_add_epi32(temp4, add_rshift);
-        temp5 = _mm_add_epi32(temp5, add_rshift);
-        temp6 = _mm_add_epi32(temp6, add_rshift);
-        temp7 = _mm_add_epi32(temp7, add_rshift);
-        resq_r0 = _mm_srai_epi32(temp4, 4 - u4_qp_div_6);
-        resq_r1 = _mm_srai_epi32(temp5, 4 - u4_qp_div_6);
-        resq_r2 = _mm_srai_epi32(temp6, 4 - u4_qp_div_6);
-        resq_r3 = _mm_srai_epi32(temp7, 4 - u4_qp_div_6);
-    }
-
-    resq_r0 = _mm_insert_epi32(resq_r0, (WORD32) pi2_dc_src[0], 0);
-    /* Perform Inverse transform */
-    /*-------------------------------------------------------------*/
-    /* IDCT [ Horizontal transformation ]                          */
-    /*-------------------------------------------------------------*/
-    // Matrix transpose
-    /*
-     *  a0 a1 a2 a3
-     *  b0 b1 b2 b3
-     *  c0 c1 c2 c3
-     *  d0 d1 d2 d3
-     */
-    /* a0 b0 a1 b1 */
-    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1);
-    /* c0 d0 c1 d1 */
-    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3);
-    /* a2 b2 a3 b3 */
-    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1);
-    /* c2 d2 c3 d3 */
-    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3);
-    /* a0 b0 c0 d0 */
-    resq_r0 = _mm_unpacklo_epi64(temp1, temp3);
-    /* a1 b1 c1 d1 */
-    resq_r1 = _mm_unpackhi_epi64(temp1, temp3);
-    /* a2 b2 c2 d2 */
-    resq_r2 = _mm_unpacklo_epi64(temp2, temp4);
-    /* a3 b3 c3 d3 */
-    resq_r3 = _mm_unpackhi_epi64(temp2, temp4);
-    /* Transform starts -- horizontal transform */
-
-    /*------------------------------------------------------------------*/
-    /* z0 = w0 + w2                                             */
-    temp0 = _mm_add_epi32(resq_r0, resq_r2);
-    /* z1 = w0 - w2                                             */
-    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
-    /* z2 = (w1 >> 1) - w3                                      */
-    temp2 = _mm_srai_epi32(resq_r1, 1);
-    temp2 = _mm_sub_epi32(temp2, resq_r3);
-    /* z3 = w1 + (w3 >> 1)                                      */
-    temp3 = _mm_srai_epi32(resq_r3, 1);  //(w3>>1) + w1
-    temp3 = _mm_add_epi32(temp3, resq_r1);
-    /*----------------------------------------------------------*/
-    /* x0 = z0 + z3                                             */
-    resq_r0 = _mm_add_epi32(temp0, temp3);
-    /* x1 = z1 + z2                                             */
-    resq_r1 = _mm_add_epi32(temp1, temp2);
-    /* x2 = z1 - z2                                             */
-    resq_r2 = _mm_sub_epi32(temp1, temp2);
-    /* x3 = z0 - z3                                             */
-    resq_r3 = _mm_sub_epi32(temp0, temp3);
-    // Matrix transpose
-    /*
-     *  a0 b0 c0 d0
-     *  a1 b1 c1 d1
-     *  a2 b2 c2 d2
-     *  a3 b3 c3 d3
-     */
-    /* a0 a1 b0 b1 */
-    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1);
-    /* a2 a3 b2 b3 */
-    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3);
-    /* c0 c1 d0 d1 */
-    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1);
-    /* c2 c3 d2 d3 */
-    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3);
-    /* a0 a1 a2 a3 */
-    resq_r0 = _mm_unpacklo_epi64(temp1, temp3);
-    /* b0 b1 b2 b3 */
-    resq_r1 = _mm_unpackhi_epi64(temp1, temp3);
-    /* c0 c1 c2 c3 */
-    resq_r2 = _mm_unpacklo_epi64(temp2, temp4);
-    /* d0 d1 d2 d3 */
-    resq_r3 = _mm_unpackhi_epi64(temp2, temp4);
-    /* Transform ends -- horizontal transform */
-
-    temp0 = _mm_packs_epi32(resq_r0, resq_r1);
-    temp1 = _mm_packs_epi32(resq_r2, resq_r3);
-
-    _mm_storeu_si128((__m128i *) (&pi2_tmp[0]), temp0);
-    _mm_storeu_si128((__m128i *) (&pi2_tmp[2 * 4]), temp1);
-
-    /* Load pred buffer */
-    pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0]));
-    pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride]));
-    pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride]));
-    pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride]));
-
-    pred_r0 = _mm_and_si128(pred_r0, chroma_mask);
-    pred_r1 = _mm_and_si128(pred_r1, chroma_mask);
-    pred_r2 = _mm_and_si128(pred_r2, chroma_mask);
-    pred_r3 = _mm_and_si128(pred_r3, chroma_mask);
-
-    /*--------------------------------------------------------------*/
-    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6      */
-    /*                                                              */
-    /* Add the prediction and store it back to same buffer          */
-    /*--------------------------------------------------------------*/
-    /* z0j = y0j + y2j                                         */
-    temp0 = _mm_add_epi32(resq_r0, resq_r2);
-    /* z1j = y0j - y2j                                                        */
-    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
-    /* z2j = (y1j>>1) - y3j */
-    temp2 = _mm_srai_epi32(resq_r1, 1);
-    temp2 = _mm_sub_epi32(temp2, resq_r3);
-    /* z3j = y1j + (y3j>>1) */
-    temp3 = _mm_srai_epi32(resq_r3, 1);
-    temp3 = _mm_add_epi32(temp3, resq_r1);
-
-    /* x0j = z0j + z3j                                                        */
-    temp4 = _mm_add_epi32(temp0, temp3);
-    temp4 = _mm_add_epi32(temp4, value_32);
-    temp4 = _mm_srai_epi32(temp4, 6);
-    res_r0 = temp4;
-    /* x1j = z1j + z2j                                                        */
-    temp5 = _mm_add_epi32(temp1, temp2);
-    temp5 = _mm_add_epi32(temp5, value_32);
-    temp5 = _mm_srai_epi32(temp5, 6);
-    res_r1 = temp5;
-    /* x2j = z1j - z2j                                                        */
-    temp6 = _mm_sub_epi32(temp1, temp2);
-    temp6 = _mm_add_epi32(temp6, value_32);
-    temp6 = _mm_srai_epi32(temp6, 6);
-    res_r2 = temp6;
-    /* x3j = z0j - z3j                                                        */
-    temp7 = _mm_sub_epi32(temp0, temp3);
-    temp7 = _mm_add_epi32(temp7, value_32);
-    temp7 = _mm_srai_epi32(temp7, 6);
-    res_r3 = temp7;
-
-    res_pred_r0 = _mm_loadu_si128((__m128i *) &pi2_res_pred[0 * i4_res_pred_stride]);
-    res_pred_r1 = _mm_loadu_si128((__m128i *) &pi2_res_pred[1 * i4_res_pred_stride]);
-    res_pred_r2 = _mm_loadu_si128((__m128i *) &pi2_res_pred[2 * i4_res_pred_stride]);
-    res_pred_r3 = _mm_loadu_si128((__m128i *) &pi2_res_pred[3 * i4_res_pred_stride]);
-
-    res_pred_r0 = _mm_and_si128(res_pred_r0, reg_chroma);
-    res_pred_r1 = _mm_and_si128(res_pred_r1, reg_chroma);
-    res_pred_r2 = _mm_and_si128(res_pred_r2, reg_chroma);
-    res_pred_r3 = _mm_and_si128(res_pred_r3, reg_chroma);
-
-    temp0 = _mm_packs_epi32(res_r0, res_r1);
-    temp1 = _mm_packs_epi32(res_r2, res_r3);
-
-    res_r0 = _mm_cvtepu16_epi32(temp0);
-    res_r2 = _mm_cvtepu16_epi32(temp1);
-    res_r1 = _mm_srli_si128(temp0, 8);
-    res_r3 = _mm_srli_si128(temp1, 8);
-    res_r1 = _mm_cvtepu16_epi32(res_r1);
-    res_r3 = _mm_cvtepu16_epi32(res_r3);
-
-    res_r0 = _mm_add_epi16(res_pred_r0, res_r0);
-    res_r1 = _mm_add_epi16(res_pred_r1, res_r1);
-    res_r2 = _mm_add_epi16(res_pred_r2, res_r2);
-    res_r3 = _mm_add_epi16(res_pred_r3, res_r3);
-
-    temp0 = _mm_packus_epi32(res_r0, res_r1);
-    temp1 = _mm_packus_epi32(res_r2, res_r3);
-
-    /* Saturate all values < -255 to -255 and retain the rest as it is */
-    temp0 = _mm_max_epi16(temp0, neg_255_8x16b);
-    /* Saturate all values > 255 to 255 and retain the rest as it is */
-    temp0 = _mm_min_epi16(temp0, pos_255_8x16b);
-
-    /* Saturate all values < -255 to -255 and retain the rest as it is */
-    temp1 = _mm_max_epi16(temp1, neg_255_8x16b);
-    /* Saturate all values > 255 to 255 and retain the rest as it is */
-    temp1 = _mm_min_epi16(temp1, pos_255_8x16b);
-
-    res_r0 = _mm_cvtepu16_epi32(temp0);
-    res_r1 = _mm_srli_si128(temp0, 8);
-    res_r1 = _mm_cvtepu16_epi32(res_r1);
-
-    res_r2 = _mm_cvtepu16_epi32(temp1);
-    res_r3 = _mm_srli_si128(temp1, 8);
-    res_r3 = _mm_cvtepu16_epi32(res_r3);
-
-    chroma_mask = _mm_set1_epi32(0xffff0000);
-    out_r0 = _mm_loadu_si128((__m128i *) (&pi2_res[0 * i4_res_stride]));
-    out_r1 = _mm_loadu_si128((__m128i *) (&pi2_res[1 * i4_res_stride]));
-    out_r2 = _mm_loadu_si128((__m128i *) (&pi2_res[2 * i4_res_stride]));
-    out_r3 = _mm_loadu_si128((__m128i *) (&pi2_res[3 * i4_res_stride]));
-
-    out_r0 = _mm_and_si128(out_r0, chroma_mask);
-    out_r1 = _mm_and_si128(out_r1, chroma_mask);
-    out_r2 = _mm_and_si128(out_r2, chroma_mask);
-    out_r3 = _mm_and_si128(out_r3, chroma_mask);
-
-    out_r0 = _mm_add_epi16(out_r0, res_r0);
-    out_r1 = _mm_add_epi16(out_r1, res_r1);
-    out_r2 = _mm_add_epi16(out_r2, res_r2);
-    out_r3 = _mm_add_epi16(out_r3, res_r3);
-
-    _mm_storeu_si128((__m128i *) (&pi2_res[0 * i4_res_stride]), out_r0);
-    _mm_storeu_si128((__m128i *) (&pi2_res[1 * i4_res_stride]), out_r1);
-    _mm_storeu_si128((__m128i *) (&pi2_res[2 * i4_res_stride]), out_r2);
-    _mm_storeu_si128((__m128i *) (&pi2_res[3 * i4_res_stride]), out_r3);
-
-    pred_r0 = _mm_cvtepu16_epi32(pred_r0);
-    pred_r1 = _mm_cvtepu16_epi32(pred_r1);
-    pred_r2 = _mm_cvtepu16_epi32(pred_r2);
-    pred_r3 = _mm_cvtepu16_epi32(pred_r3);
-
-    resq_r0 = _mm_add_epi16(pred_r0, res_r0);
-    resq_r1 = _mm_add_epi16(pred_r1, res_r1);
-    resq_r2 = _mm_add_epi16(pred_r2, res_r2);
-    resq_r3 = _mm_add_epi16(pred_r3, res_r3);
-
-    temp0 = _mm_packus_epi32(resq_r0, resq_r1);
-    temp1 = _mm_packus_epi32(resq_r2, resq_r3);
-
-    /* Clipping the results to 8 bits */
-    mask_r0 = _mm_cmpgt_epi16(temp0, zero_8x16b);
-    temp0 = _mm_and_si128(temp0, mask_r0);
-    mask_r0 = _mm_cmpgt_epi16(temp1, zero_8x16b);
-    temp1 = _mm_and_si128(temp1, mask_r0);
-
-    resq_r0 = _mm_packus_epi16(temp0, temp1);
-    resq_r1 = _mm_srli_si128(resq_r0, 4);
-    resq_r2 = _mm_srli_si128(resq_r1, 4);
-    resq_r3 = _mm_srli_si128(resq_r2, 4);
-
-    resq_r0 = _mm_cvtepu8_epi16(resq_r0);
-    resq_r1 = _mm_cvtepu8_epi16(resq_r1);
-    resq_r2 = _mm_cvtepu8_epi16(resq_r2);
-    resq_r3 = _mm_cvtepu8_epi16(resq_r3);
-
-    chroma_mask = _mm_set1_epi16(0xFF00);
-    out_r0 = _mm_loadl_epi64((__m128i *) (&pu1_out[0 * i4_out_stride]));
-    out_r1 = _mm_loadl_epi64((__m128i *) (&pu1_out[1 * i4_out_stride]));
-    out_r2 = _mm_loadl_epi64((__m128i *) (&pu1_out[2 * i4_out_stride]));
-    out_r3 = _mm_loadl_epi64((__m128i *) (&pu1_out[3 * i4_out_stride]));
-
-    out_r0 = _mm_and_si128(out_r0, chroma_mask);
-    out_r1 = _mm_and_si128(out_r1, chroma_mask);
-    out_r2 = _mm_and_si128(out_r2, chroma_mask);
-    out_r3 = _mm_and_si128(out_r3, chroma_mask);
-
-    out_r0 = _mm_add_epi8(out_r0, resq_r0);
-    out_r1 = _mm_add_epi8(out_r1, resq_r1);
-    out_r2 = _mm_add_epi8(out_r2, resq_r2);
-    out_r3 = _mm_add_epi8(out_r3, resq_r3);
-
-    _mm_storel_epi64((__m128i *) (&pu1_out[0 * i4_out_stride]), out_r0);
-    _mm_storel_epi64((__m128i *) (&pu1_out[1 * i4_out_stride]), out_r1);
-    _mm_storel_epi64((__m128i *) (&pu1_out[2 * i4_out_stride]), out_r2);
-    _mm_storel_epi64((__m128i *) (&pu1_out[3 * i4_out_stride]), out_r3);
-}
-
-void isvc_iquant_itrans_recon_dc_4x4_sse42(buffer_container_t *ps_src, buffer_container_t *ps_pred,
-                                           buffer_container_t *ps_res_pred,
-                                           buffer_container_t *ps_res, buffer_container_t *ps_rec,
-                                           iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
-                                           WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
-                                           WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
-{
-    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
-    UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
-    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
-    WORD32 i4_out_stride = ps_rec->i4_data_stride;
-    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
-    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
-    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
-    UWORD32 *pu4_out = (UWORD32 *) pu1_out;
-    WORD32 q0 = ((WORD16 *) (ps_src->pv_data))[0];
-    WORD16 i_macro, rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
-
-    __m128i pred_r0, pred_r1, pred_r2, pred_r3;
-    __m128i sign_reg;
-    /* all bits reset to zero */
-    __m128i zero_8x16b = _mm_setzero_si128();
-    __m128i temp4, temp5, temp6, temp7;
-    __m128i value_add;
-
-    ASSERT(0 == u1_res_accumulate);
-
-    UNUSED(pi2_tmp);
-    UNUSED(ps_res);
-    UNUSED(ps_res_pred);
-    UNUSED(u1_res_accumulate);
-
-    INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
-
-    /* Restoring dc value for intra case */
-    if(i4_iq_start_idx != 0)
-    {
-        q0 = pi2_dc_src[0];
-    }
-
-    i_macro = ((q0 + 32) >> 6);
-
-    value_add = _mm_set1_epi16(i_macro);
-
-    zero_8x16b = _mm_setzero_si128();
-
-    /* Load pred buffer */
-
-    /* p00 p01 p02 p03 0 0 0 0 -- all 8 bits */
-    pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0]));
-
-    /* p10 p11 p12 p13 0 0 0 0 -- all 8 bits */
-    pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride]));
-
-    /* p20 p21 p22 p23 0 0 0 0 -- all 8 bits */
-    pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride]));
-
-    /* p30 p31 p32 p33 0 0 0 0 -- all 8 bits */
-    pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride]));
-
-    pred_r0 = _mm_cvtepu8_epi16(pred_r0);
-    pred_r1 = _mm_cvtepu8_epi16(pred_r1);
-    pred_r2 = _mm_cvtepu8_epi16(pred_r2);
-    pred_r3 = _mm_cvtepu8_epi16(pred_r3);
-
-    pred_r0 = _mm_unpacklo_epi64(pred_r0, pred_r1);
-    pred_r2 = _mm_unpacklo_epi64(pred_r2, pred_r3);
-
-    temp4 = _mm_add_epi16(value_add, pred_r0);
-    temp5 = _mm_add_epi16(value_add, pred_r2);
-    /*------------------------------------------------------------------*/
-    /* Clipping the results to 8 bits */
-    sign_reg = _mm_cmpgt_epi16(temp4, zero_8x16b);
-    temp4 = _mm_and_si128(temp4, sign_reg);
-    sign_reg = _mm_cmpgt_epi16(temp5, zero_8x16b);
-    temp5 = _mm_and_si128(temp5, sign_reg);
-
-    temp4 = _mm_packus_epi16(temp4, temp5);
-    temp5 = _mm_srli_si128(temp4, 4);
-    temp6 = _mm_srli_si128(temp5, 4);
-    temp7 = _mm_srli_si128(temp6, 4);
-
-    *pu4_out = _mm_cvtsi128_si32(temp4);
-    pu1_out += i4_out_stride;
-    pu4_out = (UWORD32 *) (pu1_out);
-    *(pu4_out) = _mm_cvtsi128_si32(temp5);
-    pu1_out += i4_out_stride;
-    pu4_out = (UWORD32 *) (pu1_out);
-    *(pu4_out) = _mm_cvtsi128_si32(temp6);
-    pu1_out += i4_out_stride;
-    pu4_out = (UWORD32 *) (pu1_out);
-    *(pu4_out) = _mm_cvtsi128_si32(temp7);
-}
-
-void isvc_iquant_itrans_recon_res_dc_4x4_sse42(
-    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
-    buffer_container_t *ps_res, buffer_container_t *ps_rec,
-    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
-    WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
-{
-    WORD16 *pi2_res = (WORD16 *) ps_res->pv_data;
-    WORD16 *pi2_res_ptr = pi2_res;
-    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
-    UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
-    WORD32 i4_res_stride = ps_res->i4_data_stride;
-    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
-    WORD32 i4_out_stride = ps_rec->i4_data_stride;
-    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
-    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
-    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
-    UWORD32 *pu4_out = (UWORD32 *) pu1_out;
-    WORD32 q0 = ((WORD16 *) (ps_src->pv_data))[0];
-    WORD16 i_macro, rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
-
+    UWORD32 *pu4_out = (UWORD32 *) pu1_out;
+    WORD32 q0 = ((WORD16 *) (ps_src->pv_data))[0];
+    WORD16 i_macro, rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
+
     __m128i pred_r0, pred_r1, pred_r2, pred_r3;
     __m128i sign_reg;
     /* all bits reset to zero */
@@ -2221,17 +1775,21 @@ void isvc_iquant_itrans_recon_res_dc_4x4_sse42(
     ASSERT(0 == u1_res_accumulate);
 
     UNUSED(pi2_tmp);
+    UNUSED(ps_res);
     UNUSED(ps_res_pred);
     UNUSED(u1_res_accumulate);
 
     INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
 
     /* Restoring dc value for intra case */
-    if(i4_iq_start_idx != 0) q0 = pi2_dc_src[0];
+    if(i4_iq_start_idx != 0)
+    {
+        q0 = pi2_dc_src[0];
+    }
 
     i_macro = ((q0 + 32) >> 6);
 
-    value_add = _mm_set1_epi16(isvc_get_residue(i_macro, 0, 0));
+    value_add = _mm_set1_epi16(i_macro);
 
     zero_8x16b = _mm_setzero_si128();
 
@@ -2259,141 +1817,6 @@ void isvc_iquant_itrans_recon_res_dc_4x4_sse42(
 
     temp4 = _mm_add_epi16(value_add, pred_r0);
     temp5 = _mm_add_epi16(value_add, pred_r2);
-
-    _mm_storeu_si128((__m128i *) (&pi2_res_ptr[0]), value_add);
-    _mm_storeu_si128((__m128i *) (&pi2_res_ptr[i4_res_stride]), value_add);
-    _mm_storeu_si128((__m128i *) (&pi2_res_ptr[2 * i4_res_stride]), value_add);
-    _mm_storeu_si128((__m128i *) (&pi2_res_ptr[3 * i4_res_stride]), value_add);
-    /*------------------------------------------------------------------*/
-    /* Clipping the results to 8 bits */
-    sign_reg = _mm_cmpgt_epi16(temp4, zero_8x16b);
-    temp4 = _mm_and_si128(temp4, sign_reg);
-    sign_reg = _mm_cmpgt_epi16(temp5, zero_8x16b);
-    temp5 = _mm_and_si128(temp5, sign_reg);
-
-    temp4 = _mm_packus_epi16(temp4, temp5);
-    temp5 = _mm_srli_si128(temp4, 4);
-    temp6 = _mm_srli_si128(temp5, 4);
-    temp7 = _mm_srli_si128(temp6, 4);
-
-    *pu4_out = _mm_cvtsi128_si32(temp4);
-    pu1_out += i4_out_stride;
-    pu4_out = (UWORD32 *) (pu1_out);
-    *(pu4_out) = _mm_cvtsi128_si32(temp5);
-    pu1_out += i4_out_stride;
-    pu4_out = (UWORD32 *) (pu1_out);
-    *(pu4_out) = _mm_cvtsi128_si32(temp6);
-    pu1_out += i4_out_stride;
-    pu4_out = (UWORD32 *) (pu1_out);
-    *(pu4_out) = _mm_cvtsi128_si32(temp7);
-}
-
-void isvc_iquant_itrans_recon_res_dc_with_res_acc_4x4_sse42(
-    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
-    buffer_container_t *ps_res, buffer_container_t *ps_rec,
-    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
-    WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
-{
-    WORD16 *pi2_res = (WORD16 *) ps_res->pv_data;
-    WORD16 *pi2_res_ptr = pi2_res;
-    WORD16 *pi2_res_pred = (WORD16 *) ps_res_pred->pv_data;
-    WORD16 *pi2_res_pred_ptr = pi2_res_pred;
-    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
-    UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
-    WORD32 i4_res_stride = ps_res->i4_data_stride;
-    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
-    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
-    WORD32 i4_out_stride = ps_rec->i4_data_stride;
-    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
-    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
-    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
-    UWORD32 *pu4_out = (UWORD32 *) pu1_out;
-    WORD32 q0 = ((WORD16 *) (ps_src->pv_data))[0];
-    WORD16 i_macro, rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
-
-    __m128i pred_r0, pred_r1, pred_r2, pred_r3;
-    __m128i sign_reg;
-    /* all bits reset to zero */
-    __m128i zero_8x16b = _mm_setzero_si128();
-    __m128i temp4, temp5, temp6, temp7;
-    __m128i value_add;
-    __m128i res_pred_r0, res_pred_r1, res_pred_r2, res_pred_r3;
-    __m128i temp0, temp1;
-    __m128i neg_255_8x16b = _mm_set1_epi16(-((WORD16) UINT8_MAX));
-    __m128i pos_255_8x16b = _mm_set1_epi16(((WORD16) UINT8_MAX));
-
-    ASSERT(1 == u1_res_accumulate);
-
-    UNUSED(pi2_tmp);
-    UNUSED(u1_res_accumulate);
-
-    INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
-
-    /* Restoring dc value for intra case */
-    if(i4_iq_start_idx != 0) q0 = pi2_dc_src[0];
-
-    i_macro = ((q0 + 32) >> 6);
-
-    value_add = _mm_set1_epi16(i_macro);
-
-    zero_8x16b = _mm_setzero_si128();
-
-    /* Load pred buffer */
-
-    /* p00 p01 p02 p03 0 0 0 0 -- all 8 bits */
-    pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0]));
-
-    /* p10 p11 p12 p13 0 0 0 0 -- all 8 bits */
-    pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride]));
-
-    /* p20 p21 p22 p23 0 0 0 0 -- all 8 bits */
-    pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride]));
-
-    /* p30 p31 p32 p33 0 0 0 0 -- all 8 bits */
-    pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride]));
-
-    pred_r0 = _mm_cvtepu8_epi16(pred_r0);
-    pred_r1 = _mm_cvtepu8_epi16(pred_r1);
-    pred_r2 = _mm_cvtepu8_epi16(pred_r2);
-    pred_r3 = _mm_cvtepu8_epi16(pred_r3);
-
-    pred_r0 = _mm_unpacklo_epi64(pred_r0, pred_r1);
-    pred_r2 = _mm_unpacklo_epi64(pred_r2, pred_r3);
-
-    /* Accumulating res */
-    res_pred_r0 = _mm_loadl_epi64((__m128i *) &pi2_res_pred_ptr[0]);
-    res_pred_r1 = _mm_loadl_epi64((__m128i *) &pi2_res_pred_ptr[i4_res_pred_stride]);
-    res_pred_r2 = _mm_loadl_epi64((__m128i *) &pi2_res_pred_ptr[2 * i4_res_pred_stride]);
-    res_pred_r3 = _mm_loadl_epi64((__m128i *) &pi2_res_pred_ptr[3 * i4_res_pred_stride]);
-
-    res_pred_r0 = _mm_unpacklo_epi64(res_pred_r0, res_pred_r1);
-    res_pred_r1 = _mm_unpacklo_epi64(res_pred_r2, res_pred_r3);
-
-    temp0 = _mm_add_epi16(value_add, res_pred_r0);
-    temp1 = _mm_add_epi16(value_add, res_pred_r1);
-
-    /* Saturate all values < -255 to -255 and retain the rest as it is */
-    temp0 = _mm_max_epi16(temp0, neg_255_8x16b);
-    /* Saturate all values > 255 to 255 and retain the rest as it is */
-    temp0 = _mm_min_epi16(temp0, pos_255_8x16b);
-
-    /* Saturate all values < -255 to -255 and retain the rest as it is */
-    temp1 = _mm_max_epi16(temp1, neg_255_8x16b);
-    /* Saturate all values > 255 to 255 and retain the rest as it is */
-    temp1 = _mm_min_epi16(temp1, pos_255_8x16b);
-
-    _mm_storeu_si128((__m128i *) (&pi2_res_ptr[0]), temp0);
-    _mm_storeu_si128((__m128i *) (&pi2_res_ptr[2 * i4_res_stride]), temp1);
-
-    temp4 = _mm_add_epi16(temp0, pred_r0);
-    temp5 = _mm_add_epi16(temp1, pred_r2);
-
-    temp0 = _mm_srli_si128(temp0, 8);
-    temp1 = _mm_srli_si128(temp1, 8);
-
-    _mm_storeu_si128((__m128i *) (&pi2_res_ptr[i4_res_stride]), temp0);
-    _mm_storeu_si128((__m128i *) (&pi2_res_ptr[3 * i4_res_stride]), temp1);
-
     /*------------------------------------------------------------------*/
     /* Clipping the results to 8 bits */
     sign_reg = _mm_cmpgt_epi16(temp4, zero_8x16b);
@@ -2418,110 +1841,6 @@ void isvc_iquant_itrans_recon_res_dc_with_res_acc_4x4_sse42(
     *(pu4_out) = _mm_cvtsi128_si32(temp7);
 }
 
-void isvc_iquant_itrans_recon_chroma_4x4_dc_sse42(
-    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
-    buffer_container_t *ps_res, buffer_container_t *ps_rec,
-    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
-    WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
-{
-    WORD16 *pi2_src = (WORD16 *) ps_src->pv_data;
-    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
-    UWORD8 *pu1_out = (UWORD8 *) ps_rec->pv_data;
-    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
-    WORD32 i4_out_stride = ps_rec->i4_data_stride;
-    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
-    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
-    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
-    /* DC value won't be dequantized for chroma
-    inverse transform */
-    WORD16 q0 = pi2_dc_src[0];
-    WORD16 i_macro = ((q0 + 32) >> 6);
-
-    __m128i pred_r0, pred_r1, pred_r2, pred_r3;
-    /* all bits reset to zero */
-    __m128i zero_8x16b = _mm_setzero_si128();
-    __m128i chroma_mask = _mm_set1_epi16(0xFF);
-    __m128i value_add = _mm_set1_epi16(i_macro);
-    __m128i out_r0, out_r1, out_r2, out_r3;
-
-    ASSERT(0 == u1_res_accumulate);
-
-    UNUSED(pi2_src);
-    UNUSED(pu2_iscal_mat);
-    UNUSED(pu2_weigh_mat);
-    UNUSED(u4_qp_div_6);
-    UNUSED(pi2_tmp);
-    UNUSED(ps_res_pred);
-    UNUSED(ps_res);
-    UNUSED(i4_iq_start_idx);
-    UNUSED(u1_res_accumulate);
-
-    /* Load pred buffer */
-    pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0]));
-
-    pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[i4_pred_stride]));
-
-    pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * i4_pred_stride]));
-
-    pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * i4_pred_stride]));
-
-    /* Mask alternate pred values from the interleaved pred buf */
-    pred_r0 = _mm_and_si128(pred_r0, chroma_mask);
-    pred_r1 = _mm_and_si128(pred_r1, chroma_mask);
-    pred_r2 = _mm_and_si128(pred_r2, chroma_mask);
-    pred_r3 = _mm_and_si128(pred_r3, chroma_mask);
-
-    /* Pack the first four 16 bit values of 2 regs into a single reg*/
-    pred_r0 = _mm_unpacklo_epi64(pred_r0, pred_r1);
-    pred_r2 = _mm_unpacklo_epi64(pred_r2, pred_r3);
-
-    /* Compute out pixel by adding res to pred */
-    pred_r0 = _mm_add_epi16(value_add, pred_r0);
-    pred_r2 = _mm_add_epi16(value_add, pred_r2);
-    /*------------------------------------------------------------------*/
-    /* Clipping the results to 8 bits */
-    pred_r0 = _mm_packus_epi16(pred_r0, pred_r2);
-    pred_r1 = _mm_srli_si128(pred_r0, 4);
-    pred_r2 = _mm_srli_si128(pred_r1, 4);
-    pred_r3 = _mm_srli_si128(pred_r2, 4);
-
-    /* p00 p01 p02 p03 -- all 16 bits */
-    pred_r0 = _mm_unpacklo_epi8(pred_r0, zero_8x16b);
-    /* p10 p11 p12 p13 -- all 16 bits */
-    pred_r1 = _mm_unpacklo_epi8(pred_r1, zero_8x16b);
-    /* p20 p21 p22 p23 -- all 16 bits */
-    pred_r2 = _mm_unpacklo_epi8(pred_r2, zero_8x16b);
-    /* p30 p31 p32 p33 -- all 16 bits */
-    pred_r3 = _mm_unpacklo_epi8(pred_r3, zero_8x16b);
-
-    /* Load interleaved out buffer */
-    out_r0 = _mm_loadl_epi64((__m128i *) (&pu1_out[0]));
-    out_r1 = _mm_loadl_epi64((__m128i *) (&pu1_out[i4_out_stride]));
-    out_r2 = _mm_loadl_epi64((__m128i *) (&pu1_out[2 * i4_out_stride]));
-    out_r3 = _mm_loadl_epi64((__m128i *) (&pu1_out[3 * i4_out_stride]));
-
-    /* Mask the interleaved out buf in order to save the U/V out pixel computed in
-    this function call without thrashing the U/V out pixel that was saved
-    during an earlier function call */
-    chroma_mask = _mm_set1_epi16(0xFF00);
-
-    out_r0 = _mm_and_si128(out_r0, chroma_mask);
-    out_r1 = _mm_and_si128(out_r1, chroma_mask);
-    out_r2 = _mm_and_si128(out_r2, chroma_mask);
-    out_r3 = _mm_and_si128(out_r3, chroma_mask);
-
-    /* Save the out pixels in alternate locations */
-    out_r0 = _mm_add_epi8(out_r0, pred_r0);
-    out_r1 = _mm_add_epi8(out_r1, pred_r1);
-    out_r2 = _mm_add_epi8(out_r2, pred_r2);
-    out_r3 = _mm_add_epi8(out_r3, pred_r3);
-
-    _mm_storel_epi64((__m128i *) (&pu1_out[0]), out_r0);
-    _mm_storel_epi64((__m128i *) (&pu1_out[i4_out_stride]), out_r1);
-    _mm_storel_epi64((__m128i *) (&pu1_out[2 * i4_out_stride]), out_r2);
-    _mm_storel_epi64((__m128i *) (&pu1_out[3 * i4_out_stride]), out_r3);
-}
-
 void isvc_iquant_itrans_recon_res_chroma_4x4_dc_sse42(
     buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
     buffer_container_t *ps_res, buffer_container_t *ps_rec,
diff --git a/encoder/arm/svc/isvce_function_selector_a9q.c b/encoder/arm/svc/isvce_function_selector_a9q.c
index b5f8ba4d..6a1abb17 100644
--- a/encoder/arm/svc/isvce_function_selector_a9q.c
+++ b/encoder/arm/svc/isvce_function_selector_a9q.c
@@ -161,35 +161,23 @@ void isvce_init_function_ptr_neon_a9q(isvce_codec_t *ps_codec)
         isvc_resi_trans_quant_chroma_4x4_with_residual_sub_neon;
 
     /* Init inverse transform fn ptr */
-    ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[0] = isvc_iquant_itrans_recon_8x8;
-    ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[1] = isvc_iquant_itrans_recon_8x8;
-    ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[2] = isvc_iquant_itrans_recon_8x8;
-
     ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[0] =
         isvc_iquant_itrans_recon_4x4_with_res_output_neon;
     ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[1] =
         isvc_iquant_itrans_recon_4x4_with_res_accumulate_neon;
     ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[2] = isvc_iquant_itrans_recon_4x4_neon;
 
-    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[0] =
-        isvc_iquant_itrans_recon_4x4_dc_with_res_output_neon;
-    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[1] =
-        isvc_iquant_itrans_recon_4x4_dc_with_res_accumulate_neon;
     ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[2] = isvc_iquant_itrans_recon_4x4_dc_neon;
 
     ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[0] =
         isvc_iquant_itrans_recon_chroma_4x4_with_res_output_neon;
     ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[1] =
         isvc_iquant_itrans_recon_chroma_4x4_with_res_accumulate_neon;
-    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[2] =
-        isvc_iquant_itrans_recon_chroma_4x4_neon;
 
     ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[0] =
         isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_output_neon;
     ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[1] =
         isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_accumulate_neon;
-    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[2] =
-        isvc_iquant_itrans_recon_chroma_4x4_dc_neon;
 
     ps_enc_loop_fxns->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_a9;
     ps_enc_loop_fxns->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv_a9;
@@ -243,9 +231,7 @@ void isvce_init_function_ptr_neon_a9q(isvce_codec_t *ps_codec)
     ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_a9q;
 
     /* memor handling operations */
-    ps_mem_fxns->pf_mem_cpy = ih264_memcpy_a9q;
     ps_mem_fxns->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_a9q;
-    ps_mem_fxns->pf_mem_set = ih264_memset_a9q;
     ps_mem_fxns->pf_mem_set_mul8 = ih264_memset_mul_8_a9q;
 
     /* sad me level functions */
diff --git a/encoder/arm/svc/isvce_function_selector_av8.c b/encoder/arm/svc/isvce_function_selector_av8.c
index 16c08bb9..c517c03f 100644
--- a/encoder/arm/svc/isvce_function_selector_av8.c
+++ b/encoder/arm/svc/isvce_function_selector_av8.c
@@ -161,35 +161,23 @@ void isvce_init_function_ptr_neon_av8(isvce_codec_t *ps_codec)
         isvc_resi_trans_quant_chroma_4x4_with_residual_sub_neon;
 
     /* Init inverse transform fn ptr */
-    ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[0] = isvc_iquant_itrans_recon_8x8;
-    ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[1] = isvc_iquant_itrans_recon_8x8;
-    ps_enc_loop_fxns->apf_iquant_itrans_recon_8x8[2] = isvc_iquant_itrans_recon_8x8;
-
     ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[0] =
         isvc_iquant_itrans_recon_4x4_with_res_output_neon;
     ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[1] =
         isvc_iquant_itrans_recon_4x4_with_res_accumulate_neon;
     ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[2] = isvc_iquant_itrans_recon_4x4_neon;
-
-    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[0] =
-        isvc_iquant_itrans_recon_4x4_dc_with_res_output_neon;
-    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[1] =
-        isvc_iquant_itrans_recon_4x4_dc_with_res_accumulate_neon;
+
     ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[2] = isvc_iquant_itrans_recon_4x4_dc_neon;
 
     ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[0] =
         isvc_iquant_itrans_recon_chroma_4x4_with_res_output_neon;
     ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[1] =
         isvc_iquant_itrans_recon_chroma_4x4_with_res_accumulate_neon;
-    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[2] =
-        isvc_iquant_itrans_recon_chroma_4x4_neon;
 
     ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[0] =
         isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_output_neon;
     ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[1] =
         isvc_iquant_itrans_recon_chroma_4x4_dc_with_res_accumulate_neon;
-    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[2] =
-        isvc_iquant_itrans_recon_chroma_4x4_dc_neon;
 
     ps_enc_loop_fxns->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_av8;
     ps_enc_loop_fxns->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv_av8;
@@ -243,9 +231,7 @@ void isvce_init_function_ptr_neon_av8(isvce_codec_t *ps_codec)
     ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_av8;
 
     /* memor handling operations */
-    ps_mem_fxns->pf_mem_cpy = ih264_memcpy_av8;
     ps_mem_fxns->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_av8;
-    ps_mem_fxns->pf_mem_set = ih264_memset_av8;
     ps_mem_fxns->pf_mem_set_mul8 = ih264_memset_mul_8_av8;
 
     /* sad me level functions */
diff --git a/encoder/svc/isvce_api.c b/encoder/svc/isvce_api.c
index 2004dbaf..d44d7e72 100644
--- a/encoder/svc/isvce_api.c
+++ b/encoder/svc/isvce_api.c
@@ -3342,10 +3342,14 @@ static WORD32 isvce_init_mem_rec(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *
     WORD32 max_wd_luma, max_ht_luma;
     WORD32 max_mb_rows, max_mb_cols, max_mb_cnt;
 
-    /* temp var */
     WORD32 i, j;
     WORD32 status = IV_SUCCESS;
 
+    if(MAX_CTXT_SETS != 1)
+    {
+        return IV_FAIL;
+    }
+
     /* mem records */
     ps_mem_rec_base = ps_ip->s_ive_ip.ps_mem_rec;
 
@@ -3493,117 +3497,59 @@ static WORD32 isvce_init_mem_rec(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *
     ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_ENTROPY];
     {
         /* temp var */
-        WORD32 size = 0, offset;
+        WORD32 size = 0;
 
         for(i = 0; i < MAX_PROCESS_CTXT; i++)
         {
-            if(i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
-            {
-                /* base ptr */
-                UWORD8 *pu1_buf = ps_mem_rec->pv_base;
-
-                /* reset size */
-                size = 0;
-
-                /* skip mb run */
-                ps_codec->as_process[i].s_entropy.pi4_mb_skip_run = (WORD32 *) (pu1_buf + size);
-                size += sizeof(WORD32);
-                size = ALIGN8(size);
-
-                /* entropy map */
-                ps_codec->as_process[i].s_entropy.pu1_entropy_map =
-                    (UWORD8 *) (pu1_buf + size + max_mb_cols);
-                /* size in bytes to store entropy status of an entire frame */
-                size += (max_mb_cols * max_mb_rows);
-                /* add an additional 1 row of bytes to evade the special case of row 0
-                 */
-                size += max_mb_cols;
-                size = ALIGN128(size);
-
-                /* bit stream ptr */
-                ps_codec->as_process[i].s_entropy.ps_bitstrm = (bitstrm_t *) (pu1_buf + size);
-                size += sizeof(ps_codec->as_process[i].s_entropy.ps_bitstrm[0]);
-                size = ALIGN128(size);
-
-#if ENABLE_RE_ENC_AS_SKIP
-                ps_codec->as_process[i].s_entropy.ps_bitstrm_after_slice_hdr =
-                    (bitstrm_t *) (pu1_buf + size);
-                size += sizeof(ps_codec->as_process[i].s_entropy.ps_bitstrm_after_slice_hdr[0]);
-                size = ALIGN128(size);
-#endif
-
-                /* nnz luma */
-                ps_codec->as_process[i].s_entropy.pu1_top_nnz_luma = (UWORD8(*)[4])(pu1_buf + size);
-                size += (max_mb_cols * 4 * sizeof(UWORD8));
-                size = ALIGN128(size);
-
-                /* nnz chroma */
-                ps_codec->as_process[i].s_entropy.pu1_top_nnz_cbcr = (UWORD8(*)[4])(pu1_buf + size);
-                size += (max_mb_cols * 4 * sizeof(UWORD8));
-                size = ALIGN128(size);
-
-                /* ps_mb_qp_ctxt */
-                ps_codec->as_process[i].s_entropy.ps_mb_qp_ctxt = (mb_qp_ctxt_t *) (pu1_buf + size);
-                size += ALIGN128(sizeof(ps_codec->as_process[i].s_entropy.ps_mb_qp_ctxt[0]));
-
-                offset = size;
-
-                /* cabac Context */
-                ps_codec->as_process[i].s_entropy.ps_cabac = ps_cabac;
-            }
-            else
-            {
-                /* base ptr */
-                UWORD8 *pu1_buf = ps_mem_rec->pv_base;
-
-                /* reset size */
-                size = offset;
-
-                /* skip mb run */
-                ps_codec->as_process[i].s_entropy.pi4_mb_skip_run = (WORD32 *) (pu1_buf + size);
-                size += sizeof(WORD32);
-                size = ALIGN8(size);
-
-                /* entropy map */
-                ps_codec->as_process[i].s_entropy.pu1_entropy_map =
-                    (UWORD8 *) (pu1_buf + size + max_mb_cols);
-                /* size in bytes to store entropy status of an entire frame */
-                size += (max_mb_cols * max_mb_rows);
-                /* add an additional 1 row of bytes to evade the special case of row 0
-                 */
-                size += max_mb_cols;
-                size = ALIGN128(size);
-
-                /* bit stream ptr */
-                ps_codec->as_process[i].s_entropy.ps_bitstrm = (bitstrm_t *) (pu1_buf + size);
-                size += sizeof(ps_codec->as_process[i].s_entropy.ps_bitstrm[0]);
-                size = ALIGN128(size);
+            /* base ptr */
+            UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+            /* restart at offset 0: all process contexts share one set of buffers */
+            size = 0;
+
+            /* skip mb run */
+            ps_codec->as_process[i].s_entropy.pi4_mb_skip_run = (WORD32 *) (pu1_buf + size);
+            size += sizeof(WORD32);
+            size = ALIGN8(size);
+
+            /* entropy map */
+            ps_codec->as_process[i].s_entropy.pu1_entropy_map =
+                (UWORD8 *) (pu1_buf + size + max_mb_cols);
+            /* size in bytes to store entropy status of an entire frame */
+            size += (max_mb_cols * max_mb_rows);
+            /* one extra row of bytes, so that row 0 needs no special handling; */
+            /* pu1_entropy_map above already points past this extra row */
+            size += max_mb_cols;
+            size = ALIGN128(size);
+
+            /* bit stream ptr */
+            ps_codec->as_process[i].s_entropy.ps_bitstrm = (bitstrm_t *) (pu1_buf + size);
+            size += sizeof(ps_codec->as_process[i].s_entropy.ps_bitstrm[0]);
+            size = ALIGN128(size);
 
 #if ENABLE_RE_ENC_AS_SKIP
-                ps_codec->as_process[i].s_entropy.ps_bitstrm_after_slice_hdr =
-                    (bitstrm_t *) (pu1_buf + size);
-                size += sizeof(ps_codec->as_process[i].s_entropy.ps_bitstrm_after_slice_hdr[0]);
-                size = ALIGN128(size);
+            ps_codec->as_process[i].s_entropy.ps_bitstrm_after_slice_hdr =
+                (bitstrm_t *) (pu1_buf + size);
+            size += sizeof(ps_codec->as_process[i].s_entropy.ps_bitstrm_after_slice_hdr[0]);
+            size = ALIGN128(size);
 #endif
 
-                /* nnz luma */
-                ps_codec->as_process[i].s_entropy.pu1_top_nnz_luma =
-                    (UWORD8(*)[4])(UWORD8(*)[4])(pu1_buf + size);
-                size += (max_mb_cols * 4 * sizeof(UWORD8));
-                size = ALIGN128(size);
+            /* nnz luma */
+            ps_codec->as_process[i].s_entropy.pu1_top_nnz_luma = (UWORD8(*)[4])(pu1_buf + size);
+            size += (max_mb_cols * 4 * sizeof(UWORD8));
+            size = ALIGN128(size);
 
-                /* nnz chroma */
-                ps_codec->as_process[i].s_entropy.pu1_top_nnz_cbcr = (UWORD8(*)[4])(pu1_buf + size);
-                size += (max_mb_cols * 4 * sizeof(UWORD8));
-                size = ALIGN128(size);
+            /* nnz chroma */
+            ps_codec->as_process[i].s_entropy.pu1_top_nnz_cbcr = (UWORD8(*)[4])(pu1_buf + size);
+            size += (max_mb_cols * 4 * sizeof(UWORD8));
+            size = ALIGN128(size);
 
-                /* ps_mb_qp_ctxt */
-                ps_codec->as_process[i].s_entropy.ps_mb_qp_ctxt = (mb_qp_ctxt_t *) (pu1_buf + size);
-                size = ALIGN128(sizeof(ps_codec->as_process[i].s_entropy.ps_mb_qp_ctxt[0]));
+            /* ps_mb_qp_ctxt */
+            ps_codec->as_process[i].s_entropy.ps_mb_qp_ctxt = (mb_qp_ctxt_t *) (pu1_buf + size);
+            size += ALIGN128(sizeof(ps_codec->as_process[i].s_entropy.ps_mb_qp_ctxt[0]));
 
-                /* cabac Context */
-                ps_codec->as_process[i].s_entropy.ps_cabac = ps_cabac;
-            }
+            /* cabac Context */
+            ps_codec->as_process[i].s_entropy.ps_cabac = ps_cabac;
         }
         ps_codec->as_process[0].s_entropy.ps_cabac->ps_mb_map_ctxt_inc_base = ps_mb_map_ctxt_inc;
     }
@@ -3631,16 +3577,8 @@ static WORD32 isvce_init_mem_rec(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *
 
         for(i = 0; i < MAX_PROCESS_CTXT; i++)
         {
-            if(i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
-            {
-                ps_codec->as_process[i].pv_pic_mb_coeff_data = pu1_buf;
-                ps_codec->as_process[i].s_entropy.pv_pic_mb_coeff_data = pu1_buf;
-            }
-            else
-            {
-                ps_codec->as_process[i].pv_pic_mb_coeff_data = pu1_buf + size;
-                ps_codec->as_process[i].s_entropy.pv_pic_mb_coeff_data = pu1_buf + size;
-            }
+            ps_codec->as_process[i].pv_pic_mb_coeff_data = pu1_buf;
+            ps_codec->as_process[i].s_entropy.pv_pic_mb_coeff_data = pu1_buf;
         }
     }
 
@@ -3667,16 +3605,8 @@ static WORD32 isvce_init_mem_rec(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *
 
         for(i = 0; i < MAX_PROCESS_CTXT; i++)
         {
-            if(i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
-            {
-                ps_codec->as_process[i].pv_pic_mb_header_data = pu1_buf;
-                ps_codec->as_process[i].s_entropy.pv_pic_mb_header_data = pu1_buf;
-            }
-            else
-            {
-                ps_codec->as_process[i].pv_pic_mb_header_data = pu1_buf + size;
-                ps_codec->as_process[i].s_entropy.pv_pic_mb_header_data = pu1_buf + size;
-            }
+            ps_codec->as_process[i].pv_pic_mb_header_data = pu1_buf;
+            ps_codec->as_process[i].s_entropy.pv_pic_mb_header_data = pu1_buf;
         }
     }
 
@@ -3729,17 +3659,7 @@ static WORD32 isvce_init_mem_rec(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *
 
         for(i = 0; i < MAX_PROCESS_CTXT; i++)
         {
-            if(i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
-            {
-                ps_codec->as_process[i].ps_svc_nalu_ext_base = ps_mem_rec->pv_base;
-            }
-            else
-            {
-                WORD32 size = SVC_MAX_SLICE_HDR_CNT * sizeof(slice_header_t);
-                void *pv_buf = (UWORD8 *) ps_mem_rec->pv_base + size;
-
-                ps_codec->as_process[i].ps_svc_nalu_ext_base = pv_buf;
-            }
+            ps_codec->as_process[i].ps_svc_nalu_ext_base = ps_mem_rec->pv_base;
         }
     }
 
@@ -3757,18 +3677,7 @@ static WORD32 isvce_init_mem_rec(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *
 
         for(i = 0; i < MAX_PROCESS_CTXT; i++)
         {
-            if(i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
-            {
-                ps_codec->as_process[i].ps_slice_hdr_base = ps_mem_rec->pv_base;
-            }
-            else
-            {
-                /* temp var */
-                WORD32 size = SVC_MAX_SLICE_HDR_CNT * sizeof(slice_header_t);
-                void *pv_buf = (UWORD8 *) ps_mem_rec->pv_base + size;
-
-                ps_codec->as_process[i].ps_slice_hdr_base = pv_buf;
-            }
+            ps_codec->as_process[i].ps_slice_hdr_base = ps_mem_rec->pv_base;
         }
     }
 
@@ -3789,14 +3698,7 @@ static WORD32 isvce_init_mem_rec(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *
 
         for(i = 0; i < MAX_PROCESS_CTXT; i++)
         {
-            if(i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
-            {
-                ps_codec->as_process[i].pu1_is_intra_coded = pu1_buf;
-            }
-            else
-            {
-                ps_codec->as_process[i].pu1_is_intra_coded = pu1_buf + max_mb_cnt;
-            }
+            ps_codec->as_process[i].pu1_is_intra_coded = pu1_buf;
         }
 
         ps_codec->pu2_intr_rfrsh_map = (UWORD16 *) (pu1_buf + max_mb_cnt * MAX_CTXT_SETS);
@@ -3805,22 +3707,14 @@ static WORD32 isvce_init_mem_rec(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *
     ps_mem_rec = &ps_mem_rec_base[ISVCE_MEM_REC_SLICE_MAP];
     {
         /* pointer to storage space */
-        UWORD8 *pu1_buf_ping, *pu1_buf_pong;
+        UWORD8 *pu1_buf_ping;
 
         /* init pointer */
         pu1_buf_ping = ps_mem_rec->pv_base;
-        pu1_buf_pong = pu1_buf_ping + ALIGN64(max_mb_cnt);
 
         for(i = 0; i < MAX_PROCESS_CTXT; i++)
         {
-            if(i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
-            {
-                ps_codec->as_process[i].pu1_slice_idx = pu1_buf_ping;
-            }
-            else
-            {
-                ps_codec->as_process[i].pu1_slice_idx = pu1_buf_pong;
-            }
+            ps_codec->as_process[i].pu1_slice_idx = pu1_buf_ping;
         }
     }
 
@@ -3862,25 +3756,9 @@ static WORD32 isvce_init_mem_rec(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *
         /* pointer to storage space */
         UWORD8 *pu1_buf = ps_mem_rec->pv_base;
 
-        /* total size of the mem record */
-        WORD32 total_size = 0;
-
-        /* size in bytes to mb core coding status of an entire frame */
-        total_size = max_mb_cnt;
-
-        /* add an additional 1 row of bytes to evade the special case of row 0 */
-        total_size += max_mb_cols;
-
         for(i = 0; i < MAX_PROCESS_CTXT; i++)
         {
-            if(i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
-            {
-                ps_codec->as_process[i].pu1_proc_map = pu1_buf + max_mb_cols;
-            }
-            else
-            {
-                ps_codec->as_process[i].pu1_proc_map = pu1_buf + total_size + max_mb_cols;
-            }
+            ps_codec->as_process[i].pu1_proc_map = pu1_buf + max_mb_cols;
         }
     }
 
@@ -3903,14 +3781,7 @@ static WORD32 isvce_init_mem_rec(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *
 
         for(i = 0; i < MAX_PROCESS_CTXT; i++)
         {
-            if(i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
-            {
-                ps_codec->as_process[i].pu1_deblk_map = pu1_buf + max_mb_cols;
-            }
-            else
-            {
-                ps_codec->as_process[i].pu1_deblk_map = pu1_buf + total_size + max_mb_cols;
-            }
+            ps_codec->as_process[i].pu1_deblk_map = pu1_buf + max_mb_cols;
         }
     }
 
@@ -3919,25 +3790,9 @@ static WORD32 isvce_init_mem_rec(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *
         /* pointer to storage space */
         UWORD8 *pu1_buf = (UWORD8 *) ps_mem_rec->pv_base;
 
-        /* total size of the mem record */
-        WORD32 total_size = 0;
-
-        /* size in bytes to mb core coding status of an entire frame */
-        total_size = max_mb_cnt;
-
-        /* add an additional 1 row of bytes to evade the special case of row 0 */
-        total_size += max_mb_cols;
-
         for(i = 0; i < MAX_PROCESS_CTXT; i++)
         {
-            if(i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
-            {
-                ps_codec->as_process[i].pu1_me_map = pu1_buf + max_mb_cols;
-            }
-            else
-            {
-                ps_codec->as_process[i].pu1_me_map = pu1_buf + total_size + max_mb_cols;
-            }
+            ps_codec->as_process[i].pu1_me_map = pu1_buf + max_mb_cols;
         }
     }
 
diff --git a/encoder/svc/isvce_encode.c b/encoder/svc/isvce_encode.c
index 8c6aa114..7d57ced2 100644
--- a/encoder/svc/isvce_encode.c
+++ b/encoder/svc/isvce_encode.c
@@ -108,39 +108,6 @@
 /* Function Definitions                                                      */
 /*****************************************************************************/
 
-/**
-******************************************************************************
-*
-* @brief This function puts the current thread to sleep for a duration
-*  of sleep_us
-*
-* @par Description
-*  ithread_yield() method causes the calling thread to yield execution to
-*another thread that is ready to run on the current processor. The operating
-*system selects the thread to yield to. ithread_usleep blocks the current thread
-*for the specified number of milliseconds. In other words, yield just says, end
-*my timeslice prematurely, look around for other threads to run. If there is
-*nothing better than me, continue. Sleep says I don't want to run for x
-*  milliseconds. Even if no other thread wants to run, don't make me run.
-*
-* @param[in] sleep_us
-*  thread sleep duration
-*
-* @returns error_status
-*
-******************************************************************************
-*/
-IH264E_ERROR_T isvce_wait_for_thread(UWORD32 sleep_us)
-{
-    /* yield thread */
-    ithread_yield();
-
-    /* put thread to sleep */
-    ithread_sleep(sleep_us);
-
-    return IH264E_SUCCESS;
-}
-
 /**
 ******************************************************************************
 *
diff --git a/encoder/svc/isvce_function_selector_generic.c b/encoder/svc/isvce_function_selector_generic.c
index 044bbeb0..c3a48bfa 100644
--- a/encoder/svc/isvce_function_selector_generic.c
+++ b/encoder/svc/isvce_function_selector_generic.c
@@ -265,14 +265,10 @@ void isvce_init_function_ptr_generic(isvce_codec_t *ps_codec)
     ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8;
 
     /* memory handling operations */
-    ps_mem_fxns->pf_mem_cpy = ih264_memcpy;
     ps_mem_fxns->pf_mem_cpy_mul8 = ih264_memcpy_mul_8;
-    ps_mem_fxns->pf_mem_set = ih264_memset;
     ps_mem_fxns->pf_mem_set_mul8 = ih264_memset_mul_8;
     ps_mem_fxns->pf_copy_2d = isvc_copy_2d;
     ps_mem_fxns->pf_memset_2d = isvc_memset_2d;
-    ps_mem_fxns->pf_16bit_interleaved_copy = isvc_16bit_interleaved_copy;
-    ps_mem_fxns->pf_16bit_interleaved_memset = isvc_16bit_interleaved_memset;
     ps_mem_fxns->pf_nonzero_checker = isvc_is_nonzero_blk;
 
     /* sad me level functions */
diff --git a/encoder/svc/isvce_ilp_mv.c b/encoder/svc/isvce_ilp_mv.c
index 9aa45a36..e9a7c172 100644
--- a/encoder/svc/isvce_ilp_mv.c
+++ b/encoder/svc/isvce_ilp_mv.c
@@ -204,7 +204,7 @@ void isvce_ilp_mv_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec)
 
     if(u1_num_spatial_layers > 1)
     {
-        ilp_mv_layer_state_t *ps_layer_states;
+        ilp_mv_layer_state_t *ps_layer_states = NULL;
         ilp_mv_mb_state_t *aps_luma_mb_states[MAX_NUM_SPATIAL_LAYERS];
 
         DOUBLE d_spatial_res_ratio = ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio;
diff --git a/encoder/svc/isvce_structs.h b/encoder/svc/isvce_structs.h
index 9d8f6c52..68619bc3 100644
--- a/encoder/svc/isvce_structs.h
+++ b/encoder/svc/isvce_structs.h
@@ -1958,10 +1958,6 @@ typedef struct inter_pred_fxns_t
 
 typedef struct mem_fxns_t
 {
-    FT_MEMCPY *pf_mem_cpy;
-
-    FT_MEMSET *pf_mem_set;
-
     FT_MEMCPY *pf_mem_cpy_mul8;
 
     FT_MEMSET *pf_mem_set_mul8;
@@ -1970,10 +1966,6 @@ typedef struct mem_fxns_t
 
     FT_MEMSET_2D *pf_memset_2d;
 
-    FT_16BIT_INTERLEAVED_COPY *pf_16bit_interleaved_copy;
-
-    FT_16BIT_INTERLEAVED_MEMSET *pf_16bit_interleaved_memset;
-
     FT_NONZERO_CHECKER *pf_nonzero_checker;
 
 } mem_fxns_t;
diff --git a/encoder/x86/svc/isvce_function_selector_sse42.c b/encoder/x86/svc/isvce_function_selector_sse42.c
index 709155f0..03f1cf0a 100644
--- a/encoder/x86/svc/isvce_function_selector_sse42.c
+++ b/encoder/x86/svc/isvce_function_selector_sse42.c
@@ -128,20 +128,13 @@ void isvce_init_function_ptr_sse42(isvce_codec_t *ps_codec)
         isvc_iquant_itrans_recon_res_chroma_4x4_sse42;
     ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[1] =
         isvc_iquant_itrans_recon_res_chroma_4x4_with_res_acc_sse42;
-    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[2] =
-        isvc_iquant_itrans_recon_chroma_4x4_sse42;
 
-    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[0] = isvc_iquant_itrans_recon_res_dc_4x4_sse42;
-    ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[1] =
-        isvc_iquant_itrans_recon_res_dc_with_res_acc_4x4_sse42;
     ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[2] = isvc_iquant_itrans_recon_dc_4x4_sse42;
 
     ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[0] =
         isvc_iquant_itrans_recon_res_chroma_4x4_dc_sse42;
     ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[1] =
         isvc_iquant_itrans_recon_res_chroma_4x4_dc_with_res_acc_sse42;
-    ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[2] =
-        isvc_iquant_itrans_recon_chroma_4x4_dc_sse42;
 
     ps_enc_loop_fxns->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_sse42;