/****************************************************************************** * * Copyright (C) 2018 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************************** * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore */ /*! ****************************************************************************** * \file ihevce_recur_bracketing.c * * \brief * This file contains interface functions of recursive bracketing * module * \date * 12/02/2012 * * \author * Ittiam * * List of Functions * * ****************************************************************************** */ /*****************************************************************************/ /* File Includes */ /*****************************************************************************/ /* System include files */ #include #include #include #include #include #include /* User include files */ #include "ihevc_typedefs.h" #include "itt_video_api.h" #include "ihevce_api.h" #include "rc_cntrl_param.h" #include "rc_frame_info_collector.h" #include "rc_look_ahead_params.h" #include "ihevc_defs.h" #include "ihevc_structs.h" #include "ihevc_platform_macros.h" #include "ihevc_deblk.h" #include "ihevc_itrans_recon.h" #include "ihevc_chroma_itrans_recon.h" #include "ihevc_chroma_intra_pred.h" #include "ihevc_intra_pred.h" #include "ihevc_inter_pred.h" #include "ihevc_mem_fns.h" #include "ihevc_padding.h" #include 
"ihevc_weighted_pred.h" #include "ihevc_sao.h" #include "ihevc_resi_trans.h" #include "ihevc_quant_iquant_ssd.h" #include "ihevc_cabac_tables.h" #include "ihevce_defs.h" #include "ihevce_lap_enc_structs.h" #include "ihevce_multi_thrd_structs.h" #include "ihevce_me_common_defs.h" #include "ihevce_had_satd.h" #include "ihevce_error_codes.h" #include "ihevce_bitstream.h" #include "ihevce_cabac.h" #include "ihevce_rdoq_macros.h" #include "ihevce_function_selector.h" #include "ihevce_enc_structs.h" #include "ihevce_entropy_structs.h" #include "ihevce_cmn_utils_instr_set_router.h" #include "ihevce_enc_loop_structs.h" #include "ihevce_ipe_instr_set_router.h" #include "ihevce_ipe_structs.h" #include "ihevce_ipe_pass.h" #include "ihevce_recur_bracketing.h" #include "ihevce_nbr_avail.h" #include "ihevc_common_tables.h" #include "ihevce_decomp_pre_intra_structs.h" #include "ihevce_decomp_pre_intra_pass.h" #include "cast_types.h" #include "osal.h" #include "osal_defaults.h" /*****************************************************************************/ /* Constant Macros */ /*****************************************************************************/ #define IP_DBG_L1_l2 0 #define CHILD_BIAS 12 /*****************************************************************************/ /* Globals */ /*****************************************************************************/ extern pf_intra_pred g_apf_lum_ip[10]; extern WORD32 g_i4_ip_funcs[MAX_NUM_IP_MODES]; UWORD8 gau1_cu_pos_x[64] = { 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 4, 5, 4, 5, 6, 7, 6, 7, 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 4, 5, 4, 5, 6, 7, 6, 7 }; UWORD8 gau1_cu_pos_y[64] = { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3, 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 5, 5, 6, 6, 7, 7, 6, 6, 7, 7, 4, 4, 5, 5, 4, 4, 5, 5, 6, 6, 7, 7, 6, 6, 7, 7 }; #define RESET_BIT(x, bit) (x = x & ~((WORD32)1 << bit)) 
/*****************************************************************************/
/* Function Definitions                                                      */
/*****************************************************************************/

/*!
******************************************************************************
* \if Function name : ihevce_update_cand_list \endif
*
* \brief
*    Writes the best intra mode of a finalised CU into every cell of the CTB
*    mode map that the CU covers
*
* \param[in] ps_cu_node : pointer to cu node info buffer; u2_x0, u2_y0,
*                         u1_cu_size and best_mode are read
* \param[in] ps_ed_blk_l1 : pointer to level 1 decision buffer (not used)
* \param[in,out] ps_ctxt : pointer to IPE context struct; au1_ctb_mode_map
*                          is updated
*
* \return
*    None
*
* \author
*  Ittiam
*
*****************************************************************************
*/
void ihevce_update_cand_list(
    ihevce_ipe_cu_tree_t *ps_cu_node, ihevce_ed_blk_t *ps_ed_blk_l1, ihevce_ipe_ctxt_t *ps_ctxt)
{
    /* First map column/row covered by the CU. The position is scaled by     */
    /* (<< 3) >> 2 and offset by one cell (presumably the map carries a      */
    /* one-cell border for neighbours - TODO confirm against the map layout) */
    const WORD32 i4_first_col = ((ps_cu_node->u2_x0 << 3) >> 2) + 1;
    const WORD32 i4_first_row = ((ps_cu_node->u2_y0 << 3) >> 2) + 1;

    /* Number of map cells per CU side */
    const WORD32 i4_cells_per_side = ps_cu_node->u1_cu_size >> 2;

    WORD32 i4_row_off, i4_col_off;

    (void)ps_ed_blk_l1;

    /* Stamp the CU's best mode over the square of cells it occupies */
    for(i4_row_off = 0; i4_row_off < i4_cells_per_side; i4_row_off++)
    {
        for(i4_col_off = 0; i4_col_off < i4_cells_per_side; i4_col_off++)
        {
            ps_ctxt->au1_ctb_mode_map[i4_first_row + i4_row_off][i4_first_col + i4_col_off] =
                ps_cu_node->best_mode;
        }
    }
}

/*!
****************************************************************************** * \if Function name : ihevce_intra_populate_mode_bits_cost_bracketing \endif * * \brief * Mpm indx calc function based on left and top available modes * * \param[in] top_intra_mode : Top available intra mode * \param[in] left_intra_mode : Left available intra mode * \param[in] available_top : Top availability flag * \param[in] available_left : Left availability flag * \param[in] cu_pos_y : cu position wrt to CTB * \param[in] mode_bits_cost : pointer to mode bits buffer * \param[in] lambda : Lambda value (SAD/SATD) * \param[in] cand_mode_list : pointer to candidate list buffer * * \return * None * * \author * Ittiam * ***************************************************************************** */ void ihevce_intra_populate_mode_bits_cost_bracketing( WORD32 top_intra_mode, WORD32 left_intra_mode, WORD32 available_top, WORD32 available_left, WORD32 cu_pos_y, UWORD16 *mode_bits_cost, UWORD16 *mode_bits, WORD32 lambda, WORD32 *cand_mode_list) { /* local variables */ WORD32 i; WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top; UWORD16 one_bits_cost = COMPUTE_RATE_COST_CLIP30(4, lambda, (LAMBDA_Q_SHIFT + 1)); //1.5 * lambda UWORD16 two_bits_cost = COMPUTE_RATE_COST_CLIP30(6, lambda, (LAMBDA_Q_SHIFT + 1)); //2.5 * lambda UWORD16 five_bits_cost = COMPUTE_RATE_COST_CLIP30(12, lambda, (LAMBDA_Q_SHIFT + 1)); //5.5 * lambda for(i = 0; i < 35; i++) { mode_bits_cost[i] = five_bits_cost; mode_bits[i] = 5; } /* EIID: set availability flag to zero if modes are invalid. Required since some CU's might be skipped (though available) and their modes will be set to 255 (-1)*/ if(35 < top_intra_mode || 0 > top_intra_mode) available_top = 0; if(35 < left_intra_mode || 0 > left_intra_mode) available_left = 0; /* Calculate cand_intra_pred_mode_N as per sec. 
8.4.2 in JCTVC-J1003_d7 */ /* N = top */ if(0 == available_top) { cand_intra_pred_mode_top = INTRA_DC; } /* for neighbour != INTRA, setting DC is done outside */ else if(0 == cu_pos_y) /* It's on the CTB boundary */ { cand_intra_pred_mode_top = INTRA_DC; } else { cand_intra_pred_mode_top = top_intra_mode; } /* N = left */ if(0 == available_left) { cand_intra_pred_mode_left = INTRA_DC; //cand_intra_pred_mode_left = cand_intra_pred_mode_top; } /* for neighbour != INTRA, setting DC is done outside */ else { cand_intra_pred_mode_left = left_intra_mode; } /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */ if(cand_intra_pred_mode_left == cand_intra_pred_mode_top) { if(cand_intra_pred_mode_left < 2) { cand_mode_list[0] = INTRA_PLANAR; cand_mode_list[1] = INTRA_DC; cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */ } else { cand_mode_list[0] = cand_intra_pred_mode_left; cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32); cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32); } } else { if(0 == available_left) { cand_mode_list[0] = cand_intra_pred_mode_top; cand_mode_list[1] = cand_intra_pred_mode_left; } else { cand_mode_list[0] = cand_intra_pred_mode_left; cand_mode_list[1] = cand_intra_pred_mode_top; } if((cand_intra_pred_mode_left != INTRA_PLANAR) && (cand_intra_pred_mode_top != INTRA_PLANAR)) { cand_mode_list[2] = INTRA_PLANAR; } else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC)) { cand_mode_list[2] = INTRA_DC; } else { cand_mode_list[2] = INTRA_ANGULAR(26); } } mode_bits_cost[cand_mode_list[0]] = one_bits_cost; mode_bits_cost[cand_mode_list[1]] = two_bits_cost; mode_bits_cost[cand_mode_list[2]] = two_bits_cost; mode_bits[cand_mode_list[0]] = 2; mode_bits[cand_mode_list[1]] = 3; mode_bits[cand_mode_list[2]] = 3; } /*! 
****************************************************************************** * \if Function name : ihevce_pu_calc_4x4_blk \endif * * \brief * 4x4 pu (8x8 CU) mode decision using step 8421 method * * \param[in] ps_cu_node : pointer to cu node info buffer * \param[in] pu1_src : pointer to src pixels * \param[in] src_stride : frm source stride * \param[in] ref : pointer to reference pixels for prediction * \param[in] cand_mode_list : pointer to candidate list buffer * \param[in] best_costs_4x4 : pointer to 3 best cost buffer * \param[in] best_modes_4x4 : pointer to 3 best mode buffer * * \return * None * * \author * Ittiam * ***************************************************************************** */ void ihevce_pu_calc_4x4_blk( ihevce_ipe_ctxt_t *ps_ctxt, ihevce_ipe_cu_tree_t *ps_cu_node, UWORD8 *pu1_src, WORD32 src_stride, UWORD8 *ref, UWORD16 *mode_bits_cost, WORD32 *best_costs_4x4, UWORD8 *best_modes_4x4, func_selector_t *ps_func_selector) { WORD16 *pi2_trans_tmp = ps_ctxt->pi2_trans_tmp; WORD16 *pi2_trans_out = ps_ctxt->pi2_trans_out; UWORD8 u1_use_satd = ps_ctxt->u1_use_satd; UWORD8 u1_level_1_refine_on = ps_ctxt->u1_level_1_refine_on; WORD32 i, j = 0, i_end; UWORD8 mode, best_amode = 255; UWORD8 pred[16]; UWORD16 sad; WORD32 sad_cost = 0; WORD32 best_asad_cost = 0xFFFFF; WORD32 temp; UWORD8 modes_to_eval[5]; WORD32 costs_4x4[5]; UWORD8 modes_4x4[5] = { 0, 1, 2, 3, 4 }; /* LO resolution hence low resolution disable */ WORD32 u1_low_resol = 0; UWORD8 au1_best_modes[1] = { 0 }; WORD32 ai4_best_sad_costs[1] = { 0 }; WORD16 *pi2_tmp = &pi2_trans_tmp[0]; ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list = &ps_ctxt->s_ipe_optimised_function_list; //apf_resd_trns[0] = &ihevc_resi_trans_4x4_ttype1; //apf_resd_trns[0] = &ihevc_HAD_4x4_8bit; for(i = 0; i < 5; i++) { costs_4x4[i] = MAX_INTRA_COST_IPE; } ps_ipe_optimised_function_list->pf_ed_4x4_find_best_modes( pu1_src, src_stride, ref, mode_bits_cost, au1_best_modes, ai4_best_sad_costs, 
u1_low_resol, ps_ipe_optimised_function_list->pf_4x4_sad_computer); best_amode = au1_best_modes[0]; best_asad_cost = ai4_best_sad_costs[0]; ASSERT(best_amode != 255); /* Around best level 4 angular mode, search for best level 2 mode */ modes_to_eval[0] = best_amode - 2; modes_to_eval[1] = best_amode + 2; i = 0; i_end = 2; if(best_amode == 2) i = 1; else if(best_amode == 34) i_end = 1; for(; i < i_end; i++) { mode = modes_to_eval[i]; g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode); sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer(pu1_src, &pred[0], src_stride, 4); sad_cost = sad; sad_cost += mode_bits_cost[mode]; if(sad_cost < best_asad_cost) { best_amode = mode; best_asad_cost = sad_cost; } } /* Around best level 2 angular mode, search for best level 1 mode */ /* Also evaluate for non-angular mode */ i = 0; /*Level 1 refinement is disabled for ES preset */ if(1 == u1_level_1_refine_on) { if(best_amode != 2) modes_to_eval[i++] = best_amode - 1; modes_to_eval[i++] = best_amode; } modes_to_eval[i++] = 0; modes_to_eval[i++] = 1; if(1 == u1_level_1_refine_on) { if(best_amode != 34) modes_to_eval[i++] = best_amode + 1; } i_end = i; i = 0; for(; i < i_end; i++) { mode = modes_to_eval[i]; g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode); /* Hard coding to use SATD */ if(u1_use_satd) { ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr( pu1_src, &pred[0], (WORD32 *)pi2_tmp, pi2_trans_out, src_stride, 4, (4 << 16) | 0); sad = ihevce_ipe_pass_satd(pi2_trans_out, 4, 4); } else { sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer( pu1_src, &pred[0], src_stride, 4); } sad_cost = sad; sad_cost += mode_bits_cost[mode]; costs_4x4[i] = sad_cost; } /* Arrange the reference array in ascending order */ for(i = 0; i < (i_end - 1); i++) { for(j = i + 1; j < i_end; j++) { if(costs_4x4[i] > costs_4x4[j]) { temp = costs_4x4[i]; costs_4x4[i] = costs_4x4[j]; costs_4x4[j] = temp; temp = modes_4x4[i]; modes_4x4[i] = modes_4x4[j]; 
modes_4x4[j] = temp; } } } for(i = 0; i < 3; i++) { best_costs_4x4[i] = costs_4x4[i]; best_modes_4x4[i] = modes_to_eval[modes_4x4[i]]; } { ps_cu_node->best_mode = best_modes_4x4[0]; ps_cu_node->best_cost = best_costs_4x4[0]; ps_cu_node->best_satd = best_costs_4x4[0] - mode_bits_cost[ps_cu_node->best_mode]; } } /*! ****************************************************************************** * \if Function name : ihevce_pu_calc_8x8_blk \endif * * \brief * 4x4 pu (8x8 CU) mode decision loop using step 8421 method * * \param[in] ps_curr_src : pointer to src pixels struct * \param[in] ps_ctxt : pointer to IPE context struct * \param[in] ps_cu_node : pointer to cu node info buffer * * \return * None * * \author * Ittiam * ***************************************************************************** */ void ihevce_pu_calc_8x8_blk( iv_enc_yuv_buf_t *ps_curr_src, ihevce_ipe_ctxt_t *ps_ctxt, ihevce_ipe_cu_tree_t *ps_cu_node, func_selector_t *ps_func_selector) { WORD32 i, j; WORD32 nbr_flags; nbr_avail_flags_t s_nbr; WORD32 trans_size = ps_cu_node->ps_parent->u1_cu_size >> 1; UWORD8 *pu1_src_4x4; WORD32 xA, xB, yA, yB; //WORD32 x, y, size; WORD32 top_intra_mode; WORD32 left_intra_mode; // WORD8 *top_intra_mode_ptr; // WORD8 *left_intra_mode_ptr; UWORD8 *pu1_orig; WORD32 src_strd = ps_curr_src->i4_y_strd; WORD32 cu_pos_x = ps_cu_node->ps_parent->u2_x0 << 1; WORD32 cu_pos_y = ps_cu_node->ps_parent->u2_y0 << 1; ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr; ihevc_intra_pred_luma_ref_substitution_fptr = ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr; pu1_orig = (UWORD8 *)(ps_curr_src->pv_y_buf) + ((ps_cu_node->ps_parent->u2_y0 << 3) * src_strd) + (ps_cu_node->ps_parent->u2_x0 << 3); for(i = 0; i < 2; i++) { for(j = 0; j < 2; j++) { WORD32 cand_mode_list[3]; pu1_src_4x4 = pu1_orig + (i * trans_size * src_strd) + (j * trans_size); /* get the neighbour availability flags */ nbr_flags = ihevce_get_nbr_intra( &s_nbr, 
ps_ctxt->pu1_ctb_nbr_map, ps_ctxt->i4_nbr_map_strd, cu_pos_x + ((j) * (trans_size >> 2)), cu_pos_y + ((i) * (trans_size >> 2)), trans_size >> 2); /* call the function which populates sad cost for all the modes */ xA = ((ps_cu_node->ps_parent->u2_x0 << 3) >> 2) + j; yA = ((ps_cu_node->ps_parent->u2_y0 << 3) >> 2) + 1 + i; xB = xA + 1; yB = yA - 1; left_intra_mode = ps_ctxt->au1_ctb_mode_map[yA][xA]; top_intra_mode = ps_ctxt->au1_ctb_mode_map[yB][xB]; ihevce_intra_populate_mode_bits_cost_bracketing( top_intra_mode, left_intra_mode, s_nbr.u1_top_avail, s_nbr.u1_left_avail, ps_cu_node->ps_parent->u2_y0, &ps_ctxt->au2_mode_bits_cost_8x8pu[i * 2 + j][0], &ps_ctxt->au2_mode_bits_8x8_pu[0], ps_ctxt->i4_ol_sad_lambda, cand_mode_list); /* call the function which populates ref data for intra predicion */ ihevc_intra_pred_luma_ref_substitution_fptr( pu1_src_4x4 - src_strd - 1, pu1_src_4x4 - src_strd, pu1_src_4x4 - 1, src_strd, 4, nbr_flags, &ps_ctxt->au1_ref_8x8pu[i * 2 + j][0], 0); ihevce_pu_calc_4x4_blk( ps_ctxt, ps_cu_node->ps_sub_cu[(i * 2) + j], pu1_src_4x4, src_strd, &ps_ctxt->au1_ref_8x8pu[i * 2 + j][0], &ps_ctxt->au2_mode_bits_cost_8x8pu[i * 2 + j][0], &ps_cu_node->ps_sub_cu[(i * 2) + j]->au4_best_cost_1tu[0], &ps_cu_node->ps_sub_cu[(i * 2) + j]->au1_best_mode_1tu[0], ps_func_selector); /*&au4_cost_4x4[i*2 + j][0], &au1_modes_4x4[i*2 + j][0]);*/ //TTODO : mode will change for the four partition ihevce_set_nbr_map( ps_ctxt->pu1_ctb_nbr_map, ps_ctxt->i4_nbr_map_strd, cu_pos_x + ((j) * (trans_size >> 2)), cu_pos_y + ((i) * (trans_size >> 2)), (trans_size >> 2), 1); xA = ((ps_cu_node->ps_parent->u2_x0 << 3) >> 2) + 1 + j; yA = ((ps_cu_node->ps_parent->u2_y0 << 3) >> 2) + 1 + i; ps_ctxt->au1_ctb_mode_map[yA][xA] = ps_cu_node->ps_sub_cu[i * 2 + j]->best_mode; ps_cu_node->ps_sub_cu[i * 2 + j]->u2_mode_bits_cost = ps_ctxt->au2_mode_bits_8x8_pu[ps_cu_node->ps_sub_cu[i * 2 + j]->best_mode]; } } } /*! 
****************************************************************************** * \if Function name : ihevce_bracketing_analysis \endif * * \brief * Interface function that evaluates MAX cu and MAX - 1 cu, with MAX cu size * info decided coarse resolution mode decision. Compares the SATD/SAD cost btwn * 2 CUS and determines the actual CU size and best 3 modes to be given to rdopt * * \param[in] ps_ctxt : pointer to IPE context struct * \param[in] ps_cu_node : pointer to cu node info buffer * \param[in] ps_curr_src : pointer to src pixels struct * \param[in] ps_ctb_out : pointer to ip ctb out struct * \param[in] ps_row_cu : pointer to cu analyse struct * \param[in] ps_ed_l1_ctb : pointer to level 1 early deci struct * \param[in] ps_ed_l2_ctb : pointer to level 2 early deci struct * \param[in] ps_l0_ipe_out_ctb : pointer to ipe_l0_ctb_analyse_for_me_t struct * * \return * None * * \author * Ittiam * ***************************************************************************** */ void ihevce_bracketing_analysis( ihevce_ipe_ctxt_t *ps_ctxt, ihevce_ipe_cu_tree_t *ps_cu_node, iv_enc_yuv_buf_t *ps_curr_src, ctb_analyse_t *ps_ctb_out, //cu_analyse_t *ps_row_cu, ihevce_ed_blk_t *ps_ed_l1_ctb, ihevce_ed_blk_t *ps_ed_l2_ctb, ihevce_ed_ctb_l1_t *ps_ed_ctb_l1, ipe_l0_ctb_analyse_for_me_t *ps_l0_ipe_out_ctb) { WORD32 cu_pos_x = 0; WORD32 cu_pos_y = 0; UWORD8 u1_curr_ctb_wdt = ps_cu_node->u1_width; UWORD8 u1_curr_ctb_hgt = ps_cu_node->u1_height; WORD32 num_8x8_blks_x = (u1_curr_ctb_wdt >> 3); WORD32 num_8x8_blks_y = (u1_curr_ctb_hgt >> 3); ihevce_ed_blk_t *ps_ed_blk_l1 = ps_ed_l1_ctb; ihevce_ed_blk_t *ps_ed_blk_l2 = ps_ed_l2_ctb; WORD32 i; WORD32 cand_mode_list[3]; //cu_analyse_t *ps_curr_cu = ps_row_cu; WORD32 blk_cnt = 0; WORD32 j = 0; WORD32 merge_32x32_l1, merge_32x32_l2; WORD32 i4_skip_intra_eval_32x32_l1; //EIID: flag indicating number of 16x16 blocks to be skipped for intra evaluation within 32x32 block WORD32 parent_cost = 0; WORD32 child_cost[4] = { 0 }; WORD32 
child_cost_least = 0; WORD32 child_satd[4] = { 0 }; WORD32 x, y, size; WORD32 merge_64x64 = 1; UWORD8 au1_best_32x32_modes[4]; WORD32 au4_best_32x32_cost[4]; WORD32 parent_best_mode; UWORD8 best_mode; WORD32 i4_quality_preset = ps_ctxt->i4_quality_preset; /* flag to control 1CU-4TU modes based on quality preset */ /* if set 1CU-4TU are explicity evaluated else 1CU-1TU modes are copied */ WORD32 i4_enable_1cu_4tu = (i4_quality_preset == IHEVCE_QUALITY_P2) || (i4_quality_preset == IHEVCE_QUALITY_P0); /* flag to control 4CU-16TU mode based on quality preset */ /* if set 4CU-16TU are explicity evaluated else 4CU-4TU modes are copied*/ WORD32 i4_enable_4cu_16tu = (i4_quality_preset == IHEVCE_QUALITY_P2) || (i4_quality_preset == IHEVCE_QUALITY_P0); WORD32 i4_mod_factor_num, i4_mod_factor_den = QP_MOD_FACTOR_DEN; //2; float f_strength; /* Accumalte satd */ LWORD64 i8_frame_acc_satd_cost = 0, i8_frame_acc_satd_by_modqp_q10 = 0; WORD32 i4_ctb_acc_satd = 0; /* Accumalate Mode bits cost */ LWORD64 i8_frame_acc_mode_bits_cost = 0; /* Step2 is bypassed for parent, uses children modes*/ WORD32 step2_bypass = 1; if(1 == ps_ctxt->u1_disable_child_cu_decide) step2_bypass = 0; ps_cu_node->ps_parent = ps_ctxt->ps_ipe_cu_tree; for(i = 0; i < 4; i++) { ps_cu_node->ps_sub_cu[i] = ps_ctxt->ps_ipe_cu_tree + 1 + i; } /* Loop for all 8x8 block in a CTB */ ps_ctb_out->u4_cu_split_flags = 0x1; /* Initialize intra 64x64, 32x32 and 16x16 costs to max value */ for(i = 0; i < (MAX_CU_IN_CTB >> 4); i++) { ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i] = MAX_INTRA_COST_IPE; } for(i = 0; i < (MAX_CU_IN_CTB >> 2); i++) { ps_l0_ipe_out_ctb->ai4_best16x16_intra_cost[i] = MAX_INTRA_COST_IPE; } for(i = 0; i < (MAX_CU_IN_CTB); i++) { ps_l0_ipe_out_ctb->ai4_best8x8_intra_cost[i] = MAX_INTRA_COST_IPE; } ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = MAX_INTRA_COST_IPE; /* by default 64x64 modes are set to default values DC and Planar */ ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[0] = 0; 
ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[1] = 1; ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[2] = 255; /* by default 64x4 split is set to 1 */ ps_l0_ipe_out_ctb->u1_split_flag = 1; /* Modulation factor calculated based on spatial variance instead of hardcoded val*/ i4_mod_factor_num = ps_ctxt->ai4_mod_factor_derived_by_variance[1]; //16; f_strength = ps_ctxt->f_strength; /* ------------------------------------------------ */ /* populate the early decisions done by L1 analysis */ /* ------------------------------------------------ */ { ihevce_ed_blk_t *ps_ed_blk_l1_curr = ps_ed_l1_ctb; WORD32 ctr_8x8; WORD8 *pi1_ed_buf; /* set all the decisions to invalid */ memset( &ps_l0_ipe_out_ctb->ai1_early_intra_inter_decision[0], 0, sizeof(UWORD8) * MAX_CU_IN_CTB); pi1_ed_buf = &ps_l0_ipe_out_ctb->ai1_early_intra_inter_decision[0]; for(ctr_8x8 = 0; ctr_8x8 < MAX_CTB_SIZE; ctr_8x8++) { WORD32 pos_x_8x8, pos_y_8x8; pos_x_8x8 = gau1_cu_pos_x[ctr_8x8]; pos_y_8x8 = gau1_cu_pos_y[ctr_8x8]; pi1_ed_buf[pos_x_8x8 + (pos_y_8x8 * MAX_CU_IN_CTB_ROW)] = ps_ed_blk_l1_curr->intra_or_inter; ps_ed_blk_l1_curr++; } for(ctr_8x8 = 0; ctr_8x8 < (MAX_CU_IN_CTB >> 2); ctr_8x8++) { ps_l0_ipe_out_ctb->ai4_best_sad_8x8_l1_ipe[ctr_8x8] = ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[ctr_8x8]; ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_ipe[ctr_8x8] = ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_ipe[ctr_8x8]; /*Earlier only me sad was getting populated, now best of ipe and me is populated*/ ps_l0_ipe_out_ctb->ai4_best_sad_8x8_l1_me[ctr_8x8] = ps_ed_ctb_l1->i4_best_sad_8x8_l1_me[ctr_8x8]; //ps_ed_ctb_l1->i4_sad_me_for_ref[ctr_8x8]; ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_me[ctr_8x8] = ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_me[ctr_8x8]; //ps_ed_ctb_l1->i4_sad_cost_me_for_ref[ctr_8x8]; } /*Init CTB level accumalated SATD and MPM bits */ ps_l0_ipe_out_ctb->i4_ctb_acc_satd = 0; ps_l0_ipe_out_ctb->i4_ctb_acc_mpm_bits = 0; } /* ------------------------------------------------ */ /* Loop over all the blocks in current CTB 
*/ /* ------------------------------------------------ */ { /* 64 8x8 blocks should be encountered for the do,while loop to exit */ do { intra32_analyse_t *ps_intra32_analyse; intra16_analyse_t *ps_intra16_analyse; WORD32 *pi4_intra_32_cost; WORD32 *pi4_intra_16_cost; WORD32 *pi4_intra_8_cost; WORD32 merge_16x16_l1; /* Given the blk_cnt, get the CU's top-left 8x8 block's x and y positions within the CTB */ cu_pos_x = gau1_cu_pos_x[blk_cnt]; cu_pos_y = gau1_cu_pos_y[blk_cnt]; /* default value for 32x32 best mode - blk_cnt increases by 16 for each 32x32 */ au1_best_32x32_modes[blk_cnt >> 4] = 255; /* get the corresponding intra 32 analyse pointer use (blk_cnt / 16) */ /* blk cnt is in terms of 8x8 units so a 32x32 will have 16 8x8 units */ ps_intra32_analyse = &ps_l0_ipe_out_ctb->as_intra32_analyse[blk_cnt >> 4]; /* get the corresponding intra 16 analyse pointer use (blk_cnt & 0xF / 4)*/ /* blk cnt is in terms of 8x8 units so a 16x16 will have 4 8x8 units */ ps_intra16_analyse = &ps_intra32_analyse->as_intra16_analyse[(blk_cnt & 0xF) >> 2]; /* Line below assumes min_cu_size of 8 - checks whether CU starts are within picture */ if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y)) { /* Reset to zero for every cu decision */ merge_32x32_l1 = 0; child_cost_least = 0; /* At L2, each 4x4 corresponds to 16x16 at L0. 
Every 4 16x16 stores a merge_success flag */ ps_ed_blk_l2 = ps_ed_l2_ctb + (blk_cnt >> 2); pi4_intra_32_cost = &ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[blk_cnt >> 4]; /* by default 32x32 modes are set to default values DC and Planar */ ps_intra32_analyse->au1_best_modes_32x32_tu[0] = 0; ps_intra32_analyse->au1_best_modes_32x32_tu[1] = 1; ps_intra32_analyse->au1_best_modes_32x32_tu[2] = 255; /* By default 32x32 split is set to 1 */ ps_intra32_analyse->b1_split_flag = 1; ps_intra32_analyse->au1_best_modes_16x16_tu[0] = 0; ps_intra32_analyse->au1_best_modes_16x16_tu[1] = 1; ps_intra32_analyse->au1_best_modes_16x16_tu[2] = 255; /* 16x16 cost & 8x8 cost are stored in Raster scan order */ /* stride of 16x16 buffer is MAX_CU_IN_CTB_ROW >> 1 */ /* stride of 8x8 buffer is MAX_CU_IN_CTB_ROW */ { WORD32 pos_x_8x8, pos_y_8x8; pos_x_8x8 = gau1_cu_pos_x[blk_cnt]; pos_y_8x8 = gau1_cu_pos_y[blk_cnt]; pi4_intra_16_cost = &ps_l0_ipe_out_ctb->ai4_best16x16_intra_cost[0]; pi4_intra_16_cost += ((pos_x_8x8 >> 1) + ((pos_y_8x8 >> 1) * (MAX_CU_IN_CTB_ROW >> 1))); pi4_intra_8_cost = &ps_l0_ipe_out_ctb->ai4_best8x8_intra_cost[0]; pi4_intra_8_cost += (pos_x_8x8 + (pos_y_8x8 * MAX_CU_IN_CTB_ROW)); } merge_32x32_l1 = 0; merge_32x32_l2 = 0; i4_skip_intra_eval_32x32_l1 = 0; /* Enable 16x16 merge iff sufficient 8x8 blocks remain in the current CTB */ merge_16x16_l1 = 0; if(((num_8x8_blks_x - cu_pos_x) >= 2) && ((num_8x8_blks_y - cu_pos_y) >= 2)) { #if !ENABLE_UNIFORM_CU_SIZE_8x8 merge_16x16_l1 = ps_ed_blk_l1->merge_success; #else merge_16x16_l1 = 0; #endif } /* Enable 32x32 merge iff sufficient 8x8 blocks remain in the current CTB */ if(((num_8x8_blks_x - cu_pos_x) >= 4) && ((num_8x8_blks_y - cu_pos_y) >= 4)) { /* Check 4 flags of L1(8x8) say merge */ for(i = 0; i < 4; i++) { merge_32x32_l1 += (ps_ed_blk_l1 + (i * 4))->merge_success; //EIDD: num 16x16 blocks for which inter_intra flag says eval only inter, i.e. 
skip intra eval i4_skip_intra_eval_32x32_l1 += ((ps_ed_blk_l1 + (i * 4))->intra_or_inter == 2) ? 1 : 0; } #if !ENABLE_UNIFORM_CU_SIZE_8x8 /* Check 1 flag from L2(16x16) say merge */ merge_32x32_l2 = ps_ed_blk_l2->merge_success; #else merge_32x32_l1 = 0; merge_32x32_l2 = 0; #endif } #if DISABLE_L2_IPE_IN_PB_L1_IN_B if((i4_quality_preset == IHEVCE_QUALITY_P6) && (ps_ctxt->i4_slice_type != ISLICE)) { merge_32x32_l2 = 0; ps_ed_blk_l2->merge_success = 0; } #endif ps_intra32_analyse->b1_valid_cu = 1; /* If Merge success from all 4 L1 and L2, max CU size 32x32 is chosen */ /* EIID: if all blocks to be skipped then skip entire 32x32 for intra eval, if no blocks to be skipped then eval entire 32x32, else break the merge and go to 16x16 level eval */ if((merge_32x32_l1 == 4) && merge_32x32_l2 && ((i4_skip_intra_eval_32x32_l1 == 0) || (i4_skip_intra_eval_32x32_l1 == 4)) //comment this line to disable break-merge ) { #if IP_DBG_L1_l2 /* Populate params for 32x32 block analysis */ ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE; ps_cu_node->ps_parent->u1_cu_size = 32; ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */ ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[blk_cnt]; /* Populate properly */ ps_cu_node->ps_parent->best_mode = ps_ed_blk_l2->best_merge_mode; /* CU size 32x32 and fill the final cu params */ ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); /* Increment pointers */ ps_ed_blk_l1 += 16; blk_cnt += 16; ps_row_cu++; merge_64x64 &= 1; #else /* EIID: dont evaluate if all 4 blocks at L1 said inter is winning*/ if(4 == i4_skip_intra_eval_32x32_l1 && (ps_ctxt->i4_slice_type != ISLICE)) { WORD32 i4_local_ctr1, i4_local_ctr2; ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE; ps_cu_node->ps_parent->u1_cu_size = 32; ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */ ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[blk_cnt]; /* Populate properly */ ps_cu_node->ps_parent->best_mode = INTRA_DC; 
//ps_ed_blk_l2->best_merge_mode; /* CU size 32x32 and fill the final cu params */ /* fill in the first modes as invalid */ ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC; ps_cu_node->ps_parent->au1_best_mode_1tu[1] = INTRA_DC; //for safery. Since update_cand_list will set num_modes as 3 ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC; ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC; ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC; ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC; ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0; //ps_row_cu->u1_num_intra_rdopt_cands = 0; ps_intra32_analyse->b1_valid_cu = 0; ps_intra32_analyse->b1_split_flag = 0; ps_intra32_analyse->b1_merge_flag = 0; /*memset (&ps_intra32_analyse->au1_best_modes_32x32_tu, 255, NUM_BEST_MODES); memset (&ps_intra32_analyse->au1_best_modes_16x16_tu, 255, NUM_BEST_MODES);*/ //set only first mode since if it's 255. it wont go ahead ps_intra32_analyse->au1_best_modes_32x32_tu[0] = 255; ps_intra32_analyse->au1_best_modes_16x16_tu[0] = 255; ps_intra32_analyse->i4_best_intra_cost = MAX_INTRA_COST_IPE; *pi4_intra_32_cost = MAX_INTRA_COST_IPE; /*since ME will start evaluating from bottom up, set the lower cu size data invalid */ for(i4_local_ctr1 = 0; i4_local_ctr1 < 4; i4_local_ctr1++) { WORD32 *pi4_intra_8_cost_curr16; ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] .au1_best_modes_16x16_tu[0] = 255; ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] .au1_best_modes_8x8_tu[0] = 255; ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] .i4_best_intra_cost = MAX_INTRA_COST_IPE; ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_merge_flag = 0; ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_valid_cu = 0; ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_split_flag = 0; pi4_intra_16_cost [(i4_local_ctr1 & 1) + ((MAX_CU_IN_CTB_ROW >> 1) * (i4_local_ctr1 >> 1))] = 
MAX_INTRA_COST_IPE; pi4_intra_8_cost_curr16 = pi4_intra_8_cost + ((i4_local_ctr1 & 1) << 1); pi4_intra_8_cost_curr16 += ((i4_local_ctr1 >> 1) << 1) * MAX_CU_IN_CTB_ROW; for(i4_local_ctr2 = 0; i4_local_ctr2 < 4; i4_local_ctr2++) { ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] .as_intra8_analyse[i4_local_ctr2] .au1_4x4_best_modes[0][0] = 255; ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] .as_intra8_analyse[i4_local_ctr2] .au1_4x4_best_modes[1][0] = 255; ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] .as_intra8_analyse[i4_local_ctr2] .au1_4x4_best_modes[2][0] = 255; ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] .as_intra8_analyse[i4_local_ctr2] .au1_4x4_best_modes[3][0] = 255; ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] .as_intra8_analyse[i4_local_ctr2] .au1_best_modes_8x8_tu[0] = 255; ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] .as_intra8_analyse[i4_local_ctr2] .au1_best_modes_4x4_tu[0] = 255; ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] .as_intra8_analyse[i4_local_ctr2] .i4_best_intra_cost = MAX_INTRA_COST_IPE; ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] .as_intra8_analyse[i4_local_ctr2] .b1_valid_cu = 0; pi4_intra_8_cost_curr16 [(i4_local_ctr2 & 1) + (MAX_CU_IN_CTB_ROW * (i4_local_ctr2 >> 1))] = MAX_INTRA_COST_IPE; } } /* set neighbours even if intra is not evaluated, since source is always available. 
*/ ihevce_set_nbr_map( ps_ctxt->pu1_ctb_nbr_map, ps_ctxt->i4_nbr_map_strd, ps_cu_node->ps_parent->u2_x0 << 1, ps_cu_node->ps_parent->u2_y0 << 1, (ps_cu_node->ps_parent->u1_cu_size >> 2), 1); /* cost accumalation of best cu size candiate */ /*i8_frame_acc_satd_cost += parent_cost;*/ /* Mode bits cost accumalation for best cu size and cu mode */ /*i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost;*/ /*satd/mod_qp accumulation of best cu */ /*i8_frame_acc_satd_by_modqp_q10 += ((LWORD64)ps_cu_node->ps_parent->best_satd << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3))/i4_q_scale_q3_mod;*/ /* Increment pointers */ ps_ed_blk_l1 += 16; blk_cnt += 16; //ps_row_cu++; merge_64x64 = 0; /* increment for stat purpose only. Increment is valid only on single thread */ ps_ctxt->u4_num_16x16_skips_at_L0_IPE += 4; } else { /* Revaluation of 4 16x16 blocks at 8x8 prediction level */ //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) && (ps_ctxt->i4_slice_type == PSLICE)) { ps_ctxt->u1_disable_child_cu_decide = 1; step2_bypass = 0; } /* Based on the flag, Child modes decision can be disabled*/ if(0 == ps_ctxt->u1_disable_child_cu_decide) { for(j = 0; j < 4; j++) { ps_cu_node->ps_sub_cu[j]->u2_x0 = gau1_cu_pos_x[blk_cnt + (j * 4)]; /* Populate properly */ ps_cu_node->ps_sub_cu[j]->u2_y0 = gau1_cu_pos_y[blk_cnt + (j * 4)]; /* Populate properly */ ps_cu_node->ps_sub_cu[j]->u1_cu_size = 16; { WORD32 best_ang_mode = (ps_ed_blk_l1 + (j * 4))->best_merge_mode; if(best_ang_mode < 2) best_ang_mode = 26; ihevce_mode_eval_filtering( ps_cu_node->ps_sub_cu[j], ps_cu_node, ps_ctxt, ps_curr_src, best_ang_mode, &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0], &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], !step2_bypass, 1); if(i4_enable_4cu_16tu) { ihevce_mode_eval_filtering( ps_cu_node->ps_sub_cu[j], ps_cu_node, ps_ctxt, ps_curr_src, best_ang_mode, 
&ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], !step2_bypass, 0); } else { /* 4TU not evaluated : 4tu modes set same as 1tu modes */ memcpy( &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], NUM_BEST_MODES); /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */ memcpy( &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0], NUM_BEST_MODES * sizeof(WORD32)); } child_cost[j] = MIN(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]); /* Child cost is sum of costs at 16x16 level */ child_cost_least += child_cost[j]; /* Select the best mode to be populated as top and left nbr depending on the 4tu and 1tu cost */ if(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0] > ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]) { ps_cu_node->ps_sub_cu[j]->best_mode = ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0]; } else { ps_cu_node->ps_sub_cu[j]->best_mode = ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0]; } { /* Update the CTB nodes only for MAX - 1 CU nodes */ WORD32 xA, yA, row, col; xA = ((ps_cu_node->ps_sub_cu[j]->u2_x0 << 3) >> 2) + 1; yA = ((ps_cu_node->ps_sub_cu[j]->u2_y0 << 3) >> 2) + 1; size = ps_cu_node->ps_sub_cu[j]->u1_cu_size >> 2; for(row = yA; row < (yA + size); row++) { for(col = xA; col < (xA + size); col++) { ps_ctxt->au1_ctb_mode_map[row][col] = ps_cu_node->ps_sub_cu[j]->best_mode; } } } } /*Child SATD cost*/ child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd; /* store the child 16x16 costs */ pi4_intra_16_cost[(j & 1) + ((MAX_CU_IN_CTB_ROW >> 1) * (j >> 1))] = child_cost[j]; /* set the CU valid flag */ ps_intra16_analyse[j].b1_valid_cu = 1; /* All 16x16 merge is valid, if Cu 32x32 is chosen */ /* To be reset, if CU 64x64 is chosen */ ps_intra16_analyse[j].b1_merge_flag = 1; /* storing the modes to intra 16 analyse */ /* store the best 16x16 modes 8x8 tu */ memcpy( 
&ps_intra16_analyse[j].au1_best_modes_8x8_tu[0], &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], sizeof(UWORD8) * (NUM_BEST_MODES)); ps_intra16_analyse[j].au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255; /* store the best 16x16 modes 16x16 tu */ memcpy( &ps_intra16_analyse[j].au1_best_modes_16x16_tu[0], &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], sizeof(UWORD8) * (NUM_BEST_MODES)); ps_intra16_analyse[j].au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255; /* divide the 16x16 costs (pro rating) to 4 8x8 costs */ /* store the same 16x16 modes as 4 8x8 child modes */ { WORD32 idx_8x8; WORD32 *pi4_intra_8_cost_curr16; intra8_analyse_t *ps_intra8_analyse; pi4_intra_8_cost_curr16 = pi4_intra_8_cost + ((j & 1) << 1); pi4_intra_8_cost_curr16 += ((j >> 1) << 1) * MAX_CU_IN_CTB_ROW; for(idx_8x8 = 0; idx_8x8 < 4; idx_8x8++) { pi4_intra_8_cost_curr16 [(idx_8x8 & 1) + (MAX_CU_IN_CTB_ROW * (idx_8x8 >> 1))] = (child_cost[j] + 3) >> 2; ps_intra8_analyse = &ps_intra16_analyse[j].as_intra8_analyse[idx_8x8]; ps_intra8_analyse->b1_enable_nxn = 0; ps_intra8_analyse->b1_valid_cu = 1; /* store the best 8x8 modes 8x8 tu */ memcpy( &ps_intra8_analyse->au1_best_modes_8x8_tu[0], &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0], sizeof(UWORD8) * (NUM_BEST_MODES + 1)); /* store the best 8x8 modes 4x4 tu */ memcpy( &ps_intra8_analyse->au1_best_modes_4x4_tu[0], &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0], sizeof(UWORD8) * (NUM_BEST_MODES + 1)); /* NXN modes not evaluated hence set to 0 */ memset( &ps_intra8_analyse->au1_4x4_best_modes[0][0], 255, sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1)); } } } ihevce_set_nbr_map( ps_ctxt->pu1_ctb_nbr_map, ps_ctxt->i4_nbr_map_strd, ps_cu_node->ps_sub_cu[0]->u2_x0 << 1, ps_cu_node->ps_sub_cu[0]->u2_y0 << 1, (ps_cu_node->ps_sub_cu[0]->u1_cu_size >> 1), 0); } #if 1 //DISBLE_CHILD_CU_EVAL_L0_IPE //1 else { for(j = 0; j < 4; j++) { WORD32 idx_8x8; intra8_analyse_t *ps_intra8_analyse; ps_intra16_analyse[j].au1_best_modes_8x8_tu[0] = 255; 
ps_intra16_analyse[j].au1_best_modes_16x16_tu[0] = 255; ps_intra16_analyse[j].b1_valid_cu = 0; for(idx_8x8 = 0; idx_8x8 < 4; idx_8x8++) { ps_intra8_analyse = &ps_intra16_analyse[j].as_intra8_analyse[idx_8x8]; ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255; ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255; ps_intra8_analyse->b1_enable_nxn = 0; ps_intra8_analyse->b1_valid_cu = 0; /* NXN modes not evaluated hence set to 0 */ memset( &ps_intra8_analyse->au1_4x4_best_modes[0][0], 255, sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1)); } } child_cost_least = MAX_INTRA_COST_IPE; } #endif /* Populate params for 32x32 block analysis */ ps_cu_node->ps_parent->u1_cu_size = 32; ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */ ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[blk_cnt]; /* Populate properly */ /* Revaluation for 32x32 parent block at 16x16 prediction level */ //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); { /* Eval for TUSize = CuSize */ ihevce_mode_eval_filtering( ps_cu_node->ps_parent, ps_cu_node, ps_ctxt, ps_curr_src, 26, &ps_cu_node->ps_parent->au4_best_cost_1tu[0], &ps_cu_node->ps_parent->au1_best_mode_1tu[0], step2_bypass, 1); if(i4_enable_1cu_4tu) { /* Eval for TUSize = CuSize/2 */ ihevce_mode_eval_filtering( ps_cu_node->ps_parent, ps_cu_node, ps_ctxt, ps_curr_src, 26, &ps_cu_node->ps_parent->au4_best_cost_4tu[0], &ps_cu_node->ps_parent->au1_best_mode_4tu[0], step2_bypass, 0); } else { /* 4TU not evaluated : 4tu modes set same as 1tu modes */ memcpy( &ps_cu_node->ps_parent->au1_best_mode_4tu[0], &ps_cu_node->ps_parent->au1_best_mode_1tu[0], NUM_BEST_MODES); /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */ memcpy( &ps_cu_node->ps_parent->au4_best_cost_4tu[0], &ps_cu_node->ps_parent->au4_best_cost_1tu[0], NUM_BEST_MODES * sizeof(WORD32)); } } ps_ctxt->u1_disable_child_cu_decide = 0; step2_bypass = 1; /* Update parent cost */ parent_cost = 
MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0], ps_cu_node->ps_parent->au4_best_cost_1tu[0]); /* Select the best mode to be populated as top and left nbr depending on the 4tu and 1tu cost */ if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] > ps_cu_node->ps_parent->au4_best_cost_1tu[0]) { ps_cu_node->ps_parent->best_mode = ps_cu_node->ps_parent->au1_best_mode_1tu[0]; } else { ps_cu_node->ps_parent->best_mode = ps_cu_node->ps_parent->au1_best_mode_4tu[0]; } /* store the 32x32 cost */ *pi4_intra_32_cost = parent_cost; /* set the CU valid flag */ ps_intra32_analyse->b1_valid_cu = 1; ps_intra32_analyse->b1_merge_flag = 1; /* storing the modes to intra 32 analyse */ { /* store the best 32x32 modes 16x16 tu */ memcpy( &ps_intra32_analyse->au1_best_modes_16x16_tu[0], &ps_cu_node->ps_parent->au1_best_mode_4tu[0], sizeof(UWORD8) * (NUM_BEST_MODES)); ps_intra32_analyse->au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255; /* store the best 32x32 modes 32x32 tu */ memcpy( &ps_intra32_analyse->au1_best_modes_32x32_tu[0], &ps_cu_node->ps_parent->au1_best_mode_1tu[0], sizeof(UWORD8) * (NUM_BEST_MODES)); ps_intra32_analyse->au1_best_modes_32x32_tu[NUM_BEST_MODES] = 255; } parent_best_mode = ps_cu_node->ps_parent->best_mode; if((parent_cost <= child_cost_least + (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >> LAMBDA_Q_SHIFT))) //|| identical_modes) { WORD32 i4_q_scale_q3_mod; UWORD8 u1_cu_possible_qp; WORD32 i4_act_factor; /* CU size 32x32 and fill the final cu params */ ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); if((IHEVCE_QUALITY_P3 > i4_quality_preset)) { for(i = 0; i < 4; i++) { intra8_analyse_t *ps_intra8_analyse; ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i]; for(j = 0; j < 4; j++) { /* Populate best 3 nxn modes */ ps_intra8_analyse->au1_4x4_best_modes[j][0] = ps_cu_node->ps_sub_cu[i]->au1_best_mode_4tu[0]; ps_intra8_analyse->au1_4x4_best_modes[j][1] = ps_cu_node->ps_sub_cu[i] ->au1_best_mode_4tu[1]; //(ps_ed + 1)->best_mode; 
ps_intra8_analyse->au1_4x4_best_modes[j][2] = ps_cu_node->ps_sub_cu[i] ->au1_best_mode_4tu[2]; //(ps_ed + 2)->best_mode; ps_intra8_analyse->au1_4x4_best_modes[j][3] = 255; } } } /* store the 32x32 non split flag */ ps_intra32_analyse->b1_split_flag = 0; ps_intra32_analyse->as_intra16_analyse[0].b1_split_flag = 0; ps_intra32_analyse->as_intra16_analyse[1].b1_split_flag = 0; ps_intra32_analyse->as_intra16_analyse[2].b1_split_flag = 0; ps_intra32_analyse->as_intra16_analyse[3].b1_split_flag = 0; au1_best_32x32_modes[blk_cnt >> 4] = ps_cu_node->ps_parent->au1_best_mode_1tu[0]; au4_best_32x32_cost[blk_cnt >> 4] = ps_cu_node->ps_parent->au4_best_cost_1tu[0]; /*As 32*32 has won, pick L2 8x8 qp which maps to L0 32x32 Qp*/ ASSERT(((blk_cnt >> 4) & 3) == (blk_cnt >> 4)); ASSERT(ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0] != -2); u1_cu_possible_qp = ihevce_cu_level_qp_mod( ps_ctxt->i4_qscale, ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0], ps_ctxt->ld_curr_frame_16x16_log_avg[0], f_strength, &i4_act_factor, &i4_q_scale_q3_mod, ps_ctxt->ps_rc_quant_ctxt); /* cost accumalation of best cu size candiate */ i8_frame_acc_satd_cost += parent_cost; /* satd and mpm bits accumalation of best cu size candiate */ i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd; /* Mode bits cost accumalation for best cu size and cu mode */ i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost; /*satd/mod_qp accumulation of best cu */ i8_frame_acc_satd_by_modqp_q10 += ((LWORD64)ps_cu_node->ps_parent->best_satd << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / i4_q_scale_q3_mod; /* Increment pointers */ ps_ed_blk_l1 += 16; blk_cnt += 16; //ps_row_cu++; merge_64x64 &= 1; } else { /* store the 32x32 split flag */ ps_intra32_analyse->b1_split_flag = 1; /* CU size 16x16 and fill the final cu params for all 4 blocks */ for(j = 0; j < 4; j++) { WORD32 i4_q_scale_q3_mod; UWORD8 u1_cu_possible_qp; WORD32 i4_act_factor; /* Set CU split flag */ ASSERT(blk_cnt % 4 == 0); ihevce_update_cand_list( 
ps_cu_node->ps_sub_cu[j], ps_ed_blk_l1, ps_ctxt); /* store the 16x16 non split flag */ ps_intra16_analyse[j].b1_split_flag = 0; ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2)); ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0] != -2); /*As 16*16 has won, pick L1 8x8 qp which maps to L0 16x16 Qp*/ u1_cu_possible_qp = ihevce_cu_level_qp_mod( ps_ctxt->i4_qscale, ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0], ps_ctxt->ld_curr_frame_8x8_log_avg[0], f_strength, &i4_act_factor, &i4_q_scale_q3_mod, ps_ctxt->ps_rc_quant_ctxt); /*accum satd/qp for all child block*/ i8_frame_acc_satd_by_modqp_q10 += ((LWORD64)child_satd[j] << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / i4_q_scale_q3_mod; /* Accumalate mode bits for all child blocks */ i8_frame_acc_mode_bits_cost += ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost; /* satd and mpm bits accumalation of best cu size candiate */ i4_ctb_acc_satd += child_satd[j]; /* Increment pointers */ //ps_row_cu++; ps_ed_blk_l1 += 4; blk_cnt += 4; } /* cost accumalation of best cu size candiate */ i8_frame_acc_satd_cost += child_cost_least; /* 64x64 merge is not possible */ merge_64x64 = 0; } //ps_ed_blk_l2 += 4; } //end of EIID's else #endif } /* If Merge success for L1 max CU size 16x16 is chosen */ else if(merge_16x16_l1) { #if IP_DBG_L1_l2 ps_cu_node->ps_parent->u1_cu_size = 16; ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */ ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[blk_cnt]; /* Populate properly */ ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_merge_mode; ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); blk_cnt += 4; ps_ed_blk_l1 += 4; ps_row_cu++; merge_64x64 = 0; #else /*EIID: evaluate only if L1 early-inter-intra decision is not favouring inter*/ /* enable this only in B pictures */ if(ps_ed_blk_l1->intra_or_inter == 2 && (ps_ctxt->i4_slice_type != ISLICE)) { WORD32 i4_q_scale_q3_mod, i4_local_ctr; WORD8 i1_cu_possible_qp; WORD32 i4_act_factor; /* make cost infinity. 
*/ /* make modes invalid */ /* update loop variables */ /* set other output variales */ /* dont set neighbour flag so that next blocks wont access this cu */ /* what happens to ctb_mode_map?? */ ps_cu_node->ps_parent->u1_cu_size = 16; ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */ ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[blk_cnt]; /* Populate properly */ ps_cu_node->ps_parent->best_mode = INTRA_DC; //ps_ed_blk_l1->best_merge_mode; /* fill in the first modes as invalid */ ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC; ps_cu_node->ps_parent->au1_best_mode_1tu[1] = INTRA_DC; //for safery. Since update_cand_list will set num_modes as 3 ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC; ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC; ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC; ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC; ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0; //ps_row_cu->u1_num_intra_rdopt_cands = 0; ps_intra32_analyse->b1_split_flag = 1; ps_intra32_analyse->b1_merge_flag = 0; ps_intra16_analyse->b1_valid_cu = 0; ps_intra16_analyse->b1_split_flag = 0; ps_intra16_analyse->b1_merge_flag = 1; //memset (&ps_intra16_analyse->au1_best_modes_16x16_tu, // 255, // NUM_BEST_MODES); //memset (&ps_intra16_analyse->au1_best_modes_8x8_tu, // 255, // NUM_BEST_MODES); //set only first mode since if it's 255. 
it wont go ahead ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 255; ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 255; ps_intra16_analyse->i4_best_intra_cost = MAX_INTRA_COST_IPE; *pi4_intra_16_cost = MAX_INTRA_COST_IPE; /*since ME will start evaluating from bottom up, set the lower cu size data invalid */ for(i4_local_ctr = 0; i4_local_ctr < 4; i4_local_ctr++) { ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] .au1_4x4_best_modes[0][0] = 255; ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] .au1_4x4_best_modes[1][0] = 255; ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] .au1_4x4_best_modes[2][0] = 255; ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] .au1_4x4_best_modes[3][0] = 255; ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] .au1_best_modes_8x8_tu[0] = 255; ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] .au1_best_modes_4x4_tu[0] = 255; ps_intra16_analyse->as_intra8_analyse[i4_local_ctr].i4_best_intra_cost = MAX_INTRA_COST_IPE; pi4_intra_8_cost [(i4_local_ctr & 1) + (MAX_CU_IN_CTB_ROW * (i4_local_ctr >> 1))] = MAX_INTRA_COST_IPE; } /* set neighbours even if intra is not evaluated, since source is always available. */ ihevce_set_nbr_map( ps_ctxt->pu1_ctb_nbr_map, ps_ctxt->i4_nbr_map_strd, ps_cu_node->ps_parent->u2_x0 << 1, ps_cu_node->ps_parent->u2_y0 << 1, (ps_cu_node->ps_parent->u1_cu_size >> 2), 1); //what happends to RC variables?? 
/* run only constant Qp */ ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2)); ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0] != -2); i1_cu_possible_qp = ihevce_cu_level_qp_mod( ps_ctxt->i4_qscale, ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0], ps_ctxt->ld_curr_frame_8x8_log_avg[0], f_strength, &i4_act_factor, &i4_q_scale_q3_mod, ps_ctxt->ps_rc_quant_ctxt); /* cost accumalation of best cu size candiate */ i8_frame_acc_satd_cost += 0; //parent_cost; //incorrect accumulation /*satd/mod_qp accumulation of best cu */ i8_frame_acc_satd_by_modqp_q10 += 0; //incorrect accumulation //((LWORD64)ps_cu_node->ps_parent->best_satd << SATD_BY_ACT_Q_FAC)/i4_q_scale_q3_mod; /* Accumalate mode bits for all child blocks */ i8_frame_acc_mode_bits_cost += 0; //ps_cu_node->ps_parent->u2_mode_bits_cost; //incoorect accumulation blk_cnt += 4; ps_ed_blk_l1 += 4; //ps_row_cu++; merge_64x64 = 0; /* increment for stat purpose only. Increment is valid only on single thread */ ps_ctxt->u4_num_16x16_skips_at_L0_IPE += 1; } else { /* 64x64 merge is not possible */ merge_64x64 = 0; /* set the 32x32 split flag to 1 */ ps_intra32_analyse->b1_split_flag = 1; ps_intra32_analyse->b1_merge_flag = 0; ps_intra16_analyse->b1_merge_flag = 1; if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) && (ps_ctxt->i4_slice_type == PSLICE)) { ps_ctxt->u1_disable_child_cu_decide = 1; step2_bypass = 0; } //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); /* Based on the flag, Child modes decision can be disabled*/ if(0 == ps_ctxt->u1_disable_child_cu_decide) { for(j = 0; j < 4; j++) { intra8_analyse_t *ps_intra8_analyse; WORD32 best_ang_mode = (ps_ed_blk_l1 + j)->best_mode; if(best_ang_mode < 2) best_ang_mode = 26; //ps_cu_node->ps_sub_cu[j]->best_cost = MAX_INTRA_COST_IPE; //ps_cu_node->ps_sub_cu[j]->best_mode = (ps_ed_blk_l1 + j)->best_mode; ps_cu_node->ps_sub_cu[j]->u2_x0 = gau1_cu_pos_x[blk_cnt + j]; /* Populate properly */ ps_cu_node->ps_sub_cu[j]->u2_y0 = 
gau1_cu_pos_y[blk_cnt + j]; /* Populate properly */ ps_cu_node->ps_sub_cu[j]->u1_cu_size = 8; ihevce_mode_eval_filtering( ps_cu_node->ps_sub_cu[j], ps_cu_node, ps_ctxt, ps_curr_src, best_ang_mode, &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0], &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], !step2_bypass, 1); if(i4_enable_4cu_16tu) { ihevce_mode_eval_filtering( ps_cu_node->ps_sub_cu[j], ps_cu_node, ps_ctxt, ps_curr_src, best_ang_mode, &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], !step2_bypass, 0); } else { /* 4TU not evaluated : 4tu modes set same as 1tu modes */ memcpy( &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], NUM_BEST_MODES); /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */ memcpy( &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0], NUM_BEST_MODES * sizeof(WORD32)); } child_cost[j] = MIN(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]); child_cost_least += child_cost[j]; /* Select the best mode to be populated as top and left nbr depending on the 4tu and 1tu cost */ if(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0] > ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]) { ps_cu_node->ps_sub_cu[j]->best_mode = ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0]; } else { ps_cu_node->ps_sub_cu[j]->best_mode = ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0]; } { /* Update the CTB nodes only for MAX - 1 CU nodes */ WORD32 xA, yA, row, col; xA = ((ps_cu_node->ps_sub_cu[j]->u2_x0 << 3) >> 2) + 1; yA = ((ps_cu_node->ps_sub_cu[j]->u2_y0 << 3) >> 2) + 1; size = ps_cu_node->ps_sub_cu[j]->u1_cu_size >> 2; for(row = yA; row < (yA + size); row++) { for(col = xA; col < (xA + size); col++) { ps_ctxt->au1_ctb_mode_map[row][col] = ps_cu_node->ps_sub_cu[j]->best_mode; } } } /*collect individual child satd for final SATD/qp accum*/ child_satd[j] = 
ps_cu_node->ps_sub_cu[j]->best_satd; ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j]; /* store the child 8x8 costs */ pi4_intra_8_cost[(j & 1) + (MAX_CU_IN_CTB_ROW * (j >> 1))] = child_cost[j]; /* set the CU valid flag */ ps_intra8_analyse->b1_valid_cu = 1; ps_intra8_analyse->b1_enable_nxn = 0; /* storing the modes to intra8 analyse */ /* store the best 8x8 modes 8x8 tu */ memcpy( &ps_intra8_analyse->au1_best_modes_8x8_tu[0], &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], sizeof(UWORD8) * (NUM_BEST_MODES)); ps_intra8_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255; /* store the best 8x8 modes 4x4 tu */ memcpy( &ps_intra8_analyse->au1_best_modes_4x4_tu[0], &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], sizeof(UWORD8) * (NUM_BEST_MODES)); ps_intra8_analyse->au1_best_modes_4x4_tu[NUM_BEST_MODES] = 255; /* NXN modes not evaluated hence set to 255 */ memset( &ps_intra8_analyse->au1_4x4_best_modes[0][0], 255, sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1)); } ihevce_set_nbr_map( ps_ctxt->pu1_ctb_nbr_map, ps_ctxt->i4_nbr_map_strd, ps_cu_node->ps_sub_cu[0]->u2_x0 << 1, ps_cu_node->ps_sub_cu[0]->u2_y0 << 1, (ps_cu_node->ps_sub_cu[0]->u1_cu_size >> 1), 0); } #if 1 //DISBLE_CHILD_CU_EVAL_L0_IPE //1 else { for(j = 0; j < 4; j++) { intra8_analyse_t *ps_intra8_analyse; ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j]; ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255; ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255; /* NXN modes not evaluated hence set to 255 */ memset( &ps_intra8_analyse->au1_4x4_best_modes[0][0], 255, sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1)); ps_intra8_analyse->b1_valid_cu = 0; ps_intra8_analyse->b1_enable_nxn = 0; } child_cost_least = MAX_INTRA_COST_IPE; } #endif //ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode; //ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE; ps_cu_node->ps_parent->u1_cu_size = 16; ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */ ps_cu_node->ps_parent->u2_y0 
= gau1_cu_pos_y[blk_cnt]; /* Populate properly */ //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); /* Eval for TUSize = CuSize */ ihevce_mode_eval_filtering( ps_cu_node->ps_parent, ps_cu_node, ps_ctxt, ps_curr_src, 26, &ps_cu_node->ps_parent->au4_best_cost_1tu[0], &ps_cu_node->ps_parent->au1_best_mode_1tu[0], step2_bypass, 1); if(i4_enable_1cu_4tu) { /* Eval for TUSize = CuSize/2 */ ihevce_mode_eval_filtering( ps_cu_node->ps_parent, ps_cu_node, ps_ctxt, ps_curr_src, 26, &ps_cu_node->ps_parent->au4_best_cost_4tu[0], &ps_cu_node->ps_parent->au1_best_mode_4tu[0], step2_bypass, 0); } else { /* 4TU not evaluated : 4tu modes set same as 1tu modes */ memcpy( &ps_cu_node->ps_parent->au1_best_mode_4tu[0], &ps_cu_node->ps_parent->au1_best_mode_1tu[0], NUM_BEST_MODES); /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */ memcpy( &ps_cu_node->ps_parent->au4_best_cost_4tu[0], &ps_cu_node->ps_parent->au4_best_cost_1tu[0], NUM_BEST_MODES * sizeof(WORD32)); } ps_ctxt->u1_disable_child_cu_decide = 0; step2_bypass = 1; /* Update parent cost */ parent_cost = MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0], ps_cu_node->ps_parent->au4_best_cost_1tu[0]); /* Select the best mode to be populated as top and left nbr depending on the 4tu and 1tu cost */ if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] > ps_cu_node->ps_parent->au4_best_cost_1tu[0]) { ps_cu_node->ps_parent->best_mode = ps_cu_node->ps_parent->au1_best_mode_1tu[0]; } else { ps_cu_node->ps_parent->best_mode = ps_cu_node->ps_parent->au1_best_mode_4tu[0]; } /* store the 16x16 cost */ *pi4_intra_16_cost = parent_cost; /* accumulate the 32x32 cost */ if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost) { *pi4_intra_32_cost = parent_cost; } else { *pi4_intra_32_cost += parent_cost; } /* set the CU valid flag */ ps_intra16_analyse->b1_valid_cu = 1; /* storing the modes to intra 16 analyse */ { /* store the best 16x16 modes 16x16 tu */ memcpy( 
&ps_intra16_analyse->au1_best_modes_16x16_tu[0], &ps_cu_node->ps_parent->au1_best_mode_1tu[0], sizeof(UWORD8) * NUM_BEST_MODES); ps_intra16_analyse->au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255; /* store the best 16x16 modes 8x8 tu */ memcpy( &ps_intra16_analyse->au1_best_modes_8x8_tu[0], &ps_cu_node->ps_parent->au1_best_mode_4tu[0], sizeof(UWORD8) * NUM_BEST_MODES); ps_intra16_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255; } parent_best_mode = ps_cu_node->ps_parent->best_mode; if(parent_cost <= child_cost_least + (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >> LAMBDA_Q_SHIFT)) //|| identical_modes) { WORD32 i4_q_scale_q3_mod; WORD8 i1_cu_possible_qp; WORD32 i4_act_factor; //choose parent CU ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); /* set the 16x16 non split flag */ ps_intra16_analyse->b1_split_flag = 0; /*As 16*16 has won, pick L1 8x8 qp which maps to L0 16x16 Qp*/ ASSERT(((blk_cnt >> 4) & 3) == (blk_cnt >> 4)); ASSERT(ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0] != -2); i1_cu_possible_qp = ihevce_cu_level_qp_mod( ps_ctxt->i4_qscale, ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0], ps_ctxt->ld_curr_frame_8x8_log_avg[0], f_strength, &i4_act_factor, &i4_q_scale_q3_mod, ps_ctxt->ps_rc_quant_ctxt); /* cost accumalation of best cu size candiate */ i8_frame_acc_satd_cost += parent_cost; /* satd and mpm bits accumalation of best cu size candiate */ i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd; /*satd/mod_qp accumulation of best cu */ i8_frame_acc_satd_by_modqp_q10 += ((LWORD64)ps_cu_node->ps_parent->best_satd << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / i4_q_scale_q3_mod; /* Accumalate mode bits for all child blocks */ i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost; blk_cnt += 4; ps_ed_blk_l1 += 4; //ps_row_cu++; } else { //choose child CU WORD8 i1_cu_possible_qp; WORD32 i4_act_factor; WORD32 i4_q_scale_q3_mod; ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2)); ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][1] 
!= -2); i1_cu_possible_qp = ihevce_cu_level_qp_mod( ps_ctxt->i4_qscale, ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][1], ps_ctxt->ld_curr_frame_8x8_log_avg[1], f_strength, &i4_act_factor, &i4_q_scale_q3_mod, ps_ctxt->ps_rc_quant_ctxt); /* set the 16x16 split flag */ ps_intra16_analyse->b1_split_flag = 1; for(j = 0; j < 4; j++) { ihevce_update_cand_list( ps_cu_node->ps_sub_cu[j], ps_ed_blk_l1, ps_ctxt); if((IHEVCE_QUALITY_P3 > i4_quality_preset)) { WORD32 k; intra8_analyse_t *ps_intra8_analyse; ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j]; for(k = 0; k < 4; k++) { /* Populate best 3 nxn modes */ ps_intra8_analyse->au1_4x4_best_modes[k][0] = ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0]; ps_intra8_analyse->au1_4x4_best_modes[k][1] = ps_cu_node->ps_sub_cu[j] ->au1_best_mode_4tu[1]; //(ps_ed + 1)->best_mode; ps_intra8_analyse->au1_4x4_best_modes[k][2] = ps_cu_node->ps_sub_cu[j] ->au1_best_mode_4tu[2]; //(ps_ed + 2)->best_mode; ps_intra8_analyse->au1_4x4_best_modes[k][3] = 255; } } /*accum satd/qp for all child block*/ i8_frame_acc_satd_by_modqp_q10 += ((LWORD64)child_satd[j] << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / i4_q_scale_q3_mod; /* Accumalate mode bits for all child blocks */ i8_frame_acc_mode_bits_cost += ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost; /* satd and mpm bits accumalation of best cu size candiate */ i4_ctb_acc_satd += child_satd[j]; blk_cnt += 1; ps_ed_blk_l1 += 1; //ps_row_cu++; } /* cost accumalation of best cu size candiate */ i8_frame_acc_satd_cost += child_cost_least; } } //else of EIID #endif } // if(merge_16x16_l1) /* MAX CU SIZE 8x8 */ else { #if IP_DBG_L1_l2 for(i = 0; i < 4; i++) { ps_cu_node->ps_parent->u1_cu_size = 8; ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */ ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[blk_cnt]; /* Populate properly */ ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode; ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); blk_cnt++; 
ps_ed_blk_l1++; ps_row_cu++; merge_64x64 = 0; } #else /* EIID: Skip all 4 8x8 block if L1 decisions says skip intra */ if(ps_ed_blk_l1->intra_or_inter == 2 && (ps_ctxt->i4_slice_type != ISLICE)) { WORD32 i4_q_scale_q3_mod; WORD8 i1_cu_possible_qp; WORD32 i4_act_factor; merge_64x64 = 0; ps_intra32_analyse->b1_merge_flag = 0; ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 255; ps_intra16_analyse->au1_best_modes_8x8_tu[1] = 255; ps_intra16_analyse->au1_best_modes_8x8_tu[2] = 255; ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 255; ps_intra16_analyse->au1_best_modes_16x16_tu[1] = 255; ps_intra16_analyse->au1_best_modes_16x16_tu[2] = 255; ps_intra16_analyse->b1_split_flag = 1; ps_intra16_analyse->b1_valid_cu = 0; ps_intra16_analyse->b1_merge_flag = 0; ps_intra16_analyse->i4_best_intra_cost = MAX_INTRA_COST_IPE; for(i = 0; i < 4; i++) { intra8_analyse_t *ps_intra8_analyse; WORD32 ctr_sub_cu; cu_pos_x = gau1_cu_pos_x[blk_cnt]; cu_pos_y = gau1_cu_pos_y[blk_cnt]; if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y)) { ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i]; ps_intra8_analyse->b1_valid_cu = 0; ps_intra8_analyse->b1_enable_nxn = 0; ps_intra8_analyse->au1_4x4_best_modes[0][0] = 255; ps_intra8_analyse->au1_4x4_best_modes[1][0] = 255; ps_intra8_analyse->au1_4x4_best_modes[2][0] = 255; ps_intra8_analyse->au1_4x4_best_modes[3][0] = 255; ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255; ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255; ps_intra8_analyse->i4_best_intra_cost = MAX_INTRA_COST_IPE; ps_cu_node->ps_parent->u1_cu_size = 8; ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */ ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[blk_cnt]; /* Populate properly */ ps_cu_node->ps_parent->best_mode = INTRA_DC; //ps_ed_blk_l1->best_mode; /* fill in the first modes as invalid */ ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC; ps_cu_node->ps_parent->au1_best_mode_1tu[1] = INTRA_DC; //for safery. 
Since update_cand_list will set num_modes as 3 ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC; ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC; ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC; ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC; ihevce_update_cand_list( ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0; //ps_row_cu->u1_num_intra_rdopt_cands = 0; for(ctr_sub_cu = 0; ctr_sub_cu < 4; ctr_sub_cu++) { ps_cu_node->ps_sub_cu[ctr_sub_cu]->au1_best_mode_1tu[0] = INTRA_DC; ps_cu_node->ps_sub_cu[ctr_sub_cu]->au1_best_mode_4tu[0] = INTRA_DC; ps_cu_node->ps_sub_cu[ctr_sub_cu]->au4_best_cost_1tu[0] = MAX_INTRA_COST_IPE; ps_cu_node->ps_sub_cu[ctr_sub_cu]->au4_best_cost_4tu[0] = MAX_INTRA_COST_IPE; ps_cu_node->ps_sub_cu[ctr_sub_cu]->best_cost = MAX_INTRA_COST_IPE; } pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] = MAX_INTRA_COST_IPE; ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2)); ASSERT(ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1] != -2); i1_cu_possible_qp = ihevce_cu_level_qp_mod( ps_ctxt->i4_qscale, ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1], ps_ctxt->ld_curr_frame_8x8_log_avg[1], f_strength, &i4_act_factor, &i4_q_scale_q3_mod, ps_ctxt->ps_rc_quant_ctxt); /* set neighbours even if intra is not evaluated, since source is always available. 
*/ ihevce_set_nbr_map( ps_ctxt->pu1_ctb_nbr_map, ps_ctxt->i4_nbr_map_strd, ps_cu_node->ps_parent->u2_x0 << 1, ps_cu_node->ps_parent->u2_y0 << 1, (ps_cu_node->ps_parent->u1_cu_size >> 2), 1); //ps_row_cu++; } blk_cnt++; ps_ed_blk_l1++; } } else { //cu_intra_cand_t *ps_cu_intra_cand; WORD8 i1_cu_possible_qp; WORD32 i4_act_factor; WORD32 i4_q_scale_q3_mod; ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2)); ASSERT(ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1] != -2); i1_cu_possible_qp = ihevce_cu_level_qp_mod( ps_ctxt->i4_qscale, ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1], ps_ctxt->ld_curr_frame_8x8_log_avg[1], f_strength, &i4_act_factor, &i4_q_scale_q3_mod, ps_ctxt->ps_rc_quant_ctxt); /* 64x64 merge is not possible */ merge_64x64 = 0; ps_intra32_analyse->b1_merge_flag = 0; ps_intra16_analyse->b1_merge_flag = 0; /* by default 16x16 modes are set to default values DC and Planar */ ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 0; ps_intra16_analyse->au1_best_modes_8x8_tu[1] = 1; ps_intra16_analyse->au1_best_modes_8x8_tu[2] = 255; ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 0; ps_intra16_analyse->au1_best_modes_16x16_tu[1] = 1; ps_intra16_analyse->au1_best_modes_16x16_tu[2] = 255; ps_intra16_analyse->b1_split_flag = 1; ps_intra16_analyse->b1_valid_cu = 1; for(i = 0; i < 4; i++) { intra8_analyse_t *ps_intra8_analyse; cu_pos_x = gau1_cu_pos_x[blk_cnt]; cu_pos_y = gau1_cu_pos_y[blk_cnt]; if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y)) { //ps_cu_intra_cand = &ps_row_cu->s_cu_intra_cand; //ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE; //ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode; child_cost_least = 0; ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i]; ps_cu_node->ps_parent->u1_cu_size = 8; ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */ ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[blk_cnt]; /* Populate properly */ //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, 
sizeof(ps_ctxt->ai1_ctb_mode_map)); /*EARLY DECISION 8x8 block */ ihevce_pu_calc_8x8_blk( ps_curr_src, ps_ctxt, ps_cu_node, ps_ctxt->ps_func_selector); for(j = 0; j < 4; j++) { child_cost_least += ps_cu_node->ps_sub_cu[j]->best_cost; child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd; } /* Based on the flag, CU = 4TU modes decision can be disabled, CU = 4PU is retained */ if(0 == ps_ctxt->u1_disable_child_cu_decide) { ihevce_set_nbr_map( ps_ctxt->pu1_ctb_nbr_map, ps_ctxt->i4_nbr_map_strd, ps_cu_node->ps_parent->u2_x0 << 1, ps_cu_node->ps_parent->u2_y0 << 1, (ps_cu_node->ps_parent->u1_cu_size >> 2), 0); //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); /* Eval for TUSize = CuSize */ ihevce_mode_eval_filtering( ps_cu_node->ps_parent, ps_cu_node, ps_ctxt, ps_curr_src, 26, &ps_cu_node->ps_parent->au4_best_cost_1tu[0], &ps_cu_node->ps_parent->au1_best_mode_1tu[0], step2_bypass, 1); if(i4_enable_1cu_4tu) { /* Eval for TUSize = CuSize/2 */ ihevce_mode_eval_filtering( ps_cu_node->ps_parent, ps_cu_node, ps_ctxt, ps_curr_src, 26, &ps_cu_node->ps_parent->au4_best_cost_4tu[0], &ps_cu_node->ps_parent->au1_best_mode_4tu[0], step2_bypass, 0); } else { /* 4TU not evaluated : 4tu modes set same as 1tu modes */ memcpy( &ps_cu_node->ps_parent->au1_best_mode_4tu[0], &ps_cu_node->ps_parent->au1_best_mode_1tu[0], NUM_BEST_MODES); /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */ memcpy( &ps_cu_node->ps_parent->au4_best_cost_4tu[0], &ps_cu_node->ps_parent->au4_best_cost_1tu[0], NUM_BEST_MODES * sizeof(WORD32)); } /* Update parent cost */ parent_cost = MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0], ps_cu_node->ps_parent->au4_best_cost_1tu[0]); /* Select the best mode to be populated as top and left nbr depending on the 4tu and 1tu cost */ if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] > ps_cu_node->ps_parent->au4_best_cost_1tu[0]) { ps_cu_node->ps_parent->best_mode = ps_cu_node->ps_parent->au1_best_mode_1tu[0]; } 
else { ps_cu_node->ps_parent->best_mode = ps_cu_node->ps_parent->au1_best_mode_4tu[0]; } } /* set the CU valid flag */ ps_intra8_analyse->b1_valid_cu = 1; ps_intra8_analyse->b1_enable_nxn = 0; /* storing the modes to intra 8 analyse */ /* store the best 8x8 modes 8x8 tu */ memcpy( &ps_intra8_analyse->au1_best_modes_8x8_tu[0], &ps_cu_node->ps_parent->au1_best_mode_1tu[0], sizeof(UWORD8) * (NUM_BEST_MODES)); ps_intra8_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255; /* store the best 8x8 modes 4x4 tu */ memcpy( &ps_intra8_analyse->au1_best_modes_4x4_tu[0], &ps_cu_node->ps_parent->au1_best_mode_4tu[0], sizeof(UWORD8) * (NUM_BEST_MODES)); ps_intra8_analyse->au1_best_modes_4x4_tu[NUM_BEST_MODES] = 255; /*As 8*8 has won, pick L1 4x4 qp which is equal to L1 8x8 Qp*/ //ps_row_cu->u1_cu_possible_qp[0] = u1_cu_possible_qp; //ps_row_cu->i4_act_factor[0][1] = i4_act_factor; parent_best_mode = ps_cu_node->ps_parent->best_mode; if(parent_cost <= child_cost_least + (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >> LAMBDA_Q_SHIFT)) { /*CU = 4TU */ ihevce_update_cand_list( ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); /* store the child 8x8 costs */ pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] = parent_cost; /* cost accumalation of best cu size candiate */ i8_frame_acc_satd_cost += parent_cost; /*satd/mod_qp accumulation of best cu */ i8_frame_acc_satd_by_modqp_q10 += ((LWORD64)ps_cu_node->ps_parent->best_satd << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / i4_q_scale_q3_mod; /* Accumalate mode bits for all child blocks */ i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost; /* satd and mpm bits accumalation of best cu size candiate */ i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd; /* accumulate the 16x16 cost*/ if(MAX_INTRA_COST_IPE == *pi4_intra_16_cost) { *pi4_intra_16_cost = parent_cost; } else { *pi4_intra_16_cost += parent_cost; } /* accumulate the 32x32 cost*/ if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost) { *pi4_intra_32_cost = parent_cost; 
} else { *pi4_intra_32_cost += parent_cost; } } else { /*CU = 4PU*/ //ps_row_cu->b3_cu_pos_x = (UWORD8) ps_cu_node->ps_parent->u2_x0; //ps_row_cu->b3_cu_pos_y = (UWORD8) ps_cu_node->ps_parent->u2_y0; //ps_row_cu->u1_cu_size = ps_cu_node->ps_parent->u1_cu_size; /* store the child 8x8 costs woth 4x4 pu summed cost */ pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] = (child_cost_least); /* accumulate the 16x16 cost*/ if(MAX_INTRA_COST_IPE == *pi4_intra_16_cost) { *pi4_intra_16_cost = child_cost_least; } else { *pi4_intra_16_cost += child_cost_least; } /* cost accumalation of best cu size candiate */ i8_frame_acc_satd_cost += child_cost_least; for(j = 0; j < 4; j++) { /*satd/qp accumualtion*/ i8_frame_acc_satd_by_modqp_q10 += ((LWORD64)child_satd[j] << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / i4_q_scale_q3_mod; /* Accumalate mode bits for all child blocks */ i8_frame_acc_mode_bits_cost += ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost; /* satd and mpm bits accumalation of best cu size candiate */ i4_ctb_acc_satd += child_satd[j]; } /* accumulate the 32x32 cost*/ if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost) { *pi4_intra_32_cost = child_cost_least; } else { *pi4_intra_32_cost += child_cost_least; } ps_intra8_analyse->b1_enable_nxn = 1; /* Insert the best 8x8 modes unconditionally */ x = ((ps_cu_node->u2_x0 << 3) >> 2) + 1; y = ((ps_cu_node->u2_y0 << 3) >> 2) + 1; size = ps_cu_node->u1_cu_size >> 2; ps_ctxt->au1_ctb_mode_map[y][x] = ps_cu_node->ps_sub_cu[0]->best_mode; ps_ctxt->au1_ctb_mode_map[y][x + 1] = ps_cu_node->ps_sub_cu[1]->best_mode; ps_ctxt->au1_ctb_mode_map[y + 1][x] = ps_cu_node->ps_sub_cu[2]->best_mode; ps_ctxt->au1_ctb_mode_map[y + 1][x + 1] = ps_cu_node->ps_sub_cu[3]->best_mode; } /* NXN mode population */ for(j = 0; j < 4; j++) { cand_mode_list[0] = ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0]; cand_mode_list[1] = ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[1]; cand_mode_list[2] = ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[2]; if(1) { /* Populate 
best 3 nxn modes */ ps_intra8_analyse->au1_4x4_best_modes[j][0] = cand_mode_list[0]; ps_intra8_analyse->au1_4x4_best_modes[j][1] = cand_mode_list[1]; //(ps_ed + 1)->best_mode; ps_intra8_analyse->au1_4x4_best_modes[j][2] = cand_mode_list[2]; //(ps_ed + 2)->best_mode; ps_intra8_analyse->au1_4x4_best_modes[j][3] = 255; //memcpy(ps_intra8_analyse->au1_4x4_best_modes[j], ps_row_cu->s_cu_intra_cand.au1_intra_luma_modes_nxn[j], 4); } /* For HQ, all 35 modes to be used for RDOPT, removed from here for memory clean-up */ else /* IHEVCE_QUALITY_P0 == i4_quality_preset */ { /* To indicate to enc loop that NXN is enabled in HIGH QUALITY fior CU 8x8*/ ps_intra8_analyse->au1_4x4_best_modes[j][0] = 0; } ps_intra8_analyse ->au1_4x4_best_modes[j][MAX_INTRA_CU_CANDIDATES] = 255; } //ps_row_cu++; } else { /* For Incomplete CTB, 16x16 is not valid */ ps_intra16_analyse->b1_valid_cu = 0; } blk_cnt++; ps_ed_blk_l1++; } //ps_ed_blk_l2 ++; } //else of EIID #endif } } else { /* For incomplete CTB, init valid CU to 0 */ ps_ed_blk_l1++; ps_intra32_analyse->b1_valid_cu = 0; ps_intra16_analyse[0].b1_valid_cu = 0; blk_cnt++; merge_64x64 = 0; } } while(blk_cnt != MAX_CTB_SIZE); /* if 64x64 merge is possible then check for 32x32 having same best modes */ if(1 == merge_64x64) { WORD32 act_mode = au1_best_32x32_modes[0]; ps_ed_blk_l2 = ps_ed_l2_ctb; best_mode = ps_ed_blk_l2->best_mode; merge_64x64 = ((act_mode == au1_best_32x32_modes[0]) + (act_mode == au1_best_32x32_modes[1]) + (act_mode == au1_best_32x32_modes[2]) + (act_mode == au1_best_32x32_modes[3]) == 4); if(merge_64x64 == 1) best_mode = au1_best_32x32_modes[0]; else best_mode = ps_ed_blk_l2->best_mode; /* All 32x32 costs are accumalated to 64x64 cost */ ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = 0; for(i = 0; i < 4; i++) { ps_l0_ipe_out_ctb->i4_best64x64_intra_cost += ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i]; } /* If all modes of 32x32 block is not same */ if(0 == merge_64x64) { /*Compute CHILD cost for 32x32 */ WORD32 
child_cost_64x64 = au4_best_32x32_cost[0] + au4_best_32x32_cost[1] + au4_best_32x32_cost[2] + au4_best_32x32_cost[3]; WORD32 cost = MAX_INTRA_COST_IPE; WORD32 best_mode_temp = 0; /*Compute 64x64 cost for each mode of 32x32*/ for(i = 0; i < 4; i++) { WORD32 mode = au1_best_32x32_modes[i]; if(mode < 2) mode = 26; ps_cu_node->ps_parent->u1_cu_size = 64; ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[0]; /* Populate properly */ ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[0]; /* Populate properly */ ihevce_set_nbr_map( ps_ctxt->pu1_ctb_nbr_map, ps_ctxt->i4_nbr_map_strd, (ps_cu_node->ps_parent->u2_x0 << 1), (ps_cu_node->ps_parent->u2_y0 << 1), (ps_cu_node->ps_parent->u1_cu_size >> 2), 0); ihevce_mode_eval_filtering( ps_cu_node->ps_parent, ps_cu_node, ps_ctxt, ps_curr_src, mode, &ps_cu_node->ps_parent->au4_best_cost_1tu[0], &ps_cu_node->ps_parent->au1_best_mode_1tu[0], !step2_bypass, 0); parent_cost = ps_cu_node->ps_parent->best_cost; if(cost > parent_cost) { cost = parent_cost; best_mode_temp = ps_cu_node->ps_parent->best_mode; } } if(cost < child_cost_64x64) { merge_64x64 = 1; best_mode = best_mode_temp; /* Update 64x64 cost if CU 64x64 is chosen */ ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = cost; /* Accumalate the least cost for CU 64x64 */ i8_frame_acc_satd_cost = cost; i8_frame_acc_mode_bits_cost = ps_cu_node->ps_parent->u2_mode_bits_cost; /* satd and mpm bits accumalation of best cu size candiate */ i4_ctb_acc_satd = ps_cu_node->ps_parent->best_satd; } } } if(merge_64x64) { WORD32 i, j; intra32_analyse_t *ps_intra32_analyse; intra16_analyse_t *ps_intra16_analyse; WORD32 row, col; WORD32 i4_q_scale_q3_mod; WORD8 i1_cu_possible_qp; WORD32 i4_act_factor; //ps_row_cu = ps_curr_cu; ps_ctb_out->u4_cu_split_flags = 0x0; ps_ed_blk_l1 = ps_ed_l1_ctb; ps_ed_blk_l2 = ps_ed_l2_ctb; ps_l0_ipe_out_ctb->u1_split_flag = 0; /* If CU size of 64x64 is chosen, disbale all the 16x16 flag*/ for(i = 0; i < 4; i++) { /* get the corresponding intra 32 analyse pointer use (blk_cnt / 16) */ 
/* blk cnt is in terms of 8x8 units so a 32x32 will have 16 8x8 units */ ps_intra32_analyse = &ps_l0_ipe_out_ctb->as_intra32_analyse[i]; for(j = 0; j < 4; j++) { /* get the corresponding intra 16 analyse pointer use (blk_cnt & 0xF / 4)*/ /* blk cnt is in terms of 8x8 units so a 16x16 will have 4 8x8 units */ ps_intra16_analyse = &ps_intra32_analyse->as_intra16_analyse[j]; ps_intra16_analyse->b1_merge_flag = 0; } } /* CU size 64x64 and fill the final cu params */ //ps_row_cu->b3_cu_pos_x = gau1_cu_pos_x[0]; //ps_row_cu->b3_cu_pos_y = gau1_cu_pos_y[0]; //ps_row_cu->u1_cu_size = 64; /* Candidate mode Update */ cand_mode_list[0] = best_mode; if(cand_mode_list[0] > 1) { if(cand_mode_list[0] == 2) { cand_mode_list[1] = 34; cand_mode_list[2] = 3; } else if(cand_mode_list[0] == 34) { cand_mode_list[1] = 2; cand_mode_list[2] = 33; } else { cand_mode_list[1] = cand_mode_list[0] - 1; cand_mode_list[2] = cand_mode_list[0] + 1; } //cand_mode_list[1] = ps_ed_blk_l1->nang_attr.best_mode; //cand_mode_list[2] = ps_ed_blk_l1->ang_attr.best_mode; } else { cand_mode_list[0] = 0; cand_mode_list[1] = 1; cand_mode_list[2] = 26; //cand_mode_list[2] = ps_ed_blk_l1->nang_attr.best_mode; } /* All 32x32 costs are accumalated to 64x64 cost */ ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = 0; for(i = 0; i < 4; i++) { ps_l0_ipe_out_ctb->i4_best64x64_intra_cost += ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i]; } /* by default 64x64 modes are set to default values DC and Planar */ ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[0] = cand_mode_list[0]; ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[1] = cand_mode_list[1]; ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[2] = cand_mode_list[2]; ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[3] = 255; /* Update CTB mode map for the finalised CU */ x = ((ps_cu_node->u2_x0 << 3) >> 2) + 1; y = ((ps_cu_node->u2_y0 << 3) >> 2) + 1; size = ps_cu_node->u1_cu_size >> 2; for(row = y; row < (y + size); row++) { for(col = x; col < (x + size); col++) { 
ps_ctxt->au1_ctb_mode_map[row][col] = best_mode; } } ihevce_set_nbr_map( ps_ctxt->pu1_ctb_nbr_map, ps_ctxt->i4_nbr_map_strd, (ps_cu_node->u2_x0 << 1), (ps_cu_node->u2_y0 << 1), (ps_cu_node->u1_cu_size >> 2), 1); /*As 64*64 has won, pick L1 32x32 qp*/ //ASSERT(((blk_cnt>>6) & 0xF) == (blk_cnt>>6)); //ASSERT((blk_cnt>>6) == 0); ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][0] != -2); i1_cu_possible_qp = ihevce_cu_level_qp_mod( ps_ctxt->i4_qscale, ps_ed_ctb_l1->i4_32x32_satd[0][0], ps_ctxt->ld_curr_frame_32x32_log_avg[0], f_strength, &i4_act_factor, &i4_q_scale_q3_mod, ps_ctxt->ps_rc_quant_ctxt); i8_frame_acc_satd_by_modqp_q10 = (i8_frame_acc_satd_cost << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / i4_q_scale_q3_mod; /* Increment pointers */ ps_ed_blk_l1 += 64; ps_ed_blk_l2 += 16; //ps_row_cu++; } } //ps_ctb_out->u1_num_cus_in_ctb = (UWORD8)(ps_row_cu - ps_curr_cu); { WORD32 i4_i, i4_j; WORD32 dummy; WORD8 i1_cu_qp; (void)i1_cu_qp; /*MAM_VAR_L1*/ for(i4_j = 0; i4_j < 2; i4_j++) { i4_mod_factor_num = ps_ctxt->ai4_mod_factor_derived_by_variance[i4_j]; f_strength = ps_ctxt->f_strength; //i4_mod_factor_num = 4; ps_ed_blk_l1 = ps_ed_l1_ctb; ps_ed_blk_l2 = ps_ed_l2_ctb; //ps_row_cu = ps_curr_cu; /*Valid only for complete CTB */ if((64 == u1_curr_ctb_wdt) && (64 == u1_curr_ctb_hgt)) { ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][0] != -2); ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][1] != -2); ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][2] != -2); ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][3] != -2); i1_cu_qp = ihevce_cu_level_qp_mod( ps_ctxt->i4_qscale, ps_ed_ctb_l1->i4_32x32_satd[0][0], ps_ctxt->ld_curr_frame_32x32_log_avg[0], f_strength, &ps_l0_ipe_out_ctb->i4_64x64_act_factor[0][i4_j], &dummy, ps_ctxt->ps_rc_quant_ctxt); i1_cu_qp = ihevce_cu_level_qp_mod( ps_ctxt->i4_qscale, ps_ed_ctb_l1->i4_32x32_satd[0][1], ps_ctxt->ld_curr_frame_32x32_log_avg[1], f_strength, &ps_l0_ipe_out_ctb->i4_64x64_act_factor[1][i4_j], &dummy, ps_ctxt->ps_rc_quant_ctxt); i1_cu_qp = ihevce_cu_level_qp_mod( ps_ctxt->i4_qscale, 
ps_ed_ctb_l1->i4_32x32_satd[0][2], ps_ctxt->ld_curr_frame_32x32_log_avg[2], f_strength, &ps_l0_ipe_out_ctb->i4_64x64_act_factor[2][i4_j], &dummy, ps_ctxt->ps_rc_quant_ctxt); i1_cu_qp = ihevce_cu_level_qp_mod( ps_ctxt->i4_qscale, ps_ed_ctb_l1->i4_32x32_satd[0][3], 2.0 + ps_ctxt->ld_curr_frame_16x16_log_avg[0], f_strength, &ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j], &dummy, ps_ctxt->ps_rc_quant_ctxt); ASSERT(ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j] > 0); } else { ps_l0_ipe_out_ctb->i4_64x64_act_factor[0][i4_j] = 1024; ps_l0_ipe_out_ctb->i4_64x64_act_factor[1][i4_j] = 1024; ps_l0_ipe_out_ctb->i4_64x64_act_factor[2][i4_j] = 1024; ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j] = 1024; } /*Store the 8x8 Qps from L2 (in raster order) as output of intra prediction for the usage by ME*/ { WORD32 pos_x_32, pos_y_32, pos; //WORD32 i4_incomplete_ctb_val_8; pos_x_32 = u1_curr_ctb_wdt / 16; pos_y_32 = u1_curr_ctb_hgt / 16; pos = (pos_x_32 < pos_y_32) ? pos_x_32 : pos_y_32; for(i4_i = 0; i4_i < 4; i4_i++) { if(i4_i < pos) { ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][0] != -2); ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][1] != -2); ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][2] != -2); i1_cu_qp = ihevce_cu_level_qp_mod( ps_ctxt->i4_qscale, ps_ed_ctb_l1->i4_16x16_satd[i4_i][0], ps_ctxt->ld_curr_frame_16x16_log_avg[0], f_strength, &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][0][i4_j], &dummy, ps_ctxt->ps_rc_quant_ctxt); i1_cu_qp = ihevce_cu_level_qp_mod( ps_ctxt->i4_qscale, ps_ed_ctb_l1->i4_16x16_satd[i4_i][1], ps_ctxt->ld_curr_frame_16x16_log_avg[1], f_strength, &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][1][i4_j], &dummy, ps_ctxt->ps_rc_quant_ctxt); i1_cu_qp = ihevce_cu_level_qp_mod( ps_ctxt->i4_qscale, ps_ed_ctb_l1->i4_16x16_satd[i4_i][2], ps_ctxt->ld_curr_frame_16x16_log_avg[2], f_strength, &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][2][i4_j], &dummy, ps_ctxt->ps_rc_quant_ctxt); } else { /*For incomplete CTB */ ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][0][i4_j] = 
1024; ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][1][i4_j] = 1024; ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][2][i4_j] = 1024; } } } /*Store the 8x8 Qps from L1 (in raster order) as output of intra prediction for the usage by ME*/ { WORD32 pos_x_16, pos_y_16, pos; //WORD32 i4_incomplete_ctb_val_8; pos_x_16 = u1_curr_ctb_wdt / 4; pos_y_16 = u1_curr_ctb_hgt / 4; pos = (pos_x_16 < pos_y_16) ? pos_x_16 : pos_y_16; for(i4_i = 0; i4_i < 16; i4_i++) { if(i4_i < pos) { ASSERT(ps_ed_ctb_l1->i4_8x8_satd[i4_i][0] != -2); ASSERT(ps_ed_ctb_l1->i4_8x8_satd[i4_i][1] != -2); i1_cu_qp = ihevce_cu_level_qp_mod( ps_ctxt->i4_qscale, ps_ed_ctb_l1->i4_8x8_satd[i4_i][0], ps_ctxt->ld_curr_frame_8x8_log_avg[0], f_strength, &ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][0][i4_j], &dummy, ps_ctxt->ps_rc_quant_ctxt); i1_cu_qp = ihevce_cu_level_qp_mod( ps_ctxt->i4_qscale, ps_ed_ctb_l1->i4_8x8_satd[i4_i][1], ps_ctxt->ld_curr_frame_8x8_log_avg[1], f_strength, &ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][1][i4_j], &dummy, ps_ctxt->ps_rc_quant_ctxt); } else { /*For incomplete CTB */ ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][0][i4_j] = 1024; ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][1][i4_j] = 1024; } } } } //for loop /* Accumalate the cost of ctb to the total cost */ ps_ctxt->i8_frame_acc_satd_cost += i8_frame_acc_satd_cost; ps_ctxt->i8_frame_acc_satd_by_modqp_q10 += i8_frame_acc_satd_by_modqp_q10; ps_ctxt->i8_frame_acc_mode_bits_cost += i8_frame_acc_mode_bits_cost; /* satd and mpm bits accumalation of best cu size candiate for the ctb */ ps_l0_ipe_out_ctb->i4_ctb_acc_satd = i4_ctb_acc_satd; ps_l0_ipe_out_ctb->i4_ctb_acc_mpm_bits = i8_frame_acc_mode_bits_cost; ps_ctxt->i8_frame_acc_satd += i4_ctb_acc_satd; } { WORD32 ctr_8x8; for(ctr_8x8 = 0; ctr_8x8 < (MAX_CU_IN_CTB >> 2); ctr_8x8++) { /*Accumalate activity factor for Intra and Inter*/ if(ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_ipe[ctr_8x8] < ps_ed_ctb_l1->i4_sad_me_for_ref[ctr_8x8]) { ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8] = 
ps_l0_ipe_out_ctb->i4_16x16_act_factor[ctr_8x8][1][0]; } else { ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8] = ps_l0_ipe_out_ctb->i4_16x16_act_factor[ctr_8x8][1][0]; } /*Accumalate activity factor at frame level*/ ps_ctxt->i8_frame_acc_act_factor += ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8]; } } return; } WORD32 ihevce_nxn_sad_computer( UWORD8 *pu1_inp, WORD32 i4_inp_stride, UWORD8 *pu1_ref, WORD32 i4_ref_stride, WORD32 trans_size) { WORD32 wd, ht, i, j; WORD32 sad = 0; wd = trans_size; ht = trans_size; for(i = 0; i < ht; i++) { for(j = 0; j < wd; j++) { sad += (ABS(((WORD32)pu1_inp[j] - (WORD32)pu1_ref[j]))); } pu1_inp += i4_inp_stride; pu1_ref += i4_ref_stride; } return sad; } /*! ****************************************************************************** * \if Function name : ihevce_mode_eval_filtering \endif * * \brief * Evaluates best 3 modes for the given CU size with probable modes from, * early decision structure, mpm candidates and dc, planar mode * * \param[in] ps_cu_node : pointer to MAX cu node info buffer * \param[in] ps_child_cu_node : pointer to (MAX - 1) cu node info buffer * \param[in] ps_ctxt : pointer to IPE context struct * \param[in] ps_curr_src : pointer to src pixels struct * \param[in] best_amode : best angular mode from l1 layer or from (MAX - 1) CU mode * \param[in] best_costs_4x4 : pointer to 3 best cost buffer * \param[in] best_modes_4x4 : pointer to 3 best mode buffer * \param[in] step2_bypass : if 0, (MAX - 1) CU is evaluated * if 1, (MAX CU) sugested is evaluated * \param[in] tu_eq_cu : indicates if tu size is same as cu or cu/2 * * \return * None * * \author * Ittiam * ***************************************************************************** */ void ihevce_mode_eval_filtering( ihevce_ipe_cu_tree_t *ps_cu_node, ihevce_ipe_cu_tree_t *ps_child_cu_node, ihevce_ipe_ctxt_t *ps_ctxt, iv_enc_yuv_buf_t *ps_curr_src, WORD32 best_amode, WORD32 *best_costs_4x4, UWORD8 *best_modes_4x4, WORD32 step2_bypass, WORD32 tu_eq_cu) { UWORD8 
*pu1_origin, *pu1_orig; WORD32 src_strd = ps_curr_src->i4_y_strd; WORD32 nbr_flags; nbr_avail_flags_t s_nbr; WORD32 trans_size = tu_eq_cu ? ps_cu_node->u1_cu_size : ps_cu_node->u1_cu_size >> 1; WORD32 num_tu_in_x = tu_eq_cu ? 1 : 2; WORD32 num_tu_in_y = tu_eq_cu ? 1 : 2; UWORD8 mode; WORD32 cost_ang_mode = MAX_INTRA_COST_IPE; WORD32 filter_flag; WORD32 cost_amode_step2[7] = { 0 }; /*WORD32 best_sad[5]; // NOTE_A01: Not getting consumed at present */ WORD32 sad = 0; WORD32 cu_pos_x, cu_pos_y; WORD32 temp; WORD32 i = 0, j, k, i_end, z; //WORD32 row, col, size; UWORD8 *pu1_ref; WORD32 xA, yA, xB, yB; WORD32 top_intra_mode; WORD32 left_intra_mode; UWORD8 *pu1_ref_orig = &ps_ctxt->au1_ref_samples[0]; UWORD8 *pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0]; UWORD8 modes_4x4[5] = { 0, 1, 2, 3, 4 }; WORD32 count; pf_ipe_res_trans_had apf_resd_trns_had[4]; WORD32 cand_mode_satd_list[3]; ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr; ihevc_intra_pred_luma_ref_substitution_fptr = ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr; apf_resd_trns_had[0] = ps_ctxt->s_cmn_opt_func.pf_HAD_4x4_8bit; apf_resd_trns_had[1] = ps_ctxt->s_cmn_opt_func.pf_HAD_8x8_8bit; apf_resd_trns_had[2] = ps_ctxt->s_cmn_opt_func.pf_HAD_16x16_8bit; apf_resd_trns_had[3] = ps_ctxt->s_cmn_opt_func.pf_HAD_32x32_8bit; /* initialize modes_to_eval as zero */ memset(&ps_ctxt->au1_modes_to_eval, 0, MAX_NUM_IP_MODES); /* Compute the Parent Cost */ /* Pointer to top-left of the CU - y0,x0 in 8x8 granularity */ pu1_orig = (UWORD8 *)(ps_curr_src->pv_y_buf) + ((ps_cu_node->u2_y0 << 3) * src_strd) + (ps_cu_node->u2_x0 << 3); /* Get position of CU within CTB at 4x4 granularity */ cu_pos_x = ps_cu_node->u2_x0 << 1; cu_pos_y = ps_cu_node->u2_y0 << 1; /* get the neighbour availability flags */ ihevce_get_only_nbr_flag( &s_nbr, ps_ctxt->pu1_ctb_nbr_map, ps_ctxt->i4_nbr_map_strd, cu_pos_x, cu_pos_y, trans_size >> 2, trans_size >> 2); /* Traverse for all 4 child 
blocks in the parent block */ xA = (ps_cu_node->u2_x0 << 3) >> 2; yA = ((ps_cu_node->u2_y0 << 3) >> 2) + 1; xB = xA + 1; yB = yA - 1; left_intra_mode = ps_ctxt->au1_ctb_mode_map[yA][xA]; top_intra_mode = ps_ctxt->au1_ctb_mode_map[yB][xB]; /* call the function which populates sad cost for all the modes */ ihevce_intra_populate_mode_bits_cost_bracketing( top_intra_mode, left_intra_mode, s_nbr.u1_top_avail, s_nbr.u1_left_avail, ps_cu_node->u2_y0, &ps_ctxt->au2_mode_bits_satd_cost[0], &ps_ctxt->au2_mode_bits_satd[0], ps_ctxt->i4_ol_satd_lambda, cand_mode_satd_list); for(k = 0; k < num_tu_in_y; k++) { for(j = 0; j < num_tu_in_x; j++) { /* get the neighbour availability flags */ nbr_flags = ihevce_get_nbr_intra( &s_nbr, ps_ctxt->pu1_ctb_nbr_map, ps_ctxt->i4_nbr_map_strd, cu_pos_x + ((j) * (trans_size >> 2)), cu_pos_y + ((k) * (trans_size >> 2)), trans_size >> 2); pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size); /* Create reference samples array */ ihevc_intra_pred_luma_ref_substitution_fptr( pu1_origin - src_strd - 1, pu1_origin - src_strd, pu1_origin - 1, src_strd, trans_size, nbr_flags, pu1_ref_orig, 0); /* Perform reference samples filtering */ ihevce_intra_pred_ref_filtering(pu1_ref_orig, trans_size, pu1_ref_filt); ihevce_set_nbr_map( ps_ctxt->pu1_ctb_nbr_map, ps_ctxt->i4_nbr_map_strd, cu_pos_x + ((j) * (trans_size >> 2)), cu_pos_y + ((k) * (trans_size >> 2)), (trans_size >> 2), 1); pu1_ref_orig += (4 * MAX_CTB_SIZE + 1); pu1_ref_filt += (4 * MAX_CTB_SIZE + 1); } } /* Revaluation for angular mode */ //if(ps_ed_blk->ang_attr.mode_present == 1) //if(((best_amode & 0x1) != 1)) { WORD32 u1_trans_idx = trans_size >> 3; if(trans_size == 32) u1_trans_idx = 3; //best_amode = ps_ed_blk->ang_attr.best_mode; i = 0; if(!step2_bypass) { /* Around best level 4 angular mode, search for best level 2 mode */ ASSERT((best_amode >= 2) && (best_amode <= 34)); if(ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P3) { if(best_amode >= 4) 
ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode - 2; } ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode; if(ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P3) { if(best_amode <= 32) ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode + 2; } } else { ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[0]->best_mode; ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[1]->best_mode; ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[2]->best_mode; ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[3]->best_mode; } /* Add the left and top MPM modes for computation*/ ps_ctxt->au1_modes_to_eval_temp[i++] = cand_mode_satd_list[0]; ps_ctxt->au1_modes_to_eval_temp[i++] = cand_mode_satd_list[1]; i_end = i; count = 0; /*Remove duplicate modes from modes_to_eval_temp[] */ for(j = 0; j < i_end; j++) { for(k = 0; k < count; k++) { if(ps_ctxt->au1_modes_to_eval_temp[j] == ps_ctxt->au1_modes_to_eval[k]) break; } if((k == count) && (ps_ctxt->au1_modes_to_eval_temp[j] > 1)) { ps_ctxt->au1_modes_to_eval[count] = ps_ctxt->au1_modes_to_eval_temp[j]; count++; } } i_end = count; if(count == 0) { ps_ctxt->au1_modes_to_eval[0] = 26; i_end = 1; } for(i = 0; i < i_end; i++) { pu1_ref_orig = &ps_ctxt->au1_ref_samples[0]; pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0]; mode = ps_ctxt->au1_modes_to_eval[i]; ASSERT((mode >= 2) && (mode <= 34)); cost_amode_step2[i] = ps_ctxt->au2_mode_bits_satd_cost[mode]; filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(trans_size) - 2)); for(k = 0; k < num_tu_in_y; k++) { for(j = 0; j < num_tu_in_x; j++) { pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size); if(0 == filter_flag) pu1_ref = pu1_ref_orig; else pu1_ref = pu1_ref_filt; g_apf_lum_ip[g_i4_ip_funcs[mode]]( pu1_ref, 0, &ps_ctxt->au1_pred_samples[0], trans_size, trans_size, mode); if(ps_ctxt->u1_use_satd) { sad = apf_resd_trns_had[u1_trans_idx]( pu1_origin, ps_curr_src->i4_y_strd, &ps_ctxt->au1_pred_samples[0], 
trans_size, NULL, 0 ); } else { sad = ps_ctxt->s_ipe_optimised_function_list.pf_nxn_sad_computer( pu1_origin, ps_curr_src->i4_y_strd, &ps_ctxt->au1_pred_samples[0], trans_size, trans_size); } cost_amode_step2[i] += sad; pu1_ref_orig += (4 * MAX_CTB_SIZE + 1); pu1_ref_filt += (4 * MAX_CTB_SIZE + 1); } } } best_amode = ps_ctxt->au1_modes_to_eval[0]; /*Init cost indx */ cost_ang_mode = MAX_INTRA_COST_IPE; //cost_amode_step2[0]; for(z = 0; z < i_end; z++) { /* Least cost of all 3 angles are stored in cost_amode_step2[0] and corr. mode*/ if(cost_ang_mode >= cost_amode_step2[z]) { if(cost_ang_mode == cost_amode_step2[z]) { if(best_amode > ps_ctxt->au1_modes_to_eval[z]) best_amode = ps_ctxt->au1_modes_to_eval[z]; } else { best_amode = ps_ctxt->au1_modes_to_eval[z]; } cost_ang_mode = cost_amode_step2[z]; } } /*Modify mode bits for the angular modes */ } { /* Step - I modification */ ASSERT((best_amode >= 2) && (best_amode <= 34)); i_end = 0; z = 0; /* Around best level 3 angular mode, search for best level 1 mode */ ps_ctxt->au1_modes_to_eval[i_end++] = 0; ps_ctxt->au1_modes_to_eval[i_end++] = 1; if(best_amode != 2) ps_ctxt->au1_modes_to_eval[i_end++] = best_amode - 1; ps_ctxt->au1_modes_to_eval[i_end++] = best_amode; if(best_amode != 34) ps_ctxt->au1_modes_to_eval[i_end++] = best_amode + 1; /* Inserting step_2's best mode at last to avoid recalculation of it's SATD cost */ //ps_ctxt->au1_modes_to_eval[i_end] = best_amode; //Bugfix: HSAD compared with SAD //cost_amode_step2[i_end] = cost_ang_mode; /*best_sad[i_end] = cost_ang_mode - mode_bits_satd_cost[best_amode]; //See NOTE_A01 above */ cost_ang_mode = MAX_INTRA_COST_IPE; /* Init cost */ for(i = 0; i < i_end; i++) { WORD32 u1_trans_idx = trans_size >> 3; if(trans_size == 32) u1_trans_idx = 3; pu1_ref_orig = &ps_ctxt->au1_ref_samples[0]; pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0]; /*best_sad[i] = 0; //See NOTE_A01 above */ mode = ps_ctxt->au1_modes_to_eval[i]; cost_amode_step2[i] = 
ps_ctxt->au2_mode_bits_satd_cost[mode]; filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(trans_size) - 2)); for(k = 0; k < num_tu_in_y; k++) { for(j = 0; j < num_tu_in_x; j++) { pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size); if(0 == filter_flag) pu1_ref = pu1_ref_orig; else pu1_ref = pu1_ref_filt; g_apf_lum_ip[g_i4_ip_funcs[mode]]( pu1_ref, 0, &ps_ctxt->au1_pred_samples[0], trans_size, trans_size, mode); //if(trans_size != 4) { sad = apf_resd_trns_had[u1_trans_idx]( pu1_origin, ps_curr_src->i4_y_strd, &ps_ctxt->au1_pred_samples[0], trans_size, NULL, 0); } /*accumualting SATD though name says it is sad*/ cost_amode_step2[i] += sad; /*best_sad[i] +=sad; //See NOTE_A01 above */ pu1_ref_orig += (4 * MAX_CTB_SIZE + 1); pu1_ref_filt += (4 * MAX_CTB_SIZE + 1); } } } /* Updating i_end for the step_2's inserted mode*/ // i_end++; /* Arrange the reference array in ascending order */ for(i = 0; i < (i_end - 1); i++) { for(j = i + 1; j < i_end; j++) { if(cost_amode_step2[i] > cost_amode_step2[j]) { temp = cost_amode_step2[i]; cost_amode_step2[i] = cost_amode_step2[j]; cost_amode_step2[j] = temp; temp = modes_4x4[i]; modes_4x4[i] = modes_4x4[j]; modes_4x4[j] = temp; } } } /* Least cost of all 3 angles are stored in cost_amode_step2[0] and corr. mode*/ best_amode = ps_ctxt->au1_modes_to_eval[modes_4x4[0]]; cost_ang_mode = cost_amode_step2[0]; ps_cu_node->best_satd = cost_ang_mode - ps_ctxt->au2_mode_bits_satd_cost[best_amode]; ps_cu_node->best_cost = cost_amode_step2[0]; ps_cu_node->best_mode = ps_ctxt->au1_modes_to_eval[modes_4x4[0]]; ps_cu_node->best_satd = ps_cu_node->best_cost - ps_ctxt->au2_mode_bits_satd_cost[ps_cu_node->best_mode]; /*Accumalate best mode bits cost for RC*/ ps_cu_node->u2_mode_bits_cost = ps_ctxt->au2_mode_bits_satd[ps_cu_node->best_mode]; /* Store the best three candidates */ for(i = 0; i < 3; i++) { best_costs_4x4[i] = cost_amode_step2[i]; best_modes_4x4[i] = ps_ctxt->au1_modes_to_eval[modes_4x4[i]]; } } return; }