1 /******************************************************************************
2 *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /**
22 ******************************************************************************
23 * \file ihevce_enc_cu_recursion.c
24 *
25 * \brief
26 * This file contains Encoder normative loop pass related functions
27 *
28 * \date
29 * 18/09/2012
30 *
31 * \author
32 * Ittiam
33 *
34 *
35 * List of Functions
36 *
37 *
38 ******************************************************************************
39 */
40
41 /*****************************************************************************/
42 /* File Includes */
43 /*****************************************************************************/
44 /* System include files */
45 #include <stdio.h>
46 #include <string.h>
47 #include <stdlib.h>
48 #include <assert.h>
49 #include <stdarg.h>
50 #include <math.h>
51
52 /* User include files */
53 #include "ihevc_typedefs.h"
54 #include "itt_video_api.h"
55 #include "ihevce_api.h"
56
57 #include "rc_cntrl_param.h"
58 #include "rc_frame_info_collector.h"
59 #include "rc_look_ahead_params.h"
60
61 #include "ihevc_defs.h"
62 #include "ihevc_macros.h"
63 #include "ihevc_debug.h"
64 #include "ihevc_structs.h"
65 #include "ihevc_platform_macros.h"
66 #include "ihevc_deblk.h"
67 #include "ihevc_itrans_recon.h"
68 #include "ihevc_chroma_itrans_recon.h"
69 #include "ihevc_chroma_intra_pred.h"
70 #include "ihevc_intra_pred.h"
71 #include "ihevc_inter_pred.h"
72 #include "ihevc_mem_fns.h"
73 #include "ihevc_padding.h"
74 #include "ihevc_weighted_pred.h"
75 #include "ihevc_sao.h"
76 #include "ihevc_resi_trans.h"
77 #include "ihevc_quant_iquant_ssd.h"
78 #include "ihevc_cabac_tables.h"
79
80 #include "ihevce_defs.h"
81 #include "ihevce_hle_interface.h"
82 #include "ihevce_lap_enc_structs.h"
83 #include "ihevce_multi_thrd_structs.h"
84 #include "ihevce_multi_thrd_funcs.h"
85 #include "ihevce_me_common_defs.h"
86 #include "ihevce_had_satd.h"
87 #include "ihevce_error_codes.h"
88 #include "ihevce_bitstream.h"
89 #include "ihevce_cabac.h"
90 #include "ihevce_rdoq_macros.h"
91 #include "ihevce_function_selector.h"
92 #include "ihevce_enc_structs.h"
93 #include "ihevce_entropy_structs.h"
94 #include "ihevce_cmn_utils_instr_set_router.h"
95 #include "ihevce_ipe_instr_set_router.h"
96 #include "ihevce_decomp_pre_intra_structs.h"
97 #include "ihevce_decomp_pre_intra_pass.h"
98 #include "ihevce_enc_loop_structs.h"
99 #include "ihevce_global_tables.h"
100 #include "ihevce_nbr_avail.h"
101 #include "ihevce_enc_loop_utils.h"
102 #include "ihevce_bs_compute_ctb.h"
103 #include "ihevce_cabac_rdo.h"
104 #include "ihevce_dep_mngr_interface.h"
105 #include "ihevce_enc_loop_pass.h"
106 #include "ihevce_rc_enc_structs.h"
107 #include "ihevce_enc_cu_recursion.h"
108 #include "ihevce_stasino_helpers.h"
109
110 #include "cast_types.h"
111 #include "osal.h"
112 #include "osal_defaults.h"
113
114 /*****************************************************************************/
115 /* Macros */
116 /*****************************************************************************/
117 #define NUM_CTB_QUANT_ROUNDING 6
118
119 /*****************************************************************************/
120 /* Function Definitions */
121 /*****************************************************************************/
122
123 /**
124 *********************************************************************************
125 * Function name : ihevce_store_cu_final
126 *
127 * \brief
128 * This function store cu info to the enc loop cu context
129 *
130 * \param[in] ps_ctxt : pointer to enc loop context structure
131 * \param[in] ps_cu_final : pointer to enc loop output CU structure
132 * \param[in] pu1_ecd_data : ecd data pointer
133 * \param[in] ps_enc_out_ctxt : pointer to CU information structure
134 * \param[in] ps_cu_prms : pointer to cu level parameters for SATD / RDOPT
135 *
136 * \return
137 * None
138 *
139 **********************************************************************************/
ihevce_store_cu_final(ihevce_enc_loop_ctxt_t * ps_ctxt,cu_enc_loop_out_t * ps_cu_final,UWORD8 * pu1_ecd_data,ihevce_enc_cu_node_ctxt_t * ps_enc_out_ctxt,enc_loop_cu_prms_t * ps_cu_prms)140 void ihevce_store_cu_final(
141 ihevce_enc_loop_ctxt_t *ps_ctxt,
142 cu_enc_loop_out_t *ps_cu_final,
143 UWORD8 *pu1_ecd_data,
144 ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
145 enc_loop_cu_prms_t *ps_cu_prms)
146 {
147 enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
148 WORD32 i4_8x8_blks_in_cu;
149 WORD32 i4_br_id, i4_enc_frm_id;
150
151 WORD32 u4_tex_bits, u4_hdr_bits;
152 WORD32 i4_qscale, i4_qscale_ctb;
153 ps_enc_loop_bestprms = ps_enc_out_ctxt->ps_cu_prms;
154 i4_qscale = ((ps_ctxt->ps_rc_quant_ctxt->pi4_qp_to_qscale
155 [ps_enc_out_ctxt->i1_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]));
156 i4_qscale_ctb = ((
157 ps_ctxt->ps_rc_quant_ctxt
158 ->pi4_qp_to_qscale[ps_ctxt->i4_frame_mod_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]));
159
160 /* All texture bits accumulated */
161 u4_tex_bits = ps_enc_loop_bestprms->u4_cu_luma_res_bits +
162 ps_enc_loop_bestprms->u4_cu_chroma_res_bits +
163 ps_enc_loop_bestprms->u4_cu_cbf_bits;
164
165 u4_hdr_bits = ps_enc_loop_bestprms->u4_cu_hdr_bits;
166
167 i4_br_id = ps_ctxt->i4_bitrate_instance_num;
168 i4_enc_frm_id = ps_ctxt->i4_enc_frm_id;
169
170 i4_8x8_blks_in_cu = ((ps_enc_out_ctxt->u1_cu_size >> 3) * (ps_enc_out_ctxt->u1_cu_size >> 3));
171
172 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_open_loop_ssd +=
173 ps_enc_loop_bestprms
174 ->i8_cu_ssd; // + (((float)(ps_ctxt->i8_cl_ssd_lambda_qf/ (1<< LAMBDA_Q_SHIFT))) * ps_enc_loop_bestprms->u4_cu_hdr_bits);
175
176 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_open_loop_intra_sad +=
177 (UWORD32)(
178 ps_enc_loop_bestprms->u4_cu_open_intra_sad +
179 (((float)(ps_ctxt->i4_sad_lamda) / (1 << LAMBDA_Q_SHIFT)) *
180 ps_enc_loop_bestprms->u4_cu_hdr_bits));
181
182 if(1 == ps_enc_loop_bestprms->u1_intra_flag)
183 {
184 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_intra_sad_acc +=
185 ps_enc_loop_bestprms->u4_cu_sad;
186 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_intra_cost_acc +=
187 ps_enc_loop_bestprms->i8_best_rdopt_cost;
188 }
189 else
190 {
191 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_inter_sad_acc +=
192 ps_enc_loop_bestprms->u4_cu_sad;
193 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_inter_cost_acc +=
194 ps_enc_loop_bestprms->i8_best_rdopt_cost;
195 }
196 /*accumulating the frame level stats across frame*/
197 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_sad_acc +=
198 ps_enc_loop_bestprms->u4_cu_sad;
199
200 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_cost_acc +=
201 ps_enc_loop_bestprms->i8_best_rdopt_cost;
202
203 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_bits +=
204 (u4_tex_bits + u4_hdr_bits);
205
206 /*Total bits and header bits accumalted here for CTB*/
207 ps_ctxt->u4_total_cu_bits += (u4_tex_bits + u4_hdr_bits);
208 ps_ctxt->u4_total_cu_bits_mul_qs +=
209 ((ULWORD64)((u4_tex_bits + u4_hdr_bits) * (i4_qscale_ctb)) + (1 << (QSCALE_Q_FAC_3 - 1))) >>
210 QSCALE_Q_FAC_3;
211 ps_ctxt->u4_total_cu_hdr_bits += u4_hdr_bits;
212 ps_ctxt->u4_cu_tot_bits_into_qscale +=
213 ((ULWORD64)((u4_tex_bits + u4_hdr_bits) * (i4_qscale)) + (1 << (QSCALE_Q_FAC_3 - 1))) >>
214 QSCALE_Q_FAC_3;
215 ps_ctxt->u4_cu_tot_bits += (u4_tex_bits + u4_hdr_bits);
216
217 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_header_bits +=
218 u4_hdr_bits;
219
220 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
221 ->i8_sad_by_qscale[ps_enc_loop_bestprms->u1_intra_flag] +=
222 ((((LWORD64)ps_enc_loop_bestprms->u4_cu_sad) << SAD_BY_QSCALE_Q) / i4_qscale);
223
224 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
225 ->i4_qp_normalized_8x8_cu_sum[ps_enc_loop_bestprms->u1_intra_flag] +=
226 (i4_8x8_blks_in_cu * i4_qscale);
227
228 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
229 ->i4_8x8_cu_sum[ps_enc_loop_bestprms->u1_intra_flag] += i4_8x8_blks_in_cu;
230
231 /* PCM not supported */
232 ps_cu_final->b1_pcm_flag = 0;
233 ps_cu_final->b1_pred_mode_flag = ps_enc_loop_bestprms->u1_intra_flag;
234
235 ps_cu_final->b1_skip_flag = ps_enc_loop_bestprms->u1_skip_flag;
236 ps_cu_final->b1_tq_bypass_flag = 0;
237 ps_cu_final->b3_part_mode = ps_enc_loop_bestprms->u1_part_mode;
238
239 ps_cu_final->pv_coeff = pu1_ecd_data;
240
241 ps_cu_final->i1_cu_qp = ps_enc_out_ctxt->i1_cu_qp;
242 if(ps_enc_loop_bestprms->u1_is_cu_coded)
243 {
244 ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_enc_out_ctxt->i1_cu_qp;
245 }
246 else
247 {
248 ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_ctxt->i4_pred_qp;
249 }
250 ps_cu_final->b1_first_cu_in_qg = ps_enc_out_ctxt->b1_first_cu_in_qg;
251
252 /* Update the no residue flag. Needed for inter cu. */
253 /* Needed for deblocking inter/intra both */
254 //if(ps_cu_final->b1_pred_mode_flag == PRED_MODE_INTER)
255 {
256 ps_cu_final->b1_no_residual_syntax_flag = !ps_enc_loop_bestprms->u1_is_cu_coded;
257 }
258
259 /* store the number of TUs */
260 ps_cu_final->u2_num_tus_in_cu = ps_enc_loop_bestprms->u2_num_tus_in_cu;
261
262 /* ---- copy the TUs to final structure ----- */
263 memcpy(
264 ps_cu_final->ps_enc_tu,
265 &ps_enc_loop_bestprms->as_tu_enc_loop[0],
266 ps_enc_loop_bestprms->u2_num_tus_in_cu * sizeof(tu_enc_loop_out_t));
267
268 /* ---- copy the PUs to final structure ----- */
269 memcpy(
270 ps_cu_final->ps_pu,
271 &ps_enc_loop_bestprms->as_pu_enc_loop[0],
272 ps_enc_loop_bestprms->u2_num_pus_in_cu * sizeof(pu_t));
273
274 /* --- copy reminder and prev_flags ----- */
275 /* only required for intra */
276 if(PRED_MODE_INTRA == ps_cu_final->b1_pred_mode_flag)
277 {
278 memcpy(
279 &ps_cu_final->as_prev_rem[0],
280 &ps_enc_loop_bestprms->as_intra_prev_rem[0],
281 ps_enc_loop_bestprms->u2_num_tus_in_cu * sizeof(intra_prev_rem_flags_t));
282
283 ps_cu_final->b3_chroma_intra_pred_mode = ps_enc_loop_bestprms->u1_chroma_intra_pred_mode;
284 }
285
286 /* --------------------------------------------------- */
287 /* ---- Boundary Strength Calculation at CU level ---- */
288 /* --------------------------------------------------- */
289 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
290 {
291 WORD32 num_4x4_in_ctb;
292 nbr_4x4_t *ps_left_nbr_4x4;
293 nbr_4x4_t *ps_top_nbr_4x4;
294 nbr_4x4_t *ps_curr_nbr_4x4;
295 WORD32 nbr_4x4_left_strd;
296
297 num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
298
299 ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0];
300 ps_curr_nbr_4x4 += (ps_enc_out_ctxt->b3_cu_pos_x << 1);
301 ps_curr_nbr_4x4 += ((ps_enc_out_ctxt->b3_cu_pos_y << 1) * num_4x4_in_ctb);
302
303 /* CU left */
304 if(0 == ps_enc_out_ctxt->b3_cu_pos_x)
305 {
306 ps_left_nbr_4x4 = &ps_ctxt->as_left_col_nbr[0];
307 ps_left_nbr_4x4 += ps_enc_out_ctxt->b3_cu_pos_y << 1;
308 nbr_4x4_left_strd = 1;
309 }
310 else
311 {
312 /* inside CTB */
313 ps_left_nbr_4x4 = ps_curr_nbr_4x4 - 1;
314 nbr_4x4_left_strd = num_4x4_in_ctb;
315 }
316
317 /* CU top */
318 if(0 == ps_enc_out_ctxt->b3_cu_pos_y)
319 {
320 /* CTB boundary */
321 ps_top_nbr_4x4 = ps_ctxt->ps_top_row_nbr;
322 ps_top_nbr_4x4 += (ps_cu_prms->i4_ctb_pos * (ps_cu_prms->i4_ctb_size >> 2));
323 ps_top_nbr_4x4 += (ps_enc_out_ctxt->b3_cu_pos_x << 1);
324 }
325 else
326 {
327 /* inside CTB */
328 ps_top_nbr_4x4 = ps_curr_nbr_4x4 - num_4x4_in_ctb;
329 }
330
331 ihevce_bs_compute_cu(
332 ps_cu_final,
333 ps_top_nbr_4x4,
334 ps_left_nbr_4x4,
335 ps_curr_nbr_4x4,
336 nbr_4x4_left_strd,
337 num_4x4_in_ctb,
338 &ps_ctxt->s_deblk_bs_prms);
339 }
340 }
341
342 /**
343 *********************************************************************************
344 * Function name : ihevce_store_cu_results
345 *
346 * \brief
347 * This function store cu result to cu info context
348 *
349 * \param[in] ps_ctxt : pointer to enc loop context structure
350 * \param[out] ps_cu_prms : pointer to cu level parameters for SATD / RDOPT
351 *
352 * \return
353 * None
354 *
355 **********************************************************************************/
ihevce_store_cu_results(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms,final_mode_state_t * ps_final_state)356 void ihevce_store_cu_results(
357 ihevce_enc_loop_ctxt_t *ps_ctxt,
358 enc_loop_cu_prms_t *ps_cu_prms,
359 final_mode_state_t *ps_final_state)
360 {
361 ihevce_enc_cu_node_ctxt_t *ps_enc_tmp_out_ctxt;
362 nbr_4x4_t *ps_nbr_4x4, *ps_tmp_nbr_4x4, *ps_curr_nbr_4x4;
363
364 UWORD8 *pu1_recon, *pu1_final_recon;
365 WORD32 num_4x4_in_ctb, ctr;
366 WORD32 num_4x4_in_cu;
367 UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
368 WORD32 cu_depth, log2_ctb_size, log2_cu_size;
369
370 ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt;
371 (void)ps_final_state;
372 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
373 {
374 /* ---- copy the child luma recon back to curr. recon -------- */
375 pu1_recon = (UWORD8 *)ps_ctxt->pv_cu_luma_recon;
376
377 /* based on CU position derive the luma pointers */
378 pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 3);
379
380 pu1_final_recon +=
381 ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride);
382
383 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
384 pu1_final_recon,
385 ps_cu_prms->i4_luma_recon_stride,
386 pu1_recon,
387 ps_enc_tmp_out_ctxt->u1_cu_size,
388 ps_enc_tmp_out_ctxt->u1_cu_size,
389 ps_enc_tmp_out_ctxt->u1_cu_size);
390
391 /* ---- copy the child chroma recon back to curr. recon -------- */
392 pu1_recon = (UWORD8 *)ps_ctxt->pv_cu_chrma_recon;
393
394 /* based on CU position derive the chroma pointers */
395 pu1_final_recon = ps_cu_prms->pu1_chrm_recon + (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 3);
396
397 pu1_final_recon +=
398 ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << (u1_is_422 + 2)) *
399 ps_cu_prms->i4_chrm_recon_stride);
400
401 /* Cb and Cr pixel interleaved */
402 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
403 pu1_final_recon,
404 ps_cu_prms->i4_chrm_recon_stride,
405 pu1_recon,
406 ps_enc_tmp_out_ctxt->u1_cu_size,
407 ps_enc_tmp_out_ctxt->u1_cu_size,
408 (ps_enc_tmp_out_ctxt->u1_cu_size >> (0 == u1_is_422)));
409 }
410 #else
411 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
412 {
413 /* ---- copy the child luma recon back to curr. recon -------- */
414 pu1_recon = (UWORD8 *)ps_ctxt->pv_cu_luma_recon;
415
416 /* based on CU position derive the luma pointers */
417 pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 3);
418
419 pu1_final_recon +=
420 ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride);
421
422 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
423 pu1_final_recon,
424 ps_cu_prms->i4_luma_recon_stride,
425 pu1_recon,
426 ps_enc_tmp_out_ctxt->u1_cu_size,
427 ps_enc_tmp_out_ctxt->u1_cu_size,
428 ps_enc_tmp_out_ctxt->u1_cu_size);
429
430 /* ---- copy the child chroma recon back to curr. recon -------- */
431 pu1_recon = (UWORD8 *)ps_ctxt->pv_cu_chrma_recon;
432
433 /* based on CU position derive the chroma pointers */
434 pu1_final_recon = ps_cu_prms->pu1_chrm_recon + (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 3);
435
436 pu1_final_recon +=
437 ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << (u1_is_422 + 2)) *
438 ps_cu_prms->i4_chrm_recon_stride);
439
440 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
441 pu1_final_recon,
442 ps_cu_prms->i4_chrm_recon_stride,
443 pu1_recon,
444 ps_enc_tmp_out_ctxt->u1_cu_size,
445 ps_enc_tmp_out_ctxt->u1_cu_size,
446 (ps_enc_tmp_out_ctxt->u1_cu_size >> (0 == u1_is_422)));
447 }
448 #endif
449 /*copy qp for qg*/
450 {
451 WORD32 i4_num_8x8, i4_x, i4_y;
452 WORD32 i4_cu_pos_x, i4_cu_pox_y;
453 i4_num_8x8 = ps_enc_tmp_out_ctxt->u1_cu_size >> 3;
454 i4_cu_pos_x = ps_enc_tmp_out_ctxt->b3_cu_pos_x;
455 i4_cu_pox_y = ps_enc_tmp_out_ctxt->b3_cu_pos_y;
456 for(i4_y = 0; i4_y < i4_num_8x8; i4_y++)
457 {
458 for(i4_x = 0; i4_x < i4_num_8x8; i4_x++)
459 {
460 if(ps_enc_tmp_out_ctxt->ps_cu_prms->u1_is_cu_coded)
461 {
462 ps_ctxt->ai4_qp_qg[((i4_cu_pox_y + i4_y) * 8) + (i4_cu_pos_x + i4_x)] =
463 ps_ctxt->i4_cu_qp;
464 }
465 else
466 {
467 ps_ctxt->ai4_qp_qg[((i4_cu_pox_y + i4_y) * 8) + (i4_cu_pos_x + i4_x)] =
468 ps_ctxt->i4_pred_qp;
469 }
470 }
471 }
472 }
473
474 /* ------ copy the nbr 4x4 to final output ------ */
475 num_4x4_in_cu = ps_enc_tmp_out_ctxt->u1_cu_size >> 2;
476 num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
477
478 ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0];
479 ps_curr_nbr_4x4 += (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 1);
480 ps_curr_nbr_4x4 += ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << 1) * num_4x4_in_ctb);
481 ps_tmp_nbr_4x4 = ps_curr_nbr_4x4;
482
483 ps_nbr_4x4 = ps_ctxt->ps_cu_recur_nbr;
484
485 GETRANGE(log2_ctb_size, ps_cu_prms->i4_ctb_size);
486 GETRANGE(log2_cu_size, ps_enc_tmp_out_ctxt->u1_cu_size);
487 cu_depth = log2_ctb_size - log2_cu_size;
488
489 ASSERT(cu_depth <= 3);
490 ASSERT(cu_depth >= 0);
491
492 /*assign qp for all 4x4 nbr blocks*/
493 for(ctr = 0; ctr < num_4x4_in_cu * num_4x4_in_cu; ctr++, ps_nbr_4x4++)
494 {
495 ps_nbr_4x4->b1_skip_flag = ps_enc_tmp_out_ctxt->s_cu_prms.u1_skip_flag;
496 ps_nbr_4x4->b2_cu_depth = cu_depth;
497 ps_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
498 }
499
500 ps_nbr_4x4 = ps_ctxt->ps_cu_recur_nbr;
501
502 for(ctr = 0; ctr < num_4x4_in_cu; ctr++)
503 {
504 memcpy(ps_tmp_nbr_4x4, ps_nbr_4x4, num_4x4_in_cu * sizeof(nbr_4x4_t));
505
506 ps_tmp_nbr_4x4 += num_4x4_in_ctb;
507 ps_nbr_4x4 += num_4x4_in_cu;
508 }
509 }
510
511 /**
512 *********************************************************************************
513 * Function name : ihevce_populate_cu_struct
514 *
515 * \brief
516 * This function populate cu struct
517 *
518 * \param[in] ps_ctxt : pointer to enc loop context structure
519 * \param[in] ps_cur_ipe_ctb : pointer to IPE L0 analyze structure
520 * \param[in] ps_cu_tree_analyse : pointer to Structure for CU recursion
521 * \param[in] ps_best_results : pointer to strcuture contain result for partition type of CU
522 * \param[in] ps_cu_out : pointer to structre contain mode analysis info
523 * \param[in] i4_32x32_id : noise estimation id
524 * \param[in] u1_num_best_results : num best result value
525 *
526 * \return
527 * None
528 *
529 **********************************************************************************/
ihevce_populate_cu_struct(ihevce_enc_loop_ctxt_t * ps_ctxt,ipe_l0_ctb_analyse_for_me_t * ps_cur_ipe_ctb,cur_ctb_cu_tree_t * ps_cu_tree_analyse,part_type_results_t * ps_best_results,cu_analyse_t * ps_cu_out,WORD32 i4_32x32_id,UWORD8 u1_is_cu_noisy,UWORD8 u1_num_best_results)530 void ihevce_populate_cu_struct(
531 ihevce_enc_loop_ctxt_t *ps_ctxt,
532 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
533 cur_ctb_cu_tree_t *ps_cu_tree_analyse,
534 part_type_results_t *ps_best_results,
535 cu_analyse_t *ps_cu_out,
536 WORD32 i4_32x32_id,
537 #if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
538 UWORD8 u1_is_cu_noisy,
539 #endif
540 UWORD8 u1_num_best_results)
541 {
542 cu_inter_cand_t *ps_cu_candt;
543
544 WORD32 j;
545 /* open loop intra cost by IPE */
546 WORD32 intra_cost_ol;
547 /* closed loop intra cost based on empirical coding noise estimate */
548 WORD32 intra_cost_cl_est = 0;
549 /* closed loop intra coding noise estimate */
550 WORD32 intra_noise_cl_est;
551 WORD32 num_results_to_copy = 0;
552
553 WORD32 found_intra = 0;
554 WORD32 quality_preset = ps_ctxt->i4_quality_preset;
555 WORD32 frm_qp = ps_ctxt->i4_frame_qp;
556 WORD32 frm_qstep_multiplier = gau4_frame_qstep_multiplier[frm_qp - 1];
557 WORD32 frm_qstep = ps_ctxt->i4_frame_qstep;
558 UWORD8 u1_cu_size = ps_cu_tree_analyse->u1_cu_size;
559 UWORD8 u1_x_off = ps_cu_tree_analyse->b3_cu_pos_x << 3;
560 UWORD8 u1_y_off = ps_cu_tree_analyse->b3_cu_pos_y << 3;
561 UWORD8 u1_threshold_multi;
562 switch(quality_preset)
563 {
564 case IHEVCE_QUALITY_P0:
565 case IHEVCE_QUALITY_P2:
566 {
567 num_results_to_copy =
568 MIN(MAX_NUMBER_OF_INTER_RDOPT_CANDS_IN_PQ_AND_HQ, u1_num_best_results);
569 break;
570 }
571 case IHEVCE_QUALITY_P3:
572 {
573 num_results_to_copy = MIN(MAX_NUMBER_OF_INTER_RDOPT_CANDS_IN_MS, u1_num_best_results);
574 break;
575 }
576 case IHEVCE_QUALITY_P4:
577 case IHEVCE_QUALITY_P5:
578 case IHEVCE_QUALITY_P6:
579 {
580 num_results_to_copy =
581 MIN(MAX_NUMBER_OF_INTER_RDOPT_CANDS_IN_HS_AND_XS, u1_num_best_results);
582 break;
583 }
584 }
585
586 ps_cu_out->u1_num_inter_cands = 0;
587
588 /***************************************************************/
589 /* Depending CU size that has won in ME, */
590 /* Estimate the closed loop intra cost for enabling intra */
591 /* evaluation in rdopt stage based on preset */
592 /***************************************************************/
593 switch(u1_cu_size)
594 {
595 case 64:
596 {
597 /* coding noise estimate for intra closed loop cost */
598 intra_cost_ol = ps_cur_ipe_ctb->i4_best64x64_intra_cost - frm_qstep * 256;
599
600 intra_noise_cl_est = (frm_qstep * frm_qstep_multiplier) + (intra_cost_ol >> 4);
601
602 intra_noise_cl_est = MIN(intra_noise_cl_est, (frm_qstep * 16)) * 16;
603
604 intra_cost_cl_est = intra_cost_ol + intra_noise_cl_est;
605 break;
606 }
607 case 32:
608 {
609 /* coding noise estimate for intra closed loop cost */
610 intra_cost_ol = ps_cur_ipe_ctb->ai4_best32x32_intra_cost[i4_32x32_id] - frm_qstep * 64;
611
612 intra_noise_cl_est = (frm_qstep * frm_qstep_multiplier) + (intra_cost_ol >> 4);
613
614 intra_noise_cl_est = MIN(intra_noise_cl_est, (frm_qstep * 16)) * 4;
615
616 intra_cost_cl_est = intra_cost_ol + intra_noise_cl_est;
617 break;
618 }
619 case 16:
620 {
621 /* coding noise estimate for intra closed loop cost */
622 intra_cost_ol =
623 ps_cur_ipe_ctb->ai4_best16x16_intra_cost[(u1_x_off >> 4) + ((u1_y_off >> 4) << 2)] -
624 frm_qstep * 16;
625
626 intra_noise_cl_est = (frm_qstep * frm_qstep_multiplier) + (intra_cost_ol >> 4);
627
628 intra_noise_cl_est = MIN(intra_noise_cl_est, (frm_qstep * 16));
629
630 intra_cost_cl_est = intra_cost_ol + intra_noise_cl_est;
631 break;
632 }
633 case 8:
634 {
635 /* coding noise estimate for intra closed loop cost */
636 intra_cost_ol =
637 ps_cur_ipe_ctb->ai4_best8x8_intra_cost[(u1_x_off >> 3) + u1_y_off] - frm_qstep * 4;
638
639 intra_noise_cl_est = (frm_qstep * frm_qstep_multiplier) + (intra_cost_ol >> 4);
640
641 intra_noise_cl_est = MIN(intra_noise_cl_est, (frm_qstep * 16)) >> 2;
642
643 intra_cost_cl_est = intra_cost_ol + intra_noise_cl_est;
644 break;
645 }
646 }
647 #if DISABLE_INTER_CANDIDATES
648 return;
649 #endif
650
651 u1_threshold_multi = 1;
652 #if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
653 if(u1_is_cu_noisy)
654 {
655 intra_cost_cl_est = INT_MAX;
656 }
657 #endif
658
659 ps_cu_candt = ps_cu_out->as_cu_inter_cand;
660
661 /* Check if the first best candidate is inter or intra */
662 if(ps_best_results[0].as_pu_results[0].pu.b1_intra_flag)
663 {
664 ps_cu_out->u1_best_is_intra = 1;
665 }
666 else
667 {
668 ps_cu_out->u1_best_is_intra = 0;
669 }
670
671 for(j = 0; j < u1_num_best_results; j++)
672 {
673 part_type_results_t *ps_best = &ps_best_results[j];
674
675 if(ps_best->as_pu_results[0].pu.b1_intra_flag)
676 {
677 found_intra = 1;
678 }
679 else
680 {
681 /* populate the TU split flags, 4 flags copied as max cu can be 64 */
682 memcpy(ps_cu_candt->ai4_tu_split_flag, ps_best->ai4_tu_split_flag, 4 * sizeof(WORD32));
683
684 /* populate the TU early CBF flags, 4 flags copied as max cu can be 64 */
685 memcpy(ps_cu_candt->ai4_tu_early_cbf, ps_best->ai4_tu_early_cbf, 4 * sizeof(WORD32));
686
687 /* Note: the enums of part size and me part types shall match */
688 ps_cu_candt->b3_part_size = ps_best->u1_part_type;
689
690 /* ME will always set the skip flag to 0 */
691 /* in closed loop skip will be added as a candidate */
692 ps_cu_candt->b1_skip_flag = 0;
693
694 /* copy the inter pus : Note: assuming NxN part type is not supported */
695 ps_cu_candt->as_inter_pu[0] = ps_best->as_pu_results[0].pu;
696
697 ps_cu_candt->as_inter_pu[0].b1_merge_flag = 0;
698
699 /* Copy the total cost of the CU candt */
700 ps_cu_candt->i4_total_cost = ps_best->i4_tot_cost;
701
702 ps_cu_out->ai4_mv_cost[ps_cu_out->u1_num_inter_cands][0] =
703 ps_best->as_pu_results[0].i4_mv_cost;
704
705 #if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
706 ps_cu_out->ai4_err_metric[ps_cu_out->u1_num_inter_cands][0] =
707 ps_best->as_pu_results[0].i4_tot_cost - ps_best->as_pu_results[0].i4_mv_cost;
708 #endif
709
710 if(ps_best->u1_part_type)
711 {
712 ps_cu_candt->as_inter_pu[1] = ps_best->as_pu_results[1].pu;
713 ps_cu_out->ai4_mv_cost[ps_cu_out->u1_num_inter_cands][1] =
714 ps_best->as_pu_results[1].i4_mv_cost;
715 #if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
716 ps_cu_out->ai4_err_metric[ps_cu_out->u1_num_inter_cands][1] =
717 ps_best->as_pu_results[1].i4_tot_cost - ps_best->as_pu_results[1].i4_mv_cost;
718 #endif
719
720 ps_cu_candt->as_inter_pu[1].b1_merge_flag = 0;
721 }
722
723 ps_cu_candt++;
724 ps_cu_out->u1_num_inter_cands++;
725 if(intra_cost_cl_est < ((ps_best->i4_tot_cost * u1_threshold_multi) >> 0))
726 {
727 /* The rationale - */
728 /* Artefacts were being observed in some sequences, */
729 /* Brooklyn_1080p in particular - where it was readily */
730 /* apparent. The cause was coding of CU's as inter CU's */
731 /* when they actually needed to be coded as intra CU's. */
732 /* This was observed during either fade-outs aor flashes. */
733 /* After tinkering with the magnitude of the coding noise */
734 /* factor that was added to the intra cost to see when the */
735 /* artefacts in Brooklyn vanished, it was observed that the */
736 /* factor multiplied with the frame_qstep followed a pattern. */
737 /* When the pattern was subjected to a regression analysis, the */
738 /* formula seen below emerged. Also note the fact that the coding */
739 /* noise factor is the product of the frame_qstep and a constant */
740 /* multiplier */
741
742 /*UWORD32 frm_qstep_multiplier =
743 -3.346 * log((float)frm_qstep) + 15.925;*/
744 found_intra = 1;
745 }
746
747 if(ps_cu_out->u1_num_inter_cands >= num_results_to_copy)
748 {
749 break;
750 }
751 }
752 }
753
754 if(quality_preset < IHEVCE_QUALITY_P4)
755 {
756 found_intra = 1;
757 }
758
759 if(!found_intra)
760 {
761 /* rdopt evaluation of intra disabled as inter is clear winner */
762 ps_cu_out->u1_num_intra_rdopt_cands = 0;
763
764 /* all the modes invalidated */
765 ps_cu_out->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
766 ps_cu_out->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
767 ps_cu_out->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][0] = 255;
768 ps_cu_out->u1_chroma_intra_pred_mode = 255;
769
770 /* no intra candt to verify */
771 ps_cu_out->s_cu_intra_cand.b6_num_intra_cands = 0;
772 }
773 }
774
775 /**
776 *********************************************************************************
777 * Function name : ihevce_create_child_nodes_cu_tree
778 *
779 * \brief
780 * This function create child node from cu tree
781 *
782 * \param[in] ps_cu_tree_root : pointer to Structure for CU recursion
783 * \param[out] ps_cu_tree_cur_node : pointer to Structure for CU recursion
784 * \param[in] ai4_child_node_enable : child node enable flag
785 * \param[in] nodes_already_created : already created node value
786 * \return
787 * None
788 *
789 **********************************************************************************/
ihevce_create_child_nodes_cu_tree(cur_ctb_cu_tree_t * ps_cu_tree_root,cur_ctb_cu_tree_t * ps_cu_tree_cur_node,WORD32 * ai4_child_node_enable,WORD32 nodes_already_created)790 WORD32 ihevce_create_child_nodes_cu_tree(
791 cur_ctb_cu_tree_t *ps_cu_tree_root,
792 cur_ctb_cu_tree_t *ps_cu_tree_cur_node,
793 WORD32 *ai4_child_node_enable,
794 WORD32 nodes_already_created)
795 {
796 cur_ctb_cu_tree_t *ps_tl;
797 cur_ctb_cu_tree_t *ps_tr;
798 cur_ctb_cu_tree_t *ps_bl;
799 cur_ctb_cu_tree_t *ps_br;
800
801 ps_tl = ps_cu_tree_root + nodes_already_created;
802 ps_tr = ps_tl + 1;
803 ps_bl = ps_tr + 1;
804 ps_br = ps_bl + 1;
805
806 if(1 == ps_cu_tree_cur_node->is_node_valid)
807 {
808 ps_tl = (ai4_child_node_enable[0]) ? ps_tl : NULL;
809 ps_tr = (ai4_child_node_enable[1]) ? ps_tr : NULL;
810 ps_bl = (ai4_child_node_enable[2]) ? ps_bl : NULL;
811 ps_br = (ai4_child_node_enable[3]) ? ps_br : NULL;
812
813 /* In incomplete CTB, if any of the child nodes are assigned to NULL */
814 /* then parent node ceases to be valid */
815 if((ps_tl == NULL) || (ps_tr == NULL) || (ps_br == NULL) || (ps_bl == NULL))
816 {
817 ps_cu_tree_cur_node->is_node_valid = 0;
818 }
819 }
820 ps_cu_tree_cur_node->ps_child_node_tl = ps_tl;
821 ps_cu_tree_cur_node->ps_child_node_tr = ps_tr;
822 ps_cu_tree_cur_node->ps_child_node_bl = ps_bl;
823 ps_cu_tree_cur_node->ps_child_node_br = ps_br;
824
825 return 4;
826 }
827
/**
*********************************************************************************
* Function name : ihevce_populate_cu_tree
*
* \brief
*    This function populates the CU tree of a CTB using the IPE L0 analysis
*    of the CTB
*
* \param[in] ps_cur_ipe_ctb : pointer to IPE L0 analyze structure of the CTB
* \param[out] ps_cu_tree : pointer to Structure for CU recursion; the function
*                          returns immediately when this is NULL
* \param[in] tree_depth : depth of the node in the CU tree
*                         (0 : 64x64, 1 : 32x32, 2 : 16x16)
* \param[in] e_quality_preset : quality preset
* \param[in] e_grandparent_blk_pos : position of the grandparent block
*                                    (TODO confirm against the function body)
* \param[in] e_parent_blk_pos : position of the parent block; at depth 2 it
*                               selects the 32x32 block containing the node
* \param[in] e_cur_blk_pos : position of the current block within its parent
*                            (bit 0 : right half, bit 1 : bottom half)
*
* \return
*    None
*
**********************************************************************************/
/*
 * Implementation notes for ihevce_populate_cu_tree:
 *  - tree_depth selects the CU size of the node being filled:
 *    0 -> 64x64, 1 -> 32x32, 2 -> 16x16, 3 -> 8x8.
 *  - cu_pos_x / cu_pos_y are stored in units of 8x8 blocks inside the CTB.
 *  - When a node needs children they are created through
 *    ihevce_create_child_nodes_cu_tree() and populated recursively in
 *    TL, TR, BL, BR order; the current block position becomes the child's
 *    parent position and the parent position becomes its grandparent position.
 *  - A NULL ps_cu_tree is accepted and ignored.
 */
void ihevce_populate_cu_tree(
    ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
    cur_ctb_cu_tree_t *ps_cu_tree,
    WORD32 tree_depth,
    IHEVCE_QUALITY_CONFIG_T e_quality_preset,
    CU_POS_T e_grandparent_blk_pos,
    CU_POS_T e_parent_blk_pos,
    CU_POS_T e_cur_blk_pos)
{
    /* Only consumed when children_nodes_required is set, which happens   */
    /* solely in the depth 0..2 cases that fill all four entries; zeroed  */
    /* so that no path leaves the array indeterminate                     */
    WORD32 ai4_child_enable[4] = { 0, 0, 0, 0 };
    WORD32 children_nodes_required = 0;
    WORD32 node_validity = 0;
    WORD32 cu_size = 0;
    WORD32 cu_pos_x = 0;
    WORD32 cu_pos_y = 0;
    WORD32 i;

    /* For presets >= P2 the IPE split flags (instead of the merge flags) */
    /* drive node validity and the decision to descend                    */
    const WORD32 use_ipe_split_flags = (e_quality_preset >= IHEVCE_QUALITY_P2);

    /* Nothing to fill in when no node was supplied */
    if(NULL == ps_cu_tree)
    {
        return;
    }

    switch(tree_depth)
    {
    case 0:
    {
        /* 64x64 block : always located at the CTB origin */
        intra32_analyse_t *ps_intra32_analyse = ps_cur_ipe_ctb->as_intra32_analyse;

        cu_size = 64;
        cu_pos_x = 0;
        cu_pos_y = 0;

        node_validity = !ps_cur_ipe_ctb->u1_split_flag;

        /* Children are always created, except that presets >= P2 stop */
        /* at a valid 64x64 node                                        */
        children_nodes_required = 1;

        if(use_ipe_split_flags && (1 == node_validity))
        {
            children_nodes_required = 0;
        }

        for(i = 0; i < 4; i++)
        {
            ai4_child_enable[i] = ps_intra32_analyse[i].b1_valid_cu;
        }

        break;
    }
    case 1:
    {
        /* 32x32 block */
        intra32_analyse_t *ps_intra32 = &ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos];
        intra16_analyse_t *ps_intra16_analyse = ps_intra32->as_intra16_analyse;

        WORD32 valid_flag_32 = ps_intra32->b1_valid_cu;

        cu_size = 32;

        /* pos_x and pos_y are in units of 8x8 CU's       */
        /*   pos_x = 0 for TL and BL, 4 for TR and BR      */
        /*   pos_y = 0 for TL and TR, 4 for BL and BR      */
        cu_pos_x = (e_cur_blk_pos & 1) << 2;
        cu_pos_y = (e_cur_blk_pos & 2) << 1;

        if(use_ipe_split_flags)
        {
            node_validity = (!ps_intra32->b1_split_flag) && valid_flag_32;

            /* A valid node is a leaf; otherwise follow the 32x32 split flag */
            children_nodes_required = node_validity ? 0 : ps_intra32->b1_split_flag;
        }
        else
        {
            node_validity = (ps_intra32->b1_merge_flag) && valid_flag_32;

            children_nodes_required = !node_validity || ps_cur_ipe_ctb->u1_split_flag;
        }

        for(i = 0; i < 4; i++)
        {
            ai4_child_enable[i] = ps_intra16_analyse[i].b1_valid_cu;
        }

        break;
    }
    case 2:
    {
        /* 16x16 block */
        intra32_analyse_t *ps_intra32 = &ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos];
        intra16_analyse_t *ps_intra16 = &ps_intra32->as_intra16_analyse[e_cur_blk_pos];
        intra8_analyse_t *ps_intra8_analyse = ps_intra16->as_intra8_analyse;

        WORD32 valid_flag_16 = ps_intra16->b1_valid_cu;
        WORD32 merge_flag_16 = ps_intra16->b1_merge_flag;
        WORD32 merge_flag_32 = ps_intra32->b1_merge_flag;

        /* CTB is split and the enclosing 32x32 block was not merged */
        WORD32 ctb_split_and_32_not_merged =
            (ps_cur_ipe_ctb->u1_split_flag) && (!merge_flag_32);

        cu_size = 16;

        /* Offset of the parent 32x32 (0 or 4) plus offset inside it (0 or 2), */
        /* all in units of 8x8 CU's                                             */
        cu_pos_x = ((e_parent_blk_pos & 1) << 2) + ((e_cur_blk_pos & 1) << 1);
        cu_pos_y = ((e_parent_blk_pos & 2) << 1) + (e_cur_blk_pos & 2);

        /* NOTE(review): this code used to carry an                          */
        /* '#if !ENABLE_UNIFORM_CU_SIZE_8x8 ... #else node_validity = 0'     */
        /* selection whose result was unconditionally overwritten by the     */
        /* merge-flag expression below, i.e. the switch had no effect. The   */
        /* dead store is dropped here; confirm whether forcing 8x8 CUs was   */
        /* ever meant to be honoured at this point.                          */
        if(use_ipe_split_flags)
        {
            node_validity = (!ps_intra16->b1_split_flag) && valid_flag_16;

            children_nodes_required = !node_validity;
        }
        else
        {
            node_validity = ((merge_flag_16) || ctb_split_and_32_not_merged) && valid_flag_16;

            children_nodes_required = ctb_split_and_32_not_merged || !merge_flag_16;
        }

        for(i = 0; i < 4; i++)
        {
            ai4_child_enable[i] = ps_intra8_analyse[i].b1_valid_cu;
        }

        break;
    }
    case 3:
    {
        /* 8x8 block : leaf level, never has children */
        WORD32 valid_flag_8 = (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos]
                                   .as_intra16_analyse[e_parent_blk_pos]
                                   .as_intra8_analyse[e_cur_blk_pos]
                                   .b1_valid_cu);

        cu_size = 8;

        /* 32x32 offset (0 or 4) + 16x16 offset (0 or 2) + 8x8 offset (0 or 1) */
        cu_pos_x = ((e_grandparent_blk_pos & 1) << 2) + ((e_parent_blk_pos & 1) << 1) +
                   (e_cur_blk_pos & 1);
        cu_pos_y = ((e_grandparent_blk_pos & 2) << 1) + (e_parent_blk_pos & 2) +
                   ((e_cur_blk_pos & 2) >> 1);

        node_validity = (0 != valid_flag_8);

        children_nodes_required = 0;

        break;
    }
    default:
    {
        /* Depth outside 0..3 : the node is left as an invalid leaf */
        /* (validity 0, size 0, position 0, no children)            */
        break;
    }
    }

    /* Fill the current cu_tree node */
    ps_cu_tree->is_node_valid = node_validity;
    ps_cu_tree->u1_cu_size = cu_size;
    ps_cu_tree->b3_cu_pos_x = cu_pos_x;
    ps_cu_tree->b3_cu_pos_y = cu_pos_y;

    if(!children_nodes_required)
    {
        /* Leaf node */
        ps_cu_tree->ps_child_node_tl = NULL;
        ps_cu_tree->ps_child_node_tr = NULL;
        ps_cu_tree->ps_child_node_bl = NULL;
        ps_cu_tree->ps_child_node_br = NULL;

        return;
    }

    /* Create the children and account for them in the CTB's node count */
    ps_cur_ipe_ctb->nodes_created_in_cu_tree += ihevce_create_child_nodes_cu_tree(
        ps_cur_ipe_ctb->ps_cu_tree_root,
        ps_cu_tree,
        ai4_child_enable,
        ps_cur_ipe_ctb->nodes_created_in_cu_tree);

    /* Descend one level : positions shift up by one generation. Each child */
    /* pointer is read just before its own call, as in the original code.   */
    ihevce_populate_cu_tree(
        ps_cur_ipe_ctb,
        ps_cu_tree->ps_child_node_tl,
        tree_depth + 1,
        e_quality_preset,
        e_parent_blk_pos,
        e_cur_blk_pos,
        POS_TL);

    ihevce_populate_cu_tree(
        ps_cur_ipe_ctb,
        ps_cu_tree->ps_child_node_tr,
        tree_depth + 1,
        e_quality_preset,
        e_parent_blk_pos,
        e_cur_blk_pos,
        POS_TR);

    ihevce_populate_cu_tree(
        ps_cur_ipe_ctb,
        ps_cu_tree->ps_child_node_bl,
        tree_depth + 1,
        e_quality_preset,
        e_parent_blk_pos,
        e_cur_blk_pos,
        POS_BL);

    ihevce_populate_cu_tree(
        ps_cur_ipe_ctb,
        ps_cu_tree->ps_child_node_br,
        tree_depth + 1,
        e_quality_preset,
        e_parent_blk_pos,
        e_cur_blk_pos,
        POS_BR);
}
1101
1102 /**
1103 *********************************************************************************
1104 * Function name : ihevce_intra_mode_populator
1105 *
1106 * \brief
* This function populates the intra mode candidate info into the CU intra candidate struct
1108 *
* \param[out] ps_cu_intra_cand : pointer to structure that receives the cu intra candidate info
* \param[in] ps_ipe_data : pointer to IPE L0 analyze structure
* \param[in] ps_cu_tree_data : pointer to cu recursive struct
1112 * \param[in] i1_slice_type : contain slice type value
1113 * \param[in] i4_quality_preset : contain quality preset value
1114 *
1115 * \return
1116 * None
1117 *
1118 **********************************************************************************/
/*
 * Implementation notes for ihevce_intra_mode_populator:
 *  - The CU position (in 8x8 units) of ps_cu_tree_data is decomposed into the
 *    indices of the enclosing 32x32, 16x16 and 8x8 IPE analysis entries.
 *  - Three preset classes are handled separately: presets below
 *    IHEVCE_QUALITY_P3, preset IHEVCE_QUALITY_P6, and everything else.
 *  - A value of 255 is written to terminate / invalidate a mode list.
 *  - CU sizes other than 64, 32, 16 and 8 leave ps_cu_intra_cand untouched.
 */
static void ihevce_intra_mode_populator(
    cu_intra_cand_t *ps_cu_intra_cand,
    ipe_l0_ctb_analyse_for_me_t *ps_ipe_data,
    cur_ctb_cu_tree_t *ps_cu_tree_data,
    WORD8 i1_slice_type,
    WORD32 i4_quality_preset)
{
    const WORD32 pos_x = ps_cu_tree_data->b3_cu_pos_x;
    const WORD32 pos_y = ps_cu_tree_data->b3_cu_pos_y;

    /* Bit 2 of the position picks the 32x32 block, bit 1 the 16x16 block */
    /* inside it and bit 0 the 8x8 block inside that (raster order)       */
    const WORD32 idx_32x32 = ((pos_x & 4) >> 2) + ((pos_y & 4) >> 1);
    const WORD32 idx_16x16 = ((pos_x & 2) >> 1) + (pos_y & 2);
    const WORD32 idx_8x8 = (pos_x & 1) + ((pos_y & 1) << 1);

    /* IPE analysis entries covering this CU (address computation only) */
    intra32_analyse_t *ps_ipe_32 = &ps_ipe_data->as_intra32_analyse[idx_32x32];
    intra16_analyse_t *ps_ipe_16 = &ps_ipe_32->as_intra16_analyse[idx_16x16];
    intra8_analyse_t *ps_ipe_8 = &ps_ipe_16->as_intra8_analyse[idx_8x8];

    /* Destination 2Nx2N mode lists : TU size == CU size and TU size == CU size / 2 */
    UWORD8 *pu1_modes_tu_eq_cu = ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu;
    UWORD8 *pu1_modes_tu_eq_cu_by_2 = ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2;

    /* Number of bytes copied per mode list in the two preset families */
    const size_t sz_cu_cands = MAX_INTRA_CU_CANDIDATES + 1;
    const size_t sz_best_modes = (NUM_BEST_MODES + 1) * sizeof(UWORD8);

    if(i4_quality_preset < IHEVCE_QUALITY_P3)
    {
        switch(ps_cu_tree_data->u1_cu_size)
        {
        case 64:
        {
            memcpy(pu1_modes_tu_eq_cu_by_2, ps_ipe_data->au1_best_modes_32x32_tu, sz_cu_cands);

            break;
        }
        case 32:
        {
            WORD32 skip_tu_eq_cu_by_2 = 0;

            memcpy(pu1_modes_tu_eq_cu, ps_ipe_32->au1_best_modes_32x32_tu, sz_cu_cands);

            /* At P0 the half-size TU list is dropped for non-I slices, and */
            /* for I slices when the bottom-left child node is valid        */
            if(i4_quality_preset == IHEVCE_QUALITY_P0)
            {
                if(i1_slice_type != ISLICE)
                {
                    skip_tu_eq_cu_by_2 = 1;
                }
                else
                {
                    skip_tu_eq_cu_by_2 = (ps_cu_tree_data->ps_child_node_bl != NULL) &&
                                         (ps_cu_tree_data->ps_child_node_bl->is_node_valid);
                }
            }

            if(skip_tu_eq_cu_by_2)
            {
                pu1_modes_tu_eq_cu_by_2[0] = 255;
            }
            else
            {
                memcpy(pu1_modes_tu_eq_cu_by_2, ps_ipe_32->au1_best_modes_16x16_tu, sz_cu_cands);
            }

            break;
        }
        case 16:
        {
            WORD32 skip_tu_eq_cu_by_2 = 0;

            /* Copy best 16x16 CU modes */
            memcpy(pu1_modes_tu_eq_cu, ps_ipe_16->au1_best_modes_16x16_tu, sz_cu_cands);

            /* Same P0 rule as for the 32x32 CU */
            if(i4_quality_preset == IHEVCE_QUALITY_P0)
            {
                if(i1_slice_type != ISLICE)
                {
                    skip_tu_eq_cu_by_2 = 1;
                }
                else
                {
                    skip_tu_eq_cu_by_2 = (ps_cu_tree_data->ps_child_node_bl != NULL) &&
                                         (ps_cu_tree_data->ps_child_node_bl->is_node_valid);
                }
            }

            if(skip_tu_eq_cu_by_2)
            {
                pu1_modes_tu_eq_cu_by_2[0] = 255;
            }
            else
            {
                memcpy(pu1_modes_tu_eq_cu_by_2, ps_ipe_16->au1_best_modes_8x8_tu, sz_cu_cands);
            }

            break;
        }
        case 8:
        {
            WORD32 part, cand;

            memcpy(pu1_modes_tu_eq_cu, ps_ipe_8->au1_best_modes_8x8_tu, sz_cu_cands);

            pu1_modes_tu_eq_cu_by_2[0] = 255;

            /* Initialise the per-partition mode count and hash */
            for(part = 0; part < NUM_PU_PARTS; part++)
            {
                ps_cu_intra_cand->au1_num_modes_added[part] = 0;

                memset(
                    ps_cu_intra_cand->au1_intra_luma_mode_nxn_hash[part],
                    0,
                    MAX_INTRA_CANDIDATES *
                        sizeof(ps_cu_intra_cand->au1_intra_luma_mode_nxn_hash[part][0]));
            }

            /* Copy the 4x4 best modes of every partition up to the 255 */
            /* terminator, marking each copied mode in the hash         */
            for(part = 0; part < NUM_PU_PARTS; part++)
            {
                for(cand = 0; cand < MAX_INTRA_CU_CANDIDATES; cand++)
                {
                    const WORD32 mode = ps_ipe_8->au1_4x4_best_modes[part][cand];

                    if(mode == 255)
                    {
                        ps_cu_intra_cand->au1_intra_luma_modes_nxn[part][cand] = 255;
                        break;
                    }

                    ps_cu_intra_cand->au1_intra_luma_modes_nxn[part][cand] = mode;
                    ps_cu_intra_cand->au1_intra_luma_mode_nxn_hash[part][mode] = 1;
                    ps_cu_intra_cand->au1_num_modes_added[part]++;
                }

                /* A full list is handed to the hash updater, except in B slices */
                if((ps_cu_intra_cand->au1_num_modes_added[part] == MAX_INTRA_CU_CANDIDATES) &&
                   (i1_slice_type != BSLICE))
                {
                    ps_cu_intra_cand->au1_num_modes_added[part] =
                        ihevce_intra_mode_nxn_hash_updater(
                            ps_cu_intra_cand->au1_intra_luma_modes_nxn[part],
                            ps_cu_intra_cand->au1_intra_luma_mode_nxn_hash[part],
                            ps_cu_intra_cand->au1_num_modes_added[part]);
                }
            }

            break;
        }
        default:
        {
            break;
        }
        }
    }
    else if(i4_quality_preset == IHEVCE_QUALITY_P6)
    {
        switch(ps_cu_tree_data->u1_cu_size)
        {
        case 64:
        {
            memcpy(pu1_modes_tu_eq_cu_by_2, ps_ipe_data->au1_best_modes_32x32_tu, sz_best_modes);

            /* Only the half-size TU is evaluated for a 64x64 CU */
            ps_cu_intra_cand->b1_eval_tx_cusize = 0;
            ps_cu_intra_cand->b1_eval_tx_cusize_by2 = 1;
            pu1_modes_tu_eq_cu[0] = 255;

#if ENABLE_INTRA_MODE_FILTERING_IN_XS25
            pu1_modes_tu_eq_cu_by_2[MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
#endif

            break;
        }
        case 32:
        {
            memcpy(pu1_modes_tu_eq_cu, ps_ipe_32->au1_best_modes_32x32_tu, sz_best_modes);
            memcpy(pu1_modes_tu_eq_cu_by_2, ps_ipe_32->au1_best_modes_16x16_tu, sz_best_modes);

#if ENABLE_INTRA_MODE_FILTERING_IN_XS25
            pu1_modes_tu_eq_cu[MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
            pu1_modes_tu_eq_cu_by_2[MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
#endif

            break;
        }
        case 16:
        {
            /* Copy best 16x16 CU modes */
            memcpy(pu1_modes_tu_eq_cu, ps_ipe_16->au1_best_modes_16x16_tu, sz_best_modes);
            memcpy(pu1_modes_tu_eq_cu_by_2, ps_ipe_16->au1_best_modes_8x8_tu, sz_best_modes);

#if ENABLE_INTRA_MODE_FILTERING_IN_XS25
            pu1_modes_tu_eq_cu[MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
            pu1_modes_tu_eq_cu_by_2[MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
#endif

            break;
        }
        case 8:
        {
            WORD32 i;

            memcpy(pu1_modes_tu_eq_cu, ps_ipe_8->au1_best_modes_8x8_tu, sz_best_modes);

#if !ENABLE_INTRA_MODE_FILTERING_IN_XS25
            memcpy(pu1_modes_tu_eq_cu_by_2, ps_ipe_8->au1_best_modes_4x4_tu, sz_best_modes);

            for(i = 0; i < 4; i++)
            {
                memcpy(
                    ps_cu_intra_cand->au1_intra_luma_modes_nxn[i],
                    ps_ipe_8->au1_4x4_best_modes[i],
                    sz_best_modes);

                ps_cu_intra_cand->au1_intra_luma_modes_nxn[i][MAX_INTRA_CU_CANDIDATES] = 255;
            }
#else
            /* Either the 4x4-TU 2Nx2N list or the NxN lists are populated, */
            /* depending on whether the first NxN mode is the terminator    */
            if(255 == ps_ipe_8->au1_4x4_best_modes[0][0])
            {
                memcpy(pu1_modes_tu_eq_cu_by_2, ps_ipe_8->au1_best_modes_4x4_tu, sz_best_modes);

                pu1_modes_tu_eq_cu_by_2[MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
            }
            else
            {
                for(i = 0; i < 4; i++)
                {
                    memcpy(
                        ps_cu_intra_cand->au1_intra_luma_modes_nxn[i],
                        ps_ipe_8->au1_4x4_best_modes[i],
                        sz_best_modes);

                    ps_cu_intra_cand->au1_intra_luma_modes_nxn
                        [i][MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
                }
            }

            pu1_modes_tu_eq_cu[MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
#endif

#if FORCE_NXN_MODE_BASED_ON_OL_IPE
            /* Preset is already known to be P6 in this branch.              */
            /* Evaluate only nxn for 8x8 if ol ipe picked nxn over cu=tu and */
            /* cu=4tu : disable CU=TU and CU=4TU modes and keep a single     */
            /* nxn mode per partition                                        */
            if((i1_slice_type != ISLICE) && (ps_ipe_8->b1_enable_nxn == 1))
            {
                pu1_modes_tu_eq_cu[0] = 255;
                pu1_modes_tu_eq_cu_by_2[0] = 255;

                for(i = 0; i < 4; i++)
                {
                    ps_cu_intra_cand->au1_intra_luma_modes_nxn[i][1] = 255;
                }
            }
#endif

            break;
        }
        default:
        {
            break;
        }
        }
    }
    else
    {
        switch(ps_cu_tree_data->u1_cu_size)
        {
        case 64:
        {
            memcpy(pu1_modes_tu_eq_cu_by_2, ps_ipe_data->au1_best_modes_32x32_tu, sz_best_modes);

            /* Only the half-size TU is evaluated for a 64x64 CU */
            ps_cu_intra_cand->b1_eval_tx_cusize = 0;
            ps_cu_intra_cand->b1_eval_tx_cusize_by2 = 1;
            pu1_modes_tu_eq_cu[0] = 255;

            break;
        }
        case 32:
        {
            memcpy(pu1_modes_tu_eq_cu, ps_ipe_32->au1_best_modes_32x32_tu, sz_best_modes);
            memcpy(pu1_modes_tu_eq_cu_by_2, ps_ipe_32->au1_best_modes_16x16_tu, sz_best_modes);

            break;
        }
        case 16:
        {
            /* Copy best 16x16 CU modes */
            memcpy(pu1_modes_tu_eq_cu, ps_ipe_16->au1_best_modes_16x16_tu, sz_best_modes);
            memcpy(pu1_modes_tu_eq_cu_by_2, ps_ipe_16->au1_best_modes_8x8_tu, sz_best_modes);

            break;
        }
        case 8:
        {
            WORD32 i;

            memcpy(pu1_modes_tu_eq_cu, ps_ipe_8->au1_best_modes_8x8_tu, sz_best_modes);
            memcpy(pu1_modes_tu_eq_cu_by_2, ps_ipe_8->au1_best_modes_4x4_tu, sz_best_modes);

            for(i = 0; i < 4; i++)
            {
                memcpy(
                    ps_cu_intra_cand->au1_intra_luma_modes_nxn[i],
                    ps_ipe_8->au1_4x4_best_modes[i],
                    sz_best_modes);

                ps_cu_intra_cand->au1_intra_luma_modes_nxn[i][MAX_INTRA_CU_CANDIDATES] = 255;
            }

            break;
        }
        default:
        {
            break;
        }
        }
    }
}
1542 /**
1543 ******************************************************************************
1544 * \if Function name : ihevce_compute_rdo \endif
1545 *
1546 * \brief
1547 * Coding Unit mode decide function. Performs RD opt and decides the best mode
1548 *
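* \param[in,out] ps_ctxt : pointer to enc_loop module context
* \param[in] ps_cu_prms : pointer to coding unit params (position, buffer pointers)
* \param[in] ps_cu_tree_analyse : pointer to the cu tree node being evaluated
* \param[in] ps_cur_ipe_ctb : pointer to IPE L0 analyse structure of the current CTB
* \param[in] ps_cu_me_data : pointer to ME data (best results per 64x64/32x32/16x16/8x8 block)
* \param[out] ps_col_pu : colocated pu buffer pointer
* \param[out] ps_final_mode_state : pointer to final mode state
*                 (NOTE(review): direction carried over from the old "ps_cu_final" entry; confirm)
* \param[out] pu1_col_pu_map : colocated pu map buffer pointer
* \param[out] pu1_ecd_data : pointer to store coeff data for ECD
* \param[in] col_start_pu_idx : pu index start value
* \param[in] i4_ctb_x_off : x offset of the current CTB
* \param[in] i4_ctb_y_off : y offset of the current CTB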
1557 *
* \return
* LWORD64 value (NOTE(review): presumably the best RD-opt cost; confirm against the return statement)
1560 *
1561 *
1562 * \author
1563 * Ittiam
1564 *
1565 *****************************************************************************
1566 */
ihevce_compute_rdo(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms,cur_ctb_cu_tree_t * ps_cu_tree_analyse,ipe_l0_ctb_analyse_for_me_t * ps_cur_ipe_ctb,me_ctb_data_t * ps_cu_me_data,pu_col_mv_t * ps_col_pu,final_mode_state_t * ps_final_mode_state,UWORD8 * pu1_col_pu_map,UWORD8 * pu1_ecd_data,WORD32 col_start_pu_idx,WORD32 i4_ctb_x_off,WORD32 i4_ctb_y_off)1567 LWORD64 ihevce_compute_rdo(
1568 ihevce_enc_loop_ctxt_t *ps_ctxt,
1569 enc_loop_cu_prms_t *ps_cu_prms,
1570 cur_ctb_cu_tree_t *ps_cu_tree_analyse,
1571 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
1572 me_ctb_data_t *ps_cu_me_data,
1573 pu_col_mv_t *ps_col_pu,
1574 final_mode_state_t *ps_final_mode_state,
1575 UWORD8 *pu1_col_pu_map,
1576 UWORD8 *pu1_ecd_data,
1577 WORD32 col_start_pu_idx,
1578 WORD32 i4_ctb_x_off,
1579 WORD32 i4_ctb_y_off)
1580 {
1581 /* Populate the rdo candiates to the structure */
1582 cu_analyse_t s_cu_analyse;
1583 LWORD64 rdopt_best_cost;
1584 /* Populate candidates of child nodes to CU analyse struct for further evaluation */
1585 cu_analyse_t *ps_cu_analyse;
1586 WORD32 curr_cu_pos_in_row;
1587 WORD32 cu_top_right_offset, cu_top_right_dep_pos;
1588 WORD32 is_first_cu_in_ctb, is_ctb_level_quant_rounding, is_nctb_level_quant_rounding;
1589
1590 WORD32 cu_pos_x = ps_cu_tree_analyse->b3_cu_pos_x;
1591 WORD32 cu_pos_y = ps_cu_tree_analyse->b3_cu_pos_y;
1592
1593 /*Derive the indices of 32*32, 16*16 and 8*8 blocks*/
1594 WORD32 i4_32x32_id = ((cu_pos_x & 4) >> 2) + ((cu_pos_y & 4) >> 1);
1595
1596 WORD32 i4_16x16_id = ((cu_pos_x & 2) >> 1) + ((cu_pos_y & 2));
1597
1598 WORD32 i4_8x8_id = (cu_pos_x & 1) + ((cu_pos_y & 1) << 1);
1599 if(i4_ctb_y_off == 0)
1600 {
1601 /* No wait for 1st row */
1602 cu_top_right_offset = -(MAX_CTB_SIZE);
1603 {
1604 ihevce_tile_params_t *ps_col_tile_params =
1605 ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base + ps_ctxt->i4_tile_col_idx);
1606
1607 cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
1608 }
1609
1610 cu_top_right_dep_pos = 0;
1611 }
1612 else
1613 {
1614 cu_top_right_offset = ps_cu_tree_analyse->u1_cu_size << 1;
1615 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
1616 }
1617 ps_cu_analyse = &s_cu_analyse;
1618
1619 ps_cu_analyse->b3_cu_pos_x = cu_pos_x;
1620 ps_cu_analyse->b3_cu_pos_y = cu_pos_y;
1621 ps_cu_analyse->u1_cu_size = ps_cu_tree_analyse->u1_cu_size;
1622
1623 /* Default initializations */
1624 ps_cu_analyse->u1_num_intra_rdopt_cands = MAX_INTRA_CU_CANDIDATES;
1625 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][0] = 255;
1626 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
1627 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
1628
1629 ps_cu_analyse->s_cu_intra_cand.b1_eval_tx_cusize = 1;
1630 ps_cu_analyse->s_cu_intra_cand.b1_eval_tx_cusize_by2 = 1;
1631
1632 switch(ps_cu_tree_analyse->u1_cu_size)
1633 {
1634 case 64:
1635 {
1636 memcpy(
1637 ps_cu_analyse[0].i4_act_factor,
1638 ps_cur_ipe_ctb->i4_64x64_act_factor,
1639 4 * 2 * sizeof(WORD32));
1640
1641 ps_cu_analyse[0].s_cu_intra_cand.b1_eval_tx_cusize = 0;
1642 ps_cu_analyse[0].s_cu_intra_cand.b1_eval_tx_cusize_by2 = 1;
1643 ps_cu_analyse[0].s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
1644
1645 break;
1646 }
1647 case 32:
1648 {
1649 memcpy(
1650 ps_cu_analyse[0].i4_act_factor,
1651 ps_cur_ipe_ctb->i4_32x32_act_factor[i4_32x32_id],
1652 3 * 2 * sizeof(WORD32));
1653
1654 break;
1655 }
1656 case 16:
1657 {
1658 memcpy(
1659 ps_cu_analyse[0].i4_act_factor,
1660 ps_cur_ipe_ctb->i4_16x16_act_factor[(i4_32x32_id << 2) + i4_16x16_id],
1661 2 * 2 * sizeof(WORD32));
1662
1663 break;
1664 }
1665 case 8:
1666 {
1667 memcpy(
1668 ps_cu_analyse[0].i4_act_factor,
1669 ps_cur_ipe_ctb->i4_16x16_act_factor[(i4_32x32_id << 2) + i4_16x16_id],
1670 2 * 2 * sizeof(WORD32));
1671
1672 break;
1673 }
1674 }
1675
1676 /* Populate the me data in cu_analyse struct */
1677 /* For CU size 32 and 64, add me data to array of cu analyse struct */
1678 if(ISLICE != ps_ctxt->i1_slice_type)
1679 {
1680 if((ps_cu_tree_analyse->u1_cu_size >= 32) && (ps_cu_tree_analyse->u1_inter_eval_enable))
1681 {
1682 if(32 == ps_cu_tree_analyse->u1_cu_size)
1683 {
1684 ihevce_populate_cu_struct(
1685 ps_ctxt,
1686 ps_cur_ipe_ctb,
1687 ps_cu_tree_analyse,
1688 ps_cu_me_data->as_32x32_block_data[i4_32x32_id].as_best_results,
1689 ps_cu_analyse,
1690 i4_32x32_id,
1691 #if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
1692 ps_cu_prms->u1_is_cu_noisy,
1693 #endif
1694 ps_cu_me_data->as_32x32_block_data[i4_32x32_id].num_best_results);
1695 }
1696 else
1697 {
1698 ihevce_populate_cu_struct(
1699 ps_ctxt,
1700 ps_cur_ipe_ctb,
1701 ps_cu_tree_analyse,
1702 ps_cu_me_data->s_64x64_block_data.as_best_results,
1703 ps_cu_analyse,
1704 i4_32x32_id,
1705 #if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
1706 ps_cu_prms->u1_is_cu_noisy,
1707 #endif
1708 ps_cu_me_data->s_64x64_block_data.num_best_results);
1709 }
1710 }
1711 else if(ps_cu_tree_analyse->u1_cu_size < 32)
1712 {
1713 i4_8x8_id += (i4_32x32_id << 4) + (i4_16x16_id << 2);
1714 i4_16x16_id += (i4_32x32_id << 2);
1715
1716 if(16 == ps_cu_tree_analyse->u1_cu_size)
1717 {
1718 block_data_16x16_t *ps_data = &ps_cu_me_data->as_block_data[i4_16x16_id];
1719
1720 if(ps_cu_tree_analyse->u1_inter_eval_enable)
1721 {
1722 ihevce_populate_cu_struct(
1723 ps_ctxt,
1724 ps_cur_ipe_ctb,
1725 ps_cu_tree_analyse,
1726 ps_data->as_best_results,
1727 ps_cu_analyse,
1728 i4_32x32_id,
1729 #if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
1730 ps_cu_prms->u1_is_cu_noisy,
1731 #endif
1732 ps_data->num_best_results);
1733 }
1734 else
1735 {
1736 ps_cu_analyse->u1_num_inter_cands = 0;
1737 ps_cu_analyse->u1_best_is_intra = 1;
1738 }
1739 }
1740 else /* If CU size is 8 */
1741 {
1742 block_data_8x8_t *ps_data = &ps_cu_me_data->as_8x8_block_data[i4_8x8_id];
1743
1744 if(ps_cu_tree_analyse->u1_inter_eval_enable)
1745 {
1746 ihevce_populate_cu_struct(
1747 ps_ctxt,
1748 ps_cur_ipe_ctb,
1749 ps_cu_tree_analyse,
1750 ps_data->as_best_results,
1751 ps_cu_analyse,
1752 i4_32x32_id,
1753 #if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
1754 ps_cu_prms->u1_is_cu_noisy,
1755 #endif
1756 ps_data->num_best_results);
1757 }
1758 else
1759 {
1760 ps_cu_analyse->u1_num_inter_cands = 0;
1761 ps_cu_analyse->u1_best_is_intra = 1;
1762 }
1763 }
1764 }
1765 else
1766 {
1767 ps_cu_analyse->u1_num_inter_cands = 0;
1768 ps_cu_analyse->u1_best_is_intra = 1;
1769 }
1770 }
1771 else
1772 {
1773 ps_cu_analyse->u1_num_inter_cands = 0;
1774 ps_cu_analyse->u1_best_is_intra = 1;
1775 }
1776
1777 if(!ps_ctxt->i1_cu_qp_delta_enable)
1778 {
1779 ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_frame_qp;
1780
1781 /*cu qp must be populated in cu_analyse_t struct*/
1782 ps_ctxt->i4_cu_qp = ps_cu_analyse->i1_cu_qp;
1783 }
1784 else
1785 {
1786 ASSERT(ps_cu_analyse->i4_act_factor[0] > 0);
1787 ASSERT(
1788 ((ps_cu_analyse->i4_act_factor[1] > 0) && (ps_cu_analyse->u1_cu_size != 8)) ||
1789 ((ps_cu_analyse->u1_cu_size == 8)));
1790 ASSERT(
1791 ((ps_cu_analyse->i4_act_factor[2] > 0) && (ps_cu_analyse->u1_cu_size == 32)) ||
1792 ((ps_cu_analyse->u1_cu_size != 32)));
1793 }
1794
1795 if(ps_ctxt->u1_disable_intra_eval)
1796 {
1797 /* rdopt evaluation of intra disabled as inter is clear winner */
1798 ps_cu_analyse->u1_num_intra_rdopt_cands = 0;
1799
1800 /* all the modes invalidated */
1801 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
1802 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
1803 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][0] = 255;
1804 ps_cu_analyse->u1_chroma_intra_pred_mode = 255;
1805
1806 /* no intra candt to verify */
1807 ps_cu_analyse->s_cu_intra_cand.b6_num_intra_cands = 0;
1808 }
1809
1810 #if DISABLE_L2_IPE_IN_PB_L1_IN_B
1811 if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) && (ps_cu_analyse->u1_cu_size == 32) &&
1812 (ps_ctxt->i1_slice_type != ISLICE))
1813 {
1814 /* rdopt evaluation of intra disabled as inter is clear winner */
1815 ps_cu_analyse->u1_num_intra_rdopt_cands = 0;
1816
1817 /* all the modes invalidated */
1818 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
1819 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
1820 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][0] = 255;
1821 ps_cu_analyse->u1_chroma_intra_pred_mode = 255;
1822
1823 /* no intra candt to verify */
1824 ps_cu_analyse->s_cu_intra_cand.b6_num_intra_cands = 0;
1825 }
1826 #endif
1827
1828 if(DISABLE_INTRA_WHEN_NOISY && ps_cu_prms->u1_is_cu_noisy)
1829 {
1830 ps_cu_analyse->u1_num_intra_rdopt_cands = 0;
1831 }
1832
1833 if(ps_cu_analyse->u1_num_intra_rdopt_cands || ps_cu_tree_analyse->u1_intra_eval_enable)
1834 {
1835 ihevce_intra_mode_populator(
1836 &ps_cu_analyse->s_cu_intra_cand,
1837 ps_cur_ipe_ctb,
1838 ps_cu_tree_analyse,
1839 ps_ctxt->i1_slice_type,
1840 ps_ctxt->i4_quality_preset);
1841
1842 ps_cu_analyse->u1_num_intra_rdopt_cands = 1;
1843 }
1844
1845 ASSERT(!!ps_cu_analyse->u1_num_intra_rdopt_cands || ps_cu_analyse->u1_num_inter_cands);
1846
1847 if(ps_ctxt->u1_use_top_at_ctb_boundary)
1848 {
1849 /* Wait till top data is ready */
1850 /* Currently checking till top right CU */
1851 curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
1852
1853 if(0 == ps_cu_analyse->b3_cu_pos_y)
1854 {
1855 ihevce_dmgr_chk_row_row_sync(
1856 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
1857 curr_cu_pos_in_row,
1858 cu_top_right_offset,
1859 cu_top_right_dep_pos,
1860 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1861 ps_ctxt->thrd_id);
1862 }
1863 }
1864
1865 #if !DISABLE_TOP_SYNC
1866 {
1867 if(0 == ps_cu_analyse->b3_cu_pos_y)
1868 {
1869 if((0 == i4_ctb_x_off) && (i4_ctb_y_off != 0))
1870 {
1871 if(ps_cu_analyse->b3_cu_pos_x == 0)
1872 {
1873 if(!ps_ctxt->u1_use_top_at_ctb_boundary)
1874 {
1875 /* Wait till top data is ready */
1876 /* Currently checking till top right CU */
1877 curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
1878
1879 if(0 == ps_cu_analyse->b3_cu_pos_y)
1880 {
1881 ihevce_dmgr_chk_row_row_sync(
1882 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
1883 curr_cu_pos_in_row,
1884 cu_top_right_offset,
1885 cu_top_right_dep_pos,
1886 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1887 ps_ctxt->thrd_id);
1888 }
1889 }
1890
1891 ihevce_entropy_rdo_copy_states(
1892 &ps_ctxt->s_rdopt_entropy_ctxt,
1893 ps_ctxt->pu1_top_rt_cabac_state,
1894 UPDATE_ENT_SYNC_RDO_STATE);
1895 }
1896 }
1897 }
1898 }
1899 #else
1900 {
1901 if((0 == ps_cu_analyse->b3_cu_pos_y) && (IHEVCE_QUALITY_P6 != ps_ctxt->i4_quality_preset))
1902 {
1903 if((0 == i4_ctb_x_off) && (i4_ctb_y_off != 0))
1904 {
1905 if(ps_cu_analyse->b3_cu_pos_x == 0)
1906 {
1907 if(!ps_ctxt->u1_use_top_at_ctb_boundary)
1908 {
1909 /* Wait till top data is ready */
1910 /* Currently checking till top right CU */
1911 curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
1912
1913 if(0 == ps_cu_analyse->b3_cu_pos_y)
1914 {
1915 ihevce_dmgr_chk_row_row_sync(
1916 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
1917 curr_cu_pos_in_row,
1918 cu_top_right_offset,
1919 cu_top_right_dep_pos,
1920 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1921 ps_ctxt->thrd_id);
1922 }
1923 }
1924
1925 ihevce_entropy_rdo_copy_states(
1926 &ps_ctxt->s_rdopt_entropy_ctxt,
1927 ps_ctxt->pu1_top_rt_cabac_state,
1928 UPDATE_ENT_SYNC_RDO_STATE);
1929 }
1930 }
1931 }
1932 else if((0 == ps_cu_analyse->b3_cu_pos_y) && (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset))
1933 {
1934 UWORD8 u1_cabac_init_idc;
1935 WORD8 i1_cabac_init_flag =
1936 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt->ps_slice_hdr->i1_cabac_init_flag;
1937
1938 if(ps_ctxt->i1_slice_type == ISLICE)
1939 {
1940 u1_cabac_init_idc = 0;
1941 }
1942 else if(ps_ctxt->i1_slice_type == PSLICE)
1943 {
1944 u1_cabac_init_idc = i1_cabac_init_flag ? 2 : 1;
1945 }
1946 else
1947 {
1948 u1_cabac_init_idc = i1_cabac_init_flag ? 1 : 2;
1949 }
1950
1951 ihevce_entropy_rdo_copy_states(
1952 &ps_ctxt->s_rdopt_entropy_ctxt,
1953 (UWORD8 *)gau1_ihevc_cab_ctxts[u1_cabac_init_idc][ps_ctxt->i4_frame_qp],
1954 UPDATE_ENT_SYNC_RDO_STATE);
1955 }
1956 }
1957 #endif
1958
1959 /*2 Multi- dimensinal array based on trans size of rounding factor to be added here */
1960 /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
1961 /* Currently the complete array will contain only single value*/
1962 /*The rounding factor is calculated with the formula
1963 Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
1964 rounding factor = (1 - DeadZone Val)
1965
1966 Assumption: Cabac states of All the sub-blocks in the TU are considered independent
1967 */
1968
1969 /*As long as coef level rdoq is enabled perform this operation */
1970 is_first_cu_in_ctb = ((0 == ps_cu_analyse->b3_cu_pos_x) && (0 == ps_cu_analyse->b3_cu_pos_y));
1971 is_ctb_level_quant_rounding =
1972 ((ps_ctxt->i4_quant_rounding_level == CTB_LEVEL_QUANT_ROUNDING) &&
1973 (1 == is_first_cu_in_ctb));
1974 is_nctb_level_quant_rounding =
1975 ((ps_ctxt->i4_quant_rounding_level == NCTB_LEVEL_QUANT_ROUNDING) &&
1976 (1 == is_first_cu_in_ctb) && (((i4_ctb_x_off >> 6) % NUM_CTB_QUANT_ROUNDING) == 0));
1977
1978 if((ps_ctxt->i4_quant_rounding_level == CU_LEVEL_QUANT_ROUNDING) ||
1979 (ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) ||
1980 (1 == is_ctb_level_quant_rounding) || (1 == is_nctb_level_quant_rounding))
1981 {
1982 double i4_lamda_modifier, i4_lamda_modifier_uv;
1983 WORD32 trans_size, trans_size_cr;
1984 trans_size = ps_cu_analyse->u1_cu_size;
1985
1986 if((1 == is_ctb_level_quant_rounding) || (1 == is_nctb_level_quant_rounding))
1987 {
1988 trans_size = MAX_TU_SIZE;
1989 }
1990 else
1991 {
1992 if(ps_cu_analyse->u1_cu_size == 64)
1993 {
1994 trans_size >>= 1;
1995 }
1996 }
1997
1998 /*Chroma trans size = half of luma trans size */
1999 trans_size_cr = trans_size >> 1;
2000
2001 if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
2002 {
2003 i4_lamda_modifier = ps_ctxt->i4_lamda_modifier *
2004 CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
2005 i4_lamda_modifier_uv =
2006 ps_ctxt->i4_uv_lamda_modifier *
2007 CLIP3((((double)(ps_ctxt->i4_chrm_cu_qp - 12)) / 6.0), 2.00, 4.00);
2008 }
2009 else
2010 {
2011 i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
2012 i4_lamda_modifier_uv = ps_ctxt->i4_uv_lamda_modifier;
2013 }
2014 if(ps_ctxt->i4_use_const_lamda_modifier)
2015 {
2016 if(ISLICE == ps_ctxt->i1_slice_type)
2017 {
2018 i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
2019 i4_lamda_modifier_uv = ps_ctxt->f_i_pic_lamda_modifier;
2020 }
2021 else
2022 {
2023 i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
2024 i4_lamda_modifier_uv = CONST_LAMDA_MOD_VAL;
2025 }
2026 }
2027
2028 do
2029 {
2030 memset(
2031 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3],
2032 0,
2033 trans_size * trans_size * sizeof(WORD32));
2034 memset(
2035 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3],
2036 0,
2037 trans_size * trans_size * sizeof(WORD32));
2038
2039 /*ps_ctxt->i4_quant_rnd_factor[intra_flag], is currently not used */
2040 ihevce_quant_rounding_factor_gen(
2041 trans_size,
2042 1, //is_luma = 1
2043 &ps_ctxt->s_rdopt_entropy_ctxt,
2044 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3],
2045 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3],
2046 i4_lamda_modifier,
2047 0); //is_tu_level_quant rounding = 0
2048
2049 trans_size = trans_size >> 1;
2050
2051 } while(trans_size >= 4);
2052
2053 /*CHROMA Quant Rounding is to be enabled with CU/TU/CTB/NCTB Luma rounding */
2054 /*Please note chroma is calcualted only for 1st TU at TU level Rounding */
2055 if(ps_ctxt->i4_chroma_quant_rounding_level == CHROMA_QUANT_ROUNDING)
2056 {
2057 do
2058 {
2059 memset(
2060 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size_cr >> 3],
2061 0,
2062 trans_size_cr * trans_size_cr * sizeof(WORD32));
2063 memset(
2064 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size_cr >> 3],
2065 0,
2066 trans_size_cr * trans_size_cr * sizeof(WORD32));
2067
2068 ihevce_quant_rounding_factor_gen(
2069 trans_size_cr,
2070 0, //is_luma = 0
2071 &ps_ctxt->s_rdopt_entropy_ctxt,
2072 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size_cr >> 3],
2073 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size_cr >> 3],
2074 i4_lamda_modifier_uv,
2075 0); //is_tu_level_quant rounding = 0
2076
2077 trans_size_cr = trans_size_cr >> 1;
2078
2079 } while(trans_size_cr >= 4);
2080 }
2081 }
2082
2083 #if DISABLE_INTRAS_IN_BPIC
2084 if((ps_ctxt->i1_slice_type == BSLICE) && (ps_cu_analyse->u1_num_inter_cands))
2085 {
2086 ps_cu_analyse->u1_num_intra_rdopt_cands = 0;
2087 }
2088 #endif
2089
2090 rdopt_best_cost = ihevce_cu_mode_decide(
2091 ps_ctxt,
2092 ps_cu_prms,
2093 ps_cu_analyse,
2094 ps_final_mode_state,
2095 pu1_ecd_data,
2096 ps_col_pu,
2097 pu1_col_pu_map,
2098 col_start_pu_idx);
2099
2100 return rdopt_best_cost;
2101 }
2102
2103 /**
2104 ******************************************************************************
2105 * \if Function name : ihevce_enc_loop_cu_bot_copy \endif
2106 *
2107 * \brief
2108 * This function copy the bottom data at CU level to row buffers
2109 *
2110 * \date
2111 * 18/09/2012
2112 *
2113 * \author
2114 * Ittiam
2115 *
2116 * \return
2117 *
2118 * List of Functions
2119 *
2120 *
2121 ******************************************************************************
2122 */
ihevce_enc_loop_cu_bot_copy(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms,ihevce_enc_cu_node_ctxt_t * ps_enc_out_ctxt,WORD32 curr_cu_pos_in_row,WORD32 curr_cu_pos_in_ctb)2123 void ihevce_enc_loop_cu_bot_copy(
2124 ihevce_enc_loop_ctxt_t *ps_ctxt,
2125 enc_loop_cu_prms_t *ps_cu_prms,
2126 ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
2127 WORD32 curr_cu_pos_in_row,
2128 WORD32 curr_cu_pos_in_ctb)
2129 {
2130 /* ---------------------------------------------- */
2131 /* copy the bottom row data to the row buffers */
2132 /* ---------------------------------------------- */
2133 nbr_4x4_t *ps_top_nbr;
2134 UWORD8 *pu1_buff;
2135 UWORD8 *pu1_luma_top, *pu1_chrm_top;
2136 WORD32 nbr_strd;
2137
2138 WORD32 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
2139
2140 /* derive the appropraite pointers */
2141 pu1_luma_top = (UWORD8 *)ps_ctxt->pv_bot_row_luma + curr_cu_pos_in_row;
2142 pu1_chrm_top = (UWORD8 *)ps_ctxt->pv_bot_row_chroma + curr_cu_pos_in_row;
2143 ps_top_nbr = ps_ctxt->ps_bot_row_nbr + (curr_cu_pos_in_row >> 2);
2144 nbr_strd = ps_cu_prms->i4_ctb_size >> 2;
2145
2146 /* copy bottom luma data */
2147 pu1_buff = ps_cu_prms->pu1_luma_recon +
2148 (ps_cu_prms->i4_luma_recon_stride * (ps_cu_prms->i4_ctb_size - 1));
2149
2150 pu1_buff += curr_cu_pos_in_ctb;
2151
2152 memcpy(pu1_luma_top, pu1_buff, ps_enc_out_ctxt->u1_cu_size);
2153
2154 /* copy bottom chroma data cb and cr pixel interleaved */
2155 pu1_buff = ps_cu_prms->pu1_chrm_recon + (ps_cu_prms->i4_chrm_recon_stride *
2156 ((ps_cu_prms->i4_ctb_size >> (0 == u1_is_422)) - 1));
2157
2158 pu1_buff += curr_cu_pos_in_ctb;
2159
2160 memcpy(pu1_chrm_top, pu1_buff, ps_enc_out_ctxt->u1_cu_size);
2161
2162 /* store the nbr 4x4 data at cu level */
2163 {
2164 nbr_4x4_t *ps_nbr;
2165
2166 /* copy bottom nbr data */
2167 ps_nbr = &ps_ctxt->as_ctb_nbr_arr[0];
2168 ps_nbr += ((ps_cu_prms->i4_ctb_size >> 2) - 1) * nbr_strd;
2169
2170 ps_nbr += (curr_cu_pos_in_ctb >> 2);
2171
2172 memcpy(ps_top_nbr, ps_nbr, (ps_enc_out_ctxt->u1_cu_size >> 2) * sizeof(nbr_4x4_t));
2173 }
2174 return;
2175 }
2176
2177 /**
2178 ******************************************************************************
2179 * \if Function name : ihevce_update_final_cu_results \endif
2180 *
2181 * \brief
2182 *
2183 * \return
2184 * None
2185 *
2186 * \author
2187 * Ittiam
2188 *
2189 *****************************************************************************
2190 */
ihevce_update_final_cu_results(ihevce_enc_loop_ctxt_t * ps_ctxt,ihevce_enc_cu_node_ctxt_t * ps_enc_out_ctxt,enc_loop_cu_prms_t * ps_cu_prms,pu_col_mv_t ** pps_row_col_pu,WORD32 * pi4_col_pu_map_idx,cu_final_update_prms * ps_cu_update_prms,WORD32 ctb_ctr,WORD32 vert_ctb_ctr)2191 void ihevce_update_final_cu_results(
2192 ihevce_enc_loop_ctxt_t *ps_ctxt,
2193 ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
2194 enc_loop_cu_prms_t *ps_cu_prms,
2195 pu_col_mv_t **pps_row_col_pu,
2196 WORD32 *pi4_col_pu_map_idx,
2197 cu_final_update_prms *ps_cu_update_prms,
2198 WORD32 ctb_ctr,
2199 WORD32 vert_ctb_ctr)
2200 {
2201 WORD32 curr_cu_pos_in_row;
2202
2203 cu_enc_loop_out_t *ps_cu_final = *ps_cu_update_prms->pps_cu_final;
2204 pu_t **pps_row_pu = ps_cu_update_prms->pps_row_pu;
2205 tu_enc_loop_out_t **pps_row_tu = ps_cu_update_prms->pps_row_tu;
2206 UWORD8 **ppu1_row_ecd_data = ps_cu_update_prms->ppu1_row_ecd_data;
2207 WORD32 *pi4_num_pus_in_ctb = ps_cu_update_prms->pi4_num_pus_in_ctb;
2208 UWORD32 u4_cu_size = ps_enc_out_ctxt->u1_cu_size;
2209 ps_cu_final->b3_cu_pos_x = ps_enc_out_ctxt->b3_cu_pos_x;
2210 ps_cu_final->b3_cu_pos_y = ps_enc_out_ctxt->b3_cu_pos_y;
2211
2212 ps_cu_final->b4_cu_size = ps_enc_out_ctxt->u1_cu_size >> 3;
2213
2214 /* store the current pu and tu pointes */
2215 ps_cu_final->ps_pu = *pps_row_pu;
2216 ps_cu_final->ps_enc_tu = *pps_row_tu;
2217 curr_cu_pos_in_row = ctb_ctr * ps_cu_prms->i4_ctb_size + (ps_cu_final->b3_cu_pos_x << 3);
2218
2219 ihevce_store_cu_final(ps_ctxt, ps_cu_final, *ppu1_row_ecd_data, ps_enc_out_ctxt, ps_cu_prms);
2220
2221 if(NULL != pps_row_col_pu)
2222 {
2223 (*pps_row_col_pu) += ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2224 }
2225 if(NULL != pi4_col_pu_map_idx)
2226 {
2227 (*pi4_col_pu_map_idx) += ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2228 }
2229 (*pi4_num_pus_in_ctb) += ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2230 (*pps_row_tu) += ps_cu_final->u2_num_tus_in_cu;
2231 (*pps_row_pu) += ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2232 (*ppu1_row_ecd_data) += ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
2233
2234 (*ps_cu_update_prms->pps_cu_final)++;
2235 (*ps_cu_update_prms->pu1_num_cus_in_ctb_out)++;
2236
2237 /* Updated for each CU in bottom row of CTB */
2238 if(((ps_cu_final->b3_cu_pos_y << 3) + u4_cu_size) == ps_ctxt->u4_cur_ctb_ht)
2239 {
2240 /* copy the bottom data to row buffers */
2241 ((pf_enc_loop_cu_bot_copy)ps_ctxt->pv_enc_loop_cu_bot_copy)(
2242 ps_ctxt,
2243 ps_cu_prms,
2244 ps_enc_out_ctxt,
2245 curr_cu_pos_in_row,
2246 (ps_enc_out_ctxt->b3_cu_pos_x << 3));
2247
2248 /* Setting Dependency for CU TopRight */
2249 ihevce_dmgr_set_row_row_sync(
2250 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
2251 (curr_cu_pos_in_row + ps_enc_out_ctxt->u1_cu_size),
2252 vert_ctb_ctr,
2253 ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2254
2255 /* Setting Dependency for Entropy to consume is made at CTB level */
2256 }
2257 }
2258
2259 /**
2260 ******************************************************************************
2261 * \if Function name : ihevce_cu_recurse_decide \endif
2262 *
2263 * \brief
2264 * Coding Unit mode decide function. Performs RD opt and decides the best mode
2265 *
2266 * \param[in] pv_ctxt : pointer to enc_loop module
2267 * \param[in] ps_cu_prms : pointer to coding unit params (position, buffer pointers)
2268 * \param[in] ps_cu_analyse : pointer to cu analyse
2269 * \param[out] ps_cu_final : pointer to cu final
2270 * \param[out] pu1_ecd_data :pointer to store coeff data for ECD
2271 * \param[out]ps_row_col_pu; colocated pu buffer pointer
2272 * \param[out]pu1_row_pu_map; colocated pu map buffer pointer
2273 * \param[in]col_start_pu_idx : pu index start value
2274 *
2275 * \return
2276 * None
2277 *
2278 *
2279 * \author
2280 * Ittiam
2281 *
2282 *****************************************************************************
2283 */
ihevce_cu_recurse_decide(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms,cur_ctb_cu_tree_t * ps_cu_tree_analyse,cur_ctb_cu_tree_t * ps_cu_tree_analyse_parent,ipe_l0_ctb_analyse_for_me_t * ps_cur_ipe_ctb,me_ctb_data_t * ps_cu_me_data,pu_col_mv_t ** pps_col_pu,cu_final_update_prms * ps_cu_update_prms,UWORD8 * pu1_col_pu_map,WORD32 * pi4_col_start_pu_idx,WORD32 i4_tree_depth,WORD32 i4_ctb_x_off,WORD32 i4_ctb_y_off,WORD32 cur_ctb_ht)2284 WORD32 ihevce_cu_recurse_decide(
2285 ihevce_enc_loop_ctxt_t *ps_ctxt,
2286 enc_loop_cu_prms_t *ps_cu_prms,
2287 cur_ctb_cu_tree_t *ps_cu_tree_analyse,
2288 cur_ctb_cu_tree_t *ps_cu_tree_analyse_parent,
2289 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
2290 me_ctb_data_t *ps_cu_me_data,
2291 pu_col_mv_t **pps_col_pu,
2292 cu_final_update_prms *ps_cu_update_prms,
2293 UWORD8 *pu1_col_pu_map,
2294 WORD32 *pi4_col_start_pu_idx,
2295 WORD32 i4_tree_depth,
2296 WORD32 i4_ctb_x_off,
2297 WORD32 i4_ctb_y_off,
2298 WORD32 cur_ctb_ht)
2299 {
2300 cur_ctb_cu_tree_t *ps_cu_tree_analyse_child[4];
2301 final_mode_state_t s_final_mode_state;
2302
2303 WORD32 i;
2304 WORD32 child_nodes_null;
2305 LWORD64 i8_least_child_cost;
2306
2307 WORD32 num_children_encoded = 0;
2308
2309 /* Take backup of collocated start PU index for parent node rdo for PQ */
2310 WORD32 i4_col_pu_idx_bkup = *pi4_col_start_pu_idx;
2311 pu_col_mv_t *ps_col_mv_bkup = *pps_col_pu;
2312
2313 #if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
2314 WORD32 x0_frm = i4_ctb_x_off + (ps_cu_tree_analyse->b3_cu_pos_x << 3);
2315 WORD32 y0_frm = i4_ctb_y_off + (ps_cu_tree_analyse->b3_cu_pos_y << 3);
2316 WORD32 pic_wd = ps_ctxt->s_sao_ctxt_t.ps_sps->i2_pic_width_in_luma_samples;
2317 WORD32 pic_ht = ps_ctxt->s_sao_ctxt_t.ps_sps->i2_pic_height_in_luma_samples;
2318 WORD32 log2_min_cb_size = ps_ctxt->s_sao_ctxt_t.ps_sps->i1_log2_min_coding_block_size;
2319 WORD32 cu_size = ps_cu_tree_analyse->u1_cu_size;
2320
2321 /* bits for coding split_cu_flag = 1 */
2322 WORD32 split_cu1_bits_q12 = 0;
2323
2324 /* bits for coding split_cu_flag = 0 */
2325 WORD32 split_cu0_bits_q12 = 0;
2326 #endif
2327
2328 UWORD8 u1_is_cu_noisy = ps_ctxt->u1_is_stasino_enabled
2329 ? ihevce_determine_cu_noise_based_on_8x8Blk_data(
2330 ps_cu_prms->pu1_is_8x8Blk_noisy,
2331 ((ps_cu_tree_analyse->b3_cu_pos_x << 3) >> 4) << 4,
2332 ((ps_cu_tree_analyse->b3_cu_pos_y << 3) >> 4) << 4,
2333 MAX(16, ps_cu_tree_analyse->u1_cu_size))
2334 : 0;
2335
2336 #if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
2337 LWORD64 i8_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
2338 #endif
2339
2340 (void)ps_cu_tree_analyse_parent;
2341
2342 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
2343 if(!ps_ctxt->u1_enable_psyRDOPT && u1_is_cu_noisy)
2344 {
2345 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
2346 ps_ctxt->i8_cl_ssd_lambda_chroma_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
2347 }
2348 #endif
2349
2350 if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
2351 {
2352 i8_lambda_qf = ((float)i8_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
2353 }
2354
2355 ps_cu_tree_analyse_child[0] = ps_cu_tree_analyse->ps_child_node_tl;
2356 ps_cu_tree_analyse_child[1] = ps_cu_tree_analyse->ps_child_node_tr;
2357 ps_cu_tree_analyse_child[2] = ps_cu_tree_analyse->ps_child_node_bl;
2358 ps_cu_tree_analyse_child[3] = ps_cu_tree_analyse->ps_child_node_br;
2359
2360 child_nodes_null =
2361 ((ps_cu_tree_analyse_child[0] == NULL) + (ps_cu_tree_analyse_child[1] == NULL) +
2362 (ps_cu_tree_analyse_child[2] == NULL) + (ps_cu_tree_analyse_child[3] == NULL));
2363
2364 #if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
2365 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2366 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2367 #endif
2368 {
2369 /*----------------------------------------------*/
2370 /* ---------- CU Depth Bit Estimation --------- */
2371 /*----------------------------------------------*/
2372
2373 /* Encode cu split flags based on following conditions; See section 7.3.8*/
2374 if(((x0_frm + cu_size) <= pic_wd) && ((y0_frm + cu_size) <= pic_ht) &&
2375 (cu_size > (1 << log2_min_cb_size))) /* &&(ps_entropy_ctxt->i1_ctb_num_pcm_blks == 0)) */
2376 {
2377 WORD32 left_cu_depth = 0;
2378 WORD32 top_cu_depth = 0;
2379 WORD32 pos_x_4x4 = ps_cu_tree_analyse->b3_cu_pos_x << 1;
2380 WORD32 pos_y_4x4 = ps_cu_tree_analyse->b3_cu_pos_y << 1;
2381 WORD32 num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
2382 WORD32 cur_4x4_in_ctb = pos_x_4x4 + (pos_y_4x4 * num_4x4_in_ctb);
2383 UWORD8 u1_split_cu_flag_cab_model;
2384 WORD32 split_cu_ctxt_inc;
2385
2386 /* Left and Top CU depth is required for cabac context */
2387
2388 /* CU left */
2389 if(0 == pos_x_4x4)
2390 {
2391 /* CTB boundary */
2392 if(i4_ctb_x_off)
2393 {
2394 left_cu_depth = ps_ctxt->as_left_col_nbr[pos_y_4x4].b2_cu_depth;
2395 }
2396 }
2397 else
2398 {
2399 /* inside CTB */
2400 left_cu_depth = ps_ctxt->as_ctb_nbr_arr[cur_4x4_in_ctb - 1].b2_cu_depth;
2401 }
2402
2403 /* CU top */
2404 if(0 == pos_y_4x4)
2405 {
2406 /* CTB boundary */
2407 if(i4_ctb_y_off)
2408 {
2409 /* Wait till top cu depth is available */
2410 ihevce_dmgr_chk_row_row_sync(
2411 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
2412 (i4_ctb_x_off) + (pos_x_4x4 << 2),
2413 4,
2414 ((i4_ctb_y_off >> 6) - 1),
2415 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
2416 ps_ctxt->thrd_id);
2417
2418 top_cu_depth =
2419 ps_ctxt->ps_top_row_nbr[(i4_ctb_x_off >> 2) + pos_x_4x4].b2_cu_depth;
2420 }
2421 }
2422 else
2423 {
2424 /* inside CTB */
2425 top_cu_depth = ps_ctxt->as_ctb_nbr_arr[cur_4x4_in_ctb - num_4x4_in_ctb].b2_cu_depth;
2426 }
2427
2428 split_cu_ctxt_inc = IHEVC_CAB_SPLIT_CU_FLAG + (left_cu_depth > i4_tree_depth) +
2429 (top_cu_depth > i4_tree_depth);
2430
2431 u1_split_cu_flag_cab_model =
2432 ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][split_cu_ctxt_inc];
2433
2434 /* bits for coding split_cu_flag = 1 */
2435 split_cu1_bits_q12 = gau2_ihevce_cabac_bin_to_bits[u1_split_cu_flag_cab_model ^ 1];
2436
2437 /* bits for coding split_cu_flag = 0 */
2438 split_cu0_bits_q12 = gau2_ihevce_cabac_bin_to_bits[u1_split_cu_flag_cab_model ^ 0];
2439
2440 /* update the cu split cabac context of all child nodes before evaluating child */
2441 for(i = (i4_tree_depth + 1); i < 4; i++)
2442 {
2443 ps_ctxt->au1_rdopt_recur_ctxt_models[i][split_cu_ctxt_inc] =
2444 gau1_ihevc_next_state[(u1_split_cu_flag_cab_model << 1) | 1];
2445 }
2446
2447 /* update the cu split cabac context of the parent node with split flag = 0 */
2448 ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][split_cu_ctxt_inc] =
2449 gau1_ihevc_next_state[(u1_split_cu_flag_cab_model << 1) | 0];
2450 }
2451 }
2452 #endif
2453
2454 /* If all the child nodes are null, then do rdo for this node and return the cost */
2455 if((1 == ps_cu_tree_analyse->is_node_valid) && (4 == child_nodes_null))
2456 {
2457 WORD32 i4_num_bytes_ecd_data;
2458
2459 #if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2460 COPY_CABAC_STATES(
2461 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2462 &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2463 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2464 #else
2465 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2466 {
2467 COPY_CABAC_STATES(
2468 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2469 &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2470 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2471 }
2472 #endif
2473
2474 ps_cu_prms->u1_is_cu_noisy = u1_is_cu_noisy;
2475 ihevce_update_pred_qp(
2476 ps_ctxt, ps_cu_tree_analyse->b3_cu_pos_x, ps_cu_tree_analyse->b3_cu_pos_y);
2477 /* DO rdo for current node here */
2478 /* return rdo cost for current node*/
2479 ps_cu_tree_analyse->i8_best_rdopt_cost = ihevce_compute_rdo(
2480 ps_ctxt,
2481 ps_cu_prms,
2482 ps_cu_tree_analyse,
2483 ps_cur_ipe_ctb,
2484 ps_cu_me_data,
2485 *pps_col_pu,
2486 &s_final_mode_state,
2487 pu1_col_pu_map,
2488 *ps_cu_update_prms->ppu1_row_ecd_data,
2489 *pi4_col_start_pu_idx,
2490 i4_ctb_x_off,
2491 i4_ctb_y_off);
2492
2493 if((((ps_cu_tree_analyse->b3_cu_pos_y << 3) + ps_cu_tree_analyse->u1_cu_size) ==
2494 cur_ctb_ht) &&
2495 (ps_cu_tree_analyse->b3_cu_pos_x == 0) && (i4_ctb_x_off == 0))
2496 {
2497 /* copy the state to row level context after 1st Cu, in the Last CU row of CTB */
2498 /* copy current ctb CU states into a entropy sync state */
2499 /* to be used for next row */
2500 COPY_CABAC_STATES(
2501 ps_ctxt->pu1_curr_row_cabac_state,
2502 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2503 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2504 }
2505
2506 #if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2507 {
2508 #if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
2509 /* Add parent split cu = 0 cost signalling */
2510 ps_cu_tree_analyse->i8_best_rdopt_cost += COMPUTE_RATE_COST_CLIP30(
2511 split_cu0_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
2512 #endif
2513 for(i = (i4_tree_depth); i < 4; i++)
2514 {
2515 COPY_CABAC_STATES(
2516 &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2517 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2518 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2519 }
2520 }
2521 #else
2522 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2523 {
2524 #if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
2525 /* Add parent split cu = 0 cost signalling */
2526 ps_cu_tree_analyse->i8_best_rdopt_cost += COMPUTE_RATE_COST_CLIP30(
2527 split_cu0_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
2528 #endif
2529
2530 for(i = (i4_tree_depth); i < 4; i++)
2531 {
2532 COPY_CABAC_STATES(
2533 &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2534 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2535 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2536 }
2537 }
2538 #endif
2539
2540 ((pf_store_cu_results)ps_ctxt->pv_store_cu_results)(
2541 ps_ctxt, ps_cu_prms, &s_final_mode_state);
2542
2543 #if(!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2544 if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2)
2545 {
2546 ihevce_update_final_cu_results(
2547 ps_ctxt,
2548 ps_ctxt->ps_enc_out_ctxt,
2549 ps_cu_prms,
2550 pps_col_pu,
2551 pi4_col_start_pu_idx,
2552 ps_cu_update_prms,
2553 i4_ctb_x_off >> 6,
2554 i4_ctb_y_off >> 6);
2555 }
2556 else
2557 {
2558 /* ---- copy the luma & chroma coeffs to final output -------- */
2559 i4_num_bytes_ecd_data = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
2560
2561 if(0 != i4_num_bytes_ecd_data)
2562 {
2563 memcpy(
2564 ps_ctxt->pu1_ecd_data,
2565 &ps_ctxt->pu1_cu_recur_coeffs[0],
2566 i4_num_bytes_ecd_data * sizeof(UWORD8));
2567
2568 ps_ctxt->pu1_ecd_data += i4_num_bytes_ecd_data;
2569 }
2570
2571 /* Collocated PU updates */
2572 *pps_col_pu += ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2573 *pi4_col_start_pu_idx += ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2574 }
2575 #else
2576 /* ---- copy the luma & chroma coeffs to final output -------- */
2577 i4_num_bytes_ecd_data = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
2578 if(0 != i4_num_bytes_ecd_data)
2579 {
2580 memcpy(
2581 ps_ctxt->pu1_ecd_data,
2582 &ps_ctxt->pu1_cu_recur_coeffs[0],
2583 i4_num_bytes_ecd_data * sizeof(UWORD8));
2584
2585 ps_ctxt->pu1_ecd_data += i4_num_bytes_ecd_data;
2586 }
2587
2588 /* Collocated PU updates */
2589 *pps_col_pu += ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2590 *pi4_col_start_pu_idx += ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2591 #endif
2592
2593 ps_ctxt->ps_enc_out_ctxt++;
2594 num_children_encoded++;
2595 }
2596 else
2597 {
2598 i8_least_child_cost = 0;
2599
2600 for(i = 0; i < 4; i++)
2601 {
2602 if(ps_cu_tree_analyse_child[i] != NULL)
2603 {
2604 num_children_encoded += ihevce_cu_recurse_decide(
2605 ps_ctxt,
2606 ps_cu_prms,
2607 ps_cu_tree_analyse_child[i],
2608 ps_cu_tree_analyse,
2609 ps_cur_ipe_ctb,
2610 ps_cu_me_data,
2611 pps_col_pu,
2612 ps_cu_update_prms,
2613 pu1_col_pu_map,
2614 pi4_col_start_pu_idx,
2615 i4_tree_depth + 1,
2616 i4_ctb_x_off,
2617 i4_ctb_y_off,
2618 cur_ctb_ht);
2619
2620 /* In case of incomplete ctb, */
2621 //if(MAX_COST != ps_cu_tree_analyse_child[i]->i4_best_rdopt_cost)
2622 if(((ULWORD64)(
2623 i8_least_child_cost + ps_cu_tree_analyse_child[i]->i8_best_rdopt_cost)) >
2624 MAX_COST_64)
2625 {
2626 i8_least_child_cost = MAX_COST_64;
2627 }
2628 else
2629 {
2630 i8_least_child_cost += ps_cu_tree_analyse_child[i]->i8_best_rdopt_cost;
2631 }
2632 }
2633 else
2634 {
2635 /* If the child node is NULL, return MAX_COST*/
2636 i8_least_child_cost = MAX_COST_64;
2637 }
2638 }
2639
2640 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2641 {
2642 #if !ENABLE_4CTB_EVALUATION
2643 if((ps_cu_tree_analyse->u1_cu_size == 64) && (num_children_encoded > 10) &&
2644 (ps_ctxt->i1_slice_type != ISLICE))
2645 {
2646 ps_cu_tree_analyse->is_node_valid = 0;
2647 }
2648 #endif
2649 }
2650
2651 /* If current CU node is valid, do rdo for the node and decide btwn child nodes and parent nodes */
2652 if(ps_cu_tree_analyse->is_node_valid)
2653 {
2654 UWORD8 au1_cu_pu_map[(MAX_CTB_SIZE / MIN_PU_SIZE) * (MAX_CTB_SIZE / MIN_PU_SIZE)];
2655 pu_col_mv_t as_col_mv[2]; /* Max of 2 PUs only per CU */
2656
2657 WORD32 i4_col_pu_idx_start = i4_col_pu_idx_bkup;
2658
2659 /* Copy the collocated PU map to the local array */
2660 memcpy(
2661 au1_cu_pu_map,
2662 pu1_col_pu_map,
2663 (MAX_CTB_SIZE / MIN_PU_SIZE) * (MAX_CTB_SIZE / MIN_PU_SIZE));
2664
2665 #if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2666 COPY_CABAC_STATES(
2667 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2668 &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2669 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2670
2671 /* Reset the nbr maps while computing Parent CU node ()*/
2672 /* set the neighbour map to 0 */
2673 ihevce_set_nbr_map(
2674 ps_ctxt->pu1_ctb_nbr_map,
2675 ps_ctxt->i4_nbr_map_strd,
2676 (ps_cu_tree_analyse->b3_cu_pos_x << 1),
2677 (ps_cu_tree_analyse->b3_cu_pos_y << 1),
2678 (ps_cu_tree_analyse->u1_cu_size >> 2),
2679 0);
2680 #else
2681 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2682 {
2683 COPY_CABAC_STATES(
2684 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2685 &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2686 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2687
2688 /* Reset the nbr maps while computing Parent CU node ()*/
2689 /* set the neighbour map to 0 */
2690 ihevce_set_nbr_map(
2691 ps_ctxt->pu1_ctb_nbr_map,
2692 ps_ctxt->i4_nbr_map_strd,
2693 (ps_cu_tree_analyse->b3_cu_pos_x << 1),
2694 (ps_cu_tree_analyse->b3_cu_pos_y << 1),
2695 (ps_cu_tree_analyse->u1_cu_size >> 2),
2696 0);
2697 }
2698 #endif
2699
2700 /* Do rdo for the parent node */
2701 /* Compare parent node cost vs child node costs */
2702 ps_ctxt->is_parent_cu_rdopt = 1;
2703
2704 ps_cu_prms->u1_is_cu_noisy = u1_is_cu_noisy;
2705
2706 ihevce_update_pred_qp(
2707 ps_ctxt, ps_cu_tree_analyse->b3_cu_pos_x, ps_cu_tree_analyse->b3_cu_pos_y);
2708
2709 ps_cu_tree_analyse->i8_best_rdopt_cost = ihevce_compute_rdo(
2710 ps_ctxt,
2711 ps_cu_prms,
2712 ps_cu_tree_analyse,
2713 ps_cur_ipe_ctb,
2714 ps_cu_me_data,
2715 as_col_mv,
2716 &s_final_mode_state,
2717 au1_cu_pu_map,
2718 *ps_cu_update_prms->ppu1_row_ecd_data,
2719 i4_col_pu_idx_start,
2720 i4_ctb_x_off,
2721 i4_ctb_y_off);
2722
2723 ps_ctxt->is_parent_cu_rdopt = 0;
2724
2725 #if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2726 /* Add parent split cu cost signalling */
2727 ps_cu_tree_analyse->i8_best_rdopt_cost += COMPUTE_RATE_COST_CLIP30(
2728 split_cu0_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
2729
2730 COPY_CABAC_STATES(
2731 &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2732 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2733 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2734
2735 /* i8_least_child_cost += (num_children_encoded * ps_ctxt->i4_sad_lamda\
2736 + ((1 << (LAMBDA_Q_SHIFT)))) >> (LAMBDA_Q_SHIFT + 1) */
2737 ;
2738 /* bits for coding cu split flag as 1 */
2739 i8_least_child_cost += COMPUTE_RATE_COST_CLIP30(
2740 split_cu1_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
2741 #else
2742 #if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
2743 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2744 {
2745 /* Add parent split cu cost signalling */
2746 ps_cu_tree_analyse->i8_best_rdopt_cost += COMPUTE_RATE_COST_CLIP30(
2747 split_cu0_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
2748
2749 COPY_CABAC_STATES(
2750 &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2751 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2752 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2753
2754 /* i8_least_child_cost += (num_children_encoded * ps_ctxt->i4_sad_lamda\
2755 + ((1 << (LAMBDA_Q_SHIFT)))) >> (LAMBDA_Q_SHIFT + 1) */
2756 ;
2757 /* bits for coding cu split flag as 1 */
2758 i8_least_child_cost += COMPUTE_RATE_COST_CLIP30(
2759 split_cu1_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
2760 }
2761 #else
2762 i8_least_child_cost +=
2763 (num_children_encoded * ps_ctxt->i4_sad_lamda + ((1 << (LAMBDA_Q_SHIFT)))) >>
2764 (LAMBDA_Q_SHIFT + 1);
2765 #endif
2766 #endif
2767
2768 /* If child modes win over parent, discard parent enc ctxt */
2769 /* else discard child ctxt */
2770 if(ps_cu_tree_analyse->i8_best_rdopt_cost > i8_least_child_cost)
2771 {
2772 #if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2773 /* Store child node Models for evalution of next CU */
2774 for(i = (i4_tree_depth); i < 4; i++)
2775 {
2776 COPY_CABAC_STATES(
2777 &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2778 &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
2779 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2780 }
2781 /* Reset cabac states if child has won */
2782 COPY_CABAC_STATES(
2783 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2784 &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
2785 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2786 #else
2787 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2788 {
2789 for(i = i4_tree_depth; i < 4; i++)
2790 {
2791 COPY_CABAC_STATES(
2792 &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2793 &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
2794 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2795 }
2796 /* Reset cabac states if child has won */
2797 COPY_CABAC_STATES(
2798 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2799 &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
2800 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2801 }
2802 #endif
2803 ps_cu_tree_analyse->i8_best_rdopt_cost = i8_least_child_cost;
2804 ps_cu_tree_analyse->is_node_valid = 0;
2805 }
2806 else
2807 {
2808 /* Parent node wins over child node */
2809 ihevce_enc_cu_node_ctxt_t *ps_enc_tmp_out_ctxt;
2810 WORD32 i4_num_bytes_ecd_data;
2811 WORD32 num_child_nodes = 0;
2812 WORD32 i4_num_pus_in_cu;
2813
2814 if((((ps_cu_tree_analyse->b3_cu_pos_y << 3) + ps_cu_tree_analyse->u1_cu_size) ==
2815 cur_ctb_ht) &&
2816 (ps_cu_tree_analyse->b3_cu_pos_x == 0) && (i4_ctb_x_off == 0))
2817 {
2818 /* copy the state to row level context after 1st Cu, in the Last CU row of CTB */
2819 /* copy current ctb CU states into a entropy sync state */
2820 /* to be used for next row */
2821 COPY_CABAC_STATES(
2822 ps_ctxt->pu1_curr_row_cabac_state,
2823 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2824 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2825 }
2826
2827 #if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2828 /* Store parent node Models for evalution of next CU */
2829 for(i = (i4_tree_depth + 1); i < 4; i++)
2830 {
2831 COPY_CABAC_STATES(
2832 &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2833 &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2834 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2835 }
2836 #else
2837 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2838 {
2839 for(i = (i4_tree_depth + 1); i < 4; i++)
2840 {
2841 COPY_CABAC_STATES(
2842 &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2843 &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2844 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2845 }
2846 }
2847 #endif
2848 ((pf_store_cu_results)ps_ctxt->pv_store_cu_results)(
2849 ps_ctxt, ps_cu_prms, &s_final_mode_state);
2850
2851 #if(!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2852 if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2)
2853 {
2854 ihevce_update_final_cu_results(
2855 ps_ctxt,
2856 ps_ctxt->ps_enc_out_ctxt,
2857 ps_cu_prms,
2858 pps_col_pu,
2859 pi4_col_start_pu_idx,
2860 ps_cu_update_prms,
2861 i4_ctb_x_off >> 6,
2862 i4_ctb_y_off >> 6);
2863
2864 ps_ctxt->ps_enc_out_ctxt++;
2865 }
2866 else
2867 {
2868 ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt;
2869
2870 num_child_nodes = num_children_encoded;
2871
2872 /* ---- copy the luma & chroma coeffs to final output -------- */
2873 for(i = 0; i < num_child_nodes; i++)
2874 {
2875 i4_num_bytes_ecd_data =
2876 (ps_ctxt->ps_enc_out_ctxt - i - 1)->ps_cu_prms->i4_num_bytes_ecd_data;
2877 ps_ctxt->pu1_ecd_data -= i4_num_bytes_ecd_data;
2878 }
2879
2880 i4_num_bytes_ecd_data =
2881 ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
2882 if(0 != i4_num_bytes_ecd_data)
2883 {
2884 memcpy(
2885 ps_ctxt->pu1_ecd_data,
2886 &ps_ctxt->pu1_cu_recur_coeffs[0],
2887 i4_num_bytes_ecd_data);
2888
2889 ps_ctxt->pu1_ecd_data += i4_num_bytes_ecd_data;
2890 }
2891
2892 ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt - num_child_nodes;
2893
2894 memcpy(
2895 ps_enc_tmp_out_ctxt,
2896 ps_ctxt->ps_enc_out_ctxt,
2897 sizeof(ihevce_enc_cu_node_ctxt_t));
2898 ps_enc_tmp_out_ctxt->ps_cu_prms = &ps_enc_tmp_out_ctxt->s_cu_prms;
2899
2900 /* Collocated PU updates */
2901 i4_num_pus_in_cu = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2902 /* Copy the collocated MVs and the PU map to frame buffers */
2903 memcpy(ps_col_mv_bkup, as_col_mv, sizeof(pu_col_mv_t) * i4_num_pus_in_cu);
2904 memcpy(
2905 pu1_col_pu_map,
2906 au1_cu_pu_map,
2907 (MAX_CTB_SIZE / MIN_PU_SIZE) * (MAX_CTB_SIZE / MIN_PU_SIZE));
2908 /* Update the frame buffer pointer and the map index */
2909 *pps_col_pu = ps_col_mv_bkup + i4_num_pus_in_cu;
2910 *pi4_col_start_pu_idx = i4_col_pu_idx_bkup + i4_num_pus_in_cu;
2911
2912 ps_ctxt->ps_enc_out_ctxt = ps_enc_tmp_out_ctxt + 1;
2913 }
2914 #else
2915
2916 ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt;
2917
2918 num_child_nodes = num_children_encoded;
2919
2920 /* ---- copy the luma & chroma coeffs to final output -------- */
2921 for(i = 0; i < num_child_nodes; i++)
2922 {
2923 i4_num_bytes_ecd_data =
2924 (ps_ctxt->ps_enc_out_ctxt - i - 1)->ps_cu_prms->i4_num_bytes_ecd_data;
2925 ps_ctxt->pu1_ecd_data -= i4_num_bytes_ecd_data;
2926 }
2927
2928 i4_num_bytes_ecd_data = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
2929 if(0 != i4_num_bytes_ecd_data)
2930 {
2931 memcpy(
2932 ps_ctxt->pu1_ecd_data,
2933 &ps_ctxt->pu1_cu_recur_coeffs[0],
2934 i4_num_bytes_ecd_data * sizeof(UWORD8));
2935
2936 ps_ctxt->pu1_ecd_data += i4_num_bytes_ecd_data;
2937 }
2938
2939 ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt - num_child_nodes;
2940
2941 memcpy(
2942 ps_enc_tmp_out_ctxt,
2943 ps_ctxt->ps_enc_out_ctxt,
2944 sizeof(ihevce_enc_cu_node_ctxt_t));
2945
2946 ps_enc_tmp_out_ctxt->ps_cu_prms = &ps_enc_tmp_out_ctxt->s_cu_prms;
2947
2948 /* Collocated PU updates */
2949 i4_num_pus_in_cu = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2950 /* Copy the collocated MVs and the PU map to frame buffers */
2951 memcpy(ps_col_mv_bkup, as_col_mv, sizeof(pu_col_mv_t) * i4_num_pus_in_cu);
2952 memcpy(
2953 pu1_col_pu_map,
2954 au1_cu_pu_map,
2955 (MAX_CTB_SIZE / MIN_PU_SIZE) * (MAX_CTB_SIZE / MIN_PU_SIZE));
2956 /* Update the frame buffer pointer and the map index */
2957 *pps_col_pu = ps_col_mv_bkup + i4_num_pus_in_cu;
2958 *pi4_col_start_pu_idx = i4_col_pu_idx_bkup + i4_num_pus_in_cu;
2959
2960 ps_ctxt->ps_enc_out_ctxt = ps_enc_tmp_out_ctxt + 1;
2961 #endif
2962
2963 num_children_encoded = 1;
2964 DISABLE_THE_CHILDREN_NODES(ps_cu_tree_analyse);
2965 }
2966 }
2967 else /* if(ps_cu_tree_analyse->is_node_valid) */
2968 {
2969 ps_cu_tree_analyse->i8_best_rdopt_cost = i8_least_child_cost;
2970
2971 /* Tree depth of four will occur for Incomplete CTB */
2972 if((i8_least_child_cost > 0) && (i4_tree_depth != 3))
2973 {
2974 #if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2975 /* Store child node Models for evalution of next CU */
2976 for(i = i4_tree_depth; i < 4; i++)
2977 {
2978 COPY_CABAC_STATES(
2979 &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2980 &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
2981 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2982 }
2983 #else
2984 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2985 {
2986 for(i = (i4_tree_depth); i < 4; i++)
2987 {
2988 COPY_CABAC_STATES(
2989 &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2990 &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
2991 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2992 }
2993 }
2994 #endif
2995 }
2996 }
2997 }
2998
2999 return num_children_encoded;
3000 }
3001
/**
*******************************************************************************
* \brief
*  Reduces the per-8x8 indicator array over the area covered by one CU.
*
* \param[in] pi1_8x8CULevel_intraData_availability_indicator
*  Indicator array, addressed as [x + MAX_CU_IN_CTB_ROW * y] with x and y
*  expressed in 8x8 units
* \param[in] u1_cu_size
*  CU size in pixels. Only 8 terminates the recursion, so the value is
*  presumably always 8 << k (TODO confirm against callers)
* \param[in] u1_x_8x8CU_units
*  Horizontal position of the CU's top-left corner, in 8x8 units
* \param[in] u1_y_8x8CU_units
*  Vertical position of the CU's top-left corner, in 8x8 units
*
* \return
*  For an 8x8 CU: the logical NOT of its indicator entry.
*  For a larger CU: the OR of the results of its four quadrants, i.e. 1 when
*  at least one covered 8x8 entry is zero, else 0.
*******************************************************************************
*/
static UWORD8 ihevce_intraData_availability_extractor(
    WORD8 *pi1_8x8CULevel_intraData_availability_indicator,
    UWORD8 u1_cu_size,
    UWORD8 u1_x_8x8CU_units,
    UWORD8 u1_y_8x8CU_units)
{
    UWORD8 u1_quadrant_size;
    UWORD8 u1_quadrant_units;
    UWORD8 u1_quadrant_idx;
    UWORD8 u1_result = 0;

    /* Leaf of the recursion: a single 8x8 entry */
    if(8 == u1_cu_size)
    {
        return (!pi1_8x8CULevel_intraData_availability_indicator
                    [u1_x_8x8CU_units + MAX_CU_IN_CTB_ROW * u1_y_8x8CU_units]);
    }

    u1_quadrant_size = u1_cu_size / 2;
    u1_quadrant_units = u1_quadrant_size / 8;

    /* Quadrants are visited as TL, TR, BL, BR:          */
    /* bit 0 of the index selects the right-hand column, */
    /* bit 1 of the index selects the bottom row         */
    for(u1_quadrant_idx = 0; u1_quadrant_idx < 4; u1_quadrant_idx++)
    {
        UWORD8 u1_x_offset = (u1_quadrant_idx & 1) ? u1_quadrant_units : 0;
        UWORD8 u1_y_offset = (u1_quadrant_idx & 2) ? u1_quadrant_units : 0;

        u1_result |= ihevce_intraData_availability_extractor(
            pi1_8x8CULevel_intraData_availability_indicator,
            u1_quadrant_size,
            u1_x_8x8CU_units + u1_x_offset,
            u1_y_8x8CU_units + u1_y_offset);
    }

    return u1_result;
}
3045
/**
*******************************************************************************
* \brief
*  Recursively combines an intra CU tree and an inter CU tree into
*  ps_merged_tree.
*
* \description
*  For the current node:
*   - if an intra node exists, its is_node_valid is AND-ed with the result of
*     ihevce_intraData_availability_extractor() for the node's size and
*     position (for 8x8 CUs bit 0 of each position is cleared first);
*   - the merged node's is_node_valid, u1_intra_eval_enable and
*     u1_inter_eval_enable are derived from the is_node_valid of whichever
*     source nodes exist;
*   - each of the four children (TL, TR, BL, BR) is then merged, passing NULL
*     for a source tree that has no such child. A child that exists in
*     neither source tree is skipped.
*
* \param[out] ps_merged_tree
*  Node of the merged tree to fill. NOTE(review): assumed to own a child
*  wherever either source tree has one - confirm against callers
* \param[in,out] ps_intra_tree
*  Intra tree node, or NULL. Its is_node_valid is modified
* \param[in] ps_inter_tree
*  Inter tree node, or NULL
* \param[in] pi1_8x8CULevel_intraData_availability_indicator
*  Per-8x8 indicator array forwarded to
*  ihevce_intraData_availability_extractor()
*
* \return
*  None
*
* \remarks
*  Calling with both source nodes NULL is a contract violation: it asserts
*  and, when asserts are compiled out, returns without touching any tree.
*******************************************************************************
*/
void ihevce_intra_and_inter_cuTree_merger(
    cur_ctb_cu_tree_t *ps_merged_tree,
    cur_ctb_cu_tree_t *ps_intra_tree,
    cur_ctb_cu_tree_t *ps_inter_tree,
    WORD8 *pi1_8x8CULevel_intraData_availability_indicator)
{
    /* Children in the order TL, TR, BL, BR; NULL => source has no such child */
    cur_ctb_cu_tree_t *aps_merged_children[4];
    cur_ctb_cu_tree_t *aps_intra_children[4] = { NULL, NULL, NULL, NULL };
    cur_ctb_cu_tree_t *aps_inter_children[4] = { NULL, NULL, NULL, NULL };
    UWORD8 u1_child_idx;

    if((NULL == ps_intra_tree) && (NULL == ps_inter_tree))
    {
        /* The swamps of Dagobah! */
        ASSERT(0);

        /* Nothing to merge; bail out so that release builds (where the */
        /* assert may be a no-op) never act on undefined child state    */
        return;
    }

    if(NULL != ps_intra_tree)
    {
        ps_intra_tree->is_node_valid =
            ps_intra_tree->is_node_valid &
            ihevce_intraData_availability_extractor(
                pi1_8x8CULevel_intraData_availability_indicator,
                ps_intra_tree->u1_cu_size,
                ps_intra_tree->b3_cu_pos_x & ((8 == ps_intra_tree->u1_cu_size) ? 0xfe : 0xff),
                ps_intra_tree->b3_cu_pos_y & ((8 == ps_intra_tree->u1_cu_size) ? 0xfe : 0xff));

        aps_intra_children[0] = ps_intra_tree->ps_child_node_tl;
        aps_intra_children[1] = ps_intra_tree->ps_child_node_tr;
        aps_intra_children[2] = ps_intra_tree->ps_child_node_bl;
        aps_intra_children[3] = ps_intra_tree->ps_child_node_br;
    }

    if(NULL != ps_inter_tree)
    {
        aps_inter_children[0] = ps_inter_tree->ps_child_node_tl;
        aps_inter_children[1] = ps_inter_tree->ps_child_node_tr;
        aps_inter_children[2] = ps_inter_tree->ps_child_node_bl;
        aps_inter_children[3] = ps_inter_tree->ps_child_node_br;
    }

    if((NULL != ps_intra_tree) && (NULL != ps_inter_tree))
    {
        /* Both source nodes exist */
        ps_merged_tree->is_node_valid = ps_intra_tree->is_node_valid ||
                                        ps_inter_tree->is_node_valid;
        ps_merged_tree->u1_inter_eval_enable = ps_inter_tree->is_node_valid;
        ps_merged_tree->u1_intra_eval_enable = ps_intra_tree->is_node_valid;
    }
    else if(NULL != ps_intra_tree)
    {
        /* Only the intra node exists */
        ps_merged_tree->is_node_valid = ps_intra_tree->is_node_valid;
        ps_merged_tree->u1_inter_eval_enable = 0;
        ps_merged_tree->u1_intra_eval_enable = ps_intra_tree->is_node_valid;
    }
    else
    {
        /* Only the inter node exists */
        ps_merged_tree->is_node_valid = ps_inter_tree->is_node_valid;
        ps_merged_tree->u1_inter_eval_enable = ps_inter_tree->is_node_valid;
        ps_merged_tree->u1_intra_eval_enable = 0;
    }

    aps_merged_children[0] = ps_merged_tree->ps_child_node_tl;
    aps_merged_children[1] = ps_merged_tree->ps_child_node_tr;
    aps_merged_children[2] = ps_merged_tree->ps_child_node_bl;
    aps_merged_children[3] = ps_merged_tree->ps_child_node_br;

    for(u1_child_idx = 0; u1_child_idx < 4; u1_child_idx++)
    {
        /* Descend only where at least one source tree has this child */
        if((NULL != aps_intra_children[u1_child_idx]) ||
           (NULL != aps_inter_children[u1_child_idx]))
        {
            ihevce_intra_and_inter_cuTree_merger(
                aps_merged_children[u1_child_idx],
                aps_intra_children[u1_child_idx],
                aps_inter_children[u1_child_idx],
                pi1_8x8CULevel_intraData_availability_indicator);
        }
    }
}
3268