1 /******************************************************************************
2 *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /*!
22 ******************************************************************************
23 * \file ihevce_recur_bracketing.c
24 *
25 * \brief
26 * This file contains interface functions of recursive bracketing
27 * module
28 * \date
29 * 12/02/2012
30 *
31 * \author
32 * Ittiam
33 *
34 * List of Functions
35 *
36 *
37 ******************************************************************************
38 */
39
40 /*****************************************************************************/
41 /* File Includes */
42 /*****************************************************************************/
43 /* System include files */
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <assert.h>
48 #include <stdarg.h>
49 #include <math.h>
50
51 /* User include files */
52 #include "ihevc_typedefs.h"
53 #include "itt_video_api.h"
54 #include "ihevce_api.h"
55
56 #include "rc_cntrl_param.h"
57 #include "rc_frame_info_collector.h"
58 #include "rc_look_ahead_params.h"
59
60 #include "ihevc_defs.h"
61 #include "ihevc_structs.h"
62 #include "ihevc_platform_macros.h"
63 #include "ihevc_deblk.h"
64 #include "ihevc_itrans_recon.h"
65 #include "ihevc_chroma_itrans_recon.h"
66 #include "ihevc_chroma_intra_pred.h"
67 #include "ihevc_intra_pred.h"
68 #include "ihevc_inter_pred.h"
69 #include "ihevc_mem_fns.h"
70 #include "ihevc_padding.h"
71 #include "ihevc_weighted_pred.h"
72 #include "ihevc_sao.h"
73 #include "ihevc_resi_trans.h"
74 #include "ihevc_quant_iquant_ssd.h"
75 #include "ihevc_cabac_tables.h"
76
77 #include "ihevce_defs.h"
78 #include "ihevce_lap_enc_structs.h"
79 #include "ihevce_multi_thrd_structs.h"
80 #include "ihevce_me_common_defs.h"
81 #include "ihevce_had_satd.h"
82 #include "ihevce_error_codes.h"
83 #include "ihevce_bitstream.h"
84 #include "ihevce_cabac.h"
85 #include "ihevce_rdoq_macros.h"
86 #include "ihevce_function_selector.h"
87 #include "ihevce_enc_structs.h"
88 #include "ihevce_entropy_structs.h"
89 #include "ihevce_cmn_utils_instr_set_router.h"
90 #include "ihevce_enc_loop_structs.h"
91 #include "ihevce_ipe_instr_set_router.h"
92 #include "ihevce_ipe_structs.h"
93 #include "ihevce_ipe_pass.h"
94 #include "ihevce_recur_bracketing.h"
95 #include "ihevce_nbr_avail.h"
96 #include "ihevc_common_tables.h"
97 #include "ihevce_decomp_pre_intra_structs.h"
98 #include "ihevce_decomp_pre_intra_pass.h"
99
100 #include "cast_types.h"
101 #include "osal.h"
102 #include "osal_defaults.h"
103
104 /*****************************************************************************/
105 /* Constant Macros */
106 /*****************************************************************************/
/* Debug switch: when non-zero, the 32x32 merge path of
 * ihevce_bracketing_analysis() is compiled with its alternate
 * "#if IP_DBG_L1_l2" branch instead of the default one. */
#define IP_DBG_L1_l2 0
/* NOTE(review): not referenced in the visible part of this file; presumably a
 * cost bias applied in favour of child CUs - confirm against its users. */
#define CHILD_BIAS 12
109
110 /*****************************************************************************/
111 /* Globals */
112 /*****************************************************************************/
/* Table of luma intra prediction functions (defined elsewhere). In this file
 * it is always indexed through g_i4_ip_funcs[mode]. */
extern pf_intra_pred g_apf_lum_ip[10];

/* Per intra mode: index of the matching entry in g_apf_lum_ip (defined
 * elsewhere). */
extern WORD32 g_i4_ip_funcs[MAX_NUM_IP_MODES];
116
/* Horizontal position, in 8x8-block units inside the CTB, of the 8x8 block
 * with the given index. Consecutive indices visit the 64 blocks in Z-scan
 * order: (0,0), (1,0), (0,1), (1,1), (2,0), ... */
UWORD8 gau1_cu_pos_x[64] = { 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7,
                             6, 7, 4, 5, 4, 5, 6, 7, 6, 7, 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1,
                             2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 4, 5, 4, 5, 6, 7, 6, 7 };

/* Vertical position, in 8x8-block units inside the CTB, of the 8x8 block with
 * the given index (same Z-scan indexing as gau1_cu_pos_x). */
UWORD8 gau1_cu_pos_y[64] = { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3, 0, 0, 1, 1, 0, 0,
                             1, 1, 2, 2, 3, 3, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 5, 5, 6, 6, 7, 7,
                             6, 6, 7, 7, 4, 4, 5, 5, 4, 4, 5, 5, 6, 6, 7, 7, 6, 6, 7, 7 };
124
/* Clears bit number `bit` in the lvalue `x`. Note that `x` is evaluated twice.
 * Both arguments are parenthesised so that compound expressions such as
 * RESET_BIT(flags, idx & 7) expand with the intended precedence. */
#define RESET_BIT(x, bit) ((x) = (x) & ~((WORD32)1 << (bit)))
126
127 /*****************************************************************************/
128 /* Function Definitions */
129 /*****************************************************************************/
130
131 /*!
132 ******************************************************************************
133 * \if Function name : ihevce_update_cand_list \endif
134 *
135 * \brief
136 * Final Candidate list population, nbr flag andd nbr mode update function
137 *
138 * \param[in] ps_row_cu : pointer to cu analyse struct
139 * \param[in] ps_cu_node : pointer to cu node info buffer
140 * \param[in] ps_ed_blk_l1 : pointer to level 1 and 2 decision buffer
141 * \param[in] pu1_cand_mode_list : pointer to candidate list buffer
142 *
143 * \return
144 * None
145 *
146 * \author
147 * Ittiam
148 *
149 *****************************************************************************
150 */
ihevce_update_cand_list(ihevce_ipe_cu_tree_t * ps_cu_node,ihevce_ed_blk_t * ps_ed_blk_l1,ihevce_ipe_ctxt_t * ps_ctxt)151 void ihevce_update_cand_list(
152 ihevce_ipe_cu_tree_t *ps_cu_node, ihevce_ed_blk_t *ps_ed_blk_l1, ihevce_ipe_ctxt_t *ps_ctxt)
153 {
154 WORD32 row, col, x, y, size;
155
156 /* Candidate mode Update */
157 (void)ps_ed_blk_l1;
158 /* Update CTB mode map for the finalised CU */
159 x = ((ps_cu_node->u2_x0 << 3) >> 2) + 1;
160 y = ((ps_cu_node->u2_y0 << 3) >> 2) + 1;
161 size = ps_cu_node->u1_cu_size >> 2;
162 for(row = y; row < (y + size); row++)
163 {
164 for(col = x; col < (x + size); col++)
165 {
166 ps_ctxt->au1_ctb_mode_map[row][col] = ps_cu_node->best_mode;
167 }
168 }
169 return;
170 }
171
172 /*!
173 ******************************************************************************
174 * \if Function name : ihevce_intra_populate_mode_bits_cost_bracketing \endif
175 *
176 * \brief
177 * Mpm indx calc function based on left and top available modes
178 *
179 * \param[in] top_intra_mode : Top available intra mode
180 * \param[in] left_intra_mode : Left available intra mode
181 * \param[in] available_top : Top availability flag
182 * \param[in] available_left : Left availability flag
183 * \param[in] cu_pos_y : cu position wrt to CTB
184 * \param[in] mode_bits_cost : pointer to mode bits buffer
185 * \param[in] lambda : Lambda value (SAD/SATD)
186 * \param[in] cand_mode_list : pointer to candidate list buffer
187 *
188 * \return
189 * None
190 *
191 * \author
192 * Ittiam
193 *
194 *****************************************************************************
195 */
ihevce_intra_populate_mode_bits_cost_bracketing(WORD32 top_intra_mode,WORD32 left_intra_mode,WORD32 available_top,WORD32 available_left,WORD32 cu_pos_y,UWORD16 * mode_bits_cost,UWORD16 * mode_bits,WORD32 lambda,WORD32 * cand_mode_list)196 void ihevce_intra_populate_mode_bits_cost_bracketing(
197 WORD32 top_intra_mode,
198 WORD32 left_intra_mode,
199 WORD32 available_top,
200 WORD32 available_left,
201 WORD32 cu_pos_y,
202 UWORD16 *mode_bits_cost,
203 UWORD16 *mode_bits,
204 WORD32 lambda,
205 WORD32 *cand_mode_list)
206 {
207 /* local variables */
208 WORD32 i;
209 WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
210
211 UWORD16 one_bits_cost =
212 COMPUTE_RATE_COST_CLIP30(4, lambda, (LAMBDA_Q_SHIFT + 1)); //1.5 * lambda
213 UWORD16 two_bits_cost =
214 COMPUTE_RATE_COST_CLIP30(6, lambda, (LAMBDA_Q_SHIFT + 1)); //2.5 * lambda
215 UWORD16 five_bits_cost =
216 COMPUTE_RATE_COST_CLIP30(12, lambda, (LAMBDA_Q_SHIFT + 1)); //5.5 * lambda
217
218 for(i = 0; i < 35; i++)
219 {
220 mode_bits_cost[i] = five_bits_cost;
221 mode_bits[i] = 5;
222 }
223
224 /* EIID: set availability flag to zero if modes are invalid.
225 Required since some CU's might be skipped (though available)
226 and their modes will be set to 255 (-1)*/
227 if(35 < top_intra_mode || 0 > top_intra_mode)
228 available_top = 0;
229 if(35 < left_intra_mode || 0 > left_intra_mode)
230 available_left = 0;
231
232 /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
233 /* N = top */
234 if(0 == available_top)
235 {
236 cand_intra_pred_mode_top = INTRA_DC;
237 }
238 /* for neighbour != INTRA, setting DC is done outside */
239 else if(0 == cu_pos_y) /* It's on the CTB boundary */
240 {
241 cand_intra_pred_mode_top = INTRA_DC;
242 }
243 else
244 {
245 cand_intra_pred_mode_top = top_intra_mode;
246 }
247
248 /* N = left */
249 if(0 == available_left)
250 {
251 cand_intra_pred_mode_left = INTRA_DC;
252 //cand_intra_pred_mode_left = cand_intra_pred_mode_top;
253 }
254 /* for neighbour != INTRA, setting DC is done outside */
255 else
256 {
257 cand_intra_pred_mode_left = left_intra_mode;
258 }
259
260 /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
261 if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
262 {
263 if(cand_intra_pred_mode_left < 2)
264 {
265 cand_mode_list[0] = INTRA_PLANAR;
266 cand_mode_list[1] = INTRA_DC;
267 cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
268 }
269 else
270 {
271 cand_mode_list[0] = cand_intra_pred_mode_left;
272 cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32);
273 cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32);
274 }
275 }
276 else
277 {
278 if(0 == available_left)
279 {
280 cand_mode_list[0] = cand_intra_pred_mode_top;
281 cand_mode_list[1] = cand_intra_pred_mode_left;
282 }
283 else
284 {
285 cand_mode_list[0] = cand_intra_pred_mode_left;
286 cand_mode_list[1] = cand_intra_pred_mode_top;
287 }
288 if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
289 (cand_intra_pred_mode_top != INTRA_PLANAR))
290 {
291 cand_mode_list[2] = INTRA_PLANAR;
292 }
293 else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
294 {
295 cand_mode_list[2] = INTRA_DC;
296 }
297 else
298 {
299 cand_mode_list[2] = INTRA_ANGULAR(26);
300 }
301 }
302 mode_bits_cost[cand_mode_list[0]] = one_bits_cost;
303 mode_bits_cost[cand_mode_list[1]] = two_bits_cost;
304 mode_bits_cost[cand_mode_list[2]] = two_bits_cost;
305
306 mode_bits[cand_mode_list[0]] = 2;
307 mode_bits[cand_mode_list[1]] = 3;
308 mode_bits[cand_mode_list[2]] = 3;
309 }
310
311 /*!
312 ******************************************************************************
313 * \if Function name : ihevce_pu_calc_4x4_blk \endif
314 *
315 * \brief
316 * 4x4 pu (8x8 CU) mode decision using step 8421 method
317 *
318 * \param[in] ps_cu_node : pointer to cu node info buffer
319 * \param[in] pu1_src : pointer to src pixels
320 * \param[in] src_stride : frm source stride
321 * \param[in] ref : pointer to reference pixels for prediction
322 * \param[in] cand_mode_list : pointer to candidate list buffer
323 * \param[in] best_costs_4x4 : pointer to 3 best cost buffer
324 * \param[in] best_modes_4x4 : pointer to 3 best mode buffer
325 *
326 * \return
327 * None
328 *
329 * \author
330 * Ittiam
331 *
332 *****************************************************************************
333 */
ihevce_pu_calc_4x4_blk(ihevce_ipe_ctxt_t * ps_ctxt,ihevce_ipe_cu_tree_t * ps_cu_node,UWORD8 * pu1_src,WORD32 src_stride,UWORD8 * ref,UWORD16 * mode_bits_cost,WORD32 * best_costs_4x4,UWORD8 * best_modes_4x4,func_selector_t * ps_func_selector)334 void ihevce_pu_calc_4x4_blk(
335 ihevce_ipe_ctxt_t *ps_ctxt,
336 ihevce_ipe_cu_tree_t *ps_cu_node,
337 UWORD8 *pu1_src,
338 WORD32 src_stride,
339 UWORD8 *ref,
340 UWORD16 *mode_bits_cost,
341 WORD32 *best_costs_4x4,
342 UWORD8 *best_modes_4x4,
343 func_selector_t *ps_func_selector)
344 {
345 WORD16 *pi2_trans_tmp = ps_ctxt->pi2_trans_tmp;
346 WORD16 *pi2_trans_out = ps_ctxt->pi2_trans_out;
347 UWORD8 u1_use_satd = ps_ctxt->u1_use_satd;
348 UWORD8 u1_level_1_refine_on = ps_ctxt->u1_level_1_refine_on;
349
350 WORD32 i, j = 0, i_end;
351 UWORD8 mode, best_amode = 255;
352 UWORD8 pred[16];
353
354 UWORD16 sad;
355 WORD32 sad_cost = 0;
356 WORD32 best_asad_cost = 0xFFFFF;
357 WORD32 temp;
358 UWORD8 modes_to_eval[5];
359 WORD32 costs_4x4[5];
360 UWORD8 modes_4x4[5] = { 0, 1, 2, 3, 4 };
361
362 /* LO resolution hence low resolution disable */
363 WORD32 u1_low_resol = 0;
364 UWORD8 au1_best_modes[1] = { 0 };
365 WORD32 ai4_best_sad_costs[1] = { 0 };
366
367 WORD16 *pi2_tmp = &pi2_trans_tmp[0];
368
369 ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list =
370 &ps_ctxt->s_ipe_optimised_function_list;
371
372 //apf_resd_trns[0] = &ihevc_resi_trans_4x4_ttype1;
373 //apf_resd_trns[0] = &ihevc_HAD_4x4_8bit;
374
375 for(i = 0; i < 5; i++)
376 {
377 costs_4x4[i] = MAX_INTRA_COST_IPE;
378 }
379
380 ps_ipe_optimised_function_list->pf_ed_4x4_find_best_modes(
381 pu1_src,
382 src_stride,
383 ref,
384 mode_bits_cost,
385 au1_best_modes,
386 ai4_best_sad_costs,
387 u1_low_resol,
388 ps_ipe_optimised_function_list->pf_4x4_sad_computer);
389
390 best_amode = au1_best_modes[0];
391 best_asad_cost = ai4_best_sad_costs[0];
392
393 ASSERT(best_amode != 255);
394 /* Around best level 4 angular mode, search for best level 2 mode */
395 modes_to_eval[0] = best_amode - 2;
396 modes_to_eval[1] = best_amode + 2;
397 i = 0;
398 i_end = 2;
399 if(best_amode == 2)
400 i = 1;
401 else if(best_amode == 34)
402 i_end = 1;
403 for(; i < i_end; i++)
404 {
405 mode = modes_to_eval[i];
406
407 g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode);
408
409 sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer(pu1_src, &pred[0], src_stride, 4);
410
411 sad_cost = sad;
412 sad_cost += mode_bits_cost[mode];
413
414 if(sad_cost < best_asad_cost)
415 {
416 best_amode = mode;
417 best_asad_cost = sad_cost;
418 }
419 }
420
421 /* Around best level 2 angular mode, search for best level 1 mode */
422 /* Also evaluate for non-angular mode */
423
424 i = 0;
425 /*Level 1 refinement is disabled for ES preset */
426 if(1 == u1_level_1_refine_on)
427 {
428 if(best_amode != 2)
429 modes_to_eval[i++] = best_amode - 1;
430 modes_to_eval[i++] = best_amode;
431 }
432
433 modes_to_eval[i++] = 0;
434 modes_to_eval[i++] = 1;
435
436 if(1 == u1_level_1_refine_on)
437 {
438 if(best_amode != 34)
439 modes_to_eval[i++] = best_amode + 1;
440 }
441 i_end = i;
442 i = 0;
443
444 for(; i < i_end; i++)
445 {
446 mode = modes_to_eval[i];
447
448 g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode);
449
450 /* Hard coding to use SATD */
451 if(u1_use_satd)
452 {
453 ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr(
454 pu1_src, &pred[0], (WORD32 *)pi2_tmp, pi2_trans_out, src_stride, 4, (4 << 16) | 0);
455
456 sad = ihevce_ipe_pass_satd(pi2_trans_out, 4, 4);
457 }
458 else
459 {
460 sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer(
461 pu1_src, &pred[0], src_stride, 4);
462 }
463 sad_cost = sad;
464 sad_cost += mode_bits_cost[mode];
465
466 costs_4x4[i] = sad_cost;
467 }
468
469 /* Arrange the reference array in ascending order */
470 for(i = 0; i < (i_end - 1); i++)
471 {
472 for(j = i + 1; j < i_end; j++)
473 {
474 if(costs_4x4[i] > costs_4x4[j])
475 {
476 temp = costs_4x4[i];
477 costs_4x4[i] = costs_4x4[j];
478 costs_4x4[j] = temp;
479
480 temp = modes_4x4[i];
481 modes_4x4[i] = modes_4x4[j];
482 modes_4x4[j] = temp;
483 }
484 }
485 }
486 for(i = 0; i < 3; i++)
487 {
488 best_costs_4x4[i] = costs_4x4[i];
489 best_modes_4x4[i] = modes_to_eval[modes_4x4[i]];
490 }
491
492 {
493 ps_cu_node->best_mode = best_modes_4x4[0];
494 ps_cu_node->best_cost = best_costs_4x4[0];
495 ps_cu_node->best_satd = best_costs_4x4[0] - mode_bits_cost[ps_cu_node->best_mode];
496 }
497 }
498
499 /*!
500 ******************************************************************************
501 * \if Function name : ihevce_pu_calc_8x8_blk \endif
502 *
503 * \brief
504 * 4x4 pu (8x8 CU) mode decision loop using step 8421 method
505 *
506 * \param[in] ps_curr_src : pointer to src pixels struct
507 * \param[in] ps_ctxt : pointer to IPE context struct
508 * \param[in] ps_cu_node : pointer to cu node info buffer
509 *
510 * \return
511 * None
512 *
513 * \author
514 * Ittiam
515 *
516 *****************************************************************************
517 */
ihevce_pu_calc_8x8_blk(iv_enc_yuv_buf_t * ps_curr_src,ihevce_ipe_ctxt_t * ps_ctxt,ihevce_ipe_cu_tree_t * ps_cu_node,func_selector_t * ps_func_selector)518 void ihevce_pu_calc_8x8_blk(
519 iv_enc_yuv_buf_t *ps_curr_src,
520 ihevce_ipe_ctxt_t *ps_ctxt,
521 ihevce_ipe_cu_tree_t *ps_cu_node,
522 func_selector_t *ps_func_selector)
523 {
524 WORD32 i, j;
525 WORD32 nbr_flags;
526 nbr_avail_flags_t s_nbr;
527 WORD32 trans_size = ps_cu_node->ps_parent->u1_cu_size >> 1;
528
529 UWORD8 *pu1_src_4x4;
530 WORD32 xA, xB, yA, yB;
531 //WORD32 x, y, size;
532 WORD32 top_intra_mode;
533 WORD32 left_intra_mode;
534 // WORD8 *top_intra_mode_ptr;
535 // WORD8 *left_intra_mode_ptr;
536 UWORD8 *pu1_orig;
537 WORD32 src_strd = ps_curr_src->i4_y_strd;
538
539 WORD32 cu_pos_x = ps_cu_node->ps_parent->u2_x0 << 1;
540 WORD32 cu_pos_y = ps_cu_node->ps_parent->u2_y0 << 1;
541 ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
542
543 ihevc_intra_pred_luma_ref_substitution_fptr =
544 ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
545
546 pu1_orig = (UWORD8 *)(ps_curr_src->pv_y_buf) +
547 ((ps_cu_node->ps_parent->u2_y0 << 3) * src_strd) +
548 (ps_cu_node->ps_parent->u2_x0 << 3);
549 for(i = 0; i < 2; i++)
550 {
551 for(j = 0; j < 2; j++)
552 {
553 WORD32 cand_mode_list[3];
554 pu1_src_4x4 = pu1_orig + (i * trans_size * src_strd) + (j * trans_size);
555 /* get the neighbour availability flags */
556 nbr_flags = ihevce_get_nbr_intra(
557 &s_nbr,
558 ps_ctxt->pu1_ctb_nbr_map,
559 ps_ctxt->i4_nbr_map_strd,
560 cu_pos_x + ((j) * (trans_size >> 2)),
561 cu_pos_y + ((i) * (trans_size >> 2)),
562 trans_size >> 2);
563
564 /* call the function which populates sad cost for all the modes */
565 xA = ((ps_cu_node->ps_parent->u2_x0 << 3) >> 2) + j;
566 yA = ((ps_cu_node->ps_parent->u2_y0 << 3) >> 2) + 1 + i;
567 xB = xA + 1;
568 yB = yA - 1;
569 left_intra_mode = ps_ctxt->au1_ctb_mode_map[yA][xA];
570 top_intra_mode = ps_ctxt->au1_ctb_mode_map[yB][xB];
571
572 ihevce_intra_populate_mode_bits_cost_bracketing(
573 top_intra_mode,
574 left_intra_mode,
575 s_nbr.u1_top_avail,
576 s_nbr.u1_left_avail,
577 ps_cu_node->ps_parent->u2_y0,
578 &ps_ctxt->au2_mode_bits_cost_8x8pu[i * 2 + j][0],
579 &ps_ctxt->au2_mode_bits_8x8_pu[0],
580 ps_ctxt->i4_ol_sad_lambda,
581 cand_mode_list);
582
583 /* call the function which populates ref data for intra predicion */
584 ihevc_intra_pred_luma_ref_substitution_fptr(
585 pu1_src_4x4 - src_strd - 1,
586 pu1_src_4x4 - src_strd,
587 pu1_src_4x4 - 1,
588 src_strd,
589 4,
590 nbr_flags,
591 &ps_ctxt->au1_ref_8x8pu[i * 2 + j][0],
592 0);
593
594 ihevce_pu_calc_4x4_blk(
595 ps_ctxt,
596 ps_cu_node->ps_sub_cu[(i * 2) + j],
597 pu1_src_4x4,
598 src_strd,
599 &ps_ctxt->au1_ref_8x8pu[i * 2 + j][0],
600 &ps_ctxt->au2_mode_bits_cost_8x8pu[i * 2 + j][0],
601 &ps_cu_node->ps_sub_cu[(i * 2) + j]->au4_best_cost_1tu[0],
602 &ps_cu_node->ps_sub_cu[(i * 2) + j]->au1_best_mode_1tu[0],
603 ps_func_selector);
604
605 /*&au4_cost_4x4[i*2 + j][0],
606 &au1_modes_4x4[i*2 + j][0]);*/ //TTODO : mode will change for the four partition
607
608 ihevce_set_nbr_map(
609 ps_ctxt->pu1_ctb_nbr_map,
610 ps_ctxt->i4_nbr_map_strd,
611 cu_pos_x + ((j) * (trans_size >> 2)),
612 cu_pos_y + ((i) * (trans_size >> 2)),
613 (trans_size >> 2),
614 1);
615
616 xA = ((ps_cu_node->ps_parent->u2_x0 << 3) >> 2) + 1 + j;
617 yA = ((ps_cu_node->ps_parent->u2_y0 << 3) >> 2) + 1 + i;
618 ps_ctxt->au1_ctb_mode_map[yA][xA] = ps_cu_node->ps_sub_cu[i * 2 + j]->best_mode;
619 ps_cu_node->ps_sub_cu[i * 2 + j]->u2_mode_bits_cost =
620 ps_ctxt->au2_mode_bits_8x8_pu[ps_cu_node->ps_sub_cu[i * 2 + j]->best_mode];
621 }
622 }
623 }
624
625 /*!
626 ******************************************************************************
627 * \if Function name : ihevce_bracketing_analysis \endif
628 *
629 * \brief
*    Interface function that evaluates the largest CU size and the next smaller
*    CU size, where the largest CU size is decided from the coarse resolution
*    mode decision. Compares the SATD/SAD cost between the two CU sizes and
*    determines the actual CU size and the best 3 modes to be given to rdopt
633 *
* \param[in] ps_ctxt : pointer to IPE context struct
* \param[in] ps_cu_node : pointer to cu node info buffer
* \param[in] ps_curr_src : pointer to src pixels struct
* \param[in] ps_ctb_out : pointer to ip ctb out struct
* \param[in] ps_ed_l1_ctb : pointer to level 1 early deci struct
* \param[in] ps_ed_l2_ctb : pointer to level 2 early deci struct
* \param[in] ps_ed_ctb_l1 : pointer to level 1 ctb struct holding the best
*                           8x8 SAD and SAD cost values of IPE and ME
* \param[in] ps_l0_ipe_out_ctb : pointer to ipe_l0_ctb_analyse_for_me_t struct
642 *
643 * \return
644 * None
645 *
646 * \author
647 * Ittiam
648 *
649 *****************************************************************************
650 */
ihevce_bracketing_analysis(ihevce_ipe_ctxt_t * ps_ctxt,ihevce_ipe_cu_tree_t * ps_cu_node,iv_enc_yuv_buf_t * ps_curr_src,ctb_analyse_t * ps_ctb_out,ihevce_ed_blk_t * ps_ed_l1_ctb,ihevce_ed_blk_t * ps_ed_l2_ctb,ihevce_ed_ctb_l1_t * ps_ed_ctb_l1,ipe_l0_ctb_analyse_for_me_t * ps_l0_ipe_out_ctb)651 void ihevce_bracketing_analysis(
652 ihevce_ipe_ctxt_t *ps_ctxt,
653 ihevce_ipe_cu_tree_t *ps_cu_node,
654 iv_enc_yuv_buf_t *ps_curr_src,
655 ctb_analyse_t *ps_ctb_out,
656 //cu_analyse_t *ps_row_cu,
657 ihevce_ed_blk_t *ps_ed_l1_ctb,
658 ihevce_ed_blk_t *ps_ed_l2_ctb,
659 ihevce_ed_ctb_l1_t *ps_ed_ctb_l1,
660 ipe_l0_ctb_analyse_for_me_t *ps_l0_ipe_out_ctb)
661 {
662 WORD32 cu_pos_x = 0;
663 WORD32 cu_pos_y = 0;
664
665 UWORD8 u1_curr_ctb_wdt = ps_cu_node->u1_width;
666 UWORD8 u1_curr_ctb_hgt = ps_cu_node->u1_height;
667 WORD32 num_8x8_blks_x = (u1_curr_ctb_wdt >> 3);
668 WORD32 num_8x8_blks_y = (u1_curr_ctb_hgt >> 3);
669
670 ihevce_ed_blk_t *ps_ed_blk_l1 = ps_ed_l1_ctb;
671 ihevce_ed_blk_t *ps_ed_blk_l2 = ps_ed_l2_ctb;
672
673 WORD32 i;
674 WORD32 cand_mode_list[3];
675 //cu_analyse_t *ps_curr_cu = ps_row_cu;
676 WORD32 blk_cnt = 0;
677 WORD32 j = 0;
678 WORD32 merge_32x32_l1, merge_32x32_l2;
679
680 WORD32 i4_skip_intra_eval_32x32_l1;
681 //EIID: flag indicating number of 16x16 blocks to be skipped for intra evaluation within 32x32 block
682
683 WORD32 parent_cost = 0;
684 WORD32 child_cost[4] = { 0 };
685 WORD32 child_cost_least = 0;
686 WORD32 child_satd[4] = { 0 };
687 WORD32 x, y, size;
688 WORD32 merge_64x64 = 1;
689 UWORD8 au1_best_32x32_modes[4];
690 WORD32 au4_best_32x32_cost[4];
691 WORD32 parent_best_mode;
692 UWORD8 best_mode;
693
694 WORD32 i4_quality_preset = ps_ctxt->i4_quality_preset;
695 /* flag to control 1CU-4TU modes based on quality preset */
696 /* if set 1CU-4TU are explicity evaluated else 1CU-1TU modes are copied */
697 WORD32 i4_enable_1cu_4tu = (i4_quality_preset == IHEVCE_QUALITY_P2) ||
698 (i4_quality_preset == IHEVCE_QUALITY_P0);
699
700 /* flag to control 4CU-16TU mode based on quality preset */
701 /* if set 4CU-16TU are explicity evaluated else 4CU-4TU modes are copied*/
702 WORD32 i4_enable_4cu_16tu = (i4_quality_preset == IHEVCE_QUALITY_P2) ||
703 (i4_quality_preset == IHEVCE_QUALITY_P0);
704
705 WORD32 i4_mod_factor_num, i4_mod_factor_den = QP_MOD_FACTOR_DEN; //2;
706 float f_strength;
707 /* Accumalte satd */
708 LWORD64 i8_frame_acc_satd_cost = 0, i8_frame_acc_satd_by_modqp_q10 = 0;
709 WORD32 i4_ctb_acc_satd = 0;
710
711 /* Accumalate Mode bits cost */
712 LWORD64 i8_frame_acc_mode_bits_cost = 0;
713
714 /* Step2 is bypassed for parent, uses children modes*/
715 WORD32 step2_bypass = 1;
716
717 if(1 == ps_ctxt->u1_disable_child_cu_decide)
718 step2_bypass = 0;
719
720 ps_cu_node->ps_parent = ps_ctxt->ps_ipe_cu_tree;
721 for(i = 0; i < 4; i++)
722 {
723 ps_cu_node->ps_sub_cu[i] = ps_ctxt->ps_ipe_cu_tree + 1 + i;
724 }
725
726 /* Loop for all 8x8 block in a CTB */
727 ps_ctb_out->u4_cu_split_flags = 0x1;
728
729 /* Initialize intra 64x64, 32x32 and 16x16 costs to max value */
730 for(i = 0; i < (MAX_CU_IN_CTB >> 4); i++)
731 {
732 ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i] = MAX_INTRA_COST_IPE;
733 }
734
735 for(i = 0; i < (MAX_CU_IN_CTB >> 2); i++)
736 {
737 ps_l0_ipe_out_ctb->ai4_best16x16_intra_cost[i] = MAX_INTRA_COST_IPE;
738 }
739
740 for(i = 0; i < (MAX_CU_IN_CTB); i++)
741 {
742 ps_l0_ipe_out_ctb->ai4_best8x8_intra_cost[i] = MAX_INTRA_COST_IPE;
743 }
744
745 ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = MAX_INTRA_COST_IPE;
746
747 /* by default 64x64 modes are set to default values DC and Planar */
748 ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[0] = 0;
749 ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[1] = 1;
750 ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[2] = 255;
751
752 /* by default 64x4 split is set to 1 */
753 ps_l0_ipe_out_ctb->u1_split_flag = 1;
754
755 /* Modulation factor calculated based on spatial variance instead of hardcoded val*/
756 i4_mod_factor_num = ps_ctxt->ai4_mod_factor_derived_by_variance[1]; //16;
757
758 f_strength = ps_ctxt->f_strength;
759
760 /* ------------------------------------------------ */
761 /* populate the early decisions done by L1 analysis */
762 /* ------------------------------------------------ */
763 {
764 ihevce_ed_blk_t *ps_ed_blk_l1_curr = ps_ed_l1_ctb;
765 WORD32 ctr_8x8;
766 WORD8 *pi1_ed_buf;
767
768 /* set all the decisions to invalid */
769 memset(
770 &ps_l0_ipe_out_ctb->ai1_early_intra_inter_decision[0],
771 0,
772 sizeof(UWORD8) * MAX_CU_IN_CTB);
773
774 pi1_ed_buf = &ps_l0_ipe_out_ctb->ai1_early_intra_inter_decision[0];
775
776 for(ctr_8x8 = 0; ctr_8x8 < MAX_CTB_SIZE; ctr_8x8++)
777 {
778 WORD32 pos_x_8x8, pos_y_8x8;
779
780 pos_x_8x8 = gau1_cu_pos_x[ctr_8x8];
781 pos_y_8x8 = gau1_cu_pos_y[ctr_8x8];
782
783 pi1_ed_buf[pos_x_8x8 + (pos_y_8x8 * MAX_CU_IN_CTB_ROW)] =
784 ps_ed_blk_l1_curr->intra_or_inter;
785 ps_ed_blk_l1_curr++;
786 }
787
788 for(ctr_8x8 = 0; ctr_8x8 < (MAX_CU_IN_CTB >> 2); ctr_8x8++)
789 {
790 ps_l0_ipe_out_ctb->ai4_best_sad_8x8_l1_ipe[ctr_8x8] =
791 ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[ctr_8x8];
792
793 ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_ipe[ctr_8x8] =
794 ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_ipe[ctr_8x8];
795
796 /*Earlier only me sad was getting populated, now best of ipe and me is populated*/
797 ps_l0_ipe_out_ctb->ai4_best_sad_8x8_l1_me[ctr_8x8] =
798 ps_ed_ctb_l1->i4_best_sad_8x8_l1_me[ctr_8x8];
799 //ps_ed_ctb_l1->i4_sad_me_for_ref[ctr_8x8];
800
801 ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_me[ctr_8x8] =
802 ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_me[ctr_8x8];
803 //ps_ed_ctb_l1->i4_sad_cost_me_for_ref[ctr_8x8];
804 }
805
806 /*Init CTB level accumalated SATD and MPM bits */
807 ps_l0_ipe_out_ctb->i4_ctb_acc_satd = 0;
808 ps_l0_ipe_out_ctb->i4_ctb_acc_mpm_bits = 0;
809 }
810
811 /* ------------------------------------------------ */
812 /* Loop over all the blocks in current CTB */
813 /* ------------------------------------------------ */
814
815 {
816 /* 64 8x8 blocks should be encountered for the do,while loop to exit */
817 do
818 {
819 intra32_analyse_t *ps_intra32_analyse;
820 intra16_analyse_t *ps_intra16_analyse;
821 WORD32 *pi4_intra_32_cost;
822 WORD32 *pi4_intra_16_cost;
823 WORD32 *pi4_intra_8_cost;
824 WORD32 merge_16x16_l1;
825
826 /* Given the blk_cnt, get the CU's top-left 8x8 block's x and y positions within the CTB */
827 cu_pos_x = gau1_cu_pos_x[blk_cnt];
828 cu_pos_y = gau1_cu_pos_y[blk_cnt];
829
830 /* default value for 32x32 best mode - blk_cnt increases by 16 for each 32x32 */
831 au1_best_32x32_modes[blk_cnt >> 4] = 255;
832
833 /* get the corresponding intra 32 analyse pointer use (blk_cnt / 16) */
834 /* blk cnt is in terms of 8x8 units so a 32x32 will have 16 8x8 units */
835 ps_intra32_analyse = &ps_l0_ipe_out_ctb->as_intra32_analyse[blk_cnt >> 4];
836
837 /* get the corresponding intra 16 analyse pointer use (blk_cnt & 0xF / 4)*/
838 /* blk cnt is in terms of 8x8 units so a 16x16 will have 4 8x8 units */
839 ps_intra16_analyse = &ps_intra32_analyse->as_intra16_analyse[(blk_cnt & 0xF) >> 2];
840
841 /* Line below assumes min_cu_size of 8 - checks whether CU starts are within picture */
842 if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y))
843 {
844 /* Reset to zero for every cu decision */
845 merge_32x32_l1 = 0;
846
847 child_cost_least = 0;
848
849 /* At L2, each 4x4 corresponds to 16x16 at L0. Every 4 16x16 stores a merge_success flag */
850 ps_ed_blk_l2 = ps_ed_l2_ctb + (blk_cnt >> 2);
851
852 pi4_intra_32_cost = &ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[blk_cnt >> 4];
853
854 /* by default 32x32 modes are set to default values DC and Planar */
855 ps_intra32_analyse->au1_best_modes_32x32_tu[0] = 0;
856 ps_intra32_analyse->au1_best_modes_32x32_tu[1] = 1;
857 ps_intra32_analyse->au1_best_modes_32x32_tu[2] = 255;
858
859 /* By default 32x32 split is set to 1 */
860 ps_intra32_analyse->b1_split_flag = 1;
861
862 ps_intra32_analyse->au1_best_modes_16x16_tu[0] = 0;
863 ps_intra32_analyse->au1_best_modes_16x16_tu[1] = 1;
864 ps_intra32_analyse->au1_best_modes_16x16_tu[2] = 255;
865
866 /* 16x16 cost & 8x8 cost are stored in Raster scan order */
867 /* stride of 16x16 buffer is MAX_CU_IN_CTB_ROW >> 1 */
868 /* stride of 8x8 buffer is MAX_CU_IN_CTB_ROW */
869 {
870 WORD32 pos_x_8x8, pos_y_8x8;
871
872 pos_x_8x8 = gau1_cu_pos_x[blk_cnt];
873 pos_y_8x8 = gau1_cu_pos_y[blk_cnt];
874
875 pi4_intra_16_cost = &ps_l0_ipe_out_ctb->ai4_best16x16_intra_cost[0];
876
877 pi4_intra_16_cost +=
878 ((pos_x_8x8 >> 1) + ((pos_y_8x8 >> 1) * (MAX_CU_IN_CTB_ROW >> 1)));
879
880 pi4_intra_8_cost = &ps_l0_ipe_out_ctb->ai4_best8x8_intra_cost[0];
881
882 pi4_intra_8_cost += (pos_x_8x8 + (pos_y_8x8 * MAX_CU_IN_CTB_ROW));
883 }
884
885 merge_32x32_l1 = 0;
886 merge_32x32_l2 = 0;
887 i4_skip_intra_eval_32x32_l1 = 0;
888
889 /* Enable 16x16 merge iff sufficient 8x8 blocks remain in the current CTB */
890 merge_16x16_l1 = 0;
891 if(((num_8x8_blks_x - cu_pos_x) >= 2) && ((num_8x8_blks_y - cu_pos_y) >= 2))
892 {
893 #if !ENABLE_UNIFORM_CU_SIZE_8x8
894 merge_16x16_l1 = ps_ed_blk_l1->merge_success;
895 #else
896 merge_16x16_l1 = 0;
897 #endif
898 }
899
900 /* Enable 32x32 merge iff sufficient 8x8 blocks remain in the current CTB */
901 if(((num_8x8_blks_x - cu_pos_x) >= 4) && ((num_8x8_blks_y - cu_pos_y) >= 4))
902 {
903 /* Check 4 flags of L1(8x8) say merge */
904 for(i = 0; i < 4; i++)
905 {
906 merge_32x32_l1 += (ps_ed_blk_l1 + (i * 4))->merge_success;
907
908 //EIDD: num 16x16 blocks for which inter_intra flag says eval only inter, i.e. skip intra eval
909 i4_skip_intra_eval_32x32_l1 +=
910 ((ps_ed_blk_l1 + (i * 4))->intra_or_inter == 2) ? 1 : 0;
911 }
912
913 #if !ENABLE_UNIFORM_CU_SIZE_8x8
914 /* Check 1 flag from L2(16x16) say merge */
915 merge_32x32_l2 = ps_ed_blk_l2->merge_success;
916 #else
917 merge_32x32_l1 = 0;
918 merge_32x32_l2 = 0;
919 #endif
920 }
921
922 #if DISABLE_L2_IPE_IN_PB_L1_IN_B
923 if((i4_quality_preset == IHEVCE_QUALITY_P6) && (ps_ctxt->i4_slice_type != ISLICE))
924 {
925 merge_32x32_l2 = 0;
926 ps_ed_blk_l2->merge_success = 0;
927 }
928 #endif
929
930 ps_intra32_analyse->b1_valid_cu = 1;
931
932 /* If Merge success from all 4 L1 and L2, max CU size 32x32 is chosen */
933 /* EIID: if all blocks to be skipped then skip entire 32x32 for intra eval,
934 if no blocks to be skipped then eval entire 32x32,
935 else break the merge and go to 16x16 level eval */
936 if((merge_32x32_l1 == 4) && merge_32x32_l2 &&
937 ((i4_skip_intra_eval_32x32_l1 == 0) ||
938 (i4_skip_intra_eval_32x32_l1 == 4)) //comment this line to disable break-merge
939 )
940 {
941 #if IP_DBG_L1_l2
942 /* Populate params for 32x32 block analysis */
943 ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE;
944
945 ps_cu_node->ps_parent->u1_cu_size = 32;
946 ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */
947 ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[blk_cnt]; /* Populate properly */
948 ps_cu_node->ps_parent->best_mode = ps_ed_blk_l2->best_merge_mode;
949 /* CU size 32x32 and fill the final cu params */
950
951 ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
952
953 /* Increment pointers */
954 ps_ed_blk_l1 += 16;
955 blk_cnt += 16;
956 ps_row_cu++;
957 merge_64x64 &= 1;
958 #else
959
960 /* EIID: dont evaluate if all 4 blocks at L1 said inter is winning*/
961 if(4 == i4_skip_intra_eval_32x32_l1 && (ps_ctxt->i4_slice_type != ISLICE))
962 {
963 WORD32 i4_local_ctr1, i4_local_ctr2;
964
965 ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE;
966
967 ps_cu_node->ps_parent->u1_cu_size = 32;
968 ps_cu_node->ps_parent->u2_x0 =
969 gau1_cu_pos_x[blk_cnt]; /* Populate properly */
970 ps_cu_node->ps_parent->u2_y0 =
971 gau1_cu_pos_y[blk_cnt]; /* Populate properly */
972 ps_cu_node->ps_parent->best_mode =
973 INTRA_DC; //ps_ed_blk_l2->best_merge_mode;
974 /* CU size 32x32 and fill the final cu params */
975
976 /* fill in the first modes as invalid */
977 ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC;
978 ps_cu_node->ps_parent->au1_best_mode_1tu[1] =
979 INTRA_DC; //for safery. Since update_cand_list will set num_modes as 3
980 ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC;
981
982 ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC;
983 ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC;
984 ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC;
985
986 ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
987
988 //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0;
989 //ps_row_cu->u1_num_intra_rdopt_cands = 0;
990
991 ps_intra32_analyse->b1_valid_cu = 0;
992 ps_intra32_analyse->b1_split_flag = 0;
993 ps_intra32_analyse->b1_merge_flag = 0;
994 /*memset (&ps_intra32_analyse->au1_best_modes_32x32_tu,
995 255,
996 NUM_BEST_MODES);
997 memset (&ps_intra32_analyse->au1_best_modes_16x16_tu,
998 255,
999 NUM_BEST_MODES);*/
1000 //set only first mode since if it's 255. it wont go ahead
1001 ps_intra32_analyse->au1_best_modes_32x32_tu[0] = 255;
1002 ps_intra32_analyse->au1_best_modes_16x16_tu[0] = 255;
1003 ps_intra32_analyse->i4_best_intra_cost = MAX_INTRA_COST_IPE;
1004
1005 *pi4_intra_32_cost = MAX_INTRA_COST_IPE;
1006
1007 /*since ME will start evaluating from bottom up, set the lower
1008 cu size data invalid */
1009 for(i4_local_ctr1 = 0; i4_local_ctr1 < 4; i4_local_ctr1++)
1010 {
1011 WORD32 *pi4_intra_8_cost_curr16;
1012
1013 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
1014 .au1_best_modes_16x16_tu[0] = 255;
1015 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
1016 .au1_best_modes_8x8_tu[0] = 255;
1017 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
1018 .i4_best_intra_cost = MAX_INTRA_COST_IPE;
1019 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_merge_flag = 0;
1020 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_valid_cu = 0;
1021 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_split_flag = 0;
1022
1023 pi4_intra_16_cost
1024 [(i4_local_ctr1 & 1) + ((MAX_CU_IN_CTB_ROW >> 1) *
1025 (i4_local_ctr1 >> 1))] = MAX_INTRA_COST_IPE;
1026
1027 pi4_intra_8_cost_curr16 = pi4_intra_8_cost + ((i4_local_ctr1 & 1) << 1);
1028 pi4_intra_8_cost_curr16 +=
1029 ((i4_local_ctr1 >> 1) << 1) * MAX_CU_IN_CTB_ROW;
1030
1031 for(i4_local_ctr2 = 0; i4_local_ctr2 < 4; i4_local_ctr2++)
1032 {
1033 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
1034 .as_intra8_analyse[i4_local_ctr2]
1035 .au1_4x4_best_modes[0][0] = 255;
1036 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
1037 .as_intra8_analyse[i4_local_ctr2]
1038 .au1_4x4_best_modes[1][0] = 255;
1039 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
1040 .as_intra8_analyse[i4_local_ctr2]
1041 .au1_4x4_best_modes[2][0] = 255;
1042 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
1043 .as_intra8_analyse[i4_local_ctr2]
1044 .au1_4x4_best_modes[3][0] = 255;
1045 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
1046 .as_intra8_analyse[i4_local_ctr2]
1047 .au1_best_modes_8x8_tu[0] = 255;
1048 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
1049 .as_intra8_analyse[i4_local_ctr2]
1050 .au1_best_modes_4x4_tu[0] = 255;
1051 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
1052 .as_intra8_analyse[i4_local_ctr2]
1053 .i4_best_intra_cost = MAX_INTRA_COST_IPE;
1054 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
1055 .as_intra8_analyse[i4_local_ctr2]
1056 .b1_valid_cu = 0;
1057
1058 pi4_intra_8_cost_curr16
1059 [(i4_local_ctr2 & 1) +
1060 (MAX_CU_IN_CTB_ROW * (i4_local_ctr2 >> 1))] =
1061 MAX_INTRA_COST_IPE;
1062 }
1063 }
1064
1065 /* set neighbours even if intra is not evaluated, since source is always available. */
1066 ihevce_set_nbr_map(
1067 ps_ctxt->pu1_ctb_nbr_map,
1068 ps_ctxt->i4_nbr_map_strd,
1069 ps_cu_node->ps_parent->u2_x0 << 1,
1070 ps_cu_node->ps_parent->u2_y0 << 1,
1071 (ps_cu_node->ps_parent->u1_cu_size >> 2),
1072 1);
1073
1074 /* cost accumalation of best cu size candiate */
1075 /*i8_frame_acc_satd_cost += parent_cost;*/
1076
1077 /* Mode bits cost accumalation for best cu size and cu mode */
1078 /*i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost;*/
1079
1080 /*satd/mod_qp accumulation of best cu */
1081 /*i8_frame_acc_satd_by_modqp_q10 += ((LWORD64)ps_cu_node->ps_parent->best_satd << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3))/i4_q_scale_q3_mod;*/
1082
1083 /* Increment pointers */
1084 ps_ed_blk_l1 += 16;
1085 blk_cnt += 16;
1086 //ps_row_cu++;
1087 merge_64x64 = 0;
1088
1089 /* increment for stat purpose only. Increment is valid only on single thread */
1090 ps_ctxt->u4_num_16x16_skips_at_L0_IPE += 4;
1091 }
1092 else
1093 {
1094 /* Revaluation of 4 16x16 blocks at 8x8 prediction level */
1095 //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
1096
1097 if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) &&
1098 (ps_ctxt->i4_slice_type == PSLICE))
1099 {
1100 ps_ctxt->u1_disable_child_cu_decide = 1;
1101 step2_bypass = 0;
1102 }
1103
1104 /* Based on the flag, Child modes decision can be disabled*/
1105 if(0 == ps_ctxt->u1_disable_child_cu_decide)
1106 {
1107 for(j = 0; j < 4; j++)
1108 {
1109 ps_cu_node->ps_sub_cu[j]->u2_x0 =
1110 gau1_cu_pos_x[blk_cnt + (j * 4)]; /* Populate properly */
1111 ps_cu_node->ps_sub_cu[j]->u2_y0 =
1112 gau1_cu_pos_y[blk_cnt + (j * 4)]; /* Populate properly */
1113 ps_cu_node->ps_sub_cu[j]->u1_cu_size = 16;
1114
1115 {
1116 WORD32 best_ang_mode =
1117 (ps_ed_blk_l1 + (j * 4))->best_merge_mode;
1118
1119 if(best_ang_mode < 2)
1120 best_ang_mode = 26;
1121
1122 ihevce_mode_eval_filtering(
1123 ps_cu_node->ps_sub_cu[j],
1124 ps_cu_node,
1125 ps_ctxt,
1126 ps_curr_src,
1127 best_ang_mode,
1128 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0],
1129 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
1130 !step2_bypass,
1131 1);
1132
1133 if(i4_enable_4cu_16tu)
1134 {
1135 ihevce_mode_eval_filtering(
1136 ps_cu_node->ps_sub_cu[j],
1137 ps_cu_node,
1138 ps_ctxt,
1139 ps_curr_src,
1140 best_ang_mode,
1141 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
1142 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
1143 !step2_bypass,
1144 0);
1145 }
1146 else
1147 {
1148 /* 4TU not evaluated : 4tu modes set same as 1tu modes */
1149 memcpy(
1150 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
1151 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
1152 NUM_BEST_MODES);
1153
1154 /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */
1155 memcpy(
1156 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
1157 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0],
1158 NUM_BEST_MODES * sizeof(WORD32));
1159 }
1160
1161 child_cost[j] =
1162 MIN(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
1163 ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]);
1164
1165 /* Child cost is sum of costs at 16x16 level */
1166 child_cost_least += child_cost[j];
1167
1168 /* Select the best mode to be populated as top and left nbr depending on the
1169 4tu and 1tu cost */
1170 if(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0] >
1171 ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0])
1172 {
1173 ps_cu_node->ps_sub_cu[j]->best_mode =
1174 ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0];
1175 }
1176 else
1177 {
1178 ps_cu_node->ps_sub_cu[j]->best_mode =
1179 ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0];
1180 }
1181
1182 { /* Update the CTB nodes only for MAX - 1 CU nodes */
1183 WORD32 xA, yA, row, col;
1184 xA = ((ps_cu_node->ps_sub_cu[j]->u2_x0 << 3) >> 2) + 1;
1185 yA = ((ps_cu_node->ps_sub_cu[j]->u2_y0 << 3) >> 2) + 1;
1186 size = ps_cu_node->ps_sub_cu[j]->u1_cu_size >> 2;
1187 for(row = yA; row < (yA + size); row++)
1188 {
1189 for(col = xA; col < (xA + size); col++)
1190 {
1191 ps_ctxt->au1_ctb_mode_map[row][col] =
1192 ps_cu_node->ps_sub_cu[j]->best_mode;
1193 }
1194 }
1195 }
1196 }
1197
1198 /*Child SATD cost*/
1199 child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd;
1200
1201 /* store the child 16x16 costs */
1202 pi4_intra_16_cost[(j & 1) + ((MAX_CU_IN_CTB_ROW >> 1) * (j >> 1))] =
1203 child_cost[j];
1204
1205 /* set the CU valid flag */
1206 ps_intra16_analyse[j].b1_valid_cu = 1;
1207
1208 /* All 16x16 merge is valid, if Cu 32x32 is chosen */
1209 /* To be reset, if CU 64x64 is chosen */
1210 ps_intra16_analyse[j].b1_merge_flag = 1;
1211
1212 /* storing the modes to intra 16 analyse */
1213 /* store the best 16x16 modes 8x8 tu */
1214 memcpy(
1215 &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0],
1216 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
1217 sizeof(UWORD8) * (NUM_BEST_MODES));
1218 ps_intra16_analyse[j].au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255;
1219
1220 /* store the best 16x16 modes 16x16 tu */
1221 memcpy(
1222 &ps_intra16_analyse[j].au1_best_modes_16x16_tu[0],
1223 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
1224 sizeof(UWORD8) * (NUM_BEST_MODES));
1225 ps_intra16_analyse[j].au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255;
1226
1227 /* divide the 16x16 costs (pro rating) to 4 8x8 costs */
1228 /* store the same 16x16 modes as 4 8x8 child modes */
1229 {
1230 WORD32 idx_8x8;
1231 WORD32 *pi4_intra_8_cost_curr16;
1232 intra8_analyse_t *ps_intra8_analyse;
1233
1234 pi4_intra_8_cost_curr16 = pi4_intra_8_cost + ((j & 1) << 1);
1235 pi4_intra_8_cost_curr16 += ((j >> 1) << 1) * MAX_CU_IN_CTB_ROW;
1236
1237 for(idx_8x8 = 0; idx_8x8 < 4; idx_8x8++)
1238 {
1239 pi4_intra_8_cost_curr16
1240 [(idx_8x8 & 1) + (MAX_CU_IN_CTB_ROW * (idx_8x8 >> 1))] =
1241 (child_cost[j] + 3) >> 2;
1242
1243 ps_intra8_analyse =
1244 &ps_intra16_analyse[j].as_intra8_analyse[idx_8x8];
1245
1246 ps_intra8_analyse->b1_enable_nxn = 0;
1247 ps_intra8_analyse->b1_valid_cu = 1;
1248
1249 /* store the best 8x8 modes 8x8 tu */
1250 memcpy(
1251 &ps_intra8_analyse->au1_best_modes_8x8_tu[0],
1252 &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0],
1253 sizeof(UWORD8) * (NUM_BEST_MODES + 1));
1254
1255 /* store the best 8x8 modes 4x4 tu */
1256 memcpy(
1257 &ps_intra8_analyse->au1_best_modes_4x4_tu[0],
1258 &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0],
1259 sizeof(UWORD8) * (NUM_BEST_MODES + 1));
1260
1261 /* NXN modes not evaluated hence set to 0 */
1262 memset(
1263 &ps_intra8_analyse->au1_4x4_best_modes[0][0],
1264 255,
1265 sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1));
1266 }
1267 }
1268 }
1269
1270 ihevce_set_nbr_map(
1271 ps_ctxt->pu1_ctb_nbr_map,
1272 ps_ctxt->i4_nbr_map_strd,
1273 ps_cu_node->ps_sub_cu[0]->u2_x0 << 1,
1274 ps_cu_node->ps_sub_cu[0]->u2_y0 << 1,
1275 (ps_cu_node->ps_sub_cu[0]->u1_cu_size >> 1),
1276 0);
1277 }
1278 #if 1 //DISBLE_CHILD_CU_EVAL_L0_IPE //1
1279 else
1280 {
1281 for(j = 0; j < 4; j++)
1282 {
1283 WORD32 idx_8x8;
1284 intra8_analyse_t *ps_intra8_analyse;
1285 ps_intra16_analyse[j].au1_best_modes_8x8_tu[0] = 255;
1286 ps_intra16_analyse[j].au1_best_modes_16x16_tu[0] = 255;
1287
1288 ps_intra16_analyse[j].b1_valid_cu = 0;
1289
1290 for(idx_8x8 = 0; idx_8x8 < 4; idx_8x8++)
1291 {
1292 ps_intra8_analyse =
1293 &ps_intra16_analyse[j].as_intra8_analyse[idx_8x8];
1294
1295 ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255;
1296 ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255;
1297
1298 ps_intra8_analyse->b1_enable_nxn = 0;
1299 ps_intra8_analyse->b1_valid_cu = 0;
1300
1301 /* NXN modes not evaluated hence set to 0 */
1302 memset(
1303 &ps_intra8_analyse->au1_4x4_best_modes[0][0],
1304 255,
1305 sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1));
1306 }
1307 }
1308
1309 child_cost_least = MAX_INTRA_COST_IPE;
1310 }
1311 #endif
1312
1313 /* Populate params for 32x32 block analysis */
1314
1315 ps_cu_node->ps_parent->u1_cu_size = 32;
1316 ps_cu_node->ps_parent->u2_x0 =
1317 gau1_cu_pos_x[blk_cnt]; /* Populate properly */
1318 ps_cu_node->ps_parent->u2_y0 =
1319 gau1_cu_pos_y[blk_cnt]; /* Populate properly */
1320
1321 /* Revaluation for 32x32 parent block at 16x16 prediction level */
1322 //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
1323
1324 {
1325 /* Eval for TUSize = CuSize */
1326 ihevce_mode_eval_filtering(
1327 ps_cu_node->ps_parent,
1328 ps_cu_node,
1329 ps_ctxt,
1330 ps_curr_src,
1331 26,
1332 &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
1333 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
1334 step2_bypass,
1335 1);
1336
1337 if(i4_enable_1cu_4tu)
1338 {
1339 /* Eval for TUSize = CuSize/2 */
1340 ihevce_mode_eval_filtering(
1341 ps_cu_node->ps_parent,
1342 ps_cu_node,
1343 ps_ctxt,
1344 ps_curr_src,
1345 26,
1346 &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
1347 &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
1348 step2_bypass,
1349 0);
1350 }
1351 else
1352 {
1353 /* 4TU not evaluated : 4tu modes set same as 1tu modes */
1354 memcpy(
1355 &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
1356 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
1357 NUM_BEST_MODES);
1358
1359 /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */
1360 memcpy(
1361 &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
1362 &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
1363 NUM_BEST_MODES * sizeof(WORD32));
1364 }
1365 }
1366
1367 ps_ctxt->u1_disable_child_cu_decide = 0;
1368 step2_bypass = 1;
1369
1370 /* Update parent cost */
1371 parent_cost =
1372 MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0],
1373 ps_cu_node->ps_parent->au4_best_cost_1tu[0]);
1374
1375 /* Select the best mode to be populated as top and left nbr depending on the
1376 4tu and 1tu cost */
1377 if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] >
1378 ps_cu_node->ps_parent->au4_best_cost_1tu[0])
1379 {
1380 ps_cu_node->ps_parent->best_mode =
1381 ps_cu_node->ps_parent->au1_best_mode_1tu[0];
1382 }
1383 else
1384 {
1385 ps_cu_node->ps_parent->best_mode =
1386 ps_cu_node->ps_parent->au1_best_mode_4tu[0];
1387 }
1388
1389 /* store the 32x32 cost */
1390 *pi4_intra_32_cost = parent_cost;
1391
1392 /* set the CU valid flag */
1393 ps_intra32_analyse->b1_valid_cu = 1;
1394
1395 ps_intra32_analyse->b1_merge_flag = 1;
1396
1397 /* storing the modes to intra 32 analyse */
1398 {
1399 /* store the best 32x32 modes 16x16 tu */
1400 memcpy(
1401 &ps_intra32_analyse->au1_best_modes_16x16_tu[0],
1402 &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
1403 sizeof(UWORD8) * (NUM_BEST_MODES));
1404 ps_intra32_analyse->au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255;
1405
1406 /* store the best 32x32 modes 32x32 tu */
1407 memcpy(
1408 &ps_intra32_analyse->au1_best_modes_32x32_tu[0],
1409 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
1410 sizeof(UWORD8) * (NUM_BEST_MODES));
1411 ps_intra32_analyse->au1_best_modes_32x32_tu[NUM_BEST_MODES] = 255;
1412 }
1413 parent_best_mode = ps_cu_node->ps_parent->best_mode;
1414 if((parent_cost <=
1415 child_cost_least + (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >>
1416 LAMBDA_Q_SHIFT))) //|| identical_modes)
1417 {
1418 WORD32 i4_q_scale_q3_mod;
1419 UWORD8 u1_cu_possible_qp;
1420 WORD32 i4_act_factor;
1421
1422 /* CU size 32x32 and fill the final cu params */
1423
1424 ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
1425
1426 if((IHEVCE_QUALITY_P3 > i4_quality_preset))
1427 {
1428 for(i = 0; i < 4; i++)
1429 {
1430 intra8_analyse_t *ps_intra8_analyse;
1431 ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i];
1432 for(j = 0; j < 4; j++)
1433 {
1434 /* Populate best 3 nxn modes */
1435 ps_intra8_analyse->au1_4x4_best_modes[j][0] =
1436 ps_cu_node->ps_sub_cu[i]->au1_best_mode_4tu[0];
1437 ps_intra8_analyse->au1_4x4_best_modes[j][1] =
1438 ps_cu_node->ps_sub_cu[i]
1439 ->au1_best_mode_4tu[1]; //(ps_ed + 1)->best_mode;
1440 ps_intra8_analyse->au1_4x4_best_modes[j][2] =
1441 ps_cu_node->ps_sub_cu[i]
1442 ->au1_best_mode_4tu[2]; //(ps_ed + 2)->best_mode;
1443 ps_intra8_analyse->au1_4x4_best_modes[j][3] = 255;
1444 }
1445 }
1446 }
1447 /* store the 32x32 non split flag */
1448 ps_intra32_analyse->b1_split_flag = 0;
1449 ps_intra32_analyse->as_intra16_analyse[0].b1_split_flag = 0;
1450 ps_intra32_analyse->as_intra16_analyse[1].b1_split_flag = 0;
1451 ps_intra32_analyse->as_intra16_analyse[2].b1_split_flag = 0;
1452 ps_intra32_analyse->as_intra16_analyse[3].b1_split_flag = 0;
1453
1454 au1_best_32x32_modes[blk_cnt >> 4] =
1455 ps_cu_node->ps_parent->au1_best_mode_1tu[0];
1456
1457 au4_best_32x32_cost[blk_cnt >> 4] =
1458 ps_cu_node->ps_parent->au4_best_cost_1tu[0];
1459 /*As 32*32 has won, pick L2 8x8 qp which maps
1460 to L0 32x32 Qp*/
1461 ASSERT(((blk_cnt >> 4) & 3) == (blk_cnt >> 4));
1462 ASSERT(ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0] != -2);
1463 u1_cu_possible_qp = ihevce_cu_level_qp_mod(
1464 ps_ctxt->i4_qscale,
1465 ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0],
1466 ps_ctxt->ld_curr_frame_16x16_log_avg[0],
1467 f_strength,
1468 &i4_act_factor,
1469 &i4_q_scale_q3_mod,
1470 ps_ctxt->ps_rc_quant_ctxt);
1471 /* cost accumalation of best cu size candiate */
1472 i8_frame_acc_satd_cost += parent_cost;
1473
1474 /* satd and mpm bits accumalation of best cu size candiate */
1475 i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd;
1476
1477 /* Mode bits cost accumalation for best cu size and cu mode */
1478 i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost;
1479
1480 /*satd/mod_qp accumulation of best cu */
1481 i8_frame_acc_satd_by_modqp_q10 +=
1482 ((LWORD64)ps_cu_node->ps_parent->best_satd
1483 << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
1484 i4_q_scale_q3_mod;
1485
1486 /* Increment pointers */
1487 ps_ed_blk_l1 += 16;
1488 blk_cnt += 16;
1489 //ps_row_cu++;
1490 merge_64x64 &= 1;
1491 }
1492 else
1493 {
1494 /* store the 32x32 split flag */
1495 ps_intra32_analyse->b1_split_flag = 1;
1496
1497 /* CU size 16x16 and fill the final cu params for all 4 blocks */
1498 for(j = 0; j < 4; j++)
1499 {
1500 WORD32 i4_q_scale_q3_mod;
1501 UWORD8 u1_cu_possible_qp;
1502 WORD32 i4_act_factor;
1503
1504 /* Set CU split flag */
1505 ASSERT(blk_cnt % 4 == 0);
1506
1507 ihevce_update_cand_list(
1508 ps_cu_node->ps_sub_cu[j], ps_ed_blk_l1, ps_ctxt);
1509
1510 /* store the 16x16 non split flag */
1511 ps_intra16_analyse[j].b1_split_flag = 0;
1512
1513 ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2));
1514 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0] != -2);
1515 /*As 16*16 has won, pick L1 8x8 qp which maps
1516 to L0 16x16 Qp*/
1517 u1_cu_possible_qp = ihevce_cu_level_qp_mod(
1518 ps_ctxt->i4_qscale,
1519 ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0],
1520 ps_ctxt->ld_curr_frame_8x8_log_avg[0],
1521 f_strength,
1522 &i4_act_factor,
1523 &i4_q_scale_q3_mod,
1524 ps_ctxt->ps_rc_quant_ctxt);
1525
1526 /*accum satd/qp for all child block*/
1527 i8_frame_acc_satd_by_modqp_q10 +=
1528 ((LWORD64)child_satd[j]
1529 << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
1530 i4_q_scale_q3_mod;
1531
1532 /* Accumalate mode bits for all child blocks */
1533 i8_frame_acc_mode_bits_cost +=
1534 ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost;
1535
1536 /* satd and mpm bits accumalation of best cu size candiate */
1537 i4_ctb_acc_satd += child_satd[j];
1538
1539 /* Increment pointers */
1540 //ps_row_cu++;
1541 ps_ed_blk_l1 += 4;
1542 blk_cnt += 4;
1543 }
1544
1545 /* cost accumalation of best cu size candiate */
1546 i8_frame_acc_satd_cost += child_cost_least;
1547
1548 /* 64x64 merge is not possible */
1549 merge_64x64 = 0;
1550 }
1551
1552 //ps_ed_blk_l2 += 4;
1553
1554 } //end of EIID's else
1555 #endif
1556 }
1557 /* If Merge success for L1 max CU size 16x16 is chosen */
1558 else if(merge_16x16_l1)
1559 {
1560 #if IP_DBG_L1_l2
1561 ps_cu_node->ps_parent->u1_cu_size = 16;
1562 ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */
1563 ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[blk_cnt]; /* Populate properly */
1564 ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_merge_mode;
1565 ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
1566
1567 blk_cnt += 4;
1568 ps_ed_blk_l1 += 4;
1569 ps_row_cu++;
1570 merge_64x64 = 0;
1571 #else
1572
1573 /*EIID: evaluate only if L1 early-inter-intra decision is not favouring inter*/
1574 /* enable this only in B pictures */
1575 if(ps_ed_blk_l1->intra_or_inter == 2 && (ps_ctxt->i4_slice_type != ISLICE))
1576 {
1577 WORD32 i4_q_scale_q3_mod, i4_local_ctr;
1578 WORD8 i1_cu_possible_qp;
1579 WORD32 i4_act_factor;
1580 /* make cost infinity. */
1581 /* make modes invalid */
1582 /* update loop variables */
1583 /* set other output variales */
1584 /* dont set neighbour flag so that next blocks wont access this cu */
1585 /* what happens to ctb_mode_map?? */
1586
1587 ps_cu_node->ps_parent->u1_cu_size = 16;
1588 ps_cu_node->ps_parent->u2_x0 =
1589 gau1_cu_pos_x[blk_cnt]; /* Populate properly */
1590 ps_cu_node->ps_parent->u2_y0 =
1591 gau1_cu_pos_y[blk_cnt]; /* Populate properly */
1592 ps_cu_node->ps_parent->best_mode =
1593 INTRA_DC; //ps_ed_blk_l1->best_merge_mode;
1594
1595 /* fill in the first modes as invalid */
1596
1597 ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC;
1598 ps_cu_node->ps_parent->au1_best_mode_1tu[1] =
1599 INTRA_DC; //for safery. Since update_cand_list will set num_modes as 3
1600 ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC;
1601
1602 ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC;
1603 ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC;
1604 ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC;
1605
1606 ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
1607
1608 //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0;
1609 //ps_row_cu->u1_num_intra_rdopt_cands = 0;
1610
1611 ps_intra32_analyse->b1_split_flag = 1;
1612 ps_intra32_analyse->b1_merge_flag = 0;
1613
1614 ps_intra16_analyse->b1_valid_cu = 0;
1615 ps_intra16_analyse->b1_split_flag = 0;
1616 ps_intra16_analyse->b1_merge_flag = 1;
1617 //memset (&ps_intra16_analyse->au1_best_modes_16x16_tu,
1618 // 255,
1619 // NUM_BEST_MODES);
1620 //memset (&ps_intra16_analyse->au1_best_modes_8x8_tu,
1621 // 255,
1622 // NUM_BEST_MODES);
1623 //set only first mode since if it's 255. it wont go ahead
1624 ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 255;
1625 ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 255;
1626 ps_intra16_analyse->i4_best_intra_cost = MAX_INTRA_COST_IPE;
1627 *pi4_intra_16_cost = MAX_INTRA_COST_IPE;
1628
1629 /*since ME will start evaluating from bottom up, set the lower
1630 cu size data invalid */
1631 for(i4_local_ctr = 0; i4_local_ctr < 4; i4_local_ctr++)
1632 {
1633 ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
1634 .au1_4x4_best_modes[0][0] = 255;
1635 ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
1636 .au1_4x4_best_modes[1][0] = 255;
1637 ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
1638 .au1_4x4_best_modes[2][0] = 255;
1639 ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
1640 .au1_4x4_best_modes[3][0] = 255;
1641 ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
1642 .au1_best_modes_8x8_tu[0] = 255;
1643 ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
1644 .au1_best_modes_4x4_tu[0] = 255;
1645 ps_intra16_analyse->as_intra8_analyse[i4_local_ctr].i4_best_intra_cost =
1646 MAX_INTRA_COST_IPE;
1647
1648 pi4_intra_8_cost
1649 [(i4_local_ctr & 1) + (MAX_CU_IN_CTB_ROW * (i4_local_ctr >> 1))] =
1650 MAX_INTRA_COST_IPE;
1651 }
1652
1653 /* set neighbours even if intra is not evaluated, since source is always available. */
1654 ihevce_set_nbr_map(
1655 ps_ctxt->pu1_ctb_nbr_map,
1656 ps_ctxt->i4_nbr_map_strd,
1657 ps_cu_node->ps_parent->u2_x0 << 1,
1658 ps_cu_node->ps_parent->u2_y0 << 1,
1659 (ps_cu_node->ps_parent->u1_cu_size >> 2),
1660 1);
1661
1662 //what happends to RC variables??
1663 /* run only constant Qp */
1664 ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2));
1665 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0] != -2);
1666 i1_cu_possible_qp = ihevce_cu_level_qp_mod(
1667 ps_ctxt->i4_qscale,
1668 ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0],
1669 ps_ctxt->ld_curr_frame_8x8_log_avg[0],
1670 f_strength,
1671 &i4_act_factor,
1672 &i4_q_scale_q3_mod,
1673 ps_ctxt->ps_rc_quant_ctxt);
1674
1675 /* cost accumalation of best cu size candiate */
1676 i8_frame_acc_satd_cost += 0; //parent_cost; //incorrect accumulation
1677
1678 /*satd/mod_qp accumulation of best cu */
1679 i8_frame_acc_satd_by_modqp_q10 += 0; //incorrect accumulation
1680 //((LWORD64)ps_cu_node->ps_parent->best_satd << SATD_BY_ACT_Q_FAC)/i4_q_scale_q3_mod;
1681
1682 /* Accumalate mode bits for all child blocks */
1683 i8_frame_acc_mode_bits_cost +=
1684 0; //ps_cu_node->ps_parent->u2_mode_bits_cost;
1685 //incoorect accumulation
1686
1687 blk_cnt += 4;
1688 ps_ed_blk_l1 += 4;
1689 //ps_row_cu++;
1690 merge_64x64 = 0;
1691
1692 /* increment for stat purpose only. Increment is valid only on single thread */
1693 ps_ctxt->u4_num_16x16_skips_at_L0_IPE += 1;
1694 }
1695 else
1696 {
1697 /* 64x64 merge is not possible */
1698 merge_64x64 = 0;
1699
1700 /* set the 32x32 split flag to 1 */
1701 ps_intra32_analyse->b1_split_flag = 1;
1702
1703 ps_intra32_analyse->b1_merge_flag = 0;
1704
1705 ps_intra16_analyse->b1_merge_flag = 1;
1706
1707 if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) &&
1708 (ps_ctxt->i4_slice_type == PSLICE))
1709 {
1710 ps_ctxt->u1_disable_child_cu_decide = 1;
1711 step2_bypass = 0;
1712 }
1713 //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
1714 /* Based on the flag, Child modes decision can be disabled*/
1715 if(0 == ps_ctxt->u1_disable_child_cu_decide)
1716 {
1717 for(j = 0; j < 4; j++)
1718 {
1719 intra8_analyse_t *ps_intra8_analyse;
1720 WORD32 best_ang_mode = (ps_ed_blk_l1 + j)->best_mode;
1721
1722 if(best_ang_mode < 2)
1723 best_ang_mode = 26;
1724
1725 //ps_cu_node->ps_sub_cu[j]->best_cost = MAX_INTRA_COST_IPE;
1726 //ps_cu_node->ps_sub_cu[j]->best_mode = (ps_ed_blk_l1 + j)->best_mode;
1727
1728 ps_cu_node->ps_sub_cu[j]->u2_x0 =
1729 gau1_cu_pos_x[blk_cnt + j]; /* Populate properly */
1730 ps_cu_node->ps_sub_cu[j]->u2_y0 =
1731 gau1_cu_pos_y[blk_cnt + j]; /* Populate properly */
1732 ps_cu_node->ps_sub_cu[j]->u1_cu_size = 8;
1733
1734 ihevce_mode_eval_filtering(
1735 ps_cu_node->ps_sub_cu[j],
1736 ps_cu_node,
1737 ps_ctxt,
1738 ps_curr_src,
1739 best_ang_mode,
1740 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0],
1741 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
1742 !step2_bypass,
1743 1);
1744
1745 if(i4_enable_4cu_16tu)
1746 {
1747 ihevce_mode_eval_filtering(
1748 ps_cu_node->ps_sub_cu[j],
1749 ps_cu_node,
1750 ps_ctxt,
1751 ps_curr_src,
1752 best_ang_mode,
1753 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
1754 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
1755 !step2_bypass,
1756 0);
1757 }
1758 else
1759 {
1760 /* 4TU not evaluated : 4tu modes set same as 1tu modes */
1761 memcpy(
1762 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
1763 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
1764 NUM_BEST_MODES);
1765
1766 /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */
1767 memcpy(
1768 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
1769 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0],
1770 NUM_BEST_MODES * sizeof(WORD32));
1771 }
1772
1773 child_cost[j] =
1774 MIN(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
1775 ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]);
1776
1777 child_cost_least += child_cost[j];
1778
1779 /* Select the best mode to be populated as top and left nbr depending on the
1780 4tu and 1tu cost */
1781 if(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0] >
1782 ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0])
1783 {
1784 ps_cu_node->ps_sub_cu[j]->best_mode =
1785 ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0];
1786 }
1787 else
1788 {
1789 ps_cu_node->ps_sub_cu[j]->best_mode =
1790 ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0];
1791 }
1792 { /* Update the CTB nodes only for MAX - 1 CU nodes */
1793 WORD32 xA, yA, row, col;
1794 xA = ((ps_cu_node->ps_sub_cu[j]->u2_x0 << 3) >> 2) + 1;
1795 yA = ((ps_cu_node->ps_sub_cu[j]->u2_y0 << 3) >> 2) + 1;
1796 size = ps_cu_node->ps_sub_cu[j]->u1_cu_size >> 2;
1797 for(row = yA; row < (yA + size); row++)
1798 {
1799 for(col = xA; col < (xA + size); col++)
1800 {
1801 ps_ctxt->au1_ctb_mode_map[row][col] =
1802 ps_cu_node->ps_sub_cu[j]->best_mode;
1803 }
1804 }
1805 }
1806
1807 /*collect individual child satd for final SATD/qp accum*/
1808 child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd;
1809
1810 ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j];
1811
1812 /* store the child 8x8 costs */
1813 pi4_intra_8_cost[(j & 1) + (MAX_CU_IN_CTB_ROW * (j >> 1))] =
1814 child_cost[j];
1815
1816 /* set the CU valid flag */
1817 ps_intra8_analyse->b1_valid_cu = 1;
1818 ps_intra8_analyse->b1_enable_nxn = 0;
1819
1820 /* storing the modes to intra8 analyse */
1821
1822 /* store the best 8x8 modes 8x8 tu */
1823 memcpy(
1824 &ps_intra8_analyse->au1_best_modes_8x8_tu[0],
1825 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
1826 sizeof(UWORD8) * (NUM_BEST_MODES));
1827 ps_intra8_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255;
1828
1829 /* store the best 8x8 modes 4x4 tu */
1830 memcpy(
1831 &ps_intra8_analyse->au1_best_modes_4x4_tu[0],
1832 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
1833 sizeof(UWORD8) * (NUM_BEST_MODES));
1834 ps_intra8_analyse->au1_best_modes_4x4_tu[NUM_BEST_MODES] = 255;
1835
1836 /* NXN modes not evaluated hence set to 255 */
1837 memset(
1838 &ps_intra8_analyse->au1_4x4_best_modes[0][0],
1839 255,
1840 sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1));
1841 }
1842
1843 ihevce_set_nbr_map(
1844 ps_ctxt->pu1_ctb_nbr_map,
1845 ps_ctxt->i4_nbr_map_strd,
1846 ps_cu_node->ps_sub_cu[0]->u2_x0 << 1,
1847 ps_cu_node->ps_sub_cu[0]->u2_y0 << 1,
1848 (ps_cu_node->ps_sub_cu[0]->u1_cu_size >> 1),
1849 0);
1850 }
1851 #if 1 //DISBLE_CHILD_CU_EVAL_L0_IPE //1
1852 else
1853 {
1854 for(j = 0; j < 4; j++)
1855 {
1856 intra8_analyse_t *ps_intra8_analyse;
1857 ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j];
1858 ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255;
1859 ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255;
1860 /* NXN modes not evaluated hence set to 255 */
1861 memset(
1862 &ps_intra8_analyse->au1_4x4_best_modes[0][0],
1863 255,
1864 sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1));
1865
1866 ps_intra8_analyse->b1_valid_cu = 0;
1867 ps_intra8_analyse->b1_enable_nxn = 0;
1868 }
1869 child_cost_least = MAX_INTRA_COST_IPE;
1870 }
1871 #endif
1872 //ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode;
1873 //ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE;
1874
1875 ps_cu_node->ps_parent->u1_cu_size = 16;
1876 ps_cu_node->ps_parent->u2_x0 =
1877 gau1_cu_pos_x[blk_cnt]; /* Populate properly */
1878 ps_cu_node->ps_parent->u2_y0 =
1879 gau1_cu_pos_y[blk_cnt]; /* Populate properly */
1880
1881 //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
1882
1883 /* Eval for TUSize = CuSize */
1884 ihevce_mode_eval_filtering(
1885 ps_cu_node->ps_parent,
1886 ps_cu_node,
1887 ps_ctxt,
1888 ps_curr_src,
1889 26,
1890 &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
1891 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
1892 step2_bypass,
1893 1);
1894
1895 if(i4_enable_1cu_4tu)
1896 {
1897 /* Eval for TUSize = CuSize/2 */
1898 ihevce_mode_eval_filtering(
1899 ps_cu_node->ps_parent,
1900 ps_cu_node,
1901 ps_ctxt,
1902 ps_curr_src,
1903 26,
1904 &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
1905 &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
1906 step2_bypass,
1907 0);
1908 }
1909 else
1910 {
1911 /* 4TU not evaluated : 4tu modes set same as 1tu modes */
1912 memcpy(
1913 &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
1914 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
1915 NUM_BEST_MODES);
1916
1917 /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */
1918 memcpy(
1919 &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
1920 &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
1921 NUM_BEST_MODES * sizeof(WORD32));
1922 }
1923
1924 ps_ctxt->u1_disable_child_cu_decide = 0;
1925 step2_bypass = 1;
1926
1927 /* Update parent cost */
1928 parent_cost =
1929 MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0],
1930 ps_cu_node->ps_parent->au4_best_cost_1tu[0]);
1931
1932 /* Select the best mode to be populated as top and left nbr depending on the
1933 4tu and 1tu cost */
1934 if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] >
1935 ps_cu_node->ps_parent->au4_best_cost_1tu[0])
1936 {
1937 ps_cu_node->ps_parent->best_mode =
1938 ps_cu_node->ps_parent->au1_best_mode_1tu[0];
1939 }
1940 else
1941 {
1942 ps_cu_node->ps_parent->best_mode =
1943 ps_cu_node->ps_parent->au1_best_mode_4tu[0];
1944 }
1945
1946 /* store the 16x16 cost */
1947 *pi4_intra_16_cost = parent_cost;
1948
1949 /* accumulate the 32x32 cost */
1950 if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost)
1951 {
1952 *pi4_intra_32_cost = parent_cost;
1953 }
1954 else
1955 {
1956 *pi4_intra_32_cost += parent_cost;
1957 }
1958
1959 /* set the CU valid flag */
1960 ps_intra16_analyse->b1_valid_cu = 1;
1961
1962 /* storing the modes to intra 16 analyse */
1963 {
1964 /* store the best 16x16 modes 16x16 tu */
1965 memcpy(
1966 &ps_intra16_analyse->au1_best_modes_16x16_tu[0],
1967 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
1968 sizeof(UWORD8) * NUM_BEST_MODES);
1969 ps_intra16_analyse->au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255;
1970
1971 /* store the best 16x16 modes 8x8 tu */
1972 memcpy(
1973 &ps_intra16_analyse->au1_best_modes_8x8_tu[0],
1974 &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
1975 sizeof(UWORD8) * NUM_BEST_MODES);
1976 ps_intra16_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255;
1977 }
1978
1979 parent_best_mode = ps_cu_node->ps_parent->best_mode;
1980 if(parent_cost <=
1981 child_cost_least + (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >>
1982 LAMBDA_Q_SHIFT)) //|| identical_modes)
1983 {
1984 WORD32 i4_q_scale_q3_mod;
1985 WORD8 i1_cu_possible_qp;
1986 WORD32 i4_act_factor;
1987 //choose parent CU
1988
1989 ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
1990
1991 /* set the 16x16 non split flag */
1992 ps_intra16_analyse->b1_split_flag = 0;
1993
1994 /*As 16*16 has won, pick L1 8x8 qp which maps
1995 to L0 16x16 Qp*/
1996 ASSERT(((blk_cnt >> 4) & 3) == (blk_cnt >> 4));
1997 ASSERT(ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0] != -2);
1998 i1_cu_possible_qp = ihevce_cu_level_qp_mod(
1999 ps_ctxt->i4_qscale,
2000 ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0],
2001 ps_ctxt->ld_curr_frame_8x8_log_avg[0],
2002 f_strength,
2003 &i4_act_factor,
2004 &i4_q_scale_q3_mod,
2005 ps_ctxt->ps_rc_quant_ctxt);
2006
2007 /* cost accumalation of best cu size candiate */
2008 i8_frame_acc_satd_cost += parent_cost;
2009
2010 /* satd and mpm bits accumalation of best cu size candiate */
2011 i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd;
2012
2013 /*satd/mod_qp accumulation of best cu */
2014 i8_frame_acc_satd_by_modqp_q10 +=
2015 ((LWORD64)ps_cu_node->ps_parent->best_satd
2016 << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
2017 i4_q_scale_q3_mod;
2018
2019 /* Accumalate mode bits for all child blocks */
2020 i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost;
2021
2022 blk_cnt += 4;
2023 ps_ed_blk_l1 += 4;
2024 //ps_row_cu++;
2025 }
2026 else
2027 {
2028 //choose child CU
2029 WORD8 i1_cu_possible_qp;
2030 WORD32 i4_act_factor;
2031 WORD32 i4_q_scale_q3_mod;
2032
2033 ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2));
2034 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][1] != -2);
2035 i1_cu_possible_qp = ihevce_cu_level_qp_mod(
2036 ps_ctxt->i4_qscale,
2037 ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][1],
2038 ps_ctxt->ld_curr_frame_8x8_log_avg[1],
2039 f_strength,
2040 &i4_act_factor,
2041 &i4_q_scale_q3_mod,
2042 ps_ctxt->ps_rc_quant_ctxt);
2043
2044 /* set the 16x16 split flag */
2045 ps_intra16_analyse->b1_split_flag = 1;
2046
2047 for(j = 0; j < 4; j++)
2048 {
2049 ihevce_update_cand_list(
2050 ps_cu_node->ps_sub_cu[j], ps_ed_blk_l1, ps_ctxt);
2051
2052 if((IHEVCE_QUALITY_P3 > i4_quality_preset))
2053 {
2054 WORD32 k;
2055 intra8_analyse_t *ps_intra8_analyse;
2056 ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j];
2057
2058 for(k = 0; k < 4; k++)
2059 {
2060 /* Populate best 3 nxn modes */
2061 ps_intra8_analyse->au1_4x4_best_modes[k][0] =
2062 ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0];
2063 ps_intra8_analyse->au1_4x4_best_modes[k][1] =
2064 ps_cu_node->ps_sub_cu[j]
2065 ->au1_best_mode_4tu[1]; //(ps_ed + 1)->best_mode;
2066 ps_intra8_analyse->au1_4x4_best_modes[k][2] =
2067 ps_cu_node->ps_sub_cu[j]
2068 ->au1_best_mode_4tu[2]; //(ps_ed + 2)->best_mode;
2069 ps_intra8_analyse->au1_4x4_best_modes[k][3] = 255;
2070 }
2071 }
2072 /*accum satd/qp for all child block*/
2073 i8_frame_acc_satd_by_modqp_q10 +=
2074 ((LWORD64)child_satd[j]
2075 << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
2076 i4_q_scale_q3_mod;
2077
2078 /* Accumalate mode bits for all child blocks */
2079 i8_frame_acc_mode_bits_cost +=
2080 ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost;
2081
2082 /* satd and mpm bits accumalation of best cu size candiate */
2083 i4_ctb_acc_satd += child_satd[j];
2084
2085 blk_cnt += 1;
2086 ps_ed_blk_l1 += 1;
2087 //ps_row_cu++;
2088 }
2089
2090 /* cost accumalation of best cu size candiate */
2091 i8_frame_acc_satd_cost += child_cost_least;
2092 }
2093
2094 } //else of EIID
2095 #endif
2096 } // if(merge_16x16_l1)
2097 /* MAX CU SIZE 8x8 */
2098 else
2099 {
2100 #if IP_DBG_L1_l2
2101 for(i = 0; i < 4; i++)
2102 {
2103 ps_cu_node->ps_parent->u1_cu_size = 8;
2104 ps_cu_node->ps_parent->u2_x0 =
2105 gau1_cu_pos_x[blk_cnt]; /* Populate properly */
2106 ps_cu_node->ps_parent->u2_y0 =
2107 gau1_cu_pos_y[blk_cnt]; /* Populate properly */
2108 ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode;
2109
2110 ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
2111 blk_cnt++;
2112 ps_ed_blk_l1++;
2113 ps_row_cu++;
2114 merge_64x64 = 0;
2115 }
2116 #else
2117
2118 /* EIID: Skip all 4 8x8 block if L1 decisions says skip intra */
2119 if(ps_ed_blk_l1->intra_or_inter == 2 && (ps_ctxt->i4_slice_type != ISLICE))
2120 {
2121 WORD32 i4_q_scale_q3_mod;
2122 WORD8 i1_cu_possible_qp;
2123 WORD32 i4_act_factor;
2124
2125 merge_64x64 = 0;
2126
2127 ps_intra32_analyse->b1_merge_flag = 0;
2128
2129 ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 255;
2130 ps_intra16_analyse->au1_best_modes_8x8_tu[1] = 255;
2131 ps_intra16_analyse->au1_best_modes_8x8_tu[2] = 255;
2132
2133 ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 255;
2134 ps_intra16_analyse->au1_best_modes_16x16_tu[1] = 255;
2135 ps_intra16_analyse->au1_best_modes_16x16_tu[2] = 255;
2136 ps_intra16_analyse->b1_split_flag = 1;
2137 ps_intra16_analyse->b1_valid_cu = 0;
2138 ps_intra16_analyse->b1_merge_flag = 0;
2139
2140 ps_intra16_analyse->i4_best_intra_cost = MAX_INTRA_COST_IPE;
2141
2142 for(i = 0; i < 4; i++)
2143 {
2144 intra8_analyse_t *ps_intra8_analyse;
2145 WORD32 ctr_sub_cu;
2146
2147 cu_pos_x = gau1_cu_pos_x[blk_cnt];
2148 cu_pos_y = gau1_cu_pos_y[blk_cnt];
2149
2150 if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y))
2151 {
2152 ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i];
2153
2154 ps_intra8_analyse->b1_valid_cu = 0;
2155 ps_intra8_analyse->b1_enable_nxn = 0;
2156 ps_intra8_analyse->au1_4x4_best_modes[0][0] = 255;
2157 ps_intra8_analyse->au1_4x4_best_modes[1][0] = 255;
2158 ps_intra8_analyse->au1_4x4_best_modes[2][0] = 255;
2159 ps_intra8_analyse->au1_4x4_best_modes[3][0] = 255;
2160 ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255;
2161 ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255;
2162 ps_intra8_analyse->i4_best_intra_cost = MAX_INTRA_COST_IPE;
2163
2164 ps_cu_node->ps_parent->u1_cu_size = 8;
2165 ps_cu_node->ps_parent->u2_x0 =
2166 gau1_cu_pos_x[blk_cnt]; /* Populate properly */
2167 ps_cu_node->ps_parent->u2_y0 =
2168 gau1_cu_pos_y[blk_cnt]; /* Populate properly */
2169 ps_cu_node->ps_parent->best_mode =
2170 INTRA_DC; //ps_ed_blk_l1->best_mode;
2171
2172 /* fill in the first modes as invalid */
2173
2174 ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC;
2175 ps_cu_node->ps_parent->au1_best_mode_1tu[1] =
2176 INTRA_DC; //for safery. Since update_cand_list will set num_modes as 3
2177 ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC;
2178
2179 ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC;
2180 ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC;
2181 ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC;
2182
2183 ihevce_update_cand_list(
2184 ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
2185
2186 //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0;
2187 //ps_row_cu->u1_num_intra_rdopt_cands = 0;
2188
2189 for(ctr_sub_cu = 0; ctr_sub_cu < 4; ctr_sub_cu++)
2190 {
2191 ps_cu_node->ps_sub_cu[ctr_sub_cu]->au1_best_mode_1tu[0] =
2192 INTRA_DC;
2193 ps_cu_node->ps_sub_cu[ctr_sub_cu]->au1_best_mode_4tu[0] =
2194 INTRA_DC;
2195 ps_cu_node->ps_sub_cu[ctr_sub_cu]->au4_best_cost_1tu[0] =
2196 MAX_INTRA_COST_IPE;
2197
2198 ps_cu_node->ps_sub_cu[ctr_sub_cu]->au4_best_cost_4tu[0] =
2199 MAX_INTRA_COST_IPE;
2200 ps_cu_node->ps_sub_cu[ctr_sub_cu]->best_cost =
2201 MAX_INTRA_COST_IPE;
2202 }
2203
2204 pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] =
2205 MAX_INTRA_COST_IPE;
2206
2207 ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2));
2208 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1] != -2);
2209 i1_cu_possible_qp = ihevce_cu_level_qp_mod(
2210 ps_ctxt->i4_qscale,
2211 ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1],
2212 ps_ctxt->ld_curr_frame_8x8_log_avg[1],
2213 f_strength,
2214 &i4_act_factor,
2215 &i4_q_scale_q3_mod,
2216 ps_ctxt->ps_rc_quant_ctxt);
2217
2218 /* set neighbours even if intra is not evaluated, since source is always available. */
2219 ihevce_set_nbr_map(
2220 ps_ctxt->pu1_ctb_nbr_map,
2221 ps_ctxt->i4_nbr_map_strd,
2222 ps_cu_node->ps_parent->u2_x0 << 1,
2223 ps_cu_node->ps_parent->u2_y0 << 1,
2224 (ps_cu_node->ps_parent->u1_cu_size >> 2),
2225 1);
2226
2227 //ps_row_cu++;
2228 }
2229 blk_cnt++;
2230 ps_ed_blk_l1++;
2231 }
2232 }
2233 else
2234 {
2235 //cu_intra_cand_t *ps_cu_intra_cand;
2236 WORD8 i1_cu_possible_qp;
2237 WORD32 i4_act_factor;
2238 WORD32 i4_q_scale_q3_mod;
2239
2240 ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2));
2241 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1] != -2);
2242 i1_cu_possible_qp = ihevce_cu_level_qp_mod(
2243 ps_ctxt->i4_qscale,
2244 ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1],
2245 ps_ctxt->ld_curr_frame_8x8_log_avg[1],
2246 f_strength,
2247 &i4_act_factor,
2248 &i4_q_scale_q3_mod,
2249 ps_ctxt->ps_rc_quant_ctxt);
2250
2251 /* 64x64 merge is not possible */
2252 merge_64x64 = 0;
2253
2254 ps_intra32_analyse->b1_merge_flag = 0;
2255
2256 ps_intra16_analyse->b1_merge_flag = 0;
2257
2258 /* by default 16x16 modes are set to default values DC and Planar */
2259 ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 0;
2260 ps_intra16_analyse->au1_best_modes_8x8_tu[1] = 1;
2261 ps_intra16_analyse->au1_best_modes_8x8_tu[2] = 255;
2262
2263 ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 0;
2264 ps_intra16_analyse->au1_best_modes_16x16_tu[1] = 1;
2265 ps_intra16_analyse->au1_best_modes_16x16_tu[2] = 255;
2266 ps_intra16_analyse->b1_split_flag = 1;
2267 ps_intra16_analyse->b1_valid_cu = 1;
2268
2269 for(i = 0; i < 4; i++)
2270 {
2271 intra8_analyse_t *ps_intra8_analyse;
2272 cu_pos_x = gau1_cu_pos_x[blk_cnt];
2273 cu_pos_y = gau1_cu_pos_y[blk_cnt];
2274 if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y))
2275 {
2276 //ps_cu_intra_cand = &ps_row_cu->s_cu_intra_cand;
2277 //ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE;
2278
2279 //ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode;
2280
2281 child_cost_least = 0;
2282
2283 ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i];
2284 ps_cu_node->ps_parent->u1_cu_size = 8;
2285 ps_cu_node->ps_parent->u2_x0 =
2286 gau1_cu_pos_x[blk_cnt]; /* Populate properly */
2287 ps_cu_node->ps_parent->u2_y0 =
2288 gau1_cu_pos_y[blk_cnt]; /* Populate properly */
2289
2290 //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
2291
2292 /*EARLY DECISION 8x8 block */
2293 ihevce_pu_calc_8x8_blk(
2294 ps_curr_src, ps_ctxt, ps_cu_node, ps_ctxt->ps_func_selector);
2295 for(j = 0; j < 4; j++)
2296 {
2297 child_cost_least += ps_cu_node->ps_sub_cu[j]->best_cost;
2298 child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd;
2299 }
2300
2301 /* Based on the flag, CU = 4TU modes decision can be disabled, CU = 4PU is retained */
2302 if(0 == ps_ctxt->u1_disable_child_cu_decide)
2303 {
2304 ihevce_set_nbr_map(
2305 ps_ctxt->pu1_ctb_nbr_map,
2306 ps_ctxt->i4_nbr_map_strd,
2307 ps_cu_node->ps_parent->u2_x0 << 1,
2308 ps_cu_node->ps_parent->u2_y0 << 1,
2309 (ps_cu_node->ps_parent->u1_cu_size >> 2),
2310 0);
2311
2312 //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
2313
2314 /* Eval for TUSize = CuSize */
2315 ihevce_mode_eval_filtering(
2316 ps_cu_node->ps_parent,
2317 ps_cu_node,
2318 ps_ctxt,
2319 ps_curr_src,
2320 26,
2321 &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
2322 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
2323 step2_bypass,
2324 1);
2325
2326 if(i4_enable_1cu_4tu)
2327 {
2328 /* Eval for TUSize = CuSize/2 */
2329 ihevce_mode_eval_filtering(
2330 ps_cu_node->ps_parent,
2331 ps_cu_node,
2332 ps_ctxt,
2333 ps_curr_src,
2334 26,
2335 &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
2336 &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
2337 step2_bypass,
2338 0);
2339 }
2340 else
2341 {
2342 /* 4TU not evaluated : 4tu modes set same as 1tu modes */
2343 memcpy(
2344 &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
2345 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
2346 NUM_BEST_MODES);
2347
2348 /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */
2349 memcpy(
2350 &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
2351 &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
2352 NUM_BEST_MODES * sizeof(WORD32));
2353 }
2354
2355 /* Update parent cost */
2356 parent_cost =
2357 MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0],
2358 ps_cu_node->ps_parent->au4_best_cost_1tu[0]);
2359
2360 /* Select the best mode to be populated as top and left nbr depending on the
2361 4tu and 1tu cost */
2362 if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] >
2363 ps_cu_node->ps_parent->au4_best_cost_1tu[0])
2364 {
2365 ps_cu_node->ps_parent->best_mode =
2366 ps_cu_node->ps_parent->au1_best_mode_1tu[0];
2367 }
2368 else
2369 {
2370 ps_cu_node->ps_parent->best_mode =
2371 ps_cu_node->ps_parent->au1_best_mode_4tu[0];
2372 }
2373 }
2374
2375 /* set the CU valid flag */
2376 ps_intra8_analyse->b1_valid_cu = 1;
2377 ps_intra8_analyse->b1_enable_nxn = 0;
2378
2379 /* storing the modes to intra 8 analyse */
2380
2381 /* store the best 8x8 modes 8x8 tu */
2382 memcpy(
2383 &ps_intra8_analyse->au1_best_modes_8x8_tu[0],
2384 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
2385 sizeof(UWORD8) * (NUM_BEST_MODES));
2386 ps_intra8_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255;
2387
2388 /* store the best 8x8 modes 4x4 tu */
2389 memcpy(
2390 &ps_intra8_analyse->au1_best_modes_4x4_tu[0],
2391 &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
2392 sizeof(UWORD8) * (NUM_BEST_MODES));
2393 ps_intra8_analyse->au1_best_modes_4x4_tu[NUM_BEST_MODES] = 255;
2394
2395 /*As 8*8 has won, pick L1 4x4 qp which is equal to
2396 L1 8x8 Qp*/
2397 //ps_row_cu->u1_cu_possible_qp[0] = u1_cu_possible_qp;
2398 //ps_row_cu->i4_act_factor[0][1] = i4_act_factor;
2399
2400 parent_best_mode = ps_cu_node->ps_parent->best_mode;
2401 if(parent_cost <=
2402 child_cost_least +
2403 (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >> LAMBDA_Q_SHIFT))
2404 {
2405 /*CU = 4TU */
2406 ihevce_update_cand_list(
2407 ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
2408
2409 /* store the child 8x8 costs */
2410 pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] =
2411 parent_cost;
2412
2413 /* cost accumalation of best cu size candiate */
2414 i8_frame_acc_satd_cost += parent_cost;
2415
2416 /*satd/mod_qp accumulation of best cu */
2417 i8_frame_acc_satd_by_modqp_q10 +=
2418 ((LWORD64)ps_cu_node->ps_parent->best_satd
2419 << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
2420 i4_q_scale_q3_mod;
2421
2422 /* Accumalate mode bits for all child blocks */
2423 i8_frame_acc_mode_bits_cost +=
2424 ps_cu_node->ps_parent->u2_mode_bits_cost;
2425
2426 /* satd and mpm bits accumalation of best cu size candiate */
2427 i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd;
2428
2429 /* accumulate the 16x16 cost*/
2430 if(MAX_INTRA_COST_IPE == *pi4_intra_16_cost)
2431 {
2432 *pi4_intra_16_cost = parent_cost;
2433 }
2434 else
2435 {
2436 *pi4_intra_16_cost += parent_cost;
2437 }
2438
2439 /* accumulate the 32x32 cost*/
2440 if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost)
2441 {
2442 *pi4_intra_32_cost = parent_cost;
2443 }
2444 else
2445 {
2446 *pi4_intra_32_cost += parent_cost;
2447 }
2448 }
2449 else
2450 {
2451 /*CU = 4PU*/
2452 //ps_row_cu->b3_cu_pos_x = (UWORD8) ps_cu_node->ps_parent->u2_x0;
2453 //ps_row_cu->b3_cu_pos_y = (UWORD8) ps_cu_node->ps_parent->u2_y0;
2454 //ps_row_cu->u1_cu_size = ps_cu_node->ps_parent->u1_cu_size;
2455
2456 /* store the child 8x8 costs woth 4x4 pu summed cost */
2457 pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] =
2458 (child_cost_least);
2459
2460 /* accumulate the 16x16 cost*/
2461 if(MAX_INTRA_COST_IPE == *pi4_intra_16_cost)
2462 {
2463 *pi4_intra_16_cost = child_cost_least;
2464 }
2465 else
2466 {
2467 *pi4_intra_16_cost += child_cost_least;
2468 }
2469
2470 /* cost accumalation of best cu size candiate */
2471 i8_frame_acc_satd_cost += child_cost_least;
2472
2473 for(j = 0; j < 4; j++)
2474 {
2475 /*satd/qp accumualtion*/
2476 i8_frame_acc_satd_by_modqp_q10 +=
2477 ((LWORD64)child_satd[j]
2478 << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
2479 i4_q_scale_q3_mod;
2480
2481 /* Accumalate mode bits for all child blocks */
2482 i8_frame_acc_mode_bits_cost +=
2483 ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost;
2484
2485 /* satd and mpm bits accumalation of best cu size candiate */
2486 i4_ctb_acc_satd += child_satd[j];
2487 }
2488
2489 /* accumulate the 32x32 cost*/
2490 if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost)
2491 {
2492 *pi4_intra_32_cost = child_cost_least;
2493 }
2494 else
2495 {
2496 *pi4_intra_32_cost += child_cost_least;
2497 }
2498
2499 ps_intra8_analyse->b1_enable_nxn = 1;
2500
2501 /* Insert the best 8x8 modes unconditionally */
2502
2503 x = ((ps_cu_node->u2_x0 << 3) >> 2) + 1;
2504 y = ((ps_cu_node->u2_y0 << 3) >> 2) + 1;
2505 size = ps_cu_node->u1_cu_size >> 2;
2506
2507 ps_ctxt->au1_ctb_mode_map[y][x] =
2508 ps_cu_node->ps_sub_cu[0]->best_mode;
2509 ps_ctxt->au1_ctb_mode_map[y][x + 1] =
2510 ps_cu_node->ps_sub_cu[1]->best_mode;
2511 ps_ctxt->au1_ctb_mode_map[y + 1][x] =
2512 ps_cu_node->ps_sub_cu[2]->best_mode;
2513 ps_ctxt->au1_ctb_mode_map[y + 1][x + 1] =
2514 ps_cu_node->ps_sub_cu[3]->best_mode;
2515 }
2516 /* NXN mode population */
2517 for(j = 0; j < 4; j++)
2518 {
2519 cand_mode_list[0] =
2520 ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0];
2521 cand_mode_list[1] =
2522 ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[1];
2523 cand_mode_list[2] =
2524 ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[2];
2525
2526 if(1)
2527 {
2528 /* Populate best 3 nxn modes */
2529 ps_intra8_analyse->au1_4x4_best_modes[j][0] =
2530 cand_mode_list[0];
2531 ps_intra8_analyse->au1_4x4_best_modes[j][1] =
2532 cand_mode_list[1]; //(ps_ed + 1)->best_mode;
2533 ps_intra8_analyse->au1_4x4_best_modes[j][2] =
2534 cand_mode_list[2]; //(ps_ed + 2)->best_mode;
2535 ps_intra8_analyse->au1_4x4_best_modes[j][3] = 255;
2536
2537 //memcpy(ps_intra8_analyse->au1_4x4_best_modes[j], ps_row_cu->s_cu_intra_cand.au1_intra_luma_modes_nxn[j], 4);
2538 }
2539 /* For HQ, all 35 modes to be used for RDOPT, removed from here for memory clean-up */
2540
2541 else /* IHEVCE_QUALITY_P0 == i4_quality_preset */
2542 {
2543 /* To indicate to enc loop that NXN is enabled in HIGH QUALITY fior CU 8x8*/
2544 ps_intra8_analyse->au1_4x4_best_modes[j][0] = 0;
2545 }
2546
2547 ps_intra8_analyse
2548 ->au1_4x4_best_modes[j][MAX_INTRA_CU_CANDIDATES] = 255;
2549 }
2550
2551 //ps_row_cu++;
2552 }
2553 else
2554 {
2555 /* For Incomplete CTB, 16x16 is not valid */
2556 ps_intra16_analyse->b1_valid_cu = 0;
2557 }
2558 blk_cnt++;
2559 ps_ed_blk_l1++;
2560 }
2561 //ps_ed_blk_l2 ++;
2562 } //else of EIID
2563 #endif
2564 }
2565 }
2566 else
2567 {
2568 /* For incomplete CTB, init valid CU to 0 */
2569 ps_ed_blk_l1++;
2570 ps_intra32_analyse->b1_valid_cu = 0;
2571 ps_intra16_analyse[0].b1_valid_cu = 0;
2572 blk_cnt++;
2573 merge_64x64 = 0;
2574 }
2575 } while(blk_cnt != MAX_CTB_SIZE);
2576 /* if 64x64 merge is possible then check for 32x32 having same best modes */
2577 if(1 == merge_64x64)
2578 {
2579 WORD32 act_mode = au1_best_32x32_modes[0];
2580
2581 ps_ed_blk_l2 = ps_ed_l2_ctb;
2582 best_mode = ps_ed_blk_l2->best_mode;
2583 merge_64x64 =
2584 ((act_mode == au1_best_32x32_modes[0]) + (act_mode == au1_best_32x32_modes[1]) +
2585 (act_mode == au1_best_32x32_modes[2]) +
2586 (act_mode == au1_best_32x32_modes[3]) ==
2587 4);
2588 if(merge_64x64 == 1)
2589 best_mode = au1_best_32x32_modes[0];
2590 else
2591 best_mode = ps_ed_blk_l2->best_mode;
2592 /* All 32x32 costs are accumalated to 64x64 cost */
2593 ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = 0;
2594 for(i = 0; i < 4; i++)
2595 {
2596 ps_l0_ipe_out_ctb->i4_best64x64_intra_cost +=
2597 ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i];
2598 }
2599
2600 /* If all modes of 32x32 block is not same */
2601 if(0 == merge_64x64)
2602 {
2603 /*Compute CHILD cost for 32x32 */
2604 WORD32 child_cost_64x64 = au4_best_32x32_cost[0] + au4_best_32x32_cost[1] +
2605 au4_best_32x32_cost[2] + au4_best_32x32_cost[3];
2606 WORD32 cost = MAX_INTRA_COST_IPE;
2607
2608 WORD32 best_mode_temp = 0;
2609 /*Compute 64x64 cost for each mode of 32x32*/
2610 for(i = 0; i < 4; i++)
2611 {
2612 WORD32 mode = au1_best_32x32_modes[i];
2613 if(mode < 2)
2614 mode = 26;
2615 ps_cu_node->ps_parent->u1_cu_size = 64;
2616 ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[0]; /* Populate properly */
2617 ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[0]; /* Populate properly */
2618
2619 ihevce_set_nbr_map(
2620 ps_ctxt->pu1_ctb_nbr_map,
2621 ps_ctxt->i4_nbr_map_strd,
2622 (ps_cu_node->ps_parent->u2_x0 << 1),
2623 (ps_cu_node->ps_parent->u2_y0 << 1),
2624 (ps_cu_node->ps_parent->u1_cu_size >> 2),
2625 0);
2626
2627 ihevce_mode_eval_filtering(
2628 ps_cu_node->ps_parent,
2629 ps_cu_node,
2630 ps_ctxt,
2631 ps_curr_src,
2632 mode,
2633 &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
2634 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
2635 !step2_bypass,
2636 0);
2637
2638 parent_cost = ps_cu_node->ps_parent->best_cost;
2639 if(cost > parent_cost)
2640 {
2641 cost = parent_cost;
2642 best_mode_temp = ps_cu_node->ps_parent->best_mode;
2643 }
2644 }
2645 if(cost < child_cost_64x64)
2646 {
2647 merge_64x64 = 1;
2648 best_mode = best_mode_temp;
2649
2650 /* Update 64x64 cost if CU 64x64 is chosen */
2651 ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = cost;
2652
2653 /* Accumalate the least cost for CU 64x64 */
2654 i8_frame_acc_satd_cost = cost;
2655 i8_frame_acc_mode_bits_cost = ps_cu_node->ps_parent->u2_mode_bits_cost;
2656
2657 /* satd and mpm bits accumalation of best cu size candiate */
2658 i4_ctb_acc_satd = ps_cu_node->ps_parent->best_satd;
2659 }
2660 }
2661 }
2662
2663 if(merge_64x64)
2664 {
2665 WORD32 i, j;
2666 intra32_analyse_t *ps_intra32_analyse;
2667 intra16_analyse_t *ps_intra16_analyse;
2668 WORD32 row, col;
2669 WORD32 i4_q_scale_q3_mod;
2670 WORD8 i1_cu_possible_qp;
2671 WORD32 i4_act_factor;
2672 //ps_row_cu = ps_curr_cu;
2673 ps_ctb_out->u4_cu_split_flags = 0x0;
2674 ps_ed_blk_l1 = ps_ed_l1_ctb;
2675 ps_ed_blk_l2 = ps_ed_l2_ctb;
2676
2677 ps_l0_ipe_out_ctb->u1_split_flag = 0;
2678
2679 /* If CU size of 64x64 is chosen, disbale all the 16x16 flag*/
2680 for(i = 0; i < 4; i++)
2681 {
2682 /* get the corresponding intra 32 analyse pointer use (blk_cnt / 16) */
2683 /* blk cnt is in terms of 8x8 units so a 32x32 will have 16 8x8 units */
2684 ps_intra32_analyse = &ps_l0_ipe_out_ctb->as_intra32_analyse[i];
2685
2686 for(j = 0; j < 4; j++)
2687 {
2688 /* get the corresponding intra 16 analyse pointer use (blk_cnt & 0xF / 4)*/
2689 /* blk cnt is in terms of 8x8 units so a 16x16 will have 4 8x8 units */
2690 ps_intra16_analyse = &ps_intra32_analyse->as_intra16_analyse[j];
2691 ps_intra16_analyse->b1_merge_flag = 0;
2692 }
2693 }
2694
2695 /* CU size 64x64 and fill the final cu params */
2696 //ps_row_cu->b3_cu_pos_x = gau1_cu_pos_x[0];
2697 //ps_row_cu->b3_cu_pos_y = gau1_cu_pos_y[0];
2698 //ps_row_cu->u1_cu_size = 64;
2699
2700 /* Candidate mode Update */
2701 cand_mode_list[0] = best_mode;
2702 if(cand_mode_list[0] > 1)
2703 {
2704 if(cand_mode_list[0] == 2)
2705 {
2706 cand_mode_list[1] = 34;
2707 cand_mode_list[2] = 3;
2708 }
2709 else if(cand_mode_list[0] == 34)
2710 {
2711 cand_mode_list[1] = 2;
2712 cand_mode_list[2] = 33;
2713 }
2714 else
2715 {
2716 cand_mode_list[1] = cand_mode_list[0] - 1;
2717 cand_mode_list[2] = cand_mode_list[0] + 1;
2718 }
2719 //cand_mode_list[1] = ps_ed_blk_l1->nang_attr.best_mode;
2720 //cand_mode_list[2] = ps_ed_blk_l1->ang_attr.best_mode;
2721 }
2722 else
2723 {
2724 cand_mode_list[0] = 0;
2725 cand_mode_list[1] = 1;
2726 cand_mode_list[2] = 26;
2727 //cand_mode_list[2] = ps_ed_blk_l1->nang_attr.best_mode;
2728 }
2729
2730 /* All 32x32 costs are accumalated to 64x64 cost */
2731 ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = 0;
2732 for(i = 0; i < 4; i++)
2733 {
2734 ps_l0_ipe_out_ctb->i4_best64x64_intra_cost +=
2735 ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i];
2736 }
2737 /* by default 64x64 modes are set to default values DC and Planar */
2738 ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[0] = cand_mode_list[0];
2739 ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[1] = cand_mode_list[1];
2740 ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[2] = cand_mode_list[2];
2741 ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[3] = 255;
2742
2743 /* Update CTB mode map for the finalised CU */
2744 x = ((ps_cu_node->u2_x0 << 3) >> 2) + 1;
2745 y = ((ps_cu_node->u2_y0 << 3) >> 2) + 1;
2746 size = ps_cu_node->u1_cu_size >> 2;
2747
2748 for(row = y; row < (y + size); row++)
2749 {
2750 for(col = x; col < (x + size); col++)
2751 {
2752 ps_ctxt->au1_ctb_mode_map[row][col] = best_mode;
2753 }
2754 }
2755
2756 ihevce_set_nbr_map(
2757 ps_ctxt->pu1_ctb_nbr_map,
2758 ps_ctxt->i4_nbr_map_strd,
2759 (ps_cu_node->u2_x0 << 1),
2760 (ps_cu_node->u2_y0 << 1),
2761 (ps_cu_node->u1_cu_size >> 2),
2762 1);
2763
2764 /*As 64*64 has won, pick L1 32x32 qp*/
2765 //ASSERT(((blk_cnt>>6) & 0xF) == (blk_cnt>>6));
2766 //ASSERT((blk_cnt>>6) == 0);
2767 ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][0] != -2);
2768 i1_cu_possible_qp = ihevce_cu_level_qp_mod(
2769 ps_ctxt->i4_qscale,
2770 ps_ed_ctb_l1->i4_32x32_satd[0][0],
2771 ps_ctxt->ld_curr_frame_32x32_log_avg[0],
2772 f_strength,
2773 &i4_act_factor,
2774 &i4_q_scale_q3_mod,
2775 ps_ctxt->ps_rc_quant_ctxt);
2776
2777 i8_frame_acc_satd_by_modqp_q10 =
2778 (i8_frame_acc_satd_cost << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
2779 i4_q_scale_q3_mod;
2780 /* Increment pointers */
2781 ps_ed_blk_l1 += 64;
2782 ps_ed_blk_l2 += 16;
2783 //ps_row_cu++;
2784 }
2785 }
2786
2787 //ps_ctb_out->u1_num_cus_in_ctb = (UWORD8)(ps_row_cu - ps_curr_cu);
2788
2789 {
2790 WORD32 i4_i, i4_j;
2791 WORD32 dummy;
2792 WORD8 i1_cu_qp;
2793 (void)i1_cu_qp;
2794 /*MAM_VAR_L1*/
2795 for(i4_j = 0; i4_j < 2; i4_j++)
2796 {
2797 i4_mod_factor_num = ps_ctxt->ai4_mod_factor_derived_by_variance[i4_j];
2798 f_strength = ps_ctxt->f_strength;
2799
2800 //i4_mod_factor_num = 4;
2801
2802 ps_ed_blk_l1 = ps_ed_l1_ctb;
2803 ps_ed_blk_l2 = ps_ed_l2_ctb;
2804 //ps_row_cu = ps_curr_cu;
2805
2806 /*Valid only for complete CTB */
2807 if((64 == u1_curr_ctb_wdt) && (64 == u1_curr_ctb_hgt))
2808 {
2809 ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][0] != -2);
2810 ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][1] != -2);
2811 ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][2] != -2);
2812 ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][3] != -2);
2813
2814 i1_cu_qp = ihevce_cu_level_qp_mod(
2815 ps_ctxt->i4_qscale,
2816 ps_ed_ctb_l1->i4_32x32_satd[0][0],
2817 ps_ctxt->ld_curr_frame_32x32_log_avg[0],
2818 f_strength,
2819 &ps_l0_ipe_out_ctb->i4_64x64_act_factor[0][i4_j],
2820 &dummy,
2821 ps_ctxt->ps_rc_quant_ctxt);
2822
2823 i1_cu_qp = ihevce_cu_level_qp_mod(
2824 ps_ctxt->i4_qscale,
2825 ps_ed_ctb_l1->i4_32x32_satd[0][1],
2826 ps_ctxt->ld_curr_frame_32x32_log_avg[1],
2827 f_strength,
2828 &ps_l0_ipe_out_ctb->i4_64x64_act_factor[1][i4_j],
2829 &dummy,
2830 ps_ctxt->ps_rc_quant_ctxt);
2831 i1_cu_qp = ihevce_cu_level_qp_mod(
2832 ps_ctxt->i4_qscale,
2833 ps_ed_ctb_l1->i4_32x32_satd[0][2],
2834 ps_ctxt->ld_curr_frame_32x32_log_avg[2],
2835 f_strength,
2836 &ps_l0_ipe_out_ctb->i4_64x64_act_factor[2][i4_j],
2837 &dummy,
2838 ps_ctxt->ps_rc_quant_ctxt);
2839
2840 i1_cu_qp = ihevce_cu_level_qp_mod(
2841 ps_ctxt->i4_qscale,
2842 ps_ed_ctb_l1->i4_32x32_satd[0][3],
2843 2.0 + ps_ctxt->ld_curr_frame_16x16_log_avg[0],
2844 f_strength,
2845 &ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j],
2846 &dummy,
2847 ps_ctxt->ps_rc_quant_ctxt);
2848
2849 ASSERT(ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j] > 0);
2850 }
2851 else
2852 {
2853 ps_l0_ipe_out_ctb->i4_64x64_act_factor[0][i4_j] = 1024;
2854 ps_l0_ipe_out_ctb->i4_64x64_act_factor[1][i4_j] = 1024;
2855 ps_l0_ipe_out_ctb->i4_64x64_act_factor[2][i4_j] = 1024;
2856 ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j] = 1024;
2857 }
2858
2859 /*Store the 8x8 Qps from L2 (in raster order) as output of intra prediction
2860 for the usage by ME*/
2861
2862 {
2863 WORD32 pos_x_32, pos_y_32, pos;
2864 //WORD32 i4_incomplete_ctb_val_8;
2865 pos_x_32 = u1_curr_ctb_wdt / 16;
2866 pos_y_32 = u1_curr_ctb_hgt / 16;
2867
2868 pos = (pos_x_32 < pos_y_32) ? pos_x_32 : pos_y_32;
2869
2870 for(i4_i = 0; i4_i < 4; i4_i++)
2871 {
2872 if(i4_i < pos)
2873 {
2874 ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][0] != -2);
2875 ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][1] != -2);
2876 ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][2] != -2);
2877 i1_cu_qp = ihevce_cu_level_qp_mod(
2878 ps_ctxt->i4_qscale,
2879 ps_ed_ctb_l1->i4_16x16_satd[i4_i][0],
2880 ps_ctxt->ld_curr_frame_16x16_log_avg[0],
2881 f_strength,
2882 &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][0][i4_j],
2883 &dummy,
2884 ps_ctxt->ps_rc_quant_ctxt);
2885 i1_cu_qp = ihevce_cu_level_qp_mod(
2886 ps_ctxt->i4_qscale,
2887 ps_ed_ctb_l1->i4_16x16_satd[i4_i][1],
2888 ps_ctxt->ld_curr_frame_16x16_log_avg[1],
2889 f_strength,
2890 &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][1][i4_j],
2891 &dummy,
2892 ps_ctxt->ps_rc_quant_ctxt);
2893 i1_cu_qp = ihevce_cu_level_qp_mod(
2894 ps_ctxt->i4_qscale,
2895 ps_ed_ctb_l1->i4_16x16_satd[i4_i][2],
2896 ps_ctxt->ld_curr_frame_16x16_log_avg[2],
2897 f_strength,
2898 &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][2][i4_j],
2899 &dummy,
2900 ps_ctxt->ps_rc_quant_ctxt);
2901 }
2902 else
2903 {
2904 /*For incomplete CTB */
2905 ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][0][i4_j] = 1024;
2906 ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][1][i4_j] = 1024;
2907 ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][2][i4_j] = 1024;
2908 }
2909 }
2910 }
2911
2912 /*Store the 8x8 Qps from L1 (in raster order) as output of intra prediction
2913 for the usage by ME*/
2914 {
2915 WORD32 pos_x_16, pos_y_16, pos;
2916 //WORD32 i4_incomplete_ctb_val_8;
2917 pos_x_16 = u1_curr_ctb_wdt / 4;
2918 pos_y_16 = u1_curr_ctb_hgt / 4;
2919
2920 pos = (pos_x_16 < pos_y_16) ? pos_x_16 : pos_y_16;
2921 for(i4_i = 0; i4_i < 16; i4_i++)
2922 {
2923 if(i4_i < pos)
2924 {
2925 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[i4_i][0] != -2);
2926 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[i4_i][1] != -2);
2927 i1_cu_qp = ihevce_cu_level_qp_mod(
2928 ps_ctxt->i4_qscale,
2929 ps_ed_ctb_l1->i4_8x8_satd[i4_i][0],
2930 ps_ctxt->ld_curr_frame_8x8_log_avg[0],
2931 f_strength,
2932 &ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][0][i4_j],
2933 &dummy,
2934 ps_ctxt->ps_rc_quant_ctxt);
2935 i1_cu_qp = ihevce_cu_level_qp_mod(
2936 ps_ctxt->i4_qscale,
2937 ps_ed_ctb_l1->i4_8x8_satd[i4_i][1],
2938 ps_ctxt->ld_curr_frame_8x8_log_avg[1],
2939 f_strength,
2940 &ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][1][i4_j],
2941 &dummy,
2942 ps_ctxt->ps_rc_quant_ctxt);
2943 }
2944 else
2945 {
2946 /*For incomplete CTB */
2947 ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][0][i4_j] = 1024;
2948 ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][1][i4_j] = 1024;
2949 }
2950 }
2951 }
2952 } //for loop
2953
2954 /* Accumalate the cost of ctb to the total cost */
2955 ps_ctxt->i8_frame_acc_satd_cost += i8_frame_acc_satd_cost;
2956 ps_ctxt->i8_frame_acc_satd_by_modqp_q10 += i8_frame_acc_satd_by_modqp_q10;
2957
2958 ps_ctxt->i8_frame_acc_mode_bits_cost += i8_frame_acc_mode_bits_cost;
2959
2960 /* satd and mpm bits accumalation of best cu size candiate for the ctb */
2961 ps_l0_ipe_out_ctb->i4_ctb_acc_satd = i4_ctb_acc_satd;
2962 ps_l0_ipe_out_ctb->i4_ctb_acc_mpm_bits = i8_frame_acc_mode_bits_cost;
2963
2964 ps_ctxt->i8_frame_acc_satd += i4_ctb_acc_satd;
2965 }
2966
2967 {
2968 WORD32 ctr_8x8;
2969 for(ctr_8x8 = 0; ctr_8x8 < (MAX_CU_IN_CTB >> 2); ctr_8x8++)
2970 {
2971 /*Accumalate activity factor for Intra and Inter*/
2972 if(ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_ipe[ctr_8x8] <
2973 ps_ed_ctb_l1->i4_sad_me_for_ref[ctr_8x8])
2974 {
2975 ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8] =
2976 ps_l0_ipe_out_ctb->i4_16x16_act_factor[ctr_8x8][1][0];
2977 }
2978 else
2979 {
2980 ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8] =
2981 ps_l0_ipe_out_ctb->i4_16x16_act_factor[ctr_8x8][1][0];
2982 }
2983
2984 /*Accumalate activity factor at frame level*/
2985 ps_ctxt->i8_frame_acc_act_factor += ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8];
2986 }
2987 }
2988 return;
2989 }
2990
/*!
******************************************************************************
* \if Function name : ihevce_nxn_sad_computer \endif
*
* \brief
*    Computes the sum of absolute differences (SAD) between two square
*    blocks of 8-bit samples
*
* \param[in] pu1_inp       : pointer to the first row of the input block
* \param[in] i4_inp_stride : offset added to pu1_inp to reach the next row
* \param[in] pu1_ref       : pointer to the first row of the reference block
* \param[in] i4_ref_stride : offset added to pu1_ref to reach the next row
* \param[in] trans_size    : width and height of the block in samples
*
* \return
*    SAD accumulated over trans_size x trans_size samples
*
*****************************************************************************
*/
WORD32 ihevce_nxn_sad_computer(
    UWORD8 *pu1_inp, WORD32 i4_inp_stride, UWORD8 *pu1_ref, WORD32 i4_ref_stride, WORD32 trans_size)
{
    WORD32 i4_sad_acc = 0;
    WORD32 rows_left;

    /* The block is square: trans_size rows of trans_size samples each */
    for(rows_left = trans_size; rows_left > 0; rows_left--)
    {
        WORD32 col;

        for(col = 0; col < trans_size; col++)
        {
            WORD32 i4_diff = (WORD32)pu1_inp[col] - (WORD32)pu1_ref[col];

            i4_sad_acc += ABS(i4_diff);
        }

        /* Step both blocks down by one row */
        pu1_inp += i4_inp_stride;
        pu1_ref += i4_ref_stride;
    }

    return i4_sad_acc;
}
3012
3013 /*!
3014 ******************************************************************************
3015 * \if Function name : ihevce_mode_eval_filtering \endif
3016 *
3017 * \brief
3018 * Evaluates best 3 modes for the given CU size with probable modes from,
3019 * early decision structure, mpm candidates and dc, planar mode
3020 *
3021 * \param[in] ps_cu_node : pointer to MAX cu node info buffer
3022 * \param[in] ps_child_cu_node : pointer to (MAX - 1) cu node info buffer
3023 * \param[in] ps_ctxt : pointer to IPE context struct
3024 * \param[in] ps_curr_src : pointer to src pixels struct
3025 * \param[in] best_amode : best angular mode from l1 layer or
3026 from (MAX - 1) CU mode
3027 * \param[in] best_costs_4x4 : pointer to 3 best cost buffer
3028 * \param[in] best_modes_4x4 : pointer to 3 best mode buffer
3029 * \param[in] step2_bypass : if 0, (MAX - 1) CU is evaluated
3030 * if 1, (MAX CU) sugested is evaluated
3031 * \param[in] tu_eq_cu : indicates if tu size is same as cu or cu/2
3032 *
3033 * \return
3034 * None
3035 *
3036 * \author
3037 * Ittiam
3038 *
3039 *****************************************************************************
3040 */
ihevce_mode_eval_filtering(ihevce_ipe_cu_tree_t * ps_cu_node,ihevce_ipe_cu_tree_t * ps_child_cu_node,ihevce_ipe_ctxt_t * ps_ctxt,iv_enc_yuv_buf_t * ps_curr_src,WORD32 best_amode,WORD32 * best_costs_4x4,UWORD8 * best_modes_4x4,WORD32 step2_bypass,WORD32 tu_eq_cu)3041 void ihevce_mode_eval_filtering(
3042 ihevce_ipe_cu_tree_t *ps_cu_node,
3043 ihevce_ipe_cu_tree_t *ps_child_cu_node,
3044 ihevce_ipe_ctxt_t *ps_ctxt,
3045 iv_enc_yuv_buf_t *ps_curr_src,
3046 WORD32 best_amode,
3047 WORD32 *best_costs_4x4,
3048 UWORD8 *best_modes_4x4,
3049 WORD32 step2_bypass,
3050 WORD32 tu_eq_cu)
3051 {
3052 UWORD8 *pu1_origin, *pu1_orig;
3053 WORD32 src_strd = ps_curr_src->i4_y_strd;
3054 WORD32 nbr_flags;
3055 nbr_avail_flags_t s_nbr;
3056 WORD32 trans_size = tu_eq_cu ? ps_cu_node->u1_cu_size : ps_cu_node->u1_cu_size >> 1;
3057 WORD32 num_tu_in_x = tu_eq_cu ? 1 : 2;
3058 WORD32 num_tu_in_y = tu_eq_cu ? 1 : 2;
3059 UWORD8 mode;
3060
3061 WORD32 cost_ang_mode = MAX_INTRA_COST_IPE;
3062 WORD32 filter_flag;
3063 WORD32 cost_amode_step2[7] = { 0 };
3064 /*WORD32 best_sad[5]; // NOTE_A01: Not getting consumed at present */
3065 WORD32 sad = 0;
3066 WORD32 cu_pos_x, cu_pos_y;
3067 WORD32 temp;
3068 WORD32 i = 0, j, k, i_end, z;
3069 //WORD32 row, col, size;
3070 UWORD8 *pu1_ref;
3071 WORD32 xA, yA, xB, yB;
3072 WORD32 top_intra_mode;
3073 WORD32 left_intra_mode;
3074 UWORD8 *pu1_ref_orig = &ps_ctxt->au1_ref_samples[0];
3075 UWORD8 *pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0];
3076
3077 UWORD8 modes_4x4[5] = { 0, 1, 2, 3, 4 };
3078 WORD32 count;
3079
3080 pf_ipe_res_trans_had apf_resd_trns_had[4];
3081
3082 WORD32 cand_mode_satd_list[3];
3083 ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
3084
3085 ihevc_intra_pred_luma_ref_substitution_fptr =
3086 ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
3087
3088 apf_resd_trns_had[0] = ps_ctxt->s_cmn_opt_func.pf_HAD_4x4_8bit;
3089 apf_resd_trns_had[1] = ps_ctxt->s_cmn_opt_func.pf_HAD_8x8_8bit;
3090 apf_resd_trns_had[2] = ps_ctxt->s_cmn_opt_func.pf_HAD_16x16_8bit;
3091 apf_resd_trns_had[3] = ps_ctxt->s_cmn_opt_func.pf_HAD_32x32_8bit;
3092
3093 /* initialize modes_to_eval as zero */
3094 memset(&ps_ctxt->au1_modes_to_eval, 0, MAX_NUM_IP_MODES);
3095
3096 /* Compute the Parent Cost */
3097
3098 /* Pointer to top-left of the CU - y0,x0 in 8x8 granularity */
3099 pu1_orig = (UWORD8 *)(ps_curr_src->pv_y_buf) + ((ps_cu_node->u2_y0 << 3) * src_strd) +
3100 (ps_cu_node->u2_x0 << 3);
3101
3102 /* Get position of CU within CTB at 4x4 granularity */
3103 cu_pos_x = ps_cu_node->u2_x0 << 1;
3104 cu_pos_y = ps_cu_node->u2_y0 << 1;
3105
3106 /* get the neighbour availability flags */
3107 ihevce_get_only_nbr_flag(
3108 &s_nbr,
3109 ps_ctxt->pu1_ctb_nbr_map,
3110 ps_ctxt->i4_nbr_map_strd,
3111 cu_pos_x,
3112 cu_pos_y,
3113 trans_size >> 2,
3114 trans_size >> 2);
3115
3116 /* Traverse for all 4 child blocks in the parent block */
3117 xA = (ps_cu_node->u2_x0 << 3) >> 2;
3118 yA = ((ps_cu_node->u2_y0 << 3) >> 2) + 1;
3119 xB = xA + 1;
3120 yB = yA - 1;
3121 left_intra_mode = ps_ctxt->au1_ctb_mode_map[yA][xA];
3122 top_intra_mode = ps_ctxt->au1_ctb_mode_map[yB][xB];
3123 /* call the function which populates sad cost for all the modes */
3124
3125 ihevce_intra_populate_mode_bits_cost_bracketing(
3126 top_intra_mode,
3127 left_intra_mode,
3128 s_nbr.u1_top_avail,
3129 s_nbr.u1_left_avail,
3130 ps_cu_node->u2_y0,
3131 &ps_ctxt->au2_mode_bits_satd_cost[0],
3132 &ps_ctxt->au2_mode_bits_satd[0],
3133 ps_ctxt->i4_ol_satd_lambda,
3134 cand_mode_satd_list);
3135
3136 for(k = 0; k < num_tu_in_y; k++)
3137 {
3138 for(j = 0; j < num_tu_in_x; j++)
3139 {
3140 /* get the neighbour availability flags */
3141 nbr_flags = ihevce_get_nbr_intra(
3142 &s_nbr,
3143 ps_ctxt->pu1_ctb_nbr_map,
3144 ps_ctxt->i4_nbr_map_strd,
3145 cu_pos_x + ((j) * (trans_size >> 2)),
3146 cu_pos_y + ((k) * (trans_size >> 2)),
3147 trans_size >> 2);
3148
3149 pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size);
3150
3151 /* Create reference samples array */
3152 ihevc_intra_pred_luma_ref_substitution_fptr(
3153 pu1_origin - src_strd - 1,
3154 pu1_origin - src_strd,
3155 pu1_origin - 1,
3156 src_strd,
3157 trans_size,
3158 nbr_flags,
3159 pu1_ref_orig,
3160 0);
3161
3162 /* Perform reference samples filtering */
3163 ihevce_intra_pred_ref_filtering(pu1_ref_orig, trans_size, pu1_ref_filt);
3164
3165 ihevce_set_nbr_map(
3166 ps_ctxt->pu1_ctb_nbr_map,
3167 ps_ctxt->i4_nbr_map_strd,
3168 cu_pos_x + ((j) * (trans_size >> 2)),
3169 cu_pos_y + ((k) * (trans_size >> 2)),
3170 (trans_size >> 2),
3171 1);
3172
3173 pu1_ref_orig += (4 * MAX_CTB_SIZE + 1);
3174 pu1_ref_filt += (4 * MAX_CTB_SIZE + 1);
3175 }
3176 }
3177
3178 /* Revaluation for angular mode */
3179 //if(ps_ed_blk->ang_attr.mode_present == 1)
3180 //if(((best_amode & 0x1) != 1))
3181
3182 {
3183 WORD32 u1_trans_idx = trans_size >> 3;
3184 if(trans_size == 32)
3185 u1_trans_idx = 3;
3186 //best_amode = ps_ed_blk->ang_attr.best_mode;
3187
3188 i = 0;
3189 if(!step2_bypass)
3190 {
3191 /* Around best level 4 angular mode, search for best level 2 mode */
3192 ASSERT((best_amode >= 2) && (best_amode <= 34));
3193
3194 if(ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P3)
3195 {
3196 if(best_amode >= 4)
3197 ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode - 2;
3198 }
3199
3200 ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode;
3201
3202 if(ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P3)
3203 {
3204 if(best_amode <= 32)
3205 ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode + 2;
3206 }
3207 }
3208 else
3209 {
3210 ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[0]->best_mode;
3211 ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[1]->best_mode;
3212 ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[2]->best_mode;
3213 ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[3]->best_mode;
3214 }
3215
3216 /* Add the left and top MPM modes for computation*/
3217
3218 ps_ctxt->au1_modes_to_eval_temp[i++] = cand_mode_satd_list[0];
3219 ps_ctxt->au1_modes_to_eval_temp[i++] = cand_mode_satd_list[1];
3220
3221 i_end = i;
3222 count = 0;
3223
3224 /*Remove duplicate modes from modes_to_eval_temp[] */
3225 for(j = 0; j < i_end; j++)
3226 {
3227 for(k = 0; k < count; k++)
3228 {
3229 if(ps_ctxt->au1_modes_to_eval_temp[j] == ps_ctxt->au1_modes_to_eval[k])
3230 break;
3231 }
3232 if((k == count) && (ps_ctxt->au1_modes_to_eval_temp[j] > 1))
3233 {
3234 ps_ctxt->au1_modes_to_eval[count] = ps_ctxt->au1_modes_to_eval_temp[j];
3235 count++;
3236 }
3237 }
3238 i_end = count;
3239 if(count == 0)
3240 {
3241 ps_ctxt->au1_modes_to_eval[0] = 26;
3242 i_end = 1;
3243 }
3244
3245 for(i = 0; i < i_end; i++)
3246 {
3247 pu1_ref_orig = &ps_ctxt->au1_ref_samples[0];
3248 pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0];
3249
3250 mode = ps_ctxt->au1_modes_to_eval[i];
3251 ASSERT((mode >= 2) && (mode <= 34));
3252 cost_amode_step2[i] = ps_ctxt->au2_mode_bits_satd_cost[mode];
3253 filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(trans_size) - 2));
3254
3255 for(k = 0; k < num_tu_in_y; k++)
3256 {
3257 for(j = 0; j < num_tu_in_x; j++)
3258 {
3259 pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size);
3260
3261 if(0 == filter_flag)
3262 pu1_ref = pu1_ref_orig;
3263 else
3264 pu1_ref = pu1_ref_filt;
3265
3266 g_apf_lum_ip[g_i4_ip_funcs[mode]](
3267 pu1_ref, 0, &ps_ctxt->au1_pred_samples[0], trans_size, trans_size, mode);
3268
3269 if(ps_ctxt->u1_use_satd)
3270 {
3271 sad = apf_resd_trns_had[u1_trans_idx](
3272 pu1_origin,
3273 ps_curr_src->i4_y_strd,
3274 &ps_ctxt->au1_pred_samples[0],
3275 trans_size,
3276 NULL,
3277 0
3278
3279 );
3280 }
3281 else
3282 {
3283 sad = ps_ctxt->s_ipe_optimised_function_list.pf_nxn_sad_computer(
3284 pu1_origin,
3285 ps_curr_src->i4_y_strd,
3286 &ps_ctxt->au1_pred_samples[0],
3287 trans_size,
3288 trans_size);
3289 }
3290
3291 cost_amode_step2[i] += sad;
3292
3293 pu1_ref_orig += (4 * MAX_CTB_SIZE + 1);
3294 pu1_ref_filt += (4 * MAX_CTB_SIZE + 1);
3295 }
3296 }
3297 }
3298 best_amode = ps_ctxt->au1_modes_to_eval[0];
3299 /*Init cost indx */
3300 cost_ang_mode = MAX_INTRA_COST_IPE; //cost_amode_step2[0];
3301 for(z = 0; z < i_end; z++)
3302 {
3303 /* Least cost of all 3 angles are stored in cost_amode_step2[0] and corr. mode*/
3304 if(cost_ang_mode >= cost_amode_step2[z])
3305 {
3306 if(cost_ang_mode == cost_amode_step2[z])
3307 {
3308 if(best_amode > ps_ctxt->au1_modes_to_eval[z])
3309 best_amode = ps_ctxt->au1_modes_to_eval[z];
3310 }
3311 else
3312 {
3313 best_amode = ps_ctxt->au1_modes_to_eval[z];
3314 }
3315 cost_ang_mode = cost_amode_step2[z];
3316 }
3317 }
3318
3319 /*Modify mode bits for the angular modes */
3320 }
3321
3322 {
3323 /* Step - I modification */
3324 ASSERT((best_amode >= 2) && (best_amode <= 34));
3325 i_end = 0;
3326 z = 0;
3327
3328 /* Around best level 3 angular mode, search for best level 1 mode */
3329 ps_ctxt->au1_modes_to_eval[i_end++] = 0;
3330 ps_ctxt->au1_modes_to_eval[i_end++] = 1;
3331
3332 if(best_amode != 2)
3333 ps_ctxt->au1_modes_to_eval[i_end++] = best_amode - 1;
3334
3335 ps_ctxt->au1_modes_to_eval[i_end++] = best_amode;
3336
3337 if(best_amode != 34)
3338 ps_ctxt->au1_modes_to_eval[i_end++] = best_amode + 1;
3339
3340 /* Inserting step_2's best mode at last to avoid
3341 recalculation of it's SATD cost */
3342
3343 //ps_ctxt->au1_modes_to_eval[i_end] = best_amode; //Bugfix: HSAD compared with SAD
3344 //cost_amode_step2[i_end] = cost_ang_mode;
3345
3346 /*best_sad[i_end] = cost_ang_mode
3347 - mode_bits_satd_cost[best_amode]; //See NOTE_A01 above */
3348
3349 cost_ang_mode = MAX_INTRA_COST_IPE; /* Init cost */
3350
3351 for(i = 0; i < i_end; i++)
3352 {
3353 WORD32 u1_trans_idx = trans_size >> 3;
3354 if(trans_size == 32)
3355 u1_trans_idx = 3;
3356 pu1_ref_orig = &ps_ctxt->au1_ref_samples[0];
3357 pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0];
3358
3359 /*best_sad[i] = 0; //See NOTE_A01 above */
3360 mode = ps_ctxt->au1_modes_to_eval[i];
3361 cost_amode_step2[i] = ps_ctxt->au2_mode_bits_satd_cost[mode];
3362 filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(trans_size) - 2));
3363
3364 for(k = 0; k < num_tu_in_y; k++)
3365 {
3366 for(j = 0; j < num_tu_in_x; j++)
3367 {
3368 pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size);
3369
3370 if(0 == filter_flag)
3371 pu1_ref = pu1_ref_orig;
3372 else
3373 pu1_ref = pu1_ref_filt;
3374
3375 g_apf_lum_ip[g_i4_ip_funcs[mode]](
3376 pu1_ref, 0, &ps_ctxt->au1_pred_samples[0], trans_size, trans_size, mode);
3377
3378 //if(trans_size != 4)
3379 {
3380 sad = apf_resd_trns_had[u1_trans_idx](
3381 pu1_origin,
3382 ps_curr_src->i4_y_strd,
3383 &ps_ctxt->au1_pred_samples[0],
3384 trans_size,
3385 NULL,
3386 0);
3387 }
3388
3389 /*accumualting SATD though name says it is sad*/
3390 cost_amode_step2[i] += sad;
3391 /*best_sad[i] +=sad; //See NOTE_A01 above */
3392 pu1_ref_orig += (4 * MAX_CTB_SIZE + 1);
3393 pu1_ref_filt += (4 * MAX_CTB_SIZE + 1);
3394 }
3395 }
3396 }
3397 /* Updating i_end for the step_2's inserted mode*/
3398 // i_end++;
3399
3400 /* Arrange the reference array in ascending order */
3401
3402 for(i = 0; i < (i_end - 1); i++)
3403 {
3404 for(j = i + 1; j < i_end; j++)
3405 {
3406 if(cost_amode_step2[i] > cost_amode_step2[j])
3407 {
3408 temp = cost_amode_step2[i];
3409 cost_amode_step2[i] = cost_amode_step2[j];
3410 cost_amode_step2[j] = temp;
3411
3412 temp = modes_4x4[i];
3413 modes_4x4[i] = modes_4x4[j];
3414 modes_4x4[j] = temp;
3415 }
3416 }
3417 }
3418
3419 /* Least cost of all 3 angles are stored in cost_amode_step2[0] and corr. mode*/
3420 best_amode = ps_ctxt->au1_modes_to_eval[modes_4x4[0]];
3421 cost_ang_mode = cost_amode_step2[0];
3422 ps_cu_node->best_satd = cost_ang_mode - ps_ctxt->au2_mode_bits_satd_cost[best_amode];
3423 ps_cu_node->best_cost = cost_amode_step2[0];
3424 ps_cu_node->best_mode = ps_ctxt->au1_modes_to_eval[modes_4x4[0]];
3425 ps_cu_node->best_satd =
3426 ps_cu_node->best_cost - ps_ctxt->au2_mode_bits_satd_cost[ps_cu_node->best_mode];
3427
3428 /*Accumalate best mode bits cost for RC*/
3429 ps_cu_node->u2_mode_bits_cost = ps_ctxt->au2_mode_bits_satd[ps_cu_node->best_mode];
3430
3431 /* Store the best three candidates */
3432 for(i = 0; i < 3; i++)
3433 {
3434 best_costs_4x4[i] = cost_amode_step2[i];
3435 best_modes_4x4[i] = ps_ctxt->au1_modes_to_eval[modes_4x4[i]];
3436 }
3437 }
3438
3439 return;
3440 }
3441