1 /******************************************************************************
2 *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /*!
22 ******************************************************************************
23 * \file ihevce_enc_loop_utils.c
24 *
25 * \brief
26 * This file contains utility functions of Encode loop
27 *
28 * \date
29 * 18/09/2012
30 *
31 * \author
32 * Ittiam
33 *
34 *
35 * List of Functions
36 *
37 *
38 ******************************************************************************
39 */
40
41 /*****************************************************************************/
42 /* File Includes */
43 /*****************************************************************************/
44 /* System include files */
45 #include <stdio.h>
46 #include <string.h>
47 #include <stdlib.h>
48 #include <assert.h>
49 #include <stdarg.h>
50 #include <math.h>
51 #include <limits.h>
52
53 /* User include files */
54 #include "ihevc_typedefs.h"
55 #include "itt_video_api.h"
56 #include "ihevce_api.h"
57
58 #include "rc_cntrl_param.h"
59 #include "rc_frame_info_collector.h"
60 #include "rc_look_ahead_params.h"
61
62 #include "ihevc_defs.h"
63 #include "ihevc_macros.h"
64 #include "ihevc_debug.h"
65 #include "ihevc_structs.h"
66 #include "ihevc_platform_macros.h"
67 #include "ihevc_deblk.h"
68 #include "ihevc_itrans_recon.h"
69 #include "ihevc_chroma_itrans_recon.h"
70 #include "ihevc_chroma_intra_pred.h"
71 #include "ihevc_intra_pred.h"
72 #include "ihevc_inter_pred.h"
73 #include "ihevc_mem_fns.h"
74 #include "ihevc_padding.h"
75 #include "ihevc_weighted_pred.h"
76 #include "ihevc_sao.h"
77 #include "ihevc_resi_trans.h"
78 #include "ihevc_quant_iquant_ssd.h"
79 #include "ihevc_cabac_tables.h"
80 #include "ihevc_common_tables.h"
81
82 #include "ihevce_defs.h"
83 #include "ihevce_hle_interface.h"
84 #include "ihevce_lap_enc_structs.h"
85 #include "ihevce_multi_thrd_structs.h"
86 #include "ihevce_multi_thrd_funcs.h"
87 #include "ihevce_me_common_defs.h"
88 #include "ihevce_had_satd.h"
89 #include "ihevce_error_codes.h"
90 #include "ihevce_bitstream.h"
91 #include "ihevce_cabac.h"
92 #include "ihevce_rdoq_macros.h"
93 #include "ihevce_function_selector.h"
94 #include "ihevce_enc_structs.h"
95 #include "ihevce_entropy_structs.h"
96 #include "ihevce_cmn_utils_instr_set_router.h"
97 #include "ihevce_ipe_instr_set_router.h"
98 #include "ihevce_decomp_pre_intra_structs.h"
99 #include "ihevce_decomp_pre_intra_pass.h"
100 #include "ihevce_enc_loop_structs.h"
101 #include "ihevce_nbr_avail.h"
102 #include "ihevce_enc_loop_utils.h"
103 #include "ihevce_sub_pic_rc.h"
104 #include "ihevce_global_tables.h"
105 #include "ihevce_bs_compute_ctb.h"
106 #include "ihevce_cabac_rdo.h"
107 #include "ihevce_deblk.h"
108 #include "ihevce_frame_process.h"
109 #include "ihevce_rc_enc_structs.h"
110 #include "hme_datatype.h"
111 #include "hme_interface.h"
112 #include "hme_common_defs.h"
113 #include "hme_defs.h"
114 #include "hme_common_utils.h"
115 #include "ihevce_me_instr_set_router.h"
116 #include "ihevce_enc_subpel_gen.h"
117 #include "ihevce_inter_pred.h"
118 #include "ihevce_mv_pred.h"
119 #include "ihevce_mv_pred_merge.h"
120 #include "ihevce_enc_loop_inter_mode_sifter.h"
121 #include "ihevce_enc_cu_recursion.h"
122 #include "ihevce_enc_loop_pass.h"
123 #include "ihevce_common_utils.h"
124 #include "ihevce_dep_mngr_interface.h"
125 #include "ihevce_sao.h"
126 #include "ihevce_tile_interface.h"
127 #include "ihevce_profile.h"
128 #include "ihevce_stasino_helpers.h"
129 #include "ihevce_tu_tree_selector.h"
130
131 /*****************************************************************************/
132 /* Globals */
133 /*****************************************************************************/
134
135 extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2];
136 extern const UWORD8 gu1_hevce_scan4x4[3][16];
137 extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc[4][16];
138 extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_tr4[16];
139 extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_00[16];
140
141 /*****************************************************************************/
142 /* Constant Macros */
143 /*****************************************************************************/
144 #define ENABLE_ZERO_CBF 1
145 #define DISABLE_RDOQ_INTRA 0
146
147 /*****************************************************************************/
148 /* Function Definitions */
149 /*****************************************************************************/
/*!
******************************************************************************
* \if Function name : ihevce_tu_prms_set_leaf \endif
*
* \brief
*    Stores the size, position and early CBF value of one TU in a tu_prms_t
*    entry
*
* \param[out] ps_tu        : entry to be written
* \param[in]  i4_size      : TU size
* \param[in]  i4_x_off     : x offset of the TU
* \param[in]  i4_y_off     : y offset of the TU
* \param[in]  i4_early_cbf : early CBF value of the TU
*
* \return
*    None
*
******************************************************************************
*/
static void ihevce_tu_prms_set_leaf(
    tu_prms_t *ps_tu, WORD32 i4_size, WORD32 i4_x_off, WORD32 i4_y_off, WORD32 i4_early_cbf)
{
    ps_tu->u1_tu_size = i4_size;
    ps_tu->u1_x_off = i4_x_off;
    ps_tu->u1_y_off = i4_y_off;
    ps_tu->i4_early_cbf = i4_early_cbf;
}

/*!
******************************************************************************
* \if Function name : ihevce_tu_tree_update \endif
*
* \brief
*    Recursively expands packed TU split / early CBF flags into a linear list
*    of tu_prms_t entries
*
* \par Description
*    The size of the node handled by a call is (root size >> depth), where the
*    root size is read from ps_tu_prms->u1_tu_size on entry.
*    - Node size >= 16 with bit 0 of tu_split_flag set : the function calls
*      itself for the four quadrants in the order top-left, top-right,
*      bottom-left, bottom-right. For a node of size 32 the flags of the
*      children are 5 bit fields at bit positions 16, 11, 6 and 1; for the
*      other sizes they are single bits at positions 4, 3, 2 and 1.
*    - Smaller node with bit 0 of tu_split_flag set (the split 8x8 case) :
*      four entries of half the node size are written with early CBF set to 1.
*    - Bit 0 of tu_split_flag clear : one entry of the node size is written,
*      early CBF is bit 0 of tu_early_cbf.
*    After entries are written, and only while the TU count is below
*    MAX_TU_IN_CTB, the pointer moves to the next entry and that entry is
*    seeded with the root size so that the following call can read it.
*
* \param[in,out] ps_tu_prms    : next entry to be written
* \param[in,out] pnum_tu_in_cu : running TU count, incremented by the number
*                                of entries written
* \param[in] depth             : depth of the current node below the root
* \param[in] tu_split_flag     : packed split flags of the current node
* \param[in] tu_early_cbf      : packed early CBF flags of the current node
* \param[in] i4_x_off          : x offset of the current node
* \param[in] i4_y_off          : y offset of the current node
*
* \return
*    Pointer (as void *) to the entry the next call has to write. When the TU
*    count has reached MAX_TU_IN_CTB this is the last entry written.
*
******************************************************************************
*/
void *ihevce_tu_tree_update(
    tu_prms_t *ps_tu_prms,
    WORD32 *pnum_tu_in_cu,
    WORD32 depth,
    WORD32 tu_split_flag,
    WORD32 tu_early_cbf,
    WORD32 i4_x_off,
    WORD32 i4_y_off)
{
    const WORD32 i4_root_size = ps_tu_prms->u1_tu_size;
    const WORD32 i4_node_size = i4_root_size >> depth;
    const WORD32 i4_node_is_split = tu_split_flag & 0x1;
    WORD32 i4_quad;

    if((i4_node_size >= 16) && i4_node_is_split)
    {
        /* Intermediate node : descend into the four quadrants */
        const WORD32 i4_half = i4_root_size >> (depth + 1);
        /* 32x32 nodes carry 5 bits per child, the others 1 bit per child */
        const WORD32 i4_field_bits = (32 == i4_node_size) ? 5 : 1;
        const WORD32 i4_field_mask = (1 << i4_field_bits) - 1;

        for(i4_quad = 0; i4_quad < 4; i4_quad++)
        {
            /* Child 0 sits in the most significant field, bit 0 belongs */
            /* to the current node itself                                */
            const WORD32 i4_shift = 1 + (i4_field_bits * (3 - i4_quad));
            const WORD32 i4_child_x = i4_x_off + ((i4_quad & 1) ? i4_half : 0);
            const WORD32 i4_child_y = i4_y_off + ((i4_quad & 2) ? i4_half : 0);

            ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
                ps_tu_prms,
                pnum_tu_in_cu,
                depth + 1,
                (tu_split_flag >> i4_shift) & i4_field_mask,
                (tu_early_cbf >> i4_shift) & i4_field_mask,
                i4_child_x,
                i4_child_y);
        }

        return ps_tu_prms;
    }

    if(i4_node_is_split)
    {
        /* Split node below 16 : emit the four children directly.   */
        /* Early CBF is not done for 4x4 transforms, so it is set 1 */
        const WORD32 i4_half = i4_root_size >> (depth + 1);

        (*pnum_tu_in_cu) += 4;

        for(i4_quad = 0; i4_quad < 4; i4_quad++)
        {
            ihevce_tu_prms_set_leaf(
                &ps_tu_prms[i4_quad],
                i4_half,
                i4_x_off + ((i4_quad & 1) ? i4_half : 0),
                i4_y_off + ((i4_quad & 2) ? i4_half : 0),
                1);
        }

        /* Leave the pointer on the last child written */
        ps_tu_prms += 3;
    }
    else
    {
        /* Unsplit node : a single TU of the node size */
        ihevce_tu_prms_set_leaf(
            ps_tu_prms, i4_node_size, i4_x_off, i4_y_off, tu_early_cbf & 0x1);

        (*pnum_tu_in_cu)++;
    }

    if((*pnum_tu_in_cu) < MAX_TU_IN_CTB)
    {
        /* Seed the next entry with the root size for the following call */
        ps_tu_prms++;
        ps_tu_prms->u1_tu_size = i4_root_size;
    }

    return ps_tu_prms;
}
311
312 /*!
313 ******************************************************************************
314 * \if Function name : ihevce_compute_quant_rel_param \endif
315 *
316 * \brief
317 * This function updates quantization related parameters like qp_mod_6 etc in
318 * context according to new qp
319 *
320 * \date
321 * 08/01/2013
322 *
323 * \author
324 * Ittiam
325 *
326 * \return
327 *
328 * List of Functions
329 *
330 *
331 ******************************************************************************
332 */
ihevce_compute_quant_rel_param(ihevce_enc_loop_ctxt_t * ps_ctxt,WORD8 i1_cu_qp)333 void ihevce_compute_quant_rel_param(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD8 i1_cu_qp)
334 {
335 WORD32 i4_div_factor;
336
337 ps_ctxt->i4_chrm_cu_qp =
338 (ps_ctxt->u1_chroma_array_type == 2)
339 ? MIN(i1_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51)
340 : gai1_ihevc_chroma_qp_scale[i1_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET];
341 ps_ctxt->i4_cu_qp_div6 = (i1_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
342 i4_div_factor = (i1_cu_qp + 3) / 6;
343 i4_div_factor = CLIP3(i4_div_factor, 3, 6);
344 ps_ctxt->i4_cu_qp_mod6 = (i1_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
345 ps_ctxt->i4_chrm_cu_qp_div6 = (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
346 ps_ctxt->i4_chrm_cu_qp_mod6 = (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
347
348 #define INTER_RND_QP_BY_6
349 #ifdef INTER_RND_QP_BY_6
350 /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
351 {
352 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] =
353 (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)6) + 0.5f);
354 }
355 #else
356 /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
357 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3;
358 #endif
359
360 if(ISLICE == ps_ctxt->i1_slice_type)
361 {
362 /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
363 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
364 (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f);
365 }
366 else
367 {
368 if(0) /*TRAQO_EXT_ENABLE_ONE_THIRD_RND*/
369 {
370 /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
371 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
372 (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f);
373 }
374 else
375 {
376 /* quant factor without RDOQ is 1/6th of shift for intra in inter pic */
377 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
378 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER];
379 /* (1 << QUANT_ROUND_FACTOR_Q) / 6; */
380 }
381 }
382 }
383
384 /*!
385 ******************************************************************************
386 * \if Function name : ihevce_populate_cl_cu_lambda_prms \endif
387 *
388 * \brief
389 * Function whihc calculates the Lambda params for current picture
390 *
391 * \param[in] ps_enc_ctxt : encoder ctxt pointer
392 * \param[in] ps_cur_pic_ctxt : current pic ctxt
393 * \param[in] i4_cur_frame_qp : current pic QP
394 * \param[in] first_field : is first field flag
395 * \param[in] i4_temporal_lyr_id : Current picture layer id
396 *
397 * \return
398 * None
399 *
400 * \author
401 * Ittiam
402 *
403 *****************************************************************************
404 */
ihevce_populate_cl_cu_lambda_prms(ihevce_enc_loop_ctxt_t * ps_ctxt,frm_lambda_ctxt_t * ps_frm_lamda,WORD32 i4_slice_type,WORD32 i4_temporal_lyr_id,WORD32 i4_lambda_type)405 void ihevce_populate_cl_cu_lambda_prms(
406 ihevce_enc_loop_ctxt_t *ps_ctxt,
407 frm_lambda_ctxt_t *ps_frm_lamda,
408 WORD32 i4_slice_type,
409 WORD32 i4_temporal_lyr_id,
410 WORD32 i4_lambda_type)
411 {
412 WORD32 i4_curr_cu_qp, i4_curr_cu_qp_offset;
413 double lambda_modifier;
414 double lambda_uv_modifier;
415 double lambda;
416 double lambda_uv;
417
418 WORD32 i4_qp_bdoffset = 6 * (ps_ctxt->u1_bit_depth - 8);
419
420 /*Populate lamda modifier */
421 ps_ctxt->i4_lamda_modifier = ps_frm_lamda->lambda_modifier;
422 ps_ctxt->i4_uv_lamda_modifier = ps_frm_lamda->lambda_uv_modifier;
423 ps_ctxt->i4_temporal_layer_id = i4_temporal_lyr_id;
424
425 for(i4_curr_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
426 i4_curr_cu_qp <= ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
427 i4_curr_cu_qp++)
428 {
429 WORD32 chroma_qp = (ps_ctxt->i4_chroma_format == IV_YUV_422SP_UV)
430 ? MIN(i4_curr_cu_qp, 51)
431 : gai1_ihevc_chroma_qp_scale[i4_curr_cu_qp + MAX_QP_BD_OFFSET];
432
433 i4_curr_cu_qp_offset = i4_curr_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
434
435 lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
436 lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
437
438 if((BSLICE == i4_slice_type) && (i4_temporal_lyr_id))
439 {
440 lambda_modifier = ps_frm_lamda->lambda_modifier *
441 CLIP3((((double)(i4_curr_cu_qp - 12)) / 6.0), 2.00, 4.00);
442 lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier *
443 CLIP3((((double)(chroma_qp - 12)) / 6.0), 2.00, 4.00);
444 }
445 else
446 {
447 lambda_modifier = ps_frm_lamda->lambda_modifier;
448 lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier;
449 }
450 if(ps_ctxt->i4_use_const_lamda_modifier)
451 {
452 if(ISLICE == ps_ctxt->i1_slice_type)
453 {
454 lambda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
455 lambda_uv_modifier = ps_ctxt->f_i_pic_lamda_modifier;
456 }
457 else
458 {
459 lambda_modifier = CONST_LAMDA_MOD_VAL;
460 lambda_uv_modifier = CONST_LAMDA_MOD_VAL;
461 }
462 }
463 switch(i4_lambda_type)
464 {
465 case 0:
466 {
467 i4_qp_bdoffset = 0;
468
469 lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
470 lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
471
472 lambda *= lambda_modifier;
473 lambda_uv *= lambda_uv_modifier;
474
475 ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
476 (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
477
478 ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
479 (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
480
481 ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
482 (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
483 if(ps_ctxt->i4_use_const_lamda_modifier)
484 {
485 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
486 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
487 }
488 else
489 {
490 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
491 (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
492 }
493
494 ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
495 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
496
497 ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
498 ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset];
499
500 ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
501 ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset];
502
503 ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
504 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset];
505
506 ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
507 ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset];
508
509 break;
510 }
511 case 1:
512 {
513 lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
514 lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
515
516 lambda *= lambda_modifier;
517 lambda_uv *= lambda_uv_modifier;
518
519 ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
520 (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
521
522 ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
523 (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
524
525 ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
526 (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
527 if(ps_ctxt->i4_use_const_lamda_modifier)
528 {
529 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
530 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
531 }
532 else
533 {
534 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
535 (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
536 }
537 ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
538 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
539
540 ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
541 ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset];
542
543 ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
544 ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset];
545
546 ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
547 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset];
548
549 ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
550 ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset];
551
552 break;
553 }
554 case 2:
555 {
556 lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
557 lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
558
559 lambda *= lambda_modifier;
560 lambda_uv *= lambda_uv_modifier;
561
562 ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
563 (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
564
565 ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
566 (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
567
568 ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
569 (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
570
571 if(ps_ctxt->i4_use_const_lamda_modifier)
572 {
573 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
574 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
575 }
576 else
577 {
578 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
579 (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
580 }
581 ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
582 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
583
584 /* lambda corresponding to 8- bit, for metrics based on 8- bit ( Example 8bit SAD in encloop)*/
585 lambda = pow(2.0, (((double)(i4_curr_cu_qp - 12)) / 3.0));
586 lambda_uv = pow(2.0, (((double)(chroma_qp - 12)) / 3.0));
587
588 lambda *= lambda_modifier;
589 lambda_uv *= lambda_uv_modifier;
590
591 ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
592 (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
593
594 ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
595 (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
596
597 ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
598 (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
599 if(ps_ctxt->i4_use_const_lamda_modifier)
600 {
601 ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
602 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
603 }
604 else
605 {
606 ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
607 (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
608 }
609
610 ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
611 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
612
613 break;
614 }
615 default:
616 {
617 /* Intended to be a barren wasteland! */
618 ASSERT(0);
619 }
620 }
621 }
622 }
623
624 /*!
625 ******************************************************************************
626 * \if Function name : ihevce_get_cl_cu_lambda_prms \endif
627 *
628 * \brief
629 * Function whihc calculates the Lambda params for current picture
630 *
631 * \param[in] ps_enc_ctxt : encoder ctxt pointer
632 * \param[in] ps_cur_pic_ctxt : current pic ctxt
633 * \param[in] i4_cur_frame_qp : current pic QP
634 * \param[in] first_field : is first field flag
635 * \param[in] i4_temporal_lyr_id : Current picture layer id
636 *
637 * \return
638 * None
639 *
640 * \author
641 * Ittiam
642 *
643 *****************************************************************************
644 */
ihevce_get_cl_cu_lambda_prms(ihevce_enc_loop_ctxt_t * ps_ctxt,WORD32 i4_cur_cu_qp)645 void ihevce_get_cl_cu_lambda_prms(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 i4_cur_cu_qp)
646 {
647 WORD32 chroma_qp = (ps_ctxt->u1_chroma_array_type == 2)
648 ? MIN(i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51)
649 : gai1_ihevc_chroma_qp_scale
650 [i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET];
651
652 /* closed loop ssd lambda is same as final lambda */
653 ps_ctxt->i8_cl_ssd_lambda_qf =
654 ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
655 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
656 ps_ctxt
657 ->i8_cl_ssd_lambda_chroma_qf_array[chroma_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
658 ps_ctxt->u4_chroma_cost_weighing_factor =
659 ps_ctxt->au4_chroma_cost_weighing_factor_array
660 [chroma_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
661 /* --- Initialized the lambda for SATD computations --- */
662 /* --- 0.95 is the multiplication factor as per HM --- */
663 /* --- 1.9 is the multiplication factor for Hadamard Transform --- */
664 ps_ctxt->i4_satd_lamda =
665 ps_ctxt->i4_satd_lamda_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
666 ps_ctxt->i4_sad_lamda =
667 ps_ctxt->i4_sad_type2_lamda_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
668 }
669
670 /*!
671 ******************************************************************************
672 * \if Function name : ihevce_update_pred_qp \endif
673 *
674 * \brief
675 * Computes pred qp for the given CU
676 *
677 * \param[in]
678 *
679 * \return
680 *
681 *
682 * \author
683 * Ittiam
684 *
685 *****************************************************************************
686 */
ihevce_update_pred_qp(ihevce_enc_loop_ctxt_t * ps_ctxt,WORD32 cu_pos_x,WORD32 cu_pos_y)687 void ihevce_update_pred_qp(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 cu_pos_x, WORD32 cu_pos_y)
688 {
689 WORD32 i4_pred_qp = 0x7FFFFFFF;
690 WORD32 i4_top, i4_left;
691 if(cu_pos_x == 0 && cu_pos_y == 0) /*CTB start*/
692 {
693 i4_pred_qp = ps_ctxt->i4_prev_QP;
694 }
695 else
696 {
697 if(cu_pos_y == 0) /*CTB boundary*/
698 {
699 i4_top = ps_ctxt->i4_prev_QP;
700 }
701 else /*within CTB*/
702 {
703 i4_top = ps_ctxt->ai4_qp_qg[(cu_pos_y - 1) * 8 + (cu_pos_x)];
704 }
705 if(cu_pos_x == 0) /*CTB boundary*/
706 {
707 i4_left = ps_ctxt->i4_prev_QP;
708 }
709 else /*within CTB*/
710 {
711 i4_left = ps_ctxt->ai4_qp_qg[(cu_pos_y)*8 + (cu_pos_x - 1)];
712 }
713 i4_pred_qp = (i4_left + i4_top + 1) >> 1;
714 }
715 ps_ctxt->i4_pred_qp = i4_pred_qp;
716 return;
717 }
718 /*!
719 ******************************************************************************
720 * \if Function name : ihevce_compute_cu_level_QP \endif
721 *
722 * \brief
723 * Computes cu level QP with Traqo,Spatial Mod and In-frame RC
724 *
725 * \param[in]
726 *
727 * \return
728 *
729 *
730 * \author
731 * Ittiam
732 *
733 *****************************************************************************
734 */
ihevce_compute_cu_level_QP(ihevce_enc_loop_ctxt_t * ps_ctxt,WORD32 i4_activity_for_qp,WORD32 i4_activity_for_lamda,WORD32 i4_reduce_qp)735 void ihevce_compute_cu_level_QP(
736 ihevce_enc_loop_ctxt_t *ps_ctxt,
737 WORD32 i4_activity_for_qp,
738 WORD32 i4_activity_for_lamda,
739 WORD32 i4_reduce_qp)
740 {
741 /*modify quant related param in ctxt based on current cu qp*/
742 WORD32 i4_input_QP = ps_ctxt->i4_frame_mod_qp;
743 WORD32 cu_qp = i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
744
745 WORD32 i4_max_qp_allowed;
746 WORD32 i4_min_qp_allowed;
747 WORD32 i4_pred_qp;
748
749 i4_pred_qp = ps_ctxt->i4_pred_qp;
750
751 if(ps_ctxt->i4_sub_pic_level_rc)
752 {
753 i4_max_qp_allowed = (i4_pred_qp + (25 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2)));
754 i4_min_qp_allowed = (i4_pred_qp - (26 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2)));
755 }
756 else
757 {
758 i4_max_qp_allowed = (i4_input_QP + (7 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4)));
759 i4_min_qp_allowed = (i4_input_QP - (18 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4)));
760 }
761 if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6))
762 return;
763
764 #if LAMDA_BASED_ON_QUANT
765 i4_activity_for_lamda = i4_activity_for_qp;
766 #endif
767
768 if(i4_activity_for_qp != -1)
769 {
770 cu_qp = (ps_ctxt->ps_rc_quant_ctxt
771 ->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
772 if(ps_ctxt->i4_qp_mod)
773 {
774 /*Recompute the Qp as per enc thread's frame level Qp*/
775 ASSERT(i4_activity_for_qp > 0);
776 cu_qp = ((cu_qp * i4_activity_for_qp) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
777 QP_LEVEL_MOD_ACT_FACTOR;
778 }
779
780 // To avoid access of uninitialised Qscale to qp conversion table
781 if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
782 cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
783 else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
784 cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
785
786 cu_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[cu_qp];
787
788 if((1 == i4_reduce_qp) && (cu_qp > 1))
789 cu_qp--;
790
791 /*CLIP the delta to obey standard allowed QP variation of (-26 + offset/2) to (25 + offset/2)*/
792 if(cu_qp > i4_max_qp_allowed)
793 cu_qp = i4_max_qp_allowed;
794 else if(cu_qp < i4_min_qp_allowed)
795 cu_qp = i4_min_qp_allowed;
796
797 /* CLIP to maintain Qp between user configured and min and max Qp values*/
798 if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp)
799 cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
800 else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp)
801 cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
802
803 /*cu qp must be populated in cu_analyse_t struct*/
804 ps_ctxt->i4_cu_qp = cu_qp;
805 /*recompute quant related param at every cu level*/
806 ihevce_compute_quant_rel_param(ps_ctxt, cu_qp);
807 }
808
809 /*Decoupling qp and lamda calculation */
810 if(i4_activity_for_lamda != -1)
811 {
812 cu_qp = (ps_ctxt->ps_rc_quant_ctxt
813 ->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
814
815 if(ps_ctxt->i4_qp_mod)
816 {
817 #if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON
818 /*Recompute the Qp as per enc thread's frame level Qp*/
819 ASSERT(i4_activity_for_lamda > 0);
820 cu_qp = ((cu_qp * i4_activity_for_lamda) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
821 QP_LEVEL_MOD_ACT_FACTOR;
822 #endif
823 }
824 if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
825 cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
826 else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
827 cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
828
829 cu_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[cu_qp];
830
831 /*CLIP the delta to obey standard allowed QP variation of (-26 + offset/2) to (25 + offset/2)*/
832 if(cu_qp > i4_max_qp_allowed)
833 cu_qp = i4_max_qp_allowed;
834 else if(cu_qp < i4_min_qp_allowed)
835 cu_qp = i4_min_qp_allowed;
836
837 /* CLIP to maintain Qp between user configured and min and max Qp values*/
838 if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp)
839 cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
840 else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp)
841 cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
842 /* get frame level lambda params */
843 ihevce_get_cl_cu_lambda_prms(
844 ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? cu_qp : ps_ctxt->i4_frame_qp);
845 }
846 }
847
/**
*******************************************************************************
* \if Function name : ihevce_scan_coeffs \endif
*
* @brief
*    Computes the coeff buffer for a coded TU for entropy coding
*
* @par   Description
*    Computes the coeff buffer for a coded TU for entropy coding
*
* \param[in] pi2_quant_coeffs Quantized coefficient context
*
* \param[in] pi4_subBlock2csbfId_map Map from a coded sub-block position to its
*                                    index in pu1_csbf_buf
*
* \param[in] scan_idx Scan index specifying the scan order
*
* \param[in] trans_size Transform unit size
*
* \param[inout] pu1_out_data output coeff buffer for a coded TU for entropy coding
*
* \param[in] pu1_csbf_buf csb flag buffer
*
* \param[in] i4_csbf_stride Not used by the function
*
* @returns num_bytes
*    Number of bytes written to pu1_out_data, or -1 when trans_size is not
*    one of 4, 8, 16 or 32
*
* @remarks
*
* \author
*    Ittiam
*
*******************************************************************************
*/
877
ihevce_scan_coeffs(WORD16 * pi2_quant_coeffs,WORD32 * pi4_subBlock2csbfId_map,WORD32 scan_idx,WORD32 trans_size,UWORD8 * pu1_out_data,UWORD8 * pu1_csbf_buf,WORD32 i4_csbf_stride)878 WORD32 ihevce_scan_coeffs(
879 WORD16 *pi2_quant_coeffs,
880 WORD32 *pi4_subBlock2csbfId_map,
881 WORD32 scan_idx,
882 WORD32 trans_size,
883 UWORD8 *pu1_out_data,
884 UWORD8 *pu1_csbf_buf,
885 WORD32 i4_csbf_stride)
886 {
887 WORD32 i, trans_unit_idx, num_gt1_flag;
888 UWORD16 u2_csbf0flags;
889 WORD32 num_bytes = 0;
890 UWORD8 *pu1_trans_table;
891 UWORD8 *pu1_csb_table;
892 WORD32 shift_value, mask_value;
893 UWORD16 u2_sig_coeff_abs_gt0_flags = 0, u2_sig_coeff_abs_gt1_flags = 0;
894 UWORD16 u2_sign_flags;
895 UWORD16 u2_abs_coeff_remaining[16];
896 WORD32 blk_row, blk_col;
897
898 UWORD8 *pu1_out_data_header;
899 UWORD16 *pu2_out_data_coeff;
900
901 WORD32 x_pos, y_pos;
902 WORD32 quant_coeff;
903
904 WORD32 num_gt0_flag;
905 (void)i4_csbf_stride;
906 pu1_out_data_header = pu1_out_data;
907 /* Need only last 3 bits, rest are reserved for debugging and making */
908 /* WORD alignment */
909 u2_csbf0flags = 0xBAD0;
910
911 /* Select proper order for your transform unit and csb based on scan_idx*/
912 /* and the trans_size */
913
914 /* scan order inside a csb */
915 pu1_csb_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
916 /* GETRANGE will give the log_2 of trans_size to shift_value */
917 GETRANGE(shift_value, trans_size);
918 shift_value = shift_value - 3; /* for finding. row no. from scan index */
919 mask_value = (trans_size / 4) - 1; /*for finding the col. no. from scan index*/
920 switch(trans_size)
921 {
922 case 32:
923 pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_8x8[scan_idx][0]);
924 break;
925 case 16:
926 pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
927 break;
928 case 8:
929 pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_2x2[scan_idx][0]);
930 break;
931 case 4:
932 pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_1x1[0]);
933 break;
934 default:
935 DBG_PRINTF("Invalid Trans Size\n");
936 return -1;
937 break;
938 }
939
940 /*go through each csb in the scan order for first non-zero coded sub-block*/
941 for(trans_unit_idx = (trans_size * trans_size / 16) - 1; trans_unit_idx >= 0; trans_unit_idx--)
942 {
943 /* check for the first csb flag in our scan order */
944 if(pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]])
945 {
946 UWORD8 u1_last_x, u1_last_y;
947 /* row of csb */
948 blk_row = pu1_trans_table[trans_unit_idx] >> shift_value;
949 /* col of csb */
950 blk_col = pu1_trans_table[trans_unit_idx] & mask_value;
951
952 /*check for the 1st non-0 values inside the csb in our scan order*/
953 for(i = 15; i >= 0; i--)
954 {
955 x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
956 y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
957
958 quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
959
960 if(quant_coeff != 0)
961 break;
962 }
963
964 ASSERT(i >= 0);
965
966 u1_last_x = x_pos;
967 u1_last_y = y_pos;
968
969 /* storing last_x and last_y */
970 *pu1_out_data_header = u1_last_x;
971 pu1_out_data_header++;
972 num_bytes++;
973 *pu1_out_data_header = u1_last_y;
974 pu1_out_data_header++;
975 num_bytes++;
976
977 /* storing the scan order */
978 *pu1_out_data_header = scan_idx;
979 pu1_out_data_header++;
980 num_bytes++;
981 /* storing last_sub_block pos. in scan order count */
982 *pu1_out_data_header = trans_unit_idx;
983 pu1_out_data_header++;
984 num_bytes++;
985
986 /*stored the first 4 bytes, now all are word16. So word16 pointer*/
987 pu2_out_data_coeff = (UWORD16 *)pu1_out_data_header;
988
989 /* u2_csbf0flags word */
990 u2_csbf0flags = 0xBAD0 | 1; /*since right&bottom csbf is 0*/
991 /* storing u2_csbf0flags word */
992 *pu2_out_data_coeff = u2_csbf0flags;
993 pu2_out_data_coeff++;
994 num_bytes += 2;
995
996 num_gt0_flag = 1;
997 num_gt1_flag = 0;
998 u2_sign_flags = 0;
999
1000 /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
1001 u2_sig_coeff_abs_gt0_flags = u2_sig_coeff_abs_gt0_flags | (1 << i);
1002 if(abs(quant_coeff) > 1)
1003 {
1004 /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
1005 u2_sig_coeff_abs_gt1_flags = u2_sig_coeff_abs_gt1_flags | (1 << i);
1006 /* update u2_abs_coeff_remaining */
1007 u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
1008
1009 num_gt1_flag++;
1010 }
1011
1012 if(quant_coeff < 0)
1013 {
1014 /* set the i th bit of u2_sign_flags */
1015 u2_sign_flags = u2_sign_flags | (1 << i);
1016 }
1017
1018 /* Test remaining elements in our scan order */
1019 /* Can optimize further by CLZ macro */
1020 for(i = i - 1; i >= 0; i--)
1021 {
1022 x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
1023 y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
1024
1025 quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
1026
1027 if(quant_coeff != 0)
1028 {
1029 /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
1030 u2_sig_coeff_abs_gt0_flags |= (1 << i);
1031
1032 if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE))
1033 {
1034 /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
1035 u2_sig_coeff_abs_gt1_flags |= (1 << i);
1036
1037 /* update u2_abs_coeff_remaining */
1038 u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
1039
1040 num_gt1_flag++; /*n0. of Ones in sig_coeff_abs_gt1_flag*/
1041 }
1042
1043 if(quant_coeff < 0)
1044 {
1045 /* set the i th bit of u2_sign_flags */
1046 u2_sign_flags |= (1 << i);
1047 }
1048
1049 num_gt0_flag++;
1050 }
1051 }
1052
1053 /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
1054 *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags;
1055 pu2_out_data_coeff++;
1056 num_bytes += 2;
1057 /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
1058 *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags;
1059 pu2_out_data_coeff++;
1060 num_bytes += 2;
1061 /* storing u2_sign_flags 2 bytes */
1062 *pu2_out_data_coeff = u2_sign_flags;
1063 pu2_out_data_coeff++;
1064 num_bytes += 2;
1065
1066 /* Store the u2_abs_coeff_remaining[] */
1067 for(i = 0; i < num_gt1_flag; i++)
1068 {
1069 /* storing u2_abs_coeff_remaining[i] 2 bytes */
1070 *pu2_out_data_coeff = u2_abs_coeff_remaining[i];
1071 pu2_out_data_coeff++;
1072 num_bytes += 2;
1073 }
1074
1075 break; /*We just need this loop for finding 1st non-zero csb only*/
1076 }
1077 }
1078
1079 /* go through remaining csb in the scan order */
1080 for(trans_unit_idx = trans_unit_idx - 1; trans_unit_idx >= 0; trans_unit_idx--)
1081 {
1082 blk_row = pu1_trans_table[trans_unit_idx] >> shift_value; /*row of csb*/
1083 blk_col = pu1_trans_table[trans_unit_idx] & mask_value; /*col of csb*/
1084
1085 /* u2_csbf0flags word */
1086 u2_csbf0flags = 0xBAD0 | /* assuming csbf_buf has only 0 or 1 values */
1087 (pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]]);
1088
1089 /********************************************************************/
1090 /* Minor hack: As per HEVC spec csbf in not signalled in stream for */
1091 /* block0, instead sig coeff map is directly signalled. This is */
1092 /* taken care by forcing csbf for block0 to be 1 even if it is 0 */
1093 /********************************************************************/
1094 if(0 == trans_unit_idx)
1095 {
1096 u2_csbf0flags |= 1;
1097 }
1098
1099 if((blk_col + 1 < trans_size / 4)) /* checking right boundary */
1100 {
1101 if(pu1_csbf_buf[pi4_subBlock2csbfId_map[blk_row * trans_size / 4 + blk_col + 1]])
1102 {
1103 /* set the 2nd bit of u2_csbf0flags for right csbf */
1104 u2_csbf0flags = u2_csbf0flags | (1 << 1);
1105 }
1106 }
1107 if((blk_row + 1 < trans_size / 4)) /* checking bottom oundary */
1108 {
1109 if(pu1_csbf_buf[pi4_subBlock2csbfId_map[(blk_row + 1) * trans_size / 4 + blk_col]])
1110 {
1111 /* set the 3rd bit of u2_csbf0flags for bottom csbf */
1112 u2_csbf0flags = u2_csbf0flags | (1 << 2);
1113 }
1114 }
1115
1116 /* storing u2_csbf0flags word */
1117 *pu2_out_data_coeff = u2_csbf0flags;
1118 pu2_out_data_coeff++;
1119 num_bytes += 2;
1120
1121 /* check for the csb flag in our scan order */
1122 if(u2_csbf0flags & 0x1)
1123 {
1124 u2_sig_coeff_abs_gt0_flags = 0;
1125 u2_sig_coeff_abs_gt1_flags = 0;
1126 u2_sign_flags = 0;
1127
1128 num_gt0_flag = 0;
1129 num_gt1_flag = 0;
1130 /* check for the non-0 values inside the csb in our scan order */
1131 /* Can optimize further by CLZ macro */
1132 for(i = 15; i >= 0; i--)
1133 {
1134 x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
1135 y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
1136
1137 quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
1138
1139 if(quant_coeff != 0)
1140 {
1141 /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
1142 u2_sig_coeff_abs_gt0_flags |= (1 << i);
1143
1144 if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE))
1145 {
1146 /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
1147 u2_sig_coeff_abs_gt1_flags |= (1 << i);
1148
1149 /* update u2_abs_coeff_remaining */
1150 u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
1151
1152 num_gt1_flag++;
1153 }
1154
1155 if(quant_coeff < 0)
1156 {
1157 /* set the i th bit of u2_sign_flags */
1158 u2_sign_flags = u2_sign_flags | (1 << i);
1159 }
1160
1161 num_gt0_flag++;
1162 }
1163 }
1164
1165 /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
1166 *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags;
1167 pu2_out_data_coeff++;
1168 num_bytes += 2;
1169
1170 /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
1171 *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags;
1172 pu2_out_data_coeff++;
1173 num_bytes += 2;
1174
1175 /* storing u2_sign_flags 2 bytes */
1176 *pu2_out_data_coeff = u2_sign_flags;
1177 pu2_out_data_coeff++;
1178 num_bytes += 2;
1179
1180 /* Store the u2_abs_coeff_remaining[] */
1181 for(i = 0; i < num_gt1_flag; i++)
1182 {
1183 /* storing u2_abs_coeff_remaining[i] 2 bytes */
1184 *pu2_out_data_coeff = u2_abs_coeff_remaining[i];
1185 pu2_out_data_coeff++;
1186 num_bytes += 2;
1187 }
1188 }
1189 }
1190
1191 return num_bytes; /* Return the number of bytes written to out_data */
1192 }
1193
1194 /**
1195 *******************************************************************************
1196 * \if Function name : ihevce_populate_intra_pred_mode \endif
1197 *
1198 * \brief * populates intra pred modes,b2_mpm_idx,b1_prev_intra_luma_pred_flag &
1199 * b5_rem_intra_pred_mode for a CU based on nieghbouring CUs,
1200 *
1201 * \par Description
1202 * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
1203 * for a CU
1204 *
1205 * \param[in] top_intra_mode Top intra mode
1206 * \param[in] left_intra_mode Left intra mode
1207 * \param[in] available_top Top availability flag
1208 * \param[in] available_left Left availability flag
1209 * \param[in] cu_pos_y CU 'y' position
1210 * \param[in] ps_cand_mode_list pointer to populate candidate list
1211 *
1212 * \returns none
1213 *
1214 * \author
1215 * Ittiam
1216 *
1217 *******************************************************************************
1218 */
1219
ihevce_populate_intra_pred_mode(WORD32 top_intra_mode,WORD32 left_intra_mode,WORD32 available_top,WORD32 available_left,WORD32 cu_pos_y,WORD32 * ps_cand_mode_list)1220 void ihevce_populate_intra_pred_mode(
1221 WORD32 top_intra_mode,
1222 WORD32 left_intra_mode,
1223 WORD32 available_top,
1224 WORD32 available_left,
1225 WORD32 cu_pos_y,
1226 WORD32 *ps_cand_mode_list)
1227 {
1228 /* local variables */
1229 WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
1230
1231 /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
1232 /* N = top */
1233 if(0 == available_top)
1234 {
1235 cand_intra_pred_mode_top = INTRA_DC;
1236 }
1237 /* for neighbour != INTRA, setting DC is done outside */
1238 else if(0 == cu_pos_y) /* It's on the CTB boundary */
1239 {
1240 cand_intra_pred_mode_top = INTRA_DC;
1241 }
1242 else
1243 {
1244 cand_intra_pred_mode_top = top_intra_mode;
1245 }
1246
1247 /* N = left */
1248 if(0 == available_left)
1249 {
1250 cand_intra_pred_mode_left = INTRA_DC;
1251 }
1252 /* for neighbour != INTRA, setting DC is done outside */
1253 else
1254 {
1255 cand_intra_pred_mode_left = left_intra_mode;
1256 }
1257
1258 /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
1259 if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
1260 {
1261 if(cand_intra_pred_mode_left < 2)
1262 {
1263 ps_cand_mode_list[0] = INTRA_PLANAR;
1264 ps_cand_mode_list[1] = INTRA_DC;
1265 ps_cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
1266 }
1267 else
1268 {
1269 ps_cand_mode_list[0] = cand_intra_pred_mode_left;
1270 ps_cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32);
1271 ps_cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32);
1272 }
1273 }
1274 else
1275 {
1276 ps_cand_mode_list[0] = cand_intra_pred_mode_left;
1277 ps_cand_mode_list[1] = cand_intra_pred_mode_top;
1278
1279 if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
1280 (cand_intra_pred_mode_top != INTRA_PLANAR))
1281 {
1282 ps_cand_mode_list[2] = INTRA_PLANAR;
1283 }
1284 else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
1285 {
1286 ps_cand_mode_list[2] = INTRA_DC;
1287 }
1288 else
1289 {
1290 ps_cand_mode_list[2] = INTRA_ANGULAR(26);
1291 }
1292 }
1293 }
1294 /**
1295 *******************************************************************************
1296 * \if Function name : ihevce_intra_pred_mode_signaling \endif
1297 *
1298 * \brief * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx &
1299 * b5_rem_intra_pred_mode for a CU
1300 *
1301 * \par Description
1302 * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
1303 * for a CU
1304 *
1305 * \param[in] ps_nbr_top Top neighbour context
1306 * \param[in] ps_nbr_left Left neighbour context
1307 * \param[in] available_top Top availability flag
1308 * \param[in] available_left Left availability flag
1309 * \param[in] cu_pos_y CU 'y' position
1310 * \param[in] luma_intra_pred_mode_current the intra_pred_mode of current block
1311 * \param[inout] ps_intra_pred_mode_current
1312 * Pointer to structure having b1_prev_intra_luma_pred_flag, b2_mpm_idx and
1313 * b5_rem_intra_pred_mode
1314 *
1315 * \returns none
1316 *
1317 * \author
1318 * Ittiam
1319 *
1320 *******************************************************************************
1321 */
1322
ihevce_intra_pred_mode_signaling(WORD32 top_intra_mode,WORD32 left_intra_mode,WORD32 available_top,WORD32 available_left,WORD32 cu_pos_y,WORD32 luma_intra_pred_mode_current,intra_prev_rem_flags_t * ps_intra_pred_mode_current)1323 void ihevce_intra_pred_mode_signaling(
1324 WORD32 top_intra_mode,
1325 WORD32 left_intra_mode,
1326 WORD32 available_top,
1327 WORD32 available_left,
1328 WORD32 cu_pos_y,
1329 WORD32 luma_intra_pred_mode_current,
1330 intra_prev_rem_flags_t *ps_intra_pred_mode_current)
1331 {
1332 /* local variables */
1333 WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
1334 WORD32 cand_mode_list[3];
1335
1336 ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0;
1337 ps_intra_pred_mode_current->b2_mpm_idx = 0; // for safety purpose
1338 ps_intra_pred_mode_current->b5_rem_intra_pred_mode = 0;
1339
1340 /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
1341 /* N = top */
1342 if(0 == available_top)
1343 {
1344 cand_intra_pred_mode_top = INTRA_DC;
1345 }
1346 /* for neighbour != INTRA, setting DC is done outside */
1347 else if(0 == cu_pos_y) /* It's on the CTB boundary */
1348 {
1349 cand_intra_pred_mode_top = INTRA_DC;
1350 }
1351 else
1352 {
1353 cand_intra_pred_mode_top = top_intra_mode;
1354 }
1355
1356 /* N = left */
1357 if(0 == available_left)
1358 {
1359 cand_intra_pred_mode_left = INTRA_DC;
1360 }
1361 /* for neighbour != INTRA, setting DC is done outside */
1362 else
1363 {
1364 cand_intra_pred_mode_left = left_intra_mode;
1365 }
1366
1367 /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
1368 if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
1369 {
1370 if(cand_intra_pred_mode_left < 2)
1371 {
1372 cand_mode_list[0] = INTRA_PLANAR;
1373 cand_mode_list[1] = INTRA_DC;
1374 cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
1375 }
1376 else
1377 {
1378 cand_mode_list[0] = cand_intra_pred_mode_left;
1379 cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32);
1380 cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32);
1381 }
1382 }
1383 else
1384 {
1385 cand_mode_list[0] = cand_intra_pred_mode_left;
1386 cand_mode_list[1] = cand_intra_pred_mode_top;
1387
1388 if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
1389 (cand_intra_pred_mode_top != INTRA_PLANAR))
1390 {
1391 cand_mode_list[2] = INTRA_PLANAR;
1392 }
1393 else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
1394 {
1395 cand_mode_list[2] = INTRA_DC;
1396 }
1397 else
1398 {
1399 cand_mode_list[2] = INTRA_ANGULAR(26);
1400 }
1401 }
1402
1403 /* Signal Generation */
1404
1405 /* Flag & mpm_index generation */
1406 if(cand_mode_list[0] == luma_intra_pred_mode_current)
1407 {
1408 ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
1409 ps_intra_pred_mode_current->b2_mpm_idx = 0;
1410 }
1411 else if(cand_mode_list[1] == luma_intra_pred_mode_current)
1412 {
1413 ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
1414 ps_intra_pred_mode_current->b2_mpm_idx = 1;
1415 }
1416 else if(cand_mode_list[2] == luma_intra_pred_mode_current)
1417 {
1418 ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
1419 ps_intra_pred_mode_current->b2_mpm_idx = 2;
1420 }
1421 /* Flag & b5_rem_intra_pred_mode generation */
1422 else
1423 {
1424 WORD32 rem_mode;
1425
1426 ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0;
1427
1428 /* sorting cand_mode_list */
1429 if(cand_mode_list[0] > cand_mode_list[1])
1430 {
1431 SWAP(cand_mode_list[0], cand_mode_list[1]);
1432 }
1433 if(cand_mode_list[0] > cand_mode_list[2])
1434 {
1435 SWAP(cand_mode_list[0], cand_mode_list[2]);
1436 }
1437 if(cand_mode_list[1] > cand_mode_list[2])
1438 {
1439 SWAP(cand_mode_list[1], cand_mode_list[2]);
1440 }
1441
1442 rem_mode = luma_intra_pred_mode_current;
1443
1444 if((rem_mode) >= cand_mode_list[2])
1445 {
1446 (rem_mode)--;
1447 }
1448 if((rem_mode) >= cand_mode_list[1])
1449 {
1450 (rem_mode)--;
1451 }
1452 if((rem_mode) >= cand_mode_list[0])
1453 {
1454 (rem_mode)--;
1455 }
1456 ps_intra_pred_mode_current->b5_rem_intra_pred_mode = rem_mode;
1457 }
1458 }
1459
/**
*******************************************************************************
* \if Function name : ihevce_quant_rounding_factor_gen \endif
*
* \brief
*    Fills the 0-to-1 and 1-to-2 quantisation rounding factor tables of one
*    transform block from CABAC bin cost estimates
*
* \par Description
*    Bin costs are read from gau2_ihevce_cabac_bin_to_bits[] indexed by
*    (context model state ^ bin value). QUANT_ROUND_FACTOR() combines two
*    competing costs with the scaled lambda into one rounding factor.
*    A 4x4 transform is handled as a single sub-block. For larger transforms
*    the tables are addressed per 4x4 sub-block with a row stride of
*    i4_trans_size. The function writes i4_trans_size * i4_trans_size entries
*    to each output table.
*
* \param[in]  i4_trans_size          transform size
* \param[in]  is_luma                non-zero : luma context offsets are used,
*                                    zero : chroma context offsets are used
* \param[in]  ps_rdopt_entropy_ctxt  provides the CABAC context model states
* \param[out] pi4_quant_round_0_1    table receiving the 0-to-1 factors
* \param[out] pi4_quant_round_1_2    table receiving the 1-to-2 factors
* \param[in]  i4_lamda_modifier      lambda multiplier; scaled by 2^(-8/3) and
*                                    by (1 << LAMDA_Q_SHIFT_FACT) before use
* \param[in]  i4_is_tu_level_quant_rounding
*                                    1 : read the context states of the current
*                                    CU entropy context (i4_curr_buf_idx),
*                                    otherwise read au1_init_cabac_ctxt_states
*
* \returns none
*
*******************************************************************************
*/
ihevce_quant_rounding_factor_gen(WORD32 i4_trans_size,WORD32 is_luma,rdopt_entropy_ctxt_t * ps_rdopt_entropy_ctxt,WORD32 * pi4_quant_round_0_1,WORD32 * pi4_quant_round_1_2,double i4_lamda_modifier,UWORD8 i4_is_tu_level_quant_rounding)1460 void ihevce_quant_rounding_factor_gen(
1461 WORD32 i4_trans_size,
1462 WORD32 is_luma,
1463 rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt,
1464 WORD32 *pi4_quant_round_0_1,
1465 WORD32 *pi4_quant_round_1_2,
1466 double i4_lamda_modifier,
1467 UWORD8 i4_is_tu_level_quant_rounding)
1468 {
1469 //WORD32 i4_scan_idx = ps_ctxt->i4_scan_idx;
1470 UWORD8 *pu1_ctxt_model;
1471 WORD32 scan_pos;
1472 WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag */
1473 WORD32 abs_gt1_base_ctxt;
1474 WORD32 log2_tr_size, i;
1475 UWORD16 u4_bits_estimated_r0, u4_bits_estimated_r1, u4_bits_estimated_r2;
1476 UWORD16 u4_bits_estimated_r1_temp;
1477 WORD32 j = 0;
1478 WORD32 k = 0;
1479 WORD32 temp2;
1480
/* Scale the lambda modifier by 2^(-8/3), then convert it to an integer */
/* with LAMDA_Q_SHIFT_FACT fractional bits (fraction is truncated)      */
1481 double i4_lamda_mod = i4_lamda_modifier * pow(2.0, (-8.0 / 3.0));
1482 LWORD64 lamda_mod = (LWORD64)(i4_lamda_mod * (1 << LAMDA_Q_SHIFT_FACT));
1483 /* transform size to log2transform size */
1484 GETRANGE(log2_tr_size, i4_trans_size);
1485 log2_tr_size -= 1;
1486
/* Pick the set of CABAC context model states used for the bit estimates */
1487 if(1 == i4_is_tu_level_quant_rounding)
1488 {
1489 entropy_context_t *ps_cur_tu_entropy;
1490 cab_ctxt_t *ps_cabac;
1491 WORD32 curr_buf_idx = ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
1492 ps_cur_tu_entropy = &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[curr_buf_idx];
1493
1494 ps_cabac = &ps_cur_tu_entropy->s_cabac_ctxt;
1495
1496 pu1_ctxt_model = &ps_cabac->au1_ctxt_models[0];
1497 }
1498 else
1499 {
1500 pu1_ctxt_model = &ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states[0];
1501 }
1502 /* Select the base context indices from luma / chroma and the transform size */
1503 if(is_luma)
1504 {
1505 sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
1506 abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;
1507
1508 if(3 == log2_tr_size)
1509 {
1510 /* 8x8 transform size */
1511 /* Assuming diagonal scan idx for now */
1512 sig_coeff_base_ctxt += 9;
1513 }
1514 else if(3 < log2_tr_size)
1515 {
1516 /* larger transform sizes */
1517 sig_coeff_base_ctxt += 21;
1518 }
1519 }
1520 else
1521 {
1522 /* chroma context initializations */
1523 sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
1524 abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;
1525
1526 if(3 == log2_tr_size)
1527 {
1528 /* 8x8 transform size */
1529 sig_coeff_base_ctxt += 9;
1530 }
1531 else if(3 < log2_tr_size)
1532 {
1533 /* larger transform sizes */
1534 sig_coeff_base_ctxt += 12;
1535 }
1536 }
1537
1538 /*Transform size of 4x4 will have only a single CSB */
1539 /* derive the context inc as per section 9.3.3.1.4 */
1540
1541 if(2 == log2_tr_size)
1542 {
1543 UWORD8 sig_ctxinc;
1544 WORD32 state_mps;
1545 WORD32 gt1_ctxt = 0;
1546 WORD32 ctxt_set = 0;
1547 WORD32 ctxt_idx = 0;
1548
1549 /* context set based on luma subblock pos */
1550
1551 /* Encode the abs level gt1 bins */
1552 /* Currently calculating trade off between mps(2) and mps(1)*/
1553 /* The estimation has to be further done for mps(11) and mps(111)*/
1554 /*ctxt_set = 0 as transform 4x4 has only one csb with DC */
1555 /* gt1_ctxt = 0 for the co-ef value to be 2 */
1556
1557 ctxt_set = gt1_ctxt = 0;
1558 ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1559
1560 state_mps = pu1_ctxt_model[ctxt_idx];
1561
/* Cost of the greater-than-1 flag coded as 1 (r2) and as 0 (r1_temp) */
1562 u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
1563
1564 u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1565
/* The 1-to-2 factor is the same for all 16 positions of a 4x4 transform */
1566 QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1_temp, lamda_mod);
1567 for(scan_pos = 0; scan_pos < 16; scan_pos++)
1568 {
1569 *(pi4_quant_round_1_2 + scan_pos) = temp2;
1570 }
1571
/* The 0-to-1 factor depends on the per-position sig coeff flag context */
1572 for(scan_pos = 0; scan_pos < 16; scan_pos++)
1573 {
1574 //UWORD8 nbr_csbf = 1;
1575 /* derive the x,y pos */
1576 UWORD8 y_pos_x_pos = scan_pos; //gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1577
1578 /* 4x4 transform size increment uses lookup */
1579 sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
1580
1581 /*Get the mps state based on ctxt modes */
1582 state_mps = pu1_ctxt_model[sig_ctxinc + sig_coeff_base_ctxt];
1583
1584 /* Bits taken to encode sig co-ef flag as 0 */
1585 u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1586
1587 /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1588 //
1589 u4_bits_estimated_r1 =
1590 (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1591
1592 /* Add the cost of coding the greater-than-1 flag as 0 (level stays at 1) */
1593 u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
1594
1595 QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1596 *(pi4_quant_round_0_1 + scan_pos) = temp2;
1597 }
1598 }
1599 else
1600 {
1601 UWORD8 *pu1_hevce_sigcoeff_ctxtinc;
1602 WORD32 is_nbr_csb_state_mps;
1603
1604 WORD32 state_mps;
1605 WORD32 gt1_ctxt = 0;
1606 WORD32 ctxt_set = 0;
1607 WORD32 ctxt_idx;
1608 /*1to2 rounding factor is same for all sub blocks except for sub-block = 0*/
1609 /*Hence will write all the sub-block with i >=1 coeff, and then overwrite for i = 0*/
1610
1611 /*ctxt_set = 0 DC subblock, the previous state did not have 2
1612 ctxt_set = 1 DC subblock, the previous state did have >= 2
1613 ctxt_set = 2 AC subblock, the previous state did not have 2
1614 ctxt_set = 3 AC subblock, the previous state did have >= 2*/
/* i = 1 stands for "any sub-block other than the first one" */
1615 i = 1;
1616 ctxt_set = (i && is_luma) ? 2 : 0;
1617
1618 ctxt_set++;
1619
1620 /*0th position indicates the probability of 2 */
1621 /*1th position indicates the probability of 1 */
1622 /*2th position indicates the probability of 11 */
1623 /*3th position indicates the probability of 111 */
1624
1625 gt1_ctxt = 0;
1626 ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1627
1628 state_mps = pu1_ctxt_model[ctxt_idx];
1629
1630 u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
1631
1632 u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1633 QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod);
1634
/* Fill the complete 1-to-2 table (i4_trans_size * i4_trans_size entries) */
1635 for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4)); scan_pos++)
1636 {
1637 *(pi4_quant_round_1_2 + scan_pos) = temp2;
1638 }
1639
/* Now redo the computation for the first sub-block (i = 0) */
1640 i = 0;
1641 ctxt_set = (i && is_luma) ? 2 : 0;
1642 ctxt_set++;
1643
1644 /*0th position indicates the probability of 2 */
1645 /*1th position indicates the probability of 1 */
1646 /*2th position indicates the probability of 11 */
1647 /*3th position indicates the probability of 111 */
1648
1649 gt1_ctxt = 0;
1650 ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1651
1652 state_mps = pu1_ctxt_model[ctxt_idx];
1653
1654 u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
1655
1656 u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1657 QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod);
1658
/* Overwrite the top-left 4x4 of the table; rows are i4_trans_size apart */
1659 for(scan_pos = 0; scan_pos < 16; scan_pos++)
1660 {
1661 *(pi4_quant_round_1_2 + ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2;
1662 }
1663
/* Read the states of two consecutive coded sub-block flag contexts */
1664 {
1665 WORD32 ctxt_idx;
1666
1667 WORD32 nbr_csbf_0, nbr_csbf_1;
1668 WORD32 state_mps_0, state_mps_1;
1669 ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
1670 ctxt_idx += is_luma ? 0 : 2;
1671
1672 /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
1673 /* if neighbour not available, ctxt idx = 0*/
1674 nbr_csbf_0 = 0;
1675 ctxt_idx += nbr_csbf_0 ? 1 : 0;
1676 state_mps_0 = pu1_ctxt_model[ctxt_idx];
1677
1678 nbr_csbf_1 = 1;
1679 ctxt_idx += nbr_csbf_1 ? 1 : 0;
1680 state_mps_1 = pu1_ctxt_model[ctxt_idx];
1681
/* NOTE(review): state % 2 presumably extracts the MPS bit of the context */
/* state - confirm against the context model layout                       */
1682 is_nbr_csb_state_mps = ((state_mps_0 % 2) == 1) && ((state_mps_1 % 2) == 1);
1683 }
1684
1685 if(1 == is_nbr_csb_state_mps)
1686 {
/* Compute the 0-to-1 factors individually for every 4x4 sub-block, */
/* visiting the sub-blocks in raster order                           */
1687 for(i = 0; i < (i4_trans_size * i4_trans_size >> 4); i++)
1688 {
1689 UWORD8 sig_ctxinc;
1690 WORD32 state_mps;
1691 WORD32 gt1_ctxt = 0;
1692 WORD32 ctxt_set = 0;
1693
1694 WORD32 ctxt_idx;
1695
1696 /*Check if the cabac states had previous nbr available */
1697
/* Context increment table row by sub-block position:            */
/* first sub-block -> [3], rest of first sub-block row -> [1],   */
/* first sub-block column -> [2], all other sub-blocks -> [0]    */
1698 if(i == 0)
1699 pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[3][0];
1700 else if(i < (i4_trans_size >> 2))
1701 pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[1][0];
1702 else if((i % (i4_trans_size >> 2)) == 0)
1703 pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[2][0];
1704 else
1705 pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0];
1706
/* k counts the sub-block rows completed so far */
1707 if(((i % (i4_trans_size >> 2)) == 0) && (i != 0))
1708 k++;
1709
/* j is the table offset of the top-left coefficient of sub-block i */
1710 j = ((i4_trans_size * 4) * k) + ((i % (i4_trans_size >> 2)) * 4);
1711 /*ctxt_set = 0 DC subblock, the previous state did not have 2
1712 ctxt_set = 1 DC subblock, the previous state did have >= 2
1713 ctxt_set = 2 AC subblock, the previous state did not have 2
1714 ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1715
1716 ctxt_set = (i && is_luma) ? 2 : 0;
1717
1718 /* gt1_ctxt = 1 for the co-ef value to be 1 */
1719 gt1_ctxt = 0;
1720 ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1721
1722 state_mps = pu1_ctxt_model[ctxt_idx];
1723
1724 /* Bits taken to encode the greater-than-1 flag as 0 (level stays at 1) */
1725 u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1726
1727 for(scan_pos = 0; scan_pos < 16; scan_pos++)
1728 {
1729 UWORD8 y_pos_x_pos;
1730
1731 if(scan_pos || i)
1732 {
1733 y_pos_x_pos = scan_pos; // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1734 /* ctxt for AC coeff depends on curpos and neighbour csbf */
1735 sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
1736
1737 /* based on luma subblock pos */
1738 sig_ctxinc += (i && is_luma) ? 3 : 0;
1739
1740 sig_ctxinc += sig_coeff_base_ctxt;
1741 }
1742 else
1743 {
1744 /*MAM : both scan pos and i 0 implies the DC coef of 1st block only */
1745 /* DC coeff has fixed context for luma and chroma */
1746 sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
1747 }
1748
1749 /*Get the mps state based on ctxt modes */
1750 state_mps = pu1_ctxt_model[sig_ctxinc];
1751
1752 /* Bits taken to encode sig co-ef flag as 0 */
1753 u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1754
1755 u4_bits_estimated_r1 =
1756 (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1757
1758 /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1759 u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
1760 {
1761 QUANT_ROUND_FACTOR(
1762 temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1763 *(pi4_quant_round_0_1 +
1764 ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size)) + j) = temp2;
1765 }
1766 }
1767 }
1768 }
1769 else
1770 {
1771 /*If Both nbr csbfs are 0, then all the coef in sub-blocks will have same value except for 1st subblock,
1772 Hence will write the same value to all sub block, and overwrite for the 1st one */
1773 i = 1;
1774 {
1775 UWORD8 sig_ctxinc;
1776 UWORD8 y_pos_x_pos;
1777 WORD32 quant_rounding_0_1;
1778
1779 pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc_00[0];
1780
/* Only entry 0 of the table is read: one context serves every position */
1781 scan_pos = 0;
1782 y_pos_x_pos = scan_pos; // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1783 /* ctxt for AC coeff depends on curpos and neighbour csbf */
1784 sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
1785
1786 /* based on luma subblock pos */
1787 sig_ctxinc += (is_luma) ? 3 : 0;
1788
1789 sig_ctxinc += sig_coeff_base_ctxt;
1790
1791 /*Get the mps state based on ctxt modes */
1792 state_mps = pu1_ctxt_model[sig_ctxinc];
1793
1794 /* Bits taken to encode sig co-ef flag as 0 */
1795 u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1796
1797 u4_bits_estimated_r1 =
1798 (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1799
1800 /*ctxt_set = 0 DC subblock, the previous state did not have 2
1801 ctxt_set = 1 DC subblock, the previous state did have >= 2
1802 ctxt_set = 2 AC subblock, the previous state did not have 2
1803 ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1804
1805 ctxt_set = (i && is_luma) ? 2 : 0;
1806
1807 /* gt1_ctxt = 1 for the co-ef value to be 1 */
1808 gt1_ctxt = 0;
1809 ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1810
1811 state_mps = pu1_ctxt_model[ctxt_idx];
1812
1813 /* Add the bits taken to encode the greater-than-1 flag as 0 */
1814 u4_bits_estimated_r1 += gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1815
1816 QUANT_ROUND_FACTOR(
1817 quant_rounding_0_1, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1818
/* Fill the complete 0-to-1 table with this single value */
1819 for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4));
1820 scan_pos++)
1821 {
1822 *(pi4_quant_round_0_1 + scan_pos) = quant_rounding_0_1;
1823 }
1824 }
1825
1826 /*First Subblock*/
1827 i = 0;
1828
1829 {
1830 UWORD8 sig_ctxinc;
1831 WORD32 state_mps;
1832 WORD32 gt1_ctxt = 0;
1833 WORD32 ctxt_set = 0;
1834
1835 WORD32 ctxt_idx;
1836
1837 /*Check if the cabac states had previous nbr available */
1838
1839 {
1840 pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0];
1841
1842 /*ctxt_set = 0 DC subblock, the previous state did not have 2
1843 ctxt_set = 1 DC subblock, the previous state did have >= 2
1844 ctxt_set = 2 AC subblock, the previous state did not have 2
1845 ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1846 ctxt_set = (i && is_luma) ? 2 : 0;
1847
1848 /* gt1_ctxt = 1 for the co-ef value to be 1 */
1849 gt1_ctxt = 0;
1850 ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1851
1852 state_mps = pu1_ctxt_model[ctxt_idx];
1853
1854 /* Bits taken to encode the greater-than-1 flag as 0 (level stays at 1) */
1855 u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1856
/* Overwrite the top-left 4x4 of the 0-to-1 table position by position */
1857 for(scan_pos = 0; scan_pos < 16; scan_pos++)
1858 {
1859 UWORD8 y_pos_x_pos;
1860
1861 if(scan_pos)
1862 {
1863 y_pos_x_pos = scan_pos; // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1864 /* ctxt for AC coeff depends on curpos and neighbour csbf */
1865 sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
1866
1867 /* based on luma subblock pos */
1868 sig_ctxinc += (i && is_luma) ? 3 : 0;
1869
1870 sig_ctxinc += sig_coeff_base_ctxt;
1871 }
1872 else
1873 {
1874 /*MAM : both scan pos and i 0 implies the DC coef of 1st block only */
1875 /* DC coeff has fixed context for luma and chroma */
1876 sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
1877 }
1878
1879 /*Get the mps state based on ctxt modes */
1880 state_mps = pu1_ctxt_model[sig_ctxinc];
1881
1882 /* Bits taken to encode sig co-ef flag as 0 */
1883 u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1884
1885 u4_bits_estimated_r1 =
1886 (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1887
1888 /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1889 u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
1890 {
1891 QUANT_ROUND_FACTOR(
1892 temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1893 *(pi4_quant_round_0_1 +
1894 ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2;
1895 }
1896 }
1897 }
1898 }
1899 }
1900 }
1901 return;
1902 }
1903
1904 /*!
1905 ******************************************************************************
1906 * \if Function name : ihevce_t_q_iq_ssd_scan_fxn \endif
1907 *
1908 * \brief
1909 * Transform unit level (Luma) enc_loop function
1910 *
1911 * \param[in] ps_ctxt enc_loop module ctxt pointer
1912 * \param[in] pu1_pred pointer to predicted data buffer
1913 * \param[in] pred_strd predicted buffer stride
1914 * \param[in] pu1_src pointer to source data buffer
1915 * \param[in] src_strd source buffer stride
1916 * \param[in] pi2_deq_data pointer to store iq data
1917 * \param[in] deq_data_strd iq data buffer stride
1918 * \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod)
1919 * \param[out] pu1_csbf_buf pointer to store the csbf for all 4x4 in a current
1920 * block
1921 * \param[out] csbf_strd csbf buffer stride
1922 * \param[in] trans_size transform size (4, 8, 16,32)
1923 * \param[in] packed_pred_mode 0:Inter 1:Intra 2:Skip
1924 * \param[out] pi4_cost pointer to store the cost
1925 * \param[out] pi4_coeff_off pointer to store the number of bytes produced in
1926 * coeff buffer
1927 * \param[out] pu4_tu_bits pointer to store the best TU bits required encode
1928 the current TU in RDopt Mode
1929 * \param[out] pu4_blk_sad pointer to store the block sad for RC
1930 * \param[out] pi4_zero_col pointer to store the zero_col info for the TU
1931 * \param[out] pi4_zero_row pointer to store the zero_row info for the TU
1932 * \param[in] i4_perform_rdoq Indicates if RDOQ should be performed or not
1933 * \param[in] i4_perform_sbh Indicates if SBH should be performed or not
1934 *
1935 * \return
1936 * CBF of the current block
1937 *
1938 * \author
1939 * Ittiam
1940 *
1941 *****************************************************************************
1942 */
1943
ihevce_t_q_iq_ssd_scan_fxn(ihevce_enc_loop_ctxt_t * ps_ctxt,UWORD8 * pu1_pred,WORD32 pred_strd,UWORD8 * pu1_src,WORD32 src_strd,WORD16 * pi2_deq_data,WORD32 deq_data_strd,UWORD8 * pu1_recon,WORD32 i4_recon_stride,UWORD8 * pu1_ecd_data,UWORD8 * pu1_csbf_buf,WORD32 csbf_strd,WORD32 trans_size,WORD32 packed_pred_mode,LWORD64 * pi8_cost,WORD32 * pi4_coeff_off,WORD32 * pi4_tu_bits,UWORD32 * pu4_blk_sad,WORD32 * pi4_zero_col,WORD32 * pi4_zero_row,UWORD8 * pu1_is_recon_available,WORD32 i4_perform_rdoq,WORD32 i4_perform_sbh,WORD32 i4_alpha_stim_multiplier,UWORD8 u1_is_cu_noisy,SSD_TYPE_T e_ssd_type,WORD32 early_cbf)1944 WORD32 ihevce_t_q_iq_ssd_scan_fxn(
1945 ihevce_enc_loop_ctxt_t *ps_ctxt,
1946 UWORD8 *pu1_pred,
1947 WORD32 pred_strd,
1948 UWORD8 *pu1_src,
1949 WORD32 src_strd,
1950 WORD16 *pi2_deq_data,
1951 WORD32 deq_data_strd,
1952 UWORD8 *pu1_recon,
1953 WORD32 i4_recon_stride,
1954 UWORD8 *pu1_ecd_data,
1955 UWORD8 *pu1_csbf_buf,
1956 WORD32 csbf_strd,
1957 WORD32 trans_size,
1958 WORD32 packed_pred_mode,
1959 LWORD64 *pi8_cost,
1960 WORD32 *pi4_coeff_off,
1961 WORD32 *pi4_tu_bits,
1962 UWORD32 *pu4_blk_sad,
1963 WORD32 *pi4_zero_col,
1964 WORD32 *pi4_zero_row,
1965 UWORD8 *pu1_is_recon_available,
1966 WORD32 i4_perform_rdoq,
1967 WORD32 i4_perform_sbh,
1968 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
1969 WORD32 i4_alpha_stim_multiplier,
1970 UWORD8 u1_is_cu_noisy,
1971 #endif
1972 SSD_TYPE_T e_ssd_type,
1973 WORD32 early_cbf)
1974 {
1975 WORD32 cbf = 0;
1976 WORD32 trans_idx;
1977 WORD32 quant_scale_mat_offset;
1978 WORD32 *pi4_trans_scratch;
1979 WORD16 *pi2_trans_values;
1980 WORD16 *pi2_quant_coeffs;
1981 WORD32 *pi4_subBlock2csbfId_map = NULL;
1982
1983 #if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
1984 WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i;
1985 #endif
1986
1987 rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt;
1988
1989 WORD32 i4_perform_zcbf = (ENABLE_INTER_ZCU_COST && (PRED_MODE_INTRA != packed_pred_mode)) ||
1990 (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE);
1991 WORD32 i4_perform_coeff_level_rdoq = (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING);
1992 WORD8 intra_flag = 0;
1993 ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW);
1994
1995 *pi4_tu_bits = 0;
1996 *pi4_coeff_off = 0;
1997 pu1_is_recon_available[0] = 0;
1998
1999 if((PRED_MODE_SKIP == packed_pred_mode) || (0 == early_cbf))
2000 {
2001 if(e_ssd_type != NULL_TYPE)
2002 {
2003 /* SSD cost is stored to the pointer */
2004 pi8_cost[0] =
2005
2006 ps_ctxt->s_cmn_opt_func.pf_ssd_and_sad_calculator(
2007 pu1_pred, pred_strd, pu1_src, src_strd, trans_size, pu4_blk_sad);
2008
2009 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2010 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
2011 {
2012 pi8_cost[0] = ihevce_inject_stim_into_distortion(
2013 pu1_src,
2014 src_strd,
2015 pu1_pred,
2016 pred_strd,
2017 pi8_cost[0],
2018 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2019 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2020 (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2021 100.0,
2022 trans_size,
2023 0,
2024 ps_ctxt->u1_enable_psyRDOPT,
2025 NULL_PLANE);
2026 }
2027 #endif
2028
2029 /* copy pred to recon for skip mode */
2030 if(SPATIAL_DOMAIN_SSD == e_ssd_type)
2031 {
2032 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
2033 pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size);
2034 pu1_is_recon_available[0] = 1;
2035 }
2036 else
2037 {
2038 pu1_is_recon_available[0] = 0;
2039 }
2040
2041 #if ENABLE_INTER_ZCU_COST
2042 ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
2043 #endif
2044 }
2045 else
2046 {
2047 pi8_cost[0] = UINT_MAX;
2048 }
2049
2050 /* cbf is returned as 0 */
2051 return (0);
2052 }
2053
2054 /* derive context variables */
2055 pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0];
2056 pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
2057 pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2);
2058
2059 /* translate the transform size to index for 4x4 and 8x8 */
2060 trans_idx = trans_size >> 2;
2061
2062 if(PRED_MODE_INTRA == packed_pred_mode)
2063 {
2064 quant_scale_mat_offset = 0;
2065 intra_flag = 1;
2066 #if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
2067 ai4_quant_rounding_factors[0][0] =
2068 MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3);
2069
2070 for(i = 0; i < trans_size * trans_size; i++)
2071 {
2072 ai4_quant_rounding_factors[1][i] =
2073 MAX(ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3][i],
2074 (1 << QUANT_ROUND_FACTOR_Q) / 3);
2075 ai4_quant_rounding_factors[2][i] =
2076 MAX(ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3][i],
2077 (1 << QUANT_ROUND_FACTOR_Q) / 3);
2078 }
2079 #endif
2080 }
2081 else
2082 {
2083 quant_scale_mat_offset = NUM_TRANS_TYPES;
2084 }
2085 /* for intra 4x4 DST transform should be used */
2086 if((1 == trans_idx) && (1 == intra_flag))
2087 {
2088 trans_idx = 0;
2089 }
2090 /* for 16x16 cases */
2091 else if(16 == trans_size)
2092 {
2093 trans_idx = 3;
2094 }
2095 /* for 32x32 cases */
2096 else if(32 == trans_size)
2097 {
2098 trans_idx = 4;
2099 }
2100
2101 switch(trans_size)
2102 {
2103 case 4:
2104 {
2105 pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU;
2106
2107 break;
2108 }
2109 case 8:
2110 {
2111 pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU;
2112
2113 break;
2114 }
2115 case 16:
2116 {
2117 pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU;
2118
2119 break;
2120 }
2121 case 32:
2122 {
2123 pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU;
2124
2125 break;
2126 }
2127 }
2128
2129 /* Do not call the FT and Quant functions if early_cbf is 0 */
2130 if(1 == early_cbf)
2131 {
2132 /* ---------- call residue and transform block ------- */
2133 *pu4_blk_sad = ps_ctxt->apf_resd_trns[trans_idx](
2134 pu1_src,
2135 pu1_pred,
2136 pi4_trans_scratch,
2137 pi2_trans_values,
2138 src_strd,
2139 pred_strd,
2140 ((trans_size << 16) + 0)); /* dst strd and chroma flag are packed together */
2141
2142 cbf = ps_ctxt->apf_quant_iquant_ssd
2143 [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2](
2144 pi2_trans_values,
2145 ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset],
2146 pi2_quant_coeffs,
2147 pi2_deq_data,
2148 trans_size,
2149 ps_ctxt->i4_cu_qp_div6,
2150 ps_ctxt->i4_cu_qp_mod6,
2151 #if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
2152 ps_ctxt->i4_quant_rnd_factor[intra_flag],
2153 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
2154 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
2155 #else
2156 intra_flag ? ai4_quant_rounding_factors[0][0]
2157 : ps_ctxt->i4_quant_rnd_factor[intra_flag],
2158 intra_flag ? ai4_quant_rounding_factors[1]
2159 : ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
2160 intra_flag ? ai4_quant_rounding_factors[2]
2161 : ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
2162 #endif
2163 trans_size,
2164 trans_size,
2165 deq_data_strd,
2166 pu1_csbf_buf,
2167 csbf_strd,
2168 pi4_zero_col,
2169 pi4_zero_row,
2170 ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset],
2171 pi8_cost);
2172
2173 if(e_ssd_type != FREQUENCY_DOMAIN_SSD)
2174 {
2175 pi8_cost[0] = UINT_MAX;
2176 }
2177 }
2178
2179 if(0 != cbf)
2180 {
2181 if(i4_perform_sbh || i4_perform_rdoq)
2182 {
2183 ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd;
2184 ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size;
2185 ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map;
2186
2187 ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_cu_qp_div6;
2188 ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_cu_qp_mod6;
2189 ps_rdoq_sbh_ctxt->i4_scan_idx = ps_ctxt->i4_scan_idx;
2190 ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
2191 ps_rdoq_sbh_ctxt->i4_trans_size = trans_size;
2192
2193 ps_rdoq_sbh_ctxt->pi2_dequant_coeff =
2194 ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset];
2195 ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data;
2196 ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs;
2197 ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values;
2198 ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf;
2199
2200 /* ------- call coeffs scan function ------- */
2201 if((!i4_perform_rdoq))
2202 {
2203 ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
2204
2205 pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
2206 }
2207 }
2208
2209 *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
2210 pi2_quant_coeffs,
2211 pi4_subBlock2csbfId_map,
2212 ps_ctxt->i4_scan_idx,
2213 trans_size,
2214 pu1_ecd_data,
2215 pu1_csbf_buf,
2216 csbf_strd);
2217 }
2218 *pi8_cost >>= ga_trans_shift[trans_idx];
2219
2220 #if RDOPT_ZERO_CBF_ENABLE
2221 /* compare null cbf cost with encode tu rd-cost */
2222 if(cbf != 0)
2223 {
2224 WORD32 tu_bits;
2225 LWORD64 tu_rd_cost;
2226
2227 LWORD64 zero_cbf_cost = 0;
2228
2229 /*Populating the feilds of rdoq_ctxt structure*/
2230 if(i4_perform_rdoq)
2231 {
2232 /* transform size to log2transform size */
2233 GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size);
2234 ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1;
2235 ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_qf;
2236 ps_rdoq_sbh_ctxt->i4_is_luma = 1;
2237 ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx];
2238 ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td =
2239 (1 << ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td) / 2;
2240 ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0;
2241 ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col;
2242 ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row;
2243 }
2244 else if(i4_perform_zcbf)
2245 {
2246 zero_cbf_cost =
2247
2248 ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
2249 pu1_src, pu1_pred, src_strd, pred_strd, trans_size, trans_size);
2250 }
2251
2252 /************************************************************************/
2253 /* call the entropy rdo encode to get the bit estimate for current tu */
2254 /* note that tu includes only residual coding bits and does not include */
2255 /* tu split, cbf and qp delta encoding bits for a TU */
2256 /************************************************************************/
2257 if(i4_perform_rdoq)
2258 {
2259 tu_bits = ihevce_entropy_rdo_encode_tu_rdoq(
2260 &ps_ctxt->s_rdopt_entropy_ctxt,
2261 (pu1_ecd_data),
2262 trans_size,
2263 1,
2264 ps_rdoq_sbh_ctxt,
2265 pi8_cost,
2266 &zero_cbf_cost,
2267 0);
2268
2269 if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0)
2270 {
2271 cbf = 0;
2272 *pi4_coeff_off = 0;
2273 }
2274
2275 if((i4_perform_sbh) && (0 != cbf))
2276 {
2277 ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
2278 ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
2279 *pi8_cost = ps_rdoq_sbh_ctxt->i8_ssd_cost;
2280 }
2281
2282 /*Add round value before normalizing*/
2283 *pi8_cost += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td;
2284 *pi8_cost >>= ga_trans_shift[trans_idx];
2285
2286 if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1)
2287 {
2288 pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
2289 *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
2290 pi2_quant_coeffs,
2291 pi4_subBlock2csbfId_map,
2292 ps_ctxt->i4_scan_idx,
2293 trans_size,
2294 pu1_ecd_data,
2295 pu1_csbf_buf,
2296 csbf_strd);
2297 }
2298 }
2299 else
2300 {
2301 tu_bits = ihevce_entropy_rdo_encode_tu(
2302 &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 1, i4_perform_sbh);
2303 }
2304
2305 *pi4_tu_bits = tu_bits;
2306
2307 if(e_ssd_type == SPATIAL_DOMAIN_SSD)
2308 {
2309 *pi8_cost = ihevce_it_recon_ssd(
2310 ps_ctxt,
2311 pu1_src,
2312 src_strd,
2313 pu1_pred,
2314 pred_strd,
2315 pi2_deq_data,
2316 deq_data_strd,
2317 pu1_recon,
2318 i4_recon_stride,
2319 pu1_ecd_data,
2320 trans_size,
2321 packed_pred_mode,
2322 cbf,
2323 *pi4_zero_col,
2324 *pi4_zero_row,
2325 NULL_PLANE);
2326
2327 pu1_is_recon_available[0] = 1;
2328 }
2329
2330 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2331 if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2332 {
2333 pi8_cost[0] = ihevce_inject_stim_into_distortion(
2334 pu1_src,
2335 src_strd,
2336 pu1_recon,
2337 i4_recon_stride,
2338 pi8_cost[0],
2339 i4_alpha_stim_multiplier,
2340 trans_size,
2341 0,
2342 ps_ctxt->u1_enable_psyRDOPT,
2343 NULL_PLANE);
2344 }
2345 else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2346 {
2347 pi8_cost[0] = ihevce_inject_stim_into_distortion(
2348 pu1_src,
2349 src_strd,
2350 pu1_pred,
2351 pred_strd,
2352 pi8_cost[0],
2353 i4_alpha_stim_multiplier,
2354 trans_size,
2355 0,
2356 ps_ctxt->u1_enable_psyRDOPT,
2357 NULL_PLANE);
2358 }
2359 #endif
2360
2361 /* add the SSD cost to bits estimate given by ECD */
2362 tu_rd_cost = *pi8_cost + COMPUTE_RATE_COST_CLIP30(
2363 tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
2364
2365 if(i4_perform_zcbf)
2366 {
2367 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2368 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
2369 {
2370 zero_cbf_cost = ihevce_inject_stim_into_distortion(
2371 pu1_src,
2372 src_strd,
2373 pu1_pred,
2374 pred_strd,
2375 zero_cbf_cost,
2376 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2377 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2378 (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2379 100.0,
2380 trans_size,
2381 0,
2382 ps_ctxt->u1_enable_psyRDOPT,
2383 NULL_PLANE);
2384 }
2385 #endif
2386
2387 /* force the tu as zero cbf if zero_cbf_cost is lower */
2388 if(zero_cbf_cost < tu_rd_cost)
2389 {
2390 /* num bytes is set to 0 */
2391 *pi4_coeff_off = 0;
2392
2393 /* cbf is returned as 0 */
2394 cbf = 0;
2395
2396 /* cost is returned as 0 cbf cost */
2397 *pi8_cost = zero_cbf_cost;
2398
2399 /* TU bits is set to 0 */
2400 *pi4_tu_bits = 0;
2401 pu1_is_recon_available[0] = 0;
2402
2403 if(SPATIAL_DOMAIN_SSD == e_ssd_type)
2404 {
2405 /* copy pred to recon for zcbf mode */
2406
2407 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
2408 pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size);
2409
2410 pu1_is_recon_available[0] = 1;
2411 }
2412 }
2413 /* accumulate cu not coded cost with zcbf cost */
2414 #if ENABLE_INTER_ZCU_COST
2415 ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost;
2416 #endif
2417 }
2418 }
2419 else
2420 {
2421 /* cbf = 0, accumulate cu not coded cost */
2422 if(e_ssd_type == SPATIAL_DOMAIN_SSD)
2423 {
2424 *pi8_cost = ihevce_it_recon_ssd(
2425 ps_ctxt,
2426 pu1_src,
2427 src_strd,
2428 pu1_pred,
2429 pred_strd,
2430 pi2_deq_data,
2431 deq_data_strd,
2432 pu1_recon,
2433 i4_recon_stride,
2434 pu1_ecd_data,
2435 trans_size,
2436 packed_pred_mode,
2437 cbf,
2438 *pi4_zero_col,
2439 *pi4_zero_row,
2440 NULL_PLANE);
2441
2442 pu1_is_recon_available[0] = 1;
2443 }
2444
2445 #if ENABLE_INTER_ZCU_COST
2446 {
2447 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2448 if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2449 {
2450 pi8_cost[0] = ihevce_inject_stim_into_distortion(
2451 pu1_src,
2452 src_strd,
2453 pu1_recon,
2454 i4_recon_stride,
2455 pi8_cost[0],
2456 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2457 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2458 (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2459 100.0,
2460 trans_size,
2461 0,
2462 ps_ctxt->u1_enable_psyRDOPT,
2463 NULL_PLANE);
2464 }
2465 else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2466 {
2467 pi8_cost[0] = ihevce_inject_stim_into_distortion(
2468 pu1_src,
2469 src_strd,
2470 pu1_pred,
2471 pred_strd,
2472 pi8_cost[0],
2473 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2474 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2475 (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2476 100.0,
2477 trans_size,
2478 0,
2479 ps_ctxt->u1_enable_psyRDOPT,
2480 NULL_PLANE);
2481 }
2482 #endif
2483
2484 ps_ctxt->i8_cu_not_coded_cost += *pi8_cost;
2485 }
2486 #endif /* ENABLE_INTER_ZCU_COST */
2487 }
2488 #endif
2489
2490 return (cbf);
2491 }
2492
2493 /*!
2494 ******************************************************************************
2495 * \if Function name : ihevce_it_recon_fxn \endif
2496 *
2497 * \brief
2498 * Transform unit level (Luma) IT Recon function
2499 *
2500 * \param[in] ps_ctxt enc_loop module ctxt pointer
2501 * \param[in] pi2_deq_data pointer to iq data
2502 * \param[in] deq_data_strd iq data buffer stride
2503 * \param[in] pu1_pred pointer to predicted data buffer
2504 * \param[in] pred_strd predicted buffer stride
2505 * \param[in] pu1_recon pointer to recon buffer
2506 * \param[in] recon_strd recon buffer stride
2507 * \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod)
2508 * \param[in] trans_size transform size (4, 8, 16,32)
2509 * \param[in] packed_pred_mode 0:Inter 1:Intra 2:Skip
2510 * \param[in] cbf CBF of the current block
2511 * \param[in] zero_cols zero_cols of the current block
2512 * \param[in] zero_rows zero_rows of the current block
2513 *
2514 * \return
2515 *
2516 * \author
2517 * Ittiam
2518 *
2519 *****************************************************************************
2520 */
2521
ihevce_it_recon_fxn(ihevce_enc_loop_ctxt_t * ps_ctxt,WORD16 * pi2_deq_data,WORD32 deq_dat_strd,UWORD8 * pu1_pred,WORD32 pred_strd,UWORD8 * pu1_recon,WORD32 recon_strd,UWORD8 * pu1_ecd_data,WORD32 trans_size,WORD32 packed_pred_mode,WORD32 cbf,WORD32 zero_cols,WORD32 zero_rows)2522 void ihevce_it_recon_fxn(
2523 ihevce_enc_loop_ctxt_t *ps_ctxt,
2524 WORD16 *pi2_deq_data,
2525 WORD32 deq_dat_strd,
2526 UWORD8 *pu1_pred,
2527 WORD32 pred_strd,
2528 UWORD8 *pu1_recon,
2529 WORD32 recon_strd,
2530 UWORD8 *pu1_ecd_data,
2531 WORD32 trans_size,
2532 WORD32 packed_pred_mode,
2533 WORD32 cbf,
2534 WORD32 zero_cols,
2535 WORD32 zero_rows)
2536 {
2537 WORD32 dc_add_flag = 0;
2538 WORD32 trans_idx;
2539
2540 /* translate the transform size to index for 4x4 and 8x8 */
2541 trans_idx = trans_size >> 2;
2542
2543 /* if SKIP mode needs to be evaluated the pred is copied to recon */
2544 if(PRED_MODE_SKIP == packed_pred_mode)
2545 {
2546 UWORD8 *pu1_curr_recon, *pu1_curr_pred;
2547
2548 pu1_curr_pred = pu1_pred;
2549 pu1_curr_recon = pu1_recon;
2550
2551 /* 2D copy of data */
2552
2553 ps_ctxt->s_cmn_opt_func.pf_2d_square_copy(
2554 pu1_curr_recon, recon_strd, pu1_curr_pred, pred_strd, trans_size, sizeof(UWORD8));
2555
2556 return;
2557 }
2558
2559 /* for intra 4x4 DST transform should be used */
2560 if((1 == trans_idx) && (PRED_MODE_INTRA == packed_pred_mode))
2561 {
2562 trans_idx = 0;
2563 }
2564 /* for 16x16 cases */
2565 else if(16 == trans_size)
2566 {
2567 trans_idx = 3;
2568 }
2569 /* for 32x32 cases */
2570 else if(32 == trans_size)
2571 {
2572 trans_idx = 4;
2573 }
2574
2575 /*if (lastx == 0 && lasty == 0) , ie only 1 coefficient */
2576 if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1]))
2577 {
2578 dc_add_flag = 1;
2579 }
2580
2581 if(0 == cbf)
2582 {
2583 /* buffer copy */
2584 ps_ctxt->s_cmn_opt_func.pf_2d_square_copy(
2585 pu1_recon, recon_strd, pu1_pred, pred_strd, trans_size, 1);
2586 }
2587 else if((1 == dc_add_flag) && (0 != trans_idx))
2588 {
2589 /* dc add */
2590 ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc(
2591 pu1_pred,
2592 pred_strd,
2593 pu1_recon,
2594 recon_strd,
2595 trans_size,
2596 pi2_deq_data[0],
2597 NULL_PLANE /* luma */
2598 );
2599 }
2600 else
2601 {
2602 ps_ctxt->apf_it_recon[trans_idx](
2603 pi2_deq_data,
2604 &ps_ctxt->ai2_scratch[0],
2605 pu1_pred,
2606 pu1_recon,
2607 deq_dat_strd,
2608 pred_strd,
2609 recon_strd,
2610 zero_cols,
2611 zero_rows);
2612 }
2613 }
2614
2615 /*!
2616 ******************************************************************************
2617 * \if Function name : ihevce_chroma_it_recon_fxn \endif
2618 *
2619 * \brief
2620 * Transform unit level (Chroma) IT Recon function
2621 *
2622 * \param[in] ps_ctxt enc_loop module ctxt pointer
2623 * \param[in] pi2_deq_data pointer to iq data
2624 * \param[in] deq_data_strd iq data buffer stride
2625 * \param[in] pu1_pred pointer to predicted data buffer
2626 * \param[in] pred_strd predicted buffer stride
2627 * \param[in] pu1_recon pointer to recon buffer
2628 * \param[in] recon_strd recon buffer stride
2629 * \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod)
2630 * \param[in] trans_size transform size (4, 8, 16)
2631 * \param[in] cbf CBF of the current block
2632 * \param[in] zero_cols zero_cols of the current block
2633 * \param[in] zero_rows zero_rows of the current block
2634 *
2635 * \return
2636 *
2637 * \author
2638 * Ittiam
2639 *
2640 *****************************************************************************
2641 */
2642
ihevce_chroma_it_recon_fxn(ihevce_enc_loop_ctxt_t * ps_ctxt,WORD16 * pi2_deq_data,WORD32 deq_dat_strd,UWORD8 * pu1_pred,WORD32 pred_strd,UWORD8 * pu1_recon,WORD32 recon_strd,UWORD8 * pu1_ecd_data,WORD32 trans_size,WORD32 cbf,WORD32 zero_cols,WORD32 zero_rows,CHROMA_PLANE_ID_T e_chroma_plane)2643 void ihevce_chroma_it_recon_fxn(
2644 ihevce_enc_loop_ctxt_t *ps_ctxt,
2645 WORD16 *pi2_deq_data,
2646 WORD32 deq_dat_strd,
2647 UWORD8 *pu1_pred,
2648 WORD32 pred_strd,
2649 UWORD8 *pu1_recon,
2650 WORD32 recon_strd,
2651 UWORD8 *pu1_ecd_data,
2652 WORD32 trans_size,
2653 WORD32 cbf,
2654 WORD32 zero_cols,
2655 WORD32 zero_rows,
2656 CHROMA_PLANE_ID_T e_chroma_plane)
2657 {
2658 WORD32 trans_idx;
2659
2660 ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE));
2661
2662 /* since 2x2 transform is not allowed for chroma*/
2663 if(2 == trans_size)
2664 {
2665 trans_size = 4;
2666 }
2667
2668 /* translate the transform size to index */
2669 trans_idx = trans_size >> 2;
2670
2671 /* for 16x16 cases */
2672 if(16 == trans_size)
2673 {
2674 trans_idx = 3;
2675 }
2676
2677 if(0 == cbf)
2678 {
2679 /* buffer copy */
2680 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
2681 pu1_pred, pred_strd, pu1_recon, recon_strd, trans_size, trans_size, e_chroma_plane);
2682 }
2683 else if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1]))
2684 {
2685 /* dc add */
2686 ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc(
2687 pu1_pred,
2688 pred_strd,
2689 pu1_recon,
2690 recon_strd,
2691 trans_size,
2692 pi2_deq_data[0],
2693 e_chroma_plane /* chroma plane */
2694 );
2695 }
2696 else
2697 {
2698 ps_ctxt->apf_chrm_it_recon[trans_idx - 1](
2699 pi2_deq_data,
2700 &ps_ctxt->ai2_scratch[0],
2701 pu1_pred + (WORD32)e_chroma_plane,
2702 pu1_recon + (WORD32)e_chroma_plane,
2703 deq_dat_strd,
2704 pred_strd,
2705 recon_strd,
2706 zero_cols,
2707 zero_rows);
2708 }
2709 }
2710
2711 /**
2712 *******************************************************************************
2713 * \if Function name : ihevce_mpm_idx_based_filter_RDOPT_cand \endif
2714 *
2715 * \brief * Filters the RDOPT candidates based on mpm_idx
2716 *
2717 * \par Description
2718 * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
2719 * for a CU
2720 *
2721 * \param[in] ps_ctxt : ptr to enc loop context
2722 * \param[in] ps_cu_analyse : ptr to CU analyse structure
2723 * \param[in] ps_top_nbr_4x4 top 4x4 neighbour pointer
2724 * \param[in] ps_left_nbr_4x4 left 4x4 neighbour pointer
2725 * \param[in] pu1_luma_mode luma mode
2726 *
2727 * \returns none
2728 *
2729 * \author
2730 * Ittiam
2731 *
2732 *******************************************************************************
2733 */
2734
ihevce_mpm_idx_based_filter_RDOPT_cand(ihevce_enc_loop_ctxt_t * ps_ctxt,cu_analyse_t * ps_cu_analyse,nbr_4x4_t * ps_left_nbr_4x4,nbr_4x4_t * ps_top_nbr_4x4,UWORD8 * pu1_luma_mode,UWORD8 * pu1_eval_mark)2735 void ihevce_mpm_idx_based_filter_RDOPT_cand(
2736 ihevce_enc_loop_ctxt_t *ps_ctxt,
2737 cu_analyse_t *ps_cu_analyse,
2738 nbr_4x4_t *ps_left_nbr_4x4,
2739 nbr_4x4_t *ps_top_nbr_4x4,
2740 UWORD8 *pu1_luma_mode,
2741 UWORD8 *pu1_eval_mark)
2742 {
2743 WORD32 cu_pos_x;
2744 WORD32 cu_pos_y;
2745 nbr_avail_flags_t s_nbr;
2746 WORD32 trans_size;
2747 WORD32 au4_cand_mode_list[3];
2748 WORD32 nbr_flags;
2749 UWORD8 *pu1_intra_luma_modes;
2750 WORD32 rdopt_cand_ctr = 0;
2751 UWORD8 *pu1_luma_eval_mark;
2752
2753 cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 1;
2754 cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 1;
2755 trans_size = ps_cu_analyse->u1_cu_size;
2756
2757 /* get the neighbour availability flags */
2758 nbr_flags = ihevce_get_nbr_intra(
2759 &s_nbr,
2760 ps_ctxt->pu1_ctb_nbr_map,
2761 ps_ctxt->i4_nbr_map_strd,
2762 cu_pos_x,
2763 cu_pos_y,
2764 trans_size >> 2);
2765 (void)nbr_flags;
2766 /*Call the fun to populate luma intra pred mode fro TU=CU and use the same list fro
2767 *TU=CU/2 also since the modes are same in both the cases.
2768 */
2769 ihevce_populate_intra_pred_mode(
2770 ps_top_nbr_4x4->b6_luma_intra_mode,
2771 ps_left_nbr_4x4->b6_luma_intra_mode,
2772 s_nbr.u1_top_avail,
2773 s_nbr.u1_left_avail,
2774 cu_pos_y,
2775 &au4_cand_mode_list[0]);
2776
2777 /*Loop through all the RDOPT candidates of TU=CU and TU=CU/2 and check if the current RDOPT
2778 *cand is present in a4_cand_mode_list, If yes set eval flag to 1 else set it to zero
2779 */
2780
2781 pu1_intra_luma_modes = pu1_luma_mode;
2782 pu1_luma_eval_mark = pu1_eval_mark;
2783
2784 while(pu1_intra_luma_modes[rdopt_cand_ctr] != 255)
2785 {
2786 WORD32 i;
2787 WORD32 found_flag = 0;
2788
2789 /*1st candidate of TU=CU list and TU=CU/2 list must go through RDOPT stage
2790 *irrespective of whether the cand is present in the mpm idx list or not
2791 */
2792 if(rdopt_cand_ctr == 0)
2793 {
2794 rdopt_cand_ctr++;
2795 continue;
2796 }
2797
2798 for(i = 0; i < 3; i++)
2799 {
2800 if(pu1_intra_luma_modes[rdopt_cand_ctr] == au4_cand_mode_list[i])
2801 {
2802 found_flag = 1;
2803 break;
2804 }
2805 }
2806
2807 if(found_flag == 0)
2808 {
2809 pu1_luma_eval_mark[rdopt_cand_ctr] = 0;
2810 }
2811
2812 rdopt_cand_ctr++;
2813 }
2814 }
2815
2816 /*!
2817 ******************************************************************************
2818 * \if Function name : ihevce_intra_rdopt_cu_ntu \endif
2819 *
2820 * \brief
2821 * Intra Coding unit function for RD opt mode
2822 *
2823 * \param[in] ps_ctxt enc_loop module ctxt pointer
2824 * \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure
2825 * \param[in] pu1_luma_mode : pointer to luma mode
2826 * \param[in] ps_cu_analyse pointer to cu analyse pointer
2827 * \param[in] pu1_src pointer to source data buffer
2828 * \param[in] src_strd source buffer stride
2829 * \param[in] pu1_cu_left pointer to left recon data buffer
2830 * \param[in] pu1_cu_top pointer to top recon data buffer
2831 * \param[in] pu1_cu_top_left pointer to top left recon data buffer
2832 * \param[in] ps_left_nbr_4x4 : left 4x4 neighbour pointer
2833 * \param[in] ps_top_nbr_4x4 : top 4x4 neighbour pointer
2834 * \param[in] nbr_4x4_left_strd left nbr4x4 stride
2835 * \param[in] cu_left_stride left recon buffer stride
2836 * \param[in] curr_buf_idx RD opt buffer index for current usage
2837 * \param[in] func_proc_mode : function processing mode @sa TU_SIZE_WRT_CU_T
2838 *
2839 * \return
2840 * RDopt cost
2841 *
2842 * \author
2843 * Ittiam
2844 *
2845 *****************************************************************************
2846 */
ihevce_intra_rdopt_cu_ntu(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms,void * pv_pred_org,WORD32 pred_strd_org,enc_loop_chrm_cu_buf_prms_t * ps_chrm_cu_buf_prms,UWORD8 * pu1_luma_mode,cu_analyse_t * ps_cu_analyse,void * pv_curr_src,void * pv_cu_left,void * pv_cu_top,void * pv_cu_top_left,nbr_4x4_t * ps_left_nbr_4x4,nbr_4x4_t * ps_top_nbr_4x4,WORD32 nbr_4x4_left_strd,WORD32 cu_left_stride,WORD32 curr_buf_idx,WORD32 func_proc_mode,WORD32 i4_alpha_stim_multiplier)2847 LWORD64 ihevce_intra_rdopt_cu_ntu(
2848 ihevce_enc_loop_ctxt_t *ps_ctxt,
2849 enc_loop_cu_prms_t *ps_cu_prms,
2850 void *pv_pred_org,
2851 WORD32 pred_strd_org,
2852 enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
2853 UWORD8 *pu1_luma_mode,
2854 cu_analyse_t *ps_cu_analyse,
2855 void *pv_curr_src,
2856 void *pv_cu_left,
2857 void *pv_cu_top,
2858 void *pv_cu_top_left,
2859 nbr_4x4_t *ps_left_nbr_4x4,
2860 nbr_4x4_t *ps_top_nbr_4x4,
2861 WORD32 nbr_4x4_left_strd,
2862 WORD32 cu_left_stride,
2863 WORD32 curr_buf_idx,
2864 WORD32 func_proc_mode,
2865 WORD32 i4_alpha_stim_multiplier)
2866 {
2867 enc_loop_cu_final_prms_t *ps_final_prms;
2868 nbr_avail_flags_t s_nbr;
2869 nbr_4x4_t *ps_nbr_4x4;
2870 nbr_4x4_t *ps_tmp_lt_4x4;
2871 recon_datastore_t *ps_recon_datastore;
2872
2873 ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
2874
2875 UWORD32 *pu4_nbr_flags;
2876 UWORD8 *pu1_intra_pred_mode;
2877 WORD32 cu_pos_x;
2878 WORD32 cu_pos_y;
2879 WORD32 trans_size = 0;
2880 UWORD8 *pu1_left;
2881 UWORD8 *pu1_top;
2882 UWORD8 *pu1_top_left;
2883 UWORD8 *pu1_recon;
2884 UWORD8 *pu1_csbf_buf;
2885 UWORD8 *pu1_ecd_data;
2886 WORD16 *pi2_deq_data;
2887 WORD32 deq_data_strd;
2888 LWORD64 total_rdopt_cost;
2889 WORD32 ctr;
2890 WORD32 left_strd;
2891 WORD32 i4_recon_stride;
2892 WORD32 csbf_strd;
2893 WORD32 ecd_data_bytes_cons;
2894 WORD32 num_4x4_in_tu;
2895 WORD32 num_4x4_in_cu;
2896 WORD32 chrm_present_flag;
2897 WORD32 tx_size;
2898 WORD32 cu_bits;
2899 WORD32 num_cu_parts = 0;
2900 WORD32 num_cands = 0;
2901 WORD32 cu_pos_x_8pelunits;
2902 WORD32 cu_pos_y_8pelunits;
2903 WORD32 i4_perform_rdoq;
2904 WORD32 i4_perform_sbh;
2905 UWORD8 u1_compute_spatial_ssd;
2906 UWORD8 u1_compute_recon;
2907 UWORD8 au1_intra_nxn_rdopt_ctxt_models[2][IHEVC_CAB_CTXT_END];
2908
2909 UWORD16 u2_num_tus_in_cu = 0;
2910 WORD32 is_sub_pu_in_hq = 0;
2911 /* Get the RDOPT cost of the best CU mode for early_exit */
2912 LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
2913 /* cabac context of prev intra luma pred flag */
2914 UWORD8 u1_prev_flag_cabac_ctxt =
2915 ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_INTRA_LUMA_PRED_FLAG];
2916 WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
2917
2918 UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY;
2919
2920 total_rdopt_cost = 0;
2921 ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
2922 ps_recon_datastore = &ps_final_prms->s_recon_datastore;
2923 i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
2924 csbf_strd = ps_ctxt->i4_cu_csbf_strd;
2925 pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
2926 pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
2927 pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
2928 deq_data_strd = ps_cu_analyse->u1_cu_size; /* deq_data stride is cu size */
2929 ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
2930 ps_tmp_lt_4x4 = ps_left_nbr_4x4;
2931 pu4_nbr_flags = &ps_final_prms->au4_nbr_flags[0];
2932 pu1_intra_pred_mode = &ps_final_prms->au1_intra_pred_mode[0];
2933 cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
2934 cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
2935 cu_pos_x_8pelunits = cu_pos_x;
2936 cu_pos_y_8pelunits = cu_pos_y;
2937
2938 /* reset cu not coded cost */
2939 ps_ctxt->i8_cu_not_coded_cost = 0;
2940
2941 /* based on the Processng mode */
2942 if(TU_EQ_CU == func_proc_mode)
2943 {
2944 ps_final_prms->u1_part_mode = SIZE_2Nx2N;
2945 trans_size = ps_cu_analyse->u1_cu_size;
2946 num_cu_parts = 1;
2947 num_cands = 1;
2948 u2_num_tus_in_cu = 1;
2949 }
2950 else if(TU_EQ_CU_DIV2 == func_proc_mode)
2951 {
2952 ps_final_prms->u1_part_mode = SIZE_2Nx2N;
2953 trans_size = ps_cu_analyse->u1_cu_size >> 1;
2954 num_cu_parts = 4;
2955 num_cands = 1;
2956 u2_num_tus_in_cu = 4;
2957 }
2958 else if(TU_EQ_SUBCU == func_proc_mode)
2959 {
2960 ps_final_prms->u1_part_mode = SIZE_NxN;
2961 trans_size = ps_cu_analyse->u1_cu_size >> 1;
2962 num_cu_parts = 4;
2963 /*In HQ for TU = SUBPU, all 35 modes used for RDOPT instead of 3 modes */
2964 if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset)
2965 {
2966 if(ps_ctxt->i1_slice_type != BSLICE)
2967 {
2968 num_cands = (4 * MAX_INTRA_CU_CANDIDATES) + 2;
2969 }
2970 else
2971 {
2972 num_cands = (2 * MAX_INTRA_CU_CANDIDATES);
2973 }
2974 }
2975 else
2976 {
2977 num_cands = MAX_INTRA_CU_CANDIDATES;
2978 }
2979 u2_num_tus_in_cu = 4;
2980 }
2981 else
2982 {
2983 /* should not enter here */
2984 ASSERT(0);
2985 }
2986
2987 if(ps_ctxt->i1_cu_qp_delta_enable)
2988 {
2989 WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0;
2990 if(ps_cu_analyse->u1_cu_size == 64)
2991 {
2992 ASSERT(
2993 (trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4));
2994 i4_act_counter = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4));
2995 i4_act_counter_lamda = 3;
2996 }
2997 else if(ps_cu_analyse->u1_cu_size == 32)
2998 {
2999 ASSERT(
3000 (trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4));
3001 i4_act_counter = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4));
3002 i4_act_counter_lamda = 0;
3003 }
3004 else if(ps_cu_analyse->u1_cu_size == 16)
3005 {
3006 ASSERT((trans_size == 16) || (trans_size == 8) || (trans_size == 4));
3007 i4_act_counter = (trans_size == 8) || (trans_size == 4);
3008 i4_act_counter_lamda = 0;
3009 }
3010 else if(ps_cu_analyse->u1_cu_size == 8)
3011 {
3012 ASSERT((trans_size == 8) || (trans_size == 4));
3013 i4_act_counter = 1;
3014 i4_act_counter_lamda = 0;
3015 }
3016 else
3017 {
3018 ASSERT(0);
3019 }
3020 if(ps_ctxt->i4_use_ctb_level_lamda)
3021 {
3022 ihevce_compute_cu_level_QP(
3023 ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][1], -1, 0);
3024 }
3025 else
3026 {
3027 ihevce_compute_cu_level_QP(
3028 ps_ctxt,
3029 ps_cu_analyse->i4_act_factor[i4_act_counter][1],
3030 ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][1],
3031 0);
3032 }
3033
3034 ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp;
3035 }
3036 if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
3037 {
3038 ps_ctxt->i8_cl_ssd_lambda_qf =
3039 ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
3040 100.0f);
3041 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
3042 ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
3043 (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
3044 }
3045
3046 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
3047 (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
3048 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
3049
3050 if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
3051 {
3052 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
3053 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
3054 }
3055
3056 /* populate the neighbours */
3057 pu1_left = (UWORD8 *)pv_cu_left;
3058 pu1_top = (UWORD8 *)pv_cu_top;
3059 pu1_top_left = (UWORD8 *)pv_cu_top_left;
3060 left_strd = cu_left_stride;
3061 num_4x4_in_tu = (trans_size >> 2);
3062 num_4x4_in_cu = (ps_cu_analyse->u1_cu_size >> 2);
3063 chrm_present_flag = 1;
3064 ecd_data_bytes_cons = 0;
3065 cu_bits = 0;
3066
3067 /* get the 4x4 level position of current cu */
3068 cu_pos_x = cu_pos_x << 1;
3069 cu_pos_y = cu_pos_y << 1;
3070
3071 /* populate cu level params knowing that current is intra */
3072 ps_final_prms->u1_skip_flag = 0;
3073 ps_final_prms->u1_intra_flag = PRED_MODE_INTRA;
3074 ps_final_prms->u2_num_pus_in_cu = 1;
3075 /*init the is_cu_coded flag*/
3076 ps_final_prms->u1_is_cu_coded = 0;
3077 ps_final_prms->u4_cu_sad = 0;
3078
3079 ps_final_prms->as_pu_enc_loop[0].b1_intra_flag = PRED_MODE_INTRA;
3080 ps_final_prms->as_pu_enc_loop[0].b4_wd = (trans_size >> 1) - 1;
3081 ps_final_prms->as_pu_enc_loop[0].b4_ht = (trans_size >> 1) - 1;
3082 ps_final_prms->as_pu_enc_loop[0].b4_pos_x = cu_pos_x;
3083 ps_final_prms->as_pu_enc_loop[0].b4_pos_y = cu_pos_y;
3084 ps_final_prms->as_pu_enc_loop[0].b1_merge_flag = 0;
3085
3086 ps_final_prms->as_col_pu_enc_loop[0].b1_intra_flag = 1;
3087
3088 /*copy qp directly as intra cant be skip*/
3089 ps_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
3090 ps_nbr_4x4->mv.s_l0_mv.i2_mvx = 0;
3091 ps_nbr_4x4->mv.s_l0_mv.i2_mvy = 0;
3092 ps_nbr_4x4->mv.s_l1_mv.i2_mvx = 0;
3093 ps_nbr_4x4->mv.s_l1_mv.i2_mvy = 0;
3094 ps_nbr_4x4->mv.i1_l0_ref_pic_buf_id = -1;
3095 ps_nbr_4x4->mv.i1_l1_ref_pic_buf_id = -1;
3096 ps_nbr_4x4->mv.i1_l0_ref_idx = -1;
3097 ps_nbr_4x4->mv.i1_l1_ref_idx = -1;
3098
3099 /* RDOPT copy States : TU init (best until prev TU) to current */
3100 memcpy(
3101 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
3102 .s_cabac_ctxt.au1_ctxt_models[0],
3103 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
3104 IHEVC_CAB_COEFFX_PREFIX);
3105
3106 /* RDOPT copy States :update to init state if 0 cbf */
3107 memcpy(
3108 &au1_intra_nxn_rdopt_ctxt_models[0][0],
3109 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
3110 IHEVC_CAB_COEFFX_PREFIX);
3111 memcpy(
3112 &au1_intra_nxn_rdopt_ctxt_models[1][0],
3113 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
3114 IHEVC_CAB_COEFFX_PREFIX);
3115
3116 /* loop for all partitions in CU blocks */
3117 for(ctr = 0; ctr < num_cu_parts; ctr++)
3118 {
3119 UWORD8 *pu1_curr_mode;
3120 WORD32 cand_ctr;
3121 WORD32 nbr_flags;
3122
3123 /* for NxN case to track the best mode */
3124 /* for other cases zeroth index will be used */
3125 intra_prev_rem_flags_t as_intra_prev_rem[2];
3126 LWORD64 ai8_cand_rdopt_cost[2];
3127 UWORD32 au4_tu_sad[2];
3128 WORD32 ai4_tu_bits[2];
3129 WORD32 ai4_cbf[2];
3130 WORD32 ai4_curr_bytes[2];
3131 WORD32 ai4_zero_col[2];
3132 WORD32 ai4_zero_row[2];
3133 /* To store the pred, coeff and dequant for TU_EQ_SUBCU case (since mul.
3134 cand. are there) ping-pong buffer to store the best and current */
3135 UWORD8 au1_cur_pred_data[2][MIN_TU_SIZE * MIN_TU_SIZE];
3136 UWORD8 au1_intra_coeffs[2][MAX_SCAN_COEFFS_BYTES_4x4];
3137 WORD16 ai2_intra_deq_coeffs[2][MIN_TU_SIZE * MIN_TU_SIZE];
3138 /* Context models stored for RDopt store and restore purpose */
3139
3140 UWORD8 au1_recon_availability[2];
3141
3142 WORD32 best_cand_idx = 0;
3143 LWORD64 best_cand_cost = MAX_COST_64;
3144 /* counters to toggle b/w best and current */
3145 WORD32 best_intra_buf_idx = 1;
3146 WORD32 curr_intra_buf_idx = 0;
3147
3148 /* copy the mode pointer to be used in inner loop */
3149 pu1_curr_mode = pu1_luma_mode;
3150
3151 /* get the neighbour availability flags */
3152 nbr_flags = ihevce_get_nbr_intra(
3153 &s_nbr,
3154 ps_ctxt->pu1_ctb_nbr_map,
3155 ps_ctxt->i4_nbr_map_strd,
3156 cu_pos_x,
3157 cu_pos_y,
3158 num_4x4_in_tu);
3159
3160 /* copy the nbr flags for chroma reuse */
3161 if(4 != trans_size)
3162 {
3163 *pu4_nbr_flags = nbr_flags;
3164 }
3165 else if(1 == chrm_present_flag)
3166 {
3167 /* compute the avail flags assuming luma trans is 8x8 */
3168 /* get the neighbour availability flags */
3169 *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
3170 ps_ctxt->pu1_ctb_nbr_map,
3171 ps_ctxt->i4_nbr_map_strd,
3172 cu_pos_x,
3173 cu_pos_y,
3174 (num_4x4_in_tu << 1),
3175 (num_4x4_in_tu << 1));
3176 }
3177
3178 u1_compute_recon = !u1_compute_spatial_ssd && ((num_cu_parts > 1) && (ctr < 3));
3179
3180 if(!ctr && (u1_compute_spatial_ssd || u1_compute_recon))
3181 {
3182 ps_recon_datastore->u1_is_lumaRecon_available = 1;
3183 }
3184 else if(!ctr)
3185 {
3186 ps_recon_datastore->u1_is_lumaRecon_available = 0;
3187 }
3188
3189 ihevc_intra_pred_luma_ref_substitution_fptr =
3190 ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
3191
3192 /* call reference array substitution */
3193 ihevc_intra_pred_luma_ref_substitution_fptr(
3194 pu1_top_left,
3195 pu1_top,
3196 pu1_left,
3197 left_strd,
3198 trans_size,
3199 nbr_flags,
3200 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
3201 1);
3202
3203 /* Intra Mode gating based on MPM cand list and encoder quality preset */
3204 if((ps_ctxt->i1_slice_type != ISLICE) && (TU_EQ_SUBCU == func_proc_mode) &&
3205 (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
3206 {
3207 ihevce_mpm_idx_based_filter_RDOPT_cand(
3208 ps_ctxt,
3209 ps_cu_analyse,
3210 ps_left_nbr_4x4,
3211 ps_top_nbr_4x4,
3212 pu1_luma_mode,
3213 &ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][0]);
3214 }
3215
3216 if((TU_EQ_SUBCU == func_proc_mode) && (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
3217 (ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr] >= MAX_INTRA_CU_CANDIDATES))
3218 {
3219 WORD32 ai4_mpm_mode_list[3];
3220 WORD32 i;
3221
3222 WORD32 i4_curr_index = ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr];
3223
3224 ihevce_populate_intra_pred_mode(
3225 ps_top_nbr_4x4->b6_luma_intra_mode,
3226 ps_tmp_lt_4x4->b6_luma_intra_mode,
3227 s_nbr.u1_top_avail,
3228 s_nbr.u1_left_avail,
3229 cu_pos_y,
3230 &ai4_mpm_mode_list[0]);
3231
3232 for(i = 0; i < 3; i++)
3233 {
3234 if(ps_cu_analyse->s_cu_intra_cand
3235 .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] == 0)
3236 {
3237 ASSERT(ai4_mpm_mode_list[i] < 35);
3238
3239 ps_cu_analyse->s_cu_intra_cand
3240 .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] = 1;
3241 pu1_luma_mode[i4_curr_index] = ai4_mpm_mode_list[i];
3242 ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr]++;
3243 i4_curr_index++;
3244 }
3245 }
3246
3247 pu1_luma_mode[i4_curr_index] = 255;
3248 }
3249
3250 /* loop over candidates for each partition */
3251 for(cand_ctr = 0; cand_ctr < num_cands; cand_ctr++)
3252 {
3253 WORD32 curr_pred_mode;
3254 WORD32 bits = 0;
3255 LWORD64 curr_cost;
3256 WORD32 luma_pred_func_idx;
3257 UWORD8 *pu1_curr_ecd_data;
3258 WORD16 *pi2_curr_deq_data;
3259 WORD32 curr_deq_data_strd;
3260 WORD32 pred_strd;
3261 UWORD8 *pu1_pred;
3262
3263 /* if NXN case the recon and ecd data is stored in temp buffers */
3264 if(TU_EQ_SUBCU == func_proc_mode)
3265 {
3266 pu1_pred = &au1_cur_pred_data[curr_intra_buf_idx][0];
3267 pred_strd = trans_size;
3268 pu1_curr_ecd_data = &au1_intra_coeffs[curr_intra_buf_idx][0];
3269 pi2_curr_deq_data = &ai2_intra_deq_coeffs[curr_intra_buf_idx][0];
3270 curr_deq_data_strd = trans_size;
3271
3272 ASSERT(trans_size == MIN_TU_SIZE);
3273 }
3274 else
3275 {
3276 pu1_pred = (UWORD8 *)pv_pred_org;
3277 pred_strd = pred_strd_org;
3278 pu1_curr_ecd_data = pu1_ecd_data;
3279 pi2_curr_deq_data = pi2_deq_data;
3280 curr_deq_data_strd = deq_data_strd;
3281 }
3282
3283 pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[curr_intra_buf_idx]) +
3284 (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride;
3285
3286 if(is_sub_pu_in_hq == 1)
3287 {
3288 curr_pred_mode = cand_ctr;
3289 }
3290 else
3291 {
3292 curr_pred_mode = pu1_curr_mode[cand_ctr];
3293 }
3294
3295 /* If the candidate mode is 255, then break */
3296 if(255 == curr_pred_mode)
3297 {
3298 break;
3299 }
3300 else if(250 == curr_pred_mode)
3301 {
3302 continue;
3303 }
3304
3305 /* check if this mode needs to be evaluated or not. For 2nx2n cases, this */
3306 /* function will be called once per candidate, so this check has been done */
3307 /* outside this function call. For NxN case, this function will be called */
3308 /* only once, and all the candidates will be evaluated here. */
3309 if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3)
3310 {
3311 if((TU_EQ_SUBCU == func_proc_mode) &&
3312 (0 == ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][cand_ctr]))
3313 {
3314 continue;
3315 }
3316 }
3317
3318 /* call reference filtering */
3319 ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr(
3320 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
3321 trans_size,
3322 (UWORD8 *)ps_ctxt->pv_ref_filt_out,
3323 curr_pred_mode,
3324 ps_ctxt->i1_strong_intra_smoothing_enable_flag);
3325
3326 /* use the look up to get the function idx */
3327 luma_pred_func_idx = g_i4_ip_funcs[curr_pred_mode];
3328
3329 /* call the intra prediction function */
3330 ps_ctxt->apf_lum_ip[luma_pred_func_idx](
3331 (UWORD8 *)ps_ctxt->pv_ref_filt_out,
3332 1,
3333 pu1_pred,
3334 pred_strd,
3335 trans_size,
3336 curr_pred_mode);
3337
3338 /* populate the coeffs scan idx */
3339 ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
3340
3341 /* for luma 4x4 and 8x8 transforms the scan is chosen based on intra pred mode */
3342 if(trans_size < 16)
3343 {
3344 /* for modes from 22 upto 30 horizontal scan is used */
3345 if((curr_pred_mode > 21) && (curr_pred_mode < 31))
3346 {
3347 ps_ctxt->i4_scan_idx = SCAN_HORZ;
3348 }
3349 /* for modes from 6 upto 14 vertical scan is used */
3350 else if((curr_pred_mode > 5) && (curr_pred_mode < 15))
3351 {
3352 ps_ctxt->i4_scan_idx = SCAN_VERT;
3353 }
3354 }
3355
3356 /* RDOPT copy States : TU init (best until prev TU) to current */
3357 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3358 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
3359 .s_cabac_ctxt.au1_ctxt_models[0] +
3360 IHEVC_CAB_COEFFX_PREFIX,
3361 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
3362 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3363
3364 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
3365 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
3366
3367 #if DISABLE_RDOQ_INTRA
3368 i4_perform_rdoq = 0;
3369 #endif
3370
3371 /* 2 multi-dimensional arrays based on trans size of rounding factor to be added here */
3372 /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
3373 /* Currently the complete array will contain only single value*/
3374 /*The rounding factor is calculated with the formula
3375 Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
3376 rounding factor = (1 - DeadZone Val)
3377
3378 Assumption: Cabac states of All the sub-blocks in the TU are considered independent
3379 */
3380 if((ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING))
3381 {
3382 if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0))
3383 {
3384 double i4_lamda_modifier;
3385
3386 if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
3387 {
3388 i4_lamda_modifier =
3389 ps_ctxt->i4_lamda_modifier *
3390 CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
3391 }
3392 else
3393 {
3394 i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
3395 }
3396 if(ps_ctxt->i4_use_const_lamda_modifier)
3397 {
3398 if(ISLICE == ps_ctxt->i1_slice_type)
3399 {
3400 i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
3401 }
3402 else
3403 {
3404 i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
3405 }
3406 }
3407
3408 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
3409 &ps_ctxt->i4_quant_round_tu[0][0];
3410 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
3411 &ps_ctxt->i4_quant_round_tu[1][0];
3412
3413 memset(
3414 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
3415 0,
3416 trans_size * trans_size * sizeof(WORD32));
3417 memset(
3418 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
3419 0,
3420 trans_size * trans_size * sizeof(WORD32));
3421
3422 ihevce_quant_rounding_factor_gen(
3423 trans_size,
3424 1,
3425 &ps_ctxt->s_rdopt_entropy_ctxt,
3426 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
3427 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
3428 i4_lamda_modifier,
3429 1);
3430 }
3431 else
3432 {
3433 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
3434 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3];
3435 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
3436 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3];
3437 }
3438 }
3439
3440 /* call T Q IT IQ and recon function */
3441 ai4_cbf[curr_intra_buf_idx] = ihevce_t_q_iq_ssd_scan_fxn(
3442 ps_ctxt,
3443 pu1_pred,
3444 pred_strd,
3445 (UWORD8 *)pv_curr_src,
3446 src_strd,
3447 pi2_curr_deq_data,
3448 curr_deq_data_strd,
3449 pu1_recon,
3450 i4_recon_stride,
3451 pu1_curr_ecd_data,
3452 pu1_csbf_buf,
3453 csbf_strd,
3454 trans_size,
3455 PRED_MODE_INTRA,
3456 &ai8_cand_rdopt_cost[curr_intra_buf_idx],
3457 &ai4_curr_bytes[curr_intra_buf_idx],
3458 &ai4_tu_bits[curr_intra_buf_idx],
3459 &au4_tu_sad[curr_intra_buf_idx],
3460 &ai4_zero_col[curr_intra_buf_idx],
3461 &ai4_zero_row[curr_intra_buf_idx],
3462 &au1_recon_availability[curr_intra_buf_idx],
3463 i4_perform_rdoq,
3464 i4_perform_sbh,
3465 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
3466 i4_alpha_stim_multiplier,
3467 u1_is_cu_noisy,
3468 #endif
3469 u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
3470 1 /*early_cbf */
3471 );
3472
3473 #if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
3474 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
3475 {
3476 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
3477 ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
3478 pv_curr_src,
3479 src_strd,
3480 pu1_pred,
3481 pred_strd,
3482 ai8_cand_rdopt_cost[curr_intra_buf_idx],
3483 i4_alpha_stim_multiplier,
3484 trans_size,
3485 0,
3486 ps_ctxt->u1_enable_psyRDOPT,
3487 NULL_PLANE);
3488 #else
3489 if(u1_compute_spatial_ssd && au1_recon_availability[curr_intra_buf_idx])
3490 {
3491 ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
3492 pv_curr_src,
3493 src_strd,
3494 pu1_recon,
3495 i4_recon_stride,
3496 ai8_cand_rdopt_cost[curr_intra_buf_idx],
3497 i4_alpha_stim_multiplier,
3498 trans_size,
3499 0,
3500 ps_ctxt->u1_enable_psyRDOPT,
3501 NULL_PLANE);
3502 }
3503 else
3504 {
3505 ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
3506 pv_curr_src,
3507 src_strd,
3508 pu1_pred,
3509 pred_strd,
3510 ai8_cand_rdopt_cost[curr_intra_buf_idx],
3511 i4_alpha_stim_multiplier,
3512 trans_size,
3513 0,
3514 ps_ctxt->u1_enable_psyRDOPT,
3515 NULL_PLANE);
3516 }
3517 #endif
3518 }
3519 #endif
3520
3521 if(TU_EQ_SUBCU == func_proc_mode)
3522 {
3523 ASSERT(ai4_curr_bytes[curr_intra_buf_idx] < MAX_SCAN_COEFFS_BYTES_4x4);
3524 }
3525
3526 /* based on CBF/No CBF copy the corresponding state */
3527 if(0 == ai4_cbf[curr_intra_buf_idx])
3528 {
3529 /* RDOPT copy States :update to init state if 0 cbf */
3530 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3531 &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] +
3532 IHEVC_CAB_COEFFX_PREFIX,
3533 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
3534 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3535 }
3536 else
3537 {
3538 /* RDOPT copy States :update to new state only if CBF is non zero */
3539 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3540 &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] +
3541 IHEVC_CAB_COEFFX_PREFIX,
3542 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
3543 .s_cabac_ctxt.au1_ctxt_models[0] +
3544 IHEVC_CAB_COEFFX_PREFIX,
3545 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3546 }
3547
3548 /* call the function which perform intra mode prediction */
3549 ihevce_intra_pred_mode_signaling(
3550 ps_top_nbr_4x4->b6_luma_intra_mode,
3551 ps_tmp_lt_4x4->b6_luma_intra_mode,
3552 s_nbr.u1_top_avail,
3553 s_nbr.u1_left_avail,
3554 cu_pos_y,
3555 curr_pred_mode,
3556 &as_intra_prev_rem[curr_intra_buf_idx]);
3557 /******************************************************************/
3558 /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN
3559 The bits for these are evaluated for every RDO mode of current subcu
3560 as they can significantly contribute to RDO cost. Note that these
3561 bits are not accounted for here (ai8_cand_rdopt_cost) as they
3562 are accounted for in encode_cu call later */
3570
3571 /* Estimate bits to encode prev rem flag for NXN mode */
3572 {
3573 WORD32 bits_frac = gau2_ihevce_cabac_bin_to_bits
3574 [u1_prev_flag_cabac_ctxt ^
3575 as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag];
3576
3577 /* rounding the fractional bits to nearest integer */
3578 bits = ((bits_frac + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q);
3579 }
3580
3581 /* based on prev flag all the mpmidx bits and rem bits */
3582 if(1 == as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag)
3583 {
3584 /* mpm_idx */
3585 bits += as_intra_prev_rem[curr_intra_buf_idx].b2_mpm_idx ? 2 : 1;
3586 }
3587 else
3588 {
3589 /* rem intra mode */
3590 bits += 5;
3591 }
3592
3593 bits += ai4_tu_bits[curr_intra_buf_idx];
3594
3595 /* compute the total cost for current candidate */
3596 curr_cost = ai8_cand_rdopt_cost[curr_intra_buf_idx];
3597
3598 /* get the final ssd cost */
3599 curr_cost +=
3600 COMPUTE_RATE_COST_CLIP30(bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
3601
3602 /* check of the best candidate cost */
3603 if(curr_cost < best_cand_cost)
3604 {
3605 best_cand_cost = curr_cost;
3606 best_cand_idx = cand_ctr;
3607 best_intra_buf_idx = curr_intra_buf_idx;
3608 curr_intra_buf_idx = !curr_intra_buf_idx;
3609 }
3610 }
3611
3612 /*************** For TU_EQ_SUBCU case *****************/
3613 /* Copy the pred for best cand. to the final pred array */
3614 /* Copy the iq-coeff for best cand. to the final array */
3615 /* copy the best coeffs data to final buffer */
3616 if(TU_EQ_SUBCU == func_proc_mode)
3617 {
3618 /* Copy the pred for best cand. to the final pred array */
3619
3620 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
3621 (UWORD8 *)pv_pred_org,
3622 pred_strd_org,
3623 &au1_cur_pred_data[best_intra_buf_idx][0],
3624 trans_size,
3625 trans_size,
3626 trans_size);
3627
3628 /* Copy the deq-coeff for best cand. to the final array */
3629
3630 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
3631 (UWORD8 *)pi2_deq_data,
3632 deq_data_strd << 1,
3633 (UWORD8 *)&ai2_intra_deq_coeffs[best_intra_buf_idx][0],
3634 trans_size << 1,
3635 trans_size << 1,
3636 trans_size);
3637 /* copy the coeffs to final cu ecd bytes buffer */
3638 memcpy(
3639 pu1_ecd_data,
3640 &au1_intra_coeffs[best_intra_buf_idx][0],
3641 ai4_curr_bytes[best_intra_buf_idx]);
3642
3643 pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[best_intra_buf_idx]) +
3644 (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride;
3645 }
3646
3647 /*---------- Calculate Recon for the best INTRA mode ---------*/
3648 /* TU_EQ_CU case : No need for recon, otherwise recon is required */
3649 /* Compute recon only for the best mode for TU_EQ_SUBCU case */
3650 if(u1_compute_recon)
3651 {
3652 ihevce_it_recon_fxn(
3653 ps_ctxt,
3654 pi2_deq_data,
3655 deq_data_strd,
3656 (UWORD8 *)pv_pred_org,
3657 pred_strd_org,
3658 pu1_recon,
3659 i4_recon_stride,
3660 pu1_ecd_data,
3661 trans_size,
3662 PRED_MODE_INTRA,
3663 ai4_cbf[best_intra_buf_idx],
3664 ai4_zero_col[best_intra_buf_idx],
3665 ai4_zero_row[best_intra_buf_idx]);
3666
3667 ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx;
3668 }
3669 else if(u1_compute_spatial_ssd && au1_recon_availability[best_intra_buf_idx])
3670 {
3671 ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx;
3672 }
3673 else
3674 {
3675 ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
3676 }
3677
3678 /* RDOPT copy States :update to best modes state */
3679 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3680 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
3681 &au1_intra_nxn_rdopt_ctxt_models[best_intra_buf_idx][0] + IHEVC_CAB_COEFFX_PREFIX,
3682 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3683
3684 /* copy the prev,mpm_idx and rem modes from best cand */
3685 ps_final_prms->as_intra_prev_rem[ctr] = as_intra_prev_rem[best_intra_buf_idx];
3686
3687 /* update the cabac context of prev intra pred mode flag */
3688 u1_prev_flag_cabac_ctxt = gau1_ihevc_next_state
3689 [(u1_prev_flag_cabac_ctxt << 1) |
3690 as_intra_prev_rem[best_intra_buf_idx].b1_prev_intra_luma_pred_flag];
3691
3692 /* accumulate the TU bits into cu bits */
3693 cu_bits += ai4_tu_bits[best_intra_buf_idx];
3694
3695 /* copy the intra pred mode for chroma reuse */
3696 if(is_sub_pu_in_hq == 0)
3697 {
3698 *pu1_intra_pred_mode = pu1_curr_mode[best_cand_idx];
3699 }
3700 else
3701 {
3702 *pu1_intra_pred_mode = best_cand_idx;
3703 }
3704
3705 /* Store luma mode as chroma mode. If chroma prcs happens, and
3706 if a diff. mode wins, it should update this!! */
3707 if(1 == chrm_present_flag)
3708 {
3709 if(is_sub_pu_in_hq == 0)
3710 {
3711 ps_final_prms->u1_chroma_intra_pred_actual_mode =
3712 ((ps_ctxt->u1_chroma_array_type == 2)
3713 ? gau1_chroma422_intra_angle_mapping[pu1_curr_mode[best_cand_idx]]
3714 : pu1_curr_mode[best_cand_idx]);
3715 }
3716 else
3717 {
3718 ps_final_prms->u1_chroma_intra_pred_actual_mode =
3719 ((ps_ctxt->u1_chroma_array_type == 2)
3720 ? gau1_chroma422_intra_angle_mapping[best_cand_idx]
3721 : best_cand_idx);
3722 }
3723
3724 ps_final_prms->u1_chroma_intra_pred_mode = 4;
3725 }
3726
3727 /*remember the cbf flag to replicate qp for 4x4 neighbour*/
3728 ps_final_prms->u1_is_cu_coded |= ai4_cbf[best_intra_buf_idx];
3729
3730 /*accumulate ssd over all TU of intra CU*/
3731 ps_final_prms->u4_cu_sad += au4_tu_sad[best_intra_buf_idx];
3732
3733 /* update the bytes */
3734 ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
3735 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed =
3736 ai4_curr_bytes[best_intra_buf_idx];
3737 /* update the zero_row and col info for the final mode */
3738 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col =
3739 ai4_zero_col[best_intra_buf_idx];
3740 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row =
3741 ai4_zero_row[best_intra_buf_idx];
3742
3743 ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
3744
3745 /* update the total bytes cons */
3746 ecd_data_bytes_cons += ai4_curr_bytes[best_intra_buf_idx];
3747 pu1_ecd_data += ai4_curr_bytes[best_intra_buf_idx];
3748
3749 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = ai4_cbf[best_intra_buf_idx];
3750 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
3751 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
3752 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
3753 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
3754 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag;
3755 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp;
3756 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0;
3757 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0;
3758 GETRANGE(tx_size, trans_size);
3759 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
3760 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x;
3761 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y;
3762
3763 /* replicate the nbr 4x4 structure for all 4x4 blocks of current TU */
3764 ps_nbr_4x4->b1_skip_flag = 0;
3765 ps_nbr_4x4->b1_intra_flag = 1;
3766 ps_nbr_4x4->b1_pred_l0_flag = 0;
3767 ps_nbr_4x4->b1_pred_l1_flag = 0;
3768
3769 if(is_sub_pu_in_hq == 0)
3770 {
3771 ps_nbr_4x4->b6_luma_intra_mode = pu1_curr_mode[best_cand_idx];
3772 }
3773 else
3774 {
3775 ps_nbr_4x4->b6_luma_intra_mode = best_cand_idx;
3776 }
3777
3778 ps_nbr_4x4->b1_y_cbf = ai4_cbf[best_intra_buf_idx];
3779
3780 /* since tu size can be less than cusize, replication is done with strd */
3781 {
3782 WORD32 i, j;
3783 nbr_4x4_t *ps_tmp_4x4;
3784
3785 ps_tmp_4x4 = ps_nbr_4x4;
3786
3787 for(i = 0; i < num_4x4_in_tu; i++)
3788 {
3789 for(j = 0; j < num_4x4_in_tu; j++)
3790 {
3791 ps_tmp_4x4[j] = *ps_nbr_4x4;
3792 }
3793 /* row level update*/
3794 ps_tmp_4x4 += num_4x4_in_cu;
3795 }
3796 }
3797
3798 if(TU_EQ_SUBCU == func_proc_mode)
3799 {
3800 pu1_luma_mode += ((MAX_INTRA_CU_CANDIDATES * 4) + 2 + 1);
3801 }
3802
3803 if((num_cu_parts > 1) && (ctr < 3))
3804 {
3805 /* set the neighbour map to 1 */
3806 ihevce_set_nbr_map(
3807 ps_ctxt->pu1_ctb_nbr_map,
3808 ps_ctxt->i4_nbr_map_strd,
3809 cu_pos_x,
3810 cu_pos_y,
3811 trans_size >> 2,
3812 1);
3813
3814 /* block level updates block number (1 & 3 )*/
3815 pv_curr_src = (UWORD8 *)pv_curr_src + trans_size;
3816 pv_pred_org = (UWORD8 *)pv_pred_org + trans_size;
3817 pi2_deq_data += trans_size;
3818
3819 switch(ctr)
3820 {
3821 case 0:
3822 {
3823 pu1_left = pu1_recon + trans_size - 1;
3824 pu1_top += trans_size;
3825 pu1_top_left = pu1_top - 1;
3826 left_strd = i4_recon_stride;
3827
3828 break;
3829 }
3830 case 1:
3831 {
3832 ASSERT(
3833 (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 0) ||
3834 (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 1));
3835
3836 /* Since the 'lumaRefSubstitution' function expects both Top and */
3837 /* TopRight recon pixels to be present in the same buffer */
3838 if(ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] !=
3839 ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1])
3840 {
3841 UWORD8 *pu1_src =
3842 ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3843 [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) +
3844 trans_size;
3845 UWORD8 *pu1_dst =
3846 ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3847 [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) +
3848 trans_size;
3849
3850 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
3851 pu1_dst, i4_recon_stride, pu1_src, i4_recon_stride, trans_size, trans_size);
3852
3853 ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] =
3854 ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0];
3855 }
3856
3857 pu1_left = (UWORD8 *)pv_cu_left + trans_size * cu_left_stride;
3858 pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3859 [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) +
3860 (trans_size - 1) * i4_recon_stride;
3861 pu1_top_left = pu1_left - cu_left_stride;
3862 left_strd = cu_left_stride;
3863
3864 break;
3865 }
3866 case 2:
3867 {
3868 ASSERT(
3869 (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 0) ||
3870 (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 1));
3871
3872 pu1_left = pu1_recon + trans_size - 1;
3873 pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3874 [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) +
3875 (trans_size - 1) * i4_recon_stride + trans_size;
3876 pu1_top_left = pu1_top - 1;
3877 left_strd = i4_recon_stride;
3878
3879 break;
3880 }
3881 }
3882
3883 pu1_csbf_buf += num_4x4_in_tu;
3884 cu_pos_x += num_4x4_in_tu;
3885 ps_nbr_4x4 += num_4x4_in_tu;
3886 ps_top_nbr_4x4 += num_4x4_in_tu;
3887 ps_tmp_lt_4x4 = ps_nbr_4x4 - 1;
3888
3889 pu1_intra_pred_mode++;
3890
3891 /* after 2 blocks increment the pointers to bottom blocks */
3892 if(1 == ctr)
3893 {
3894 pv_curr_src = (UWORD8 *)pv_curr_src - (trans_size << 1);
3895 pv_curr_src = (UWORD8 *)pv_curr_src + (trans_size * src_strd);
3896
3897 pv_pred_org = (UWORD8 *)pv_pred_org - (trans_size << 1);
3898 pv_pred_org = (UWORD8 *)pv_pred_org + (trans_size * pred_strd_org);
3899 pi2_deq_data -= (trans_size << 1);
3900 pi2_deq_data += (trans_size * deq_data_strd);
3901
3902 pu1_csbf_buf -= (num_4x4_in_tu << 1);
3903 pu1_csbf_buf += (num_4x4_in_tu * csbf_strd);
3904
3905 ps_nbr_4x4 -= (num_4x4_in_tu << 1);
3906 ps_nbr_4x4 += (num_4x4_in_tu * num_4x4_in_cu);
3907 ps_top_nbr_4x4 = ps_nbr_4x4 - num_4x4_in_cu;
3908 ps_tmp_lt_4x4 = ps_left_nbr_4x4 + (num_4x4_in_tu * nbr_4x4_left_strd);
3909
3910 /* decrement pos x to start */
3911 cu_pos_x -= (num_4x4_in_tu << 1);
3912 cu_pos_y += num_4x4_in_tu;
3913 }
3914 }
3915
3916 #if RDOPT_ENABLE
3917 /* compute the RDOPT cost for the current TU */
3918 ai8_cand_rdopt_cost[best_intra_buf_idx] += COMPUTE_RATE_COST_CLIP30(
3919 ai4_tu_bits[best_intra_buf_idx], ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
3920 #endif
3921
3922 /* accumulate the costs */
3923 total_rdopt_cost += ai8_cand_rdopt_cost[best_intra_buf_idx];
3924
3925 if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
3926 {
3927 /* Early exit : If the current running cost exceeds
3928 the prev. best mode cost, break */
3929 if(total_rdopt_cost > prev_best_rdopt_cost)
3930 {
3931 return (total_rdopt_cost);
3932 }
3933 }
3934
3935 /* if transfrom size is 4x4 then only first luma 4x4 will have chroma*/
3936 chrm_present_flag = (4 != trans_size) ? 1 : INTRA_PRED_CHROMA_IDX_NONE;
3937
3938 pu4_nbr_flags++;
3939 }
3940 /* Modify the cost function for this CU. */
3941 /* loop in for 8x8 blocks */
3942 if(ps_ctxt->u1_enable_psyRDOPT)
3943 {
3944 UWORD8 *pu1_recon_cu;
3945 WORD32 recon_stride;
3946 WORD32 curr_pos_x;
3947 WORD32 curr_pos_y;
3948 WORD32 start_index;
3949 WORD32 num_horz_cu_in_ctb;
3950 WORD32 cu_size;
3951 WORD32 had_block_size;
3952
3953 /* tODO: sreenivasa ctb size has to be used appropriately */
3954 had_block_size = 8;
3955 cu_size = ps_cu_analyse->u1_cu_size; /* todo */
3956 num_horz_cu_in_ctb = 64 / had_block_size;
3957
3958 curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
3959 curr_pos_y = ps_cu_analyse->b3_cu_pos_y << 3; /* pel units */
3960 recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
3961 pu1_recon_cu =
3962 ((UWORD8 *)ps_final_prms->s_recon_datastore
3963 .apv_luma_recon_bufs[ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]);
3964 /* + \ curr_pos_x + curr_pos_y * recon_stride; */
3965
3966 /* start index to index the source satd of curr cu int he current ctb*/
3967 start_index =
3968 (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
3969
3970 {
3971 total_rdopt_cost += ihevce_psy_rd_cost(
3972 ps_ctxt->ai4_source_satd_8x8,
3973 pu1_recon_cu,
3974 recon_stride,
3975 1, //
3976 cu_size,
3977 0, // pic type
3978 0, //layer id
3979 ps_ctxt->i4_satd_lamda, // lambda
3980 start_index,
3981 ps_ctxt->u1_is_input_data_hbd,
3982 ps_ctxt->u4_psy_strength,
3983 &ps_ctxt->s_cmn_opt_func
3984
3985 ); // 8 bit
3986 }
3987 }
3988
3989 #if !FORCE_INTRA_TU_DEPTH_TO_0 //RATIONALISE_NUM_RDO_MODES_IN_PQ_AND_HQ
3990 if(TU_EQ_SUBCU == func_proc_mode)
3991 {
3992 UWORD8 au1_tu_eq_cu_div2_modes[4];
3993 UWORD8 au1_freq_of_mode[4];
3994
3995 WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D(
3996 ps_final_prms->au1_intra_pred_mode, au1_tu_eq_cu_div2_modes, au1_freq_of_mode, 4);
3997
3998 if(1 == i4_num_clusters)
3999 {
4000 ps_final_prms->u2_num_pus_in_cu = 1;
4001 ps_final_prms->u1_part_mode = SIZE_2Nx2N;
4002 }
4003 }
4004 #endif
4005
4006 /* store the num TUs*/
4007 ps_final_prms->u2_num_tus_in_cu = u2_num_tus_in_cu;
4008
4009 /* update the bytes consumed */
4010 ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
4011
4012 /* store the current cu size to final prms */
4013 ps_final_prms->u1_cu_size = ps_cu_analyse->u1_cu_size;
4014
4015 /* cu bits will be having luma residual bits till this point */
4016 /* if zero_cbf eval is disabled then cu bits will be zero */
4017 ps_final_prms->u4_cu_luma_res_bits = cu_bits;
4018
4019 /* ------------- Chroma processing -------------- */
4020 /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEDD preset*/
4021 if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
4022 {
4023 LWORD64 chrm_rdopt_cost;
4024 WORD32 chrm_rdopt_tu_bits;
4025
4026 /* Store the current RDOPT cost to enable early exit in chrom_prcs */
4027 ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
4028
4029 chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
4030 ps_ctxt,
4031 curr_buf_idx,
4032 func_proc_mode,
4033 ps_chrm_cu_buf_prms->pu1_curr_src,
4034 ps_chrm_cu_buf_prms->i4_chrm_src_stride,
4035 ps_chrm_cu_buf_prms->pu1_cu_left,
4036 ps_chrm_cu_buf_prms->pu1_cu_top,
4037 ps_chrm_cu_buf_prms->pu1_cu_top_left,
4038 ps_chrm_cu_buf_prms->i4_cu_left_stride,
4039 cu_pos_x_8pelunits,
4040 cu_pos_y_8pelunits,
4041 &chrm_rdopt_tu_bits,
4042 i4_alpha_stim_multiplier,
4043 u1_is_cu_noisy);
4044
4045 #if WEIGH_CHROMA_COST
4046 chrm_rdopt_cost = (LWORD64)(
4047 (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
4048 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
4049 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
4050 #endif
4051
4052 #if CHROMA_RDOPT_ENABLE
4053 total_rdopt_cost += chrm_rdopt_cost;
4054 #endif
4055 cu_bits += chrm_rdopt_tu_bits;
4056
4057 /* cu bits for chroma residual if chroma rdopt is on */
4058 /* if zero_cbf eval is disabled then cu bits will be zero */
4059 ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
4060
4061 if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
4062 {
4063 /* Early exit : If the current running cost exceeds
4064 the prev. best mode cost, break */
4065 if(total_rdopt_cost > prev_best_rdopt_cost)
4066 {
4067 return (total_rdopt_cost);
4068 }
4069 }
4070 }
4071 else
4072 {}
4073
4074 /* RDOPT copy States : Best after all luma TUs to current */
4075 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4076 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4077 .s_cabac_ctxt.au1_ctxt_models[0] +
4078 IHEVC_CAB_COEFFX_PREFIX,
4079 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4080 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4081
4082 /* get the neighbour availability flags for current cu */
4083 ihevce_get_only_nbr_flag(
4084 &s_nbr,
4085 ps_ctxt->pu1_ctb_nbr_map,
4086 ps_ctxt->i4_nbr_map_strd,
4087 (cu_pos_x_8pelunits << 1),
4088 (cu_pos_y_8pelunits << 1),
4089 (trans_size << 1),
4090 (trans_size << 1));
4091
4092 /* call the entropy rdo encode to get the bit estimate for current cu */
4093 /*if ZERO_CBF eval is enabled then this function will return only CU header bits */
4094 {
4095 /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
4096 WORD32 cbf_bits, header_bits;
4097
4098 header_bits = ihevce_entropy_rdo_encode_cu(
4099 &ps_ctxt->s_rdopt_entropy_ctxt,
4100 ps_final_prms,
4101 cu_pos_x_8pelunits,
4102 cu_pos_y_8pelunits,
4103 ps_cu_analyse->u1_cu_size,
4104 s_nbr.u1_top_avail,
4105 s_nbr.u1_left_avail,
4106 &ps_final_prms->pu1_cu_coeffs[0],
4107 &cbf_bits);
4108
4109 cu_bits += header_bits;
4110
4111 /* cbf bits are excluded from header bits, instead considered as texture bits */
4112 /* incase if zero cbf eval is disabled then texture bits gets added here */
4113 ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
4114 ps_final_prms->u4_cu_cbf_bits = cbf_bits;
4115
4116 #if RDOPT_ENABLE
4117 /* add the cost of coding the cu bits */
4118 total_rdopt_cost +=
4119 COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
4120 #endif
4121 }
4122 return (total_rdopt_cost);
4123 }
4124 /*!
4125 ******************************************************************************
4126 * \if Function name : ihevce_inter_rdopt_cu_ntu \endif
4127 *
4128 * \brief
4129 * Inter coding unit function which performs the T, Q, IQ, IT and recon for luma
4130 *
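4131 * \param[in,out] ps_ctxt enc_loop module ctxt pointer
4132 * \param[in] ps_cu_prms pointer to CU level params (supplies the luma source stride and the noisy CU flag)
4133 * \param[in] pv_src pointer to source data buffer
4134 * \param[in] cu_size Current CU size
4135 * \param[in] cu_pos_x cu position x w.r.t. ctb (in units of 8 pels)
4136 * \param[in] cu_pos_y cu position y w.r.t. ctb (in units of 8 pels)
4137 * \param[in] curr_buf_idx buffer index for current output storage
4138 * \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure
4139 * \param[in,out] ps_inter_cand pointer to inter candidate structure (TU split flags and skip flag may be updated)
4140 * \param[in,out] ps_cu_analyse pointer to cu analyse structure (i1_cu_qp is updated when cu qp delta is enabled)
4141 * \param[in] i4_alpha_stim_multiplier multiplier forwarded to the luma TU and chroma RDOPT evaluation
4142 *
4143 * \return
4144 * Rdopt cost (running cost accumulated so far if the early exit against the previous best cost is taken)
4145 * \author
4146 * Ittiam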
4147 *****************************************************************************
4148 */
ihevce_inter_rdopt_cu_ntu(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms,void * pv_src,WORD32 cu_size,WORD32 cu_pos_x,WORD32 cu_pos_y,WORD32 curr_buf_idx,enc_loop_chrm_cu_buf_prms_t * ps_chrm_cu_buf_prms,cu_inter_cand_t * ps_inter_cand,cu_analyse_t * ps_cu_analyse,WORD32 i4_alpha_stim_multiplier)4149 LWORD64 ihevce_inter_rdopt_cu_ntu(
4150 ihevce_enc_loop_ctxt_t *ps_ctxt,
4151 enc_loop_cu_prms_t *ps_cu_prms,
4152 void *pv_src,
4153 WORD32 cu_size,
4154 WORD32 cu_pos_x,
4155 WORD32 cu_pos_y,
4156 WORD32 curr_buf_idx,
4157 enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
4158 cu_inter_cand_t *ps_inter_cand,
4159 cu_analyse_t *ps_cu_analyse,
4160 WORD32 i4_alpha_stim_multiplier)
4161 {
4162 enc_loop_cu_final_prms_t *ps_final_prms;
4163 nbr_4x4_t *ps_nbr_4x4;
4164 tu_prms_t s_tu_prms[64 * 4];
4165 tu_prms_t *ps_tu_prms;
4166
4167 WORD32 i4_perform_rdoq;
4168 WORD32 i4_perform_sbh;
4169 WORD32 ai4_tu_split_flags[4];
4170 WORD32 ai4_tu_early_cbf[4];
4171 WORD32 num_split_flags = 1;
4172 WORD32 i;
4173 UWORD8 u1_tu_size;
4174 UWORD8 *pu1_pred;
4175 UWORD8 *pu1_ecd_data;
4176 WORD16 *pi2_deq_data;
4177 UWORD8 *pu1_csbf_buf;
4178 UWORD8 *pu1_tu_sz_sft;
4179 UWORD8 *pu1_tu_posx;
4180 UWORD8 *pu1_tu_posy;
4181 LWORD64 total_rdopt_cost;
4182 WORD32 ctr;
4183 WORD32 chrm_ctr;
4184 WORD32 num_tu_in_cu = 0;
4185 WORD32 pred_stride;
4186 WORD32 recon_stride;
4187 WORD32 trans_size = ps_cu_analyse->u1_cu_size;
4188 WORD32 csbf_strd;
4189 WORD32 chrm_present_flag;
4190 WORD32 ecd_data_bytes_cons;
4191 WORD32 num_4x4_in_cu;
4192 WORD32 num_4x4_in_tu;
4193 WORD32 recon_func_mode;
4194 WORD32 cu_bits;
4195 UWORD8 u1_compute_spatial_ssd;
4196
4197 /* min_trans_size is initialized to some huge number than usual TU sizes */
4198 WORD32 i4_min_trans_size = 256;
4199 /* Get the RDOPT cost of the best CU mode for early_exit */
4200 LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
4201 WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
4202
4203 /* model for no residue syntax qt root cbf flag */
4204 UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX];
4205
4206 /* backup copy of cabac states for restoration if zero cu reside rdo wins later */
4207 UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
4208
4209 /* for skip cases tables are not reqquired */
4210 UWORD8 u1_skip_tu_sz_sft = 0;
4211 UWORD8 u1_skip_tu_posx = 0;
4212 UWORD8 u1_skip_tu_posy = 0;
4213 UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
4214
4215 /* get the pointers based on curbuf idx */
4216 ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
4217 ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
4218 pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
4219 pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
4220 csbf_strd = ps_ctxt->i4_cu_csbf_strd;
4221 pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
4222
4223 pred_stride = ps_inter_cand->i4_pred_data_stride;
4224 recon_stride = cu_size;
4225 pu1_pred = ps_inter_cand->pu1_pred_data;
4226 chrm_ctr = 0;
4227 ecd_data_bytes_cons = 0;
4228 total_rdopt_cost = 0;
4229 num_4x4_in_cu = cu_size >> 2;
4230 recon_func_mode = PRED_MODE_INTER;
4231 cu_bits = 0;
4232
4233 /* get the 4x4 level postion of current cu */
4234 cu_pos_x = cu_pos_x << 1;
4235 cu_pos_y = cu_pos_y << 1;
4236
4237 /* default value for cu coded flag */
4238 ps_final_prms->u1_is_cu_coded = 0;
4239
4240 /*init of ssd of CU accuumulated over all TU*/
4241 ps_final_prms->u4_cu_sad = 0;
4242
4243 /* populate the coeffs scan idx */
4244 ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
4245
4246 #if ENABLE_INTER_ZCU_COST
4247 /* reset cu not coded cost */
4248 ps_ctxt->i8_cu_not_coded_cost = 0;
4249
4250 /* backup copy of cabac states for restoration if zero cu reside rdo wins later */
4251 memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END);
4252 #endif
4253
4254 if(ps_cu_analyse->u1_cu_size == 64)
4255 {
4256 num_split_flags = 4;
4257 u1_tu_size = 32;
4258 }
4259 else
4260 {
4261 num_split_flags = 1;
4262 u1_tu_size = ps_cu_analyse->u1_cu_size;
4263 }
4264
4265 /* ckeck for skip mode */
4266 if(1 == ps_final_prms->u1_skip_flag)
4267 {
4268 if(64 == cu_size)
4269 {
4270 /* TU = CU/2 is set but no trnaform is evaluated */
4271 num_tu_in_cu = 4;
4272 pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
4273 pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
4274 pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
4275 }
4276 else
4277 {
4278 /* TU = CU is set but no trnaform is evaluated */
4279 num_tu_in_cu = 1;
4280 pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
4281 pu1_tu_posx = &u1_skip_tu_posx;
4282 pu1_tu_posy = &u1_skip_tu_posy;
4283 }
4284
4285 recon_func_mode = PRED_MODE_SKIP;
4286 }
4287 /* check for PU part mode being AMP or No AMP */
4288 else if(ps_final_prms->u1_part_mode < SIZE_2NxnU)
4289 {
4290 if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64))
4291 {
4292 /* TU= CU is evaluated 2Nx2N inter case */
4293 num_tu_in_cu = 1;
4294 pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
4295 pu1_tu_posx = &u1_skip_tu_posx;
4296 pu1_tu_posy = &u1_skip_tu_posy;
4297 }
4298 else
4299 {
4300 /* currently TU= CU/2 is evaluated for all inter case */
4301 num_tu_in_cu = 4;
4302 pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
4303 pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
4304 pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
4305 }
4306 }
4307 else
4308 {
4309 /* for AMP cases one level of TU recurssion is done */
4310 /* based on oreintation of the partitions */
4311 num_tu_in_cu = 10;
4312 pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0];
4313 pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
4314 pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
4315 }
4316
4317 ps_tu_prms = &s_tu_prms[0];
4318 num_tu_in_cu = 0;
4319
4320 for(i = 0; i < num_split_flags; i++)
4321 {
4322 WORD32 i4_x_off = 0, i4_y_off = 0;
4323
4324 if(i == 1 || i == 3)
4325 {
4326 i4_x_off = 32;
4327 }
4328
4329 if(i == 2 || i == 3)
4330 {
4331 i4_y_off = 32;
4332 }
4333
4334 if(1 == ps_final_prms->u1_skip_flag)
4335 {
4336 ai4_tu_split_flags[0] = 0;
4337 ps_inter_cand->ai4_tu_split_flag[i] = 0;
4338
4339 ai4_tu_early_cbf[0] = 0;
4340 }
4341 else
4342 {
4343 ai4_tu_split_flags[0] = ps_inter_cand->ai4_tu_split_flag[i];
4344 ai4_tu_early_cbf[0] = ps_inter_cand->ai4_tu_early_cbf[i];
4345 }
4346
4347 ps_tu_prms->u1_tu_size = u1_tu_size;
4348
4349 ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
4350 ps_tu_prms,
4351 &num_tu_in_cu,
4352 0,
4353 ai4_tu_split_flags[0],
4354 ai4_tu_early_cbf[0],
4355 i4_x_off,
4356 i4_y_off);
4357 }
4358
4359 /* loop for all tu blocks in current cu */
4360 ps_tu_prms = &s_tu_prms[0];
4361 for(ctr = 0; ctr < num_tu_in_cu; ctr++)
4362 {
4363 trans_size = ps_tu_prms->u1_tu_size;
4364
4365 if(i4_min_trans_size > trans_size)
4366 {
4367 i4_min_trans_size = trans_size;
4368 }
4369 ps_tu_prms++;
4370 }
4371
4372 if(ps_ctxt->i1_cu_qp_delta_enable)
4373 {
4374 WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0;
4375
4376 if(ps_cu_analyse->u1_cu_size == 64)
4377 {
4378 ASSERT(
4379 (i4_min_trans_size == 32) || (i4_min_trans_size == 16) ||
4380 (i4_min_trans_size == 8) || (i4_min_trans_size == 4));
4381 i4_act_counter = (i4_min_trans_size == 16) +
4382 2 * ((i4_min_trans_size == 8) || (i4_min_trans_size == 4));
4383 i4_act_counter_lamda = 3;
4384 }
4385 else if(ps_cu_analyse->u1_cu_size == 32)
4386 {
4387 ASSERT(
4388 (i4_min_trans_size == 32) || (i4_min_trans_size == 16) ||
4389 (i4_min_trans_size == 8) || (i4_min_trans_size == 4));
4390 i4_act_counter = (i4_min_trans_size == 16) +
4391 2 * ((i4_min_trans_size == 8) || (i4_min_trans_size == 4));
4392 i4_act_counter_lamda = 0;
4393 }
4394 else if(ps_cu_analyse->u1_cu_size == 16)
4395 {
4396 ASSERT(
4397 (i4_min_trans_size == 16) || (i4_min_trans_size == 8) || (i4_min_trans_size == 4));
4398 i4_act_counter = (i4_min_trans_size == 8) || (i4_min_trans_size == 4);
4399 i4_act_counter_lamda = 0;
4400 }
4401 else if(ps_cu_analyse->u1_cu_size == 8)
4402 {
4403 ASSERT((i4_min_trans_size == 8) || (i4_min_trans_size == 4));
4404 i4_act_counter = 1;
4405 i4_act_counter_lamda = 0;
4406 }
4407 else
4408 {
4409 ASSERT(0);
4410 }
4411 if(ps_ctxt->i4_use_ctb_level_lamda)
4412 {
4413 ihevce_compute_cu_level_QP(
4414 ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][0], -1, 0);
4415 }
4416 else
4417 {
4418 ihevce_compute_cu_level_QP(
4419 ps_ctxt,
4420 ps_cu_analyse->i4_act_factor[i4_act_counter][0],
4421 ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][0],
4422 0);
4423 }
4424
4425 ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp;
4426 }
4427 if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
4428 {
4429 ps_ctxt->i8_cl_ssd_lambda_qf =
4430 ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
4431 100.0f);
4432 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
4433 ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
4434 (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
4435 }
4436
4437 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
4438 (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
4439 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
4440
4441 if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
4442 {
4443 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
4444 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
4445 }
4446
4447 if(!u1_compute_spatial_ssd)
4448 {
4449 ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
4450 ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
4451 }
4452 else
4453 {
4454 ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1;
4455 }
4456
4457 ps_tu_prms = &s_tu_prms[0];
4458
4459 ASSERT(num_tu_in_cu <= 256);
4460
4461 /* RDOPT copy States : TU init (best until prev TU) to current */
4462 memcpy(
4463 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4464 .s_cabac_ctxt.au1_ctxt_models[0],
4465 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
4466 IHEVC_CAB_COEFFX_PREFIX);
4467
4468 for(ctr = 0; ctr < num_tu_in_cu; ctr++)
4469 {
4470 WORD32 curr_bytes;
4471 WORD32 tx_size;
4472 WORD32 cbf, zero_col, zero_row;
4473 LWORD64 rdopt_cost;
4474 UWORD8 u1_is_recon_available;
4475
4476 WORD32 curr_pos_x;
4477 WORD32 curr_pos_y;
4478 nbr_4x4_t *ps_cur_nbr_4x4;
4479 UWORD8 *pu1_cur_pred;
4480 UWORD8 *pu1_cur_src;
4481 UWORD8 *pu1_cur_recon;
4482 WORD16 *pi2_cur_deq_data;
4483 UWORD32 u4_tu_sad;
4484 WORD32 tu_bits;
4485
4486 WORD32 i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
4487
4488 trans_size = ps_tu_prms->u1_tu_size;
4489 /* get the current pos x and pos y in pixels */
4490 curr_pos_x = ps_tu_prms->u1_x_off; //((cu_size >> 2) * pu1_tu_posx[ctr]);
4491 curr_pos_y = ps_tu_prms->u1_y_off; //((cu_size >> 2) * pu1_tu_posy[ctr]);
4492
4493 num_4x4_in_tu = trans_size >> 2;
4494
4495 #if FORCE_8x8_TFR
4496 if(cu_size == 64)
4497 {
4498 curr_pos_x = ((cu_size >> 3) * pu1_tu_posx[ctr]);
4499 curr_pos_y = ((cu_size >> 3) * pu1_tu_posy[ctr]);
4500 }
4501 #endif
4502
4503 /* increment the pointers to start of current TU */
4504 pu1_cur_src = ((UWORD8 *)pv_src + curr_pos_x);
4505 pu1_cur_src += (curr_pos_y * src_strd);
4506 pu1_cur_pred = (pu1_pred + curr_pos_x);
4507 pu1_cur_pred += (curr_pos_y * pred_stride);
4508 pi2_cur_deq_data = pi2_deq_data + curr_pos_x;
4509 pi2_cur_deq_data += (curr_pos_y * cu_size);
4510 pu1_cur_recon = ((UWORD8 *)ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0]) +
4511 curr_pos_x + curr_pos_y * i4_recon_stride;
4512
4513 ps_cur_nbr_4x4 = (ps_nbr_4x4 + (curr_pos_x >> 2));
4514 ps_cur_nbr_4x4 += ((curr_pos_y >> 2) * num_4x4_in_cu);
4515
4516 /* RDOPT copy States : TU init (best until prev TU) to current */
4517 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4518 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4519 .s_cabac_ctxt.au1_ctxt_models[0] +
4520 IHEVC_CAB_COEFFX_PREFIX,
4521 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4522 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4523
4524 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
4525 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
4526
4527 /*2 Multi- dimensinal array based on trans size of rounding factor to be added here */
4528 /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
4529 /* Currently the complete array will contain only single value*/
4530 /*The rounding factor is calculated with the formula
4531 Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
4532 rounding factor = (1 - DeadZone Val)
4533
4534 Assumption: Cabac states of All the sub-blocks in the TU are considered independent
4535 */
4536 if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0))
4537 {
4538 double i4_lamda_modifier;
4539
4540 if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
4541 {
4542 i4_lamda_modifier = ps_ctxt->i4_lamda_modifier *
4543 CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
4544 }
4545 else
4546 {
4547 i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
4548 }
4549 if(ps_ctxt->i4_use_const_lamda_modifier)
4550 {
4551 if(ISLICE == ps_ctxt->i1_slice_type)
4552 {
4553 i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
4554 }
4555 else
4556 {
4557 i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
4558 }
4559 }
4560 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
4561 &ps_ctxt->i4_quant_round_tu[0][0];
4562 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
4563 &ps_ctxt->i4_quant_round_tu[1][0];
4564
4565 memset(
4566 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
4567 0,
4568 trans_size * trans_size * sizeof(WORD32));
4569 memset(
4570 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
4571 0,
4572 trans_size * trans_size * sizeof(WORD32));
4573
4574 ihevce_quant_rounding_factor_gen(
4575 trans_size,
4576 1,
4577 &ps_ctxt->s_rdopt_entropy_ctxt,
4578 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
4579 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
4580 i4_lamda_modifier,
4581 1);
4582 }
4583 else
4584 {
4585 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
4586 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3];
4587 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
4588 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3];
4589 }
4590
4591 /* call T Q IT IQ and recon function */
4592 cbf = ihevce_t_q_iq_ssd_scan_fxn(
4593 ps_ctxt,
4594 pu1_cur_pred,
4595 pred_stride,
4596 pu1_cur_src,
4597 src_strd,
4598 pi2_cur_deq_data,
4599 cu_size,
4600 pu1_cur_recon,
4601 i4_recon_stride,
4602 pu1_ecd_data,
4603 pu1_csbf_buf,
4604 csbf_strd,
4605 trans_size,
4606 recon_func_mode,
4607 &rdopt_cost,
4608 &curr_bytes,
4609 &tu_bits,
4610 &u4_tu_sad,
4611 &zero_col,
4612 &zero_row,
4613 &u1_is_recon_available,
4614 i4_perform_rdoq,
4615 i4_perform_sbh,
4616 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
4617 i4_alpha_stim_multiplier,
4618 u1_is_cu_noisy,
4619 #endif
4620 u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
4621 ps_ctxt->u1_use_early_cbf_data ? ps_tu_prms->i4_early_cbf : 1);
4622
4623 #if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
4624 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
4625 {
4626 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
4627 rdopt_cost = ihevce_inject_stim_into_distortion(
4628 pu1_cur_src,
4629 src_strd,
4630 pu1_cur_pred,
4631 pred_stride,
4632 rdopt_cost,
4633 i4_alpha_stim_multiplier,
4634 trans_size,
4635 0,
4636 ps_ctxt->u1_enable_psyRDOPT,
4637 NULL_PLANE);
4638 #else
4639 if(u1_compute_spatial_ssd && u1_is_recon_available)
4640 {
4641 rdopt_cost = ihevce_inject_stim_into_distortion(
4642 pu1_cur_src,
4643 src_strd,
4644 pu1_cur_recon,
4645 i4_recon_stride,
4646 rdopt_cost,
4647 i4_alpha_stim_multiplier,
4648 trans_size,
4649 0,
4650 NULL_PLANE);
4651 }
4652 else
4653 {
4654 rdopt_cost = ihevce_inject_stim_into_distortion(
4655 pu1_cur_src,
4656 src_strd,
4657 pu1_cur_pred,
4658 pred_stride,
4659 rdopt_cost,
4660 i4_alpha_stim_multiplier,
4661 trans_size,
4662 0,
4663 ps_ctxt->u1_enable_psyRDOPT,
4664 NULL_PLANE);
4665 }
4666 #endif
4667 }
4668 #endif
4669
4670 if(u1_compute_spatial_ssd && u1_is_recon_available)
4671 {
4672 ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = 0;
4673 }
4674 else
4675 {
4676 ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
4677 }
4678
4679 /* accumulate the TU sad into cu sad */
4680 ps_final_prms->u4_cu_sad += u4_tu_sad;
4681
4682 /* accumulate the TU bits into cu bits */
4683 cu_bits += tu_bits;
4684
4685 /* inter cu is coded if any of the tu is coded in it */
4686 ps_final_prms->u1_is_cu_coded |= cbf;
4687
4688 /* call the entropy function to get the bits */
4689 /* add that to rd opt cost(SSD) */
4690
4691 /* update the bytes */
4692 ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
4693 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = curr_bytes;
4694 /* update the zero_row and col info for the final mode */
4695 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col = zero_col;
4696 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row = zero_row;
4697
4698 /* update the bytes */
4699 ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
4700
4701 /* update the total bytes cons */
4702 ecd_data_bytes_cons += curr_bytes;
4703 pu1_ecd_data += curr_bytes;
4704
4705 /* RDOPT copy States : New updated after curr TU to TU init */
4706 if(0 != cbf)
4707 {
4708 /* update to new state only if CBF is non zero */
4709 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4710 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4711 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4712 .s_cabac_ctxt.au1_ctxt_models[0] +
4713 IHEVC_CAB_COEFFX_PREFIX,
4714 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4715 }
4716
4717 /* by default chroma present is set to 1*/
4718 chrm_present_flag = 1;
4719 if(4 == trans_size)
4720 {
4721 /* if tusize is 4x4 then only first luma 4x4 will have chroma*/
4722 if(0 != chrm_ctr)
4723 {
4724 chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE;
4725 }
4726
4727 /* increment the chrm ctr unconditionally */
4728 chrm_ctr++;
4729
4730 /* after ctr reached 4 reset it */
4731 if(4 == chrm_ctr)
4732 {
4733 chrm_ctr = 0;
4734 }
4735 }
4736
4737 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = cbf;
4738 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
4739 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
4740 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
4741 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
4742 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag;
4743 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp;
4744 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0;
4745 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0;
4746 GETRANGE(tx_size, trans_size);
4747 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
4748 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + (curr_pos_x >> 2);
4749 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + (curr_pos_y >> 2);
4750
4751 /* repiclate the nbr 4x4 structure for all 4x4 blocks current TU */
4752 ps_cur_nbr_4x4->b1_y_cbf = cbf;
4753 /*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/
4754 ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
4755
4756 /* Qp and cbf are stored for the all 4x4 in TU */
4757 {
4758 WORD32 i, j;
4759 nbr_4x4_t *ps_tmp_4x4;
4760 ps_tmp_4x4 = ps_cur_nbr_4x4;
4761
4762 for(i = 0; i < num_4x4_in_tu; i++)
4763 {
4764 for(j = 0; j < num_4x4_in_tu; j++)
4765 {
4766 ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp;
4767 ps_tmp_4x4[j].b1_y_cbf = cbf;
4768 }
4769 /* row level update*/
4770 ps_tmp_4x4 += num_4x4_in_cu;
4771 }
4772 }
4773
4774 #if RDOPT_ENABLE
4775 /* compute the rdopt cost */
4776 rdopt_cost +=
4777 COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
4778 #endif
4779 /* accumulate the costs */
4780 total_rdopt_cost += rdopt_cost;
4781
4782 ps_tu_prms++;
4783
4784 if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
4785 {
4786 /* Early exit : If the current running cost exceeds
4787 the prev. best mode cost, break */
4788 if(total_rdopt_cost > prev_best_rdopt_cost)
4789 {
4790 return (total_rdopt_cost);
4791 }
4792 }
4793 }
4794
4795 /* Modify the cost function for this CU. */
4796 /* loop in for 8x8 blocks */
4797 if(ps_ctxt->u1_enable_psyRDOPT)
4798 {
4799 UWORD8 *pu1_recon_cu;
4800 WORD32 recon_stride;
4801 WORD32 curr_pos_x;
4802 WORD32 curr_pos_y;
4803 WORD32 start_index;
4804 WORD32 num_horz_cu_in_ctb;
4805 WORD32 had_block_size;
4806
4807 /* tODO: sreenivasa ctb size has to be used appropriately */
4808 had_block_size = 8;
4809 num_horz_cu_in_ctb = 64 / had_block_size;
4810
4811 curr_pos_x = cu_pos_x << 2; /* pel units */
4812 curr_pos_y = cu_pos_y << 2; /* pel units */
4813 recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
4814 pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore
4815 .apv_luma_recon_bufs[0]); // already pointing to the current CU recon
4816 //+ \curr_pos_x + curr_pos_y * recon_stride;
4817
4818 /* start index to index the source satd of curr cu int he current ctb*/
4819 start_index =
4820 (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
4821
4822 {
4823 total_rdopt_cost += ihevce_psy_rd_cost(
4824 ps_ctxt->ai4_source_satd_8x8,
4825 pu1_recon_cu,
4826 recon_stride,
4827 1, //howz stride
4828 cu_size,
4829 0, // pic type
4830 0, //layer id
4831 ps_ctxt->i4_satd_lamda, // lambda
4832 start_index,
4833 ps_ctxt->u1_is_input_data_hbd,
4834 ps_ctxt->u4_psy_strength,
4835 &ps_ctxt->s_cmn_opt_func); // 8 bit
4836 }
4837 }
4838
4839 /* store the num TUs*/
4840 ps_final_prms->u2_num_tus_in_cu = num_tu_in_cu;
4841
4842 /* update the bytes consumed */
4843 ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
4844
4845 /* store the current cu size to final prms */
4846 ps_final_prms->u1_cu_size = cu_size;
4847
4848 /* cu bits will be having luma residual bits till this point */
4849 /* if zero_cbf eval is disabled then cu bits will be zero */
4850 ps_final_prms->u4_cu_luma_res_bits = cu_bits;
4851
4852 /* ------------- Chroma processing -------------- */
4853 /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEDD preset*/
4854 if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
4855 {
4856 LWORD64 chrm_rdopt_cost;
4857 WORD32 chrm_rdopt_tu_bits;
4858
4859 /* Store the current RDOPT cost to enable early exit in chrom_prcs */
4860 ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
4861
4862 chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
4863 ps_ctxt,
4864 curr_buf_idx,
4865 0, /* TU mode : Don't care in Inter patrh */
4866 ps_chrm_cu_buf_prms->pu1_curr_src,
4867 ps_chrm_cu_buf_prms->i4_chrm_src_stride,
4868 ps_chrm_cu_buf_prms->pu1_cu_left,
4869 ps_chrm_cu_buf_prms->pu1_cu_top,
4870 ps_chrm_cu_buf_prms->pu1_cu_top_left,
4871 ps_chrm_cu_buf_prms->i4_cu_left_stride,
4872 (cu_pos_x >> 1),
4873 (cu_pos_y >> 1),
4874 &chrm_rdopt_tu_bits,
4875 i4_alpha_stim_multiplier,
4876 u1_is_cu_noisy);
4877
4878 #if WEIGH_CHROMA_COST
4879 chrm_rdopt_cost = (LWORD64)(
4880 (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
4881 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
4882 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
4883 #endif
4884
4885 #if CHROMA_RDOPT_ENABLE
4886 total_rdopt_cost += chrm_rdopt_cost;
4887 #endif
4888 cu_bits += chrm_rdopt_tu_bits;
4889
4890 /* during chroma evaluation if skip decision was over written */
4891 /* then the current skip candidate is set to a non skip candidate */
4892 ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag;
4893
4894 /* cu bits for chroma residual if chroma rdopt is on */
4895 /* if zero_cbf eval is disabled then cu bits will be zero */
4896 ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
4897
4898 if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
4899 {
4900 /* Early exit : If the current running cost exceeds
4901 the prev. best mode cost, break */
4902 if(total_rdopt_cost > prev_best_rdopt_cost)
4903 {
4904 return (total_rdopt_cost);
4905 }
4906 }
4907 }
4908 else
4909 {}
4910
4911 #if SHRINK_INTER_TUTREE
4912 /* ------------- Quadtree TU split optimization ------------ */
4913 if(ps_final_prms->u1_is_cu_coded)
4914 {
4915 ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree(
4916 &ps_final_prms->as_tu_enc_loop[0],
4917 &ps_final_prms->as_tu_enc_loop_temp_prms[0],
4918 &ps_final_prms->s_recon_datastore,
4919 num_tu_in_cu,
4920 (ps_ctxt->u1_chroma_array_type == 2));
4921 }
4922 #endif
4923
4924 /* RDOPT copy States : Best after all luma TUs (and chroma,if enabled)to current */
4925 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4926 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4927 .s_cabac_ctxt.au1_ctxt_models[0] +
4928 IHEVC_CAB_COEFFX_PREFIX,
4929 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4930 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4931
4932 /* -------- Bit estimate for RD opt -------------- */
4933 {
4934 nbr_avail_flags_t s_nbr;
4935 /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
4936 WORD32 cbf_bits, header_bits;
4937
4938 /* get the neighbour availability flags for current cu */
4939 ihevce_get_only_nbr_flag(
4940 &s_nbr,
4941 ps_ctxt->pu1_ctb_nbr_map,
4942 ps_ctxt->i4_nbr_map_strd,
4943 cu_pos_x,
4944 cu_pos_y,
4945 (cu_size >> 2),
4946 (cu_size >> 2));
4947
4948 /* call the entropy rdo encode to get the bit estimate for current cu */
4949 header_bits = ihevce_entropy_rdo_encode_cu(
4950 &ps_ctxt->s_rdopt_entropy_ctxt,
4951 ps_final_prms,
4952 (cu_pos_x >> 1), /* back to 8x8 pel units */
4953 (cu_pos_y >> 1), /* back to 8x8 pel units */
4954 cu_size,
4955 ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
4956 : s_nbr.u1_top_avail,
4957 s_nbr.u1_left_avail,
4958 &ps_final_prms->pu1_cu_coeffs[0],
4959 &cbf_bits);
4960
4961 cu_bits += header_bits;
4962
4963 /* cbf bits are excluded from header bits, instead considered as texture bits */
4964 /* incase if zero cbf eval is disabled then texture bits gets added here */
4965 ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
4966 ps_final_prms->u4_cu_cbf_bits = cbf_bits;
4967
4968 #if RDOPT_ENABLE
4969 /* add the cost of coding the header bits */
4970 total_rdopt_cost +=
4971 COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
4972
4973 #if ENABLE_INTER_ZCU_COST
4974 /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */
4975 if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level))
4976 {
4977 LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
4978
4979 WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) &&
4980 (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag);
4981
4982 cab_ctxt_t *ps_cab_ctxt =
4983 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt;
4984
4985 /* Read header bits generatated after ihevce_entropy_rdo_encode_cu() call */
4986 UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12;
4987
4988 /* account for coding qt_root_cbf = 0 */
4989 /* First subtract cost for coding as 1 (part of header bits) and then add cost for coding as 0 */
4990 u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0];
4991 if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1])
4992 u4_cu_hdr_bits_q12 = 0;
4993 else
4994 u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1];
4995
4996 /* add the cost of coding the header bits */
4997 i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30(
4998 u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */,
4999 ps_ctxt->i8_cl_ssd_lambda_qf,
5000 (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
5001
5002 if(ps_ctxt->u1_enable_psyRDOPT)
5003 {
5004 i8_cu_not_coded_cost = total_rdopt_cost + 1;
5005 }
5006
5007 /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */
5008 if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu))
5009 {
5010 WORD32 tx_size;
5011
5012 /* force cu as not coded and update the cost */
5013 ps_final_prms->u1_is_cu_coded = 0;
5014 ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
5015 ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
5016
5017 total_rdopt_cost = i8_cu_not_coded_cost;
5018
5019 /* reset num TUs to 1 unless cu size id 64 */
5020 ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1;
5021 trans_size = (64 == cu_size) ? 32 : cu_size;
5022 GETRANGE(tx_size, trans_size);
5023
5024 /* reset the bytes consumed */
5025 ps_final_prms->i4_num_bytes_ecd_data = 0;
5026
5027 /* reset texture related bits and roll back header bits*/
5028 ps_final_prms->u4_cu_cbf_bits = 0;
5029 ps_final_prms->u4_cu_luma_res_bits = 0;
5030 ps_final_prms->u4_cu_chroma_res_bits = 0;
5031 ps_final_prms->u4_cu_hdr_bits =
5032 (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q;
5033
5034 /* update cabac model with qtroot cbf = 0 decision */
5035 ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] =
5036 gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1];
5037
5038 /* restore untouched cabac models for, tusplit, cbfs, texture etc */
5039 memcpy(
5040 &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM],
5041 &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM],
5042 (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM));
5043
5044 /* mark all tus as not coded for final eval */
5045 for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++)
5046 {
5047 WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0;
5048 WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0;
5049
5050 nbr_4x4_t *ps_cur_nbr_4x4 =
5051 ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu);
5052
5053 num_4x4_in_tu = trans_size >> 2;
5054
5055 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0;
5056 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0;
5057 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0;
5058
5059 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0;
5060 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
5061 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
5062
5063 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
5064 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
5065
5066 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
5067 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x;
5068 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y;
5069
5070 /* reset cbf for the all 4x4 in TU */
5071 {
5072 WORD32 i, j;
5073 nbr_4x4_t *ps_tmp_4x4;
5074 ps_tmp_4x4 = ps_cur_nbr_4x4;
5075
5076 for(i = 0; i < num_4x4_in_tu; i++)
5077 {
5078 for(j = 0; j < num_4x4_in_tu; j++)
5079 {
5080 ps_tmp_4x4[j].b1_y_cbf = 0;
5081 }
5082 /* row level update*/
5083 ps_tmp_4x4 += num_4x4_in_cu;
5084 }
5085 }
5086 }
5087 }
5088 }
5089 #endif /* ENABLE_INTER_ZCU_COST */
5090
5091 #endif /* RDOPT_ENABLE */
5092 }
5093
5094 return (total_rdopt_cost);
5095 }
5096
5097 #if ENABLE_RDO_BASED_TU_RECURSION
ihevce_inter_tu_tree_selector_and_rdopt_cost_computer(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms,void * pv_src,WORD32 cu_size,WORD32 cu_pos_x,WORD32 cu_pos_y,WORD32 curr_buf_idx,enc_loop_chrm_cu_buf_prms_t * ps_chrm_cu_buf_prms,cu_inter_cand_t * ps_inter_cand,cu_analyse_t * ps_cu_analyse,WORD32 i4_alpha_stim_multiplier)5098 LWORD64 ihevce_inter_tu_tree_selector_and_rdopt_cost_computer(
5099 ihevce_enc_loop_ctxt_t *ps_ctxt,
5100 enc_loop_cu_prms_t *ps_cu_prms,
5101 void *pv_src,
5102 WORD32 cu_size,
5103 WORD32 cu_pos_x,
5104 WORD32 cu_pos_y,
5105 WORD32 curr_buf_idx,
5106 enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
5107 cu_inter_cand_t *ps_inter_cand,
5108 cu_analyse_t *ps_cu_analyse,
5109 WORD32 i4_alpha_stim_multiplier)
5110 {
5111 tu_tree_node_t as_tu_nodes[256 + 64 + 16 + 4 + 1];
5112 buffer_data_for_tu_t s_buffer_data_for_tu;
5113 enc_loop_cu_final_prms_t *ps_final_prms;
5114 nbr_4x4_t *ps_nbr_4x4;
5115
5116 WORD32 num_split_flags = 1;
5117 UWORD8 u1_tu_size;
5118 UWORD8 *pu1_pred;
5119 UWORD8 *pu1_ecd_data;
5120 WORD16 *pi2_deq_data;
5121 UWORD8 *pu1_csbf_buf;
5122 UWORD8 *pu1_tu_sz_sft;
5123 UWORD8 *pu1_tu_posx;
5124 UWORD8 *pu1_tu_posy;
5125 LWORD64 total_rdopt_cost;
5126 WORD32 ctr;
5127 WORD32 chrm_ctr;
5128 WORD32 pred_stride;
5129 WORD32 recon_stride;
5130 WORD32 trans_size = ps_cu_analyse->u1_cu_size;
5131 WORD32 csbf_strd;
5132 WORD32 ecd_data_bytes_cons;
5133 WORD32 num_4x4_in_cu;
5134 WORD32 num_4x4_in_tu;
5135 WORD32 recon_func_mode;
5136 WORD32 cu_bits;
5137 UWORD8 u1_compute_spatial_ssd;
5138 /* backup copy of cabac states for restoration if zero cu reside rdo wins later */
5139 UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
5140
5141 WORD32 i4_min_trans_size = 256;
5142 LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
5143 WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
5144 /* model for no residue syntax qt root cbf flag */
5145 UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX];
5146 UWORD8 u1_skip_tu_sz_sft = 0;
5147 UWORD8 u1_skip_tu_posx = 0;
5148 UWORD8 u1_skip_tu_posy = 0;
5149 UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
5150
5151 ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
5152 ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
5153 pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
5154 pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
5155 csbf_strd = ps_ctxt->i4_cu_csbf_strd;
5156 pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
5157 pred_stride = ps_inter_cand->i4_pred_data_stride;
5158 recon_stride = cu_size;
5159 pu1_pred = ps_inter_cand->pu1_pred_data;
5160 chrm_ctr = 0;
5161 ecd_data_bytes_cons = 0;
5162 total_rdopt_cost = 0;
5163 num_4x4_in_cu = cu_size >> 2;
5164 recon_func_mode = PRED_MODE_INTER;
5165 cu_bits = 0;
5166
5167 /* get the 4x4 level postion of current cu */
5168 cu_pos_x = cu_pos_x << 1;
5169 cu_pos_y = cu_pos_y << 1;
5170
5171 ps_final_prms->u1_is_cu_coded = 0;
5172 ps_final_prms->u4_cu_sad = 0;
5173
5174 /* populate the coeffs scan idx */
5175 ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
5176
5177 #if ENABLE_INTER_ZCU_COST
5178 /* reset cu not coded cost */
5179 ps_ctxt->i8_cu_not_coded_cost = 0;
5180
5181 /* backup copy of cabac states for restoration if zero cu reside rdo wins later */
5182 memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END);
5183 #endif
5184
5185 if(ps_cu_analyse->u1_cu_size == 64)
5186 {
5187 num_split_flags = 4;
5188 u1_tu_size = 32;
5189 }
5190 else
5191 {
5192 num_split_flags = 1;
5193 u1_tu_size = ps_cu_analyse->u1_cu_size;
5194 }
5195
5196 if(1 == ps_final_prms->u1_skip_flag)
5197 {
5198 if(64 == cu_size)
5199 {
5200 /* TU = CU/2 is set but no trnaform is evaluated */
5201 pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
5202 pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
5203 pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
5204 }
5205 else
5206 {
5207 /* TU = CU is set but no trnaform is evaluated */
5208 pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
5209 pu1_tu_posx = &u1_skip_tu_posx;
5210 pu1_tu_posy = &u1_skip_tu_posy;
5211 }
5212
5213 recon_func_mode = PRED_MODE_SKIP;
5214 }
5215 /* check for PU part mode being AMP or No AMP */
5216 else if(ps_final_prms->u1_part_mode < SIZE_2NxnU)
5217 {
5218 if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64))
5219 {
5220 /* TU= CU is evaluated 2Nx2N inter case */
5221 pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
5222 pu1_tu_posx = &u1_skip_tu_posx;
5223 pu1_tu_posy = &u1_skip_tu_posy;
5224 }
5225 else
5226 {
5227 /* currently TU= CU/2 is evaluated for all inter case */
5228 pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
5229 pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
5230 pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
5231 }
5232 }
5233 else
5234 {
5235 /* for AMP cases one level of TU recurssion is done */
5236 /* based on oreintation of the partitions */
5237 pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0];
5238 pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
5239 pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
5240 }
5241
5242 i4_min_trans_size = 4;
5243
5244 if(ps_ctxt->i1_cu_qp_delta_enable)
5245 {
5246 WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0;
5247 if(ps_cu_analyse->u1_cu_size == 64)
5248 {
5249 ASSERT(
5250 (i4_min_trans_size == 32) || (i4_min_trans_size == 16) ||
5251 (i4_min_trans_size == 8) || (i4_min_trans_size == 4));
5252 i4_act_counter = (i4_min_trans_size == 16) +
5253 2 * ((i4_min_trans_size == 8) || (i4_min_trans_size == 4));
5254 i4_act_counter_lamda = 3;
5255 }
5256 else if(ps_cu_analyse->u1_cu_size == 32)
5257 {
5258 ASSERT(
5259 (i4_min_trans_size == 32) || (i4_min_trans_size == 16) ||
5260 (i4_min_trans_size == 8) || (i4_min_trans_size == 4));
5261 i4_act_counter = (i4_min_trans_size == 16) +
5262 2 * ((i4_min_trans_size == 8) || (i4_min_trans_size == 4));
5263 i4_act_counter_lamda = 0;
5264 }
5265 else if(ps_cu_analyse->u1_cu_size == 16)
5266 {
5267 ASSERT(
5268 (i4_min_trans_size == 16) || (i4_min_trans_size == 8) || (i4_min_trans_size == 4));
5269 i4_act_counter = (i4_min_trans_size == 8) || (i4_min_trans_size == 4);
5270 i4_act_counter_lamda = 0;
5271 }
5272 else if(ps_cu_analyse->u1_cu_size == 8)
5273 {
5274 ASSERT((i4_min_trans_size == 8) || (i4_min_trans_size == 4));
5275 i4_act_counter = 1;
5276 i4_act_counter_lamda = 0;
5277 }
5278 else
5279 {
5280 ASSERT(0);
5281 }
5282 if(ps_ctxt->i4_use_ctb_level_lamda)
5283 {
5284 ihevce_compute_cu_level_QP(
5285 ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][0], -1, 0);
5286 }
5287 else
5288 {
5289 ihevce_compute_cu_level_QP(
5290 ps_ctxt,
5291 ps_cu_analyse->i4_act_factor[i4_act_counter][0],
5292 ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][0],
5293 0);
5294 }
5295
5296 ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp;
5297 }
5298
5299 if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
5300 {
5301 ps_ctxt->i8_cl_ssd_lambda_qf =
5302 ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
5303 100.0f);
5304 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
5305 ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
5306 (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
5307 }
5308
5309 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
5310 (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
5311 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
5312
5313 if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
5314 {
5315 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
5316 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
5317 }
5318
5319 if(!u1_compute_spatial_ssd)
5320 {
5321 ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
5322 ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
5323 }
5324 else
5325 {
5326 ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1;
5327
5328 if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))
5329 {
5330 ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 1;
5331 }
5332 }
5333
5334 /* RDOPT copy States : TU init (best until prev TU) to current */
5335 memcpy(
5336 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5337 .s_cabac_ctxt.au1_ctxt_models[0],
5338 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
5339 IHEVC_CAB_COEFFX_PREFIX);
5340
5341 ihevce_tu_tree_init(
5342 as_tu_nodes,
5343 cu_size,
5344 (cu_size == 64) ? !ps_inter_cand->b1_skip_flag : 0,
5345 ps_inter_cand->b1_skip_flag ? 0 : ps_ctxt->u1_max_inter_tr_depth,
5346 INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
5347 ps_ctxt->u1_chroma_array_type == 2);
5348
5349 if(!ps_inter_cand->b1_skip_flag && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
5350 {
5351 ihevce_tuSplitArray_to_tuTree_mapper(
5352 as_tu_nodes,
5353 ps_inter_cand->ai4_tu_split_flag,
5354 cu_size,
5355 cu_size,
5356 MAX(MIN_TU_SIZE, (cu_size >> ps_ctxt->u1_max_inter_tr_depth)),
5357 MIN(MAX_TU_SIZE, cu_size),
5358 ps_inter_cand->b1_skip_flag);
5359 }
5360
5361 ASSERT(ihevce_tu_tree_coverage_in_cu(as_tu_nodes) == cu_size * cu_size);
5362
5363 #if ENABLE_INTER_ZCU_COST
5364 ps_ctxt->i8_cu_not_coded_cost = 0;
5365 #endif
5366
5367 s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_src = pv_src;
5368 s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_pred = pu1_pred;
5369 s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_recon =
5370 ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0];
5371 s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_src_stride = src_strd;
5372 s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_pred_stride = pred_stride;
5373 s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_recon_stride =
5374 ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
5375 s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_src = ps_chrm_cu_buf_prms->pu1_curr_src;
5376 s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred =
5377 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
5378 curr_buf_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) + ((ps_ctxt->u1_chroma_array_type == 2) *
5379 (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
5380 s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_recon =
5381 ps_final_prms->s_recon_datastore.apv_chroma_recon_bufs[0];
5382 s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_src_stride =
5383 ps_chrm_cu_buf_prms->i4_chrm_src_stride;
5384 s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride =
5385 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
5386 s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_recon_stride =
5387 ps_final_prms->s_recon_datastore.i4_chromaRecon_stride;
5388 s_buffer_data_for_tu.ps_nbr_data_buf = ps_nbr_4x4;
5389 s_buffer_data_for_tu.pi2_deq_data = pi2_deq_data;
5390 s_buffer_data_for_tu.pi2_deq_data_chroma =
5391 pi2_deq_data + ps_final_prms->i4_chrm_deq_coeff_strt_idx;
5392 s_buffer_data_for_tu.i4_nbr_data_buf_stride = num_4x4_in_cu;
5393 s_buffer_data_for_tu.i4_deq_data_stride = cu_size;
5394 s_buffer_data_for_tu.i4_deq_data_stride_chroma = cu_size;
5395 s_buffer_data_for_tu.ppu1_ecd = &pu1_ecd_data;
5396
5397 if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))
5398 {
5399 UWORD8 i;
5400
5401 UWORD8 *pu1_pred = (UWORD8 *)s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred;
5402
5403 for(i = 0; i < (!!ps_inter_cand->b3_part_size) + 1; i++)
5404 {
5405 pu_t *ps_pu;
5406
5407 WORD32 inter_pu_wd;
5408 WORD32 inter_pu_ht;
5409
5410 ps_pu = ps_inter_cand->as_inter_pu + i;
5411
5412 inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
5413 inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
5414 inter_pu_ht <<= (ps_ctxt->u1_chroma_array_type == 2);
5415 ihevce_chroma_inter_pred_pu(
5416 &ps_ctxt->s_mc_ctxt,
5417 ps_pu,
5418 pu1_pred,
5419 s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride);
5420 if(!!ps_inter_cand->b3_part_size)
5421 {
5422 /* 2Nx__ partion case */
5423 if(inter_pu_wd == cu_size)
5424 {
5425 pu1_pred +=
5426 (inter_pu_ht *
5427 s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride);
5428 }
5429
5430 /* __x2N partion case */
5431 if(inter_pu_ht == (cu_size >> !(ps_ctxt->u1_chroma_array_type == 2)))
5432 {
5433 pu1_pred += inter_pu_wd;
5434 }
5435 }
5436 }
5437 }
5438
5439 #if !ENABLE_TOP_DOWN_TU_RECURSION
5440 total_rdopt_cost = ihevce_tu_tree_selector(
5441 ps_ctxt,
5442 as_tu_nodes,
5443 &s_buffer_data_for_tu,
5444 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5445 .s_cabac_ctxt.au1_ctxt_models[0],
5446 recon_func_mode,
5447 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
5448 i4_alpha_stim_multiplier,
5449 u1_is_cu_noisy,
5450 #endif
5451 0,
5452 ps_ctxt->u1_max_inter_tr_depth,
5453 ps_inter_cand->b3_part_size,
5454 u1_compute_spatial_ssd);
5455 #else
5456 total_rdopt_cost = ihevce_topDown_tu_tree_selector(
5457 ps_ctxt,
5458 as_tu_nodes,
5459 &s_buffer_data_for_tu,
5460 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5461 .s_cabac_ctxt.au1_ctxt_models[0],
5462 recon_func_mode,
5463 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
5464 i4_alpha_stim_multiplier,
5465 u1_is_cu_noisy,
5466 #endif
5467 0,
5468 ps_ctxt->u1_max_inter_tr_depth,
5469 ps_inter_cand->b3_part_size,
5470 INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
5471 u1_compute_spatial_ssd);
5472 #endif
5473
5474 ps_final_prms->u2_num_tus_in_cu = 0;
5475 ps_final_prms->u4_cu_luma_res_bits = 0;
5476 ps_final_prms->u4_cu_sad = 0;
5477 total_rdopt_cost = 0;
5478 ecd_data_bytes_cons = 0;
5479 cu_bits = 0;
5480 #if ENABLE_INTER_ZCU_COST
5481 ps_ctxt->i8_cu_not_coded_cost = 0;
5482 #endif
5483 ps_final_prms->u1_is_cu_coded = 0;
5484 ps_final_prms->u1_cu_size = cu_size;
5485
5486 ihevce_tu_selector_debriefer(
5487 as_tu_nodes,
5488 ps_final_prms,
5489 &total_rdopt_cost,
5490 #if ENABLE_INTER_ZCU_COST
5491 &ps_ctxt->i8_cu_not_coded_cost,
5492 #endif
5493 &ecd_data_bytes_cons,
5494 &cu_bits,
5495 &ps_final_prms->u2_num_tus_in_cu,
5496 ps_ctxt->i4_cu_qp,
5497 cu_pos_x * 4,
5498 cu_pos_y * 4,
5499 INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
5500 (ps_ctxt->u1_chroma_array_type == 2),
5501 POS_TL);
5502
5503 if(!(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)))
5504 {
5505 ps_final_prms->i4_chrm_cu_coeff_strt_idx = ecd_data_bytes_cons;
5506 }
5507
5508 /* Modify the cost function for this CU. */
5509 /* loop in for 8x8 blocks */
5510 if(ps_ctxt->u1_enable_psyRDOPT)
5511 {
5512 UWORD8 *pu1_recon_cu;
5513 WORD32 recon_stride;
5514 WORD32 curr_pos_x;
5515 WORD32 curr_pos_y;
5516 WORD32 start_index;
5517 WORD32 num_horz_cu_in_ctb;
5518 WORD32 had_block_size;
5519
5520 /* tODO: sreenivasa ctb size has to be used appropriately */
5521 had_block_size = 8;
5522 num_horz_cu_in_ctb = 64 / had_block_size;
5523
5524 curr_pos_x = cu_pos_x << 2; /* pel units */
5525 curr_pos_y = cu_pos_y << 2; /* pel units */
5526 recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
5527 pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore
5528 .apv_luma_recon_bufs[0]); // already pointing to the current CU recon
5529 //+ \curr_pos_x + curr_pos_y * recon_stride;
5530
5531 /* start index to index the source satd of curr cu int he current ctb*/
5532 start_index =
5533 (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
5534
5535 {
5536 total_rdopt_cost += ihevce_psy_rd_cost(
5537 ps_ctxt->ai4_source_satd_8x8,
5538 pu1_recon_cu,
5539 recon_stride,
5540 1, //howz stride
5541 cu_size,
5542 0, // pic type
5543 0, //layer id
5544 ps_ctxt->i4_satd_lamda, // lambda
5545 start_index,
5546 ps_ctxt->u1_is_input_data_hbd,
5547 ps_ctxt->u4_psy_strength,
5548 &ps_ctxt->s_cmn_opt_func); // 8 bit
5549 }
5550 }
5551
5552 ps_final_prms->u1_chroma_intra_pred_mode = 4;
5553
5554 /* update the bytes consumed */
5555 ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
5556
5557 /* store the current cu size to final prms */
5558 ps_final_prms->u1_cu_size = cu_size;
5559 /* ------------- Chroma processing -------------- */
5560 /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEDD preset*/
5561 if(ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt &&
5562 !(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)))
5563 {
5564 LWORD64 chrm_rdopt_cost;
5565 WORD32 chrm_rdopt_tu_bits;
5566
5567 /* Store the current RDOPT cost to enable early exit in chrom_prcs */
5568 ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
5569
5570 chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
5571 ps_ctxt,
5572 curr_buf_idx,
5573 0, /* TU mode : Don't care in Inter patrh */
5574 ps_chrm_cu_buf_prms->pu1_curr_src,
5575 ps_chrm_cu_buf_prms->i4_chrm_src_stride,
5576 ps_chrm_cu_buf_prms->pu1_cu_left,
5577 ps_chrm_cu_buf_prms->pu1_cu_top,
5578 ps_chrm_cu_buf_prms->pu1_cu_top_left,
5579 ps_chrm_cu_buf_prms->i4_cu_left_stride,
5580 (cu_pos_x >> 1),
5581 (cu_pos_y >> 1),
5582 &chrm_rdopt_tu_bits,
5583 i4_alpha_stim_multiplier,
5584 u1_is_cu_noisy);
5585
5586 #if WEIGH_CHROMA_COST
5587 chrm_rdopt_cost = (LWORD64)(
5588 (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
5589 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
5590 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
5591 #endif
5592
5593 #if CHROMA_RDOPT_ENABLE
5594 total_rdopt_cost += chrm_rdopt_cost;
5595 #endif
5596 cu_bits += chrm_rdopt_tu_bits;
5597
5598 /* during chroma evaluation if skip decision was over written */
5599 /* then the current skip candidate is set to a non skip candidate */
5600 ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag;
5601
5602 /* cu bits for chroma residual if chroma rdopt is on */
5603 /* if zero_cbf eval is disabled then cu bits will be zero */
5604 ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
5605
5606 if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
5607 {
5608 /* Early exit : If the current running cost exceeds
5609 the prev. best mode cost, break */
5610 if(total_rdopt_cost > prev_best_rdopt_cost)
5611 {
5612 return (total_rdopt_cost);
5613 }
5614 }
5615 }
5616 else
5617 {}
5618
5619 #if SHRINK_INTER_TUTREE
5620 /* ------------- Quadtree TU split optimization ------------ */
5621 if(ps_final_prms->u1_is_cu_coded)
5622 {
5623 ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree(
5624 &ps_final_prms->as_tu_enc_loop[0],
5625 &ps_final_prms->as_tu_enc_loop_temp_prms[0],
5626 &ps_final_prms->s_recon_datastore,
5627 ps_final_prms->u2_num_tus_in_cu,
5628 (ps_ctxt->u1_chroma_array_type == 2));
5629 }
5630 #endif
5631
5632 /* RDOPT copy States : Best after all luma TUs (and chroma,if enabled)to current */
5633 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
5634 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5635 .s_cabac_ctxt.au1_ctxt_models[0] +
5636 IHEVC_CAB_COEFFX_PREFIX,
5637 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
5638 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
5639
5640 /* -------- Bit estimate for RD opt -------------- */
5641 {
5642 nbr_avail_flags_t s_nbr;
5643 /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
5644 WORD32 cbf_bits, header_bits;
5645
5646 /* get the neighbour availability flags for current cu */
5647 ihevce_get_only_nbr_flag(
5648 &s_nbr,
5649 ps_ctxt->pu1_ctb_nbr_map,
5650 ps_ctxt->i4_nbr_map_strd,
5651 cu_pos_x,
5652 cu_pos_y,
5653 (cu_size >> 2),
5654 (cu_size >> 2));
5655
5656 /* call the entropy rdo encode to get the bit estimate for current cu */
5657 header_bits = ihevce_entropy_rdo_encode_cu(
5658 &ps_ctxt->s_rdopt_entropy_ctxt,
5659 ps_final_prms,
5660 (cu_pos_x >> 1), /* back to 8x8 pel units */
5661 (cu_pos_y >> 1), /* back to 8x8 pel units */
5662 cu_size,
5663 ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
5664 : s_nbr.u1_top_avail,
5665 s_nbr.u1_left_avail,
5666 &ps_final_prms->pu1_cu_coeffs[0],
5667 &cbf_bits);
5668
5669 cu_bits += header_bits;
5670
5671 /* cbf bits are excluded from header bits, instead considered as texture bits */
5672 /* incase if zero cbf eval is disabled then texture bits gets added here */
5673 ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
5674 ps_final_prms->u4_cu_cbf_bits = cbf_bits;
5675
5676 #if RDOPT_ENABLE
5677 /* add the cost of coding the header bits */
5678 total_rdopt_cost +=
5679 COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
5680
5681 #if ENABLE_INTER_ZCU_COST
5682 /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */
5683 if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level))
5684 {
5685 LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
5686
5687 WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) &&
5688 (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag);
5689
5690 cab_ctxt_t *ps_cab_ctxt =
5691 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt;
5692
5693 /* Read header bits generatated after ihevce_entropy_rdo_encode_cu() call */
5694 UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12;
5695
5696 /* account for coding qt_root_cbf = 0 */
5697 /* First subtract cost for coding as 1 (part of header bits) and then add cost for coding as 0 */
5698 u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0];
5699 if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1])
5700 u4_cu_hdr_bits_q12 = 0;
5701 else
5702 u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1];
5703
5704 /* add the cost of coding the header bits */
5705 i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30(
5706 u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */,
5707 ps_ctxt->i8_cl_ssd_lambda_qf,
5708 (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
5709
5710 if(ps_ctxt->u1_enable_psyRDOPT)
5711 {
5712 i8_cu_not_coded_cost = total_rdopt_cost + 1;
5713 }
5714
5715 /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */
5716 if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu))
5717 {
5718 WORD32 tx_size;
5719
5720 /* force cu as not coded and update the cost */
5721 ps_final_prms->u1_is_cu_coded = 0;
5722 ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
5723 ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
5724
5725 total_rdopt_cost = i8_cu_not_coded_cost;
5726
5727 /* reset num TUs to 1 unless cu size id 64 */
5728 ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1;
5729 trans_size = (64 == cu_size) ? 32 : cu_size;
5730 GETRANGE(tx_size, trans_size);
5731
5732 /* reset the bytes consumed */
5733 ps_final_prms->i4_num_bytes_ecd_data = 0;
5734
5735 /* reset texture related bits and roll back header bits*/
5736 ps_final_prms->u4_cu_cbf_bits = 0;
5737 ps_final_prms->u4_cu_luma_res_bits = 0;
5738 ps_final_prms->u4_cu_chroma_res_bits = 0;
5739 ps_final_prms->u4_cu_hdr_bits =
5740 (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q;
5741
5742 /* update cabac model with qtroot cbf = 0 decision */
5743 ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] =
5744 gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1];
5745
5746 /* restore untouched cabac models for, tusplit, cbfs, texture etc */
5747 memcpy(
5748 &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM],
5749 &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM],
5750 (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM));
5751
5752 /* mark all tus as not coded for final eval */
5753 for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++)
5754 {
5755 WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0;
5756 WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0;
5757
5758 nbr_4x4_t *ps_cur_nbr_4x4 =
5759 ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu);
5760
5761 num_4x4_in_tu = trans_size >> 2;
5762
5763 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0;
5764 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0;
5765 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0;
5766
5767 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0;
5768 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
5769 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
5770
5771 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
5772 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
5773
5774 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
5775 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x;
5776 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y;
5777
5778 /* reset cbf for the all 4x4 in TU */
5779 {
5780 WORD32 i, j;
5781 nbr_4x4_t *ps_tmp_4x4;
5782 ps_tmp_4x4 = ps_cur_nbr_4x4;
5783
5784 for(i = 0; i < num_4x4_in_tu; i++)
5785 {
5786 for(j = 0; j < num_4x4_in_tu; j++)
5787 {
5788 ps_tmp_4x4[j].b1_y_cbf = 0;
5789 }
5790 /* row level update*/
5791 ps_tmp_4x4 += num_4x4_in_cu;
5792 }
5793 }
5794 }
5795 }
5796 }
5797 #endif /* ENABLE_INTER_ZCU_COST */
5798
5799 #endif /* RDOPT_ENABLE */
5800 }
5801
5802 return (total_rdopt_cost);
5803 }
5804 #endif
5805
5806 /*!
5807 ******************************************************************************
5808 * \if Function name : ihevce_inter_rdopt_cu_mc_mvp \endif
5809 *
5810 * \brief
5811 * Inter Coding unit funtion which performs MC and MVP calc for RD opt mode
5812 *
5813 * \param[in] ps_ctxt enc_loop module ctxt pointer
5814 * \param[in] ps_inter_cand pointer to inter candidate structure
5815 * \param[in] cu_size Current CU size
5816 * \param[in] cu_pos_x cu position x w.r.t to ctb
5817 * \param[in] cu_pos_y cu position y w.r.t to ctb
5818 * \param[in] ps_left_nbr_4x4 Left neighbour 4x4 structure pointer
5819 * \param[in] ps_top_nbr_4x4 top neighbour 4x4 structure pointer
5820 * \param[in] ps_topleft_nbr_4x4 top left neighbour 4x4 structure pointer
5821 * \param[in] nbr_4x4_left_strd left neighbour 4x4 buffer stride
5822 * \param[in] curr_buf_idx Current Buffer index
5823 *
5824 * \return
5825 * Rdopt cost
5826 *
5827 * \author
5828 * Ittiam
5829 *
5830 *****************************************************************************
5831 */
ihevce_inter_rdopt_cu_mc_mvp(ihevce_enc_loop_ctxt_t * ps_ctxt,cu_inter_cand_t * ps_inter_cand,WORD32 cu_size,WORD32 cu_pos_x,WORD32 cu_pos_y,nbr_4x4_t * ps_left_nbr_4x4,nbr_4x4_t * ps_top_nbr_4x4,nbr_4x4_t * ps_topleft_nbr_4x4,WORD32 nbr_4x4_left_strd,WORD32 curr_buf_idx)5832 LWORD64 ihevce_inter_rdopt_cu_mc_mvp(
5833 ihevce_enc_loop_ctxt_t *ps_ctxt,
5834 cu_inter_cand_t *ps_inter_cand,
5835 WORD32 cu_size,
5836 WORD32 cu_pos_x,
5837 WORD32 cu_pos_y,
5838 nbr_4x4_t *ps_left_nbr_4x4,
5839 nbr_4x4_t *ps_top_nbr_4x4,
5840 nbr_4x4_t *ps_topleft_nbr_4x4,
5841 WORD32 nbr_4x4_left_strd,
5842 WORD32 curr_buf_idx)
5843 {
5844 /* local variables */
5845 enc_loop_cu_final_prms_t *ps_final_prms;
5846 nbr_avail_flags_t s_nbr;
5847 nbr_4x4_t *ps_nbr_4x4;
5848
5849 UWORD8 au1_is_top_used[2][MAX_MVP_LIST_CAND];
5850 UWORD8 *pu1_pred;
5851 WORD32 rdopt_cost;
5852 WORD32 ctr;
5853 WORD32 num_cu_part;
5854 WORD32 inter_pu_wd;
5855 WORD32 inter_pu_ht;
5856 WORD32 pred_stride;
5857
5858 /* get the pointers based on curbuf idx */
5859 ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
5860 ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
5861 pu1_pred = ps_inter_cand->pu1_pred_data;
5862
5863 pred_stride = ps_inter_cand->i4_pred_data_stride;
5864
5865 /* store the partition mode in final prms */
5866 ps_final_prms->u1_part_mode = ps_inter_cand->b3_part_size;
5867
5868 /* since encoder does not support NXN part type */
5869 /* num parts can be either 1 or 2 only */
5870 ASSERT(SIZE_NxN != ps_inter_cand->b3_part_size);
5871
5872 num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1;
5873
5874 /* get the 4x4 level position of current cu */
5875 cu_pos_x = cu_pos_x << 1;
5876 cu_pos_y = cu_pos_y << 1;
5877
5878 /* populate cu level params */
5879 ps_final_prms->u1_intra_flag = PRED_MODE_INTER;
5880 ps_final_prms->u2_num_pus_in_cu = num_cu_part;
5881
5882 /* run a loop over all the partitons in cu */
5883 for(ctr = 0; ctr < num_cu_part; ctr++)
5884 {
5885 pu_mv_t as_pred_mv[MAX_MVP_LIST_CAND];
5886 pu_t *ps_pu;
5887 WORD32 skip_or_merge_flag;
5888 UWORD8 u1_use_mvp_from_top_row;
5889
5890 ps_pu = &ps_inter_cand->as_inter_pu[ctr];
5891
5892 /* IF AMP then each partitions can have diff wd ht */
5893 inter_pu_wd = (ps_pu->b4_wd + 1) << 2;
5894 inter_pu_ht = (ps_pu->b4_ht + 1) << 2;
5895
5896 /* populate reference pic buf id for bs compute */
5897
5898 /* L0 */
5899 if(-1 != ps_pu->mv.i1_l0_ref_idx)
5900 {
5901 ps_pu->mv.i1_l0_ref_pic_buf_id =
5902 ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx]->i4_buf_id;
5903 }
5904
5905 /* L1 */
5906 if(-1 != ps_pu->mv.i1_l1_ref_idx)
5907 {
5908 ps_pu->mv.i1_l1_ref_pic_buf_id =
5909 ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx]->i4_buf_id;
5910 }
5911
5912 /* SKIP or merge check for every part */
5913 skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag;
5914
5915 /* ----------- MV Prediction ----------------- */
5916 if(0 == skip_or_merge_flag)
5917 {
5918 /* get the neighbour availability flags */
5919 ihevce_get_only_nbr_flag(
5920 &s_nbr,
5921 ps_ctxt->pu1_ctb_nbr_map,
5922 ps_ctxt->i4_nbr_map_strd,
5923 cu_pos_x,
5924 cu_pos_y,
5925 inter_pu_wd >> 2,
5926 inter_pu_ht >> 2);
5927
5928 if(ps_ctxt->u1_disable_intra_eval && DISABLE_TOP_SYNC && (ps_pu->b4_pos_y == 0))
5929 {
5930 u1_use_mvp_from_top_row = 0;
5931 }
5932 else
5933 {
5934 u1_use_mvp_from_top_row = 1;
5935 }
5936
5937 if(!u1_use_mvp_from_top_row)
5938 {
5939 if(s_nbr.u1_top_avail || s_nbr.u1_top_lt_avail || s_nbr.u1_top_rt_avail)
5940 {
5941 if(!s_nbr.u1_left_avail && !s_nbr.u1_bot_lt_avail)
5942 {
5943 WORD32 curr_cu_pos_in_row, cu_top_right_offset, cu_top_right_dep_pos;
5944
5945 /* Ensure Top Right Sync */
5946 if(!ps_ctxt->u1_use_top_at_ctb_boundary)
5947 {
5948 curr_cu_pos_in_row =
5949 ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x + (cu_pos_x << 2);
5950
5951 if(ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y == 0)
5952 {
5953 /* No wait for 1st row */
5954 cu_top_right_offset = -(MAX_CTB_SIZE);
5955 {
5956 ihevce_tile_params_t *ps_col_tile_params =
5957 ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
5958 ps_ctxt->i4_tile_col_idx);
5959
5960 /* No wait for 1st row */
5961 cu_top_right_offset =
5962 -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
5963 }
5964 cu_top_right_dep_pos = 0;
5965 }
5966 else
5967 {
5968 cu_top_right_offset = (cu_size) + 4;
5969 cu_top_right_dep_pos =
5970 (ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y >> 6) - 1;
5971 }
5972
5973 ihevce_dmgr_chk_row_row_sync(
5974 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
5975 curr_cu_pos_in_row,
5976 cu_top_right_offset,
5977 cu_top_right_dep_pos,
5978 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
5979 ps_ctxt->thrd_id);
5980 }
5981
5982 u1_use_mvp_from_top_row = 1;
5983 }
5984 else
5985 {
5986 s_nbr.u1_top_avail = 0;
5987 s_nbr.u1_top_lt_avail = 0;
5988 s_nbr.u1_top_rt_avail = 0;
5989 }
5990 }
5991 else
5992 {
5993 u1_use_mvp_from_top_row = 1;
5994 }
5995 }
5996 /* Call the MV prediction module to get MVP */
5997 ihevce_mv_pred(
5998 &ps_ctxt->s_mv_pred_ctxt,
5999 ps_top_nbr_4x4,
6000 ps_left_nbr_4x4,
6001 ps_topleft_nbr_4x4,
6002 nbr_4x4_left_strd,
6003 &s_nbr,
6004 NULL, /* colocated MV */
6005 ps_pu,
6006 &as_pred_mv[0],
6007 au1_is_top_used);
6008 }
6009
6010 /* store the nbr 4x4 structure */
6011 ps_nbr_4x4->b1_skip_flag = ps_inter_cand->b1_skip_flag;
6012 ps_nbr_4x4->b1_intra_flag = 0;
6013 ps_nbr_4x4->b1_pred_l0_flag = 0;
6014 ps_nbr_4x4->b1_pred_l1_flag = 0;
6015
6016 /* DC is default mode for inter cu, required for intra mode signalling */
6017 ps_nbr_4x4->b6_luma_intra_mode = 1;
6018
6019 /* copy the motion vectors to neighbour structure */
6020 ps_nbr_4x4->mv = ps_pu->mv;
6021
6022 /* copy the PU to final out pu */
6023 ps_final_prms->as_pu_enc_loop[ctr] = *ps_pu;
6024
6025 /* copy the PU to chroma */
6026 ps_final_prms->as_pu_chrm_proc[ctr] = *ps_pu;
6027
6028 /* store the skip flag to final prms */
6029 ps_final_prms->u1_skip_flag = ps_inter_cand->b1_skip_flag;
6030
6031 /* MVP index & MVD calc is gated on skip/merge flag */
6032 if(0 == skip_or_merge_flag)
6033 {
6034 /* calculate the MVDs and popluate the MVP idx for L0 */
6035 if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode))
6036 {
6037 WORD32 idx0_cost, idx1_cost;
6038
6039 /* calculate the ABS mvd for cand 0 */
6040 idx0_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[0].s_l0_mv.i2_mvx);
6041 idx0_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[0].s_l0_mv.i2_mvy);
6042
6043 /* calculate the ABS mvd for cand 1 */
6044 if(u1_use_mvp_from_top_row)
6045 {
6046 idx1_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[1].s_l0_mv.i2_mvx);
6047 idx1_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[1].s_l0_mv.i2_mvy);
6048 }
6049 else
6050 {
6051 idx1_cost = INT_MAX;
6052 }
6053
6054 /* based on the least cost choose the mvp idx */
6055 if(idx0_cost <= idx1_cost)
6056 {
6057 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -=
6058 as_pred_mv[0].s_l0_mv.i2_mvx;
6059 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -=
6060 as_pred_mv[0].s_l0_mv.i2_mvy;
6061
6062 ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 0;
6063 }
6064 else
6065 {
6066 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -=
6067 as_pred_mv[1].s_l0_mv.i2_mvx;
6068 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -=
6069 as_pred_mv[1].s_l0_mv.i2_mvy;
6070
6071 ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 1;
6072 }
6073
6074 /* set the pred l0 flag for neighbour storage */
6075 ps_nbr_4x4->b1_pred_l0_flag = 1;
6076 }
6077 /* calculate the MVDs and popluate the MVP idx for L1 */
6078 if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode))
6079 {
6080 WORD32 idx0_cost, idx1_cost;
6081
6082 /* calculate the ABS mvd for cand 0 */
6083 idx0_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[0].s_l1_mv.i2_mvx);
6084 idx0_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[0].s_l1_mv.i2_mvy);
6085
6086 /* calculate the ABS mvd for cand 1 */
6087 if(u1_use_mvp_from_top_row)
6088 {
6089 idx1_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[1].s_l1_mv.i2_mvx);
6090 idx1_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[1].s_l1_mv.i2_mvy);
6091 }
6092 else
6093 {
6094 idx1_cost = INT_MAX;
6095 }
6096
6097 /* based on the least cost choose the mvp idx */
6098 if(idx0_cost <= idx1_cost)
6099 {
6100 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -=
6101 as_pred_mv[0].s_l1_mv.i2_mvx;
6102 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -=
6103 as_pred_mv[0].s_l1_mv.i2_mvy;
6104
6105 ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 0;
6106 }
6107 else
6108 {
6109 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -=
6110 as_pred_mv[1].s_l1_mv.i2_mvx;
6111 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -=
6112 as_pred_mv[1].s_l1_mv.i2_mvy;
6113
6114 ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 1;
6115 }
6116
6117 /* set the pred l1 flag for neighbour storage */
6118 ps_nbr_4x4->b1_pred_l1_flag = 1;
6119 }
6120
6121 /* set the merge flag to 0 */
6122 ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = 0;
6123 ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = 0;
6124 }
6125 else
6126 {
6127 /* copy the merge index from candidate */
6128 ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = ps_pu->b1_merge_flag;
6129
6130 ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = ps_pu->b3_merge_idx;
6131
6132 if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode))
6133 {
6134 /* set the pred l0 flag for neighbour storage */
6135 ps_nbr_4x4->b1_pred_l0_flag = 1;
6136 }
6137
6138 /* calculate the MVDs and popluate the MVP idx for L1 */
6139 if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode))
6140 {
6141 /* set the pred l1 flag for neighbour storage */
6142 ps_nbr_4x4->b1_pred_l1_flag = 1;
6143 }
6144 }
6145
6146 /* RD opt cost computation is part of cu_ntu func hence here it is set to 0 */
6147 rdopt_cost = 0;
6148
6149 /* copy the MV to colocated Mv structure */
6150 ps_final_prms->as_col_pu_enc_loop[ctr].s_l0_mv = ps_pu->mv.s_l0_mv;
6151 ps_final_prms->as_col_pu_enc_loop[ctr].s_l1_mv = ps_pu->mv.s_l1_mv;
6152 ps_final_prms->as_col_pu_enc_loop[ctr].i1_l0_ref_idx = ps_pu->mv.i1_l0_ref_idx;
6153 ps_final_prms->as_col_pu_enc_loop[ctr].i1_l1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
6154 ps_final_prms->as_col_pu_enc_loop[ctr].b2_pred_mode = ps_pu->b2_pred_mode;
6155 ps_final_prms->as_col_pu_enc_loop[ctr].b1_intra_flag = 0;
6156
6157 /* replicate neighbour 4x4 strcuture for entire partition */
6158 {
6159 WORD32 i, j;
6160 nbr_4x4_t *ps_tmp_4x4;
6161
6162 ps_tmp_4x4 = ps_nbr_4x4;
6163
6164 for(i = 0; i < (inter_pu_ht >> 2); i++)
6165 {
6166 for(j = 0; j < (inter_pu_wd >> 2); j++)
6167 {
6168 ps_tmp_4x4[j] = *ps_nbr_4x4;
6169 }
6170 /* row level update*/
6171 ps_tmp_4x4 += (cu_size >> 2);
6172 }
6173 }
6174 /* set the neighbour map to 1 */
6175 ihevce_set_inter_nbr_map(
6176 ps_ctxt->pu1_ctb_nbr_map,
6177 ps_ctxt->i4_nbr_map_strd,
6178 cu_pos_x,
6179 cu_pos_y,
6180 (inter_pu_wd >> 2),
6181 (inter_pu_ht >> 2),
6182 1);
6183 /* ----------- Motion Compensation for Luma ----------- */
6184 #if !ENABLE_MIXED_INTER_MODE_EVAL
6185 {
6186 IV_API_CALL_STATUS_T valid_mv_cand;
6187
6188 /*If the inter candidate is neither merge cand nor skip cand
6189 then calculate the mc.*/
6190 if(0 == skip_or_merge_flag || (ps_ctxt->u1_high_speed_cu_dec_on))
6191 {
6192 valid_mv_cand =
6193 ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 0);
6194
6195 /* assert if the MC is given a valid mv candidate */
6196 ASSERT(valid_mv_cand == IV_SUCCESS);
6197 }
6198 }
6199 #endif
6200 if((2 == num_cu_part) && (0 == ctr))
6201 {
6202 /* 2Nx__ partion case */
6203 if(inter_pu_wd == cu_size)
6204 {
6205 cu_pos_y += (inter_pu_ht >> 2);
6206 pu1_pred += (inter_pu_ht * pred_stride);
6207 ps_nbr_4x4 += (inter_pu_ht >> 2) * (cu_size >> 2);
6208 ps_left_nbr_4x4 += (inter_pu_ht >> 2) * nbr_4x4_left_strd;
6209 ps_top_nbr_4x4 = ps_nbr_4x4 - (cu_size >> 2);
6210 ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - nbr_4x4_left_strd;
6211 }
6212
6213 /* __x2N partion case */
6214 if(inter_pu_ht == cu_size)
6215 {
6216 cu_pos_x += (inter_pu_wd >> 2);
6217 pu1_pred += inter_pu_wd;
6218 ps_nbr_4x4 += (inter_pu_wd >> 2);
6219 ps_left_nbr_4x4 = ps_nbr_4x4 - 1;
6220 ps_top_nbr_4x4 += (inter_pu_wd >> 2);
6221 ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1;
6222 nbr_4x4_left_strd = (cu_size >> 2);
6223 }
6224 }
6225 }
6226
6227 return (rdopt_cost);
6228 }
6229
6230 /*!
6231 ******************************************************************************
6232 * \if Function name : ihevce_intra_chroma_pred_mode_selector \endif
6233 *
6234 * \brief
6235 * Coding unit processing function for chroma special modes (Non-Luma modes)
6236 *
6237 * \param[in] ps_ctxt enc_loop module ctxt pointer
6238 * \param[in] ps_chrm_cu_buf_prms ctxt having chroma related prms
6239 * \param[in] ps_cu_analyse pointer to cu analyse
6240 * \param[in] rd_opt_curr_idx index in the array of RDopt params
6241 * \param[in] tu_mode TU_EQ_CU or other case
6242 *
6243 * \return
6244 * Stores the best SATD mode, it's RDOPT cost, CABAC state, TU bits
6245 *
6246 * \author
6247 * Ittiam
6248 *
6249 *****************************************************************************
6250 */
ihevce_distortion_based_intra_chroma_mode_selector(cu_analyse_t * ps_cu_analyse,ihevc_intra_pred_chroma_ref_substitution_ft * pf_ref_substitution,pf_intra_pred * ppf_chroma_ip,pf_res_trans_luma_had_chroma * ppf_resd_trns_had,UWORD8 * pu1_src,WORD32 i4_src_stride,UWORD8 * pu1_pred,WORD32 i4_pred_stride,UWORD8 * pu1_ctb_nbr_map,WORD32 i4_nbr_map_strd,UWORD8 * pu1_ref_sub_out,WORD32 i4_alpha_stim_multiplier,UWORD8 u1_is_cu_noisy,UWORD8 u1_trans_size,UWORD8 u1_trans_idx,UWORD8 u1_num_tus_in_cu,UWORD8 u1_num_4x4_luma_blks_in_tu,UWORD8 u1_enable_psyRDOPT,UWORD8 u1_is_422)6251 UWORD8 ihevce_distortion_based_intra_chroma_mode_selector(
6252 cu_analyse_t *ps_cu_analyse,
6253 ihevc_intra_pred_chroma_ref_substitution_ft *pf_ref_substitution,
6254 pf_intra_pred *ppf_chroma_ip,
6255 pf_res_trans_luma_had_chroma *ppf_resd_trns_had,
6256 UWORD8 *pu1_src,
6257 WORD32 i4_src_stride,
6258 UWORD8 *pu1_pred,
6259 WORD32 i4_pred_stride,
6260 UWORD8 *pu1_ctb_nbr_map,
6261 WORD32 i4_nbr_map_strd,
6262 UWORD8 *pu1_ref_sub_out,
6263 WORD32 i4_alpha_stim_multiplier,
6264 UWORD8 u1_is_cu_noisy,
6265 UWORD8 u1_trans_size,
6266 UWORD8 u1_trans_idx,
6267 UWORD8 u1_num_tus_in_cu,
6268 UWORD8 u1_num_4x4_luma_blks_in_tu,
6269 UWORD8 u1_enable_psyRDOPT,
6270 UWORD8 u1_is_422)
6271 {
6272 UWORD8 u1_chrm_mode;
6273 UWORD8 ctr;
6274 WORD32 i4_subtu_idx;
6275
6276 WORD32 i = 0;
6277 UWORD8 u1_chrm_modes[4] = { 0, 1, 10, 26 };
6278 WORD32 i4_satd_had[4] = { 0 };
6279 WORD32 i4_best_satd_had = INT_MAX;
6280 UWORD8 u1_cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1);
6281 UWORD8 u1_cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1);
6282 WORD32 i4_num_sub_tus = u1_is_422 + 1;
6283 UWORD8 u1_best_chrm_mode = 0;
6284
6285 /* Get the best satd among all possible modes */
6286 for(i = 0; i < 4; i++)
6287 {
6288 WORD32 left_strd = i4_src_stride;
6289
6290 u1_chrm_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[u1_chrm_modes[i]]
6291 : u1_chrm_modes[i];
6292
6293 /* loop based on num tus in a cu */
6294 for(ctr = 0; ctr < u1_num_tus_in_cu; ctr++)
6295 {
6296 WORD32 luma_nbr_flags;
6297 WORD32 chrm_pred_func_idx;
6298
6299 WORD32 i4_trans_size_m2 = u1_trans_size << 1;
6300 UWORD8 *pu1_tu_src = pu1_src + ((ctr & 1) * i4_trans_size_m2) +
6301 (((ctr > 1) * u1_trans_size * i4_src_stride) << u1_is_422);
6302 UWORD8 *pu1_tu_pred = pu1_pred + ((ctr & 1) * i4_trans_size_m2) +
6303 (((ctr > 1) * u1_trans_size * i4_pred_stride) << u1_is_422);
6304 WORD32 i4_curr_tu_pos_x = u1_cu_pos_x + ((ctr & 1) * u1_num_4x4_luma_blks_in_tu);
6305 WORD32 i4_curr_tu_pos_y = u1_cu_pos_y + ((ctr > 1) * u1_num_4x4_luma_blks_in_tu);
6306
6307 luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
6308 pu1_ctb_nbr_map,
6309 i4_nbr_map_strd,
6310 i4_curr_tu_pos_x,
6311 i4_curr_tu_pos_y,
6312 u1_num_4x4_luma_blks_in_tu,
6313 u1_num_4x4_luma_blks_in_tu);
6314
6315 for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++)
6316 {
6317 WORD32 nbr_flags;
6318
6319 UWORD8 *pu1_cur_src =
6320 pu1_tu_src + ((i4_subtu_idx == 1) * u1_trans_size * i4_src_stride);
6321 UWORD8 *pu1_cur_pred =
6322 pu1_tu_pred + ((i4_subtu_idx == 1) * u1_trans_size * i4_pred_stride);
6323 UWORD8 *pu1_left = pu1_cur_src - 2;
6324 UWORD8 *pu1_top = pu1_cur_src - i4_src_stride;
6325 UWORD8 *pu1_top_left = pu1_top - 2;
6326
6327 nbr_flags = ihevce_get_intra_chroma_tu_nbr(
6328 luma_nbr_flags, i4_subtu_idx, u1_trans_size, u1_is_422);
6329
6330 /* call the chroma reference array substitution */
6331 pf_ref_substitution(
6332 pu1_top_left,
6333 pu1_top,
6334 pu1_left,
6335 left_strd,
6336 u1_trans_size,
6337 nbr_flags,
6338 pu1_ref_sub_out,
6339 1);
6340
6341 /* use the look up to get the function idx */
6342 chrm_pred_func_idx = g_i4_ip_funcs[u1_chrm_mode];
6343
6344 /* call the intra prediction function */
6345 ppf_chroma_ip[chrm_pred_func_idx](
6346 pu1_ref_sub_out, 1, pu1_cur_pred, i4_pred_stride, u1_trans_size, u1_chrm_mode);
6347
6348 if(!u1_is_cu_noisy || !i4_alpha_stim_multiplier)
6349 {
6350 /* compute Hadamard-transform satd : Cb */
6351 i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1](
6352 pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0);
6353
6354 /* compute Hadamard-transform satd : Cr */
6355 i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1](
6356 pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0);
6357 }
6358 else
6359 {
6360 WORD32 i4_satd;
6361
6362 /* compute Hadamard-transform satd : Cb */
6363 i4_satd = ppf_resd_trns_had[u1_trans_idx - 1](
6364 pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0);
6365
6366 i4_satd = ihevce_inject_stim_into_distortion(
6367 pu1_cur_src,
6368 i4_src_stride,
6369 pu1_cur_pred,
6370 i4_pred_stride,
6371 i4_satd,
6372 i4_alpha_stim_multiplier,
6373 u1_trans_size,
6374 0,
6375 u1_enable_psyRDOPT,
6376 U_PLANE);
6377
6378 i4_satd_had[i] += i4_satd;
6379
6380 /* compute Hadamard-transform satd : Cr */
6381 i4_satd = ppf_resd_trns_had[u1_trans_idx - 1](
6382 pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0);
6383
6384 i4_satd = ihevce_inject_stim_into_distortion(
6385 pu1_cur_src,
6386 i4_src_stride,
6387 pu1_cur_pred,
6388 i4_pred_stride,
6389 i4_satd,
6390 i4_alpha_stim_multiplier,
6391 u1_trans_size,
6392 0,
6393 u1_enable_psyRDOPT,
6394 V_PLANE);
6395
6396 i4_satd_had[i] += i4_satd;
6397 }
6398 }
6399
6400 /* set the neighbour map to 1 */
6401 ihevce_set_nbr_map(
6402 pu1_ctb_nbr_map,
6403 i4_nbr_map_strd,
6404 i4_curr_tu_pos_x,
6405 i4_curr_tu_pos_y,
6406 u1_num_4x4_luma_blks_in_tu,
6407 1);
6408 }
6409
6410 /* set the neighbour map to 0 */
6411 ihevce_set_nbr_map(
6412 pu1_ctb_nbr_map,
6413 i4_nbr_map_strd,
6414 (ps_cu_analyse->b3_cu_pos_x << 1),
6415 (ps_cu_analyse->b3_cu_pos_y << 1),
6416 (ps_cu_analyse->u1_cu_size >> 2),
6417 0);
6418
6419 /* Get the least SATD and corresponding mode */
6420 if(i4_best_satd_had > i4_satd_had[i])
6421 {
6422 i4_best_satd_had = i4_satd_had[i];
6423 u1_best_chrm_mode = u1_chrm_mode;
6424 }
6425 }
6426
6427 return u1_best_chrm_mode;
6428 }
6429
ihevce_intra_chroma_pred_mode_selector(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_chrm_cu_buf_prms_t * ps_chrm_cu_buf_prms,cu_analyse_t * ps_cu_analyse,WORD32 rd_opt_curr_idx,WORD32 tu_mode,WORD32 i4_alpha_stim_multiplier,UWORD8 u1_is_cu_noisy)6430 void ihevce_intra_chroma_pred_mode_selector(
6431 ihevce_enc_loop_ctxt_t *ps_ctxt,
6432 enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
6433 cu_analyse_t *ps_cu_analyse,
6434 WORD32 rd_opt_curr_idx,
6435 WORD32 tu_mode,
6436 WORD32 i4_alpha_stim_multiplier,
6437 UWORD8 u1_is_cu_noisy)
6438 {
6439 chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt;
6440
6441 ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
6442
6443 UWORD8 *pu1_pred;
6444 WORD32 trans_size;
6445 WORD32 num_tus_in_cu;
6446 WORD32 pred_strd;
6447 WORD32 ctr;
6448 WORD32 i4_subtu_idx;
6449 WORD32 i4_num_sub_tus;
6450 WORD32 trans_idx;
6451 WORD32 scan_idx;
6452 WORD32 num_4x4_luma_in_tu;
6453 WORD32 cu_pos_x;
6454 WORD32 cu_pos_y;
6455
6456 recon_datastore_t *aps_recon_datastore[2] = { &ps_ctxt->as_cu_prms[0].s_recon_datastore,
6457 &ps_ctxt->as_cu_prms[1].s_recon_datastore };
6458
6459 LWORD64 chrm_cod_cost = 0;
6460 WORD32 chrm_tu_bits = 0;
6461 WORD32 best_chrm_mode = DM_CHROMA_IDX;
6462 UWORD8 *pu1_chrm_src = ps_chrm_cu_buf_prms->pu1_curr_src;
6463 WORD32 chrm_src_stride = ps_chrm_cu_buf_prms->i4_chrm_src_stride;
6464 UWORD8 *pu1_cu_left = ps_chrm_cu_buf_prms->pu1_cu_left;
6465 UWORD8 *pu1_cu_top = ps_chrm_cu_buf_prms->pu1_cu_top;
6466 UWORD8 *pu1_cu_top_left = ps_chrm_cu_buf_prms->pu1_cu_top_left;
6467 WORD32 cu_left_stride = ps_chrm_cu_buf_prms->i4_cu_left_stride;
6468 WORD32 cu_size = ps_cu_analyse->u1_cu_size;
6469 WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
6470 WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
6471 UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
6472
6473 ihevc_intra_pred_chroma_ref_substitution_fptr =
6474 ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
6475 i4_num_sub_tus = (u1_is_422 == 1) + 1;
6476
6477 #if DISABLE_RDOQ_INTRA
6478 i4_perform_rdoq = 0;
6479 #endif
6480
6481 if(TU_EQ_CU == tu_mode)
6482 {
6483 num_tus_in_cu = 1;
6484 trans_size = cu_size >> 1;
6485 num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/
6486 ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode];
6487 }
6488 else
6489 {
6490 num_tus_in_cu = 4;
6491 trans_size = cu_size >> 2;
6492 num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/
6493
6494 /* For 8x8 CU only one TU */
6495 if(MIN_TU_SIZE > trans_size)
6496 {
6497 trans_size = MIN_TU_SIZE;
6498 num_tus_in_cu = 1;
6499 /* chroma nbr avail. is derived based on luma.
6500 for 4x4 chrm use 8x8 luma's size */
6501 num_4x4_luma_in_tu = num_4x4_luma_in_tu << 1;
6502 }
6503
6504 ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode];
6505 }
6506
6507 /* Can't be TU_EQ_SUBCU case */
6508 ASSERT(TU_EQ_SUBCU != tu_mode);
6509
6510 /* translate the transform size to index */
6511 trans_idx = trans_size >> 2;
6512
6513 pu1_pred = (UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data;
6514
6515 pred_strd = ps_chr_intra_satd_ctxt->i4_pred_stride;
6516
6517 /* for 16x16 cases */
6518 if(16 == trans_size)
6519 {
6520 trans_idx = 3;
6521 }
6522
6523 best_chrm_mode = ihevce_distortion_based_intra_chroma_mode_selector(
6524 ps_cu_analyse,
6525 ihevc_intra_pred_chroma_ref_substitution_fptr,
6526 ps_ctxt->apf_chrm_ip,
6527 ps_ctxt->apf_chrm_resd_trns_had,
6528 pu1_chrm_src,
6529 chrm_src_stride,
6530 pu1_pred,
6531 pred_strd,
6532 ps_ctxt->pu1_ctb_nbr_map,
6533 ps_ctxt->i4_nbr_map_strd,
6534 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
6535 i4_alpha_stim_multiplier,
6536 u1_is_cu_noisy,
6537 trans_size,
6538 trans_idx,
6539 num_tus_in_cu,
6540 num_4x4_luma_in_tu,
6541 ps_ctxt->u1_enable_psyRDOPT,
6542 u1_is_422);
6543
6544 /* Store the best chroma mode */
6545 ps_chr_intra_satd_ctxt->u1_best_cr_mode = best_chrm_mode;
6546
6547 /* evaluate RDOPT cost for the Best mode */
6548 {
6549 WORD32 i4_subtu_pos_x;
6550 WORD32 i4_subtu_pos_y;
6551 UWORD8 u1_compute_spatial_ssd;
6552
6553 WORD32 ai4_total_bytes_offset_cb[2] = { 0, 0 };
6554 WORD32 ai4_total_bytes_offset_cr[2] = { 0, 0 };
6555 /* State for prefix bin of chroma intra pred mode before CU encode */
6556 UWORD8 u1_chroma_intra_mode_prefix_state =
6557 ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_CHROMA_PRED_MODE];
6558 WORD32 luma_trans_size = trans_size << 1;
6559 WORD32 calc_recon = 0;
6560 UWORD8 *pu1_left = pu1_cu_left;
6561 UWORD8 *pu1_top = pu1_cu_top;
6562 UWORD8 *pu1_top_left = pu1_cu_top_left;
6563 WORD32 left_strd = cu_left_stride;
6564
6565 if(ps_ctxt->i1_cu_qp_delta_enable)
6566 {
6567 WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0;
6568 if(ps_cu_analyse->u1_cu_size == 64)
6569 {
6570 ASSERT(
6571 (luma_trans_size == 32) || (luma_trans_size == 16) || (luma_trans_size == 8) ||
6572 (luma_trans_size == 4));
6573 i4_act_counter = (luma_trans_size == 16) +
6574 2 * ((luma_trans_size == 8) || (luma_trans_size == 4));
6575 i4_act_counter_lamda = 3;
6576 }
6577 else if(ps_cu_analyse->u1_cu_size == 32)
6578 {
6579 ASSERT(
6580 (luma_trans_size == 32) || (luma_trans_size == 16) || (luma_trans_size == 8) ||
6581 (luma_trans_size == 4));
6582 i4_act_counter = (luma_trans_size == 16) +
6583 2 * ((luma_trans_size == 8) || (luma_trans_size == 4));
6584 i4_act_counter_lamda = 0;
6585 }
6586 else if(ps_cu_analyse->u1_cu_size == 16)
6587 {
6588 ASSERT((luma_trans_size == 16) || (luma_trans_size == 8) || (luma_trans_size == 4));
6589 i4_act_counter = (luma_trans_size == 8) || (luma_trans_size == 4);
6590 i4_act_counter_lamda = 0;
6591 }
6592 else if(ps_cu_analyse->u1_cu_size == 8)
6593 {
6594 ASSERT((luma_trans_size == 8) || (luma_trans_size == 4));
6595 i4_act_counter = 1;
6596 i4_act_counter_lamda = 0;
6597 }
6598 else
6599 {
6600 ASSERT(0);
6601 }
6602 /*assumption is that control comes here for intras*/
6603 if(ps_ctxt->i4_use_ctb_level_lamda)
6604 {
6605 ihevce_compute_cu_level_QP(
6606 ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][1], -1, 0);
6607 }
6608 else
6609 {
6610 ihevce_compute_cu_level_QP(
6611 ps_ctxt,
6612 ps_cu_analyse->i4_act_factor[i4_act_counter][1],
6613 ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][1],
6614 0);
6615 }
6616
6617 ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp;
6618 }
6619
6620 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
6621 (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
6622 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
6623
6624 if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
6625 {
6626 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
6627 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
6628 }
6629
6630 /* get the 4x4 level postion of current cu */
6631 cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1);
6632 cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1);
6633
6634 calc_recon = !u1_compute_spatial_ssd && ((4 == num_tus_in_cu) || (u1_is_422 == 1));
6635
6636 if(calc_recon || u1_compute_spatial_ssd)
6637 {
6638 aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1;
6639 aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1;
6640 }
6641 else
6642 {
6643 aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0;
6644 aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0;
6645 }
6646
6647 /* loop based on num tus in a cu */
6648 for(ctr = 0; ctr < num_tus_in_cu; ctr++)
6649 {
6650 WORD16 *pi2_cur_deq_data_cb;
6651 WORD16 *pi2_cur_deq_data_cr;
6652
6653 WORD32 deq_data_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride;
6654 WORD32 luma_nbr_flags = 0;
6655
6656 luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
6657 ps_ctxt->pu1_ctb_nbr_map,
6658 ps_ctxt->i4_nbr_map_strd,
6659 (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x,
6660 (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y,
6661 (luma_trans_size >> 2),
6662 (luma_trans_size >> 2));
6663
6664 for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++)
6665 {
6666 WORD32 cbf, num_bytes;
6667 LWORD64 trans_ssd_u, trans_ssd_v;
6668 UWORD8 u1_is_recon_available;
6669
6670 WORD32 trans_size_m2 = trans_size << 1;
6671 UWORD8 *pu1_cur_src = pu1_chrm_src + ((ctr & 1) * trans_size_m2) +
6672 (((ctr > 1) * trans_size * chrm_src_stride) << u1_is_422) +
6673 (i4_subtu_idx * trans_size * chrm_src_stride);
6674 UWORD8 *pu1_cur_pred = pu1_pred + ((ctr & 1) * trans_size_m2) +
6675 (((ctr > 1) * trans_size * pred_strd) << u1_is_422) +
6676 (i4_subtu_idx * trans_size * pred_strd);
6677 WORD32 i4_recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride;
6678 UWORD8 *pu1_cur_recon = ((UWORD8 *)aps_recon_datastore[0]
6679 ->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)]) +
6680 ((ctr & 1) * trans_size_m2) +
6681 (((ctr > 1) * trans_size * i4_recon_stride) << u1_is_422) +
6682 (i4_subtu_idx * trans_size * i4_recon_stride);
6683
6684 /* Use Chroma coeff/iq buf of the cur. intra cand. Not rememb.
6685 chroma coeff/iq for high quality intra SATD special modes. Will
6686 be over written by coeff of luma mode in chroma_rdopt call */
6687 UWORD8 *pu1_ecd_data_cb =
6688 &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0];
6689 UWORD8 *pu1_ecd_data_cr =
6690 &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0];
6691
6692 WORD32 chrm_pred_func_idx = 0;
6693 LWORD64 curr_cb_cod_cost = 0;
6694 LWORD64 curr_cr_cod_cost = 0;
6695 WORD32 nbr_flags = 0;
6696
6697 i4_subtu_pos_x = (((ctr & 1) * trans_size_m2) >> 2);
6698 i4_subtu_pos_y = (((ctr > 1) * trans_size) >> (!u1_is_422 + 1)) +
6699 ((i4_subtu_idx * trans_size) >> 2);
6700 pi2_cur_deq_data_cb = &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] +
6701 ((ctr & 1) * trans_size) +
6702 (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) +
6703 (i4_subtu_idx * trans_size * deq_data_strd);
6704 pi2_cur_deq_data_cr = &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] +
6705 ((ctr & 1) * trans_size) +
6706 (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) +
6707 (i4_subtu_idx * trans_size * deq_data_strd);
6708
6709 /* left cu boundary */
6710 if(0 == i4_subtu_pos_x)
6711 {
6712 left_strd = cu_left_stride;
6713 pu1_left = pu1_cu_left + (i4_subtu_pos_y << 2) * left_strd;
6714 }
6715 else
6716 {
6717 pu1_left = pu1_cur_recon - 2;
6718 left_strd = i4_recon_stride;
6719 }
6720
6721 /* top cu boundary */
6722 if(0 == i4_subtu_pos_y)
6723 {
6724 pu1_top = pu1_cu_top + (i4_subtu_pos_x << 2);
6725 }
6726 else
6727 {
6728 pu1_top = pu1_cur_recon - i4_recon_stride;
6729 }
6730
6731 /* by default top left is set to cu top left */
6732 pu1_top_left = pu1_cu_top_left;
6733
6734 /* top left based on position */
6735 if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x))
6736 {
6737 pu1_top_left = pu1_left - left_strd;
6738 }
6739 else if(0 != i4_subtu_pos_x)
6740 {
6741 pu1_top_left = pu1_top - 2;
6742 }
6743
6744 /* populate the coeffs scan idx */
6745 scan_idx = SCAN_DIAG_UPRIGHT;
6746
6747 /* RDOPT copy States : TU init (best until prev TU) to current */
6748 COPY_CABAC_STATES(
6749 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6750 .s_cabac_ctxt.au1_ctxt_models[0],
6751 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6752 IHEVC_CAB_CTXT_END);
6753
6754 /* for 4x4 transforms based on intra pred mode scan is choosen*/
6755 if(4 == trans_size)
6756 {
6757 /* for modes from 22 upto 30 horizontal scan is used */
6758 if((best_chrm_mode > 21) && (best_chrm_mode < 31))
6759 {
6760 scan_idx = SCAN_HORZ;
6761 }
6762 /* for modes from 6 upto 14 horizontal scan is used */
6763 else if((best_chrm_mode > 5) && (best_chrm_mode < 15))
6764 {
6765 scan_idx = SCAN_VERT;
6766 }
6767 }
6768
6769 nbr_flags = ihevce_get_intra_chroma_tu_nbr(
6770 luma_nbr_flags, i4_subtu_idx, trans_size, u1_is_422);
6771
6772 /* call the chroma reference array substitution */
6773 ihevc_intra_pred_chroma_ref_substitution_fptr(
6774 pu1_top_left,
6775 pu1_top,
6776 pu1_left,
6777 left_strd,
6778 trans_size,
6779 nbr_flags,
6780 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
6781 1);
6782
6783 /* use the look up to get the function idx */
6784 chrm_pred_func_idx = g_i4_ip_funcs[best_chrm_mode];
6785
6786 /* call the intra prediction function */
6787 ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
6788 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
6789 1,
6790 pu1_cur_pred,
6791 pred_strd,
6792 trans_size,
6793 best_chrm_mode);
6794
6795 /* UPLANE RDOPT Loop */
6796 {
6797 WORD32 tu_bits;
6798
6799 cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
6800 ps_ctxt,
6801 pu1_cur_pred,
6802 pred_strd,
6803 pu1_cur_src,
6804 chrm_src_stride,
6805 pi2_cur_deq_data_cb,
6806 deq_data_strd,
6807 pu1_cur_recon,
6808 i4_recon_stride,
6809 pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx],
6810 ps_ctxt->au1_cu_csbf,
6811 ps_ctxt->i4_cu_csbf_strd,
6812 trans_size,
6813 scan_idx,
6814 1,
6815 &num_bytes,
6816 &tu_bits,
6817 &ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr],
6818 &ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr],
6819 &u1_is_recon_available,
6820 i4_perform_sbh,
6821 i4_perform_rdoq,
6822 &trans_ssd_u,
6823 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
6824 i4_alpha_stim_multiplier,
6825 u1_is_cu_noisy,
6826 #endif
6827 0,
6828 u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
6829 U_PLANE);
6830
6831 #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL
6832 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
6833 {
6834 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
6835 trans_ssd_u = ihevce_inject_stim_into_distortion(
6836 pu1_cur_src,
6837 chrm_src_stride,
6838 pu1_cur_pred,
6839 pred_strd,
6840 trans_ssd_u,
6841 i4_alpha_stim_multiplier,
6842 trans_size,
6843 0,
6844 ps_ctxt->u1_enable_psyRDOPT,
6845 U_PLANE);
6846 #else
6847 if(u1_compute_spatial_ssd && u1_is_recon_available)
6848 {
6849 trans_ssd_u = ihevce_inject_stim_into_distortion(
6850 pu1_cur_src,
6851 chrm_src_stride,
6852 pu1_cur_recon,
6853 i4_recon_stride,
6854 trans_ssd_u,
6855 i4_alpha_stim_multiplier,
6856 trans_size,
6857 0,
6858 ps_ctxt->u1_enable_psyRDOPT,
6859 U_PLANE);
6860 }
6861 else
6862 {
6863 trans_ssd_u = ihevce_inject_stim_into_distortion(
6864 pu1_cur_src,
6865 chrm_src_stride,
6866 pu1_cur_pred,
6867 pred_strd,
6868 trans_ssd_u,
6869 i4_alpha_stim_multiplier,
6870 trans_size,
6871 0,
6872 ps_ctxt->u1_enable_psyRDOPT,
6873 U_PLANE);
6874 }
6875 #endif
6876 }
6877 #endif
6878
6879 /* RDOPT copy States : New updated after curr TU to TU init */
6880 if(0 != cbf)
6881 {
6882 memcpy(
6883 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6884 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6885 .s_cabac_ctxt.au1_ctxt_models[0],
6886 IHEVC_CAB_CTXT_END);
6887 }
6888 /* RDOPT copy States : Restoring back the Cb init state to Cr */
6889 else
6890 {
6891 memcpy(
6892 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6893 .s_cabac_ctxt.au1_ctxt_models[0],
6894 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6895 IHEVC_CAB_CTXT_END);
6896 }
6897
6898 if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd))
6899 {
6900 ihevce_chroma_it_recon_fxn(
6901 ps_ctxt,
6902 pi2_cur_deq_data_cb,
6903 deq_data_strd,
6904 pu1_cur_pred,
6905 pred_strd,
6906 pu1_cur_recon,
6907 i4_recon_stride,
6908 (pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx]),
6909 trans_size,
6910 cbf,
6911 ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr],
6912 ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr],
6913 U_PLANE);
6914 }
6915
6916 ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr] = cbf;
6917 curr_cb_cod_cost =
6918 trans_ssd_u +
6919 COMPUTE_RATE_COST_CLIP30(
6920 tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
6921 chrm_tu_bits += tu_bits;
6922 ai4_total_bytes_offset_cb[i4_subtu_idx] += num_bytes;
6923 ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr] =
6924 num_bytes;
6925 }
6926
6927 /* VPLANE RDOPT Loop */
6928 {
6929 WORD32 tu_bits;
6930
6931 cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
6932 ps_ctxt,
6933 pu1_cur_pred,
6934 pred_strd,
6935 pu1_cur_src,
6936 chrm_src_stride,
6937 pi2_cur_deq_data_cr,
6938 deq_data_strd,
6939 pu1_cur_recon,
6940 i4_recon_stride,
6941 pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx],
6942 ps_ctxt->au1_cu_csbf,
6943 ps_ctxt->i4_cu_csbf_strd,
6944 trans_size,
6945 scan_idx,
6946 1,
6947 &num_bytes,
6948 &tu_bits,
6949 &ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr],
6950 &ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr],
6951 &u1_is_recon_available,
6952 i4_perform_sbh,
6953 i4_perform_rdoq,
6954 &trans_ssd_v,
6955 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
6956 i4_alpha_stim_multiplier,
6957 u1_is_cu_noisy,
6958 #endif
6959 0,
6960 u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
6961 V_PLANE);
6962
6963 #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL
6964 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
6965 {
6966 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
6967 trans_ssd_v = ihevce_inject_stim_into_distortion(
6968 pu1_cur_src,
6969 chrm_src_stride,
6970 pu1_cur_pred,
6971 pred_strd,
6972 trans_ssd_v,
6973 i4_alpha_stim_multiplier,
6974 trans_size,
6975 0,
6976 ps_ctxt->u1_enable_psyRDOPT,
6977 V_PLANE);
6978 #else
6979 if(u1_compute_spatial_ssd && u1_is_recon_available)
6980 {
6981 trans_ssd_v = ihevce_inject_stim_into_distortion(
6982 pu1_cur_src,
6983 chrm_src_stride,
6984 pu1_cur_recon,
6985 i4_recon_stride,
6986 trans_ssd_v,
6987 i4_alpha_stim_multiplier,
6988 trans_size,
6989 0,
6990 ps_ctxt->u1_enable_psyRDOPT,
6991 V_PLANE);
6992 }
6993 else
6994 {
6995 trans_ssd_v = ihevce_inject_stim_into_distortion(
6996 pu1_cur_src,
6997 chrm_src_stride,
6998 pu1_cur_pred,
6999 pred_strd,
7000 trans_ssd_v,
7001 i4_alpha_stim_multiplier,
7002 trans_size,
7003 0,
7004 ps_ctxt->u1_enable_psyRDOPT,
7005 V_PLANE);
7006 }
7007 #endif
7008 }
7009 #endif
7010
7011 /* RDOPT copy States : New updated after curr TU to TU init */
7012 if(0 != cbf)
7013 {
7014 COPY_CABAC_STATES(
7015 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7016 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
7017 .s_cabac_ctxt.au1_ctxt_models[0],
7018 IHEVC_CAB_CTXT_END);
7019 }
7020 /* RDOPT copy States : Restoring back the Cb init state to Cr */
7021 else
7022 {
7023 COPY_CABAC_STATES(
7024 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
7025 .s_cabac_ctxt.au1_ctxt_models[0],
7026 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7027 IHEVC_CAB_CTXT_END);
7028 }
7029
7030 if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd))
7031 {
7032 ihevce_chroma_it_recon_fxn(
7033 ps_ctxt,
7034 pi2_cur_deq_data_cr,
7035 deq_data_strd,
7036 pu1_cur_pred,
7037 pred_strd,
7038 pu1_cur_recon,
7039 i4_recon_stride,
7040 (pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx]),
7041 trans_size,
7042 cbf,
7043 ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr],
7044 ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr],
7045 V_PLANE);
7046 }
7047
7048 ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr] = cbf;
7049 curr_cr_cod_cost =
7050 trans_ssd_v +
7051 COMPUTE_RATE_COST_CLIP30(
7052 tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
7053 chrm_tu_bits += tu_bits;
7054 ai4_total_bytes_offset_cr[i4_subtu_idx] += num_bytes;
7055 ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr] =
7056 num_bytes;
7057 }
7058
7059 chrm_cod_cost += curr_cb_cod_cost;
7060 chrm_cod_cost += curr_cr_cod_cost;
7061 }
7062
7063 /* set the neighbour map to 1 */
7064 ihevce_set_nbr_map(
7065 ps_ctxt->pu1_ctb_nbr_map,
7066 ps_ctxt->i4_nbr_map_strd,
7067 (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x,
7068 (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y,
7069 (luma_trans_size >> 2),
7070 1);
7071 }
7072
7073 /* set the neighbour map to 0 */
7074 ihevce_set_nbr_map(
7075 ps_ctxt->pu1_ctb_nbr_map,
7076 ps_ctxt->i4_nbr_map_strd,
7077 (ps_cu_analyse->b3_cu_pos_x << 1),
7078 (ps_cu_analyse->b3_cu_pos_y << 1),
7079 (ps_cu_analyse->u1_cu_size >> 2),
7080 0);
7081
7082 /* Account for coding b3_chroma_intra_pred_mode prefix and suffix bins */
7083 /* This is done by adding the bits for signalling chroma mode (0-3) */
7084 /* and subtracting the bits for chroma mode same as luma mode (4) */
7085 #if CHROMA_RDOPT_ENABLE
7086 {
7087 /* Estimate bits to encode prefix bin as 1 for b3_chroma_intra_pred_mode */
7088 WORD32 bits_frac_1 =
7089 gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 1];
7090
7091 WORD32 bits_for_mode_0to3 = (2 << CABAC_FRAC_BITS_Q) + bits_frac_1;
7092
7093 /* Estimate bits to encode prefix bin as 0 for b3_chroma_intra_pred_mode */
7094 WORD32 bits_for_mode4 =
7095 gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 0];
7096
7097 /* accumulate into final rd cost for chroma */
7098 ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode = COMPUTE_RATE_COST_CLIP30(
7099 (bits_for_mode_0to3 - bits_for_mode4),
7100 ps_ctxt->i8_cl_ssd_lambda_chroma_qf,
7101 (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
7102
7103 chrm_cod_cost += ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode;
7104 }
7105 #endif
7106
7107 if(ps_ctxt->u1_enable_psyRDOPT)
7108 {
7109 UWORD8 *pu1_recon_cu;
7110 WORD32 recon_stride;
7111 WORD32 curr_pos_x;
7112 WORD32 curr_pos_y;
7113 WORD32 start_index;
7114 WORD32 num_horz_cu_in_ctb;
7115 WORD32 had_block_size;
7116
7117 /* tODO: sreenivasa ctb size has to be used appropriately */
7118 had_block_size = 8;
7119 num_horz_cu_in_ctb = 2 * 64 / had_block_size;
7120 curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
7121 curr_pos_y = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
7122 recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride;
7123 pu1_recon_cu =
7124 aps_recon_datastore[0]->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)]; //
7125
7126 /* start index to index the source satd of curr cu int he current ctb*/
7127 start_index = 2 * (curr_pos_x / had_block_size) +
7128 (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
7129
7130 {
7131 chrm_cod_cost += ihevce_psy_rd_cost_croma(
7132 ps_ctxt->ai4_source_chroma_satd,
7133 pu1_recon_cu,
7134 recon_stride,
7135 1, //
7136 cu_size,
7137 0, // pic type
7138 0, //layer id
7139 ps_ctxt->i4_satd_lamda, // lambda
7140 start_index,
7141 ps_ctxt->u1_is_input_data_hbd, // 8 bit
7142 ps_ctxt->u1_chroma_array_type,
7143 &ps_ctxt->s_cmn_opt_func
7144
7145 ); // chroma subsampling 420
7146 }
7147 }
7148
7149 ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt = chrm_cod_cost;
7150 ps_chr_intra_satd_ctxt->i4_chrm_tu_bits = chrm_tu_bits;
7151
7152 memcpy(
7153 &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0],
7154 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7155 IHEVC_CAB_CTXT_END);
7156 }
7157 }
7158
7159 /*!
7160 ******************************************************************************
7161 * \if Function name : ihevce_chroma_cu_prcs_rdopt \endif
7162 *
7163 * \brief
7164 * Coding unit processing function for chroma
7165 *
7166 * \param[in] ps_ctxt enc_loop module ctxt pointer
7167 * \param[in] rd_opt_curr_idx index in the array of RDopt params
7168 * \param[in] func_proc_mode TU_EQ_CU or other case
7169 * \param[in] pu1_chrm_src pointer to source data buffer
7170 * \param[in] chrm_src_stride source buffer stride
7171 * \param[in] pu1_cu_left pointer to left recon data buffer
7172 * \param[in] pu1_cu_top pointer to top recon data buffer
7173 * \param[in] pu1_cu_top_left pointer to top left recon data buffer
7174 * \param[in] cu_left_stride left recon buffer stride
7175 * \param[in] cu_pos_x position x of current CU in CTB
7176 * \param[in] cu_pos_y position y of current CU in CTB
7177 * \param[out] pi4_chrm_tu_bits pointer to store the total chroma bits
7178 *
7179 * \return
7180 * Chroma coding cost (Cb and Cr included)
7181 *
7182 * \author
7183 * Ittiam
7184 *
7185 *****************************************************************************
7186 */
ihevce_chroma_cu_prcs_rdopt(ihevce_enc_loop_ctxt_t * ps_ctxt,WORD32 rd_opt_curr_idx,WORD32 func_proc_mode,UWORD8 * pu1_chrm_src,WORD32 chrm_src_stride,UWORD8 * pu1_cu_left,UWORD8 * pu1_cu_top,UWORD8 * pu1_cu_top_left,WORD32 cu_left_stride,WORD32 cu_pos_x,WORD32 cu_pos_y,WORD32 * pi4_chrm_tu_bits,WORD32 i4_alpha_stim_multiplier,UWORD8 u1_is_cu_noisy)7187 LWORD64 ihevce_chroma_cu_prcs_rdopt(
7188 ihevce_enc_loop_ctxt_t *ps_ctxt,
7189 WORD32 rd_opt_curr_idx,
7190 WORD32 func_proc_mode,
7191 UWORD8 *pu1_chrm_src,
7192 WORD32 chrm_src_stride,
7193 UWORD8 *pu1_cu_left,
7194 UWORD8 *pu1_cu_top,
7195 UWORD8 *pu1_cu_top_left,
7196 WORD32 cu_left_stride,
7197 WORD32 cu_pos_x,
7198 WORD32 cu_pos_y,
7199 WORD32 *pi4_chrm_tu_bits,
7200 WORD32 i4_alpha_stim_multiplier,
7201 UWORD8 u1_is_cu_noisy)
7202 {
7203 tu_enc_loop_out_t *ps_tu;
7204 tu_enc_loop_temp_prms_t *ps_tu_temp_prms;
7205
7206 ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
7207
7208 UWORD8 *pu1_pred;
7209 UWORD8 *pu1_recon;
7210 WORD32 i4_recon_stride;
7211 WORD32 cu_size, trans_size = 0;
7212 WORD32 pred_strd;
7213 WORD32 ctr, i4_subtu_idx;
7214 WORD32 scan_idx;
7215 WORD32 u1_is_cu_coded_old;
7216 WORD32 init_bytes_offset;
7217
7218 enc_loop_cu_final_prms_t *ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_curr_idx];
7219 recon_datastore_t *ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore;
7220
7221 WORD32 total_bytes_offset = 0;
7222 LWORD64 chrm_cod_cost = 0;
7223 WORD32 chrm_tu_bits = 0;
7224 WORD32 chrm_pred_mode = DM_CHROMA_IDX, luma_pred_mode = 35;
7225 LWORD64 i8_ssd_cb = 0;
7226 WORD32 i4_bits_cb = 0;
7227 LWORD64 i8_ssd_cr = 0;
7228 WORD32 i4_bits_cr = 0;
7229 UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
7230 UWORD8 u1_num_tus =
7231 /* NumChromaTU's = 1, if TUSize = 4 and CUSize = 8 */
7232 (!ps_best_cu_prms->as_tu_enc_loop[0].s_tu.b3_size && ps_best_cu_prms->u1_intra_flag)
7233 ? 1
7234 : ps_best_cu_prms->u2_num_tus_in_cu;
7235 UWORD8 u1_num_subtus_in_tu = u1_is_422 + 1;
7236 UWORD8 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
7237 (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
7238 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
7239 /* Get the RDOPT cost of the best CU mode for early_exit */
7240 LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!rd_opt_curr_idx].i8_best_rdopt_cost;
7241 /* Get the current running RDOPT (Luma RDOPT) for early_exit */
7242 LWORD64 curr_rdopt_cost = ps_ctxt->as_cu_prms[rd_opt_curr_idx].i8_curr_rdopt_cost;
7243 WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
7244 WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
7245
7246 ihevc_intra_pred_chroma_ref_substitution_fptr =
7247 ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
7248
7249 if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
7250 {
7251 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
7252 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
7253 }
7254
7255 /* Store the init bytes offset from luma */
7256 init_bytes_offset = ps_best_cu_prms->i4_num_bytes_ecd_data;
7257
7258 /* Unused pred buffer in merge_skip_pred_data_t structure is used as
7259 Chroma pred storage buf. for final_recon function.
7260 The buffer is split into two and used as a ping-pong buffer */
7261 pu1_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
7262 rd_opt_curr_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) +
7263 (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
7264
7265 pred_strd = ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
7266
7267 pu1_recon = (UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs[0];
7268 i4_recon_stride = ps_recon_datastore->i4_chromaRecon_stride;
7269 cu_size = ps_best_cu_prms->u1_cu_size;
7270 chrm_tu_bits = 0;
7271
7272 /* get the first TU pointer */
7273 ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
7274 /* get the first TU enc_loop temp prms pointer */
7275 ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
7276
7277 if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag)
7278 {
7279 /* Mode signalled by intra prediction for luma */
7280 luma_pred_mode = ps_best_cu_prms->au1_intra_pred_mode[0];
7281
7282 #if DISABLE_RDOQ_INTRA
7283 i4_perform_rdoq = 0;
7284 #endif
7285 }
7286
7287 else
7288 {
7289 UWORD8 *pu1_pred_org = pu1_pred;
7290
7291 /* ------ Motion Compensation for Chroma -------- */
7292 for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++)
7293 {
7294 pu_t *ps_pu;
7295 WORD32 inter_pu_wd;
7296 WORD32 inter_pu_ht;
7297
7298 ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr];
7299
7300 inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
7301 inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
7302 inter_pu_ht <<= u1_is_422;
7303
7304 ihevce_chroma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_strd);
7305
7306 if(2 == ps_best_cu_prms->u2_num_pus_in_cu)
7307 {
7308 /* 2Nx__ partion case */
7309 if(inter_pu_wd == cu_size)
7310 {
7311 pu1_pred += (inter_pu_ht * pred_strd);
7312 }
7313
7314 /* __x2N partion case */
7315 if(inter_pu_ht == (cu_size >> (u1_is_422 == 0)))
7316 {
7317 pu1_pred += inter_pu_wd;
7318 }
7319 }
7320 }
7321
7322 /* restore the pred pointer to start for transform loop */
7323 pu1_pred = pu1_pred_org;
7324 }
7325
7326 /* Used to store back only the luma based info. if SATD based chorma
7327 mode also comes */
7328 u1_is_cu_coded_old = ps_best_cu_prms->u1_is_cu_coded;
7329
7330 /* evaluate chroma candidates (same as luma) and
7331 if INTRA & HIGH_QUALITY compare with best SATD mode */
7332 {
7333 WORD32 calc_recon = 0, deq_data_strd;
7334 WORD16 *pi2_deq_data;
7335 UWORD8 *pu1_ecd_data;
7336 UWORD8 u1_is_mode_eq_chroma_satd_mode = 0;
7337
7338 pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0];
7339 pi2_deq_data += ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx;
7340 deq_data_strd = cu_size;
7341 /* update ecd buffer for storing coeff. */
7342 pu1_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0];
7343 pu1_ecd_data += init_bytes_offset;
7344 /* store chroma starting index */
7345 ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx = init_bytes_offset;
7346
7347 /* get the first TU pointer */
7348 ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
7349 ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
7350
7351 /* Reset total_bytes_offset for each candidate */
7352 chrm_pred_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[luma_pred_mode]
7353 : luma_pred_mode;
7354
7355 total_bytes_offset = 0;
7356
7357 if(TU_EQ_SUBCU == func_proc_mode)
7358 {
7359 func_proc_mode = TU_EQ_CU_DIV2;
7360 }
7361
7362 /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and
7363 TU_EQ_CU_DIV2 and TU_EQ_SUBCU case */
7364 if(8 == cu_size)
7365 {
7366 func_proc_mode = TU_EQ_CU;
7367 }
7368
7369 /* loop based on num tus in a cu */
7370 if(!ps_best_cu_prms->u1_intra_flag || !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd ||
7371 (ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd &&
7372 (chrm_pred_mode !=
7373 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode].u1_best_cr_mode)))
7374 {
7375 /* loop based on num tus in a cu */
7376 for(ctr = 0; ctr < u1_num_tus; ctr++)
7377 {
7378 WORD32 num_bytes = 0;
7379 LWORD64 curr_cb_cod_cost = 0;
7380 LWORD64 curr_cr_cod_cost = 0;
7381 WORD32 chrm_pred_func_idx = 0;
7382 UWORD8 u1_is_early_exit_condition_satisfied = 0;
7383
7384 /* Default cb and cr offset initializatio for b3_chroma_intra_mode_idx=7 */
7385 /* FIX for TU tree shrinkage caused by ecd data copies in final mode recon */
7386 ps_tu->s_tu.b1_cb_cbf = ps_tu->s_tu.b1_cr_cbf = 0;
7387 ps_tu->s_tu.b1_cb_cbf_subtu1 = ps_tu->s_tu.b1_cr_cbf_subtu1 = 0;
7388 ps_tu->ai4_cb_coeff_offset[0] = total_bytes_offset + init_bytes_offset;
7389 ps_tu->ai4_cr_coeff_offset[0] = total_bytes_offset + init_bytes_offset;
7390 ps_tu->ai4_cb_coeff_offset[1] = total_bytes_offset + init_bytes_offset;
7391 ps_tu->ai4_cr_coeff_offset[1] = total_bytes_offset + init_bytes_offset;
7392 ps_tu_temp_prms->ai2_cb_bytes_consumed[0] = 0;
7393 ps_tu_temp_prms->ai2_cr_bytes_consumed[0] = 0;
7394 ps_tu_temp_prms->ai2_cb_bytes_consumed[1] = 0;
7395 ps_tu_temp_prms->ai2_cr_bytes_consumed[1] = 0;
7396
7397 /* TU level inits */
7398 /* check if chroma present flag is set */
7399 if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
7400 {
7401 /* RDOPT copy States : TU init (best until prev TU) to current */
7402 COPY_CABAC_STATES(
7403 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
7404 .s_cabac_ctxt.au1_ctxt_models[0],
7405 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7406 IHEVC_CAB_CTXT_END);
7407
7408 /* get the current transform size */
7409 trans_size = ps_tu->s_tu.b3_size;
7410 trans_size = (1 << (trans_size + 1)); /* in chroma units */
7411
7412 /* since 2x2 transform is not allowed for chroma*/
7413 if(2 == trans_size)
7414 {
7415 trans_size = 4;
7416 }
7417 }
7418
7419 for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++)
7420 {
7421 WORD32 cbf;
7422 UWORD8 u1_is_recon_available;
7423
7424 WORD32 nbr_flags = 0;
7425 WORD32 zero_cols = 0;
7426 WORD32 zero_rows = 0;
7427
7428 /* check if chroma present flag is set */
7429 if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
7430 {
7431 UWORD8 *pu1_cur_pred;
7432 UWORD8 *pu1_cur_recon;
7433 UWORD8 *pu1_cur_src;
7434 WORD16 *pi2_cur_deq_data;
7435 WORD32 curr_pos_x, curr_pos_y;
7436 LWORD64 trans_ssd_u, trans_ssd_v;
7437
7438 /* get the current sub-tu posx and posy w.r.t to cu */
7439 curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3);
7440 curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) +
7441 (i4_subtu_idx * trans_size);
7442
7443 /* 420sp case only vertical height will be half */
7444 if(u1_is_422 == 0)
7445 {
7446 curr_pos_y >>= 1;
7447 }
7448
7449 /* increment the pointers to start of current Sub-TU */
7450 pu1_cur_recon = (pu1_recon + curr_pos_x);
7451 pu1_cur_recon += (curr_pos_y * i4_recon_stride);
7452 pu1_cur_src = (pu1_chrm_src + curr_pos_x);
7453 pu1_cur_src += (curr_pos_y * chrm_src_stride);
7454 pu1_cur_pred = (pu1_pred + curr_pos_x);
7455 pu1_cur_pred += (curr_pos_y * pred_strd);
7456 pi2_cur_deq_data = pi2_deq_data + curr_pos_x;
7457 pi2_cur_deq_data += (curr_pos_y * deq_data_strd);
7458
7459 /* populate the coeffs scan idx */
7460 scan_idx = SCAN_DIAG_UPRIGHT;
7461
7462 /* perform intra prediction only for Intra case */
7463 if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag)
7464 {
7465 UWORD8 *pu1_top_left;
7466 UWORD8 *pu1_top;
7467 UWORD8 *pu1_left;
7468 WORD32 left_strd;
7469
7470 calc_recon = !u1_compute_spatial_ssd &&
7471 ((4 == u1_num_tus) || (u1_is_422 == 1)) &&
7472 (((u1_num_tus == 1) && (0 == i4_subtu_idx)) ||
7473 ((ctr == 3) && (0 == i4_subtu_idx) && (u1_is_422 == 1)) ||
7474 ((u1_num_tus == 4) && (ctr < 3)));
7475
7476 /* left cu boundary */
7477 if(0 == curr_pos_x)
7478 {
7479 pu1_left = pu1_cu_left + curr_pos_y * cu_left_stride;
7480 left_strd = cu_left_stride;
7481 }
7482 else
7483 {
7484 pu1_left = pu1_cur_recon - 2;
7485 left_strd = i4_recon_stride;
7486 }
7487
7488 /* top cu boundary */
7489 if(0 == curr_pos_y)
7490 {
7491 pu1_top = pu1_cu_top + curr_pos_x;
7492 }
7493 else
7494 {
7495 pu1_top = pu1_cur_recon - i4_recon_stride;
7496 }
7497
7498 /* by default top left is set to cu top left */
7499 pu1_top_left = pu1_cu_top_left;
7500
7501 /* top left based on position */
7502 if((0 != curr_pos_y) && (0 == curr_pos_x))
7503 {
7504 pu1_top_left = pu1_left - cu_left_stride;
7505 }
7506 else if(0 != curr_pos_x)
7507 {
7508 pu1_top_left = pu1_top - 2;
7509 }
7510
7511 /* for 4x4 transforms based on intra pred mode scan is choosen*/
7512 if(4 == trans_size)
7513 {
7514 /* for modes from 22 upto 30 horizontal scan is used */
7515 if((chrm_pred_mode > 21) && (chrm_pred_mode < 31))
7516 {
7517 scan_idx = SCAN_HORZ;
7518 }
7519 /* for modes from 6 upto 14 horizontal scan is used */
7520 else if((chrm_pred_mode > 5) && (chrm_pred_mode < 15))
7521 {
7522 scan_idx = SCAN_VERT;
7523 }
7524 }
7525
7526 nbr_flags = ihevce_get_intra_chroma_tu_nbr(
7527 ps_best_cu_prms->au4_nbr_flags[ctr],
7528 i4_subtu_idx,
7529 trans_size,
7530 u1_is_422);
7531
7532 /* call the chroma reference array substitution */
7533 ihevc_intra_pred_chroma_ref_substitution_fptr(
7534 pu1_top_left,
7535 pu1_top,
7536 pu1_left,
7537 left_strd,
7538 trans_size,
7539 nbr_flags,
7540 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
7541 1);
7542
7543 /* use the look up to get the function idx */
7544 chrm_pred_func_idx = g_i4_ip_funcs[chrm_pred_mode];
7545
7546 /* call the intra prediction function */
7547 ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
7548 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
7549 1,
7550 pu1_cur_pred,
7551 pred_strd,
7552 trans_size,
7553 chrm_pred_mode);
7554 }
7555
7556 if(!ctr && !i4_subtu_idx && (u1_compute_spatial_ssd || calc_recon))
7557 {
7558 ps_recon_datastore->au1_is_chromaRecon_available[0] =
7559 !ps_best_cu_prms->u1_skip_flag;
7560 }
7561 else if(!ctr && !i4_subtu_idx)
7562 {
7563 ps_recon_datastore->au1_is_chromaRecon_available[0] = 0;
7564 }
7565 /************************************************************/
7566 /* recon loop is done for all cases including skip cu */
7567 /* This is because skipping chroma reisdual based on luma */
7568 /* skip decision can lead to chroma artifacts */
7569 /************************************************************/
7570 /************************************************************/
7571 /*In the high quality and medium speed modes, wherein chroma*/
7572 /*and luma costs are included in the total cost calculation */
7573 /*the cost is just a ssd cost, and not that obtained through*/
7574 /*iq_it path */
7575 /************************************************************/
7576 if(ps_best_cu_prms->u1_skip_flag == 0)
7577 {
7578 WORD32 tu_bits;
7579
7580 cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
7581 ps_ctxt,
7582 pu1_cur_pred,
7583 pred_strd,
7584 pu1_cur_src,
7585 chrm_src_stride,
7586 pi2_cur_deq_data,
7587 deq_data_strd,
7588 pu1_cur_recon,
7589 i4_recon_stride,
7590 pu1_ecd_data + total_bytes_offset,
7591 ps_ctxt->au1_cu_csbf,
7592 ps_ctxt->i4_cu_csbf_strd,
7593 trans_size,
7594 scan_idx,
7595 PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag,
7596 &num_bytes,
7597 &tu_bits,
7598 &zero_cols,
7599 &zero_rows,
7600 &u1_is_recon_available,
7601 i4_perform_sbh,
7602 i4_perform_rdoq,
7603 &trans_ssd_u,
7604 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7605 i4_alpha_stim_multiplier,
7606 u1_is_cu_noisy,
7607 #endif
7608 ps_best_cu_prms->u1_skip_flag,
7609 u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
7610 U_PLANE);
7611
7612 if(u1_compute_spatial_ssd && u1_is_recon_available)
7613 {
7614 ps_recon_datastore
7615 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7616 [i4_subtu_idx] = 0;
7617 }
7618 else
7619 {
7620 ps_recon_datastore
7621 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7622 [i4_subtu_idx] = UCHAR_MAX;
7623 }
7624
7625 #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7626 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7627 {
7628 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
7629 trans_ssd_u = ihevce_inject_stim_into_distortion(
7630 pu1_cur_src,
7631 chrm_src_stride,
7632 pu1_cur_pred,
7633 pred_strd,
7634 trans_ssd_u,
7635 i4_alpha_stim_multiplier,
7636 trans_size,
7637 0,
7638 ps_ctxt->u1_enable_psyRDOPT,
7639 U_PLANE);
7640 #else
7641 if(u1_compute_spatial_ssd && u1_is_recon_available)
7642 {
7643 trans_ssd_u = ihevce_inject_stim_into_distortion(
7644 pu1_cur_src,
7645 chrm_src_stride,
7646 pu1_cur_recon,
7647 i4_recon_stride,
7648 trans_ssd_u,
7649 i4_alpha_stim_multiplier,
7650 trans_size,
7651 0,
7652 ps_ctxt->u1_enable_psyRDOPT,
7653 U_PLANE);
7654 }
7655 else
7656 {
7657 trans_ssd_u = ihevce_inject_stim_into_distortion(
7658 pu1_cur_src,
7659 chrm_src_stride,
7660 pu1_cur_pred,
7661 pred_strd,
7662 trans_ssd_u,
7663 i4_alpha_stim_multiplier,
7664 trans_size,
7665 0,
7666 ps_ctxt->u1_enable_psyRDOPT,
7667 U_PLANE);
7668 }
7669 #endif
7670 }
7671 #endif
7672
7673 curr_cb_cod_cost =
7674 trans_ssd_u +
7675 COMPUTE_RATE_COST_CLIP30(
7676 tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
7677
7678 chrm_tu_bits += tu_bits;
7679 i4_bits_cb += tu_bits;
7680
7681 /* RDOPT copy States : New updated after curr TU to TU init */
7682 if(0 != cbf)
7683 {
7684 COPY_CABAC_STATES(
7685 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7686 &ps_ctxt->s_rdopt_entropy_ctxt
7687 .as_cu_entropy_ctxt[rd_opt_curr_idx]
7688 .s_cabac_ctxt.au1_ctxt_models[0],
7689 IHEVC_CAB_CTXT_END);
7690 }
7691 /* RDOPT copy States : Restoring back the Cb init state to Cr */
7692 else
7693 {
7694 COPY_CABAC_STATES(
7695 &ps_ctxt->s_rdopt_entropy_ctxt
7696 .as_cu_entropy_ctxt[rd_opt_curr_idx]
7697 .s_cabac_ctxt.au1_ctxt_models[0],
7698 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7699 IHEVC_CAB_CTXT_END);
7700 }
7701
7702 /* If Intra and TU=CU/2, need recon for next TUs */
7703 if(calc_recon)
7704 {
7705 ihevce_chroma_it_recon_fxn(
7706 ps_ctxt,
7707 pi2_cur_deq_data,
7708 deq_data_strd,
7709 pu1_cur_pred,
7710 pred_strd,
7711 pu1_cur_recon,
7712 i4_recon_stride,
7713 (pu1_ecd_data + total_bytes_offset),
7714 trans_size,
7715 cbf,
7716 zero_cols,
7717 zero_rows,
7718 U_PLANE);
7719
7720 ps_recon_datastore
7721 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7722 [i4_subtu_idx] = 0;
7723 }
7724 else
7725 {
7726 ps_recon_datastore
7727 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7728 [i4_subtu_idx] = UCHAR_MAX;
7729 }
7730 }
7731 else
7732 {
7733 /* num bytes is set to 0 */
7734 num_bytes = 0;
7735
7736 /* cbf is returned as 0 */
7737 cbf = 0;
7738
7739 curr_cb_cod_cost = trans_ssd_u =
7740
7741 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
7742 pu1_cur_pred,
7743 pu1_cur_src,
7744 pred_strd,
7745 chrm_src_stride,
7746 trans_size,
7747 trans_size);
7748
7749 if(u1_compute_spatial_ssd)
7750 {
7751 /* buffer copy fromp pred to recon */
7752
7753 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
7754 pu1_cur_pred,
7755 pred_strd,
7756 pu1_cur_recon,
7757 i4_recon_stride,
7758 trans_size,
7759 trans_size,
7760 U_PLANE);
7761
7762 ps_recon_datastore
7763 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7764 [i4_subtu_idx] = 0;
7765 }
7766
7767 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7768 {
7769 trans_ssd_u = ihevce_inject_stim_into_distortion(
7770 pu1_cur_src,
7771 chrm_src_stride,
7772 pu1_cur_pred,
7773 pred_strd,
7774 trans_ssd_u,
7775 i4_alpha_stim_multiplier,
7776 trans_size,
7777 0,
7778 ps_ctxt->u1_enable_psyRDOPT,
7779 U_PLANE);
7780 }
7781
7782 #if ENABLE_INTER_ZCU_COST
7783 #if !WEIGH_CHROMA_COST
7784 /* cbf = 0, accumulate cu not coded cost */
7785 ps_ctxt->i8_cu_not_coded_cost += curr_cb_cod_cost;
7786 #else
7787 /* cbf = 0, accumulate cu not coded cost */
7788
7789 ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
7790 (curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7791 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7792 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7793 #endif
7794 #endif
7795 }
7796
7797 #if !WEIGH_CHROMA_COST
7798 curr_rdopt_cost += curr_cb_cod_cost;
7799 #else
7800 curr_rdopt_cost +=
7801 ((curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7802 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7803 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7804 #endif
7805 chrm_cod_cost += curr_cb_cod_cost;
7806 i8_ssd_cb += trans_ssd_u;
7807
7808 if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
7809 {
7810 /* Early exit : If the current running cost exceeds
7811 the prev. best mode cost, break */
7812 if(curr_rdopt_cost > prev_best_rdopt_cost)
7813 {
7814 u1_is_early_exit_condition_satisfied = 1;
7815 break;
7816 }
7817 }
7818
7819 /* inter cu is coded if any of the tu is coded in it */
7820 ps_best_cu_prms->u1_is_cu_coded |= cbf;
7821
7822 /* update CB related params */
7823 ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] =
7824 total_bytes_offset + init_bytes_offset;
7825
7826 if(0 == i4_subtu_idx)
7827 {
7828 ps_tu->s_tu.b1_cb_cbf = cbf;
7829 }
7830 else
7831 {
7832 ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf;
7833 }
7834
7835 total_bytes_offset += num_bytes;
7836
7837 ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] = zero_cols;
7838 ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] = zero_rows;
7839 ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes;
7840
7841 /* recon loop is done for non skip cases */
7842 if(ps_best_cu_prms->u1_skip_flag == 0)
7843 {
7844 WORD32 tu_bits;
7845
7846 cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
7847 ps_ctxt,
7848 pu1_cur_pred,
7849 pred_strd,
7850 pu1_cur_src,
7851 chrm_src_stride,
7852 pi2_cur_deq_data + trans_size,
7853 deq_data_strd,
7854 pu1_cur_recon,
7855 i4_recon_stride,
7856 pu1_ecd_data + total_bytes_offset,
7857 ps_ctxt->au1_cu_csbf,
7858 ps_ctxt->i4_cu_csbf_strd,
7859 trans_size,
7860 scan_idx,
7861 PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag,
7862 &num_bytes,
7863 &tu_bits,
7864 &zero_cols,
7865 &zero_rows,
7866 &u1_is_recon_available,
7867 i4_perform_sbh,
7868 i4_perform_rdoq,
7869 &trans_ssd_v,
7870 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7871 i4_alpha_stim_multiplier,
7872 u1_is_cu_noisy,
7873 #endif
7874 ps_best_cu_prms->u1_skip_flag,
7875 u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
7876 V_PLANE);
7877
7878 if(u1_compute_spatial_ssd && u1_is_recon_available)
7879 {
7880 ps_recon_datastore
7881 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7882 [i4_subtu_idx] = 0;
7883 }
7884 else
7885 {
7886 ps_recon_datastore
7887 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7888 [i4_subtu_idx] = UCHAR_MAX;
7889 }
7890
7891 #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7892 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7893 {
7894 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
7895 trans_ssd_v = ihevce_inject_stim_into_distortion(
7896 pu1_cur_src,
7897 chrm_src_stride,
7898 pu1_cur_pred,
7899 pred_strd,
7900 trans_ssd_v,
7901 i4_alpha_stim_multiplier,
7902 trans_size,
7903 0,
7904 ps_ctxt->u1_enable_psyRDOPT,
7905 V_PLANE);
7906 #else
7907 if(u1_compute_spatial_ssd && u1_is_recon_available)
7908 {
7909 trans_ssd_v = ihevce_inject_stim_into_distortion(
7910 pu1_cur_src,
7911 chrm_src_stride,
7912 pu1_cur_recon,
7913 i4_recon_stride,
7914 trans_ssd_v,
7915 i4_alpha_stim_multiplier,
7916 trans_size,
7917 0,
7918 ps_ctxt->u1_enable_psyRDOPT,
7919 V_PLANE);
7920 }
7921 else
7922 {
7923 trans_ssd_v = ihevce_inject_stim_into_distortion(
7924 pu1_cur_src,
7925 chrm_src_stride,
7926 pu1_cur_pred,
7927 pred_strd,
7928 trans_ssd_v,
7929 i4_alpha_stim_multiplier,
7930 trans_size,
7931 0,
7932 ps_ctxt->u1_enable_psyRDOPT,
7933 V_PLANE);
7934 }
7935 #endif
7936 }
7937 #endif
7938
7939 curr_cr_cod_cost =
7940 trans_ssd_v +
7941 COMPUTE_RATE_COST_CLIP30(
7942 tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
7943 chrm_tu_bits += tu_bits;
7944 i4_bits_cr += tu_bits;
7945
7946 /* RDOPT copy States : New updated after curr TU to TU init */
7947 if(0 != cbf)
7948 {
7949 COPY_CABAC_STATES(
7950 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7951 &ps_ctxt->s_rdopt_entropy_ctxt
7952 .as_cu_entropy_ctxt[rd_opt_curr_idx]
7953 .s_cabac_ctxt.au1_ctxt_models[0],
7954 IHEVC_CAB_CTXT_END);
7955 }
7956 /* RDOPT copy States : Restoring back the Cb init state to Cr */
7957 else
7958 {
7959 COPY_CABAC_STATES(
7960 &ps_ctxt->s_rdopt_entropy_ctxt
7961 .as_cu_entropy_ctxt[rd_opt_curr_idx]
7962 .s_cabac_ctxt.au1_ctxt_models[0],
7963 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7964 IHEVC_CAB_CTXT_END);
7965 }
7966
7967 /* If Intra and TU=CU/2, need recon for next TUs */
7968 if(calc_recon)
7969 {
7970 ihevce_chroma_it_recon_fxn(
7971 ps_ctxt,
7972 (pi2_cur_deq_data + trans_size),
7973 deq_data_strd,
7974 pu1_cur_pred,
7975 pred_strd,
7976 pu1_cur_recon,
7977 i4_recon_stride,
7978 (pu1_ecd_data + total_bytes_offset),
7979 trans_size,
7980 cbf,
7981 zero_cols,
7982 zero_rows,
7983 V_PLANE);
7984
7985 ps_recon_datastore
7986 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7987 [i4_subtu_idx] = 0;
7988 }
7989 else
7990 {
7991 ps_recon_datastore
7992 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7993 [i4_subtu_idx] = UCHAR_MAX;
7994 }
7995 }
7996 else
7997 {
7998 /* num bytes is set to 0 */
7999 num_bytes = 0;
8000
8001 /* cbf is returned as 0 */
8002 cbf = 0;
8003
8004 curr_cr_cod_cost = trans_ssd_v =
8005
8006 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
8007 pu1_cur_pred + 1,
8008 pu1_cur_src + 1,
8009 pred_strd,
8010 chrm_src_stride,
8011 trans_size,
8012 trans_size);
8013
8014 if(u1_compute_spatial_ssd)
8015 {
8016 /* buffer copy fromp pred to recon */
8017 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
8018 pu1_cur_pred,
8019 pred_strd,
8020 pu1_cur_recon,
8021 i4_recon_stride,
8022 trans_size,
8023 trans_size,
8024 V_PLANE);
8025
8026 ps_recon_datastore
8027 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
8028 [i4_subtu_idx] = 0;
8029 }
8030
8031 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
8032 {
8033 trans_ssd_v = ihevce_inject_stim_into_distortion(
8034 pu1_cur_src,
8035 chrm_src_stride,
8036 pu1_cur_pred,
8037 pred_strd,
8038 trans_ssd_v,
8039 i4_alpha_stim_multiplier,
8040 trans_size,
8041 0,
8042 ps_ctxt->u1_enable_psyRDOPT,
8043 V_PLANE);
8044 }
8045
8046 #if ENABLE_INTER_ZCU_COST
8047 #if !WEIGH_CHROMA_COST
8048 /* cbf = 0, accumulate cu not coded cost */
8049 ps_ctxt->i8_cu_not_coded_cost += curr_cr_cod_cost;
8050 #else
8051 /* cbf = 0, accumulate cu not coded cost */
8052
8053 ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
8054 (curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
8055 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
8056 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
8057 #endif
8058 #endif
8059 }
8060
8061 #if !WEIGH_CHROMA_COST
8062 curr_rdopt_cost += curr_cr_cod_cost;
8063 #else
8064 curr_rdopt_cost +=
8065 ((curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
8066 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
8067 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
8068 #endif
8069
8070 chrm_cod_cost += curr_cr_cod_cost;
8071 i8_ssd_cr += trans_ssd_v;
8072
8073 if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
8074 {
8075 /* Early exit : If the current running cost exceeds
8076 the prev. best mode cost, break */
8077 if(curr_rdopt_cost > prev_best_rdopt_cost)
8078 {
8079 u1_is_early_exit_condition_satisfied = 1;
8080 break;
8081 }
8082 }
8083
8084 /* inter cu is coded if any of the tu is coded in it */
8085 ps_best_cu_prms->u1_is_cu_coded |= cbf;
8086
8087 /* update CR related params */
8088 ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] =
8089 total_bytes_offset + init_bytes_offset;
8090
8091 if(0 == i4_subtu_idx)
8092 {
8093 ps_tu->s_tu.b1_cr_cbf = cbf;
8094 }
8095 else
8096 {
8097 ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf;
8098 }
8099
8100 total_bytes_offset += num_bytes;
8101
8102 ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] = zero_cols;
8103 ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] = zero_rows;
8104 ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes;
8105 }
8106 else
8107 {
8108 ps_recon_datastore
8109 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx] =
8110 UCHAR_MAX;
8111 ps_recon_datastore
8112 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx] =
8113 UCHAR_MAX;
8114 }
8115 }
8116
8117 if(u1_is_early_exit_condition_satisfied)
8118 {
8119 break;
8120 }
8121
8122 /* loop increments */
8123 ps_tu++;
8124 ps_tu_temp_prms++;
8125 }
8126
8127 /* Signal as luma mode. HIGH_QUALITY may update it */
8128 ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
8129
8130 /* modify the cost chrm_cod_cost */
8131 if(ps_ctxt->u1_enable_psyRDOPT)
8132 {
8133 UWORD8 *pu1_recon_cu;
8134 WORD32 recon_stride;
8135 WORD32 curr_pos_x;
8136 WORD32 curr_pos_y;
8137 WORD32 start_index;
8138 WORD32 num_horz_cu_in_ctb;
8139 WORD32 had_block_size;
8140 /* tODO: sreenivasa ctb size has to be used appropriately */
8141 had_block_size = 8;
8142 num_horz_cu_in_ctb = 2 * 64 / had_block_size;
8143
8144 curr_pos_x = cu_pos_x << 3; /* pel units */
8145 curr_pos_y = cu_pos_y << 3; /* pel units */
8146 recon_stride = i4_recon_stride;
8147 pu1_recon_cu = pu1_recon;
8148
8149 /* start index to index the source satd of curr cu int he current ctb*/
8150 start_index = 2 * (curr_pos_x / had_block_size) +
8151 (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
8152
8153 {
8154 chrm_cod_cost += ihevce_psy_rd_cost_croma(
8155 ps_ctxt->ai4_source_chroma_satd,
8156 pu1_recon,
8157 recon_stride,
8158 1, //
8159 cu_size,
8160 0, // pic type
8161 0, //layer id
8162 ps_ctxt->i4_satd_lamda, // lambda
8163 start_index,
8164 ps_ctxt->u1_is_input_data_hbd, // 8 bit
8165 ps_ctxt->u1_chroma_array_type,
8166 &ps_ctxt->s_cmn_opt_func
8167
8168 ); // chroma subsampling 420
8169 }
8170 }
8171 }
8172 else
8173 {
8174 u1_is_mode_eq_chroma_satd_mode = 1;
8175 chrm_cod_cost = MAX_COST_64;
8176 }
8177
8178 /* If Intra Block and preset is HIGH QUALITY, then compare with best SATD mode */
8179 if((PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag) &&
8180 (1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd))
8181 {
8182 if(64 == cu_size)
8183 {
8184 ASSERT(TU_EQ_CU != func_proc_mode);
8185 }
8186
8187 if(ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode]
8188 .i8_chroma_best_rdopt < chrm_cod_cost)
8189 {
8190 UWORD8 *pu1_src;
8191 UWORD8 *pu1_ecd_data_src_cb;
8192 UWORD8 *pu1_ecd_data_src_cr;
8193
8194 chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt =
8195 &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode];
8196
8197 UWORD8 *pu1_dst = &ps_ctxt->au1_rdopt_init_ctxt_models[0];
8198 WORD32 ai4_ecd_data_cb_offset[2] = { 0, 0 };
8199 WORD32 ai4_ecd_data_cr_offset[2] = { 0, 0 };
8200
8201 pu1_src = &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0];
8202 chrm_cod_cost = ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt;
8203 chrm_pred_mode = ps_chr_intra_satd_ctxt->u1_best_cr_mode;
8204 chrm_tu_bits = ps_chr_intra_satd_ctxt->i4_chrm_tu_bits;
8205
8206 if(u1_is_mode_eq_chroma_satd_mode)
8207 {
8208 chrm_cod_cost -= ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode;
8209 }
8210
8211 /*Resetting total_num_bytes_to 0*/
8212 total_bytes_offset = 0;
8213
8214 /* Update the CABAC state corresponding to chroma only */
8215 /* Chroma Cbf */
8216 memcpy(pu1_dst + IHEVC_CAB_CBCR_IDX, pu1_src + IHEVC_CAB_CBCR_IDX, 2);
8217 /* Chroma transform skip */
8218 memcpy(pu1_dst + IHEVC_CAB_TFM_SKIP12, pu1_src + IHEVC_CAB_TFM_SKIP12, 1);
8219 /* Chroma last coeff x prefix */
8220 memcpy(
8221 pu1_dst + IHEVC_CAB_COEFFX_PREFIX + 15,
8222 pu1_src + IHEVC_CAB_COEFFX_PREFIX + 15,
8223 3);
8224 /* Chroma last coeff y prefix */
8225 memcpy(
8226 pu1_dst + IHEVC_CAB_COEFFY_PREFIX + 15,
8227 pu1_src + IHEVC_CAB_COEFFY_PREFIX + 15,
8228 3);
8229 /* Chroma csbf */
8230 memcpy(
8231 pu1_dst + IHEVC_CAB_CODED_SUBLK_IDX + 2,
8232 pu1_src + IHEVC_CAB_CODED_SUBLK_IDX + 2,
8233 2);
8234 /* Chroma sig coeff flags */
8235 memcpy(
8236 pu1_dst + IHEVC_CAB_COEFF_FLAG + 27, pu1_src + IHEVC_CAB_COEFF_FLAG + 27, 15);
8237 /* Chroma absgt1 flags */
8238 memcpy(
8239 pu1_dst + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16,
8240 pu1_src + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16,
8241 8);
8242 /* Chroma absgt2 flags */
8243 memcpy(
8244 pu1_dst + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4,
8245 pu1_src + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4,
8246 2);
8247
8248 ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
8249 ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
8250
8251 /* update to luma decision as we update chroma in final mode */
8252 ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded_old;
8253
8254 for(ctr = 0; ctr < u1_num_tus; ctr++)
8255 {
8256 for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++)
8257 {
8258 WORD32 cbf;
8259 WORD32 num_bytes;
8260
8261 pu1_ecd_data_src_cb =
8262 &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0];
8263 pu1_ecd_data_src_cr =
8264 &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0];
8265
8266 /* check if chroma present flag is set */
8267 if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
8268 {
8269 UWORD8 *pu1_cur_pred_dest;
8270 UWORD8 *pu1_cur_pred_src;
8271 WORD32 pred_src_strd;
8272 WORD16 *pi2_cur_deq_data_dest;
8273 WORD16 *pi2_cur_deq_data_src_cb;
8274 WORD16 *pi2_cur_deq_data_src_cr;
8275 WORD32 deq_src_strd;
8276
8277 WORD32 curr_pos_x, curr_pos_y;
8278
8279 trans_size = ps_tu->s_tu.b3_size;
8280 trans_size = (1 << (trans_size + 1)); /* in chroma units */
8281
8282 /*Deriving stride values*/
8283 pred_src_strd = ps_chr_intra_satd_ctxt->i4_pred_stride;
8284 deq_src_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride;
8285
8286 /* since 2x2 transform is not allowed for chroma*/
8287 if(2 == trans_size)
8288 {
8289 trans_size = 4;
8290 }
8291
8292 /* get the current tu posx and posy w.r.t to cu */
8293 curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3);
8294 curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) +
8295 (i4_subtu_idx * trans_size);
8296
8297 /* 420sp case only vertical height will be half */
8298 if(0 == u1_is_422)
8299 {
8300 curr_pos_y >>= 1;
8301 }
8302
8303 /* increment the pointers to start of current TU */
8304 pu1_cur_pred_src =
8305 ((UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data + curr_pos_x);
8306 pu1_cur_pred_src += (curr_pos_y * pred_src_strd);
8307 pu1_cur_pred_dest = (pu1_pred + curr_pos_x);
8308 pu1_cur_pred_dest += (curr_pos_y * pred_strd);
8309
8310 pi2_cur_deq_data_src_cb =
8311 &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] + (curr_pos_x >> 1);
8312 pi2_cur_deq_data_src_cr =
8313 &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] + (curr_pos_x >> 1);
8314 pi2_cur_deq_data_src_cb += (curr_pos_y * deq_src_strd);
8315 pi2_cur_deq_data_src_cr += (curr_pos_y * deq_src_strd);
8316 pi2_cur_deq_data_dest = pi2_deq_data + curr_pos_x;
8317 pi2_cur_deq_data_dest += (curr_pos_y * deq_data_strd);
8318
8319 /*Overwriting deq data with that belonging to the winning special mode
8320 (luma mode != chroma mode)
8321 ihevce_copy_2d takes source and dest arguments as UWORD8 *. We have to
8322 correspondingly manipulate to copy WORD16 data*/
8323
8324 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
8325 (UWORD8 *)pi2_cur_deq_data_dest,
8326 (deq_data_strd << 1),
8327 (UWORD8 *)pi2_cur_deq_data_src_cb,
8328 (deq_src_strd << 1),
8329 (trans_size << 1),
8330 trans_size);
8331
8332 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
8333 (UWORD8 *)(pi2_cur_deq_data_dest + trans_size),
8334 (deq_data_strd << 1),
8335 (UWORD8 *)pi2_cur_deq_data_src_cr,
8336 (deq_src_strd << 1),
8337 (trans_size << 1),
8338 trans_size);
8339
8340 /*Overwriting pred data with that belonging to the winning special mode
8341 (luma mode != chroma mode)*/
8342
8343 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
8344 pu1_cur_pred_dest,
8345 pred_strd,
8346 pu1_cur_pred_src,
8347 pred_src_strd,
8348 (trans_size << 1),
8349 trans_size);
8350
8351 num_bytes = ps_chr_intra_satd_ctxt
8352 ->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr];
8353 cbf = ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr];
8354 /* inter cu is coded if any of the tu is coded in it */
8355 ps_best_cu_prms->u1_is_cu_coded |= cbf;
8356
8357 /* update CB related params */
8358 ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] =
8359 total_bytes_offset + init_bytes_offset;
8360
8361 if(0 == i4_subtu_idx)
8362 {
8363 ps_tu->s_tu.b1_cb_cbf = cbf;
8364 }
8365 else
8366 {
8367 ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf;
8368 }
8369
8370 /*Overwriting the cb ecd data corresponding to the special mode*/
8371 if(0 != num_bytes)
8372 {
8373 memcpy(
8374 (pu1_ecd_data + total_bytes_offset),
8375 pu1_ecd_data_src_cb + ai4_ecd_data_cb_offset[i4_subtu_idx],
8376 num_bytes);
8377 }
8378
8379 total_bytes_offset += num_bytes;
8380 ai4_ecd_data_cb_offset[i4_subtu_idx] += num_bytes;
8381 ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes;
8382
8383 num_bytes = ps_chr_intra_satd_ctxt
8384 ->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr];
8385 cbf = ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr];
8386 /* inter cu is coded if any of the tu is coded in it */
8387 ps_best_cu_prms->u1_is_cu_coded |= cbf;
8388
8389 /*Overwriting the cr ecd data corresponding to the special mode*/
8390 if(0 != num_bytes)
8391 {
8392 memcpy(
8393 (pu1_ecd_data + total_bytes_offset),
8394 pu1_ecd_data_src_cr + ai4_ecd_data_cr_offset[i4_subtu_idx],
8395 num_bytes);
8396 }
8397
8398 /* update CR related params */
8399 ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] =
8400 total_bytes_offset + init_bytes_offset;
8401
8402 if(0 == i4_subtu_idx)
8403 {
8404 ps_tu->s_tu.b1_cr_cbf = cbf;
8405 }
8406 else
8407 {
8408 ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf;
8409 }
8410
8411 total_bytes_offset += num_bytes;
8412 ai4_ecd_data_cr_offset[i4_subtu_idx] += num_bytes;
8413
8414 /*Updating zero rows and zero cols*/
8415 ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] =
8416 ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr];
8417 ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] =
8418 ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr];
8419 ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] =
8420 ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr];
8421 ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] =
8422 ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr];
8423
8424 ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes;
8425
8426 if((u1_num_tus > 1) &&
8427 ps_recon_datastore->au1_is_chromaRecon_available[2])
8428 {
8429 ps_recon_datastore
8430 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
8431 [i4_subtu_idx] = 2;
8432 ps_recon_datastore
8433 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
8434 [i4_subtu_idx] = 2;
8435 }
8436 else if(
8437 (1 == u1_num_tus) &&
8438 ps_recon_datastore->au1_is_chromaRecon_available[1])
8439 {
8440 ps_recon_datastore
8441 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
8442 [i4_subtu_idx] = 1;
8443 ps_recon_datastore
8444 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
8445 [i4_subtu_idx] = 1;
8446 }
8447 else
8448 {
8449 ps_recon_datastore
8450 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
8451 [i4_subtu_idx] = UCHAR_MAX;
8452 ps_recon_datastore
8453 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
8454 [i4_subtu_idx] = UCHAR_MAX;
8455 }
8456 }
8457 }
8458
8459 /* loop increments */
8460 ps_tu++;
8461 ps_tu_temp_prms++;
8462 }
8463 }
8464
8465 if(!u1_is_422)
8466 {
8467 if(chrm_pred_mode == luma_pred_mode)
8468 {
8469 ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
8470 }
8471 else if(chrm_pred_mode == 0)
8472 {
8473 ps_best_cu_prms->u1_chroma_intra_pred_mode = 0;
8474 }
8475 else if(chrm_pred_mode == 1)
8476 {
8477 ps_best_cu_prms->u1_chroma_intra_pred_mode = 3;
8478 }
8479 else if(chrm_pred_mode == 10)
8480 {
8481 ps_best_cu_prms->u1_chroma_intra_pred_mode = 2;
8482 }
8483 else if(chrm_pred_mode == 26)
8484 {
8485 ps_best_cu_prms->u1_chroma_intra_pred_mode = 1;
8486 }
8487 else
8488 {
8489 ASSERT(0); /*Should not come here*/
8490 }
8491 }
8492 else
8493 {
8494 if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[luma_pred_mode])
8495 {
8496 ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
8497 }
8498 else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[0])
8499 {
8500 ps_best_cu_prms->u1_chroma_intra_pred_mode = 0;
8501 }
8502 else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[1])
8503 {
8504 ps_best_cu_prms->u1_chroma_intra_pred_mode = 3;
8505 }
8506 else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[10])
8507 {
8508 ps_best_cu_prms->u1_chroma_intra_pred_mode = 2;
8509 }
8510 else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[26])
8511 {
8512 ps_best_cu_prms->u1_chroma_intra_pred_mode = 1;
8513 }
8514 else
8515 {
8516 ASSERT(0); /*Should not come here*/
8517 }
8518 }
8519 }
8520
8521 /* Store the actual chroma mode */
8522 ps_best_cu_prms->u1_chroma_intra_pred_actual_mode = chrm_pred_mode;
8523 }
8524
8525 /* update the total bytes produced */
8526 ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes_offset + init_bytes_offset;
8527
8528 /* store the final chrm bits accumulated */
8529 *pi4_chrm_tu_bits = chrm_tu_bits;
8530
8531 return (chrm_cod_cost);
8532 }
8533
8534 /*!
8535 ******************************************************************************
8536 * \if Function name : ihevce_final_rdopt_mode_prcs \endif
8537 *
8538 * \brief
8539 * Final RDOPT mode process function. Performs Recon computation for the
8540 * final mode. Re-use or Compute pred, iq-data, coeff based on the flags.
8541 *
8542 * \param[in] ps_ctxt : pointer to enc_loop module context
8543 * \param[in] ps_prms : pointer to struct containing requisite parameters
8544 *
8545 * \return
8546 * None
8547 *
8548 * \author
8549 * Ittiam
8550 *
8551 *****************************************************************************
8552 */
ihevce_final_rdopt_mode_prcs(ihevce_enc_loop_ctxt_t * ps_ctxt,final_mode_process_prms_t * ps_prms)8553 void ihevce_final_rdopt_mode_prcs(
8554 ihevce_enc_loop_ctxt_t *ps_ctxt, final_mode_process_prms_t *ps_prms)
8555 {
8556 enc_loop_cu_final_prms_t *ps_best_cu_prms;
8557 tu_enc_loop_out_t *ps_tu_enc_loop;
8558 tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms;
8559 nbr_avail_flags_t s_nbr;
8560 recon_datastore_t *ps_recon_datastore;
8561
8562 ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
8563 ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
8564 ihevc_intra_pred_ref_filtering_ft *ihevc_intra_pred_ref_filtering_fptr;
8565
8566 WORD32 num_tu_in_cu;
8567 LWORD64 rd_opt_cost;
8568 WORD32 ctr;
8569 WORD32 i4_subtu_idx;
8570 WORD32 cu_size;
8571 WORD32 cu_pos_x, cu_pos_y;
8572 WORD32 chrm_present_flag = 1;
8573 WORD32 num_bytes, total_bytes = 0;
8574 WORD32 chrm_ctr = 0;
8575 WORD32 u1_is_cu_coded;
8576 UWORD8 *pu1_old_ecd_data;
8577 UWORD8 *pu1_chrm_old_ecd_data;
8578 UWORD8 *pu1_cur_pred;
8579 WORD16 *pi2_deq_data;
8580 WORD16 *pi2_chrm_deq_data;
8581 WORD16 *pi2_cur_deq_data;
8582 WORD16 *pi2_cur_deq_data_chrm;
8583 UWORD8 *pu1_cur_luma_recon;
8584 UWORD8 *pu1_cur_chroma_recon;
8585 UWORD8 *pu1_cur_src;
8586 UWORD8 *pu1_cur_src_chrm;
8587 UWORD8 *pu1_cur_pred_chrm;
8588 UWORD8 *pu1_intra_pred_mode;
8589 UWORD32 *pu4_nbr_flags;
8590 LWORD64 i8_ssd;
8591
8592 cu_nbr_prms_t *ps_cu_nbr_prms = ps_prms->ps_cu_nbr_prms;
8593 cu_inter_cand_t *ps_best_inter_cand = ps_prms->ps_best_inter_cand;
8594 enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms = ps_prms->ps_chrm_cu_buf_prms;
8595
8596 WORD32 packed_pred_mode = ps_prms->packed_pred_mode;
8597 WORD32 rd_opt_best_idx = ps_prms->rd_opt_best_idx;
8598 UWORD8 *pu1_src = (UWORD8 *)ps_prms->pv_src;
8599 WORD32 src_strd = ps_prms->src_strd;
8600 UWORD8 *pu1_pred = (UWORD8 *)ps_prms->pv_pred;
8601 WORD32 pred_strd = ps_prms->pred_strd;
8602 UWORD8 *pu1_pred_chrm = (UWORD8 *)ps_prms->pv_pred_chrm;
8603 WORD32 pred_chrm_strd = ps_prms->pred_chrm_strd;
8604 UWORD8 *pu1_final_ecd_data = ps_prms->pu1_final_ecd_data;
8605 UWORD8 *pu1_csbf_buf = ps_prms->pu1_csbf_buf;
8606 WORD32 csbf_strd = ps_prms->csbf_strd;
8607 UWORD8 *pu1_luma_recon = (UWORD8 *)ps_prms->pv_luma_recon;
8608 WORD32 recon_luma_strd = ps_prms->recon_luma_strd;
8609 UWORD8 *pu1_chrm_recon = (UWORD8 *)ps_prms->pv_chrm_recon;
8610 WORD32 recon_chrma_strd = ps_prms->recon_chrma_strd;
8611 UWORD8 u1_cu_pos_x = ps_prms->u1_cu_pos_x;
8612 UWORD8 u1_cu_pos_y = ps_prms->u1_cu_pos_y;
8613 UWORD8 u1_cu_size = ps_prms->u1_cu_size;
8614 WORD8 i1_cu_qp = ps_prms->i1_cu_qp;
8615 UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
8616 UWORD8 u1_num_subtus = (u1_is_422 == 1) + 1;
8617 /* Get the Chroma pointer and parameters */
8618 UWORD8 *pu1_src_chrm = ps_chrm_cu_buf_prms->pu1_curr_src;
8619 WORD32 src_chrm_strd = ps_chrm_cu_buf_prms->i4_chrm_src_stride;
8620 UWORD8 u1_compute_spatial_ssd_luma = 0;
8621 UWORD8 u1_compute_spatial_ssd_chroma = 0;
8622 /* Get the pointer for function selector */
8623 ihevc_intra_pred_luma_ref_substitution_fptr =
8624 ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
8625
8626 ihevc_intra_pred_ref_filtering_fptr =
8627 ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr;
8628
8629 ihevc_intra_pred_chroma_ref_substitution_fptr =
8630 ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
8631
8632 /* Get the best CU parameters */
8633 ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
8634 num_tu_in_cu = ps_best_cu_prms->u2_num_tus_in_cu;
8635 cu_size = ps_best_cu_prms->u1_cu_size;
8636 cu_pos_x = u1_cu_pos_x;
8637 cu_pos_y = u1_cu_pos_y;
8638 pu1_intra_pred_mode = &ps_best_cu_prms->au1_intra_pred_mode[0];
8639 pu4_nbr_flags = &ps_best_cu_prms->au4_nbr_flags[0];
8640 ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore;
8641
8642 /* get the first TU pointer */
8643 ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0];
8644 /* get the first TU only enc_loop prms pointer */
8645 ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
8646 /*modify quant related param in ctxt based on current cu qp*/
8647 if((ps_ctxt->i1_cu_qp_delta_enable))
8648 {
8649 /*recompute quant related param at every cu level*/
8650 ihevce_compute_quant_rel_param(ps_ctxt, i1_cu_qp);
8651
8652 /* get frame level lambda params */
8653 ihevce_get_cl_cu_lambda_prms(
8654 ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? i1_cu_qp : ps_ctxt->i4_frame_qp);
8655 }
8656
8657 ps_best_cu_prms->i8_cu_ssd = 0;
8658 ps_best_cu_prms->u4_cu_open_intra_sad = 0;
8659
8660 /* For skip case : Set TU_size = CU_size and make cbf = 0
8661 so that same TU loop can be used for all modes */
8662 if(PRED_MODE_SKIP == packed_pred_mode)
8663 {
8664 for(ctr = 0; ctr < num_tu_in_cu; ctr++)
8665 {
8666 ps_tu_enc_loop->s_tu.b1_y_cbf = 0;
8667
8668 ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = 0;
8669
8670 ps_tu_enc_loop++;
8671 ps_tu_enc_loop_temp_prms++;
8672 }
8673
8674 /* go back to the first TU pointer */
8675 ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0];
8676 ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
8677 }
8678 /** For inter case, pred calculation is outside the loop **/
8679 if(PRED_MODE_INTRA != packed_pred_mode)
8680 {
8681 /**------------- Compute pred data if required --------------**/
8682 if((1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data))
8683 {
8684 nbr_4x4_t *ps_topleft_nbr_4x4;
8685 nbr_4x4_t *ps_left_nbr_4x4;
8686 nbr_4x4_t *ps_top_nbr_4x4;
8687 WORD32 nbr_4x4_left_strd;
8688
8689 ps_best_inter_cand->pu1_pred_data = pu1_pred;
8690 ps_best_inter_cand->i4_pred_data_stride = pred_strd;
8691
8692 /* Get the CU nbr information */
8693 ps_topleft_nbr_4x4 = ps_cu_nbr_prms->ps_topleft_nbr_4x4;
8694 ps_left_nbr_4x4 = ps_cu_nbr_prms->ps_left_nbr_4x4;
8695 ps_top_nbr_4x4 = ps_cu_nbr_prms->ps_top_nbr_4x4;
8696 nbr_4x4_left_strd = ps_cu_nbr_prms->nbr_4x4_left_strd;
8697
8698 /* MVP ,MVD calc and Motion compensation */
8699 rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)(
8700 ps_ctxt,
8701 ps_best_inter_cand,
8702 u1_cu_size,
8703 cu_pos_x,
8704 cu_pos_y,
8705 ps_left_nbr_4x4,
8706 ps_top_nbr_4x4,
8707 ps_topleft_nbr_4x4,
8708 nbr_4x4_left_strd,
8709 rd_opt_best_idx);
8710 }
8711
8712 /** ------ Motion Compensation for Chroma -------- **/
8713 if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data)
8714 {
8715 UWORD8 *pu1_cur_pred;
8716 pu1_cur_pred = pu1_pred_chrm;
8717
8718 /* run a loop over all the partitons in cu */
8719 for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++)
8720 {
8721 pu_t *ps_pu;
8722 WORD32 inter_pu_wd, inter_pu_ht;
8723
8724 ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr];
8725
8726 /* IF AMP then each partitions can have diff wd ht */
8727 inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
8728 inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
8729 inter_pu_ht <<= u1_is_422;
8730 /* chroma mc func */
8731 ihevce_chroma_inter_pred_pu(
8732 &ps_ctxt->s_mc_ctxt, ps_pu, pu1_cur_pred, pred_chrm_strd);
8733 if(2 == ps_best_cu_prms->u2_num_pus_in_cu)
8734 {
8735 /* 2Nx__ partion case */
8736 if(inter_pu_wd == ps_best_cu_prms->u1_cu_size)
8737 {
8738 pu1_cur_pred += (inter_pu_ht * pred_chrm_strd);
8739 }
8740 /* __x2N partion case */
8741 if(inter_pu_ht == (ps_best_cu_prms->u1_cu_size >> (u1_is_422 == 0)))
8742 {
8743 pu1_cur_pred += inter_pu_wd;
8744 }
8745 }
8746 }
8747 }
8748 }
8749 pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0];
8750 pi2_chrm_deq_data =
8751 &ps_best_cu_prms->pi2_cu_deq_coeffs[0] + ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx;
8752 pu1_old_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0];
8753 pu1_chrm_old_ecd_data =
8754 &ps_best_cu_prms->pu1_cu_coeffs[0] + ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx;
8755
8756 /* default value for cu coded flag */
8757 u1_is_cu_coded = 0;
8758
8759 /* If we are re-computing coeff, set sad to 0 and start accumulating */
8760 /* else use the best cand. sad from RDOPT stage */
8761 if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)
8762 {
8763 /*init of ssd of CU accuumulated over all TU*/
8764 ps_best_cu_prms->u4_cu_sad = 0;
8765
8766 /* reset the luma residual bits */
8767 ps_best_cu_prms->u4_cu_luma_res_bits = 0;
8768 }
8769
8770 if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)
8771 {
8772 /* reset the chroma residual bits */
8773 ps_best_cu_prms->u4_cu_chroma_res_bits = 0;
8774 }
8775
8776 if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data) ||
8777 (1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data))
8778 {
8779 /*Header bits have to be reevaluated if luma and chroma reevaluation is done, as
8780 the quantized coefficients might be changed.
8781 We are copying only those states which correspond to the header from the cabac state
8782 of the previous CU, because the header is going to be recomputed for this condition*/
8783 ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1;
8784 memcpy(
8785 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
8786 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
8787 IHEVC_CAB_COEFFX_PREFIX);
8788
8789 if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data))
8790 {
8791 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
8792 (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX),
8793 (&ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0] +
8794 IHEVC_CAB_COEFFX_PREFIX),
8795 (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX));
8796 }
8797 else
8798 {
8799 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
8800 (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX),
8801 (&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
8802 .s_cabac_ctxt.au1_ctxt_models[0] +
8803 IHEVC_CAB_COEFFX_PREFIX),
8804 (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX));
8805 }
8806 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_best_idx;
8807 }
8808 else
8809 {
8810 ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0;
8811 }
8812
8813 /* Zero cbf tool is disabled for intra CUs */
8814 if(PRED_MODE_INTRA == packed_pred_mode)
8815 {
8816 #if ENABLE_ZERO_CBF_IN_INTRA
8817 ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
8818 #else
8819 ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
8820 #endif
8821 }
8822 else
8823 {
8824 #if DISABLE_ZERO_ZBF_IN_INTER
8825 ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
8826 #else
8827 ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
8828 #endif
8829 }
8830
8831 /** Loop for all tu blocks in current cu and do reconstruction **/
8832 for(ctr = 0; ctr < num_tu_in_cu; ctr++)
8833 {
8834 tu_t *ps_tu;
8835 WORD32 trans_size, num_4x4_in_tu;
8836 WORD32 cbf, zero_rows, zero_cols;
8837 WORD32 cu_pos_x_in_4x4, cu_pos_y_in_4x4;
8838 WORD32 cu_pos_x_in_pix, cu_pos_y_in_pix;
8839 WORD32 luma_pred_mode, chroma_pred_mode = 0;
8840 UWORD8 au1_is_recon_available[2];
8841
8842 ps_tu = &(ps_tu_enc_loop->s_tu); /* Points to the TU property ctxt */
8843
8844 u1_compute_spatial_ssd_luma = 0;
8845 u1_compute_spatial_ssd_chroma = 0;
8846
8847 trans_size = 1 << (ps_tu->b3_size + 2);
8848 num_4x4_in_tu = (trans_size >> 2);
8849 cu_pos_x_in_4x4 = ps_tu->b4_pos_x;
8850 cu_pos_y_in_4x4 = ps_tu->b4_pos_y;
8851
8852 /* populate the coeffs scan idx */
8853 ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
8854
8855 /* get the current pos x and pos y in pixels */
8856 cu_pos_x_in_pix = (cu_pos_x_in_4x4 << 2) - (cu_pos_x << 3);
8857 cu_pos_y_in_pix = (cu_pos_y_in_4x4 << 2) - (cu_pos_y << 3);
8858
8859 /* Update pointers based on the location */
8860 pu1_cur_src = pu1_src + cu_pos_x_in_pix;
8861 pu1_cur_src += (cu_pos_y_in_pix * src_strd);
8862 pu1_cur_pred = pu1_pred + cu_pos_x_in_pix;
8863 pu1_cur_pred += (cu_pos_y_in_pix * pred_strd);
8864
8865 pu1_cur_luma_recon = pu1_luma_recon + cu_pos_x_in_pix;
8866 pu1_cur_luma_recon += (cu_pos_y_in_pix * recon_luma_strd);
8867
8868 pi2_cur_deq_data = pi2_deq_data + cu_pos_x_in_pix;
8869 pi2_cur_deq_data += cu_pos_y_in_pix * cu_size;
8870
8871 pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix;
8872 pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) +
8873 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd));
8874
8875 pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix;
8876 pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) +
8877 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd));
8878
8879 pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix;
8880 pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) +
8881 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd));
8882
8883 pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix;
8884 pi2_cur_deq_data_chrm +=
8885 ((cu_pos_y_in_pix >> 1) * cu_size) + (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size));
8886
8887 /* if transform size is 4x4 then only first luma 4x4 will have chroma */
8888 chrm_present_flag = 1; /* by default chroma present is set to 1*/
8889
8890 if(4 == trans_size)
8891 {
8892 /* if tusize is 4x4 then only first luma 4x4 will have chroma*/
8893 if(0 != chrm_ctr)
8894 {
8895 chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE;
8896 }
8897
8898 /* increment the chrm ctr unconditionally */
8899 chrm_ctr++;
8900 /* after ctr reached 4 reset it */
8901 if(4 == chrm_ctr)
8902 {
8903 chrm_ctr = 0;
8904 }
8905 }
8906
8907 /**------------- Compute pred data if required --------------**/
8908 if(PRED_MODE_INTRA == packed_pred_mode) /* Inter pred calc. is done outside loop */
8909 {
8910 /* Get the pred mode for scan idx calculation, even if pred is not required */
8911 luma_pred_mode = *pu1_intra_pred_mode;
8912
8913 if((ps_ctxt->i4_rc_pass == 1) ||
8914 (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data))
8915 {
8916 WORD32 nbr_flags;
8917 WORD32 luma_pred_func_idx;
8918 UWORD8 *pu1_left;
8919 UWORD8 *pu1_top;
8920 UWORD8 *pu1_top_left;
8921 WORD32 left_strd;
8922
8923 /* left cu boundary */
8924 if(0 == cu_pos_x_in_pix)
8925 {
8926 left_strd = ps_cu_nbr_prms->cu_left_stride;
8927 pu1_left = ps_cu_nbr_prms->pu1_cu_left + cu_pos_y_in_pix * left_strd;
8928 }
8929 else
8930 {
8931 pu1_left = pu1_cur_luma_recon - 1;
8932 left_strd = recon_luma_strd;
8933 }
8934
8935 /* top cu boundary */
8936 if(0 == cu_pos_y_in_pix)
8937 {
8938 pu1_top = ps_cu_nbr_prms->pu1_cu_top + cu_pos_x_in_pix;
8939 }
8940 else
8941 {
8942 pu1_top = pu1_cur_luma_recon - recon_luma_strd;
8943 }
8944
8945 /* by default top left is set to cu top left */
8946 pu1_top_left = ps_cu_nbr_prms->pu1_cu_top_left;
8947
8948 /* top left based on position */
8949 if((0 != cu_pos_y_in_pix) && (0 == cu_pos_x_in_pix))
8950 {
8951 pu1_top_left = pu1_left - left_strd;
8952 }
8953 else if(0 != cu_pos_x_in_pix)
8954 {
8955 pu1_top_left = pu1_top - 1;
8956 }
8957
8958 /* get the neighbour availability flags */
8959 nbr_flags = ihevce_get_nbr_intra(
8960 &s_nbr,
8961 ps_ctxt->pu1_ctb_nbr_map,
8962 ps_ctxt->i4_nbr_map_strd,
8963 cu_pos_x_in_4x4,
8964 cu_pos_y_in_4x4,
8965 num_4x4_in_tu);
8966
8967 if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data)
8968 {
8969 /* copy the nbr flags for chroma reuse */
8970 if(4 != trans_size)
8971 {
8972 *pu4_nbr_flags = nbr_flags;
8973 }
8974 else if(1 == chrm_present_flag)
8975 {
8976 /* compute the avail flags assuming luma trans is 8x8 */
8977 /* get the neighbour availability flags */
8978 *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
8979 ps_ctxt->pu1_ctb_nbr_map,
8980 ps_ctxt->i4_nbr_map_strd,
8981 cu_pos_x_in_4x4,
8982 cu_pos_y_in_4x4,
8983 (num_4x4_in_tu << 1),
8984 (num_4x4_in_tu << 1));
8985 }
8986
8987 /* call reference array substitution */
8988 ihevc_intra_pred_luma_ref_substitution_fptr(
8989 pu1_top_left,
8990 pu1_top,
8991 pu1_left,
8992 left_strd,
8993 trans_size,
8994 nbr_flags,
8995 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
8996 1);
8997
8998 /* call reference filtering */
8999 ihevc_intra_pred_ref_filtering_fptr(
9000 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
9001 trans_size,
9002 (UWORD8 *)ps_ctxt->pv_ref_filt_out,
9003 luma_pred_mode,
9004 ps_ctxt->i1_strong_intra_smoothing_enable_flag);
9005
9006 /* use the look up to get the function idx */
9007 luma_pred_func_idx = g_i4_ip_funcs[luma_pred_mode];
9008
9009 /* call the intra prediction function */
9010 ps_ctxt->apf_lum_ip[luma_pred_func_idx](
9011 (UWORD8 *)ps_ctxt->pv_ref_filt_out,
9012 1,
9013 pu1_cur_pred,
9014 pred_strd,
9015 trans_size,
9016 luma_pred_mode);
9017 }
9018 }
9019 else if(
9020 (1 == chrm_present_flag) &&
9021 (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data))
9022 {
9023 WORD32 temp_num_4x4_in_tu = num_4x4_in_tu;
9024
9025 if(4 == trans_size) /* compute the avail flags assuming luma trans is 8x8 */
9026 {
9027 temp_num_4x4_in_tu = num_4x4_in_tu << 1;
9028 }
9029
9030 *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
9031 ps_ctxt->pu1_ctb_nbr_map,
9032 ps_ctxt->i4_nbr_map_strd,
9033 cu_pos_x_in_4x4,
9034 cu_pos_y_in_4x4,
9035 temp_num_4x4_in_tu,
9036 temp_num_4x4_in_tu);
9037 }
9038
9039 /* Get the pred mode for scan idx calculation, even if pred is not required */
9040 chroma_pred_mode = ps_best_cu_prms->u1_chroma_intra_pred_actual_mode;
9041 }
9042
9043 if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)
9044 {
9045 WORD32 temp_bits;
9046 LWORD64 temp_cost;
9047 UWORD32 u4_tu_sad;
9048 WORD32 perform_sbh, perform_rdoq;
9049
9050 if(PRED_MODE_INTRA == packed_pred_mode)
9051 {
9052 /* for luma 4x4 and 8x8 transforms based on intra pred mode scan is choosen*/
9053 if(trans_size < 16)
9054 {
9055 /* for modes from 22 upto 30 horizontal scan is used */
9056 if((luma_pred_mode > 21) && (luma_pred_mode < 31))
9057 {
9058 ps_ctxt->i4_scan_idx = SCAN_HORZ;
9059 }
9060 /* for modes from 6 upto 14 vertical scan is used */
9061 else if((luma_pred_mode > 5) && (luma_pred_mode < 15))
9062 {
9063 ps_ctxt->i4_scan_idx = SCAN_VERT;
9064 }
9065 }
9066 }
9067
9068 /* RDOPT copy States : TU init (best until prev TU) to current */
9069 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9070 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9071 .s_cabac_ctxt.au1_ctxt_models[0] +
9072 IHEVC_CAB_COEFFX_PREFIX,
9073 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9074 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9075
9076 if(ps_prms->u1_recompute_sbh_and_rdoq)
9077 {
9078 perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH);
9079 perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ);
9080 }
9081 else
9082 {
9083 /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
9084 perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh;
9085 /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
9086 we would have to do RDOQ again.*/
9087 perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq;
9088 }
9089
9090 #if DISABLE_RDOQ_INTRA
9091 if(PRED_MODE_INTRA == packed_pred_mode)
9092 {
9093 perform_rdoq = 0;
9094 }
9095 #endif
9096 /*If BEST candidate RDOQ is enabled, either no coef level rdoq or CU level rdoq has to be enabled
9097 so that all candidates and best candidate are quantized with same rounding factor */
9098 if(1 == perform_rdoq)
9099 {
9100 ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING);
9101 }
9102
9103 cbf = ihevce_t_q_iq_ssd_scan_fxn(
9104 ps_ctxt,
9105 pu1_cur_pred,
9106 pred_strd,
9107 pu1_cur_src,
9108 src_strd,
9109 pi2_cur_deq_data,
9110 cu_size, /*deq_data stride is cu_size*/
9111 pu1_cur_luma_recon,
9112 recon_luma_strd,
9113 pu1_final_ecd_data,
9114 pu1_csbf_buf,
9115 csbf_strd,
9116 trans_size,
9117 packed_pred_mode,
9118 &temp_cost,
9119 &num_bytes,
9120 &temp_bits,
9121 &u4_tu_sad,
9122 &zero_cols,
9123 &zero_rows,
9124 &au1_is_recon_available[0],
9125 perform_rdoq, //(BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level),
9126 perform_sbh,
9127 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
9128 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
9129 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
9130 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
9131 100.0,
9132 ps_prms->u1_is_cu_noisy,
9133 #endif
9134 u1_compute_spatial_ssd_luma ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
9135 1 /*early cbf*/
9136 ); //(BEST_CAND_SBH == ps_ctxt->i4_sbh_level));
9137
9138 /* Accumulate luma residual bits */
9139 ps_best_cu_prms->u4_cu_luma_res_bits += temp_bits;
9140
9141 /* RDOPT copy States : New updated after curr TU to TU init */
9142 if(0 != cbf)
9143 {
9144 /* update to new state only if CBF is non zero */
9145 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9146 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9147 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9148 .s_cabac_ctxt.au1_ctxt_models[0] +
9149 IHEVC_CAB_COEFFX_PREFIX,
9150 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9151 }
9152
9153 /* accumulate the TU sad into cu sad */
9154 ps_best_cu_prms->u4_cu_sad += u4_tu_sad;
9155 ps_tu->b1_y_cbf = cbf;
9156 ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = num_bytes;
9157
9158 /* If somebody updates cbf (RDOQ or SBH), update in nbr str. for BS */
9159 if((ps_prms->u1_will_cabac_state_change) && (!ps_prms->u1_is_first_pass))
9160 {
9161 WORD32 num_4x4_in_cu = u1_cu_size >> 2;
9162 nbr_4x4_t *ps_cur_nbr_4x4 = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0];
9163 ps_cur_nbr_4x4 = (ps_cur_nbr_4x4 + (cu_pos_x_in_pix >> 2));
9164 ps_cur_nbr_4x4 += ((cu_pos_y_in_pix >> 2) * num_4x4_in_cu);
9165 /* replicate the nbr 4x4 structure for all 4x4 blocks in the current TU */
9166 ps_cur_nbr_4x4->b1_y_cbf = cbf;
9167 /*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/
9168 ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
9169 /* Qp and cbf are stored for the all 4x4 in TU */
9170 {
9171 WORD32 i, j;
9172 nbr_4x4_t *ps_tmp_4x4;
9173 ps_tmp_4x4 = ps_cur_nbr_4x4;
9174
9175 for(i = 0; i < num_4x4_in_tu; i++)
9176 {
9177 for(j = 0; j < num_4x4_in_tu; j++)
9178 {
9179 ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp;
9180 ps_tmp_4x4[j].b1_y_cbf = cbf;
9181 }
9182 /* row level update*/
9183 ps_tmp_4x4 += num_4x4_in_cu;
9184 }
9185 }
9186 }
9187 }
9188 else
9189 {
9190 zero_cols = ps_tu_enc_loop_temp_prms->u4_luma_zero_col;
9191 zero_rows = ps_tu_enc_loop_temp_prms->u4_luma_zero_row;
9192
9193 if(ps_prms->u1_will_cabac_state_change)
9194 {
9195 num_bytes = ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed;
9196 }
9197 else
9198 {
9199 num_bytes = 0;
9200 }
9201
9202 /* copy luma ecd data to final buffer */
9203 memcpy(pu1_final_ecd_data, pu1_old_ecd_data, num_bytes);
9204
9205 pu1_old_ecd_data += num_bytes;
9206
9207 au1_is_recon_available[0] = 0;
9208 }
9209
9210 /**-------- Compute Recon data (Do IT & Recon) : Luma -----------**/
9211 if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9212 (!u1_compute_spatial_ssd_luma ||
9213 (!au1_is_recon_available[0] && u1_compute_spatial_ssd_luma)))
9214 {
9215 if(!ps_recon_datastore->u1_is_lumaRecon_available ||
9216 (ps_recon_datastore->u1_is_lumaRecon_available &&
9217 (UCHAR_MAX == ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr])))
9218 {
9219 ihevce_it_recon_fxn(
9220 ps_ctxt,
9221 pi2_cur_deq_data,
9222 cu_size,
9223 pu1_cur_pred,
9224 pred_strd,
9225 pu1_cur_luma_recon,
9226 recon_luma_strd,
9227 pu1_final_ecd_data,
9228 trans_size,
9229 packed_pred_mode,
9230 ps_tu->b1_y_cbf,
9231 zero_cols,
9232 zero_rows);
9233 }
9234 else if(
9235 ps_recon_datastore->u1_is_lumaRecon_available &&
9236 (UCHAR_MAX != ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]))
9237 {
9238 UWORD8 *pu1_recon_src =
9239 ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
9240 [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]]) +
9241 cu_pos_x_in_pix + cu_pos_y_in_pix * ps_recon_datastore->i4_lumaRecon_stride;
9242
9243 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
9244 pu1_cur_luma_recon,
9245 recon_luma_strd,
9246 pu1_recon_src,
9247 ps_recon_datastore->i4_lumaRecon_stride,
9248 trans_size,
9249 trans_size);
9250 }
9251 }
9252
9253 if(ps_prms->u1_will_cabac_state_change)
9254 {
9255 ps_tu_enc_loop->i4_luma_coeff_offset = total_bytes;
9256 }
9257
9258 pu1_final_ecd_data += num_bytes;
9259 /* update total bytes consumed */
9260 total_bytes += num_bytes;
9261
9262 u1_is_cu_coded |= ps_tu->b1_y_cbf;
9263
9264 /***************** Compute T,Q,IQ,IT & Recon for Chroma ********************/
9265 if(1 == chrm_present_flag)
9266 {
9267 pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix;
9268 pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) +
9269 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd));
9270
9271 pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix;
9272 pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) +
9273 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd));
9274
9275 pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix;
9276 pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) +
9277 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd));
9278
9279 pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix;
9280 pi2_cur_deq_data_chrm += ((cu_pos_y_in_pix >> 1) * cu_size) +
9281 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size));
9282
9283 if(INCLUDE_CHROMA_DURING_TU_RECURSION &&
9284 (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0) &&
9285 (PRED_MODE_INTRA != packed_pred_mode))
9286 {
9287 WORD32 i4_num_bytes;
9288 UWORD8 *pu1_chroma_pred;
9289 UWORD8 *pu1_chroma_recon;
9290 WORD16 *pi2_chroma_deq;
9291 UWORD32 u4_zero_col;
9292 UWORD32 u4_zero_row;
9293
9294 for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
9295 {
9296 WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
9297 WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
9298 WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
9299
9300 if(0 == u1_is_422)
9301 {
9302 i4_subtu_pos_y >>= 1;
9303 }
9304
9305 pu1_chroma_pred =
9306 pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
9307 pu1_chroma_recon = pu1_cur_chroma_recon +
9308 (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
9309 pi2_chroma_deq =
9310 pi2_cur_deq_data_chrm + (i4_subtu_idx * chroma_trans_size * cu_size);
9311
9312 u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx];
9313 u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx];
9314
9315 if(ps_prms->u1_will_cabac_state_change)
9316 {
9317 i4_num_bytes =
9318 ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx];
9319 }
9320 else
9321 {
9322 i4_num_bytes = 0;
9323 }
9324
9325 memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes);
9326
9327 pu1_old_ecd_data += i4_num_bytes;
9328
9329 au1_is_recon_available[U_PLANE] = 0;
9330
9331 if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9332 (!u1_compute_spatial_ssd_chroma ||
9333 (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma)))
9334 {
9335 if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9336 (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9337 (UCHAR_MAX ==
9338 ps_recon_datastore
9339 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])))
9340 {
9341 ihevce_chroma_it_recon_fxn(
9342 ps_ctxt,
9343 pi2_chroma_deq,
9344 cu_size,
9345 pu1_chroma_pred,
9346 pred_chrm_strd,
9347 pu1_chroma_recon,
9348 recon_chrma_strd,
9349 pu1_final_ecd_data,
9350 chroma_trans_size,
9351 (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1,
9352 u4_zero_col,
9353 u4_zero_row,
9354 U_PLANE);
9355 }
9356 else if(
9357 ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9358 (UCHAR_MAX !=
9359 ps_recon_datastore
9360 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))
9361 {
9362 UWORD8 *pu1_recon_src =
9363 ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9364 [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9365 [U_PLANE][ctr][i4_subtu_idx]]) +
9366 i4_subtu_pos_x +
9367 i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9368
9369 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9370 pu1_recon_src,
9371 ps_recon_datastore->i4_lumaRecon_stride,
9372 pu1_chroma_recon,
9373 recon_chrma_strd,
9374 chroma_trans_size,
9375 chroma_trans_size,
9376 U_PLANE);
9377 }
9378 }
9379
9380 u1_is_cu_coded |=
9381 ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf);
9382
9383 pu1_final_ecd_data += i4_num_bytes;
9384 total_bytes += i4_num_bytes;
9385 }
9386
9387 for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
9388 {
9389 WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
9390 WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
9391 WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
9392
9393 if(0 == u1_is_422)
9394 {
9395 i4_subtu_pos_y >>= 1;
9396 }
9397
9398 pu1_chroma_pred =
9399 pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
9400 pu1_chroma_recon = pu1_cur_chroma_recon +
9401 (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
9402 pi2_chroma_deq = pi2_cur_deq_data_chrm +
9403 (i4_subtu_idx * chroma_trans_size * cu_size) +
9404 chroma_trans_size;
9405
9406 u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx];
9407 u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx];
9408
9409 if(ps_prms->u1_will_cabac_state_change)
9410 {
9411 i4_num_bytes =
9412 ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx];
9413 }
9414 else
9415 {
9416 i4_num_bytes = 0;
9417 }
9418
9419 memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes);
9420
9421 pu1_old_ecd_data += i4_num_bytes;
9422
9423 au1_is_recon_available[V_PLANE] = 0;
9424
9425 if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9426 (!u1_compute_spatial_ssd_chroma ||
9427 (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma)))
9428 {
9429 if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9430 (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9431 (UCHAR_MAX ==
9432 ps_recon_datastore
9433 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])))
9434 {
9435 ihevce_chroma_it_recon_fxn(
9436 ps_ctxt,
9437 pi2_chroma_deq,
9438 cu_size,
9439 pu1_chroma_pred,
9440 pred_chrm_strd,
9441 pu1_chroma_recon,
9442 recon_chrma_strd,
9443 pu1_final_ecd_data,
9444 chroma_trans_size,
9445 (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1,
9446 u4_zero_col,
9447 u4_zero_row,
9448 V_PLANE);
9449 }
9450 else if(
9451 ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9452 (UCHAR_MAX !=
9453 ps_recon_datastore
9454 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))
9455 {
9456 UWORD8 *pu1_recon_src =
9457 ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9458 [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9459 [V_PLANE][ctr][i4_subtu_idx]]) +
9460 i4_subtu_pos_x +
9461 i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9462
9463 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9464 pu1_recon_src,
9465 ps_recon_datastore->i4_lumaRecon_stride,
9466 pu1_chroma_recon,
9467 recon_chrma_strd,
9468 chroma_trans_size,
9469 chroma_trans_size,
9470 V_PLANE);
9471 }
9472 }
9473
9474 u1_is_cu_coded |=
9475 ((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf);
9476
9477 pu1_final_ecd_data += i4_num_bytes;
9478 total_bytes += i4_num_bytes;
9479 }
9480 }
9481 else
9482 {
9483 WORD32 cb_zero_col, cb_zero_row, cr_zero_col, cr_zero_row;
9484
9485 for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
9486 {
9487 WORD32 cb_cbf, cr_cbf;
9488 WORD32 cb_num_bytes, cr_num_bytes;
9489
9490 WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
9491
9492 WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
9493 WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
9494
9495 if(0 == u1_is_422)
9496 {
9497 i4_subtu_pos_y >>= 1;
9498 }
9499
9500 pu1_cur_src_chrm += (i4_subtu_idx * chroma_trans_size * src_chrm_strd);
9501 pu1_cur_pred_chrm += (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
9502 pu1_cur_chroma_recon += (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
9503 pi2_cur_deq_data_chrm += (i4_subtu_idx * chroma_trans_size * cu_size);
9504
9505 if((PRED_MODE_INTRA == packed_pred_mode) &&
9506 (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data))
9507 {
9508 WORD32 nbr_flags, left_strd_chrm, chrm_pred_func_idx;
9509 UWORD8 *pu1_left_chrm;
9510 UWORD8 *pu1_top_chrm;
9511 UWORD8 *pu1_top_left_chrm;
9512
9513 nbr_flags = ihevce_get_intra_chroma_tu_nbr(
9514 *pu4_nbr_flags, i4_subtu_idx, chroma_trans_size, u1_is_422);
9515
9516 /* left cu boundary */
9517 if(0 == i4_subtu_pos_x)
9518 {
9519 left_strd_chrm = ps_chrm_cu_buf_prms->i4_cu_left_stride;
9520 pu1_left_chrm =
9521 ps_chrm_cu_buf_prms->pu1_cu_left + i4_subtu_pos_y * left_strd_chrm;
9522 }
9523 else
9524 {
9525 pu1_left_chrm = pu1_cur_chroma_recon - 2;
9526 left_strd_chrm = recon_chrma_strd;
9527 }
9528
9529 /* top cu boundary */
9530 if(0 == i4_subtu_pos_y)
9531 {
9532 pu1_top_chrm = ps_chrm_cu_buf_prms->pu1_cu_top + i4_subtu_pos_x;
9533 }
9534 else
9535 {
9536 pu1_top_chrm = pu1_cur_chroma_recon - recon_chrma_strd;
9537 }
9538
9539 /* by default top left is set to cu top left */
9540 pu1_top_left_chrm = ps_chrm_cu_buf_prms->pu1_cu_top_left;
9541
9542 /* top left based on position */
9543 if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x))
9544 {
9545 pu1_top_left_chrm = pu1_left_chrm - left_strd_chrm;
9546 }
9547 else if(0 != i4_subtu_pos_x)
9548 {
9549 pu1_top_left_chrm = pu1_top_chrm - 2;
9550 }
9551
9552 /* call the chroma reference array substitution */
9553 ihevc_intra_pred_chroma_ref_substitution_fptr(
9554 pu1_top_left_chrm,
9555 pu1_top_chrm,
9556 pu1_left_chrm,
9557 left_strd_chrm,
9558 chroma_trans_size,
9559 nbr_flags,
9560 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
9561 1);
9562
9563 /* use the look up to get the function idx */
9564 chrm_pred_func_idx = g_i4_ip_funcs[chroma_pred_mode];
9565
9566 /* call the intra prediction function */
9567 ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
9568 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
9569 1,
9570 pu1_cur_pred_chrm,
9571 pred_chrm_strd,
9572 chroma_trans_size,
9573 chroma_pred_mode);
9574 }
9575
9576 /**---------- Compute iq&coeff data if required : Chroma ------------**/
9577 if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)
9578 {
9579 WORD32 perform_sbh, perform_rdoq, temp_bits;
9580
9581 if(ps_prms->u1_recompute_sbh_and_rdoq)
9582 {
9583 perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH);
9584 perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ);
9585 }
9586 else
9587 {
9588 /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
9589 perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh;
9590 /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
9591 we would have to do RDOQ again.*/
9592 perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq;
9593 }
9594
9595 /* populate the coeffs scan idx */
9596 ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
9597
9598 if(PRED_MODE_INTRA == packed_pred_mode)
9599 {
9600 /* for 4x4 transforms based on intra pred mode scan is choosen*/
9601 if(4 == chroma_trans_size)
9602 {
9603 /* for modes from 22 upto 30 horizontal scan is used */
9604 if((chroma_pred_mode > 21) && (chroma_pred_mode < 31))
9605 {
9606 ps_ctxt->i4_scan_idx = SCAN_HORZ;
9607 }
9608 /* for modes from 6 upto 14 vertical scan is used */
9609 else if((chroma_pred_mode > 5) && (chroma_pred_mode < 15))
9610 {
9611 ps_ctxt->i4_scan_idx = SCAN_VERT;
9612 }
9613 }
9614 }
9615
9616 #if DISABLE_RDOQ_INTRA
9617 if(PRED_MODE_INTRA == packed_pred_mode)
9618 {
9619 perform_rdoq = 0;
9620 }
9621 #endif
9622
9623 /* RDOPT copy States : TU init (best until prev TU) to current */
9624 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9625 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9626 .s_cabac_ctxt.au1_ctxt_models[0] +
9627 IHEVC_CAB_COEFFX_PREFIX,
9628 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9629 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9630
9631 ASSERT(rd_opt_best_idx == ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx);
9632 /*If BEST candidate RDOQ is enabled, either no coef level rdoq or CU level rdoq has to be enabled
9633 so that all candidates and best candidate are quantized with same rounding factor */
9634 if(1 == perform_rdoq)
9635 {
9636 ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING);
9637 }
9638
9639 if(!ps_best_cu_prms->u1_skip_flag ||
9640 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
9641 {
9642 /* Cb */
9643 cb_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
9644 ps_ctxt,
9645 pu1_cur_pred_chrm,
9646 pred_chrm_strd,
9647 pu1_cur_src_chrm,
9648 src_chrm_strd,
9649 pi2_cur_deq_data_chrm,
9650 cu_size,
9651 pu1_chrm_recon,
9652 recon_chrma_strd,
9653 pu1_final_ecd_data,
9654 pu1_csbf_buf,
9655 csbf_strd,
9656 chroma_trans_size,
9657 ps_ctxt->i4_scan_idx,
9658 (PRED_MODE_INTRA == packed_pred_mode),
9659 &cb_num_bytes,
9660 &temp_bits,
9661 &cb_zero_col,
9662 &cb_zero_row,
9663 &au1_is_recon_available[U_PLANE],
9664 perform_sbh,
9665 perform_rdoq,
9666 &i8_ssd,
9667 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
9668 !ps_ctxt->u1_is_refPic
9669 ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
9670 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
9671 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
9672 100.0,
9673 ps_prms->u1_is_cu_noisy,
9674 #endif
9675 ps_best_cu_prms->u1_skip_flag &&
9676 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt,
9677 u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD
9678 : FREQUENCY_DOMAIN_SSD,
9679 U_PLANE);
9680 }
9681 else
9682 {
9683 cb_cbf = 0;
9684 temp_bits = 0;
9685 cb_num_bytes = 0;
9686 au1_is_recon_available[U_PLANE] = 0;
9687 cb_zero_col = 0;
9688 cb_zero_row = 0;
9689 }
9690
9691 /* Accumulate chroma residual bits */
9692 ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits;
9693
9694 /* RDOPT copy States : New updated after curr TU to TU init */
9695 if(0 != cb_cbf)
9696 {
9697 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9698 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9699 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9700 .s_cabac_ctxt.au1_ctxt_models[0] +
9701 IHEVC_CAB_COEFFX_PREFIX,
9702 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9703 }
9704 /* RDOPT copy States : Restoring back the Cb init state to Cr */
9705 else
9706 {
9707 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9708 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9709 .s_cabac_ctxt.au1_ctxt_models[0] +
9710 IHEVC_CAB_COEFFX_PREFIX,
9711 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9712 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9713 }
9714
9715 if(!ps_best_cu_prms->u1_skip_flag ||
9716 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
9717 {
9718 /* Cr */
9719 cr_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
9720 ps_ctxt,
9721 pu1_cur_pred_chrm,
9722 pred_chrm_strd,
9723 pu1_cur_src_chrm,
9724 src_chrm_strd,
9725 pi2_cur_deq_data_chrm + chroma_trans_size,
9726 cu_size,
9727 pu1_chrm_recon,
9728 recon_chrma_strd,
9729 pu1_final_ecd_data + cb_num_bytes,
9730 pu1_csbf_buf,
9731 csbf_strd,
9732 chroma_trans_size,
9733 ps_ctxt->i4_scan_idx,
9734 (PRED_MODE_INTRA == packed_pred_mode),
9735 &cr_num_bytes,
9736 &temp_bits,
9737 &cr_zero_col,
9738 &cr_zero_row,
9739 &au1_is_recon_available[V_PLANE],
9740 perform_sbh,
9741 perform_rdoq,
9742 &i8_ssd,
9743 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
9744 !ps_ctxt->u1_is_refPic
9745 ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
9746 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
9747 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
9748 100.0,
9749 ps_prms->u1_is_cu_noisy,
9750 #endif
9751 ps_best_cu_prms->u1_skip_flag &&
9752 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt,
9753 u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD
9754 : FREQUENCY_DOMAIN_SSD,
9755 V_PLANE);
9756 }
9757 else
9758 {
9759 cr_cbf = 0;
9760 temp_bits = 0;
9761 cr_num_bytes = 0;
9762 au1_is_recon_available[V_PLANE] = 0;
9763 cr_zero_col = 0;
9764 cr_zero_row = 0;
9765 }
9766
9767 /* Accumulate chroma residual bits */
9768 ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits;
9769
9770 /* RDOPT copy States : New updated after curr TU to TU init */
9771 if(0 != cr_cbf)
9772 {
9773 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9774 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9775 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9776 .s_cabac_ctxt.au1_ctxt_models[0] +
9777 IHEVC_CAB_COEFFX_PREFIX,
9778 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9779 }
9780
9781 if(0 == i4_subtu_idx)
9782 {
9783 ps_tu->b1_cb_cbf = cb_cbf;
9784 ps_tu->b1_cr_cbf = cr_cbf;
9785 }
9786 else
9787 {
9788 ps_tu->b1_cb_cbf_subtu1 = cb_cbf;
9789 ps_tu->b1_cr_cbf_subtu1 = cr_cbf;
9790 }
9791 }
9792 else
9793 {
9794 cb_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx];
9795 cb_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx];
9796 cr_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx];
9797 cr_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx];
9798
9799 if(ps_prms->u1_will_cabac_state_change)
9800 {
9801 cb_num_bytes =
9802 ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx];
9803 }
9804 else
9805 {
9806 cb_num_bytes = 0;
9807 }
9808
9809 if(ps_prms->u1_will_cabac_state_change)
9810 {
9811 cr_num_bytes =
9812 ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx];
9813 }
9814 else
9815 {
9816 cr_num_bytes = 0;
9817 }
9818
9819 /* copy cb ecd data to final buffer */
9820 memcpy(pu1_final_ecd_data, pu1_chrm_old_ecd_data, cb_num_bytes);
9821
9822 pu1_chrm_old_ecd_data += cb_num_bytes;
9823
9824 /* copy cr ecd data to final buffer (placed right after the cb data) */
9825 memcpy(
9826 (pu1_final_ecd_data + cb_num_bytes),
9827 pu1_chrm_old_ecd_data,
9828 cr_num_bytes);
9829
9830 pu1_chrm_old_ecd_data += cr_num_bytes;
9831
9832 au1_is_recon_available[U_PLANE] = 0;
9833 au1_is_recon_available[V_PLANE] = 0;
9834 }
9835
9836 /**-------- Compute Recon data (Do IT & Recon) : Chroma -----------**/
9837 if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9838 (!u1_compute_spatial_ssd_chroma ||
9839 (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma)))
9840 {
9841 if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9842 (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9843 (UCHAR_MAX ==
9844 ps_recon_datastore
9845 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])))
9846 {
9847 ihevce_chroma_it_recon_fxn(
9848 ps_ctxt,
9849 pi2_cur_deq_data_chrm,
9850 cu_size,
9851 pu1_cur_pred_chrm,
9852 pred_chrm_strd,
9853 pu1_cur_chroma_recon,
9854 recon_chrma_strd,
9855 pu1_final_ecd_data,
9856 chroma_trans_size,
9857 (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1,
9858 cb_zero_col,
9859 cb_zero_row,
9860 U_PLANE);
9861 }
9862 else if(
9863 ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9864 (UCHAR_MAX !=
9865 ps_recon_datastore
9866 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))
9867 {
9868 UWORD8 *pu1_recon_src =
9869 ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9870 [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9871 [U_PLANE][ctr][i4_subtu_idx]]) +
9872 i4_subtu_pos_x +
9873 i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9874
9875 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9876 pu1_recon_src,
9877 ps_recon_datastore->i4_lumaRecon_stride,
9878 pu1_cur_chroma_recon,
9879 recon_chrma_strd,
9880 chroma_trans_size,
9881 chroma_trans_size,
9882 U_PLANE);
9883 }
9884 }
9885
9886 u1_is_cu_coded |=
9887 ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf);
9888
9889 if(ps_prms->u1_will_cabac_state_change)
9890 {
9891 ps_tu_enc_loop->ai4_cb_coeff_offset[i4_subtu_idx] = total_bytes;
9892 }
9893
9894 pu1_final_ecd_data += cb_num_bytes;
9895 /* update total bytes consumed */
9896 total_bytes += cb_num_bytes;
9897
9898 if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9899 (!u1_compute_spatial_ssd_chroma ||
9900 (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma)))
9901 {
9902 if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9903 (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9904 (UCHAR_MAX ==
9905 ps_recon_datastore
9906 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])))
9907 {
9908 ihevce_chroma_it_recon_fxn(
9909 ps_ctxt,
9910 pi2_cur_deq_data_chrm + chroma_trans_size,
9911 cu_size,
9912 pu1_cur_pred_chrm,
9913 pred_chrm_strd,
9914 pu1_cur_chroma_recon,
9915 recon_chrma_strd,
9916 pu1_final_ecd_data,
9917 chroma_trans_size,
9918 (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1,
9919 cr_zero_col,
9920 cr_zero_row,
9921 V_PLANE);
9922 }
9923 else if(
9924 ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9925 (UCHAR_MAX !=
9926 ps_recon_datastore
9927 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))
9928 {
9929 UWORD8 *pu1_recon_src =
9930 ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9931 [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9932 [V_PLANE][ctr][i4_subtu_idx]]) +
9933 i4_subtu_pos_x +
9934 i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9935
9936 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9937 pu1_recon_src,
9938 ps_recon_datastore->i4_lumaRecon_stride,
9939 pu1_cur_chroma_recon,
9940 recon_chrma_strd,
9941 chroma_trans_size,
9942 chroma_trans_size,
9943 V_PLANE);
9944 }
9945 }
9946
9947 u1_is_cu_coded |=
9948 ((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf);
9949
9950 if(ps_prms->u1_will_cabac_state_change)
9951 {
9952 ps_tu_enc_loop->ai4_cr_coeff_offset[i4_subtu_idx] = total_bytes;
9953 }
9954
9955 pu1_final_ecd_data += cr_num_bytes;
9956 /* update total bytes consumed */
9957 total_bytes += cr_num_bytes;
9958 }
9959 }
9960 }
9961 else
9962 {
9963 ps_tu_enc_loop->ai4_cb_coeff_offset[0] = total_bytes;
9964 ps_tu_enc_loop->ai4_cr_coeff_offset[0] = total_bytes;
9965 ps_tu_enc_loop->ai4_cb_coeff_offset[1] = total_bytes;
9966 ps_tu_enc_loop->ai4_cr_coeff_offset[1] = total_bytes;
9967 ps_tu->b1_cb_cbf = 0;
9968 ps_tu->b1_cr_cbf = 0;
9969 ps_tu->b1_cb_cbf_subtu1 = 0;
9970 ps_tu->b1_cr_cbf_subtu1 = 0;
9971 }
9972
9973 /* Update to next TU */
9974 ps_tu_enc_loop++;
9975 ps_tu_enc_loop_temp_prms++;
9976
9977 pu4_nbr_flags++;
9978 pu1_intra_pred_mode++;
9979
9980 /*Do not set the nbr map for last pu in cu */
9981 if((num_tu_in_cu - 1) != ctr)
9982 {
9983 /* set the neighbour map to 1 */
9984 ihevce_set_nbr_map(
9985 ps_ctxt->pu1_ctb_nbr_map,
9986 ps_ctxt->i4_nbr_map_strd,
9987 cu_pos_x_in_4x4,
9988 cu_pos_y_in_4x4,
9989 (trans_size >> 2),
9990 1);
9991 }
9992 }
9993
9994 if(ps_prms->u1_will_cabac_state_change)
9995 {
9996 ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded;
9997
9998 /* Modify skip flag, if luma is skipped & Chroma is coded */
9999 if((1 == u1_is_cu_coded) && (PRED_MODE_SKIP == packed_pred_mode))
10000 {
10001 ps_best_cu_prms->u1_skip_flag = 0;
10002 }
10003 }
10004
10005 /* during chroma evaluation if skip decision was over written */
10006 /* then the current skip candidate is set to a non skip candidate */
10007 if(PRED_MODE_INTRA != packed_pred_mode)
10008 {
10009 ps_best_inter_cand->b1_skip_flag = ps_best_cu_prms->u1_skip_flag;
10010 }
10011
10012 /**------------- Compute header data if required --------------**/
10013 if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data)
10014 {
10015 WORD32 cbf_bits;
10016 WORD32 cu_bits;
10017 WORD32 unit_4x4_size = cu_size >> 2;
10018
10019 /*Restoring the running reference into the best rdopt_ctxt cabac states which will then
10020 be copied as the base reference for the next cu
10021 Assumption : We are ensuring that the u1_eval_header_data flag is set to 1 only if either
10022 luma and chroma are being reevaluated*/
10023 COPY_CABAC_STATES(
10024 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
10025 .s_cabac_ctxt.au1_ctxt_models[0],
10026 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
10027 IHEVC_CAB_CTXT_END);
10028
10029 /* get the neighbour availability flags for current cu */
10030 ihevce_get_only_nbr_flag(
10031 &s_nbr,
10032 ps_ctxt->pu1_ctb_nbr_map,
10033 ps_ctxt->i4_nbr_map_strd,
10034 (cu_pos_x << 1),
10035 (cu_pos_y << 1),
10036 unit_4x4_size,
10037 unit_4x4_size);
10038
10039 cu_bits = ihevce_entropy_rdo_encode_cu(
10040 &ps_ctxt->s_rdopt_entropy_ctxt,
10041 ps_best_cu_prms,
10042 cu_pos_x,
10043 cu_pos_y,
10044 cu_size,
10045 ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
10046 : s_nbr.u1_top_avail,
10047 s_nbr.u1_left_avail,
10048 (pu1_final_ecd_data - total_bytes),
10049 &cbf_bits);
10050
10051 /* cbf bits are excluded from header bits, instead considered as texture bits */
10052 ps_best_cu_prms->u4_cu_hdr_bits = cu_bits - cbf_bits;
10053 ps_best_cu_prms->u4_cu_cbf_bits = cbf_bits;
10054 }
10055
10056 if(ps_prms->u1_will_cabac_state_change)
10057 {
10058 ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes;
10059 }
10060 }
10061
10062 /*!
10063 ******************************************************************************
10064 * \if Function name : ihevce_set_eval_flags \endif
10065 *
10066 * \brief
10067 * Function which decides which eval flags have to be set based on present
10068 * and RDOQ conditions
10069 *
10070 * \param[in] ps_ctxt : encoder ctxt pointer
10071 * \param[in] enc_loop_cu_final_prms_t : pointer to final cu params
10072 *
10073 * \return
10074 * None
10075 *
10076 * \author
10077 * Ittiam
10078 *
10079 *****************************************************************************
10080 */
ihevce_set_eval_flags(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_final_prms_t * ps_enc_loop_bestprms)10081 void ihevce_set_eval_flags(
10082 ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_final_prms_t *ps_enc_loop_bestprms)
10083 {
10084 WORD32 count = 0;
10085
10086 ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0;
10087
10088 ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data =
10089 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10090
10091 if(ps_ctxt->u1_disable_intra_eval && (!(ps_ctxt->i4_deblk_pad_hpel_cur_pic & 0x1)))
10092 {
10093 ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 0;
10094 }
10095 else
10096 {
10097 ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1;
10098 }
10099
10100 if((1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq) ||
10101 (1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh))
10102 {
10103 /* When rdoq is enabled only for the best candidate, in case of in Intra nTU
10104 RDOQ might have altered the coeffs of the neighbour CU. As a result, the pred
10105 for the current CU will change. Therefore, we need to reevaluate the pred data*/
10106 if((ps_enc_loop_bestprms->u2_num_tus_in_cu > 1) &&
10107 (ps_enc_loop_bestprms->u1_intra_flag == 1))
10108 {
10109 ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 1;
10110 ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data = 1;
10111 }
10112 if(ps_enc_loop_bestprms->u1_skip_flag == 1)
10113 {
10114 for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10115 {
10116 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10117 .b1_eval_luma_iq_and_coeff_data = 0;
10118 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10119 .b1_eval_chroma_iq_and_coeff_data = 0;
10120 }
10121 }
10122 else
10123 {
10124 for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10125 {
10126 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10127 .b1_eval_luma_iq_and_coeff_data = 1;
10128 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10129 .b1_eval_chroma_iq_and_coeff_data = 1;
10130 }
10131 }
10132 }
10133 else
10134 {
10135 switch(ps_ctxt->i4_quality_preset)
10136 {
10137 case IHEVCE_QUALITY_P0:
10138 case IHEVCE_QUALITY_P2:
10139 case IHEVCE_QUALITY_P3:
10140 {
10141 for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10142 {
10143 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10144 .b1_eval_luma_iq_and_coeff_data = 0;
10145 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10146 .b1_eval_chroma_iq_and_coeff_data =
10147 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10148 }
10149
10150 break;
10151 }
10152 case IHEVCE_QUALITY_P4:
10153 case IHEVCE_QUALITY_P5:
10154 {
10155 for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10156 {
10157 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10158 .b1_eval_luma_iq_and_coeff_data = 0;
10159 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10160 .b1_eval_chroma_iq_and_coeff_data =
10161 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10162 }
10163
10164 break;
10165 }
10166 case IHEVCE_QUALITY_P6:
10167 {
10168 for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10169 {
10170 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10171 .b1_eval_luma_iq_and_coeff_data = 0;
10172 #if !ENABLE_CHROMA_TRACKING_OF_LUMA_CBF_IN_XS25
10173 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10174 .b1_eval_chroma_iq_and_coeff_data =
10175 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10176 #else
10177 if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_temporal_layer_id > 1) &&
10178 (ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b3_size >= 2))
10179 {
10180 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10181 .b1_eval_chroma_iq_and_coeff_data =
10182 ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b1_y_cbf;
10183 }
10184 else
10185 {
10186 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10187 .b1_eval_chroma_iq_and_coeff_data =
10188 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10189 }
10190 #endif
10191 }
10192
10193 break;
10194 }
10195 default:
10196 {
10197 break;
10198 }
10199 }
10200 }
10201
10202 /* Not recomputing Luma pred-data and header data for any preset now */
10203 ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1;
10204 }
10205
10206 /**
10207 ******************************************************************************
10208 *
10209 * @brief Shrink's TU tree of inter CUs by merging redundnant child nodes
10210 * (not coded children) into a parent node(not coded).
10211 *
10212 * @par Description
10213 * This is required post RDO evaluation as TU decisions are
10214 * pre-determined(pre RDO) based on recursive SATD,
10215 * while the quad children TU's can be skipped during RDO
10216 *
10217 * The shrink process is applied iteratively till there are no
10218 * more modes to shrink
10219 *
10220 * @param[inout] ps_tu_enc_loop
10221 * pointer to tu enc loop params of inter cu
10222 *
10223 * @param[inout] ps_tu_enc_loop_temp_prms
10224 * pointer to temp tu enc loop params of inter cu
10225 *
10226 * @param[in] num_tu_in_cu
10227 * number of tus in cu
10228 *
10229 * @return modified number of tus in cu
10230 *
10231 ******************************************************************************
10232 */
ihevce_shrink_inter_tu_tree(tu_enc_loop_out_t * ps_tu_enc_loop,tu_enc_loop_temp_prms_t * ps_tu_enc_loop_temp_prms,recon_datastore_t * ps_recon_datastore,WORD32 num_tu_in_cu,UWORD8 u1_is_422)10233 WORD32 ihevce_shrink_inter_tu_tree(
10234 tu_enc_loop_out_t *ps_tu_enc_loop,
10235 tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms,
10236 recon_datastore_t *ps_recon_datastore,
10237 WORD32 num_tu_in_cu,
10238 UWORD8 u1_is_422)
10239 {
10240 WORD32 recurse = 1;
10241 WORD32 ctr;
10242
10243 /* ------------- Quadtree TU Split Transform flag optimization ------------ */
10244 /* Post RDO, if all 4 child nodes are not coded the overheads of split TU */
10245 /* flags and cbf flags are saved by merging to parent node and marking */
10246 /* parent TU as not coded */
10247 /* */
10248 /* ParentTUSplit=1 */
10249 /* | */
10250 /* --------------------------------------------------------- */
10251 /* |C0(Not coded) | C1(Not coded) | C2(Not coded) | C3(Not coded) */
10252 /* || */
10253 /* \/ */
10254 /* */
10255 /* ParentTUSplit=0 (Not Coded) */
10256 /* */
10257 /* ------------- Quadtree TU Split Transform flag optimization ------------ */
10258 while((num_tu_in_cu > 4) && recurse)
10259 {
10260 recurse = 0;
10261
10262 /* Validate inter CU */
10263 //ASSERT(ps_tu_enc_loop[0].s_tu.s_tu.b1_intra_flag == 0); /*b1_intra_flag no longer a member of tu structure */
10264
10265 /* loop for all tu blocks in current cu */
10266 for(ctr = 0; ctr < num_tu_in_cu;)
10267 {
10268 /* Get current tu posx, posy and size */
10269 WORD32 curr_pos_x = ps_tu_enc_loop[ctr].s_tu.b4_pos_x << 2;
10270 WORD32 curr_pos_y = ps_tu_enc_loop[ctr].s_tu.b4_pos_y << 2;
10271 /* +1 is for parents size */
10272 WORD32 parent_tu_size = 1 << (ps_tu_enc_loop[ctr].s_tu.b3_size + 2 + 1);
10273
10274 /* eval merge if leaf nodes reached i.e all child tus are of same size and first tu pos is same as parent pos */
10275 WORD32 eval_merge = ((curr_pos_x & (parent_tu_size - 1)) == 0);
10276 eval_merge &= ((curr_pos_y & (parent_tu_size - 1)) == 0);
10277
10278 /* As TUs are published in encode order (Z SCAN), */
10279 /* Four consecutive TUS of same size implies we have hit leaf nodes. */
10280 if(((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 1].s_tu.b3_size)) &&
10281 ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 2].s_tu.b3_size)) &&
10282 ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 3].s_tu.b3_size)) &&
10283 eval_merge)
10284 {
10285 WORD32 merge_parent = 1;
10286
10287 /* If any leaf noded is coded, it cannot be merged to parent */
10288 if((ps_tu_enc_loop[ctr].s_tu.b1_y_cbf) || (ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf) ||
10289 (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf) ||
10290
10291 (ps_tu_enc_loop[ctr + 1].s_tu.b1_y_cbf) ||
10292 (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf) ||
10293 (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf) ||
10294
10295 (ps_tu_enc_loop[ctr + 2].s_tu.b1_y_cbf) ||
10296 (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf) ||
10297 (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf) ||
10298
10299 (ps_tu_enc_loop[ctr + 3].s_tu.b1_y_cbf) ||
10300 (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf) ||
10301 (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf))
10302 {
10303 merge_parent = 0;
10304 }
10305
10306 if(u1_is_422)
10307 {
10308 if((ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1) ||
10309 (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1) ||
10310
10311 (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf_subtu1) ||
10312 (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf_subtu1) ||
10313
10314 (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf_subtu1) ||
10315 (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf_subtu1) ||
10316
10317 (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf_subtu1) ||
10318 (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf_subtu1))
10319 {
10320 merge_parent = 0;
10321 }
10322 }
10323
10324 if(merge_parent)
10325 {
10326 /* Merge all the children (ctr,ctr+1,ctr+2,ctr+3) to parent (ctr) */
10327
10328 if(ps_recon_datastore->u1_is_lumaRecon_available)
10329 {
10330 ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
10331
10332 memmove(
10333 &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 1],
10334 &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 4],
10335 (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10336 }
10337
10338 if(ps_recon_datastore->au1_is_chromaRecon_available[0])
10339 {
10340 ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][0] =
10341 UCHAR_MAX;
10342 ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][0] =
10343 UCHAR_MAX;
10344
10345 memmove(
10346 &ps_recon_datastore
10347 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][0],
10348 &ps_recon_datastore
10349 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][0],
10350 (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10351
10352 memmove(
10353 &ps_recon_datastore
10354 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][0],
10355 &ps_recon_datastore
10356 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][0],
10357 (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10358
10359 if(u1_is_422)
10360 {
10361 ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][1] =
10362 UCHAR_MAX;
10363 ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][1] =
10364 UCHAR_MAX;
10365
10366 memmove(
10367 &ps_recon_datastore
10368 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][1],
10369 &ps_recon_datastore
10370 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][1],
10371 (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10372
10373 memmove(
10374 &ps_recon_datastore
10375 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][1],
10376 &ps_recon_datastore
10377 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][1],
10378 (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10379 }
10380 }
10381
10382 /* Parent node size is one more than that of child */
10383 ps_tu_enc_loop[ctr].s_tu.b3_size++;
10384
10385 ctr++;
10386
10387 /* move the subsequent TUs to next element */
10388 ASSERT(num_tu_in_cu >= (ctr + 3));
10389 memmove(
10390 (void *)(ps_tu_enc_loop + ctr),
10391 (void *)(ps_tu_enc_loop + ctr + 3),
10392 (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_out_t));
10393
10394 /* Also memmove the temp TU params */
10395 memmove(
10396 (void *)(ps_tu_enc_loop_temp_prms + ctr),
10397 (void *)(ps_tu_enc_loop_temp_prms + ctr + 3),
10398 (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_temp_prms_t));
10399
10400 /* Number of TUs in CU are now less by 3 */
10401 num_tu_in_cu -= 3;
10402
10403 /* Recurse again as new parent also be can be merged later */
10404 recurse = 1;
10405 }
10406 else
10407 {
10408 /* Go to next set of leaf nodes */
10409 ctr += 4;
10410 }
10411 }
10412 else
10413 {
10414 ctr++;
10415 }
10416 }
10417 }
10418
10419 /* return the modified num TUs*/
10420 ASSERT(num_tu_in_cu > 0);
10421 return (num_tu_in_cu);
10422 }
10423
/*!
******************************************************************************
* \if Function name : ihevce_intra_mode_nxn_hash_updater \endif
*
* \brief
*    Extends the intra mode list with the +/-1 neighbours of the listed modes.
*    The first MAX_INTRA_CU_CANDIDATES entries of pu1_mode_array are scanned;
*    for every entry below 35, (mode - 1) is appended unless mode is 0 and
*    (mode + 1) is appended unless mode is 34, each only if not yet flagged in
*    pu1_hash_table. INTRA_PLANAR and INTRA_DC are appended last on the same
*    condition. Every appended mode is flagged in the hash table.
*
* \param[in,out] pu1_mode_array : mode list; new modes are written starting
*                                 at index u1_num_ipe_modes. No capacity
*                                 check is done here
* \param[in,out] pu1_hash_table : per-mode 'already in list' flags
* \param[in] u1_num_ipe_modes : number of modes in the list on entry
*
* \return
*    Number of modes in the list after the update
*
*****************************************************************************
*/
UWORD8 ihevce_intra_mode_nxn_hash_updater(
    UWORD8 *pu1_mode_array, UWORD8 *pu1_hash_table, UWORD8 u1_num_ipe_modes)
{
    WORD32 cand_idx;

    for(cand_idx = 0; cand_idx < MAX_INTRA_CU_CANDIDATES; cand_idx++)
    {
        WORD32 i4_nbr_mode;

        if(pu1_mode_array[cand_idx] >= 35)
        {
            continue;
        }

        /* lower neighbour, absent for mode 0 */
        if(0 != pu1_mode_array[cand_idx])
        {
            i4_nbr_mode = pu1_mode_array[cand_idx] - 1;

            if(0 == pu1_hash_table[i4_nbr_mode])
            {
                pu1_hash_table[i4_nbr_mode] = 1;
                pu1_mode_array[u1_num_ipe_modes++] = i4_nbr_mode;
            }
        }

        /* upper neighbour, absent for mode 34. The entry is deliberately */
        /* read again from the array, as the append above may have        */
        /* written to this very slot                                      */
        if(34 != pu1_mode_array[cand_idx])
        {
            i4_nbr_mode = pu1_mode_array[cand_idx] + 1;

            if(0 == pu1_hash_table[i4_nbr_mode])
            {
                pu1_hash_table[i4_nbr_mode] = 1;
                pu1_mode_array[u1_num_ipe_modes++] = i4_nbr_mode;
            }
        }
    }

    if(0 == pu1_hash_table[INTRA_PLANAR])
    {
        pu1_hash_table[INTRA_PLANAR] = 1;
        pu1_mode_array[u1_num_ipe_modes++] = INTRA_PLANAR;
    }

    if(0 == pu1_hash_table[INTRA_DC])
    {
        pu1_hash_table[INTRA_DC] = 1;
        pu1_mode_array[u1_num_ipe_modes++] = INTRA_DC;
    }

    return u1_num_ipe_modes;
}
10476
#if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
/*!
******************************************************************************
* \if Function name : ihevce_determine_tu_tree_distribution \endif
*
* \brief
*    Runs the recursive-TU SATD evaluator matching the CU size on an inter
*    candidate. The evaluator is handed the candidate's ai4_tu_split_flag
*    array through the error params.
*
* \param[in,out] ps_cu_data : inter candidate (pred data, stride, part size,
*                             tu split flags)
* \param[in] ps_func_selector : function selector forwarded to the evaluator
* \param[in] pi2_scratch_mem : scratch memory, used as evaluator working mem
* \param[in] pu1_inp : input (source) pointer
* \param[in] i4_inp_stride : input stride
* \param[in] i4_lambda : lambda forwarded to the evaluator
* \param[in] u1_lambda_q_shift : Q shift of lambda
* \param[in] u1_cu_size : CU size; 64 limits the max transform depth to 1
* \param[in] u1_max_tr_depth : max transform depth
*
* \return
*    Value returned by the selected evaluator
*
*****************************************************************************
*/
WORD32 ihevce_determine_tu_tree_distribution(
    cu_inter_cand_t *ps_cu_data,
    me_func_selector_t *ps_func_selector,
    WORD16 *pi2_scratch_mem,
    UWORD8 *pu1_inp,
    WORD32 i4_inp_stride,
    WORD32 i4_lambda,
    UWORD8 u1_lambda_q_shift,
    UWORD8 u1_cu_size,
    UWORD8 u1_max_tr_depth)
{
    PF_SAD_FXN_TU_REC apf_satd_tu_rec[4];
    err_prms_t s_err_prms;
    WORD32 i4_satd;
    WORD32 i4_fxn_idx;

    /* One evaluator per CU size */
    apf_satd_tu_rec[CU_8x8] = hme_evalsatd_pt_pu_8x8_tu_rec;
    apf_satd_tu_rec[CU_16x16] = hme_evalsatd_pt_pu_16x16_tu_rec;
    apf_satd_tu_rec[CU_32x32] = hme_evalsatd_pt_pu_32x32_tu_rec;
    apf_satd_tu_rec[CU_64x64] = hme_evalsatd_pt_pu_64x64_tu_rec;

    /* Source, prediction and working memory of the evaluation */
    s_err_prms.pu1_inp = pu1_inp;
    s_err_prms.i4_inp_stride = i4_inp_stride;
    s_err_prms.pu1_ref = ps_cu_data->pu1_pred_data;
    s_err_prms.i4_ref_stride = ps_cu_data->i4_pred_data_stride;
    s_err_prms.pu1_wkg_mem = (UWORD8 *)pi2_scratch_mem;

    /* Outputs of the evaluation */
    s_err_prms.pi4_sad_grid = &i4_satd;
    s_err_prms.pi4_tu_split_flags = ps_cu_data->ai4_tu_split_flag;

    /* Transform depth is limited to 1 for 64x64 CUs */
    s_err_prms.u1_max_tr_depth = (64 == u1_cu_size) ? MIN(1, u1_max_tr_depth) : u1_max_tr_depth;

    /* NOTE(review): assumes hme_get_range(size) - 4 yields the CU_8x8..CU_64x64 */
    /* index for sizes 8..64 - confirm against hme_get_range                     */
    i4_fxn_idx = hme_get_range(u1_cu_size) - 4;

    i4_satd = apf_satd_tu_rec[i4_fxn_idx](
        &s_err_prms, i4_lambda, u1_lambda_q_shift, 0, ps_func_selector);

    /* With zero transform depth, a CU whose part size is non-zero gets the */
    /* root split flag forced to 1 (64x64 CUs excluded)                     */
    if((64 != u1_cu_size) && (0 == u1_max_tr_depth) && (0 != ps_cu_data->b3_part_size))
    {
        ps_cu_data->ai4_tu_split_flag[0] = 1;
    }

    return i4_satd;
}
#endif
10528
/*!
******************************************************************************
* \if Function name : ihevce_populate_nbr_4x4_with_pu_data \endif
*
* \brief
*    Marks the first neighbour entry as inter, fills its L0 / L1 prediction
*    flags and mv from the PU, and replicates that entry over a grid of
*    (b4_ht + 1) rows by (b4_wd + 1) columns
*
* \param[in,out] ps_nbr_4x4 : first (top-left) entry of the grid to fill
* \param[in] ps_pu : PU supplying pred mode, mv and dimensions
* \param[in] i4_nbr_buf_stride : distance between grid rows, in entries
*
* \return
*    None
*
*****************************************************************************
*/
void ihevce_populate_nbr_4x4_with_pu_data(
    nbr_4x4_t *ps_nbr_4x4, pu_t *ps_pu, WORD32 i4_nbr_buf_stride)
{
    nbr_4x4_t *ps_row = ps_nbr_4x4;
    WORD32 num_rows = ps_pu->b4_ht + 1;
    WORD32 num_cols = ps_pu->b4_wd + 1;
    WORD32 row, col;

    /* The first entry is updated in place and then used as the template */
    ps_nbr_4x4->mv = ps_pu->mv;
    ps_nbr_4x4->b1_intra_flag = 0;
    /* L0 is used whenever bit 0 of the pred mode is clear */
    ps_nbr_4x4->b1_pred_l0_flag = !(ps_pu->b2_pred_mode & 1);
    /* L1 is used for every pred mode above PRED_L0 */
    ps_nbr_4x4->b1_pred_l1_flag = (ps_pu->b2_pred_mode > PRED_L0);

    for(row = 0; row < num_rows; row++, ps_row += i4_nbr_buf_stride)
    {
        for(col = 0; col < num_cols; col++)
        {
            ps_row[col] = *ps_nbr_4x4;
        }
    }
}
10554
/*!
******************************************************************************
* \if Function name : ihevce_call_luma_inter_pred_rdopt_pass1 \endif
*
* \brief
*    Calls luma inter prediction for every PU of an inter candidate. The
*    output starts at the candidate's pu1_pred_data_scr buffer and is
*    advanced past the first partition for two-partition CUs
*
* \param[in] ps_ctxt : encoder loop ctxt pointer (supplies the MC ctxt)
* \param[in] ps_inter_cand : inter candidate (PUs, part size, pred buffer)
* \param[in] cu_size : CU size, used to tell the partition direction
*
* \return
*    None
*
*****************************************************************************
*/
void ihevce_call_luma_inter_pred_rdopt_pass1(
    ihevce_enc_loop_ctxt_t *ps_ctxt, cu_inter_cand_t *ps_inter_cand, WORD32 cu_size)
{
    pu_t *ps_pu;
    UWORD8 *pu1_pred;
    WORD32 pred_stride, ctr, num_cu_part;
    WORD32 inter_pu_wd, inter_pu_ht;

    pu1_pred = ps_inter_cand->pu1_pred_data_scr;
    pred_stride = ps_inter_cand->i4_pred_data_stride;

    /* Anything other than 2Nx2N has two partitions */
    num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1;

    for(ctr = 0; ctr < num_cu_part; ctr++)
    {
        ps_pu = &ps_inter_cand->as_inter_pu[ctr];

        /* IF AMP then each partition can have diff wd ht */
        inter_pu_wd = (ps_pu->b4_wd + 1) << 2;
        inter_pu_ht = (ps_pu->b4_ht + 1) << 2;

        /* Prediction is run for every PU, skip / merge PUs included */
        /* NOTE(review): meaning of the trailing argument '1' is not visible here */
        ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 1);

        /* After the first of two partitions, move to where the second starts */
        if((2 == num_cu_part) && (0 == ctr))
        {
            /* 2Nx__ partition case : second partition lies below the first */
            if(inter_pu_wd == cu_size)
            {
                pu1_pred += (inter_pu_ht * pred_stride);
            }

            /* __x2N partition case : second partition lies right of the first */
            if(inter_pu_ht == cu_size)
            {
                pu1_pred += inter_pu_wd;
            }
        }
    }
}
10596
ihevce_it_recon_ssd(ihevce_enc_loop_ctxt_t * ps_ctxt,UWORD8 * pu1_src,WORD32 i4_src_strd,UWORD8 * pu1_pred,WORD32 i4_pred_strd,WORD16 * pi2_deq_data,WORD32 i4_deq_data_strd,UWORD8 * pu1_recon,WORD32 i4_recon_stride,UWORD8 * pu1_ecd_data,UWORD8 u1_trans_size,UWORD8 u1_pred_mode,WORD32 i4_cbf,WORD32 i4_zero_col,WORD32 i4_zero_row,CHROMA_PLANE_ID_T e_chroma_plane)10597 LWORD64 ihevce_it_recon_ssd(
10598 ihevce_enc_loop_ctxt_t *ps_ctxt,
10599 UWORD8 *pu1_src,
10600 WORD32 i4_src_strd,
10601 UWORD8 *pu1_pred,
10602 WORD32 i4_pred_strd,
10603 WORD16 *pi2_deq_data,
10604 WORD32 i4_deq_data_strd,
10605 UWORD8 *pu1_recon,
10606 WORD32 i4_recon_stride,
10607 UWORD8 *pu1_ecd_data,
10608 UWORD8 u1_trans_size,
10609 UWORD8 u1_pred_mode,
10610 WORD32 i4_cbf,
10611 WORD32 i4_zero_col,
10612 WORD32 i4_zero_row,
10613 CHROMA_PLANE_ID_T e_chroma_plane)
10614 {
10615 if(NULL_PLANE == e_chroma_plane)
10616 {
10617 ihevce_it_recon_fxn(
10618 ps_ctxt,
10619 pi2_deq_data,
10620 i4_deq_data_strd,
10621 pu1_pred,
10622 i4_pred_strd,
10623 pu1_recon,
10624 i4_recon_stride,
10625 pu1_ecd_data,
10626 u1_trans_size,
10627 u1_pred_mode,
10628 i4_cbf,
10629 i4_zero_col,
10630 i4_zero_row);
10631
10632 return ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
10633 pu1_recon, pu1_src, i4_recon_stride, i4_src_strd, u1_trans_size, u1_trans_size);
10634 }
10635 else
10636 {
10637 ihevce_chroma_it_recon_fxn(
10638 ps_ctxt,
10639 pi2_deq_data,
10640 i4_deq_data_strd,
10641 pu1_pred,
10642 i4_pred_strd,
10643 pu1_recon,
10644 i4_recon_stride,
10645 pu1_ecd_data,
10646 u1_trans_size,
10647 i4_cbf,
10648 i4_zero_col,
10649 i4_zero_row,
10650 e_chroma_plane);
10651
10652 return ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
10653 pu1_recon + (e_chroma_plane == V_PLANE),
10654 pu1_src + (e_chroma_plane == V_PLANE),
10655 i4_recon_stride,
10656 i4_src_strd,
10657 u1_trans_size,
10658 u1_trans_size);
10659 }
10660 }
10661
10662 /*!
10663 ******************************************************************************
* \if Function name : ihevce_chroma_t_q_iq_ssd_scan_fxn \endif
10665 *
10666 * \brief
10667 * Transform unit level (Chroma) enc_loop function
10668 *
10669 * \param[in] ps_ctxt enc_loop module ctxt pointer
10670 * \param[in] pu1_pred pointer to predicted data buffer
10671 * \param[in] pred_strd predicted buffer stride
10672 * \param[in] pu1_src pointer to source data buffer
10673 * \param[in] src_strd source buffer stride
10674 * \param[in] pi2_deq_data pointer to store iq data
10675 * \param[in] deq_data_strd iq data buffer stride
10676 * \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod)
10677 * \param[out] pu1_csbf_buf pointer to store the csbf for all 4x4 in a current
10678 * block
10679 * \param[out] csbf_strd csbf buffer stride
10680 * \param[in] trans_size transform size (4, 8, 16)
10681 * \param[in] intra_flag 0:Inter/Skip 1:Intra
* \param[out] pi4_coeff_off pointer to store the number of bytes produced in
*                           coeff buffer for the current TU in RDopt Mode
10685 * \param[out] pi4_zero_col pointer to store the zero_col info for the TU
10686 * \param[out] pi4_zero_row pointer to store the zero_row info for the TU
10687 *
10688 * \return
10689 * CBF of the current block
10690 *
10691 * \author
10692 * Ittiam
10693 *
10694 *****************************************************************************
10695 */
ihevce_chroma_t_q_iq_ssd_scan_fxn(ihevce_enc_loop_ctxt_t * ps_ctxt,UWORD8 * pu1_pred,WORD32 pred_strd,UWORD8 * pu1_src,WORD32 src_strd,WORD16 * pi2_deq_data,WORD32 deq_data_strd,UWORD8 * pu1_recon,WORD32 i4_recon_stride,UWORD8 * pu1_ecd_data,UWORD8 * pu1_csbf_buf,WORD32 csbf_strd,WORD32 trans_size,WORD32 i4_scan_idx,WORD32 intra_flag,WORD32 * pi4_coeff_off,WORD32 * pi4_tu_bits,WORD32 * pi4_zero_col,WORD32 * pi4_zero_row,UWORD8 * pu1_is_recon_available,WORD32 i4_perform_sbh,WORD32 i4_perform_rdoq,LWORD64 * pi8_cost,WORD32 i4_alpha_stim_multiplier,UWORD8 u1_is_cu_noisy,UWORD8 u1_is_skip,SSD_TYPE_T e_ssd_type,CHROMA_PLANE_ID_T e_chroma_plane)10696 WORD32 ihevce_chroma_t_q_iq_ssd_scan_fxn(
10697 ihevce_enc_loop_ctxt_t *ps_ctxt,
10698 UWORD8 *pu1_pred,
10699 WORD32 pred_strd,
10700 UWORD8 *pu1_src,
10701 WORD32 src_strd,
10702 WORD16 *pi2_deq_data,
10703 WORD32 deq_data_strd,
10704 UWORD8 *pu1_recon,
10705 WORD32 i4_recon_stride,
10706 UWORD8 *pu1_ecd_data,
10707 UWORD8 *pu1_csbf_buf,
10708 WORD32 csbf_strd,
10709 WORD32 trans_size,
10710 WORD32 i4_scan_idx,
10711 WORD32 intra_flag,
10712 WORD32 *pi4_coeff_off,
10713 WORD32 *pi4_tu_bits,
10714 WORD32 *pi4_zero_col,
10715 WORD32 *pi4_zero_row,
10716 UWORD8 *pu1_is_recon_available,
10717 WORD32 i4_perform_sbh,
10718 WORD32 i4_perform_rdoq,
10719 LWORD64 *pi8_cost,
10720 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
10721 WORD32 i4_alpha_stim_multiplier,
10722 UWORD8 u1_is_cu_noisy,
10723 #endif
10724 UWORD8 u1_is_skip,
10725 SSD_TYPE_T e_ssd_type,
10726 CHROMA_PLANE_ID_T e_chroma_plane)
10727 {
10728 WORD32 trans_idx, cbf, u4_blk_sad;
10729 WORD16 *pi2_quant_coeffs;
10730 WORD16 *pi2_trans_values;
10731 WORD32 quant_scale_mat_offset;
10732 WORD32 *pi4_trans_scratch;
10733 WORD32 *pi4_subBlock2csbfId_map = NULL;
10734
10735 #if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
10736 WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i;
10737 #endif
10738
10739 rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt;
10740
10741 WORD32 i4_perform_zcbf = (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE) ||
10742 (!intra_flag && ENABLE_INTER_ZCU_COST);
10743 WORD32 i4_perform_coeff_level_rdoq =
10744 (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING) &&
10745 (ps_ctxt->i4_chroma_quant_rounding_level == CHROMA_QUANT_ROUNDING);
10746
10747 ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE));
10748 ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW);
10749
10750 *pi4_coeff_off = 0;
10751 *pi4_tu_bits = 0;
10752 pu1_is_recon_available[0] = 0;
10753
10754 pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0];
10755 pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
10756 pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2);
10757
10758 if(2 == trans_size)
10759 {
10760 trans_size = 4;
10761 }
10762
10763 /* translate the transform size to index */
10764 trans_idx = trans_size >> 2;
10765
10766 if(16 == trans_size)
10767 {
10768 trans_idx = 3;
10769 }
10770
10771 if(u1_is_skip)
10772 {
10773 pi8_cost[0] = ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
10774 pu1_pred + e_chroma_plane,
10775 pu1_src + e_chroma_plane,
10776 pred_strd,
10777 src_strd,
10778 trans_size,
10779 trans_size);
10780
10781 if(e_ssd_type == SPATIAL_DOMAIN_SSD)
10782 {
10783 /* buffer copy fromp pred to recon */
10784 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
10785 pu1_pred,
10786 pred_strd,
10787 pu1_recon,
10788 i4_recon_stride,
10789 trans_size,
10790 trans_size,
10791 e_chroma_plane);
10792
10793 pu1_is_recon_available[0] = 1;
10794 }
10795
10796 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
10797 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
10798 {
10799 pi8_cost[0] = ihevce_inject_stim_into_distortion(
10800 pu1_src,
10801 src_strd,
10802 pu1_pred,
10803 pred_strd,
10804 pi8_cost[0],
10805 i4_alpha_stim_multiplier,
10806 trans_size,
10807 0,
10808 ps_ctxt->u1_enable_psyRDOPT,
10809 e_chroma_plane);
10810 }
10811 #endif
10812
10813 #if ENABLE_INTER_ZCU_COST
10814 #if !WEIGH_CHROMA_COST
10815 /* cbf = 0, accumulate cu not coded cost */
10816 ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
10817 #else
10818 ps_ctxt->i8_cu_not_coded_cost += (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor +
10819 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
10820 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT;
10821 #endif
10822 #endif
10823
10824 return 0;
10825 }
10826
10827 if(intra_flag == 1)
10828 {
10829 quant_scale_mat_offset = 0;
10830
10831 #if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
10832 ai4_quant_rounding_factors[0][0] =
10833 MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3);
10834
10835 for(i = 0; i < trans_size * trans_size; i++)
10836 {
10837 ai4_quant_rounding_factors[1][i] =
10838 MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3][i],
10839 (1 << QUANT_ROUND_FACTOR_Q) / 3);
10840 ai4_quant_rounding_factors[2][i] =
10841 MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3][i],
10842 (1 << QUANT_ROUND_FACTOR_Q) / 3);
10843 }
10844 #endif
10845 }
10846 else
10847 {
10848 quant_scale_mat_offset = NUM_TRANS_TYPES;
10849 }
10850
10851 switch(trans_size)
10852 {
10853 case 4:
10854 {
10855 pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU;
10856
10857 break;
10858 }
10859 case 8:
10860 {
10861 pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU;
10862
10863 break;
10864 }
10865 case 16:
10866 {
10867 pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU;
10868
10869 break;
10870 }
10871 case 32:
10872 {
10873 pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU;
10874
10875 break;
10876 }
10877 }
10878
10879 /* ---------- call residue and transform block ------- */
10880 u4_blk_sad = ps_ctxt->apf_chrm_resd_trns[trans_idx - 1](
10881 pu1_src + (e_chroma_plane == V_PLANE),
10882 pu1_pred + (e_chroma_plane == V_PLANE),
10883 pi4_trans_scratch,
10884 pi2_trans_values,
10885 src_strd,
10886 pred_strd,
10887 ((trans_size << 16) + 1)); /* dst strd and chroma flag are packed together */
10888 (void)u4_blk_sad;
10889 /* -------- calculate SSD calculation in Transform Domain ------ */
10890
10891 cbf = ps_ctxt->apf_quant_iquant_ssd
10892 [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2]
10893
10894 (pi2_trans_values,
10895 ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset],
10896 pi2_quant_coeffs,
10897 pi2_deq_data,
10898 trans_size,
10899 ps_ctxt->i4_chrm_cu_qp_div6,
10900 ps_ctxt->i4_chrm_cu_qp_mod6,
10901 #if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
10902 ps_ctxt->i4_quant_rnd_factor[intra_flag],
10903 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3],
10904 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3],
10905 #else
10906 intra_flag ? ai4_quant_rounding_factors[0][0] : ps_ctxt->i4_quant_rnd_factor[intra_flag],
10907 intra_flag ? ai4_quant_rounding_factors[1]
10908 : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3],
10909 intra_flag ? ai4_quant_rounding_factors[2]
10910 : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3],
10911 #endif
10912 trans_size,
10913 trans_size,
10914 deq_data_strd,
10915 pu1_csbf_buf,
10916 csbf_strd,
10917 pi4_zero_col,
10918 pi4_zero_row,
10919 ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset],
10920 pi8_cost);
10921
10922 if(e_ssd_type != FREQUENCY_DOMAIN_SSD)
10923 {
10924 pi8_cost[0] = UINT_MAX;
10925 }
10926
10927 if(0 != cbf)
10928 {
10929 if(i4_perform_sbh || i4_perform_rdoq)
10930 {
10931 ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd;
10932 ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size;
10933
10934 ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_chrm_cu_qp_div6;
10935 ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_chrm_cu_qp_mod6;
10936 ps_rdoq_sbh_ctxt->i4_scan_idx = i4_scan_idx;
10937 ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
10938 ps_rdoq_sbh_ctxt->i4_trans_size = trans_size;
10939
10940 ps_rdoq_sbh_ctxt->pi2_dequant_coeff =
10941 ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset];
10942 ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data;
10943 ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs;
10944 ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values;
10945 ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf;
10946 ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map;
10947
10948 if((!i4_perform_rdoq))
10949 {
10950 ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
10951
10952 pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
10953 }
10954 }
10955
10956 /* ------- call coeffs scan function ------- */
10957 *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
10958 pi2_quant_coeffs,
10959 pi4_subBlock2csbfId_map,
10960 i4_scan_idx,
10961 trans_size,
10962 pu1_ecd_data,
10963 pu1_csbf_buf,
10964 csbf_strd);
10965 }
10966
10967 /* Normalize Cost. Note : trans_idx, not (trans_idx-1) */
10968 pi8_cost[0] >>= ga_trans_shift[trans_idx];
10969
10970 #if RDOPT_ZERO_CBF_ENABLE
10971 if((0 != cbf))
10972 {
10973 WORD32 tu_bits;
10974 LWORD64 zero_cbf_cost_u, curr_cb_cod_cost;
10975
10976 zero_cbf_cost_u = 0;
10977
10978 /*Populating the fields of rdoq_ctxt structure*/
10979 if(i4_perform_rdoq)
10980 {
10981 //memset(ps_rdoq_sbh_ctxt,0,sizeof(rdoq_sbh_ctxt_t));
10982 /* transform size to log2transform size */
10983 GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size);
10984 ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1;
10985
10986 ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_chroma_qf;
10987 ps_rdoq_sbh_ctxt->i4_is_luma = 0;
10988 ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx];
10989 ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td =
10990 (1 << (ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td - 1));
10991 ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0;
10992 ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col;
10993 ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row;
10994 }
10995 else if(i4_perform_zcbf)
10996 {
10997 /* cost of zero cbf encoding */
10998 zero_cbf_cost_u =
10999
11000 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
11001 pu1_pred + (e_chroma_plane == V_PLANE),
11002 pu1_src + (e_chroma_plane == V_PLANE),
11003 pred_strd,
11004 src_strd,
11005 trans_size,
11006 trans_size);
11007 }
11008
11009 /************************************************************************/
11010 /* call the entropy rdo encode to get the bit estimate for current tu */
11011 /* note that tu includes only residual coding bits and does not include */
11012 /* tu split, cbf and qp delta encoding bits for a TU */
11013 /************************************************************************/
11014 if(i4_perform_rdoq)
11015 {
11016 tu_bits = ihevce_entropy_rdo_encode_tu_rdoq(
11017 &ps_ctxt->s_rdopt_entropy_ctxt,
11018 pu1_ecd_data,
11019 trans_size,
11020 0,
11021 ps_rdoq_sbh_ctxt,
11022 pi8_cost,
11023 &zero_cbf_cost_u,
11024 0);
11025 //Currently, we are not accounting for sign bit in RDOPT bits calculation when RDOQ is turned on
11026
11027 if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0)
11028 {
11029 cbf = 0;
11030
11031 /* num bytes is set to 0 */
11032 *pi4_coeff_off = 0;
11033 }
11034
11035 (*pi4_tu_bits) += tu_bits;
11036
11037 if((i4_perform_sbh) && (0 != cbf))
11038 {
11039 ps_rdoq_sbh_ctxt->i8_ssd_cost = pi8_cost[0];
11040
11041 ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
11042
11043 pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
11044 }
11045
11046 /*Add round value before normalizing*/
11047 pi8_cost[0] += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td;
11048 pi8_cost[0] >>= ga_trans_shift[trans_idx];
11049
11050 if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1)
11051 {
11052 *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
11053 pi2_quant_coeffs,
11054 pi4_subBlock2csbfId_map,
11055 i4_scan_idx,
11056 trans_size,
11057 pu1_ecd_data,
11058 ps_rdoq_sbh_ctxt->pu1_csbf_buf,
11059 csbf_strd);
11060 }
11061 }
11062 else
11063 {
11064 /************************************************************************/
11065 /* call the entropy rdo encode to get the bit estimate for current tu */
11066 /* note that tu includes only residual coding bits and does not include */
11067 /* tu split, cbf and qp delta encoding bits for a TU */
11068 /************************************************************************/
11069 tu_bits = ihevce_entropy_rdo_encode_tu(
11070 &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 0, i4_perform_sbh);
11071
11072 (*pi4_tu_bits) += tu_bits;
11073 }
11074
11075 if(e_ssd_type == SPATIAL_DOMAIN_SSD)
11076 {
11077 pi8_cost[0] = ihevce_it_recon_ssd(
11078 ps_ctxt,
11079 pu1_src,
11080 src_strd,
11081 pu1_pred,
11082 pred_strd,
11083 pi2_deq_data,
11084 deq_data_strd,
11085 pu1_recon,
11086 i4_recon_stride,
11087 pu1_ecd_data,
11088 trans_size,
11089 PRED_MODE_INTRA,
11090 cbf,
11091 pi4_zero_col[0],
11092 pi4_zero_row[0],
11093 e_chroma_plane);
11094
11095 pu1_is_recon_available[0] = 1;
11096 }
11097
11098 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
11099 if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
11100 {
11101 pi8_cost[0] = ihevce_inject_stim_into_distortion(
11102 pu1_src,
11103 src_strd,
11104 pu1_recon,
11105 i4_recon_stride,
11106 pi8_cost[0],
11107 i4_alpha_stim_multiplier,
11108 trans_size,
11109 0,
11110 ps_ctxt->u1_enable_psyRDOPT,
11111 e_chroma_plane);
11112 }
11113 else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
11114 {
11115 pi8_cost[0] = ihevce_inject_stim_into_distortion(
11116 pu1_src,
11117 src_strd,
11118 pu1_pred,
11119 pred_strd,
11120 pi8_cost[0],
11121 i4_alpha_stim_multiplier,
11122 trans_size,
11123 0,
11124 ps_ctxt->u1_enable_psyRDOPT,
11125 e_chroma_plane);
11126 }
11127 #endif
11128
11129 curr_cb_cod_cost = pi8_cost[0];
11130
11131 /* add the SSD cost to bits estimate given by ECD */
11132 curr_cb_cod_cost +=
11133 COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
11134
11135 if(i4_perform_zcbf)
11136 {
11137 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
11138 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
11139 {
11140 zero_cbf_cost_u = ihevce_inject_stim_into_distortion(
11141 pu1_src,
11142 src_strd,
11143 pu1_pred,
11144 pred_strd,
11145 zero_cbf_cost_u,
11146 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
11147 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
11148 (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
11149 100.0,
11150 trans_size,
11151 0,
11152 ps_ctxt->u1_enable_psyRDOPT,
11153 e_chroma_plane);
11154 }
11155 #endif
11156 /* force the tu as zero cbf if zero_cbf_cost is lower */
11157 if(zero_cbf_cost_u < curr_cb_cod_cost)
11158 {
11159 *pi4_coeff_off = 0;
11160 cbf = 0;
11161 (*pi4_tu_bits) = 0;
11162 pi8_cost[0] = zero_cbf_cost_u;
11163
11164 pu1_is_recon_available[0] = 0;
11165
11166 if(e_ssd_type == SPATIAL_DOMAIN_SSD)
11167 {
11168 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
11169 pu1_pred,
11170 pred_strd,
11171 pu1_recon,
11172 i4_recon_stride,
11173 trans_size,
11174 trans_size,
11175 e_chroma_plane);
11176
11177 pu1_is_recon_available[0] = 1;
11178 }
11179 }
11180
11181 #if ENABLE_INTER_ZCU_COST
11182 if(!intra_flag)
11183 {
11184 #if !WEIGH_CHROMA_COST
11185 ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost_u;
11186 #else
11187 ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
11188 (zero_cbf_cost_u * ps_ctxt->u4_chroma_cost_weighing_factor +
11189 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
11190 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
11191 #endif
11192 }
11193 #endif
11194 }
11195 }
11196 else
11197 {
11198 if(e_ssd_type == SPATIAL_DOMAIN_SSD)
11199 {
11200 pi8_cost[0] = ihevce_it_recon_ssd(
11201 ps_ctxt,
11202 pu1_src,
11203 src_strd,
11204 pu1_pred,
11205 pred_strd,
11206 pi2_deq_data,
11207 deq_data_strd,
11208 pu1_recon,
11209 i4_recon_stride,
11210 pu1_ecd_data,
11211 trans_size,
11212 PRED_MODE_INTRA,
11213 cbf,
11214 pi4_zero_col[0],
11215 pi4_zero_row[0],
11216 e_chroma_plane);
11217
11218 pu1_is_recon_available[0] = 1;
11219 }
11220
11221 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
11222 if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
11223 {
11224 pi8_cost[0] = ihevce_inject_stim_into_distortion(
11225 pu1_src,
11226 src_strd,
11227 pu1_recon,
11228 i4_recon_stride,
11229 pi8_cost[0],
11230 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
11231 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
11232 (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
11233 100.0,
11234 trans_size,
11235 0,
11236 ps_ctxt->u1_enable_psyRDOPT,
11237 e_chroma_plane);
11238 }
11239 else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
11240 {
11241 pi8_cost[0] = ihevce_inject_stim_into_distortion(
11242 pu1_src,
11243 src_strd,
11244 pu1_pred,
11245 pred_strd,
11246 pi8_cost[0],
11247 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
11248 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
11249 (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
11250 100.0,
11251 trans_size,
11252 0,
11253 ps_ctxt->u1_enable_psyRDOPT,
11254 e_chroma_plane);
11255 }
11256 #endif
11257
11258 #if ENABLE_INTER_ZCU_COST
11259 if(!intra_flag)
11260 {
11261 #if !WEIGH_CHROMA_COST
11262 /* cbf = 0, accumulate cu not coded cost */
11263 ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
11264 #else
11265 /* cbf = 0, accumulate cu not coded cost */
11266
11267 ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
11268 (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor +
11269 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
11270 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
11271 #endif
11272 }
11273 #endif
11274 }
11275 #endif /* RDOPT_ZERO_CBF_ENABLE */
11276
11277 return (cbf);
11278 }
11279