1 /******************************************************************************
2 *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /*!
22 ******************************************************************************
23 * \file ihevce_enc_loop_utils.c
24 *
25 * \brief
26 * This file contains utility functions of Encode loop
27 *
28 * \date
29 * 18/09/2012
30 *
31 * \author
32 * Ittiam
33 *
34 *
35 * List of Functions
36 *
37 *
38 ******************************************************************************
39 */
40
41 /*****************************************************************************/
42 /* File Includes */
43 /*****************************************************************************/
44 /* System include files */
45 #include <stdio.h>
46 #include <string.h>
47 #include <stdlib.h>
48 #include <assert.h>
49 #include <stdarg.h>
50 #include <math.h>
51 #include <limits.h>
52
53 /* User include files */
54 #include "ihevc_typedefs.h"
55 #include "itt_video_api.h"
56 #include "ihevce_api.h"
57
58 #include "rc_cntrl_param.h"
59 #include "rc_frame_info_collector.h"
60 #include "rc_look_ahead_params.h"
61
62 #include "ihevc_defs.h"
63 #include "ihevc_macros.h"
64 #include "ihevc_debug.h"
65 #include "ihevc_structs.h"
66 #include "ihevc_platform_macros.h"
67 #include "ihevc_deblk.h"
68 #include "ihevc_itrans_recon.h"
69 #include "ihevc_chroma_itrans_recon.h"
70 #include "ihevc_chroma_intra_pred.h"
71 #include "ihevc_intra_pred.h"
72 #include "ihevc_inter_pred.h"
73 #include "ihevc_mem_fns.h"
74 #include "ihevc_padding.h"
75 #include "ihevc_weighted_pred.h"
76 #include "ihevc_sao.h"
77 #include "ihevc_resi_trans.h"
78 #include "ihevc_quant_iquant_ssd.h"
79 #include "ihevc_cabac_tables.h"
80 #include "ihevc_common_tables.h"
81
82 #include "ihevce_defs.h"
83 #include "ihevce_hle_interface.h"
84 #include "ihevce_lap_enc_structs.h"
85 #include "ihevce_multi_thrd_structs.h"
86 #include "ihevce_multi_thrd_funcs.h"
87 #include "ihevce_me_common_defs.h"
88 #include "ihevce_had_satd.h"
89 #include "ihevce_error_codes.h"
90 #include "ihevce_bitstream.h"
91 #include "ihevce_cabac.h"
92 #include "ihevce_rdoq_macros.h"
93 #include "ihevce_function_selector.h"
94 #include "ihevce_enc_structs.h"
95 #include "ihevce_entropy_structs.h"
96 #include "ihevce_cmn_utils_instr_set_router.h"
97 #include "ihevce_ipe_instr_set_router.h"
98 #include "ihevce_decomp_pre_intra_structs.h"
99 #include "ihevce_decomp_pre_intra_pass.h"
100 #include "ihevce_enc_loop_structs.h"
101 #include "ihevce_nbr_avail.h"
102 #include "ihevce_enc_loop_utils.h"
103 #include "ihevce_sub_pic_rc.h"
104 #include "ihevce_global_tables.h"
105 #include "ihevce_bs_compute_ctb.h"
106 #include "ihevce_cabac_rdo.h"
107 #include "ihevce_deblk.h"
108 #include "ihevce_frame_process.h"
109 #include "ihevce_rc_enc_structs.h"
110 #include "hme_datatype.h"
111 #include "hme_interface.h"
112 #include "hme_common_defs.h"
113 #include "hme_defs.h"
114 #include "hme_common_utils.h"
115 #include "ihevce_me_instr_set_router.h"
116 #include "ihevce_enc_subpel_gen.h"
117 #include "ihevce_inter_pred.h"
118 #include "ihevce_mv_pred.h"
119 #include "ihevce_mv_pred_merge.h"
120 #include "ihevce_enc_loop_inter_mode_sifter.h"
121 #include "ihevce_enc_cu_recursion.h"
122 #include "ihevce_enc_loop_pass.h"
123 #include "ihevce_common_utils.h"
124 #include "ihevce_dep_mngr_interface.h"
125 #include "ihevce_sao.h"
126 #include "ihevce_tile_interface.h"
127 #include "ihevce_profile.h"
128 #include "ihevce_stasino_helpers.h"
129 #include "ihevce_tu_tree_selector.h"
130
131 /*****************************************************************************/
132 /* Globals */
133 /*****************************************************************************/
134
135 extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2];
136 extern const UWORD8 gu1_hevce_scan4x4[3][16];
137 extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc[4][16];
138 extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_tr4[16];
139 extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_00[16];
140
141 /*****************************************************************************/
142 /* Constant Macros */
143 /*****************************************************************************/
144 #define ENABLE_ZERO_CBF 1
145 #define DISABLE_RDOQ_INTRA 0
146
147 /*****************************************************************************/
148 /* Function Definitions */
149 /*****************************************************************************/
/*!
******************************************************************************
* \if Function name : ihevce_tu_tree_update \endif
*
* \brief
*    Recursively unpacks the TU split / early CBF bit-fields of a TU tree node
*    into consecutive tu_prms_t entries (one entry per leaf TU)
*
* \param[inout] ps_tu_prms    : entry to be filled next; its u1_tu_size is read
*                               on entry and used as the size of the tree root
* \param[inout] pnum_tu_in_cu : running count of TUs emitted so far
* \param[in] depth            : depth of this node (node size = root size >> depth)
* \param[in] tu_split_flag    : bit 0 is the split flag of this node, the bits
*                               above it hold four child fields (5 bits each
*                               for a 32x32 node, 1 bit each otherwise) with
*                               the first child in the most significant field
* \param[in] tu_early_cbf     : early CBF bits, packed like tu_split_flag
* \param[in] i4_x_off         : x offset of this node
* \param[in] i4_y_off         : y offset of this node
*
* \return
*    Entry following the last one written, or the last written entry itself
*    once *pnum_tu_in_cu has reached MAX_TU_IN_CTB
*
******************************************************************************
*/
void *ihevce_tu_tree_update(
    tu_prms_t *ps_tu_prms,
    WORD32 *pnum_tu_in_cu,
    WORD32 depth,
    WORD32 tu_split_flag,
    WORD32 tu_early_cbf,
    WORD32 i4_x_off,
    WORD32 i4_y_off)
{
    /* Size of the tree root, as stored in the entry handed to this call */
    WORD32 i4_root_size = ps_tu_prms->u1_tu_size;
    WORD32 i4_node_size = i4_root_size >> depth;
    WORD32 i4_child_size = i4_root_size >> (depth + 1);
    WORD32 i4_node_is_split = tu_split_flag & 0x1;
    WORD32 i4_child;

    if((i4_node_size >= 16) && i4_node_is_split)
    {
        /* A 32x32 node carries a 5 bit field per child (the child's own  */
        /* split bit plus its four 1 bit grand-children), any other node  */
        /* carries a single bit per child                                 */
        WORD32 i4_field_bits = (32 == i4_node_size) ? 5 : 1;
        WORD32 i4_field_mask = (32 == i4_node_size) ? 0x1F : 0x1;

        /* Children are visited in the order TL, TR, BL, BR */
        for(i4_child = 0; i4_child < 4; i4_child++)
        {
            /* Field of child 0 is the most significant one, bit 0 is skipped */
            WORD32 i4_field_pos = 1 + ((3 - i4_child) * i4_field_bits);
            WORD32 i4_child_split = (tu_split_flag >> i4_field_pos) & i4_field_mask;
            WORD32 i4_child_cbf = (tu_early_cbf >> i4_field_pos) & i4_field_mask;
            WORD32 i4_child_x = i4_x_off + ((i4_child & 1) * i4_child_size);
            WORD32 i4_child_y = i4_y_off + ((i4_child >> 1) * i4_child_size);

            ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
                ps_tu_prms,
                pnum_tu_in_cu,
                depth + 1,
                i4_child_split,
                i4_child_cbf,
                i4_child_x,
                i4_child_y);
        }

        return ps_tu_prms;
    }

    if(i4_node_is_split)
    {
        /* Split node below 16x16 (the 8x8 case): emit its four children */
        /* directly, in the order TL, TR, BL, BR                         */
        (*pnum_tu_in_cu) += 4;

        for(i4_child = 0; i4_child < 4; i4_child++)
        {
            if(i4_child > 0)
            {
                ps_tu_prms++;
            }

            ps_tu_prms->u1_tu_size = i4_child_size;
            ps_tu_prms->u1_x_off = i4_x_off + ((i4_child & 1) * i4_child_size);
            ps_tu_prms->u1_y_off = i4_y_off + ((i4_child >> 1) * i4_child_size);

            /* Early CBF is not done for 4x4 transforms */
            ps_tu_prms->i4_early_cbf = 1;
        }
    }
    else
    {
        /* Unsplit node: a single TU covering the whole node */
        ps_tu_prms->u1_tu_size = i4_node_size;
        ps_tu_prms->u1_x_off = i4_x_off;
        ps_tu_prms->u1_y_off = i4_y_off;
        ps_tu_prms->i4_early_cbf = tu_early_cbf & 0x1;

        (*pnum_tu_in_cu)++;
    }

    /* Move to the next free entry and seed it with the root size, since the */
    /* next call reads the root size back from the entry it is handed        */
    if((*pnum_tu_in_cu) < MAX_TU_IN_CTB)
    {
        ps_tu_prms++;
        ps_tu_prms->u1_tu_size = i4_root_size;
    }

    return ps_tu_prms;
}
311
312 /*!
313 ******************************************************************************
314 * \if Function name : ihevce_compute_quant_rel_param \endif
315 *
316 * \brief
317 * This function updates quantization related parameters like qp_mod_6 etc in
318 * context according to new qp
319 *
320 * \date
321 * 08/01/2013
322 *
323 * \author
324 * Ittiam
325 *
326 * \return
327 *
328 * List of Functions
329 *
330 *
331 ******************************************************************************
332 */
/*!
******************************************************************************
* \if Function name : ihevce_compute_quant_rel_param \endif
*
* \brief
*    Refreshes the QP dependent quantisation parameters held in the encode
*    loop context for a new CU QP
*
* \param[inout] ps_ctxt : encode loop context. Reads u1_chroma_array_type,
*                         i4_chroma_qp_offset, u1_bit_depth and i1_slice_type;
*                         writes i4_chrm_cu_qp, the luma and chroma
*                         qp_div6 / qp_mod6 values and the intra / inter
*                         entries of i4_quant_rnd_factor
* \param[in] i1_cu_qp   : QP of the current CU
*
* \return
*    None
*
******************************************************************************
*/
void ihevce_compute_quant_rel_param(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD8 i1_cu_qp)
{
    /* QP offset contributed by the bit depth: 6 per bit above 8 */
    WORD32 i4_qp_bd_offset = 6 * (ps_ctxt->u1_bit_depth - 8);
    WORD32 i4_luma_qp_bd = i1_cu_qp + i4_qp_bd_offset;
    WORD32 i4_chroma_qp_bd;

    /* Chroma QP: clamped at 51 when chroma array type is 2, */
    /* otherwise mapped through the chroma QP scale table    */
    if(2 == ps_ctxt->u1_chroma_array_type)
    {
        ps_ctxt->i4_chrm_cu_qp = MIN(i1_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51);
    }
    else
    {
        ps_ctxt->i4_chrm_cu_qp =
            gai1_ihevc_chroma_qp_scale[i1_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET];
    }

    i4_chroma_qp_bd = ps_ctxt->i4_chrm_cu_qp + i4_qp_bd_offset;

    ps_ctxt->i4_cu_qp_div6 = i4_luma_qp_bd / 6;
    ps_ctxt->i4_cu_qp_mod6 = i4_luma_qp_bd % 6;
    ps_ctxt->i4_chrm_cu_qp_div6 = i4_chroma_qp_bd / 6;
    ps_ctxt->i4_chrm_cu_qp_mod6 = i4_chroma_qp_bd % 6;

#define INTER_RND_QP_BY_6
#ifdef INTER_RND_QP_BY_6
    /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
    ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] =
        (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)6) + 0.5f);
#else
    /* quant factor without RDOQ is 1/3rd of shift for inter */
    ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3;
#endif

    if(ISLICE == ps_ctxt->i1_slice_type)
    {
        /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
        ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
            (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f);
    }
    else
    {
        /* Intra blocks in non-I slices use the inter rounding factor. The  */
        /* 1/3rd rounding alternative (TRAQO_EXT_ENABLE_ONE_THIRD_RND) was  */
        /* permanently disabled here, so only this path is kept             */
        ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
            ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER];
    }
}
383
384 /*!
385 ******************************************************************************
386 * \if Function name : ihevce_populate_cl_cu_lambda_prms \endif
387 *
388 * \brief
389 * Function which calculates the Lambda params for current picture
390 *
391 * \param[in] ps_enc_ctxt : encoder ctxt pointer
392 * \param[in] ps_cur_pic_ctxt : current pic ctxt
393 * \param[in] i4_cur_frame_qp : current pic QP
394 * \param[in] first_field : is first field flag
395 * \param[in] i4_temporal_lyr_id : Current picture layer id
396 *
397 * \return
398 * None
399 *
400 * \author
401 * Ittiam
402 *
403 *****************************************************************************
404 */
ihevce_populate_cl_cu_lambda_prms(ihevce_enc_loop_ctxt_t * ps_ctxt,frm_lambda_ctxt_t * ps_frm_lamda,WORD32 i4_slice_type,WORD32 i4_temporal_lyr_id,WORD32 i4_lambda_type)405 void ihevce_populate_cl_cu_lambda_prms(
406 ihevce_enc_loop_ctxt_t *ps_ctxt,
407 frm_lambda_ctxt_t *ps_frm_lamda,
408 WORD32 i4_slice_type,
409 WORD32 i4_temporal_lyr_id,
410 WORD32 i4_lambda_type)
411 {
412 WORD32 i4_curr_cu_qp, i4_curr_cu_qp_offset;
413 double lambda_modifier;
414 double lambda_uv_modifier;
415 double lambda;
416 double lambda_uv;
417
418 WORD32 i4_qp_bdoffset = 6 * (ps_ctxt->u1_bit_depth - 8);
419
420 /*Populate lamda modifier */
421 ps_ctxt->i4_lamda_modifier = ps_frm_lamda->lambda_modifier;
422 ps_ctxt->i4_uv_lamda_modifier = ps_frm_lamda->lambda_uv_modifier;
423 ps_ctxt->i4_temporal_layer_id = i4_temporal_lyr_id;
424
425 for(i4_curr_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
426 i4_curr_cu_qp <= ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
427 i4_curr_cu_qp++)
428 {
429 WORD32 chroma_qp = (ps_ctxt->i4_chroma_format == IV_YUV_422SP_UV)
430 ? MIN(i4_curr_cu_qp, 51)
431 : gai1_ihevc_chroma_qp_scale[i4_curr_cu_qp + MAX_QP_BD_OFFSET];
432
433 i4_curr_cu_qp_offset = i4_curr_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
434
435 lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
436 lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
437
438 if((BSLICE == i4_slice_type) && (i4_temporal_lyr_id))
439 {
440 lambda_modifier = ps_frm_lamda->lambda_modifier *
441 CLIP3((((double)(i4_curr_cu_qp - 12)) / 6.0), 2.00, 4.00);
442 lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier *
443 CLIP3((((double)(chroma_qp - 12)) / 6.0), 2.00, 4.00);
444 }
445 else
446 {
447 lambda_modifier = ps_frm_lamda->lambda_modifier;
448 lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier;
449 }
450 if(ps_ctxt->i4_use_const_lamda_modifier)
451 {
452 if(ISLICE == ps_ctxt->i1_slice_type)
453 {
454 lambda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
455 lambda_uv_modifier = ps_ctxt->f_i_pic_lamda_modifier;
456 }
457 else
458 {
459 lambda_modifier = CONST_LAMDA_MOD_VAL;
460 lambda_uv_modifier = CONST_LAMDA_MOD_VAL;
461 }
462 }
463 switch(i4_lambda_type)
464 {
465 case 0:
466 {
467 i4_qp_bdoffset = 0;
468
469 lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
470 lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
471
472 lambda *= lambda_modifier;
473 lambda_uv *= lambda_uv_modifier;
474
475 ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
476 (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
477
478 ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
479 (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
480
481 ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
482 (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
483 if(ps_ctxt->i4_use_const_lamda_modifier)
484 {
485 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
486 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
487 }
488 else
489 {
490 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
491 (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
492 }
493
494 ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
495 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
496
497 ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
498 ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset];
499
500 ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
501 ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset];
502
503 ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
504 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset];
505
506 ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
507 ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset];
508
509 break;
510 }
511 case 1:
512 {
513 lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
514 lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
515
516 lambda *= lambda_modifier;
517 lambda_uv *= lambda_uv_modifier;
518
519 ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
520 (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
521
522 ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
523 (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
524
525 ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
526 (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
527 if(ps_ctxt->i4_use_const_lamda_modifier)
528 {
529 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
530 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
531 }
532 else
533 {
534 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
535 (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
536 }
537 ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
538 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
539
540 ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
541 ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset];
542
543 ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
544 ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset];
545
546 ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
547 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset];
548
549 ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
550 ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset];
551
552 break;
553 }
554 case 2:
555 {
556 lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
557 lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
558
559 lambda *= lambda_modifier;
560 lambda_uv *= lambda_uv_modifier;
561
562 ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
563 (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
564
565 ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
566 (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
567
568 ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
569 (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
570
571 if(ps_ctxt->i4_use_const_lamda_modifier)
572 {
573 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
574 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
575 }
576 else
577 {
578 ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
579 (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
580 }
581 ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
582 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
583
584 /* lambda corresponding to 8- bit, for metrics based on 8- bit ( Example 8bit SAD in encloop)*/
585 lambda = pow(2.0, (((double)(i4_curr_cu_qp - 12)) / 3.0));
586 lambda_uv = pow(2.0, (((double)(chroma_qp - 12)) / 3.0));
587
588 lambda *= lambda_modifier;
589 lambda_uv *= lambda_uv_modifier;
590
591 ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
592 (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
593
594 ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
595 (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
596
597 ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
598 (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
599 if(ps_ctxt->i4_use_const_lamda_modifier)
600 {
601 ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
602 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
603 }
604 else
605 {
606 ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
607 (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
608 }
609
610 ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
611 (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
612
613 break;
614 }
615 default:
616 {
617 /* Intended to be a barren wasteland! */
618 ASSERT(0);
619 }
620 }
621 }
622 }
623
624 /*!
625 ******************************************************************************
626 * \if Function name : ihevce_get_cl_cu_lambda_prms \endif
627 *
628 * \brief
629 * Function which loads the precomputed Lambda params for the given CU QP
630 *
631 * \param[in] ps_enc_ctxt : encoder ctxt pointer
632 * \param[in] ps_cur_pic_ctxt : current pic ctxt
633 * \param[in] i4_cur_frame_qp : current pic QP
634 * \param[in] first_field : is first field flag
635 * \param[in] i4_temporal_lyr_id : Current picture layer id
636 *
637 * \return
638 * None
639 *
640 * \author
641 * Ittiam
642 *
643 *****************************************************************************
644 */
/*!
******************************************************************************
* \brief
*    Loads the lambda values for a CU QP from the per-QP tables of the
*    encode loop context into its "current" lambda fields
*
* \param[inout] ps_ctxt   : encode loop context (tables are read, the current
*                           SSD / chroma SSD / SATD / SAD lambdas and the
*                           chroma cost weighing factor are written)
* \param[in] i4_cur_cu_qp : QP of the current CU
*
* \return
*    None
*
******************************************************************************
*/
void ihevce_get_cl_cu_lambda_prms(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 i4_cur_cu_qp)
{
    WORD32 i4_chroma_qp;
    WORD32 i4_luma_idx;
    WORD32 i4_chroma_idx;

    /* Chroma QP: clamped at 51 when chroma array type is 2, */
    /* otherwise mapped through the chroma QP scale table    */
    if(2 == ps_ctxt->u1_chroma_array_type)
    {
        i4_chroma_qp = MIN(i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51);
    }
    else
    {
        i4_chroma_qp = gai1_ihevc_chroma_qp_scale
            [i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET];
    }

    /* All the tables are indexed by (QP + i1_qp_offset) */
    i4_luma_idx = i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
    i4_chroma_idx = i4_chroma_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;

    /* closed loop ssd lambda is same as final lambda */
    ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_luma_idx];

    /* Chroma SSD lambda and chroma cost weight are looked up at the chroma QP */
    ps_ctxt->i8_cl_ssd_lambda_chroma_qf = ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_chroma_idx];
    ps_ctxt->u4_chroma_cost_weighing_factor =
        ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_chroma_idx];

    /* SATD lambda comes from the primary table, SAD lambda from the type2 table */
    ps_ctxt->i4_satd_lamda = ps_ctxt->i4_satd_lamda_array[i4_luma_idx];
    ps_ctxt->i4_sad_lamda = ps_ctxt->i4_sad_type2_lamda_array[i4_luma_idx];
}
669
670 /*!
671 ******************************************************************************
672 * \if Function name : ihevce_update_pred_qp \endif
673 *
674 * \brief
675 * Computes pred qp for the given CU
676 *
677 * \param[in]
678 *
679 * \return
680 *
681 *
682 * \author
683 * Ittiam
684 *
685 *****************************************************************************
686 */
/*!
******************************************************************************
* \brief
*    Derives the predicted QP for the CU at (cu_pos_x, cu_pos_y) and stores
*    it in ps_ctxt->i4_pred_qp
*
* \param[inout] ps_ctxt : encode loop context (reads i4_prev_QP and the
*                         ai4_qp_qg grid, which is addressed with a row
*                         stride of 8)
* \param[in] cu_pos_x   : x position of the CU, used as a grid column
* \param[in] cu_pos_y   : y position of the CU, used as a grid row
*
* \return
*    None
*
******************************************************************************
*/
void ihevce_update_pred_qp(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 cu_pos_x, WORD32 cu_pos_y)
{
    WORD32 i4_above_qp;
    WORD32 i4_left_qp;

    /* CTB start : nothing to average, previous QP is the prediction */
    if((0 == cu_pos_x) && (0 == cu_pos_y))
    {
        ps_ctxt->i4_pred_qp = ps_ctxt->i4_prev_QP;
        return;
    }

    /* Neighbours outside the CTB fall back to the previous QP */
    i4_above_qp = ps_ctxt->i4_prev_QP;
    i4_left_qp = ps_ctxt->i4_prev_QP;

    if(0 != cu_pos_y)
    {
        i4_above_qp = ps_ctxt->ai4_qp_qg[(cu_pos_y - 1) * 8 + (cu_pos_x)];
    }

    if(0 != cu_pos_x)
    {
        i4_left_qp = ps_ctxt->ai4_qp_qg[(cu_pos_y)*8 + (cu_pos_x - 1)];
    }

    /* Rounded average of left and top */
    ps_ctxt->i4_pred_qp = (i4_left_qp + i4_above_qp + 1) >> 1;
}
718 /*!
719 ******************************************************************************
720 * \if Function name : ihevce_compute_cu_level_QP \endif
721 *
722 * \brief
723 * Computes cu level QP with Traqo,Spatial Mod and In-frame RC
724 *
725 * \param[in]
726 *
727 * \return
728 *
729 *
730 * \author
731 * Ittiam
732 *
733 *****************************************************************************
734 */
/* Clamps a qscale to the [i2_min_qscale, i2_max_qscale] range and converts it to a QP */
static WORD32 ihevce_cu_qscale_to_qp(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 i4_qscale)
{
    /* To avoid access of uninitialised Qscale to qp conversion table */
    if(i4_qscale > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
    {
        i4_qscale = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
    }
    else if(i4_qscale < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
    {
        i4_qscale = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
    }

    return ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[i4_qscale];
}

/* Clips a QP first to the allowed window and then to the [i2_min_qp, i2_max_qp] range */
static WORD32 ihevce_cu_qp_clip(
    ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 i4_qp, WORD32 i4_qp_floor, WORD32 i4_qp_ceil)
{
    /*CLIP the delta to obey standard allowed QP variation of (-26 + offset/2) to (25 + offset/2)*/
    if(i4_qp > i4_qp_ceil)
    {
        i4_qp = i4_qp_ceil;
    }
    else if(i4_qp < i4_qp_floor)
    {
        i4_qp = i4_qp_floor;
    }

    /* CLIP to maintain Qp between user configured and min and max Qp values*/
    if(i4_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp)
    {
        i4_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
    }
    else if(i4_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp)
    {
        i4_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
    }

    return i4_qp;
}

/*!
******************************************************************************
* \brief
*    Derives the CU level QP and, independently, the QP used for the lambda
*    lookup from the frame level QP and the supplied activity factors
*
* \param[inout] ps_ctxt           : encode loop context. i4_cu_qp and the
*                                   quant related params are updated when a
*                                   QP activity is given; the current lambdas
*                                   are updated when a lambda activity is given
* \param[in] i4_activity_for_qp   : activity factor for QP modulation,
*                                   -1 skips the QP update
* \param[in] i4_activity_for_lamda: activity factor for lambda modulation,
*                                   -1 skips the lambda update
* \param[in] i4_reduce_qp         : when 1, the derived QP is lowered by one
*                                   (if above 1) before clipping
*
* \return
*    None. Nothing is updated for B slices at quality preset
*    IHEVCE_QUALITY_P6
*
******************************************************************************
*/
void ihevce_compute_cu_level_QP(
    ihevce_enc_loop_ctxt_t *ps_ctxt,
    WORD32 i4_activity_for_qp,
    WORD32 i4_activity_for_lamda,
    WORD32 i4_reduce_qp)
{
    WORD32 i4_frm_qp = ps_ctxt->i4_frame_mod_qp;
    WORD32 i4_qp_ceil;
    WORD32 i4_qp_floor;
    WORD32 i4_qp;

    /* Allowed QP window : around the predicted QP with sub pic level RC, */
    /* around the frame QP otherwise                                      */
    if(ps_ctxt->i4_sub_pic_level_rc)
    {
        WORD32 i4_anchor_qp = ps_ctxt->i4_pred_qp;

        i4_qp_ceil = (i4_anchor_qp + (25 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2)));
        i4_qp_floor = (i4_anchor_qp - (26 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2)));
    }
    else
    {
        i4_qp_ceil = (i4_frm_qp + (7 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4)));
        i4_qp_floor = (i4_frm_qp - (18 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4)));
    }

    if((BSLICE == ps_ctxt->i1_slice_type) && (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset))
    {
        return;
    }

#if LAMDA_BASED_ON_QUANT
    i4_activity_for_lamda = i4_activity_for_qp;
#endif

    /* ------------------------- CU QP ------------------------- */
    if(-1 != i4_activity_for_qp)
    {
        /* Work in the qscale domain, starting from the frame QP */
        i4_qp = ps_ctxt->ps_rc_quant_ctxt
                    ->pi4_qp_to_qscale[i4_frm_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];

        if(ps_ctxt->i4_qp_mod)
        {
            /*Recompute the Qp as per enc thread's frame level Qp*/
            ASSERT(i4_activity_for_qp > 0);
            i4_qp = ((i4_qp * i4_activity_for_qp) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
                    QP_LEVEL_MOD_ACT_FACTOR;
        }

        i4_qp = ihevce_cu_qscale_to_qp(ps_ctxt, i4_qp);

        if((1 == i4_reduce_qp) && (i4_qp > 1))
        {
            i4_qp--;
        }

        i4_qp = ihevce_cu_qp_clip(ps_ctxt, i4_qp, i4_qp_floor, i4_qp_ceil);

        /*cu qp must be populated in cu_analyse_t struct*/
        ps_ctxt->i4_cu_qp = i4_qp;

        /*recompute quant related param at every cu level*/
        ihevce_compute_quant_rel_param(ps_ctxt, i4_qp);
    }

    /* ----------- Lambda (decoupled from the CU QP) ----------- */
    if(-1 != i4_activity_for_lamda)
    {
        i4_qp = ps_ctxt->ps_rc_quant_ctxt
                    ->pi4_qp_to_qscale[i4_frm_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];

        if(ps_ctxt->i4_qp_mod)
        {
#if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON
            /*Recompute the Qp as per enc thread's frame level Qp*/
            ASSERT(i4_activity_for_lamda > 0);
            i4_qp = ((i4_qp * i4_activity_for_lamda) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
                    QP_LEVEL_MOD_ACT_FACTOR;
#endif
        }

        i4_qp = ihevce_cu_qscale_to_qp(ps_ctxt, i4_qp);
        i4_qp = ihevce_cu_qp_clip(ps_ctxt, i4_qp, i4_qp_floor, i4_qp_ceil);

        /* get frame level lambda params */
        ihevce_get_cl_cu_lambda_prms(
            ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? i4_qp : ps_ctxt->i4_frame_qp);
    }
}
847
/*!
******************************************************************************
* \brief
*    Picks the activity factors matching the CU size and transform size,
*    recomputes the CU level QP / lambda from them and copies the resulting
*    QP into the CU analyse structure
*
* \param[inout] ps_ctxt       : encode loop context
* \param[inout] ps_cu_analyse : CU analyse structure (u1_cu_size and
*                               i4_act_factor are read, i1_cu_qp is written)
* \param[in] trans_size       : transform size (4, 8, 16 or 32)
* \param[in] is_intra         : second index into i4_act_factor
*
* \return
*    None
*
******************************************************************************
*/
void ihevce_update_cu_level_qp_lamda(
    ihevce_enc_loop_ctxt_t *ps_ctxt, cu_analyse_t *ps_cu_analyse, WORD32 trans_size, WORD32 is_intra)
{
    /* First index into i4_act_factor for the QP and for the lambda */
    WORD32 i4_qp_act_idx = 0;
    WORD32 i4_lamda_act_idx = 0;

    /* Activity handed over for the lambda, -1 disables the lambda update */
    WORD32 i4_lamda_activity = -1;

    /* 8x8 and 4x4 transforms share the same activity index */
    WORD32 i4_is_small_tu = (trans_size == 8) || (trans_size == 4);

    switch(ps_cu_analyse->u1_cu_size)
    {
    case 64:
        ASSERT((trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4));
        i4_qp_act_idx = (trans_size == 16) + 2 * i4_is_small_tu;
        /* Only the 64x64 CU takes its lambda activity from index 3 */
        i4_lamda_act_idx = 3;
        break;

    case 32:
        ASSERT((trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4));
        i4_qp_act_idx = (trans_size == 16) + 2 * i4_is_small_tu;
        break;

    case 16:
        ASSERT((trans_size == 16) || (trans_size == 8) || (trans_size == 4));
        i4_qp_act_idx = i4_is_small_tu;
        break;

    case 8:
        ASSERT((trans_size == 8) || (trans_size == 4));
        i4_qp_act_idx = 1;
        break;

    default:
        ASSERT(0);
        break;
    }

    /* With CTB level lambda in use, only the QP is recomputed here */
    if(!ps_ctxt->i4_use_ctb_level_lamda)
    {
        i4_lamda_activity = ps_cu_analyse->i4_act_factor[i4_lamda_act_idx][is_intra];
    }

    ihevce_compute_cu_level_QP(
        ps_ctxt, ps_cu_analyse->i4_act_factor[i4_qp_act_idx][is_intra], i4_lamda_activity, 0);

    ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp;
}
898
899 /**
900 *******************************************************************************
901 * \if Function name : ihevce_scan_coeffs \endif
902 *
903 * @brief * Computes the coeff buffer for a coded TU for entropy coding
904 *
905 * @par Description
906 * Computes the coeff buffer for a coded TU for entropy coding
907 *
908 * \param[in] pi2_quan_coeffs Quantized coefficient context
909 *
910 * \param[in] scan_idx Scan index specifying the scan order
911 *
912 * \param[in] trans_size Transform unit size
913 *
914 * \param[inout] pu1_out_data output coeff buffer for a coded TU for entropy coding
915 *
916 * \param[in] pu1_csbf_buf csb flag buffer
917 *
918 * @returns num_bytes
919 * Number of bytes written to pu1_out_data
920 *
921 * @remarks
922 *
923 * \author
924 * Ittiam
925 *
926 *******************************************************************************
927 */
928
ihevce_scan_coeffs(WORD16 * pi2_quant_coeffs,WORD32 * pi4_subBlock2csbfId_map,WORD32 scan_idx,WORD32 trans_size,UWORD8 * pu1_out_data,UWORD8 * pu1_csbf_buf,WORD32 i4_csbf_stride)929 WORD32 ihevce_scan_coeffs(
930 WORD16 *pi2_quant_coeffs,
931 WORD32 *pi4_subBlock2csbfId_map,
932 WORD32 scan_idx,
933 WORD32 trans_size,
934 UWORD8 *pu1_out_data,
935 UWORD8 *pu1_csbf_buf,
936 WORD32 i4_csbf_stride)
937 {
938 WORD32 i, trans_unit_idx, num_gt1_flag;
939 UWORD16 u2_csbf0flags;
940 WORD32 num_bytes = 0;
941 UWORD8 *pu1_trans_table;
942 UWORD8 *pu1_csb_table;
943 WORD32 shift_value, mask_value;
944 UWORD16 u2_sig_coeff_abs_gt0_flags = 0, u2_sig_coeff_abs_gt1_flags = 0;
945 UWORD16 u2_sign_flags;
946 UWORD16 u2_abs_coeff_remaining[16];
947 WORD32 blk_row, blk_col;
948
949 UWORD8 *pu1_out_data_header;
950 UWORD16 *pu2_out_data_coeff;
951
952 WORD32 x_pos, y_pos;
953 WORD32 quant_coeff;
954
955 WORD32 num_gt0_flag;
956 (void)i4_csbf_stride;
957 pu1_out_data_header = pu1_out_data;
958 /* Need only last 3 bits, rest are reserved for debugging and making */
959 /* WORD alignment */
960 u2_csbf0flags = 0xBAD0;
961
962 /* Select proper order for your transform unit and csb based on scan_idx*/
963 /* and the trans_size */
964
965 /* scan order inside a csb */
966 pu1_csb_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
967 /* GETRANGE will give the log_2 of trans_size to shift_value */
968 GETRANGE(shift_value, trans_size);
969 shift_value = shift_value - 3; /* for finding. row no. from scan index */
970 mask_value = (trans_size / 4) - 1; /*for finding the col. no. from scan index*/
971 switch(trans_size)
972 {
973 case 32:
974 pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_8x8[scan_idx][0]);
975 break;
976 case 16:
977 pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
978 break;
979 case 8:
980 pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_2x2[scan_idx][0]);
981 break;
982 case 4:
983 pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_1x1[0]);
984 break;
985 default:
986 DBG_PRINTF("Invalid Trans Size\n");
987 return -1;
988 break;
989 }
990
991 /*go through each csb in the scan order for first non-zero coded sub-block*/
992 for(trans_unit_idx = (trans_size * trans_size / 16) - 1; trans_unit_idx >= 0; trans_unit_idx--)
993 {
994 /* check for the first csb flag in our scan order */
995 if(pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]])
996 {
997 UWORD8 u1_last_x, u1_last_y;
998 /* row of csb */
999 blk_row = pu1_trans_table[trans_unit_idx] >> shift_value;
1000 /* col of csb */
1001 blk_col = pu1_trans_table[trans_unit_idx] & mask_value;
1002
1003 /*check for the 1st non-0 values inside the csb in our scan order*/
1004 for(i = 15; i >= 0; i--)
1005 {
1006 x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
1007 y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
1008
1009 quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
1010
1011 if(quant_coeff != 0)
1012 break;
1013 }
1014
1015 ASSERT(i >= 0);
1016
1017 u1_last_x = x_pos;
1018 u1_last_y = y_pos;
1019
1020 /* storing last_x and last_y */
1021 *pu1_out_data_header = u1_last_x;
1022 pu1_out_data_header++;
1023 num_bytes++;
1024 *pu1_out_data_header = u1_last_y;
1025 pu1_out_data_header++;
1026 num_bytes++;
1027
1028 /* storing the scan order */
1029 *pu1_out_data_header = scan_idx;
1030 pu1_out_data_header++;
1031 num_bytes++;
1032 /* storing last_sub_block pos. in scan order count */
1033 *pu1_out_data_header = trans_unit_idx;
1034 pu1_out_data_header++;
1035 num_bytes++;
1036
1037 /*stored the first 4 bytes, now all are word16. So word16 pointer*/
1038 pu2_out_data_coeff = (UWORD16 *)pu1_out_data_header;
1039
1040 /* u2_csbf0flags word */
1041 u2_csbf0flags = 0xBAD0 | 1; /*since right&bottom csbf is 0*/
1042 /* storing u2_csbf0flags word */
1043 *pu2_out_data_coeff = u2_csbf0flags;
1044 pu2_out_data_coeff++;
1045 num_bytes += 2;
1046
1047 num_gt0_flag = 1;
1048 num_gt1_flag = 0;
1049 u2_sign_flags = 0;
1050
1051 /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
1052 u2_sig_coeff_abs_gt0_flags = u2_sig_coeff_abs_gt0_flags | (1 << i);
1053 if(abs(quant_coeff) > 1)
1054 {
1055 /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
1056 u2_sig_coeff_abs_gt1_flags = u2_sig_coeff_abs_gt1_flags | (1 << i);
1057 /* update u2_abs_coeff_remaining */
1058 u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
1059
1060 num_gt1_flag++;
1061 }
1062
1063 if(quant_coeff < 0)
1064 {
1065 /* set the i th bit of u2_sign_flags */
1066 u2_sign_flags = u2_sign_flags | (1 << i);
1067 }
1068
1069 /* Test remaining elements in our scan order */
1070 /* Can optimize further by CLZ macro */
1071 for(i = i - 1; i >= 0; i--)
1072 {
1073 x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
1074 y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
1075
1076 quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
1077
1078 if(quant_coeff != 0)
1079 {
1080 /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
1081 u2_sig_coeff_abs_gt0_flags |= (1 << i);
1082
1083 if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE))
1084 {
1085 /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
1086 u2_sig_coeff_abs_gt1_flags |= (1 << i);
1087
1088 /* update u2_abs_coeff_remaining */
1089 u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
1090
1091 num_gt1_flag++; /*n0. of Ones in sig_coeff_abs_gt1_flag*/
1092 }
1093
1094 if(quant_coeff < 0)
1095 {
1096 /* set the i th bit of u2_sign_flags */
1097 u2_sign_flags |= (1 << i);
1098 }
1099
1100 num_gt0_flag++;
1101 }
1102 }
1103
1104 /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
1105 *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags;
1106 pu2_out_data_coeff++;
1107 num_bytes += 2;
1108 /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
1109 *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags;
1110 pu2_out_data_coeff++;
1111 num_bytes += 2;
1112 /* storing u2_sign_flags 2 bytes */
1113 *pu2_out_data_coeff = u2_sign_flags;
1114 pu2_out_data_coeff++;
1115 num_bytes += 2;
1116
1117 /* Store the u2_abs_coeff_remaining[] */
1118 for(i = 0; i < num_gt1_flag; i++)
1119 {
1120 /* storing u2_abs_coeff_remaining[i] 2 bytes */
1121 *pu2_out_data_coeff = u2_abs_coeff_remaining[i];
1122 pu2_out_data_coeff++;
1123 num_bytes += 2;
1124 }
1125
1126 break; /*We just need this loop for finding 1st non-zero csb only*/
1127 }
1128 }
1129
1130 /* go through remaining csb in the scan order */
1131 for(trans_unit_idx = trans_unit_idx - 1; trans_unit_idx >= 0; trans_unit_idx--)
1132 {
1133 blk_row = pu1_trans_table[trans_unit_idx] >> shift_value; /*row of csb*/
1134 blk_col = pu1_trans_table[trans_unit_idx] & mask_value; /*col of csb*/
1135
1136 /* u2_csbf0flags word */
1137 u2_csbf0flags = 0xBAD0 | /* assuming csbf_buf has only 0 or 1 values */
1138 (pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]]);
1139
1140 /********************************************************************/
1141 /* Minor hack: As per HEVC spec csbf in not signalled in stream for */
1142 /* block0, instead sig coeff map is directly signalled. This is */
1143 /* taken care by forcing csbf for block0 to be 1 even if it is 0 */
1144 /********************************************************************/
1145 if(0 == trans_unit_idx)
1146 {
1147 u2_csbf0flags |= 1;
1148 }
1149
1150 if((blk_col + 1 < trans_size / 4)) /* checking right boundary */
1151 {
1152 if(pu1_csbf_buf[pi4_subBlock2csbfId_map[blk_row * trans_size / 4 + blk_col + 1]])
1153 {
1154 /* set the 2nd bit of u2_csbf0flags for right csbf */
1155 u2_csbf0flags = u2_csbf0flags | (1 << 1);
1156 }
1157 }
1158 if((blk_row + 1 < trans_size / 4)) /* checking bottom oundary */
1159 {
1160 if(pu1_csbf_buf[pi4_subBlock2csbfId_map[(blk_row + 1) * trans_size / 4 + blk_col]])
1161 {
1162 /* set the 3rd bit of u2_csbf0flags for bottom csbf */
1163 u2_csbf0flags = u2_csbf0flags | (1 << 2);
1164 }
1165 }
1166
1167 /* storing u2_csbf0flags word */
1168 *pu2_out_data_coeff = u2_csbf0flags;
1169 pu2_out_data_coeff++;
1170 num_bytes += 2;
1171
1172 /* check for the csb flag in our scan order */
1173 if(u2_csbf0flags & 0x1)
1174 {
1175 u2_sig_coeff_abs_gt0_flags = 0;
1176 u2_sig_coeff_abs_gt1_flags = 0;
1177 u2_sign_flags = 0;
1178
1179 num_gt0_flag = 0;
1180 num_gt1_flag = 0;
1181 /* check for the non-0 values inside the csb in our scan order */
1182 /* Can optimize further by CLZ macro */
1183 for(i = 15; i >= 0; i--)
1184 {
1185 x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
1186 y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
1187
1188 quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
1189
1190 if(quant_coeff != 0)
1191 {
1192 /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
1193 u2_sig_coeff_abs_gt0_flags |= (1 << i);
1194
1195 if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE))
1196 {
1197 /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
1198 u2_sig_coeff_abs_gt1_flags |= (1 << i);
1199
1200 /* update u2_abs_coeff_remaining */
1201 u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
1202
1203 num_gt1_flag++;
1204 }
1205
1206 if(quant_coeff < 0)
1207 {
1208 /* set the i th bit of u2_sign_flags */
1209 u2_sign_flags = u2_sign_flags | (1 << i);
1210 }
1211
1212 num_gt0_flag++;
1213 }
1214 }
1215
1216 /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
1217 *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags;
1218 pu2_out_data_coeff++;
1219 num_bytes += 2;
1220
1221 /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
1222 *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags;
1223 pu2_out_data_coeff++;
1224 num_bytes += 2;
1225
1226 /* storing u2_sign_flags 2 bytes */
1227 *pu2_out_data_coeff = u2_sign_flags;
1228 pu2_out_data_coeff++;
1229 num_bytes += 2;
1230
1231 /* Store the u2_abs_coeff_remaining[] */
1232 for(i = 0; i < num_gt1_flag; i++)
1233 {
1234 /* storing u2_abs_coeff_remaining[i] 2 bytes */
1235 *pu2_out_data_coeff = u2_abs_coeff_remaining[i];
1236 pu2_out_data_coeff++;
1237 num_bytes += 2;
1238 }
1239 }
1240 }
1241
1242 return num_bytes; /* Return the number of bytes written to out_data */
1243 }
1244
1245 /**
1246 *******************************************************************************
1247 * \if Function name : ihevce_populate_intra_pred_mode \endif
1248 *
1249 * \brief * populates intra pred modes,b2_mpm_idx,b1_prev_intra_luma_pred_flag &
1250 * b5_rem_intra_pred_mode for a CU based on nieghbouring CUs,
1251 *
1252 * \par Description
1253 * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
1254 * for a CU
1255 *
1256 * \param[in] top_intra_mode Top intra mode
1257 * \param[in] left_intra_mode Left intra mode
1258 * \param[in] available_top Top availability flag
1259 * \param[in] available_left Left availability flag
1260 * \param[in] cu_pos_y CU 'y' position
1261 * \param[in] ps_cand_mode_list pointer to populate candidate list
1262 *
1263 * \returns none
1264 *
1265 * \author
1266 * Ittiam
1267 *
1268 *******************************************************************************
1269 */
1270
ihevce_populate_intra_pred_mode(WORD32 top_intra_mode,WORD32 left_intra_mode,WORD32 available_top,WORD32 available_left,WORD32 cu_pos_y,WORD32 * ps_cand_mode_list)1271 void ihevce_populate_intra_pred_mode(
1272 WORD32 top_intra_mode,
1273 WORD32 left_intra_mode,
1274 WORD32 available_top,
1275 WORD32 available_left,
1276 WORD32 cu_pos_y,
1277 WORD32 *ps_cand_mode_list)
1278 {
1279 /* local variables */
1280 WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
1281
1282 /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
1283 /* N = top */
1284 if(0 == available_top)
1285 {
1286 cand_intra_pred_mode_top = INTRA_DC;
1287 }
1288 /* for neighbour != INTRA, setting DC is done outside */
1289 else if(0 == cu_pos_y) /* It's on the CTB boundary */
1290 {
1291 cand_intra_pred_mode_top = INTRA_DC;
1292 }
1293 else
1294 {
1295 cand_intra_pred_mode_top = top_intra_mode;
1296 }
1297
1298 /* N = left */
1299 if(0 == available_left)
1300 {
1301 cand_intra_pred_mode_left = INTRA_DC;
1302 }
1303 /* for neighbour != INTRA, setting DC is done outside */
1304 else
1305 {
1306 cand_intra_pred_mode_left = left_intra_mode;
1307 }
1308
1309 /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
1310 if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
1311 {
1312 if(cand_intra_pred_mode_left < 2)
1313 {
1314 ps_cand_mode_list[0] = INTRA_PLANAR;
1315 ps_cand_mode_list[1] = INTRA_DC;
1316 ps_cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
1317 }
1318 else
1319 {
1320 ps_cand_mode_list[0] = cand_intra_pred_mode_left;
1321 ps_cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32);
1322 ps_cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32);
1323 }
1324 }
1325 else
1326 {
1327 ps_cand_mode_list[0] = cand_intra_pred_mode_left;
1328 ps_cand_mode_list[1] = cand_intra_pred_mode_top;
1329
1330 if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
1331 (cand_intra_pred_mode_top != INTRA_PLANAR))
1332 {
1333 ps_cand_mode_list[2] = INTRA_PLANAR;
1334 }
1335 else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
1336 {
1337 ps_cand_mode_list[2] = INTRA_DC;
1338 }
1339 else
1340 {
1341 ps_cand_mode_list[2] = INTRA_ANGULAR(26);
1342 }
1343 }
1344 }
1345 /**
1346 *******************************************************************************
1347 * \if Function name : ihevce_intra_pred_mode_signaling \endif
1348 *
1349 * \brief * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx &
1350 * b5_rem_intra_pred_mode for a CU
1351 *
1352 * \par Description
1353 * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
1354 * for a CU
1355 *
1356 * \param[in] ps_nbr_top Top neighbour context
1357 * \param[in] ps_nbr_left Left neighbour context
1358 * \param[in] available_top Top availability flag
1359 * \param[in] available_left Left availability flag
1360 * \param[in] cu_pos_y CU 'y' position
1361 * \param[in] luma_intra_pred_mode_current the intra_pred_mode of current block
1362 * \param[inout] ps_intra_pred_mode_current
1363 * Pointer to structure having b1_prev_intra_luma_pred_flag, b2_mpm_idx and
1364 * b5_rem_intra_pred_mode
1365 *
1366 * \returns none
1367 *
1368 * \author
1369 * Ittiam
1370 *
1371 *******************************************************************************
1372 */
1373
ihevce_intra_pred_mode_signaling(WORD32 top_intra_mode,WORD32 left_intra_mode,WORD32 available_top,WORD32 available_left,WORD32 cu_pos_y,WORD32 luma_intra_pred_mode_current,intra_prev_rem_flags_t * ps_intra_pred_mode_current)1374 void ihevce_intra_pred_mode_signaling(
1375 WORD32 top_intra_mode,
1376 WORD32 left_intra_mode,
1377 WORD32 available_top,
1378 WORD32 available_left,
1379 WORD32 cu_pos_y,
1380 WORD32 luma_intra_pred_mode_current,
1381 intra_prev_rem_flags_t *ps_intra_pred_mode_current)
1382 {
1383 /* local variables */
1384 WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
1385 WORD32 cand_mode_list[3];
1386
1387 ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0;
1388 ps_intra_pred_mode_current->b2_mpm_idx = 0; // for safety purpose
1389 ps_intra_pred_mode_current->b5_rem_intra_pred_mode = 0;
1390
1391 /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
1392 /* N = top */
1393 if(0 == available_top)
1394 {
1395 cand_intra_pred_mode_top = INTRA_DC;
1396 }
1397 /* for neighbour != INTRA, setting DC is done outside */
1398 else if(0 == cu_pos_y) /* It's on the CTB boundary */
1399 {
1400 cand_intra_pred_mode_top = INTRA_DC;
1401 }
1402 else
1403 {
1404 cand_intra_pred_mode_top = top_intra_mode;
1405 }
1406
1407 /* N = left */
1408 if(0 == available_left)
1409 {
1410 cand_intra_pred_mode_left = INTRA_DC;
1411 }
1412 /* for neighbour != INTRA, setting DC is done outside */
1413 else
1414 {
1415 cand_intra_pred_mode_left = left_intra_mode;
1416 }
1417
1418 /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
1419 if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
1420 {
1421 if(cand_intra_pred_mode_left < 2)
1422 {
1423 cand_mode_list[0] = INTRA_PLANAR;
1424 cand_mode_list[1] = INTRA_DC;
1425 cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
1426 }
1427 else
1428 {
1429 cand_mode_list[0] = cand_intra_pred_mode_left;
1430 cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32);
1431 cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32);
1432 }
1433 }
1434 else
1435 {
1436 cand_mode_list[0] = cand_intra_pred_mode_left;
1437 cand_mode_list[1] = cand_intra_pred_mode_top;
1438
1439 if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
1440 (cand_intra_pred_mode_top != INTRA_PLANAR))
1441 {
1442 cand_mode_list[2] = INTRA_PLANAR;
1443 }
1444 else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
1445 {
1446 cand_mode_list[2] = INTRA_DC;
1447 }
1448 else
1449 {
1450 cand_mode_list[2] = INTRA_ANGULAR(26);
1451 }
1452 }
1453
1454 /* Signal Generation */
1455
1456 /* Flag & mpm_index generation */
1457 if(cand_mode_list[0] == luma_intra_pred_mode_current)
1458 {
1459 ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
1460 ps_intra_pred_mode_current->b2_mpm_idx = 0;
1461 }
1462 else if(cand_mode_list[1] == luma_intra_pred_mode_current)
1463 {
1464 ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
1465 ps_intra_pred_mode_current->b2_mpm_idx = 1;
1466 }
1467 else if(cand_mode_list[2] == luma_intra_pred_mode_current)
1468 {
1469 ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
1470 ps_intra_pred_mode_current->b2_mpm_idx = 2;
1471 }
1472 /* Flag & b5_rem_intra_pred_mode generation */
1473 else
1474 {
1475 WORD32 rem_mode;
1476
1477 ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0;
1478
1479 /* sorting cand_mode_list */
1480 if(cand_mode_list[0] > cand_mode_list[1])
1481 {
1482 SWAP(cand_mode_list[0], cand_mode_list[1]);
1483 }
1484 if(cand_mode_list[0] > cand_mode_list[2])
1485 {
1486 SWAP(cand_mode_list[0], cand_mode_list[2]);
1487 }
1488 if(cand_mode_list[1] > cand_mode_list[2])
1489 {
1490 SWAP(cand_mode_list[1], cand_mode_list[2]);
1491 }
1492
1493 rem_mode = luma_intra_pred_mode_current;
1494
1495 if((rem_mode) >= cand_mode_list[2])
1496 {
1497 (rem_mode)--;
1498 }
1499 if((rem_mode) >= cand_mode_list[1])
1500 {
1501 (rem_mode)--;
1502 }
1503 if((rem_mode) >= cand_mode_list[0])
1504 {
1505 (rem_mode)--;
1506 }
1507 ps_intra_pred_mode_current->b5_rem_intra_pred_mode = rem_mode;
1508 }
1509 }
1510
/**
*******************************************************************************
* \if Function name : ihevce_quant_rounding_factor_gen \endif
*
* \brief  Generates per coefficient quantization rounding factors for a
*         transform block from CABAC context states
*
* \par Description
*  For every coefficient position two factors are computed with
*  QUANT_ROUND_FACTOR from bin costs looked up in
*  gau2_ihevce_cabac_bin_to_bits and a fixed point lambda
*  (i4_lamda_modifier * 2^(-8/3), scaled by 1 << LAMDA_Q_SHIFT_FACT):
*   - pi4_quant_round_0_1 : cost of sig_coeff_flag = 1 (plus one sign bit and
*     the "not greater than 1" bin) against sig_coeff_flag = 0
*   - pi4_quant_round_1_2 : cost of the "greater than 1" bin being 1 against
*     it being 0
*  Both outputs are addressed as x + y * i4_trans_size and
*  i4_trans_size * i4_trans_size entries of each are written.
*  For sizes above 4x4 the sig coeff context of a sub-block depends on the
*  csbf of its right / bottom neighbours. These are not known here, so the
*  states of the two csbf contexts are inspected: if the LSB of both states
*  is 1 a per sub-block table is chosen from the sub-block position,
*  otherwise a single factor (table gu1_hevce_sigcoeff_ctxtinc_00) is used
*  for all sub-blocks except the first.
*
* \param[in] i4_trans_size   Transform size (4, 8, 16 or 32)
* \param[in] is_luma         Non-zero for luma, 0 for chroma
* \param[in] ps_rdopt_entropy_ctxt  RDOPT entropy context supplying the CABAC
*                            context states
* \param[out] pi4_quant_round_0_1   Rounding factors for the 0 / 1 decision
* \param[out] pi4_quant_round_1_2   Rounding factors for the 1 / 2 decision
* \param[in] i4_lamda_modifier      Lambda modifier
* \param[in] i4_is_tu_level_quant_rounding
*                            1 : use the context states of the current CU
*                            entropy context (as_cu_entropy_ctxt at
*                            i4_curr_buf_idx); otherwise use
*                            au1_init_cabac_ctxt_states
*
* \returns none
*
* \author
*  Ittiam
*
*******************************************************************************
*/
void ihevce_quant_rounding_factor_gen(
    WORD32 i4_trans_size,
    WORD32 is_luma,
    rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt,
    WORD32 *pi4_quant_round_0_1,
    WORD32 *pi4_quant_round_1_2,
    double i4_lamda_modifier,
    UWORD8 i4_is_tu_level_quant_rounding)
{
    UWORD8 *pu1_ctxt_model;
    WORD32 scan_pos;
    WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag */
    WORD32 abs_gt1_base_ctxt; /* cabac context for abs level gt1 flag */
    WORD32 log2_tr_size, i;
    UWORD16 u4_bits_estimated_r0, u4_bits_estimated_r1, u4_bits_estimated_r2;
    UWORD16 u4_bits_estimated_r1_temp;
    WORD32 j = 0; /* offset of the current sub-block in the output arrays */
    WORD32 k = 0; /* row of the current sub-block                         */
    WORD32 temp2;

    /* lambda in fixed point */
    double i4_lamda_mod = i4_lamda_modifier * pow(2.0, (-8.0 / 3.0));
    LWORD64 lamda_mod = (LWORD64)(i4_lamda_mod * (1 << LAMDA_Q_SHIFT_FACT));

    /* transform size to log2transform size (GETRANGE yields log2 + 1) */
    GETRANGE(log2_tr_size, i4_trans_size);
    log2_tr_size -= 1;

    /* pick the set of context states the estimates are based on */
    if(1 == i4_is_tu_level_quant_rounding)
    {
        entropy_context_t *ps_cur_tu_entropy;
        cab_ctxt_t *ps_cabac;
        WORD32 curr_buf_idx = ps_rdopt_entropy_ctxt->i4_curr_buf_idx;

        ps_cur_tu_entropy = &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[curr_buf_idx];
        ps_cabac = &ps_cur_tu_entropy->s_cabac_ctxt;
        pu1_ctxt_model = &ps_cabac->au1_ctxt_models[0];
    }
    else
    {
        pu1_ctxt_model = &ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states[0];
    }

    /* base contexts of sig coeff flag and abs gt1 flag */
    if(is_luma)
    {
        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
        abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;

        if(3 == log2_tr_size)
        {
            /* 8x8 transform size */
            /* Assuming diagonal scan idx for now */
            sig_coeff_base_ctxt += 9;
        }
        else if(3 < log2_tr_size)
        {
            /* larger transform sizes */
            sig_coeff_base_ctxt += 21;
        }
    }
    else
    {
        /* chroma context initializations */
        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
        abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;

        if(3 == log2_tr_size)
        {
            /* 8x8 transform size */
            sig_coeff_base_ctxt += 9;
        }
        else if(3 < log2_tr_size)
        {
            /* larger transform sizes */
            sig_coeff_base_ctxt += 12;
        }
    }

    /* Transform size of 4x4 will have only a single CSB */
    /* derive the context inc as per section 9.3.3.1.4 */
    if(2 == log2_tr_size)
    {
        UWORD8 sig_ctxinc;
        WORD32 state_mps;

        /* Encode the abs level gt1 bins */
        /* Currently calculating trade off between mps(2) and mps(1) */
        /* The estimation has to be further done for mps(11) and mps(111) */
        /* ctxt_set = 0 as transform 4x4 has only one csb with DC */
        /* gt1_ctxt = 0 for the co-ef value to be 2 */
        state_mps = pu1_ctxt_model[abs_gt1_base_ctxt];

        u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];

        u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];

        /* the 1 / 2 factor is the same for all 16 positions */
        QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1_temp, lamda_mod);
        for(scan_pos = 0; scan_pos < 16; scan_pos++)
        {
            *(pi4_quant_round_1_2 + scan_pos) = temp2;
        }

        for(scan_pos = 0; scan_pos < 16; scan_pos++)
        {
            /* 4x4 transform size increment uses lookup on the raster position */
            sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[scan_pos];

            /* Get the mps state based on ctxt modes */
            state_mps = pu1_ctxt_model[sig_ctxinc + sig_coeff_base_ctxt];

            /* Bits taken to encode sig co-ef flag as 0 */
            u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];

            /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
            u4_bits_estimated_r1 =
                (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));

            /* plus the bits of the gt1 bin coded as 0 */
            u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;

            QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
            *(pi4_quant_round_0_1 + scan_pos) = temp2;
        }
    }
    else
    {
        UWORD8 *pu1_hevce_sigcoeff_ctxtinc;
        UWORD8 sig_ctxinc;
        WORD32 is_nbr_csb_state_mps;
        WORD32 state_mps;
        WORD32 ctxt_set;
        WORD32 ctxt_idx;
        /* number of 4x4 sub-blocks in the block and in one row of it */
        WORD32 num_csb = (i4_trans_size * i4_trans_size) >> 4;
        WORD32 csb_per_row = i4_trans_size >> 2;

        /* 1to2 rounding factor is same for all sub blocks except for sub-block = 0 */
        /* Hence write all positions with the AC sub-block value first and then    */
        /* overwrite the positions of sub-block 0                                  */

        /* ctxt_set = 0 DC subblock, the previous state did not have 2
           ctxt_set = 1 DC subblock, the previous state did have >= 2
           ctxt_set = 2 AC subblock, the previous state did not have 2
           ctxt_set = 3 AC subblock, the previous state did have >= 2 */

        /* AC sub-blocks : set 2 for luma, 0 for chroma, advanced by one */
        ctxt_set = (is_luma ? 2 : 0) + 1;

        /* 0th position indicates the probability of 2 */
        /* 1th position indicates the probability of 1 */
        /* 2th position indicates the probability of 11 */
        /* 3th position indicates the probability of 111 */
        ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt;

        state_mps = pu1_ctxt_model[ctxt_idx];

        u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];

        u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
        QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod);

        for(scan_pos = 0; scan_pos < (16 * num_csb); scan_pos++)
        {
            *(pi4_quant_round_1_2 + scan_pos) = temp2;
        }

        /* sub-block 0 : set 0, advanced by one */
        ctxt_set = 1;
        ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt;

        state_mps = pu1_ctxt_model[ctxt_idx];

        u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];

        u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
        QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod);

        /* top left 4x4 of the block */
        for(scan_pos = 0; scan_pos < 16; scan_pos++)
        {
            *(pi4_quant_round_1_2 + ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2;
        }

        {
            WORD32 csbf_ctxt_idx;
            WORD32 state_mps_0, state_mps_1;

            csbf_ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
            csbf_ctxt_idx += is_luma ? 0 : 2;

            /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
            /* state of the context used when no neighbour csbf is set */
            state_mps_0 = pu1_ctxt_model[csbf_ctxt_idx];

            /* state of the context used when a neighbour csbf is set */
            csbf_ctxt_idx += 1;
            state_mps_1 = pu1_ctxt_model[csbf_ctxt_idx];

            is_nbr_csb_state_mps = ((state_mps_0 % 2) == 1) && ((state_mps_1 % 2) == 1);
        }

        if(1 == is_nbr_csb_state_mps)
        {
            for(i = 0; i < num_csb; i++)
            {
                /* context increment table chosen from the sub-block position */
                if(i == 0)
                {
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[3][0];
                }
                else if(i < csb_per_row)
                {
                    /* first row of sub-blocks */
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[1][0];
                }
                else if((i % csb_per_row) == 0)
                {
                    /* first column of sub-blocks */
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[2][0];
                }
                else
                {
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0];
                }

                /* a new row of sub-blocks starts */
                if(((i % csb_per_row) == 0) && (i != 0))
                {
                    k++;
                }

                /* offset of the top left sample of sub-block i */
                j = ((i4_trans_size * 4) * k) + ((i % csb_per_row) * 4);

                /* ctxt_set = 0 DC subblock, the previous state did not have 2
                   ctxt_set = 1 DC subblock, the previous state did have >= 2
                   ctxt_set = 2 AC subblock, the previous state did not have 2
                   ctxt_set = 3 AC subblock, the previous state did have >= 2 */
                ctxt_set = (i && is_luma) ? 2 : 0;
                ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt;

                state_mps = pu1_ctxt_model[ctxt_idx];

                /* bits of the gt1 bin coded as 0 (coefficient value 1) */
                u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];

                for(scan_pos = 0; scan_pos < 16; scan_pos++)
                {
                    if(scan_pos || i)
                    {
                        /* ctxt for AC coeff depends on curpos and neighbour csbf */
                        sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[scan_pos];

                        /* based on luma subblock pos */
                        sig_ctxinc += (i && is_luma) ? 3 : 0;

                        sig_ctxinc += sig_coeff_base_ctxt;
                    }
                    else
                    {
                        /* both scan pos and i 0 implies the DC coef of 1st block only */
                        /* DC coeff has fixed context for luma and chroma */
                        sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
                    }

                    /* Get the mps state based on ctxt modes */
                    state_mps = pu1_ctxt_model[sig_ctxinc];

                    /* Bits taken to encode sig co-ef flag as 0 */
                    u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];

                    /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
                    u4_bits_estimated_r1 =
                        (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));

                    /* plus the bits of the gt1 bin coded as 0 */
                    u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;

                    QUANT_ROUND_FACTOR(
                        temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
                    *(pi4_quant_round_0_1 +
                      ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size)) + j) = temp2;
                }
            }
        }
        else
        {
            /* If both nbr csbfs are 0, then all the coef in sub-blocks will have  */
            /* same value except for 1st subblock. Hence write the same value to   */
            /* all sub blocks and overwrite for the 1st one                        */

            /* sub-blocks other than the first */
            {
                WORD32 quant_rounding_0_1;

                pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc_00[0];

                /* only position 0 of the table is consulted */
                sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[0];

                /* based on luma subblock pos */
                sig_ctxinc += (is_luma) ? 3 : 0;

                sig_ctxinc += sig_coeff_base_ctxt;

                /* Get the mps state based on ctxt modes */
                state_mps = pu1_ctxt_model[sig_ctxinc];

                /* Bits taken to encode sig co-ef flag as 0 */
                u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];

                /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
                u4_bits_estimated_r1 =
                    (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));

                /* AC sub-block : set 2 for luma, 0 for chroma */
                ctxt_set = is_luma ? 2 : 0;
                ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt;

                state_mps = pu1_ctxt_model[ctxt_idx];

                /* plus the bits of the gt1 bin coded as 0 */
                u4_bits_estimated_r1 += gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];

                QUANT_ROUND_FACTOR(
                    quant_rounding_0_1, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);

                for(scan_pos = 0; scan_pos < (16 * num_csb); scan_pos++)
                {
                    *(pi4_quant_round_0_1 + scan_pos) = quant_rounding_0_1;
                }
            }

            /* First Subblock */
            {
                pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0];

                /* DC sub-block : set 0 */
                ctxt_set = 0;
                ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt;

                state_mps = pu1_ctxt_model[ctxt_idx];

                /* bits of the gt1 bin coded as 0 (coefficient value 1) */
                u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];

                for(scan_pos = 0; scan_pos < 16; scan_pos++)
                {
                    if(scan_pos)
                    {
                        /* ctxt for AC coeff depends on curpos and neighbour csbf; */
                        /* no luma sub-block offset is added for sub-block 0       */
                        sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[scan_pos];

                        sig_ctxinc += sig_coeff_base_ctxt;
                    }
                    else
                    {
                        /* DC coeff has fixed context for luma and chroma */
                        sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
                    }

                    /* Get the mps state based on ctxt modes */
                    state_mps = pu1_ctxt_model[sig_ctxinc];

                    /* Bits taken to encode sig co-ef flag as 0 */
                    u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];

                    /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
                    u4_bits_estimated_r1 =
                        (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));

                    /* plus the bits of the gt1 bin coded as 0 */
                    u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;

                    QUANT_ROUND_FACTOR(
                        temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
                    *(pi4_quant_round_0_1 +
                      ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2;
                }
            }
        }
    }
}
1954
1955 /*!
1956 ******************************************************************************
1957 * \if Function name : ihevce_t_q_iq_ssd_scan_fxn \endif
1958 *
1959 * \brief
1960 * Transform unit level (Luma) enc_loop function
1961 *
1962 * \param[in] ps_ctxt enc_loop module ctxt pointer
1963 * \param[in] pu1_pred pointer to predicted data buffer
1964 * \param[in] pred_strd predicted buffer stride
1965 * \param[in] pu1_src pointer to source data buffer
1966 * \param[in] src_strd source buffer stride
1967 * \param[in] pi2_deq_data pointer to store iq data
1968 * \param[in] deq_data_strd iq data buffer stride
1969 * \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod)
1970 * \param[out] pu1_csbf_buf pointer to store the csbf for all 4x4 in a current
1971 * block
1972 * \param[out] csbf_strd csbf buffer stride
1973 * \param[in] trans_size transform size (4, 8, 16,32)
1974 * \param[in] packed_pred_mode 0:Inter 1:Intra 2:Skip
1975 * \param[out] pi4_cost pointer to store the cost
1976 * \param[out] pi4_coeff_off pointer to store the number of bytes produced in
1977 * coeff buffer
1978 * \param[out] pu4_tu_bits pointer to store the best TU bits required encode
1979 the current TU in RDopt Mode
1980 * \param[out] pu4_blk_sad pointer to store the block sad for RC
1981 * \param[out] pi4_zero_col pointer to store the zero_col info for the TU
1982 * \param[out] pi4_zero_row pointer to store the zero_row info for the TU
1983 * \param[in] i4_perform_rdoq Indicates if RDOQ should be performed or not
1984 * \param[in] i4_perform_sbh Indicates if SBH should be performed or not
1985 *
1986 * \return
1987 * CBF of the current block
1988 *
1989 * \author
1990 * Ittiam
1991 *
1992 *****************************************************************************
1993 */
1994
ihevce_t_q_iq_ssd_scan_fxn(ihevce_enc_loop_ctxt_t * ps_ctxt,UWORD8 * pu1_pred,WORD32 pred_strd,UWORD8 * pu1_src,WORD32 src_strd,WORD16 * pi2_deq_data,WORD32 deq_data_strd,UWORD8 * pu1_recon,WORD32 i4_recon_stride,UWORD8 * pu1_ecd_data,UWORD8 * pu1_csbf_buf,WORD32 csbf_strd,WORD32 trans_size,WORD32 packed_pred_mode,LWORD64 * pi8_cost,WORD32 * pi4_coeff_off,WORD32 * pi4_tu_bits,UWORD32 * pu4_blk_sad,WORD32 * pi4_zero_col,WORD32 * pi4_zero_row,UWORD8 * pu1_is_recon_available,WORD32 i4_perform_rdoq,WORD32 i4_perform_sbh,WORD32 i4_alpha_stim_multiplier,UWORD8 u1_is_cu_noisy,SSD_TYPE_T e_ssd_type,WORD32 early_cbf)1995 WORD32 ihevce_t_q_iq_ssd_scan_fxn(
1996 ihevce_enc_loop_ctxt_t *ps_ctxt,
1997 UWORD8 *pu1_pred,
1998 WORD32 pred_strd,
1999 UWORD8 *pu1_src,
2000 WORD32 src_strd,
2001 WORD16 *pi2_deq_data,
2002 WORD32 deq_data_strd,
2003 UWORD8 *pu1_recon,
2004 WORD32 i4_recon_stride,
2005 UWORD8 *pu1_ecd_data,
2006 UWORD8 *pu1_csbf_buf,
2007 WORD32 csbf_strd,
2008 WORD32 trans_size,
2009 WORD32 packed_pred_mode,
2010 LWORD64 *pi8_cost,
2011 WORD32 *pi4_coeff_off,
2012 WORD32 *pi4_tu_bits,
2013 UWORD32 *pu4_blk_sad,
2014 WORD32 *pi4_zero_col,
2015 WORD32 *pi4_zero_row,
2016 UWORD8 *pu1_is_recon_available,
2017 WORD32 i4_perform_rdoq,
2018 WORD32 i4_perform_sbh,
2019 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2020 WORD32 i4_alpha_stim_multiplier,
2021 UWORD8 u1_is_cu_noisy,
2022 #endif
2023 SSD_TYPE_T e_ssd_type,
2024 WORD32 early_cbf)
2025 {
2026 WORD32 cbf = 0;
2027 WORD32 trans_idx;
2028 WORD32 quant_scale_mat_offset;
2029 WORD32 *pi4_trans_scratch;
2030 WORD16 *pi2_trans_values;
2031 WORD16 *pi2_quant_coeffs;
2032 WORD32 *pi4_subBlock2csbfId_map = NULL;
2033
2034 #if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
2035 WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i;
2036 #endif
2037
2038 rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt;
2039
2040 WORD32 i4_perform_zcbf = (ENABLE_INTER_ZCU_COST && (PRED_MODE_INTRA != packed_pred_mode)) ||
2041 (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE);
2042 WORD32 i4_perform_coeff_level_rdoq = (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING);
2043 WORD8 intra_flag = 0;
2044 ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW);
2045
2046 *pi4_tu_bits = 0;
2047 *pi4_coeff_off = 0;
2048 pu1_is_recon_available[0] = 0;
2049
2050 if((PRED_MODE_SKIP == packed_pred_mode) || (0 == early_cbf))
2051 {
2052 if(e_ssd_type != NULL_TYPE)
2053 {
2054 /* SSD cost is stored to the pointer */
2055 pi8_cost[0] =
2056
2057 ps_ctxt->s_cmn_opt_func.pf_ssd_and_sad_calculator(
2058 pu1_pred, pred_strd, pu1_src, src_strd, trans_size, pu4_blk_sad);
2059
2060 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2061 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
2062 {
2063 pi8_cost[0] = ihevce_inject_stim_into_distortion(
2064 pu1_src,
2065 src_strd,
2066 pu1_pred,
2067 pred_strd,
2068 pi8_cost[0],
2069 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2070 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2071 (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2072 100.0,
2073 trans_size,
2074 0,
2075 ps_ctxt->u1_enable_psyRDOPT,
2076 NULL_PLANE);
2077 }
2078 #endif
2079
2080 /* copy pred to recon for skip mode */
2081 if(SPATIAL_DOMAIN_SSD == e_ssd_type)
2082 {
2083 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
2084 pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size);
2085 pu1_is_recon_available[0] = 1;
2086 }
2087 else
2088 {
2089 pu1_is_recon_available[0] = 0;
2090 }
2091
2092 #if ENABLE_INTER_ZCU_COST
2093 ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
2094 #endif
2095 }
2096 else
2097 {
2098 pi8_cost[0] = UINT_MAX;
2099 }
2100
2101 /* cbf is returned as 0 */
2102 return (0);
2103 }
2104
2105 /* derive context variables */
2106 pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0];
2107 pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
2108 pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2);
2109
2110 /* translate the transform size to index for 4x4 and 8x8 */
2111 trans_idx = trans_size >> 2;
2112
2113 if(PRED_MODE_INTRA == packed_pred_mode)
2114 {
2115 quant_scale_mat_offset = 0;
2116 intra_flag = 1;
2117 #if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
2118 ai4_quant_rounding_factors[0][0] =
2119 MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3);
2120
2121 for(i = 0; i < trans_size * trans_size; i++)
2122 {
2123 ai4_quant_rounding_factors[1][i] =
2124 MAX(ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3][i],
2125 (1 << QUANT_ROUND_FACTOR_Q) / 3);
2126 ai4_quant_rounding_factors[2][i] =
2127 MAX(ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3][i],
2128 (1 << QUANT_ROUND_FACTOR_Q) / 3);
2129 }
2130 #endif
2131 }
2132 else
2133 {
2134 quant_scale_mat_offset = NUM_TRANS_TYPES;
2135 }
2136 /* for intra 4x4 DST transform should be used */
2137 if((1 == trans_idx) && (1 == intra_flag))
2138 {
2139 trans_idx = 0;
2140 }
2141 /* for 16x16 cases */
2142 else if(16 == trans_size)
2143 {
2144 trans_idx = 3;
2145 }
2146 /* for 32x32 cases */
2147 else if(32 == trans_size)
2148 {
2149 trans_idx = 4;
2150 }
2151
2152 switch(trans_size)
2153 {
2154 case 4:
2155 {
2156 pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU;
2157
2158 break;
2159 }
2160 case 8:
2161 {
2162 pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU;
2163
2164 break;
2165 }
2166 case 16:
2167 {
2168 pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU;
2169
2170 break;
2171 }
2172 case 32:
2173 {
2174 pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU;
2175
2176 break;
2177 }
2178 }
2179
2180 /* Do not call the FT and Quant functions if early_cbf is 0 */
2181 if(1 == early_cbf)
2182 {
2183 /* ---------- call residue and transform block ------- */
2184 *pu4_blk_sad = ps_ctxt->apf_resd_trns[trans_idx](
2185 pu1_src,
2186 pu1_pred,
2187 pi4_trans_scratch,
2188 pi2_trans_values,
2189 src_strd,
2190 pred_strd,
2191 trans_size,
2192 NULL_PLANE);
2193
2194 cbf = ps_ctxt->apf_quant_iquant_ssd
2195 [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2](
2196 pi2_trans_values,
2197 ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset],
2198 pi2_quant_coeffs,
2199 pi2_deq_data,
2200 trans_size,
2201 ps_ctxt->i4_cu_qp_div6,
2202 ps_ctxt->i4_cu_qp_mod6,
2203 #if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
2204 ps_ctxt->i4_quant_rnd_factor[intra_flag],
2205 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
2206 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
2207 #else
2208 intra_flag ? ai4_quant_rounding_factors[0][0]
2209 : ps_ctxt->i4_quant_rnd_factor[intra_flag],
2210 intra_flag ? ai4_quant_rounding_factors[1]
2211 : ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
2212 intra_flag ? ai4_quant_rounding_factors[2]
2213 : ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
2214 #endif
2215 trans_size,
2216 trans_size,
2217 deq_data_strd,
2218 pu1_csbf_buf,
2219 csbf_strd,
2220 pi4_zero_col,
2221 pi4_zero_row,
2222 ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset],
2223 pi8_cost);
2224
2225 if(e_ssd_type != FREQUENCY_DOMAIN_SSD)
2226 {
2227 pi8_cost[0] = UINT_MAX;
2228 }
2229 }
2230
2231 if(0 != cbf)
2232 {
2233 if(i4_perform_sbh || i4_perform_rdoq)
2234 {
2235 ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd;
2236 ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size;
2237 ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map;
2238
2239 ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_cu_qp_div6;
2240 ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_cu_qp_mod6;
2241 ps_rdoq_sbh_ctxt->i4_scan_idx = ps_ctxt->i4_scan_idx;
2242 ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
2243 ps_rdoq_sbh_ctxt->i4_trans_size = trans_size;
2244
2245 ps_rdoq_sbh_ctxt->pi2_dequant_coeff =
2246 ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset];
2247 ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data;
2248 ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs;
2249 ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values;
2250 ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf;
2251
2252 /* ------- call coeffs scan function ------- */
2253 if((!i4_perform_rdoq))
2254 {
2255 ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
2256
2257 pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
2258 }
2259 }
2260
2261 *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
2262 pi2_quant_coeffs,
2263 pi4_subBlock2csbfId_map,
2264 ps_ctxt->i4_scan_idx,
2265 trans_size,
2266 pu1_ecd_data,
2267 pu1_csbf_buf,
2268 csbf_strd);
2269 }
2270 *pi8_cost >>= ga_trans_shift[trans_idx];
2271
2272 #if RDOPT_ZERO_CBF_ENABLE
2273 /* compare null cbf cost with encode tu rd-cost */
2274 if(cbf != 0)
2275 {
2276 WORD32 tu_bits;
2277 LWORD64 tu_rd_cost;
2278
2279 LWORD64 zero_cbf_cost = 0;
2280
2281 /*Populating the feilds of rdoq_ctxt structure*/
2282 if(i4_perform_rdoq)
2283 {
2284 /* transform size to log2transform size */
2285 GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size);
2286 ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1;
2287 ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_qf;
2288 ps_rdoq_sbh_ctxt->i4_is_luma = 1;
2289 ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx];
2290 ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td =
2291 (1 << ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td) / 2;
2292 ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0;
2293 ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col;
2294 ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row;
2295 }
2296 else if(i4_perform_zcbf)
2297 {
2298 zero_cbf_cost =
2299
2300 ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
2301 pu1_src, pu1_pred, src_strd, pred_strd, trans_size, trans_size, NULL_PLANE);
2302 }
2303
2304 /************************************************************************/
2305 /* call the entropy rdo encode to get the bit estimate for current tu */
2306 /* note that tu includes only residual coding bits and does not include */
2307 /* tu split, cbf and qp delta encoding bits for a TU */
2308 /************************************************************************/
2309 if(i4_perform_rdoq)
2310 {
2311 tu_bits = ihevce_entropy_rdo_encode_tu_rdoq(
2312 &ps_ctxt->s_rdopt_entropy_ctxt,
2313 (pu1_ecd_data),
2314 trans_size,
2315 1,
2316 ps_rdoq_sbh_ctxt,
2317 pi8_cost,
2318 &zero_cbf_cost,
2319 0);
2320
2321 if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0)
2322 {
2323 cbf = 0;
2324 *pi4_coeff_off = 0;
2325 }
2326
2327 if((i4_perform_sbh) && (0 != cbf))
2328 {
2329 ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
2330 ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
2331 *pi8_cost = ps_rdoq_sbh_ctxt->i8_ssd_cost;
2332 }
2333
2334 /*Add round value before normalizing*/
2335 *pi8_cost += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td;
2336 *pi8_cost >>= ga_trans_shift[trans_idx];
2337
2338 if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1)
2339 {
2340 pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
2341 *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
2342 pi2_quant_coeffs,
2343 pi4_subBlock2csbfId_map,
2344 ps_ctxt->i4_scan_idx,
2345 trans_size,
2346 pu1_ecd_data,
2347 pu1_csbf_buf,
2348 csbf_strd);
2349 }
2350 }
2351 else
2352 {
2353 tu_bits = ihevce_entropy_rdo_encode_tu(
2354 &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 1, i4_perform_sbh);
2355 }
2356
2357 *pi4_tu_bits = tu_bits;
2358
2359 if(e_ssd_type == SPATIAL_DOMAIN_SSD)
2360 {
2361 *pi8_cost = ihevce_it_recon_ssd(
2362 ps_ctxt,
2363 pu1_src,
2364 src_strd,
2365 pu1_pred,
2366 pred_strd,
2367 pi2_deq_data,
2368 deq_data_strd,
2369 pu1_recon,
2370 i4_recon_stride,
2371 pu1_ecd_data,
2372 trans_size,
2373 packed_pred_mode,
2374 cbf,
2375 *pi4_zero_col,
2376 *pi4_zero_row,
2377 NULL_PLANE);
2378
2379 pu1_is_recon_available[0] = 1;
2380 }
2381
2382 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2383 if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2384 {
2385 pi8_cost[0] = ihevce_inject_stim_into_distortion(
2386 pu1_src,
2387 src_strd,
2388 pu1_recon,
2389 i4_recon_stride,
2390 pi8_cost[0],
2391 i4_alpha_stim_multiplier,
2392 trans_size,
2393 0,
2394 ps_ctxt->u1_enable_psyRDOPT,
2395 NULL_PLANE);
2396 }
2397 else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2398 {
2399 pi8_cost[0] = ihevce_inject_stim_into_distortion(
2400 pu1_src,
2401 src_strd,
2402 pu1_pred,
2403 pred_strd,
2404 pi8_cost[0],
2405 i4_alpha_stim_multiplier,
2406 trans_size,
2407 0,
2408 ps_ctxt->u1_enable_psyRDOPT,
2409 NULL_PLANE);
2410 }
2411 #endif
2412
2413 /* add the SSD cost to bits estimate given by ECD */
2414 tu_rd_cost = *pi8_cost + COMPUTE_RATE_COST_CLIP30(
2415 tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
2416
2417 if(i4_perform_zcbf)
2418 {
2419 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2420 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
2421 {
2422 zero_cbf_cost = ihevce_inject_stim_into_distortion(
2423 pu1_src,
2424 src_strd,
2425 pu1_pred,
2426 pred_strd,
2427 zero_cbf_cost,
2428 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2429 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2430 (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2431 100.0,
2432 trans_size,
2433 0,
2434 ps_ctxt->u1_enable_psyRDOPT,
2435 NULL_PLANE);
2436 }
2437 #endif
2438
2439 /* force the tu as zero cbf if zero_cbf_cost is lower */
2440 if(zero_cbf_cost < tu_rd_cost)
2441 {
2442 /* num bytes is set to 0 */
2443 *pi4_coeff_off = 0;
2444
2445 /* cbf is returned as 0 */
2446 cbf = 0;
2447
2448 /* cost is returned as 0 cbf cost */
2449 *pi8_cost = zero_cbf_cost;
2450
2451 /* TU bits is set to 0 */
2452 *pi4_tu_bits = 0;
2453 pu1_is_recon_available[0] = 0;
2454
2455 if(SPATIAL_DOMAIN_SSD == e_ssd_type)
2456 {
2457 /* copy pred to recon for zcbf mode */
2458
2459 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
2460 pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size);
2461
2462 pu1_is_recon_available[0] = 1;
2463 }
2464 }
2465 /* accumulate cu not coded cost with zcbf cost */
2466 #if ENABLE_INTER_ZCU_COST
2467 ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost;
2468 #endif
2469 }
2470 }
2471 else
2472 {
2473 /* cbf = 0, accumulate cu not coded cost */
2474 if(e_ssd_type == SPATIAL_DOMAIN_SSD)
2475 {
2476 *pi8_cost = ihevce_it_recon_ssd(
2477 ps_ctxt,
2478 pu1_src,
2479 src_strd,
2480 pu1_pred,
2481 pred_strd,
2482 pi2_deq_data,
2483 deq_data_strd,
2484 pu1_recon,
2485 i4_recon_stride,
2486 pu1_ecd_data,
2487 trans_size,
2488 packed_pred_mode,
2489 cbf,
2490 *pi4_zero_col,
2491 *pi4_zero_row,
2492 NULL_PLANE);
2493
2494 pu1_is_recon_available[0] = 1;
2495 }
2496
2497 #if ENABLE_INTER_ZCU_COST
2498 {
2499 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2500 if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2501 {
2502 pi8_cost[0] = ihevce_inject_stim_into_distortion(
2503 pu1_src,
2504 src_strd,
2505 pu1_recon,
2506 i4_recon_stride,
2507 pi8_cost[0],
2508 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2509 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2510 (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2511 100.0,
2512 trans_size,
2513 0,
2514 ps_ctxt->u1_enable_psyRDOPT,
2515 NULL_PLANE);
2516 }
2517 else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2518 {
2519 pi8_cost[0] = ihevce_inject_stim_into_distortion(
2520 pu1_src,
2521 src_strd,
2522 pu1_pred,
2523 pred_strd,
2524 pi8_cost[0],
2525 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2526 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2527 (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2528 100.0,
2529 trans_size,
2530 0,
2531 ps_ctxt->u1_enable_psyRDOPT,
2532 NULL_PLANE);
2533 }
2534 #endif
2535
2536 ps_ctxt->i8_cu_not_coded_cost += *pi8_cost;
2537 }
2538 #endif /* ENABLE_INTER_ZCU_COST */
2539 }
2540 #endif
2541
2542 return (cbf);
2543 }
2544
2545 /*!
2546 ******************************************************************************
2547 * \if Function name : ihevce_it_recon_fxn \endif
2548 *
2549 * \brief
2550 * Transform unit level (Luma) IT Recon function
2551 *
2552 * \param[in] ps_ctxt enc_loop module ctxt pointer
2553 * \param[in] pi2_deq_data pointer to iq data
2554 * \param[in] deq_data_strd iq data buffer stride
2555 * \param[in] pu1_pred pointer to predicted data buffer
2556 * \param[in] pred_strd predicted buffer stride
2557 * \param[in] pu1_recon pointer to recon buffer
2558 * \param[in] recon_strd recon buffer stride
2559 * \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod)
2560 * \param[in] trans_size transform size (4, 8, 16,32)
2561 * \param[in] packed_pred_mode 0:Inter 1:Intra 2:Skip
2562 * \param[in] cbf CBF of the current block
2563 * \param[in] zero_cols zero_cols of the current block
2564 * \param[in] zero_rows zero_rows of the current block
2565 *
2566 * \return
2567 *
2568 * \author
2569 * Ittiam
2570 *
2571 *****************************************************************************
2572 */
2573
ihevce_it_recon_fxn(ihevce_enc_loop_ctxt_t * ps_ctxt,WORD16 * pi2_deq_data,WORD32 deq_dat_strd,UWORD8 * pu1_pred,WORD32 pred_strd,UWORD8 * pu1_recon,WORD32 recon_strd,UWORD8 * pu1_ecd_data,WORD32 trans_size,WORD32 packed_pred_mode,WORD32 cbf,WORD32 zero_cols,WORD32 zero_rows)2574 void ihevce_it_recon_fxn(
2575 ihevce_enc_loop_ctxt_t *ps_ctxt,
2576 WORD16 *pi2_deq_data,
2577 WORD32 deq_dat_strd,
2578 UWORD8 *pu1_pred,
2579 WORD32 pred_strd,
2580 UWORD8 *pu1_recon,
2581 WORD32 recon_strd,
2582 UWORD8 *pu1_ecd_data,
2583 WORD32 trans_size,
2584 WORD32 packed_pred_mode,
2585 WORD32 cbf,
2586 WORD32 zero_cols,
2587 WORD32 zero_rows)
2588 {
2589 WORD32 dc_add_flag = 0;
2590 WORD32 trans_idx;
2591
2592 /* translate the transform size to index for 4x4 and 8x8 */
2593 trans_idx = trans_size >> 2;
2594
2595 /* if SKIP mode needs to be evaluated the pred is copied to recon */
2596 if(PRED_MODE_SKIP == packed_pred_mode)
2597 {
2598 UWORD8 *pu1_curr_recon, *pu1_curr_pred;
2599
2600 pu1_curr_pred = pu1_pred;
2601 pu1_curr_recon = pu1_recon;
2602
2603 /* 2D copy of data */
2604
2605 ps_ctxt->s_cmn_opt_func.pf_2d_square_copy(
2606 pu1_curr_recon, recon_strd, pu1_curr_pred, pred_strd, trans_size, sizeof(UWORD8));
2607
2608 return;
2609 }
2610
2611 /* for intra 4x4 DST transform should be used */
2612 if((1 == trans_idx) && (PRED_MODE_INTRA == packed_pred_mode))
2613 {
2614 trans_idx = 0;
2615 }
2616 /* for 16x16 cases */
2617 else if(16 == trans_size)
2618 {
2619 trans_idx = 3;
2620 }
2621 /* for 32x32 cases */
2622 else if(32 == trans_size)
2623 {
2624 trans_idx = 4;
2625 }
2626
2627 /*if (lastx == 0 && lasty == 0) , ie only 1 coefficient */
2628 if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1]))
2629 {
2630 dc_add_flag = 1;
2631 }
2632
2633 if(0 == cbf)
2634 {
2635 /* buffer copy */
2636 ps_ctxt->s_cmn_opt_func.pf_2d_square_copy(
2637 pu1_recon, recon_strd, pu1_pred, pred_strd, trans_size, 1);
2638 }
2639 else if((1 == dc_add_flag) && (0 != trans_idx))
2640 {
2641 /* dc add */
2642 ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc(
2643 pu1_pred,
2644 pred_strd,
2645 pu1_recon,
2646 recon_strd,
2647 trans_size,
2648 pi2_deq_data[0],
2649 NULL_PLANE /* luma */
2650 );
2651 }
2652 else
2653 {
2654 ps_ctxt->apf_it_recon[trans_idx](
2655 pi2_deq_data,
2656 &ps_ctxt->ai2_scratch[0],
2657 pu1_pred,
2658 pu1_recon,
2659 deq_dat_strd,
2660 pred_strd,
2661 recon_strd,
2662 zero_cols,
2663 zero_rows);
2664 }
2665 }
2666
2667 /*!
2668 ******************************************************************************
2669 * \if Function name : ihevce_chroma_it_recon_fxn \endif
2670 *
2671 * \brief
2672 * Transform unit level (Chroma) IT Recon function
2673 *
2674 * \param[in] ps_ctxt enc_loop module ctxt pointer
2675 * \param[in] pi2_deq_data pointer to iq data
2676 * \param[in] deq_data_strd iq data buffer stride
2677 * \param[in] pu1_pred pointer to predicted data buffer
2678 * \param[in] pred_strd predicted buffer stride
2679 * \param[in] pu1_recon pointer to recon buffer
2680 * \param[in] recon_strd recon buffer stride
2681 * \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod)
2682 * \param[in] trans_size transform size (4, 8, 16)
2683 * \param[in] cbf CBF of the current block
2684 * \param[in] zero_cols zero_cols of the current block
2685 * \param[in] zero_rows zero_rows of the current block
2686 *
2687 * \return
2688 *
2689 * \author
2690 * Ittiam
2691 *
2692 *****************************************************************************
2693 */
2694
ihevce_chroma_it_recon_fxn(ihevce_enc_loop_ctxt_t * ps_ctxt,WORD16 * pi2_deq_data,WORD32 deq_dat_strd,UWORD8 * pu1_pred,WORD32 pred_strd,UWORD8 * pu1_recon,WORD32 recon_strd,UWORD8 * pu1_ecd_data,WORD32 trans_size,WORD32 cbf,WORD32 zero_cols,WORD32 zero_rows,CHROMA_PLANE_ID_T e_chroma_plane)2695 void ihevce_chroma_it_recon_fxn(
2696 ihevce_enc_loop_ctxt_t *ps_ctxt,
2697 WORD16 *pi2_deq_data,
2698 WORD32 deq_dat_strd,
2699 UWORD8 *pu1_pred,
2700 WORD32 pred_strd,
2701 UWORD8 *pu1_recon,
2702 WORD32 recon_strd,
2703 UWORD8 *pu1_ecd_data,
2704 WORD32 trans_size,
2705 WORD32 cbf,
2706 WORD32 zero_cols,
2707 WORD32 zero_rows,
2708 CHROMA_PLANE_ID_T e_chroma_plane)
2709 {
2710 WORD32 trans_idx;
2711
2712 ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE));
2713
2714 /* since 2x2 transform is not allowed for chroma*/
2715 if(2 == trans_size)
2716 {
2717 trans_size = 4;
2718 }
2719
2720 /* translate the transform size to index */
2721 trans_idx = trans_size >> 2;
2722
2723 /* for 16x16 cases */
2724 if(16 == trans_size)
2725 {
2726 trans_idx = 3;
2727 }
2728
2729 if(0 == cbf)
2730 {
2731 /* buffer copy */
2732 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
2733 pu1_pred, pred_strd, pu1_recon, recon_strd, trans_size, trans_size, e_chroma_plane);
2734 }
2735 else if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1]))
2736 {
2737 /* dc add */
2738 ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc(
2739 pu1_pred,
2740 pred_strd,
2741 pu1_recon,
2742 recon_strd,
2743 trans_size,
2744 pi2_deq_data[0],
2745 e_chroma_plane /* chroma plane */
2746 );
2747 }
2748 else
2749 {
2750 ps_ctxt->apf_chrm_it_recon[trans_idx - 1](
2751 pi2_deq_data,
2752 &ps_ctxt->ai2_scratch[0],
2753 pu1_pred + (WORD32)e_chroma_plane,
2754 pu1_recon + (WORD32)e_chroma_plane,
2755 deq_dat_strd,
2756 pred_strd,
2757 recon_strd,
2758 zero_cols,
2759 zero_rows);
2760 }
2761 }
2762
2763 /**
2764 *******************************************************************************
2765 * \if Function name : ihevce_mpm_idx_based_filter_RDOPT_cand \endif
2766 *
2767 * \brief * Filters the RDOPT candidates based on mpm_idx
2768 *
2769 * \par Description
2770 * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
2771 * for a CU
2772 *
2773 * \param[in] ps_ctxt : ptr to enc loop context
2774 * \param[in] ps_cu_analyse : ptr to CU analyse structure
2775 * \param[in] ps_top_nbr_4x4 top 4x4 neighbour pointer
2776 * \param[in] ps_left_nbr_4x4 left 4x4 neighbour pointer
2777 * \param[in] pu1_luma_mode luma mode
2778 *
2779 * \returns none
2780 *
2781 * \author
2782 * Ittiam
2783 *
2784 *******************************************************************************
2785 */
2786
ihevce_mpm_idx_based_filter_RDOPT_cand(ihevce_enc_loop_ctxt_t * ps_ctxt,cu_analyse_t * ps_cu_analyse,nbr_4x4_t * ps_left_nbr_4x4,nbr_4x4_t * ps_top_nbr_4x4,UWORD8 * pu1_luma_mode,UWORD8 * pu1_eval_mark)2787 void ihevce_mpm_idx_based_filter_RDOPT_cand(
2788 ihevce_enc_loop_ctxt_t *ps_ctxt,
2789 cu_analyse_t *ps_cu_analyse,
2790 nbr_4x4_t *ps_left_nbr_4x4,
2791 nbr_4x4_t *ps_top_nbr_4x4,
2792 UWORD8 *pu1_luma_mode,
2793 UWORD8 *pu1_eval_mark)
2794 {
2795 WORD32 cu_pos_x;
2796 WORD32 cu_pos_y;
2797 nbr_avail_flags_t s_nbr;
2798 WORD32 trans_size;
2799 WORD32 au4_cand_mode_list[3];
2800 WORD32 nbr_flags;
2801 UWORD8 *pu1_intra_luma_modes;
2802 WORD32 rdopt_cand_ctr = 0;
2803 UWORD8 *pu1_luma_eval_mark;
2804
2805 cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 1;
2806 cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 1;
2807 trans_size = ps_cu_analyse->u1_cu_size;
2808
2809 /* get the neighbour availability flags */
2810 nbr_flags = ihevce_get_nbr_intra(
2811 &s_nbr,
2812 ps_ctxt->pu1_ctb_nbr_map,
2813 ps_ctxt->i4_nbr_map_strd,
2814 cu_pos_x,
2815 cu_pos_y,
2816 trans_size >> 2);
2817 (void)nbr_flags;
2818 /*Call the fun to populate luma intra pred mode fro TU=CU and use the same list fro
2819 *TU=CU/2 also since the modes are same in both the cases.
2820 */
2821 ihevce_populate_intra_pred_mode(
2822 ps_top_nbr_4x4->b6_luma_intra_mode,
2823 ps_left_nbr_4x4->b6_luma_intra_mode,
2824 s_nbr.u1_top_avail,
2825 s_nbr.u1_left_avail,
2826 cu_pos_y,
2827 &au4_cand_mode_list[0]);
2828
2829 /*Loop through all the RDOPT candidates of TU=CU and TU=CU/2 and check if the current RDOPT
2830 *cand is present in a4_cand_mode_list, If yes set eval flag to 1 else set it to zero
2831 */
2832
2833 pu1_intra_luma_modes = pu1_luma_mode;
2834 pu1_luma_eval_mark = pu1_eval_mark;
2835
2836 while(pu1_intra_luma_modes[rdopt_cand_ctr] != 255)
2837 {
2838 WORD32 i;
2839 WORD32 found_flag = 0;
2840
2841 /*1st candidate of TU=CU list and TU=CU/2 list must go through RDOPT stage
2842 *irrespective of whether the cand is present in the mpm idx list or not
2843 */
2844 if(rdopt_cand_ctr == 0)
2845 {
2846 rdopt_cand_ctr++;
2847 continue;
2848 }
2849
2850 for(i = 0; i < 3; i++)
2851 {
2852 if(pu1_intra_luma_modes[rdopt_cand_ctr] == au4_cand_mode_list[i])
2853 {
2854 found_flag = 1;
2855 break;
2856 }
2857 }
2858
2859 if(found_flag == 0)
2860 {
2861 pu1_luma_eval_mark[rdopt_cand_ctr] = 0;
2862 }
2863
2864 rdopt_cand_ctr++;
2865 }
2866 }
2867
2868 /*!
2869 ******************************************************************************
2870 * \if Function name : ihevce_intra_rdopt_cu_ntu \endif
2871 *
2872 * \brief
2873 * Intra Coding unit function for RD opt mode
2874 *
2875 * \param[in] ps_ctxt enc_loop module ctxt pointer
2876 * \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure
2877 * \param[in] pu1_luma_mode : pointer to luma mode
2878 * \param[in] ps_cu_analyse pointer to cu analyse pointer
2879 * \param[in] pu1_src pointer to source data buffer
2880 * \param[in] src_strd source buffer stride
2881 * \param[in] pu1_cu_left pointer to left recon data buffer
2882 * \param[in] pu1_cu_top pointer to top recon data buffer
2883 * \param[in] pu1_cu_top_left pointer to top left recon data buffer
2884 * \param[in] ps_left_nbr_4x4 : left 4x4 neighbour pointer
2885 * \param[in] ps_top_nbr_4x4 : top 4x4 neighbour pointer
2886 * \param[in] nbr_4x4_left_strd left nbr4x4 stride
2887 * \param[in] cu_left_stride left recon buffer stride
2888 * \param[in] curr_buf_idx RD opt buffer index for current usage
2889 * \param[in] func_proc_mode : function processing mode @sa TU_SIZE_WRT_CU_T
2890 *
2891 * \return
2892 * RDopt cost
2893 *
2894 * \author
2895 * Ittiam
2896 *
2897 *****************************************************************************
2898 */
ihevce_intra_rdopt_cu_ntu(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms,void * pv_pred_org,WORD32 pred_strd_org,enc_loop_chrm_cu_buf_prms_t * ps_chrm_cu_buf_prms,UWORD8 * pu1_luma_mode,cu_analyse_t * ps_cu_analyse,void * pv_curr_src,void * pv_cu_left,void * pv_cu_top,void * pv_cu_top_left,nbr_4x4_t * ps_left_nbr_4x4,nbr_4x4_t * ps_top_nbr_4x4,WORD32 nbr_4x4_left_strd,WORD32 cu_left_stride,WORD32 curr_buf_idx,WORD32 func_proc_mode,WORD32 i4_alpha_stim_multiplier)2899 LWORD64 ihevce_intra_rdopt_cu_ntu(
2900 ihevce_enc_loop_ctxt_t *ps_ctxt,
2901 enc_loop_cu_prms_t *ps_cu_prms,
2902 void *pv_pred_org,
2903 WORD32 pred_strd_org,
2904 enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
2905 UWORD8 *pu1_luma_mode,
2906 cu_analyse_t *ps_cu_analyse,
2907 void *pv_curr_src,
2908 void *pv_cu_left,
2909 void *pv_cu_top,
2910 void *pv_cu_top_left,
2911 nbr_4x4_t *ps_left_nbr_4x4,
2912 nbr_4x4_t *ps_top_nbr_4x4,
2913 WORD32 nbr_4x4_left_strd,
2914 WORD32 cu_left_stride,
2915 WORD32 curr_buf_idx,
2916 WORD32 func_proc_mode,
2917 WORD32 i4_alpha_stim_multiplier)
2918 {
2919 enc_loop_cu_final_prms_t *ps_final_prms;
2920 nbr_avail_flags_t s_nbr;
2921 nbr_4x4_t *ps_nbr_4x4;
2922 nbr_4x4_t *ps_tmp_lt_4x4;
2923 recon_datastore_t *ps_recon_datastore;
2924
2925 ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
2926
2927 UWORD32 *pu4_nbr_flags;
2928 UWORD8 *pu1_intra_pred_mode;
2929 WORD32 cu_pos_x;
2930 WORD32 cu_pos_y;
2931 WORD32 trans_size = 0;
2932 UWORD8 *pu1_left;
2933 UWORD8 *pu1_top;
2934 UWORD8 *pu1_top_left;
2935 UWORD8 *pu1_recon;
2936 UWORD8 *pu1_csbf_buf;
2937 UWORD8 *pu1_ecd_data;
2938 WORD16 *pi2_deq_data;
2939 WORD32 deq_data_strd;
2940 LWORD64 total_rdopt_cost;
2941 WORD32 ctr;
2942 WORD32 left_strd;
2943 WORD32 i4_recon_stride;
2944 WORD32 csbf_strd;
2945 WORD32 ecd_data_bytes_cons;
2946 WORD32 num_4x4_in_tu;
2947 WORD32 num_4x4_in_cu;
2948 WORD32 chrm_present_flag;
2949 WORD32 tx_size;
2950 WORD32 cu_bits;
2951 WORD32 num_cu_parts = 0;
2952 WORD32 num_cands = 0;
2953 WORD32 cu_pos_x_8pelunits;
2954 WORD32 cu_pos_y_8pelunits;
2955 WORD32 i4_perform_rdoq;
2956 WORD32 i4_perform_sbh;
2957 UWORD8 u1_compute_spatial_ssd;
2958 UWORD8 u1_compute_recon;
2959 UWORD8 au1_intra_nxn_rdopt_ctxt_models[2][IHEVC_CAB_CTXT_END];
2960
2961 UWORD16 u2_num_tus_in_cu = 0;
2962 WORD32 is_sub_pu_in_hq = 0;
2963 /* Get the RDOPT cost of the best CU mode for early_exit */
2964 LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
2965 /* cabac context of prev intra luma pred flag */
2966 UWORD8 u1_prev_flag_cabac_ctxt =
2967 ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_INTRA_LUMA_PRED_FLAG];
2968 WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
2969
2970 UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY;
2971
2972 total_rdopt_cost = 0;
2973 ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
2974 ps_recon_datastore = &ps_final_prms->s_recon_datastore;
2975 i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
2976 csbf_strd = ps_ctxt->i4_cu_csbf_strd;
2977 pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
2978 pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
2979 pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
2980 deq_data_strd = ps_cu_analyse->u1_cu_size; /* deq_data stride is cu size */
2981 ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
2982 ps_tmp_lt_4x4 = ps_left_nbr_4x4;
2983 pu4_nbr_flags = &ps_final_prms->au4_nbr_flags[0];
2984 pu1_intra_pred_mode = &ps_final_prms->au1_intra_pred_mode[0];
2985 cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
2986 cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
2987 cu_pos_x_8pelunits = cu_pos_x;
2988 cu_pos_y_8pelunits = cu_pos_y;
2989
2990 /* reset cu not coded cost */
2991 ps_ctxt->i8_cu_not_coded_cost = 0;
2992
2993 /* based on the Processing mode */
2994 if(TU_EQ_CU == func_proc_mode)
2995 {
2996 ps_final_prms->u1_part_mode = SIZE_2Nx2N;
2997 trans_size = ps_cu_analyse->u1_cu_size;
2998 num_cu_parts = 1;
2999 num_cands = 1;
3000 u2_num_tus_in_cu = 1;
3001 }
3002 else if(TU_EQ_CU_DIV2 == func_proc_mode)
3003 {
3004 ps_final_prms->u1_part_mode = SIZE_2Nx2N;
3005 trans_size = ps_cu_analyse->u1_cu_size >> 1;
3006 num_cu_parts = 4;
3007 num_cands = 1;
3008 u2_num_tus_in_cu = 4;
3009 }
3010 else if(TU_EQ_SUBCU == func_proc_mode)
3011 {
3012 ps_final_prms->u1_part_mode = SIZE_NxN;
3013 trans_size = ps_cu_analyse->u1_cu_size >> 1;
3014 num_cu_parts = 4;
3015 /*In HQ for TU = SUBPU, all 35 modes used for RDOPT instead of 3 modes */
3016 if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset)
3017 {
3018 if(ps_ctxt->i1_slice_type != BSLICE)
3019 {
3020 num_cands = (4 * MAX_INTRA_CU_CANDIDATES) + 2;
3021 }
3022 else
3023 {
3024 num_cands = (2 * MAX_INTRA_CU_CANDIDATES);
3025 }
3026 }
3027 else
3028 {
3029 num_cands = MAX_INTRA_CU_CANDIDATES;
3030 }
3031 u2_num_tus_in_cu = 4;
3032 }
3033 else
3034 {
3035 /* should not enter here */
3036 ASSERT(0);
3037 }
3038
3039 if(ps_ctxt->i1_cu_qp_delta_enable)
3040 {
3041 ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, trans_size, 1);
3042 }
3043
3044 if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
3045 {
3046 ps_ctxt->i8_cl_ssd_lambda_qf =
3047 ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
3048 100.0f);
3049 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
3050 ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
3051 (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
3052 }
3053
3054 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
3055 (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
3056 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
3057
3058 if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
3059 {
3060 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
3061 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
3062 }
3063
3064 /* populate the neighbours */
3065 pu1_left = (UWORD8 *)pv_cu_left;
3066 pu1_top = (UWORD8 *)pv_cu_top;
3067 pu1_top_left = (UWORD8 *)pv_cu_top_left;
3068 left_strd = cu_left_stride;
3069 num_4x4_in_tu = (trans_size >> 2);
3070 num_4x4_in_cu = (ps_cu_analyse->u1_cu_size >> 2);
3071 chrm_present_flag = 1;
3072 ecd_data_bytes_cons = 0;
3073 cu_bits = 0;
3074
3075 /* get the 4x4 level postion of current cu */
3076 cu_pos_x = cu_pos_x << 1;
3077 cu_pos_y = cu_pos_y << 1;
3078
3079 /* populate cu level params knowing that current is intra */
3080 ps_final_prms->u1_skip_flag = 0;
3081 ps_final_prms->u1_intra_flag = PRED_MODE_INTRA;
3082 ps_final_prms->u2_num_pus_in_cu = 1;
3083 /*init the is_cu_coded flag*/
3084 ps_final_prms->u1_is_cu_coded = 0;
3085 ps_final_prms->u4_cu_sad = 0;
3086
3087 ps_final_prms->as_pu_enc_loop[0].b1_intra_flag = PRED_MODE_INTRA;
3088 ps_final_prms->as_pu_enc_loop[0].b4_wd = (trans_size >> 1) - 1;
3089 ps_final_prms->as_pu_enc_loop[0].b4_ht = (trans_size >> 1) - 1;
3090 ps_final_prms->as_pu_enc_loop[0].b4_pos_x = cu_pos_x;
3091 ps_final_prms->as_pu_enc_loop[0].b4_pos_y = cu_pos_y;
3092 ps_final_prms->as_pu_enc_loop[0].b1_merge_flag = 0;
3093
3094 ps_final_prms->as_col_pu_enc_loop[0].b1_intra_flag = 1;
3095
3096 /*copy qp directly as intra cant be skip*/
3097 ps_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
3098 ps_nbr_4x4->mv.s_l0_mv.i2_mvx = 0;
3099 ps_nbr_4x4->mv.s_l0_mv.i2_mvy = 0;
3100 ps_nbr_4x4->mv.s_l1_mv.i2_mvx = 0;
3101 ps_nbr_4x4->mv.s_l1_mv.i2_mvy = 0;
3102 ps_nbr_4x4->mv.i1_l0_ref_pic_buf_id = -1;
3103 ps_nbr_4x4->mv.i1_l1_ref_pic_buf_id = -1;
3104 ps_nbr_4x4->mv.i1_l0_ref_idx = -1;
3105 ps_nbr_4x4->mv.i1_l1_ref_idx = -1;
3106
3107 /* RDOPT copy States : TU init (best until prev TU) to current */
3108 memcpy(
3109 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
3110 .s_cabac_ctxt.au1_ctxt_models[0],
3111 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
3112 IHEVC_CAB_COEFFX_PREFIX);
3113
3114 /* RDOPT copy States :update to init state if 0 cbf */
3115 memcpy(
3116 &au1_intra_nxn_rdopt_ctxt_models[0][0],
3117 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
3118 IHEVC_CAB_COEFFX_PREFIX);
3119 memcpy(
3120 &au1_intra_nxn_rdopt_ctxt_models[1][0],
3121 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
3122 IHEVC_CAB_COEFFX_PREFIX);
3123
3124 /* loop for all partitions in CU blocks */
3125 for(ctr = 0; ctr < num_cu_parts; ctr++)
3126 {
3127 UWORD8 *pu1_curr_mode;
3128 WORD32 cand_ctr;
3129 WORD32 nbr_flags;
3130
3131 /* for NxN case to track the best mode */
3132 /* for other cases zeroth index will be used */
3133 intra_prev_rem_flags_t as_intra_prev_rem[2];
3134 LWORD64 ai8_cand_rdopt_cost[2];
3135 UWORD32 au4_tu_sad[2];
3136 WORD32 ai4_tu_bits[2];
3137 WORD32 ai4_cbf[2];
3138 WORD32 ai4_curr_bytes[2];
3139 WORD32 ai4_zero_col[2];
3140 WORD32 ai4_zero_row[2];
3141 /* To store the pred, coeff and dequant for TU_EQ_SUBCU case (since mul.
3142 cand. are there) ping-pong buffer to store the best and current */
3143 UWORD8 au1_cur_pred_data[2][MIN_TU_SIZE * MIN_TU_SIZE];
3144 UWORD8 au1_intra_coeffs[2][MAX_SCAN_COEFFS_BYTES_4x4];
3145 WORD16 ai2_intra_deq_coeffs[2][MIN_TU_SIZE * MIN_TU_SIZE];
3146 /* Context models stored for RDopt store and restore purpose */
3147
3148 UWORD8 au1_recon_availability[2];
3149
3150 WORD32 best_cand_idx = 0;
3151 LWORD64 best_cand_cost = MAX_COST_64;
3152 /* counters to toggle b/w best and current */
3153 WORD32 best_intra_buf_idx = 1;
3154 WORD32 curr_intra_buf_idx = 0;
3155
3156 /* copy the mode pointer to be used in inner loop */
3157 pu1_curr_mode = pu1_luma_mode;
3158
3159 /* get the neighbour availability flags */
3160 nbr_flags = ihevce_get_nbr_intra(
3161 &s_nbr,
3162 ps_ctxt->pu1_ctb_nbr_map,
3163 ps_ctxt->i4_nbr_map_strd,
3164 cu_pos_x,
3165 cu_pos_y,
3166 num_4x4_in_tu);
3167
3168 /* copy the nbr flags for chroma reuse */
3169 if(4 != trans_size)
3170 {
3171 *pu4_nbr_flags = nbr_flags;
3172 }
3173 else if(1 == chrm_present_flag)
3174 {
3175 /* compute the avail flags assuming luma trans is 8x8 */
3176 /* get the neighbour availability flags */
3177 *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
3178 ps_ctxt->pu1_ctb_nbr_map,
3179 ps_ctxt->i4_nbr_map_strd,
3180 cu_pos_x,
3181 cu_pos_y,
3182 (num_4x4_in_tu << 1),
3183 (num_4x4_in_tu << 1));
3184 }
3185
3186 u1_compute_recon = !u1_compute_spatial_ssd && ((num_cu_parts > 1) && (ctr < 3));
3187
3188 if(!ctr && (u1_compute_spatial_ssd || u1_compute_recon))
3189 {
3190 ps_recon_datastore->u1_is_lumaRecon_available = 1;
3191 }
3192 else if(!ctr)
3193 {
3194 ps_recon_datastore->u1_is_lumaRecon_available = 0;
3195 }
3196
3197 ihevc_intra_pred_luma_ref_substitution_fptr =
3198 ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
3199
3200 /* call reference array substitution */
3201 ihevc_intra_pred_luma_ref_substitution_fptr(
3202 pu1_top_left,
3203 pu1_top,
3204 pu1_left,
3205 left_strd,
3206 trans_size,
3207 nbr_flags,
3208 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
3209 1);
3210
3211 /* Intra Mode gating based on MPM cand list and encoder quality preset */
3212 if((ps_ctxt->i1_slice_type != ISLICE) && (TU_EQ_SUBCU == func_proc_mode) &&
3213 (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
3214 {
3215 ihevce_mpm_idx_based_filter_RDOPT_cand(
3216 ps_ctxt,
3217 ps_cu_analyse,
3218 ps_left_nbr_4x4,
3219 ps_top_nbr_4x4,
3220 pu1_luma_mode,
3221 &ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][0]);
3222 }
3223
3224 if((TU_EQ_SUBCU == func_proc_mode) && (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
3225 (ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr] >= MAX_INTRA_CU_CANDIDATES))
3226 {
3227 WORD32 ai4_mpm_mode_list[3];
3228 WORD32 i;
3229
3230 WORD32 i4_curr_index = ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr];
3231
3232 ihevce_populate_intra_pred_mode(
3233 ps_top_nbr_4x4->b6_luma_intra_mode,
3234 ps_tmp_lt_4x4->b6_luma_intra_mode,
3235 s_nbr.u1_top_avail,
3236 s_nbr.u1_left_avail,
3237 cu_pos_y,
3238 &ai4_mpm_mode_list[0]);
3239
3240 for(i = 0; i < 3; i++)
3241 {
3242 if(ps_cu_analyse->s_cu_intra_cand
3243 .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] == 0)
3244 {
3245 ASSERT(ai4_mpm_mode_list[i] < 35);
3246
3247 ps_cu_analyse->s_cu_intra_cand
3248 .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] = 1;
3249 pu1_luma_mode[i4_curr_index] = ai4_mpm_mode_list[i];
3250 ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr]++;
3251 i4_curr_index++;
3252 }
3253 }
3254
3255 pu1_luma_mode[i4_curr_index] = 255;
3256 }
3257
3258 /* loop over candidates for each partition */
3259 for(cand_ctr = 0; cand_ctr < num_cands; cand_ctr++)
3260 {
3261 WORD32 curr_pred_mode;
3262 WORD32 bits = 0;
3263 LWORD64 curr_cost;
3264 WORD32 luma_pred_func_idx;
3265 UWORD8 *pu1_curr_ecd_data;
3266 WORD16 *pi2_curr_deq_data;
3267 WORD32 curr_deq_data_strd;
3268 WORD32 pred_strd;
3269 UWORD8 *pu1_pred;
3270
3271 /* if NXN case the recon and ecd data is stored in temp buffers */
3272 if(TU_EQ_SUBCU == func_proc_mode)
3273 {
3274 pu1_pred = &au1_cur_pred_data[curr_intra_buf_idx][0];
3275 pred_strd = trans_size;
3276 pu1_curr_ecd_data = &au1_intra_coeffs[curr_intra_buf_idx][0];
3277 pi2_curr_deq_data = &ai2_intra_deq_coeffs[curr_intra_buf_idx][0];
3278 curr_deq_data_strd = trans_size;
3279
3280 ASSERT(trans_size == MIN_TU_SIZE);
3281 }
3282 else
3283 {
3284 pu1_pred = (UWORD8 *)pv_pred_org;
3285 pred_strd = pred_strd_org;
3286 pu1_curr_ecd_data = pu1_ecd_data;
3287 pi2_curr_deq_data = pi2_deq_data;
3288 curr_deq_data_strd = deq_data_strd;
3289 }
3290
3291 pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[curr_intra_buf_idx]) +
3292 (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride;
3293
3294 if(is_sub_pu_in_hq == 1)
3295 {
3296 curr_pred_mode = cand_ctr;
3297 }
3298 else
3299 {
3300 curr_pred_mode = pu1_curr_mode[cand_ctr];
3301 }
3302
3303 /* If the candidate mode is 255, then break */
3304 if(255 == curr_pred_mode)
3305 {
3306 break;
3307 }
3308 else if(250 == curr_pred_mode)
3309 {
3310 continue;
3311 }
3312
3313 /* check if this mode needs to be evaluated or not. For 2nx2n cases, this */
3314 /* function will be called once per candidate, so this check has been done */
3315 /* outside this function call. For NxN case, this function will be called */
3316 /* only once, and all the candidates will be evaluated here. */
3317 if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3)
3318 {
3319 if((TU_EQ_SUBCU == func_proc_mode) &&
3320 (0 == ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][cand_ctr]))
3321 {
3322 continue;
3323 }
3324 }
3325
3326 /* call reference filtering */
3327 ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr(
3328 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
3329 trans_size,
3330 (UWORD8 *)ps_ctxt->pv_ref_filt_out,
3331 curr_pred_mode,
3332 ps_ctxt->i1_strong_intra_smoothing_enable_flag);
3333
3334 /* use the look up to get the function idx */
3335 luma_pred_func_idx = g_i4_ip_funcs[curr_pred_mode];
3336
3337 /* call the intra prediction function */
3338 ps_ctxt->apf_lum_ip[luma_pred_func_idx](
3339 (UWORD8 *)ps_ctxt->pv_ref_filt_out,
3340 1,
3341 pu1_pred,
3342 pred_strd,
3343 trans_size,
3344 curr_pred_mode);
3345
3346 /* populate the coeffs scan idx */
3347 ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
3348
3349 /* for luma 4x4 and 8x8 transforms based on intra pred mode scan is chosen*/
3350 if(trans_size < 16)
3351 {
3352 /* for modes from 22 upto 30 horizontal scan is used */
3353 if((curr_pred_mode > 21) && (curr_pred_mode < 31))
3354 {
3355 ps_ctxt->i4_scan_idx = SCAN_HORZ;
3356 }
3357 /* for modes from 6 upto 14 vertical scan is used */
3358 else if((curr_pred_mode > 5) && (curr_pred_mode < 15))
3359 {
3360 ps_ctxt->i4_scan_idx = SCAN_VERT;
3361 }
3362 }
3363
3364 /* RDOPT copy States : TU init (best until prev TU) to current */
3365 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3366 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
3367 .s_cabac_ctxt.au1_ctxt_models[0] +
3368 IHEVC_CAB_COEFFX_PREFIX,
3369 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
3370 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3371
3372 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
3373 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
3374
3375 #if DISABLE_RDOQ_INTRA
3376 i4_perform_rdoq = 0;
3377 #endif
3378
3379 /*2 Multi- dimensinal array based on trans size of rounding factor to be added here */
3380 /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
3381 /* Currently the complete array will contain only single value*/
3382 /*The rounding factor is calculated with the formula
3383 Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
3384 rounding factor = (1 - DeadZone Val)
3385
3386 Assumption: Cabac states of All the sub-blocks in the TU are considered independent
3387 */
3388 if((ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING))
3389 {
3390 if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0))
3391 {
3392 double i4_lamda_modifier;
3393
3394 if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
3395 {
3396 i4_lamda_modifier =
3397 ps_ctxt->i4_lamda_modifier *
3398 CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
3399 }
3400 else
3401 {
3402 i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
3403 }
3404 if(ps_ctxt->i4_use_const_lamda_modifier)
3405 {
3406 if(ISLICE == ps_ctxt->i1_slice_type)
3407 {
3408 i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
3409 }
3410 else
3411 {
3412 i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
3413 }
3414 }
3415
3416 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
3417 &ps_ctxt->i4_quant_round_tu[0][0];
3418 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
3419 &ps_ctxt->i4_quant_round_tu[1][0];
3420
3421 memset(
3422 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
3423 0,
3424 trans_size * trans_size * sizeof(WORD32));
3425 memset(
3426 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
3427 0,
3428 trans_size * trans_size * sizeof(WORD32));
3429
3430 ihevce_quant_rounding_factor_gen(
3431 trans_size,
3432 1,
3433 &ps_ctxt->s_rdopt_entropy_ctxt,
3434 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
3435 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
3436 i4_lamda_modifier,
3437 1);
3438 }
3439 else
3440 {
3441 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
3442 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3];
3443 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
3444 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3];
3445 }
3446 }
3447
3448 /* call T Q IT IQ and recon function */
3449 ai4_cbf[curr_intra_buf_idx] = ihevce_t_q_iq_ssd_scan_fxn(
3450 ps_ctxt,
3451 pu1_pred,
3452 pred_strd,
3453 (UWORD8 *)pv_curr_src,
3454 src_strd,
3455 pi2_curr_deq_data,
3456 curr_deq_data_strd,
3457 pu1_recon,
3458 i4_recon_stride,
3459 pu1_curr_ecd_data,
3460 pu1_csbf_buf,
3461 csbf_strd,
3462 trans_size,
3463 PRED_MODE_INTRA,
3464 &ai8_cand_rdopt_cost[curr_intra_buf_idx],
3465 &ai4_curr_bytes[curr_intra_buf_idx],
3466 &ai4_tu_bits[curr_intra_buf_idx],
3467 &au4_tu_sad[curr_intra_buf_idx],
3468 &ai4_zero_col[curr_intra_buf_idx],
3469 &ai4_zero_row[curr_intra_buf_idx],
3470 &au1_recon_availability[curr_intra_buf_idx],
3471 i4_perform_rdoq,
3472 i4_perform_sbh,
3473 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
3474 i4_alpha_stim_multiplier,
3475 u1_is_cu_noisy,
3476 #endif
3477 u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
3478 1 /*early_cbf */
3479 );
3480
3481 #if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
3482 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
3483 {
3484 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
3485 ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
3486 pv_curr_src,
3487 src_strd,
3488 pu1_pred,
3489 pred_strd,
3490 ai8_cand_rdopt_cost[curr_intra_buf_idx],
3491 i4_alpha_stim_multiplier,
3492 trans_size,
3493 0,
3494 ps_ctxt->u1_enable_psyRDOPT,
3495 NULL_PLANE);
3496 #else
3497 if(u1_compute_spatial_ssd && au1_recon_availability[curr_intra_buf_idx])
3498 {
3499 ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
3500 pv_curr_src,
3501 src_strd,
3502 pu1_recon,
3503 i4_recon_stride,
3504 ai8_cand_rdopt_cost[curr_intra_buf_idx],
3505 i4_alpha_stim_multiplier,
3506 trans_size,
3507 0,
3508 ps_ctxt->u1_enable_psyRDOPT,
3509 NULL_PLANE);
3510 }
3511 else
3512 {
3513 ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
3514 pv_curr_src,
3515 src_strd,
3516 pu1_pred,
3517 pred_strd,
3518 ai8_cand_rdopt_cost[curr_intra_buf_idx],
3519 i4_alpha_stim_multiplier,
3520 trans_size,
3521 0,
3522 ps_ctxt->u1_enable_psyRDOPT,
3523 NULL_PLANE);
3524 }
3525 #endif
3526 }
3527 #endif
3528
3529 if(TU_EQ_SUBCU == func_proc_mode)
3530 {
3531 ASSERT(ai4_curr_bytes[curr_intra_buf_idx] < MAX_SCAN_COEFFS_BYTES_4x4);
3532 }
3533
3534 /* based on CBF/No CBF copy the corresponding state */
3535 if(0 == ai4_cbf[curr_intra_buf_idx])
3536 {
3537 /* RDOPT copy States :update to init state if 0 cbf */
3538 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3539 &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] +
3540 IHEVC_CAB_COEFFX_PREFIX,
3541 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
3542 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3543 }
3544 else
3545 {
3546 /* RDOPT copy States :update to new state only if CBF is non zero */
3547 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3548 &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] +
3549 IHEVC_CAB_COEFFX_PREFIX,
3550 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
3551 .s_cabac_ctxt.au1_ctxt_models[0] +
3552 IHEVC_CAB_COEFFX_PREFIX,
3553 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3554 }
3555
3556 /* call the function which perform intra mode prediction */
3557 ihevce_intra_pred_mode_signaling(
3558 ps_top_nbr_4x4->b6_luma_intra_mode,
3559 ps_tmp_lt_4x4->b6_luma_intra_mode,
3560 s_nbr.u1_top_avail,
3561 s_nbr.u1_left_avail,
3562 cu_pos_y,
3563 curr_pred_mode,
3564 &as_intra_prev_rem[curr_intra_buf_idx]);
3565 /******************************************************************/
3566 /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN
3567 The bits for these are evaluated for every RDO mode of current subcu
3568 as they can significantly contribute to RDO cost. Note that these
3569 bits are not accounted for here (ai8_cand_rdopt_cost) as they
3570 are accounted for in encode_cu call later */
3571
3572 /******************************************************************/
3573 /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN
3574 The bits for these are evaluated for every RDO mode of current subcu
3575 as they can significantly contribute to RDO cost. Note that these
3576 bits are not accounted for here (ai8_cand_rdopt_cost) as they
3577 are accounted for in encode_cu call later */
3578
3579 /* Estimate bits to encode prev rem flag for NXN mode */
3580 {
3581 WORD32 bits_frac = gau2_ihevce_cabac_bin_to_bits
3582 [u1_prev_flag_cabac_ctxt ^
3583 as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag];
3584
3585 /* rounding the fractional bits to nearest integer */
3586 bits = ((bits_frac + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q);
3587 }
3588
3589 /* based on prev flag all the mpmidx bits and rem bits */
3590 if(1 == as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag)
3591 {
3592 /* mpm_idx */
3593 bits += as_intra_prev_rem[curr_intra_buf_idx].b2_mpm_idx ? 2 : 1;
3594 }
3595 else
3596 {
3597 /* rem intra mode */
3598 bits += 5;
3599 }
3600
3601 bits += ai4_tu_bits[curr_intra_buf_idx];
3602
3603 /* compute the total cost for current candidate */
3604 curr_cost = ai8_cand_rdopt_cost[curr_intra_buf_idx];
3605
3606 /* get the final ssd cost */
3607 curr_cost +=
3608 COMPUTE_RATE_COST_CLIP30(bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
3609
3610 /* check of the best candidate cost */
3611 if(curr_cost < best_cand_cost)
3612 {
3613 best_cand_cost = curr_cost;
3614 best_cand_idx = cand_ctr;
3615 best_intra_buf_idx = curr_intra_buf_idx;
3616 curr_intra_buf_idx = !curr_intra_buf_idx;
3617 }
3618 }
3619
3620 /*************** For TU_EQ_SUBCU case *****************/
3621 /* Copy the pred for best cand. to the final pred array */
3622 /* Copy the iq-coeff for best cand. to the final array */
3623 /* copy the best coeffs data to final buffer */
3624 if(TU_EQ_SUBCU == func_proc_mode)
3625 {
3626 /* Copy the pred for best cand. to the final pred array */
3627
3628 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
3629 (UWORD8 *)pv_pred_org,
3630 pred_strd_org,
3631 &au1_cur_pred_data[best_intra_buf_idx][0],
3632 trans_size,
3633 trans_size,
3634 trans_size);
3635
3636 /* Copy the deq-coeff for best cand. to the final array */
3637
3638 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
3639 (UWORD8 *)pi2_deq_data,
3640 deq_data_strd << 1,
3641 (UWORD8 *)&ai2_intra_deq_coeffs[best_intra_buf_idx][0],
3642 trans_size << 1,
3643 trans_size << 1,
3644 trans_size);
3645 /* copy the coeffs to final cu ecd bytes buffer */
3646 memcpy(
3647 pu1_ecd_data,
3648 &au1_intra_coeffs[best_intra_buf_idx][0],
3649 ai4_curr_bytes[best_intra_buf_idx]);
3650
3651 pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[best_intra_buf_idx]) +
3652 (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride;
3653 }
3654
3655 /*---------- Calculate Recon for the best INTRA mode ---------*/
3656 /* TU_EQ_CU case : No need for recon, otherwise recon is required */
3657 /* Compute recon only for the best mode for TU_EQ_SUBCU case */
3658 if(u1_compute_recon)
3659 {
3660 ihevce_it_recon_fxn(
3661 ps_ctxt,
3662 pi2_deq_data,
3663 deq_data_strd,
3664 (UWORD8 *)pv_pred_org,
3665 pred_strd_org,
3666 pu1_recon,
3667 i4_recon_stride,
3668 pu1_ecd_data,
3669 trans_size,
3670 PRED_MODE_INTRA,
3671 ai4_cbf[best_intra_buf_idx],
3672 ai4_zero_col[best_intra_buf_idx],
3673 ai4_zero_row[best_intra_buf_idx]);
3674
3675 ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx;
3676 }
3677 else if(u1_compute_spatial_ssd && au1_recon_availability[best_intra_buf_idx])
3678 {
3679 ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx;
3680 }
3681 else
3682 {
3683 ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
3684 }
3685
3686 /* RDOPT copy States :update to best modes state */
3687 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3688 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
3689 &au1_intra_nxn_rdopt_ctxt_models[best_intra_buf_idx][0] + IHEVC_CAB_COEFFX_PREFIX,
3690 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3691
3692 /* copy the prev,mpm_idx and rem modes from best cand */
3693 ps_final_prms->as_intra_prev_rem[ctr] = as_intra_prev_rem[best_intra_buf_idx];
3694
3695 /* update the cabac context of prev intra pred mode flag */
3696 u1_prev_flag_cabac_ctxt = gau1_ihevc_next_state
3697 [(u1_prev_flag_cabac_ctxt << 1) |
3698 as_intra_prev_rem[best_intra_buf_idx].b1_prev_intra_luma_pred_flag];
3699
3700 /* accumulate the TU bits into cu bits */
3701 cu_bits += ai4_tu_bits[best_intra_buf_idx];
3702
3703 /* copy the intra pred mode for chroma reuse */
3704 if(is_sub_pu_in_hq == 0)
3705 {
3706 *pu1_intra_pred_mode = pu1_curr_mode[best_cand_idx];
3707 }
3708 else
3709 {
3710 *pu1_intra_pred_mode = best_cand_idx;
3711 }
3712
3713 /* Store luma mode as chroma mode. If chroma prcs happens, and
3714 if a diff. mode wins, it should update this!! */
3715 if(1 == chrm_present_flag)
3716 {
3717 if(is_sub_pu_in_hq == 0)
3718 {
3719 ps_final_prms->u1_chroma_intra_pred_actual_mode =
3720 ((ps_ctxt->u1_chroma_array_type == 2)
3721 ? gau1_chroma422_intra_angle_mapping[pu1_curr_mode[best_cand_idx]]
3722 : pu1_curr_mode[best_cand_idx]);
3723 }
3724 else
3725 {
3726 ps_final_prms->u1_chroma_intra_pred_actual_mode =
3727 ((ps_ctxt->u1_chroma_array_type == 2)
3728 ? gau1_chroma422_intra_angle_mapping[best_cand_idx]
3729 : best_cand_idx);
3730 }
3731
3732 ps_final_prms->u1_chroma_intra_pred_mode = 4;
3733 }
3734
3735 /*remember the cbf flag to replicate qp for 4x4 neighbour*/
3736 ps_final_prms->u1_is_cu_coded |= ai4_cbf[best_intra_buf_idx];
3737
3738 /*accumulate ssd over all TU of intra CU*/
3739 ps_final_prms->u4_cu_sad += au4_tu_sad[best_intra_buf_idx];
3740
3741 /* update the bytes */
3742 ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
3743 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed =
3744 ai4_curr_bytes[best_intra_buf_idx];
3745 /* update the zero_row and col info for the final mode */
3746 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col =
3747 ai4_zero_col[best_intra_buf_idx];
3748 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row =
3749 ai4_zero_row[best_intra_buf_idx];
3750
3751 ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
3752
3753 /* update the total bytes cons */
3754 ecd_data_bytes_cons += ai4_curr_bytes[best_intra_buf_idx];
3755 pu1_ecd_data += ai4_curr_bytes[best_intra_buf_idx];
3756
3757 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = ai4_cbf[best_intra_buf_idx];
3758 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
3759 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
3760 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
3761 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
3762 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag;
3763 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp;
3764 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0;
3765 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0;
3766 GETRANGE(tx_size, trans_size);
3767 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
3768 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x;
3769 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y;
3770
3771 /* replicate the nbr 4x4 structure for all 4x4 blocks of the current TU */
3772 ps_nbr_4x4->b1_skip_flag = 0;
3773 ps_nbr_4x4->b1_intra_flag = 1;
3774 ps_nbr_4x4->b1_pred_l0_flag = 0;
3775 ps_nbr_4x4->b1_pred_l1_flag = 0;
3776
3777 if(is_sub_pu_in_hq == 0)
3778 {
3779 ps_nbr_4x4->b6_luma_intra_mode = pu1_curr_mode[best_cand_idx];
3780 }
3781 else
3782 {
3783 ps_nbr_4x4->b6_luma_intra_mode = best_cand_idx;
3784 }
3785
3786 ps_nbr_4x4->b1_y_cbf = ai4_cbf[best_intra_buf_idx];
3787
3788 /* since tu size can be less than cusize, replication is done with strd */
3789 {
3790 WORD32 i, j;
3791 nbr_4x4_t *ps_tmp_4x4;
3792
3793 ps_tmp_4x4 = ps_nbr_4x4;
3794
3795 for(i = 0; i < num_4x4_in_tu; i++)
3796 {
3797 for(j = 0; j < num_4x4_in_tu; j++)
3798 {
3799 ps_tmp_4x4[j] = *ps_nbr_4x4;
3800 }
3801 /* row level update*/
3802 ps_tmp_4x4 += num_4x4_in_cu;
3803 }
3804 }
3805
3806 if(TU_EQ_SUBCU == func_proc_mode)
3807 {
3808 pu1_luma_mode += ((MAX_INTRA_CU_CANDIDATES * 4) + 2 + 1);
3809 }
3810
3811 if((num_cu_parts > 1) && (ctr < 3))
3812 {
3813 /* set the neighbour map to 1 */
3814 ihevce_set_nbr_map(
3815 ps_ctxt->pu1_ctb_nbr_map,
3816 ps_ctxt->i4_nbr_map_strd,
3817 cu_pos_x,
3818 cu_pos_y,
3819 trans_size >> 2,
3820 1);
3821
3822 /* block level updates block number (1 & 3 )*/
3823 pv_curr_src = (UWORD8 *)pv_curr_src + trans_size;
3824 pv_pred_org = (UWORD8 *)pv_pred_org + trans_size;
3825 pi2_deq_data += trans_size;
3826
3827 switch(ctr)
3828 {
3829 case 0:
3830 {
3831 pu1_left = pu1_recon + trans_size - 1;
3832 pu1_top += trans_size;
3833 pu1_top_left = pu1_top - 1;
3834 left_strd = i4_recon_stride;
3835
3836 break;
3837 }
3838 case 1:
3839 {
3840 ASSERT(
3841 (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 0) ||
3842 (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 1));
3843
3844 /* Since the 'lumaRefSubstitution' function expects both Top and */
3845 /* TopRight recon pixels to be present in the same buffer */
3846 if(ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] !=
3847 ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1])
3848 {
3849 UWORD8 *pu1_src =
3850 ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3851 [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) +
3852 trans_size;
3853 UWORD8 *pu1_dst =
3854 ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3855 [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) +
3856 trans_size;
3857
3858 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
3859 pu1_dst, i4_recon_stride, pu1_src, i4_recon_stride, trans_size, trans_size);
3860
3861 ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] =
3862 ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0];
3863 }
3864
3865 pu1_left = (UWORD8 *)pv_cu_left + trans_size * cu_left_stride;
3866 pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3867 [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) +
3868 (trans_size - 1) * i4_recon_stride;
3869 pu1_top_left = pu1_left - cu_left_stride;
3870 left_strd = cu_left_stride;
3871
3872 break;
3873 }
3874 case 2:
3875 {
3876 ASSERT(
3877 (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 0) ||
3878 (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 1));
3879
3880 pu1_left = pu1_recon + trans_size - 1;
3881 pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3882 [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) +
3883 (trans_size - 1) * i4_recon_stride + trans_size;
3884 pu1_top_left = pu1_top - 1;
3885 left_strd = i4_recon_stride;
3886
3887 break;
3888 }
3889 }
3890
3891 pu1_csbf_buf += num_4x4_in_tu;
3892 cu_pos_x += num_4x4_in_tu;
3893 ps_nbr_4x4 += num_4x4_in_tu;
3894 ps_top_nbr_4x4 += num_4x4_in_tu;
3895 ps_tmp_lt_4x4 = ps_nbr_4x4 - 1;
3896
3897 pu1_intra_pred_mode++;
3898
3899 /* after 2 blocks increment the pointers to bottom blocks */
3900 if(1 == ctr)
3901 {
3902 pv_curr_src = (UWORD8 *)pv_curr_src - (trans_size << 1);
3903 pv_curr_src = (UWORD8 *)pv_curr_src + (trans_size * src_strd);
3904
3905 pv_pred_org = (UWORD8 *)pv_pred_org - (trans_size << 1);
3906 pv_pred_org = (UWORD8 *)pv_pred_org + (trans_size * pred_strd_org);
3907 pi2_deq_data -= (trans_size << 1);
3908 pi2_deq_data += (trans_size * deq_data_strd);
3909
3910 pu1_csbf_buf -= (num_4x4_in_tu << 1);
3911 pu1_csbf_buf += (num_4x4_in_tu * csbf_strd);
3912
3913 ps_nbr_4x4 -= (num_4x4_in_tu << 1);
3914 ps_nbr_4x4 += (num_4x4_in_tu * num_4x4_in_cu);
3915 ps_top_nbr_4x4 = ps_nbr_4x4 - num_4x4_in_cu;
3916 ps_tmp_lt_4x4 = ps_left_nbr_4x4 + (num_4x4_in_tu * nbr_4x4_left_strd);
3917
3918 /* decrement pos x to start */
3919 cu_pos_x -= (num_4x4_in_tu << 1);
3920 cu_pos_y += num_4x4_in_tu;
3921 }
3922 }
3923
3924 #if RDOPT_ENABLE
3925 /* compute the RDOPT cost for the current TU */
3926 ai8_cand_rdopt_cost[best_intra_buf_idx] += COMPUTE_RATE_COST_CLIP30(
3927 ai4_tu_bits[best_intra_buf_idx], ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
3928 #endif
3929
3930 /* accumulate the costs */
3931 total_rdopt_cost += ai8_cand_rdopt_cost[best_intra_buf_idx];
3932
3933 if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
3934 {
3935 /* Early exit : If the current running cost exceeds
3936 the prev. best mode cost, break */
3937 if(total_rdopt_cost > prev_best_rdopt_cost)
3938 {
3939 return (total_rdopt_cost);
3940 }
3941 }
3942
3943 /* if transfrom size is 4x4 then only first luma 4x4 will have chroma*/
3944 chrm_present_flag = (4 != trans_size) ? 1 : INTRA_PRED_CHROMA_IDX_NONE;
3945
3946 pu4_nbr_flags++;
3947 }
3948 /* Modify the cost function for this CU. */
3949 /* loop in for 8x8 blocks */
3950 if(ps_ctxt->u1_enable_psyRDOPT)
3951 {
3952 UWORD8 *pu1_recon_cu;
3953 WORD32 recon_stride;
3954 WORD32 curr_pos_x;
3955 WORD32 curr_pos_y;
3956 WORD32 start_index;
3957 WORD32 num_horz_cu_in_ctb;
3958 WORD32 cu_size;
3959 WORD32 had_block_size;
3960
3961 /* tODO: sreenivasa ctb size has to be used appropriately */
3962 had_block_size = 8;
3963 cu_size = ps_cu_analyse->u1_cu_size; /* todo */
3964 num_horz_cu_in_ctb = 64 / had_block_size;
3965
3966 curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
3967 curr_pos_y = ps_cu_analyse->b3_cu_pos_y << 3; /* pel units */
3968 recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
3969 pu1_recon_cu =
3970 ((UWORD8 *)ps_final_prms->s_recon_datastore
3971 .apv_luma_recon_bufs[ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]);
3972 /* + \ curr_pos_x + curr_pos_y * recon_stride; */
3973
3974 /* start index to index the source satd of curr cu int he current ctb*/
3975 start_index =
3976 (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
3977
3978 {
3979 total_rdopt_cost += ihevce_psy_rd_cost(
3980 ps_ctxt->ai4_source_satd_8x8,
3981 pu1_recon_cu,
3982 recon_stride,
3983 1, //
3984 cu_size,
3985 0, // pic type
3986 0, //layer id
3987 ps_ctxt->i4_satd_lamda, // lambda
3988 start_index,
3989 ps_ctxt->u1_is_input_data_hbd,
3990 ps_ctxt->u4_psy_strength,
3991 &ps_ctxt->s_cmn_opt_func
3992
3993 ); // 8 bit
3994 }
3995 }
3996
3997 #if !FORCE_INTRA_TU_DEPTH_TO_0 //RATIONALISE_NUM_RDO_MODES_IN_PQ_AND_HQ
3998 if(TU_EQ_SUBCU == func_proc_mode)
3999 {
4000 UWORD8 au1_tu_eq_cu_div2_modes[4];
4001 UWORD8 au1_freq_of_mode[4];
4002
4003 WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D(
4004 ps_final_prms->au1_intra_pred_mode, au1_tu_eq_cu_div2_modes, au1_freq_of_mode, 4);
4005
4006 if(1 == i4_num_clusters)
4007 {
4008 ps_final_prms->u2_num_pus_in_cu = 1;
4009 ps_final_prms->u1_part_mode = SIZE_2Nx2N;
4010 }
4011 }
4012 #endif
4013
4014 /* store the num TUs*/
4015 ps_final_prms->u2_num_tus_in_cu = u2_num_tus_in_cu;
4016
4017 /* update the bytes consumed */
4018 ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
4019
4020 /* store the current cu size to final prms */
4021 ps_final_prms->u1_cu_size = ps_cu_analyse->u1_cu_size;
4022
4023 /* cu bits will be having luma residual bits till this point */
4024 /* if zero_cbf eval is disabled then cu bits will be zero */
4025 ps_final_prms->u4_cu_luma_res_bits = cu_bits;
4026
4027 /* ------------- Chroma processing -------------- */
4028 /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEDD preset*/
4029 if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
4030 {
4031 LWORD64 chrm_rdopt_cost;
4032 WORD32 chrm_rdopt_tu_bits;
4033
4034 /* Store the current RDOPT cost to enable early exit in chrom_prcs */
4035 ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
4036
4037 chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
4038 ps_ctxt,
4039 curr_buf_idx,
4040 func_proc_mode,
4041 ps_chrm_cu_buf_prms->pu1_curr_src,
4042 ps_chrm_cu_buf_prms->i4_chrm_src_stride,
4043 ps_chrm_cu_buf_prms->pu1_cu_left,
4044 ps_chrm_cu_buf_prms->pu1_cu_top,
4045 ps_chrm_cu_buf_prms->pu1_cu_top_left,
4046 ps_chrm_cu_buf_prms->i4_cu_left_stride,
4047 cu_pos_x_8pelunits,
4048 cu_pos_y_8pelunits,
4049 &chrm_rdopt_tu_bits,
4050 i4_alpha_stim_multiplier,
4051 u1_is_cu_noisy);
4052
4053 #if WEIGH_CHROMA_COST
4054 chrm_rdopt_cost = (LWORD64)(
4055 (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
4056 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
4057 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
4058 #endif
4059
4060 #if CHROMA_RDOPT_ENABLE
4061 total_rdopt_cost += chrm_rdopt_cost;
4062 #endif
4063 cu_bits += chrm_rdopt_tu_bits;
4064
4065 /* cu bits for chroma residual if chroma rdopt is on */
4066 /* if zero_cbf eval is disabled then cu bits will be zero */
4067 ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
4068
4069 if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
4070 {
4071 /* Early exit : If the current running cost exceeds
4072 the prev. best mode cost, break */
4073 if(total_rdopt_cost > prev_best_rdopt_cost)
4074 {
4075 return (total_rdopt_cost);
4076 }
4077 }
4078 }
4079 else
4080 {}
4081
4082 /* RDOPT copy States : Best after all luma TUs to current */
4083 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4084 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4085 .s_cabac_ctxt.au1_ctxt_models[0] +
4086 IHEVC_CAB_COEFFX_PREFIX,
4087 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4088 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4089
4090 /* get the neighbour availability flags for current cu */
4091 ihevce_get_only_nbr_flag(
4092 &s_nbr,
4093 ps_ctxt->pu1_ctb_nbr_map,
4094 ps_ctxt->i4_nbr_map_strd,
4095 (cu_pos_x_8pelunits << 1),
4096 (cu_pos_y_8pelunits << 1),
4097 (trans_size << 1),
4098 (trans_size << 1));
4099
4100 /* call the entropy rdo encode to get the bit estimate for current cu */
4101 /*if ZERO_CBF eval is enabled then this function will return only CU header bits */
4102 {
4103 /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
4104 WORD32 cbf_bits, header_bits;
4105
4106 header_bits = ihevce_entropy_rdo_encode_cu(
4107 &ps_ctxt->s_rdopt_entropy_ctxt,
4108 ps_final_prms,
4109 cu_pos_x_8pelunits,
4110 cu_pos_y_8pelunits,
4111 ps_cu_analyse->u1_cu_size,
4112 s_nbr.u1_top_avail,
4113 s_nbr.u1_left_avail,
4114 &ps_final_prms->pu1_cu_coeffs[0],
4115 &cbf_bits);
4116
4117 cu_bits += header_bits;
4118
4119 /* cbf bits are excluded from header bits, instead considered as texture bits */
4120 /* incase if zero cbf eval is disabled then texture bits gets added here */
4121 ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
4122 ps_final_prms->u4_cu_cbf_bits = cbf_bits;
4123
4124 #if RDOPT_ENABLE
4125 /* add the cost of coding the cu bits */
4126 total_rdopt_cost +=
4127 COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
4128 #endif
4129 }
4130 return (total_rdopt_cost);
4131 }
4132 /*!
4133 ******************************************************************************
4134 * \if Function name : ihevce_inter_rdopt_cu_ntu \endif
4135 *
4136 * \brief
4137 * Inter coding unit function which performs the TQ IT IQ recon for luma
4138 *
4139 * \param[in] ps_ctxt enc_loop module ctxt pointer
4140 * \param[in,out] ps_inter_cand pointer to inter candidate structure (TU split flags and skip flag may be updated)
4141 * \param[in] pv_src pointer to source data buffer
4142 * \param[in] cu_size Current CU size
4143 * \param[in] cu_pos_x cu position x w.r.t. ctb (in 8x8 units)
4144 * \param[in] cu_pos_y cu position y w.r.t. ctb (in 8x8 units)
4145 * \param[in] ps_cu_prms pointer to CU level params (supplies the luma source buffer stride and the noisy-CU flag)
4146 * \param[in] curr_buf_idx buffer index for current output storage
4147 * \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure
4148 *
4149 * \return
4150 * Rdopt cost
4151 *
4152 * \author
4153 * Ittiam
4154 *
4155 *****************************************************************************
4156 */
ihevce_inter_rdopt_cu_ntu(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms,void * pv_src,WORD32 cu_size,WORD32 cu_pos_x,WORD32 cu_pos_y,WORD32 curr_buf_idx,enc_loop_chrm_cu_buf_prms_t * ps_chrm_cu_buf_prms,cu_inter_cand_t * ps_inter_cand,cu_analyse_t * ps_cu_analyse,WORD32 i4_alpha_stim_multiplier)4157 LWORD64 ihevce_inter_rdopt_cu_ntu(
4158 ihevce_enc_loop_ctxt_t *ps_ctxt,
4159 enc_loop_cu_prms_t *ps_cu_prms,
4160 void *pv_src,
4161 WORD32 cu_size,
4162 WORD32 cu_pos_x,
4163 WORD32 cu_pos_y,
4164 WORD32 curr_buf_idx,
4165 enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
4166 cu_inter_cand_t *ps_inter_cand,
4167 cu_analyse_t *ps_cu_analyse,
4168 WORD32 i4_alpha_stim_multiplier)
4169 {
4170 enc_loop_cu_final_prms_t *ps_final_prms;
4171 nbr_4x4_t *ps_nbr_4x4;
4172 tu_prms_t s_tu_prms[64 * 4];
4173 tu_prms_t *ps_tu_prms;
4174
4175 WORD32 i4_perform_rdoq;
4176 WORD32 i4_perform_sbh;
4177 WORD32 ai4_tu_split_flags[4];
4178 WORD32 ai4_tu_early_cbf[4];
4179 WORD32 num_split_flags = 1;
4180 WORD32 i;
4181 UWORD8 u1_tu_size;
4182 UWORD8 *pu1_pred;
4183 UWORD8 *pu1_ecd_data;
4184 WORD16 *pi2_deq_data;
4185 UWORD8 *pu1_csbf_buf;
4186 UWORD8 *pu1_tu_sz_sft;
4187 UWORD8 *pu1_tu_posx;
4188 UWORD8 *pu1_tu_posy;
4189 LWORD64 total_rdopt_cost;
4190 WORD32 ctr;
4191 WORD32 chrm_ctr;
4192 WORD32 num_tu_in_cu = 0;
4193 WORD32 pred_stride;
4194 WORD32 recon_stride;
4195 WORD32 trans_size = ps_cu_analyse->u1_cu_size;
4196 WORD32 csbf_strd;
4197 WORD32 chrm_present_flag;
4198 WORD32 ecd_data_bytes_cons;
4199 WORD32 num_4x4_in_cu;
4200 WORD32 num_4x4_in_tu;
4201 WORD32 recon_func_mode;
4202 WORD32 cu_bits;
4203 UWORD8 u1_compute_spatial_ssd;
4204
4205 /* min_trans_size is initialized to some huge number than usual TU sizes */
4206 WORD32 i4_min_trans_size = 256;
4207 /* Get the RDOPT cost of the best CU mode for early_exit */
4208 LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
4209 WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
4210
4211 /* model for no residue syntax qt root cbf flag */
4212 UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX];
4213
4214 /* backup copy of cabac states for restoration if zero cu reside rdo wins later */
4215 UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
4216
4217 /* for skip cases tables are not reqquired */
4218 UWORD8 u1_skip_tu_sz_sft = 0;
4219 UWORD8 u1_skip_tu_posx = 0;
4220 UWORD8 u1_skip_tu_posy = 0;
4221 UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
4222
4223 /* get the pointers based on curbuf idx */
4224 ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
4225 ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
4226 pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
4227 pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
4228 csbf_strd = ps_ctxt->i4_cu_csbf_strd;
4229 pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
4230
4231 pred_stride = ps_inter_cand->i4_pred_data_stride;
4232 recon_stride = cu_size;
4233 pu1_pred = ps_inter_cand->pu1_pred_data;
4234 chrm_ctr = 0;
4235 ecd_data_bytes_cons = 0;
4236 total_rdopt_cost = 0;
4237 num_4x4_in_cu = cu_size >> 2;
4238 recon_func_mode = PRED_MODE_INTER;
4239 cu_bits = 0;
4240
4241 /* get the 4x4 level postion of current cu */
4242 cu_pos_x = cu_pos_x << 1;
4243 cu_pos_y = cu_pos_y << 1;
4244
4245 /* default value for cu coded flag */
4246 ps_final_prms->u1_is_cu_coded = 0;
4247
4248 /*init of ssd of CU accuumulated over all TU*/
4249 ps_final_prms->u4_cu_sad = 0;
4250
4251 /* populate the coeffs scan idx */
4252 ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
4253
4254 #if ENABLE_INTER_ZCU_COST
4255 /* reset cu not coded cost */
4256 ps_ctxt->i8_cu_not_coded_cost = 0;
4257
4258 /* backup copy of cabac states for restoration if zero cu reside rdo wins later */
4259 memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END);
4260 #endif
4261
4262 if(ps_cu_analyse->u1_cu_size == 64)
4263 {
4264 num_split_flags = 4;
4265 u1_tu_size = 32;
4266 }
4267 else
4268 {
4269 num_split_flags = 1;
4270 u1_tu_size = ps_cu_analyse->u1_cu_size;
4271 }
4272
4273 /* ckeck for skip mode */
4274 if(1 == ps_final_prms->u1_skip_flag)
4275 {
4276 if(64 == cu_size)
4277 {
4278 /* TU = CU/2 is set but no trnaform is evaluated */
4279 num_tu_in_cu = 4;
4280 pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
4281 pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
4282 pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
4283 }
4284 else
4285 {
4286 /* TU = CU is set but no trnaform is evaluated */
4287 num_tu_in_cu = 1;
4288 pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
4289 pu1_tu_posx = &u1_skip_tu_posx;
4290 pu1_tu_posy = &u1_skip_tu_posy;
4291 }
4292
4293 recon_func_mode = PRED_MODE_SKIP;
4294 }
4295 /* check for PU part mode being AMP or No AMP */
4296 else if(ps_final_prms->u1_part_mode < SIZE_2NxnU)
4297 {
4298 if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64))
4299 {
4300 /* TU= CU is evaluated 2Nx2N inter case */
4301 num_tu_in_cu = 1;
4302 pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
4303 pu1_tu_posx = &u1_skip_tu_posx;
4304 pu1_tu_posy = &u1_skip_tu_posy;
4305 }
4306 else
4307 {
4308 /* currently TU= CU/2 is evaluated for all inter case */
4309 num_tu_in_cu = 4;
4310 pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
4311 pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
4312 pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
4313 }
4314 }
4315 else
4316 {
4317 /* for AMP cases one level of TU recurssion is done */
4318 /* based on oreintation of the partitions */
4319 num_tu_in_cu = 10;
4320 pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0];
4321 pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
4322 pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
4323 }
4324
4325 ps_tu_prms = &s_tu_prms[0];
4326 num_tu_in_cu = 0;
4327
4328 for(i = 0; i < num_split_flags; i++)
4329 {
4330 WORD32 i4_x_off = 0, i4_y_off = 0;
4331
4332 if(i == 1 || i == 3)
4333 {
4334 i4_x_off = 32;
4335 }
4336
4337 if(i == 2 || i == 3)
4338 {
4339 i4_y_off = 32;
4340 }
4341
4342 if(1 == ps_final_prms->u1_skip_flag)
4343 {
4344 ai4_tu_split_flags[0] = 0;
4345 ps_inter_cand->ai4_tu_split_flag[i] = 0;
4346
4347 ai4_tu_early_cbf[0] = 0;
4348 }
4349 else
4350 {
4351 ai4_tu_split_flags[0] = ps_inter_cand->ai4_tu_split_flag[i];
4352 ai4_tu_early_cbf[0] = ps_inter_cand->ai4_tu_early_cbf[i];
4353 }
4354
4355 ps_tu_prms->u1_tu_size = u1_tu_size;
4356
4357 ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
4358 ps_tu_prms,
4359 &num_tu_in_cu,
4360 0,
4361 ai4_tu_split_flags[0],
4362 ai4_tu_early_cbf[0],
4363 i4_x_off,
4364 i4_y_off);
4365 }
4366
4367 /* loop for all tu blocks in current cu */
4368 ps_tu_prms = &s_tu_prms[0];
4369 for(ctr = 0; ctr < num_tu_in_cu; ctr++)
4370 {
4371 trans_size = ps_tu_prms->u1_tu_size;
4372
4373 if(i4_min_trans_size > trans_size)
4374 {
4375 i4_min_trans_size = trans_size;
4376 }
4377 ps_tu_prms++;
4378 }
4379
4380 if(ps_ctxt->i1_cu_qp_delta_enable)
4381 {
4382 ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, i4_min_trans_size, 0);
4383 }
4384
4385 if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
4386 {
4387 ps_ctxt->i8_cl_ssd_lambda_qf =
4388 ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
4389 100.0f);
4390 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
4391 ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
4392 (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
4393 }
4394
4395 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
4396 (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
4397 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
4398
4399 if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
4400 {
4401 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
4402 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
4403 }
4404
4405 if(!u1_compute_spatial_ssd)
4406 {
4407 ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
4408 ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
4409 }
4410 else
4411 {
4412 ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1;
4413 }
4414
4415 ps_tu_prms = &s_tu_prms[0];
4416
4417 ASSERT(num_tu_in_cu <= 256);
4418
4419 /* RDOPT copy States : TU init (best until prev TU) to current */
4420 memcpy(
4421 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4422 .s_cabac_ctxt.au1_ctxt_models[0],
4423 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
4424 IHEVC_CAB_COEFFX_PREFIX);
4425
4426 for(ctr = 0; ctr < num_tu_in_cu; ctr++)
4427 {
4428 WORD32 curr_bytes;
4429 WORD32 tx_size;
4430 WORD32 cbf, zero_col, zero_row;
4431 LWORD64 rdopt_cost;
4432 UWORD8 u1_is_recon_available;
4433
4434 WORD32 curr_pos_x;
4435 WORD32 curr_pos_y;
4436 nbr_4x4_t *ps_cur_nbr_4x4;
4437 UWORD8 *pu1_cur_pred;
4438 UWORD8 *pu1_cur_src;
4439 UWORD8 *pu1_cur_recon;
4440 WORD16 *pi2_cur_deq_data;
4441 UWORD32 u4_tu_sad;
4442 WORD32 tu_bits;
4443
4444 WORD32 i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
4445
4446 trans_size = ps_tu_prms->u1_tu_size;
4447 /* get the current pos x and pos y in pixels */
4448 curr_pos_x = ps_tu_prms->u1_x_off; //((cu_size >> 2) * pu1_tu_posx[ctr]);
4449 curr_pos_y = ps_tu_prms->u1_y_off; //((cu_size >> 2) * pu1_tu_posy[ctr]);
4450
4451 num_4x4_in_tu = trans_size >> 2;
4452
4453 #if FORCE_8x8_TFR
4454 if(cu_size == 64)
4455 {
4456 curr_pos_x = ((cu_size >> 3) * pu1_tu_posx[ctr]);
4457 curr_pos_y = ((cu_size >> 3) * pu1_tu_posy[ctr]);
4458 }
4459 #endif
4460
4461 /* increment the pointers to start of current TU */
4462 pu1_cur_src = ((UWORD8 *)pv_src + curr_pos_x);
4463 pu1_cur_src += (curr_pos_y * src_strd);
4464 pu1_cur_pred = (pu1_pred + curr_pos_x);
4465 pu1_cur_pred += (curr_pos_y * pred_stride);
4466 pi2_cur_deq_data = pi2_deq_data + curr_pos_x;
4467 pi2_cur_deq_data += (curr_pos_y * cu_size);
4468 pu1_cur_recon = ((UWORD8 *)ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0]) +
4469 curr_pos_x + curr_pos_y * i4_recon_stride;
4470
4471 ps_cur_nbr_4x4 = (ps_nbr_4x4 + (curr_pos_x >> 2));
4472 ps_cur_nbr_4x4 += ((curr_pos_y >> 2) * num_4x4_in_cu);
4473
4474 /* RDOPT copy States : TU init (best until prev TU) to current */
4475 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4476 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4477 .s_cabac_ctxt.au1_ctxt_models[0] +
4478 IHEVC_CAB_COEFFX_PREFIX,
4479 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4480 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4481
4482 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
4483 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
4484
4485 /*2 Multi- dimensinal array based on trans size of rounding factor to be added here */
4486 /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
4487 /* Currently the complete array will contain only single value*/
4488 /*The rounding factor is calculated with the formula
4489 Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
4490 rounding factor = (1 - DeadZone Val)
4491
4492 Assumption: Cabac states of All the sub-blocks in the TU are considered independent
4493 */
4494 if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0))
4495 {
4496 double i4_lamda_modifier;
4497
4498 if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
4499 {
4500 i4_lamda_modifier = ps_ctxt->i4_lamda_modifier *
4501 CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
4502 }
4503 else
4504 {
4505 i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
4506 }
4507 if(ps_ctxt->i4_use_const_lamda_modifier)
4508 {
4509 if(ISLICE == ps_ctxt->i1_slice_type)
4510 {
4511 i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
4512 }
4513 else
4514 {
4515 i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
4516 }
4517 }
4518 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
4519 &ps_ctxt->i4_quant_round_tu[0][0];
4520 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
4521 &ps_ctxt->i4_quant_round_tu[1][0];
4522
4523 memset(
4524 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
4525 0,
4526 trans_size * trans_size * sizeof(WORD32));
4527 memset(
4528 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
4529 0,
4530 trans_size * trans_size * sizeof(WORD32));
4531
4532 ihevce_quant_rounding_factor_gen(
4533 trans_size,
4534 1,
4535 &ps_ctxt->s_rdopt_entropy_ctxt,
4536 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
4537 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
4538 i4_lamda_modifier,
4539 1);
4540 }
4541 else
4542 {
4543 ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
4544 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3];
4545 ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
4546 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3];
4547 }
4548
4549 /* call T Q IT IQ and recon function */
4550 cbf = ihevce_t_q_iq_ssd_scan_fxn(
4551 ps_ctxt,
4552 pu1_cur_pred,
4553 pred_stride,
4554 pu1_cur_src,
4555 src_strd,
4556 pi2_cur_deq_data,
4557 cu_size,
4558 pu1_cur_recon,
4559 i4_recon_stride,
4560 pu1_ecd_data,
4561 pu1_csbf_buf,
4562 csbf_strd,
4563 trans_size,
4564 recon_func_mode,
4565 &rdopt_cost,
4566 &curr_bytes,
4567 &tu_bits,
4568 &u4_tu_sad,
4569 &zero_col,
4570 &zero_row,
4571 &u1_is_recon_available,
4572 i4_perform_rdoq,
4573 i4_perform_sbh,
4574 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
4575 i4_alpha_stim_multiplier,
4576 u1_is_cu_noisy,
4577 #endif
4578 u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
4579 ps_ctxt->u1_use_early_cbf_data ? ps_tu_prms->i4_early_cbf : 1);
4580
4581 #if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
4582 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
4583 {
4584 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
4585 rdopt_cost = ihevce_inject_stim_into_distortion(
4586 pu1_cur_src,
4587 src_strd,
4588 pu1_cur_pred,
4589 pred_stride,
4590 rdopt_cost,
4591 i4_alpha_stim_multiplier,
4592 trans_size,
4593 0,
4594 ps_ctxt->u1_enable_psyRDOPT,
4595 NULL_PLANE);
4596 #else
4597 if(u1_compute_spatial_ssd && u1_is_recon_available)
4598 {
4599 rdopt_cost = ihevce_inject_stim_into_distortion(
4600 pu1_cur_src,
4601 src_strd,
4602 pu1_cur_recon,
4603 i4_recon_stride,
4604 rdopt_cost,
4605 i4_alpha_stim_multiplier,
4606 trans_size,
4607 0,
4608 NULL_PLANE);
4609 }
4610 else
4611 {
4612 rdopt_cost = ihevce_inject_stim_into_distortion(
4613 pu1_cur_src,
4614 src_strd,
4615 pu1_cur_pred,
4616 pred_stride,
4617 rdopt_cost,
4618 i4_alpha_stim_multiplier,
4619 trans_size,
4620 0,
4621 ps_ctxt->u1_enable_psyRDOPT,
4622 NULL_PLANE);
4623 }
4624 #endif
4625 }
4626 #endif
4627
4628 if(u1_compute_spatial_ssd && u1_is_recon_available)
4629 {
4630 ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = 0;
4631 }
4632 else
4633 {
4634 ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
4635 }
4636
4637 /* accumulate the TU sad into cu sad */
4638 ps_final_prms->u4_cu_sad += u4_tu_sad;
4639
4640 /* accumulate the TU bits into cu bits */
4641 cu_bits += tu_bits;
4642
4643 /* inter cu is coded if any of the tu is coded in it */
4644 ps_final_prms->u1_is_cu_coded |= cbf;
4645
4646 /* call the entropy function to get the bits */
4647 /* add that to rd opt cost(SSD) */
4648
4649 /* update the bytes */
4650 ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
4651 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = curr_bytes;
4652 /* update the zero_row and col info for the final mode */
4653 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col = zero_col;
4654 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row = zero_row;
4655
4656 /* update the bytes */
4657 ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
4658
4659 /* update the total bytes cons */
4660 ecd_data_bytes_cons += curr_bytes;
4661 pu1_ecd_data += curr_bytes;
4662
4663 /* RDOPT copy States : New updated after curr TU to TU init */
4664 if(0 != cbf)
4665 {
4666 /* update to new state only if CBF is non zero */
4667 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4668 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4669 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4670 .s_cabac_ctxt.au1_ctxt_models[0] +
4671 IHEVC_CAB_COEFFX_PREFIX,
4672 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4673 }
4674
4675 /* by default chroma present is set to 1*/
4676 chrm_present_flag = 1;
4677 if(4 == trans_size)
4678 {
4679 /* if tusize is 4x4 then only first luma 4x4 will have chroma*/
4680 if(0 != chrm_ctr)
4681 {
4682 chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE;
4683 }
4684
4685 /* increment the chrm ctr unconditionally */
4686 chrm_ctr++;
4687
4688 /* after ctr reached 4 reset it */
4689 if(4 == chrm_ctr)
4690 {
4691 chrm_ctr = 0;
4692 }
4693 }
4694
4695 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = cbf;
4696 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
4697 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
4698 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
4699 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
4700 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag;
4701 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp;
4702 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0;
4703 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0;
4704 GETRANGE(tx_size, trans_size);
4705 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
4706 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + (curr_pos_x >> 2);
4707 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + (curr_pos_y >> 2);
4708
4709 /* repiclate the nbr 4x4 structure for all 4x4 blocks current TU */
4710 ps_cur_nbr_4x4->b1_y_cbf = cbf;
4711 /*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/
4712 ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
4713
4714 /* Qp and cbf are stored for the all 4x4 in TU */
4715 {
4716 WORD32 i, j;
4717 nbr_4x4_t *ps_tmp_4x4;
4718 ps_tmp_4x4 = ps_cur_nbr_4x4;
4719
4720 for(i = 0; i < num_4x4_in_tu; i++)
4721 {
4722 for(j = 0; j < num_4x4_in_tu; j++)
4723 {
4724 ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp;
4725 ps_tmp_4x4[j].b1_y_cbf = cbf;
4726 }
4727 /* row level update*/
4728 ps_tmp_4x4 += num_4x4_in_cu;
4729 }
4730 }
4731
4732 #if RDOPT_ENABLE
4733 /* compute the rdopt cost */
4734 rdopt_cost +=
4735 COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
4736 #endif
4737 /* accumulate the costs */
4738 total_rdopt_cost += rdopt_cost;
4739
4740 ps_tu_prms++;
4741
4742 if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
4743 {
4744 /* Early exit : If the current running cost exceeds
4745 the prev. best mode cost, break */
4746 if(total_rdopt_cost > prev_best_rdopt_cost)
4747 {
4748 return (total_rdopt_cost);
4749 }
4750 }
4751 }
4752
4753 /* Modify the cost function for this CU. */
4754 /* loop in for 8x8 blocks */
4755 if(ps_ctxt->u1_enable_psyRDOPT)
4756 {
4757 UWORD8 *pu1_recon_cu;
4758 WORD32 recon_stride;
4759 WORD32 curr_pos_x;
4760 WORD32 curr_pos_y;
4761 WORD32 start_index;
4762 WORD32 num_horz_cu_in_ctb;
4763 WORD32 had_block_size;
4764
4765 /* tODO: sreenivasa ctb size has to be used appropriately */
4766 had_block_size = 8;
4767 num_horz_cu_in_ctb = 64 / had_block_size;
4768
4769 curr_pos_x = cu_pos_x << 2; /* pel units */
4770 curr_pos_y = cu_pos_y << 2; /* pel units */
4771 recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
4772 pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore
4773 .apv_luma_recon_bufs[0]); // already pointing to the current CU recon
4774 //+ \curr_pos_x + curr_pos_y * recon_stride;
4775
4776 /* start index to index the source satd of curr cu int he current ctb*/
4777 start_index =
4778 (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
4779
4780 {
4781 total_rdopt_cost += ihevce_psy_rd_cost(
4782 ps_ctxt->ai4_source_satd_8x8,
4783 pu1_recon_cu,
4784 recon_stride,
4785 1, //howz stride
4786 cu_size,
4787 0, // pic type
4788 0, //layer id
4789 ps_ctxt->i4_satd_lamda, // lambda
4790 start_index,
4791 ps_ctxt->u1_is_input_data_hbd,
4792 ps_ctxt->u4_psy_strength,
4793 &ps_ctxt->s_cmn_opt_func); // 8 bit
4794 }
4795 }
4796
4797 /* store the num TUs*/
4798 ps_final_prms->u2_num_tus_in_cu = num_tu_in_cu;
4799
4800 /* update the bytes consumed */
4801 ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
4802
4803 /* store the current cu size to final prms */
4804 ps_final_prms->u1_cu_size = cu_size;
4805
4806 /* cu bits will be having luma residual bits till this point */
4807 /* if zero_cbf eval is disabled then cu bits will be zero */
4808 ps_final_prms->u4_cu_luma_res_bits = cu_bits;
4809
4810 /* ------------- Chroma processing -------------- */
4811 /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEDD preset*/
4812 if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
4813 {
4814 LWORD64 chrm_rdopt_cost;
4815 WORD32 chrm_rdopt_tu_bits;
4816
4817 /* Store the current RDOPT cost to enable early exit in chrom_prcs */
4818 ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
4819
4820 chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
4821 ps_ctxt,
4822 curr_buf_idx,
4823 0, /* TU mode : Don't care in Inter patrh */
4824 ps_chrm_cu_buf_prms->pu1_curr_src,
4825 ps_chrm_cu_buf_prms->i4_chrm_src_stride,
4826 ps_chrm_cu_buf_prms->pu1_cu_left,
4827 ps_chrm_cu_buf_prms->pu1_cu_top,
4828 ps_chrm_cu_buf_prms->pu1_cu_top_left,
4829 ps_chrm_cu_buf_prms->i4_cu_left_stride,
4830 (cu_pos_x >> 1),
4831 (cu_pos_y >> 1),
4832 &chrm_rdopt_tu_bits,
4833 i4_alpha_stim_multiplier,
4834 u1_is_cu_noisy);
4835
4836 #if WEIGH_CHROMA_COST
4837 chrm_rdopt_cost = (LWORD64)(
4838 (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
4839 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
4840 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
4841 #endif
4842
4843 #if CHROMA_RDOPT_ENABLE
4844 total_rdopt_cost += chrm_rdopt_cost;
4845 #endif
4846 cu_bits += chrm_rdopt_tu_bits;
4847
4848 /* during chroma evaluation if skip decision was over written */
4849 /* then the current skip candidate is set to a non skip candidate */
4850 ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag;
4851
4852 /* cu bits for chroma residual if chroma rdopt is on */
4853 /* if zero_cbf eval is disabled then cu bits will be zero */
4854 ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
4855
4856 if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
4857 {
4858 /* Early exit : If the current running cost exceeds
4859 the prev. best mode cost, break */
4860 if(total_rdopt_cost > prev_best_rdopt_cost)
4861 {
4862 return (total_rdopt_cost);
4863 }
4864 }
4865 }
4866 else
4867 {}
4868
4869 #if SHRINK_INTER_TUTREE
4870 /* ------------- Quadtree TU split optimization ------------ */
4871 if(ps_final_prms->u1_is_cu_coded)
4872 {
4873 ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree(
4874 &ps_final_prms->as_tu_enc_loop[0],
4875 &ps_final_prms->as_tu_enc_loop_temp_prms[0],
4876 &ps_final_prms->s_recon_datastore,
4877 num_tu_in_cu,
4878 (ps_ctxt->u1_chroma_array_type == 2));
4879 }
4880 #endif
4881
4882 /* RDOPT copy States : Best after all luma TUs (and chroma,if enabled)to current */
4883 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4884 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4885 .s_cabac_ctxt.au1_ctxt_models[0] +
4886 IHEVC_CAB_COEFFX_PREFIX,
4887 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4888 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4889
4890 /* -------- Bit estimate for RD opt -------------- */
4891 {
4892 nbr_avail_flags_t s_nbr;
4893 /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
4894 WORD32 cbf_bits, header_bits;
4895
4896 /* get the neighbour availability flags for current cu */
4897 ihevce_get_only_nbr_flag(
4898 &s_nbr,
4899 ps_ctxt->pu1_ctb_nbr_map,
4900 ps_ctxt->i4_nbr_map_strd,
4901 cu_pos_x,
4902 cu_pos_y,
4903 (cu_size >> 2),
4904 (cu_size >> 2));
4905
4906 /* call the entropy rdo encode to get the bit estimate for current cu */
4907 header_bits = ihevce_entropy_rdo_encode_cu(
4908 &ps_ctxt->s_rdopt_entropy_ctxt,
4909 ps_final_prms,
4910 (cu_pos_x >> 1), /* back to 8x8 pel units */
4911 (cu_pos_y >> 1), /* back to 8x8 pel units */
4912 cu_size,
4913 ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
4914 : s_nbr.u1_top_avail,
4915 s_nbr.u1_left_avail,
4916 &ps_final_prms->pu1_cu_coeffs[0],
4917 &cbf_bits);
4918
4919 cu_bits += header_bits;
4920
4921 /* cbf bits are excluded from header bits, instead considered as texture bits */
4922 /* incase if zero cbf eval is disabled then texture bits gets added here */
4923 ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
4924 ps_final_prms->u4_cu_cbf_bits = cbf_bits;
4925
4926 #if RDOPT_ENABLE
4927 /* add the cost of coding the header bits */
4928 total_rdopt_cost +=
4929 COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
4930
4931 #if ENABLE_INTER_ZCU_COST
4932 /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */
4933 if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level))
4934 {
4935 LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
4936
4937 WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) &&
4938 (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag);
4939
4940 cab_ctxt_t *ps_cab_ctxt =
4941 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt;
4942
4943 /* Read header bits generatated after ihevce_entropy_rdo_encode_cu() call */
4944 UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12;
4945
4946 /* account for coding qt_root_cbf = 0 */
4947 /* First subtract cost for coding as 1 (part of header bits) and then add cost for coding as 0 */
4948 u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0];
4949 if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1])
4950 u4_cu_hdr_bits_q12 = 0;
4951 else
4952 u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1];
4953
4954 /* add the cost of coding the header bits */
4955 i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30(
4956 u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */,
4957 ps_ctxt->i8_cl_ssd_lambda_qf,
4958 (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
4959
4960 if(ps_ctxt->u1_enable_psyRDOPT)
4961 {
4962 i8_cu_not_coded_cost = total_rdopt_cost + 1;
4963 }
4964
4965 /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */
4966 if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu))
4967 {
4968 WORD32 tx_size;
4969
4970 /* force cu as not coded and update the cost */
4971 ps_final_prms->u1_is_cu_coded = 0;
4972 ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
4973 ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
4974
4975 total_rdopt_cost = i8_cu_not_coded_cost;
4976
4977 /* reset num TUs to 1 unless cu size id 64 */
4978 ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1;
4979 trans_size = (64 == cu_size) ? 32 : cu_size;
4980 GETRANGE(tx_size, trans_size);
4981
4982 /* reset the bytes consumed */
4983 ps_final_prms->i4_num_bytes_ecd_data = 0;
4984
4985 /* reset texture related bits and roll back header bits*/
4986 ps_final_prms->u4_cu_cbf_bits = 0;
4987 ps_final_prms->u4_cu_luma_res_bits = 0;
4988 ps_final_prms->u4_cu_chroma_res_bits = 0;
4989 ps_final_prms->u4_cu_hdr_bits =
4990 (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q;
4991
4992 /* update cabac model with qtroot cbf = 0 decision */
4993 ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] =
4994 gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1];
4995
4996 /* restore untouched cabac models for, tusplit, cbfs, texture etc */
4997 memcpy(
4998 &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM],
4999 &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM],
5000 (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM));
5001
5002 /* mark all tus as not coded for final eval */
5003 for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++)
5004 {
5005 WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0;
5006 WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0;
5007
5008 nbr_4x4_t *ps_cur_nbr_4x4 =
5009 ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu);
5010
5011 num_4x4_in_tu = trans_size >> 2;
5012
5013 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0;
5014 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0;
5015 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0;
5016
5017 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0;
5018 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
5019 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
5020
5021 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
5022 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
5023
5024 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
5025 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x;
5026 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y;
5027
5028 /* reset cbf for the all 4x4 in TU */
5029 {
5030 WORD32 i, j;
5031 nbr_4x4_t *ps_tmp_4x4;
5032 ps_tmp_4x4 = ps_cur_nbr_4x4;
5033
5034 for(i = 0; i < num_4x4_in_tu; i++)
5035 {
5036 for(j = 0; j < num_4x4_in_tu; j++)
5037 {
5038 ps_tmp_4x4[j].b1_y_cbf = 0;
5039 }
5040 /* row level update*/
5041 ps_tmp_4x4 += num_4x4_in_cu;
5042 }
5043 }
5044 }
5045 }
5046 }
5047 #endif /* ENABLE_INTER_ZCU_COST */
5048
5049 #endif /* RDOPT_ENABLE */
5050 }
5051
5052 return (total_rdopt_cost);
5053 }
5054
5055 #if ENABLE_RDO_BASED_TU_RECURSION
/*!
******************************************************************************
* \if Function name : ihevce_inter_tu_tree_selector_and_rdopt_cost_computer \endif
*
* \brief
*    RD-opt cost evaluation of one inter CU candidate using TU-tree recursion.
*    The function initialises a TU tree for the CU, runs the TU tree selector
*    (ihevce_tu_tree_selector or ihevce_topDown_tu_tree_selector, depending on
*    ENABLE_TOP_DOWN_TU_RECURSION), collects the selected tree into the final
*    CU params through ihevce_tu_selector_debriefer, optionally adds the
*    psy-RD cost, optionally evaluates chroma RD-opt, optionally shrinks the
*    TU tree (SHRINK_INTER_TUTREE), adds the rate cost of the header bits
*    estimated by ihevce_entropy_rdo_encode_cu and, under
*    ENABLE_INTER_ZCU_COST, may force the CU to "not coded" when that is not
*    more expensive.
*
* \param[in,out] ps_ctxt    enc_loop module ctxt pointer. Besides being read,
*                           this function writes i4_scan_idx,
*                           i8_cu_not_coded_cost (ENABLE_INTER_ZCU_COST), the
*                           two i8_cl_ssd_lambda* values (noisy CU with psy-RD
*                           off) and the RD-opt CABAC models of curr_buf_idx
* \param[in] ps_cu_prms     CU level params; i4_luma_src_stride and
*                           u1_is_cu_noisy are read
* \param[in] pv_src         luma source pointer handed to the TU selector
* \param[in] cu_size        Current CU size (cu_size >> 2 is used as the number
*                           of 4x4 units per row)
* \param[in] cu_pos_x       cu position x; doubled on entry to get 4x4 units
*                           and halved again ("back to 8x8") for the entropy
*                           and chroma calls
* \param[in] cu_pos_y       cu position y; same handling as cu_pos_x
* \param[in] curr_buf_idx   Current Buffer index; selects the entries of
*                           as_cu_prms, as_cu_nbr and as_cu_entropy_ctxt
* \param[in] ps_chrm_cu_buf_prms chroma source and neighbour buffer params
* \param[in,out] ps_inter_cand pointer to inter candidate structure;
*                           b1_skip_flag is rewritten from the final params
*                           after chroma RD-opt evaluation
* \param[in] ps_cu_analyse  CU analyse struct; u1_cu_size is read and the
*                           pointer is forwarded to
*                           ihevce_update_cu_level_qp_lamda
* \param[in] i4_alpha_stim_multiplier forwarded to the TU selector (only when
*                           USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS)
*                           and to ihevce_chroma_cu_prcs_rdopt
*
* \return
*    Rdopt cost of the candidate. When chroma RD-opt is evaluated here and the
*    running cost already exceeds the best cost stored in the other CU buffer,
*    the function returns early, before header bits are estimated.
*
*****************************************************************************
*/
ihevce_inter_tu_tree_selector_and_rdopt_cost_computer(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms,void * pv_src,WORD32 cu_size,WORD32 cu_pos_x,WORD32 cu_pos_y,WORD32 curr_buf_idx,enc_loop_chrm_cu_buf_prms_t * ps_chrm_cu_buf_prms,cu_inter_cand_t * ps_inter_cand,cu_analyse_t * ps_cu_analyse,WORD32 i4_alpha_stim_multiplier)5056 LWORD64 ihevce_inter_tu_tree_selector_and_rdopt_cost_computer(
5057 ihevce_enc_loop_ctxt_t *ps_ctxt,
5058 enc_loop_cu_prms_t *ps_cu_prms,
5059 void *pv_src,
5060 WORD32 cu_size,
5061 WORD32 cu_pos_x,
5062 WORD32 cu_pos_y,
5063 WORD32 curr_buf_idx,
5064 enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
5065 cu_inter_cand_t *ps_inter_cand,
5066 cu_analyse_t *ps_cu_analyse,
5067 WORD32 i4_alpha_stim_multiplier)
5068 {
/* TU tree node storage: 1 + 4 + 16 + 64 + 256 entries, i.e. the node count */
/* of a complete quadtree with five levels */
5069 tu_tree_node_t as_tu_nodes[256 + 64 + 16 + 4 + 1];
5070 buffer_data_for_tu_t s_buffer_data_for_tu;
5071 enc_loop_cu_final_prms_t *ps_final_prms;
5072 nbr_4x4_t *ps_nbr_4x4;
5073
/* NOTE(review): num_split_flags, u1_tu_size, pu1_csbf_buf, csbf_strd, */
/* chrm_ctr, the function-scope recon_stride and the three pu1_tu_* */
/* pointers are assigned below but never read again in this function */
5074 WORD32 num_split_flags = 1;
5075 UWORD8 u1_tu_size;
5076 UWORD8 *pu1_pred;
5077 UWORD8 *pu1_ecd_data;
5078 WORD16 *pi2_deq_data;
5079 UWORD8 *pu1_csbf_buf;
5080 UWORD8 *pu1_tu_sz_sft;
5081 UWORD8 *pu1_tu_posx;
5082 UWORD8 *pu1_tu_posy;
5083 LWORD64 total_rdopt_cost;
5084 WORD32 ctr;
5085 WORD32 chrm_ctr;
5086 WORD32 pred_stride;
5087 WORD32 recon_stride;
5088 WORD32 trans_size = ps_cu_analyse->u1_cu_size;
5089 WORD32 csbf_strd;
5090 WORD32 ecd_data_bytes_cons;
5091 WORD32 num_4x4_in_cu;
5092 WORD32 num_4x4_in_tu;
5093 WORD32 recon_func_mode;
5094 WORD32 cu_bits;
5095 UWORD8 u1_compute_spatial_ssd;
5096 /* backup copy of cabac states for restoration if zero cu residue rdo wins later */
5097 UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
5098
5099 WORD32 i4_min_trans_size = 256;
/* best cost stored in the CU buffer at index !curr_buf_idx; used for the */
/* early exit after chroma evaluation */
5100 LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
5101 WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
5102 /* model for no residue syntax qt root cbf flag */
5103 UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX];
5104 UWORD8 u1_skip_tu_sz_sft = 0;
5105 UWORD8 u1_skip_tu_posx = 0;
5106 UWORD8 u1_skip_tu_posy = 0;
5107 UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
5108
/* get the working pointers based on curr_buf_idx and reset the accumulators */
5109 ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
5110 ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
5111 pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
5112 pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
5113 csbf_strd = ps_ctxt->i4_cu_csbf_strd;
5114 pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
5115 pred_stride = ps_inter_cand->i4_pred_data_stride;
5116 recon_stride = cu_size;
5117 pu1_pred = ps_inter_cand->pu1_pred_data;
5118 chrm_ctr = 0;
5119 ecd_data_bytes_cons = 0;
5120 total_rdopt_cost = 0;
5121 num_4x4_in_cu = cu_size >> 2;
5122 recon_func_mode = PRED_MODE_INTER;
5123 cu_bits = 0;
5124
5125 /* get the 4x4 level position of current cu */
5126 cu_pos_x = cu_pos_x << 1;
5127 cu_pos_y = cu_pos_y << 1;
5128
5129 ps_final_prms->u1_is_cu_coded = 0;
5130 ps_final_prms->u4_cu_sad = 0;
5131
5132 /* populate the coeffs scan idx */
5133 ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
5134
5135 #if ENABLE_INTER_ZCU_COST
5136 /* reset cu not coded cost */
5137 ps_ctxt->i8_cu_not_coded_cost = 0;
5138
5139 /* backup copy of cabac states for restoration if zero cu residue rdo wins later */
5140 memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END);
5141 #endif
5142
/* a 64x64 CU uses four 32x32 TUs, every other CU size starts with TU = CU */
5143 if(ps_cu_analyse->u1_cu_size == 64)
5144 {
5145 num_split_flags = 4;
5146 u1_tu_size = 32;
5147 }
5148 else
5149 {
5150 num_split_flags = 1;
5151 u1_tu_size = ps_cu_analyse->u1_cu_size;
5152 }
5153
/* Of everything set in the if / else-if / else chain below, only */
/* recon_func_mode (PRED_MODE_SKIP for skip candidates) is read later; */
/* the pu1_tu_* table pointers are not */
5154 if(1 == ps_final_prms->u1_skip_flag)
5155 {
5156 if(64 == cu_size)
5157 {
5158 /* TU = CU/2 is set but no transform is evaluated */
5159 pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
5160 pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
5161 pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
5162 }
5163 else
5164 {
5165 /* TU = CU is set but no transform is evaluated */
5166 pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
5167 pu1_tu_posx = &u1_skip_tu_posx;
5168 pu1_tu_posy = &u1_skip_tu_posy;
5169 }
5170
5171 recon_func_mode = PRED_MODE_SKIP;
5172 }
5173 /* check for PU part mode being AMP or No AMP */
5174 else if(ps_final_prms->u1_part_mode < SIZE_2NxnU)
5175 {
5176 if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64))
5177 {
5178 /* TU= CU is evaluated 2Nx2N inter case */
5179 pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
5180 pu1_tu_posx = &u1_skip_tu_posx;
5181 pu1_tu_posy = &u1_skip_tu_posy;
5182 }
5183 else
5184 {
5185 /* currently TU= CU/2 is evaluated for all inter case */
5186 pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
5187 pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
5188 pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
5189 }
5190 }
5191 else
5192 {
5193 /* for AMP cases one level of TU recursion is done */
5194 /* based on orientation of the partitions */
5195 pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0];
5196 pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
5197 pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
5198 }
5199
/* replaces the initial value of 256; only consumed by the qp/lambda update below */
5200 i4_min_trans_size = 4;
5201
5202 if(ps_ctxt->i1_cu_qp_delta_enable)
5203 {
5204 ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, i4_min_trans_size, 0);
5205 }
5206
/* Noisy CU with psy-RD off: scale both SSD lambdas by */
/* (100 - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100 using float arithmetic. */
/* The context values are overwritten in place and are not restored */
/* anywhere in this function */
5207 if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
5208 {
5209 ps_ctxt->i8_cl_ssd_lambda_qf =
5210 ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
5211 100.0f);
5212 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
5213 ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
5214 (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
5215 }
5216
/* default rule: spatial domain SSD only for qp up to */
/* MAX_QP_WHERE_SPATIAL_SSD_ENABLED and quality presets below P3 */
5217 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
5218 (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
5219 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
5220
/* noisy CU or psy-RD: the preset restriction is dropped and the qp limit */
/* becomes MAX_HEVC_QP */
5221 if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
5222 {
5223 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
5224 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
5225 }
5226
/* publish whether luma / chroma recon will be available in the recon */
/* datastore; in the spatial SSD case the chroma flag is only set when */
/* chroma takes part in the TU recursion, otherwise it is left untouched */
5227 if(!u1_compute_spatial_ssd)
5228 {
5229 ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
5230 ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
5231 }
5232 else
5233 {
5234 ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1;
5235
5236 if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))
5237 {
5238 ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 1;
5239 }
5240 }
5241
5242 /* RDOPT copy States : TU init (best until prev TU) to current */
/* only the first IHEVC_CAB_COEFFX_PREFIX models are copied here */
5243 memcpy(
5244 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5245 .s_cabac_ctxt.au1_ctxt_models[0],
5246 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
5247 IHEVC_CAB_COEFFX_PREFIX);
5248
/* build the initial TU tree; skip candidates pass 0 for both the third */
/* argument and the max transform depth argument */
5249 ihevce_tu_tree_init(
5250 as_tu_nodes,
5251 cu_size,
5252 (cu_size == 64) ? !ps_inter_cand->b1_skip_flag : 0,
5253 ps_inter_cand->b1_skip_flag ? 0 : ps_ctxt->u1_max_inter_tr_depth,
5254 INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
5255 ps_ctxt->u1_chroma_array_type == 2);
5256
/* for presets >= P3, non-skip candidates take over the TU split flags */
/* carried by the inter candidate */
5257 if(!ps_inter_cand->b1_skip_flag && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
5258 {
5259 ihevce_tuSplitArray_to_tuTree_mapper(
5260 as_tu_nodes,
5261 ps_inter_cand->ai4_tu_split_flag,
5262 cu_size,
5263 cu_size,
5264 MAX(MIN_TU_SIZE, (cu_size >> ps_ctxt->u1_max_inter_tr_depth)),
5265 MIN(MAX_TU_SIZE, cu_size),
5266 ps_inter_cand->b1_skip_flag);
5267 }
5268
/* sanity check: the tree must cover exactly cu_size * cu_size samples */
5269 ASSERT(ihevce_tu_tree_coverage_in_cu(as_tu_nodes) == cu_size * cu_size);
5270
5271 #if ENABLE_INTER_ZCU_COST
5272 ps_ctxt->i8_cu_not_coded_cost = 0;
5273 #endif
5274
/* fill the source / pred / recon buffer descriptor handed to the TU selector */
5275 s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_src = pv_src;
5276 s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_pred = pu1_pred;
5277 s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_recon =
5278 ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0];
5279 s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_src_stride = src_strd;
5280 s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_pred_stride = pred_stride;
5281 s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_recon_stride =
5282 ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
5283 s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_src = ps_chrm_cu_buf_prms->pu1_curr_src;
/* chroma pred buffer: offset by curr_buf_idx times a per-buffer size that */
/* is doubled when u1_chroma_array_type == 2 */
5284 s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred =
5285 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
5286 curr_buf_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) + ((ps_ctxt->u1_chroma_array_type == 2) *
5287 (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
5288 s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_recon =
5289 ps_final_prms->s_recon_datastore.apv_chroma_recon_bufs[0];
5290 s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_src_stride =
5291 ps_chrm_cu_buf_prms->i4_chrm_src_stride;
5292 s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride =
5293 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
5294 s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_recon_stride =
5295 ps_final_prms->s_recon_datastore.i4_chromaRecon_stride;
5296 s_buffer_data_for_tu.ps_nbr_data_buf = ps_nbr_4x4;
5297 s_buffer_data_for_tu.pi2_deq_data = pi2_deq_data;
5298 s_buffer_data_for_tu.pi2_deq_data_chroma =
5299 pi2_deq_data + ps_final_prms->i4_chrm_deq_coeff_strt_idx;
5300 s_buffer_data_for_tu.i4_nbr_data_buf_stride = num_4x4_in_cu;
5301 s_buffer_data_for_tu.i4_deq_data_stride = cu_size;
5302 s_buffer_data_for_tu.i4_deq_data_stride_chroma = cu_size;
5303 s_buffer_data_for_tu.ppu1_ecd = &pu1_ecd_data;
5304
/* chroma takes part in the TU recursion: generate the chroma inter */
/* prediction of every PU before the tree is evaluated */
5305 if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))
5306 {
5307 UWORD8 i;
5308
/* this pu1_pred shadows the function-scope luma pu1_pred */
5309 UWORD8 *pu1_pred = (UWORD8 *)s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred;
5310
/* one PU when b3_part_size is zero, otherwise two */
5311 for(i = 0; i < (!!ps_inter_cand->b3_part_size) + 1; i++)
5312 {
5313 pu_t *ps_pu;
5314
5315 WORD32 inter_pu_wd;
5316 WORD32 inter_pu_ht;
5317
5318 ps_pu = ps_inter_cand->as_inter_pu + i;
5319
5320 inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
/* height is halved, then doubled again when u1_chroma_array_type == 2 */
5321 inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
5322 inter_pu_ht <<= (ps_ctxt->u1_chroma_array_type == 2);
5323 ihevce_chroma_inter_pred_pu(
5324 &ps_ctxt->s_mc_ctxt,
5325 ps_pu,
5326 pu1_pred,
5327 s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride);
/* advance the destination pointer past the PU just predicted */
5328 if(!!ps_inter_cand->b3_part_size)
5329 {
5330 /* 2Nx__ partition case */
5331 if(inter_pu_wd == cu_size)
5332 {
5333 pu1_pred +=
5334 (inter_pu_ht *
5335 s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride);
5336 }
5337
5338 /* __x2N partition case */
5339 if(inter_pu_ht == (cu_size >> !(ps_ctxt->u1_chroma_array_type == 2)))
5340 {
5341 pu1_pred += inter_pu_wd;
5342 }
5343 }
5344 }
5345 }
5346
/* Run the TU tree selector. The cost returned here is discarded: */
/* total_rdopt_cost is reset to 0 right after and handed by pointer to */
/* ihevce_tu_selector_debriefer (presumably re-accumulated there) */
5347 #if !ENABLE_TOP_DOWN_TU_RECURSION
5348 total_rdopt_cost = ihevce_tu_tree_selector(
5349 ps_ctxt,
5350 as_tu_nodes,
5351 &s_buffer_data_for_tu,
5352 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5353 .s_cabac_ctxt.au1_ctxt_models[0],
5354 recon_func_mode,
5355 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
5356 i4_alpha_stim_multiplier,
5357 u1_is_cu_noisy,
5358 #endif
5359 0,
5360 ps_ctxt->u1_max_inter_tr_depth,
5361 ps_inter_cand->b3_part_size,
5362 u1_compute_spatial_ssd);
5363 #else
5364 total_rdopt_cost = ihevce_topDown_tu_tree_selector(
5365 ps_ctxt,
5366 as_tu_nodes,
5367 &s_buffer_data_for_tu,
5368 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5369 .s_cabac_ctxt.au1_ctxt_models[0],
5370 recon_func_mode,
5371 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
5372 i4_alpha_stim_multiplier,
5373 u1_is_cu_noisy,
5374 #endif
5375 0,
5376 ps_ctxt->u1_max_inter_tr_depth,
5377 ps_inter_cand->b3_part_size,
5378 INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
5379 u1_compute_spatial_ssd);
5380 #endif
5381
/* reset every accumulator that is passed to the debriefer */
5382 ps_final_prms->u2_num_tus_in_cu = 0;
5383 ps_final_prms->u4_cu_luma_res_bits = 0;
5384 ps_final_prms->u4_cu_sad = 0;
5385 total_rdopt_cost = 0;
5386 ecd_data_bytes_cons = 0;
5387 cu_bits = 0;
5388 #if ENABLE_INTER_ZCU_COST
5389 ps_ctxt->i8_cu_not_coded_cost = 0;
5390 #endif
5391 ps_final_prms->u1_is_cu_coded = 0;
5392 ps_final_prms->u1_cu_size = cu_size;
5393
/* collect the selected tree into the final CU params; cu_pos_x/y are in */
/* 4x4 units at this point, so the multiplication by 4 yields pel units */
5394 ihevce_tu_selector_debriefer(
5395 as_tu_nodes,
5396 ps_final_prms,
5397 &total_rdopt_cost,
5398 #if ENABLE_INTER_ZCU_COST
5399 &ps_ctxt->i8_cu_not_coded_cost,
5400 #endif
5401 &ecd_data_bytes_cons,
5402 &cu_bits,
5403 &ps_final_prms->u2_num_tus_in_cu,
5404 ps_ctxt->i4_cu_qp,
5405 cu_pos_x * 4,
5406 cu_pos_y * 4,
5407 INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
5408 (ps_ctxt->u1_chroma_array_type == 2),
5409 POS_TL);
5410
/* when chroma was not part of the recursion, chroma coeffs start right */
/* after the bytes consumed so far */
5411 if(!(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)))
5412 {
5413 ps_final_prms->i4_chrm_cu_coeff_strt_idx = ecd_data_bytes_cons;
5414 }
5415
5416 /* Modify the cost function for this CU. */
5417 /* loop in for 8x8 blocks */
5418 if(ps_ctxt->u1_enable_psyRDOPT)
5419 {
5420 UWORD8 *pu1_recon_cu;
/* shadows the function-scope recon_stride */
5421 WORD32 recon_stride;
5422 WORD32 curr_pos_x;
5423 WORD32 curr_pos_y;
5424 WORD32 start_index;
5425 WORD32 num_horz_cu_in_ctb;
5426 WORD32 had_block_size;
5427
5428 /* TODO: sreenivasa ctb size has to be used appropriately */
/* the CTB width is hard coded as 64 here */
5429 had_block_size = 8;
5430 num_horz_cu_in_ctb = 64 / had_block_size;
5431
5432 curr_pos_x = cu_pos_x << 2; /* pel units */
5433 curr_pos_y = cu_pos_y << 2; /* pel units */
5434 recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
5435 pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore
5436 .apv_luma_recon_bufs[0]); // already pointing to the current CU recon
5437 //+ \curr_pos_x + curr_pos_y * recon_stride;
5438
5439 /* start index to index the source satd of curr cu in the current ctb */
5440 start_index =
5441 (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
5442
5443 {
5444 total_rdopt_cost += ihevce_psy_rd_cost(
5445 ps_ctxt->ai4_source_satd_8x8,
5446 pu1_recon_cu,
5447 recon_stride,
5448 1, //howz stride
5449 cu_size,
5450 0, // pic type
5451 0, //layer id
5452 ps_ctxt->i4_satd_lamda, // lambda
5453 start_index,
5454 ps_ctxt->u1_is_input_data_hbd,
5455 ps_ctxt->u4_psy_strength,
5456 &ps_ctxt->s_cmn_opt_func); // 8 bit
5457 }
5458 }
5459
/* NOTE(review): the meaning of the constant 4 is not visible in this */
/* function - confirm against the chroma intra pred mode definitions */
5460 ps_final_prms->u1_chroma_intra_pred_mode = 4;
5461
5462 /* update the bytes consumed */
5463 ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
5464
5465 /* store the current cu size to final prms */
/* (same value was already stored before the debriefer call) */
5466 ps_final_prms->u1_cu_size = cu_size;
5467 /* ------------- Chroma processing -------------- */
5468 /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEED preset */
/* skipped when chroma was already evaluated inside the TU recursion */
5469 if(ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt &&
5470 !(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)))
5471 {
5472 LWORD64 chrm_rdopt_cost;
5473 WORD32 chrm_rdopt_tu_bits;
5474
5475 /* Store the current RDOPT cost to enable early exit in chrom_prcs */
5476 ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
5477
5478 chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
5479 ps_ctxt,
5480 curr_buf_idx,
5481 0, /* TU mode : Don't care in Inter path */
5482 ps_chrm_cu_buf_prms->pu1_curr_src,
5483 ps_chrm_cu_buf_prms->i4_chrm_src_stride,
5484 ps_chrm_cu_buf_prms->pu1_cu_left,
5485 ps_chrm_cu_buf_prms->pu1_cu_top,
5486 ps_chrm_cu_buf_prms->pu1_cu_top_left,
5487 ps_chrm_cu_buf_prms->i4_cu_left_stride,
5488 (cu_pos_x >> 1),
5489 (cu_pos_y >> 1),
5490 &chrm_rdopt_tu_bits,
5491 i4_alpha_stim_multiplier,
5492 u1_is_cu_noisy);
5493
/* weigh the chroma cost by u4_chroma_cost_weighing_factor with rounding */
5494 #if WEIGH_CHROMA_COST
5495 chrm_rdopt_cost = (LWORD64)(
5496 (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
5497 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
5498 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
5499 #endif
5500
5501 #if CHROMA_RDOPT_ENABLE
5502 total_rdopt_cost += chrm_rdopt_cost;
5503 #endif
5504 cu_bits += chrm_rdopt_tu_bits;
5505
5506 /* during chroma evaluation if skip decision was over written */
5507 /* then the current skip candidate is set to a non skip candidate */
5508 ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag;
5509
5510 /* cu bits for chroma residual if chroma rdopt is on */
5511 /* if zero_cbf eval is disabled then cu bits will be zero */
5512 ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
5513
/* the early exit is allowed for a non-zero bitrate instance or when */
/* only one bitrate is encoded; it returns before header bits are added */
5514 if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
5515 {
5516 /* Early exit : If the current running cost exceeds
5517 the prev. best mode cost, break */
5518 if(total_rdopt_cost > prev_best_rdopt_cost)
5519 {
5520 return (total_rdopt_cost);
5521 }
5522 }
5523 }
5524 else
5525 {}
5526
5527 #if SHRINK_INTER_TUTREE
5528 /* ------------- Quadtree TU split optimization ------------ */
5529 if(ps_final_prms->u1_is_cu_coded)
5530 {
5531 ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree(
5532 &ps_final_prms->as_tu_enc_loop[0],
5533 &ps_final_prms->as_tu_enc_loop_temp_prms[0],
5534 &ps_final_prms->s_recon_datastore,
5535 ps_final_prms->u2_num_tus_in_cu,
5536 (ps_ctxt->u1_chroma_array_type == 2));
5537 }
5538 #endif
5539
5540 /* RDOPT copy States : Best after all luma TUs (and chroma,if enabled)to current */
5541 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
5542 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5543 .s_cabac_ctxt.au1_ctxt_models[0] +
5544 IHEVC_CAB_COEFFX_PREFIX,
5545 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
5546 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
5547
5548 /* -------- Bit estimate for RD opt -------------- */
5549 {
5550 nbr_avail_flags_t s_nbr;
5551 /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
5552 WORD32 cbf_bits, header_bits;
5553
5554 /* get the neighbour availability flags for current cu */
5555 ihevce_get_only_nbr_flag(
5556 &s_nbr,
5557 ps_ctxt->pu1_ctb_nbr_map,
5558 ps_ctxt->i4_nbr_map_strd,
5559 cu_pos_x,
5560 cu_pos_y,
5561 (cu_size >> 2),
5562 (cu_size >> 2));
5563
5564 /* call the entropy rdo encode to get the bit estimate for current cu */
/* with intra eval disabled, top availability is additionally gated by */
/* !DISABLE_TOP_SYNC */
5565 header_bits = ihevce_entropy_rdo_encode_cu(
5566 &ps_ctxt->s_rdopt_entropy_ctxt,
5567 ps_final_prms,
5568 (cu_pos_x >> 1), /* back to 8x8 pel units */
5569 (cu_pos_y >> 1), /* back to 8x8 pel units */
5570 cu_size,
5571 ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
5572 : s_nbr.u1_top_avail,
5573 s_nbr.u1_left_avail,
5574 &ps_final_prms->pu1_cu_coeffs[0],
5575 &cbf_bits);
5576
5577 cu_bits += header_bits;
5578
5579 /* cbf bits are excluded from header bits, instead considered as texture bits */
5580 /* incase if zero cbf eval is disabled then texture bits gets added here */
5581 ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
5582 ps_final_prms->u4_cu_cbf_bits = cbf_bits;
5583
5584 #if RDOPT_ENABLE
5585 /* add the cost of coding the header bits */
5586 total_rdopt_cost +=
5587 COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
5588
5589 #if ENABLE_INTER_ZCU_COST
5590 /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */
5591 if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level))
5592 {
5593 LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
5594
5595 WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) &&
5596 (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag);
5597
5598 cab_ctxt_t *ps_cab_ctxt =
5599 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt;
5600
5601 /* Read header bits generated after ihevce_entropy_rdo_encode_cu() call */
5602 UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12;
5603
5604 /* account for coding qt_root_cbf = 0 */
5605 /* Add the cost for coding as 0 first, then subtract the cost for coding as 1 */
/* (part of header bits); the subtraction is clamped at zero so that the */
/* unsigned value cannot wrap around */
5606 u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0];
5607 if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1])
5608 u4_cu_hdr_bits_q12 = 0;
5609 else
5610 u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1];
5611
5612 /* add the cost of coding the header bits */
/* the extra CABAC_FRAC_BITS_Q in the shift removes the fractional bits */
/* of the fixed point bit count */
5613 i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30(
5614 u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */,
5615 ps_ctxt->i8_cl_ssd_lambda_qf,
5616 (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
5617
/* with psy-RD enabled the not coded cost is forced above the coded */
/* cost, so the comparison below can never pick "not coded" */
5618 if(ps_ctxt->u1_enable_psyRDOPT)
5619 {
5620 i8_cu_not_coded_cost = total_rdopt_cost + 1;
5621 }
5622
5623 /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */
5624 if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu))
5625 {
5626 WORD32 tx_size;
5627
5628 /* force cu as not coded and update the cost */
5629 ps_final_prms->u1_is_cu_coded = 0;
5630 ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
5631 ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
5632
5633 total_rdopt_cost = i8_cu_not_coded_cost;
5634
5635 /* reset num TUs to 1 unless cu size is 64 */
5636 ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1;
5637 trans_size = (64 == cu_size) ? 32 : cu_size;
5638 GETRANGE(tx_size, trans_size);
5639
5640 /* reset the bytes consumed */
5641 ps_final_prms->i4_num_bytes_ecd_data = 0;
5642
5643 /* reset texture related bits and roll back header bits*/
/* header bits are rounded from the fixed point count to whole bits */
5644 ps_final_prms->u4_cu_cbf_bits = 0;
5645 ps_final_prms->u4_cu_luma_res_bits = 0;
5646 ps_final_prms->u4_cu_chroma_res_bits = 0;
5647 ps_final_prms->u4_cu_hdr_bits =
5648 (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q;
5649
5650 /* update cabac model with qtroot cbf = 0 decision */
5651 ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] =
5652 gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1];
5653
5654 /* restore untouched cabac models for, tusplit, cbfs, texture etc */
/* source is the local backup taken at function entry */
5655 memcpy(
5656 &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM],
5657 &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM],
5658 (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM));
5659
5660 /* mark all tus as not coded for final eval */
5661 for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++)
5662 {
/* bit 0 of ctr selects the x offset and bit 1 the y offset, both in */
/* 4x4 units */
5663 WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0;
5664 WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0;
5665
5666 nbr_4x4_t *ps_cur_nbr_4x4 =
5667 ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu);
5668
5669 num_4x4_in_tu = trans_size >> 2;
5670
5671 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0;
5672 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0;
5673 ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0;
5674
5675 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0;
5676 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
5677 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
5678
5679 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
5680 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
5681
5682 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
5683 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x;
5684 ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y;
5685
5686 /* reset cbf for the all 4x4 in TU */
5687 {
5688 WORD32 i, j;
5689 nbr_4x4_t *ps_tmp_4x4;
5690 ps_tmp_4x4 = ps_cur_nbr_4x4;
5691
5692 for(i = 0; i < num_4x4_in_tu; i++)
5693 {
5694 for(j = 0; j < num_4x4_in_tu; j++)
5695 {
5696 ps_tmp_4x4[j].b1_y_cbf = 0;
5697 }
5698 /* row level update*/
5699 ps_tmp_4x4 += num_4x4_in_cu;
5700 }
5701 }
5702 }
5703 }
5704 }
5705 #endif /* ENABLE_INTER_ZCU_COST */
5706
5707 #endif /* RDOPT_ENABLE */
5708 }
5709
5710 return (total_rdopt_cost);
5711 }
5712 #endif
5713
5714 /*!
5715 ******************************************************************************
5716 * \if Function name : ihevce_inter_rdopt_cu_mc_mvp \endif
5717 *
5718 * \brief
5719 * Inter Coding unit function which performs MC and MVP calc for RD opt mode
5720 *
5721 * \param[in] ps_ctxt enc_loop module ctxt pointer
5722 * \param[in] ps_inter_cand pointer to inter candidate structure
5723 * \param[in] cu_size Current CU size
5724 * \param[in] cu_pos_x cu position x w.r.t to ctb
5725 * \param[in] cu_pos_y cu position y w.r.t to ctb
5726 * \param[in] ps_left_nbr_4x4 Left neighbour 4x4 structure pointer
5727 * \param[in] ps_top_nbr_4x4 top neighbour 4x4 structure pointer
5728 * \param[in] ps_topleft_nbr_4x4 top left neighbour 4x4 structure pointer
5729 * \param[in] nbr_4x4_left_strd left neighbour 4x4 buffer stride
5730 * \param[in] curr_buf_idx Current Buffer index
5731 *
5732 * \return
5733 * Rdopt cost
5734 *
5735 * \author
5736 * Ittiam
5737 *
5738 *****************************************************************************
5739 */
ihevce_inter_rdopt_cu_mc_mvp(ihevce_enc_loop_ctxt_t * ps_ctxt,cu_inter_cand_t * ps_inter_cand,WORD32 cu_size,WORD32 cu_pos_x,WORD32 cu_pos_y,nbr_4x4_t * ps_left_nbr_4x4,nbr_4x4_t * ps_top_nbr_4x4,nbr_4x4_t * ps_topleft_nbr_4x4,WORD32 nbr_4x4_left_strd,WORD32 curr_buf_idx)5740 LWORD64 ihevce_inter_rdopt_cu_mc_mvp(
5741 ihevce_enc_loop_ctxt_t *ps_ctxt,
5742 cu_inter_cand_t *ps_inter_cand,
5743 WORD32 cu_size,
5744 WORD32 cu_pos_x,
5745 WORD32 cu_pos_y,
5746 nbr_4x4_t *ps_left_nbr_4x4,
5747 nbr_4x4_t *ps_top_nbr_4x4,
5748 nbr_4x4_t *ps_topleft_nbr_4x4,
5749 WORD32 nbr_4x4_left_strd,
5750 WORD32 curr_buf_idx)
5751 {
5752 /* local variables */
5753 enc_loop_cu_final_prms_t *ps_final_prms;
5754 nbr_avail_flags_t s_nbr;
5755 nbr_4x4_t *ps_nbr_4x4;
5756
5757 UWORD8 au1_is_top_used[2][MAX_MVP_LIST_CAND];
5758 UWORD8 *pu1_pred;
5759 WORD32 rdopt_cost;
5760 WORD32 ctr;
5761 WORD32 num_cu_part;
5762 WORD32 inter_pu_wd;
5763 WORD32 inter_pu_ht;
5764 WORD32 pred_stride;
5765
5766 /* get the pointers based on curbuf idx */
5767 ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
5768 ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
5769 pu1_pred = ps_inter_cand->pu1_pred_data;
5770
5771 pred_stride = ps_inter_cand->i4_pred_data_stride;
5772
5773 /* store the partition mode in final prms */
5774 ps_final_prms->u1_part_mode = ps_inter_cand->b3_part_size;
5775
5776 /* since encoder does not support NXN part type */
5777 /* num parts can be either 1 or 2 only */
5778 ASSERT(SIZE_NxN != ps_inter_cand->b3_part_size);
5779
5780 num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1;
5781
5782 /* get the 4x4 level position of current cu */
5783 cu_pos_x = cu_pos_x << 1;
5784 cu_pos_y = cu_pos_y << 1;
5785
5786 /* populate cu level params */
5787 ps_final_prms->u1_intra_flag = PRED_MODE_INTER;
5788 ps_final_prms->u2_num_pus_in_cu = num_cu_part;
5789
5790 /* run a loop over all the partitons in cu */
5791 for(ctr = 0; ctr < num_cu_part; ctr++)
5792 {
5793 pu_mv_t as_pred_mv[MAX_MVP_LIST_CAND];
5794 pu_t *ps_pu;
5795 WORD32 skip_or_merge_flag;
5796 UWORD8 u1_use_mvp_from_top_row;
5797
5798 ps_pu = &ps_inter_cand->as_inter_pu[ctr];
5799
5800 /* IF AMP then each partitions can have diff wd ht */
5801 inter_pu_wd = (ps_pu->b4_wd + 1) << 2;
5802 inter_pu_ht = (ps_pu->b4_ht + 1) << 2;
5803
5804 /* populate reference pic buf id for bs compute */
5805
5806 /* L0 */
5807 if(-1 != ps_pu->mv.i1_l0_ref_idx)
5808 {
5809 ps_pu->mv.i1_l0_ref_pic_buf_id =
5810 ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx]->i4_buf_id;
5811 }
5812
5813 /* L1 */
5814 if(-1 != ps_pu->mv.i1_l1_ref_idx)
5815 {
5816 ps_pu->mv.i1_l1_ref_pic_buf_id =
5817 ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx]->i4_buf_id;
5818 }
5819
5820 /* SKIP or merge check for every part */
5821 skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag;
5822
5823 /* ----------- MV Prediction ----------------- */
5824 if(0 == skip_or_merge_flag)
5825 {
5826 /* get the neighbour availability flags */
5827 ihevce_get_only_nbr_flag(
5828 &s_nbr,
5829 ps_ctxt->pu1_ctb_nbr_map,
5830 ps_ctxt->i4_nbr_map_strd,
5831 cu_pos_x,
5832 cu_pos_y,
5833 inter_pu_wd >> 2,
5834 inter_pu_ht >> 2);
5835
5836 if(ps_ctxt->u1_disable_intra_eval && DISABLE_TOP_SYNC && (ps_pu->b4_pos_y == 0))
5837 {
5838 u1_use_mvp_from_top_row = 0;
5839 }
5840 else
5841 {
5842 u1_use_mvp_from_top_row = 1;
5843 }
5844
5845 if(!u1_use_mvp_from_top_row)
5846 {
5847 if(s_nbr.u1_top_avail || s_nbr.u1_top_lt_avail || s_nbr.u1_top_rt_avail)
5848 {
5849 if(!s_nbr.u1_left_avail && !s_nbr.u1_bot_lt_avail)
5850 {
5851 WORD32 curr_cu_pos_in_row, cu_top_right_offset, cu_top_right_dep_pos;
5852
5853 /* Ensure Top Right Sync */
5854 if(!ps_ctxt->u1_use_top_at_ctb_boundary)
5855 {
5856 curr_cu_pos_in_row =
5857 ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x + (cu_pos_x << 2);
5858
5859 if(ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y == 0)
5860 {
5861 /* No wait for 1st row */
5862 cu_top_right_offset = -(MAX_CTB_SIZE);
5863 {
5864 ihevce_tile_params_t *ps_col_tile_params =
5865 ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
5866 ps_ctxt->i4_tile_col_idx);
5867
5868 /* No wait for 1st row */
5869 cu_top_right_offset =
5870 -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
5871 }
5872 cu_top_right_dep_pos = 0;
5873 }
5874 else
5875 {
5876 cu_top_right_offset = (cu_size) + 4;
5877 cu_top_right_dep_pos =
5878 (ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y >> 6) - 1;
5879 }
5880
5881 ihevce_dmgr_chk_row_row_sync(
5882 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
5883 curr_cu_pos_in_row,
5884 cu_top_right_offset,
5885 cu_top_right_dep_pos,
5886 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
5887 ps_ctxt->thrd_id);
5888 }
5889
5890 u1_use_mvp_from_top_row = 1;
5891 }
5892 else
5893 {
5894 s_nbr.u1_top_avail = 0;
5895 s_nbr.u1_top_lt_avail = 0;
5896 s_nbr.u1_top_rt_avail = 0;
5897 }
5898 }
5899 else
5900 {
5901 u1_use_mvp_from_top_row = 1;
5902 }
5903 }
5904 /* Call the MV prediction module to get MVP */
5905 ihevce_mv_pred(
5906 &ps_ctxt->s_mv_pred_ctxt,
5907 ps_top_nbr_4x4,
5908 ps_left_nbr_4x4,
5909 ps_topleft_nbr_4x4,
5910 nbr_4x4_left_strd,
5911 &s_nbr,
5912 NULL, /* colocated MV */
5913 ps_pu,
5914 &as_pred_mv[0],
5915 au1_is_top_used);
5916 }
5917
5918 /* store the nbr 4x4 structure */
5919 ps_nbr_4x4->b1_skip_flag = ps_inter_cand->b1_skip_flag;
5920 ps_nbr_4x4->b1_intra_flag = 0;
5921 ps_nbr_4x4->b1_pred_l0_flag = 0;
5922 ps_nbr_4x4->b1_pred_l1_flag = 0;
5923
5924 /* DC is default mode for inter cu, required for intra mode signalling */
5925 ps_nbr_4x4->b6_luma_intra_mode = 1;
5926
5927 /* copy the motion vectors to neighbour structure */
5928 ps_nbr_4x4->mv = ps_pu->mv;
5929
5930 /* copy the PU to final out pu */
5931 ps_final_prms->as_pu_enc_loop[ctr] = *ps_pu;
5932
5933 /* copy the PU to chroma */
5934 ps_final_prms->as_pu_chrm_proc[ctr] = *ps_pu;
5935
5936 /* store the skip flag to final prms */
5937 ps_final_prms->u1_skip_flag = ps_inter_cand->b1_skip_flag;
5938
5939 /* MVP index & MVD calc is gated on skip/merge flag */
5940 if(0 == skip_or_merge_flag)
5941 {
5942 /* calculate the MVDs and popluate the MVP idx for L0 */
5943 if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode))
5944 {
5945 WORD32 idx0_cost, idx1_cost;
5946
5947 /* calculate the ABS mvd for cand 0 */
5948 idx0_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[0].s_l0_mv.i2_mvx);
5949 idx0_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[0].s_l0_mv.i2_mvy);
5950
5951 /* calculate the ABS mvd for cand 1 */
5952 if(u1_use_mvp_from_top_row)
5953 {
5954 idx1_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[1].s_l0_mv.i2_mvx);
5955 idx1_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[1].s_l0_mv.i2_mvy);
5956 }
5957 else
5958 {
5959 idx1_cost = INT_MAX;
5960 }
5961
5962 /* based on the least cost choose the mvp idx */
5963 if(idx0_cost <= idx1_cost)
5964 {
5965 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -=
5966 as_pred_mv[0].s_l0_mv.i2_mvx;
5967 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -=
5968 as_pred_mv[0].s_l0_mv.i2_mvy;
5969
5970 ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 0;
5971 }
5972 else
5973 {
5974 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -=
5975 as_pred_mv[1].s_l0_mv.i2_mvx;
5976 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -=
5977 as_pred_mv[1].s_l0_mv.i2_mvy;
5978
5979 ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 1;
5980 }
5981
5982 /* set the pred l0 flag for neighbour storage */
5983 ps_nbr_4x4->b1_pred_l0_flag = 1;
5984 }
5985 /* calculate the MVDs and popluate the MVP idx for L1 */
5986 if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode))
5987 {
5988 WORD32 idx0_cost, idx1_cost;
5989
5990 /* calculate the ABS mvd for cand 0 */
5991 idx0_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[0].s_l1_mv.i2_mvx);
5992 idx0_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[0].s_l1_mv.i2_mvy);
5993
5994 /* calculate the ABS mvd for cand 1 */
5995 if(u1_use_mvp_from_top_row)
5996 {
5997 idx1_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[1].s_l1_mv.i2_mvx);
5998 idx1_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[1].s_l1_mv.i2_mvy);
5999 }
6000 else
6001 {
6002 idx1_cost = INT_MAX;
6003 }
6004
6005 /* based on the least cost choose the mvp idx */
6006 if(idx0_cost <= idx1_cost)
6007 {
6008 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -=
6009 as_pred_mv[0].s_l1_mv.i2_mvx;
6010 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -=
6011 as_pred_mv[0].s_l1_mv.i2_mvy;
6012
6013 ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 0;
6014 }
6015 else
6016 {
6017 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -=
6018 as_pred_mv[1].s_l1_mv.i2_mvx;
6019 ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -=
6020 as_pred_mv[1].s_l1_mv.i2_mvy;
6021
6022 ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 1;
6023 }
6024
6025 /* set the pred l1 flag for neighbour storage */
6026 ps_nbr_4x4->b1_pred_l1_flag = 1;
6027 }
6028
6029 /* set the merge flag to 0 */
6030 ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = 0;
6031 ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = 0;
6032 }
6033 else
6034 {
6035 /* copy the merge index from candidate */
6036 ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = ps_pu->b1_merge_flag;
6037
6038 ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = ps_pu->b3_merge_idx;
6039
6040 if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode))
6041 {
6042 /* set the pred l0 flag for neighbour storage */
6043 ps_nbr_4x4->b1_pred_l0_flag = 1;
6044 }
6045
6046 /* calculate the MVDs and popluate the MVP idx for L1 */
6047 if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode))
6048 {
6049 /* set the pred l1 flag for neighbour storage */
6050 ps_nbr_4x4->b1_pred_l1_flag = 1;
6051 }
6052 }
6053
6054 /* RD opt cost computation is part of cu_ntu func hence here it is set to 0 */
6055 rdopt_cost = 0;
6056
6057 /* copy the MV to colocated Mv structure */
6058 ps_final_prms->as_col_pu_enc_loop[ctr].s_l0_mv = ps_pu->mv.s_l0_mv;
6059 ps_final_prms->as_col_pu_enc_loop[ctr].s_l1_mv = ps_pu->mv.s_l1_mv;
6060 ps_final_prms->as_col_pu_enc_loop[ctr].i1_l0_ref_idx = ps_pu->mv.i1_l0_ref_idx;
6061 ps_final_prms->as_col_pu_enc_loop[ctr].i1_l1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
6062 ps_final_prms->as_col_pu_enc_loop[ctr].b2_pred_mode = ps_pu->b2_pred_mode;
6063 ps_final_prms->as_col_pu_enc_loop[ctr].b1_intra_flag = 0;
6064
6065 /* replicate neighbour 4x4 strcuture for entire partition */
6066 {
6067 WORD32 i, j;
6068 nbr_4x4_t *ps_tmp_4x4;
6069
6070 ps_tmp_4x4 = ps_nbr_4x4;
6071
6072 for(i = 0; i < (inter_pu_ht >> 2); i++)
6073 {
6074 for(j = 0; j < (inter_pu_wd >> 2); j++)
6075 {
6076 ps_tmp_4x4[j] = *ps_nbr_4x4;
6077 }
6078 /* row level update*/
6079 ps_tmp_4x4 += (cu_size >> 2);
6080 }
6081 }
6082 /* set the neighbour map to 1 */
6083 ihevce_set_inter_nbr_map(
6084 ps_ctxt->pu1_ctb_nbr_map,
6085 ps_ctxt->i4_nbr_map_strd,
6086 cu_pos_x,
6087 cu_pos_y,
6088 (inter_pu_wd >> 2),
6089 (inter_pu_ht >> 2),
6090 1);
6091 /* ----------- Motion Compensation for Luma ----------- */
6092 #if !ENABLE_MIXED_INTER_MODE_EVAL
6093 {
6094 IV_API_CALL_STATUS_T valid_mv_cand;
6095
6096 /*If the inter candidate is neither merge cand nor skip cand
6097 then calculate the mc.*/
6098 if(0 == skip_or_merge_flag || (ps_ctxt->u1_high_speed_cu_dec_on))
6099 {
6100 valid_mv_cand =
6101 ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 0);
6102
6103 /* assert if the MC is given a valid mv candidate */
6104 ASSERT(valid_mv_cand == IV_SUCCESS);
6105 }
6106 }
6107 #endif
6108 if((2 == num_cu_part) && (0 == ctr))
6109 {
6110 /* 2Nx__ partion case */
6111 if(inter_pu_wd == cu_size)
6112 {
6113 cu_pos_y += (inter_pu_ht >> 2);
6114 pu1_pred += (inter_pu_ht * pred_stride);
6115 ps_nbr_4x4 += (inter_pu_ht >> 2) * (cu_size >> 2);
6116 ps_left_nbr_4x4 += (inter_pu_ht >> 2) * nbr_4x4_left_strd;
6117 ps_top_nbr_4x4 = ps_nbr_4x4 - (cu_size >> 2);
6118 ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - nbr_4x4_left_strd;
6119 }
6120
6121 /* __x2N partion case */
6122 if(inter_pu_ht == cu_size)
6123 {
6124 cu_pos_x += (inter_pu_wd >> 2);
6125 pu1_pred += inter_pu_wd;
6126 ps_nbr_4x4 += (inter_pu_wd >> 2);
6127 ps_left_nbr_4x4 = ps_nbr_4x4 - 1;
6128 ps_top_nbr_4x4 += (inter_pu_wd >> 2);
6129 ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1;
6130 nbr_4x4_left_strd = (cu_size >> 2);
6131 }
6132 }
6133 }
6134
6135 return (rdopt_cost);
6136 }
6137
/*!
******************************************************************************
* \if Function name : ihevce_intra_chroma_pred_mode_selector \endif
*
* \brief
*    Coding unit processing function for chroma special modes (Non-Luma modes)
*
* \note
*    This header documents ihevce_intra_chroma_pred_mode_selector(), which is
*    defined after the helper
*    ihevce_distortion_based_intra_chroma_mode_selector() that immediately
*    follows this comment.
*
* \param[in] ps_ctxt    enc_loop module ctxt pointer
* \param[in] ps_chrm_cu_buf_prms ctxt having chroma related prms
* \param[in] ps_cu_analyse    pointer to cu analyse
* \param[in] rd_opt_curr_idx  index in the array of RDopt params
* \param[in] tu_mode  TU_EQ_CU or other case
* \param[in] i4_alpha_stim_multiplier  stimulus multiplier forwarded to the
*                                      distortion computations
* \param[in] u1_is_cu_noisy   flag forwarded along with the stimulus
*                             multiplier to the distortion computations
*
* \return
*    None. Stores the best SATD mode, its RDOPT cost, CABAC state, TU bits
*
* \author
*  Ittiam
*
*****************************************************************************
*/
/*!
******************************************************************************
* \if Function name : ihevce_distortion_based_intra_chroma_mode_selector \endif
*
* \brief
*    Chooses a chroma intra prediction mode purely on distortion. Four
*    candidate modes (0, 1, 10 and 26, remapped through
*    gau1_chroma422_intra_angle_mapping when u1_is_422 is 1) are predicted
*    for every TU of the CU using neighbours taken from the source buffer;
*    the Hadamard SATD of Cb and Cr is accumulated per mode and the mode with
*    the smallest total is returned. On a tie the earlier candidate is kept.
*    The neighbour map is set per TU while a mode is evaluated and cleared
*    for the whole CU after each mode.
*
* \param[in]  ps_cu_analyse      cu analyse; supplies cu position and size
* \param[in]  pf_ref_substitution chroma reference substitution function
* \param[in]  ppf_chroma_ip      chroma intra pred functions, indexed through
*                                g_i4_ip_funcs
* \param[in]  ppf_resd_trns_had  SATD functions, indexed by u1_trans_idx - 1
* \param[in]  pu1_src            chroma source; Cr is addressed at Cb + 1
* \param[in]  i4_src_stride      source stride
* \param[out] pu1_pred           prediction buffer written for every mode
* \param[in]  i4_pred_stride     prediction stride
* \param[in,out] pu1_ctb_nbr_map ctb neighbour availability map
* \param[in]  i4_nbr_map_strd    neighbour map stride
* \param[out] pu1_ref_sub_out    scratch for the substituted reference array
* \param[in]  i4_alpha_stim_multiplier stimulus multiplier; when 0 plain
*                                SATD is used
* \param[in]  u1_is_cu_noisy     when 0 plain SATD is used
* \param[in]  u1_trans_size      chroma transform size
* \param[in]  u1_trans_idx       transform size index (see ppf_resd_trns_had)
* \param[in]  u1_num_tus_in_cu   number of TUs evaluated in the CU
* \param[in]  u1_num_4x4_luma_blks_in_tu TU size in luma 4x4 units
* \param[in]  u1_enable_psyRDOPT forwarded to the stimulus injection
* \param[in]  u1_is_422          1 for 4:2:2 (two sub TUs per TU)
*
* \return
*    Chroma intra mode having the least accumulated SATD
*
* \author
*  Ittiam
*
*****************************************************************************
*/
UWORD8 ihevce_distortion_based_intra_chroma_mode_selector(
    cu_analyse_t *ps_cu_analyse,
    ihevc_intra_pred_chroma_ref_substitution_ft *pf_ref_substitution,
    pf_intra_pred *ppf_chroma_ip,
    pf_res_trans_luma_had_chroma *ppf_resd_trns_had,
    UWORD8 *pu1_src,
    WORD32 i4_src_stride,
    UWORD8 *pu1_pred,
    WORD32 i4_pred_stride,
    UWORD8 *pu1_ctb_nbr_map,
    WORD32 i4_nbr_map_strd,
    UWORD8 *pu1_ref_sub_out,
    WORD32 i4_alpha_stim_multiplier,
    UWORD8 u1_is_cu_noisy,
    UWORD8 u1_trans_size,
    UWORD8 u1_trans_idx,
    UWORD8 u1_num_tus_in_cu,
    UWORD8 u1_num_4x4_luma_blks_in_tu,
    UWORD8 u1_enable_psyRDOPT,
    UWORD8 u1_is_422)
{
    /* candidate modes, evaluated in this order */
    UWORD8 au1_cand_modes[4] = { 0, 1, 10, 26 };

    /* 4x4 level position of the cu */
    UWORD8 u1_cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1);
    UWORD8 u1_cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1);

    /* 4:2:2 has two vertically stacked sub TUs per TU */
    WORD32 i4_num_sub_tus = u1_is_422 + 1;

    /* Cb and Cr share a row, so a TU spans twice its size in bytes */
    WORD32 i4_trans_size_m2 = u1_trans_size << 1;

    /* stimulus is injected only for noisy CUs with a non zero multiplier */
    WORD32 i4_inject_stim = (u1_is_cu_noisy && i4_alpha_stim_multiplier);

    WORD32 i4_least_satd = INT_MAX;
    UWORD8 u1_least_satd_mode = 0;
    WORD32 mode_idx;

    for(mode_idx = 0; mode_idx < 4; mode_idx++)
    {
        UWORD8 u1_mode;
        UWORD8 tu_idx;
        WORD32 i4_mode_satd = 0;

        if(1 == u1_is_422)
        {
            u1_mode = gau1_chroma422_intra_angle_mapping[au1_cand_modes[mode_idx]];
        }
        else
        {
            u1_mode = au1_cand_modes[mode_idx];
        }

        /* accumulate the SATD of this mode over all TUs of the cu */
        for(tu_idx = 0; tu_idx < u1_num_tus_in_cu; tu_idx++)
        {
            /* TUs are laid out as a 2x2 grid inside the cu */
            WORD32 tu_col = (tu_idx & 1);
            WORD32 tu_row = (tu_idx > 1);

            UWORD8 *pu1_tu_src = pu1_src + (tu_col * i4_trans_size_m2) +
                                 ((tu_row * u1_trans_size * i4_src_stride) << u1_is_422);
            UWORD8 *pu1_tu_pred = pu1_pred + (tu_col * i4_trans_size_m2) +
                                  ((tu_row * u1_trans_size * i4_pred_stride) << u1_is_422);
            WORD32 i4_tu_pos_x = u1_cu_pos_x + (tu_col * u1_num_4x4_luma_blks_in_tu);
            WORD32 i4_tu_pos_y = u1_cu_pos_y + (tu_row * u1_num_4x4_luma_blks_in_tu);

            WORD32 luma_nbr_flags;
            WORD32 sub_tu;

            luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
                pu1_ctb_nbr_map,
                i4_nbr_map_strd,
                i4_tu_pos_x,
                i4_tu_pos_y,
                u1_num_4x4_luma_blks_in_tu,
                u1_num_4x4_luma_blks_in_tu);

            for(sub_tu = 0; sub_tu < i4_num_sub_tus; sub_tu++)
            {
                WORD32 is_lower_sub_tu = (1 == sub_tu);

                UWORD8 *pu1_cur_src =
                    pu1_tu_src + (is_lower_sub_tu * u1_trans_size * i4_src_stride);
                UWORD8 *pu1_cur_pred =
                    pu1_tu_pred + (is_lower_sub_tu * u1_trans_size * i4_pred_stride);

                /* neighbours are picked from the source buffer itself */
                UWORD8 *pu1_nbr_top = pu1_cur_src - i4_src_stride;
                UWORD8 *pu1_nbr_left = pu1_cur_src - 2;
                UWORD8 *pu1_nbr_top_left = pu1_nbr_top - 2;

                WORD32 nbr_flags;
                WORD32 chrm_pred_func_idx;

                nbr_flags = ihevce_get_intra_chroma_tu_nbr(
                    luma_nbr_flags, sub_tu, u1_trans_size, u1_is_422);

                /* build the substituted reference array */
                pf_ref_substitution(
                    pu1_nbr_top_left,
                    pu1_nbr_top,
                    pu1_nbr_left,
                    i4_src_stride,
                    u1_trans_size,
                    nbr_flags,
                    pu1_ref_sub_out,
                    1);

                /* predict with the function mapped to this mode */
                chrm_pred_func_idx = g_i4_ip_funcs[u1_mode];

                ppf_chroma_ip[chrm_pred_func_idx](
                    pu1_ref_sub_out, 1, pu1_cur_pred, i4_pred_stride, u1_trans_size, u1_mode);

                if(i4_inject_stim)
                {
                    WORD32 i4_plane_satd;

                    /* Cb : Hadamard SATD followed by stimulus injection */
                    i4_plane_satd = ppf_resd_trns_had[u1_trans_idx - 1](
                        pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0);

                    i4_plane_satd = ihevce_inject_stim_into_distortion(
                        pu1_cur_src,
                        i4_src_stride,
                        pu1_cur_pred,
                        i4_pred_stride,
                        i4_plane_satd,
                        i4_alpha_stim_multiplier,
                        u1_trans_size,
                        0,
                        u1_enable_psyRDOPT,
                        U_PLANE);

                    i4_mode_satd += i4_plane_satd;

                    /* Cr : Hadamard SATD followed by stimulus injection */
                    i4_plane_satd = ppf_resd_trns_had[u1_trans_idx - 1](
                        pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0);

                    i4_plane_satd = ihevce_inject_stim_into_distortion(
                        pu1_cur_src,
                        i4_src_stride,
                        pu1_cur_pred,
                        i4_pred_stride,
                        i4_plane_satd,
                        i4_alpha_stim_multiplier,
                        u1_trans_size,
                        0,
                        u1_enable_psyRDOPT,
                        V_PLANE);

                    i4_mode_satd += i4_plane_satd;
                }
                else
                {
                    /* plain Hadamard SATD : Cb */
                    i4_mode_satd += ppf_resd_trns_had[u1_trans_idx - 1](
                        pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0);

                    /* plain Hadamard SATD : Cr */
                    i4_mode_satd += ppf_resd_trns_had[u1_trans_idx - 1](
                        pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0);
                }
            }

            /* this TU now counts as available for the TUs that follow */
            ihevce_set_nbr_map(
                pu1_ctb_nbr_map,
                i4_nbr_map_strd,
                i4_tu_pos_x,
                i4_tu_pos_y,
                u1_num_4x4_luma_blks_in_tu,
                1);
        }

        /* undo the availability marking of the whole cu before the next mode */
        ihevce_set_nbr_map(
            pu1_ctb_nbr_map,
            i4_nbr_map_strd,
            (ps_cu_analyse->b3_cu_pos_x << 1),
            (ps_cu_analyse->b3_cu_pos_y << 1),
            (ps_cu_analyse->u1_cu_size >> 2),
            0);

        /* keep the mode only when it is strictly better than the best so far */
        if(i4_mode_satd < i4_least_satd)
        {
            i4_least_satd = i4_mode_satd;
            u1_least_satd_mode = u1_mode;
        }
    }

    return u1_least_satd_mode;
}
6337
ihevce_intra_chroma_pred_mode_selector(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_chrm_cu_buf_prms_t * ps_chrm_cu_buf_prms,cu_analyse_t * ps_cu_analyse,WORD32 rd_opt_curr_idx,WORD32 tu_mode,WORD32 i4_alpha_stim_multiplier,UWORD8 u1_is_cu_noisy)6338 void ihevce_intra_chroma_pred_mode_selector(
6339 ihevce_enc_loop_ctxt_t *ps_ctxt,
6340 enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
6341 cu_analyse_t *ps_cu_analyse,
6342 WORD32 rd_opt_curr_idx,
6343 WORD32 tu_mode,
6344 WORD32 i4_alpha_stim_multiplier,
6345 UWORD8 u1_is_cu_noisy)
6346 {
6347 chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt;
6348
6349 ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
6350
6351 UWORD8 *pu1_pred;
6352 WORD32 trans_size;
6353 WORD32 num_tus_in_cu;
6354 WORD32 pred_strd;
6355 WORD32 ctr;
6356 WORD32 i4_subtu_idx;
6357 WORD32 i4_num_sub_tus;
6358 WORD32 trans_idx;
6359 WORD32 scan_idx;
6360 WORD32 num_4x4_luma_in_tu;
6361 WORD32 cu_pos_x;
6362 WORD32 cu_pos_y;
6363
6364 recon_datastore_t *aps_recon_datastore[2] = { &ps_ctxt->as_cu_prms[0].s_recon_datastore,
6365 &ps_ctxt->as_cu_prms[1].s_recon_datastore };
6366
6367 LWORD64 chrm_cod_cost = 0;
6368 WORD32 chrm_tu_bits = 0;
6369 WORD32 best_chrm_mode = DM_CHROMA_IDX;
6370 UWORD8 *pu1_chrm_src = ps_chrm_cu_buf_prms->pu1_curr_src;
6371 WORD32 chrm_src_stride = ps_chrm_cu_buf_prms->i4_chrm_src_stride;
6372 UWORD8 *pu1_cu_left = ps_chrm_cu_buf_prms->pu1_cu_left;
6373 UWORD8 *pu1_cu_top = ps_chrm_cu_buf_prms->pu1_cu_top;
6374 UWORD8 *pu1_cu_top_left = ps_chrm_cu_buf_prms->pu1_cu_top_left;
6375 WORD32 cu_left_stride = ps_chrm_cu_buf_prms->i4_cu_left_stride;
6376 WORD32 cu_size = ps_cu_analyse->u1_cu_size;
6377 WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
6378 WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
6379 UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
6380
6381 ihevc_intra_pred_chroma_ref_substitution_fptr =
6382 ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
6383 i4_num_sub_tus = (u1_is_422 == 1) + 1;
6384
6385 #if DISABLE_RDOQ_INTRA
6386 i4_perform_rdoq = 0;
6387 #endif
6388
6389 if(TU_EQ_CU == tu_mode)
6390 {
6391 num_tus_in_cu = 1;
6392 trans_size = cu_size >> 1;
6393 num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/
6394 ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode];
6395 }
6396 else
6397 {
6398 num_tus_in_cu = 4;
6399 trans_size = cu_size >> 2;
6400 num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/
6401
6402 /* For 8x8 CU only one TU */
6403 if(MIN_TU_SIZE > trans_size)
6404 {
6405 trans_size = MIN_TU_SIZE;
6406 num_tus_in_cu = 1;
6407 /* chroma nbr avail. is derived based on luma.
6408 for 4x4 chrm use 8x8 luma's size */
6409 num_4x4_luma_in_tu = num_4x4_luma_in_tu << 1;
6410 }
6411
6412 ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode];
6413 }
6414
6415 /* Can't be TU_EQ_SUBCU case */
6416 ASSERT(TU_EQ_SUBCU != tu_mode);
6417
6418 /* translate the transform size to index */
6419 trans_idx = trans_size >> 2;
6420
6421 pu1_pred = (UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data;
6422
6423 pred_strd = ps_chr_intra_satd_ctxt->i4_pred_stride;
6424
6425 /* for 16x16 cases */
6426 if(16 == trans_size)
6427 {
6428 trans_idx = 3;
6429 }
6430
6431 best_chrm_mode = ihevce_distortion_based_intra_chroma_mode_selector(
6432 ps_cu_analyse,
6433 ihevc_intra_pred_chroma_ref_substitution_fptr,
6434 ps_ctxt->apf_chrm_ip,
6435 ps_ctxt->apf_chrm_resd_trns_had,
6436 pu1_chrm_src,
6437 chrm_src_stride,
6438 pu1_pred,
6439 pred_strd,
6440 ps_ctxt->pu1_ctb_nbr_map,
6441 ps_ctxt->i4_nbr_map_strd,
6442 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
6443 i4_alpha_stim_multiplier,
6444 u1_is_cu_noisy,
6445 trans_size,
6446 trans_idx,
6447 num_tus_in_cu,
6448 num_4x4_luma_in_tu,
6449 ps_ctxt->u1_enable_psyRDOPT,
6450 u1_is_422);
6451
6452 /* Store the best chroma mode */
6453 ps_chr_intra_satd_ctxt->u1_best_cr_mode = best_chrm_mode;
6454
6455 /* evaluate RDOPT cost for the Best mode */
6456 {
6457 WORD32 i4_subtu_pos_x;
6458 WORD32 i4_subtu_pos_y;
6459 UWORD8 u1_compute_spatial_ssd;
6460
6461 WORD32 ai4_total_bytes_offset_cb[2] = { 0, 0 };
6462 WORD32 ai4_total_bytes_offset_cr[2] = { 0, 0 };
6463 /* State for prefix bin of chroma intra pred mode before CU encode */
6464 UWORD8 u1_chroma_intra_mode_prefix_state =
6465 ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_CHROMA_PRED_MODE];
6466 WORD32 luma_trans_size = trans_size << 1;
6467 WORD32 calc_recon = 0;
6468 UWORD8 *pu1_left = pu1_cu_left;
6469 UWORD8 *pu1_top = pu1_cu_top;
6470 UWORD8 *pu1_top_left = pu1_cu_top_left;
6471 WORD32 left_strd = cu_left_stride;
6472
6473 if(ps_ctxt->i1_cu_qp_delta_enable)
6474 {
6475 ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, luma_trans_size, 1);
6476 }
6477
6478 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
6479 (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
6480 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
6481
6482 if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
6483 {
6484 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
6485 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
6486 }
6487
6488 /* get the 4x4 level postion of current cu */
6489 cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1);
6490 cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1);
6491
6492 calc_recon = !u1_compute_spatial_ssd && ((4 == num_tus_in_cu) || (u1_is_422 == 1));
6493
6494 if(calc_recon || u1_compute_spatial_ssd)
6495 {
6496 aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1;
6497 aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1;
6498 }
6499 else
6500 {
6501 aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0;
6502 aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0;
6503 }
6504
6505 /* loop based on num tus in a cu */
6506 for(ctr = 0; ctr < num_tus_in_cu; ctr++)
6507 {
6508 WORD16 *pi2_cur_deq_data_cb;
6509 WORD16 *pi2_cur_deq_data_cr;
6510
6511 WORD32 deq_data_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride;
6512 WORD32 luma_nbr_flags = 0;
6513
6514 luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
6515 ps_ctxt->pu1_ctb_nbr_map,
6516 ps_ctxt->i4_nbr_map_strd,
6517 (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x,
6518 (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y,
6519 (luma_trans_size >> 2),
6520 (luma_trans_size >> 2));
6521
6522 for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++)
6523 {
6524 WORD32 cbf, num_bytes;
6525 LWORD64 trans_ssd_u, trans_ssd_v;
6526 UWORD8 u1_is_recon_available;
6527
6528 WORD32 trans_size_m2 = trans_size << 1;
6529 UWORD8 *pu1_cur_src = pu1_chrm_src + ((ctr & 1) * trans_size_m2) +
6530 (((ctr > 1) * trans_size * chrm_src_stride) << u1_is_422) +
6531 (i4_subtu_idx * trans_size * chrm_src_stride);
6532 UWORD8 *pu1_cur_pred = pu1_pred + ((ctr & 1) * trans_size_m2) +
6533 (((ctr > 1) * trans_size * pred_strd) << u1_is_422) +
6534 (i4_subtu_idx * trans_size * pred_strd);
6535 WORD32 i4_recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride;
6536 UWORD8 *pu1_cur_recon = ((UWORD8 *)aps_recon_datastore[0]
6537 ->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)]) +
6538 ((ctr & 1) * trans_size_m2) +
6539 (((ctr > 1) * trans_size * i4_recon_stride) << u1_is_422) +
6540 (i4_subtu_idx * trans_size * i4_recon_stride);
6541
6542 /* Use Chroma coeff/iq buf of the cur. intra cand. Not rememb.
6543 chroma coeff/iq for high quality intra SATD special modes. Will
6544 be over written by coeff of luma mode in chroma_rdopt call */
6545 UWORD8 *pu1_ecd_data_cb =
6546 &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0];
6547 UWORD8 *pu1_ecd_data_cr =
6548 &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0];
6549
6550 WORD32 chrm_pred_func_idx = 0;
6551 LWORD64 curr_cb_cod_cost = 0;
6552 LWORD64 curr_cr_cod_cost = 0;
6553 WORD32 nbr_flags = 0;
6554
6555 i4_subtu_pos_x = (((ctr & 1) * trans_size_m2) >> 2);
6556 i4_subtu_pos_y = (((ctr > 1) * trans_size) >> (!u1_is_422 + 1)) +
6557 ((i4_subtu_idx * trans_size) >> 2);
6558 pi2_cur_deq_data_cb = &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] +
6559 ((ctr & 1) * trans_size) +
6560 (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) +
6561 (i4_subtu_idx * trans_size * deq_data_strd);
6562 pi2_cur_deq_data_cr = &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] +
6563 ((ctr & 1) * trans_size) +
6564 (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) +
6565 (i4_subtu_idx * trans_size * deq_data_strd);
6566
6567 /* left cu boundary */
6568 if(0 == i4_subtu_pos_x)
6569 {
6570 left_strd = cu_left_stride;
6571 pu1_left = pu1_cu_left + (i4_subtu_pos_y << 2) * left_strd;
6572 }
6573 else
6574 {
6575 pu1_left = pu1_cur_recon - 2;
6576 left_strd = i4_recon_stride;
6577 }
6578
6579 /* top cu boundary */
6580 if(0 == i4_subtu_pos_y)
6581 {
6582 pu1_top = pu1_cu_top + (i4_subtu_pos_x << 2);
6583 }
6584 else
6585 {
6586 pu1_top = pu1_cur_recon - i4_recon_stride;
6587 }
6588
6589 /* by default top left is set to cu top left */
6590 pu1_top_left = pu1_cu_top_left;
6591
6592 /* top left based on position */
6593 if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x))
6594 {
6595 pu1_top_left = pu1_left - left_strd;
6596 }
6597 else if(0 != i4_subtu_pos_x)
6598 {
6599 pu1_top_left = pu1_top - 2;
6600 }
6601
6602 /* populate the coeffs scan idx */
6603 scan_idx = SCAN_DIAG_UPRIGHT;
6604
6605 /* RDOPT copy States : TU init (best until prev TU) to current */
6606 COPY_CABAC_STATES(
6607 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6608 .s_cabac_ctxt.au1_ctxt_models[0],
6609 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6610 IHEVC_CAB_CTXT_END);
6611
6612 /* for 4x4 transforms based on intra pred mode scan is choosen*/
6613 if(4 == trans_size)
6614 {
6615 /* for modes from 22 upto 30 horizontal scan is used */
6616 if((best_chrm_mode > 21) && (best_chrm_mode < 31))
6617 {
6618 scan_idx = SCAN_HORZ;
6619 }
6620 /* for modes from 6 upto 14 horizontal scan is used */
6621 else if((best_chrm_mode > 5) && (best_chrm_mode < 15))
6622 {
6623 scan_idx = SCAN_VERT;
6624 }
6625 }
6626
6627 nbr_flags = ihevce_get_intra_chroma_tu_nbr(
6628 luma_nbr_flags, i4_subtu_idx, trans_size, u1_is_422);
6629
6630 /* call the chroma reference array substitution */
6631 ihevc_intra_pred_chroma_ref_substitution_fptr(
6632 pu1_top_left,
6633 pu1_top,
6634 pu1_left,
6635 left_strd,
6636 trans_size,
6637 nbr_flags,
6638 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
6639 1);
6640
6641 /* use the look up to get the function idx */
6642 chrm_pred_func_idx = g_i4_ip_funcs[best_chrm_mode];
6643
6644 /* call the intra prediction function */
6645 ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
6646 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
6647 1,
6648 pu1_cur_pred,
6649 pred_strd,
6650 trans_size,
6651 best_chrm_mode);
6652
6653 /* UPLANE RDOPT Loop */
6654 {
6655 WORD32 tu_bits;
6656
6657 cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
6658 ps_ctxt,
6659 pu1_cur_pred,
6660 pred_strd,
6661 pu1_cur_src,
6662 chrm_src_stride,
6663 pi2_cur_deq_data_cb,
6664 deq_data_strd,
6665 pu1_cur_recon,
6666 i4_recon_stride,
6667 pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx],
6668 ps_ctxt->au1_cu_csbf,
6669 ps_ctxt->i4_cu_csbf_strd,
6670 trans_size,
6671 scan_idx,
6672 1,
6673 &num_bytes,
6674 &tu_bits,
6675 &ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr],
6676 &ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr],
6677 &u1_is_recon_available,
6678 i4_perform_sbh,
6679 i4_perform_rdoq,
6680 &trans_ssd_u,
6681 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
6682 i4_alpha_stim_multiplier,
6683 u1_is_cu_noisy,
6684 #endif
6685 0,
6686 u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
6687 U_PLANE);
6688
6689 #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL
6690 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
6691 {
6692 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
6693 trans_ssd_u = ihevce_inject_stim_into_distortion(
6694 pu1_cur_src,
6695 chrm_src_stride,
6696 pu1_cur_pred,
6697 pred_strd,
6698 trans_ssd_u,
6699 i4_alpha_stim_multiplier,
6700 trans_size,
6701 0,
6702 ps_ctxt->u1_enable_psyRDOPT,
6703 U_PLANE);
6704 #else
6705 if(u1_compute_spatial_ssd && u1_is_recon_available)
6706 {
6707 trans_ssd_u = ihevce_inject_stim_into_distortion(
6708 pu1_cur_src,
6709 chrm_src_stride,
6710 pu1_cur_recon,
6711 i4_recon_stride,
6712 trans_ssd_u,
6713 i4_alpha_stim_multiplier,
6714 trans_size,
6715 0,
6716 ps_ctxt->u1_enable_psyRDOPT,
6717 U_PLANE);
6718 }
6719 else
6720 {
6721 trans_ssd_u = ihevce_inject_stim_into_distortion(
6722 pu1_cur_src,
6723 chrm_src_stride,
6724 pu1_cur_pred,
6725 pred_strd,
6726 trans_ssd_u,
6727 i4_alpha_stim_multiplier,
6728 trans_size,
6729 0,
6730 ps_ctxt->u1_enable_psyRDOPT,
6731 U_PLANE);
6732 }
6733 #endif
6734 }
6735 #endif
6736
6737 /* RDOPT copy States : New updated after curr TU to TU init */
6738 if(0 != cbf)
6739 {
6740 memcpy(
6741 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6742 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6743 .s_cabac_ctxt.au1_ctxt_models[0],
6744 IHEVC_CAB_CTXT_END);
6745 }
6746 /* RDOPT copy States : Restoring back the Cb init state to Cr */
6747 else
6748 {
6749 memcpy(
6750 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6751 .s_cabac_ctxt.au1_ctxt_models[0],
6752 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6753 IHEVC_CAB_CTXT_END);
6754 }
6755
6756 if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd))
6757 {
6758 ihevce_chroma_it_recon_fxn(
6759 ps_ctxt,
6760 pi2_cur_deq_data_cb,
6761 deq_data_strd,
6762 pu1_cur_pred,
6763 pred_strd,
6764 pu1_cur_recon,
6765 i4_recon_stride,
6766 (pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx]),
6767 trans_size,
6768 cbf,
6769 ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr],
6770 ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr],
6771 U_PLANE);
6772 }
6773
6774 ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr] = cbf;
6775 curr_cb_cod_cost =
6776 trans_ssd_u +
6777 COMPUTE_RATE_COST_CLIP30(
6778 tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
6779 chrm_tu_bits += tu_bits;
6780 ai4_total_bytes_offset_cb[i4_subtu_idx] += num_bytes;
6781 ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr] =
6782 num_bytes;
6783 }
6784
6785 /* VPLANE RDOPT Loop */
6786 {
6787 WORD32 tu_bits;
6788
6789 cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
6790 ps_ctxt,
6791 pu1_cur_pred,
6792 pred_strd,
6793 pu1_cur_src,
6794 chrm_src_stride,
6795 pi2_cur_deq_data_cr,
6796 deq_data_strd,
6797 pu1_cur_recon,
6798 i4_recon_stride,
6799 pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx],
6800 ps_ctxt->au1_cu_csbf,
6801 ps_ctxt->i4_cu_csbf_strd,
6802 trans_size,
6803 scan_idx,
6804 1,
6805 &num_bytes,
6806 &tu_bits,
6807 &ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr],
6808 &ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr],
6809 &u1_is_recon_available,
6810 i4_perform_sbh,
6811 i4_perform_rdoq,
6812 &trans_ssd_v,
6813 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
6814 i4_alpha_stim_multiplier,
6815 u1_is_cu_noisy,
6816 #endif
6817 0,
6818 u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
6819 V_PLANE);
6820
6821 #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL
6822 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
6823 {
6824 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
6825 trans_ssd_v = ihevce_inject_stim_into_distortion(
6826 pu1_cur_src,
6827 chrm_src_stride,
6828 pu1_cur_pred,
6829 pred_strd,
6830 trans_ssd_v,
6831 i4_alpha_stim_multiplier,
6832 trans_size,
6833 0,
6834 ps_ctxt->u1_enable_psyRDOPT,
6835 V_PLANE);
6836 #else
6837 if(u1_compute_spatial_ssd && u1_is_recon_available)
6838 {
6839 trans_ssd_v = ihevce_inject_stim_into_distortion(
6840 pu1_cur_src,
6841 chrm_src_stride,
6842 pu1_cur_recon,
6843 i4_recon_stride,
6844 trans_ssd_v,
6845 i4_alpha_stim_multiplier,
6846 trans_size,
6847 0,
6848 ps_ctxt->u1_enable_psyRDOPT,
6849 V_PLANE);
6850 }
6851 else
6852 {
6853 trans_ssd_v = ihevce_inject_stim_into_distortion(
6854 pu1_cur_src,
6855 chrm_src_stride,
6856 pu1_cur_pred,
6857 pred_strd,
6858 trans_ssd_v,
6859 i4_alpha_stim_multiplier,
6860 trans_size,
6861 0,
6862 ps_ctxt->u1_enable_psyRDOPT,
6863 V_PLANE);
6864 }
6865 #endif
6866 }
6867 #endif
6868
6869 /* RDOPT copy States : New updated after curr TU to TU init */
6870 if(0 != cbf)
6871 {
6872 COPY_CABAC_STATES(
6873 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6874 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6875 .s_cabac_ctxt.au1_ctxt_models[0],
6876 IHEVC_CAB_CTXT_END);
6877 }
6878 /* RDOPT copy States : Restoring back the Cb init state to Cr */
6879 else
6880 {
6881 COPY_CABAC_STATES(
6882 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6883 .s_cabac_ctxt.au1_ctxt_models[0],
6884 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6885 IHEVC_CAB_CTXT_END);
6886 }
6887
6888 if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd))
6889 {
6890 ihevce_chroma_it_recon_fxn(
6891 ps_ctxt,
6892 pi2_cur_deq_data_cr,
6893 deq_data_strd,
6894 pu1_cur_pred,
6895 pred_strd,
6896 pu1_cur_recon,
6897 i4_recon_stride,
6898 (pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx]),
6899 trans_size,
6900 cbf,
6901 ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr],
6902 ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr],
6903 V_PLANE);
6904 }
6905
6906 ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr] = cbf;
6907 curr_cr_cod_cost =
6908 trans_ssd_v +
6909 COMPUTE_RATE_COST_CLIP30(
6910 tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
6911 chrm_tu_bits += tu_bits;
6912 ai4_total_bytes_offset_cr[i4_subtu_idx] += num_bytes;
6913 ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr] =
6914 num_bytes;
6915 }
6916
6917 chrm_cod_cost += curr_cb_cod_cost;
6918 chrm_cod_cost += curr_cr_cod_cost;
6919 }
6920
6921 /* set the neighbour map to 1 */
6922 ihevce_set_nbr_map(
6923 ps_ctxt->pu1_ctb_nbr_map,
6924 ps_ctxt->i4_nbr_map_strd,
6925 (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x,
6926 (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y,
6927 (luma_trans_size >> 2),
6928 1);
6929 }
6930
6931 /* set the neighbour map to 0 */
6932 ihevce_set_nbr_map(
6933 ps_ctxt->pu1_ctb_nbr_map,
6934 ps_ctxt->i4_nbr_map_strd,
6935 (ps_cu_analyse->b3_cu_pos_x << 1),
6936 (ps_cu_analyse->b3_cu_pos_y << 1),
6937 (ps_cu_analyse->u1_cu_size >> 2),
6938 0);
6939
6940 /* Account for coding b3_chroma_intra_pred_mode prefix and suffix bins */
6941 /* This is done by adding the bits for signalling chroma mode (0-3) */
6942 /* and subtracting the bits for chroma mode same as luma mode (4) */
6943 #if CHROMA_RDOPT_ENABLE
6944 {
6945 /* Estimate bits to encode prefix bin as 1 for b3_chroma_intra_pred_mode */
6946 WORD32 bits_frac_1 =
6947 gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 1];
6948
6949 WORD32 bits_for_mode_0to3 = (2 << CABAC_FRAC_BITS_Q) + bits_frac_1;
6950
6951 /* Estimate bits to encode prefix bin as 0 for b3_chroma_intra_pred_mode */
6952 WORD32 bits_for_mode4 =
6953 gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 0];
6954
6955 /* accumulate into final rd cost for chroma */
6956 ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode = COMPUTE_RATE_COST_CLIP30(
6957 (bits_for_mode_0to3 - bits_for_mode4),
6958 ps_ctxt->i8_cl_ssd_lambda_chroma_qf,
6959 (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
6960
6961 chrm_cod_cost += ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode;
6962 }
6963 #endif
6964
6965 if(ps_ctxt->u1_enable_psyRDOPT)
6966 {
6967 UWORD8 *pu1_recon_cu;
6968 WORD32 recon_stride;
6969 WORD32 curr_pos_x;
6970 WORD32 curr_pos_y;
6971 WORD32 start_index;
6972 WORD32 num_horz_cu_in_ctb;
6973 WORD32 had_block_size;
6974
6975 /* TODO: sreenivasa ctb size has to be used appropriately */
6976 had_block_size = 8;
6977 num_horz_cu_in_ctb = 2 * 64 / had_block_size;
6978 curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
6979 curr_pos_y = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
6980 recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride;
6981 pu1_recon_cu =
6982 aps_recon_datastore[0]->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)]; //
6983
6984 /* start index to index the source satd of curr cu in the current ctb */
6985 start_index = 2 * (curr_pos_x / had_block_size) +
6986 (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
6987
6988 {
6989 chrm_cod_cost += ihevce_psy_rd_cost_croma(
6990 ps_ctxt->ai4_source_chroma_satd,
6991 pu1_recon_cu,
6992 recon_stride,
6993 1, //
6994 cu_size,
6995 0, // pic type
6996 0, //layer id
6997 ps_ctxt->i4_satd_lamda, // lambda
6998 start_index,
6999 ps_ctxt->u1_is_input_data_hbd, // 8 bit
7000 ps_ctxt->u1_chroma_array_type,
7001 &ps_ctxt->s_cmn_opt_func
7002
7003 ); // chroma subsampling 420
7004 }
7005 }
7006
7007 ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt = chrm_cod_cost;
7008 ps_chr_intra_satd_ctxt->i4_chrm_tu_bits = chrm_tu_bits;
7009
7010 memcpy(
7011 &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0],
7012 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7013 IHEVC_CAB_CTXT_END);
7014 }
7015 }
7016
7017 /*!
7018 ******************************************************************************
7019 * \if Function name : ihevce_chroma_cu_prcs_rdopt \endif
7020 *
7021 * \brief
7022 * Coding unit processing function for chroma
7023 *
7024 * \param[in] ps_ctxt enc_loop module ctxt pointer
7025 * \param[in] rd_opt_curr_idx index in the array of RDopt params
7026 * \param[in] func_proc_mode TU_EQ_CU or other case
7027 * \param[in] pu1_chrm_src pointer to source data buffer
7028 * \param[in] chrm_src_stride source buffer stride
7029 * \param[in] pu1_cu_left pointer to left recon data buffer
7030 * \param[in] pu1_cu_top pointer to top recon data buffer
7031 * \param[in] pu1_cu_top_left pointer to top left recon data buffer
7032 * \param[in] cu_left_stride left recon buffer stride
7033 * \param[in] cu_pos_x position x of current CU in CTB
7034 * \param[in] cu_pos_y position y of current CU in CTB
7035 * \param[out] pi4_chrm_tu_bits pointer to store the total chroma bits
7036 *
7037 * \return
7038 * Chroma coding cost (Cb and Cr included)
7039 *
7040 * \author
7041 * Ittiam
7042 *
7043 *****************************************************************************
7044 */
ihevce_chroma_cu_prcs_rdopt(ihevce_enc_loop_ctxt_t * ps_ctxt,WORD32 rd_opt_curr_idx,WORD32 func_proc_mode,UWORD8 * pu1_chrm_src,WORD32 chrm_src_stride,UWORD8 * pu1_cu_left,UWORD8 * pu1_cu_top,UWORD8 * pu1_cu_top_left,WORD32 cu_left_stride,WORD32 cu_pos_x,WORD32 cu_pos_y,WORD32 * pi4_chrm_tu_bits,WORD32 i4_alpha_stim_multiplier,UWORD8 u1_is_cu_noisy)7045 LWORD64 ihevce_chroma_cu_prcs_rdopt(
7046 ihevce_enc_loop_ctxt_t *ps_ctxt,
7047 WORD32 rd_opt_curr_idx,
7048 WORD32 func_proc_mode,
7049 UWORD8 *pu1_chrm_src,
7050 WORD32 chrm_src_stride,
7051 UWORD8 *pu1_cu_left,
7052 UWORD8 *pu1_cu_top,
7053 UWORD8 *pu1_cu_top_left,
7054 WORD32 cu_left_stride,
7055 WORD32 cu_pos_x,
7056 WORD32 cu_pos_y,
7057 WORD32 *pi4_chrm_tu_bits,
7058 WORD32 i4_alpha_stim_multiplier,
7059 UWORD8 u1_is_cu_noisy)
7060 {
7061 tu_enc_loop_out_t *ps_tu;
7062 tu_enc_loop_temp_prms_t *ps_tu_temp_prms;
7063
7064 ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
7065
7066 UWORD8 *pu1_pred;
7067 UWORD8 *pu1_recon;
7068 WORD32 i4_recon_stride;
7069 WORD32 cu_size, trans_size = 0;
7070 WORD32 pred_strd;
7071 WORD32 ctr, i4_subtu_idx;
7072 WORD32 scan_idx;
7073 WORD32 u1_is_cu_coded_old;
7074 WORD32 init_bytes_offset;
7075
7076 enc_loop_cu_final_prms_t *ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_curr_idx];
7077 recon_datastore_t *ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore;
7078
7079 WORD32 total_bytes_offset = 0;
7080 LWORD64 chrm_cod_cost = 0;
7081 WORD32 chrm_tu_bits = 0;
7082 WORD32 chrm_pred_mode = DM_CHROMA_IDX, luma_pred_mode = 35;
7083 LWORD64 i8_ssd_cb = 0;
7084 WORD32 i4_bits_cb = 0;
7085 LWORD64 i8_ssd_cr = 0;
7086 WORD32 i4_bits_cr = 0;
7087 UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
7088 UWORD8 u1_num_tus =
7089 /* NumChromaTU's = 1, if TUSize = 4 and CUSize = 8 */
7090 (!ps_best_cu_prms->as_tu_enc_loop[0].s_tu.b3_size && ps_best_cu_prms->u1_intra_flag)
7091 ? 1
7092 : ps_best_cu_prms->u2_num_tus_in_cu;
7093 UWORD8 u1_num_subtus_in_tu = u1_is_422 + 1;
7094 UWORD8 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
7095 (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
7096 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
7097 /* Get the RDOPT cost of the best CU mode for early_exit */
7098 LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!rd_opt_curr_idx].i8_best_rdopt_cost;
7099 /* Get the current running RDOPT (Luma RDOPT) for early_exit */
7100 LWORD64 curr_rdopt_cost = ps_ctxt->as_cu_prms[rd_opt_curr_idx].i8_curr_rdopt_cost;
7101 WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
7102 WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
7103
7104 ihevc_intra_pred_chroma_ref_substitution_fptr =
7105 ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
7106
7107 if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
7108 {
7109 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
7110 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
7111 }
7112
7113 /* Store the init bytes offset from luma */
7114 init_bytes_offset = ps_best_cu_prms->i4_num_bytes_ecd_data;
7115
7116 /* Unused pred buffer in merge_skip_pred_data_t structure is used as
7117 Chroma pred storage buf. for final_recon function.
7118 The buffer is split into two and used as a ping-pong buffer */
7119 pu1_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
7120 rd_opt_curr_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) +
7121 (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
7122
7123 pred_strd = ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
7124
7125 pu1_recon = (UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs[0];
7126 i4_recon_stride = ps_recon_datastore->i4_chromaRecon_stride;
7127 cu_size = ps_best_cu_prms->u1_cu_size;
7128 chrm_tu_bits = 0;
7129
7130 /* get the first TU pointer */
7131 ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
7132 /* get the first TU enc_loop temp prms pointer */
7133 ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
7134
7135 if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag)
7136 {
7137 /* Mode signalled by intra prediction for luma */
7138 luma_pred_mode = ps_best_cu_prms->au1_intra_pred_mode[0];
7139
7140 #if DISABLE_RDOQ_INTRA
7141 i4_perform_rdoq = 0;
7142 #endif
7143 }
7144
7145 else
7146 {
7147 UWORD8 *pu1_pred_org = pu1_pred;
7148
7149 /* ------ Motion Compensation for Chroma -------- */
7150 for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++)
7151 {
7152 pu_t *ps_pu;
7153 WORD32 inter_pu_wd;
7154 WORD32 inter_pu_ht;
7155
7156 ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr];
7157
7158 inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
7159 inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
7160 inter_pu_ht <<= u1_is_422;
7161
7162 ihevce_chroma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_strd);
7163
7164 if(2 == ps_best_cu_prms->u2_num_pus_in_cu)
7165 {
7166 /* 2Nx__ partition case */
7167 if(inter_pu_wd == cu_size)
7168 {
7169 pu1_pred += (inter_pu_ht * pred_strd);
7170 }
7171
7172 /* __x2N partition case */
7173 if(inter_pu_ht == (cu_size >> (u1_is_422 == 0)))
7174 {
7175 pu1_pred += inter_pu_wd;
7176 }
7177 }
7178 }
7179
7180 /* restore the pred pointer to start for transform loop */
7181 pu1_pred = pu1_pred_org;
7182 }
7183
7184 /* Used to store back only the luma based info. if SATD based chroma
7185 mode also comes */
7186 u1_is_cu_coded_old = ps_best_cu_prms->u1_is_cu_coded;
7187
7188 /* evaluate chroma candidates (same as luma) and
7189 if INTRA & HIGH_QUALITY compare with best SATD mode */
7190 {
7191 WORD32 calc_recon = 0, deq_data_strd;
7192 WORD16 *pi2_deq_data;
7193 UWORD8 *pu1_ecd_data;
7194 UWORD8 u1_is_mode_eq_chroma_satd_mode = 0;
7195
7196 pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0];
7197 pi2_deq_data += ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx;
7198 deq_data_strd = cu_size;
7199 /* update ecd buffer for storing coeff. */
7200 pu1_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0];
7201 pu1_ecd_data += init_bytes_offset;
7202 /* store chroma starting index */
7203 ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx = init_bytes_offset;
7204
7205 /* get the first TU pointer */
7206 ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
7207 ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
7208
7209 /* Reset total_bytes_offset for each candidate */
7210 chrm_pred_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[luma_pred_mode]
7211 : luma_pred_mode;
7212
7213 total_bytes_offset = 0;
7214
7215 if(TU_EQ_SUBCU == func_proc_mode)
7216 {
7217 func_proc_mode = TU_EQ_CU_DIV2;
7218 }
7219
7220 /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and
7221 TU_EQ_CU_DIV2 and TU_EQ_SUBCU case */
7222 if(8 == cu_size)
7223 {
7224 func_proc_mode = TU_EQ_CU;
7225 }
7226
7227 /* loop based on num tus in a cu */
7228 if(!ps_best_cu_prms->u1_intra_flag || !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd ||
7229 (ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd &&
7230 (chrm_pred_mode !=
7231 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode].u1_best_cr_mode)))
7232 {
7233 /* loop based on num tus in a cu */
7234 for(ctr = 0; ctr < u1_num_tus; ctr++)
7235 {
7236 WORD32 num_bytes = 0;
7237 LWORD64 curr_cb_cod_cost = 0;
7238 LWORD64 curr_cr_cod_cost = 0;
7239 WORD32 chrm_pred_func_idx = 0;
7240 UWORD8 u1_is_early_exit_condition_satisfied = 0;
7241
7242 /* Default cb and cr offset initialization for b3_chroma_intra_mode_idx=7 */
7243 /* FIX for TU tree shrinkage caused by ecd data copies in final mode recon */
7244 ps_tu->s_tu.b1_cb_cbf = ps_tu->s_tu.b1_cr_cbf = 0;
7245 ps_tu->s_tu.b1_cb_cbf_subtu1 = ps_tu->s_tu.b1_cr_cbf_subtu1 = 0;
7246 ps_tu->ai4_cb_coeff_offset[0] = total_bytes_offset + init_bytes_offset;
7247 ps_tu->ai4_cr_coeff_offset[0] = total_bytes_offset + init_bytes_offset;
7248 ps_tu->ai4_cb_coeff_offset[1] = total_bytes_offset + init_bytes_offset;
7249 ps_tu->ai4_cr_coeff_offset[1] = total_bytes_offset + init_bytes_offset;
7250 ps_tu_temp_prms->ai2_cb_bytes_consumed[0] = 0;
7251 ps_tu_temp_prms->ai2_cr_bytes_consumed[0] = 0;
7252 ps_tu_temp_prms->ai2_cb_bytes_consumed[1] = 0;
7253 ps_tu_temp_prms->ai2_cr_bytes_consumed[1] = 0;
7254
7255 /* TU level inits */
7256 /* check if chroma present flag is set */
7257 if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
7258 {
7259 /* RDOPT copy States : TU init (best until prev TU) to current */
7260 COPY_CABAC_STATES(
7261 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
7262 .s_cabac_ctxt.au1_ctxt_models[0],
7263 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7264 IHEVC_CAB_CTXT_END);
7265
7266 /* get the current transform size */
7267 trans_size = ps_tu->s_tu.b3_size;
7268 trans_size = (1 << (trans_size + 1)); /* in chroma units */
7269
7270 /* since 2x2 transform is not allowed for chroma*/
7271 if(2 == trans_size)
7272 {
7273 trans_size = 4;
7274 }
7275 }
7276
7277 for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++)
7278 {
7279 WORD32 cbf;
7280 UWORD8 u1_is_recon_available;
7281
7282 WORD32 nbr_flags = 0;
7283 WORD32 zero_cols = 0;
7284 WORD32 zero_rows = 0;
7285
7286 /* check if chroma present flag is set */
7287 if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
7288 {
7289 UWORD8 *pu1_cur_pred;
7290 UWORD8 *pu1_cur_recon;
7291 UWORD8 *pu1_cur_src;
7292 WORD16 *pi2_cur_deq_data;
7293 WORD32 curr_pos_x, curr_pos_y;
7294 LWORD64 trans_ssd_u, trans_ssd_v;
7295
7296 /* get the current sub-tu posx and posy w.r.t to cu */
7297 curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3);
7298 curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) +
7299 (i4_subtu_idx * trans_size);
7300
7301 /* 420sp case only vertical height will be half */
7302 if(u1_is_422 == 0)
7303 {
7304 curr_pos_y >>= 1;
7305 }
7306
7307 /* increment the pointers to start of current Sub-TU */
7308 pu1_cur_recon = (pu1_recon + curr_pos_x);
7309 pu1_cur_recon += (curr_pos_y * i4_recon_stride);
7310 pu1_cur_src = (pu1_chrm_src + curr_pos_x);
7311 pu1_cur_src += (curr_pos_y * chrm_src_stride);
7312 pu1_cur_pred = (pu1_pred + curr_pos_x);
7313 pu1_cur_pred += (curr_pos_y * pred_strd);
7314 pi2_cur_deq_data = pi2_deq_data + curr_pos_x;
7315 pi2_cur_deq_data += (curr_pos_y * deq_data_strd);
7316
7317 /* populate the coeffs scan idx */
7318 scan_idx = SCAN_DIAG_UPRIGHT;
7319
7320 /* perform intra prediction only for Intra case */
7321 if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag)
7322 {
7323 UWORD8 *pu1_top_left;
7324 UWORD8 *pu1_top;
7325 UWORD8 *pu1_left;
7326 WORD32 left_strd;
7327
7328 calc_recon = !u1_compute_spatial_ssd &&
7329 ((4 == u1_num_tus) || (u1_is_422 == 1)) &&
7330 (((u1_num_tus == 1) && (0 == i4_subtu_idx)) ||
7331 ((ctr == 3) && (0 == i4_subtu_idx) && (u1_is_422 == 1)) ||
7332 ((u1_num_tus == 4) && (ctr < 3)));
7333
7334 /* left cu boundary */
7335 if(0 == curr_pos_x)
7336 {
7337 pu1_left = pu1_cu_left + curr_pos_y * cu_left_stride;
7338 left_strd = cu_left_stride;
7339 }
7340 else
7341 {
7342 pu1_left = pu1_cur_recon - 2;
7343 left_strd = i4_recon_stride;
7344 }
7345
7346 /* top cu boundary */
7347 if(0 == curr_pos_y)
7348 {
7349 pu1_top = pu1_cu_top + curr_pos_x;
7350 }
7351 else
7352 {
7353 pu1_top = pu1_cur_recon - i4_recon_stride;
7354 }
7355
7356 /* by default top left is set to cu top left */
7357 pu1_top_left = pu1_cu_top_left;
7358
7359 /* top left based on position */
7360 if((0 != curr_pos_y) && (0 == curr_pos_x))
7361 {
7362 pu1_top_left = pu1_left - cu_left_stride;
7363 }
7364 else if(0 != curr_pos_x)
7365 {
7366 pu1_top_left = pu1_top - 2;
7367 }
7368
7369 /* for 4x4 transforms based on intra pred mode scan is chosen */
7370 if(4 == trans_size)
7371 {
7372 /* for modes from 22 upto 30 horizontal scan is used */
7373 if((chrm_pred_mode > 21) && (chrm_pred_mode < 31))
7374 {
7375 scan_idx = SCAN_HORZ;
7376 }
7377 /* for modes from 6 upto 14 vertical scan is used */
7378 else if((chrm_pred_mode > 5) && (chrm_pred_mode < 15))
7379 {
7380 scan_idx = SCAN_VERT;
7381 }
7382 }
7383
7384 nbr_flags = ihevce_get_intra_chroma_tu_nbr(
7385 ps_best_cu_prms->au4_nbr_flags[ctr],
7386 i4_subtu_idx,
7387 trans_size,
7388 u1_is_422);
7389
7390 /* call the chroma reference array substitution */
7391 ihevc_intra_pred_chroma_ref_substitution_fptr(
7392 pu1_top_left,
7393 pu1_top,
7394 pu1_left,
7395 left_strd,
7396 trans_size,
7397 nbr_flags,
7398 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
7399 1);
7400
7401 /* use the look up to get the function idx */
7402 chrm_pred_func_idx = g_i4_ip_funcs[chrm_pred_mode];
7403
7404 /* call the intra prediction function */
7405 ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
7406 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
7407 1,
7408 pu1_cur_pred,
7409 pred_strd,
7410 trans_size,
7411 chrm_pred_mode);
7412 }
7413
7414 if(!ctr && !i4_subtu_idx && (u1_compute_spatial_ssd || calc_recon))
7415 {
7416 ps_recon_datastore->au1_is_chromaRecon_available[0] =
7417 !ps_best_cu_prms->u1_skip_flag;
7418 }
7419 else if(!ctr && !i4_subtu_idx)
7420 {
7421 ps_recon_datastore->au1_is_chromaRecon_available[0] = 0;
7422 }
7423 /************************************************************/
7424 /* recon loop is done for all cases including skip cu */
7425 /* This is because skipping chroma residual based on luma */
7426 /* skip decision can lead to chroma artifacts */
7427 /************************************************************/
7428 /************************************************************/
7429 /*In the high quality and medium speed modes, wherein chroma*/
7430 /*and luma costs are included in the total cost calculation */
7431 /*the cost is just a ssd cost, and not that obtained through*/
7432 /*iq_it path */
7433 /************************************************************/
7434 if(ps_best_cu_prms->u1_skip_flag == 0)
7435 {
7436 WORD32 tu_bits;
7437
7438 cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
7439 ps_ctxt,
7440 pu1_cur_pred,
7441 pred_strd,
7442 pu1_cur_src,
7443 chrm_src_stride,
7444 pi2_cur_deq_data,
7445 deq_data_strd,
7446 pu1_cur_recon,
7447 i4_recon_stride,
7448 pu1_ecd_data + total_bytes_offset,
7449 ps_ctxt->au1_cu_csbf,
7450 ps_ctxt->i4_cu_csbf_strd,
7451 trans_size,
7452 scan_idx,
7453 PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag,
7454 &num_bytes,
7455 &tu_bits,
7456 &zero_cols,
7457 &zero_rows,
7458 &u1_is_recon_available,
7459 i4_perform_sbh,
7460 i4_perform_rdoq,
7461 &trans_ssd_u,
7462 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7463 i4_alpha_stim_multiplier,
7464 u1_is_cu_noisy,
7465 #endif
7466 ps_best_cu_prms->u1_skip_flag,
7467 u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
7468 U_PLANE);
7469
7470 if(u1_compute_spatial_ssd && u1_is_recon_available)
7471 {
7472 ps_recon_datastore
7473 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7474 [i4_subtu_idx] = 0;
7475 }
7476 else
7477 {
7478 ps_recon_datastore
7479 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7480 [i4_subtu_idx] = UCHAR_MAX;
7481 }
7482
7483 #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7484 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7485 {
7486 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
7487 trans_ssd_u = ihevce_inject_stim_into_distortion(
7488 pu1_cur_src,
7489 chrm_src_stride,
7490 pu1_cur_pred,
7491 pred_strd,
7492 trans_ssd_u,
7493 i4_alpha_stim_multiplier,
7494 trans_size,
7495 0,
7496 ps_ctxt->u1_enable_psyRDOPT,
7497 U_PLANE);
7498 #else
7499 if(u1_compute_spatial_ssd && u1_is_recon_available)
7500 {
7501 trans_ssd_u = ihevce_inject_stim_into_distortion(
7502 pu1_cur_src,
7503 chrm_src_stride,
7504 pu1_cur_recon,
7505 i4_recon_stride,
7506 trans_ssd_u,
7507 i4_alpha_stim_multiplier,
7508 trans_size,
7509 0,
7510 ps_ctxt->u1_enable_psyRDOPT,
7511 U_PLANE);
7512 }
7513 else
7514 {
7515 trans_ssd_u = ihevce_inject_stim_into_distortion(
7516 pu1_cur_src,
7517 chrm_src_stride,
7518 pu1_cur_pred,
7519 pred_strd,
7520 trans_ssd_u,
7521 i4_alpha_stim_multiplier,
7522 trans_size,
7523 0,
7524 ps_ctxt->u1_enable_psyRDOPT,
7525 U_PLANE);
7526 }
7527 #endif
7528 }
7529 #endif
7530
7531 curr_cb_cod_cost =
7532 trans_ssd_u +
7533 COMPUTE_RATE_COST_CLIP30(
7534 tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
7535
7536 chrm_tu_bits += tu_bits;
7537 i4_bits_cb += tu_bits;
7538
7539 /* RDOPT copy States : New updated after curr TU to TU init */
7540 if(0 != cbf)
7541 {
7542 COPY_CABAC_STATES(
7543 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7544 &ps_ctxt->s_rdopt_entropy_ctxt
7545 .as_cu_entropy_ctxt[rd_opt_curr_idx]
7546 .s_cabac_ctxt.au1_ctxt_models[0],
7547 IHEVC_CAB_CTXT_END);
7548 }
7549 /* RDOPT copy States : Restoring back the Cb init state to Cr */
7550 else
7551 {
7552 COPY_CABAC_STATES(
7553 &ps_ctxt->s_rdopt_entropy_ctxt
7554 .as_cu_entropy_ctxt[rd_opt_curr_idx]
7555 .s_cabac_ctxt.au1_ctxt_models[0],
7556 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7557 IHEVC_CAB_CTXT_END);
7558 }
7559
7560 /* If Intra and TU=CU/2, need recon for next TUs */
7561 if(calc_recon)
7562 {
7563 ihevce_chroma_it_recon_fxn(
7564 ps_ctxt,
7565 pi2_cur_deq_data,
7566 deq_data_strd,
7567 pu1_cur_pred,
7568 pred_strd,
7569 pu1_cur_recon,
7570 i4_recon_stride,
7571 (pu1_ecd_data + total_bytes_offset),
7572 trans_size,
7573 cbf,
7574 zero_cols,
7575 zero_rows,
7576 U_PLANE);
7577
7578 ps_recon_datastore
7579 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7580 [i4_subtu_idx] = 0;
7581 }
7582 else
7583 {
7584 ps_recon_datastore
7585 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7586 [i4_subtu_idx] = UCHAR_MAX;
7587 }
7588 }
7589 else
7590 {
7591 /* num bytes is set to 0 */
7592 num_bytes = 0;
7593
7594 /* cbf is returned as 0 */
7595 cbf = 0;
7596
7597 curr_cb_cod_cost = trans_ssd_u =
7598
7599 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
7600 pu1_cur_pred,
7601 pu1_cur_src,
7602 pred_strd,
7603 chrm_src_stride,
7604 trans_size,
7605 trans_size,
7606 U_PLANE);
7607
7608 if(u1_compute_spatial_ssd)
7609 {
7610 /* buffer copy from pred to recon */
7611
7612 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
7613 pu1_cur_pred,
7614 pred_strd,
7615 pu1_cur_recon,
7616 i4_recon_stride,
7617 trans_size,
7618 trans_size,
7619 U_PLANE);
7620
7621 ps_recon_datastore
7622 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7623 [i4_subtu_idx] = 0;
7624 }
7625
7626 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7627 {
7628 trans_ssd_u = ihevce_inject_stim_into_distortion(
7629 pu1_cur_src,
7630 chrm_src_stride,
7631 pu1_cur_pred,
7632 pred_strd,
7633 trans_ssd_u,
7634 i4_alpha_stim_multiplier,
7635 trans_size,
7636 0,
7637 ps_ctxt->u1_enable_psyRDOPT,
7638 U_PLANE);
7639 }
7640
7641 #if ENABLE_INTER_ZCU_COST
7642 #if !WEIGH_CHROMA_COST
7643 /* cbf = 0, accumulate cu not coded cost */
7644 ps_ctxt->i8_cu_not_coded_cost += curr_cb_cod_cost;
7645 #else
7646 /* cbf = 0, accumulate cu not coded cost */
7647
7648 ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
7649 (curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7650 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7651 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7652 #endif
7653 #endif
7654 }
7655
7656 #if !WEIGH_CHROMA_COST
7657 curr_rdopt_cost += curr_cb_cod_cost;
7658 #else
7659 curr_rdopt_cost +=
7660 ((curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7661 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7662 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7663 #endif
7664 chrm_cod_cost += curr_cb_cod_cost;
7665 i8_ssd_cb += trans_ssd_u;
7666
7667 if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
7668 {
7669 /* Early exit : If the current running cost exceeds
7670 the prev. best mode cost, break */
7671 if(curr_rdopt_cost > prev_best_rdopt_cost)
7672 {
7673 u1_is_early_exit_condition_satisfied = 1;
7674 break;
7675 }
7676 }
7677
7678 /* inter cu is coded if any of the tu is coded in it */
7679 ps_best_cu_prms->u1_is_cu_coded |= cbf;
7680
7681 /* update CB related params */
7682 ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] =
7683 total_bytes_offset + init_bytes_offset;
7684
7685 if(0 == i4_subtu_idx)
7686 {
7687 ps_tu->s_tu.b1_cb_cbf = cbf;
7688 }
7689 else
7690 {
7691 ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf;
7692 }
7693
7694 total_bytes_offset += num_bytes;
7695
7696 ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] = zero_cols;
7697 ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] = zero_rows;
7698 ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes;
7699
7700 /* recon loop is done for non skip cases */
7701 if(ps_best_cu_prms->u1_skip_flag == 0)
7702 {
7703 WORD32 tu_bits;
7704
7705 cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
7706 ps_ctxt,
7707 pu1_cur_pred,
7708 pred_strd,
7709 pu1_cur_src,
7710 chrm_src_stride,
7711 pi2_cur_deq_data + trans_size,
7712 deq_data_strd,
7713 pu1_cur_recon,
7714 i4_recon_stride,
7715 pu1_ecd_data + total_bytes_offset,
7716 ps_ctxt->au1_cu_csbf,
7717 ps_ctxt->i4_cu_csbf_strd,
7718 trans_size,
7719 scan_idx,
7720 PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag,
7721 &num_bytes,
7722 &tu_bits,
7723 &zero_cols,
7724 &zero_rows,
7725 &u1_is_recon_available,
7726 i4_perform_sbh,
7727 i4_perform_rdoq,
7728 &trans_ssd_v,
7729 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7730 i4_alpha_stim_multiplier,
7731 u1_is_cu_noisy,
7732 #endif
7733 ps_best_cu_prms->u1_skip_flag,
7734 u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
7735 V_PLANE);
7736
7737 if(u1_compute_spatial_ssd && u1_is_recon_available)
7738 {
7739 ps_recon_datastore
7740 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7741 [i4_subtu_idx] = 0;
7742 }
7743 else
7744 {
7745 ps_recon_datastore
7746 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7747 [i4_subtu_idx] = UCHAR_MAX;
7748 }
7749
7750 #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7751 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7752 {
7753 #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
7754 trans_ssd_v = ihevce_inject_stim_into_distortion(
7755 pu1_cur_src,
7756 chrm_src_stride,
7757 pu1_cur_pred,
7758 pred_strd,
7759 trans_ssd_v,
7760 i4_alpha_stim_multiplier,
7761 trans_size,
7762 0,
7763 ps_ctxt->u1_enable_psyRDOPT,
7764 V_PLANE);
7765 #else
7766 if(u1_compute_spatial_ssd && u1_is_recon_available)
7767 {
7768 trans_ssd_v = ihevce_inject_stim_into_distortion(
7769 pu1_cur_src,
7770 chrm_src_stride,
7771 pu1_cur_recon,
7772 i4_recon_stride,
7773 trans_ssd_v,
7774 i4_alpha_stim_multiplier,
7775 trans_size,
7776 0,
7777 ps_ctxt->u1_enable_psyRDOPT,
7778 V_PLANE);
7779 }
7780 else
7781 {
7782 trans_ssd_v = ihevce_inject_stim_into_distortion(
7783 pu1_cur_src,
7784 chrm_src_stride,
7785 pu1_cur_pred,
7786 pred_strd,
7787 trans_ssd_v,
7788 i4_alpha_stim_multiplier,
7789 trans_size,
7790 0,
7791 ps_ctxt->u1_enable_psyRDOPT,
7792 V_PLANE);
7793 }
7794 #endif
7795 }
7796 #endif
7797
7798 curr_cr_cod_cost =
7799 trans_ssd_v +
7800 COMPUTE_RATE_COST_CLIP30(
7801 tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
7802 chrm_tu_bits += tu_bits;
7803 i4_bits_cr += tu_bits;
7804
7805 /* RDOPT copy States : New updated after curr TU to TU init */
7806 if(0 != cbf)
7807 {
7808 COPY_CABAC_STATES(
7809 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7810 &ps_ctxt->s_rdopt_entropy_ctxt
7811 .as_cu_entropy_ctxt[rd_opt_curr_idx]
7812 .s_cabac_ctxt.au1_ctxt_models[0],
7813 IHEVC_CAB_CTXT_END);
7814 }
7815 /* RDOPT copy States : Restoring back the Cb init state to Cr */
7816 else
7817 {
7818 COPY_CABAC_STATES(
7819 &ps_ctxt->s_rdopt_entropy_ctxt
7820 .as_cu_entropy_ctxt[rd_opt_curr_idx]
7821 .s_cabac_ctxt.au1_ctxt_models[0],
7822 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7823 IHEVC_CAB_CTXT_END);
7824 }
7825
7826 /* If Intra and TU=CU/2, need recon for next TUs */
7827 if(calc_recon)
7828 {
7829 ihevce_chroma_it_recon_fxn(
7830 ps_ctxt,
7831 (pi2_cur_deq_data + trans_size),
7832 deq_data_strd,
7833 pu1_cur_pred,
7834 pred_strd,
7835 pu1_cur_recon,
7836 i4_recon_stride,
7837 (pu1_ecd_data + total_bytes_offset),
7838 trans_size,
7839 cbf,
7840 zero_cols,
7841 zero_rows,
7842 V_PLANE);
7843
7844 ps_recon_datastore
7845 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7846 [i4_subtu_idx] = 0;
7847 }
7848 else
7849 {
7850 ps_recon_datastore
7851 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7852 [i4_subtu_idx] = UCHAR_MAX;
7853 }
7854 }
7855 else
7856 {
7857 /* num bytes is set to 0 */
7858 num_bytes = 0;
7859
7860 /* cbf is returned as 0 */
7861 cbf = 0;
7862
7863 curr_cr_cod_cost = trans_ssd_v =
7864
7865 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
7866 pu1_cur_pred,
7867 pu1_cur_src,
7868 pred_strd,
7869 chrm_src_stride,
7870 trans_size,
7871 trans_size,
7872 V_PLANE);
7873
7874 if(u1_compute_spatial_ssd)
7875 {
7876 /* buffer copy fromp pred to recon */
7877 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
7878 pu1_cur_pred,
7879 pred_strd,
7880 pu1_cur_recon,
7881 i4_recon_stride,
7882 trans_size,
7883 trans_size,
7884 V_PLANE);
7885
7886 ps_recon_datastore
7887 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7888 [i4_subtu_idx] = 0;
7889 }
7890
7891 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7892 {
7893 trans_ssd_v = ihevce_inject_stim_into_distortion(
7894 pu1_cur_src,
7895 chrm_src_stride,
7896 pu1_cur_pred,
7897 pred_strd,
7898 trans_ssd_v,
7899 i4_alpha_stim_multiplier,
7900 trans_size,
7901 0,
7902 ps_ctxt->u1_enable_psyRDOPT,
7903 V_PLANE);
7904 }
7905
7906 #if ENABLE_INTER_ZCU_COST
7907 #if !WEIGH_CHROMA_COST
7908 /* cbf = 0, accumulate cu not coded cost */
7909 ps_ctxt->i8_cu_not_coded_cost += curr_cr_cod_cost;
7910 #else
7911 /* cbf = 0, accumulate cu not coded cost */
7912
7913 ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
7914 (curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7915 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7916 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7917 #endif
7918 #endif
7919 }
7920
7921 #if !WEIGH_CHROMA_COST
7922 curr_rdopt_cost += curr_cr_cod_cost;
7923 #else
7924 curr_rdopt_cost +=
7925 ((curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7926 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7927 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7928 #endif
7929
7930 chrm_cod_cost += curr_cr_cod_cost;
7931 i8_ssd_cr += trans_ssd_v;
7932
7933 if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
7934 {
7935 /* Early exit : If the current running cost exceeds
7936 the prev. best mode cost, break */
7937 if(curr_rdopt_cost > prev_best_rdopt_cost)
7938 {
7939 u1_is_early_exit_condition_satisfied = 1;
7940 break;
7941 }
7942 }
7943
7944 /* inter cu is coded if any of the tu is coded in it */
7945 ps_best_cu_prms->u1_is_cu_coded |= cbf;
7946
7947 /* update CR related params */
7948 ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] =
7949 total_bytes_offset + init_bytes_offset;
7950
7951 if(0 == i4_subtu_idx)
7952 {
7953 ps_tu->s_tu.b1_cr_cbf = cbf;
7954 }
7955 else
7956 {
7957 ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf;
7958 }
7959
7960 total_bytes_offset += num_bytes;
7961
7962 ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] = zero_cols;
7963 ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] = zero_rows;
7964 ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes;
7965 }
7966 else
7967 {
7968 ps_recon_datastore
7969 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx] =
7970 UCHAR_MAX;
7971 ps_recon_datastore
7972 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx] =
7973 UCHAR_MAX;
7974 }
7975 }
7976
7977 if(u1_is_early_exit_condition_satisfied)
7978 {
7979 break;
7980 }
7981
7982 /* loop increments */
7983 ps_tu++;
7984 ps_tu_temp_prms++;
7985 }
7986
7987 /* Signal as luma mode. HIGH_QUALITY may update it */
7988 ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
7989
7990 /* modify the cost chrm_cod_cost */
7991 if(ps_ctxt->u1_enable_psyRDOPT)
7992 {
7993 UWORD8 *pu1_recon_cu;
7994 WORD32 recon_stride;
7995 WORD32 curr_pos_x;
7996 WORD32 curr_pos_y;
7997 WORD32 start_index;
7998 WORD32 num_horz_cu_in_ctb;
7999 WORD32 had_block_size;
8000 /* tODO: sreenivasa ctb size has to be used appropriately */
8001 had_block_size = 8;
8002 num_horz_cu_in_ctb = 2 * 64 / had_block_size;
8003
8004 curr_pos_x = cu_pos_x << 3; /* pel units */
8005 curr_pos_y = cu_pos_y << 3; /* pel units */
8006 recon_stride = i4_recon_stride;
8007 pu1_recon_cu = pu1_recon;
8008
8009 /* start index to index the source satd of curr cu int he current ctb*/
8010 start_index = 2 * (curr_pos_x / had_block_size) +
8011 (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
8012
8013 {
8014 chrm_cod_cost += ihevce_psy_rd_cost_croma(
8015 ps_ctxt->ai4_source_chroma_satd,
8016 pu1_recon,
8017 recon_stride,
8018 1, //
8019 cu_size,
8020 0, // pic type
8021 0, //layer id
8022 ps_ctxt->i4_satd_lamda, // lambda
8023 start_index,
8024 ps_ctxt->u1_is_input_data_hbd, // 8 bit
8025 ps_ctxt->u1_chroma_array_type,
8026 &ps_ctxt->s_cmn_opt_func
8027
8028 ); // chroma subsampling 420
8029 }
8030 }
8031 }
8032 else
8033 {
8034 u1_is_mode_eq_chroma_satd_mode = 1;
8035 chrm_cod_cost = MAX_COST_64;
8036 }
8037
8038 /* If Intra Block and preset is HIGH QUALITY, then compare with best SATD mode */
8039 if((PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag) &&
8040 (1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd))
8041 {
8042 if(64 == cu_size)
8043 {
8044 ASSERT(TU_EQ_CU != func_proc_mode);
8045 }
8046
8047 if(ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode]
8048 .i8_chroma_best_rdopt < chrm_cod_cost)
8049 {
8050 UWORD8 *pu1_src;
8051 UWORD8 *pu1_ecd_data_src_cb;
8052 UWORD8 *pu1_ecd_data_src_cr;
8053
8054 chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt =
8055 &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode];
8056
8057 UWORD8 *pu1_dst = &ps_ctxt->au1_rdopt_init_ctxt_models[0];
8058 WORD32 ai4_ecd_data_cb_offset[2] = { 0, 0 };
8059 WORD32 ai4_ecd_data_cr_offset[2] = { 0, 0 };
8060
8061 pu1_src = &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0];
8062 chrm_cod_cost = ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt;
8063 chrm_pred_mode = ps_chr_intra_satd_ctxt->u1_best_cr_mode;
8064 chrm_tu_bits = ps_chr_intra_satd_ctxt->i4_chrm_tu_bits;
8065
8066 if(u1_is_mode_eq_chroma_satd_mode)
8067 {
8068 chrm_cod_cost -= ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode;
8069 }
8070
8071 /*Resetting total_num_bytes_to 0*/
8072 total_bytes_offset = 0;
8073
8074 /* Update the CABAC state corresponding to chroma only */
8075 /* Chroma Cbf */
8076 memcpy(pu1_dst + IHEVC_CAB_CBCR_IDX, pu1_src + IHEVC_CAB_CBCR_IDX, 2);
8077 /* Chroma transform skip */
8078 memcpy(pu1_dst + IHEVC_CAB_TFM_SKIP12, pu1_src + IHEVC_CAB_TFM_SKIP12, 1);
8079 /* Chroma last coeff x prefix */
8080 memcpy(
8081 pu1_dst + IHEVC_CAB_COEFFX_PREFIX + 15,
8082 pu1_src + IHEVC_CAB_COEFFX_PREFIX + 15,
8083 3);
8084 /* Chroma last coeff y prefix */
8085 memcpy(
8086 pu1_dst + IHEVC_CAB_COEFFY_PREFIX + 15,
8087 pu1_src + IHEVC_CAB_COEFFY_PREFIX + 15,
8088 3);
8089 /* Chroma csbf */
8090 memcpy(
8091 pu1_dst + IHEVC_CAB_CODED_SUBLK_IDX + 2,
8092 pu1_src + IHEVC_CAB_CODED_SUBLK_IDX + 2,
8093 2);
8094 /* Chroma sig coeff flags */
8095 memcpy(
8096 pu1_dst + IHEVC_CAB_COEFF_FLAG + 27, pu1_src + IHEVC_CAB_COEFF_FLAG + 27, 15);
8097 /* Chroma absgt1 flags */
8098 memcpy(
8099 pu1_dst + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16,
8100 pu1_src + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16,
8101 8);
8102 /* Chroma absgt2 flags */
8103 memcpy(
8104 pu1_dst + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4,
8105 pu1_src + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4,
8106 2);
8107
8108 ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
8109 ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
8110
8111 /* update to luma decision as we update chroma in final mode */
8112 ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded_old;
8113
8114 for(ctr = 0; ctr < u1_num_tus; ctr++)
8115 {
8116 for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++)
8117 {
8118 WORD32 cbf;
8119 WORD32 num_bytes;
8120
8121 pu1_ecd_data_src_cb =
8122 &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0];
8123 pu1_ecd_data_src_cr =
8124 &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0];
8125
8126 /* check if chroma present flag is set */
8127 if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
8128 {
8129 UWORD8 *pu1_cur_pred_dest;
8130 UWORD8 *pu1_cur_pred_src;
8131 WORD32 pred_src_strd;
8132 WORD16 *pi2_cur_deq_data_dest;
8133 WORD16 *pi2_cur_deq_data_src_cb;
8134 WORD16 *pi2_cur_deq_data_src_cr;
8135 WORD32 deq_src_strd;
8136
8137 WORD32 curr_pos_x, curr_pos_y;
8138
8139 trans_size = ps_tu->s_tu.b3_size;
8140 trans_size = (1 << (trans_size + 1)); /* in chroma units */
8141
8142 /*Deriving stride values*/
8143 pred_src_strd = ps_chr_intra_satd_ctxt->i4_pred_stride;
8144 deq_src_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride;
8145
8146 /* since 2x2 transform is not allowed for chroma*/
8147 if(2 == trans_size)
8148 {
8149 trans_size = 4;
8150 }
8151
8152 /* get the current tu posx and posy w.r.t to cu */
8153 curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3);
8154 curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) +
8155 (i4_subtu_idx * trans_size);
8156
8157 /* 420sp case only vertical height will be half */
8158 if(0 == u1_is_422)
8159 {
8160 curr_pos_y >>= 1;
8161 }
8162
8163 /* increment the pointers to start of current TU */
8164 pu1_cur_pred_src =
8165 ((UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data + curr_pos_x);
8166 pu1_cur_pred_src += (curr_pos_y * pred_src_strd);
8167 pu1_cur_pred_dest = (pu1_pred + curr_pos_x);
8168 pu1_cur_pred_dest += (curr_pos_y * pred_strd);
8169
8170 pi2_cur_deq_data_src_cb =
8171 &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] + (curr_pos_x >> 1);
8172 pi2_cur_deq_data_src_cr =
8173 &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] + (curr_pos_x >> 1);
8174 pi2_cur_deq_data_src_cb += (curr_pos_y * deq_src_strd);
8175 pi2_cur_deq_data_src_cr += (curr_pos_y * deq_src_strd);
8176 pi2_cur_deq_data_dest = pi2_deq_data + curr_pos_x;
8177 pi2_cur_deq_data_dest += (curr_pos_y * deq_data_strd);
8178
8179 /*Overwriting deq data with that belonging to the winning special mode
8180 (luma mode != chroma mode)
8181 ihevce_copy_2d takes source and dest arguments as UWORD8 *. We have to
8182 correspondingly manipulate to copy WORD16 data*/
8183
8184 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
8185 (UWORD8 *)pi2_cur_deq_data_dest,
8186 (deq_data_strd << 1),
8187 (UWORD8 *)pi2_cur_deq_data_src_cb,
8188 (deq_src_strd << 1),
8189 (trans_size << 1),
8190 trans_size);
8191
8192 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
8193 (UWORD8 *)(pi2_cur_deq_data_dest + trans_size),
8194 (deq_data_strd << 1),
8195 (UWORD8 *)pi2_cur_deq_data_src_cr,
8196 (deq_src_strd << 1),
8197 (trans_size << 1),
8198 trans_size);
8199
8200 /*Overwriting pred data with that belonging to the winning special mode
8201 (luma mode != chroma mode)*/
8202
8203 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
8204 pu1_cur_pred_dest,
8205 pred_strd,
8206 pu1_cur_pred_src,
8207 pred_src_strd,
8208 (trans_size << 1),
8209 trans_size);
8210
8211 num_bytes = ps_chr_intra_satd_ctxt
8212 ->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr];
8213 cbf = ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr];
8214 /* inter cu is coded if any of the tu is coded in it */
8215 ps_best_cu_prms->u1_is_cu_coded |= cbf;
8216
8217 /* update CB related params */
8218 ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] =
8219 total_bytes_offset + init_bytes_offset;
8220
8221 if(0 == i4_subtu_idx)
8222 {
8223 ps_tu->s_tu.b1_cb_cbf = cbf;
8224 }
8225 else
8226 {
8227 ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf;
8228 }
8229
8230 /*Overwriting the cb ecd data corresponding to the special mode*/
8231 if(0 != num_bytes)
8232 {
8233 memcpy(
8234 (pu1_ecd_data + total_bytes_offset),
8235 pu1_ecd_data_src_cb + ai4_ecd_data_cb_offset[i4_subtu_idx],
8236 num_bytes);
8237 }
8238
8239 total_bytes_offset += num_bytes;
8240 ai4_ecd_data_cb_offset[i4_subtu_idx] += num_bytes;
8241 ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes;
8242
8243 num_bytes = ps_chr_intra_satd_ctxt
8244 ->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr];
8245 cbf = ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr];
8246 /* inter cu is coded if any of the tu is coded in it */
8247 ps_best_cu_prms->u1_is_cu_coded |= cbf;
8248
8249 /*Overwriting the cr ecd data corresponding to the special mode*/
8250 if(0 != num_bytes)
8251 {
8252 memcpy(
8253 (pu1_ecd_data + total_bytes_offset),
8254 pu1_ecd_data_src_cr + ai4_ecd_data_cr_offset[i4_subtu_idx],
8255 num_bytes);
8256 }
8257
8258 /* update CR related params */
8259 ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] =
8260 total_bytes_offset + init_bytes_offset;
8261
8262 if(0 == i4_subtu_idx)
8263 {
8264 ps_tu->s_tu.b1_cr_cbf = cbf;
8265 }
8266 else
8267 {
8268 ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf;
8269 }
8270
8271 total_bytes_offset += num_bytes;
8272 ai4_ecd_data_cr_offset[i4_subtu_idx] += num_bytes;
8273
8274 /*Updating zero rows and zero cols*/
8275 ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] =
8276 ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr];
8277 ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] =
8278 ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr];
8279 ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] =
8280 ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr];
8281 ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] =
8282 ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr];
8283
8284 ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes;
8285
8286 if((u1_num_tus > 1) &&
8287 ps_recon_datastore->au1_is_chromaRecon_available[2])
8288 {
8289 ps_recon_datastore
8290 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
8291 [i4_subtu_idx] = 2;
8292 ps_recon_datastore
8293 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
8294 [i4_subtu_idx] = 2;
8295 }
8296 else if(
8297 (1 == u1_num_tus) &&
8298 ps_recon_datastore->au1_is_chromaRecon_available[1])
8299 {
8300 ps_recon_datastore
8301 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
8302 [i4_subtu_idx] = 1;
8303 ps_recon_datastore
8304 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
8305 [i4_subtu_idx] = 1;
8306 }
8307 else
8308 {
8309 ps_recon_datastore
8310 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
8311 [i4_subtu_idx] = UCHAR_MAX;
8312 ps_recon_datastore
8313 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
8314 [i4_subtu_idx] = UCHAR_MAX;
8315 }
8316 }
8317 }
8318
8319 /* loop increments */
8320 ps_tu++;
8321 ps_tu_temp_prms++;
8322 }
8323 }
8324
8325 if(!u1_is_422)
8326 {
8327 if(chrm_pred_mode == luma_pred_mode)
8328 {
8329 ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
8330 }
8331 else if(chrm_pred_mode == 0)
8332 {
8333 ps_best_cu_prms->u1_chroma_intra_pred_mode = 0;
8334 }
8335 else if(chrm_pred_mode == 1)
8336 {
8337 ps_best_cu_prms->u1_chroma_intra_pred_mode = 3;
8338 }
8339 else if(chrm_pred_mode == 10)
8340 {
8341 ps_best_cu_prms->u1_chroma_intra_pred_mode = 2;
8342 }
8343 else if(chrm_pred_mode == 26)
8344 {
8345 ps_best_cu_prms->u1_chroma_intra_pred_mode = 1;
8346 }
8347 else
8348 {
8349 ASSERT(0); /*Should not come here*/
8350 }
8351 }
8352 else
8353 {
8354 if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[luma_pred_mode])
8355 {
8356 ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
8357 }
8358 else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[0])
8359 {
8360 ps_best_cu_prms->u1_chroma_intra_pred_mode = 0;
8361 }
8362 else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[1])
8363 {
8364 ps_best_cu_prms->u1_chroma_intra_pred_mode = 3;
8365 }
8366 else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[10])
8367 {
8368 ps_best_cu_prms->u1_chroma_intra_pred_mode = 2;
8369 }
8370 else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[26])
8371 {
8372 ps_best_cu_prms->u1_chroma_intra_pred_mode = 1;
8373 }
8374 else
8375 {
8376 ASSERT(0); /*Should not come here*/
8377 }
8378 }
8379 }
8380
8381 /* Store the actual chroma mode */
8382 ps_best_cu_prms->u1_chroma_intra_pred_actual_mode = chrm_pred_mode;
8383 }
8384
8385 /* update the total bytes produced */
8386 ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes_offset + init_bytes_offset;
8387
8388 /* store the final chrm bits accumulated */
8389 *pi4_chrm_tu_bits = chrm_tu_bits;
8390
8391 return (chrm_cod_cost);
8392 }
8393
8394 /*!
8395 ******************************************************************************
8396 * \if Function name : ihevce_final_rdopt_mode_prcs \endif
8397 *
8398 * \brief
8399 * Final RDOPT mode process function. Performs Recon computation for the
8400 * final mode. Re-use or Compute pred, iq-data, coeff based on the flags.
8401 *
8402 * \param[in] ps_ctxt : pointer to enc_loop module context
8403 * \param[in] ps_prms : pointer to struct containing requisite parameters
8404 *
8405 * \return
8406 * None
8407 *
8408 * \author
8409 * Ittiam
8410 *
8411 *****************************************************************************
8412 */
ihevce_final_rdopt_mode_prcs(ihevce_enc_loop_ctxt_t * ps_ctxt,final_mode_process_prms_t * ps_prms)8413 void ihevce_final_rdopt_mode_prcs(
8414 ihevce_enc_loop_ctxt_t *ps_ctxt, final_mode_process_prms_t *ps_prms)
8415 {
8416 enc_loop_cu_final_prms_t *ps_best_cu_prms;
8417 tu_enc_loop_out_t *ps_tu_enc_loop;
8418 tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms;
8419 nbr_avail_flags_t s_nbr;
8420 recon_datastore_t *ps_recon_datastore;
8421
8422 ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
8423 ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
8424 ihevc_intra_pred_ref_filtering_ft *ihevc_intra_pred_ref_filtering_fptr;
8425
8426 WORD32 num_tu_in_cu;
8427 LWORD64 rd_opt_cost;
8428 WORD32 ctr;
8429 WORD32 i4_subtu_idx;
8430 WORD32 cu_size;
8431 WORD32 cu_pos_x, cu_pos_y;
8432 WORD32 chrm_present_flag = 1;
8433 WORD32 num_bytes, total_bytes = 0;
8434 WORD32 chrm_ctr = 0;
8435 WORD32 u1_is_cu_coded;
8436 UWORD8 *pu1_old_ecd_data;
8437 UWORD8 *pu1_chrm_old_ecd_data;
8438 UWORD8 *pu1_cur_pred;
8439 WORD16 *pi2_deq_data;
8440 WORD16 *pi2_chrm_deq_data;
8441 WORD16 *pi2_cur_deq_data;
8442 WORD16 *pi2_cur_deq_data_chrm;
8443 UWORD8 *pu1_cur_luma_recon;
8444 UWORD8 *pu1_cur_chroma_recon;
8445 UWORD8 *pu1_cur_src;
8446 UWORD8 *pu1_cur_src_chrm;
8447 UWORD8 *pu1_cur_pred_chrm;
8448 UWORD8 *pu1_intra_pred_mode;
8449 UWORD32 *pu4_nbr_flags;
8450 LWORD64 i8_ssd;
8451
8452 cu_nbr_prms_t *ps_cu_nbr_prms = ps_prms->ps_cu_nbr_prms;
8453 cu_inter_cand_t *ps_best_inter_cand = ps_prms->ps_best_inter_cand;
8454 enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms = ps_prms->ps_chrm_cu_buf_prms;
8455
8456 WORD32 packed_pred_mode = ps_prms->packed_pred_mode;
8457 WORD32 rd_opt_best_idx = ps_prms->rd_opt_best_idx;
8458 UWORD8 *pu1_src = (UWORD8 *)ps_prms->pv_src;
8459 WORD32 src_strd = ps_prms->src_strd;
8460 UWORD8 *pu1_pred = (UWORD8 *)ps_prms->pv_pred;
8461 WORD32 pred_strd = ps_prms->pred_strd;
8462 UWORD8 *pu1_pred_chrm = (UWORD8 *)ps_prms->pv_pred_chrm;
8463 WORD32 pred_chrm_strd = ps_prms->pred_chrm_strd;
8464 UWORD8 *pu1_final_ecd_data = ps_prms->pu1_final_ecd_data;
8465 UWORD8 *pu1_csbf_buf = ps_prms->pu1_csbf_buf;
8466 WORD32 csbf_strd = ps_prms->csbf_strd;
8467 UWORD8 *pu1_luma_recon = (UWORD8 *)ps_prms->pv_luma_recon;
8468 WORD32 recon_luma_strd = ps_prms->recon_luma_strd;
8469 UWORD8 *pu1_chrm_recon = (UWORD8 *)ps_prms->pv_chrm_recon;
8470 WORD32 recon_chrma_strd = ps_prms->recon_chrma_strd;
8471 UWORD8 u1_cu_pos_x = ps_prms->u1_cu_pos_x;
8472 UWORD8 u1_cu_pos_y = ps_prms->u1_cu_pos_y;
8473 UWORD8 u1_cu_size = ps_prms->u1_cu_size;
8474 WORD8 i1_cu_qp = ps_prms->i1_cu_qp;
8475 UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
8476 UWORD8 u1_num_subtus = (u1_is_422 == 1) + 1;
8477 /* Get the Chroma pointer and parameters */
8478 UWORD8 *pu1_src_chrm = ps_chrm_cu_buf_prms->pu1_curr_src;
8479 WORD32 src_chrm_strd = ps_chrm_cu_buf_prms->i4_chrm_src_stride;
8480 UWORD8 u1_compute_spatial_ssd_luma = 0;
8481 UWORD8 u1_compute_spatial_ssd_chroma = 0;
8482 /* Get the pointer for function selector */
8483 ihevc_intra_pred_luma_ref_substitution_fptr =
8484 ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
8485
8486 ihevc_intra_pred_ref_filtering_fptr =
8487 ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr;
8488
8489 ihevc_intra_pred_chroma_ref_substitution_fptr =
8490 ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
8491
8492 /* Get the best CU parameters */
8493 ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
8494 num_tu_in_cu = ps_best_cu_prms->u2_num_tus_in_cu;
8495 cu_size = ps_best_cu_prms->u1_cu_size;
8496 cu_pos_x = u1_cu_pos_x;
8497 cu_pos_y = u1_cu_pos_y;
8498 pu1_intra_pred_mode = &ps_best_cu_prms->au1_intra_pred_mode[0];
8499 pu4_nbr_flags = &ps_best_cu_prms->au4_nbr_flags[0];
8500 ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore;
8501
8502 /* get the first TU pointer */
8503 ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0];
8504 /* get the first TU only enc_loop prms pointer */
8505 ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
8506 /*modify quant related param in ctxt based on current cu qp*/
8507 if((ps_ctxt->i1_cu_qp_delta_enable))
8508 {
8509 /*recompute quant related param at every cu level*/
8510 ihevce_compute_quant_rel_param(ps_ctxt, i1_cu_qp);
8511
8512 /* get frame level lambda params */
8513 ihevce_get_cl_cu_lambda_prms(
8514 ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? i1_cu_qp : ps_ctxt->i4_frame_qp);
8515 }
8516
8517 ps_best_cu_prms->i8_cu_ssd = 0;
8518 ps_best_cu_prms->u4_cu_open_intra_sad = 0;
8519
8520 /* For skip case : Set TU_size = CU_size and make cbf = 0
8521 so that same TU loop can be used for all modes */
8522 if(PRED_MODE_SKIP == packed_pred_mode)
8523 {
8524 for(ctr = 0; ctr < num_tu_in_cu; ctr++)
8525 {
8526 ps_tu_enc_loop->s_tu.b1_y_cbf = 0;
8527
8528 ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = 0;
8529
8530 ps_tu_enc_loop++;
8531 ps_tu_enc_loop_temp_prms++;
8532 }
8533
8534 /* go back to the first TU pointer */
8535 ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0];
8536 ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
8537 }
8538 /** For inter case, pred calculation is outside the loop **/
8539 if(PRED_MODE_INTRA != packed_pred_mode)
8540 {
8541 /**------------- Compute pred data if required --------------**/
8542 if((1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data))
8543 {
8544 nbr_4x4_t *ps_topleft_nbr_4x4;
8545 nbr_4x4_t *ps_left_nbr_4x4;
8546 nbr_4x4_t *ps_top_nbr_4x4;
8547 WORD32 nbr_4x4_left_strd;
8548
8549 ps_best_inter_cand->pu1_pred_data = pu1_pred;
8550 ps_best_inter_cand->i4_pred_data_stride = pred_strd;
8551
8552 /* Get the CU nbr information */
8553 ps_topleft_nbr_4x4 = ps_cu_nbr_prms->ps_topleft_nbr_4x4;
8554 ps_left_nbr_4x4 = ps_cu_nbr_prms->ps_left_nbr_4x4;
8555 ps_top_nbr_4x4 = ps_cu_nbr_prms->ps_top_nbr_4x4;
8556 nbr_4x4_left_strd = ps_cu_nbr_prms->nbr_4x4_left_strd;
8557
8558 /* MVP ,MVD calc and Motion compensation */
8559 rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)(
8560 ps_ctxt,
8561 ps_best_inter_cand,
8562 u1_cu_size,
8563 cu_pos_x,
8564 cu_pos_y,
8565 ps_left_nbr_4x4,
8566 ps_top_nbr_4x4,
8567 ps_topleft_nbr_4x4,
8568 nbr_4x4_left_strd,
8569 rd_opt_best_idx);
8570 }
8571
8572 /** ------ Motion Compensation for Chroma -------- **/
8573 if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data)
8574 {
8575 UWORD8 *pu1_cur_pred;
8576 pu1_cur_pred = pu1_pred_chrm;
8577
8578 /* run a loop over all the partitons in cu */
8579 for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++)
8580 {
8581 pu_t *ps_pu;
8582 WORD32 inter_pu_wd, inter_pu_ht;
8583
8584 ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr];
8585
8586 /* IF AMP then each partitions can have diff wd ht */
8587 inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
8588 inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
8589 inter_pu_ht <<= u1_is_422;
8590 /* chroma mc func */
8591 ihevce_chroma_inter_pred_pu(
8592 &ps_ctxt->s_mc_ctxt, ps_pu, pu1_cur_pred, pred_chrm_strd);
8593 if(2 == ps_best_cu_prms->u2_num_pus_in_cu)
8594 {
8595 /* 2Nx__ partion case */
8596 if(inter_pu_wd == ps_best_cu_prms->u1_cu_size)
8597 {
8598 pu1_cur_pred += (inter_pu_ht * pred_chrm_strd);
8599 }
8600 /* __x2N partion case */
8601 if(inter_pu_ht == (ps_best_cu_prms->u1_cu_size >> (u1_is_422 == 0)))
8602 {
8603 pu1_cur_pred += inter_pu_wd;
8604 }
8605 }
8606 }
8607 }
8608 }
8609 pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0];
8610 pi2_chrm_deq_data =
8611 &ps_best_cu_prms->pi2_cu_deq_coeffs[0] + ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx;
8612 pu1_old_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0];
8613 pu1_chrm_old_ecd_data =
8614 &ps_best_cu_prms->pu1_cu_coeffs[0] + ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx;
8615
8616 /* default value for cu coded flag */
8617 u1_is_cu_coded = 0;
8618
8619 /* If we are re-computing coeff, set sad to 0 and start accumulating */
8620 /* else use the best cand. sad from RDOPT stage */
8621 if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)
8622 {
8623 /*init of ssd of CU accuumulated over all TU*/
8624 ps_best_cu_prms->u4_cu_sad = 0;
8625
8626 /* reset the luma residual bits */
8627 ps_best_cu_prms->u4_cu_luma_res_bits = 0;
8628 }
8629
8630 if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)
8631 {
8632 /* reset the chroma residual bits */
8633 ps_best_cu_prms->u4_cu_chroma_res_bits = 0;
8634 }
8635
8636 if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data) ||
8637 (1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data))
8638 {
8639 /*Header bits have to be reevaluated if luma and chroma reevaluation is done, as
8640 the quantized coefficients might be changed.
8641 We are copying only those states which correspond to the header from the cabac state
8642 of the previous CU, because the header is going to be recomputed for this condition*/
8643 ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1;
8644 memcpy(
8645 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
8646 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
8647 IHEVC_CAB_COEFFX_PREFIX);
8648
8649 if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data))
8650 {
8651 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
8652 (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX),
8653 (&ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0] +
8654 IHEVC_CAB_COEFFX_PREFIX),
8655 (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX));
8656 }
8657 else
8658 {
8659 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
8660 (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX),
8661 (&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
8662 .s_cabac_ctxt.au1_ctxt_models[0] +
8663 IHEVC_CAB_COEFFX_PREFIX),
8664 (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX));
8665 }
8666 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_best_idx;
8667 }
8668 else
8669 {
8670 ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0;
8671 }
8672
8673 /* Zero cbf tool is disabled for intra CUs */
8674 if(PRED_MODE_INTRA == packed_pred_mode)
8675 {
8676 #if ENABLE_ZERO_CBF_IN_INTRA
8677 ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
8678 #else
8679 ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
8680 #endif
8681 }
8682 else
8683 {
8684 #if DISABLE_ZERO_ZBF_IN_INTER
8685 ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
8686 #else
8687 ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
8688 #endif
8689 }
8690
8691 /** Loop for all tu blocks in current cu and do reconstruction **/
8692 for(ctr = 0; ctr < num_tu_in_cu; ctr++)
8693 {
8694 tu_t *ps_tu;
8695 WORD32 trans_size, num_4x4_in_tu;
8696 WORD32 cbf, zero_rows, zero_cols;
8697 WORD32 cu_pos_x_in_4x4, cu_pos_y_in_4x4;
8698 WORD32 cu_pos_x_in_pix, cu_pos_y_in_pix;
8699 WORD32 luma_pred_mode, chroma_pred_mode = 0;
8700 UWORD8 au1_is_recon_available[2];
8701
8702 ps_tu = &(ps_tu_enc_loop->s_tu); /* Points to the TU property ctxt */
8703
8704 u1_compute_spatial_ssd_luma = 0;
8705 u1_compute_spatial_ssd_chroma = 0;
8706
8707 trans_size = 1 << (ps_tu->b3_size + 2);
8708 num_4x4_in_tu = (trans_size >> 2);
8709 cu_pos_x_in_4x4 = ps_tu->b4_pos_x;
8710 cu_pos_y_in_4x4 = ps_tu->b4_pos_y;
8711
8712 /* populate the coeffs scan idx */
8713 ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
8714
8715 /* get the current pos x and pos y in pixels */
8716 cu_pos_x_in_pix = (cu_pos_x_in_4x4 << 2) - (cu_pos_x << 3);
8717 cu_pos_y_in_pix = (cu_pos_y_in_4x4 << 2) - (cu_pos_y << 3);
8718
8719 /* Update pointers based on the location */
8720 pu1_cur_src = pu1_src + cu_pos_x_in_pix;
8721 pu1_cur_src += (cu_pos_y_in_pix * src_strd);
8722 pu1_cur_pred = pu1_pred + cu_pos_x_in_pix;
8723 pu1_cur_pred += (cu_pos_y_in_pix * pred_strd);
8724
8725 pu1_cur_luma_recon = pu1_luma_recon + cu_pos_x_in_pix;
8726 pu1_cur_luma_recon += (cu_pos_y_in_pix * recon_luma_strd);
8727
8728 pi2_cur_deq_data = pi2_deq_data + cu_pos_x_in_pix;
8729 pi2_cur_deq_data += cu_pos_y_in_pix * cu_size;
8730
8731 pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix;
8732 pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) +
8733 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd));
8734
8735 pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix;
8736 pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) +
8737 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd));
8738
8739 pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix;
8740 pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) +
8741 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd));
8742
8743 pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix;
8744 pi2_cur_deq_data_chrm +=
8745 ((cu_pos_y_in_pix >> 1) * cu_size) + (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size));
8746
8747 /* if transfrom size is 4x4 then only first luma 4x4 will have chroma*/
8748 chrm_present_flag = 1; /* by default chroma present is set to 1*/
8749
8750 if(4 == trans_size)
8751 {
8752 /* if tusize is 4x4 then only first luma 4x4 will have chroma*/
8753 if(0 != chrm_ctr)
8754 {
8755 chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE;
8756 }
8757
8758 /* increment the chrm ctr unconditionally */
8759 chrm_ctr++;
8760 /* after ctr reached 4 reset it */
8761 if(4 == chrm_ctr)
8762 {
8763 chrm_ctr = 0;
8764 }
8765 }
8766
8767 /**------------- Compute pred data if required --------------**/
8768 if(PRED_MODE_INTRA == packed_pred_mode) /* Inter pred calc. is done outside loop */
8769 {
8770 /* Get the pred mode for scan idx calculation, even if pred is not required */
8771 luma_pred_mode = *pu1_intra_pred_mode;
8772
8773 if((ps_ctxt->i4_rc_pass == 1) ||
8774 (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data))
8775 {
8776 WORD32 nbr_flags;
8777 WORD32 luma_pred_func_idx;
8778 UWORD8 *pu1_left;
8779 UWORD8 *pu1_top;
8780 UWORD8 *pu1_top_left;
8781 WORD32 left_strd;
8782
8783 /* left cu boundary */
8784 if(0 == cu_pos_x_in_pix)
8785 {
8786 left_strd = ps_cu_nbr_prms->cu_left_stride;
8787 pu1_left = ps_cu_nbr_prms->pu1_cu_left + cu_pos_y_in_pix * left_strd;
8788 }
8789 else
8790 {
8791 pu1_left = pu1_cur_luma_recon - 1;
8792 left_strd = recon_luma_strd;
8793 }
8794
8795 /* top cu boundary */
8796 if(0 == cu_pos_y_in_pix)
8797 {
8798 pu1_top = ps_cu_nbr_prms->pu1_cu_top + cu_pos_x_in_pix;
8799 }
8800 else
8801 {
8802 pu1_top = pu1_cur_luma_recon - recon_luma_strd;
8803 }
8804
8805 /* by default top left is set to cu top left */
8806 pu1_top_left = ps_cu_nbr_prms->pu1_cu_top_left;
8807
8808 /* top left based on position */
8809 if((0 != cu_pos_y_in_pix) && (0 == cu_pos_x_in_pix))
8810 {
8811 pu1_top_left = pu1_left - left_strd;
8812 }
8813 else if(0 != cu_pos_x_in_pix)
8814 {
8815 pu1_top_left = pu1_top - 1;
8816 }
8817
8818 /* get the neighbour availability flags */
8819 nbr_flags = ihevce_get_nbr_intra(
8820 &s_nbr,
8821 ps_ctxt->pu1_ctb_nbr_map,
8822 ps_ctxt->i4_nbr_map_strd,
8823 cu_pos_x_in_4x4,
8824 cu_pos_y_in_4x4,
8825 num_4x4_in_tu);
8826
8827 if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data)
8828 {
8829 /* copy the nbr flags for chroma reuse */
8830 if(4 != trans_size)
8831 {
8832 *pu4_nbr_flags = nbr_flags;
8833 }
8834 else if(1 == chrm_present_flag)
8835 {
8836 /* compute the avail flags assuming luma trans is 8x8 */
8837 /* get the neighbour availability flags */
8838 *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
8839 ps_ctxt->pu1_ctb_nbr_map,
8840 ps_ctxt->i4_nbr_map_strd,
8841 cu_pos_x_in_4x4,
8842 cu_pos_y_in_4x4,
8843 (num_4x4_in_tu << 1),
8844 (num_4x4_in_tu << 1));
8845 }
8846
8847 /* call reference array substitution */
8848 ihevc_intra_pred_luma_ref_substitution_fptr(
8849 pu1_top_left,
8850 pu1_top,
8851 pu1_left,
8852 left_strd,
8853 trans_size,
8854 nbr_flags,
8855 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
8856 1);
8857
8858 /* call reference filtering */
8859 ihevc_intra_pred_ref_filtering_fptr(
8860 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
8861 trans_size,
8862 (UWORD8 *)ps_ctxt->pv_ref_filt_out,
8863 luma_pred_mode,
8864 ps_ctxt->i1_strong_intra_smoothing_enable_flag);
8865
8866 /* use the look up to get the function idx */
8867 luma_pred_func_idx = g_i4_ip_funcs[luma_pred_mode];
8868
8869 /* call the intra prediction function */
8870 ps_ctxt->apf_lum_ip[luma_pred_func_idx](
8871 (UWORD8 *)ps_ctxt->pv_ref_filt_out,
8872 1,
8873 pu1_cur_pred,
8874 pred_strd,
8875 trans_size,
8876 luma_pred_mode);
8877 }
8878 }
8879 else if(
8880 (1 == chrm_present_flag) &&
8881 (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data))
8882 {
8883 WORD32 temp_num_4x4_in_tu = num_4x4_in_tu;
8884
8885 if(4 == trans_size) /* compute the avail flags assuming luma trans is 8x8 */
8886 {
8887 temp_num_4x4_in_tu = num_4x4_in_tu << 1;
8888 }
8889
8890 *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
8891 ps_ctxt->pu1_ctb_nbr_map,
8892 ps_ctxt->i4_nbr_map_strd,
8893 cu_pos_x_in_4x4,
8894 cu_pos_y_in_4x4,
8895 temp_num_4x4_in_tu,
8896 temp_num_4x4_in_tu);
8897 }
8898
8899 /* Get the pred mode for scan idx calculation, even if pred is not required */
8900 chroma_pred_mode = ps_best_cu_prms->u1_chroma_intra_pred_actual_mode;
8901 }
8902
8903 if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)
8904 {
8905 WORD32 temp_bits;
8906 LWORD64 temp_cost;
8907 UWORD32 u4_tu_sad;
8908 WORD32 perform_sbh, perform_rdoq;
8909
8910 if(PRED_MODE_INTRA == packed_pred_mode)
8911 {
8912 /* for luma 4x4 and 8x8 transforms, the scan is chosen based on intra pred mode */
8913 if(trans_size < 16)
8914 {
8915 /* for modes from 22 upto 30 horizontal scan is used */
8916 if((luma_pred_mode > 21) && (luma_pred_mode < 31))
8917 {
8918 ps_ctxt->i4_scan_idx = SCAN_HORZ;
8919 }
8920 /* for modes from 6 upto 14 vertical scan is used */
8921 else if((luma_pred_mode > 5) && (luma_pred_mode < 15))
8922 {
8923 ps_ctxt->i4_scan_idx = SCAN_VERT;
8924 }
8925 }
8926 }
8927
8928 /* RDOPT copy States : TU init (best until prev TU) to current */
8929 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
8930 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
8931 .s_cabac_ctxt.au1_ctxt_models[0] +
8932 IHEVC_CAB_COEFFX_PREFIX,
8933 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
8934 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
8935
8936 if(ps_prms->u1_recompute_sbh_and_rdoq)
8937 {
8938 perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH);
8939 perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ);
8940 }
8941 else
8942 {
8943 /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
8944 perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh;
8945 /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
8946 we would have to do RDOQ again.*/
8947 perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq;
8948 }
8949
8950 #if DISABLE_RDOQ_INTRA
8951 if(PRED_MODE_INTRA == packed_pred_mode)
8952 {
8953 perform_rdoq = 0;
8954 }
8955 #endif
8956 /*If BEST candidate RDOQ is enabled, either no coef level rdoq or CU level rdoq has to be enabled
8957 so that all candidates and best candidate are quantized with same rounding factor */
8958 if(1 == perform_rdoq)
8959 {
8960 ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING);
8961 }
8962
8963 cbf = ihevce_t_q_iq_ssd_scan_fxn(
8964 ps_ctxt,
8965 pu1_cur_pred,
8966 pred_strd,
8967 pu1_cur_src,
8968 src_strd,
8969 pi2_cur_deq_data,
8970 cu_size, /*deq_data stride is cu_size*/
8971 pu1_cur_luma_recon,
8972 recon_luma_strd,
8973 pu1_final_ecd_data,
8974 pu1_csbf_buf,
8975 csbf_strd,
8976 trans_size,
8977 packed_pred_mode,
8978 &temp_cost,
8979 &num_bytes,
8980 &temp_bits,
8981 &u4_tu_sad,
8982 &zero_cols,
8983 &zero_rows,
8984 &au1_is_recon_available[0],
8985 perform_rdoq, //(BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level),
8986 perform_sbh,
8987 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
8988 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
8989 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
8990 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
8991 100.0,
8992 ps_prms->u1_is_cu_noisy,
8993 #endif
8994 u1_compute_spatial_ssd_luma ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
8995 1 /*early cbf*/
8996 ); //(BEST_CAND_SBH == ps_ctxt->i4_sbh_level));
8997
8998 /* Accumulate luma residual bits */
8999 ps_best_cu_prms->u4_cu_luma_res_bits += temp_bits;
9000
9001 /* RDOPT copy States : New updated after curr TU to TU init */
9002 if(0 != cbf)
9003 {
9004 /* update to new state only if CBF is non zero */
9005 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9006 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9007 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9008 .s_cabac_ctxt.au1_ctxt_models[0] +
9009 IHEVC_CAB_COEFFX_PREFIX,
9010 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9011 }
9012
9013 /* accumulate the TU sad into cu sad */
9014 ps_best_cu_prms->u4_cu_sad += u4_tu_sad;
9015 ps_tu->b1_y_cbf = cbf;
9016 ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = num_bytes;
9017
9018 /* If somebody updates cbf (RDOQ or SBH), update in nbr str. for BS */
9019 if((ps_prms->u1_will_cabac_state_change) && (!ps_prms->u1_is_first_pass))
9020 {
9021 WORD32 num_4x4_in_cu = u1_cu_size >> 2;
9022 nbr_4x4_t *ps_cur_nbr_4x4 = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0];
9023 ps_cur_nbr_4x4 = (ps_cur_nbr_4x4 + (cu_pos_x_in_pix >> 2));
9024 ps_cur_nbr_4x4 += ((cu_pos_y_in_pix >> 2) * num_4x4_in_cu);
9025 /* replicate the nbr 4x4 structure for all 4x4 blocks of the current TU */
9026 ps_cur_nbr_4x4->b1_y_cbf = cbf;
9027 /*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/
9028 ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
9029 /* Qp and cbf are stored for the all 4x4 in TU */
9030 {
9031 WORD32 i, j;
9032 nbr_4x4_t *ps_tmp_4x4;
9033 ps_tmp_4x4 = ps_cur_nbr_4x4;
9034
9035 for(i = 0; i < num_4x4_in_tu; i++)
9036 {
9037 for(j = 0; j < num_4x4_in_tu; j++)
9038 {
9039 ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp;
9040 ps_tmp_4x4[j].b1_y_cbf = cbf;
9041 }
9042 /* row level update*/
9043 ps_tmp_4x4 += num_4x4_in_cu;
9044 }
9045 }
9046 }
9047 }
9048 else
9049 {
9050 zero_cols = ps_tu_enc_loop_temp_prms->u4_luma_zero_col;
9051 zero_rows = ps_tu_enc_loop_temp_prms->u4_luma_zero_row;
9052
9053 if(ps_prms->u1_will_cabac_state_change)
9054 {
9055 num_bytes = ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed;
9056 }
9057 else
9058 {
9059 num_bytes = 0;
9060 }
9061
9062 /* copy luma ecd data to final buffer */
9063 memcpy(pu1_final_ecd_data, pu1_old_ecd_data, num_bytes);
9064
9065 pu1_old_ecd_data += num_bytes;
9066
9067 au1_is_recon_available[0] = 0;
9068 }
9069
9070 /**-------- Compute Recon data (Do IT & Recon) : Luma -----------**/
9071 if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9072 (!u1_compute_spatial_ssd_luma ||
9073 (!au1_is_recon_available[0] && u1_compute_spatial_ssd_luma)))
9074 {
9075 if(!ps_recon_datastore->u1_is_lumaRecon_available ||
9076 (ps_recon_datastore->u1_is_lumaRecon_available &&
9077 (UCHAR_MAX == ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr])))
9078 {
9079 ihevce_it_recon_fxn(
9080 ps_ctxt,
9081 pi2_cur_deq_data,
9082 cu_size,
9083 pu1_cur_pred,
9084 pred_strd,
9085 pu1_cur_luma_recon,
9086 recon_luma_strd,
9087 pu1_final_ecd_data,
9088 trans_size,
9089 packed_pred_mode,
9090 ps_tu->b1_y_cbf,
9091 zero_cols,
9092 zero_rows);
9093 }
9094 else if(
9095 ps_recon_datastore->u1_is_lumaRecon_available &&
9096 (UCHAR_MAX != ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]))
9097 {
9098 UWORD8 *pu1_recon_src =
9099 ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
9100 [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]]) +
9101 cu_pos_x_in_pix + cu_pos_y_in_pix * ps_recon_datastore->i4_lumaRecon_stride;
9102
9103 ps_ctxt->s_cmn_opt_func.pf_copy_2d(
9104 pu1_cur_luma_recon,
9105 recon_luma_strd,
9106 pu1_recon_src,
9107 ps_recon_datastore->i4_lumaRecon_stride,
9108 trans_size,
9109 trans_size);
9110 }
9111 }
9112
9113 if(ps_prms->u1_will_cabac_state_change)
9114 {
9115 ps_tu_enc_loop->i4_luma_coeff_offset = total_bytes;
9116 }
9117
9118 pu1_final_ecd_data += num_bytes;
9119 /* update total bytes consumed */
9120 total_bytes += num_bytes;
9121
9122 u1_is_cu_coded |= ps_tu->b1_y_cbf;
9123
9124 /***************** Compute T,Q,IQ,IT & Recon for Chroma ********************/
9125 if(1 == chrm_present_flag)
9126 {
9127 pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix;
9128 pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) +
9129 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd));
9130
9131 pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix;
9132 pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) +
9133 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd));
9134
9135 pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix;
9136 pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) +
9137 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd));
9138
9139 pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix;
9140 pi2_cur_deq_data_chrm += ((cu_pos_y_in_pix >> 1) * cu_size) +
9141 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size));
9142
9143 if(INCLUDE_CHROMA_DURING_TU_RECURSION &&
9144 (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0) &&
9145 (PRED_MODE_INTRA != packed_pred_mode))
9146 {
9147 WORD32 i4_num_bytes;
9148 UWORD8 *pu1_chroma_pred;
9149 UWORD8 *pu1_chroma_recon;
9150 WORD16 *pi2_chroma_deq;
9151 UWORD32 u4_zero_col;
9152 UWORD32 u4_zero_row;
9153
9154 for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
9155 {
9156 WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
9157 WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
9158 WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
9159
9160 if(0 == u1_is_422)
9161 {
9162 i4_subtu_pos_y >>= 1;
9163 }
9164
9165 pu1_chroma_pred =
9166 pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
9167 pu1_chroma_recon = pu1_cur_chroma_recon +
9168 (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
9169 pi2_chroma_deq =
9170 pi2_cur_deq_data_chrm + (i4_subtu_idx * chroma_trans_size * cu_size);
9171
9172 u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx];
9173 u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx];
9174
9175 if(ps_prms->u1_will_cabac_state_change)
9176 {
9177 i4_num_bytes =
9178 ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx];
9179 }
9180 else
9181 {
9182 i4_num_bytes = 0;
9183 }
9184
9185 memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes);
9186
9187 pu1_old_ecd_data += i4_num_bytes;
9188
9189 au1_is_recon_available[U_PLANE] = 0;
9190
9191 if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9192 (!u1_compute_spatial_ssd_chroma ||
9193 (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma)))
9194 {
9195 if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9196 (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9197 (UCHAR_MAX ==
9198 ps_recon_datastore
9199 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])))
9200 {
9201 ihevce_chroma_it_recon_fxn(
9202 ps_ctxt,
9203 pi2_chroma_deq,
9204 cu_size,
9205 pu1_chroma_pred,
9206 pred_chrm_strd,
9207 pu1_chroma_recon,
9208 recon_chrma_strd,
9209 pu1_final_ecd_data,
9210 chroma_trans_size,
9211 (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1,
9212 u4_zero_col,
9213 u4_zero_row,
9214 U_PLANE);
9215 }
9216 else if(
9217 ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9218 (UCHAR_MAX !=
9219 ps_recon_datastore
9220 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))
9221 {
9222 UWORD8 *pu1_recon_src =
9223 ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9224 [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9225 [U_PLANE][ctr][i4_subtu_idx]]) +
9226 i4_subtu_pos_x +
9227 i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9228
9229 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9230 pu1_recon_src,
9231 ps_recon_datastore->i4_lumaRecon_stride,
9232 pu1_chroma_recon,
9233 recon_chrma_strd,
9234 chroma_trans_size,
9235 chroma_trans_size,
9236 U_PLANE);
9237 }
9238 }
9239
9240 u1_is_cu_coded |=
9241 ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf);
9242
9243 pu1_final_ecd_data += i4_num_bytes;
9244 total_bytes += i4_num_bytes;
9245 }
9246
9247 for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
9248 {
9249 WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
9250 WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
9251 WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
9252
9253 if(0 == u1_is_422)
9254 {
9255 i4_subtu_pos_y >>= 1;
9256 }
9257
9258 pu1_chroma_pred =
9259 pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
9260 pu1_chroma_recon = pu1_cur_chroma_recon +
9261 (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
9262 pi2_chroma_deq = pi2_cur_deq_data_chrm +
9263 (i4_subtu_idx * chroma_trans_size * cu_size) +
9264 chroma_trans_size;
9265
9266 u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx];
9267 u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx];
9268
9269 if(ps_prms->u1_will_cabac_state_change)
9270 {
9271 i4_num_bytes =
9272 ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx];
9273 }
9274 else
9275 {
9276 i4_num_bytes = 0;
9277 }
9278
9279 memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes);
9280
9281 pu1_old_ecd_data += i4_num_bytes;
9282
9283 au1_is_recon_available[V_PLANE] = 0;
9284
9285 if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9286 (!u1_compute_spatial_ssd_chroma ||
9287 (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma)))
9288 {
9289 if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9290 (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9291 (UCHAR_MAX ==
9292 ps_recon_datastore
9293 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])))
9294 {
9295 ihevce_chroma_it_recon_fxn(
9296 ps_ctxt,
9297 pi2_chroma_deq,
9298 cu_size,
9299 pu1_chroma_pred,
9300 pred_chrm_strd,
9301 pu1_chroma_recon,
9302 recon_chrma_strd,
9303 pu1_final_ecd_data,
9304 chroma_trans_size,
9305 (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1,
9306 u4_zero_col,
9307 u4_zero_row,
9308 V_PLANE);
9309 }
9310 else if(
9311 ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9312 (UCHAR_MAX !=
9313 ps_recon_datastore
9314 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))
9315 {
9316 UWORD8 *pu1_recon_src =
9317 ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9318 [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9319 [V_PLANE][ctr][i4_subtu_idx]]) +
9320 i4_subtu_pos_x +
9321 i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9322
9323 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9324 pu1_recon_src,
9325 ps_recon_datastore->i4_lumaRecon_stride,
9326 pu1_chroma_recon,
9327 recon_chrma_strd,
9328 chroma_trans_size,
9329 chroma_trans_size,
9330 V_PLANE);
9331 }
9332 }
9333
9334 u1_is_cu_coded |=
9335 ((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf);
9336
9337 pu1_final_ecd_data += i4_num_bytes;
9338 total_bytes += i4_num_bytes;
9339 }
9340 }
9341 else
9342 {
9343 WORD32 cb_zero_col, cb_zero_row, cr_zero_col, cr_zero_row;
9344
9345 for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
9346 {
9347 WORD32 cb_cbf, cr_cbf;
9348 WORD32 cb_num_bytes, cr_num_bytes;
9349
9350 WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
9351
9352 WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
9353 WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
9354
9355 if(0 == u1_is_422)
9356 {
9357 i4_subtu_pos_y >>= 1;
9358 }
9359
9360 pu1_cur_src_chrm += (i4_subtu_idx * chroma_trans_size * src_chrm_strd);
9361 pu1_cur_pred_chrm += (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
9362 pu1_cur_chroma_recon += (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
9363 pi2_cur_deq_data_chrm += (i4_subtu_idx * chroma_trans_size * cu_size);
9364
9365 if((PRED_MODE_INTRA == packed_pred_mode) &&
9366 (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data))
9367 {
9368 WORD32 nbr_flags, left_strd_chrm, chrm_pred_func_idx;
9369 UWORD8 *pu1_left_chrm;
9370 UWORD8 *pu1_top_chrm;
9371 UWORD8 *pu1_top_left_chrm;
9372
9373 nbr_flags = ihevce_get_intra_chroma_tu_nbr(
9374 *pu4_nbr_flags, i4_subtu_idx, chroma_trans_size, u1_is_422);
9375
9376 /* left cu boundary */
9377 if(0 == i4_subtu_pos_x)
9378 {
9379 left_strd_chrm = ps_chrm_cu_buf_prms->i4_cu_left_stride;
9380 pu1_left_chrm =
9381 ps_chrm_cu_buf_prms->pu1_cu_left + i4_subtu_pos_y * left_strd_chrm;
9382 }
9383 else
9384 {
9385 pu1_left_chrm = pu1_cur_chroma_recon - 2;
9386 left_strd_chrm = recon_chrma_strd;
9387 }
9388
9389 /* top cu boundary */
9390 if(0 == i4_subtu_pos_y)
9391 {
9392 pu1_top_chrm = ps_chrm_cu_buf_prms->pu1_cu_top + i4_subtu_pos_x;
9393 }
9394 else
9395 {
9396 pu1_top_chrm = pu1_cur_chroma_recon - recon_chrma_strd;
9397 }
9398
9399 /* by default top left is set to cu top left */
9400 pu1_top_left_chrm = ps_chrm_cu_buf_prms->pu1_cu_top_left;
9401
9402 /* top left based on position */
9403 if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x))
9404 {
9405 pu1_top_left_chrm = pu1_left_chrm - left_strd_chrm;
9406 }
9407 else if(0 != i4_subtu_pos_x)
9408 {
9409 pu1_top_left_chrm = pu1_top_chrm - 2;
9410 }
9411
9412 /* call the chroma reference array substitution */
9413 ihevc_intra_pred_chroma_ref_substitution_fptr(
9414 pu1_top_left_chrm,
9415 pu1_top_chrm,
9416 pu1_left_chrm,
9417 left_strd_chrm,
9418 chroma_trans_size,
9419 nbr_flags,
9420 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
9421 1);
9422
9423 /* use the look up to get the function idx */
9424 chrm_pred_func_idx = g_i4_ip_funcs[chroma_pred_mode];
9425
9426 /* call the intra prediction function */
9427 ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
9428 (UWORD8 *)ps_ctxt->pv_ref_sub_out,
9429 1,
9430 pu1_cur_pred_chrm,
9431 pred_chrm_strd,
9432 chroma_trans_size,
9433 chroma_pred_mode);
9434 }
9435
9436 /**---------- Compute iq&coeff data if required : Chroma ------------**/
9437 if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)
9438 {
9439 WORD32 perform_sbh, perform_rdoq, temp_bits;
9440
9441 if(ps_prms->u1_recompute_sbh_and_rdoq)
9442 {
9443 perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH);
9444 perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ);
9445 }
9446 else
9447 {
9448 /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
9449 perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh;
9450 /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
9451 we would have to do RDOQ again.*/
9452 perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq;
9453 }
9454
9455 /* populate the coeffs scan idx */
9456 ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
9457
9458 if(PRED_MODE_INTRA == packed_pred_mode)
9459 {
9460 /* for 4x4 transforms, the scan is chosen based on intra pred mode */
9461 if(4 == chroma_trans_size)
9462 {
9463 /* for modes from 22 upto 30 horizontal scan is used */
9464 if((chroma_pred_mode > 21) && (chroma_pred_mode < 31))
9465 {
9466 ps_ctxt->i4_scan_idx = SCAN_HORZ;
9467 }
9468 /* for modes from 6 upto 14 vertical scan is used */
9469 else if((chroma_pred_mode > 5) && (chroma_pred_mode < 15))
9470 {
9471 ps_ctxt->i4_scan_idx = SCAN_VERT;
9472 }
9473 }
9474 }
9475
9476 #if DISABLE_RDOQ_INTRA
9477 if(PRED_MODE_INTRA == packed_pred_mode)
9478 {
9479 perform_rdoq = 0;
9480 }
9481 #endif
9482
9483 /* RDOPT copy States : TU init (best until prev TU) to current */
9484 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9485 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9486 .s_cabac_ctxt.au1_ctxt_models[0] +
9487 IHEVC_CAB_COEFFX_PREFIX,
9488 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9489 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9490
9491 ASSERT(rd_opt_best_idx == ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx);
9492 /*If BEST candidate RDOQ is enabled, either no coef level rdoq or CU level rdoq has to be enabled
9493 so that all candidates and best candidate are quantized with same rounding factor */
9494 if(1 == perform_rdoq)
9495 {
9496 ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING);
9497 }
9498
9499 if(!ps_best_cu_prms->u1_skip_flag ||
9500 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
9501 {
9502 /* Cb */
9503 cb_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
9504 ps_ctxt,
9505 pu1_cur_pred_chrm,
9506 pred_chrm_strd,
9507 pu1_cur_src_chrm,
9508 src_chrm_strd,
9509 pi2_cur_deq_data_chrm,
9510 cu_size,
9511 pu1_chrm_recon,
9512 recon_chrma_strd,
9513 pu1_final_ecd_data,
9514 pu1_csbf_buf,
9515 csbf_strd,
9516 chroma_trans_size,
9517 ps_ctxt->i4_scan_idx,
9518 (PRED_MODE_INTRA == packed_pred_mode),
9519 &cb_num_bytes,
9520 &temp_bits,
9521 &cb_zero_col,
9522 &cb_zero_row,
9523 &au1_is_recon_available[U_PLANE],
9524 perform_sbh,
9525 perform_rdoq,
9526 &i8_ssd,
9527 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
9528 !ps_ctxt->u1_is_refPic
9529 ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
9530 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
9531 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
9532 100.0,
9533 ps_prms->u1_is_cu_noisy,
9534 #endif
9535 ps_best_cu_prms->u1_skip_flag &&
9536 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt,
9537 u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD
9538 : FREQUENCY_DOMAIN_SSD,
9539 U_PLANE);
9540 }
9541 else
9542 {
9543 cb_cbf = 0;
9544 temp_bits = 0;
9545 cb_num_bytes = 0;
9546 au1_is_recon_available[U_PLANE] = 0;
9547 cb_zero_col = 0;
9548 cb_zero_row = 0;
9549 }
9550
9551 /* Accumulate chroma residual bits */
9552 ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits;
9553
9554 /* RDOPT copy States : New updated after curr TU to TU init */
9555 if(0 != cb_cbf)
9556 {
9557 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9558 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9559 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9560 .s_cabac_ctxt.au1_ctxt_models[0] +
9561 IHEVC_CAB_COEFFX_PREFIX,
9562 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9563 }
9564 /* RDOPT copy States : Restoring back the Cb init state to Cr */
9565 else
9566 {
9567 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9568 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9569 .s_cabac_ctxt.au1_ctxt_models[0] +
9570 IHEVC_CAB_COEFFX_PREFIX,
9571 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9572 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9573 }
9574
9575 if(!ps_best_cu_prms->u1_skip_flag ||
9576 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
9577 {
9578 /* Cr */
9579 cr_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
9580 ps_ctxt,
9581 pu1_cur_pred_chrm,
9582 pred_chrm_strd,
9583 pu1_cur_src_chrm,
9584 src_chrm_strd,
9585 pi2_cur_deq_data_chrm + chroma_trans_size,
9586 cu_size,
9587 pu1_chrm_recon,
9588 recon_chrma_strd,
9589 pu1_final_ecd_data + cb_num_bytes,
9590 pu1_csbf_buf,
9591 csbf_strd,
9592 chroma_trans_size,
9593 ps_ctxt->i4_scan_idx,
9594 (PRED_MODE_INTRA == packed_pred_mode),
9595 &cr_num_bytes,
9596 &temp_bits,
9597 &cr_zero_col,
9598 &cr_zero_row,
9599 &au1_is_recon_available[V_PLANE],
9600 perform_sbh,
9601 perform_rdoq,
9602 &i8_ssd,
9603 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
9604 !ps_ctxt->u1_is_refPic
9605 ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
9606 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
9607 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
9608 100.0,
9609 ps_prms->u1_is_cu_noisy,
9610 #endif
9611 ps_best_cu_prms->u1_skip_flag &&
9612 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt,
9613 u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD
9614 : FREQUENCY_DOMAIN_SSD,
9615 V_PLANE);
9616 }
9617 else
9618 {
9619 cr_cbf = 0;
9620 temp_bits = 0;
9621 cr_num_bytes = 0;
9622 au1_is_recon_available[V_PLANE] = 0;
9623 cr_zero_col = 0;
9624 cr_zero_row = 0;
9625 }
9626
9627 /* Accumulate chroma residual bits */
9628 ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits;
9629
9630 /* RDOPT copy States : New updated after curr TU to TU init */
9631 if(0 != cr_cbf)
9632 {
9633 COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9634 &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9635 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9636 .s_cabac_ctxt.au1_ctxt_models[0] +
9637 IHEVC_CAB_COEFFX_PREFIX,
9638 IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9639 }
9640
9641 if(0 == i4_subtu_idx)
9642 {
9643 ps_tu->b1_cb_cbf = cb_cbf;
9644 ps_tu->b1_cr_cbf = cr_cbf;
9645 }
9646 else
9647 {
9648 ps_tu->b1_cb_cbf_subtu1 = cb_cbf;
9649 ps_tu->b1_cr_cbf_subtu1 = cr_cbf;
9650 }
9651 }
9652 else
9653 {
9654 cb_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx];
9655 cb_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx];
9656 cr_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx];
9657 cr_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx];
9658
9659 if(ps_prms->u1_will_cabac_state_change)
9660 {
9661 cb_num_bytes =
9662 ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx];
9663 }
9664 else
9665 {
9666 cb_num_bytes = 0;
9667 }
9668
9669 if(ps_prms->u1_will_cabac_state_change)
9670 {
9671 cr_num_bytes =
9672 ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx];
9673 }
9674 else
9675 {
9676 cr_num_bytes = 0;
9677 }
9678
9679 /* copy cb ecd data to final buffer */
9680 memcpy(pu1_final_ecd_data, pu1_chrm_old_ecd_data, cb_num_bytes);
9681
9682 pu1_chrm_old_ecd_data += cb_num_bytes;
9683
9684 /* copy cr ecd data to final buffer */
9685 memcpy(
9686 (pu1_final_ecd_data + cb_num_bytes),
9687 pu1_chrm_old_ecd_data,
9688 cr_num_bytes);
9689
9690 pu1_chrm_old_ecd_data += cr_num_bytes;
9691
9692 au1_is_recon_available[U_PLANE] = 0;
9693 au1_is_recon_available[V_PLANE] = 0;
9694 }
9695
9696 /**-------- Compute Recon data (Do IT & Recon) : Chroma -----------**/
9697 if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9698 (!u1_compute_spatial_ssd_chroma ||
9699 (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma)))
9700 {
9701 if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9702 (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9703 (UCHAR_MAX ==
9704 ps_recon_datastore
9705 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])))
9706 {
9707 ihevce_chroma_it_recon_fxn(
9708 ps_ctxt,
9709 pi2_cur_deq_data_chrm,
9710 cu_size,
9711 pu1_cur_pred_chrm,
9712 pred_chrm_strd,
9713 pu1_cur_chroma_recon,
9714 recon_chrma_strd,
9715 pu1_final_ecd_data,
9716 chroma_trans_size,
9717 (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1,
9718 cb_zero_col,
9719 cb_zero_row,
9720 U_PLANE);
9721 }
9722 else if(
9723 ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9724 (UCHAR_MAX !=
9725 ps_recon_datastore
9726 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))
9727 {
9728 UWORD8 *pu1_recon_src =
9729 ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9730 [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9731 [U_PLANE][ctr][i4_subtu_idx]]) +
9732 i4_subtu_pos_x +
9733 i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9734
9735 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9736 pu1_recon_src,
9737 ps_recon_datastore->i4_lumaRecon_stride,
9738 pu1_cur_chroma_recon,
9739 recon_chrma_strd,
9740 chroma_trans_size,
9741 chroma_trans_size,
9742 U_PLANE);
9743 }
9744 }
9745
9746 u1_is_cu_coded |=
9747 ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf);
9748
9749 if(ps_prms->u1_will_cabac_state_change)
9750 {
9751 ps_tu_enc_loop->ai4_cb_coeff_offset[i4_subtu_idx] = total_bytes;
9752 }
9753
9754 pu1_final_ecd_data += cb_num_bytes;
9755 /* update total bytes consumed */
9756 total_bytes += cb_num_bytes;
9757
9758 if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9759 (!u1_compute_spatial_ssd_chroma ||
9760 (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma)))
9761 {
9762 if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9763 (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9764 (UCHAR_MAX ==
9765 ps_recon_datastore
9766 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])))
9767 {
9768 ihevce_chroma_it_recon_fxn(
9769 ps_ctxt,
9770 pi2_cur_deq_data_chrm + chroma_trans_size,
9771 cu_size,
9772 pu1_cur_pred_chrm,
9773 pred_chrm_strd,
9774 pu1_cur_chroma_recon,
9775 recon_chrma_strd,
9776 pu1_final_ecd_data,
9777 chroma_trans_size,
9778 (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1,
9779 cr_zero_col,
9780 cr_zero_row,
9781 V_PLANE);
9782 }
9783 else if(
9784 ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9785 (UCHAR_MAX !=
9786 ps_recon_datastore
9787 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))
9788 {
9789 UWORD8 *pu1_recon_src =
9790 ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9791 [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9792 [V_PLANE][ctr][i4_subtu_idx]]) +
9793 i4_subtu_pos_x +
9794 i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9795
9796 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9797 pu1_recon_src,
9798 ps_recon_datastore->i4_lumaRecon_stride,
9799 pu1_cur_chroma_recon,
9800 recon_chrma_strd,
9801 chroma_trans_size,
9802 chroma_trans_size,
9803 V_PLANE);
9804 }
9805 }
9806
9807 u1_is_cu_coded |=
9808 ((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf);
9809
9810 if(ps_prms->u1_will_cabac_state_change)
9811 {
9812 ps_tu_enc_loop->ai4_cr_coeff_offset[i4_subtu_idx] = total_bytes;
9813 }
9814
9815 pu1_final_ecd_data += cr_num_bytes;
9816 /* update total bytes consumed */
9817 total_bytes += cr_num_bytes;
9818 }
9819 }
9820 }
9821 else
9822 {
9823 ps_tu_enc_loop->ai4_cb_coeff_offset[0] = total_bytes;
9824 ps_tu_enc_loop->ai4_cr_coeff_offset[0] = total_bytes;
9825 ps_tu_enc_loop->ai4_cb_coeff_offset[1] = total_bytes;
9826 ps_tu_enc_loop->ai4_cr_coeff_offset[1] = total_bytes;
9827 ps_tu->b1_cb_cbf = 0;
9828 ps_tu->b1_cr_cbf = 0;
9829 ps_tu->b1_cb_cbf_subtu1 = 0;
9830 ps_tu->b1_cr_cbf_subtu1 = 0;
9831 }
9832
9833 /* Update to next TU */
9834 ps_tu_enc_loop++;
9835 ps_tu_enc_loop_temp_prms++;
9836
9837 pu4_nbr_flags++;
9838 pu1_intra_pred_mode++;
9839
9840 /*Do not set the nbr map for last pu in cu */
9841 if((num_tu_in_cu - 1) != ctr)
9842 {
9843 /* set the neighbour map to 1 */
9844 ihevce_set_nbr_map(
9845 ps_ctxt->pu1_ctb_nbr_map,
9846 ps_ctxt->i4_nbr_map_strd,
9847 cu_pos_x_in_4x4,
9848 cu_pos_y_in_4x4,
9849 (trans_size >> 2),
9850 1);
9851 }
9852 }
9853
9854 if(ps_prms->u1_will_cabac_state_change)
9855 {
9856 ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded;
9857
9858 /* Modify skip flag, if luma is skipped & Chroma is coded */
9859 if((1 == u1_is_cu_coded) && (PRED_MODE_SKIP == packed_pred_mode))
9860 {
9861 ps_best_cu_prms->u1_skip_flag = 0;
9862 }
9863 }
9864
9865 /* during chroma evaluation if skip decision was over written */
9866 /* then the current skip candidate is set to a non skip candidate */
9867 if(PRED_MODE_INTRA != packed_pred_mode)
9868 {
9869 ps_best_inter_cand->b1_skip_flag = ps_best_cu_prms->u1_skip_flag;
9870 }
9871
9872 /**------------- Compute header data if required --------------**/
9873 if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data)
9874 {
9875 WORD32 cbf_bits;
9876 WORD32 cu_bits;
9877 WORD32 unit_4x4_size = cu_size >> 2;
9878
9879 /*Restoring the running reference into the best rdopt_ctxt cabac states which will then
9880 be copied as the base reference for the next cu
9881 Assumption : We are ensuring that the u1_eval_header_data flag is set to 1 only if either
9882 luma and chroma are being reevaluated*/
9883 COPY_CABAC_STATES(
9884 &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9885 .s_cabac_ctxt.au1_ctxt_models[0],
9886 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
9887 IHEVC_CAB_CTXT_END);
9888
9889 /* get the neighbour availability flags for current cu */
9890 ihevce_get_only_nbr_flag(
9891 &s_nbr,
9892 ps_ctxt->pu1_ctb_nbr_map,
9893 ps_ctxt->i4_nbr_map_strd,
9894 (cu_pos_x << 1),
9895 (cu_pos_y << 1),
9896 unit_4x4_size,
9897 unit_4x4_size);
9898
9899 cu_bits = ihevce_entropy_rdo_encode_cu(
9900 &ps_ctxt->s_rdopt_entropy_ctxt,
9901 ps_best_cu_prms,
9902 cu_pos_x,
9903 cu_pos_y,
9904 cu_size,
9905 ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
9906 : s_nbr.u1_top_avail,
9907 s_nbr.u1_left_avail,
9908 (pu1_final_ecd_data - total_bytes),
9909 &cbf_bits);
9910
9911 /* cbf bits are excluded from header bits, instead considered as texture bits */
9912 ps_best_cu_prms->u4_cu_hdr_bits = cu_bits - cbf_bits;
9913 ps_best_cu_prms->u4_cu_cbf_bits = cbf_bits;
9914 }
9915
9916 if(ps_prms->u1_will_cabac_state_change)
9917 {
9918 ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes;
9919 }
9920 }
9921
9922 /*!
9923 ******************************************************************************
9924 * \if Function name : ihevce_set_eval_flags \endif
9925 *
9926 * \brief
9927 * Function which decides which eval flags have to be set based on present
9928 * and RDOQ conditions
9929 *
9930 * \param[in] ps_ctxt : encoder ctxt pointer
9931 * \param[in] enc_loop_cu_final_prms_t : pointer to final cu params
9932 *
9933 * \return
9934 * None
9935 *
9936 * \author
9937 * Ittiam
9938 *
9939 *****************************************************************************
9940 */
ihevce_set_eval_flags(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_final_prms_t * ps_enc_loop_bestprms)9941 void ihevce_set_eval_flags(
9942 ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_final_prms_t *ps_enc_loop_bestprms)
9943 {
9944 WORD32 count = 0;
9945
9946 ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0;
9947
9948 ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data =
9949 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
9950
9951 if(ps_ctxt->u1_disable_intra_eval && (!(ps_ctxt->i4_deblk_pad_hpel_cur_pic & 0x1)))
9952 {
9953 ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 0;
9954 }
9955 else
9956 {
9957 ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1;
9958 }
9959
9960 if((1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq) ||
9961 (1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh))
9962 {
9963 /* When rdoq is enabled only for the best candidate, in case of in Intra nTU
9964 RDOQ might have altered the coeffs of the neighbour CU. As a result, the pred
9965 for the current CU will change. Therefore, we need to reevaluate the pred data*/
9966 if((ps_enc_loop_bestprms->u2_num_tus_in_cu > 1) &&
9967 (ps_enc_loop_bestprms->u1_intra_flag == 1))
9968 {
9969 ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 1;
9970 ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data = 1;
9971 }
9972 if(ps_enc_loop_bestprms->u1_skip_flag == 1)
9973 {
9974 for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
9975 {
9976 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9977 .b1_eval_luma_iq_and_coeff_data = 0;
9978 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9979 .b1_eval_chroma_iq_and_coeff_data = 0;
9980 }
9981 }
9982 else
9983 {
9984 for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
9985 {
9986 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9987 .b1_eval_luma_iq_and_coeff_data = 1;
9988 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9989 .b1_eval_chroma_iq_and_coeff_data = 1;
9990 }
9991 }
9992 }
9993 else
9994 {
9995 switch(ps_ctxt->i4_quality_preset)
9996 {
9997 case IHEVCE_QUALITY_P0:
9998 case IHEVCE_QUALITY_P2:
9999 case IHEVCE_QUALITY_P3:
10000 {
10001 for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10002 {
10003 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10004 .b1_eval_luma_iq_and_coeff_data = 0;
10005 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10006 .b1_eval_chroma_iq_and_coeff_data =
10007 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10008 }
10009
10010 break;
10011 }
10012 case IHEVCE_QUALITY_P4:
10013 case IHEVCE_QUALITY_P5:
10014 {
10015 for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10016 {
10017 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10018 .b1_eval_luma_iq_and_coeff_data = 0;
10019 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10020 .b1_eval_chroma_iq_and_coeff_data =
10021 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10022 }
10023
10024 break;
10025 }
10026 case IHEVCE_QUALITY_P6:
10027 {
10028 for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10029 {
10030 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10031 .b1_eval_luma_iq_and_coeff_data = 0;
10032 #if !ENABLE_CHROMA_TRACKING_OF_LUMA_CBF_IN_XS25
10033 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10034 .b1_eval_chroma_iq_and_coeff_data =
10035 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10036 #else
10037 if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_temporal_layer_id > 1) &&
10038 (ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b3_size >= 2))
10039 {
10040 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10041 .b1_eval_chroma_iq_and_coeff_data =
10042 ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b1_y_cbf;
10043 }
10044 else
10045 {
10046 ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10047 .b1_eval_chroma_iq_and_coeff_data =
10048 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10049 }
10050 #endif
10051 }
10052
10053 break;
10054 }
10055 default:
10056 {
10057 break;
10058 }
10059 }
10060 }
10061
10062 /* Not recomputing Luma pred-data and header data for any preset now */
10063 ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1;
10064 }
10065
10066 /**
10067 ******************************************************************************
10068 *
10069 * @brief Shrink's TU tree of inter CUs by merging redundnant child nodes
10070 * (not coded children) into a parent node(not coded).
10071 *
10072 * @par Description
10073 * This is required post RDO evaluation as TU decisions are
10074 * pre-determined(pre RDO) based on recursive SATD,
10075 * while the quad children TU's can be skipped during RDO
10076 *
10077 * The shrink process is applied iteratively till there are no
10078 * more modes to shrink
10079 *
10080 * @param[inout] ps_tu_enc_loop
10081 * pointer to tu enc loop params of inter cu
10082 *
10083 * @param[inout] ps_tu_enc_loop_temp_prms
10084 * pointer to temp tu enc loop params of inter cu
10085 *
10086 * @param[in] num_tu_in_cu
10087 * number of tus in cu
10088 *
10089 * @return modified number of tus in cu
10090 *
10091 ******************************************************************************
10092 */
ihevce_shrink_inter_tu_tree(tu_enc_loop_out_t * ps_tu_enc_loop,tu_enc_loop_temp_prms_t * ps_tu_enc_loop_temp_prms,recon_datastore_t * ps_recon_datastore,WORD32 num_tu_in_cu,UWORD8 u1_is_422)10093 WORD32 ihevce_shrink_inter_tu_tree(
10094 tu_enc_loop_out_t *ps_tu_enc_loop,
10095 tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms,
10096 recon_datastore_t *ps_recon_datastore,
10097 WORD32 num_tu_in_cu,
10098 UWORD8 u1_is_422)
10099 {
10100 WORD32 recurse = 1;
10101 WORD32 ctr;
10102
10103 /* ------------- Quadtree TU Split Transform flag optimization ------------ */
10104 /* Post RDO, if all 4 child nodes are not coded the overheads of split TU */
10105 /* flags and cbf flags are saved by merging to parent node and marking */
10106 /* parent TU as not coded */
10107 /* */
10108 /* ParentTUSplit=1 */
10109 /* | */
10110 /* --------------------------------------------------------- */
10111 /* |C0(Not coded) | C1(Not coded) | C2(Not coded) | C3(Not coded) */
10112 /* || */
10113 /* \/ */
10114 /* */
10115 /* ParentTUSplit=0 (Not Coded) */
10116 /* */
10117 /* ------------- Quadtree TU Split Transform flag optimization ------------ */
10118 while((num_tu_in_cu > 4) && recurse)
10119 {
10120 recurse = 0;
10121
10122 /* Validate inter CU */
10123 //ASSERT(ps_tu_enc_loop[0].s_tu.s_tu.b1_intra_flag == 0); /*b1_intra_flag no longer a member of tu structure */
10124
10125 /* loop for all tu blocks in current cu */
10126 for(ctr = 0; ctr < num_tu_in_cu;)
10127 {
10128 /* Get current tu posx, posy and size */
10129 WORD32 curr_pos_x = ps_tu_enc_loop[ctr].s_tu.b4_pos_x << 2;
10130 WORD32 curr_pos_y = ps_tu_enc_loop[ctr].s_tu.b4_pos_y << 2;
10131 /* +1 is for parents size */
10132 WORD32 parent_tu_size = 1 << (ps_tu_enc_loop[ctr].s_tu.b3_size + 2 + 1);
10133
10134 /* eval merge if leaf nodes reached i.e all child tus are of same size and first tu pos is same as parent pos */
10135 WORD32 eval_merge = ((curr_pos_x & (parent_tu_size - 1)) == 0);
10136 eval_merge &= ((curr_pos_y & (parent_tu_size - 1)) == 0);
10137
10138 /* As TUs are published in encode order (Z SCAN), */
10139 /* Four consecutive TUS of same size implies we have hit leaf nodes. */
10140 if(((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 1].s_tu.b3_size)) &&
10141 ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 2].s_tu.b3_size)) &&
10142 ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 3].s_tu.b3_size)) &&
10143 eval_merge)
10144 {
10145 WORD32 merge_parent = 1;
10146
10147 /* If any leaf noded is coded, it cannot be merged to parent */
10148 if((ps_tu_enc_loop[ctr].s_tu.b1_y_cbf) || (ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf) ||
10149 (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf) ||
10150
10151 (ps_tu_enc_loop[ctr + 1].s_tu.b1_y_cbf) ||
10152 (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf) ||
10153 (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf) ||
10154
10155 (ps_tu_enc_loop[ctr + 2].s_tu.b1_y_cbf) ||
10156 (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf) ||
10157 (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf) ||
10158
10159 (ps_tu_enc_loop[ctr + 3].s_tu.b1_y_cbf) ||
10160 (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf) ||
10161 (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf))
10162 {
10163 merge_parent = 0;
10164 }
10165
10166 if(u1_is_422)
10167 {
10168 if((ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1) ||
10169 (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1) ||
10170
10171 (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf_subtu1) ||
10172 (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf_subtu1) ||
10173
10174 (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf_subtu1) ||
10175 (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf_subtu1) ||
10176
10177 (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf_subtu1) ||
10178 (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf_subtu1))
10179 {
10180 merge_parent = 0;
10181 }
10182 }
10183
10184 if(merge_parent)
10185 {
10186 /* Merge all the children (ctr,ctr+1,ctr+2,ctr+3) to parent (ctr) */
10187
10188 if(ps_recon_datastore->u1_is_lumaRecon_available)
10189 {
10190 ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
10191
10192 memmove(
10193 &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 1],
10194 &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 4],
10195 (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10196 }
10197
10198 if(ps_recon_datastore->au1_is_chromaRecon_available[0])
10199 {
10200 ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][0] =
10201 UCHAR_MAX;
10202 ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][0] =
10203 UCHAR_MAX;
10204
10205 memmove(
10206 &ps_recon_datastore
10207 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][0],
10208 &ps_recon_datastore
10209 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][0],
10210 (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10211
10212 memmove(
10213 &ps_recon_datastore
10214 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][0],
10215 &ps_recon_datastore
10216 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][0],
10217 (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10218
10219 if(u1_is_422)
10220 {
10221 ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][1] =
10222 UCHAR_MAX;
10223 ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][1] =
10224 UCHAR_MAX;
10225
10226 memmove(
10227 &ps_recon_datastore
10228 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][1],
10229 &ps_recon_datastore
10230 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][1],
10231 (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10232
10233 memmove(
10234 &ps_recon_datastore
10235 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][1],
10236 &ps_recon_datastore
10237 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][1],
10238 (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10239 }
10240 }
10241
10242 /* Parent node size is one more than that of child */
10243 ps_tu_enc_loop[ctr].s_tu.b3_size++;
10244
10245 ctr++;
10246
10247 /* move the subsequent TUs to next element */
10248 ASSERT(num_tu_in_cu >= (ctr + 3));
10249 memmove(
10250 (void *)(ps_tu_enc_loop + ctr),
10251 (void *)(ps_tu_enc_loop + ctr + 3),
10252 (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_out_t));
10253
10254 /* Also memmove the temp TU params */
10255 memmove(
10256 (void *)(ps_tu_enc_loop_temp_prms + ctr),
10257 (void *)(ps_tu_enc_loop_temp_prms + ctr + 3),
10258 (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_temp_prms_t));
10259
10260 /* Number of TUs in CU are now less by 3 */
10261 num_tu_in_cu -= 3;
10262
10263 /* Recurse again as new parent also be can be merged later */
10264 recurse = 1;
10265 }
10266 else
10267 {
10268 /* Go to next set of leaf nodes */
10269 ctr += 4;
10270 }
10271 }
10272 else
10273 {
10274 ctr++;
10275 }
10276 }
10277 }
10278
10279 /* return the modified num TUs*/
10280 ASSERT(num_tu_in_cu > 0);
10281 return (num_tu_in_cu);
10282 }
10283
/*!
******************************************************************************
* \if Function name : ihevce_intra_mode_nxn_hash_updater \endif
*
* \brief
*    Extends a list of intra modes with the modes adjacent to the ones
*    already present, followed by INTRA_PLANAR and INTRA_DC.
*
*    The first MAX_INTRA_CU_CANDIDATES entries of pu1_mode_array are visited.
*    For every entry smaller than 35, (mode - 1) is appended unless the mode
*    is 0, and (mode + 1) is appended unless the mode is 34. A mode is
*    appended only if its entry in pu1_hash_table is 0; the entry is then
*    set to 1. New modes are written at index u1_num_ipe_modes, which is
*    incremented after every append.
*
* \param[in,out] pu1_mode_array   : list of modes; new modes are appended
* \param[in,out] pu1_hash_table   : per mode "already present" markers
* \param[in]     u1_num_ipe_modes : number of modes already in the list
*
* \return
*    Number of modes in the list after the update
*
*****************************************************************************
*/
UWORD8 ihevce_intra_mode_nxn_hash_updater(
    UWORD8 *pu1_mode_array, UWORD8 *pu1_hash_table, UWORD8 u1_num_ipe_modes)
{
    WORD32 cand_idx;
    WORD32 i4_nbr_mode;

    for(cand_idx = 0; cand_idx < MAX_INTRA_CU_CANDIDATES; cand_idx++)
    {
        /* Entries that are not valid intra modes are ignored */
        if(pu1_mode_array[cand_idx] >= 35)
        {
            continue;
        }

        /* Lower neighbour, not defined for mode 0 */
        if(0 != pu1_mode_array[cand_idx])
        {
            i4_nbr_mode = pu1_mode_array[cand_idx] - 1;

            if(0 == pu1_hash_table[i4_nbr_mode])
            {
                pu1_hash_table[i4_nbr_mode] = 1;
                pu1_mode_array[u1_num_ipe_modes] = i4_nbr_mode;
                u1_num_ipe_modes++;
            }
        }

        /* Upper neighbour, not defined for mode 34. The entry is read   */
        /* again here on purpose: the append above may have written to   */
        /* this very index                                               */
        if(34 != pu1_mode_array[cand_idx])
        {
            i4_nbr_mode = pu1_mode_array[cand_idx] + 1;

            if(0 == pu1_hash_table[i4_nbr_mode])
            {
                pu1_hash_table[i4_nbr_mode] = 1;
                pu1_mode_array[u1_num_ipe_modes] = i4_nbr_mode;
                u1_num_ipe_modes++;
            }
        }
    }

    /* Planar is always part of the list */
    if(0 == pu1_hash_table[INTRA_PLANAR])
    {
        pu1_hash_table[INTRA_PLANAR] = 1;
        pu1_mode_array[u1_num_ipe_modes] = INTRA_PLANAR;
        u1_num_ipe_modes++;
    }

    /* DC is always part of the list */
    if(0 == pu1_hash_table[INTRA_DC])
    {
        pu1_hash_table[INTRA_DC] = 1;
        pu1_mode_array[u1_num_ipe_modes] = INTRA_DC;
        u1_num_ipe_modes++;
    }

    return u1_num_ipe_modes;
}
10336
#if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
/*!
******************************************************************************
* \if Function name : ihevce_determine_tu_tree_distribution \endif
*
* \brief
*    Runs the recursive SATD based TU evaluation function matching the CU
*    size on the CU input and the candidate's prediction. The TU split flags
*    are written by that function into ps_cu_data->ai4_tu_split_flag.
*
*    For a 64x64 CU the max transform depth passed on is limited to 1.
*    When u1_max_tr_depth is 0, b3_part_size is non zero and the CU is not
*    64x64, ai4_tu_split_flag[0] is overwritten with 1 after the evaluation.
*
* \param[in,out] ps_cu_data      : inter candidate (pred data, split flags)
* \param[in]  ps_func_selector   : forwarded to the SATD function
* \param[in]  pi2_scratch_mem    : working memory for the SATD function
* \param[in]  pu1_inp            : CU input data
* \param[in]  i4_inp_stride      : input stride
* \param[in]  i4_lambda          : forwarded to the SATD function
* \param[in]  u1_lambda_q_shift  : forwarded to the SATD function
* \param[in]  u1_cu_size         : CU size (selects the SATD function)
* \param[in]  u1_max_tr_depth    : max transform depth
*
* \return
*    Value returned by the selected SATD function
*
*****************************************************************************
*/
WORD32 ihevce_determine_tu_tree_distribution(
    cu_inter_cand_t *ps_cu_data,
    me_func_selector_t *ps_func_selector,
    WORD16 *pi2_scratch_mem,
    UWORD8 *pu1_inp,
    WORD32 i4_inp_stride,
    WORD32 i4_lambda,
    UWORD8 u1_lambda_q_shift,
    UWORD8 u1_cu_size,
    UWORD8 u1_max_tr_depth)
{
    PF_SAD_FXN_TU_REC apf_satd_tu_rec[4];
    err_prms_t s_satd_prms;
    WORD32 i4_cu_satd;

    /* One evaluation function per CU size */
    apf_satd_tu_rec[CU_8x8] = hme_evalsatd_pt_pu_8x8_tu_rec;
    apf_satd_tu_rec[CU_16x16] = hme_evalsatd_pt_pu_16x16_tu_rec;
    apf_satd_tu_rec[CU_32x32] = hme_evalsatd_pt_pu_32x32_tu_rec;
    apf_satd_tu_rec[CU_64x64] = hme_evalsatd_pt_pu_64x64_tu_rec;

    /* Source and prediction buffers */
    s_satd_prms.pu1_inp = pu1_inp;
    s_satd_prms.i4_inp_stride = i4_inp_stride;
    s_satd_prms.pu1_ref = ps_cu_data->pu1_pred_data;
    s_satd_prms.i4_ref_stride = ps_cu_data->i4_pred_data_stride;

    /* Outputs and working memory */
    s_satd_prms.pi4_sad_grid = &i4_cu_satd;
    s_satd_prms.pi4_tu_split_flags = ps_cu_data->ai4_tu_split_flag;
    s_satd_prms.pu1_wkg_mem = (UWORD8 *)pi2_scratch_mem;

    /* A 64x64 CU is evaluated with a transform depth of at most 1 */
    s_satd_prms.u1_max_tr_depth =
        (64 == u1_cu_size) ? MIN(1, u1_max_tr_depth) : u1_max_tr_depth;

    i4_cu_satd = apf_satd_tu_rec[hme_get_range(u1_cu_size) - 4](
        &s_satd_prms, i4_lambda, u1_lambda_q_shift, 0, ps_func_selector);

    /* Depth 0 with a non zero part size (CU other than 64x64) : force a split */
    if((u1_cu_size != 64) && (ps_cu_data->b3_part_size != 0) && (0 == u1_max_tr_depth))
    {
        ps_cu_data->ai4_tu_split_flag[0] = 1;
    }

    return i4_cu_satd;
}
#endif
10388
/*!
******************************************************************************
* \if Function name : ihevce_populate_nbr_4x4_with_pu_data \endif
*
* \brief
*    Marks the first 4x4 neighbour entry as inter, fills its L0/L1 prediction
*    flags and mv from the PU, and replicates that entry over a block of
*    (b4_wd + 1) x (b4_ht + 1) entries of the neighbour buffer.
*
* \param[in,out] ps_nbr_4x4   : first (top left) neighbour entry of the PU
* \param[in]  ps_pu           : PU whose data is published
* \param[in]  i4_nbr_buf_stride : distance between rows of the neighbour
*                                 buffer, in entries
*
* \return
*    None
*
*****************************************************************************
*/
void ihevce_populate_nbr_4x4_with_pu_data(
    nbr_4x4_t *ps_nbr_4x4, pu_t *ps_pu, WORD32 i4_nbr_buf_stride)
{
    WORD32 row, col;
    WORD32 num_cols = (ps_pu->b4_wd + 1);
    WORD32 num_rows = (ps_pu->b4_ht + 1);
    nbr_4x4_t *ps_row;
    nbr_4x4_t s_fill;

    /* Update the first entry in place; fields not written here keep */
    /* whatever the entry already holds                              */
    ps_nbr_4x4->mv = ps_pu->mv;
    ps_nbr_4x4->b1_intra_flag = 0;
    /* L0 is used when bit 0 of the pred mode is clear */
    ps_nbr_4x4->b1_pred_l0_flag = !(ps_pu->b2_pred_mode & 1);
    /* L1 is used for every pred mode above PRED_L0 */
    ps_nbr_4x4->b1_pred_l1_flag = (ps_pu->b2_pred_mode > PRED_L0);

    /* Replicate the updated entry over the whole PU area */
    s_fill = *ps_nbr_4x4;

    for(row = 0, ps_row = ps_nbr_4x4; row < num_rows; row++, ps_row += i4_nbr_buf_stride)
    {
        for(col = 0; col < num_cols; col++)
        {
            ps_row[col] = s_fill;
        }
    }
}
10414
/*!
******************************************************************************
* \if Function name : ihevce_call_luma_inter_pred_rdopt_pass1 \endif
*
* \brief
*    Generates the luma inter prediction of every partition of an inter
*    candidate into the candidate's pu1_pred_data_scr buffer by calling
*    ihevce_luma_inter_pred_pu() once per PU.
*
*    One PU is processed for SIZE_2Nx2N, two otherwise. After the first of
*    two PUs the destination pointer is advanced below the first PU when
*    its width equals the CU size, and to its right when its height equals
*    the CU size.
*
* \param[in]  ps_ctxt       : encode loop ctxt pointer (s_mc_ctxt is used)
* \param[in]  ps_inter_cand : inter candidate (PUs, pred buffer and stride)
* \param[in]  cu_size       : CU size in pixels
*
* \return
*    None
*
*****************************************************************************
*/
void ihevce_call_luma_inter_pred_rdopt_pass1(
    ihevce_enc_loop_ctxt_t *ps_ctxt, cu_inter_cand_t *ps_inter_cand, WORD32 cu_size)
{
    pu_t *ps_pu;
    UWORD8 *pu1_pred;
    WORD32 pred_stride, ctr, num_cu_part;
    WORD32 inter_pu_wd, inter_pu_ht;

    pu1_pred = ps_inter_cand->pu1_pred_data_scr;
    pred_stride = ps_inter_cand->i4_pred_data_stride;
    num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1;

    for(ctr = 0; ctr < num_cu_part; ctr++)
    {
        ps_pu = &ps_inter_cand->as_inter_pu[ctr];

        /* IF AMP then each partitions can have diff wd ht */
        inter_pu_wd = (ps_pu->b4_wd + 1) << 2;
        inter_pu_ht = (ps_pu->b4_ht + 1) << 2;

        /* Prediction is generated for every PU, skip and merge included */
        ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 1);

        /* Move the destination to the second partition */
        if((2 == num_cu_part) && (0 == ctr))
        {
            /* 2Nx__ partition case : second PU lies below the first */
            if(inter_pu_wd == cu_size)
            {
                pu1_pred += (inter_pu_ht * pred_stride);
            }

            /* __x2N partition case : second PU lies to the right of the first */
            if(inter_pu_ht == cu_size)
            {
                pu1_pred += inter_pu_wd;
            }
        }
    }
}
10456
ihevce_it_recon_ssd(ihevce_enc_loop_ctxt_t * ps_ctxt,UWORD8 * pu1_src,WORD32 i4_src_strd,UWORD8 * pu1_pred,WORD32 i4_pred_strd,WORD16 * pi2_deq_data,WORD32 i4_deq_data_strd,UWORD8 * pu1_recon,WORD32 i4_recon_stride,UWORD8 * pu1_ecd_data,UWORD8 u1_trans_size,UWORD8 u1_pred_mode,WORD32 i4_cbf,WORD32 i4_zero_col,WORD32 i4_zero_row,CHROMA_PLANE_ID_T e_chroma_plane)10457 LWORD64 ihevce_it_recon_ssd(
10458 ihevce_enc_loop_ctxt_t *ps_ctxt,
10459 UWORD8 *pu1_src,
10460 WORD32 i4_src_strd,
10461 UWORD8 *pu1_pred,
10462 WORD32 i4_pred_strd,
10463 WORD16 *pi2_deq_data,
10464 WORD32 i4_deq_data_strd,
10465 UWORD8 *pu1_recon,
10466 WORD32 i4_recon_stride,
10467 UWORD8 *pu1_ecd_data,
10468 UWORD8 u1_trans_size,
10469 UWORD8 u1_pred_mode,
10470 WORD32 i4_cbf,
10471 WORD32 i4_zero_col,
10472 WORD32 i4_zero_row,
10473 CHROMA_PLANE_ID_T e_chroma_plane)
10474 {
10475 if(NULL_PLANE == e_chroma_plane)
10476 {
10477 ihevce_it_recon_fxn(
10478 ps_ctxt,
10479 pi2_deq_data,
10480 i4_deq_data_strd,
10481 pu1_pred,
10482 i4_pred_strd,
10483 pu1_recon,
10484 i4_recon_stride,
10485 pu1_ecd_data,
10486 u1_trans_size,
10487 u1_pred_mode,
10488 i4_cbf,
10489 i4_zero_col,
10490 i4_zero_row);
10491
10492 return ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
10493 pu1_recon, pu1_src, i4_recon_stride, i4_src_strd, u1_trans_size, u1_trans_size,
10494 e_chroma_plane);
10495 }
10496 else
10497 {
10498 ihevce_chroma_it_recon_fxn(
10499 ps_ctxt,
10500 pi2_deq_data,
10501 i4_deq_data_strd,
10502 pu1_pred,
10503 i4_pred_strd,
10504 pu1_recon,
10505 i4_recon_stride,
10506 pu1_ecd_data,
10507 u1_trans_size,
10508 i4_cbf,
10509 i4_zero_col,
10510 i4_zero_row,
10511 e_chroma_plane);
10512
10513 return ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
10514 pu1_recon,
10515 pu1_src,
10516 i4_recon_stride,
10517 i4_src_strd,
10518 u1_trans_size,
10519 u1_trans_size,
10520 e_chroma_plane);
10521 }
10522 }
10523
10524 /*!
10525 ******************************************************************************
* \if Function name : ihevce_chroma_t_q_iq_ssd_scan_fxn \endif
10527 *
10528 * \brief
10529 * Transform unit level (Chroma) enc_loop function
10530 *
10531 * \param[in] ps_ctxt enc_loop module ctxt pointer
10532 * \param[in] pu1_pred pointer to predicted data buffer
10533 * \param[in] pred_strd predicted buffer stride
10534 * \param[in] pu1_src pointer to source data buffer
10535 * \param[in] src_strd source buffer stride
10536 * \param[in] pi2_deq_data pointer to store iq data
10537 * \param[in] deq_data_strd iq data buffer stride
10538 * \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod)
10539 * \param[out] pu1_csbf_buf pointer to store the csbf for all 4x4 in a current
10540 * block
10541 * \param[out] csbf_strd csbf buffer stride
10542 * \param[in] trans_size transform size (4, 8, 16)
10543 * \param[in] intra_flag 0:Inter/Skip 1:Intra
10544 * \param[out] pi4_coeff_off pointer to store the number of bytes produced in
10545 * coeff buffer
10546 the current TU in RDopt Mode
10547 * \param[out] pi4_zero_col pointer to store the zero_col info for the TU
10548 * \param[out] pi4_zero_row pointer to store the zero_row info for the TU
10549 *
10550 * \return
10551 * CBF of the current block
10552 *
10553 * \author
10554 * Ittiam
10555 *
10556 *****************************************************************************
10557 */
ihevce_chroma_t_q_iq_ssd_scan_fxn(ihevce_enc_loop_ctxt_t * ps_ctxt,UWORD8 * pu1_pred,WORD32 pred_strd,UWORD8 * pu1_src,WORD32 src_strd,WORD16 * pi2_deq_data,WORD32 deq_data_strd,UWORD8 * pu1_recon,WORD32 i4_recon_stride,UWORD8 * pu1_ecd_data,UWORD8 * pu1_csbf_buf,WORD32 csbf_strd,WORD32 trans_size,WORD32 i4_scan_idx,WORD32 intra_flag,WORD32 * pi4_coeff_off,WORD32 * pi4_tu_bits,WORD32 * pi4_zero_col,WORD32 * pi4_zero_row,UWORD8 * pu1_is_recon_available,WORD32 i4_perform_sbh,WORD32 i4_perform_rdoq,LWORD64 * pi8_cost,WORD32 i4_alpha_stim_multiplier,UWORD8 u1_is_cu_noisy,UWORD8 u1_is_skip,SSD_TYPE_T e_ssd_type,CHROMA_PLANE_ID_T e_chroma_plane)10558 WORD32 ihevce_chroma_t_q_iq_ssd_scan_fxn(
10559 ihevce_enc_loop_ctxt_t *ps_ctxt,
10560 UWORD8 *pu1_pred,
10561 WORD32 pred_strd,
10562 UWORD8 *pu1_src,
10563 WORD32 src_strd,
10564 WORD16 *pi2_deq_data,
10565 WORD32 deq_data_strd,
10566 UWORD8 *pu1_recon,
10567 WORD32 i4_recon_stride,
10568 UWORD8 *pu1_ecd_data,
10569 UWORD8 *pu1_csbf_buf,
10570 WORD32 csbf_strd,
10571 WORD32 trans_size,
10572 WORD32 i4_scan_idx,
10573 WORD32 intra_flag,
10574 WORD32 *pi4_coeff_off,
10575 WORD32 *pi4_tu_bits,
10576 WORD32 *pi4_zero_col,
10577 WORD32 *pi4_zero_row,
10578 UWORD8 *pu1_is_recon_available,
10579 WORD32 i4_perform_sbh,
10580 WORD32 i4_perform_rdoq,
10581 LWORD64 *pi8_cost,
10582 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
10583 WORD32 i4_alpha_stim_multiplier,
10584 UWORD8 u1_is_cu_noisy,
10585 #endif
10586 UWORD8 u1_is_skip,
10587 SSD_TYPE_T e_ssd_type,
10588 CHROMA_PLANE_ID_T e_chroma_plane)
10589 {
10590 WORD32 trans_idx, cbf, u4_blk_sad;
10591 WORD16 *pi2_quant_coeffs;
10592 WORD16 *pi2_trans_values;
10593 WORD32 quant_scale_mat_offset;
10594 WORD32 *pi4_trans_scratch;
10595 WORD32 *pi4_subBlock2csbfId_map = NULL;
10596
10597 #if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
10598 WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i;
10599 #endif
10600
10601 rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt;
10602
10603 WORD32 i4_perform_zcbf = (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE) ||
10604 (!intra_flag && ENABLE_INTER_ZCU_COST);
10605 WORD32 i4_perform_coeff_level_rdoq =
10606 (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING) &&
10607 (ps_ctxt->i4_chroma_quant_rounding_level == CHROMA_QUANT_ROUNDING);
10608
10609 ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE));
10610 ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW);
10611
10612 *pi4_coeff_off = 0;
10613 *pi4_tu_bits = 0;
10614 pu1_is_recon_available[0] = 0;
10615
10616 pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0];
10617 pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
10618 pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2);
10619
10620 if(2 == trans_size)
10621 {
10622 trans_size = 4;
10623 }
10624
10625 /* translate the transform size to index */
10626 trans_idx = trans_size >> 2;
10627
10628 if(16 == trans_size)
10629 {
10630 trans_idx = 3;
10631 }
10632
10633 if(u1_is_skip)
10634 {
10635 pi8_cost[0] = ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
10636 pu1_pred,
10637 pu1_src,
10638 pred_strd,
10639 src_strd,
10640 trans_size,
10641 trans_size,
10642 e_chroma_plane);
10643
10644 if(e_ssd_type == SPATIAL_DOMAIN_SSD)
10645 {
10646 /* buffer copy fromp pred to recon */
10647 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
10648 pu1_pred,
10649 pred_strd,
10650 pu1_recon,
10651 i4_recon_stride,
10652 trans_size,
10653 trans_size,
10654 e_chroma_plane);
10655
10656 pu1_is_recon_available[0] = 1;
10657 }
10658
10659 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
10660 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
10661 {
10662 pi8_cost[0] = ihevce_inject_stim_into_distortion(
10663 pu1_src,
10664 src_strd,
10665 pu1_pred,
10666 pred_strd,
10667 pi8_cost[0],
10668 i4_alpha_stim_multiplier,
10669 trans_size,
10670 0,
10671 ps_ctxt->u1_enable_psyRDOPT,
10672 e_chroma_plane);
10673 }
10674 #endif
10675
10676 #if ENABLE_INTER_ZCU_COST
10677 #if !WEIGH_CHROMA_COST
10678 /* cbf = 0, accumulate cu not coded cost */
10679 ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
10680 #else
10681 ps_ctxt->i8_cu_not_coded_cost += (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor +
10682 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
10683 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT;
10684 #endif
10685 #endif
10686
10687 return 0;
10688 }
10689
10690 if(intra_flag == 1)
10691 {
10692 quant_scale_mat_offset = 0;
10693
10694 #if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
10695 ai4_quant_rounding_factors[0][0] =
10696 MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3);
10697
10698 for(i = 0; i < trans_size * trans_size; i++)
10699 {
10700 ai4_quant_rounding_factors[1][i] =
10701 MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3][i],
10702 (1 << QUANT_ROUND_FACTOR_Q) / 3);
10703 ai4_quant_rounding_factors[2][i] =
10704 MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3][i],
10705 (1 << QUANT_ROUND_FACTOR_Q) / 3);
10706 }
10707 #endif
10708 }
10709 else
10710 {
10711 quant_scale_mat_offset = NUM_TRANS_TYPES;
10712 }
10713
10714 switch(trans_size)
10715 {
10716 case 4:
10717 {
10718 pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU;
10719
10720 break;
10721 }
10722 case 8:
10723 {
10724 pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU;
10725
10726 break;
10727 }
10728 case 16:
10729 {
10730 pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU;
10731
10732 break;
10733 }
10734 case 32:
10735 {
10736 pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU;
10737
10738 break;
10739 }
10740 }
10741
10742 /* ---------- call residue and transform block ------- */
10743 u4_blk_sad = ps_ctxt->apf_chrm_resd_trns[trans_idx - 1](
10744 pu1_src,
10745 pu1_pred,
10746 pi4_trans_scratch,
10747 pi2_trans_values,
10748 src_strd,
10749 pred_strd,
10750 trans_size,
10751 e_chroma_plane);
10752 (void)u4_blk_sad;
10753 /* -------- calculate SSD calculation in Transform Domain ------ */
10754
10755 cbf = ps_ctxt->apf_quant_iquant_ssd
10756 [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2]
10757
10758 (pi2_trans_values,
10759 ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset],
10760 pi2_quant_coeffs,
10761 pi2_deq_data,
10762 trans_size,
10763 ps_ctxt->i4_chrm_cu_qp_div6,
10764 ps_ctxt->i4_chrm_cu_qp_mod6,
10765 #if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
10766 ps_ctxt->i4_quant_rnd_factor[intra_flag],
10767 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3],
10768 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3],
10769 #else
10770 intra_flag ? ai4_quant_rounding_factors[0][0] : ps_ctxt->i4_quant_rnd_factor[intra_flag],
10771 intra_flag ? ai4_quant_rounding_factors[1]
10772 : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3],
10773 intra_flag ? ai4_quant_rounding_factors[2]
10774 : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3],
10775 #endif
10776 trans_size,
10777 trans_size,
10778 deq_data_strd,
10779 pu1_csbf_buf,
10780 csbf_strd,
10781 pi4_zero_col,
10782 pi4_zero_row,
10783 ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset],
10784 pi8_cost);
10785
10786 if(e_ssd_type != FREQUENCY_DOMAIN_SSD)
10787 {
10788 pi8_cost[0] = UINT_MAX;
10789 }
10790
10791 if(0 != cbf)
10792 {
10793 if(i4_perform_sbh || i4_perform_rdoq)
10794 {
10795 ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd;
10796 ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size;
10797
10798 ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_chrm_cu_qp_div6;
10799 ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_chrm_cu_qp_mod6;
10800 ps_rdoq_sbh_ctxt->i4_scan_idx = i4_scan_idx;
10801 ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
10802 ps_rdoq_sbh_ctxt->i4_trans_size = trans_size;
10803
10804 ps_rdoq_sbh_ctxt->pi2_dequant_coeff =
10805 ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset];
10806 ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data;
10807 ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs;
10808 ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values;
10809 ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf;
10810 ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map;
10811
10812 if((!i4_perform_rdoq))
10813 {
10814 ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
10815
10816 pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
10817 }
10818 }
10819
10820 /* ------- call coeffs scan function ------- */
10821 *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
10822 pi2_quant_coeffs,
10823 pi4_subBlock2csbfId_map,
10824 i4_scan_idx,
10825 trans_size,
10826 pu1_ecd_data,
10827 pu1_csbf_buf,
10828 csbf_strd);
10829 }
10830
10831 /* Normalize Cost. Note : trans_idx, not (trans_idx-1) */
10832 pi8_cost[0] >>= ga_trans_shift[trans_idx];
10833
10834 #if RDOPT_ZERO_CBF_ENABLE
10835 if((0 != cbf))
10836 {
10837 WORD32 tu_bits;
10838 LWORD64 zero_cbf_cost_u, curr_cb_cod_cost;
10839
10840 zero_cbf_cost_u = 0;
10841
10842 /* Populating the fields of rdoq_ctxt structure */
10843 if(i4_perform_rdoq)
10844 {
10845 //memset(ps_rdoq_sbh_ctxt,0,sizeof(rdoq_sbh_ctxt_t));
10846 /* transform size to log2transform size */
10847 GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size);
10848 ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1;
10849
10850 ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_chroma_qf;
10851 ps_rdoq_sbh_ctxt->i4_is_luma = 0;
10852 ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx];
10853 ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td =
10854 (1 << (ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td - 1));
10855 ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0;
10856 ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col;
10857 ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row;
10858 }
10859 else if(i4_perform_zcbf)
10860 {
10861 /* cost of zero cbf encoding */
10862 zero_cbf_cost_u =
10863
10864 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
10865 pu1_pred,
10866 pu1_src,
10867 pred_strd,
10868 src_strd,
10869 trans_size,
10870 trans_size,
10871 e_chroma_plane);
10872 }
10873
10874 /************************************************************************/
10875 /* call the entropy rdo encode to get the bit estimate for current tu */
10876 /* note that tu includes only residual coding bits and does not include */
10877 /* tu split, cbf and qp delta encoding bits for a TU */
10878 /************************************************************************/
10879 if(i4_perform_rdoq)
10880 {
10881 tu_bits = ihevce_entropy_rdo_encode_tu_rdoq(
10882 &ps_ctxt->s_rdopt_entropy_ctxt,
10883 pu1_ecd_data,
10884 trans_size,
10885 0,
10886 ps_rdoq_sbh_ctxt,
10887 pi8_cost,
10888 &zero_cbf_cost_u,
10889 0);
10890 //Currently, we are not accounting for sign bit in RDOPT bits calculation when RDOQ is turned on
10891
10892 if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0)
10893 {
10894 cbf = 0;
10895
10896 /* num bytes is set to 0 */
10897 *pi4_coeff_off = 0;
10898 }
10899
10900 (*pi4_tu_bits) += tu_bits;
10901
10902 if((i4_perform_sbh) && (0 != cbf))
10903 {
10904 ps_rdoq_sbh_ctxt->i8_ssd_cost = pi8_cost[0];
10905
10906 ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
10907
10908 pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
10909 }
10910
10911 /*Add round value before normalizing*/
10912 pi8_cost[0] += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td;
10913 pi8_cost[0] >>= ga_trans_shift[trans_idx];
10914
10915 if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1)
10916 {
10917 *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
10918 pi2_quant_coeffs,
10919 pi4_subBlock2csbfId_map,
10920 i4_scan_idx,
10921 trans_size,
10922 pu1_ecd_data,
10923 ps_rdoq_sbh_ctxt->pu1_csbf_buf,
10924 csbf_strd);
10925 }
10926 }
10927 else
10928 {
10929 /************************************************************************/
10930 /* call the entropy rdo encode to get the bit estimate for current tu */
10931 /* note that tu includes only residual coding bits and does not include */
10932 /* tu split, cbf and qp delta encoding bits for a TU */
10933 /************************************************************************/
10934 tu_bits = ihevce_entropy_rdo_encode_tu(
10935 &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 0, i4_perform_sbh);
10936
10937 (*pi4_tu_bits) += tu_bits;
10938 }
10939
10940 if(e_ssd_type == SPATIAL_DOMAIN_SSD)
10941 {
10942 pi8_cost[0] = ihevce_it_recon_ssd(
10943 ps_ctxt,
10944 pu1_src,
10945 src_strd,
10946 pu1_pred,
10947 pred_strd,
10948 pi2_deq_data,
10949 deq_data_strd,
10950 pu1_recon,
10951 i4_recon_stride,
10952 pu1_ecd_data,
10953 trans_size,
10954 PRED_MODE_INTRA,
10955 cbf,
10956 pi4_zero_col[0],
10957 pi4_zero_row[0],
10958 e_chroma_plane);
10959
10960 pu1_is_recon_available[0] = 1;
10961 }
10962
10963 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
10964 if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
10965 {
10966 pi8_cost[0] = ihevce_inject_stim_into_distortion(
10967 pu1_src,
10968 src_strd,
10969 pu1_recon,
10970 i4_recon_stride,
10971 pi8_cost[0],
10972 i4_alpha_stim_multiplier,
10973 trans_size,
10974 0,
10975 ps_ctxt->u1_enable_psyRDOPT,
10976 e_chroma_plane);
10977 }
10978 else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
10979 {
10980 pi8_cost[0] = ihevce_inject_stim_into_distortion(
10981 pu1_src,
10982 src_strd,
10983 pu1_pred,
10984 pred_strd,
10985 pi8_cost[0],
10986 i4_alpha_stim_multiplier,
10987 trans_size,
10988 0,
10989 ps_ctxt->u1_enable_psyRDOPT,
10990 e_chroma_plane);
10991 }
10992 #endif
10993
10994 curr_cb_cod_cost = pi8_cost[0];
10995
10996 /* add the SSD cost to bits estimate given by ECD */
10997 curr_cb_cod_cost +=
10998 COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
10999
11000 if(i4_perform_zcbf)
11001 {
11002 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
11003 if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
11004 {
11005 zero_cbf_cost_u = ihevce_inject_stim_into_distortion(
11006 pu1_src,
11007 src_strd,
11008 pu1_pred,
11009 pred_strd,
11010 zero_cbf_cost_u,
11011 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
11012 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
11013 (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
11014 100.0,
11015 trans_size,
11016 0,
11017 ps_ctxt->u1_enable_psyRDOPT,
11018 e_chroma_plane);
11019 }
11020 #endif
11021 /* force the tu as zero cbf if zero_cbf_cost is lower */
11022 if(zero_cbf_cost_u < curr_cb_cod_cost)
11023 {
11024 *pi4_coeff_off = 0;
11025 cbf = 0;
11026 (*pi4_tu_bits) = 0;
11027 pi8_cost[0] = zero_cbf_cost_u;
11028
11029 pu1_is_recon_available[0] = 0;
11030
11031 if(e_ssd_type == SPATIAL_DOMAIN_SSD)
11032 {
11033 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
11034 pu1_pred,
11035 pred_strd,
11036 pu1_recon,
11037 i4_recon_stride,
11038 trans_size,
11039 trans_size,
11040 e_chroma_plane);
11041
11042 pu1_is_recon_available[0] = 1;
11043 }
11044 }
11045
11046 #if ENABLE_INTER_ZCU_COST
11047 if(!intra_flag)
11048 {
11049 #if !WEIGH_CHROMA_COST
11050 ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost_u;
11051 #else
11052 ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
11053 (zero_cbf_cost_u * ps_ctxt->u4_chroma_cost_weighing_factor +
11054 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
11055 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
11056 #endif
11057 }
11058 #endif
11059 }
11060 }
11061 else
11062 {
11063 if(e_ssd_type == SPATIAL_DOMAIN_SSD)
11064 {
11065 pi8_cost[0] = ihevce_it_recon_ssd(
11066 ps_ctxt,
11067 pu1_src,
11068 src_strd,
11069 pu1_pred,
11070 pred_strd,
11071 pi2_deq_data,
11072 deq_data_strd,
11073 pu1_recon,
11074 i4_recon_stride,
11075 pu1_ecd_data,
11076 trans_size,
11077 PRED_MODE_INTRA,
11078 cbf,
11079 pi4_zero_col[0],
11080 pi4_zero_row[0],
11081 e_chroma_plane);
11082
11083 pu1_is_recon_available[0] = 1;
11084 }
11085
11086 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
11087 if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
11088 {
11089 pi8_cost[0] = ihevce_inject_stim_into_distortion(
11090 pu1_src,
11091 src_strd,
11092 pu1_recon,
11093 i4_recon_stride,
11094 pi8_cost[0],
11095 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
11096 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
11097 (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
11098 100.0,
11099 trans_size,
11100 0,
11101 ps_ctxt->u1_enable_psyRDOPT,
11102 e_chroma_plane);
11103 }
11104 else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
11105 {
11106 pi8_cost[0] = ihevce_inject_stim_into_distortion(
11107 pu1_src,
11108 src_strd,
11109 pu1_pred,
11110 pred_strd,
11111 pi8_cost[0],
11112 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
11113 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
11114 (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
11115 100.0,
11116 trans_size,
11117 0,
11118 ps_ctxt->u1_enable_psyRDOPT,
11119 e_chroma_plane);
11120 }
11121 #endif
11122
11123 #if ENABLE_INTER_ZCU_COST
11124 if(!intra_flag)
11125 {
11126 #if !WEIGH_CHROMA_COST
11127 /* cbf = 0, accumulate cu not coded cost */
11128 ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
11129 #else
11130 /* cbf = 0, accumulate cu not coded cost */
11131
11132 ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
11133 (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor +
11134 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
11135 CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
11136 #endif
11137 }
11138 #endif
11139 }
11140 #endif /* RDOPT_ZERO_CBF_ENABLE */
11141
11142 return (cbf);
11143 }
11144