1 /******************************************************************************
2 *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /*!
22 ******************************************************************************
23 * \file ihevce_enc_loop_pass.c
24 *
25 * \brief
26 * This file contains Encoder normative loop pass related functions
27 *
28 * \date
29 * 18/09/2012
30 *
31 * \author
32 * Ittiam
33 *
34 *
35 * List of Functions
36 *
37 *
38 ******************************************************************************
39 */
40
41 /*****************************************************************************/
42 /* File Includes */
43 /*****************************************************************************/
44 /* System include files */
45 #include <stdio.h>
46 #include <string.h>
47 #include <stdlib.h>
48 #include <assert.h>
49 #include <stdarg.h>
50 #include <math.h>
51 #include <limits.h>
52
53 /* User include files */
54 #include "ihevc_typedefs.h"
55 #include "itt_video_api.h"
56 #include "ihevce_api.h"
57
58 #include "rc_cntrl_param.h"
59 #include "rc_frame_info_collector.h"
60 #include "rc_look_ahead_params.h"
61
62 #include "ihevc_defs.h"
63 #include "ihevc_macros.h"
64 #include "ihevc_debug.h"
65 #include "ihevc_structs.h"
66 #include "ihevc_platform_macros.h"
67 #include "ihevc_deblk.h"
68 #include "ihevc_itrans_recon.h"
69 #include "ihevc_chroma_itrans_recon.h"
70 #include "ihevc_chroma_intra_pred.h"
71 #include "ihevc_intra_pred.h"
72 #include "ihevc_inter_pred.h"
73 #include "ihevc_mem_fns.h"
74 #include "ihevc_padding.h"
75 #include "ihevc_weighted_pred.h"
76 #include "ihevc_sao.h"
77 #include "ihevc_resi_trans.h"
78 #include "ihevc_quant_iquant_ssd.h"
79 #include "ihevc_cabac_tables.h"
80 #include "ihevc_common_tables.h"
81 #include "ihevc_quant_tables.h"
82
83 #include "ihevce_defs.h"
84 #include "ihevce_hle_interface.h"
85 #include "ihevce_lap_enc_structs.h"
86 #include "ihevce_multi_thrd_structs.h"
87 #include "ihevce_multi_thrd_funcs.h"
88 #include "ihevce_me_common_defs.h"
89 #include "ihevce_had_satd.h"
90 #include "ihevce_error_codes.h"
91 #include "ihevce_bitstream.h"
92 #include "ihevce_cabac.h"
93 #include "ihevce_rdoq_macros.h"
94 #include "ihevce_function_selector.h"
95 #include "ihevce_enc_structs.h"
96 #include "ihevce_entropy_structs.h"
97 #include "ihevce_cmn_utils_instr_set_router.h"
98 #include "ihevce_ipe_instr_set_router.h"
99 #include "ihevce_decomp_pre_intra_structs.h"
100 #include "ihevce_decomp_pre_intra_pass.h"
101 #include "ihevce_enc_loop_structs.h"
102 #include "ihevce_nbr_avail.h"
103 #include "ihevce_enc_loop_utils.h"
104 #include "ihevce_sub_pic_rc.h"
105 #include "ihevce_global_tables.h"
106 #include "ihevce_bs_compute_ctb.h"
107 #include "ihevce_cabac_rdo.h"
108 #include "ihevce_deblk.h"
109 #include "ihevce_frame_process.h"
110 #include "ihevce_rc_enc_structs.h"
111 #include "hme_datatype.h"
112 #include "hme_interface.h"
113 #include "hme_common_defs.h"
114 #include "hme_defs.h"
115 #include "ihevce_me_instr_set_router.h"
116 #include "ihevce_enc_subpel_gen.h"
117 #include "ihevce_inter_pred.h"
118 #include "ihevce_mv_pred.h"
119 #include "ihevce_mv_pred_merge.h"
120 #include "ihevce_enc_loop_inter_mode_sifter.h"
121 #include "ihevce_enc_cu_recursion.h"
122 #include "ihevce_enc_loop_pass.h"
123 #include "ihevce_common_utils.h"
124 #include "ihevce_dep_mngr_interface.h"
125 #include "ihevce_sao.h"
126 #include "ihevce_tile_interface.h"
127 #include "ihevce_profile.h"
128
129 #include "cast_types.h"
130 #include "osal.h"
131 #include "osal_defaults.h"
132
133 /*****************************************************************************/
134 /* Globals */
135 /*****************************************************************************/
/* Declaration only (extern); the table is not defined or used in the visible */
/* part of this file. Presumably maps (partition type, partition index) to a  */
/* PART_ID_T - inferred from the name and dimensions, confirm at definition.  */
136 extern PART_ID_T ge_part_type_to_part_id[MAX_PART_TYPES][MAX_NUM_PARTS];
137
/* Declaration only (extern), indexed by partition type. Presumably holds the */
/* number of partitions of each partition type - inferred from the name,      */
/* confirm at the definition.                                                 */
138 extern UWORD8 gau1_num_parts_in_part_type[MAX_PART_TYPES];
139
140 /*****************************************************************************/
141 /* Constant Macros */
142 /*****************************************************************************/
/* NOTE(review): none of these three constants is referenced in the visible   */
/* part of this file; the remarks below are inferred from the names only and  */
/* must be confirmed against the code that uses them.                         */
143 #define UPDATE_QP_AT_CTB 6 /* presumably a CTB count/position tied to QP updates - TODO confirm */
144 #define INTRAPRED_SIMD_LEFT_PADDING 16 /* presumably left-side padding of an intra pred buffer for SIMD - TODO confirm */
145 #define INTRAPRED_SIMD_RIGHT_PADDING 8 /* presumably the matching right-side padding - TODO confirm */
146
147 /*****************************************************************************/
148 /* Function Definitions */
149 /*****************************************************************************/
150
151 /*!
152 ******************************************************************************
153 * \if Function name : ihevce_enc_loop_ctb_left_copy \endif
154 *
155 * \brief
156 * This function copy the right data of CTB to context buffers
157 *
158 * \date
159 * 18/09/2012
160 *
161 * \author
162 * Ittiam
163 *
164 * \return
165 *
166 * List of Functions
167 *
168 *
169 ******************************************************************************
170 */
ihevce_enc_loop_ctb_left_copy(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms)171 void ihevce_enc_loop_ctb_left_copy(ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_prms_t *ps_cu_prms)
172 {
173 /* ------------------------------------------------------------------ */
174 /* copy the right coloum data to the context buffers */
175 /* ------------------------------------------------------------------ */
176
177 nbr_4x4_t *ps_left_nbr;
178 nbr_4x4_t *ps_nbr;
179 UWORD8 *pu1_buff;
180 WORD32 num_pels;
181 UWORD8 *pu1_luma_left, *pu1_chrm_left;
182
183 UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
184
185 pu1_luma_left = (UWORD8 *)ps_ctxt->pv_left_luma_data;
186 pu1_chrm_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data;
187 ps_left_nbr = &ps_ctxt->as_left_col_nbr[0];
188
189 /* copy right luma data */
190 pu1_buff = ps_cu_prms->pu1_luma_recon + ps_cu_prms->i4_ctb_size - 1;
191
192 for(num_pels = 0; num_pels < ps_cu_prms->i4_ctb_size; num_pels++)
193 {
194 WORD32 i4_indx = ps_cu_prms->i4_luma_recon_stride * num_pels;
195
196 pu1_luma_left[num_pels] = pu1_buff[i4_indx];
197 }
198
199 /* copy right chroma data */
200 pu1_buff = ps_cu_prms->pu1_chrm_recon + ps_cu_prms->i4_ctb_size - 2;
201
202 for(num_pels = 0; num_pels < (ps_cu_prms->i4_ctb_size >> (0 == u1_is_422)); num_pels++)
203 {
204 WORD32 i4_indx = ps_cu_prms->i4_chrm_recon_stride * num_pels;
205
206 *pu1_chrm_left++ = pu1_buff[i4_indx];
207 *pu1_chrm_left++ = pu1_buff[i4_indx + 1];
208 }
209
210 /* store the nbr 4x4 data at ctb level */
211 {
212 WORD32 ctr;
213 WORD32 nbr_strd;
214
215 nbr_strd = ps_cu_prms->i4_ctb_size >> 2;
216
217 /* copy right nbr data */
218 ps_nbr = &ps_ctxt->as_ctb_nbr_arr[0];
219 ps_nbr += ((ps_cu_prms->i4_ctb_size >> 2) - 1);
220
221 for(ctr = 0; ctr < (ps_cu_prms->i4_ctb_size >> 2); ctr++)
222 {
223 WORD32 i4_indx = nbr_strd * ctr;
224
225 ps_left_nbr[ctr] = ps_nbr[i4_indx];
226 }
227 }
228 return;
229 }
230
231 /*!
232 ******************************************************************************
233 * \if Function name : ihevce_mark_all_modes_to_evaluate \endif
234 *
235 * \brief
236 * Mark all modes for inter/intra for evaluation. This function will be
237 * called by ref instance
238 *
239 * \param[in] pv_ctxt : pointer to enc_loop module
240 * \param[in] ps_cu_analyse : pointer to cu analyse
241 *
242 * \return
243 * None
244 *
245 * \author
246 * Ittiam
247 *
248 *****************************************************************************
249 */
ihevce_mark_all_modes_to_evaluate(void * pv_ctxt,cu_analyse_t * ps_cu_analyse)250 void ihevce_mark_all_modes_to_evaluate(void *pv_ctxt, cu_analyse_t *ps_cu_analyse)
251 {
252 UWORD8 ctr;
253 WORD32 i4_part;
254
255 (void)pv_ctxt;
256 /* run a loop over all Inter cands */
257 for(ctr = 0; ctr < MAX_INTER_CU_CANDIDATES; ctr++)
258 {
259 ps_cu_analyse->as_cu_inter_cand[ctr].b1_eval_mark = 1;
260 }
261
262 /* run a loop over all intra candidates */
263 if(0 != ps_cu_analyse->u1_num_intra_rdopt_cands)
264 {
265 for(ctr = 0; ctr < MAX_INTRA_CU_CANDIDATES + 1; ctr++)
266 {
267 ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr] = 1;
268 ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr] = 1;
269
270 for(i4_part = 0; i4_part < NUM_PU_PARTS; i4_part++)
271 {
272 ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[i4_part][ctr] = 1;
273 }
274 }
275 }
276 }
277
278 /*!
279 ******************************************************************************
280 * \if Function name : ihevce_cu_mode_decide \endif
281 *
282 * \brief
283 * Coding Unit mode decide function. Performs RD opt and decides the best mode
284 *
285 * \param[in] ps_ctxt : pointer to enc_loop module
286 * \param[in] ps_cu_prms : pointer to coding unit params (position, buffer pointers)
287 * \param[in] ps_cu_analyse : pointer to cu analyse
288 * \param[out] ps_final_mode_state : pointer to cu final mode state
289 * \param[out] pu1_ecd_data : pointer to store coeff data for ECD
290 * \param[out] ps_col_pu : colocated pu buffer pointer
291 * \param[out] pu1_col_pu_map : colocated pu map buffer pointer
292 * \param[in] col_start_pu_idx : pu index start value
293 *
294 * \return
295 * LWORD64 value (the function is declared with return type LWORD64, not void)
296 *
297 *
298 * \author
299 * Ittiam
300 *
301 *****************************************************************************
302 */
ihevce_cu_mode_decide(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms,cu_analyse_t * ps_cu_analyse,final_mode_state_t * ps_final_mode_state,UWORD8 * pu1_ecd_data,pu_col_mv_t * ps_col_pu,UWORD8 * pu1_col_pu_map,WORD32 col_start_pu_idx)303 LWORD64 ihevce_cu_mode_decide(
304 ihevce_enc_loop_ctxt_t *ps_ctxt,
305 enc_loop_cu_prms_t *ps_cu_prms,
306 cu_analyse_t *ps_cu_analyse,
307 final_mode_state_t *ps_final_mode_state,
308 UWORD8 *pu1_ecd_data,
309 pu_col_mv_t *ps_col_pu,
310 UWORD8 *pu1_col_pu_map,
311 WORD32 col_start_pu_idx)
312 {
313 enc_loop_chrm_cu_buf_prms_t s_chrm_cu_buf_prms;
314 cu_nbr_prms_t s_cu_nbr_prms;
315 inter_cu_mode_info_t s_inter_cu_mode_info;
316 cu_inter_cand_t *ps_best_inter_cand = NULL;
317 UWORD8 *pu1_cu_top;
318 UWORD8 *pu1_cu_top_left;
319 UWORD8 *pu1_cu_left;
320 UWORD8 *pu1_final_recon = NULL;
321 UWORD8 *pu1_curr_src = NULL;
322 void *pv_curr_src = NULL;
323 void *pv_cu_left = NULL;
324 void *pv_cu_top = NULL;
325 void *pv_cu_top_left = NULL;
326
327 WORD32 cu_left_stride = 0;
328 WORD32 ctr;
329 WORD32 rd_opt_best_idx;
330 LWORD64 rd_opt_least_cost;
331 WORD32 rd_opt_curr_idx;
332 WORD32 num_4x4_in_ctb;
333 WORD32 nbr_4x4_left_strd = 0;
334
335 nbr_4x4_t *ps_topleft_nbr_4x4;
336 nbr_4x4_t *ps_left_nbr_4x4 = NULL;
337 nbr_4x4_t *ps_top_nbr_4x4 = NULL;
338 nbr_4x4_t *ps_curr_nbr_4x4;
339 WORD32 enable_intra_eval_flag;
340 WORD32 i4_best_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1;
341 WORD32 curr_cu_pos_in_row;
342 WORD32 cu_top_right_offset;
343 WORD32 cu_top_right_dep_pos;
344 WORD32 i4_ctb_x_off, i4_ctb_y_off;
345
346 UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
347 (void)ps_final_mode_state;
348 /* default init */
349 rd_opt_least_cost = MAX_COST_64;
350 ps_ctxt->as_cu_prms[0].i8_best_rdopt_cost = MAX_COST_64;
351 ps_ctxt->as_cu_prms[1].i8_best_rdopt_cost = MAX_COST_64;
352
353 /* Zero cbf tool is enabled by default for all presets */
354 ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
355
356 rd_opt_best_idx = 1;
357 rd_opt_curr_idx = 0;
358 enable_intra_eval_flag = 1;
359
360 /* CU params in enc ctxt*/
361 ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
362 ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
363 ps_ctxt->ps_enc_out_ctxt->u1_cu_size = ps_cu_analyse->u1_cu_size;
364
365 num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
366 ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0];
367 ps_curr_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1);
368 ps_curr_nbr_4x4 += ((ps_cu_analyse->b3_cu_pos_y << 1) * num_4x4_in_ctb);
369
370 /* CB and Cr are pixel interleaved */
371 s_chrm_cu_buf_prms.i4_chrm_recon_stride = ps_cu_prms->i4_chrm_recon_stride;
372
373 s_chrm_cu_buf_prms.i4_chrm_src_stride = ps_cu_prms->i4_chrm_src_stride;
374
375 if(!ps_ctxt->u1_is_input_data_hbd)
376 {
377 /* --------------------------------------- */
378 /* ----- Luma Pointers Derivation -------- */
379 /* --------------------------------------- */
380
381 /* based on CU position derive the pointers */
382 pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_cu_analyse->b3_cu_pos_x << 3);
383
384 pu1_curr_src = ps_cu_prms->pu1_luma_src + (ps_cu_analyse->b3_cu_pos_x << 3);
385
386 pu1_final_recon += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride);
387
388 pu1_curr_src += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_src_stride);
389
390 pv_curr_src = pu1_curr_src;
391
392 /* CU left */
393 if(0 == ps_cu_analyse->b3_cu_pos_x)
394 {
395 /* CTB boundary */
396 pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_luma_data;
397 pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << 3);
398 cu_left_stride = 1;
399
400 ps_left_nbr_4x4 = &ps_ctxt->as_left_col_nbr[0];
401 ps_left_nbr_4x4 += ps_cu_analyse->b3_cu_pos_y << 1;
402 nbr_4x4_left_strd = 1;
403 }
404 else
405 {
406 /* inside CTB */
407 pu1_cu_left = pu1_final_recon - 1;
408 cu_left_stride = ps_cu_prms->i4_luma_recon_stride;
409
410 ps_left_nbr_4x4 = ps_curr_nbr_4x4 - 1;
411 nbr_4x4_left_strd = num_4x4_in_ctb;
412 }
413
414 pv_cu_left = pu1_cu_left;
415
416 /* CU top */
417 if(0 == ps_cu_analyse->b3_cu_pos_y)
418 {
419 /* CTB boundary */
420 pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_luma;
421 pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size;
422 pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3);
423
424 ps_top_nbr_4x4 = ps_ctxt->ps_top_row_nbr;
425 ps_top_nbr_4x4 += (ps_cu_prms->i4_ctb_pos * (ps_cu_prms->i4_ctb_size >> 2));
426 ps_top_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1);
427 }
428 else
429 {
430 /* inside CTB */
431 pu1_cu_top = pu1_final_recon - ps_cu_prms->i4_luma_recon_stride;
432
433 ps_top_nbr_4x4 = ps_curr_nbr_4x4 - num_4x4_in_ctb;
434 }
435
436 pv_cu_top = pu1_cu_top;
437
438 /* CU top left */
439 if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y))
440 {
441 /* left ctb boundary but not first row */
442 pu1_cu_top_left = pu1_cu_left - 1; /* stride is 1 */
443 ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - 1; /* stride is 1 */
444 }
445 else
446 {
447 /* rest all cases topleft is top -1 */
448 pu1_cu_top_left = pu1_cu_top - 1;
449 ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1;
450 }
451
452 pv_cu_top_left = pu1_cu_top_left;
453
454 /* Store the CU nbr information in the ctxt for final reconstruction fun. */
455 s_cu_nbr_prms.nbr_4x4_left_strd = nbr_4x4_left_strd;
456 s_cu_nbr_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4;
457 s_cu_nbr_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4;
458 s_cu_nbr_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4;
459 s_cu_nbr_prms.pu1_cu_left = pu1_cu_left;
460 s_cu_nbr_prms.pu1_cu_top = pu1_cu_top;
461 s_cu_nbr_prms.pu1_cu_top_left = pu1_cu_top_left;
462 s_cu_nbr_prms.cu_left_stride = cu_left_stride;
463
464 /* ------------------------------------------------------------ */
465 /* -- Initialize the number of neigbour skip cu count for rdo --*/
466 /* ------------------------------------------------------------ */
467 {
468 nbr_avail_flags_t s_nbr;
469 WORD32 i4_num_nbr_skip_cus = 0;
470
471 /* get the neighbour availability flags for current cu */
472 ihevce_get_nbr_intra(
473 &s_nbr,
474 ps_ctxt->pu1_ctb_nbr_map,
475 ps_ctxt->i4_nbr_map_strd,
476 (ps_cu_analyse->b3_cu_pos_x << 1),
477 (ps_cu_analyse->b3_cu_pos_y << 1),
478 (ps_cu_analyse->u1_cu_size >> 2));
479 if(s_nbr.u1_top_avail)
480 {
481 i4_num_nbr_skip_cus += ps_top_nbr_4x4->b1_skip_flag;
482 }
483
484 if(s_nbr.u1_left_avail)
485 {
486 i4_num_nbr_skip_cus += ps_left_nbr_4x4->b1_skip_flag;
487 }
488 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0].i4_num_nbr_skip_cus =
489 i4_num_nbr_skip_cus;
490 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1].i4_num_nbr_skip_cus =
491 i4_num_nbr_skip_cus;
492 }
493
494 /* --------------------------------------- */
495 /* --- Chroma Pointers Derivation -------- */
496 /* --------------------------------------- */
497
498 /* based on CU position derive the pointers */
499 s_chrm_cu_buf_prms.pu1_final_recon =
500 ps_cu_prms->pu1_chrm_recon + (ps_cu_analyse->b3_cu_pos_x << 3);
501
502 s_chrm_cu_buf_prms.pu1_curr_src =
503 ps_cu_prms->pu1_chrm_src + (ps_cu_analyse->b3_cu_pos_x << 3);
504
505 s_chrm_cu_buf_prms.pu1_final_recon +=
506 ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_recon_stride);
507
508 s_chrm_cu_buf_prms.pu1_curr_src +=
509 ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_src_stride);
510
511 /* CU left */
512 if(0 == ps_cu_analyse->b3_cu_pos_x)
513 {
514 /* CTB boundary */
515 s_chrm_cu_buf_prms.pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data;
516 s_chrm_cu_buf_prms.pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 3));
517 s_chrm_cu_buf_prms.i4_cu_left_stride = 2;
518 }
519 else
520 {
521 /* inside CTB */
522 s_chrm_cu_buf_prms.pu1_cu_left = s_chrm_cu_buf_prms.pu1_final_recon - 2;
523 s_chrm_cu_buf_prms.i4_cu_left_stride = ps_cu_prms->i4_chrm_recon_stride;
524 }
525
526 /* CU top */
527 if(0 == ps_cu_analyse->b3_cu_pos_y)
528 {
529 /* CTB boundary */
530 s_chrm_cu_buf_prms.pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_chroma;
531 s_chrm_cu_buf_prms.pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size;
532 s_chrm_cu_buf_prms.pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3);
533 }
534 else
535 {
536 /* inside CTB */
537 s_chrm_cu_buf_prms.pu1_cu_top =
538 s_chrm_cu_buf_prms.pu1_final_recon - ps_cu_prms->i4_chrm_recon_stride;
539 }
540
541 /* CU top left */
542 if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y))
543 {
544 /* left ctb boundary but not first row */
545 s_chrm_cu_buf_prms.pu1_cu_top_left =
546 s_chrm_cu_buf_prms.pu1_cu_left - 2; /* stride is 1 (2 pixels) */
547 }
548 else
549 {
550 /* rest all cases topleft is top -2 */
551 s_chrm_cu_buf_prms.pu1_cu_top_left = s_chrm_cu_buf_prms.pu1_cu_top - 2;
552 }
553 }
554
555 /* Set Variables for Dep. Checking and Setting */
556 i4_ctb_x_off = (ps_cu_prms->i4_ctb_pos << 6);
557
558 i4_ctb_y_off = ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y;
559 ps_ctxt->i4_satd_buf_idx = rd_opt_curr_idx;
560
561 /* Set the pred pointer count for ME/intra to 0 to start */
562 ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count = 0;
563
564 ASSERT(
565 (ps_cu_analyse->u1_num_inter_cands > 0) || (ps_cu_analyse->u1_num_intra_rdopt_cands > 0));
566
567 ASSERT(ps_cu_analyse->u1_num_inter_cands <= MAX_INTER_CU_CANDIDATES);
568 s_inter_cu_mode_info.u1_num_inter_cands = 0;
569 s_inter_cu_mode_info.u1_idx_of_worst_cost_in_cost_array = 0;
570 s_inter_cu_mode_info.u1_idx_of_worst_cost_in_pred_buf_array = 0;
571
572 ps_ctxt->s_cu_inter_merge_skip.u1_num_merge_cands = 0;
573 ps_ctxt->s_cu_inter_merge_skip.u1_num_skip_cands = 0;
574 ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type0_cands = 0;
575 ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type1_cands = 0;
576 ps_ctxt->s_pred_buf_data.i4_pred_stride = ps_cu_analyse->u1_cu_size;
577 if(0 != ps_cu_analyse->u1_num_inter_cands)
578 {
579 ihevce_inter_cand_sifter_prms_t s_prms;
580
581 UWORD8 u1_enable_top_row_sync;
582
583 if(ps_ctxt->u1_disable_intra_eval)
584 {
585 u1_enable_top_row_sync = !DISABLE_TOP_SYNC;
586 }
587 else
588 {
589 u1_enable_top_row_sync = 1;
590 }
591
592 if((!ps_ctxt->u1_use_top_at_ctb_boundary) && u1_enable_top_row_sync)
593 {
594 /* Wait till top data is ready */
595 /* Currently checking till top right CU */
596 curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
597
598 if(i4_ctb_y_off == 0)
599 {
600 /* No wait for 1st row */
601 cu_top_right_offset = -(MAX_CTB_SIZE);
602 {
603 ihevce_tile_params_t *ps_col_tile_params =
604 ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
605 ps_ctxt->i4_tile_col_idx);
606 /* No wait for 1st row */
607 cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
608 }
609 cu_top_right_dep_pos = 0;
610 }
611 else
612 {
613 cu_top_right_offset = (ps_cu_analyse->u1_cu_size) + 4;
614 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
615 }
616
617 if(0 == ps_cu_analyse->b3_cu_pos_y)
618 {
619 ihevce_dmgr_chk_row_row_sync(
620 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
621 curr_cu_pos_in_row,
622 cu_top_right_offset,
623 cu_top_right_dep_pos,
624 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
625 ps_ctxt->thrd_id);
626 }
627 }
628
629 if(ps_ctxt->i1_cu_qp_delta_enable)
630 {
631 ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, 4, 0);
632 }
633
634 s_prms.i4_ctb_nbr_map_stride = ps_ctxt->i4_nbr_map_strd;
635 s_prms.i4_max_num_inter_rdopt_cands = ps_ctxt->i4_max_num_inter_rdopt_cands;
636 s_prms.i4_nbr_4x4_left_strd = nbr_4x4_left_strd;
637 s_prms.i4_src_strd = ps_cu_prms->i4_luma_src_stride;
638 s_prms.ps_cu_inter_merge_skip = &ps_ctxt->s_cu_inter_merge_skip;
639 s_prms.aps_cu_nbr_buf[0] = &ps_ctxt->as_cu_nbr[ps_ctxt->i4_satd_buf_idx][0];
640 s_prms.aps_cu_nbr_buf[1] = &ps_ctxt->as_cu_nbr[!ps_ctxt->i4_satd_buf_idx][0];
641 s_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4;
642 s_prms.ps_mc_ctxt = &ps_ctxt->s_mc_ctxt;
643 s_prms.ps_me_cands = ps_cu_analyse->as_cu_inter_cand;
644 s_prms.ps_mixed_modes_datastore = &ps_ctxt->s_mixed_mode_inter_cu;
645 s_prms.ps_mv_pred_ctxt = &ps_ctxt->s_mv_pred_ctxt;
646 s_prms.ps_pred_buf_data = &ps_ctxt->s_pred_buf_data;
647 s_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4;
648 s_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4;
649 s_prms.pu1_ctb_nbr_map = ps_ctxt->pu1_ctb_nbr_map;
650 s_prms.pv_src = pv_curr_src;
651 s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 3;
652 s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 3;
653 s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size;
654 s_prms.u1_max_merge_candidates = ps_ctxt->i4_max_merge_candidates;
655 s_prms.u1_num_me_cands = ps_cu_analyse->u1_num_inter_cands;
656 s_prms.u1_use_satd_for_merge_eval = ps_ctxt->i4_use_satd_for_merge_eval;
657 s_prms.u1_quality_preset = ps_ctxt->i4_quality_preset;
658 s_prms.i1_slice_type = ps_ctxt->i1_slice_type;
659 s_prms.ps_cu_me_intra_pred_prms = &ps_ctxt->s_cu_me_intra_pred_prms;
660 s_prms.u1_is_hbd = (ps_ctxt->u1_bit_depth > 8);
661 s_prms.ps_inter_cu_mode_info = &s_inter_cu_mode_info;
662 s_prms.pai4_mv_cost = ps_cu_analyse->ai4_mv_cost;
663 s_prms.i4_lambda_qf = ps_ctxt->i4_sad_lamda;
664 s_prms.u1_use_merge_cand_from_top_row =
665 (u1_enable_top_row_sync || (s_prms.u1_cu_pos_y > 0));
666 s_prms.u1_merge_idx_cabac_model =
667 ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[IHEVC_CAB_MERGE_IDX_EXT];
668 #if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
669 s_prms.pai4_me_err_metric = ps_cu_analyse->ai4_err_metric;
670 s_prms.u1_reuse_me_sad = 1;
671 #else
672 s_prms.u1_reuse_me_sad = 0;
673 #endif
674
675 if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_type != PSLICE)
676 {
677 if(ps_ctxt->i4_temporal_layer == 1)
678 {
679 s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_BREF;
680 }
681 else
682 {
683 s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME;
684 }
685 }
686 else
687 {
688 s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_P;
689 }
690 s_prms.u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
691
692 if(s_prms.u1_is_cu_noisy)
693 {
694 s_prms.i4_lambda_qf =
695 ((float)s_prms.i4_lambda_qf) * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f;
696 }
697 s_prms.pf_luma_inter_pred_pu = ihevce_luma_inter_pred_pu;
698
699 s_prms.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func;
700
701 s_prms.pf_evalsad_pt_npu_mxn_8bit = (FT_SAD_EVALUATOR *)ps_ctxt->pv_evalsad_pt_npu_mxn_8bit;
702 ihevce_inter_cand_sifter(&s_prms);
703 }
704 if(u1_is_422)
705 {
706 UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX - 1];
707 UWORD8 u1_num_bufs_allocated;
708
709 u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices(
710 au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX - 1);
711
712 ASSERT(u1_num_bufs_allocated == (NUM_CU_ME_INTRA_PRED_IDX - 1));
713
714 for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated;
715 ctr++)
716 {
717 {
718 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
719 (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]];
720 }
721
722 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
723
724 ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
725 }
726
727 {
728 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
729 (UWORD8 *)ps_ctxt->pv_422_chroma_intra_pred_buf;
730 }
731
732 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
733
734 ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
735 }
736 else
737 {
738 UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX];
739 UWORD8 u1_num_bufs_allocated;
740
741 u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices(
742 au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX);
743
744 ASSERT(u1_num_bufs_allocated == NUM_CU_ME_INTRA_PRED_IDX);
745
746 for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated;
747 ctr++)
748 {
749 {
750 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
751 (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]];
752 }
753
754 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
755
756 ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
757 }
758 }
759
760 ihevce_mark_all_modes_to_evaluate(ps_ctxt, ps_cu_analyse);
761
762 ps_ctxt->as_cu_prms[0].s_recon_datastore.u1_is_lumaRecon_available = 0;
763 ps_ctxt->as_cu_prms[1].s_recon_datastore.u1_is_lumaRecon_available = 0;
764 ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
765 ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
766 ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[1] = 0;
767 ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[1] = 0;
768 ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[2] = 0;
769 ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[2] = 0;
770 /* --------------------------------------- */
771 /* ------ Inter RD OPT stage ------------- */
772 /* --------------------------------------- */
773 if(0 != s_inter_cu_mode_info.u1_num_inter_cands)
774 {
775 UWORD8 u1_ssd_bit_info_ctr = 0;
776
777 /* -- run a loop over all Inter rd opt cands ------ */
778 for(ctr = 0; ctr < s_inter_cu_mode_info.u1_num_inter_cands; ctr++)
779 {
780 cu_inter_cand_t *ps_inter_cand;
781
782 LWORD64 rd_opt_cost = 0;
783
784 ps_inter_cand = s_inter_cu_mode_info.aps_cu_data[ctr];
785
786 if((ps_inter_cand->b1_skip_flag) || (ps_inter_cand->as_inter_pu[0].b1_merge_flag) ||
787 (ps_inter_cand->b3_part_size && ps_inter_cand->as_inter_pu[1].b1_merge_flag))
788 {
789 ps_inter_cand->b1_eval_mark = 1;
790 }
791
792 /****************************************************************/
793 /* This check is only valid for derived instances. */
794 /* check if this mode needs to be evaluated or not. */
795 /* if it is a skip candidate, go ahead and evaluate it even if */
796 /* it has not been marked while sorting. */
797 /****************************************************************/
798 if((0 == ps_inter_cand->b1_eval_mark) && (0 == ps_inter_cand->b1_skip_flag))
799 {
800 continue;
801 }
802
803 /* RDOPT related copies and settings */
804 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
805
806 /* RDOPT copy States : Prev Cu best to current init */
807 COPY_CABAC_STATES(
808 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
809 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
810 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
811 /* MVP ,MVD calc and Motion compensation */
812 rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)(
813 ps_ctxt,
814 ps_inter_cand,
815 ps_cu_analyse->u1_cu_size,
816 ps_cu_analyse->b3_cu_pos_x,
817 ps_cu_analyse->b3_cu_pos_y,
818 ps_left_nbr_4x4,
819 ps_top_nbr_4x4,
820 ps_topleft_nbr_4x4,
821 nbr_4x4_left_strd,
822 rd_opt_curr_idx);
823
824 #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
825 if((ps_ctxt->u1_bit_depth == 8) && (!ps_inter_cand->b1_skip_flag))
826 {
827 ihevce_determine_tu_tree_distribution(
828 ps_inter_cand,
829 (me_func_selector_t *)ps_ctxt->pv_err_func_selector,
830 ps_ctxt->ai2_scratch,
831 (UWORD8 *)pv_curr_src,
832 ps_cu_prms->i4_luma_src_stride,
833 ps_ctxt->i4_satd_lamda,
834 LAMBDA_Q_SHIFT,
835 ps_cu_analyse->u1_cu_size,
836 ps_ctxt->u1_max_tr_depth);
837 }
838 #endif
839 #if DISABLE_ZERO_ZBF_IN_INTER
840 ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
841 #else
842 ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
843 #endif
844 /* Recon loop with different TUs based on partition type*/
845 rd_opt_cost += ((pf_inter_rdopt_cu_ntu)ps_ctxt->pv_inter_rdopt_cu_ntu)(
846 ps_ctxt,
847 ps_cu_prms,
848 pv_curr_src,
849 ps_cu_analyse->u1_cu_size,
850 ps_cu_analyse->b3_cu_pos_x,
851 ps_cu_analyse->b3_cu_pos_y,
852 rd_opt_curr_idx,
853 &s_chrm_cu_buf_prms,
854 ps_inter_cand,
855 ps_cu_analyse,
856 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
857 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
858 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
859 100.0);
860
861 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
862 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
863 {
864 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
865 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
866 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
867 }
868 #endif
869
870 /* based on the rd opt cost choose the best and current index */
871 if(rd_opt_cost < rd_opt_least_cost)
872 {
873 /* swap the best and current indx */
874 rd_opt_best_idx = !rd_opt_best_idx;
875 rd_opt_curr_idx = !rd_opt_curr_idx;
876
877 ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost;
878 rd_opt_least_cost = rd_opt_cost;
879 i4_best_cu_qp = ps_ctxt->i4_cu_qp;
880
881 /* Store the best Inter cand. for final_recon function */
882 ps_best_inter_cand = ps_inter_cand;
883 }
884
885 /* set the neighbour map to 0 */
886 ihevce_set_nbr_map(
887 ps_ctxt->pu1_ctb_nbr_map,
888 ps_ctxt->i4_nbr_map_strd,
889 (ps_cu_analyse->b3_cu_pos_x << 1),
890 (ps_cu_analyse->b3_cu_pos_y << 1),
891 (ps_cu_analyse->u1_cu_size >> 2),
892 0);
893
894 } /* end of loop for all the Inter RD OPT cand */
895 }
896 /* --------------------------------------- */
897 /* ---- Conditional Eval of Intra -------- */
898 /* --------------------------------------- */
899 {
900 enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
901 ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
902
903 /* check if inter candidates are valid */
904 if(0 != ps_cu_analyse->u1_num_inter_cands)
905 {
906 /* if skip or no residual inter candidates has won then */
907 /* evaluation of intra candidates is disabled */
908 if((1 == ps_enc_loop_bestprms->u1_skip_flag) ||
909 (0 == ps_enc_loop_bestprms->u1_is_cu_coded))
910 {
911 enable_intra_eval_flag = 0;
912 }
913 }
914 /* Disable Intra Gating for HIGH QUALITY PRESET */
915 #if !ENABLE_INTRA_GATING_FOR_HQ
916 if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset)
917 {
918 enable_intra_eval_flag = 1;
919
920 #if DISABLE_LARGE_INTRA_PQ
921 if((IHEVCE_QUALITY_P0 == ps_ctxt->i4_quality_preset) && (ps_cu_prms->u1_is_cu_noisy) &&
922 (ps_ctxt->i1_slice_type != ISLICE) && (0 != s_inter_cu_mode_info.u1_num_inter_cands))
923 {
924 if(ps_cu_analyse->u1_cu_size > 16)
925 {
926 /* Disable 32x32 / 64x64 Intra in PQ P and B pics */
927 enable_intra_eval_flag = 0;
928 }
929 else if(ps_cu_analyse->u1_cu_size == 16)
930 {
931 /* Disable tu equal to cu mode in 16x16 Intra in PQ P and B pics */
932 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
933 }
934 }
935 #endif
936 }
937 #endif
938 }
939
940 /* --------------------------------------- */
941 /* ------ Intra RD OPT stage ------------- */
942 /* --------------------------------------- */
943
944 /* -- run a loop over all Intra rd opt cands ------ */
945 if((0 != ps_cu_analyse->u1_num_intra_rdopt_cands) && (1 == enable_intra_eval_flag))
946 {
947 LWORD64 rd_opt_cost;
948 WORD32 end_flag = 0;
949 WORD32 cu_eval_done = 0;
950 WORD32 subcu_eval_done = 0;
951 WORD32 subpu_eval_done = 0;
952 WORD32 max_trans_size;
953 WORD32 sync_wait_stride;
954 max_trans_size = MIN(MAX_TU_SIZE, (ps_cu_analyse->u1_cu_size));
955 sync_wait_stride = (ps_cu_analyse->u1_cu_size) + max_trans_size;
956
957 if(!ps_ctxt->u1_use_top_at_ctb_boundary)
958 {
959 /* Wait till top data is ready */
960 /* Currently checking till top right CU */
961 curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
962
963 if(i4_ctb_y_off == 0)
964 {
965 /* No wait for 1st row */
966 cu_top_right_offset = -(MAX_CTB_SIZE);
967 {
968 ihevce_tile_params_t *ps_col_tile_params =
969 ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
970 ps_ctxt->i4_tile_col_idx);
971 /* No wait for 1st row */
972 cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
973 }
974 cu_top_right_dep_pos = 0;
975 }
976 else
977 {
978 cu_top_right_offset = sync_wait_stride;
979 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
980 }
981
982 if(0 == ps_cu_analyse->b3_cu_pos_y)
983 {
984 ihevce_dmgr_chk_row_row_sync(
985 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
986 curr_cu_pos_in_row,
987 cu_top_right_offset,
988 cu_top_right_dep_pos,
989 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
990 ps_ctxt->thrd_id);
991 }
992 }
993 ctr = 0;
994
995 /* Zero cbf tool is disabled for intra CUs */
996 #if ENABLE_ZERO_CBF_IN_INTRA
997 ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
998 #else
999 ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
1000 #endif
1001
1002 /* Intra Mode gating based on MPM cand list and encoder quality preset */
1003 if((ps_ctxt->i1_slice_type != ISLICE) && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
1004 {
1005 ihevce_mpm_idx_based_filter_RDOPT_cand(
1006 ps_ctxt,
1007 ps_cu_analyse,
1008 ps_left_nbr_4x4,
1009 ps_top_nbr_4x4,
1010 &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0],
1011 &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[0]);
1012
1013 ihevce_mpm_idx_based_filter_RDOPT_cand(
1014 ps_ctxt,
1015 ps_cu_analyse,
1016 ps_left_nbr_4x4,
1017 ps_top_nbr_4x4,
1018 &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0],
1019 &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[0]);
1020 }
1021
1022 /* Call Chroma SATD function for curr_func_mode in HIGH QUALITY mode */
1023 if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd)
1024 {
1025 /* For cu_size = 64, there won't be any TU_EQ_CU case */
1026 if(64 != ps_cu_analyse->u1_cu_size)
1027 {
1028 /* RDOPT copy States : Prev Cu best to current init */
1029 COPY_CABAC_STATES(
1030 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1031 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1032 IHEVC_CAB_CTXT_END);
1033
1034 /* RDOPT related copies and settings */
1035 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1036
1037 /* Calc. best SATD mode for TU_EQ_CU case */
1038 ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)(
1039 ps_ctxt,
1040 &s_chrm_cu_buf_prms,
1041 ps_cu_analyse,
1042 rd_opt_curr_idx,
1043 TU_EQ_CU,
1044 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1045 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1046 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1047 100.0,
1048 ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY);
1049
1050 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1051 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1052 {
1053 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1054 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1055 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1056 }
1057 #endif
1058 }
1059
1060 /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and
1061 TU_EQ_CU_DIV2 case */
1062
1063 if((ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] !=
1064 255) &&
1065 (8 != ps_cu_analyse->u1_cu_size))
1066 {
1067 /* RDOPT copy States : Prev Cu best to current init */
1068 COPY_CABAC_STATES(
1069 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1070 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1071 IHEVC_CAB_CTXT_END);
1072
1073 /* RDOPT related copies and settings */
1074 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1075
1076 /* Calc. best SATD mode for TU_EQ_CU_DIV2 case */
1077 ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)(
1078 ps_ctxt,
1079 &s_chrm_cu_buf_prms,
1080 ps_cu_analyse,
1081 rd_opt_curr_idx,
1082 TU_EQ_CU_DIV2,
1083 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1084 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1085 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1086 100.0,
1087 ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY);
1088
1089 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1090 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1091 {
1092 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1093 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1094 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1095 }
1096 #endif
1097 }
1098 }
1099
1100 while(0 == end_flag)
1101 {
1102 UWORD8 *pu1_mode = NULL;
1103 WORD32 curr_func_mode = 0;
1104 void *pv_pred;
1105
1106 ASSERT(ctr < 36);
1107
1108 /* TU equal to CU size evaluation of different modes */
1109 if(0 == cu_eval_done)
1110 {
1111 /* check if the all the modes have been evaluated */
1112 if(255 == ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr])
1113 {
1114 cu_eval_done = 1;
1115 ctr = 0;
1116 }
1117 else if(
1118 (1 == ctr) &&
1119 ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
1120 (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
1121 (ps_ctxt->i1_slice_type != ISLICE))
1122 {
1123 ctr = 0;
1124 cu_eval_done = 1;
1125 subcu_eval_done = 1;
1126 subpu_eval_done = 1;
1127 }
1128 else
1129 {
1130 if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr])
1131 {
1132 ctr++;
1133 continue;
1134 }
1135
1136 pu1_mode =
1137 &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr];
1138 ctr++;
1139 curr_func_mode = TU_EQ_CU;
1140 }
1141 }
1142 /* Sub CU (NXN) mode evaluation of different pred modes */
1143 if((0 == subpu_eval_done) && (1 == cu_eval_done))
1144 {
1145 /*For NxN modes evaluation all candidates for all PU parts are evaluated */
1146 /*inside the ihevce_intra_rdopt_cu_ntu function, so the subpu_eval_done is set to 1 */
1147 {
1148 pu1_mode = &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][ctr];
1149
1150 curr_func_mode = TU_EQ_SUBCU;
1151 /* check if the any modes have to be evaluated */
1152 if(255 == *pu1_mode)
1153 {
1154 subpu_eval_done = 1;
1155 ctr = 0;
1156 }
1157 else if(ctr != 0) /* If the modes have to be evaluated, then terminate, as all modes are already evaluated */
1158 {
1159 subpu_eval_done = 1;
1160 ctr = 0;
1161 }
1162 else
1163 {
1164 ctr++;
1165 }
1166 }
1167 }
1168
1169 /* TU size equal to CU div2 mode evaluation of different pred modes */
1170 if((0 == subcu_eval_done) && (1 == subpu_eval_done) && (1 == cu_eval_done))
1171 {
1172 /* check if the all the modes have been evaluated */
1173 if(255 ==
1174 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr])
1175 {
1176 subcu_eval_done = 1;
1177 }
1178 else if(
1179 (1 == ctr) &&
1180 ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
1181 (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
1182 (ps_ctxt->i1_slice_type != ISLICE) && (ps_cu_analyse->u1_cu_size == 64))
1183 {
1184 subcu_eval_done = 1;
1185 }
1186 else
1187 {
1188 if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr])
1189 {
1190 ctr++;
1191 continue;
1192 }
1193
1194 pu1_mode = &ps_cu_analyse->s_cu_intra_cand
1195 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr];
1196
1197 ctr++;
1198 curr_func_mode = TU_EQ_CU_DIV2;
1199 }
1200 }
1201
1202 /* check if all CU options have been evaluated */
1203 if((1 == cu_eval_done) && (1 == subcu_eval_done) && (1 == subpu_eval_done))
1204 {
1205 break;
1206 }
1207
1208 /* RDOPT related copies and settings */
1209 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1210
1211 /* Assign ME/Intra pred buf. to the current intra cand. since we
1212 are storing pred data for final_recon function */
1213 {
1214 pv_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_curr_idx];
1215 }
1216
1217 /* RDOPT copy States : Prev Cu best to current init */
1218 COPY_CABAC_STATES(
1219 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1220 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1221 IHEVC_CAB_CTXT_END);
1222
1223 /* call the function which performs the normative Intra encode */
1224 rd_opt_cost = ((pf_intra_rdopt_cu_ntu)ps_ctxt->pv_intra_rdopt_cu_ntu)(
1225 ps_ctxt,
1226 ps_cu_prms,
1227 pv_pred,
1228 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_curr_idx],
1229 &s_chrm_cu_buf_prms,
1230 pu1_mode,
1231 ps_cu_analyse,
1232 pv_curr_src,
1233 pv_cu_left,
1234 pv_cu_top,
1235 pv_cu_top_left,
1236 ps_left_nbr_4x4,
1237 ps_top_nbr_4x4,
1238 nbr_4x4_left_strd,
1239 cu_left_stride,
1240 rd_opt_curr_idx,
1241 curr_func_mode,
1242 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1243 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1244 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1245 100.0);
1246
1247 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1248 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1249 {
1250 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1251 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1252 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1253 }
1254 #endif
1255
1256 /* based on the rd opt cost choose the best and current index */
1257 if(rd_opt_cost < rd_opt_least_cost)
1258 {
1259 /* swap the best and current indx */
1260 rd_opt_best_idx = !rd_opt_best_idx;
1261 rd_opt_curr_idx = !rd_opt_curr_idx;
1262 i4_best_cu_qp = ps_ctxt->i4_cu_qp;
1263
1264 rd_opt_least_cost = rd_opt_cost;
1265 ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost;
1266 }
1267
1268 if((TU_EQ_SUBCU == curr_func_mode) &&
1269 (ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_intra_flag) &&
1270 (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P2) && !FORCE_INTRA_TU_DEPTH_TO_0)
1271 {
1272 UWORD8 au1_tu_eq_cu_div2_modes[4];
1273 UWORD8 au1_freq_of_mode[4];
1274
1275 if(ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_part_mode == SIZE_2Nx2N)
1276 {
1277 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1278 255; //ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode[0];
1279 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] =
1280 255;
1281 }
1282 else
1283 {
1284 WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D(
1285 ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode,
1286 au1_tu_eq_cu_div2_modes,
1287 au1_freq_of_mode,
1288 4);
1289
1290 if(2 == i4_num_clusters)
1291 {
1292 if(au1_freq_of_mode[0] == 3)
1293 {
1294 ps_cu_analyse->s_cu_intra_cand
1295 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1296 au1_tu_eq_cu_div2_modes[0];
1297 ps_cu_analyse->s_cu_intra_cand
1298 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255;
1299 }
1300 else if(au1_freq_of_mode[1] == 3)
1301 {
1302 ps_cu_analyse->s_cu_intra_cand
1303 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1304 au1_tu_eq_cu_div2_modes[1];
1305 ps_cu_analyse->s_cu_intra_cand
1306 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255;
1307 }
1308 else
1309 {
1310 ps_cu_analyse->s_cu_intra_cand
1311 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1312 au1_tu_eq_cu_div2_modes[0];
1313 ps_cu_analyse->s_cu_intra_cand
1314 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] =
1315 au1_tu_eq_cu_div2_modes[1];
1316 ps_cu_analyse->s_cu_intra_cand
1317 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[2] = 255;
1318 }
1319 }
1320 }
1321 }
1322
1323 /* set the neighbour map to 0 */
1324 ihevce_set_nbr_map(
1325 ps_ctxt->pu1_ctb_nbr_map,
1326 ps_ctxt->i4_nbr_map_strd,
1327 (ps_cu_analyse->b3_cu_pos_x << 1),
1328 (ps_cu_analyse->b3_cu_pos_y << 1),
1329 (ps_cu_analyse->u1_cu_size >> 2),
1330 0);
1331 }
1332
1333 } /* end of Intra RD OPT cand evaluation */
1334
1335 ASSERT(i4_best_cu_qp > (ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1));
1336 ps_ctxt->i4_cu_qp = i4_best_cu_qp;
1337 ps_cu_analyse->i1_cu_qp = i4_best_cu_qp;
1338
1339 /* --------------------------------------- */
1340 /* --------Final mode Recon ---------- */
1341 /* --------------------------------------- */
1342 {
1343 enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
1344 void *pv_final_pred = NULL;
1345 WORD32 final_pred_strd = 0;
1346 void *pv_final_pred_chrm = NULL;
1347 WORD32 final_pred_strd_chrm = 0;
1348 WORD32 packed_pred_mode;
1349
1350 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1351 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
1352 {
1353 pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0];
1354 }
1355 #else
1356 pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0];
1357 #endif
1358
1359 ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1360 packed_pred_mode =
1361 ps_enc_loop_bestprms->u1_intra_flag + (ps_enc_loop_bestprms->u1_skip_flag) * 2;
1362
1363 if(!ps_ctxt->u1_is_input_data_hbd)
1364 {
1365 if(ps_enc_loop_bestprms->u1_intra_flag)
1366 {
1367 pv_final_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_best_idx];
1368 final_pred_strd =
1369 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_best_idx];
1370 }
1371 else
1372 {
1373 pv_final_pred = ps_best_inter_cand->pu1_pred_data;
1374 final_pred_strd = ps_best_inter_cand->i4_pred_data_stride;
1375 }
1376
1377 pv_final_pred_chrm =
1378 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
1379 rd_opt_best_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) +
1380 (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
1381 final_pred_strd_chrm =
1382 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
1383 }
1384
1385 ihevce_set_eval_flags(ps_ctxt, ps_enc_loop_bestprms);
1386
1387 {
1388 final_mode_process_prms_t s_prms;
1389
1390 void *pv_cu_luma_recon;
1391 void *pv_cu_chroma_recon;
1392 WORD32 luma_stride, chroma_stride;
1393
1394 if(!ps_ctxt->u1_is_input_data_hbd)
1395 {
1396 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1397 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
1398 {
1399 pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon;
1400 pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon;
1401 luma_stride = ps_cu_analyse->u1_cu_size;
1402 chroma_stride = ps_cu_analyse->u1_cu_size;
1403 }
1404 else
1405 {
1406 /* based on CU position derive the luma pointers */
1407 pv_cu_luma_recon = pu1_final_recon;
1408
1409 /* based on CU position derive the chroma pointers */
1410 pv_cu_chroma_recon = s_chrm_cu_buf_prms.pu1_final_recon;
1411
1412 luma_stride = ps_cu_prms->i4_luma_recon_stride;
1413
1414 chroma_stride = ps_cu_prms->i4_chrm_recon_stride;
1415 }
1416 #else
1417 pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon;
1418 pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon;
1419 luma_stride = ps_cu_analyse->u1_cu_size;
1420 chroma_stride = ps_cu_analyse->u1_cu_size;
1421 #endif
1422
1423 s_prms.ps_cu_nbr_prms = &s_cu_nbr_prms;
1424 s_prms.ps_best_inter_cand = ps_best_inter_cand;
1425 s_prms.ps_chrm_cu_buf_prms = &s_chrm_cu_buf_prms;
1426 s_prms.packed_pred_mode = packed_pred_mode;
1427 s_prms.rd_opt_best_idx = rd_opt_best_idx;
1428 s_prms.pv_src = pu1_curr_src;
1429 s_prms.src_strd = ps_cu_prms->i4_luma_src_stride;
1430 s_prms.pv_pred = pv_final_pred;
1431 s_prms.pred_strd = final_pred_strd;
1432 s_prms.pv_pred_chrm = pv_final_pred_chrm;
1433 s_prms.pred_chrm_strd = final_pred_strd_chrm;
1434 s_prms.pu1_final_ecd_data = pu1_ecd_data;
1435 s_prms.pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
1436 s_prms.csbf_strd = ps_ctxt->i4_cu_csbf_strd;
1437 s_prms.pv_luma_recon = pv_cu_luma_recon;
1438 s_prms.recon_luma_strd = luma_stride;
1439 s_prms.pv_chrm_recon = pv_cu_chroma_recon;
1440 s_prms.recon_chrma_strd = chroma_stride;
1441 s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
1442 s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
1443 s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size;
1444 s_prms.i1_cu_qp = ps_cu_analyse->i1_cu_qp;
1445 s_prms.u1_will_cabac_state_change = 1;
1446 s_prms.u1_recompute_sbh_and_rdoq = 0;
1447 s_prms.u1_is_first_pass = 1;
1448 }
1449
1450 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
1451 s_prms.u1_is_cu_noisy = !ps_enc_loop_bestprms->u1_intra_flag
1452 ? ps_cu_prms->u1_is_cu_noisy
1453 : ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY;
1454 #endif
1455
1456 ((pf_final_rdopt_mode_prcs)ps_ctxt->pv_final_rdopt_mode_prcs)(ps_ctxt, &s_prms);
1457
1458 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1459 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1460 {
1461 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1462 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1463 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1464 }
1465 #endif
1466 }
1467 }
1468
1469 /* --------------------------------------- */
1470 /* --------Populate CU out prms ---------- */
1471 /* --------------------------------------- */
1472 {
1473 enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
1474 UWORD8 *pu1_pu_map;
1475 ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1476
1477 /* Corner case : If Part is 2Nx2N and Merge has all TU with zero cbf */
1478 /* then it has to be coded as skip CU */
1479 if((SIZE_2Nx2N == ps_enc_loop_bestprms->u1_part_mode) &&
1480 (1 == ps_enc_loop_bestprms->as_pu_enc_loop[0].b1_merge_flag) &&
1481 (0 == ps_enc_loop_bestprms->u1_skip_flag) && (0 == ps_enc_loop_bestprms->u1_is_cu_coded))
1482 {
1483 ps_enc_loop_bestprms->u1_skip_flag = 1;
1484 }
1485
1486 /* update number PUs in CU */
1487 ps_cu_prms->i4_num_pus_in_cu = ps_enc_loop_bestprms->u2_num_pus_in_cu;
1488
1489 /* ---- populate the colocated pu map index --- */
1490 for(ctr = 0; ctr < ps_enc_loop_bestprms->u2_num_pus_in_cu; ctr++)
1491 {
1492 WORD32 i;
1493 WORD32 vert_ht;
1494 WORD32 horz_wd;
1495
1496 if(ps_enc_loop_bestprms->u1_intra_flag)
1497 {
1498 ps_enc_loop_bestprms->as_col_pu_enc_loop[ctr].b1_intra_flag = 1;
1499 vert_ht = ps_cu_analyse->u1_cu_size >> 2;
1500 horz_wd = ps_cu_analyse->u1_cu_size >> 2;
1501 }
1502 else
1503 {
1504 vert_ht = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_ht + 1) << 2) >> 2);
1505 horz_wd = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_wd + 1) << 2) >> 2);
1506 }
1507
1508 pu1_pu_map = pu1_col_pu_map + ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_x;
1509 pu1_pu_map += (ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_y * num_4x4_in_ctb);
1510
1511 for(i = 0; i < vert_ht; i++)
1512 {
1513 memset(pu1_pu_map, col_start_pu_idx, horz_wd);
1514 pu1_pu_map += num_4x4_in_ctb;
1515 }
1516 /* increment the index */
1517 col_start_pu_idx++;
1518 }
1519 /* ---- copy the colocated PUs to frm pu ----- */
1520 memcpy(
1521 ps_col_pu,
1522 &ps_enc_loop_bestprms->as_col_pu_enc_loop[0],
1523 ps_enc_loop_bestprms->u2_num_pus_in_cu * sizeof(pu_col_mv_t));
1524
1525 /*---populate qp for 4x4 nbr array based on skip and cbf zero flag---*/
1526 {
1527 entropy_context_t *ps_entropy_ctxt;
1528
1529 WORD32 diff_cu_qp_delta_depth, log2_ctb_size;
1530
1531 WORD32 log2_min_cu_qp_delta_size;
1532 UWORD32 block_addr_align;
1533 ps_entropy_ctxt = ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt;
1534
1535 log2_ctb_size = ps_entropy_ctxt->i1_log2_ctb_size;
1536 diff_cu_qp_delta_depth = ps_entropy_ctxt->ps_pps->i1_diff_cu_qp_delta_depth;
1537
1538 log2_min_cu_qp_delta_size = log2_ctb_size - diff_cu_qp_delta_depth;
1539 block_addr_align = 15 << (log2_min_cu_qp_delta_size - 3);
1540
1541 ps_entropy_ctxt->i4_qg_pos_x = ps_cu_analyse->b3_cu_pos_x & block_addr_align;
1542 ps_entropy_ctxt->i4_qg_pos_y = ps_cu_analyse->b3_cu_pos_y & block_addr_align;
1543 /*Update the Qp value used. It will not have a valid value iff
1544 current CU is (skipped/no_cbf). In that case the Qp needed for
1545 deblocking is calculated from top/left/previous coded CU*/
1546
1547 ps_ctxt->ps_enc_out_ctxt->i1_cu_qp = ps_cu_analyse->i1_cu_qp;
1548
1549 if(ps_entropy_ctxt->i4_qg_pos_x == ps_cu_analyse->b3_cu_pos_x &&
1550 ps_entropy_ctxt->i4_qg_pos_y == ps_cu_analyse->b3_cu_pos_y)
1551 {
1552 ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 1;
1553 }
1554 else
1555 {
1556 ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 0;
1557 }
1558 }
1559
1560 /* -- at the end of CU set the neighbour map to 1 -- */
1561 ihevce_set_nbr_map(
1562 ps_ctxt->pu1_ctb_nbr_map,
1563 ps_ctxt->i4_nbr_map_strd,
1564 (ps_cu_analyse->b3_cu_pos_x << 1),
1565 (ps_cu_analyse->b3_cu_pos_y << 1),
1566 (ps_cu_analyse->u1_cu_size >> 2),
1567 1);
1568
1569 /* -- at the end of CU update best cabac rdopt states -- */
1570 /* -- and also set the top row skip flags ------------- */
1571 ihevce_entropy_update_best_cu_states(
1572 &ps_ctxt->s_rdopt_entropy_ctxt,
1573 ps_cu_analyse->b3_cu_pos_x,
1574 ps_cu_analyse->b3_cu_pos_y,
1575 ps_cu_analyse->u1_cu_size,
1576 0,
1577 rd_opt_best_idx);
1578 }
1579
1580 /* Store Output struct */
1581 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1582 {
1583 {
1584 memcpy(
1585 &ps_ctxt->ps_enc_out_ctxt->s_cu_prms,
1586 &ps_ctxt->as_cu_prms[rd_opt_best_idx],
1587 sizeof(enc_loop_cu_final_prms_t));
1588 }
1589
1590 memcpy(
1591 &ps_ctxt->as_cu_recur_nbr[0],
1592 &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0],
1593 sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) *
1594 (ps_cu_analyse->u1_cu_size >> 2));
1595
1596 ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms;
1597
1598 ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0];
1599 }
1600 #else
1601 if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2)
1602 {
1603 ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1604
1605 ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0];
1606
1607 if(ps_ctxt->u1_disable_intra_eval && ps_ctxt->i4_deblk_pad_hpel_cur_pic)
1608 {
1609 /* Wait till top data is ready */
1610 /* Currently checking till top right CU */
1611 curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
1612
1613 if(i4_ctb_y_off == 0)
1614 {
1615 /* No wait for 1st row */
1616 cu_top_right_offset = -(MAX_CTB_SIZE);
1617 {
1618 ihevce_tile_params_t *ps_col_tile_params =
1619 ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
1620 ps_ctxt->i4_tile_col_idx);
1621
1622 /* No wait for 1st row */
1623 cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
1624 }
1625 cu_top_right_dep_pos = 0;
1626 }
1627 else
1628 {
1629 cu_top_right_offset = (ps_cu_analyse->u1_cu_size);
1630 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
1631 }
1632
1633 if(0 == ps_cu_analyse->b3_cu_pos_y)
1634 {
1635 ihevce_dmgr_chk_row_row_sync(
1636 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
1637 curr_cu_pos_in_row,
1638 cu_top_right_offset,
1639 cu_top_right_dep_pos,
1640 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1641 ps_ctxt->thrd_id);
1642 }
1643 }
1644 }
1645 else
1646 {
1647 {
1648 memcpy(
1649 &ps_ctxt->ps_enc_out_ctxt->s_cu_prms,
1650 &ps_ctxt->as_cu_prms[rd_opt_best_idx],
1651 sizeof(enc_loop_cu_final_prms_t));
1652 }
1653
1654 memcpy(
1655 &ps_ctxt->as_cu_recur_nbr[0],
1656 &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0],
1657 sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) *
1658 (ps_cu_analyse->u1_cu_size >> 2));
1659
1660 ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms;
1661
1662 ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0];
1663 }
1664 #endif
1665
1666 ps_ctxt->s_pred_buf_data.u4_is_buf_in_use &=
1667 ~((1 << (ps_ctxt->i4_max_num_inter_rdopt_cands + 4)) - 1);
1668
1669 return rd_opt_least_cost;
1670 }
1671
1672 /*!
1673 ******************************************************************************
1674 * \if Function name : ihevce_enc_loop_process_row \endif
1675 *
1676 * \brief
1677 * Row level enc_loop pass function
1678 *
1679 * \param[in] ps_ctxt : pointer to enc_loop module context
1680 * \param[in] ps_curr_src_bufs : pointer to input yuv buffer (row buffer)
1681 * \param[out] ps_curr_recon_bufs : pointer recon picture structure pointer (row buffer)
1682 * \param[in] ps_ctb_in : pointer CTB structure (output of ME/IPE) (row buffer)
1683 * \param[out] ps_ctb_out : pointer CTB output structure (row buffer)
1684 * \param[out] ps_cu_out : pointer CU output structure (row buffer)
1685 * \param[out] ps_tu_out : pointer TU output structure (row buffer)
1686 * \param[out] pi2_frm_coeffs : pointer coeff output (row buffer)
1687 * \param[in] i4_poc : current poc. Needed to send recon in dist-client mode
1688 *
1689 * \return
1690 * None
1691 *
1692 * Note : Currently the frame level calculations assume that the
1693 * frame width of the input / recon is an exact multiple of the CTB size
1694 *
1695 * \author
1696 * Ittiam
1697 *
1698 *****************************************************************************
1699 */
1700 void ihevce_enc_loop_process_row(
1701 ihevce_enc_loop_ctxt_t *ps_ctxt,
1702 iv_enc_yuv_buf_t *ps_curr_src_bufs,
1703 iv_enc_yuv_buf_t *ps_curr_recon_bufs,
1704 iv_enc_yuv_buf_src_t *ps_curr_recon_bufs_src,
1705 UWORD8 **ppu1_y_subpel_planes,
1706 ctb_analyse_t *ps_ctb_in,
1707 ctb_enc_loop_out_t *ps_ctb_out,
1708 ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse,
1709 cur_ctb_cu_tree_t *ps_row_cu_tree,
1710 cu_enc_loop_out_t *ps_row_cu,
1711 tu_enc_loop_out_t *ps_row_tu,
1712 pu_t *ps_row_pu,
1713 pu_col_mv_t *ps_row_col_pu,
1714 UWORD16 *pu2_num_pu_map,
1715 UWORD8 *pu1_row_pu_map,
1716 UWORD8 *pu1_row_ecd_data,
1717 UWORD32 *pu4_pu_offsets,
1718 frm_ctb_ctxt_t *ps_frm_ctb_prms,
1719 WORD32 vert_ctr,
1720 recon_pic_buf_t *ps_frm_recon,
1721 void *pv_dep_mngr_encloop_dep_me,
1722 pad_interp_recon_frm_t *ps_pad_interp_recon,
1723 WORD32 i4_pass,
1724 multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
1725 ihevce_tile_params_t *ps_tile_params)
1726 {
1727 enc_loop_cu_prms_t s_cu_prms;
1728 ctb_enc_loop_out_t *ps_ctb_out_dblk;
1729
1730 WORD32 ctb_ctr, ctb_start, ctb_end;
1731 WORD32 col_pu_map_idx;
1732 WORD32 num_ctbs_horz_pic;
1733 WORD32 ctb_size;
1734 WORD32 last_ctb_row_flag;
1735 WORD32 last_ctb_col_flag;
1736 WORD32 last_hz_ctb_wd;
1737 WORD32 last_vt_ctb_ht;
1738 void *pv_dep_mngr_enc_loop_dblk = ps_ctxt->pv_dep_mngr_enc_loop_dblk;
1739 void *pv_dep_mngr_enc_loop_sao = ps_ctxt->pv_dep_mngr_enc_loop_sao;
1740 void *pv_dep_mngr_enc_loop_cu_top_right = ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right;
1741 WORD32 dblk_offset, dblk_check_dep_pos;
1742 WORD32 sao_offset, sao_check_dep_pos;
1743 WORD32 aux_offset, aux_check_dep_pos;
1744 void *pv_dep_mngr_me_dep_encloop;
1745 ctb_enc_loop_out_t *ps_ctb_out_sao;
1746 /*Structure to store deblocking parameters at CTB-row level*/
1747 deblk_ctbrow_prms_t s_deblk_ctb_row_params;
1748 UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2);
1749
1750 pv_dep_mngr_me_dep_encloop = (void *)ps_frm_recon->pv_dep_mngr_recon;
1751 num_ctbs_horz_pic = ps_frm_ctb_prms->i4_num_ctbs_horz;
1752 ctb_size = ps_frm_ctb_prms->i4_ctb_size;
1753
1754 /* Store the num_ctb_horz in sao context*/
1755 ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_horz = ps_frm_ctb_prms->i4_num_ctbs_horz;
1756 ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_vert = ps_frm_ctb_prms->i4_num_ctbs_vert;
1757
1758 /* Set Variables for Dep. Checking and Setting */
1759 aux_check_dep_pos = vert_ctr;
1760 aux_offset = 2; /* Should be there for 0th row also */
1761 if(vert_ctr > 0)
1762 {
1763 dblk_check_dep_pos = vert_ctr - 1;
1764 dblk_offset = 2;
1765 }
1766 else
1767 {
1768 /* First row should run without waiting */
1769 dblk_check_dep_pos = 0;
1770 dblk_offset = -(ps_tile_params->i4_first_sample_x + 1);
1771 }
1772
1773 /* Set sao_offset and sao_check_dep_pos */
1774 if(vert_ctr > 1)
1775 {
1776 sao_check_dep_pos = vert_ctr - 2;
1777 sao_offset = 2;
1778 }
1779 else
1780 {
1781 /* First row should run without waiting */
1782 sao_check_dep_pos = 0;
1783 sao_offset = -(ps_tile_params->i4_first_sample_x + 1);
1784 }
1785
1786 /* check if the current row being processed is the last CTB row */
1787 last_ctb_row_flag = (vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1));
1788
1789 /* Valid Width (pixels) in the last CTB in every row (padding cases) */
1790 last_hz_ctb_wd = ps_frm_ctb_prms->i4_cu_aligned_pic_wd - ((num_ctbs_horz_pic - 1) * ctb_size);
1791
1792 /* Valid Height (pixels) in the last CTB row (padding cases) */
1793 last_vt_ctb_ht = ps_frm_ctb_prms->i4_cu_aligned_pic_ht -
1794 ((ps_frm_ctb_prms->i4_num_ctbs_vert - 1) * ctb_size);
1795 /* reset the states copied flag */
1796 ps_ctxt->u1_cabac_states_next_row_copied_flag = 0;
1797 ps_ctxt->u1_cabac_states_first_cu_copied_flag = 0;
1798
1799 /* populate the cu prms which are common for entire ctb row */
1800 s_cu_prms.i4_luma_src_stride = ps_curr_src_bufs->i4_y_strd;
1801 s_cu_prms.i4_chrm_src_stride = ps_curr_src_bufs->i4_uv_strd;
1802 s_cu_prms.i4_luma_recon_stride = ps_curr_recon_bufs->i4_y_strd;
1803 s_cu_prms.i4_chrm_recon_stride = ps_curr_recon_bufs->i4_uv_strd;
1804 s_cu_prms.i4_ctb_size = ctb_size;
1805
1806 ps_ctxt->i4_is_first_cu_qg_coded = 0;
1807
1808 /* Initialize the number of PUs for the first CTB to 0 */
1809 *pu2_num_pu_map = 0;
1810
1811 /*Getting the address of BS and Qp arrays and other info*/
1812 memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t));
1813 {
1814 WORD32 num_ctbs_horz_tile;
1815 /* Update the pointers which are accessed not by using ctb_ctr
1816 to the tile start here! */
1817 ps_ctb_in += ps_tile_params->i4_first_ctb_x;
1818 ps_ctb_out += ps_tile_params->i4_first_ctb_x;
1819
1820 ps_row_cu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_cus_in_ctb);
1821 ps_row_tu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_tus_in_ctb);
1822 ps_row_pu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb);
1823 pu1_row_pu_map += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb);
1824 pu1_row_ecd_data +=
1825 (ps_tile_params->i4_first_ctb_x *
1826 ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_num_tus_in_ctb << 1)
1827 : ((ps_frm_ctb_prms->i4_num_tus_in_ctb * 3) >> 1)) *
1828 MAX_SCAN_COEFFS_BYTES_4x4);
1829
1830 /* Update the pointers to the tile start */
1831 s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
1832 (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one vertical edge per 8x8 block
1833 s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
1834 (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one horizontal edge per 8x8 block
1835 s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2));
1836
1837 num_ctbs_horz_tile = ps_tile_params->i4_curr_tile_wd_in_ctb_unit;
1838
1839 ctb_start = ps_tile_params->i4_first_ctb_x;
1840 ctb_end = ps_tile_params->i4_first_ctb_x + num_ctbs_horz_tile;
1841 }
1842 ps_ctb_out_dblk = ps_ctb_out;
1843
1844 ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_ctxt->i4_frame_qp;
1845
1846 /* --------- Loop over all the CTBs in a row --------------- */
1847 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
1848 {
1849 cu_final_update_prms s_cu_update_prms;
1850
1851 cur_ctb_cu_tree_t *ps_cu_tree_analyse;
1852 me_ctb_data_t *ps_cu_me_data;
1853 ipe_l0_ctb_analyse_for_me_t *ps_ctb_ipe_analyse;
1854 cu_enc_loop_out_t *ps_cu_final;
1855 pu_col_mv_t *ps_ctb_col_pu;
1856
1857 WORD32 cur_ctb_ht, cur_ctb_wd;
1858 WORD32 last_cu_pos_in_ctb;
1859 WORD32 last_cu_size;
1860 WORD32 num_pus_in_ctb;
1861 UWORD8 u1_is_ctb_noisy;
1862 ps_ctb_col_pu = ps_row_col_pu + ctb_ctr * ps_frm_ctb_prms->i4_num_pus_in_ctb;
1863
1864 if(ctb_ctr)
1865 {
1866 ps_ctxt->i4_prev_QP = ps_ctxt->i4_last_cu_qp_from_prev_ctb;
1867 }
1868 /* If sub-pic RC is enabled */
1869 if(ps_ctxt->i4_sub_pic_level_rc)
1870 {
1871 ihevce_sub_pic_rc_scale_query((void *)ps_multi_thrd_ctxt, (void *)ps_ctxt);
1872 }
1873 /* check if the current CTB is in the last CTB column of the picture */
1874 last_ctb_col_flag = (ctb_ctr == (num_ctbs_horz_pic - 1));
1875 if(1 == last_ctb_col_flag)
1876 {
1877 cur_ctb_wd = last_hz_ctb_wd;
1878 }
1879 else
1880 {
1881 cur_ctb_wd = ctb_size;
1882 }
1883
1884 /* If it's the last CTB row, get the actual ht of CTB */
1885 if(1 == last_ctb_row_flag)
1886 {
1887 cur_ctb_ht = last_vt_ctb_ht;
1888 }
1889 else
1890 {
1891 cur_ctb_ht = ctb_size;
1892 }
1893
1894 ps_ctxt->u4_cur_ctb_ht = cur_ctb_ht;
1895 ps_ctxt->u4_cur_ctb_wd = cur_ctb_wd;
1896
1897 /* Wait till reference frame recon is available */
1898
1899 /* ------------ Wait till current data is ready from ME -------------- */
1900
1901 /*only for ref instance and Non I pics */
1902 if((ps_ctxt->i4_bitrate_instance_num == 0) &&
1903 ((ISLICE != ps_ctxt->i1_slice_type) || L0ME_IN_OPENLOOP_MODE))
1904 {
1905 if(ctb_ctr < (num_ctbs_horz_pic))
1906 {
1907 ihevce_dmgr_chk_row_row_sync(
1908 pv_dep_mngr_encloop_dep_me,
1909 ctb_ctr,
1910 1,
1911 vert_ctr,
1912 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1913 ps_ctxt->thrd_id);
1914 }
1915 }
1916
1917 /* store the cu pointer for current ctb out */
1918 ps_ctb_out->ps_enc_cu = ps_row_cu;
1919 ps_cu_final = ps_row_cu;
1920
1921 /* Get the base point of CU recursion tree */
1922 if(ISLICE != ps_ctxt->i1_slice_type)
1923 {
1924 ps_cu_tree_analyse = ps_ctb_in->ps_cu_tree;
1925 ASSERT(ps_ctb_in->ps_cu_tree == (ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE)));
1926 }
1927 else
1928 {
1929 /* Initialize ptr to current CTB */
1930 ps_cu_tree_analyse = ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE);
1931 }
1932
1933 /* Get the ME data pointer for 16x16 block data in ctb */
1934 ps_cu_me_data = ps_ctb_in->ps_me_ctb_data;
1935 u1_is_ctb_noisy = ps_ctb_in->s_ctb_noise_params.i4_noise_present;
1936 s_cu_prms.u1_is_cu_noisy = u1_is_ctb_noisy;
1937 s_cu_prms.pu1_is_8x8Blk_noisy = ps_ctb_in->s_ctb_noise_params.au1_is_8x8Blk_noisy;
1938
1939 /* store the ctb level prms in cu prms */
1940 s_cu_prms.i4_ctb_pos = ctb_ctr;
1941
1942 s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size;
1943 s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size;
1944
1945 {
1946 s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size;
1947 s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size;
1948 }
1949
1950 s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size;
1951
1952 s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size;
1953
1954 s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size;
1955
1956 /* Initialize ptr to current CTB */
1957 ps_ctb_ipe_analyse = ps_row_ipe_analyse + ctb_ctr; // * ctb_size;
1958
1959 /* reset the map idx for current ctb */
1960 col_pu_map_idx = 0;
1961 num_pus_in_ctb = 0;
1962
1963 /* reset the map buffer to 0*/
1964
1965 memset(
1966 &ps_ctxt->au1_nbr_ctb_map[0][0],
1967 0,
1968 (MAX_PU_IN_CTB_ROW + 1 + 8) * (MAX_PU_IN_CTB_ROW + 1 + 8));
1969
1970 /* set the CTB neighbour availability flags */
1971 ihevce_set_ctb_nbr(
1972 &ps_ctb_out->s_ctb_nbr_avail_flags,
1973 ps_ctxt->pu1_ctb_nbr_map,
1974 ps_ctxt->i4_nbr_map_strd,
1975 ctb_ctr,
1976 vert_ctr,
1977 ps_frm_ctb_prms);
1978
1979 /* -------- update the cur CTB offsets for inter prediction-------- */
1980 ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = ctb_ctr * ctb_size;
1981 ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = vert_ctr * ctb_size;
1982
1983 /* -------- update the cur CTB offsets for MV prediction-------- */
1984 ps_ctxt->s_mv_pred_ctxt.i4_ctb_x = ctb_ctr;
1985 ps_ctxt->s_mv_pred_ctxt.i4_ctb_y = vert_ctr;
1986
1987 /* -------------- Boundary Strength Initialization ----------- */
1988 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
1989 {
1990 ihevce_bs_init_ctb(&ps_ctxt->s_deblk_bs_prms, ps_frm_ctb_prms, ctb_ctr, vert_ctr);
1991 }
1992
1993 /* -------- update cur CTB offsets for entropy rdopt context------- */
1994 ihevce_entropy_rdo_ctb_init(&ps_ctxt->s_rdopt_entropy_ctxt, ctb_ctr, vert_ctr);
1995
1996 /* --------- CU Recursion --------------- */
1997
1998 {
1999 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2000 WORD32 i4_max_tree_depth = 4;
2001 #endif
2002 WORD32 i4_tree_depth = 0;
2003 /* Init no. of CU in CTB to 0*/
2004 ps_ctb_out->u1_num_cus_in_ctb = 0;
2005
2006 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2007 if(ps_ctxt->i4_bitrate_instance_num == 0)
2008 {
2009 WORD32 i4_max_tree_depth = 4;
2010 WORD32 i;
2011 for(i = 0; i < i4_max_tree_depth; i++)
2012 {
2013 COPY_CABAC_STATES(
2014 &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2015 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2016 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2017 }
2018 }
2019 #else
2020 if(ps_ctxt->i4_bitrate_instance_num == 0)
2021 {
2022 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2023 {
2024 WORD32 i4_max_tree_depth = 4;
2025 WORD32 i;
2026 for(i = 0; i < i4_max_tree_depth; i++)
2027 {
2028 COPY_CABAC_STATES(
2029 &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2030 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2031 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2032 }
2033 }
2034 }
2035
2036 #endif
2037 if(ps_ctxt->i4_bitrate_instance_num == 0)
2038 {
2039 /* FOR I- PIC populate the curr_ctb accordingly */
2040 if(ISLICE == ps_ctxt->i1_slice_type)
2041 {
2042 ps_ctb_ipe_analyse->ps_cu_tree_root = ps_cu_tree_analyse;
2043 ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1;
2044
2045 ihevce_populate_cu_tree(
2046 ps_ctb_ipe_analyse,
2047 ps_cu_tree_analyse,
2048 0,
2049 (IHEVCE_QUALITY_CONFIG_T)ps_ctxt->i4_quality_preset,
2050 POS_NA,
2051 POS_NA,
2052 POS_NA);
2053 }
2054 }
2055 ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1;
2056 ps_ctxt->ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2057 ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2058
2059 s_cu_update_prms.ppu1_row_ecd_data = &pu1_row_ecd_data;
2060 s_cu_update_prms.pi4_last_cu_pos_in_ctb = &last_cu_pos_in_ctb;
2061 s_cu_update_prms.pi4_last_cu_size = &last_cu_size;
2062 s_cu_update_prms.pi4_num_pus_in_ctb = &num_pus_in_ctb;
2063 s_cu_update_prms.pps_cu_final = &ps_cu_final;
2064 s_cu_update_prms.pps_row_pu = &ps_row_pu;
2065 s_cu_update_prms.pps_row_tu = &ps_row_tu;
2066 s_cu_update_prms.pu1_num_cus_in_ctb_out = &ps_ctb_out->u1_num_cus_in_ctb;
2067
2068 // source satd computation
2069 /* compute the source 8x8 SATD for the current CTB */
2070 /* populate pui4_source_satd in some structure and pass it inside */
2071 if(ps_ctxt->u1_enable_psyRDOPT)
2072 {
2073 /* declare local variables */
2074 WORD32 i;
2075 WORD32 ctb_size;
2076 WORD32 num_comp_had_blocks;
2077 UWORD8 *pu1_l0_block;
2078 WORD32 block_ht;
2079 WORD32 block_wd;
2080 WORD32 ht_offset;
2081 WORD32 wd_offset;
2082
2083 WORD32 num_horz_blocks;
2084 WORD32 had_block_size;
2085 WORD32 total_had_block_size;
2086 WORD16 pi2_residue_had_zscan[64];
2087 UWORD8 ai1_zeros_buffer[64];
2088
2089 WORD32 index_satd;
2090 WORD32 is_hbd;
2091 /* initialize the variables */
2092 block_ht = cur_ctb_ht;
2093 block_wd = cur_ctb_wd;
2094
2095 is_hbd = ps_ctxt->u1_is_input_data_hbd;
2096
2097 had_block_size = 8;
2098 total_had_block_size = had_block_size * had_block_size;
2099
2100 for(i = 0; i < total_had_block_size; i++)
2101 {
2102 ai1_zeros_buffer[i] = 0;
2103 }
2104
2105 ctb_size = block_ht * block_wd; //ctb_width * ctb_height;
2106 num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
2107
2108 num_horz_blocks = block_wd / had_block_size; //ctb_width / had_block_size;
2109 ht_offset = -had_block_size;
2110 wd_offset = -had_block_size;
2111
2112 index_satd = 0;
2113 /* Loop over all 8x8 blocks in the CTB */
2114 for(i = 0; i < num_comp_had_blocks; i++)
2115 {
2116 if(i % num_horz_blocks == 0)
2117 {
2118 wd_offset = -had_block_size;
2119 ht_offset += had_block_size;
2120 }
2121 wd_offset += had_block_size;
2122
2123 if(!is_hbd)
2124 {
2125 /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
2126 pu1_l0_block = s_cu_prms.pu1_luma_src +
2127 ps_curr_src_bufs->i4_y_strd * ht_offset + wd_offset;
2128
2129 ps_ctxt->ai4_source_satd_8x8[index_satd] =
2130
2131 ps_ctxt->s_cmn_opt_func.pf_AC_HAD_8x8_8bit(
2132 pu1_l0_block,
2133 ps_curr_src_bufs->i4_y_strd,
2134 ai1_zeros_buffer,
2135 had_block_size,
2136 pi2_residue_had_zscan,
2137 had_block_size);
2138 }
2139 index_satd++;
2140 }
2141 }
2142
2143 if(ps_ctxt->u1_enable_psyRDOPT)
2144 {
2145 /* declare local variables */
2146 WORD32 i;
2147 WORD32 ctb_size;
2148 WORD32 num_comp_had_blocks;
2149 UWORD8 *pu1_l0_block;
2150 UWORD8 *pu1_l0_block_prev = NULL;
2151 WORD32 block_ht;
2152 WORD32 block_wd;
2153 WORD32 ht_offset;
2154 WORD32 wd_offset;
2155
2156 WORD32 num_horz_blocks;
2157 WORD32 had_block_size;
2158 WORD16 pi2_residue_had[64];
2159 UWORD8 ai1_zeros_buffer[64];
2160 WORD32 index_satd = 0;
2161
2162 WORD32 is_hbd;
2163 is_hbd = ps_ctxt->u1_is_input_data_hbd; // 8 bit
2164
2165 /* initialize the variables */
2166 /* change this based on the bit depth */
2167 // ps_ctxt->u1_chroma_array_type
2168 if(ps_ctxt->u1_chroma_array_type == 1)
2169 {
2170 block_ht = cur_ctb_ht / 2;
2171 block_wd = cur_ctb_wd / 2;
2172 }
2173 else
2174 {
2175 block_ht = cur_ctb_ht;
2176 block_wd = cur_ctb_wd / 2;
2177 }
2178
2179 had_block_size = 4;
2180 memset(ai1_zeros_buffer, 0, 64 * sizeof(UWORD8));
2181
2182 ctb_size = block_ht * block_wd; //ctb_width * ctb_height;
2183 num_comp_had_blocks = 2 * ctb_size / (had_block_size * had_block_size);
2184
2185 num_horz_blocks = 2 * block_wd / had_block_size; //ctb_width / had_block_size;
2186 ht_offset = -had_block_size;
2187 wd_offset = -had_block_size;
2188
2189 if(!is_hbd)
2190 {
2191 /* loop over for every 4x4 blocks in the CU for Cb */
2192 for(i = 0; i < num_comp_had_blocks; i++)
2193 {
2194 if(i % num_horz_blocks == 0)
2195 {
2196 wd_offset = -had_block_size;
2197 ht_offset += had_block_size;
2198 }
2199 wd_offset += had_block_size;
2200
2201 /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
2202 if(i % 2 != 0)
2203 {
2204 if(!is_hbd)
2205 {
2206 pu1_l0_block = pu1_l0_block_prev + 1;
2207 }
2208 }
2209 else
2210 {
2211 if(!is_hbd)
2212 {
2213 pu1_l0_block = s_cu_prms.pu1_chrm_src +
2214 s_cu_prms.i4_chrm_src_stride * ht_offset + wd_offset;
2215 pu1_l0_block_prev = pu1_l0_block;
2216 }
2217 }
2218
2219 if(had_block_size == 4)
2220 {
2221 if(!is_hbd)
2222 {
2223 ps_ctxt->ai4_source_chroma_satd[index_satd] =
2224 ps_ctxt->s_cmn_opt_func.pf_chroma_AC_HAD_4x4_8bit(
2225 pu1_l0_block,
2226 s_cu_prms.i4_chrm_src_stride,
2227 ai1_zeros_buffer,
2228 had_block_size,
2229 pi2_residue_had,
2230 had_block_size);
2231 }
2232
2233 index_satd++;
2234
2235 } // block size of 4x4
2236
2237 } // for all blocks
2238
2239 } // is hbd check
2240 }
2241
2242 ihevce_cu_recurse_decide(
2243 ps_ctxt,
2244 &s_cu_prms,
2245 ps_cu_tree_analyse,
2246 ps_cu_tree_analyse,
2247 ps_ctb_ipe_analyse,
2248 ps_cu_me_data,
2249 &ps_ctb_col_pu,
2250 &s_cu_update_prms,
2251 pu1_row_pu_map,
2252 &col_pu_map_idx,
2253 i4_tree_depth,
2254 ctb_ctr << 6,
2255 vert_ctr << 6,
2256 cur_ctb_ht);
2257
2258 if(ps_ctxt->i1_slice_type != ISLICE)
2259 {
2260 ASSERT(
2261 (cur_ctb_wd * cur_ctb_ht) <=
2262 ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree_analyse));
2263 }
2264 /* If sub-pic RC is enabled */
2265 if(1 == ps_ctxt->i4_sub_pic_level_rc)
2266 {
2267 /*In a row, after the required CTB is reached, send data and query scale from Bit Control thread */
2268 ihevce_sub_pic_rc_in_data(
2269 (void *)ps_multi_thrd_ctxt,
2270 (void *)ps_ctxt,
2271 (void *)ps_ctb_ipe_analyse,
2272 (void *)ps_frm_ctb_prms);
2273 }
2274
2275 ps_ctxt->ps_enc_out_ctxt->u1_cu_size = 128;
2276
2277 } /* End of CU recursion block */
2278
2279 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2280 {
2281 ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2282 enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms;
2283 ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2284
2285 do
2286 {
2287 ihevce_update_final_cu_results(
2288 ps_ctxt,
2289 ps_enc_out_ctxt,
2290 ps_cu_prms,
2291 NULL, /* &ps_ctb_col_pu */
2292 NULL, /* &col_pu_map_idx */
2293 &s_cu_update_prms,
2294 ctb_ctr,
2295 vert_ctr);
2296
2297 ps_enc_out_ctxt++;
2298
2299 ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE);
2300
2301 } while(ps_enc_out_ctxt->u1_cu_size != 128);
2302 }
2303 #else
2304 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2305 {
2306 ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2307 enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms;
2308 ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2309
2310 do
2311 {
2312 ihevce_update_final_cu_results(
2313 ps_ctxt,
2314 ps_enc_out_ctxt,
2315 ps_cu_prms,
2316 NULL, /* &ps_ctb_col_pu */
2317 NULL, /* &col_pu_map_idx */
2318 &s_cu_update_prms,
2319 ctb_ctr,
2320 vert_ctr);
2321
2322 ps_enc_out_ctxt++;
2323
2324 ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE);
2325
2326 } while(ps_enc_out_ctxt->u1_cu_size != 128);
2327 }
2328 #endif
2329
2330 /* --- ctb level copy of data to left buffers--*/
2331 ((pf_enc_loop_ctb_left_copy)ps_ctxt->pv_enc_loop_ctb_left_copy)(ps_ctxt, &s_cu_prms);
2332
2333 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2334 {
2335 /* For the Unaligned CTB, make the invalid edge boundary strength 0 */
2336 ihevce_bs_clear_invalid(
2337 &ps_ctxt->s_deblk_bs_prms,
2338 last_ctb_row_flag,
2339 (ctb_ctr == (num_ctbs_horz_pic - 1)),
2340 last_hz_ctb_wd,
2341 last_vt_ctb_ht);
2342
2343 /* -----------------Read boundary strengths for current CTB------------- */
2344
2345 if((0 == ps_ctxt->i4_deblock_type) && (ps_ctxt->i4_deblk_pad_hpel_cur_pic))
2346 {
2347 /*Storing boundary strengths of current CTB*/
2348 UWORD32 *pu4_bs_horz = &ps_ctxt->s_deblk_bs_prms.au4_horz_bs[0];
2349 UWORD32 *pu4_bs_vert = &ps_ctxt->s_deblk_bs_prms.au4_vert_bs[0];
2350
2351 memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_vert, pu4_bs_vert, (ctb_size * 4) / 8);
2352 memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_horz, pu4_bs_horz, (ctb_size * 4) / 8);
2353 }
2354 //Increment for storing next CTB info
2355 s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2356 (ctb_size >> 3); //one vertical edge per 8x8 block
2357 s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2358 (ctb_size >> 3); //one horizontal edge per 8x8 block
2359 }
2360
2361 /* -------------- ctb level updates ----------------- */
2362 ps_row_cu += ps_ctb_out->u1_num_cus_in_ctb;
2363
2364 pu1_row_pu_map += (ctb_size >> 2) * (ctb_size >> 2);
2365
2366 /* first ctb offset will be populated by the caller */
2367 if(0 != ctb_ctr)
2368 {
2369 pu4_pu_offsets[ctb_ctr] = pu4_pu_offsets[ctb_ctr - 1] + num_pus_in_ctb;
2370 }
2371 pu2_num_pu_map[ctb_ctr] = num_pus_in_ctb;
2372 ASSERT(ps_ctb_out->u1_num_cus_in_ctb != 0);
2373
2374 ps_ctb_in++;
2375 ps_ctb_out++;
2376 }
2377
2378 /* ---------- Encloop end of row updates ----------------- */
2379
2380 /* at the end of row processing cu pixel counter is set to */
2381 /* (num ctb * ctb size) + ctb size */
2382 /* this is to set the dependency for right most cu of last */
2383 /* ctb's top right data dependency */
2384 /* this even takes care of entropy dependency for */
2385 /* incomplete ctb as well */
2386 ihevce_dmgr_set_row_row_sync(
2387 pv_dep_mngr_enc_loop_cu_top_right,
2388 (ctb_ctr * ctb_size + ctb_size),
2389 vert_ctr,
2390 ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2391
2392 ps_ctxt->s_sao_ctxt_t.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func;
2393
2394 /* Restore structure.
2395 Getting the address of stored-BS and Qp-map and other info */
2396 memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t));
2397 {
2398 /* Update the pointers to the tile start */
2399 s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2400 (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one vertical edge per 8x8 block
2401 s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2402 (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one horizontal edge per 8x8 block
2403 s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2));
2404 }
2405
2406 #if PROFILE_ENC_REG_DATA
2407 s_profile.u8_enc_reg_data[vert_ctr] = 0;
2408 #endif
2409
2410 /* -- Loop over all the CTBs in a row for Deblocking and Subpel gen --- */
2411 if(!ps_ctxt->u1_is_input_data_hbd)
2412 {
2413 WORD32 last_col_pic, last_col_tile;
2414
2415 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2416 {
2417 /* store the ctb level prms in cu prms */
2418 s_cu_prms.i4_ctb_pos = ctb_ctr;
2419 s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size;
2420 s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size;
2421
2422 s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size;
2423 s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size;
2424 s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size;
2425
2426 s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size;
2427
2428 s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size;
2429
2430 /* If last ctb in the horizontal row */
2431 if(ctb_ctr == (num_ctbs_horz_pic - 1))
2432 {
2433 last_col_pic = 1;
2434 }
2435 else
2436 {
2437 last_col_pic = 0;
2438 }
2439
2440 /* If last ctb in the tile row */
2441 if(ctb_ctr == (ctb_end - 1))
2442 {
2443 last_col_tile = 1;
2444 }
2445 else
2446 {
2447 last_col_tile = 0;
2448 }
2449
2450 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2451 {
2452 /* for last ctb of a row check top instead of top right */
2453 if(((ctb_ctr + 1) == ctb_end) && (vert_ctr > 0))
2454 {
2455 dblk_offset = 1;
2456 }
2457 /* Wait till top neighbour CTB has done it's deblocking*/
2458 ihevce_dmgr_chk_row_row_sync(
2459 pv_dep_mngr_enc_loop_dblk,
2460 ctb_ctr,
2461 dblk_offset,
2462 dblk_check_dep_pos,
2463 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
2464 ps_ctxt->thrd_id);
2465
2466 if((0 == ps_ctxt->i4_deblock_type))
2467 {
2468 /* Populate Qp-map */
2469 if(ctb_start == ctb_ctr)
2470 {
2471 ihevce_deblk_populate_qp_map(
2472 ps_ctxt,
2473 &s_deblk_ctb_row_params,
2474 ps_ctb_out_dblk,
2475 vert_ctr,
2476 ps_frm_ctb_prms,
2477 ps_tile_params);
2478 }
2479 ps_ctxt->s_deblk_prms.i4_ctb_size = ctb_size;
2480
2481 /* recon pointers and stride */
2482 ps_ctxt->s_deblk_prms.pu1_ctb_y = s_cu_prms.pu1_luma_recon;
2483 ps_ctxt->s_deblk_prms.pu1_ctb_uv = s_cu_prms.pu1_chrm_recon;
2484 ps_ctxt->s_deblk_prms.i4_luma_pic_stride = s_cu_prms.i4_luma_recon_stride;
2485 ps_ctxt->s_deblk_prms.i4_chroma_pic_stride = s_cu_prms.i4_chrm_recon_stride;
2486
2487 ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge = (0 == vert_ctr) ? 0 : 1;
2488 {
2489 ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge =
2490 (ps_tile_params->i4_first_ctb_y == vert_ctr) ? 0 : 1;
2491 }
2492 ps_ctxt->s_deblk_prms.i4_deblock_left_ctb_edge = (ctb_start == ctb_ctr) ? 0 : 1;
2493 //or according to slice boundary. Support yet to be added !!!!
2494
2495 ihevce_deblk_ctb(
2496 &ps_ctxt->s_deblk_prms, last_col_tile, &s_deblk_ctb_row_params);
2497
2498 //Increment for storing next CTB info
2499 s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2500 (ctb_size >> 3); //one vertical edge per 8x8 block
2501 s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2502 (ctb_size >> 3); //one horizontal edge per 8x8 block
2503 s_deblk_ctb_row_params.pi1_ctb_row_qp +=
2504 (ctb_size >> 2); //one qp per 4x4 block.
2505 }
2506 } // end of if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2507
2508 /* update the number of ctbs deblocked for this row */
2509 ihevce_dmgr_set_row_row_sync(
2510 pv_dep_mngr_enc_loop_dblk,
2511 (ctb_ctr + 1),
2512 vert_ctr,
2513 ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2514
2515 } //end of loop over CTBs in current CTB-row
2516
2517 /* Apply SAO over the previous CTB-row */
2518 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2519 {
2520 if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2521 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2522 {
2523 sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
2524
2525 if(vert_ctr > ps_tile_params->i4_first_ctb_y)
2526 {
2527 /*For last ctb check top dep only*/
2528 if((vert_ctr > 1) && ((ctb_ctr + 1) == ctb_end))
2529 {
2530 sao_offset = 1;
2531 }
2532
2533 ihevce_dmgr_chk_row_row_sync(
2534 pv_dep_mngr_enc_loop_sao,
2535 ctb_ctr,
2536 sao_offset,
2537 sao_check_dep_pos,
2538 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
2539 ps_ctxt->thrd_id);
2540
2541 /* Call the sao function to do sao for the current ctb*/
2542
2543 /* Register the curr ctb's x pos in sao context*/
2544 ps_sao_ctxt->i4_ctb_x = ctb_ctr;
2545
2546 /* Register the curr ctb's y pos in sao context*/
2547 ps_sao_ctxt->i4_ctb_y = vert_ctr - 1;
2548
2549 ps_ctb_out_sao = ps_sao_ctxt->ps_ctb_out +
2550 (vert_ctr - 1) * ps_frm_ctb_prms->i4_num_ctbs_horz + ctb_ctr;
2551 ps_sao_ctxt->ps_sao = &ps_ctb_out_sao->s_sao;
2552 ps_sao_ctxt->i4_sao_blk_wd = ctb_size;
2553 ps_sao_ctxt->i4_sao_blk_ht = ctb_size;
2554
2555 ps_sao_ctxt->i4_is_last_ctb_row = 0;
2556 ps_sao_ctxt->i4_is_last_ctb_col = 0;
2557
2558 if((ctb_ctr + 1) == ctb_end)
2559 {
2560 ps_sao_ctxt->i4_is_last_ctb_col = 1;
2561 ps_sao_ctxt->i4_sao_blk_wd =
2562 ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) -
2563 ps_tile_params->i4_curr_tile_width);
2564 }
2565
2566 /* Calculate the recon buf pointer and stride for the current ctb */
2567 ps_sao_ctxt->pu1_cur_luma_recon_buf =
2568 ps_sao_ctxt->pu1_frm_luma_recon_buf +
2569 (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2570 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2571
2572 ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
2573
2574 ps_sao_ctxt->pu1_cur_chroma_recon_buf =
2575 ps_sao_ctxt->pu1_frm_chroma_recon_buf +
2576 (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
2577 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2578 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2579
2580 ps_sao_ctxt->i4_cur_chroma_recon_stride =
2581 ps_sao_ctxt->i4_frm_chroma_recon_stride;
2582
2583 ps_sao_ctxt->pu1_cur_luma_src_buf =
2584 ps_sao_ctxt->pu1_frm_luma_src_buf +
2585 (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2586 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2587
2588 ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
2589
2590 ps_sao_ctxt->pu1_cur_chroma_src_buf =
2591 ps_sao_ctxt->pu1_frm_chroma_src_buf +
2592 (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
2593 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2594 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2595
2596 ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
2597
2598 /* Calculate the pointer to buff to store the (x,y)th sao
2599 * for the top merge of (x,y+1)th ctb
2600 */
2601 ps_sao_ctxt->ps_top_ctb_sao =
2602 &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
2603 [ps_sao_ctxt->i4_ctb_x +
2604 (ps_sao_ctxt->i4_ctb_y) *
2605 ps_frm_ctb_prms->i4_num_ctbs_horz +
2606 (ps_ctxt->i4_bitrate_instance_num *
2607 ps_sao_ctxt->i4_num_ctb_units)];
2608
2609 /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2610 ps_sao_ctxt->pu1_curr_sao_src_top_luma =
2611 ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
2612 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
2613 ps_sao_ctxt->i4_ctb_x * ctb_size +
2614 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2615 ps_sao_ctxt->i4_top_chroma_buf_size);
2616
2617 /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2618 ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
2619 ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
2620 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
2621 ps_sao_ctxt->i4_ctb_x * ctb_size +
2622 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2623 ps_sao_ctxt->i4_top_chroma_buf_size);
2624
2625 {
2626 UWORD32 u4_ctb_sao_bits;
2627
2628 ihevce_sao_analyse(
2629 &ps_ctxt->s_sao_ctxt_t,
2630 ps_ctb_out_sao,
2631 &u4_ctb_sao_bits,
2632 ps_tile_params);
2633 ps_ctxt
2634 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2635 [ps_ctxt->i4_bitrate_instance_num]
2636 ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
2637 ps_ctxt
2638 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2639 [ps_ctxt->i4_bitrate_instance_num]
2640 ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
2641 }
2642 /** Subpel generation not done for non-ref picture **/
2643 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2644 {
2645 /* Recon Padding */
2646 ihevce_recon_padding(
2647 ps_pad_interp_recon,
2648 ctb_ctr,
2649 vert_ctr - 1,
2650 ps_frm_ctb_prms,
2651 ps_ctxt->ps_func_selector);
2652 }
2653 /* update the number of SAO ctbs for this row */
2654 ihevce_dmgr_set_row_row_sync(
2655 pv_dep_mngr_enc_loop_sao,
2656 ctb_ctr + 1,
2657 vert_ctr - 1,
2658 ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2659 }
2660 }
2661 else //SAO Disabled
2662 {
2663 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2664 {
2665 /* Recon Padding */
2666 ihevce_recon_padding(
2667 ps_pad_interp_recon,
2668 ctb_ctr,
2669 vert_ctr,
2670 ps_frm_ctb_prms,
2671 ps_ctxt->ps_func_selector);
2672 }
2673 }
2674 } // end of SAO for loop
2675
2676 /* Call the sao function again for the last ctb row of frame */
2677 if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2678 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2679 {
2680 sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
2681
2682 if(vert_ctr ==
2683 (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1))
2684 {
2685 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2686 {
2687 /* Register the curr ctb's x pos in sao context*/
2688 ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr;
2689
2690 /* Register the curr ctb's y pos in sao context*/
2691 ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr;
2692
2693 ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out +
2694 vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz + ctb_ctr;
2695
2696 ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao;
2697
2698 ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd = ps_ctxt->s_sao_ctxt_t.i4_ctb_size;
2699 ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 0;
2700
2701 if((ctb_ctr + 1) == ctb_end)
2702 {
2703 ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 1;
2704 ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd =
2705 ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) -
2706 ps_tile_params->i4_curr_tile_width);
2707 }
2708
2709 ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht =
2710 ctb_size - ((ps_tile_params->i4_curr_tile_ht_in_ctb_unit * ctb_size) -
2711 ps_tile_params->i4_curr_tile_height);
2712
2713 ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 1;
2714
2715 /* Calculate the recon buf pointer and stride for the current ctb */
2716 ps_sao_ctxt->pu1_cur_luma_recon_buf =
2717 ps_sao_ctxt->pu1_frm_luma_recon_buf +
2718 (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2719 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2720
2721 ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
2722
2723 ps_sao_ctxt->pu1_cur_chroma_recon_buf =
2724 ps_sao_ctxt->pu1_frm_chroma_recon_buf +
2725 (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
2726 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2727 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2728
2729 ps_sao_ctxt->i4_cur_chroma_recon_stride =
2730 ps_sao_ctxt->i4_frm_chroma_recon_stride;
2731
2732 ps_sao_ctxt->pu1_cur_luma_src_buf =
2733 ps_sao_ctxt->pu1_frm_luma_src_buf +
2734 (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2735 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2736
2737 ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
2738
2739 ps_sao_ctxt->pu1_cur_chroma_src_buf =
2740 ps_sao_ctxt->pu1_frm_chroma_src_buf +
2741 (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
2742 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2743 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2744
2745 ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
2746
2747 /* Calculate the pointer to buff to store the (x,y)th sao
2748 * for the top merge of (x,y+1)th ctb
2749 */
2750 ps_sao_ctxt->ps_top_ctb_sao =
2751 &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
2752 [ps_sao_ctxt->i4_ctb_x +
2753 (ps_sao_ctxt->i4_ctb_y) *
2754 ps_frm_ctb_prms->i4_num_ctbs_horz +
2755 (ps_ctxt->i4_bitrate_instance_num *
2756 ps_sao_ctxt->i4_num_ctb_units)];
2757
2758 /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2759 ps_sao_ctxt->pu1_curr_sao_src_top_luma =
2760 ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
2761 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
2762 ps_sao_ctxt->i4_ctb_x * ctb_size +
2763 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2764 ps_sao_ctxt->i4_top_chroma_buf_size);
2765
2766 /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2767 ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
2768 ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
2769 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
2770 ps_sao_ctxt->i4_ctb_x * ctb_size +
2771 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2772 ps_sao_ctxt->i4_top_chroma_buf_size);
2773
2774 {
2775 UWORD32 u4_ctb_sao_bits;
2776 ihevce_sao_analyse(
2777 &ps_ctxt->s_sao_ctxt_t,
2778 ps_ctb_out_sao,
2779 &u4_ctb_sao_bits,
2780 ps_tile_params);
2781 ps_ctxt
2782 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2783 [ps_ctxt->i4_bitrate_instance_num]
2784 ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
2785 ps_ctxt
2786 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2787 [ps_ctxt->i4_bitrate_instance_num]
2788 ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
2789 }
2790 /** Subpel generation not done for non-ref picture **/
2791 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2792 {
2793 /* Recon Padding */
2794 ihevce_recon_padding(
2795 ps_pad_interp_recon,
2796 ctb_ctr,
2797 vert_ctr,
2798 ps_frm_ctb_prms,
2799 ps_ctxt->ps_func_selector);
2800 }
2801 }
2802 } //end of loop over CTBs in current CTB-row
2803 }
2804
2805 /* Subpel Plane Generation*/
2806 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2807 {
2808 if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2809 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2810 {
2811 if(0 != vert_ctr)
2812 {
2813 /** Subpel generation not done for non-ref picture **/
2814 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2815 {
2816 /* Padding and Subpel Plane Generation */
2817 ihevce_pad_interp_recon_ctb(
2818 ps_pad_interp_recon,
2819 ctb_ctr,
2820 vert_ctr - 1,
2821 ps_ctxt->i4_quality_preset,
2822 ps_frm_ctb_prms,
2823 ps_ctxt->ai2_scratch,
2824 ps_ctxt->i4_bitrate_instance_num,
2825 ps_ctxt->ps_func_selector);
2826 }
2827 }
2828 }
2829 else
2830 { // SAO Disabled
2831 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2832 {
2833 /* Padding and Subpel Plane Generation */
2834 ihevce_pad_interp_recon_ctb(
2835 ps_pad_interp_recon,
2836 ctb_ctr,
2837 vert_ctr,
2838 ps_ctxt->i4_quality_preset,
2839 ps_frm_ctb_prms,
2840 ps_ctxt->ai2_scratch,
2841 ps_ctxt->i4_bitrate_instance_num,
2842 ps_ctxt->ps_func_selector);
2843 }
2844 }
2845 }
2846
2847 {
2848 if(!ps_ctxt->i4_bitrate_instance_num)
2849 {
2850 if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2851 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2852 {
2853 /* If SAO is on, then signal completion of previous CTB row */
2854 if(0 != vert_ctr)
2855 {
2856 {
2857 WORD32 post_ctb_ctr;
2858
2859 for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
2860 {
2861 ihevce_dmgr_map_set_sync(
2862 pv_dep_mngr_me_dep_encloop,
2863 post_ctb_ctr,
2864 (vert_ctr - 1),
2865 MAP_CTB_COMPLETE);
2866 }
2867 }
2868 }
2869 }
2870 else
2871 {
2872 {
2873 WORD32 post_ctb_ctr;
2874
2875 for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
2876 {
2877 ihevce_dmgr_map_set_sync(
2878 pv_dep_mngr_me_dep_encloop,
2879 post_ctb_ctr,
2880 vert_ctr,
2881 MAP_CTB_COMPLETE);
2882 }
2883 }
2884 }
2885 }
2886 }
2887
2888 /*process last ctb row*/
2889 if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2890 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2891 {
2892 sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
2893
2894 if(vert_ctr ==
2895 (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1))
2896 {
2897 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2898 {
2899 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2900 {
2901 /* Padding and Subpel Plane Generation */
2902 ihevce_pad_interp_recon_ctb(
2903 ps_pad_interp_recon,
2904 ctb_ctr,
2905 vert_ctr,
2906 ps_ctxt->i4_quality_preset,
2907 ps_frm_ctb_prms,
2908 ps_ctxt->ai2_scratch,
2909 ps_ctxt->i4_bitrate_instance_num,
2910 ps_ctxt->ps_func_selector);
2911 }
2912 }
2913 }
2914 /* If SAO is on, then signal completion of the last CTB row of frame */
2915 {
2916 if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1))
2917 {
2918 if(!ps_ctxt->i4_bitrate_instance_num)
2919 {
2920 {
2921 WORD32 post_ctb_ctr;
2922
2923 for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
2924 {
2925 ihevce_dmgr_map_set_sync(
2926 pv_dep_mngr_me_dep_encloop,
2927 post_ctb_ctr,
2928 vert_ctr,
2929 MAP_CTB_COMPLETE);
2930 }
2931 }
2932 }
2933 }
2934 }
2935 }
2936 }
2937
2938 return;
2939 }
2940
2941 /*!
2942 ******************************************************************************
2943 * \if Function name : ihevce_enc_loop_pass \endif
2944 *
2945 * \brief
2946 * Frame level enc_loop pass function
2947 *
2948 * \param[in] pv_ctxt : pointer to enc_loop module
2949 * \param[in] ps_frm_lamda : Frame level Lambda params
2950 * \param[in] ps_inp : pointer to input yuv buffer (frame buffer)
2951 * \param[in] ps_ctb_in : pointer CTB structure (output of ME/IPE) (frame buffer)
2952 * \param[out] ps_frm_recon : pointer recon picture structure pointer (frame buffer)
2953 * \param[out] ps_ctb_out : pointer CTB output structure (frame buffer)
2954 * \param[out] ps_cu_out : pointer CU output structure (frame buffer)
2955 * \param[out] ps_tu_out : pointer TU output structure (frame buffer)
2956 * \param[out] pi2_frm_coeffs : pointer coeff output frame buffer)
2957 *
2958 * \return
2959 * None
2960 *
2961 * Note : Currently the frame level calcualtions done assumes that
2962 * framewidth of the input /recon are excat multiple of ctbsize
2963 *
2964 * \author
2965 * Ittiam
2966 *
2967 *****************************************************************************
2968 */
ihevce_enc_loop_process(void * pv_ctxt,ihevce_lap_enc_buf_t * ps_curr_inp,ctb_analyse_t * ps_ctb_in,ipe_l0_ctb_analyse_for_me_t * ps_ipe_analyse,recon_pic_buf_t * ps_frm_recon,cur_ctb_cu_tree_t * ps_cu_tree_out,ctb_enc_loop_out_t * ps_ctb_out,cu_enc_loop_out_t * ps_cu_out,tu_enc_loop_out_t * ps_tu_out,pu_t * ps_pu_out,UWORD8 * pu1_frm_ecd_data,frm_ctb_ctxt_t * ps_frm_ctb_prms,frm_lambda_ctxt_t * ps_frm_lamda,multi_thrd_ctxt_t * ps_multi_thrd_ctxt,WORD32 thrd_id,WORD32 i4_enc_frm_id,WORD32 i4_pass)2969 void ihevce_enc_loop_process(
2970 void *pv_ctxt,
2971 ihevce_lap_enc_buf_t *ps_curr_inp,
2972 ctb_analyse_t *ps_ctb_in,
2973 ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse,
2974 recon_pic_buf_t *ps_frm_recon,
2975 cur_ctb_cu_tree_t *ps_cu_tree_out,
2976 ctb_enc_loop_out_t *ps_ctb_out,
2977 cu_enc_loop_out_t *ps_cu_out,
2978 tu_enc_loop_out_t *ps_tu_out,
2979 pu_t *ps_pu_out,
2980 UWORD8 *pu1_frm_ecd_data,
2981 frm_ctb_ctxt_t *ps_frm_ctb_prms,
2982 frm_lambda_ctxt_t *ps_frm_lamda,
2983 multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
2984 WORD32 thrd_id,
2985 WORD32 i4_enc_frm_id,
2986 WORD32 i4_pass)
2987 {
2988 WORD32 vert_ctr;
2989 WORD32 tile_col_idx;
2990 iv_enc_yuv_buf_t s_curr_src_bufs;
2991 iv_enc_yuv_buf_t s_curr_recon_bufs;
2992 iv_enc_yuv_buf_src_t s_curr_recon_bufs_src;
2993 UWORD32 *pu4_pu_offsets;
2994 WORD32 end_of_frame;
2995 UWORD8 *apu1_y_sub_pel_planes[3];
2996 pad_interp_recon_frm_t s_pad_interp_recon;
2997 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_ctxt;
2998
2999 ihevce_enc_loop_ctxt_t *ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[thrd_id];
3000
3001 WORD32 i4_bitrate_instance_num = ps_ctxt->i4_bitrate_instance_num;
3002
3003 /* initialize the closed loop lambda for the current frame */
3004 ps_ctxt->i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf;
3005 ps_ctxt->i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf;
3006 ps_ctxt->u4_chroma_cost_weighing_factor = ps_frm_lamda->u4_chroma_cost_weighing_factor;
3007 ps_ctxt->i4_satd_lamda = ps_frm_lamda->i4_cl_satd_lambda_qf;
3008 ps_ctxt->i4_sad_lamda = ps_frm_lamda->i4_cl_sad_type2_lambda_qf;
3009 ps_ctxt->thrd_id = thrd_id;
3010 ps_ctxt->u1_is_refPic = ps_curr_inp->s_lap_out.i4_is_ref_pic;
3011
3012 #if DISABLE_SAO_WHEN_NOISY
3013 ps_ctxt->s_sao_ctxt_t.ps_ctb_data = ps_ctb_in;
3014 ps_ctxt->s_sao_ctxt_t.i4_ctb_data_stride = ps_frm_ctb_prms->i4_num_ctbs_horz;
3015 #endif
3016
3017 #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
3018 ps_ctxt->pv_err_func_selector = ps_func_selector;
3019 #endif
3020
3021 /*Bit0 - of this Flag indicates whether current pictute needs to be deblocked,
3022 padded and hpel planes need to be generated.
3023 Bit1 - of this flag set to 1 if sao is enabled. This is to enable deblocking when sao is enabled*/
3024 ps_ctxt->i4_deblk_pad_hpel_cur_pic =
3025 (ps_frm_recon->i4_deblk_pad_hpel_cur_pic) ||
3026 ((ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
3027 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
3028 << 1);
3029
3030 /* Share all reference pictures with nbr clients. This flag will be used only
3031 in case of dist-enc mode */
3032 ps_ctxt->i4_share_flag = (ps_frm_recon->i4_is_reference != 0);
3033 ps_ctxt->pv_frm_recon = (void *)ps_frm_recon;
3034
3035 /* Register the frame level ssd lamda for both luma and chroma*/
3036 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf;
3037 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf;
3038
3039 ihevce_populate_cl_cu_lambda_prms(
3040 ps_ctxt,
3041 ps_frm_lamda,
3042 (WORD32)ps_ctxt->i1_slice_type,
3043 ps_curr_inp->s_lap_out.i4_temporal_lyr_id,
3044 ENC_LOOP_LAMBDA_TYPE);
3045
3046 ps_ctxt->u1_disable_intra_eval = DISABLE_INTRA_IN_BPICS &&
3047 (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) &&
3048 (ps_ctxt->i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE);
3049
3050 end_of_frame = 0;
3051
3052 /* ----------------------------------------------------- */
3053 /* store the stride and dimensions of source and recon */
3054 /* buffer pointers will be over written at every CTB row */
3055 /* ----------------------------------------------------- */
3056 memcpy(&s_curr_src_bufs, &ps_curr_inp->s_lap_out.s_input_buf, sizeof(iv_enc_yuv_buf_t));
3057
3058 memcpy(&s_curr_recon_bufs, &ps_frm_recon->s_yuv_buf_desc, sizeof(iv_enc_yuv_buf_t));
3059
3060 memcpy(&s_curr_recon_bufs_src, &ps_frm_recon->s_yuv_buf_desc_src, sizeof(iv_enc_yuv_buf_src_t));
3061
3062 /* get the frame level pu offset pointer*/
3063 pu4_pu_offsets = ps_frm_recon->pu4_pu_off;
3064
3065 s_pad_interp_recon.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
3066
3067 /* ------------ Loop over all the CTB rows --------------- */
3068 while(0 == end_of_frame)
3069 {
3070 UWORD8 *pu1_tmp;
3071 UWORD8 *pu1_row_pu_map;
3072 UWORD8 *pu1_row_ecd_data;
3073 ctb_analyse_t *ps_ctb_row_in;
3074 ctb_enc_loop_out_t *ps_ctb_row_out;
3075 cu_enc_loop_out_t *ps_row_cu;
3076 tu_enc_loop_out_t *ps_row_tu;
3077 pu_t *ps_row_pu;
3078 pu_col_mv_t *ps_row_col_pu;
3079 job_queue_t *ps_job;
3080 UWORD32 *pu4_pu_row_offsets;
3081 UWORD16 *pu2_num_pu_row;
3082
3083 ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse;
3084 cur_ctb_cu_tree_t *ps_row_cu_tree;
3085 UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2);
3086
3087 /* Get the current row from the job queue */
3088 ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
3089 ps_multi_thrd_ctxt, ENC_LOOP_JOB + i4_bitrate_instance_num, 1, i4_enc_frm_id);
3090
3091 /* Register the pointer to ctb out of the current frame*/
3092 ps_ctxt->s_sao_ctxt_t.ps_ctb_out = ps_ctb_out;
3093
3094 /* If all rows are done, set the end of process flag to 1, */
3095 /* and the current row to -1 */
3096 if(NULL == ps_job)
3097 {
3098 vert_ctr = -1;
3099 tile_col_idx = -1;
3100 end_of_frame = 1;
3101 }
3102 else
3103 {
3104 ihevce_tile_params_t *ps_col_tile_params_temp;
3105 ihevce_tile_params_t *ps_tile_params;
3106 WORD32 i4_tile_id;
3107
3108 ASSERT((ENC_LOOP_JOB + i4_bitrate_instance_num) == ps_job->i4_task_type);
3109 /* set the output dependency */
3110 ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_enc_frm_id);
3111
3112 /* Obtain the current row's details from the job */
3113 vert_ctr = ps_job->s_job_info.s_enc_loop_job_info.i4_ctb_row_no;
3114 {
3115 /* Obtain the current colum tile index from the job */
3116 tile_col_idx = ps_job->s_job_info.s_enc_loop_job_info.i4_tile_col_idx;
3117
3118 /* The tile parameter for the col. idx. Use only the properties
3119 which is same for all the bottom tiles like width, start_x, etc.
3120 Don't use height, start_y, etc. */
3121 ps_col_tile_params_temp =
3122 ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + tile_col_idx);
3123
3124 /* Derive actual tile_id based on vert_ctr */
3125 i4_tile_id =
3126 *(ps_frm_ctb_prms->pi4_tile_id_map +
3127 vert_ctr * ps_frm_ctb_prms->i4_tile_id_ctb_map_stride +
3128 ps_col_tile_params_temp->i4_first_ctb_x);
3129 /* Derive pointer to current tile prms */
3130 ps_tile_params =
3131 ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + i4_tile_id);
3132 }
3133
3134 ps_ctxt->i4_tile_col_idx = tile_col_idx;
3135 /* derive the current ctb row pointers */
3136
3137 /* luma src */
3138 pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf +
3139 (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y *
3140 ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) +
3141 ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x;
3142
3143 pu1_tmp +=
3144 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size *
3145 ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd);
3146
3147 s_curr_src_bufs.pv_y_buf = pu1_tmp;
3148
3149 if(!ps_ctxt->u1_is_input_data_hbd)
3150 {
3151 /* cb src */
3152 pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf;
3153 pu1_tmp +=
3154 (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) *
3155 ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd);
3156
3157 s_curr_src_bufs.pv_u_buf = pu1_tmp;
3158 }
3159
3160 /* luma recon */
3161 pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3162 pu1_tmp +=
3163 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3164
3165 s_curr_recon_bufs.pv_y_buf = pu1_tmp;
3166 s_pad_interp_recon.pu1_luma_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3167 s_pad_interp_recon.i4_luma_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_y_strd;
3168 if(!ps_ctxt->u1_is_input_data_hbd)
3169 {
3170 /* cb recon */
3171 pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3172 pu1_tmp +=
3173 (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) *
3174 ps_frm_recon->s_yuv_buf_desc.i4_uv_strd);
3175
3176 s_curr_recon_bufs.pv_u_buf = pu1_tmp;
3177 s_pad_interp_recon.pu1_chrm_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3178 s_pad_interp_recon.i4_chrm_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_uv_strd;
3179
3180 s_pad_interp_recon.i4_ctb_size = ps_frm_ctb_prms->i4_ctb_size;
3181
3182 /* Register the source buffer pointers in sao context*/
3183 ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_src_buf =
3184 (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf +
3185 (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y *
3186 ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) +
3187 ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x;
3188
3189 ps_ctxt->s_sao_ctxt_t.i4_frm_luma_src_stride =
3190 ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd;
3191
3192 ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_src_buf =
3193 (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf;
3194
3195 ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_src_stride =
3196 ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd;
3197 }
3198
3199 /* Subpel planes hxfy, fxhy, hxhy*/
3200 pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[0];
3201 pu1_tmp +=
3202 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3203 apu1_y_sub_pel_planes[0] = pu1_tmp;
3204 s_pad_interp_recon.pu1_sbpel_hxfy = ps_frm_recon->apu1_y_sub_pel_planes[0];
3205
3206 pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[1];
3207 pu1_tmp +=
3208 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3209 apu1_y_sub_pel_planes[1] = pu1_tmp;
3210 s_pad_interp_recon.pu1_sbpel_fxhy = ps_frm_recon->apu1_y_sub_pel_planes[1];
3211
3212 pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[2];
3213 pu1_tmp +=
3214 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3215 apu1_y_sub_pel_planes[2] = pu1_tmp;
3216 s_pad_interp_recon.pu1_sbpel_hxhy = ps_frm_recon->apu1_y_sub_pel_planes[2];
3217
3218 /* row level coeffs buffer */
3219 pu1_row_ecd_data =
3220 pu1_frm_ecd_data +
3221 (vert_ctr *
3222 ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_max_tus_in_row << 1)
3223 : ((ps_frm_ctb_prms->i4_max_tus_in_row * 3) >> 1)) *
3224 MAX_SCAN_COEFFS_BYTES_4x4);
3225
3226 /* Row level CU buffer */
3227 ps_row_cu = ps_cu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_cus_in_row);
3228
3229 /* Row level TU buffer */
3230 ps_row_tu = ps_tu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_tus_in_row);
3231
3232 /* Row level PU buffer */
3233 ps_row_pu = ps_pu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row);
3234
3235 /* Row level colocated PU buffer */
3236 /* ps_frm_col_mv has (i4_num_ctbs_horz + 1) CTBs for stride */
3237 ps_row_col_pu =
3238 ps_frm_recon->ps_frm_col_mv + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) *
3239 ps_frm_ctb_prms->i4_num_pus_in_ctb);
3240 /* Row level col PU map buffer */
3241 /* pu1_frm_pu_map has (i4_num_ctbs_horz + 1) CTBs for stride */
3242 pu1_row_pu_map =
3243 ps_frm_recon->pu1_frm_pu_map + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) *
3244 ps_frm_ctb_prms->i4_num_pus_in_ctb);
3245 /* row ctb in pointer */
3246 ps_ctb_row_in = ps_ctb_in + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3247
3248 /* row ctb out pointer */
3249 ps_ctb_row_out = ps_ctb_out + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3250
3251 /* row number of PUs map pointer */
3252 pu2_num_pu_row =
3253 ps_frm_recon->pu2_num_pu_map + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3254
3255 /* row pu offsets pointer */
3256 pu4_pu_row_offsets = pu4_pu_offsets + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3257 /* store the first CTB pu offset pointer */
3258 *pu4_pu_row_offsets = vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row;
3259 /* Initialize ptr to current IPE row */
3260 ps_row_ipe_analyse = ps_ipe_analyse + (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz);
3261
3262 /* Initialize ptr to current row */
3263 ps_row_cu_tree = ps_cu_tree_out +
3264 (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE);
3265
3266 /* Get the EncLoop Top-Right CU Dep Mngr */
3267 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right =
3268 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[ps_ctxt->i4_enc_frm_id]
3269 [i4_bitrate_instance_num];
3270 /* Get the EncLoop Deblock Dep Mngr */
3271 ps_ctxt->pv_dep_mngr_enc_loop_dblk =
3272 ps_master_ctxt
3273 ->aapv_dep_mngr_enc_loop_dblk[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num];
3274 /* Get the EncLoop Sao Dep Mngr */
3275 ps_ctxt->pv_dep_mngr_enc_loop_sao =
3276 ps_master_ctxt
3277 ->aapv_dep_mngr_enc_loop_sao[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num];
3278
3279 ps_ctxt->pu1_curr_row_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr][0];
3280
3281 {
3282 /* derive the pointers of top row buffers */
3283 ps_ctxt->pv_top_row_luma =
3284 (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] +
3285 (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) +
3286 (vert_ctr - 1) * ps_ctxt->i4_top_row_luma_stride;
3287
3288 ps_ctxt->pv_top_row_chroma =
3289 (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] +
3290 (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) +
3291 (vert_ctr - 1) * ps_ctxt->i4_top_row_chroma_stride;
3292
3293 /* derive the pointers of bottom row buffers to update current row data */
3294 ps_ctxt->pv_bot_row_luma =
3295 (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] +
3296 (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) +
3297 (vert_ctr)*ps_ctxt->i4_top_row_luma_stride;
3298
3299 ps_ctxt->pv_bot_row_chroma =
3300 (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] +
3301 (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) +
3302 (vert_ctr)*ps_ctxt->i4_top_row_chroma_stride;
3303
3304 /* Register the buffer pointers in sao context*/
3305 ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_recon_buf =
3306 (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3307 ps_ctxt->s_sao_ctxt_t.i4_frm_luma_recon_stride =
3308 ps_frm_recon->s_yuv_buf_desc.i4_y_strd;
3309
3310 ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_recon_buf =
3311 (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3312 ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_recon_stride =
3313 ps_frm_recon->s_yuv_buf_desc.i4_uv_strd;
3314
3315 ps_ctxt->s_sao_ctxt_t.ps_rdopt_entropy_ctxt = &ps_ctxt->s_rdopt_entropy_ctxt;
3316
3317 ps_ctxt->s_sao_ctxt_t.i4_frm_top_luma_buf_stride =
3318 ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 1;
3319
3320 ps_ctxt->s_sao_ctxt_t.i4_frm_top_chroma_buf_stride =
3321 ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 2;
3322 }
3323
3324 ps_ctxt->ps_top_row_nbr =
3325 ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] +
3326 (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) +
3327 (vert_ctr - 1) * ps_ctxt->i4_top_row_nbr_stride;
3328
3329 ps_ctxt->ps_bot_row_nbr =
3330 ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] +
3331 (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) +
3332 (vert_ctr)*ps_ctxt->i4_top_row_nbr_stride;
3333
3334 if(vert_ctr > 0)
3335 {
3336 ps_ctxt->pu1_top_rt_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr - 1][0];
3337 }
3338 else
3339 {
3340 ps_ctxt->pu1_top_rt_cabac_state = NULL;
3341 }
3342
3343 ASSERT(
3344 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0]
3345 .ps_pps->i1_sign_data_hiding_flag ==
3346 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1]
3347 .ps_pps->i1_sign_data_hiding_flag);
3348
3349 /* call the row level processing function */
3350 ihevce_enc_loop_process_row(
3351 ps_ctxt,
3352 &s_curr_src_bufs,
3353 &s_curr_recon_bufs,
3354 &s_curr_recon_bufs_src,
3355 &apu1_y_sub_pel_planes[0],
3356 ps_ctb_row_in,
3357 ps_ctb_row_out,
3358 ps_row_ipe_analyse,
3359 ps_row_cu_tree,
3360 ps_row_cu,
3361 ps_row_tu,
3362 ps_row_pu,
3363 ps_row_col_pu,
3364 pu2_num_pu_row,
3365 pu1_row_pu_map,
3366 pu1_row_ecd_data,
3367 pu4_pu_row_offsets,
3368 ps_frm_ctb_prms,
3369 vert_ctr,
3370 ps_frm_recon,
3371 ps_ctxt->pv_dep_mngr_encloop_dep_me,
3372 &s_pad_interp_recon,
3373 i4_pass,
3374 ps_multi_thrd_ctxt,
3375 ps_tile_params);
3376 }
3377 }
3378 }
3379
3380 /*!
3381 ******************************************************************************
3382 * \if Function name : ihevce_enc_loop_dblk_get_prms_dep_mngr \endif
3383 *
3384 * \brief Returns to the caller key attributes relevant for dependency manager,
3385 * ie, the number of vertical units in l0 layer
3386 *
3387 * \par Description:
3388 *
3389 * \param[in] pai4_ht : ht
3390 * \param[out] pi4_num_vert_units_in_lyr : Pointer to store num vertical units
3391 * for deblocking
3392 *
3393 * \return
3394 * None
3395 *
3396 * \author
3397 * Ittiam
3398 *
3399 *****************************************************************************
3400 */
ihevce_enc_loop_dblk_get_prms_dep_mngr(WORD32 i4_ht,WORD32 * pi4_num_vert_units_in_lyr)3401 void ihevce_enc_loop_dblk_get_prms_dep_mngr(WORD32 i4_ht, WORD32 *pi4_num_vert_units_in_lyr)
3402 {
3403 /* Blk ht at a given layer*/
3404 WORD32 unit_ht_c;
3405 WORD32 ctb_size = 64;
3406
3407 /* compute blk ht and unit ht */
3408 unit_ht_c = ctb_size;
3409
3410 /* set the numebr of vertical units */
3411 *pi4_num_vert_units_in_lyr = (i4_ht + unit_ht_c - 1) / unit_ht_c;
3412 }
3413
3414 /*!
3415 ******************************************************************************
3416 * \if Function name : ihevce_enc_loop_get_num_mem_recs \endif
3417 *
3418 * \brief
3419 * Number of memory records are returned for enc_loop module
3420 * Note : Include TOT MEM. req. for ENC.LOOP + TOT MEM. req. for Dep Mngr for Dblk
3421 *
3422 * \return
3423 * None
3424 *
3425 * \author
3426 * Ittiam
3427 *
3428 *****************************************************************************
3429 */
3430 WORD32
ihevce_enc_loop_get_num_mem_recs(WORD32 i4_num_bitrate_inst,WORD32 i4_num_enc_loop_frm_pllel)3431 ihevce_enc_loop_get_num_mem_recs(WORD32 i4_num_bitrate_inst, WORD32 i4_num_enc_loop_frm_pllel)
3432 {
3433 WORD32 enc_loop_mem_recs = NUM_ENC_LOOP_MEM_RECS;
3434 WORD32 enc_loop_dblk_dep_mngr_mem_recs =
3435 i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
3436 WORD32 enc_loop_sao_dep_mngr_mem_recs =
3437 i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
3438 WORD32 enc_loop_cu_top_right_dep_mngr_mem_recs =
3439 i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
3440 WORD32 enc_loop_aux_br_dep_mngr_mem_recs =
3441 i4_num_enc_loop_frm_pllel * (i4_num_bitrate_inst - 1) * ihevce_dmgr_get_num_mem_recs();
3442
3443 return (
3444 (enc_loop_mem_recs + enc_loop_dblk_dep_mngr_mem_recs + enc_loop_sao_dep_mngr_mem_recs +
3445 enc_loop_cu_top_right_dep_mngr_mem_recs + enc_loop_aux_br_dep_mngr_mem_recs));
3446 }
3447 /*!
3448 ******************************************************************************
3449 * \if Function name : ihevce_enc_loop_get_mem_recs \endif
3450 *
3451 * \brief
3452 * Memory requirements are returned for ENC_LOOP.
3453 *
3454 * \param[in,out] ps_mem_tab : pointer to memory descriptors table
3455 * \param[in] ps_init_prms : Create time static parameters
3456 * \param[in] i4_num_proc_thrds : Number of processing threads for this module
3457 * \param[in] i4_mem_space : memory space in which the memory request should be made
3458 *
3459 * \return
3460 * None
3461 *
3462 * \author
3463 * Ittiam
3464 *
3465 *****************************************************************************
3466 */
ihevce_enc_loop_get_mem_recs(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_num_proc_thrds,WORD32 i4_num_bitrate_inst,WORD32 i4_num_enc_loop_frm_pllel,WORD32 i4_mem_space,WORD32 i4_resolution_id)3467 WORD32 ihevce_enc_loop_get_mem_recs(
3468 iv_mem_rec_t *ps_mem_tab,
3469 ihevce_static_cfg_params_t *ps_init_prms,
3470 WORD32 i4_num_proc_thrds,
3471 WORD32 i4_num_bitrate_inst,
3472 WORD32 i4_num_enc_loop_frm_pllel,
3473 WORD32 i4_mem_space,
3474 WORD32 i4_resolution_id)
3475 {
3476 UWORD32 u4_width, u4_height, n_tabs;
3477 UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame;
3478 WORD32 ctr;
3479 WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format;
3480
3481 /* derive frame dimensions */
3482 /*width of the input YUV to be encoded */
3483 u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
3484 /*making the width a multiple of CTB size*/
3485 u4_width += SET_CTB_ALIGN(
3486 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE);
3487
3488 /*height of the input YUV to be encoded */
3489 u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
3490 /*making the height a multiple of CTB size*/
3491 u4_height += SET_CTB_ALIGN(
3492 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE);
3493 u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE);
3494 u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE);
3495 /* memories should be requested assuming worst case requirememnts */
3496
3497 /* Module context structure */
3498 ps_mem_tab[ENC_LOOP_CTXT].i4_mem_size = sizeof(ihevce_enc_loop_master_ctxt_t);
3499
3500 ps_mem_tab[ENC_LOOP_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3501
3502 ps_mem_tab[ENC_LOOP_CTXT].i4_mem_alignment = 8;
3503
3504 /* Thread context structure */
3505 ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_size =
3506 i4_num_proc_thrds * sizeof(ihevce_enc_loop_ctxt_t);
3507
3508 ps_mem_tab[ENC_LOOP_THRDS_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3509
3510 ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_alignment = 16;
3511
3512 /* Scale matrices */
3513 ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16);
3514
3515 ps_mem_tab[ENC_LOOP_SCALE_MAT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3516
3517 ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_alignment = 8;
3518
3519 /* Rescale matrices */
3520 ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16);
3521
3522 ps_mem_tab[ENC_LOOP_RESCALE_MAT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3523
3524 ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_alignment = 8;
3525
3526 /* top row luma one row of pixel data per CTB row */
3527 if(ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8)
3528 {
3529 ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size = (u4_ctb_rows_in_a_frame + 1) *
3530 (u4_width + MAX_CU_SIZE + 1) * sizeof(UWORD16) *
3531 i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3532 }
3533 else
3534 {
3535 ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size = (u4_ctb_rows_in_a_frame + 1) *
3536 (u4_width + MAX_CU_SIZE + 1) * sizeof(UWORD8) *
3537 i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3538 }
3539
3540 ps_mem_tab[ENC_LOOP_TOP_LUMA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3541
3542 ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_alignment = 8;
3543
3544 /* top row chroma */
3545 if(ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8)
3546 {
3547 ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size =
3548 (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 2) * sizeof(UWORD16) *
3549 i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3550 }
3551 else
3552 {
3553 ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size =
3554 (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 2) * sizeof(UWORD8) *
3555 i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3556 }
3557
3558 ps_mem_tab[ENC_LOOP_TOP_CHROMA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3559
3560 ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_alignment = 8;
3561
3562 /* top row neighbour 4x4 */
3563 ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_size =
3564 (u4_ctb_rows_in_a_frame + 1) * (((u4_width + MAX_CU_SIZE) >> 2) + 1) * sizeof(nbr_4x4_t) *
3565 i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3566
3567 ps_mem_tab[ENC_LOOP_TOP_NBR4X4].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3568
3569 ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_alignment = 8;
3570
3571 /* memory to dump rate control parameters by each thread for each bit-rate instance */
3572 /* RC params collated by each thread for each bit-rate instance separately */
3573 ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_size = i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel *
3574 i4_num_proc_thrds * sizeof(enc_loop_rc_params_t);
3575
3576 ps_mem_tab[ENC_LOOP_RC_PARAMS].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3577
3578 ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_alignment = 8;
3579 /* Memory required for deblocking */
3580 {
3581 /* Memory to store Qp of top4x4 blocks for each CTB row.
3582 This memory is allocated at frame level and shared across
3583 all cores. The Qp values are needed to form Qp-map(described
3584 in the ENC_LOOP_DEBLOCKING section below)*/
3585
3586 UWORD32 u4_size_bs_memory, u4_size_qp_memory;
3587 UWORD32 u4_size_top_4x4_qp_memory;
3588
3589 /*Memory required to store Qp of top4x4 blocks for a CTB row for entire frame*/
3590 /*Space required per CTB*/
3591 u4_size_top_4x4_qp_memory = (MAX_CTB_SIZE / 4);
3592 /*Space required for entire CTB row*/
3593 u4_size_top_4x4_qp_memory *= u4_ctb_in_a_row;
3594 /*Space required for entire frame*/
3595 u4_size_top_4x4_qp_memory *= u4_ctb_rows_in_a_frame;
3596 /*Space required for multiple bitrate*/
3597 u4_size_top_4x4_qp_memory *= i4_num_bitrate_inst;
3598 /*Space required for multiple frames in parallel*/
3599 u4_size_top_4x4_qp_memory *= i4_num_enc_loop_frm_pllel;
3600
3601 ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_size = u4_size_top_4x4_qp_memory;
3602 ps_mem_tab[ENC_LOOP_QP_TOP_4X4].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3603 ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_alignment = 8;
3604
3605 /* Memory allocation of BS and Qp-map for deblocking at CTB-row level:
3606 ## Boundary Strength(Vertical):
3607 BS stored per CTB at one stretch i.e. for a 64x CTB first 8 entries belongs to first CTB
3608 of the row followed by 8 entries of second CTB and so on.
3609 8 entries: Includes left edge of current CTB and excludes right edge.
3610 ## Boundary Strength(Horizontal):
3611 Same as Vertical.
3612 8 entries: Includes top edge of current CTB and excludes bottom edge.
3613
3614 ## Qp-map storage:
3615 T0 T1 T2 T3 T4 T5 ..........to the end of the CTB row
3616 00 01 02 03 04 05 ..........to the end of the CTB row
3617 10 11 12 13 14 15 ..........to the end of the CTB row
3618 20 21 22 23 24 25 ..........to the end of the CTB row
3619 30 31 32 33 34 35 ..........to the end of the CTB row
3620 40 41 42 43 44 45 ..........to the end of the CTB row
3621 ............................to the end of the CTB row
3622 upto height_of_CTB..........to the end of the CTB row
3623
3624 Qp is stored for each "4x4 block" in a proper 2-D array format (One entry for each 4x4).
3625 A 2-D array of height= (height_of_CTB +1), and width = (width_of_CTB).
3626 where,
3627 => height_of_CTB = number of 4x4 blocks in a CTB vertically,
3628 => +1 is done to store Qp of lowest 4x4-block layer of top-CTB
3629 in order to deblock top edge of current CTB.
3630 => width_of_CTB = number of 4x4 blocks in a CTB horizontally,
3631 */
3632
3633 /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/
3634 /*1 vertical edge per 8 pixel*/
3635 u4_size_bs_memory = (MAX_CTB_SIZE >> 3);
3636 /*Vertical edges for entire width of CTB row*/
3637 u4_size_bs_memory *= u4_ctb_in_a_row;
3638 /*Each vertical edge of CTB row is 4 bytes*/
3639 u4_size_bs_memory = u4_size_bs_memory << 2;
3640 /*Adding Memory required for storing horizontal BS by doubling*/
3641 u4_size_bs_memory = u4_size_bs_memory << 1;
3642
3643 /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/
3644 /*Number of 4x4 blocks in the width of a CTB*/
3645 u4_size_qp_memory = (MAX_CTB_SIZE >> 2);
3646 /*Number of 4x4 blocks in the height of a CTB. Adding 1 to store Qp of lowest
3647 4x4-block layer of top-CTB in order to deblock top edge of current CTB*/
3648 u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1);
3649 /*Storage for entire CTB row*/
3650 u4_size_qp_memory *= u4_ctb_in_a_row;
3651
3652 /*Multiplying by i4_num_proc_thrds to assign memory for each core*/
3653 ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_size =
3654 i4_num_proc_thrds * (u4_size_bs_memory + u4_size_qp_memory);
3655
3656 ps_mem_tab[ENC_LOOP_DEBLOCKING].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3657
3658 ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_alignment = 8;
3659 }
3660
3661 /* Memory required to store pred for 422 chroma */
3662 ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_size =
3663 i4_num_proc_thrds * MAX_CTB_SIZE * MAX_CTB_SIZE * 2 *
3664 (i4_chroma_format == IV_YUV_422SP_UV) *
3665 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3666
3667 ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3668
3669 ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_alignment = 8;
3670
3671 /* Memory for inter pred buffers */
3672 {
3673 WORD32 i4_num_bufs_per_thread = 0;
3674
3675 WORD32 i4_buf_size_per_cand =
3676 (MAX_CTB_SIZE) * (MAX_CTB_SIZE) *
3677 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3678 WORD32 i4_quality_preset =
3679 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
3680 switch(i4_quality_preset)
3681 {
3682 case IHEVCE_QUALITY_P0:
3683 {
3684 i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_PQ;
3685 break;
3686 }
3687 case IHEVCE_QUALITY_P2:
3688 {
3689 i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HQ;
3690 break;
3691 }
3692 case IHEVCE_QUALITY_P3:
3693 {
3694 i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_MS;
3695 break;
3696 }
3697 case IHEVCE_QUALITY_P4:
3698 {
3699 i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HS;
3700 break;
3701 }
3702 case IHEVCE_QUALITY_P5:
3703 case IHEVCE_QUALITY_P6:
3704 case IHEVCE_QUALITY_P7:
3705 {
3706 i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_ES;
3707 break;
3708 }
3709 default:
3710 {
3711 ASSERT(0);
3712 }
3713 }
3714
3715 i4_num_bufs_per_thread += 4;
3716
3717 ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size =
3718 i4_num_bufs_per_thread * i4_num_proc_thrds * i4_buf_size_per_cand;
3719
3720 ps_mem_tab[ENC_LOOP_INTER_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3721
3722 ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_alignment = 8;
3723 }
3724
3725 /* Memory required to store chroma intra pred */
3726 ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_size =
3727 i4_num_proc_thrds * (MAX_TU_SIZE) * (MAX_TU_SIZE)*2 * NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD *
3728 ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
3729 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3730
3731 ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3732
3733 ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_alignment = 8;
3734
3735 /* Memory required to store pred for reference substitution output */
3736 /* While (MAX_TU_SIZE * 2 * 2) + 1 is the actual size needed,
3737 allocate 16 bytes to the left and 7 bytes to the right to facilitate
3738 SIMD access */
3739 ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_size =
3740 i4_num_proc_thrds * (((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING)
3741 + INTRAPRED_SIMD_LEFT_PADDING)*
3742 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3743
3744 ps_mem_tab[ENC_LOOP_REF_SUB_OUT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3745
3746 ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_alignment = 8;
3747
3748 /* Memory required to store pred for reference filtering output */
3749 /* While (MAX_TU_SIZE * 2 * 2) + 1 is the actual size needed,
3750 allocate 16 bytes to the left and 7 bytes to the right to facilitate
3751 SIMD access */
3752 ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_size =
3753 i4_num_proc_thrds * (((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING)
3754 + INTRAPRED_SIMD_LEFT_PADDING)*
3755 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3756
3757 ps_mem_tab[ENC_LOOP_REF_FILT_OUT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3758
3759 ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_alignment = 8;
3760
3761 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3762 if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset == 0)
3763 #endif
3764 {
3765 /* Memory assignments for recon storage during CU Recursion */
3766 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size =
3767 i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) *
3768 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3769
3770 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3771
3772 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8;
3773
3774 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size =
3775 i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) *
3776 ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
3777 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3778
3779 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3780
3781 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8;
3782 }
3783 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3784 else
3785 {
3786 /* Memory assignments for recon storage during CU Recursion */
3787 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size = 0;
3788
3789 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3790
3791 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8;
3792
3793 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size = 0;
3794
3795 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3796
3797 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8;
3798 }
3799 #endif
3800
3801 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3802 if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset == 0)
3803 #endif
3804 {
3805 /* Memory assignments for pred storage during CU Recursion */
3806 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size =
3807 i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) *
3808 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3809
3810 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3811
3812 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8;
3813
3814 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size =
3815 i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) *
3816 ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
3817 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3818
3819 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3820
3821 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8;
3822 }
3823 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3824 else
3825 {
3826 /* Memory assignments for pred storage during CU Recursion */
3827 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size = 0;
3828
3829 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3830
3831 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8;
3832
3833 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size = 0;
3834
3835 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3836
3837 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8;
3838 }
3839 #endif
3840
3841 /* Memory assignments for CTB left luma data storage */
3842 ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_size =
3843 i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) *
3844 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3845
3846 ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3847
3848 ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_alignment = 8;
3849
3850 /* Memory assignments for CTB left chroma data storage */
3851 ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size =
3852 i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) *
3853 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3854 ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size <<=
3855 ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0);
3856
3857 ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3858
3859 ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_alignment = 8;
3860
3861 /* Memory required for SAO */
3862 {
3863 WORD32 num_vert_units;
3864 WORD32 num_horz_units;
3865 WORD32 ctb_aligned_ht, ctb_aligned_wd;
3866 WORD32 luma_buf, chroma_buf;
3867
3868 num_vert_units = u4_height / MAX_CTB_SIZE;
3869 num_horz_units = u4_width / MAX_CTB_SIZE;
3870
3871 ctb_aligned_ht = u4_height;
3872 ctb_aligned_wd = u4_width;
3873
3874 /* Memory for top buffer. 1 extra width is required for top buf ptr for row 0
3875 * and 1 extra location is required for top left buf ptr for row 0
3876 * Also 1 extra byte is required for every row for top left pixel if
3877 * the top left ptr is to be passed to leaf level unconditionally
3878 */
3879 luma_buf = (ctb_aligned_ht + (ctb_aligned_wd + 1) * (num_vert_units + 1)) *
3880 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3881 chroma_buf = (ctb_aligned_ht + (ctb_aligned_wd + 2) * (num_vert_units + 1)) *
3882 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3883
3884 ps_mem_tab[ENC_LOOP_SAO].i4_mem_size =
3885 (luma_buf + chroma_buf) * (i4_num_bitrate_inst) * (i4_num_enc_loop_frm_pllel);
3886
3887 /* Add the memory required to store the sao information of top ctb for top merge
3888 * This is frame level buffer.
3889 */
3890 ps_mem_tab[ENC_LOOP_SAO].i4_mem_size +=
3891 ((num_horz_units * sizeof(sao_enc_t)) * num_vert_units) * (i4_num_bitrate_inst) *
3892 (i4_num_enc_loop_frm_pllel);
3893
3894 ps_mem_tab[ENC_LOOP_SAO].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3895
3896 ps_mem_tab[ENC_LOOP_SAO].i4_mem_alignment = 8;
3897 }
3898
3899 /* Memory for CU level Coeff data buffer */
3900 {
3901 /* 16 additional bytes are required to ensure alignment */
3902 {
3903 ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_size =
3904 i4_num_proc_thrds *
3905 (((MAX_LUMA_COEFFS_CTB +
3906 (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) +
3907 16) *
3908 (2) * sizeof(UWORD8));
3909 }
3910
3911 ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3912
3913 ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_alignment = 16;
3914
3915 ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_size =
3916 i4_num_proc_thrds *
3917 (MAX_LUMA_COEFFS_CTB +
3918 (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) *
3919 sizeof(UWORD8);
3920
3921 ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3922
3923 ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_alignment = 16;
3924 }
3925
3926 /* Memory for CU dequant data buffer */
3927 {
3928 /* 16 additional bytes are required to ensure alignment */
3929 {
3930 ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_size =
3931 i4_num_proc_thrds *
3932 (((i4_chroma_format == IV_YUV_422SP_UV) ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1))
3933 : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) +
3934 8) *
3935 (2) * sizeof(WORD16);
3936 }
3937
3938 ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3939
3940 ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_alignment = 16;
3941 }
3942
3943 /* Memory for Recon Datastore (Used around and within the RDOPT loop) */
3944 {
3945 WORD32 i4_memSize_perThread;
3946
3947 WORD32 i4_chroma_memSize_perThread = 0;
3948 /* 2 bufs each allocated to the two 'enc_loop_cu_final_prms_t' structs */
3949 /* used in RDOPT to store cur and best modes' data */
3950 WORD32 i4_luma_memSize_perThread =
3951 4 * MAX_CU_SIZE * MAX_CU_SIZE *
3952 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3953
3954 /* 'Glossary' for comments in the following codeBlock */
3955 /* 1 - 2 Bufs for storing recons of the best modes determined in the */
3956 /* function 'ihevce_intra_chroma_pred_mode_selector' */
3957 /* 2 - 1 buf each allocated to the two 'enc_loop_cu_final_prms_t' structs */
3958 /* used in RDOPT to store cur and best modes' data */
3959 if(i4_chroma_format == IV_YUV_422SP_UV)
3960 {
3961 WORD32 i4_quality_preset =
3962 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
3963 switch(i4_quality_preset)
3964 {
3965 case IHEVCE_QUALITY_P0:
3966 {
3967 /* 1 */
3968 i4_chroma_memSize_perThread +=
3969 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_PQ *
3970 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3971
3972 /* 2 */
3973 i4_chroma_memSize_perThread +=
3974 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ *
3975 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3976
3977 break;
3978 }
3979 case IHEVCE_QUALITY_P2:
3980 {
3981 /* 1 */
3982 i4_chroma_memSize_perThread +=
3983 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_HQ *
3984 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3985
3986 /* 2 */
3987 i4_chroma_memSize_perThread +=
3988 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ *
3989 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3990
3991 break;
3992 }
3993 case IHEVCE_QUALITY_P3:
3994 {
3995 /* 1 */
3996 i4_chroma_memSize_perThread +=
3997 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_MS *
3998 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3999
4000 /* 2 */
4001 i4_chroma_memSize_perThread +=
4002 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS *
4003 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4004
4005 break;
4006 }
4007 case IHEVCE_QUALITY_P4:
4008 {
4009 /* 1 */
4010 i4_chroma_memSize_perThread +=
4011 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_HS *
4012 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4013
4014 /* 2 */
4015 i4_chroma_memSize_perThread +=
4016 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS *
4017 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4018
4019 break;
4020 }
4021 case IHEVCE_QUALITY_P5:
4022 {
4023 /* 1 */
4024 i4_chroma_memSize_perThread +=
4025 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_XS *
4026 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4027
4028 /* 2 */
4029 i4_chroma_memSize_perThread +=
4030 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS *
4031 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4032
4033 break;
4034 }
4035 case IHEVCE_QUALITY_P6:
4036 case IHEVCE_QUALITY_P7:
4037 {
4038 /* 1 */
4039 i4_chroma_memSize_perThread +=
4040 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 *
4041 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4042
4043 /* 2 */
4044 i4_chroma_memSize_perThread +=
4045 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 *
4046 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4047
4048 break;
4049 }
4050 }
4051 }
4052 else
4053 {
4054 WORD32 i4_quality_preset =
4055 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
4056 switch(i4_quality_preset)
4057 {
4058 case IHEVCE_QUALITY_P0:
4059 {
4060 /* 1 */
4061 i4_chroma_memSize_perThread +=
4062 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_PQ *
4063 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4064
4065 /* 2 */
4066 i4_chroma_memSize_perThread +=
4067 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4068 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ *
4069 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4070
4071 break;
4072 }
4073 case IHEVCE_QUALITY_P2:
4074 {
4075 /* 1 */
4076 i4_chroma_memSize_perThread +=
4077 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_HQ *
4078 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4079
4080 /* 2 */
4081 i4_chroma_memSize_perThread +=
4082 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4083 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ *
4084 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4085
4086 break;
4087 }
4088 case IHEVCE_QUALITY_P3:
4089 {
4090 /* 1 */
4091 i4_chroma_memSize_perThread +=
4092 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_MS *
4093 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4094
4095 /* 2 */
4096 i4_chroma_memSize_perThread +=
4097 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4098 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS *
4099 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4100
4101 break;
4102 }
4103 case IHEVCE_QUALITY_P4:
4104 {
4105 /* 1 */
4106 i4_chroma_memSize_perThread +=
4107 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_HS *
4108 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4109
4110 /* 2 */
4111 i4_chroma_memSize_perThread +=
4112 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4113 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS *
4114 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4115
4116 break;
4117 }
4118 case IHEVCE_QUALITY_P5:
4119 {
4120 /* 1 */
4121 i4_chroma_memSize_perThread +=
4122 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_XS *
4123 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4124
4125 /* 2 */
4126 i4_chroma_memSize_perThread +=
4127 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4128 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS *
4129 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4130
4131 break;
4132 }
4133 case IHEVCE_QUALITY_P6:
4134 case IHEVCE_QUALITY_P7:
4135 {
4136 /* 1 */
4137 i4_chroma_memSize_perThread +=
4138 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 *
4139 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4140
4141 /* 2 */
4142 i4_chroma_memSize_perThread +=
4143 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4144 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 *
4145 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4146
4147 break;
4148 }
4149 }
4150 }
4151
4152 i4_memSize_perThread = i4_luma_memSize_perThread + i4_chroma_memSize_perThread;
4153
4154 ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size =
4155 i4_num_proc_thrds * i4_memSize_perThread * sizeof(UWORD8);
4156
4157 ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
4158
4159 ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_alignment = 16;
4160 }
4161
4162 n_tabs = NUM_ENC_LOOP_MEM_RECS;
4163
4164 /*************************************************************************/
4165 /* --- EncLoop Deblock and SAO sync Dep Mngr Mem requests -- */
4166 /*************************************************************************/
4167
4168 /* Fill the memtabs for EncLoop Deblock Dep Mngr */
4169 {
4170 WORD32 count;
4171 WORD32 num_vert_units;
4172 WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4173
4174 ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
4175 ASSERT(num_vert_units > 0);
4176 for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4177 {
4178 for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
4179 {
4180 n_tabs += ihevce_dmgr_get_mem_recs(
4181 &ps_mem_tab[n_tabs],
4182 DEP_MNGR_ROW_ROW_SYNC,
4183 num_vert_units,
4184 ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4185 i4_num_proc_thrds,
4186 i4_mem_space);
4187 }
4188 }
4189
4190 /* Fill the memtabs for EncLoop SAO Dep Mngr */
4191 for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4192 {
4193 for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
4194 {
4195 n_tabs += ihevce_dmgr_get_mem_recs(
4196 &ps_mem_tab[n_tabs],
4197 DEP_MNGR_ROW_ROW_SYNC,
4198 num_vert_units,
4199 ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4200 i4_num_proc_thrds,
4201 i4_mem_space);
4202 }
4203 }
4204 }
4205
4206 /*************************************************************************/
4207 /* --- EncLoop Top-Right CU sync Dep Mngr Mem requests -- */
4208 /*************************************************************************/
4209
4210 /* Fill the memtabs for Top-Right CU sync Dep Mngr */
4211 {
4212 WORD32 count;
4213 WORD32 num_vert_units;
4214 WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4215 ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
4216 ASSERT(num_vert_units > 0);
4217
4218 for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4219 {
4220 for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
4221 {
4222 n_tabs += ihevce_dmgr_get_mem_recs(
4223 &ps_mem_tab[n_tabs],
4224 DEP_MNGR_ROW_ROW_SYNC,
4225 num_vert_units,
4226 ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4227 i4_num_proc_thrds,
4228 i4_mem_space);
4229 }
4230 }
4231 }
4232
4233 /*************************************************************************/
4234 /* --- EncLoop Aux. on Ref. bitrate sync Dep Mngr Mem requests -- */
4235 /*************************************************************************/
4236
4237 /* Fill the memtabs for EncLoop Aux. on Ref. bitrate Dep Mngr */
4238 {
4239 WORD32 count;
4240 WORD32 num_vert_units;
4241 WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4242
4243 ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
4244 ASSERT(num_vert_units > 0);
4245
4246 for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4247 {
4248 for(ctr = 1; ctr < i4_num_bitrate_inst; ctr++)
4249 {
4250 n_tabs += ihevce_dmgr_get_mem_recs(
4251 &ps_mem_tab[n_tabs],
4252 DEP_MNGR_ROW_ROW_SYNC,
4253 num_vert_units,
4254 ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4255 i4_num_proc_thrds,
4256 i4_mem_space);
4257 }
4258 }
4259 }
4260
4261 return (n_tabs);
4262 }
4263
4264 /*!
4265 ******************************************************************************
4266 * \if Function name : ihevce_enc_loop_init \endif
4267 *
4268 * \brief
4269 * Initialization for ENC_LOOP context state structure.
4270 *
4271 * \param[in] ps_mem_tab : pointer to memory descriptors table
4272 * \param[in] ps_init_prms : Create time static parameters
4273 * \param[in] pv_osal_handle : Osal handle
4274 *
4275 * \return
4276 * None
4277 *
4278 * \author
4279 * Ittiam
4280 *
4281 *****************************************************************************
4282 */
ihevce_enc_loop_init(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_num_proc_thrds,void * pv_osal_handle,func_selector_t * ps_func_selector,rc_quant_t * ps_rc_quant_ctxt,ihevce_tile_params_t * ps_tile_params_base,WORD32 i4_resolution_id,WORD32 i4_num_enc_loop_frm_pllel,UWORD8 u1_is_popcnt_available)4283 void *ihevce_enc_loop_init(
4284 iv_mem_rec_t *ps_mem_tab,
4285 ihevce_static_cfg_params_t *ps_init_prms,
4286 WORD32 i4_num_proc_thrds,
4287 void *pv_osal_handle,
4288 func_selector_t *ps_func_selector,
4289 rc_quant_t *ps_rc_quant_ctxt,
4290 ihevce_tile_params_t *ps_tile_params_base,
4291 WORD32 i4_resolution_id,
4292 WORD32 i4_num_enc_loop_frm_pllel,
4293 UWORD8 u1_is_popcnt_available)
4294 {
4295 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
4296 ihevce_enc_loop_ctxt_t *ps_ctxt;
4297 WORD32 ctr, n_tabs;
4298 UWORD32 u4_width, u4_height;
4299 UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame;
4300 UWORD32 u4_size_bs_memory, u4_size_qp_memory;
4301 UWORD8 *pu1_deblk_base; /*Store the base address of deblcoking memory*/
4302 WORD32 i;
4303 WORD32 i4_num_bitrate_inst =
4304 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_num_bitrate_instances;
4305 enc_loop_rc_params_t *ps_enc_loop_rc_params;
4306 UWORD8 *pu1_sao_base; /* store the base address of sao*/
4307 UWORD32 u4_ctb_aligned_wd, ctb_size, u4_ctb_aligned_ht, num_vert_units;
4308 WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format;
4309 WORD32 is_hbd_mode = (ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8);
4310 WORD32 i4_enc_frm_id;
4311 WORD32 num_cu_in_ctb;
4312 WORD32 i4_num_tile_cols = 1; //Default value is 1
4313
4314 /* ENC_LOOP state structure */
4315 ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)ps_mem_tab[ENC_LOOP_CTXT].pv_base;
4316
4317 ps_master_ctxt->i4_num_proc_thrds = i4_num_proc_thrds;
4318
4319 ps_ctxt = (ihevce_enc_loop_ctxt_t *)ps_mem_tab[ENC_LOOP_THRDS_CTXT].pv_base;
4320 ps_enc_loop_rc_params = (enc_loop_rc_params_t *)ps_mem_tab[ENC_LOOP_RC_PARAMS].pv_base;
4321 ps_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt;
4322 /*Calculation of memory sizes for deblocking*/
4323 {
4324 /*width of the input YUV to be encoded. */
4325 u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
4326 /*making the width a multiple of CTB size*/
4327 u4_width += SET_CTB_ALIGN(
4328 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE);
4329
4330 u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE);
4331
4332 /*height of the input YUV to be encoded */
4333 u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4334 /*making the height a multiple of CTB size*/
4335 u4_height += SET_CTB_ALIGN(
4336 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE);
4337
4338 u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE);
4339
4340 /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/
4341 /*1 vertical edge per 8 pixel*/
4342 u4_size_bs_memory = (MAX_CTB_SIZE >> 3);
4343 /*Vertical edges for entire width of CTB row*/
4344 u4_size_bs_memory *= u4_ctb_in_a_row;
4345 /*Each vertical edge of CTB row is 4 bytes*/
4346 u4_size_bs_memory = u4_size_bs_memory << 2;
4347 /*Adding Memory required for storing horizontal BS by doubling*/
4348 u4_size_bs_memory = u4_size_bs_memory << 1;
4349
4350 /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/
4351 /*Number of 4x4 blocks in the width of a CTB*/
4352 u4_size_qp_memory = (MAX_CTB_SIZE >> 2);
4353 /*Number of 4x4 blocks in the height of a CTB. Adding 1 to store Qp of lowest
4354 4x4-block layer of top-CTB in order to deblock top edge of current CTB*/
4355 u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1);
4356 /*Storage for entire CTB row*/
4357 u4_size_qp_memory *= u4_ctb_in_a_row;
4358
4359 pu1_deblk_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_DEBLOCKING].pv_base;
4360 }
4361
4362 /*Derive the base pointer of sao*/
4363 pu1_sao_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_SAO].pv_base;
4364 ctb_size = (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size);
4365 u4_ctb_aligned_wd = u4_width;
4366 u4_ctb_aligned_ht = u4_height;
4367 num_vert_units = (u4_height) / ctb_size;
4368
4369 for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++)
4370 {
4371 ps_master_ctxt->aps_enc_loop_thrd_ctxt[ctr] = ps_ctxt;
4372 /* Store Tile params base into EncLoop context */
4373 ps_ctxt->pv_tile_params_base = (void *)ps_tile_params_base;
4374 ihevce_cmn_utils_instr_set_router(
4375 &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type);
4376 ihevce_sifter_sad_fxn_assigner(
4377 (FT_SAD_EVALUATOR **)(&ps_ctxt->pv_evalsad_pt_npu_mxn_8bit), ps_init_prms->e_arch_type);
4378 ps_ctxt->i4_max_search_range_horizontal =
4379 ps_init_prms->s_config_prms.i4_max_search_range_horz;
4380 ps_ctxt->i4_max_search_range_vertical =
4381 ps_init_prms->s_config_prms.i4_max_search_range_vert;
4382
4383 ps_ctxt->i4_quality_preset =
4384 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
4385
4386 if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P7)
4387 {
4388 ps_ctxt->i4_quality_preset = IHEVCE_QUALITY_P6;
4389 }
4390
4391 ps_ctxt->i4_num_proc_thrds = ps_master_ctxt->i4_num_proc_thrds;
4392
4393 ps_ctxt->i4_rc_pass = ps_init_prms->s_pass_prms.i4_pass;
4394
4395 ps_ctxt->u1_chroma_array_type = (i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1;
4396
4397 ps_ctxt->s_deblk_prms.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
4398
4399 ps_ctxt->pi2_scal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_SCALE_MAT].pv_base;
4400
4401 ps_ctxt->pi2_rescal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_RESCALE_MAT].pv_base;
4402
4403 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
4404 {
4405 ps_ctxt->i4_use_ctb_level_lamda = 0;
4406 }
4407 else
4408 {
4409 ps_ctxt->i4_use_ctb_level_lamda = 0;
4410 }
4411
4412 /** Register the function selector pointer*/
4413 ps_ctxt->ps_func_selector = ps_func_selector;
4414
4415 ps_ctxt->s_mc_ctxt.ps_func_selector = ps_func_selector;
4416
4417 /* Initialization for non-distributed mode */
4418 ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[0] = 0;
4419 ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[1] = 0;
4420 ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[2] = 0;
4421 ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[3] = 0;
4422
4423 ps_ctxt->s_deblk_prms.ps_func_selector = ps_func_selector;
4424 ps_ctxt->i4_top_row_luma_stride = (u4_width + MAX_CU_SIZE + 1);
4425
4426 ps_ctxt->i4_frm_top_row_luma_size =
4427 ps_ctxt->i4_top_row_luma_stride * (u4_ctb_rows_in_a_frame + 1);
4428
4429 ps_ctxt->i4_top_row_chroma_stride = (u4_width + MAX_CU_SIZE + 2);
4430
4431 ps_ctxt->i4_frm_top_row_chroma_size =
4432 ps_ctxt->i4_top_row_chroma_stride * (u4_ctb_rows_in_a_frame + 1);
4433
4434 {
4435 for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4436 {
4437 /* +1 is to provision top left pel */
4438 ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] =
4439 (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_LUMA].pv_base + 1 +
4440 (ps_ctxt->i4_frm_top_row_luma_size * i4_enc_frm_id * i4_num_bitrate_inst);
4441
4442 /* pointer incremented by 1 row to avoid OOB access in 0th row */
4443 ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] =
4444 (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] +
4445 ps_ctxt->i4_top_row_luma_stride;
4446
4447 /* +2 is to provision top left pel */
4448 ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] =
4449 (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_CHROMA].pv_base + 2 +
4450 (ps_ctxt->i4_frm_top_row_chroma_size * i4_enc_frm_id * i4_num_bitrate_inst);
4451
4452 /* pointer incremented by 1 row to avoid OOB access in 0th row */
4453 ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] =
4454 (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] +
4455 ps_ctxt->i4_top_row_chroma_stride;
4456 }
4457 }
4458
4459 /* +1 is to provision top left nbr */
4460 ps_ctxt->i4_top_row_nbr_stride = (((u4_width + MAX_CU_SIZE) >> 2) + 1);
4461 ps_ctxt->i4_frm_top_row_nbr_size =
4462 ps_ctxt->i4_top_row_nbr_stride * (u4_ctb_rows_in_a_frame + 1);
4463 for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4464 {
4465 ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] =
4466 (nbr_4x4_t *)ps_mem_tab[ENC_LOOP_TOP_NBR4X4].pv_base + 1 +
4467 (ps_ctxt->i4_frm_top_row_nbr_size * i4_enc_frm_id * i4_num_bitrate_inst);
4468 ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] += ps_ctxt->i4_top_row_nbr_stride;
4469 }
4470
4471 num_cu_in_ctb = ctb_size / MIN_CU_SIZE;
4472 num_cu_in_ctb *= num_cu_in_ctb;
4473
4474 /* pointer incremented by 1 row to avoid OOB access in 0th row */
4475
4476 /* Memory for CU level Coeff data buffer */
4477 {
4478 WORD32 i4_16byte_boundary_overshoot;
4479 WORD32 buf_size_per_cu;
4480 WORD32 buf_size_per_thread_wo_alignment_req;
4481 WORD32 buf_size_per_thread;
4482
4483 buf_size_per_cu =
4484 ((MAX_LUMA_COEFFS_CTB +
4485 (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) +
4486 16) *
4487 sizeof(UWORD8);
4488 buf_size_per_thread_wo_alignment_req = buf_size_per_cu - 16 * sizeof(UWORD8);
4489
4490 {
4491 buf_size_per_thread = buf_size_per_cu * (2);
4492
4493 for(i = 0; i < 2; i++)
4494 {
4495 ps_ctxt->as_cu_prms[i].pu1_cu_coeffs =
4496 (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].pv_base +
4497 (ctr * buf_size_per_thread) + (i * buf_size_per_cu);
4498
4499 i4_16byte_boundary_overshoot =
4500 ((LWORD64)ps_ctxt->as_cu_prms[i].pu1_cu_coeffs & 0xf);
4501
4502 ps_ctxt->as_cu_prms[i].pu1_cu_coeffs += (16 - i4_16byte_boundary_overshoot);
4503 }
4504 }
4505
4506 ps_ctxt->pu1_cu_recur_coeffs =
4507 (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].pv_base +
4508 (ctr * buf_size_per_thread_wo_alignment_req);
4509 }
4510
4511 /* Memory for CU dequant data buffer */
4512 {
4513 WORD32 buf_size_per_thread;
4514 WORD32 i4_16byte_boundary_overshoot;
4515
4516 WORD32 buf_size_per_cu =
4517 (((i4_chroma_format == IV_YUV_422SP_UV) ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1))
4518 : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) +
4519 8) *
4520 sizeof(WORD16);
4521
4522 {
4523 buf_size_per_thread = buf_size_per_cu * 2;
4524
4525 for(i = 0; i < 2; i++)
4526 {
4527 ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs =
4528 (WORD16
4529 *)((UWORD8 *)ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].pv_base + (ctr * buf_size_per_thread) + (i * buf_size_per_cu));
4530
4531 i4_16byte_boundary_overshoot =
4532 ((LWORD64)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs & 0xf);
4533
4534 ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs =
4535 (WORD16
4536 *)((UWORD8 *)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs + (16 - i4_16byte_boundary_overshoot));
4537 }
4538 }
4539 }
4540
4541 /*------ Deblocking memory's pointers assignments starts ------*/
4542
4543 /*Assign stride = 4x4 blocks in horizontal edge*/
4544 ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row;
4545
4546 ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size =
4547 ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd * u4_ctb_rows_in_a_frame;
4548
4549 /*Assign frame level memory to store the Qp of
4550 top 4x4 neighbours of each CTB row*/
4551 for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4552 {
4553 ps_ctxt->s_deblk_ctbrow_prms.api1_qp_top_4x4_ctb_row[i4_enc_frm_id] =
4554 (WORD8 *)ps_mem_tab[ENC_LOOP_QP_TOP_4X4].pv_base +
4555 (ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size * i4_num_bitrate_inst *
4556 i4_enc_frm_id);
4557 }
4558
4559 ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_vert = (UWORD32 *)pu1_deblk_base;
4560
4561 ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_horz =
4562 (UWORD32 *)(pu1_deblk_base + (u4_size_bs_memory >> 1));
4563
4564 ps_ctxt->s_deblk_ctbrow_prms.pi1_ctb_row_qp = (WORD8 *)pu1_deblk_base + u4_size_bs_memory;
4565
4566 /*Assign stride = 4x4 blocks in horizontal edge*/
4567 ps_ctxt->s_deblk_ctbrow_prms.u4_qp_buffer_stride = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row;
4568
4569 pu1_deblk_base += (u4_size_bs_memory + u4_size_qp_memory);
4570
4571 /*------Deblocking memory's pointers assignments ends ------*/
4572
4573 /*------SAO memory's pointer assignment starts------------*/
4574 if(!is_hbd_mode)
4575 {
4576 /* +1 (luma) / +2 (chroma) is added per row to provision the top left pixel */
4577 ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size =
4578 u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1);
4579 ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size =
4580 u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 2) * (num_vert_units + 1);
4581 ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units =
4582 num_vert_units * (u4_ctb_aligned_wd / MAX_CTB_SIZE);
4583
4584 for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4585 {
4586 ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_luma[i4_enc_frm_id] =
4587 pu1_sao_base +
4588 ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size +
4589 ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) *
4590 i4_num_bitrate_inst * i4_enc_frm_id) + // move to the next frame_id
4591 u4_ctb_aligned_wd +
4592 2;
4593
4594 ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_chroma[i4_enc_frm_id] =
4595 pu1_sao_base +
4596 ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size +
4597 ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) *
4598 i4_num_bitrate_inst * i4_enc_frm_id) +
4599 +u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1) +
4600 u4_ctb_aligned_wd + 4;
4601
4602 ps_ctxt->s_sao_ctxt_t.aps_frm_top_ctb_sao[i4_enc_frm_id] = (sao_enc_t *) (pu1_sao_base +
4603 ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size + ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size)
4604 *i4_num_bitrate_inst*i4_num_enc_loop_frm_pllel) +
4605 (ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units * sizeof(sao_enc_t) *i4_num_bitrate_inst * i4_enc_frm_id));
4606 }
4607 ps_ctxt->s_sao_ctxt_t.i4_ctb_size =
4608 (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size);
4609 ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd = u4_ctb_aligned_wd;
4610 }
4611
4612 /*------SAO memory's pointer assignment ends------------*/
4613
4614 /* perform all one time initialisation here */
4615 ps_ctxt->i4_nbr_map_strd = MAX_PU_IN_CTB_ROW + 1 + 8;
4616
4617 ps_ctxt->pu1_ctb_nbr_map = ps_ctxt->au1_nbr_ctb_map[0];
4618
4619 ps_ctxt->i4_deblock_type = ps_init_prms->s_coding_tools_prms.i4_deblocking_type;
4620
4621 /* move the pointer to 1,2 location */
4622 ps_ctxt->pu1_ctb_nbr_map += ps_ctxt->i4_nbr_map_strd;
4623 ps_ctxt->pu1_ctb_nbr_map++;
4624
4625 ps_ctxt->i4_cu_csbf_strd = MAX_TU_IN_CTB_ROW;
4626
4627 CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map4x4TU, 1, 4, ps_ctxt->i4_cu_csbf_strd);
4628
4629 CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map8x8TU, 4, 8, ps_ctxt->i4_cu_csbf_strd);
4630
4631 CREATE_SUBBLOCK2CSBFID_MAP(
4632 gai4_subBlock2csbfId_map16x16TU, 16, 16, ps_ctxt->i4_cu_csbf_strd);
4633
4634 CREATE_SUBBLOCK2CSBFID_MAP(
4635 gai4_subBlock2csbfId_map32x32TU, 64, 32, ps_ctxt->i4_cu_csbf_strd);
4636
4637 /* For both instance initialise the chroma dequant start idx */
4638 ps_ctxt->as_cu_prms[0].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE);
4639 ps_ctxt->as_cu_prms[1].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE);
4640
4641 /* initialise all the function pointer tables */
4642 {
4643 ps_ctxt->pv_inter_rdopt_cu_mc_mvp =
4644 (pf_inter_rdopt_cu_mc_mvp)ihevce_inter_rdopt_cu_mc_mvp;
4645
4646 ps_ctxt->pv_inter_rdopt_cu_ntu = (pf_inter_rdopt_cu_ntu)ihevce_inter_rdopt_cu_ntu;
4647
4648 #if ENABLE_RDO_BASED_TU_RECURSION
4649 if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4650 {
4651 ps_ctxt->pv_inter_rdopt_cu_ntu =
4652 (pf_inter_rdopt_cu_ntu)ihevce_inter_tu_tree_selector_and_rdopt_cost_computer;
4653 }
4654 #endif
4655 ps_ctxt->pv_intra_chroma_pred_mode_selector =
4656 (pf_intra_chroma_pred_mode_selector)ihevce_intra_chroma_pred_mode_selector;
4657 ps_ctxt->pv_intra_rdopt_cu_ntu = (pf_intra_rdopt_cu_ntu)ihevce_intra_rdopt_cu_ntu;
4658 ps_ctxt->pv_final_rdopt_mode_prcs =
4659 (pf_final_rdopt_mode_prcs)ihevce_final_rdopt_mode_prcs;
4660 ps_ctxt->pv_store_cu_results = (pf_store_cu_results)ihevce_store_cu_results;
4661 ps_ctxt->pv_enc_loop_cu_bot_copy = (pf_enc_loop_cu_bot_copy)ihevce_enc_loop_cu_bot_copy;
4662 ps_ctxt->pv_enc_loop_ctb_left_copy =
4663 (pf_enc_loop_ctb_left_copy)ihevce_enc_loop_ctb_left_copy;
4664
4665 /* Memory assignments for chroma intra pred buffer */
4666 {
4667 WORD32 pred_buf_size =
4668 MAX_TU_SIZE * MAX_TU_SIZE * 2 * ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4669 WORD32 pred_buf_size_per_thread =
4670 NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD * pred_buf_size;
4671 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].pv_base +
4672 (ctr * pred_buf_size_per_thread);
4673
4674 for(i = 0; i < NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD; i++)
4675 {
4676 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[i].pv_pred_data = pu1_base;
4677 pu1_base += pred_buf_size;
4678 }
4679 }
4680
4681 /* Memory assignments for reference substitution output */
4682 {
4683 WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING
4684 + INTRAPRED_SIMD_LEFT_PADDING);
4685 WORD32 pred_buf_size_per_thread = pred_buf_size;
4686 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_SUB_OUT].pv_base +
4687 (ctr * pred_buf_size_per_thread);
4688
4689 ps_ctxt->pv_ref_sub_out = pu1_base + INTRAPRED_SIMD_LEFT_PADDING;
4690 }
4691
4692 /* Memory assignments for reference filtering output */
4693 {
4694 WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING
4695 + INTRAPRED_SIMD_LEFT_PADDING);
4696 WORD32 pred_buf_size_per_thread = pred_buf_size;
4697 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_FILT_OUT].pv_base +
4698 (ctr * pred_buf_size_per_thread);
4699
4700 ps_ctxt->pv_ref_filt_out = pu1_base + INTRAPRED_SIMD_LEFT_PADDING;
4701 }
4702
4703 /* Memory assignments for recon storage during CU Recursion */
4704 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4705 if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4706 #endif
4707 {
4708 {
4709 WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE);
4710 WORD32 pred_buf_size_per_thread = pred_buf_size;
4711 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].pv_base +
4712 (ctr * pred_buf_size_per_thread);
4713
4714 ps_ctxt->pv_cu_luma_recon = pu1_base;
4715 }
4716
4717 {
4718 WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) *
4719 ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4720 WORD32 pred_buf_size_per_thread = pred_buf_size;
4721 UWORD8 *pu1_base =
4722 (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].pv_base +
4723 (ctr * pred_buf_size_per_thread);
4724
4725 ps_ctxt->pv_cu_chrma_recon = pu1_base;
4726 }
4727 }
4728
4729 /* Memory assignments for pred storage during CU Recursion */
4730 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4731 if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4732 #endif
4733 {
4734 {
4735 WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE);
4736 WORD32 pred_buf_size_per_thread = pred_buf_size;
4737 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].pv_base +
4738 (ctr * pred_buf_size_per_thread);
4739
4740 ps_ctxt->pv_CTB_pred_luma = pu1_base;
4741 }
4742
4743 {
4744 WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) *
4745 ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4746 WORD32 pred_buf_size_per_thread = pred_buf_size;
4747 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].pv_base +
4748 (ctr * pred_buf_size_per_thread);
4749
4750 ps_ctxt->pv_CTB_pred_chroma = pu1_base;
4751 }
4752 }
4753
4754 /* Memory assignments for CTB left luma data storage */
4755 {
4756 WORD32 pred_buf_size = (MAX_CTB_SIZE + MAX_TU_SIZE);
4757 WORD32 pred_buf_size_per_thread = pred_buf_size;
4758 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].pv_base +
4759 (ctr * pred_buf_size_per_thread);
4760
4761 ps_ctxt->pv_left_luma_data = pu1_base;
4762 }
4763
4764 /* Memory assignments for CTB left chroma data storage */
4765 {
4766 WORD32 pred_buf_size =
4767 (MAX_CTB_SIZE + MAX_TU_SIZE) * ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4768 WORD32 pred_buf_size_per_thread = pred_buf_size;
4769 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].pv_base +
4770 (ctr * pred_buf_size_per_thread);
4771
4772 ps_ctxt->pv_left_chrm_data = pu1_base;
4773 }
4774 }
4775
4776 /* Memory for inter pred buffers */
4777 {
4778 WORD32 i4_num_bufs_per_thread;
4779
4780 WORD32 i4_buf_size_per_cand =
4781 (MAX_CTB_SIZE) * (MAX_CTB_SIZE) *
4782 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
4783
4784 i4_num_bufs_per_thread =
4785 (ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size / i4_num_proc_thrds) /
4786 i4_buf_size_per_cand;
4787
4788 ps_ctxt->i4_max_num_inter_rdopt_cands = i4_num_bufs_per_thread - 4;
4789
4790 ps_ctxt->s_pred_buf_data.u4_is_buf_in_use = UINT_MAX;
4791
4792 {
4793 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_INTER_PRED].pv_base +
4794 +(ctr * i4_buf_size_per_cand * i4_num_bufs_per_thread);
4795
4796 for(i = 0; i < i4_num_bufs_per_thread; i++)
4797 {
4798 ps_ctxt->s_pred_buf_data.apv_inter_pred_data[i] =
4799 pu1_base + i * i4_buf_size_per_cand;
4800 ps_ctxt->s_pred_buf_data.u4_is_buf_in_use ^= (1 << i);
4801 }
4802 }
4803 }
4804
4805 /* Memory required to store pred for 422 chroma */
4806 if(i4_chroma_format == IV_YUV_422SP_UV)
4807 {
4808 WORD32 pred_buf_size = MAX_CTB_SIZE * MAX_CTB_SIZE * 2;
4809 WORD32 pred_buf_size_per_thread =
4810 pred_buf_size * ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) *
4811 sizeof(UWORD8);
4812 void *pv_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].pv_base +
4813 (ctr * pred_buf_size_per_thread);
4814
4815 ps_ctxt->pv_422_chroma_intra_pred_buf = pv_base;
4816 }
4817 else
4818 {
4819 ps_ctxt->pv_422_chroma_intra_pred_buf = NULL;
4820 }
4821
4822 /* Memory for Recon Datastore (Used around and within the RDOPT loop) */
4823 {
4824 WORD32 i4_lumaBufSize = MAX_CU_SIZE * MAX_CU_SIZE;
4825 WORD32 i4_chromaBufSize =
4826 MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ((i4_chroma_format == IV_YUV_422SP_UV) + 1);
4827 WORD32 i4_memSize_perThread = ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size /
4828 (i4_num_proc_thrds * sizeof(UWORD8) * (is_hbd_mode + 1));
4829 WORD32 i4_quality_preset = ps_ctxt->i4_quality_preset;
4830 {
4831 UWORD8 *pu1_mem_base =
4832 (((UWORD8 *)ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].pv_base) +
4833 ctr * i4_memSize_perThread);
4834
4835 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[0] =
4836 pu1_mem_base + i4_lumaBufSize * 0;
4837 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[1] =
4838 pu1_mem_base + i4_lumaBufSize * 1;
4839 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[0] =
4840 pu1_mem_base + i4_lumaBufSize * 2;
4841 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[1] =
4842 pu1_mem_base + i4_lumaBufSize * 3;
4843
4844 pu1_mem_base += i4_lumaBufSize * 4;
4845
4846 switch(i4_quality_preset)
4847 {
4848 case IHEVCE_QUALITY_P0:
4849 {
4850 #if ENABLE_CHROMA_RDOPT_EVAL_IN_PQ
4851 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4852 pu1_mem_base + i4_chromaBufSize * 0;
4853 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4854 pu1_mem_base + i4_chromaBufSize * 1;
4855 #else
4856 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4857 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4858 #endif
4859
4860 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ
4861 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4862 pu1_mem_base + i4_chromaBufSize * 2;
4863 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4864 pu1_mem_base + i4_chromaBufSize * 3;
4865 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4866 pu1_mem_base + i4_chromaBufSize * 2;
4867 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4868 pu1_mem_base + i4_chromaBufSize * 3;
4869 #else
4870 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4871 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4872 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4873 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4874 #endif
4875
4876 break;
4877 }
4878 case IHEVCE_QUALITY_P2:
4879 {
4880 #if ENABLE_CHROMA_RDOPT_EVAL_IN_HQ
4881 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4882 pu1_mem_base + i4_chromaBufSize * 0;
4883 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4884 pu1_mem_base + i4_chromaBufSize * 1;
4885 #else
4886 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4887 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4888 #endif
4889
4890 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ
4891 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4892 pu1_mem_base + i4_chromaBufSize * 2;
4893 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4894 pu1_mem_base + i4_chromaBufSize * 3;
4895 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4896 pu1_mem_base + i4_chromaBufSize * 2;
4897 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4898 pu1_mem_base + i4_chromaBufSize * 3;
4899 #else
4900 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4901 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4902 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4903 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4904 #endif
4905
4906 break;
4907 }
4908 case IHEVCE_QUALITY_P3:
4909 {
4910 #if ENABLE_CHROMA_RDOPT_EVAL_IN_MS
4911 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4912 pu1_mem_base + i4_chromaBufSize * 0;
4913 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4914 pu1_mem_base + i4_chromaBufSize * 1;
4915 #else
4916 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4917 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4918 #endif
4919
4920 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS
4921 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4922 pu1_mem_base + i4_chromaBufSize * 2;
4923 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4924 pu1_mem_base + i4_chromaBufSize * 3;
4925 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4926 pu1_mem_base + i4_chromaBufSize * 2;
4927 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4928 pu1_mem_base + i4_chromaBufSize * 3;
4929 #else
4930 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4931 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4932 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4933 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4934 #endif
4935
4936 break;
4937 }
4938 case IHEVCE_QUALITY_P4:
4939 {
4940 #if ENABLE_CHROMA_RDOPT_EVAL_IN_HS
4941 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4942 pu1_mem_base + i4_chromaBufSize * 0;
4943 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4944 pu1_mem_base + i4_chromaBufSize * 1;
4945 #else
4946 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4947 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4948 #endif
4949
4950 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS
4951 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4952 pu1_mem_base + i4_chromaBufSize * 2;
4953 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4954 pu1_mem_base + i4_chromaBufSize * 3;
4955 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4956 pu1_mem_base + i4_chromaBufSize * 2;
4957 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4958 pu1_mem_base + i4_chromaBufSize * 3;
4959 #else
4960 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4961 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4962 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4963 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4964 #endif
4965
4966 break;
4967 }
4968 case IHEVCE_QUALITY_P5:
4969 {
4970 #if ENABLE_CHROMA_RDOPT_EVAL_IN_XS
4971 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4972 pu1_mem_base + i4_chromaBufSize * 0;
4973 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4974 pu1_mem_base + i4_chromaBufSize * 1;
4975 #else
4976 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4977 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4978 #endif
4979
4980 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS
4981 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4982 pu1_mem_base + i4_chromaBufSize * 2;
4983 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4984 pu1_mem_base + i4_chromaBufSize * 3;
4985 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4986 pu1_mem_base + i4_chromaBufSize * 2;
4987 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4988 pu1_mem_base + i4_chromaBufSize * 3;
4989 #else
4990 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4991 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4992 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4993 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4994 #endif
4995
4996 break;
4997 }
4998 }
4999 }
5000
5001 ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE;
5002 ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE;
5003 ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE;
5004 ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE;
5005
5006 } /* Recon Datastore */
5007
5008 /****************************************************/
5009 /****************************************************/
5010 /* ps_pps->i1_sign_data_hiding_flag == UNHIDDEN */
5011 /* when NO_SBH. else HIDDEN */
5012 /****************************************************/
5013 /****************************************************/
5014 /* Zero cbf tool is enabled by default for all presets */
5015 ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
5016
5017 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3)
5018 {
5019 ps_ctxt->i4_quant_rounding_level = CU_LEVEL_QUANT_ROUNDING;
5020 ps_ctxt->i4_chroma_quant_rounding_level = CHROMA_QUANT_ROUNDING;
5021 ps_ctxt->i4_rdoq_level = ALL_CAND_RDOQ;
5022 ps_ctxt->i4_sbh_level = ALL_CAND_SBH;
5023 }
5024 else if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P3)
5025 {
5026 ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5027 ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5028 ps_ctxt->i4_rdoq_level = NO_RDOQ;
5029 ps_ctxt->i4_sbh_level = NO_SBH;
5030 }
5031 else
5032 {
5033 ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5034 ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5035 ps_ctxt->i4_rdoq_level = NO_RDOQ;
5036 ps_ctxt->i4_sbh_level = NO_SBH;
5037 }
5038
5039 #if DISABLE_QUANT_ROUNDING
5040 ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5041 ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5042 #endif
5043 /*Disabling RDOQ only when spatial modulation is enabled
5044 as RDOQ degrades visual quality*/
5045 if(ps_init_prms->s_config_prms.i4_cu_level_rc & 1)
5046 {
5047 ps_ctxt->i4_rdoq_level = NO_RDOQ;
5048 }
5049
5050 #if DISABLE_RDOQ
5051 ps_ctxt->i4_rdoq_level = NO_RDOQ;
5052 #endif
5053
5054 #if DISABLE_SBH
5055 ps_ctxt->i4_sbh_level = NO_SBH;
5056 #endif
5057
5058 /*Rounding factor calc based on previous cabac states */
5059
5060 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_4x4[0][0];
5061 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_8x8[0][0];
5062 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_16x16[0][0];
5063 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[4] = &ps_ctxt->i4_quant_round_32x32[0][0];
5064
5065 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_4x4[1][0];
5066 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_8x8[1][0];
5067 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_16x16[1][0];
5068 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[4] = &ps_ctxt->i4_quant_round_32x32[1][0];
5069
5070 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_cr_4x4[0][0];
5071 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_cr_8x8[0][0];
5072 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_cr_16x16[0][0];
5073
5074 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_cr_4x4[1][0];
5075 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_cr_8x8[1][0];
5076 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_cr_16x16[1][0];
5077
5078 /****************************************************************************************/
5079 /* Setting the perform rdoq and sbh flags appropriately */
5080 /****************************************************************************************/
5081 {
5082 /******************************************/
5083 /* For best cand rdoq and/or sbh */
5084 /******************************************/
5085 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq =
5086 (ps_ctxt->i4_rdoq_level == BEST_CAND_RDOQ);
5087 /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
5088 we would have to do RDOQ again.*/
5089 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq =
5090 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq ||
5091 ((BEST_CAND_SBH == ps_ctxt->i4_sbh_level) &&
5092 (ALL_CAND_RDOQ == ps_ctxt->i4_rdoq_level));
5093
5094 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh =
5095 (ps_ctxt->i4_sbh_level == BEST_CAND_SBH);
5096
5097 /* SBH should be performed if
5098 a) i4_sbh_level is BEST_CAND_SBH.
5099 b) For all quality presets above medium speed(i.e. high speed and extreme speed) and
5100 if SBH has to be done because for these presets the quant, iquant and scan coeff
5101 data are calculated in this function and not during the RDOPT stage*/
5102
5103 /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
5104 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh =
5105 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh ||
5106 ((BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level) &&
5107 (ALL_CAND_SBH == ps_ctxt->i4_sbh_level));
5108
5109 /******************************************/
5110 /* For all cand rdoq and/or sbh */
5111 /******************************************/
5112 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq =
5113 (ps_ctxt->i4_rdoq_level == ALL_CAND_RDOQ);
5114 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh =
5115 (ps_ctxt->i4_sbh_level == ALL_CAND_SBH);
5116 ps_ctxt->s_rdoq_sbh_ctxt.i4_bit_depth =
5117 ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
5118 }
5119
5120 if(!is_hbd_mode)
5121 {
5122 if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1)
5123 {
5124 if(ps_ctxt->i4_rdoq_level == NO_RDOQ)
5125 {
5126 ps_ctxt->apf_quant_iquant_ssd[0] =
5127 ps_func_selector->ihevc_quant_iquant_ssd_fptr;
5128 ps_ctxt->apf_quant_iquant_ssd[2] = ps_func_selector->ihevc_quant_iquant_fptr;
5129 }
5130 else
5131 {
5132 ps_ctxt->apf_quant_iquant_ssd[0] =
5133 ps_func_selector->ihevc_quant_iquant_ssd_rdoq_fptr;
5134 ps_ctxt->apf_quant_iquant_ssd[2] =
5135 ps_func_selector->ihevc_quant_iquant_rdoq_fptr;
5136 }
5137
5138 /* If quant rounding is not fixed (i4_quant_rounding_level != FIXED_QUANT_ROUNDING), quantization based on corr. error (variable rounding factor functions) is to be done */
5139 if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)
5140 {
5141 ps_ctxt->apf_quant_iquant_ssd[1] =
5142 ps_func_selector->ihevc_q_iq_ssd_var_rnd_fact_fptr;
5143 ps_ctxt->apf_quant_iquant_ssd[3] =
5144 ps_func_selector->ihevc_q_iq_var_rnd_fact_fptr;
5145 }
5146 else
5147 {
5148 ps_ctxt->apf_quant_iquant_ssd[1] =
5149 ps_func_selector->ihevc_quant_iquant_ssd_fptr;
5150 ps_ctxt->apf_quant_iquant_ssd[3] = ps_func_selector->ihevc_quant_iquant_fptr;
5151 }
5152 }
5153 else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0)
5154 {
5155 if(ps_ctxt->i4_rdoq_level == NO_RDOQ)
5156 {
5157 ps_ctxt->apf_quant_iquant_ssd[0] =
5158 ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr;
5159 ps_ctxt->apf_quant_iquant_ssd[2] =
5160 ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr;
5161 }
5162 else
5163 {
5164 ps_ctxt->apf_quant_iquant_ssd[0] =
5165 ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_fptr;
5166 ps_ctxt->apf_quant_iquant_ssd[2] =
5167 ps_func_selector->ihevc_quant_iquant_flat_scale_mat_rdoq_fptr;
5168 }
5169
5170 /* If quant rounding is not fixed (i4_quant_rounding_level != FIXED_QUANT_ROUNDING), quantization based on corr. error (variable rounding factor functions) is to be done */
5171 if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)
5172 {
5173 ps_ctxt->apf_quant_iquant_ssd[1] =
5174 ps_func_selector->ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_fptr;
5175 ps_ctxt->apf_quant_iquant_ssd[3] =
5176 ps_func_selector->ihevc_q_iq_flat_scale_mat_var_rnd_fact_fptr;
5177 }
5178 else
5179 {
5180 ps_ctxt->apf_quant_iquant_ssd[1] =
5181 ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr;
5182 ps_ctxt->apf_quant_iquant_ssd[3] =
5183 ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr;
5184 }
5185 }
5186
5187 ps_ctxt->s_sao_ctxt_t.apf_sao_luma[0] =
5188 ps_func_selector->ihevc_sao_edge_offset_class0_fptr;
5189 ps_ctxt->s_sao_ctxt_t.apf_sao_luma[1] =
5190 ps_func_selector->ihevc_sao_edge_offset_class1_fptr;
5191 ps_ctxt->s_sao_ctxt_t.apf_sao_luma[2] =
5192 ps_func_selector->ihevc_sao_edge_offset_class2_fptr;
5193 ps_ctxt->s_sao_ctxt_t.apf_sao_luma[3] =
5194 ps_func_selector->ihevc_sao_edge_offset_class3_fptr;
5195
5196 ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[0] =
5197 ps_func_selector->ihevc_sao_edge_offset_class0_chroma_fptr;
5198 ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[1] =
5199 ps_func_selector->ihevc_sao_edge_offset_class1_chroma_fptr;
5200 ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[2] =
5201 ps_func_selector->ihevc_sao_edge_offset_class2_chroma_fptr;
5202 ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[3] =
5203 ps_func_selector->ihevc_sao_edge_offset_class3_chroma_fptr;
5204
5205 ps_ctxt->apf_it_recon[0] = ps_func_selector->ihevc_itrans_recon_4x4_ttype1_fptr;
5206 ps_ctxt->apf_it_recon[1] = ps_func_selector->ihevc_itrans_recon_4x4_fptr;
5207 ps_ctxt->apf_it_recon[2] = ps_func_selector->ihevc_itrans_recon_8x8_fptr;
5208 ps_ctxt->apf_it_recon[3] = ps_func_selector->ihevc_itrans_recon_16x16_fptr;
5209 ps_ctxt->apf_it_recon[4] = ps_func_selector->ihevc_itrans_recon_32x32_fptr;
5210
5211 ps_ctxt->apf_chrm_it_recon[0] = ps_func_selector->ihevc_chroma_itrans_recon_4x4_fptr;
5212 ps_ctxt->apf_chrm_it_recon[1] = ps_func_selector->ihevc_chroma_itrans_recon_8x8_fptr;
5213 ps_ctxt->apf_chrm_it_recon[2] = ps_func_selector->ihevc_chroma_itrans_recon_16x16_fptr;
5214
5215 ps_ctxt->apf_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr;
5216 ps_ctxt->apf_resd_trns[1] = ps_func_selector->ihevc_resi_trans_4x4_fptr;
5217 ps_ctxt->apf_resd_trns[2] = ps_func_selector->ihevc_resi_trans_8x8_fptr;
5218 ps_ctxt->apf_resd_trns[3] = ps_func_selector->ihevc_resi_trans_16x16_fptr;
5219 ps_ctxt->apf_resd_trns[4] = ps_func_selector->ihevc_resi_trans_32x32_fptr;
5220
5221 ps_ctxt->apf_chrm_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_fptr;
5222 ps_ctxt->apf_chrm_resd_trns[1] = ps_func_selector->ihevc_resi_trans_8x8_fptr;
5223 ps_ctxt->apf_chrm_resd_trns[2] = ps_func_selector->ihevc_resi_trans_16x16_fptr;
5224
5225 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_0] =
5226 ps_func_selector->ihevc_intra_pred_luma_planar_fptr;
5227 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_1] = ps_func_selector->ihevc_intra_pred_luma_dc_fptr;
5228 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_2] =
5229 ps_func_selector->ihevc_intra_pred_luma_mode2_fptr;
5230 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_3TO9] =
5231 ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr;
5232 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_10] =
5233 ps_func_selector->ihevc_intra_pred_luma_horz_fptr;
5234 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_11TO17] =
5235 ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr;
5236 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_18_34] =
5237 ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr;
5238 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_19TO25] =
5239 ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr;
5240 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_26] = ps_func_selector->ihevc_intra_pred_luma_ver_fptr;
5241 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_27TO33] =
5242 ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr;
5243
5244 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_0] =
5245 ps_func_selector->ihevc_intra_pred_chroma_planar_fptr;
5246 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_1] =
5247 ps_func_selector->ihevc_intra_pred_chroma_dc_fptr;
5248 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_2] =
5249 ps_func_selector->ihevc_intra_pred_chroma_mode2_fptr;
5250 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_3TO9] =
5251 ps_func_selector->ihevc_intra_pred_chroma_mode_3_to_9_fptr;
5252 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_10] =
5253 ps_func_selector->ihevc_intra_pred_chroma_horz_fptr;
5254 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_11TO17] =
5255 ps_func_selector->ihevc_intra_pred_chroma_mode_11_to_17_fptr;
5256 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_18_34] =
5257 ps_func_selector->ihevc_intra_pred_chroma_mode_18_34_fptr;
5258 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_19TO25] =
5259 ps_func_selector->ihevc_intra_pred_chroma_mode_19_to_25_fptr;
5260 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_26] =
5261 ps_func_selector->ihevc_intra_pred_chroma_ver_fptr;
5262 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_27TO33] =
5263 ps_func_selector->ihevc_intra_pred_chroma_mode_27_to_33_fptr;
5264
5265 ps_ctxt->apf_chrm_resd_trns_had[0] =
5266 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_4x4_8bit;
5267 ps_ctxt->apf_chrm_resd_trns_had[1] =
5268 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_8x8_8bit;
5269 ps_ctxt->apf_chrm_resd_trns_had[2] =
5270 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_16x16_8bit;
5271 }
5272
5273 if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0)
5274 {
            /* initialise the scale & rescale matrices */
5276 ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5277 ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5278 ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_flat_scale_mat_8x8[0];
5279 ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_flat_scale_mat_16x16[0];
5280 ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_flat_scale_mat_32x32[0];
5281 /*init for inter matrix*/
5282 ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5283 ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5284 ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_flat_scale_mat_8x8[0];
5285 ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_flat_scale_mat_16x16[0];
5286 ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_flat_scale_mat_32x32[0];
5287
5288 /*init for rescale matrix*/
5289 ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5290 ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5291 ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0];
5292 ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0];
5293 ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0];
5294 /*init for rescale inter matrix*/
5295 ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5296 ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5297 ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0];
5298 ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0];
5299 ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0];
5300 }
5301 else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1)
5302 {
            /* initialise the scale & rescale matrices */
5304 ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5305 ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5306 ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_intra_default_scale_mat_8x8[0];
5307 ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_intra_default_scale_mat_16x16[0];
5308 ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_intra_default_scale_mat_32x32[0];
5309 /*init for inter matrix*/
5310 ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5311 ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5312 ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_inter_default_scale_mat_8x8[0];
5313 ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_inter_default_scale_mat_16x16[0];
5314 ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_inter_default_scale_mat_32x32[0];
5315
5316 /*init for rescale matrix*/
5317 ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5318 ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5319 ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_intra_default_rescale_mat_8x8[0];
5320 ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_intra_default_rescale_mat_16x16[0];
5321 ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_intra_default_rescale_mat_32x32[0];
5322 /*init for rescale inter matrix*/
5323 ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5324 ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5325 ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_inter_default_rescale_mat_8x8[0];
5326 ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_inter_default_rescale_mat_16x16[0];
5327 ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_inter_default_rescale_mat_32x32[0];
5328 }
5329 else
5330 {
5331 ASSERT(0);
5332 }
5333
5334 /* Not recomputing Luma pred-data and header data for any preset now */
5335 ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0;
5336 ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0;
5337 ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1;
5338
5339 switch(ps_ctxt->i4_quality_preset)
5340 {
5341 case IHEVCE_QUALITY_P0:
5342 {
5343 ps_ctxt->i4_max_merge_candidates = 5;
5344 ps_ctxt->i4_use_satd_for_merge_eval = 1;
5345 ps_ctxt->u1_use_top_at_ctb_boundary = 1;
5346 ps_ctxt->u1_use_early_cbf_data = 0;
5347 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_PQ;
5348 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5349 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ;
5350
5351 break;
5352 }
5353 case IHEVCE_QUALITY_P2:
5354 {
5355 ps_ctxt->i4_max_merge_candidates = 5;
5356 ps_ctxt->i4_use_satd_for_merge_eval = 1;
5357 ps_ctxt->u1_use_top_at_ctb_boundary = 1;
5358 ps_ctxt->u1_use_early_cbf_data = 0;
5359
5360 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HQ;
5361 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5362 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ;
5363
5364 break;
5365 }
5366 case IHEVCE_QUALITY_P3:
5367 {
5368 ps_ctxt->i4_max_merge_candidates = 3;
5369 ps_ctxt->i4_use_satd_for_merge_eval = 1;
5370 ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5371
5372 ps_ctxt->u1_use_early_cbf_data = 0;
5373 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_MS;
5374 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5375 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS;
5376
5377 break;
5378 }
5379 case IHEVCE_QUALITY_P4:
5380 {
5381 ps_ctxt->i4_max_merge_candidates = 2;
5382 ps_ctxt->i4_use_satd_for_merge_eval = 1;
5383 ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5384 ps_ctxt->u1_use_early_cbf_data = 0;
5385 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HS;
5386 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5387 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS;
5388
5389 break;
5390 }
5391 case IHEVCE_QUALITY_P5:
5392 {
5393 ps_ctxt->i4_max_merge_candidates = 2;
5394 ps_ctxt->i4_use_satd_for_merge_eval = 0;
5395 ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5396 ps_ctxt->u1_use_early_cbf_data = 0;
5397 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_XS;
5398 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5399 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS;
5400
5401 break;
5402 }
5403 case IHEVCE_QUALITY_P6:
5404 {
5405 ps_ctxt->i4_max_merge_candidates = 2;
5406 ps_ctxt->i4_use_satd_for_merge_eval = 0;
5407 ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5408 ps_ctxt->u1_use_early_cbf_data = EARLY_CBF_ON;
5409 break;
5410 }
5411 default:
5412 {
5413 ASSERT(0);
5414 }
5415 }
5416
5417 #if DISABLE_SKIP_AND_MERGE_EVAL
5418 ps_ctxt->i4_max_merge_candidates = 0;
5419 #endif
5420
5421 ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data =
5422 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
5423
5424 /*initialize memory for RC related parameters required/populated by enc_loop */
        /* the allocated memory is distributed as follows, assuming the encoder is running for 3 bit-rate instances
5426 |-------|-> Thread 0, instance 0
5427 | |
5428 | |
5429 | |
5430 |-------|-> thread 0, instance 1
5431 | |
5432 | |
5433 | |
5434 |-------|-> thread 0, intance 2
5435 | |
5436 | |
5437 | |
5438 |-------|-> thread 1, instance 0
5439 | |
5440 | |
5441 | |
5442 |-------|-> thread 1, instance 1
5443 | |
5444 | |
5445 | |
5446 |-------|-> thread 1, instance 2
5447 ... ...
5448
        Each thread will collate the data corresponding to the bit-rate instance it is running at the appropriate place.
        Finally, one thread will become master and collate the data from all the threads */
5451 for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
5452 {
5453 for(i = 0; i < i4_num_bitrate_inst; i++)
5454 {
5455 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i] = ps_enc_loop_rc_params;
5456 ps_enc_loop_rc_params++;
5457 }
5458 }
5459 /* Non-Luma modes for Chroma are evaluated only in HIGH QUALITY preset */
5460
5461 #if !ENABLE_SEPARATE_LUMA_CHROMA_INTRA_MODE
5462 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd = 0;
5463 #endif
5464
5465 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_iq_buff_stride =
5466 MAX_TU_SIZE;
5467 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_iq_buff_stride =
5468 MAX_TU_SIZE;
5469 /*Multiplying by two to account for interleaving of cb and cr*/
5470 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_pred_stride = MAX_TU_SIZE
5471 << 1;
5472 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_pred_stride =
5473 MAX_TU_SIZE << 1;
5474
5475 /* Memory for a frame level memory to store tile-id */
5476 /* corresponding to each CTB of frame */
5477 ps_ctxt->pi4_offset_for_last_cu_qp = &ps_master_ctxt->ai4_offset_for_last_cu_qp[0];
5478
5479 ps_ctxt->i4_qp_mod = ps_init_prms->s_config_prms.i4_cu_level_rc & 1;
        /* psy rd strength is a run-time parameter controlled by bit fields 5-7 in the VQET field. */
        /* we disable psyrd if the psy strength is zero or the BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER field is not set */
5482 if(ps_init_prms->s_coding_tools_prms.i4_vqet &
5483 (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER))
5484 {
5485 UWORD32 psy_strength;
5486 UWORD32 psy_strength_mask =
5487 224; // only bits 5,6,7 are ones. These three bits represent the psy strength
5488 psy_strength = ps_init_prms->s_coding_tools_prms.i4_vqet & psy_strength_mask;
5489 ps_ctxt->u1_enable_psyRDOPT = 1;
5490 ps_ctxt->u4_psy_strength = psy_strength >> BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1;
5491 if(psy_strength == 0)
5492 {
5493 ps_ctxt->u1_enable_psyRDOPT = 0;
5494 ps_ctxt->u4_psy_strength = 0;
5495 }
5496 }
5497
5498 ps_ctxt->u1_is_stasino_enabled =
5499 ((ps_init_prms->s_coding_tools_prms.i4_vqet &
5500 (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
5501 (ps_init_prms->s_coding_tools_prms.i4_vqet &
5502 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)));
5503
5504 ps_ctxt->u1_max_inter_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_nI;
5505 ps_ctxt->u1_max_intra_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_I;
5506 ps_ctxt++;
5507 }
5508 /* Store Tile params base into EncLoop Master context */
5509 ps_master_ctxt->pv_tile_params_base = (void *)ps_tile_params_base;
5510
5511 if(1 == ps_tile_params_base->i4_tiles_enabled_flag)
5512 {
5513 i4_num_tile_cols = ps_tile_params_base->i4_num_tile_cols;
5514 }
5515
5516 /* Updating ai4_offset_for_last_cu_qp[] array for all tile-colums of frame */
5517 /* Loop over all tile-cols in frame */
5518 for(ctr = 0; ctr < i4_num_tile_cols; ctr++)
5519 {
5520 WORD32 i4_tile_col_wd_in_ctb_unit =
5521 (ps_tile_params_base + ctr)->i4_curr_tile_wd_in_ctb_unit;
5522 WORD32 offset_x;
5523
5524 if(ctr == (i4_num_tile_cols - 1))
5525 { /* Last tile-row of frame */
5526 WORD32 min_cu_size = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size;
5527
5528 WORD32 cu_aligned_pic_wd =
5529 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
5530 SET_CTB_ALIGN(
5531 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width,
5532 min_cu_size);
5533
5534 WORD32 last_hz_ctb_wd = MAX_CTB_SIZE - (u4_width - cu_aligned_pic_wd);
5535
5536 offset_x = (i4_tile_col_wd_in_ctb_unit - 1) * MAX_CTB_SIZE;
5537 offset_x += last_hz_ctb_wd;
5538 }
5539 else
5540 { /* Not the last tile-row of frame */
5541 offset_x = (i4_tile_col_wd_in_ctb_unit)*MAX_CTB_SIZE;
5542 }
5543
5544 offset_x /= 4;
5545 offset_x -= 1;
5546
5547 ps_master_ctxt->ai4_offset_for_last_cu_qp[ctr] = offset_x;
5548 }
5549
5550 n_tabs = NUM_ENC_LOOP_MEM_RECS;
5551
5552 /*store num bit-rate instances in the master context */
5553 ps_master_ctxt->i4_num_bitrates = i4_num_bitrate_inst;
5554 ps_master_ctxt->i4_num_enc_loop_frm_pllel = i4_num_enc_loop_frm_pllel;
5555 /*************************************************************************/
5556 /* --- EncLoop Deblock and SAO sync Dep Mngr Mem init -- */
5557 /*************************************************************************/
5558 {
5559 WORD32 count;
5560 WORD32 num_vert_units, num_blks_in_row;
5561 WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
5562 WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
5563
5564 ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
5565 ihevce_enc_loop_dblk_get_prms_dep_mngr(wd, &num_blks_in_row);
5566 ASSERT(num_vert_units > 0);
5567 ASSERT(num_blks_in_row > 0);
5568
5569 for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
5570 {
5571 for(i = 0; i < i4_num_bitrate_inst; i++)
5572 {
5573 ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[count][i] = ihevce_dmgr_init(
5574 &ps_mem_tab[n_tabs],
5575 pv_osal_handle,
5576 DEP_MNGR_ROW_ROW_SYNC,
5577 num_vert_units,
5578 num_blks_in_row,
5579 i4_num_tile_cols, /* Number of Col Tiles */
5580 i4_num_proc_thrds,
5581 0 /*Sem Disabled*/
5582 );
5583
5584 n_tabs += ihevce_dmgr_get_num_mem_recs();
5585 }
5586 }
5587
5588 for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
5589 {
5590 for(i = 0; i < i4_num_bitrate_inst; i++)
5591 {
5592 ps_master_ctxt->aapv_dep_mngr_enc_loop_sao[count][i] = ihevce_dmgr_init(
5593 &ps_mem_tab[n_tabs],
5594 pv_osal_handle,
5595 DEP_MNGR_ROW_ROW_SYNC,
5596 num_vert_units,
5597 num_blks_in_row,
5598 i4_num_tile_cols, /* Number of Col Tiles */
5599 i4_num_proc_thrds,
5600 0 /*Sem Disabled*/
5601 );
5602
5603 n_tabs += ihevce_dmgr_get_num_mem_recs();
5604 }
5605 }
5606 }
5607 /*************************************************************************/
5608 /* --- EncLoop Top-Right CU synnc Dep Mngr Mem init -- */
5609 /*************************************************************************/
5610 {
5611 WORD32 count;
5612 WORD32 num_vert_units, num_blks_in_row;
5613 WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
5614 WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
5615
5616 WORD32 i4_sem = 0;
5617
5618 if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset >=
5619 IHEVCE_QUALITY_P4)
5620 i4_sem = 0;
5621 else
5622 i4_sem = 1;
5623 ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
5624 /* For Top-Right CU sync, adding one more CTB since value updation */
5625 /* happens in that way for the last CTB in the row */
5626 num_blks_in_row = wd + SET_CTB_ALIGN(wd, MAX_CU_SIZE);
5627 num_blks_in_row += MAX_CTB_SIZE;
5628
5629 ASSERT(num_vert_units > 0);
5630 ASSERT(num_blks_in_row > 0);
5631
5632 for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
5633 {
5634 for(i = 0; i < i4_num_bitrate_inst; i++)
5635 {
5636 /* For ES/HS, CU level updates uses spin-locks than semaphore */
5637 {
5638 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[count][i] =
5639 ihevce_dmgr_init(
5640 &ps_mem_tab[n_tabs],
5641 pv_osal_handle,
5642 DEP_MNGR_ROW_ROW_SYNC,
5643 num_vert_units,
5644 num_blks_in_row,
5645 i4_num_tile_cols, /* Number of Col Tiles */
5646 i4_num_proc_thrds,
5647 i4_sem /*Sem Disabled*/
5648 );
5649 }
5650 n_tabs += ihevce_dmgr_get_num_mem_recs();
5651 }
5652 }
5653 }
5654
5655 for(i = 1; i < 5; i++)
5656 {
5657 WORD32 i4_log2_trans_size = i + 1;
5658 WORD32 i4_bit_depth = ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
5659
5660 ga_trans_shift[i] = (MAX_TR_DYNAMIC_RANGE - i4_bit_depth - i4_log2_trans_size) << 1;
5661 }
5662
5663 ga_trans_shift[0] = ga_trans_shift[1];
5664
5665 /* return the handle to caller */
5666 return ((void *)ps_master_ctxt);
5667 }
5668
5669 /*!
5670 ******************************************************************************
5671 * \if Function name : ihevce_enc_loop_reg_sem_hdls \endif
5672 *
5673 * \brief
5674 * Intialization for ENC_LOOP context state structure .
5675 *
5676 * \param[in] ps_mem_tab : pointer to memory descriptors table
5677 * \param[in] ppv_sem_hdls : Array of semaphore handles
5678 * \param[in] i4_num_proc_thrds : Number of processing threads
5679 *
5680 * \return
5681 * None
5682 *
5683 * \author
5684 * Ittiam
5685 *
5686 *****************************************************************************
5687 */
ihevce_enc_loop_reg_sem_hdls(void * pv_enc_loop_ctxt,void ** ppv_sem_hdls,WORD32 i4_num_proc_thrds)5688 void ihevce_enc_loop_reg_sem_hdls(
5689 void *pv_enc_loop_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds)
5690 {
5691 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5692 WORD32 i, enc_frm_id;
5693
5694 ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5695
5696 /*************************************************************************/
5697 /* --- EncLoop Deblock and SAO sync Dep Mngr reg Semaphores -- */
5698 /*************************************************************************/
5699 for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5700 {
5701 for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
5702 {
5703 ihevce_dmgr_reg_sem_hdls(
5704 ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[enc_frm_id][i],
5705 ppv_sem_hdls,
5706 i4_num_proc_thrds);
5707 }
5708 }
5709
5710 for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5711 {
5712 for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
5713 {
5714 ihevce_dmgr_reg_sem_hdls(
5715 ps_master_ctxt->aapv_dep_mngr_enc_loop_sao[enc_frm_id][i],
5716 ppv_sem_hdls,
5717 i4_num_proc_thrds);
5718 }
5719 }
5720
5721 /*************************************************************************/
5722 /* --- EncLoop Top-Right CU synnc Dep Mngr reg Semaphores -- */
5723 /*************************************************************************/
5724 for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5725 {
5726 for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
5727 {
5728 ihevce_dmgr_reg_sem_hdls(
5729 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[enc_frm_id][i],
5730 ppv_sem_hdls,
5731 i4_num_proc_thrds);
5732 }
5733 }
5734
5735 return;
5736 }
5737
5738 /*!
5739 ******************************************************************************
5740 * \if Function name : ihevce_enc_loop_delete \endif
5741 *
5742 * \brief
5743 * Destroy EncLoop module
5744 * Note : Only Destroys the resources allocated in the module like
5745 * semaphore,etc. Memory free is done Separately using memtabs
5746 *
5747 * \param[in] pv_me_ctxt : pointer to EncLoop ctxt
5748 *
5749 * \return
5750 * None
5751 *
5752 * \author
5753 * Ittiam
5754 *
5755 *****************************************************************************
5756 */
ihevce_enc_loop_delete(void * pv_enc_loop_ctxt)5757 void ihevce_enc_loop_delete(void *pv_enc_loop_ctxt)
5758 {
5759 ihevce_enc_loop_master_ctxt_t *ps_enc_loop_ctxt;
5760 WORD32 ctr, enc_frm_id;
5761
5762 ps_enc_loop_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5763
5764 for(enc_frm_id = 0; enc_frm_id < ps_enc_loop_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5765 {
5766 for(ctr = 0; ctr < ps_enc_loop_ctxt->i4_num_bitrates; ctr++)
5767 {
5768 /* --- EncLoop Deblock sync Dep Mngr Delete --*/
5769 ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_dblk[enc_frm_id][ctr]);
5770 /* --- EncLoop Sao sync Dep Mngr Delete --*/
5771 ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_sao[enc_frm_id][ctr]);
5772 /* --- EncLoop Top-Right CU sync Dep Mngr Delete --*/
5773 ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[enc_frm_id][ctr]);
5774 }
5775 }
5776 }
5777
5778 /*!
5779 ******************************************************************************
5780 * \if Function name : ihevce_enc_loop_dep_mngr_frame_reset \endif
5781 *
5782 * \brief
5783 * Frame level Reset for the Dependency Mngrs local to EncLoop.,
5784 * ie CU_TopRight and Dblk
5785 *
5786 * \param[in] pv_enc_loop_ctxt : Enc_loop context pointer
5787 *
5788 * \return
5789 * None
5790 *
5791 * \author
5792 * Ittiam
5793 *
5794 *****************************************************************************
5795 */
ihevce_enc_loop_dep_mngr_frame_reset(void * pv_enc_loop_ctxt,WORD32 enc_frm_id)5796 void ihevce_enc_loop_dep_mngr_frame_reset(void *pv_enc_loop_ctxt, WORD32 enc_frm_id)
5797 {
5798 WORD32 ctr, frame_id;
5799 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5800
5801 ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5802
5803 if(1 == ps_master_ctxt->i4_num_enc_loop_frm_pllel)
5804 {
5805 frame_id = 0;
5806 }
5807 else
5808 {
5809 frame_id = enc_frm_id;
5810 }
5811
5812 for(ctr = 0; ctr < ps_master_ctxt->i4_num_bitrates; ctr++)
5813 {
5814 /* Dep. Mngr : Reset the num ctb Deblocked in every row for ENC sync */
5815 ihevce_dmgr_rst_row_row_sync(ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[frame_id][ctr]);
5816
5817 /* Dep. Mngr : Reset the num SAO ctb in every row for ENC sync */
5818 ihevce_dmgr_rst_row_row_sync(ps_master_ctxt->aapv_dep_mngr_enc_loop_sao[frame_id][ctr]);
5819
5820 /* Dep. Mngr : Reset the TopRight CU Processed in every row for ENC sync */
5821 ihevce_dmgr_rst_row_row_sync(
5822 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[frame_id][ctr]);
5823 }
5824 }
5825
5826 /*!
5827 ******************************************************************************
5828 * \if Function name : ihevce_enc_loop_frame_init \endif
5829 *
5830 * \brief
*  Frame level init of encode loop function.
5832 *
5833 * \param[in] pv_enc_loop_ctxt : Enc_loop context pointer
5834 * \param[in] pi4_cu_processed : ptr to cur frame cu process in pix.
5835 * \param[in] aps_ref_list : ref pic list for the current frame
5836 * \param[in] ps_slice_hdr : ptr to current slice header params
5837 * \param[in] ps_pps : ptr to active pps params
5838 * \param[in] ps_sps : ptr to active sps params
5839 * \param[in] ps_vps : ptr to active vps params
5840
5841
5842 * \param[in] i1_weighted_pred_flag : weighted pred enable flag (unidir)
5843 * \param[in] i1_weighted_bipred_flag : weighted pred enable flag (bidir)
5844 * \param[in] log2_luma_wght_denom : down shift factor for weighted pred of luma
5845 * \param[in] log2_chroma_wght_denom : down shift factor for weighted pred of chroma
* \param[in] cur_poc : current frame poc
5847 * \param[in] i4_bitrate_instance_num : number indicating the instance of bit-rate for multi-rate encoder
5848 *
5849 * \return
5850 * None
5851 *
5852 * \author
5853 * Ittiam
5854 *
5855 *****************************************************************************
5856 */
ihevce_enc_loop_frame_init(void * pv_enc_loop_ctxt,WORD32 i4_frm_qp,recon_pic_buf_t * (* aps_ref_list)[HEVCE_MAX_REF_PICS * 2],recon_pic_buf_t * ps_frm_recon,slice_header_t * ps_slice_hdr,pps_t * ps_pps,sps_t * ps_sps,vps_t * ps_vps,WORD8 i1_weighted_pred_flag,WORD8 i1_weighted_bipred_flag,WORD32 log2_luma_wght_denom,WORD32 log2_chroma_wght_denom,WORD32 cur_poc,WORD32 i4_display_num,enc_ctxt_t * ps_enc_ctxt,me_enc_rdopt_ctxt_t * ps_curr_inp_prms,WORD32 i4_bitrate_instance_num,WORD32 i4_thrd_id,WORD32 i4_enc_frm_id,WORD32 i4_num_bitrates,WORD32 i4_quality_preset,void * pv_dep_mngr_encloop_dep_me)5857 void ihevce_enc_loop_frame_init(
5858 void *pv_enc_loop_ctxt,
5859 WORD32 i4_frm_qp,
5860 recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2],
5861 recon_pic_buf_t *ps_frm_recon,
5862 slice_header_t *ps_slice_hdr,
5863 pps_t *ps_pps,
5864 sps_t *ps_sps,
5865 vps_t *ps_vps,
5866 WORD8 i1_weighted_pred_flag,
5867 WORD8 i1_weighted_bipred_flag,
5868 WORD32 log2_luma_wght_denom,
5869 WORD32 log2_chroma_wght_denom,
5870 WORD32 cur_poc,
5871 WORD32 i4_display_num,
5872 enc_ctxt_t *ps_enc_ctxt,
5873 me_enc_rdopt_ctxt_t *ps_curr_inp_prms,
5874 WORD32 i4_bitrate_instance_num,
5875 WORD32 i4_thrd_id,
5876 WORD32 i4_enc_frm_id,
5877 WORD32 i4_num_bitrates,
5878 WORD32 i4_quality_preset,
5879 void *pv_dep_mngr_encloop_dep_me)
5880 {
5881 /* local variables */
5882 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5883 ihevce_enc_loop_ctxt_t *ps_ctxt;
5884 WORD32 chroma_qp_offset, i4_div_factor;
5885 WORD8 i1_slice_type = ps_slice_hdr->i1_slice_type;
5886 WORD8 i1_strong_intra_smoothing_enable_flag = ps_sps->i1_strong_intra_smoothing_enable_flag;
5887
5888 /* ENC_LOOP master state structure */
5889 ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5890
5891 /* Nithya: Store the current POC in the slice header */
5892 ps_slice_hdr->i4_abs_pic_order_cnt = cur_poc;
5893
5894 /* Update the POC list of the current frame to the recon buffer */
5895 if(ps_slice_hdr->i1_num_ref_idx_l0_active != 0)
5896 {
5897 int i4_i;
5898 for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l0_active; i4_i++)
5899 {
5900 ps_frm_recon->ai4_col_l0_poc[i4_i] = aps_ref_list[0][i4_i]->i4_poc;
5901 }
5902 }
5903 if(ps_slice_hdr->i1_num_ref_idx_l1_active != 0)
5904 {
5905 int i4_i;
5906 for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l1_active; i4_i++)
5907 {
5908 ps_frm_recon->ai4_col_l1_poc[i4_i] = aps_ref_list[1][i4_i]->i4_poc;
5909 }
5910 }
5911
5912 /* loop over all the threads */
5913 // for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++)
5914 {
5915 /* ENC_LOOP state structure */
5916 ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i4_thrd_id];
5917
5918 /* SAO ctxt structure initialization*/
5919 ps_ctxt->s_sao_ctxt_t.ps_pps = ps_pps;
5920 ps_ctxt->s_sao_ctxt_t.ps_sps = ps_sps;
5921 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr = ps_slice_hdr;
5922
5923 /*bit-rate instance number for Multi-bitrate (MBR) encode */
5924 ps_ctxt->i4_bitrate_instance_num = i4_bitrate_instance_num;
5925 ps_ctxt->i4_num_bitrates = i4_num_bitrates;
5926 ps_ctxt->i4_chroma_format = ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_chr_format;
5927 ps_ctxt->i4_is_first_query = 1;
5928 ps_ctxt->i4_is_ctb_qp_modified = 0;
5929
5930 /* enc_frm_id for multiframe encode */
5931
5932 if(1 == ps_enc_ctxt->s_multi_thrd.i4_num_enc_loop_frm_pllel)
5933 {
5934 ps_ctxt->i4_enc_frm_id = 0;
5935 i4_enc_frm_id = 0;
5936 }
5937 else
5938 {
5939 ps_ctxt->i4_enc_frm_id = i4_enc_frm_id;
5940 }
5941
5942 /*Initialize the sub pic rc buf appropriately */
5943
5944 /*Set the thrd id flag */
5945 ps_enc_ctxt->s_multi_thrd
5946 .ai4_thrd_id_valid_flag[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 1;
5947
5948 ps_enc_ctxt->s_multi_thrd
5949 .ai8_nctb_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5950 ps_enc_ctxt->s_multi_thrd
5951 .ai8_nctb_me_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5952
5953 ps_enc_ctxt->s_multi_thrd
5954 .ai8_nctb_l0_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5955 ps_enc_ctxt->s_multi_thrd
5956 .ai8_nctb_act_factor[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5957
5958 ps_enc_ctxt->s_multi_thrd
5959 .ai8_nctb_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5960 ps_enc_ctxt->s_multi_thrd
5961 .ai8_acc_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5962 ps_enc_ctxt->s_multi_thrd
5963 .ai8_acc_bits_mul_qs_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5964 ps_enc_ctxt->s_multi_thrd
5965 .ai8_nctb_hdr_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5966 ps_enc_ctxt->s_multi_thrd
5967 .ai8_nctb_mpm_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5968 ps_enc_ctxt->s_multi_thrd.ai4_prev_chunk_qp[i4_enc_frm_id][i4_bitrate_instance_num] =
5969 i4_frm_qp;
5970
5971 /*Frame level data for Sub Pic rc is initalized here */
5972 /*Can be sent once per frame*/
5973 {
5974 WORD32 i4_tot_frame_ctb = ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert *
5975 ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_horz;
5976
5977 /*Accumalated bits of all cu for required CTBS estimated during RDO evaluation*/
5978 ps_ctxt->u4_total_cu_bits = 0;
5979 ps_ctxt->u4_total_cu_hdr_bits = 0;
5980
5981 ps_ctxt->u4_cu_tot_bits_into_qscale = 0;
5982 ps_ctxt->u4_cu_tot_bits = 0;
5983 ps_ctxt->u4_total_cu_bits_mul_qs = 0;
5984 ps_ctxt->i4_display_num = i4_display_num;
5985 ps_ctxt->i4_sub_pic_level_rc = ps_enc_ctxt->s_multi_thrd.i4_in_frame_rc_enabled;
5986 /*The Qscale is to be generated every 10th of total frame ctb is completed */
5987 //ps_ctxt->i4_num_ctb_for_out_scale = (10 * i4_tot_frame_ctb)/100 ;
5988 ps_ctxt->i4_num_ctb_for_out_scale = (UPDATE_QP_AT_CTB * i4_tot_frame_ctb) / 100;
5989
5990 ps_ctxt->i4_cu_qp_sub_pic_rc = (1 << QP_LEVEL_MOD_ACT_FACTOR);
5991 /*Sub Pic RC frame level params */
5992 ps_ctxt->i8_frame_l1_ipe_sad =
5993 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_pre_intra_sad;
5994 ps_ctxt->i8_frame_l0_ipe_satd =
5995 ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_l0_acc_satd;
5996 ps_ctxt->i8_frame_l1_me_sad =
5997 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_l1_coarse_me_sad;
5998 ps_ctxt->i8_frame_l1_activity_fact =
5999 ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_level_activity_fact;
6000 if(ps_ctxt->i4_sub_pic_level_rc)
6001 {
6002 ASSERT(
6003 ps_curr_inp_prms->ps_curr_inp->s_lap_out
6004 .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num] != 0);
6005
6006 ps_ctxt->ai4_frame_bits_estimated[ps_ctxt->i4_enc_frm_id]
6007 [ps_ctxt->i4_bitrate_instance_num] =
6008 ps_curr_inp_prms->ps_curr_inp->s_lap_out
6009 .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num];
6010 }
6011 //ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type = 1;
6012
6013 ps_ctxt->i4_is_I_scenecut =
6014 ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) &&
6015 (ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_IDR_FRAME ||
6016 ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_I_FRAME));
6017
6018 ps_ctxt->i4_is_non_I_scenecut =
6019 ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) &&
6020 (ps_ctxt->i4_is_I_scenecut == 0));
6021
6022 /*ps_ctxt->i4_is_I_only_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_I_only_scd;
6023 ps_ctxt->i4_is_non_I_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_non_I_scd;*/
6024 ps_ctxt->i4_is_model_valid =
6025 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i4_is_model_valid;
6026 }
6027 /* cb and cr offsets are assumed to be same */
6028 chroma_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset + ps_pps->i1_pic_cb_qp_offset;
6029
6030 /* assumption of cb = cr qp */
6031 ASSERT(ps_slice_hdr->i1_slice_cb_qp_offset == ps_slice_hdr->i1_slice_cr_qp_offset);
6032 ASSERT(ps_pps->i1_pic_cb_qp_offset == ps_pps->i1_pic_cr_qp_offset);
6033
6034 ps_ctxt->u1_is_input_data_hbd = (ps_sps->i1_bit_depth_luma_minus8 > 0);
6035
6036 ps_ctxt->u1_bit_depth = ps_sps->i1_bit_depth_luma_minus8 + 8;
6037
6038 ps_ctxt->s_mc_ctxt.i4_bit_depth = ps_ctxt->u1_bit_depth;
6039 ps_ctxt->s_mc_ctxt.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
6040
6041 /*remember chroma qp offset as qp related parameters are calculated at CU level*/
6042 ps_ctxt->i4_chroma_qp_offset = chroma_qp_offset;
6043 ps_ctxt->i1_cu_qp_delta_enable = ps_pps->i1_cu_qp_delta_enabled_flag;
6044 ps_ctxt->i1_entropy_coding_sync_enabled_flag = ps_pps->i1_entropy_coding_sync_enabled_flag;
6045
6046 ps_ctxt->i4_is_ref_pic = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_ref_pic;
6047 ps_ctxt->i4_temporal_layer = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_temporal_lyr_id;
6048 ps_ctxt->i4_use_const_lamda_modifier = USE_CONSTANT_LAMBDA_MODIFIER;
6049 ps_ctxt->i4_use_const_lamda_modifier =
6050 ps_ctxt->i4_use_const_lamda_modifier ||
6051 ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6052 (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
6053 ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6054 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) ||
6055 (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6056 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1)) ||
6057 (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6058 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_2)) ||
6059 (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6060 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_3))));
6061
6062 {
6063 ps_ctxt->f_i_pic_lamda_modifier =
6064 ps_curr_inp_prms->ps_curr_inp->s_lap_out.f_i_pic_lamda_modifier;
6065 }
6066
6067 ps_ctxt->i4_frame_qp = i4_frm_qp;
6068 ps_ctxt->i4_frame_mod_qp = i4_frm_qp;
6069 ps_ctxt->i4_cu_qp = i4_frm_qp;
6070 ps_ctxt->i4_prev_cu_qp = i4_frm_qp;
6071 ps_ctxt->i4_chrm_cu_qp =
6072 (ps_ctxt->u1_chroma_array_type == 2)
6073 ? MIN(i4_frm_qp + chroma_qp_offset, 51)
6074 : gai1_ihevc_chroma_qp_scale[i4_frm_qp + chroma_qp_offset + MAX_QP_BD_OFFSET];
6075
6076 ps_ctxt->i4_cu_qp_div6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
6077 i4_div_factor = (i4_frm_qp + 3) / 6;
6078 i4_div_factor = CLIP3(i4_div_factor, 3, 6);
6079 ps_ctxt->i4_cu_qp_mod6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
6080
6081 ps_ctxt->i4_chrm_cu_qp_div6 =
6082 (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
6083 ps_ctxt->i4_chrm_cu_qp_mod6 =
6084 (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
6085
6086 #define INTER_RND_QP_BY_6
6087 #ifdef INTER_RND_QP_BY_6
6088
6089 { /*1/6 rounding for 8 bit b frames*/
6090 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = 85
6091 /*((1 << QUANT_ROUND_FACTOR_Q) / 6)*/;
6092 }
6093 #else
6094 /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
6095 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3;
6096 #endif
6097
6098 if(ISLICE == i1_slice_type)
6099 {
6100 /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
6101 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] = 171
6102 /*((1 << QUANT_ROUND_FACTOR_Q) / 6)*/;
6103 }
6104 else
6105 {
6106 /* quant factor without RDOQ is 1/6th of shift for intra in inter pic */
6107 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
6108 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER];
6109 /* (1 << QUANT_ROUND_FACTOR_Q) / 6; */
6110 }
6111
6112 ps_ctxt->i1_strong_intra_smoothing_enable_flag = i1_strong_intra_smoothing_enable_flag;
6113
6114 ps_ctxt->i1_slice_type = i1_slice_type;
6115
6116 /* intialize the inter pred (MC) context at frame level */
6117 ps_ctxt->s_mc_ctxt.ps_ref_list = aps_ref_list;
6118 ps_ctxt->s_mc_ctxt.i1_weighted_pred_flag = i1_weighted_pred_flag;
6119 ps_ctxt->s_mc_ctxt.i1_weighted_bipred_flag = i1_weighted_bipred_flag;
6120 ps_ctxt->s_mc_ctxt.i4_log2_luma_wght_denom = log2_luma_wght_denom;
6121 ps_ctxt->s_mc_ctxt.i4_log2_chroma_wght_denom = log2_chroma_wght_denom;
6122
6123 /* intialize the MV pred context at frame level */
6124 ps_ctxt->s_mv_pred_ctxt.ps_ref_list = aps_ref_list;
6125 ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr = ps_slice_hdr;
6126 ps_ctxt->s_mv_pred_ctxt.ps_sps = ps_sps;
6127 ps_ctxt->s_mv_pred_ctxt.i4_log2_parallel_merge_level_minus2 =
6128 ps_pps->i1_log2_parallel_merge_level - 2;
6129
6130 #if ADAPT_COLOCATED_FROM_L0_FLAG
6131 if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_temporal_mvp_enable_flag)
6132 {
6133 if((ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_num_ref_idx_l1_active > 0) &&
6134 (ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][0]->i4_frame_qp <
6135 ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][0]->i4_frame_qp))
6136 {
6137 ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_collocated_from_l0_flag = 1;
6138 }
6139 }
6140 #endif
6141 /* Initialization of deblocking params */
6142 ps_ctxt->s_deblk_prms.i4_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
6143 ps_ctxt->s_deblk_prms.i4_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
6144
6145 ps_ctxt->s_deblk_prms.i4_cb_qp_indx_offset = ps_pps->i1_pic_cb_qp_offset;
6146
6147 ps_ctxt->s_deblk_prms.i4_cr_qp_indx_offset = ps_pps->i1_pic_cr_qp_offset;
6148 /*init frame level stat accumualtion parameters */
6149 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6150 ->u4_frame_sad_acc = 0;
6151 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6152 ->u4_frame_intra_sad_acc = 0;
6153 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6154 ->u4_frame_open_loop_intra_sad = 0;
6155 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6156 ->i8_frame_open_loop_ssd = 0;
6157 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6158 ->u4_frame_inter_sad_acc = 0;
6159
6160 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6161 ->i8_frame_cost_acc = 0;
6162 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6163 ->i8_frame_intra_cost_acc = 0;
6164 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6165 ->i8_frame_inter_cost_acc = 0;
6166
6167 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6168 ->u4_frame_intra_sad = 0;
6169 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6170 ->u4_frame_rdopt_bits = 0;
6171 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6172 ->u4_frame_rdopt_header_bits = 0;
6173 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6174 ->i4_qp_normalized_8x8_cu_sum[0] = 0;
6175 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6176 ->i4_qp_normalized_8x8_cu_sum[1] = 0;
6177 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6178 ->i4_8x8_cu_sum[0] = 0;
6179 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6180 ->i4_8x8_cu_sum[1] = 0;
6181 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6182 ->i8_sad_by_qscale[0] = 0;
6183 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6184 ->i8_sad_by_qscale[1] = 0;
6185 /* Compute the frame_qstep */
6186 GET_FRAME_QSTEP_FROM_QP(ps_ctxt->i4_frame_qp, ps_ctxt->i4_frame_qstep);
6187
6188 ps_ctxt->u1_max_tr_depth = ps_sps->i1_max_transform_hierarchy_depth_inter;
6189
6190 ps_ctxt->ps_rc_quant_ctxt = &ps_enc_ctxt->s_rc_quant;
6191 /* intialize the cabac rdopt context at frame level */
6192 ihevce_entropy_rdo_frame_init(
6193 &ps_ctxt->s_rdopt_entropy_ctxt,
6194 ps_slice_hdr,
6195 ps_pps,
6196 ps_sps,
6197 ps_vps,
6198 ps_master_ctxt->au1_cu_skip_top_row,
6199 &ps_enc_ctxt->s_rc_quant);
6200
6201 /* register the dep mngr instance for forward ME sync */
6202 ps_ctxt->pv_dep_mngr_encloop_dep_me = pv_dep_mngr_encloop_dep_me;
6203 }
6204 }
6205 /*
6206 ******************************************************************************
6207 * \if Function name : ihevce_enc_loop_get_frame_rc_prms \endif
6208 *
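* \brief
*    Sums the frame level RC statistics (SAD, SSD, rdopt bits, 8x8 CU sums,
*    SAD by qscale) kept by every enc loop thread and writes the totals
*    into ps_rc_prms
*
* \param[in] pv_enc_loop_ctxt : pointer to encode loop master context
* \param[out]ps_rc_prms : ptr to frame level info structure
* \param[in] i4_br_id : bitrate instance id
* \param[in] i4_enc_frm_id : frame id (forced to 0 when
*                            i4_num_enc_loop_frm_pllel is 1)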
6214 *
6215 * \return
6216 * None
6217 *
6218 * \author
6219 * Ittiam
6220 *
6221 *****************************************************************************
6222 */
void ihevce_enc_loop_get_frame_rc_prms(
    void *pv_enc_loop_ctxt,
    rc_bits_sad_t *ps_rc_prms,
    WORD32 i4_br_id,  //bitrate instance id
    WORD32 i4_enc_frm_id)  // frame id
{
    /*
     * Collects the frame level RC statistics of one (frame id, bitrate
     * instance) pair: every per-thread accumulator is summed across all
     * processing threads and the totals are written to ps_rc_prms.
     */

    /* ENC_LOOP master state structure (owns the per-thread contexts) */
    ihevce_enc_loop_master_ctxt_t *ps_master_ctxt =
        (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
    ihevce_enc_loop_ctxt_t *ps_ctxt;

    /* Frame level totals, accumulated over all threads */
    UWORD32 total_frame_intra_sad = 0, total_frame_open_loop_intra_sad = 0;
    LWORD64 i8_total_ssd_frame = 0;
    UWORD32 total_frame_sad = 0;
    UWORD32 total_frame_rdopt_bits = 0;
    UWORD32 total_frame_rdopt_header_bits = 0;
    WORD32 i4_qp_normalized_8x8_cu_sum[2] = { 0, 0 };
    WORD32 i4_8x8_cu_sum[2] = { 0, 0 };
    LWORD64 i8_sad_by_qscale[2] = { 0, 0 };
    WORD32 i;

    /* With a single frame in flight only slot 0 of the per-frame */
    /* parameter array is used, whatever frame id the caller passed */
    if(1 == ps_master_ctxt->i4_num_enc_loop_frm_pllel)
    {
        i4_enc_frm_id = 0;
    }

    /* loop through all threads and accumulate the stats across all threads */
    for(i = 0; i < ps_master_ctxt->i4_num_proc_thrds; i++)
    {
        /* ENC_LOOP state structure of the current thread */
        ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i];

        total_frame_open_loop_intra_sad +=
            ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_open_loop_intra_sad;
        i8_total_ssd_frame +=
            ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_open_loop_ssd;
        total_frame_intra_sad +=
            ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_intra_sad;
        total_frame_sad +=
            ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_sad_acc;
        total_frame_rdopt_bits +=
            ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_bits;
        total_frame_rdopt_header_bits +=
            ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_header_bits;

        i4_qp_normalized_8x8_cu_sum[0] += ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
                                              ->i4_qp_normalized_8x8_cu_sum[0];
        i4_qp_normalized_8x8_cu_sum[1] += ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
                                              ->i4_qp_normalized_8x8_cu_sum[1];
        i4_8x8_cu_sum[0] +=
            ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[0];
        i4_8x8_cu_sum[1] +=
            ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[1];
        i8_sad_by_qscale[0] +=
            ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[0];
        i8_sad_by_qscale[1] +=
            ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[1];
    }

    /* Hand the frame totals back to the caller */
    ps_rc_prms->u4_open_loop_intra_sad = total_frame_open_loop_intra_sad;
    ps_rc_prms->i8_total_ssd_frame = i8_total_ssd_frame;
    ps_rc_prms->u4_total_sad = total_frame_sad;
    /* texture bits = all rdopt bits minus the header share */
    ps_rc_prms->u4_total_texture_bits = total_frame_rdopt_bits - total_frame_rdopt_header_bits;
    ps_rc_prms->u4_total_header_bits = total_frame_rdopt_header_bits;
    /*This accumulation of intra frame sad is not intact. This can only be a temp change*/
    ps_rc_prms->u4_total_intra_sad = total_frame_intra_sad;
    ps_rc_prms->i4_qp_normalized_8x8_cu_sum[0] = i4_qp_normalized_8x8_cu_sum[0];
    ps_rc_prms->i4_qp_normalized_8x8_cu_sum[1] = i4_qp_normalized_8x8_cu_sum[1];
    ps_rc_prms->i4_8x8_cu_sum[0] = i4_8x8_cu_sum[0];
    ps_rc_prms->i4_8x8_cu_sum[1] = i4_8x8_cu_sum[1];
    ps_rc_prms->i8_sad_by_qscale[0] = i8_sad_by_qscale[0];
    ps_rc_prms->i8_sad_by_qscale[1] = i8_sad_by_qscale[1];
}
6295