• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /*!
22 ******************************************************************************
23 * \file ihevce_enc_loop_pass.c
24 *
25 * \brief
26 *    This file contains Encoder normative loop pass related functions
27 *
28 * \date
29 *    18/09/2012
30 *
31 * \author
32 *    Ittiam
33 *
34 *
35 * List of Functions
36 *
37 *
38 ******************************************************************************
39 */
40 
41 /*****************************************************************************/
42 /* File Includes                                                             */
43 /*****************************************************************************/
44 /* System include files */
45 #include <stdio.h>
46 #include <string.h>
47 #include <stdlib.h>
48 #include <assert.h>
49 #include <stdarg.h>
50 #include <math.h>
51 #include <limits.h>
52 
53 /* User include files */
54 #include "ihevc_typedefs.h"
55 #include "itt_video_api.h"
56 #include "ihevce_api.h"
57 
58 #include "rc_cntrl_param.h"
59 #include "rc_frame_info_collector.h"
60 #include "rc_look_ahead_params.h"
61 
62 #include "ihevc_defs.h"
63 #include "ihevc_macros.h"
64 #include "ihevc_debug.h"
65 #include "ihevc_structs.h"
66 #include "ihevc_platform_macros.h"
67 #include "ihevc_deblk.h"
68 #include "ihevc_itrans_recon.h"
69 #include "ihevc_chroma_itrans_recon.h"
70 #include "ihevc_chroma_intra_pred.h"
71 #include "ihevc_intra_pred.h"
72 #include "ihevc_inter_pred.h"
73 #include "ihevc_mem_fns.h"
74 #include "ihevc_padding.h"
75 #include "ihevc_weighted_pred.h"
76 #include "ihevc_sao.h"
77 #include "ihevc_resi_trans.h"
78 #include "ihevc_quant_iquant_ssd.h"
79 #include "ihevc_cabac_tables.h"
80 #include "ihevc_common_tables.h"
81 #include "ihevc_quant_tables.h"
82 
83 #include "ihevce_defs.h"
84 #include "ihevce_hle_interface.h"
85 #include "ihevce_lap_enc_structs.h"
86 #include "ihevce_multi_thrd_structs.h"
87 #include "ihevce_multi_thrd_funcs.h"
88 #include "ihevce_me_common_defs.h"
89 #include "ihevce_had_satd.h"
90 #include "ihevce_error_codes.h"
91 #include "ihevce_bitstream.h"
92 #include "ihevce_cabac.h"
93 #include "ihevce_rdoq_macros.h"
94 #include "ihevce_function_selector.h"
95 #include "ihevce_enc_structs.h"
96 #include "ihevce_entropy_structs.h"
97 #include "ihevce_cmn_utils_instr_set_router.h"
98 #include "ihevce_ipe_instr_set_router.h"
99 #include "ihevce_decomp_pre_intra_structs.h"
100 #include "ihevce_decomp_pre_intra_pass.h"
101 #include "ihevce_enc_loop_structs.h"
102 #include "ihevce_nbr_avail.h"
103 #include "ihevce_enc_loop_utils.h"
104 #include "ihevce_sub_pic_rc.h"
105 #include "ihevce_global_tables.h"
106 #include "ihevce_bs_compute_ctb.h"
107 #include "ihevce_cabac_rdo.h"
108 #include "ihevce_deblk.h"
109 #include "ihevce_frame_process.h"
110 #include "ihevce_rc_enc_structs.h"
111 #include "hme_datatype.h"
112 #include "hme_interface.h"
113 #include "hme_common_defs.h"
114 #include "hme_defs.h"
115 #include "ihevce_me_instr_set_router.h"
116 #include "ihevce_enc_subpel_gen.h"
117 #include "ihevce_inter_pred.h"
118 #include "ihevce_mv_pred.h"
119 #include "ihevce_mv_pred_merge.h"
120 #include "ihevce_enc_loop_inter_mode_sifter.h"
121 #include "ihevce_enc_cu_recursion.h"
122 #include "ihevce_enc_loop_pass.h"
123 #include "ihevce_common_utils.h"
124 #include "ihevce_dep_mngr_interface.h"
125 #include "ihevce_sao.h"
126 #include "ihevce_tile_interface.h"
127 #include "ihevce_profile.h"
128 
129 #include "cast_types.h"
130 #include "osal.h"
131 #include "osal_defaults.h"
132 
133 /*****************************************************************************/
134 /* Globals                                                                   */
135 /*****************************************************************************/
136 extern PART_ID_T ge_part_type_to_part_id[MAX_PART_TYPES][MAX_NUM_PARTS];
137 
138 extern UWORD8 gau1_num_parts_in_part_type[MAX_PART_TYPES];
139 
140 /*****************************************************************************/
141 /* Constant Macros                                                           */
142 /*****************************************************************************/
143 #define UPDATE_QP_AT_CTB 6
144 
145 /*****************************************************************************/
146 /* Function Definitions                                                      */
147 /*****************************************************************************/
148 
149 /*!
150 ******************************************************************************
151 * \if Function name : ihevce_enc_loop_ctb_left_copy \endif
152 *
153 * \brief
154 *    This function copy the right data of CTB to context buffers
155 *
156 * \date
157 *    18/09/2012
158 *
159 * \author
160 *    Ittiam
161 *
162 * \return
163 *
164 * List of Functions
165 *
166 *
167 ******************************************************************************
168 */
ihevce_enc_loop_ctb_left_copy(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms)169 void ihevce_enc_loop_ctb_left_copy(ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_prms_t *ps_cu_prms)
170 {
171     /* ------------------------------------------------------------------ */
172     /* copy the right coloum data to the context buffers                  */
173     /* ------------------------------------------------------------------ */
174 
175     nbr_4x4_t *ps_left_nbr;
176     nbr_4x4_t *ps_nbr;
177     UWORD8 *pu1_buff;
178     WORD32 num_pels;
179     UWORD8 *pu1_luma_left, *pu1_chrm_left;
180 
181     UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
182 
183     pu1_luma_left = (UWORD8 *)ps_ctxt->pv_left_luma_data;
184     pu1_chrm_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data;
185     ps_left_nbr = &ps_ctxt->as_left_col_nbr[0];
186 
187     /* copy right luma data */
188     pu1_buff = ps_cu_prms->pu1_luma_recon + ps_cu_prms->i4_ctb_size - 1;
189 
190     for(num_pels = 0; num_pels < ps_cu_prms->i4_ctb_size; num_pels++)
191     {
192         WORD32 i4_indx = ps_cu_prms->i4_luma_recon_stride * num_pels;
193 
194         pu1_luma_left[num_pels] = pu1_buff[i4_indx];
195     }
196 
197     /* copy right chroma data */
198     pu1_buff = ps_cu_prms->pu1_chrm_recon + ps_cu_prms->i4_ctb_size - 2;
199 
200     for(num_pels = 0; num_pels < (ps_cu_prms->i4_ctb_size >> (0 == u1_is_422)); num_pels++)
201     {
202         WORD32 i4_indx = ps_cu_prms->i4_chrm_recon_stride * num_pels;
203 
204         *pu1_chrm_left++ = pu1_buff[i4_indx];
205         *pu1_chrm_left++ = pu1_buff[i4_indx + 1];
206     }
207 
208     /* store the nbr 4x4 data at ctb level */
209     {
210         WORD32 ctr;
211         WORD32 nbr_strd;
212 
213         nbr_strd = ps_cu_prms->i4_ctb_size >> 2;
214 
215         /* copy right nbr data */
216         ps_nbr = &ps_ctxt->as_ctb_nbr_arr[0];
217         ps_nbr += ((ps_cu_prms->i4_ctb_size >> 2) - 1);
218 
219         for(ctr = 0; ctr < (ps_cu_prms->i4_ctb_size >> 2); ctr++)
220         {
221             WORD32 i4_indx = nbr_strd * ctr;
222 
223             ps_left_nbr[ctr] = ps_nbr[i4_indx];
224         }
225     }
226     return;
227 }
228 
229 /*!
230 ******************************************************************************
231 * \if Function name : ihevce_mark_all_modes_to_evaluate \endif
232 *
233 * \brief
234 *   Mark all modes for inter/intra for evaluation. This function will be
235 *   called by ref instance
236 *
237 * \param[in] pv_ctxt : pointer to enc_loop module
238 * \param[in] ps_cu_analyse : pointer to cu analyse
239 *
240 * \return
241 *    None
242 *
243 * \author
244 *  Ittiam
245 *
246 *****************************************************************************
247 */
ihevce_mark_all_modes_to_evaluate(void * pv_ctxt,cu_analyse_t * ps_cu_analyse)248 void ihevce_mark_all_modes_to_evaluate(void *pv_ctxt, cu_analyse_t *ps_cu_analyse)
249 {
250     UWORD8 ctr;
251     WORD32 i4_part;
252 
253     (void)pv_ctxt;
254     /* run a loop over all Inter cands */
255     for(ctr = 0; ctr < MAX_INTER_CU_CANDIDATES; ctr++)
256     {
257         ps_cu_analyse->as_cu_inter_cand[ctr].b1_eval_mark = 1;
258     }
259 
260     /* run a loop over all intra candidates */
261     if(0 != ps_cu_analyse->u1_num_intra_rdopt_cands)
262     {
263         for(ctr = 0; ctr < MAX_INTRA_CU_CANDIDATES + 1; ctr++)
264         {
265             ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr] = 1;
266             ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr] = 1;
267 
268             for(i4_part = 0; i4_part < NUM_PU_PARTS; i4_part++)
269             {
270                 ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[i4_part][ctr] = 1;
271             }
272         }
273     }
274 }
275 
276 /*!
277 ******************************************************************************
278 * \if Function name : ihevce_cu_mode_decide \endif
279 *
280 * \brief
281 *    Coding Unit mode decide function. Performs RD opt and decides the best mode
282 *
283 * \param[in] ps_ctxt : pointer to enc_loop module
284 * \param[in] ps_cu_prms  : pointer to coding unit params (position, buffer pointers)
285 * \param[in] ps_cu_analyse : pointer to cu analyse
286 * \param[out] ps_final_mode_state : pointer to final mode state
287 * \param[out] pu1_ecd_data : pointer to store coeff data for ECD
288 * \param[out] ps_col_pu : colocated pu buffer pointer
289 * \param[out] pu1_col_pu_map : colocated pu map buffer pointer
290 * \param[in] col_start_pu_idx : pu index start value
291 *
292 * \return
293 *    LWORD64 value (presumably the best RD-opt cost of the CU -- TODO confirm)
294 *
295 *
296 * \author
297 *  Ittiam
298 *
299 *****************************************************************************
300 */
ihevce_cu_mode_decide(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms,cu_analyse_t * ps_cu_analyse,final_mode_state_t * ps_final_mode_state,UWORD8 * pu1_ecd_data,pu_col_mv_t * ps_col_pu,UWORD8 * pu1_col_pu_map,WORD32 col_start_pu_idx)301 LWORD64 ihevce_cu_mode_decide(
302     ihevce_enc_loop_ctxt_t *ps_ctxt,
303     enc_loop_cu_prms_t *ps_cu_prms,
304     cu_analyse_t *ps_cu_analyse,
305     final_mode_state_t *ps_final_mode_state,
306     UWORD8 *pu1_ecd_data,
307     pu_col_mv_t *ps_col_pu,
308     UWORD8 *pu1_col_pu_map,
309     WORD32 col_start_pu_idx)
310 {
311     enc_loop_chrm_cu_buf_prms_t s_chrm_cu_buf_prms;
312     cu_nbr_prms_t s_cu_nbr_prms;
313     inter_cu_mode_info_t s_inter_cu_mode_info;
314     cu_inter_cand_t *ps_best_inter_cand = NULL;
315     UWORD8 *pu1_cu_top;
316     UWORD8 *pu1_cu_top_left;
317     UWORD8 *pu1_cu_left;
318     UWORD8 *pu1_final_recon = NULL;
319     UWORD8 *pu1_curr_src = NULL;
320     void *pv_curr_src = NULL;
321     void *pv_cu_left = NULL;
322     void *pv_cu_top = NULL;
323     void *pv_cu_top_left = NULL;
324 
325     WORD32 cu_left_stride = 0;
326     WORD32 ctr;
327     WORD32 rd_opt_best_idx;
328     LWORD64 rd_opt_least_cost;
329     WORD32 rd_opt_curr_idx;
330     WORD32 num_4x4_in_ctb;
331     WORD32 nbr_4x4_left_strd = 0;
332 
333     nbr_4x4_t *ps_topleft_nbr_4x4;
334     nbr_4x4_t *ps_left_nbr_4x4 = NULL;
335     nbr_4x4_t *ps_top_nbr_4x4 = NULL;
336     nbr_4x4_t *ps_curr_nbr_4x4;
337     WORD32 enable_intra_eval_flag;
338     WORD32 i4_best_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1;
339     WORD32 curr_cu_pos_in_row;
340     WORD32 cu_top_right_offset;
341     WORD32 cu_top_right_dep_pos;
342     WORD32 i4_ctb_x_off, i4_ctb_y_off;
343 
344     UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
345     (void)ps_final_mode_state;
346     /* default init */
347     rd_opt_least_cost = MAX_COST_64;
348     ps_ctxt->as_cu_prms[0].i8_best_rdopt_cost = MAX_COST_64;
349     ps_ctxt->as_cu_prms[1].i8_best_rdopt_cost = MAX_COST_64;
350 
351     /* Zero cbf tool is enabled by default for all presets */
352     ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
353 
354     rd_opt_best_idx = 1;
355     rd_opt_curr_idx = 0;
356     enable_intra_eval_flag = 1;
357 
358     /* CU params in enc ctxt*/
359     ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
360     ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
361     ps_ctxt->ps_enc_out_ctxt->u1_cu_size = ps_cu_analyse->u1_cu_size;
362 
363     num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
364     ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0];
365     ps_curr_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1);
366     ps_curr_nbr_4x4 += ((ps_cu_analyse->b3_cu_pos_y << 1) * num_4x4_in_ctb);
367 
368     /* CB and Cr are pixel interleaved */
369     s_chrm_cu_buf_prms.i4_chrm_recon_stride = ps_cu_prms->i4_chrm_recon_stride;
370 
371     s_chrm_cu_buf_prms.i4_chrm_src_stride = ps_cu_prms->i4_chrm_src_stride;
372 
373     if(!ps_ctxt->u1_is_input_data_hbd)
374     {
375         /* --------------------------------------- */
376         /* ----- Luma Pointers Derivation -------- */
377         /* --------------------------------------- */
378 
379         /* based on CU position derive the pointers */
380         pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_cu_analyse->b3_cu_pos_x << 3);
381 
382         pu1_curr_src = ps_cu_prms->pu1_luma_src + (ps_cu_analyse->b3_cu_pos_x << 3);
383 
384         pu1_final_recon += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride);
385 
386         pu1_curr_src += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_src_stride);
387 
388         pv_curr_src = pu1_curr_src;
389 
390         /* CU left */
391         if(0 == ps_cu_analyse->b3_cu_pos_x)
392         {
393             /* CTB boundary */
394             pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_luma_data;
395             pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << 3);
396             cu_left_stride = 1;
397 
398             ps_left_nbr_4x4 = &ps_ctxt->as_left_col_nbr[0];
399             ps_left_nbr_4x4 += ps_cu_analyse->b3_cu_pos_y << 1;
400             nbr_4x4_left_strd = 1;
401         }
402         else
403         {
404             /* inside CTB */
405             pu1_cu_left = pu1_final_recon - 1;
406             cu_left_stride = ps_cu_prms->i4_luma_recon_stride;
407 
408             ps_left_nbr_4x4 = ps_curr_nbr_4x4 - 1;
409             nbr_4x4_left_strd = num_4x4_in_ctb;
410         }
411 
412         pv_cu_left = pu1_cu_left;
413 
414         /* CU top */
415         if(0 == ps_cu_analyse->b3_cu_pos_y)
416         {
417             /* CTB boundary */
418             pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_luma;
419             pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size;
420             pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3);
421 
422             ps_top_nbr_4x4 = ps_ctxt->ps_top_row_nbr;
423             ps_top_nbr_4x4 += (ps_cu_prms->i4_ctb_pos * (ps_cu_prms->i4_ctb_size >> 2));
424             ps_top_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1);
425         }
426         else
427         {
428             /* inside CTB */
429             pu1_cu_top = pu1_final_recon - ps_cu_prms->i4_luma_recon_stride;
430 
431             ps_top_nbr_4x4 = ps_curr_nbr_4x4 - num_4x4_in_ctb;
432         }
433 
434         pv_cu_top = pu1_cu_top;
435 
436         /* CU top left */
437         if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y))
438         {
439             /* left ctb boundary but not first row */
440             pu1_cu_top_left = pu1_cu_left - 1; /* stride is 1 */
441             ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - 1; /* stride is 1 */
442         }
443         else
444         {
445             /* rest all cases topleft is top -1 */
446             pu1_cu_top_left = pu1_cu_top - 1;
447             ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1;
448         }
449 
450         pv_cu_top_left = pu1_cu_top_left;
451 
452         /* Store the CU nbr information in the ctxt for final reconstruction fun. */
453         s_cu_nbr_prms.nbr_4x4_left_strd = nbr_4x4_left_strd;
454         s_cu_nbr_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4;
455         s_cu_nbr_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4;
456         s_cu_nbr_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4;
457         s_cu_nbr_prms.pu1_cu_left = pu1_cu_left;
458         s_cu_nbr_prms.pu1_cu_top = pu1_cu_top;
459         s_cu_nbr_prms.pu1_cu_top_left = pu1_cu_top_left;
460         s_cu_nbr_prms.cu_left_stride = cu_left_stride;
461 
462         /* ------------------------------------------------------------ */
463         /* -- Initialize the number of neigbour skip cu count for rdo --*/
464         /* ------------------------------------------------------------ */
465         {
466             nbr_avail_flags_t s_nbr;
467             WORD32 i4_num_nbr_skip_cus = 0;
468 
469             /* get the neighbour availability flags for current cu  */
470             ihevce_get_nbr_intra(
471                 &s_nbr,
472                 ps_ctxt->pu1_ctb_nbr_map,
473                 ps_ctxt->i4_nbr_map_strd,
474                 (ps_cu_analyse->b3_cu_pos_x << 1),
475                 (ps_cu_analyse->b3_cu_pos_y << 1),
476                 (ps_cu_analyse->u1_cu_size >> 2));
477             if(s_nbr.u1_top_avail)
478             {
479                 i4_num_nbr_skip_cus += ps_top_nbr_4x4->b1_skip_flag;
480             }
481 
482             if(s_nbr.u1_left_avail)
483             {
484                 i4_num_nbr_skip_cus += ps_left_nbr_4x4->b1_skip_flag;
485             }
486             ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0].i4_num_nbr_skip_cus =
487                 i4_num_nbr_skip_cus;
488             ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1].i4_num_nbr_skip_cus =
489                 i4_num_nbr_skip_cus;
490         }
491 
492         /* --------------------------------------- */
493         /* --- Chroma Pointers Derivation -------- */
494         /* --------------------------------------- */
495 
496         /* based on CU position derive the pointers */
497         s_chrm_cu_buf_prms.pu1_final_recon =
498             ps_cu_prms->pu1_chrm_recon + (ps_cu_analyse->b3_cu_pos_x << 3);
499 
500         s_chrm_cu_buf_prms.pu1_curr_src =
501             ps_cu_prms->pu1_chrm_src + (ps_cu_analyse->b3_cu_pos_x << 3);
502 
503         s_chrm_cu_buf_prms.pu1_final_recon +=
504             ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_recon_stride);
505 
506         s_chrm_cu_buf_prms.pu1_curr_src +=
507             ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_src_stride);
508 
509         /* CU left */
510         if(0 == ps_cu_analyse->b3_cu_pos_x)
511         {
512             /* CTB boundary */
513             s_chrm_cu_buf_prms.pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data;
514             s_chrm_cu_buf_prms.pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 3));
515             s_chrm_cu_buf_prms.i4_cu_left_stride = 2;
516         }
517         else
518         {
519             /* inside CTB */
520             s_chrm_cu_buf_prms.pu1_cu_left = s_chrm_cu_buf_prms.pu1_final_recon - 2;
521             s_chrm_cu_buf_prms.i4_cu_left_stride = ps_cu_prms->i4_chrm_recon_stride;
522         }
523 
524         /* CU top */
525         if(0 == ps_cu_analyse->b3_cu_pos_y)
526         {
527             /* CTB boundary */
528             s_chrm_cu_buf_prms.pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_chroma;
529             s_chrm_cu_buf_prms.pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size;
530             s_chrm_cu_buf_prms.pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3);
531         }
532         else
533         {
534             /* inside CTB */
535             s_chrm_cu_buf_prms.pu1_cu_top =
536                 s_chrm_cu_buf_prms.pu1_final_recon - ps_cu_prms->i4_chrm_recon_stride;
537         }
538 
539         /* CU top left */
540         if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y))
541         {
542             /* left ctb boundary but not first row */
543             s_chrm_cu_buf_prms.pu1_cu_top_left =
544                 s_chrm_cu_buf_prms.pu1_cu_left - 2; /* stride is 1 (2 pixels) */
545         }
546         else
547         {
548             /* rest all cases topleft is top -2 */
549             s_chrm_cu_buf_prms.pu1_cu_top_left = s_chrm_cu_buf_prms.pu1_cu_top - 2;
550         }
551     }
552 
553     /* Set Variables for Dep. Checking and Setting */
554     i4_ctb_x_off = (ps_cu_prms->i4_ctb_pos << 6);
555 
556     i4_ctb_y_off = ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y;
557     ps_ctxt->i4_satd_buf_idx = rd_opt_curr_idx;
558 
559     /* Set the pred pointer count for ME/intra to 0 to start */
560     ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count = 0;
561 
562     ASSERT(
563         (ps_cu_analyse->u1_num_inter_cands > 0) || (ps_cu_analyse->u1_num_intra_rdopt_cands > 0));
564 
565     ASSERT(ps_cu_analyse->u1_num_inter_cands <= MAX_INTER_CU_CANDIDATES);
566     s_inter_cu_mode_info.u1_num_inter_cands = 0;
567     s_inter_cu_mode_info.u1_idx_of_worst_cost_in_cost_array = 0;
568     s_inter_cu_mode_info.u1_idx_of_worst_cost_in_pred_buf_array = 0;
569 
570     ps_ctxt->s_cu_inter_merge_skip.u1_num_merge_cands = 0;
571     ps_ctxt->s_cu_inter_merge_skip.u1_num_skip_cands = 0;
572     ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type0_cands = 0;
573     ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type1_cands = 0;
574     ps_ctxt->s_pred_buf_data.i4_pred_stride = ps_cu_analyse->u1_cu_size;
575     if(0 != ps_cu_analyse->u1_num_inter_cands)
576     {
577         ihevce_inter_cand_sifter_prms_t s_prms;
578 
579         UWORD8 u1_enable_top_row_sync;
580 
581         if(ps_ctxt->u1_disable_intra_eval)
582         {
583             u1_enable_top_row_sync = !DISABLE_TOP_SYNC;
584         }
585         else
586         {
587             u1_enable_top_row_sync = 1;
588         }
589 
590         if((!ps_ctxt->u1_use_top_at_ctb_boundary) && u1_enable_top_row_sync)
591         {
592             /* Wait till top data is ready          */
593             /* Currently checking till top right CU */
594             curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
595 
596             if(i4_ctb_y_off == 0)
597             {
598                 /* No wait for 1st row */
599                 cu_top_right_offset = -(MAX_CTB_SIZE);
600                 {
601                     ihevce_tile_params_t *ps_col_tile_params =
602                         ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
603                          ps_ctxt->i4_tile_col_idx);
604                     /* No wait for 1st row */
605                     cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
606                 }
607                 cu_top_right_dep_pos = 0;
608             }
609             else
610             {
611                 cu_top_right_offset = (ps_cu_analyse->u1_cu_size) + 4;
612                 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
613             }
614 
615             if(0 == ps_cu_analyse->b3_cu_pos_y)
616             {
617                 ihevce_dmgr_chk_row_row_sync(
618                     ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
619                     curr_cu_pos_in_row,
620                     cu_top_right_offset,
621                     cu_top_right_dep_pos,
622                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
623                     ps_ctxt->thrd_id);
624             }
625         }
626 
627         s_prms.i4_ctb_nbr_map_stride = ps_ctxt->i4_nbr_map_strd;
628         s_prms.i4_max_num_inter_rdopt_cands = ps_ctxt->i4_max_num_inter_rdopt_cands;
629         s_prms.i4_nbr_4x4_left_strd = nbr_4x4_left_strd;
630         s_prms.i4_src_strd = ps_cu_prms->i4_luma_src_stride;
631         s_prms.ps_cu_inter_merge_skip = &ps_ctxt->s_cu_inter_merge_skip;
632         s_prms.aps_cu_nbr_buf[0] = &ps_ctxt->as_cu_nbr[ps_ctxt->i4_satd_buf_idx][0];
633         s_prms.aps_cu_nbr_buf[1] = &ps_ctxt->as_cu_nbr[!ps_ctxt->i4_satd_buf_idx][0];
634         s_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4;
635         s_prms.ps_mc_ctxt = &ps_ctxt->s_mc_ctxt;
636         s_prms.ps_me_cands = ps_cu_analyse->as_cu_inter_cand;
637         s_prms.ps_mixed_modes_datastore = &ps_ctxt->s_mixed_mode_inter_cu;
638         s_prms.ps_mv_pred_ctxt = &ps_ctxt->s_mv_pred_ctxt;
639         s_prms.ps_pred_buf_data = &ps_ctxt->s_pred_buf_data;
640         s_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4;
641         s_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4;
642         s_prms.pu1_ctb_nbr_map = ps_ctxt->pu1_ctb_nbr_map;
643         s_prms.pv_src = pv_curr_src;
644         s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 3;
645         s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 3;
646         s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size;
647         s_prms.u1_max_merge_candidates = ps_ctxt->i4_max_merge_candidates;
648         s_prms.u1_num_me_cands = ps_cu_analyse->u1_num_inter_cands;
649         s_prms.u1_use_satd_for_merge_eval = ps_ctxt->i4_use_satd_for_merge_eval;
650         s_prms.u1_quality_preset = ps_ctxt->i4_quality_preset;
651         s_prms.i1_slice_type = ps_ctxt->i1_slice_type;
652         s_prms.ps_cu_me_intra_pred_prms = &ps_ctxt->s_cu_me_intra_pred_prms;
653         s_prms.u1_is_hbd = (ps_ctxt->u1_bit_depth > 8);
654         s_prms.ps_inter_cu_mode_info = &s_inter_cu_mode_info;
655         s_prms.pai4_mv_cost = ps_cu_analyse->ai4_mv_cost;
656         s_prms.i4_lambda_qf = ps_ctxt->i4_sad_lamda;
657         s_prms.u1_use_merge_cand_from_top_row =
658             (u1_enable_top_row_sync || (s_prms.u1_cu_pos_y > 0));
659         s_prms.u1_merge_idx_cabac_model =
660             ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[IHEVC_CAB_MERGE_IDX_EXT];
661 #if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
662         s_prms.pai4_me_err_metric = ps_cu_analyse->ai4_err_metric;
663         s_prms.u1_reuse_me_sad = 1;
664 #else
665         s_prms.u1_reuse_me_sad = 0;
666 #endif
667 
668         if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_type != PSLICE)
669         {
670             if(ps_ctxt->i4_temporal_layer == 1)
671             {
672                 s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_BREF;
673             }
674             else
675             {
676                 s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME;
677             }
678         }
679         else
680         {
681             s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_P;
682         }
683         s_prms.u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
684 
685         if(s_prms.u1_is_cu_noisy)
686         {
687             s_prms.i4_lambda_qf =
688                 ((float)s_prms.i4_lambda_qf) * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f;
689         }
690         s_prms.pf_luma_inter_pred_pu = ihevce_luma_inter_pred_pu;
691 
692         s_prms.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func;
693 
694         s_prms.pf_evalsad_pt_npu_mxn_8bit = (FT_SAD_EVALUATOR *)ps_ctxt->pv_evalsad_pt_npu_mxn_8bit;
695         ihevce_inter_cand_sifter(&s_prms);
696     }
697     if(u1_is_422)
698     {
699         UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX - 1];
700         UWORD8 u1_num_bufs_allocated;
701 
702         u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices(
703             au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX - 1);
704 
705         ASSERT(u1_num_bufs_allocated == (NUM_CU_ME_INTRA_PRED_IDX - 1));
706 
707         for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated;
708             ctr++)
709         {
710             {
711                 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
712                     (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]];
713             }
714 
715             ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
716 
717             ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
718         }
719 
720         {
721             ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
722                 (UWORD8 *)ps_ctxt->pv_422_chroma_intra_pred_buf;
723         }
724 
725         ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
726 
727         ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
728     }
729     else
730     {
731         UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX];
732         UWORD8 u1_num_bufs_allocated;
733 
734         u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices(
735             au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX);
736 
737         ASSERT(u1_num_bufs_allocated == NUM_CU_ME_INTRA_PRED_IDX);
738 
739         for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated;
740             ctr++)
741         {
742             {
743                 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
744                     (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]];
745             }
746 
747             ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
748 
749             ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
750         }
751     }
752 
753     ihevce_mark_all_modes_to_evaluate(ps_ctxt, ps_cu_analyse);
754 
755     ps_ctxt->as_cu_prms[0].s_recon_datastore.u1_is_lumaRecon_available = 0;
756     ps_ctxt->as_cu_prms[1].s_recon_datastore.u1_is_lumaRecon_available = 0;
757     ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
758     ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
759     ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[1] = 0;
760     ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[1] = 0;
761     ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[2] = 0;
762     ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[2] = 0;
763     /* --------------------------------------- */
764     /* ------ Inter RD OPT stage ------------- */
765     /* --------------------------------------- */
766     if(0 != s_inter_cu_mode_info.u1_num_inter_cands)
767     {
768         UWORD8 u1_ssd_bit_info_ctr = 0;
769 
770         /* -- run a loop over all Inter rd opt cands ------ */
771         for(ctr = 0; ctr < s_inter_cu_mode_info.u1_num_inter_cands; ctr++)
772         {
773             cu_inter_cand_t *ps_inter_cand;
774 
775             LWORD64 rd_opt_cost = 0;
776 
777             ps_inter_cand = s_inter_cu_mode_info.aps_cu_data[ctr];
778 
779             if((ps_inter_cand->b1_skip_flag) || (ps_inter_cand->as_inter_pu[0].b1_merge_flag) ||
780                (ps_inter_cand->b3_part_size && ps_inter_cand->as_inter_pu[1].b1_merge_flag))
781             {
782                 ps_inter_cand->b1_eval_mark = 1;
783             }
784 
785             /****************************************************************/
786             /* This check is only valid for derived instances.              */
787             /* check if this mode needs to be evaluated or not.             */
788             /* if it is a skip candidate, go ahead and evaluate it even if  */
789             /* it has not been marked while sorting.                        */
790             /****************************************************************/
791             if((0 == ps_inter_cand->b1_eval_mark) && (0 == ps_inter_cand->b1_skip_flag))
792             {
793                 continue;
794             }
795 
796             /* RDOPT related copies and settings */
797             ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
798 
799             /* RDOPT copy States : Prev Cu best to current init */
800             COPY_CABAC_STATES(
801                 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
802                 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
803                 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
804             /* MVP ,MVD calc and Motion compensation */
805             rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)(
806                 ps_ctxt,
807                 ps_inter_cand,
808                 ps_cu_analyse->u1_cu_size,
809                 ps_cu_analyse->b3_cu_pos_x,
810                 ps_cu_analyse->b3_cu_pos_y,
811                 ps_left_nbr_4x4,
812                 ps_top_nbr_4x4,
813                 ps_topleft_nbr_4x4,
814                 nbr_4x4_left_strd,
815                 rd_opt_curr_idx);
816 
817 #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
818             if((ps_ctxt->u1_bit_depth == 8) && (!ps_inter_cand->b1_skip_flag))
819             {
820                 ihevce_determine_tu_tree_distribution(
821                     ps_inter_cand,
822                     (me_func_selector_t *)ps_ctxt->pv_err_func_selector,
823                     ps_ctxt->ai2_scratch,
824                     (UWORD8 *)pv_curr_src,
825                     ps_cu_prms->i4_luma_src_stride,
826                     ps_ctxt->i4_satd_lamda,
827                     LAMBDA_Q_SHIFT,
828                     ps_cu_analyse->u1_cu_size,
829                     ps_ctxt->u1_max_tr_depth);
830             }
831 #endif
832 #if DISABLE_ZERO_ZBF_IN_INTER
833             ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
834 #else
835             ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
836 #endif
837             /* Recon loop with different TUs based on partition type*/
838             rd_opt_cost += ((pf_inter_rdopt_cu_ntu)ps_ctxt->pv_inter_rdopt_cu_ntu)(
839                 ps_ctxt,
840                 ps_cu_prms,
841                 pv_curr_src,
842                 ps_cu_analyse->u1_cu_size,
843                 ps_cu_analyse->b3_cu_pos_x,
844                 ps_cu_analyse->b3_cu_pos_y,
845                 rd_opt_curr_idx,
846                 &s_chrm_cu_buf_prms,
847                 ps_inter_cand,
848                 ps_cu_analyse,
849                 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
850                                        : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
851                                           (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
852                                              100.0);
853 
854 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
855             if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
856             {
857                 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
858                 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
859                     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
860             }
861 #endif
862 
863             /* based on the rd opt cost choose the best and current index */
864             if(rd_opt_cost < rd_opt_least_cost)
865             {
866                 /* swap the best and current indx */
867                 rd_opt_best_idx = !rd_opt_best_idx;
868                 rd_opt_curr_idx = !rd_opt_curr_idx;
869 
870                 ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost;
871                 rd_opt_least_cost = rd_opt_cost;
872                 i4_best_cu_qp = ps_ctxt->i4_cu_qp;
873 
874                 /* Store the best Inter cand. for final_recon function */
875                 ps_best_inter_cand = ps_inter_cand;
876             }
877 
878             /* set the neighbour map to 0 */
879             ihevce_set_nbr_map(
880                 ps_ctxt->pu1_ctb_nbr_map,
881                 ps_ctxt->i4_nbr_map_strd,
882                 (ps_cu_analyse->b3_cu_pos_x << 1),
883                 (ps_cu_analyse->b3_cu_pos_y << 1),
884                 (ps_cu_analyse->u1_cu_size >> 2),
885                 0);
886 
887         } /* end of loop for all the Inter RD OPT cand */
888     }
889     /* --------------------------------------- */
890     /* ---- Conditional Eval of Intra -------- */
891     /* --------------------------------------- */
892     {
893         enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
894         ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
895 
896         /* check if inter candidates are valid */
897         if(0 != ps_cu_analyse->u1_num_inter_cands)
898         {
899             /* if skip or no residual inter candidates has won then */
900             /* evaluation of intra candidates is disabled           */
901             if((1 == ps_enc_loop_bestprms->u1_skip_flag) ||
902                (0 == ps_enc_loop_bestprms->u1_is_cu_coded))
903             {
904                 enable_intra_eval_flag = 0;
905             }
906         }
907         /* Disable Intra Gating for HIGH QUALITY PRESET */
908 #if !ENABLE_INTRA_GATING_FOR_HQ
909         if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset)
910         {
911             enable_intra_eval_flag = 1;
912 
913 #if DISABLE_LARGE_INTRA_PQ
914             if((IHEVCE_QUALITY_P0 == ps_ctxt->i4_quality_preset) && (ps_cu_prms->u1_is_cu_noisy) &&
915                (ps_ctxt->i1_slice_type != ISLICE) && (0 != s_inter_cu_mode_info.u1_num_inter_cands))
916             {
917                 if(ps_cu_analyse->u1_cu_size > 16)
918                 {
919                     /* Disable 32x32 / 64x64 Intra in PQ P and B pics */
920                     enable_intra_eval_flag = 0;
921                 }
922                 else if(ps_cu_analyse->u1_cu_size == 16)
923                 {
924                     /* Disable tu equal to cu mode in 16x16 Intra in PQ P and B pics */
925                     ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
926                 }
927             }
928 #endif
929         }
930 #endif
931     }
932 
933     /* --------------------------------------- */
934     /* ------ Intra RD OPT stage ------------- */
935     /* --------------------------------------- */
936 
937     /* -- run a loop over all Intra rd opt cands ------ */
938     if((0 != ps_cu_analyse->u1_num_intra_rdopt_cands) && (1 == enable_intra_eval_flag))
939     {
940         LWORD64 rd_opt_cost;
941         WORD32 end_flag = 0;
942         WORD32 cu_eval_done = 0;
943         WORD32 subcu_eval_done = 0;
944         WORD32 subpu_eval_done = 0;
945         WORD32 max_trans_size;
946         WORD32 sync_wait_stride;
947         max_trans_size = MIN(MAX_TU_SIZE, (ps_cu_analyse->u1_cu_size));
948         sync_wait_stride = (ps_cu_analyse->u1_cu_size) + max_trans_size;
949 
950         if(!ps_ctxt->u1_use_top_at_ctb_boundary)
951         {
952             /* Wait till top data is ready          */
953             /* Currently checking till top right CU */
954             curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
955 
956             if(i4_ctb_y_off == 0)
957             {
958                 /* No wait for 1st row */
959                 cu_top_right_offset = -(MAX_CTB_SIZE);
960                 {
961                     ihevce_tile_params_t *ps_col_tile_params =
962                         ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
963                          ps_ctxt->i4_tile_col_idx);
964                     /* No wait for 1st row */
965                     cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
966                 }
967                 cu_top_right_dep_pos = 0;
968             }
969             else
970             {
971                 cu_top_right_offset = sync_wait_stride;
972                 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
973             }
974 
975             if(0 == ps_cu_analyse->b3_cu_pos_y)
976             {
977                 ihevce_dmgr_chk_row_row_sync(
978                     ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
979                     curr_cu_pos_in_row,
980                     cu_top_right_offset,
981                     cu_top_right_dep_pos,
982                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
983                     ps_ctxt->thrd_id);
984             }
985         }
986         ctr = 0;
987 
988         /* Zero cbf tool is disabled for intra CUs unless ENABLE_ZERO_CBF_IN_INTRA is set */
989 #if ENABLE_ZERO_CBF_IN_INTRA
990         ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
991 #else
992         ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
993 #endif
994 
995         /* Intra Mode gating based on MPM cand list and encoder quality preset */
996         if((ps_ctxt->i1_slice_type != ISLICE) && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
997         {
998             ihevce_mpm_idx_based_filter_RDOPT_cand(
999                 ps_ctxt,
1000                 ps_cu_analyse,
1001                 ps_left_nbr_4x4,
1002                 ps_top_nbr_4x4,
1003                 &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0],
1004                 &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[0]);
1005 
1006             ihevce_mpm_idx_based_filter_RDOPT_cand(
1007                 ps_ctxt,
1008                 ps_cu_analyse,
1009                 ps_left_nbr_4x4,
1010                 ps_top_nbr_4x4,
1011                 &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0],
1012                 &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[0]);
1013         }
1014 
1015         /* Call Chroma SATD function for curr_func_mode in HIGH QUALITY mode */
1016         if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd)
1017         {
1018             /* For cu_size = 64, there won't be any TU_EQ_CU case */
1019             if(64 != ps_cu_analyse->u1_cu_size)
1020             {
1021                 /* RDOPT copy States : Prev Cu best to current init */
1022                 COPY_CABAC_STATES(
1023                     &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1024                     &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1025                     IHEVC_CAB_CTXT_END);
1026 
1027                 /* RDOPT related copies and settings */
1028                 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1029 
1030                 /* Calc. best SATD mode for TU_EQ_CU case */
1031                 ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)(
1032                     ps_ctxt,
1033                     &s_chrm_cu_buf_prms,
1034                     ps_cu_analyse,
1035                     rd_opt_curr_idx,
1036                     TU_EQ_CU,
1037                     !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1038                                            : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1039                                               (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1040                                                  100.0,
1041                     ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY);
1042 
1043 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1044                 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1045                 {
1046                     ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1047                     ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1048                         ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1049                 }
1050 #endif
1051             }
1052 
1053             /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and
1054             TU_EQ_CU_DIV2 case */
1055 
1056             if((ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] !=
1057                 255) &&
1058                (8 != ps_cu_analyse->u1_cu_size))
1059             {
1060                 /* RDOPT copy States : Prev Cu best to current init */
1061                 COPY_CABAC_STATES(
1062                     &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1063                     &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1064                     IHEVC_CAB_CTXT_END);
1065 
1066                 /* RDOPT related copies and settings */
1067                 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1068 
1069                 /* Calc. best SATD mode for TU_EQ_CU_DIV2 case */
1070                 ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)(
1071                     ps_ctxt,
1072                     &s_chrm_cu_buf_prms,
1073                     ps_cu_analyse,
1074                     rd_opt_curr_idx,
1075                     TU_EQ_CU_DIV2,
1076                     !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1077                                            : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1078                                               (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1079                                                  100.0,
1080                     ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY);
1081 
1082 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1083                 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1084                 {
1085                     ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1086                     ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1087                         ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1088                 }
1089 #endif
1090             }
1091         }
1092 
1093         while(0 == end_flag)
1094         {
1095             UWORD8 *pu1_mode = NULL;
1096             WORD32 curr_func_mode = 0;
1097             void *pv_pred;
1098 
1099             ASSERT(ctr < 36);
1100 
1101             /* TU equal to CU size evaluation of different modes */
1102             if(0 == cu_eval_done)
1103             {
1104                 /* check if all the modes have been evaluated */
1105                 if(255 == ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr])
1106                 {
1107                     cu_eval_done = 1;
1108                     ctr = 0;
1109                 }
1110                 else if(
1111                     (1 == ctr) &&
1112                     ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
1113                      (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
1114                     (ps_ctxt->i1_slice_type != ISLICE))
1115                 {
1116                     ctr = 0;
1117                     cu_eval_done = 1;
1118                     subcu_eval_done = 1;
1119                     subpu_eval_done = 1;
1120                 }
1121                 else
1122                 {
1123                     if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr])
1124                     {
1125                         ctr++;
1126                         continue;
1127                     }
1128 
1129                     pu1_mode =
1130                         &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr];
1131                     ctr++;
1132                     curr_func_mode = TU_EQ_CU;
1133                 }
1134             }
1135             /* Sub CU (NXN) mode evaluation of different pred modes */
1136             if((0 == subpu_eval_done) && (1 == cu_eval_done))
1137             {
1138                 /*For NxN modes evaluation all candidates for all PU parts are evaluated */
1139                 /*inside the ihevce_intra_rdopt_cu_ntu function, so the subpu_eval_done is set to 1 */
1140                 {
1141                     pu1_mode = &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][ctr];
1142 
1143                     curr_func_mode = TU_EQ_SUBCU;
1144                     /* check if any modes have to be evaluated */
1145                     if(255 == *pu1_mode)
1146                     {
1147                         subpu_eval_done = 1;
1148                         ctr = 0;
1149                     }
1150                     else if(ctr != 0) /* If the modes have to be evaluated, then terminate, as all modes are already evaluated */
1151                     {
1152                         subpu_eval_done = 1;
1153                         ctr = 0;
1154                     }
1155                     else
1156                     {
1157                         ctr++;
1158                     }
1159                 }
1160             }
1161 
1162             /* TU size equal to CU div2 mode evaluation of different pred modes */
1163             if((0 == subcu_eval_done) && (1 == subpu_eval_done) && (1 == cu_eval_done))
1164             {
1165                 /* check if all the modes have been evaluated */
1166                 if(255 ==
1167                    ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr])
1168                 {
1169                     subcu_eval_done = 1;
1170                 }
1171                 else if(
1172                     (1 == ctr) &&
1173                     ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
1174                      (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
1175                     (ps_ctxt->i1_slice_type != ISLICE) && (ps_cu_analyse->u1_cu_size == 64))
1176                 {
1177                     subcu_eval_done = 1;
1178                 }
1179                 else
1180                 {
1181                     if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr])
1182                     {
1183                         ctr++;
1184                         continue;
1185                     }
1186 
1187                     pu1_mode = &ps_cu_analyse->s_cu_intra_cand
1188                                     .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr];
1189 
1190                     ctr++;
1191                     curr_func_mode = TU_EQ_CU_DIV2;
1192                 }
1193             }
1194 
1195             /* check if all CU options have been evaluated */
1196             if((1 == cu_eval_done) && (1 == subcu_eval_done) && (1 == subpu_eval_done))
1197             {
1198                 break;
1199             }
1200 
1201             /* RDOPT related copies and settings */
1202             ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1203 
1204             /* Assign ME/Intra pred buf. to the current intra cand. since we
1205             are storing pred data for final_recon function */
1206             {
1207                 pv_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_curr_idx];
1208             }
1209 
1210             /* RDOPT copy States : Prev Cu best to current init */
1211             COPY_CABAC_STATES(
1212                 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1213                 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1214                 IHEVC_CAB_CTXT_END);
1215 
1216             /* call the function which performs the normative Intra encode */
1217             rd_opt_cost = ((pf_intra_rdopt_cu_ntu)ps_ctxt->pv_intra_rdopt_cu_ntu)(
1218                 ps_ctxt,
1219                 ps_cu_prms,
1220                 pv_pred,
1221                 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_curr_idx],
1222                 &s_chrm_cu_buf_prms,
1223                 pu1_mode,
1224                 ps_cu_analyse,
1225                 pv_curr_src,
1226                 pv_cu_left,
1227                 pv_cu_top,
1228                 pv_cu_top_left,
1229                 ps_left_nbr_4x4,
1230                 ps_top_nbr_4x4,
1231                 nbr_4x4_left_strd,
1232                 cu_left_stride,
1233                 rd_opt_curr_idx,
1234                 curr_func_mode,
1235                 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1236                                        : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1237                                           (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1238                                              100.0);
1239 
1240 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1241             if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1242             {
1243                 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1244                 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1245                     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1246             }
1247 #endif
1248 
1249             /* based on the rd opt cost choose the best and current index */
1250             if(rd_opt_cost < rd_opt_least_cost)
1251             {
1252                 /* swap the best and current indx */
1253                 rd_opt_best_idx = !rd_opt_best_idx;
1254                 rd_opt_curr_idx = !rd_opt_curr_idx;
1255                 i4_best_cu_qp = ps_ctxt->i4_cu_qp;
1256 
1257                 rd_opt_least_cost = rd_opt_cost;
1258                 ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost;
1259             }
1260 
1261             if((TU_EQ_SUBCU == curr_func_mode) &&
1262                (ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_intra_flag) &&
1263                (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P2) && !FORCE_INTRA_TU_DEPTH_TO_0)
1264             {
1265                 UWORD8 au1_tu_eq_cu_div2_modes[4];
1266                 UWORD8 au1_freq_of_mode[4];
1267 
1268                 if(ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_part_mode == SIZE_2Nx2N)
1269                 {
1270                     ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1271                         255;  //ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode[0];
1272                     ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] =
1273                         255;
1274                 }
1275                 else
1276                 {
1277                     WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D(
1278                         ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode,
1279                         au1_tu_eq_cu_div2_modes,
1280                         au1_freq_of_mode,
1281                         4);
1282 
1283                     if(2 == i4_num_clusters)
1284                     {
1285                         if(au1_freq_of_mode[0] == 3)
1286                         {
1287                             ps_cu_analyse->s_cu_intra_cand
1288                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1289                                 au1_tu_eq_cu_div2_modes[0];
1290                             ps_cu_analyse->s_cu_intra_cand
1291                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255;
1292                         }
1293                         else if(au1_freq_of_mode[1] == 3)
1294                         {
1295                             ps_cu_analyse->s_cu_intra_cand
1296                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1297                                 au1_tu_eq_cu_div2_modes[1];
1298                             ps_cu_analyse->s_cu_intra_cand
1299                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255;
1300                         }
1301                         else
1302                         {
1303                             ps_cu_analyse->s_cu_intra_cand
1304                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1305                                 au1_tu_eq_cu_div2_modes[0];
1306                             ps_cu_analyse->s_cu_intra_cand
1307                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] =
1308                                 au1_tu_eq_cu_div2_modes[1];
1309                             ps_cu_analyse->s_cu_intra_cand
1310                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[2] = 255;
1311                         }
1312                     }
1313                 }
1314             }
1315 
1316             /* set the neighbour map to 0 */
1317             ihevce_set_nbr_map(
1318                 ps_ctxt->pu1_ctb_nbr_map,
1319                 ps_ctxt->i4_nbr_map_strd,
1320                 (ps_cu_analyse->b3_cu_pos_x << 1),
1321                 (ps_cu_analyse->b3_cu_pos_y << 1),
1322                 (ps_cu_analyse->u1_cu_size >> 2),
1323                 0);
1324         }
1325 
1326     } /* end of Intra RD OPT cand evaluation */
1327 
1328     ASSERT(i4_best_cu_qp > (ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1));
1329     ps_ctxt->i4_cu_qp = i4_best_cu_qp;
1330     ps_cu_analyse->i1_cu_qp = i4_best_cu_qp;
1331 
1332     /* --------------------------------------- */
1333     /* --------Final mode Recon ---------- */
1334     /* --------------------------------------- */
1335     {
1336         enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
1337         void *pv_final_pred = NULL;
1338         WORD32 final_pred_strd = 0;
1339         void *pv_final_pred_chrm = NULL;
1340         WORD32 final_pred_strd_chrm = 0;
1341         WORD32 packed_pred_mode;
1342 
1343 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1344         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
1345         {
1346             pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0];
1347         }
1348 #else
1349         pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0];
1350 #endif
1351 
1352         ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1353         packed_pred_mode =
1354             ps_enc_loop_bestprms->u1_intra_flag + (ps_enc_loop_bestprms->u1_skip_flag) * 2;
1355 
1356         if(!ps_ctxt->u1_is_input_data_hbd)
1357         {
1358             if(ps_enc_loop_bestprms->u1_intra_flag)
1359             {
1360                 pv_final_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_best_idx];
1361                 final_pred_strd =
1362                     ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_best_idx];
1363             }
1364             else
1365             {
1366                 pv_final_pred = ps_best_inter_cand->pu1_pred_data;
1367                 final_pred_strd = ps_best_inter_cand->i4_pred_data_stride;
1368             }
1369 
1370             pv_final_pred_chrm =
1371                 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
1372                 rd_opt_best_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) +
1373                                    (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
1374             final_pred_strd_chrm =
1375                 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
1376         }
1377 
1378         ihevce_set_eval_flags(ps_ctxt, ps_enc_loop_bestprms);
1379 
1380         {
1381             final_mode_process_prms_t s_prms;
1382 
1383             void *pv_cu_luma_recon;
1384             void *pv_cu_chroma_recon;
1385             WORD32 luma_stride, chroma_stride;
1386 
1387             if(!ps_ctxt->u1_is_input_data_hbd)
1388             {
1389 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1390                 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
1391                 {
1392                     pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon;
1393                     pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon;
1394                     luma_stride = ps_cu_analyse->u1_cu_size;
1395                     chroma_stride = ps_cu_analyse->u1_cu_size;
1396                 }
1397                 else
1398                 {
1399                     /* based on CU position derive the luma pointers */
1400                     pv_cu_luma_recon = pu1_final_recon;
1401 
1402                     /* based on CU position derive the chroma pointers */
1403                     pv_cu_chroma_recon = s_chrm_cu_buf_prms.pu1_final_recon;
1404 
1405                     luma_stride = ps_cu_prms->i4_luma_recon_stride;
1406 
1407                     chroma_stride = ps_cu_prms->i4_chrm_recon_stride;
1408                 }
1409 #else
1410                 pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon;
1411                 pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon;
1412                 luma_stride = ps_cu_analyse->u1_cu_size;
1413                 chroma_stride = ps_cu_analyse->u1_cu_size;
1414 #endif
1415 
1416                 s_prms.ps_cu_nbr_prms = &s_cu_nbr_prms;
1417                 s_prms.ps_best_inter_cand = ps_best_inter_cand;
1418                 s_prms.ps_chrm_cu_buf_prms = &s_chrm_cu_buf_prms;
1419                 s_prms.packed_pred_mode = packed_pred_mode;
1420                 s_prms.rd_opt_best_idx = rd_opt_best_idx;
1421                 s_prms.pv_src = pu1_curr_src;
1422                 s_prms.src_strd = ps_cu_prms->i4_luma_src_stride;
1423                 s_prms.pv_pred = pv_final_pred;
1424                 s_prms.pred_strd = final_pred_strd;
1425                 s_prms.pv_pred_chrm = pv_final_pred_chrm;
1426                 s_prms.pred_chrm_strd = final_pred_strd_chrm;
1427                 s_prms.pu1_final_ecd_data = pu1_ecd_data;
1428                 s_prms.pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
1429                 s_prms.csbf_strd = ps_ctxt->i4_cu_csbf_strd;
1430                 s_prms.pv_luma_recon = pv_cu_luma_recon;
1431                 s_prms.recon_luma_strd = luma_stride;
1432                 s_prms.pv_chrm_recon = pv_cu_chroma_recon;
1433                 s_prms.recon_chrma_strd = chroma_stride;
1434                 s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
1435                 s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
1436                 s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size;
1437                 s_prms.i1_cu_qp = ps_cu_analyse->i1_cu_qp;
1438                 s_prms.u1_will_cabac_state_change = 1;
1439                 s_prms.u1_recompute_sbh_and_rdoq = 0;
1440                 s_prms.u1_is_first_pass = 1;
1441             }
1442 
1443 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
1444             s_prms.u1_is_cu_noisy = !ps_enc_loop_bestprms->u1_intra_flag
1445                                         ? ps_cu_prms->u1_is_cu_noisy
1446                                         : ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY;
1447 #endif
1448 
1449             ((pf_final_rdopt_mode_prcs)ps_ctxt->pv_final_rdopt_mode_prcs)(ps_ctxt, &s_prms);
1450 
1451 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1452             if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1453             {
1454                 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1455                 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1456                     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1457             }
1458 #endif
1459         }
1460     }
1461 
1462     /* --------------------------------------- */
1463     /* --------Populate CU out prms ---------- */
1464     /* --------------------------------------- */
1465     {
1466         enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
1467         UWORD8 *pu1_pu_map;
1468         ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1469 
1470         /* Corner case : If Part is 2Nx2N and Merge has all TU with zero cbf */
1471         /* then it has to be coded as skip CU */
1472         if((SIZE_2Nx2N == ps_enc_loop_bestprms->u1_part_mode) &&
1473            (1 == ps_enc_loop_bestprms->as_pu_enc_loop[0].b1_merge_flag) &&
1474            (0 == ps_enc_loop_bestprms->u1_skip_flag) && (0 == ps_enc_loop_bestprms->u1_is_cu_coded))
1475         {
1476             ps_enc_loop_bestprms->u1_skip_flag = 1;
1477         }
1478 
1479         /* update number PUs in CU */
1480         ps_cu_prms->i4_num_pus_in_cu = ps_enc_loop_bestprms->u2_num_pus_in_cu;
1481 
1482         /* ---- populate the colocated pu map index --- */
1483         for(ctr = 0; ctr < ps_enc_loop_bestprms->u2_num_pus_in_cu; ctr++)
1484         {
1485             WORD32 i;
1486             WORD32 vert_ht;
1487             WORD32 horz_wd;
1488 
1489             if(ps_enc_loop_bestprms->u1_intra_flag)
1490             {
1491                 ps_enc_loop_bestprms->as_col_pu_enc_loop[ctr].b1_intra_flag = 1;
1492                 vert_ht = ps_cu_analyse->u1_cu_size >> 2;
1493                 horz_wd = ps_cu_analyse->u1_cu_size >> 2;
1494             }
1495             else
1496             {
1497                 vert_ht = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_ht + 1) << 2) >> 2);
1498                 horz_wd = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_wd + 1) << 2) >> 2);
1499             }
1500 
1501             pu1_pu_map = pu1_col_pu_map + ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_x;
1502             pu1_pu_map += (ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_y * num_4x4_in_ctb);
1503 
1504             for(i = 0; i < vert_ht; i++)
1505             {
1506                 memset(pu1_pu_map, col_start_pu_idx, horz_wd);
1507                 pu1_pu_map += num_4x4_in_ctb;
1508             }
1509             /* increment the index */
1510             col_start_pu_idx++;
1511         }
1512         /* ---- copy the colocated PUs to frm pu ----- */
1513         memcpy(
1514             ps_col_pu,
1515             &ps_enc_loop_bestprms->as_col_pu_enc_loop[0],
1516             ps_enc_loop_bestprms->u2_num_pus_in_cu * sizeof(pu_col_mv_t));
1517 
1518         /*---populate qp for 4x4 nbr array based on skip and cbf zero flag---*/
1519         {
1520             entropy_context_t *ps_entropy_ctxt;
1521 
1522             WORD32 diff_cu_qp_delta_depth, log2_ctb_size;
1523 
1524             WORD32 log2_min_cu_qp_delta_size;
1525             UWORD32 block_addr_align;
1526             ps_entropy_ctxt = ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt;
1527 
1528             log2_ctb_size = ps_entropy_ctxt->i1_log2_ctb_size;
1529             diff_cu_qp_delta_depth = ps_entropy_ctxt->ps_pps->i1_diff_cu_qp_delta_depth;
1530 
1531             log2_min_cu_qp_delta_size = log2_ctb_size - diff_cu_qp_delta_depth;
1532             block_addr_align = 15 << (log2_min_cu_qp_delta_size - 3);
1533 
1534             ps_entropy_ctxt->i4_qg_pos_x = ps_cu_analyse->b3_cu_pos_x & block_addr_align;
1535             ps_entropy_ctxt->i4_qg_pos_y = ps_cu_analyse->b3_cu_pos_y & block_addr_align;
1536             /*Update the Qp value used. It will not have a valid value iff
1537             current CU is (skipped/no_cbf). In that case the Qp needed for
1538             deblocking is calculated from top/left/previous coded CU*/
1539 
1540             ps_ctxt->ps_enc_out_ctxt->i1_cu_qp = ps_cu_analyse->i1_cu_qp;
1541 
1542             if(ps_entropy_ctxt->i4_qg_pos_x == ps_cu_analyse->b3_cu_pos_x &&
1543                ps_entropy_ctxt->i4_qg_pos_y == ps_cu_analyse->b3_cu_pos_y)
1544             {
1545                 ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 1;
1546             }
1547             else
1548             {
1549                 ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 0;
1550             }
1551         }
1552 
1553         /* -- at the end of CU set the neighbour map to 1 -- */
1554         ihevce_set_nbr_map(
1555             ps_ctxt->pu1_ctb_nbr_map,
1556             ps_ctxt->i4_nbr_map_strd,
1557             (ps_cu_analyse->b3_cu_pos_x << 1),
1558             (ps_cu_analyse->b3_cu_pos_y << 1),
1559             (ps_cu_analyse->u1_cu_size >> 2),
1560             1);
1561 
1562         /* -- at the end of CU update best cabac rdopt states -- */
1563         /* -- and also set the top row skip flags  ------------- */
1564         ihevce_entropy_update_best_cu_states(
1565             &ps_ctxt->s_rdopt_entropy_ctxt,
1566             ps_cu_analyse->b3_cu_pos_x,
1567             ps_cu_analyse->b3_cu_pos_y,
1568             ps_cu_analyse->u1_cu_size,
1569             0,
1570             rd_opt_best_idx);
1571     }
1572 
1573     /* Store Output struct */
1574 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1575     {
1576         {
1577             memcpy(
1578                 &ps_ctxt->ps_enc_out_ctxt->s_cu_prms,
1579                 &ps_ctxt->as_cu_prms[rd_opt_best_idx],
1580                 sizeof(enc_loop_cu_final_prms_t));
1581         }
1582 
1583         memcpy(
1584             &ps_ctxt->as_cu_recur_nbr[0],
1585             &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0],
1586             sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) *
1587                 (ps_cu_analyse->u1_cu_size >> 2));
1588 
1589         ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms;
1590 
1591         ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0];
1592     }
1593 #else
1594     if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2)
1595     {
1596         ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1597 
1598         ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0];
1599 
1600         if(ps_ctxt->u1_disable_intra_eval && ps_ctxt->i4_deblk_pad_hpel_cur_pic)
1601         {
1602             /* Wait till top data is ready          */
1603             /* Currently checking till top right CU */
1604             curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
1605 
1606             if(i4_ctb_y_off == 0)
1607             {
1608                 /* No wait for 1st row */
1609                 cu_top_right_offset = -(MAX_CTB_SIZE);
1610                 {
1611                     ihevce_tile_params_t *ps_col_tile_params =
1612                         ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
1613                          ps_ctxt->i4_tile_col_idx);
1614 
1615                     /* No wait for 1st row */
1616                     cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
1617                 }
1618                 cu_top_right_dep_pos = 0;
1619             }
1620             else
1621             {
1622                 cu_top_right_offset = (ps_cu_analyse->u1_cu_size);
1623                 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
1624             }
1625 
1626             if(0 == ps_cu_analyse->b3_cu_pos_y)
1627             {
1628                 ihevce_dmgr_chk_row_row_sync(
1629                     ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
1630                     curr_cu_pos_in_row,
1631                     cu_top_right_offset,
1632                     cu_top_right_dep_pos,
1633                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1634                     ps_ctxt->thrd_id);
1635             }
1636         }
1637     }
1638     else
1639     {
1640         {
1641             memcpy(
1642                 &ps_ctxt->ps_enc_out_ctxt->s_cu_prms,
1643                 &ps_ctxt->as_cu_prms[rd_opt_best_idx],
1644                 sizeof(enc_loop_cu_final_prms_t));
1645         }
1646 
1647         memcpy(
1648             &ps_ctxt->as_cu_recur_nbr[0],
1649             &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0],
1650             sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) *
1651                 (ps_cu_analyse->u1_cu_size >> 2));
1652 
1653         ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms;
1654 
1655         ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0];
1656     }
1657 #endif
1658 
1659     ps_ctxt->s_pred_buf_data.u4_is_buf_in_use &=
1660         ~((1 << (ps_ctxt->i4_max_num_inter_rdopt_cands + 4)) - 1);
1661 
1662     return rd_opt_least_cost;
1663 }
1664 
1665 /*!
1666 ******************************************************************************
1667 * \if Function name : ihevce_enc_loop_process_row \endif
1668 *
1669 * \brief
1670 *    Row level enc_loop pass function
1671 *
1672 * \param[in] ps_ctxt : pointer to enc_loop module context
1673 * \param[in] ps_curr_src_bufs  : pointer to input yuv buffer (row buffer)
1674 * \param[out] ps_curr_recon_bufs : pointer recon picture structure pointer (row buffer)
1675 * \param[in] ps_ctb_in : pointer CTB structure (output of ME/IPE) (row buffer)
1676 * \param[out] ps_ctb_out : pointer CTB output structure (row buffer)
1677 * \param[out] ps_cu_out : pointer CU output structure (row buffer)
1678 * \param[out] ps_tu_out : pointer TU output structure (row buffer)
1679 * \param[out] pi2_frm_coeffs : pointer coeff output (row buffer)
1680 * \param[in] i4_poc : current poc. Needed to send recon in dist-client mode
1681 *
1682 * \return
1683 *    None
1684 *
1685 * Note : Currently the frame level calculations assume that the
1686 *        frame width of the input/recon is an exact multiple of ctb size
1687 *
1688 * \author
1689 *  Ittiam
1690 *
1691 *****************************************************************************
1692 */
ihevce_enc_loop_process_row(ihevce_enc_loop_ctxt_t * ps_ctxt,iv_enc_yuv_buf_t * ps_curr_src_bufs,iv_enc_yuv_buf_t * ps_curr_recon_bufs,iv_enc_yuv_buf_src_t * ps_curr_recon_bufs_src,UWORD8 ** ppu1_y_subpel_planes,ctb_analyse_t * ps_ctb_in,ctb_enc_loop_out_t * ps_ctb_out,ipe_l0_ctb_analyse_for_me_t * ps_row_ipe_analyse,cur_ctb_cu_tree_t * ps_row_cu_tree,cu_enc_loop_out_t * ps_row_cu,tu_enc_loop_out_t * ps_row_tu,pu_t * ps_row_pu,pu_col_mv_t * ps_row_col_pu,UWORD16 * pu2_num_pu_map,UWORD8 * pu1_row_pu_map,UWORD8 * pu1_row_ecd_data,UWORD32 * pu4_pu_offsets,frm_ctb_ctxt_t * ps_frm_ctb_prms,WORD32 vert_ctr,recon_pic_buf_t * ps_frm_recon,void * pv_dep_mngr_encloop_dep_me,pad_interp_recon_frm_t * ps_pad_interp_recon,WORD32 i4_pass,multi_thrd_ctxt_t * ps_multi_thrd_ctxt,ihevce_tile_params_t * ps_tile_params)1693 void ihevce_enc_loop_process_row(
1694     ihevce_enc_loop_ctxt_t *ps_ctxt,
1695     iv_enc_yuv_buf_t *ps_curr_src_bufs,
1696     iv_enc_yuv_buf_t *ps_curr_recon_bufs,
1697     iv_enc_yuv_buf_src_t *ps_curr_recon_bufs_src,
1698     UWORD8 **ppu1_y_subpel_planes,
1699     ctb_analyse_t *ps_ctb_in,
1700     ctb_enc_loop_out_t *ps_ctb_out,
1701     ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse,
1702     cur_ctb_cu_tree_t *ps_row_cu_tree,
1703     cu_enc_loop_out_t *ps_row_cu,
1704     tu_enc_loop_out_t *ps_row_tu,
1705     pu_t *ps_row_pu,
1706     pu_col_mv_t *ps_row_col_pu,
1707     UWORD16 *pu2_num_pu_map,
1708     UWORD8 *pu1_row_pu_map,
1709     UWORD8 *pu1_row_ecd_data,
1710     UWORD32 *pu4_pu_offsets,
1711     frm_ctb_ctxt_t *ps_frm_ctb_prms,
1712     WORD32 vert_ctr,
1713     recon_pic_buf_t *ps_frm_recon,
1714     void *pv_dep_mngr_encloop_dep_me,
1715     pad_interp_recon_frm_t *ps_pad_interp_recon,
1716     WORD32 i4_pass,
1717     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
1718     ihevce_tile_params_t *ps_tile_params)
1719 {
1720     enc_loop_cu_prms_t s_cu_prms;
1721     ctb_enc_loop_out_t *ps_ctb_out_dblk;
1722 
1723     WORD32 ctb_ctr, ctb_start, ctb_end;
1724     WORD32 col_pu_map_idx;
1725     WORD32 num_ctbs_horz_pic;
1726     WORD32 ctb_size;
1727     WORD32 last_ctb_row_flag;
1728     WORD32 last_ctb_col_flag;
1729     WORD32 last_hz_ctb_wd;
1730     WORD32 last_vt_ctb_ht;
1731     void *pv_dep_mngr_enc_loop_dblk;
1732     void *pv_dep_mngr_enc_loop_cu_top_right;
1733     WORD32 dblk_offset, dblk_check_dep_pos;
1734     WORD32 aux_offset, aux_check_dep_pos;
1735     void *pv_dep_mngr_me_dep_encloop;
1736     ctb_enc_loop_out_t *ps_ctb_out_sao;
1737     /*Structure to store deblocking parameters at CTB-row level*/
1738     deblk_ctbrow_prms_t s_deblk_ctb_row_params;
1739     UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2);
1740 
1741     pv_dep_mngr_me_dep_encloop = (void *)ps_frm_recon->pv_dep_mngr_recon;
1742     num_ctbs_horz_pic = ps_frm_ctb_prms->i4_num_ctbs_horz;
1743     ctb_size = ps_frm_ctb_prms->i4_ctb_size;
1744 
1745     /* Store the num_ctb_horz in sao context*/
1746     ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_horz = ps_frm_ctb_prms->i4_num_ctbs_horz;
1747     ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_vert = ps_frm_ctb_prms->i4_num_ctbs_vert;
1748 
1749     /* Get the EncLoop Deblock Dep Mngr */
1750     pv_dep_mngr_enc_loop_dblk = ps_ctxt->pv_dep_mngr_enc_loop_dblk;
1751     /* Get the EncLoop Top-Right CU Dep Mngr */
1752     pv_dep_mngr_enc_loop_cu_top_right = ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right;
1753     /* Set Variables for Dep. Checking and Setting */
1754     aux_check_dep_pos = vert_ctr;
1755     aux_offset = 2; /* Should be there for 0th row also */
1756     if(vert_ctr > 0)
1757     {
1758         dblk_check_dep_pos = vert_ctr - 1;
1759         dblk_offset = 2;
1760     }
1761     else
1762     {
1763         /* First row should run without waiting */
1764         dblk_check_dep_pos = 0;
1765         dblk_offset = -(ps_tile_params->i4_first_sample_x + 1);
1766     }
1767 
1768     /* check if the current row processed in last CTb row */
1769     last_ctb_row_flag = (vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1));
1770 
1771     /* Valid Width (pixels) in the last CTB in every row (padding cases) */
1772     last_hz_ctb_wd = ps_frm_ctb_prms->i4_cu_aligned_pic_wd - ((num_ctbs_horz_pic - 1) * ctb_size);
1773 
1774     /* Valid Height (pixels) in the last CTB row (padding cases) */
1775     last_vt_ctb_ht = ps_frm_ctb_prms->i4_cu_aligned_pic_ht -
1776                      ((ps_frm_ctb_prms->i4_num_ctbs_vert - 1) * ctb_size);
1777     /* reset the states copied flag */
1778     ps_ctxt->u1_cabac_states_next_row_copied_flag = 0;
1779     ps_ctxt->u1_cabac_states_first_cu_copied_flag = 0;
1780 
1781     /* populate the cu prms which are common for entire ctb row */
1782     s_cu_prms.i4_luma_src_stride = ps_curr_src_bufs->i4_y_strd;
1783     s_cu_prms.i4_chrm_src_stride = ps_curr_src_bufs->i4_uv_strd;
1784     s_cu_prms.i4_luma_recon_stride = ps_curr_recon_bufs->i4_y_strd;
1785     s_cu_prms.i4_chrm_recon_stride = ps_curr_recon_bufs->i4_uv_strd;
1786     s_cu_prms.i4_ctb_size = ctb_size;
1787 
1788     ps_ctxt->i4_is_first_cu_qg_coded = 0;
1789 
1790     /* Initialize the number of PUs for the first CTB to 0 */
1791     *pu2_num_pu_map = 0;
1792 
1793     /*Getting the address of BS and Qp arrays and other info*/
1794     memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t));
1795     {
1796         WORD32 num_ctbs_horz_tile;
1797         /* Update the pointers which are accessed not by using ctb_ctr
1798         to the tile start here! */
1799         ps_ctb_in += ps_tile_params->i4_first_ctb_x;
1800         ps_ctb_out += ps_tile_params->i4_first_ctb_x;
1801 
1802         ps_row_cu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_cus_in_ctb);
1803         ps_row_tu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_tus_in_ctb);
1804         ps_row_pu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb);
1805         pu1_row_pu_map += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb);
1806         pu1_row_ecd_data +=
1807             (ps_tile_params->i4_first_ctb_x *
1808              ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_num_tus_in_ctb << 1)
1809                                 : ((ps_frm_ctb_prms->i4_num_tus_in_ctb * 3) >> 1)) *
1810              MAX_SCAN_COEFFS_BYTES_4x4);
1811 
1812         /* Update the pointers to the tile start */
1813         s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
1814             (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one vertical edge per 8x8 block
1815         s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
1816             (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one horizontal edge per 8x8 block
1817         s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2));
1818 
1819         num_ctbs_horz_tile = ps_tile_params->i4_curr_tile_wd_in_ctb_unit;
1820 
1821         ctb_start = ps_tile_params->i4_first_ctb_x;
1822         ctb_end = ps_tile_params->i4_first_ctb_x + num_ctbs_horz_tile;
1823     }
1824     ps_ctb_out_dblk = ps_ctb_out;
1825 
1826     ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_ctxt->i4_frame_qp;
1827 
1828     /* --------- Loop over all the CTBs in a row --------------- */
1829     for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
1830     {
1831         cu_final_update_prms s_cu_update_prms;
1832 
1833         cur_ctb_cu_tree_t *ps_cu_tree_analyse;
1834         me_ctb_data_t *ps_cu_me_data;
1835         ipe_l0_ctb_analyse_for_me_t *ps_ctb_ipe_analyse;
1836         cu_enc_loop_out_t *ps_cu_final;
1837         pu_col_mv_t *ps_ctb_col_pu;
1838 
1839         WORD32 cur_ctb_ht, cur_ctb_wd;
1840         WORD32 last_cu_pos_in_ctb;
1841         WORD32 last_cu_size;
1842         WORD32 num_pus_in_ctb;
1843         UWORD8 u1_is_ctb_noisy;
1844         ps_ctb_col_pu = ps_row_col_pu + ctb_ctr * ps_frm_ctb_prms->i4_num_pus_in_ctb;
1845 
1846         if(ctb_ctr)
1847         {
1848             ps_ctxt->i4_prev_QP = ps_ctxt->i4_last_cu_qp_from_prev_ctb;
1849         }
1850         /* If sub pic level rc is enabled */
1851         if(ps_ctxt->i4_sub_pic_level_rc)
1852         {
1853             ihevce_sub_pic_rc_scale_query((void *)ps_multi_thrd_ctxt, (void *)ps_ctxt);
1854         }
1855         /* check if the current CTB is the last CTB column of the picture */
1856         last_ctb_col_flag = (ctb_ctr == (num_ctbs_horz_pic - 1));
1857         if(1 == last_ctb_col_flag)
1858         {
1859             cur_ctb_wd = last_hz_ctb_wd;
1860         }
1861         else
1862         {
1863             cur_ctb_wd = ctb_size;
1864         }
1865 
1866         /* If it's the last CTB, get the actual ht of CTB */
1867         if(1 == last_ctb_row_flag)
1868         {
1869             cur_ctb_ht = last_vt_ctb_ht;
1870         }
1871         else
1872         {
1873             cur_ctb_ht = ctb_size;
1874         }
1875 
1876         ps_ctxt->u4_cur_ctb_ht = cur_ctb_ht;
1877         ps_ctxt->u4_cur_ctb_wd = cur_ctb_wd;
1878 
1879         /* Wait till reference frame recon is available */
1880 
1881         /* ------------ Wait till current data is ready from ME -------------- */
1882 
1883         /*only for ref instance and Non I pics */
1884         if((ps_ctxt->i4_bitrate_instance_num == 0) &&
1885            ((ISLICE != ps_ctxt->i1_slice_type) || L0ME_IN_OPENLOOP_MODE))
1886         {
1887             if(ctb_ctr < (num_ctbs_horz_pic))
1888             {
1889                 ihevce_dmgr_chk_row_row_sync(
1890                     pv_dep_mngr_encloop_dep_me,
1891                     ctb_ctr,
1892                     1,
1893                     vert_ctr,
1894                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1895                     ps_ctxt->thrd_id);
1896             }
1897         }
1898 
1899         /* store the cu pointer for current ctb out */
1900         ps_ctb_out->ps_enc_cu = ps_row_cu;
1901         ps_cu_final = ps_row_cu;
1902 
1903         /* Get the base point of CU recursion tree */
1904         if(ISLICE != ps_ctxt->i1_slice_type)
1905         {
1906             ps_cu_tree_analyse = ps_ctb_in->ps_cu_tree;
1907             ASSERT(ps_ctb_in->ps_cu_tree == (ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE)));
1908         }
1909         else
1910         {
1911             /* Initialize ptr to current CTB */
1912             ps_cu_tree_analyse = ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE);
1913         }
1914 
1915         /* Get the ME data pointer for 16x16 block data in ctb */
1916         ps_cu_me_data = ps_ctb_in->ps_me_ctb_data;
1917         u1_is_ctb_noisy = ps_ctb_in->s_ctb_noise_params.i4_noise_present;
1918         s_cu_prms.u1_is_cu_noisy = u1_is_ctb_noisy;
1919         s_cu_prms.pu1_is_8x8Blk_noisy = ps_ctb_in->s_ctb_noise_params.au1_is_8x8Blk_noisy;
1920 
1921         /* store the ctb level prms in cu prms */
1922         s_cu_prms.i4_ctb_pos = ctb_ctr;
1923 
1924         s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size;
1925         s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size;
1926 
1927         {
1928             s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size;
1929             s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size;
1930         }
1931 
1932         s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size;
1933 
1934         s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size;
1935 
1936         s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size;
1937 
1938         /* Initialize ptr to current CTB */
1939         ps_ctb_ipe_analyse = ps_row_ipe_analyse + ctb_ctr;  // * ctb_size;
1940 
1941         /* reset the map idx for current ctb */
1942         col_pu_map_idx = 0;
1943         num_pus_in_ctb = 0;
1944 
1945         /* reset the map buffer to 0*/
1946 
1947         memset(
1948             &ps_ctxt->au1_nbr_ctb_map[0][0],
1949             0,
1950             (MAX_PU_IN_CTB_ROW + 1 + 8) * (MAX_PU_IN_CTB_ROW + 1 + 8));
1951 
1952         /* set the CTB neighbour availability flags */
1953         ihevce_set_ctb_nbr(
1954             &ps_ctb_out->s_ctb_nbr_avail_flags,
1955             ps_ctxt->pu1_ctb_nbr_map,
1956             ps_ctxt->i4_nbr_map_strd,
1957             ctb_ctr,
1958             vert_ctr,
1959             ps_frm_ctb_prms);
1960 
1961         /* -------- update the cur CTB offsets for inter prediction-------- */
1962         ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = ctb_ctr * ctb_size;
1963         ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = vert_ctr * ctb_size;
1964 
1965         /* -------- update the cur CTB offsets for MV prediction-------- */
1966         ps_ctxt->s_mv_pred_ctxt.i4_ctb_x = ctb_ctr;
1967         ps_ctxt->s_mv_pred_ctxt.i4_ctb_y = vert_ctr;
1968 
1969         /* -------------- Boundary Strength Initialization ----------- */
1970         if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
1971         {
1972             ihevce_bs_init_ctb(&ps_ctxt->s_deblk_bs_prms, ps_frm_ctb_prms, ctb_ctr, vert_ctr);
1973         }
1974 
1975         /* -------- update cur CTB offsets for entropy rdopt context------- */
1976         ihevce_entropy_rdo_ctb_init(&ps_ctxt->s_rdopt_entropy_ctxt, ctb_ctr, vert_ctr);
1977 
1978         /* --------- CU Recursion --------------- */
1979 
1980         {
1981 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1982             WORD32 i4_max_tree_depth = 4;
1983 #endif
1984             WORD32 i4_tree_depth = 0;
1985             /* Init no. of CU in CTB to 0*/
1986             ps_ctb_out->u1_num_cus_in_ctb = 0;
1987 
1988 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1989             if(ps_ctxt->i4_bitrate_instance_num == 0)
1990             {
1991                 WORD32 i4_max_tree_depth = 4;
1992                 WORD32 i;
1993                 for(i = 0; i < i4_max_tree_depth; i++)
1994                 {
1995                     COPY_CABAC_STATES(
1996                         &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
1997                         &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1998                         IHEVC_CAB_CTXT_END * sizeof(UWORD8));
1999                 }
2000             }
2001 #else
2002             if(ps_ctxt->i4_bitrate_instance_num == 0)
2003             {
2004                 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2005                 {
2006                     WORD32 i4_max_tree_depth = 4;
2007                     WORD32 i;
2008                     for(i = 0; i < i4_max_tree_depth; i++)
2009                     {
2010                         COPY_CABAC_STATES(
2011                             &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2012                             &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2013                             IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2014                     }
2015                 }
2016             }
2017 
2018 #endif
2019             if(ps_ctxt->i4_bitrate_instance_num == 0)
2020             {
2021                 /* FOR I- PIC populate the curr_ctb accordingly */
2022                 if(ISLICE == ps_ctxt->i1_slice_type)
2023                 {
2024                     ps_ctb_ipe_analyse->ps_cu_tree_root = ps_cu_tree_analyse;
2025                     ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1;
2026 
2027                     ihevce_populate_cu_tree(
2028                         ps_ctb_ipe_analyse,
2029                         ps_cu_tree_analyse,
2030                         0,
2031                         (IHEVCE_QUALITY_CONFIG_T)ps_ctxt->i4_quality_preset,
2032                         POS_NA,
2033                         POS_NA,
2034                         POS_NA);
2035                 }
2036             }
2037             ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1;
2038             ps_ctxt->ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2039             ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2040             if(ps_ctxt->i4_use_ctb_level_lamda)
2041             {
2042                 ihevce_compute_cu_level_QP(
2043                     ps_ctxt, -1, ps_ctb_ipe_analyse->i4_64x64_act_factor[3][1], 0);
2044             }
2045 
2046             s_cu_update_prms.ppu1_row_ecd_data = &pu1_row_ecd_data;
2047             s_cu_update_prms.pi4_last_cu_pos_in_ctb = &last_cu_pos_in_ctb;
2048             s_cu_update_prms.pi4_last_cu_size = &last_cu_size;
2049             s_cu_update_prms.pi4_num_pus_in_ctb = &num_pus_in_ctb;
2050             s_cu_update_prms.pps_cu_final = &ps_cu_final;
2051             s_cu_update_prms.pps_row_pu = &ps_row_pu;
2052             s_cu_update_prms.pps_row_tu = &ps_row_tu;
2053             s_cu_update_prms.pu1_num_cus_in_ctb_out = &ps_ctb_out->u1_num_cus_in_ctb;
2054 
2055             // source satd computation
2056             /* compute the source 8x8 SATD for the current CTB */
2057             /* populate  pui4_source_satd in some structure and pass it inside */
2058             if(ps_ctxt->u1_enable_psyRDOPT)
2059             {
2060                 /* declare local variables */
2061                 WORD32 i;
2062                 WORD32 ctb_size;
2063                 WORD32 num_comp_had_blocks;
2064                 UWORD8 *pu1_l0_block;
2065                 WORD32 block_ht;
2066                 WORD32 block_wd;
2067                 WORD32 ht_offset;
2068                 WORD32 wd_offset;
2069 
2070                 WORD32 num_horz_blocks;
2071                 WORD32 had_block_size;
2072                 WORD32 total_had_block_size;
2073                 WORD16 pi2_residue_had_zscan[64];
2074                 UWORD8 ai1_zeros_buffer[64];
2075 
2076                 WORD32 index_satd;
2077                 WORD32 is_hbd;
2078                 /* initialize the variables */
2079                 block_ht = cur_ctb_ht;
2080                 block_wd = cur_ctb_wd;
2081 
2082                 is_hbd = ps_ctxt->u1_is_input_data_hbd;
2083 
2084                 had_block_size = 8;
2085                 total_had_block_size = had_block_size * had_block_size;
2086 
2087                 for(i = 0; i < total_had_block_size; i++)
2088                 {
2089                     ai1_zeros_buffer[i] = 0;
2090                 }
2091 
2092                 ctb_size = block_ht * block_wd;  //ctb_width * ctb_height;
2093                 num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
2094 
2095                 num_horz_blocks = block_wd / had_block_size;  //ctb_width / had_block_size;
2096                 ht_offset = -had_block_size;
2097                 wd_offset = -had_block_size;
2098 
2099                 index_satd = 0;
2100                 /* Loop over all 8x8 blocks in the CTB */
2101                 for(i = 0; i < num_comp_had_blocks; i++)
2102                 {
2103                     if(i % num_horz_blocks == 0)
2104                     {
2105                         wd_offset = -had_block_size;
2106                         ht_offset += had_block_size;
2107                     }
2108                     wd_offset += had_block_size;
2109 
2110                     if(!is_hbd)
2111                     {
2112                         /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
2113                         pu1_l0_block = s_cu_prms.pu1_luma_src +
2114                                        ps_curr_src_bufs->i4_y_strd * ht_offset + wd_offset;
2115 
2116                         ps_ctxt->ai4_source_satd_8x8[index_satd] =
2117 
2118                             ps_ctxt->s_cmn_opt_func.pf_AC_HAD_8x8_8bit(
2119                                 pu1_l0_block,
2120                                 ps_curr_src_bufs->i4_y_strd,
2121                                 ai1_zeros_buffer,
2122                                 had_block_size,
2123                                 pi2_residue_had_zscan,
2124                                 had_block_size);
2125                     }
2126                     index_satd++;
2127                 }
2128             }
2129 
2130             if(ps_ctxt->u1_enable_psyRDOPT)
2131             {
2132                 /* declare local variables */
2133                 WORD32 i;
2134                 WORD32 ctb_size;
2135                 WORD32 num_comp_had_blocks;
2136                 UWORD8 *pu1_l0_block;
2137                 UWORD8 *pu1_l0_block_prev = NULL;
2138                 WORD32 block_ht;
2139                 WORD32 block_wd;
2140                 WORD32 ht_offset;
2141                 WORD32 wd_offset;
2142 
2143                 WORD32 num_horz_blocks;
2144                 WORD32 had_block_size;
2145                 WORD16 pi2_residue_had[64];
2146                 UWORD8 ai1_zeros_buffer[64];
2147                 WORD32 index_satd = 0;
2148 
2149                 WORD32 is_hbd;
2150                 is_hbd = ps_ctxt->u1_is_input_data_hbd;  // 8 bit
2151 
2152                 /* initialize the variables */
2153                 /* change this based on the bit depth */
2154                 // ps_ctxt->u1_chroma_array_type
2155                 if(ps_ctxt->u1_chroma_array_type == 1)
2156                 {
2157                     block_ht = cur_ctb_ht / 2;
2158                     block_wd = cur_ctb_wd / 2;
2159                 }
2160                 else
2161                 {
2162                     block_ht = cur_ctb_ht;
2163                     block_wd = cur_ctb_wd / 2;
2164                 }
2165 
2166                 had_block_size = 4;
2167                 memset(ai1_zeros_buffer, 0, 64 * sizeof(UWORD8));
2168 
2169                 ctb_size = block_ht * block_wd;  //ctb_width * ctb_height;
2170                 num_comp_had_blocks = 2 * ctb_size / (had_block_size * had_block_size);
2171 
2172                 num_horz_blocks = 2 * block_wd / had_block_size;  //ctb_width / had_block_size;
2173                 ht_offset = -had_block_size;
2174                 wd_offset = -had_block_size;
2175 
2176                 if(!is_hbd)
2177                 {
2178                     /* loop over for every 4x4 blocks in the CU for Cb */
2179                     for(i = 0; i < num_comp_had_blocks; i++)
2180                     {
2181                         if(i % num_horz_blocks == 0)
2182                         {
2183                             wd_offset = -had_block_size;
2184                             ht_offset += had_block_size;
2185                         }
2186                         wd_offset += had_block_size;
2187 
2188                         /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
2189                         if(i % 2 != 0)
2190                         {
2191                             if(!is_hbd)
2192                             {
2193                                 pu1_l0_block = pu1_l0_block_prev + 1;
2194                             }
2195                         }
2196                         else
2197                         {
2198                             if(!is_hbd)
2199                             {
2200                                 pu1_l0_block = s_cu_prms.pu1_chrm_src +
2201                                                s_cu_prms.i4_chrm_src_stride * ht_offset + wd_offset;
2202                                 pu1_l0_block_prev = pu1_l0_block;
2203                             }
2204                         }
2205 
2206                         if(had_block_size == 4)
2207                         {
2208                             if(!is_hbd)
2209                             {
2210                                 ps_ctxt->ai4_source_chroma_satd[index_satd] =
2211                                     ps_ctxt->s_cmn_opt_func.pf_chroma_AC_HAD_4x4_8bit(
2212                                         pu1_l0_block,
2213                                         s_cu_prms.i4_chrm_src_stride,
2214                                         ai1_zeros_buffer,
2215                                         had_block_size,
2216                                         pi2_residue_had,
2217                                         had_block_size);
2218                             }
2219 
2220                             index_satd++;
2221 
2222                         }  // block size of 4x4
2223 
2224                     }  // for all blocks
2225 
2226                 }  // is hbd check
2227             }
2228 
2229             ihevce_cu_recurse_decide(
2230                 ps_ctxt,
2231                 &s_cu_prms,
2232                 ps_cu_tree_analyse,
2233                 ps_cu_tree_analyse,
2234                 ps_ctb_ipe_analyse,
2235                 ps_cu_me_data,
2236                 &ps_ctb_col_pu,
2237                 &s_cu_update_prms,
2238                 pu1_row_pu_map,
2239                 &col_pu_map_idx,
2240                 i4_tree_depth,
2241                 ctb_ctr << 6,
2242                 vert_ctr << 6,
2243                 cur_ctb_ht);
2244 
2245             if(ps_ctxt->i1_slice_type != ISLICE)
2246             {
2247                 ASSERT(
2248                     (cur_ctb_wd * cur_ctb_ht) <=
2249                     ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree_analyse));
2250             }
2251             /* If sub-pic RC is enabled */
2252             if(1 == ps_ctxt->i4_sub_pic_level_rc)
2253             {
2254                 /*In a row, after the required CTB is reached, send data and query scale from Bit Control thread */
2255                 ihevce_sub_pic_rc_in_data(
2256                     (void *)ps_multi_thrd_ctxt,
2257                     (void *)ps_ctxt,
2258                     (void *)ps_ctb_ipe_analyse,
2259                     (void *)ps_frm_ctb_prms);
2260             }
2261 
2262             ps_ctxt->ps_enc_out_ctxt->u1_cu_size = 128;
2263 
2264         } /* End of CU recursion block */
2265 
2266 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2267         {
2268             ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2269             enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms;
2270             ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2271 
2272             do
2273             {
2274                 ihevce_update_final_cu_results(
2275                     ps_ctxt,
2276                     ps_enc_out_ctxt,
2277                     ps_cu_prms,
2278                     NULL, /* &ps_ctb_col_pu */
2279                     NULL, /* &col_pu_map_idx */
2280                     &s_cu_update_prms,
2281                     ctb_ctr,
2282                     vert_ctr);
2283 
2284                 ps_enc_out_ctxt++;
2285 
2286                 ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE);
2287 
2288             } while(ps_enc_out_ctxt->u1_cu_size != 128);
2289         }
2290 #else
2291         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2292         {
2293             ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2294             enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms;
2295             ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2296 
2297             do
2298             {
2299                 ihevce_update_final_cu_results(
2300                     ps_ctxt,
2301                     ps_enc_out_ctxt,
2302                     ps_cu_prms,
2303                     NULL, /* &ps_ctb_col_pu */
2304                     NULL, /* &col_pu_map_idx */
2305                     &s_cu_update_prms,
2306                     ctb_ctr,
2307                     vert_ctr);
2308 
2309                 ps_enc_out_ctxt++;
2310 
2311                 ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE);
2312 
2313             } while(ps_enc_out_ctxt->u1_cu_size != 128);
2314         }
2315 #endif
2316 
2317         /* --- ctb level copy of data to left buffers--*/
2318         ((pf_enc_loop_ctb_left_copy)ps_ctxt->pv_enc_loop_ctb_left_copy)(ps_ctxt, &s_cu_prms);
2319 
2320         if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2321         {
2322             /* For the Unaligned CTB, make the invalid edge boundary strength 0 */
2323             ihevce_bs_clear_invalid(
2324                 &ps_ctxt->s_deblk_bs_prms,
2325                 last_ctb_row_flag,
2326                 (ctb_ctr == (num_ctbs_horz_pic - 1)),
2327                 last_hz_ctb_wd,
2328                 last_vt_ctb_ht);
2329 
2330             /* -----------------Read boundary strengths for current CTB------------- */
2331 
2332             if((0 == ps_ctxt->i4_deblock_type) && (ps_ctxt->i4_deblk_pad_hpel_cur_pic))
2333             {
2334                 /*Storing boundary strengths of current CTB*/
2335                 UWORD32 *pu4_bs_horz = &ps_ctxt->s_deblk_bs_prms.au4_horz_bs[0];
2336                 UWORD32 *pu4_bs_vert = &ps_ctxt->s_deblk_bs_prms.au4_vert_bs[0];
2337 
2338                 memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_vert, pu4_bs_vert, (ctb_size * 4) / 8);
2339                 memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_horz, pu4_bs_horz, (ctb_size * 4) / 8);
2340             }
2341             //Increment for storing next CTB info
2342             s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2343                 (ctb_size >> 3);  //one vertical edge per 8x8 block
2344             s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2345                 (ctb_size >> 3);  //one horizontal edge per 8x8 block
2346         }
2347 
2348         /* -------------- ctb level updates ----------------- */
2349         ps_row_cu += ps_ctb_out->u1_num_cus_in_ctb;
2350 
2351         pu1_row_pu_map += (ctb_size >> 2) * (ctb_size >> 2);
2352 
2353         /* first ctb offset will be populated by the caller */
2354         if(0 != ctb_ctr)
2355         {
2356             pu4_pu_offsets[ctb_ctr] = pu4_pu_offsets[ctb_ctr - 1] + num_pus_in_ctb;
2357         }
2358         pu2_num_pu_map[ctb_ctr] = num_pus_in_ctb;
2359         ASSERT(ps_ctb_out->u1_num_cus_in_ctb != 0);
2360 
2361         ps_ctb_in++;
2362         ps_ctb_out++;
2363     }
2364 
2365     /* ---------- Encloop end of row updates ----------------- */
2366 
2367     /* at the end of row processing cu pixel counter is set to */
2368     /* (num ctb * ctb size) + ctb size                         */
2369     /* this is to set the dependency for right most cu of last */
2370     /* ctb's top right data dependency                         */
2371     /* this even takes care of entropy dependency for          */
2372     /* incomplete ctb as well                                  */
2373     ihevce_dmgr_set_row_row_sync(
2374         pv_dep_mngr_enc_loop_cu_top_right,
2375         (ctb_ctr * ctb_size + ctb_size),
2376         vert_ctr,
2377         ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2378 
2379     ps_ctxt->s_sao_ctxt_t.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func;
2380 
2381     /* Restore structure.
2382     Getting the address of stored-BS and Qp-map and other info */
2383     memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t));
2384     {
2385         /* Update the pointers to the tile start */
2386         s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2387             (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one vertical edge per 8x8 block
2388         s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2389             (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one horizontal edge per 8x8 block
2390         s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2));
2391     }
2392 
2393 #if PROFILE_ENC_REG_DATA
2394     s_profile.u8_enc_reg_data[vert_ctr] = 0;
2395 #endif
2396 
2397     /* -- Loop over all the CTBs in a row for Deblocking and Subpel gen --- */
2398     if(!ps_ctxt->u1_is_input_data_hbd)
2399     {
2400         WORD32 last_col_pic, last_col_tile;
2401 
2402         for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2403         {
2404             /* store the ctb level prms in cu prms */
2405             s_cu_prms.i4_ctb_pos = ctb_ctr;
2406             s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size;
2407             s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size;
2408 
2409             s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size;
2410             s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size;
2411             s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size;
2412 
2413             s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size;
2414 
2415             s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size;
2416 
2417             /* If last ctb in the horizontal row */
2418             if(ctb_ctr == (num_ctbs_horz_pic - 1))
2419             {
2420                 last_col_pic = 1;
2421             }
2422             else
2423             {
2424                 last_col_pic = 0;
2425             }
2426 
2427             /* If last ctb in the tile row */
2428             if(ctb_ctr == (ctb_end - 1))
2429             {
2430                 last_col_tile = 1;
2431             }
2432             else
2433             {
2434                 last_col_tile = 0;
2435             }
2436 
2437             if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2438             {
2439                 /* Wait till top neighbour CTB has done its deblocking */
2440                 if(ctb_ctr < (ctb_end)-1)
2441                 {
2442                     ihevce_dmgr_chk_row_row_sync(
2443                         pv_dep_mngr_enc_loop_dblk,
2444                         ctb_ctr,
2445                         dblk_offset,
2446                         dblk_check_dep_pos,
2447                         ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
2448                         ps_ctxt->thrd_id);
2449                 }
2450 
2451                 if((0 == ps_ctxt->i4_deblock_type))
2452                 {
2453                     /* Populate Qp-map */
2454                     if(ctb_start == ctb_ctr)
2455                     {
2456                         ihevce_deblk_populate_qp_map(
2457                             ps_ctxt,
2458                             &s_deblk_ctb_row_params,
2459                             ps_ctb_out_dblk,
2460                             vert_ctr,
2461                             ps_frm_ctb_prms,
2462                             ps_tile_params);
2463                     }
2464                     ps_ctxt->s_deblk_prms.i4_ctb_size = ctb_size;
2465 
2466                     /* recon pointers and stride */
2467                     ps_ctxt->s_deblk_prms.pu1_ctb_y = s_cu_prms.pu1_luma_recon;
2468                     ps_ctxt->s_deblk_prms.pu1_ctb_uv = s_cu_prms.pu1_chrm_recon;
2469                     ps_ctxt->s_deblk_prms.i4_luma_pic_stride = s_cu_prms.i4_luma_recon_stride;
2470                     ps_ctxt->s_deblk_prms.i4_chroma_pic_stride = s_cu_prms.i4_chrm_recon_stride;
2471 
2472                     ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge = (0 == vert_ctr) ? 0 : 1;
2473                     {
2474                         ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge =
2475                             (ps_tile_params->i4_first_ctb_y == vert_ctr) ? 0 : 1;
2476                     }
2477                     ps_ctxt->s_deblk_prms.i4_deblock_left_ctb_edge = (ctb_start == ctb_ctr) ? 0 : 1;
2478                     //or according to slice boundary. Support yet to be added !!!!
2479 
2480                     ihevce_deblk_ctb(
2481                         &ps_ctxt->s_deblk_prms, last_col_tile, &s_deblk_ctb_row_params);
2482 
2483                     //Increment for storing next CTB info
2484                     s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2485                         (ctb_size >> 3);  //one vertical edge per 8x8 block
2486                     s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2487                         (ctb_size >> 3);  //one horizontal edge per 8x8 block
2488                     s_deblk_ctb_row_params.pi1_ctb_row_qp +=
2489                         (ctb_size >> 2);  //one qp per 4x4 block.
2490 
2491                 }  //end of if((0 == ps_ctxt->i4_deblock_type)
2492             }  // end of if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2493 
2494             /* Apply SAO over the previous CTB-row */
2495             if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2496                ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2497             {
2498                 sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
2499 
2500                 if((vert_ctr > ps_tile_params->i4_first_ctb_y) &&
2501                    (ctb_ctr > ctb_start))  //if((vert_ctr > 0) && (ctb_ctr > 0))
2502                 {
2503                     /* Call the sao function to do sao for the current ctb*/
2504 
2505                     /* Register the curr ctb's x pos in sao context*/
2506                     ps_sao_ctxt->i4_ctb_x = ctb_ctr - 1;
2507 
2508                     /* Register the curr ctb's y pos in sao context*/
2509                     ps_sao_ctxt->i4_ctb_y = vert_ctr - 1;
2510 
2511                     ps_ctb_out_sao = ps_sao_ctxt->ps_ctb_out +
2512                                      (vert_ctr - 1) * ps_frm_ctb_prms->i4_num_ctbs_horz +
2513                                      (ctb_ctr - 1);
2514                     ps_sao_ctxt->ps_sao = &ps_ctb_out_sao->s_sao;
2515                     ps_sao_ctxt->i4_sao_blk_wd = ctb_size;
2516                     ps_sao_ctxt->i4_sao_blk_ht = ctb_size;
2517 
2518                     ps_sao_ctxt->i4_is_last_ctb_row = 0;
2519                     ps_sao_ctxt->i4_is_last_ctb_col = 0;
2520 
2521                     /* Calculate the recon buf pointer and stride for the current ctb */
2522                     ps_sao_ctxt->pu1_cur_luma_recon_buf =
2523                         ps_sao_ctxt->pu1_frm_luma_recon_buf +
2524                         (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2525                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2526 
2527                     ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
2528 
2529                     ps_sao_ctxt->pu1_cur_chroma_recon_buf =
2530                         ps_sao_ctxt->pu1_frm_chroma_recon_buf +
2531                         (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
2532                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2533                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2534 
2535                     ps_sao_ctxt->i4_cur_chroma_recon_stride =
2536                         ps_sao_ctxt->i4_frm_chroma_recon_stride;
2537 
2538                     ps_sao_ctxt->pu1_cur_luma_src_buf =
2539                         ps_sao_ctxt->pu1_frm_luma_src_buf +
2540                         (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2541                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2542 
2543                     ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
2544 
2545                     ps_sao_ctxt->pu1_cur_chroma_src_buf =
2546                         ps_sao_ctxt->pu1_frm_chroma_src_buf +
2547                         (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
2548                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2549                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2550 
2551                     ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
2552 
2553                     /* Calculate the pointer to buff to store the (x,y)th sao
2554                     * for the top merge of (x,y+1)th ctb
2555                     */
2556                     ps_sao_ctxt->ps_top_ctb_sao =
2557                         &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
2558                                                          [ps_sao_ctxt->i4_ctb_x +
2559                                                           (ps_sao_ctxt->i4_ctb_y) *
2560                                                               ps_frm_ctb_prms->i4_num_ctbs_horz +
2561                                                           (ps_ctxt->i4_bitrate_instance_num *
2562                                                            ps_sao_ctxt->i4_num_ctb_units)];
2563 
2564                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2565                     ps_sao_ctxt->pu1_curr_sao_src_top_luma =
2566                         ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
2567                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
2568                         ps_sao_ctxt->i4_ctb_x * ctb_size +
2569                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2570                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
2571 
2572                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2573                     ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
2574                         ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
2575                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
2576                         ps_sao_ctxt->i4_ctb_x * ctb_size +
2577                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2578                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
2579 
2580                     {
2581                         UWORD32 u4_ctb_sao_bits;
2582 
2583                         ihevce_sao_analyse(
2584                             &ps_ctxt->s_sao_ctxt_t,
2585                             ps_ctb_out_sao,
2586                             &u4_ctb_sao_bits,
2587                             ps_tile_params);
2588                         ps_ctxt
2589                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2590                                                      [ps_ctxt->i4_bitrate_instance_num]
2591                             ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
2592                         ps_ctxt
2593                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2594                                                      [ps_ctxt->i4_bitrate_instance_num]
2595                             ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
2596                     }
2597                     if(ps_ctxt->i4_deblk_pad_hpel_cur_pic &
2598                        0x1) /** Subpel generation not done for non-ref picture **/
2599                     {
2600                         /* Padding and Subpel Plane Generation */
2601                         ihevce_pad_interp_recon_ctb(
2602                             ps_pad_interp_recon,
2603                             ctb_ctr - 1,
2604                             vert_ctr - 1,
2605                             ps_ctxt->i4_quality_preset,
2606                             ps_frm_ctb_prms,
2607                             ps_ctxt->ai2_scratch,
2608                             ps_ctxt->i4_bitrate_instance_num,
2609                             ps_ctxt->ps_func_selector);
2610                     }
2611                 }
2612 
2613                 /* Call the sao function again for the last ctb of the previous row*/
2614                 if(((ctb_ctr + 1) == (ctb_end)) &&
2615                    (vert_ctr >
2616                     ps_tile_params
2617                         ->i4_first_ctb_y))  //( ((ctb_ctr+1) == ps_frm_ctb_prms->i4_num_ctbs_horz) && (vert_ctr > 0) )
2618                 {
2619                     /* Register the curr ctb's x pos in sao context*/
2620                     ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr;
2621 
2622                     /* Register the curr ctb's y pos in sao context*/
2623                     ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr - 1;
2624 
2625                     ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out +
2626                                      (vert_ctr - 1) * ps_frm_ctb_prms->i4_num_ctbs_horz + (ctb_ctr);
2627 
2628                     ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao;
2629 
2630                     ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd =
2631                         ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) -
2632                                     ps_tile_params->i4_curr_tile_width);
2633 
2634                     ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht = ps_ctxt->s_sao_ctxt_t.i4_ctb_size;
2635 
2636                     ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 0;
2637                     ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 1;
2638 
2639                     /* Calculate the recon buf pointer and stride for the current ctb */
2640                     ps_sao_ctxt->pu1_cur_luma_recon_buf =
2641                         ps_sao_ctxt->pu1_frm_luma_recon_buf +
2642                         (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2643                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2644 
2645                     ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
2646 
2647                     ps_sao_ctxt->pu1_cur_chroma_recon_buf =
2648                         ps_sao_ctxt->pu1_frm_chroma_recon_buf +
2649                         (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
2650                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2651                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2652 
2653                     ps_sao_ctxt->i4_cur_chroma_recon_stride =
2654                         ps_sao_ctxt->i4_frm_chroma_recon_stride;
2655 
2656                     ps_sao_ctxt->pu1_cur_luma_src_buf =
2657                         ps_sao_ctxt->pu1_frm_luma_src_buf +
2658                         (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2659                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2660 
2661                     ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
2662 
2663                     ps_sao_ctxt->pu1_cur_chroma_src_buf =
2664                         ps_sao_ctxt->pu1_frm_chroma_src_buf +
2665                         (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
2666                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2667                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2668 
2669                     ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
2670 
2671                     /* Calculate the pointer to buff to store the (x,y)th sao
2672                     * for the top merge of (x,y+1)th ctb
2673                     */
2674                     ps_sao_ctxt->ps_top_ctb_sao =
2675                         &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
2676                                                          [ps_sao_ctxt->i4_ctb_x +
2677                                                           (ps_sao_ctxt->i4_ctb_y) *
2678                                                               ps_frm_ctb_prms->i4_num_ctbs_horz +
2679                                                           (ps_ctxt->i4_bitrate_instance_num *
2680                                                            ps_sao_ctxt->i4_num_ctb_units)];
2681 
2682                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2683                     ps_sao_ctxt->pu1_curr_sao_src_top_luma =
2684                         ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
2685                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
2686                         ps_sao_ctxt->i4_ctb_x * ctb_size +
2687                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2688                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
2689 
2690                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2691                     ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
2692                         ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
2693                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
2694                         ps_sao_ctxt->i4_ctb_x * ctb_size +
2695                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2696                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
2697 
2698                     {
2699                         UWORD32 u4_ctb_sao_bits;
2700 
2701                         ihevce_sao_analyse(
2702                             &ps_ctxt->s_sao_ctxt_t,
2703                             ps_ctb_out_sao,
2704                             &u4_ctb_sao_bits,
2705                             ps_tile_params);
2706                         ps_ctxt
2707                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2708                                                      [ps_ctxt->i4_bitrate_instance_num]
2709                             ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
2710                         ps_ctxt
2711                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2712                                                      [ps_ctxt->i4_bitrate_instance_num]
2713                             ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
2714                     }
2715                     if(ps_ctxt->i4_deblk_pad_hpel_cur_pic &
2716                        0x1) /** Subpel generation not done for non-ref picture **/
2717                     {
2718                         /* Padding and Subpel Plane Generation */
2719                         ihevce_pad_interp_recon_ctb(
2720                             ps_pad_interp_recon,
2721                             ctb_ctr,
2722                             vert_ctr - 1,
2723                             ps_ctxt->i4_quality_preset,
2724                             ps_frm_ctb_prms,
2725                             ps_ctxt->ai2_scratch,
2726                             ps_ctxt->i4_bitrate_instance_num,
2727                             ps_ctxt->ps_func_selector);
2728                     }
2729                 }
2730             }
2731             else  //SAO Disabled
2732             {
2733                 if(1 == ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2734                 {
2735                     /* Padding and Subpel Plane Generation */
2736                     ihevce_pad_interp_recon_ctb(
2737                         ps_pad_interp_recon,
2738                         ctb_ctr,
2739                         vert_ctr,
2740                         ps_ctxt->i4_quality_preset,
2741                         ps_frm_ctb_prms,
2742                         ps_ctxt->ai2_scratch,
2743                         ps_ctxt->i4_bitrate_instance_num,
2744                         ps_ctxt->ps_func_selector);
2745                 }
2746             }
2747 
2748             /* update the number of ctbs deblocked for this row */
2749             ihevce_dmgr_set_row_row_sync(
2750                 pv_dep_mngr_enc_loop_dblk,
2751                 (ctb_ctr + 1),
2752                 vert_ctr,
2753                 ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2754         }  //end of loop over CTBs in current CTB-row
2755         {
2756             if(!ps_ctxt->i4_bitrate_instance_num)
2757             {
2758                 if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2759                    ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2760                 {
2761                     /* If SAO is on, then signal completion of previous CTB row */
2762                     if(0 != vert_ctr)
2763                     {
2764                         {
2765                             WORD32 post_ctb_ctr;
2766 
2767                             for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
2768                             {
2769                                 ihevce_dmgr_map_set_sync(
2770                                     pv_dep_mngr_me_dep_encloop,
2771                                     post_ctb_ctr,
2772                                     (vert_ctr - 1),
2773                                     MAP_CTB_COMPLETE);
2774                             }
2775                         }
2776                     }
2777                 }
2778                 else
2779                 {
2780                     {
2781                         WORD32 post_ctb_ctr;
2782 
2783                         for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
2784                         {
2785                             ihevce_dmgr_map_set_sync(
2786                                 pv_dep_mngr_me_dep_encloop,
2787                                 post_ctb_ctr,
2788                                 vert_ctr,
2789                                 MAP_CTB_COMPLETE);
2790                         }
2791                     }
2792                 }
2793             }
2794         }
2795 
2796         /* Call the sao function again for the last ctb row of frame */
2797         if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2798            ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2799         {
2800             sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
2801 
2802             for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2803             {
2804                 if((vert_ctr == (ps_tile_params->i4_first_ctb_y +
2805                                  ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1)) &&
2806                    (ctb_ctr >
2807                     ctb_start))  //((vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1)) && (ctb_ctr > 0))
2808                 {
2809                     /* Register the curr ctb's x pos in sao context*/
2810                     ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr - 1;
2811 
2812                     /* Register the curr ctb's y pos in sao context*/
2813                     ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr;
2814 
2815                     ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out +
2816                                      (vert_ctr)*ps_frm_ctb_prms->i4_num_ctbs_horz + (ctb_ctr - 1);
2817 
2818                     ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao;
2819 
2820                     ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd = ps_ctxt->s_sao_ctxt_t.i4_ctb_size;
2821                     ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 0;
2822 
2823                     ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht =
2824                         ctb_size - ((ps_tile_params->i4_curr_tile_ht_in_ctb_unit * ctb_size) -
2825                                     ps_tile_params->i4_curr_tile_height);
2826 
2827                     ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 1;
2828 
2829                     /* Calculate the recon buf pointer and stride for the current ctb */
2830                     ps_sao_ctxt->pu1_cur_luma_recon_buf =
2831                         ps_sao_ctxt->pu1_frm_luma_recon_buf +
2832                         (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2833                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2834 
2835                     ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
2836 
2837                     ps_sao_ctxt->pu1_cur_chroma_recon_buf =
2838                         ps_sao_ctxt->pu1_frm_chroma_recon_buf +
2839                         (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
2840                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2841                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2842 
2843                     ps_sao_ctxt->i4_cur_chroma_recon_stride =
2844                         ps_sao_ctxt->i4_frm_chroma_recon_stride;
2845 
2846                     ps_sao_ctxt->pu1_cur_luma_src_buf =
2847                         ps_sao_ctxt->pu1_frm_luma_src_buf +
2848                         (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2849                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2850 
2851                     ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
2852 
2853                     ps_sao_ctxt->pu1_cur_chroma_src_buf =
2854                         ps_sao_ctxt->pu1_frm_chroma_src_buf +
2855                         (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
2856                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2857                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2858 
2859                     ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
2860 
2861                     /* Calculate the pointer to buff to store the (x,y)th sao
2862                     * for the top merge of (x,y+1)th ctb
2863                     */
2864                     ps_sao_ctxt->ps_top_ctb_sao =
2865                         &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
2866                                                          [ps_sao_ctxt->i4_ctb_x +
2867                                                           (ps_sao_ctxt->i4_ctb_y) *
2868                                                               ps_frm_ctb_prms->i4_num_ctbs_horz +
2869                                                           (ps_ctxt->i4_bitrate_instance_num *
2870                                                            ps_sao_ctxt->i4_num_ctb_units)];
2871 
2872                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2873                     ps_sao_ctxt->pu1_curr_sao_src_top_luma =
2874                         ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
2875                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
2876                         ps_sao_ctxt->i4_ctb_x * ctb_size +
2877                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2878                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
2879 
2880                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2881                     ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
2882                         ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
2883                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
2884                         ps_sao_ctxt->i4_ctb_x * ctb_size +
2885                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2886                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
2887 
2888                     {
2889                         UWORD32 u4_ctb_sao_bits;
2890                         ihevce_sao_analyse(
2891                             &ps_ctxt->s_sao_ctxt_t,
2892                             ps_ctb_out_sao,
2893                             &u4_ctb_sao_bits,
2894                             ps_tile_params);
2895                         ps_ctxt
2896                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2897                                                      [ps_ctxt->i4_bitrate_instance_num]
2898                             ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
2899                         ps_ctxt
2900                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2901                                                      [ps_ctxt->i4_bitrate_instance_num]
2902                             ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
2903                     }
2904                     if(ps_ctxt->i4_deblk_pad_hpel_cur_pic &
2905                        0x1) /** Subpel generation not done for non-ref picture **/
2906                     {
2907                         /* Padding and Subpel Plane Generation */
2908                         ihevce_pad_interp_recon_ctb(
2909                             ps_pad_interp_recon,
2910                             ctb_ctr - 1,
2911                             vert_ctr,
2912                             ps_ctxt->i4_quality_preset,
2913                             ps_frm_ctb_prms,
2914                             ps_ctxt->ai2_scratch,
2915                             ps_ctxt->i4_bitrate_instance_num,
2916                             ps_ctxt->ps_func_selector);
2917                     }
2918                 }
2919                 /* Call the sao function again for the last ctb of the last ctb row of frame */
2920                 if((vert_ctr == (ps_tile_params->i4_first_ctb_y +
2921                                  ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1)) &&
2922                    ((ctb_ctr + 1) ==
2923                     (ctb_end)))  //( ((ctb_ctr+1) == ps_frm_ctb_prms->i4_num_ctbs_horz))
2924                 {
2925                     /* Register the curr ctb's x pos in sao context*/
2926                     ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr;
2927 
2928                     /* Register the curr ctb's y pos in sao context*/
2929                     ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr;
2930 
2931                     ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out +
2932                                      (vert_ctr)*ps_frm_ctb_prms->i4_num_ctbs_horz + (ctb_ctr);
2933 
2934                     ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao;
2935 
2936                     ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd =
2937                         ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) -
2938                                     ps_tile_params->i4_curr_tile_width);
2939 
2940                     ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht =
2941                         ctb_size - ((ps_tile_params->i4_curr_tile_ht_in_ctb_unit * ctb_size) -
2942                                     ps_tile_params->i4_curr_tile_height);
2943 
2944                     ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 1;
2945                     ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 1;
2946 
2947                     /* Calculate the recon buf pointer and stride for the current ctb */
2948                     ps_sao_ctxt->pu1_cur_luma_recon_buf =
2949                         ps_sao_ctxt->pu1_frm_luma_recon_buf +
2950                         (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2951                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2952 
2953                     ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
2954 
2955                     ps_sao_ctxt->pu1_cur_chroma_recon_buf =
2956                         ps_sao_ctxt->pu1_frm_chroma_recon_buf +
2957                         (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
2958                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2959                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2960 
2961                     ps_sao_ctxt->i4_cur_chroma_recon_stride =
2962                         ps_sao_ctxt->i4_frm_chroma_recon_stride;
2963 
2964                     ps_sao_ctxt->pu1_cur_luma_src_buf =
2965                         ps_sao_ctxt->pu1_frm_luma_src_buf +
2966                         (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2967                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2968 
2969                     ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
2970 
2971                     ps_sao_ctxt->pu1_cur_chroma_src_buf =
2972                         ps_sao_ctxt->pu1_frm_chroma_src_buf +
2973                         (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
2974                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2975                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2976 
2977                     ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
2978 
2979                     /* Calculate the pointer to buff to store the (x,y)th sao
2980                     * for the top merge of (x,y+1)th ctb
2981                     */
2982                     ps_sao_ctxt->ps_top_ctb_sao =
2983                         &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
2984                                                          [ps_sao_ctxt->i4_ctb_x +
2985                                                           ps_sao_ctxt->i4_ctb_y *
2986                                                               ps_frm_ctb_prms->i4_num_ctbs_horz +
2987                                                           (ps_ctxt->i4_bitrate_instance_num *
2988                                                            ps_sao_ctxt->i4_num_ctb_units)];
2989 
2990                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2991                     ps_sao_ctxt->pu1_curr_sao_src_top_luma =
2992                         ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
2993                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
2994                         ps_sao_ctxt->i4_ctb_x * ctb_size +
2995                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2996                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
2997 
2998                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2999                     ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
3000                         ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
3001                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
3002                         ps_sao_ctxt->i4_ctb_x * ctb_size +
3003                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
3004                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
3005 
3006                     {
3007                         UWORD32 u4_ctb_sao_bits;
3008 
3009                         ihevce_sao_analyse(
3010                             &ps_ctxt->s_sao_ctxt_t,
3011                             ps_ctb_out_sao,
3012                             &u4_ctb_sao_bits,
3013                             ps_tile_params);
3014                         ps_ctxt
3015                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
3016                                                      [ps_ctxt->i4_bitrate_instance_num]
3017                             ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
3018                         ps_ctxt
3019                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
3020                                                      [ps_ctxt->i4_bitrate_instance_num]
3021                             ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
3022                     }
3023                     if(ps_ctxt->i4_deblk_pad_hpel_cur_pic &
3024                        0x1) /** Subpel generation not done for non-ref picture **/
3025                     {
3026                         /* Padding and Subpel Plane Generation */
3027                         ihevce_pad_interp_recon_ctb(
3028                             ps_pad_interp_recon,
3029                             ctb_ctr,
3030                             vert_ctr,
3031                             ps_ctxt->i4_quality_preset,
3032                             ps_frm_ctb_prms,
3033                             ps_ctxt->ai2_scratch,
3034                             ps_ctxt->i4_bitrate_instance_num,
3035                             ps_ctxt->ps_func_selector);
3036                     }
3037                 }
3038             }  //end of loop over CTBs in current CTB-row
3039 
3040             /* If SAO is on, then signal completion of the last CTB row of frame */
3041             {
3042                 if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1))
3043                 {
3044                     if(!ps_ctxt->i4_bitrate_instance_num)
3045                     {
3046                         {
3047                             WORD32 post_ctb_ctr;
3048 
3049                             for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
3050                             {
3051                                 ihevce_dmgr_map_set_sync(
3052                                     pv_dep_mngr_me_dep_encloop,
3053                                     post_ctb_ctr,
3054                                     vert_ctr,
3055                                     MAP_CTB_COMPLETE);
3056                             }
3057                         }
3058                     }
3059                 }
3060             }
3061         }
3062     }
3063 
3064     return;
3065 }
3066 
3067 /*!
3068 ******************************************************************************
3069 * \if Function name : ihevce_enc_loop_process \endif
3070 *
3071 * \brief
3072 *    Frame level enc_loop pass function
3073 *
3074 * \param[in] pv_ctxt : pointer to enc_loop module
3075 * \param[in] ps_frm_lamda : Frame level Lambda params
3076 * \param[in] ps_curr_inp : pointer to current input buffer containing the source yuv (frame buffer)
3077 * \param[in] ps_ctb_in : pointer CTB structure (output of ME/IPE) (frame buffer)
3078 * \param[out] ps_frm_recon : pointer recon picture structure pointer (frame buffer)
3079 * \param[out] ps_ctb_out : pointer CTB output structure (frame buffer)
3080 * \param[out] ps_cu_out : pointer CU output structure (frame buffer)
3081 * \param[out] ps_tu_out : pointer TU output structure (frame buffer)
3082 * \param[out] pu1_frm_ecd_data : pointer to coeff output (frame buffer)
3083 *
3084 * \return
3085 *    None
3086 *
3087 * Note : Currently the frame level calculations assume that the
3088 *        frame width of the input/recon is an exact multiple of ctbsize
3089 *
3090 * \author
3091 *  Ittiam
3092 *
3093 *****************************************************************************
3094 */
3095 void ihevce_enc_loop_process(
3096     void *pv_ctxt,
3097     ihevce_lap_enc_buf_t *ps_curr_inp,
3098     ctb_analyse_t *ps_ctb_in,
3099     ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse,
3100     recon_pic_buf_t *ps_frm_recon,
3101     cur_ctb_cu_tree_t *ps_cu_tree_out,
3102     ctb_enc_loop_out_t *ps_ctb_out,
3103     cu_enc_loop_out_t *ps_cu_out,
3104     tu_enc_loop_out_t *ps_tu_out,
3105     pu_t *ps_pu_out,
3106     UWORD8 *pu1_frm_ecd_data,
3107     frm_ctb_ctxt_t *ps_frm_ctb_prms,
3108     frm_lambda_ctxt_t *ps_frm_lamda,
3109     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
3110     WORD32 thrd_id,
3111     WORD32 i4_enc_frm_id,
3112     WORD32 i4_pass)
3113 {
3114     WORD32 vert_ctr;
3115     WORD32 tile_col_idx;
3116     iv_enc_yuv_buf_t s_curr_src_bufs;
3117     iv_enc_yuv_buf_t s_curr_recon_bufs;
3118     iv_enc_yuv_buf_src_t s_curr_recon_bufs_src;
3119     UWORD32 *pu4_pu_offsets;
3120     WORD32 end_of_frame;
3121     UWORD8 *apu1_y_sub_pel_planes[3];
3122     pad_interp_recon_frm_t s_pad_interp_recon;
3123     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_ctxt;
3124 
3125     ihevce_enc_loop_ctxt_t *ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[thrd_id];
3126 
3127     WORD32 i4_bitrate_instance_num = ps_ctxt->i4_bitrate_instance_num;
3128 
3129     /* initialize the closed loop lambda for the current frame */
3130     ps_ctxt->i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf;
3131     ps_ctxt->i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf;
3132     ps_ctxt->u4_chroma_cost_weighing_factor = ps_frm_lamda->u4_chroma_cost_weighing_factor;
3133     ps_ctxt->i4_satd_lamda = ps_frm_lamda->i4_cl_satd_lambda_qf;
3134     ps_ctxt->i4_sad_lamda = ps_frm_lamda->i4_cl_sad_type2_lambda_qf;
3135     ps_ctxt->thrd_id = thrd_id;
3136     ps_ctxt->u1_is_refPic = ps_curr_inp->s_lap_out.i4_is_ref_pic;
3137 
3138 #if DISABLE_SAO_WHEN_NOISY
3139     ps_ctxt->s_sao_ctxt_t.ps_ctb_data = ps_ctb_in;
3140     ps_ctxt->s_sao_ctxt_t.i4_ctb_data_stride = ps_frm_ctb_prms->i4_num_ctbs_horz;
3141 #endif
3142 
3143 #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
3144     ps_ctxt->pv_err_func_selector = ps_func_selector;
3145 #endif
3146 
3147     /*Bit0 - of this flag indicates whether the current picture needs to be deblocked,
3148     padded and hpel planes need to be generated.
3149     Bit1 - of this flag is set to 1 if sao is enabled. This is to enable deblocking when sao is enabled*/
3150     ps_ctxt->i4_deblk_pad_hpel_cur_pic =
3151         (ps_frm_recon->i4_deblk_pad_hpel_cur_pic) ||
3152         ((ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
3153           ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
3154          << 1);
3155 
3156     /* Share all reference pictures with nbr clients. This flag will be used only
3157     in case of dist-enc mode */
3158     ps_ctxt->i4_share_flag = (ps_frm_recon->i4_is_reference != 0);
3159     ps_ctxt->pv_frm_recon = (void *)ps_frm_recon;
3160 
3161     /* Register the frame level ssd lamda for both luma and chroma*/
3162     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf;
3163     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf;
3164 
3165     ihevce_populate_cl_cu_lambda_prms(
3166         ps_ctxt,
3167         ps_frm_lamda,
3168         (WORD32)ps_ctxt->i1_slice_type,
3169         ps_curr_inp->s_lap_out.i4_temporal_lyr_id,
3170         ENC_LOOP_LAMBDA_TYPE);
3171 
3172     ps_ctxt->u1_disable_intra_eval = DISABLE_INTRA_IN_BPICS &&
3173                                      (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) &&
3174                                      (ps_ctxt->i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE);
3175 
3176     end_of_frame = 0;
3177 
3178     /* ----------------------------------------------------- */
3179     /* store the stride and dimensions of source and recon   */
3180     /* buffer pointers will be over written at every CTB row */
3181     /* ----------------------------------------------------- */
3182     memcpy(&s_curr_src_bufs, &ps_curr_inp->s_lap_out.s_input_buf, sizeof(iv_enc_yuv_buf_t));
3183 
3184     memcpy(&s_curr_recon_bufs, &ps_frm_recon->s_yuv_buf_desc, sizeof(iv_enc_yuv_buf_t));
3185 
3186     memcpy(&s_curr_recon_bufs_src, &ps_frm_recon->s_yuv_buf_desc_src, sizeof(iv_enc_yuv_buf_src_t));
3187 
3188     /* get the frame level pu offset pointer*/
3189     pu4_pu_offsets = ps_frm_recon->pu4_pu_off;
3190 
3191     s_pad_interp_recon.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
3192 
3193     /* ------------ Loop over all the CTB rows --------------- */
3194     while(0 == end_of_frame)
3195     {
3196         UWORD8 *pu1_tmp;
3197         UWORD8 *pu1_row_pu_map;
3198         UWORD8 *pu1_row_ecd_data;
3199         ctb_analyse_t *ps_ctb_row_in;
3200         ctb_enc_loop_out_t *ps_ctb_row_out;
3201         cu_enc_loop_out_t *ps_row_cu;
3202         tu_enc_loop_out_t *ps_row_tu;
3203         pu_t *ps_row_pu;
3204         pu_col_mv_t *ps_row_col_pu;
3205         job_queue_t *ps_job;
3206         UWORD32 *pu4_pu_row_offsets;
3207         UWORD16 *pu2_num_pu_row;
3208 
3209         ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse;
3210         cur_ctb_cu_tree_t *ps_row_cu_tree;
3211         UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2);
3212 
3213         /* Get the current row from the job queue */
3214         ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
3215             ps_multi_thrd_ctxt, ENC_LOOP_JOB + i4_bitrate_instance_num, 1, i4_enc_frm_id);
3216 
3217         /* Register the pointer to ctb out of the current frame*/
3218         ps_ctxt->s_sao_ctxt_t.ps_ctb_out = ps_ctb_out;
3219 
3220         /* If all rows are done, set the end of process flag to 1, */
3221         /* and the current row to -1 */
3222         if(NULL == ps_job)
3223         {
3224             vert_ctr = -1;
3225             tile_col_idx = -1;
3226             end_of_frame = 1;
3227         }
3228         else
3229         {
3230             ihevce_tile_params_t *ps_col_tile_params_temp;
3231             ihevce_tile_params_t *ps_tile_params;
3232             WORD32 i4_tile_id;
3233 
3234             ASSERT((ENC_LOOP_JOB + i4_bitrate_instance_num) == ps_job->i4_task_type);
3235             /* set the output dependency */
3236             ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_enc_frm_id);
3237 
3238             /* Obtain the current row's details from the job */
3239             vert_ctr = ps_job->s_job_info.s_enc_loop_job_info.i4_ctb_row_no;
3240             {
3241                 /* Obtain the current column tile index from the job */
3242                 tile_col_idx = ps_job->s_job_info.s_enc_loop_job_info.i4_tile_col_idx;
3243 
3244                 /* The tile parameter for the col. idx. Use only the properties
3245                 which is same for all the bottom tiles like width, start_x, etc.
3246                 Don't use height, start_y, etc.                                  */
3247                 ps_col_tile_params_temp =
3248                     ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + tile_col_idx);
3249 
3250                 /* Derive actual tile_id based on vert_ctr */
3251                 i4_tile_id =
3252                     *(ps_frm_ctb_prms->pi4_tile_id_map +
3253                       vert_ctr * ps_frm_ctb_prms->i4_tile_id_ctb_map_stride +
3254                       ps_col_tile_params_temp->i4_first_ctb_x);
3255                 /* Derive pointer to current tile prms */
3256                 ps_tile_params =
3257                     ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + i4_tile_id);
3258             }
3259 
3260             ps_ctxt->i4_tile_col_idx = tile_col_idx;
3261             /* derive the current ctb row pointers */
3262 
3263             /* luma src */
3264             pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf +
3265                       (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y *
3266                        ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) +
3267                       ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x;
3268 
3269             pu1_tmp +=
3270                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size *
3271                  ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd);
3272 
3273             s_curr_src_bufs.pv_y_buf = pu1_tmp;
3274 
3275             if(!ps_ctxt->u1_is_input_data_hbd)
3276             {
3277                 /* cb src */
3278                 pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf;
3279                 pu1_tmp +=
3280                     (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) *
3281                      ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd);
3282 
3283                 s_curr_src_bufs.pv_u_buf = pu1_tmp;
3284             }
3285 
3286             /* luma recon */
3287             pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3288             pu1_tmp +=
3289                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3290 
3291             s_curr_recon_bufs.pv_y_buf = pu1_tmp;
3292             s_pad_interp_recon.pu1_luma_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3293             s_pad_interp_recon.i4_luma_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_y_strd;
3294             if(!ps_ctxt->u1_is_input_data_hbd)
3295             {
3296                 /* cb recon */
3297                 pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3298                 pu1_tmp +=
3299                     (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) *
3300                      ps_frm_recon->s_yuv_buf_desc.i4_uv_strd);
3301 
3302                 s_curr_recon_bufs.pv_u_buf = pu1_tmp;
3303                 s_pad_interp_recon.pu1_chrm_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3304                 s_pad_interp_recon.i4_chrm_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_uv_strd;
3305 
3306                 s_pad_interp_recon.i4_ctb_size = ps_frm_ctb_prms->i4_ctb_size;
3307 
3308                 /* Register the source buffer pointers in sao context*/
3309                 ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_src_buf =
3310                     (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf +
3311                     (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y *
3312                      ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) +
3313                     ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x;
3314 
3315                 ps_ctxt->s_sao_ctxt_t.i4_frm_luma_src_stride =
3316                     ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd;
3317 
3318                 ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_src_buf =
3319                     (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf;
3320 
3321                 ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_src_stride =
3322                     ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd;
3323             }
3324 
3325             /* Subpel planes hxfy, fxhy, hxhy*/
3326             pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[0];
3327             pu1_tmp +=
3328                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3329             apu1_y_sub_pel_planes[0] = pu1_tmp;
3330             s_pad_interp_recon.pu1_sbpel_hxfy = ps_frm_recon->apu1_y_sub_pel_planes[0];
3331 
3332             pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[1];
3333             pu1_tmp +=
3334                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3335             apu1_y_sub_pel_planes[1] = pu1_tmp;
3336             s_pad_interp_recon.pu1_sbpel_fxhy = ps_frm_recon->apu1_y_sub_pel_planes[1];
3337 
3338             pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[2];
3339             pu1_tmp +=
3340                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3341             apu1_y_sub_pel_planes[2] = pu1_tmp;
3342             s_pad_interp_recon.pu1_sbpel_hxhy = ps_frm_recon->apu1_y_sub_pel_planes[2];
3343 
3344             /* row level coeffs buffer */
3345             pu1_row_ecd_data =
3346                 pu1_frm_ecd_data +
3347                 (vert_ctr *
3348                  ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_max_tus_in_row << 1)
3349                                     : ((ps_frm_ctb_prms->i4_max_tus_in_row * 3) >> 1)) *
3350                  MAX_SCAN_COEFFS_BYTES_4x4);
3351 
3352             /* Row level CU buffer */
3353             ps_row_cu = ps_cu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_cus_in_row);
3354 
3355             /* Row level TU buffer */
3356             ps_row_tu = ps_tu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_tus_in_row);
3357 
3358             /* Row level PU buffer */
3359             ps_row_pu = ps_pu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row);
3360 
3361             /* Row level colocated PU buffer */
3362             /* ps_frm_col_mv has (i4_num_ctbs_horz + 1) CTBs for stride */
3363             ps_row_col_pu =
3364                 ps_frm_recon->ps_frm_col_mv + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) *
3365                                                ps_frm_ctb_prms->i4_num_pus_in_ctb);
3366             /* Row level col PU map buffer */
3367             /* pu1_frm_pu_map has (i4_num_ctbs_horz + 1) CTBs for stride */
3368             pu1_row_pu_map =
3369                 ps_frm_recon->pu1_frm_pu_map + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) *
3370                                                 ps_frm_ctb_prms->i4_num_pus_in_ctb);
3371             /* row ctb in pointer  */
3372             ps_ctb_row_in = ps_ctb_in + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3373 
3374             /* row ctb out pointer  */
3375             ps_ctb_row_out = ps_ctb_out + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3376 
3377             /* row number of PUs map pointer */
3378             pu2_num_pu_row =
3379                 ps_frm_recon->pu2_num_pu_map + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3380 
3381             /* row pu offsets pointer  */
3382             pu4_pu_row_offsets = pu4_pu_offsets + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3383             /* store the first CTB pu offset pointer */
3384             *pu4_pu_row_offsets = vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row;
3385             /* Initialize ptr to current IPE row */
3386             ps_row_ipe_analyse = ps_ipe_analyse + (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz);
3387 
3388             /* Initialize ptr to current row */
3389             ps_row_cu_tree = ps_cu_tree_out +
3390                              (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE);
3391 
3392             /* Get the EncLoop Top-Right CU Dep Mngr */
3393             ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right =
3394                 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[ps_ctxt->i4_enc_frm_id]
3395                                                                    [i4_bitrate_instance_num];
3396             /* Get the EncLoop Deblock Dep Mngr */
3397             ps_ctxt->pv_dep_mngr_enc_loop_dblk =
3398                 ps_master_ctxt
3399                     ->aapv_dep_mngr_enc_loop_dblk[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num];
3400 
3401             ps_ctxt->pu1_curr_row_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr][0];
3402 
3403             {
3404                 /* derive the pointers of top row buffers */
3405                 ps_ctxt->pv_top_row_luma =
3406                     (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] +
3407                     (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) +
3408                     (vert_ctr - 1) * ps_ctxt->i4_top_row_luma_stride;
3409 
3410                 ps_ctxt->pv_top_row_chroma =
3411                     (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] +
3412                     (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) +
3413                     (vert_ctr - 1) * ps_ctxt->i4_top_row_chroma_stride;
3414 
3415                 /* derive the pointers of bottom row buffers to update current row data */
3416                 ps_ctxt->pv_bot_row_luma =
3417                     (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] +
3418                     (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) +
3419                     (vert_ctr)*ps_ctxt->i4_top_row_luma_stride;
3420 
3421                 ps_ctxt->pv_bot_row_chroma =
3422                     (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] +
3423                     (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) +
3424                     (vert_ctr)*ps_ctxt->i4_top_row_chroma_stride;
3425 
3426                 /* Register the buffer pointers in sao context*/
3427                 ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_recon_buf =
3428                     (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3429                 ps_ctxt->s_sao_ctxt_t.i4_frm_luma_recon_stride =
3430                     ps_frm_recon->s_yuv_buf_desc.i4_y_strd;
3431 
3432                 ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_recon_buf =
3433                     (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3434                 ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_recon_stride =
3435                     ps_frm_recon->s_yuv_buf_desc.i4_uv_strd;
3436 
3437                 ps_ctxt->s_sao_ctxt_t.ps_rdopt_entropy_ctxt = &ps_ctxt->s_rdopt_entropy_ctxt;
3438 
3439                 ps_ctxt->s_sao_ctxt_t.i4_frm_top_luma_buf_stride =
3440                     ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 1;
3441 
3442                 ps_ctxt->s_sao_ctxt_t.i4_frm_top_chroma_buf_stride =
3443                     ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 2;
3444             }
3445 
3446             ps_ctxt->ps_top_row_nbr =
3447                 ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] +
3448                 (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) +
3449                 (vert_ctr - 1) * ps_ctxt->i4_top_row_nbr_stride;
3450 
3451             ps_ctxt->ps_bot_row_nbr =
3452                 ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] +
3453                 (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) +
3454                 (vert_ctr)*ps_ctxt->i4_top_row_nbr_stride;
3455 
3456             if(vert_ctr > 0)
3457             {
3458                 ps_ctxt->pu1_top_rt_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr - 1][0];
3459             }
3460             else
3461             {
3462                 ps_ctxt->pu1_top_rt_cabac_state = NULL;
3463             }
3464 
3465             ASSERT(
3466                 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0]
3467                     .ps_pps->i1_sign_data_hiding_flag ==
3468                 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1]
3469                     .ps_pps->i1_sign_data_hiding_flag);
3470 
3471             /* call the row level processing function */
3472             ihevce_enc_loop_process_row(
3473                 ps_ctxt,
3474                 &s_curr_src_bufs,
3475                 &s_curr_recon_bufs,
3476                 &s_curr_recon_bufs_src,
3477                 &apu1_y_sub_pel_planes[0],
3478                 ps_ctb_row_in,
3479                 ps_ctb_row_out,
3480                 ps_row_ipe_analyse,
3481                 ps_row_cu_tree,
3482                 ps_row_cu,
3483                 ps_row_tu,
3484                 ps_row_pu,
3485                 ps_row_col_pu,
3486                 pu2_num_pu_row,
3487                 pu1_row_pu_map,
3488                 pu1_row_ecd_data,
3489                 pu4_pu_row_offsets,
3490                 ps_frm_ctb_prms,
3491                 vert_ctr,
3492                 ps_frm_recon,
3493                 ps_ctxt->pv_dep_mngr_encloop_dep_me,
3494                 &s_pad_interp_recon,
3495                 i4_pass,
3496                 ps_multi_thrd_ctxt,
3497                 ps_tile_params);
3498         }
3499     }
3500 }
3501 
3502 /*!
3503 ******************************************************************************
3504 * \if Function name : ihevce_enc_loop_dblk_get_prms_dep_mngr \endif
3505 *
3506 * \brief Returns to the caller key attributes relevant for dependency manager,
3507 *        ie, the number of vertical units in l0 layer
3508 *
3509 * \par Description:
3510 *
3511 * \param[in] pai4_ht    : ht
3512 * \param[out] pi4_num_vert_units_in_lyr : Pointer to store num vertical units
3513 *                                         for deblocking
3514 *
3515 * \return
3516 *    None
3517 *
3518 * \author
3519 *  Ittiam
3520 *
3521 *****************************************************************************
3522 */
ihevce_enc_loop_dblk_get_prms_dep_mngr(WORD32 i4_ht,WORD32 * pi4_num_vert_units_in_lyr)3523 void ihevce_enc_loop_dblk_get_prms_dep_mngr(WORD32 i4_ht, WORD32 *pi4_num_vert_units_in_lyr)
3524 {
3525     /* Blk ht at a given layer*/
3526     WORD32 unit_ht_c;
3527     WORD32 ctb_size = 64;
3528 
3529     /* compute blk ht and unit ht */
3530     unit_ht_c = ctb_size;
3531 
3532     /* set the numebr of vertical units */
3533     *pi4_num_vert_units_in_lyr = (i4_ht + unit_ht_c - 1) / unit_ht_c;
3534 }
3535 
3536 /*!
3537 ******************************************************************************
3538 * \if Function name : ihevce_enc_loop_get_num_mem_recs \endif
3539 *
3540 * \brief
3541 *    Number of memory records are returned for enc_loop module
3542 * Note : Include TOT MEM. req. for ENC.LOOP + TOT MEM. req. for Dep Mngr for Dblk
3543 *
3544 * \return
3545 *    None
3546 *
3547 * \author
3548 *  Ittiam
3549 *
3550 *****************************************************************************
3551 */
3552 WORD32
ihevce_enc_loop_get_num_mem_recs(WORD32 i4_num_bitrate_inst,WORD32 i4_num_enc_loop_frm_pllel)3553     ihevce_enc_loop_get_num_mem_recs(WORD32 i4_num_bitrate_inst, WORD32 i4_num_enc_loop_frm_pllel)
3554 {
3555     WORD32 enc_loop_mem_recs = NUM_ENC_LOOP_MEM_RECS;
3556     WORD32 enc_loop_dblk_dep_mngr_mem_recs =
3557         i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
3558     WORD32 enc_loop_cu_top_right_dep_mngr_mem_recs =
3559         i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
3560     WORD32 enc_loop_aux_br_dep_mngr_mem_recs =
3561         i4_num_enc_loop_frm_pllel * (i4_num_bitrate_inst - 1) * ihevce_dmgr_get_num_mem_recs();
3562 
3563     return (
3564         (enc_loop_mem_recs + enc_loop_dblk_dep_mngr_mem_recs +
3565          enc_loop_cu_top_right_dep_mngr_mem_recs + enc_loop_aux_br_dep_mngr_mem_recs));
3566 }
3567 /*!
3568 ******************************************************************************
3569 * \if Function name : ihevce_enc_loop_get_mem_recs \endif
3570 *
3571 * \brief
3572 *    Memory requirements are returned for ENC_LOOP.
3573 *
3574 * \param[in,out]  ps_mem_tab : pointer to memory descriptors table
3575 * \param[in] ps_init_prms : Create time static parameters
3576 * \param[in] i4_num_proc_thrds : Number of processing threads for this module
3577 * \param[in] i4_mem_space : memspace in which the memory request should be done
3578 *
3579 * \return
3580 *    None
3581 *
3582 * \author
3583 *  Ittiam
3584 *
3585 *****************************************************************************
3586 */
ihevce_enc_loop_get_mem_recs(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_num_proc_thrds,WORD32 i4_num_bitrate_inst,WORD32 i4_num_enc_loop_frm_pllel,WORD32 i4_mem_space,WORD32 i4_resolution_id)3587 WORD32 ihevce_enc_loop_get_mem_recs(
3588     iv_mem_rec_t *ps_mem_tab,
3589     ihevce_static_cfg_params_t *ps_init_prms,
3590     WORD32 i4_num_proc_thrds,
3591     WORD32 i4_num_bitrate_inst,
3592     WORD32 i4_num_enc_loop_frm_pllel,
3593     WORD32 i4_mem_space,
3594     WORD32 i4_resolution_id)
3595 {
3596     UWORD32 u4_width, u4_height, n_tabs;
3597     UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame;
3598     WORD32 ctr;
3599     WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format;
3600 
3601     /* derive frame dimensions */
3602     /*width of the input YUV to be encoded */
3603     u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
3604     /*making the width a multiple of CTB size*/
3605     u4_width += SET_CTB_ALIGN(
3606         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE);
3607 
3608     /*height of the input YUV to be encoded */
3609     u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
3610     /*making the height a multiple of CTB size*/
3611     u4_height += SET_CTB_ALIGN(
3612         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE);
3613     u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE);
3614     u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE);
3615     /* memories should be requested assuming worst case requirememnts */
3616 
3617     /* Module context structure */
3618     ps_mem_tab[ENC_LOOP_CTXT].i4_mem_size = sizeof(ihevce_enc_loop_master_ctxt_t);
3619 
3620     ps_mem_tab[ENC_LOOP_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3621 
3622     ps_mem_tab[ENC_LOOP_CTXT].i4_mem_alignment = 8;
3623 
3624     /* Thread context structure */
3625     ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_size =
3626         i4_num_proc_thrds * sizeof(ihevce_enc_loop_ctxt_t);
3627 
3628     ps_mem_tab[ENC_LOOP_THRDS_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3629 
3630     ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_alignment = 16;
3631 
3632     /* Scale matrices */
3633     ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16);
3634 
3635     ps_mem_tab[ENC_LOOP_SCALE_MAT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3636 
3637     ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_alignment = 8;
3638 
3639     /* Rescale matrices */
3640     ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16);
3641 
3642     ps_mem_tab[ENC_LOOP_RESCALE_MAT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3643 
3644     ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_alignment = 8;
3645 
3646     /* top row luma one row of pixel data per CTB row */
3647     if(ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8)
3648     {
3649         ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size = (u4_ctb_rows_in_a_frame + 1) *
3650                                                     (u4_width + MAX_CU_SIZE + 1) * sizeof(UWORD16) *
3651                                                     i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3652     }
3653     else
3654     {
3655         ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size = (u4_ctb_rows_in_a_frame + 1) *
3656                                                     (u4_width + MAX_CU_SIZE + 1) * sizeof(UWORD8) *
3657                                                     i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3658     }
3659 
3660     ps_mem_tab[ENC_LOOP_TOP_LUMA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3661 
3662     ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_alignment = 8;
3663 
3664     /* top row chroma */
3665     if(ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8)
3666     {
3667         ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size =
3668             (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 2) * sizeof(UWORD16) *
3669             i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3670     }
3671     else
3672     {
3673         ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size =
3674             (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 2) * sizeof(UWORD8) *
3675             i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3676     }
3677 
3678     ps_mem_tab[ENC_LOOP_TOP_CHROMA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3679 
3680     ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_alignment = 8;
3681 
3682     /* top row neighbour 4x4 */
3683     ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_size =
3684         (u4_ctb_rows_in_a_frame + 1) * (((u4_width + MAX_CU_SIZE) >> 2) + 1) * sizeof(nbr_4x4_t) *
3685         i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3686 
3687     ps_mem_tab[ENC_LOOP_TOP_NBR4X4].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3688 
3689     ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_alignment = 8;
3690 
3691     /* memory to dump rate control parameters by each thread for each bit-rate instance */
3692     /* RC params collated by each thread for each bit-rate instance separately */
3693     ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_size = i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel *
3694                                                  i4_num_proc_thrds * sizeof(enc_loop_rc_params_t);
3695 
3696     ps_mem_tab[ENC_LOOP_RC_PARAMS].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3697 
3698     ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_alignment = 8;
3699     /* Memory required for deblocking */
3700     {
3701         /* Memory to store Qp of top4x4 blocks for each CTB row.
3702         This memory is allocated at frame level and shared across
3703         all cores. The Qp values are needed to form Qp-map(described
3704         in the ENC_LOOP_DEBLOCKING section below)*/
3705 
3706         UWORD32 u4_size_bs_memory, u4_size_qp_memory;
3707         UWORD32 u4_size_top_4x4_qp_memory;
3708 
3709         /*Memory required to store Qp of top4x4 blocks for a CTB row for entire frame*/
3710         /*Space required per CTB*/
3711         u4_size_top_4x4_qp_memory = (MAX_CTB_SIZE / 4);
3712         /*Space required for entire CTB row*/
3713         u4_size_top_4x4_qp_memory *= u4_ctb_in_a_row;
3714         /*Space required for entire frame*/
3715         u4_size_top_4x4_qp_memory *= u4_ctb_rows_in_a_frame;
3716         /*Space required for multiple bitrate*/
3717         u4_size_top_4x4_qp_memory *= i4_num_bitrate_inst;
3718         /*Space required for multiple frames in parallel*/
3719         u4_size_top_4x4_qp_memory *= i4_num_enc_loop_frm_pllel;
3720 
3721         ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_size = u4_size_top_4x4_qp_memory;
3722         ps_mem_tab[ENC_LOOP_QP_TOP_4X4].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3723         ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_alignment = 8;
3724 
3725         /* Memory allocation of BS and Qp-map for deblocking at CTB-row level:
3726         ## Boundary Strength(Vertical):
3727         BS stored per CTB at one stretch i.e. for a 64x CTB first 8 entries belongs to first CTB
3728         of the row followed by 8 entries of second CTB and so on.
3729         8 entries: Includes left edge of current CTB and excludes right edge.
3730         ## Boundary Strength(Horizontal):
3731         Same as Vertical.
3732         8 entries:  Includes top edge of current CTB and excludes bottom edge.
3733 
3734         ## Qp-map storage:
3735         T0 T1 T2 T3 T4 T5 ..........to the end of the CTB row
3736         00 01 02 03 04 05 ..........to the end of the CTB row
3737         10 11 12 13 14 15 ..........to the end of the CTB row
3738         20 21 22 23 24 25 ..........to the end of the CTB row
3739         30 31 32 33 34 35 ..........to the end of the CTB row
3740         40 41 42 43 44 45 ..........to the end of the CTB row
3741         ............................to the end of the CTB row
3742         upto height_of_CTB..........to the end of the CTB row
3743 
3744         Qp is stored for each "4x4 block" in a proper 2-D array format (One entry for each 4x4).
3745         A 2-D array of height= (height_of_CTB +1), and width = (width_of_CTB).
3746         where,
3747         => height_of_CTB = number of 4x4 blocks in a CTB  vertically,
3748         => +1 is done to store Qp of lowest 4x4-block layer of top-CTB
3749         in order to deblock top edge of current CTB.
3750         => width_of_CTB  = number of 4x4 blocks in a CTB  horizontally,
3751         */
3752 
3753         /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/
3754         /*1 vertical edge per 8 pixel*/
3755         u4_size_bs_memory = (MAX_CTB_SIZE >> 3);
3756         /*Vertical edges for entire width of CTB row*/
3757         u4_size_bs_memory *= u4_ctb_in_a_row;
3758         /*Each vertical edge of CTB row is 4 bytes*/
3759         u4_size_bs_memory = u4_size_bs_memory << 2;
3760         /*Adding Memory required for storing horizontal BS by doubling*/
3761         u4_size_bs_memory = u4_size_bs_memory << 1;
3762 
3763         /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/
3764         /*Number of 4x4 blocks in the width of a CTB*/
3765         u4_size_qp_memory = (MAX_CTB_SIZE >> 2);
3766         /*Number of 4x4 blocks in the height of a CTB. Adding 1 to store Qp of lowest
3767         4x4-block layer of top-CTB in order to deblock top edge of current CTB*/
3768         u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1);
3769         /*Storage for entire CTB row*/
3770         u4_size_qp_memory *= u4_ctb_in_a_row;
3771 
3772         /*Multiplying by i4_num_proc_thrds to assign memory for each core*/
3773         ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_size =
3774             i4_num_proc_thrds * (u4_size_bs_memory + u4_size_qp_memory);
3775 
3776         ps_mem_tab[ENC_LOOP_DEBLOCKING].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3777 
3778         ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_alignment = 8;
3779     }
3780 
3781     /* Memory required to store pred for 422 chroma */
3782     ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_size =
3783         i4_num_proc_thrds * MAX_CTB_SIZE * MAX_CTB_SIZE * 2 *
3784         (i4_chroma_format == IV_YUV_422SP_UV) *
3785         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3786 
3787     ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3788 
3789     ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_alignment = 8;
3790 
3791     /* Memory for inter pred buffers */
3792     {
3793         WORD32 i4_num_bufs_per_thread = 0;
3794 
3795         WORD32 i4_buf_size_per_cand =
3796             (MAX_CTB_SIZE) * (MAX_CTB_SIZE) *
3797             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3798         WORD32 i4_quality_preset =
3799             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
3800         switch(i4_quality_preset)
3801         {
3802         case IHEVCE_QUALITY_P0:
3803         {
3804             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_PQ;
3805             break;
3806         }
3807         case IHEVCE_QUALITY_P2:
3808         {
3809             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HQ;
3810             break;
3811         }
3812         case IHEVCE_QUALITY_P3:
3813         {
3814             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_MS;
3815             break;
3816         }
3817         case IHEVCE_QUALITY_P4:
3818         {
3819             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HS;
3820             break;
3821         }
3822         case IHEVCE_QUALITY_P5:
3823         case IHEVCE_QUALITY_P6:
3824         case IHEVCE_QUALITY_P7:
3825         {
3826             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_ES;
3827             break;
3828         }
3829         default:
3830         {
3831             ASSERT(0);
3832         }
3833         }
3834 
3835         i4_num_bufs_per_thread += 4;
3836 
3837         ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size =
3838             i4_num_bufs_per_thread * i4_num_proc_thrds * i4_buf_size_per_cand;
3839 
3840         ps_mem_tab[ENC_LOOP_INTER_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3841 
3842         ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_alignment = 8;
3843     }
3844 
3845     /* Memory required to store chroma intra pred */
3846     ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_size =
3847         i4_num_proc_thrds * (MAX_TU_SIZE) * (MAX_TU_SIZE)*2 * NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD *
3848         ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
3849         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3850 
3851     ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3852 
3853     ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_alignment = 8;
3854 
3855     /* Memory required to store pred for reference substitution output */
3856     ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_size =
3857         i4_num_proc_thrds * ((MAX_TU_SIZE * 2 * 2) + 4) *
3858         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3859 
3860     ps_mem_tab[ENC_LOOP_REF_SUB_OUT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3861 
3862     ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_alignment = 8;
3863 
3864     /* Memory required to store pred for reference filtering output */
3865     ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_size =
3866         i4_num_proc_thrds * ((MAX_TU_SIZE * 2 * 2) + 4) *
3867         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3868 
3869     ps_mem_tab[ENC_LOOP_REF_FILT_OUT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3870 
3871     ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_alignment = 8;
3872 
3873 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3874     if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset == 0)
3875 #endif
3876     {
3877         /* Memory assignments for recon storage during CU Recursion */
3878         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size =
3879             i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) *
3880             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3881 
3882         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3883 
3884         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8;
3885 
3886         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size =
3887             i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) *
3888             ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
3889             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3890 
3891         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3892 
3893         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8;
3894     }
3895 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3896     else
3897     {
3898         /* Memory assignments for recon storage during CU Recursion */
3899         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size = 0;
3900 
3901         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3902 
3903         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8;
3904 
3905         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size = 0;
3906 
3907         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3908 
3909         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8;
3910     }
3911 #endif
3912 
3913 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3914     if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset == 0)
3915 #endif
3916     {
3917         /* Memory assignments for pred storage during CU Recursion */
3918         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size =
3919             i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) *
3920             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3921 
3922         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3923 
3924         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8;
3925 
3926         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size =
3927             i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) *
3928             ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
3929             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3930 
3931         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3932 
3933         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8;
3934     }
3935 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3936     else
3937     {
3938         /* Memory assignments for pred storage during CU Recursion */
3939         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size = 0;
3940 
3941         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3942 
3943         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8;
3944 
3945         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size = 0;
3946 
3947         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3948 
3949         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8;
3950     }
3951 #endif
3952 
3953     /* Memory assignments for CTB left luma data storage */
3954     ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_size =
3955         i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) *
3956         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3957 
3958     ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3959 
3960     ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_alignment = 8;
3961 
3962     /* Memory assignments for CTB left chroma data storage */
3963     ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size =
3964         i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) *
3965         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3966     ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size <<=
3967         ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0);
3968 
3969     ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3970 
3971     ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_alignment = 8;
3972 
3973     /* Memory required for SAO */
3974     {
3975         WORD32 num_vert_units;
3976         WORD32 num_horz_units;
3977         WORD32 ctb_aligned_ht, ctb_aligned_wd;
3978         WORD32 luma_buf, chroma_buf;
3979 
3980         num_vert_units = u4_height / MAX_CTB_SIZE;
3981         num_horz_units = u4_width / MAX_CTB_SIZE;
3982 
3983         ctb_aligned_ht = u4_height;
3984         ctb_aligned_wd = u4_width;
3985 
3986         /* Memory for top buffer. 1 extra width is required for top buf ptr for row 0
3987         * and 1 extra location is required for top left buf ptr for row 0
3988         * Also 1 extra byte is required for every row for top left pixel if
3989         * the top left ptr is to be passed to leaf level unconditionally
3990         */
3991         luma_buf = (ctb_aligned_ht + (ctb_aligned_wd + 1) * (num_vert_units + 1)) *
3992                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3993         chroma_buf = (ctb_aligned_ht + (ctb_aligned_wd + 2) * (num_vert_units + 1)) *
3994                      ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3995 
3996         ps_mem_tab[ENC_LOOP_SAO].i4_mem_size =
3997             (luma_buf + chroma_buf) * (i4_num_bitrate_inst) * (i4_num_enc_loop_frm_pllel);
3998 
3999         /* Add the memory required to store the sao information of top ctb for top merge
4000         * This is frame level buffer.
4001         */
4002         ps_mem_tab[ENC_LOOP_SAO].i4_mem_size +=
4003             ((num_horz_units * sizeof(sao_enc_t)) * num_vert_units) * (i4_num_bitrate_inst) *
4004             (i4_num_enc_loop_frm_pllel);
4005 
4006         ps_mem_tab[ENC_LOOP_SAO].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
4007 
4008         ps_mem_tab[ENC_LOOP_SAO].i4_mem_alignment = 8;
4009     }
4010 
4011     /* Memory for CU level Coeff data buffer */
4012     {
4013         /* 16 additional bytes are required to ensure alignment */
4014         {
4015             ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_size =
4016                 i4_num_proc_thrds *
4017                 (((MAX_LUMA_COEFFS_CTB +
4018                    (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) +
4019                   16) *
4020                  (2) * sizeof(UWORD8));
4021         }
4022 
4023         ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
4024 
4025         ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_alignment = 16;
4026 
4027         ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_size =
4028             i4_num_proc_thrds *
4029             (MAX_LUMA_COEFFS_CTB +
4030              (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) *
4031             sizeof(UWORD8);
4032 
4033         ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
4034 
4035         ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_alignment = 16;
4036     }
4037 
4038     /* Memory for CU dequant data buffer */
4039     {
4040         /* 16 additional bytes are required to ensure alignment */
4041         {
4042             ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_size =
4043                 i4_num_proc_thrds *
4044                 (((i4_chroma_format == IV_YUV_422SP_UV) ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1))
4045                                                         : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) +
4046                  8) *
4047                 (2) * sizeof(WORD16);
4048         }
4049 
4050         ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
4051 
4052         ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_alignment = 16;
4053     }
4054 
4055     /* Memory for Recon Datastore (Used around and within the RDOPT loop) */
4056     {
4057         WORD32 i4_memSize_perThread;
4058 
4059         WORD32 i4_chroma_memSize_perThread = 0;
4060         /* 2 bufs each allocated to the two 'enc_loop_cu_final_prms_t' structs */
4061         /* used in RDOPT to store cur and best modes' data */
4062         WORD32 i4_luma_memSize_perThread =
4063             4 * MAX_CU_SIZE * MAX_CU_SIZE *
4064             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4065 
4066         /* 'Glossary' for comments in the following codeBlock */
4067         /* 1 - 2 Bufs for storing recons of the best modes determined in the */
4068         /* function 'ihevce_intra_chroma_pred_mode_selector' */
4069         /* 2 - 1 buf each allocated to the two 'enc_loop_cu_final_prms_t' structs */
4070         /* used in RDOPT to store cur and best modes' data */
4071         if(i4_chroma_format == IV_YUV_422SP_UV)
4072         {
4073             WORD32 i4_quality_preset =
4074                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
4075             switch(i4_quality_preset)
4076             {
4077             case IHEVCE_QUALITY_P0:
4078             {
4079                 /* 1 */
4080                 i4_chroma_memSize_perThread +=
4081                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_PQ *
4082                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4083 
4084                 /* 2 */
4085                 i4_chroma_memSize_perThread +=
4086                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ *
4087                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4088 
4089                 break;
4090             }
4091             case IHEVCE_QUALITY_P2:
4092             {
4093                 /* 1 */
4094                 i4_chroma_memSize_perThread +=
4095                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_HQ *
4096                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4097 
4098                 /* 2 */
4099                 i4_chroma_memSize_perThread +=
4100                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ *
4101                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4102 
4103                 break;
4104             }
4105             case IHEVCE_QUALITY_P3:
4106             {
4107                 /* 1 */
4108                 i4_chroma_memSize_perThread +=
4109                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_MS *
4110                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4111 
4112                 /* 2 */
4113                 i4_chroma_memSize_perThread +=
4114                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS *
4115                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4116 
4117                 break;
4118             }
4119             case IHEVCE_QUALITY_P4:
4120             {
4121                 /* 1 */
4122                 i4_chroma_memSize_perThread +=
4123                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_HS *
4124                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4125 
4126                 /* 2 */
4127                 i4_chroma_memSize_perThread +=
4128                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS *
4129                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4130 
4131                 break;
4132             }
4133             case IHEVCE_QUALITY_P5:
4134             {
4135                 /* 1 */
4136                 i4_chroma_memSize_perThread +=
4137                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_XS *
4138                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4139 
4140                 /* 2 */
4141                 i4_chroma_memSize_perThread +=
4142                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS *
4143                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4144 
4145                 break;
4146             }
4147             case IHEVCE_QUALITY_P6:
4148             case IHEVCE_QUALITY_P7:
4149             {
4150                 /* 1 */
4151                 i4_chroma_memSize_perThread +=
4152                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 *
4153                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4154 
4155                 /* 2 */
4156                 i4_chroma_memSize_perThread +=
4157                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 *
4158                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4159 
4160                 break;
4161             }
4162             }
4163         }
4164         else
4165         {
4166             WORD32 i4_quality_preset =
4167                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
4168             switch(i4_quality_preset)
4169             {
4170             case IHEVCE_QUALITY_P0:
4171             {
4172                 /* 1 */
4173                 i4_chroma_memSize_perThread +=
4174                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_PQ *
4175                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4176 
4177                 /* 2 */
4178                 i4_chroma_memSize_perThread +=
4179                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4180                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ *
4181                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4182 
4183                 break;
4184             }
4185             case IHEVCE_QUALITY_P2:
4186             {
4187                 /* 1 */
4188                 i4_chroma_memSize_perThread +=
4189                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_HQ *
4190                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4191 
4192                 /* 2 */
4193                 i4_chroma_memSize_perThread +=
4194                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4195                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ *
4196                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4197 
4198                 break;
4199             }
4200             case IHEVCE_QUALITY_P3:
4201             {
4202                 /* 1 */
4203                 i4_chroma_memSize_perThread +=
4204                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_MS *
4205                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4206 
4207                 /* 2 */
4208                 i4_chroma_memSize_perThread +=
4209                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4210                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS *
4211                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4212 
4213                 break;
4214             }
4215             case IHEVCE_QUALITY_P4:
4216             {
4217                 /* 1 */
4218                 i4_chroma_memSize_perThread +=
4219                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_HS *
4220                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4221 
4222                 /* 2 */
4223                 i4_chroma_memSize_perThread +=
4224                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4225                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS *
4226                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4227 
4228                 break;
4229             }
4230             case IHEVCE_QUALITY_P5:
4231             {
4232                 /* 1 */
4233                 i4_chroma_memSize_perThread +=
4234                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_XS *
4235                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4236 
4237                 /* 2 */
4238                 i4_chroma_memSize_perThread +=
4239                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4240                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS *
4241                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4242 
4243                 break;
4244             }
4245             case IHEVCE_QUALITY_P6:
4246             case IHEVCE_QUALITY_P7:
4247             {
4248                 /* 1 */
4249                 i4_chroma_memSize_perThread +=
4250                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 *
4251                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4252 
4253                 /* 2 */
4254                 i4_chroma_memSize_perThread +=
4255                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4256                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 *
4257                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4258 
4259                 break;
4260             }
4261             }
4262         }
4263 
4264         i4_memSize_perThread = i4_luma_memSize_perThread + i4_chroma_memSize_perThread;
4265 
4266         ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size =
4267             i4_num_proc_thrds * i4_memSize_perThread * sizeof(UWORD8);
4268 
4269         ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
4270 
4271         ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_alignment = 16;
4272     }
4273 
4274     n_tabs = NUM_ENC_LOOP_MEM_RECS;
4275 
4276     /*************************************************************************/
4277     /* --- EncLoop Deblock sync Dep Mngr Mem requests --                     */
4278     /*************************************************************************/
4279 
4280     /* Fill the memtabs for  EncLoop Deblock Dep Mngr */
4281     {
4282         WORD32 count;
4283         WORD32 num_vert_units;
4284         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4285 
4286         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
4287         ASSERT(num_vert_units > 0);
4288         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4289         {
4290             for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
4291             {
4292                 n_tabs += ihevce_dmgr_get_mem_recs(
4293                     &ps_mem_tab[n_tabs],
4294                     DEP_MNGR_ROW_ROW_SYNC,
4295                     num_vert_units,
4296                     ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4297                     i4_num_proc_thrds,
4298                     i4_mem_space);
4299             }
4300         }
4301     }
4302 
4303     /*************************************************************************/
4304     /* --- EncLoop Top-Right CU sync Dep Mngr Mem requests --                */
4305     /*************************************************************************/
4306 
4307     /* Fill the memtabs for  Top-Right CU sync Dep Mngr */
4308     {
4309         WORD32 count;
4310         WORD32 num_vert_units;
4311         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4312         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
4313         ASSERT(num_vert_units > 0);
4314 
4315         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4316         {
4317             for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
4318             {
4319                 n_tabs += ihevce_dmgr_get_mem_recs(
4320                     &ps_mem_tab[n_tabs],
4321                     DEP_MNGR_ROW_ROW_SYNC,
4322                     num_vert_units,
4323                     ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4324                     i4_num_proc_thrds,
4325                     i4_mem_space);
4326             }
4327         }
4328     }
4329 
4330     /*************************************************************************/
4331     /* --- EncLoop Aux. on Ref. bitrate sync Dep Mngr Mem requests --        */
4332     /*************************************************************************/
4333 
4334     /* Fill the memtabs for  EncLoop Aux. on Ref. bitrate Dep Mngr */
4335     {
4336         WORD32 count;
4337         WORD32 num_vert_units;
4338         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4339 
4340         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
4341         ASSERT(num_vert_units > 0);
4342 
4343         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4344         {
4345             for(ctr = 1; ctr < i4_num_bitrate_inst; ctr++)
4346             {
4347                 n_tabs += ihevce_dmgr_get_mem_recs(
4348                     &ps_mem_tab[n_tabs],
4349                     DEP_MNGR_ROW_ROW_SYNC,
4350                     num_vert_units,
4351                     ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4352                     i4_num_proc_thrds,
4353                     i4_mem_space);
4354             }
4355         }
4356     }
4357 
4358     return (n_tabs);
4359 }
4360 
4361 /*!
4362 ******************************************************************************
4363 * \if Function name : ihevce_enc_loop_init \endif
4364 *
4365 * \brief
4366 *    Initialization for ENC_LOOP context state structure.
4367 *
4368 * \param[in] ps_mem_tab : pointer to memory descriptors table
4369 * \param[in] ps_init_prms : Create time static parameters
4370 * \param[in] pv_osal_handle : Osal handle
4371 *
4372 * \return
4373 *    None
4374 *
4375 * \author
4376 *  Ittiam
4377 *
4378 *****************************************************************************
4379 */
4380 void *ihevce_enc_loop_init(
4381     iv_mem_rec_t *ps_mem_tab,
4382     ihevce_static_cfg_params_t *ps_init_prms,
4383     WORD32 i4_num_proc_thrds,
4384     void *pv_osal_handle,
4385     func_selector_t *ps_func_selector,
4386     rc_quant_t *ps_rc_quant_ctxt,
4387     ihevce_tile_params_t *ps_tile_params_base,
4388     WORD32 i4_resolution_id,
4389     WORD32 i4_num_enc_loop_frm_pllel,
4390     UWORD8 u1_is_popcnt_available)
4391 {
4392     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
4393     ihevce_enc_loop_ctxt_t *ps_ctxt;
4394     WORD32 ctr, n_tabs;
4395     UWORD32 u4_width, u4_height;
4396     UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame;
4397     UWORD32 u4_size_bs_memory, u4_size_qp_memory;
4398     UWORD8 *pu1_deblk_base; /*Store the base address of deblcoking memory*/
4399     WORD32 i;
4400     WORD32 i4_num_bitrate_inst =
4401         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_num_bitrate_instances;
4402     enc_loop_rc_params_t *ps_enc_loop_rc_params;
4403     UWORD8 *pu1_sao_base; /* store the base address of sao*/
4404     UWORD32 u4_ctb_aligned_wd, ctb_size, u4_ctb_aligned_ht, num_vert_units;
4405     WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format;
4406     WORD32 is_hbd_mode = (ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8);
4407     WORD32 i4_enc_frm_id;
4408     WORD32 num_cu_in_ctb;
4409     WORD32 i4_num_tile_cols = 1;  //Default value is 1
4410 
4411     /* ENC_LOOP state structure */
4412     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)ps_mem_tab[ENC_LOOP_CTXT].pv_base;
4413 
4414     ps_master_ctxt->i4_num_proc_thrds = i4_num_proc_thrds;
4415 
4416     ps_ctxt = (ihevce_enc_loop_ctxt_t *)ps_mem_tab[ENC_LOOP_THRDS_CTXT].pv_base;
4417     ps_enc_loop_rc_params = (enc_loop_rc_params_t *)ps_mem_tab[ENC_LOOP_RC_PARAMS].pv_base;
4418     ps_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt;
4419     /*Calculation of memory sizes for deblocking*/
4420     {
4421         /*width of the input YUV to be encoded. */
4422         u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
4423         /*making the width a multiple of CTB size*/
4424         u4_width += SET_CTB_ALIGN(
4425             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE);
4426 
4427         u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE);
4428 
4429         /*height of the input YUV to be encoded */
4430         u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4431         /*making the height a multiple of CTB size*/
4432         u4_height += SET_CTB_ALIGN(
4433             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE);
4434 
4435         u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE);
4436 
4437         /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/
4438         /*1 vertical edge per 8 pixel*/
4439         u4_size_bs_memory = (MAX_CTB_SIZE >> 3);
4440         /*Vertical edges for entire width of CTB row*/
4441         u4_size_bs_memory *= u4_ctb_in_a_row;
4442         /*Each vertical edge of CTB row is 4 bytes*/
4443         u4_size_bs_memory = u4_size_bs_memory << 2;
4444         /*Adding Memory required for storing horizontal BS by doubling*/
4445         u4_size_bs_memory = u4_size_bs_memory << 1;
4446 
4447         /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/
4448         /*Number of 4x4 blocks in the width of a CTB*/
4449         u4_size_qp_memory = (MAX_CTB_SIZE >> 2);
4450         /*Number of 4x4 blocks in the height of a CTB. Adding 1 to store Qp of lowest
4451         4x4-block layer of top-CTB in order to deblock top edge of current CTB*/
4452         u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1);
4453         /*Storage for entire CTB row*/
4454         u4_size_qp_memory *= u4_ctb_in_a_row;
4455 
4456         pu1_deblk_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_DEBLOCKING].pv_base;
4457     }
4458 
4459     /*Derive the base pointer of sao*/
4460     pu1_sao_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_SAO].pv_base;
4461     ctb_size = (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size);
4462     u4_ctb_aligned_wd = u4_width;
4463     u4_ctb_aligned_ht = u4_height;
4464     num_vert_units = (u4_height) / ctb_size;
4465 
4466     for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++)
4467     {
4468         ps_master_ctxt->aps_enc_loop_thrd_ctxt[ctr] = ps_ctxt;
4469         /* Store Tile params base into EncLoop context */
4470         ps_ctxt->pv_tile_params_base = (void *)ps_tile_params_base;
4471         ihevce_cmn_utils_instr_set_router(
4472             &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type);
4473         ihevce_sifter_sad_fxn_assigner(
4474             (FT_SAD_EVALUATOR **)(&ps_ctxt->pv_evalsad_pt_npu_mxn_8bit), ps_init_prms->e_arch_type);
4475         ps_ctxt->i4_max_search_range_horizontal =
4476             ps_init_prms->s_config_prms.i4_max_search_range_horz;
4477         ps_ctxt->i4_max_search_range_vertical =
4478             ps_init_prms->s_config_prms.i4_max_search_range_vert;
4479 
4480         ps_ctxt->i4_quality_preset =
4481             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
4482 
4483         if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P7)
4484         {
4485             ps_ctxt->i4_quality_preset = IHEVCE_QUALITY_P6;
4486         }
4487 
4488         ps_ctxt->i4_num_proc_thrds = ps_master_ctxt->i4_num_proc_thrds;
4489 
4490         ps_ctxt->i4_rc_pass = ps_init_prms->s_pass_prms.i4_pass;
4491 
4492         ps_ctxt->u1_chroma_array_type = (i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1;
4493 
4494         ps_ctxt->s_deblk_prms.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
4495 
4496         ps_ctxt->pi2_scal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_SCALE_MAT].pv_base;
4497 
4498         ps_ctxt->pi2_rescal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_RESCALE_MAT].pv_base;
4499 
4500         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
4501         {
4502             ps_ctxt->i4_use_ctb_level_lamda = 0;
4503         }
4504         else
4505         {
4506             ps_ctxt->i4_use_ctb_level_lamda = 0;
4507         }
4508 
4509         /** Register the function selector pointer*/
4510         ps_ctxt->ps_func_selector = ps_func_selector;
4511 
4512         ps_ctxt->s_mc_ctxt.ps_func_selector = ps_func_selector;
4513 
4514         /* Initialization for non-distributed mode */
4515         ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[0] = 0;
4516         ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[1] = 0;
4517         ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[2] = 0;
4518         ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[3] = 0;
4519 
4520         ps_ctxt->s_deblk_prms.ps_func_selector = ps_func_selector;
4521         ps_ctxt->i4_top_row_luma_stride = (u4_width + MAX_CU_SIZE + 1);
4522 
4523         ps_ctxt->i4_frm_top_row_luma_size =
4524             ps_ctxt->i4_top_row_luma_stride * (u4_ctb_rows_in_a_frame + 1);
4525 
4526         ps_ctxt->i4_top_row_chroma_stride = (u4_width + MAX_CU_SIZE + 2);
4527 
4528         ps_ctxt->i4_frm_top_row_chroma_size =
4529             ps_ctxt->i4_top_row_chroma_stride * (u4_ctb_rows_in_a_frame + 1);
4530 
4531         {
4532             for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4533             {
4534                 /* +1 is to provision top left pel */
4535                 ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] =
4536                     (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_LUMA].pv_base + 1 +
4537                     (ps_ctxt->i4_frm_top_row_luma_size * i4_enc_frm_id * i4_num_bitrate_inst);
4538 
4539                 /* pointer incremented by 1 row to avoid OOB access in 0th row */
4540                 ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] =
4541                     (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] +
4542                     ps_ctxt->i4_top_row_luma_stride;
4543 
4544                 /* +2 is to provision top left pel */
4545                 ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] =
4546                     (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_CHROMA].pv_base + 2 +
4547                     (ps_ctxt->i4_frm_top_row_chroma_size * i4_enc_frm_id * i4_num_bitrate_inst);
4548 
4549                 /* pointer incremented by 1 row to avoid OOB access in 0th row */
4550                 ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] =
4551                     (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] +
4552                     ps_ctxt->i4_top_row_chroma_stride;
4553             }
4554         }
4555 
4556         /* +1 is to provision top left nbr */
4557         ps_ctxt->i4_top_row_nbr_stride = (((u4_width + MAX_CU_SIZE) >> 2) + 1);
4558         ps_ctxt->i4_frm_top_row_nbr_size =
4559             ps_ctxt->i4_top_row_nbr_stride * (u4_ctb_rows_in_a_frame + 1);
4560         for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4561         {
4562             ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] =
4563                 (nbr_4x4_t *)ps_mem_tab[ENC_LOOP_TOP_NBR4X4].pv_base + 1 +
4564                 (ps_ctxt->i4_frm_top_row_nbr_size * i4_enc_frm_id * i4_num_bitrate_inst);
4565             ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] += ps_ctxt->i4_top_row_nbr_stride;
4566         }
4567 
4568         num_cu_in_ctb = ctb_size / MIN_CU_SIZE;
4569         num_cu_in_ctb *= num_cu_in_ctb;
4570 
4571         /* pointer incremented by 1 row to avoid OOB access in 0th row */
4572 
4573         /* Memory for CU level Coeff data buffer */
4574         {
4575             WORD32 i4_16byte_boundary_overshoot;
4576             WORD32 buf_size_per_cu;
4577             WORD32 buf_size_per_thread_wo_alignment_req;
4578             WORD32 buf_size_per_thread;
4579 
4580             buf_size_per_cu =
4581                 ((MAX_LUMA_COEFFS_CTB +
4582                   (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) +
4583                  16) *
4584                 sizeof(UWORD8);
4585             buf_size_per_thread_wo_alignment_req = buf_size_per_cu - 16 * sizeof(UWORD8);
4586 
4587             {
4588                 buf_size_per_thread = buf_size_per_cu * (2);
4589 
4590                 for(i = 0; i < 2; i++)
4591                 {
4592                     ps_ctxt->as_cu_prms[i].pu1_cu_coeffs =
4593                         (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].pv_base +
4594                         (ctr * buf_size_per_thread) + (i * buf_size_per_cu);
4595 
4596                     i4_16byte_boundary_overshoot =
4597                         ((LWORD64)ps_ctxt->as_cu_prms[i].pu1_cu_coeffs & 0xf);
4598 
4599                     ps_ctxt->as_cu_prms[i].pu1_cu_coeffs += (16 - i4_16byte_boundary_overshoot);
4600                 }
4601             }
4602 
4603             ps_ctxt->pu1_cu_recur_coeffs =
4604                 (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].pv_base +
4605                 (ctr * buf_size_per_thread_wo_alignment_req);
4606         }
4607 
4608         /* Memory for CU dequant data buffer */
4609         {
4610             WORD32 buf_size_per_thread;
4611             WORD32 i4_16byte_boundary_overshoot;
4612 
4613             WORD32 buf_size_per_cu =
4614                 (((i4_chroma_format == IV_YUV_422SP_UV) ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1))
4615                                                         : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) +
4616                  8) *
4617                 sizeof(WORD16);
4618 
4619             {
4620                 buf_size_per_thread = buf_size_per_cu * 2;
4621 
4622                 for(i = 0; i < 2; i++)
4623                 {
4624                     ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs =
4625                         (WORD16
4626                              *)((UWORD8 *)ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].pv_base + (ctr * buf_size_per_thread) + (i * buf_size_per_cu));
4627 
4628                     i4_16byte_boundary_overshoot =
4629                         ((LWORD64)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs & 0xf);
4630 
4631                     ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs =
4632                         (WORD16
4633                              *)((UWORD8 *)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs + (16 - i4_16byte_boundary_overshoot));
4634                 }
4635             }
4636         }
4637 
4638         /*------ Deblocking memory pointer assignments start ------*/
4639 
4640         /*Assign stride = 4x4 blocks in horizontal edge*/
4641         ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row;
4642 
4643         ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size =
4644             ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd * u4_ctb_rows_in_a_frame;
4645 
4646         /*Assign frame level memory to store the Qp of
4647         top 4x4 neighbours of each CTB row*/
4648         for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4649         {
4650             ps_ctxt->s_deblk_ctbrow_prms.api1_qp_top_4x4_ctb_row[i4_enc_frm_id] =
4651                 (WORD8 *)ps_mem_tab[ENC_LOOP_QP_TOP_4X4].pv_base +
4652                 (ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size * i4_num_bitrate_inst *
4653                  i4_enc_frm_id);
4654         }
4655 
4656         ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_vert = (UWORD32 *)pu1_deblk_base;
4657 
4658         ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_horz =
4659             (UWORD32 *)(pu1_deblk_base + (u4_size_bs_memory >> 1));
4660 
4661         ps_ctxt->s_deblk_ctbrow_prms.pi1_ctb_row_qp = (WORD8 *)pu1_deblk_base + u4_size_bs_memory;
4662 
4663         /*Assign stride = 4x4 blocks in horizontal edge*/
4664         ps_ctxt->s_deblk_ctbrow_prms.u4_qp_buffer_stride = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row;
4665 
4666         pu1_deblk_base += (u4_size_bs_memory + u4_size_qp_memory);
4667 
4668         /*------ Deblocking memory pointer assignments end ------*/
4669 
4670         /*------SAO memory's pointer assignment starts------------*/
4671         if(!is_hbd_mode)
4672         {
4673             /* 2 is added to allocate top left pixel */
4674             ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size =
4675                 u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1);
4676             ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size =
4677                 u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 2) * (num_vert_units + 1);
4678             ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units =
4679                 num_vert_units * (u4_ctb_aligned_wd / MAX_CTB_SIZE);
4680 
4681             for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4682             {
4683                 ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_luma[i4_enc_frm_id] =
4684                     pu1_sao_base +
4685                     ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size +
4686                       ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) *
4687                      i4_num_bitrate_inst * i4_enc_frm_id) +  // move to the next frame_id
4688                     u4_ctb_aligned_wd +
4689                     2;
4690 
4691                 ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_chroma[i4_enc_frm_id] =
4692                     pu1_sao_base +
4693                     ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size +
4694                       ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) *
4695                      i4_num_bitrate_inst * i4_enc_frm_id) +
4696                     +u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1) +
4697                     u4_ctb_aligned_wd + 4;
4698 
4699                 ps_ctxt->s_sao_ctxt_t.aps_frm_top_ctb_sao[i4_enc_frm_id] = (sao_enc_t *) (pu1_sao_base +
4700                     ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size + ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size)
4701                     *i4_num_bitrate_inst*i4_num_enc_loop_frm_pllel) +
4702                     (ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units * sizeof(sao_enc_t) *i4_num_bitrate_inst * i4_enc_frm_id));
4703             }
4704             ps_ctxt->s_sao_ctxt_t.i4_ctb_size =
4705                 (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size);
4706             ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd = u4_ctb_aligned_wd;
4707         }
4708 
4709         /*------SAO memory's pointer assignment ends------------*/
4710 
4711         /* perform all one time initialisation here */
4712         ps_ctxt->i4_nbr_map_strd = MAX_PU_IN_CTB_ROW + 1 + 8;
4713 
4714         ps_ctxt->pu1_ctb_nbr_map = ps_ctxt->au1_nbr_ctb_map[0];
4715 
4716         ps_ctxt->i4_deblock_type = ps_init_prms->s_coding_tools_prms.i4_deblocking_type;
4717 
4718         /* move the pointer to 1,2 location */
4719         ps_ctxt->pu1_ctb_nbr_map += ps_ctxt->i4_nbr_map_strd;
4720         ps_ctxt->pu1_ctb_nbr_map++;
4721 
4722         ps_ctxt->i4_cu_csbf_strd = MAX_TU_IN_CTB_ROW;
4723 
4724         CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map4x4TU, 1, 4, ps_ctxt->i4_cu_csbf_strd);
4725 
4726         CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map8x8TU, 4, 8, ps_ctxt->i4_cu_csbf_strd);
4727 
4728         CREATE_SUBBLOCK2CSBFID_MAP(
4729             gai4_subBlock2csbfId_map16x16TU, 16, 16, ps_ctxt->i4_cu_csbf_strd);
4730 
4731         CREATE_SUBBLOCK2CSBFID_MAP(
4732             gai4_subBlock2csbfId_map32x32TU, 64, 32, ps_ctxt->i4_cu_csbf_strd);
4733 
4734         /* For both instance initialise the chroma dequant start idx */
4735         ps_ctxt->as_cu_prms[0].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE);
4736         ps_ctxt->as_cu_prms[1].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE);
4737 
4738         /* initialise all the function pointer tables */
4739         {
4740             ps_ctxt->pv_inter_rdopt_cu_mc_mvp =
4741                 (pf_inter_rdopt_cu_mc_mvp)ihevce_inter_rdopt_cu_mc_mvp;
4742 
4743             ps_ctxt->pv_inter_rdopt_cu_ntu = (pf_inter_rdopt_cu_ntu)ihevce_inter_rdopt_cu_ntu;
4744 
4745 #if ENABLE_RDO_BASED_TU_RECURSION
4746             if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4747             {
4748                 ps_ctxt->pv_inter_rdopt_cu_ntu =
4749                     (pf_inter_rdopt_cu_ntu)ihevce_inter_tu_tree_selector_and_rdopt_cost_computer;
4750             }
4751 #endif
4752             ps_ctxt->pv_intra_chroma_pred_mode_selector =
4753                 (pf_intra_chroma_pred_mode_selector)ihevce_intra_chroma_pred_mode_selector;
4754             ps_ctxt->pv_intra_rdopt_cu_ntu = (pf_intra_rdopt_cu_ntu)ihevce_intra_rdopt_cu_ntu;
4755             ps_ctxt->pv_final_rdopt_mode_prcs =
4756                 (pf_final_rdopt_mode_prcs)ihevce_final_rdopt_mode_prcs;
4757             ps_ctxt->pv_store_cu_results = (pf_store_cu_results)ihevce_store_cu_results;
4758             ps_ctxt->pv_enc_loop_cu_bot_copy = (pf_enc_loop_cu_bot_copy)ihevce_enc_loop_cu_bot_copy;
4759             ps_ctxt->pv_enc_loop_ctb_left_copy =
4760                 (pf_enc_loop_ctb_left_copy)ihevce_enc_loop_ctb_left_copy;
4761 
4762             /* Memory assignments for chroma intra pred buffer */
4763             {
4764                 WORD32 pred_buf_size =
4765                     MAX_TU_SIZE * MAX_TU_SIZE * 2 * ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4766                 WORD32 pred_buf_size_per_thread =
4767                     NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD * pred_buf_size;
4768                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].pv_base +
4769                                    (ctr * pred_buf_size_per_thread);
4770 
4771                 for(i = 0; i < NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD; i++)
4772                 {
4773                     ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[i].pv_pred_data = pu1_base;
4774                     pu1_base += pred_buf_size;
4775                 }
4776             }
4777 
4778             /* Memory assignments for reference substitution output */
4779             {
4780                 WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + 4);
4781                 WORD32 pred_buf_size_per_thread = pred_buf_size;
4782                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_SUB_OUT].pv_base +
4783                                    (ctr * pred_buf_size_per_thread);
4784 
4785                 ps_ctxt->pv_ref_sub_out = pu1_base;
4786             }
4787 
4788             /* Memory assignments for reference filtering output */
4789             {
4790                 WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + 4);
4791                 WORD32 pred_buf_size_per_thread = pred_buf_size;
4792                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_FILT_OUT].pv_base +
4793                                    (ctr * pred_buf_size_per_thread);
4794 
4795                 ps_ctxt->pv_ref_filt_out = pu1_base;
4796             }
4797 
4798             /* Memory assignments for recon storage during CU Recursion */
4799 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4800             if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4801 #endif
4802             {
4803                 {
4804                     WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE);
4805                     WORD32 pred_buf_size_per_thread = pred_buf_size;
4806                     UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].pv_base +
4807                                        (ctr * pred_buf_size_per_thread);
4808 
4809                     ps_ctxt->pv_cu_luma_recon = pu1_base;
4810                 }
4811 
4812                 {
4813                     WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) *
4814                                            ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4815                     WORD32 pred_buf_size_per_thread = pred_buf_size;
4816                     UWORD8 *pu1_base =
4817                         (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].pv_base +
4818                         (ctr * pred_buf_size_per_thread);
4819 
4820                     ps_ctxt->pv_cu_chrma_recon = pu1_base;
4821                 }
4822             }
4823 
4824             /* Memory assignments for pred storage during CU Recursion */
4825 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4826             if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4827 #endif
4828             {
4829                 {
4830                     WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE);
4831                     WORD32 pred_buf_size_per_thread = pred_buf_size;
4832                     UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].pv_base +
4833                                        (ctr * pred_buf_size_per_thread);
4834 
4835                     ps_ctxt->pv_CTB_pred_luma = pu1_base;
4836                 }
4837 
4838                 {
4839                     WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) *
4840                                            ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4841                     WORD32 pred_buf_size_per_thread = pred_buf_size;
4842                     UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].pv_base +
4843                                        (ctr * pred_buf_size_per_thread);
4844 
4845                     ps_ctxt->pv_CTB_pred_chroma = pu1_base;
4846                 }
4847             }
4848 
4849             /* Memory assignments for CTB left luma data storage */
4850             {
4851                 WORD32 pred_buf_size = (MAX_CTB_SIZE + MAX_TU_SIZE);
4852                 WORD32 pred_buf_size_per_thread = pred_buf_size;
4853                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].pv_base +
4854                                    (ctr * pred_buf_size_per_thread);
4855 
4856                 ps_ctxt->pv_left_luma_data = pu1_base;
4857             }
4858 
4859             /* Memory assignments for CTB left chroma data storage */
4860             {
4861                 WORD32 pred_buf_size =
4862                     (MAX_CTB_SIZE + MAX_TU_SIZE) * ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4863                 WORD32 pred_buf_size_per_thread = pred_buf_size;
4864                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].pv_base +
4865                                    (ctr * pred_buf_size_per_thread);
4866 
4867                 ps_ctxt->pv_left_chrm_data = pu1_base;
4868             }
4869         }
4870 
4871         /* Memory for inter pred buffers */
4872         {
4873             WORD32 i4_num_bufs_per_thread;
4874 
4875             WORD32 i4_buf_size_per_cand =
4876                 (MAX_CTB_SIZE) * (MAX_CTB_SIZE) *
4877                 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
4878 
4879             i4_num_bufs_per_thread =
4880                 (ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size / i4_num_proc_thrds) /
4881                 i4_buf_size_per_cand;
4882 
4883             ps_ctxt->i4_max_num_inter_rdopt_cands = i4_num_bufs_per_thread - 4;
4884 
4885             ps_ctxt->s_pred_buf_data.u4_is_buf_in_use = UINT_MAX;
4886 
4887             {
4888                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_INTER_PRED].pv_base +
4889                                    +(ctr * i4_buf_size_per_cand * i4_num_bufs_per_thread);
4890 
4891                 for(i = 0; i < i4_num_bufs_per_thread; i++)
4892                 {
4893                     ps_ctxt->s_pred_buf_data.apv_inter_pred_data[i] =
4894                         pu1_base + i * i4_buf_size_per_cand;
4895                     ps_ctxt->s_pred_buf_data.u4_is_buf_in_use ^= (1 << i);
4896                 }
4897             }
4898         }
4899 
4900         /* Memory required to store pred for 422 chroma */
4901         if(i4_chroma_format == IV_YUV_422SP_UV)
4902         {
4903             WORD32 pred_buf_size = MAX_CTB_SIZE * MAX_CTB_SIZE * 2;
4904             WORD32 pred_buf_size_per_thread =
4905                 pred_buf_size * ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) *
4906                 sizeof(UWORD8);
4907             void *pv_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].pv_base +
4908                             (ctr * pred_buf_size_per_thread);
4909 
4910             ps_ctxt->pv_422_chroma_intra_pred_buf = pv_base;
4911         }
4912         else
4913         {
4914             ps_ctxt->pv_422_chroma_intra_pred_buf = NULL;
4915         }
4916 
4917         /* Memory for Recon Datastore (Used around and within the RDOPT loop) */
4918         {
4919             WORD32 i4_lumaBufSize = MAX_CU_SIZE * MAX_CU_SIZE;
4920             WORD32 i4_chromaBufSize =
4921                 MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ((i4_chroma_format == IV_YUV_422SP_UV) + 1);
4922             WORD32 i4_memSize_perThread = ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size /
4923                                           (i4_num_proc_thrds * sizeof(UWORD8) * (is_hbd_mode + 1));
4924             WORD32 i4_quality_preset = ps_ctxt->i4_quality_preset;
4925             {
4926                 UWORD8 *pu1_mem_base =
4927                     (((UWORD8 *)ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].pv_base) +
4928                      ctr * i4_memSize_perThread);
4929 
4930                 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[0] =
4931                     pu1_mem_base + i4_lumaBufSize * 0;
4932                 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[1] =
4933                     pu1_mem_base + i4_lumaBufSize * 1;
4934                 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[0] =
4935                     pu1_mem_base + i4_lumaBufSize * 2;
4936                 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[1] =
4937                     pu1_mem_base + i4_lumaBufSize * 3;
4938 
4939                 pu1_mem_base += i4_lumaBufSize * 4;
4940 
4941                 switch(i4_quality_preset)
4942                 {
4943                 case IHEVCE_QUALITY_P0:
4944                 {
4945 #if ENABLE_CHROMA_RDOPT_EVAL_IN_PQ
4946                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4947                         pu1_mem_base + i4_chromaBufSize * 0;
4948                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4949                         pu1_mem_base + i4_chromaBufSize * 1;
4950 #else
4951                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4952                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4953 #endif
4954 
4955 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ
4956                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4957                         pu1_mem_base + i4_chromaBufSize * 2;
4958                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4959                         pu1_mem_base + i4_chromaBufSize * 3;
4960                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4961                         pu1_mem_base + i4_chromaBufSize * 2;
4962                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4963                         pu1_mem_base + i4_chromaBufSize * 3;
4964 #else
4965                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4966                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4967                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4968                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4969 #endif
4970 
4971                     break;
4972                 }
4973                 case IHEVCE_QUALITY_P2:
4974                 {
4975 #if ENABLE_CHROMA_RDOPT_EVAL_IN_HQ
4976                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4977                         pu1_mem_base + i4_chromaBufSize * 0;
4978                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4979                         pu1_mem_base + i4_chromaBufSize * 1;
4980 #else
4981                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4982                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4983 #endif
4984 
4985 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ
4986                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4987                         pu1_mem_base + i4_chromaBufSize * 2;
4988                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4989                         pu1_mem_base + i4_chromaBufSize * 3;
4990                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4991                         pu1_mem_base + i4_chromaBufSize * 2;
4992                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4993                         pu1_mem_base + i4_chromaBufSize * 3;
4994 #else
4995                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4996                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4997                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4998                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4999 #endif
5000 
5001                     break;
5002                 }
5003                 case IHEVCE_QUALITY_P3:
5004                 {
5005 #if ENABLE_CHROMA_RDOPT_EVAL_IN_MS
5006                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
5007                         pu1_mem_base + i4_chromaBufSize * 0;
5008                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
5009                         pu1_mem_base + i4_chromaBufSize * 1;
5010 #else
5011                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
5012                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
5013 #endif
5014 
5015 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS
5016                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
5017                         pu1_mem_base + i4_chromaBufSize * 2;
5018                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
5019                         pu1_mem_base + i4_chromaBufSize * 3;
5020                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
5021                         pu1_mem_base + i4_chromaBufSize * 2;
5022                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
5023                         pu1_mem_base + i4_chromaBufSize * 3;
5024 #else
5025                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
5026                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
5027                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
5028                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
5029 #endif
5030 
5031                     break;
5032                 }
5033                 case IHEVCE_QUALITY_P4:
5034                 {
5035 #if ENABLE_CHROMA_RDOPT_EVAL_IN_HS
5036                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
5037                         pu1_mem_base + i4_chromaBufSize * 0;
5038                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
5039                         pu1_mem_base + i4_chromaBufSize * 1;
5040 #else
5041                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
5042                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
5043 #endif
5044 
5045 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS
5046                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
5047                         pu1_mem_base + i4_chromaBufSize * 2;
5048                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
5049                         pu1_mem_base + i4_chromaBufSize * 3;
5050                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
5051                         pu1_mem_base + i4_chromaBufSize * 2;
5052                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
5053                         pu1_mem_base + i4_chromaBufSize * 3;
5054 #else
5055                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
5056                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
5057                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
5058                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
5059 #endif
5060 
5061                     break;
5062                 }
5063                 case IHEVCE_QUALITY_P5:
5064                 {
5065 #if ENABLE_CHROMA_RDOPT_EVAL_IN_XS
5066                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
5067                         pu1_mem_base + i4_chromaBufSize * 0;
5068                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
5069                         pu1_mem_base + i4_chromaBufSize * 1;
5070 #else
5071                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
5072                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
5073 #endif
5074 
5075 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS
5076                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
5077                         pu1_mem_base + i4_chromaBufSize * 2;
5078                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
5079                         pu1_mem_base + i4_chromaBufSize * 3;
5080                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
5081                         pu1_mem_base + i4_chromaBufSize * 2;
5082                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
5083                         pu1_mem_base + i4_chromaBufSize * 3;
5084 #else
5085                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
5086                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
5087                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
5088                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
5089 #endif
5090 
5091                     break;
5092                 }
5093                 }
5094             }
5095 
5096             ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE;
5097             ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE;
5098             ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE;
5099             ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE;
5100 
5101         } /* Recon Datastore */
5102 
5103         /****************************************************/
5104         /****************************************************/
5105         /* ps_pps->i1_sign_data_hiding_flag  == UNHIDDEN    */
5106         /* when NO_SBH. else HIDDEN                         */
5107         /****************************************************/
5108         /****************************************************/
5109         /* Zero cbf tool is enabled by default for all presets */
5110         ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
5111 
5112         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3)
5113         {
5114             ps_ctxt->i4_quant_rounding_level = CU_LEVEL_QUANT_ROUNDING;
5115             ps_ctxt->i4_chroma_quant_rounding_level = CHROMA_QUANT_ROUNDING;
5116             ps_ctxt->i4_rdoq_level = ALL_CAND_RDOQ;
5117             ps_ctxt->i4_sbh_level = ALL_CAND_SBH;
5118         }
5119         else if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P3)
5120         {
5121             ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5122             ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5123             ps_ctxt->i4_rdoq_level = NO_RDOQ;
5124             ps_ctxt->i4_sbh_level = NO_SBH;
5125         }
5126         else
5127         {
5128             ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5129             ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5130             ps_ctxt->i4_rdoq_level = NO_RDOQ;
5131             ps_ctxt->i4_sbh_level = NO_SBH;
5132         }
5133 
5134 #if DISABLE_QUANT_ROUNDING
5135         ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5136         ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5137 #endif
5138         /*Disabling RDOQ only when spatial modulation is enabled
5139                 as RDOQ degrades visual quality*/
5140         if(ps_init_prms->s_config_prms.i4_cu_level_rc & 1)
5141         {
5142             ps_ctxt->i4_rdoq_level = NO_RDOQ;
5143         }
5144 
5145 #if DISABLE_RDOQ
5146         ps_ctxt->i4_rdoq_level = NO_RDOQ;
5147 #endif
5148 
5149 #if DISABLE_SBH
5150         ps_ctxt->i4_sbh_level = NO_SBH;
5151 #endif
5152 
5153         /*Rounding factor calc based on previous cabac states */
5154 
5155         ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_4x4[0][0];
5156         ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_8x8[0][0];
5157         ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_16x16[0][0];
5158         ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[4] = &ps_ctxt->i4_quant_round_32x32[0][0];
5159 
5160         ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_4x4[1][0];
5161         ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_8x8[1][0];
5162         ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_16x16[1][0];
5163         ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[4] = &ps_ctxt->i4_quant_round_32x32[1][0];
5164 
5165         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_cr_4x4[0][0];
5166         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_cr_8x8[0][0];
5167         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_cr_16x16[0][0];
5168 
5169         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_cr_4x4[1][0];
5170         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_cr_8x8[1][0];
5171         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_cr_16x16[1][0];
5172 
5173         /****************************************************************************************/
5174         /* Setting the perform rdoq and sbh flags appropriately                                 */
5175         /****************************************************************************************/
5176         {
5177             /******************************************/
5178             /* For best cand rdoq and/or sbh          */
5179             /******************************************/
5180             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq =
5181                 (ps_ctxt->i4_rdoq_level == BEST_CAND_RDOQ);
5182             /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
5183             we would have to do RDOQ again.*/
5184             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq =
5185                 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq ||
5186                 ((BEST_CAND_SBH == ps_ctxt->i4_sbh_level) &&
5187                  (ALL_CAND_RDOQ == ps_ctxt->i4_rdoq_level));
5188 
5189             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh =
5190                 (ps_ctxt->i4_sbh_level == BEST_CAND_SBH);
5191 
5192             /* SBH should be performed if
5193             a) i4_sbh_level is BEST_CAND_SBH.
5194             b) For all quality presets above medium speed(i.e. high speed and extreme speed) and
5195             if SBH has to be done because for these presets the quant, iquant and scan coeff
5196             data are calculated in this function and not during the RDOPT stage*/
5197 
5198             /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
5199             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh =
5200                 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh ||
5201                 ((BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level) &&
5202                  (ALL_CAND_SBH == ps_ctxt->i4_sbh_level));
5203 
5204             /******************************************/
5205             /* For all cand rdoq and/or sbh          */
5206             /******************************************/
5207             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq =
5208                 (ps_ctxt->i4_rdoq_level == ALL_CAND_RDOQ);
5209             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh =
5210                 (ps_ctxt->i4_sbh_level == ALL_CAND_SBH);
5211             ps_ctxt->s_rdoq_sbh_ctxt.i4_bit_depth =
5212                 ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
5213         }
5214 
5215         if(!is_hbd_mode)
5216         {
5217             if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1)
5218             {
5219                 if(ps_ctxt->i4_rdoq_level == NO_RDOQ)
5220                 {
5221                     ps_ctxt->apf_quant_iquant_ssd[0] =
5222                         ps_func_selector->ihevc_quant_iquant_ssd_fptr;
5223                     ps_ctxt->apf_quant_iquant_ssd[2] = ps_func_selector->ihevc_quant_iquant_fptr;
5224                 }
5225                 else
5226                 {
5227                     ps_ctxt->apf_quant_iquant_ssd[0] =
5228                         ps_func_selector->ihevc_quant_iquant_ssd_rdoq_fptr;
5229                     ps_ctxt->apf_quant_iquant_ssd[2] =
5230                         ps_func_selector->ihevc_quant_iquant_rdoq_fptr;
5231                 }
5232 
5233                 /* If quant rounding is not fixed, select the variable-rounding-factor quant/iquant variants */
5234                 if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)
5235                 {
5236                     ps_ctxt->apf_quant_iquant_ssd[1] =
5237                         ps_func_selector->ihevc_q_iq_ssd_var_rnd_fact_fptr;
5238                     ps_ctxt->apf_quant_iquant_ssd[3] =
5239                         ps_func_selector->ihevc_q_iq_var_rnd_fact_fptr;
5240                 }
5241                 else
5242                 {
5243                     ps_ctxt->apf_quant_iquant_ssd[1] =
5244                         ps_func_selector->ihevc_quant_iquant_ssd_fptr;
5245                     ps_ctxt->apf_quant_iquant_ssd[3] = ps_func_selector->ihevc_quant_iquant_fptr;
5246                 }
5247             }
5248             else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0)
5249             {
5250                 if(ps_ctxt->i4_rdoq_level == NO_RDOQ)
5251                 {
5252                     ps_ctxt->apf_quant_iquant_ssd[0] =
5253                         ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr;
5254                     ps_ctxt->apf_quant_iquant_ssd[2] =
5255                         ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr;
5256                 }
5257                 else
5258                 {
5259                     ps_ctxt->apf_quant_iquant_ssd[0] =
5260                         ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_fptr;
5261                     ps_ctxt->apf_quant_iquant_ssd[2] =
5262                         ps_func_selector->ihevc_quant_iquant_flat_scale_mat_rdoq_fptr;
5263                 }
5264 
5265                 /* If quant rounding is not fixed, select the flat-scale-matrix variable-rounding-factor variants */
5266                 if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)
5267                 {
5268                     ps_ctxt->apf_quant_iquant_ssd[1] =
5269                         ps_func_selector->ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_fptr;
5270                     ps_ctxt->apf_quant_iquant_ssd[3] =
5271                         ps_func_selector->ihevc_q_iq_flat_scale_mat_var_rnd_fact_fptr;
5272                 }
5273                 else
5274                 {
5275                     ps_ctxt->apf_quant_iquant_ssd[1] =
5276                         ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr;
5277                     ps_ctxt->apf_quant_iquant_ssd[3] =
5278                         ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr;
5279                 }
5280             }
5281 
5282             ps_ctxt->s_sao_ctxt_t.apf_sao_luma[0] =
5283                 ps_func_selector->ihevc_sao_edge_offset_class0_fptr;
5284             ps_ctxt->s_sao_ctxt_t.apf_sao_luma[1] =
5285                 ps_func_selector->ihevc_sao_edge_offset_class1_fptr;
5286             ps_ctxt->s_sao_ctxt_t.apf_sao_luma[2] =
5287                 ps_func_selector->ihevc_sao_edge_offset_class2_fptr;
5288             ps_ctxt->s_sao_ctxt_t.apf_sao_luma[3] =
5289                 ps_func_selector->ihevc_sao_edge_offset_class3_fptr;
5290 
5291             ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[0] =
5292                 ps_func_selector->ihevc_sao_edge_offset_class0_chroma_fptr;
5293             ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[1] =
5294                 ps_func_selector->ihevc_sao_edge_offset_class1_chroma_fptr;
5295             ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[2] =
5296                 ps_func_selector->ihevc_sao_edge_offset_class2_chroma_fptr;
5297             ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[3] =
5298                 ps_func_selector->ihevc_sao_edge_offset_class3_chroma_fptr;
5299 
5300             ps_ctxt->apf_it_recon[0] = ps_func_selector->ihevc_itrans_recon_4x4_ttype1_fptr;
5301             ps_ctxt->apf_it_recon[1] = ps_func_selector->ihevc_itrans_recon_4x4_fptr;
5302             ps_ctxt->apf_it_recon[2] = ps_func_selector->ihevc_itrans_recon_8x8_fptr;
5303             ps_ctxt->apf_it_recon[3] = ps_func_selector->ihevc_itrans_recon_16x16_fptr;
5304             ps_ctxt->apf_it_recon[4] = ps_func_selector->ihevc_itrans_recon_32x32_fptr;
5305 
5306             ps_ctxt->apf_chrm_it_recon[0] = ps_func_selector->ihevc_chroma_itrans_recon_4x4_fptr;
5307             ps_ctxt->apf_chrm_it_recon[1] = ps_func_selector->ihevc_chroma_itrans_recon_8x8_fptr;
5308             ps_ctxt->apf_chrm_it_recon[2] = ps_func_selector->ihevc_chroma_itrans_recon_16x16_fptr;
5309 
5310             ps_ctxt->apf_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr;
5311             ps_ctxt->apf_resd_trns[1] = ps_func_selector->ihevc_resi_trans_4x4_fptr;
5312             ps_ctxt->apf_resd_trns[2] = ps_func_selector->ihevc_resi_trans_8x8_fptr;
5313             ps_ctxt->apf_resd_trns[3] = ps_func_selector->ihevc_resi_trans_16x16_fptr;
5314             ps_ctxt->apf_resd_trns[4] = ps_func_selector->ihevc_resi_trans_32x32_fptr;
5315 
5316             ps_ctxt->apf_chrm_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_fptr;
5317             ps_ctxt->apf_chrm_resd_trns[1] = ps_func_selector->ihevc_resi_trans_8x8_fptr;
5318             ps_ctxt->apf_chrm_resd_trns[2] = ps_func_selector->ihevc_resi_trans_16x16_fptr;
5319 
5320             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_0] =
5321                 ps_func_selector->ihevc_intra_pred_luma_planar_fptr;
5322             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_1] = ps_func_selector->ihevc_intra_pred_luma_dc_fptr;
5323             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_2] =
5324                 ps_func_selector->ihevc_intra_pred_luma_mode2_fptr;
5325             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_3TO9] =
5326                 ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr;
5327             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_10] =
5328                 ps_func_selector->ihevc_intra_pred_luma_horz_fptr;
5329             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_11TO17] =
5330                 ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr;
5331             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_18_34] =
5332                 ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr;
5333             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_19TO25] =
5334                 ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr;
5335             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_26] = ps_func_selector->ihevc_intra_pred_luma_ver_fptr;
5336             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_27TO33] =
5337                 ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr;
5338 
5339             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_0] =
5340                 ps_func_selector->ihevc_intra_pred_chroma_planar_fptr;
5341             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_1] =
5342                 ps_func_selector->ihevc_intra_pred_chroma_dc_fptr;
5343             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_2] =
5344                 ps_func_selector->ihevc_intra_pred_chroma_mode2_fptr;
5345             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_3TO9] =
5346                 ps_func_selector->ihevc_intra_pred_chroma_mode_3_to_9_fptr;
5347             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_10] =
5348                 ps_func_selector->ihevc_intra_pred_chroma_horz_fptr;
5349             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_11TO17] =
5350                 ps_func_selector->ihevc_intra_pred_chroma_mode_11_to_17_fptr;
5351             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_18_34] =
5352                 ps_func_selector->ihevc_intra_pred_chroma_mode_18_34_fptr;
5353             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_19TO25] =
5354                 ps_func_selector->ihevc_intra_pred_chroma_mode_19_to_25_fptr;
5355             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_26] =
5356                 ps_func_selector->ihevc_intra_pred_chroma_ver_fptr;
5357             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_27TO33] =
5358                 ps_func_selector->ihevc_intra_pred_chroma_mode_27_to_33_fptr;
5359 
5360             ps_ctxt->apf_chrm_resd_trns_had[0] =
5361                 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_4x4_8bit;
5362             ps_ctxt->apf_chrm_resd_trns_had[1] =
5363                 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_8x8_8bit;
5364             ps_ctxt->apf_chrm_resd_trns_had[2] =
5365                 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_16x16_8bit;
5366         }
5367 
5368         if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0)
5369         {
5370             /* initialise the scale & rescale matricies */
5371             ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5372             ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5373             ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_flat_scale_mat_8x8[0];
5374             ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_flat_scale_mat_16x16[0];
5375             ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_flat_scale_mat_32x32[0];
5376             /*init for inter matrix*/
5377             ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5378             ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5379             ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_flat_scale_mat_8x8[0];
5380             ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_flat_scale_mat_16x16[0];
5381             ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_flat_scale_mat_32x32[0];
5382 
5383             /*init for rescale matrix*/
5384             ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5385             ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5386             ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0];
5387             ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0];
5388             ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0];
5389             /*init for rescale inter matrix*/
5390             ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5391             ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5392             ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0];
5393             ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0];
5394             ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0];
5395         }
5396         else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1)
5397         {
5398             /* initialise the scale & rescale matricies */
5399             ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5400             ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5401             ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_intra_default_scale_mat_8x8[0];
5402             ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_intra_default_scale_mat_16x16[0];
5403             ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_intra_default_scale_mat_32x32[0];
5404             /*init for inter matrix*/
5405             ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5406             ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5407             ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_inter_default_scale_mat_8x8[0];
5408             ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_inter_default_scale_mat_16x16[0];
5409             ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_inter_default_scale_mat_32x32[0];
5410 
5411             /*init for rescale matrix*/
5412             ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5413             ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5414             ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_intra_default_rescale_mat_8x8[0];
5415             ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_intra_default_rescale_mat_16x16[0];
5416             ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_intra_default_rescale_mat_32x32[0];
5417             /*init for rescale inter matrix*/
5418             ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5419             ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5420             ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_inter_default_rescale_mat_8x8[0];
5421             ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_inter_default_rescale_mat_16x16[0];
5422             ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_inter_default_rescale_mat_32x32[0];
5423         }
5424         else
5425         {
5426             ASSERT(0);
5427         }
5428 
5429         /* Not recomputing Luma pred-data and header data for any preset now */
5430         ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0;
5431         ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0;
5432         ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1;
5433 
5434         switch(ps_ctxt->i4_quality_preset)
5435         {
5436         case IHEVCE_QUALITY_P0:
5437         {
5438             ps_ctxt->i4_max_merge_candidates = 5;
5439             ps_ctxt->i4_use_satd_for_merge_eval = 1;
5440             ps_ctxt->u1_use_top_at_ctb_boundary = 1;
5441             ps_ctxt->u1_use_early_cbf_data = 0;
5442             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_PQ;
5443             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5444                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ;
5445 
5446             break;
5447         }
5448         case IHEVCE_QUALITY_P2:
5449         {
5450             ps_ctxt->i4_max_merge_candidates = 5;
5451             ps_ctxt->i4_use_satd_for_merge_eval = 1;
5452             ps_ctxt->u1_use_top_at_ctb_boundary = 1;
5453             ps_ctxt->u1_use_early_cbf_data = 0;
5454 
5455             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HQ;
5456             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5457                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ;
5458 
5459             break;
5460         }
5461         case IHEVCE_QUALITY_P3:
5462         {
5463             ps_ctxt->i4_max_merge_candidates = 3;
5464             ps_ctxt->i4_use_satd_for_merge_eval = 1;
5465             ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5466 
5467             ps_ctxt->u1_use_early_cbf_data = 0;
5468             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_MS;
5469             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5470                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS;
5471 
5472             break;
5473         }
5474         case IHEVCE_QUALITY_P4:
5475         {
5476             ps_ctxt->i4_max_merge_candidates = 2;
5477             ps_ctxt->i4_use_satd_for_merge_eval = 1;
5478             ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5479             ps_ctxt->u1_use_early_cbf_data = 0;
5480             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HS;
5481             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5482                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS;
5483 
5484             break;
5485         }
5486         case IHEVCE_QUALITY_P5:
5487         {
5488             ps_ctxt->i4_max_merge_candidates = 2;
5489             ps_ctxt->i4_use_satd_for_merge_eval = 0;
5490             ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5491             ps_ctxt->u1_use_early_cbf_data = 0;
5492             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_XS;
5493             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5494                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS;
5495 
5496             break;
5497         }
5498         case IHEVCE_QUALITY_P6:
5499         {
5500             ps_ctxt->i4_max_merge_candidates = 2;
5501             ps_ctxt->i4_use_satd_for_merge_eval = 0;
5502             ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5503             ps_ctxt->u1_use_early_cbf_data = EARLY_CBF_ON;
5504             break;
5505         }
5506         default:
5507         {
5508             ASSERT(0);
5509         }
5510         }
5511 
5512 #if DISABLE_SKIP_AND_MERGE_EVAL
5513         ps_ctxt->i4_max_merge_candidates = 0;
5514 #endif
5515 
5516         ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data =
5517             !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
5518 
5519         /*initialize memory for RC related parameters required/populated by enc_loop */
5520         /* the allocated memory is distributed as follows assuming encoder is running for 3 bit-rate instances
5521         |-------|-> Thread 0, instance 0
5522         |       |
5523         |       |
5524         |       |
5525         |-------|-> thread 0, instance 1
5526         |       |
5527         |       |
5528         |       |
5529         |-------|-> thread 0, instance 2
5530         |       |
5531         |       |
5532         |       |
5533         |-------|-> thread 1, instance 0
5534         |       |
5535         |       |
5536         |       |
5537         |-------|-> thread 1, instance 1
5538         |       |
5539         |       |
5540         |       |
5541         |-------|-> thread 1, instance 2
5542         ...         ...
5543 
5544         Each thread will collate the data corresponding to the bit-rate instance it is running, at the appropriate place.
5545         Finally, one thread will become master and collate the data from all the threads */
5546         for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
5547         {
5548             for(i = 0; i < i4_num_bitrate_inst; i++)
5549             {
5550                 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i] = ps_enc_loop_rc_params;
5551                 ps_enc_loop_rc_params++;
5552             }
5553         }
5554         /* Non-Luma modes for Chroma are evaluated only in HIGH QUALITY preset */
5555 
5556 #if !ENABLE_SEPARATE_LUMA_CHROMA_INTRA_MODE
5557         ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd = 0;
5558 #endif
5559 
5560         ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_iq_buff_stride =
5561             MAX_TU_SIZE;
5562         ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_iq_buff_stride =
5563             MAX_TU_SIZE;
5564         /*Multiplying by two to account for interleaving of cb and cr*/
5565         ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_pred_stride = MAX_TU_SIZE
5566                                                                                        << 1;
5567         ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_pred_stride =
5568             MAX_TU_SIZE << 1;
5569 
5570         /*     Memory for a frame level memory to store tile-id                  */
5571         /*              corresponding to each CTB of frame                       */
5572         ps_ctxt->pi4_offset_for_last_cu_qp = &ps_master_ctxt->ai4_offset_for_last_cu_qp[0];
5573 
5574         ps_ctxt->i4_qp_mod = ps_init_prms->s_config_prms.i4_cu_level_rc & 1;
5575         /* psy rd strength is a run time parameter controlled by bit fields 5-7 in the VQET field.*/
5576         /* we disable psyrd if the psy strength is zero or the BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER field is not set */
5577         if(ps_init_prms->s_coding_tools_prms.i4_vqet &
5578            (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER))
5579         {
5580             UWORD32 psy_strength;
5581             UWORD32 psy_strength_mask =
5582                 224;  // only bits 5,6,7 are ones. These three bits represent the psy strength
5583             psy_strength = ps_init_prms->s_coding_tools_prms.i4_vqet & psy_strength_mask;
5584             ps_ctxt->u1_enable_psyRDOPT = 1;
5585             ps_ctxt->u4_psy_strength = psy_strength >> BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1;
5586             if(psy_strength == 0)
5587             {
5588                 ps_ctxt->u1_enable_psyRDOPT = 0;
5589                 ps_ctxt->u4_psy_strength = 0;
5590             }
5591         }
5592 
5593         ps_ctxt->u1_is_stasino_enabled =
5594             ((ps_init_prms->s_coding_tools_prms.i4_vqet &
5595               (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
5596              (ps_init_prms->s_coding_tools_prms.i4_vqet &
5597               (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)));
5598 
5599         ps_ctxt->u1_max_inter_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_nI;
5600         ps_ctxt->u1_max_intra_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_I;
5601         ps_ctxt++;
5602     }
5603     /* Store Tile params base into EncLoop Master context */
5604     ps_master_ctxt->pv_tile_params_base = (void *)ps_tile_params_base;
5605 
5606     if(1 == ps_tile_params_base->i4_tiles_enabled_flag)
5607     {
5608         i4_num_tile_cols = ps_tile_params_base->i4_num_tile_cols;
5609     }
5610 
5611     /* Updating  ai4_offset_for_last_cu_qp[] array for all tile-colums of frame */
5612     /* Loop over all tile-cols in frame */
5613     for(ctr = 0; ctr < i4_num_tile_cols; ctr++)
5614     {
5615         WORD32 i4_tile_col_wd_in_ctb_unit =
5616             (ps_tile_params_base + ctr)->i4_curr_tile_wd_in_ctb_unit;
5617         WORD32 offset_x;
5618 
5619         if(ctr == (i4_num_tile_cols - 1))
5620         { /* Last tile-row of frame */
5621             WORD32 min_cu_size = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size;
5622 
5623             WORD32 cu_aligned_pic_wd =
5624                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
5625                 SET_CTB_ALIGN(
5626                     ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width,
5627                     min_cu_size);
5628 
5629             WORD32 last_hz_ctb_wd = MAX_CTB_SIZE - (u4_width - cu_aligned_pic_wd);
5630 
5631             offset_x = (i4_tile_col_wd_in_ctb_unit - 1) * MAX_CTB_SIZE;
5632             offset_x += last_hz_ctb_wd;
5633         }
5634         else
5635         { /* Not the last tile-row of frame */
5636             offset_x = (i4_tile_col_wd_in_ctb_unit)*MAX_CTB_SIZE;
5637         }
5638 
5639         offset_x /= 4;
5640         offset_x -= 1;
5641 
5642         ps_master_ctxt->ai4_offset_for_last_cu_qp[ctr] = offset_x;
5643     }
5644 
5645     n_tabs = NUM_ENC_LOOP_MEM_RECS;
5646 
5647     /*store num bit-rate instances in the master context */
5648     ps_master_ctxt->i4_num_bitrates = i4_num_bitrate_inst;
5649     ps_master_ctxt->i4_num_enc_loop_frm_pllel = i4_num_enc_loop_frm_pllel;
5650     /*************************************************************************/
5651     /* --- EncLoop Deblock sync Dep Mngr Mem init --                         */
5652     /*************************************************************************/
5653     {
5654         WORD32 count;
5655         WORD32 num_vert_units, num_blks_in_row;
5656         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
5657         WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
5658 
5659         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
5660         ihevce_enc_loop_dblk_get_prms_dep_mngr(wd, &num_blks_in_row);
5661         ASSERT(num_vert_units > 0);
5662         ASSERT(num_blks_in_row > 0);
5663 
5664         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
5665         {
5666             for(i = 0; i < i4_num_bitrate_inst; i++)
5667             {
5668                 ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[count][i] = ihevce_dmgr_init(
5669                     &ps_mem_tab[n_tabs],
5670                     pv_osal_handle,
5671                     DEP_MNGR_ROW_ROW_SYNC,
5672                     num_vert_units,
5673                     num_blks_in_row,
5674                     i4_num_tile_cols, /* Number of Col Tiles */
5675                     i4_num_proc_thrds,
5676                     0 /*Sem Disabled*/
5677                 );
5678 
5679                 n_tabs += ihevce_dmgr_get_num_mem_recs();
5680             }
5681         }
5682     }
5683     /*************************************************************************/
5684     /* --- EncLoop Top-Right CU synnc Dep Mngr Mem init --                   */
5685     /*************************************************************************/
5686     {
5687         WORD32 count;
5688         WORD32 num_vert_units, num_blks_in_row;
5689         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
5690         WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
5691 
5692         WORD32 i4_sem = 0;
5693 
5694         if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset >=
5695            IHEVCE_QUALITY_P4)
5696             i4_sem = 0;
5697         else
5698             i4_sem = 1;
5699         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
5700         /* For Top-Right CU sync, adding one more CTB since value updation */
5701         /* happens in that way for the last CTB in the row                 */
5702         num_blks_in_row = wd + SET_CTB_ALIGN(wd, MAX_CU_SIZE);
5703         num_blks_in_row += MAX_CTB_SIZE;
5704 
5705         ASSERT(num_vert_units > 0);
5706         ASSERT(num_blks_in_row > 0);
5707 
5708         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
5709         {
5710             for(i = 0; i < i4_num_bitrate_inst; i++)
5711             {
5712                 /* For ES/HS, CU level updates uses spin-locks than semaphore */
5713                 {
5714                     ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[count][i] =
5715                         ihevce_dmgr_init(
5716                             &ps_mem_tab[n_tabs],
5717                             pv_osal_handle,
5718                             DEP_MNGR_ROW_ROW_SYNC,
5719                             num_vert_units,
5720                             num_blks_in_row,
5721                             i4_num_tile_cols, /* Number of Col Tiles */
5722                             i4_num_proc_thrds,
5723                             i4_sem /*Sem Disabled*/
5724                         );
5725                 }
5726                 n_tabs += ihevce_dmgr_get_num_mem_recs();
5727             }
5728         }
5729     }
5730 
5731     for(i = 1; i < 5; i++)
5732     {
5733         WORD32 i4_log2_trans_size = i + 1;
5734         WORD32 i4_bit_depth = ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
5735 
5736         ga_trans_shift[i] = (MAX_TR_DYNAMIC_RANGE - i4_bit_depth - i4_log2_trans_size) << 1;
5737     }
5738 
5739     ga_trans_shift[0] = ga_trans_shift[1];
5740 
5741     /* return the handle to caller */
5742     return ((void *)ps_master_ctxt);
5743 }
5744 
5745 /*!
5746 ******************************************************************************
5747 * \if Function name : ihevce_enc_loop_reg_sem_hdls \endif
5748 *
5749 * \brief
5750 *    Intialization for ENC_LOOP context state structure .
5751 *
5752 * \param[in] ps_mem_tab : pointer to memory descriptors table
5753 * \param[in] ppv_sem_hdls : Array of semaphore handles
5754 * \param[in] i4_num_proc_thrds : Number of processing threads
5755 *
5756 * \return
5757 *    None
5758 *
5759 * \author
5760 *  Ittiam
5761 *
5762 *****************************************************************************
5763 */
ihevce_enc_loop_reg_sem_hdls(void * pv_enc_loop_ctxt,void ** ppv_sem_hdls,WORD32 i4_num_proc_thrds)5764 void ihevce_enc_loop_reg_sem_hdls(
5765     void *pv_enc_loop_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds)
5766 {
5767     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5768     WORD32 i, enc_frm_id;
5769 
5770     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5771 
5772     /*************************************************************************/
5773     /* --- EncLoop Deblock sync Dep Mngr reg Semaphores --                   */
5774     /*************************************************************************/
5775     for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5776     {
5777         for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
5778         {
5779             ihevce_dmgr_reg_sem_hdls(
5780                 ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[enc_frm_id][i],
5781                 ppv_sem_hdls,
5782                 i4_num_proc_thrds);
5783         }
5784     }
5785 
5786     /*************************************************************************/
5787     /* --- EncLoop Top-Right CU synnc Dep Mngr reg Semaphores --             */
5788     /*************************************************************************/
5789     for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5790     {
5791         for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
5792         {
5793             ihevce_dmgr_reg_sem_hdls(
5794                 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[enc_frm_id][i],
5795                 ppv_sem_hdls,
5796                 i4_num_proc_thrds);
5797         }
5798     }
5799 
5800     return;
5801 }
5802 
5803 /*!
5804 ******************************************************************************
5805 * \if Function name : ihevce_enc_loop_delete \endif
5806 *
5807 * \brief
5808 *    Destroy EncLoop module
5809 * Note : Only Destroys the resources allocated in the module like
5810 *   semaphore,etc. Memory free is done Separately using memtabs
5811 *
5812 * \param[in] pv_me_ctxt : pointer to EncLoop ctxt
5813 *
5814 * \return
5815 *    None
5816 *
5817 * \author
5818 *  Ittiam
5819 *
5820 *****************************************************************************
5821 */
ihevce_enc_loop_delete(void * pv_enc_loop_ctxt)5822 void ihevce_enc_loop_delete(void *pv_enc_loop_ctxt)
5823 {
5824     ihevce_enc_loop_master_ctxt_t *ps_enc_loop_ctxt;
5825     WORD32 ctr, enc_frm_id;
5826 
5827     ps_enc_loop_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5828 
5829     for(enc_frm_id = 0; enc_frm_id < ps_enc_loop_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5830     {
5831         for(ctr = 0; ctr < ps_enc_loop_ctxt->i4_num_bitrates; ctr++)
5832         {
5833             /* --- EncLoop Deblock sync Dep Mngr Delete --*/
5834             ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_dblk[enc_frm_id][ctr]);
5835             /* --- EncLoop Top-Right CU sync Dep Mngr Delete --*/
5836             ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[enc_frm_id][ctr]);
5837         }
5838     }
5839 }
5840 
5841 /*!
5842 ******************************************************************************
5843 * \if Function name : ihevce_enc_loop_dep_mngr_frame_reset \endif
5844 *
5845 * \brief
5846 *    Frame level Reset for the Dependency Mngrs local to EncLoop.,
5847 *    ie CU_TopRight and Dblk
5848 *
5849 * \param[in] pv_enc_loop_ctxt       : Enc_loop context pointer
5850 *
5851 * \return
5852 *    None
5853 *
5854 * \author
5855 *  Ittiam
5856 *
5857 *****************************************************************************
5858 */
ihevce_enc_loop_dep_mngr_frame_reset(void * pv_enc_loop_ctxt,WORD32 enc_frm_id)5859 void ihevce_enc_loop_dep_mngr_frame_reset(void *pv_enc_loop_ctxt, WORD32 enc_frm_id)
5860 {
5861     WORD32 ctr, frame_id;
5862     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5863 
5864     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5865 
5866     if(1 == ps_master_ctxt->i4_num_enc_loop_frm_pllel)
5867     {
5868         frame_id = 0;
5869     }
5870     else
5871     {
5872         frame_id = enc_frm_id;
5873     }
5874 
5875     for(ctr = 0; ctr < ps_master_ctxt->i4_num_bitrates; ctr++)
5876     {
5877         /* Dep. Mngr : Reset the num ctb Deblocked in every row  for ENC sync */
5878         ihevce_dmgr_rst_row_row_sync(ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[frame_id][ctr]);
5879 
5880         /* Dep. Mngr : Reset the TopRight CU Processed in every row  for ENC sync */
5881         ihevce_dmgr_rst_row_row_sync(
5882             ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[frame_id][ctr]);
5883     }
5884 }
5885 
5886 /*!
5887 ******************************************************************************
5888 * \if Function name : ihevce_enc_loop_frame_init \endif
5889 *
5890 * \brief
5891 *    Frame level init of encode loop function.
5892 *
5893 * \param[in] pv_enc_loop_ctxt           : Enc_loop context pointer
5894 * \param[in] pi4_cu_processed           : ptr to cur frame cu process in pix.
5895 * \param[in] aps_ref_list               : ref pic list for the current frame
5896 * \param[in] ps_slice_hdr               : ptr to current slice header params
5897 * \param[in] ps_pps                     : ptr to active pps params
5898 * \param[in] ps_sps                     : ptr to active sps params
5899 * \param[in] ps_vps                     : ptr to active vps params
5900 
5901 
5902 * \param[in] i1_weighted_pred_flag      : weighted pred enable flag (unidir)
5903 * \param[in] i1_weighted_bipred_flag    : weighted pred enable flag (bidir)
5904 * \param[in] log2_luma_wght_denom       : down shift factor for weighted pred of luma
5905 * \param[in] log2_chroma_wght_denom       : down shift factor for weighted pred of chroma
5906 * \param[in] cur_poc                    : current frame POC
5907 * \param[in] i4_bitrate_instance_num    : number indicating the instance of bit-rate for multi-rate encoder
5908 *
5909 * \return
5910 *    None
5911 *
5912 * \author
5913 *  Ittiam
5914 *
5915 *****************************************************************************
5916 */
ihevce_enc_loop_frame_init(void * pv_enc_loop_ctxt,WORD32 i4_frm_qp,recon_pic_buf_t * (* aps_ref_list)[HEVCE_MAX_REF_PICS * 2],recon_pic_buf_t * ps_frm_recon,slice_header_t * ps_slice_hdr,pps_t * ps_pps,sps_t * ps_sps,vps_t * ps_vps,WORD8 i1_weighted_pred_flag,WORD8 i1_weighted_bipred_flag,WORD32 log2_luma_wght_denom,WORD32 log2_chroma_wght_denom,WORD32 cur_poc,WORD32 i4_display_num,enc_ctxt_t * ps_enc_ctxt,me_enc_rdopt_ctxt_t * ps_curr_inp_prms,WORD32 i4_bitrate_instance_num,WORD32 i4_thrd_id,WORD32 i4_enc_frm_id,WORD32 i4_num_bitrates,WORD32 i4_quality_preset,void * pv_dep_mngr_encloop_dep_me)5917 void ihevce_enc_loop_frame_init(
5918     void *pv_enc_loop_ctxt,
5919     WORD32 i4_frm_qp,
5920     recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2],
5921     recon_pic_buf_t *ps_frm_recon,
5922     slice_header_t *ps_slice_hdr,
5923     pps_t *ps_pps,
5924     sps_t *ps_sps,
5925     vps_t *ps_vps,
5926     WORD8 i1_weighted_pred_flag,
5927     WORD8 i1_weighted_bipred_flag,
5928     WORD32 log2_luma_wght_denom,
5929     WORD32 log2_chroma_wght_denom,
5930     WORD32 cur_poc,
5931     WORD32 i4_display_num,
5932     enc_ctxt_t *ps_enc_ctxt,
5933     me_enc_rdopt_ctxt_t *ps_curr_inp_prms,
5934     WORD32 i4_bitrate_instance_num,
5935     WORD32 i4_thrd_id,
5936     WORD32 i4_enc_frm_id,
5937     WORD32 i4_num_bitrates,
5938     WORD32 i4_quality_preset,
5939     void *pv_dep_mngr_encloop_dep_me)
5940 {
5941     /* local variables */
5942     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5943     ihevce_enc_loop_ctxt_t *ps_ctxt;
5944     WORD32 chroma_qp_offset, i4_div_factor;
5945     WORD8 i1_slice_type = ps_slice_hdr->i1_slice_type;
5946     WORD8 i1_strong_intra_smoothing_enable_flag = ps_sps->i1_strong_intra_smoothing_enable_flag;
5947 
5948     /* ENC_LOOP master state structure */
5949     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5950 
5951     /* Nithya: Store the current POC in the slice header */
5952     ps_slice_hdr->i4_abs_pic_order_cnt = cur_poc;
5953 
5954     /* Update the POC list of the current frame to the recon buffer */
5955     if(ps_slice_hdr->i1_num_ref_idx_l0_active != 0)
5956     {
5957         int i4_i;
5958         for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l0_active; i4_i++)
5959         {
5960             ps_frm_recon->ai4_col_l0_poc[i4_i] = aps_ref_list[0][i4_i]->i4_poc;
5961         }
5962     }
5963     if(ps_slice_hdr->i1_num_ref_idx_l1_active != 0)
5964     {
5965         int i4_i;
5966         for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l1_active; i4_i++)
5967         {
5968             ps_frm_recon->ai4_col_l1_poc[i4_i] = aps_ref_list[1][i4_i]->i4_poc;
5969         }
5970     }
5971 
5972     /* loop over all the threads */
5973     // for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++)
5974     {
5975         /* ENC_LOOP state structure */
5976         ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i4_thrd_id];
5977 
5978         /* SAO ctxt structure initialization*/
5979         ps_ctxt->s_sao_ctxt_t.ps_pps = ps_pps;
5980         ps_ctxt->s_sao_ctxt_t.ps_sps = ps_sps;
5981         ps_ctxt->s_sao_ctxt_t.ps_slice_hdr = ps_slice_hdr;
5982 
5983         /*bit-rate instance number for Multi-bitrate (MBR) encode */
5984         ps_ctxt->i4_bitrate_instance_num = i4_bitrate_instance_num;
5985         ps_ctxt->i4_num_bitrates = i4_num_bitrates;
5986         ps_ctxt->i4_chroma_format = ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_chr_format;
5987         ps_ctxt->i4_is_first_query = 1;
5988         ps_ctxt->i4_is_ctb_qp_modified = 0;
5989 
5990         /* enc_frm_id for multiframe encode */
5991 
5992         if(1 == ps_enc_ctxt->s_multi_thrd.i4_num_enc_loop_frm_pllel)
5993         {
5994             ps_ctxt->i4_enc_frm_id = 0;
5995             i4_enc_frm_id = 0;
5996         }
5997         else
5998         {
5999             ps_ctxt->i4_enc_frm_id = i4_enc_frm_id;
6000         }
6001 
6002         /*Initialize the sub pic rc buf appropriately */
6003 
6004         /*Set the thrd id flag */
6005         ps_enc_ctxt->s_multi_thrd
6006             .ai4_thrd_id_valid_flag[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 1;
6007 
6008         ps_enc_ctxt->s_multi_thrd
6009             .ai8_nctb_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
6010         ps_enc_ctxt->s_multi_thrd
6011             .ai8_nctb_me_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
6012 
6013         ps_enc_ctxt->s_multi_thrd
6014             .ai8_nctb_l0_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
6015         ps_enc_ctxt->s_multi_thrd
6016             .ai8_nctb_act_factor[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
6017 
6018         ps_enc_ctxt->s_multi_thrd
6019             .ai8_nctb_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
6020         ps_enc_ctxt->s_multi_thrd
6021             .ai8_acc_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
6022         ps_enc_ctxt->s_multi_thrd
6023             .ai8_acc_bits_mul_qs_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
6024         ps_enc_ctxt->s_multi_thrd
6025             .ai8_nctb_hdr_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
6026         ps_enc_ctxt->s_multi_thrd
6027             .ai8_nctb_mpm_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
6028         ps_enc_ctxt->s_multi_thrd.ai4_prev_chunk_qp[i4_enc_frm_id][i4_bitrate_instance_num] =
6029             i4_frm_qp;
6030 
6031         /*Frame level data for Sub Pic rc is initalized here */
6032         /*Can be sent once per frame*/
6033         {
6034             WORD32 i4_tot_frame_ctb = ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert *
6035                                       ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_horz;
6036 
6037             /*Accumalated bits of all cu for required CTBS estimated during RDO evaluation*/
6038             ps_ctxt->u4_total_cu_bits = 0;
6039             ps_ctxt->u4_total_cu_hdr_bits = 0;
6040 
6041             ps_ctxt->u4_cu_tot_bits_into_qscale = 0;
6042             ps_ctxt->u4_cu_tot_bits = 0;
6043             ps_ctxt->u4_total_cu_bits_mul_qs = 0;
6044             ps_ctxt->i4_display_num = i4_display_num;
6045             ps_ctxt->i4_sub_pic_level_rc = ps_enc_ctxt->s_multi_thrd.i4_in_frame_rc_enabled;
6046             /*The Qscale is to be generated every 10th of total frame ctb is completed */
6047             //ps_ctxt->i4_num_ctb_for_out_scale = (10 * i4_tot_frame_ctb)/100 ;
6048             ps_ctxt->i4_num_ctb_for_out_scale = (UPDATE_QP_AT_CTB * i4_tot_frame_ctb) / 100;
6049 
6050             ps_ctxt->i4_cu_qp_sub_pic_rc = (1 << QP_LEVEL_MOD_ACT_FACTOR);
6051             /*Sub Pic RC frame level params */
6052             ps_ctxt->i8_frame_l1_ipe_sad =
6053                 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_pre_intra_sad;
6054             ps_ctxt->i8_frame_l0_ipe_satd =
6055                 ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_l0_acc_satd;
6056             ps_ctxt->i8_frame_l1_me_sad =
6057                 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_l1_coarse_me_sad;
6058             ps_ctxt->i8_frame_l1_activity_fact =
6059                 ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_level_activity_fact;
6060             if(ps_ctxt->i4_sub_pic_level_rc)
6061             {
6062                 ASSERT(
6063                     ps_curr_inp_prms->ps_curr_inp->s_lap_out
6064                         .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num] != 0);
6065 
6066                 ps_ctxt->ai4_frame_bits_estimated[ps_ctxt->i4_enc_frm_id]
6067                                                  [ps_ctxt->i4_bitrate_instance_num] =
6068                     ps_curr_inp_prms->ps_curr_inp->s_lap_out
6069                         .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num];
6070             }
6071             //ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type = 1;
6072 
6073             ps_ctxt->i4_is_I_scenecut =
6074                 ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) &&
6075                  (ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_IDR_FRAME ||
6076                   ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_I_FRAME));
6077 
6078             ps_ctxt->i4_is_non_I_scenecut =
6079                 ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) &&
6080                  (ps_ctxt->i4_is_I_scenecut == 0));
6081 
6082             /*ps_ctxt->i4_is_I_only_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_I_only_scd;
6083             ps_ctxt->i4_is_non_I_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_non_I_scd;*/
6084             ps_ctxt->i4_is_model_valid =
6085                 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i4_is_model_valid;
6086         }
6087         /* cb and cr offsets are assumed to be same */
6088         chroma_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset + ps_pps->i1_pic_cb_qp_offset;
6089 
6090         /* assumption of cb = cr qp */
6091         ASSERT(ps_slice_hdr->i1_slice_cb_qp_offset == ps_slice_hdr->i1_slice_cr_qp_offset);
6092         ASSERT(ps_pps->i1_pic_cb_qp_offset == ps_pps->i1_pic_cr_qp_offset);
6093 
6094         ps_ctxt->u1_is_input_data_hbd = (ps_sps->i1_bit_depth_luma_minus8 > 0);
6095 
6096         ps_ctxt->u1_bit_depth = ps_sps->i1_bit_depth_luma_minus8 + 8;
6097 
6098         ps_ctxt->s_mc_ctxt.i4_bit_depth = ps_ctxt->u1_bit_depth;
6099         ps_ctxt->s_mc_ctxt.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
6100 
6101         /*remember chroma qp offset as qp related parameters are calculated at CU level*/
6102         ps_ctxt->i4_chroma_qp_offset = chroma_qp_offset;
6103         ps_ctxt->i1_cu_qp_delta_enable = ps_pps->i1_cu_qp_delta_enabled_flag;
6104         ps_ctxt->i1_entropy_coding_sync_enabled_flag = ps_pps->i1_entropy_coding_sync_enabled_flag;
6105 
6106         ps_ctxt->i4_is_ref_pic = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_ref_pic;
6107         ps_ctxt->i4_temporal_layer = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_temporal_lyr_id;
6108         ps_ctxt->i4_use_const_lamda_modifier = USE_CONSTANT_LAMBDA_MODIFIER;
6109         ps_ctxt->i4_use_const_lamda_modifier =
6110             ps_ctxt->i4_use_const_lamda_modifier ||
6111             ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6112               (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
6113              ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6114                (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) ||
6115               (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6116                (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1)) ||
6117               (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6118                (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_2)) ||
6119               (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6120                (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_3))));
6121 
6122         {
6123             ps_ctxt->f_i_pic_lamda_modifier =
6124                 ps_curr_inp_prms->ps_curr_inp->s_lap_out.f_i_pic_lamda_modifier;
6125         }
6126 
6127         ps_ctxt->i4_frame_qp = i4_frm_qp;
6128         ps_ctxt->i4_frame_mod_qp = i4_frm_qp;
6129         ps_ctxt->i4_cu_qp = i4_frm_qp;
6130         ps_ctxt->i4_prev_cu_qp = i4_frm_qp;
6131         ps_ctxt->i4_chrm_cu_qp =
6132             (ps_ctxt->u1_chroma_array_type == 2)
6133                 ? MIN(i4_frm_qp + chroma_qp_offset, 51)
6134                 : gai1_ihevc_chroma_qp_scale[i4_frm_qp + chroma_qp_offset + MAX_QP_BD_OFFSET];
6135 
6136         ps_ctxt->i4_cu_qp_div6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
6137         i4_div_factor = (i4_frm_qp + 3) / 6;
6138         i4_div_factor = CLIP3(i4_div_factor, 3, 6);
6139         ps_ctxt->i4_cu_qp_mod6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
6140 
6141         ps_ctxt->i4_chrm_cu_qp_div6 =
6142             (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
6143         ps_ctxt->i4_chrm_cu_qp_mod6 =
6144             (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
6145 
6146 #define INTER_RND_QP_BY_6
6147 #ifdef INTER_RND_QP_BY_6
6148 
6149         { /*1/6 rounding for 8 bit b frames*/
6150             ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = 85
6151                 /*((1 << QUANT_ROUND_FACTOR_Q) / 6)*/;
6152         }
6153 #else
6154         /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
6155         ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3;
6156 #endif
6157 
6158         if(ISLICE == i1_slice_type)
6159         {
6160             /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
6161             ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] = 171
6162                 /*((1 << QUANT_ROUND_FACTOR_Q) / 6)*/;
6163         }
6164         else
6165         {
6166             /* quant factor without RDOQ is 1/6th of shift for intra in inter pic */
6167             ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
6168                 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER];
6169             /* (1 << QUANT_ROUND_FACTOR_Q) / 6; */
6170         }
6171 
6172         ps_ctxt->i1_strong_intra_smoothing_enable_flag = i1_strong_intra_smoothing_enable_flag;
6173 
6174         ps_ctxt->i1_slice_type = i1_slice_type;
6175 
6176         /* intialize the inter pred (MC) context at frame level */
6177         ps_ctxt->s_mc_ctxt.ps_ref_list = aps_ref_list;
6178         ps_ctxt->s_mc_ctxt.i1_weighted_pred_flag = i1_weighted_pred_flag;
6179         ps_ctxt->s_mc_ctxt.i1_weighted_bipred_flag = i1_weighted_bipred_flag;
6180         ps_ctxt->s_mc_ctxt.i4_log2_luma_wght_denom = log2_luma_wght_denom;
6181         ps_ctxt->s_mc_ctxt.i4_log2_chroma_wght_denom = log2_chroma_wght_denom;
6182 
6183         /* intialize the MV pred context at frame level */
6184         ps_ctxt->s_mv_pred_ctxt.ps_ref_list = aps_ref_list;
6185         ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr = ps_slice_hdr;
6186         ps_ctxt->s_mv_pred_ctxt.ps_sps = ps_sps;
6187         ps_ctxt->s_mv_pred_ctxt.i4_log2_parallel_merge_level_minus2 =
6188             ps_pps->i1_log2_parallel_merge_level - 2;
6189 
6190 #if ADAPT_COLOCATED_FROM_L0_FLAG
6191         if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_temporal_mvp_enable_flag)
6192         {
6193             if((ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_num_ref_idx_l1_active > 0) &&
6194                (ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][0]->i4_frame_qp <
6195                 ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][0]->i4_frame_qp))
6196             {
6197                 ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_collocated_from_l0_flag = 1;
6198             }
6199         }
6200 #endif
6201         /* Initialization of deblocking params */
6202         ps_ctxt->s_deblk_prms.i4_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
6203         ps_ctxt->s_deblk_prms.i4_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
6204 
6205         ps_ctxt->s_deblk_prms.i4_cb_qp_indx_offset = ps_pps->i1_pic_cb_qp_offset;
6206 
6207         ps_ctxt->s_deblk_prms.i4_cr_qp_indx_offset = ps_pps->i1_pic_cr_qp_offset;
6208         /*init frame level stat accumualtion parameters */
6209         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6210             ->u4_frame_sad_acc = 0;
6211         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6212             ->u4_frame_intra_sad_acc = 0;
6213         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6214             ->u4_frame_open_loop_intra_sad = 0;
6215         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6216             ->i8_frame_open_loop_ssd = 0;
6217         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6218             ->u4_frame_inter_sad_acc = 0;
6219 
6220         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6221             ->i8_frame_cost_acc = 0;
6222         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6223             ->i8_frame_intra_cost_acc = 0;
6224         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6225             ->i8_frame_inter_cost_acc = 0;
6226 
6227         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6228             ->u4_frame_intra_sad = 0;
6229         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6230             ->u4_frame_rdopt_bits = 0;
6231         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6232             ->u4_frame_rdopt_header_bits = 0;
6233         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6234             ->i4_qp_normalized_8x8_cu_sum[0] = 0;
6235         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6236             ->i4_qp_normalized_8x8_cu_sum[1] = 0;
6237         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6238             ->i4_8x8_cu_sum[0] = 0;
6239         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6240             ->i4_8x8_cu_sum[1] = 0;
6241         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6242             ->i8_sad_by_qscale[0] = 0;
6243         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6244             ->i8_sad_by_qscale[1] = 0;
6245         /* Compute the frame_qstep */
6246         GET_FRAME_QSTEP_FROM_QP(ps_ctxt->i4_frame_qp, ps_ctxt->i4_frame_qstep);
6247 
6248         ps_ctxt->u1_max_tr_depth = ps_sps->i1_max_transform_hierarchy_depth_inter;
6249 
6250         ps_ctxt->ps_rc_quant_ctxt = &ps_enc_ctxt->s_rc_quant;
6251         /* intialize the cabac rdopt context at frame level */
6252         ihevce_entropy_rdo_frame_init(
6253             &ps_ctxt->s_rdopt_entropy_ctxt,
6254             ps_slice_hdr,
6255             ps_pps,
6256             ps_sps,
6257             ps_vps,
6258             ps_master_ctxt->au1_cu_skip_top_row,
6259             &ps_enc_ctxt->s_rc_quant);
6260 
6261         /* register the dep mngr instance for forward ME sync */
6262         ps_ctxt->pv_dep_mngr_encloop_dep_me = pv_dep_mngr_encloop_dep_me;
6263     }
6264 }
6265 /*
6266 ******************************************************************************
6267 * \if Function name : ihevce_enc_loop_get_frame_rc_prms \endif
6268 *
6269 * \brief
6270 *    returns Nil
6271 *
6272 * \param[in] pv_enc_loop_ctxt : pointer to encode loop context
6273 * \param[out]ps_rc_prms       : ptr to frame level info structure
6274 *
6275 * \return
6276 *    None
6277 *
6278 * \author
6279 *  Ittiam
6280 *
6281 *****************************************************************************
6282 */
ihevce_enc_loop_get_frame_rc_prms(void * pv_enc_loop_ctxt,rc_bits_sad_t * ps_rc_prms,WORD32 i4_br_id,WORD32 i4_enc_frm_id)6283 void ihevce_enc_loop_get_frame_rc_prms(
6284     void *pv_enc_loop_ctxt,
6285     rc_bits_sad_t *ps_rc_prms,
6286     WORD32 i4_br_id,  //bitrate instance id
6287     WORD32 i4_enc_frm_id)  // frame id
6288 {
6289     /*Get the master thread pointer*/
6290     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
6291     ihevce_enc_loop_ctxt_t *ps_ctxt;
6292     UWORD32 total_frame_intra_sad = 0, total_frame_open_loop_intra_sad = 0;
6293     LWORD64 i8_total_ssd_frame = 0;
6294     UWORD32 total_frame_sad = 0;
6295     UWORD32 total_frame_rdopt_bits = 0;
6296     UWORD32 total_frame_rdopt_header_bits = 0;
6297     WORD32 i4_qp_normalized_8x8_cu_sum[2] = { 0, 0 };
6298     WORD32 i4_8x8_cu_sum[2] = { 0, 0 };
6299     LWORD64 i8_sad_by_qscale[2] = { 0, 0 };
6300     WORD32 i4_curr_qp_acc = 0;
6301     WORD32 i;
6302 
6303     /* ENC_LOOP master state structure */
6304     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
6305 
6306     if(1 == ps_master_ctxt->i4_num_enc_loop_frm_pllel)
6307     {
6308         i4_enc_frm_id = 0;
6309     }
6310     /*loop through all threads and accumulate intra sad across all threads*/
6311     for(i = 0; i < ps_master_ctxt->i4_num_proc_thrds; i++)
6312     {
6313         /* ENC_LOOP state structure */
6314         ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i];
6315         total_frame_open_loop_intra_sad +=
6316             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_open_loop_intra_sad;
6317         i8_total_ssd_frame +=
6318             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_open_loop_ssd;
6319         total_frame_intra_sad +=
6320             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_intra_sad;
6321         total_frame_sad +=
6322             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_sad_acc;
6323         total_frame_rdopt_bits +=
6324             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_bits;
6325         total_frame_rdopt_header_bits +=
6326             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_header_bits;
6327         i4_qp_normalized_8x8_cu_sum[0] += ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
6328                                               ->i4_qp_normalized_8x8_cu_sum[0];
6329         i4_qp_normalized_8x8_cu_sum[1] += ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
6330                                               ->i4_qp_normalized_8x8_cu_sum[1];
6331         i4_8x8_cu_sum[0] +=
6332             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[0];
6333         i4_8x8_cu_sum[1] +=
6334             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[1];
6335         i8_sad_by_qscale[0] +=
6336             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[0];
6337         i8_sad_by_qscale[1] +=
6338             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[1];
6339     }
6340 
6341     ps_rc_prms->u4_open_loop_intra_sad = total_frame_open_loop_intra_sad;
6342     ps_rc_prms->i8_total_ssd_frame = i8_total_ssd_frame;
6343     ps_rc_prms->u4_total_sad = total_frame_sad;
6344     ps_rc_prms->u4_total_texture_bits = total_frame_rdopt_bits - total_frame_rdopt_header_bits;
6345     ps_rc_prms->u4_total_header_bits = total_frame_rdopt_header_bits;
6346     /*This accumulation of intra frame sad is not intact. This can only be a temp change*/
6347     ps_rc_prms->u4_total_intra_sad = total_frame_intra_sad;
6348     ps_rc_prms->i4_qp_normalized_8x8_cu_sum[0] = i4_qp_normalized_8x8_cu_sum[0];
6349     ps_rc_prms->i4_qp_normalized_8x8_cu_sum[1] = i4_qp_normalized_8x8_cu_sum[1];
6350     ps_rc_prms->i4_8x8_cu_sum[0] = i4_8x8_cu_sum[0];
6351     ps_rc_prms->i4_8x8_cu_sum[1] = i4_8x8_cu_sum[1];
6352     ps_rc_prms->i8_sad_by_qscale[0] = i8_sad_by_qscale[0];
6353     ps_rc_prms->i8_sad_by_qscale[1] = i8_sad_by_qscale[1];
6354 }
6355