1 /******************************************************************************
2 *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /*!
22 ******************************************************************************
23 * \file ihevce_enc_loop_pass.c
24 *
25 * \brief
26 * This file contains Encoder normative loop pass related functions
27 *
28 * \date
29 * 18/09/2012
30 *
31 * \author
32 * Ittiam
33 *
34 *
35 * List of Functions
36 *
37 *
38 ******************************************************************************
39 */
40
41 /*****************************************************************************/
42 /* File Includes */
43 /*****************************************************************************/
44 /* System include files */
45 #include <stdio.h>
46 #include <string.h>
47 #include <stdlib.h>
48 #include <assert.h>
49 #include <stdarg.h>
50 #include <math.h>
51 #include <limits.h>
52
53 /* User include files */
54 #include "ihevc_typedefs.h"
55 #include "itt_video_api.h"
56 #include "ihevce_api.h"
57
58 #include "rc_cntrl_param.h"
59 #include "rc_frame_info_collector.h"
60 #include "rc_look_ahead_params.h"
61
62 #include "ihevc_defs.h"
63 #include "ihevc_macros.h"
64 #include "ihevc_debug.h"
65 #include "ihevc_structs.h"
66 #include "ihevc_platform_macros.h"
67 #include "ihevc_deblk.h"
68 #include "ihevc_itrans_recon.h"
69 #include "ihevc_chroma_itrans_recon.h"
70 #include "ihevc_chroma_intra_pred.h"
71 #include "ihevc_intra_pred.h"
72 #include "ihevc_inter_pred.h"
73 #include "ihevc_mem_fns.h"
74 #include "ihevc_padding.h"
75 #include "ihevc_weighted_pred.h"
76 #include "ihevc_sao.h"
77 #include "ihevc_resi_trans.h"
78 #include "ihevc_quant_iquant_ssd.h"
79 #include "ihevc_cabac_tables.h"
80 #include "ihevc_common_tables.h"
81 #include "ihevc_quant_tables.h"
82
83 #include "ihevce_defs.h"
84 #include "ihevce_hle_interface.h"
85 #include "ihevce_lap_enc_structs.h"
86 #include "ihevce_multi_thrd_structs.h"
87 #include "ihevce_multi_thrd_funcs.h"
88 #include "ihevce_me_common_defs.h"
89 #include "ihevce_had_satd.h"
90 #include "ihevce_error_codes.h"
91 #include "ihevce_bitstream.h"
92 #include "ihevce_cabac.h"
93 #include "ihevce_rdoq_macros.h"
94 #include "ihevce_function_selector.h"
95 #include "ihevce_enc_structs.h"
96 #include "ihevce_entropy_structs.h"
97 #include "ihevce_cmn_utils_instr_set_router.h"
98 #include "ihevce_ipe_instr_set_router.h"
99 #include "ihevce_decomp_pre_intra_structs.h"
100 #include "ihevce_decomp_pre_intra_pass.h"
101 #include "ihevce_enc_loop_structs.h"
102 #include "ihevce_nbr_avail.h"
103 #include "ihevce_enc_loop_utils.h"
104 #include "ihevce_sub_pic_rc.h"
105 #include "ihevce_global_tables.h"
106 #include "ihevce_bs_compute_ctb.h"
107 #include "ihevce_cabac_rdo.h"
108 #include "ihevce_deblk.h"
109 #include "ihevce_frame_process.h"
110 #include "ihevce_rc_enc_structs.h"
111 #include "hme_datatype.h"
112 #include "hme_interface.h"
113 #include "hme_common_defs.h"
114 #include "hme_defs.h"
115 #include "ihevce_me_instr_set_router.h"
116 #include "ihevce_enc_subpel_gen.h"
117 #include "ihevce_inter_pred.h"
118 #include "ihevce_mv_pred.h"
119 #include "ihevce_mv_pred_merge.h"
120 #include "ihevce_enc_loop_inter_mode_sifter.h"
121 #include "ihevce_enc_cu_recursion.h"
122 #include "ihevce_enc_loop_pass.h"
123 #include "ihevce_common_utils.h"
124 #include "ihevce_dep_mngr_interface.h"
125 #include "ihevce_sao.h"
126 #include "ihevce_tile_interface.h"
127 #include "ihevce_profile.h"
128
129 #include "cast_types.h"
130 #include "osal.h"
131 #include "osal_defaults.h"
132
133 /*****************************************************************************/
134 /* Globals */
135 /*****************************************************************************/
136 extern PART_ID_T ge_part_type_to_part_id[MAX_PART_TYPES][MAX_NUM_PARTS];
137
138 extern UWORD8 gau1_num_parts_in_part_type[MAX_PART_TYPES];
139
140 /*****************************************************************************/
141 /* Constant Macros */
142 /*****************************************************************************/
143 #define UPDATE_QP_AT_CTB 6
144
145 /*****************************************************************************/
146 /* Function Definitions */
147 /*****************************************************************************/
148
149 /*!
150 ******************************************************************************
151 * \if Function name : ihevce_enc_loop_ctb_left_copy \endif
152 *
153 * \brief
154 * This function copy the right data of CTB to context buffers
155 *
156 * \date
157 * 18/09/2012
158 *
159 * \author
160 * Ittiam
161 *
162 * \return
163 *
164 * List of Functions
165 *
166 *
167 ******************************************************************************
168 */
ihevce_enc_loop_ctb_left_copy(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms)169 void ihevce_enc_loop_ctb_left_copy(ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_prms_t *ps_cu_prms)
170 {
171 /* ------------------------------------------------------------------ */
172 /* copy the right coloum data to the context buffers */
173 /* ------------------------------------------------------------------ */
174
175 nbr_4x4_t *ps_left_nbr;
176 nbr_4x4_t *ps_nbr;
177 UWORD8 *pu1_buff;
178 WORD32 num_pels;
179 UWORD8 *pu1_luma_left, *pu1_chrm_left;
180
181 UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
182
183 pu1_luma_left = (UWORD8 *)ps_ctxt->pv_left_luma_data;
184 pu1_chrm_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data;
185 ps_left_nbr = &ps_ctxt->as_left_col_nbr[0];
186
187 /* copy right luma data */
188 pu1_buff = ps_cu_prms->pu1_luma_recon + ps_cu_prms->i4_ctb_size - 1;
189
190 for(num_pels = 0; num_pels < ps_cu_prms->i4_ctb_size; num_pels++)
191 {
192 WORD32 i4_indx = ps_cu_prms->i4_luma_recon_stride * num_pels;
193
194 pu1_luma_left[num_pels] = pu1_buff[i4_indx];
195 }
196
197 /* copy right chroma data */
198 pu1_buff = ps_cu_prms->pu1_chrm_recon + ps_cu_prms->i4_ctb_size - 2;
199
200 for(num_pels = 0; num_pels < (ps_cu_prms->i4_ctb_size >> (0 == u1_is_422)); num_pels++)
201 {
202 WORD32 i4_indx = ps_cu_prms->i4_chrm_recon_stride * num_pels;
203
204 *pu1_chrm_left++ = pu1_buff[i4_indx];
205 *pu1_chrm_left++ = pu1_buff[i4_indx + 1];
206 }
207
208 /* store the nbr 4x4 data at ctb level */
209 {
210 WORD32 ctr;
211 WORD32 nbr_strd;
212
213 nbr_strd = ps_cu_prms->i4_ctb_size >> 2;
214
215 /* copy right nbr data */
216 ps_nbr = &ps_ctxt->as_ctb_nbr_arr[0];
217 ps_nbr += ((ps_cu_prms->i4_ctb_size >> 2) - 1);
218
219 for(ctr = 0; ctr < (ps_cu_prms->i4_ctb_size >> 2); ctr++)
220 {
221 WORD32 i4_indx = nbr_strd * ctr;
222
223 ps_left_nbr[ctr] = ps_nbr[i4_indx];
224 }
225 }
226 return;
227 }
228
229 /*!
230 ******************************************************************************
231 * \if Function name : ihevce_mark_all_modes_to_evaluate \endif
232 *
233 * \brief
234 * Mark all modes for inter/intra for evaluation. This function will be
235 * called by ref instance
236 *
237 * \param[in] pv_ctxt : pointer to enc_loop module
238 * \param[in] ps_cu_analyse : pointer to cu analyse
239 *
240 * \return
241 * None
242 *
243 * \author
244 * Ittiam
245 *
246 *****************************************************************************
247 */
ihevce_mark_all_modes_to_evaluate(void * pv_ctxt,cu_analyse_t * ps_cu_analyse)248 void ihevce_mark_all_modes_to_evaluate(void *pv_ctxt, cu_analyse_t *ps_cu_analyse)
249 {
250 UWORD8 ctr;
251 WORD32 i4_part;
252
253 (void)pv_ctxt;
254 /* run a loop over all Inter cands */
255 for(ctr = 0; ctr < MAX_INTER_CU_CANDIDATES; ctr++)
256 {
257 ps_cu_analyse->as_cu_inter_cand[ctr].b1_eval_mark = 1;
258 }
259
260 /* run a loop over all intra candidates */
261 if(0 != ps_cu_analyse->u1_num_intra_rdopt_cands)
262 {
263 for(ctr = 0; ctr < MAX_INTRA_CU_CANDIDATES + 1; ctr++)
264 {
265 ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr] = 1;
266 ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr] = 1;
267
268 for(i4_part = 0; i4_part < NUM_PU_PARTS; i4_part++)
269 {
270 ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[i4_part][ctr] = 1;
271 }
272 }
273 }
274 }
275
276 /*!
277 ******************************************************************************
278 * \if Function name : ihevce_cu_mode_decide \endif
279 *
280 * \brief
281 * Coding Unit mode decide function. Performs RD opt and decides the best mode
282 *
283 * \param[in] ps_ctxt : pointer to enc_loop module
284 * \param[in] ps_cu_prms : pointer to coding unit params (position, buffer pointers)
285 * \param[in] ps_cu_analyse : pointer to cu analyse
286 * \param[out] ps_final_mode_state : pointer to cu final mode state
287 * \param[out] pu1_ecd_data : pointer to store coeff data for ECD
288 * \param[out] ps_col_pu : colocated pu buffer pointer
289 * \param[out] pu1_col_pu_map : colocated pu map buffer pointer
290 * \param[in] col_start_pu_idx : pu index start value
291 *
292 * \return
293 * None
294 *
295 *
296 * \author
297 * Ittiam
298 *
299 *****************************************************************************
300 */
ihevce_cu_mode_decide(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms,cu_analyse_t * ps_cu_analyse,final_mode_state_t * ps_final_mode_state,UWORD8 * pu1_ecd_data,pu_col_mv_t * ps_col_pu,UWORD8 * pu1_col_pu_map,WORD32 col_start_pu_idx)301 LWORD64 ihevce_cu_mode_decide(
302 ihevce_enc_loop_ctxt_t *ps_ctxt,
303 enc_loop_cu_prms_t *ps_cu_prms,
304 cu_analyse_t *ps_cu_analyse,
305 final_mode_state_t *ps_final_mode_state,
306 UWORD8 *pu1_ecd_data,
307 pu_col_mv_t *ps_col_pu,
308 UWORD8 *pu1_col_pu_map,
309 WORD32 col_start_pu_idx)
310 {
311 enc_loop_chrm_cu_buf_prms_t s_chrm_cu_buf_prms;
312 cu_nbr_prms_t s_cu_nbr_prms;
313 inter_cu_mode_info_t s_inter_cu_mode_info;
314 cu_inter_cand_t *ps_best_inter_cand = NULL;
315 UWORD8 *pu1_cu_top;
316 UWORD8 *pu1_cu_top_left;
317 UWORD8 *pu1_cu_left;
318 UWORD8 *pu1_final_recon = NULL;
319 UWORD8 *pu1_curr_src = NULL;
320 void *pv_curr_src = NULL;
321 void *pv_cu_left = NULL;
322 void *pv_cu_top = NULL;
323 void *pv_cu_top_left = NULL;
324
325 WORD32 cu_left_stride = 0;
326 WORD32 ctr;
327 WORD32 rd_opt_best_idx;
328 LWORD64 rd_opt_least_cost;
329 WORD32 rd_opt_curr_idx;
330 WORD32 num_4x4_in_ctb;
331 WORD32 nbr_4x4_left_strd = 0;
332
333 nbr_4x4_t *ps_topleft_nbr_4x4;
334 nbr_4x4_t *ps_left_nbr_4x4 = NULL;
335 nbr_4x4_t *ps_top_nbr_4x4 = NULL;
336 nbr_4x4_t *ps_curr_nbr_4x4;
337 WORD32 enable_intra_eval_flag;
338 WORD32 i4_best_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1;
339 WORD32 curr_cu_pos_in_row;
340 WORD32 cu_top_right_offset;
341 WORD32 cu_top_right_dep_pos;
342 WORD32 i4_ctb_x_off, i4_ctb_y_off;
343
344 UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
345 (void)ps_final_mode_state;
346 /* default init */
347 rd_opt_least_cost = MAX_COST_64;
348 ps_ctxt->as_cu_prms[0].i8_best_rdopt_cost = MAX_COST_64;
349 ps_ctxt->as_cu_prms[1].i8_best_rdopt_cost = MAX_COST_64;
350
351 /* Zero cbf tool is enabled by default for all presets */
352 ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
353
354 rd_opt_best_idx = 1;
355 rd_opt_curr_idx = 0;
356 enable_intra_eval_flag = 1;
357
358 /* CU params in enc ctxt*/
359 ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
360 ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
361 ps_ctxt->ps_enc_out_ctxt->u1_cu_size = ps_cu_analyse->u1_cu_size;
362
363 num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
364 ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0];
365 ps_curr_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1);
366 ps_curr_nbr_4x4 += ((ps_cu_analyse->b3_cu_pos_y << 1) * num_4x4_in_ctb);
367
368 /* CB and Cr are pixel interleaved */
369 s_chrm_cu_buf_prms.i4_chrm_recon_stride = ps_cu_prms->i4_chrm_recon_stride;
370
371 s_chrm_cu_buf_prms.i4_chrm_src_stride = ps_cu_prms->i4_chrm_src_stride;
372
373 if(!ps_ctxt->u1_is_input_data_hbd)
374 {
375 /* --------------------------------------- */
376 /* ----- Luma Pointers Derivation -------- */
377 /* --------------------------------------- */
378
379 /* based on CU position derive the pointers */
380 pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_cu_analyse->b3_cu_pos_x << 3);
381
382 pu1_curr_src = ps_cu_prms->pu1_luma_src + (ps_cu_analyse->b3_cu_pos_x << 3);
383
384 pu1_final_recon += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride);
385
386 pu1_curr_src += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_src_stride);
387
388 pv_curr_src = pu1_curr_src;
389
390 /* CU left */
391 if(0 == ps_cu_analyse->b3_cu_pos_x)
392 {
393 /* CTB boundary */
394 pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_luma_data;
395 pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << 3);
396 cu_left_stride = 1;
397
398 ps_left_nbr_4x4 = &ps_ctxt->as_left_col_nbr[0];
399 ps_left_nbr_4x4 += ps_cu_analyse->b3_cu_pos_y << 1;
400 nbr_4x4_left_strd = 1;
401 }
402 else
403 {
404 /* inside CTB */
405 pu1_cu_left = pu1_final_recon - 1;
406 cu_left_stride = ps_cu_prms->i4_luma_recon_stride;
407
408 ps_left_nbr_4x4 = ps_curr_nbr_4x4 - 1;
409 nbr_4x4_left_strd = num_4x4_in_ctb;
410 }
411
412 pv_cu_left = pu1_cu_left;
413
414 /* CU top */
415 if(0 == ps_cu_analyse->b3_cu_pos_y)
416 {
417 /* CTB boundary */
418 pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_luma;
419 pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size;
420 pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3);
421
422 ps_top_nbr_4x4 = ps_ctxt->ps_top_row_nbr;
423 ps_top_nbr_4x4 += (ps_cu_prms->i4_ctb_pos * (ps_cu_prms->i4_ctb_size >> 2));
424 ps_top_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1);
425 }
426 else
427 {
428 /* inside CTB */
429 pu1_cu_top = pu1_final_recon - ps_cu_prms->i4_luma_recon_stride;
430
431 ps_top_nbr_4x4 = ps_curr_nbr_4x4 - num_4x4_in_ctb;
432 }
433
434 pv_cu_top = pu1_cu_top;
435
436 /* CU top left */
437 if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y))
438 {
439 /* left ctb boundary but not first row */
440 pu1_cu_top_left = pu1_cu_left - 1; /* stride is 1 */
441 ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - 1; /* stride is 1 */
442 }
443 else
444 {
445 /* rest all cases topleft is top -1 */
446 pu1_cu_top_left = pu1_cu_top - 1;
447 ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1;
448 }
449
450 pv_cu_top_left = pu1_cu_top_left;
451
452 /* Store the CU nbr information in the ctxt for final reconstruction fun. */
453 s_cu_nbr_prms.nbr_4x4_left_strd = nbr_4x4_left_strd;
454 s_cu_nbr_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4;
455 s_cu_nbr_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4;
456 s_cu_nbr_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4;
457 s_cu_nbr_prms.pu1_cu_left = pu1_cu_left;
458 s_cu_nbr_prms.pu1_cu_top = pu1_cu_top;
459 s_cu_nbr_prms.pu1_cu_top_left = pu1_cu_top_left;
460 s_cu_nbr_prms.cu_left_stride = cu_left_stride;
461
462 /* ------------------------------------------------------------ */
463 /* -- Initialize the number of neigbour skip cu count for rdo --*/
464 /* ------------------------------------------------------------ */
465 {
466 nbr_avail_flags_t s_nbr;
467 WORD32 i4_num_nbr_skip_cus = 0;
468
469 /* get the neighbour availability flags for current cu */
470 ihevce_get_nbr_intra(
471 &s_nbr,
472 ps_ctxt->pu1_ctb_nbr_map,
473 ps_ctxt->i4_nbr_map_strd,
474 (ps_cu_analyse->b3_cu_pos_x << 1),
475 (ps_cu_analyse->b3_cu_pos_y << 1),
476 (ps_cu_analyse->u1_cu_size >> 2));
477 if(s_nbr.u1_top_avail)
478 {
479 i4_num_nbr_skip_cus += ps_top_nbr_4x4->b1_skip_flag;
480 }
481
482 if(s_nbr.u1_left_avail)
483 {
484 i4_num_nbr_skip_cus += ps_left_nbr_4x4->b1_skip_flag;
485 }
486 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0].i4_num_nbr_skip_cus =
487 i4_num_nbr_skip_cus;
488 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1].i4_num_nbr_skip_cus =
489 i4_num_nbr_skip_cus;
490 }
491
492 /* --------------------------------------- */
493 /* --- Chroma Pointers Derivation -------- */
494 /* --------------------------------------- */
495
496 /* based on CU position derive the pointers */
497 s_chrm_cu_buf_prms.pu1_final_recon =
498 ps_cu_prms->pu1_chrm_recon + (ps_cu_analyse->b3_cu_pos_x << 3);
499
500 s_chrm_cu_buf_prms.pu1_curr_src =
501 ps_cu_prms->pu1_chrm_src + (ps_cu_analyse->b3_cu_pos_x << 3);
502
503 s_chrm_cu_buf_prms.pu1_final_recon +=
504 ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_recon_stride);
505
506 s_chrm_cu_buf_prms.pu1_curr_src +=
507 ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_src_stride);
508
509 /* CU left */
510 if(0 == ps_cu_analyse->b3_cu_pos_x)
511 {
512 /* CTB boundary */
513 s_chrm_cu_buf_prms.pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data;
514 s_chrm_cu_buf_prms.pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 3));
515 s_chrm_cu_buf_prms.i4_cu_left_stride = 2;
516 }
517 else
518 {
519 /* inside CTB */
520 s_chrm_cu_buf_prms.pu1_cu_left = s_chrm_cu_buf_prms.pu1_final_recon - 2;
521 s_chrm_cu_buf_prms.i4_cu_left_stride = ps_cu_prms->i4_chrm_recon_stride;
522 }
523
524 /* CU top */
525 if(0 == ps_cu_analyse->b3_cu_pos_y)
526 {
527 /* CTB boundary */
528 s_chrm_cu_buf_prms.pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_chroma;
529 s_chrm_cu_buf_prms.pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size;
530 s_chrm_cu_buf_prms.pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3);
531 }
532 else
533 {
534 /* inside CTB */
535 s_chrm_cu_buf_prms.pu1_cu_top =
536 s_chrm_cu_buf_prms.pu1_final_recon - ps_cu_prms->i4_chrm_recon_stride;
537 }
538
539 /* CU top left */
540 if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y))
541 {
542 /* left ctb boundary but not first row */
543 s_chrm_cu_buf_prms.pu1_cu_top_left =
544 s_chrm_cu_buf_prms.pu1_cu_left - 2; /* stride is 1 (2 pixels) */
545 }
546 else
547 {
548 /* rest all cases topleft is top -2 */
549 s_chrm_cu_buf_prms.pu1_cu_top_left = s_chrm_cu_buf_prms.pu1_cu_top - 2;
550 }
551 }
552
553 /* Set Variables for Dep. Checking and Setting */
554 i4_ctb_x_off = (ps_cu_prms->i4_ctb_pos << 6);
555
556 i4_ctb_y_off = ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y;
557 ps_ctxt->i4_satd_buf_idx = rd_opt_curr_idx;
558
559 /* Set the pred pointer count for ME/intra to 0 to start */
560 ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count = 0;
561
562 ASSERT(
563 (ps_cu_analyse->u1_num_inter_cands > 0) || (ps_cu_analyse->u1_num_intra_rdopt_cands > 0));
564
565 ASSERT(ps_cu_analyse->u1_num_inter_cands <= MAX_INTER_CU_CANDIDATES);
566 s_inter_cu_mode_info.u1_num_inter_cands = 0;
567 s_inter_cu_mode_info.u1_idx_of_worst_cost_in_cost_array = 0;
568 s_inter_cu_mode_info.u1_idx_of_worst_cost_in_pred_buf_array = 0;
569
570 ps_ctxt->s_cu_inter_merge_skip.u1_num_merge_cands = 0;
571 ps_ctxt->s_cu_inter_merge_skip.u1_num_skip_cands = 0;
572 ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type0_cands = 0;
573 ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type1_cands = 0;
574 ps_ctxt->s_pred_buf_data.i4_pred_stride = ps_cu_analyse->u1_cu_size;
575 if(0 != ps_cu_analyse->u1_num_inter_cands)
576 {
577 ihevce_inter_cand_sifter_prms_t s_prms;
578
579 UWORD8 u1_enable_top_row_sync;
580
581 if(ps_ctxt->u1_disable_intra_eval)
582 {
583 u1_enable_top_row_sync = !DISABLE_TOP_SYNC;
584 }
585 else
586 {
587 u1_enable_top_row_sync = 1;
588 }
589
590 if((!ps_ctxt->u1_use_top_at_ctb_boundary) && u1_enable_top_row_sync)
591 {
592 /* Wait till top data is ready */
593 /* Currently checking till top right CU */
594 curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
595
596 if(i4_ctb_y_off == 0)
597 {
598 /* No wait for 1st row */
599 cu_top_right_offset = -(MAX_CTB_SIZE);
600 {
601 ihevce_tile_params_t *ps_col_tile_params =
602 ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
603 ps_ctxt->i4_tile_col_idx);
604 /* No wait for 1st row */
605 cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
606 }
607 cu_top_right_dep_pos = 0;
608 }
609 else
610 {
611 cu_top_right_offset = (ps_cu_analyse->u1_cu_size) + 4;
612 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
613 }
614
615 if(0 == ps_cu_analyse->b3_cu_pos_y)
616 {
617 ihevce_dmgr_chk_row_row_sync(
618 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
619 curr_cu_pos_in_row,
620 cu_top_right_offset,
621 cu_top_right_dep_pos,
622 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
623 ps_ctxt->thrd_id);
624 }
625 }
626
627 s_prms.i4_ctb_nbr_map_stride = ps_ctxt->i4_nbr_map_strd;
628 s_prms.i4_max_num_inter_rdopt_cands = ps_ctxt->i4_max_num_inter_rdopt_cands;
629 s_prms.i4_nbr_4x4_left_strd = nbr_4x4_left_strd;
630 s_prms.i4_src_strd = ps_cu_prms->i4_luma_src_stride;
631 s_prms.ps_cu_inter_merge_skip = &ps_ctxt->s_cu_inter_merge_skip;
632 s_prms.aps_cu_nbr_buf[0] = &ps_ctxt->as_cu_nbr[ps_ctxt->i4_satd_buf_idx][0];
633 s_prms.aps_cu_nbr_buf[1] = &ps_ctxt->as_cu_nbr[!ps_ctxt->i4_satd_buf_idx][0];
634 s_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4;
635 s_prms.ps_mc_ctxt = &ps_ctxt->s_mc_ctxt;
636 s_prms.ps_me_cands = ps_cu_analyse->as_cu_inter_cand;
637 s_prms.ps_mixed_modes_datastore = &ps_ctxt->s_mixed_mode_inter_cu;
638 s_prms.ps_mv_pred_ctxt = &ps_ctxt->s_mv_pred_ctxt;
639 s_prms.ps_pred_buf_data = &ps_ctxt->s_pred_buf_data;
640 s_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4;
641 s_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4;
642 s_prms.pu1_ctb_nbr_map = ps_ctxt->pu1_ctb_nbr_map;
643 s_prms.pv_src = pv_curr_src;
644 s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 3;
645 s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 3;
646 s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size;
647 s_prms.u1_max_merge_candidates = ps_ctxt->i4_max_merge_candidates;
648 s_prms.u1_num_me_cands = ps_cu_analyse->u1_num_inter_cands;
649 s_prms.u1_use_satd_for_merge_eval = ps_ctxt->i4_use_satd_for_merge_eval;
650 s_prms.u1_quality_preset = ps_ctxt->i4_quality_preset;
651 s_prms.i1_slice_type = ps_ctxt->i1_slice_type;
652 s_prms.ps_cu_me_intra_pred_prms = &ps_ctxt->s_cu_me_intra_pred_prms;
653 s_prms.u1_is_hbd = (ps_ctxt->u1_bit_depth > 8);
654 s_prms.ps_inter_cu_mode_info = &s_inter_cu_mode_info;
655 s_prms.pai4_mv_cost = ps_cu_analyse->ai4_mv_cost;
656 s_prms.i4_lambda_qf = ps_ctxt->i4_sad_lamda;
657 s_prms.u1_use_merge_cand_from_top_row =
658 (u1_enable_top_row_sync || (s_prms.u1_cu_pos_y > 0));
659 s_prms.u1_merge_idx_cabac_model =
660 ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[IHEVC_CAB_MERGE_IDX_EXT];
661 #if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
662 s_prms.pai4_me_err_metric = ps_cu_analyse->ai4_err_metric;
663 s_prms.u1_reuse_me_sad = 1;
664 #else
665 s_prms.u1_reuse_me_sad = 0;
666 #endif
667
668 if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_type != PSLICE)
669 {
670 if(ps_ctxt->i4_temporal_layer == 1)
671 {
672 s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_BREF;
673 }
674 else
675 {
676 s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME;
677 }
678 }
679 else
680 {
681 s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_P;
682 }
683 s_prms.u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
684
685 if(s_prms.u1_is_cu_noisy)
686 {
687 s_prms.i4_lambda_qf =
688 ((float)s_prms.i4_lambda_qf) * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f;
689 }
690 s_prms.pf_luma_inter_pred_pu = ihevce_luma_inter_pred_pu;
691
692 s_prms.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func;
693
694 s_prms.pf_evalsad_pt_npu_mxn_8bit = (FT_SAD_EVALUATOR *)ps_ctxt->pv_evalsad_pt_npu_mxn_8bit;
695 ihevce_inter_cand_sifter(&s_prms);
696 }
697 if(u1_is_422)
698 {
699 UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX - 1];
700 UWORD8 u1_num_bufs_allocated;
701
702 u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices(
703 au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX - 1);
704
705 ASSERT(u1_num_bufs_allocated == (NUM_CU_ME_INTRA_PRED_IDX - 1));
706
707 for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated;
708 ctr++)
709 {
710 {
711 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
712 (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]];
713 }
714
715 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
716
717 ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
718 }
719
720 {
721 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
722 (UWORD8 *)ps_ctxt->pv_422_chroma_intra_pred_buf;
723 }
724
725 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
726
727 ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
728 }
729 else
730 {
731 UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX];
732 UWORD8 u1_num_bufs_allocated;
733
734 u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices(
735 au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX);
736
737 ASSERT(u1_num_bufs_allocated == NUM_CU_ME_INTRA_PRED_IDX);
738
739 for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated;
740 ctr++)
741 {
742 {
743 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
744 (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]];
745 }
746
747 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
748
749 ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
750 }
751 }
752
753 ihevce_mark_all_modes_to_evaluate(ps_ctxt, ps_cu_analyse);
754
755 ps_ctxt->as_cu_prms[0].s_recon_datastore.u1_is_lumaRecon_available = 0;
756 ps_ctxt->as_cu_prms[1].s_recon_datastore.u1_is_lumaRecon_available = 0;
757 ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
758 ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
759 ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[1] = 0;
760 ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[1] = 0;
761 ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[2] = 0;
762 ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[2] = 0;
763 /* --------------------------------------- */
764 /* ------ Inter RD OPT stage ------------- */
765 /* --------------------------------------- */
766 if(0 != s_inter_cu_mode_info.u1_num_inter_cands)
767 {
768 UWORD8 u1_ssd_bit_info_ctr = 0;
769
770 /* -- run a loop over all Inter rd opt cands ------ */
771 for(ctr = 0; ctr < s_inter_cu_mode_info.u1_num_inter_cands; ctr++)
772 {
773 cu_inter_cand_t *ps_inter_cand;
774
775 LWORD64 rd_opt_cost = 0;
776
777 ps_inter_cand = s_inter_cu_mode_info.aps_cu_data[ctr];
778
779 if((ps_inter_cand->b1_skip_flag) || (ps_inter_cand->as_inter_pu[0].b1_merge_flag) ||
780 (ps_inter_cand->b3_part_size && ps_inter_cand->as_inter_pu[1].b1_merge_flag))
781 {
782 ps_inter_cand->b1_eval_mark = 1;
783 }
784
785 /****************************************************************/
786 /* This check is only valid for derived instances. */
787 /* check if this mode needs to be evaluated or not. */
788 /* if it is a skip candidate, go ahead and evaluate it even if */
789 /* it has not been marked while sorting. */
790 /****************************************************************/
791 if((0 == ps_inter_cand->b1_eval_mark) && (0 == ps_inter_cand->b1_skip_flag))
792 {
793 continue;
794 }
795
796 /* RDOPT related copies and settings */
797 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
798
799 /* RDOPT copy States : Prev Cu best to current init */
800 COPY_CABAC_STATES(
801 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
802 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
803 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
804 /* MVP ,MVD calc and Motion compensation */
805 rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)(
806 ps_ctxt,
807 ps_inter_cand,
808 ps_cu_analyse->u1_cu_size,
809 ps_cu_analyse->b3_cu_pos_x,
810 ps_cu_analyse->b3_cu_pos_y,
811 ps_left_nbr_4x4,
812 ps_top_nbr_4x4,
813 ps_topleft_nbr_4x4,
814 nbr_4x4_left_strd,
815 rd_opt_curr_idx);
816
817 #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
818 if((ps_ctxt->u1_bit_depth == 8) && (!ps_inter_cand->b1_skip_flag))
819 {
820 ihevce_determine_tu_tree_distribution(
821 ps_inter_cand,
822 (me_func_selector_t *)ps_ctxt->pv_err_func_selector,
823 ps_ctxt->ai2_scratch,
824 (UWORD8 *)pv_curr_src,
825 ps_cu_prms->i4_luma_src_stride,
826 ps_ctxt->i4_satd_lamda,
827 LAMBDA_Q_SHIFT,
828 ps_cu_analyse->u1_cu_size,
829 ps_ctxt->u1_max_tr_depth);
830 }
831 #endif
832 #if DISABLE_ZERO_ZBF_IN_INTER
833 ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
834 #else
835 ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
836 #endif
837 /* Recon loop with different TUs based on partition type*/
838 rd_opt_cost += ((pf_inter_rdopt_cu_ntu)ps_ctxt->pv_inter_rdopt_cu_ntu)(
839 ps_ctxt,
840 ps_cu_prms,
841 pv_curr_src,
842 ps_cu_analyse->u1_cu_size,
843 ps_cu_analyse->b3_cu_pos_x,
844 ps_cu_analyse->b3_cu_pos_y,
845 rd_opt_curr_idx,
846 &s_chrm_cu_buf_prms,
847 ps_inter_cand,
848 ps_cu_analyse,
849 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
850 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
851 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
852 100.0);
853
854 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
855 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
856 {
857 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
858 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
859 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
860 }
861 #endif
862
863 /* based on the rd opt cost choose the best and current index */
864 if(rd_opt_cost < rd_opt_least_cost)
865 {
866 /* swap the best and current indx */
867 rd_opt_best_idx = !rd_opt_best_idx;
868 rd_opt_curr_idx = !rd_opt_curr_idx;
869
870 ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost;
871 rd_opt_least_cost = rd_opt_cost;
872 i4_best_cu_qp = ps_ctxt->i4_cu_qp;
873
874 /* Store the best Inter cand. for final_recon function */
875 ps_best_inter_cand = ps_inter_cand;
876 }
877
878 /* set the neighbour map to 0 */
879 ihevce_set_nbr_map(
880 ps_ctxt->pu1_ctb_nbr_map,
881 ps_ctxt->i4_nbr_map_strd,
882 (ps_cu_analyse->b3_cu_pos_x << 1),
883 (ps_cu_analyse->b3_cu_pos_y << 1),
884 (ps_cu_analyse->u1_cu_size >> 2),
885 0);
886
887 } /* end of loop for all the Inter RD OPT cand */
888 }
889 /* --------------------------------------- */
890 /* ---- Conditional Eval of Intra -------- */
891 /* --------------------------------------- */
892 {
893 enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
894 ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
895
896 /* check if inter candidates are valid */
897 if(0 != ps_cu_analyse->u1_num_inter_cands)
898 {
899 /* if skip or no residual inter candidates has won then */
900 /* evaluation of intra candidates is disabled */
901 if((1 == ps_enc_loop_bestprms->u1_skip_flag) ||
902 (0 == ps_enc_loop_bestprms->u1_is_cu_coded))
903 {
904 enable_intra_eval_flag = 0;
905 }
906 }
907 /* Disable Intra Gating for HIGH QUALITY PRESET */
908 #if !ENABLE_INTRA_GATING_FOR_HQ
909 if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset)
910 {
911 enable_intra_eval_flag = 1;
912
913 #if DISABLE_LARGE_INTRA_PQ
914 if((IHEVCE_QUALITY_P0 == ps_ctxt->i4_quality_preset) && (ps_cu_prms->u1_is_cu_noisy) &&
915 (ps_ctxt->i1_slice_type != ISLICE) && (0 != s_inter_cu_mode_info.u1_num_inter_cands))
916 {
917 if(ps_cu_analyse->u1_cu_size > 16)
918 {
919 /* Disable 32x32 / 64x64 Intra in PQ P and B pics */
920 enable_intra_eval_flag = 0;
921 }
922 else if(ps_cu_analyse->u1_cu_size == 16)
923 {
924 /* Disable tu equal to cu mode in 16x16 Intra in PQ P and B pics */
925 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
926 }
927 }
928 #endif
929 }
930 #endif
931 }
932
933 /* --------------------------------------- */
934 /* ------ Intra RD OPT stage ------------- */
935 /* --------------------------------------- */
936
937 /* -- run a loop over all Intra rd opt cands ------ */
938 if((0 != ps_cu_analyse->u1_num_intra_rdopt_cands) && (1 == enable_intra_eval_flag))
939 {
940 LWORD64 rd_opt_cost;
941 WORD32 end_flag = 0;
942 WORD32 cu_eval_done = 0;
943 WORD32 subcu_eval_done = 0;
944 WORD32 subpu_eval_done = 0;
945 WORD32 max_trans_size;
946 WORD32 sync_wait_stride;
947 max_trans_size = MIN(MAX_TU_SIZE, (ps_cu_analyse->u1_cu_size));
948 sync_wait_stride = (ps_cu_analyse->u1_cu_size) + max_trans_size;
949
950 if(!ps_ctxt->u1_use_top_at_ctb_boundary)
951 {
952 /* Wait till top data is ready */
953 /* Currently checking till top right CU */
954 curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
955
956 if(i4_ctb_y_off == 0)
957 {
958 /* No wait for 1st row */
959 cu_top_right_offset = -(MAX_CTB_SIZE);
960 {
961 ihevce_tile_params_t *ps_col_tile_params =
962 ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
963 ps_ctxt->i4_tile_col_idx);
964 /* No wait for 1st row */
965 cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
966 }
967 cu_top_right_dep_pos = 0;
968 }
969 else
970 {
971 cu_top_right_offset = sync_wait_stride;
972 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
973 }
974
975 if(0 == ps_cu_analyse->b3_cu_pos_y)
976 {
977 ihevce_dmgr_chk_row_row_sync(
978 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
979 curr_cu_pos_in_row,
980 cu_top_right_offset,
981 cu_top_right_dep_pos,
982 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
983 ps_ctxt->thrd_id);
984 }
985 }
986 ctr = 0;
987
988 /* Zero cbf tool is disabled for intra CUs */
989 #if ENABLE_ZERO_CBF_IN_INTRA
990 ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
991 #else
992 ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
993 #endif
994
995 /* Intra Mode gating based on MPM cand list and encoder quality preset */
996 if((ps_ctxt->i1_slice_type != ISLICE) && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
997 {
998 ihevce_mpm_idx_based_filter_RDOPT_cand(
999 ps_ctxt,
1000 ps_cu_analyse,
1001 ps_left_nbr_4x4,
1002 ps_top_nbr_4x4,
1003 &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0],
1004 &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[0]);
1005
1006 ihevce_mpm_idx_based_filter_RDOPT_cand(
1007 ps_ctxt,
1008 ps_cu_analyse,
1009 ps_left_nbr_4x4,
1010 ps_top_nbr_4x4,
1011 &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0],
1012 &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[0]);
1013 }
1014
1015 /* Call Chroma SATD function for curr_func_mode in HIGH QUALITY mode */
1016 if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd)
1017 {
1018 /* For cu_size = 64, there won't be any TU_EQ_CU case */
1019 if(64 != ps_cu_analyse->u1_cu_size)
1020 {
1021 /* RDOPT copy States : Prev Cu best to current init */
1022 COPY_CABAC_STATES(
1023 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1024 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1025 IHEVC_CAB_CTXT_END);
1026
1027 /* RDOPT related copies and settings */
1028 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1029
1030 /* Calc. best SATD mode for TU_EQ_CU case */
1031 ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)(
1032 ps_ctxt,
1033 &s_chrm_cu_buf_prms,
1034 ps_cu_analyse,
1035 rd_opt_curr_idx,
1036 TU_EQ_CU,
1037 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1038 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1039 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1040 100.0,
1041 ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY);
1042
1043 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1044 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1045 {
1046 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1047 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1048 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1049 }
1050 #endif
1051 }
1052
1053 /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and
1054 TU_EQ_CU_DIV2 case */
1055
1056 if((ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] !=
1057 255) &&
1058 (8 != ps_cu_analyse->u1_cu_size))
1059 {
1060 /* RDOPT copy States : Prev Cu best to current init */
1061 COPY_CABAC_STATES(
1062 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1063 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1064 IHEVC_CAB_CTXT_END);
1065
1066 /* RDOPT related copies and settings */
1067 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1068
1069 /* Calc. best SATD mode for TU_EQ_CU_DIV2 case */
1070 ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)(
1071 ps_ctxt,
1072 &s_chrm_cu_buf_prms,
1073 ps_cu_analyse,
1074 rd_opt_curr_idx,
1075 TU_EQ_CU_DIV2,
1076 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1077 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1078 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1079 100.0,
1080 ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY);
1081
1082 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1083 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1084 {
1085 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1086 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1087 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1088 }
1089 #endif
1090 }
1091 }
1092
1093 while(0 == end_flag)
1094 {
1095 UWORD8 *pu1_mode = NULL;
1096 WORD32 curr_func_mode = 0;
1097 void *pv_pred;
1098
1099 ASSERT(ctr < 36);
1100
1101 /* TU equal to CU size evaluation of different modes */
1102 if(0 == cu_eval_done)
1103 {
1104 /* check if the all the modes have been evaluated */
1105 if(255 == ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr])
1106 {
1107 cu_eval_done = 1;
1108 ctr = 0;
1109 }
1110 else if(
1111 (1 == ctr) &&
1112 ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
1113 (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
1114 (ps_ctxt->i1_slice_type != ISLICE))
1115 {
1116 ctr = 0;
1117 cu_eval_done = 1;
1118 subcu_eval_done = 1;
1119 subpu_eval_done = 1;
1120 }
1121 else
1122 {
1123 if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr])
1124 {
1125 ctr++;
1126 continue;
1127 }
1128
1129 pu1_mode =
1130 &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr];
1131 ctr++;
1132 curr_func_mode = TU_EQ_CU;
1133 }
1134 }
1135 /* Sub CU (NXN) mode evaluation of different pred modes */
1136 if((0 == subpu_eval_done) && (1 == cu_eval_done))
1137 {
1138 /*For NxN modes evaluation all candidates for all PU parts are evaluated */
1139 /*inside the ihevce_intra_rdopt_cu_ntu function, so the subpu_eval_done is set to 1 */
1140 {
1141 pu1_mode = &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][ctr];
1142
1143 curr_func_mode = TU_EQ_SUBCU;
1144 /* check if the any modes have to be evaluated */
1145 if(255 == *pu1_mode)
1146 {
1147 subpu_eval_done = 1;
1148 ctr = 0;
1149 }
1150 else if(ctr != 0) /* If the modes have to be evaluated, then terminate, as all modes are already evaluated */
1151 {
1152 subpu_eval_done = 1;
1153 ctr = 0;
1154 }
1155 else
1156 {
1157 ctr++;
1158 }
1159 }
1160 }
1161
1162 /* TU size equal to CU div2 mode evaluation of different pred modes */
1163 if((0 == subcu_eval_done) && (1 == subpu_eval_done) && (1 == cu_eval_done))
1164 {
1165 /* check if the all the modes have been evaluated */
1166 if(255 ==
1167 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr])
1168 {
1169 subcu_eval_done = 1;
1170 }
1171 else if(
1172 (1 == ctr) &&
1173 ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
1174 (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
1175 (ps_ctxt->i1_slice_type != ISLICE) && (ps_cu_analyse->u1_cu_size == 64))
1176 {
1177 subcu_eval_done = 1;
1178 }
1179 else
1180 {
1181 if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr])
1182 {
1183 ctr++;
1184 continue;
1185 }
1186
1187 pu1_mode = &ps_cu_analyse->s_cu_intra_cand
1188 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr];
1189
1190 ctr++;
1191 curr_func_mode = TU_EQ_CU_DIV2;
1192 }
1193 }
1194
1195 /* check if all CU option have been evalueted */
1196 if((1 == cu_eval_done) && (1 == subcu_eval_done) && (1 == subpu_eval_done))
1197 {
1198 break;
1199 }
1200
1201 /* RDOPT related copies and settings */
1202 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1203
1204 /* Assign ME/Intra pred buf. to the current intra cand. since we
1205 are storing pred data for final_reon function */
1206 {
1207 pv_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_curr_idx];
1208 }
1209
1210 /* RDOPT copy States : Prev Cu best to current init */
1211 COPY_CABAC_STATES(
1212 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1213 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1214 IHEVC_CAB_CTXT_END);
1215
1216 /* call the function which performs the normative Intra encode */
1217 rd_opt_cost = ((pf_intra_rdopt_cu_ntu)ps_ctxt->pv_intra_rdopt_cu_ntu)(
1218 ps_ctxt,
1219 ps_cu_prms,
1220 pv_pred,
1221 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_curr_idx],
1222 &s_chrm_cu_buf_prms,
1223 pu1_mode,
1224 ps_cu_analyse,
1225 pv_curr_src,
1226 pv_cu_left,
1227 pv_cu_top,
1228 pv_cu_top_left,
1229 ps_left_nbr_4x4,
1230 ps_top_nbr_4x4,
1231 nbr_4x4_left_strd,
1232 cu_left_stride,
1233 rd_opt_curr_idx,
1234 curr_func_mode,
1235 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1236 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1237 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1238 100.0);
1239
1240 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1241 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1242 {
1243 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1244 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1245 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1246 }
1247 #endif
1248
1249 /* based on the rd opt cost choose the best and current index */
1250 if(rd_opt_cost < rd_opt_least_cost)
1251 {
1252 /* swap the best and current indx */
1253 rd_opt_best_idx = !rd_opt_best_idx;
1254 rd_opt_curr_idx = !rd_opt_curr_idx;
1255 i4_best_cu_qp = ps_ctxt->i4_cu_qp;
1256
1257 rd_opt_least_cost = rd_opt_cost;
1258 ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost;
1259 }
1260
1261 if((TU_EQ_SUBCU == curr_func_mode) &&
1262 (ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_intra_flag) &&
1263 (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P2) && !FORCE_INTRA_TU_DEPTH_TO_0)
1264 {
1265 UWORD8 au1_tu_eq_cu_div2_modes[4];
1266 UWORD8 au1_freq_of_mode[4];
1267
1268 if(ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_part_mode == SIZE_2Nx2N)
1269 {
1270 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1271 255; //ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode[0];
1272 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] =
1273 255;
1274 }
1275 else
1276 {
1277 WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D(
1278 ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode,
1279 au1_tu_eq_cu_div2_modes,
1280 au1_freq_of_mode,
1281 4);
1282
1283 if(2 == i4_num_clusters)
1284 {
1285 if(au1_freq_of_mode[0] == 3)
1286 {
1287 ps_cu_analyse->s_cu_intra_cand
1288 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1289 au1_tu_eq_cu_div2_modes[0];
1290 ps_cu_analyse->s_cu_intra_cand
1291 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255;
1292 }
1293 else if(au1_freq_of_mode[1] == 3)
1294 {
1295 ps_cu_analyse->s_cu_intra_cand
1296 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1297 au1_tu_eq_cu_div2_modes[1];
1298 ps_cu_analyse->s_cu_intra_cand
1299 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255;
1300 }
1301 else
1302 {
1303 ps_cu_analyse->s_cu_intra_cand
1304 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1305 au1_tu_eq_cu_div2_modes[0];
1306 ps_cu_analyse->s_cu_intra_cand
1307 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] =
1308 au1_tu_eq_cu_div2_modes[1];
1309 ps_cu_analyse->s_cu_intra_cand
1310 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[2] = 255;
1311 }
1312 }
1313 }
1314 }
1315
1316 /* set the neighbour map to 0 */
1317 ihevce_set_nbr_map(
1318 ps_ctxt->pu1_ctb_nbr_map,
1319 ps_ctxt->i4_nbr_map_strd,
1320 (ps_cu_analyse->b3_cu_pos_x << 1),
1321 (ps_cu_analyse->b3_cu_pos_y << 1),
1322 (ps_cu_analyse->u1_cu_size >> 2),
1323 0);
1324 }
1325
1326 } /* end of Intra RD OPT cand evaluation */
1327
1328 ASSERT(i4_best_cu_qp > (ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1));
1329 ps_ctxt->i4_cu_qp = i4_best_cu_qp;
1330 ps_cu_analyse->i1_cu_qp = i4_best_cu_qp;
1331
1332 /* --------------------------------------- */
1333 /* --------Final mode Recon ---------- */
1334 /* --------------------------------------- */
1335 {
1336 enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
1337 void *pv_final_pred = NULL;
1338 WORD32 final_pred_strd = 0;
1339 void *pv_final_pred_chrm = NULL;
1340 WORD32 final_pred_strd_chrm = 0;
1341 WORD32 packed_pred_mode;
1342
1343 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1344 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
1345 {
1346 pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0];
1347 }
1348 #else
1349 pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0];
1350 #endif
1351
1352 ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1353 packed_pred_mode =
1354 ps_enc_loop_bestprms->u1_intra_flag + (ps_enc_loop_bestprms->u1_skip_flag) * 2;
1355
1356 if(!ps_ctxt->u1_is_input_data_hbd)
1357 {
1358 if(ps_enc_loop_bestprms->u1_intra_flag)
1359 {
1360 pv_final_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_best_idx];
1361 final_pred_strd =
1362 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_best_idx];
1363 }
1364 else
1365 {
1366 pv_final_pred = ps_best_inter_cand->pu1_pred_data;
1367 final_pred_strd = ps_best_inter_cand->i4_pred_data_stride;
1368 }
1369
1370 pv_final_pred_chrm =
1371 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
1372 rd_opt_best_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) +
1373 (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
1374 final_pred_strd_chrm =
1375 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
1376 }
1377
1378 ihevce_set_eval_flags(ps_ctxt, ps_enc_loop_bestprms);
1379
1380 {
1381 final_mode_process_prms_t s_prms;
1382
1383 void *pv_cu_luma_recon;
1384 void *pv_cu_chroma_recon;
1385 WORD32 luma_stride, chroma_stride;
1386
1387 if(!ps_ctxt->u1_is_input_data_hbd)
1388 {
1389 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1390 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
1391 {
1392 pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon;
1393 pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon;
1394 luma_stride = ps_cu_analyse->u1_cu_size;
1395 chroma_stride = ps_cu_analyse->u1_cu_size;
1396 }
1397 else
1398 {
1399 /* based on CU position derive the luma pointers */
1400 pv_cu_luma_recon = pu1_final_recon;
1401
1402 /* based on CU position derive the chroma pointers */
1403 pv_cu_chroma_recon = s_chrm_cu_buf_prms.pu1_final_recon;
1404
1405 luma_stride = ps_cu_prms->i4_luma_recon_stride;
1406
1407 chroma_stride = ps_cu_prms->i4_chrm_recon_stride;
1408 }
1409 #else
1410 pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon;
1411 pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon;
1412 luma_stride = ps_cu_analyse->u1_cu_size;
1413 chroma_stride = ps_cu_analyse->u1_cu_size;
1414 #endif
1415
1416 s_prms.ps_cu_nbr_prms = &s_cu_nbr_prms;
1417 s_prms.ps_best_inter_cand = ps_best_inter_cand;
1418 s_prms.ps_chrm_cu_buf_prms = &s_chrm_cu_buf_prms;
1419 s_prms.packed_pred_mode = packed_pred_mode;
1420 s_prms.rd_opt_best_idx = rd_opt_best_idx;
1421 s_prms.pv_src = pu1_curr_src;
1422 s_prms.src_strd = ps_cu_prms->i4_luma_src_stride;
1423 s_prms.pv_pred = pv_final_pred;
1424 s_prms.pred_strd = final_pred_strd;
1425 s_prms.pv_pred_chrm = pv_final_pred_chrm;
1426 s_prms.pred_chrm_strd = final_pred_strd_chrm;
1427 s_prms.pu1_final_ecd_data = pu1_ecd_data;
1428 s_prms.pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
1429 s_prms.csbf_strd = ps_ctxt->i4_cu_csbf_strd;
1430 s_prms.pv_luma_recon = pv_cu_luma_recon;
1431 s_prms.recon_luma_strd = luma_stride;
1432 s_prms.pv_chrm_recon = pv_cu_chroma_recon;
1433 s_prms.recon_chrma_strd = chroma_stride;
1434 s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
1435 s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
1436 s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size;
1437 s_prms.i1_cu_qp = ps_cu_analyse->i1_cu_qp;
1438 s_prms.u1_will_cabac_state_change = 1;
1439 s_prms.u1_recompute_sbh_and_rdoq = 0;
1440 s_prms.u1_is_first_pass = 1;
1441 }
1442
1443 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
1444 s_prms.u1_is_cu_noisy = !ps_enc_loop_bestprms->u1_intra_flag
1445 ? ps_cu_prms->u1_is_cu_noisy
1446 : ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY;
1447 #endif
1448
1449 ((pf_final_rdopt_mode_prcs)ps_ctxt->pv_final_rdopt_mode_prcs)(ps_ctxt, &s_prms);
1450
1451 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1452 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1453 {
1454 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1455 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1456 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1457 }
1458 #endif
1459 }
1460 }
1461
1462 /* --------------------------------------- */
1463 /* --------Populate CU out prms ---------- */
1464 /* --------------------------------------- */
1465 {
1466 enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
1467 UWORD8 *pu1_pu_map;
1468 ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1469
1470 /* Corner case : If Part is 2Nx2N and Merge has all TU with zero cbf */
1471 /* then it has to be coded as skip CU */
1472 if((SIZE_2Nx2N == ps_enc_loop_bestprms->u1_part_mode) &&
1473 (1 == ps_enc_loop_bestprms->as_pu_enc_loop[0].b1_merge_flag) &&
1474 (0 == ps_enc_loop_bestprms->u1_skip_flag) && (0 == ps_enc_loop_bestprms->u1_is_cu_coded))
1475 {
1476 ps_enc_loop_bestprms->u1_skip_flag = 1;
1477 }
1478
1479 /* update number PUs in CU */
1480 ps_cu_prms->i4_num_pus_in_cu = ps_enc_loop_bestprms->u2_num_pus_in_cu;
1481
1482 /* ---- populate the colocated pu map index --- */
1483 for(ctr = 0; ctr < ps_enc_loop_bestprms->u2_num_pus_in_cu; ctr++)
1484 {
1485 WORD32 i;
1486 WORD32 vert_ht;
1487 WORD32 horz_wd;
1488
1489 if(ps_enc_loop_bestprms->u1_intra_flag)
1490 {
1491 ps_enc_loop_bestprms->as_col_pu_enc_loop[ctr].b1_intra_flag = 1;
1492 vert_ht = ps_cu_analyse->u1_cu_size >> 2;
1493 horz_wd = ps_cu_analyse->u1_cu_size >> 2;
1494 }
1495 else
1496 {
1497 vert_ht = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_ht + 1) << 2) >> 2);
1498 horz_wd = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_wd + 1) << 2) >> 2);
1499 }
1500
1501 pu1_pu_map = pu1_col_pu_map + ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_x;
1502 pu1_pu_map += (ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_y * num_4x4_in_ctb);
1503
1504 for(i = 0; i < vert_ht; i++)
1505 {
1506 memset(pu1_pu_map, col_start_pu_idx, horz_wd);
1507 pu1_pu_map += num_4x4_in_ctb;
1508 }
1509 /* increment the index */
1510 col_start_pu_idx++;
1511 }
1512 /* ---- copy the colocated PUs to frm pu ----- */
1513 memcpy(
1514 ps_col_pu,
1515 &ps_enc_loop_bestprms->as_col_pu_enc_loop[0],
1516 ps_enc_loop_bestprms->u2_num_pus_in_cu * sizeof(pu_col_mv_t));
1517
1518 /*---populate qp for 4x4 nbr array based on skip and cbf zero flag---*/
1519 {
1520 entropy_context_t *ps_entropy_ctxt;
1521
1522 WORD32 diff_cu_qp_delta_depth, log2_ctb_size;
1523
1524 WORD32 log2_min_cu_qp_delta_size;
1525 UWORD32 block_addr_align;
1526 ps_entropy_ctxt = ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt;
1527
1528 log2_ctb_size = ps_entropy_ctxt->i1_log2_ctb_size;
1529 diff_cu_qp_delta_depth = ps_entropy_ctxt->ps_pps->i1_diff_cu_qp_delta_depth;
1530
1531 log2_min_cu_qp_delta_size = log2_ctb_size - diff_cu_qp_delta_depth;
1532 block_addr_align = 15 << (log2_min_cu_qp_delta_size - 3);
1533
1534 ps_entropy_ctxt->i4_qg_pos_x = ps_cu_analyse->b3_cu_pos_x & block_addr_align;
1535 ps_entropy_ctxt->i4_qg_pos_y = ps_cu_analyse->b3_cu_pos_y & block_addr_align;
1536 /*Update the Qp value used. It will not have a valid value iff
1537 current CU is (skipped/no_cbf). In that case the Qp needed for
1538 deblocking is calculated from top/left/previous coded CU*/
1539
1540 ps_ctxt->ps_enc_out_ctxt->i1_cu_qp = ps_cu_analyse->i1_cu_qp;
1541
1542 if(ps_entropy_ctxt->i4_qg_pos_x == ps_cu_analyse->b3_cu_pos_x &&
1543 ps_entropy_ctxt->i4_qg_pos_y == ps_cu_analyse->b3_cu_pos_y)
1544 {
1545 ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 1;
1546 }
1547 else
1548 {
1549 ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 0;
1550 }
1551 }
1552
1553 /* -- at the end of CU set the neighbour map to 1 -- */
1554 ihevce_set_nbr_map(
1555 ps_ctxt->pu1_ctb_nbr_map,
1556 ps_ctxt->i4_nbr_map_strd,
1557 (ps_cu_analyse->b3_cu_pos_x << 1),
1558 (ps_cu_analyse->b3_cu_pos_y << 1),
1559 (ps_cu_analyse->u1_cu_size >> 2),
1560 1);
1561
1562 /* -- at the end of CU update best cabac rdopt states -- */
1563 /* -- and also set the top row skip flags ------------- */
1564 ihevce_entropy_update_best_cu_states(
1565 &ps_ctxt->s_rdopt_entropy_ctxt,
1566 ps_cu_analyse->b3_cu_pos_x,
1567 ps_cu_analyse->b3_cu_pos_y,
1568 ps_cu_analyse->u1_cu_size,
1569 0,
1570 rd_opt_best_idx);
1571 }
1572
1573 /* Store Output struct */
1574 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1575 {
1576 {
1577 memcpy(
1578 &ps_ctxt->ps_enc_out_ctxt->s_cu_prms,
1579 &ps_ctxt->as_cu_prms[rd_opt_best_idx],
1580 sizeof(enc_loop_cu_final_prms_t));
1581 }
1582
1583 memcpy(
1584 &ps_ctxt->as_cu_recur_nbr[0],
1585 &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0],
1586 sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) *
1587 (ps_cu_analyse->u1_cu_size >> 2));
1588
1589 ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms;
1590
1591 ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0];
1592 }
1593 #else
1594 if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2)
1595 {
1596 ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1597
1598 ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0];
1599
1600 if(ps_ctxt->u1_disable_intra_eval && ps_ctxt->i4_deblk_pad_hpel_cur_pic)
1601 {
1602 /* Wait till top data is ready */
1603 /* Currently checking till top right CU */
1604 curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
1605
1606 if(i4_ctb_y_off == 0)
1607 {
1608 /* No wait for 1st row */
1609 cu_top_right_offset = -(MAX_CTB_SIZE);
1610 {
1611 ihevce_tile_params_t *ps_col_tile_params =
1612 ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
1613 ps_ctxt->i4_tile_col_idx);
1614
1615 /* No wait for 1st row */
1616 cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
1617 }
1618 cu_top_right_dep_pos = 0;
1619 }
1620 else
1621 {
1622 cu_top_right_offset = (ps_cu_analyse->u1_cu_size);
1623 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
1624 }
1625
1626 if(0 == ps_cu_analyse->b3_cu_pos_y)
1627 {
1628 ihevce_dmgr_chk_row_row_sync(
1629 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
1630 curr_cu_pos_in_row,
1631 cu_top_right_offset,
1632 cu_top_right_dep_pos,
1633 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1634 ps_ctxt->thrd_id);
1635 }
1636 }
1637 }
1638 else
1639 {
1640 {
1641 memcpy(
1642 &ps_ctxt->ps_enc_out_ctxt->s_cu_prms,
1643 &ps_ctxt->as_cu_prms[rd_opt_best_idx],
1644 sizeof(enc_loop_cu_final_prms_t));
1645 }
1646
1647 memcpy(
1648 &ps_ctxt->as_cu_recur_nbr[0],
1649 &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0],
1650 sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) *
1651 (ps_cu_analyse->u1_cu_size >> 2));
1652
1653 ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms;
1654
1655 ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0];
1656 }
1657 #endif
1658
1659 ps_ctxt->s_pred_buf_data.u4_is_buf_in_use &=
1660 ~((1 << (ps_ctxt->i4_max_num_inter_rdopt_cands + 4)) - 1);
1661
1662 return rd_opt_least_cost;
1663 }
1664
1665 /*!
1666 ******************************************************************************
1667 * \if Function name : ihevce_enc_loop_process_row \endif
1668 *
1669 * \brief
1670 * Row level enc_loop pass function
1671 *
1672 * \param[in] ps_ctxt : pointer to enc_loop module
1673 * \param[in] ps_curr_src_bufs : pointer to input yuv buffer (row buffer)
1674 * \param[out] ps_curr_recon_bufs : pointer recon picture structure pointer (row buffer)
1675 * \param[in] ps_ctb_in : pointer CTB structure (output of ME/IPE) (row buffer)
1676 * \param[out] ps_ctb_out : pointer CTB output structure (row buffer)
1677 * \param[out] ps_row_cu : pointer CU output structure (row buffer)
1678 * \param[out] ps_row_tu : pointer TU output structure (row buffer)
1679 * \param[out] pi2_frm_coeffs : pointer coeff output (row buffer)
1680 * \param[in] i4_poc : current poc. Needed to send recon in dist-client mode
1681 *
1682 * \return
1683 * None
1684 *
1685 * Note : Currently the frame level calculations assume that the
1686 * frame width of the input / recon is an exact multiple of the ctb size
1687 *
1688 * \author
1689 * Ittiam
1690 *
1691 *****************************************************************************
1692 */
ihevce_enc_loop_process_row(ihevce_enc_loop_ctxt_t * ps_ctxt,iv_enc_yuv_buf_t * ps_curr_src_bufs,iv_enc_yuv_buf_t * ps_curr_recon_bufs,iv_enc_yuv_buf_src_t * ps_curr_recon_bufs_src,UWORD8 ** ppu1_y_subpel_planes,ctb_analyse_t * ps_ctb_in,ctb_enc_loop_out_t * ps_ctb_out,ipe_l0_ctb_analyse_for_me_t * ps_row_ipe_analyse,cur_ctb_cu_tree_t * ps_row_cu_tree,cu_enc_loop_out_t * ps_row_cu,tu_enc_loop_out_t * ps_row_tu,pu_t * ps_row_pu,pu_col_mv_t * ps_row_col_pu,UWORD16 * pu2_num_pu_map,UWORD8 * pu1_row_pu_map,UWORD8 * pu1_row_ecd_data,UWORD32 * pu4_pu_offsets,frm_ctb_ctxt_t * ps_frm_ctb_prms,WORD32 vert_ctr,recon_pic_buf_t * ps_frm_recon,void * pv_dep_mngr_encloop_dep_me,pad_interp_recon_frm_t * ps_pad_interp_recon,WORD32 i4_pass,multi_thrd_ctxt_t * ps_multi_thrd_ctxt,ihevce_tile_params_t * ps_tile_params)1693 void ihevce_enc_loop_process_row(
1694 ihevce_enc_loop_ctxt_t *ps_ctxt,
1695 iv_enc_yuv_buf_t *ps_curr_src_bufs,
1696 iv_enc_yuv_buf_t *ps_curr_recon_bufs,
1697 iv_enc_yuv_buf_src_t *ps_curr_recon_bufs_src,
1698 UWORD8 **ppu1_y_subpel_planes,
1699 ctb_analyse_t *ps_ctb_in,
1700 ctb_enc_loop_out_t *ps_ctb_out,
1701 ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse,
1702 cur_ctb_cu_tree_t *ps_row_cu_tree,
1703 cu_enc_loop_out_t *ps_row_cu,
1704 tu_enc_loop_out_t *ps_row_tu,
1705 pu_t *ps_row_pu,
1706 pu_col_mv_t *ps_row_col_pu,
1707 UWORD16 *pu2_num_pu_map,
1708 UWORD8 *pu1_row_pu_map,
1709 UWORD8 *pu1_row_ecd_data,
1710 UWORD32 *pu4_pu_offsets,
1711 frm_ctb_ctxt_t *ps_frm_ctb_prms,
1712 WORD32 vert_ctr,
1713 recon_pic_buf_t *ps_frm_recon,
1714 void *pv_dep_mngr_encloop_dep_me,
1715 pad_interp_recon_frm_t *ps_pad_interp_recon,
1716 WORD32 i4_pass,
1717 multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
1718 ihevce_tile_params_t *ps_tile_params)
1719 {
1720 enc_loop_cu_prms_t s_cu_prms;
1721 ctb_enc_loop_out_t *ps_ctb_out_dblk;
1722
1723 WORD32 ctb_ctr, ctb_start, ctb_end;
1724 WORD32 col_pu_map_idx;
1725 WORD32 num_ctbs_horz_pic;
1726 WORD32 ctb_size;
1727 WORD32 last_ctb_row_flag;
1728 WORD32 last_ctb_col_flag;
1729 WORD32 last_hz_ctb_wd;
1730 WORD32 last_vt_ctb_ht;
1731 void *pv_dep_mngr_enc_loop_dblk;
1732 void *pv_dep_mngr_enc_loop_cu_top_right;
1733 WORD32 dblk_offset, dblk_check_dep_pos;
1734 WORD32 aux_offset, aux_check_dep_pos;
1735 void *pv_dep_mngr_me_dep_encloop;
1736 ctb_enc_loop_out_t *ps_ctb_out_sao;
1737 /*Structure to store deblocking parameters at CTB-row level*/
1738 deblk_ctbrow_prms_t s_deblk_ctb_row_params;
1739 UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2);
1740
1741 pv_dep_mngr_me_dep_encloop = (void *)ps_frm_recon->pv_dep_mngr_recon;
1742 num_ctbs_horz_pic = ps_frm_ctb_prms->i4_num_ctbs_horz;
1743 ctb_size = ps_frm_ctb_prms->i4_ctb_size;
1744
1745 /* Store the num_ctb_horz in sao context*/
1746 ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_horz = ps_frm_ctb_prms->i4_num_ctbs_horz;
1747 ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_vert = ps_frm_ctb_prms->i4_num_ctbs_vert;
1748
1749 /* Get the EncLoop Deblock Dep Mngr */
1750 pv_dep_mngr_enc_loop_dblk = ps_ctxt->pv_dep_mngr_enc_loop_dblk;
1751 /* Get the EncLoop Top-Right CU Dep Mngr */
1752 pv_dep_mngr_enc_loop_cu_top_right = ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right;
1753 /* Set Variables for Dep. Checking and Setting */
1754 aux_check_dep_pos = vert_ctr;
1755 aux_offset = 2; /* Should be there for 0th row also */
1756 if(vert_ctr > 0)
1757 {
1758 dblk_check_dep_pos = vert_ctr - 1;
1759 dblk_offset = 2;
1760 }
1761 else
1762 {
1763 /* First row should run without waiting */
1764 dblk_check_dep_pos = 0;
1765 dblk_offset = -(ps_tile_params->i4_first_sample_x + 1);
1766 }
1767
1768 /* check if the current row processed in last CTb row */
1769 last_ctb_row_flag = (vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1));
1770
1771 /* Valid Width (pixels) in the last CTB in every row (padding cases) */
1772 last_hz_ctb_wd = ps_frm_ctb_prms->i4_cu_aligned_pic_wd - ((num_ctbs_horz_pic - 1) * ctb_size);
1773
1774 /* Valid Height (pixels) in the last CTB row (padding cases) */
1775 last_vt_ctb_ht = ps_frm_ctb_prms->i4_cu_aligned_pic_ht -
1776 ((ps_frm_ctb_prms->i4_num_ctbs_vert - 1) * ctb_size);
1777 /* reset the states copied flag */
1778 ps_ctxt->u1_cabac_states_next_row_copied_flag = 0;
1779 ps_ctxt->u1_cabac_states_first_cu_copied_flag = 0;
1780
1781 /* populate the cu prms which are common for entire ctb row */
1782 s_cu_prms.i4_luma_src_stride = ps_curr_src_bufs->i4_y_strd;
1783 s_cu_prms.i4_chrm_src_stride = ps_curr_src_bufs->i4_uv_strd;
1784 s_cu_prms.i4_luma_recon_stride = ps_curr_recon_bufs->i4_y_strd;
1785 s_cu_prms.i4_chrm_recon_stride = ps_curr_recon_bufs->i4_uv_strd;
1786 s_cu_prms.i4_ctb_size = ctb_size;
1787
1788 ps_ctxt->i4_is_first_cu_qg_coded = 0;
1789
1790 /* Initialize the number of PUs for the first CTB to 0 */
1791 *pu2_num_pu_map = 0;
1792
1793 /*Getting the address of BS and Qp arrays and other info*/
1794 memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t));
1795 {
1796 WORD32 num_ctbs_horz_tile;
1797 /* Update the pointers which are accessed not by using ctb_ctr
1798 to the tile start here! */
1799 ps_ctb_in += ps_tile_params->i4_first_ctb_x;
1800 ps_ctb_out += ps_tile_params->i4_first_ctb_x;
1801
1802 ps_row_cu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_cus_in_ctb);
1803 ps_row_tu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_tus_in_ctb);
1804 ps_row_pu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb);
1805 pu1_row_pu_map += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb);
1806 pu1_row_ecd_data +=
1807 (ps_tile_params->i4_first_ctb_x *
1808 ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_num_tus_in_ctb << 1)
1809 : ((ps_frm_ctb_prms->i4_num_tus_in_ctb * 3) >> 1)) *
1810 MAX_SCAN_COEFFS_BYTES_4x4);
1811
1812 /* Update the pointers to the tile start */
1813 s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
1814 (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one vertical edge per 8x8 block
1815 s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
1816 (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one horizontal edge per 8x8 block
1817 s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2));
1818
1819 num_ctbs_horz_tile = ps_tile_params->i4_curr_tile_wd_in_ctb_unit;
1820
1821 ctb_start = ps_tile_params->i4_first_ctb_x;
1822 ctb_end = ps_tile_params->i4_first_ctb_x + num_ctbs_horz_tile;
1823 }
1824 ps_ctb_out_dblk = ps_ctb_out;
1825
1826 ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_ctxt->i4_frame_qp;
1827
1828 /* --------- Loop over all the CTBs in a row --------------- */
1829 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
1830 {
1831 cu_final_update_prms s_cu_update_prms;
1832
1833 cur_ctb_cu_tree_t *ps_cu_tree_analyse;
1834 me_ctb_data_t *ps_cu_me_data;
1835 ipe_l0_ctb_analyse_for_me_t *ps_ctb_ipe_analyse;
1836 cu_enc_loop_out_t *ps_cu_final;
1837 pu_col_mv_t *ps_ctb_col_pu;
1838
1839 WORD32 cur_ctb_ht, cur_ctb_wd;
1840 WORD32 last_cu_pos_in_ctb;
1841 WORD32 last_cu_size;
1842 WORD32 num_pus_in_ctb;
1843 UWORD8 u1_is_ctb_noisy;
1844 ps_ctb_col_pu = ps_row_col_pu + ctb_ctr * ps_frm_ctb_prms->i4_num_pus_in_ctb;
1845
1846 if(ctb_ctr)
1847 {
1848 ps_ctxt->i4_prev_QP = ps_ctxt->i4_last_cu_qp_from_prev_ctb;
1849 }
1850 /* If sub-pic level RC is enabled */
1851 if(ps_ctxt->i4_sub_pic_level_rc)
1852 {
1853 ihevce_sub_pic_rc_scale_query((void *)ps_multi_thrd_ctxt, (void *)ps_ctxt);
1854 }
1855 /* check if the current CTB is in the last CTB column of the picture */
1856 last_ctb_col_flag = (ctb_ctr == (num_ctbs_horz_pic - 1));
1857 if(1 == last_ctb_col_flag)
1858 {
1859 cur_ctb_wd = last_hz_ctb_wd;
1860 }
1861 else
1862 {
1863 cur_ctb_wd = ctb_size;
1864 }
1865
1866 /* If it's the last CTB row, get the actual ht of CTB */
1867 if(1 == last_ctb_row_flag)
1868 {
1869 cur_ctb_ht = last_vt_ctb_ht;
1870 }
1871 else
1872 {
1873 cur_ctb_ht = ctb_size;
1874 }
1875
1876 ps_ctxt->u4_cur_ctb_ht = cur_ctb_ht;
1877 ps_ctxt->u4_cur_ctb_wd = cur_ctb_wd;
1878
1879 /* Wait till reference frame recon is available */
1880
1881 /* ------------ Wait till current data is ready from ME -------------- */
1882
1883 /*only for ref instance and Non I pics */
1884 if((ps_ctxt->i4_bitrate_instance_num == 0) &&
1885 ((ISLICE != ps_ctxt->i1_slice_type) || L0ME_IN_OPENLOOP_MODE))
1886 {
1887 if(ctb_ctr < (num_ctbs_horz_pic))
1888 {
1889 ihevce_dmgr_chk_row_row_sync(
1890 pv_dep_mngr_encloop_dep_me,
1891 ctb_ctr,
1892 1,
1893 vert_ctr,
1894 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1895 ps_ctxt->thrd_id);
1896 }
1897 }
1898
1899 /* store the cu pointer for current ctb out */
1900 ps_ctb_out->ps_enc_cu = ps_row_cu;
1901 ps_cu_final = ps_row_cu;
1902
1903 /* Get the base point of CU recursion tree */
1904 if(ISLICE != ps_ctxt->i1_slice_type)
1905 {
1906 ps_cu_tree_analyse = ps_ctb_in->ps_cu_tree;
1907 ASSERT(ps_ctb_in->ps_cu_tree == (ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE)));
1908 }
1909 else
1910 {
1911 /* Initialize ptr to current CTB */
1912 ps_cu_tree_analyse = ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE);
1913 }
1914
1915 /* Get the ME data pointer for 16x16 block data in ctb */
1916 ps_cu_me_data = ps_ctb_in->ps_me_ctb_data;
1917 u1_is_ctb_noisy = ps_ctb_in->s_ctb_noise_params.i4_noise_present;
1918 s_cu_prms.u1_is_cu_noisy = u1_is_ctb_noisy;
1919 s_cu_prms.pu1_is_8x8Blk_noisy = ps_ctb_in->s_ctb_noise_params.au1_is_8x8Blk_noisy;
1920
1921 /* store the ctb level prms in cu prms */
1922 s_cu_prms.i4_ctb_pos = ctb_ctr;
1923
1924 s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size;
1925 s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size;
1926
1927 {
1928 s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size;
1929 s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size;
1930 }
1931
1932 s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size;
1933
1934 s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size;
1935
1936 s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size;
1937
1938 /* Initialize ptr to current CTB */
1939 ps_ctb_ipe_analyse = ps_row_ipe_analyse + ctb_ctr; // * ctb_size;
1940
1941 /* reset the map idx for current ctb */
1942 col_pu_map_idx = 0;
1943 num_pus_in_ctb = 0;
1944
1945 /* reset the map buffer to 0*/
1946
1947 memset(
1948 &ps_ctxt->au1_nbr_ctb_map[0][0],
1949 0,
1950 (MAX_PU_IN_CTB_ROW + 1 + 8) * (MAX_PU_IN_CTB_ROW + 1 + 8));
1951
1952 /* set the CTB neighbour availability flags */
1953 ihevce_set_ctb_nbr(
1954 &ps_ctb_out->s_ctb_nbr_avail_flags,
1955 ps_ctxt->pu1_ctb_nbr_map,
1956 ps_ctxt->i4_nbr_map_strd,
1957 ctb_ctr,
1958 vert_ctr,
1959 ps_frm_ctb_prms);
1960
1961 /* -------- update the cur CTB offsets for inter prediction-------- */
1962 ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = ctb_ctr * ctb_size;
1963 ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = vert_ctr * ctb_size;
1964
1965 /* -------- update the cur CTB offsets for MV prediction-------- */
1966 ps_ctxt->s_mv_pred_ctxt.i4_ctb_x = ctb_ctr;
1967 ps_ctxt->s_mv_pred_ctxt.i4_ctb_y = vert_ctr;
1968
1969 /* -------------- Boundary Strength Initialization ----------- */
1970 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
1971 {
1972 ihevce_bs_init_ctb(&ps_ctxt->s_deblk_bs_prms, ps_frm_ctb_prms, ctb_ctr, vert_ctr);
1973 }
1974
1975 /* -------- update cur CTB offsets for entropy rdopt context------- */
1976 ihevce_entropy_rdo_ctb_init(&ps_ctxt->s_rdopt_entropy_ctxt, ctb_ctr, vert_ctr);
1977
1978 /* --------- CU Recursion --------------- */
1979
1980 {
1981 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1982 WORD32 i4_max_tree_depth = 4;
1983 #endif
1984 WORD32 i4_tree_depth = 0;
1985 /* Init no. of CU in CTB to 0*/
1986 ps_ctb_out->u1_num_cus_in_ctb = 0;
1987
1988 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1989 if(ps_ctxt->i4_bitrate_instance_num == 0)
1990 {
1991 WORD32 i4_max_tree_depth = 4;
1992 WORD32 i;
1993 for(i = 0; i < i4_max_tree_depth; i++)
1994 {
1995 COPY_CABAC_STATES(
1996 &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
1997 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1998 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
1999 }
2000 }
2001 #else
2002 if(ps_ctxt->i4_bitrate_instance_num == 0)
2003 {
2004 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2005 {
2006 WORD32 i4_max_tree_depth = 4;
2007 WORD32 i;
2008 for(i = 0; i < i4_max_tree_depth; i++)
2009 {
2010 COPY_CABAC_STATES(
2011 &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2012 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2013 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2014 }
2015 }
2016 }
2017
2018 #endif
2019 if(ps_ctxt->i4_bitrate_instance_num == 0)
2020 {
2021 /* FOR I- PIC populate the curr_ctb accordingly */
2022 if(ISLICE == ps_ctxt->i1_slice_type)
2023 {
2024 ps_ctb_ipe_analyse->ps_cu_tree_root = ps_cu_tree_analyse;
2025 ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1;
2026
2027 ihevce_populate_cu_tree(
2028 ps_ctb_ipe_analyse,
2029 ps_cu_tree_analyse,
2030 0,
2031 (IHEVCE_QUALITY_CONFIG_T)ps_ctxt->i4_quality_preset,
2032 POS_NA,
2033 POS_NA,
2034 POS_NA);
2035 }
2036 }
2037 ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1;
2038 ps_ctxt->ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2039 ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2040 if(ps_ctxt->i4_use_ctb_level_lamda)
2041 {
2042 ihevce_compute_cu_level_QP(
2043 ps_ctxt, -1, ps_ctb_ipe_analyse->i4_64x64_act_factor[3][1], 0);
2044 }
2045
2046 s_cu_update_prms.ppu1_row_ecd_data = &pu1_row_ecd_data;
2047 s_cu_update_prms.pi4_last_cu_pos_in_ctb = &last_cu_pos_in_ctb;
2048 s_cu_update_prms.pi4_last_cu_size = &last_cu_size;
2049 s_cu_update_prms.pi4_num_pus_in_ctb = &num_pus_in_ctb;
2050 s_cu_update_prms.pps_cu_final = &ps_cu_final;
2051 s_cu_update_prms.pps_row_pu = &ps_row_pu;
2052 s_cu_update_prms.pps_row_tu = &ps_row_tu;
2053 s_cu_update_prms.pu1_num_cus_in_ctb_out = &ps_ctb_out->u1_num_cus_in_ctb;
2054
2055 // source satd computation
2056 /* compute the source 8x8 SATD for the current CTB */
2057 /* populate pui4_source_satd in some structure and pass it inside */
2058 if(ps_ctxt->u1_enable_psyRDOPT)
2059 {
2060 /* declare local variables */
2061 WORD32 i;
2062 WORD32 ctb_size;
2063 WORD32 num_comp_had_blocks;
2064 UWORD8 *pu1_l0_block;
2065 WORD32 block_ht;
2066 WORD32 block_wd;
2067 WORD32 ht_offset;
2068 WORD32 wd_offset;
2069
2070 WORD32 num_horz_blocks;
2071 WORD32 had_block_size;
2072 WORD32 total_had_block_size;
2073 WORD16 pi2_residue_had_zscan[64];
2074 UWORD8 ai1_zeros_buffer[64];
2075
2076 WORD32 index_satd;
2077 WORD32 is_hbd;
2078 /* initialize the variables */
2079 block_ht = cur_ctb_ht;
2080 block_wd = cur_ctb_wd;
2081
2082 is_hbd = ps_ctxt->u1_is_input_data_hbd;
2083
2084 had_block_size = 8;
2085 total_had_block_size = had_block_size * had_block_size;
2086
2087 for(i = 0; i < total_had_block_size; i++)
2088 {
2089 ai1_zeros_buffer[i] = 0;
2090 }
2091
2092 ctb_size = block_ht * block_wd; //ctb_width * ctb_height;
2093 num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
2094
2095 num_horz_blocks = block_wd / had_block_size; //ctb_width / had_block_size;
2096 ht_offset = -had_block_size;
2097 wd_offset = -had_block_size;
2098
2099 index_satd = 0;
2100 /* Loop over all 8x8 blocks in the CTB */
2101 for(i = 0; i < num_comp_had_blocks; i++)
2102 {
2103 if(i % num_horz_blocks == 0)
2104 {
2105 wd_offset = -had_block_size;
2106 ht_offset += had_block_size;
2107 }
2108 wd_offset += had_block_size;
2109
2110 if(!is_hbd)
2111 {
2112 /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
2113 pu1_l0_block = s_cu_prms.pu1_luma_src +
2114 ps_curr_src_bufs->i4_y_strd * ht_offset + wd_offset;
2115
2116 ps_ctxt->ai4_source_satd_8x8[index_satd] =
2117
2118 ps_ctxt->s_cmn_opt_func.pf_AC_HAD_8x8_8bit(
2119 pu1_l0_block,
2120 ps_curr_src_bufs->i4_y_strd,
2121 ai1_zeros_buffer,
2122 had_block_size,
2123 pi2_residue_had_zscan,
2124 had_block_size);
2125 }
2126 index_satd++;
2127 }
2128 }
2129
2130 if(ps_ctxt->u1_enable_psyRDOPT)
2131 {
2132 /* declare local variables */
2133 WORD32 i;
2134 WORD32 ctb_size;
2135 WORD32 num_comp_had_blocks;
2136 UWORD8 *pu1_l0_block;
2137 UWORD8 *pu1_l0_block_prev = NULL;
2138 WORD32 block_ht;
2139 WORD32 block_wd;
2140 WORD32 ht_offset;
2141 WORD32 wd_offset;
2142
2143 WORD32 num_horz_blocks;
2144 WORD32 had_block_size;
2145 WORD16 pi2_residue_had[64];
2146 UWORD8 ai1_zeros_buffer[64];
2147 WORD32 index_satd = 0;
2148
2149 WORD32 is_hbd;
2150 is_hbd = ps_ctxt->u1_is_input_data_hbd; // 8 bit
2151
2152 /* initialize the variables */
2153 /* change this based on the bit depth */
2154 // ps_ctxt->u1_chroma_array_type
2155 if(ps_ctxt->u1_chroma_array_type == 1)
2156 {
2157 block_ht = cur_ctb_ht / 2;
2158 block_wd = cur_ctb_wd / 2;
2159 }
2160 else
2161 {
2162 block_ht = cur_ctb_ht;
2163 block_wd = cur_ctb_wd / 2;
2164 }
2165
2166 had_block_size = 4;
2167 memset(ai1_zeros_buffer, 0, 64 * sizeof(UWORD8));
2168
2169 ctb_size = block_ht * block_wd; //ctb_width * ctb_height;
2170 num_comp_had_blocks = 2 * ctb_size / (had_block_size * had_block_size);
2171
2172 num_horz_blocks = 2 * block_wd / had_block_size; //ctb_width / had_block_size;
2173 ht_offset = -had_block_size;
2174 wd_offset = -had_block_size;
2175
2176 if(!is_hbd)
2177 {
2178 /* loop over for every 4x4 blocks in the CU for Cb */
2179 for(i = 0; i < num_comp_had_blocks; i++)
2180 {
2181 if(i % num_horz_blocks == 0)
2182 {
2183 wd_offset = -had_block_size;
2184 ht_offset += had_block_size;
2185 }
2186 wd_offset += had_block_size;
2187
2188 /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
2189 if(i % 2 != 0)
2190 {
2191 if(!is_hbd)
2192 {
2193 pu1_l0_block = pu1_l0_block_prev + 1;
2194 }
2195 }
2196 else
2197 {
2198 if(!is_hbd)
2199 {
2200 pu1_l0_block = s_cu_prms.pu1_chrm_src +
2201 s_cu_prms.i4_chrm_src_stride * ht_offset + wd_offset;
2202 pu1_l0_block_prev = pu1_l0_block;
2203 }
2204 }
2205
2206 if(had_block_size == 4)
2207 {
2208 if(!is_hbd)
2209 {
2210 ps_ctxt->ai4_source_chroma_satd[index_satd] =
2211 ps_ctxt->s_cmn_opt_func.pf_chroma_AC_HAD_4x4_8bit(
2212 pu1_l0_block,
2213 s_cu_prms.i4_chrm_src_stride,
2214 ai1_zeros_buffer,
2215 had_block_size,
2216 pi2_residue_had,
2217 had_block_size);
2218 }
2219
2220 index_satd++;
2221
2222 } // block size of 4x4
2223
2224 } // for all blocks
2225
2226 } // is hbd check
2227 }
2228
2229 ihevce_cu_recurse_decide(
2230 ps_ctxt,
2231 &s_cu_prms,
2232 ps_cu_tree_analyse,
2233 ps_cu_tree_analyse,
2234 ps_ctb_ipe_analyse,
2235 ps_cu_me_data,
2236 &ps_ctb_col_pu,
2237 &s_cu_update_prms,
2238 pu1_row_pu_map,
2239 &col_pu_map_idx,
2240 i4_tree_depth,
2241 ctb_ctr << 6,
2242 vert_ctr << 6,
2243 cur_ctb_ht);
2244
2245 if(ps_ctxt->i1_slice_type != ISLICE)
2246 {
2247 ASSERT(
2248 (cur_ctb_wd * cur_ctb_ht) <=
2249 ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree_analyse));
2250 }
2251 /* If sub-pic level RC is enabled */
2252 if(1 == ps_ctxt->i4_sub_pic_level_rc)
2253 {
2254 /*In a row, after the required CTB is reached, send data and query scale from Bit Control thread */
2255 ihevce_sub_pic_rc_in_data(
2256 (void *)ps_multi_thrd_ctxt,
2257 (void *)ps_ctxt,
2258 (void *)ps_ctb_ipe_analyse,
2259 (void *)ps_frm_ctb_prms);
2260 }
2261
2262 ps_ctxt->ps_enc_out_ctxt->u1_cu_size = 128;
2263
2264 } /* End of CU recursion block */
2265
2266 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2267 {
2268 ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2269 enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms;
2270 ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2271
2272 do
2273 {
2274 ihevce_update_final_cu_results(
2275 ps_ctxt,
2276 ps_enc_out_ctxt,
2277 ps_cu_prms,
2278 NULL, /* &ps_ctb_col_pu */
2279 NULL, /* &col_pu_map_idx */
2280 &s_cu_update_prms,
2281 ctb_ctr,
2282 vert_ctr);
2283
2284 ps_enc_out_ctxt++;
2285
2286 ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE);
2287
2288 } while(ps_enc_out_ctxt->u1_cu_size != 128);
2289 }
2290 #else
2291 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2292 {
2293 ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2294 enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms;
2295 ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2296
2297 do
2298 {
2299 ihevce_update_final_cu_results(
2300 ps_ctxt,
2301 ps_enc_out_ctxt,
2302 ps_cu_prms,
2303 NULL, /* &ps_ctb_col_pu */
2304 NULL, /* &col_pu_map_idx */
2305 &s_cu_update_prms,
2306 ctb_ctr,
2307 vert_ctr);
2308
2309 ps_enc_out_ctxt++;
2310
2311 ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE);
2312
2313 } while(ps_enc_out_ctxt->u1_cu_size != 128);
2314 }
2315 #endif
2316
2317 /* --- ctb level copy of data to left buffers--*/
2318 ((pf_enc_loop_ctb_left_copy)ps_ctxt->pv_enc_loop_ctb_left_copy)(ps_ctxt, &s_cu_prms);
2319
2320 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2321 {
2322 /* For the Unaligned CTB, make the invalid edge boundary strength 0 */
2323 ihevce_bs_clear_invalid(
2324 &ps_ctxt->s_deblk_bs_prms,
2325 last_ctb_row_flag,
2326 (ctb_ctr == (num_ctbs_horz_pic - 1)),
2327 last_hz_ctb_wd,
2328 last_vt_ctb_ht);
2329
2330 /* -----------------Read boundary strengths for current CTB------------- */
2331
2332 if((0 == ps_ctxt->i4_deblock_type) && (ps_ctxt->i4_deblk_pad_hpel_cur_pic))
2333 {
2334 /*Storing boundary strengths of current CTB*/
2335 UWORD32 *pu4_bs_horz = &ps_ctxt->s_deblk_bs_prms.au4_horz_bs[0];
2336 UWORD32 *pu4_bs_vert = &ps_ctxt->s_deblk_bs_prms.au4_vert_bs[0];
2337
2338 memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_vert, pu4_bs_vert, (ctb_size * 4) / 8);
2339 memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_horz, pu4_bs_horz, (ctb_size * 4) / 8);
2340 }
2341 //Increment for storing next CTB info
2342 s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2343 (ctb_size >> 3); //one vertical edge per 8x8 block
2344 s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2345 (ctb_size >> 3); //one horizontal edge per 8x8 block
2346 }
2347
2348 /* -------------- ctb level updates ----------------- */
2349 ps_row_cu += ps_ctb_out->u1_num_cus_in_ctb;
2350
2351 pu1_row_pu_map += (ctb_size >> 2) * (ctb_size >> 2);
2352
2353 /* first ctb offset will be populated by the caller */
2354 if(0 != ctb_ctr)
2355 {
2356 pu4_pu_offsets[ctb_ctr] = pu4_pu_offsets[ctb_ctr - 1] + num_pus_in_ctb;
2357 }
2358 pu2_num_pu_map[ctb_ctr] = num_pus_in_ctb;
2359 ASSERT(ps_ctb_out->u1_num_cus_in_ctb != 0);
2360
2361 ps_ctb_in++;
2362 ps_ctb_out++;
2363 }
2364
2365 /* ---------- Encloop end of row updates ----------------- */
2366
2367 /* at the end of row processing cu pixel counter is set to */
2368 /* (num ctb * ctb size) + ctb size */
2369 /* this is to set the dependency for right most cu of last */
2370 /* ctb's top right data dependency */
2371 /* this even takes care of entropy dependency for */
2372 /* incomplete ctb as well */
2373 ihevce_dmgr_set_row_row_sync(
2374 pv_dep_mngr_enc_loop_cu_top_right,
2375 (ctb_ctr * ctb_size + ctb_size),
2376 vert_ctr,
2377 ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2378
2379 ps_ctxt->s_sao_ctxt_t.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func;
2380
2381 /* Restore structure.
2382 Getting the address of stored-BS and Qp-map and other info */
2383 memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t));
2384 {
2385 /* Update the pointers to the tile start */
2386 s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2387 (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one vertical edge per 8x8 block
2388 s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2389 (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one horizontal edge per 8x8 block
2390 s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2));
2391 }
2392
2393 #if PROFILE_ENC_REG_DATA
2394 s_profile.u8_enc_reg_data[vert_ctr] = 0;
2395 #endif
2396
2397 /* -- Loop over all the CTBs in a row for Deblocking and Subpel gen --- */
2398 if(!ps_ctxt->u1_is_input_data_hbd)
2399 {
2400 WORD32 last_col_pic, last_col_tile;
2401
2402 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2403 {
2404 /* store the ctb level prms in cu prms */
2405 s_cu_prms.i4_ctb_pos = ctb_ctr;
2406 s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size;
2407 s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size;
2408
2409 s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size;
2410 s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size;
2411 s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size;
2412
2413 s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size;
2414
2415 s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size;
2416
2417 /* If last ctb in the horizontal row */
2418 if(ctb_ctr == (num_ctbs_horz_pic - 1))
2419 {
2420 last_col_pic = 1;
2421 }
2422 else
2423 {
2424 last_col_pic = 0;
2425 }
2426
2427 /* If last ctb in the tile row */
2428 if(ctb_ctr == (ctb_end - 1))
2429 {
2430 last_col_tile = 1;
2431 }
2432 else
2433 {
2434 last_col_tile = 0;
2435 }
2436
2437 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2438 {
2439 /* Wait till top neighbour CTB has done its deblocking */
2440 if(ctb_ctr < (ctb_end)-1)
2441 {
2442 ihevce_dmgr_chk_row_row_sync(
2443 pv_dep_mngr_enc_loop_dblk,
2444 ctb_ctr,
2445 dblk_offset,
2446 dblk_check_dep_pos,
2447 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
2448 ps_ctxt->thrd_id);
2449 }
2450
2451 if((0 == ps_ctxt->i4_deblock_type))
2452 {
2453 /* Populate Qp-map */
2454 if(ctb_start == ctb_ctr)
2455 {
2456 ihevce_deblk_populate_qp_map(
2457 ps_ctxt,
2458 &s_deblk_ctb_row_params,
2459 ps_ctb_out_dblk,
2460 vert_ctr,
2461 ps_frm_ctb_prms,
2462 ps_tile_params);
2463 }
2464 ps_ctxt->s_deblk_prms.i4_ctb_size = ctb_size;
2465
2466 /* recon pointers and stride */
2467 ps_ctxt->s_deblk_prms.pu1_ctb_y = s_cu_prms.pu1_luma_recon;
2468 ps_ctxt->s_deblk_prms.pu1_ctb_uv = s_cu_prms.pu1_chrm_recon;
2469 ps_ctxt->s_deblk_prms.i4_luma_pic_stride = s_cu_prms.i4_luma_recon_stride;
2470 ps_ctxt->s_deblk_prms.i4_chroma_pic_stride = s_cu_prms.i4_chrm_recon_stride;
2471
2472 ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge = (0 == vert_ctr) ? 0 : 1;
2473 {
2474 ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge =
2475 (ps_tile_params->i4_first_ctb_y == vert_ctr) ? 0 : 1;
2476 }
2477 ps_ctxt->s_deblk_prms.i4_deblock_left_ctb_edge = (ctb_start == ctb_ctr) ? 0 : 1;
2478 //or according to slice boundary. Support yet to be added !!!!
2479
2480 ihevce_deblk_ctb(
2481 &ps_ctxt->s_deblk_prms, last_col_tile, &s_deblk_ctb_row_params);
2482
2483 //Increment for storing next CTB info
2484 s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2485 (ctb_size >> 3); //one vertical edge per 8x8 block
2486 s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2487 (ctb_size >> 3); //one horizontal edge per 8x8 block
2488 s_deblk_ctb_row_params.pi1_ctb_row_qp +=
2489 (ctb_size >> 2); //one qp per 4x4 block.
2490
2491 } //end of if((0 == ps_ctxt->i4_deblock_type)
2492 } // end of if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2493
2494 /* Apply SAO over the previous CTB-row */
2495 if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2496 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2497 {
2498 sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
2499
2500 if((vert_ctr > ps_tile_params->i4_first_ctb_y) &&
2501 (ctb_ctr > ctb_start)) //if((vert_ctr > 0) && (ctb_ctr > 0))
2502 {
2503 /* Call the sao function to do sao for the current ctb*/
2504
2505 /* Register the curr ctb's x pos in sao context*/
2506 ps_sao_ctxt->i4_ctb_x = ctb_ctr - 1;
2507
2508 /* Register the curr ctb's y pos in sao context*/
2509 ps_sao_ctxt->i4_ctb_y = vert_ctr - 1;
2510
2511 ps_ctb_out_sao = ps_sao_ctxt->ps_ctb_out +
2512 (vert_ctr - 1) * ps_frm_ctb_prms->i4_num_ctbs_horz +
2513 (ctb_ctr - 1);
2514 ps_sao_ctxt->ps_sao = &ps_ctb_out_sao->s_sao;
2515 ps_sao_ctxt->i4_sao_blk_wd = ctb_size;
2516 ps_sao_ctxt->i4_sao_blk_ht = ctb_size;
2517
2518 ps_sao_ctxt->i4_is_last_ctb_row = 0;
2519 ps_sao_ctxt->i4_is_last_ctb_col = 0;
2520
2521 /* Calculate the recon buf pointer and stride for the current ctb */
2522 ps_sao_ctxt->pu1_cur_luma_recon_buf =
2523 ps_sao_ctxt->pu1_frm_luma_recon_buf +
2524 (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2525 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2526
2527 ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
2528
2529 ps_sao_ctxt->pu1_cur_chroma_recon_buf =
2530 ps_sao_ctxt->pu1_frm_chroma_recon_buf +
2531 (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
2532 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2533 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2534
2535 ps_sao_ctxt->i4_cur_chroma_recon_stride =
2536 ps_sao_ctxt->i4_frm_chroma_recon_stride;
2537
2538 ps_sao_ctxt->pu1_cur_luma_src_buf =
2539 ps_sao_ctxt->pu1_frm_luma_src_buf +
2540 (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2541 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2542
2543 ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
2544
2545 ps_sao_ctxt->pu1_cur_chroma_src_buf =
2546 ps_sao_ctxt->pu1_frm_chroma_src_buf +
2547 (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
2548 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2549 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2550
2551 ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
2552
2553 /* Calculate the pointer to buff to store the (x,y)th sao
2554 * for the top merge of (x,y+1)th ctb
2555 */
2556 ps_sao_ctxt->ps_top_ctb_sao =
2557 &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
2558 [ps_sao_ctxt->i4_ctb_x +
2559 (ps_sao_ctxt->i4_ctb_y) *
2560 ps_frm_ctb_prms->i4_num_ctbs_horz +
2561 (ps_ctxt->i4_bitrate_instance_num *
2562 ps_sao_ctxt->i4_num_ctb_units)];
2563
2564 /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2565 ps_sao_ctxt->pu1_curr_sao_src_top_luma =
2566 ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
2567 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
2568 ps_sao_ctxt->i4_ctb_x * ctb_size +
2569 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2570 ps_sao_ctxt->i4_top_chroma_buf_size);
2571
2572 /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2573 ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
2574 ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
2575 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
2576 ps_sao_ctxt->i4_ctb_x * ctb_size +
2577 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2578 ps_sao_ctxt->i4_top_chroma_buf_size);
2579
2580 {
2581 UWORD32 u4_ctb_sao_bits;
2582
2583 ihevce_sao_analyse(
2584 &ps_ctxt->s_sao_ctxt_t,
2585 ps_ctb_out_sao,
2586 &u4_ctb_sao_bits,
2587 ps_tile_params);
2588 ps_ctxt
2589 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2590 [ps_ctxt->i4_bitrate_instance_num]
2591 ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
2592 ps_ctxt
2593 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2594 [ps_ctxt->i4_bitrate_instance_num]
2595 ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
2596 }
2597 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic &
2598 0x1) /** Subpel generation not done for non-ref picture **/
2599 {
2600 /* Padding and Subpel Plane Generation */
2601 ihevce_pad_interp_recon_ctb(
2602 ps_pad_interp_recon,
2603 ctb_ctr - 1,
2604 vert_ctr - 1,
2605 ps_ctxt->i4_quality_preset,
2606 ps_frm_ctb_prms,
2607 ps_ctxt->ai2_scratch,
2608 ps_ctxt->i4_bitrate_instance_num,
2609 ps_ctxt->ps_func_selector);
2610 }
2611 }
2612
2613 /* Call the sao function again for the last ctb of the previous row*/
2614 if(((ctb_ctr + 1) == (ctb_end)) &&
2615 (vert_ctr >
2616 ps_tile_params
2617 ->i4_first_ctb_y)) //( ((ctb_ctr+1) == ps_frm_ctb_prms->i4_num_ctbs_horz) && (vert_ctr > 0) )
2618 {
2619 /* Register the curr ctb's x pos in sao context*/
2620 ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr;
2621
2622 /* Register the curr ctb's y pos in sao context*/
2623 ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr - 1;
2624
2625 ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out +
2626 (vert_ctr - 1) * ps_frm_ctb_prms->i4_num_ctbs_horz + (ctb_ctr);
2627
2628 ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao;
2629
2630 ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd =
2631 ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) -
2632 ps_tile_params->i4_curr_tile_width);
2633
2634 ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht = ps_ctxt->s_sao_ctxt_t.i4_ctb_size;
2635
2636 ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 0;
2637 ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 1;
2638
2639 /* Calculate the recon buf pointer and stride for the current ctb */
2640 ps_sao_ctxt->pu1_cur_luma_recon_buf =
2641 ps_sao_ctxt->pu1_frm_luma_recon_buf +
2642 (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2643 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2644
2645 ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
2646
2647 ps_sao_ctxt->pu1_cur_chroma_recon_buf =
2648 ps_sao_ctxt->pu1_frm_chroma_recon_buf +
2649 (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
2650 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2651 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2652
2653 ps_sao_ctxt->i4_cur_chroma_recon_stride =
2654 ps_sao_ctxt->i4_frm_chroma_recon_stride;
2655
2656 ps_sao_ctxt->pu1_cur_luma_src_buf =
2657 ps_sao_ctxt->pu1_frm_luma_src_buf +
2658 (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2659 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2660
2661 ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
2662
2663 ps_sao_ctxt->pu1_cur_chroma_src_buf =
2664 ps_sao_ctxt->pu1_frm_chroma_src_buf +
2665 (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
2666 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2667 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2668
2669 ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
2670
2671 /* Calculate the pointer to buff to store the (x,y)th sao
2672 * for the top merge of (x,y+1)th ctb
2673 */
2674 ps_sao_ctxt->ps_top_ctb_sao =
2675 &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
2676 [ps_sao_ctxt->i4_ctb_x +
2677 (ps_sao_ctxt->i4_ctb_y) *
2678 ps_frm_ctb_prms->i4_num_ctbs_horz +
2679 (ps_ctxt->i4_bitrate_instance_num *
2680 ps_sao_ctxt->i4_num_ctb_units)];
2681
2682 /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2683 ps_sao_ctxt->pu1_curr_sao_src_top_luma =
2684 ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
2685 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
2686 ps_sao_ctxt->i4_ctb_x * ctb_size +
2687 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2688 ps_sao_ctxt->i4_top_chroma_buf_size);
2689
2690 /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2691 ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
2692 ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
2693 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
2694 ps_sao_ctxt->i4_ctb_x * ctb_size +
2695 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2696 ps_sao_ctxt->i4_top_chroma_buf_size);
2697
2698 {
2699 UWORD32 u4_ctb_sao_bits;
2700
2701 ihevce_sao_analyse(
2702 &ps_ctxt->s_sao_ctxt_t,
2703 ps_ctb_out_sao,
2704 &u4_ctb_sao_bits,
2705 ps_tile_params);
2706 ps_ctxt
2707 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2708 [ps_ctxt->i4_bitrate_instance_num]
2709 ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
2710 ps_ctxt
2711 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2712 [ps_ctxt->i4_bitrate_instance_num]
2713 ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
2714 }
2715 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic &
2716 0x1) /** Subpel generation not done for non-ref picture **/
2717 {
2718 /* Padding and Subpel Plane Generation */
2719 ihevce_pad_interp_recon_ctb(
2720 ps_pad_interp_recon,
2721 ctb_ctr,
2722 vert_ctr - 1,
2723 ps_ctxt->i4_quality_preset,
2724 ps_frm_ctb_prms,
2725 ps_ctxt->ai2_scratch,
2726 ps_ctxt->i4_bitrate_instance_num,
2727 ps_ctxt->ps_func_selector);
2728 }
2729 }
2730 }
2731 else //SAO Disabled
2732 {
2733 if(1 == ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2734 {
2735 /* Padding and Subpel Plane Generation */
2736 ihevce_pad_interp_recon_ctb(
2737 ps_pad_interp_recon,
2738 ctb_ctr,
2739 vert_ctr,
2740 ps_ctxt->i4_quality_preset,
2741 ps_frm_ctb_prms,
2742 ps_ctxt->ai2_scratch,
2743 ps_ctxt->i4_bitrate_instance_num,
2744 ps_ctxt->ps_func_selector);
2745 }
2746 }
2747
2748 /* update the number of ctbs deblocked for this row */
2749 ihevce_dmgr_set_row_row_sync(
2750 pv_dep_mngr_enc_loop_dblk,
2751 (ctb_ctr + 1),
2752 vert_ctr,
2753 ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2754 } //end of loop over CTBs in current CTB-row
2755 {
2756 if(!ps_ctxt->i4_bitrate_instance_num)
2757 {
2758 if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2759 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2760 {
2761 /* If SAO is on, then signal completion of previous CTB row */
2762 if(0 != vert_ctr)
2763 {
2764 {
2765 WORD32 post_ctb_ctr;
2766
2767 for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
2768 {
2769 ihevce_dmgr_map_set_sync(
2770 pv_dep_mngr_me_dep_encloop,
2771 post_ctb_ctr,
2772 (vert_ctr - 1),
2773 MAP_CTB_COMPLETE);
2774 }
2775 }
2776 }
2777 }
2778 else
2779 {
2780 {
2781 WORD32 post_ctb_ctr;
2782
2783 for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
2784 {
2785 ihevce_dmgr_map_set_sync(
2786 pv_dep_mngr_me_dep_encloop,
2787 post_ctb_ctr,
2788 vert_ctr,
2789 MAP_CTB_COMPLETE);
2790 }
2791 }
2792 }
2793 }
2794 }
2795
2796 /* Call the sao function again for the last ctb row of frame */
2797 if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2798 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2799 {
2800 sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
2801
2802 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2803 {
2804 if((vert_ctr == (ps_tile_params->i4_first_ctb_y +
2805 ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1)) &&
2806 (ctb_ctr >
2807 ctb_start)) //((vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1)) && (ctb_ctr > 0))
2808 {
2809 /* Register the curr ctb's x pos in sao context*/
2810 ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr - 1;
2811
2812 /* Register the curr ctb's y pos in sao context*/
2813 ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr;
2814
2815 ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out +
2816 (vert_ctr)*ps_frm_ctb_prms->i4_num_ctbs_horz + (ctb_ctr - 1);
2817
2818 ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao;
2819
2820 ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd = ps_ctxt->s_sao_ctxt_t.i4_ctb_size;
2821 ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 0;
2822
2823 ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht =
2824 ctb_size - ((ps_tile_params->i4_curr_tile_ht_in_ctb_unit * ctb_size) -
2825 ps_tile_params->i4_curr_tile_height);
2826
2827 ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 1;
2828
2829 /* Calculate the recon buf pointer and stride for teh current ctb */
2830 ps_sao_ctxt->pu1_cur_luma_recon_buf =
2831 ps_sao_ctxt->pu1_frm_luma_recon_buf +
2832 (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2833 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2834
2835 ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
2836
2837 ps_sao_ctxt->pu1_cur_chroma_recon_buf =
2838 ps_sao_ctxt->pu1_frm_chroma_recon_buf +
2839 (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
2840 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2841 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2842
2843 ps_sao_ctxt->i4_cur_chroma_recon_stride =
2844 ps_sao_ctxt->i4_frm_chroma_recon_stride;
2845
2846 ps_sao_ctxt->pu1_cur_luma_src_buf =
2847 ps_sao_ctxt->pu1_frm_luma_src_buf +
2848 (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2849 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2850
2851 ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
2852
2853 ps_sao_ctxt->pu1_cur_chroma_src_buf =
2854 ps_sao_ctxt->pu1_frm_chroma_src_buf +
2855 (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
2856 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2857 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2858
2859 ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
2860
2861 /* Calculate the pointer to buff to store the (x,y)th sao
2862 * for the top merge of (x,y+1)th ctb
2863 */
2864 ps_sao_ctxt->ps_top_ctb_sao =
2865 &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
2866 [ps_sao_ctxt->i4_ctb_x +
2867 (ps_sao_ctxt->i4_ctb_y) *
2868 ps_frm_ctb_prms->i4_num_ctbs_horz +
2869 (ps_ctxt->i4_bitrate_instance_num *
2870 ps_sao_ctxt->i4_num_ctb_units)];
2871
2872 /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2873 ps_sao_ctxt->pu1_curr_sao_src_top_luma =
2874 ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
2875 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
2876 ps_sao_ctxt->i4_ctb_x * ctb_size +
2877 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2878 ps_sao_ctxt->i4_top_chroma_buf_size);
2879
2880 /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2881 ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
2882 ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
2883 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
2884 ps_sao_ctxt->i4_ctb_x * ctb_size +
2885 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2886 ps_sao_ctxt->i4_top_chroma_buf_size);
2887
2888 {
2889 UWORD32 u4_ctb_sao_bits;
2890 ihevce_sao_analyse(
2891 &ps_ctxt->s_sao_ctxt_t,
2892 ps_ctb_out_sao,
2893 &u4_ctb_sao_bits,
2894 ps_tile_params);
2895 ps_ctxt
2896 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2897 [ps_ctxt->i4_bitrate_instance_num]
2898 ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
2899 ps_ctxt
2900 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2901 [ps_ctxt->i4_bitrate_instance_num]
2902 ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
2903 }
2904 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic &
2905 0x1) /** Subpel generation not done for non-ref picture **/
2906 {
2907 /* Padding and Subpel Plane Generation */
2908 ihevce_pad_interp_recon_ctb(
2909 ps_pad_interp_recon,
2910 ctb_ctr - 1,
2911 vert_ctr,
2912 ps_ctxt->i4_quality_preset,
2913 ps_frm_ctb_prms,
2914 ps_ctxt->ai2_scratch,
2915 ps_ctxt->i4_bitrate_instance_num,
2916 ps_ctxt->ps_func_selector);
2917 }
2918 }
2919 /* Call the sao function again for the last ctb of the last ctb row of frame */
2920 if((vert_ctr == (ps_tile_params->i4_first_ctb_y +
2921 ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1)) &&
2922 ((ctb_ctr + 1) ==
2923 (ctb_end))) //( ((ctb_ctr+1) == ps_frm_ctb_prms->i4_num_ctbs_horz))
2924 {
2925 /* Register the curr ctb's x pos in sao context*/
2926 ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr;
2927
2928 /* Register the curr ctb's y pos in sao context*/
2929 ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr;
2930
2931 ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out +
2932 (vert_ctr)*ps_frm_ctb_prms->i4_num_ctbs_horz + (ctb_ctr);
2933
2934 ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao;
2935
2936 ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd =
2937 ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) -
2938 ps_tile_params->i4_curr_tile_width);
2939
2940 ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht =
2941 ctb_size - ((ps_tile_params->i4_curr_tile_ht_in_ctb_unit * ctb_size) -
2942 ps_tile_params->i4_curr_tile_height);
2943
2944 ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 1;
2945 ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 1;
2946
2947 /* Calculate the recon buf pointer and stride for teh current ctb */
2948 ps_sao_ctxt->pu1_cur_luma_recon_buf =
2949 ps_sao_ctxt->pu1_frm_luma_recon_buf +
2950 (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2951 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2952
2953 ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
2954
2955 ps_sao_ctxt->pu1_cur_chroma_recon_buf =
2956 ps_sao_ctxt->pu1_frm_chroma_recon_buf +
2957 (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
2958 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2959 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2960
2961 ps_sao_ctxt->i4_cur_chroma_recon_stride =
2962 ps_sao_ctxt->i4_frm_chroma_recon_stride;
2963
2964 ps_sao_ctxt->pu1_cur_luma_src_buf =
2965 ps_sao_ctxt->pu1_frm_luma_src_buf +
2966 (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2967 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2968
2969 ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
2970
2971 ps_sao_ctxt->pu1_cur_chroma_src_buf =
2972 ps_sao_ctxt->pu1_frm_chroma_src_buf +
2973 (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
2974 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2975 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2976
2977 ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
2978
2979 /* Calculate the pointer to buff to store the (x,y)th sao
2980 * for the top merge of (x,y+1)th ctb
2981 */
2982 ps_sao_ctxt->ps_top_ctb_sao =
2983 &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
2984 [ps_sao_ctxt->i4_ctb_x +
2985 ps_sao_ctxt->i4_ctb_y *
2986 ps_frm_ctb_prms->i4_num_ctbs_horz +
2987 (ps_ctxt->i4_bitrate_instance_num *
2988 ps_sao_ctxt->i4_num_ctb_units)];
2989
2990 /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2991 ps_sao_ctxt->pu1_curr_sao_src_top_luma =
2992 ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
2993 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
2994 ps_sao_ctxt->i4_ctb_x * ctb_size +
2995 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2996 ps_sao_ctxt->i4_top_chroma_buf_size);
2997
2998 /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2999 ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
3000 ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
3001 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
3002 ps_sao_ctxt->i4_ctb_x * ctb_size +
3003 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
3004 ps_sao_ctxt->i4_top_chroma_buf_size);
3005
3006 {
3007 UWORD32 u4_ctb_sao_bits;
3008
3009 ihevce_sao_analyse(
3010 &ps_ctxt->s_sao_ctxt_t,
3011 ps_ctb_out_sao,
3012 &u4_ctb_sao_bits,
3013 ps_tile_params);
3014 ps_ctxt
3015 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
3016 [ps_ctxt->i4_bitrate_instance_num]
3017 ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
3018 ps_ctxt
3019 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
3020 [ps_ctxt->i4_bitrate_instance_num]
3021 ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
3022 }
3023 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic &
3024 0x1) /** Subpel generation not done for non-ref picture **/
3025 {
3026 /* Padding and Subpel Plane Generation */
3027 ihevce_pad_interp_recon_ctb(
3028 ps_pad_interp_recon,
3029 ctb_ctr,
3030 vert_ctr,
3031 ps_ctxt->i4_quality_preset,
3032 ps_frm_ctb_prms,
3033 ps_ctxt->ai2_scratch,
3034 ps_ctxt->i4_bitrate_instance_num,
3035 ps_ctxt->ps_func_selector);
3036 }
3037 }
3038 } //end of loop over CTBs in current CTB-row
3039
3040 /* If SAO is on, then signal completion of the last CTB row of frame */
3041 {
3042 if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1))
3043 {
3044 if(!ps_ctxt->i4_bitrate_instance_num)
3045 {
3046 {
3047 WORD32 post_ctb_ctr;
3048
3049 for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
3050 {
3051 ihevce_dmgr_map_set_sync(
3052 pv_dep_mngr_me_dep_encloop,
3053 post_ctb_ctr,
3054 vert_ctr,
3055 MAP_CTB_COMPLETE);
3056 }
3057 }
3058 }
3059 }
3060 }
3061 }
3062 }
3063
3064 return;
3065 }
3066
3067 /*!
3068 ******************************************************************************
3069 * \if Function name : ihevce_enc_loop_pass \endif
3070 *
3071 * \brief
3072 * Frame level enc_loop pass function
3073 *
3074 * \param[in] pv_ctxt : pointer to enc_loop module
3075 * \param[in] ps_frm_lamda : Frame level Lambda params
3076 * \param[in] ps_inp : pointer to input yuv buffer (frame buffer)
3077 * \param[in] ps_ctb_in : pointer CTB structure (output of ME/IPE) (frame buffer)
3078 * \param[out] ps_frm_recon : pointer recon picture structure pointer (frame buffer)
3079 * \param[out] ps_ctb_out : pointer CTB output structure (frame buffer)
3080 * \param[out] ps_cu_out : pointer CU output structure (frame buffer)
3081 * \param[out] ps_tu_out : pointer TU output structure (frame buffer)
3082 * \param[out] pi2_frm_coeffs : pointer coeff output frame buffer)
3083 *
3084 * \return
3085 * None
3086 *
3087 * Note : Currently the frame level calcualtions done assumes that
3088 * framewidth of the input /recon are excat multiple of ctbsize
3089 *
3090 * \author
3091 * Ittiam
3092 *
3093 *****************************************************************************
3094 */
ihevce_enc_loop_process(void * pv_ctxt,ihevce_lap_enc_buf_t * ps_curr_inp,ctb_analyse_t * ps_ctb_in,ipe_l0_ctb_analyse_for_me_t * ps_ipe_analyse,recon_pic_buf_t * ps_frm_recon,cur_ctb_cu_tree_t * ps_cu_tree_out,ctb_enc_loop_out_t * ps_ctb_out,cu_enc_loop_out_t * ps_cu_out,tu_enc_loop_out_t * ps_tu_out,pu_t * ps_pu_out,UWORD8 * pu1_frm_ecd_data,frm_ctb_ctxt_t * ps_frm_ctb_prms,frm_lambda_ctxt_t * ps_frm_lamda,multi_thrd_ctxt_t * ps_multi_thrd_ctxt,WORD32 thrd_id,WORD32 i4_enc_frm_id,WORD32 i4_pass)3095 void ihevce_enc_loop_process(
3096 void *pv_ctxt,
3097 ihevce_lap_enc_buf_t *ps_curr_inp,
3098 ctb_analyse_t *ps_ctb_in,
3099 ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse,
3100 recon_pic_buf_t *ps_frm_recon,
3101 cur_ctb_cu_tree_t *ps_cu_tree_out,
3102 ctb_enc_loop_out_t *ps_ctb_out,
3103 cu_enc_loop_out_t *ps_cu_out,
3104 tu_enc_loop_out_t *ps_tu_out,
3105 pu_t *ps_pu_out,
3106 UWORD8 *pu1_frm_ecd_data,
3107 frm_ctb_ctxt_t *ps_frm_ctb_prms,
3108 frm_lambda_ctxt_t *ps_frm_lamda,
3109 multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
3110 WORD32 thrd_id,
3111 WORD32 i4_enc_frm_id,
3112 WORD32 i4_pass)
3113 {
3114 WORD32 vert_ctr;
3115 WORD32 tile_col_idx;
3116 iv_enc_yuv_buf_t s_curr_src_bufs;
3117 iv_enc_yuv_buf_t s_curr_recon_bufs;
3118 iv_enc_yuv_buf_src_t s_curr_recon_bufs_src;
3119 UWORD32 *pu4_pu_offsets;
3120 WORD32 end_of_frame;
3121 UWORD8 *apu1_y_sub_pel_planes[3];
3122 pad_interp_recon_frm_t s_pad_interp_recon;
3123 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_ctxt;
3124
3125 ihevce_enc_loop_ctxt_t *ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[thrd_id];
3126
3127 WORD32 i4_bitrate_instance_num = ps_ctxt->i4_bitrate_instance_num;
3128
3129 /* initialize the closed loop lambda for the current frame */
3130 ps_ctxt->i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf;
3131 ps_ctxt->i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf;
3132 ps_ctxt->u4_chroma_cost_weighing_factor = ps_frm_lamda->u4_chroma_cost_weighing_factor;
3133 ps_ctxt->i4_satd_lamda = ps_frm_lamda->i4_cl_satd_lambda_qf;
3134 ps_ctxt->i4_sad_lamda = ps_frm_lamda->i4_cl_sad_type2_lambda_qf;
3135 ps_ctxt->thrd_id = thrd_id;
3136 ps_ctxt->u1_is_refPic = ps_curr_inp->s_lap_out.i4_is_ref_pic;
3137
3138 #if DISABLE_SAO_WHEN_NOISY
3139 ps_ctxt->s_sao_ctxt_t.ps_ctb_data = ps_ctb_in;
3140 ps_ctxt->s_sao_ctxt_t.i4_ctb_data_stride = ps_frm_ctb_prms->i4_num_ctbs_horz;
3141 #endif
3142
3143 #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
3144 ps_ctxt->pv_err_func_selector = ps_func_selector;
3145 #endif
3146
3147 /*Bit0 - of this Flag indicates whether current pictute needs to be deblocked,
3148 padded and hpel planes need to be generated.
3149 Bit1 - of this flag set to 1 if sao is enabled. This is to enable deblocking when sao is enabled*/
3150 ps_ctxt->i4_deblk_pad_hpel_cur_pic =
3151 (ps_frm_recon->i4_deblk_pad_hpel_cur_pic) ||
3152 ((ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
3153 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
3154 << 1);
3155
3156 /* Share all reference pictures with nbr clients. This flag will be used only
3157 in case of dist-enc mode */
3158 ps_ctxt->i4_share_flag = (ps_frm_recon->i4_is_reference != 0);
3159 ps_ctxt->pv_frm_recon = (void *)ps_frm_recon;
3160
3161 /* Register the frame level ssd lamda for both luma and chroma*/
3162 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf;
3163 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf;
3164
3165 ihevce_populate_cl_cu_lambda_prms(
3166 ps_ctxt,
3167 ps_frm_lamda,
3168 (WORD32)ps_ctxt->i1_slice_type,
3169 ps_curr_inp->s_lap_out.i4_temporal_lyr_id,
3170 ENC_LOOP_LAMBDA_TYPE);
3171
3172 ps_ctxt->u1_disable_intra_eval = DISABLE_INTRA_IN_BPICS &&
3173 (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) &&
3174 (ps_ctxt->i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE);
3175
3176 end_of_frame = 0;
3177
3178 /* ----------------------------------------------------- */
3179 /* store the stride and dimensions of source and recon */
3180 /* buffer pointers will be over written at every CTB row */
3181 /* ----------------------------------------------------- */
3182 memcpy(&s_curr_src_bufs, &ps_curr_inp->s_lap_out.s_input_buf, sizeof(iv_enc_yuv_buf_t));
3183
3184 memcpy(&s_curr_recon_bufs, &ps_frm_recon->s_yuv_buf_desc, sizeof(iv_enc_yuv_buf_t));
3185
3186 memcpy(&s_curr_recon_bufs_src, &ps_frm_recon->s_yuv_buf_desc_src, sizeof(iv_enc_yuv_buf_src_t));
3187
3188 /* get the frame level pu offset pointer*/
3189 pu4_pu_offsets = ps_frm_recon->pu4_pu_off;
3190
3191 s_pad_interp_recon.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
3192
3193 /* ------------ Loop over all the CTB rows --------------- */
3194 while(0 == end_of_frame)
3195 {
3196 UWORD8 *pu1_tmp;
3197 UWORD8 *pu1_row_pu_map;
3198 UWORD8 *pu1_row_ecd_data;
3199 ctb_analyse_t *ps_ctb_row_in;
3200 ctb_enc_loop_out_t *ps_ctb_row_out;
3201 cu_enc_loop_out_t *ps_row_cu;
3202 tu_enc_loop_out_t *ps_row_tu;
3203 pu_t *ps_row_pu;
3204 pu_col_mv_t *ps_row_col_pu;
3205 job_queue_t *ps_job;
3206 UWORD32 *pu4_pu_row_offsets;
3207 UWORD16 *pu2_num_pu_row;
3208
3209 ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse;
3210 cur_ctb_cu_tree_t *ps_row_cu_tree;
3211 UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2);
3212
3213 /* Get the current row from the job queue */
3214 ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
3215 ps_multi_thrd_ctxt, ENC_LOOP_JOB + i4_bitrate_instance_num, 1, i4_enc_frm_id);
3216
3217 /* Register the pointer to ctb out of the current frame*/
3218 ps_ctxt->s_sao_ctxt_t.ps_ctb_out = ps_ctb_out;
3219
3220 /* If all rows are done, set the end of process flag to 1, */
3221 /* and the current row to -1 */
3222 if(NULL == ps_job)
3223 {
3224 vert_ctr = -1;
3225 tile_col_idx = -1;
3226 end_of_frame = 1;
3227 }
3228 else
3229 {
3230 ihevce_tile_params_t *ps_col_tile_params_temp;
3231 ihevce_tile_params_t *ps_tile_params;
3232 WORD32 i4_tile_id;
3233
3234 ASSERT((ENC_LOOP_JOB + i4_bitrate_instance_num) == ps_job->i4_task_type);
3235 /* set the output dependency */
3236 ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_enc_frm_id);
3237
3238 /* Obtain the current row's details from the job */
3239 vert_ctr = ps_job->s_job_info.s_enc_loop_job_info.i4_ctb_row_no;
3240 {
3241 /* Obtain the current colum tile index from the job */
3242 tile_col_idx = ps_job->s_job_info.s_enc_loop_job_info.i4_tile_col_idx;
3243
3244 /* The tile parameter for the col. idx. Use only the properties
3245 which is same for all the bottom tiles like width, start_x, etc.
3246 Don't use height, start_y, etc. */
3247 ps_col_tile_params_temp =
3248 ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + tile_col_idx);
3249
3250 /* Derive actual tile_id based on vert_ctr */
3251 i4_tile_id =
3252 *(ps_frm_ctb_prms->pi4_tile_id_map +
3253 vert_ctr * ps_frm_ctb_prms->i4_tile_id_ctb_map_stride +
3254 ps_col_tile_params_temp->i4_first_ctb_x);
3255 /* Derive pointer to current tile prms */
3256 ps_tile_params =
3257 ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + i4_tile_id);
3258 }
3259
3260 ps_ctxt->i4_tile_col_idx = tile_col_idx;
3261 /* derive the current ctb row pointers */
3262
3263 /* luma src */
3264 pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf +
3265 (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y *
3266 ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) +
3267 ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x;
3268
3269 pu1_tmp +=
3270 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size *
3271 ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd);
3272
3273 s_curr_src_bufs.pv_y_buf = pu1_tmp;
3274
3275 if(!ps_ctxt->u1_is_input_data_hbd)
3276 {
3277 /* cb src */
3278 pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf;
3279 pu1_tmp +=
3280 (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) *
3281 ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd);
3282
3283 s_curr_src_bufs.pv_u_buf = pu1_tmp;
3284 }
3285
3286 /* luma recon */
3287 pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3288 pu1_tmp +=
3289 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3290
3291 s_curr_recon_bufs.pv_y_buf = pu1_tmp;
3292 s_pad_interp_recon.pu1_luma_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3293 s_pad_interp_recon.i4_luma_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_y_strd;
3294 if(!ps_ctxt->u1_is_input_data_hbd)
3295 {
3296 /* cb recon */
3297 pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3298 pu1_tmp +=
3299 (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) *
3300 ps_frm_recon->s_yuv_buf_desc.i4_uv_strd);
3301
3302 s_curr_recon_bufs.pv_u_buf = pu1_tmp;
3303 s_pad_interp_recon.pu1_chrm_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3304 s_pad_interp_recon.i4_chrm_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_uv_strd;
3305
3306 s_pad_interp_recon.i4_ctb_size = ps_frm_ctb_prms->i4_ctb_size;
3307
3308 /* Register the source buffer pointers in sao context*/
3309 ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_src_buf =
3310 (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf +
3311 (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y *
3312 ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) +
3313 ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x;
3314
3315 ps_ctxt->s_sao_ctxt_t.i4_frm_luma_src_stride =
3316 ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd;
3317
3318 ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_src_buf =
3319 (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf;
3320
3321 ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_src_stride =
3322 ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd;
3323 }
3324
3325 /* Subpel planes hxfy, fxhy, hxhy*/
3326 pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[0];
3327 pu1_tmp +=
3328 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3329 apu1_y_sub_pel_planes[0] = pu1_tmp;
3330 s_pad_interp_recon.pu1_sbpel_hxfy = ps_frm_recon->apu1_y_sub_pel_planes[0];
3331
3332 pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[1];
3333 pu1_tmp +=
3334 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3335 apu1_y_sub_pel_planes[1] = pu1_tmp;
3336 s_pad_interp_recon.pu1_sbpel_fxhy = ps_frm_recon->apu1_y_sub_pel_planes[1];
3337
3338 pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[2];
3339 pu1_tmp +=
3340 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3341 apu1_y_sub_pel_planes[2] = pu1_tmp;
3342 s_pad_interp_recon.pu1_sbpel_hxhy = ps_frm_recon->apu1_y_sub_pel_planes[2];
3343
3344 /* row level coeffs buffer */
3345 pu1_row_ecd_data =
3346 pu1_frm_ecd_data +
3347 (vert_ctr *
3348 ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_max_tus_in_row << 1)
3349 : ((ps_frm_ctb_prms->i4_max_tus_in_row * 3) >> 1)) *
3350 MAX_SCAN_COEFFS_BYTES_4x4);
3351
3352 /* Row level CU buffer */
3353 ps_row_cu = ps_cu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_cus_in_row);
3354
3355 /* Row level TU buffer */
3356 ps_row_tu = ps_tu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_tus_in_row);
3357
3358 /* Row level PU buffer */
3359 ps_row_pu = ps_pu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row);
3360
3361 /* Row level colocated PU buffer */
3362 /* ps_frm_col_mv has (i4_num_ctbs_horz + 1) CTBs for stride */
3363 ps_row_col_pu =
3364 ps_frm_recon->ps_frm_col_mv + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) *
3365 ps_frm_ctb_prms->i4_num_pus_in_ctb);
3366 /* Row level col PU map buffer */
3367 /* pu1_frm_pu_map has (i4_num_ctbs_horz + 1) CTBs for stride */
3368 pu1_row_pu_map =
3369 ps_frm_recon->pu1_frm_pu_map + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) *
3370 ps_frm_ctb_prms->i4_num_pus_in_ctb);
3371 /* row ctb in pointer */
3372 ps_ctb_row_in = ps_ctb_in + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3373
3374 /* row ctb out pointer */
3375 ps_ctb_row_out = ps_ctb_out + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3376
3377 /* row number of PUs map pointer */
3378 pu2_num_pu_row =
3379 ps_frm_recon->pu2_num_pu_map + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3380
3381 /* row pu offsets pointer */
3382 pu4_pu_row_offsets = pu4_pu_offsets + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3383 /* store the first CTB pu offset pointer */
3384 *pu4_pu_row_offsets = vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row;
3385 /* Initialize ptr to current IPE row */
3386 ps_row_ipe_analyse = ps_ipe_analyse + (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz);
3387
3388 /* Initialize ptr to current row */
3389 ps_row_cu_tree = ps_cu_tree_out +
3390 (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE);
3391
3392 /* Get the EncLoop Top-Right CU Dep Mngr */
3393 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right =
3394 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[ps_ctxt->i4_enc_frm_id]
3395 [i4_bitrate_instance_num];
3396 /* Get the EncLoop Deblock Dep Mngr */
3397 ps_ctxt->pv_dep_mngr_enc_loop_dblk =
3398 ps_master_ctxt
3399 ->aapv_dep_mngr_enc_loop_dblk[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num];
3400
3401 ps_ctxt->pu1_curr_row_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr][0];
3402
3403 {
3404 /* derive the pointers of top row buffers */
3405 ps_ctxt->pv_top_row_luma =
3406 (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] +
3407 (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) +
3408 (vert_ctr - 1) * ps_ctxt->i4_top_row_luma_stride;
3409
3410 ps_ctxt->pv_top_row_chroma =
3411 (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] +
3412 (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) +
3413 (vert_ctr - 1) * ps_ctxt->i4_top_row_chroma_stride;
3414
3415 /* derive the pointers of bottom row buffers to update current row data */
3416 ps_ctxt->pv_bot_row_luma =
3417 (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] +
3418 (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) +
3419 (vert_ctr)*ps_ctxt->i4_top_row_luma_stride;
3420
3421 ps_ctxt->pv_bot_row_chroma =
3422 (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] +
3423 (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) +
3424 (vert_ctr)*ps_ctxt->i4_top_row_chroma_stride;
3425
3426 /* Register the buffer pointers in sao context*/
3427 ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_recon_buf =
3428 (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3429 ps_ctxt->s_sao_ctxt_t.i4_frm_luma_recon_stride =
3430 ps_frm_recon->s_yuv_buf_desc.i4_y_strd;
3431
3432 ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_recon_buf =
3433 (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3434 ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_recon_stride =
3435 ps_frm_recon->s_yuv_buf_desc.i4_uv_strd;
3436
3437 ps_ctxt->s_sao_ctxt_t.ps_rdopt_entropy_ctxt = &ps_ctxt->s_rdopt_entropy_ctxt;
3438
3439 ps_ctxt->s_sao_ctxt_t.i4_frm_top_luma_buf_stride =
3440 ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 1;
3441
3442 ps_ctxt->s_sao_ctxt_t.i4_frm_top_chroma_buf_stride =
3443 ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 2;
3444 }
3445
3446 ps_ctxt->ps_top_row_nbr =
3447 ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] +
3448 (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) +
3449 (vert_ctr - 1) * ps_ctxt->i4_top_row_nbr_stride;
3450
3451 ps_ctxt->ps_bot_row_nbr =
3452 ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] +
3453 (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) +
3454 (vert_ctr)*ps_ctxt->i4_top_row_nbr_stride;
3455
3456 if(vert_ctr > 0)
3457 {
3458 ps_ctxt->pu1_top_rt_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr - 1][0];
3459 }
3460 else
3461 {
3462 ps_ctxt->pu1_top_rt_cabac_state = NULL;
3463 }
3464
3465 ASSERT(
3466 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0]
3467 .ps_pps->i1_sign_data_hiding_flag ==
3468 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1]
3469 .ps_pps->i1_sign_data_hiding_flag);
3470
3471 /* call the row level processing function */
3472 ihevce_enc_loop_process_row(
3473 ps_ctxt,
3474 &s_curr_src_bufs,
3475 &s_curr_recon_bufs,
3476 &s_curr_recon_bufs_src,
3477 &apu1_y_sub_pel_planes[0],
3478 ps_ctb_row_in,
3479 ps_ctb_row_out,
3480 ps_row_ipe_analyse,
3481 ps_row_cu_tree,
3482 ps_row_cu,
3483 ps_row_tu,
3484 ps_row_pu,
3485 ps_row_col_pu,
3486 pu2_num_pu_row,
3487 pu1_row_pu_map,
3488 pu1_row_ecd_data,
3489 pu4_pu_row_offsets,
3490 ps_frm_ctb_prms,
3491 vert_ctr,
3492 ps_frm_recon,
3493 ps_ctxt->pv_dep_mngr_encloop_dep_me,
3494 &s_pad_interp_recon,
3495 i4_pass,
3496 ps_multi_thrd_ctxt,
3497 ps_tile_params);
3498 }
3499 }
3500 }
3501
3502 /*!
3503 ******************************************************************************
3504 * \if Function name : ihevce_enc_loop_dblk_get_prms_dep_mngr \endif
3505 *
3506 * \brief Returns to the caller key attributes relevant for dependency manager,
3507 * ie, the number of vertical units in l0 layer
3508 *
3509 * \par Description:
3510 *
3511 * \param[in] pai4_ht : ht
3512 * \param[out] pi4_num_vert_units_in_lyr : Pointer to store num vertical units
3513 * for deblocking
3514 *
3515 * \return
3516 * None
3517 *
3518 * \author
3519 * Ittiam
3520 *
3521 *****************************************************************************
3522 */
ihevce_enc_loop_dblk_get_prms_dep_mngr(WORD32 i4_ht,WORD32 * pi4_num_vert_units_in_lyr)3523 void ihevce_enc_loop_dblk_get_prms_dep_mngr(WORD32 i4_ht, WORD32 *pi4_num_vert_units_in_lyr)
3524 {
3525 /* Blk ht at a given layer*/
3526 WORD32 unit_ht_c;
3527 WORD32 ctb_size = 64;
3528
3529 /* compute blk ht and unit ht */
3530 unit_ht_c = ctb_size;
3531
3532 /* set the numebr of vertical units */
3533 *pi4_num_vert_units_in_lyr = (i4_ht + unit_ht_c - 1) / unit_ht_c;
3534 }
3535
3536 /*!
3537 ******************************************************************************
3538 * \if Function name : ihevce_enc_loop_get_num_mem_recs \endif
3539 *
3540 * \brief
3541 * Number of memory records are returned for enc_loop module
3542 * Note : Include TOT MEM. req. for ENC.LOOP + TOT MEM. req. for Dep Mngr for Dblk
3543 *
3544 * \return
3545 * None
3546 *
3547 * \author
3548 * Ittiam
3549 *
3550 *****************************************************************************
3551 */
3552 WORD32
ihevce_enc_loop_get_num_mem_recs(WORD32 i4_num_bitrate_inst,WORD32 i4_num_enc_loop_frm_pllel)3553 ihevce_enc_loop_get_num_mem_recs(WORD32 i4_num_bitrate_inst, WORD32 i4_num_enc_loop_frm_pllel)
3554 {
3555 WORD32 enc_loop_mem_recs = NUM_ENC_LOOP_MEM_RECS;
3556 WORD32 enc_loop_dblk_dep_mngr_mem_recs =
3557 i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
3558 WORD32 enc_loop_cu_top_right_dep_mngr_mem_recs =
3559 i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
3560 WORD32 enc_loop_aux_br_dep_mngr_mem_recs =
3561 i4_num_enc_loop_frm_pllel * (i4_num_bitrate_inst - 1) * ihevce_dmgr_get_num_mem_recs();
3562
3563 return (
3564 (enc_loop_mem_recs + enc_loop_dblk_dep_mngr_mem_recs +
3565 enc_loop_cu_top_right_dep_mngr_mem_recs + enc_loop_aux_br_dep_mngr_mem_recs));
3566 }
3567 /*!
3568 ******************************************************************************
3569 * \if Function name : ihevce_enc_loop_get_mem_recs \endif
3570 *
3571 * \brief
3572 * Memory requirements are returned for ENC_LOOP.
3573 *
3574 * \param[in,out] ps_mem_tab : pointer to memory descriptors table
3575 * \param[in] ps_init_prms : Create time static parameters
3576 * \param[in] i4_num_proc_thrds : Number of processing threads for this module
3577 * \param[in] i4_mem_space : memspace in whihc memory request should be done
3578 *
3579 * \return
3580 * None
3581 *
3582 * \author
3583 * Ittiam
3584 *
3585 *****************************************************************************
3586 */
WORD32 ihevce_enc_loop_get_mem_recs(
    iv_mem_rec_t *ps_mem_tab,
    ihevce_static_cfg_params_t *ps_init_prms,
    WORD32 i4_num_proc_thrds,
    WORD32 i4_num_bitrate_inst,
    WORD32 i4_num_enc_loop_frm_pllel,
    WORD32 i4_mem_space,
    WORD32 i4_resolution_id)
{
    /* Fills size, memory type and alignment of the ENC_LOOP memory records  */
    /* indexed below, then appends the records requested by the dependency   */
    /* managers starting at index NUM_ENC_LOOP_MEM_RECS.                     */
    /*  - per-thread buffers are scaled by i4_num_proc_thrds                 */
    /*  - frame level buffers are scaled by                                  */
    /*    i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel                    */
    /* The return value is the total number of records filled.               */
    UWORD32 u4_width, u4_height, n_tabs;
    UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame;
    WORD32 ctr;
    WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format;
    WORD32 i4_quality_preset =
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;

    /* 1 when the source is 4:2:2 semi-planar, else 0 */
    WORD32 i4_is_422 = (i4_chroma_format == IV_YUV_422SP_UV);
    /* chroma buffers are twice as large for 4:2:2 */
    WORD32 i4_422_scale = i4_is_422 ? 2 : 1;
    /* internal bit depth above 8 needs two bytes per sample */
    WORD32 i4_is_hbd = (ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8);
    WORD32 i4_bytes_per_pel = i4_is_hbd ? 2 : 1;
    /* every record of this module is requested in the same memory space */
    IV_MEM_TYPE_T e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;

    /* derive frame dimensions */
    /* width of the input YUV to be encoded, made a multiple of CTB size */
    u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
    u4_width += SET_CTB_ALIGN(
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE);

    /* height of the input YUV to be encoded, made a multiple of CTB size */
    u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
    u4_height += SET_CTB_ALIGN(
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE);

    u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE);
    u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE);

    /* memories should be requested assuming worst case requirements */

    /* Module context structure */
    ps_mem_tab[ENC_LOOP_CTXT].i4_mem_size = sizeof(ihevce_enc_loop_master_ctxt_t);
    ps_mem_tab[ENC_LOOP_CTXT].e_mem_type = e_mem_type;
    ps_mem_tab[ENC_LOOP_CTXT].i4_mem_alignment = 8;

    /* Thread context structure : one per processing thread */
    ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_size =
        i4_num_proc_thrds * sizeof(ihevce_enc_loop_ctxt_t);
    ps_mem_tab[ENC_LOOP_THRDS_CTXT].e_mem_type = e_mem_type;
    ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_alignment = 16;

    /* Scale matrices */
    ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16);
    ps_mem_tab[ENC_LOOP_SCALE_MAT].e_mem_type = e_mem_type;
    ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_alignment = 8;

    /* Rescale matrices */
    ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16);
    ps_mem_tab[ENC_LOOP_RESCALE_MAT].e_mem_type = e_mem_type;
    ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_alignment = 8;

    /* top row luma : one row of pixel data per CTB row (plus one extra row) */
    ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size =
        (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 1) *
        (i4_is_hbd ? sizeof(UWORD16) : sizeof(UWORD8)) * i4_num_bitrate_inst *
        i4_num_enc_loop_frm_pllel;
    ps_mem_tab[ENC_LOOP_TOP_LUMA].e_mem_type = e_mem_type;
    ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_alignment = 8;

    /* top row chroma */
    ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size =
        (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 2) *
        (i4_is_hbd ? sizeof(UWORD16) : sizeof(UWORD8)) * i4_num_bitrate_inst *
        i4_num_enc_loop_frm_pllel;
    ps_mem_tab[ENC_LOOP_TOP_CHROMA].e_mem_type = e_mem_type;
    ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_alignment = 8;

    /* top row neighbour 4x4 */
    ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_size =
        (u4_ctb_rows_in_a_frame + 1) * (((u4_width + MAX_CU_SIZE) >> 2) + 1) * sizeof(nbr_4x4_t) *
        i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
    ps_mem_tab[ENC_LOOP_TOP_NBR4X4].e_mem_type = e_mem_type;
    ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_alignment = 8;

    /* memory to dump rate control parameters by each thread for each bit-rate instance */
    /* RC params collated by each thread for each bit-rate instance separately */
    ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_size = i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel *
                                                 i4_num_proc_thrds * sizeof(enc_loop_rc_params_t);
    ps_mem_tab[ENC_LOOP_RC_PARAMS].e_mem_type = e_mem_type;
    ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_alignment = 8;

    /* Memory required for deblocking */
    {
        /* Memory to store Qp of top4x4 blocks for each CTB row.
        This memory is allocated at frame level and shared across
        all cores. The Qp values are needed to form Qp-map(described
        in the ENC_LOOP_DEBLOCKING section below)*/

        UWORD32 u4_size_bs_memory, u4_size_qp_memory;
        UWORD32 u4_size_top_4x4_qp_memory;

        /*Memory required to store Qp of top4x4 blocks for a CTB row for entire frame*/
        /*Space required per CTB*/
        u4_size_top_4x4_qp_memory = (MAX_CTB_SIZE / 4);
        /*Space required for entire CTB row*/
        u4_size_top_4x4_qp_memory *= u4_ctb_in_a_row;
        /*Space required for entire frame*/
        u4_size_top_4x4_qp_memory *= u4_ctb_rows_in_a_frame;
        /*Space required for multiple bitrate*/
        u4_size_top_4x4_qp_memory *= i4_num_bitrate_inst;
        /*Space required for multiple frames in parallel*/
        u4_size_top_4x4_qp_memory *= i4_num_enc_loop_frm_pllel;

        ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_size = u4_size_top_4x4_qp_memory;
        ps_mem_tab[ENC_LOOP_QP_TOP_4X4].e_mem_type = e_mem_type;
        ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_alignment = 8;

        /* Memory allocation of BS and Qp-map for deblocking at CTB-row level:
        ## Boundary Strength(Vertical):
        BS stored per CTB at one stretch i.e. for a 64x CTB first 8 entries belongs to first CTB
        of the row followed by 8 entries of second CTB and so on.
        8 entries: Includes left edge of current CTB and excludes right edge.
        ## Boundary Strength(Horizontal):
        Same as Vertical.
        8 entries: Includes top edge of current CTB and excludes bottom edge.

        ## Qp-map storage:
        T0 T1 T2 T3 T4 T5 ..........to the end of the CTB row
        00 01 02 03 04 05 ..........to the end of the CTB row
        10 11 12 13 14 15 ..........to the end of the CTB row
        20 21 22 23 24 25 ..........to the end of the CTB row
        30 31 32 33 34 35 ..........to the end of the CTB row
        40 41 42 43 44 45 ..........to the end of the CTB row
        ............................to the end of the CTB row
        upto height_of_CTB..........to the end of the CTB row

        Qp is stored for each "4x4 block" in a proper 2-D array format (One entry for each 4x4).
        A 2-D array of height= (height_of_CTB +1), and width = (width_of_CTB).
        where,
        => height_of_CTB = number of 4x4 blocks in a CTB vertically,
        => +1 is done to store Qp of lowest 4x4-block layer of top-CTB
        in order to deblock top edge of current CTB.
        => width_of_CTB = number of 4x4 blocks in a CTB horizontally,
        */

        /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/
        /*1 vertical edge per 8 pixel*/
        u4_size_bs_memory = (MAX_CTB_SIZE >> 3);
        /*Vertical edges for entire width of CTB row*/
        u4_size_bs_memory *= u4_ctb_in_a_row;
        /*Each vertical edge of CTB row is 4 bytes*/
        u4_size_bs_memory = u4_size_bs_memory << 2;
        /*Adding Memory required for storing horizontal BS by doubling*/
        u4_size_bs_memory = u4_size_bs_memory << 1;

        /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/
        /*Number of 4x4 blocks in the width of a CTB*/
        u4_size_qp_memory = (MAX_CTB_SIZE >> 2);
        /*Number of 4x4 blocks in the height of a CTB. Adding 1 to store Qp of lowest
        4x4-block layer of top-CTB in order to deblock top edge of current CTB*/
        u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1);
        /*Storage for entire CTB row*/
        u4_size_qp_memory *= u4_ctb_in_a_row;

        /*Multiplying by i4_num_proc_thrds to assign memory for each core*/
        ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_size =
            i4_num_proc_thrds * (u4_size_bs_memory + u4_size_qp_memory);
        ps_mem_tab[ENC_LOOP_DEBLOCKING].e_mem_type = e_mem_type;
        ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_alignment = 8;
    }

    /* Memory required to store pred for 422 chroma */
    /* (i4_is_422 is used as a multiplier, so the size is 0 for other formats) */
    ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_size =
        i4_num_proc_thrds * MAX_CTB_SIZE * MAX_CTB_SIZE * 2 * i4_is_422 * i4_bytes_per_pel *
        sizeof(UWORD8);
    ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].e_mem_type = e_mem_type;
    ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_alignment = 8;

    /* Memory for inter pred buffers */
    {
        WORD32 i4_num_bufs_per_thread = 0;
        WORD32 i4_buf_size_per_cand =
            (MAX_CTB_SIZE) * (MAX_CTB_SIZE)*i4_bytes_per_pel * sizeof(UWORD8);

        /* number of candidate buffers depends on the quality preset */
        switch(i4_quality_preset)
        {
        case IHEVCE_QUALITY_P0:
        {
            i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_PQ;
            break;
        }
        case IHEVCE_QUALITY_P2:
        {
            i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HQ;
            break;
        }
        case IHEVCE_QUALITY_P3:
        {
            i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_MS;
            break;
        }
        case IHEVCE_QUALITY_P4:
        {
            i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HS;
            break;
        }
        case IHEVCE_QUALITY_P5:
        case IHEVCE_QUALITY_P6:
        case IHEVCE_QUALITY_P7:
        {
            i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_ES;
            break;
        }
        default:
        {
            /* unsupported preset */
            ASSERT(0);
            break;
        }
        }

        /* 4 buffers are requested on top of the per-preset candidate count */
        i4_num_bufs_per_thread += 4;

        ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size =
            i4_num_bufs_per_thread * i4_num_proc_thrds * i4_buf_size_per_cand;
        ps_mem_tab[ENC_LOOP_INTER_PRED].e_mem_type = e_mem_type;
        ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_alignment = 8;
    }

    /* Memory required to store chroma intra pred */
    ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_size =
        i4_num_proc_thrds * (MAX_TU_SIZE) * (MAX_TU_SIZE)*2 * NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD *
        i4_422_scale * i4_bytes_per_pel * sizeof(UWORD8);
    ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].e_mem_type = e_mem_type;
    ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_alignment = 8;

    /* Memory required to store pred for reference substitution output */
    ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_size =
        i4_num_proc_thrds * ((MAX_TU_SIZE * 2 * 2) + 4) * i4_bytes_per_pel * sizeof(UWORD8);
    ps_mem_tab[ENC_LOOP_REF_SUB_OUT].e_mem_type = e_mem_type;
    ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_alignment = 8;

    /* Memory required to store pred for reference filtering output */
    ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_size =
        i4_num_proc_thrds * ((MAX_TU_SIZE * 2 * 2) + 4) * i4_bytes_per_pel * sizeof(UWORD8);
    ps_mem_tab[ENC_LOOP_REF_FILT_OUT].e_mem_type = e_mem_type;
    ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_alignment = 8;

    /* Memory assignments for recon and pred storage during CU Recursion.   */
    /* When PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS is 0 these buffers */
    /* are requested only for quality preset 0; other presets get size 0.   */
#if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
    if(i4_quality_preset == 0)
#endif
    {
        ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size =
            i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) * i4_bytes_per_pel * sizeof(UWORD8);

        ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size =
            i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) * i4_422_scale *
            i4_bytes_per_pel * sizeof(UWORD8);

        ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size =
            i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) * i4_bytes_per_pel * sizeof(UWORD8);

        ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size =
            i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) * i4_422_scale *
            i4_bytes_per_pel * sizeof(UWORD8);
    }
#if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
    else
    {
        ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size = 0;
        ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size = 0;
        ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size = 0;
        ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size = 0;
    }
#endif
    /* memory type and alignment are the same whether or not a size was requested */
    ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = e_mem_type;
    ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8;
    ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = e_mem_type;
    ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8;
    ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = e_mem_type;
    ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8;
    ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = e_mem_type;
    ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8;

    /* Memory assignments for CTB left luma data storage */
    ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_size =
        i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) * i4_bytes_per_pel * sizeof(UWORD8);
    ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].e_mem_type = e_mem_type;
    ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_alignment = 8;

    /* Memory assignments for CTB left chroma data storage (doubled for 4:2:2) */
    ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size =
        i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) * i4_bytes_per_pel * sizeof(UWORD8);
    ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size <<= (i4_is_422 ? 1 : 0);
    ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].e_mem_type = e_mem_type;
    ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_alignment = 8;

    /* Memory required for SAO */
    {
        /* signed copies of the CTB aligned frame geometry */
        WORD32 num_vert_units = u4_height / MAX_CTB_SIZE;
        WORD32 num_horz_units = u4_width / MAX_CTB_SIZE;
        WORD32 ctb_aligned_ht = u4_height;
        WORD32 ctb_aligned_wd = u4_width;
        WORD32 luma_buf, chroma_buf;

        /* Memory for top buffer. 1 extra width is required for top buf ptr for row 0
         * and 1 extra location is required for top left buf ptr for row 0
         * Also 1 extra byte is required for every row for top left pixel if
         * the top left ptr is to be passed to leaf level unconditionally
         */
        luma_buf =
            (ctb_aligned_ht + (ctb_aligned_wd + 1) * (num_vert_units + 1)) * i4_bytes_per_pel;
        chroma_buf =
            (ctb_aligned_ht + (ctb_aligned_wd + 2) * (num_vert_units + 1)) * i4_bytes_per_pel;

        ps_mem_tab[ENC_LOOP_SAO].i4_mem_size =
            (luma_buf + chroma_buf) * (i4_num_bitrate_inst) * (i4_num_enc_loop_frm_pllel);

        /* Add the memory required to store the sao information of top ctb for top merge
         * This is frame level buffer.
         */
        ps_mem_tab[ENC_LOOP_SAO].i4_mem_size +=
            ((num_horz_units * sizeof(sao_enc_t)) * num_vert_units) * (i4_num_bitrate_inst) *
            (i4_num_enc_loop_frm_pllel);

        ps_mem_tab[ENC_LOOP_SAO].e_mem_type = e_mem_type;
        ps_mem_tab[ENC_LOOP_SAO].i4_mem_alignment = 8;
    }

    /* Memory for CU level Coeff data buffer */
    {
        /* 16 additional bytes are required to ensure alignment */
        ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_size =
            i4_num_proc_thrds *
            (((MAX_LUMA_COEFFS_CTB + (MAX_CHRM_COEFFS_CTB << (i4_is_422 ? 1 : 0))) + 16) * (2) *
             sizeof(UWORD8));
        ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].e_mem_type = e_mem_type;
        ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_alignment = 16;

        ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_size =
            i4_num_proc_thrds *
            (MAX_LUMA_COEFFS_CTB + (MAX_CHRM_COEFFS_CTB << (i4_is_422 ? 1 : 0))) * sizeof(UWORD8);
        ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].e_mem_type = e_mem_type;
        ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_alignment = 16;
    }

    /* Memory for CU dequant data buffer */
    {
        /* 16 additional bytes (8 WORD16 entries) are required to ensure alignment */
        ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_size =
            i4_num_proc_thrds *
            ((i4_is_422 ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1))
                        : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) +
             8) *
            (2) * sizeof(WORD16);
        ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].e_mem_type = e_mem_type;
        ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_alignment = 16;
    }

    /* Memory for Recon Datastore (Used around and within the RDOPT loop) */
    {
        WORD32 i4_memSize_perThread;
        WORD32 i4_chroma_memSize_perThread;

        /* 2 bufs each allocated to the two 'enc_loop_cu_final_prms_t' structs */
        /* used in RDOPT to store cur and best modes' data */
        WORD32 i4_luma_memSize_perThread = 4 * MAX_CU_SIZE * MAX_CU_SIZE * i4_bytes_per_pel;

        /* samples in one chroma buffer: full CU height for 4:2:2, half otherwise */
        WORD32 i4_chroma_buf_samples =
            MAX_CU_SIZE * (i4_is_422 ? MAX_CU_SIZE : (MAX_CU_SIZE / 2));

        /* Per-preset compile time switches selecting which chroma bufs are needed */
        WORD32 i4_chroma_rdopt_eval = 0;
        WORD32 i4_addl_chroma_modes_eval = 0;

        switch(i4_quality_preset)
        {
        case IHEVCE_QUALITY_P0:
        {
            i4_chroma_rdopt_eval = ENABLE_CHROMA_RDOPT_EVAL_IN_PQ;
            i4_addl_chroma_modes_eval = ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ;
            break;
        }
        case IHEVCE_QUALITY_P2:
        {
            i4_chroma_rdopt_eval = ENABLE_CHROMA_RDOPT_EVAL_IN_HQ;
            i4_addl_chroma_modes_eval = ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ;
            break;
        }
        case IHEVCE_QUALITY_P3:
        {
            i4_chroma_rdopt_eval = ENABLE_CHROMA_RDOPT_EVAL_IN_MS;
            i4_addl_chroma_modes_eval = ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS;
            break;
        }
        case IHEVCE_QUALITY_P4:
        {
            i4_chroma_rdopt_eval = ENABLE_CHROMA_RDOPT_EVAL_IN_HS;
            i4_addl_chroma_modes_eval = ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS;
            break;
        }
        case IHEVCE_QUALITY_P5:
        {
            i4_chroma_rdopt_eval = ENABLE_CHROMA_RDOPT_EVAL_IN_XS;
            i4_addl_chroma_modes_eval = ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS;
            break;
        }
        case IHEVCE_QUALITY_P6:
        case IHEVCE_QUALITY_P7:
        {
            i4_chroma_rdopt_eval = ENABLE_CHROMA_RDOPT_EVAL_IN_XS6;
            i4_addl_chroma_modes_eval = ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6;
            break;
        }
        default:
        {
            /* any other preset requests no chroma recon storage */
            break;
        }
        }

        /* First term : 2 bufs for storing recons of the best modes determined */
        /* in the function 'ihevce_intra_chroma_pred_mode_selector'            */
        /* Second term : 1 buf each allocated to the two                       */
        /* 'enc_loop_cu_final_prms_t' structs used in RDOPT to store cur and   */
        /* best modes' data                                                    */
        i4_chroma_memSize_perThread =
            (2 * i4_chroma_buf_samples * i4_chroma_rdopt_eval * i4_bytes_per_pel) +
            (2 * i4_chroma_buf_samples * i4_addl_chroma_modes_eval * i4_bytes_per_pel);

        i4_memSize_perThread = i4_luma_memSize_perThread + i4_chroma_memSize_perThread;

        ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size =
            i4_num_proc_thrds * i4_memSize_perThread * sizeof(UWORD8);
        ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].e_mem_type = e_mem_type;
        ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_alignment = 16;
    }

    n_tabs = NUM_ENC_LOOP_MEM_RECS;

    /*************************************************************************/
    /* --- Dep Mngr Mem requests, appended after the ENC_LOOP records --     */
    /*   group 0 : EncLoop Deblock sync                                      */
    /*   group 1 : EncLoop Top-Right CU sync                                 */
    /*   group 2 : EncLoop Aux. on Ref. bitrate sync                         */
    /* The groups are requested in this order, one dep mngr per frame in     */
    /* parallel and per bit-rate instance. Group 2 starts at bit-rate        */
    /* instance 1, i.e. it requests nothing for instance 0.                  */
    /*************************************************************************/
    {
        static const WORD32 ai4_first_br_inst[] = { 0, 0, 1 };
        WORD32 i4_num_groups =
            (WORD32)(sizeof(ai4_first_br_inst) / sizeof(ai4_first_br_inst[0]));
        WORD32 i4_group;
        WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;

        for(i4_group = 0; i4_group < i4_num_groups; i4_group++)
        {
            WORD32 count;
            WORD32 num_vert_units;

            /* number of vertical units is derived from the (unaligned) frame height */
            ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
            ASSERT(num_vert_units > 0);

            for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
            {
                for(ctr = ai4_first_br_inst[i4_group]; ctr < i4_num_bitrate_inst; ctr++)
                {
                    /* each call fills records from n_tabs onwards and */
                    /* returns how many it used                        */
                    n_tabs += ihevce_dmgr_get_mem_recs(
                        &ps_mem_tab[n_tabs],
                        DEP_MNGR_ROW_ROW_SYNC,
                        num_vert_units,
                        ps_init_prms->s_app_tile_params.i4_num_tile_cols,
                        i4_num_proc_thrds,
                        i4_mem_space);
                }
            }
        }
    }

    return (WORD32)(n_tabs);
}
4360
4361 /*!
4362 ******************************************************************************
4363 * \if Function name : ihevce_enc_loop_init \endif
4364 *
4365 * \brief
* Initialization of the ENC_LOOP context state structure.
4367 *
4368 * \param[in] ps_mem_tab : pointer to memory descriptors table
4369 * \param[in] ps_init_prms : Create time static parameters
4370 * \param[in] pv_osal_handle : Osal handle
4371 *
4372 * \return
4373 * None
4374 *
4375 * \author
4376 * Ittiam
4377 *
4378 *****************************************************************************
4379 */
ihevce_enc_loop_init(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_num_proc_thrds,void * pv_osal_handle,func_selector_t * ps_func_selector,rc_quant_t * ps_rc_quant_ctxt,ihevce_tile_params_t * ps_tile_params_base,WORD32 i4_resolution_id,WORD32 i4_num_enc_loop_frm_pllel,UWORD8 u1_is_popcnt_available)4380 void *ihevce_enc_loop_init(
4381 iv_mem_rec_t *ps_mem_tab,
4382 ihevce_static_cfg_params_t *ps_init_prms,
4383 WORD32 i4_num_proc_thrds,
4384 void *pv_osal_handle,
4385 func_selector_t *ps_func_selector,
4386 rc_quant_t *ps_rc_quant_ctxt,
4387 ihevce_tile_params_t *ps_tile_params_base,
4388 WORD32 i4_resolution_id,
4389 WORD32 i4_num_enc_loop_frm_pllel,
4390 UWORD8 u1_is_popcnt_available)
4391 {
4392 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
4393 ihevce_enc_loop_ctxt_t *ps_ctxt;
4394 WORD32 ctr, n_tabs;
4395 UWORD32 u4_width, u4_height;
4396 UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame;
4397 UWORD32 u4_size_bs_memory, u4_size_qp_memory;
4398 UWORD8 *pu1_deblk_base; /*Store the base address of deblcoking memory*/
4399 WORD32 i;
4400 WORD32 i4_num_bitrate_inst =
4401 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_num_bitrate_instances;
4402 enc_loop_rc_params_t *ps_enc_loop_rc_params;
4403 UWORD8 *pu1_sao_base; /* store the base address of sao*/
4404 UWORD32 u4_ctb_aligned_wd, ctb_size, u4_ctb_aligned_ht, num_vert_units;
4405 WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format;
4406 WORD32 is_hbd_mode = (ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8);
4407 WORD32 i4_enc_frm_id;
4408 WORD32 num_cu_in_ctb;
4409 WORD32 i4_num_tile_cols = 1; //Default value is 1
4410
4411 /* ENC_LOOP state structure */
4412 ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)ps_mem_tab[ENC_LOOP_CTXT].pv_base;
4413
4414 ps_master_ctxt->i4_num_proc_thrds = i4_num_proc_thrds;
4415
4416 ps_ctxt = (ihevce_enc_loop_ctxt_t *)ps_mem_tab[ENC_LOOP_THRDS_CTXT].pv_base;
4417 ps_enc_loop_rc_params = (enc_loop_rc_params_t *)ps_mem_tab[ENC_LOOP_RC_PARAMS].pv_base;
4418 ps_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt;
4419 /*Calculation of memory sizes for deblocking*/
4420 {
4421 /*width of the input YUV to be encoded. */
4422 u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
4423 /*making the width a multiple of CTB size*/
4424 u4_width += SET_CTB_ALIGN(
4425 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE);
4426
4427 u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE);
4428
4429 /*height of the input YUV to be encoded */
4430 u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4431 /*making the height a multiple of CTB size*/
4432 u4_height += SET_CTB_ALIGN(
4433 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE);
4434
4435 u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE);
4436
4437 /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/
4438 /*1 vertical edge per 8 pixel*/
4439 u4_size_bs_memory = (MAX_CTB_SIZE >> 3);
4440 /*Vertical edges for entire width of CTB row*/
4441 u4_size_bs_memory *= u4_ctb_in_a_row;
4442 /*Each vertical edge of CTB row is 4 bytes*/
4443 u4_size_bs_memory = u4_size_bs_memory << 2;
4444 /*Adding Memory required for storing horizontal BS by doubling*/
4445 u4_size_bs_memory = u4_size_bs_memory << 1;
4446
4447 /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/
4448 /*Number of 4x4 blocks in the width of a CTB*/
4449 u4_size_qp_memory = (MAX_CTB_SIZE >> 2);
4450 /*Number of 4x4 blocks in the height of a CTB. Adding 1 to store Qp of lowest
4451 4x4-block layer of top-CTB in order to deblock top edge of current CTB*/
4452 u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1);
4453 /*Storage for entire CTB row*/
4454 u4_size_qp_memory *= u4_ctb_in_a_row;
4455
4456 pu1_deblk_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_DEBLOCKING].pv_base;
4457 }
4458
4459 /*Derive the base pointer of sao*/
4460 pu1_sao_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_SAO].pv_base;
4461 ctb_size = (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size);
4462 u4_ctb_aligned_wd = u4_width;
4463 u4_ctb_aligned_ht = u4_height;
4464 num_vert_units = (u4_height) / ctb_size;
4465
4466 for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++)
4467 {
4468 ps_master_ctxt->aps_enc_loop_thrd_ctxt[ctr] = ps_ctxt;
4469 /* Store Tile params base into EncLoop context */
4470 ps_ctxt->pv_tile_params_base = (void *)ps_tile_params_base;
4471 ihevce_cmn_utils_instr_set_router(
4472 &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type);
4473 ihevce_sifter_sad_fxn_assigner(
4474 (FT_SAD_EVALUATOR **)(&ps_ctxt->pv_evalsad_pt_npu_mxn_8bit), ps_init_prms->e_arch_type);
4475 ps_ctxt->i4_max_search_range_horizontal =
4476 ps_init_prms->s_config_prms.i4_max_search_range_horz;
4477 ps_ctxt->i4_max_search_range_vertical =
4478 ps_init_prms->s_config_prms.i4_max_search_range_vert;
4479
4480 ps_ctxt->i4_quality_preset =
4481 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
4482
4483 if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P7)
4484 {
4485 ps_ctxt->i4_quality_preset = IHEVCE_QUALITY_P6;
4486 }
4487
4488 ps_ctxt->i4_num_proc_thrds = ps_master_ctxt->i4_num_proc_thrds;
4489
4490 ps_ctxt->i4_rc_pass = ps_init_prms->s_pass_prms.i4_pass;
4491
4492 ps_ctxt->u1_chroma_array_type = (i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1;
4493
4494 ps_ctxt->s_deblk_prms.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
4495
4496 ps_ctxt->pi2_scal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_SCALE_MAT].pv_base;
4497
4498 ps_ctxt->pi2_rescal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_RESCALE_MAT].pv_base;
4499
4500 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
4501 {
4502 ps_ctxt->i4_use_ctb_level_lamda = 0;
4503 }
4504 else
4505 {
4506 ps_ctxt->i4_use_ctb_level_lamda = 0;
4507 }
4508
4509 /** Register the function selector pointer*/
4510 ps_ctxt->ps_func_selector = ps_func_selector;
4511
4512 ps_ctxt->s_mc_ctxt.ps_func_selector = ps_func_selector;
4513
4514 /* Initialization for non-distributed mode */
4515 ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[0] = 0;
4516 ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[1] = 0;
4517 ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[2] = 0;
4518 ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[3] = 0;
4519
4520 ps_ctxt->s_deblk_prms.ps_func_selector = ps_func_selector;
4521 ps_ctxt->i4_top_row_luma_stride = (u4_width + MAX_CU_SIZE + 1);
4522
4523 ps_ctxt->i4_frm_top_row_luma_size =
4524 ps_ctxt->i4_top_row_luma_stride * (u4_ctb_rows_in_a_frame + 1);
4525
4526 ps_ctxt->i4_top_row_chroma_stride = (u4_width + MAX_CU_SIZE + 2);
4527
4528 ps_ctxt->i4_frm_top_row_chroma_size =
4529 ps_ctxt->i4_top_row_chroma_stride * (u4_ctb_rows_in_a_frame + 1);
4530
4531 {
4532 for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4533 {
4534 /* +1 is to provision top left pel */
4535 ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] =
4536 (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_LUMA].pv_base + 1 +
4537 (ps_ctxt->i4_frm_top_row_luma_size * i4_enc_frm_id * i4_num_bitrate_inst);
4538
4539 /* pointer incremented by 1 row to avoid OOB access in 0th row */
4540 ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] =
4541 (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] +
4542 ps_ctxt->i4_top_row_luma_stride;
4543
4544 /* +2 is to provision top left pel */
4545 ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] =
4546 (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_CHROMA].pv_base + 2 +
4547 (ps_ctxt->i4_frm_top_row_chroma_size * i4_enc_frm_id * i4_num_bitrate_inst);
4548
4549 /* pointer incremented by 1 row to avoid OOB access in 0th row */
4550 ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] =
4551 (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] +
4552 ps_ctxt->i4_top_row_chroma_stride;
4553 }
4554 }
4555
4556 /* +1 is to provision top left nbr */
4557 ps_ctxt->i4_top_row_nbr_stride = (((u4_width + MAX_CU_SIZE) >> 2) + 1);
4558 ps_ctxt->i4_frm_top_row_nbr_size =
4559 ps_ctxt->i4_top_row_nbr_stride * (u4_ctb_rows_in_a_frame + 1);
4560 for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4561 {
4562 ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] =
4563 (nbr_4x4_t *)ps_mem_tab[ENC_LOOP_TOP_NBR4X4].pv_base + 1 +
4564 (ps_ctxt->i4_frm_top_row_nbr_size * i4_enc_frm_id * i4_num_bitrate_inst);
4565 ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] += ps_ctxt->i4_top_row_nbr_stride;
4566 }
4567
4568 num_cu_in_ctb = ctb_size / MIN_CU_SIZE;
4569 num_cu_in_ctb *= num_cu_in_ctb;
4570
4571 /* pointer incremented by 1 row to avoid OOB access in 0th row */
4572
4573 /* Memory for CU level Coeff data buffer */
4574 {
4575 WORD32 i4_16byte_boundary_overshoot;
4576 WORD32 buf_size_per_cu;
4577 WORD32 buf_size_per_thread_wo_alignment_req;
4578 WORD32 buf_size_per_thread;
4579
4580 buf_size_per_cu =
4581 ((MAX_LUMA_COEFFS_CTB +
4582 (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) +
4583 16) *
4584 sizeof(UWORD8);
4585 buf_size_per_thread_wo_alignment_req = buf_size_per_cu - 16 * sizeof(UWORD8);
4586
4587 {
4588 buf_size_per_thread = buf_size_per_cu * (2);
4589
4590 for(i = 0; i < 2; i++)
4591 {
4592 ps_ctxt->as_cu_prms[i].pu1_cu_coeffs =
4593 (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].pv_base +
4594 (ctr * buf_size_per_thread) + (i * buf_size_per_cu);
4595
4596 i4_16byte_boundary_overshoot =
4597 ((LWORD64)ps_ctxt->as_cu_prms[i].pu1_cu_coeffs & 0xf);
4598
4599 ps_ctxt->as_cu_prms[i].pu1_cu_coeffs += (16 - i4_16byte_boundary_overshoot);
4600 }
4601 }
4602
4603 ps_ctxt->pu1_cu_recur_coeffs =
4604 (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].pv_base +
4605 (ctr * buf_size_per_thread_wo_alignment_req);
4606 }
4607
4608 /* Memory for CU dequant data buffer */
4609 {
4610 WORD32 buf_size_per_thread;
4611 WORD32 i4_16byte_boundary_overshoot;
4612
4613 WORD32 buf_size_per_cu =
4614 (((i4_chroma_format == IV_YUV_422SP_UV) ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1))
4615 : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) +
4616 8) *
4617 sizeof(WORD16);
4618
4619 {
4620 buf_size_per_thread = buf_size_per_cu * 2;
4621
4622 for(i = 0; i < 2; i++)
4623 {
4624 ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs =
4625 (WORD16
4626 *)((UWORD8 *)ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].pv_base + (ctr * buf_size_per_thread) + (i * buf_size_per_cu));
4627
4628 i4_16byte_boundary_overshoot =
4629 ((LWORD64)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs & 0xf);
4630
4631 ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs =
4632 (WORD16
4633 *)((UWORD8 *)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs + (16 - i4_16byte_boundary_overshoot));
4634 }
4635 }
4636 }
4637
4638 /*------ Deblocking memory's pointer assignments start ------*/
4639
4640 /*Assign stride = 4x4 blocks in horizontal edge*/
4641 ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row;
4642
4643 ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size =
4644 ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd * u4_ctb_rows_in_a_frame;
4645
4646 /*Assign frame level memory to store the Qp of
4647 top 4x4 neighbours of each CTB row*/
4648 for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4649 {
4650 ps_ctxt->s_deblk_ctbrow_prms.api1_qp_top_4x4_ctb_row[i4_enc_frm_id] =
4651 (WORD8 *)ps_mem_tab[ENC_LOOP_QP_TOP_4X4].pv_base +
4652 (ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size * i4_num_bitrate_inst *
4653 i4_enc_frm_id);
4654 }
4655
4656 ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_vert = (UWORD32 *)pu1_deblk_base;
4657
4658 ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_horz =
4659 (UWORD32 *)(pu1_deblk_base + (u4_size_bs_memory >> 1));
4660
4661 ps_ctxt->s_deblk_ctbrow_prms.pi1_ctb_row_qp = (WORD8 *)pu1_deblk_base + u4_size_bs_memory;
4662
4663 /*Assign stride = 4x4 blocks in horizontal edge*/
4664 ps_ctxt->s_deblk_ctbrow_prms.u4_qp_buffer_stride = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row;
4665
4666 pu1_deblk_base += (u4_size_bs_memory + u4_size_qp_memory);
4667
4668 /*------Deblocking memory's pointer assignments end ------*/
4669
4670 /*------SAO memory's pointer assignment starts------------*/
4671 if(!is_hbd_mode)
4672 {
4673 /* 2 is added to allocate top left pixel */
4674 ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size =
4675 u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1);
4676 ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size =
4677 u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 2) * (num_vert_units + 1);
4678 ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units =
4679 num_vert_units * (u4_ctb_aligned_wd / MAX_CTB_SIZE);
4680
4681 for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4682 {
4683 ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_luma[i4_enc_frm_id] =
4684 pu1_sao_base +
4685 ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size +
4686 ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) *
4687 i4_num_bitrate_inst * i4_enc_frm_id) + // move to the next frame_id
4688 u4_ctb_aligned_wd +
4689 2;
4690
4691 ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_chroma[i4_enc_frm_id] =
4692 pu1_sao_base +
4693 ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size +
4694 ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) *
4695 i4_num_bitrate_inst * i4_enc_frm_id) +
4696 +u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1) +
4697 u4_ctb_aligned_wd + 4;
4698
4699 ps_ctxt->s_sao_ctxt_t.aps_frm_top_ctb_sao[i4_enc_frm_id] = (sao_enc_t *) (pu1_sao_base +
4700 ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size + ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size)
4701 *i4_num_bitrate_inst*i4_num_enc_loop_frm_pllel) +
4702 (ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units * sizeof(sao_enc_t) *i4_num_bitrate_inst * i4_enc_frm_id));
4703 }
4704 ps_ctxt->s_sao_ctxt_t.i4_ctb_size =
4705 (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size);
4706 ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd = u4_ctb_aligned_wd;
4707 }
4708
4709 /*------SAO memory's pointer assignment ends------------*/
4710
4711 /* perform all one time initialisation here */
4712 ps_ctxt->i4_nbr_map_strd = MAX_PU_IN_CTB_ROW + 1 + 8;
4713
4714 ps_ctxt->pu1_ctb_nbr_map = ps_ctxt->au1_nbr_ctb_map[0];
4715
4716 ps_ctxt->i4_deblock_type = ps_init_prms->s_coding_tools_prms.i4_deblocking_type;
4717
4718 /* move the pointer to 1,2 location */
4719 ps_ctxt->pu1_ctb_nbr_map += ps_ctxt->i4_nbr_map_strd;
4720 ps_ctxt->pu1_ctb_nbr_map++;
4721
4722 ps_ctxt->i4_cu_csbf_strd = MAX_TU_IN_CTB_ROW;
4723
4724 CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map4x4TU, 1, 4, ps_ctxt->i4_cu_csbf_strd);
4725
4726 CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map8x8TU, 4, 8, ps_ctxt->i4_cu_csbf_strd);
4727
4728 CREATE_SUBBLOCK2CSBFID_MAP(
4729 gai4_subBlock2csbfId_map16x16TU, 16, 16, ps_ctxt->i4_cu_csbf_strd);
4730
4731 CREATE_SUBBLOCK2CSBFID_MAP(
4732 gai4_subBlock2csbfId_map32x32TU, 64, 32, ps_ctxt->i4_cu_csbf_strd);
4733
4734 /* For both instance initialise the chroma dequant start idx */
4735 ps_ctxt->as_cu_prms[0].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE);
4736 ps_ctxt->as_cu_prms[1].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE);
4737
4738 /* initialise all the function pointer tables */
4739 {
4740 ps_ctxt->pv_inter_rdopt_cu_mc_mvp =
4741 (pf_inter_rdopt_cu_mc_mvp)ihevce_inter_rdopt_cu_mc_mvp;
4742
4743 ps_ctxt->pv_inter_rdopt_cu_ntu = (pf_inter_rdopt_cu_ntu)ihevce_inter_rdopt_cu_ntu;
4744
4745 #if ENABLE_RDO_BASED_TU_RECURSION
4746 if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4747 {
4748 ps_ctxt->pv_inter_rdopt_cu_ntu =
4749 (pf_inter_rdopt_cu_ntu)ihevce_inter_tu_tree_selector_and_rdopt_cost_computer;
4750 }
4751 #endif
4752 ps_ctxt->pv_intra_chroma_pred_mode_selector =
4753 (pf_intra_chroma_pred_mode_selector)ihevce_intra_chroma_pred_mode_selector;
4754 ps_ctxt->pv_intra_rdopt_cu_ntu = (pf_intra_rdopt_cu_ntu)ihevce_intra_rdopt_cu_ntu;
4755 ps_ctxt->pv_final_rdopt_mode_prcs =
4756 (pf_final_rdopt_mode_prcs)ihevce_final_rdopt_mode_prcs;
4757 ps_ctxt->pv_store_cu_results = (pf_store_cu_results)ihevce_store_cu_results;
4758 ps_ctxt->pv_enc_loop_cu_bot_copy = (pf_enc_loop_cu_bot_copy)ihevce_enc_loop_cu_bot_copy;
4759 ps_ctxt->pv_enc_loop_ctb_left_copy =
4760 (pf_enc_loop_ctb_left_copy)ihevce_enc_loop_ctb_left_copy;
4761
4762 /* Memory assignments for chroma intra pred buffer */
4763 {
4764 WORD32 pred_buf_size =
4765 MAX_TU_SIZE * MAX_TU_SIZE * 2 * ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4766 WORD32 pred_buf_size_per_thread =
4767 NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD * pred_buf_size;
4768 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].pv_base +
4769 (ctr * pred_buf_size_per_thread);
4770
4771 for(i = 0; i < NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD; i++)
4772 {
4773 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[i].pv_pred_data = pu1_base;
4774 pu1_base += pred_buf_size;
4775 }
4776 }
4777
4778 /* Memory assignments for reference substitution output */
4779 {
4780 WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + 4);
4781 WORD32 pred_buf_size_per_thread = pred_buf_size;
4782 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_SUB_OUT].pv_base +
4783 (ctr * pred_buf_size_per_thread);
4784
4785 ps_ctxt->pv_ref_sub_out = pu1_base;
4786 }
4787
4788 /* Memory assignments for reference filtering output */
4789 {
4790 WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + 4);
4791 WORD32 pred_buf_size_per_thread = pred_buf_size;
4792 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_FILT_OUT].pv_base +
4793 (ctr * pred_buf_size_per_thread);
4794
4795 ps_ctxt->pv_ref_filt_out = pu1_base;
4796 }
4797
4798 /* Memory assignments for recon storage during CU Recursion */
4799 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4800 if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4801 #endif
4802 {
4803 {
4804 WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE);
4805 WORD32 pred_buf_size_per_thread = pred_buf_size;
4806 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].pv_base +
4807 (ctr * pred_buf_size_per_thread);
4808
4809 ps_ctxt->pv_cu_luma_recon = pu1_base;
4810 }
4811
4812 {
4813 WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) *
4814 ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4815 WORD32 pred_buf_size_per_thread = pred_buf_size;
4816 UWORD8 *pu1_base =
4817 (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].pv_base +
4818 (ctr * pred_buf_size_per_thread);
4819
4820 ps_ctxt->pv_cu_chrma_recon = pu1_base;
4821 }
4822 }
4823
4824 /* Memory assignments for pred storage during CU Recursion */
4825 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4826 if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4827 #endif
4828 {
4829 {
4830 WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE);
4831 WORD32 pred_buf_size_per_thread = pred_buf_size;
4832 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].pv_base +
4833 (ctr * pred_buf_size_per_thread);
4834
4835 ps_ctxt->pv_CTB_pred_luma = pu1_base;
4836 }
4837
4838 {
4839 WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) *
4840 ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4841 WORD32 pred_buf_size_per_thread = pred_buf_size;
4842 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].pv_base +
4843 (ctr * pred_buf_size_per_thread);
4844
4845 ps_ctxt->pv_CTB_pred_chroma = pu1_base;
4846 }
4847 }
4848
4849 /* Memory assignments for CTB left luma data storage */
4850 {
4851 WORD32 pred_buf_size = (MAX_CTB_SIZE + MAX_TU_SIZE);
4852 WORD32 pred_buf_size_per_thread = pred_buf_size;
4853 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].pv_base +
4854 (ctr * pred_buf_size_per_thread);
4855
4856 ps_ctxt->pv_left_luma_data = pu1_base;
4857 }
4858
4859 /* Memory assignments for CTB left chroma data storage */
4860 {
4861 WORD32 pred_buf_size =
4862 (MAX_CTB_SIZE + MAX_TU_SIZE) * ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4863 WORD32 pred_buf_size_per_thread = pred_buf_size;
4864 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].pv_base +
4865 (ctr * pred_buf_size_per_thread);
4866
4867 ps_ctxt->pv_left_chrm_data = pu1_base;
4868 }
4869 }
4870
4871 /* Memory for inter pred buffers */
4872 {
4873 WORD32 i4_num_bufs_per_thread;
4874
4875 WORD32 i4_buf_size_per_cand =
4876 (MAX_CTB_SIZE) * (MAX_CTB_SIZE) *
4877 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
4878
4879 i4_num_bufs_per_thread =
4880 (ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size / i4_num_proc_thrds) /
4881 i4_buf_size_per_cand;
4882
4883 ps_ctxt->i4_max_num_inter_rdopt_cands = i4_num_bufs_per_thread - 4;
4884
4885 ps_ctxt->s_pred_buf_data.u4_is_buf_in_use = UINT_MAX;
4886
4887 {
4888 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_INTER_PRED].pv_base +
4889 +(ctr * i4_buf_size_per_cand * i4_num_bufs_per_thread);
4890
4891 for(i = 0; i < i4_num_bufs_per_thread; i++)
4892 {
4893 ps_ctxt->s_pred_buf_data.apv_inter_pred_data[i] =
4894 pu1_base + i * i4_buf_size_per_cand;
4895 ps_ctxt->s_pred_buf_data.u4_is_buf_in_use ^= (1 << i);
4896 }
4897 }
4898 }
4899
4900 /* Memory required to store pred for 422 chroma */
4901 if(i4_chroma_format == IV_YUV_422SP_UV)
4902 {
4903 WORD32 pred_buf_size = MAX_CTB_SIZE * MAX_CTB_SIZE * 2;
4904 WORD32 pred_buf_size_per_thread =
4905 pred_buf_size * ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) *
4906 sizeof(UWORD8);
4907 void *pv_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].pv_base +
4908 (ctr * pred_buf_size_per_thread);
4909
4910 ps_ctxt->pv_422_chroma_intra_pred_buf = pv_base;
4911 }
4912 else
4913 {
4914 ps_ctxt->pv_422_chroma_intra_pred_buf = NULL;
4915 }
4916
4917 /* Memory for Recon Datastore (Used around and within the RDOPT loop) */
4918 {
4919 WORD32 i4_lumaBufSize = MAX_CU_SIZE * MAX_CU_SIZE;
4920 WORD32 i4_chromaBufSize =
4921 MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ((i4_chroma_format == IV_YUV_422SP_UV) + 1);
4922 WORD32 i4_memSize_perThread = ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size /
4923 (i4_num_proc_thrds * sizeof(UWORD8) * (is_hbd_mode + 1));
4924 WORD32 i4_quality_preset = ps_ctxt->i4_quality_preset;
4925 {
4926 UWORD8 *pu1_mem_base =
4927 (((UWORD8 *)ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].pv_base) +
4928 ctr * i4_memSize_perThread);
4929
4930 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[0] =
4931 pu1_mem_base + i4_lumaBufSize * 0;
4932 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[1] =
4933 pu1_mem_base + i4_lumaBufSize * 1;
4934 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[0] =
4935 pu1_mem_base + i4_lumaBufSize * 2;
4936 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[1] =
4937 pu1_mem_base + i4_lumaBufSize * 3;
4938
4939 pu1_mem_base += i4_lumaBufSize * 4;
4940
4941 switch(i4_quality_preset)
4942 {
4943 case IHEVCE_QUALITY_P0:
4944 {
4945 #if ENABLE_CHROMA_RDOPT_EVAL_IN_PQ
4946 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4947 pu1_mem_base + i4_chromaBufSize * 0;
4948 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4949 pu1_mem_base + i4_chromaBufSize * 1;
4950 #else
4951 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4952 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4953 #endif
4954
4955 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ
4956 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4957 pu1_mem_base + i4_chromaBufSize * 2;
4958 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4959 pu1_mem_base + i4_chromaBufSize * 3;
4960 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4961 pu1_mem_base + i4_chromaBufSize * 2;
4962 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4963 pu1_mem_base + i4_chromaBufSize * 3;
4964 #else
4965 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4966 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4967 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4968 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4969 #endif
4970
4971 break;
4972 }
4973 case IHEVCE_QUALITY_P2:
4974 {
4975 #if ENABLE_CHROMA_RDOPT_EVAL_IN_HQ
4976 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4977 pu1_mem_base + i4_chromaBufSize * 0;
4978 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4979 pu1_mem_base + i4_chromaBufSize * 1;
4980 #else
4981 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4982 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4983 #endif
4984
4985 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ
4986 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4987 pu1_mem_base + i4_chromaBufSize * 2;
4988 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4989 pu1_mem_base + i4_chromaBufSize * 3;
4990 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4991 pu1_mem_base + i4_chromaBufSize * 2;
4992 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4993 pu1_mem_base + i4_chromaBufSize * 3;
4994 #else
4995 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4996 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4997 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4998 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4999 #endif
5000
5001 break;
5002 }
5003 case IHEVCE_QUALITY_P3:
5004 {
5005 #if ENABLE_CHROMA_RDOPT_EVAL_IN_MS
5006 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
5007 pu1_mem_base + i4_chromaBufSize * 0;
5008 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
5009 pu1_mem_base + i4_chromaBufSize * 1;
5010 #else
5011 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
5012 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
5013 #endif
5014
5015 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS
5016 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
5017 pu1_mem_base + i4_chromaBufSize * 2;
5018 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
5019 pu1_mem_base + i4_chromaBufSize * 3;
5020 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
5021 pu1_mem_base + i4_chromaBufSize * 2;
5022 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
5023 pu1_mem_base + i4_chromaBufSize * 3;
5024 #else
5025 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
5026 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
5027 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
5028 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
5029 #endif
5030
5031 break;
5032 }
5033 case IHEVCE_QUALITY_P4:
5034 {
5035 #if ENABLE_CHROMA_RDOPT_EVAL_IN_HS
5036 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
5037 pu1_mem_base + i4_chromaBufSize * 0;
5038 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
5039 pu1_mem_base + i4_chromaBufSize * 1;
5040 #else
5041 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
5042 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
5043 #endif
5044
5045 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS
5046 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
5047 pu1_mem_base + i4_chromaBufSize * 2;
5048 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
5049 pu1_mem_base + i4_chromaBufSize * 3;
5050 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
5051 pu1_mem_base + i4_chromaBufSize * 2;
5052 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
5053 pu1_mem_base + i4_chromaBufSize * 3;
5054 #else
5055 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
5056 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
5057 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
5058 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
5059 #endif
5060
5061 break;
5062 }
5063 case IHEVCE_QUALITY_P5:
5064 {
5065 #if ENABLE_CHROMA_RDOPT_EVAL_IN_XS
5066 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
5067 pu1_mem_base + i4_chromaBufSize * 0;
5068 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
5069 pu1_mem_base + i4_chromaBufSize * 1;
5070 #else
5071 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
5072 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
5073 #endif
5074
5075 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS
5076 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
5077 pu1_mem_base + i4_chromaBufSize * 2;
5078 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
5079 pu1_mem_base + i4_chromaBufSize * 3;
5080 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
5081 pu1_mem_base + i4_chromaBufSize * 2;
5082 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
5083 pu1_mem_base + i4_chromaBufSize * 3;
5084 #else
5085 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
5086 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
5087 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
5088 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
5089 #endif
5090
5091 break;
5092 }
5093 }
5094 }
5095
5096 ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE;
5097 ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE;
5098 ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE;
5099 ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE;
5100
5101 } /* Recon Datastore */
5102
5103 /****************************************************/
5104 /****************************************************/
5105 /* ps_pps->i1_sign_data_hiding_flag == UNHIDDEN */
5106 /* when NO_SBH. else HIDDEN */
5107 /****************************************************/
5108 /****************************************************/
5109 /* Zero cbf tool is enabled by default for all presets */
5110 ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
5111
5112 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3)
5113 {
5114 ps_ctxt->i4_quant_rounding_level = CU_LEVEL_QUANT_ROUNDING;
5115 ps_ctxt->i4_chroma_quant_rounding_level = CHROMA_QUANT_ROUNDING;
5116 ps_ctxt->i4_rdoq_level = ALL_CAND_RDOQ;
5117 ps_ctxt->i4_sbh_level = ALL_CAND_SBH;
5118 }
5119 else if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P3)
5120 {
5121 ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5122 ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5123 ps_ctxt->i4_rdoq_level = NO_RDOQ;
5124 ps_ctxt->i4_sbh_level = NO_SBH;
5125 }
5126 else
5127 {
5128 ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5129 ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5130 ps_ctxt->i4_rdoq_level = NO_RDOQ;
5131 ps_ctxt->i4_sbh_level = NO_SBH;
5132 }
5133
5134 #if DISABLE_QUANT_ROUNDING
5135 ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5136 ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5137 #endif
5138 /*Disabling RDOQ only when spatial modulation is enabled
5139 as RDOQ degrades visual quality*/
5140 if(ps_init_prms->s_config_prms.i4_cu_level_rc & 1)
5141 {
5142 ps_ctxt->i4_rdoq_level = NO_RDOQ;
5143 }
5144
5145 #if DISABLE_RDOQ
5146 ps_ctxt->i4_rdoq_level = NO_RDOQ;
5147 #endif
5148
5149 #if DISABLE_SBH
5150 ps_ctxt->i4_sbh_level = NO_SBH;
5151 #endif
5152
5153 /*Rounding factor calc based on previous cabac states */
5154
5155 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_4x4[0][0];
5156 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_8x8[0][0];
5157 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_16x16[0][0];
5158 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[4] = &ps_ctxt->i4_quant_round_32x32[0][0];
5159
5160 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_4x4[1][0];
5161 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_8x8[1][0];
5162 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_16x16[1][0];
5163 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[4] = &ps_ctxt->i4_quant_round_32x32[1][0];
5164
5165 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_cr_4x4[0][0];
5166 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_cr_8x8[0][0];
5167 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_cr_16x16[0][0];
5168
5169 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_cr_4x4[1][0];
5170 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_cr_8x8[1][0];
5171 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_cr_16x16[1][0];
5172
5173 /****************************************************************************************/
5174 /* Setting the perform rdoq and sbh flags appropriately */
5175 /****************************************************************************************/
5176 {
5177 /******************************************/
5178 /* For best cand rdoq and/or sbh */
5179 /******************************************/
5180 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq =
5181 (ps_ctxt->i4_rdoq_level == BEST_CAND_RDOQ);
5182 /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
5183 we would have to do RDOQ again.*/
5184 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq =
5185 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq ||
5186 ((BEST_CAND_SBH == ps_ctxt->i4_sbh_level) &&
5187 (ALL_CAND_RDOQ == ps_ctxt->i4_rdoq_level));
5188
5189 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh =
5190 (ps_ctxt->i4_sbh_level == BEST_CAND_SBH);
5191
5192 /* SBH should be performed if
5193 a) i4_sbh_level is BEST_CAND_SBH.
5194 b) For all quality presets above medium speed(i.e. high speed and extreme speed) and
5195 if SBH has to be done because for these presets the quant, iquant and scan coeff
5196 data are calculated in this function and not during the RDOPT stage*/
5197
5198 /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
5199 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh =
5200 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh ||
5201 ((BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level) &&
5202 (ALL_CAND_SBH == ps_ctxt->i4_sbh_level));
5203
5204 /******************************************/
5205 /* For all cand rdoq and/or sbh */
5206 /******************************************/
5207 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq =
5208 (ps_ctxt->i4_rdoq_level == ALL_CAND_RDOQ);
5209 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh =
5210 (ps_ctxt->i4_sbh_level == ALL_CAND_SBH);
5211 ps_ctxt->s_rdoq_sbh_ctxt.i4_bit_depth =
5212 ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
5213 }
5214
5215 if(!is_hbd_mode)
5216 {
5217 if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1)
5218 {
5219 if(ps_ctxt->i4_rdoq_level == NO_RDOQ)
5220 {
5221 ps_ctxt->apf_quant_iquant_ssd[0] =
5222 ps_func_selector->ihevc_quant_iquant_ssd_fptr;
5223 ps_ctxt->apf_quant_iquant_ssd[2] = ps_func_selector->ihevc_quant_iquant_fptr;
5224 }
5225 else
5226 {
5227 ps_ctxt->apf_quant_iquant_ssd[0] =
5228 ps_func_selector->ihevc_quant_iquant_ssd_rdoq_fptr;
5229 ps_ctxt->apf_quant_iquant_ssd[2] =
5230 ps_func_selector->ihevc_quant_iquant_rdoq_fptr;
5231 }
5232
5233 /*If coef level RDOQ is enabled, quantization based on corr. error to be done */
5234 if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)
5235 {
5236 ps_ctxt->apf_quant_iquant_ssd[1] =
5237 ps_func_selector->ihevc_q_iq_ssd_var_rnd_fact_fptr;
5238 ps_ctxt->apf_quant_iquant_ssd[3] =
5239 ps_func_selector->ihevc_q_iq_var_rnd_fact_fptr;
5240 }
5241 else
5242 {
5243 ps_ctxt->apf_quant_iquant_ssd[1] =
5244 ps_func_selector->ihevc_quant_iquant_ssd_fptr;
5245 ps_ctxt->apf_quant_iquant_ssd[3] = ps_func_selector->ihevc_quant_iquant_fptr;
5246 }
5247 }
5248 else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0)
5249 {
5250 if(ps_ctxt->i4_rdoq_level == NO_RDOQ)
5251 {
5252 ps_ctxt->apf_quant_iquant_ssd[0] =
5253 ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr;
5254 ps_ctxt->apf_quant_iquant_ssd[2] =
5255 ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr;
5256 }
5257 else
5258 {
5259 ps_ctxt->apf_quant_iquant_ssd[0] =
5260 ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_fptr;
5261 ps_ctxt->apf_quant_iquant_ssd[2] =
5262 ps_func_selector->ihevc_quant_iquant_flat_scale_mat_rdoq_fptr;
5263 }
5264
5265 /*If coef level RDOQ is enabled, quantization based on corr. error to be done */
5266 if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)
5267 {
5268 ps_ctxt->apf_quant_iquant_ssd[1] =
5269 ps_func_selector->ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_fptr;
5270 ps_ctxt->apf_quant_iquant_ssd[3] =
5271 ps_func_selector->ihevc_q_iq_flat_scale_mat_var_rnd_fact_fptr;
5272 }
5273 else
5274 {
5275 ps_ctxt->apf_quant_iquant_ssd[1] =
5276 ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr;
5277 ps_ctxt->apf_quant_iquant_ssd[3] =
5278 ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr;
5279 }
5280 }
5281
5282 ps_ctxt->s_sao_ctxt_t.apf_sao_luma[0] =
5283 ps_func_selector->ihevc_sao_edge_offset_class0_fptr;
5284 ps_ctxt->s_sao_ctxt_t.apf_sao_luma[1] =
5285 ps_func_selector->ihevc_sao_edge_offset_class1_fptr;
5286 ps_ctxt->s_sao_ctxt_t.apf_sao_luma[2] =
5287 ps_func_selector->ihevc_sao_edge_offset_class2_fptr;
5288 ps_ctxt->s_sao_ctxt_t.apf_sao_luma[3] =
5289 ps_func_selector->ihevc_sao_edge_offset_class3_fptr;
5290
5291 ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[0] =
5292 ps_func_selector->ihevc_sao_edge_offset_class0_chroma_fptr;
5293 ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[1] =
5294 ps_func_selector->ihevc_sao_edge_offset_class1_chroma_fptr;
5295 ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[2] =
5296 ps_func_selector->ihevc_sao_edge_offset_class2_chroma_fptr;
5297 ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[3] =
5298 ps_func_selector->ihevc_sao_edge_offset_class3_chroma_fptr;
5299
5300 ps_ctxt->apf_it_recon[0] = ps_func_selector->ihevc_itrans_recon_4x4_ttype1_fptr;
5301 ps_ctxt->apf_it_recon[1] = ps_func_selector->ihevc_itrans_recon_4x4_fptr;
5302 ps_ctxt->apf_it_recon[2] = ps_func_selector->ihevc_itrans_recon_8x8_fptr;
5303 ps_ctxt->apf_it_recon[3] = ps_func_selector->ihevc_itrans_recon_16x16_fptr;
5304 ps_ctxt->apf_it_recon[4] = ps_func_selector->ihevc_itrans_recon_32x32_fptr;
5305
5306 ps_ctxt->apf_chrm_it_recon[0] = ps_func_selector->ihevc_chroma_itrans_recon_4x4_fptr;
5307 ps_ctxt->apf_chrm_it_recon[1] = ps_func_selector->ihevc_chroma_itrans_recon_8x8_fptr;
5308 ps_ctxt->apf_chrm_it_recon[2] = ps_func_selector->ihevc_chroma_itrans_recon_16x16_fptr;
5309
5310 ps_ctxt->apf_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr;
5311 ps_ctxt->apf_resd_trns[1] = ps_func_selector->ihevc_resi_trans_4x4_fptr;
5312 ps_ctxt->apf_resd_trns[2] = ps_func_selector->ihevc_resi_trans_8x8_fptr;
5313 ps_ctxt->apf_resd_trns[3] = ps_func_selector->ihevc_resi_trans_16x16_fptr;
5314 ps_ctxt->apf_resd_trns[4] = ps_func_selector->ihevc_resi_trans_32x32_fptr;
5315
5316 ps_ctxt->apf_chrm_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_fptr;
5317 ps_ctxt->apf_chrm_resd_trns[1] = ps_func_selector->ihevc_resi_trans_8x8_fptr;
5318 ps_ctxt->apf_chrm_resd_trns[2] = ps_func_selector->ihevc_resi_trans_16x16_fptr;
5319
5320 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_0] =
5321 ps_func_selector->ihevc_intra_pred_luma_planar_fptr;
5322 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_1] = ps_func_selector->ihevc_intra_pred_luma_dc_fptr;
5323 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_2] =
5324 ps_func_selector->ihevc_intra_pred_luma_mode2_fptr;
5325 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_3TO9] =
5326 ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr;
5327 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_10] =
5328 ps_func_selector->ihevc_intra_pred_luma_horz_fptr;
5329 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_11TO17] =
5330 ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr;
5331 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_18_34] =
5332 ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr;
5333 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_19TO25] =
5334 ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr;
5335 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_26] = ps_func_selector->ihevc_intra_pred_luma_ver_fptr;
5336 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_27TO33] =
5337 ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr;
5338
5339 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_0] =
5340 ps_func_selector->ihevc_intra_pred_chroma_planar_fptr;
5341 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_1] =
5342 ps_func_selector->ihevc_intra_pred_chroma_dc_fptr;
5343 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_2] =
5344 ps_func_selector->ihevc_intra_pred_chroma_mode2_fptr;
5345 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_3TO9] =
5346 ps_func_selector->ihevc_intra_pred_chroma_mode_3_to_9_fptr;
5347 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_10] =
5348 ps_func_selector->ihevc_intra_pred_chroma_horz_fptr;
5349 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_11TO17] =
5350 ps_func_selector->ihevc_intra_pred_chroma_mode_11_to_17_fptr;
5351 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_18_34] =
5352 ps_func_selector->ihevc_intra_pred_chroma_mode_18_34_fptr;
5353 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_19TO25] =
5354 ps_func_selector->ihevc_intra_pred_chroma_mode_19_to_25_fptr;
5355 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_26] =
5356 ps_func_selector->ihevc_intra_pred_chroma_ver_fptr;
5357 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_27TO33] =
5358 ps_func_selector->ihevc_intra_pred_chroma_mode_27_to_33_fptr;
5359
5360 ps_ctxt->apf_chrm_resd_trns_had[0] =
5361 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_4x4_8bit;
5362 ps_ctxt->apf_chrm_resd_trns_had[1] =
5363 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_8x8_8bit;
5364 ps_ctxt->apf_chrm_resd_trns_had[2] =
5365 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_16x16_8bit;
5366 }
5367
5368 if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0)
5369 {
5370 /* initialise the scale & rescale matricies */
5371 ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5372 ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5373 ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_flat_scale_mat_8x8[0];
5374 ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_flat_scale_mat_16x16[0];
5375 ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_flat_scale_mat_32x32[0];
5376 /*init for inter matrix*/
5377 ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5378 ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5379 ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_flat_scale_mat_8x8[0];
5380 ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_flat_scale_mat_16x16[0];
5381 ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_flat_scale_mat_32x32[0];
5382
5383 /*init for rescale matrix*/
5384 ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5385 ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5386 ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0];
5387 ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0];
5388 ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0];
5389 /*init for rescale inter matrix*/
5390 ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5391 ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5392 ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0];
5393 ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0];
5394 ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0];
5395 }
5396 else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1)
5397 {
5398 /* initialise the scale & rescale matricies */
5399 ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5400 ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5401 ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_intra_default_scale_mat_8x8[0];
5402 ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_intra_default_scale_mat_16x16[0];
5403 ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_intra_default_scale_mat_32x32[0];
5404 /*init for inter matrix*/
5405 ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5406 ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5407 ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_inter_default_scale_mat_8x8[0];
5408 ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_inter_default_scale_mat_16x16[0];
5409 ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_inter_default_scale_mat_32x32[0];
5410
5411 /*init for rescale matrix*/
5412 ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5413 ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5414 ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_intra_default_rescale_mat_8x8[0];
5415 ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_intra_default_rescale_mat_16x16[0];
5416 ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_intra_default_rescale_mat_32x32[0];
5417 /*init for rescale inter matrix*/
5418 ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5419 ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5420 ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_inter_default_rescale_mat_8x8[0];
5421 ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_inter_default_rescale_mat_16x16[0];
5422 ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_inter_default_rescale_mat_32x32[0];
5423 }
5424 else
5425 {
5426 ASSERT(0);
5427 }
5428
5429 /* Not recomputing Luma pred-data and header data for any preset now */
5430 ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0;
5431 ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0;
5432 ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1;
5433
5434 switch(ps_ctxt->i4_quality_preset)
5435 {
5436 case IHEVCE_QUALITY_P0:
5437 {
5438 ps_ctxt->i4_max_merge_candidates = 5;
5439 ps_ctxt->i4_use_satd_for_merge_eval = 1;
5440 ps_ctxt->u1_use_top_at_ctb_boundary = 1;
5441 ps_ctxt->u1_use_early_cbf_data = 0;
5442 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_PQ;
5443 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5444 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ;
5445
5446 break;
5447 }
5448 case IHEVCE_QUALITY_P2:
5449 {
5450 ps_ctxt->i4_max_merge_candidates = 5;
5451 ps_ctxt->i4_use_satd_for_merge_eval = 1;
5452 ps_ctxt->u1_use_top_at_ctb_boundary = 1;
5453 ps_ctxt->u1_use_early_cbf_data = 0;
5454
5455 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HQ;
5456 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5457 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ;
5458
5459 break;
5460 }
5461 case IHEVCE_QUALITY_P3:
5462 {
5463 ps_ctxt->i4_max_merge_candidates = 3;
5464 ps_ctxt->i4_use_satd_for_merge_eval = 1;
5465 ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5466
5467 ps_ctxt->u1_use_early_cbf_data = 0;
5468 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_MS;
5469 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5470 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS;
5471
5472 break;
5473 }
5474 case IHEVCE_QUALITY_P4:
5475 {
5476 ps_ctxt->i4_max_merge_candidates = 2;
5477 ps_ctxt->i4_use_satd_for_merge_eval = 1;
5478 ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5479 ps_ctxt->u1_use_early_cbf_data = 0;
5480 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HS;
5481 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5482 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS;
5483
5484 break;
5485 }
5486 case IHEVCE_QUALITY_P5:
5487 {
5488 ps_ctxt->i4_max_merge_candidates = 2;
5489 ps_ctxt->i4_use_satd_for_merge_eval = 0;
5490 ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5491 ps_ctxt->u1_use_early_cbf_data = 0;
5492 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_XS;
5493 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5494 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS;
5495
5496 break;
5497 }
5498 case IHEVCE_QUALITY_P6:
5499 {
5500 ps_ctxt->i4_max_merge_candidates = 2;
5501 ps_ctxt->i4_use_satd_for_merge_eval = 0;
5502 ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5503 ps_ctxt->u1_use_early_cbf_data = EARLY_CBF_ON;
5504 break;
5505 }
5506 default:
5507 {
5508 ASSERT(0);
5509 }
5510 }
5511
5512 #if DISABLE_SKIP_AND_MERGE_EVAL
5513 ps_ctxt->i4_max_merge_candidates = 0;
5514 #endif
5515
5516 ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data =
5517 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
5518
5519 /*initialize memory for RC related parameters required/populated by enc_loop */
5520 /* the allocated memory is distributed as follows assuming encoder is running for 3 bit-rate instnaces
5521 |-------|-> Thread 0, instance 0
5522 | |
5523 | |
5524 | |
5525 |-------|-> thread 0, instance 1
5526 | |
5527 | |
5528 | |
5529 |-------|-> thread 0, intance 2
5530 | |
5531 | |
5532 | |
5533 |-------|-> thread 1, instance 0
5534 | |
5535 | |
5536 | |
5537 |-------|-> thread 1, instance 1
5538 | |
5539 | |
5540 | |
5541 |-------|-> thread 1, instance 2
5542 ... ...
5543
5544 Each theard will collate the data corresponding to the bit-rate instnace it's running at the appropriate place.
5545 Finally, one thread will become master and collate the data from all the threads */
5546 for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
5547 {
5548 for(i = 0; i < i4_num_bitrate_inst; i++)
5549 {
5550 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i] = ps_enc_loop_rc_params;
5551 ps_enc_loop_rc_params++;
5552 }
5553 }
5554 /* Non-Luma modes for Chroma are evaluated only in HIGH QUALITY preset */
5555
5556 #if !ENABLE_SEPARATE_LUMA_CHROMA_INTRA_MODE
5557 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd = 0;
5558 #endif
5559
5560 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_iq_buff_stride =
5561 MAX_TU_SIZE;
5562 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_iq_buff_stride =
5563 MAX_TU_SIZE;
5564 /*Multiplying by two to account for interleaving of cb and cr*/
5565 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_pred_stride = MAX_TU_SIZE
5566 << 1;
5567 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_pred_stride =
5568 MAX_TU_SIZE << 1;
5569
5570 /* Memory for a frame level memory to store tile-id */
5571 /* corresponding to each CTB of frame */
5572 ps_ctxt->pi4_offset_for_last_cu_qp = &ps_master_ctxt->ai4_offset_for_last_cu_qp[0];
5573
5574 ps_ctxt->i4_qp_mod = ps_init_prms->s_config_prms.i4_cu_level_rc & 1;
5575 /* psy rd strength is a run time parametr control by bit field 5-7 in the VQET field.*/
5576 /* we disable psyrd if the the psy strength is zero or the BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER field is not set */
5577 if(ps_init_prms->s_coding_tools_prms.i4_vqet &
5578 (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER))
5579 {
5580 UWORD32 psy_strength;
5581 UWORD32 psy_strength_mask =
5582 224; // only bits 5,6,7 are ones. These three bits represent the psy strength
5583 psy_strength = ps_init_prms->s_coding_tools_prms.i4_vqet & psy_strength_mask;
5584 ps_ctxt->u1_enable_psyRDOPT = 1;
5585 ps_ctxt->u4_psy_strength = psy_strength >> BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1;
5586 if(psy_strength == 0)
5587 {
5588 ps_ctxt->u1_enable_psyRDOPT = 0;
5589 ps_ctxt->u4_psy_strength = 0;
5590 }
5591 }
5592
5593 ps_ctxt->u1_is_stasino_enabled =
5594 ((ps_init_prms->s_coding_tools_prms.i4_vqet &
5595 (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
5596 (ps_init_prms->s_coding_tools_prms.i4_vqet &
5597 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)));
5598
5599 ps_ctxt->u1_max_inter_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_nI;
5600 ps_ctxt->u1_max_intra_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_I;
5601 ps_ctxt++;
5602 }
5603 /* Store Tile params base into EncLoop Master context */
5604 ps_master_ctxt->pv_tile_params_base = (void *)ps_tile_params_base;
5605
5606 if(1 == ps_tile_params_base->i4_tiles_enabled_flag)
5607 {
5608 i4_num_tile_cols = ps_tile_params_base->i4_num_tile_cols;
5609 }
5610
5611 /* Updating ai4_offset_for_last_cu_qp[] array for all tile-colums of frame */
5612 /* Loop over all tile-cols in frame */
5613 for(ctr = 0; ctr < i4_num_tile_cols; ctr++)
5614 {
5615 WORD32 i4_tile_col_wd_in_ctb_unit =
5616 (ps_tile_params_base + ctr)->i4_curr_tile_wd_in_ctb_unit;
5617 WORD32 offset_x;
5618
5619 if(ctr == (i4_num_tile_cols - 1))
5620 { /* Last tile-row of frame */
5621 WORD32 min_cu_size = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size;
5622
5623 WORD32 cu_aligned_pic_wd =
5624 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
5625 SET_CTB_ALIGN(
5626 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width,
5627 min_cu_size);
5628
5629 WORD32 last_hz_ctb_wd = MAX_CTB_SIZE - (u4_width - cu_aligned_pic_wd);
5630
5631 offset_x = (i4_tile_col_wd_in_ctb_unit - 1) * MAX_CTB_SIZE;
5632 offset_x += last_hz_ctb_wd;
5633 }
5634 else
5635 { /* Not the last tile-row of frame */
5636 offset_x = (i4_tile_col_wd_in_ctb_unit)*MAX_CTB_SIZE;
5637 }
5638
5639 offset_x /= 4;
5640 offset_x -= 1;
5641
5642 ps_master_ctxt->ai4_offset_for_last_cu_qp[ctr] = offset_x;
5643 }
5644
5645 n_tabs = NUM_ENC_LOOP_MEM_RECS;
5646
5647 /*store num bit-rate instances in the master context */
5648 ps_master_ctxt->i4_num_bitrates = i4_num_bitrate_inst;
5649 ps_master_ctxt->i4_num_enc_loop_frm_pllel = i4_num_enc_loop_frm_pllel;
5650 /*************************************************************************/
5651 /* --- EncLoop Deblock sync Dep Mngr Mem init -- */
5652 /*************************************************************************/
5653 {
5654 WORD32 count;
5655 WORD32 num_vert_units, num_blks_in_row;
5656 WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
5657 WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
5658
5659 ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
5660 ihevce_enc_loop_dblk_get_prms_dep_mngr(wd, &num_blks_in_row);
5661 ASSERT(num_vert_units > 0);
5662 ASSERT(num_blks_in_row > 0);
5663
5664 for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
5665 {
5666 for(i = 0; i < i4_num_bitrate_inst; i++)
5667 {
5668 ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[count][i] = ihevce_dmgr_init(
5669 &ps_mem_tab[n_tabs],
5670 pv_osal_handle,
5671 DEP_MNGR_ROW_ROW_SYNC,
5672 num_vert_units,
5673 num_blks_in_row,
5674 i4_num_tile_cols, /* Number of Col Tiles */
5675 i4_num_proc_thrds,
5676 0 /*Sem Disabled*/
5677 );
5678
5679 n_tabs += ihevce_dmgr_get_num_mem_recs();
5680 }
5681 }
5682 }
5683 /*************************************************************************/
5684 /* --- EncLoop Top-Right CU synnc Dep Mngr Mem init -- */
5685 /*************************************************************************/
5686 {
5687 WORD32 count;
5688 WORD32 num_vert_units, num_blks_in_row;
5689 WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
5690 WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
5691
5692 WORD32 i4_sem = 0;
5693
5694 if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset >=
5695 IHEVCE_QUALITY_P4)
5696 i4_sem = 0;
5697 else
5698 i4_sem = 1;
5699 ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
5700 /* For Top-Right CU sync, adding one more CTB since value updation */
5701 /* happens in that way for the last CTB in the row */
5702 num_blks_in_row = wd + SET_CTB_ALIGN(wd, MAX_CU_SIZE);
5703 num_blks_in_row += MAX_CTB_SIZE;
5704
5705 ASSERT(num_vert_units > 0);
5706 ASSERT(num_blks_in_row > 0);
5707
5708 for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
5709 {
5710 for(i = 0; i < i4_num_bitrate_inst; i++)
5711 {
5712 /* For ES/HS, CU level updates uses spin-locks than semaphore */
5713 {
5714 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[count][i] =
5715 ihevce_dmgr_init(
5716 &ps_mem_tab[n_tabs],
5717 pv_osal_handle,
5718 DEP_MNGR_ROW_ROW_SYNC,
5719 num_vert_units,
5720 num_blks_in_row,
5721 i4_num_tile_cols, /* Number of Col Tiles */
5722 i4_num_proc_thrds,
5723 i4_sem /*Sem Disabled*/
5724 );
5725 }
5726 n_tabs += ihevce_dmgr_get_num_mem_recs();
5727 }
5728 }
5729 }
5730
5731 for(i = 1; i < 5; i++)
5732 {
5733 WORD32 i4_log2_trans_size = i + 1;
5734 WORD32 i4_bit_depth = ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
5735
5736 ga_trans_shift[i] = (MAX_TR_DYNAMIC_RANGE - i4_bit_depth - i4_log2_trans_size) << 1;
5737 }
5738
5739 ga_trans_shift[0] = ga_trans_shift[1];
5740
5741 /* return the handle to caller */
5742 return ((void *)ps_master_ctxt);
5743 }
5744
5745 /*!
5746 ******************************************************************************
5747 * \if Function name : ihevce_enc_loop_reg_sem_hdls \endif
5748 *
5749 * \brief
5750 * Intialization for ENC_LOOP context state structure .
5751 *
5752 * \param[in] ps_mem_tab : pointer to memory descriptors table
5753 * \param[in] ppv_sem_hdls : Array of semaphore handles
5754 * \param[in] i4_num_proc_thrds : Number of processing threads
5755 *
5756 * \return
5757 * None
5758 *
5759 * \author
5760 * Ittiam
5761 *
5762 *****************************************************************************
5763 */
ihevce_enc_loop_reg_sem_hdls(void * pv_enc_loop_ctxt,void ** ppv_sem_hdls,WORD32 i4_num_proc_thrds)5764 void ihevce_enc_loop_reg_sem_hdls(
5765 void *pv_enc_loop_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds)
5766 {
5767 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5768 WORD32 i, enc_frm_id;
5769
5770 ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5771
5772 /*************************************************************************/
5773 /* --- EncLoop Deblock sync Dep Mngr reg Semaphores -- */
5774 /*************************************************************************/
5775 for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5776 {
5777 for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
5778 {
5779 ihevce_dmgr_reg_sem_hdls(
5780 ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[enc_frm_id][i],
5781 ppv_sem_hdls,
5782 i4_num_proc_thrds);
5783 }
5784 }
5785
5786 /*************************************************************************/
5787 /* --- EncLoop Top-Right CU synnc Dep Mngr reg Semaphores -- */
5788 /*************************************************************************/
5789 for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5790 {
5791 for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
5792 {
5793 ihevce_dmgr_reg_sem_hdls(
5794 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[enc_frm_id][i],
5795 ppv_sem_hdls,
5796 i4_num_proc_thrds);
5797 }
5798 }
5799
5800 return;
5801 }
5802
5803 /*!
5804 ******************************************************************************
5805 * \if Function name : ihevce_enc_loop_delete \endif
5806 *
5807 * \brief
5808 * Destroy EncLoop module
5809 * Note : Only Destroys the resources allocated in the module like
5810 * semaphore,etc. Memory free is done Separately using memtabs
5811 *
5812 * \param[in] pv_me_ctxt : pointer to EncLoop ctxt
5813 *
5814 * \return
5815 * None
5816 *
5817 * \author
5818 * Ittiam
5819 *
5820 *****************************************************************************
5821 */
ihevce_enc_loop_delete(void * pv_enc_loop_ctxt)5822 void ihevce_enc_loop_delete(void *pv_enc_loop_ctxt)
5823 {
5824 ihevce_enc_loop_master_ctxt_t *ps_enc_loop_ctxt;
5825 WORD32 ctr, enc_frm_id;
5826
5827 ps_enc_loop_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5828
5829 for(enc_frm_id = 0; enc_frm_id < ps_enc_loop_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5830 {
5831 for(ctr = 0; ctr < ps_enc_loop_ctxt->i4_num_bitrates; ctr++)
5832 {
5833 /* --- EncLoop Deblock sync Dep Mngr Delete --*/
5834 ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_dblk[enc_frm_id][ctr]);
5835 /* --- EncLoop Top-Right CU sync Dep Mngr Delete --*/
5836 ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[enc_frm_id][ctr]);
5837 }
5838 }
5839 }
5840
5841 /*!
5842 ******************************************************************************
5843 * \if Function name : ihevce_enc_loop_dep_mngr_frame_reset \endif
5844 *
5845 * \brief
5846 * Frame level Reset for the Dependency Mngrs local to EncLoop.,
5847 * ie CU_TopRight and Dblk
5848 *
5849 * \param[in] pv_enc_loop_ctxt : Enc_loop context pointer
5850 *
5851 * \return
5852 * None
5853 *
5854 * \author
5855 * Ittiam
5856 *
5857 *****************************************************************************
5858 */
ihevce_enc_loop_dep_mngr_frame_reset(void * pv_enc_loop_ctxt,WORD32 enc_frm_id)5859 void ihevce_enc_loop_dep_mngr_frame_reset(void *pv_enc_loop_ctxt, WORD32 enc_frm_id)
5860 {
5861 WORD32 ctr, frame_id;
5862 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5863
5864 ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5865
5866 if(1 == ps_master_ctxt->i4_num_enc_loop_frm_pllel)
5867 {
5868 frame_id = 0;
5869 }
5870 else
5871 {
5872 frame_id = enc_frm_id;
5873 }
5874
5875 for(ctr = 0; ctr < ps_master_ctxt->i4_num_bitrates; ctr++)
5876 {
5877 /* Dep. Mngr : Reset the num ctb Deblocked in every row for ENC sync */
5878 ihevce_dmgr_rst_row_row_sync(ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[frame_id][ctr]);
5879
5880 /* Dep. Mngr : Reset the TopRight CU Processed in every row for ENC sync */
5881 ihevce_dmgr_rst_row_row_sync(
5882 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[frame_id][ctr]);
5883 }
5884 }
5885
5886 /*!
5887 ******************************************************************************
5888 * \if Function name : ihevce_enc_loop_frame_init \endif
5889 *
5890 * \brief
*    Frame level init of encode loop function.
5892 *
5893 * \param[in] pv_enc_loop_ctxt : Enc_loop context pointer
5894 * \param[in] pi4_cu_processed : ptr to cur frame cu process in pix.
5895 * \param[in] aps_ref_list : ref pic list for the current frame
5896 * \param[in] ps_slice_hdr : ptr to current slice header params
5897 * \param[in] ps_pps : ptr to active pps params
5898 * \param[in] ps_sps : ptr to active sps params
5899 * \param[in] ps_vps : ptr to active vps params
5900
5901
5902 * \param[in] i1_weighted_pred_flag : weighted pred enable flag (unidir)
5903 * \param[in] i1_weighted_bipred_flag : weighted pred enable flag (bidir)
5904 * \param[in] log2_luma_wght_denom : down shift factor for weighted pred of luma
5905 * \param[in] log2_chroma_wght_denom : down shift factor for weighted pred of chroma
* \param[in] cur_poc : current frame poc
5907 * \param[in] i4_bitrate_instance_num : number indicating the instance of bit-rate for multi-rate encoder
5908 *
5909 * \return
5910 * None
5911 *
5912 * \author
5913 * Ittiam
5914 *
5915 *****************************************************************************
5916 */
ihevce_enc_loop_frame_init(void * pv_enc_loop_ctxt,WORD32 i4_frm_qp,recon_pic_buf_t * (* aps_ref_list)[HEVCE_MAX_REF_PICS * 2],recon_pic_buf_t * ps_frm_recon,slice_header_t * ps_slice_hdr,pps_t * ps_pps,sps_t * ps_sps,vps_t * ps_vps,WORD8 i1_weighted_pred_flag,WORD8 i1_weighted_bipred_flag,WORD32 log2_luma_wght_denom,WORD32 log2_chroma_wght_denom,WORD32 cur_poc,WORD32 i4_display_num,enc_ctxt_t * ps_enc_ctxt,me_enc_rdopt_ctxt_t * ps_curr_inp_prms,WORD32 i4_bitrate_instance_num,WORD32 i4_thrd_id,WORD32 i4_enc_frm_id,WORD32 i4_num_bitrates,WORD32 i4_quality_preset,void * pv_dep_mngr_encloop_dep_me)5917 void ihevce_enc_loop_frame_init(
5918 void *pv_enc_loop_ctxt,
5919 WORD32 i4_frm_qp,
5920 recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2],
5921 recon_pic_buf_t *ps_frm_recon,
5922 slice_header_t *ps_slice_hdr,
5923 pps_t *ps_pps,
5924 sps_t *ps_sps,
5925 vps_t *ps_vps,
5926 WORD8 i1_weighted_pred_flag,
5927 WORD8 i1_weighted_bipred_flag,
5928 WORD32 log2_luma_wght_denom,
5929 WORD32 log2_chroma_wght_denom,
5930 WORD32 cur_poc,
5931 WORD32 i4_display_num,
5932 enc_ctxt_t *ps_enc_ctxt,
5933 me_enc_rdopt_ctxt_t *ps_curr_inp_prms,
5934 WORD32 i4_bitrate_instance_num,
5935 WORD32 i4_thrd_id,
5936 WORD32 i4_enc_frm_id,
5937 WORD32 i4_num_bitrates,
5938 WORD32 i4_quality_preset,
5939 void *pv_dep_mngr_encloop_dep_me)
5940 {
5941 /* local variables */
5942 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5943 ihevce_enc_loop_ctxt_t *ps_ctxt;
5944 WORD32 chroma_qp_offset, i4_div_factor;
5945 WORD8 i1_slice_type = ps_slice_hdr->i1_slice_type;
5946 WORD8 i1_strong_intra_smoothing_enable_flag = ps_sps->i1_strong_intra_smoothing_enable_flag;
5947
5948 /* ENC_LOOP master state structure */
5949 ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5950
5951 /* Nithya: Store the current POC in the slice header */
5952 ps_slice_hdr->i4_abs_pic_order_cnt = cur_poc;
5953
5954 /* Update the POC list of the current frame to the recon buffer */
5955 if(ps_slice_hdr->i1_num_ref_idx_l0_active != 0)
5956 {
5957 int i4_i;
5958 for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l0_active; i4_i++)
5959 {
5960 ps_frm_recon->ai4_col_l0_poc[i4_i] = aps_ref_list[0][i4_i]->i4_poc;
5961 }
5962 }
5963 if(ps_slice_hdr->i1_num_ref_idx_l1_active != 0)
5964 {
5965 int i4_i;
5966 for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l1_active; i4_i++)
5967 {
5968 ps_frm_recon->ai4_col_l1_poc[i4_i] = aps_ref_list[1][i4_i]->i4_poc;
5969 }
5970 }
5971
5972 /* loop over all the threads */
5973 // for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++)
5974 {
5975 /* ENC_LOOP state structure */
5976 ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i4_thrd_id];
5977
5978 /* SAO ctxt structure initialization*/
5979 ps_ctxt->s_sao_ctxt_t.ps_pps = ps_pps;
5980 ps_ctxt->s_sao_ctxt_t.ps_sps = ps_sps;
5981 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr = ps_slice_hdr;
5982
5983 /*bit-rate instance number for Multi-bitrate (MBR) encode */
5984 ps_ctxt->i4_bitrate_instance_num = i4_bitrate_instance_num;
5985 ps_ctxt->i4_num_bitrates = i4_num_bitrates;
5986 ps_ctxt->i4_chroma_format = ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_chr_format;
5987 ps_ctxt->i4_is_first_query = 1;
5988 ps_ctxt->i4_is_ctb_qp_modified = 0;
5989
5990 /* enc_frm_id for multiframe encode */
5991
5992 if(1 == ps_enc_ctxt->s_multi_thrd.i4_num_enc_loop_frm_pllel)
5993 {
5994 ps_ctxt->i4_enc_frm_id = 0;
5995 i4_enc_frm_id = 0;
5996 }
5997 else
5998 {
5999 ps_ctxt->i4_enc_frm_id = i4_enc_frm_id;
6000 }
6001
6002 /*Initialize the sub pic rc buf appropriately */
6003
6004 /*Set the thrd id flag */
6005 ps_enc_ctxt->s_multi_thrd
6006 .ai4_thrd_id_valid_flag[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 1;
6007
6008 ps_enc_ctxt->s_multi_thrd
6009 .ai8_nctb_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
6010 ps_enc_ctxt->s_multi_thrd
6011 .ai8_nctb_me_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
6012
6013 ps_enc_ctxt->s_multi_thrd
6014 .ai8_nctb_l0_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
6015 ps_enc_ctxt->s_multi_thrd
6016 .ai8_nctb_act_factor[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
6017
6018 ps_enc_ctxt->s_multi_thrd
6019 .ai8_nctb_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
6020 ps_enc_ctxt->s_multi_thrd
6021 .ai8_acc_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
6022 ps_enc_ctxt->s_multi_thrd
6023 .ai8_acc_bits_mul_qs_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
6024 ps_enc_ctxt->s_multi_thrd
6025 .ai8_nctb_hdr_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
6026 ps_enc_ctxt->s_multi_thrd
6027 .ai8_nctb_mpm_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
6028 ps_enc_ctxt->s_multi_thrd.ai4_prev_chunk_qp[i4_enc_frm_id][i4_bitrate_instance_num] =
6029 i4_frm_qp;
6030
6031 /*Frame level data for Sub Pic rc is initalized here */
6032 /*Can be sent once per frame*/
6033 {
6034 WORD32 i4_tot_frame_ctb = ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert *
6035 ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_horz;
6036
6037 /*Accumalated bits of all cu for required CTBS estimated during RDO evaluation*/
6038 ps_ctxt->u4_total_cu_bits = 0;
6039 ps_ctxt->u4_total_cu_hdr_bits = 0;
6040
6041 ps_ctxt->u4_cu_tot_bits_into_qscale = 0;
6042 ps_ctxt->u4_cu_tot_bits = 0;
6043 ps_ctxt->u4_total_cu_bits_mul_qs = 0;
6044 ps_ctxt->i4_display_num = i4_display_num;
6045 ps_ctxt->i4_sub_pic_level_rc = ps_enc_ctxt->s_multi_thrd.i4_in_frame_rc_enabled;
6046 /*The Qscale is to be generated every 10th of total frame ctb is completed */
6047 //ps_ctxt->i4_num_ctb_for_out_scale = (10 * i4_tot_frame_ctb)/100 ;
6048 ps_ctxt->i4_num_ctb_for_out_scale = (UPDATE_QP_AT_CTB * i4_tot_frame_ctb) / 100;
6049
6050 ps_ctxt->i4_cu_qp_sub_pic_rc = (1 << QP_LEVEL_MOD_ACT_FACTOR);
6051 /*Sub Pic RC frame level params */
6052 ps_ctxt->i8_frame_l1_ipe_sad =
6053 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_pre_intra_sad;
6054 ps_ctxt->i8_frame_l0_ipe_satd =
6055 ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_l0_acc_satd;
6056 ps_ctxt->i8_frame_l1_me_sad =
6057 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_l1_coarse_me_sad;
6058 ps_ctxt->i8_frame_l1_activity_fact =
6059 ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_level_activity_fact;
6060 if(ps_ctxt->i4_sub_pic_level_rc)
6061 {
6062 ASSERT(
6063 ps_curr_inp_prms->ps_curr_inp->s_lap_out
6064 .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num] != 0);
6065
6066 ps_ctxt->ai4_frame_bits_estimated[ps_ctxt->i4_enc_frm_id]
6067 [ps_ctxt->i4_bitrate_instance_num] =
6068 ps_curr_inp_prms->ps_curr_inp->s_lap_out
6069 .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num];
6070 }
6071 //ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type = 1;
6072
6073 ps_ctxt->i4_is_I_scenecut =
6074 ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) &&
6075 (ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_IDR_FRAME ||
6076 ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_I_FRAME));
6077
6078 ps_ctxt->i4_is_non_I_scenecut =
6079 ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) &&
6080 (ps_ctxt->i4_is_I_scenecut == 0));
6081
6082 /*ps_ctxt->i4_is_I_only_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_I_only_scd;
6083 ps_ctxt->i4_is_non_I_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_non_I_scd;*/
6084 ps_ctxt->i4_is_model_valid =
6085 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i4_is_model_valid;
6086 }
6087 /* cb and cr offsets are assumed to be same */
6088 chroma_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset + ps_pps->i1_pic_cb_qp_offset;
6089
6090 /* assumption of cb = cr qp */
6091 ASSERT(ps_slice_hdr->i1_slice_cb_qp_offset == ps_slice_hdr->i1_slice_cr_qp_offset);
6092 ASSERT(ps_pps->i1_pic_cb_qp_offset == ps_pps->i1_pic_cr_qp_offset);
6093
6094 ps_ctxt->u1_is_input_data_hbd = (ps_sps->i1_bit_depth_luma_minus8 > 0);
6095
6096 ps_ctxt->u1_bit_depth = ps_sps->i1_bit_depth_luma_minus8 + 8;
6097
6098 ps_ctxt->s_mc_ctxt.i4_bit_depth = ps_ctxt->u1_bit_depth;
6099 ps_ctxt->s_mc_ctxt.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
6100
6101 /*remember chroma qp offset as qp related parameters are calculated at CU level*/
6102 ps_ctxt->i4_chroma_qp_offset = chroma_qp_offset;
6103 ps_ctxt->i1_cu_qp_delta_enable = ps_pps->i1_cu_qp_delta_enabled_flag;
6104 ps_ctxt->i1_entropy_coding_sync_enabled_flag = ps_pps->i1_entropy_coding_sync_enabled_flag;
6105
6106 ps_ctxt->i4_is_ref_pic = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_ref_pic;
6107 ps_ctxt->i4_temporal_layer = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_temporal_lyr_id;
6108 ps_ctxt->i4_use_const_lamda_modifier = USE_CONSTANT_LAMBDA_MODIFIER;
6109 ps_ctxt->i4_use_const_lamda_modifier =
6110 ps_ctxt->i4_use_const_lamda_modifier ||
6111 ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6112 (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
6113 ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6114 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) ||
6115 (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6116 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1)) ||
6117 (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6118 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_2)) ||
6119 (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6120 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_3))));
6121
6122 {
6123 ps_ctxt->f_i_pic_lamda_modifier =
6124 ps_curr_inp_prms->ps_curr_inp->s_lap_out.f_i_pic_lamda_modifier;
6125 }
6126
6127 ps_ctxt->i4_frame_qp = i4_frm_qp;
6128 ps_ctxt->i4_frame_mod_qp = i4_frm_qp;
6129 ps_ctxt->i4_cu_qp = i4_frm_qp;
6130 ps_ctxt->i4_prev_cu_qp = i4_frm_qp;
6131 ps_ctxt->i4_chrm_cu_qp =
6132 (ps_ctxt->u1_chroma_array_type == 2)
6133 ? MIN(i4_frm_qp + chroma_qp_offset, 51)
6134 : gai1_ihevc_chroma_qp_scale[i4_frm_qp + chroma_qp_offset + MAX_QP_BD_OFFSET];
6135
6136 ps_ctxt->i4_cu_qp_div6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
6137 i4_div_factor = (i4_frm_qp + 3) / 6;
6138 i4_div_factor = CLIP3(i4_div_factor, 3, 6);
6139 ps_ctxt->i4_cu_qp_mod6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
6140
6141 ps_ctxt->i4_chrm_cu_qp_div6 =
6142 (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
6143 ps_ctxt->i4_chrm_cu_qp_mod6 =
6144 (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
6145
6146 #define INTER_RND_QP_BY_6
6147 #ifdef INTER_RND_QP_BY_6
6148
6149 { /*1/6 rounding for 8 bit b frames*/
6150 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = 85
6151 /*((1 << QUANT_ROUND_FACTOR_Q) / 6)*/;
6152 }
6153 #else
6154 /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
6155 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3;
6156 #endif
6157
6158 if(ISLICE == i1_slice_type)
6159 {
6160 /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
6161 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] = 171
6162 /*((1 << QUANT_ROUND_FACTOR_Q) / 6)*/;
6163 }
6164 else
6165 {
6166 /* quant factor without RDOQ is 1/6th of shift for intra in inter pic */
6167 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
6168 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER];
6169 /* (1 << QUANT_ROUND_FACTOR_Q) / 6; */
6170 }
6171
6172 ps_ctxt->i1_strong_intra_smoothing_enable_flag = i1_strong_intra_smoothing_enable_flag;
6173
6174 ps_ctxt->i1_slice_type = i1_slice_type;
6175
6176 /* intialize the inter pred (MC) context at frame level */
6177 ps_ctxt->s_mc_ctxt.ps_ref_list = aps_ref_list;
6178 ps_ctxt->s_mc_ctxt.i1_weighted_pred_flag = i1_weighted_pred_flag;
6179 ps_ctxt->s_mc_ctxt.i1_weighted_bipred_flag = i1_weighted_bipred_flag;
6180 ps_ctxt->s_mc_ctxt.i4_log2_luma_wght_denom = log2_luma_wght_denom;
6181 ps_ctxt->s_mc_ctxt.i4_log2_chroma_wght_denom = log2_chroma_wght_denom;
6182
6183 /* intialize the MV pred context at frame level */
6184 ps_ctxt->s_mv_pred_ctxt.ps_ref_list = aps_ref_list;
6185 ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr = ps_slice_hdr;
6186 ps_ctxt->s_mv_pred_ctxt.ps_sps = ps_sps;
6187 ps_ctxt->s_mv_pred_ctxt.i4_log2_parallel_merge_level_minus2 =
6188 ps_pps->i1_log2_parallel_merge_level - 2;
6189
6190 #if ADAPT_COLOCATED_FROM_L0_FLAG
6191 if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_temporal_mvp_enable_flag)
6192 {
6193 if((ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_num_ref_idx_l1_active > 0) &&
6194 (ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][0]->i4_frame_qp <
6195 ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][0]->i4_frame_qp))
6196 {
6197 ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_collocated_from_l0_flag = 1;
6198 }
6199 }
6200 #endif
6201 /* Initialization of deblocking params */
6202 ps_ctxt->s_deblk_prms.i4_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
6203 ps_ctxt->s_deblk_prms.i4_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
6204
6205 ps_ctxt->s_deblk_prms.i4_cb_qp_indx_offset = ps_pps->i1_pic_cb_qp_offset;
6206
6207 ps_ctxt->s_deblk_prms.i4_cr_qp_indx_offset = ps_pps->i1_pic_cr_qp_offset;
6208 /*init frame level stat accumualtion parameters */
6209 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6210 ->u4_frame_sad_acc = 0;
6211 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6212 ->u4_frame_intra_sad_acc = 0;
6213 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6214 ->u4_frame_open_loop_intra_sad = 0;
6215 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6216 ->i8_frame_open_loop_ssd = 0;
6217 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6218 ->u4_frame_inter_sad_acc = 0;
6219
6220 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6221 ->i8_frame_cost_acc = 0;
6222 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6223 ->i8_frame_intra_cost_acc = 0;
6224 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6225 ->i8_frame_inter_cost_acc = 0;
6226
6227 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6228 ->u4_frame_intra_sad = 0;
6229 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6230 ->u4_frame_rdopt_bits = 0;
6231 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6232 ->u4_frame_rdopt_header_bits = 0;
6233 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6234 ->i4_qp_normalized_8x8_cu_sum[0] = 0;
6235 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6236 ->i4_qp_normalized_8x8_cu_sum[1] = 0;
6237 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6238 ->i4_8x8_cu_sum[0] = 0;
6239 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6240 ->i4_8x8_cu_sum[1] = 0;
6241 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6242 ->i8_sad_by_qscale[0] = 0;
6243 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6244 ->i8_sad_by_qscale[1] = 0;
6245 /* Compute the frame_qstep */
6246 GET_FRAME_QSTEP_FROM_QP(ps_ctxt->i4_frame_qp, ps_ctxt->i4_frame_qstep);
6247
6248 ps_ctxt->u1_max_tr_depth = ps_sps->i1_max_transform_hierarchy_depth_inter;
6249
6250 ps_ctxt->ps_rc_quant_ctxt = &ps_enc_ctxt->s_rc_quant;
6251 /* intialize the cabac rdopt context at frame level */
6252 ihevce_entropy_rdo_frame_init(
6253 &ps_ctxt->s_rdopt_entropy_ctxt,
6254 ps_slice_hdr,
6255 ps_pps,
6256 ps_sps,
6257 ps_vps,
6258 ps_master_ctxt->au1_cu_skip_top_row,
6259 &ps_enc_ctxt->s_rc_quant);
6260
6261 /* register the dep mngr instance for forward ME sync */
6262 ps_ctxt->pv_dep_mngr_encloop_dep_me = pv_dep_mngr_encloop_dep_me;
6263 }
6264 }
6265 /*
6266 ******************************************************************************
6267 * \if Function name : ihevce_enc_loop_get_frame_rc_prms \endif
6268 *
6269 * \brief
6270 * returns Nil
6271 *
6272 * \param[in] pv_enc_loop_ctxt : pointer to encode loop context
6273 * \param[out]ps_rc_prms : ptr to frame level info structure
6274 *
6275 * \return
6276 * None
6277 *
6278 * \author
6279 * Ittiam
6280 *
6281 *****************************************************************************
6282 */
ihevce_enc_loop_get_frame_rc_prms(void * pv_enc_loop_ctxt,rc_bits_sad_t * ps_rc_prms,WORD32 i4_br_id,WORD32 i4_enc_frm_id)6283 void ihevce_enc_loop_get_frame_rc_prms(
6284 void *pv_enc_loop_ctxt,
6285 rc_bits_sad_t *ps_rc_prms,
6286 WORD32 i4_br_id, //bitrate instance id
6287 WORD32 i4_enc_frm_id) // frame id
6288 {
6289 /*Get the master thread pointer*/
6290 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
6291 ihevce_enc_loop_ctxt_t *ps_ctxt;
6292 UWORD32 total_frame_intra_sad = 0, total_frame_open_loop_intra_sad = 0;
6293 LWORD64 i8_total_ssd_frame = 0;
6294 UWORD32 total_frame_sad = 0;
6295 UWORD32 total_frame_rdopt_bits = 0;
6296 UWORD32 total_frame_rdopt_header_bits = 0;
6297 WORD32 i4_qp_normalized_8x8_cu_sum[2] = { 0, 0 };
6298 WORD32 i4_8x8_cu_sum[2] = { 0, 0 };
6299 LWORD64 i8_sad_by_qscale[2] = { 0, 0 };
6300 WORD32 i4_curr_qp_acc = 0;
6301 WORD32 i;
6302
6303 /* ENC_LOOP master state structure */
6304 ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
6305
6306 if(1 == ps_master_ctxt->i4_num_enc_loop_frm_pllel)
6307 {
6308 i4_enc_frm_id = 0;
6309 }
6310 /*loop through all threads and accumulate intra sad across all threads*/
6311 for(i = 0; i < ps_master_ctxt->i4_num_proc_thrds; i++)
6312 {
6313 /* ENC_LOOP state structure */
6314 ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i];
6315 total_frame_open_loop_intra_sad +=
6316 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_open_loop_intra_sad;
6317 i8_total_ssd_frame +=
6318 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_open_loop_ssd;
6319 total_frame_intra_sad +=
6320 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_intra_sad;
6321 total_frame_sad +=
6322 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_sad_acc;
6323 total_frame_rdopt_bits +=
6324 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_bits;
6325 total_frame_rdopt_header_bits +=
6326 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_header_bits;
6327 i4_qp_normalized_8x8_cu_sum[0] += ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
6328 ->i4_qp_normalized_8x8_cu_sum[0];
6329 i4_qp_normalized_8x8_cu_sum[1] += ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
6330 ->i4_qp_normalized_8x8_cu_sum[1];
6331 i4_8x8_cu_sum[0] +=
6332 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[0];
6333 i4_8x8_cu_sum[1] +=
6334 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[1];
6335 i8_sad_by_qscale[0] +=
6336 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[0];
6337 i8_sad_by_qscale[1] +=
6338 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[1];
6339 }
6340
6341 ps_rc_prms->u4_open_loop_intra_sad = total_frame_open_loop_intra_sad;
6342 ps_rc_prms->i8_total_ssd_frame = i8_total_ssd_frame;
6343 ps_rc_prms->u4_total_sad = total_frame_sad;
6344 ps_rc_prms->u4_total_texture_bits = total_frame_rdopt_bits - total_frame_rdopt_header_bits;
6345 ps_rc_prms->u4_total_header_bits = total_frame_rdopt_header_bits;
6346 /*This accumulation of intra frame sad is not intact. This can only be a temp change*/
6347 ps_rc_prms->u4_total_intra_sad = total_frame_intra_sad;
6348 ps_rc_prms->i4_qp_normalized_8x8_cu_sum[0] = i4_qp_normalized_8x8_cu_sum[0];
6349 ps_rc_prms->i4_qp_normalized_8x8_cu_sum[1] = i4_qp_normalized_8x8_cu_sum[1];
6350 ps_rc_prms->i4_8x8_cu_sum[0] = i4_8x8_cu_sum[0];
6351 ps_rc_prms->i4_8x8_cu_sum[1] = i4_8x8_cu_sum[1];
6352 ps_rc_prms->i8_sad_by_qscale[0] = i8_sad_by_qscale[0];
6353 ps_rc_prms->i8_sad_by_qscale[1] = i8_sad_by_qscale[1];
6354 }
6355