• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /******************************************************************************
2   *
3   * Copyright (C) 2018 The Android Open Source Project
4   *
5   * Licensed under the Apache License, Version 2.0 (the "License");
6   * you may not use this file except in compliance with the License.
7   * You may obtain a copy of the License at:
8   *
9   * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   *
17   *****************************************************************************
18   * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19  */
20  /*!
21  ******************************************************************************
22  * \file ihevce_enc_loop_structs.h
23  *
24  * \brief
25  *    This file contains structures of enc_loop pass
26  *
27  * \date
28  *    18/09/2012
29  *
30  * \author
31  *    Ittiam
32  *
33  ******************************************************************************
34  */
35  
36  #ifndef _IHEVCE_ENC_LOOP_STRUCTS_H_
37  #define _IHEVCE_ENC_LOOP_STRUCTS_H_
38  
39  #include "ihevc_macros.h"
40  
/**
 * Lookup table defined in another translation unit (128 UWORD16 entries).
 * In this header it is indexed by (context model ^ bin) inside
 * COMPUTE_MERGE_IDX_COST to obtain the bit cost of a context coded bin.
 * NOTE(review): entries appear to be fractional bits in Q(CABAC_FRAC_BITS_Q)
 * format, since they are summed with values shifted by CABAC_FRAC_BITS_Q -
 * confirm against the table definition.
 */
extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2];
42  
43  /*****************************************************************************/
44  /* Constant Macros                                                           */
45  /*****************************************************************************/
/** \brief Number of transform types: 4x4 DST, 4x4, 8x8, 16x16, 32x32 */
#define NUM_TRANS_TYPES 5
/* NOTE(review): presumably the HEVC intra prediction mode numbers for the */
/* planar and DC modes - confirm against the users of these macros         */
#define INTRA_PLANAR 0
#define INTRA_DC 1
#define NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD 2
#define MAX_TU_IN_TU_EQ_DIV_2 4
#define MAX_MVP_LIST_CAND 2
/* "Infinite" 32 bit cost sentinel. Note that 0x7ffffff is 2^27 - 1 (seven  */
/* hex digits) and NOT INT32_MAX; the value is used as-is by the macros in  */
/* this file (see INITIALISE_MERGE_RESULT_STRUCT)                           */
#define MAX_COST 0x7ffffff
/* "Infinite" 64 bit cost sentinel. Note that this is 2^59 - 1 (fifteen hex */
/* digits) and NOT INT64_MAX                                                */
#define MAX_COST_64 0x7ffffffffffffff
#define NUM_32CU_AND_64CU_IN_CTB 5 /* 4 - 32x32 + 1 64x64*/
/* Number of buffers in a ping-pong (double buffered) arrangement */
#define PING_PONG 2
#define MAX_SAO_RD_CAND 10
#define SCRATCH_BUF_STRIDE 80
59  
60  /*****************************************************************************/
61  /* Function Macros                                                           */
62  /*****************************************************************************/
/** @brief Identity macro: expands to its (parenthesised) argument. */
/* NOTE(review): presumably used to spell angular intra mode numbers */
/* readably, e.g. INTRA_ANGULAR(26) - confirm at the call sites      */
#define INTRA_ANGULAR(x) (x)
64  
/** @brief max 30bit value */
#define MAX30 ((1 << 30) - 1)

/**
 * @brief macro to clip a data to max of 30bits (assuming unsigned)
 *
 * Only the upper bound is clipped; the argument is evaluated more than once,
 * so it must not have side effects.
 */
#define CLIP30(x) ((x) > MAX30 ? MAX30 : (x))

/**
 * @brief compute the (lambda * rate) with a qshift and clip result to 30bits
 *
 * @param r      rate (bits), expected to be non-negative
 * @param l      lambda, expected to be non-negative
 * @param qshift right shift applied to the product
 *
 * Both operands are widened to ULWORD64 BEFORE the multiplication so that the
 * product of two 32 bit values cannot overflow in the narrower type; the
 * shifted result is then saturated to MAX30 and returned as WORD32.
 */
#define COMPUTE_RATE_COST_CLIP30(r, l, qshift)                                                     \
    ((WORD32)CLIP30(((ULWORD64)(r) * (ULWORD64)(l)) >> (qshift)))
73  
/**
 * @brief Computes ((((inp - off) << shift) * wt) + (1 << 14)) >> 15
 *
 * i.e. removes the offset, scales up by 2^shift, multiplies by wt and rounds
 * the result down by 15 bits (adding half, 1 << 14, before the shift).
 * NOTE(review): wt is presumably an inverse weight in Q15 - confirm with the
 * callers.
 *
 * Every argument, including wt, is parenthesised so that compound
 * expressions (e.g. wt = a + b) expand with the intended precedence.
 */
#define IHEVCE_INV_WT_PRED(inp, wt, off, shift)                                                    \
    (((((inp) - (off)) << (shift)) * (wt) + (1 << 14)) >> 15)
76  
/**
 * @brief Fills an inter PU structure for a uni-directional prediction
 *
 * @param ps_pu     pointer to the PU structure to fill (evaluated many times)
 * @param mvx, mvy  motion vector components
 * @param offset_x, offset_y  PU position in pixels; stored divided by 4
 * @param wd, ht    PU size in pixels; stored as (size / 4) - 1
 * @param ref_idx   reference index for the active list
 * @param pred_lx   0 selects the L0 fields, non-zero selects the L1 fields;
 *                  the value is also stored in b2_pred_mode
 *
 * The reference index of the list that is not used is set to -1 and its
 * motion vector is left untouched. The expansion is a brace block, not a
 * single statement.
 */
#define POPULATE_PU_STRUCT(ps_pu, mvx, mvy, offset_x, offset_y, wd, ht, ref_idx, pred_lx)          \
    {                                                                                              \
        /* position and size are kept in units of 4 pixels */                                      \
        (ps_pu)->b4_pos_x = (offset_x) >> 2;                                                       \
        (ps_pu)->b4_pos_y = (offset_y) >> 2;                                                       \
        (ps_pu)->b4_wd = ((wd) >> 2) - 1;                                                          \
        (ps_pu)->b4_ht = ((ht) >> 2) - 1;                                                          \
        (ps_pu)->b1_intra_flag = 0;                                                                \
        (ps_pu)->b2_pred_mode = (pred_lx);                                                         \
        if(!(pred_lx))                                                                             \
        {                                                                                          \
            /* list 0 is active, list 1 is marked invalid */                                       \
            (ps_pu)->mv.i1_l0_ref_idx = (ref_idx);                                                 \
            (ps_pu)->mv.i1_l1_ref_idx = -1;                                                        \
            (ps_pu)->mv.s_l0_mv.i2_mvx = (mvx);                                                    \
            (ps_pu)->mv.s_l0_mv.i2_mvy = (mvy);                                                    \
        }                                                                                          \
        else                                                                                       \
        {                                                                                          \
            /* list 1 is active, list 0 is marked invalid */                                       \
            (ps_pu)->mv.i1_l0_ref_idx = -1;                                                        \
            (ps_pu)->mv.i1_l1_ref_idx = (ref_idx);                                                 \
            (ps_pu)->mv.s_l1_mv.i2_mvx = (mvx);                                                    \
            (ps_pu)->mv.s_l1_mv.i2_mvy = (mvy);                                                    \
        }                                                                                          \
    }
100  
/**
 * @brief Converts a frame QP to an integer quantiser step size
 *
 * @param frame_qp     QP value; must be non-negative as it is used both as a
 *                     shift count (qp / 6) and as a table index (qp % 6).
 *                     Evaluated twice.
 * @param frame_qstep  lvalue that receives the result
 *
 * qstep = 2^(qp / 6) * table[qp % 6], truncated to WORD32. The table holds
 * the step size for the six QP remainders (1.0 corresponds to qp % 6 == 4).
 * The expansion is a brace block, not a single statement.
 */
#define GET_FRAME_QSTEP_FROM_QP(frame_qp, frame_qstep)                                             \
    {                                                                                              \
        /* step size for each value of (qp % 6) */                                                 \
        const double ad_qstep_for_rem[6] = { 0.625, 0.703, 0.79, 0.889, 1.0, 1.125 };              \
                                                                                                   \
        frame_qstep =                                                                              \
            (WORD32)((1 << ((frame_qp) / 6)) * ad_qstep_for_rem[(frame_qp) % 6]);                  \
    }
107  
/**
 * @brief Resets the merge result bookkeeping and the result buffers
 *
 * @param ps_merge_data   pointer to the merge data structure; its
 *                        s_pu_results member is initialised
 * @param pas_pu_results  [2][TOT_NUM_PARTS] table of pointers, each pointing
 *                        to MAX_NUM_RESULTS_PER_PART_LIST result entries
 *
 * - the per-partition result counts of both lists are set to 0
 * - every pointer of pas_pu_results is registered in aps_pu_results
 * - every result entry gets cost MAX_COST and both ref indices set to -1
 *
 * Both arguments are evaluated many times and the expansion declares the
 * locals i, j and k, so the arguments must not refer to identifiers with
 * those names. pas_pu_results is parenthesised so that any expression may be
 * passed. The expansion is a brace block, not a single statement.
 */
#define INITIALISE_MERGE_RESULT_STRUCT(ps_merge_data, pas_pu_results)                              \
    {                                                                                              \
        WORD32 i, j, k;                                                                            \
                                                                                                   \
        for(i = 0; i < TOT_NUM_PARTS; i++)                                                         \
        {                                                                                          \
            (ps_merge_data)->s_pu_results.u1_num_results_per_part_l0[i] = 0;                       \
            (ps_merge_data)->s_pu_results.u1_num_results_per_part_l1[i] = 0;                       \
        }                                                                                          \
        /* i : prediction list, j : partition, k : result within partition */                      \
        for(i = 0; i < 2; i++)                                                                     \
        {                                                                                          \
            for(j = 0; j < TOT_NUM_PARTS; j++)                                                     \
            {                                                                                      \
                (ps_merge_data)->s_pu_results.aps_pu_results[i][j] = (pas_pu_results)[i][j];       \
                for(k = 0; k < MAX_NUM_RESULTS_PER_PART_LIST; k++)                                 \
                {                                                                                  \
                    (pas_pu_results)[i][j][k].i4_tot_cost = MAX_COST;                              \
                    (pas_pu_results)[i][j][k].pu.mv.i1_l0_ref_idx = -1;                            \
                    (pas_pu_results)[i][j][k].pu.mv.i1_l1_ref_idx = -1;                            \
                }                                                                                  \
            }                                                                                      \
        }                                                                                          \
    }
131  
/**
 * @brief Fills the common frame/CTB parameter structure
 *
 * @param ps_prms        pointer to the structure to fill (evaluated many times)
 * @param wt_inp         [2][MAX_NUM_REF] table copied to apu1_wt_inp
 * @param ctb_x_off      stored in i4_ctb_x_off
 * @param ctb_y_off      stored in i4_ctb_y_off
 * @param pred           stored in ppu1_pred
 * @param cu_size        stored in both i4_inp_stride and i4_pred_stride
 * @param ref_stride     stored in i4_rec_stride
 * @param bidir_enabled  stored in i4_bidir_enabled
 * @param num_refs       stored in u1_num_ref
 * @param rec_list_l0    stored in pps_rec_list_l0
 * @param rec_list_l1    stored in pps_rec_list_l1
 * @param non_wt_inp     stored in pu1_non_wt_inp
 * @param lambda         stored in i4_lamda
 * @param lambda_q_shift stored in u1_lamda_qshift
 * @param log_wdc        stored in wpred_log_wdc
 *
 * pu1_wkg_mem is always reset to NULL.
 *
 * Notes on the definition:
 * - the opening parenthesis of the parameter list must directly follow the
 *   macro name, otherwise the macro is object-like and takes no arguments
 * - the parameter names deliberately differ from the structure member names;
 *   a parameter that shares its name with a member would also replace the
 *   member name in the expansion
 * - NOTE(review): apu1_wt_inp is assumed to be a [2][MAX_NUM_REF] member (as
 *   the loop bounds and the 'apu1' prefix suggest) and is copied element by
 *   element - confirm against the structure definition
 * - the expansion is a brace block, not a single statement
 */
#define POPULATE_CTB_PARAMS(                                                                       \
    ps_prms,                                                                                       \
    wt_inp,                                                                                        \
    ctb_x_off,                                                                                     \
    ctb_y_off,                                                                                     \
    pred,                                                                                          \
    cu_size,                                                                                       \
    ref_stride,                                                                                    \
    bidir_enabled,                                                                                 \
    num_refs,                                                                                      \
    rec_list_l0,                                                                                   \
    rec_list_l1,                                                                                   \
    non_wt_inp,                                                                                    \
    lambda,                                                                                        \
    lambda_q_shift,                                                                                \
    log_wdc)                                                                                       \
    {                                                                                              \
        WORD32 i4_list, i4_ref;                                                                    \
        (ps_prms)->i4_bidir_enabled = (bidir_enabled);                                             \
        (ps_prms)->i4_ctb_x_off = (ctb_x_off);                                                     \
        (ps_prms)->i4_ctb_y_off = (ctb_y_off);                                                     \
        (ps_prms)->i4_inp_stride = (cu_size);                                                      \
        (ps_prms)->i4_lamda = (lambda);                                                            \
        (ps_prms)->i4_pred_stride = (cu_size);                                                     \
        (ps_prms)->i4_rec_stride = (ref_stride);                                                   \
        (ps_prms)->pps_rec_list_l0 = (rec_list_l0);                                                \
        (ps_prms)->pps_rec_list_l1 = (rec_list_l1);                                                \
        (ps_prms)->ppu1_pred = (pred);                                                             \
        (ps_prms)->pu1_non_wt_inp = (non_wt_inp);                                                  \
        (ps_prms)->pu1_wkg_mem = NULL;                                                             \
        (ps_prms)->u1_lamda_qshift = (lambda_q_shift);                                             \
        (ps_prms)->u1_num_ref = (num_refs);                                                        \
        (ps_prms)->wpred_log_wdc = (log_wdc);                                                      \
        /* copy one entry per prediction list and reference */                                     \
        for(i4_list = 0; i4_list < 2; i4_list++)                                                   \
        {                                                                                          \
            for(i4_ref = 0; i4_ref < MAX_NUM_REF; i4_ref++)                                        \
            {                                                                                      \
                (ps_prms)->apu1_wt_inp[i4_list][i4_ref] = (wt_inp)[i4_list][i4_ref];               \
            }                                                                                      \
        }                                                                                          \
    }
173  
/**
 * @brief Estimates the rate cost of signalling a merge index
 *
 * @param merge_idx_0_model  context model of the first merge idx bin; xor-ed
 *                           with the bin value to index
 *                           gau2_ihevce_cabac_bin_to_bits
 * @param merge_idx          merge index, 0 <= merge_idx < max_merge_cand
 * @param max_merge_cand     number of merge candidates
 * @param lambda             lagrange multiplier passed on to
 *                           COMPUTE_RATE_COST_CLIP30
 * @param cost               lvalue that receives the cost
 *
 * With a single candidate nothing is signalled and the cost is 0. Otherwise
 * the first bin (merge_idx > 0) is costed through the table and, when more
 * than two candidates exist and merge_idx > 0, MIN(merge_idx,
 * max_merge_cand - 2) further bins are costed as one bit each
 * (1 << CABAC_FRAC_BITS_Q). The bit count is converted to a cost with a shift
 * of (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q).
 *
 * All arguments are parenthesised in the expansion and are evaluated more
 * than once. The expansion declares the locals cab_bits_q12 and bin and is a
 * brace block, not a single statement.
 */
#define COMPUTE_MERGE_IDX_COST(merge_idx_0_model, merge_idx, max_merge_cand, lambda, cost)         \
    {                                                                                              \
        WORD32 cab_bits_q12 = 0;                                                                   \
                                                                                                   \
        /* sanity checks */                                                                        \
        ASSERT(((merge_idx) >= 0) && ((merge_idx) < (max_merge_cand)));                            \
                                                                                                   \
        /* encode the merge idx only if required */                                                \
        if((max_merge_cand) > 1)                                                                   \
        {                                                                                          \
            WORD32 bin = ((merge_idx) > 0);                                                        \
                                                                                                   \
            /* bits for the context modelled first bin */                                          \
            cab_bits_q12 += gau2_ihevce_cabac_bin_to_bits[(merge_idx_0_model) ^ bin];              \
                                                                                                   \
            /* bits for larger merge idx coded as bypass truncated unary */                        \
            if(((max_merge_cand) > 2) && ((merge_idx) > 0))                                        \
            {                                                                                      \
                cab_bits_q12 += (MIN((merge_idx), ((max_merge_cand)-2))) << CABAC_FRAC_BITS_Q;     \
            }                                                                                      \
                                                                                                   \
            (cost) = COMPUTE_RATE_COST_CLIP30(                                                     \
                cab_bits_q12, (lambda), (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));                     \
        }                                                                                          \
        else                                                                                       \
        {                                                                                          \
            (cost) = 0;                                                                            \
        }                                                                                          \
    }
203  
204  /*****************************************************************************/
205  /* Typedefs                                                                  */
206  /*****************************************************************************/
207  
/** \brief pointer to a function of type FT_CALC_HAD_SATD_8BIT (the function
type itself is declared outside this file) */
typedef FT_CALC_HAD_SATD_8BIT *pf_res_trans_luma_had_chroma;
209  
/** \brief function pointer prototype for residue and transform enc_loop
(chroma). Takes source and prediction buffers with their strides, a temporary
buffer, a destination coefficient buffer with its stride and the chroma plane
to process; returns a UWORD32.
NOTE(review): the return value is presumably a distortion measure of the
block - confirm with the implementations */
typedef UWORD32 (*pf_res_trans_chroma)(
    UWORD8 *pu1_src,
    UWORD8 *pu1_pred,
    WORD32 *pi4_tmp,
    WORD16 *pi2_dst,
    WORD32 src_strd,
    WORD32 pred_strd,
    WORD32 dst_strd,
    CHROMA_PLANE_ID_T e_chroma_plane);
220  
/** \brief function pointer prototype for quantization and inv Quant for ssd
calc. for all transform sizes.
Differs from pf_quant_iquant_ssd_sbh by taking the two quant rounding factor
tables and a 64 bit cost output, and by not taking scan index / RDOQ flags.
NOTE(review): the WORD32 return value is presumably the coded block flag of
the block - confirm with the implementations */
typedef WORD32 (*pf_quant_iquant_ssd)(
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div, /* qpscaled / 6 */
    WORD32 qp_rem, /* qpscaled % 6 */
    WORD32 q_add,
    WORD32 *pi4_quant_round_factor_0_1,
    WORD32 *pi4_quant_round_factor_1_2,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    LWORD64 *pi8_cost);
243  
/** \brief function pointer prototype for quantization and inv Quant for ssd
calc. for all transform sizes (in case of RDOQ + SBH).
Compared to pf_quant_iquant_ssd there are no rounding factor tables, the cost
output is 32 bit, and the scan index and an RDOQ enable flag are passed in */
typedef WORD32 (*pf_quant_iquant_ssd_sbh)(
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div, /* qpscaled / 6 */
    WORD32 qp_rem, /* qpscaled % 6 */
    WORD32 q_add,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    WORD32 *pi4_cost,
    WORD32 i4_scan_idx,
    WORD32 i4_perform_rdoq);
266  
/** \brief function pointer prototype for inverse transform and recon
for all transform sizes : Luma.
Takes the coefficient source, a temporary buffer, the prediction and
destination buffers with their strides, and the zero column / zero row
information of the coefficient block */
typedef void (*pf_it_recon)(
    WORD16 *pi2_src,
    WORD16 *pi2_tmp,
    UWORD8 *pu1_pred,
    UWORD8 *pu1_dst,
    WORD32 src_strd,
    WORD32 pred_strd,
    WORD32 dst_strd,
    WORD32 zero_cols,
    WORD32 zero_rows);
279  
/** \brief function pointer prototype for inverse transform and recon
for all transform sizes : Chroma.
The parameter list is identical to that of pf_it_recon */
typedef void (*pf_it_recon_chroma)(
    WORD16 *pi2_src,
    WORD16 *pi2_tmp,
    UWORD8 *pu1_pred,
    UWORD8 *pu1_dst,
    WORD32 src_strd,
    WORD32 pred_strd,
    WORD32 dst_strd,
    WORD32 zero_cols,
    WORD32 zero_rows);
292  
/** \brief function pointer prototype for luma sao.
Takes the source block with its stride, pointers to the left, top, top-left,
top-right and bottom-left neighbour data, an availability array, the SAO
offsets and the block width and height */
typedef void (*pf_sao_luma)(
    UWORD8 *pu1_src,
    WORD32 src_strd,
    UWORD8 *pu1_src_left,
    UWORD8 *pu1_src_top,
    UWORD8 *pu1_src_top_left,
    UWORD8 *pu1_src_top_right,
    UWORD8 *pu1_src_bot_left,
    UWORD8 *pu1_avail,
    WORD8 *pi1_sao_offset,
    WORD32 wd,
    WORD32 ht);
306  
/** \brief function pointer prototype for chroma sao.
Same parameters as pf_sao_luma, except that separate offset arrays are passed
for the U and V components */
typedef void (*pf_sao_chroma)(
    UWORD8 *pu1_src,
    WORD32 src_strd,
    UWORD8 *pu1_src_left,
    UWORD8 *pu1_src_top,
    UWORD8 *pu1_src_top_left,
    UWORD8 *pu1_src_top_right,
    UWORD8 *pu1_src_bot_left,
    UWORD8 *pu1_avail,
    WORD8 *pi1_sao_offset_u,
    WORD8 *pi1_sao_offset_v,
    WORD32 wd,
    WORD32 ht);
321  
322  /*****************************************************************************/
323  /* Enums                                                                     */
324  /*****************************************************************************/
325  
/** Indices of function groups, one per mode or range of modes named in the
enumerator (0, 1, 2, 3 to 9, 10, 11 to 17, 18 and 34, 19 to 25, 26, 27 to 33).
NOTE(review): presumably these are groups of intra prediction modes that
share one prediction function - confirm with the users of this enum */
typedef enum
{
    IP_FUNC_MODE_0 = 0,
    IP_FUNC_MODE_1,
    IP_FUNC_MODE_2,
    IP_FUNC_MODE_3TO9,
    IP_FUNC_MODE_10,
    IP_FUNC_MODE_11TO17,
    IP_FUNC_MODE_18_34,
    IP_FUNC_MODE_19TO25,
    IP_FUNC_MODE_26,
    IP_FUNC_MODE_27TO33,

    /* number of entries above; keep as the last enumerator */
    NUM_IP_FUNCS

} IP_FUNCS_T;
342  
/** TU size expressed relative to the CU size */
typedef enum
{
    /* currently only cu and cu/2 modes are supported */
    TU_EQ_CU = 0,
    TU_EQ_CU_DIV2,
    TU_EQ_SUBCU, /* only applicable for NXN mode at mincusize */

    /* support for below modes needs to be added */
    TU_EQ_CU_DIV4,
    TU_EQ_CU_DIV8,
    TU_EQ_CU_DIV16,

    /* number of entries above; keep as the last enumerator */
    NUM_TU_WRT_CU,

} TU_SIZE_WRT_CU_T;
358  
/** Modes in which a core function can be called.
NOTE(review): presumably selects between the regular RD-opt evaluation and
the evaluation of a skip candidate - confirm with the callers */
typedef enum
{
    RDOPT_MODE = 0,
    RDOPT_SKIP_MODE = 1,

    /* number of entries above; keep as the last enumerator */
    NUM_CORE_CALL_MODES,

} CORE_FUNC_CALL_MODE_T;
367  
/** Identifiers of the memory records of enc_loop; NUM_ENC_LOOP_MEM_RECS gives
the number of records.
NOTE(review): presumably each value indexes one entry of the memory table
requested / initialised by the enc_loop module - confirm */
typedef enum
{
    ENC_LOOP_CTXT = 0,
    ENC_LOOP_THRDS_CTXT,
    ENC_LOOP_SCALE_MAT,
    ENC_LOOP_RESCALE_MAT,
    ENC_LOOP_TOP_LUMA,
    ENC_LOOP_TOP_CHROMA,
    ENC_LOOP_TOP_NBR4X4,
    ENC_LOOP_RC_PARAMS, /* memory to dump rate control parameters by each thread for each bit-rate instance */
    ENC_LOOP_QP_TOP_4X4,
    ENC_LOOP_DEBLOCKING,
    ENC_LOOP_422_CHROMA_INTRA_PRED,
    ENC_LOOP_INTER_PRED,
    ENC_LOOP_CHROMA_PRED_INTRA,
    ENC_LOOP_REF_SUB_OUT,
    ENC_LOOP_REF_FILT_OUT,
    ENC_LOOP_CU_RECUR_LUMA_RECON,
    ENC_LOOP_CU_RECUR_CHROMA_RECON,
    ENC_LOOP_CU_RECUR_LUMA_PRED,
    ENC_LOOP_CU_RECUR_CHROMA_PRED,
    ENC_LOOP_LEFT_LUMA_DATA,
    ENC_LOOP_LEFT_CHROMA_DATA,
    ENC_LOOP_SAO,
    ENC_LOOP_CU_COEFF_DATA,
    ENC_LOOP_CU_RECUR_COEFF_DATA,
    ENC_LOOP_CU_DEQUANT_DATA,
    ENC_LOOP_RECON_DATA_STORE,
    /* should always be the last entry */
    NUM_ENC_LOOP_MEM_RECS

} ENC_LOOP_MEM_TABS_T;
400  
/** This is for assigning the pred buffers for luma (2 ping-pong) and
chroma (1) */
typedef enum
{
    CU_ME_INTRA_PRED_LUMA_IDX0 = 0,
    CU_ME_INTRA_PRED_LUMA_IDX1,
    CU_ME_INTRA_PRED_CHROMA_IDX,

    /* should be always the last entry */
    NUM_CU_ME_INTRA_PRED_IDX

} CU_ME_INTRA_PRED_IDX_T;
413  
414  /*****************************************************************************/
415  /* Structure                                                                 */
416  /*****************************************************************************/
417  
/**
******************************************************************************
*  @brief     Structure to store TU prms req. for enc_loop only
*
*  The chroma members are arrays of 2 entries.
*  NOTE(review): presumably one entry per chroma sub-TU (two sub-TUs exist
*  in 4:2:2) - confirm
******************************************************************************
*/
typedef struct
{
    /** Zero_col info. for the current TU Luma */
    UWORD32 u4_luma_zero_col;
    /** Zero_row info. for the current TU Luma */
    UWORD32 u4_luma_zero_row;

    /** Zero_col info. for the current TU Chroma Cb */
    UWORD32 au4_cb_zero_col[2];
    /** Zero_row info. for the current TU Chroma Cb */
    UWORD32 au4_cb_zero_row[2];
    /** Zero_col info. for the current TU Chroma Cr */
    UWORD32 au4_cr_zero_col[2];
    /** Zero_row info. for the current TU Chroma Cr */
    UWORD32 au4_cr_zero_row[2];

    /** bytes consumed by the luma ecd data */
    WORD16 i2_luma_bytes_consumed;
    /** bytes consumed by the Cb ecd data */
    WORD16 ai2_cb_bytes_consumed[2];
    /** bytes consumed by the Cr ecd data */
    WORD16 ai2_cr_bytes_consumed[2];

    /** flag to re-evaluate IQ and Coeff data of luma in the final_recon
    function. If zero, uses the data from RDOPT cand.                   */
    UWORD16 b1_eval_luma_iq_and_coeff_data : 1;
    /** flag to re-evaluate IQ and Coeff data of chroma in the final_recon
    function. If zero, uses the data from RDOPT cand.                   */
    UWORD16 b1_eval_chroma_iq_and_coeff_data : 1;

    /* TO DO : No support now, need to add. Always compare ZERO_CBF cost */
    /** Luma ZERO_CBF cost is compared with residue coding cost only if this
    flag is enabled */
    UWORD16 b1_eval_luma_zero_cbf_cost : 1;
    /** Chroma ZERO_CBF cost is compared with residue coding cost only if this
    flag is enabled */
    UWORD16 b1_eval_chroma_zero_cbf_cost : 1;

    /** Reserved bits: pads the four 1 bit flags above to a full UWORD16 */
    UWORD16 b12_reserved : 12;

} tu_enc_loop_temp_prms_t;
465  
/** Book-keeping of the recon buffers used while evaluating a CU: the buffer
pointers, their strides, and for every TU the id of the buffer that holds the
winning recon */
typedef struct recon_datastore_t
{
    /* 2 to store current and best */
    void *apv_luma_recon_bufs[2];

    /* 0 to store cur chroma mode recon */
    /* 1 to store winning independent chroma mode with a single TU's recon */
    /* 2 to store winning independent chroma mode with 4 TUs' recon */
    void *apv_chroma_recon_bufs[3];

    /* The following two arrays are used to store the ID's of the buffers */
    /* where the winning recon is being stored */
    /* For Luma buffers, the permissible values are 0, 1 and UCHAR_MAX */
    /* For Chroma buffers, the permissible values are 0, 1, 2 and UCHAR_MAX */
    /* The value 'UCHAR_MAX' indicates the absence of Recon for that particular TU */
    UWORD8 au1_bufId_with_winning_LumaRecon[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];

    /* 2 - 2 Chroma planes */
    /* 2 - 2 possible subTU's */
    UWORD8 au1_bufId_with_winning_ChromaRecon[2][MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW][2];

    /* stride of the buffers in apv_luma_recon_bufs */
    WORD32 i4_lumaRecon_stride;

    /* stride of the buffers in apv_chroma_recon_bufs */
    WORD32 i4_chromaRecon_stride;

    /* one availability flag per entry of apv_chroma_recon_bufs */
    UWORD8 au1_is_chromaRecon_available[3];

    /* availability flag for the luma recon */
    UWORD8 u1_is_lumaRecon_available;

} recon_datastore_t;
496  
/** Final (best candidate) parameters of a CU as produced by enc_loop: mode
decisions, bit and distortion statistics, PU / TU data and the coefficient
buffers used during RD optimisation */
typedef struct enc_loop_cu_final_prms_t
{
    /** recon buffers and the ids of the buffers holding the winning recon */
    recon_datastore_t s_recon_datastore;

    /**
    * Cu size of the current cu being processed
    */
    UWORD8 u1_cu_size;
    /**
    * flags to indicate the final cu prediction mode
    */
    UWORD8 u1_intra_flag;

    /**
    * flags to indicate Skip mode for CU
    */
    UWORD8 u1_skip_flag;

    /**
    * number of tu in current cu for a given mode
    * if skip then this value should be 1
    */
    UWORD16 u2_num_tus_in_cu;

    /**
    * number of pu in current cu for a given mode
    * if skip then this value should be 1
    */
    UWORD16 u2_num_pus_in_cu;

    /**
    * total bytes produced in ECD data buffer
    * if skip then this value should be 0
    */
    WORD32 i4_num_bytes_ecd_data;

    /**
    * Partition mode of the best candidate
    * if skip then this value should be SIZE_2Nx2N
    * @sa PART_SIZE_E
    */
    UWORD8 u1_part_mode;

    /**
    * indicates if inter cu has coded coeffs 1: coded, 0: not coded
    * if skip then this value should be ignored
    */
    UWORD8 u1_is_cu_coded;

    /**
    * Chroma pred mode as signalled in bitstream
    */
    UWORD8 u1_chroma_intra_pred_mode;

    /**
    * To store the best chroma mode for TU. Will be same for NxN case.
    * Actual Chroma pred
    */
    UWORD8 u1_chroma_intra_pred_actual_mode;

    /**
    * sad accumulated over all Tus of given CU
    */
    UWORD32 u4_cu_sad;

    /**
    * ssd accumulated over all Tus of given CU
    * NOTE(review): the original comment said "sad"; the member name says
    * ssd - confirm
    */
    LWORD64 i8_cu_ssd;

    /**
    * open loop intra sad
    */
    UWORD32 u4_cu_open_intra_sad;

    /**
    * header bits of cu estimated during RDO evaluation.
    * Includes tu splits flags excludes cbf flags
    */
    UWORD32 u4_cu_hdr_bits;
    /**
    * luma residual bits of a cu estimated during RDO evaluation.
    */
    UWORD32 u4_cu_luma_res_bits;

    /**
    * chroma residual bits of a cu estimated during RDO evaluation.
    */
    UWORD32 u4_cu_chroma_res_bits;

    /**
    * cbf bits of a cu estimated during RDO evaluation (considered as part of texture bits later)
    */
    UWORD32 u4_cu_cbf_bits;

    /**
    * array of PU for current CU
    * For Inter PUs this will contain the following
    *   - merge flag
    *   - (MVD and reference indices) or (Merge Index)
    *   - (if Cu is skipped then Merge index for skip
    *      will be in 1st PU entry in array)
    * for intra PU only intra flag will be set to 1
    *
    */
    pu_t as_pu_enc_loop[NUM_PU_PARTS];

    /**
    * array of PU for chroma usage
    * in case of Merge, MVs and reference idx of the final candidate
    * used by luma need to be stored
    * for intra PU this will not be used
    */
    pu_t as_pu_chrm_proc[NUM_PU_PARTS];

    /**
    * array of colocated PU for current CU
    * MV and Ref pic id should be stored in this
    * for intra PU only intra flag will be set to 1
    */
    pu_col_mv_t as_col_pu_enc_loop[NUM_INTER_PU_PARTS];

    /** array to store the intra mode pred related params
    * if nxn mode then all 4 locations will be used
    */
    intra_prev_rem_flags_t as_intra_prev_rem[NUM_PU_PARTS];

    /**
    * array to store TU properties of each tu in a CU
    */
    tu_enc_loop_out_t as_tu_enc_loop[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];

    /**
    * array to store TU properties (req. for enc_loop only and not for
    * entropy) of each tu in a CU
    */
    tu_enc_loop_temp_prms_t as_tu_enc_loop_temp_prms[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];

    /**
    * Neighbour flags stored for chroma reuse
    */
    UWORD32 au4_nbr_flags[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];

    /**
    * intra pred modes stored for chroma reuse
    */
    UWORD8 au1_intra_pred_mode[4];

    /**
    * pointer to the buffer storing coeffs during RD opt stage at CU level.
    * Luma and chroma together
    */
    UWORD8 *pu1_cu_coeffs;

    /**
    * NOTE(review): the original comment here was identical to the one on
    * i4_chrm_deq_coeff_strt_idx ("Chroma deq_coeffs start point in the
    * ai2_cu_deq_coeffs buffer"); going by the name this is presumably the
    * chroma start point in the pu1_cu_coeffs buffer - confirm
    */
    WORD32 i4_chrm_cu_coeff_strt_idx;

    /**
    * pointer to the buffer storing dequantized vals. during RD opt stage at
    * CU level
    * Luma and chroma together.
    * Stride is assumed to be cu_size
    * u-v interleaved storing is at TU level
    */
    WORD16 *pi2_cu_deq_coeffs;

    /**
    * Chroma deq_coeffs start point in the pi2_cu_deq_coeffs buffer
    * (the original comment called the buffer ai2_cu_deq_coeffs).
    */
    WORD32 i4_chrm_deq_coeff_strt_idx;

    /**
    * The total RDOPT cost of the CU for the best mode
    */
    LWORD64 i8_best_rdopt_cost;

    /**
    * The current running RDOPT cost for the current mode
    */
    LWORD64 i8_curr_rdopt_cost;

    /**
    * NOTE(review): undocumented in the original; presumably the distortion
    * of the best mode - confirm
    */
    LWORD64 i8_best_distortion;

} enc_loop_cu_final_prms_t;
682  
/** Chroma buffer pointers and strides of the current CU. Every buffer has a
UWORD8 (pu1_) and a UWORD16 (pu2_) pointer variant.
NOTE(review): presumably the pu1_ pointers are used for 8 bit content and the
pu2_ pointers for higher bit depths - confirm */
typedef struct
{
    /** Current Cu chroma recon pointer in pic buffer */
    UWORD8 *pu1_final_recon;

    UWORD16 *pu2_final_recon;

    /** Current Cu chroma source pointer in pic buffer */
    UWORD8 *pu1_curr_src;

    UWORD16 *pu2_curr_src;

    /** Current CU chroma recon buffer stride */
    WORD32 i4_chrm_recon_stride;

    /** Current CU chroma source buffer stride */
    WORD32 i4_chrm_src_stride;

    /** Current Cu chroma Left pointer for intra pred */
    UWORD8 *pu1_cu_left;

    UWORD16 *pu2_cu_left;

    /** Left buffer stride */
    WORD32 i4_cu_left_stride;

    /** Current Cu chroma top pointer for intra pred */
    UWORD8 *pu1_cu_top;

    UWORD16 *pu2_cu_top;

    /** Current Cu chroma top left pointer for intra pred */
    UWORD8 *pu1_cu_top_left;

    UWORD16 *pu2_cu_top_left;

} enc_loop_chrm_cu_buf_prms_t;
720  
/** One intra SATD candidate of a CU: its cost, TU depth, luma modes,
 *  partition mode and an evaluation flag. */
721  typedef struct
722  {
723      /** cost of the current satd cand */
724      WORD32 i4_cost;
725  
726      /** tu size w.r.t. cu of the current satd cand
727      * @sa TU_SIZE_WRT_CU_T (NOTE(review): field has an i4_ prefix but is declared WORD8)
728      */
729      WORD8 i4_tu_depth;
730  
731      /**
732      *  access valid number of entries in this array based on u1_part_mode
733      */
734      UWORD8 au1_intra_luma_modes[NUM_PU_PARTS];
735  
736      /** @remarks partition mode: 2Nx2N or NxN  */
737      UWORD8 u1_part_mode; /* @sa: PART_SIZE_E */
738  
739      /** Flag to indicate whether current candidate needs to be evaluated */
740      UWORD8 u1_eval_flag;
741  
742  } cu_intra_satd_out_t;
743  
744  /** \brief CU level parameters for SATD / RDOPT function:
 *  source / recon / subpel buffer pointers, their strides, CTB size and
 *  position, and per-CU counters. pu2_ members are the UWORD16 variants
 *  of the pu1_ pointers declared just before them. */
745  
746  typedef struct
747  {
748      /** pointer to source luma buffer
749      *  pointer will be pointing to CTB start location
750      *  At CU level based on the CU position this pointer
751      *  has to be appropriately incremented
752      */
753      UWORD8 *pu1_luma_src;
754  
755      UWORD16 *pu2_luma_src;
756  
757      /** pointer to source chroma buffer
758      *  pointer will be pointing to CTB start location
759      *  At CU level based on the CU position this pointer
760      *  has to be appropriately incremented
761      */
762      UWORD8 *pu1_chrm_src;
763  
764      UWORD16 *pu2_chrm_src;
765  
766      /** pointer to recon luma buffer
767      *  pointer will be pointing to CTB start location
768      *  At CU level based on the CU position this pointer
769      *  has to be appropriately incremented
770      */
771      UWORD8 *pu1_luma_recon;
772  
773      UWORD16 *pu2_luma_recon;
774  
775      /** pointer to recon chroma buffer
776      *  pointer will be pointing to CTB start location
777      *  At CU level based on the CU position this pointer
778      *  has to be appropriately incremented
779      */
780      UWORD8 *pu1_chrm_recon;
781  
782      UWORD16 *pu2_chrm_recon;
783  
784      /* 1st pass parallel dpb buffer pointers, similar to the above */
785      UWORD8 *pu1_luma_recon_src;
786  
787      UWORD16 *pu2_luma_recon_src;
788  
789      UWORD8 *pu1_chrm_recon_src;
790  
791      UWORD16 *pu2_chrm_recon_src;
792  
793      /** Pointer to Subpel Plane Buffer (hxfy) */
794      UWORD8 *pu1_sbpel_hxfy;
795  
796      /** Pointer to Subpel Plane Buffer (fxhy) */
797      UWORD8 *pu1_sbpel_fxhy;
798  
799      /** Pointer to Subpel Plane Buffer (hxhy) */
800      UWORD8 *pu1_sbpel_hxhy;
801  
802      /** Luma source stride */
803      WORD32 i4_luma_src_stride;
804  
805      /** chroma source stride */
806      WORD32 i4_chrm_src_stride;
807  
808      /** Luma recon stride */
809      WORD32 i4_luma_recon_stride;
810  
811      /** chroma recon stride */
812      WORD32 i4_chrm_recon_stride;
813  
814      /** ctb size */
815      WORD32 i4_ctb_size;
816  
817      /** current ctb position horz */
818      WORD32 i4_ctb_pos;
819  
820      /** number of PU finalized for curr CU  */
821      WORD32 i4_num_pus_in_cu;
822  
823      /** number of bytes consumed for current CU in ecd data buf
      *  (NOTE(review): "CU" inferred from the struct's scope - confirm) */
824      WORD32 i4_num_bytes_cons;
825  
826      UWORD8 u1_is_cu_noisy; /* NOTE(review): undocumented; by name, a per-CU noise flag */
827  
828      UWORD8 *pu1_is_8x8Blk_noisy; /* NOTE(review): undocumented; by name, per-8x8-block noise flags */
829  
830  } enc_loop_cu_prms_t;
831  
832  /**
833  ******************************************************************************
834  *  @brief Pad inter pred recon context: subpel plane and recon buffer
*         pointers with their strides, CTB size and chroma array type
835  ******************************************************************************
836  */
837  typedef struct
838  {
839      /** Pointer to Subpel Plane Buffer (hxfy) */
840      UWORD8 *pu1_sbpel_hxfy;
841  
842      /** Pointer to Subpel Plane Buffer (fxhy) */
843      UWORD8 *pu1_sbpel_fxhy;
844  
845      /** Pointer to Subpel Plane Buffer (hxhy) */
846      UWORD8 *pu1_sbpel_hxhy;
847  
848      /** pointer to recon luma buffer
849      *  pointer will be pointing to CTB start location
850      *  At CU level based on the CU position this pointer
851      *  has to be appropriately incremented
852      */
853      UWORD8 *pu1_luma_recon;
854  
855      /** pointer to recon chroma buffer
856      *  pointer will be pointing to CTB start location
857      *  At CU level based on the CU position this pointer
858      *  has to be appropriately incremented
859      */
860      UWORD8 *pu1_chrm_recon;
861  
862      /* For recon source 1st pass starts */
863  
864      UWORD8 *pu1_luma_recon_src; /* luma counterpart of pu1_chrm_recon_src below */
865  
866      /** pointer to recon chroma buffer (1st pass recon source)
867      *  pointer will be pointing to CTB start location
868      *  At CU level based on the CU position this pointer
869      *  has to be appropriately incremented
870      */
871      UWORD8 *pu1_chrm_recon_src;
872      /* For recon source 1st pass ends */
873      /** Luma recon stride */
874      WORD32 i4_luma_recon_stride;
875  
876      /** chroma recon stride */
877      WORD32 i4_chrm_recon_stride;
878  
879      /** ctb size */
880      WORD32 i4_ctb_size;
881  
882      /* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
883      UWORD8 u1_chroma_array_type;
884  
885  } pad_interp_recon_frm_t;
886  
887  /**
888  ******************************************************************************
889  *  @brief inter prediction (MC) context for enc loop: reference list
*         pointer, interpolation scratch buffers, weighted-prediction
*         settings and CTB position within the frame
890  ******************************************************************************
891  */
892  /* IMPORTANT: keep inter_pred_ctxt_t and inter_pred_me_ctxt_t identical */
893  typedef struct
894  {
895      /** pointer to reference lists (pointer to an array of
      *  HEVCE_MAX_REF_PICS * 2 recon_pic_buf_t pointers) */
896      recon_pic_buf_t *(*ps_ref_list)[HEVCE_MAX_REF_PICS * 2];
897  
898      /** scratch buffer for horizontal interpolation destination */
899      WORD16 MEM_ALIGN16 ai2_horz_scratch[MAX_CTB_SIZE * (MAX_CTB_SIZE + 8)];
900  
901      /** scratch 16 bit buffer for interpolation in l0 direction */
902      WORD16 MEM_ALIGN16 ai2_scratch_buf_l0[MAX_CTB_SIZE * MAX_CTB_SIZE];
903  
904      /** scratch 16 bit buffer for interpolation in l1 direction */
905      WORD16 MEM_ALIGN16 ai2_scratch_buf_l1[MAX_CTB_SIZE * MAX_CTB_SIZE];
906  
907      /** Pointer to struct containing function pointers to
908      functions in the 'common' library */
909      func_selector_t *ps_func_selector;
910  
911      /** common denominator used for luma weights */
912      WORD32 i4_log2_luma_wght_denom;
913  
914      /** common denominator used for chroma weights */
915      WORD32 i4_log2_chroma_wght_denom;
916  
917      /**  offset w.r.t frame start in horz direction (pels) */
918      WORD32 i4_ctb_frm_pos_x;
919  
920      /**  offset w.r.t frame start in vert direction (pels) */
921      WORD32 i4_ctb_frm_pos_y;
922  
923      /* Bit Depth of Input */
924      WORD32 i4_bit_depth;
925  
926      /* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
927      UWORD8 u1_chroma_array_type;
928  
929      /** weighted_pred_flag      */
930      WORD8 i1_weighted_pred_flag;
931  
932      /** weighted_bipred_flag    */
933      WORD8 i1_weighted_bipred_flag;
934  
935      /** Structure to describe extra CTBs around frame due to search
936      range associated with distributed-mode. Entries are top, left,
937      right and bottom */
938      WORD32 ai4_tile_xtra_pel[4];
939  
940  } inter_pred_ctxt_t;
941  /* IMPORTANT: please keep inter_pred_ctxt_t and inter_pred_me_ctxt_t identical */
942  
/** Function pointer type for a luma inter-prediction routine operating on one PU.
 *  Takes an opaque inter-pred context, the PU, an untyped destination buffer
 *  with its stride, and an inter-pred source flag; returns IV_API_CALL_STATUS_T.
 *  NOTE(review): the context is presumably an inter_pred_ctxt_t (or the ME
 *  equivalent) and the flag presumably selects the recon source - confirm. */
943  typedef IV_API_CALL_STATUS_T (*PF_LUMA_INTER_PRED_PU)(
944      void *pv_inter_pred_ctxt,
945      pu_t *ps_pu,
946      void *pv_dst_buf,
947      WORD32 dst_stride,
948      WORD32 i4_flag_inter_pred_source);
949  
950  /**
951  ******************************************************************************
952  *  @brief  Motion predictor context structure: reference lists, slice
*          header, SPS, CTB coordinates and merge-level / tile settings
953  ******************************************************************************
954  */
955  typedef struct
956  {
957      /** pointer to reference lists (pointer to an array of
      *  HEVCE_MAX_REF_PICS * 2 recon_pic_buf_t pointers) */
958      recon_pic_buf_t *(*ps_ref_list)[HEVCE_MAX_REF_PICS * 2];
959  
960      /** pointer to the slice header */
961      slice_header_t *ps_slice_hdr;
962  
963      /** pointer to SPS */
964      sps_t *ps_sps;
965  
966      /** CTB x. In CTB unit */
967      WORD32 i4_ctb_x;
968  
969      /** CTB y. In CTB unit */
970      WORD32 i4_ctb_y;
971  
972      /** Log2 Parallel Merge Level - 2  */
973      WORD32 i4_log2_parallel_merge_level_minus2;
974  
975      /* Number of extra CTBs external to tile due to fetched search-range around Tile */
976      /* Entries are top, left, right and bottom */
977      WORD32 ai4_tile_xtra_ctb[4];
978  
979  } mv_pred_ctxt_t;
980  
981  /**
982  ******************************************************************************
983  *  @brief  Deblocking and Boundary strength CTB level structure: packed
*          horizontal / vertical BS arrays plus CTB availability flags
984  ******************************************************************************
985  */
986  typedef struct
987  {
988      /** Array to store the packed BS values in horizontal direction
      *  ((MAX_CTB_SIZE >> 3) + 1 words) */
989      UWORD32 au4_horz_bs[(MAX_CTB_SIZE >> 3) + 1];
990  
991      /** Array to store the packed BS values in vertical direction
      *  ((MAX_CTB_SIZE >> 3) + 1 words) */
992      UWORD32 au4_vert_bs[(MAX_CTB_SIZE >> 3) + 1];
993  
994      /** CTB neighbour availability flags for deblocking */
995      UWORD8 u1_not_first_ctb_col_of_frame;
996      UWORD8 u1_not_first_ctb_row_of_frame;
997  
998  } deblk_bs_ctb_ctxt_t;
999  
1000  /**
1001  ******************************************************************************
1002  *  @brief  Deblocking CTB level structure: previous-CTB BS, neighbour
*          4x4 pointers (for Qp), BS pointers, luma / chroma pel pointers,
*          strides and deblocking offsets
1003  ******************************************************************************
1004  */
1005  typedef struct
1006  {
1007      /**
1008      * BS of the last vertical 4x4 column of previous CTB
1009      */
1010      UWORD8 au1_prev_bs[MAX_CTB_SIZE >> 3];
1011  
1012      /**
1013      * BS of the last vertical 4x4 column of previous CTB
      * NOTE(review): text duplicates au1_prev_bs; by name this is
      * presumably the chroma (uv) counterpart - confirm
1014      */
1015      UWORD8 au1_prev_bs_uv[MAX_CTB_SIZE >> 3];
1016  
1017      /** pointer to top 4x4 ctb nbr structure; for accessing qp  */
1018      nbr_4x4_t *ps_top_ctb_nbr_4x4;
1019  
1020      /** pointer to left 4x4 ctb nbr structure; for accessing qp */
1021      nbr_4x4_t *ps_left_ctb_nbr_4x4;
1022  
1023      /** pointer to current 4x4 ctb nbr structure; for accessing qp */
1024      nbr_4x4_t *ps_cur_ctb_4x4;
1025  
1026      /** horizontal bs: max of 8 such contiguous bs to be computed for 64x64 ctb */
1027      UWORD32 *pu4_bs_horz;
1028  
1029      /** vertical bs: max of 8 such contiguous bs to be computed for 64x64 ctb */
1030      UWORD32 *pu4_bs_vert;
1031  
1032      /** ptr to current ctb luma pel in frame */
1033      UWORD8 *pu1_ctb_y;
1034  
1035      UWORD16 *pu2_ctb_y; /* UWORD16 variant of pu1_ctb_y */
1036  
1037      /** ptr to current ctb sp interleaved chroma pel in frame */
1038      UWORD8 *pu1_ctb_uv;
1039  
1040      UWORD16 *pu2_ctb_uv; /* UWORD16 variant of pu1_ctb_uv */
1041  
1042      func_selector_t *ps_func_selector; /* function selector table; contents defined elsewhere */
1043  
1044      /** left nbr buffer stride in terms of 4x4 units */
1045      WORD32 i4_left_nbr_4x4_strd;
1046  
1047      /** current  buffer stride in terms of 4x4 units */
1048      WORD32 i4_cur_4x4_strd;
1049  
1050      /** size in pels 16 / 32 /64 */
1051      WORD32 i4_ctb_size;
1052  
1053      /** stride for luma       */
1054      WORD32 i4_luma_pic_stride;
1055  
1056      /** stride for  chroma */
1057      WORD32 i4_chroma_pic_stride;
1058  
1059      /** boolean indicating if left ctb edge is to be deblocked or not */
1060      WORD32 i4_deblock_left_ctb_edge;
1061  
1062      /** boolean indicating if top ctb edge is to be deblocked or not */
1063      WORD32 i4_deblock_top_ctb_edge;
1064  
1065      /** beta offset index */
1066      WORD32 i4_beta_offset_div2;
1067  
1068      /** tc offset index */
1069      WORD32 i4_tc_offset_div2;
1070  
1071      /** chroma cb qp offset index */
1072      WORD32 i4_cb_qp_indx_offset;
1073  
1074      /** chroma cr qp offset index */
1075      WORD32 i4_cr_qp_indx_offset;
1076  
1077      WORD32 i4_bit_depth; /* bit depth (undocumented in the original) */
1078  
1079      /* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
1080      UWORD8 u1_chroma_array_type;
1081  
1082  } deblk_ctb_params_t;
1083  
1084  /**
1085  ******************************************************************************
1086  *  @brief  Stores the BS and Qp of a CTB row. For CTB-row level deblocking
1087  ******************************************************************************
1088  */
1089  typedef struct deblk_ctbrow_prms
1090  {
1091      /**
1092      * Refer to ihevce_enc_loop_get_mem_recs() and
1093      * ihevce_enc_loop_init() for more info
1094      * regarding memory allocation to each one below.
1095      */
1096  
1097      /**
1098      * Stores the vertical boundary strength of a CTB row.
1099      */
1100      UWORD32 *pu4_ctb_row_bs_vert;
1101  
1102      /**
1103      * Storage is same as above. Contains horizontal BS.
1104      */
1105      UWORD32 *pu4_ctb_row_bs_horz;
1106  
1107      /**
1108      * Pointer to the CTB row's Qp storage
1109      */
1110      WORD8 *pi1_ctb_row_qp;
1111  
1112      /**
1113      * Stride of the Qp buffer in WORD32 unit
      * NOTE(review): the original comment named "pu1_ctb_row_qp_p", which is
      * not a member here; presumably pi1_ctb_row_qp is meant. The field has a
      * u4_ prefix but is declared WORD32.
1114      */
1115      WORD32 u4_qp_buffer_stride;
1116  
1117      /*
1118      *   Pointer to the  memory which contains the Qp of
1119      *   top4x4 neighbour blocks for each CTB row.
1120      *   This memory is at frame level.
      *   One pointer per entry, MAX_NUM_ENC_LOOP_PARALLEL entries.
1121      */
1122      WORD8 *api1_qp_top_4x4_ctb_row[MAX_NUM_ENC_LOOP_PARALLEL];
1123  
1124      /*
1125      *   Stride of the above memory location.
1126      *   Values in one stride correspond to one CTB row.
1127      */
1128      WORD32 u4_qp_top_4x4_buf_strd;
1129  
1130      /* size of frm level qp buffer */
1131      WORD32 u4_qp_top_4x4_buf_size;
1132  
1133  } deblk_ctbrow_prms_t;
1134  
1135  /**
1136  ******************************************************************************
1137  *  @brief  Entropy rd opt context for cabac bit estimation and RDO
1138  ******************************************************************************
1139  */
1140  typedef struct rdopt_entropy_ctxt
1141  {
1142      /**
1143      * array for entropy contexts during RD opt stage at CU level
1144      * one best and one current is required (hence 2 entries)
1145      */
1146      entropy_context_t as_cu_entropy_ctxt[2];
1147  
1148      /**
1149      * init state of entropy context models during CU RD opt stage,
1150      * required for saving and restoring the cabac states
1151      */
1152      UWORD8 au1_init_cabac_ctxt_states[IHEVC_CAB_CTXT_END];
1153  
1154      /*
1155      * ptr to top row cu skip flags (1 bit per 8x8 CU)
1156      */
1157      UWORD8 *pu1_cu_skip_top_row;
1158  
1159      /**
1160      * Current entropy ctxt idx
      * NOTE(review): presumably indexes as_cu_entropy_ctxt - confirm
1161      */
1162      WORD32 i4_curr_buf_idx;
1163  
1164  } rdopt_entropy_ctxt_t;
1165  
1166  /**
1167  ******************************************************************************
1168  *  @brief  structure to save predicted data from Inter SATD stage to Inter RD opt stage
1169  ******************************************************************************
1170  */
1171  typedef struct
1172  {
1173      /* Buffers to store the predicted data after motion compensation for merge and
1174      * skip candidates.
1175      * [2] because, for a given candidate, motion compensation is done for 5 merge
1176      *     candidates: the pred data after mc is stored for the first 2 candidates and,
1177      *     from the 3rd candidate onwards, the data with the higher SATD cost is overwritten.
1178      */
1179      void *apv_pred_data[2];
1180  
1181      /** Stride to store the predicted data
1182      */
1183      WORD32 i4_pred_data_stride;
1184  
1185  } merge_skip_pred_data_t;
1186  /**
1187  ******************************************************************************
1188  *  @brief  Structure to hold Rate control related parameters
1189  *          for each bit-rate instance and each thread
1190  ******************************************************************************
1191  */
1192  typedef struct
1193  {
1194      /**
1195      * frame level open loop ssd
1196      * NOTE(review): the comment previously read "intra sad"; reworded to match the field name
1197      */
1198      LWORD64 i8_frame_open_loop_ssd;
1199  
1200      /**
1201      * frame level open loop intra sad
1202      *
1203      */
1204      UWORD32 u4_frame_open_loop_intra_sad;
1205      /**
1206      * frame level intra sad accumulator
1207      */
1208      UWORD32 u4_frame_intra_sad;
1209  
1210      /**
1211      *  frame level sad accumulator
1212      */
1213      UWORD32 u4_frame_sad_acc;
1214  
1215      /**
1216      *  frame level inter sad accumulator
      *  (comment previously said "intra", swapped with the next member)
1217      */
1218      UWORD32 u4_frame_inter_sad_acc;
1219  
1220      /**
1221      *  frame level intra sad accumulator
      *  (comment previously said "inter", swapped with the previous member)
1222      */
1223      UWORD32 u4_frame_intra_sad_acc;
1224  
1225      /**
1226      *  frame level cost accumulator
1227      */
1228      LWORD64 i8_frame_cost_acc;
1229  
1230      /**
1231      *  frame level inter cost accumulator
      *  (comment previously said "intra", swapped with the next member)
1232      */
1233      LWORD64 i8_frame_inter_cost_acc;
1234  
1235      /**
1236      *  frame level intra cost accumulator
      *  (comment previously said "inter", swapped with the previous member)
1237      */
1238      LWORD64 i8_frame_intra_cost_acc;
1239  
1240      /**
1241      * frame level rdopt bits accumulator
1242      */
1243      UWORD32 u4_frame_rdopt_bits;
1244  
1245      /**
1246      * frame level rdopt header bits accumulator
1247      */
1248      UWORD32 u4_frame_rdopt_header_bits;
1249  
1250      /* Sum of the Qps of each 8x8 block in CU
1251      * 8x8 block is considered as the min CU size possible as per standard is 8
1252      * 0 corresponds to INTER and 1 corresponds to INTRA
1253      */
1254      WORD32 i4_qp_normalized_8x8_cu_sum[2];
1255  
1256      /* Count of the number of 8x8 blocks in each CU type (INTER/INTRA)
1257      * 0 corresponds to INTER and 1 corresponds to INTRA
1258      */
1259      WORD32 i4_8x8_cu_sum[2];
1260  
1261      /* SAD/Qscale accumulated over all CUs. CU size is inherently
1262      * taken care of in SAD
      * NOTE(review): the [2] is presumably INTER/INTRA as above - confirm
1263      */
1264      LWORD64 i8_sad_by_qscale[2];
1265  
1266  } enc_loop_rc_params_t;
1267  /**
1268  ******************************************************************************
1269  *  @brief  CU information structure. This is to store the
1270  *  CU final out after Recursion
1271  ******************************************************************************
1272  */
1273  typedef struct ihevce_enc_cu_node_ctxt_t
1274  {
1275      /* CU params */
1276      /** CU X position in terms of min CU (8x8) units */
1277      UWORD8 b3_cu_pos_x : 3;
1278  
1279      /** CU Y position in terms of min CU (8x8) units */
1280      UWORD8 b3_cu_pos_y : 3;
1281  
1282      /** reserved bits (2-bit field) */
1283      UWORD8 b2_reserved : 2;
1284  
1285      /** CU size 2N (width or height) in pixels */
1286      UWORD8 u1_cu_size;
1287  
1288      /**
1289      * cu level final params for a given mode, stored by value
1290      * NOTE(review): the original text called this an "array ... one best and
      * one current"; the member is a single struct
1291      */
1292      enc_loop_cu_final_prms_t s_cu_prms;
1293  
1294      /**
1295      * pointer to cu level final params for a given mode
1296      * NOTE(review): the original text was identical to s_cu_prms above;
      * what this pointer targets is not visible here
1297      */
1298      enc_loop_cu_final_prms_t *ps_cu_prms;
1299  
1300      /* flag to indicate if current CU is the first
1301      CU of the Quantisation group */
1302      UWORD32 b1_first_cu_in_qg : 1;
1303  
1304      /** qp used for the CU
1305      * @remarks :
1306      */
1307      WORD8 i1_cu_qp;
1308  
1309  } ihevce_enc_cu_node_ctxt_t;
1310  
/** One block-merge result node: SAD, MV cost, total cost, reference index
 *  and motion vector. NOTE(review): i4_tot_cost is presumably
 *  i4_sad + i4_mv_cost - confirm where it is filled. */
1311  typedef struct
1312  {
1313      WORD32 i4_sad;
1314  
1315      WORD32 i4_mv_cost;
1316  
1317      WORD32 i4_tot_cost;
1318  
1319      WORD8 i1_ref_idx;
1320  
1321      mv_t s_mv;
1322  
1323  } block_merge_nodes_t;
1324  
1325  /**
1326  ******************************************************************************
1327  *  @brief  This struct is used for storing output of block merge
1328  ******************************************************************************
1329  */
1330  typedef struct
1331  {
      /* Pointers to result nodes, one entry per MAX_NUM_PARTS slot */
1332      block_merge_nodes_t *aps_best_results[MAX_NUM_PARTS];
1333  
1334      /* Contains the best uni dir for each partition type */
1335      WORD32 ai4_best_uni_dir[MAX_NUM_PARTS];
1336  
1337      /* Contains the best pred dir for each partition type */
1338      WORD32 ai4_best_pred_dir[MAX_NUM_PARTS];
1339  
1340      WORD32 i4_tot_cost; /* total cost of this result */
1341  
1342      PART_TYPE_T e_part_type; /* partition type of this result */
1343  } block_merge_results_t;
1344  
1345  /**
1346  ******************************************************************************
1347  *  @brief  This struct is used for storing output of block merge and also
1348  *          all of the intermediate results required
1349  ******************************************************************************
1350  */
1351  typedef struct
1352  {
      /* Best results, dimensioned [3 + 1][NUM_BEST_ME_OUTPUTS].
       * NOTE(review): meaning of the first index is not visible here */
1353      block_merge_results_t as_best_results[3 + 1][NUM_BEST_ME_OUTPUTS];
1354  
      /* Node storage, dimensioned [3][TOT_NUM_PARTS][NUM_BEST_ME_OUTPUTS] */
1355      block_merge_nodes_t as_nodes[3][TOT_NUM_PARTS][NUM_BEST_ME_OUTPUTS];
1356  
1357      WORD32 part_mask;
1358  
1359      WORD32 num_results_per_part;
1360  
1361      WORD32 num_best_results;
1362  
1363      /**
1364      * Overall best CU cost. While other entries store CU costs
1365      * in a single direction, this is the best CU cost, where each
1366      * partition cost is evaluated as best of uni/bi
1367      */
1368      WORD32 best_cu_cost;
1369  
1370  } block_merge_data_t;
1371  /**
1372  ******************************************************************************
1373  *  @brief  CU nbr information structure. This is to store the
1374  *  neighbour information for final reconstruction function
1375  ******************************************************************************
1376  */
1377  typedef struct
1378  {
1379      /* Pointer to top-left nbr */
1380      nbr_4x4_t *ps_topleft_nbr_4x4;
1381      /* Pointer to left nbr */
1382      nbr_4x4_t *ps_left_nbr_4x4;
1383      /* Pointer to top nbr */
1384      nbr_4x4_t *ps_top_nbr_4x4;
1385      /* stride of left_nbr_4x4 */
1386      WORD32 nbr_4x4_left_strd;
1387  
1388      /* Pointer to CU top */
1389      UWORD8 *pu1_cu_top;
1390  
1391      UWORD16 *pu2_cu_top; /* UWORD16 variant of pu1_cu_top */
1392  
1393      /* Pointer to CU top-left */
1394      UWORD8 *pu1_cu_top_left;
1395  
1396      UWORD16 *pu2_cu_top_left; /* UWORD16 variant of pu1_cu_top_left */
1397  
1398      /* Pointer to CU left */
1399      UWORD8 *pu1_cu_left;
1400  
1401      UWORD16 *pu2_cu_left; /* UWORD16 variant of pu1_cu_left */
1402  
1403      /* stride of left pointer */
1404      WORD32 cu_left_stride;
1405  } cu_nbr_prms_t;
1406  
1407  /** Structure to save the flags required for the Final mode Reconstruction
1408  function. These flags are set based on quality presets and
1409  the bit-rate we are working on */
1410  typedef struct
1411  {
1412      /** Flag to indicate whether Luma pred data needs to be recomputed in the
1413      final_recon function. Now disabled for all modes */
1414      UWORD8 u1_eval_luma_pred_data;
1415  
1416      /** Flag to indicate whether Chroma pred data needs to be recomputed in the
1417      final_recon function. Now disabled for MedSpeed only */
1418      UWORD8 u1_eval_chroma_pred_data;
1419  
1420      /** Flag to indicate whether header data needs to be recomputed in the
1421      final_recon function. Now disabled for all modes */
1422      UWORD8 u1_eval_header_data;
1423  
1424      UWORD8 u1_eval_recon_data; /* NOTE(review): undocumented; by name, the recon-data counterpart of the flags above */
1425  } cu_final_recon_flags_t;
1426  
1427  /**
1428  ******************************************************************************
1429  *  @brief  structure to save pred data of ME cand. 1 ping-pong to store
1430  *  the best and current luma cand. 1 buffer to store the best chroma pred
1431  ******************************************************************************
1432  */
1433  typedef struct
1434  {
1435      /** Pointers to store luma pred data of me/intra cand.(2) and chroma(1) */
1436      UWORD8 *pu1_pred_data[NUM_CU_ME_INTRA_PRED_IDX];
1437  
1438      UWORD16 *pu2_pred_data[NUM_CU_ME_INTRA_PRED_IDX]; /* UWORD16 variant of pu1_pred_data */
1439  
1440      /** Stride to store the predicted data of me/intra cand.(2) and chroma(1) */
1441      WORD32 ai4_pred_data_stride[NUM_CU_ME_INTRA_PRED_IDX];
1442      /** Counter saying how many pointers are assigned */
1443      WORD32 i4_pointer_count;
1444  
1445  } cu_me_intra_pred_prms_t;
1446  
1447  /**
1448  ******************************************************************************
1449  *  @brief  Chroma intra SATD context structure (one instance per TU-size
*          candidate inside ihevce_chroma_rdopt_ctxt_t)
1450  ******************************************************************************
1451  */
1452  typedef struct
1453  {
1454      /** Storing the inverse quantized data (cb) for the special modes*/
1455      WORD16 ai2_iq_data_cb[(MAX_TU_SIZE * MAX_TU_SIZE) << 1];
1456  
1457      /** Storing the inverse quantized data (cr) for the special modes*/
1458      WORD16 ai2_iq_data_cr[(MAX_TU_SIZE * MAX_TU_SIZE) << 1];
1459  
1460      /** Storing the scan coeffs (cb) for the special modes*/
1461      UWORD8 au1_scan_coeff_cb[2][(MAX_TU_IN_CTB >> 1) * MAX_SCAN_COEFFS_BYTES_4x4];
1462  
1463      /** Storing the scan coeffs (cr) for the special modes*/
1464      UWORD8 au1_scan_coeff_cr[2][(MAX_TU_IN_CTB >> 1) * MAX_SCAN_COEFFS_BYTES_4x4];
1465  
1466      /** Max number of bytes filled in scan coeff data (cb) per TU*/
1467      WORD32 ai4_num_bytes_scan_coeff_cb_per_tu[2][MAX_TU_IN_TU_EQ_DIV_2];
1468  
1469      /** Max number of bytes filled in scan coeff data (cr) per TU*/
1470      WORD32 ai4_num_bytes_scan_coeff_cr_per_tu[2][MAX_TU_IN_TU_EQ_DIV_2];
1471  
1472      /** Stride of the iq buffer*/
1473      WORD32 i4_iq_buff_stride;
1474  
1475      /** Storing the pred data
1476      The predicted data is always interleaved. Therefore the size of this array will be
1477      (((MAX_TU_SIZE * MAX_TU_SIZE) >> 2) * 2) */
1478      void *pv_pred_data;
1479  
1480      /** Predicted data stride*/
1481      WORD32 i4_pred_stride;
1482  
1483      /** Storing the cbfs for each tu
1484      For 1 tu case, only the 0th element will be valid*/
1485      UWORD8 au1_cbf_cb[2][MAX_TU_IN_TU_EQ_DIV_2];
1486  
1487      /** Storing the cbfs for each tu
1488      For 1 tu case, only the 0th element will be valid*/
1489      UWORD8 au1_cbf_cr[2][MAX_TU_IN_TU_EQ_DIV_2];
1490  
1491      /** To store the cabac ctxt model updated by the RDOPT of best chroma mode
1492      NOTE(review): the original "[0] : 1 TU case, [1] : 4 TU case" note does not match
      this one-dimensional declaration; presumably the split now comes from the array
      of these structs in ihevce_chroma_rdopt_ctxt_t - confirm */
1493      UWORD8 au1_chrm_satd_updated_ctxt_models[IHEVC_CAB_CTXT_END];
1494  
1495      /** Best SATD chroma mode (scalar; the original [0]/[1] per-TU-case note no longer applies here)
1496      Values : 0(PLANAR), 1(VERT), 2(HOR), 3(DC) chroma mode per each TU */
1497      UWORD8 u1_best_cr_mode;
1498  
1499      /** Best SATD chroma mode's RDOPT cost (scalar) */
1500      LWORD64 i8_chroma_best_rdopt;
1501  
1502      /* Account for coding b3_chroma_intra_pred_mode prefix and suffix bins */
1503      /* This is done by adding the bits for signalling chroma mode (0-3)    */
1504      /* and subtracting the bits for chroma mode same as luma mode (4)      */
1505      LWORD64 i8_cost_to_encode_chroma_mode;
1506  
1507      /** Best SATD chroma mode's tu bits (scalar) */
1508      WORD32 i4_chrm_tu_bits;
1509  
1510      /** Storing the zero col values for each TU for cb*/
1511      WORD32 ai4_zero_col_cb[2][MAX_TU_IN_TU_EQ_DIV_2];
1512  
1513      /** Storing the zero col values for each TU for cr*/
1514      WORD32 ai4_zero_col_cr[2][MAX_TU_IN_TU_EQ_DIV_2];
1515  
1516      /** Storing the zero row values for each TU for cb*/
1517      WORD32 ai4_zero_row_cb[2][MAX_TU_IN_TU_EQ_DIV_2];
1518  
1519      /** Storing the zero row values for each TU for cr*/
1520      WORD32 ai4_zero_row_cr[2][MAX_TU_IN_TU_EQ_DIV_2];
1521  } chroma_intra_satd_ctxt_t;
1522  
1523  /**
1524  ******************************************************************************
1525  *  @brief  Chroma RDOPT context structure
1526  ******************************************************************************
1527  */
1528  typedef struct
1529  {
1530      /** Chroma SATD context structure. It is an array of two to account for the TU_EQ_CU candidate
1531      and the TU_EQ_CU_DIV2 candidate*/
1532      chroma_intra_satd_ctxt_t as_chr_intra_satd_ctxt[NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD];
1533  
1534      /** Chroma SATD has to be evaluated only for the HIGH QUALITY preset */
1535      UWORD8 u1_eval_chrm_satd;
1536  
1537      /** Chroma RDOPT has to be evaluated only for the HIGH QUALITY / MEDIUM SPEED preset */
1538      UWORD8 u1_eval_chrm_rdopt;
1539  
1540  } ihevce_chroma_rdopt_ctxt_t;
1541  
/** Block merge output: bundles the inter CU results with the inter PU results. */
1542  typedef struct
1543  {
1544      inter_cu_results_t s_cu_results;
1545  
1546      inter_pu_results_t s_pu_results;
1547  } block_merge_output_t;
1548  
1549  /**
1550  ******************************************************************************
1551  *  @brief  Structure to store the Merge/Skip Cand. for EncLoop
1552  ******************************************************************************
1553  */
1554  typedef struct
1555  {
1556      /** List of all merge/skip candidates to be evaluated (SATD/RDOPT) for
1557      * this CU
1558      */
1559      cu_inter_cand_t as_cu_inter_merge_skip_cand[MAX_NUM_CU_MERGE_SKIP_CAND];
1560  
1561      /** number of merge candidates
1562      */
1563      UWORD8 u1_num_merge_cands;
1564  
1565      UWORD8 u1_num_skip_cands; /* number of skip candidates */
1566  
1567      UWORD8 u1_num_merge_skip_cands; /* NOTE(review): presumably the combined merge + skip count - confirm */
1568  
1569  } cu_inter_merge_skip_t;
1570  
1571  /** Structure to store the Mixed mode Cand. for EncLoop:
 *  the candidate list plus separate counts for type0 and type1 candidates */
1572  typedef struct
1573  {
1574      cu_inter_cand_t as_cu_data[MAX_NUM_MIXED_MODE_INTER_RDO_CANDS];
1575  
1576      UWORD8 u1_num_mixed_mode_type0_cands;
1577  
1578      UWORD8 u1_num_mixed_mode_type1_cands;
1579  
1580  } cu_mixed_mode_inter_t;
1581  
/** Pool of inter prediction buffers with an in-use bit field and a shared stride. */
1582  typedef struct
1583  {
1584      /* Extra buffers beyond MAX_NUM_INTER_RDO_CANDS: an additional buffer is required for */
1585      /* storing both cur and best during merge eval.                                        */
      /* NOTE(review): the original comment said "+2" but the declaration adds 4 - confirm. */
1586      void *apv_inter_pred_data[MAX_NUM_INTER_RDO_CANDS + 4];
1587  
1588      /* Bit field used to determine the indices of free bufs in 'apv_inter_pred_data' buf array */
1589      UWORD32 u4_is_buf_in_use;
1590  
1591      /* Assumption is that the same stride is used for the */
1592      /* entire set of buffers above and is equal to the */
1593      /* CU size */
1594      WORD32 i4_pred_stride;
1595  
1596  } ihevce_inter_pred_buf_data_t;
1597  /** Structure to store the Inter Cand. info in EncLoop.
 *  The three arrays are all sized MAX_NUM_INTER_RDO_CANDS;
 *  NOTE(review): presumably indexed in parallel per candidate - confirm. */
1598  typedef struct
1599  {
1600      cu_inter_cand_t *aps_cu_data[MAX_NUM_INTER_RDO_CANDS];
1601  
1602      UWORD32 au4_cost[MAX_NUM_INTER_RDO_CANDS];
1603  
1604      UWORD8 au1_pred_buf_idx[MAX_NUM_INTER_RDO_CANDS];
1605  
1606      UWORD32 u4_src_variance;
1607  
1608      UWORD8 u1_idx_of_worst_cost_in_cost_array;
1609  
1610      UWORD8 u1_idx_of_worst_cost_in_pred_buf_array;
1611  
1612      UWORD8 u1_num_inter_cands;
1613  
1614  } inter_cu_mode_info_t;
1615  typedef struct
1616  {
1617      /* Frame level base pointer of buffers for each ctb row to store the top pixels
1618      * and top left pixel for the next ctb row. These buffers are common across all threads
1619      */
1620      UWORD8 *apu1_sao_src_frm_top_luma[MAX_NUM_ENC_LOOP_PARALLEL];
1621      /*Ctb level pointer to buffer to store the top pixels
1622      *and top left pixel for the next ctb row.These buffers are common accross all threads
1623      */
1624      UWORD8 *pu1_curr_sao_src_top_luma;
1625      /*Buffer to store the left boundary before
1626      * doing sao on current ctb for the next ctb in the current row
1627      */
1628      UWORD8 au1_sao_src_left_luma[MAX_CTB_SIZE];
1629      /*Frame level base pointer of buffers for each ctb row to store the top pixels
1630      *and top left pixel for the next ctb row.These buffers are common accross all threads
1631      */
1632      UWORD8 *apu1_sao_src_frm_top_chroma[MAX_NUM_ENC_LOOP_PARALLEL];
1633  
1634      WORD32 i4_frm_top_chroma_buf_stride;
1635  
1636      /*Ctb level pointer to buffer to store the top chroma pixels
1637      *and top left pixel for the next ctb row.These buffers are common accross all threads
1638      */
1639      UWORD8 *pu1_curr_sao_src_top_chroma;
1640  
1641      /*Scratch buffer to store the left boundary before
1642      * doing sao on current ctb for the next ctb in the current row
1643      */
1644      UWORD8 au1_sao_src_left_chroma[MAX_CTB_SIZE * 2];
1645  
1646      /**
1647      * Luma recon buffer
1648      */
1649      UWORD8 *pu1_frm_luma_recon_buf;
1650      /**
1651      * Chroma recon buffer
1652      */
1653      UWORD8 *pu1_frm_chroma_recon_buf;
1654      /**
1655      * Luma recon buffer for curr ctb
1656      */
1657      UWORD8 *pu1_cur_luma_recon_buf;
1658      /**
1659      * Chroma recon buffer for curr ctb
1660      */
1661      UWORD8 *pu1_cur_chroma_recon_buf;
1662      /**
1663      * Luma src buffer
1664      */
1665      UWORD8 *pu1_frm_luma_src_buf;
1666      /**
1667      * Chroma src buffer
1668      */
1669      UWORD8 *pu1_frm_chroma_src_buf;
1670      /**
1671      * Luma src(input yuv) buffer for curr ctb
1672      */
1673      UWORD8 *pu1_cur_luma_src_buf;
1674      /**
1675      * Chroma src buffer for curr ctb
1676      */
1677      UWORD8 *pu1_cur_chroma_src_buf;
1678      /* Left luma scratch buffer required for sao RD optimisation*/
1679      UWORD8 au1_left_luma_scratch[MAX_CTB_SIZE];
1680  
1681      /* Left chroma scratch buffer required for sao RD optimisation*/
1682      /* Min size required= MAX_CTB_SIZE/2 * 2
1683      * Multiplied by 2 because size reuired is MAX_CTB_SIZE/2 each for U and V
1684      */
1685      UWORD8 au1_left_chroma_scratch[MAX_CTB_SIZE * 2];
1686  
1687      /* Top luma scratch buffer required for sao RD optimisation*/
1688      UWORD8 au1_top_luma_scratch[MAX_CTB_SIZE + 2];  // +1 for top left pixel and +1 for top right
1689  
1690      /* Top chroma scratch buffer required for sao RD optimisation*/
1691      UWORD8 au1_top_chroma_scratch[MAX_CTB_SIZE + 4];  // +2 for top left pixel and +2 for top right
1692  
1693      /* Scratch buffer to store the sao'ed output during sao RD optimisation*/
1694      /* One extra row(bot pixels) is copied to scratch buf but 2d buf copy func copies multiple of 4 ,hence
1695      MAX_CTB _SIZE + 4*/
1696      UWORD8 au1_sao_luma_scratch[PING_PONG][SCRATCH_BUF_STRIDE * (MAX_CTB_SIZE + 4)];
1697  
1698      /* Scratch buffer to store the sao'ed output during sao RD optimisation*/
1699      /* One extra row(bot pixels) is copied to scratch buf but 2d buf copy func copies multiple of 4 ,hence
1700      MAX_CTB _SIZE + 4*/
1701      UWORD8 au1_sao_chroma_scratch[PING_PONG][SCRATCH_BUF_STRIDE * (MAX_CTB_SIZE + 4)];
1702  
1703      /**
1704      * CTB size
1705      */
1706      WORD32 i4_ctb_size;
1707      /**
1708      * Luma recon buffer stride
1709      */
1710      WORD32 i4_frm_luma_recon_stride;
1711      /**
1712      * Chroma recon buffer stride
1713      */
1714      WORD32 i4_frm_chroma_recon_stride;
1715      /**
1716      * Luma recon buffer stride for curr ctb
1717      */
1718      WORD32 i4_cur_luma_recon_stride;
1719      /**
1720      * Chroma recon buffer stride for curr ctb
1721      */
1722      WORD32 i4_cur_chroma_recon_stride;
1723      /**
1724      * Luma src buffer stride
1725      */
1726      WORD32 i4_frm_luma_src_stride;
1727      /**
1728      * Chroma src buffer stride
1729      */
1730      WORD32 i4_frm_chroma_src_stride;
1731  
1732      WORD32 i4_frm_top_luma_buf_stride;
1733      /**
1734      * Luma src buffer stride for curr ctb
1735      */
1736      WORD32 i4_cur_luma_src_stride;
1737      /**
1738      * Chroma src buffer stride for curr ctb
1739      */
1740      WORD32 i4_cur_chroma_src_stride;
1741  
1742      /* Top luma buffer size */
1743      WORD32 i4_top_luma_buf_size;
1744  
1745      /* Top Chroma buffer size */
1746      WORD32 i4_top_chroma_buf_size;
1747  
1748      /*** Number of CTB units **/
1749      WORD32 i4_num_ctb_units;
1750  
1751      /**
1752      * CTB x pos
1753      */
1754      WORD32 i4_ctb_x;
1755      /**
1756      * CTB y pos
1757      */
1758      WORD32 i4_ctb_y;
1759      /* SAO block width*/
1760      WORD32 i4_sao_blk_wd;
1761  
1762      /* SAO block height*/
1763      WORD32 i4_sao_blk_ht;
1764  
1765      /* Last ctb row flag*/
1766      WORD32 i4_is_last_ctb_row;
1767  
1768      /* Last ctb col flag*/
1769      WORD32 i4_is_last_ctb_col;
1770  
1771      /* CTB aligned width */
1772      UWORD32 u4_ctb_aligned_wd;
1773  
1774      /* Number of ctbs in a row*/
1775      UWORD32 u4_num_ctbs_horz;
1776  
1777      UWORD32 u4_num_ctbs_vert;
1778      /**
1779      * Closed loop SSD Lambda
1780      * This is multiplied with bits for RD cost computations in SSD mode
1781      * This is represented in q format with shift of LAMBDA_Q_SHIFT
1782      */
1783      LWORD64 i8_cl_ssd_lambda_qf;
1784  
1785      /**
1786      * Closed loop SSD Lambda for chroma (chroma qp is different from luma qp)
1787      * This is multiplied with bits for RD cost computations in SSD mode
1788      * This is represented in q format with shift of LAMBDA_Q_SHIFT
1789      */
1790      LWORD64 i8_cl_ssd_lambda_chroma_qf;
1791      /**
1792      * Pointer to current PPS
1793      */
1794      pps_t *ps_pps;  //not used currently
1795      /**
1796      * Pointer to current SPS
1797      */
1798      sps_t *ps_sps;
1799  
1800      /**
1801      * Pointer to current slice header structure
1802      */
1803      slice_header_t *ps_slice_hdr;
1804      /**
1805      * Pointer to current frame ctb out array of structures
1806      */
1807      ctb_enc_loop_out_t *ps_ctb_out;
1808      /**
1809      *  context for cabac bit estimation used during rdopt stage
1810      */
1811      rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt;
1812      /**
1813      * Pointer to sao_enc_t for the current ctb
1814      */
1815      sao_enc_t *ps_sao;
1816      /*
1817      * Pointer to an array to store the sao information of the top ctb
1818      * This is required for to decide top merge
1819      */
1820      sao_enc_t *aps_frm_top_ctb_sao[MAX_NUM_ENC_LOOP_PARALLEL];
1821  
1822      /*
1823      * Pointer to structure to store the sao parameters of (x,y)th ctb
1824      * for top merge of (x,y+1)th ctb
1825      */
1826      sao_enc_t *ps_top_ctb_sao;
1827  
1828      /* structure to store the sao parameters of (x,y)th ctb for
1829      * the left merge of (x+1,y)th ctb
1830      */
1831      sao_enc_t s_left_ctb_sao;
1832  
1833      /* Array of structures for SAO RDO candidates*/
1834      sao_enc_t as_sao_rd_cand[MAX_SAO_RD_CAND];
1835  
1836      /** array of function pointers for luma sao */
1837      pf_sao_luma apf_sao_luma[4];
1838  
1839      /** array of function pointers for chroma sao */
1840      pf_sao_chroma apf_sao_chroma[4];
1841  
1842      /* Flag to do SAO luma and chroma filtering*/
1843      WORD8 i1_slice_sao_luma_flag;
1844  
1845      WORD8 i1_slice_sao_chroma_flag;
1846  
1847  #if DISABLE_SAO_WHEN_NOISY
1848      ctb_analyse_t *ps_ctb_data;
1849  
1850      WORD32 i4_ctb_data_stride;
1851  #endif
1852  
1853      ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list;
1854  
1855  } sao_ctxt_t;
1856  
1857  /**
1858  ******************************************************************************
1859  *  @brief  Encode loop module context structure
1860  ******************************************************************************
1861  */
1862  typedef struct
1863  {
1864  #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
1865      void *pv_err_func_selector;
1866  #endif
1867  
1868      /**
1869      * Quality preset for controlling the number of RD opt candidates
1870      * @sa : IHEVCE_QUALITY_CONFIG_T
1871      */
1872      WORD32 i4_quality_preset;
1873      /**
1874      *
1875      *
1876      */
1877      WORD32 i4_rc_pass;
1878      /**
1879      * Lamda to be multiplied with bits for SATD
1880      * should be equal to Lamda*Qp
1881      */
1882      WORD32 i4_satd_lamda;
1883  
1884      /**
1885      * Lamda to be multiplied with bits for SAD
1886      * should be equal to Lamda*Qp
1887      */
1888      WORD32 i4_sad_lamda;
1889  
1890      /**
1891      * Closed loop SSD Lambda
1892      * This is multiplied with bits for RD cost computations in SSD mode
1893      * This is represented in q format with shift of LAMBDA_Q_SHIFT
1894      */
1895      LWORD64 i8_cl_ssd_lambda_qf;
1896  
1897      /**
1898      * Closed loop SSD Lambda for chroma (chroma qp is different from luma qp)
1899      * This is multiplied with bits for RD cost computations in SSD mode
1900      * This is represented in q format with shift of LAMBDA_Q_SHIFT
1901      */
1902      LWORD64 i8_cl_ssd_lambda_chroma_qf;
1903  
1904      /**
1905      * Ratio of Closed loop SSD Lambda and Closed loop SSD Lambda for chroma
1906      * This is multiplied with (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT)
1907      * to keep the precision of the ratio
1908      */
1909      UWORD32 u4_chroma_cost_weighing_factor;
1910      /**
1911      * Frame level QP to be used
1912      */
1913      WORD32 i4_frame_qp;
1914  
1915      WORD32 i4_frame_mod_qp;
1916  
1917      WORD32 i4_frame_qstep;
1918  
1919      UWORD8 u1_max_tr_depth;
1920  
1921      /**
1922      * CU level Qp
1923      */
1924      WORD32 i4_cu_qp;
1925  
1926      /**
1927      * CU level Qp / 6
1928      */
1929      WORD32 i4_cu_qp_div6;
1930  
1931      /**
1932      * CU level Qp % 6
1933      */
1934      WORD32 i4_cu_qp_mod6;
1935  
1936      /**
1937      *  CU level QP to be used
1938      */
1939      WORD32 i4_chrm_cu_qp;
1940  
1941      /**
1942      * CU level Qp / 6
1943      */
1944      WORD32 i4_chrm_cu_qp_div6;
1945  
1946      /**
1947      * CU level Qp % 6
1948      */
1949      WORD32 i4_chrm_cu_qp_mod6;
1950  
1951      /** previous cu qp
1952      * @remarks : This needs to be remembered to handle skip cases in deblocking.
1953      */
1954      WORD32 i4_prev_cu_qp;
1955  
1956      /** chroma qp offset
1957      * @remarks : Used to calculate chroma qp and other qp related parameter at CU level
1958      */
1959      WORD32 i4_chroma_qp_offset;
1960  
1961      /**
1962      * Buffer Pointer to populate the scale matrix for all transform size
1963      */
1964      WORD16 *pi2_scal_mat;
1965  
1966      /**
1967      * Buffer Pointer to populate the rescale matrix for all transform size
1968      */
1969      WORD16 *pi2_rescal_mat;
1970  
1971      /** array of pointer to store the scaling matrices for
1972      *  all transform sizes and qp % 6 (pre computed)
1973      */
1974      WORD16 *api2_scal_mat[NUM_TRANS_TYPES * 2];
1975  
1976      /** array of pointer to store the re-scaling matrices for
1977      *  all transform sizes and qp % 6 (pre computed)
1978      */
1979      WORD16 *api2_rescal_mat[NUM_TRANS_TYPES * 2];
1980  
1981      /** array of function pointers for residual and
1982      *  forward transform for all transform sizes
1983      */
1984      pf_res_trans_luma apf_resd_trns[NUM_TRANS_TYPES];
1985  
1986      /** array of function pointers for residual and
1987      *  forward HAD transform for all transform sizes
1988      */
1989      pf_res_trans_luma_had_chroma apf_chrm_resd_trns_had[NUM_TRANS_TYPES - 2];
1990  
1991      /** array of function pointers for residual and
1992      *  forward transform for all transform sizes
1993      *  for chroma
1994      */
1995      pf_res_trans_chroma apf_chrm_resd_trns[NUM_TRANS_TYPES - 2];
1996  
1997      /** array of function pointers for quantization and
1998      *  inv Quant for ssd calc. for all transform sizes
1999      */
2000      pf_quant_iquant_ssd apf_quant_iquant_ssd[4];
2001  
2002      /** array of function pointers for inv.transform and
2003      *  recon for all transform sizes
2004      */
2005      pf_it_recon apf_it_recon[NUM_TRANS_TYPES];
2006  
2007      /** array of function pointers for inverse transform
2008      * and recon for all transform sizes for chroma
2009      */
2010      pf_it_recon_chroma apf_chrm_it_recon[NUM_TRANS_TYPES - 2];
2011  
2012      /** array of luma intra prediction function pointers */
2013      pf_intra_pred apf_lum_ip[NUM_IP_FUNCS];
2014  
2015      /** array of chroma intra prediction function pointers */
2016      pf_intra_pred apf_chrm_ip[NUM_IP_FUNCS];
2017  
2018      /* - Function pointer to cu_mode_decide function */
2019      /* - The 'void *' is used since one of the parameters of */
2020      /* this class of functions is the current structure */
2021      /* - This function pointer is used to choose the */
2022      /* appropriate function depending on whether bit_depth is */
2023      /* chosen as 8 bits or greater */
2024      /* - This function pointer's type is defined at the end */
2025      /* of this file */
2026      void *pv_cu_mode_decide;
2027  
2028      /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2029      void *pv_inter_rdopt_cu_mc_mvp;
2030  
2031      /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2032      void *pv_inter_rdopt_cu_ntu;
2033  
2034      /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2035      void *pv_intra_chroma_pred_mode_selector;
2036  
2037      /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2038      void *pv_intra_rdopt_cu_ntu;
2039  
2040      /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2041      void *pv_final_rdopt_mode_prcs;
2042  
2043      /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2044      void *pv_store_cu_results;
2045  
2046      /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2047      void *pv_enc_loop_cu_bot_copy;
2048  
2049      /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2050      void *pv_final_mode_reevaluation_with_modified_cu_qp;
2051  
2052      /* Infer from the comment for the variable 'pv_cu_mode_decide' */
2053      void *pv_enc_loop_ctb_left_copy;
2054  
2055      /** Quantization rounding factor for inter and intra CUs */
2056      WORD32 i4_quant_rnd_factor[2];
2057  
2058      /**
2059      * Frame Buffer Pointer to store the top row luma data.
2060      * one pixel row in every ctb row
2061      */
2062      void *apv_frm_top_row_luma[MAX_NUM_ENC_LOOP_PARALLEL];
2063  
2064      /**
2065      * One CTB row size of Top row luma data buffer
2066      */
2067      WORD32 i4_top_row_luma_stride;
2068  
2069      /**
2070      * One frm of Top row luma data buffer
2071      */
2072      WORD32 i4_frm_top_row_luma_size;
2073  
2074      /**
2075      * Current luma row bottom data store pointer
2076      */
2077      void *pv_bot_row_luma;
2078  
2079      /**
2080      * Top luma row top data access pointer
2081      */
2082      void *pv_top_row_luma;
2083  
2084      /**
2085      * Frame Buffer Pointer to store the top row chroma data (Cb  Cr pixel interleaved )
2086      * one pixel row in every ctb row
2087      */
2088      void *apv_frm_top_row_chroma[MAX_NUM_ENC_LOOP_PARALLEL];
2089  
2090      /**
2091      * One CTB row size of Top row chroma data buffer (Cb  Cr pixel interleaved )
2092      */
2093      WORD32 i4_top_row_chroma_stride;
2094  
2095      /**
2096      * One frm size of Top row chroma data buffer (Cb  Cr pixel interleaved )
2097      */
2098      WORD32 i4_frm_top_row_chroma_size;
2099  
2100      /**
2101      * Current chroma row bottom data store pointer
2102      */
2103      void *pv_bot_row_chroma;
2104  
2105      /**
2106      * Top chroma row top data access pointer
2107      */
2108      void *pv_top_row_chroma;
2109  
2110      /**
2111      * Frame Buffer Pointer to store the top row neighbour modes stored at 4x4 level
2112      * one 4x4 row in every ctb row
2113      */
2114      nbr_4x4_t *aps_frm_top_row_nbr[MAX_NUM_ENC_LOOP_PARALLEL];
2115  
2116      /**
2117      * One CTB row size of Top row nbr 4x4 params buffer
2118      */
2119      WORD32 i4_top_row_nbr_stride;
2120  
2121      /**
2122      * One frm size of Top row nbr 4x4 params buffer
2123      */
2124      WORD32 i4_frm_top_row_nbr_size;
2125  
2126      /**
2127      * Current row nbr prms bottom data store pointer
2128      */
2129      nbr_4x4_t *ps_bot_row_nbr;
2130  
2131      /**
2132      * Top row nbr prms top data access pointer
2133      */
2134      nbr_4x4_t *ps_top_row_nbr;
2135  
2136      /**
2137      * Pointer to (1,1) location in au1_nbr_ctb_map
2138      */
2139      UWORD8 *pu1_ctb_nbr_map;
2140  
2141      /**
2142      * neigbour map buffer stride;
2143      */
2144      WORD32 i4_nbr_map_strd;
2145  
2146      /**
2147      * Array at ctb level to store the neighour map
2148      * its size is 25x25 for ctb size of 64x64
2149      */
2150      UWORD8 au1_nbr_ctb_map[MAX_PU_IN_CTB_ROW + 1 + 8][MAX_PU_IN_CTB_ROW + 1 + 8];
2151  
2152      /**
2153      * Array to store left ctb data for luma
2154      * some padding is added to take care of unconditional access
2155      */
2156      void *pv_left_luma_data;
2157  
2158      /**
2159      * Array to store left ctb data for chroma (cb and cr pixel interleaved)
2160      * some padding is added to take care of unconditional access
2161      */
2162      void *pv_left_chrm_data;
2163  
2164      /**
2165      * Array to store the left neighbour modes at 4x4 level
2166      */
2167      nbr_4x4_t as_left_col_nbr[MAX_PU_IN_CTB_ROW];
2168  
2169      /**
2170      * Array to store current CTB pred modes at a 4x4 level
2171      * used for prediction inside ctb
2172      */
2173      nbr_4x4_t as_ctb_nbr_arr[MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW];
2174  
2175      /**
2176      * array for storing csbf during RD opt stage at CU level
2177      * one best and one current is required
2178      */
2179      UWORD8 au1_cu_csbf[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];
2180  
2181      /**
2182      * Stride of csbf buffer. will be useful for scanning access
2183      * if stored in a 2D order. right now set to max tx size >> 4;
2184      */
2185      WORD32 i4_cu_csbf_strd;
2186  
2187      /**
2188      * Array to store pred modes  during SATD and RD opt stage at CU level
2189      * one best and one current is required
2190      */
2191      nbr_4x4_t as_cu_nbr[2][MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW];
2192  
2193      /**
2194      * array to store the output of reference substitution process output
2195      * for intra CUs
2196      * TOP (32 x 2) + Left (32 x 2) + Top left (1) + Alignment (3)
2197      */
2198      void *pv_ref_sub_out;
2199  
2200      /**
2201      * array to store the filtered reference samples for intra CUs
2202      * TOP (32 x 2) + Left (32 x 2) + Top left (1) + Alignment (3)
2203      */
2204      void *pv_ref_filt_out;
2205  
2206      /**
2207      * Used for 3 purposes
2208      *
2209      * 1. MC Intermediate buffer
2210      * array for storing intermediate 16-bit value for hxhy subpel
2211      * generation at CTB level (+ 16) for subpel planes boundary
2212      * +4 is for horizontal 4pels
2213      *
2214      * 2. Temporary scratch buffer for transform and coeffs storage
2215      * MAX_TRANS_SIZE *2 for trans_scratch(32bit) and MAX_TRANS_SIZE *1 for trans_values
2216      * The first part i.e. from 0 to MAX_TRANS_SIZE is then reused for storing the quant coeffs
2217      * Max of both are used
2218      *
2219      * 3. MC Intermediate buffer
2220      * buffer for storing intermediate 16 bit values prior to conversion to 8bit in HBD
2221      *
2222      */
2223      MEM_ALIGN16 WORD16 ai2_scratch[(MAX_CTB_SIZE + 8 + 8) * (MAX_CTB_SIZE + 8 + 8 + 8) * 2];
2224  
2225      /**
2226      * array for storing cu level final params for a given mode
2227      * one best and one current is required
2228      */
2229      enc_loop_cu_final_prms_t as_cu_prms[2];
2230  
2231      /**
2232      * Scan index to be used for any given transform
2233      * this is a scratch variable used to communicate
2234      * scan idx at every transform level
2235      */
2236      WORD32 i4_scan_idx;
2237  
2238      /**
2239      * Buffer index in ping pong buffers
2240      * to be used SATD mode evaluations
2241      */
2242      WORD32 i4_satd_buf_idx;
2243  
2244      /**
2245      * Motion Compensation module context structre
2246      */
2247      inter_pred_ctxt_t s_mc_ctxt;
2248  
2249      /**
2250      * MV pred module context structre
2251      */
2252      mv_pred_ctxt_t s_mv_pred_ctxt;
2253  
2254      /**
2255      * Deblock BS ctb structure
2256      */
2257      deblk_bs_ctb_ctxt_t s_deblk_bs_prms;
2258  
2259      /**
2260      * Deblocking ctb structure
2261      */
2262      deblk_ctb_params_t s_deblk_prms;
2263  
2264      /**
2265      * Deblocking structure. For ctb-row level
2266      */
2267      deblk_ctbrow_prms_t s_deblk_ctbrow_prms;
2268  
2269      /**
2270      * Deblocking enable flag
2271      */
2272      WORD32 i4_deblock_type;
2273  
2274      /**
2275      *  context for cabac bit estimation used during rdopt stage
2276      */
2277      rdopt_entropy_ctxt_t s_rdopt_entropy_ctxt;
2278  
2279      /**
2280      * Context models stored for RDopt store and restore purpose
2281      */
2282      UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
2283  
2284      /**
2285      * current picture slice type
2286      */
2287      WORD8 i1_slice_type;
2288  
2289      /**
2290      * strong_intra_smoothing_enable_flag
2291      */
2292      WORD8 i1_strong_intra_smoothing_enable_flag;
2293  
2294      /** Pointer to Dep Mngr for controlling Top-Right CU dependency */
2295      void *pv_dep_mngr_enc_loop_cu_top_right;
2296  
2297      /** Pointer to Dep Mngr for controlling Deblocking Top dependency */
2298      void *pv_dep_mngr_enc_loop_dblk;
2299  
2300      /** Pointer to Dep Mngr for controlling SAO dependency */
2301      void *pv_dep_mngr_enc_loop_sao;
2302  
2303      /** pointer to store the cabac states at end of second CTB in current row */
2304      UWORD8 *pu1_curr_row_cabac_state;
2305  
2306      /** pointer to copy the cabac states at start of first CTB in current row */
2307      UWORD8 *pu1_top_rt_cabac_state;
2308      /** flag to indicate rate control mode.
2309      * @remarks :  To enable CU level qp modulation only when required.
2310      */
2311      WORD8 i1_cu_qp_delta_enable;
2312  
2313      /** flag to indicate whether entropy coding sync is enabled.
2314      * @remarks :  Entropy sync enable flag
2315      */
2316      WORD8 i1_entropy_coding_sync_enabled_flag;
2317  
2318      /** Use SATD or SAD for best merge candidate evaluation */
2319      WORD32 i4_use_satd_for_merge_eval;
2320  
2321      UWORD8 u1_use_early_cbf_data;
2322  
2323      /** Use SATD or SAD for best CU merge candidate evaluation */
2324      WORD32 i4_use_satd_for_cu_merge;
2325  
2326      /** Maximum number of merge candidates to be evaluated */
2327      WORD32 i4_max_merge_candidates;
2328  
2329      /** Flag to indicate whether current picture needs to be deblocked,
2330      padded and hpel planes need to be generated.
2331      These are turned off typically in non reference pictures when psnr
2332      and recon dump is disabled
2333      */
2334      WORD32 i4_deblk_pad_hpel_cur_pic;
2335  
2336      /* Array of structures for storing mc predicted data for
2337      * merge and skip modes
2338      */
2339      merge_skip_pred_data_t as_merge_skip_pred_data[MAX_NUM_CU_MERGE_SKIP_CAND];
2340  
2341      /* Sum the Qps of each 8*8 block in CU
2342      * 8*8 block is considered as Min CU size possible as per standard is 8
2343      * 0 corresponds to INTER and 1 corresponds to INTRA
2344      */
2345      LWORD64 i8_cl_ssd_lambda_qf_array[MAX_HEVC_QP_12bit + 1];
2346      UWORD32 au4_chroma_cost_weighing_factor_array[MAX_HEVC_QP_12bit + 1];
2347      LWORD64 i8_cl_ssd_lambda_chroma_qf_array[MAX_HEVC_QP_12bit + 1];
2348      WORD32 i4_satd_lamda_array[MAX_HEVC_QP_12bit + 1];
2349      WORD32 i4_sad_lamda_array[MAX_HEVC_QP_12bit + 1];
2350  
2351      /************************************************************************/
2352      /* The fields with the string 'type2' in their names are required */
2353      /* when both 8bit and hbd lambdas are needed. The lambdas corresponding */
2354      /* to the bit_depth != internal_bit_depth are stored in these fields */
2355      /************************************************************************/
2356      LWORD64 i8_cl_ssd_type2_lambda_qf_array[MAX_HEVC_QP_12bit + 1];
2357      LWORD64 i8_cl_ssd_type2_lambda_chroma_qf_array[MAX_HEVC_QP_12bit + 1];
2358      WORD32 i4_satd_type2_lamda_array[MAX_HEVC_QP_12bit + 1];
2359      WORD32 i4_sad_type2_lamda_array[MAX_HEVC_QP_12bit + 1];
2360  
2361      /* Lokesh: Added to find if the CU is the first to be coded in the group */
2362      WORD32 i4_is_first_cu_qg_coded;
2363  
2364      /* Chroma RDOPT related parameters */
2365      ihevce_chroma_rdopt_ctxt_t s_chroma_rdopt_ctxt;
2366  
2367      /* Structure to save pred data of ME/Intra cand */
2368      cu_me_intra_pred_prms_t s_cu_me_intra_pred_prms;
2369  
2370      /* Structure to save the flags required for Final mode Reconstruction
2371      function. These flags are set based on quality presets and bit-rate
2372      we are working on */
2373      cu_final_recon_flags_t s_cu_final_recon_flags;
2374  
2375      /* Parameter to select the level at which RDOQ will be implemented:
2376      0 - RDOQ disabled
2377      1 - RDOQ enabled during RDOPT for all candidates
2378      2 - RDOQ enabled only for the final candidate*/
2379      WORD32 i4_rdoq_level;
2380  
2381      /* Parameter to how at which level Quant rounding factors are computed:
2382      FIXED_QUANT_ROUNDING       : Fixed Quant rounding values are used
2383      NCTB_LEVEL_QUANT_ROUNDING  : NCTB level Cmputed Quant rounding values are used
2384      CTB_LEVEL_QUANT_ROUNDING   : CTB level Cmputed Quant rounding values are used
2385      CU_LEVEL_QUANT_ROUNDING    : CU level Cmputed Quant rounding values are used
2386      TU_LEVEL_QUANT_ROUNDING    : TU level Cmputed Quant rounding values are used*/
2387      WORD32 i4_quant_rounding_level;
2388  
2389      /* Parameter to how at which level Quant rounding factors are computed:
2390      CHROMA_QUANT_ROUNDING    : Chroma Quant rounding values are used for chroma */
2391      WORD32 i4_chroma_quant_rounding_level;
2392  
2393      /* Parameter to select the level at which SBH will be implemented:
2394      0 - SBH disabled
2395      1 - SBH enabled during RDOPT for all candidates
2396      2 - SBH enabled only for the final candidate*/
2397      WORD32 i4_sbh_level;
2398  
2399      /* Parameter to select the level at which ZERO CBF RDO will be implemented:
2400      0 - ZCBF disabled
2401      1 - ZCBF enabled during RDOPT for all candidates
2402      2 - ZCBF enabled only for the final candidate
2403      */
2404      WORD32 i4_zcbf_rdo_level;
2405  
2406      /*RDOQ-SBH context structure*/
2407      rdoq_sbh_ctxt_t s_rdoq_sbh_ctxt;
2408  
2409      /** Structure to store the Merge/Skip Cand. for EncLoop */
2410      cu_inter_merge_skip_t s_cu_inter_merge_skip;
2411      /** Structure to store the Mixed mode Cand. for EncLoop */
2412      cu_mixed_mode_inter_t s_mixed_mode_inter_cu;
2413  
2414      ihevce_inter_pred_buf_data_t s_pred_buf_data;
2415  
2416      void *pv_422_chroma_intra_pred_buf;
2417  
2418      WORD32 i4_max_num_inter_rdopt_cands;
2419  
2420      /* Output Struct per each CU during recursions */
2421      ihevce_enc_cu_node_ctxt_t as_enc_cu_ctxt[MAX_CU_IN_CTB + 1];
2422  
2423      /* Used to store best inter candidate. Used only when */
2424      /* 'CU modulated QP override' is enabled */
2425      cu_inter_cand_t as_best_cand[MAX_CU_IN_CTB + 1];
2426  
2427      cu_inter_cand_t *ps_best_cand;
2428  
2429      UWORD8 au1_cu_init_cabac_state_a_priori[MAX_CU_IN_CTB + 1][IHEVC_CAB_CTXT_END];
2430  
2431      UWORD8 (*pau1_curr_cu_a_priori_cabac_state)[IHEVC_CAB_CTXT_END];
2432  
2433      /* Used to store pred data of each CU in the CTB. */
2434      /* Used only when 'CU modulated QP override' is enabled */
2435      void *pv_CTB_pred_luma;
2436  
2437      void *pv_CTB_pred_chroma;
2438  
2439      /**
2440      * array for storing recon during SATD and RD opt stage at CU level
2441      * one best and one current is required.Luma and chroma together
2442      */
2443      void *pv_cu_luma_recon;
2444  
2445      /**
2446      * array for storing recon during SATD and RD opt stage at CU level
2447      * one best and one current is required.Luma and chroma together
2448      */
2449      void *pv_cu_chrma_recon;
2450  
2451      /**
2452      * Array to store pred modes  during SATD and RD opt stage at CU level
2453      * one best and one current is required
2454      */
2455      nbr_4x4_t as_cu_recur_nbr[MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW];
2456  
2457      /**
2458      * Pointer to Array to store pred modes  during SATD and RD opt stage at CU level
2459      * one best and one current is required
2460      */
2461      nbr_4x4_t *ps_cu_recur_nbr;
2462  
2463      /**
2464      * Context models stored for CU recursion parent evaluation
2465      */
2466      UWORD8 au1_rdopt_recur_ctxt_models[4][IHEVC_CAB_CTXT_END];
2467  
2468      ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt;
2469  
2470      /**
2471      * array for storing coeffs during RD opt stage at CU level
2472      * one best and one current is required. Luma and chroma together
2473      */
2474      /*UWORD8 au1_cu_recur_coeffs[MAX_LUMA_COEFFS_CTB + MAX_CHRM_COEFFS_CTB];*/
2475  
2476      UWORD8 *pu1_cu_recur_coeffs;
2477  
2478      UWORD8 *apu1_cu_level_pingpong_coeff_buf_addr[2];
2479  
2480      WORD16 *api2_cu_level_pingpong_deq_buf_addr[2];
2481  
2482      UWORD8 *pu1_ecd_data;
2483  
2484      /* OPT: flag to skip parent CU=4TU eval during recursion */
2485      UWORD8 is_parent_cu_rdopt;
2486  
2487      /**
2488      *   Array of structs containing block merge data for
2489      *   4 32x32 CU's in indices 1 - 4 and 64x64 CU at 0
2490      */
2491      UWORD8 u1_cabac_states_next_row_copied_flag;
2492  
2493      UWORD8 u1_cabac_states_first_cu_copied_flag;
2494  
2495      UWORD32 u4_cur_ctb_wd;
2496  
2497      UWORD32 u4_cur_ctb_ht;
2498  
2499      /* thread id of the current context */
2500      WORD32 thrd_id;
2501  
2502      /** Number of processing threads created run time */
2503      WORD32 i4_num_proc_thrds;
2504  
2505      /* Instance number of bit-rate for multiple bit-rate encode */
2506      WORD32 i4_bitrate_instance_num;
2507  
2508      WORD32 i4_num_bitrates;
2509  
2510      WORD32 i4_enc_frm_id;
2511  
2512      /* Flag to indicate if chroma needs to be considered for cost calculation */
2513      WORD32 i4_consider_chroma_cost;
2514  
2515      /* Number of modes to be evaluated for intra */
2516      WORD32 i4_num_modes_to_evaluate_intra;
2517  
2518      /* Number of modes to be evaluated for inter */
2519      WORD32 i4_num_modes_to_evaluate_inter;
2520      /*pointers for struct to hold RC parameters for each bit-rate instance */
2521      enc_loop_rc_params_t
2522          *aaps_enc_loop_rc_params[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2523  
2524      /** Pointer to structure containing function pointers of common*/
2525      func_selector_t *ps_func_selector;
2526  
2527      /* Flag to control Top Right Sync for during Merge */
2528      UWORD8 u1_use_top_at_ctb_boundary;
2529  
2530      UWORD8 u1_is_input_data_hbd;
2531  
2532      UWORD8 u1_bit_depth;
2533  
2534      /* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
2535      UWORD8 u1_chroma_array_type;
2536  
2537      rc_quant_t *ps_rc_quant_ctxt;
2538  
2539      sao_ctxt_t s_sao_ctxt_t;
2540  
2541      /* Offset to get the Qp for the last CU of upper CTB-row.
2542      This offset is from the current tile top row QP map start.
2543      This will only be consumed by the first CU of current CTB-row
2544      iff [it is skip && entropy sync is off] */
2545      WORD32 *pi4_offset_for_last_cu_qp;
2546  
2547      double i4_lamda_modifier;
2548      double i4_uv_lamda_modifier;
2549      WORD32 i4_temporal_layer_id;
2550  
2551      UWORD8 u1_disable_intra_eval;
2552  
2553      WORD32 i4_quant_round_tu[2][32 * 32];
2554  
2555      WORD32 *pi4_quant_round_factor_tu_0_1[5];
2556      WORD32 *pi4_quant_round_factor_tu_1_2[5];
2557  
2558      WORD32 i4_quant_round_4x4[2][4 * 4];
2559      WORD32 i4_quant_round_8x8[2][8 * 8];
2560      WORD32 i4_quant_round_16x16[2][16 * 16];
2561      WORD32 i4_quant_round_32x32[2][32 * 32];
2562  
2563      WORD32 *pi4_quant_round_factor_cu_ctb_0_1[5];
2564      WORD32 *pi4_quant_round_factor_cu_ctb_1_2[5];
2565  
2566      WORD32 i4_quant_round_cr_4x4[2][4 * 4];
2567      WORD32 i4_quant_round_cr_8x8[2][8 * 8];
2568      WORD32 i4_quant_round_cr_16x16[2][16 * 16];
2569  
2570      WORD32 *pi4_quant_round_factor_cr_cu_ctb_0_1[3];
2571      WORD32 *pi4_quant_round_factor_cr_cu_ctb_1_2[3];
2572      /* cost for not coding cu residue i.e forcing no residue syntax as 1 */
2573      LWORD64 i8_cu_not_coded_cost;
2574  
2575      /* dependency manager for forward ME  sync */
2576      void *pv_dep_mngr_encloop_dep_me;
2577  
2578      LWORD64 ai4_source_satd_8x8[64];
2579  
2580      LWORD64 ai4_source_chroma_satd[256];
2581  
2582      UWORD8 u1_is_refPic;
2583  
2584      WORD32 i4_qp_mod;
2585  
2586      WORD32 i4_is_ref_pic;
2587  
2588      WORD32 i4_chroma_format;
2589  
2590      WORD32 i4_temporal_layer;
2591  
2592      WORD32 i4_use_const_lamda_modifier;
2593  
2594      double f_i_pic_lamda_modifier;
2595  
2596      LWORD64 i8_distortion;
2597  
2598      WORD32 i4_use_ctb_level_lamda;
2599  
2600      float f_str_ratio;
2601  
2602      /* Flag to indicate if current frame is to be shared with other clients.
2603      Used only in distributed-encoding */
2604      WORD32 i4_share_flag;
2605  
2606      /* Pointer to the current recon being processed.
2607      Needed for enabling TMVP in dist-encoding */
2608      void *pv_frm_recon;
2609  
2610      ihevce_cmn_opt_func_t s_cmn_opt_func;
2611  
2612      /* The ME analogue to the struct above was not included since */
2613      /* that would have entailed inclusion of all ME specific */
2614      /* header files */
2615      /*FT_SAD_EVALUATOR **/
2616  
2617      /*FT_SAD_EVALUATOR **/
2618      void *pv_evalsad_pt_npu_mxn_8bit;
2619      UWORD8 u1_enable_psyRDOPT;
2620  
2621      UWORD8 u1_is_stasino_enabled;
2622  
2623      UWORD32 u4_psy_strength;
2624      /*Sub PIC rc context */
2625  
2626      WORD32 i4_sub_pic_level_rc;
2627      WORD32 i4_num_ctb_for_out_scale;
2628  
2629      /**
2630       * Accumalated bits of all cu for required CTBS estimated during RDO evaluation.
2631       * Required for sup pic level RC. Reset when required CU/CTB count is reached.
2632       */
2633      UWORD32 u4_total_cu_bits;
2634  
2635      UWORD32 u4_total_cu_bits_mul_qs;
2636  
2637      UWORD32 u4_total_cu_hdr_bits;
2638  
2639      UWORD32 u4_cu_tot_bits_into_qscale;
2640  
2641      UWORD32 u4_cu_tot_bits;
2642  
2643      /*Scale added to the current qscale, output from sub pic rc*/
2644      WORD32 i4_cu_qp_sub_pic_rc;
2645  
2646      /*Frame level L1 IPE sad*/
2647      LWORD64 i8_frame_l1_ipe_sad;
2648  
2649      /*Frame level L0 IPE satd*/
2650      LWORD64 i8_frame_l0_ipe_satd;
2651  
2652      /*Frame level L1 ME sad*/
2653      LWORD64 i8_frame_l1_me_sad;
2654  
2655      /*Frame level L1 activity factor*/
2656      LWORD64 i8_frame_l1_activity_fact;
2657      /*bits esimated for frame calulated for sub pic rc bit control */
2658      WORD32 ai4_frame_bits_estimated[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2659      /** I Scene cut */
2660      WORD32 i4_is_I_scenecut;
2661  
2662      /** Non Scene cut */
2663      WORD32 i4_is_non_I_scenecut;
2664  
2665      /** Frames for which online/offline model is not valid */
2666      WORD32 i4_is_model_valid;
2667  
2668      /** Steady State Frame */
2669      //WORD32 i4_is_steady_state;
2670  
2671      WORD32 i4_is_first_query;
2672  
2673      /* Pointer to Tile params base */
2674      void *pv_tile_params_base;
2675  
2676      /** The index of column tile for which it is working */
2677      WORD32 i4_tile_col_idx;
2678  
2679      WORD32 i4_max_search_range_horizontal;
2680  
2681      WORD32 i4_max_search_range_vertical;
2682  
2683      WORD32 i4_is_ctb_qp_modified;
2684  
2685      WORD32 i4_display_num;
2686  
2687      WORD32 i4_pred_qp;
2688  
2689      /*assumption of qg size is 8x8 block size*/
2690      WORD32 ai4_qp_qg[8 * 8];
2691  
2692      WORD32 i4_last_cu_qp_from_prev_ctb;
2693  
2694      WORD32 i4_prev_QP;
2695  
2696      UWORD8 u1_max_inter_tr_depth;
2697  
2698      UWORD8 u1_max_intra_tr_depth;
2699  
2700  } ihevce_enc_loop_ctxt_t;
2701  
2702  /*****************************************************************************/
2703  /* Enums                                                                     */
2704  /*****************************************************************************/
2705  
2706  /** @brief RDOQ_LEVELS_T: This enumeration specifies the RDOQ mode of operation.
2707  *
2708  *  NO_RDOQ        : RDOQ is not performed
2709  *  BEST_CAND_RDOQ : RDOQ for the final candidate only
2710  *  ALL_CAND_RDOQ  : RDOQ for all candidates
2711  */
2712  typedef enum
2713  {
2714      NO_RDOQ, /* implicit value 0 */
2715      BEST_CAND_RDOQ, /* implicit value 1 */
2716      ALL_CAND_RDOQ, /* implicit value 2 */
2717  } RDOQ_LEVELS_T;
2718  
2719  /** @brief QUANT_ROUNDING_COEFF_LEVELS_T: This enumeration specifies the coefficient-level quant rounding mode of operation
2720  *
2721  *  FIXED_QUANT_ROUNDING       : Fixed quant rounding values are used
2722  *  NCTB_LEVEL_QUANT_ROUNDING  : NCTB level computed quant rounding values are used
2723  *  CTB_LEVEL_QUANT_ROUNDING   : CTB level computed quant rounding values are used
2724  *  CU_LEVEL_QUANT_ROUNDING    : CU level computed quant rounding values are used
2725  *  TU_LEVEL_QUANT_ROUNDING    : TU level computed quant rounding values are used
2726  *               Default is for all candidates; based on RDOQ_LEVELS_T, choose for the best candidate (NOTE(review): wording reconstructed from the original comment - confirm)
2727  */
2728  typedef enum
2729  {
2730      FIXED_QUANT_ROUNDING, /* implicit value 0 */
2731      NCTB_LEVEL_QUANT_ROUNDING, /* implicit value 1 */
2732      CTB_LEVEL_QUANT_ROUNDING, /* implicit value 2 */
2733      CU_LEVEL_QUANT_ROUNDING, /* implicit value 3 */
2734      TU_LEVEL_QUANT_ROUNDING, /* implicit value 4 */
2735      CHROMA_QUANT_ROUNDING /* implicit value 5; not described in the list above - presumably selects chroma rounding, TODO confirm */
2736  } QUANT_ROUNDING_COEFF_LEVELS_T;
2737  
2738  /*****************************************************************************/
2739  /* Enums                                                                     */
2740  /*****************************************************************************/
2741  
2742  /** @brief SBH_LEVELS_T: This enumeration specifies the SBH mode of operation
2743  *
2744  *  NO_SBH        : SBH is not performed
2745  *  BEST_CAND_SBH : SBH for the final candidate only
2746  *  ALL_CAND_SBH  : SBH for all candidates
2747  */
2748  typedef enum
2749  {
2750      NO_SBH, /* implicit value 0 */
2751      BEST_CAND_SBH, /* implicit value 1 */
2752      ALL_CAND_SBH, /* implicit value 2 */
2753  } SBH_LEVELS_T;
2754  
2755  /** @brief ZCBF_LEVELS_T: This enumeration specifies the ZeroCBF RDO mode of operation
2756  *
2757  *  NO_ZCBF     : ZCBF RDO is not performed
2758  *  ZCBF_ENABLE : ZCBF RDO is enabled (the original comment described this as "for all candidates")
2759  */
2760  typedef enum
2761  {
2762      NO_ZCBF, /* implicit value 0 */
2763      ZCBF_ENABLE, /* implicit value 1 */
2764  } ZCBF_LEVELS_T;
2765  
2766  /**
2767  ******************************************************************************
2768  *  @brief  Encode loop master context structure (per-thread context pointers plus dependency-manager handles)
2769  ******************************************************************************
2770  */
2771  typedef struct
2772  {
2773      /** Array of pointers to the per-thread encode loop contexts */
2774      ihevce_enc_loop_ctxt_t *aps_enc_loop_thrd_ctxt[MAX_NUM_FRM_PROC_THRDS_ENC];
2775  
2776      /** Number of processing threads created at run time */
2777      WORD32 i4_num_proc_thrds;
2778  
2779      /**
2780      *  Array of top row CU skip flags (1 bit per 8x8 CU); HEVCE_MAX_WIDTH >> 6 bytes, i.e. one bit per 8 pixels of width
2781      */
2782      UWORD8 au1_cu_skip_top_row[HEVCE_MAX_WIDTH >> 6];
2783  
2784      /** Context models (stored at the end of the second CTB in a row),
2785      *  stored in packed form pState[bits6-1] | MPS[bit0]
2786      *  for each CTB row
2787      *  using entropy sync model in RD opt
2788      */
2789      UWORD8 au1_ctxt_models[MAX_NUM_CTB_ROWS_FRM][IHEVC_CAB_CTXT_END];
2790  
2791      /** Dependency manager for controlling EncLoop Top-Right CU dependency
2792      * One per frame in parallel (first index) and per bit-rate (second index)
2793      */
2794      void *aapv_dep_mngr_enc_loop_cu_top_right[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2795  
2796      /** Dependency manager for controlling Deblocking Top dependency
2797      * One per frame in parallel (first index) and per bit-rate (second index)
2798      */
2799      void *aapv_dep_mngr_enc_loop_dblk[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2800  
2801      /** Dependency manager for controlling SAO Top dependency
2802      * One per frame in parallel (first index) and per bit-rate (second index)
2803      */
2804      void *aapv_dep_mngr_enc_loop_sao[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2805  
2806      /** Number of bit-rate instances running */
2807      WORD32 i4_num_bitrates;
2808  
2809      /** Number of enc frames running in parallel */
2810      WORD32 i4_num_enc_loop_frm_pllel;
2811  
2812      /* Pointer to Tile params base (untyped here; concrete type not visible in this header section) */
2813      void *pv_tile_params_base;
2814      /* Offset to get the QP for the last CU of the upper CTB-row.
2815      This offset is from the current tile top row QP map start.
2816  
2817      This will only be consumed by the first CU of the current CTB-row
2818      iff [it is skip && entropy sync is off].
2819      There is one entry for every tile-column because the offset remains constant
2820      for all tiles lying in a tile-column */
2821      WORD32 ai4_offset_for_last_cu_qp[MAX_TILE_COLUMNS];
2822  } ihevce_enc_loop_master_ctxt_t;
2823  
2824  /**
2825  ******************************************************************************
2826  *  @brief  This struct is used for storing data required by the block merge
2827  *          function. All buffers are held by pointer; ownership is not visible here.
2828  ******************************************************************************
2829  */
2830  typedef struct
2831  {
2832      block_data_8x8_t *ps_8x8_data; /* Block data pointers, one per block size: 8x8, 16x16, 32x32, 64x64 */
2833  
2834      block_data_16x16_t *ps_16x16_data;
2835  
2836      block_data_32x32_t *ps_32x32_data;
2837  
2838      block_data_64x64_t *ps_64x64_data;
2839  
2840      part_type_results_t **ps_32x32_results; /* NOTE: pointer-to-pointer despite the ps_ prefix */
2841  
2842      cur_ctb_cu_tree_t *ps_cu_tree;
2843  
2844      ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
2845  
2846      mv_pred_ctxt_t *ps_mv_pred_ctxt; /* MV prediction context */
2847  
2848      recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2]; /* Pointer to array(s) of (HEVCE_MAX_REF_PICS * 2) recon_pic_buf_t pointers */
2849  
2850      nbr_4x4_t *ps_top_nbr_4x4; /* 4x4 neighbour data: top, left and current (next three members) */
2851  
2852      nbr_4x4_t *ps_left_nbr_4x4;
2853  
2854      nbr_4x4_t *ps_curr_nbr_4x4;
2855  
2856      UWORD8 *pu1_inp; /* Input buffer; presumably strided by inp_stride - confirm */
2857  
2858      UWORD8 *pu1_ctb_nbr_map; /* CTB neighbour map; presumably strided by i4_nbr_map_strd - confirm */
2859  
2860      WORD32 i4_nbr_map_strd;
2861  
2862      WORD32 inp_stride;
2863  
2864      WORD32 i4_ctb_x_off; /* CTB x / y offsets (units not visible here) */
2865  
2866      WORD32 i4_ctb_y_off;
2867  
2868      WORD32 use_satd_for_err_calc; /* Flag (by name): use SATD for error calculation */
2869  
2870      WORD32 lambda;
2871  
2872      WORD32 lambda_q_shift; /* presumably the fixed-point shift associated with lambda - confirm */
2873  
2874      WORD32 frm_qstep;
2875  
2876      WORD32 num_4x4_in_ctb;
2877  
2878      UWORD8 *pu1_wkg_mem; /* Working memory (size requirements not visible here) */
2879  
2880      UWORD8 **ppu1_pred; /* Pointer to prediction buffer pointer(s) */
2881  
2882      UWORD8 u1_bidir_enabled;
2883  
2884      UWORD8 u1_max_tr_depth;
2885  
2886      WORD32 i4_ctb_pos;
2887  
2888      WORD32 i4_ctb_size;
2889  
2890      UWORD8 *apu1_wt_inp[MAX_REFS_SEARCHABLE + 1]; /* (MAX_REFS_SEARCHABLE + 1) byte pointers; presumably weighted inputs - confirm */
2891  
2892      /** Pointer to the Dep Mngr for EncLoop Top-Right CU dependency */
2893      void *pv_dep_mngr_enc_loop_cu_top_right;
2894      /** The current CU row no. for the Dep Manager to check */
2895      WORD32 i4_dep_mngr_cur_cu_row_no;
2896      /** The top CU row no. for the Dep Manager to check */
2897      WORD32 i4_dep_mngr_top_cu_row_no;
2898  
2899      WORD8 i1_quality_preset;
2900  
2901      /* Flag to control Top Right Sync during Merge */
2902      UWORD8 u1_use_top_at_ctb_boundary;
2903  
2904  } block_merge_input_t;
2905  
2906  /* Structure which stores the info regarding the TUs present in the CU (one instance describes one TU) */
2907  typedef struct tu_prms_t
2908  {
2909      UWORD8 u1_tu_size; /* TU size (units/encoding not visible here) */
2910  
2911      UWORD8 u1_x_off; /* TU x offset; presumably relative to the CU - confirm */
2912  
2913      UWORD8 u1_y_off; /* TU y offset; presumably relative to the CU - confirm */
2914  
2915      WORD32 i4_tu_cost; /* Cost associated with this TU */
2916  
2917      WORD32 i4_early_cbf; /* Early CBF value for this TU (semantics not visible here) */
2918  
2919  } tu_prms_t;
2920  
2921  typedef struct /* Parameters for the final CU update; every member is a pointer, so the consumer can presumably write results back through them - confirm */
2922  {
2923      cu_enc_loop_out_t **pps_cu_final; /* Pointer to the final CU output pointer */
2924  
2925      pu_t **pps_row_pu; /* Pointer to the row PU pointer */
2926  
2927      tu_enc_loop_out_t **pps_row_tu; /* Pointer to the row TU pointer */
2928  
2929      UWORD8 **ppu1_row_ecd_data; /* Pointer to the row ecd-data byte pointer */
2930  
2931      WORD32 *pi4_num_pus_in_ctb; /* Pointer to the count of PUs in the CTB */
2932  
2933      WORD32 *pi4_last_cu_pos_in_ctb; /* Pointer to the last CU position in the CTB */
2934  
2935      WORD32 *pi4_last_cu_size; /* Pointer to the last CU size */
2936  
2937      UWORD8 *pu1_num_cus_in_ctb_out; /* Pointer to the output count of CUs in the CTB */
2938  
2939  } cu_final_update_prms;
2940  
2941  typedef struct /* Parameter bundle passed to pf_final_rdopt_mode_prcs-type functions (see that typedef in this header) */
2942  {
2943      cu_nbr_prms_t *ps_cu_nbr_prms; /* CU neighbour parameters */
2944  
2945      cu_inter_cand_t *ps_best_inter_cand; /* Best inter candidate */
2946  
2947      enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms; /* Chroma CU buffer parameters */
2948  
2949      WORD32 packed_pred_mode;
2950  
2951      WORD32 rd_opt_best_idx; /* Index of the best RD-opt entry (indexed container not visible here) */
2952  
2953      void *pv_src; /* Source buffer (untyped) and its stride */
2954  
2955      WORD32 src_strd;
2956  
2957      void *pv_pred; /* Prediction buffer (untyped) and its stride */
2958  
2959      WORD32 pred_strd;
2960  
2961      void *pv_pred_chrm; /* Chroma prediction buffer (untyped) and its stride */
2962  
2963      WORD32 pred_chrm_strd;
2964  
2965      UWORD8 *pu1_final_ecd_data;
2966  
2967      UWORD8 *pu1_csbf_buf; /* CSBF buffer and its stride */
2968  
2969      WORD32 csbf_strd;
2970  
2971      void *pv_luma_recon; /* Luma recon buffer (untyped) and its stride */
2972  
2973      WORD32 recon_luma_strd;
2974  
2975      void *pv_chrm_recon; /* Chroma recon buffer (untyped) and its stride */
2976  
2977      WORD32 recon_chrma_strd;
2978  
2979      UWORD8 u1_cu_pos_x; /* CU position and size (units not visible here) */
2980  
2981      UWORD8 u1_cu_pos_y;
2982  
2983      UWORD8 u1_cu_size;
2984  
2985      WORD8 i1_cu_qp; /* CU QP (signed 8-bit) */
2986  
2987      UWORD8 u1_will_cabac_state_change;
2988  
2989      UWORD8 u1_recompute_sbh_and_rdoq;
2990  
2991      UWORD8 u1_is_first_pass;
2992  
2993  #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS /* member below exists only when this macro is non-zero, so the struct layout depends on it */
2994      UWORD8 u1_is_cu_noisy;
2995  #endif
2996  
2997  } final_mode_process_prms_t;
2998  
2999  typedef struct /* Final mode state: best inter candidate plus embedded luma and chroma prediction buffers */
3000  {
3001      cu_inter_cand_t s_best_cand; /* Best candidate, stored by value */
3002  
3003      /* The size is twice what is required, to ensure availability */
3004      /* of adequate space for the 'HBD' case */
3005      UWORD8 au1_pred_luma[MAX_CU_SIZE * MAX_CU_SIZE * 2];
3006  
3007      /* The size is twice what is required, to ensure availability */
3008      /* of adequate space for the 422 case */
3009      UWORD8 au1_pred_chroma[MAX_CU_SIZE * MAX_CU_SIZE * 2];
3010  } final_mode_state_t;
3011  
3012  typedef struct /* Parameters for the mixed inter modes selector (per the type name); consumer not visible in this section */
3013  {
3014      cu_mixed_mode_inter_t *ps_mixed_modes_datastore; /* Mixed-mode data store */
3015  
3016      cu_inter_cand_t *ps_me_cands; /* ME candidates; count presumably given by u1_num_me_cands - confirm */
3017  
3018      cu_inter_cand_t *ps_merge_cands; /* Merge candidates; count presumably given by u1_num_merge_cands - confirm */
3019  
3020      mv_pred_ctxt_t *ps_mv_pred_ctxt; /* MV prediction context */
3021  
3022      inter_pred_ctxt_t *ps_mc_ctxt; /* Inter prediction context */
3023  
3024      UWORD8 *pu1_ctb_nbr_map; /* CTB neighbour map; presumably strided by i4_ctb_nbr_map_stride - confirm */
3025  
3026      void *pv_src; /* Source buffer (untyped); presumably strided by i4_src_strd - confirm */
3027  
3028      nbr_4x4_t *ps_cu_nbr_buf; /* 4x4 neighbour data: CU buffer, left, top and top-left (this and next three members) */
3029  
3030      nbr_4x4_t *ps_left_nbr_4x4;
3031  
3032      nbr_4x4_t *ps_top_nbr_4x4;
3033  
3034      nbr_4x4_t *ps_topleft_nbr_4x4;
3035  
3036      WORD32 i4_ctb_nbr_map_stride;
3037  
3038      WORD32 i4_src_strd;
3039  
3040      WORD32 i4_nbr_4x4_left_strd;
3041  
3042      UWORD8 u1_cu_size; /* CU size and position (units not visible here) */
3043  
3044      UWORD8 u1_cu_pos_x;
3045  
3046      UWORD8 u1_cu_pos_y;
3047  
3048      UWORD8 u1_num_me_cands;
3049  
3050      UWORD8 u1_num_merge_cands;
3051  
3052      UWORD8 u1_max_num_mixed_mode_cands_to_select;
3053  
3054      UWORD8 u1_max_merge_candidates;
3055  
3056      UWORD8 u1_use_satd_for_merge_eval; /* Flag (by name): use SATD for merge evaluation */
3057  
3058  } ihevce_mixed_inter_modes_selector_prms_t;
3059  
3060  typedef struct /* Per-node TU data; used by value for luma, Cb and Cr in tu_tree_node_t (declared in this header) */
3061  {
3062      LWORD64 i8_ssd; /* SSD value (64-bit) */
3063  
3064      LWORD64 i8_cost; /* Cost value (64-bit) */
3065  
3066  #if ENABLE_INTER_ZCU_COST /* member below exists only when this macro is non-zero, so the struct layout depends on it */
3067      LWORD64 i8_not_coded_cost;
3068  #endif
3069  
3070      UWORD32 u4_sad; /* SAD value */
3071  
3072      WORD32 i4_bits; /* Bit count */
3073  
3074      WORD32 i4_num_bytes_used_for_ecd;
3075  
3076      WORD32 i4_zero_col;
3077  
3078      WORD32 i4_zero_row;
3079  
3080      UWORD8 u1_cbf;
3081  
3082      UWORD8 u1_reconBufId; /* Recon buffer id (referenced buffer set not visible here) */
3083  
3084      UWORD8 u1_is_valid_node; /* Validity flag for this node data */
3085  
3086      UWORD8 u1_size; /* Size and position of the TU (units not visible here) */
3087  
3088      UWORD8 u1_posx;
3089  
3090      UWORD8 u1_posy;
3091  } tu_node_data_t;
3092  
3093  typedef struct tu_tree_node_t /* Self-referential TU tree node with four child links and per-component node data */
3094  {
3095      struct tu_tree_node_t *ps_child_node_tl; /* Child links; tl/tr/bl/br presumably mean top-left, top-right, bottom-left, bottom-right - confirm */
3096  
3097      struct tu_tree_node_t *ps_child_node_tr;
3098  
3099      struct tu_tree_node_t *ps_child_node_bl;
3100  
3101      struct tu_tree_node_t *ps_child_node_br;
3102  
3103      tu_node_data_t s_luma_data; /* Luma data for this node, stored by value */
3104  
3105      /* 2 because of the 2 subTUs when input is 422 */
3106      tu_node_data_t as_cb_data[2];
3107  
3108      tu_node_data_t as_cr_data[2]; /* Same sizing as as_cb_data */
3109  
3110      UWORD8 u1_is_valid_node; /* Node-level validity flag (separate from the flag inside each tu_node_data_t) */
3111  
3112  } tu_tree_node_t;
3113  
3114  /*****************************************************************************/
3115  /* Extern Variable Declarations                                              */
3116  /*****************************************************************************/
3117  
3118  /*****************************************************************************/
3119  /* Extern Function Declarations                                              */
3120  /*****************************************************************************/
3121  
3122  /*****************************************************************************/
3123  /* Typedefs                                                                  */
3124  /*****************************************************************************/
3125  typedef LWORD64 (*pf_cu_mode_decide)( /* Function-pointer type for a CU mode-decision routine; returns LWORD64 (presumably a cost - confirm) */
3126      ihevce_enc_loop_ctxt_t *ps_ctxt, /* encode loop context */
3127      enc_loop_cu_prms_t *ps_cu_prms, /* CU-level parameters */
3128      cu_analyse_t *ps_cu_analyse, /* CU analysis data */
3129      final_mode_state_t *ps_final_mode_state, /* final mode state (best candidate and prediction buffers) */
3130      UWORD8 *pu1_ecd_data, /* ecd data byte buffer */
3131      pu_col_mv_t *ps_col_pu, /* collocated PU MV data (per type name) */
3132      UWORD8 *pu1_col_pu_map, /* collocated PU map */
3133      WORD32 col_start_pu_idx); /* start index into the collocated PU data (presumably) */
3134  
3135  typedef LWORD64 (*pf_inter_rdopt_cu_mc_mvp)( /* Function-pointer type for an inter RD-opt MC/MVP routine (per name); returns LWORD64 (meaning not visible here) */
3136      ihevce_enc_loop_ctxt_t *ps_ctxt, /* encode loop context */
3137      cu_inter_cand_t *ps_inter_cand, /* inter candidate to process */
3138      WORD32 cu_size, /* CU size (units not visible here) */
3139      WORD32 cu_pos_x, /* CU x position */
3140      WORD32 cu_pos_y, /* CU y position */
3141      nbr_4x4_t *ps_left_nbr_4x4, /* left 4x4 neighbour data */
3142      nbr_4x4_t *ps_top_nbr_4x4, /* top 4x4 neighbour data */
3143      nbr_4x4_t *ps_topleft_nbr_4x4, /* top-left 4x4 neighbour data */
3144      WORD32 nbr_4x4_left_strd, /* stride of the left neighbour data */
3145      WORD32 curr_buf_idx); /* index of the current buffer (buffer set not visible here) */
3146  
3147  typedef LWORD64 (*pf_inter_rdopt_cu_ntu)( /* Function-pointer type for an inter RD-opt CU routine (per name); returns LWORD64 (presumably a cost - confirm) */
3148      ihevce_enc_loop_ctxt_t *ps_ctxt, /* encode loop context */
3149      enc_loop_cu_prms_t *ps_cu_prms, /* CU-level parameters */
3150      void *pv_src, /* source buffer (untyped) */
3151      WORD32 cu_size, /* CU size (units not visible here) */
3152      WORD32 cu_pos_x, /* CU x position */
3153      WORD32 cu_pos_y, /* CU y position */
3154      WORD32 curr_buf_idx, /* index of the current buffer (buffer set not visible here) */
3155      enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms, /* chroma CU buffer parameters */
3156      cu_inter_cand_t *ps_inter_cand, /* inter candidate to process */
3157      cu_analyse_t *ps_cu_analyse, /* CU analysis data */
3158      WORD32 i4_alpha_stim_multiplier); /* multiplier value (semantics not visible here) */
3159  
3160  typedef void (*pf_intra_chroma_pred_mode_selector)( /* Function-pointer type for an intra chroma prediction mode selector (per name); no return value */
3161      ihevce_enc_loop_ctxt_t *ps_ctxt, /* encode loop context */
3162      enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms, /* chroma CU buffer parameters */
3163      cu_analyse_t *ps_cu_analyse, /* CU analysis data */
3164      WORD32 rd_opt_curr_idx, /* current RD-opt index (indexed container not visible here) */
3165      WORD32 tu_mode, /* TU mode (value set not visible here) */
3166      WORD32 i4_alpha_stim_multiplier, /* multiplier value (semantics not visible here) */
3167      UWORD8 u1_is_cu_noisy); /* noisy-CU flag; unconditional here, unlike the macro-guarded member in final_mode_process_prms_t */
3168  
3169  typedef LWORD64 (*pf_intra_rdopt_cu_ntu)( /* Function-pointer type for an intra RD-opt CU routine (per name); returns LWORD64 (presumably a cost - confirm) */
3170      ihevce_enc_loop_ctxt_t *ps_ctxt, /* encode loop context */
3171      enc_loop_cu_prms_t *ps_cu_prms, /* CU-level parameters */
3172      void *pv_pred_org, /* prediction buffer (untyped) */
3173      WORD32 pred_strd_org, /* stride of pv_pred_org (presumably) */
3174      enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms, /* chroma CU buffer parameters */
3175      UWORD8 *pu1_luma_mode, /* luma mode(s), passed by pointer */
3176      cu_analyse_t *ps_cu_analyse, /* CU analysis data */
3177      void *pv_curr_src, /* current source buffer (untyped) */
3178      void *pv_cu_left, /* left neighbour buffer of the CU (untyped) */
3179      void *pv_cu_top, /* top neighbour buffer of the CU (untyped) */
3180      void *pv_cu_top_left, /* top-left neighbour buffer of the CU (untyped) */
3181      nbr_4x4_t *ps_left_nbr_4x4, /* left 4x4 neighbour data */
3182      nbr_4x4_t *ps_top_nbr_4x4, /* top 4x4 neighbour data */
3183      WORD32 nbr_4x4_left_strd, /* stride of the left 4x4 neighbour data */
3184      WORD32 cu_left_stride, /* stride of pv_cu_left (presumably) */
3185      WORD32 curr_buf_idx, /* index of the current buffer (buffer set not visible here) */
3186      WORD32 func_proc_mode, /* processing mode selector (value set not visible here) */
3187      WORD32 i4_alpha_stim_multiplier); /* multiplier value (semantics not visible here) */
3188  
3189  typedef void (*pf_final_rdopt_mode_prcs)( /* Function-pointer type for final RD-opt mode processing (per name); no return value */
3190      ihevce_enc_loop_ctxt_t *ps_ctxt, final_mode_process_prms_t *ps_prms); /* encode loop context; parameter bundle */
3191  
3192  typedef void (*pf_store_cu_results)( /* Function-pointer type for storing CU results (per name); no return value */
3193      ihevce_enc_loop_ctxt_t *ps_ctxt, /* encode loop context */
3194      enc_loop_cu_prms_t *ps_cu_prms, /* CU-level parameters */
3195      final_mode_state_t *ps_final_state); /* final mode state (best candidate and prediction buffers) */
3196  
3197  typedef void (*pf_enc_loop_cu_bot_copy)( /* Function-pointer type for a CU bottom copy routine (per name); no return value */
3198      ihevce_enc_loop_ctxt_t *ps_ctxt, /* encode loop context */
3199      enc_loop_cu_prms_t *ps_cu_prms, /* CU-level parameters */
3200      ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt, /* encoder CU node context */
3201      WORD32 curr_cu_pos_in_row, /* current CU position within the row */
3202      WORD32 curr_cu_pos_in_ctb); /* current CU position within the CTB */
3203  
3204  typedef void (*pf_enc_loop_ctb_left_copy)( /* Function-pointer type for a CTB left copy routine (per name); no return value */
3205      ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_prms_t *ps_cu_prms); /* encode loop context; CU-level parameters */
3206  
3207  #endif /* _IHEVCE_ENC_LOOP_STRUCTS_H_ */
3208