1 /******************************************************************************
2 *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /*!
22 ******************************************************************************
23 * \file ihevce_me_pass.c
24 *
25 * \brief
26 * Converts the language of the encoder to language of me. This is an i/f
27 * between the encoder style APIs and ME style APIs. This is basically
28 * a memoryless glue layer.
29 *
30 * \date
31 * 22/10/2012
32 *
33 * \author
34 * Ittiam
35 *
36 *
37 * List of Functions
38 *
39 *
40 ******************************************************************************
41 */
42
43 /*****************************************************************************/
44 /* File Includes */
45 /*****************************************************************************/
46 /* System include files */
47 #include <stdio.h>
48 #include <string.h>
49 #include <stdlib.h>
50 #include <assert.h>
51 #include <stdarg.h>
52 #include <math.h>
53
54 /* User include files */
55 #include "ihevc_typedefs.h"
56 #include "itt_video_api.h"
57 #include "ihevce_api.h"
58
59 #include "rc_cntrl_param.h"
60 #include "rc_frame_info_collector.h"
61 #include "rc_look_ahead_params.h"
62
63 #include "ihevc_debug.h"
64 #include "ihevc_defs.h"
65 #include "ihevc_structs.h"
66 #include "ihevc_platform_macros.h"
67 #include "ihevc_deblk.h"
68 #include "ihevc_itrans_recon.h"
69 #include "ihevc_chroma_itrans_recon.h"
70 #include "ihevc_chroma_intra_pred.h"
71 #include "ihevc_intra_pred.h"
72 #include "ihevc_inter_pred.h"
73 #include "ihevc_mem_fns.h"
74 #include "ihevc_padding.h"
75 #include "ihevc_weighted_pred.h"
76 #include "ihevc_sao.h"
77 #include "ihevc_resi_trans.h"
78 #include "ihevc_quant_iquant_ssd.h"
79 #include "ihevc_cabac_tables.h"
80
81 #include "ihevce_defs.h"
82 #include "ihevce_lap_enc_structs.h"
83 #include "ihevce_multi_thrd_structs.h"
84 #include "ihevce_me_common_defs.h"
85 #include "ihevce_had_satd.h"
86 #include "ihevce_error_codes.h"
87 #include "ihevce_bitstream.h"
88 #include "ihevce_cabac.h"
89 #include "ihevce_rdoq_macros.h"
90 #include "ihevce_function_selector.h"
91 #include "ihevce_enc_structs.h"
92 #include "ihevce_entropy_structs.h"
93 #include "ihevce_cmn_utils_instr_set_router.h"
94 #include "ihevce_enc_loop_structs.h"
95 #include "ihevce_inter_pred.h"
96
97 #include "hme_datatype.h"
98 #include "hme_interface.h"
99 #include "hme_common_defs.h"
100 #include "hme_defs.h"
101 #include "ihevce_me_instr_set_router.h"
102 #include "hme_utils.h"
103 #include "hme_coarse.h"
104 #include "hme_refine.h"
105 #include "hme_function_selector.h"
106 #include "ihevce_me_pass.h"
107
108 #include "cast_types.h"
109 #include "osal.h"
110 #include "osal_defaults.h"
111
112 /*****************************************************************************/
113 /* Macros */
114 /*****************************************************************************/
115
/* Scaler selection switches. At most one of the three may be set to 1;     */
/* the preprocessor check right below rejects any other combination.        */

/** Original simple five tap scaler */
#define FIVE_TAP_ORIG_SCALER 0

/** Simple gaussian filter; blurs the image a bit */
#define SIMPLE_GAUSSIAN_SCALER 0

/** Lanczos scaler; gives sharper images */
#define LANCZOS_SCALER 1

#if(FIVE_TAP_ORIG_SCALER + SIMPLE_GAUSSIAN_SCALER + LANCZOS_SCALER) > 1
#error "HME ERROR: Only one scaler can be enabled at a time"
#endif

/* Saturating addition: z = x + y.                                          */
/* If the stored sum compares below either operand, z is replaced with      */
/* MAX_INTRA_COST_IPE. Every argument is expanded more than once, so pass   */
/* side-effect-free expressions only.                                       */
/* NOTE(review): the check also fires when an operand is negative;          */
/* presumably callers pass non-negative costs - confirm at the call sites.  */
#define SATURATED_ADD(z, x, y)                                                                     \
    {                                                                                              \
        (z) = (x) + (y);                                                                           \
        if(((z) < (x)) || ((z) < (y)))                                                             \
        {                                                                                          \
            (z) = MAX_INTRA_COST_IPE;                                                              \
        }                                                                                          \
    }

/* Saturating subtraction: z = x - y, clamped to 0 when the stored          */
/* difference is negative. Arguments are expanded more than once.           */
#define SATURATED_SUB(z, x, y)                                                                     \
    {                                                                                              \
        (z) = (x) - (y);                                                                           \
        if((z) < 0)                                                                                \
        {                                                                                          \
            (z) = 0;                                                                               \
        }                                                                                          \
    }
144
145 /*****************************************************************************/
146 /* Function Definitions */
147 /*****************************************************************************/
148
149 /*!
150 ******************************************************************************
151 * \if Function name : ihevce_me_get_num_mem_recs \endif
152 *
153 * \brief
154 * Number of memory records are returned for ME module
155 * Note : Include TOT MEM. req. for ME + TOT MEM. req. for Dep Mngr for L0 ME
156 *
157 * \return
158 * Number of memory records
159 *
160 * \author
161 * Ittiam
162 *
163 *****************************************************************************
164 */
ihevce_me_get_num_mem_recs(WORD32 i4_num_me_frm_pllel)165 WORD32 ihevce_me_get_num_mem_recs(WORD32 i4_num_me_frm_pllel)
166 {
167 WORD32 me_mem_recs = hme_enc_num_alloc(i4_num_me_frm_pllel);
168
169 return (me_mem_recs);
170 }
171
/*!
******************************************************************************
* \if Function name : ihevce_derive_me_init_prms \endif
*
* \brief
*    Fills the HME init parameter structure from the encoder's static
*    configuration for one target resolution.
*
* \param[in]  ps_init_prms      : Create time static parameters
* \param[out] ps_hme_init_prms  : HME init parameters populated by this call
* \param[in]  i4_num_proc_thrds : Stored as is in the HME init parameters
* \param[in]  i4_resolution_id  : Index into s_tgt_lyr_prms.as_tgt_params[]
*
* \return
*    None
*
*****************************************************************************
*/
void ihevce_derive_me_init_prms(
    ihevce_static_cfg_params_t *ps_init_prms,
    hme_init_prms_t *ps_hme_init_prms,
    S32 i4_num_proc_thrds,
    S32 i4_resolution_id)
{
    /* Snapshot of the static config values this function reads repeatedly */
    const WORD32 i4_field_pic = ps_init_prms->s_src_prms.i4_field_pic;
    const WORD32 i4_min_cu_dim = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size;
    const WORD32 i4_tgt_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
    const WORD32 i4_tgt_ht =
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
    const WORD32 i4_preset =
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
    const WORD32 i4_vqet = ps_init_prms->s_coding_tools_prms.i4_vqet;

    /* Max number of ref frames; the default is shifted left by the field flag */
    ps_hme_init_prms->max_num_ref = ((DEFAULT_MAX_REFERENCE_PICS) << i4_field_pic);

    /* Layer 0 dimensions: target size plus the SET_CTB_ALIGN() term computed */
    /* against the minimum CU dimension                                       */
    ps_hme_init_prms->a_wd[0] = i4_tgt_wd + SET_CTB_ALIGN(i4_tgt_wd, i4_min_cu_dim);
    ps_hme_init_prms->a_ht[0] = i4_tgt_ht + SET_CTB_ALIGN(i4_tgt_ht, i4_min_cu_dim);

    /* 4 results per blk in the coarsest layer: 8x4L, 8x4R, 4x8T, 4x8B */
    ps_hme_init_prms->max_num_results_coarse = 4;

    /* At most 2 results per partition in every refinement layer */
    ps_hme_init_prms->max_num_results = 2;

    /* Explicit search across all ref frames in all but the final layer, */
    /* assuming about 4 layers for 1080p                                  */
    ps_hme_init_prms->num_layers_explicit_search = 3;

    /* Max transform tree depth for inter (non-I) */
    ps_hme_init_prms->u1_max_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_nI;

    /* Only a log2 CTB size of 6 is accepted */
    ps_hme_init_prms->log_ctb_size = ps_init_prms->s_config_prms.i4_max_log2_cu_size;
    ASSERT(ps_hme_init_prms->log_ctb_size == 6);

    /* Currently encoding only 1 layer */
    ps_hme_init_prms->num_simulcast_layers = 1;

    /* Feature not yet supported */
    ps_hme_init_prms->segment_higher_layers = 0;

    /* 4x4 in the intermediate refinement layers: switched off for presets  */
    /* P4..P7 unless OLD_XTREME_SPEED is set, in which case it is always on */
#if !OLD_XTREME_SPEED
    ps_hme_init_prms->use_4x4 =
        !((IHEVCE_QUALITY_P4 == i4_preset) || (IHEVCE_QUALITY_P5 == i4_preset) ||
          (IHEVCE_QUALITY_P6 == i4_preset) || (IHEVCE_QUALITY_P7 == i4_preset));
#else
    ps_hme_init_prms->use_4x4 = 1;
#endif

    ps_hme_init_prms->num_b_frms =
        (1 << ps_init_prms->s_coding_tools_prms.i4_max_temporal_layers) - 1;

    ps_hme_init_prms->i4_num_proc_thrds = i4_num_proc_thrds;

    /* Map the encoder quality preset to the ME preset and to the number of */
    /* half-pel / quarter-pel refinement steps. For any preset not listed   */
    /* below these three fields are left untouched.                         */
    switch(i4_preset)
    {
    case IHEVCE_QUALITY_P0:
        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_PRISTINE_QUALITY;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 3;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 3;
        break;

    case IHEVCE_QUALITY_P2:
        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_HIGH_QUALITY;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 3;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 3;
        break;

    case IHEVCE_QUALITY_P3:
        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_MEDIUM_SPEED;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 2;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 2;
        break;

    case IHEVCE_QUALITY_P4:
        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_HIGH_SPEED;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 1;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 1;
        break;

    case IHEVCE_QUALITY_P5:
        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_XTREME_SPEED;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 1;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 1;
        break;

    case IHEVCE_QUALITY_P6:
        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_XTREME_SPEED_25;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 1;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 1;
        break;

    case IHEVCE_QUALITY_P7:
        /* Same ME preset as P6, but without quarter-pel refinement */
        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_XTREME_SPEED_25;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 1;
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 0;
        break;

    default:
        break;
    }

    ps_hme_init_prms->s_me_coding_tools.u1_l0_me_controlled_via_cmd_line = 0;

    /* Search range, architecture and interlace flag straight from static params */
    ps_hme_init_prms->max_horz_search_range = ps_init_prms->s_config_prms.i4_max_search_range_horz;
    ps_hme_init_prms->max_vert_search_range = ps_init_prms->s_config_prms.i4_max_search_range_vert;
    ps_hme_init_prms->e_arch_type = ps_init_prms->e_arch_type;
    ps_hme_init_prms->is_interlaced = (i4_field_pic == IV_INTERLACED);

    /* Enabled only when both the control-toggler bit and the */
    /* noise-preservation bit are set in i4_vqet              */
    ps_hme_init_prms->u1_is_stasino_enabled =
        ((i4_vqet & (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
         (i4_vqet & (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)));
}
317
318 /*!
319 ******************************************************************************
320 * \if Function name : ihevce_me_get_mem_recs \endif
321 *
322 * \brief
323 * Memory requirements are returned for ME.
324 *
325 * \param[in,out] ps_mem_tab : pointer to memory descriptors table
326 * \param[in] ps_init_prms : Create time static parameters
327 * \param[in] i4_num_proc_thrds : Number of processing threads for this module
328 * \param[in] i4_mem_space : memspace in whihc memory request should be done
329 *
330 * \return
331 * Number of records
332 *
333 * \author
334 * Ittiam
335 *
336 *****************************************************************************
337 */
ihevce_me_get_mem_recs(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_num_proc_thrds,WORD32 i4_mem_space,WORD32 i4_resolution_id,WORD32 i4_num_me_frm_pllel)338 WORD32 ihevce_me_get_mem_recs(
339 iv_mem_rec_t *ps_mem_tab,
340 ihevce_static_cfg_params_t *ps_init_prms,
341 WORD32 i4_num_proc_thrds,
342 WORD32 i4_mem_space,
343 WORD32 i4_resolution_id,
344 WORD32 i4_num_me_frm_pllel)
345 {
346 hme_memtab_t as_memtabs[MAX_HME_ENC_TOT_MEMTABS];
347 WORD32 n_tabs, i;
348
349 /* Init prms structure specific to HME */
350 hme_init_prms_t s_hme_init_prms;
351
352 /*************************************************************************/
353 /* code flow: we call hme alloc function and then remap those memtabs */
354 /* to a different type of memtab structure. */
355 /*************************************************************************/
356 if(i4_num_me_frm_pllel > 1)
357 {
358 ASSERT(MAX_HME_ENC_TOT_MEMTABS >= hme_enc_num_alloc(i4_num_me_frm_pllel));
359 }
360 else
361 {
362 ASSERT(MIN_HME_ENC_TOT_MEMTABS >= hme_enc_num_alloc(i4_num_me_frm_pllel));
363 }
364
365 /*************************************************************************/
366 /* POPULATE THE HME INIT PRMS */
367 /*************************************************************************/
368 ihevce_derive_me_init_prms(ps_init_prms, &s_hme_init_prms, i4_num_proc_thrds, i4_resolution_id);
369
370 /*************************************************************************/
371 /* CALL THE ME FUNCTION TO GET MEMTABS */
372 /*************************************************************************/
373 n_tabs = hme_enc_alloc(&as_memtabs[0], &s_hme_init_prms, i4_num_me_frm_pllel);
374 ASSERT(n_tabs == hme_enc_num_alloc(i4_num_me_frm_pllel));
375
376 /*************************************************************************/
377 /* REMAP RESULTS TO ENCODER MEMTAB STRUCTURE */
378 /*************************************************************************/
379 for(i = 0; i < n_tabs; i++)
380 {
381 ps_mem_tab[i].i4_mem_size = as_memtabs[i].size;
382 ps_mem_tab[i].i4_mem_alignment = as_memtabs[i].align;
383 ps_mem_tab[i].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
384 ps_mem_tab[i].i4_size = sizeof(iv_mem_rec_t);
385 }
386
387 /*************************************************************************/
388 /* --- L0 ME sync Dep Mngr Mem requests -- */
389 /*************************************************************************/
390 ps_mem_tab += n_tabs;
391
392 return (n_tabs);
393 }
394
395 /*!
396 ******************************************************************************
397 * \if Function name : ihevce_me_init \endif
398 *
399 * \brief
400 * Intialization for ME context state structure .
401 *
402 * \param[in] ps_mem_tab : pointer to memory descriptors table
403 * \param[in] ps_init_prms : Create time static parameters
404 * \param[in] pv_osal_handle : Osal handle
405 *
406 * \return
407 * Handle to the ME context
408 *
409 * \author
410 * Ittiam
411 *
412 *****************************************************************************
413 */
ihevce_me_init(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_num_proc_thrds,void * pv_osal_handle,rc_quant_t * ps_rc_quant_ctxt,void * pv_tile_params_base,WORD32 i4_resolution_id,WORD32 i4_num_me_frm_pllel,UWORD8 u1_is_popcnt_available)414 void *ihevce_me_init(
415 iv_mem_rec_t *ps_mem_tab,
416 ihevce_static_cfg_params_t *ps_init_prms,
417 WORD32 i4_num_proc_thrds,
418 void *pv_osal_handle,
419 rc_quant_t *ps_rc_quant_ctxt,
420 void *pv_tile_params_base,
421 WORD32 i4_resolution_id,
422 WORD32 i4_num_me_frm_pllel,
423 UWORD8 u1_is_popcnt_available)
424 {
425 /* ME handle to be returned */
426 void *pv_me_ctxt;
427 WORD32 status;
428 me_master_ctxt_t *ps_me_ctxt;
429 IV_ARCH_T e_arch_type;
430
431 /* Init prms structure specific to HME */
432 hme_init_prms_t s_hme_init_prms;
433
434 /* memtabs to be passed to hme */
435 hme_memtab_t as_memtabs[MAX_HME_ENC_TOT_MEMTABS];
436 WORD32 n_tabs, i;
437
438 /*************************************************************************/
439 /* POPULATE THE HME INIT PRMS */
440 /*************************************************************************/
441 ihevce_derive_me_init_prms(ps_init_prms, &s_hme_init_prms, i4_num_proc_thrds, i4_resolution_id);
442
443 /*************************************************************************/
444 /* Ensure local declaration is sufficient */
445 /*************************************************************************/
446 n_tabs = hme_enc_num_alloc(i4_num_me_frm_pllel);
447
448 if(i4_num_me_frm_pllel > 1)
449 {
450 ASSERT(MAX_HME_ENC_TOT_MEMTABS >= n_tabs);
451 }
452 else
453 {
454 ASSERT(MIN_HME_ENC_TOT_MEMTABS >= n_tabs);
455 }
456
457 /*************************************************************************/
458 /* MAP RESULTS TO HME MEMTAB STRUCTURE */
459 /*************************************************************************/
460 for(i = 0; i < n_tabs; i++)
461 {
462 as_memtabs[i].size = ps_mem_tab[i].i4_mem_size;
463 as_memtabs[i].align = ps_mem_tab[i].i4_mem_alignment;
464 as_memtabs[i].pu1_mem = (U08 *)ps_mem_tab[i].pv_base;
465 }
466 /*************************************************************************/
467 /* CALL THE ME FUNCTION TO GET MEMTABS */
468 /*************************************************************************/
469 pv_me_ctxt = (void *)as_memtabs[0].pu1_mem;
470 ps_me_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
471 /* Store Tile params base into ME context */
472 ps_me_ctxt->pv_tile_params_base = pv_tile_params_base;
473
474 status = hme_enc_init(
475 pv_me_ctxt, &as_memtabs[0], &s_hme_init_prms, ps_rc_quant_ctxt, i4_num_me_frm_pllel);
476
477 if(status == -1)
478 return NULL;
479
480 /*************************************************************************/
481 /* --- L0 ME sync Dep Mngr Mem init -- */
482 /*************************************************************************/
483 /* Update numer of ME frames running in parallel in me master context */
484 ps_me_ctxt->i4_num_me_frm_pllel = i4_num_me_frm_pllel;
485
486 e_arch_type = ps_init_prms->e_arch_type;
487
488 hme_init_function_ptr(ps_me_ctxt, e_arch_type);
489
490 ihevce_me_instr_set_router(
491 (ihevce_me_optimised_function_list_t *)ps_me_ctxt->pv_me_optimised_function_list,
492 e_arch_type);
493
494 ihevce_cmn_utils_instr_set_router(
495 &ps_me_ctxt->s_cmn_opt_func, u1_is_popcnt_available, e_arch_type);
496
497 ps_mem_tab += n_tabs;
498
499 return (pv_me_ctxt);
500 }
501
502 /**
503 *******************************************************************************
504 * \if Function name : ihevce_me_set_resolution \endif
505 *
506 * \brief
507 * Sets the resolution for ME state
508 *
509 * \par Description:
510 * ME requires information of resolution to prime up its layer descriptors
511 * and contexts. This API is called whenever a control call from application
512 * causes a change of resolution. Has to be called once initially before
513 * processing any frame. Again this is just a glue function and calls the
514 * actual ME API for the same.
515 *
516 * \param[in,out] pv_me_ctxt: Handle to the ME context
517 * \param[in] n_enc_layers: Number of layers getting encoded
518 * \param[in] p_wd : Pointer containing widths of each layer getting encoded.
519 * \param[in] p_ht : Pointer containing heights of each layer getting encoded.
520 *
521 * \returns
522 * none
523 *
524 * \author
525 * Ittiam
526 *
527 *******************************************************************************
528 */
ihevce_me_set_resolution(void * pv_me_ctxt,WORD32 n_enc_layers,WORD32 * p_wd,WORD32 * p_ht)529 void ihevce_me_set_resolution(void *pv_me_ctxt, WORD32 n_enc_layers, WORD32 *p_wd, WORD32 *p_ht)
530 {
531 /* local variables */
532 me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
533 WORD32 thrds;
534 WORD32 i;
535
536 for(thrds = 0; thrds < ps_master_ctxt->i4_num_proc_thrds; thrds++)
537 {
538 me_ctxt_t *ps_me_thrd_ctxt;
539
540 ps_me_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrds];
541
542 for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
543 {
544 hme_set_resolution((void *)ps_me_thrd_ctxt, n_enc_layers, p_wd, p_ht, i);
545 }
546 }
547 }
548
/*!
******************************************************************************
* \if Function name : ihevce_populate_me_ctb_data \endif
*
* \brief
*    Recursively copies the number of best results of every CU tree node
*    into the per-CTB ME data.
*
* \par Description:
*    For the node's CU size (64/32/16/8) the matching block data entry gets
*    the result count of the corresponding CU results structure when the
*    node is valid, and 0 otherwise. The four children are then visited
*    unless the node is 8x8, or the node is valid and the ME preset is not
*    ME_PRISTINE_QUALITY.
*
* \param[in]  ps_ctxt               : ME context (read for the quality preset)
* \param[in]  ps_frm_ctxt           : ME frame context holding the CU results
* \param[in]  ps_cu_tree            : current CU tree node
* \param[out] ps_me_ctb_data        : per-CTB ME data being filled
* \param[in]  e_grandparent_blk_pos : position of the grandparent block
* \param[in]  e_parent_blk_pos      : position of the parent block
* \param[in]  e_cur_blk_pos         : position of this block in its parent
*
* \return
*    None
*
*****************************************************************************
*/
void ihevce_populate_me_ctb_data(
    me_ctxt_t *ps_ctxt,
    me_frm_ctxt_t *ps_frm_ctxt,
    cur_ctb_cu_tree_t *ps_cu_tree,
    me_ctb_data_t *ps_me_ctb_data,
    CU_POS_T e_grandparent_blk_pos,
    CU_POS_T e_parent_blk_pos,
    CU_POS_T e_cur_blk_pos)
{
    const WORD32 i4_cu_size = ps_cu_tree->u1_cu_size;
    const WORD32 i4_node_valid = (ps_cu_tree->is_node_valid) ? 1 : 0;

    cur_ctb_cu_tree_t *aps_child[4];
    CU_POS_T ae_child_pos[4];
    WORD32 i4_child;

    /* Record the result count of this node (0 for an invalid node) */
    if(64 == i4_cu_size)
    {
        if(i4_node_valid)
        {
            ps_me_ctb_data->s_64x64_block_data.num_best_results =
                ps_frm_ctxt->s_cu64x64_results.u1_num_best_results;
        }
        else
        {
            ps_me_ctb_data->s_64x64_block_data.num_best_results = 0;
        }
    }
    else if(32 == i4_cu_size)
    {
        if(i4_node_valid)
        {
            ps_me_ctb_data->as_32x32_block_data[e_cur_blk_pos].num_best_results =
                ps_frm_ctxt->as_cu32x32_results[e_cur_blk_pos].u1_num_best_results;
        }
        else
        {
            ps_me_ctb_data->as_32x32_block_data[e_cur_blk_pos].num_best_results = 0;
        }
    }
    else if(16 == i4_cu_size)
    {
        /* 16x16 index: 4 blocks per 32x32 parent */
        const WORD32 i4_blk_id = e_cur_blk_pos + (e_parent_blk_pos << 2);

        if(i4_node_valid)
        {
            ps_me_ctb_data->as_block_data[i4_blk_id].num_best_results =
                ps_frm_ctxt->as_cu16x16_results[i4_blk_id].u1_num_best_results;
        }
        else
        {
            ps_me_ctb_data->as_block_data[i4_blk_id].num_best_results = 0;
        }
    }
    else if(8 == i4_cu_size)
    {
        /* 8x8 index: 4 per 16x16 parent, 16 per 32x32 grandparent */
        const WORD32 i4_blk_id =
            e_cur_blk_pos + (e_parent_blk_pos << 2) + (e_grandparent_blk_pos << 4);

        if(i4_node_valid)
        {
            ps_me_ctb_data->as_8x8_block_data[i4_blk_id].num_best_results =
                ps_frm_ctxt->as_cu8x8_results[i4_blk_id].u1_num_best_results;
        }
        else
        {
            ps_me_ctb_data->as_8x8_block_data[i4_blk_id].num_best_results = 0;
        }
    }

    /* 8x8 nodes are the leaves of the tree */
    if(8 == i4_cu_size)
    {
        return;
    }

    /* Below a valid node the children are visited only for the pristine preset */
    if(i4_node_valid &&
       (ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets != ME_PRISTINE_QUALITY))
    {
        return;
    }

    /* Children in TL, TR, BL, BR order; the current block becomes the parent */
    /* and the current parent becomes the grandparent                         */
    aps_child[0] = ps_cu_tree->ps_child_node_tl;
    aps_child[1] = ps_cu_tree->ps_child_node_tr;
    aps_child[2] = ps_cu_tree->ps_child_node_bl;
    aps_child[3] = ps_cu_tree->ps_child_node_br;

    ae_child_pos[0] = POS_TL;
    ae_child_pos[1] = POS_TR;
    ae_child_pos[2] = POS_BL;
    ae_child_pos[3] = POS_BR;

    for(i4_child = 0; i4_child < 4; i4_child++)
    {
        ihevce_populate_me_ctb_data(
            ps_ctxt,
            ps_frm_ctxt,
            aps_child[i4_child],
            ps_me_ctb_data,
            e_parent_blk_pos,
            e_cur_blk_pos,
            ae_child_pos[i4_child]);
    }
}
689
/*!
******************************************************************************
* \if Function name : ihevce_me_update_ctb_results \endif
*
* \brief
*    Hooks the CU tree and ME data of one CTB into its ctb_analyse_t entry
*    and fills the ME data from the frame context's CU results.
*
* \param[in] pv_me_ctxt     : ME context (me_ctxt_t)
* \param[in] pv_me_frm_ctxt : ME frame context (me_frm_ctxt_t)
* \param[in] i4_ctb_x       : CTB index within the current row
* \param[in] i4_ctb_y       : not referenced by this function
*
* \return
*    None
*
*****************************************************************************
*/
void ihevce_me_update_ctb_results(
    void *pv_me_ctxt, void *pv_me_frm_ctxt, WORD32 i4_ctb_x, WORD32 i4_ctb_y)
{
    me_ctxt_t *ps_me = (me_ctxt_t *)pv_me_ctxt;
    me_frm_ctxt_t *ps_frm = (me_frm_ctxt_t *)pv_me_frm_ctxt;

    /* Per-CTB slots inside the current row buffers; each CTB owns */
    /* MAX_NUM_NODES_CU_TREE consecutive CU tree nodes             */
    ctb_analyse_t *ps_ctb_analyse = ps_frm->ps_ctb_analyse_curr_row + i4_ctb_x;
    me_ctb_data_t *ps_ctb_me_data = ps_frm->ps_me_ctb_data_curr_row + i4_ctb_x;
    cur_ctb_cu_tree_t *ps_tree_root =
        ps_frm->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);

    ps_ctb_analyse->ps_cu_tree = ps_tree_root;
    ps_ctb_analyse->ps_me_ctb_data = ps_ctb_me_data;

    /* The root node has no position relative to a parent or grandparent */
    ihevce_populate_me_ctb_data(
        ps_me, ps_frm, ps_tree_root, ps_ctb_me_data, POS_NA, POS_NA, POS_NA);
}
711
/*!
******************************************************************************
* \if Function name : ihevce_me_find_poc_in_list \endif
*
* \brief
*    Looks up a picture in a recon picture list by POC and IDR GOP number.
*
* \param[in] pps_rec_list   : list of recon picture buffer pointers
* \param[in] poc            : POC to look for
* \param[in] i4_idr_gop_num : IDR GOP number that must match as well
* \param[in] num_ref        : number of list entries to examine
*
* \return
*    Index of the first matching entry; -1 (after ASSERT(0)) if none matches
*
*****************************************************************************
*/
WORD32 ihevce_me_find_poc_in_list(
    recon_pic_buf_t **pps_rec_list, WORD32 poc, WORD32 i4_idr_gop_num, WORD32 num_ref)
{
    WORD32 i4_idx = 0;

    while(i4_idx < num_ref)
    {
        recon_pic_buf_t *ps_pic = pps_rec_list[i4_idx];

        if((ps_pic->i4_poc == poc) && (ps_pic->i4_idr_gop_num == i4_idr_gop_num))
        {
            return (i4_idx);
        }

        i4_idx++;
    }

    /* should never come here */
    ASSERT(0);
    return (-1);
}
/*!
******************************************************************************
* \if Function name : ihevc_me_update_ref_desc \endif
*
* \brief
*    Fills one ME reference descriptor from a recon picture buffer.
*
* \param[out] ps_ref_desc  : ME reference descriptor to populate
* \param[in]  ps_recon_pic : recon picture supplying buffers, POC, weights
* \param[in]  ref_id_l0    : id of the picture in list L0
* \param[in]  ref_id_l1    : id of the picture in list L1
* \param[in]  ref_id_lc    : id of the picture in the combined list
* \param[in]  is_fwd       : stored as u1_is_fwd; past (fwd) vs future (bck)
*
* \return
*    None
*
*****************************************************************************
*/
void ihevc_me_update_ref_desc(
    hme_ref_desc_t *ps_ref_desc,
    recon_pic_buf_t *ps_recon_pic,
    WORD32 ref_id_l0,
    WORD32 ref_id_l1,
    WORD32 ref_id_lc,
    WORD32 is_fwd)
{
    iv_enc_yuv_buf_t *ps_rec_yuv = (iv_enc_yuv_buf_t *)&ps_recon_pic->s_yuv_buf_desc;
    iv_enc_yuv_buf_t *ps_src_yuv = (iv_enc_yuv_buf_t *)&ps_recon_pic->s_yuv_buf_desc_src;
    hme_ref_buf_info_t *ps_buf_info = &ps_ref_desc->as_ref_info[0];

    /* ---- Picture level identification ---- */

    /* L0, L1 and LC id */
    ps_ref_desc->i1_ref_id_l0 = ref_id_l0;
    ps_ref_desc->i1_ref_id_l1 = ref_id_l1;
    ps_ref_desc->i1_ref_id_lc = ref_id_lc;

    /* POC, display number and GOP number of the ref pic */
    ps_ref_desc->i4_poc = ps_recon_pic->i4_poc;
    ps_ref_desc->i4_display_num = ps_recon_pic->i4_display_num;
    ps_ref_desc->i4_GOP_num = ps_recon_pic->i4_idr_gop_num;

    /* Whether this picture is in past (fwd) or future (bck) */
    ps_ref_desc->u1_is_fwd = is_fwd;

    /* Luma weight and offset of the reference picture */
    ps_ref_desc->i2_weight = ps_recon_pic->s_weight_offset.i2_luma_weight;
    ps_ref_desc->i2_offset = ps_recon_pic->s_weight_offset.i2_luma_offset;

    /* ---- Buffer description (entry 0 of as_ref_info) ---- */

    /* Padding beyond 64 is not of use to ME */
    ps_buf_info->u1_pad_x = MIN(64, PAD_HORZ);
    ps_buf_info->u1_pad_y = MIN(64, PAD_VERT);

    /* Luma stride; the supplied pointers are taken as the (0, 0) position, */
    /* hence a zero offset                                                  */
    ps_buf_info->luma_stride = ps_rec_yuv->i4_y_strd;
    ps_buf_info->luma_offset = 0;

    /* 4 planes: fxfy is the direct recon buf, the rest are subpel planes; */
    /* the pointers are used as supplied, without any padding offset       */
    ps_buf_info->pu1_rec_fxfy = (UWORD8 *)ps_rec_yuv->pv_y_buf;
    ps_buf_info->pu1_rec_hxfy = ps_recon_pic->apu1_y_sub_pel_planes[0];
    ps_buf_info->pu1_rec_fxhy = ps_recon_pic->apu1_y_sub_pel_planes[1];
    ps_buf_info->pu1_rec_hxhy = ps_recon_pic->apu1_y_sub_pel_planes[2];
    ps_buf_info->pu1_ref_src = (UWORD8 *)ps_src_yuv->pv_y_buf;

    /* Chroma pointers, offset and stride; same treatment as luma */
    ps_buf_info->pu1_rec_u = (U08 *)ps_rec_yuv->pv_u_buf;
    ps_buf_info->pu1_rec_v = (U08 *)ps_rec_yuv->pv_v_buf;
    ps_buf_info->chroma_offset = 0;
    ps_buf_info->chroma_stride = ps_rec_yuv->i4_uv_strd;

    /* Dependency manager handle of the recon picture */
    ps_buf_info->pv_dep_mngr = ps_recon_pic->pv_dep_mngr_recon;
}
790
791 /* Create the reference map for ME */
ihevce_me_create_ref_map(recon_pic_buf_t ** pps_rec_list_l0,recon_pic_buf_t ** pps_rec_list_l1,WORD32 num_ref_l0_active,WORD32 num_ref_l1_active,WORD32 num_ref,hme_ref_map_t * ps_ref_map)792 void ihevce_me_create_ref_map(
793 recon_pic_buf_t **pps_rec_list_l0,
794 recon_pic_buf_t **pps_rec_list_l1,
795 WORD32 num_ref_l0_active,
796 WORD32 num_ref_l1_active,
797 WORD32 num_ref,
798 hme_ref_map_t *ps_ref_map)
799 {
800 WORD32 min_ref, i, poc, ref_id_l0, ref_id_l1;
801
802 /* tracks running count of ref pics */
803 WORD32 ref_count = 0, i4_idr_gop_num;
804
805 /* points to One instance of a ref pic structure */
806 recon_pic_buf_t *ps_recon_pic;
807
808 /* points to one instance of ref desc str used by ME */
809 hme_ref_desc_t *ps_ref_desc;
810
811 min_ref = MIN(num_ref_l0_active, num_ref_l1_active);
812
813 for(i = 0; i < min_ref; i++)
814 {
815 /* Create interleaved L0 and L1 entries */
816 ps_ref_desc = &ps_ref_map->as_ref_desc[ref_count];
817 ps_recon_pic = pps_rec_list_l0[i];
818 poc = ps_recon_pic->i4_poc;
819 i4_idr_gop_num = ps_recon_pic->i4_idr_gop_num;
820 ref_id_l0 = i;
821 ref_id_l1 = ihevce_me_find_poc_in_list(pps_rec_list_l1, poc, i4_idr_gop_num, num_ref);
822 ihevc_me_update_ref_desc(ps_ref_desc, ps_recon_pic, ref_id_l0, ref_id_l1, 2 * i, 1);
823
824 ref_count++;
825
826 ps_ref_desc = &ps_ref_map->as_ref_desc[ref_count];
827 ps_recon_pic = pps_rec_list_l1[i];
828 poc = ps_recon_pic->i4_poc;
829 i4_idr_gop_num = ps_recon_pic->i4_idr_gop_num;
830 ref_id_l1 = i;
831 ref_id_l0 = ihevce_me_find_poc_in_list(pps_rec_list_l0, poc, i4_idr_gop_num, num_ref);
832 ihevc_me_update_ref_desc(ps_ref_desc, ps_recon_pic, ref_id_l0, ref_id_l1, 2 * i + 1, 0);
833
834 ref_count++;
835 }
836
837 if(num_ref_l0_active > min_ref)
838 {
839 for(i = 0; i < (num_ref_l0_active - min_ref); i++)
840 {
841 ps_ref_desc = &ps_ref_map->as_ref_desc[ref_count];
842 ref_id_l0 = i + min_ref;
843 ps_recon_pic = pps_rec_list_l0[ref_id_l0];
844 poc = ps_recon_pic->i4_poc;
845 i4_idr_gop_num = ps_recon_pic->i4_idr_gop_num;
846 ref_id_l1 = ihevce_me_find_poc_in_list(pps_rec_list_l1, poc, i4_idr_gop_num, num_ref);
847 ihevc_me_update_ref_desc(
848 ps_ref_desc, ps_recon_pic, ref_id_l0, ref_id_l1, 2 * min_ref + i, 1);
849 ref_count++;
850 }
851 }
852 else
853 {
854 for(i = 0; i < (num_ref_l1_active - min_ref); i++)
855 {
856 ps_ref_desc = &ps_ref_map->as_ref_desc[ref_count];
857 ref_id_l1 = i + min_ref;
858 ps_recon_pic = pps_rec_list_l1[ref_id_l1];
859 poc = ps_recon_pic->i4_poc;
860 i4_idr_gop_num = ps_recon_pic->i4_idr_gop_num;
861 ref_id_l0 = ihevce_me_find_poc_in_list(pps_rec_list_l0, poc, i4_idr_gop_num, num_ref);
862 ihevc_me_update_ref_desc(
863 ps_ref_desc, ps_recon_pic, ref_id_l0, ref_id_l1, 2 * min_ref + i, 0);
864 ref_count++;
865 }
866 }
867
868 ps_ref_map->i4_num_ref = ref_count;
869 ASSERT(ref_count == (num_ref_l0_active + num_ref_l1_active));
870
871 /* TODO : Fill better values in lambda depending on ref dist */
872 for(i = 0; i < ps_ref_map->i4_num_ref; i++)
873 ps_ref_map->as_ref_desc[i].lambda = 20;
874 }
875
876 /*!
877 ******************************************************************************
878 * \if Function name : ihevce_me_process \endif
879 *
880 * \brief
881 * Frame level ME function
882 *
883 * \par Description:
884 * Processing of all layers starting from coarse and going
885 * to the refinement layers, all layers
886 * that are encoded go CTB by CTB. Outputs of this function are populated
887 * ctb_analyse_t structures, one per CTB.
888 *
889 * \param[in] pv_ctxt : pointer to ME module
890 * \param[in] ps_enc_lap_inp : pointer to input yuv buffer (frame buffer)
891 * \param[in,out] ps_ctb_out : pointer to CTB analyse output structure (frame buffer)
892 * \param[out] ps_cu_out : pointer to CU analyse output structure (frame buffer)
893 * \param[in] pd_intra_costs : pointerto intra cost buffer
894 * \param[in] ps_multi_thrd_ctxt : pointer to multi thread ctxt
895 * \param[in] thrd_id : Thread id of the current thrd in which function is executed
896 *
897 * \return
898 * None
899 *
900 * \author
901 * Ittiam
902 *
903 *****************************************************************************
904 */
ihevce_me_process(void * pv_me_ctxt,ihevce_lap_enc_buf_t * ps_enc_lap_inp,ctb_analyse_t * ps_ctb_out,me_enc_rdopt_ctxt_t * ps_cur_out_me_prms,double * pd_intra_costs,ipe_l0_ctb_analyse_for_me_t * ps_ipe_analyse_ctb,pre_enc_L0_ipe_encloop_ctxt_t * ps_l0_ipe_input,void * pv_coarse_layer,multi_thrd_ctxt_t * ps_multi_thrd_ctxt,WORD32 i4_frame_parallelism_level,WORD32 thrd_id,WORD32 i4_me_frm_id)905 void ihevce_me_process(
906 void *pv_me_ctxt,
907 ihevce_lap_enc_buf_t *ps_enc_lap_inp,
908 ctb_analyse_t *ps_ctb_out,
909 me_enc_rdopt_ctxt_t *ps_cur_out_me_prms,
910 double *pd_intra_costs,
911 ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse_ctb,
912 pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input,
913 void *pv_coarse_layer,
914 multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
915 WORD32 i4_frame_parallelism_level,
916 WORD32 thrd_id,
917 WORD32 i4_me_frm_id)
918 {
919 me_ctxt_t *ps_thrd_ctxt;
920 me_frm_ctxt_t *ps_ctxt;
921
922 PF_EXT_UPDATE_FXN_T pf_ext_update_fxn;
923
924 me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
925 cur_ctb_cu_tree_t *ps_cu_tree_out = ps_cur_out_me_prms->ps_cur_ctb_cu_tree;
926 me_ctb_data_t *ps_me_ctb_data_out = ps_cur_out_me_prms->ps_cur_ctb_me_data;
927 layer_ctxt_t *ps_coarse_layer = (layer_ctxt_t *)pv_coarse_layer;
928
929 pf_ext_update_fxn = (PF_EXT_UPDATE_FXN_T)ihevce_me_update_ctb_results;
930
931 /* get the current thread ctxt pointer */
932 ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrd_id];
933 ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
934 ps_ctxt->thrd_id = thrd_id;
935
936 /* store the ctb out and cu out base pointers */
937 ps_ctxt->ps_ctb_analyse_base = ps_ctb_out;
938
939 ps_ctxt->ps_cu_tree_base = ps_cu_tree_out;
940 ps_ctxt->ps_ipe_l0_ctb_frm_base = ps_ipe_analyse_ctb;
941 ps_ctxt->ps_me_ctb_data_base = ps_me_ctb_data_out;
942 ps_ctxt->ps_func_selector = &ps_master_ctxt->s_func_selector;
943
944 /** currently in master context. Copying that to me context **/
945 /* frame level processing function */
946 hme_process_frm(
947 (void *)ps_thrd_ctxt,
948 ps_l0_ipe_input,
949 &ps_master_ctxt->as_ref_map[i4_me_frm_id],
950 &pd_intra_costs,
951 &ps_master_ctxt->as_frm_prms[i4_me_frm_id],
952 pf_ext_update_fxn,
953 ps_coarse_layer,
954 ps_multi_thrd_ctxt,
955 i4_frame_parallelism_level,
956 thrd_id,
957 i4_me_frm_id);
958 }
959 /*!
960 ******************************************************************************
961 * \if Function name : ihevce_me_frame_dpb_update \endif
962 *
963 * \brief
964 * Frame level ME initialisation function
965 *
966 * \par Description:
967 * Updation of ME's internal DPB
968 * based on available ref list information
969 *
970 * \param[in] pv_ctxt : pointer to ME module
971 * \param[in] num_ref_l0 : Number of reference pics in L0 list
972 * \param[in] num_ref_l1 : Number of reference pics in L1 list
973 * \param[in] pps_rec_list_l0 : List of recon pics in L0 list
974 * \param[in] pps_rec_list_l1 : List of recon pics in L1 list
975 *
976 * \return
977 * None
978 *
979 * \author
980 * Ittiam
981 *
982 *****************************************************************************
983 */
ihevce_me_frame_dpb_update(void * pv_me_ctxt,WORD32 num_ref_l0,WORD32 num_ref_l1,recon_pic_buf_t ** pps_rec_list_l0,recon_pic_buf_t ** pps_rec_list_l1,WORD32 i4_thrd_id)984 void ihevce_me_frame_dpb_update(
985 void *pv_me_ctxt,
986 WORD32 num_ref_l0,
987 WORD32 num_ref_l1,
988 recon_pic_buf_t **pps_rec_list_l0,
989 recon_pic_buf_t **pps_rec_list_l1,
990 WORD32 i4_thrd_id)
991 {
992 me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
993 me_ctxt_t *ps_thrd0_ctxt;
994 WORD32 a_pocs_to_remove[MAX_NUM_REF + 2];
995 WORD32 i, i4_is_buffer_full;
996 WORD32 i4_least_POC = 0x7FFFFFFF;
997 WORD32 i4_least_GOP_num = 0x7FFFFFFF;
998 me_ctxt_t *ps_ctxt;
999
1000 /* All processing done using shared / common memory across */
1001 /* threads is done using thrd ctxt */
1002 ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[i4_thrd_id];
1003
1004 ps_ctxt = (me_ctxt_t *)ps_thrd0_ctxt;
1005 a_pocs_to_remove[0] = INVALID_POC;
1006 /*************************************************************************/
1007 /* Updation of ME's DPB list. This involves the following steps: */
1008 /* 1. Obtain list of active POCs maintained within ME. */
1009 /* 2. Search each of them in the ref list. Whatever is not found goes to */
1010 /* the list to be removed. Note: a_pocs_buffered_in_me holds the */
1011 /* currently active POC list within ME. a_pocs_to_remove holds the */
1012 /* list of POCs to be removed, terminated by -1. */
1013 /*************************************************************************/
1014 i4_is_buffer_full =
1015 hme_get_active_pocs_list((void *)ps_thrd0_ctxt, ps_master_ctxt->i4_num_me_frm_pllel);
1016
1017 if(i4_is_buffer_full)
1018 {
1019 /* remove if any non-reference pictures are present */
1020 for(i = 0;
1021 i <
1022 (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * ps_master_ctxt->i4_num_me_frm_pllel) + 1;
1023 i++)
1024 {
1025 if(ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_is_reference == 0 &&
1026 ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_non_ref_free == 1)
1027 {
1028 i4_least_POC = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc;
1029 i4_least_GOP_num = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_idr_gop_num;
1030 }
1031 }
1032 /* if all non reference pictures are removed, then find the least poc
1033 in the least gop number*/
1034 if(i4_least_POC == 0x7FFFFFFF)
1035 {
1036 ASSERT(i4_least_GOP_num == 0x7FFFFFFF);
1037 for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref *
1038 ps_master_ctxt->i4_num_me_frm_pllel) +
1039 1;
1040 i++)
1041 {
1042 if(i4_least_GOP_num > ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_idr_gop_num)
1043 {
1044 i4_least_GOP_num = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_idr_gop_num;
1045 }
1046 }
1047 for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref *
1048 ps_master_ctxt->i4_num_me_frm_pllel) +
1049 1;
1050 i++)
1051 {
1052 if(i4_least_POC > ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc &&
1053 ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_idr_gop_num == i4_least_GOP_num)
1054 {
1055 i4_least_POC = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc;
1056 }
1057 }
1058 }
1059 ASSERT(i4_least_POC != 0x7FFFFFFF);
1060 a_pocs_to_remove[0] = i4_least_POC;
1061 a_pocs_to_remove[1] = INVALID_POC;
1062 }
1063
1064 /* Call the ME API to remove "outdated" POCs */
1065 hme_discard_frm(
1066 ps_thrd0_ctxt, a_pocs_to_remove, i4_least_GOP_num, ps_master_ctxt->i4_num_me_frm_pllel);
1067 }
1068 /*!
1069 ******************************************************************************
1070 * \if Function name : ihevce_me_frame_init \endif
1071 *
1072 * \brief
1073 * Frame level ME initialisation function
1074 *
1075 * \par Description:
1076 * The following pre-conditions exist for this function: a. We have the input
1077 * pic ready for encode, b. We have the reference list with POC, L0/L1 IDs
1078 * and ref ptrs ready for this picture and c. ihevce_me_set_resolution has
1079 * been called atleast once. Once these are supplied, the following are
1080 * done here: a. Input pyramid creation, b. Updation of ME's internal DPB
1081 * based on available ref list information
1082 *
1083 * \param[in] pv_ctxt : pointer to ME module
1084 * \param[in] ps_frm_ctb_prms : CTB characteristics parameters
1085 * \param[in] ps_frm_lamda : Frame level Lambda params
1086 * \param[in] num_ref_l0 : Number of reference pics in L0 list
1087 * \param[in] num_ref_l1 : Number of reference pics in L1 list
1088 * \param[in] num_ref_l0_active : Active reference pics in L0 dir for current frame (shall be <= num_ref_l0)
1089 * \param[in] num_ref_l1_active : Active reference pics in L1 dir for current frame (shall be <= num_ref_l1)
1090 * \param[in] pps_rec_list_l0 : List of recon pics in L0 list
1091 * \param[in] pps_rec_list_l1 : List of recon pics in L1 list
1092 * \param[in] ps_enc_lap_inp : pointer to input yuv buffer (frame buffer)
1093 * \param[in] i4_frm_qp : current picture QP
1094 *
1095 * \return
1096 * None
1097 *
1098 * \author
1099 * Ittiam
1100 *
1101 *****************************************************************************
1102 */
ihevce_me_frame_init(void * pv_me_ctxt,me_enc_rdopt_ctxt_t * ps_cur_out_me_prms,ihevce_static_cfg_params_t * ps_stat_prms,frm_ctb_ctxt_t * ps_frm_ctb_prms,frm_lambda_ctxt_t * ps_frm_lamda,WORD32 num_ref_l0,WORD32 num_ref_l1,WORD32 num_ref_l0_active,WORD32 num_ref_l1_active,recon_pic_buf_t ** pps_rec_list_l0,recon_pic_buf_t ** pps_rec_list_l1,recon_pic_buf_t * (* aps_ref_list)[HEVCE_MAX_REF_PICS * 2],func_selector_t * ps_func_selector,ihevce_lap_enc_buf_t * ps_enc_lap_inp,void * pv_coarse_layer,WORD32 i4_me_frm_id,WORD32 i4_thrd_id,WORD32 i4_frm_qp,WORD32 i4_temporal_layer_id,WORD8 i1_cu_qp_delta_enabled_flag,void * pv_dep_mngr_encloop_dep_me)1103 void ihevce_me_frame_init(
1104 void *pv_me_ctxt,
1105 me_enc_rdopt_ctxt_t *ps_cur_out_me_prms,
1106 ihevce_static_cfg_params_t *ps_stat_prms,
1107 frm_ctb_ctxt_t *ps_frm_ctb_prms,
1108 frm_lambda_ctxt_t *ps_frm_lamda,
1109 WORD32 num_ref_l0,
1110 WORD32 num_ref_l1,
1111 WORD32 num_ref_l0_active,
1112 WORD32 num_ref_l1_active,
1113 recon_pic_buf_t **pps_rec_list_l0,
1114 recon_pic_buf_t **pps_rec_list_l1,
1115 recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2],
1116 func_selector_t *ps_func_selector,
1117 ihevce_lap_enc_buf_t *ps_enc_lap_inp,
1118 void *pv_coarse_layer,
1119 WORD32 i4_me_frm_id,
1120 WORD32 i4_thrd_id,
1121 WORD32 i4_frm_qp,
1122 WORD32 i4_temporal_layer_id,
1123 WORD8 i1_cu_qp_delta_enabled_flag,
1124 void *pv_dep_mngr_encloop_dep_me)
1125 {
1126 me_ctxt_t *ps_thrd_ctxt;
1127 me_ctxt_t *ps_thrd0_ctxt;
1128 me_frm_ctxt_t *ps_ctxt;
1129 hme_inp_desc_t s_inp_desc;
1130
1131 WORD32 inp_poc, num_ref;
1132 WORD32 i;
1133
1134 me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
1135 layer_ctxt_t *ps_coarse_layer = (layer_ctxt_t *)pv_coarse_layer;
1136
1137 /* Input POC is derived from input buffer */
1138 inp_poc = ps_enc_lap_inp->s_lap_out.i4_poc;
1139 num_ref = num_ref_l0 + num_ref_l1;
1140
1141 /* All processing done using shared / common memory across */
1142 /* threads is done using thrd ctxt */
1143 ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[i4_thrd_id];
1144
1145 ps_ctxt = ps_thrd0_ctxt->aps_me_frm_prms[i4_me_frm_id];
1146
1147 /* Update the paarameters "num_ref_l0_active" and "num_ref_l1_active" in hme_frm_prms */
1148 ps_master_ctxt->as_frm_prms[i4_me_frm_id].u1_num_active_ref_l0 = num_ref_l0_active;
1149 ps_master_ctxt->as_frm_prms[i4_me_frm_id].u1_num_active_ref_l1 = num_ref_l1_active;
1150
1151 /*************************************************************************/
1152 /* Add the current input to ME's DPB. This will also create the pyramids */
1153 /* for the HME layers tha are not "encoded". */
1154 /*************************************************************************/
1155 s_inp_desc.i4_poc = inp_poc;
1156 s_inp_desc.i4_idr_gop_num = ps_enc_lap_inp->s_lap_out.i4_idr_gop_num;
1157 s_inp_desc.i4_is_reference = ps_enc_lap_inp->s_lap_out.i4_is_ref_pic;
1158 s_inp_desc.s_layer_desc[0].pu1_y = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_y_buf;
1159 s_inp_desc.s_layer_desc[0].pu1_u = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_u_buf;
1160 s_inp_desc.s_layer_desc[0].pu1_v = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_v_buf;
1161
1162 s_inp_desc.s_layer_desc[0].luma_stride = ps_enc_lap_inp->s_lap_out.s_input_buf.i4_y_strd;
1163 s_inp_desc.s_layer_desc[0].chroma_stride = ps_enc_lap_inp->s_lap_out.s_input_buf.i4_uv_strd;
1164
1165 hme_add_inp(pv_me_ctxt, &s_inp_desc, i4_me_frm_id, i4_thrd_id);
1166
1167 /* store the frm ctb ctxt to all the thrd ctxt */
1168 {
1169 WORD32 num_thrds;
1170
1171 /* initialise the parameters for all the threads */
1172 for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
1173 {
1174 me_frm_ctxt_t *ps_me_tmp_frm_ctxt;
1175
1176 ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1177
1178 ps_me_tmp_frm_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
1179
1180 ps_thrd_ctxt->pv_ext_frm_prms = (void *)ps_frm_ctb_prms;
1181 ps_me_tmp_frm_ctxt->i4_l0me_qp_mod = ps_stat_prms->s_config_prms.i4_cu_level_rc & 1;
1182
1183 /* intialize the inter pred (MC) context at frame level */
1184 ps_me_tmp_frm_ctxt->s_mc_ctxt.ps_ref_list = aps_ref_list;
1185 ps_me_tmp_frm_ctxt->s_mc_ctxt.i1_weighted_pred_flag =
1186 ps_enc_lap_inp->s_lap_out.i1_weighted_pred_flag;
1187 ps_me_tmp_frm_ctxt->s_mc_ctxt.i1_weighted_bipred_flag =
1188 ps_enc_lap_inp->s_lap_out.i1_weighted_bipred_flag;
1189 ps_me_tmp_frm_ctxt->s_mc_ctxt.i4_log2_luma_wght_denom =
1190 ps_enc_lap_inp->s_lap_out.i4_log2_luma_wght_denom;
1191 ps_me_tmp_frm_ctxt->s_mc_ctxt.i4_log2_chroma_wght_denom =
1192 ps_enc_lap_inp->s_lap_out.i4_log2_chroma_wght_denom;
1193 ps_me_tmp_frm_ctxt->s_mc_ctxt.i4_bit_depth = 8;
1194 ps_me_tmp_frm_ctxt->s_mc_ctxt.u1_chroma_array_type = 1;
1195 ps_me_tmp_frm_ctxt->s_mc_ctxt.ps_func_selector = ps_func_selector;
1196 /* Initiallization for non-distributed mode */
1197 memset(
1198 ps_me_tmp_frm_ctxt->s_mc_ctxt.ai4_tile_xtra_pel,
1199 0,
1200 sizeof(ps_me_tmp_frm_ctxt->s_mc_ctxt.ai4_tile_xtra_pel));
1201
1202 ps_me_tmp_frm_ctxt->i4_pic_type = ps_enc_lap_inp->s_lap_out.i4_pic_type;
1203
1204 ps_me_tmp_frm_ctxt->i4_rc_pass = ps_stat_prms->s_pass_prms.i4_pass;
1205 ps_me_tmp_frm_ctxt->i4_temporal_layer = ps_enc_lap_inp->s_lap_out.i4_temporal_lyr_id;
1206 ps_me_tmp_frm_ctxt->i4_use_const_lamda_modifier = USE_CONSTANT_LAMBDA_MODIFIER;
1207 ps_me_tmp_frm_ctxt->i4_use_const_lamda_modifier =
1208 ps_ctxt->i4_use_const_lamda_modifier ||
1209 ((ps_stat_prms->s_coding_tools_prms.i4_vqet &
1210 (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
1211 ((ps_stat_prms->s_coding_tools_prms.i4_vqet &
1212 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) ||
1213 (ps_stat_prms->s_coding_tools_prms.i4_vqet &
1214 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1)) ||
1215 (ps_stat_prms->s_coding_tools_prms.i4_vqet &
1216 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_2)) ||
1217 (ps_stat_prms->s_coding_tools_prms.i4_vqet &
1218 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_3))));
1219 {
1220 ps_me_tmp_frm_ctxt->f_i_pic_lamda_modifier =
1221 ps_enc_lap_inp->s_lap_out.f_i_pic_lamda_modifier;
1222 }
1223 /* weighted pred enable flag */
1224 ps_me_tmp_frm_ctxt->i4_wt_pred_enable_flag =
1225 ps_enc_lap_inp->s_lap_out.i1_weighted_pred_flag |
1226 ps_enc_lap_inp->s_lap_out.i1_weighted_bipred_flag;
1227
1228 if(1 == ps_me_tmp_frm_ctxt->i4_wt_pred_enable_flag)
1229 {
1230 /* log2 weight denom */
1231 ps_me_tmp_frm_ctxt->s_wt_pred.wpred_log_wdc =
1232 ps_enc_lap_inp->s_lap_out.i4_log2_luma_wght_denom;
1233 }
1234 else
1235 {
1236 /* default value */
1237 ps_me_tmp_frm_ctxt->s_wt_pred.wpred_log_wdc = DENOM_DEFAULT;
1238 }
1239
1240 ps_me_tmp_frm_ctxt->u1_is_curFrame_a_refFrame = ps_enc_lap_inp->s_lap_out.i4_is_ref_pic;
1241
1242 ps_thrd_ctxt->pv_me_optimised_function_list =
1243 ps_master_ctxt->pv_me_optimised_function_list;
1244 ps_thrd_ctxt->ps_cmn_utils_optimised_function_list = &ps_master_ctxt->s_cmn_opt_func;
1245 }
1246 }
1247
1248 /* Create the reference map for ME */
1249 ihevce_me_create_ref_map(
1250 pps_rec_list_l0,
1251 pps_rec_list_l1,
1252 num_ref_l0_active,
1253 num_ref_l1_active,
1254 num_ref,
1255 &ps_master_ctxt->as_ref_map[i4_me_frm_id]);
1256
1257 /** Remember the pointers to recon list parmas for L0 and L1 lists in the context */
1258 ps_ctxt->ps_hme_ref_map->pps_rec_list_l0 = pps_rec_list_l0;
1259 ps_ctxt->ps_hme_ref_map->pps_rec_list_l1 = pps_rec_list_l1;
1260
1261 /*************************************************************************/
1262 /* Call the ME frame level processing for further actiion. */
1263 /* ToDo: Support Row Level API. */
1264 /*************************************************************************/
1265 ps_master_ctxt->as_frm_prms[i4_me_frm_id].i2_mv_range_x =
1266 ps_thrd0_ctxt->s_init_prms.max_horz_search_range;
1267 ps_master_ctxt->as_frm_prms[i4_me_frm_id].i2_mv_range_y =
1268 ps_thrd0_ctxt->s_init_prms.max_vert_search_range;
1269 ps_master_ctxt->as_frm_prms[i4_me_frm_id].is_i_pic = 0;
1270 ps_master_ctxt->as_frm_prms[i4_me_frm_id].is_pic_second_field =
1271 (!(ps_enc_lap_inp->s_input_buf.i4_bottom_field ^
1272 ps_enc_lap_inp->s_input_buf.i4_topfield_first));
1273 ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_temporal_layer_id = i4_temporal_layer_id;
1274 {
1275 S32 pic_type = ps_enc_lap_inp->s_lap_out.i4_pic_type;
1276
1277 /*********************************************************************/
1278 /* For I Pic, we do not call update fn at ctb level, instead we do */
1279 /* one shot update for entire picture. */
1280 /*********************************************************************/
1281 if((pic_type == IV_I_FRAME) || (pic_type == IV_II_FRAME) || (pic_type == IV_IDR_FRAME))
1282 {
1283 ps_master_ctxt->as_frm_prms[i4_me_frm_id].is_i_pic = 1;
1284 ps_master_ctxt->as_frm_prms[i4_me_frm_id].bidir_enabled = 0;
1285 }
1286
1287 else if((pic_type == IV_P_FRAME) || (pic_type == IV_PP_FRAME))
1288 {
1289 ps_master_ctxt->as_frm_prms[i4_me_frm_id].bidir_enabled = 0;
1290 }
1291 else if((pic_type == IV_B_FRAME) || (pic_type == IV_BB_FRAME))
1292 {
1293 ps_master_ctxt->as_frm_prms[i4_me_frm_id].bidir_enabled = 1;
1294 }
1295 else
1296 {
1297 /* not sure whether we need to handle mixed frames like IP, */
1298 /* they should ideally come as single field. */
1299 /* TODO : resolve thsi ambiguity */
1300 ASSERT(0);
1301 }
1302 }
1303 /************************************************************************/
1304 /* Lambda calculations moved outside ME and to one place, so as to have */
1305 /* consistent lambda across ME, IPE, CL RDOPT etc */
1306 /************************************************************************/
1307
1308 {
1309 double d_q_factor;
1310
1311 d_q_factor = pow(2.0, (i4_frm_qp / 6.)) * 5.0 / 8.0;
1312 ps_master_ctxt->as_frm_prms[i4_me_frm_id].qstep = (WORD32)(d_q_factor + .5);
1313 ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_frame_qp = i4_frm_qp;
1314
1315 /* Qstep multiplied by 256, to work at higher precision:
1316 5/6 is the rounding factor. Multiplied by 2 for the Had vs DCT
1317 cost variation */
1318 ps_master_ctxt->as_frm_prms[i4_me_frm_id].qstep_ls8 =
1319 (WORD32)((((d_q_factor * 256) * 5) / 3) + .5);
1320 }
1321
1322 /* Frame level init of all threads of ME */
1323 {
1324 WORD32 num_thrds;
1325
1326 /* initialise the parameters for all the threads */
1327 for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
1328 {
1329 me_frm_ctxt_t *ps_tmp_frm_ctxt;
1330
1331 ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1332
1333 ps_tmp_frm_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
1334
1335 hme_process_frm_init(
1336 (void *)ps_thrd_ctxt,
1337 ps_tmp_frm_ctxt->ps_hme_ref_map,
1338 ps_tmp_frm_ctxt->ps_hme_frm_prms,
1339 i4_me_frm_id,
1340 ps_master_ctxt->i4_num_me_frm_pllel);
1341
1342 ps_tmp_frm_ctxt->s_frm_lambda_ctxt = *ps_frm_lamda;
1343 ps_tmp_frm_ctxt->pv_dep_mngr_encloop_dep_me = pv_dep_mngr_encloop_dep_me;
1344 }
1345 }
1346
1347 ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_cl_sad_lambda_qf =
1348 ps_frm_lamda->i4_cl_sad_lambda_qf;
1349 ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_cl_satd_lambda_qf =
1350 ps_frm_lamda->i4_cl_satd_lambda_qf;
1351 ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_ol_sad_lambda_qf =
1352 ps_frm_lamda->i4_ol_sad_lambda_qf;
1353 ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_ol_satd_lambda_qf =
1354 ps_frm_lamda->i4_ol_satd_lambda_qf;
1355 ps_master_ctxt->as_frm_prms[i4_me_frm_id].lambda_q_shift = LAMBDA_Q_SHIFT;
1356
1357 ps_master_ctxt->as_frm_prms[i4_me_frm_id].u1_is_cu_qp_delta_enabled =
1358 i1_cu_qp_delta_enabled_flag;
1359
1360 /*************************************************************************/
1361 /* If num ref is 0, that means that it has to be coded as I. Do nothing */
1362 /* However mv bank update needs to happen with "intra" mv. */
1363 /*************************************************************************/
1364 if(ps_master_ctxt->as_ref_map[i4_me_frm_id].i4_num_ref == 0 ||
1365 ps_master_ctxt->as_frm_prms[i4_me_frm_id].is_i_pic)
1366 {
1367 for(i = 0; i < 1; i++)
1368 {
1369 layer_ctxt_t *ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];
1370 BLK_SIZE_T e_blk_size;
1371 S32 use_4x4;
1372
1373 /* The mv bank is filled with "intra" mv */
1374 use_4x4 = hme_get_mv_blk_size(
1375 ps_thrd0_ctxt->s_init_prms.use_4x4, i, ps_ctxt->num_layers, ps_ctxt->u1_encode[i]);
1376 e_blk_size = use_4x4 ? BLK_4x4 : BLK_8x8;
1377 hme_init_mv_bank(ps_layer_ctxt, e_blk_size, 2, 1, ps_ctxt->u1_encode[i]);
1378 hme_fill_mvbank_intra(ps_layer_ctxt);
1379
1380 /* Clear out the global mvs */
1381 memset(
1382 ps_layer_ctxt->s_global_mv,
1383 0,
1384 sizeof(hme_mv_t) * ps_ctxt->max_num_ref * NUM_GMV_LOBES);
1385 }
1386
1387 return;
1388 }
1389
1390 /*************************************************************************/
1391 /* Encode layer frame init */
1392 /*************************************************************************/
1393 {
1394 refine_prms_t s_refine_prms;
1395 layer_ctxt_t *ps_curr_layer;
1396 S16 i2_max;
1397 S32 layer_id;
1398
1399 layer_id = 0;
1400 i2_max = ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_x;
1401 i2_max = MAX(i2_max, ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_y);
1402
1403 ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[layer_id];
1404
1405 {
1406 hme_set_refine_prms(
1407 &s_refine_prms,
1408 ps_ctxt->u1_encode[layer_id],
1409 ps_master_ctxt->as_ref_map[i4_me_frm_id].i4_num_ref,
1410 layer_id,
1411 ps_ctxt->num_layers,
1412 ps_ctxt->num_layers_explicit_search,
1413 ps_thrd0_ctxt->s_init_prms.use_4x4,
1414 &ps_master_ctxt->as_frm_prms[i4_me_frm_id],
1415 NULL,
1416 &ps_thrd0_ctxt->s_init_prms
1417 .s_me_coding_tools); /* during frm init Intra cost Pointer is not required */
1418
1419 hme_refine_frm_init(ps_curr_layer, &s_refine_prms, ps_coarse_layer);
1420 }
1421 }
1422 }
1423
1424 /*!
1425 ******************************************************************************
1426 * \if Function name : ihevce_l0_me_frame_end \endif
1427 *
1428 * \brief
1429 * End of frame update function performs
1430 * - Dynamic Search Range collation
1431 *
1432 * \param[in] pv_ctxt : pointer to ME module
1433 *
1434 * \return
1435 * None
1436 *
1437 * \author
1438 * Ittiam
1439 *
1440 *****************************************************************************
1441 */
1442
ihevce_l0_me_frame_end(void * pv_me_ctxt,WORD32 i4_idx_dvsr_p,WORD32 i4_display_num,WORD32 me_frm_id)1443 void ihevce_l0_me_frame_end(
1444 void *pv_me_ctxt, WORD32 i4_idx_dvsr_p, WORD32 i4_display_num, WORD32 me_frm_id)
1445 {
1446 WORD32 i4_num_ref = 0, num_ref, num_thrds, cur_poc, frm_num;
1447
1448 me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
1449 me_ctxt_t *ps_thrd0_ctxt;
1450 me_frm_ctxt_t *ps_frm_ctxt;
1451 WORD32 prev_me_frm_id;
1452
1453 ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0];
1454 ps_frm_ctxt = ps_thrd0_ctxt->aps_me_frm_prms[me_frm_id];
1455
1456 /* Deriving the previous poc from previous frames context */
1457 if(me_frm_id == 0)
1458 prev_me_frm_id = (MAX_NUM_ME_PARALLEL - 1);
1459 else
1460 prev_me_frm_id = me_frm_id - 1;
1461
1462 /* Getting the max num references value */
1463 for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
1464 {
1465 i4_num_ref =
1466 MAX(i4_num_ref,
1467 ps_master_ctxt->aps_me_ctxt[num_thrds]
1468 ->aps_me_frm_prms[me_frm_id]
1469 ->as_l0_dyn_range_prms[i4_idx_dvsr_p]
1470 .i4_num_act_ref_in_l0);
1471 }
1472
1473 /* No processing is required if current pic is I pic */
1474 if(1 == ps_master_ctxt->as_frm_prms[me_frm_id].is_i_pic)
1475 {
1476 return;
1477 }
1478
1479 /* If a B/b pic, then the previous frame ctxts dyn search prms should be copied ito the latest ctxt */
1480 if(1 == ps_frm_ctxt->s_frm_prms.bidir_enabled)
1481 {
1482 return;
1483 }
1484
1485 /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
1486 ASSERT(ps_frm_ctxt->s_frm_prms.is_i_pic == ps_frm_ctxt->s_frm_prms.bidir_enabled);
1487
1488 /* use thrd 0 ctxt to collate the Dynamic Search Range across all threads */
1489 for(num_ref = 0; num_ref < i4_num_ref; num_ref++)
1490 {
1491 dyn_range_prms_t *ps_dyn_range_prms_thrd0;
1492
1493 ps_dyn_range_prms_thrd0 =
1494 &ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[num_ref];
1495
1496 /* run a loop over all the other threads to update the dynamical search range */
1497 for(num_thrds = 1; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
1498 {
1499 me_frm_ctxt_t *ps_me_tmp_frm_ctxt;
1500
1501 dyn_range_prms_t *ps_dyn_range_prms;
1502
1503 ps_me_tmp_frm_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_me_frm_prms[me_frm_id];
1504
1505 /* get current thrd dynamical search range param. pointer */
1506 ps_dyn_range_prms =
1507 &ps_me_tmp_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[num_ref];
1508
1509 /* TODO : This calls can be optimized further. No need for min in 1st call and max in 2nd call */
1510 hme_update_dynamic_search_params(
1511 ps_dyn_range_prms_thrd0, ps_dyn_range_prms->i2_dyn_max_y);
1512
1513 hme_update_dynamic_search_params(
1514 ps_dyn_range_prms_thrd0, ps_dyn_range_prms->i2_dyn_min_y);
1515 }
1516 }
1517
1518 /*************************************************************************/
1519 /* Get the MAX/MIN per POC distance based on the all the ref. pics */
1520 /*************************************************************************/
1521 cur_poc = ps_frm_ctxt->i4_curr_poc;
1522 ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_max_y_per_poc = 0;
1523 ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_min_y_per_poc = 0;
1524 /*populate display num*/
1525 ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_display_num = i4_display_num;
1526
1527 for(num_ref = 0; num_ref < i4_num_ref; num_ref++)
1528 {
1529 WORD16 i2_mv_per_poc;
1530 WORD32 ref_poc, poc_diff;
1531 dyn_range_prms_t *ps_dyn_range_prms_thrd0;
1532 ps_dyn_range_prms_thrd0 =
1533 &ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[num_ref];
1534
1535 ref_poc = ps_dyn_range_prms_thrd0->i4_poc;
1536 /* Should be cleaned up for ME llsm */
1537 poc_diff = (cur_poc - ref_poc);
1538 poc_diff = MAX(1, poc_diff);
1539
1540 /* cur. ref. pic. max y per POC */
1541 i2_mv_per_poc = (ps_dyn_range_prms_thrd0->i2_dyn_max_y + (poc_diff - 1)) / poc_diff;
1542 /* update the max y per POC */
1543 ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_max_y_per_poc = MAX(
1544 ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_max_y_per_poc, i2_mv_per_poc);
1545
1546 /* cur. ref. pic. min y per POC */
1547 i2_mv_per_poc = (ps_dyn_range_prms_thrd0->i2_dyn_min_y - (poc_diff - 1)) / poc_diff;
1548 /* update the min y per POC */
1549 ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_min_y_per_poc = MIN(
1550 ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_min_y_per_poc, i2_mv_per_poc);
1551 }
1552
1553 /*************************************************************************/
1554 /* Populate the results to all thread ctxt */
1555 /*************************************************************************/
1556 for(num_thrds = 1; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
1557 {
1558 me_frm_ctxt_t *ps_me_tmp_frm_ctxt;
1559
1560 ps_me_tmp_frm_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_me_frm_prms[me_frm_id];
1561
1562 ps_me_tmp_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_max_y_per_poc =
1563 ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_max_y_per_poc;
1564
1565 ps_me_tmp_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_min_y_per_poc =
1566 ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_min_y_per_poc;
1567
1568 ps_me_tmp_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_display_num =
1569 ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_display_num;
1570 }
1571
1572 /* Copy the dynamic search paramteres into the other Frame cotexts in parallel */
1573 for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
1574 {
1575 l0_dyn_range_prms_t *ps_dyn_range_prms_thrd0;
1576
1577 ps_frm_ctxt = ps_thrd0_ctxt->aps_me_frm_prms[me_frm_id];
1578
1579 i4_num_ref = ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_num_act_ref_in_l0;
1580
1581 ps_dyn_range_prms_thrd0 = &ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p];
1582
1583 for(frm_num = 0; frm_num < MAX_NUM_ME_PARALLEL; frm_num++)
1584 {
1585 if(me_frm_id != frm_num)
1586 {
1587 me_frm_ctxt_t *ps_me_tmp_frm_ctxt;
1588
1589 l0_dyn_range_prms_t *ps_dyn_range_prms;
1590
1591 ps_me_tmp_frm_ctxt =
1592 ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_me_frm_prms[frm_num];
1593
1594 /* get current thrd dynamical search range param. pointer */
1595 ps_dyn_range_prms = &ps_me_tmp_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p];
1596
1597 memcpy(ps_dyn_range_prms, ps_dyn_range_prms_thrd0, sizeof(l0_dyn_range_prms_t));
1598 }
1599 }
1600 }
1601 }
1602