• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21 ******************************************************************************
22 * @file hme_refine.c
23 *
24 * @brief
25 *    Contains the implementation of the refinement layer searches and related
26 *    functionality like CU merge.
27 *
28 * @author
29 *    Ittiam
30 *
31 *
32 * List of Functions
33 *
34 *
35 ******************************************************************************
36 */
37 
38 /*****************************************************************************/
39 /* File Includes                                                             */
40 /*****************************************************************************/
41 /* System include files */
42 #include <stdio.h>
43 #include <string.h>
44 #include <stdlib.h>
45 #include <assert.h>
46 #include <stdarg.h>
47 #include <math.h>
48 #include <limits.h>
49 
50 /* User include files */
51 #include "ihevc_typedefs.h"
52 #include "itt_video_api.h"
53 #include "ihevce_api.h"
54 
55 #include "rc_cntrl_param.h"
56 #include "rc_frame_info_collector.h"
57 #include "rc_look_ahead_params.h"
58 
59 #include "ihevc_defs.h"
60 #include "ihevc_structs.h"
61 #include "ihevc_platform_macros.h"
62 #include "ihevc_deblk.h"
63 #include "ihevc_itrans_recon.h"
64 #include "ihevc_chroma_itrans_recon.h"
65 #include "ihevc_chroma_intra_pred.h"
66 #include "ihevc_intra_pred.h"
67 #include "ihevc_inter_pred.h"
68 #include "ihevc_mem_fns.h"
69 #include "ihevc_padding.h"
70 #include "ihevc_weighted_pred.h"
71 #include "ihevc_sao.h"
72 #include "ihevc_resi_trans.h"
73 #include "ihevc_quant_iquant_ssd.h"
74 #include "ihevc_cabac_tables.h"
75 
76 #include "ihevce_defs.h"
77 #include "ihevce_lap_enc_structs.h"
78 #include "ihevce_multi_thrd_structs.h"
79 #include "ihevce_multi_thrd_funcs.h"
80 #include "ihevce_me_common_defs.h"
81 #include "ihevce_had_satd.h"
82 #include "ihevce_error_codes.h"
83 #include "ihevce_bitstream.h"
84 #include "ihevce_cabac.h"
85 #include "ihevce_rdoq_macros.h"
86 #include "ihevce_function_selector.h"
87 #include "ihevce_enc_structs.h"
88 #include "ihevce_entropy_structs.h"
89 #include "ihevce_cmn_utils_instr_set_router.h"
90 #include "ihevce_enc_loop_structs.h"
91 #include "ihevce_bs_compute_ctb.h"
92 #include "ihevce_global_tables.h"
93 #include "ihevce_dep_mngr_interface.h"
94 #include "hme_datatype.h"
95 #include "hme_interface.h"
96 #include "hme_common_defs.h"
97 #include "hme_defs.h"
98 #include "ihevce_me_instr_set_router.h"
99 #include "hme_globals.h"
100 #include "hme_utils.h"
101 #include "hme_coarse.h"
102 #include "hme_fullpel.h"
103 #include "hme_subpel.h"
104 #include "hme_refine.h"
105 #include "hme_err_compute.h"
106 #include "hme_common_utils.h"
107 #include "hme_search_algo.h"
108 #include "ihevce_stasino_helpers.h"
109 #include "ihevce_common_utils.h"
110 
111 /*****************************************************************************/
112 /* Globals                                                                   */
113 /*****************************************************************************/
114 
115 /* brief: mapping buffer to convert raster scan indices into z-scan oder in a ctb */
116 UWORD8 gau1_raster_scan_to_ctb[4][4] = {
117     { 0, 4, 16, 20 }, { 8, 12, 24, 28 }, { 32, 36, 48, 52 }, { 40, 44, 56, 60 }
118 };
119 
120 /*****************************************************************************/
121 /* Extern Function declaration                                               */
122 /*****************************************************************************/
/* Prototype for get_ctb_attrs(), which is defined outside this section. It  */
/* takes a CTB start position (x, y), the picture width and height and the   */
/* ME frame context, and returns a pointer to a ctb_boundary_attrs_t.        */
/* NOTE(review): declared locally instead of via a header - confirm that the */
/* signature matches the definition.                                         */
123 extern ctb_boundary_attrs_t *
124     get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt);
125 
/* Function-pointer type for routines that take a search node, the current   */
/* and coarse layer contexts, a position (x, y), a reference id and a result */
/* id, and return nothing.                                                   */
/* NOTE(review): the name suggests these project a co-located candidate from */
/* the coarse layer into the search node - confirm at the use sites.         */
126 typedef void (*PF_HME_PROJECT_COLOC_CANDT_FXN)(
127     search_node_t *ps_search_node,
128     layer_ctxt_t *ps_curr_layer,
129     layer_ctxt_t *ps_coarse_layer,
130     S32 i4_pos_x,
131     S32 i4_pos_y,
132     S08 i1_ref_id,
133     S32 i4_result_id);
134 
/* Function-pointer type with the same leading arguments as                  */
/* PF_HME_PROJECT_COLOC_CANDT_FXN (search node, current and coarse layer     */
/* contexts, position), but instead of a single reference id it takes the    */
/* number of active L0 references, a prediction direction and a default      */
/* reference id, followed by the result id. Returns nothing.                 */
/* NOTE(review): presumably the layer-0 ME variant of the projection         */
/* routine, judging by the name - confirm at the use sites.                  */
135 typedef void (*PF_HME_PROJECT_COLOC_CANDT_L0_ME_FXN)(
136     search_node_t *ps_search_node,
137     layer_ctxt_t *ps_curr_layer,
138     layer_ctxt_t *ps_coarse_layer,
139     S32 i4_pos_x,
140     S32 i4_pos_y,
141     S32 i4_num_act_ref_l0,
142     U08 u1_pred_dir,
143     U08 u1_default_ref_id,
144     S32 i4_result_id);
145 
146 /*****************************************************************************/
147 /* Function Definitions                                                      */
148 /*****************************************************************************/
149 
/**
********************************************************************************
*  @fn   ihevce_no_wt_copy
*
*  @brief  Copies a temp_stride x temp_stride block of samples from the input
*          plane of the PU's reference into pu1_temp_pred. The source block is
*          displaced by the PU's motion vector and no weighting is applied.
*          The L0 reference index and MV are used when b2_pred_mode is PRED_L0,
*          the L1 ones otherwise.
*
*  @param[in] ps_ctxt : not referenced by this function
*
*  @param[in] ps_curr_layer : supplies the ppu1_list_inp[] planes and the
*             i4_inp_stride pitch of those planes
*
*  @param[in] ps_pu : PU providing the prediction mode, reference index and MV
*
*  @param[out] pu1_temp_pred : destination buffer
*
*  @param[in] temp_stride : pitch of pu1_temp_pred; also used as the width and
*             height of the copied block
*
*  @param[in] blk_x : horizontal block position, multiplied by 8 to get samples
*
*  @param[in] blk_y : vertical block position, multiplied by 8 to get samples
*
*  @return None
********************************************************************************
*/
void ihevce_no_wt_copy(
    coarse_me_ctxt_t *ps_ctxt,
    layer_ctxt_t *ps_curr_layer,
    pu_t *ps_pu,
    UWORD8 *pu1_temp_pred,
    WORD32 temp_stride,
    WORD32 blk_x,
    WORD32 blk_y)
{
    UWORD8 *pu1_src;
    UWORD8 *pu1_dst = pu1_temp_pred;
    WORD32 i4_src_stride, i4_src_offset;
    WORD32 i4_mv_x, i4_mv_y;
    WORD32 x, y, i4_sample;
    WORD8 i1_ref_idx;

    ASSERT((ps_pu->b2_pred_mode == PRED_L0) || (ps_pu->b2_pred_mode == PRED_L1));

    /* Pick the reference index and MV of the list this PU predicts from */
    if(ps_pu->b2_pred_mode == PRED_L0)
    {
        i1_ref_idx = ps_pu->mv.i1_l0_ref_idx;
        i4_mv_x = ps_pu->mv.s_l0_mv.i2_mvx;
        i4_mv_y = ps_pu->mv.s_l0_mv.i2_mvy;
    }
    else
    {
        i1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
        i4_mv_x = ps_pu->mv.s_l1_mv.i2_mvx;
        i4_mv_y = ps_pu->mv.s_l1_mv.i2_mvy;
    }

    i4_src_stride = ps_curr_layer->i4_inp_stride;

    /* Top-left sample of the displaced block inside the reference plane */
    i4_src_offset = ((blk_y << 3) + i4_mv_y) * i4_src_stride;
    i4_src_offset += (blk_x << 3) + i4_mv_x;

    pu1_src = ps_curr_layer->ppu1_list_inp[i1_ref_idx] + i4_src_offset;

    for(y = 0; y < temp_stride; y++)
    {
        for(x = 0; x < temp_stride; x++)
        {
            i4_sample = pu1_src[x];
            pu1_dst[x] = CLIP_U8(i4_sample);
        }

        pu1_src += i4_src_stride;
        pu1_dst += temp_stride;
    }
}
218 
/**
********************************************************************************
*  @fn   hme_add_clustered_mvs_as_merge_cands
*
*  @brief  Walks the clusters and appends their MVs to the merge candidate
*          list, starting at index 0. A cluster is used only when
*          u1_pred_dir == !pu1_refid_to_pred_dir_list[cluster ref_id].
*          Every MV is clipped with CLIP_MV_WITHIN_RANGE against the range of
*          its reference and is kept only if no earlier candidate has the same
*          clipped MV and the same reference.
*
*  @param[in] ps_cluster_base : array of i4_num_clusters clusters
*
*  @param[out] ps_merge_cand : candidate list being filled. The slot just past
*              the last kept candidate is used as scratch space, so the buffer
*              needs room for one entry more than the returned count.
*              NOTE(review): no capacity is passed in - the caller must size
*              the buffer for the worst case.
*
*  @param[in] pps_range_prms : MV ranges, indexed by reference id
*
*  @param[in] pu1_refid_to_pred_dir_list : per reference id flag used to select
*             the clusters belonging to u1_pred_dir
*
*  @param[in] i4_num_clusters : number of entries in ps_cluster_base
*
*  @param[in] u1_pred_dir : prediction direction being populated
*
*  @return Number of candidates written to ps_merge_cand
********************************************************************************
*/
static WORD32 hme_add_clustered_mvs_as_merge_cands(
    cluster_data_t *ps_cluster_base,
    search_node_t *ps_merge_cand,
    range_prms_t **pps_range_prms,
    U08 *pu1_refid_to_pred_dir_list,
    WORD32 i4_num_clusters,
    U08 u1_pred_dir)
{
    WORD32 i, j, k;
    WORD32 i4_num_cands_added = 0;
    WORD32 i4_num_mvs_in_cluster;

    for(i = 0; i < i4_num_clusters; i++)
    {
        cluster_data_t *ps_data = &ps_cluster_base[i];

        if(u1_pred_dir == !pu1_refid_to_pred_dir_list[ps_data->ref_id])
        {
            i4_num_mvs_in_cluster = ps_data->num_mvs;

            for(j = 0; j < i4_num_mvs_in_cluster; j++)
            {
                /* Stage the MV in the next free slot and clip it in place */
                ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_data->as_mv[j].mvx;
                ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_data->as_mv[j].mvy;
                ps_merge_cand[i4_num_cands_added].i1_ref_idx = ps_data->ref_id;

                CLIP_MV_WITHIN_RANGE(
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
                    pps_range_prms[ps_data->ref_id],
                    0,
                    0,
                    0);

                /* Compare against the clipped MV that would actually be     */
                /* stored. Comparing against the raw cluster MV lets an MV   */
                /* that was moved by the clip slip in as a duplicate.        */
                for(k = 0; k < i4_num_cands_added; k++)
                {
                    if((ps_merge_cand[k].s_mv.i2_mvx ==
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx) &&
                       (ps_merge_cand[k].s_mv.i2_mvy ==
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy) &&
                       (ps_merge_cand[k].i1_ref_idx == ps_data->ref_id))
                    {
                        break;
                    }
                }

                /* Commit the staged entry only when it is unique */
                if(k == i4_num_cands_added)
                {
                    i4_num_cands_added++;
                }
            }
        }
    }

    return i4_num_cands_added;
}
273 
hme_add_me_best_as_merge_cands(search_results_t ** pps_child_data_array,inter_cu_results_t * ps_8x8cu_results,search_node_t * ps_merge_cand,range_prms_t ** pps_range_prms,U08 * pu1_refid_to_pred_dir_list,S08 * pi1_past_list,S08 * pi1_future_list,BLK_SIZE_T e_blk_size,ME_QUALITY_PRESETS_T e_quality_preset,S32 i4_num_cands_added,U08 u1_pred_dir)274 static WORD32 hme_add_me_best_as_merge_cands(
275     search_results_t **pps_child_data_array,
276     inter_cu_results_t *ps_8x8cu_results,
277     search_node_t *ps_merge_cand,
278     range_prms_t **pps_range_prms,
279     U08 *pu1_refid_to_pred_dir_list,
280     S08 *pi1_past_list,
281     S08 *pi1_future_list,
282     BLK_SIZE_T e_blk_size,
283     ME_QUALITY_PRESETS_T e_quality_preset,
284     S32 i4_num_cands_added,
285     U08 u1_pred_dir)
286 {
287     WORD32 i, j, k;
288     WORD32 i4_max_cands_to_add;
289 
290     WORD32 i4_result_id = 0;
291 
292     ASSERT(!pps_child_data_array[0]->u1_split_flag || (BLK_64x64 != e_blk_size));
293     ASSERT(!pps_child_data_array[1]->u1_split_flag || (BLK_64x64 != e_blk_size));
294     ASSERT(!pps_child_data_array[2]->u1_split_flag || (BLK_64x64 != e_blk_size));
295     ASSERT(!pps_child_data_array[3]->u1_split_flag || (BLK_64x64 != e_blk_size));
296 
297     switch(e_quality_preset)
298     {
299     case ME_PRISTINE_QUALITY:
300     {
301         i4_max_cands_to_add = MAX_MERGE_CANDTS;
302 
303         break;
304     }
305     case ME_HIGH_QUALITY:
306     {
307         /* All 4 children are split and each grandchild contributes an MV */
308         /* and 2 best results per grandchild */
309         i4_max_cands_to_add = 4 * 4 * 2;
310 
311         break;
312     }
313     case ME_MEDIUM_SPEED:
314     {
315         i4_max_cands_to_add = 4 * 2 * 2;
316 
317         break;
318     }
319     case ME_HIGH_SPEED:
320     case ME_XTREME_SPEED:
321     case ME_XTREME_SPEED_25:
322     {
323         i4_max_cands_to_add = 4 * 2 * 1;
324 
325         break;
326     }
327     }
328 
329     while(i4_result_id < 4)
330     {
331         for(i = 0; i < 4; i++)
332         {
333             inter_cu_results_t *ps_child_data = pps_child_data_array[i]->ps_cu_results;
334             inter_cu_results_t *ps_grandchild_data = &ps_8x8cu_results[i << 2];
335 
336             if(!pps_child_data_array[i]->u1_split_flag)
337             {
338                 part_type_results_t *ps_data = &ps_child_data->ps_best_results[i4_result_id];
339 
340                 if(ps_child_data->u1_num_best_results <= i4_result_id)
341                 {
342                     continue;
343                 }
344 
345                 if(ps_data->as_pu_results->pu.b1_intra_flag)
346                 {
347                     continue;
348                 }
349 
350                 for(j = 0; j <= (ps_data->u1_part_type != PRT_2Nx2N); j++)
351                 {
352                     mv_t *ps_mv;
353 
354                     S08 i1_ref_idx;
355 
356                     pu_t *ps_pu = &ps_data->as_pu_results[j].pu;
357 
358                     if(u1_pred_dir !=
359                        ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
360                     {
361                         continue;
362                     }
363 
364                     if(u1_pred_dir)
365                     {
366                         ps_mv = &ps_pu->mv.s_l1_mv;
367                         i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
368                     }
369                     else
370                     {
371                         ps_mv = &ps_pu->mv.s_l0_mv;
372                         i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
373                     }
374 
375                     if(-1 == i1_ref_idx)
376                     {
377                         continue;
378                     }
379 
380                     ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
381                     ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
382                     ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
383 
384                     CLIP_MV_WITHIN_RANGE(
385                         ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
386                         ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
387                         pps_range_prms[i1_ref_idx],
388                         0,
389                         0,
390                         0);
391 
392                     for(k = 0; k < i4_num_cands_added; k++)
393                     {
394                         if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
395                            (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
396                            (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
397                         {
398                             break;
399                         }
400                     }
401 
402                     if(k == i4_num_cands_added)
403                     {
404                         i4_num_cands_added++;
405 
406                         if(i4_max_cands_to_add <= i4_num_cands_added)
407                         {
408                             return i4_num_cands_added;
409                         }
410                     }
411                 }
412             }
413             else
414             {
415                 for(j = 0; j < 4; j++)
416                 {
417                     mv_t *ps_mv;
418 
419                     S08 i1_ref_idx;
420 
421                     part_type_results_t *ps_data = ps_grandchild_data[j].ps_best_results;
422                     pu_t *ps_pu = &ps_data->as_pu_results[0].pu;
423 
424                     ASSERT(ps_data->u1_part_type == PRT_2Nx2N);
425 
426                     if(ps_grandchild_data[j].u1_num_best_results <= i4_result_id)
427                     {
428                         continue;
429                     }
430 
431                     if(ps_data->as_pu_results->pu.b1_intra_flag)
432                     {
433                         continue;
434                     }
435 
436                     if(u1_pred_dir !=
437                        ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
438                     {
439                         continue;
440                     }
441 
442                     if(u1_pred_dir)
443                     {
444                         ps_mv = &ps_pu->mv.s_l1_mv;
445                         i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
446                     }
447                     else
448                     {
449                         ps_mv = &ps_pu->mv.s_l0_mv;
450                         i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
451                     }
452 
453                     ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
454                     ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
455                     ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
456 
457                     CLIP_MV_WITHIN_RANGE(
458                         ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
459                         ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
460                         pps_range_prms[i1_ref_idx],
461                         0,
462                         0,
463                         0);
464 
465                     for(k = 0; k < i4_num_cands_added; k++)
466                     {
467                         if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
468                            (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
469                            (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
470                         {
471                             break;
472                         }
473                     }
474 
475                     if(k == i4_num_cands_added)
476                     {
477                         i4_num_cands_added++;
478 
479                         if(i4_max_cands_to_add <= i4_num_cands_added)
480                         {
481                             return i4_num_cands_added;
482                         }
483                     }
484                 }
485             }
486         }
487 
488         i4_result_id++;
489     }
490 
491     return i4_num_cands_added;
492 }
493 
hme_add_cands_for_merge_eval(ctb_cluster_info_t * ps_cluster_info,search_results_t ** pps_child_data_array,inter_cu_results_t * ps_8x8cu_results,range_prms_t ** pps_range_prms,search_node_t * ps_merge_cand,U08 * pu1_refid_to_pred_dir_list,S08 * pi1_past_list,S08 * pi1_future_list,ME_QUALITY_PRESETS_T e_quality_preset,BLK_SIZE_T e_blk_size,U08 u1_pred_dir,U08 u1_blk_id)494 WORD32 hme_add_cands_for_merge_eval(
495     ctb_cluster_info_t *ps_cluster_info,
496     search_results_t **pps_child_data_array,
497     inter_cu_results_t *ps_8x8cu_results,
498     range_prms_t **pps_range_prms,
499     search_node_t *ps_merge_cand,
500     U08 *pu1_refid_to_pred_dir_list,
501     S08 *pi1_past_list,
502     S08 *pi1_future_list,
503     ME_QUALITY_PRESETS_T e_quality_preset,
504     BLK_SIZE_T e_blk_size,
505     U08 u1_pred_dir,
506     U08 u1_blk_id)
507 {
508     WORD32 i4_num_cands_added = 0;
509 
510     if(ME_PRISTINE_QUALITY == e_quality_preset)
511     {
512         cluster_data_t *ps_cluster_primo;
513 
514         WORD32 i4_num_clusters;
515 
516         if(BLK_32x32 == e_blk_size)
517         {
518             ps_cluster_primo = ps_cluster_info->ps_32x32_blk[u1_blk_id].as_cluster_data;
519             i4_num_clusters = ps_cluster_info->ps_32x32_blk[u1_blk_id].num_clusters;
520         }
521         else
522         {
523             ps_cluster_primo = ps_cluster_info->ps_64x64_blk->as_cluster_data;
524             i4_num_clusters = ps_cluster_info->ps_64x64_blk->num_clusters;
525         }
526 
527         i4_num_cands_added = hme_add_clustered_mvs_as_merge_cands(
528             ps_cluster_primo,
529             ps_merge_cand,
530             pps_range_prms,
531             pu1_refid_to_pred_dir_list,
532             i4_num_clusters,
533             u1_pred_dir);
534     }
535 
536     i4_num_cands_added = hme_add_me_best_as_merge_cands(
537         pps_child_data_array,
538         ps_8x8cu_results,
539         ps_merge_cand,
540         pps_range_prms,
541         pu1_refid_to_pred_dir_list,
542         pi1_past_list,
543         pi1_future_list,
544         e_blk_size,
545         e_quality_preset,
546         i4_num_cands_added,
547         u1_pred_dir);
548 
549     return i4_num_cands_added;
550 }
551 
552 /**
553 ********************************************************************************
554 *  @fn   WORD32 hme_pick_eval_merge_candts(hme_merge_prms_t *ps_merge_prms,
555 *                                          hme_subpel_prms_t *ps_subpel_prms,
556 *                                          S32 i4_search_idx,
557 *                                          S32 i4_best_part_type, S32 i4_is_vert, <remaining params as in the definition>)
558 *
559 *  @brief  Given a target partition orientation in the merged CU, and the
560 *          partition type of the most likely partition, this fxn picks up
561 *          candidates from the 4 constituent CUs and does a refinement search
562 *          to identify best results for the merge CU across active partitions
563 *
564 *  @param[in,out] ps_merge_prms : Parameters sent from higher layers. Out of
565 *                 these params, the search result structure is also derived and
566 *                 updated during the search
567 *
568 *  @param[in] i4_search_idx : ID of the buffer within the search results to update.
569 *               Will be 0 if all refidx collapsed to one buf, else it'll be 0/1
570 *
571 *  @param[in] i4_best_part_type : partition type of potential partition in the
572 *              merged CU, -1 if the merge process has not yet been able to
573 *              determine this.
574 *
575 *  @param[in] i4_is_vert : Whether target partition of merged CU is vertical
576 *             orientation or horizontal orientation.
577 *
578 *  @return Number of merge candidates
579 ********************************************************************************
580 */
hme_pick_eval_merge_candts(hme_merge_prms_t * ps_merge_prms,hme_subpel_prms_t * ps_subpel_prms,S32 i4_search_idx,S32 i4_best_part_type,S32 i4_is_vert,wgt_pred_ctxt_t * ps_wt_inp_prms,S32 i4_frm_qstep,ihevce_cmn_opt_func_t * ps_cmn_utils_optimised_function_list,ihevce_me_optimised_function_list_t * ps_me_optimised_function_list)581 WORD32 hme_pick_eval_merge_candts(
582     hme_merge_prms_t *ps_merge_prms,
583     hme_subpel_prms_t *ps_subpel_prms,
584     S32 i4_search_idx,
585     S32 i4_best_part_type,
586     S32 i4_is_vert,
587     wgt_pred_ctxt_t *ps_wt_inp_prms,
588     S32 i4_frm_qstep,
589     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
590     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
591 {
592     S32 x_off, y_off;
593     search_node_t *ps_search_node;
594     S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
595     S32 i4_num_valid_parts;
596     pred_ctxt_t *ps_pred_ctxt;
597 
598     search_node_t as_merge_unique_node[MAX_MERGE_CANDTS];
599     S32 num_unique_nodes_cu_merge = 0;
600 
601     search_results_t *ps_search_results = ps_merge_prms->ps_results_merge;
602     CU_SIZE_T e_cu_size = ps_search_results->e_cu_size;
603     S32 i4_part_mask = ps_search_results->i4_part_mask;
604 
605     search_results_t *aps_child_results[4];
606     layer_ctxt_t *ps_curr_layer = ps_merge_prms->ps_layer_ctxt;
607 
608     S32 i4_ref_stride, i, j;
609     result_upd_prms_t s_result_prms;
610 
611     BLK_SIZE_T e_blk_size = ge_cu_size_to_blk_size[e_cu_size];
612     S32 i4_offset;
613 
614     /*************************************************************************/
615     /* Function pointer for SAD/SATD, array and prms structure to pass to    */
616     /* This function                                                         */
617     /*************************************************************************/
618     PF_SAD_FXN_T pf_err_compute;
619     S32 ai4_sad_grid[9][17];
620     err_prms_t s_err_prms;
621 
622     /*************************************************************************/
623     /* Allowed MV RANGE                                                      */
624     /*************************************************************************/
625     range_prms_t **pps_range_prms = ps_merge_prms->aps_mv_range;
626     PF_INTERP_FXN_T pf_qpel_interp;
627     PF_MV_COST_FXN pf_mv_cost_compute;
628     WORD32 pred_lx;
629     U08 *apu1_hpel_ref[4];
630 
631     interp_prms_t s_interp_prms;
632     S32 i4_interp_buf_id;
633 
634     S32 i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
635     S32 i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
636 
637     /* Sanity checks */
638     ASSERT((e_blk_size == BLK_64x64) || (e_blk_size == BLK_32x32));
639 
640     s_err_prms.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;
641 
642     /* Initialize all the ptrs to child CUs for merge decision */
643     aps_child_results[0] = ps_merge_prms->ps_results_tl;
644     aps_child_results[1] = ps_merge_prms->ps_results_tr;
645     aps_child_results[2] = ps_merge_prms->ps_results_bl;
646     aps_child_results[3] = ps_merge_prms->ps_results_br;
647 
648     num_unique_nodes_cu_merge = 0;
649 
650     pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
651 
652     if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
653     {
654         num_unique_nodes_cu_merge = hme_add_cands_for_merge_eval(
655             ps_merge_prms->ps_cluster_info,
656             aps_child_results,
657             ps_merge_prms->ps_8x8_cu_results,
658             pps_range_prms,
659             as_merge_unique_node,
660             ps_search_results->pu1_is_past,
661             ps_merge_prms->pi1_past_list,
662             ps_merge_prms->pi1_future_list,
663             ps_merge_prms->e_quality_preset,
664             e_blk_size,
665             i4_search_idx,
666             (ps_merge_prms->ps_results_merge->u1_x_off >> 5) +
667                 (ps_merge_prms->ps_results_merge->u1_y_off >> 4));
668     }
669     else
670     {
671         /*************************************************************************/
672         /* Populate the list of unique search nodes in the child CUs for merge   */
673         /* evaluation                                                            */
674         /*************************************************************************/
675         for(i = 0; i < 4; i++)
676         {
677             search_node_t s_search_node;
678 
679             PART_TYPE_T e_part_type;
680             PART_ID_T e_part_id;
681 
682             WORD32 part_num;
683 
684             search_results_t *ps_child = aps_child_results[i];
685 
686             if(ps_child->ps_cu_results->u1_num_best_results)
687             {
688                 if(!((ps_child->ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
689                      (1 == ps_child->ps_cu_results->u1_num_best_results)))
690                 {
691                     e_part_type =
692                         (PART_TYPE_T)ps_child->ps_cu_results->ps_best_results[0].u1_part_type;
693 
694                     ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
695 
696                     /* Insert mvs of NxN partitions. */
697                     for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
698                         part_num++)
699                     {
700                         e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
701 
702                         if(ps_child->aps_part_results[i4_search_idx][e_part_id]->i1_ref_idx != -1)
703                         {
704                             s_search_node = *ps_child->aps_part_results[i4_search_idx][e_part_id];
705                             if(s_search_node.s_mv.i2_mvx != INTRA_MV)
706                             {
707                                 CLIP_MV_WITHIN_RANGE(
708                                     s_search_node.s_mv.i2_mvx,
709                                     s_search_node.s_mv.i2_mvy,
710                                     pps_range_prms[s_search_node.i1_ref_idx],
711                                     0,
712                                     0,
713                                     0);
714 
715                                 INSERT_NEW_NODE_NOMAP(
716                                     as_merge_unique_node,
717                                     num_unique_nodes_cu_merge,
718                                     s_search_node,
719                                     1);
720                             }
721                         }
722                     }
723                 }
724             }
725             else if(!((ps_merge_prms->ps_results_grandchild[(i << 2)]
726                            .ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
727                       (1 == ps_merge_prms->ps_results_grandchild[(i << 2)]
728                                 .ps_cu_results->u1_num_best_results)))
729             {
730                 search_results_t *ps_results_root = &ps_merge_prms->ps_results_grandchild[(i << 2)];
731 
732                 for(j = 0; j < 4; j++)
733                 {
734                     e_part_type = (PART_TYPE_T)ps_results_root[j]
735                                       .ps_cu_results->ps_best_results[0]
736                                       .u1_part_type;
737 
738                     ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
739 
740                     /* Insert mvs of NxN partitions. */
741                     for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
742                         part_num++)
743                     {
744                         e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
745 
746                         if((ps_results_root[j]
747                                 .aps_part_results[i4_search_idx][e_part_id]
748                                 ->i1_ref_idx != -1) &&
749                            (!ps_child->ps_cu_results->ps_best_results->as_pu_results->pu
750                                  .b1_intra_flag))
751                         {
752                             s_search_node =
753                                 *ps_results_root[j].aps_part_results[i4_search_idx][e_part_id];
754                             if(s_search_node.s_mv.i2_mvx != INTRA_MV)
755                             {
756                                 CLIP_MV_WITHIN_RANGE(
757                                     s_search_node.s_mv.i2_mvx,
758                                     s_search_node.s_mv.i2_mvy,
759                                     pps_range_prms[s_search_node.i1_ref_idx],
760                                     0,
761                                     0,
762                                     0);
763 
764                                 INSERT_NEW_NODE_NOMAP(
765                                     as_merge_unique_node,
766                                     num_unique_nodes_cu_merge,
767                                     s_search_node,
768                                     1);
769                             }
770                         }
771                     }
772                 }
773             }
774         }
775     }
776 
777     if(0 == num_unique_nodes_cu_merge)
778     {
779         return 0;
780     }
781 
782     /*************************************************************************/
783     /* Appropriate Err compute fxn, depends on SAD/SATD, blk size and remains*/
784     /* fixed through this subpel refinement for this partition.              */
785     /* Note, we do not enable grid sads since one pt is evaluated per node   */
786     /* Hence, part mask is also nearly dont care and we use 2Nx2N enabled.   */
787     /*************************************************************************/
788     i4_part_mask = ps_search_results->i4_part_mask;
789 
790     /* Need to add the corresponding SAD functions for EXTREME SPEED : Lokesh */
791     if(ps_subpel_prms->i4_use_satd)
792     {
793         if(BLK_32x32 == e_blk_size)
794         {
795             pf_err_compute = hme_evalsatd_pt_pu_32x32;
796         }
797         else
798         {
799             pf_err_compute = hme_evalsatd_pt_pu_64x64;
800         }
801     }
802     else
803     {
804         pf_err_compute = (PF_SAD_FXN_T)hme_evalsad_grid_pu_MxM;
805     }
806 
807     i4_ref_stride = ps_curr_layer->i4_rec_stride;
808 
809     x_off = ps_merge_prms->ps_results_tl->u1_x_off;
810     y_off = ps_merge_prms->ps_results_tl->u1_y_off;
811     i4_offset = x_off + i4_ctb_x_off + ((y_off + i4_ctb_y_off) * i4_ref_stride);
812 
813     /*************************************************************************/
814     /* This array stores the ids of the partitions whose                     */
815     /* SADs are updated. Since the partitions whose SADs are updated may not */
816     /* be in contiguous order, we supply another level of indirection.       */
817     /*************************************************************************/
818     i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
819 
820     /* Initialize result params used for partition update */
821     s_result_prms.pf_mv_cost_compute = NULL;
822     s_result_prms.ps_search_results = ps_search_results;
823     s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids;
824     s_result_prms.i1_ref_idx = i4_search_idx;
825     s_result_prms.i4_part_mask = i4_part_mask;
826     s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
827     s_result_prms.i4_grid_mask = 1;
828 
829     /* One time Initialization of error params used for SAD/SATD compute */
830     s_err_prms.i4_inp_stride = ps_subpel_prms->i4_inp_stride;
831     s_err_prms.i4_ref_stride = i4_ref_stride;
832     s_err_prms.i4_part_mask = (ENABLE_2Nx2N);
833     s_err_prms.i4_grid_mask = 1;
834     s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
835     s_err_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
836     s_err_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
837     s_err_prms.i4_step = 1;
838 
839     /*************************************************************************/
840     /* One time preparation of non changing interpolation params.            */
841     /*************************************************************************/
842     s_interp_prms.i4_ref_stride = i4_ref_stride;
843     s_interp_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
844     s_interp_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
845     s_interp_prms.apu1_interp_out[0] = ps_subpel_prms->pu1_wkg_mem;
846     s_interp_prms.i4_out_stride = gau1_blk_size_to_wd[e_blk_size];
847     i4_interp_buf_id = 0;
848 
849     pf_qpel_interp = ps_subpel_prms->pf_qpel_interp;
850 
851     /***************************************************************************/
852     /* Compute SATD/SAD for all unique nodes of children CUs to get best merge */
853     /* results                                                                 */
854     /***************************************************************************/
855     for(i = 0; i < num_unique_nodes_cu_merge; i++)
856     {
857         WORD8 i1_ref_idx;
858         ps_search_node = &as_merge_unique_node[i];
859 
860         /*********************************************************************/
861         /* Compute the base pointer for input, interpolated buffers          */
862         /* The base pointers point as follows:                               */
863         /* fx fy : 0, 0 :: fx, hy : 0, 0.5, hx, fy: 0.5, 0, hx, fy: 0.5, 0.5 */
864         /* To these, we need to add the offset of the current node           */
865         /*********************************************************************/
866         i1_ref_idx = ps_search_node->i1_ref_idx;
867         apu1_hpel_ref[0] = ps_curr_layer->ppu1_list_rec_fxfy[i1_ref_idx] + i4_offset;
868         apu1_hpel_ref[1] = ps_curr_layer->ppu1_list_rec_hxfy[i1_ref_idx] + i4_offset;
869         apu1_hpel_ref[2] = ps_curr_layer->ppu1_list_rec_fxhy[i1_ref_idx] + i4_offset;
870         apu1_hpel_ref[3] = ps_curr_layer->ppu1_list_rec_hxhy[i1_ref_idx] + i4_offset;
871 
872         s_interp_prms.ppu1_ref = &apu1_hpel_ref[0];
873 
874         pf_qpel_interp(
875             &s_interp_prms,
876             ps_search_node->s_mv.i2_mvx,
877             ps_search_node->s_mv.i2_mvy,
878             i4_interp_buf_id);
879 
880         pred_lx = i4_search_idx;
881         ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
882 
883         s_result_prms.u1_pred_lx = pred_lx;
884         s_result_prms.ps_search_node_base = ps_search_node;
885         s_err_prms.pu1_inp =
886             ps_wt_inp_prms->apu1_wt_inp[i1_ref_idx] + x_off + y_off * ps_subpel_prms->i4_inp_stride;
887         s_err_prms.pu1_ref = s_interp_prms.pu1_final_out;
888         s_err_prms.i4_ref_stride = s_interp_prms.i4_final_out_stride;
889 
890         /* Carry out the SAD/SATD. This call also does the TU RECURSION.
891         Here the tu recursion logic is restricted with the size of the PU*/
892         pf_err_compute(&s_err_prms);
893 
894         if(ps_subpel_prms->u1_is_cu_noisy &&
895            ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
896         {
897             ps_me_optimised_function_list->pf_compute_stim_injected_distortion_for_all_parts(
898                 s_err_prms.pu1_ref,
899                 s_err_prms.i4_ref_stride,
900                 ai4_valid_part_ids,
901                 ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX,
902                 ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
903                 s_err_prms.pi4_sad_grid,
904                 ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier,
905                 ps_wt_inp_prms->a_inv_wpred_wt[i1_ref_idx],
906                 ps_wt_inp_prms->ai4_shift_val[i1_ref_idx],
907                 i4_num_valid_parts,
908                 ps_wt_inp_prms->wpred_log_wdc,
909                 (BLK_32x32 == e_blk_size) ? 32 : 64);
910         }
911 
912         /* Update the mv's */
913         s_result_prms.i2_mv_x = ps_search_node->s_mv.i2_mvx;
914         s_result_prms.i2_mv_y = ps_search_node->s_mv.i2_mvy;
915 
916         /* Update best results */
917         hme_update_results_pt_pu_best1_subpel_hs(&s_err_prms, &s_result_prms);
918     }
919 
920     /************************************************************************/
921     /* Update mv cost and total cost for each valid partition in the CU     */
922     /************************************************************************/
923     for(i = 0; i < TOT_NUM_PARTS; i++)
924     {
925         if(i4_part_mask & (1 << i))
926         {
927             WORD32 j;
928             WORD32 i4_mv_cost;
929 
930             ps_search_node = ps_search_results->aps_part_results[i4_search_idx][i];
931 
932             for(j = 0;
933                 j < MIN(ps_search_results->u1_num_results_per_part, num_unique_nodes_cu_merge);
934                 j++)
935             {
936                 if(ps_search_node->i1_ref_idx != -1)
937                 {
938                     pred_lx = i4_search_idx;
939                     ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
940 
941                     /* Prediction context should now deal with qpel units */
942                     HME_SET_MVPRED_RES(ps_pred_ctxt, MV_RES_QPEL);
943 
944                     ps_search_node->u1_subpel_done = 1;
945                     ps_search_node->u1_is_avail = 1;
946 
947                     i4_mv_cost =
948                         pf_mv_cost_compute(ps_search_node, ps_pred_ctxt, (PART_ID_T)i, MV_RES_QPEL);
949 
950                     ps_search_node->i4_tot_cost = i4_mv_cost + ps_search_node->i4_sad;
951                     ps_search_node->i4_mv_cost = i4_mv_cost;
952 
953                     ps_search_node++;
954                 }
955             }
956         }
957     }
958 
959     return num_unique_nodes_cu_merge;
960 }
961 
962 #define CU_MERGE_MAX_INTRA_PARTS 4
963 
964 /**
965 ********************************************************************************
966 *  @fn     hme_try_merge_high_speed
967 *
968 *  @brief  Attempts to merge 4 NxN candts to a 2Nx2N candt, either as a single
969 entity or with partititons for high speed preset
970 *
971 *  @param[in,out]  hme_merge_prms_t: Params for CU merge
972 *
973 *  @return MERGE_RESULT_T type result of merge (CU_MERGED/CU_SPLIT)
974 ********************************************************************************
975 */
hme_try_merge_high_speed(me_ctxt_t * ps_thrd_ctxt,me_frm_ctxt_t * ps_ctxt,ipe_l0_ctb_analyse_for_me_t * ps_cur_ipe_ctb,hme_subpel_prms_t * ps_subpel_prms,hme_merge_prms_t * ps_merge_prms,inter_pu_results_t * ps_pu_results,pu_result_t * ps_pu_result)976 CU_MERGE_RESULT_T hme_try_merge_high_speed(
977     me_ctxt_t *ps_thrd_ctxt,
978     me_frm_ctxt_t *ps_ctxt,
979     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
980     hme_subpel_prms_t *ps_subpel_prms,
981     hme_merge_prms_t *ps_merge_prms,
982     inter_pu_results_t *ps_pu_results,
983     pu_result_t *ps_pu_result)
984 {
985     search_results_t *ps_results_tl, *ps_results_tr;
986     search_results_t *ps_results_bl, *ps_results_br;
987 
988     S32 i;
989     S32 i4_search_idx;
990     S32 i4_cost_parent;
991     S32 intra_cu_size;
992     ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
993 
994     search_results_t *ps_results_merge = ps_merge_prms->ps_results_merge;
995     wgt_pred_ctxt_t *ps_wt_inp_prms = &ps_ctxt->s_wt_pred;
996 
997     S32 i4_part_mask = ENABLE_ALL_PARTS - ENABLE_NxN;
998     S32 is_vert = 0, i4_best_part_type = -1;
999     S32 i4_intra_parts = 0; /* Keeps track of intra percentage before merge */
1000     S32 i4_cost_children = 0;
1001     S32 i4_frm_qstep = ps_ctxt->frm_qstep;
1002     S32 i4_num_merge_cands_evaluated = 0;
1003     U08 u1_x_off = ps_results_merge->u1_x_off;
1004     U08 u1_y_off = ps_results_merge->u1_y_off;
1005     S32 i4_32x32_id = (u1_y_off >> 4) + (u1_x_off >> 5);
1006 
1007     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
1008         ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
1009     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
1010         ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
1011     ps_results_tl = ps_merge_prms->ps_results_tl;
1012     ps_results_tr = ps_merge_prms->ps_results_tr;
1013     ps_results_bl = ps_merge_prms->ps_results_bl;
1014     ps_results_br = ps_merge_prms->ps_results_br;
1015 
1016     if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED)
1017     {
1018         i4_part_mask &= ~ENABLE_AMP;
1019     }
1020 
1021     if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25)
1022     {
1023         i4_part_mask &= ~ENABLE_AMP;
1024 
1025         i4_part_mask &= ~ENABLE_SMP;
1026     }
1027 
1028     ps_merge_prms->i4_num_pred_dir_actual = 0;
1029 
1030     /*************************************************************************/
1031     /* The logic for High speed CU merge goes as follows:                    */
1032     /*                                                                       */
1033     /* 1. Early exit with CU_SPLIT if sum of best partitions of children CUs */
1034     /*    exceed 7                                                           */
1035     /* 2. Early exit with CU_MERGE if mvs of best partitions of children CUs */
1036     /*    are identical                                                      */
1037     /* 3. Find the all unique mvs of best partitions of children CUs and     */
1038     /*    evaluate partial SATDs (all 17 partitions) for each unique mv. If  */
1039     /*    best parent cost is lower than sum of the best children costs      */
1040     /*    return CU_MERGE after seeding the best results else return CU_SPLIT*/
1041     /*                                                                       */
1042     /*************************************************************************/
1043 
1044     /* Count the number of best partitions in child CUs, early exit if > 7 */
1045     if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
1046        (CU_32x32 == ps_results_merge->e_cu_size))
1047     {
1048         S32 num_parts_in_32x32 = 0;
1049         WORD32 i4_part_type;
1050 
1051         if(ps_results_tl->u1_split_flag)
1052         {
1053             num_parts_in_32x32 += 4;
1054 
1055 #define COST_INTERCHANGE 0
1056             i4_cost_children = ps_merge_prms->ps_8x8_cu_results[0].ps_best_results->i4_tot_cost +
1057                                ps_merge_prms->ps_8x8_cu_results[1].ps_best_results->i4_tot_cost +
1058                                ps_merge_prms->ps_8x8_cu_results[2].ps_best_results->i4_tot_cost +
1059                                ps_merge_prms->ps_8x8_cu_results[3].ps_best_results->i4_tot_cost;
1060         }
1061         else
1062         {
1063             i4_part_type = ps_results_tl->ps_cu_results->ps_best_results[0].u1_part_type;
1064             num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1065             i4_cost_children = ps_results_tl->ps_cu_results->ps_best_results[0].i4_tot_cost;
1066         }
1067 
1068         if(ps_results_tr->u1_split_flag)
1069         {
1070             num_parts_in_32x32 += 4;
1071 
1072             i4_cost_children += ps_merge_prms->ps_8x8_cu_results[4].ps_best_results->i4_tot_cost +
1073                                 ps_merge_prms->ps_8x8_cu_results[5].ps_best_results->i4_tot_cost +
1074                                 ps_merge_prms->ps_8x8_cu_results[6].ps_best_results->i4_tot_cost +
1075                                 ps_merge_prms->ps_8x8_cu_results[7].ps_best_results->i4_tot_cost;
1076         }
1077         else
1078         {
1079             i4_part_type = ps_results_tr->ps_cu_results->ps_best_results[0].u1_part_type;
1080             num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1081             i4_cost_children += ps_results_tr->ps_cu_results->ps_best_results[0].i4_tot_cost;
1082         }
1083 
1084         if(ps_results_bl->u1_split_flag)
1085         {
1086             num_parts_in_32x32 += 4;
1087 
1088             i4_cost_children += ps_merge_prms->ps_8x8_cu_results[8].ps_best_results->i4_tot_cost +
1089                                 ps_merge_prms->ps_8x8_cu_results[9].ps_best_results->i4_tot_cost +
1090                                 ps_merge_prms->ps_8x8_cu_results[10].ps_best_results->i4_tot_cost +
1091                                 ps_merge_prms->ps_8x8_cu_results[11].ps_best_results->i4_tot_cost;
1092         }
1093         else
1094         {
1095             i4_part_type = ps_results_bl->ps_cu_results->ps_best_results[0].u1_part_type;
1096             num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1097             i4_cost_children += ps_results_bl->ps_cu_results->ps_best_results[0].i4_tot_cost;
1098         }
1099 
1100         if(ps_results_br->u1_split_flag)
1101         {
1102             num_parts_in_32x32 += 4;
1103 
1104             i4_cost_children += ps_merge_prms->ps_8x8_cu_results[12].ps_best_results->i4_tot_cost +
1105                                 ps_merge_prms->ps_8x8_cu_results[13].ps_best_results->i4_tot_cost +
1106                                 ps_merge_prms->ps_8x8_cu_results[14].ps_best_results->i4_tot_cost +
1107                                 ps_merge_prms->ps_8x8_cu_results[15].ps_best_results->i4_tot_cost;
1108         }
1109         else
1110         {
1111             i4_part_type = ps_results_br->ps_cu_results->ps_best_results[0].u1_part_type;
1112             num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1113             i4_cost_children += ps_results_br->ps_cu_results->ps_best_results[0].i4_tot_cost;
1114         }
1115 
1116         if((num_parts_in_32x32 > 7) && (ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY))
1117         {
1118             return CU_SPLIT;
1119         }
1120 
1121         if((num_parts_in_32x32 > MAX_NUM_CONSTITUENT_MVS_TO_ENABLE_32MERGE_IN_XS25) &&
1122            (ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25))
1123         {
1124             return CU_SPLIT;
1125         }
1126     }
1127 
1128     /* Accumulate intra percentage before merge for early CU_SPLIT decision     */
1129     /* Note : Each intra part represent a NxN unit of the children CUs          */
1130     /* This is essentially 1/16th of the CUsize under consideration for merge   */
1131     if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
1132     {
1133         if(CU_64x64 == ps_results_merge->e_cu_size)
1134         {
1135             i4_intra_parts =
1136                 (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_inter_eval_enable)
1137                     ? 16
1138                     : ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_intra_eval_enable;
1139         }
1140         else
1141         {
1142             switch((ps_results_merge->u1_x_off >> 5) + ((ps_results_merge->u1_y_off >> 4)))
1143             {
1144             case 0:
1145             {
1146                 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tl
1147                                        ->u1_inter_eval_enable)
1148                                      ? 16
1149                                      : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1150                                             ->ps_child_node_tl->u1_intra_eval_enable);
1151 
1152                 break;
1153             }
1154             case 1:
1155             {
1156                 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tr
1157                                        ->u1_inter_eval_enable)
1158                                      ? 16
1159                                      : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1160                                             ->ps_child_node_tr->u1_intra_eval_enable);
1161 
1162                 break;
1163             }
1164             case 2:
1165             {
1166                 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_bl
1167                                        ->u1_inter_eval_enable)
1168                                      ? 16
1169                                      : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1170                                             ->ps_child_node_bl->u1_intra_eval_enable);
1171 
1172                 break;
1173             }
1174             case 3:
1175             {
1176                 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_br
1177                                        ->u1_inter_eval_enable)
1178                                      ? 16
1179                                      : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1180                                             ->ps_child_node_br->u1_intra_eval_enable);
1181 
1182                 break;
1183             }
1184             }
1185         }
1186     }
1187     else
1188     {
1189         for(i = 0; i < 4; i++)
1190         {
1191             search_results_t *ps_results =
1192                 (i == 0) ? ps_results_tl
1193                          : ((i == 1) ? ps_results_tr : ((i == 2) ? ps_results_bl : ps_results_br));
1194 
1195             part_type_results_t *ps_best_res = &ps_results->ps_cu_results->ps_best_results[0];
1196 
1197             if(ps_results->u1_split_flag)
1198             {
1199                 U08 u1_x_off = ps_results->u1_x_off;
1200                 U08 u1_y_off = ps_results->u1_y_off;
1201                 U08 u1_8x8_zscan_id = gau1_ctb_raster_to_zscan[(u1_x_off >> 2) + (u1_y_off << 2)] >>
1202                                       2;
1203 
1204                 /* Special case to handle 8x8 CUs when 16x16 is split */
1205                 ASSERT(ps_results->e_cu_size == CU_16x16);
1206 
1207                 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id].ps_best_results[0];
1208 
1209                 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1210                     i4_intra_parts += 1;
1211 
1212                 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 1].ps_best_results[0];
1213 
1214                 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1215                     i4_intra_parts += 1;
1216 
1217                 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 2].ps_best_results[0];
1218 
1219                 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1220                     i4_intra_parts += 1;
1221 
1222                 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 3].ps_best_results[0];
1223 
1224                 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1225                     i4_intra_parts += 1;
1226             }
1227             else if(ps_best_res[0].as_pu_results[0].pu.b1_intra_flag)
1228             {
1229                 i4_intra_parts += 4;
1230             }
1231         }
1232     }
1233 
1234     /* Determine the max intra CU size indicated by IPE */
1235     intra_cu_size = CU_64x64;
1236     if(ps_cur_ipe_ctb->u1_split_flag)
1237     {
1238         intra_cu_size = CU_32x32;
1239         if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
1240         {
1241             intra_cu_size = CU_16x16;
1242         }
1243     }
1244 
1245     if(((i4_intra_parts > CU_MERGE_MAX_INTRA_PARTS) &&
1246         (intra_cu_size < ps_results_merge->e_cu_size) &&
1247         (ME_PRISTINE_QUALITY != ps_merge_prms->e_quality_preset)) ||
1248        (i4_intra_parts == 16))
1249     {
1250         S32 i4_merge_outcome;
1251 
1252         i4_merge_outcome = (CU_32x32 == ps_results_merge->e_cu_size)
1253                                ? (!ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag &&
1254                                   ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_valid_cu)
1255                                : (!ps_cur_ipe_ctb->u1_split_flag);
1256 
1257         i4_merge_outcome = i4_merge_outcome ||
1258                            (ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset);
1259 
1260         i4_merge_outcome = i4_merge_outcome &&
1261                            !(ps_subpel_prms->u1_is_cu_noisy && DISABLE_INTRA_WHEN_NOISY);
1262 
1263         if(i4_merge_outcome)
1264         {
1265             inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
1266             part_type_results_t *ps_best_result = ps_cu_results->ps_best_results;
1267             pu_t *ps_pu = &ps_best_result->as_pu_results->pu;
1268 
1269             ps_cu_results->u1_num_best_results = 1;
1270             ps_cu_results->u1_cu_size = ps_results_merge->e_cu_size;
1271             ps_cu_results->u1_x_off = u1_x_off;
1272             ps_cu_results->u1_y_off = u1_y_off;
1273 
1274             ps_best_result->u1_part_type = PRT_2Nx2N;
1275             ps_best_result->ai4_tu_split_flag[0] = 0;
1276             ps_best_result->ai4_tu_split_flag[1] = 0;
1277             ps_best_result->ai4_tu_split_flag[2] = 0;
1278             ps_best_result->ai4_tu_split_flag[3] = 0;
1279             ps_best_result->i4_tot_cost =
1280                 (CU_64x64 == ps_results_merge->e_cu_size)
1281                     ? ps_cur_ipe_ctb->i4_best64x64_intra_cost
1282                     : ps_cur_ipe_ctb->ai4_best32x32_intra_cost[i4_32x32_id];
1283 
1284             ps_pu->b1_intra_flag = 1;
1285             ps_pu->b4_pos_x = u1_x_off >> 2;
1286             ps_pu->b4_pos_y = u1_y_off >> 2;
1287             ps_pu->b4_wd = (1 << (ps_results_merge->e_cu_size + 1)) - 1;
1288             ps_pu->b4_ht = ps_pu->b4_wd;
1289             ps_pu->mv.i1_l0_ref_idx = -1;
1290             ps_pu->mv.i1_l1_ref_idx = -1;
1291             ps_pu->mv.s_l0_mv.i2_mvx = INTRA_MV;
1292             ps_pu->mv.s_l0_mv.i2_mvy = INTRA_MV;
1293             ps_pu->mv.s_l1_mv.i2_mvx = INTRA_MV;
1294             ps_pu->mv.s_l1_mv.i2_mvy = INTRA_MV;
1295 
1296             return CU_MERGED;
1297         }
1298         else
1299         {
1300             return CU_SPLIT;
1301         }
1302     }
1303 
1304     if(i4_intra_parts)
1305     {
1306         i4_part_mask = ENABLE_2Nx2N;
1307     }
1308 
1309     ps_results_merge->u1_num_active_ref = (ps_ctxt->s_frm_prms.bidir_enabled) ? 2 : 1;
1310 
1311     hme_reset_search_results(ps_results_merge, i4_part_mask, MV_RES_QPEL);
1312 
1313     ps_results_merge->u1_num_active_ref = ps_merge_prms->i4_num_ref;
1314     ps_merge_prms->i4_num_pred_dir_actual = 0;
1315 
1316     if(ps_subpel_prms->u1_is_cu_noisy && ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
1317     {
1318         S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
1319         S32 i4_num_valid_parts;
1320         S32 i4_sigma_array_offset;
1321 
1322         i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
1323 
1324         /*********************************************************************************************************************************************/
1325         /* i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values  */
1326         /* Logic is x/4 + ((y/4) x 16) : every 4 pixel increase in x equals one 4x4 block increment, every 4 pixel increase in y equals 16 4x4 block */
1327         /* increment as there will be 256 4x4 blocks in a CTB                                                                                        */
1328         /*********************************************************************************************************************************************/
1329         i4_sigma_array_offset = (ps_merge_prms->ps_results_merge->u1_x_off / 4) +
1330                                 (ps_merge_prms->ps_results_merge->u1_y_off * 4);
1331 
1332         for(i = 0; i < i4_num_valid_parts; i++)
1333         {
1334             S32 i4_part_id = ai4_valid_part_ids[i];
1335 
1336             hme_compute_final_sigma_of_pu_from_base_blocks(
1337                 ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
1338                 ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
1339                 au8_final_src_sigmaX,
1340                 au8_final_src_sigmaXSquared,
1341                 (CU_32x32 == ps_results_merge->e_cu_size) ? 32 : 64,
1342                 4,
1343                 i4_part_id,
1344                 16);
1345         }
1346 
1347         ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX = au8_final_src_sigmaX;
1348         ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared = au8_final_src_sigmaXSquared;
1349     }
1350 
1351     /*************************************************************************/
1352     /* Loop through all ref idx and pick the merge candts and refine based   */
1353     /* on the active partitions. At this stage num ref will be 1 or 2        */
1354     /*************************************************************************/
1355     for(i4_search_idx = 0; i4_search_idx < ps_merge_prms->i4_num_ref; i4_search_idx++)
1356     {
1357         S32 i4_cands;
1358         U08 u1_pred_dir = 0;
1359 
1360         if((2 == ps_merge_prms->i4_num_ref) || (!ps_ctxt->s_frm_prms.bidir_enabled))
1361         {
1362             u1_pred_dir = i4_search_idx;
1363         }
1364         else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
1365         {
1366             u1_pred_dir = 1;
1367         }
1368         else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0)
1369         {
1370             u1_pred_dir = 0;
1371         }
1372         else
1373         {
1374             ASSERT(0);
1375         }
1376 
1377         /* call the function to pick and evaluate the merge candts, given */
1378         /* a ref id and a part mask.                                      */
1379         i4_cands = hme_pick_eval_merge_candts(
1380             ps_merge_prms,
1381             ps_subpel_prms,
1382             u1_pred_dir,
1383             i4_best_part_type,
1384             is_vert,
1385             ps_wt_inp_prms,
1386             i4_frm_qstep,
1387             ps_cmn_utils_optimised_function_list,
1388             ps_me_optimised_function_list);
1389 
1390         if(i4_cands)
1391         {
1392             ps_merge_prms->au1_pred_dir_searched[ps_merge_prms->i4_num_pred_dir_actual] =
1393                 u1_pred_dir;
1394             ps_merge_prms->i4_num_pred_dir_actual++;
1395         }
1396 
1397         i4_num_merge_cands_evaluated += i4_cands;
1398     }
1399 
1400     /* Call the decide_part_types function here */
1401     /* Populate the new PU struct with the results post subpel refinement*/
1402     if(i4_num_merge_cands_evaluated)
1403     {
1404         inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
1405 
1406         hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
1407 
1408         ps_merge_prms->ps_inter_ctb_prms->i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
1409         ps_merge_prms->ps_inter_ctb_prms->i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
1410 
1411         hme_populate_pus(
1412             ps_thrd_ctxt,
1413             ps_ctxt,
1414             ps_subpel_prms,
1415             ps_results_merge,
1416             ps_cu_results,
1417             ps_pu_results,
1418             ps_pu_result,
1419             ps_merge_prms->ps_inter_ctb_prms,
1420             &ps_ctxt->s_wt_pred,
1421             ps_merge_prms->ps_layer_ctxt,
1422             ps_merge_prms->au1_pred_dir_searched,
1423             ps_merge_prms->i4_num_pred_dir_actual);
1424 
1425         ps_cu_results->i4_inp_offset = (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
1426 
1427         hme_decide_part_types(
1428             ps_cu_results,
1429             ps_pu_results,
1430             ps_merge_prms->ps_inter_ctb_prms,
1431             ps_ctxt,
1432             ps_cmn_utils_optimised_function_list,
1433             ps_me_optimised_function_list
1434 
1435         );
1436 
1437         /*****************************************************************/
1438         /* INSERT INTRA RESULTS AT 32x32/64x64 LEVEL.                    */
1439         /*****************************************************************/
1440 #if DISABLE_INTRA_IN_BPICS
1441         if(1 != ((ME_XTREME_SPEED_25 == ps_merge_prms->e_quality_preset) &&
1442                  (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
1443 #endif
1444         {
1445             if(!(DISABLE_INTRA_WHEN_NOISY && ps_merge_prms->ps_inter_ctb_prms->u1_is_cu_noisy))
1446             {
1447                 hme_insert_intra_nodes_post_bipred(
1448                     ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
1449             }
1450         }
1451     }
1452     else
1453     {
1454         return CU_SPLIT;
1455     }
1456 
1457     /* We check the best result of ref idx 0 and compare for parent vs child */
1458     if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
1459        (CU_32x32 == ps_results_merge->e_cu_size))
1460     {
1461         i4_cost_parent = ps_results_merge->ps_cu_results->ps_best_results[0].i4_tot_cost;
1462         /*********************************************************************/
1463         /* Add the cost of signaling the CU tree bits.                       */
1464         /* Assuming parent is not split, then we signal 1 bit for this parent*/
1465         /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
1466         /* So, 4*lambda is extra for children cost. :Lokesh                  */
1467         /*********************************************************************/
1468         {
1469             pred_ctxt_t *ps_pred_ctxt = &ps_results_merge->as_pred_ctxt[0];
1470 
1471             i4_cost_children += ((4 * ps_pred_ctxt->lambda) >> (ps_pred_ctxt->lambda_q_shift));
1472         }
1473 
1474         if(i4_cost_parent < i4_cost_children)
1475         {
1476             return CU_MERGED;
1477         }
1478 
1479         return CU_SPLIT;
1480     }
1481     else
1482     {
1483         return CU_MERGED;
1484     }
1485 }
1486 
/**
********************************************************************************
*  @brief  Copies one search result (mv and ref idx) into an mv bank entry
*
*  @param[out] ps_mv : destination mv; its i2_mv_x and i2_mv_y are written
*
*  @param[out] pi1_ref_idx : location that receives the ref idx of the result
*
*  @param[in] ps_search_node : source node; s_mv and i1_ref_idx are read
*
*  @param[in] shift : right shift applied to both mv components before storing
*
*  @note  Wrapped in do { } while(0) so that an invocation together with its
*         trailing semicolon is exactly one statement (safe inside an unbraced
*         if/else). ps_search_node is expanded three times, ps_mv and shift
*         twice each, so the arguments must be free of side effects.
********************************************************************************
*/
#define COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift)                              \
    do                                                                                             \
    {                                                                                              \
        (ps_mv)->i2_mv_x = (ps_search_node)->s_mv.i2_mvx >> (shift);                               \
        (ps_mv)->i2_mv_y = (ps_search_node)->s_mv.i2_mvy >> (shift);                               \
        *(pi1_ref_idx) = (ps_search_node)->i1_ref_idx;                                             \
    } while(0)
1493 
1494 /**
1495 ********************************************************************************
1496 *  @fn     hme_update_mv_bank_noencode(search_results_t *ps_search_results,
1497 *                               layer_mv_t *ps_layer_mv,
1498 *                               S32 i4_search_blk_x,
1499 *                               S32 i4_search_blk_y,
1500 *                               mvbank_update_prms_t *ps_prms)
1501 *
1502 *  @brief  Updates the mv bank in case there is no further encodign to be done
1503 *
1504 *  @param[in]  ps_search_results: contains results for the block just searched
1505 *
1506 *  @param[in,out]  ps_layer_mv : Has pointer to mv bank amongst other things
1507 *
1508 *  @param[in] i4_search_blk_x  : col num of blk being searched
1509 *
1510 *  @param[in] i4_search_blk_y : row num of blk being searched
1511 *
1512 *  @param[in] ps_prms : contains certain parameters which govern how updatedone
1513 *
1514 *  @return None
1515 ********************************************************************************
1516 */
1517 
hme_update_mv_bank_noencode(search_results_t * ps_search_results,layer_mv_t * ps_layer_mv,S32 i4_search_blk_x,S32 i4_search_blk_y,mvbank_update_prms_t * ps_prms)1518 void hme_update_mv_bank_noencode(
1519     search_results_t *ps_search_results,
1520     layer_mv_t *ps_layer_mv,
1521     S32 i4_search_blk_x,
1522     S32 i4_search_blk_y,
1523     mvbank_update_prms_t *ps_prms)
1524 {
1525     hme_mv_t *ps_mv;
1526     hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1527     S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1528     S32 i4_blk_x, i4_blk_y, i4_offset;
1529     S32 i4_j, i4_ref_id;
1530     search_node_t *ps_search_node;
1531     search_node_t *ps_search_node_8x8, *ps_search_node_4x4_1;
1532     search_node_t *ps_search_node_4x4_2, *ps_search_node_4x4_3;
1533     search_node_t *ps_search_node_4x4_4;
1534 
1535     i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1536     i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1537     i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1538 
1539     i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1540 
1541     /* Identify the correct offset in the mvbank and the reference id buf */
1542     ps_mv = ps_layer_mv->ps_mv + i4_offset;
1543     pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1544 
1545     /*************************************************************************/
1546     /* Supposing we store the mvs in the same blk size as we searched (e.g.  */
1547     /* we searched 8x8 blks and store results for 8x8 blks), then we can     */
1548     /* do a straightforward single update of results. This will have a 1-1   */
1549     /* correspondence.                                                       */
1550     /*************************************************************************/
1551     if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
1552     {
1553         for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
1554         {
1555             ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1556             for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1557             {
1558                 COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, 0);
1559                 ps_mv++;
1560                 pi1_ref_idx++;
1561                 ps_search_node++;
1562             }
1563         }
1564         return;
1565     }
1566 
1567     /*************************************************************************/
1568     /* Case where search blk size is 8x8, but we update 4x4 results. In this */
1569     /* case, we need to have NxN partitions enabled in search.               */
1570     /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
1571     /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/
1572     /*************************************************************************/
1573     ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
1574     ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
1575     ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
1576 
1577     /*************************************************************************/
1578     /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
1579     /* hence the below check.                                                */
1580     /*************************************************************************/
1581     ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
1582 
1583     ps_mv1 = ps_mv;
1584     ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
1585     ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
1586     ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
1587     pi1_ref_idx1 = pi1_ref_idx;
1588     pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
1589     pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
1590     pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
1591 
1592     for(i4_ref_id = 0; i4_ref_id < (S32)ps_search_results->u1_num_active_ref; i4_ref_id++)
1593     {
1594         ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1595 
1596         ps_search_node_4x4_1 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL];
1597 
1598         ps_search_node_4x4_2 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TR];
1599 
1600         ps_search_node_4x4_3 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BL];
1601 
1602         ps_search_node_4x4_4 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BR];
1603 
1604         COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
1605         ps_mv1++;
1606         pi1_ref_idx1++;
1607         ps_search_node_4x4_1++;
1608         COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
1609         ps_mv2++;
1610         pi1_ref_idx2++;
1611         ps_search_node_4x4_2++;
1612         COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
1613         ps_mv3++;
1614         pi1_ref_idx3++;
1615         ps_search_node_4x4_3++;
1616         COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
1617         ps_mv4++;
1618         pi1_ref_idx4++;
1619         ps_search_node_4x4_4++;
1620 
1621         if(ps_layer_mv->i4_num_mvs_per_ref > 1)
1622         {
1623             COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_8x8, 0);
1624             ps_mv1++;
1625             pi1_ref_idx1++;
1626             COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_8x8, 0);
1627             ps_mv2++;
1628             pi1_ref_idx2++;
1629             COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_8x8, 0);
1630             ps_mv3++;
1631             pi1_ref_idx3++;
1632             COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_8x8, 0);
1633             ps_mv4++;
1634             pi1_ref_idx4++;
1635         }
1636 
1637         for(i4_j = 2; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1638         {
1639             COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
1640             ps_mv1++;
1641             pi1_ref_idx1++;
1642             ps_search_node_4x4_1++;
1643             COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
1644             ps_mv2++;
1645             pi1_ref_idx2++;
1646             ps_search_node_4x4_2++;
1647             COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
1648             ps_mv3++;
1649             pi1_ref_idx3++;
1650             ps_search_node_4x4_3++;
1651             COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
1652             ps_mv4++;
1653             pi1_ref_idx4++;
1654             ps_search_node_4x4_4++;
1655         }
1656     }
1657 }
1658 
/**
********************************************************************************
*  @fn     hme_update_mv_bank_encode(search_results_t *ps_search_results,
*                               layer_mv_t *ps_layer_mv,
*                               S32 i4_search_blk_x,
*                               S32 i4_search_blk_y,
*                               mvbank_update_prms_t *ps_prms,
*                               U08 *pu1_pred_dir_searched,
*                               S32 i4_num_act_ref_l0)
*
*  @brief  Updates the 8x8 mv bank from the results of a 16x16 search blk.
*          The four 8x8 entries covered by the blk receive the result of the
*          partition (of the selected partition type) they are mapped to.
*
*  @param[in]  ps_search_results : contains results for the block just searched
*
*  @param[in,out]  ps_layer_mv : holds the mv bank and ref idx buffers that are
*                                written, plus the bank geometry
*
*  @param[in] i4_search_blk_x : col num of blk being searched
*
*  @param[in] i4_search_blk_y : row num of blk being searched
*
*  @param[in] ps_prms : parameters which govern how the update is done (shift,
*                       number of references, number of results to store)
*
*  @param[in] pu1_pred_dir_searched : for the i-th reference handled, gives the
*                       first index used into aps_part_results
*
*  @param[in] i4_num_act_ref_l0 : not referenced in the body of this function
*
*  @return None
********************************************************************************
*/
void hme_update_mv_bank_encode(
    search_results_t *ps_search_results,
    layer_mv_t *ps_layer_mv,
    S32 i4_search_blk_x,
    S32 i4_search_blk_y,
    mvbank_update_prms_t *ps_prms,
    U08 *pu1_pred_dir_searched,
    S32 i4_num_act_ref_l0)
{
    /* Index 0..3 below: top-left, top-right, bottom-left, bottom-right 8x8 */
    hme_mv_t *aps_mv_out[4];
    S08 *api1_ref_idx_out[4];
    search_node_t *aps_node[4];
    S32 i4_bank_offset;
    S32 i4_ref, i4_mv, i4_res, i4_quad;
    WORD32 i4_part_type = ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;

    /* Locate the entry of this blk in the mv bank and in the ref idx buf */
    i4_bank_offset = (i4_search_blk_x << ps_prms->i4_shift) +
                     (i4_search_blk_y << ps_prms->i4_shift) * ps_layer_mv->i4_num_blks_per_row;
    i4_bank_offset *= ps_layer_mv->i4_num_mvs_per_blk;

    ASSERT(ps_layer_mv->e_blk_size == BLK_8x8);
    ASSERT(ps_prms->e_search_blk_size == BLK_16x16);

    /* The bank must not ask for more results per ref than were kept */
    ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_best_results);

    aps_mv_out[0] = ps_layer_mv->ps_mv + i4_bank_offset;
    aps_mv_out[1] = aps_mv_out[0] + ps_layer_mv->i4_num_mvs_per_blk;
    aps_mv_out[2] = aps_mv_out[0] + (ps_layer_mv->i4_num_mvs_per_row);
    aps_mv_out[3] = aps_mv_out[2] + (ps_layer_mv->i4_num_mvs_per_blk);

    api1_ref_idx_out[0] = ps_layer_mv->pi1_ref_idx + i4_bank_offset;
    api1_ref_idx_out[1] = api1_ref_idx_out[0] + ps_layer_mv->i4_num_mvs_per_blk;
    api1_ref_idx_out[2] = api1_ref_idx_out[0] + (ps_layer_mv->i4_num_mvs_per_row);
    api1_ref_idx_out[3] = api1_ref_idx_out[2] + (ps_layer_mv->i4_num_mvs_per_blk);

    /* A split 16x16 CU with NxN enabled is stored using the NxN partitions */
    if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) &&
       (ps_search_results->i4_part_mask & ENABLE_NxN))
    {
        i4_part_type = PRT_NxN;
    }

    for(i4_ref = 0; i4_ref < ps_prms->i4_num_ref; i4_ref++)
    {
        for(i4_mv = 0; i4_mv < ps_layer_mv->i4_num_mvs_per_ref; i4_mv++)
        {
            const WORD32 i4_part_id = ge_part_type_to_part_id[i4_part_type][0];
            const S32 i4_num_parts = gau1_num_parts_in_part_type[i4_part_type];
            const S32 i4_dir = pu1_pred_dir_searched[i4_ref];
            /* Result 0 is always stored; result 1 only when asked for */
            const S32 i4_num_results = (ps_prms->i4_num_results_to_store > 1) ? 2 : 1;

            for(i4_res = 0; i4_res < i4_num_results; i4_res++)
            {
                search_node_t *ps_first =
                    &ps_search_results->aps_part_results[i4_dir][i4_part_id][i4_res];

                aps_node[0] = ps_first;

                if(1 == i4_num_parts)
                {
                    /* Single partition: all four 8x8 blks share its result */
                    aps_node[1] = ps_first;
                    aps_node[2] = ps_first;
                    aps_node[3] = ps_first;
                }
                else if(2 == i4_num_parts)
                {
                    /* Vertically oriented partitions: tl and bl share a     */
                    /* result. Horizontally oriented: tl and tr share one.   */
                    /* For AMP this makes two of the 8x8 blks ambiguous,     */
                    /* e.g. for 4x16L the left two 8x8 take the 4x16L result */
                    /* and the right two take the 12x16R result.             */
                    search_node_t *ps_second =
                        &ps_search_results->aps_part_results[i4_dir][i4_part_id + 1][i4_res];

                    if(gau1_is_vert_part[i4_part_type])
                    {
                        aps_node[1] = ps_second;
                        aps_node[2] = ps_first;
                    }
                    else
                    {
                        aps_node[1] = ps_first;
                        aps_node[2] = ps_second;
                    }
                    aps_node[3] = ps_second;
                }
                else
                {
                    /* Four partitions: every 8x8 blk has its own result */
                    aps_node[1] =
                        &ps_search_results->aps_part_results[i4_dir][i4_part_id + 1][i4_res];
                    aps_node[2] =
                        &ps_search_results->aps_part_results[i4_dir][i4_part_id + 2][i4_res];
                    aps_node[3] =
                        &ps_search_results->aps_part_results[i4_dir][i4_part_id + 3][i4_res];
                }

                /* A node marked INTRA_MV is skipped in favour of the next one */
                for(i4_quad = 0; i4_quad < 4; i4_quad++)
                {
                    if(aps_node[i4_quad]->s_mv.i2_mvx == INTRA_MV)
                    {
                        aps_node[i4_quad]++;
                    }
                }

                for(i4_quad = 0; i4_quad < 4; i4_quad++)
                {
                    COPY_SEARCH_RESULT(
                        aps_mv_out[i4_quad], api1_ref_idx_out[i4_quad], aps_node[i4_quad], 0);
                    aps_mv_out[i4_quad]++;
                    api1_ref_idx_out[i4_quad]++;
                }
            }
        }
    }
}
1866 
1867 /**
1868 ********************************************************************************
1869 *  @fn     hme_update_mv_bank_noencode(search_results_t *ps_search_results,
1870 *                               layer_mv_t *ps_layer_mv,
1871 *                               S32 i4_search_blk_x,
1872 *                               S32 i4_search_blk_y,
1873 *                               mvbank_update_prms_t *ps_prms)
1874 *
1875 *  @brief  Updates the mv bank in case there is no further encodign to be done
1876 *
1877 *  @param[in]  ps_search_results: contains results for the block just searched
1878 *
1879 *  @param[in,out]  ps_layer_mv : Has pointer to mv bank amongst other things
1880 *
1881 *  @param[in] i4_search_blk_x  : col num of blk being searched
1882 *
1883 *  @param[in] i4_search_blk_y : row num of blk being searched
1884 *
1885 *  @param[in] ps_prms : contains certain parameters which govern how updatedone
1886 *
1887 *  @return None
1888 ********************************************************************************
1889 */
1890 
hme_update_mv_bank_in_l1_me(search_results_t * ps_search_results,layer_mv_t * ps_layer_mv,S32 i4_search_blk_x,S32 i4_search_blk_y,mvbank_update_prms_t * ps_prms)1891 void hme_update_mv_bank_in_l1_me(
1892     search_results_t *ps_search_results,
1893     layer_mv_t *ps_layer_mv,
1894     S32 i4_search_blk_x,
1895     S32 i4_search_blk_y,
1896     mvbank_update_prms_t *ps_prms)
1897 {
1898     hme_mv_t *ps_mv;
1899     hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1900     S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1901     S32 i4_blk_x, i4_blk_y, i4_offset;
1902     S32 i4_j, i4_ref_id;
1903     search_node_t *ps_search_node;
1904     search_node_t *ps_search_node_8x8, *ps_search_node_4x4;
1905 
1906     i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1907     i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1908     i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1909 
1910     i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1911 
1912     /* Identify the correct offset in the mvbank and the reference id buf */
1913     ps_mv = ps_layer_mv->ps_mv + i4_offset;
1914     pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1915 
1916     /*************************************************************************/
1917     /* Supposing we store the mvs in the same blk size as we searched (e.g.  */
1918     /* we searched 8x8 blks and store results for 8x8 blks), then we can     */
1919     /* do a straightforward single update of results. This will have a 1-1   */
1920     /* correspondence.                                                       */
1921     /*************************************************************************/
1922     if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
1923     {
1924         search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 2];
1925 
1926         hme_mv_t *ps_mv_l0_root = ps_mv;
1927         hme_mv_t *ps_mv_l1_root =
1928             ps_mv + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
1929 
1930         U32 u4_num_l0_results_updated = 0;
1931         U32 u4_num_l1_results_updated = 0;
1932 
1933         S08 *pi1_ref_idx_l0_root = pi1_ref_idx;
1934         S08 *pi1_ref_idx_l1_root =
1935             pi1_ref_idx_l0_root + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
1936 
1937         for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
1938         {
1939             U32 *pu4_num_results_updated;
1940             search_node_t **pps_result_nodes;
1941 
1942             U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
1943 
1944             if(u1_pred_dir_of_cur_ref)
1945             {
1946                 pu4_num_results_updated = &u4_num_l1_results_updated;
1947                 pps_result_nodes = &aps_result_nodes_sorted[1][0];
1948             }
1949             else
1950             {
1951                 pu4_num_results_updated = &u4_num_l0_results_updated;
1952                 pps_result_nodes = &aps_result_nodes_sorted[0][0];
1953             }
1954 
1955             ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1956 
1957             for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1958             {
1959                 hme_add_new_node_to_a_sorted_array(
1960                     &ps_search_node[i4_j], pps_result_nodes, NULL, *pu4_num_results_updated, 0);
1961 
1962                 ASSERT(ps_search_node[i4_j].i1_ref_idx == i4_ref_id);
1963                 (*pu4_num_results_updated)++;
1964             }
1965         }
1966 
1967         for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
1968         {
1969             COPY_SEARCH_RESULT(
1970                 &ps_mv_l0_root[i4_j],
1971                 &pi1_ref_idx_l0_root[i4_j],
1972                 aps_result_nodes_sorted[0][i4_j],
1973                 0);
1974         }
1975 
1976         for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
1977         {
1978             COPY_SEARCH_RESULT(
1979                 &ps_mv_l1_root[i4_j],
1980                 &pi1_ref_idx_l1_root[i4_j],
1981                 aps_result_nodes_sorted[1][i4_j],
1982                 0);
1983         }
1984 
1985         return;
1986     }
1987 
1988     /*************************************************************************/
1989     /* Case where search blk size is 8x8, but we update 4x4 results. In this */
1990     /* case, we need to have NxN partitions enabled in search.               */
1991     /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
1992     /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/
1993     /*************************************************************************/
1994     ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
1995     ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
1996     ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
1997 
1998     /*************************************************************************/
1999     /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
2000     /* hence the below check.                                                */
2001     /*************************************************************************/
2002     ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
2003 
2004     ps_mv1 = ps_mv;
2005     ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
2006     ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
2007     ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
2008     pi1_ref_idx1 = pi1_ref_idx;
2009     pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
2010     pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
2011     pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
2012 
2013     {
2014         /* max ref frames * max results per partition * number of partitions (4x4, 8x8) */
2015         search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * MAX_RESULTS_PER_PART * 2];
2016         U08 au1_cost_shifts_for_sorted_node[2][MAX_NUM_REF * MAX_RESULTS_PER_PART * 2];
2017 
2018         S32 i;
2019 
2020         hme_mv_t *ps_mv1_l0_root = ps_mv1;
2021         hme_mv_t *ps_mv1_l1_root =
2022             ps_mv1 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2023         hme_mv_t *ps_mv2_l0_root = ps_mv2;
2024         hme_mv_t *ps_mv2_l1_root =
2025             ps_mv2 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2026         hme_mv_t *ps_mv3_l0_root = ps_mv3;
2027         hme_mv_t *ps_mv3_l1_root =
2028             ps_mv3 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2029         hme_mv_t *ps_mv4_l0_root = ps_mv4;
2030         hme_mv_t *ps_mv4_l1_root =
2031             ps_mv4 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2032 
2033         U32 u4_num_l0_results_updated = 0;
2034         U32 u4_num_l1_results_updated = 0;
2035 
2036         S08 *pi1_ref_idx1_l0_root = pi1_ref_idx1;
2037         S08 *pi1_ref_idx1_l1_root = pi1_ref_idx1_l0_root + (ps_prms->i4_num_active_ref_l0 *
2038                                                             ps_layer_mv->i4_num_mvs_per_ref);
2039         S08 *pi1_ref_idx2_l0_root = pi1_ref_idx2;
2040         S08 *pi1_ref_idx2_l1_root = pi1_ref_idx2_l0_root + (ps_prms->i4_num_active_ref_l0 *
2041                                                             ps_layer_mv->i4_num_mvs_per_ref);
2042         S08 *pi1_ref_idx3_l0_root = pi1_ref_idx3;
2043         S08 *pi1_ref_idx3_l1_root = pi1_ref_idx3_l0_root + (ps_prms->i4_num_active_ref_l0 *
2044                                                             ps_layer_mv->i4_num_mvs_per_ref);
2045         S08 *pi1_ref_idx4_l0_root = pi1_ref_idx4;
2046         S08 *pi1_ref_idx4_l1_root = pi1_ref_idx4_l0_root + (ps_prms->i4_num_active_ref_l0 *
2047                                                             ps_layer_mv->i4_num_mvs_per_ref);
2048 
2049         for(i = 0; i < 4; i++)
2050         {
2051             hme_mv_t *ps_mv_l0_root;
2052             hme_mv_t *ps_mv_l1_root;
2053 
2054             S08 *pi1_ref_idx_l0_root;
2055             S08 *pi1_ref_idx_l1_root;
2056 
2057             for(i4_ref_id = 0; i4_ref_id < ps_search_results->u1_num_active_ref; i4_ref_id++)
2058             {
2059                 U32 *pu4_num_results_updated;
2060                 search_node_t **pps_result_nodes;
2061                 U08 *pu1_cost_shifts_for_sorted_node;
2062 
2063                 U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
2064 
2065                 if(u1_pred_dir_of_cur_ref)
2066                 {
2067                     pu4_num_results_updated = &u4_num_l1_results_updated;
2068                     pps_result_nodes = &aps_result_nodes_sorted[1][0];
2069                     pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0];
2070                 }
2071                 else
2072                 {
2073                     pu4_num_results_updated = &u4_num_l0_results_updated;
2074                     pps_result_nodes = &aps_result_nodes_sorted[0][0];
2075                     pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0];
2076                 }
2077 
2078                 ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
2079 
2080                 ps_search_node_4x4 =
2081                     ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL + i];
2082 
2083                 for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
2084                 {
2085                     hme_add_new_node_to_a_sorted_array(
2086                         &ps_search_node_4x4[i4_j],
2087                         pps_result_nodes,
2088                         pu1_cost_shifts_for_sorted_node,
2089                         *pu4_num_results_updated,
2090                         0);
2091 
2092                     (*pu4_num_results_updated)++;
2093 
2094                     hme_add_new_node_to_a_sorted_array(
2095                         &ps_search_node_8x8[i4_j],
2096                         pps_result_nodes,
2097                         pu1_cost_shifts_for_sorted_node,
2098                         *pu4_num_results_updated,
2099                         2);
2100 
2101                     (*pu4_num_results_updated)++;
2102                 }
2103             }
2104 
2105             switch(i)
2106             {
2107             case 0:
2108             {
2109                 ps_mv_l0_root = ps_mv1_l0_root;
2110                 ps_mv_l1_root = ps_mv1_l1_root;
2111 
2112                 pi1_ref_idx_l0_root = pi1_ref_idx1_l0_root;
2113                 pi1_ref_idx_l1_root = pi1_ref_idx1_l1_root;
2114 
2115                 break;
2116             }
2117             case 1:
2118             {
2119                 ps_mv_l0_root = ps_mv2_l0_root;
2120                 ps_mv_l1_root = ps_mv2_l1_root;
2121 
2122                 pi1_ref_idx_l0_root = pi1_ref_idx2_l0_root;
2123                 pi1_ref_idx_l1_root = pi1_ref_idx2_l1_root;
2124 
2125                 break;
2126             }
2127             case 2:
2128             {
2129                 ps_mv_l0_root = ps_mv3_l0_root;
2130                 ps_mv_l1_root = ps_mv3_l1_root;
2131 
2132                 pi1_ref_idx_l0_root = pi1_ref_idx3_l0_root;
2133                 pi1_ref_idx_l1_root = pi1_ref_idx3_l1_root;
2134 
2135                 break;
2136             }
2137             case 3:
2138             {
2139                 ps_mv_l0_root = ps_mv4_l0_root;
2140                 ps_mv_l1_root = ps_mv4_l1_root;
2141 
2142                 pi1_ref_idx_l0_root = pi1_ref_idx4_l0_root;
2143                 pi1_ref_idx_l1_root = pi1_ref_idx4_l1_root;
2144 
2145                 break;
2146             }
2147             }
2148 
2149             u4_num_l0_results_updated =
2150                 MIN((S32)u4_num_l0_results_updated,
2151                     ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2152 
2153             u4_num_l1_results_updated =
2154                 MIN((S32)u4_num_l1_results_updated,
2155                     ps_prms->i4_num_active_ref_l1 * ps_layer_mv->i4_num_mvs_per_ref);
2156 
2157             for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
2158             {
2159                 COPY_SEARCH_RESULT(
2160                     &ps_mv_l0_root[i4_j],
2161                     &pi1_ref_idx_l0_root[i4_j],
2162                     aps_result_nodes_sorted[0][i4_j],
2163                     0);
2164             }
2165 
2166             for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
2167             {
2168                 COPY_SEARCH_RESULT(
2169                     &ps_mv_l1_root[i4_j],
2170                     &pi1_ref_idx_l1_root[i4_j],
2171                     aps_result_nodes_sorted[1][i4_j],
2172                     0);
2173             }
2174         }
2175     }
2176 }
2177 
/**
******************************************************************************
*  @brief Scales a motion vector component projected from a different layer of
*         the same picture (so no ref id related delta poc scaling is required).
*         Evaluates to (mvcomp_p * dim_c) / dim_p, where the rounding term
*         (SIGN(mvcomp_p) * dim_p) >> 1 is added to the numerator before the
*         division.
*
*  @note  mvcomp_p and dim_p are expanded more than once, so the arguments
*         passed in must be free of side effects.
******************************************************************************
*/

#define SCALE_MV_COMP_RES(mvcomp_p, dim_c, dim_p)                                                  \
    ((((SIGN((mvcomp_p)) * (dim_p)) >> 1) + ((mvcomp_p) * (dim_c))) / (dim_p))
2187 /**
2188 ********************************************************************************
2189 *  @fn     hme_project_coloc_candt(search_node_t *ps_search_node,
2190 *                                   layer_ctxt_t *ps_curr_layer,
2191 *                                   layer_ctxt_t *ps_coarse_layer,
2192 *                                   S32 i4_pos_x,
2193 *                                   S32 i4_pos_y,
2194 *                                   S08 i1_ref_id,
2195 *                                   S08 i1_result_id)
2196 *
2197 *  @brief  From a coarser layer, projects a candidated situated at "colocated"
2198 *          position in the picture (e.g. given x, y it will be x/2, y/2 dyadic
2199 *
2200 *  @param[out]  ps_search_node : contains the projected result
2201 *
2202 *  @param[in]   ps_curr_layer : current layer context
2203 *
2204 *  @param[in]   ps_coarse_layer  : coarser layer context
2205 *
2206 *  @param[in]   i4_pos_x  : x Position where mv is required (w.r.t. curr layer)
2207 *
2208 *  @param[in]   i4_pos_y  : y Position where mv is required (w.r.t. curr layer)
2209 *
2210 *  @param[in]   i1_ref_id : reference id for which the candidate required
2211 *
2212 *  @param[in]   i4_result_id : result id for which the candidate required
2213 *                              (0 : best result, 1 : next best)
2214 *
2215 *  @return None
2216 ********************************************************************************
2217 */
2218 
hme_project_coloc_candt(search_node_t * ps_search_node,layer_ctxt_t * ps_curr_layer,layer_ctxt_t * ps_coarse_layer,S32 i4_pos_x,S32 i4_pos_y,S08 i1_ref_id,S32 i4_result_id)2219 void hme_project_coloc_candt(
2220     search_node_t *ps_search_node,
2221     layer_ctxt_t *ps_curr_layer,
2222     layer_ctxt_t *ps_coarse_layer,
2223     S32 i4_pos_x,
2224     S32 i4_pos_y,
2225     S08 i1_ref_id,
2226     S32 i4_result_id)
2227 {
2228     S32 wd_c, ht_c, wd_p, ht_p;
2229     S32 blksize_p, blk_x, blk_y, i4_offset;
2230     layer_mv_t *ps_layer_mvbank;
2231     hme_mv_t *ps_mv;
2232     S08 *pi1_ref_idx;
2233 
2234     /* Width and ht of current and prev layers */
2235     wd_c = ps_curr_layer->i4_wd;
2236     ht_c = ps_curr_layer->i4_ht;
2237     wd_p = ps_coarse_layer->i4_wd;
2238     ht_p = ps_coarse_layer->i4_ht;
2239 
2240     ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2241     blksize_p = (S32)gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
2242 
2243     /* Safety check to avoid uninitialized access across temporal layers */
2244     i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2245     i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2246 
2247     /* Project the positions to prev layer */
2248     /* TODO: convert these to scale factors at pic level */
2249     blk_x = (i4_pos_x * wd_p) / (wd_c * blksize_p);
2250     blk_y = (i4_pos_y * ht_p) / (ht_c * blksize_p);
2251 
2252     /* Pick up the mvs from the location */
2253     i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2254     i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2255 
2256     ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2257     pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2258 
2259     ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2260     pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2261 
2262     ps_search_node->s_mv.i2_mvx = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_x, wd_c, wd_p);
2263     ps_search_node->s_mv.i2_mvy = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_y, ht_c, ht_p);
2264     ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2265     ps_search_node->u1_subpel_done = 0;
2266     if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2267     {
2268         ps_search_node->i1_ref_idx = i1_ref_id;
2269         ps_search_node->s_mv.i2_mvx = 0;
2270         ps_search_node->s_mv.i2_mvy = 0;
2271     }
2272 }
2273 
2274 /**
2275 ********************************************************************************
2276 *  @fn     hme_project_coloc_candt_dyadic(search_node_t *ps_search_node,
2277 *                                   layer_ctxt_t *ps_curr_layer,
2278 *                                   layer_ctxt_t *ps_coarse_layer,
2279 *                                   S32 i4_pos_x,
2280 *                                   S32 i4_pos_y,
2281 *                                   S08 i1_ref_id,
2282 *                                   S08 i1_result_id)
2283 *
2284 *  @brief  From a coarser layer, projects a candidated situated at "colocated"
2285 *          position in the picture when the ratios are dyadic
2286 *
2287 *  @param[out]  ps_search_node : contains the projected result
2288 *
2289 *  @param[in]   ps_curr_layer : current layer context
2290 *
2291 *  @param[in]   ps_coarse_layer  : coarser layer context
2292 *
2293 *  @param[in]   i4_pos_x  : x Position where mv is required (w.r.t. curr layer)
2294 *
2295 *  @param[in]   i4_pos_y  : y Position where mv is required (w.r.t. curr layer)
2296 *
2297 *  @param[in]   i1_ref_id : reference id for which the candidate required
2298 *
2299 *  @param[in]   i4_result_id : result id for which the candidate required
2300 *                              (0 : best result, 1 : next best)
2301 *
2302 *  @return None
2303 ********************************************************************************
2304 */
2305 
hme_project_coloc_candt_dyadic(search_node_t * ps_search_node,layer_ctxt_t * ps_curr_layer,layer_ctxt_t * ps_coarse_layer,S32 i4_pos_x,S32 i4_pos_y,S08 i1_ref_id,S32 i4_result_id)2306 void hme_project_coloc_candt_dyadic(
2307     search_node_t *ps_search_node,
2308     layer_ctxt_t *ps_curr_layer,
2309     layer_ctxt_t *ps_coarse_layer,
2310     S32 i4_pos_x,
2311     S32 i4_pos_y,
2312     S08 i1_ref_id,
2313     S32 i4_result_id)
2314 {
2315     S32 wd_c, ht_c, wd_p, ht_p;
2316     S32 blksize_p, blk_x, blk_y, i4_offset;
2317     layer_mv_t *ps_layer_mvbank;
2318     hme_mv_t *ps_mv;
2319     S08 *pi1_ref_idx;
2320 
2321     /* Width and ht of current and prev layers */
2322     wd_c = ps_curr_layer->i4_wd;
2323     ht_c = ps_curr_layer->i4_ht;
2324     wd_p = ps_coarse_layer->i4_wd;
2325     ht_p = ps_coarse_layer->i4_ht;
2326 
2327     ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2328     /* blksize_p = log2(wd) + 1 */
2329     blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];
2330 
2331     /* ASSERT for valid sizes */
2332     ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
2333 
2334     /* Safety check to avoid uninitialized access across temporal layers */
2335     i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2336     i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2337 
2338     /* Project the positions to prev layer */
2339     /* TODO: convert these to scale factors at pic level */
2340     blk_x = i4_pos_x >> blksize_p;  // (2 * blksize_p);
2341     blk_y = i4_pos_y >> blksize_p;  // (2 * blksize_p);
2342 
2343     /* Pick up the mvs from the location */
2344     i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2345     i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2346 
2347     ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2348     pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2349 
2350     ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2351     pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2352 
2353     ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1;
2354     ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1;
2355     ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2356     if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2357     {
2358         ps_search_node->i1_ref_idx = i1_ref_id;
2359         ps_search_node->s_mv.i2_mvx = 0;
2360         ps_search_node->s_mv.i2_mvy = 0;
2361     }
2362 }
2363 
/**
********************************************************************************
*  @fn     hme_project_coloc_candt_dyadic_implicit(search_node_t *ps_search_node,
*                                   layer_ctxt_t *ps_curr_layer,
*                                   layer_ctxt_t *ps_coarse_layer,
*                                   S32 i4_pos_x,
*                                   S32 i4_pos_y,
*                                   S32 i4_num_act_ref_l0,
*                                   U08 u1_pred_dir,
*                                   U08 u1_default_ref_id,
*                                   S32 i4_result_id)
*
*  @brief  Dyadic projection of the "colocated" candidate from a coarser layer
*          where the results are selected by prediction direction instead of
*          by an explicit ref id. The block coordinate is obtained with a right
*          shift and the fetched mv components are doubled. If the stored ref
*          idx is negative, or the doubled x component equals INTRA_MV, a zero
*          mv with ref idx u1_default_ref_id is returned instead.
*
*  @param[out]  ps_search_node : contains the projected result
*
*  @param[in]   ps_curr_layer : current layer context
*
*  @param[in]   ps_coarse_layer  : coarser layer context
*
*  @param[in]   i4_pos_x  : x Position where mv is required (w.r.t. curr layer)
*
*  @param[in]   i4_pos_y  : y Position where mv is required (w.r.t. curr layer)
*
*  @param[in]   i4_num_act_ref_l0 : number of per-ref result groups skipped
*                                   when u1_pred_dir is 1
*
*  @param[in]   u1_pred_dir : when 1, results are read after skipping
*                             i4_num_act_ref_l0 * i4_num_mvs_per_ref entries
*                             (presumably the L1 results - to be confirmed);
*                             otherwise they are read from the block start
*
*  @param[in]   u1_default_ref_id : ref idx used for the zero mv fallback
*
*  @param[in]   i4_result_id : result id for which the candidate is required
*
*  @return None
********************************************************************************
*/
void hme_project_coloc_candt_dyadic_implicit(
    search_node_t *ps_search_node,
    layer_ctxt_t *ps_curr_layer,
    layer_ctxt_t *ps_coarse_layer,
    S32 i4_pos_x,
    S32 i4_pos_y,
    S32 i4_num_act_ref_l0,
    U08 u1_pred_dir,
    U08 u1_default_ref_id,
    S32 i4_result_id)
{
    S32 wd_c, ht_c;
    S32 blk_shift, blk_x, blk_y, i4_offset;
    layer_mv_t *ps_layer_mvbank;
    hme_mv_t *ps_mv;
    S08 *pi1_ref_idx;

    /* Width and ht of the current layer */
    wd_c = ps_curr_layer->i4_wd;
    ht_c = ps_curr_layer->i4_ht;

    ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
    /* Shift that maps a current layer position to a coarse layer block index */
    blk_shift = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];

    /* ASSERT for valid sizes */
    ASSERT((blk_shift == 3) || (blk_shift == 4) || (blk_shift == 5));

    /* Safety check to avoid uninitialized access across temporal layers */
    /* NOTE(review): the upper bound subtracts the shift amount, whereas    */
    /* hme_project_coloc_candt subtracts a block width in pixels. Kept as   */
    /* is so that results do not change; confirm the intent.                */
    i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blk_shift));
    i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blk_shift));

    /* Project the positions to prev layer */
    /* TODO: convert these to scale factors at pic level */
    blk_x = i4_pos_x >> blk_shift;
    blk_y = i4_pos_y >> blk_shift;

    /* Pick up the mvs from the location */
    i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);

    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;

    /* For pred dir 1 the results start after i4_num_act_ref_l0 ref groups */
    if(u1_pred_dir == 1)
    {
        ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
        pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
    }

    /* Dyadic scaling of the mv. A multiplication is used because left */
    /* shifting a negative value is undefined behaviour in C.          */
    ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x * 2;
    ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y * 2;
    ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];

    /* No usable candidate stored: fall back to a zero mv on the default ref */
    if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
    {
        ps_search_node->i1_ref_idx = u1_default_ref_id;
        ps_search_node->s_mv.i2_mvx = 0;
        ps_search_node->s_mv.i2_mvy = 0;
    }
}
2424 
/* Scales the four range bounds of prm2 by 2^shift and stores them in prm1.   */
/* prm1 and prm2 are struct objects exposing i2_min_x, i2_max_x, i2_min_y and */
/* i2_max_y. The scaling is written as a multiplication by (1 << shift)       */
/* because left shifting a negative bound is undefined behaviour in C. All    */
/* macro arguments are parenthesized so that expressions can be passed.       */
#define SCALE_RANGE_PRMS(prm1, prm2, shift)                                                        \
    {                                                                                              \
        (prm1).i2_min_x = (prm2).i2_min_x * (1 << (shift));                                        \
        (prm1).i2_max_x = (prm2).i2_max_x * (1 << (shift));                                        \
        (prm1).i2_min_y = (prm2).i2_min_y * (1 << (shift));                                        \
        (prm1).i2_max_y = (prm2).i2_max_y * (1 << (shift));                                        \
    }

/* Same as SCALE_RANGE_PRMS, but prm1 and prm2 are pointers to the structs */
#define SCALE_RANGE_PRMS_POINTERS(prm1, prm2, shift)                                               \
    {                                                                                              \
        (prm1)->i2_min_x = (prm2)->i2_min_x * (1 << (shift));                                      \
        (prm1)->i2_max_x = (prm2)->i2_max_x * (1 << (shift));                                      \
        (prm1)->i2_min_y = (prm2)->i2_min_y * (1 << (shift));                                      \
        (prm1)->i2_max_y = (prm2)->i2_max_y * (1 << (shift));                                      \
    }
2440 
2441 /**
2442 ********************************************************************************
2443 *  @fn   void hme_refine_frm_init(me_ctxt_t *ps_ctxt,
2444 *                       refine_layer_prms_t *ps_refine_prms)
2445 *
2446 *  @brief  Frame init of refinemnet layers in ME
2447 *
2448 *  @param[in,out]  ps_ctxt: ME Handle
2449 *
2450 *  @param[in]  ps_refine_prms : refinement layer prms
2451 *
2452 *  @return None
2453 ********************************************************************************
2454 */
hme_refine_frm_init(layer_ctxt_t * ps_curr_layer,refine_prms_t * ps_refine_prms,layer_ctxt_t * ps_coarse_layer)2455 void hme_refine_frm_init(
2456     layer_ctxt_t *ps_curr_layer, refine_prms_t *ps_refine_prms, layer_ctxt_t *ps_coarse_layer)
2457 {
2458     /* local variables */
2459     BLK_SIZE_T e_result_blk_size = BLK_8x8;
2460     S32 i4_num_ref_fpel, i4_num_ref_prev_layer;
2461 
2462     i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
2463 
2464     if(ps_refine_prms->explicit_ref)
2465     {
2466         i4_num_ref_fpel = i4_num_ref_prev_layer;
2467     }
2468     else
2469     {
2470         i4_num_ref_fpel = 2;
2471     }
2472 
2473     if(ps_refine_prms->i4_enable_4x4_part)
2474     {
2475         e_result_blk_size = BLK_4x4;
2476     }
2477 
2478     i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
2479 
2480     hme_init_mv_bank(
2481         ps_curr_layer,
2482         e_result_blk_size,
2483         i4_num_ref_fpel,
2484         ps_refine_prms->i4_num_mvbank_results,
2485         ps_refine_prms->i4_layer_id > 0 ? 0 : 1);
2486 }
2487 
2488 #if 1  //ENABLE_CU_RECURSION || TEST_AND_EVALUATE_CU_RECURSION
2489 /**
2490 ********************************************************************************
2491 *  @fn   void hme_init_clusters_16x16
2492 *               (
2493 *                   cluster_16x16_blk_t *ps_cluster_blk_16x16
2494 *               )
2495 *
2496 *  @brief  Intialisations for the structs used in clustering algorithm
2497 *
2498 *  @param[in/out]  ps_cluster_blk_16x16: pointer to structure containing clusters
2499 *                                        of 16x16 block
2500 *
2501 *  @return None
2502 ********************************************************************************
2503 */
2504 static __inline void
hme_init_clusters_16x16(cluster_16x16_blk_t * ps_cluster_blk_16x16,S32 bidir_enabled)2505     hme_init_clusters_16x16(cluster_16x16_blk_t *ps_cluster_blk_16x16, S32 bidir_enabled)
2506 {
2507     S32 i;
2508 
2509     ps_cluster_blk_16x16->num_clusters = 0;
2510     ps_cluster_blk_16x16->intra_mv_area = 0;
2511     ps_cluster_blk_16x16->best_inter_cost = 0;
2512 
2513     for(i = 0; i < MAX_NUM_CLUSTERS_16x16; i++)
2514     {
2515         ps_cluster_blk_16x16->as_cluster_data[i].max_dist_from_centroid =
2516             bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_16x16_B : MAX_DISTANCE_FROM_CENTROID_16x16;
2517 
2518         ps_cluster_blk_16x16->as_cluster_data[i].is_valid_cluster = 0;
2519 
2520         ps_cluster_blk_16x16->as_cluster_data[i].bi_mv_pixel_area = 0;
2521         ps_cluster_blk_16x16->as_cluster_data[i].uni_mv_pixel_area = 0;
2522     }
2523     for(i = 0; i < MAX_NUM_REF; i++)
2524     {
2525         ps_cluster_blk_16x16->au1_num_clusters[i] = 0;
2526     }
2527 }
2528 
2529 /**
2530 ********************************************************************************
2531 *  @fn   void hme_init_clusters_32x32
2532 *               (
2533 *                   cluster_32x32_blk_t *ps_cluster_blk_32x32
2534 *               )
2535 *
2536 *  @brief  Intialisations for the structs used in clustering algorithm
2537 *
2538 *  @param[in/out]  ps_cluster_blk_32x32: pointer to structure containing clusters
2539 *                                        of 32x32 block
2540 *
2541 *  @return None
2542 ********************************************************************************
2543 */
2544 static __inline void
hme_init_clusters_32x32(cluster_32x32_blk_t * ps_cluster_blk_32x32,S32 bidir_enabled)2545     hme_init_clusters_32x32(cluster_32x32_blk_t *ps_cluster_blk_32x32, S32 bidir_enabled)
2546 {
2547     S32 i;
2548 
2549     ps_cluster_blk_32x32->num_clusters = 0;
2550     ps_cluster_blk_32x32->intra_mv_area = 0;
2551     ps_cluster_blk_32x32->best_alt_ref = -1;
2552     ps_cluster_blk_32x32->best_uni_ref = -1;
2553     ps_cluster_blk_32x32->best_inter_cost = 0;
2554     ps_cluster_blk_32x32->num_clusters_with_weak_sdi_density = 0;
2555 
2556     for(i = 0; i < MAX_NUM_CLUSTERS_32x32; i++)
2557     {
2558         ps_cluster_blk_32x32->as_cluster_data[i].max_dist_from_centroid =
2559             bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_32x32_B : MAX_DISTANCE_FROM_CENTROID_32x32;
2560         ps_cluster_blk_32x32->as_cluster_data[i].is_valid_cluster = 0;
2561 
2562         ps_cluster_blk_32x32->as_cluster_data[i].bi_mv_pixel_area = 0;
2563         ps_cluster_blk_32x32->as_cluster_data[i].uni_mv_pixel_area = 0;
2564     }
2565     for(i = 0; i < MAX_NUM_REF; i++)
2566     {
2567         ps_cluster_blk_32x32->au1_num_clusters[i] = 0;
2568     }
2569 }
2570 
2571 /**
2572 ********************************************************************************
2573 *  @fn   void hme_init_clusters_64x64
2574 *               (
2575 *                   cluster_64x64_blk_t *ps_cluster_blk_64x64
2576 *               )
2577 *
2578 *  @brief  Intialisations for the structs used in clustering algorithm
2579 *
2580 *  @param[in/out]  ps_cluster_blk_64x64: pointer to structure containing clusters
2581 *                                        of 64x64 block
2582 *
2583 *  @return None
2584 ********************************************************************************
2585 */
2586 static __inline void
hme_init_clusters_64x64(cluster_64x64_blk_t * ps_cluster_blk_64x64,S32 bidir_enabled)2587     hme_init_clusters_64x64(cluster_64x64_blk_t *ps_cluster_blk_64x64, S32 bidir_enabled)
2588 {
2589     S32 i;
2590 
2591     ps_cluster_blk_64x64->num_clusters = 0;
2592     ps_cluster_blk_64x64->intra_mv_area = 0;
2593     ps_cluster_blk_64x64->best_alt_ref = -1;
2594     ps_cluster_blk_64x64->best_uni_ref = -1;
2595     ps_cluster_blk_64x64->best_inter_cost = 0;
2596 
2597     for(i = 0; i < MAX_NUM_CLUSTERS_64x64; i++)
2598     {
2599         ps_cluster_blk_64x64->as_cluster_data[i].max_dist_from_centroid =
2600             bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_64x64_B : MAX_DISTANCE_FROM_CENTROID_64x64;
2601         ps_cluster_blk_64x64->as_cluster_data[i].is_valid_cluster = 0;
2602 
2603         ps_cluster_blk_64x64->as_cluster_data[i].bi_mv_pixel_area = 0;
2604         ps_cluster_blk_64x64->as_cluster_data[i].uni_mv_pixel_area = 0;
2605     }
2606     for(i = 0; i < MAX_NUM_REF; i++)
2607     {
2608         ps_cluster_blk_64x64->au1_num_clusters[i] = 0;
2609     }
2610 }
2611 
2612 /**
2613 ********************************************************************************
2614 *  @fn   void hme_sort_and_assign_top_ref_ids_areawise
2615 *               (
2616 *                   ctb_cluster_info_t *ps_ctb_cluster_info
2617 *               )
2618 *
2619 *  @brief  Finds best_uni_ref and best_alt_ref
2620 *
2621 *  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
2622 *
2623 *  @param[in]  bidir_enabled: flag that indicates whether or not bi-pred is
2624 *                             enabled
2625 *
2626 *  @param[in]  block_width: width of the block in pels
2627 *
2628 *  @param[in]  e_cu_pos: position of the block within the CTB
2629 *
2630 *  @return None
2631 ********************************************************************************
2632 */
hme_sort_and_assign_top_ref_ids_areawise(ctb_cluster_info_t * ps_ctb_cluster_info,S32 bidir_enabled,S32 block_width,CU_POS_T e_cu_pos)2633 void hme_sort_and_assign_top_ref_ids_areawise(
2634     ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width, CU_POS_T e_cu_pos)
2635 {
2636     cluster_32x32_blk_t *ps_32x32 = NULL;
2637     cluster_64x64_blk_t *ps_64x64 = NULL;
2638     cluster_data_t *ps_data;
2639 
2640     S32 j, k;
2641 
2642     S32 ai4_uni_area[MAX_NUM_REF];
2643     S32 ai4_bi_area[MAX_NUM_REF];
2644     S32 ai4_ref_id_found[MAX_NUM_REF];
2645     S32 ai4_ref_id[MAX_NUM_REF];
2646 
2647     S32 best_uni_ref = -1, best_alt_ref = -1;
2648     S32 num_clusters;
2649     S32 num_ref = 0;
2650     S32 num_clusters_evaluated = 0;
2651     S32 is_cur_blk_valid;
2652 
2653     if(32 == block_width)
2654     {
2655         is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << e_cu_pos)) || 0;
2656         ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cu_pos];
2657         num_clusters = ps_32x32->num_clusters;
2658         ps_data = &ps_32x32->as_cluster_data[0];
2659     }
2660     else
2661     {
2662         is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask == 0xf);
2663         ps_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
2664         num_clusters = ps_64x64->num_clusters;
2665         ps_data = &ps_64x64->as_cluster_data[0];
2666     }
2667 
2668 #if !ENABLE_4CTB_EVALUATION
2669     if((num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
2670     {
2671         return;
2672     }
2673 #endif
2674     if(num_clusters == 0)
2675     {
2676         return;
2677     }
2678     else if(!is_cur_blk_valid)
2679     {
2680         return;
2681     }
2682 
2683     memset(ai4_uni_area, 0, sizeof(S32) * MAX_NUM_REF);
2684     memset(ai4_bi_area, 0, sizeof(S32) * MAX_NUM_REF);
2685     memset(ai4_ref_id_found, 0, sizeof(S32) * MAX_NUM_REF);
2686     memset(ai4_ref_id, -1, sizeof(S32) * MAX_NUM_REF);
2687 
2688     for(j = 0; num_clusters_evaluated < num_clusters; j++, ps_data++)
2689     {
2690         S32 ref_id;
2691 
2692         if(!ps_data->is_valid_cluster)
2693         {
2694             continue;
2695         }
2696 
2697         ref_id = ps_data->ref_id;
2698 
2699         num_clusters_evaluated++;
2700 
2701         ai4_uni_area[ref_id] += ps_data->uni_mv_pixel_area;
2702         ai4_bi_area[ref_id] += ps_data->bi_mv_pixel_area;
2703 
2704         if(!ai4_ref_id_found[ref_id])
2705         {
2706             ai4_ref_id[ref_id] = ref_id;
2707             ai4_ref_id_found[ref_id] = 1;
2708             num_ref++;
2709         }
2710     }
2711 
2712     {
2713         S32 ai4_ref_id_temp[MAX_NUM_REF];
2714 
2715         memcpy(ai4_ref_id_temp, ai4_ref_id, sizeof(S32) * MAX_NUM_REF);
2716 
2717         for(k = 1; k < MAX_NUM_REF; k++)
2718         {
2719             if(ai4_uni_area[k] > ai4_uni_area[0])
2720             {
2721                 SWAP_HME(ai4_uni_area[k], ai4_uni_area[0], S32);
2722                 SWAP_HME(ai4_ref_id_temp[k], ai4_ref_id_temp[0], S32);
2723             }
2724         }
2725 
2726         best_uni_ref = ai4_ref_id_temp[0];
2727     }
2728 
2729     if(bidir_enabled)
2730     {
2731         for(k = 1; k < MAX_NUM_REF; k++)
2732         {
2733             if(ai4_bi_area[k] > ai4_bi_area[0])
2734             {
2735                 SWAP_HME(ai4_bi_area[k], ai4_bi_area[0], S32);
2736                 SWAP_HME(ai4_ref_id[k], ai4_ref_id[0], S32);
2737             }
2738         }
2739 
2740         if(!ai4_bi_area[0])
2741         {
2742             best_alt_ref = -1;
2743 
2744             if(32 == block_width)
2745             {
2746                 SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
2747             }
2748             else
2749             {
2750                 SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
2751             }
2752 
2753             return;
2754         }
2755 
2756         if(best_uni_ref == ai4_ref_id[0])
2757         {
2758             for(k = 2; k < MAX_NUM_REF; k++)
2759             {
2760                 if(ai4_bi_area[k] > ai4_bi_area[1])
2761                 {
2762                     SWAP_HME(ai4_bi_area[k], ai4_bi_area[1], S32);
2763                     SWAP_HME(ai4_ref_id[k], ai4_ref_id[1], S32);
2764                 }
2765             }
2766 
2767             best_alt_ref = ai4_ref_id[1];
2768         }
2769         else
2770         {
2771             best_alt_ref = ai4_ref_id[0];
2772         }
2773     }
2774 
2775     if(32 == block_width)
2776     {
2777         SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
2778     }
2779     else
2780     {
2781         SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
2782     }
2783 }
2784 
2785 /**
2786 ********************************************************************************
2787 *  @fn   void hme_find_top_ref_ids
2788 *               (
2789 *                   ctb_cluster_info_t *ps_ctb_cluster_info
2790 *               )
2791 *
2792 *  @brief  Finds best_uni_ref and best_alt_ref
2793 *
2794 *  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
2795 *
2796 *  @return None
2797 ********************************************************************************
2798 */
hme_find_top_ref_ids(ctb_cluster_info_t * ps_ctb_cluster_info,S32 bidir_enabled,S32 block_width)2799 void hme_find_top_ref_ids(
2800     ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width)
2801 {
2802     S32 i;
2803 
2804     if(32 == block_width)
2805     {
2806         for(i = 0; i < 4; i++)
2807         {
2808             hme_sort_and_assign_top_ref_ids_areawise(
2809                 ps_ctb_cluster_info, bidir_enabled, block_width, (CU_POS_T)i);
2810         }
2811     }
2812     else if(64 == block_width)
2813     {
2814         hme_sort_and_assign_top_ref_ids_areawise(
2815             ps_ctb_cluster_info, bidir_enabled, block_width, POS_NA);
2816     }
2817 }
2818 
2819 /**
2820 ********************************************************************************
2821 *  @fn   void hme_boot_out_outlier
2822 *               (
2823 *                   ctb_cluster_info_t *ps_ctb_cluster_info
2824 *               )
2825 *
2826 *  @brief  Removes outlier clusters before CU tree population
2827 *
2828 *  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
2829 *
2830 *  @return None
2831 ********************************************************************************
2832 */
hme_boot_out_outlier(ctb_cluster_info_t * ps_ctb_cluster_info,S32 blk_width)2833 void hme_boot_out_outlier(ctb_cluster_info_t *ps_ctb_cluster_info, S32 blk_width)
2834 {
2835     cluster_32x32_blk_t *ps_32x32;
2836 
2837     S32 i;
2838 
2839     cluster_64x64_blk_t *ps_64x64 = &ps_ctb_cluster_info->ps_64x64_blk[0];
2840 
2841     S32 sdi_threshold = ps_ctb_cluster_info->sdi_threshold;
2842 
2843     if(32 == blk_width)
2844     {
2845         /* 32x32 clusters */
2846         for(i = 0; i < 4; i++)
2847         {
2848             ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
2849 
2850             if(ps_32x32->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
2851             {
2852                 BUMP_OUTLIER_CLUSTERS(ps_32x32, sdi_threshold);
2853             }
2854         }
2855     }
2856     else if(64 == blk_width)
2857     {
2858         /* 64x64 clusters */
2859         if(ps_64x64->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
2860         {
2861             BUMP_OUTLIER_CLUSTERS(ps_64x64, sdi_threshold);
2862         }
2863     }
2864 }
2865 
2866 /**
2867 ********************************************************************************
2868 *  @fn   void hme_update_cluster_attributes
2869 *               (
2870 *                   cluster_data_t *ps_cluster_data,
2871 *                   S32 mvx,
2872 *                   S32 mvy,
2873 *                   PART_ID_T e_part_id
2874 *               )
2875 *
2876 *  @brief  Implementation fo the clustering algorithm
2877 *
2878 *  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
2879 *
2880 *  @param[in]  mvx : x co-ordinate of the motion vector
2881 *
2882 *  @param[in]  mvy : y co-ordinate of the motion vector
2883 *
2884 *  @param[in]  ref_idx : ref_id of the motion vector
2885 *
2886 *  @param[in]  e_part_id : partition id of the motion vector
2887 *
2888 *  @return None
2889 ********************************************************************************
2890 */
static __inline void hme_update_cluster_attributes(
    cluster_data_t *ps_cluster_data,
    S32 mvx,
    S32 mvy,
    S32 mvdx,
    S32 mvdy,
    S32 ref_id,
    S32 sdi,
    U08 is_part_of_bi,
    PART_ID_T e_part_id)
{
    /* Number of MVs held by the cluster before this one is appended */
    const S32 num_mvs_before = ps_cluster_data->num_mvs;
    /* Pixel area covered by the partition this MV belongs to */
    const S32 part_area = gai4_partition_area[e_part_id];

    LWORD64 i8_mvx_sum_q8;
    LWORD64 i8_mvy_sum_q8;

    /* ref_id belongs to the interface but is not consulted by this routine */
    (void)ref_id;

    /* mvdx / mvdy are passed as (centroid - mv) by the caller in this file.  */
    /* A positive delta therefore means the MV lies below the centroid and    */
    /* can only lower the minimum; a negative delta can only raise the        */
    /* maximum.                                                               */
    if((mvdx > 0) && (ps_cluster_data->min_x > mvx))
    {
        ps_cluster_data->min_x = mvx;
    }
    else if((mvdx < 0) && (ps_cluster_data->max_x < mvx))
    {
        ps_cluster_data->max_x = mvx;
    }

    if((mvdy > 0) && (ps_cluster_data->min_y > mvy))
    {
        ps_cluster_data->min_y = mvy;
    }
    else if((mvdy < 0) && (ps_cluster_data->max_y < mvy))
    {
        ps_cluster_data->max_y = mvy;
    }

    /* Append the MV to the cluster's list.                                   */
    /* NOTE(review): there is no capacity check on as_mv here - confirm that  */
    /* the callers guarantee num_mvs stays within the array.                  */
    ps_cluster_data->as_mv[num_mvs_before].pixel_count = part_area;
    ps_cluster_data->as_mv[num_mvs_before].mvx = mvx;
    ps_cluster_data->as_mv[num_mvs_before].mvy = mvy;
    ps_cluster_data->as_mv[num_mvs_before].is_uni = !is_part_of_bi;
    ps_cluster_data->as_mv[num_mvs_before].sdi = sdi;

    /* Updation of centroid: running mean of all MVs, kept in Q8.             */
    /* The MV is scaled with a multiplication rather than '<< 8' because MV   */
    /* components can be negative and left-shifting a negative value is       */
    /* undefined behaviour in C.                                              */
    i8_mvx_sum_q8 =
        (LWORD64)ps_cluster_data->s_centroid.i4_pos_x_q8 * num_mvs_before + (LWORD64)mvx * 256;
    i8_mvy_sum_q8 =
        (LWORD64)ps_cluster_data->s_centroid.i4_pos_y_q8 * num_mvs_before + (LWORD64)mvy * 256;

    ps_cluster_data->num_mvs = num_mvs_before + 1;

    ps_cluster_data->s_centroid.i4_pos_x_q8 = (WORD32)(i8_mvx_sum_q8 / ps_cluster_data->num_mvs);
    ps_cluster_data->s_centroid.i4_pos_y_q8 = (WORD32)(i8_mvy_sum_q8 / ps_cluster_data->num_mvs);

    /* Area bookkeeping: total, plus the split between bi and uni MVs */
    ps_cluster_data->area_in_pixels += part_area;

    if(is_part_of_bi)
    {
        ps_cluster_data->bi_mv_pixel_area += part_area;
    }
    else
    {
        ps_cluster_data->uni_mv_pixel_area += part_area;
    }
}
2963 
2964 /**
2965 ********************************************************************************
2966 *  @fn   void hme_try_cluster_merge
2967 *               (
2968 *                   cluster_data_t *ps_cluster_data,
2969 *                   S32 *pi4_num_clusters,
2970 *                   S32 idx_of_updated_cluster
2971 *               )
2972 *
2973 *  @brief  Implementation fo the clustering algorithm
2974 *
2975 *  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
2976 *
2977 *  @param[in/out]  pi4_num_clusters : pointer to number of clusters
2978 *
2979 *  @param[in]  idx_of_updated_cluster : index of the cluster most recently
2980 *                                       updated
2981 *
2982 *  @return Nothing
2983 ********************************************************************************
2984 */
hme_try_cluster_merge(cluster_data_t * ps_cluster_data,U08 * pu1_num_clusters,S32 idx_of_updated_cluster)2985 void hme_try_cluster_merge(
2986     cluster_data_t *ps_cluster_data, U08 *pu1_num_clusters, S32 idx_of_updated_cluster)
2987 {
2988     centroid_t *ps_centroid;
2989 
2990     S32 cur_pos_x_q8;
2991     S32 cur_pos_y_q8;
2992     S32 i;
2993     S32 max_dist_from_centroid;
2994     S32 mvd;
2995     S32 mvdx_q8;
2996     S32 mvdx;
2997     S32 mvdy_q8;
2998     S32 mvdy;
2999     S32 num_clusters, num_clusters_evaluated;
3000     S32 other_pos_x_q8;
3001     S32 other_pos_y_q8;
3002 
3003     cluster_data_t *ps_root = ps_cluster_data;
3004     cluster_data_t *ps_cur_cluster = &ps_cluster_data[idx_of_updated_cluster];
3005     centroid_t *ps_cur_centroid = &ps_cur_cluster->s_centroid;
3006 
3007     /* Merge is superfluous if num_clusters is 1 */
3008     if(*pu1_num_clusters == 1)
3009     {
3010         return;
3011     }
3012 
3013     cur_pos_x_q8 = ps_cur_centroid->i4_pos_x_q8;
3014     cur_pos_y_q8 = ps_cur_centroid->i4_pos_y_q8;
3015 
3016     max_dist_from_centroid = ps_cur_cluster->max_dist_from_centroid;
3017 
3018     num_clusters = *pu1_num_clusters;
3019     num_clusters_evaluated = 0;
3020 
3021     for(i = 0; num_clusters_evaluated < num_clusters; i++, ps_cluster_data++)
3022     {
3023         if(!ps_cluster_data->is_valid_cluster)
3024         {
3025             continue;
3026         }
3027         if((ps_cluster_data->ref_id != ps_cur_cluster->ref_id) || (i == idx_of_updated_cluster))
3028         {
3029             num_clusters_evaluated++;
3030             continue;
3031         }
3032 
3033         ps_centroid = &ps_cluster_data->s_centroid;
3034 
3035         other_pos_x_q8 = ps_centroid->i4_pos_x_q8;
3036         other_pos_y_q8 = ps_centroid->i4_pos_y_q8;
3037 
3038         mvdx_q8 = (cur_pos_x_q8 - other_pos_x_q8);
3039         mvdy_q8 = (cur_pos_y_q8 - other_pos_y_q8);
3040         mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3041         mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3042 
3043         mvd = ABS(mvdx) + ABS(mvdy);
3044 
3045         if(mvd <= (max_dist_from_centroid >> 1))
3046         {
3047             /* 0 => no updates */
3048             /* 1 => min updated */
3049             /* 2 => max updated */
3050             S32 minmax_x_update_id;
3051             S32 minmax_y_update_id;
3052 
3053             LWORD64 i8_mv_x_sum_self = (LWORD64)cur_pos_x_q8 * ps_cur_cluster->num_mvs;
3054             LWORD64 i8_mv_y_sum_self = (LWORD64)cur_pos_y_q8 * ps_cur_cluster->num_mvs;
3055             LWORD64 i8_mv_x_sum_cousin = (LWORD64)other_pos_x_q8 * ps_cluster_data->num_mvs;
3056             LWORD64 i8_mv_y_sum_cousin = (LWORD64)other_pos_y_q8 * ps_cluster_data->num_mvs;
3057 
3058             (*pu1_num_clusters)--;
3059 
3060             ps_cluster_data->is_valid_cluster = 0;
3061 
3062             memcpy(
3063                 &ps_cur_cluster->as_mv[ps_cur_cluster->num_mvs],
3064                 ps_cluster_data->as_mv,
3065                 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3066 
3067             ps_cur_cluster->num_mvs += ps_cluster_data->num_mvs;
3068             ps_cur_cluster->area_in_pixels += ps_cluster_data->area_in_pixels;
3069             ps_cur_cluster->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3070             ps_cur_cluster->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3071             i8_mv_x_sum_self += i8_mv_x_sum_cousin;
3072             i8_mv_y_sum_self += i8_mv_y_sum_cousin;
3073 
3074             ps_cur_centroid->i4_pos_x_q8 = (WORD32)(i8_mv_x_sum_self / ps_cur_cluster->num_mvs);
3075             ps_cur_centroid->i4_pos_y_q8 = (WORD32)(i8_mv_y_sum_self / ps_cur_cluster->num_mvs);
3076 
3077             minmax_x_update_id = (ps_cur_cluster->min_x < ps_cluster_data->min_x)
3078                                      ? ((ps_cur_cluster->max_x > ps_cluster_data->max_x) ? 0 : 2)
3079                                      : 1;
3080             minmax_y_update_id = (ps_cur_cluster->min_y < ps_cluster_data->min_y)
3081                                      ? ((ps_cur_cluster->max_y > ps_cluster_data->max_y) ? 0 : 2)
3082                                      : 1;
3083 
3084             /* Updation of centroid spread */
3085             switch(minmax_x_update_id + (minmax_y_update_id << 2))
3086             {
3087             case 1:
3088             {
3089                 S32 mvd, mvd_q8;
3090 
3091                 ps_cur_cluster->min_x = ps_cluster_data->min_x;
3092 
3093                 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3094                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3095 
3096                 if(mvd > (max_dist_from_centroid))
3097                 {
3098                     ps_cluster_data->max_dist_from_centroid = mvd;
3099                 }
3100                 break;
3101             }
3102             case 2:
3103             {
3104                 S32 mvd, mvd_q8;
3105 
3106                 ps_cur_cluster->max_x = ps_cluster_data->max_x;
3107 
3108                 mvd_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3109                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3110 
3111                 if(mvd > (max_dist_from_centroid))
3112                 {
3113                     ps_cluster_data->max_dist_from_centroid = mvd;
3114                 }
3115                 break;
3116             }
3117             case 4:
3118             {
3119                 S32 mvd, mvd_q8;
3120 
3121                 ps_cur_cluster->min_y = ps_cluster_data->min_y;
3122 
3123                 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3124                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3125 
3126                 if(mvd > (max_dist_from_centroid))
3127                 {
3128                     ps_cluster_data->max_dist_from_centroid = mvd;
3129                 }
3130                 break;
3131             }
3132             case 5:
3133             {
3134                 S32 mvd;
3135                 S32 mvdx, mvdx_q8;
3136                 S32 mvdy, mvdy_q8;
3137 
3138                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3139                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3140 
3141                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3142                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3143 
3144                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3145 
3146                 ps_cur_cluster->min_x = ps_cluster_data->min_x;
3147                 ps_cur_cluster->min_y = ps_cluster_data->min_y;
3148 
3149                 if(mvd > max_dist_from_centroid)
3150                 {
3151                     ps_cluster_data->max_dist_from_centroid = mvd;
3152                 }
3153                 break;
3154             }
3155             case 6:
3156             {
3157                 S32 mvd;
3158                 S32 mvdx, mvdx_q8;
3159                 S32 mvdy, mvdy_q8;
3160 
3161                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3162                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3163 
3164                 mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3165                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3166 
3167                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3168 
3169                 ps_cur_cluster->max_x = ps_cluster_data->max_x;
3170                 ps_cur_cluster->min_y = ps_cluster_data->min_y;
3171 
3172                 if(mvd > max_dist_from_centroid)
3173                 {
3174                     ps_cluster_data->max_dist_from_centroid = mvd;
3175                 }
3176                 break;
3177             }
3178             case 8:
3179             {
3180                 S32 mvd, mvd_q8;
3181 
3182                 ps_cur_cluster->max_y = ps_cluster_data->max_y;
3183 
3184                 mvd_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3185                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3186 
3187                 if(mvd > (max_dist_from_centroid))
3188                 {
3189                     ps_cluster_data->max_dist_from_centroid = mvd;
3190                 }
3191                 break;
3192             }
3193             case 9:
3194             {
3195                 S32 mvd;
3196                 S32 mvdx, mvdx_q8;
3197                 S32 mvdy, mvdy_q8;
3198 
3199                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3200                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3201 
3202                 mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3203                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3204 
3205                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3206 
3207                 ps_cur_cluster->min_x = ps_cluster_data->min_x;
3208                 ps_cur_cluster->max_y = ps_cluster_data->max_y;
3209 
3210                 if(mvd > max_dist_from_centroid)
3211                 {
3212                     ps_cluster_data->max_dist_from_centroid = mvd;
3213                 }
3214                 break;
3215             }
3216             case 10:
3217             {
3218                 S32 mvd;
3219                 S32 mvdx, mvdx_q8;
3220                 S32 mvdy, mvdy_q8;
3221 
3222                 mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3223                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3224 
3225                 mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3226                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3227 
3228                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3229 
3230                 ps_cur_cluster->max_x = ps_cluster_data->max_x;
3231                 ps_cur_cluster->max_y = ps_cluster_data->max_y;
3232 
3233                 if(mvd > ps_cluster_data->max_dist_from_centroid)
3234                 {
3235                     ps_cluster_data->max_dist_from_centroid = mvd;
3236                 }
3237                 break;
3238             }
3239             default:
3240             {
3241                 break;
3242             }
3243             }
3244 
3245             hme_try_cluster_merge(ps_root, pu1_num_clusters, idx_of_updated_cluster);
3246 
3247             return;
3248         }
3249 
3250         num_clusters_evaluated++;
3251     }
3252 }
3253 
3254 /**
3255 ********************************************************************************
3256 *  @fn   void hme_find_and_update_clusters
3257 *               (
3258 *                   cluster_data_t *ps_cluster_data,
3259 *                   S32 *pi4_num_clusters,
3260 *                   S32 mvx,
3261 *                   S32 mvy,
3262 *                   S32 ref_idx,
3263 *                   PART_ID_T e_part_id
3264 *               )
3265 *
3266 *  @brief  Implementation fo the clustering algorithm
3267 *
3268 *  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
3269 *
3270 *  @param[in/out]  pi4_num_clusters : pointer to number of clusters
3271 *
3272 *  @param[in]  mvx : x co-ordinate of the motion vector
3273 *
3274 *  @param[in]  mvy : y co-ordinate of the motion vector
3275 *
3276 *  @param[in]  ref_idx : ref_id of the motion vector
3277 *
3278 *  @param[in]  e_part_id : partition id of the motion vector
3279 *
3280 *  @return None
3281 ********************************************************************************
3282 */
hme_find_and_update_clusters(cluster_data_t * ps_cluster_data,U08 * pu1_num_clusters,S16 i2_mv_x,S16 i2_mv_y,U08 i1_ref_idx,S32 i4_sdi,PART_ID_T e_part_id,U08 is_part_of_bi)3283 void hme_find_and_update_clusters(
3284     cluster_data_t *ps_cluster_data,
3285     U08 *pu1_num_clusters,
3286     S16 i2_mv_x,
3287     S16 i2_mv_y,
3288     U08 i1_ref_idx,
3289     S32 i4_sdi,
3290     PART_ID_T e_part_id,
3291     U08 is_part_of_bi)
3292 {
3293     S32 i;
3294     S32 min_mvd_cluster_id = -1;
3295     S32 mvd, mvd_limit, mvdx, mvdy;
3296     S32 min_mvdx, min_mvdy;
3297 
3298     S32 min_mvd = MAX_32BIT_VAL;
3299     S32 num_clusters = *pu1_num_clusters;
3300 
3301     S32 mvx = i2_mv_x;
3302     S32 mvy = i2_mv_y;
3303     S32 ref_idx = i1_ref_idx;
3304     S32 sdi = i4_sdi;
3305     S32 new_cluster_idx = MAX_NUM_CLUSTERS_16x16;
3306 
3307     if(num_clusters == 0)
3308     {
3309         cluster_data_t *ps_data = &ps_cluster_data[num_clusters];
3310 
3311         ps_data->num_mvs = 1;
3312         ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
3313         ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
3314         ps_data->ref_id = ref_idx;
3315         ps_data->area_in_pixels = gai4_partition_area[e_part_id];
3316         ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
3317         ps_data->as_mv[0].mvx = mvx;
3318         ps_data->as_mv[0].mvy = mvy;
3319 
3320         /***************************/
3321         ps_data->as_mv[0].is_uni = !is_part_of_bi;
3322         ps_data->as_mv[0].sdi = sdi;
3323         if(is_part_of_bi)
3324         {
3325             ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
3326         }
3327         else
3328         {
3329             ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
3330         }
3331         /**************************/
3332         ps_data->max_x = mvx;
3333         ps_data->min_x = mvx;
3334         ps_data->max_y = mvy;
3335         ps_data->min_y = mvy;
3336 
3337         ps_data->is_valid_cluster = 1;
3338 
3339         *pu1_num_clusters = 1;
3340     }
3341     else
3342     {
3343         S32 num_clusters_evaluated = 0;
3344 
3345         for(i = 0; num_clusters_evaluated < num_clusters; i++)
3346         {
3347             cluster_data_t *ps_data = &ps_cluster_data[i];
3348 
3349             centroid_t *ps_centroid;
3350 
3351             S32 mvx_q8;
3352             S32 mvy_q8;
3353             S32 posx_q8;
3354             S32 posy_q8;
3355             S32 mvdx_q8;
3356             S32 mvdy_q8;
3357 
3358             /* In anticipation of a possible merging of clusters */
3359             if(ps_data->is_valid_cluster == 0)
3360             {
3361                 new_cluster_idx = i;
3362                 continue;
3363             }
3364 
3365             if(ref_idx != ps_data->ref_id)
3366             {
3367                 num_clusters_evaluated++;
3368                 continue;
3369             }
3370 
3371             ps_centroid = &ps_data->s_centroid;
3372             posx_q8 = ps_centroid->i4_pos_x_q8;
3373             posy_q8 = ps_centroid->i4_pos_y_q8;
3374 
3375             mvx_q8 = mvx << 8;
3376             mvy_q8 = mvy << 8;
3377 
3378             mvdx_q8 = posx_q8 - mvx_q8;
3379             mvdy_q8 = posy_q8 - mvy_q8;
3380 
3381             mvdx = (((mvdx_q8 + (1 << 7)) >> 8));
3382             mvdy = (((mvdy_q8 + (1 << 7)) >> 8));
3383 
3384             mvd = ABS(mvdx) + ABS(mvdy);
3385 
3386             if(mvd < min_mvd)
3387             {
3388                 min_mvd = mvd;
3389                 min_mvdx = mvdx;
3390                 min_mvdy = mvdy;
3391                 min_mvd_cluster_id = i;
3392             }
3393 
3394             num_clusters_evaluated++;
3395         }
3396 
3397         mvd_limit = (min_mvd_cluster_id == -1)
3398                         ? ps_cluster_data[0].max_dist_from_centroid
3399                         : ps_cluster_data[min_mvd_cluster_id].max_dist_from_centroid;
3400 
3401         /* This condition implies that min_mvd has been updated */
3402         if(min_mvd <= mvd_limit)
3403         {
3404             hme_update_cluster_attributes(
3405                 &ps_cluster_data[min_mvd_cluster_id],
3406                 mvx,
3407                 mvy,
3408                 min_mvdx,
3409                 min_mvdy,
3410                 ref_idx,
3411                 sdi,
3412                 is_part_of_bi,
3413                 e_part_id);
3414 
3415             if(PRT_NxN == ge_part_id_to_part_type[e_part_id])
3416             {
3417                 hme_try_cluster_merge(ps_cluster_data, pu1_num_clusters, min_mvd_cluster_id);
3418             }
3419         }
3420         else
3421         {
3422             cluster_data_t *ps_data = (new_cluster_idx == MAX_NUM_CLUSTERS_16x16)
3423                                           ? &ps_cluster_data[num_clusters]
3424                                           : &ps_cluster_data[new_cluster_idx];
3425 
3426             ps_data->num_mvs = 1;
3427             ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
3428             ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
3429             ps_data->ref_id = ref_idx;
3430             ps_data->area_in_pixels = gai4_partition_area[e_part_id];
3431             ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
3432             ps_data->as_mv[0].mvx = mvx;
3433             ps_data->as_mv[0].mvy = mvy;
3434 
3435             /***************************/
3436             ps_data->as_mv[0].is_uni = !is_part_of_bi;
3437             ps_data->as_mv[0].sdi = sdi;
3438             if(is_part_of_bi)
3439             {
3440                 ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
3441             }
3442             else
3443             {
3444                 ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
3445             }
3446             /**************************/
3447             ps_data->max_x = mvx;
3448             ps_data->min_x = mvx;
3449             ps_data->max_y = mvy;
3450             ps_data->min_y = mvy;
3451 
3452             ps_data->is_valid_cluster = 1;
3453 
3454             num_clusters++;
3455             *pu1_num_clusters = num_clusters;
3456         }
3457     }
3458 }
3459 
3460 /**
3461 ********************************************************************************
3462 *  @fn   void hme_update_32x32_cluster_attributes
3463 *               (
3464 *                   cluster_32x32_blk_t *ps_blk_32x32,
3465 *                   cluster_data_t *ps_cluster_data
3466 *               )
3467 *
3468 *  @brief  Updates attributes for 32x32 clusters based on the attributes of
3469 *          the constituent 16x16 clusters
3470 *
3471 *  @param[out]  ps_blk_32x32: structure containing 32x32 block results
3472 *
3473 *  @param[in]  ps_cluster_data : structure containing 16x16 block results
3474 *
3475 *  @return None
3476 ********************************************************************************
3477 */
hme_update_32x32_cluster_attributes(cluster_32x32_blk_t * ps_blk_32x32,cluster_data_t * ps_cluster_data)3478 void hme_update_32x32_cluster_attributes(
3479     cluster_32x32_blk_t *ps_blk_32x32, cluster_data_t *ps_cluster_data)
3480 {
3481     cluster_data_t *ps_cur_cluster_32;
3482 
3483     S32 i;
3484     S32 mvd_limit;
3485 
3486     S32 num_clusters = ps_blk_32x32->num_clusters;
3487 
3488     if(0 == num_clusters)
3489     {
3490         ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
3491 
3492         ps_blk_32x32->num_clusters++;
3493         ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
3494 
3495         ps_cur_cluster_32->is_valid_cluster = 1;
3496 
3497         ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
3498         ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3499         ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3500 
3501         memcpy(
3502             ps_cur_cluster_32->as_mv,
3503             ps_cluster_data->as_mv,
3504             sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3505 
3506         ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
3507 
3508         ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
3509 
3510         ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
3511         ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
3512         ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
3513         ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
3514 
3515         ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
3516     }
3517     else
3518     {
3519         centroid_t *ps_centroid;
3520 
3521         S32 cur_posx_q8, cur_posy_q8;
3522         S32 min_mvd_cluster_id = -1;
3523         S32 mvd;
3524         S32 mvdx;
3525         S32 mvdy;
3526         S32 mvdx_min;
3527         S32 mvdy_min;
3528         S32 mvdx_q8;
3529         S32 mvdy_q8;
3530 
3531         S32 num_clusters_evaluated = 0;
3532 
3533         S32 mvd_min = MAX_32BIT_VAL;
3534 
3535         S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
3536         S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
3537 
3538         for(i = 0; num_clusters_evaluated < num_clusters; i++)
3539         {
3540             ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[i];
3541 
3542             if(ps_cur_cluster_32->ref_id != ps_cluster_data->ref_id)
3543             {
3544                 num_clusters_evaluated++;
3545                 continue;
3546             }
3547             if(!ps_cluster_data->is_valid_cluster)
3548             {
3549                 continue;
3550             }
3551 
3552             num_clusters_evaluated++;
3553 
3554             ps_centroid = &ps_cur_cluster_32->s_centroid;
3555 
3556             cur_posx_q8 = ps_centroid->i4_pos_x_q8;
3557             cur_posy_q8 = ps_centroid->i4_pos_y_q8;
3558 
3559             mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
3560             mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
3561 
3562             mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3563             mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3564 
3565             mvd = ABS(mvdx) + ABS(mvdy);
3566 
3567             if(mvd < mvd_min)
3568             {
3569                 mvd_min = mvd;
3570                 mvdx_min = mvdx;
3571                 mvdy_min = mvdy;
3572                 min_mvd_cluster_id = i;
3573             }
3574         }
3575 
3576         ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
3577 
3578         mvd_limit = (min_mvd_cluster_id == -1)
3579                         ? ps_cur_cluster_32[0].max_dist_from_centroid
3580                         : ps_cur_cluster_32[min_mvd_cluster_id].max_dist_from_centroid;
3581 
3582         if(mvd_min <= mvd_limit)
3583         {
3584             LWORD64 i8_updated_posx;
3585             LWORD64 i8_updated_posy;
3586             WORD32 minmax_updated_x = 0;
3587             WORD32 minmax_updated_y = 0;
3588 
3589             ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[min_mvd_cluster_id];
3590 
3591             ps_centroid = &ps_cur_cluster_32->s_centroid;
3592 
3593             ps_cur_cluster_32->is_valid_cluster = 1;
3594 
3595             ps_cur_cluster_32->area_in_pixels += ps_cluster_data->area_in_pixels;
3596             ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3597             ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3598 
3599             memcpy(
3600                 &ps_cur_cluster_32->as_mv[ps_cur_cluster_32->num_mvs],
3601                 ps_cluster_data->as_mv,
3602                 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3603 
3604             if((mvdx_min > 0) && ((ps_cur_cluster_32->min_x << 8) > mvx_inp_q8))
3605             {
3606                 ps_cur_cluster_32->min_x = (mvx_inp_q8 + ((1 << 7))) >> 8;
3607                 minmax_updated_x = 1;
3608             }
3609             else if((mvdx_min < 0) && ((ps_cur_cluster_32->max_x << 8) < mvx_inp_q8))
3610             {
3611                 ps_cur_cluster_32->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3612                 minmax_updated_x = 2;
3613             }
3614 
3615             if((mvdy_min > 0) && ((ps_cur_cluster_32->min_y << 8) > mvy_inp_q8))
3616             {
3617                 ps_cur_cluster_32->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3618                 minmax_updated_y = 1;
3619             }
3620             else if((mvdy_min < 0) && ((ps_cur_cluster_32->max_y << 8) < mvy_inp_q8))
3621             {
3622                 ps_cur_cluster_32->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3623                 minmax_updated_y = 2;
3624             }
3625 
3626             switch((minmax_updated_y << 2) + minmax_updated_x)
3627             {
3628             case 1:
3629             {
3630                 S32 mvd, mvd_q8;
3631 
3632                 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3633                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3634 
3635                 if(mvd > (mvd_limit))
3636                 {
3637                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3638                 }
3639                 break;
3640             }
3641             case 2:
3642             {
3643                 S32 mvd, mvd_q8;
3644 
3645                 mvd_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3646                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3647 
3648                 if(mvd > (mvd_limit))
3649                 {
3650                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3651                 }
3652                 break;
3653             }
3654             case 4:
3655             {
3656                 S32 mvd, mvd_q8;
3657 
3658                 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3659                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3660 
3661                 if(mvd > (mvd_limit))
3662                 {
3663                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3664                 }
3665                 break;
3666             }
3667             case 5:
3668             {
3669                 S32 mvd;
3670                 S32 mvdx, mvdx_q8;
3671                 S32 mvdy, mvdy_q8;
3672 
3673                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3674                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3675 
3676                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3677                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3678 
3679                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3680 
3681                 if(mvd > mvd_limit)
3682                 {
3683                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3684                 }
3685                 break;
3686             }
3687             case 6:
3688             {
3689                 S32 mvd;
3690                 S32 mvdx, mvdx_q8;
3691                 S32 mvdy, mvdy_q8;
3692 
3693                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3694                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3695 
3696                 mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3697                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3698 
3699                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3700 
3701                 if(mvd > mvd_limit)
3702                 {
3703                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3704                 }
3705                 break;
3706             }
3707             case 8:
3708             {
3709                 S32 mvd, mvd_q8;
3710 
3711                 mvd_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3712                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3713 
3714                 if(mvd > (mvd_limit))
3715                 {
3716                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3717                 }
3718                 break;
3719             }
3720             case 9:
3721             {
3722                 S32 mvd;
3723                 S32 mvdx, mvdx_q8;
3724                 S32 mvdy, mvdy_q8;
3725 
3726                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3727                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3728 
3729                 mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3730                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3731 
3732                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3733 
3734                 if(mvd > mvd_limit)
3735                 {
3736                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3737                 }
3738                 break;
3739             }
3740             case 10:
3741             {
3742                 S32 mvd;
3743                 S32 mvdx, mvdx_q8;
3744                 S32 mvdy, mvdy_q8;
3745 
3746                 mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3747                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3748 
3749                 mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3750                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3751 
3752                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3753 
3754                 if(mvd > ps_cur_cluster_32->max_dist_from_centroid)
3755                 {
3756                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3757                 }
3758                 break;
3759             }
3760             default:
3761             {
3762                 break;
3763             }
3764             }
3765 
3766             i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_32->num_mvs) +
3767                               ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
3768             i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_32->num_mvs) +
3769                               ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
3770 
3771             ps_cur_cluster_32->num_mvs += ps_cluster_data->num_mvs;
3772 
3773             ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_32->num_mvs);
3774             ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_32->num_mvs);
3775         }
3776         else if(num_clusters < MAX_NUM_CLUSTERS_32x32)
3777         {
3778             ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[num_clusters];
3779 
3780             ps_blk_32x32->num_clusters++;
3781             ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
3782 
3783             ps_cur_cluster_32->is_valid_cluster = 1;
3784 
3785             ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
3786             ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3787             ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3788 
3789             memcpy(
3790                 ps_cur_cluster_32->as_mv,
3791                 ps_cluster_data->as_mv,
3792                 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3793 
3794             ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
3795 
3796             ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
3797 
3798             ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
3799             ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
3800             ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
3801             ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
3802 
3803             ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
3804         }
3805     }
3806 }
3807 
3808 /**
3809 ********************************************************************************
3810 *  @fn   void hme_update_64x64_cluster_attributes
3811 *               (
3812 *                   cluster_64x64_blk_t *ps_blk_32x32,
3813 *                   cluster_data_t *ps_cluster_data
3814 *               )
3815 *
3816 *  @brief  Updates attributes for 64x64 clusters based on the attributes of
3817 *          the constituent 16x16 clusters
3818 *
3819 *  @param[out]  ps_blk_64x64: structure containing 64x64 block results
3820 *
3821 *  @param[in]  ps_cluster_data : structure containing 32x32 block results
3822 *
3823 *  @return None
3824 ********************************************************************************
3825 */
hme_update_64x64_cluster_attributes(cluster_64x64_blk_t * ps_blk_64x64,cluster_data_t * ps_cluster_data)3826 void hme_update_64x64_cluster_attributes(
3827     cluster_64x64_blk_t *ps_blk_64x64, cluster_data_t *ps_cluster_data)
3828 {
3829     cluster_data_t *ps_cur_cluster_64;
3830 
3831     S32 i;
3832     S32 mvd_limit;
3833 
3834     S32 num_clusters = ps_blk_64x64->num_clusters;
3835 
3836     if(0 == num_clusters)
3837     {
3838         ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[0];
3839 
3840         ps_blk_64x64->num_clusters++;
3841         ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
3842 
3843         ps_cur_cluster_64->is_valid_cluster = 1;
3844 
3845         ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
3846         ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3847         ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3848 
3849         memcpy(
3850             ps_cur_cluster_64->as_mv,
3851             ps_cluster_data->as_mv,
3852             sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3853 
3854         ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
3855 
3856         ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
3857 
3858         ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
3859         ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
3860         ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
3861         ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
3862 
3863         ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
3864     }
3865     else
3866     {
3867         centroid_t *ps_centroid;
3868 
3869         S32 cur_posx_q8, cur_posy_q8;
3870         S32 min_mvd_cluster_id = -1;
3871         S32 mvd;
3872         S32 mvdx;
3873         S32 mvdy;
3874         S32 mvdx_min;
3875         S32 mvdy_min;
3876         S32 mvdx_q8;
3877         S32 mvdy_q8;
3878 
3879         S32 num_clusters_evaluated = 0;
3880 
3881         S32 mvd_min = MAX_32BIT_VAL;
3882 
3883         S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
3884         S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
3885 
3886         for(i = 0; num_clusters_evaluated < num_clusters; i++)
3887         {
3888             ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[i];
3889 
3890             if(ps_cur_cluster_64->ref_id != ps_cluster_data->ref_id)
3891             {
3892                 num_clusters_evaluated++;
3893                 continue;
3894             }
3895 
3896             if(!ps_cur_cluster_64->is_valid_cluster)
3897             {
3898                 continue;
3899             }
3900 
3901             num_clusters_evaluated++;
3902 
3903             ps_centroid = &ps_cur_cluster_64->s_centroid;
3904 
3905             cur_posx_q8 = ps_centroid->i4_pos_x_q8;
3906             cur_posy_q8 = ps_centroid->i4_pos_y_q8;
3907 
3908             mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
3909             mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
3910 
3911             mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3912             mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3913 
3914             mvd = ABS(mvdx) + ABS(mvdy);
3915 
3916             if(mvd < mvd_min)
3917             {
3918                 mvd_min = mvd;
3919                 mvdx_min = mvdx;
3920                 mvdy_min = mvdy;
3921                 min_mvd_cluster_id = i;
3922             }
3923         }
3924 
3925         ps_cur_cluster_64 = ps_blk_64x64->as_cluster_data;
3926 
3927         mvd_limit = (min_mvd_cluster_id == -1)
3928                         ? ps_cur_cluster_64[0].max_dist_from_centroid
3929                         : ps_cur_cluster_64[min_mvd_cluster_id].max_dist_from_centroid;
3930 
3931         if(mvd_min <= mvd_limit)
3932         {
3933             LWORD64 i8_updated_posx;
3934             LWORD64 i8_updated_posy;
3935             WORD32 minmax_updated_x = 0;
3936             WORD32 minmax_updated_y = 0;
3937 
3938             ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[min_mvd_cluster_id];
3939 
3940             ps_centroid = &ps_cur_cluster_64->s_centroid;
3941 
3942             ps_cur_cluster_64->is_valid_cluster = 1;
3943 
3944             ps_cur_cluster_64->area_in_pixels += ps_cluster_data->area_in_pixels;
3945             ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3946             ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3947 
3948             memcpy(
3949                 &ps_cur_cluster_64->as_mv[ps_cur_cluster_64->num_mvs],
3950                 ps_cluster_data->as_mv,
3951                 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3952 
3953             if((mvdx_min > 0) && ((ps_cur_cluster_64->min_x << 8) > mvx_inp_q8))
3954             {
3955                 ps_cur_cluster_64->min_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3956                 minmax_updated_x = 1;
3957             }
3958             else if((mvdx_min < 0) && ((ps_cur_cluster_64->max_x << 8) < mvx_inp_q8))
3959             {
3960                 ps_cur_cluster_64->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3961                 minmax_updated_x = 2;
3962             }
3963 
3964             if((mvdy_min > 0) && ((ps_cur_cluster_64->min_y << 8) > mvy_inp_q8))
3965             {
3966                 ps_cur_cluster_64->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3967                 minmax_updated_y = 1;
3968             }
3969             else if((mvdy_min < 0) && ((ps_cur_cluster_64->max_y << 8) < mvy_inp_q8))
3970             {
3971                 ps_cur_cluster_64->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3972                 minmax_updated_y = 2;
3973             }
3974 
3975             switch((minmax_updated_y << 2) + minmax_updated_x)
3976             {
3977             case 1:
3978             {
3979                 S32 mvd, mvd_q8;
3980 
3981                 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
3982                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3983 
3984                 if(mvd > (mvd_limit))
3985                 {
3986                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
3987                 }
3988                 break;
3989             }
3990             case 2:
3991             {
3992                 S32 mvd, mvd_q8;
3993 
3994                 mvd_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
3995                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3996 
3997                 if(mvd > (mvd_limit))
3998                 {
3999                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
4000                 }
4001                 break;
4002             }
4003             case 4:
4004             {
4005                 S32 mvd, mvd_q8;
4006 
4007                 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4008                 mvd = (mvd_q8 + (1 << 7)) >> 8;
4009 
4010                 if(mvd > (mvd_limit))
4011                 {
4012                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
4013                 }
4014                 break;
4015             }
4016             case 5:
4017             {
4018                 S32 mvd;
4019                 S32 mvdx, mvdx_q8;
4020                 S32 mvdy, mvdy_q8;
4021 
4022                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4023                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4024 
4025                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
4026                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4027 
4028                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4029 
4030                 if(mvd > mvd_limit)
4031                 {
4032                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
4033                 }
4034                 break;
4035             }
4036             case 6:
4037             {
4038                 S32 mvd;
4039                 S32 mvdx, mvdx_q8;
4040                 S32 mvdy, mvdy_q8;
4041 
4042                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4043                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4044 
4045                 mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
4046                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4047 
4048                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4049 
4050                 if(mvd > mvd_limit)
4051                 {
4052                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
4053                 }
4054                 break;
4055             }
4056             case 8:
4057             {
4058                 S32 mvd, mvd_q8;
4059 
4060                 mvd_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4061                 mvd = (mvd_q8 + (1 << 7)) >> 8;
4062 
4063                 if(mvd > (mvd_limit))
4064                 {
4065                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
4066                 }
4067                 break;
4068             }
4069             case 9:
4070             {
4071                 S32 mvd;
4072                 S32 mvdx, mvdx_q8;
4073                 S32 mvdy, mvdy_q8;
4074 
4075                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
4076                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4077 
4078                 mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4079                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4080 
4081                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4082 
4083                 if(mvd > mvd_limit)
4084                 {
4085                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
4086                 }
4087                 break;
4088             }
4089             case 10:
4090             {
4091                 S32 mvd;
4092                 S32 mvdx, mvdx_q8;
4093                 S32 mvdy, mvdy_q8;
4094 
4095                 mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
4096                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4097 
4098                 mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4099                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4100 
4101                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4102 
4103                 if(mvd > ps_cur_cluster_64->max_dist_from_centroid)
4104                 {
4105                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
4106                 }
4107                 break;
4108             }
4109             default:
4110             {
4111                 break;
4112             }
4113             }
4114 
4115             i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_64->num_mvs) +
4116                               ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
4117             i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_64->num_mvs) +
4118                               ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
4119 
4120             ps_cur_cluster_64->num_mvs += ps_cluster_data->num_mvs;
4121 
4122             ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_64->num_mvs);
4123             ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_64->num_mvs);
4124         }
4125         else if(num_clusters < MAX_NUM_CLUSTERS_64x64)
4126         {
4127             ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[num_clusters];
4128 
4129             ps_blk_64x64->num_clusters++;
4130             ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
4131 
4132             ps_cur_cluster_64->is_valid_cluster = 1;
4133 
4134             ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
4135             ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
4136             ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
4137 
4138             memcpy(
4139                 &ps_cur_cluster_64->as_mv[0],
4140                 ps_cluster_data->as_mv,
4141                 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
4142 
4143             ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
4144 
4145             ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
4146 
4147             ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
4148             ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
4149             ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
4150             ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
4151 
4152             ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
4153         }
4154     }
4155 }
4156 
4157 /**
4158 ********************************************************************************
4159 *  @fn   void hme_update_32x32_clusters
4160 *               (
4161 *                   cluster_32x32_blk_t *ps_blk_32x32,
4162 *                   cluster_16x16_blk_t *ps_blk_16x16
4163 *               )
4164 *
4165 *  @brief  Updates attributes for 32x32 clusters based on the attributes of
4166 *          the constituent 16x16 clusters
4167 *
4168 *  @param[out]  ps_blk_32x32: structure containing 32x32 block results
4169 *
4170 *  @param[in]  ps_blk_16x16 : structure containing 16x16 block results
4171 *
4172 *  @return None
4173 ********************************************************************************
4174 */
4175 static __inline void
hme_update_32x32_clusters(cluster_32x32_blk_t * ps_blk_32x32,cluster_16x16_blk_t * ps_blk_16x16)4176     hme_update_32x32_clusters(cluster_32x32_blk_t *ps_blk_32x32, cluster_16x16_blk_t *ps_blk_16x16)
4177 {
4178     cluster_16x16_blk_t *ps_blk_16x16_cur;
4179     cluster_data_t *ps_cur_cluster;
4180 
4181     S32 i, j;
4182     S32 num_clusters_cur_16x16_blk;
4183 
4184     for(i = 0; i < 4; i++)
4185     {
4186         S32 num_clusters_evaluated = 0;
4187 
4188         ps_blk_16x16_cur = &ps_blk_16x16[i];
4189 
4190         num_clusters_cur_16x16_blk = ps_blk_16x16_cur->num_clusters;
4191 
4192         ps_blk_32x32->intra_mv_area += ps_blk_16x16_cur->intra_mv_area;
4193 
4194         ps_blk_32x32->best_inter_cost += ps_blk_16x16_cur->best_inter_cost;
4195 
4196         for(j = 0; num_clusters_evaluated < num_clusters_cur_16x16_blk; j++)
4197         {
4198             ps_cur_cluster = &ps_blk_16x16_cur->as_cluster_data[j];
4199 
4200             if(!ps_cur_cluster->is_valid_cluster)
4201             {
4202                 continue;
4203             }
4204 
4205             hme_update_32x32_cluster_attributes(ps_blk_32x32, ps_cur_cluster);
4206 
4207             num_clusters_evaluated++;
4208         }
4209     }
4210 }
4211 
4212 /**
4213 ********************************************************************************
4214 *  @fn   void hme_update_64x64_clusters
4215 *               (
4216 *                   cluster_64x64_blk_t *ps_blk_64x64,
4217 *                   cluster_32x32_blk_t *ps_blk_32x32
4218 *               )
4219 *
4220 *  @brief  Updates attributes for 64x64 clusters based on the attributes of
4221 *          the constituent 16x16 clusters
4222 *
4223 *  @param[out]  ps_blk_64x64: structure containing 32x32 block results
4224 *
4225 *  @param[in]  ps_blk_32x32 : structure containing 16x16 block results
4226 *
4227 *  @return None
4228 ********************************************************************************
4229 */
4230 static __inline void
hme_update_64x64_clusters(cluster_64x64_blk_t * ps_blk_64x64,cluster_32x32_blk_t * ps_blk_32x32)4231     hme_update_64x64_clusters(cluster_64x64_blk_t *ps_blk_64x64, cluster_32x32_blk_t *ps_blk_32x32)
4232 {
4233     cluster_32x32_blk_t *ps_blk_32x32_cur;
4234     cluster_data_t *ps_cur_cluster;
4235 
4236     S32 i, j;
4237     S32 num_clusters_cur_32x32_blk;
4238 
4239     for(i = 0; i < 4; i++)
4240     {
4241         S32 num_clusters_evaluated = 0;
4242 
4243         ps_blk_32x32_cur = &ps_blk_32x32[i];
4244 
4245         num_clusters_cur_32x32_blk = ps_blk_32x32_cur->num_clusters;
4246 
4247         ps_blk_64x64->intra_mv_area += ps_blk_32x32_cur->intra_mv_area;
4248         ps_blk_64x64->best_inter_cost += ps_blk_32x32_cur->best_inter_cost;
4249 
4250         for(j = 0; num_clusters_evaluated < num_clusters_cur_32x32_blk; j++)
4251         {
4252             ps_cur_cluster = &ps_blk_32x32_cur->as_cluster_data[j];
4253 
4254             if(!ps_cur_cluster->is_valid_cluster)
4255             {
4256                 continue;
4257             }
4258 
4259             hme_update_64x64_cluster_attributes(ps_blk_64x64, ps_cur_cluster);
4260 
4261             num_clusters_evaluated++;
4262         }
4263     }
4264 }
4265 
4266 /**
4267 ********************************************************************************
4268 *  @fn   void hme_try_merge_clusters_blksize_gt_16
4269 *               (
4270 *                   cluster_data_t *ps_cluster_data,
4271 *                   S32 num_clusters
4272 *               )
4273 *
4274 *  @brief  Merging clusters from blocks of size 32x32 and greater
4275 *
4276 *  @param[in/out]  ps_cluster_data: structure containing cluster data
4277 *
4278 *  @param[in/out]  pi4_num_clusters : pointer to number of clusters
4279 *
4280 *  @return Success or failure
4281 ********************************************************************************
4282 */
hme_try_merge_clusters_blksize_gt_16(cluster_data_t * ps_cluster_data,S32 num_clusters)4283 S32 hme_try_merge_clusters_blksize_gt_16(cluster_data_t *ps_cluster_data, S32 num_clusters)
4284 {
4285     centroid_t *ps_cur_centroid;
4286     cluster_data_t *ps_cur_cluster;
4287 
4288     S32 i, mvd;
4289     S32 mvdx, mvdy, mvdx_q8, mvdy_q8;
4290 
4291     centroid_t *ps_centroid = &ps_cluster_data->s_centroid;
4292 
4293     S32 mvd_limit = ps_cluster_data->max_dist_from_centroid;
4294     S32 ref_id = ps_cluster_data->ref_id;
4295 
4296     S32 node0_posx_q8 = ps_centroid->i4_pos_x_q8;
4297     S32 node0_posy_q8 = ps_centroid->i4_pos_y_q8;
4298     S32 num_clusters_evaluated = 1;
4299     S32 ret_value = 0;
4300 
4301     if(1 >= num_clusters)
4302     {
4303         return ret_value;
4304     }
4305 
4306     for(i = 1; num_clusters_evaluated < num_clusters; i++)
4307     {
4308         S32 cur_posx_q8;
4309         S32 cur_posy_q8;
4310 
4311         ps_cur_cluster = &ps_cluster_data[i];
4312 
4313         if((ref_id != ps_cur_cluster->ref_id))
4314         {
4315             num_clusters_evaluated++;
4316             continue;
4317         }
4318 
4319         if((!ps_cur_cluster->is_valid_cluster))
4320         {
4321             continue;
4322         }
4323 
4324         num_clusters_evaluated++;
4325 
4326         ps_cur_centroid = &ps_cur_cluster->s_centroid;
4327 
4328         cur_posx_q8 = ps_cur_centroid->i4_pos_x_q8;
4329         cur_posy_q8 = ps_cur_centroid->i4_pos_y_q8;
4330 
4331         mvdx_q8 = cur_posx_q8 - node0_posx_q8;
4332         mvdy_q8 = cur_posy_q8 - node0_posy_q8;
4333 
4334         mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4335         mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4336 
4337         mvd = ABS(mvdx) + ABS(mvdy);
4338 
4339         if(mvd <= (mvd_limit >> 1))
4340         {
4341             LWORD64 i8_updated_posx;
4342             LWORD64 i8_updated_posy;
4343             WORD32 minmax_updated_x = 0;
4344             WORD32 minmax_updated_y = 0;
4345 
4346             ps_cur_cluster->is_valid_cluster = 0;
4347 
4348             ps_cluster_data->area_in_pixels += ps_cur_cluster->area_in_pixels;
4349             ps_cluster_data->bi_mv_pixel_area += ps_cur_cluster->bi_mv_pixel_area;
4350             ps_cluster_data->uni_mv_pixel_area += ps_cur_cluster->uni_mv_pixel_area;
4351 
4352             memcpy(
4353                 &ps_cluster_data->as_mv[ps_cluster_data->num_mvs],
4354                 ps_cur_cluster->as_mv,
4355                 sizeof(mv_data_t) * ps_cur_cluster->num_mvs);
4356 
4357             if(mvdx > 0)
4358             {
4359                 ps_cluster_data->min_x = (cur_posx_q8 + (1 << 7)) >> 8;
4360                 minmax_updated_x = 1;
4361             }
4362             else
4363             {
4364                 ps_cluster_data->max_x = (cur_posx_q8 + (1 << 7)) >> 8;
4365                 minmax_updated_x = 2;
4366             }
4367 
4368             if(mvdy > 0)
4369             {
4370                 ps_cluster_data->min_y = (cur_posy_q8 + (1 << 7)) >> 8;
4371                 minmax_updated_y = 1;
4372             }
4373             else
4374             {
4375                 ps_cluster_data->max_y = (cur_posy_q8 + (1 << 7)) >> 8;
4376                 minmax_updated_y = 2;
4377             }
4378 
4379             switch((minmax_updated_y << 2) + minmax_updated_x)
4380             {
4381             case 1:
4382             {
4383                 S32 mvd, mvd_q8;
4384 
4385                 mvd_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4386                 mvd = (mvd_q8 + (1 << 7)) >> 8;
4387 
4388                 if(mvd > (mvd_limit))
4389                 {
4390                     ps_cluster_data->max_dist_from_centroid = mvd;
4391                 }
4392                 break;
4393             }
4394             case 2:
4395             {
4396                 S32 mvd, mvd_q8;
4397 
4398                 mvd_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4399                 mvd = (mvd_q8 + (1 << 7)) >> 8;
4400 
4401                 if(mvd > (mvd_limit))
4402                 {
4403                     ps_cluster_data->max_dist_from_centroid = mvd;
4404                 }
4405                 break;
4406             }
4407             case 4:
4408             {
4409                 S32 mvd, mvd_q8;
4410 
4411                 mvd_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4412                 mvd = (mvd_q8 + (1 << 7)) >> 8;
4413 
4414                 if(mvd > (mvd_limit))
4415                 {
4416                     ps_cluster_data->max_dist_from_centroid = mvd;
4417                 }
4418                 break;
4419             }
4420             case 5:
4421             {
4422                 S32 mvd;
4423                 S32 mvdx, mvdx_q8;
4424                 S32 mvdy, mvdy_q8;
4425 
4426                 mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4427                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4428 
4429                 mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4430                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4431 
4432                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4433 
4434                 if(mvd > mvd_limit)
4435                 {
4436                     ps_cluster_data->max_dist_from_centroid = mvd;
4437                 }
4438                 break;
4439             }
4440             case 6:
4441             {
4442                 S32 mvd;
4443                 S32 mvdx, mvdx_q8;
4444                 S32 mvdy, mvdy_q8;
4445 
4446                 mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4447                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4448 
4449                 mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4450                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4451 
4452                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4453 
4454                 if(mvd > mvd_limit)
4455                 {
4456                     ps_cluster_data->max_dist_from_centroid = mvd;
4457                 }
4458                 break;
4459             }
4460             case 8:
4461             {
4462                 S32 mvd, mvd_q8;
4463 
4464                 mvd_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4465                 mvd = (mvd_q8 + (1 << 7)) >> 8;
4466 
4467                 if(mvd > (mvd_limit))
4468                 {
4469                     ps_cluster_data->max_dist_from_centroid = mvd;
4470                 }
4471                 break;
4472             }
4473             case 9:
4474             {
4475                 S32 mvd;
4476                 S32 mvdx, mvdx_q8;
4477                 S32 mvdy, mvdy_q8;
4478 
4479                 mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4480                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4481 
4482                 mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4483                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4484 
4485                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4486 
4487                 if(mvd > mvd_limit)
4488                 {
4489                     ps_cluster_data->max_dist_from_centroid = mvd;
4490                 }
4491                 break;
4492             }
4493             case 10:
4494             {
4495                 S32 mvd;
4496                 S32 mvdx, mvdx_q8;
4497                 S32 mvdy, mvdy_q8;
4498 
4499                 mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4500                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4501 
4502                 mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4503                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4504 
4505                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4506 
4507                 if(mvd > ps_cluster_data->max_dist_from_centroid)
4508                 {
4509                     ps_cluster_data->max_dist_from_centroid = mvd;
4510                 }
4511                 break;
4512             }
4513             default:
4514             {
4515                 break;
4516             }
4517             }
4518 
4519             i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cluster_data->num_mvs) +
4520                               ((LWORD64)cur_posx_q8 * ps_cur_cluster->num_mvs);
4521             i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cluster_data->num_mvs) +
4522                               ((LWORD64)cur_posy_q8 * ps_cur_cluster->num_mvs);
4523 
4524             ps_cluster_data->num_mvs += ps_cur_cluster->num_mvs;
4525 
4526             ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cluster_data->num_mvs);
4527             ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cluster_data->num_mvs);
4528 
4529             if(MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK >= num_clusters)
4530             {
4531                 num_clusters--;
4532                 num_clusters_evaluated = 1;
4533                 i = 0;
4534                 ret_value++;
4535             }
4536             else
4537             {
4538                 ret_value++;
4539 
4540                 return ret_value;
4541             }
4542         }
4543     }
4544 
4545     if(ret_value)
4546     {
4547         for(i = 1; i < (num_clusters + ret_value); i++)
4548         {
4549             if(ps_cluster_data[i].is_valid_cluster)
4550             {
4551                 break;
4552             }
4553         }
4554         if(i == (num_clusters + ret_value))
4555         {
4556             return ret_value;
4557         }
4558     }
4559     else
4560     {
4561         i = 1;
4562     }
4563 
4564     return (hme_try_merge_clusters_blksize_gt_16(&ps_cluster_data[i], num_clusters - 1)) +
4565            ret_value;
4566 }
4567 
4568 /**
4569 ********************************************************************************
4570 *  @fn   S32 hme_determine_validity_32x32
4571 *               (
4572 *                   ctb_cluster_info_t *ps_ctb_cluster_info
4573 *               )
4574 *
4575 *  @brief  Determines whther current 32x32 block needs to be evaluated in enc_loop
4576 *           while recursing through the CU tree or not
4577 *
4578 *  @param[in]  ps_cluster_data: structure containing cluster data
4579 *
4580 *  @return Success or failure
4581 ********************************************************************************
4582 */
hme_determine_validity_32x32(ctb_cluster_info_t * ps_ctb_cluster_info,S32 * pi4_children_nodes_required,S32 blk_validity_wrt_pic_bndry,S32 parent_blk_validity_wrt_pic_bndry)4583 __inline S32 hme_determine_validity_32x32(
4584     ctb_cluster_info_t *ps_ctb_cluster_info,
4585     S32 *pi4_children_nodes_required,
4586     S32 blk_validity_wrt_pic_bndry,
4587     S32 parent_blk_validity_wrt_pic_bndry)
4588 {
4589     cluster_data_t *ps_data;
4590 
4591     cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
4592     cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
4593 
4594     S32 num_clusters = ps_32x32_blk->num_clusters;
4595     S32 num_clusters_parent = ps_64x64_blk->num_clusters;
4596 
4597     if(!blk_validity_wrt_pic_bndry)
4598     {
4599         *pi4_children_nodes_required = 1;
4600         return 0;
4601     }
4602 
4603     if(!parent_blk_validity_wrt_pic_bndry)
4604     {
4605         *pi4_children_nodes_required = 1;
4606         return 1;
4607     }
4608 
4609     if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4610     {
4611         *pi4_children_nodes_required = 1;
4612         return 0;
4613     }
4614 
4615     if(num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
4616     {
4617         *pi4_children_nodes_required = 1;
4618 
4619         return 1;
4620     }
4621     else if(num_clusters_parent < MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
4622     {
4623         *pi4_children_nodes_required = 0;
4624 
4625         return 1;
4626     }
4627     else
4628     {
4629         if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4630         {
4631             *pi4_children_nodes_required = 0;
4632             return 1;
4633         }
4634         else
4635         {
4636             S32 i;
4637 
4638             S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 4;
4639             S32 min_area = MAX_32BIT_VAL;
4640             S32 num_clusters_evaluated = 0;
4641 
4642             for(i = 0; num_clusters_evaluated < num_clusters; i++)
4643             {
4644                 ps_data = &ps_32x32_blk->as_cluster_data[i];
4645 
4646                 if(!ps_data->is_valid_cluster)
4647                 {
4648                     continue;
4649                 }
4650 
4651                 num_clusters_evaluated++;
4652 
4653                 if(ps_data->area_in_pixels < min_area)
4654                 {
4655                     min_area = ps_data->area_in_pixels;
4656                 }
4657             }
4658 
4659             if((min_area << 4) < area_of_parent)
4660             {
4661                 *pi4_children_nodes_required = 1;
4662                 return 0;
4663             }
4664             else
4665             {
4666                 *pi4_children_nodes_required = 0;
4667                 return 1;
4668             }
4669         }
4670     }
4671 }
4672 
4673 /**
4674 ********************************************************************************
4675 *  @fn   S32 hme_determine_validity_16x16
4676 *               (
4677 *                   ctb_cluster_info_t *ps_ctb_cluster_info
4678 *               )
4679 *
4680 *  @brief  Determines whther current 16x16 block needs to be evaluated in enc_loop
4681 *           while recursing through the CU tree or not
4682 *
4683 *  @param[in]  ps_cluster_data: structure containing cluster data
4684 *
4685 *  @return Success or failure
4686 ********************************************************************************
4687 */
hme_determine_validity_16x16(ctb_cluster_info_t * ps_ctb_cluster_info,S32 * pi4_children_nodes_required,S32 blk_validity_wrt_pic_bndry,S32 parent_blk_validity_wrt_pic_bndry)4688 __inline S32 hme_determine_validity_16x16(
4689     ctb_cluster_info_t *ps_ctb_cluster_info,
4690     S32 *pi4_children_nodes_required,
4691     S32 blk_validity_wrt_pic_bndry,
4692     S32 parent_blk_validity_wrt_pic_bndry)
4693 {
4694     cluster_data_t *ps_data;
4695 
4696     cluster_16x16_blk_t *ps_16x16_blk = ps_ctb_cluster_info->ps_16x16_blk;
4697     cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
4698     cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
4699 
4700     S32 num_clusters = ps_16x16_blk->num_clusters;
4701     S32 num_clusters_parent = ps_32x32_blk->num_clusters;
4702     S32 num_clusters_grandparent = ps_64x64_blk->num_clusters;
4703 
4704     if(!blk_validity_wrt_pic_bndry)
4705     {
4706         *pi4_children_nodes_required = 1;
4707         return 0;
4708     }
4709 
4710     if(!parent_blk_validity_wrt_pic_bndry)
4711     {
4712         *pi4_children_nodes_required = 1;
4713         return 1;
4714     }
4715 
4716     if((num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
4717        (num_clusters_grandparent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
4718     {
4719         *pi4_children_nodes_required = 1;
4720         return 1;
4721     }
4722 
4723     /* Implies nc_64 <= 3 when num_clusters_parent > 3 & */
4724     /* implies nc_64 > 3 when num_clusters_parent < 3 & */
4725     if(num_clusters_parent != MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4726     {
4727         if(num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4728         {
4729             *pi4_children_nodes_required = 0;
4730 
4731             return 1;
4732         }
4733         else
4734         {
4735             *pi4_children_nodes_required = 1;
4736 
4737             return 0;
4738         }
4739     }
4740     /* Implies nc_64 >= 3 */
4741     else
4742     {
4743         if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4744         {
4745             *pi4_children_nodes_required = 0;
4746             return 1;
4747         }
4748         else if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4749         {
4750             *pi4_children_nodes_required = 1;
4751             return 0;
4752         }
4753         else
4754         {
4755             S32 i;
4756 
4757             S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 2;
4758             S32 min_area = MAX_32BIT_VAL;
4759             S32 num_clusters_evaluated = 0;
4760 
4761             for(i = 0; num_clusters_evaluated < num_clusters; i++)
4762             {
4763                 ps_data = &ps_16x16_blk->as_cluster_data[i];
4764 
4765                 if(!ps_data->is_valid_cluster)
4766                 {
4767                     continue;
4768                 }
4769 
4770                 num_clusters_evaluated++;
4771 
4772                 if(ps_data->area_in_pixels < min_area)
4773                 {
4774                     min_area = ps_data->area_in_pixels;
4775                 }
4776             }
4777 
4778             if((min_area << 4) < area_of_parent)
4779             {
4780                 *pi4_children_nodes_required = 1;
4781                 return 0;
4782             }
4783             else
4784             {
4785                 *pi4_children_nodes_required = 0;
4786                 return 1;
4787             }
4788         }
4789     }
4790 }
4791 
4792 /**
4793 ********************************************************************************
4794 *  @fn   void hme_build_cu_tree
4795 *               (
4796 *                   ctb_cluster_info_t *ps_ctb_cluster_info,
4797 *                   cur_ctb_cu_tree_t *ps_cu_tree,
4798 *                   S32 tree_depth,
4799 *                   CU_POS_T e_grand_parent_blk_pos,
4800 *                   CU_POS_T e_parent_blk_pos,
4801 *                   CU_POS_T e_cur_blk_pos
4802 *               )
4803 *
4804 *  @brief  Recursive function for CU tree initialisation
4805 *
4806 *  @param[in]  ps_ctb_cluster_info: structure containing pointers to clusters
4807 *                                   corresponding to all block sizes from 64x64
4808 *                                   to 16x16
4809 *
4810 *  @param[in]  e_parent_blk_pos: position of parent block wrt its parent, if
4811 *                                applicable
4812 *
4813 *  @param[in]  e_cur_blk_pos: position of current block wrt parent
4814 *
4815 *  @param[out]  ps_cu_tree : represents CU tree used in CU recursion
4816 *
4817 *  @param[in]  tree_depth : specifies depth of the CU tree
4818 *
4819 *  @return Nothing
4820 ********************************************************************************
4821 */
hme_build_cu_tree(ctb_cluster_info_t * ps_ctb_cluster_info,cur_ctb_cu_tree_t * ps_cu_tree,S32 tree_depth,CU_POS_T e_grandparent_blk_pos,CU_POS_T e_parent_blk_pos,CU_POS_T e_cur_blk_pos)4822 void hme_build_cu_tree(
4823     ctb_cluster_info_t *ps_ctb_cluster_info,
4824     cur_ctb_cu_tree_t *ps_cu_tree,
4825     S32 tree_depth,
4826     CU_POS_T e_grandparent_blk_pos,
4827     CU_POS_T e_parent_blk_pos,
4828     CU_POS_T e_cur_blk_pos)
4829 {
4830     ihevce_cu_tree_init(
4831         ps_cu_tree,
4832         ps_ctb_cluster_info->ps_cu_tree_root,
4833         &ps_ctb_cluster_info->nodes_created_in_cu_tree,
4834         tree_depth,
4835         e_grandparent_blk_pos,
4836         e_parent_blk_pos,
4837         e_cur_blk_pos);
4838 }
4839 
4840 /**
4841 ********************************************************************************
4842 *  @fn   S32 hme_sdi_based_cluster_spread_eligibility
4843 *               (
4844 *                   cluster_32x32_blk_t *ps_blk_32x32
4845 *               )
4846 *
4847 *  @brief  Determines whether the spread of high SDI MV's around each cluster
4848 *          center is below a pre-determined threshold
4849 *
4850 *  @param[in]  ps_blk_32x32: structure containing pointers to clusters
4851 *                                   corresponding to all block sizes from 64x64
4852 *                                   to 16x16
4853 *
4854 *  @return 1 if the spread is constrained, else 0
4855 ********************************************************************************
4856 */
4857 __inline S32
hme_sdi_based_cluster_spread_eligibility(cluster_32x32_blk_t * ps_blk_32x32,S32 sdi_threshold)4858     hme_sdi_based_cluster_spread_eligibility(cluster_32x32_blk_t *ps_blk_32x32, S32 sdi_threshold)
4859 {
4860     S32 cumulative_mv_distance;
4861     S32 i, j;
4862     S32 num_high_sdi_mvs;
4863 
4864     S32 num_clusters = ps_blk_32x32->num_clusters;
4865 
4866     for(i = 0; i < num_clusters; i++)
4867     {
4868         cluster_data_t *ps_data = &ps_blk_32x32->as_cluster_data[i];
4869 
4870         num_high_sdi_mvs = 0;
4871         cumulative_mv_distance = 0;
4872 
4873         for(j = 0; j < ps_data->num_mvs; j++)
4874         {
4875             mv_data_t *ps_mv = &ps_data->as_mv[j];
4876 
4877             if(ps_mv->sdi >= sdi_threshold)
4878             {
4879                 num_high_sdi_mvs++;
4880 
4881                 COMPUTE_MVD(ps_mv, ps_data, cumulative_mv_distance);
4882             }
4883         }
4884 
4885         if(cumulative_mv_distance > ((ps_data->max_dist_from_centroid >> 1) * num_high_sdi_mvs))
4886         {
4887             return 0;
4888         }
4889     }
4890 
4891     return 1;
4892 }
4893 
4894 /**
4895 ********************************************************************************
4896 *  @fn   S32 hme_populate_cu_tree
4897 *               (
4898 *                   ctb_cluster_info_t *ps_ctb_cluster_info,
4899 *                   ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
4900 *                   cur_ctb_cu_tree_t *ps_cu_tree,
4901 *                   S32 tree_depth,
4902 *                   CU_POS_T e_parent_blk_pos,
4903 *                   CU_POS_T e_cur_blk_pos
4904 *               )
4905 *
4906 *  @brief  Recursive function for CU tree population based on output of
4907 *          clustering algorithm
4908 *
4909 *  @param[in]  ps_ctb_cluster_info: structure containing pointers to clusters
4910 *                                   corresponding to all block sizes from 64x64
4911 *                                   to 16x16
4912 *
4913 *  @param[in]  e_parent_blk_pos: position of parent block wrt its parent, if
4914 applicable
4915 *
4916 *  @param[in]  e_cur_blk_pos: position of current block wrt parent
4917 *
4918 *  @param[in]  ps_cur_ipe_ctb : output container for ipe analyses
4919 *
4920 *  @param[out]  ps_cu_tree : represents CU tree used in CU recursion
4921 *
4922 *  @param[in]  tree_depth : specifies depth of the CU tree
4923 *
4924 *  @param[in]  ipe_decision_precedence : specifies whether precedence should
4925 *               be given to decisions made either by IPE(1) or clustering algos.
4926 *
4927 *  @return 1 if re-evaluation of parent node's validity is not required,
4928 else 0
4929 ********************************************************************************
4930 */
hme_populate_cu_tree(ctb_cluster_info_t * ps_ctb_cluster_info,cur_ctb_cu_tree_t * ps_cu_tree,S32 tree_depth,ME_QUALITY_PRESETS_T e_quality_preset,CU_POS_T e_grandparent_blk_pos,CU_POS_T e_parent_blk_pos,CU_POS_T e_cur_blk_pos)4931 void hme_populate_cu_tree(
4932     ctb_cluster_info_t *ps_ctb_cluster_info,
4933     cur_ctb_cu_tree_t *ps_cu_tree,
4934     S32 tree_depth,
4935     ME_QUALITY_PRESETS_T e_quality_preset,
4936     CU_POS_T e_grandparent_blk_pos,
4937     CU_POS_T e_parent_blk_pos,
4938     CU_POS_T e_cur_blk_pos)
4939 {
4940     S32 area_of_cur_blk;
4941     S32 area_limit_for_me_decision_precedence;
4942     S32 children_nodes_required;
4943     S32 intra_mv_area;
4944     S32 intra_eval_enable;
4945     S32 inter_eval_enable;
4946     S32 ipe_decision_precedence;
4947     S32 node_validity;
4948     S32 num_clusters;
4949 
4950     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb = ps_ctb_cluster_info->ps_cur_ipe_ctb;
4951 
4952     if(NULL == ps_cu_tree)
4953     {
4954         return;
4955     }
4956 
4957     switch(tree_depth)
4958     {
4959     case 0:
4960     {
4961         /* 64x64 block */
4962         S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
4963 
4964         cluster_64x64_blk_t *ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
4965 
4966         area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 4;
4967         area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
4968         children_nodes_required = 0;
4969         intra_mv_area = ps_blk_64x64->intra_mv_area;
4970 
4971         ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
4972 
4973         intra_eval_enable = ipe_decision_precedence;
4974         inter_eval_enable = !!ps_blk_64x64->num_clusters;
4975 
4976 #if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4977         if(e_quality_preset >= ME_HIGH_QUALITY)
4978         {
4979             inter_eval_enable = 1;
4980             node_validity = (blk_32x32_mask == 0xf);
4981 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
4982             ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
4983 #endif
4984             break;
4985         }
4986 #endif
4987 
4988 #if ENABLE_4CTB_EVALUATION
4989         node_validity = (blk_32x32_mask == 0xf);
4990 
4991         break;
4992 #else
4993         {
4994             S32 i;
4995 
4996             num_clusters = ps_blk_64x64->num_clusters;
4997 
4998             node_validity = (ipe_decision_precedence)
4999                                 ? (!ps_cur_ipe_ctb->u1_split_flag)
5000                                 : (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK);
5001 
5002             for(i = 0; i < MAX_NUM_REF; i++)
5003             {
5004                 node_validity = node_validity && (ps_blk_64x64->au1_num_clusters[i] <=
5005                                                   MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
5006             }
5007 
5008             node_validity = node_validity && (blk_32x32_mask == 0xf);
5009         }
5010         break;
5011 #endif
5012     }
5013     case 1:
5014     {
5015         /* 32x32 block */
5016         S32 is_percent_intra_area_gt_threshold;
5017 
5018         cluster_32x32_blk_t *ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cur_blk_pos];
5019 
5020         S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
5021 
5022 #if !ENABLE_4CTB_EVALUATION
5023         S32 best_inter_cost = ps_blk_32x32->best_inter_cost;
5024         S32 best_intra_cost =
5025             ((ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
5026               ps_ctb_cluster_info->i4_frame_qstep * ps_ctb_cluster_info->i4_frame_qstep_multiplier *
5027                   4) < 0)
5028                 ? MAX_32BIT_VAL
5029                 : (ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
5030                    ps_ctb_cluster_info->i4_frame_qstep *
5031                        ps_ctb_cluster_info->i4_frame_qstep_multiplier * 4);
5032         S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
5033         S32 cost_differential = (best_inter_cost - best_cost);
5034 #endif
5035 
5036         area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 2;
5037         area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
5038         intra_mv_area = ps_blk_32x32->intra_mv_area;
5039         is_percent_intra_area_gt_threshold =
5040             (intra_mv_area > area_limit_for_me_decision_precedence);
5041         ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
5042 
5043         intra_eval_enable = ipe_decision_precedence;
5044         inter_eval_enable = !!ps_blk_32x32->num_clusters;
5045         children_nodes_required = 1;
5046 
5047 #if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5048         if(e_quality_preset >= ME_HIGH_QUALITY)
5049         {
5050             inter_eval_enable = 1;
5051             node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5052 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5053             ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
5054 #endif
5055             break;
5056         }
5057 #endif
5058 
5059 #if ENABLE_4CTB_EVALUATION
5060         node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5061 
5062         break;
5063 #else
5064         {
5065             S32 i;
5066             num_clusters = ps_blk_32x32->num_clusters;
5067 
5068             if(ipe_decision_precedence)
5069             {
5070                 node_validity = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_merge_flag);
5071                 node_validity = node_validity && (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5072             }
5073             else
5074             {
5075                 node_validity =
5076                     ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential)) &&
5077                     (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
5078                     (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5079 
5080                 for(i = 0; (i < MAX_NUM_REF) && (node_validity); i++)
5081                 {
5082                     node_validity = node_validity && (ps_blk_32x32->au1_num_clusters[i] <=
5083                                                       MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
5084                 }
5085 
5086                 if(node_validity)
5087                 {
5088                     node_validity = node_validity &&
5089                                     hme_sdi_based_cluster_spread_eligibility(
5090                                         ps_blk_32x32, ps_ctb_cluster_info->sdi_threshold);
5091                 }
5092             }
5093         }
5094 
5095         break;
5096 #endif
5097     }
5098     case 2:
5099     {
5100         cluster_16x16_blk_t *ps_blk_16x16 =
5101             &ps_ctb_cluster_info->ps_16x16_blk[e_cur_blk_pos + (e_parent_blk_pos << 2)];
5102 
5103         S32 blk_8x8_mask =
5104             ps_ctb_cluster_info->pi4_blk_8x8_mask[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5105 
5106         area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N];
5107         area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
5108         children_nodes_required = 1;
5109         intra_mv_area = ps_blk_16x16->intra_mv_area;
5110         ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
5111         num_clusters = ps_blk_16x16->num_clusters;
5112 
5113         intra_eval_enable = ipe_decision_precedence;
5114         inter_eval_enable = 1;
5115 
5116 #if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5117         if(e_quality_preset >= ME_HIGH_QUALITY)
5118         {
5119             node_validity =
5120                 !ps_ctb_cluster_info
5121                      ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5122             children_nodes_required = !node_validity;
5123             break;
5124         }
5125 #endif
5126 
5127 #if ENABLE_4CTB_EVALUATION
5128         node_validity = (blk_8x8_mask == 0xf);
5129 
5130 #if ENABLE_CU_TREE_CULLING
5131         {
5132             cur_ctb_cu_tree_t *ps_32x32_root;
5133 
5134             switch(e_parent_blk_pos)
5135             {
5136             case POS_TL:
5137             {
5138                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
5139 
5140                 break;
5141             }
5142             case POS_TR:
5143             {
5144                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
5145 
5146                 break;
5147             }
5148             case POS_BL:
5149             {
5150                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
5151 
5152                 break;
5153             }
5154             case POS_BR:
5155             {
5156                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
5157 
5158                 break;
5159             }
5160             }
5161 
5162             if(ps_32x32_root->is_node_valid)
5163             {
5164                 node_validity =
5165                     node_validity &&
5166                     !ps_ctb_cluster_info
5167                          ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5168                 children_nodes_required = !node_validity;
5169             }
5170         }
5171 #endif
5172 
5173         break;
5174 #else
5175 
5176         if(ipe_decision_precedence)
5177         {
5178             S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
5179                                      .as_intra16_analyse[e_cur_blk_pos]
5180                                      .b1_merge_flag);
5181             S32 valid_flag = (blk_8x8_mask == 0xf);
5182 
5183             node_validity = merge_flag_16 && valid_flag;
5184         }
5185         else
5186         {
5187             node_validity = (blk_8x8_mask == 0xf);
5188         }
5189 
5190         break;
5191 #endif
5192     }
5193     case 3:
5194     {
5195         S32 blk_8x8_mask =
5196             ps_ctb_cluster_info
5197                 ->pi4_blk_8x8_mask[(S32)(e_grandparent_blk_pos << 2) + e_parent_blk_pos];
5198         S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos]
5199                                  .as_intra16_analyse[e_parent_blk_pos]
5200                                  .b1_merge_flag);
5201         S32 merge_flag_32 =
5202             (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos].b1_merge_flag);
5203 
5204         intra_eval_enable = !merge_flag_16 || !merge_flag_32;
5205         inter_eval_enable = 1;
5206         children_nodes_required = 0;
5207 
5208 #if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5209         if(e_quality_preset >= ME_HIGH_QUALITY)
5210         {
5211             node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
5212             break;
5213         }
5214 #endif
5215 
5216 #if ENABLE_4CTB_EVALUATION
5217         node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
5218 
5219         break;
5220 #else
5221         {
5222             cur_ctb_cu_tree_t *ps_32x32_root;
5223             cur_ctb_cu_tree_t *ps_16x16_root;
5224             cluster_32x32_blk_t *ps_32x32_blk;
5225 
5226             switch(e_grandparent_blk_pos)
5227             {
5228             case POS_TL:
5229             {
5230                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
5231 
5232                 break;
5233             }
5234             case POS_TR:
5235             {
5236                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
5237 
5238                 break;
5239             }
5240             case POS_BL:
5241             {
5242                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
5243 
5244                 break;
5245             }
5246             case POS_BR:
5247             {
5248                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
5249 
5250                 break;
5251             }
5252             }
5253 
5254             switch(e_parent_blk_pos)
5255             {
5256             case POS_TL:
5257             {
5258                 ps_16x16_root = ps_32x32_root->ps_child_node_tl;
5259 
5260                 break;
5261             }
5262             case POS_TR:
5263             {
5264                 ps_16x16_root = ps_32x32_root->ps_child_node_tr;
5265 
5266                 break;
5267             }
5268             case POS_BL:
5269             {
5270                 ps_16x16_root = ps_32x32_root->ps_child_node_bl;
5271 
5272                 break;
5273             }
5274             case POS_BR:
5275             {
5276                 ps_16x16_root = ps_32x32_root->ps_child_node_br;
5277 
5278                 break;
5279             }
5280             }
5281 
5282             ps_32x32_blk = &ps_ctb_cluster_info->ps_32x32_blk[e_grandparent_blk_pos];
5283 
5284             node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0) &&
5285                             ((!ps_32x32_root->is_node_valid) ||
5286                              (ps_32x32_blk->num_clusters_with_weak_sdi_density > 0) ||
5287                              (!ps_16x16_root->is_node_valid));
5288 
5289             break;
5290         }
5291 #endif
5292     }
5293     }
5294 
5295     /* Fill the current cu_tree node */
5296     ps_cu_tree->is_node_valid = node_validity;
5297     ps_cu_tree->u1_intra_eval_enable = intra_eval_enable;
5298     ps_cu_tree->u1_inter_eval_enable = inter_eval_enable;
5299 
5300     if(children_nodes_required)
5301     {
5302         tree_depth++;
5303 
5304         hme_populate_cu_tree(
5305             ps_ctb_cluster_info,
5306             ps_cu_tree->ps_child_node_tl,
5307             tree_depth,
5308             e_quality_preset,
5309             e_parent_blk_pos,
5310             e_cur_blk_pos,
5311             POS_TL);
5312 
5313         hme_populate_cu_tree(
5314             ps_ctb_cluster_info,
5315             ps_cu_tree->ps_child_node_tr,
5316             tree_depth,
5317             e_quality_preset,
5318             e_parent_blk_pos,
5319             e_cur_blk_pos,
5320             POS_TR);
5321 
5322         hme_populate_cu_tree(
5323             ps_ctb_cluster_info,
5324             ps_cu_tree->ps_child_node_bl,
5325             tree_depth,
5326             e_quality_preset,
5327             e_parent_blk_pos,
5328             e_cur_blk_pos,
5329             POS_BL);
5330 
5331         hme_populate_cu_tree(
5332             ps_ctb_cluster_info,
5333             ps_cu_tree->ps_child_node_br,
5334             tree_depth,
5335             e_quality_preset,
5336             e_parent_blk_pos,
5337             e_cur_blk_pos,
5338             POS_BR);
5339     }
5340 }
5341 
5342 /**
5343 ********************************************************************************
5344 *  @fn   void hme_analyse_mv_clustering
5345 *               (
5346 *                   search_results_t *ps_search_results,
5347 *                   ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
5348 *                   cur_ctb_cu_tree_t *ps_cu_tree
5349 *               )
5350 *
5351 *  @brief  Implementation for the clustering algorithm
5352 *
5353 *  @param[in]  ps_search_results: structure containing 16x16 block results
5354 *
5355 *  @param[in]  ps_cur_ipe_ctb : output container for ipe analyses
5356 *
5357 *  @param[out]  ps_cu_tree : represents CU tree used in CU recursion
5358 *
5359 *  @return None
5360 ********************************************************************************
5361 */
hme_analyse_mv_clustering(search_results_t * ps_search_results,inter_cu_results_t * ps_16x16_cu_results,inter_cu_results_t * ps_8x8_cu_results,ctb_cluster_info_t * ps_ctb_cluster_info,S08 * pi1_future_list,S08 * pi1_past_list,S32 bidir_enabled,ME_QUALITY_PRESETS_T e_quality_preset)5362 void hme_analyse_mv_clustering(
5363     search_results_t *ps_search_results,
5364     inter_cu_results_t *ps_16x16_cu_results,
5365     inter_cu_results_t *ps_8x8_cu_results,
5366     ctb_cluster_info_t *ps_ctb_cluster_info,
5367     S08 *pi1_future_list,
5368     S08 *pi1_past_list,
5369     S32 bidir_enabled,
5370     ME_QUALITY_PRESETS_T e_quality_preset)
5371 {
5372     cluster_16x16_blk_t *ps_blk_16x16;
5373     cluster_32x32_blk_t *ps_blk_32x32;
5374     cluster_64x64_blk_t *ps_blk_64x64;
5375 
5376     part_type_results_t *ps_best_result;
5377     pu_result_t *aps_part_result[MAX_NUM_PARTS];
5378     pu_result_t *aps_inferior_parts[MAX_NUM_PARTS];
5379 
5380     PART_ID_T e_part_id;
5381     PART_TYPE_T e_part_type;
5382 
5383     S32 enable_64x64_merge;
5384     S32 i, j, k;
5385     S32 mvx, mvy;
5386     S32 num_parts;
5387     S32 ref_idx;
5388     S32 ai4_pred_mode[MAX_NUM_PARTS];
5389 
5390     S32 num_32x32_merges = 0;
5391 
5392     /*****************************************/
5393     /*****************************************/
5394     /********* Enter ye who is HQ ************/
5395     /*****************************************/
5396     /*****************************************/
5397 
5398     ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
5399 
5400     /* Initialise data in each of the clusters */
5401     for(i = 0; i < 16; i++)
5402     {
5403         ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
5404 
5405 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5406         if(e_quality_preset < ME_HIGH_QUALITY)
5407         {
5408             hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
5409         }
5410         else
5411         {
5412             ps_blk_16x16->best_inter_cost = 0;
5413             ps_blk_16x16->intra_mv_area = 0;
5414         }
5415 #else
5416         hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
5417 #endif
5418     }
5419 
5420     for(i = 0; i < 4; i++)
5421     {
5422         ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
5423 
5424 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5425         if(e_quality_preset < ME_HIGH_QUALITY)
5426         {
5427             hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
5428         }
5429         else
5430         {
5431             ps_blk_32x32->best_inter_cost = 0;
5432             ps_blk_32x32->intra_mv_area = 0;
5433         }
5434 #else
5435         hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
5436 #endif
5437     }
5438 
5439 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5440     if(e_quality_preset < ME_HIGH_QUALITY)
5441     {
5442         hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
5443     }
5444     else
5445     {
5446         ps_blk_64x64->best_inter_cost = 0;
5447         ps_blk_64x64->intra_mv_area = 0;
5448     }
5449 #else
5450     hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
5451 #endif
5452 
5453     /* Initialise data for all nodes in the CU tree */
5454     hme_build_cu_tree(
5455         ps_ctb_cluster_info, ps_ctb_cluster_info->ps_cu_tree_root, 0, POS_NA, POS_NA, POS_NA);
5456 
5457     if(e_quality_preset >= ME_HIGH_QUALITY)
5458     {
5459         memset(ps_ctb_cluster_info->au1_is_16x16_blk_split, 1, 16 * sizeof(U08));
5460     }
5461 
5462 #if ENABLE_UNIFORM_CU_SIZE_16x16 || ENABLE_UNIFORM_CU_SIZE_8x8
5463     return;
5464 #endif
5465 
5466     for(i = 0; i < 16; i++)
5467     {
5468         S32 blk_8x8_mask;
5469         S32 is_16x16_blk_valid;
5470         S32 num_clusters_updated;
5471         S32 num_clusters;
5472 
5473         blk_8x8_mask = ps_ctb_cluster_info->pi4_blk_8x8_mask[i];
5474 
5475         ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
5476 
5477         is_16x16_blk_valid = (blk_8x8_mask == 0xf);
5478 
5479         if(is_16x16_blk_valid)
5480         {
5481             /* Use 8x8 data when 16x16 CU is split */
5482             if(ps_search_results[i].u1_split_flag)
5483             {
5484                 S32 blk_8x8_idx = i << 2;
5485 
5486                 num_parts = 4;
5487                 e_part_type = PRT_NxN;
5488 
5489                 for(j = 0; j < num_parts; j++, blk_8x8_idx++)
5490                 {
5491                     /* Only 2Nx2N partition supported for 8x8 block */
5492                     ASSERT(
5493                         ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].u1_part_type ==
5494                         ((PART_TYPE_T)PRT_2Nx2N));
5495 
5496                     aps_part_result[j] =
5497                         &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].as_pu_results[0];
5498                     aps_inferior_parts[j] =
5499                         &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[1].as_pu_results[0];
5500                     ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
5501                 }
5502             }
5503             else
5504             {
5505                 ps_best_result = &ps_16x16_cu_results[i].ps_best_results[0];
5506 
5507                 e_part_type = (PART_TYPE_T)ps_best_result->u1_part_type;
5508                 num_parts = gau1_num_parts_in_part_type[e_part_type];
5509 
5510                 for(j = 0; j < num_parts; j++)
5511                 {
5512                     aps_part_result[j] = &ps_best_result->as_pu_results[j];
5513                     aps_inferior_parts[j] = &ps_best_result[1].as_pu_results[j];
5514                     ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
5515                 }
5516 
5517                 ps_ctb_cluster_info->au1_is_16x16_blk_split[i] = 0;
5518             }
5519 
5520             for(j = 0; j < num_parts; j++)
5521             {
5522                 pu_result_t *ps_part_result = aps_part_result[j];
5523 
5524                 S32 num_mvs = ((ai4_pred_mode[j] > 1) + 1);
5525 
5526                 e_part_id = ge_part_type_to_part_id[e_part_type][j];
5527 
5528                 /* Skip clustering if best mode is intra */
5529                 if((ps_part_result->pu.b1_intra_flag))
5530                 {
5531                     ps_blk_16x16->intra_mv_area += gai4_partition_area[e_part_id];
5532                     ps_blk_16x16->best_inter_cost += aps_inferior_parts[j]->i4_tot_cost;
5533                     continue;
5534                 }
5535                 else
5536                 {
5537                     ps_blk_16x16->best_inter_cost += ps_part_result->i4_tot_cost;
5538                 }
5539 
5540 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5541                 if(e_quality_preset >= ME_HIGH_QUALITY)
5542                 {
5543                     continue;
5544                 }
5545 #endif
5546 
5547                 for(k = 0; k < num_mvs; k++)
5548                 {
5549                     mv_t *ps_mv;
5550 
5551                     pu_mv_t *ps_pu_mv = &ps_part_result->pu.mv;
5552 
5553                     S32 is_l0_mv = ((ai4_pred_mode[j] == 2) && !k) || (ai4_pred_mode[j] == 0);
5554 
5555                     ps_mv = (is_l0_mv) ? (&ps_pu_mv->s_l0_mv) : (&ps_pu_mv->s_l1_mv);
5556 
5557                     mvx = ps_mv->i2_mvx;
5558                     mvy = ps_mv->i2_mvy;
5559 
5560                     ref_idx = (is_l0_mv) ? pi1_past_list[ps_pu_mv->i1_l0_ref_idx]
5561                                          : pi1_future_list[ps_pu_mv->i1_l1_ref_idx];
5562 
5563                     num_clusters = ps_blk_16x16->num_clusters;
5564 
5565                     hme_find_and_update_clusters(
5566                         ps_blk_16x16->as_cluster_data,
5567                         &(ps_blk_16x16->num_clusters),
5568                         mvx,
5569                         mvy,
5570                         ref_idx,
5571                         ps_part_result->i4_sdi,
5572                         e_part_id,
5573                         (ai4_pred_mode[j] == 2));
5574 
5575                     num_clusters_updated = (ps_blk_16x16->num_clusters);
5576 
5577                     ps_blk_16x16->au1_num_clusters[ref_idx] +=
5578                         (num_clusters_updated - num_clusters);
5579                 }
5580             }
5581         }
5582     }
5583 
5584     /* Search for 32x32 clusters */
5585     for(i = 0; i < 4; i++)
5586     {
5587         S32 num_clusters_merged;
5588 
5589         S32 is_32x32_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << i)) || 0;
5590 
5591         if(is_32x32_blk_valid)
5592         {
5593             ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
5594             ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i << 2];
5595 
5596 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5597             if(e_quality_preset >= ME_HIGH_QUALITY)
5598             {
5599                 for(j = 0; j < 4; j++, ps_blk_16x16++)
5600                 {
5601                     ps_blk_32x32->intra_mv_area += ps_blk_16x16->intra_mv_area;
5602 
5603                     ps_blk_32x32->best_inter_cost += ps_blk_16x16->best_inter_cost;
5604                 }
5605                 continue;
5606             }
5607 #endif
5608 
5609             hme_update_32x32_clusters(ps_blk_32x32, ps_blk_16x16);
5610 
5611             if((ps_blk_32x32->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
5612             {
5613                 num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
5614                     ps_blk_32x32->as_cluster_data, (ps_blk_32x32->num_clusters));
5615 
5616                 if(num_clusters_merged)
5617                 {
5618                     ps_blk_32x32->num_clusters -= num_clusters_merged;
5619 
5620                     UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_32x32);
5621                 }
5622             }
5623         }
5624     }
5625 
5626 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5627     /* Eliminate outlier 32x32 clusters */
5628     if(e_quality_preset < ME_HIGH_QUALITY)
5629 #endif
5630     {
5631         hme_boot_out_outlier(ps_ctb_cluster_info, 32);
5632 
5633         /* Find best_uni_ref and best_alt_ref */
5634         hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 32);
5635     }
5636 
5637     /* Populate the CU tree for depths 1 and higher */
5638     {
5639         cur_ctb_cu_tree_t *ps_tree_root = ps_ctb_cluster_info->ps_cu_tree_root;
5640         cur_ctb_cu_tree_t *ps_tl = ps_tree_root->ps_child_node_tl;
5641         cur_ctb_cu_tree_t *ps_tr = ps_tree_root->ps_child_node_tr;
5642         cur_ctb_cu_tree_t *ps_bl = ps_tree_root->ps_child_node_bl;
5643         cur_ctb_cu_tree_t *ps_br = ps_tree_root->ps_child_node_br;
5644 
5645         hme_populate_cu_tree(
5646             ps_ctb_cluster_info, ps_tl, 1, e_quality_preset, POS_NA, POS_NA, POS_TL);
5647 
5648         num_32x32_merges += (ps_tl->is_node_valid == 1);
5649 
5650         hme_populate_cu_tree(
5651             ps_ctb_cluster_info, ps_tr, 1, e_quality_preset, POS_NA, POS_NA, POS_TR);
5652 
5653         num_32x32_merges += (ps_tr->is_node_valid == 1);
5654 
5655         hme_populate_cu_tree(
5656             ps_ctb_cluster_info, ps_bl, 1, e_quality_preset, POS_NA, POS_NA, POS_BL);
5657 
5658         num_32x32_merges += (ps_bl->is_node_valid == 1);
5659 
5660         hme_populate_cu_tree(
5661             ps_ctb_cluster_info, ps_br, 1, e_quality_preset, POS_NA, POS_NA, POS_BR);
5662 
5663         num_32x32_merges += (ps_br->is_node_valid == 1);
5664     }
5665 
5666 #if !ENABLE_4CTB_EVALUATION
5667     if(e_quality_preset < ME_HIGH_QUALITY)
5668     {
5669         enable_64x64_merge = (num_32x32_merges >= 3);
5670     }
5671 #else
5672     if(e_quality_preset < ME_HIGH_QUALITY)
5673     {
5674         enable_64x64_merge = 1;
5675     }
5676 #endif
5677 
5678 #if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5679     if(e_quality_preset >= ME_HIGH_QUALITY)
5680     {
5681         enable_64x64_merge = 1;
5682     }
5683 #else
5684     if(e_quality_preset >= ME_HIGH_QUALITY)
5685     {
5686         enable_64x64_merge = (num_32x32_merges >= 3);
5687     }
5688 #endif
5689 
5690     if(enable_64x64_merge)
5691     {
5692         S32 num_clusters_merged;
5693 
5694         ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[0];
5695 
5696 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5697         if(e_quality_preset >= ME_HIGH_QUALITY)
5698         {
5699             for(j = 0; j < 4; j++, ps_blk_32x32++)
5700             {
5701                 ps_blk_64x64->intra_mv_area += ps_blk_32x32->intra_mv_area;
5702 
5703                 ps_blk_64x64->best_inter_cost += ps_blk_32x32->best_inter_cost;
5704             }
5705         }
5706         else
5707 #endif
5708         {
5709             hme_update_64x64_clusters(ps_blk_64x64, ps_blk_32x32);
5710 
5711             if((ps_blk_64x64->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
5712             {
5713                 num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
5714                     ps_blk_64x64->as_cluster_data, (ps_blk_64x64->num_clusters));
5715 
5716                 if(num_clusters_merged)
5717                 {
5718                     ps_blk_64x64->num_clusters -= num_clusters_merged;
5719 
5720                     UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_64x64);
5721                 }
5722             }
5723         }
5724 
5725 #if !ENABLE_4CTB_EVALUATION
5726         if(e_quality_preset < ME_HIGH_QUALITY)
5727         {
5728             S32 best_inter_cost = ps_blk_64x64->best_inter_cost;
5729             S32 best_intra_cost =
5730                 ((ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
5731                   ps_ctb_cluster_info->i4_frame_qstep *
5732                       ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16) < 0)
5733                     ? MAX_32BIT_VAL
5734                     : (ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
5735                        ps_ctb_cluster_info->i4_frame_qstep *
5736                            ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16);
5737             S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
5738             S32 cost_differential = (best_inter_cost - best_cost);
5739 
5740             enable_64x64_merge =
5741                 ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential));
5742         }
5743 #endif
5744     }
5745 
5746     if(enable_64x64_merge)
5747     {
5748 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5749         if(e_quality_preset < ME_HIGH_QUALITY)
5750 #endif
5751         {
5752             hme_boot_out_outlier(ps_ctb_cluster_info, 64);
5753 
5754             hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 64);
5755         }
5756 
5757         hme_populate_cu_tree(
5758             ps_ctb_cluster_info,
5759             ps_ctb_cluster_info->ps_cu_tree_root,
5760             0,
5761             e_quality_preset,
5762             POS_NA,
5763             POS_NA,
5764             POS_NA);
5765     }
5766 }
5767 #endif
5768 
/**
********************************************************************************
*  @fn   hme_merge_prms_init
*
*  @brief  Populates one merge-parameter structure. For BLK_32x32 the four
*          children are four consecutive 16x16 search results and the merged
*          result goes to the 32x32 slot selected by i4_32x32_id. For any
*          other block size the children are the four 32x32 search results
*          and the merged result goes to the 64x64 slot.
*
*  @param[out] ps_prms : merge params filled in by this function
*
*  @param[in]  ps_curr_layer : stored as the layer context of the merge params
*
*  @param[in]  ps_refine_prms : supplies i4_use_rec_in_fpel and the number of
*              input results (fpel results for 32x32, 32x32 merge results
*              otherwise)
*
*  @param[in]  ps_me_ctxt : frame context providing the search result arrays,
*              8x8 CU results, past/future lists, log_ctb_size and cluster
*              info that ps_prms is pointed at
*
*  @param[in]  ps_range_prms_rec : mv range array (indexed up to MAX_NUM_REF),
*              referenced when i4_use_rec_in_fpel is non-zero
*
*  @param[in]  ps_range_prms_inp : mv range array (indexed up to MAX_NUM_REF),
*              referenced when i4_use_rec_in_fpel is zero
*
*  @param[in]  pps_mv_grid : stored as-is in ps_prms
*
*  @param[in]  ps_inter_ctb_prms : stored as-is in ps_prms
*
*  @param[in]  i4_num_pred_dir : stored as the number of refs to merge in
*
*  @param[in]  i4_32x32_id : index of the 32x32 unit; read only for BLK_32x32
*
*  @param[in]  e_blk_size : BLK_32x32 selects the 16x16 to 32x32 setup, any
*              other value selects the 32x32 to 64x64 setup
*
*  @param[in]  e_me_quality_presets : stored as the quality preset
*
*  @return None
********************************************************************************
*/
static __inline void hme_merge_prms_init(
    hme_merge_prms_t *ps_prms,
    layer_ctxt_t *ps_curr_layer,
    refine_prms_t *ps_refine_prms,
    me_frm_ctxt_t *ps_me_ctxt,
    range_prms_t *ps_range_prms_rec,
    range_prms_t *ps_range_prms_inp,
    mv_grid_t **pps_mv_grid,
    inter_ctb_prms_t *ps_inter_ctb_prms,
    S32 i4_num_pred_dir,
    S32 i4_32x32_id,
    BLK_SIZE_T e_blk_size,
    ME_QUALITY_PRESETS_T e_me_quality_presets)
{
    WORD32 ref_id;
    const S32 i4_rec_in_fpel = ps_refine_prms->i4_use_rec_in_fpel;

    /* One range array serves every reference, so pick it once up front */
    range_prms_t *ps_range_src = i4_rec_in_fpel ? ps_range_prms_rec : ps_range_prms_inp;

    /* Segmentation info from previous layers is currently not enabled */
    ps_prms->i4_seg_info_avail = 0;
    ps_prms->i4_part_mask = 0;

    /* Number of reference pics in which merge is done, plus shared contexts */
    ps_prms->i4_num_ref = i4_num_pred_dir;
    ps_prms->ps_layer_ctxt = ps_curr_layer;
    ps_prms->ps_inter_ctb_prms = ps_inter_ctb_prms;

    if(BLK_32x32 != e_blk_size)
    {
        /* 32x32 -> 64x64: children are the four 32x32 results of the CTB */
        ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_32x32[0];
        ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_32x32[1];
        ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_32x32[2];
        ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_32x32[3];

        /* Destination of the merged result */
        ps_prms->ps_results_merge = &ps_me_ctxt->s_search_results_64x64;

        ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_32x32_merge_results;
        ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[0];
        ps_prms->ps_results_grandchild = ps_me_ctxt->as_search_results_16x16;
    }
    else
    {
        /* 16x16 -> 32x32: children are 4 consecutive 16x16 results (tl, tr, bl, br) */
        const S32 i4_first_16x16 = i4_32x32_id << 2;

        ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_16x16[i4_first_16x16];
        ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_16x16[i4_first_16x16 + 1];
        ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_16x16[i4_first_16x16 + 2];
        ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_16x16[i4_first_16x16 + 3];

        /* Destination of the merged result */
        ps_prms->ps_results_merge = &ps_me_ctxt->as_search_results_32x32[i4_32x32_id];

        /* Could be fewer than the number of 16x16 results generated; */
        /* for now it is kept the same                                */
        ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_fpel_results;
        ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[i4_32x32_id << 4];
        ps_prms->ps_results_grandchild = NULL;
    }

    /* Point every reference's mv range at the selected (rec or inp) array */
    for(ref_id = 0; ref_id < MAX_NUM_REF; ref_id++)
    {
        ps_prms->aps_mv_range[ref_id] = ps_range_src + ref_id;
    }
    ps_prms->i4_use_rec = i4_rec_in_fpel;

    ps_prms->pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
    ps_prms->pps_mv_grid = pps_mv_grid;
    ps_prms->log_ctb_size = ps_me_ctxt->log_ctb_size;
    ps_prms->e_quality_preset = e_me_quality_presets;

    ps_prms->pi1_past_list = ps_me_ctxt->ai1_past_list;
    ps_prms->pi1_future_list = ps_me_ctxt->ai1_future_list;
    ps_prms->ps_cluster_info = ps_me_ctxt->ps_ctb_cluster_info;
}
5861 
5862 /**
5863 ********************************************************************************
5864 *  @fn   void hme_refine(me_ctxt_t *ps_thrd_ctxt,
5865 *                       refine_prms_t *ps_refine_prms, ...)
5866 *
5867 *  @brief  Top level entry point for refinement ME
5868 *
5869 *  @param[in,out]  ps_thrd_ctxt: ME Handle
5870 *
5871 *  @param[in]  ps_refine_prms : refinement layer prms
5872 *
5873 *  @return None
5874 ********************************************************************************
5875 */
hme_refine(me_ctxt_t * ps_thrd_ctxt,refine_prms_t * ps_refine_prms,PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,layer_ctxt_t * ps_coarse_layer,multi_thrd_ctxt_t * ps_multi_thrd_ctxt,S32 lyr_job_type,S32 thrd_id,S32 me_frm_id,pre_enc_L0_ipe_encloop_ctxt_t * ps_l0_ipe_input)5876 void hme_refine(
5877     me_ctxt_t *ps_thrd_ctxt,
5878     refine_prms_t *ps_refine_prms,
5879     PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
5880     layer_ctxt_t *ps_coarse_layer,
5881     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
5882     S32 lyr_job_type,
5883     S32 thrd_id,
5884     S32 me_frm_id,
5885     pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input)
5886 {
5887     inter_ctb_prms_t s_common_frm_prms;
5888 
5889     BLK_SIZE_T e_search_blk_size, e_result_blk_size;
5890     WORD32 i4_me_frm_id = me_frm_id % MAX_NUM_ME_PARALLEL;
5891     me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
5892     ME_QUALITY_PRESETS_T e_me_quality_presets =
5893         ps_thrd_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
5894 
5895     WORD32 num_rows_proc = 0;
5896     WORD32 num_act_ref_pics;
5897     WORD16 i2_prev_enc_frm_max_mv_y;
5898     WORD32 i4_idx_dvsr_p = ps_multi_thrd_ctxt->i4_idx_dvsr_p;
5899 
5900     /*************************************************************************/
5901     /* Complexity of search: Low to High                                     */
5902     /*************************************************************************/
5903     SEARCH_COMPLEXITY_T e_search_complexity;
5904 
5905     /*************************************************************************/
5906     /* to store the PU results which are passed to the decide_part_types     */
5907     /* as input prms. Multiplied by 4 as the max number of Ref in a List is 4*/
5908     /*************************************************************************/
5909 
5910     pu_result_t as_pu_results[2][TOT_NUM_PARTS][MAX_NUM_RESULTS_PER_PART_LIST];
5911     inter_pu_results_t as_inter_pu_results[4];
5912     inter_pu_results_t *ps_pu_results = as_inter_pu_results;
5913 
5914     /*************************************************************************/
5915     /* Config parameter structures for various ME submodules                 */
5916     /*************************************************************************/
5917     hme_merge_prms_t s_merge_prms_32x32_tl, s_merge_prms_32x32_tr;
5918     hme_merge_prms_t s_merge_prms_32x32_bl, s_merge_prms_32x32_br;
5919     hme_merge_prms_t s_merge_prms_64x64;
5920     hme_search_prms_t s_search_prms_blk;
5921     mvbank_update_prms_t s_mv_update_prms;
5922     hme_ctb_prms_t s_ctb_prms;
5923     hme_subpel_prms_t s_subpel_prms;
5924     fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_ctxt->ps_fullpel_refine_ctxt;
5925     ctb_cluster_info_t *ps_ctb_cluster_info;
5926     fpel_srch_cand_init_data_t s_srch_cand_init_data;
5927 
5928     /* 4 bits (LSBs) of this variable control merge of 4 32x32 CUs in CTB */
5929     S32 en_merge_32x32;
5930     /* 5 lsb's specify whether or not merge algorithm is required */
5931     /* to be executed or not. Relevant only in PQ. Ought to be */
5932     /* used in conjunction with en_merge_32x32 and */
5933     /* ps_ctb_bound_attrs->u1_merge_to_64x64_flag. This is */
5934     /* required when all children are deemed to be intras */
5935     S32 en_merge_execution;
5936 
5937     /*************************************************************************/
5938     /* All types of search candidates for predictor based search.            */
5939     /*************************************************************************/
5940     S32 num_init_candts = 0;
5941     S32 i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
5942     S32 i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
5943     search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
5944     search_node_t as_top_neighbours[4], as_left_neighbours[3];
5945 
5946     pf_get_wt_inp fp_get_wt_inp;
5947 
5948     search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
5949     U32 au4_unique_node_map[MAP_X_MAX * 2];
5950 
5951     /* Controls the boundary attributes of CTB, whether it has 64x64 or not */
5952     ctb_boundary_attrs_t *ps_ctb_bound_attrs;
5953 
5954     /*************************************************************************/
5955     /* Points to the search results for the blk level search (8x8/16x16)     */
5956     /*************************************************************************/
5957     search_results_t *ps_search_results;
5958 
5959     /*************************************************************************/
5960     /* Coordinates                                                           */
5961     /*************************************************************************/
5962     S32 blk_x, blk_y, i4_ctb_x, i4_ctb_y, tile_col_idx, blk_id_in_ctb;
5963     S32 pos_x, pos_y;
5964     S32 blk_id_in_full_ctb;
5965 
5966     /*************************************************************************/
5967     /* Related to dimensions of block being searched and pic dimensions      */
5968     /*************************************************************************/
5969     S32 blk_4x4_to_16x16;
5970     S32 blk_wd, blk_ht, blk_size_shift;
5971     S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
5972     S32 num_results_prev_layer;
5973 
5974     /*************************************************************************/
5975     /* Size of a basic unit for this layer. For non encode layers, we search */
5976     /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
5977     /* basic unit size is the ctb size.                                      */
5978     /*************************************************************************/
5979     S32 unit_size;
5980 
5981     /*************************************************************************/
5982     /* Local variable storing results of any 4 CU merge to bigger CU         */
5983     /*************************************************************************/
5984     CU_MERGE_RESULT_T e_merge_result;
5985 
5986     /*************************************************************************/
5987     /* This mv grid stores results during and after fpel search, during      */
5988     /* merge, subpel and bidirect refinements stages. 2 instances of this are*/
5989     /* meant for the 2 directions of search (l0 and l1).                     */
5990     /*************************************************************************/
5991     mv_grid_t *aps_mv_grid[2];
5992 
5993     /*************************************************************************/
5994     /* Pointers to context in current and coarser layers                     */
5995     /*************************************************************************/
5996     layer_ctxt_t *ps_curr_layer, *ps_prev_layer;
5997 
5998     /*************************************************************************/
5999     /* to store mv range per blk, and picture limit, allowed search range    */
6000     /* range prms in hpel and qpel units as well                             */
6001     /*************************************************************************/
6002     range_prms_t as_range_prms_inp[MAX_NUM_REF], as_range_prms_rec[MAX_NUM_REF];
6003     range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
6004     range_prms_t as_range_prms_hpel[MAX_NUM_REF], as_range_prms_qpel[MAX_NUM_REF];
6005 
6006     /*************************************************************************/
6007     /* These variables are used to track number of references at different   */
6008     /* stages of ME.                                                         */
6009     /*************************************************************************/
6010     S32 i4_num_pred_dir;
6011     S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
6012     S32 lambda_recon = ps_refine_prms->lambda_recon;
6013 
6014     /* Counts successful merge to 32x32 every CTB (0-4) */
6015     S32 merge_count_32x32;
6016 
6017     S32 ai4_id_coloc[14], ai4_id_Z[2];
6018     U08 au1_search_candidate_list_index[2];
6019     S32 ai4_num_coloc_cands[2];
6020     U08 u1_pred_dir, u1_pred_dir_ctr;
6021 
6022     /*************************************************************************/
6023     /* Input pointer and stride                                              */
6024     /*************************************************************************/
6025     U08 *pu1_inp;
6026     S32 i4_inp_stride;
6027     S32 end_of_frame;
6028     S32 num_sync_units_in_row, num_sync_units_in_tile;
6029 
6030     /*************************************************************************/
6031     /* Indicates whether the all 4 8x8 blks are valid in the 16x16 blk in the*/
6032     /* encode layer. If not 15, then 1 or more 8x8 blks not valid. Means that*/
6033     /* we need to stop merges and force 8x8 CUs for that 16x16 blk           */
6034     /*************************************************************************/
6035     S32 blk_8x8_mask;
6036     S32 ai4_blk_8x8_mask[16];
6037     U08 au1_is_64x64Blk_noisy[1];
6038     U08 au1_is_32x32Blk_noisy[4];
6039     U08 au1_is_16x16Blk_noisy[16];
6040 
6041     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
6042         ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
6043     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
6044         ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
6045 
6046     ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
6047 
6048     /*************************************************************************/
6049     /* Pointers to current and coarse layer are needed for projection */
6050     /* Pointer to prev layer are needed for other candts like coloc   */
6051     /*************************************************************************/
6052     ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
6053 
6054     ps_prev_layer = hme_get_past_layer_ctxt(
6055         ps_thrd_ctxt, ps_ctxt, ps_refine_prms->i4_layer_id, ps_multi_thrd_ctxt->i4_num_me_frm_pllel);
6056 
6057     num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
6058 
6059     /* Function pointer is selected based on the C vs X86 macro */
6060 
6061     fp_get_wt_inp = ps_me_optimised_function_list->pf_get_wt_inp_ctb;
6062 
6063     i4_inp_stride = ps_curr_layer->i4_inp_stride;
6064     i4_pic_wd = ps_curr_layer->i4_wd;
6065     i4_pic_ht = ps_curr_layer->i4_ht;
6066     e_search_complexity = ps_refine_prms->e_search_complexity;
6067     end_of_frame = 0;
6068 
6069     /* This points to all the initial candts */
6070     ps_search_candts = &as_search_candts[0];
6071 
6072     /* mv grid, being a huge structure, is part of the context */
6073     aps_mv_grid[0] = &ps_ctxt->as_mv_grid[0];
6074     aps_mv_grid[1] = &ps_ctxt->as_mv_grid[1];
6075 
6076     /*************************************************************************/
6077     /* If the current layer is encoded (since it may be multicast or final   */
6078     /* layer (finest)), then we use 16x16 blk size with some selected parts  */
6079     /* If the current layer is not encoded, then we use 8x8 blk size, with   */
6080     /* enable or disable of 4x4 partitions depending on the input prms       */
6081     /*************************************************************************/
6082     e_search_blk_size = BLK_16x16;
6083     blk_wd = blk_ht = 16;
6084     blk_size_shift = 4;
6085     e_result_blk_size = BLK_8x8;
6086     s_mv_update_prms.i4_shift = 1;
6087 
6088     if(ps_coarse_layer->ps_layer_mvbank->e_blk_size == BLK_4x4)
6089     {
6090         blk_4x4_to_16x16 = 1;
6091     }
6092     else
6093     {
6094         blk_4x4_to_16x16 = 0;
6095     }
6096 
6097     unit_size = 1 << ps_ctxt->log_ctb_size;
6098     s_search_prms_blk.i4_inp_stride = unit_size;
6099 
6100     /* This is required to properly update the layer mv bank */
6101     s_mv_update_prms.e_search_blk_size = e_search_blk_size;
6102     s_search_prms_blk.e_blk_size = e_search_blk_size;
6103 
6104     /*************************************************************************/
6105     /* If current layer is explicit, then the number of ref frames are to    */
6106     /* be same as previous layer. Else it will be 2                          */
6107     /*************************************************************************/
6108     i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
6109     i4_num_pred_dir =
6110         (ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 0) && (i4_num_act_ref_l1 > 0)) +
6111         1;
6112 
6113 #if USE_MODIFIED == 1
6114     s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
6115 #else
6116     s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
6117 #endif
6118 
6119     i4_num_pred_dir = MIN(i4_num_pred_dir, i4_num_ref_prev_layer);
6120     if(i4_num_ref_prev_layer <= 2)
6121     {
6122         i4_num_ref_each_dir = 1;
6123     }
6124     else
6125     {
6126         i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
6127     }
6128 
6129     s_mv_update_prms.i4_num_ref = i4_num_pred_dir;
6130     s_mv_update_prms.i4_num_results_to_store =
6131         MIN((ps_ctxt->s_frm_prms.bidir_enabled) ? ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref
6132                                                 : (i4_num_act_ref_l0 > 1) + 1,
6133             ps_refine_prms->i4_num_results_per_part);
6134 
6135     /*************************************************************************/
6136     /* Initialization of merge params for 16x16 to 32x32 merge.              */
6137     /* There are 4 32x32 units in a CTB, so 4 param structures initialized   */
6138     /*************************************************************************/
6139     {
6140         hme_merge_prms_t *aps_merge_prms[4];
6141         aps_merge_prms[0] = &s_merge_prms_32x32_tl;
6142         aps_merge_prms[1] = &s_merge_prms_32x32_tr;
6143         aps_merge_prms[2] = &s_merge_prms_32x32_bl;
6144         aps_merge_prms[3] = &s_merge_prms_32x32_br;
6145         for(i = 0; i < 4; i++)
6146         {
6147             hme_merge_prms_init(
6148                 aps_merge_prms[i],
6149                 ps_curr_layer,
6150                 ps_refine_prms,
6151                 ps_ctxt,
6152                 as_range_prms_rec,
6153                 as_range_prms_inp,
6154                 &aps_mv_grid[0],
6155                 &s_common_frm_prms,
6156                 i4_num_pred_dir,
6157                 i,
6158                 BLK_32x32,
6159                 e_me_quality_presets);
6160         }
6161     }
6162 
6163     /*************************************************************************/
6164     /* Initialization of merge params for 32x32 to 64x64 merge.              */
6165     /* There are 4 32x32 units in a CTB, so only 1 64x64 CU can be in CTB    */
6166     /*************************************************************************/
6167     {
6168         hme_merge_prms_init(
6169             &s_merge_prms_64x64,
6170             ps_curr_layer,
6171             ps_refine_prms,
6172             ps_ctxt,
6173             as_range_prms_rec,
6174             as_range_prms_inp,
6175             &aps_mv_grid[0],
6176             &s_common_frm_prms,
6177             i4_num_pred_dir,
6178             0,
6179             BLK_64x64,
6180             e_me_quality_presets);
6181     }
6182 
6183     /* Pointers to cu_results are initialised here */
6184     {
6185         WORD32 i;
6186 
6187         ps_ctxt->s_search_results_64x64.ps_cu_results = &ps_ctxt->s_cu64x64_results;
6188 
6189         for(i = 0; i < 4; i++)
6190         {
6191             ps_ctxt->as_search_results_32x32[i].ps_cu_results = &ps_ctxt->as_cu32x32_results[i];
6192         }
6193 
6194         for(i = 0; i < 16; i++)
6195         {
6196             ps_ctxt->as_search_results_16x16[i].ps_cu_results = &ps_ctxt->as_cu16x16_results[i];
6197         }
6198     }
6199 
6200     /*************************************************************************/
6201     /* SUBPEL Params initialized here                                        */
6202     /*************************************************************************/
6203     {
6204         s_subpel_prms.ps_search_results_16x16 = &ps_ctxt->as_search_results_16x16[0];
6205         s_subpel_prms.ps_search_results_32x32 = &ps_ctxt->as_search_results_32x32[0];
6206         s_subpel_prms.ps_search_results_64x64 = &ps_ctxt->s_search_results_64x64;
6207 
6208         s_subpel_prms.i4_num_16x16_candts = ps_refine_prms->i4_num_fpel_results;
6209         s_subpel_prms.i4_num_32x32_candts = ps_refine_prms->i4_num_32x32_merge_results;
6210         s_subpel_prms.i4_num_64x64_candts = ps_refine_prms->i4_num_64x64_merge_results;
6211 
6212         s_subpel_prms.i4_num_steps_hpel_refine = ps_refine_prms->i4_num_steps_hpel_refine;
6213         s_subpel_prms.i4_num_steps_qpel_refine = ps_refine_prms->i4_num_steps_qpel_refine;
6214 
6215         s_subpel_prms.i4_use_satd = ps_refine_prms->i4_use_satd_subpel;
6216 
6217         s_subpel_prms.i4_inp_stride = unit_size;
6218 
6219         s_subpel_prms.u1_max_subpel_candts_2Nx2N = ps_refine_prms->u1_max_subpel_candts_2Nx2N;
6220         s_subpel_prms.u1_max_subpel_candts_NxN = ps_refine_prms->u1_max_subpel_candts_NxN;
6221         s_subpel_prms.u1_subpel_candt_threshold = ps_refine_prms->u1_subpel_candt_threshold;
6222 
6223         s_subpel_prms.pf_qpel_interp = ps_me_optimised_function_list->pf_qpel_interp_avg_generic;
6224 
6225         {
6226             WORD32 ref_ctr;
6227             for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6228             {
6229                 s_subpel_prms.aps_mv_range_hpel[ref_ctr] = &as_range_prms_hpel[ref_ctr];
6230                 s_subpel_prms.aps_mv_range_qpel[ref_ctr] = &as_range_prms_qpel[ref_ctr];
6231             }
6232         }
6233         s_subpel_prms.pi2_inp_bck = ps_ctxt->pi2_inp_bck;
6234 
6235 #if USE_MODIFIED == 0
6236         s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
6237 #else
6238         s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
6239 #endif
6240         s_subpel_prms.e_me_quality_presets = e_me_quality_presets;
6241 
6242         /* BI Refinement done only if this field is 1 */
6243         s_subpel_prms.bidir_enabled = ps_refine_prms->bidir_enabled;
6244 
6245         s_subpel_prms.u1_num_ref = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
6246 
6247         s_subpel_prms.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6248         s_subpel_prms.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6249         s_subpel_prms.u1_max_num_subpel_refine_centers =
6250             ps_refine_prms->u1_max_num_subpel_refine_centers;
6251     }
6252 
6253     /* inter_ctb_prms_t struct initialisation */
6254     {
6255         inter_ctb_prms_t *ps_inter_ctb_prms = &s_common_frm_prms;
6256         hme_subpel_prms_t *ps_subpel_prms = &s_subpel_prms;
6257 
6258         ps_inter_ctb_prms->pps_rec_list_l0 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l0;
6259         ps_inter_ctb_prms->pps_rec_list_l1 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l1;
6260         ps_inter_ctb_prms->wpred_log_wdc = ps_ctxt->s_wt_pred.wpred_log_wdc;
6261         ps_inter_ctb_prms->u1_max_tr_depth = ps_thrd_ctxt->s_init_prms.u1_max_tr_depth;
6262         ps_inter_ctb_prms->i1_quality_preset = e_me_quality_presets;
6263         ps_inter_ctb_prms->i4_bidir_enabled = ps_subpel_prms->bidir_enabled;
6264         ps_inter_ctb_prms->i4_inp_stride = ps_subpel_prms->i4_inp_stride;
6265         ps_inter_ctb_prms->u1_num_ref = ps_subpel_prms->u1_num_ref;
6266         ps_inter_ctb_prms->u1_use_satd = ps_subpel_prms->i4_use_satd;
6267         ps_inter_ctb_prms->i4_rec_stride = ps_curr_layer->i4_rec_stride;
6268         ps_inter_ctb_prms->u1_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6269         ps_inter_ctb_prms->u1_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6270         ps_inter_ctb_prms->i4_lamda = lambda_recon;
6271         ps_inter_ctb_prms->u1_lamda_qshift = ps_refine_prms->lambda_q_shift;
6272         ps_inter_ctb_prms->i4_qstep_ls8 = ps_ctxt->ps_hme_frm_prms->qstep_ls8;
6273         ps_inter_ctb_prms->pi4_inv_wt = ps_ctxt->s_wt_pred.a_inv_wpred_wt;
6274         ps_inter_ctb_prms->pi1_past_list = ps_ctxt->ai1_past_list;
6275         ps_inter_ctb_prms->pi1_future_list = ps_ctxt->ai1_future_list;
6276         ps_inter_ctb_prms->pu4_src_variance = s_search_prms_blk.au4_src_variance;
6277         ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands =
6278             ps_refine_prms->u1_max_2nx2n_tu_recur_cands;
6279     }
6280 
6281     for(i = 0; i < MAX_INIT_CANDTS; i++)
6282     {
6283         ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
6284         ps_search_candts[i].ps_search_node->ps_mv = &ps_ctxt->as_search_cand_mv[i];
6285 
6286         INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
6287     }
6288     num_act_ref_pics =
6289         ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6290 
6291     if(num_act_ref_pics)
6292     {
6293         hme_search_cand_data_init(
6294             ai4_id_Z,
6295             ai4_id_coloc,
6296             ai4_num_coloc_cands,
6297             au1_search_candidate_list_index,
6298             i4_num_act_ref_l0,
6299             i4_num_act_ref_l1,
6300             ps_ctxt->s_frm_prms.bidir_enabled,
6301             blk_4x4_to_16x16);
6302     }
6303 
6304     if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 1))
6305     {
6306         ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
6307         ps_search_candts[ai4_id_Z[1]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[1];
6308     }
6309     else if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 == 1))
6310     {
6311         ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
6312     }
6313 
6314     for(i = 0; i < 3; i++)
6315     {
6316         search_node_t *ps_search_node;
6317         ps_search_node = &as_left_neighbours[i];
6318         INIT_SEARCH_NODE(ps_search_node, 0);
6319         ps_search_node = &as_top_neighbours[i];
6320         INIT_SEARCH_NODE(ps_search_node, 0);
6321     }
6322 
6323     INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
6324     as_left_neighbours[2].u1_is_avail = 0;
6325 
6326     /*************************************************************************/
6327     /* Initialize all the search results structure here. We update all the   */
6328     /* search results to default values, and configure things like blk sizes */
6329     /*************************************************************************/
6330     if(num_act_ref_pics)
6331     {
6332         S32 i4_x, i4_y;
6333         /* 16x16 results */
6334         for(i = 0; i < 16; i++)
6335         {
6336             search_results_t *ps_search_results;
6337             S32 pred_lx;
6338             ps_search_results = &ps_ctxt->as_search_results_16x16[i];
6339             i4_x = (S32)gau1_encode_to_raster_x[i];
6340             i4_y = (S32)gau1_encode_to_raster_y[i];
6341             i4_x <<= 4;
6342             i4_y <<= 4;
6343 
6344             hme_init_search_results(
6345                 ps_search_results,
6346                 i4_num_pred_dir,
6347                 ps_refine_prms->i4_num_fpel_results,
6348                 ps_refine_prms->i4_num_results_per_part,
6349                 e_search_blk_size,
6350                 i4_x,
6351                 i4_y,
6352                 &ps_ctxt->au1_is_past[0]);
6353 
6354             for(pred_lx = 0; pred_lx < 2; pred_lx++)
6355             {
6356                 pred_ctxt_t *ps_pred_ctxt;
6357 
6358                 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6359 
6360                 hme_init_pred_ctxt_encode(
6361                     ps_pred_ctxt,
6362                     ps_search_results,
6363                     ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6364                     ps_search_candts[ai4_id_Z[0]].ps_search_node,
6365                     aps_mv_grid[pred_lx],
6366                     pred_lx,
6367                     lambda_recon,
6368                     ps_refine_prms->lambda_q_shift,
6369                     &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6370                     &ps_ctxt->ai2_ref_scf[0]);
6371             }
6372         }
6373 
6374         for(i = 0; i < 4; i++)
6375         {
6376             search_results_t *ps_search_results;
6377             S32 pred_lx;
6378             ps_search_results = &ps_ctxt->as_search_results_32x32[i];
6379 
6380             i4_x = (S32)gau1_encode_to_raster_x[i];
6381             i4_y = (S32)gau1_encode_to_raster_y[i];
6382             i4_x <<= 5;
6383             i4_y <<= 5;
6384 
6385             hme_init_search_results(
6386                 ps_search_results,
6387                 i4_num_pred_dir,
6388                 ps_refine_prms->i4_num_32x32_merge_results,
6389                 ps_refine_prms->i4_num_results_per_part,
6390                 BLK_32x32,
6391                 i4_x,
6392                 i4_y,
6393                 &ps_ctxt->au1_is_past[0]);
6394 
6395             for(pred_lx = 0; pred_lx < 2; pred_lx++)
6396             {
6397                 pred_ctxt_t *ps_pred_ctxt;
6398 
6399                 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6400 
6401                 hme_init_pred_ctxt_encode(
6402                     ps_pred_ctxt,
6403                     ps_search_results,
6404                     ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6405                     ps_search_candts[ai4_id_Z[0]].ps_search_node,
6406                     aps_mv_grid[pred_lx],
6407                     pred_lx,
6408                     lambda_recon,
6409                     ps_refine_prms->lambda_q_shift,
6410                     &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6411                     &ps_ctxt->ai2_ref_scf[0]);
6412             }
6413         }
6414 
6415         {
6416             search_results_t *ps_search_results;
6417             S32 pred_lx;
6418             ps_search_results = &ps_ctxt->s_search_results_64x64;
6419 
6420             hme_init_search_results(
6421                 ps_search_results,
6422                 i4_num_pred_dir,
6423                 ps_refine_prms->i4_num_64x64_merge_results,
6424                 ps_refine_prms->i4_num_results_per_part,
6425                 BLK_64x64,
6426                 0,
6427                 0,
6428                 &ps_ctxt->au1_is_past[0]);
6429 
6430             for(pred_lx = 0; pred_lx < 2; pred_lx++)
6431             {
6432                 pred_ctxt_t *ps_pred_ctxt;
6433 
6434                 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6435 
6436                 hme_init_pred_ctxt_encode(
6437                     ps_pred_ctxt,
6438                     ps_search_results,
6439                     ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6440                     ps_search_candts[ai4_id_Z[0]].ps_search_node,
6441                     aps_mv_grid[pred_lx],
6442                     pred_lx,
6443                     lambda_recon,
6444                     ps_refine_prms->lambda_q_shift,
6445                     &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6446                     &ps_ctxt->ai2_ref_scf[0]);
6447             }
6448         }
6449     }
6450 
6451     /* Initialise the structure used in clustering  */
6452     if(ME_PRISTINE_QUALITY == e_me_quality_presets)
6453     {
6454         ps_ctb_cluster_info = ps_ctxt->ps_ctb_cluster_info;
6455 
6456         ps_ctb_cluster_info->ps_16x16_blk = ps_ctxt->ps_blk_16x16;
6457         ps_ctb_cluster_info->ps_32x32_blk = ps_ctxt->ps_blk_32x32;
6458         ps_ctb_cluster_info->ps_64x64_blk = ps_ctxt->ps_blk_64x64;
6459         ps_ctb_cluster_info->pi4_blk_8x8_mask = ai4_blk_8x8_mask;
6460         ps_ctb_cluster_info->sdi_threshold = ps_refine_prms->sdi_threshold;
6461         ps_ctb_cluster_info->i4_frame_qstep = ps_ctxt->frm_qstep;
6462         ps_ctb_cluster_info->i4_frame_qstep_multiplier = 16;
6463     }
6464 
6465     /*********************************************************************/
6466     /* Initialize the dyn. search range params. for each reference index */
6467     /* in current layer ctxt                                             */
6468     /*********************************************************************/
6469 
6470     /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
6471     if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
6472     {
6473         WORD32 ref_ctr;
6474         /* set no. of act ref in L0 for further use at frame level */
6475         ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_num_act_ref_in_l0 =
6476             ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6477 
6478         for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++)
6479         {
6480             INIT_DYN_SEARCH_PRMS(
6481                 &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[ref_ctr],
6482                 ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
6483         }
6484     }
6485     /*************************************************************************/
6486     /* Now that the candidates have been ordered, to choose the right number */
6487     /* of initial candidates.                                                */
6488     /*************************************************************************/
6489     if(blk_4x4_to_16x16)
6490     {
6491         if(i4_num_ref_prev_layer > 2)
6492         {
6493             if(e_search_complexity == SEARCH_CX_LOW)
6494                 num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6495             else if(e_search_complexity == SEARCH_CX_MED)
6496                 num_init_candts = 14 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6497             else if(e_search_complexity == SEARCH_CX_HIGH)
6498                 num_init_candts = 21 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6499             else
6500                 ASSERT(0);
6501         }
6502         else if(i4_num_ref_prev_layer == 2)
6503         {
6504             if(e_search_complexity == SEARCH_CX_LOW)
6505                 num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6506             else if(e_search_complexity == SEARCH_CX_MED)
6507                 num_init_candts = 12 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6508             else if(e_search_complexity == SEARCH_CX_HIGH)
6509                 num_init_candts = 19 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6510             else
6511                 ASSERT(0);
6512         }
6513         else
6514         {
6515             if(e_search_complexity == SEARCH_CX_LOW)
6516                 num_init_candts = 5;
6517             else if(e_search_complexity == SEARCH_CX_MED)
6518                 num_init_candts = 12;
6519             else if(e_search_complexity == SEARCH_CX_HIGH)
6520                 num_init_candts = 19;
6521             else
6522                 ASSERT(0);
6523         }
6524     }
6525     else
6526     {
6527         if(i4_num_ref_prev_layer > 2)
6528         {
6529             if(e_search_complexity == SEARCH_CX_LOW)
6530                 num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6531             else if(e_search_complexity == SEARCH_CX_MED)
6532                 num_init_candts = 13 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6533             else if(e_search_complexity == SEARCH_CX_HIGH)
6534                 num_init_candts = 18 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6535             else
6536                 ASSERT(0);
6537         }
6538         else if(i4_num_ref_prev_layer == 2)
6539         {
6540             if(e_search_complexity == SEARCH_CX_LOW)
6541                 num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6542             else if(e_search_complexity == SEARCH_CX_MED)
6543                 num_init_candts = 11 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6544             else if(e_search_complexity == SEARCH_CX_HIGH)
6545                 num_init_candts = 16 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6546             else
6547                 ASSERT(0);
6548         }
6549         else
6550         {
6551             if(e_search_complexity == SEARCH_CX_LOW)
6552                 num_init_candts = 5;
6553             else if(e_search_complexity == SEARCH_CX_MED)
6554                 num_init_candts = 11;
6555             else if(e_search_complexity == SEARCH_CX_HIGH)
6556                 num_init_candts = 16;
6557             else
6558                 ASSERT(0);
6559         }
6560     }
6561 
6562     /*************************************************************************/
6563     /* The following search parameters are fixed throughout the search across*/
6564     /* all blks. So these are configured outside processing loop             */
6565     /*************************************************************************/
6566     s_search_prms_blk.i4_num_init_candts = num_init_candts;
6567     s_search_prms_blk.i4_start_step = 1;
6568     s_search_prms_blk.i4_use_satd = 0;
6569     s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
6570     /* we use recon only for encoded layers, otherwise it is not available */
6571     s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
6572 
6573     s_search_prms_blk.ps_search_candts = ps_search_candts;
6574     if(s_search_prms_blk.i4_use_rec)
6575     {
6576         WORD32 ref_ctr;
6577         for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6578             s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_rec[ref_ctr];
6579     }
6580     else
6581     {
6582         WORD32 ref_ctr;
6583         for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6584             s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_inp[ref_ctr];
6585     }
6586 
6587     /*************************************************************************/
6588     /* Initialize coordinates. Meaning as follows                            */
6589     /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks     */
6590     /* blk_y : same as above, y coord.                                       */
6591     /* num_blks_in_this_ctb : number of blks in this given ctb that starts   */
6592     /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries.      */
6593     /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left  */
6594     /* corner of the picture. Always multiple of 64.                         */
6595     /* blk_id_in_ctb : encode order id of the blk in the ctb.                */
6596     /*************************************************************************/
6597     blk_y = 0;
6598     blk_id_in_ctb = 0;
6599     i4_ctb_y = 0;
6600 
6601     /*************************************************************************/
6602     /* Picture limit on all 4 sides. This will be used to set mv limits for  */
6603     /* every block given its coordinate. Note this assumes that the min amt  */
6604     /* of padding to right of pic is equal to the blk size. If we go all the */
6605     /* way upto 64x64, then the min padding on right side of picture should  */
6606     /* be 64, and also on bottom side of picture.                            */
6607     /*************************************************************************/
6608     SET_PIC_LIMIT(
6609         s_pic_limit_inp,
6610         ps_curr_layer->i4_pad_x_rec,
6611         ps_curr_layer->i4_pad_y_rec,
6612         ps_curr_layer->i4_wd,
6613         ps_curr_layer->i4_ht,
6614         s_search_prms_blk.i4_num_steps_post_refine);
6615 
6616     SET_PIC_LIMIT(
6617         s_pic_limit_rec,
6618         ps_curr_layer->i4_pad_x_rec,
6619         ps_curr_layer->i4_pad_y_rec,
6620         ps_curr_layer->i4_wd,
6621         ps_curr_layer->i4_ht,
6622         s_search_prms_blk.i4_num_steps_post_refine);
6623 
6624     /*************************************************************************/
6625     /* set the MV limit per ref. pic.                                        */
6626     /*    - P pic. : Based on the config params.                             */
6627     /*    - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
6628     /*************************************************************************/
6629     hme_set_mv_limit_using_dvsr_data(
6630         ps_ctxt, ps_curr_layer, as_mv_limit, &i2_prev_enc_frm_max_mv_y, num_act_ref_pics);
6631     s_srch_cand_init_data.pu1_num_fpel_search_cands = ps_refine_prms->au1_num_fpel_search_cands;
6632     s_srch_cand_init_data.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6633     s_srch_cand_init_data.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6634     s_srch_cand_init_data.ps_coarse_layer = ps_coarse_layer;
6635     s_srch_cand_init_data.ps_curr_layer = ps_curr_layer;
6636     s_srch_cand_init_data.i4_max_num_init_cands = num_init_candts;
6637     s_srch_cand_init_data.ps_search_cands = ps_search_candts;
6638     s_srch_cand_init_data.u1_num_results_in_mvbank = s_mv_update_prms.i4_num_results_to_store;
6639     s_srch_cand_init_data.pi4_ref_id_lc_to_l0_map = ps_ctxt->a_ref_idx_lc_to_l0;
6640     s_srch_cand_init_data.pi4_ref_id_lc_to_l1_map = ps_ctxt->a_ref_idx_lc_to_l1;
6641     s_srch_cand_init_data.e_search_blk_size = e_search_blk_size;
6642 
6643     while(0 == end_of_frame)
6644     {
6645         job_queue_t *ps_job;
6646         frm_ctb_ctxt_t *ps_frm_ctb_prms;
6647         ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
6648 
6649         WORD32 i4_max_mv_x_in_ctb;
6650         WORD32 i4_max_mv_y_in_ctb;
6651         void *pv_dep_mngr_encloop_dep_me;
6652         WORD32 offset_val, check_dep_pos, set_dep_pos;
6653         WORD32 left_ctb_in_diff_tile, i4_first_ctb_x = 0;
6654 
6655         pv_dep_mngr_encloop_dep_me = ps_ctxt->pv_dep_mngr_encloop_dep_me;
6656 
6657         ps_frm_ctb_prms = (frm_ctb_ctxt_t *)ps_thrd_ctxt->pv_ext_frm_prms;
6658 
6659         /* Get the current row from the job queue */
6660         ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
6661             ps_multi_thrd_ctxt, lyr_job_type, 1, me_frm_id);
6662 
6663         /* If all rows are done, set the end of process flag to 1, */
6664         /* and the current row to -1 */
6665         if(NULL == ps_job)
6666         {
6667             blk_y = -1;
6668             i4_ctb_y = -1;
6669             tile_col_idx = -1;
6670             end_of_frame = 1;
6671 
6672             continue;
6673         }
6674 
6675         /* set the output dependency after picking up the row */
6676         ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, me_frm_id);
6677 
6678         /* Obtain the current row's details from the job */
6679         {
6680             ihevce_tile_params_t *ps_col_tile_params;
6681 
6682             i4_ctb_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
6683             /* Obtain the current column tile index from the job */
6684             tile_col_idx = ps_job->s_job_info.s_me_job_info.i4_tile_col_idx;
6685 
6686             /* in encode layer block are 16x16 and CTB is 64 x 64 */
6687             /* note if ctb is 32x32 then this calc needs to be changed */
6688             num_sync_units_in_row = (i4_pic_wd + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
6689                                     ps_ctxt->log_ctb_size;
6690 
6691             /* The tile parameter for the col. idx. Use only the properties
6692             which is same for all the bottom tiles like width, start_x, etc.
6693             Don't use height, start_y, etc.                                  */
6694             ps_col_tile_params =
6695                 ((ihevce_tile_params_t *)ps_thrd_ctxt->pv_tile_params_base + tile_col_idx);
6696             /* in encode layer block are 16x16 and CTB is 64 x 64 */
6697             /* note if ctb is 32x32 then this calc needs to be changed */
6698             num_sync_units_in_tile =
6699                 (ps_col_tile_params->i4_curr_tile_width + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
6700                 ps_ctxt->log_ctb_size;
6701 
6702             i4_first_ctb_x = ps_col_tile_params->i4_first_ctb_x;
6703             i4_ctb_x = i4_first_ctb_x;
6704 
6705             if(!num_act_ref_pics)
6706             {
6707                 for(i4_ctb_x = i4_first_ctb_x;
6708                     i4_ctb_x < (ps_col_tile_params->i4_first_ctb_x + num_sync_units_in_tile);
6709                     i4_ctb_x++)
6710                 {
6711                     S32 blk_i = 0, blk_j = 0;
6712                     /* set the dependency for the corresponding row in enc loop */
6713                     ihevce_dmgr_set_row_row_sync(
6714                         pv_dep_mngr_encloop_dep_me,
6715                         (i4_ctb_x + 1),
6716                         i4_ctb_y,
6717                         tile_col_idx /* Col Tile No. */);
6718                 }
6719 
6720                 continue;
6721             }
6722 
6723             /* increment the number of rows proc */
6724             num_rows_proc++;
6725 
6726             /* Set Variables for Dep. Checking and Setting */
6727             set_dep_pos = i4_ctb_y + 1;
6728             if(i4_ctb_y > 0)
6729             {
6730                 offset_val = 2;
6731                 check_dep_pos = i4_ctb_y - 1;
6732             }
6733             else
6734             {
6735                 /* First row should run without waiting */
6736                 offset_val = -1;
6737                 check_dep_pos = 0;
6738             }
6739 
6740             /* row ctb out pointer  */
6741             ps_ctxt->ps_ctb_analyse_curr_row =
6742                 ps_ctxt->ps_ctb_analyse_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6743 
6744             /* Row level CU Tree buffer */
6745             ps_ctxt->ps_cu_tree_curr_row =
6746                 ps_ctxt->ps_cu_tree_base +
6747                 i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE;
6748 
6749             ps_ctxt->ps_me_ctb_data_curr_row =
6750                 ps_ctxt->ps_me_ctb_data_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6751         }
6752 
6753         /* This flag says the CTB under processing is at the start of tile in horz dir.*/
6754         left_ctb_in_diff_tile = 1;
6755 
6756         /* To make sure no 64-bit overflow happens when inv_wt is multiplied with un-normalized src_var,                                    */
6757         /* the shift value will be passed onto the functions wherever inv_wt is used so that inv_wt is appropriately shifted and multiplied */
6758         {
6759             S32 i4_ref_id, i4_bits_req;
6760 
6761             for(i4_ref_id = 0; i4_ref_id < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
6762                                             ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
6763                 i4_ref_id++)
6764             {
6765                 GETRANGE(i4_bits_req, ps_ctxt->s_wt_pred.a_inv_wpred_wt[i4_ref_id]);
6766 
6767                 if(i4_bits_req > 12)
6768                 {
6769                     ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = (i4_bits_req - 12);
6770                 }
6771                 else
6772                 {
6773                     ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = 0;
6774                 }
6775             }
6776 
6777             s_common_frm_prms.pi4_inv_wt_shift_val = ps_ctxt->s_wt_pred.ai4_shift_val;
6778         }
6779 
6780         /* if non-encode layer then i4_ctb_x will be same as blk_x */
6781         /* loop over all the units in a row                        */
6782         for(i4_ctb_x = i4_first_ctb_x; i4_ctb_x < (i4_first_ctb_x + num_sync_units_in_tile);
6783             i4_ctb_x++)
6784         {
6785             ihevce_ctb_noise_params *ps_ctb_noise_params =
6786                 &ps_ctxt->ps_ctb_analyse_curr_row[i4_ctb_x].s_ctb_noise_params;
6787 
6788             s_common_frm_prms.i4_ctb_x_off = i4_ctb_x << 6;
6789             s_common_frm_prms.i4_ctb_y_off = i4_ctb_y << 6;
6790 
6791             ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = i4_ctb_y << 6;
6792             ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = i4_ctb_x << 6;
6793             /* Initialize ptr to current IPE CTB */
6794             ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x +
6795                              i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6796             {
6797                 ps_ctb_bound_attrs =
6798                     get_ctb_attrs(i4_ctb_x << 6, i4_ctb_y << 6, i4_pic_wd, i4_pic_ht, ps_ctxt);
6799 
6800                 en_merge_32x32 = ps_ctb_bound_attrs->u1_merge_to_32x32_flag;
6801                 num_blks_in_this_ctb = ps_ctb_bound_attrs->u1_num_blks_in_ctb;
6802             }
6803 
6804             /* Block to initialise pointers to part_type_results_t */
6805             /* in each size-specific inter_cu_results_t  */
6806             {
6807                 WORD32 i;
6808 
6809                 for(i = 0; i < 64; i++)
6810                 {
6811                     ps_ctxt->as_cu8x8_results[i].ps_best_results =
6812                         ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
6813                             .as_8x8_block_data[i]
6814                             .as_best_results;
6815                     ps_ctxt->as_cu8x8_results[i].u1_num_best_results = 0;
6816                 }
6817 
6818                 for(i = 0; i < 16; i++)
6819                 {
6820                     ps_ctxt->as_cu16x16_results[i].ps_best_results =
6821                         ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].as_block_data[i].as_best_results;
6822                     ps_ctxt->as_cu16x16_results[i].u1_num_best_results = 0;
6823                 }
6824 
6825                 for(i = 0; i < 4; i++)
6826                 {
6827                     ps_ctxt->as_cu32x32_results[i].ps_best_results =
6828                         ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
6829                             .as_32x32_block_data[i]
6830                             .as_best_results;
6831                     ps_ctxt->as_cu32x32_results[i].u1_num_best_results = 0;
6832                 }
6833 
6834                 ps_ctxt->s_cu64x64_results.ps_best_results =
6835                     ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].s_64x64_block_data.as_best_results;
6836                 ps_ctxt->s_cu64x64_results.u1_num_best_results = 0;
6837             }
6838 
6839             if(ME_PRISTINE_QUALITY == e_me_quality_presets)
6840             {
6841                 ps_ctb_cluster_info->blk_32x32_mask = en_merge_32x32;
6842                 ps_ctb_cluster_info->ps_cur_ipe_ctb = ps_cur_ipe_ctb;
6843                 ps_ctb_cluster_info->ps_cu_tree_root =
6844                     ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
6845                 ps_ctb_cluster_info->nodes_created_in_cu_tree = 1;
6846             }
6847 
6848             if(ME_PRISTINE_QUALITY != e_me_quality_presets)
6849             {
6850                 S32 i4_nodes_created_in_cu_tree = 1;
6851 
6852                 ihevce_cu_tree_init(
6853                     (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
6854                     (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
6855                     &i4_nodes_created_in_cu_tree,
6856                     0,
6857                     POS_NA,
6858                     POS_NA,
6859                     POS_NA);
6860             }
6861 
6862             memset(ai4_blk_8x8_mask, 0, 16 * sizeof(S32));
6863 
6864             if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
6865             {
6866                 S32 j;
6867 
6868                 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
6869 
6870                 ps_cur_ipe_ctb =
6871                     ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x + i4_ctb_y * num_sync_units_in_row;
6872                 lambda_recon =
6873                     hme_recompute_lambda_from_min_8x8_act_in_ctb(ps_ctxt, ps_cur_ipe_ctb);
6874 
6875                 lambda_recon = ((float)lambda_recon * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f);
6876 
6877                 for(i = 0; i < 4; i++)
6878                 {
6879                     ps_search_results = &ps_ctxt->as_search_results_32x32[i];
6880 
6881                     for(j = 0; j < 2; j++)
6882                     {
6883                         ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
6884                     }
6885                 }
6886                 ps_search_results = &ps_ctxt->s_search_results_64x64;
6887 
6888                 for(j = 0; j < 2; j++)
6889                 {
6890                     ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
6891                 }
6892 
6893                 s_common_frm_prms.i4_lamda = lambda_recon;
6894             }
6895             else
6896             {
6897                 lambda_recon = ps_refine_prms->lambda_recon;
6898             }
6899 
6900             /*********************************************************************/
6901             /* replicate the inp buffer at blk or ctb level for each ref id,     */
6902             /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
6903             /* thereby avoiding a bloat up of memory. If we did all references   */
6904             /* weighted pred, we will end up with a duplicate copy of each ref   */
6905             /* at each layer, since we need to preserve the original reference.  */
6906             /* ToDo: Need to observe performance with this mechanism and compare */
6907             /* with case where ref is weighted.                                  */
6908             /*********************************************************************/
6909             fp_get_wt_inp(
6910                 ps_curr_layer,
6911                 &ps_ctxt->s_wt_pred,
6912                 unit_size,
6913                 s_common_frm_prms.i4_ctb_x_off,
6914                 s_common_frm_prms.i4_ctb_y_off,
6915                 unit_size,
6916                 ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
6917                 ps_ctxt->i4_wt_pred_enable_flag);
6918 
6919             if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled)
6920             {
6921 #if TEMPORAL_NOISE_DETECT
6922                 {
6923                     WORD32 had_block_size = 16;
6924                     WORD32 ctb_width = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
6925                                            ? 64
6926                                            : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
6927                     WORD32 ctb_height = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
6928                                             ? 64
6929                                             : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
6930                     WORD32 num_pred_dir = i4_num_pred_dir;
6931                     WORD32 i4_x_off = s_common_frm_prms.i4_ctb_x_off;
6932                     WORD32 i4_y_off = s_common_frm_prms.i4_ctb_y_off;
6933 
6934                     WORD32 i;
6935                     WORD32 noise_detected;
6936                     WORD32 ctb_size;
6937                     WORD32 num_comp_had_blocks;
6938                     WORD32 noisy_block_cnt;
6939                     WORD32 index_8x8_block;
6940                     WORD32 num_8x8_in_ctb_row;
6941 
6942                     WORD32 ht_offset;
6943                     WORD32 wd_offset;
6944                     WORD32 block_ht;
6945                     WORD32 block_wd;
6946 
6947                     WORD32 num_horz_blocks;
6948                     WORD32 num_vert_blocks;
6949 
6950                     WORD32 mean;
6951                     UWORD32 variance_8x8;
6952 
6953                     WORD32 hh_energy_percent;
6954 
6955                     /* variables to hold the constant values. The variable values held are decided by the HAD block size */
6956                     WORD32 min_noisy_block_cnt;
6957                     WORD32 min_coeffs_above_avg;
6958                     WORD32 min_coeff_avg_energy;
6959 
6960                     /* to store the mean and variance of each 8*8 block and find the variance of any higher block sizes later on. block */
6961                     WORD32 i4_cu_x_off, i4_cu_y_off;
6962                     WORD32 is_noisy;
6963 
6964                     /* initialise the variables holding the constants */
6965                     if(had_block_size == 8)
6966                     {
6967                         min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_8x8;  //6;//
6968                         min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_8x8;
6969                         min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_8x8;
6970                     }
6971                     else
6972                     {
6973                         min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_16x16;  //7;//
6974                         min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_16x16;
6975                         min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_16x16;
6976                     }
6977 
6978                     /* initialize the variables */
6979                     noise_detected = 0;
6980                     noisy_block_cnt = 0;
6981                     hh_energy_percent = 0;
6982                     variance_8x8 = 0;
6983                     block_ht = ctb_height;
6984                     block_wd = ctb_width;
6985 
6986                     mean = 0;
6987 
6988                     ctb_size = block_ht * block_wd;  //ctb_width * ctb_height;
6989                     num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
6990 
6991                     num_horz_blocks = block_wd / had_block_size;  //ctb_width / had_block_size;
6992                     num_vert_blocks = block_ht / had_block_size;  //ctb_height / had_block_size;
6993 
6994                     ht_offset = -had_block_size;
6995                     wd_offset = -had_block_size;
6996 
6997                     num_8x8_in_ctb_row = block_wd / 8;  // number of 8x8 in this ctb
6998                     for(i = 0; i < num_comp_had_blocks; i++)
6999                     {
7000                         if(i % num_horz_blocks == 0)
7001                         {
7002                             wd_offset = -had_block_size;
7003                             ht_offset += had_block_size;
7004                         }
7005                         wd_offset += had_block_size;
7006 
7007                         /* CU level offsets */
7008                         i4_cu_x_off = i4_x_off + (i % 4) * 16;  //+ (i % 4) * 16
7009                         i4_cu_y_off = i4_y_off + (i / 4) * 16;
7010 
7011                         /* if 50 % or more of the CU is noisy then the return value is 1 */
7012                         is_noisy = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7013                             ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7014                             (i % 4) * 16,
7015                             (i / 4) * 16,
7016                             16);
7017 
7018                         /* the temporal noise detect call is made on the CU only if the CU is noisy */
7019                         if(is_noisy)
7020                         {
7021                             index_8x8_block = (i / num_horz_blocks) * 2 * num_8x8_in_ctb_row +
7022                                               (i % num_horz_blocks) * 2;
7023                             noisy_block_cnt += ihevce_16x16block_temporal_noise_detect(
7024                                 16,
7025                                 ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
7026                                     ? 64
7027                                     : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off,
7028                                 ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
7029                                     ? 64
7030                                     : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off,
7031                                 ps_ctb_noise_params,
7032                                 &s_srch_cand_init_data,
7033                                 &s_search_prms_blk,
7034                                 ps_ctxt,
7035                                 num_pred_dir,
7036                                 i4_num_act_ref_l0,
7037                                 i4_num_act_ref_l1,
7038                                 i4_cu_x_off,
7039                                 i4_cu_y_off,
7040                                 &ps_ctxt->s_wt_pred,
7041                                 unit_size,
7042                                 index_8x8_block,
7043                                 num_horz_blocks,
7044                                 /*num_8x8_in_ctb_row*/ 8,  // this should be a variable extra
7045                                 i);
7046                         } /* if 16x16 is noisy */
7047                     } /* loop over for all 16x16*/
7048 
7049                     if(noisy_block_cnt >= min_noisy_block_cnt)
7050                     {
7051                         noise_detected = 1;
7052                     }
7053 
7054                     /* write back the noise presence detected for the current CTB to the structure */
7055                     ps_ctb_noise_params->i4_noise_present = noise_detected;
7056                 }
7057 #endif
7058 
7059 #if EVERYWHERE_NOISY && USE_NOISE_TERM_IN_L0_ME
7060                 if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled &&
7061                    ps_ctb_noise_params->i4_noise_present)
7062                 {
7063                     memset(
7064                         ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7065                         1,
7066                         sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
7067                 }
7068 #endif
7069 
7070                 for(i = 0; i < 16; i++)
7071                 {
7072                     au1_is_16x16Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7073                         ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 4) * 16, (i / 4) * 16, 16);
7074                 }
7075 
7076                 for(i = 0; i < 4; i++)
7077                 {
7078                     au1_is_32x32Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7079                         ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 2) * 32, (i / 2) * 32, 32);
7080                 }
7081 
7082                 for(i = 0; i < 1; i++)
7083                 {
7084                     au1_is_64x64Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7085                         ps_ctb_noise_params->au1_is_8x8Blk_noisy, 0, 0, 64);
7086                 }
7087 
7088                 if(ps_ctxt->s_frm_prms.bidir_enabled &&
7089                    (ps_ctxt->s_frm_prms.i4_temporal_layer_id <=
7090                     MAX_LAYER_ID_OF_B_PICS_WITHOUT_NOISE_DETECTION))
7091                 {
7092                     ps_ctb_noise_params->i4_noise_present = 0;
7093                     memset(
7094                         ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7095                         0,
7096                         sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
7097                 }
7098 
7099 #if ME_LAMBDA_DISCOUNT_WHEN_NOISY
7100                 for(i = 0; i < 4; i++)
7101                 {
7102                     S32 j;
7103                     S32 lambda;
7104 
7105                     if(au1_is_32x32Blk_noisy[i])
7106                     {
7107                         lambda = lambda_recon;
7108                         lambda =
7109                             ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7110 
7111                         ps_search_results = &ps_ctxt->as_search_results_32x32[i];
7112 
7113                         for(j = 0; j < 2; j++)
7114                         {
7115                             ps_search_results->as_pred_ctxt[j].lambda = lambda;
7116                         }
7117                     }
7118                 }
7119 
7120                 {
7121                     S32 j;
7122                     S32 lambda;
7123 
7124                     if(au1_is_64x64Blk_noisy[0])
7125                     {
7126                         lambda = lambda_recon;
7127                         lambda =
7128                             ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7129 
7130                         ps_search_results = &ps_ctxt->s_search_results_64x64;
7131 
7132                         for(j = 0; j < 2; j++)
7133                         {
7134                             ps_search_results->as_pred_ctxt[j].lambda = lambda;
7135                         }
7136                     }
7137                 }
7138 #endif
7139                 if(au1_is_64x64Blk_noisy[0])
7140                 {
7141                     U08 *pu1_inp = ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
7142                                                              (s_common_frm_prms.i4_ctb_y_off *
7143                                                               ps_curr_layer->i4_inp_stride));
7144 
7145                     hme_compute_sigmaX_and_sigmaXSquared(
7146                         pu1_inp,
7147                         ps_curr_layer->i4_inp_stride,
7148                         ps_ctxt->au4_4x4_src_sigmaX,
7149                         ps_ctxt->au4_4x4_src_sigmaXSquared,
7150                         4,
7151                         4,
7152                         64,
7153                         64,
7154                         1,
7155                         16);
7156                 }
7157                 else
7158                 {
7159                     for(i = 0; i < 4; i++)
7160                     {
7161                         if(au1_is_32x32Blk_noisy[i])
7162                         {
7163                             U08 *pu1_inp =
7164                                 ps_curr_layer->pu1_inp +
7165                                 (s_common_frm_prms.i4_ctb_x_off +
7166                                  (s_common_frm_prms.i4_ctb_y_off * ps_curr_layer->i4_inp_stride));
7167 
7168                             U08 u1_cu_size = 32;
7169                             WORD32 i4_inp_buf_offset =
7170                                 (((i / 2) * (u1_cu_size * ps_curr_layer->i4_inp_stride)) +
7171                                  ((i % 2) * u1_cu_size));
7172 
7173                             U16 u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb = 128;
7174                             U16 u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb = 8;
7175                             S32 i4_sigma_arr_offset =
7176                                 (((i / 2) * u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb) +
7177                                  ((i % 2) * u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb));
7178 
7179                             hme_compute_sigmaX_and_sigmaXSquared(
7180                                 pu1_inp + i4_inp_buf_offset,
7181                                 ps_curr_layer->i4_inp_stride,
7182                                 ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset,
7183                                 ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset,
7184                                 4,
7185                                 4,
7186                                 32,
7187                                 32,
7188                                 1,
7189                                 16);
7190                         }
7191                         else
7192                         {
7193                             S32 j;
7194 
7195                             U08 u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb = 8;
7196                             U08 u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb = 2;
7197                             S32 i4_16x16_blk_start_index_in_i_th_32x32_blk =
7198                                 (((i / 2) * u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb) +
7199                                  ((i % 2) * u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb));
7200 
7201                             for(j = 0; j < 4; j++)
7202                             {
7203                                 U08 u1_3rd_16x16_blk_index_in_32x32_blk = 4;
7204                                 U08 u1_2nd_16x16_blk_index_in_32x32_blk = 1;
7205                                 S32 i4_16x16_blk_index_in_ctb =
7206                                     i4_16x16_blk_start_index_in_i_th_32x32_blk +
7207                                     ((j % 2) * u1_2nd_16x16_blk_index_in_32x32_blk) +
7208                                     ((j / 2) * u1_3rd_16x16_blk_index_in_32x32_blk);
7209 
7210                                 //S32 k = (((i / 2) * 8) + ((i % 2) * 2)) + ((j % 2) * 1) + ((j / 2) * 4);
7211 
7212                                 if(au1_is_16x16Blk_noisy[i4_16x16_blk_index_in_ctb])
7213                                 {
7214                                     U08 *pu1_inp =
7215                                         ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
7216                                                                   (s_common_frm_prms.i4_ctb_y_off *
7217                                                                    ps_curr_layer->i4_inp_stride));
7218 
7219                                     U08 u1_cu_size = 16;
7220                                     WORD32 i4_inp_buf_offset =
7221                                         (((i4_16x16_blk_index_in_ctb % 4) * u1_cu_size) +
7222                                          ((i4_16x16_blk_index_in_ctb / 4) *
7223                                           (u1_cu_size * ps_curr_layer->i4_inp_stride)));
7224 
7225                                     U16 u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk = 64;
7226                                     U16 u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk = 4;
7227                                     S32 i4_sigma_arr_offset =
7228                                         (((i4_16x16_blk_index_in_ctb % 4) *
7229                                           u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk) +
7230                                          ((i4_16x16_blk_index_in_ctb / 4) *
7231                                           u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk));
7232 
7233                                     hme_compute_sigmaX_and_sigmaXSquared(
7234                                         pu1_inp + i4_inp_buf_offset,
7235                                         ps_curr_layer->i4_inp_stride,
7236                                         (ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset),
7237                                         (ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset),
7238                                         4,
7239                                         4,
7240                                         16,
7241                                         16,
7242                                         1,
7243                                         16);
7244                                 }
7245                             }
7246                         }
7247                     }
7248                 }
7249             }
7250             else
7251             {
7252                 memset(au1_is_16x16Blk_noisy, 0, sizeof(au1_is_16x16Blk_noisy));
7253 
7254                 memset(au1_is_32x32Blk_noisy, 0, sizeof(au1_is_32x32Blk_noisy));
7255 
7256                 memset(au1_is_64x64Blk_noisy, 0, sizeof(au1_is_64x64Blk_noisy));
7257             }
7258 
7259             for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
7260             {
7261                 S32 ref_ctr;
7262                 U08 au1_pred_dir_searched[2];
7263                 U08 u1_is_cu_noisy;
7264                 ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
7265 
7266                 {
7267                     blk_x = (i4_ctb_x << 2) +
7268                             (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_x);
7269                     blk_y = (i4_ctb_y << 2) +
7270                             (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_y);
7271 
7272                     blk_id_in_full_ctb =
7273                         ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_id_in_full_ctb;
7274                     blk_8x8_mask = ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_8x8_mask;
7275                     ai4_blk_8x8_mask[blk_id_in_full_ctb] = blk_8x8_mask;
7276                     s_search_prms_blk.i4_cu_x_off = (blk_x << blk_size_shift) - (i4_ctb_x << 6);
7277                     s_search_prms_blk.i4_cu_y_off = (blk_y << blk_size_shift) - (i4_ctb_y << 6);
7278                 }
7279 
7280                 /* get the current input blk point */
7281                 pos_x = blk_x << blk_size_shift;
7282                 pos_y = blk_y << blk_size_shift;
7283                 pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
7284 
7285                 /*********************************************************************/
7286                 /* For every blk in the picture, the search range needs to be derived*/
7287                 /* Any blk can have any mv, but practical search constraints are     */
7288                 /* imposed by the picture boundary and amt of padding.               */
7289                 /*********************************************************************/
7290                 /* MV limit is different based on ref. PIC */
7291                 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
7292                 {
7293                     if(!s_search_prms_blk.i4_use_rec)
7294                     {
7295                         hme_derive_search_range(
7296                             &as_range_prms_inp[ref_ctr],
7297                             &s_pic_limit_inp,
7298                             &as_mv_limit[ref_ctr],
7299                             pos_x,
7300                             pos_y,
7301                             blk_wd,
7302                             blk_ht);
7303                     }
7304                     else
7305                     {
7306                         hme_derive_search_range(
7307                             &as_range_prms_rec[ref_ctr],
7308                             &s_pic_limit_rec,
7309                             &as_mv_limit[ref_ctr],
7310                             pos_x,
7311                             pos_y,
7312                             blk_wd,
7313                             blk_ht);
7314                     }
7315                 }
7316                 s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
7317                 s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
7318                 /* Select search results from a suitable search result in the context */
7319                 {
7320                     ps_search_results = &ps_ctxt->as_search_results_16x16[blk_id_in_full_ctb];
7321 
7322                     if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
7323                     {
7324                         S32 i;
7325 
7326                         for(i = 0; i < 2; i++)
7327                         {
7328                             ps_search_results->as_pred_ctxt[i].lambda = lambda_recon;
7329                         }
7330                     }
7331                 }
7332 
7333                 u1_is_cu_noisy = au1_is_16x16Blk_noisy
7334                     [(s_search_prms_blk.i4_cu_x_off >> 4) + (s_search_prms_blk.i4_cu_y_off >> 2)];
7335 
7336                 s_subpel_prms.u1_is_cu_noisy = u1_is_cu_noisy;
7337 
7338 #if ME_LAMBDA_DISCOUNT_WHEN_NOISY
7339                 if(u1_is_cu_noisy)
7340                 {
7341                     S32 j;
7342                     S32 lambda;
7343 
7344                     lambda = lambda_recon;
7345                     lambda = ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7346 
7347                     for(j = 0; j < 2; j++)
7348                     {
7349                         ps_search_results->as_pred_ctxt[j].lambda = lambda;
7350                     }
7351                 }
7352                 else
7353                 {
7354                     S32 j;
7355                     S32 lambda;
7356 
7357                     lambda = lambda_recon;
7358 
7359                     for(j = 0; j < 2; j++)
7360                     {
7361                         ps_search_results->as_pred_ctxt[j].lambda = lambda;
7362                     }
7363                 }
7364 #endif
7365 
7366                 s_search_prms_blk.ps_search_results = ps_search_results;
7367 
7368                 s_search_prms_blk.i4_part_mask = hme_part_mask_populator(
7369                     pu1_inp,
7370                     i4_inp_stride,
7371                     ps_refine_prms->limit_active_partitions,
7372                     ps_ctxt->ps_hme_frm_prms->bidir_enabled,
7373                     ps_ctxt->u1_is_curFrame_a_refFrame,
7374                     blk_8x8_mask,
7375                     e_me_quality_presets);
7376 
7377                 if(ME_PRISTINE_QUALITY == e_me_quality_presets)
7378                 {
7379                     ps_ctb_cluster_info->ai4_part_mask[blk_id_in_full_ctb] =
7380                         s_search_prms_blk.i4_part_mask;
7381                 }
7382 
7383                 /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
7384                 {
7385                     /* Setting u1_num_active_refs to 2 */
7386                     /* for the sole purpose of the */
7387                     /* function called below */
7388                     ps_search_results->u1_num_active_ref = (ps_refine_prms->bidir_enabled) ? 2 : 1;
7389 
7390                     hme_reset_search_results(
7391                         ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
7392 
7393                     ps_search_results->u1_num_active_ref = i4_num_pred_dir;
7394                 }
7395 
7396                 if(0 == blk_id_in_ctb)
7397                 {
7398                     UWORD8 u1_ctr;
7399                     for(u1_ctr = 0; u1_ctr < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
7400                                               ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
7401                         u1_ctr++)
7402                     {
7403                         WORD32 i4_max_dep_ctb_y;
7404                         WORD32 i4_max_dep_ctb_x;
7405 
7406                         /* Set max mv in ctb units */
7407                         i4_max_mv_x_in_ctb =
7408                             (ps_curr_layer->i2_max_mv_x + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
7409                             ps_ctxt->log_ctb_size;
7410 
7411                         i4_max_mv_y_in_ctb =
7412                             (as_mv_limit[u1_ctr].i2_max_y + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
7413                             ps_ctxt->log_ctb_size;
7414                         /********************************************************************/
7415                         /* Set max ctb_x and ctb_y dependency on reference picture          */
7416                         /* Note +1 is due to delayed deblock, SAO, subpel plan dependency   */
7417                         /********************************************************************/
7418                         i4_max_dep_ctb_x = CLIP3(
7419                             (i4_ctb_x + i4_max_mv_x_in_ctb + 1),
7420                             0,
7421                             ps_frm_ctb_prms->i4_num_ctbs_horz - 1);
7422                         i4_max_dep_ctb_y = CLIP3(
7423                             (i4_ctb_y + i4_max_mv_y_in_ctb + 1),
7424                             0,
7425                             ps_frm_ctb_prms->i4_num_ctbs_vert - 1);
7426 
7427                         ihevce_dmgr_map_chk_sync(
7428                             ps_curr_layer->ppv_dep_mngr_recon[u1_ctr],
7429                             ps_ctxt->thrd_id,
7430                             i4_ctb_x,
7431                             i4_ctb_y,
7432                             i4_max_mv_x_in_ctb,
7433                             i4_max_mv_y_in_ctb);
7434                     }
7435                 }
7436 
7437                 /* Loop across different Ref IDx */
7438                 for(u1_pred_dir_ctr = 0; u1_pred_dir_ctr < i4_num_pred_dir; u1_pred_dir_ctr++)
7439                 {
7440                     S32 resultid;
7441                     S08 u1_default_ref_id;
7442                     S32 i4_num_srch_cands = 0;
7443                     S32 i4_num_refinement_iterations;
7444                     S32 i4_refine_iter_ctr;
7445 
7446                     if((i4_num_pred_dir == 2) || (!ps_ctxt->s_frm_prms.bidir_enabled) ||
7447                        (ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0))
7448                     {
7449                         u1_pred_dir = u1_pred_dir_ctr;
7450                     }
7451                     else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
7452                     {
7453                         u1_pred_dir = 1;
7454                     }
7455 
7456                     u1_default_ref_id = (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list[0]
7457                                                            : ps_ctxt->ai1_future_list[0];
7458                     au1_pred_dir_searched[u1_pred_dir_ctr] = u1_pred_dir;
7459 
7460                     i4_num_srch_cands = 0;
7461                     resultid = 0;
7462 
7463                     /* START OF NEW CTB MEANS FILL UP NEIGHBOURS IN 18x18 GRID */
7464                     if(0 == blk_id_in_ctb)
7465                     {
7466                         /*****************************************************************/
7467                         /* Initialize the mv grid with results of neighbours for the next*/
7468                         /* ctb.                                                          */
7469                         /*****************************************************************/
7470                         hme_fill_ctb_neighbour_mvs(
7471                             ps_curr_layer,
7472                             blk_x,
7473                             blk_y,
7474                             aps_mv_grid[u1_pred_dir],
7475                             u1_pred_dir_ctr,
7476                             u1_default_ref_id,
7477                             ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
7478                     }
7479 
7480                     s_search_prms_blk.i1_ref_idx = u1_pred_dir;
7481 
7482                     {
7483                         if((blk_id_in_full_ctb % 4) == 0)
7484                         {
7485                             ps_ctxt->as_search_results_32x32[blk_id_in_full_ctb >> 2]
7486                                 .as_pred_ctxt[u1_pred_dir]
7487                                 .proj_used = (blk_id_in_full_ctb == 8) ? 0 : 1;
7488                         }
7489 
7490                         if(blk_id_in_full_ctb == 0)
7491                         {
7492                             ps_ctxt->s_search_results_64x64.as_pred_ctxt[u1_pred_dir].proj_used = 1;
7493                         }
7494 
7495                         ps_search_results->as_pred_ctxt[u1_pred_dir].proj_used =
7496                             !gau1_encode_to_raster_y[blk_id_in_full_ctb];
7497                     }
7498 
7499                     {
7500                         S32 x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
7501                         S32 y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
7502                         U08 u1_is_blk_at_ctb_boundary = !y;
7503 
7504                         s_srch_cand_init_data.u1_is_left_available =
7505                             !(left_ctb_in_diff_tile && !s_search_prms_blk.i4_cu_x_off);
7506 
7507                         if(u1_is_blk_at_ctb_boundary)
7508                         {
7509                             s_srch_cand_init_data.u1_is_topRight_available = 0;
7510                             s_srch_cand_init_data.u1_is_topLeft_available = 0;
7511                             s_srch_cand_init_data.u1_is_top_available = 0;
7512                         }
7513                         else
7514                         {
7515                             s_srch_cand_init_data.u1_is_topRight_available =
7516                                 gau1_cu_tr_valid[y][x] && ((pos_x + blk_wd) < i4_pic_wd);
7517                             s_srch_cand_init_data.u1_is_top_available = 1;
7518                             s_srch_cand_init_data.u1_is_topLeft_available =
7519                                 s_srch_cand_init_data.u1_is_left_available;
7520                         }
7521                     }
7522 
7523                     s_srch_cand_init_data.i1_default_ref_id = u1_default_ref_id;
7524                     s_srch_cand_init_data.i1_alt_default_ref_id = ps_ctxt->ai1_past_list[1];
7525                     s_srch_cand_init_data.i4_pos_x = pos_x;
7526                     s_srch_cand_init_data.i4_pos_y = pos_y;
7527                     s_srch_cand_init_data.u1_pred_dir = u1_pred_dir;
7528                     s_srch_cand_init_data.u1_pred_dir_ctr = u1_pred_dir_ctr;
7529                     s_srch_cand_init_data.u1_search_candidate_list_index =
7530                         au1_search_candidate_list_index[u1_pred_dir];
7531 
7532                     i4_num_srch_cands = hme_populate_search_candidates(&s_srch_cand_init_data);
7533 
7534                     /* Note this block also clips the MV range for all candidates */
7535                     {
7536                         S08 i1_check_for_mult_refs;
7537 
7538                         i1_check_for_mult_refs = u1_pred_dir ? (ps_ctxt->num_ref_future > 1)
7539                                                              : (ps_ctxt->num_ref_past > 1);
7540 
7541                         ps_me_optimised_function_list->pf_mv_clipper(
7542                             &s_search_prms_blk,
7543                             i4_num_srch_cands,
7544                             i1_check_for_mult_refs,
7545                             ps_refine_prms->i4_num_steps_fpel_refine,
7546                             ps_refine_prms->i4_num_steps_hpel_refine,
7547                             ps_refine_prms->i4_num_steps_qpel_refine);
7548                     }
7549 
7550 #if ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0
7551                     i4_num_refinement_iterations =
7552                         ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1))
7553                             ? ((e_me_quality_presets == ME_HIGH_QUALITY) ? 2 : i4_num_act_ref_l0)
7554                             : 1;
7555 #else
7556                     i4_num_refinement_iterations =
7557                         ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1)) ? 2 : 1;
7558 #endif
7559 
7560 #if ENABLE_EXPLICIT_SEARCH_IN_PQ
7561                     if(e_me_quality_presets == ME_PRISTINE_QUALITY)
7562                     {
7563                         i4_num_refinement_iterations = (u1_pred_dir == 0) ? i4_num_act_ref_l0
7564                                                                           : i4_num_act_ref_l1;
7565                     }
7566 #endif
7567 
7568                     for(i4_refine_iter_ctr = 0; i4_refine_iter_ctr < i4_num_refinement_iterations;
7569                         i4_refine_iter_ctr++)
7570                     {
7571                         S32 center_x;
7572                         S32 center_y;
7573                         S32 center_ref_idx;
7574 
7575                         S08 *pi1_pred_dir_to_ref_idx =
7576                             (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list : ps_ctxt->ai1_future_list;
7577 
7578                         {
7579                             WORD32 i4_i;
7580 
7581                             for(i4_i = 0; i4_i < TOT_NUM_PARTS; i4_i++)
7582                             {
7583                                 ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
7584                                 ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
7585                                 ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i] =
7586                                     MAX_SIGNED_16BIT_VAL;
7587                                 ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i] = 0;
7588                                 ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i] = 0;
7589                                 ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i] = u1_default_ref_id;
7590 
7591                                 if(ps_refine_prms->i4_num_results_per_part == 2)
7592                                 {
7593                                     ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i] =
7594                                         MAX_SIGNED_16BIT_VAL;
7595                                     ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i] =
7596                                         MAX_SIGNED_16BIT_VAL;
7597                                     ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i] =
7598                                         MAX_SIGNED_16BIT_VAL;
7599                                     ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i] = 0;
7600                                     ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i] = 0;
7601                                     ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i] = u1_default_ref_id;
7602                                 }
7603                             }
7604 
7605                             s_search_prms_blk.ps_fullpel_refine_ctxt = ps_fullpel_refine_ctxt;
7606                             s_subpel_prms.ps_subpel_refine_ctxt = ps_fullpel_refine_ctxt;
7607                         }
7608 
7609                         {
7610                             search_node_t *ps_coloc_node;
7611 
7612                             S32 i = 0;
7613 
7614                             if(i4_num_refinement_iterations > 1)
7615                             {
7616                                 for(i = 0; i < ai4_num_coloc_cands[u1_pred_dir]; i++)
7617                                 {
7618                                     ps_coloc_node =
7619                                         s_search_prms_blk.ps_search_candts[ai4_id_coloc[i]]
7620                                             .ps_search_node;
7621 
7622                                     if(pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr] ==
7623                                        ps_coloc_node->i1_ref_idx)
7624                                     {
7625                                         break;
7626                                     }
7627                                 }
7628 
7629                                 if(i == ai4_num_coloc_cands[u1_pred_dir])
7630                                 {
7631                                     i = 0;
7632                                 }
7633                             }
7634                             else
7635                             {
7636                                 ps_coloc_node = s_search_prms_blk.ps_search_candts[ai4_id_coloc[0]]
7637                                                     .ps_search_node;
7638                             }
7639 
7640                             hme_set_mvp_node(
7641                                 ps_search_results,
7642                                 ps_coloc_node,
7643                                 u1_pred_dir,
7644                                 (i4_num_refinement_iterations > 1)
7645                                     ? pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr]
7646                                     : u1_default_ref_id);
7647 
7648                             center_x = ps_coloc_node->ps_mv->i2_mvx;
7649                             center_y = ps_coloc_node->ps_mv->i2_mvy;
7650                             center_ref_idx = ps_coloc_node->i1_ref_idx;
7651                         }
7652 
7653                         /* Full-Pel search */
7654                         {
7655                             S32 num_unique_nodes;
7656 
7657                             memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
7658 
7659                             num_unique_nodes = hme_remove_duplicate_fpel_search_candidates(
7660                                 as_unique_search_nodes,
7661                                 s_search_prms_blk.ps_search_candts,
7662                                 au4_unique_node_map,
7663                                 pi1_pred_dir_to_ref_idx,
7664                                 i4_num_srch_cands,
7665                                 s_search_prms_blk.i4_num_init_candts,
7666                                 i4_refine_iter_ctr,
7667                                 i4_num_refinement_iterations,
7668                                 i4_num_act_ref_l0,
7669                                 center_ref_idx,
7670                                 center_x,
7671                                 center_y,
7672                                 ps_ctxt->s_frm_prms.bidir_enabled,
7673                                 e_me_quality_presets);
7674 
7675                             /*************************************************************************/
7676                             /* This array stores the ids of the partitions whose                     */
7677                             /* SADs are updated. Since the partitions whose SADs are updated may not */
7678                             /* be in contiguous order, we supply another level of indirection.       */
7679                             /*************************************************************************/
7680                             ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids(
7681                                 s_search_prms_blk.i4_part_mask,
7682                                 &ps_fullpel_refine_ctxt->ai4_part_id[0]);
7683 
7684                             if(!i4_refine_iter_ctr && !u1_pred_dir_ctr && u1_is_cu_noisy)
7685                             {
7686                                 S32 i;
7687                                 /*i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values*/
7688                                 S32 i4_sigma_array_offset = (s_search_prms_blk.i4_cu_x_off / 4) +
7689                                                             (s_search_prms_blk.i4_cu_y_off * 4);
7690 
7691                                 for(i = 0; i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i++)
7692                                 {
7693                                     S32 i4_part_id = ps_fullpel_refine_ctxt->ai4_part_id[i];
7694 
7695                                     hme_compute_final_sigma_of_pu_from_base_blocks(
7696                                         ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
7697                                         ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
7698                                         au8_final_src_sigmaX,
7699                                         au8_final_src_sigmaXSquared,
7700                                         16,
7701                                         4,
7702                                         i4_part_id,
7703                                         16);
7704                                 }
7705 
7706                                 s_common_frm_prms.pu8_part_src_sigmaX = au8_final_src_sigmaX;
7707                                 s_common_frm_prms.pu8_part_src_sigmaXSquared =
7708                                     au8_final_src_sigmaXSquared;
7709 
7710                                 s_search_prms_blk.pu8_part_src_sigmaX = au8_final_src_sigmaX;
7711                                 s_search_prms_blk.pu8_part_src_sigmaXSquared =
7712                                     au8_final_src_sigmaXSquared;
7713                             }
7714 
7715                             if(0 == num_unique_nodes)
7716                             {
7717                                 continue;
7718                             }
7719 
7720                             if(num_unique_nodes >= 2)
7721                             {
7722                                 s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
7723                                 s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
7724                                 if(ps_ctxt->i4_pic_type != IV_P_FRAME)
7725                                 {
7726                                     if(ps_ctxt->i4_temporal_layer == 1)
7727                                     {
7728                                         hme_fullpel_cand_sifter(
7729                                             &s_search_prms_blk,
7730                                             ps_curr_layer,
7731                                             &ps_ctxt->s_wt_pred,
7732                                             ALPHA_FOR_NOISE_TERM_IN_ME,
7733                                             u1_is_cu_noisy,
7734                                             ps_me_optimised_function_list);
7735                                     }
7736                                     else
7737                                     {
7738                                         hme_fullpel_cand_sifter(
7739                                             &s_search_prms_blk,
7740                                             ps_curr_layer,
7741                                             &ps_ctxt->s_wt_pred,
7742                                             ALPHA_FOR_NOISE_TERM_IN_ME,
7743                                             u1_is_cu_noisy,
7744                                             ps_me_optimised_function_list);
7745                                     }
7746                                 }
7747                                 else
7748                                 {
7749                                     hme_fullpel_cand_sifter(
7750                                         &s_search_prms_blk,
7751                                         ps_curr_layer,
7752                                         &ps_ctxt->s_wt_pred,
7753                                         ALPHA_FOR_NOISE_TERM_IN_ME_P,
7754                                         u1_is_cu_noisy,
7755                                         ps_me_optimised_function_list);
7756                                 }
7757                             }
7758 
7759                             s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
7760 
7761                             hme_fullpel_refine(
7762                                 ps_refine_prms,
7763                                 &s_search_prms_blk,
7764                                 ps_curr_layer,
7765                                 &ps_ctxt->s_wt_pred,
7766                                 au4_unique_node_map,
7767                                 num_unique_nodes,
7768                                 blk_8x8_mask,
7769                                 center_x,
7770                                 center_y,
7771                                 center_ref_idx,
7772                                 e_me_quality_presets,
7773                                 ps_me_optimised_function_list);
7774                         }
7775 
7776                         /* Sub-Pel search */
7777                         {
7778                             hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
7779 
7780                             s_subpel_prms.pu1_wkg_mem = (U08 *)hme_get_wkg_mem(
7781                                 &ps_ctxt->s_buf_mgr,
7782                                 INTERP_INTERMED_BUF_SIZE + INTERP_OUT_BUF_SIZE);
7783                             /* MV limit is different based on ref. PIC */
7784                             for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
7785                             {
7786                                 SCALE_RANGE_PRMS(
7787                                     as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
7788                                 SCALE_RANGE_PRMS(
7789                                     as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
7790                             }
7791                             s_subpel_prms.i4_ctb_x_off = i4_ctb_x << 6;
7792                             s_subpel_prms.i4_ctb_y_off = i4_ctb_y << 6;
7793 
7794                             hme_subpel_refine_cu_hs(
7795                                 &s_subpel_prms,
7796                                 ps_curr_layer,
7797                                 ps_search_results,
7798                                 u1_pred_dir,
7799                                 &ps_ctxt->s_wt_pred,
7800                                 blk_8x8_mask,
7801                                 ps_ctxt->ps_func_selector,
7802                                 ps_cmn_utils_optimised_function_list,
7803                                 ps_me_optimised_function_list);
7804                         }
7805                     }
7806                 }
7807                 /* Populate the new PU struct with the results post subpel refinement*/
7808                 {
7809                     inter_cu_results_t *ps_cu_results;
7810                     WORD32 best_inter_cost, intra_cost, posx, posy;
7811 
7812                     UWORD8 intra_8x8_enabled = 0;
7813 
7814                     /*  cost of 16x16 cu parent  */
7815                     WORD32 parent_cost = MAX_32BIT_VAL;
7816 
7817                     /*  cost of 8x8 cu children  */
7818                     /*********************************************************************/
7819                     /* Assuming parent is not split, then we signal 1 bit for this parent*/
7820                     /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
7821                     /* So, 4*lambda is extra for children cost.                          */
7822                     /*********************************************************************/
7823                     WORD32 child_cost = 0;
7824 
7825                     ps_cu_results = ps_search_results->ps_cu_results;
7826 
7827                     /* Initialize the pu_results pointers to the first struct in the stack array */
7828                     ps_pu_results = as_inter_pu_results;
7829 
7830                     hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
7831 
7832                     hme_populate_pus(
7833                         ps_thrd_ctxt,
7834                         ps_ctxt,
7835                         &s_subpel_prms,
7836                         ps_search_results,
7837                         ps_cu_results,
7838                         ps_pu_results,
7839                         &(as_pu_results[0][0][0]),
7840                         &s_common_frm_prms,
7841                         &ps_ctxt->s_wt_pred,
7842                         ps_curr_layer,
7843                         au1_pred_dir_searched,
7844                         i4_num_pred_dir);
7845 
7846                     ps_cu_results->i4_inp_offset =
7847                         (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
7848 
7849                     hme_decide_part_types(
7850                         ps_cu_results,
7851                         ps_pu_results,
7852                         &s_common_frm_prms,
7853                         ps_ctxt,
7854                         ps_cmn_utils_optimised_function_list,
7855                         ps_me_optimised_function_list
7856 
7857                     );
7858 
7859                     /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
7860                     /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
7861                     if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
7862                     {
7863                         WORD32 res_ctr;
7864 
7865                         for(res_ctr = 0; res_ctr < ps_cu_results->u1_num_best_results; res_ctr++)
7866                         {
7867                             WORD32 num_part = 2, part_ctr;
7868                             part_type_results_t *ps_best_results =
7869                                 &ps_cu_results->ps_best_results[res_ctr];
7870 
7871                             if(PRT_2Nx2N == ps_best_results->u1_part_type)
7872                                 num_part = 1;
7873 
7874                             for(part_ctr = 0; part_ctr < num_part; part_ctr++)
7875                             {
7876                                 pu_result_t *ps_pu_results =
7877                                     &ps_best_results->as_pu_results[part_ctr];
7878 
7879                                 ASSERT(PRED_L0 == ps_pu_results->pu.b2_pred_mode);
7880 
7881                                 hme_update_dynamic_search_params(
7882                                     &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p]
7883                                          .as_dyn_range_prms[ps_pu_results->pu.mv.i1_l0_ref_idx],
7884                                     ps_pu_results->pu.mv.s_l0_mv.i2_mvy);
7885 
7886                                 /* Sanity Check */
7887                                 ASSERT(
7888                                     ps_pu_results->pu.mv.i1_l0_ref_idx <
7889                                     ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
7890 
7891                                 /* No L1 for P Pic. */
7892                                 ASSERT(PRED_L1 != ps_pu_results->pu.b2_pred_mode);
7893                                 /* No BI for P Pic. */
7894                                 ASSERT(PRED_BI != ps_pu_results->pu.b2_pred_mode);
7895                             }
7896                         }
7897                     }
7898 
7899                     /*****************************************************************/
7900                     /* INSERT INTRA RESULTS AT 16x16 LEVEL.                          */
7901                     /*****************************************************************/
7902 
7903 #if DISABLE_INTRA_IN_BPICS
7904                     if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
7905                              (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
7906 #endif
7907                     {
7908                         if(!(DISABLE_INTRA_WHEN_NOISY && s_common_frm_prms.u1_is_cu_noisy))
7909                         {
7910                             hme_insert_intra_nodes_post_bipred(
7911                                 ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
7912                         }
7913                     }
7914 
7915 #if DISABLE_INTRA_IN_BPICS
7916                     if((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
7917                        (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
7918                     {
7919                         intra_8x8_enabled = 0;
7920                     }
7921                     else
7922 #endif
7923                     {
7924                         /*TRAQO intra flag updation*/
7925                         if(1 == ps_cu_results->ps_best_results->as_pu_results[0].pu.b1_intra_flag)
7926                         {
7927                             best_inter_cost =
7928                                 ps_cu_results->ps_best_results->as_pu_results[1].i4_tot_cost;
7929                             intra_cost =
7930                                 ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
7931                             /*@16x16 level*/
7932                             posx = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_x
7933                                     << 2) >>
7934                                    4;
7935                             posy = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_y
7936                                     << 2) >>
7937                                    4;
7938                         }
7939                         else
7940                         {
7941                             best_inter_cost =
7942                                 ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
7943                             posx = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_x
7944                                     << 2) >>
7945                                    3;
7946                             posy = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_y
7947                                     << 2) >>
7948                                    3;
7949                         }
7950 
7951                         /* Disable intra16/32/64 flags based on split flags recommended by IPE */
7952                         if(ps_cur_ipe_ctb->u1_split_flag)
7953                         {
7954                             /* Id of the 32x32 block, 16x16 block in a CTB */
7955                             WORD32 i4_32x32_id =
7956                                 (ps_cu_results->u1_y_off >> 5) * 2 + (ps_cu_results->u1_x_off >> 5);
7957                             WORD32 i4_16x16_id = ((ps_cu_results->u1_y_off >> 4) & 0x1) * 2 +
7958                                                  ((ps_cu_results->u1_x_off >> 4) & 0x1);
7959 
7960                             if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
7961                             {
7962                                 if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7963                                        .as_intra16_analyse[i4_16x16_id]
7964                                        .b1_split_flag)
7965                                 {
7966                                     intra_8x8_enabled =
7967                                         ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7968                                             .as_intra16_analyse[i4_16x16_id]
7969                                             .as_intra8_analyse[0]
7970                                             .b1_valid_cu;
7971                                     intra_8x8_enabled &=
7972                                         ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7973                                             .as_intra16_analyse[i4_16x16_id]
7974                                             .as_intra8_analyse[1]
7975                                             .b1_valid_cu;
7976                                     intra_8x8_enabled &=
7977                                         ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7978                                             .as_intra16_analyse[i4_16x16_id]
7979                                             .as_intra8_analyse[2]
7980                                             .b1_valid_cu;
7981                                     intra_8x8_enabled &=
7982                                         ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7983                                             .as_intra16_analyse[i4_16x16_id]
7984                                             .as_intra8_analyse[3]
7985                                             .b1_valid_cu;
7986                                 }
7987                             }
7988                         }
7989                     }
7990 
7991                     if(blk_8x8_mask == 0xf)
7992                     {
7993                         parent_cost =
7994                             ps_search_results->ps_cu_results->ps_best_results[0].i4_tot_cost;
7995                         ps_search_results->u1_split_flag = 0;
7996                     }
7997                     else
7998                     {
7999                         ps_search_results->u1_split_flag = 1;
8000                     }
8001 
8002                     ps_cu_results = &ps_ctxt->as_cu8x8_results[blk_id_in_full_ctb << 2];
8003 
8004                     if(s_common_frm_prms.u1_is_cu_noisy)
8005                     {
8006                         intra_8x8_enabled = 0;
8007                     }
8008 
8009                     /* Evaluate 8x8 if NxN part id is enabled */
8010                     if((ps_search_results->i4_part_mask & ENABLE_NxN) || intra_8x8_enabled)
8011                     {
8012                         /* Populates the PU's for the 4 8x8's in one call */
8013                         hme_populate_pus_8x8_cu(
8014                             ps_thrd_ctxt,
8015                             ps_ctxt,
8016                             &s_subpel_prms,
8017                             ps_search_results,
8018                             ps_cu_results,
8019                             ps_pu_results,
8020                             &(as_pu_results[0][0][0]),
8021                             &s_common_frm_prms,
8022                             au1_pred_dir_searched,
8023                             i4_num_pred_dir,
8024                             blk_8x8_mask);
8025 
8026                         /* Re-initialize the pu_results pointers to the first struct in the stack array */
8027                         ps_pu_results = as_inter_pu_results;
8028 
8029                         for(i = 0; i < 4; i++)
8030                         {
8031                             if((blk_8x8_mask & (1 << i)))
8032                             {
8033                                 if(ps_cu_results->i4_part_mask)
8034                                 {
8035                                     hme_decide_part_types(
8036                                         ps_cu_results,
8037                                         ps_pu_results,
8038                                         &s_common_frm_prms,
8039                                         ps_ctxt,
8040                                         ps_cmn_utils_optimised_function_list,
8041                                         ps_me_optimised_function_list
8042 
8043                                     );
8044                                 }
8045                                 /*****************************************************************/
8046                                 /* INSERT INTRA RESULTS AT 8x8 LEVEL.                          */
8047                                 /*****************************************************************/
8048 #if DISABLE_INTRA_IN_BPICS
8049                                 if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
8050                                          (ps_ctxt->s_frm_prms.i4_temporal_layer_id >
8051                                           TEMPORAL_LAYER_DISABLE)))
8052 #endif
8053                                 {
8054                                     if(!(DISABLE_INTRA_WHEN_NOISY &&
8055                                          s_common_frm_prms.u1_is_cu_noisy))
8056                                     {
8057                                         hme_insert_intra_nodes_post_bipred(
8058                                             ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
8059                                     }
8060                                 }
8061 
8062                                 child_cost += ps_cu_results->ps_best_results[0].i4_tot_cost;
8063                             }
8064 
8065                             ps_cu_results++;
8066                             ps_pu_results++;
8067                         }
8068 
8069                         /* Compare 16x16 vs 8x8 cost */
8070                         if(child_cost < parent_cost)
8071                         {
8072                             ps_search_results->best_cu_cost = child_cost;
8073                             ps_search_results->u1_split_flag = 1;
8074                         }
8075                     }
8076                 }
8077 
8078                 hme_update_mv_bank_encode(
8079                     ps_search_results,
8080                     ps_curr_layer->ps_layer_mvbank,
8081                     blk_x,
8082                     blk_y,
8083                     &s_mv_update_prms,
8084                     au1_pred_dir_searched,
8085                     i4_num_act_ref_l0);
8086 
8087                 /*********************************************************************/
8088                 /* Map the best results to an MV Grid. This is a 18x18 grid that is  */
8089                 /* useful for doing things like predictor for cost calculation or    */
8090                 /* also for merge calculations if need be.                           */
8091                 /*********************************************************************/
8092                 hme_map_mvs_to_grid(
8093                     &aps_mv_grid[0], ps_search_results, au1_pred_dir_searched, i4_num_pred_dir);
8094             }
8095 
8096             /* Set the CU tree nodes appropriately */
8097             if(e_me_quality_presets != ME_PRISTINE_QUALITY)
8098             {
8099                 WORD32 i, j;
8100 
8101                 for(i = 0; i < 16; i++)
8102                 {
8103                     cur_ctb_cu_tree_t *ps_tree_node =
8104                         ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
8105                     search_results_t *ps_results = &ps_ctxt->as_search_results_16x16[i];
8106 
8107                     switch(i >> 2)
8108                     {
8109                     case 0:
8110                     {
8111                         ps_tree_node = ps_tree_node->ps_child_node_tl;
8112 
8113                         break;
8114                     }
8115                     case 1:
8116                     {
8117                         ps_tree_node = ps_tree_node->ps_child_node_tr;
8118 
8119                         break;
8120                     }
8121                     case 2:
8122                     {
8123                         ps_tree_node = ps_tree_node->ps_child_node_bl;
8124 
8125                         break;
8126                     }
8127                     case 3:
8128                     {
8129                         ps_tree_node = ps_tree_node->ps_child_node_br;
8130 
8131                         break;
8132                     }
8133                     }
8134 
8135                     switch(i % 4)
8136                     {
8137                     case 0:
8138                     {
8139                         ps_tree_node = ps_tree_node->ps_child_node_tl;
8140 
8141                         break;
8142                     }
8143                     case 1:
8144                     {
8145                         ps_tree_node = ps_tree_node->ps_child_node_tr;
8146 
8147                         break;
8148                     }
8149                     case 2:
8150                     {
8151                         ps_tree_node = ps_tree_node->ps_child_node_bl;
8152 
8153                         break;
8154                     }
8155                     case 3:
8156                     {
8157                         ps_tree_node = ps_tree_node->ps_child_node_br;
8158 
8159                         break;
8160                     }
8161                     }
8162 
8163                     if(ai4_blk_8x8_mask[i] == 15)
8164                     {
8165                         if(!ps_results->u1_split_flag)
8166                         {
8167                             ps_tree_node->is_node_valid = 1;
8168                             NULLIFY_THE_CHILDREN_NODES(ps_tree_node);
8169                         }
8170                         else
8171                         {
8172                             ps_tree_node->is_node_valid = 0;
8173                             ENABLE_THE_CHILDREN_NODES(ps_tree_node);
8174                         }
8175                     }
8176                     else
8177                     {
8178                         cur_ctb_cu_tree_t *ps_tree_child;
8179 
8180                         ps_tree_node->is_node_valid = 0;
8181 
8182                         for(j = 0; j < 4; j++)
8183                         {
8184                             switch(j)
8185                             {
8186                             case 0:
8187                             {
8188                                 ps_tree_child = ps_tree_node->ps_child_node_tl;
8189 
8190                                 break;
8191                             }
8192                             case 1:
8193                             {
8194                                 ps_tree_child = ps_tree_node->ps_child_node_tr;
8195 
8196                                 break;
8197                             }
8198                             case 2:
8199                             {
8200                                 ps_tree_child = ps_tree_node->ps_child_node_bl;
8201 
8202                                 break;
8203                             }
8204                             case 3:
8205                             {
8206                                 ps_tree_child = ps_tree_node->ps_child_node_br;
8207 
8208                                 break;
8209                             }
8210                             }
8211 
8212                             ps_tree_child->is_node_valid = !!(ai4_blk_8x8_mask[i] & (1 << j));
8213                         }
8214                     }
8215                 }
8216             }
8217 
8218             if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8219             {
8220                 cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root;
8221 
8222                 hme_analyse_mv_clustering(
8223                     ps_ctxt->as_search_results_16x16,
8224                     ps_ctxt->as_cu16x16_results,
8225                     ps_ctxt->as_cu8x8_results,
8226                     ps_ctxt->ps_ctb_cluster_info,
8227                     ps_ctxt->ai1_future_list,
8228                     ps_ctxt->ai1_past_list,
8229                     ps_ctxt->s_frm_prms.bidir_enabled,
8230                     e_me_quality_presets);
8231 
8232 #if DISABLE_BLK_MERGE_WHEN_NOISY
8233                 ps_tree->ps_child_node_tl->is_node_valid = !au1_is_32x32Blk_noisy[0];
8234                 ps_tree->ps_child_node_tr->is_node_valid = !au1_is_32x32Blk_noisy[1];
8235                 ps_tree->ps_child_node_bl->is_node_valid = !au1_is_32x32Blk_noisy[2];
8236                 ps_tree->ps_child_node_br->is_node_valid = !au1_is_32x32Blk_noisy[3];
8237                 ps_tree->ps_child_node_tl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[0];
8238                 ps_tree->ps_child_node_tr->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[1];
8239                 ps_tree->ps_child_node_bl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[2];
8240                 ps_tree->ps_child_node_br->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[3];
8241                 ps_tree->is_node_valid = !au1_is_64x64Blk_noisy[0];
8242                 ps_tree->u1_inter_eval_enable = !au1_is_64x64Blk_noisy[0];
8243 #endif
8244 
8245                 en_merge_32x32 = (ps_tree->ps_child_node_tl->is_node_valid << 0) |
8246                                  (ps_tree->ps_child_node_tr->is_node_valid << 1) |
8247                                  (ps_tree->ps_child_node_bl->is_node_valid << 2) |
8248                                  (ps_tree->ps_child_node_br->is_node_valid << 3);
8249 
8250                 en_merge_execution = (ps_tree->ps_child_node_tl->u1_inter_eval_enable << 0) |
8251                                      (ps_tree->ps_child_node_tr->u1_inter_eval_enable << 1) |
8252                                      (ps_tree->ps_child_node_bl->u1_inter_eval_enable << 2) |
8253                                      (ps_tree->ps_child_node_br->u1_inter_eval_enable << 3) |
8254                                      (ps_tree->u1_inter_eval_enable << 4);
8255             }
8256             else
8257             {
8258                 en_merge_execution = 0x1f;
8259 
8260 #if DISABLE_BLK_MERGE_WHEN_NOISY
8261                 en_merge_32x32 = ((!au1_is_32x32Blk_noisy[0] << 0) & (en_merge_32x32 & 1)) |
8262                                  ((!au1_is_32x32Blk_noisy[1] << 1) & (en_merge_32x32 & 2)) |
8263                                  ((!au1_is_32x32Blk_noisy[2] << 2) & (en_merge_32x32 & 4)) |
8264                                  ((!au1_is_32x32Blk_noisy[3] << 3) & (en_merge_32x32 & 8));
8265 #endif
8266             }
8267 
8268             /* Re-initialize the pu_results pointers to the first struct in the stack array */
8269             ps_pu_results = as_inter_pu_results;
8270 
8271             {
8272                 WORD32 ref_ctr;
8273 
8274                 s_ctb_prms.i4_ctb_x = i4_ctb_x << 6;
8275                 s_ctb_prms.i4_ctb_y = i4_ctb_y << 6;
8276 
8277                 /* MV limit is different based on ref. PIC */
8278                 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8279                 {
8280                     SCALE_RANGE_PRMS(as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
8281                     SCALE_RANGE_PRMS(as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
8282                 }
8283 
8284                 e_merge_result = CU_SPLIT;
8285                 merge_count_32x32 = 0;
8286 
8287                 if((en_merge_32x32 & 1) && (en_merge_execution & 1))
8288                 {
8289                     range_prms_t *ps_pic_limit;
8290                     if(s_merge_prms_32x32_tl.i4_use_rec == 1)
8291                     {
8292                         ps_pic_limit = &s_pic_limit_rec;
8293                     }
8294                     else
8295                     {
8296                         ps_pic_limit = &s_pic_limit_inp;
8297                     }
8298                     /* MV limit is different based on ref. PIC */
8299                     for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8300                     {
8301                         hme_derive_search_range(
8302                             s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8303                             ps_pic_limit,
8304                             &as_mv_limit[ref_ctr],
8305                             i4_ctb_x << 6,
8306                             i4_ctb_y << 6,
8307                             32,
8308                             32);
8309 
8310                         SCALE_RANGE_PRMS_POINTERS(
8311                             s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8312                             s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8313                             2);
8314                     }
8315                     s_merge_prms_32x32_tl.i4_ctb_x_off = i4_ctb_x << 6;
8316                     s_merge_prms_32x32_tl.i4_ctb_y_off = i4_ctb_y << 6;
8317                     s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[0];
8318 
8319                     e_merge_result = hme_try_merge_high_speed(
8320                         ps_thrd_ctxt,
8321                         ps_ctxt,
8322                         ps_cur_ipe_ctb,
8323                         &s_subpel_prms,
8324                         &s_merge_prms_32x32_tl,
8325                         ps_pu_results,
8326                         &as_pu_results[0][0][0]);
8327 
8328                     if(e_merge_result == CU_MERGED)
8329                     {
8330                         inter_cu_results_t *ps_cu_results =
8331                             s_merge_prms_32x32_tl.ps_results_merge->ps_cu_results;
8332 
8333                         if(!((ps_cu_results->u1_num_best_results == 1) &&
8334                              (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8335                         {
8336                             hme_map_mvs_to_grid(
8337                                 &aps_mv_grid[0],
8338                                 s_merge_prms_32x32_tl.ps_results_merge,
8339                                 s_merge_prms_32x32_tl.au1_pred_dir_searched,
8340                                 s_merge_prms_32x32_tl.i4_num_pred_dir_actual);
8341                         }
8342 
8343                         if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8344                         {
8345                             ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8346                                 .ps_child_node_tl->is_node_valid = 1;
8347                             NULLIFY_THE_CHILDREN_NODES(
8348                                 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8349                                     .ps_child_node_tl);
8350                         }
8351 
8352                         merge_count_32x32++;
8353                         e_merge_result = CU_SPLIT;
8354                     }
8355                     else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8356                     {
8357 #if ENABLE_CU_TREE_CULLING
8358                         cur_ctb_cu_tree_t *ps_tree =
8359                             ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
8360 
8361                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8362                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
8363                         ENABLE_THE_CHILDREN_NODES(ps_tree);
8364                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8365                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8366                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8367                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8368 #endif
8369                     }
8370                 }
8371                 else if((en_merge_32x32 & 1) && (!(en_merge_execution & 1)))
8372                 {
8373 #if ENABLE_CU_TREE_CULLING
8374                     cur_ctb_cu_tree_t *ps_tree =
8375                         ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
8376 
8377                     ENABLE_THE_CHILDREN_NODES(ps_tree);
8378                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8379                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8380                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8381                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8382 #endif
8383 
8384                     if(au1_is_32x32Blk_noisy[0] && DISABLE_INTRA_WHEN_NOISY)
8385                     {
8386                         ps_tree->is_node_valid = 0;
8387                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8388                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
8389                     }
8390                 }
8391 
8392                 if((en_merge_32x32 & 2) && (en_merge_execution & 2))
8393                 {
8394                     range_prms_t *ps_pic_limit;
8395                     if(s_merge_prms_32x32_tr.i4_use_rec == 1)
8396                     {
8397                         ps_pic_limit = &s_pic_limit_rec;
8398                     }
8399                     else
8400                     {
8401                         ps_pic_limit = &s_pic_limit_inp;
8402                     }
8403                     /* MV limit is different based on ref. PIC */
8404                     for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8405                     {
8406                         hme_derive_search_range(
8407                             s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8408                             ps_pic_limit,
8409                             &as_mv_limit[ref_ctr],
8410                             (i4_ctb_x << 6) + 32,
8411                             i4_ctb_y << 6,
8412                             32,
8413                             32);
8414                         SCALE_RANGE_PRMS_POINTERS(
8415                             s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8416                             s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8417                             2);
8418                     }
8419                     s_merge_prms_32x32_tr.i4_ctb_x_off = i4_ctb_x << 6;
8420                     s_merge_prms_32x32_tr.i4_ctb_y_off = i4_ctb_y << 6;
8421                     s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[1];
8422 
8423                     e_merge_result = hme_try_merge_high_speed(
8424                         ps_thrd_ctxt,
8425                         ps_ctxt,
8426                         ps_cur_ipe_ctb,
8427                         &s_subpel_prms,
8428                         &s_merge_prms_32x32_tr,
8429                         ps_pu_results,
8430                         &as_pu_results[0][0][0]);
8431 
8432                     if(e_merge_result == CU_MERGED)
8433                     {
8434                         inter_cu_results_t *ps_cu_results =
8435                             s_merge_prms_32x32_tr.ps_results_merge->ps_cu_results;
8436 
8437                         if(!((ps_cu_results->u1_num_best_results == 1) &&
8438                              (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8439                         {
8440                             hme_map_mvs_to_grid(
8441                                 &aps_mv_grid[0],
8442                                 s_merge_prms_32x32_tr.ps_results_merge,
8443                                 s_merge_prms_32x32_tr.au1_pred_dir_searched,
8444                                 s_merge_prms_32x32_tr.i4_num_pred_dir_actual);
8445                         }
8446 
8447                         if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8448                         {
8449                             ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8450                                 .ps_child_node_tr->is_node_valid = 1;
8451                             NULLIFY_THE_CHILDREN_NODES(
8452                                 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8453                                     .ps_child_node_tr);
8454                         }
8455 
8456                         merge_count_32x32++;
8457                         e_merge_result = CU_SPLIT;
8458                     }
8459                     else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8460                     {
8461 #if ENABLE_CU_TREE_CULLING
8462                         cur_ctb_cu_tree_t *ps_tree =
8463                             ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
8464 
8465                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8466                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
8467                         ENABLE_THE_CHILDREN_NODES(ps_tree);
8468                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8469                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8470                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8471                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8472 #endif
8473                     }
8474                 }
8475                 else if((en_merge_32x32 & 2) && (!(en_merge_execution & 2)))
8476                 {
8477 #if ENABLE_CU_TREE_CULLING
8478                     cur_ctb_cu_tree_t *ps_tree =
8479                         ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
8480 
8481                     ENABLE_THE_CHILDREN_NODES(ps_tree);
8482                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8483                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8484                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8485                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8486 #endif
8487 
8488                     if(au1_is_32x32Blk_noisy[1] && DISABLE_INTRA_WHEN_NOISY)
8489                     {
8490                         ps_tree->is_node_valid = 0;
8491                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8492                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
8493                     }
8494                 }
8495 
8496                 if((en_merge_32x32 & 4) && (en_merge_execution & 4))
8497                 {
8498                     range_prms_t *ps_pic_limit;
8499                     if(s_merge_prms_32x32_bl.i4_use_rec == 1)
8500                     {
8501                         ps_pic_limit = &s_pic_limit_rec;
8502                     }
8503                     else
8504                     {
8505                         ps_pic_limit = &s_pic_limit_inp;
8506                     }
8507                     /* MV limit is different based on ref. PIC */
8508                     for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8509                     {
8510                         hme_derive_search_range(
8511                             s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8512                             ps_pic_limit,
8513                             &as_mv_limit[ref_ctr],
8514                             i4_ctb_x << 6,
8515                             (i4_ctb_y << 6) + 32,
8516                             32,
8517                             32);
8518                         SCALE_RANGE_PRMS_POINTERS(
8519                             s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8520                             s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8521                             2);
8522                     }
8523                     s_merge_prms_32x32_bl.i4_ctb_x_off = i4_ctb_x << 6;
8524                     s_merge_prms_32x32_bl.i4_ctb_y_off = i4_ctb_y << 6;
8525                     s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[2];
8526 
8527                     e_merge_result = hme_try_merge_high_speed(
8528                         ps_thrd_ctxt,
8529                         ps_ctxt,
8530                         ps_cur_ipe_ctb,
8531                         &s_subpel_prms,
8532                         &s_merge_prms_32x32_bl,
8533                         ps_pu_results,
8534                         &as_pu_results[0][0][0]);
8535 
8536                     if(e_merge_result == CU_MERGED)
8537                     {
8538                         inter_cu_results_t *ps_cu_results =
8539                             s_merge_prms_32x32_bl.ps_results_merge->ps_cu_results;
8540 
8541                         if(!((ps_cu_results->u1_num_best_results == 1) &&
8542                              (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8543                         {
8544                             hme_map_mvs_to_grid(
8545                                 &aps_mv_grid[0],
8546                                 s_merge_prms_32x32_bl.ps_results_merge,
8547                                 s_merge_prms_32x32_bl.au1_pred_dir_searched,
8548                                 s_merge_prms_32x32_bl.i4_num_pred_dir_actual);
8549                         }
8550 
8551                         if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8552                         {
8553                             ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8554                                 .ps_child_node_bl->is_node_valid = 1;
8555                             NULLIFY_THE_CHILDREN_NODES(
8556                                 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8557                                     .ps_child_node_bl);
8558                         }
8559 
8560                         merge_count_32x32++;
8561                         e_merge_result = CU_SPLIT;
8562                     }
8563                     else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8564                     {
8565 #if ENABLE_CU_TREE_CULLING
8566                         cur_ctb_cu_tree_t *ps_tree =
8567                             ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
8568 
8569                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8570                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
8571                         ENABLE_THE_CHILDREN_NODES(ps_tree);
8572                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8573                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8574                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8575                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8576 #endif
8577                     }
8578                 }
8579                 else if((en_merge_32x32 & 4) && (!(en_merge_execution & 4)))
8580                 {
8581 #if ENABLE_CU_TREE_CULLING
8582                     cur_ctb_cu_tree_t *ps_tree =
8583                         ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
8584 
8585                     ENABLE_THE_CHILDREN_NODES(ps_tree);
8586                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8587                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8588                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8589                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8590 #endif
8591 
8592                     if(au1_is_32x32Blk_noisy[2] && DISABLE_INTRA_WHEN_NOISY)
8593                     {
8594                         ps_tree->is_node_valid = 0;
8595                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8596                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
8597                     }
8598                 }
8599 
8600                 if((en_merge_32x32 & 8) && (en_merge_execution & 8))
8601                 {
8602                     range_prms_t *ps_pic_limit;
8603                     if(s_merge_prms_32x32_br.i4_use_rec == 1)
8604                     {
8605                         ps_pic_limit = &s_pic_limit_rec;
8606                     }
8607                     else
8608                     {
8609                         ps_pic_limit = &s_pic_limit_inp;
8610                     }
8611                     /* MV limit is different based on ref. PIC */
8612                     for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8613                     {
8614                         hme_derive_search_range(
8615                             s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8616                             ps_pic_limit,
8617                             &as_mv_limit[ref_ctr],
8618                             (i4_ctb_x << 6) + 32,
8619                             (i4_ctb_y << 6) + 32,
8620                             32,
8621                             32);
8622 
8623                         SCALE_RANGE_PRMS_POINTERS(
8624                             s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8625                             s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8626                             2);
8627                     }
8628                     s_merge_prms_32x32_br.i4_ctb_x_off = i4_ctb_x << 6;
8629                     s_merge_prms_32x32_br.i4_ctb_y_off = i4_ctb_y << 6;
8630                     s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[3];
8631 
8632                     e_merge_result = hme_try_merge_high_speed(
8633                         ps_thrd_ctxt,
8634                         ps_ctxt,
8635                         ps_cur_ipe_ctb,
8636                         &s_subpel_prms,
8637                         &s_merge_prms_32x32_br,
8638                         ps_pu_results,
8639                         &as_pu_results[0][0][0]);
8640 
8641                     if(e_merge_result == CU_MERGED)
8642                     {
8643                         /*inter_cu_results_t *ps_cu_results = s_merge_prms_32x32_br.ps_results_merge->ps_cu_results;
8644 
8645                         if(!((ps_cu_results->u1_num_best_results == 1) &&
8646                         (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8647                         {
8648                         hme_map_mvs_to_grid
8649                         (
8650                         &aps_mv_grid[0],
8651                         s_merge_prms_32x32_br.ps_results_merge,
8652                         s_merge_prms_32x32_br.au1_pred_dir_searched,
8653                         s_merge_prms_32x32_br.i4_num_pred_dir_actual
8654                         );
8655                         }*/
8656 
8657                         if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8658                         {
8659                             ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8660                                 .ps_child_node_br->is_node_valid = 1;
8661                             NULLIFY_THE_CHILDREN_NODES(
8662                                 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8663                                     .ps_child_node_br);
8664                         }
8665 
8666                         merge_count_32x32++;
8667                         e_merge_result = CU_SPLIT;
8668                     }
8669                     else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8670                     {
8671 #if ENABLE_CU_TREE_CULLING
8672                         cur_ctb_cu_tree_t *ps_tree =
8673                             ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
8674 
8675                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8676                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
8677                         ENABLE_THE_CHILDREN_NODES(ps_tree);
8678                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8679                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8680                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8681                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8682 #endif
8683                     }
8684                 }
8685                 else if((en_merge_32x32 & 8) && (!(en_merge_execution & 8)))
8686                 {
8687 #if ENABLE_CU_TREE_CULLING
8688                     cur_ctb_cu_tree_t *ps_tree =
8689                         ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
8690 
8691                     ENABLE_THE_CHILDREN_NODES(ps_tree);
8692                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8693                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8694                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8695                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8696 #endif
8697 
8698                     if(au1_is_32x32Blk_noisy[3] && DISABLE_INTRA_WHEN_NOISY)
8699                     {
8700                         ps_tree->is_node_valid = 0;
8701                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8702                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
8703                     }
8704                 }
8705 
8706                 /* Try merging all 32x32 to 64x64 candts */
8707                 if(((en_merge_32x32 & 0xf) == 0xf) &&
8708                    (((merge_count_32x32 == 4) && (e_me_quality_presets != ME_PRISTINE_QUALITY)) ||
8709                     ((en_merge_execution & 16) && (e_me_quality_presets == ME_PRISTINE_QUALITY))))
8710                     if((((e_me_quality_presets == ME_XTREME_SPEED_25) &&
8711                          !DISABLE_64X64_BLOCK_MERGE_IN_ME_IN_XS25) ||
8712                         (e_me_quality_presets != ME_XTREME_SPEED_25)))
8713                     {
8714                         range_prms_t *ps_pic_limit;
8715                         if(s_merge_prms_64x64.i4_use_rec == 1)
8716                         {
8717                             ps_pic_limit = &s_pic_limit_rec;
8718                         }
8719                         else
8720                         {
8721                             ps_pic_limit = &s_pic_limit_inp;
8722                         }
8723                         /* MV limit is different based on ref. PIC */
8724                         for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8725                         {
8726                             hme_derive_search_range(
8727                                 s_merge_prms_64x64.aps_mv_range[ref_ctr],
8728                                 ps_pic_limit,
8729                                 &as_mv_limit[ref_ctr],
8730                                 i4_ctb_x << 6,
8731                                 i4_ctb_y << 6,
8732                                 64,
8733                                 64);
8734 
8735                             SCALE_RANGE_PRMS_POINTERS(
8736                                 s_merge_prms_64x64.aps_mv_range[ref_ctr],
8737                                 s_merge_prms_64x64.aps_mv_range[ref_ctr],
8738                                 2);
8739                         }
8740                         s_merge_prms_64x64.i4_ctb_x_off = i4_ctb_x << 6;
8741                         s_merge_prms_64x64.i4_ctb_y_off = i4_ctb_y << 6;
8742                         s_subpel_prms.u1_is_cu_noisy = au1_is_64x64Blk_noisy[0];
8743 
8744                         e_merge_result = hme_try_merge_high_speed(
8745                             ps_thrd_ctxt,
8746                             ps_ctxt,
8747                             ps_cur_ipe_ctb,
8748                             &s_subpel_prms,
8749                             &s_merge_prms_64x64,
8750                             ps_pu_results,
8751                             &as_pu_results[0][0][0]);
8752 
8753                         if((e_merge_result == CU_MERGED) &&
8754                            (ME_PRISTINE_QUALITY != e_me_quality_presets))
8755                         {
8756                             ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8757                                 .is_node_valid = 1;
8758                             NULLIFY_THE_CHILDREN_NODES(
8759                                 ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE));
8760                         }
8761                         else if(
8762                             (e_merge_result == CU_SPLIT) &&
8763                             (ME_PRISTINE_QUALITY == e_me_quality_presets))
8764                         {
8765                             ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8766                                 .is_node_valid = 0;
8767                         }
8768                     }
8769 
8770                 /*****************************************************************/
8771                 /* UPDATION OF RESULT TO EXTERNAL STRUCTURES                     */
8772                 /*****************************************************************/
8773                 pf_ext_update_fxn((void *)ps_thrd_ctxt, (void *)ps_ctxt, i4_ctb_x, i4_ctb_y);
8774 
8775                 {
8776 #ifdef _DEBUG
8777                     S32 wd = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
8778                                  ? 64
8779                                  : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
8780                     S32 ht = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
8781                                  ? 64
8782                                  : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
8783                     ASSERT(
8784                         (wd * ht) ==
8785                         ihevce_compute_area_of_valid_cus_in_ctb(
8786                             &ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]));
8787 #endif
8788                 }
8789             }
8790 
8791             /* set the dependency for the corresponding row in enc loop */
8792             ihevce_dmgr_set_row_row_sync(
8793                 pv_dep_mngr_encloop_dep_me,
8794                 (i4_ctb_x + 1),
8795                 i4_ctb_y,
8796                 tile_col_idx /* Col Tile No. */);
8797 
8798             left_ctb_in_diff_tile = 0;
8799         }
8800     }
8801 }
8802 
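8803 /**
8804 ********************************************************************************
8805 *  @fn   void hme_refine_no_encode(coarse_me_ctxt_t *ps_ctxt,
8806 *           refine_prms_t *ps_refine_prms, multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
8807 *           S32 lyr_job_type, WORD32 i4_ping_pong, void **ppv_dep_mngr_hme_sync)
8808 *  @brief  Top level entry point for refinement ME of a non-encode layer (8x8 blk search)
8809 *  @param[in,out]  ps_ctxt : ME Handle
8810 *  @param[in]  ps_refine_prms : refinement layer prms
8811 *  @param[in]  ps_multi_thrd_ctxt : multi thread ctxt; aps_curr_inp_pre_enc[] is read from it
8812 *  @param[in]  i4_ping_pong : index into ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[]
8813 *  @param  lyr_job_type, ppv_dep_mngr_hme_sync : layer job type; HME sync dep. mngr handles
8814 *  @return None
8815 ********************************************************************************
8816 */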
hme_refine_no_encode(coarse_me_ctxt_t * ps_ctxt,refine_prms_t * ps_refine_prms,multi_thrd_ctxt_t * ps_multi_thrd_ctxt,S32 lyr_job_type,WORD32 i4_ping_pong,void ** ppv_dep_mngr_hme_sync)8817 void hme_refine_no_encode(
8818     coarse_me_ctxt_t *ps_ctxt,
8819     refine_prms_t *ps_refine_prms,
8820     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
8821     S32 lyr_job_type,
8822     WORD32 i4_ping_pong,
8823     void **ppv_dep_mngr_hme_sync)
8824 {
8825     BLK_SIZE_T e_search_blk_size, e_result_blk_size;
8826     ME_QUALITY_PRESETS_T e_me_quality_presets =
8827         ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
8828 
8829     /*************************************************************************/
8830     /* Complexity of search: Low to High                                     */
8831     /*************************************************************************/
8832     SEARCH_COMPLEXITY_T e_search_complexity;
8833 
8834     /*************************************************************************/
8835     /* Config parameter structures for varius ME submodules                  */
8836     /*************************************************************************/
8837     hme_search_prms_t s_search_prms_blk;
8838     mvbank_update_prms_t s_mv_update_prms;
8839 
8840     /*************************************************************************/
8841     /* All types of search candidates for predictor based search.            */
8842     /*************************************************************************/
8843     S32 num_init_candts = 0;
8844     search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
8845     search_node_t as_top_neighbours[4], as_left_neighbours[3];
8846     search_node_t *ps_candt_zeromv, *ps_candt_tl, *ps_candt_tr;
8847     search_node_t *ps_candt_l, *ps_candt_t;
8848     search_node_t *ps_candt_prj_br[2], *ps_candt_prj_b[2], *ps_candt_prj_r[2];
8849     search_node_t *ps_candt_prj_bl[2];
8850     search_node_t *ps_candt_prj_tr[2], *ps_candt_prj_t[2], *ps_candt_prj_tl[2];
8851     search_node_t *ps_candt_prj_coloc[2];
8852 
8853     pf_get_wt_inp fp_get_wt_inp;
8854 
8855     search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
8856     U32 au4_unique_node_map[MAP_X_MAX * 2];
8857 
8858     /*EIID */
8859     WORD32 i4_num_inter_wins = 0;  //debug code to find stat of
8860     WORD32 i4_num_comparisions = 0;  //debug code
8861     WORD32 i4_threshold_multiplier;
8862     WORD32 i4_threshold_divider;
8863     WORD32 i4_temporal_layer =
8864         ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_temporal_lyr_id;
8865 
8866     /*************************************************************************/
8867     /* points ot the search results for the blk level search (8x8/16x16)     */
8868     /*************************************************************************/
8869     search_results_t *ps_search_results;
8870 
8871     /*************************************************************************/
8872     /* Coordinates                                                           */
8873     /*************************************************************************/
8874     S32 blk_x, i4_ctb_x, blk_id_in_ctb;
8875     //S32 i4_ctb_y;
8876     S32 pos_x, pos_y;
8877     S32 blk_id_in_full_ctb;
8878     S32 i4_num_srch_cands;
8879 
8880     S32 blk_y;
8881 
8882     /*************************************************************************/
8883     /* Related to dimensions of block being searched and pic dimensions      */
8884     /*************************************************************************/
8885     S32 blk_wd, blk_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic;
8886     S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
8887     S32 num_results_prev_layer;
8888 
8889     /*************************************************************************/
8890     /* Size of a basic unit for this layer. For non encode layers, we search */
8891     /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
8892     /* basic unit size is the ctb size.                                      */
8893     /*************************************************************************/
8894     S32 unit_size;
8895 
8896     /*************************************************************************/
8897     /* Pointers to context in current and coarser layers                     */
8898     /*************************************************************************/
8899     layer_ctxt_t *ps_curr_layer, *ps_coarse_layer;
8900 
8901     /*************************************************************************/
8902     /* to store mv range per blk, and picture limit, allowed search range    */
8903     /* range prms in hpel and qpel units as well                             */
8904     /*************************************************************************/
8905     range_prms_t s_range_prms_inp, s_range_prms_rec;
8906     range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
8907     /*************************************************************************/
8908     /* These variables are used to track number of references at different   */
8909     /* stages of ME.                                                         */
8910     /*************************************************************************/
8911     S32 i4_num_ref_fpel, i4_num_ref_before_merge;
8912     S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
8913     S32 lambda_inp = ps_refine_prms->lambda_inp;
8914 
8915     /*************************************************************************/
8916     /* When a layer is implicit, it means that it searches on 1 or 2 ref idx */
8917     /* Explicit means it searches on all active ref idx.                     */
8918     /*************************************************************************/
8919     S32 curr_layer_implicit, prev_layer_implicit;
8920 
8921     /*************************************************************************/
8922     /* Variables for loop counts                                             */
8923     /*************************************************************************/
8924     S32 id;
8925     S08 i1_ref_idx;
8926 
8927     /*************************************************************************/
8928     /* Input pointer and stride                                              */
8929     /*************************************************************************/
8930     U08 *pu1_inp;
8931     S32 i4_inp_stride;
8932 
8933     S32 end_of_frame;
8934 
8935     S32 num_sync_units_in_row;
8936 
8937     PF_HME_PROJECT_COLOC_CANDT_FXN pf_hme_project_coloc_candt;
8938     ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
8939 
8940     /*************************************************************************/
8941     /* Pointers to current and coarse layer are needed for projection */
8942     /* Pointer to prev layer are needed for other candts like coloc   */
8943     /*************************************************************************/
8944     ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
8945 
8946     ps_coarse_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id + 1];
8947 
8948     num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
8949 
8950     /* Function pointer is selected based on the C vc X86 macro */
8951 
8952     fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list)
8953                         ->pf_get_wt_inp_8x8;
8954 
8955     i4_inp_stride = ps_curr_layer->i4_inp_stride;
8956     i4_pic_wd = ps_curr_layer->i4_wd;
8957     i4_pic_ht = ps_curr_layer->i4_ht;
8958     e_search_complexity = ps_refine_prms->e_search_complexity;
8959 
8960     end_of_frame = 0;
8961 
8962     /* If the previous layer is non-encode layer, then use dyadic projection */
8963     if(0 == ps_ctxt->u1_encode[ps_refine_prms->i4_layer_id + 1])
8964         pf_hme_project_coloc_candt = hme_project_coloc_candt_dyadic;
8965     else
8966         pf_hme_project_coloc_candt = hme_project_coloc_candt;
8967 
8968     /* This points to all the initial candts */
8969     ps_search_candts = &as_search_candts[0];
8970 
8971     {
8972         e_search_blk_size = BLK_8x8;
8973         blk_wd = blk_ht = 8;
8974         blk_size_shift = 3;
8975         s_mv_update_prms.i4_shift = 0;
8976         /*********************************************************************/
8977         /* In case we do not encode this layer, we search 8x8 with or without*/
8978         /* enable 4x4 SAD.                                                   */
8979         /*********************************************************************/
8980         {
8981             S32 i4_mask = (ENABLE_2Nx2N);
8982 
8983             e_result_blk_size = BLK_8x8;
8984             if(ps_refine_prms->i4_enable_4x4_part)
8985             {
8986                 i4_mask |= (ENABLE_NxN);
8987                 e_result_blk_size = BLK_4x4;
8988                 s_mv_update_prms.i4_shift = 1;
8989             }
8990 
8991             s_search_prms_blk.i4_part_mask = i4_mask;
8992         }
8993 
8994         unit_size = blk_wd;
8995         s_search_prms_blk.i4_inp_stride = unit_size;
8996     }
8997 
8998     /* This is required to properly update the layer mv bank */
8999     s_mv_update_prms.e_search_blk_size = e_search_blk_size;
9000     s_search_prms_blk.e_blk_size = e_search_blk_size;
9001 
9002     /*************************************************************************/
9003     /* If current layer is explicit, then the number of ref frames are to    */
9004     /* be same as previous layer. Else it will be 2                          */
9005     /*************************************************************************/
9006     i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
9007     if(ps_refine_prms->explicit_ref)
9008     {
9009         curr_layer_implicit = 0;
9010         i4_num_ref_fpel = i4_num_ref_prev_layer;
9011         /* 100578 : Using same mv cost fun. for all presets. */
9012         s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_refine;
9013     }
9014     else
9015     {
9016         i4_num_ref_fpel = 2;
9017         curr_layer_implicit = 1;
9018         {
9019             if(ME_MEDIUM_SPEED > e_me_quality_presets)
9020             {
9021                 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit;
9022             }
9023             else
9024             {
9025 #if USE_MODIFIED == 1
9026                 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
9027 #else
9028                 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
9029 #endif
9030             }
9031         }
9032     }
9033 
9034     i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
9035     if(ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type ==
9036            IV_IDR_FRAME ||
9037        ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type == IV_I_FRAME)
9038     {
9039         i4_num_ref_fpel = 1;
9040     }
9041     if(i4_num_ref_prev_layer <= 2)
9042     {
9043         prev_layer_implicit = 1;
9044         curr_layer_implicit = 1;
9045         i4_num_ref_each_dir = 1;
9046     }
9047     else
9048     {
9049         /* It is assumed that we have equal number of references in each dir */
9050         //ASSERT(!(i4_num_ref_prev_layer & 1));
9051         prev_layer_implicit = 0;
9052         i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
9053     }
9054     s_mv_update_prms.i4_num_ref = i4_num_ref_fpel;
9055     s_mv_update_prms.i4_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
9056     s_mv_update_prms.i4_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
9057 
9058     /* this can be kept to 1 or 2 */
9059     i4_num_ref_before_merge = 2;
9060     i4_num_ref_before_merge = MIN(i4_num_ref_before_merge, i4_num_ref_fpel);
9061 
9062     /* Set up place holders to hold the search nodes of each initial candt */
9063     for(i = 0; i < MAX_INIT_CANDTS; i++)
9064     {
9065         ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
9066         INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
9067     }
9068 
9069     /* redundant, but doing it here since it is used in pred ctxt init */
9070     ps_candt_zeromv = ps_search_candts[0].ps_search_node;
9071     for(i = 0; i < 3; i++)
9072     {
9073         search_node_t *ps_search_node;
9074         ps_search_node = &as_left_neighbours[i];
9075         INIT_SEARCH_NODE(ps_search_node, 0);
9076         ps_search_node = &as_top_neighbours[i];
9077         INIT_SEARCH_NODE(ps_search_node, 0);
9078     }
9079 
9080     INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
9081     /* bottom left node always not available for the blk being searched */
9082     as_left_neighbours[2].u1_is_avail = 0;
9083     /*************************************************************************/
9084     /* Initialize all the search results structure here. We update all the   */
9085     /* search results to default values, and configure things like blk sizes */
9086     /*************************************************************************/
9087     if(ps_refine_prms->i4_encode == 0)
9088     {
9089         S32 pred_lx;
9090         search_results_t *ps_search_results;
9091 
9092         ps_search_results = &ps_ctxt->s_search_results_8x8;
9093         hme_init_search_results(
9094             ps_search_results,
9095             i4_num_ref_fpel,
9096             ps_refine_prms->i4_num_fpel_results,
9097             ps_refine_prms->i4_num_results_per_part,
9098             e_search_blk_size,
9099             0,
9100             0,
9101             &ps_ctxt->au1_is_past[0]);
9102         for(pred_lx = 0; pred_lx < 2; pred_lx++)
9103         {
9104             hme_init_pred_ctxt_no_encode(
9105                 &ps_search_results->as_pred_ctxt[pred_lx],
9106                 ps_search_results,
9107                 &as_top_neighbours[0],
9108                 &as_left_neighbours[0],
9109                 &ps_candt_prj_coloc[0],
9110                 ps_candt_zeromv,
9111                 ps_candt_zeromv,
9112                 pred_lx,
9113                 lambda_inp,
9114                 ps_refine_prms->lambda_q_shift,
9115                 &ps_ctxt->apu1_ref_bits_tlu_lc[0],
9116                 &ps_ctxt->ai2_ref_scf[0]);
9117         }
9118     }
9119 
9120     /*********************************************************************/
9121     /* Initialize the dyn. search range params. for each reference index */
9122     /* in current layer ctxt                                             */
9123     /*********************************************************************/
9124     /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
9125     if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
9126     {
9127         WORD32 ref_ctr;
9128 
9129         for(ref_ctr = 0; ref_ctr < s_mv_update_prms.i4_num_ref; ref_ctr++)
9130         {
9131             INIT_DYN_SEARCH_PRMS(
9132                 &ps_ctxt->s_coarse_dyn_range_prms
9133                      .as_dyn_range_prms[ps_refine_prms->i4_layer_id][ref_ctr],
9134                 ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
9135         }
9136     }
9137 
9138     /* Next set up initial candidates according to a given set of rules.   */
9139     /* The number of initial candidates affects the quality of ME in the   */
9140     /* case of motion with multiple degrees of freedom. In case of simple  */
9141     /* translational motion, a current and a few causal and non causal     */
9142     /* candts would suffice. More candidates help to cover more complex    */
9143     /* cases like partitions, rotation/zoom, occlusion in/out, fine motion */
9144     /* where multiple ref helps etc.                                       */
9145     /* The candidate choice also depends on the following parameters.      */
9146     /* e_search_complexity: SRCH_CX_LOW, SRCH_CX_MED, SRCH_CX_HIGH         */
9147     /* Whether we encode or not, and the type of search across reference   */
9148     /* i.e. the previous layer may have been explicit/implicit and curr    */
9149     /* layer may be explicit/implicit                                      */
9150 
9151     /* 0, 0, L, T, projected coloc best always presnt by default */
9152     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(ZERO_MV, e_me_quality_presets);
9153     ps_candt_zeromv = ps_search_candts[id].ps_search_node;
9154     ps_search_candts[id].u1_num_steps_refine = 0;
9155     ps_candt_zeromv->s_mv.i2_mvx = 0;
9156     ps_candt_zeromv->s_mv.i2_mvy = 0;
9157 
9158     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(SPATIAL_LEFT0, e_me_quality_presets);
9159     ps_candt_l = ps_search_candts[id].ps_search_node;
9160     ps_search_candts[id].u1_num_steps_refine = 0;
9161 
9162     /* Even in ME_HIGH_SPEED mode, in layer 0, blocks */
9163     /* not at the CTB boundary use the causal T and */
9164     /* not the projected T, although the candidate is */
9165     /* still pointed to by ps_candt_prj_t[0] */
9166     if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9167     {
9168         /* Using Projected top to eliminate sync */
9169         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9170             PROJECTED_TOP0, e_me_quality_presets);
9171         ps_candt_prj_t[0] = ps_search_candts[id].ps_search_node;
9172         ps_search_candts[id].u1_num_steps_refine = 1;
9173     }
9174     else
9175     {
9176         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9177             SPATIAL_TOP0, e_me_quality_presets);
9178         ps_candt_t = ps_search_candts[id].ps_search_node;
9179         ps_search_candts[id].u1_num_steps_refine = 0;
9180     }
9181 
9182     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9183         PROJECTED_COLOC0, e_me_quality_presets);
9184     ps_candt_prj_coloc[0] = ps_search_candts[id].ps_search_node;
9185     ps_search_candts[id].u1_num_steps_refine = 1;
9186 
9187     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9188         PROJECTED_COLOC1, e_me_quality_presets);
9189     ps_candt_prj_coloc[1] = ps_search_candts[id].ps_search_node;
9190     ps_search_candts[id].u1_num_steps_refine = 1;
9191 
9192     if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9193     {
9194         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9195             PROJECTED_TOP_RIGHT0, e_me_quality_presets);
9196         ps_candt_prj_tr[0] = ps_search_candts[id].ps_search_node;
9197         ps_search_candts[id].u1_num_steps_refine = 1;
9198 
9199         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9200             PROJECTED_TOP_LEFT0, e_me_quality_presets);
9201         ps_candt_prj_tl[0] = ps_search_candts[id].ps_search_node;
9202         ps_search_candts[id].u1_num_steps_refine = 1;
9203     }
9204     else
9205     {
9206         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9207             SPATIAL_TOP_RIGHT0, e_me_quality_presets);
9208         ps_candt_tr = ps_search_candts[id].ps_search_node;
9209         ps_search_candts[id].u1_num_steps_refine = 0;
9210 
9211         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9212             SPATIAL_TOP_LEFT0, e_me_quality_presets);
9213         ps_candt_tl = ps_search_candts[id].ps_search_node;
9214         ps_search_candts[id].u1_num_steps_refine = 0;
9215     }
9216 
9217     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9218         PROJECTED_RIGHT0, e_me_quality_presets);
9219     ps_candt_prj_r[0] = ps_search_candts[id].ps_search_node;
9220     ps_search_candts[id].u1_num_steps_refine = 1;
9221 
9222     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9223         PROJECTED_BOTTOM0, e_me_quality_presets);
9224     ps_candt_prj_b[0] = ps_search_candts[id].ps_search_node;
9225     ps_search_candts[id].u1_num_steps_refine = 1;
9226 
9227     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9228         PROJECTED_BOTTOM_RIGHT0, e_me_quality_presets);
9229     ps_candt_prj_br[0] = ps_search_candts[id].ps_search_node;
9230     ps_search_candts[id].u1_num_steps_refine = 1;
9231 
9232     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9233         PROJECTED_BOTTOM_LEFT0, e_me_quality_presets);
9234     ps_candt_prj_bl[0] = ps_search_candts[id].ps_search_node;
9235     ps_search_candts[id].u1_num_steps_refine = 1;
9236 
9237     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9238         PROJECTED_RIGHT1, e_me_quality_presets);
9239     ps_candt_prj_r[1] = ps_search_candts[id].ps_search_node;
9240     ps_search_candts[id].u1_num_steps_refine = 1;
9241 
9242     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9243         PROJECTED_BOTTOM1, e_me_quality_presets);
9244     ps_candt_prj_b[1] = ps_search_candts[id].ps_search_node;
9245     ps_search_candts[id].u1_num_steps_refine = 1;
9246 
9247     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9248         PROJECTED_BOTTOM_RIGHT1, e_me_quality_presets);
9249     ps_candt_prj_br[1] = ps_search_candts[id].ps_search_node;
9250     ps_search_candts[id].u1_num_steps_refine = 1;
9251 
9252     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9253         PROJECTED_BOTTOM_LEFT1, e_me_quality_presets);
9254     ps_candt_prj_bl[1] = ps_search_candts[id].ps_search_node;
9255     ps_search_candts[id].u1_num_steps_refine = 1;
9256 
9257     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(PROJECTED_TOP1, e_me_quality_presets);
9258     ps_candt_prj_t[1] = ps_search_candts[id].ps_search_node;
9259     ps_search_candts[id].u1_num_steps_refine = 1;
9260 
9261     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9262         PROJECTED_TOP_RIGHT1, e_me_quality_presets);
9263     ps_candt_prj_tr[1] = ps_search_candts[id].ps_search_node;
9264     ps_search_candts[id].u1_num_steps_refine = 1;
9265 
9266     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9267         PROJECTED_TOP_LEFT1, e_me_quality_presets);
9268     ps_candt_prj_tl[1] = ps_search_candts[id].ps_search_node;
9269     ps_search_candts[id].u1_num_steps_refine = 1;
9270 
9271     /*************************************************************************/
9272     /* Now that the candidates have been ordered, to choose the right number */
9273     /* of initial candidates.                                                */
9274     /*************************************************************************/
9275     if(curr_layer_implicit && !prev_layer_implicit)
9276     {
9277         if(e_search_complexity == SEARCH_CX_LOW)
9278             num_init_candts = 7;
9279         else if(e_search_complexity == SEARCH_CX_MED)
9280             num_init_candts = 13;
9281         else if(e_search_complexity == SEARCH_CX_HIGH)
9282             num_init_candts = 18;
9283         else
9284             ASSERT(0);
9285     }
9286     else
9287     {
9288         if(e_search_complexity == SEARCH_CX_LOW)
9289             num_init_candts = 5;
9290         else if(e_search_complexity == SEARCH_CX_MED)
9291             num_init_candts = 11;
9292         else if(e_search_complexity == SEARCH_CX_HIGH)
9293             num_init_candts = 16;
9294         else
9295             ASSERT(0);
9296     }
9297 
9298     if(ME_XTREME_SPEED_25 == e_me_quality_presets)
9299     {
9300         num_init_candts = NUM_INIT_SEARCH_CANDS_IN_L1_AND_L2_ME_IN_XS25;
9301     }
9302 
9303     /*************************************************************************/
9304     /* The following search parameters are fixed throughout the search across*/
9305     /* all blks. So these are configured outside processing loop             */
9306     /*************************************************************************/
9307     s_search_prms_blk.i4_num_init_candts = num_init_candts;
9308     s_search_prms_blk.i4_start_step = 1;
9309     s_search_prms_blk.i4_use_satd = 0;
9310     s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
9311     /* we use recon only for encoded layers, otherwise it is not available */
9312     s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
9313 
9314     s_search_prms_blk.ps_search_candts = ps_search_candts;
9315     /* We use the same mv_range for all ref. pic. So assign to member 0 */
9316     if(s_search_prms_blk.i4_use_rec)
9317         s_search_prms_blk.aps_mv_range[0] = &s_range_prms_rec;
9318     else
9319         s_search_prms_blk.aps_mv_range[0] = &s_range_prms_inp;
9320     /*************************************************************************/
9321     /* Initialize coordinates. Meaning as follows                            */
9322     /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks     */
9323     /* blk_y : same as above, y coord.                                       */
9324     /* num_blks_in_this_ctb : number of blks in this given ctb that starts   */
9325     /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries.      */
9326     /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb realtive to top left  */
9327     /* corner of the picture. Always multiple of 64.                         */
9328     /* blk_id_in_ctb : encode order id of the blk in the ctb.                */
9329     /*************************************************************************/
9330     blk_y = 0;
9331     blk_id_in_ctb = 0;
9332 
9333     GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
9334 
9335     /* Get the number of sync units in a row based on encode/non enocde layer */
9336     num_sync_units_in_row = num_blks_in_row;
9337 
9338     /*************************************************************************/
9339     /* Picture limit on all 4 sides. This will be used to set mv limits for  */
9340     /* every block given its coordinate. Note thsi assumes that the min amt  */
9341     /* of padding to right of pic is equal to the blk size. If we go all the */
9342     /* way upto 64x64, then the min padding on right size of picture should  */
9343     /* be 64, and also on bottom side of picture.                            */
9344     /*************************************************************************/
9345     SET_PIC_LIMIT(
9346         s_pic_limit_inp,
9347         ps_curr_layer->i4_pad_x_inp,
9348         ps_curr_layer->i4_pad_y_inp,
9349         ps_curr_layer->i4_wd,
9350         ps_curr_layer->i4_ht,
9351         s_search_prms_blk.i4_num_steps_post_refine);
9352 
9353     SET_PIC_LIMIT(
9354         s_pic_limit_rec,
9355         ps_curr_layer->i4_pad_x_rec,
9356         ps_curr_layer->i4_pad_y_rec,
9357         ps_curr_layer->i4_wd,
9358         ps_curr_layer->i4_ht,
9359         s_search_prms_blk.i4_num_steps_post_refine);
9360 
9361     /*************************************************************************/
9362     /* set the MV limit per ref. pic.                                        */
9363     /*    - P pic. : Based on the config params.                             */
9364     /*    - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
9365     /*************************************************************************/
9366     {
9367         WORD32 ref_ctr;
9368         /* Only for B/b pic. */
9369         if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
9370         {
9371             WORD16 i2_mv_y_per_poc, i2_max_mv_y;
9372             WORD32 cur_poc, ref_poc, abs_poc_diff;
9373 
9374             cur_poc = ps_ctxt->i4_curr_poc;
9375 
9376             /* Get abs MAX for symmetric search */
9377             i2_mv_y_per_poc = MAX(
9378                 ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[ps_refine_prms->i4_layer_id],
9379                 (ABS(ps_ctxt->s_coarse_dyn_range_prms
9380                          .i2_dyn_min_y_per_poc[ps_refine_prms->i4_layer_id])));
9381 
9382             for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
9383             {
9384                 ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr];
9385                 abs_poc_diff = ABS((cur_poc - ref_poc));
9386                 /* Get the cur. max MV based on POC distance */
9387                 i2_max_mv_y = i2_mv_y_per_poc * abs_poc_diff;
9388                 i2_max_mv_y = MIN(i2_max_mv_y, ps_curr_layer->i2_max_mv_y);
9389 
9390                 as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
9391                 as_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y;
9392                 as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
9393                 as_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y;
9394             }
9395         }
9396         else
9397         {
9398             /* Set the Config. File Params for P pic. */
9399             for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
9400             {
9401                 as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
9402                 as_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y;
9403                 as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
9404                 as_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y;
9405             }
9406         }
9407     }
9408 
9409     /* EIID: Calculate threshold based on quality preset and/or temporal layers */
9410     if(e_me_quality_presets == ME_MEDIUM_SPEED)
9411     {
9412         i4_threshold_multiplier = 1;
9413         i4_threshold_divider = 4;
9414     }
9415     else if(e_me_quality_presets == ME_HIGH_SPEED)
9416     {
9417         i4_threshold_multiplier = 1;
9418         i4_threshold_divider = 2;
9419     }
9420     else if((e_me_quality_presets == ME_XTREME_SPEED) || (e_me_quality_presets == ME_XTREME_SPEED_25))
9421     {
9422 #if OLD_XTREME_SPEED
9423         /* Hard coding the temporal ID value to 1, if it is older xtreme speed */
9424         i4_temporal_layer = 1;
9425 #endif
9426         if(i4_temporal_layer == 0)
9427         {
9428             i4_threshold_multiplier = 3;
9429             i4_threshold_divider = 4;
9430         }
9431         else if(i4_temporal_layer == 1)
9432         {
9433             i4_threshold_multiplier = 3;
9434             i4_threshold_divider = 4;
9435         }
9436         else if(i4_temporal_layer == 2)
9437         {
9438             i4_threshold_multiplier = 1;
9439             i4_threshold_divider = 1;
9440         }
9441         else
9442         {
9443             i4_threshold_multiplier = 5;
9444             i4_threshold_divider = 4;
9445         }
9446     }
9447     else if(e_me_quality_presets == ME_HIGH_QUALITY)
9448     {
9449         i4_threshold_multiplier = 1;
9450         i4_threshold_divider = 1;
9451     }
9452 
9453     /*************************************************************************/
9454     /*************************************************************************/
9455     /*************************************************************************/
9456     /* START OF THE CORE LOOP                                                */
9457     /* If Encode is 0, then we just loop over each blk                       */
9458     /*************************************************************************/
9459     /*************************************************************************/
9460     /*************************************************************************/
9461     while(0 == end_of_frame)
9462     {
9463         job_queue_t *ps_job;
9464         ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_row;  //EIID
9465         WORD32 i4_ctb_row_ctr;  //counter to calculate CTB row counter. It's (row_ctr /4)
9466         WORD32 i4_num_ctbs_in_row = (num_blks_in_row + 3) / 4;  //calculations verified for L1 only
9467         //+3 to get ceil values when divided by 4
9468         WORD32 i4_num_4x4_blocks_in_ctb_at_l1 =
9469             8 * 8;  //considering CTB size 32x32 at L1. hardcoded for now
9470         //if there is variable for ctb size use that and this variable can be derived
9471         WORD32 offset_val, check_dep_pos, set_dep_pos;
9472         void *pv_hme_dep_mngr;
9473         ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row;
9474 
9475         /* Get the current layer HME Dep Mngr       */
9476         /* Note : Use layer_id - 1 in HME layers    */
9477 
9478         pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_refine_prms->i4_layer_id - 1];
9479 
9480         /* Get the current row from the job queue */
9481         ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
9482             ps_multi_thrd_ctxt, lyr_job_type, 1, i4_ping_pong);
9483 
9484         /* If all rows are done, set the end of process flag to 1, */
9485         /* and the current row to -1 */
9486         if(NULL == ps_job)
9487         {
9488             blk_y = -1;
9489             end_of_frame = 1;
9490 
9491             continue;
9492         }
9493 
9494         if(1 == ps_ctxt->s_frm_prms.is_i_pic)
9495         {
9496             /* set the output dependency of current row */
9497             ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
9498             continue;
9499         }
9500 
9501         blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
9502         blk_x = 0;
9503         i4_ctb_x = 0;
9504 
9505         /* wait for Corresponding Pre intra Job to be completed */
9506         if(1 == ps_refine_prms->i4_layer_id)
9507         {
9508             volatile UWORD32 i4_l1_done;
9509             volatile UWORD32 *pi4_l1_done;
9510             pi4_l1_done = (volatile UWORD32 *)&ps_multi_thrd_ctxt
9511                               ->aai4_l1_pre_intra_done[i4_ping_pong][blk_y >> 2];
9512             i4_l1_done = *pi4_l1_done;
9513             while(!i4_l1_done)
9514             {
9515                 i4_l1_done = *pi4_l1_done;
9516             }
9517         }
9518         /* Set Variables for Dep. Checking and Setting */
9519         set_dep_pos = blk_y + 1;
9520         if(blk_y > 0)
9521         {
9522             offset_val = 2;
9523             check_dep_pos = blk_y - 1;
9524         }
9525         else
9526         {
9527             /* First row should run without waiting */
9528             offset_val = -1;
9529             check_dep_pos = 0;
9530         }
9531 
9532         /* EIID: calculate ed_blk_ctxt pointer for current row */
9533         /* valid for only layer-1. not verified and used for other layers */
9534         i4_ctb_row_ctr = blk_y / 4;
9535         ps_ed_blk_ctxt_curr_row =
9536             ps_ctxt->ps_ed_blk + (i4_ctb_row_ctr * i4_num_ctbs_in_row *
9537                                   i4_num_4x4_blocks_in_ctb_at_l1);  //valid for L1 only
9538         ps_ed_ctb_l1_row = ps_ctxt->ps_ed_ctb_l1 + (i4_ctb_row_ctr * i4_num_ctbs_in_row);
9539 
9540         /* if non-encode layer then i4_ctb_x will be same as blk_x */
9541         /* loop over all the units in a row                        */
9542         for(; i4_ctb_x < num_sync_units_in_row; i4_ctb_x++)
9543         {
9544             ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_ctb;  //EIDD
9545             ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_curr;
9546             WORD32 i4_ctb_blk_ctr = i4_ctb_x / 4;
9547 
9548             /* Wait till top row block is processed   */
9549             /* Currently checking till top right block*/
9550 
9551             /* Disabled since all candidates, except for */
9552             /* L and C, are projected from the coarser layer, */
9553             /* only in ME_HIGH_SPEED mode */
9554             if((ME_MEDIUM_SPEED > e_me_quality_presets))
9555             {
9556                 if(i4_ctb_x < (num_sync_units_in_row - 1))
9557                 {
9558                     ihevce_dmgr_chk_row_row_sync(
9559                         pv_hme_dep_mngr,
9560                         i4_ctb_x,
9561                         offset_val,
9562                         check_dep_pos,
9563                         0, /* Col Tile No. : Not supported in PreEnc*/
9564                         ps_ctxt->thrd_id);
9565                 }
9566             }
9567 
9568             {
9569                 /* for non encoder layer only one block is processed */
9570                 num_blks_in_this_ctb = 1;
9571             }
9572 
9573             /* EIID: derive ed_ctxt ptr for current CTB */
9574             ps_ed_blk_ctxt_curr_ctb =
9575                 ps_ed_blk_ctxt_curr_row +
9576                 (i4_ctb_blk_ctr *
9577                  i4_num_4x4_blocks_in_ctb_at_l1);  //currently valid for l1 layer only
9578             ps_ed_ctb_l1_curr = ps_ed_ctb_l1_row + i4_ctb_blk_ctr;
9579 
9580             /* loop over all the blocks in CTB will always be 1 */
9581             for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
9582             {
9583                 {
9584                     /* non encode layer */
9585                     blk_x = i4_ctb_x;
9586                     blk_id_in_full_ctb = 0;
9587                     s_search_prms_blk.i4_cu_x_off = s_search_prms_blk.i4_cu_y_off = 0;
9588                 }
9589 
9590                 /* get the current input blk point */
9591                 pos_x = blk_x << blk_size_shift;
9592                 pos_y = blk_y << blk_size_shift;
9593                 pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
9594 
9595                 /*********************************************************************/
9596                 /* replicate the inp buffer at blk or ctb level for each ref id,     */
9597                 /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
9598                 /* thereby avoiding a bloat up of memory. If we did all references   */
9599                 /* weighted pred, we will end up with a duplicate copy of each ref   */
9600                 /* at each layer, since we need to preserve the original reference.  */
9601                 /* ToDo: Need to observe performance with this mechanism and compare */
9602                 /* with case where ref is weighted.                                  */
9603                 /*********************************************************************/
9604                 if(blk_id_in_ctb == 0)
9605                 {
9606                     fp_get_wt_inp(
9607                         ps_curr_layer,
9608                         &ps_ctxt->s_wt_pred,
9609                         unit_size,
9610                         pos_x,
9611                         pos_y,
9612                         unit_size,
9613                         ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
9614                         ps_ctxt->i4_wt_pred_enable_flag);
9615                 }
9616 
9617                 s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
9618                 s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
9619                 /* Select search results from a suitable search result in the context */
9620                 {
9621                     ps_search_results = &ps_ctxt->s_search_results_8x8;
9622                 }
9623 
9624                 s_search_prms_blk.ps_search_results = ps_search_results;
9625 
9626                 /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
9627                 hme_reset_search_results(
9628                     ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
9629 
9630                 /* Loop across different Ref IDx */
9631                 for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref_fpel; i1_ref_idx++)
9632                 {
9633                     S32 next_blk_offset = (e_search_blk_size == BLK_16x16) ? 22 : 12;
9634                     S32 prev_blk_offset = 6;
9635                     S32 resultid;
9636 
9637                     /*********************************************************************/
9638                     /* For every blk in the picture, the search range needs to be derived*/
9639                     /* Any blk can have any mv, but practical search constraints are     */
9640                     /* imposed by the picture boundary and amt of padding.               */
9641                     /*********************************************************************/
9642                     /* MV limit is different based on ref. PIC */
9643                     hme_derive_search_range(
9644                         &s_range_prms_inp,
9645                         &s_pic_limit_inp,
9646                         &as_mv_limit[i1_ref_idx],
9647                         pos_x,
9648                         pos_y,
9649                         blk_wd,
9650                         blk_ht);
9651                     hme_derive_search_range(
9652                         &s_range_prms_rec,
9653                         &s_pic_limit_rec,
9654                         &as_mv_limit[i1_ref_idx],
9655                         pos_x,
9656                         pos_y,
9657                         blk_wd,
9658                         blk_ht);
9659 
9660                     s_search_prms_blk.i1_ref_idx = i1_ref_idx;
9661                     ps_candt_zeromv->i1_ref_idx = i1_ref_idx;
9662 
9663                     i4_num_srch_cands = 1;
9664 
9665                     if(1 != ps_refine_prms->i4_layer_id)
9666                     {
9667                         S32 x, y;
9668                         x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
9669                         y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
9670 
9671                         if(ME_MEDIUM_SPEED > e_me_quality_presets)
9672                         {
9673                             hme_get_spatial_candt(
9674                                 ps_curr_layer,
9675                                 e_search_blk_size,
9676                                 blk_x,
9677                                 blk_y,
9678                                 i1_ref_idx,
9679                                 &as_top_neighbours[0],
9680                                 &as_left_neighbours[0],
9681                                 0,
9682                                 ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
9683                                 0,
9684                                 ps_refine_prms->i4_encode);
9685 
9686                             *ps_candt_tr = as_top_neighbours[3];
9687                             *ps_candt_t = as_top_neighbours[1];
9688                             *ps_candt_tl = as_top_neighbours[0];
9689                             i4_num_srch_cands += 3;
9690                         }
9691                         else
9692                         {
9693                             layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
9694                             S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
9695                             S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
9696                             search_node_t *ps_search_node;
9697                             S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
9698                             hme_mv_t *ps_mv, *ps_mv_base;
9699                             S08 *pi1_ref_idx, *pi1_ref_idx_base;
9700                             S32 jump = 1, mvs_in_blk, mvs_in_row;
9701                             S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
9702 
9703                             if(i4_blk_size1 != i4_blk_size2)
9704                             {
9705                                 blk_x_temp <<= 1;
9706                                 blk_y_temp <<= 1;
9707                                 jump = 2;
9708                                 if((i4_blk_size1 << 2) == i4_blk_size2)
9709                                 {
9710                                     blk_x_temp <<= 1;
9711                                     blk_y_temp <<= 1;
9712                                     jump = 4;
9713                                 }
9714                             }
9715 
9716                             mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
9717                             mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
9718 
9719                             /* Adjust the blk coord to point to top left locn */
9720                             blk_x_temp -= 1;
9721                             blk_y_temp -= 1;
9722 
9723                             /* Pick up the mvs from the location */
9724                             i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
9725                             i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
9726 
9727                             ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
9728                             pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
9729 
9730                             ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
9731                             pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
9732 
9733                             ps_mv_base = ps_mv;
9734                             pi1_ref_idx_base = pi1_ref_idx;
9735 
9736                             ps_search_node = &as_left_neighbours[0];
9737                             ps_mv = ps_mv_base + mvs_in_row;
9738                             pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
9739                             COPY_MV_TO_SEARCH_NODE(
9740                                 ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
9741 
9742                             i4_num_srch_cands++;
9743                         }
9744                     }
9745                     else
9746                     {
9747                         S32 x, y;
9748                         x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
9749                         y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
9750 
9751                         if(ME_MEDIUM_SPEED > e_me_quality_presets)
9752                         {
9753                             hme_get_spatial_candt_in_l1_me(
9754                                 ps_curr_layer,
9755                                 e_search_blk_size,
9756                                 blk_x,
9757                                 blk_y,
9758                                 i1_ref_idx,
9759                                 !ps_search_results->pu1_is_past[i1_ref_idx],
9760                                 &as_top_neighbours[0],
9761                                 &as_left_neighbours[0],
9762                                 0,
9763                                 ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
9764                                 0,
9765                                 ps_ctxt->s_frm_prms.u1_num_active_ref_l0,
9766                                 ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
9767 
9768                             *ps_candt_tr = as_top_neighbours[3];
9769                             *ps_candt_t = as_top_neighbours[1];
9770                             *ps_candt_tl = as_top_neighbours[0];
9771 
9772                             i4_num_srch_cands += 3;
9773                         }
9774                         else
9775                         {
9776                             layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
9777                             S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
9778                             S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
9779                             S32 i4_mv_pos_in_implicit_array;
9780                             search_node_t *ps_search_node;
9781                             S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
9782                             hme_mv_t *ps_mv, *ps_mv_base;
9783                             S08 *pi1_ref_idx, *pi1_ref_idx_base;
9784                             S32 jump = 1, mvs_in_blk, mvs_in_row;
9785                             S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
9786                             U08 u1_pred_dir = !ps_search_results->pu1_is_past[i1_ref_idx];
9787                             S32 i4_num_results_in_given_dir =
9788                                 ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
9789                                                        ps_ctxt->s_frm_prms.u1_num_active_ref_l1)
9790                                                     : (ps_layer_mvbank->i4_num_mvs_per_ref *
9791                                                        ps_ctxt->s_frm_prms.u1_num_active_ref_l0));
9792 
9793                             if(i4_blk_size1 != i4_blk_size2)
9794                             {
9795                                 blk_x_temp <<= 1;
9796                                 blk_y_temp <<= 1;
9797                                 jump = 2;
9798                                 if((i4_blk_size1 << 2) == i4_blk_size2)
9799                                 {
9800                                     blk_x_temp <<= 1;
9801                                     blk_y_temp <<= 1;
9802                                     jump = 4;
9803                                 }
9804                             }
9805 
9806                             mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
9807                             mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
9808 
9809                             /* Adjust the blk coord to point to top left locn */
9810                             blk_x_temp -= 1;
9811                             blk_y_temp -= 1;
9812 
9813                             /* Pick up the mvs from the location */
9814                             i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
9815                             i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
9816 
9817                             i4_offset +=
9818                                 ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
9819                                                        ps_ctxt->s_frm_prms.u1_num_active_ref_l0)
9820                                                     : 0);
9821 
9822                             ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
9823                             pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
9824 
9825                             ps_mv_base = ps_mv;
9826                             pi1_ref_idx_base = pi1_ref_idx;
9827 
9828                             {
9829                                 /* ps_mv and pi1_ref_idx now point to the top left locn */
9830                                 ps_search_node = &as_left_neighbours[0];
9831                                 ps_mv = ps_mv_base + mvs_in_row;
9832                                 pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
9833 
9834                                 i4_mv_pos_in_implicit_array =
9835                                     hme_find_pos_of_implicitly_stored_ref_id(
9836                                         pi1_ref_idx, i1_ref_idx, 0, i4_num_results_in_given_dir);
9837 
9838                                 if(-1 != i4_mv_pos_in_implicit_array)
9839                                 {
9840                                     COPY_MV_TO_SEARCH_NODE(
9841                                         ps_search_node,
9842                                         &ps_mv[i4_mv_pos_in_implicit_array],
9843                                         &pi1_ref_idx[i4_mv_pos_in_implicit_array],
9844                                         i1_ref_idx,
9845                                         shift);
9846                                 }
9847                                 else
9848                                 {
9849                                     ps_search_node->u1_is_avail = 0;
9850                                     ps_search_node->s_mv.i2_mvx = 0;
9851                                     ps_search_node->s_mv.i2_mvy = 0;
9852                                     ps_search_node->i1_ref_idx = i1_ref_idx;
9853                                 }
9854 
9855                                 i4_num_srch_cands++;
9856                             }
9857                         }
9858                     }
9859 
9860                     *ps_candt_l = as_left_neighbours[0];
9861 
9862                     /* when 16x16 is searched in an encode layer, and the prev layer */
9863                     /* stores results for 4x4 blks, we project 5 candts corresponding */
9864                     /* to (2,2), (2,14), (14,2), (14,14) and 2nd best of (2,2) */
9865                     /* However in other cases, only 2,2 best and 2nd best reqd */
9866                     resultid = 0;
9867                     pf_hme_project_coloc_candt(
9868                         ps_candt_prj_coloc[0],
9869                         ps_curr_layer,
9870                         ps_coarse_layer,
9871                         pos_x + 2,
9872                         pos_y + 2,
9873                         i1_ref_idx,
9874                         resultid);
9875 
9876                     i4_num_srch_cands++;
9877 
9878                     resultid = 1;
9879                     if(num_results_prev_layer > 1)
9880                     {
9881                         pf_hme_project_coloc_candt(
9882                             ps_candt_prj_coloc[1],
9883                             ps_curr_layer,
9884                             ps_coarse_layer,
9885                             pos_x + 2,
9886                             pos_y + 2,
9887                             i1_ref_idx,
9888                             resultid);
9889 
9890                         i4_num_srch_cands++;
9891                     }
9892 
9893                     resultid = 0;
9894 
9895                     if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9896                     {
9897                         pf_hme_project_coloc_candt(
9898                             ps_candt_prj_t[0],
9899                             ps_curr_layer,
9900                             ps_coarse_layer,
9901                             pos_x,
9902                             pos_y - prev_blk_offset,
9903                             i1_ref_idx,
9904                             resultid);
9905 
9906                         i4_num_srch_cands++;
9907                     }
9908 
9909                     {
9910                         pf_hme_project_coloc_candt(
9911                             ps_candt_prj_br[0],
9912                             ps_curr_layer,
9913                             ps_coarse_layer,
9914                             pos_x + next_blk_offset,
9915                             pos_y + next_blk_offset,
9916                             i1_ref_idx,
9917                             resultid);
9918                         pf_hme_project_coloc_candt(
9919                             ps_candt_prj_bl[0],
9920                             ps_curr_layer,
9921                             ps_coarse_layer,
9922                             pos_x - prev_blk_offset,
9923                             pos_y + next_blk_offset,
9924                             i1_ref_idx,
9925                             resultid);
9926                         pf_hme_project_coloc_candt(
9927                             ps_candt_prj_r[0],
9928                             ps_curr_layer,
9929                             ps_coarse_layer,
9930                             pos_x + next_blk_offset,
9931                             pos_y,
9932                             i1_ref_idx,
9933                             resultid);
9934                         pf_hme_project_coloc_candt(
9935                             ps_candt_prj_b[0],
9936                             ps_curr_layer,
9937                             ps_coarse_layer,
9938                             pos_x,
9939                             pos_y + next_blk_offset,
9940                             i1_ref_idx,
9941                             resultid);
9942 
9943                         i4_num_srch_cands += 4;
9944 
9945                         if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9946                         {
9947                             pf_hme_project_coloc_candt(
9948                                 ps_candt_prj_tr[0],
9949                                 ps_curr_layer,
9950                                 ps_coarse_layer,
9951                                 pos_x + next_blk_offset,
9952                                 pos_y - prev_blk_offset,
9953                                 i1_ref_idx,
9954                                 resultid);
9955                             pf_hme_project_coloc_candt(
9956                                 ps_candt_prj_tl[0],
9957                                 ps_curr_layer,
9958                                 ps_coarse_layer,
9959                                 pos_x - prev_blk_offset,
9960                                 pos_y - prev_blk_offset,
9961                                 i1_ref_idx,
9962                                 resultid);
9963 
9964                             i4_num_srch_cands += 2;
9965                         }
9966                     }
9967                     if((num_results_prev_layer > 1) && (e_search_complexity >= SEARCH_CX_MED))
9968                     {
9969                         resultid = 1;
9970                         pf_hme_project_coloc_candt(
9971                             ps_candt_prj_br[1],
9972                             ps_curr_layer,
9973                             ps_coarse_layer,
9974                             pos_x + next_blk_offset,
9975                             pos_y + next_blk_offset,
9976                             i1_ref_idx,
9977                             resultid);
9978                         pf_hme_project_coloc_candt(
9979                             ps_candt_prj_bl[1],
9980                             ps_curr_layer,
9981                             ps_coarse_layer,
9982                             pos_x - prev_blk_offset,
9983                             pos_y + next_blk_offset,
9984                             i1_ref_idx,
9985                             resultid);
9986                         pf_hme_project_coloc_candt(
9987                             ps_candt_prj_r[1],
9988                             ps_curr_layer,
9989                             ps_coarse_layer,
9990                             pos_x + next_blk_offset,
9991                             pos_y,
9992                             i1_ref_idx,
9993                             resultid);
9994                         pf_hme_project_coloc_candt(
9995                             ps_candt_prj_b[1],
9996                             ps_curr_layer,
9997                             ps_coarse_layer,
9998                             pos_x,
9999                             pos_y + next_blk_offset,
10000                             i1_ref_idx,
10001                             resultid);
10002 
10003                         i4_num_srch_cands += 4;
10004 
10005                         pf_hme_project_coloc_candt(
10006                             ps_candt_prj_tr[1],
10007                             ps_curr_layer,
10008                             ps_coarse_layer,
10009                             pos_x + next_blk_offset,
10010                             pos_y - prev_blk_offset,
10011                             i1_ref_idx,
10012                             resultid);
10013                         pf_hme_project_coloc_candt(
10014                             ps_candt_prj_tl[1],
10015                             ps_curr_layer,
10016                             ps_coarse_layer,
10017                             pos_x - prev_blk_offset,
10018                             pos_y - prev_blk_offset,
10019                             i1_ref_idx,
10020                             resultid);
10021                         pf_hme_project_coloc_candt(
10022                             ps_candt_prj_t[1],
10023                             ps_curr_layer,
10024                             ps_coarse_layer,
10025                             pos_x,
10026                             pos_y - prev_blk_offset,
10027                             i1_ref_idx,
10028                             resultid);
10029 
10030                         i4_num_srch_cands += 3;
10031                     }
10032 
10033                     /* Note this block also clips the MV range for all candidates */
10034 #ifdef _DEBUG
10035                     {
10036                         S32 candt;
10037                         range_prms_t *ps_range_prms;
10038 
10039                         S32 num_ref_valid = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
10040                         for(candt = 0; candt < i4_num_srch_cands; candt++)
10041                         {
10042                             search_node_t *ps_search_node;
10043 
10044                             ps_search_node =
10045                                 s_search_prms_blk.ps_search_candts[candt].ps_search_node;
10046 
10047                             ps_range_prms = s_search_prms_blk.aps_mv_range[0];
10048 
10049                             if((ps_search_node->i1_ref_idx >= num_ref_valid) ||
10050                                (ps_search_node->i1_ref_idx < 0))
10051                             {
10052                                 ASSERT(0);
10053                             }
10054                         }
10055                     }
10056 #endif
10057 
10058                     {
10059                         S32 srch_cand;
10060                         S32 num_unique_nodes = 0;
10061                         S32 num_nodes_searched = 0;
10062                         S32 num_best_cand = 0;
10063                         S08 i1_grid_enable = 0;
10064                         search_node_t as_best_two_proj_node[TOT_NUM_PARTS * 2];
10065                         /* has list of valid partition to search terminated by -1 */
10066                         S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
10067                         S32 center_x;
10068                         S32 center_y;
10069 
10070                         /* indicates if the centre point of grid needs to be explicitly added for search */
10071                         S32 add_centre = 0;
10072 
10073                         memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
10074                         center_x = ps_candt_prj_coloc[0]->s_mv.i2_mvx;
10075                         center_y = ps_candt_prj_coloc[0]->s_mv.i2_mvy;
10076 
10077                         for(srch_cand = 0;
10078                             (srch_cand < i4_num_srch_cands) &&
10079                             (num_unique_nodes <= s_search_prms_blk.i4_num_init_candts);
10080                             srch_cand++)
10081                         {
10082                             search_node_t s_search_node_temp =
10083                                 s_search_prms_blk.ps_search_candts[srch_cand].ps_search_node[0];
10084 
10085                             s_search_node_temp.i1_ref_idx = i1_ref_idx;  //TEMP FIX;
10086 
10087                             /* Clip the motion vectors as well here since after clipping
10088                             two candidates can become same and they will be removed during deduplication */
10089                             CLIP_MV_WITHIN_RANGE(
10090                                 s_search_node_temp.s_mv.i2_mvx,
10091                                 s_search_node_temp.s_mv.i2_mvy,
10092                                 s_search_prms_blk.aps_mv_range[0],
10093                                 ps_refine_prms->i4_num_steps_fpel_refine,
10094                                 ps_refine_prms->i4_num_steps_hpel_refine,
10095                                 ps_refine_prms->i4_num_steps_qpel_refine);
10096 
10097                             /* PT_C */
10098                             INSERT_NEW_NODE(
10099                                 as_unique_search_nodes,
10100                                 num_unique_nodes,
10101                                 s_search_node_temp,
10102                                 0,
10103                                 au4_unique_node_map,
10104                                 center_x,
10105                                 center_y,
10106                                 1);
10107 
10108                             num_nodes_searched += 1;
10109                         }
10110                         num_unique_nodes =
10111                             MIN(num_unique_nodes, s_search_prms_blk.i4_num_init_candts);
10112 
10113                         /* If the number of unique candidates to be refined is 2 or more,
10114                         then filter them out and choose the best two here */
10115                         if(num_unique_nodes >= 2)
10116                         {
10117                             S32 num_results;
10118                             S32 cnt;
10119                             S32 *pi4_valid_part_ids;
10120                             s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
10121                             s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
10122                             pi4_valid_part_ids = &ai4_valid_part_ids[0];
10123 
10124                             /* pi4_valid_part_ids is updated inside */
10125                             hme_pred_search_no_encode(
10126                                 &s_search_prms_blk,
10127                                 ps_curr_layer,
10128                                 &ps_ctxt->s_wt_pred,
10129                                 pi4_valid_part_ids,
10130                                 1,
10131                                 e_me_quality_presets,
10132                                 i1_grid_enable,
10133                                 (ihevce_me_optimised_function_list_t *)
10134                                     ps_ctxt->pv_me_optimised_function_list
10135 
10136                             );
10137 
10138                             num_best_cand = 0;
10139                             cnt = 0;
10140                             num_results = ps_search_results->u1_num_results_per_part;
10141 
10142                             while((id = pi4_valid_part_ids[cnt++]) >= 0)
10143                             {
10144                                 num_results =
10145                                     MIN(ps_refine_prms->pu1_num_best_results[id], num_results);
10146 
10147                                 for(i = 0; i < num_results; i++)
10148                                 {
10149                                     search_node_t s_search_node_temp;
10150                                     s_search_node_temp =
10151                                         *(ps_search_results->aps_part_results[i1_ref_idx][id] + i);
10152                                     if(s_search_node_temp.i1_ref_idx >= 0)
10153                                     {
10154                                         INSERT_NEW_NODE_NOMAP(
10155                                             as_best_two_proj_node,
10156                                             num_best_cand,
10157                                             s_search_node_temp,
10158                                             0);
10159                                     }
10160                                 }
10161                             }
10162                         }
10163                         else
10164                         {
10165                             add_centre = 1;
10166                             num_best_cand = num_unique_nodes;
10167                             as_best_two_proj_node[0] = as_unique_search_nodes[0];
10168                         }
10169 
10170                         num_unique_nodes = 0;
10171                         num_nodes_searched = 0;
10172 
10173                         if(1 == num_best_cand)
10174                         {
10175                             search_node_t s_search_node_temp = as_best_two_proj_node[0];
10176                             S16 i2_mv_x = s_search_node_temp.s_mv.i2_mvx;
10177                             S16 i2_mv_y = s_search_node_temp.s_mv.i2_mvy;
10178                             S08 i1_ref_idx = s_search_node_temp.i1_ref_idx;
10179 
10180                             i1_grid_enable = 1;
10181 
10182                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10183                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10184                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10185 
10186                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10187                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10188                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10189 
10190                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10191                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10192                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10193 
10194                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10195                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10196                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10197 
10198                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10199                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10200                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10201 
10202                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10203                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10204                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10205 
10206                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10207                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10208                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10209 
10210                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10211                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10212                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10213 
10214                             if(add_centre)
10215                             {
10216                                 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10217                                 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10218                                 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10219                             }
10220                         }
10221                         else
10222                         {
10223                             /* For the candidates where refinement was required, choose the best two */
10224                             for(srch_cand = 0; srch_cand < num_best_cand; srch_cand++)
10225                             {
10226                                 search_node_t s_search_node_temp = as_best_two_proj_node[srch_cand];
10227                                 WORD32 mv_x = s_search_node_temp.s_mv.i2_mvx;
10228                                 WORD32 mv_y = s_search_node_temp.s_mv.i2_mvy;
10229 
10230                                 /* Because there may not be two best unique candidates (because of clipping),
10231                                 second best candidate can be uninitialized, ignore that */
10232                                 if(s_search_node_temp.s_mv.i2_mvx == INTRA_MV ||
10233                                    s_search_node_temp.i1_ref_idx < 0)
10234                                 {
10235                                     num_nodes_searched++;
10236                                     continue;
10237                                 }
10238 
10239                                 /* PT_C */
10240                                 /* Since the center point has already been evaluated and the best results are persistent,
10241                                 it will not be evaluated again */
10242                                 if(add_centre) /* centre point added explicitly again if search results is not updated */
10243                                 {
10244                                     INSERT_NEW_NODE(
10245                                         as_unique_search_nodes,
10246                                         num_unique_nodes,
10247                                         s_search_node_temp,
10248                                         0,
10249                                         au4_unique_node_map,
10250                                         center_x,
10251                                         center_y,
10252                                         1);
10253                                 }
10254 
10255                                 /* PT_L */
10256                                 s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10257                                 s_search_node_temp.s_mv.i2_mvy = mv_y;
10258                                 INSERT_NEW_NODE(
10259                                     as_unique_search_nodes,
10260                                     num_unique_nodes,
10261                                     s_search_node_temp,
10262                                     0,
10263                                     au4_unique_node_map,
10264                                     center_x,
10265                                     center_y,
10266                                     1);
10267 
10268                                 /* PT_T */
10269                                 s_search_node_temp.s_mv.i2_mvx = mv_x;
10270                                 s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10271                                 INSERT_NEW_NODE(
10272                                     as_unique_search_nodes,
10273                                     num_unique_nodes,
10274                                     s_search_node_temp,
10275                                     0,
10276                                     au4_unique_node_map,
10277                                     center_x,
10278                                     center_y,
10279                                     1);
10280 
10281                                 /* PT_R */
10282                                 s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10283                                 s_search_node_temp.s_mv.i2_mvy = mv_y;
10284                                 INSERT_NEW_NODE(
10285                                     as_unique_search_nodes,
10286                                     num_unique_nodes,
10287                                     s_search_node_temp,
10288                                     0,
10289                                     au4_unique_node_map,
10290                                     center_x,
10291                                     center_y,
10292                                     1);
10293 
10294                                 /* PT_B */
10295                                 s_search_node_temp.s_mv.i2_mvx = mv_x;
10296                                 s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10297                                 INSERT_NEW_NODE(
10298                                     as_unique_search_nodes,
10299                                     num_unique_nodes,
10300                                     s_search_node_temp,
10301                                     0,
10302                                     au4_unique_node_map,
10303                                     center_x,
10304                                     center_y,
10305                                     1);
10306 
10307                                 /* PT_TL */
10308                                 s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10309                                 s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10310                                 INSERT_NEW_NODE(
10311                                     as_unique_search_nodes,
10312                                     num_unique_nodes,
10313                                     s_search_node_temp,
10314                                     0,
10315                                     au4_unique_node_map,
10316                                     center_x,
10317                                     center_y,
10318                                     1);
10319 
10320                                 /* PT_TR */
10321                                 s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10322                                 s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10323                                 INSERT_NEW_NODE(
10324                                     as_unique_search_nodes,
10325                                     num_unique_nodes,
10326                                     s_search_node_temp,
10327                                     0,
10328                                     au4_unique_node_map,
10329                                     center_x,
10330                                     center_y,
10331                                     1);
10332 
10333                                 /* PT_BL */
10334                                 s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10335                                 s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10336                                 INSERT_NEW_NODE(
10337                                     as_unique_search_nodes,
10338                                     num_unique_nodes,
10339                                     s_search_node_temp,
10340                                     0,
10341                                     au4_unique_node_map,
10342                                     center_x,
10343                                     center_y,
10344                                     1);
10345 
10346                                 /* PT_BR */
10347                                 s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10348                                 s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10349                                 INSERT_NEW_NODE(
10350                                     as_unique_search_nodes,
10351                                     num_unique_nodes,
10352                                     s_search_node_temp,
10353                                     0,
10354                                     au4_unique_node_map,
10355                                     center_x,
10356                                     center_y,
10357                                     1);
10358                             }
10359                         }
10360 
10361                         s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
10362                         s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
10363 
10364                         /*****************************************************************/
10365                         /* Call the search algorithm, this includes:                     */
10366                         /* Pre-Search-Refinement (for coarse candts)                     */
10367                         /* Search on each candidate                                      */
10368                         /* Post Search Refinement on winners/other new candidates        */
10369                         /*****************************************************************/
10370 
10371                         hme_pred_search_no_encode(
10372                             &s_search_prms_blk,
10373                             ps_curr_layer,
10374                             &ps_ctxt->s_wt_pred,
10375                             ai4_valid_part_ids,
10376                             0,
10377                             e_me_quality_presets,
10378                             i1_grid_enable,
10379                             (ihevce_me_optimised_function_list_t *)
10380                                 ps_ctxt->pv_me_optimised_function_list);
10381 
10382                         i1_grid_enable = 0;
10383                     }
10384                 }
10385 
10386                 /* for non encode layer update MV and end processing for block */
10387                 {
10388                     WORD32 i4_ref_id, min_cost = 0x7fffffff, min_sad = 0;
10389                     search_node_t *ps_search_node;
10390                     /* now update the reqd results back to the layer mv bank. */
10391                     if(1 == ps_refine_prms->i4_layer_id)
10392                     {
10393                         hme_update_mv_bank_in_l1_me(
10394                             ps_search_results,
10395                             ps_curr_layer->ps_layer_mvbank,
10396                             blk_x,
10397                             blk_y,
10398                             &s_mv_update_prms);
10399                     }
10400                     else
10401                     {
10402                         hme_update_mv_bank_noencode(
10403                             ps_search_results,
10404                             ps_curr_layer->ps_layer_mvbank,
10405                             blk_x,
10406                             blk_y,
10407                             &s_mv_update_prms);
10408                     }
10409 
10410                     /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
10411                     /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
10412                     if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10413                     {
10414                         WORD32 i4_j;
10415                         layer_mv_t *ps_layer_mv = ps_curr_layer->ps_layer_mvbank;
10416 
10417                         //if (ps_layer_mv->e_blk_size == s_mv_update_prms.e_search_blk_size)
10418                         /* Not considering this for Dyn. Search Update */
10419                         {
10420                             for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
10421                                 i4_ref_id++)
10422                             {
10423                                 ps_search_node =
10424                                     ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
10425 
10426                                 for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
10427                                 {
10428                                     hme_update_dynamic_search_params(
10429                                         &ps_ctxt->s_coarse_dyn_range_prms
10430                                              .as_dyn_range_prms[ps_refine_prms->i4_layer_id]
10431                                                                [i4_ref_id],
10432                                         ps_search_node->s_mv.i2_mvy);
10433 
10434                                     ps_search_node++;
10435                                 }
10436                             }
10437                         }
10438                     }
10439 
10440                     if(1 == ps_refine_prms->i4_layer_id)
10441                     {
10442                         WORD32 wt_pred_val, log_wt_pred_val;
10443                         WORD32 ref_id_of_nearest_poc = 0;
10444                         WORD32 max_val = 0x7fffffff;
10445                         WORD32 max_l0_val = 0x7fffffff;
10446                         WORD32 max_l1_val = 0x7fffffff;
10447                         WORD32 cur_val;
10448                         WORD32 i4_local_weighted_sad, i4_local_cost_weighted_pred;
10449 
10450                         WORD32 bestl0_sad = 0x7fffffff;
10451                         WORD32 bestl1_sad = 0x7fffffff;
10452                         search_node_t *ps_best_l0_blk = NULL, *ps_best_l1_blk = NULL;
10453 
10454                         for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
10455                             i4_ref_id++)
10456                         {
10457                             wt_pred_val = ps_ctxt->s_wt_pred.a_wpred_wt[i4_ref_id];
10458                             log_wt_pred_val = ps_ctxt->s_wt_pred.wpred_log_wdc;
10459 
10460                             ps_search_node =
10461                                 ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
10462 
10463                             i4_local_weighted_sad = ((ps_search_node->i4_sad * wt_pred_val) +
10464                                                      ((1 << log_wt_pred_val) >> 1)) >>
10465                                                     log_wt_pred_val;
10466 
10467                             i4_local_cost_weighted_pred =
10468                                 i4_local_weighted_sad +
10469                                 (ps_search_node->i4_tot_cost - ps_search_node->i4_sad);
10470                             //the loop is redundant as the results are already sorted based on total cost
10471                             //for (i4_j = 0; i4_j < ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref; i4_j++)
10472                             {
10473                                 if(i4_local_cost_weighted_pred < min_cost)
10474                                 {
10475                                     min_cost = i4_local_cost_weighted_pred;
10476                                     min_sad = i4_local_weighted_sad;
10477                                 }
10478                             }
10479 
10480                             /* For P frame, calculate the nearest poc which is either P or I frame*/
10481                             if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10482                             {
10483                                 if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id])
10484                                 {
10485                                     cur_val =
10486                                         ABS(ps_ctxt->i4_curr_poc -
10487                                             ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id]);
10488                                     if(cur_val < max_val)
10489                                     {
10490                                         max_val = cur_val;
10491                                         ref_id_of_nearest_poc = i4_ref_id;
10492                                     }
10493                                 }
10494                             }
10495                         }
10496                         /*Store me cost wrt. to past frame only for P frame  */
10497                         if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10498                         {
10499                             if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
10500                             {
10501                                 WORD16 i2_mvx, i2_mvy;
10502 
10503                                 WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10504                                 WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10505                                 WORD32 z_scan_idx =
10506                                     gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10507                                 WORD32 wt, log_wt;
10508 
10509                                 /*ASSERT((ps_ctxt->i4_curr_poc - ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
10510                                 <= (1 + ps_ctxt->num_b_frms));*/
10511 
10512                                 /*obtain mvx and mvy */
10513                                 i2_mvx =
10514                                     ps_search_results
10515                                         ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10516                                         ->s_mv.i2_mvx;
10517                                 i2_mvy =
10518                                     ps_search_results
10519                                         ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10520                                         ->s_mv.i2_mvy;
10521 
10522                                 /*register the min cost for l1 me in blk context */
10523                                 wt = ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_of_nearest_poc];
10524                                 log_wt = ps_ctxt->s_wt_pred.wpred_log_wdc;
10525 
10526                                 /*register the min cost for l1 me in blk context */
10527                                 ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] =
10528                                     ((ps_search_results
10529                                           ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10530                                           ->i4_sad *
10531                                       wt) +
10532                                      ((1 << log_wt) >> 1)) >>
10533                                     log_wt;
10534                                 ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] =
10535                                     ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] +
10536                                     (ps_search_results
10537                                          ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10538                                          ->i4_tot_cost -
10539                                      ps_search_results
10540                                          ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10541                                          ->i4_sad);
10542                                 /*for complexity change detection*/
10543                                 ps_ctxt->i4_num_blks++;
10544                                 if(ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] >
10545                                    (8 /*blk width*/ * 8 /*blk height*/ * (1 + ps_ctxt->num_b_frms)))
10546                                 {
10547                                     ps_ctxt->i4_num_blks_high_sad++;
10548                                 }
10549                             }
10550                         }
10551                     }
10552 
10553                     /* EIID: Early inter intra decisions */
10554                     /* tap L1 level SAD for inter intra decisions */
10555                     if((e_me_quality_presets >= ME_MEDIUM_SPEED) &&
10556                        (!ps_ctxt->s_frm_prms
10557                              .is_i_pic))  //for high-quality preset->disable early decisions
10558                     {
10559                         if(1 == ps_refine_prms->i4_layer_id)
10560                         {
10561                             WORD32 i4_min_sad_cost_8x8_block = min_cost;
10562                             ihevce_ed_blk_t *ps_curr_ed_blk_ctxt;
10563                             WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10564                             WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10565                             WORD32 z_scan_idx =
10566                                 gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10567                             ps_curr_ed_blk_ctxt = ps_ed_blk_ctxt_curr_ctb + z_scan_idx;
10568 
10569                             /*register the min cost for l1 me in blk context */
10570                             ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
10571                                 i4_min_sad_cost_8x8_block;
10572                             i4_num_comparisions++;
10573 
10574                             /* take early inter-intra decision here */
10575                             ps_curr_ed_blk_ctxt->intra_or_inter = 3; /*init saying eval both */
10576 #if DISABLE_INTRA_IN_BPICS
10577                             if((e_me_quality_presets == ME_XTREME_SPEED_25) &&
10578                                (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
10579                             {
10580                                 ps_curr_ed_blk_ctxt->intra_or_inter =
10581                                     2; /*eval only inter if inter cost is less */
10582                                 i4_num_inter_wins++;
10583                             }
10584                             else
10585 #endif
10586                             {
10587                                 if(ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] <
10588                                    ((ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2] *
10589                                      i4_threshold_multiplier) /
10590                                     i4_threshold_divider))
10591                                 {
10592                                     ps_curr_ed_blk_ctxt->intra_or_inter =
10593                                         2; /*eval only inter if inter cost is less */
10594                                     i4_num_inter_wins++;
10595                                 }
10596                             }
10597 
10598                             //{
10599                             //  DBG_PRINTF ("(blk x, blk y):(%d, %d)\t me:(ctb_x, ctb_y):(%d, %d)\t intra_SAD_COST: %d\tInter_SAD_COST: %d\n",
10600                             //      blk_x,blk_y,
10601                             //      i4_ctb_blk_ctr, i4_ctb_row_ctr,
10602                             //      ps_curr_ed_blk_ctxt->i4_best_sad_8x8_l1_ipe,
10603                             //      i4_min_sad_cost_8x8_block
10604                             //      );
10605                             //}
10606 
10607                         }  //end of layer-1
10608                     }  //end of if (e_me_quality_presets >= ME_MEDIUM_SPEED)
10609                     else
10610                     {
10611                         if(1 == ps_refine_prms->i4_layer_id)
10612                         {
10613                             WORD32 i4_min_sad_cost_8x8_block = min_cost;
10614                             WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10615                             WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10616                             WORD32 z_scan_idx =
10617                                 gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10618 
10619                             /*register the min cost for l1 me in blk context */
10620                             ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
10621                                 i4_min_sad_cost_8x8_block;
10622                         }
10623                     }
10624                     if(1 == ps_refine_prms->i4_layer_id)
10625                     {
10626                         WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10627                         WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10628                         WORD32 z_scan_idx =
10629                             gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10630 
10631                         ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me_for_decide[z_scan_idx >> 2] =
10632                             min_sad;
10633 
10634                         if(min_cost <
10635                            ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2])
10636                         {
10637                             ps_ctxt->i4_L1_hme_best_cost += min_cost;
10638                             ps_ctxt->i4_L1_hme_sad += min_sad;
10639                             ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] = min_sad;
10640                         }
10641                         else
10642                         {
10643                             ps_ctxt->i4_L1_hme_best_cost +=
10644                                 ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2];
10645                             ps_ctxt->i4_L1_hme_sad +=
10646                                 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
10647                             ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] =
10648                                 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
10649                         }
10650                     }
10651                 }
10652             }
10653 
10654             /* Update the number of blocks processed in the current row */
10655             if((ME_MEDIUM_SPEED > e_me_quality_presets))
10656             {
10657                 ihevce_dmgr_set_row_row_sync(
10658                     pv_hme_dep_mngr,
10659                     (i4_ctb_x + 1),
10660                     blk_y,
10661                     0 /* Col Tile No. : Not supported in PreEnc*/);
10662             }
10663         }
10664 
10665         /* set the output dependency after completion of row */
10666         ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
10667     }
10668 }
10669