1 /******************************************************************************
2 *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21 ******************************************************************************
22 * @file hme_refine.c
23 *
24 * @brief
25 * Contains the implementation of the refinement layer searches and related
26 * functionality like CU merge.
27 *
28 * @author
29 * Ittiam
30 *
31 *
32 * List of Functions
33 *
34 *
35 ******************************************************************************
36 */
37
38 /*****************************************************************************/
39 /* File Includes */
40 /*****************************************************************************/
41 /* System include files */
42 #include <stdio.h>
43 #include <string.h>
44 #include <stdlib.h>
45 #include <assert.h>
46 #include <stdarg.h>
47 #include <math.h>
48 #include <limits.h>
49
50 /* User include files */
51 #include "ihevc_typedefs.h"
52 #include "itt_video_api.h"
53 #include "ihevce_api.h"
54
55 #include "rc_cntrl_param.h"
56 #include "rc_frame_info_collector.h"
57 #include "rc_look_ahead_params.h"
58
59 #include "ihevc_defs.h"
60 #include "ihevc_structs.h"
61 #include "ihevc_platform_macros.h"
62 #include "ihevc_deblk.h"
63 #include "ihevc_itrans_recon.h"
64 #include "ihevc_chroma_itrans_recon.h"
65 #include "ihevc_chroma_intra_pred.h"
66 #include "ihevc_intra_pred.h"
67 #include "ihevc_inter_pred.h"
68 #include "ihevc_mem_fns.h"
69 #include "ihevc_padding.h"
70 #include "ihevc_weighted_pred.h"
71 #include "ihevc_sao.h"
72 #include "ihevc_resi_trans.h"
73 #include "ihevc_quant_iquant_ssd.h"
74 #include "ihevc_cabac_tables.h"
75
76 #include "ihevce_defs.h"
77 #include "ihevce_lap_enc_structs.h"
78 #include "ihevce_multi_thrd_structs.h"
79 #include "ihevce_multi_thrd_funcs.h"
80 #include "ihevce_me_common_defs.h"
81 #include "ihevce_had_satd.h"
82 #include "ihevce_error_codes.h"
83 #include "ihevce_bitstream.h"
84 #include "ihevce_cabac.h"
85 #include "ihevce_rdoq_macros.h"
86 #include "ihevce_function_selector.h"
87 #include "ihevce_enc_structs.h"
88 #include "ihevce_entropy_structs.h"
89 #include "ihevce_cmn_utils_instr_set_router.h"
90 #include "ihevce_enc_loop_structs.h"
91 #include "ihevce_bs_compute_ctb.h"
92 #include "ihevce_global_tables.h"
93 #include "ihevce_dep_mngr_interface.h"
94 #include "hme_datatype.h"
95 #include "hme_interface.h"
96 #include "hme_common_defs.h"
97 #include "hme_defs.h"
98 #include "ihevce_me_instr_set_router.h"
99 #include "hme_globals.h"
100 #include "hme_utils.h"
101 #include "hme_coarse.h"
102 #include "hme_fullpel.h"
103 #include "hme_subpel.h"
104 #include "hme_refine.h"
105 #include "hme_err_compute.h"
106 #include "hme_common_utils.h"
107 #include "hme_search_algo.h"
108 #include "ihevce_stasino_helpers.h"
109 #include "ihevce_common_utils.h"
110
111 /*****************************************************************************/
112 /* Globals */
113 /*****************************************************************************/
114
115 /* brief: mapping buffer to convert raster scan indices into z-scan order in a ctb */
116 UWORD8 gau1_raster_scan_to_ctb[4][4] = {
117 { 0, 4, 16, 20 }, { 8, 12, 24, 28 }, { 32, 36, 48, 52 }, { 40, 44, 56, 60 }
118 };
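/* Note: each entry is the z-scan index, counted in 8x8-block units (4 per 16x16 block), */
/* of the 16x16 block at that raster position within a 64x64 CTB; hence all entries are */
/* multiples of 4. */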
119
120 /*****************************************************************************/
121 /* Extern Function declaration */
122 /*****************************************************************************/
123 extern ctb_boundary_attrs_t *
124 get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt);
125
126 typedef void (*PF_HME_PROJECT_COLOC_CANDT_FXN)(
127 search_node_t *ps_search_node,
128 layer_ctxt_t *ps_curr_layer,
129 layer_ctxt_t *ps_coarse_layer,
130 S32 i4_pos_x,
131 S32 i4_pos_y,
132 S08 i1_ref_id,
133 S32 i4_result_id);
134
135 typedef void (*PF_HME_PROJECT_COLOC_CANDT_L0_ME_FXN)(
136 search_node_t *ps_search_node,
137 layer_ctxt_t *ps_curr_layer,
138 layer_ctxt_t *ps_coarse_layer,
139 S32 i4_pos_x,
140 S32 i4_pos_y,
141 S32 i4_num_act_ref_l0,
142 U08 u1_pred_dir,
143 U08 u1_default_ref_id,
144 S32 i4_result_id);
145
146 /*****************************************************************************/
147 /* Function Definitions */
148 /*****************************************************************************/
149
150 void ihevce_no_wt_copy(
151 coarse_me_ctxt_t *ps_ctxt,
152 layer_ctxt_t *ps_curr_layer,
153 pu_t *ps_pu,
154 UWORD8 *pu1_temp_pred,
155 WORD32 temp_stride,
156 WORD32 blk_x,
157 WORD32 blk_y)
158 {
159 UWORD8 *pu1_ref;
160 WORD32 ref_stride, ref_offset;
161 WORD32 row, col, i4_tmp;
162
163 ASSERT((ps_pu->b2_pred_mode == PRED_L0) || (ps_pu->b2_pred_mode == PRED_L1));
164
165 if(ps_pu->b2_pred_mode == PRED_L0)
166 {
167 WORD8 i1_ref_idx;
168
169 i1_ref_idx = ps_pu->mv.i1_l0_ref_idx;
170 pu1_ref = ps_curr_layer->ppu1_list_inp[i1_ref_idx];
171
172 ref_stride = ps_curr_layer->i4_inp_stride;
173
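/* Block coordinates are in units of 8x8 (hence the << 3); the MV at this layer is in */
/* integer-pel units, so it is added directly to the pixel offset. */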
174 ref_offset = ((blk_y << 3) + ps_pu->mv.s_l0_mv.i2_mvy) * ref_stride;
175 ref_offset += (blk_x << 3) + ps_pu->mv.s_l0_mv.i2_mvx;
176
177 pu1_ref += ref_offset;
178
179 for(row = 0; row < temp_stride; row++)
180 {
181 for(col = 0; col < temp_stride; col++)
182 {
183 i4_tmp = pu1_ref[col];
184 pu1_temp_pred[col] = CLIP_U8(i4_tmp);
185 }
186
187 pu1_ref += ref_stride;
188 pu1_temp_pred += temp_stride;
189 }
190 }
191 else
192 {
193 WORD8 i1_ref_idx;
194
195 i1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
196 pu1_ref = ps_curr_layer->ppu1_list_inp[i1_ref_idx];
197
198 ref_stride = ps_curr_layer->i4_inp_stride;
199
200 ref_offset = ((blk_y << 3) + ps_pu->mv.s_l1_mv.i2_mvy) * ref_stride;
201 ref_offset += (blk_x << 3) + ps_pu->mv.s_l1_mv.i2_mvx;
202
203 pu1_ref += ref_offset;
204
205 for(row = 0; row < temp_stride; row++)
206 {
207 for(col = 0; col < temp_stride; col++)
208 {
209 i4_tmp = pu1_ref[col];
210 pu1_temp_pred[col] = CLIP_U8(i4_tmp);
211 }
212
213 pu1_ref += ref_stride;
214 pu1_temp_pred += temp_stride;
215 }
216 }
217 }
218
219 static WORD32 hme_add_clustered_mvs_as_merge_cands(
220 cluster_data_t *ps_cluster_base,
221 search_node_t *ps_merge_cand,
222 range_prms_t **pps_range_prms,
223 U08 *pu1_refid_to_pred_dir_list,
224 WORD32 i4_num_clusters,
225 U08 u1_pred_dir)
226 {
227 WORD32 i, j, k;
228 WORD32 i4_num_cands_added = 0;
229 WORD32 i4_num_mvs_in_cluster;
230
231 for(i = 0; i < i4_num_clusters; i++)
232 {
233 cluster_data_t *ps_data = &ps_cluster_base[i];
234
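/* Use only clusters whose reference belongs to the requested prediction direction; */
/* the list passed in holds the per-ref-id 'is past' flag, so L0 maps to past */
/* references and L1 to future references. */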
235 if(u1_pred_dir == !pu1_refid_to_pred_dir_list[ps_data->ref_id])
236 {
237 i4_num_mvs_in_cluster = ps_data->num_mvs;
238
239 for(j = 0; j < i4_num_mvs_in_cluster; j++)
240 {
241 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_data->as_mv[j].mvx;
242 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_data->as_mv[j].mvy;
243 ps_merge_cand[i4_num_cands_added].i1_ref_idx = ps_data->ref_id;
244
245 CLIP_MV_WITHIN_RANGE(
246 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
247 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
248 pps_range_prms[ps_data->ref_id],
249 0,
250 0,
251 0);
252
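/* Add the candidate only if this (MV, ref id) combination is not already present */
/* in the merge candidate list. */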
253 for(k = 0; k < i4_num_cands_added; k++)
254 {
255 if((ps_merge_cand[k].s_mv.i2_mvx == ps_data->as_mv[j].mvx) &&
256 (ps_merge_cand[k].s_mv.i2_mvy == ps_data->as_mv[j].mvy) &&
257 (ps_merge_cand[k].i1_ref_idx == ps_data->ref_id))
258 {
259 break;
260 }
261 }
262
263 if(k == i4_num_cands_added)
264 {
265 i4_num_cands_added++;
266 }
267 }
268 }
269 }
270
271 return i4_num_cands_added;
272 }
273
274 static WORD32 hme_add_me_best_as_merge_cands(
275 search_results_t **pps_child_data_array,
276 inter_cu_results_t *ps_8x8cu_results,
277 search_node_t *ps_merge_cand,
278 range_prms_t **pps_range_prms,
279 U08 *pu1_refid_to_pred_dir_list,
280 S08 *pi1_past_list,
281 S08 *pi1_future_list,
282 BLK_SIZE_T e_blk_size,
283 ME_QUALITY_PRESETS_T e_quality_preset,
284 S32 i4_num_cands_added,
285 U08 u1_pred_dir)
286 {
287 WORD32 i, j, k;
288 WORD32 i4_max_cands_to_add;
289
290 WORD32 i4_result_id = 0;
291
292 ASSERT(!pps_child_data_array[0]->u1_split_flag || (BLK_64x64 != e_blk_size));
293 ASSERT(!pps_child_data_array[1]->u1_split_flag || (BLK_64x64 != e_blk_size));
294 ASSERT(!pps_child_data_array[2]->u1_split_flag || (BLK_64x64 != e_blk_size));
295 ASSERT(!pps_child_data_array[3]->u1_split_flag || (BLK_64x64 != e_blk_size));
296
297 switch(e_quality_preset)
298 {
299 case ME_PRISTINE_QUALITY:
300 {
301 i4_max_cands_to_add = MAX_MERGE_CANDTS;
302
303 break;
304 }
305 case ME_HIGH_QUALITY:
306 {
307 /* Worst case: all 4 children are split and each of their 4 grandchildren */
308 /* contributes its 2 best results, i.e. 4 * 4 * 2 candidates */
309 i4_max_cands_to_add = 4 * 4 * 2;
310
311 break;
312 }
313 case ME_MEDIUM_SPEED:
314 {
315 i4_max_cands_to_add = 4 * 2 * 2;
316
317 break;
318 }
319 case ME_HIGH_SPEED:
320 case ME_XTREME_SPEED:
321 case ME_XTREME_SPEED_25:
322 {
323 i4_max_cands_to_add = 4 * 2 * 1;
324
325 break;
326 }
327 }
328
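/* Walk through up to 4 best results of each child CU; when a child is split, take */
/* the 2Nx2N results of its four constituent 8x8 CUs instead. */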
329 while(i4_result_id < 4)
330 {
331 for(i = 0; i < 4; i++)
332 {
333 inter_cu_results_t *ps_child_data = pps_child_data_array[i]->ps_cu_results;
334 inter_cu_results_t *ps_grandchild_data = &ps_8x8cu_results[i << 2];
335
336 if(!pps_child_data_array[i]->u1_split_flag)
337 {
338 part_type_results_t *ps_data = &ps_child_data->ps_best_results[i4_result_id];
339
340 if(ps_child_data->u1_num_best_results <= i4_result_id)
341 {
342 continue;
343 }
344
345 if(ps_data->as_pu_results->pu.b1_intra_flag)
346 {
347 continue;
348 }
349
350 for(j = 0; j <= (ps_data->u1_part_type != PRT_2Nx2N); j++)
351 {
352 mv_t *ps_mv;
353
354 S08 i1_ref_idx;
355
356 pu_t *ps_pu = &ps_data->as_pu_results[j].pu;
357
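/* Skip PUs of the other prediction direction; bi-predicted PUs (b2_pred_mode == 2) */
/* are accepted for either direction. */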
358 if(u1_pred_dir !=
359 ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
360 {
361 continue;
362 }
363
364 if(u1_pred_dir)
365 {
366 ps_mv = &ps_pu->mv.s_l1_mv;
367 i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
368 }
369 else
370 {
371 ps_mv = &ps_pu->mv.s_l0_mv;
372 i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
373 }
374
375 if(-1 == i1_ref_idx)
376 {
377 continue;
378 }
379
380 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
381 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
382 ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
383
384 CLIP_MV_WITHIN_RANGE(
385 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
386 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
387 pps_range_prms[i1_ref_idx],
388 0,
389 0,
390 0);
391
392 for(k = 0; k < i4_num_cands_added; k++)
393 {
394 if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
395 (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
396 (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
397 {
398 break;
399 }
400 }
401
402 if(k == i4_num_cands_added)
403 {
404 i4_num_cands_added++;
405
406 if(i4_max_cands_to_add <= i4_num_cands_added)
407 {
408 return i4_num_cands_added;
409 }
410 }
411 }
412 }
413 else
414 {
415 for(j = 0; j < 4; j++)
416 {
417 mv_t *ps_mv;
418
419 S08 i1_ref_idx;
420
421 part_type_results_t *ps_data = ps_grandchild_data[j].ps_best_results;
422 pu_t *ps_pu = &ps_data->as_pu_results[0].pu;
423
424 ASSERT(ps_data->u1_part_type == PRT_2Nx2N);
425
426 if(ps_grandchild_data[j].u1_num_best_results <= i4_result_id)
427 {
428 continue;
429 }
430
431 if(ps_data->as_pu_results->pu.b1_intra_flag)
432 {
433 continue;
434 }
435
436 if(u1_pred_dir !=
437 ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
438 {
439 continue;
440 }
441
442 if(u1_pred_dir)
443 {
444 ps_mv = &ps_pu->mv.s_l1_mv;
445 i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
446 }
447 else
448 {
449 ps_mv = &ps_pu->mv.s_l0_mv;
450 i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
451 }
452
453 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
454 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
455 ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
456
457 CLIP_MV_WITHIN_RANGE(
458 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
459 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
460 pps_range_prms[i1_ref_idx],
461 0,
462 0,
463 0);
464
465 for(k = 0; k < i4_num_cands_added; k++)
466 {
467 if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
468 (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
469 (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
470 {
471 break;
472 }
473 }
474
475 if(k == i4_num_cands_added)
476 {
477 i4_num_cands_added++;
478
479 if(i4_max_cands_to_add <= i4_num_cands_added)
480 {
481 return i4_num_cands_added;
482 }
483 }
484 }
485 }
486 }
487
488 i4_result_id++;
489 }
490
491 return i4_num_cands_added;
492 }
493
494 WORD32 hme_add_cands_for_merge_eval(
495 ctb_cluster_info_t *ps_cluster_info,
496 search_results_t **pps_child_data_array,
497 inter_cu_results_t *ps_8x8cu_results,
498 range_prms_t **pps_range_prms,
499 search_node_t *ps_merge_cand,
500 U08 *pu1_refid_to_pred_dir_list,
501 S08 *pi1_past_list,
502 S08 *pi1_future_list,
503 ME_QUALITY_PRESETS_T e_quality_preset,
504 BLK_SIZE_T e_blk_size,
505 U08 u1_pred_dir,
506 U08 u1_blk_id)
507 {
508 WORD32 i4_num_cands_added = 0;
509
510 if(ME_PRISTINE_QUALITY == e_quality_preset)
511 {
512 cluster_data_t *ps_cluster_primo;
513
514 WORD32 i4_num_clusters;
515
516 if(BLK_32x32 == e_blk_size)
517 {
518 ps_cluster_primo = ps_cluster_info->ps_32x32_blk[u1_blk_id].as_cluster_data;
519 i4_num_clusters = ps_cluster_info->ps_32x32_blk[u1_blk_id].num_clusters;
520 }
521 else
522 {
523 ps_cluster_primo = ps_cluster_info->ps_64x64_blk->as_cluster_data;
524 i4_num_clusters = ps_cluster_info->ps_64x64_blk->num_clusters;
525 }
526
527 i4_num_cands_added = hme_add_clustered_mvs_as_merge_cands(
528 ps_cluster_primo,
529 ps_merge_cand,
530 pps_range_prms,
531 pu1_refid_to_pred_dir_list,
532 i4_num_clusters,
533 u1_pred_dir);
534 }
535
536 i4_num_cands_added = hme_add_me_best_as_merge_cands(
537 pps_child_data_array,
538 ps_8x8cu_results,
539 ps_merge_cand,
540 pps_range_prms,
541 pu1_refid_to_pred_dir_list,
542 pi1_past_list,
543 pi1_future_list,
544 e_blk_size,
545 e_quality_preset,
546 i4_num_cands_added,
547 u1_pred_dir);
548
549 return i4_num_cands_added;
550 }
551
552 /**
553 ********************************************************************************
554 * @fn WORD32 hme_pick_eval_merge_candts(hme_merge_prms_t *ps_merge_prms,
555 * hme_subpel_prms_t *ps_subpel_prms,
556 * S32 i4_search_idx,
557 * S32 i4_best_part_type, S32 i4_is_vert, ...)
558 *
559 * @brief Given a target partition orientation in the merged CU, and the
560 * partition type of the most likely partition, this fxn picks up
561 * candidates from the 4 constituent CUs and does refinement search
562 * to identify best results for the merge CU across active partitions
563 *
564 * @param[in,out] ps_merge_prms : Parameters sent from higher layers. Out of
565 * these params, the search result structure is also derived and
566 * updated during the search
567 *
568 * @param[in] i4_search_idx : ID of the buffer within the search results to update.
569 * Will be 0 if all refidx collapsed to one buf, else it'll be 0/1
570 *
571 * @param[in] i4_best_part_type : partition type of potential partition in the
572 * merged CU, -1 if the merge process has not yet been able to
573 * determine this.
574 *
575 * @param[in] i4_is_vert : Whether target partition of merged CU is vertical
576 * orientation or horizontal orientation.
577 *
578 * @return Number of merge candidates
579 ********************************************************************************
580 */
581 WORD32 hme_pick_eval_merge_candts(
582 hme_merge_prms_t *ps_merge_prms,
583 hme_subpel_prms_t *ps_subpel_prms,
584 S32 i4_search_idx,
585 S32 i4_best_part_type,
586 S32 i4_is_vert,
587 wgt_pred_ctxt_t *ps_wt_inp_prms,
588 S32 i4_frm_qstep,
589 ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
590 ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
591 {
592 S32 x_off, y_off;
593 search_node_t *ps_search_node;
594 S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
595 S32 i4_num_valid_parts;
596 pred_ctxt_t *ps_pred_ctxt;
597
598 search_node_t as_merge_unique_node[MAX_MERGE_CANDTS];
599 S32 num_unique_nodes_cu_merge = 0;
600
601 search_results_t *ps_search_results = ps_merge_prms->ps_results_merge;
602 CU_SIZE_T e_cu_size = ps_search_results->e_cu_size;
603 S32 i4_part_mask = ps_search_results->i4_part_mask;
604
605 search_results_t *aps_child_results[4];
606 layer_ctxt_t *ps_curr_layer = ps_merge_prms->ps_layer_ctxt;
607
608 S32 i4_ref_stride, i, j;
609 result_upd_prms_t s_result_prms;
610
611 BLK_SIZE_T e_blk_size = ge_cu_size_to_blk_size[e_cu_size];
612 S32 i4_offset;
613
614 /*************************************************************************/
615 /* Function pointer for SAD/SATD, array and prms structure to pass to */
616 /* This function */
617 /*************************************************************************/
618 PF_SAD_FXN_T pf_err_compute;
619 S32 ai4_sad_grid[9][17];
620 err_prms_t s_err_prms;
621
622 /*************************************************************************/
623 /* Allowed MV RANGE */
624 /*************************************************************************/
625 range_prms_t **pps_range_prms = ps_merge_prms->aps_mv_range;
626 PF_INTERP_FXN_T pf_qpel_interp;
627 PF_MV_COST_FXN pf_mv_cost_compute;
628 WORD32 pred_lx;
629 U08 *apu1_hpel_ref[4];
630
631 interp_prms_t s_interp_prms;
632 S32 i4_interp_buf_id;
633
634 S32 i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
635 S32 i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
636
637 /* Sanity checks */
638 ASSERT((e_blk_size == BLK_64x64) || (e_blk_size == BLK_32x32));
639
640 s_err_prms.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;
641
642 /* Initialize all the ptrs to child CUs for merge decision */
643 aps_child_results[0] = ps_merge_prms->ps_results_tl;
644 aps_child_results[1] = ps_merge_prms->ps_results_tr;
645 aps_child_results[2] = ps_merge_prms->ps_results_bl;
646 aps_child_results[3] = ps_merge_prms->ps_results_br;
647
648 num_unique_nodes_cu_merge = 0;
649
650 pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
651
652 if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
653 {
654 num_unique_nodes_cu_merge = hme_add_cands_for_merge_eval(
655 ps_merge_prms->ps_cluster_info,
656 aps_child_results,
657 ps_merge_prms->ps_8x8_cu_results,
658 pps_range_prms,
659 as_merge_unique_node,
660 ps_search_results->pu1_is_past,
661 ps_merge_prms->pi1_past_list,
662 ps_merge_prms->pi1_future_list,
663 ps_merge_prms->e_quality_preset,
664 e_blk_size,
665 i4_search_idx,
666 (ps_merge_prms->ps_results_merge->u1_x_off >> 5) +
667 (ps_merge_prms->ps_results_merge->u1_y_off >> 4));
668 }
669 else
670 {
671 /*************************************************************************/
672 /* Populate the list of unique search nodes in the child CUs for merge */
673 /* evaluation */
674 /*************************************************************************/
675 for(i = 0; i < 4; i++)
676 {
677 search_node_t s_search_node;
678
679 PART_TYPE_T e_part_type;
680 PART_ID_T e_part_id;
681
682 WORD32 part_num;
683
684 search_results_t *ps_child = aps_child_results[i];
685
686 if(ps_child->ps_cu_results->u1_num_best_results)
687 {
688 if(!((ps_child->ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
689 (1 == ps_child->ps_cu_results->u1_num_best_results)))
690 {
691 e_part_type =
692 (PART_TYPE_T)ps_child->ps_cu_results->ps_best_results[0].u1_part_type;
693
694 ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
695
696 /* Insert mvs of NxN partitions. */
697 for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
698 part_num++)
699 {
700 e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
701
702 if(ps_child->aps_part_results[i4_search_idx][e_part_id]->i1_ref_idx != -1)
703 {
704 s_search_node = *ps_child->aps_part_results[i4_search_idx][e_part_id];
705 if(s_search_node.s_mv.i2_mvx != INTRA_MV)
706 {
707 CLIP_MV_WITHIN_RANGE(
708 s_search_node.s_mv.i2_mvx,
709 s_search_node.s_mv.i2_mvy,
710 pps_range_prms[s_search_node.i1_ref_idx],
711 0,
712 0,
713 0);
714
715 INSERT_NEW_NODE_NOMAP(
716 as_merge_unique_node,
717 num_unique_nodes_cu_merge,
718 s_search_node,
719 1);
720 }
721 }
722 }
723 }
724 }
725 else if(!((ps_merge_prms->ps_results_grandchild[(i << 2)]
726 .ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
727 (1 == ps_merge_prms->ps_results_grandchild[(i << 2)]
728 .ps_cu_results->u1_num_best_results)))
729 {
730 search_results_t *ps_results_root = &ps_merge_prms->ps_results_grandchild[(i << 2)];
731
732 for(j = 0; j < 4; j++)
733 {
734 e_part_type = (PART_TYPE_T)ps_results_root[j]
735 .ps_cu_results->ps_best_results[0]
736 .u1_part_type;
737
738 ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
739
740 /* Insert mvs of NxN partitions. */
741 for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
742 part_num++)
743 {
744 e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
745
746 if((ps_results_root[j]
747 .aps_part_results[i4_search_idx][e_part_id]
748 ->i1_ref_idx != -1) &&
749 (!ps_child->ps_cu_results->ps_best_results->as_pu_results->pu
750 .b1_intra_flag))
751 {
752 s_search_node =
753 *ps_results_root[j].aps_part_results[i4_search_idx][e_part_id];
754 if(s_search_node.s_mv.i2_mvx != INTRA_MV)
755 {
756 CLIP_MV_WITHIN_RANGE(
757 s_search_node.s_mv.i2_mvx,
758 s_search_node.s_mv.i2_mvy,
759 pps_range_prms[s_search_node.i1_ref_idx],
760 0,
761 0,
762 0);
763
764 INSERT_NEW_NODE_NOMAP(
765 as_merge_unique_node,
766 num_unique_nodes_cu_merge,
767 s_search_node,
768 1);
769 }
770 }
771 }
772 }
773 }
774 }
775 }
776
777 if(0 == num_unique_nodes_cu_merge)
778 {
779 return 0;
780 }
781
782 /*************************************************************************/
783 /* Appropriate Err compute fxn, depends on SAD/SATD, blk size and remains*/
784 /* fixed through this subpel refinement for this partition. */
785 /* Note, we do not enable grid sads since one pt is evaluated per node */
786 /* Hence, part mask is also nearly don't care and we use 2Nx2N enabled. */
787 /*************************************************************************/
788 i4_part_mask = ps_search_results->i4_part_mask;
789
790 /* Need to add the corresponding SAD functions for EXTREME SPEED : Lokesh */
791 if(ps_subpel_prms->i4_use_satd)
792 {
793 if(BLK_32x32 == e_blk_size)
794 {
795 pf_err_compute = hme_evalsatd_pt_pu_32x32;
796 }
797 else
798 {
799 pf_err_compute = hme_evalsatd_pt_pu_64x64;
800 }
801 }
802 else
803 {
804 pf_err_compute = (PF_SAD_FXN_T)hme_evalsad_grid_pu_MxM;
805 }
806
807 i4_ref_stride = ps_curr_layer->i4_rec_stride;
808
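/* Offset of the merge CU's top-left sample within the recon and half-pel planes */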
809 x_off = ps_merge_prms->ps_results_tl->u1_x_off;
810 y_off = ps_merge_prms->ps_results_tl->u1_y_off;
811 i4_offset = x_off + i4_ctb_x_off + ((y_off + i4_ctb_y_off) * i4_ref_stride);
812
813 /*************************************************************************/
814 /* This array stores the ids of the partitions whose */
815 /* SADs are updated. Since the partitions whose SADs are updated may not */
816 /* be in contiguous order, we supply another level of indirection. */
817 /*************************************************************************/
818 i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
819
820 /* Initialize result params used for partition update */
821 s_result_prms.pf_mv_cost_compute = NULL;
822 s_result_prms.ps_search_results = ps_search_results;
823 s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids;
824 s_result_prms.i1_ref_idx = i4_search_idx;
825 s_result_prms.i4_part_mask = i4_part_mask;
826 s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
827 s_result_prms.i4_grid_mask = 1;
828
829 /* One time Initialization of error params used for SAD/SATD compute */
830 s_err_prms.i4_inp_stride = ps_subpel_prms->i4_inp_stride;
831 s_err_prms.i4_ref_stride = i4_ref_stride;
832 s_err_prms.i4_part_mask = (ENABLE_2Nx2N);
833 s_err_prms.i4_grid_mask = 1;
834 s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
835 s_err_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
836 s_err_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
837 s_err_prms.i4_step = 1;
838
839 /*************************************************************************/
840 /* One time preparation of non changing interpolation params. */
841 /*************************************************************************/
842 s_interp_prms.i4_ref_stride = i4_ref_stride;
843 s_interp_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
844 s_interp_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
845 s_interp_prms.apu1_interp_out[0] = ps_subpel_prms->pu1_wkg_mem;
846 s_interp_prms.i4_out_stride = gau1_blk_size_to_wd[e_blk_size];
847 i4_interp_buf_id = 0;
848
849 pf_qpel_interp = ps_subpel_prms->pf_qpel_interp;
850
851 /***************************************************************************/
852 /* Compute SATD/SAD for all unique nodes of children CUs to get best merge */
853 /* results */
854 /***************************************************************************/
855 for(i = 0; i < num_unique_nodes_cu_merge; i++)
856 {
857 WORD8 i1_ref_idx;
858 ps_search_node = &as_merge_unique_node[i];
859
860 /*********************************************************************/
861 /* Compute the base pointer for input, interpolated buffers */
862 /* The base pointers point as follows: */
863 /* fx fy : 0, 0 :: fx, hy : 0, 0.5, hx, fy: 0.5, 0, hx, hy: 0.5, 0.5 */
864 /* To these, we need to add the offset of the current node */
865 /*********************************************************************/
866 i1_ref_idx = ps_search_node->i1_ref_idx;
867 apu1_hpel_ref[0] = ps_curr_layer->ppu1_list_rec_fxfy[i1_ref_idx] + i4_offset;
868 apu1_hpel_ref[1] = ps_curr_layer->ppu1_list_rec_hxfy[i1_ref_idx] + i4_offset;
869 apu1_hpel_ref[2] = ps_curr_layer->ppu1_list_rec_fxhy[i1_ref_idx] + i4_offset;
870 apu1_hpel_ref[3] = ps_curr_layer->ppu1_list_rec_hxhy[i1_ref_idx] + i4_offset;
871
872 s_interp_prms.ppu1_ref = &apu1_hpel_ref[0];
873
874 pf_qpel_interp(
875 &s_interp_prms,
876 ps_search_node->s_mv.i2_mvx,
877 ps_search_node->s_mv.i2_mvy,
878 i4_interp_buf_id);
879
880 pred_lx = i4_search_idx;
881 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
882
883 s_result_prms.u1_pred_lx = pred_lx;
884 s_result_prms.ps_search_node_base = ps_search_node;
885 s_err_prms.pu1_inp =
886 ps_wt_inp_prms->apu1_wt_inp[i1_ref_idx] + x_off + y_off * ps_subpel_prms->i4_inp_stride;
887 s_err_prms.pu1_ref = s_interp_prms.pu1_final_out;
888 s_err_prms.i4_ref_stride = s_interp_prms.i4_final_out_stride;
889
890 /* Carry out the SAD/SATD. This call also does the TU RECURSION.
891 Here the tu recursion logic is restricted by the size of the PU */
892 pf_err_compute(&s_err_prms);
893
894 if(ps_subpel_prms->u1_is_cu_noisy &&
895 ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
896 {
897 ps_me_optimised_function_list->pf_compute_stim_injected_distortion_for_all_parts(
898 s_err_prms.pu1_ref,
899 s_err_prms.i4_ref_stride,
900 ai4_valid_part_ids,
901 ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX,
902 ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
903 s_err_prms.pi4_sad_grid,
904 ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier,
905 ps_wt_inp_prms->a_inv_wpred_wt[i1_ref_idx],
906 ps_wt_inp_prms->ai4_shift_val[i1_ref_idx],
907 i4_num_valid_parts,
908 ps_wt_inp_prms->wpred_log_wdc,
909 (BLK_32x32 == e_blk_size) ? 32 : 64);
910 }
911
912 /* Update the mv's */
913 s_result_prms.i2_mv_x = ps_search_node->s_mv.i2_mvx;
914 s_result_prms.i2_mv_y = ps_search_node->s_mv.i2_mvy;
915
916 /* Update best results */
917 hme_update_results_pt_pu_best1_subpel_hs(&s_err_prms, &s_result_prms);
918 }
919
920 /************************************************************************/
921 /* Update mv cost and total cost for each valid partition in the CU */
922 /************************************************************************/
923 for(i = 0; i < TOT_NUM_PARTS; i++)
924 {
925 if(i4_part_mask & (1 << i))
926 {
927 WORD32 j;
928 WORD32 i4_mv_cost;
929
930 ps_search_node = ps_search_results->aps_part_results[i4_search_idx][i];
931
932 for(j = 0;
933 j < MIN(ps_search_results->u1_num_results_per_part, num_unique_nodes_cu_merge);
934 j++)
935 {
936 if(ps_search_node->i1_ref_idx != -1)
937 {
938 pred_lx = i4_search_idx;
939 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
940
941 /* Prediction context should now deal with qpel units */
942 HME_SET_MVPRED_RES(ps_pred_ctxt, MV_RES_QPEL);
943
944 ps_search_node->u1_subpel_done = 1;
945 ps_search_node->u1_is_avail = 1;
946
947 i4_mv_cost =
948 pf_mv_cost_compute(ps_search_node, ps_pred_ctxt, (PART_ID_T)i, MV_RES_QPEL);
949
950 ps_search_node->i4_tot_cost = i4_mv_cost + ps_search_node->i4_sad;
951 ps_search_node->i4_mv_cost = i4_mv_cost;
952
953 ps_search_node++;
954 }
955 }
956 }
957 }
958
959 return num_unique_nodes_cu_merge;
960 }
961
962 #define CU_MERGE_MAX_INTRA_PARTS 4
963
964 /**
965 ********************************************************************************
966 * @fn hme_try_merge_high_speed
967 *
968 * @brief Attempts to merge 4 NxN candts to a 2Nx2N candt, either as a single
969 entity or with partitions for the high speed preset
970 *
971 * @param[in,out] hme_merge_prms_t: Params for CU merge
972 *
973 * @return CU_MERGE_RESULT_T type result of merge (CU_MERGED/CU_SPLIT)
974 ********************************************************************************
975 */
976 CU_MERGE_RESULT_T hme_try_merge_high_speed(
977 me_ctxt_t *ps_thrd_ctxt,
978 me_frm_ctxt_t *ps_ctxt,
979 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
980 hme_subpel_prms_t *ps_subpel_prms,
981 hme_merge_prms_t *ps_merge_prms,
982 inter_pu_results_t *ps_pu_results,
983 pu_result_t *ps_pu_result)
984 {
985 search_results_t *ps_results_tl, *ps_results_tr;
986 search_results_t *ps_results_bl, *ps_results_br;
987
988 S32 i;
989 S32 i4_search_idx;
990 S32 i4_cost_parent;
991 S32 intra_cu_size;
992 ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
993
994 search_results_t *ps_results_merge = ps_merge_prms->ps_results_merge;
995 wgt_pred_ctxt_t *ps_wt_inp_prms = &ps_ctxt->s_wt_pred;
996
997 S32 i4_part_mask = ENABLE_ALL_PARTS - ENABLE_NxN;
998 S32 is_vert = 0, i4_best_part_type = -1;
999 S32 i4_intra_parts = 0; /* Keeps track of intra percentage before merge */
1000 S32 i4_cost_children = 0;
1001 S32 i4_frm_qstep = ps_ctxt->frm_qstep;
1002 S32 i4_num_merge_cands_evaluated = 0;
1003 U08 u1_x_off = ps_results_merge->u1_x_off;
1004 U08 u1_y_off = ps_results_merge->u1_y_off;
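/* Raster index (0..3) of the 32x32 block within the 64x64 CTB: (u1_x_off >> 5) gives */
/* the column and (u1_y_off >> 4) adds 2 per row, u1_y_off being a multiple of 32 here */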
1005 S32 i4_32x32_id = (u1_y_off >> 4) + (u1_x_off >> 5);
1006
1007 ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
1008 ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
1009 ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
1010 ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
1011 ps_results_tl = ps_merge_prms->ps_results_tl;
1012 ps_results_tr = ps_merge_prms->ps_results_tr;
1013 ps_results_bl = ps_merge_prms->ps_results_bl;
1014 ps_results_br = ps_merge_prms->ps_results_br;
1015
1016 if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED)
1017 {
1018 i4_part_mask &= ~ENABLE_AMP;
1019 }
1020
1021 if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25)
1022 {
1023 i4_part_mask &= ~ENABLE_AMP;
1024
1025 i4_part_mask &= ~ENABLE_SMP;
1026 }
1027
1028 ps_merge_prms->i4_num_pred_dir_actual = 0;
1029
1030 /*************************************************************************/
1031 /* The logic for High speed CU merge goes as follows: */
1032 /* */
1033 /* 1. Early exit with CU_SPLIT if sum of best partitions of children CUs */
1034 /* exceed 7 */
1035 /* 2. Early exit with CU_MERGE if mvs of best partitions of children CUs */
1036 /* are identical */
1037 /* 3. Find the all unique mvs of best partitions of children CUs and */
1038 /* evaluate partial SATDs (all 17 partitions) for each unique mv. If */
1039 /* best parent cost is lower than sum of the best children costs */
1040 /* return CU_MERGE after seeding the best results else return CU_SPLIT*/
1041 /* */
1042 /*************************************************************************/
1043
1044 /* Count the number of best partitions in child CUs, early exit if > 7 */
1045 if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
1046 (CU_32x32 == ps_results_merge->e_cu_size))
1047 {
1048 S32 num_parts_in_32x32 = 0;
1049 WORD32 i4_part_type;
1050
1051 if(ps_results_tl->u1_split_flag)
1052 {
1053 num_parts_in_32x32 += 4;
1054
1055 #define COST_INTERCHANGE 0
1056 i4_cost_children = ps_merge_prms->ps_8x8_cu_results[0].ps_best_results->i4_tot_cost +
1057 ps_merge_prms->ps_8x8_cu_results[1].ps_best_results->i4_tot_cost +
1058 ps_merge_prms->ps_8x8_cu_results[2].ps_best_results->i4_tot_cost +
1059 ps_merge_prms->ps_8x8_cu_results[3].ps_best_results->i4_tot_cost;
1060 }
1061 else
1062 {
1063 i4_part_type = ps_results_tl->ps_cu_results->ps_best_results[0].u1_part_type;
1064 num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1065 i4_cost_children = ps_results_tl->ps_cu_results->ps_best_results[0].i4_tot_cost;
1066 }
1067
1068 if(ps_results_tr->u1_split_flag)
1069 {
1070 num_parts_in_32x32 += 4;
1071
1072 i4_cost_children += ps_merge_prms->ps_8x8_cu_results[4].ps_best_results->i4_tot_cost +
1073 ps_merge_prms->ps_8x8_cu_results[5].ps_best_results->i4_tot_cost +
1074 ps_merge_prms->ps_8x8_cu_results[6].ps_best_results->i4_tot_cost +
1075 ps_merge_prms->ps_8x8_cu_results[7].ps_best_results->i4_tot_cost;
1076 }
1077 else
1078 {
1079 i4_part_type = ps_results_tr->ps_cu_results->ps_best_results[0].u1_part_type;
1080 num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1081 i4_cost_children += ps_results_tr->ps_cu_results->ps_best_results[0].i4_tot_cost;
1082 }
1083
1084 if(ps_results_bl->u1_split_flag)
1085 {
1086 num_parts_in_32x32 += 4;
1087
1088 i4_cost_children += ps_merge_prms->ps_8x8_cu_results[8].ps_best_results->i4_tot_cost +
1089 ps_merge_prms->ps_8x8_cu_results[9].ps_best_results->i4_tot_cost +
1090 ps_merge_prms->ps_8x8_cu_results[10].ps_best_results->i4_tot_cost +
1091 ps_merge_prms->ps_8x8_cu_results[11].ps_best_results->i4_tot_cost;
1092 }
1093 else
1094 {
1095 i4_part_type = ps_results_bl->ps_cu_results->ps_best_results[0].u1_part_type;
1096 num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1097 i4_cost_children += ps_results_bl->ps_cu_results->ps_best_results[0].i4_tot_cost;
1098 }
1099
1100 if(ps_results_br->u1_split_flag)
1101 {
1102 num_parts_in_32x32 += 4;
1103
1104 i4_cost_children += ps_merge_prms->ps_8x8_cu_results[12].ps_best_results->i4_tot_cost +
1105 ps_merge_prms->ps_8x8_cu_results[13].ps_best_results->i4_tot_cost +
1106 ps_merge_prms->ps_8x8_cu_results[14].ps_best_results->i4_tot_cost +
1107 ps_merge_prms->ps_8x8_cu_results[15].ps_best_results->i4_tot_cost;
1108 }
1109 else
1110 {
1111 i4_part_type = ps_results_br->ps_cu_results->ps_best_results[0].u1_part_type;
1112 num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1113 i4_cost_children += ps_results_br->ps_cu_results->ps_best_results[0].i4_tot_cost;
1114 }
1115
1116 if((num_parts_in_32x32 > 7) && (ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY))
1117 {
1118 return CU_SPLIT;
1119 }
1120
1121 if((num_parts_in_32x32 > MAX_NUM_CONSTITUENT_MVS_TO_ENABLE_32MERGE_IN_XS25) &&
1122 (ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25))
1123 {
1124 return CU_SPLIT;
1125 }
1126 }
1127
1128 /* Accumulate intra percentage before merge for early CU_SPLIT decision */
1129 /* Note : Each intra part represents an NxN unit of the children CUs */
1130 /* This is essentially 1/16th of the CUsize under consideration for merge */
1131 if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
1132 {
1133 if(CU_64x64 == ps_results_merge->e_cu_size)
1134 {
1135 i4_intra_parts =
1136 (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_inter_eval_enable)
1137 ? 16
1138 : ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_intra_eval_enable;
1139 }
1140 else
1141 {
1142 switch((ps_results_merge->u1_x_off >> 5) + ((ps_results_merge->u1_y_off >> 4)))
1143 {
1144 case 0:
1145 {
1146 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tl
1147 ->u1_inter_eval_enable)
1148 ? 16
1149 : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1150 ->ps_child_node_tl->u1_intra_eval_enable);
1151
1152 break;
1153 }
1154 case 1:
1155 {
1156 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tr
1157 ->u1_inter_eval_enable)
1158 ? 16
1159 : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1160 ->ps_child_node_tr->u1_intra_eval_enable);
1161
1162 break;
1163 }
1164 case 2:
1165 {
1166 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_bl
1167 ->u1_inter_eval_enable)
1168 ? 16
1169 : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1170 ->ps_child_node_bl->u1_intra_eval_enable);
1171
1172 break;
1173 }
1174 case 3:
1175 {
1176 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_br
1177 ->u1_inter_eval_enable)
1178 ? 16
1179 : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1180 ->ps_child_node_br->u1_intra_eval_enable);
1181
1182 break;
1183 }
1184 }
1185 }
1186 }
1187 else
1188 {
1189 for(i = 0; i < 4; i++)
1190 {
1191 search_results_t *ps_results =
1192 (i == 0) ? ps_results_tl
1193 : ((i == 1) ? ps_results_tr : ((i == 2) ? ps_results_bl : ps_results_br));
1194
1195 part_type_results_t *ps_best_res = &ps_results->ps_cu_results->ps_best_results[0];
1196
1197 if(ps_results->u1_split_flag)
1198 {
1199 U08 u1_x_off = ps_results->u1_x_off;
1200 U08 u1_y_off = ps_results->u1_y_off;
1201 U08 u1_8x8_zscan_id = gau1_ctb_raster_to_zscan[(u1_x_off >> 2) + (u1_y_off << 2)] >>
1202 2;
1203
1204 /* Special case to handle 8x8 CUs when 16x16 is split */
1205 ASSERT(ps_results->e_cu_size == CU_16x16);
1206
1207 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id].ps_best_results[0];
1208
1209 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1210 i4_intra_parts += 1;
1211
1212 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 1].ps_best_results[0];
1213
1214 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1215 i4_intra_parts += 1;
1216
1217 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 2].ps_best_results[0];
1218
1219 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1220 i4_intra_parts += 1;
1221
1222 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 3].ps_best_results[0];
1223
1224 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1225 i4_intra_parts += 1;
1226 }
1227 else if(ps_best_res[0].as_pu_results[0].pu.b1_intra_flag)
1228 {
1229 i4_intra_parts += 4;
1230 }
1231 }
1232 }
1233
1234 /* Determine the max intra CU size indicated by IPE */
1235 intra_cu_size = CU_64x64;
1236 if(ps_cur_ipe_ctb->u1_split_flag)
1237 {
1238 intra_cu_size = CU_32x32;
1239 if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
1240 {
1241 intra_cu_size = CU_16x16;
1242 }
1243 }
1244
1245 if(((i4_intra_parts > CU_MERGE_MAX_INTRA_PARTS) &&
1246 (intra_cu_size < ps_results_merge->e_cu_size) &&
1247 (ME_PRISTINE_QUALITY != ps_merge_prms->e_quality_preset)) ||
1248 (i4_intra_parts == 16))
1249 {
1250 S32 i4_merge_outcome;
1251
1252 i4_merge_outcome = (CU_32x32 == ps_results_merge->e_cu_size)
1253 ? (!ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag &&
1254 ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_valid_cu)
1255 : (!ps_cur_ipe_ctb->u1_split_flag);
1256
1257 i4_merge_outcome = i4_merge_outcome ||
1258 (ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset);
1259
1260 i4_merge_outcome = i4_merge_outcome &&
1261 !(ps_subpel_prms->u1_is_cu_noisy && DISABLE_INTRA_WHEN_NOISY);
1262
1263 if(i4_merge_outcome)
1264 {
1265 inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
1266 part_type_results_t *ps_best_result = ps_cu_results->ps_best_results;
1267 pu_t *ps_pu = &ps_best_result->as_pu_results->pu;
1268
1269 ps_cu_results->u1_num_best_results = 1;
1270 ps_cu_results->u1_cu_size = ps_results_merge->e_cu_size;
1271 ps_cu_results->u1_x_off = u1_x_off;
1272 ps_cu_results->u1_y_off = u1_y_off;
1273
1274 ps_best_result->u1_part_type = PRT_2Nx2N;
1275 ps_best_result->ai4_tu_split_flag[0] = 0;
1276 ps_best_result->ai4_tu_split_flag[1] = 0;
1277 ps_best_result->ai4_tu_split_flag[2] = 0;
1278 ps_best_result->ai4_tu_split_flag[3] = 0;
1279 ps_best_result->i4_tot_cost =
1280 (CU_64x64 == ps_results_merge->e_cu_size)
1281 ? ps_cur_ipe_ctb->i4_best64x64_intra_cost
1282 : ps_cur_ipe_ctb->ai4_best32x32_intra_cost[i4_32x32_id];
1283
1284 ps_pu->b1_intra_flag = 1;
1285 ps_pu->b4_pos_x = u1_x_off >> 2;
1286 ps_pu->b4_pos_y = u1_y_off >> 2;
1287 ps_pu->b4_wd = (1 << (ps_results_merge->e_cu_size + 1)) - 1;
1288 ps_pu->b4_ht = ps_pu->b4_wd;
1289 ps_pu->mv.i1_l0_ref_idx = -1;
1290 ps_pu->mv.i1_l1_ref_idx = -1;
1291 ps_pu->mv.s_l0_mv.i2_mvx = INTRA_MV;
1292 ps_pu->mv.s_l0_mv.i2_mvy = INTRA_MV;
1293 ps_pu->mv.s_l1_mv.i2_mvx = INTRA_MV;
1294 ps_pu->mv.s_l1_mv.i2_mvy = INTRA_MV;
1295
1296 return CU_MERGED;
1297 }
1298 else
1299 {
1300 return CU_SPLIT;
1301 }
1302 }
1303
1304 if(i4_intra_parts)
1305 {
1306 i4_part_mask = ENABLE_2Nx2N;
1307 }
1308
1309 ps_results_merge->u1_num_active_ref = (ps_ctxt->s_frm_prms.bidir_enabled) ? 2 : 1;
1310
1311 hme_reset_search_results(ps_results_merge, i4_part_mask, MV_RES_QPEL);
1312
1313 ps_results_merge->u1_num_active_ref = ps_merge_prms->i4_num_ref;
1314 ps_merge_prms->i4_num_pred_dir_actual = 0;
1315
1316 if(ps_subpel_prms->u1_is_cu_noisy && ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
1317 {
1318 S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
1319 S32 i4_num_valid_parts;
1320 S32 i4_sigma_array_offset;
1321
1322 i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
1323
1324 /*********************************************************************************************************************************************/
1325 /* i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values */
1326 /* Logic is x/4 + ((y/4) x 16) : every 4 pixel increase in x equals one 4x4 block increment, every 4 pixel increase in y equals 16 4x4 block */
1327 /* increment as there will be 256 4x4 blocks in a CTB */
1328 /*********************************************************************************************************************************************/
1329 i4_sigma_array_offset = (ps_merge_prms->ps_results_merge->u1_x_off / 4) +
1330 (ps_merge_prms->ps_results_merge->u1_y_off * 4);
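/* Note that (u1_y_off * 4) == (u1_y_off / 4) * 16, i.e. one row of 16 4x4 blocks per */
/* 4 luma rows, matching the logic described above */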
1331
1332 for(i = 0; i < i4_num_valid_parts; i++)
1333 {
1334 S32 i4_part_id = ai4_valid_part_ids[i];
1335
1336 hme_compute_final_sigma_of_pu_from_base_blocks(
1337 ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
1338 ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
1339 au8_final_src_sigmaX,
1340 au8_final_src_sigmaXSquared,
1341 (CU_32x32 == ps_results_merge->e_cu_size) ? 32 : 64,
1342 4,
1343 i4_part_id,
1344 16);
1345 }
1346
1347 ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX = au8_final_src_sigmaX;
1348 ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared = au8_final_src_sigmaXSquared;
1349 }
1350
1351 /*************************************************************************/
1352 /* Loop through all ref idx and pick the merge candts and refine based */
1353 /* on the active partitions. At this stage num ref will be 1 or 2 */
1354 /*************************************************************************/
1355 for(i4_search_idx = 0; i4_search_idx < ps_merge_prms->i4_num_ref; i4_search_idx++)
1356 {
1357 S32 i4_cands;
1358 U08 u1_pred_dir = 0;
1359
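/* Map the loop index to an actual prediction direction: with two active directions */
/* (or when bidir is disabled) the index is used directly; otherwise pick whichever */
/* of L0/L1 has active references. */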
1360 if((2 == ps_merge_prms->i4_num_ref) || (!ps_ctxt->s_frm_prms.bidir_enabled))
1361 {
1362 u1_pred_dir = i4_search_idx;
1363 }
1364 else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
1365 {
1366 u1_pred_dir = 1;
1367 }
1368 else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0)
1369 {
1370 u1_pred_dir = 0;
1371 }
1372 else
1373 {
1374 ASSERT(0);
1375 }
1376
1377 /* call the function to pick and evaluate the merge candts, given */
1378 /* a ref id and a part mask. */
1379 i4_cands = hme_pick_eval_merge_candts(
1380 ps_merge_prms,
1381 ps_subpel_prms,
1382 u1_pred_dir,
1383 i4_best_part_type,
1384 is_vert,
1385 ps_wt_inp_prms,
1386 i4_frm_qstep,
1387 ps_cmn_utils_optimised_function_list,
1388 ps_me_optimised_function_list);
1389
1390 if(i4_cands)
1391 {
1392 ps_merge_prms->au1_pred_dir_searched[ps_merge_prms->i4_num_pred_dir_actual] =
1393 u1_pred_dir;
1394 ps_merge_prms->i4_num_pred_dir_actual++;
1395 }
1396
1397 i4_num_merge_cands_evaluated += i4_cands;
1398 }
1399
1400 /* Call the decide_part_types function here */
1401 /* Populate the new PU struct with the results post subpel refinement*/
1402 if(i4_num_merge_cands_evaluated)
1403 {
1404 inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
1405
1406 hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
1407
1408 ps_merge_prms->ps_inter_ctb_prms->i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
1409 ps_merge_prms->ps_inter_ctb_prms->i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
1410
1411 hme_populate_pus(
1412 ps_thrd_ctxt,
1413 ps_ctxt,
1414 ps_subpel_prms,
1415 ps_results_merge,
1416 ps_cu_results,
1417 ps_pu_results,
1418 ps_pu_result,
1419 ps_merge_prms->ps_inter_ctb_prms,
1420 &ps_ctxt->s_wt_pred,
1421 ps_merge_prms->ps_layer_ctxt,
1422 ps_merge_prms->au1_pred_dir_searched,
1423 ps_merge_prms->i4_num_pred_dir_actual);
1424
1425 ps_cu_results->i4_inp_offset = (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
1426
1427 hme_decide_part_types(
1428 ps_cu_results,
1429 ps_pu_results,
1430 ps_merge_prms->ps_inter_ctb_prms,
1431 ps_ctxt,
1432 ps_cmn_utils_optimised_function_list,
1433 ps_me_optimised_function_list
1434
1435 );
1436
1437 /*****************************************************************/
1438 /* INSERT INTRA RESULTS AT 32x32/64x64 LEVEL. */
1439 /*****************************************************************/
1440 #if DISABLE_INTRA_IN_BPICS
1441 if(1 != ((ME_XTREME_SPEED_25 == ps_merge_prms->e_quality_preset) &&
1442 (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
1443 #endif
1444 {
1445 if(!(DISABLE_INTRA_WHEN_NOISY && ps_merge_prms->ps_inter_ctb_prms->u1_is_cu_noisy))
1446 {
1447 hme_insert_intra_nodes_post_bipred(
1448 ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
1449 }
1450 }
1451 }
1452 else
1453 {
1454 return CU_SPLIT;
1455 }
1456
1457 /* We check the best result of ref idx 0 and compare for parent vs child */
1458 if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
1459 (CU_32x32 == ps_results_merge->e_cu_size))
1460 {
1461 i4_cost_parent = ps_results_merge->ps_cu_results->ps_best_results[0].i4_tot_cost;
1462 /*********************************************************************/
1463 /* Add the cost of signaling the CU tree bits. */
1464 /* Assuming parent is not split, then we signal 1 bit for this parent*/
1465 /* CU. If split, then 1 bit for parent CU + 1 bit for each of 4 child CUs */
1466 /* So, 4*lambda is extra for children cost. :Lokesh */
1467 /*********************************************************************/
1468 {
1469 pred_ctxt_t *ps_pred_ctxt = &ps_results_merge->as_pred_ctxt[0];
1470
1471 i4_cost_children += ((4 * ps_pred_ctxt->lambda) >> (ps_pred_ctxt->lambda_q_shift));
1472 }
1473
1474 if(i4_cost_parent < i4_cost_children)
1475 {
1476 return CU_MERGED;
1477 }
1478
1479 return CU_SPLIT;
1480 }
1481 else
1482 {
1483 return CU_MERGED;
1484 }
1485 }
1486
1487 #define COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift) \
1488 { \
1489 (ps_mv)->i2_mv_x = (ps_search_node)->s_mv.i2_mvx >> (shift); \
1490 (ps_mv)->i2_mv_y = (ps_search_node)->s_mv.i2_mvy >> (shift); \
1491 *(pi1_ref_idx) = (ps_search_node)->i1_ref_idx; \
1492 }
1493
1494 /**
1495 ********************************************************************************
1496 * @fn hme_update_mv_bank_noencode(search_results_t *ps_search_results,
1497 * layer_mv_t *ps_layer_mv,
1498 * S32 i4_search_blk_x,
1499 * S32 i4_search_blk_y,
1500 * mvbank_update_prms_t *ps_prms)
1501 *
1502 * @brief Updates the mv bank in case there is no further encoding to be done
1503 *
1504 * @param[in] ps_search_results: contains results for the block just searched
1505 *
1506 * @param[in,out] ps_layer_mv : Has pointer to mv bank amongst other things
1507 *
1508 * @param[in] i4_search_blk_x : col num of blk being searched
1509 *
1510 * @param[in] i4_search_blk_y : row num of blk being searched
1511 *
1512 * @param[in] ps_prms : contains certain parameters which govern how the update is done
1513 *
1514 * @return None
1515 ********************************************************************************
1516 */
1517
1518 void hme_update_mv_bank_noencode(
1519 search_results_t *ps_search_results,
1520 layer_mv_t *ps_layer_mv,
1521 S32 i4_search_blk_x,
1522 S32 i4_search_blk_y,
1523 mvbank_update_prms_t *ps_prms)
1524 {
1525 hme_mv_t *ps_mv;
1526 hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1527 S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1528 S32 i4_blk_x, i4_blk_y, i4_offset;
1529 S32 i4_j, i4_ref_id;
1530 search_node_t *ps_search_node;
1531 search_node_t *ps_search_node_8x8, *ps_search_node_4x4_1;
1532 search_node_t *ps_search_node_4x4_2, *ps_search_node_4x4_3;
1533 search_node_t *ps_search_node_4x4_4;
1534
1535 i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1536 i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1537 i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1538
1539 i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1540
1541 /* Identify the correct offset in the mvbank and the reference id buf */
1542 ps_mv = ps_layer_mv->ps_mv + i4_offset;
1543 pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1544
1545 /*************************************************************************/
1546 /* Supposing we store the mvs in the same blk size as we searched (e.g. */
1547 /* we searched 8x8 blks and store results for 8x8 blks), then we can */
1548 /* do a straightforward single update of results. This will have a 1-1 */
1549 /* correspondence. */
1550 /*************************************************************************/
1551 if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
1552 {
1553 for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
1554 {
1555 ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1556 for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1557 {
1558 COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, 0);
1559 ps_mv++;
1560 pi1_ref_idx++;
1561 ps_search_node++;
1562 }
1563 }
1564 return;
1565 }
1566
1567 /*************************************************************************/
1568 /* Case where search blk size is 8x8, but we update 4x4 results. In this */
1569 /* case, we need to have NxN partitions enabled in search. */
1570 /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
1571 /* NxN partition. We also update the 8x8 result into each of the 4x4 banks */
1572 /*************************************************************************/
1573 ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
1574 ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
1575 ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
1576
1577 /*************************************************************************/
1578 /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
1579 /* hence the below check. */
1580 /*************************************************************************/
1581 ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
1582
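/* ps_mv1..4 and pi1_ref_idx1..4 address the four 4x4 blocks (TL, TR, BL, BR) covered */
/* by this 8x8 search block within the MV bank. */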
1583 ps_mv1 = ps_mv;
1584 ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
1585 ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
1586 ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
1587 pi1_ref_idx1 = pi1_ref_idx;
1588 pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
1589 pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
1590 pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
1591
1592 for(i4_ref_id = 0; i4_ref_id < (S32)ps_search_results->u1_num_active_ref; i4_ref_id++)
1593 {
1594 ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1595
1596 ps_search_node_4x4_1 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL];
1597
1598 ps_search_node_4x4_2 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TR];
1599
1600 ps_search_node_4x4_3 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BL];
1601
1602 ps_search_node_4x4_4 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BR];
1603
1604 COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
1605 ps_mv1++;
1606 pi1_ref_idx1++;
1607 ps_search_node_4x4_1++;
1608 COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
1609 ps_mv2++;
1610 pi1_ref_idx2++;
1611 ps_search_node_4x4_2++;
1612 COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
1613 ps_mv3++;
1614 pi1_ref_idx3++;
1615 ps_search_node_4x4_3++;
1616 COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
1617 ps_mv4++;
1618 pi1_ref_idx4++;
1619 ps_search_node_4x4_4++;
1620
1621 if(ps_layer_mv->i4_num_mvs_per_ref > 1)
1622 {
1623 COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_8x8, 0);
1624 ps_mv1++;
1625 pi1_ref_idx1++;
1626 COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_8x8, 0);
1627 ps_mv2++;
1628 pi1_ref_idx2++;
1629 COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_8x8, 0);
1630 ps_mv3++;
1631 pi1_ref_idx3++;
1632 COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_8x8, 0);
1633 ps_mv4++;
1634 pi1_ref_idx4++;
1635 }
1636
1637 for(i4_j = 2; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1638 {
1639 COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
1640 ps_mv1++;
1641 pi1_ref_idx1++;
1642 ps_search_node_4x4_1++;
1643 COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
1644 ps_mv2++;
1645 pi1_ref_idx2++;
1646 ps_search_node_4x4_2++;
1647 COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
1648 ps_mv3++;
1649 pi1_ref_idx3++;
1650 ps_search_node_4x4_3++;
1651 COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
1652 ps_mv4++;
1653 pi1_ref_idx4++;
1654 ps_search_node_4x4_4++;
1655 }
1656 }
1657 }
1658
1659 void hme_update_mv_bank_encode(
1660 search_results_t *ps_search_results,
1661 layer_mv_t *ps_layer_mv,
1662 S32 i4_search_blk_x,
1663 S32 i4_search_blk_y,
1664 mvbank_update_prms_t *ps_prms,
1665 U08 *pu1_pred_dir_searched,
1666 S32 i4_num_act_ref_l0)
1667 {
1668 hme_mv_t *ps_mv;
1669 hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1670 S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1671 S32 i4_blk_x, i4_blk_y, i4_offset;
1672 S32 j, i, num_parts;
1673 search_node_t *ps_search_node_tl, *ps_search_node_tr;
1674 search_node_t *ps_search_node_bl, *ps_search_node_br;
1675 search_node_t s_zero_mv;
1676 WORD32 i4_part_type = ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;
1677
1678 i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1679 i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1680 i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1681
1682 i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1683
1684 /* Identify the correct offset in the mvbank and the reference id buf */
1685 ps_mv = ps_layer_mv->ps_mv + i4_offset;
1686 pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1687
1688 ASSERT(ps_layer_mv->e_blk_size == BLK_8x8);
1689 ASSERT(ps_prms->e_search_blk_size == BLK_16x16);
1690
1691     /*************************************************************************/
1692     /* The number of mvs stored per ref in the bank cannot exceed the number  */
1693     /* of best results available in the search results, hence the below check.*/
1694     /*************************************************************************/
1695 ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_best_results);
1696
1697 ps_mv1 = ps_mv;
1698 ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
1699 ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
1700 ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
1701 pi1_ref_idx1 = pi1_ref_idx;
1702 pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
1703 pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
1704 pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
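    /* ps_mv1/pi1_ref_idx1 through ps_mv4/pi1_ref_idx4 address the TL, TR, BL   */
    /* and BR 8x8 entries of the bank covered by this 16x16 search blk; the     */
    /* TL/TR/BL/BR partition results picked below are copied into them.         */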
1705
1706 /* Initialize zero mv: default mv used for intra mvs */
1707 s_zero_mv.s_mv.i2_mvx = 0;
1708 s_zero_mv.s_mv.i2_mvy = 0;
1709 s_zero_mv.i1_ref_idx = 0;
1710
1711 if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) &&
1712 (ps_search_results->i4_part_mask & ENABLE_NxN))
1713 {
1714 i4_part_type = PRT_NxN;
1715 }
1716
1717 for(i = 0; i < ps_prms->i4_num_ref; i++)
1718 {
1719 for(j = 0; j < ps_layer_mv->i4_num_mvs_per_ref; j++)
1720 {
1721 WORD32 i4_part_id = ge_part_type_to_part_id[i4_part_type][0];
1722
1723 num_parts = gau1_num_parts_in_part_type[i4_part_type];
1724
1725 ps_search_node_tl =
1726 ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id];
1727
1728 if(num_parts == 1)
1729 {
1730 ps_search_node_tr = ps_search_node_tl;
1731 ps_search_node_bl = ps_search_node_tl;
1732 ps_search_node_br = ps_search_node_tl;
1733 }
1734 else if(num_parts == 2)
1735 {
1736 /* For vertically oriented partitions, tl, bl pt to same result */
1737 /* For horizontally oriented partition, tl, tr pt to same result */
1738 /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
1739 /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
1740 /* and right 2 8x8 have 12x16R partition */
1741 if(gau1_is_vert_part[i4_part_type])
1742 {
1743 ps_search_node_tr =
1744 ps_search_results
1745 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1746 ps_search_node_bl = ps_search_node_tl;
1747 }
1748 else
1749 {
1750 ps_search_node_tr = ps_search_node_tl;
1751 ps_search_node_bl =
1752 ps_search_results
1753 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1754 }
1755 ps_search_node_br =
1756 ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1757 }
1758 else
1759 {
1760 /* 4 unique results */
1761 ps_search_node_tr =
1762 ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1763 ps_search_node_bl =
1764 ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2];
1765 ps_search_node_br =
1766 ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3];
1767 }
1768
1769 if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV)
1770 ps_search_node_tl++;
1771 if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV)
1772 ps_search_node_tr++;
1773 if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV)
1774 ps_search_node_bl++;
1775 if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV)
1776 ps_search_node_br++;
1777
1778 COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0);
1779 ps_mv1++;
1780 pi1_ref_idx1++;
1781 COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0);
1782 ps_mv2++;
1783 pi1_ref_idx2++;
1784 COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0);
1785 ps_mv3++;
1786 pi1_ref_idx3++;
1787 COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0);
1788 ps_mv4++;
1789 pi1_ref_idx4++;
1790
1791 if(ps_prms->i4_num_results_to_store > 1)
1792 {
1793 ps_search_node_tl =
1794 &ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id][1];
1795
1796 if(num_parts == 1)
1797 {
1798 ps_search_node_tr = ps_search_node_tl;
1799 ps_search_node_bl = ps_search_node_tl;
1800 ps_search_node_br = ps_search_node_tl;
1801 }
1802 else if(num_parts == 2)
1803 {
1804 /* For vertically oriented partitions, tl, bl pt to same result */
1805 /* For horizontally oriented partition, tl, tr pt to same result */
1806 /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
1807 /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
1808 /* and right 2 8x8 have 12x16R partition */
1809 if(gau1_is_vert_part[i4_part_type])
1810 {
1811 ps_search_node_tr =
1812 &ps_search_results
1813 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1814 ps_search_node_bl = ps_search_node_tl;
1815 }
1816 else
1817 {
1818 ps_search_node_tr = ps_search_node_tl;
1819 ps_search_node_bl =
1820 &ps_search_results
1821 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1822 }
1823 ps_search_node_br =
1824 &ps_search_results
1825 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1826 }
1827 else
1828 {
1829 /* 4 unique results */
1830 ps_search_node_tr =
1831 &ps_search_results
1832 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1833 ps_search_node_bl =
1834 &ps_search_results
1835 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2][1];
1836 ps_search_node_br =
1837 &ps_search_results
1838 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3][1];
1839 }
1840
1841 if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV)
1842 ps_search_node_tl++;
1843 if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV)
1844 ps_search_node_tr++;
1845 if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV)
1846 ps_search_node_bl++;
1847 if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV)
1848 ps_search_node_br++;
1849
1850 COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0);
1851 ps_mv1++;
1852 pi1_ref_idx1++;
1853 COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0);
1854 ps_mv2++;
1855 pi1_ref_idx2++;
1856 COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0);
1857 ps_mv3++;
1858 pi1_ref_idx3++;
1859 COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0);
1860 ps_mv4++;
1861 pi1_ref_idx4++;
1862 }
1863 }
1864 }
1865 }
1866
1867 /**
1868 ********************************************************************************
1869 * @fn hme_update_mv_bank_in_l1_me(search_results_t *ps_search_results,
1870 * layer_mv_t *ps_layer_mv,
1871 * S32 i4_search_blk_x,
1872 * S32 i4_search_blk_y,
1873 * mvbank_update_prms_t *ps_prms)
1874 *
1875 * @brief Updates the mv bank in case there is no further encoding to be done
1876 *
1877 * @param[in] ps_search_results: contains results for the block just searched
1878 *
1879 * @param[in,out] ps_layer_mv : Has pointer to mv bank amongst other things
1880 *
1881 * @param[in] i4_search_blk_x : col num of blk being searched
1882 *
1883 * @param[in] i4_search_blk_y : row num of blk being searched
1884 *
1885 * @param[in] ps_prms : contains certain parameters which govern how the update is done
1886 *
1887 * @return None
1888 ********************************************************************************
1889 */
1890
1891 void hme_update_mv_bank_in_l1_me(
1892 search_results_t *ps_search_results,
1893 layer_mv_t *ps_layer_mv,
1894 S32 i4_search_blk_x,
1895 S32 i4_search_blk_y,
1896 mvbank_update_prms_t *ps_prms)
1897 {
1898 hme_mv_t *ps_mv;
1899 hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1900 S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1901 S32 i4_blk_x, i4_blk_y, i4_offset;
1902 S32 i4_j, i4_ref_id;
1903 search_node_t *ps_search_node;
1904 search_node_t *ps_search_node_8x8, *ps_search_node_4x4;
1905
1906 i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1907 i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1908 i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1909
1910 i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1911
1912 /* Identify the correct offset in the mvbank and the reference id buf */
1913 ps_mv = ps_layer_mv->ps_mv + i4_offset;
1914 pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1915
1916 /*************************************************************************/
1917 /* Supposing we store the mvs in the same blk size as we searched (e.g. */
1918 /* we searched 8x8 blks and store results for 8x8 blks), then we can */
1919 /* do a straightforward single update of results. This will have a 1-1 */
1920 /* correspondence. */
1921 /*************************************************************************/
1922 if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
1923 {
1924 search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 2];
1925
1926 hme_mv_t *ps_mv_l0_root = ps_mv;
1927 hme_mv_t *ps_mv_l1_root =
1928 ps_mv + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
1929
1930 U32 u4_num_l0_results_updated = 0;
1931 U32 u4_num_l1_results_updated = 0;
1932
1933 S08 *pi1_ref_idx_l0_root = pi1_ref_idx;
1934 S08 *pi1_ref_idx_l1_root =
1935 pi1_ref_idx_l0_root + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
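        /* Within each blk's set of mv slots, the first                          */
        /* (i4_num_active_ref_l0 * i4_num_mvs_per_ref) entries hold the L0       */
        /* results and the remaining entries hold the L1 results, which is why   */
        /* the L1 roots are offset from the L0 roots by that amount.             */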
1936
1937 for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
1938 {
1939 U32 *pu4_num_results_updated;
1940 search_node_t **pps_result_nodes;
1941
1942 U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
1943
1944 if(u1_pred_dir_of_cur_ref)
1945 {
1946 pu4_num_results_updated = &u4_num_l1_results_updated;
1947 pps_result_nodes = &aps_result_nodes_sorted[1][0];
1948 }
1949 else
1950 {
1951 pu4_num_results_updated = &u4_num_l0_results_updated;
1952 pps_result_nodes = &aps_result_nodes_sorted[0][0];
1953 }
1954
1955 ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1956
1957 for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1958 {
1959 hme_add_new_node_to_a_sorted_array(
1960 &ps_search_node[i4_j], pps_result_nodes, NULL, *pu4_num_results_updated, 0);
1961
1962 ASSERT(ps_search_node[i4_j].i1_ref_idx == i4_ref_id);
1963 (*pu4_num_results_updated)++;
1964 }
1965 }
1966
1967 for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
1968 {
1969 COPY_SEARCH_RESULT(
1970 &ps_mv_l0_root[i4_j],
1971 &pi1_ref_idx_l0_root[i4_j],
1972 aps_result_nodes_sorted[0][i4_j],
1973 0);
1974 }
1975
1976 for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
1977 {
1978 COPY_SEARCH_RESULT(
1979 &ps_mv_l1_root[i4_j],
1980 &pi1_ref_idx_l1_root[i4_j],
1981 aps_result_nodes_sorted[1][i4_j],
1982 0);
1983 }
1984
1985 return;
1986 }
1987
1988 /*************************************************************************/
1989 /* Case where search blk size is 8x8, but we update 4x4 results. In this */
1990 /* case, we need to have NxN partitions enabled in search. */
1991 /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
1992 /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/
1993 /*************************************************************************/
1994 ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
1995 ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
1996 ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
1997
1998 /*************************************************************************/
1999 /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
2000 /* hence the below check. */
2001 /*************************************************************************/
2002 ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
2003
2004 ps_mv1 = ps_mv;
2005 ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
2006 ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
2007 ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
2008 pi1_ref_idx1 = pi1_ref_idx;
2009 pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
2010 pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
2011 pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
2012
2013 {
2014 /* max ref frames * max results per partition * number of partitions (4x4, 8x8) */
2015 search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * MAX_RESULTS_PER_PART * 2];
2016 U08 au1_cost_shifts_for_sorted_node[2][MAX_NUM_REF * MAX_RESULTS_PER_PART * 2];
2017
2018 S32 i;
2019
2020 hme_mv_t *ps_mv1_l0_root = ps_mv1;
2021 hme_mv_t *ps_mv1_l1_root =
2022 ps_mv1 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2023 hme_mv_t *ps_mv2_l0_root = ps_mv2;
2024 hme_mv_t *ps_mv2_l1_root =
2025 ps_mv2 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2026 hme_mv_t *ps_mv3_l0_root = ps_mv3;
2027 hme_mv_t *ps_mv3_l1_root =
2028 ps_mv3 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2029 hme_mv_t *ps_mv4_l0_root = ps_mv4;
2030 hme_mv_t *ps_mv4_l1_root =
2031 ps_mv4 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2032
2033 U32 u4_num_l0_results_updated = 0;
2034 U32 u4_num_l1_results_updated = 0;
2035
2036 S08 *pi1_ref_idx1_l0_root = pi1_ref_idx1;
2037 S08 *pi1_ref_idx1_l1_root = pi1_ref_idx1_l0_root + (ps_prms->i4_num_active_ref_l0 *
2038 ps_layer_mv->i4_num_mvs_per_ref);
2039 S08 *pi1_ref_idx2_l0_root = pi1_ref_idx2;
2040 S08 *pi1_ref_idx2_l1_root = pi1_ref_idx2_l0_root + (ps_prms->i4_num_active_ref_l0 *
2041 ps_layer_mv->i4_num_mvs_per_ref);
2042 S08 *pi1_ref_idx3_l0_root = pi1_ref_idx3;
2043 S08 *pi1_ref_idx3_l1_root = pi1_ref_idx3_l0_root + (ps_prms->i4_num_active_ref_l0 *
2044 ps_layer_mv->i4_num_mvs_per_ref);
2045 S08 *pi1_ref_idx4_l0_root = pi1_ref_idx4;
2046 S08 *pi1_ref_idx4_l1_root = pi1_ref_idx4_l0_root + (ps_prms->i4_num_active_ref_l0 *
2047 ps_layer_mv->i4_num_mvs_per_ref);
2048
2049 for(i = 0; i < 4; i++)
2050 {
2051 hme_mv_t *ps_mv_l0_root;
2052 hme_mv_t *ps_mv_l1_root;
2053
2054 S08 *pi1_ref_idx_l0_root;
2055 S08 *pi1_ref_idx_l1_root;
2056
2057 for(i4_ref_id = 0; i4_ref_id < ps_search_results->u1_num_active_ref; i4_ref_id++)
2058 {
2059 U32 *pu4_num_results_updated;
2060 search_node_t **pps_result_nodes;
2061 U08 *pu1_cost_shifts_for_sorted_node;
2062
2063 U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
2064
2065 if(u1_pred_dir_of_cur_ref)
2066 {
2067 pu4_num_results_updated = &u4_num_l1_results_updated;
2068 pps_result_nodes = &aps_result_nodes_sorted[1][0];
2069 pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0];
2070 }
2071 else
2072 {
2073 pu4_num_results_updated = &u4_num_l0_results_updated;
2074 pps_result_nodes = &aps_result_nodes_sorted[0][0];
2075                 pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[0][0];
2076 }
2077
2078 ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
2079
2080 ps_search_node_4x4 =
2081 ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL + i];
2082
2083 for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
2084 {
2085 hme_add_new_node_to_a_sorted_array(
2086 &ps_search_node_4x4[i4_j],
2087 pps_result_nodes,
2088 pu1_cost_shifts_for_sorted_node,
2089 *pu4_num_results_updated,
2090 0);
2091
2092 (*pu4_num_results_updated)++;
2093
2094 hme_add_new_node_to_a_sorted_array(
2095 &ps_search_node_8x8[i4_j],
2096 pps_result_nodes,
2097 pu1_cost_shifts_for_sorted_node,
2098 *pu4_num_results_updated,
2099 2);
2100
2101 (*pu4_num_results_updated)++;
2102 }
2103 }
2104
2105 switch(i)
2106 {
2107 case 0:
2108 {
2109 ps_mv_l0_root = ps_mv1_l0_root;
2110 ps_mv_l1_root = ps_mv1_l1_root;
2111
2112 pi1_ref_idx_l0_root = pi1_ref_idx1_l0_root;
2113 pi1_ref_idx_l1_root = pi1_ref_idx1_l1_root;
2114
2115 break;
2116 }
2117 case 1:
2118 {
2119 ps_mv_l0_root = ps_mv2_l0_root;
2120 ps_mv_l1_root = ps_mv2_l1_root;
2121
2122 pi1_ref_idx_l0_root = pi1_ref_idx2_l0_root;
2123 pi1_ref_idx_l1_root = pi1_ref_idx2_l1_root;
2124
2125 break;
2126 }
2127 case 2:
2128 {
2129 ps_mv_l0_root = ps_mv3_l0_root;
2130 ps_mv_l1_root = ps_mv3_l1_root;
2131
2132 pi1_ref_idx_l0_root = pi1_ref_idx3_l0_root;
2133 pi1_ref_idx_l1_root = pi1_ref_idx3_l1_root;
2134
2135 break;
2136 }
2137 case 3:
2138 {
2139 ps_mv_l0_root = ps_mv4_l0_root;
2140 ps_mv_l1_root = ps_mv4_l1_root;
2141
2142 pi1_ref_idx_l0_root = pi1_ref_idx4_l0_root;
2143 pi1_ref_idx_l1_root = pi1_ref_idx4_l1_root;
2144
2145 break;
2146 }
2147 }
2148
2149 u4_num_l0_results_updated =
2150 MIN((S32)u4_num_l0_results_updated,
2151 ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2152
2153 u4_num_l1_results_updated =
2154 MIN((S32)u4_num_l1_results_updated,
2155 ps_prms->i4_num_active_ref_l1 * ps_layer_mv->i4_num_mvs_per_ref);
2156
2157 for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
2158 {
2159 COPY_SEARCH_RESULT(
2160 &ps_mv_l0_root[i4_j],
2161 &pi1_ref_idx_l0_root[i4_j],
2162 aps_result_nodes_sorted[0][i4_j],
2163 0);
2164 }
2165
2166 for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
2167 {
2168 COPY_SEARCH_RESULT(
2169 &ps_mv_l1_root[i4_j],
2170 &pi1_ref_idx_l1_root[i4_j],
2171 aps_result_nodes_sorted[1][i4_j],
2172 0);
2173 }
2174 }
2175 }
2176 }
2177
2178 /**
2179 ******************************************************************************
2180 * @brief Scales a motion vector component projected from a different layer of the same
2181 * picture (so no ref-id related delta POC scaling is required)
2182 ******************************************************************************
2183 */
2184
2185 #define SCALE_MV_COMP_RES(mvcomp_p, dim_c, dim_p) \
2186 ((((mvcomp_p) * (dim_c)) + ((SIGN((mvcomp_p)) * (dim_p)) >> 1)) / (dim_p))
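/*
 * The SIGN term implements round-to-nearest scaling. Illustration with
 * hypothetical values: mvcomp_p = 5, dim_c = 1280, dim_p = 960 gives
 * ((5 * 1280) + (960 >> 1)) / 960 = 6880 / 960 = 7, i.e. 5 * (1280 / 960) = 6.67
 * rounded to the nearest integer.
 */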
2187 /**
2188 ********************************************************************************
2189 * @fn hme_project_coloc_candt(search_node_t *ps_search_node,
2190 * layer_ctxt_t *ps_curr_layer,
2191 * layer_ctxt_t *ps_coarse_layer,
2192 * S32 i4_pos_x,
2193 * S32 i4_pos_y,
2194 * S08 i1_ref_id,
2195 * S32 i4_result_id)
2196 *
2197 * @brief From a coarser layer, projects a candidate situated at the "colocated"
2198 * position in the picture (e.g. given x, y it will be x/2, y/2 for a dyadic layer)
2199 *
2200 * @param[out] ps_search_node : contains the projected result
2201 *
2202 * @param[in] ps_curr_layer : current layer context
2203 *
2204 * @param[in] ps_coarse_layer : coarser layer context
2205 *
2206 * @param[in] i4_pos_x : x Position where mv is required (w.r.t. curr layer)
2207 *
2208 * @param[in] i4_pos_y : y Position where mv is required (w.r.t. curr layer)
2209 *
2210 * @param[in] i1_ref_id : reference id for which the candidate is required
2211 *
2212 * @param[in] i4_result_id : result id for which the candidate is required
2213 * (0 : best result, 1 : next best)
2214 *
2215 * @return None
2216 ********************************************************************************
2217 */
2218
2219 void hme_project_coloc_candt(
2220 search_node_t *ps_search_node,
2221 layer_ctxt_t *ps_curr_layer,
2222 layer_ctxt_t *ps_coarse_layer,
2223 S32 i4_pos_x,
2224 S32 i4_pos_y,
2225 S08 i1_ref_id,
2226 S32 i4_result_id)
2227 {
2228 S32 wd_c, ht_c, wd_p, ht_p;
2229 S32 blksize_p, blk_x, blk_y, i4_offset;
2230 layer_mv_t *ps_layer_mvbank;
2231 hme_mv_t *ps_mv;
2232 S08 *pi1_ref_idx;
2233
2234 /* Width and ht of current and prev layers */
2235 wd_c = ps_curr_layer->i4_wd;
2236 ht_c = ps_curr_layer->i4_ht;
2237 wd_p = ps_coarse_layer->i4_wd;
2238 ht_p = ps_coarse_layer->i4_ht;
2239
2240 ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2241 blksize_p = (S32)gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
2242
2243 /* Safety check to avoid uninitialized access across temporal layers */
2244 i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2245 i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2246
2247 /* Project the positions to prev layer */
2248 /* TODO: convert these to scale factors at pic level */
2249 blk_x = (i4_pos_x * wd_p) / (wd_c * blksize_p);
2250 blk_y = (i4_pos_y * ht_p) / (ht_c * blksize_p);
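    /* One expression does both steps: dividing by blksize_p converts the pel   */
    /* position to a blk index, and the wd_p / wd_c (ht_p / ht_c) factor        */
    /* rescales it from the current layer to the coarser layer.                 */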
2251
2252 /* Pick up the mvs from the location */
2253 i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2254 i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2255
2256 ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2257 pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2258
2259 ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2260 pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2261
2262 ps_search_node->s_mv.i2_mvx = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_x, wd_c, wd_p);
2263 ps_search_node->s_mv.i2_mvy = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_y, ht_c, ht_p);
2264 ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2265 ps_search_node->u1_subpel_done = 0;
2266 if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2267 {
2268 ps_search_node->i1_ref_idx = i1_ref_id;
2269 ps_search_node->s_mv.i2_mvx = 0;
2270 ps_search_node->s_mv.i2_mvy = 0;
2271 }
2272 }
2273
2274 /**
2275 ********************************************************************************
2276 * @fn hme_project_coloc_candt_dyadic(search_node_t *ps_search_node,
2277 * layer_ctxt_t *ps_curr_layer,
2278 * layer_ctxt_t *ps_coarse_layer,
2279 * S32 i4_pos_x,
2280 * S32 i4_pos_y,
2281 * S08 i1_ref_id,
2282 * S32 i4_result_id)
2283 *
2284 * @brief From a coarser layer, projects a candidate situated at the "colocated"
2285 * position in the picture when the ratios are dyadic
2286 *
2287 * @param[out] ps_search_node : contains the projected result
2288 *
2289 * @param[in] ps_curr_layer : current layer context
2290 *
2291 * @param[in] ps_coarse_layer : coarser layer context
2292 *
2293 * @param[in] i4_pos_x : x Position where mv is required (w.r.t. curr layer)
2294 *
2295 * @param[in] i4_pos_y : y Position where mv is required (w.r.t. curr layer)
2296 *
2297 * @param[in] i1_ref_id : reference id for which the candidate is required
2298 *
2299 * @param[in] i4_result_id : result id for which the candidate is required
2300 * (0 : best result, 1 : next best)
2301 *
2302 * @return None
2303 ********************************************************************************
2304 */
2305
2306 void hme_project_coloc_candt_dyadic(
2307 search_node_t *ps_search_node,
2308 layer_ctxt_t *ps_curr_layer,
2309 layer_ctxt_t *ps_coarse_layer,
2310 S32 i4_pos_x,
2311 S32 i4_pos_y,
2312 S08 i1_ref_id,
2313 S32 i4_result_id)
2314 {
2315 S32 wd_c, ht_c, wd_p, ht_p;
2316 S32 blksize_p, blk_x, blk_y, i4_offset;
2317 layer_mv_t *ps_layer_mvbank;
2318 hme_mv_t *ps_mv;
2319 S08 *pi1_ref_idx;
2320
2321 /* Width and ht of current and prev layers */
2322 wd_c = ps_curr_layer->i4_wd;
2323 ht_c = ps_curr_layer->i4_ht;
2324 wd_p = ps_coarse_layer->i4_wd;
2325 ht_p = ps_coarse_layer->i4_ht;
2326
2327 ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2328 /* blksize_p = log2(wd) + 1 */
2329 blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];
2330
2331 /* ASSERT for valid sizes */
2332 ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
2333
2334 /* Safety check to avoid uninitialized access across temporal layers */
2335 i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2336 i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2337
2338 /* Project the positions to prev layer */
2339 /* TODO: convert these to scale factors at pic level */
2340 blk_x = i4_pos_x >> blksize_p; // (2 * blksize_p);
2341 blk_y = i4_pos_y >> blksize_p; // (2 * blksize_p);
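    /* Per the note above, blksize_p is log2(blk_wd) + 1, so a single right     */
    /* shift divides the current-layer pel position by (2 * coarse-layer blk    */
    /* width): the extra factor of 2 is the dyadic downscale, the rest converts */
    /* the position to a blk index.                                             */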
2342
2343 /* Pick up the mvs from the location */
2344 i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2345 i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2346
2347 ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2348 pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2349
2350 ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2351 pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2352
2353 ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1;
2354 ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1;
2355 ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2356 if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2357 {
2358 ps_search_node->i1_ref_idx = i1_ref_id;
2359 ps_search_node->s_mv.i2_mvx = 0;
2360 ps_search_node->s_mv.i2_mvy = 0;
2361 }
2362 }
2363
2364 void hme_project_coloc_candt_dyadic_implicit(
2365 search_node_t *ps_search_node,
2366 layer_ctxt_t *ps_curr_layer,
2367 layer_ctxt_t *ps_coarse_layer,
2368 S32 i4_pos_x,
2369 S32 i4_pos_y,
2370 S32 i4_num_act_ref_l0,
2371 U08 u1_pred_dir,
2372 U08 u1_default_ref_id,
2373 S32 i4_result_id)
2374 {
2375 S32 wd_c, ht_c, wd_p, ht_p;
2376 S32 blksize_p, blk_x, blk_y, i4_offset;
2377 layer_mv_t *ps_layer_mvbank;
2378 hme_mv_t *ps_mv;
2379 S08 *pi1_ref_idx;
2380
2381 /* Width and ht of current and prev layers */
2382 wd_c = ps_curr_layer->i4_wd;
2383 ht_c = ps_curr_layer->i4_ht;
2384 wd_p = ps_coarse_layer->i4_wd;
2385 ht_p = ps_coarse_layer->i4_ht;
2386
2387 ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2388 blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];
2389
2390 /* ASSERT for valid sizes */
2391 ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
2392
2393 /* Safety check to avoid uninitialized access across temporal layers */
2394 i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2395 i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2396 /* Project the positions to prev layer */
2397 /* TODO: convert these to scale factors at pic level */
2398 blk_x = i4_pos_x >> blksize_p; // (2 * blksize_p);
2399 blk_y = i4_pos_y >> blksize_p; // (2 * blksize_p);
2400
2401 /* Pick up the mvs from the location */
2402 i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2403 i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2404
2405 ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2406 pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2407
2408 if(u1_pred_dir == 1)
2409 {
2410 ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
2411 pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
2412 }
2413
2414 ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1;
2415 ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1;
2416 ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2417 if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2418 {
2419 ps_search_node->i1_ref_idx = u1_default_ref_id;
2420 ps_search_node->s_mv.i2_mvx = 0;
2421 ps_search_node->s_mv.i2_mvy = 0;
2422 }
2423 }
2424
2425 #define SCALE_RANGE_PRMS(prm1, prm2, shift) \
2426 { \
2427 prm1.i2_min_x = prm2.i2_min_x << shift; \
2428 prm1.i2_max_x = prm2.i2_max_x << shift; \
2429 prm1.i2_min_y = prm2.i2_min_y << shift; \
2430 prm1.i2_max_y = prm2.i2_max_y << shift; \
2431 }
2432
2433 #define SCALE_RANGE_PRMS_POINTERS(prm1, prm2, shift) \
2434 { \
2435 prm1->i2_min_x = prm2->i2_min_x << shift; \
2436 prm1->i2_max_x = prm2->i2_max_x << shift; \
2437 prm1->i2_min_y = prm2->i2_min_y << shift; \
2438 prm1->i2_max_y = prm2->i2_max_y << shift; \
2439 }
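/* Both macros scale a range struct's min/max bounds by (1 << shift); the only */
/* difference is whether the operands are struct instances or pointers.        */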
2440
2441 /**
2442 ********************************************************************************
2443 * @fn void hme_refine_frm_init(layer_ctxt_t *ps_curr_layer,
2444 * refine_prms_t *ps_refine_prms, layer_ctxt_t *ps_coarse_layer)
2445 *
2446 * @brief Frame init of refinement layers in ME
2447 *
2448 * @param[in,out] ps_curr_layer: pointer to the current refinement layer context
2449 *
2450 * @param[in] ps_refine_prms : refinement layer prms
2451 *
2452 * @return None
2453 ********************************************************************************
2454 */
2455 void hme_refine_frm_init(
2456 layer_ctxt_t *ps_curr_layer, refine_prms_t *ps_refine_prms, layer_ctxt_t *ps_coarse_layer)
2457 {
2458 /* local variables */
2459 BLK_SIZE_T e_result_blk_size = BLK_8x8;
2460 S32 i4_num_ref_fpel, i4_num_ref_prev_layer;
2461
2462 i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
2463
2464 if(ps_refine_prms->explicit_ref)
2465 {
2466 i4_num_ref_fpel = i4_num_ref_prev_layer;
2467 }
2468 else
2469 {
2470 i4_num_ref_fpel = 2;
2471 }
2472
2473 if(ps_refine_prms->i4_enable_4x4_part)
2474 {
2475 e_result_blk_size = BLK_4x4;
2476 }
2477
2478 i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
2479
2480 hme_init_mv_bank(
2481 ps_curr_layer,
2482 e_result_blk_size,
2483 i4_num_ref_fpel,
2484 ps_refine_prms->i4_num_mvbank_results,
2485 ps_refine_prms->i4_layer_id > 0 ? 0 : 1);
2486 }
2487
2488 #if 1 //ENABLE_CU_RECURSION || TEST_AND_EVALUATE_CU_RECURSION
2489 /**
2490 ********************************************************************************
2491 * @fn void hme_init_clusters_16x16
2492 * (
2493 * cluster_16x16_blk_t *ps_cluster_blk_16x16
2494 * )
2495 *
2496 * @brief Initialisations for the structs used in the clustering algorithm
2497 *
2498 * @param[in/out] ps_cluster_blk_16x16: pointer to structure containing clusters
2499 * of 16x16 block
2500 *
2501 * @return None
2502 ********************************************************************************
2503 */
2504 static __inline void
2505 hme_init_clusters_16x16(cluster_16x16_blk_t *ps_cluster_blk_16x16, S32 bidir_enabled)
2506 {
2507 S32 i;
2508
2509 ps_cluster_blk_16x16->num_clusters = 0;
2510 ps_cluster_blk_16x16->intra_mv_area = 0;
2511 ps_cluster_blk_16x16->best_inter_cost = 0;
2512
2513 for(i = 0; i < MAX_NUM_CLUSTERS_16x16; i++)
2514 {
2515 ps_cluster_blk_16x16->as_cluster_data[i].max_dist_from_centroid =
2516 bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_16x16_B : MAX_DISTANCE_FROM_CENTROID_16x16;
2517
2518 ps_cluster_blk_16x16->as_cluster_data[i].is_valid_cluster = 0;
2519
2520 ps_cluster_blk_16x16->as_cluster_data[i].bi_mv_pixel_area = 0;
2521 ps_cluster_blk_16x16->as_cluster_data[i].uni_mv_pixel_area = 0;
2522 }
2523 for(i = 0; i < MAX_NUM_REF; i++)
2524 {
2525 ps_cluster_blk_16x16->au1_num_clusters[i] = 0;
2526 }
2527 }
2528
2529 /**
2530 ********************************************************************************
2531 * @fn void hme_init_clusters_32x32
2532 * (
2533 * cluster_32x32_blk_t *ps_cluster_blk_32x32
2534 * )
2535 *
2536 * @brief Initialisations for the structs used in the clustering algorithm
2537 *
2538 * @param[in/out] ps_cluster_blk_32x32: pointer to structure containing clusters
2539 * of 32x32 block
2540 *
2541 * @return None
2542 ********************************************************************************
2543 */
2544 static __inline void
2545 hme_init_clusters_32x32(cluster_32x32_blk_t *ps_cluster_blk_32x32, S32 bidir_enabled)
2546 {
2547 S32 i;
2548
2549 ps_cluster_blk_32x32->num_clusters = 0;
2550 ps_cluster_blk_32x32->intra_mv_area = 0;
2551 ps_cluster_blk_32x32->best_alt_ref = -1;
2552 ps_cluster_blk_32x32->best_uni_ref = -1;
2553 ps_cluster_blk_32x32->best_inter_cost = 0;
2554 ps_cluster_blk_32x32->num_clusters_with_weak_sdi_density = 0;
2555
2556 for(i = 0; i < MAX_NUM_CLUSTERS_32x32; i++)
2557 {
2558 ps_cluster_blk_32x32->as_cluster_data[i].max_dist_from_centroid =
2559 bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_32x32_B : MAX_DISTANCE_FROM_CENTROID_32x32;
2560 ps_cluster_blk_32x32->as_cluster_data[i].is_valid_cluster = 0;
2561
2562 ps_cluster_blk_32x32->as_cluster_data[i].bi_mv_pixel_area = 0;
2563 ps_cluster_blk_32x32->as_cluster_data[i].uni_mv_pixel_area = 0;
2564 }
2565 for(i = 0; i < MAX_NUM_REF; i++)
2566 {
2567 ps_cluster_blk_32x32->au1_num_clusters[i] = 0;
2568 }
2569 }
2570
2571 /**
2572 ********************************************************************************
2573 * @fn void hme_init_clusters_64x64
2574 * (
2575 * cluster_64x64_blk_t *ps_cluster_blk_64x64
2576 * )
2577 *
2578 * @brief Initialisations for the structs used in the clustering algorithm
2579 *
2580 * @param[in/out] ps_cluster_blk_64x64: pointer to structure containing clusters
2581 * of 64x64 block
2582 *
2583 * @return None
2584 ********************************************************************************
2585 */
2586 static __inline void
2587 hme_init_clusters_64x64(cluster_64x64_blk_t *ps_cluster_blk_64x64, S32 bidir_enabled)
2588 {
2589 S32 i;
2590
2591 ps_cluster_blk_64x64->num_clusters = 0;
2592 ps_cluster_blk_64x64->intra_mv_area = 0;
2593 ps_cluster_blk_64x64->best_alt_ref = -1;
2594 ps_cluster_blk_64x64->best_uni_ref = -1;
2595 ps_cluster_blk_64x64->best_inter_cost = 0;
2596
2597 for(i = 0; i < MAX_NUM_CLUSTERS_64x64; i++)
2598 {
2599 ps_cluster_blk_64x64->as_cluster_data[i].max_dist_from_centroid =
2600 bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_64x64_B : MAX_DISTANCE_FROM_CENTROID_64x64;
2601 ps_cluster_blk_64x64->as_cluster_data[i].is_valid_cluster = 0;
2602
2603 ps_cluster_blk_64x64->as_cluster_data[i].bi_mv_pixel_area = 0;
2604 ps_cluster_blk_64x64->as_cluster_data[i].uni_mv_pixel_area = 0;
2605 }
2606 for(i = 0; i < MAX_NUM_REF; i++)
2607 {
2608 ps_cluster_blk_64x64->au1_num_clusters[i] = 0;
2609 }
2610 }
2611
2612 /**
2613 ********************************************************************************
2614 * @fn void hme_sort_and_assign_top_ref_ids_areawise
2615 * (
2616 * ctb_cluster_info_t *ps_ctb_cluster_info
2617 * )
2618 *
2619 * @brief Finds best_uni_ref and best_alt_ref
2620 *
2621 * @param[in/out] ps_ctb_cluster_info: structure that points to ctb data
2622 *
2623 * @param[in] bidir_enabled: flag that indicates whether or not bi-pred is
2624 * enabled
2625 *
2626 * @param[in] block_width: width of the block in pels
2627 *
2628 * @param[in] e_cu_pos: position of the block within the CTB
2629 *
2630 * @return None
2631 ********************************************************************************
2632 */
2633 void hme_sort_and_assign_top_ref_ids_areawise(
2634 ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width, CU_POS_T e_cu_pos)
2635 {
2636 cluster_32x32_blk_t *ps_32x32 = NULL;
2637 cluster_64x64_blk_t *ps_64x64 = NULL;
2638 cluster_data_t *ps_data;
2639
2640 S32 j, k;
2641
2642 S32 ai4_uni_area[MAX_NUM_REF];
2643 S32 ai4_bi_area[MAX_NUM_REF];
2644 S32 ai4_ref_id_found[MAX_NUM_REF];
2645 S32 ai4_ref_id[MAX_NUM_REF];
2646
2647 S32 best_uni_ref = -1, best_alt_ref = -1;
2648 S32 num_clusters;
2649 S32 num_ref = 0;
2650 S32 num_clusters_evaluated = 0;
2651 S32 is_cur_blk_valid;
2652
2653 if(32 == block_width)
2654 {
2655 is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << e_cu_pos)) || 0;
2656 ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cu_pos];
2657 num_clusters = ps_32x32->num_clusters;
2658 ps_data = &ps_32x32->as_cluster_data[0];
2659 }
2660 else
2661 {
2662 is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask == 0xf);
2663 ps_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
2664 num_clusters = ps_64x64->num_clusters;
2665 ps_data = &ps_64x64->as_cluster_data[0];
2666 }
2667
2668 #if !ENABLE_4CTB_EVALUATION
2669 if((num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
2670 {
2671 return;
2672 }
2673 #endif
2674 if(num_clusters == 0)
2675 {
2676 return;
2677 }
2678 else if(!is_cur_blk_valid)
2679 {
2680 return;
2681 }
2682
2683 memset(ai4_uni_area, 0, sizeof(S32) * MAX_NUM_REF);
2684 memset(ai4_bi_area, 0, sizeof(S32) * MAX_NUM_REF);
2685 memset(ai4_ref_id_found, 0, sizeof(S32) * MAX_NUM_REF);
2686 memset(ai4_ref_id, -1, sizeof(S32) * MAX_NUM_REF);
2687
2688 for(j = 0; num_clusters_evaluated < num_clusters; j++, ps_data++)
2689 {
2690 S32 ref_id;
2691
2692 if(!ps_data->is_valid_cluster)
2693 {
2694 continue;
2695 }
2696
2697 ref_id = ps_data->ref_id;
2698
2699 num_clusters_evaluated++;
2700
2701 ai4_uni_area[ref_id] += ps_data->uni_mv_pixel_area;
2702 ai4_bi_area[ref_id] += ps_data->bi_mv_pixel_area;
2703
2704 if(!ai4_ref_id_found[ref_id])
2705 {
2706 ai4_ref_id[ref_id] = ref_id;
2707 ai4_ref_id_found[ref_id] = 1;
2708 num_ref++;
2709 }
2710 }
2711
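    /* Only the largest (and, when bidir is enabled, the second largest)        */
    /* accumulated area is needed, so a single selection pass that swaps the    */
    /* maximum into index 0 (and later index 1) is used instead of a full sort. */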
2712 {
2713 S32 ai4_ref_id_temp[MAX_NUM_REF];
2714
2715 memcpy(ai4_ref_id_temp, ai4_ref_id, sizeof(S32) * MAX_NUM_REF);
2716
2717 for(k = 1; k < MAX_NUM_REF; k++)
2718 {
2719 if(ai4_uni_area[k] > ai4_uni_area[0])
2720 {
2721 SWAP_HME(ai4_uni_area[k], ai4_uni_area[0], S32);
2722 SWAP_HME(ai4_ref_id_temp[k], ai4_ref_id_temp[0], S32);
2723 }
2724 }
2725
2726 best_uni_ref = ai4_ref_id_temp[0];
2727 }
2728
2729 if(bidir_enabled)
2730 {
2731 for(k = 1; k < MAX_NUM_REF; k++)
2732 {
2733 if(ai4_bi_area[k] > ai4_bi_area[0])
2734 {
2735 SWAP_HME(ai4_bi_area[k], ai4_bi_area[0], S32);
2736 SWAP_HME(ai4_ref_id[k], ai4_ref_id[0], S32);
2737 }
2738 }
2739
2740 if(!ai4_bi_area[0])
2741 {
2742 best_alt_ref = -1;
2743
2744 if(32 == block_width)
2745 {
2746 SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
2747 }
2748 else
2749 {
2750 SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
2751 }
2752
2753 return;
2754 }
2755
2756 if(best_uni_ref == ai4_ref_id[0])
2757 {
2758 for(k = 2; k < MAX_NUM_REF; k++)
2759 {
2760 if(ai4_bi_area[k] > ai4_bi_area[1])
2761 {
2762 SWAP_HME(ai4_bi_area[k], ai4_bi_area[1], S32);
2763 SWAP_HME(ai4_ref_id[k], ai4_ref_id[1], S32);
2764 }
2765 }
2766
2767 best_alt_ref = ai4_ref_id[1];
2768 }
2769 else
2770 {
2771 best_alt_ref = ai4_ref_id[0];
2772 }
2773 }
2774
2775 if(32 == block_width)
2776 {
2777 SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
2778 }
2779 else
2780 {
2781 SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
2782 }
2783 }
2784
2785 /**
2786 ********************************************************************************
2787 * @fn void hme_find_top_ref_ids
2788 * (
2789 * ctb_cluster_info_t *ps_ctb_cluster_info
2790 * )
2791 *
2792 * @brief Finds best_uni_ref and best_alt_ref
2793 *
2794 * @param[in/out] ps_ctb_cluster_info: structure that points to ctb data
2795 *
2796 * @return None
2797 ********************************************************************************
2798 */
2799 void hme_find_top_ref_ids(
2800 ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width)
2801 {
2802 S32 i;
2803
2804 if(32 == block_width)
2805 {
2806 for(i = 0; i < 4; i++)
2807 {
2808 hme_sort_and_assign_top_ref_ids_areawise(
2809 ps_ctb_cluster_info, bidir_enabled, block_width, (CU_POS_T)i);
2810 }
2811 }
2812 else if(64 == block_width)
2813 {
2814 hme_sort_and_assign_top_ref_ids_areawise(
2815 ps_ctb_cluster_info, bidir_enabled, block_width, POS_NA);
2816 }
2817 }
2818
2819 /**
2820 ********************************************************************************
2821 * @fn void hme_boot_out_outlier
2822 * (
2823 * ctb_cluster_info_t *ps_ctb_cluster_info
2824 * )
2825 *
2826 * @brief Removes outlier clusters before CU tree population
2827 *
2828 * @param[in/out] ps_ctb_cluster_info: structure that points to ctb data
2829 *
2830 * @return None
2831 ********************************************************************************
2832 */
2833 void hme_boot_out_outlier(ctb_cluster_info_t *ps_ctb_cluster_info, S32 blk_width)
2834 {
2835 cluster_32x32_blk_t *ps_32x32;
2836
2837 S32 i;
2838
2839 cluster_64x64_blk_t *ps_64x64 = &ps_ctb_cluster_info->ps_64x64_blk[0];
2840
2841 S32 sdi_threshold = ps_ctb_cluster_info->sdi_threshold;
2842
2843 if(32 == blk_width)
2844 {
2845 /* 32x32 clusters */
2846 for(i = 0; i < 4; i++)
2847 {
2848 ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
2849
2850 if(ps_32x32->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
2851 {
2852 BUMP_OUTLIER_CLUSTERS(ps_32x32, sdi_threshold);
2853 }
2854 }
2855 }
2856 else if(64 == blk_width)
2857 {
2858 /* 64x64 clusters */
2859 if(ps_64x64->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
2860 {
2861 BUMP_OUTLIER_CLUSTERS(ps_64x64, sdi_threshold);
2862 }
2863 }
2864 }
2865
2866 /**
2867 ********************************************************************************
2868 * @fn void hme_update_cluster_attributes
2869 * (
2870 * cluster_data_t *ps_cluster_data,
2871 * S32 mvx,
2872 * S32 mvy,
2873 * PART_ID_T e_part_id
2874 * )
2875 *
2876 * @brief Implementation of the clustering algorithm
2877 *
2878 * @param[in/out] ps_cluster_data: pointer to cluster_data_t struct
2879 *
2880 * @param[in] mvx : x co-ordinate of the motion vector
2881 *
2882 * @param[in] mvy : y co-ordinate of the motion vector
2883 *
2884 * @param[in] ref_idx : ref_id of the motion vector
2885 *
2886 * @param[in] e_part_id : partition id of the motion vector
2887 *
2888 * @return None
2889 ********************************************************************************
2890 */
2891 static __inline void hme_update_cluster_attributes(
2892 cluster_data_t *ps_cluster_data,
2893 S32 mvx,
2894 S32 mvy,
2895 S32 mvdx,
2896 S32 mvdy,
2897 S32 ref_id,
2898 S32 sdi,
2899 U08 is_part_of_bi,
2900 PART_ID_T e_part_id)
2901 {
2902 LWORD64 i8_mvx_sum_q8;
2903 LWORD64 i8_mvy_sum_q8;
2904
2905 S32 centroid_posx_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
2906 S32 centroid_posy_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
2907
2908 if((mvdx > 0) && (ps_cluster_data->min_x > mvx))
2909 {
2910 ps_cluster_data->min_x = mvx;
2911 }
2912 else if((mvdx < 0) && (ps_cluster_data->max_x < mvx))
2913 {
2914 ps_cluster_data->max_x = mvx;
2915 }
2916
2917 if((mvdy > 0) && (ps_cluster_data->min_y > mvy))
2918 {
2919 ps_cluster_data->min_y = mvy;
2920 }
2921 else if((mvdy < 0) && (ps_cluster_data->max_y < mvy))
2922 {
2923 ps_cluster_data->max_y = mvy;
2924 }
2925
2926 {
2927 S32 num_mvs = ps_cluster_data->num_mvs;
2928
2929 ps_cluster_data->as_mv[num_mvs].pixel_count = gai4_partition_area[e_part_id];
2930 ps_cluster_data->as_mv[num_mvs].mvx = mvx;
2931 ps_cluster_data->as_mv[num_mvs].mvy = mvy;
2932
2933 /***************************/
2934 ps_cluster_data->as_mv[num_mvs].is_uni = !is_part_of_bi;
2935 ps_cluster_data->as_mv[num_mvs].sdi = sdi;
2936 /**************************/
2937 }
2938
2939 /* Updation of centroid */
2940 {
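        /* Running mean maintained in Q8:                                        */
        /* centroid_new = (centroid_old * N + (mv << 8)) / (N + 1),              */
        /* where N is the cluster's mv count before this mv is added.            */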
2941 i8_mvx_sum_q8 = (LWORD64)centroid_posx_q8 * ps_cluster_data->num_mvs + (mvx << 8);
2942 i8_mvy_sum_q8 = (LWORD64)centroid_posy_q8 * ps_cluster_data->num_mvs + (mvy << 8);
2943
2944 ps_cluster_data->num_mvs++;
2945
2946 ps_cluster_data->s_centroid.i4_pos_x_q8 =
2947 (WORD32)((i8_mvx_sum_q8) / ps_cluster_data->num_mvs);
2948 ps_cluster_data->s_centroid.i4_pos_y_q8 =
2949 (WORD32)((i8_mvy_sum_q8) / ps_cluster_data->num_mvs);
2950 }
2951
2952 ps_cluster_data->area_in_pixels += gai4_partition_area[e_part_id];
2953
2954 if(is_part_of_bi)
2955 {
2956 ps_cluster_data->bi_mv_pixel_area += gai4_partition_area[e_part_id];
2957 }
2958 else
2959 {
2960 ps_cluster_data->uni_mv_pixel_area += gai4_partition_area[e_part_id];
2961 }
2962 }
2963
2964 /**
2965 ********************************************************************************
2966 * @fn void hme_try_cluster_merge
2967 * (
2968 * cluster_data_t *ps_cluster_data,
2969 * S32 *pi4_num_clusters,
2970 * S32 idx_of_updated_cluster
2971 * )
2972 *
2973 * @brief Implementation of the clustering algorithm
2974 *
2975 * @param[in/out] ps_cluster_data: pointer to cluster_data_t struct
2976 *
2977 * @param[in/out] pi4_num_clusters : pointer to number of clusters
2978 *
2979 * @param[in] idx_of_updated_cluster : index of the cluster most recently
2980 * updated
2981 *
2982 * @return Nothing
2983 ********************************************************************************
2984 */
2985 void hme_try_cluster_merge(
2986 cluster_data_t *ps_cluster_data, U08 *pu1_num_clusters, S32 idx_of_updated_cluster)
2987 {
2988 centroid_t *ps_centroid;
2989
2990 S32 cur_pos_x_q8;
2991 S32 cur_pos_y_q8;
2992 S32 i;
2993 S32 max_dist_from_centroid;
2994 S32 mvd;
2995 S32 mvdx_q8;
2996 S32 mvdx;
2997 S32 mvdy_q8;
2998 S32 mvdy;
2999 S32 num_clusters, num_clusters_evaluated;
3000 S32 other_pos_x_q8;
3001 S32 other_pos_y_q8;
3002
3003 cluster_data_t *ps_root = ps_cluster_data;
3004 cluster_data_t *ps_cur_cluster = &ps_cluster_data[idx_of_updated_cluster];
3005 centroid_t *ps_cur_centroid = &ps_cur_cluster->s_centroid;
3006
3007 /* Merge is superfluous if num_clusters is 1 */
3008 if(*pu1_num_clusters == 1)
3009 {
3010 return;
3011 }
3012
3013 cur_pos_x_q8 = ps_cur_centroid->i4_pos_x_q8;
3014 cur_pos_y_q8 = ps_cur_centroid->i4_pos_y_q8;
3015
3016 max_dist_from_centroid = ps_cur_cluster->max_dist_from_centroid;
3017
3018 num_clusters = *pu1_num_clusters;
3019 num_clusters_evaluated = 0;
3020
3021 for(i = 0; num_clusters_evaluated < num_clusters; i++, ps_cluster_data++)
3022 {
3023 if(!ps_cluster_data->is_valid_cluster)
3024 {
3025 continue;
3026 }
3027 if((ps_cluster_data->ref_id != ps_cur_cluster->ref_id) || (i == idx_of_updated_cluster))
3028 {
3029 num_clusters_evaluated++;
3030 continue;
3031 }
3032
3033 ps_centroid = &ps_cluster_data->s_centroid;
3034
3035 other_pos_x_q8 = ps_centroid->i4_pos_x_q8;
3036 other_pos_y_q8 = ps_centroid->i4_pos_y_q8;
3037
3038 mvdx_q8 = (cur_pos_x_q8 - other_pos_x_q8);
3039 mvdy_q8 = (cur_pos_y_q8 - other_pos_y_q8);
3040 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3041 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3042
3043 mvd = ABS(mvdx) + ABS(mvdy);
3044
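        /* Merge criterion: two clusters of the same ref are merged when the     */
        /* city-block (L1) distance between their centroids is within half the   */
        /* current cluster's allowed spread from its centroid.                   */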
3045 if(mvd <= (max_dist_from_centroid >> 1))
3046 {
3047 /* 0 => no updates */
3048 /* 1 => min updated */
3049 /* 2 => max updated */
3050 S32 minmax_x_update_id;
3051 S32 minmax_y_update_id;
3052
3053 LWORD64 i8_mv_x_sum_self = (LWORD64)cur_pos_x_q8 * ps_cur_cluster->num_mvs;
3054 LWORD64 i8_mv_y_sum_self = (LWORD64)cur_pos_y_q8 * ps_cur_cluster->num_mvs;
3055 LWORD64 i8_mv_x_sum_cousin = (LWORD64)other_pos_x_q8 * ps_cluster_data->num_mvs;
3056 LWORD64 i8_mv_y_sum_cousin = (LWORD64)other_pos_y_q8 * ps_cluster_data->num_mvs;
3057
3058 (*pu1_num_clusters)--;
3059
3060 ps_cluster_data->is_valid_cluster = 0;
3061
3062 memcpy(
3063 &ps_cur_cluster->as_mv[ps_cur_cluster->num_mvs],
3064 ps_cluster_data->as_mv,
3065 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3066
3067 ps_cur_cluster->num_mvs += ps_cluster_data->num_mvs;
3068 ps_cur_cluster->area_in_pixels += ps_cluster_data->area_in_pixels;
3069 ps_cur_cluster->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3070 ps_cur_cluster->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3071 i8_mv_x_sum_self += i8_mv_x_sum_cousin;
3072 i8_mv_y_sum_self += i8_mv_y_sum_cousin;
3073
3074 ps_cur_centroid->i4_pos_x_q8 = (WORD32)(i8_mv_x_sum_self / ps_cur_cluster->num_mvs);
3075 ps_cur_centroid->i4_pos_y_q8 = (WORD32)(i8_mv_y_sum_self / ps_cur_cluster->num_mvs);
3076
3077 minmax_x_update_id = (ps_cur_cluster->min_x < ps_cluster_data->min_x)
3078 ? ((ps_cur_cluster->max_x > ps_cluster_data->max_x) ? 0 : 2)
3079 : 1;
3080 minmax_y_update_id = (ps_cur_cluster->min_y < ps_cluster_data->min_y)
3081 ? ((ps_cur_cluster->max_y > ps_cluster_data->max_y) ? 0 : 2)
3082 : 1;
3083
3084 /* Updation of centroid spread */
3085 switch(minmax_x_update_id + (minmax_y_update_id << 2))
3086 {
3087 case 1:
3088 {
3089 S32 mvd, mvd_q8;
3090
3091 ps_cur_cluster->min_x = ps_cluster_data->min_x;
3092
3093 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3094 mvd = (mvd_q8 + (1 << 7)) >> 8;
3095
3096 if(mvd > (max_dist_from_centroid))
3097 {
3098 ps_cluster_data->max_dist_from_centroid = mvd;
3099 }
3100 break;
3101 }
3102 case 2:
3103 {
3104 S32 mvd, mvd_q8;
3105
3106 ps_cur_cluster->max_x = ps_cluster_data->max_x;
3107
3108 mvd_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3109 mvd = (mvd_q8 + (1 << 7)) >> 8;
3110
3111 if(mvd > (max_dist_from_centroid))
3112 {
3113 ps_cluster_data->max_dist_from_centroid = mvd;
3114 }
3115 break;
3116 }
3117 case 4:
3118 {
3119 S32 mvd, mvd_q8;
3120
3121 ps_cur_cluster->min_y = ps_cluster_data->min_y;
3122
3123 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3124 mvd = (mvd_q8 + (1 << 7)) >> 8;
3125
3126 if(mvd > (max_dist_from_centroid))
3127 {
3128 ps_cluster_data->max_dist_from_centroid = mvd;
3129 }
3130 break;
3131 }
3132 case 5:
3133 {
3134 S32 mvd;
3135 S32 mvdx, mvdx_q8;
3136 S32 mvdy, mvdy_q8;
3137
3138 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3139 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3140
3141 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3142 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3143
3144 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3145
3146 ps_cur_cluster->min_x = ps_cluster_data->min_x;
3147 ps_cur_cluster->min_y = ps_cluster_data->min_y;
3148
3149 if(mvd > max_dist_from_centroid)
3150 {
3151 ps_cluster_data->max_dist_from_centroid = mvd;
3152 }
3153 break;
3154 }
3155 case 6:
3156 {
3157 S32 mvd;
3158 S32 mvdx, mvdx_q8;
3159 S32 mvdy, mvdy_q8;
3160
3161 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3162 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3163
3164 mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3165 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3166
3167 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3168
3169 ps_cur_cluster->max_x = ps_cluster_data->max_x;
3170 ps_cur_cluster->min_y = ps_cluster_data->min_y;
3171
3172 if(mvd > max_dist_from_centroid)
3173 {
3174 ps_cluster_data->max_dist_from_centroid = mvd;
3175 }
3176 break;
3177 }
3178 case 8:
3179 {
3180 S32 mvd, mvd_q8;
3181
3182 ps_cur_cluster->max_y = ps_cluster_data->max_y;
3183
3184 mvd_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3185 mvd = (mvd_q8 + (1 << 7)) >> 8;
3186
3187 if(mvd > (max_dist_from_centroid))
3188 {
3189 ps_cluster_data->max_dist_from_centroid = mvd;
3190 }
3191 break;
3192 }
3193 case 9:
3194 {
3195 S32 mvd;
3196 S32 mvdx, mvdx_q8;
3197 S32 mvdy, mvdy_q8;
3198
3199 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3200 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3201
3202 mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3203 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3204
3205 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3206
3207 ps_cur_cluster->min_x = ps_cluster_data->min_x;
3208 ps_cur_cluster->max_y = ps_cluster_data->max_y;
3209
3210 if(mvd > max_dist_from_centroid)
3211 {
3212 ps_cluster_data->max_dist_from_centroid = mvd;
3213 }
3214 break;
3215 }
3216 case 10:
3217 {
3218 S32 mvd;
3219 S32 mvdx, mvdx_q8;
3220 S32 mvdy, mvdy_q8;
3221
3222 mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3223 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3224
3225 mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3226 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3227
3228 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3229
3230 ps_cur_cluster->max_x = ps_cluster_data->max_x;
3231 ps_cur_cluster->max_y = ps_cluster_data->max_y;
3232
3233 if(mvd > ps_cluster_data->max_dist_from_centroid)
3234 {
3235 ps_cluster_data->max_dist_from_centroid = mvd;
3236 }
3237 break;
3238 }
3239 default:
3240 {
3241 break;
3242 }
3243 }
3244
3245 hme_try_cluster_merge(ps_root, pu1_num_clusters, idx_of_updated_cluster);
3246
3247 return;
3248 }
3249
3250 num_clusters_evaluated++;
3251 }
3252 }
3253
3254 /**
3255 ********************************************************************************
3256 * @fn void hme_find_and_update_clusters
3257 * (
3258 * cluster_data_t *ps_cluster_data,
3259 * S32 *pi4_num_clusters,
3260 * S32 mvx,
3261 * S32 mvy,
3262 * S32 ref_idx,
3263 * PART_ID_T e_part_id
3264 * )
3265 *
3266 * @brief Implementation of the clustering algorithm
3267 *
3268 * @param[in/out] ps_cluster_data: pointer to cluster_data_t struct
3269 *
3270 * @param[in/out] pi4_num_clusters : pointer to number of clusters
3271 *
3272 * @param[in] mvx : x co-ordinate of the motion vector
3273 *
3274 * @param[in] mvy : y co-ordinate of the motion vector
3275 *
3276 * @param[in] ref_idx : ref_id of the motion vector
3277 *
3278 * @param[in] e_part_id : partition id of the motion vector
3279 *
3280 * @return None
3281 ********************************************************************************
3282 */
3283 void hme_find_and_update_clusters(
3284 cluster_data_t *ps_cluster_data,
3285 U08 *pu1_num_clusters,
3286 S16 i2_mv_x,
3287 S16 i2_mv_y,
3288 U08 i1_ref_idx,
3289 S32 i4_sdi,
3290 PART_ID_T e_part_id,
3291 U08 is_part_of_bi)
3292 {
3293 S32 i;
3294 S32 min_mvd_cluster_id = -1;
3295 S32 mvd, mvd_limit, mvdx, mvdy;
3296 S32 min_mvdx, min_mvdy;
3297
3298 S32 min_mvd = MAX_32BIT_VAL;
3299 S32 num_clusters = *pu1_num_clusters;
3300
3301 S32 mvx = i2_mv_x;
3302 S32 mvy = i2_mv_y;
3303 S32 ref_idx = i1_ref_idx;
3304 S32 sdi = i4_sdi;
3305 S32 new_cluster_idx = MAX_NUM_CLUSTERS_16x16;
3306
3307 if(num_clusters == 0)
3308 {
3309 cluster_data_t *ps_data = &ps_cluster_data[num_clusters];
3310
3311 ps_data->num_mvs = 1;
3312 ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
3313 ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
3314 ps_data->ref_id = ref_idx;
3315 ps_data->area_in_pixels = gai4_partition_area[e_part_id];
3316 ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
3317 ps_data->as_mv[0].mvx = mvx;
3318 ps_data->as_mv[0].mvy = mvy;
3319
3320 /***************************/
3321 ps_data->as_mv[0].is_uni = !is_part_of_bi;
3322 ps_data->as_mv[0].sdi = sdi;
3323 if(is_part_of_bi)
3324 {
3325 ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
3326 }
3327 else
3328 {
3329 ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
3330 }
3331 /**************************/
3332 ps_data->max_x = mvx;
3333 ps_data->min_x = mvx;
3334 ps_data->max_y = mvy;
3335 ps_data->min_y = mvy;
3336
3337 ps_data->is_valid_cluster = 1;
3338
3339 *pu1_num_clusters = 1;
3340 }
3341 else
3342 {
3343 S32 num_clusters_evaluated = 0;
3344
3345 for(i = 0; num_clusters_evaluated < num_clusters; i++)
3346 {
3347 cluster_data_t *ps_data = &ps_cluster_data[i];
3348
3349 centroid_t *ps_centroid;
3350
3351 S32 mvx_q8;
3352 S32 mvy_q8;
3353 S32 posx_q8;
3354 S32 posy_q8;
3355 S32 mvdx_q8;
3356 S32 mvdy_q8;
3357
3358 /* In anticipation of a possible merging of clusters */
3359 if(ps_data->is_valid_cluster == 0)
3360 {
3361 new_cluster_idx = i;
3362 continue;
3363 }
3364
3365 if(ref_idx != ps_data->ref_id)
3366 {
3367 num_clusters_evaluated++;
3368 continue;
3369 }
3370
3371 ps_centroid = &ps_data->s_centroid;
3372 posx_q8 = ps_centroid->i4_pos_x_q8;
3373 posy_q8 = ps_centroid->i4_pos_y_q8;
3374
3375 mvx_q8 = mvx << 8;
3376 mvy_q8 = mvy << 8;
3377
3378 mvdx_q8 = posx_q8 - mvx_q8;
3379 mvdy_q8 = posy_q8 - mvy_q8;
3380
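            /*
             * Centroids are tracked in Q8 fixed point; adding (1 << 7) before
             * shifting right by 8 converts the Q8 deltas back to full-pel
             * units with rounding. The distance metric used for cluster
             * assignment is the L1 norm |mvdx| + |mvdy|.
             */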
3381 mvdx = (((mvdx_q8 + (1 << 7)) >> 8));
3382 mvdy = (((mvdy_q8 + (1 << 7)) >> 8));
3383
3384 mvd = ABS(mvdx) + ABS(mvdy);
3385
3386 if(mvd < min_mvd)
3387 {
3388 min_mvd = mvd;
3389 min_mvdx = mvdx;
3390 min_mvdy = mvdy;
3391 min_mvd_cluster_id = i;
3392 }
3393
3394 num_clusters_evaluated++;
3395 }
3396
3397 mvd_limit = (min_mvd_cluster_id == -1)
3398 ? ps_cluster_data[0].max_dist_from_centroid
3399 : ps_cluster_data[min_mvd_cluster_id].max_dist_from_centroid;
3400
3401 /* This condition implies that min_mvd has been updated */
3402 if(min_mvd <= mvd_limit)
3403 {
3404 hme_update_cluster_attributes(
3405 &ps_cluster_data[min_mvd_cluster_id],
3406 mvx,
3407 mvy,
3408 min_mvdx,
3409 min_mvdy,
3410 ref_idx,
3411 sdi,
3412 is_part_of_bi,
3413 e_part_id);
3414
3415 if(PRT_NxN == ge_part_id_to_part_type[e_part_id])
3416 {
3417 hme_try_cluster_merge(ps_cluster_data, pu1_num_clusters, min_mvd_cluster_id);
3418 }
3419 }
3420 else
3421 {
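            /*
             * No cluster with this ref_idx is close enough: open a new
             * cluster, reusing the invalidated slot noted in new_cluster_idx
             * during the scan above, if one was found.
             */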
3422 cluster_data_t *ps_data = (new_cluster_idx == MAX_NUM_CLUSTERS_16x16)
3423 ? &ps_cluster_data[num_clusters]
3424 : &ps_cluster_data[new_cluster_idx];
3425
3426 ps_data->num_mvs = 1;
3427 ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
3428 ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
3429 ps_data->ref_id = ref_idx;
3430 ps_data->area_in_pixels = gai4_partition_area[e_part_id];
3431 ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
3432 ps_data->as_mv[0].mvx = mvx;
3433 ps_data->as_mv[0].mvy = mvy;
3434
3435 /***************************/
3436 ps_data->as_mv[0].is_uni = !is_part_of_bi;
3437 ps_data->as_mv[0].sdi = sdi;
3438 if(is_part_of_bi)
3439 {
3440 ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
3441 }
3442 else
3443 {
3444 ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
3445 }
3446 /**************************/
3447 ps_data->max_x = mvx;
3448 ps_data->min_x = mvx;
3449 ps_data->max_y = mvy;
3450 ps_data->min_y = mvy;
3451
3452 ps_data->is_valid_cluster = 1;
3453
3454 num_clusters++;
3455 *pu1_num_clusters = num_clusters;
3456 }
3457 }
3458 }
3459
3460 /**
3461 ********************************************************************************
3462 * @fn void hme_update_32x32_cluster_attributes
3463 * (
3464 * cluster_32x32_blk_t *ps_blk_32x32,
3465 * cluster_data_t *ps_cluster_data
3466 * )
3467 *
3468 * @brief Updates attributes for 32x32 clusters based on the attributes of
3469 * the constituent 16x16 clusters
3470 *
3471 * @param[out] ps_blk_32x32: structure containing 32x32 block results
3472 *
3473 * @param[in] ps_cluster_data : structure containing 16x16 block results
3474 *
3475 * @return None
3476 ********************************************************************************
3477 */
3478 void hme_update_32x32_cluster_attributes(
3479 cluster_32x32_blk_t *ps_blk_32x32, cluster_data_t *ps_cluster_data)
3480 {
3481 cluster_data_t *ps_cur_cluster_32;
3482
3483 S32 i;
3484 S32 mvd_limit;
3485
3486 S32 num_clusters = ps_blk_32x32->num_clusters;
3487
3488 if(0 == num_clusters)
3489 {
3490 ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
3491
3492 ps_blk_32x32->num_clusters++;
3493 ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
3494
3495 ps_cur_cluster_32->is_valid_cluster = 1;
3496
3497 ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
3498 ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3499 ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3500
3501 memcpy(
3502 ps_cur_cluster_32->as_mv,
3503 ps_cluster_data->as_mv,
3504 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3505
3506 ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
3507
3508 ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
3509
3510 ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
3511 ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
3512 ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
3513 ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
3514
3515 ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
3516 }
3517 else
3518 {
3519 centroid_t *ps_centroid;
3520
3521 S32 cur_posx_q8, cur_posy_q8;
3522 S32 min_mvd_cluster_id = -1;
3523 S32 mvd;
3524 S32 mvdx;
3525 S32 mvdy;
3526 S32 mvdx_min;
3527 S32 mvdy_min;
3528 S32 mvdx_q8;
3529 S32 mvdy_q8;
3530
3531 S32 num_clusters_evaluated = 0;
3532
3533 S32 mvd_min = MAX_32BIT_VAL;
3534
3535 S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
3536 S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
3537
3538 for(i = 0; num_clusters_evaluated < num_clusters; i++)
3539 {
3540 ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[i];
3541
3542 if(ps_cur_cluster_32->ref_id != ps_cluster_data->ref_id)
3543 {
3544 num_clusters_evaluated++;
3545 continue;
3546 }
3547 if(!ps_cluster_data->is_valid_cluster)
3548 {
3549 continue;
3550 }
3551
3552 num_clusters_evaluated++;
3553
3554 ps_centroid = &ps_cur_cluster_32->s_centroid;
3555
3556 cur_posx_q8 = ps_centroid->i4_pos_x_q8;
3557 cur_posy_q8 = ps_centroid->i4_pos_y_q8;
3558
3559 mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
3560 mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
3561
3562 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3563 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3564
3565 mvd = ABS(mvdx) + ABS(mvdy);
3566
3567 if(mvd < mvd_min)
3568 {
3569 mvd_min = mvd;
3570 mvdx_min = mvdx;
3571 mvdy_min = mvdy;
3572 min_mvd_cluster_id = i;
3573 }
3574 }
3575
3576 ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
3577
3578 mvd_limit = (min_mvd_cluster_id == -1)
3579 ? ps_cur_cluster_32[0].max_dist_from_centroid
3580 : ps_cur_cluster_32[min_mvd_cluster_id].max_dist_from_centroid;
3581
3582 if(mvd_min <= mvd_limit)
3583 {
3584 LWORD64 i8_updated_posx;
3585 LWORD64 i8_updated_posy;
3586 WORD32 minmax_updated_x = 0;
3587 WORD32 minmax_updated_y = 0;
3588
3589 ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[min_mvd_cluster_id];
3590
3591 ps_centroid = &ps_cur_cluster_32->s_centroid;
3592
3593 ps_cur_cluster_32->is_valid_cluster = 1;
3594
3595 ps_cur_cluster_32->area_in_pixels += ps_cluster_data->area_in_pixels;
3596 ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3597 ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3598
3599 memcpy(
3600 &ps_cur_cluster_32->as_mv[ps_cur_cluster_32->num_mvs],
3601 ps_cluster_data->as_mv,
3602 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3603
3604 if((mvdx_min > 0) && ((ps_cur_cluster_32->min_x << 8) > mvx_inp_q8))
3605 {
3606 ps_cur_cluster_32->min_x = (mvx_inp_q8 + ((1 << 7))) >> 8;
3607 minmax_updated_x = 1;
3608 }
3609 else if((mvdx_min < 0) && ((ps_cur_cluster_32->max_x << 8) < mvx_inp_q8))
3610 {
3611 ps_cur_cluster_32->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3612 minmax_updated_x = 2;
3613 }
3614
3615 if((mvdy_min > 0) && ((ps_cur_cluster_32->min_y << 8) > mvy_inp_q8))
3616 {
3617 ps_cur_cluster_32->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3618 minmax_updated_y = 1;
3619 }
3620 else if((mvdy_min < 0) && ((ps_cur_cluster_32->max_y << 8) < mvy_inp_q8))
3621 {
3622 ps_cur_cluster_32->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3623 minmax_updated_y = 2;
3624 }
3625
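            /*
             * The switch key packs the bound updates made above: bits 0-1
             * carry the x status (1 => min_x moved, 2 => max_x moved) and
             * bits 2-3 the y status. Each case recomputes the distance from
             * the centroid to the bound(s) that moved and widens
             * max_dist_from_centroid if that distance exceeds the current
             * limit.
             */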
3626 switch((minmax_updated_y << 2) + minmax_updated_x)
3627 {
3628 case 1:
3629 {
3630 S32 mvd, mvd_q8;
3631
3632 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3633 mvd = (mvd_q8 + (1 << 7)) >> 8;
3634
3635 if(mvd > (mvd_limit))
3636 {
3637 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3638 }
3639 break;
3640 }
3641 case 2:
3642 {
3643 S32 mvd, mvd_q8;
3644
3645 mvd_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3646 mvd = (mvd_q8 + (1 << 7)) >> 8;
3647
3648 if(mvd > (mvd_limit))
3649 {
3650 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3651 }
3652 break;
3653 }
3654 case 4:
3655 {
3656 S32 mvd, mvd_q8;
3657
3658 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3659 mvd = (mvd_q8 + (1 << 7)) >> 8;
3660
3661 if(mvd > (mvd_limit))
3662 {
3663 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3664 }
3665 break;
3666 }
3667 case 5:
3668 {
3669 S32 mvd;
3670 S32 mvdx, mvdx_q8;
3671 S32 mvdy, mvdy_q8;
3672
3673 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3674 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3675
3676 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3677 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3678
3679 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3680
3681 if(mvd > mvd_limit)
3682 {
3683 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3684 }
3685 break;
3686 }
3687 case 6:
3688 {
3689 S32 mvd;
3690 S32 mvdx, mvdx_q8;
3691 S32 mvdy, mvdy_q8;
3692
3693 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3694 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3695
3696 mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3697 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3698
3699 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3700
3701 if(mvd > mvd_limit)
3702 {
3703 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3704 }
3705 break;
3706 }
3707 case 8:
3708 {
3709 S32 mvd, mvd_q8;
3710
3711 mvd_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3712 mvd = (mvd_q8 + (1 << 7)) >> 8;
3713
3714 if(mvd > (mvd_limit))
3715 {
3716 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3717 }
3718 break;
3719 }
3720 case 9:
3721 {
3722 S32 mvd;
3723 S32 mvdx, mvdx_q8;
3724 S32 mvdy, mvdy_q8;
3725
3726 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3727 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3728
3729 mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3730 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3731
3732 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3733
3734 if(mvd > mvd_limit)
3735 {
3736 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3737 }
3738 break;
3739 }
3740 case 10:
3741 {
3742 S32 mvd;
3743 S32 mvdx, mvdx_q8;
3744 S32 mvdy, mvdy_q8;
3745
3746 mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3747 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3748
3749 mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3750 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3751
3752 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3753
3754 if(mvd > ps_cur_cluster_32->max_dist_from_centroid)
3755 {
3756 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3757 }
3758 break;
3759 }
3760 default:
3761 {
3762 break;
3763 }
3764 }
3765
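            /*
             * Merge the incoming cluster's centroid into this cluster's
             * centroid as a weighted mean in Q8, the weights being the
             * respective MV counts.
             */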
3766 i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_32->num_mvs) +
3767 ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
3768 i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_32->num_mvs) +
3769 ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
3770
3771 ps_cur_cluster_32->num_mvs += ps_cluster_data->num_mvs;
3772
3773 ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_32->num_mvs);
3774 ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_32->num_mvs);
3775 }
3776 else if(num_clusters < MAX_NUM_CLUSTERS_32x32)
3777 {
3778 ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[num_clusters];
3779
3780 ps_blk_32x32->num_clusters++;
3781 ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
3782
3783 ps_cur_cluster_32->is_valid_cluster = 1;
3784
3785 ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
3786 ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3787 ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3788
3789 memcpy(
3790 ps_cur_cluster_32->as_mv,
3791 ps_cluster_data->as_mv,
3792 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3793
3794 ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
3795
3796 ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
3797
3798 ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
3799 ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
3800 ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
3801 ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
3802
3803 ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
3804 }
3805 }
3806 }
3807
3808 /**
3809 ********************************************************************************
3810 * @fn void hme_update_64x64_cluster_attributes
3811 * (
3812 *              cluster_64x64_blk_t *ps_blk_64x64,
3813 *              cluster_data_t *ps_cluster_data
3814 *          )
3815 *
3816 * @brief Updates attributes for 64x64 clusters based on the attributes of
3817 *        the constituent 32x32 clusters
3818 *
3819 * @param[out] ps_blk_64x64: structure containing 64x64 block results
3820 *
3821 * @param[in] ps_cluster_data : structure containing 32x32 block results
3822 *
3823 * @return None
3824 ********************************************************************************
3825 */
3826 void hme_update_64x64_cluster_attributes(
3827 cluster_64x64_blk_t *ps_blk_64x64, cluster_data_t *ps_cluster_data)
3828 {
3829 cluster_data_t *ps_cur_cluster_64;
3830
3831 S32 i;
3832 S32 mvd_limit;
3833
3834 S32 num_clusters = ps_blk_64x64->num_clusters;
3835
3836 if(0 == num_clusters)
3837 {
3838 ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[0];
3839
3840 ps_blk_64x64->num_clusters++;
3841 ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
3842
3843 ps_cur_cluster_64->is_valid_cluster = 1;
3844
3845 ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
3846 ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3847 ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3848
3849 memcpy(
3850 ps_cur_cluster_64->as_mv,
3851 ps_cluster_data->as_mv,
3852 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3853
3854 ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
3855
3856 ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
3857
3858 ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
3859 ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
3860 ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
3861 ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
3862
3863 ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
3864 }
3865 else
3866 {
3867 centroid_t *ps_centroid;
3868
3869 S32 cur_posx_q8, cur_posy_q8;
3870 S32 min_mvd_cluster_id = -1;
3871 S32 mvd;
3872 S32 mvdx;
3873 S32 mvdy;
3874 S32 mvdx_min;
3875 S32 mvdy_min;
3876 S32 mvdx_q8;
3877 S32 mvdy_q8;
3878
3879 S32 num_clusters_evaluated = 0;
3880
3881 S32 mvd_min = MAX_32BIT_VAL;
3882
3883 S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
3884 S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
3885
3886 for(i = 0; num_clusters_evaluated < num_clusters; i++)
3887 {
3888 ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[i];
3889
3890 if(ps_cur_cluster_64->ref_id != ps_cluster_data->ref_id)
3891 {
3892 num_clusters_evaluated++;
3893 continue;
3894 }
3895
3896 if(!ps_cur_cluster_64->is_valid_cluster)
3897 {
3898 continue;
3899 }
3900
3901 num_clusters_evaluated++;
3902
3903 ps_centroid = &ps_cur_cluster_64->s_centroid;
3904
3905 cur_posx_q8 = ps_centroid->i4_pos_x_q8;
3906 cur_posy_q8 = ps_centroid->i4_pos_y_q8;
3907
3908 mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
3909 mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
3910
3911 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3912 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3913
3914 mvd = ABS(mvdx) + ABS(mvdy);
3915
3916 if(mvd < mvd_min)
3917 {
3918 mvd_min = mvd;
3919 mvdx_min = mvdx;
3920 mvdy_min = mvdy;
3921 min_mvd_cluster_id = i;
3922 }
3923 }
3924
3925 ps_cur_cluster_64 = ps_blk_64x64->as_cluster_data;
3926
3927 mvd_limit = (min_mvd_cluster_id == -1)
3928 ? ps_cur_cluster_64[0].max_dist_from_centroid
3929 : ps_cur_cluster_64[min_mvd_cluster_id].max_dist_from_centroid;
3930
3931 if(mvd_min <= mvd_limit)
3932 {
3933 LWORD64 i8_updated_posx;
3934 LWORD64 i8_updated_posy;
3935 WORD32 minmax_updated_x = 0;
3936 WORD32 minmax_updated_y = 0;
3937
3938 ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[min_mvd_cluster_id];
3939
3940 ps_centroid = &ps_cur_cluster_64->s_centroid;
3941
3942 ps_cur_cluster_64->is_valid_cluster = 1;
3943
3944 ps_cur_cluster_64->area_in_pixels += ps_cluster_data->area_in_pixels;
3945 ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3946 ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3947
3948 memcpy(
3949 &ps_cur_cluster_64->as_mv[ps_cur_cluster_64->num_mvs],
3950 ps_cluster_data->as_mv,
3951 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3952
3953 if((mvdx_min > 0) && ((ps_cur_cluster_64->min_x << 8) > mvx_inp_q8))
3954 {
3955 ps_cur_cluster_64->min_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3956 minmax_updated_x = 1;
3957 }
3958 else if((mvdx_min < 0) && ((ps_cur_cluster_64->max_x << 8) < mvx_inp_q8))
3959 {
3960 ps_cur_cluster_64->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3961 minmax_updated_x = 2;
3962 }
3963
3964 if((mvdy_min > 0) && ((ps_cur_cluster_64->min_y << 8) > mvy_inp_q8))
3965 {
3966 ps_cur_cluster_64->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3967 minmax_updated_y = 1;
3968 }
3969 else if((mvdy_min < 0) && ((ps_cur_cluster_64->max_y << 8) < mvy_inp_q8))
3970 {
3971 ps_cur_cluster_64->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3972 minmax_updated_y = 2;
3973 }
3974
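            /* Same bound / radius bookkeeping as in the 32x32 update above */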
3975 switch((minmax_updated_y << 2) + minmax_updated_x)
3976 {
3977 case 1:
3978 {
3979 S32 mvd, mvd_q8;
3980
3981 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
3982 mvd = (mvd_q8 + (1 << 7)) >> 8;
3983
3984 if(mvd > (mvd_limit))
3985 {
3986 ps_cur_cluster_64->max_dist_from_centroid = mvd;
3987 }
3988 break;
3989 }
3990 case 2:
3991 {
3992 S32 mvd, mvd_q8;
3993
3994 mvd_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
3995 mvd = (mvd_q8 + (1 << 7)) >> 8;
3996
3997 if(mvd > (mvd_limit))
3998 {
3999 ps_cur_cluster_64->max_dist_from_centroid = mvd;
4000 }
4001 break;
4002 }
4003 case 4:
4004 {
4005 S32 mvd, mvd_q8;
4006
4007 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4008 mvd = (mvd_q8 + (1 << 7)) >> 8;
4009
4010 if(mvd > (mvd_limit))
4011 {
4012 ps_cur_cluster_64->max_dist_from_centroid = mvd;
4013 }
4014 break;
4015 }
4016 case 5:
4017 {
4018 S32 mvd;
4019 S32 mvdx, mvdx_q8;
4020 S32 mvdy, mvdy_q8;
4021
4022 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4023 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4024
4025 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
4026 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4027
4028 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4029
4030 if(mvd > mvd_limit)
4031 {
4032 ps_cur_cluster_64->max_dist_from_centroid = mvd;
4033 }
4034 break;
4035 }
4036 case 6:
4037 {
4038 S32 mvd;
4039 S32 mvdx, mvdx_q8;
4040 S32 mvdy, mvdy_q8;
4041
4042 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4043 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4044
4045 mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
4046 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4047
4048 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4049
4050 if(mvd > mvd_limit)
4051 {
4052 ps_cur_cluster_64->max_dist_from_centroid = mvd;
4053 }
4054 break;
4055 }
4056 case 8:
4057 {
4058 S32 mvd, mvd_q8;
4059
4060 mvd_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4061 mvd = (mvd_q8 + (1 << 7)) >> 8;
4062
4063 if(mvd > (mvd_limit))
4064 {
4065 ps_cur_cluster_64->max_dist_from_centroid = mvd;
4066 }
4067 break;
4068 }
4069 case 9:
4070 {
4071 S32 mvd;
4072 S32 mvdx, mvdx_q8;
4073 S32 mvdy, mvdy_q8;
4074
4075 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
4076 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4077
4078 mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4079 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4080
4081 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4082
4083 if(mvd > mvd_limit)
4084 {
4085 ps_cur_cluster_64->max_dist_from_centroid = mvd;
4086 }
4087 break;
4088 }
4089 case 10:
4090 {
4091 S32 mvd;
4092 S32 mvdx, mvdx_q8;
4093 S32 mvdy, mvdy_q8;
4094
4095 mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
4096 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4097
4098 mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4099 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4100
4101 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4102
4103 if(mvd > ps_cur_cluster_64->max_dist_from_centroid)
4104 {
4105 ps_cur_cluster_64->max_dist_from_centroid = mvd;
4106 }
4107 break;
4108 }
4109 default:
4110 {
4111 break;
4112 }
4113 }
4114
4115 i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_64->num_mvs) +
4116 ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
4117 i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_64->num_mvs) +
4118 ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
4119
4120 ps_cur_cluster_64->num_mvs += ps_cluster_data->num_mvs;
4121
4122 ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_64->num_mvs);
4123 ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_64->num_mvs);
4124 }
4125 else if(num_clusters < MAX_NUM_CLUSTERS_64x64)
4126 {
4127 ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[num_clusters];
4128
4129 ps_blk_64x64->num_clusters++;
4130 ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
4131
4132 ps_cur_cluster_64->is_valid_cluster = 1;
4133
4134 ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
4135 ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
4136 ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
4137
4138 memcpy(
4139 &ps_cur_cluster_64->as_mv[0],
4140 ps_cluster_data->as_mv,
4141 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
4142
4143 ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
4144
4145 ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
4146
4147 ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
4148 ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
4149 ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
4150 ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
4151
4152 ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
4153 }
4154 }
4155 }
4156
4157 /**
4158 ********************************************************************************
4159 * @fn void hme_update_32x32_clusters
4160 * (
4161 * cluster_32x32_blk_t *ps_blk_32x32,
4162 * cluster_16x16_blk_t *ps_blk_16x16
4163 * )
4164 *
4165 * @brief Updates attributes for 32x32 clusters based on the attributes of
4166 * the constituent 16x16 clusters
4167 *
4168 * @param[out] ps_blk_32x32: structure containing 32x32 block results
4169 *
4170 * @param[in] ps_blk_16x16 : structure containing 16x16 block results
4171 *
4172 * @return None
4173 ********************************************************************************
4174 */
4175 static __inline void
4176 hme_update_32x32_clusters(cluster_32x32_blk_t *ps_blk_32x32, cluster_16x16_blk_t *ps_blk_16x16)
4177 {
4178 cluster_16x16_blk_t *ps_blk_16x16_cur;
4179 cluster_data_t *ps_cur_cluster;
4180
4181 S32 i, j;
4182 S32 num_clusters_cur_16x16_blk;
4183
4184 for(i = 0; i < 4; i++)
4185 {
4186 S32 num_clusters_evaluated = 0;
4187
4188 ps_blk_16x16_cur = &ps_blk_16x16[i];
4189
4190 num_clusters_cur_16x16_blk = ps_blk_16x16_cur->num_clusters;
4191
4192 ps_blk_32x32->intra_mv_area += ps_blk_16x16_cur->intra_mv_area;
4193
4194 ps_blk_32x32->best_inter_cost += ps_blk_16x16_cur->best_inter_cost;
4195
4196 for(j = 0; num_clusters_evaluated < num_clusters_cur_16x16_blk; j++)
4197 {
4198 ps_cur_cluster = &ps_blk_16x16_cur->as_cluster_data[j];
4199
4200 if(!ps_cur_cluster->is_valid_cluster)
4201 {
4202 continue;
4203 }
4204
4205 hme_update_32x32_cluster_attributes(ps_blk_32x32, ps_cur_cluster);
4206
4207 num_clusters_evaluated++;
4208 }
4209 }
4210 }
4211
4212 /**
4213 ********************************************************************************
4214 * @fn void hme_update_64x64_clusters
4215 * (
4216 * cluster_64x64_blk_t *ps_blk_64x64,
4217 * cluster_32x32_blk_t *ps_blk_32x32
4218 * )
4219 *
4220 * @brief Updates attributes for 64x64 clusters based on the attributes of
4221 *        the constituent 32x32 clusters
4222 *
4223 * @param[out] ps_blk_64x64: structure containing 64x64 block results
4224 *
4225 * @param[in] ps_blk_32x32 : structure containing 32x32 block results
4226 *
4227 * @return None
4228 ********************************************************************************
4229 */
4230 static __inline void
4231 hme_update_64x64_clusters(cluster_64x64_blk_t *ps_blk_64x64, cluster_32x32_blk_t *ps_blk_32x32)
4232 {
4233 cluster_32x32_blk_t *ps_blk_32x32_cur;
4234 cluster_data_t *ps_cur_cluster;
4235
4236 S32 i, j;
4237 S32 num_clusters_cur_32x32_blk;
4238
4239 for(i = 0; i < 4; i++)
4240 {
4241 S32 num_clusters_evaluated = 0;
4242
4243 ps_blk_32x32_cur = &ps_blk_32x32[i];
4244
4245 num_clusters_cur_32x32_blk = ps_blk_32x32_cur->num_clusters;
4246
4247 ps_blk_64x64->intra_mv_area += ps_blk_32x32_cur->intra_mv_area;
4248 ps_blk_64x64->best_inter_cost += ps_blk_32x32_cur->best_inter_cost;
4249
4250 for(j = 0; num_clusters_evaluated < num_clusters_cur_32x32_blk; j++)
4251 {
4252 ps_cur_cluster = &ps_blk_32x32_cur->as_cluster_data[j];
4253
4254 if(!ps_cur_cluster->is_valid_cluster)
4255 {
4256 continue;
4257 }
4258
4259 hme_update_64x64_cluster_attributes(ps_blk_64x64, ps_cur_cluster);
4260
4261 num_clusters_evaluated++;
4262 }
4263 }
4264 }
4265
4266 /**
4267 ********************************************************************************
4268 * @fn void hme_try_merge_clusters_blksize_gt_16
4269 * (
4270 * cluster_data_t *ps_cluster_data,
4271 * S32 num_clusters
4272 * )
4273 *
4274 * @brief Merging clusters from blocks of size 32x32 and greater
4275 *
4276 * @param[in/out] ps_cluster_data: structure containing cluster data
4277 *
4278 * @param[in] num_clusters : number of clusters
4279 *
4280 * @return Number of cluster merges performed
4281 ********************************************************************************
4282 */
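/*
 * Merge criterion used below: a cluster with the same ref_id as the first
 * (primary) cluster is folded into it when the L1 distance between their
 * centroids is within half the primary cluster's max_dist_from_centroid.
 * The function then recurses over the remaining clusters and returns the
 * total number of merges performed.
 */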
4283 S32 hme_try_merge_clusters_blksize_gt_16(cluster_data_t *ps_cluster_data, S32 num_clusters)
4284 {
4285 centroid_t *ps_cur_centroid;
4286 cluster_data_t *ps_cur_cluster;
4287
4288 S32 i, mvd;
4289 S32 mvdx, mvdy, mvdx_q8, mvdy_q8;
4290
4291 centroid_t *ps_centroid = &ps_cluster_data->s_centroid;
4292
4293 S32 mvd_limit = ps_cluster_data->max_dist_from_centroid;
4294 S32 ref_id = ps_cluster_data->ref_id;
4295
4296 S32 node0_posx_q8 = ps_centroid->i4_pos_x_q8;
4297 S32 node0_posy_q8 = ps_centroid->i4_pos_y_q8;
4298 S32 num_clusters_evaluated = 1;
4299 S32 ret_value = 0;
4300
4301 if(1 >= num_clusters)
4302 {
4303 return ret_value;
4304 }
4305
4306 for(i = 1; num_clusters_evaluated < num_clusters; i++)
4307 {
4308 S32 cur_posx_q8;
4309 S32 cur_posy_q8;
4310
4311 ps_cur_cluster = &ps_cluster_data[i];
4312
4313 if((ref_id != ps_cur_cluster->ref_id))
4314 {
4315 num_clusters_evaluated++;
4316 continue;
4317 }
4318
4319 if((!ps_cur_cluster->is_valid_cluster))
4320 {
4321 continue;
4322 }
4323
4324 num_clusters_evaluated++;
4325
4326 ps_cur_centroid = &ps_cur_cluster->s_centroid;
4327
4328 cur_posx_q8 = ps_cur_centroid->i4_pos_x_q8;
4329 cur_posy_q8 = ps_cur_centroid->i4_pos_y_q8;
4330
4331 mvdx_q8 = cur_posx_q8 - node0_posx_q8;
4332 mvdy_q8 = cur_posy_q8 - node0_posy_q8;
4333
4334 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4335 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4336
4337 mvd = ABS(mvdx) + ABS(mvdy);
4338
4339 if(mvd <= (mvd_limit >> 1))
4340 {
4341 LWORD64 i8_updated_posx;
4342 LWORD64 i8_updated_posy;
4343 WORD32 minmax_updated_x = 0;
4344 WORD32 minmax_updated_y = 0;
4345
4346 ps_cur_cluster->is_valid_cluster = 0;
4347
4348 ps_cluster_data->area_in_pixels += ps_cur_cluster->area_in_pixels;
4349 ps_cluster_data->bi_mv_pixel_area += ps_cur_cluster->bi_mv_pixel_area;
4350 ps_cluster_data->uni_mv_pixel_area += ps_cur_cluster->uni_mv_pixel_area;
4351
4352 memcpy(
4353 &ps_cluster_data->as_mv[ps_cluster_data->num_mvs],
4354 ps_cur_cluster->as_mv,
4355 sizeof(mv_data_t) * ps_cur_cluster->num_mvs);
4356
4357 if(mvdx > 0)
4358 {
4359 ps_cluster_data->min_x = (cur_posx_q8 + (1 << 7)) >> 8;
4360 minmax_updated_x = 1;
4361 }
4362 else
4363 {
4364 ps_cluster_data->max_x = (cur_posx_q8 + (1 << 7)) >> 8;
4365 minmax_updated_x = 2;
4366 }
4367
4368 if(mvdy > 0)
4369 {
4370 ps_cluster_data->min_y = (cur_posy_q8 + (1 << 7)) >> 8;
4371 minmax_updated_y = 1;
4372 }
4373 else
4374 {
4375 ps_cluster_data->max_y = (cur_posy_q8 + (1 << 7)) >> 8;
4376 minmax_updated_y = 2;
4377 }
4378
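            /* Same bound / radius bookkeeping as in the cluster attribute updates above */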
4379 switch((minmax_updated_y << 2) + minmax_updated_x)
4380 {
4381 case 1:
4382 {
4383 S32 mvd, mvd_q8;
4384
4385 mvd_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4386 mvd = (mvd_q8 + (1 << 7)) >> 8;
4387
4388 if(mvd > (mvd_limit))
4389 {
4390 ps_cluster_data->max_dist_from_centroid = mvd;
4391 }
4392 break;
4393 }
4394 case 2:
4395 {
4396 S32 mvd, mvd_q8;
4397
4398 mvd_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4399 mvd = (mvd_q8 + (1 << 7)) >> 8;
4400
4401 if(mvd > (mvd_limit))
4402 {
4403 ps_cluster_data->max_dist_from_centroid = mvd;
4404 }
4405 break;
4406 }
4407 case 4:
4408 {
4409 S32 mvd, mvd_q8;
4410
4411 mvd_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4412 mvd = (mvd_q8 + (1 << 7)) >> 8;
4413
4414 if(mvd > (mvd_limit))
4415 {
4416 ps_cluster_data->max_dist_from_centroid = mvd;
4417 }
4418 break;
4419 }
4420 case 5:
4421 {
4422 S32 mvd;
4423 S32 mvdx, mvdx_q8;
4424 S32 mvdy, mvdy_q8;
4425
4426 mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4427 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4428
4429 mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4430 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4431
4432 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4433
4434 if(mvd > mvd_limit)
4435 {
4436 ps_cluster_data->max_dist_from_centroid = mvd;
4437 }
4438 break;
4439 }
4440 case 6:
4441 {
4442 S32 mvd;
4443 S32 mvdx, mvdx_q8;
4444 S32 mvdy, mvdy_q8;
4445
4446 mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4447 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4448
4449 mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4450 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4451
4452 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4453
4454 if(mvd > mvd_limit)
4455 {
4456 ps_cluster_data->max_dist_from_centroid = mvd;
4457 }
4458 break;
4459 }
4460 case 8:
4461 {
4462 S32 mvd, mvd_q8;
4463
4464 mvd_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4465 mvd = (mvd_q8 + (1 << 7)) >> 8;
4466
4467 if(mvd > (mvd_limit))
4468 {
4469 ps_cluster_data->max_dist_from_centroid = mvd;
4470 }
4471 break;
4472 }
4473 case 9:
4474 {
4475 S32 mvd;
4476 S32 mvdx, mvdx_q8;
4477 S32 mvdy, mvdy_q8;
4478
4479 mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4480 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4481
4482 mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4483 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4484
4485 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4486
4487 if(mvd > mvd_limit)
4488 {
4489 ps_cluster_data->max_dist_from_centroid = mvd;
4490 }
4491 break;
4492 }
4493 case 10:
4494 {
4495 S32 mvd;
4496 S32 mvdx, mvdx_q8;
4497 S32 mvdy, mvdy_q8;
4498
4499 mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4500 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4501
4502 mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4503 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4504
4505 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4506
4507 if(mvd > ps_cluster_data->max_dist_from_centroid)
4508 {
4509 ps_cluster_data->max_dist_from_centroid = mvd;
4510 }
4511 break;
4512 }
4513 default:
4514 {
4515 break;
4516 }
4517 }
4518
4519 i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cluster_data->num_mvs) +
4520 ((LWORD64)cur_posx_q8 * ps_cur_cluster->num_mvs);
4521 i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cluster_data->num_mvs) +
4522 ((LWORD64)cur_posy_q8 * ps_cur_cluster->num_mvs);
4523
4524 ps_cluster_data->num_mvs += ps_cur_cluster->num_mvs;
4525
4526 ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cluster_data->num_mvs);
4527 ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cluster_data->num_mvs);
4528
4529 if(MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK >= num_clusters)
4530 {
4531 num_clusters--;
4532 num_clusters_evaluated = 1;
4533 i = 0;
4534 ret_value++;
4535 }
4536 else
4537 {
4538 ret_value++;
4539
4540 return ret_value;
4541 }
4542 }
4543 }
4544
4545 if(ret_value)
4546 {
4547 for(i = 1; i < (num_clusters + ret_value); i++)
4548 {
4549 if(ps_cluster_data[i].is_valid_cluster)
4550 {
4551 break;
4552 }
4553 }
4554 if(i == (num_clusters + ret_value))
4555 {
4556 return ret_value;
4557 }
4558 }
4559 else
4560 {
4561 i = 1;
4562 }
4563
4564 return (hme_try_merge_clusters_blksize_gt_16(&ps_cluster_data[i], num_clusters - 1)) +
4565 ret_value;
4566 }
4567
4568 /**
4569 ********************************************************************************
4570 * @fn S32 hme_determine_validity_32x32
4571 * (
4572 * ctb_cluster_info_t *ps_ctb_cluster_info
4573 * )
4574 *
4575 * @brief Determines whether the current 32x32 block needs to be evaluated in enc_loop
4576 *        while recursing through the CU tree or not
4577 *
4578 * @param[in] ps_ctb_cluster_info: structure containing the CTB's cluster data
4579 * @param[out] pi4_children_nodes_required: set to indicate whether the child 16x16 nodes must still be evaluated
4580 * @return 1 if the 32x32 node is valid for evaluation in enc_loop, else 0
4581 ********************************************************************************
4582 */
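/*
 * A return value of 1 marks the 32x32 node as worth evaluating as a whole in
 * enc_loop; *pi4_children_nodes_required additionally tells the caller
 * whether the four 16x16 children must still be visited. The decision is
 * driven by the 32x32 and 64x64 cluster counts, falling back to a
 * smallest-cluster-area check in the boundary case.
 */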
4583 __inline S32 hme_determine_validity_32x32(
4584 ctb_cluster_info_t *ps_ctb_cluster_info,
4585 S32 *pi4_children_nodes_required,
4586 S32 blk_validity_wrt_pic_bndry,
4587 S32 parent_blk_validity_wrt_pic_bndry)
4588 {
4589 cluster_data_t *ps_data;
4590
4591 cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
4592 cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
4593
4594 S32 num_clusters = ps_32x32_blk->num_clusters;
4595 S32 num_clusters_parent = ps_64x64_blk->num_clusters;
4596
4597 if(!blk_validity_wrt_pic_bndry)
4598 {
4599 *pi4_children_nodes_required = 1;
4600 return 0;
4601 }
4602
4603 if(!parent_blk_validity_wrt_pic_bndry)
4604 {
4605 *pi4_children_nodes_required = 1;
4606 return 1;
4607 }
4608
4609 if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4610 {
4611 *pi4_children_nodes_required = 1;
4612 return 0;
4613 }
4614
4615 if(num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
4616 {
4617 *pi4_children_nodes_required = 1;
4618
4619 return 1;
4620 }
4621 else if(num_clusters_parent < MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
4622 {
4623 *pi4_children_nodes_required = 0;
4624
4625 return 1;
4626 }
4627 else
4628 {
4629 if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4630 {
4631 *pi4_children_nodes_required = 0;
4632 return 1;
4633 }
4634 else
4635 {
4636 S32 i;
4637
4638 S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 4;
4639 S32 min_area = MAX_32BIT_VAL;
4640 S32 num_clusters_evaluated = 0;
4641
4642 for(i = 0; num_clusters_evaluated < num_clusters; i++)
4643 {
4644 ps_data = &ps_32x32_blk->as_cluster_data[i];
4645
4646 if(!ps_data->is_valid_cluster)
4647 {
4648 continue;
4649 }
4650
4651 num_clusters_evaluated++;
4652
4653 if(ps_data->area_in_pixels < min_area)
4654 {
4655 min_area = ps_data->area_in_pixels;
4656 }
4657 }
4658
4659 if((min_area << 4) < area_of_parent)
4660 {
4661 *pi4_children_nodes_required = 1;
4662 return 0;
4663 }
4664 else
4665 {
4666 *pi4_children_nodes_required = 0;
4667 return 1;
4668 }
4669 }
4670 }
4671 }
4672
4673 /**
4674 ********************************************************************************
4675 * @fn S32 hme_determine_validity_16x16
4676 * (
4677 * ctb_cluster_info_t *ps_ctb_cluster_info
4678 * )
4679 *
4680 * @brief Determines whether the current 16x16 block needs to be evaluated in enc_loop
4681 *        while recursing through the CU tree or not
4682 *
4683 * @param[in] ps_ctb_cluster_info: structure containing the CTB's cluster data
4684 * @param[out] pi4_children_nodes_required: set to indicate whether the child 8x8 nodes must still be evaluated
4685 * @return 1 if the 16x16 node is valid for evaluation in enc_loop, else 0
4686 ********************************************************************************
4687 */
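/*
 * Same idea as hme_determine_validity_32x32, one level down: the 16x16
 * node's validity is decided from its own cluster count together with the
 * parent 32x32 and grandparent 64x64 counts, again falling back to the
 * smallest-cluster-area check in the boundary case.
 */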
4688 __inline S32 hme_determine_validity_16x16(
4689 ctb_cluster_info_t *ps_ctb_cluster_info,
4690 S32 *pi4_children_nodes_required,
4691 S32 blk_validity_wrt_pic_bndry,
4692 S32 parent_blk_validity_wrt_pic_bndry)
4693 {
4694 cluster_data_t *ps_data;
4695
4696 cluster_16x16_blk_t *ps_16x16_blk = ps_ctb_cluster_info->ps_16x16_blk;
4697 cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
4698 cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
4699
4700 S32 num_clusters = ps_16x16_blk->num_clusters;
4701 S32 num_clusters_parent = ps_32x32_blk->num_clusters;
4702 S32 num_clusters_grandparent = ps_64x64_blk->num_clusters;
4703
4704 if(!blk_validity_wrt_pic_bndry)
4705 {
4706 *pi4_children_nodes_required = 1;
4707 return 0;
4708 }
4709
4710 if(!parent_blk_validity_wrt_pic_bndry)
4711 {
4712 *pi4_children_nodes_required = 1;
4713 return 1;
4714 }
4715
4716 if((num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
4717 (num_clusters_grandparent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
4718 {
4719 *pi4_children_nodes_required = 1;
4720 return 1;
4721 }
4722
4723     /* Reaching this point, the 32x32 parent and the 64x64 grandparent cannot */
4724     /* both exceed their cluster limits; that case has already returned above  */
4725 if(num_clusters_parent != MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4726 {
4727 if(num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4728 {
4729 *pi4_children_nodes_required = 0;
4730
4731 return 1;
4732 }
4733 else
4734 {
4735 *pi4_children_nodes_required = 1;
4736
4737 return 0;
4738 }
4739 }
4740     /* Here num_clusters_parent == MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK */
4741 else
4742 {
4743 if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4744 {
4745 *pi4_children_nodes_required = 0;
4746 return 1;
4747 }
4748 else if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4749 {
4750 *pi4_children_nodes_required = 1;
4751 return 0;
4752 }
4753 else
4754 {
4755 S32 i;
4756
4757 S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 2;
4758 S32 min_area = MAX_32BIT_VAL;
4759 S32 num_clusters_evaluated = 0;
4760
4761 for(i = 0; num_clusters_evaluated < num_clusters; i++)
4762 {
4763 ps_data = &ps_16x16_blk->as_cluster_data[i];
4764
4765 if(!ps_data->is_valid_cluster)
4766 {
4767 continue;
4768 }
4769
4770 num_clusters_evaluated++;
4771
4772 if(ps_data->area_in_pixels < min_area)
4773 {
4774 min_area = ps_data->area_in_pixels;
4775 }
4776 }
4777
4778 if((min_area << 4) < area_of_parent)
4779 {
4780 *pi4_children_nodes_required = 1;
4781 return 0;
4782 }
4783 else
4784 {
4785 *pi4_children_nodes_required = 0;
4786 return 1;
4787 }
4788 }
4789 }
4790 }
4791
4792 /**
4793 ********************************************************************************
4794 * @fn void hme_build_cu_tree
4795 * (
4796 * ctb_cluster_info_t *ps_ctb_cluster_info,
4797 * cur_ctb_cu_tree_t *ps_cu_tree,
4798 * S32 tree_depth,
4799 * CU_POS_T e_grand_parent_blk_pos,
4800 * CU_POS_T e_parent_blk_pos,
4801 * CU_POS_T e_cur_blk_pos
4802 * )
4803 *
4804 * @brief Recursive function for CU tree initialisation
4805 *
4806 * @param[in] ps_ctb_cluster_info: structure containing pointers to clusters
4807 * corresponding to all block sizes from 64x64
4808 * to 16x16
4809 *
4810 * @param[in] e_parent_blk_pos: position of parent block wrt its parent, if
4811 * applicable
4812 *
4813 * @param[in] e_cur_blk_pos: position of current block wrt parent
4814 *
4815 * @param[out] ps_cu_tree : represents CU tree used in CU recursion
4816 *
4817 * @param[in] tree_depth : specifies depth of the CU tree
4818 *
4819 * @return Nothing
4820 ********************************************************************************
4821 */
4822 void hme_build_cu_tree(
4823 ctb_cluster_info_t *ps_ctb_cluster_info,
4824 cur_ctb_cu_tree_t *ps_cu_tree,
4825 S32 tree_depth,
4826 CU_POS_T e_grandparent_blk_pos,
4827 CU_POS_T e_parent_blk_pos,
4828 CU_POS_T e_cur_blk_pos)
4829 {
4830 ihevce_cu_tree_init(
4831 ps_cu_tree,
4832 ps_ctb_cluster_info->ps_cu_tree_root,
4833 &ps_ctb_cluster_info->nodes_created_in_cu_tree,
4834 tree_depth,
4835 e_grandparent_blk_pos,
4836 e_parent_blk_pos,
4837 e_cur_blk_pos);
4838 }
4839
4840 /**
4841 ********************************************************************************
4842 * @fn S32 hme_sdi_based_cluster_spread_eligibility
4843 * (
4844 *            cluster_32x32_blk_t *ps_blk_32x32,
4845 *            S32 sdi_threshold
4846 *        )
4847 *
4848 * @brief Determines whether the spread of high SDI MV's around each cluster
4849 *        center is below a pre-determined threshold
4850 *
4851 * @param[in] ps_blk_32x32: structure containing the 32x32 block's cluster data
4852 * @param[in] sdi_threshold: SDI value at or above which an MV is treated as high-SDI
4853 *
4854 * @return 1 if the spread is constrained, else 0
4855 ********************************************************************************
4856 */
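/*
 * For every cluster in the 32x32 block, the distances (accumulated via
 * COMPUTE_MVD) of MVs whose SDI is at or above sdi_threshold are summed; if
 * their average exceeds half the cluster's max_dist_from_centroid, the
 * spread is treated as too large and 0 is returned.
 */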
4857 __inline S32
4858 hme_sdi_based_cluster_spread_eligibility(cluster_32x32_blk_t *ps_blk_32x32, S32 sdi_threshold)
4859 {
4860 S32 cumulative_mv_distance;
4861 S32 i, j;
4862 S32 num_high_sdi_mvs;
4863
4864 S32 num_clusters = ps_blk_32x32->num_clusters;
4865
4866 for(i = 0; i < num_clusters; i++)
4867 {
4868 cluster_data_t *ps_data = &ps_blk_32x32->as_cluster_data[i];
4869
4870 num_high_sdi_mvs = 0;
4871 cumulative_mv_distance = 0;
4872
4873 for(j = 0; j < ps_data->num_mvs; j++)
4874 {
4875 mv_data_t *ps_mv = &ps_data->as_mv[j];
4876
4877 if(ps_mv->sdi >= sdi_threshold)
4878 {
4879 num_high_sdi_mvs++;
4880
4881 COMPUTE_MVD(ps_mv, ps_data, cumulative_mv_distance);
4882 }
4883 }
4884
4885 if(cumulative_mv_distance > ((ps_data->max_dist_from_centroid >> 1) * num_high_sdi_mvs))
4886 {
4887 return 0;
4888 }
4889 }
4890
4891 return 1;
4892 }
4893
4894 /**
4895 ********************************************************************************
4896 * @fn void hme_populate_cu_tree
4897 *     (
4898 *         ctb_cluster_info_t *ps_ctb_cluster_info,
4899 *         cur_ctb_cu_tree_t *ps_cu_tree,
4900 *         S32 tree_depth,
4901 *         ME_QUALITY_PRESETS_T e_quality_preset,
4902 *         CU_POS_T e_grandparent_blk_pos,
4903 *         CU_POS_T e_parent_blk_pos,
4904 *         CU_POS_T e_cur_blk_pos
4905 *     )
4906 *
4907 * @brief Recursive function for CU tree population based on output of
4908 *        clustering algorithm
4909 *
4910 * @param[in] ps_ctb_cluster_info: structure containing pointers to clusters
4911 *                                 corresponding to all block sizes from 64x64
4912 *                                 to 16x16
4913 *
4914 * @param[out] ps_cu_tree : represents CU tree used in CU recursion
4915 *
4916 * @param[in] tree_depth : specifies depth of the CU tree
4917 *
4918 * @param[in] e_quality_preset : ME quality preset; for the faster presets the
4919 *            cluster based checks are short-circuited
4920 *
4921 * @param[in] e_grandparent_blk_pos: position of the grandparent block wrt its
4922 *            parent, if applicable
4923 *
4924 * @param[in] e_parent_blk_pos: position of parent block wrt its parent, if
4925 *            applicable
4926 *
4927 * @param[in] e_cur_blk_pos: position of current block wrt parent
4928 * @return None
4929 ********************************************************************************
4930 */
4931 void hme_populate_cu_tree(
4932 ctb_cluster_info_t *ps_ctb_cluster_info,
4933 cur_ctb_cu_tree_t *ps_cu_tree,
4934 S32 tree_depth,
4935 ME_QUALITY_PRESETS_T e_quality_preset,
4936 CU_POS_T e_grandparent_blk_pos,
4937 CU_POS_T e_parent_blk_pos,
4938 CU_POS_T e_cur_blk_pos)
4939 {
4940 S32 area_of_cur_blk;
4941 S32 area_limit_for_me_decision_precedence;
4942 S32 children_nodes_required;
4943 S32 intra_mv_area;
4944 S32 intra_eval_enable;
4945 S32 inter_eval_enable;
4946 S32 ipe_decision_precedence;
4947 S32 node_validity;
4948 S32 num_clusters;
4949
4950 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb = ps_ctb_cluster_info->ps_cur_ipe_ctb;
4951
4952 if(NULL == ps_cu_tree)
4953 {
4954 return;
4955 }
4956
4957 switch(tree_depth)
4958 {
4959 case 0:
4960 {
4961 /* 64x64 block */
4962 S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
4963
4964 cluster_64x64_blk_t *ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
4965
4966 area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 4;
4967 area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
4968 children_nodes_required = 0;
4969 intra_mv_area = ps_blk_64x64->intra_mv_area;
4970
4971 ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
4972
4973 intra_eval_enable = ipe_decision_precedence;
4974 inter_eval_enable = !!ps_blk_64x64->num_clusters;
4975
4976 #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4977 if(e_quality_preset >= ME_HIGH_QUALITY)
4978 {
4979 inter_eval_enable = 1;
4980 node_validity = (blk_32x32_mask == 0xf);
4981 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
4982 ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
4983 #endif
4984 break;
4985 }
4986 #endif
4987
4988 #if ENABLE_4CTB_EVALUATION
4989 node_validity = (blk_32x32_mask == 0xf);
4990
4991 break;
4992 #else
4993 {
4994 S32 i;
4995
4996 num_clusters = ps_blk_64x64->num_clusters;
4997
4998 node_validity = (ipe_decision_precedence)
4999 ? (!ps_cur_ipe_ctb->u1_split_flag)
5000 : (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK);
5001
5002 for(i = 0; i < MAX_NUM_REF; i++)
5003 {
5004 node_validity = node_validity && (ps_blk_64x64->au1_num_clusters[i] <=
5005 MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
5006 }
5007
5008 node_validity = node_validity && (blk_32x32_mask == 0xf);
5009 }
5010 break;
5011 #endif
5012 }
5013 case 1:
5014 {
5015 /* 32x32 block */
5016 S32 is_percent_intra_area_gt_threshold;
5017
5018 cluster_32x32_blk_t *ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cur_blk_pos];
5019
5020 S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
5021
5022 #if !ENABLE_4CTB_EVALUATION
5023 S32 best_inter_cost = ps_blk_32x32->best_inter_cost;
5024 S32 best_intra_cost =
5025 ((ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
5026 ps_ctb_cluster_info->i4_frame_qstep * ps_ctb_cluster_info->i4_frame_qstep_multiplier *
5027 4) < 0)
5028 ? MAX_32BIT_VAL
5029 : (ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
5030 ps_ctb_cluster_info->i4_frame_qstep *
5031 ps_ctb_cluster_info->i4_frame_qstep_multiplier * 4);
5032 S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
5033 S32 cost_differential = (best_inter_cost - best_cost);
5034 #endif
5035
5036 area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 2;
5037 area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
5038 intra_mv_area = ps_blk_32x32->intra_mv_area;
5039 is_percent_intra_area_gt_threshold =
5040 (intra_mv_area > area_limit_for_me_decision_precedence);
5041 ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
5042
5043 intra_eval_enable = ipe_decision_precedence;
5044 inter_eval_enable = !!ps_blk_32x32->num_clusters;
5045 children_nodes_required = 1;
5046
5047 #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5048 if(e_quality_preset >= ME_HIGH_QUALITY)
5049 {
5050 inter_eval_enable = 1;
5051 node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5052 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5053 ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
5054 #endif
5055 break;
5056 }
5057 #endif
5058
5059 #if ENABLE_4CTB_EVALUATION
5060 node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5061
5062 break;
5063 #else
5064 {
5065 S32 i;
5066 num_clusters = ps_blk_32x32->num_clusters;
5067
5068 if(ipe_decision_precedence)
5069 {
5070 node_validity = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_merge_flag);
5071 node_validity = node_validity && (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5072 }
5073 else
5074 {
5075 node_validity =
5076 ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential)) &&
5077 (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
5078 (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5079
5080 for(i = 0; (i < MAX_NUM_REF) && (node_validity); i++)
5081 {
5082 node_validity = node_validity && (ps_blk_32x32->au1_num_clusters[i] <=
5083 MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
5084 }
5085
5086 if(node_validity)
5087 {
5088 node_validity = node_validity &&
5089 hme_sdi_based_cluster_spread_eligibility(
5090 ps_blk_32x32, ps_ctb_cluster_info->sdi_threshold);
5091 }
5092 }
5093 }
5094
5095 break;
5096 #endif
5097 }
5098 case 2:
5099 {
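            /* 16x16 block */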
5100 cluster_16x16_blk_t *ps_blk_16x16 =
5101 &ps_ctb_cluster_info->ps_16x16_blk[e_cur_blk_pos + (e_parent_blk_pos << 2)];
5102
5103 S32 blk_8x8_mask =
5104 ps_ctb_cluster_info->pi4_blk_8x8_mask[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5105
5106 area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N];
5107 area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
5108 children_nodes_required = 1;
5109 intra_mv_area = ps_blk_16x16->intra_mv_area;
5110 ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
5111 num_clusters = ps_blk_16x16->num_clusters;
5112
5113 intra_eval_enable = ipe_decision_precedence;
5114 inter_eval_enable = 1;
5115
5116 #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5117 if(e_quality_preset >= ME_HIGH_QUALITY)
5118 {
5119 node_validity =
5120 !ps_ctb_cluster_info
5121 ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5122 children_nodes_required = !node_validity;
5123 break;
5124 }
5125 #endif
5126
5127 #if ENABLE_4CTB_EVALUATION
5128 node_validity = (blk_8x8_mask == 0xf);
5129
5130 #if ENABLE_CU_TREE_CULLING
5131 {
5132 cur_ctb_cu_tree_t *ps_32x32_root;
5133
5134 switch(e_parent_blk_pos)
5135 {
5136 case POS_TL:
5137 {
5138 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
5139
5140 break;
5141 }
5142 case POS_TR:
5143 {
5144 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
5145
5146 break;
5147 }
5148 case POS_BL:
5149 {
5150 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
5151
5152 break;
5153 }
5154 case POS_BR:
5155 {
5156 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
5157
5158 break;
5159 }
5160 }
5161
5162 if(ps_32x32_root->is_node_valid)
5163 {
5164 node_validity =
5165 node_validity &&
5166 !ps_ctb_cluster_info
5167 ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5168 children_nodes_required = !node_validity;
5169 }
5170 }
5171 #endif
5172
5173 break;
5174 #else
5175
5176 if(ipe_decision_precedence)
5177 {
5178 S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
5179 .as_intra16_analyse[e_cur_blk_pos]
5180 .b1_merge_flag);
5181 S32 valid_flag = (blk_8x8_mask == 0xf);
5182
5183 node_validity = merge_flag_16 && valid_flag;
5184 }
5185 else
5186 {
5187 node_validity = (blk_8x8_mask == 0xf);
5188 }
5189
5190 break;
5191 #endif
5192 }
5193 case 3:
5194 {
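            /* 8x8 block */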
5195 S32 blk_8x8_mask =
5196 ps_ctb_cluster_info
5197 ->pi4_blk_8x8_mask[(S32)(e_grandparent_blk_pos << 2) + e_parent_blk_pos];
5198 S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos]
5199 .as_intra16_analyse[e_parent_blk_pos]
5200 .b1_merge_flag);
5201 S32 merge_flag_32 =
5202 (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos].b1_merge_flag);
5203
5204 intra_eval_enable = !merge_flag_16 || !merge_flag_32;
5205 inter_eval_enable = 1;
5206 children_nodes_required = 0;
5207
5208 #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5209 if(e_quality_preset >= ME_HIGH_QUALITY)
5210 {
5211 node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
5212 break;
5213 }
5214 #endif
5215
5216 #if ENABLE_4CTB_EVALUATION
5217 node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
5218
5219 break;
5220 #else
5221 {
5222 cur_ctb_cu_tree_t *ps_32x32_root;
5223 cur_ctb_cu_tree_t *ps_16x16_root;
5224 cluster_32x32_blk_t *ps_32x32_blk;
5225
5226 switch(e_grandparent_blk_pos)
5227 {
5228 case POS_TL:
5229 {
5230 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
5231
5232 break;
5233 }
5234 case POS_TR:
5235 {
5236 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
5237
5238 break;
5239 }
5240 case POS_BL:
5241 {
5242 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
5243
5244 break;
5245 }
5246 case POS_BR:
5247 {
5248 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
5249
5250 break;
5251 }
5252 }
5253
5254 switch(e_parent_blk_pos)
5255 {
5256 case POS_TL:
5257 {
5258 ps_16x16_root = ps_32x32_root->ps_child_node_tl;
5259
5260 break;
5261 }
5262 case POS_TR:
5263 {
5264 ps_16x16_root = ps_32x32_root->ps_child_node_tr;
5265
5266 break;
5267 }
5268 case POS_BL:
5269 {
5270 ps_16x16_root = ps_32x32_root->ps_child_node_bl;
5271
5272 break;
5273 }
5274 case POS_BR:
5275 {
5276 ps_16x16_root = ps_32x32_root->ps_child_node_br;
5277
5278 break;
5279 }
5280 }
5281
5282 ps_32x32_blk = &ps_ctb_cluster_info->ps_32x32_blk[e_grandparent_blk_pos];
5283
5284 node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0) &&
5285 ((!ps_32x32_root->is_node_valid) ||
5286 (ps_32x32_blk->num_clusters_with_weak_sdi_density > 0) ||
5287 (!ps_16x16_root->is_node_valid));
5288
5289 break;
5290 }
5291 #endif
5292 }
5293 }
5294
5295 /* Fill the current cu_tree node */
5296 ps_cu_tree->is_node_valid = node_validity;
5297 ps_cu_tree->u1_intra_eval_enable = intra_eval_enable;
5298 ps_cu_tree->u1_inter_eval_enable = inter_eval_enable;
5299
5300 if(children_nodes_required)
5301 {
5302 tree_depth++;
5303
5304 hme_populate_cu_tree(
5305 ps_ctb_cluster_info,
5306 ps_cu_tree->ps_child_node_tl,
5307 tree_depth,
5308 e_quality_preset,
5309 e_parent_blk_pos,
5310 e_cur_blk_pos,
5311 POS_TL);
5312
5313 hme_populate_cu_tree(
5314 ps_ctb_cluster_info,
5315 ps_cu_tree->ps_child_node_tr,
5316 tree_depth,
5317 e_quality_preset,
5318 e_parent_blk_pos,
5319 e_cur_blk_pos,
5320 POS_TR);
5321
5322 hme_populate_cu_tree(
5323 ps_ctb_cluster_info,
5324 ps_cu_tree->ps_child_node_bl,
5325 tree_depth,
5326 e_quality_preset,
5327 e_parent_blk_pos,
5328 e_cur_blk_pos,
5329 POS_BL);
5330
5331 hme_populate_cu_tree(
5332 ps_ctb_cluster_info,
5333 ps_cu_tree->ps_child_node_br,
5334 tree_depth,
5335 e_quality_preset,
5336 e_parent_blk_pos,
5337 e_cur_blk_pos,
5338 POS_BR);
5339 }
5340 }
5341
5342 /**
5343 ********************************************************************************
5344 * @fn void hme_analyse_mv_clustering
5345 *     (
5346 *         search_results_t *ps_search_results,
5347 *         inter_cu_results_t *ps_16x16_cu_results,
5348 *         inter_cu_results_t *ps_8x8_cu_results,
5349 *         ctb_cluster_info_t *ps_ctb_cluster_info,
5350 *         S08 *pi1_future_list, S08 *pi1_past_list,
5351 *         S32 bidir_enabled, ME_QUALITY_PRESETS_T e_quality_preset
5352 *     )
5353 *
5354 * @brief Implementation of the clustering algorithm
5355 *
5356 * @param[in] ps_search_results: structure containing 16x16 block results
5357 * @param[in] ps_16x16_cu_results / ps_8x8_cu_results : best inter results of the 16x16 and 8x8 blocks
5358 * @param[in/out] ps_ctb_cluster_info : cluster containers and CU tree root for the current CTB
5359 * @return None
5360 ********************************************************************************
5361 */
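/*
 * Top level of the clustering pass for one CTB: the 16x16 / 32x32 / 64x64
 * cluster containers are (re)initialised, the CU tree skeleton is built via
 * hme_build_cu_tree, and the best inter results of every 16x16 block (or of
 * its four 8x8 blocks when the 16x16 CU is split) are then fed into the
 * clustering, whose output drives the CU-tree validity decisions.
 */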
5362 void hme_analyse_mv_clustering(
5363 search_results_t *ps_search_results,
5364 inter_cu_results_t *ps_16x16_cu_results,
5365 inter_cu_results_t *ps_8x8_cu_results,
5366 ctb_cluster_info_t *ps_ctb_cluster_info,
5367 S08 *pi1_future_list,
5368 S08 *pi1_past_list,
5369 S32 bidir_enabled,
5370 ME_QUALITY_PRESETS_T e_quality_preset)
5371 {
5372 cluster_16x16_blk_t *ps_blk_16x16;
5373 cluster_32x32_blk_t *ps_blk_32x32;
5374 cluster_64x64_blk_t *ps_blk_64x64;
5375
5376 part_type_results_t *ps_best_result;
5377 pu_result_t *aps_part_result[MAX_NUM_PARTS];
5378 pu_result_t *aps_inferior_parts[MAX_NUM_PARTS];
5379
5380 PART_ID_T e_part_id;
5381 PART_TYPE_T e_part_type;
5382
5383 S32 enable_64x64_merge;
5384 S32 i, j, k;
5385 S32 mvx, mvy;
5386 S32 num_parts;
5387 S32 ref_idx;
5388 S32 ai4_pred_mode[MAX_NUM_PARTS];
5389
5390 S32 num_32x32_merges = 0;
5391
5392 /*****************************************/
5393 /*****************************************/
5394 /********* Enter ye who is HQ ************/
5395 /*****************************************/
5396 /*****************************************/
5397
5398 ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
5399
5400 /* Initialise data in each of the clusters */
5401 for(i = 0; i < 16; i++)
5402 {
5403 ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
5404
5405 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5406 if(e_quality_preset < ME_HIGH_QUALITY)
5407 {
5408 hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
5409 }
5410 else
5411 {
5412 ps_blk_16x16->best_inter_cost = 0;
5413 ps_blk_16x16->intra_mv_area = 0;
5414 }
5415 #else
5416 hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
5417 #endif
5418 }
5419
5420 for(i = 0; i < 4; i++)
5421 {
5422 ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
5423
5424 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5425 if(e_quality_preset < ME_HIGH_QUALITY)
5426 {
5427 hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
5428 }
5429 else
5430 {
5431 ps_blk_32x32->best_inter_cost = 0;
5432 ps_blk_32x32->intra_mv_area = 0;
5433 }
5434 #else
5435 hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
5436 #endif
5437 }
5438
5439 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5440 if(e_quality_preset < ME_HIGH_QUALITY)
5441 {
5442 hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
5443 }
5444 else
5445 {
5446 ps_blk_64x64->best_inter_cost = 0;
5447 ps_blk_64x64->intra_mv_area = 0;
5448 }
5449 #else
5450 hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
5451 #endif
5452
5453 /* Initialise data for all nodes in the CU tree */
5454 hme_build_cu_tree(
5455 ps_ctb_cluster_info, ps_ctb_cluster_info->ps_cu_tree_root, 0, POS_NA, POS_NA, POS_NA);
5456
5457 if(e_quality_preset >= ME_HIGH_QUALITY)
5458 {
5459 memset(ps_ctb_cluster_info->au1_is_16x16_blk_split, 1, 16 * sizeof(U08));
5460 }
5461
5462 #if ENABLE_UNIFORM_CU_SIZE_16x16 || ENABLE_UNIFORM_CU_SIZE_8x8
5463 return;
5464 #endif
5465
5466 for(i = 0; i < 16; i++)
5467 {
5468 S32 blk_8x8_mask;
5469 S32 is_16x16_blk_valid;
5470 S32 num_clusters_updated;
5471 S32 num_clusters;
5472
5473 blk_8x8_mask = ps_ctb_cluster_info->pi4_blk_8x8_mask[i];
5474
5475 ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
5476
5477 is_16x16_blk_valid = (blk_8x8_mask == 0xf);
5478
5479 if(is_16x16_blk_valid)
5480 {
5481 /* Use 8x8 data when 16x16 CU is split */
5482 if(ps_search_results[i].u1_split_flag)
5483 {
5484 S32 blk_8x8_idx = i << 2;
5485
5486 num_parts = 4;
5487 e_part_type = PRT_NxN;
5488
5489 for(j = 0; j < num_parts; j++, blk_8x8_idx++)
5490 {
5491 /* Only 2Nx2N partition supported for 8x8 block */
5492 ASSERT(
5493 ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].u1_part_type ==
5494 ((PART_TYPE_T)PRT_2Nx2N));
5495
5496 aps_part_result[j] =
5497 &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].as_pu_results[0];
5498 aps_inferior_parts[j] =
5499 &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[1].as_pu_results[0];
5500 ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
5501 }
5502 }
5503 else
5504 {
5505 ps_best_result = &ps_16x16_cu_results[i].ps_best_results[0];
5506
5507 e_part_type = (PART_TYPE_T)ps_best_result->u1_part_type;
5508 num_parts = gau1_num_parts_in_part_type[e_part_type];
5509
5510 for(j = 0; j < num_parts; j++)
5511 {
5512 aps_part_result[j] = &ps_best_result->as_pu_results[j];
5513 aps_inferior_parts[j] = &ps_best_result[1].as_pu_results[j];
5514 ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
5515 }
5516
5517 ps_ctb_cluster_info->au1_is_16x16_blk_split[i] = 0;
5518 }
5519
5520 for(j = 0; j < num_parts; j++)
5521 {
5522 pu_result_t *ps_part_result = aps_part_result[j];
5523
5524 S32 num_mvs = ((ai4_pred_mode[j] > 1) + 1);
5525
5526 e_part_id = ge_part_type_to_part_id[e_part_type][j];
5527
5528 /* Skip clustering if best mode is intra */
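/* (the cost of the runner-up result, ps_best_results[1], is accumulated instead) */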
5529 if((ps_part_result->pu.b1_intra_flag))
5530 {
5531 ps_blk_16x16->intra_mv_area += gai4_partition_area[e_part_id];
5532 ps_blk_16x16->best_inter_cost += aps_inferior_parts[j]->i4_tot_cost;
5533 continue;
5534 }
5535 else
5536 {
5537 ps_blk_16x16->best_inter_cost += ps_part_result->i4_tot_cost;
5538 }
5539
5540 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5541 if(e_quality_preset >= ME_HIGH_QUALITY)
5542 {
5543 continue;
5544 }
5545 #endif
5546
5547 for(k = 0; k < num_mvs; k++)
5548 {
5549 mv_t *ps_mv;
5550
5551 pu_mv_t *ps_pu_mv = &ps_part_result->pu.mv;
5552
5553 S32 is_l0_mv = ((ai4_pred_mode[j] == 2) && !k) || (ai4_pred_mode[j] == 0);
5554
5555 ps_mv = (is_l0_mv) ? (&ps_pu_mv->s_l0_mv) : (&ps_pu_mv->s_l1_mv);
5556
5557 mvx = ps_mv->i2_mvx;
5558 mvy = ps_mv->i2_mvy;
5559
5560 ref_idx = (is_l0_mv) ? pi1_past_list[ps_pu_mv->i1_l0_ref_idx]
5561 : pi1_future_list[ps_pu_mv->i1_l1_ref_idx];
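/* map the list-local (L0/L1) ref idx to the ref id used by the cluster book-keeping */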
5562
5563 num_clusters = ps_blk_16x16->num_clusters;
5564
5565 hme_find_and_update_clusters(
5566 ps_blk_16x16->as_cluster_data,
5567 &(ps_blk_16x16->num_clusters),
5568 mvx,
5569 mvy,
5570 ref_idx,
5571 ps_part_result->i4_sdi,
5572 e_part_id,
5573 (ai4_pred_mode[j] == 2));
5574
5575 num_clusters_updated = (ps_blk_16x16->num_clusters);
5576
5577 ps_blk_16x16->au1_num_clusters[ref_idx] +=
5578 (num_clusters_updated - num_clusters);
5579 }
5580 }
5581 }
5582 }
5583
5584 /* Search for 32x32 clusters */
5585 for(i = 0; i < 4; i++)
5586 {
5587 S32 num_clusters_merged;
5588
5589 S32 is_32x32_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << i)) || 0;
5590
5591 if(is_32x32_blk_valid)
5592 {
5593 ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
5594 ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i << 2];
5595
5596 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5597 if(e_quality_preset >= ME_HIGH_QUALITY)
5598 {
5599 for(j = 0; j < 4; j++, ps_blk_16x16++)
5600 {
5601 ps_blk_32x32->intra_mv_area += ps_blk_16x16->intra_mv_area;
5602
5603 ps_blk_32x32->best_inter_cost += ps_blk_16x16->best_inter_cost;
5604 }
5605 continue;
5606 }
5607 #endif
5608
5609 hme_update_32x32_clusters(ps_blk_32x32, ps_blk_16x16);
5610
5611 if((ps_blk_32x32->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
5612 {
5613 num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
5614 ps_blk_32x32->as_cluster_data, (ps_blk_32x32->num_clusters));
5615
5616 if(num_clusters_merged)
5617 {
5618 ps_blk_32x32->num_clusters -= num_clusters_merged;
5619
5620 UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_32x32);
5621 }
5622 }
5623 }
5624 }
5625
5626 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5627 /* Eliminate outlier 32x32 clusters */
5628 if(e_quality_preset < ME_HIGH_QUALITY)
5629 #endif
5630 {
5631 hme_boot_out_outlier(ps_ctb_cluster_info, 32);
5632
5633 /* Find best_uni_ref and best_alt_ref */
5634 hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 32);
5635 }
5636
5637 /* Populate the CU tree for depths 1 and higher */
5638 {
5639 cur_ctb_cu_tree_t *ps_tree_root = ps_ctb_cluster_info->ps_cu_tree_root;
5640 cur_ctb_cu_tree_t *ps_tl = ps_tree_root->ps_child_node_tl;
5641 cur_ctb_cu_tree_t *ps_tr = ps_tree_root->ps_child_node_tr;
5642 cur_ctb_cu_tree_t *ps_bl = ps_tree_root->ps_child_node_bl;
5643 cur_ctb_cu_tree_t *ps_br = ps_tree_root->ps_child_node_br;
5644
5645 hme_populate_cu_tree(
5646 ps_ctb_cluster_info, ps_tl, 1, e_quality_preset, POS_NA, POS_NA, POS_TL);
5647
5648 num_32x32_merges += (ps_tl->is_node_valid == 1);
5649
5650 hme_populate_cu_tree(
5651 ps_ctb_cluster_info, ps_tr, 1, e_quality_preset, POS_NA, POS_NA, POS_TR);
5652
5653 num_32x32_merges += (ps_tr->is_node_valid == 1);
5654
5655 hme_populate_cu_tree(
5656 ps_ctb_cluster_info, ps_bl, 1, e_quality_preset, POS_NA, POS_NA, POS_BL);
5657
5658 num_32x32_merges += (ps_bl->is_node_valid == 1);
5659
5660 hme_populate_cu_tree(
5661 ps_ctb_cluster_info, ps_br, 1, e_quality_preset, POS_NA, POS_NA, POS_BR);
5662
5663 num_32x32_merges += (ps_br->is_node_valid == 1);
5664 }
5665
5666 #if !ENABLE_4CTB_EVALUATION
5667 if(e_quality_preset < ME_HIGH_QUALITY)
5668 {
5669 enable_64x64_merge = (num_32x32_merges >= 3);
5670 }
5671 #else
5672 if(e_quality_preset < ME_HIGH_QUALITY)
5673 {
5674 enable_64x64_merge = 1;
5675 }
5676 #endif
5677
5678 #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5679 if(e_quality_preset >= ME_HIGH_QUALITY)
5680 {
5681 enable_64x64_merge = 1;
5682 }
5683 #else
5684 if(e_quality_preset >= ME_HIGH_QUALITY)
5685 {
5686 enable_64x64_merge = (num_32x32_merges >= 3);
5687 }
5688 #endif
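/* At this point 64x64 merge evaluation is enabled either unconditionally or */
/* only when at least 3 of the 4 32x32 merges succeeded, depending on the    */
/* preset and the build flags above                                          */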
5689
5690 if(enable_64x64_merge)
5691 {
5692 S32 num_clusters_merged;
5693
5694 ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[0];
5695
5696 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5697 if(e_quality_preset >= ME_HIGH_QUALITY)
5698 {
5699 for(j = 0; j < 4; j++, ps_blk_32x32++)
5700 {
5701 ps_blk_64x64->intra_mv_area += ps_blk_32x32->intra_mv_area;
5702
5703 ps_blk_64x64->best_inter_cost += ps_blk_32x32->best_inter_cost;
5704 }
5705 }
5706 else
5707 #endif
5708 {
5709 hme_update_64x64_clusters(ps_blk_64x64, ps_blk_32x32);
5710
5711 if((ps_blk_64x64->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
5712 {
5713 num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
5714 ps_blk_64x64->as_cluster_data, (ps_blk_64x64->num_clusters));
5715
5716 if(num_clusters_merged)
5717 {
5718 ps_blk_64x64->num_clusters -= num_clusters_merged;
5719
5720 UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_64x64);
5721 }
5722 }
5723 }
5724
5725 #if !ENABLE_4CTB_EVALUATION
5726 if(e_quality_preset < ME_HIGH_QUALITY)
5727 {
5728 S32 best_inter_cost = ps_blk_64x64->best_inter_cost;
5729 S32 best_intra_cost =
5730 ((ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
5731 ps_ctb_cluster_info->i4_frame_qstep *
5732 ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16) < 0)
5733 ? MAX_32BIT_VAL
5734 : (ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
5735 ps_ctb_cluster_info->i4_frame_qstep *
5736 ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16);
5737 S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
5738 S32 cost_differential = (best_inter_cost - best_cost);
5739
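/* Retain the 64x64 merge only when the inter cost exceeds the overall best */
/* cost by no more than ALL_INTER_COST_DIFF_THR percent                     */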
5740 enable_64x64_merge =
5741 ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential));
5742 }
5743 #endif
5744 }
5745
5746 if(enable_64x64_merge)
5747 {
5748 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5749 if(e_quality_preset < ME_HIGH_QUALITY)
5750 #endif
5751 {
5752 hme_boot_out_outlier(ps_ctb_cluster_info, 64);
5753
5754 hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 64);
5755 }
5756
5757 hme_populate_cu_tree(
5758 ps_ctb_cluster_info,
5759 ps_ctb_cluster_info->ps_cu_tree_root,
5760 0,
5761 e_quality_preset,
5762 POS_NA,
5763 POS_NA,
5764 POS_NA);
5765 }
5766 }
5767 #endif
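/* Illustrative sketch (kept under '#if 0', never compiled): a minimal       */
/* recursive walk of the CU tree populated by hme_populate_cu_tree, counting */
/* the nodes marked valid. This is not part of the encoder; it only assumes  */
/* the is_node_valid and ps_child_node_* members of cur_ctb_cu_tree_t.       */
#if 0
static S32 hme_count_valid_cu_nodes(cur_ctb_cu_tree_t *ps_node)
{
    S32 i4_count;

    if(NULL == ps_node)
    {
        return 0;
    }

    /* count this node if the CU recursion is allowed to use it */
    i4_count = (ps_node->is_node_valid == 1);

    /* descend into the four quadrants */
    i4_count += hme_count_valid_cu_nodes(ps_node->ps_child_node_tl);
    i4_count += hme_count_valid_cu_nodes(ps_node->ps_child_node_tr);
    i4_count += hme_count_valid_cu_nodes(ps_node->ps_child_node_bl);
    i4_count += hme_count_valid_cu_nodes(ps_node->ps_child_node_br);

    return i4_count;
}
#endif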
5768
5769 static __inline void hme_merge_prms_init(
5770 hme_merge_prms_t *ps_prms,
5771 layer_ctxt_t *ps_curr_layer,
5772 refine_prms_t *ps_refine_prms,
5773 me_frm_ctxt_t *ps_me_ctxt,
5774 range_prms_t *ps_range_prms_rec,
5775 range_prms_t *ps_range_prms_inp,
5776 mv_grid_t **pps_mv_grid,
5777 inter_ctb_prms_t *ps_inter_ctb_prms,
5778 S32 i4_num_pred_dir,
5779 S32 i4_32x32_id,
5780 BLK_SIZE_T e_blk_size,
5781 ME_QUALITY_PRESETS_T e_me_quality_presets)
5782 {
5783 S32 i4_use_rec = ps_refine_prms->i4_use_rec_in_fpel;
5784 S32 i4_cu_16x16 = (BLK_32x32 == e_blk_size) ? (i4_32x32_id << 2) : 0;
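/* a 32x32 merge consumes four consecutive 16x16 search results, hence the index is 4 * i4_32x32_id */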
5785
5786 /* Currently not enabling segmentation info from prev layers */
5787 ps_prms->i4_seg_info_avail = 0;
5788 ps_prms->i4_part_mask = 0;
5789
5790 /* Number of reference pics in which to do merge */
5791 ps_prms->i4_num_ref = i4_num_pred_dir;
5792
5793 /* Layer ctxt info */
5794 ps_prms->ps_layer_ctxt = ps_curr_layer;
5795
5796 ps_prms->ps_inter_ctb_prms = ps_inter_ctb_prms;
5797
5798 /* Top left, top right, bottom left and bottom right 16x16 units */
5799 if(BLK_32x32 == e_blk_size)
5800 {
5801 ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16];
5802 ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 1];
5803 ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 2];
5804 ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 3];
5805
5806 /* Merge results stored here */
5807 ps_prms->ps_results_merge = &ps_me_ctxt->as_search_results_32x32[i4_32x32_id];
5808
5809 /* This could be less than the number of 16x16 results generated */
5810 /* For now, keeping it the same */
5811 ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_fpel_results;
5812 ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[i4_32x32_id << 4];
5813 ps_prms->ps_results_grandchild = NULL;
5814 }
5815 else
5816 {
5817 ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_32x32[0];
5818 ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_32x32[1];
5819 ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_32x32[2];
5820 ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_32x32[3];
5821
5822 /* Merge results stored here */
5823 ps_prms->ps_results_merge = &ps_me_ctxt->s_search_results_64x64;
5824
5825 ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_32x32_merge_results;
5826 ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[0];
5827 ps_prms->ps_results_grandchild = ps_me_ctxt->as_search_results_16x16;
5828 }
5829
5830 if(i4_use_rec)
5831 {
5832 WORD32 ref_ctr;
5833
5834 for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
5835 {
5836 ps_prms->aps_mv_range[ref_ctr] = &ps_range_prms_rec[ref_ctr];
5837 }
5838 }
5839 else
5840 {
5841 WORD32 ref_ctr;
5842
5843 for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
5844 {
5845 ps_prms->aps_mv_range[ref_ctr] = &ps_range_prms_inp[ref_ctr];
5846 }
5847 }
5848 ps_prms->i4_use_rec = i4_use_rec;
5849
5850 ps_prms->pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
5851
5852 ps_prms->pps_mv_grid = pps_mv_grid;
5853
5854 ps_prms->log_ctb_size = ps_me_ctxt->log_ctb_size;
5855
5856 ps_prms->e_quality_preset = e_me_quality_presets;
5857 ps_prms->pi1_future_list = ps_me_ctxt->ai1_future_list;
5858 ps_prms->pi1_past_list = ps_me_ctxt->ai1_past_list;
5859 ps_prms->ps_cluster_info = ps_me_ctxt->ps_ctb_cluster_info;
5860 }
5861
5862 /**
5863 ********************************************************************************
5864 * @fn void hme_refine(me_ctxt_t *ps_thrd_ctxt,
5865 *                     refine_prms_t *ps_refine_prms)
5866 *
5867 * @brief Top level entry point for refinement ME
5868 *
5869 * @param[in,out] ps_thrd_ctxt: ME Handle
5870 *
5871 * @param[in] ps_refine_prms : refinement layer prms
5872 *
5873 * @return None
5874 ********************************************************************************
5875 */
5876 void hme_refine(
5877 me_ctxt_t *ps_thrd_ctxt,
5878 refine_prms_t *ps_refine_prms,
5879 PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
5880 layer_ctxt_t *ps_coarse_layer,
5881 multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
5882 S32 lyr_job_type,
5883 S32 thrd_id,
5884 S32 me_frm_id,
5885 pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input)
5886 {
5887 inter_ctb_prms_t s_common_frm_prms;
5888
5889 BLK_SIZE_T e_search_blk_size, e_result_blk_size;
5890 WORD32 i4_me_frm_id = me_frm_id % MAX_NUM_ME_PARALLEL;
5891 me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
5892 ME_QUALITY_PRESETS_T e_me_quality_presets =
5893 ps_thrd_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
5894
5895 WORD32 num_rows_proc = 0;
5896 WORD32 num_act_ref_pics;
5897 WORD16 i2_prev_enc_frm_max_mv_y;
5898 WORD32 i4_idx_dvsr_p = ps_multi_thrd_ctxt->i4_idx_dvsr_p;
5899
5900 /*************************************************************************/
5901 /* Complexity of search: Low to High */
5902 /*************************************************************************/
5903 SEARCH_COMPLEXITY_T e_search_complexity;
5904
5905 /*************************************************************************/
5906 /* to store the PU results which are passed to the decide_part_types */
5907 /* as input prms. Multiplied by 4 as the max number of Ref in a List is 4*/
5908 /*************************************************************************/
5909
5910 pu_result_t as_pu_results[2][TOT_NUM_PARTS][MAX_NUM_RESULTS_PER_PART_LIST];
5911 inter_pu_results_t as_inter_pu_results[4];
5912 inter_pu_results_t *ps_pu_results = as_inter_pu_results;
5913
5914 /*************************************************************************/
5915 /* Config parameter structures for various ME submodules */
5916 /*************************************************************************/
5917 hme_merge_prms_t s_merge_prms_32x32_tl, s_merge_prms_32x32_tr;
5918 hme_merge_prms_t s_merge_prms_32x32_bl, s_merge_prms_32x32_br;
5919 hme_merge_prms_t s_merge_prms_64x64;
5920 hme_search_prms_t s_search_prms_blk;
5921 mvbank_update_prms_t s_mv_update_prms;
5922 hme_ctb_prms_t s_ctb_prms;
5923 hme_subpel_prms_t s_subpel_prms;
5924 fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_ctxt->ps_fullpel_refine_ctxt;
5925 ctb_cluster_info_t *ps_ctb_cluster_info;
5926 fpel_srch_cand_init_data_t s_srch_cand_init_data;
5927
5928 /* 4 bits (LSBs) of this variable control merge of 4 32x32 CUs in CTB */
5929 S32 en_merge_32x32;
5930 /* 5 LSBs specify whether or not the merge algorithm needs */
5931 /* to be executed. Relevant only in PQ. Ought to be */
5932 /* used in conjunction with en_merge_32x32 and */
5933 /* ps_ctb_bound_attrs->u1_merge_to_64x64_flag. This is */
5934 /* required when all children are deemed to be intras */
5935 S32 en_merge_execution;
5936
5937 /*************************************************************************/
5938 /* All types of search candidates for predictor based search. */
5939 /*************************************************************************/
5940 S32 num_init_candts = 0;
5941 S32 i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
5942 S32 i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
5943 search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
5944 search_node_t as_top_neighbours[4], as_left_neighbours[3];
5945
5946 pf_get_wt_inp fp_get_wt_inp;
5947
5948 search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
5949 U32 au4_unique_node_map[MAP_X_MAX * 2];
5950
5951 /* Controls the boundary attributes of CTB, whether it has 64x64 or not */
5952 ctb_boundary_attrs_t *ps_ctb_bound_attrs;
5953
5954 /*************************************************************************/
5955 /* points to the search results for the blk level search (8x8/16x16) */
5956 /*************************************************************************/
5957 search_results_t *ps_search_results;
5958
5959 /*************************************************************************/
5960 /* Coordinates */
5961 /*************************************************************************/
5962 S32 blk_x, blk_y, i4_ctb_x, i4_ctb_y, tile_col_idx, blk_id_in_ctb;
5963 S32 pos_x, pos_y;
5964 S32 blk_id_in_full_ctb;
5965
5966 /*************************************************************************/
5967 /* Related to dimensions of block being searched and pic dimensions */
5968 /*************************************************************************/
5969 S32 blk_4x4_to_16x16;
5970 S32 blk_wd, blk_ht, blk_size_shift;
5971 S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
5972 S32 num_results_prev_layer;
5973
5974 /*************************************************************************/
5975 /* Size of a basic unit for this layer. For non encode layers, we search */
5976 /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
5977 /* basic unit size is the ctb size. */
5978 /*************************************************************************/
5979 S32 unit_size;
5980
5981 /*************************************************************************/
5982 /* Local variable storing results of any 4 CU merge to bigger CU */
5983 /*************************************************************************/
5984 CU_MERGE_RESULT_T e_merge_result;
5985
5986 /*************************************************************************/
5987 /* This mv grid stores results during and after fpel search, during */
5988 /* merge, subpel and bidirectional refinement stages. 2 instances of this are*/
5989 /* meant for the 2 directions of search (l0 and l1). */
5990 /*************************************************************************/
5991 mv_grid_t *aps_mv_grid[2];
5992
5993 /*************************************************************************/
5994 /* Pointers to context in current and coarser layers */
5995 /*************************************************************************/
5996 layer_ctxt_t *ps_curr_layer, *ps_prev_layer;
5997
5998 /*************************************************************************/
5999 /* to store mv range per blk, and picture limit, allowed search range */
6000 /* range prms in hpel and qpel units as well */
6001 /*************************************************************************/
6002 range_prms_t as_range_prms_inp[MAX_NUM_REF], as_range_prms_rec[MAX_NUM_REF];
6003 range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
6004 range_prms_t as_range_prms_hpel[MAX_NUM_REF], as_range_prms_qpel[MAX_NUM_REF];
6005
6006 /*************************************************************************/
6007 /* These variables are used to track number of references at different */
6008 /* stages of ME. */
6009 /*************************************************************************/
6010 S32 i4_num_pred_dir;
6011 S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
6012 S32 lambda_recon = ps_refine_prms->lambda_recon;
6013
6014 /* Counts successful merge to 32x32 every CTB (0-4) */
6015 S32 merge_count_32x32;
6016
6017 S32 ai4_id_coloc[14], ai4_id_Z[2];
6018 U08 au1_search_candidate_list_index[2];
6019 S32 ai4_num_coloc_cands[2];
6020 U08 u1_pred_dir, u1_pred_dir_ctr;
6021
6022 /*************************************************************************/
6023 /* Input pointer and stride */
6024 /*************************************************************************/
6025 U08 *pu1_inp;
6026 S32 i4_inp_stride;
6027 S32 end_of_frame;
6028 S32 num_sync_units_in_row, num_sync_units_in_tile;
6029
6030 /*************************************************************************/
6031 /* Indicates whether all 4 8x8 blks are valid in the 16x16 blk in the */
6032 /* encode layer. If the mask is not 15, 1 or more 8x8 blks are invalid,*/
6033 /* which means we need to stop merges and force 8x8 CUs for that 16x16 blk */
6034 /*************************************************************************/
6035 S32 blk_8x8_mask;
6036 S32 ai4_blk_8x8_mask[16];
6037 U08 au1_is_64x64Blk_noisy[1];
6038 U08 au1_is_32x32Blk_noisy[4];
6039 U08 au1_is_16x16Blk_noisy[16];
6040
6041 ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
6042 ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
6043 ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
6044 ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
6045
6046 ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
6047
6048 /*************************************************************************/
6049 /* Pointers to current and coarse layer are needed for projection */
6050 /* Pointer to prev layer are needed for other candts like coloc */
6051 /*************************************************************************/
6052 ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
6053
6054 ps_prev_layer = hme_get_past_layer_ctxt(
6055 ps_thrd_ctxt, ps_ctxt, ps_refine_prms->i4_layer_id, ps_multi_thrd_ctxt->i4_num_me_frm_pllel);
6056
6057 num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
6058
6059 /* Function pointer is selected based on the C vs X86 macro */
6060
6061 fp_get_wt_inp = ps_me_optimised_function_list->pf_get_wt_inp_ctb;
6062
6063 i4_inp_stride = ps_curr_layer->i4_inp_stride;
6064 i4_pic_wd = ps_curr_layer->i4_wd;
6065 i4_pic_ht = ps_curr_layer->i4_ht;
6066 e_search_complexity = ps_refine_prms->e_search_complexity;
6067 end_of_frame = 0;
6068
6069 /* This points to all the initial candts */
6070 ps_search_candts = &as_search_candts[0];
6071
6072 /* the mv grid, being a huge structure, is part of the context */
6073 aps_mv_grid[0] = &ps_ctxt->as_mv_grid[0];
6074 aps_mv_grid[1] = &ps_ctxt->as_mv_grid[1];
6075
6076 /*************************************************************************/
6077 /* If the current layer is encoded (since it may be multicast or final */
6078 /* layer (finest)), then we use 16x16 blk size with some selected parts */
6079 /* If the current layer is not encoded, then we use 8x8 blk size, with */
6080 /* enable or disable of 4x4 partitions depending on the input prms */
6081 /*************************************************************************/
6082 e_search_blk_size = BLK_16x16;
6083 blk_wd = blk_ht = 16;
6084 blk_size_shift = 4;
6085 e_result_blk_size = BLK_8x8;
6086 s_mv_update_prms.i4_shift = 1;
6087
6088 if(ps_coarse_layer->ps_layer_mvbank->e_blk_size == BLK_4x4)
6089 {
6090 blk_4x4_to_16x16 = 1;
6091 }
6092 else
6093 {
6094 blk_4x4_to_16x16 = 0;
6095 }
6096
6097 unit_size = 1 << ps_ctxt->log_ctb_size;
6098 s_search_prms_blk.i4_inp_stride = unit_size;
6099
6100 /* This is required to properly update the layer mv bank */
6101 s_mv_update_prms.e_search_blk_size = e_search_blk_size;
6102 s_search_prms_blk.e_blk_size = e_search_blk_size;
6103
6104 /*************************************************************************/
6105 /* If the current layer is explicit, then the number of ref frames is */
6106 /* to be the same as in the previous layer. Else it will be 2 */
6107 /*************************************************************************/
6108 i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
6109 i4_num_pred_dir =
6110 (ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 0) && (i4_num_act_ref_l1 > 0)) +
6111 1;
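/* two prediction directions only when bi-dir is enabled and both L0 and L1 have active references, else one */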
6112
6113 #if USE_MODIFIED == 1
6114 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
6115 #else
6116 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
6117 #endif
6118
6119 i4_num_pred_dir = MIN(i4_num_pred_dir, i4_num_ref_prev_layer);
6120 if(i4_num_ref_prev_layer <= 2)
6121 {
6122 i4_num_ref_each_dir = 1;
6123 }
6124 else
6125 {
6126 i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
6127 }
6128
6129 s_mv_update_prms.i4_num_ref = i4_num_pred_dir;
6130 s_mv_update_prms.i4_num_results_to_store =
6131 MIN((ps_ctxt->s_frm_prms.bidir_enabled) ? ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref
6132 : (i4_num_act_ref_l0 > 1) + 1,
6133 ps_refine_prms->i4_num_results_per_part);
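/* For B frames store up to the MV-bank capacity per ref; for P frames store  */
/* 2 results only when more than one L0 ref is active. Both are capped by the */
/* configured number of results per partition                                 */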
6134
6135 /*************************************************************************/
6136 /* Initialization of merge params for 16x16 to 32x32 merge. */
6137 /* There are 4 32x32 units in a CTB, so 4 param structures initialized */
6138 /*************************************************************************/
6139 {
6140 hme_merge_prms_t *aps_merge_prms[4];
6141 aps_merge_prms[0] = &s_merge_prms_32x32_tl;
6142 aps_merge_prms[1] = &s_merge_prms_32x32_tr;
6143 aps_merge_prms[2] = &s_merge_prms_32x32_bl;
6144 aps_merge_prms[3] = &s_merge_prms_32x32_br;
6145 for(i = 0; i < 4; i++)
6146 {
6147 hme_merge_prms_init(
6148 aps_merge_prms[i],
6149 ps_curr_layer,
6150 ps_refine_prms,
6151 ps_ctxt,
6152 as_range_prms_rec,
6153 as_range_prms_inp,
6154 &aps_mv_grid[0],
6155 &s_common_frm_prms,
6156 i4_num_pred_dir,
6157 i,
6158 BLK_32x32,
6159 e_me_quality_presets);
6160 }
6161 }
6162
6163 /*************************************************************************/
6164 /* Initialization of merge params for 32x32 to 64x64 merge. */
6165 /* There are 4 32x32 units in a CTB, so only 1 64x64 CU can be in CTB */
6166 /*************************************************************************/
6167 {
6168 hme_merge_prms_init(
6169 &s_merge_prms_64x64,
6170 ps_curr_layer,
6171 ps_refine_prms,
6172 ps_ctxt,
6173 as_range_prms_rec,
6174 as_range_prms_inp,
6175 &aps_mv_grid[0],
6176 &s_common_frm_prms,
6177 i4_num_pred_dir,
6178 0,
6179 BLK_64x64,
6180 e_me_quality_presets);
6181 }
6182
6183 /* Pointers to cu_results are initialised here */
6184 {
6185 WORD32 i;
6186
6187 ps_ctxt->s_search_results_64x64.ps_cu_results = &ps_ctxt->s_cu64x64_results;
6188
6189 for(i = 0; i < 4; i++)
6190 {
6191 ps_ctxt->as_search_results_32x32[i].ps_cu_results = &ps_ctxt->as_cu32x32_results[i];
6192 }
6193
6194 for(i = 0; i < 16; i++)
6195 {
6196 ps_ctxt->as_search_results_16x16[i].ps_cu_results = &ps_ctxt->as_cu16x16_results[i];
6197 }
6198 }
6199
6200 /*************************************************************************/
6201 /* SUBPEL Params initialized here */
6202 /*************************************************************************/
6203 {
6204 s_subpel_prms.ps_search_results_16x16 = &ps_ctxt->as_search_results_16x16[0];
6205 s_subpel_prms.ps_search_results_32x32 = &ps_ctxt->as_search_results_32x32[0];
6206 s_subpel_prms.ps_search_results_64x64 = &ps_ctxt->s_search_results_64x64;
6207
6208 s_subpel_prms.i4_num_16x16_candts = ps_refine_prms->i4_num_fpel_results;
6209 s_subpel_prms.i4_num_32x32_candts = ps_refine_prms->i4_num_32x32_merge_results;
6210 s_subpel_prms.i4_num_64x64_candts = ps_refine_prms->i4_num_64x64_merge_results;
6211
6212 s_subpel_prms.i4_num_steps_hpel_refine = ps_refine_prms->i4_num_steps_hpel_refine;
6213 s_subpel_prms.i4_num_steps_qpel_refine = ps_refine_prms->i4_num_steps_qpel_refine;
6214
6215 s_subpel_prms.i4_use_satd = ps_refine_prms->i4_use_satd_subpel;
6216
6217 s_subpel_prms.i4_inp_stride = unit_size;
6218
6219 s_subpel_prms.u1_max_subpel_candts_2Nx2N = ps_refine_prms->u1_max_subpel_candts_2Nx2N;
6220 s_subpel_prms.u1_max_subpel_candts_NxN = ps_refine_prms->u1_max_subpel_candts_NxN;
6221 s_subpel_prms.u1_subpel_candt_threshold = ps_refine_prms->u1_subpel_candt_threshold;
6222
6223 s_subpel_prms.pf_qpel_interp = ps_me_optimised_function_list->pf_qpel_interp_avg_generic;
6224
6225 {
6226 WORD32 ref_ctr;
6227 for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6228 {
6229 s_subpel_prms.aps_mv_range_hpel[ref_ctr] = &as_range_prms_hpel[ref_ctr];
6230 s_subpel_prms.aps_mv_range_qpel[ref_ctr] = &as_range_prms_qpel[ref_ctr];
6231 }
6232 }
6233 s_subpel_prms.pi2_inp_bck = ps_ctxt->pi2_inp_bck;
6234
6235 #if USE_MODIFIED == 0
6236 s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
6237 #else
6238 s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
6239 #endif
6240 s_subpel_prms.e_me_quality_presets = e_me_quality_presets;
6241
6242 /* BI Refinement done only if this field is 1 */
6243 s_subpel_prms.bidir_enabled = ps_refine_prms->bidir_enabled;
6244
6245 s_subpel_prms.u1_num_ref = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
6246
6247 s_subpel_prms.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6248 s_subpel_prms.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6249 s_subpel_prms.u1_max_num_subpel_refine_centers =
6250 ps_refine_prms->u1_max_num_subpel_refine_centers;
6251 }
6252
6253 /* inter_ctb_prms_t struct initialisation */
6254 {
6255 inter_ctb_prms_t *ps_inter_ctb_prms = &s_common_frm_prms;
6256 hme_subpel_prms_t *ps_subpel_prms = &s_subpel_prms;
6257
6258 ps_inter_ctb_prms->pps_rec_list_l0 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l0;
6259 ps_inter_ctb_prms->pps_rec_list_l1 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l1;
6260 ps_inter_ctb_prms->wpred_log_wdc = ps_ctxt->s_wt_pred.wpred_log_wdc;
6261 ps_inter_ctb_prms->u1_max_tr_depth = ps_thrd_ctxt->s_init_prms.u1_max_tr_depth;
6262 ps_inter_ctb_prms->i1_quality_preset = e_me_quality_presets;
6263 ps_inter_ctb_prms->i4_bidir_enabled = ps_subpel_prms->bidir_enabled;
6264 ps_inter_ctb_prms->i4_inp_stride = ps_subpel_prms->i4_inp_stride;
6265 ps_inter_ctb_prms->u1_num_ref = ps_subpel_prms->u1_num_ref;
6266 ps_inter_ctb_prms->u1_use_satd = ps_subpel_prms->i4_use_satd;
6267 ps_inter_ctb_prms->i4_rec_stride = ps_curr_layer->i4_rec_stride;
6268 ps_inter_ctb_prms->u1_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6269 ps_inter_ctb_prms->u1_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6270 ps_inter_ctb_prms->i4_lamda = lambda_recon;
6271 ps_inter_ctb_prms->u1_lamda_qshift = ps_refine_prms->lambda_q_shift;
6272 ps_inter_ctb_prms->i4_qstep_ls8 = ps_ctxt->ps_hme_frm_prms->qstep_ls8;
6273 ps_inter_ctb_prms->pi4_inv_wt = ps_ctxt->s_wt_pred.a_inv_wpred_wt;
6274 ps_inter_ctb_prms->pi1_past_list = ps_ctxt->ai1_past_list;
6275 ps_inter_ctb_prms->pi1_future_list = ps_ctxt->ai1_future_list;
6276 ps_inter_ctb_prms->pu4_src_variance = s_search_prms_blk.au4_src_variance;
6277 ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands =
6278 ps_refine_prms->u1_max_2nx2n_tu_recur_cands;
6279 }
6280
6281 for(i = 0; i < MAX_INIT_CANDTS; i++)
6282 {
6283 ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
6284 ps_search_candts[i].ps_search_node->ps_mv = &ps_ctxt->as_search_cand_mv[i];
6285
6286 INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
6287 }
6288 num_act_ref_pics =
6289 ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6290
6291 if(num_act_ref_pics)
6292 {
6293 hme_search_cand_data_init(
6294 ai4_id_Z,
6295 ai4_id_coloc,
6296 ai4_num_coloc_cands,
6297 au1_search_candidate_list_index,
6298 i4_num_act_ref_l0,
6299 i4_num_act_ref_l1,
6300 ps_ctxt->s_frm_prms.bidir_enabled,
6301 blk_4x4_to_16x16);
6302 }
6303
6304 if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 1))
6305 {
6306 ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
6307 ps_search_candts[ai4_id_Z[1]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[1];
6308 }
6309 else if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 == 1))
6310 {
6311 ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
6312 }
6313
6314 for(i = 0; i < 3; i++)
6315 {
6316 search_node_t *ps_search_node;
6317 ps_search_node = &as_left_neighbours[i];
6318 INIT_SEARCH_NODE(ps_search_node, 0);
6319 ps_search_node = &as_top_neighbours[i];
6320 INIT_SEARCH_NODE(ps_search_node, 0);
6321 }
6322
6323 INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
6324 as_left_neighbours[2].u1_is_avail = 0;
6325
6326 /*************************************************************************/
6327 /* Initialize all the search results structure here. We update all the */
6328 /* search results to default values, and configure things like blk sizes */
6329 /*************************************************************************/
6330 if(num_act_ref_pics)
6331 {
6332 S32 i4_x, i4_y;
6333 /* 16x16 results */
6334 for(i = 0; i < 16; i++)
6335 {
6336 search_results_t *ps_search_results;
6337 S32 pred_lx;
6338 ps_search_results = &ps_ctxt->as_search_results_16x16[i];
6339 i4_x = (S32)gau1_encode_to_raster_x[i];
6340 i4_y = (S32)gau1_encode_to_raster_y[i];
6341 i4_x <<= 4;
6342 i4_y <<= 4;
6343
6344 hme_init_search_results(
6345 ps_search_results,
6346 i4_num_pred_dir,
6347 ps_refine_prms->i4_num_fpel_results,
6348 ps_refine_prms->i4_num_results_per_part,
6349 e_search_blk_size,
6350 i4_x,
6351 i4_y,
6352 &ps_ctxt->au1_is_past[0]);
6353
6354 for(pred_lx = 0; pred_lx < 2; pred_lx++)
6355 {
6356 pred_ctxt_t *ps_pred_ctxt;
6357
6358 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6359
6360 hme_init_pred_ctxt_encode(
6361 ps_pred_ctxt,
6362 ps_search_results,
6363 ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6364 ps_search_candts[ai4_id_Z[0]].ps_search_node,
6365 aps_mv_grid[pred_lx],
6366 pred_lx,
6367 lambda_recon,
6368 ps_refine_prms->lambda_q_shift,
6369 &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6370 &ps_ctxt->ai2_ref_scf[0]);
6371 }
6372 }
6373
6374 for(i = 0; i < 4; i++)
6375 {
6376 search_results_t *ps_search_results;
6377 S32 pred_lx;
6378 ps_search_results = &ps_ctxt->as_search_results_32x32[i];
6379
6380 i4_x = (S32)gau1_encode_to_raster_x[i];
6381 i4_y = (S32)gau1_encode_to_raster_y[i];
6382 i4_x <<= 5;
6383 i4_y <<= 5;
6384
6385 hme_init_search_results(
6386 ps_search_results,
6387 i4_num_pred_dir,
6388 ps_refine_prms->i4_num_32x32_merge_results,
6389 ps_refine_prms->i4_num_results_per_part,
6390 BLK_32x32,
6391 i4_x,
6392 i4_y,
6393 &ps_ctxt->au1_is_past[0]);
6394
6395 for(pred_lx = 0; pred_lx < 2; pred_lx++)
6396 {
6397 pred_ctxt_t *ps_pred_ctxt;
6398
6399 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6400
6401 hme_init_pred_ctxt_encode(
6402 ps_pred_ctxt,
6403 ps_search_results,
6404 ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6405 ps_search_candts[ai4_id_Z[0]].ps_search_node,
6406 aps_mv_grid[pred_lx],
6407 pred_lx,
6408 lambda_recon,
6409 ps_refine_prms->lambda_q_shift,
6410 &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6411 &ps_ctxt->ai2_ref_scf[0]);
6412 }
6413 }
6414
6415 {
6416 search_results_t *ps_search_results;
6417 S32 pred_lx;
6418 ps_search_results = &ps_ctxt->s_search_results_64x64;
6419
6420 hme_init_search_results(
6421 ps_search_results,
6422 i4_num_pred_dir,
6423 ps_refine_prms->i4_num_64x64_merge_results,
6424 ps_refine_prms->i4_num_results_per_part,
6425 BLK_64x64,
6426 0,
6427 0,
6428 &ps_ctxt->au1_is_past[0]);
6429
6430 for(pred_lx = 0; pred_lx < 2; pred_lx++)
6431 {
6432 pred_ctxt_t *ps_pred_ctxt;
6433
6434 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6435
6436 hme_init_pred_ctxt_encode(
6437 ps_pred_ctxt,
6438 ps_search_results,
6439 ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6440 ps_search_candts[ai4_id_Z[0]].ps_search_node,
6441 aps_mv_grid[pred_lx],
6442 pred_lx,
6443 lambda_recon,
6444 ps_refine_prms->lambda_q_shift,
6445 &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6446 &ps_ctxt->ai2_ref_scf[0]);
6447 }
6448 }
6449 }
6450
6451 /* Initialise the structure used in clustering */
6452 if(ME_PRISTINE_QUALITY == e_me_quality_presets)
6453 {
6454 ps_ctb_cluster_info = ps_ctxt->ps_ctb_cluster_info;
6455
6456 ps_ctb_cluster_info->ps_16x16_blk = ps_ctxt->ps_blk_16x16;
6457 ps_ctb_cluster_info->ps_32x32_blk = ps_ctxt->ps_blk_32x32;
6458 ps_ctb_cluster_info->ps_64x64_blk = ps_ctxt->ps_blk_64x64;
6459 ps_ctb_cluster_info->pi4_blk_8x8_mask = ai4_blk_8x8_mask;
6460 ps_ctb_cluster_info->sdi_threshold = ps_refine_prms->sdi_threshold;
6461 ps_ctb_cluster_info->i4_frame_qstep = ps_ctxt->frm_qstep;
6462 ps_ctb_cluster_info->i4_frame_qstep_multiplier = 16;
6463 }
6464
6465 /*********************************************************************/
6466 /* Initialize the dyn. search range params. for each reference index */
6467 /* in current layer ctxt */
6468 /*********************************************************************/
6469
6470 /* Only for P pic. For P, both flags are 0; I and B have them mutually exclusive */
6471 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
6472 {
6473 WORD32 ref_ctr;
6474 /* set no. of act ref in L0 for further use at frame level */
6475 ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_num_act_ref_in_l0 =
6476 ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6477
6478 for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++)
6479 {
6480 INIT_DYN_SEARCH_PRMS(
6481 &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[ref_ctr],
6482 ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
6483 }
6484 }
6485 /*************************************************************************/
6486 /* Now that the candidates have been ordered, choose the right number */
6487 /* of initial candidates. */
6488 /*************************************************************************/
6489 if(blk_4x4_to_16x16)
6490 {
6491 if(i4_num_ref_prev_layer > 2)
6492 {
6493 if(e_search_complexity == SEARCH_CX_LOW)
6494 num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6495 else if(e_search_complexity == SEARCH_CX_MED)
6496 num_init_candts = 14 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6497 else if(e_search_complexity == SEARCH_CX_HIGH)
6498 num_init_candts = 21 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6499 else
6500 ASSERT(0);
6501 }
6502 else if(i4_num_ref_prev_layer == 2)
6503 {
6504 if(e_search_complexity == SEARCH_CX_LOW)
6505 num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6506 else if(e_search_complexity == SEARCH_CX_MED)
6507 num_init_candts = 12 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6508 else if(e_search_complexity == SEARCH_CX_HIGH)
6509 num_init_candts = 19 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6510 else
6511 ASSERT(0);
6512 }
6513 else
6514 {
6515 if(e_search_complexity == SEARCH_CX_LOW)
6516 num_init_candts = 5;
6517 else if(e_search_complexity == SEARCH_CX_MED)
6518 num_init_candts = 12;
6519 else if(e_search_complexity == SEARCH_CX_HIGH)
6520 num_init_candts = 19;
6521 else
6522 ASSERT(0);
6523 }
6524 }
6525 else
6526 {
6527 if(i4_num_ref_prev_layer > 2)
6528 {
6529 if(e_search_complexity == SEARCH_CX_LOW)
6530 num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6531 else if(e_search_complexity == SEARCH_CX_MED)
6532 num_init_candts = 13 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6533 else if(e_search_complexity == SEARCH_CX_HIGH)
6534 num_init_candts = 18 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6535 else
6536 ASSERT(0);
6537 }
6538 else if(i4_num_ref_prev_layer == 2)
6539 {
6540 if(e_search_complexity == SEARCH_CX_LOW)
6541 num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6542 else if(e_search_complexity == SEARCH_CX_MED)
6543 num_init_candts = 11 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6544 else if(e_search_complexity == SEARCH_CX_HIGH)
6545 num_init_candts = 16 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6546 else
6547 ASSERT(0);
6548 }
6549 else
6550 {
6551 if(e_search_complexity == SEARCH_CX_LOW)
6552 num_init_candts = 5;
6553 else if(e_search_complexity == SEARCH_CX_MED)
6554 num_init_candts = 11;
6555 else if(e_search_complexity == SEARCH_CX_HIGH)
6556 num_init_candts = 16;
6557 else
6558 ASSERT(0);
6559 }
6560 }
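/* Note: the (!bidir_enabled + 1) factor above doubles the candidate count for uni-pred (P) frames */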
6561
6562 /*************************************************************************/
6563 /* The following search parameters are fixed throughout the search across*/
6564 /* all blks. So these are configured outside processing loop */
6565 /*************************************************************************/
6566 s_search_prms_blk.i4_num_init_candts = num_init_candts;
6567 s_search_prms_blk.i4_start_step = 1;
6568 s_search_prms_blk.i4_use_satd = 0;
6569 s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
6570 /* we use recon only for encoded layers, otherwise it is not available */
6571 s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
6572
6573 s_search_prms_blk.ps_search_candts = ps_search_candts;
6574 if(s_search_prms_blk.i4_use_rec)
6575 {
6576 WORD32 ref_ctr;
6577 for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6578 s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_rec[ref_ctr];
6579 }
6580 else
6581 {
6582 WORD32 ref_ctr;
6583 for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6584 s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_inp[ref_ctr];
6585 }
6586
6587 /*************************************************************************/
6588 /* Initialize coordinates. Meaning as follows */
6589 /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks */
6590 /* blk_y : same as above, y coord. */
6591 /* num_blks_in_this_ctb : number of blks in this given ctb that starts */
6592 /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries. */
6593 /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left */
6594 /* corner of the picture. Always multiple of 64. */
6595 /* blk_id_in_ctb : encode order id of the blk in the ctb. */
6596 /*************************************************************************/
6597 blk_y = 0;
6598 blk_id_in_ctb = 0;
6599 i4_ctb_y = 0;
6600
6601 /*************************************************************************/
6602 /* Picture limit on all 4 sides. This will be used to set mv limits for */
6603 /* every block given its coordinate. Note this assumes that the min amt */
6604 /* of padding to right of pic is equal to the blk size. If we go all the */
6605 /* way up to 64x64, then the min padding on right side of picture should */
6606 /* be 64, and also on bottom side of picture. */
6607 /*************************************************************************/
6608 SET_PIC_LIMIT(
6609 s_pic_limit_inp,
6610 ps_curr_layer->i4_pad_x_rec,
6611 ps_curr_layer->i4_pad_y_rec,
6612 ps_curr_layer->i4_wd,
6613 ps_curr_layer->i4_ht,
6614 s_search_prms_blk.i4_num_steps_post_refine);
6615
6616 SET_PIC_LIMIT(
6617 s_pic_limit_rec,
6618 ps_curr_layer->i4_pad_x_rec,
6619 ps_curr_layer->i4_pad_y_rec,
6620 ps_curr_layer->i4_wd,
6621 ps_curr_layer->i4_ht,
6622 s_search_prms_blk.i4_num_steps_post_refine);
6623
6624 /*************************************************************************/
6625 /* set the MV limit per ref. pic. */
6626 /* - P pic. : Based on the config params. */
6627 /* - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
6628 /*************************************************************************/
6629 hme_set_mv_limit_using_dvsr_data(
6630 ps_ctxt, ps_curr_layer, as_mv_limit, &i2_prev_enc_frm_max_mv_y, num_act_ref_pics);
6631 s_srch_cand_init_data.pu1_num_fpel_search_cands = ps_refine_prms->au1_num_fpel_search_cands;
6632 s_srch_cand_init_data.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6633 s_srch_cand_init_data.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6634 s_srch_cand_init_data.ps_coarse_layer = ps_coarse_layer;
6635 s_srch_cand_init_data.ps_curr_layer = ps_curr_layer;
6636 s_srch_cand_init_data.i4_max_num_init_cands = num_init_candts;
6637 s_srch_cand_init_data.ps_search_cands = ps_search_candts;
6638 s_srch_cand_init_data.u1_num_results_in_mvbank = s_mv_update_prms.i4_num_results_to_store;
6639 s_srch_cand_init_data.pi4_ref_id_lc_to_l0_map = ps_ctxt->a_ref_idx_lc_to_l0;
6640 s_srch_cand_init_data.pi4_ref_id_lc_to_l1_map = ps_ctxt->a_ref_idx_lc_to_l1;
6641 s_srch_cand_init_data.e_search_blk_size = e_search_blk_size;
6642
6643 while(0 == end_of_frame)
6644 {
6645 job_queue_t *ps_job;
6646 frm_ctb_ctxt_t *ps_frm_ctb_prms;
6647 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
6648
6649 WORD32 i4_max_mv_x_in_ctb;
6650 WORD32 i4_max_mv_y_in_ctb;
6651 void *pv_dep_mngr_encloop_dep_me;
6652 WORD32 offset_val, check_dep_pos, set_dep_pos;
6653 WORD32 left_ctb_in_diff_tile, i4_first_ctb_x = 0;
6654
6655 pv_dep_mngr_encloop_dep_me = ps_ctxt->pv_dep_mngr_encloop_dep_me;
6656
6657 ps_frm_ctb_prms = (frm_ctb_ctxt_t *)ps_thrd_ctxt->pv_ext_frm_prms;
6658
6659 /* Get the current row from the job queue */
6660 ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
6661 ps_multi_thrd_ctxt, lyr_job_type, 1, me_frm_id);
6662
6663 /* If all rows are done, set the end of process flag to 1, */
6664 /* and the current row to -1 */
6665 if(NULL == ps_job)
6666 {
6667 blk_y = -1;
6668 i4_ctb_y = -1;
6669 tile_col_idx = -1;
6670 end_of_frame = 1;
6671
6672 continue;
6673 }
6674
6675 /* set the output dependency after picking up the row */
6676 ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, me_frm_id);
6677
6678 /* Obtain the current row's details from the job */
6679 {
6680 ihevce_tile_params_t *ps_col_tile_params;
6681
6682 i4_ctb_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
6683 /* Obtain the current colum tile index from the job */
6684 tile_col_idx = ps_job->s_job_info.s_me_job_info.i4_tile_col_idx;
6685
6686 /* in the encode layer, blocks are 16x16 and the CTB is 64 x 64 */
6687 /* note: if the ctb is 32x32, then this calc needs to be changed */
6688 num_sync_units_in_row = (i4_pic_wd + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
6689 ps_ctxt->log_ctb_size;
6690
6691 /* The tile parameter for the col. idx. Use only the properties
6692 which are the same for all the bottom tiles, like width, start_x, etc.
6693 Don't use height, start_y, etc. */
6694 ps_col_tile_params =
6695 ((ihevce_tile_params_t *)ps_thrd_ctxt->pv_tile_params_base + tile_col_idx);
6696 /* in the encode layer, blocks are 16x16 and the CTB is 64 x 64 */
6697 /* note: if the ctb is 32x32, then this calc needs to be changed */
6698 num_sync_units_in_tile =
6699 (ps_col_tile_params->i4_curr_tile_width + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
6700 ps_ctxt->log_ctb_size;
6701
6702 i4_first_ctb_x = ps_col_tile_params->i4_first_ctb_x;
6703 i4_ctb_x = i4_first_ctb_x;
6704
6705 if(!num_act_ref_pics)
6706 {
6707 for(i4_ctb_x = i4_first_ctb_x;
6708 i4_ctb_x < (ps_col_tile_params->i4_first_ctb_x + num_sync_units_in_tile);
6709 i4_ctb_x++)
6710 {
6711 S32 blk_i = 0, blk_j = 0;
6712 /* set the dependency for the corresponding row in enc loop */
6713 ihevce_dmgr_set_row_row_sync(
6714 pv_dep_mngr_encloop_dep_me,
6715 (i4_ctb_x + 1),
6716 i4_ctb_y,
6717 tile_col_idx /* Col Tile No. */);
6718 }
6719
6720 continue;
6721 }
6722
6723 /* increment the number of rows proc */
6724 num_rows_proc++;
6725
6726 /* Set Variables for Dep. Checking and Setting */
6727 set_dep_pos = i4_ctb_y + 1;
6728 if(i4_ctb_y > 0)
6729 {
6730 offset_val = 2;
6731 check_dep_pos = i4_ctb_y - 1;
6732 }
6733 else
6734 {
6735 /* First row should run without waiting */
6736 offset_val = -1;
6737 check_dep_pos = 0;
6738 }
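/* i.e. except for the first row, this row waits until the row above has */
/* advanced 2 sync units beyond the current position (top and top-right  */
/* CTBs available)                                                        */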
6739
6740 /* row ctb out pointer */
6741 ps_ctxt->ps_ctb_analyse_curr_row =
6742 ps_ctxt->ps_ctb_analyse_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6743
6744 /* Row level CU Tree buffer */
6745 ps_ctxt->ps_cu_tree_curr_row =
6746 ps_ctxt->ps_cu_tree_base +
6747 i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE;
6748
6749 ps_ctxt->ps_me_ctb_data_curr_row =
6750 ps_ctxt->ps_me_ctb_data_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6751 }
6752
6753 /* This flag says the CTB under processing is at the start of a tile in the horz dir. */
6754 left_ctb_in_diff_tile = 1;
6755
6756 /* To make sure no 64-bit overflow happens when inv_wt is multiplied with un-normalized src_var, */
6757 /* the shift value will be passed on to the functions wherever inv_wt is used, so that inv_wt is appropriately shifted and multiplied */
6758 {
6759 S32 i4_ref_id, i4_bits_req;
6760
6761 for(i4_ref_id = 0; i4_ref_id < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
6762 ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
6763 i4_ref_id++)
6764 {
6765 GETRANGE(i4_bits_req, ps_ctxt->s_wt_pred.a_inv_wpred_wt[i4_ref_id]);
6766
6767 if(i4_bits_req > 12)
6768 {
6769 ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = (i4_bits_req - 12);
6770 }
6771 else
6772 {
6773 ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = 0;
6774 }
6775 }
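/* e.g. an inverse weight that needs 15 bits gets a shift of 3, so that after */
/* the downstream right-shift it fits back within 12 bits before the multiply */
/* with the un-normalized source variance                                     */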
6776
6777 s_common_frm_prms.pi4_inv_wt_shift_val = ps_ctxt->s_wt_pred.ai4_shift_val;
6778 }
6779
6780 /* if non-encode layer then i4_ctb_x will be same as blk_x */
6781 /* loop over all the units in a row */
6782 for(i4_ctb_x = i4_first_ctb_x; i4_ctb_x < (i4_first_ctb_x + num_sync_units_in_tile);
6783 i4_ctb_x++)
6784 {
6785 ihevce_ctb_noise_params *ps_ctb_noise_params =
6786 &ps_ctxt->ps_ctb_analyse_curr_row[i4_ctb_x].s_ctb_noise_params;
6787
6788 s_common_frm_prms.i4_ctb_x_off = i4_ctb_x << 6;
6789 s_common_frm_prms.i4_ctb_y_off = i4_ctb_y << 6;
6790
6791 ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = i4_ctb_y << 6;
6792 ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = i4_ctb_x << 6;
6793 /* Initialize ptr to current IPE CTB */
6794 ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x +
6795 i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6796 {
6797 ps_ctb_bound_attrs =
6798 get_ctb_attrs(i4_ctb_x << 6, i4_ctb_y << 6, i4_pic_wd, i4_pic_ht, ps_ctxt);
6799
6800 en_merge_32x32 = ps_ctb_bound_attrs->u1_merge_to_32x32_flag;
6801 num_blks_in_this_ctb = ps_ctb_bound_attrs->u1_num_blks_in_ctb;
6802 }
6803
6804 /* Block to initialise pointers to part_type_results_t */
6805 /* in each size-specific inter_cu_results_t */
6806 {
6807 WORD32 i;
6808
6809 for(i = 0; i < 64; i++)
6810 {
6811 ps_ctxt->as_cu8x8_results[i].ps_best_results =
6812 ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
6813 .as_8x8_block_data[i]
6814 .as_best_results;
6815 ps_ctxt->as_cu8x8_results[i].u1_num_best_results = 0;
6816 }
6817
6818 for(i = 0; i < 16; i++)
6819 {
6820 ps_ctxt->as_cu16x16_results[i].ps_best_results =
6821 ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].as_block_data[i].as_best_results;
6822 ps_ctxt->as_cu16x16_results[i].u1_num_best_results = 0;
6823 }
6824
6825 for(i = 0; i < 4; i++)
6826 {
6827 ps_ctxt->as_cu32x32_results[i].ps_best_results =
6828 ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
6829 .as_32x32_block_data[i]
6830 .as_best_results;
6831 ps_ctxt->as_cu32x32_results[i].u1_num_best_results = 0;
6832 }
6833
6834 ps_ctxt->s_cu64x64_results.ps_best_results =
6835 ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].s_64x64_block_data.as_best_results;
6836 ps_ctxt->s_cu64x64_results.u1_num_best_results = 0;
6837 }
6838
6839 if(ME_PRISTINE_QUALITY == e_me_quality_presets)
6840 {
6841 ps_ctb_cluster_info->blk_32x32_mask = en_merge_32x32;
6842 ps_ctb_cluster_info->ps_cur_ipe_ctb = ps_cur_ipe_ctb;
6843 ps_ctb_cluster_info->ps_cu_tree_root =
6844 ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
6845 ps_ctb_cluster_info->nodes_created_in_cu_tree = 1;
6846 }
6847
6848 if(ME_PRISTINE_QUALITY != e_me_quality_presets)
6849 {
6850 S32 i4_nodes_created_in_cu_tree = 1;
6851
6852 ihevce_cu_tree_init(
6853 (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
6854 (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
6855 &i4_nodes_created_in_cu_tree,
6856 0,
6857 POS_NA,
6858 POS_NA,
6859 POS_NA);
6860 }
6861
6862 memset(ai4_blk_8x8_mask, 0, 16 * sizeof(S32));
6863
6864 if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
6865 {
6866 S32 j;
6867
6868 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
6869
6870 ps_cur_ipe_ctb =
6871 ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x + i4_ctb_y * num_sync_units_in_row;
6872 lambda_recon =
6873 hme_recompute_lambda_from_min_8x8_act_in_ctb(ps_ctxt, ps_cur_ipe_ctb);
6874
6875 lambda_recon = ((float)lambda_recon * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f);
6876
6877 for(i = 0; i < 4; i++)
6878 {
6879 ps_search_results = &ps_ctxt->as_search_results_32x32[i];
6880
6881 for(j = 0; j < 2; j++)
6882 {
6883 ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
6884 }
6885 }
6886 ps_search_results = &ps_ctxt->s_search_results_64x64;
6887
6888 for(j = 0; j < 2; j++)
6889 {
6890 ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
6891 }
6892
6893 s_common_frm_prms.i4_lamda = lambda_recon;
6894 }
6895 else
6896 {
6897 lambda_recon = ps_refine_prms->lambda_recon;
6898 }
6899
6900 /*********************************************************************/
6901 /* replicate the inp buffer at blk or ctb level for each ref id, */
6902 /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
6903 /* thereby avoiding a bloat-up of memory. If we applied weighted pred */
6904 /* to all references, we would end up with a duplicate copy of each   */
6905 /* ref at each layer, since we need to preserve the original reference.*/
6906 /* ToDo: Need to observe performance with this mechanism and compare */
6907 /* with case where ref is weighted. */
6908 /*********************************************************************/
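/* Worked example: with a (normalised) weight wk = 2 on ref k, searching the */
/* scaled input Ik = I / 2 against the unweighted reference gives a residual */
/* (I - 2 * ref) / 2, i.e. the weighted-prediction error scaled by 1/wk, so  */
/* the fpel candidate ranking for that reference is preserved without        */
/* keeping a weighted copy of the reference planes                           */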
6909 fp_get_wt_inp(
6910 ps_curr_layer,
6911 &ps_ctxt->s_wt_pred,
6912 unit_size,
6913 s_common_frm_prms.i4_ctb_x_off,
6914 s_common_frm_prms.i4_ctb_y_off,
6915 unit_size,
6916 ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
6917 ps_ctxt->i4_wt_pred_enable_flag);
6918
6919 if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled)
6920 {
6921 #if TEMPORAL_NOISE_DETECT
6922 {
6923 WORD32 had_block_size = 16;
6924 WORD32 ctb_width = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
6925 ? 64
6926 : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
6927 WORD32 ctb_height = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
6928 ? 64
6929 : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
6930 WORD32 num_pred_dir = i4_num_pred_dir;
6931 WORD32 i4_x_off = s_common_frm_prms.i4_ctb_x_off;
6932 WORD32 i4_y_off = s_common_frm_prms.i4_ctb_y_off;
6933
6934 WORD32 i;
6935 WORD32 noise_detected;
6936 WORD32 ctb_size;
6937 WORD32 num_comp_had_blocks;
6938 WORD32 noisy_block_cnt;
6939 WORD32 index_8x8_block;
6940 WORD32 num_8x8_in_ctb_row;
6941
6942 WORD32 ht_offset;
6943 WORD32 wd_offset;
6944 WORD32 block_ht;
6945 WORD32 block_wd;
6946
6947 WORD32 num_horz_blocks;
6948 WORD32 num_vert_blocks;
6949
6950 WORD32 mean;
6951 UWORD32 variance_8x8;
6952
6953 WORD32 hh_energy_percent;
6954
6955                 /* variables to hold constants whose values depend on the HAD block size */
6956 WORD32 min_noisy_block_cnt;
6957 WORD32 min_coeffs_above_avg;
6958 WORD32 min_coeff_avg_energy;
6959
6960                 /* to store the mean and variance of each 8x8 block; variances of larger block sizes are derived from these later */
6961 WORD32 i4_cu_x_off, i4_cu_y_off;
6962 WORD32 is_noisy;
6963
6964                 /* initialise the variables holding the constants */
6965 if(had_block_size == 8)
6966 {
6967 min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_8x8; //6;//
6968 min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_8x8;
6969 min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_8x8;
6970 }
6971 else
6972 {
6973 min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_16x16; //7;//
6974 min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_16x16;
6975 min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_16x16;
6976 }
6977
6978 /* initialize the variables */
6979 noise_detected = 0;
6980 noisy_block_cnt = 0;
6981 hh_energy_percent = 0;
6982 variance_8x8 = 0;
6983 block_ht = ctb_height;
6984 block_wd = ctb_width;
6985
6986 mean = 0;
6987
6988 ctb_size = block_ht * block_wd; //ctb_width * ctb_height;
6989 num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
6990
6991 num_horz_blocks = block_wd / had_block_size; //ctb_width / had_block_size;
6992 num_vert_blocks = block_ht / had_block_size; //ctb_height / had_block_size;
6993
6994 ht_offset = -had_block_size;
6995 wd_offset = -had_block_size;
6996
6997 num_8x8_in_ctb_row = block_wd / 8; // number of 8x8 in this ctb
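                     /* Walk all 16x16 HAD blocks of the CTB: a block is passed to the
                        temporal noise detector only if the spatial 8x8 noise map already
                        flags it as noisy; detections accumulate in noisy_block_cnt and are
                        compared against min_noisy_block_cnt after the loop. */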
6998 for(i = 0; i < num_comp_had_blocks; i++)
6999 {
7000 if(i % num_horz_blocks == 0)
7001 {
7002 wd_offset = -had_block_size;
7003 ht_offset += had_block_size;
7004 }
7005 wd_offset += had_block_size;
7006
7007 /* CU level offsets */
7008 i4_cu_x_off = i4_x_off + (i % 4) * 16; //+ (i % 4) * 16
7009 i4_cu_y_off = i4_y_off + (i / 4) * 16;
7010
7011 /* if 50 % or more of the CU is noisy then the return value is 1 */
7012 is_noisy = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7013 ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7014 (i % 4) * 16,
7015 (i / 4) * 16,
7016 16);
7017
7018                     /* the temporal noise detection call is made on the CU only if the CU is flagged as noisy */
7019 if(is_noisy)
7020 {
7021 index_8x8_block = (i / num_horz_blocks) * 2 * num_8x8_in_ctb_row +
7022 (i % num_horz_blocks) * 2;
7023 noisy_block_cnt += ihevce_16x16block_temporal_noise_detect(
7024 16,
7025 ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
7026 ? 64
7027 : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off,
7028 ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
7029 ? 64
7030 : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off,
7031 ps_ctb_noise_params,
7032 &s_srch_cand_init_data,
7033 &s_search_prms_blk,
7034 ps_ctxt,
7035 num_pred_dir,
7036 i4_num_act_ref_l0,
7037 i4_num_act_ref_l1,
7038 i4_cu_x_off,
7039 i4_cu_y_off,
7040 &ps_ctxt->s_wt_pred,
7041 unit_size,
7042 index_8x8_block,
7043 num_horz_blocks,
7044                             /*num_8x8_in_ctb_row*/ 8, // ToDo: should use the num_8x8_in_ctb_row variable instead of the hard-coded 8
7045 i);
7046 } /* if 16x16 is noisy */
7047 } /* loop over for all 16x16*/
7048
7049 if(noisy_block_cnt >= min_noisy_block_cnt)
7050 {
7051 noise_detected = 1;
7052 }
7053
7054 /* write back the noise presence detected for the current CTB to the structure */
7055 ps_ctb_noise_params->i4_noise_present = noise_detected;
7056 }
7057 #endif
7058
7059 #if EVERYWHERE_NOISY && USE_NOISE_TERM_IN_L0_ME
7060 if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled &&
7061 ps_ctb_noise_params->i4_noise_present)
7062 {
7063 memset(
7064 ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7065 1,
7066 sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
7067 }
7068 #endif
7069
7070 for(i = 0; i < 16; i++)
7071 {
7072 au1_is_16x16Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7073 ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 4) * 16, (i / 4) * 16, 16);
7074 }
7075
7076 for(i = 0; i < 4; i++)
7077 {
7078 au1_is_32x32Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7079 ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 2) * 32, (i / 2) * 32, 32);
7080 }
7081
7082 for(i = 0; i < 1; i++)
7083 {
7084 au1_is_64x64Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7085 ps_ctb_noise_params->au1_is_8x8Blk_noisy, 0, 0, 64);
7086 }
7087
7088 if(ps_ctxt->s_frm_prms.bidir_enabled &&
7089 (ps_ctxt->s_frm_prms.i4_temporal_layer_id <=
7090 MAX_LAYER_ID_OF_B_PICS_WITHOUT_NOISE_DETECTION))
7091 {
7092 ps_ctb_noise_params->i4_noise_present = 0;
7093 memset(
7094 ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7095 0,
7096 sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
7097 }
7098
7099 #if ME_LAMBDA_DISCOUNT_WHEN_NOISY
7100 for(i = 0; i < 4; i++)
7101 {
7102 S32 j;
7103 S32 lambda;
7104
7105 if(au1_is_32x32Blk_noisy[i])
7106 {
7107 lambda = lambda_recon;
7108 lambda =
7109 ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7110
7111 ps_search_results = &ps_ctxt->as_search_results_32x32[i];
7112
7113 for(j = 0; j < 2; j++)
7114 {
7115 ps_search_results->as_pred_ctxt[j].lambda = lambda;
7116 }
7117 }
7118 }
7119
7120 {
7121 S32 j;
7122 S32 lambda;
7123
7124 if(au1_is_64x64Blk_noisy[0])
7125 {
7126 lambda = lambda_recon;
7127 lambda =
7128 ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7129
7130 ps_search_results = &ps_ctxt->s_search_results_64x64;
7131
7132 for(j = 0; j < 2; j++)
7133 {
7134 ps_search_results->as_pred_ctxt[j].lambda = lambda;
7135 }
7136 }
7137 }
7138 #endif
7139 if(au1_is_64x64Blk_noisy[0])
7140 {
7141 U08 *pu1_inp = ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
7142 (s_common_frm_prms.i4_ctb_y_off *
7143 ps_curr_layer->i4_inp_stride));
7144
7145 hme_compute_sigmaX_and_sigmaXSquared(
7146 pu1_inp,
7147 ps_curr_layer->i4_inp_stride,
7148 ps_ctxt->au4_4x4_src_sigmaX,
7149 ps_ctxt->au4_4x4_src_sigmaXSquared,
7150 4,
7151 4,
7152 64,
7153 64,
7154 1,
7155 16);
7156 }
7157 else
7158 {
7159 for(i = 0; i < 4; i++)
7160 {
7161 if(au1_is_32x32Blk_noisy[i])
7162 {
7163 U08 *pu1_inp =
7164 ps_curr_layer->pu1_inp +
7165 (s_common_frm_prms.i4_ctb_x_off +
7166 (s_common_frm_prms.i4_ctb_y_off * ps_curr_layer->i4_inp_stride));
7167
7168 U08 u1_cu_size = 32;
7169 WORD32 i4_inp_buf_offset =
7170 (((i / 2) * (u1_cu_size * ps_curr_layer->i4_inp_stride)) +
7171 ((i % 2) * u1_cu_size));
7172
7173 U16 u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb = 128;
7174 U16 u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb = 8;
7175 S32 i4_sigma_arr_offset =
7176 (((i / 2) * u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb) +
7177 ((i % 2) * u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb));
7178
7179 hme_compute_sigmaX_and_sigmaXSquared(
7180 pu1_inp + i4_inp_buf_offset,
7181 ps_curr_layer->i4_inp_stride,
7182 ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset,
7183 ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset,
7184 4,
7185 4,
7186 32,
7187 32,
7188 1,
7189 16);
7190 }
7191 else
7192 {
7193 S32 j;
7194
7195 U08 u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb = 8;
7196 U08 u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb = 2;
7197 S32 i4_16x16_blk_start_index_in_i_th_32x32_blk =
7198 (((i / 2) * u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb) +
7199 ((i % 2) * u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb));
7200
7201 for(j = 0; j < 4; j++)
7202 {
7203 U08 u1_3rd_16x16_blk_index_in_32x32_blk = 4;
7204 U08 u1_2nd_16x16_blk_index_in_32x32_blk = 1;
7205 S32 i4_16x16_blk_index_in_ctb =
7206 i4_16x16_blk_start_index_in_i_th_32x32_blk +
7207 ((j % 2) * u1_2nd_16x16_blk_index_in_32x32_blk) +
7208 ((j / 2) * u1_3rd_16x16_blk_index_in_32x32_blk);
7209
7210 //S32 k = (((i / 2) * 8) + ((i % 2) * 2)) + ((j % 2) * 1) + ((j / 2) * 4);
7211
7212 if(au1_is_16x16Blk_noisy[i4_16x16_blk_index_in_ctb])
7213 {
7214 U08 *pu1_inp =
7215 ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
7216 (s_common_frm_prms.i4_ctb_y_off *
7217 ps_curr_layer->i4_inp_stride));
7218
7219 U08 u1_cu_size = 16;
7220 WORD32 i4_inp_buf_offset =
7221 (((i4_16x16_blk_index_in_ctb % 4) * u1_cu_size) +
7222 ((i4_16x16_blk_index_in_ctb / 4) *
7223 (u1_cu_size * ps_curr_layer->i4_inp_stride)));
7224
7225 U16 u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk = 64;
7226 U16 u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk = 4;
7227 S32 i4_sigma_arr_offset =
7228 (((i4_16x16_blk_index_in_ctb % 4) *
7229 u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk) +
7230 ((i4_16x16_blk_index_in_ctb / 4) *
7231 u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk));
7232
7233 hme_compute_sigmaX_and_sigmaXSquared(
7234 pu1_inp + i4_inp_buf_offset,
7235 ps_curr_layer->i4_inp_stride,
7236 (ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset),
7237 (ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset),
7238 4,
7239 4,
7240 16,
7241 16,
7242 1,
7243 16);
7244 }
7245 }
7246 }
7247 }
7248 }
7249 }
7250 else
7251 {
7252 memset(au1_is_16x16Blk_noisy, 0, sizeof(au1_is_16x16Blk_noisy));
7253
7254 memset(au1_is_32x32Blk_noisy, 0, sizeof(au1_is_32x32Blk_noisy));
7255
7256 memset(au1_is_64x64Blk_noisy, 0, sizeof(au1_is_64x64Blk_noisy));
7257 }
7258
7259 for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
7260 {
7261 S32 ref_ctr;
7262 U08 au1_pred_dir_searched[2];
7263 U08 u1_is_cu_noisy;
7264 ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
7265
7266 {
7267 blk_x = (i4_ctb_x << 2) +
7268 (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_x);
7269 blk_y = (i4_ctb_y << 2) +
7270 (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_y);
7271
7272 blk_id_in_full_ctb =
7273 ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_id_in_full_ctb;
7274 blk_8x8_mask = ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_8x8_mask;
7275 ai4_blk_8x8_mask[blk_id_in_full_ctb] = blk_8x8_mask;
7276 s_search_prms_blk.i4_cu_x_off = (blk_x << blk_size_shift) - (i4_ctb_x << 6);
7277 s_search_prms_blk.i4_cu_y_off = (blk_y << blk_size_shift) - (i4_ctb_y << 6);
7278 }
7279
7280 /* get the current input blk point */
7281 pos_x = blk_x << blk_size_shift;
7282 pos_y = blk_y << blk_size_shift;
7283 pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
7284
7285 /*********************************************************************/
7286 /* For every blk in the picture, the search range needs to be derived*/
7287 /* Any blk can have any mv, but practical search constraints are */
7288 /* imposed by the picture boundary and amt of padding. */
7289 /*********************************************************************/
7290 /* MV limit is different based on ref. PIC */
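                 /* Roughly, the usable window per reference is the intersection of the
                    per-ref MV limit (as_mv_limit) with the picture/padding limit
                    (s_pic_limit_inp or s_pic_limit_rec) translated to this block's
                    position; hme_derive_search_range() below performs that clipping. */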
7291 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
7292 {
7293 if(!s_search_prms_blk.i4_use_rec)
7294 {
7295 hme_derive_search_range(
7296 &as_range_prms_inp[ref_ctr],
7297 &s_pic_limit_inp,
7298 &as_mv_limit[ref_ctr],
7299 pos_x,
7300 pos_y,
7301 blk_wd,
7302 blk_ht);
7303 }
7304 else
7305 {
7306 hme_derive_search_range(
7307 &as_range_prms_rec[ref_ctr],
7308 &s_pic_limit_rec,
7309 &as_mv_limit[ref_ctr],
7310 pos_x,
7311 pos_y,
7312 blk_wd,
7313 blk_ht);
7314 }
7315 }
7316 s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
7317 s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
7318 /* Select search results from a suitable search result in the context */
7319 {
7320 ps_search_results = &ps_ctxt->as_search_results_16x16[blk_id_in_full_ctb];
7321
7322 if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
7323 {
7324 S32 i;
7325
7326 for(i = 0; i < 2; i++)
7327 {
7328 ps_search_results->as_pred_ctxt[i].lambda = lambda_recon;
7329 }
7330 }
7331 }
7332
7333 u1_is_cu_noisy = au1_is_16x16Blk_noisy
7334 [(s_search_prms_blk.i4_cu_x_off >> 4) + (s_search_prms_blk.i4_cu_y_off >> 2)];
7335
7336 s_subpel_prms.u1_is_cu_noisy = u1_is_cu_noisy;
7337
7338 #if ME_LAMBDA_DISCOUNT_WHEN_NOISY
7339 if(u1_is_cu_noisy)
7340 {
7341 S32 j;
7342 S32 lambda;
7343
7344 lambda = lambda_recon;
7345 lambda = ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7346
7347 for(j = 0; j < 2; j++)
7348 {
7349 ps_search_results->as_pred_ctxt[j].lambda = lambda;
7350 }
7351 }
7352 else
7353 {
7354 S32 j;
7355 S32 lambda;
7356
7357 lambda = lambda_recon;
7358
7359 for(j = 0; j < 2; j++)
7360 {
7361 ps_search_results->as_pred_ctxt[j].lambda = lambda;
7362 }
7363 }
7364 #endif
7365
7366 s_search_prms_blk.ps_search_results = ps_search_results;
7367
7368 s_search_prms_blk.i4_part_mask = hme_part_mask_populator(
7369 pu1_inp,
7370 i4_inp_stride,
7371 ps_refine_prms->limit_active_partitions,
7372 ps_ctxt->ps_hme_frm_prms->bidir_enabled,
7373 ps_ctxt->u1_is_curFrame_a_refFrame,
7374 blk_8x8_mask,
7375 e_me_quality_presets);
7376
7377 if(ME_PRISTINE_QUALITY == e_me_quality_presets)
7378 {
7379 ps_ctb_cluster_info->ai4_part_mask[blk_id_in_full_ctb] =
7380 s_search_prms_blk.i4_part_mask;
7381 }
7382
7383 /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
7384 {
7385 /* Setting u1_num_active_refs to 2 */
7386 /* for the sole purpose of the */
7387 /* function called below */
7388 ps_search_results->u1_num_active_ref = (ps_refine_prms->bidir_enabled) ? 2 : 1;
7389
7390 hme_reset_search_results(
7391 ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
7392
7393 ps_search_results->u1_num_active_ref = i4_num_pred_dir;
7394 }
7395
7396 if(0 == blk_id_in_ctb)
7397 {
7398 UWORD8 u1_ctr;
7399 for(u1_ctr = 0; u1_ctr < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
7400 ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
7401 u1_ctr++)
7402 {
7403 WORD32 i4_max_dep_ctb_y;
7404 WORD32 i4_max_dep_ctb_x;
7405
7406 /* Set max mv in ctb units */
7407 i4_max_mv_x_in_ctb =
7408 (ps_curr_layer->i2_max_mv_x + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
7409 ps_ctxt->log_ctb_size;
7410
7411 i4_max_mv_y_in_ctb =
7412 (as_mv_limit[u1_ctr].i2_max_y + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
7413 ps_ctxt->log_ctb_size;
7414 /********************************************************************/
7415 /* Set max ctb_x and ctb_y dependency on reference picture */
7416                     /* Note +1 is due to delayed deblock, SAO, subpel plane dependency  */
7417 /********************************************************************/
7418 i4_max_dep_ctb_x = CLIP3(
7419 (i4_ctb_x + i4_max_mv_x_in_ctb + 1),
7420 0,
7421 ps_frm_ctb_prms->i4_num_ctbs_horz - 1);
7422 i4_max_dep_ctb_y = CLIP3(
7423 (i4_ctb_y + i4_max_mv_y_in_ctb + 1),
7424 0,
7425 ps_frm_ctb_prms->i4_num_ctbs_vert - 1);
7426
7427 ihevce_dmgr_map_chk_sync(
7428 ps_curr_layer->ppv_dep_mngr_recon[u1_ctr],
7429 ps_ctxt->thrd_id,
7430 i4_ctb_x,
7431 i4_ctb_y,
7432 i4_max_mv_x_in_ctb,
7433 i4_max_mv_y_in_ctb);
7434 }
7435 }
7436
7437 /* Loop across different Ref IDx */
7438 for(u1_pred_dir_ctr = 0; u1_pred_dir_ctr < i4_num_pred_dir; u1_pred_dir_ctr++)
7439 {
7440 S32 resultid;
7441 S08 u1_default_ref_id;
7442 S32 i4_num_srch_cands = 0;
7443 S32 i4_num_refinement_iterations;
7444 S32 i4_refine_iter_ctr;
7445
7446 if((i4_num_pred_dir == 2) || (!ps_ctxt->s_frm_prms.bidir_enabled) ||
7447 (ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0))
7448 {
7449 u1_pred_dir = u1_pred_dir_ctr;
7450 }
7451 else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
7452 {
7453 u1_pred_dir = 1;
7454 }
7455
7456 u1_default_ref_id = (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list[0]
7457 : ps_ctxt->ai1_future_list[0];
7458 au1_pred_dir_searched[u1_pred_dir_ctr] = u1_pred_dir;
7459
7460 i4_num_srch_cands = 0;
7461 resultid = 0;
7462
7463                     /* START OF NEW CTB MEANS FILL UP NEIGHBOURS IN 18x18 GRID */
7464 if(0 == blk_id_in_ctb)
7465 {
7466 /*****************************************************************/
7467 /* Initialize the mv grid with results of neighbours for the next*/
7468 /* ctb. */
7469 /*****************************************************************/
7470 hme_fill_ctb_neighbour_mvs(
7471 ps_curr_layer,
7472 blk_x,
7473 blk_y,
7474 aps_mv_grid[u1_pred_dir],
7475 u1_pred_dir_ctr,
7476 u1_default_ref_id,
7477 ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
7478 }
7479
7480 s_search_prms_blk.i1_ref_idx = u1_pred_dir;
7481
7482 {
7483 if((blk_id_in_full_ctb % 4) == 0)
7484 {
7485 ps_ctxt->as_search_results_32x32[blk_id_in_full_ctb >> 2]
7486 .as_pred_ctxt[u1_pred_dir]
7487 .proj_used = (blk_id_in_full_ctb == 8) ? 0 : 1;
7488 }
7489
7490 if(blk_id_in_full_ctb == 0)
7491 {
7492 ps_ctxt->s_search_results_64x64.as_pred_ctxt[u1_pred_dir].proj_used = 1;
7493 }
7494
7495 ps_search_results->as_pred_ctxt[u1_pred_dir].proj_used =
7496 !gau1_encode_to_raster_y[blk_id_in_full_ctb];
7497 }
7498
7499 {
7500 S32 x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
7501 S32 y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
7502 U08 u1_is_blk_at_ctb_boundary = !y;
7503
7504 s_srch_cand_init_data.u1_is_left_available =
7505 !(left_ctb_in_diff_tile && !s_search_prms_blk.i4_cu_x_off);
7506
7507 if(u1_is_blk_at_ctb_boundary)
7508 {
7509 s_srch_cand_init_data.u1_is_topRight_available = 0;
7510 s_srch_cand_init_data.u1_is_topLeft_available = 0;
7511 s_srch_cand_init_data.u1_is_top_available = 0;
7512 }
7513 else
7514 {
7515 s_srch_cand_init_data.u1_is_topRight_available =
7516 gau1_cu_tr_valid[y][x] && ((pos_x + blk_wd) < i4_pic_wd);
7517 s_srch_cand_init_data.u1_is_top_available = 1;
7518 s_srch_cand_init_data.u1_is_topLeft_available =
7519 s_srch_cand_init_data.u1_is_left_available;
7520 }
7521 }
7522
7523 s_srch_cand_init_data.i1_default_ref_id = u1_default_ref_id;
7524 s_srch_cand_init_data.i1_alt_default_ref_id = ps_ctxt->ai1_past_list[1];
7525 s_srch_cand_init_data.i4_pos_x = pos_x;
7526 s_srch_cand_init_data.i4_pos_y = pos_y;
7527 s_srch_cand_init_data.u1_pred_dir = u1_pred_dir;
7528 s_srch_cand_init_data.u1_pred_dir_ctr = u1_pred_dir_ctr;
7529 s_srch_cand_init_data.u1_search_candidate_list_index =
7530 au1_search_candidate_list_index[u1_pred_dir];
7531
7532 i4_num_srch_cands = hme_populate_search_candidates(&s_srch_cand_init_data);
7533
7534 /* Note this block also clips the MV range for all candidates */
7535 {
7536 S08 i1_check_for_mult_refs;
7537
7538 i1_check_for_mult_refs = u1_pred_dir ? (ps_ctxt->num_ref_future > 1)
7539 : (ps_ctxt->num_ref_past > 1);
7540
7541 ps_me_optimised_function_list->pf_mv_clipper(
7542 &s_search_prms_blk,
7543 i4_num_srch_cands,
7544 i1_check_for_mult_refs,
7545 ps_refine_prms->i4_num_steps_fpel_refine,
7546 ps_refine_prms->i4_num_steps_hpel_refine,
7547 ps_refine_prms->i4_num_steps_qpel_refine);
7548 }
7549
7550 #if ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0
7551 i4_num_refinement_iterations =
7552 ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1))
7553 ? ((e_me_quality_presets == ME_HIGH_QUALITY) ? 2 : i4_num_act_ref_l0)
7554 : 1;
7555 #else
7556 i4_num_refinement_iterations =
7557 ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1)) ? 2 : 1;
7558 #endif
7559
7560 #if ENABLE_EXPLICIT_SEARCH_IN_PQ
7561 if(e_me_quality_presets == ME_PRISTINE_QUALITY)
7562 {
7563 i4_num_refinement_iterations = (u1_pred_dir == 0) ? i4_num_act_ref_l0
7564 : i4_num_act_ref_l1;
7565 }
7566 #endif
7567
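                         /* Number of full-pel refinement passes: normally one; for P
                            pictures (no bidir) with more than one active L0 reference the
                            refinement is repeated, either twice or once per L0 reference
                            depending on the explicit-search macros and preset above. In
                            pristine quality (when ENABLE_EXPLICIT_SEARCH_IN_PQ is on) each
                            active reference of the current prediction direction gets its
                            own pass. */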
7568 for(i4_refine_iter_ctr = 0; i4_refine_iter_ctr < i4_num_refinement_iterations;
7569 i4_refine_iter_ctr++)
7570 {
7571 S32 center_x;
7572 S32 center_y;
7573 S32 center_ref_idx;
7574
7575 S08 *pi1_pred_dir_to_ref_idx =
7576 (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list : ps_ctxt->ai1_future_list;
7577
7578 {
7579 WORD32 i4_i;
7580
7581 for(i4_i = 0; i4_i < TOT_NUM_PARTS; i4_i++)
7582 {
7583 ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
7584 ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
7585 ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i] =
7586 MAX_SIGNED_16BIT_VAL;
7587 ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i] = 0;
7588 ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i] = 0;
7589 ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i] = u1_default_ref_id;
7590
7591 if(ps_refine_prms->i4_num_results_per_part == 2)
7592 {
7593 ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i] =
7594 MAX_SIGNED_16BIT_VAL;
7595 ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i] =
7596 MAX_SIGNED_16BIT_VAL;
7597 ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i] =
7598 MAX_SIGNED_16BIT_VAL;
7599 ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i] = 0;
7600 ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i] = 0;
7601 ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i] = u1_default_ref_id;
7602 }
7603 }
7604
7605 s_search_prms_blk.ps_fullpel_refine_ctxt = ps_fullpel_refine_ctxt;
7606 s_subpel_prms.ps_subpel_refine_ctxt = ps_fullpel_refine_ctxt;
7607 }
7608
7609 {
7610 search_node_t *ps_coloc_node;
7611
7612 S32 i = 0;
7613
7614 if(i4_num_refinement_iterations > 1)
7615 {
7616 for(i = 0; i < ai4_num_coloc_cands[u1_pred_dir]; i++)
7617 {
7618 ps_coloc_node =
7619 s_search_prms_blk.ps_search_candts[ai4_id_coloc[i]]
7620 .ps_search_node;
7621
7622 if(pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr] ==
7623 ps_coloc_node->i1_ref_idx)
7624 {
7625 break;
7626 }
7627 }
7628
7629 if(i == ai4_num_coloc_cands[u1_pred_dir])
7630 {
7631 i = 0;
7632 }
7633 }
7634 else
7635 {
7636 ps_coloc_node = s_search_prms_blk.ps_search_candts[ai4_id_coloc[0]]
7637 .ps_search_node;
7638 }
7639
7640 hme_set_mvp_node(
7641 ps_search_results,
7642 ps_coloc_node,
7643 u1_pred_dir,
7644 (i4_num_refinement_iterations > 1)
7645 ? pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr]
7646 : u1_default_ref_id);
7647
7648 center_x = ps_coloc_node->ps_mv->i2_mvx;
7649 center_y = ps_coloc_node->ps_mv->i2_mvy;
7650 center_ref_idx = ps_coloc_node->i1_ref_idx;
7651 }
7652
7653 /* Full-Pel search */
7654 {
7655 S32 num_unique_nodes;
7656
7657 memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
7658
7659 num_unique_nodes = hme_remove_duplicate_fpel_search_candidates(
7660 as_unique_search_nodes,
7661 s_search_prms_blk.ps_search_candts,
7662 au4_unique_node_map,
7663 pi1_pred_dir_to_ref_idx,
7664 i4_num_srch_cands,
7665 s_search_prms_blk.i4_num_init_candts,
7666 i4_refine_iter_ctr,
7667 i4_num_refinement_iterations,
7668 i4_num_act_ref_l0,
7669 center_ref_idx,
7670 center_x,
7671 center_y,
7672 ps_ctxt->s_frm_prms.bidir_enabled,
7673 e_me_quality_presets);
7674
7675 /*************************************************************************/
7676 /* This array stores the ids of the partitions whose */
7677 /* SADs are updated. Since the partitions whose SADs are updated may not */
7678 /* be in contiguous order, we supply another level of indirection. */
7679 /*************************************************************************/
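                                 /* Illustrative use of the indirection (it mirrors the
                                    loop further below): for i in [0, i4_num_valid_parts),
                                    part_id = ps_fullpel_refine_ctxt->ai4_part_id[i] gives
                                    the partition whose entries, e.g.
                                    i2_tot_cost[0][part_id], were actually updated. */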
7680 ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids(
7681 s_search_prms_blk.i4_part_mask,
7682 &ps_fullpel_refine_ctxt->ai4_part_id[0]);
7683
7684 if(!i4_refine_iter_ctr && !u1_pred_dir_ctr && u1_is_cu_noisy)
7685 {
7686 S32 i;
7687                             /* i4_sigma_array_offset: points to the current 4x4 block's sigmaX and sigmaX-squared entries among the 256 values stored per CTB */
7688 S32 i4_sigma_array_offset = (s_search_prms_blk.i4_cu_x_off / 4) +
7689 (s_search_prms_blk.i4_cu_y_off * 4);
7690
7691 for(i = 0; i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i++)
7692 {
7693 S32 i4_part_id = ps_fullpel_refine_ctxt->ai4_part_id[i];
7694
7695 hme_compute_final_sigma_of_pu_from_base_blocks(
7696 ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
7697 ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
7698 au8_final_src_sigmaX,
7699 au8_final_src_sigmaXSquared,
7700 16,
7701 4,
7702 i4_part_id,
7703 16);
7704 }
7705
7706 s_common_frm_prms.pu8_part_src_sigmaX = au8_final_src_sigmaX;
7707 s_common_frm_prms.pu8_part_src_sigmaXSquared =
7708 au8_final_src_sigmaXSquared;
7709
7710 s_search_prms_blk.pu8_part_src_sigmaX = au8_final_src_sigmaX;
7711 s_search_prms_blk.pu8_part_src_sigmaXSquared =
7712 au8_final_src_sigmaXSquared;
7713 }
7714
7715 if(0 == num_unique_nodes)
7716 {
7717 continue;
7718 }
7719
7720 if(num_unique_nodes >= 2)
7721 {
7722 s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
7723 s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
7724 if(ps_ctxt->i4_pic_type != IV_P_FRAME)
7725 {
7726 if(ps_ctxt->i4_temporal_layer == 1)
7727 {
7728 hme_fullpel_cand_sifter(
7729 &s_search_prms_blk,
7730 ps_curr_layer,
7731 &ps_ctxt->s_wt_pred,
7732 ALPHA_FOR_NOISE_TERM_IN_ME,
7733 u1_is_cu_noisy,
7734 ps_me_optimised_function_list);
7735 }
7736 else
7737 {
7738 hme_fullpel_cand_sifter(
7739 &s_search_prms_blk,
7740 ps_curr_layer,
7741 &ps_ctxt->s_wt_pred,
7742 ALPHA_FOR_NOISE_TERM_IN_ME,
7743 u1_is_cu_noisy,
7744 ps_me_optimised_function_list);
7745 }
7746 }
7747 else
7748 {
7749 hme_fullpel_cand_sifter(
7750 &s_search_prms_blk,
7751 ps_curr_layer,
7752 &ps_ctxt->s_wt_pred,
7753 ALPHA_FOR_NOISE_TERM_IN_ME_P,
7754 u1_is_cu_noisy,
7755 ps_me_optimised_function_list);
7756 }
7757 }
7758
7759 s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
7760
7761 hme_fullpel_refine(
7762 ps_refine_prms,
7763 &s_search_prms_blk,
7764 ps_curr_layer,
7765 &ps_ctxt->s_wt_pred,
7766 au4_unique_node_map,
7767 num_unique_nodes,
7768 blk_8x8_mask,
7769 center_x,
7770 center_y,
7771 center_ref_idx,
7772 e_me_quality_presets,
7773 ps_me_optimised_function_list);
7774 }
7775
7776 /* Sub-Pel search */
7777 {
7778 hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
7779
7780 s_subpel_prms.pu1_wkg_mem = (U08 *)hme_get_wkg_mem(
7781 &ps_ctxt->s_buf_mgr,
7782 INTERP_INTERMED_BUF_SIZE + INTERP_OUT_BUF_SIZE);
7783 /* MV limit is different based on ref. PIC */
7784 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
7785 {
7786 SCALE_RANGE_PRMS(
7787 as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
7788 SCALE_RANGE_PRMS(
7789 as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
7790 }
7791 s_subpel_prms.i4_ctb_x_off = i4_ctb_x << 6;
7792 s_subpel_prms.i4_ctb_y_off = i4_ctb_y << 6;
7793
7794 hme_subpel_refine_cu_hs(
7795 &s_subpel_prms,
7796 ps_curr_layer,
7797 ps_search_results,
7798 u1_pred_dir,
7799 &ps_ctxt->s_wt_pred,
7800 blk_8x8_mask,
7801 ps_ctxt->ps_func_selector,
7802 ps_cmn_utils_optimised_function_list,
7803 ps_me_optimised_function_list);
7804 }
7805 }
7806 }
7807 /* Populate the new PU struct with the results post subpel refinement*/
7808 {
7809 inter_cu_results_t *ps_cu_results;
7810 WORD32 best_inter_cost, intra_cost, posx, posy;
7811
7812 UWORD8 intra_8x8_enabled = 0;
7813
7814 /* cost of 16x16 cu parent */
7815 WORD32 parent_cost = MAX_32BIT_VAL;
7816
7817 /* cost of 8x8 cu children */
7818 /*********************************************************************/
7819                 /* Assuming the parent is not split, we signal 1 bit for this parent*/
7820                 /* CU. If split, then 1 bit for the parent CU + 1 bit for each of   */
7821                 /* the 4 child CUs. So, 4*lambda is the extra cost for the children.*/
7822 /*********************************************************************/
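                     /* Sketch of the decision taken at the end of this block: the 16x16
                        parent is split whenever the accumulated cost of its valid 8x8
                        children (child_cost) is lower than parent_cost. */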
7823 WORD32 child_cost = 0;
7824
7825 ps_cu_results = ps_search_results->ps_cu_results;
7826
7827 /* Initialize the pu_results pointers to the first struct in the stack array */
7828 ps_pu_results = as_inter_pu_results;
7829
7830 hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
7831
7832 hme_populate_pus(
7833 ps_thrd_ctxt,
7834 ps_ctxt,
7835 &s_subpel_prms,
7836 ps_search_results,
7837 ps_cu_results,
7838 ps_pu_results,
7839 &(as_pu_results[0][0][0]),
7840 &s_common_frm_prms,
7841 &ps_ctxt->s_wt_pred,
7842 ps_curr_layer,
7843 au1_pred_dir_searched,
7844 i4_num_pred_dir);
7845
7846 ps_cu_results->i4_inp_offset =
7847 (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
7848
7849 hme_decide_part_types(
7850 ps_cu_results,
7851 ps_pu_results,
7852 &s_common_frm_prms,
7853 ps_ctxt,
7854 ps_cmn_utils_optimised_function_list,
7855 ps_me_optimised_function_list
7856
7857 );
7858
7859             /* UPDATE the MIN and MAX MVs for Dynamic Search Range for each ref. pic. */
7860             /* Only for P pics: is_i_pic and bidir_enabled are both 0 for P; for I and B they are mutually exclusive */
7861 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
7862 {
7863 WORD32 res_ctr;
7864
7865 for(res_ctr = 0; res_ctr < ps_cu_results->u1_num_best_results; res_ctr++)
7866 {
7867 WORD32 num_part = 2, part_ctr;
7868 part_type_results_t *ps_best_results =
7869 &ps_cu_results->ps_best_results[res_ctr];
7870
7871 if(PRT_2Nx2N == ps_best_results->u1_part_type)
7872 num_part = 1;
7873
7874 for(part_ctr = 0; part_ctr < num_part; part_ctr++)
7875 {
7876 pu_result_t *ps_pu_results =
7877 &ps_best_results->as_pu_results[part_ctr];
7878
7879 ASSERT(PRED_L0 == ps_pu_results->pu.b2_pred_mode);
7880
7881 hme_update_dynamic_search_params(
7882 &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p]
7883 .as_dyn_range_prms[ps_pu_results->pu.mv.i1_l0_ref_idx],
7884 ps_pu_results->pu.mv.s_l0_mv.i2_mvy);
7885
7886 /* Sanity Check */
7887 ASSERT(
7888 ps_pu_results->pu.mv.i1_l0_ref_idx <
7889 ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
7890
7891 /* No L1 for P Pic. */
7892 ASSERT(PRED_L1 != ps_pu_results->pu.b2_pred_mode);
7893 /* No BI for P Pic. */
7894 ASSERT(PRED_BI != ps_pu_results->pu.b2_pred_mode);
7895 }
7896 }
7897 }
7898
7899 /*****************************************************************/
7900 /* INSERT INTRA RESULTS AT 16x16 LEVEL. */
7901 /*****************************************************************/
7902
7903 #if DISABLE_INTRA_IN_BPICS
7904 if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
7905 (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
7906 #endif
7907 {
7908 if(!(DISABLE_INTRA_WHEN_NOISY && s_common_frm_prms.u1_is_cu_noisy))
7909 {
7910 hme_insert_intra_nodes_post_bipred(
7911 ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
7912 }
7913 }
7914
7915 #if DISABLE_INTRA_IN_BPICS
7916 if((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
7917 (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
7918 {
7919 intra_8x8_enabled = 0;
7920 }
7921 else
7922 #endif
7923 {
7924 /*TRAQO intra flag updation*/
7925 if(1 == ps_cu_results->ps_best_results->as_pu_results[0].pu.b1_intra_flag)
7926 {
7927 best_inter_cost =
7928 ps_cu_results->ps_best_results->as_pu_results[1].i4_tot_cost;
7929 intra_cost =
7930 ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
7931 /*@16x16 level*/
7932 posx = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_x
7933 << 2) >>
7934 4;
7935 posy = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_y
7936 << 2) >>
7937 4;
7938 }
7939 else
7940 {
7941 best_inter_cost =
7942 ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
7943 posx = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_x
7944 << 2) >>
7945 3;
7946 posy = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_y
7947 << 2) >>
7948 3;
7949 }
7950
7951 /* Disable intra16/32/64 flags based on split flags recommended by IPE */
7952 if(ps_cur_ipe_ctb->u1_split_flag)
7953 {
7954 /* Id of the 32x32 block, 16x16 block in a CTB */
7955 WORD32 i4_32x32_id =
7956 (ps_cu_results->u1_y_off >> 5) * 2 + (ps_cu_results->u1_x_off >> 5);
7957 WORD32 i4_16x16_id = ((ps_cu_results->u1_y_off >> 4) & 0x1) * 2 +
7958 ((ps_cu_results->u1_x_off >> 4) & 0x1);
7959
7960 if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
7961 {
7962 if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7963 .as_intra16_analyse[i4_16x16_id]
7964 .b1_split_flag)
7965 {
7966 intra_8x8_enabled =
7967 ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7968 .as_intra16_analyse[i4_16x16_id]
7969 .as_intra8_analyse[0]
7970 .b1_valid_cu;
7971 intra_8x8_enabled &=
7972 ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7973 .as_intra16_analyse[i4_16x16_id]
7974 .as_intra8_analyse[1]
7975 .b1_valid_cu;
7976 intra_8x8_enabled &=
7977 ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7978 .as_intra16_analyse[i4_16x16_id]
7979 .as_intra8_analyse[2]
7980 .b1_valid_cu;
7981 intra_8x8_enabled &=
7982 ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7983 .as_intra16_analyse[i4_16x16_id]
7984 .as_intra8_analyse[3]
7985 .b1_valid_cu;
7986 }
7987 }
7988 }
7989 }
7990
7991 if(blk_8x8_mask == 0xf)
7992 {
7993 parent_cost =
7994 ps_search_results->ps_cu_results->ps_best_results[0].i4_tot_cost;
7995 ps_search_results->u1_split_flag = 0;
7996 }
7997 else
7998 {
7999 ps_search_results->u1_split_flag = 1;
8000 }
8001
8002 ps_cu_results = &ps_ctxt->as_cu8x8_results[blk_id_in_full_ctb << 2];
8003
8004 if(s_common_frm_prms.u1_is_cu_noisy)
8005 {
8006 intra_8x8_enabled = 0;
8007 }
8008
8009                 /* Evaluate 8x8 if NxN part id is enabled */
8010 if((ps_search_results->i4_part_mask & ENABLE_NxN) || intra_8x8_enabled)
8011 {
8012 /* Populates the PU's for the 4 8x8's in one call */
8013 hme_populate_pus_8x8_cu(
8014 ps_thrd_ctxt,
8015 ps_ctxt,
8016 &s_subpel_prms,
8017 ps_search_results,
8018 ps_cu_results,
8019 ps_pu_results,
8020 &(as_pu_results[0][0][0]),
8021 &s_common_frm_prms,
8022 au1_pred_dir_searched,
8023 i4_num_pred_dir,
8024 blk_8x8_mask);
8025
8026 /* Re-initialize the pu_results pointers to the first struct in the stack array */
8027 ps_pu_results = as_inter_pu_results;
8028
8029 for(i = 0; i < 4; i++)
8030 {
8031 if((blk_8x8_mask & (1 << i)))
8032 {
8033 if(ps_cu_results->i4_part_mask)
8034 {
8035 hme_decide_part_types(
8036 ps_cu_results,
8037 ps_pu_results,
8038 &s_common_frm_prms,
8039 ps_ctxt,
8040 ps_cmn_utils_optimised_function_list,
8041 ps_me_optimised_function_list
8042
8043 );
8044 }
8045 /*****************************************************************/
8046 /* INSERT INTRA RESULTS AT 8x8 LEVEL. */
8047 /*****************************************************************/
8048 #if DISABLE_INTRA_IN_BPICS
8049 if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
8050 (ps_ctxt->s_frm_prms.i4_temporal_layer_id >
8051 TEMPORAL_LAYER_DISABLE)))
8052 #endif
8053 {
8054 if(!(DISABLE_INTRA_WHEN_NOISY &&
8055 s_common_frm_prms.u1_is_cu_noisy))
8056 {
8057 hme_insert_intra_nodes_post_bipred(
8058 ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
8059 }
8060 }
8061
8062 child_cost += ps_cu_results->ps_best_results[0].i4_tot_cost;
8063 }
8064
8065 ps_cu_results++;
8066 ps_pu_results++;
8067 }
8068
8069 /* Compare 16x16 vs 8x8 cost */
8070 if(child_cost < parent_cost)
8071 {
8072 ps_search_results->best_cu_cost = child_cost;
8073 ps_search_results->u1_split_flag = 1;
8074 }
8075 }
8076 }
8077
8078 hme_update_mv_bank_encode(
8079 ps_search_results,
8080 ps_curr_layer->ps_layer_mvbank,
8081 blk_x,
8082 blk_y,
8083 &s_mv_update_prms,
8084 au1_pred_dir_searched,
8085 i4_num_act_ref_l0);
8086
8087 /*********************************************************************/
8088 /* Map the best results to an MV Grid. This is a 18x18 grid that is */
8089 /* useful for doing things like predictor for cost calculation or */
8090 /* also for merge calculations if need be. */
8091 /*********************************************************************/
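                 /* (The 18x18 size presumably corresponds to the 16x16 grid of 4x4
                    units covering the 64x64 CTB plus a one-unit ring of neighbouring
                    MVs on every side.) */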
8092 hme_map_mvs_to_grid(
8093 &aps_mv_grid[0], ps_search_results, au1_pred_dir_searched, i4_num_pred_dir);
8094 }
8095
8096 /* Set the CU tree nodes appropriately */
8097 if(e_me_quality_presets != ME_PRISTINE_QUALITY)
8098 {
8099 WORD32 i, j;
8100
8101 for(i = 0; i < 16; i++)
8102 {
8103 cur_ctb_cu_tree_t *ps_tree_node =
8104 ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
8105 search_results_t *ps_results = &ps_ctxt->as_search_results_16x16[i];
8106
8107 switch(i >> 2)
8108 {
8109 case 0:
8110 {
8111 ps_tree_node = ps_tree_node->ps_child_node_tl;
8112
8113 break;
8114 }
8115 case 1:
8116 {
8117 ps_tree_node = ps_tree_node->ps_child_node_tr;
8118
8119 break;
8120 }
8121 case 2:
8122 {
8123 ps_tree_node = ps_tree_node->ps_child_node_bl;
8124
8125 break;
8126 }
8127 case 3:
8128 {
8129 ps_tree_node = ps_tree_node->ps_child_node_br;
8130
8131 break;
8132 }
8133 }
8134
8135 switch(i % 4)
8136 {
8137 case 0:
8138 {
8139 ps_tree_node = ps_tree_node->ps_child_node_tl;
8140
8141 break;
8142 }
8143 case 1:
8144 {
8145 ps_tree_node = ps_tree_node->ps_child_node_tr;
8146
8147 break;
8148 }
8149 case 2:
8150 {
8151 ps_tree_node = ps_tree_node->ps_child_node_bl;
8152
8153 break;
8154 }
8155 case 3:
8156 {
8157 ps_tree_node = ps_tree_node->ps_child_node_br;
8158
8159 break;
8160 }
8161 }
8162
8163 if(ai4_blk_8x8_mask[i] == 15)
8164 {
8165 if(!ps_results->u1_split_flag)
8166 {
8167 ps_tree_node->is_node_valid = 1;
8168 NULLIFY_THE_CHILDREN_NODES(ps_tree_node);
8169 }
8170 else
8171 {
8172 ps_tree_node->is_node_valid = 0;
8173 ENABLE_THE_CHILDREN_NODES(ps_tree_node);
8174 }
8175 }
8176 else
8177 {
8178 cur_ctb_cu_tree_t *ps_tree_child;
8179
8180 ps_tree_node->is_node_valid = 0;
8181
8182 for(j = 0; j < 4; j++)
8183 {
8184 switch(j)
8185 {
8186 case 0:
8187 {
8188 ps_tree_child = ps_tree_node->ps_child_node_tl;
8189
8190 break;
8191 }
8192 case 1:
8193 {
8194 ps_tree_child = ps_tree_node->ps_child_node_tr;
8195
8196 break;
8197 }
8198 case 2:
8199 {
8200 ps_tree_child = ps_tree_node->ps_child_node_bl;
8201
8202 break;
8203 }
8204 case 3:
8205 {
8206 ps_tree_child = ps_tree_node->ps_child_node_br;
8207
8208 break;
8209 }
8210 }
8211
8212 ps_tree_child->is_node_valid = !!(ai4_blk_8x8_mask[i] & (1 << j));
8213 }
8214 }
8215 }
8216 }
8217
8218 if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8219 {
8220 cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root;
8221
8222 hme_analyse_mv_clustering(
8223 ps_ctxt->as_search_results_16x16,
8224 ps_ctxt->as_cu16x16_results,
8225 ps_ctxt->as_cu8x8_results,
8226 ps_ctxt->ps_ctb_cluster_info,
8227 ps_ctxt->ai1_future_list,
8228 ps_ctxt->ai1_past_list,
8229 ps_ctxt->s_frm_prms.bidir_enabled,
8230 e_me_quality_presets);
8231
8232 #if DISABLE_BLK_MERGE_WHEN_NOISY
8233 ps_tree->ps_child_node_tl->is_node_valid = !au1_is_32x32Blk_noisy[0];
8234 ps_tree->ps_child_node_tr->is_node_valid = !au1_is_32x32Blk_noisy[1];
8235 ps_tree->ps_child_node_bl->is_node_valid = !au1_is_32x32Blk_noisy[2];
8236 ps_tree->ps_child_node_br->is_node_valid = !au1_is_32x32Blk_noisy[3];
8237 ps_tree->ps_child_node_tl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[0];
8238 ps_tree->ps_child_node_tr->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[1];
8239 ps_tree->ps_child_node_bl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[2];
8240 ps_tree->ps_child_node_br->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[3];
8241 ps_tree->is_node_valid = !au1_is_64x64Blk_noisy[0];
8242 ps_tree->u1_inter_eval_enable = !au1_is_64x64Blk_noisy[0];
8243 #endif
8244
8245 en_merge_32x32 = (ps_tree->ps_child_node_tl->is_node_valid << 0) |
8246 (ps_tree->ps_child_node_tr->is_node_valid << 1) |
8247 (ps_tree->ps_child_node_bl->is_node_valid << 2) |
8248 (ps_tree->ps_child_node_br->is_node_valid << 3);
8249
8250 en_merge_execution = (ps_tree->ps_child_node_tl->u1_inter_eval_enable << 0) |
8251 (ps_tree->ps_child_node_tr->u1_inter_eval_enable << 1) |
8252 (ps_tree->ps_child_node_bl->u1_inter_eval_enable << 2) |
8253 (ps_tree->ps_child_node_br->u1_inter_eval_enable << 3) |
8254 (ps_tree->u1_inter_eval_enable << 4);
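                 /* Bit layout shared by en_merge_32x32 and en_merge_execution: bits 0..3
                    select the TL/TR/BL/BR 32x32 quadrants respectively, and bit 4 of
                    en_merge_execution additionally gates the 64x64 merge. */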
8255 }
8256 else
8257 {
8258 en_merge_execution = 0x1f;
8259
8260 #if DISABLE_BLK_MERGE_WHEN_NOISY
8261 en_merge_32x32 = ((!au1_is_32x32Blk_noisy[0] << 0) & (en_merge_32x32 & 1)) |
8262 ((!au1_is_32x32Blk_noisy[1] << 1) & (en_merge_32x32 & 2)) |
8263 ((!au1_is_32x32Blk_noisy[2] << 2) & (en_merge_32x32 & 4)) |
8264 ((!au1_is_32x32Blk_noisy[3] << 3) & (en_merge_32x32 & 8));
8265 #endif
8266 }
8267
8268 /* Re-initialize the pu_results pointers to the first struct in the stack array */
8269 ps_pu_results = as_inter_pu_results;
8270
8271 {
8272 WORD32 ref_ctr;
8273
8274 s_ctb_prms.i4_ctb_x = i4_ctb_x << 6;
8275 s_ctb_prms.i4_ctb_y = i4_ctb_y << 6;
8276
8277 /* MV limit is different based on ref. PIC */
8278 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8279 {
8280 SCALE_RANGE_PRMS(as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
8281 SCALE_RANGE_PRMS(as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
8282 }
8283
8284 e_merge_result = CU_SPLIT;
8285 merge_count_32x32 = 0;
8286
8287 if((en_merge_32x32 & 1) && (en_merge_execution & 1))
8288 {
8289 range_prms_t *ps_pic_limit;
8290 if(s_merge_prms_32x32_tl.i4_use_rec == 1)
8291 {
8292 ps_pic_limit = &s_pic_limit_rec;
8293 }
8294 else
8295 {
8296 ps_pic_limit = &s_pic_limit_inp;
8297 }
8298 /* MV limit is different based on ref. PIC */
8299 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8300 {
8301 hme_derive_search_range(
8302 s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8303 ps_pic_limit,
8304 &as_mv_limit[ref_ctr],
8305 i4_ctb_x << 6,
8306 i4_ctb_y << 6,
8307 32,
8308 32);
8309
8310 SCALE_RANGE_PRMS_POINTERS(
8311 s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8312 s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8313 2);
8314 }
8315 s_merge_prms_32x32_tl.i4_ctb_x_off = i4_ctb_x << 6;
8316 s_merge_prms_32x32_tl.i4_ctb_y_off = i4_ctb_y << 6;
8317 s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[0];
8318
8319 e_merge_result = hme_try_merge_high_speed(
8320 ps_thrd_ctxt,
8321 ps_ctxt,
8322 ps_cur_ipe_ctb,
8323 &s_subpel_prms,
8324 &s_merge_prms_32x32_tl,
8325 ps_pu_results,
8326 &as_pu_results[0][0][0]);
8327
8328 if(e_merge_result == CU_MERGED)
8329 {
8330 inter_cu_results_t *ps_cu_results =
8331 s_merge_prms_32x32_tl.ps_results_merge->ps_cu_results;
8332
8333 if(!((ps_cu_results->u1_num_best_results == 1) &&
8334 (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8335 {
8336 hme_map_mvs_to_grid(
8337 &aps_mv_grid[0],
8338 s_merge_prms_32x32_tl.ps_results_merge,
8339 s_merge_prms_32x32_tl.au1_pred_dir_searched,
8340 s_merge_prms_32x32_tl.i4_num_pred_dir_actual);
8341 }
8342
8343 if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8344 {
8345 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8346 .ps_child_node_tl->is_node_valid = 1;
8347 NULLIFY_THE_CHILDREN_NODES(
8348 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8349 .ps_child_node_tl);
8350 }
8351
8352 merge_count_32x32++;
8353 e_merge_result = CU_SPLIT;
8354 }
8355 else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8356 {
8357 #if ENABLE_CU_TREE_CULLING
8358 cur_ctb_cu_tree_t *ps_tree =
8359 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
8360
8361 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8362 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8363 ENABLE_THE_CHILDREN_NODES(ps_tree);
8364 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8365 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8366 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8367 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8368 #endif
8369 }
8370 }
8371 else if((en_merge_32x32 & 1) && (!(en_merge_execution & 1)))
8372 {
8373 #if ENABLE_CU_TREE_CULLING
8374 cur_ctb_cu_tree_t *ps_tree =
8375 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
8376
8377 ENABLE_THE_CHILDREN_NODES(ps_tree);
8378 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8379 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8380 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8381 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8382 #endif
8383
8384 if(au1_is_32x32Blk_noisy[0] && DISABLE_INTRA_WHEN_NOISY)
8385 {
8386 ps_tree->is_node_valid = 0;
8387 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8388 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8389 }
8390 }
8391
8392 if((en_merge_32x32 & 2) && (en_merge_execution & 2))
8393 {
8394 range_prms_t *ps_pic_limit;
8395 if(s_merge_prms_32x32_tr.i4_use_rec == 1)
8396 {
8397 ps_pic_limit = &s_pic_limit_rec;
8398 }
8399 else
8400 {
8401 ps_pic_limit = &s_pic_limit_inp;
8402 }
8403 /* MV limit is different based on ref. PIC */
8404 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8405 {
8406 hme_derive_search_range(
8407 s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8408 ps_pic_limit,
8409 &as_mv_limit[ref_ctr],
8410 (i4_ctb_x << 6) + 32,
8411 i4_ctb_y << 6,
8412 32,
8413 32);
8414 SCALE_RANGE_PRMS_POINTERS(
8415 s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8416 s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8417 2);
8418 }
8419 s_merge_prms_32x32_tr.i4_ctb_x_off = i4_ctb_x << 6;
8420 s_merge_prms_32x32_tr.i4_ctb_y_off = i4_ctb_y << 6;
8421 s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[1];
8422
8423 e_merge_result = hme_try_merge_high_speed(
8424 ps_thrd_ctxt,
8425 ps_ctxt,
8426 ps_cur_ipe_ctb,
8427 &s_subpel_prms,
8428 &s_merge_prms_32x32_tr,
8429 ps_pu_results,
8430 &as_pu_results[0][0][0]);
8431
8432 if(e_merge_result == CU_MERGED)
8433 {
8434 inter_cu_results_t *ps_cu_results =
8435 s_merge_prms_32x32_tr.ps_results_merge->ps_cu_results;
8436
8437 if(!((ps_cu_results->u1_num_best_results == 1) &&
8438 (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8439 {
8440 hme_map_mvs_to_grid(
8441 &aps_mv_grid[0],
8442 s_merge_prms_32x32_tr.ps_results_merge,
8443 s_merge_prms_32x32_tr.au1_pred_dir_searched,
8444 s_merge_prms_32x32_tr.i4_num_pred_dir_actual);
8445 }
8446
8447 if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8448 {
8449 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8450 .ps_child_node_tr->is_node_valid = 1;
8451 NULLIFY_THE_CHILDREN_NODES(
8452 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8453 .ps_child_node_tr);
8454 }
8455
8456 merge_count_32x32++;
8457 e_merge_result = CU_SPLIT;
8458 }
8459 else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8460 {
8461 #if ENABLE_CU_TREE_CULLING
8462 cur_ctb_cu_tree_t *ps_tree =
8463 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
8464
8465 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8466 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8467 ENABLE_THE_CHILDREN_NODES(ps_tree);
8468 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8469 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8470 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8471 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8472 #endif
8473 }
8474 }
8475 else if((en_merge_32x32 & 2) && (!(en_merge_execution & 2)))
8476 {
8477 #if ENABLE_CU_TREE_CULLING
8478 cur_ctb_cu_tree_t *ps_tree =
8479 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
8480
8481 ENABLE_THE_CHILDREN_NODES(ps_tree);
8482 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8483 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8484 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8485 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8486 #endif
8487
8488 if(au1_is_32x32Blk_noisy[1] && DISABLE_INTRA_WHEN_NOISY)
8489 {
8490 ps_tree->is_node_valid = 0;
8491 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8492 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8493 }
8494 }
8495
8496 if((en_merge_32x32 & 4) && (en_merge_execution & 4))
8497 {
8498 range_prms_t *ps_pic_limit;
8499 if(s_merge_prms_32x32_bl.i4_use_rec == 1)
8500 {
8501 ps_pic_limit = &s_pic_limit_rec;
8502 }
8503 else
8504 {
8505 ps_pic_limit = &s_pic_limit_inp;
8506 }
8507 /* MV limit is different based on ref. PIC */
8508 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8509 {
8510 hme_derive_search_range(
8511 s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8512 ps_pic_limit,
8513 &as_mv_limit[ref_ctr],
8514 i4_ctb_x << 6,
8515 (i4_ctb_y << 6) + 32,
8516 32,
8517 32);
8518 SCALE_RANGE_PRMS_POINTERS(
8519 s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8520 s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8521 2);
8522 }
8523 s_merge_prms_32x32_bl.i4_ctb_x_off = i4_ctb_x << 6;
8524 s_merge_prms_32x32_bl.i4_ctb_y_off = i4_ctb_y << 6;
8525 s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[2];
8526
8527 e_merge_result = hme_try_merge_high_speed(
8528 ps_thrd_ctxt,
8529 ps_ctxt,
8530 ps_cur_ipe_ctb,
8531 &s_subpel_prms,
8532 &s_merge_prms_32x32_bl,
8533 ps_pu_results,
8534 &as_pu_results[0][0][0]);
8535
8536 if(e_merge_result == CU_MERGED)
8537 {
8538 inter_cu_results_t *ps_cu_results =
8539 s_merge_prms_32x32_bl.ps_results_merge->ps_cu_results;
8540
8541 if(!((ps_cu_results->u1_num_best_results == 1) &&
8542 (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8543 {
8544 hme_map_mvs_to_grid(
8545 &aps_mv_grid[0],
8546 s_merge_prms_32x32_bl.ps_results_merge,
8547 s_merge_prms_32x32_bl.au1_pred_dir_searched,
8548 s_merge_prms_32x32_bl.i4_num_pred_dir_actual);
8549 }
8550
8551 if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8552 {
8553 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8554 .ps_child_node_bl->is_node_valid = 1;
8555 NULLIFY_THE_CHILDREN_NODES(
8556 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8557 .ps_child_node_bl);
8558 }
8559
8560 merge_count_32x32++;
8561 e_merge_result = CU_SPLIT;
8562 }
8563 else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8564 {
8565 #if ENABLE_CU_TREE_CULLING
8566 cur_ctb_cu_tree_t *ps_tree =
8567 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
8568
8569 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8570 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8571 ENABLE_THE_CHILDREN_NODES(ps_tree);
8572 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8573 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8574 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8575 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8576 #endif
8577 }
8578 }
8579 else if((en_merge_32x32 & 4) && (!(en_merge_execution & 4)))
8580 {
8581 #if ENABLE_CU_TREE_CULLING
8582 cur_ctb_cu_tree_t *ps_tree =
8583 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
8584
8585 ENABLE_THE_CHILDREN_NODES(ps_tree);
8586 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8587 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8588 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8589 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8590 #endif
8591
8592 if(au1_is_32x32Blk_noisy[2] && DISABLE_INTRA_WHEN_NOISY)
8593 {
8594 ps_tree->is_node_valid = 0;
8595 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8596 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8597 }
8598 }
8599
8600 if((en_merge_32x32 & 8) && (en_merge_execution & 8))
8601 {
8602 range_prms_t *ps_pic_limit;
8603 if(s_merge_prms_32x32_br.i4_use_rec == 1)
8604 {
8605 ps_pic_limit = &s_pic_limit_rec;
8606 }
8607 else
8608 {
8609 ps_pic_limit = &s_pic_limit_inp;
8610 }
8611 /* MV limit is different based on ref. PIC */
8612 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8613 {
8614 hme_derive_search_range(
8615 s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8616 ps_pic_limit,
8617 &as_mv_limit[ref_ctr],
8618 (i4_ctb_x << 6) + 32,
8619 (i4_ctb_y << 6) + 32,
8620 32,
8621 32);
8622
8623 SCALE_RANGE_PRMS_POINTERS(
8624 s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8625 s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8626 2);
8627 }
8628 s_merge_prms_32x32_br.i4_ctb_x_off = i4_ctb_x << 6;
8629 s_merge_prms_32x32_br.i4_ctb_y_off = i4_ctb_y << 6;
8630 s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[3];
8631
8632 e_merge_result = hme_try_merge_high_speed(
8633 ps_thrd_ctxt,
8634 ps_ctxt,
8635 ps_cur_ipe_ctb,
8636 &s_subpel_prms,
8637 &s_merge_prms_32x32_br,
8638 ps_pu_results,
8639 &as_pu_results[0][0][0]);
8640
8641 if(e_merge_result == CU_MERGED)
8642 {
8643 /*inter_cu_results_t *ps_cu_results = s_merge_prms_32x32_br.ps_results_merge->ps_cu_results;
8644
8645 if(!((ps_cu_results->u1_num_best_results == 1) &&
8646 (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8647 {
8648 hme_map_mvs_to_grid
8649 (
8650 &aps_mv_grid[0],
8651 s_merge_prms_32x32_br.ps_results_merge,
8652 s_merge_prms_32x32_br.au1_pred_dir_searched,
8653 s_merge_prms_32x32_br.i4_num_pred_dir_actual
8654 );
8655 }*/
8656
8657 if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8658 {
8659 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8660 .ps_child_node_br->is_node_valid = 1;
8661 NULLIFY_THE_CHILDREN_NODES(
8662 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8663 .ps_child_node_br);
8664 }
8665
8666 merge_count_32x32++;
8667 e_merge_result = CU_SPLIT;
8668 }
8669 else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8670 {
8671 #if ENABLE_CU_TREE_CULLING
8672 cur_ctb_cu_tree_t *ps_tree =
8673 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
8674
8675 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8676 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8677 ENABLE_THE_CHILDREN_NODES(ps_tree);
8678 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8679 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8680 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8681 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8682 #endif
8683 }
8684 }
8685 else if((en_merge_32x32 & 8) && (!(en_merge_execution & 8)))
8686 {
8687 #if ENABLE_CU_TREE_CULLING
8688 cur_ctb_cu_tree_t *ps_tree =
8689 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
8690
8691 ENABLE_THE_CHILDREN_NODES(ps_tree);
8692 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8693 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8694 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8695 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8696 #endif
8697
8698 if(au1_is_32x32Blk_noisy[3] && DISABLE_INTRA_WHEN_NOISY)
8699 {
8700 ps_tree->is_node_valid = 0;
8701 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8702 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8703 }
8704 }
8705
8706 /* Try merging all 32x32 to 64x64 candts */
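                 /* The 64x64 merge is attempted only when all four 32x32 quadrants are
                    enabled and, per preset, either all four actually merged (non-pristine)
                    or the 64x64 evaluation bit is set (pristine); the nested check below
                    additionally skips it for XS25 when its 64x64 merge is disabled. */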
8707 if(((en_merge_32x32 & 0xf) == 0xf) &&
8708 (((merge_count_32x32 == 4) && (e_me_quality_presets != ME_PRISTINE_QUALITY)) ||
8709 ((en_merge_execution & 16) && (e_me_quality_presets == ME_PRISTINE_QUALITY))))
8710 if((((e_me_quality_presets == ME_XTREME_SPEED_25) &&
8711 !DISABLE_64X64_BLOCK_MERGE_IN_ME_IN_XS25) ||
8712 (e_me_quality_presets != ME_XTREME_SPEED_25)))
8713 {
8714 range_prms_t *ps_pic_limit;
8715 if(s_merge_prms_64x64.i4_use_rec == 1)
8716 {
8717 ps_pic_limit = &s_pic_limit_rec;
8718 }
8719 else
8720 {
8721 ps_pic_limit = &s_pic_limit_inp;
8722 }
8723 /* MV limit is different based on ref. PIC */
8724 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8725 {
8726 hme_derive_search_range(
8727 s_merge_prms_64x64.aps_mv_range[ref_ctr],
8728 ps_pic_limit,
8729 &as_mv_limit[ref_ctr],
8730 i4_ctb_x << 6,
8731 i4_ctb_y << 6,
8732 64,
8733 64);
8734
8735 SCALE_RANGE_PRMS_POINTERS(
8736 s_merge_prms_64x64.aps_mv_range[ref_ctr],
8737 s_merge_prms_64x64.aps_mv_range[ref_ctr],
8738 2);
8739 }
8740 s_merge_prms_64x64.i4_ctb_x_off = i4_ctb_x << 6;
8741 s_merge_prms_64x64.i4_ctb_y_off = i4_ctb_y << 6;
8742 s_subpel_prms.u1_is_cu_noisy = au1_is_64x64Blk_noisy[0];
8743
8744 e_merge_result = hme_try_merge_high_speed(
8745 ps_thrd_ctxt,
8746 ps_ctxt,
8747 ps_cur_ipe_ctb,
8748 &s_subpel_prms,
8749 &s_merge_prms_64x64,
8750 ps_pu_results,
8751 &as_pu_results[0][0][0]);
8752
8753 if((e_merge_result == CU_MERGED) &&
8754 (ME_PRISTINE_QUALITY != e_me_quality_presets))
8755 {
8756 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8757 .is_node_valid = 1;
8758 NULLIFY_THE_CHILDREN_NODES(
8759 ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE));
8760 }
8761 else if(
8762 (e_merge_result == CU_SPLIT) &&
8763 (ME_PRISTINE_QUALITY == e_me_quality_presets))
8764 {
8765 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8766 .is_node_valid = 0;
8767 }
8768 }
8769
8770 /*****************************************************************/
8771             /* UPDATE OF RESULTS TO EXTERNAL STRUCTURES                      */
8772 /*****************************************************************/
8773 pf_ext_update_fxn((void *)ps_thrd_ctxt, (void *)ps_ctxt, i4_ctb_x, i4_ctb_y);
8774
8775 {
8776 #ifdef _DEBUG
8777 S32 wd = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
8778 ? 64
8779 : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
8780 S32 ht = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
8781 ? 64
8782 : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
8783 ASSERT(
8784 (wd * ht) ==
8785 ihevce_compute_area_of_valid_cus_in_ctb(
8786 &ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]));
8787 #endif
8788 }
8789 }
8790
8791 /* set the dependency for the corresponding row in enc loop */
8792 ihevce_dmgr_set_row_row_sync(
8793 pv_dep_mngr_encloop_dep_me,
8794 (i4_ctb_x + 1),
8795 i4_ctb_y,
8796 tile_col_idx /* Col Tile No. */);
8797
8798 left_ctb_in_diff_tile = 0;
8799 }
8800 }
8801 }
8802
8803 /**
8804 ********************************************************************************
8805 * @fn void hme_refine_no_encode(coarse_me_ctxt_t *ps_ctxt,
8806  *                                refine_prms_t *ps_refine_prms)
8807 *
8808 * @brief Top level entry point for refinement ME
8809 *
8810 * @param[in,out] ps_ctxt: ME Handle
8811 *
8812 * @param[in] ps_refine_prms : refinement layer prms
8813 *
8814 * @return None
8815 ********************************************************************************
8816 */
8817 void hme_refine_no_encode(
8818 coarse_me_ctxt_t *ps_ctxt,
8819 refine_prms_t *ps_refine_prms,
8820 multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
8821 S32 lyr_job_type,
8822 WORD32 i4_ping_pong,
8823 void **ppv_dep_mngr_hme_sync)
8824 {
8825 BLK_SIZE_T e_search_blk_size, e_result_blk_size;
8826 ME_QUALITY_PRESETS_T e_me_quality_presets =
8827 ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
8828
8829 /*************************************************************************/
8830 /* Complexity of search: Low to High */
8831 /*************************************************************************/
8832 SEARCH_COMPLEXITY_T e_search_complexity;
8833
8834 /*************************************************************************/
8835     /* Config parameter structures for various ME submodules                */
8836 /*************************************************************************/
8837 hme_search_prms_t s_search_prms_blk;
8838 mvbank_update_prms_t s_mv_update_prms;
8839
8840 /*************************************************************************/
8841 /* All types of search candidates for predictor based search. */
8842 /*************************************************************************/
8843 S32 num_init_candts = 0;
8844 search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
8845 search_node_t as_top_neighbours[4], as_left_neighbours[3];
8846 search_node_t *ps_candt_zeromv, *ps_candt_tl, *ps_candt_tr;
8847 search_node_t *ps_candt_l, *ps_candt_t;
8848 search_node_t *ps_candt_prj_br[2], *ps_candt_prj_b[2], *ps_candt_prj_r[2];
8849 search_node_t *ps_candt_prj_bl[2];
8850 search_node_t *ps_candt_prj_tr[2], *ps_candt_prj_t[2], *ps_candt_prj_tl[2];
8851 search_node_t *ps_candt_prj_coloc[2];
8852
8853 pf_get_wt_inp fp_get_wt_inp;
8854
8855 search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
8856 U32 au4_unique_node_map[MAP_X_MAX * 2];
8857
8858 /*EIID */
8859     WORD32 i4_num_inter_wins = 0; //debug counter used to collect EIID statistics
8860     WORD32 i4_num_comparisions = 0; //debug counter used to collect EIID statistics
8861 WORD32 i4_threshold_multiplier;
8862 WORD32 i4_threshold_divider;
8863 WORD32 i4_temporal_layer =
8864 ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_temporal_lyr_id;
8865
8866 /*************************************************************************/
8867     /* points to the search results for the blk level search (8x8/16x16)    */
8868 /*************************************************************************/
8869 search_results_t *ps_search_results;
8870
8871 /*************************************************************************/
8872 /* Coordinates */
8873 /*************************************************************************/
8874 S32 blk_x, i4_ctb_x, blk_id_in_ctb;
8875 //S32 i4_ctb_y;
8876 S32 pos_x, pos_y;
8877 S32 blk_id_in_full_ctb;
8878 S32 i4_num_srch_cands;
8879
8880 S32 blk_y;
8881
8882 /*************************************************************************/
8883 /* Related to dimensions of block being searched and pic dimensions */
8884 /*************************************************************************/
8885 S32 blk_wd, blk_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic;
8886 S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
8887 S32 num_results_prev_layer;
8888
8889 /*************************************************************************/
8890 /* Size of a basic unit for this layer. For non encode layers, we search */
8891 /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
8892 /* basic unit size is the ctb size. */
8893 /*************************************************************************/
8894 S32 unit_size;
8895
8896 /*************************************************************************/
8897 /* Pointers to context in current and coarser layers */
8898 /*************************************************************************/
8899 layer_ctxt_t *ps_curr_layer, *ps_coarse_layer;
8900
8901 /*************************************************************************/
8902 /* to store mv range per blk, and picture limit, allowed search range */
8903 /* range prms in hpel and qpel units as well */
8904 /*************************************************************************/
8905 range_prms_t s_range_prms_inp, s_range_prms_rec;
8906 range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
8907 /*************************************************************************/
8908 /* These variables are used to track number of references at different */
8909 /* stages of ME. */
8910 /*************************************************************************/
8911 S32 i4_num_ref_fpel, i4_num_ref_before_merge;
8912 S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
8913 S32 lambda_inp = ps_refine_prms->lambda_inp;
8914
8915 /*************************************************************************/
8916 /* When a layer is implicit, it means that it searches on 1 or 2 ref idx */
8917 /* Explicit means it searches on all active ref idx. */
8918 /*************************************************************************/
8919 S32 curr_layer_implicit, prev_layer_implicit;
8920
8921 /*************************************************************************/
8922 /* Variables for loop counts */
8923 /*************************************************************************/
8924 S32 id;
8925 S08 i1_ref_idx;
8926
8927 /*************************************************************************/
8928 /* Input pointer and stride */
8929 /*************************************************************************/
8930 U08 *pu1_inp;
8931 S32 i4_inp_stride;
8932
8933 S32 end_of_frame;
8934
8935 S32 num_sync_units_in_row;
8936
8937 PF_HME_PROJECT_COLOC_CANDT_FXN pf_hme_project_coloc_candt;
8938 ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
8939
8940 /*************************************************************************/
8941 /* Pointers to current and coarse layer are needed for projection */
8942 /* Pointer to prev layer are needed for other candts like coloc */
8943 /*************************************************************************/
8944 ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
8945
8946 ps_coarse_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id + 1];
8947
8948 num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
8949
8950     /* Function pointer is selected based on the C vs X86 macro             */
8951
8952 fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list)
8953 ->pf_get_wt_inp_8x8;
8954
8955 i4_inp_stride = ps_curr_layer->i4_inp_stride;
8956 i4_pic_wd = ps_curr_layer->i4_wd;
8957 i4_pic_ht = ps_curr_layer->i4_ht;
8958 e_search_complexity = ps_refine_prms->e_search_complexity;
8959
8960 end_of_frame = 0;
8961
8962 /* If the previous layer is non-encode layer, then use dyadic projection */
8963 if(0 == ps_ctxt->u1_encode[ps_refine_prms->i4_layer_id + 1])
8964 pf_hme_project_coloc_candt = hme_project_coloc_candt_dyadic;
8965 else
8966 pf_hme_project_coloc_candt = hme_project_coloc_candt;
8967
8968 /* This points to all the initial candts */
8969 ps_search_candts = &as_search_candts[0];
8970
8971 {
8972 e_search_blk_size = BLK_8x8;
8973 blk_wd = blk_ht = 8;
8974 blk_size_shift = 3;
8975 s_mv_update_prms.i4_shift = 0;
8976 /*********************************************************************/
8977         /* In case we do not encode this layer, we search 8x8 with or without*/
8978         /* 4x4 SAD enabled.                                                  */
8979 /*********************************************************************/
8980 {
8981 S32 i4_mask = (ENABLE_2Nx2N);
8982
8983 e_result_blk_size = BLK_8x8;
8984 if(ps_refine_prms->i4_enable_4x4_part)
8985 {
8986 i4_mask |= (ENABLE_NxN);
8987 e_result_blk_size = BLK_4x4;
8988 s_mv_update_prms.i4_shift = 1;
8989 }
8990
8991 s_search_prms_blk.i4_part_mask = i4_mask;
8992 }
8993
8994 unit_size = blk_wd;
8995 s_search_prms_blk.i4_inp_stride = unit_size;
8996 }
8997
8998 /* This is required to properly update the layer mv bank */
8999 s_mv_update_prms.e_search_blk_size = e_search_blk_size;
9000 s_search_prms_blk.e_blk_size = e_search_blk_size;
9001
9002 /*************************************************************************/
9003     /* If the current layer is explicit, then the number of ref frames is   */
9004     /* the same as in the previous layer. Else it will be 2                 */
9005 /*************************************************************************/
9006 i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
9007 if(ps_refine_prms->explicit_ref)
9008 {
9009 curr_layer_implicit = 0;
9010 i4_num_ref_fpel = i4_num_ref_prev_layer;
9011 /* 100578 : Using same mv cost fun. for all presets. */
9012 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_refine;
9013 }
9014 else
9015 {
9016 i4_num_ref_fpel = 2;
9017 curr_layer_implicit = 1;
9018 {
9019 if(ME_MEDIUM_SPEED > e_me_quality_presets)
9020 {
9021 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit;
9022 }
9023 else
9024 {
9025 #if USE_MODIFIED == 1
9026 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
9027 #else
9028 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
9029 #endif
9030 }
9031 }
9032 }
9033
9034 i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
9035 if(ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type ==
9036 IV_IDR_FRAME ||
9037 ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type == IV_I_FRAME)
9038 {
9039 i4_num_ref_fpel = 1;
9040 }
9041 if(i4_num_ref_prev_layer <= 2)
9042 {
9043 prev_layer_implicit = 1;
9044 curr_layer_implicit = 1;
9045 i4_num_ref_each_dir = 1;
9046 }
9047 else
9048 {
9049         /* It is assumed that we have an equal number of references in each dir */
9050 //ASSERT(!(i4_num_ref_prev_layer & 1));
9051 prev_layer_implicit = 0;
9052 i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
9053 }
9054 s_mv_update_prms.i4_num_ref = i4_num_ref_fpel;
9055 s_mv_update_prms.i4_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
9056 s_mv_update_prms.i4_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
9057
9058 /* this can be kept to 1 or 2 */
9059 i4_num_ref_before_merge = 2;
9060 i4_num_ref_before_merge = MIN(i4_num_ref_before_merge, i4_num_ref_fpel);
9061
9062 /* Set up place holders to hold the search nodes of each initial candt */
9063 for(i = 0; i < MAX_INIT_CANDTS; i++)
9064 {
9065 ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
9066 INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
9067 }
9068
9069 /* redundant, but doing it here since it is used in pred ctxt init */
9070 ps_candt_zeromv = ps_search_candts[0].ps_search_node;
9071 for(i = 0; i < 3; i++)
9072 {
9073 search_node_t *ps_search_node;
9074 ps_search_node = &as_left_neighbours[i];
9075 INIT_SEARCH_NODE(ps_search_node, 0);
9076 ps_search_node = &as_top_neighbours[i];
9077 INIT_SEARCH_NODE(ps_search_node, 0);
9078 }
9079
9080 INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
9081     /* bottom left node is never available for the blk being searched */
9082 as_left_neighbours[2].u1_is_avail = 0;
9083 /*************************************************************************/
9084 /* Initialize all the search results structure here. We update all the */
9085 /* search results to default values, and configure things like blk sizes */
9086 /*************************************************************************/
9087 if(ps_refine_prms->i4_encode == 0)
9088 {
9089 S32 pred_lx;
9090 search_results_t *ps_search_results;
9091
9092 ps_search_results = &ps_ctxt->s_search_results_8x8;
9093 hme_init_search_results(
9094 ps_search_results,
9095 i4_num_ref_fpel,
9096 ps_refine_prms->i4_num_fpel_results,
9097 ps_refine_prms->i4_num_results_per_part,
9098 e_search_blk_size,
9099 0,
9100 0,
9101 &ps_ctxt->au1_is_past[0]);
9102 for(pred_lx = 0; pred_lx < 2; pred_lx++)
9103 {
9104 hme_init_pred_ctxt_no_encode(
9105 &ps_search_results->as_pred_ctxt[pred_lx],
9106 ps_search_results,
9107 &as_top_neighbours[0],
9108 &as_left_neighbours[0],
9109 &ps_candt_prj_coloc[0],
9110 ps_candt_zeromv,
9111 ps_candt_zeromv,
9112 pred_lx,
9113 lambda_inp,
9114 ps_refine_prms->lambda_q_shift,
9115 &ps_ctxt->apu1_ref_bits_tlu_lc[0],
9116 &ps_ctxt->ai2_ref_scf[0]);
9117 }
9118 }
9119
9120 /*********************************************************************/
9121 /* Initialize the dyn. search range params. for each reference index */
9122 /* in current layer ctxt */
9123 /*********************************************************************/
9124     /* Only for P pics. For P, both flags are 0; I and B have them mutually exclusive */
9125 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
9126 {
9127 WORD32 ref_ctr;
9128
9129 for(ref_ctr = 0; ref_ctr < s_mv_update_prms.i4_num_ref; ref_ctr++)
9130 {
9131 INIT_DYN_SEARCH_PRMS(
9132 &ps_ctxt->s_coarse_dyn_range_prms
9133 .as_dyn_range_prms[ps_refine_prms->i4_layer_id][ref_ctr],
9134 ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
9135 }
9136 }
9137
9138 /* Next set up initial candidates according to a given set of rules. */
9139 /* The number of initial candidates affects the quality of ME in the */
9140 /* case of motion with multiple degrees of freedom. In case of simple */
9141 /* translational motion, a current and a few causal and non causal */
9142 /* candts would suffice. More candidates help to cover more complex */
9143 /* cases like partitions, rotation/zoom, occlusion in/out, fine motion */
9144 /* where multiple ref helps etc. */
9145 /* The candidate choice also depends on the following parameters. */
9146 /* e_search_complexity: SRCH_CX_LOW, SRCH_CX_MED, SRCH_CX_HIGH */
9147 /* Whether we encode or not, and the type of search across reference */
9148 /* i.e. the previous layer may have been explicit/implicit and curr */
9149 /* layer may be explicit/implicit */
9150
9151     /* 0, 0, L, T, projected coloc best always present by default */
9152 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(ZERO_MV, e_me_quality_presets);
9153 ps_candt_zeromv = ps_search_candts[id].ps_search_node;
9154 ps_search_candts[id].u1_num_steps_refine = 0;
9155 ps_candt_zeromv->s_mv.i2_mvx = 0;
9156 ps_candt_zeromv->s_mv.i2_mvy = 0;
9157
9158 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(SPATIAL_LEFT0, e_me_quality_presets);
9159 ps_candt_l = ps_search_candts[id].ps_search_node;
9160 ps_search_candts[id].u1_num_steps_refine = 0;
9161
9162 /* Even in ME_HIGH_SPEED mode, in layer 0, blocks */
9163 /* not at the CTB boundary use the causal T and */
9164 /* not the projected T, although the candidate is */
9165 /* still pointed to by ps_candt_prj_t[0] */
9166 if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9167 {
9168 /* Using Projected top to eliminate sync */
9169 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9170 PROJECTED_TOP0, e_me_quality_presets);
9171 ps_candt_prj_t[0] = ps_search_candts[id].ps_search_node;
9172 ps_search_candts[id].u1_num_steps_refine = 1;
9173 }
9174 else
9175 {
9176 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9177 SPATIAL_TOP0, e_me_quality_presets);
9178 ps_candt_t = ps_search_candts[id].ps_search_node;
9179 ps_search_candts[id].u1_num_steps_refine = 0;
9180 }
9181
9182 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9183 PROJECTED_COLOC0, e_me_quality_presets);
9184 ps_candt_prj_coloc[0] = ps_search_candts[id].ps_search_node;
9185 ps_search_candts[id].u1_num_steps_refine = 1;
9186
9187 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9188 PROJECTED_COLOC1, e_me_quality_presets);
9189 ps_candt_prj_coloc[1] = ps_search_candts[id].ps_search_node;
9190 ps_search_candts[id].u1_num_steps_refine = 1;
9191
9192 if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9193 {
9194 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9195 PROJECTED_TOP_RIGHT0, e_me_quality_presets);
9196 ps_candt_prj_tr[0] = ps_search_candts[id].ps_search_node;
9197 ps_search_candts[id].u1_num_steps_refine = 1;
9198
9199 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9200 PROJECTED_TOP_LEFT0, e_me_quality_presets);
9201 ps_candt_prj_tl[0] = ps_search_candts[id].ps_search_node;
9202 ps_search_candts[id].u1_num_steps_refine = 1;
9203 }
9204 else
9205 {
9206 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9207 SPATIAL_TOP_RIGHT0, e_me_quality_presets);
9208 ps_candt_tr = ps_search_candts[id].ps_search_node;
9209 ps_search_candts[id].u1_num_steps_refine = 0;
9210
9211 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9212 SPATIAL_TOP_LEFT0, e_me_quality_presets);
9213 ps_candt_tl = ps_search_candts[id].ps_search_node;
9214 ps_search_candts[id].u1_num_steps_refine = 0;
9215 }
9216
9217 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9218 PROJECTED_RIGHT0, e_me_quality_presets);
9219 ps_candt_prj_r[0] = ps_search_candts[id].ps_search_node;
9220 ps_search_candts[id].u1_num_steps_refine = 1;
9221
9222 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9223 PROJECTED_BOTTOM0, e_me_quality_presets);
9224 ps_candt_prj_b[0] = ps_search_candts[id].ps_search_node;
9225 ps_search_candts[id].u1_num_steps_refine = 1;
9226
9227 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9228 PROJECTED_BOTTOM_RIGHT0, e_me_quality_presets);
9229 ps_candt_prj_br[0] = ps_search_candts[id].ps_search_node;
9230 ps_search_candts[id].u1_num_steps_refine = 1;
9231
9232 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9233 PROJECTED_BOTTOM_LEFT0, e_me_quality_presets);
9234 ps_candt_prj_bl[0] = ps_search_candts[id].ps_search_node;
9235 ps_search_candts[id].u1_num_steps_refine = 1;
9236
9237 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9238 PROJECTED_RIGHT1, e_me_quality_presets);
9239 ps_candt_prj_r[1] = ps_search_candts[id].ps_search_node;
9240 ps_search_candts[id].u1_num_steps_refine = 1;
9241
9242 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9243 PROJECTED_BOTTOM1, e_me_quality_presets);
9244 ps_candt_prj_b[1] = ps_search_candts[id].ps_search_node;
9245 ps_search_candts[id].u1_num_steps_refine = 1;
9246
9247 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9248 PROJECTED_BOTTOM_RIGHT1, e_me_quality_presets);
9249 ps_candt_prj_br[1] = ps_search_candts[id].ps_search_node;
9250 ps_search_candts[id].u1_num_steps_refine = 1;
9251
9252 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9253 PROJECTED_BOTTOM_LEFT1, e_me_quality_presets);
9254 ps_candt_prj_bl[1] = ps_search_candts[id].ps_search_node;
9255 ps_search_candts[id].u1_num_steps_refine = 1;
9256
9257 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(PROJECTED_TOP1, e_me_quality_presets);
9258 ps_candt_prj_t[1] = ps_search_candts[id].ps_search_node;
9259 ps_search_candts[id].u1_num_steps_refine = 1;
9260
9261 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9262 PROJECTED_TOP_RIGHT1, e_me_quality_presets);
9263 ps_candt_prj_tr[1] = ps_search_candts[id].ps_search_node;
9264 ps_search_candts[id].u1_num_steps_refine = 1;
9265
9266 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9267 PROJECTED_TOP_LEFT1, e_me_quality_presets);
9268 ps_candt_prj_tl[1] = ps_search_candts[id].ps_search_node;
9269 ps_search_candts[id].u1_num_steps_refine = 1;
9270
9271 /*************************************************************************/
9272 /* Now that the candidates have been ordered, to choose the right number */
9273 /* of initial candidates. */
9274 /*************************************************************************/
9275 if(curr_layer_implicit && !prev_layer_implicit)
9276 {
9277 if(e_search_complexity == SEARCH_CX_LOW)
9278 num_init_candts = 7;
9279 else if(e_search_complexity == SEARCH_CX_MED)
9280 num_init_candts = 13;
9281 else if(e_search_complexity == SEARCH_CX_HIGH)
9282 num_init_candts = 18;
9283 else
9284 ASSERT(0);
9285 }
9286 else
9287 {
9288 if(e_search_complexity == SEARCH_CX_LOW)
9289 num_init_candts = 5;
9290 else if(e_search_complexity == SEARCH_CX_MED)
9291 num_init_candts = 11;
9292 else if(e_search_complexity == SEARCH_CX_HIGH)
9293 num_init_candts = 16;
9294 else
9295 ASSERT(0);
9296 }
9297
9298 if(ME_XTREME_SPEED_25 == e_me_quality_presets)
9299 {
9300 num_init_candts = NUM_INIT_SEARCH_CANDS_IN_L1_AND_L2_ME_IN_XS25;
9301 }
9302
9303 /*************************************************************************/
9304 /* The following search parameters are fixed throughout the search across*/
9305 /* all blks. So these are configured outside processing loop */
9306 /*************************************************************************/
9307 s_search_prms_blk.i4_num_init_candts = num_init_candts;
9308 s_search_prms_blk.i4_start_step = 1;
9309 s_search_prms_blk.i4_use_satd = 0;
9310 s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
9311 /* we use recon only for encoded layers, otherwise it is not available */
9312 s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
9313
9314 s_search_prms_blk.ps_search_candts = ps_search_candts;
9315 /* We use the same mv_range for all ref. pic. So assign to member 0 */
9316 if(s_search_prms_blk.i4_use_rec)
9317 s_search_prms_blk.aps_mv_range[0] = &s_range_prms_rec;
9318 else
9319 s_search_prms_blk.aps_mv_range[0] = &s_range_prms_inp;
9320 /*************************************************************************/
9321 /* Initialize coordinates. Meaning as follows */
9322 /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks */
9323 /* blk_y : same as above, y coord. */
9324 /* num_blks_in_this_ctb : number of blks in this given ctb that starts */
9325 /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries. */
9326     /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left  */
9327 /* corner of the picture. Always multiple of 64. */
9328 /* blk_id_in_ctb : encode order id of the blk in the ctb. */
9329 /*************************************************************************/
9330 blk_y = 0;
9331 blk_id_in_ctb = 0;
9332
9333 GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
9334
9335     /* Get the number of sync units in a row based on encode/non-encode layer */
9336 num_sync_units_in_row = num_blks_in_row;
9337
9338 /*************************************************************************/
9339 /* Picture limit on all 4 sides. This will be used to set mv limits for */
9340     /* every block given its coordinate. Note this assumes that the min amt  */
9341     /* of padding to right of pic is equal to the blk size. If we go all the */
9342     /* way up to 64x64, then the min padding on right side of picture should */
9343 /* be 64, and also on bottom side of picture. */
9344 /*************************************************************************/
9345 SET_PIC_LIMIT(
9346 s_pic_limit_inp,
9347 ps_curr_layer->i4_pad_x_inp,
9348 ps_curr_layer->i4_pad_y_inp,
9349 ps_curr_layer->i4_wd,
9350 ps_curr_layer->i4_ht,
9351 s_search_prms_blk.i4_num_steps_post_refine);
9352
9353 SET_PIC_LIMIT(
9354 s_pic_limit_rec,
9355 ps_curr_layer->i4_pad_x_rec,
9356 ps_curr_layer->i4_pad_y_rec,
9357 ps_curr_layer->i4_wd,
9358 ps_curr_layer->i4_ht,
9359 s_search_prms_blk.i4_num_steps_post_refine);
9360
9361 /*************************************************************************/
9362 /* set the MV limit per ref. pic. */
9363 /* - P pic. : Based on the config params. */
9364 /* - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
9365 /*************************************************************************/
9366 {
9367 WORD32 ref_ctr;
9368 /* Only for B/b pic. */
9369 if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
9370 {
9371 WORD16 i2_mv_y_per_poc, i2_max_mv_y;
9372 WORD32 cur_poc, ref_poc, abs_poc_diff;
9373
9374 cur_poc = ps_ctxt->i4_curr_poc;
9375
9376 /* Get abs MAX for symmetric search */
9377 i2_mv_y_per_poc = MAX(
9378 ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[ps_refine_prms->i4_layer_id],
9379 (ABS(ps_ctxt->s_coarse_dyn_range_prms
9380 .i2_dyn_min_y_per_poc[ps_refine_prms->i4_layer_id])));
9381
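            /* The vertical range scales with the POC distance: e.g. if the      */
            /* tracked per-POC vertical motion is 12 and a reference is 3 POCs   */
            /* away, the cap becomes MIN(36, ps_curr_layer->i2_max_mv_y).        */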
9382 for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
9383 {
9384 ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr];
9385 abs_poc_diff = ABS((cur_poc - ref_poc));
9386 /* Get the cur. max MV based on POC distance */
9387 i2_max_mv_y = i2_mv_y_per_poc * abs_poc_diff;
9388 i2_max_mv_y = MIN(i2_max_mv_y, ps_curr_layer->i2_max_mv_y);
9389
9390 as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
9391 as_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y;
9392 as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
9393 as_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y;
9394 }
9395 }
9396 else
9397 {
9398 /* Set the Config. File Params for P pic. */
9399 for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
9400 {
9401 as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
9402 as_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y;
9403 as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
9404 as_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y;
9405 }
9406 }
9407 }
9408
9409 /* EIID: Calculate threshold based on quality preset and/or temporal layers */
9410 if(e_me_quality_presets == ME_MEDIUM_SPEED)
9411 {
9412 i4_threshold_multiplier = 1;
9413 i4_threshold_divider = 4;
9414 }
9415 else if(e_me_quality_presets == ME_HIGH_SPEED)
9416 {
9417 i4_threshold_multiplier = 1;
9418 i4_threshold_divider = 2;
9419 }
9420 else if((e_me_quality_presets == ME_XTREME_SPEED) || (e_me_quality_presets == ME_XTREME_SPEED_25))
9421 {
9422 #if OLD_XTREME_SPEED
9423 /* Hard coding the temporal ID value to 1, if it is older xtreme speed */
9424 i4_temporal_layer = 1;
9425 #endif
9426 if(i4_temporal_layer == 0)
9427 {
9428 i4_threshold_multiplier = 3;
9429 i4_threshold_divider = 4;
9430 }
9431 else if(i4_temporal_layer == 1)
9432 {
9433 i4_threshold_multiplier = 3;
9434 i4_threshold_divider = 4;
9435 }
9436 else if(i4_temporal_layer == 2)
9437 {
9438 i4_threshold_multiplier = 1;
9439 i4_threshold_divider = 1;
9440 }
9441 else
9442 {
9443 i4_threshold_multiplier = 5;
9444 i4_threshold_divider = 4;
9445 }
9446 }
9447 else if(e_me_quality_presets == ME_HIGH_QUALITY)
9448 {
9449 i4_threshold_multiplier = 1;
9450 i4_threshold_divider = 1;
9451 }
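    /* Summary of the multiplier/divider ratios set above:                  */
    /*   MEDIUM_SPEED -> 1/4, HIGH_SPEED -> 1/2, HIGH_QUALITY -> 1/1,       */
    /*   XTREME_SPEED(_25) -> 3/4 for temporal layers 0 and 1, 1/1 for      */
    /*   layer 2 and 5/4 for higher layers.                                 */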
9452
9453 /*************************************************************************/
9454 /*************************************************************************/
9455 /*************************************************************************/
9456 /* START OF THE CORE LOOP */
9457 /* If Encode is 0, then we just loop over each blk */
9458 /*************************************************************************/
9459 /*************************************************************************/
9460 /*************************************************************************/
9461 while(0 == end_of_frame)
9462 {
9463 job_queue_t *ps_job;
9464 ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_row; //EIID
9465         WORD32 i4_ctb_row_ctr; //CTB row index, i.e. (blk row counter / 4)
9466         WORD32 i4_num_ctbs_in_row = (num_blks_in_row + 3) / 4; //calculation verified for L1 only
9467         //+3 to get the ceiling when dividing by 4
9468         WORD32 i4_num_4x4_blocks_in_ctb_at_l1 =
9469             8 * 8; //assumes a 32x32 CTB at L1; hardcoded for now
9470         //if a variable exists for the ctb size, use it and derive this value from it
9471 WORD32 offset_val, check_dep_pos, set_dep_pos;
9472 void *pv_hme_dep_mngr;
9473 ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row;
9474
9475 /* Get the current layer HME Dep Mngr */
9476 /* Note : Use layer_id - 1 in HME layers */
9477
9478 pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_refine_prms->i4_layer_id - 1];
9479
9480 /* Get the current row from the job queue */
9481 ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
9482 ps_multi_thrd_ctxt, lyr_job_type, 1, i4_ping_pong);
9483
9484 /* If all rows are done, set the end of process flag to 1, */
9485 /* and the current row to -1 */
9486 if(NULL == ps_job)
9487 {
9488 blk_y = -1;
9489 end_of_frame = 1;
9490
9491 continue;
9492 }
9493
9494 if(1 == ps_ctxt->s_frm_prms.is_i_pic)
9495 {
9496 /* set the output dependency of current row */
9497 ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
9498 continue;
9499 }
9500
9501 blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
9502 blk_x = 0;
9503 i4_ctb_x = 0;
9504
9505 /* wait for Corresponding Pre intra Job to be completed */
9506 if(1 == ps_refine_prms->i4_layer_id)
9507 {
9508 volatile UWORD32 i4_l1_done;
9509 volatile UWORD32 *pi4_l1_done;
9510 pi4_l1_done = (volatile UWORD32 *)&ps_multi_thrd_ctxt
9511 ->aai4_l1_pre_intra_done[i4_ping_pong][blk_y >> 2];
9512 i4_l1_done = *pi4_l1_done;
9513 while(!i4_l1_done)
9514 {
9515 i4_l1_done = *pi4_l1_done;
9516 }
9517 }
9518 /* Set Variables for Dep. Checking and Setting */
9519 set_dep_pos = blk_y + 1;
9520 if(blk_y > 0)
9521 {
9522 offset_val = 2;
9523 check_dep_pos = blk_y - 1;
9524 }
9525 else
9526 {
9527 /* First row should run without waiting */
9528 offset_val = -1;
9529 check_dep_pos = 0;
9530 }
9531
9532         /* EIID: calculate ed_blk_ctxt pointer for current row */
9533         /* valid only for layer-1; not verified or used for other layers */
9534 i4_ctb_row_ctr = blk_y / 4;
9535 ps_ed_blk_ctxt_curr_row =
9536 ps_ctxt->ps_ed_blk + (i4_ctb_row_ctr * i4_num_ctbs_in_row *
9537 i4_num_4x4_blocks_in_ctb_at_l1); //valid for L1 only
9538 ps_ed_ctb_l1_row = ps_ctxt->ps_ed_ctb_l1 + (i4_ctb_row_ctr * i4_num_ctbs_in_row);
9539
9540         /* for non-encode layers i4_ctb_x will be same as blk_x */
9541         /* loop over all the units in a row */
9542 for(; i4_ctb_x < num_sync_units_in_row; i4_ctb_x++)
9543 {
9544             ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_ctb; //EIID
9545 ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_curr;
9546 WORD32 i4_ctb_blk_ctr = i4_ctb_x / 4;
9547
9548 /* Wait till top row block is processed */
9549 /* Currently checking till top right block*/
9550
9551 /* Disabled since all candidates, except for */
9552 /* L and C, are projected from the coarser layer, */
9553 /* only in ME_HIGH_SPEED mode */
9554 if((ME_MEDIUM_SPEED > e_me_quality_presets))
9555 {
9556 if(i4_ctb_x < (num_sync_units_in_row - 1))
9557 {
9558 ihevce_dmgr_chk_row_row_sync(
9559 pv_hme_dep_mngr,
9560 i4_ctb_x,
9561 offset_val,
9562 check_dep_pos,
9563 0, /* Col Tile No. : Not supported in PreEnc*/
9564 ps_ctxt->thrd_id);
9565 }
9566 }
9567
9568 {
9569                 /* for non-encode layers only one block is processed */
9570 num_blks_in_this_ctb = 1;
9571 }
9572
9573 /* EIID: derive ed_ctxt ptr for current CTB */
9574 ps_ed_blk_ctxt_curr_ctb =
9575 ps_ed_blk_ctxt_curr_row +
9576 (i4_ctb_blk_ctr *
9577 i4_num_4x4_blocks_in_ctb_at_l1); //currently valid for l1 layer only
9578 ps_ed_ctb_l1_curr = ps_ed_ctb_l1_row + i4_ctb_blk_ctr;
9579
9580             /* loop over all the blocks in the CTB (always 1 for non-encode layers) */
9581 for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
9582 {
9583 {
9584 /* non encode layer */
9585 blk_x = i4_ctb_x;
9586 blk_id_in_full_ctb = 0;
9587 s_search_prms_blk.i4_cu_x_off = s_search_prms_blk.i4_cu_y_off = 0;
9588 }
9589
9590                 /* get the current input blk pointer */
9591 pos_x = blk_x << blk_size_shift;
9592 pos_y = blk_y << blk_size_shift;
9593 pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
9594
9595 /*********************************************************************/
9596 /* replicate the inp buffer at blk or ctb level for each ref id, */
9597 /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
9598 /* thereby avoiding a bloat up of memory. If we did all references */
9599 /* weighted pred, we will end up with a duplicate copy of each ref */
9600 /* at each layer, since we need to preserve the original reference. */
9601 /* ToDo: Need to observe performance with this mechanism and compare */
9602 /* with case where ref is weighted. */
9603 /*********************************************************************/
9604 if(blk_id_in_ctb == 0)
9605 {
9606 fp_get_wt_inp(
9607 ps_curr_layer,
9608 &ps_ctxt->s_wt_pred,
9609 unit_size,
9610 pos_x,
9611 pos_y,
9612 unit_size,
9613 ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
9614 ps_ctxt->i4_wt_pred_enable_flag);
9615 }
9616
9617 s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
9618 s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
9619                 /* Select a suitable search results structure from the context */
9620 {
9621 ps_search_results = &ps_ctxt->s_search_results_8x8;
9622 }
9623
9624 s_search_prms_blk.ps_search_results = ps_search_results;
9625
9626 /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
9627 hme_reset_search_results(
9628 ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
9629
9630 /* Loop across different Ref IDx */
9631 for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref_fpel; i1_ref_idx++)
9632 {
9633 S32 next_blk_offset = (e_search_blk_size == BLK_16x16) ? 22 : 12;
9634 S32 prev_blk_offset = 6;
9635 S32 resultid;
9636
9637 /*********************************************************************/
9638 /* For every blk in the picture, the search range needs to be derived*/
9639 /* Any blk can have any mv, but practical search constraints are */
9640 /* imposed by the picture boundary and amt of padding. */
9641 /*********************************************************************/
9642 /* MV limit is different based on ref. PIC */
9643 hme_derive_search_range(
9644 &s_range_prms_inp,
9645 &s_pic_limit_inp,
9646 &as_mv_limit[i1_ref_idx],
9647 pos_x,
9648 pos_y,
9649 blk_wd,
9650 blk_ht);
9651 hme_derive_search_range(
9652 &s_range_prms_rec,
9653 &s_pic_limit_rec,
9654 &as_mv_limit[i1_ref_idx],
9655 pos_x,
9656 pos_y,
9657 blk_wd,
9658 blk_ht);
9659
9660 s_search_prms_blk.i1_ref_idx = i1_ref_idx;
9661 ps_candt_zeromv->i1_ref_idx = i1_ref_idx;
9662
9663 i4_num_srch_cands = 1;
9664
9665 if(1 != ps_refine_prms->i4_layer_id)
9666 {
9667 S32 x, y;
9668 x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
9669 y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
9670
9671 if(ME_MEDIUM_SPEED > e_me_quality_presets)
9672 {
9673 hme_get_spatial_candt(
9674 ps_curr_layer,
9675 e_search_blk_size,
9676 blk_x,
9677 blk_y,
9678 i1_ref_idx,
9679 &as_top_neighbours[0],
9680 &as_left_neighbours[0],
9681 0,
9682 ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
9683 0,
9684 ps_refine_prms->i4_encode);
9685
9686 *ps_candt_tr = as_top_neighbours[3];
9687 *ps_candt_t = as_top_neighbours[1];
9688 *ps_candt_tl = as_top_neighbours[0];
9689 i4_num_srch_cands += 3;
9690 }
9691 else
9692 {
9693 layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
9694 S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
9695 S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
9696 search_node_t *ps_search_node;
9697 S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
9698 hme_mv_t *ps_mv, *ps_mv_base;
9699 S08 *pi1_ref_idx, *pi1_ref_idx_base;
9700 S32 jump = 1, mvs_in_blk, mvs_in_row;
9701 S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
9702
9703 if(i4_blk_size1 != i4_blk_size2)
9704 {
9705 blk_x_temp <<= 1;
9706 blk_y_temp <<= 1;
9707 jump = 2;
9708 if((i4_blk_size1 << 2) == i4_blk_size2)
9709 {
9710 blk_x_temp <<= 1;
9711 blk_y_temp <<= 1;
9712 jump = 4;
9713 }
9714 }
9715
9716 mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
9717 mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
9718
9719                             /* Adjust the blk coord to point to top left locn */
9720 blk_x_temp -= 1;
9721 blk_y_temp -= 1;
9722
9723 /* Pick up the mvs from the location */
9724 i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
9725 i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
9726
9727 ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
9728 pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
9729
9730 ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
9731 pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
9732
9733 ps_mv_base = ps_mv;
9734 pi1_ref_idx_base = pi1_ref_idx;
9735
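                            /* In the faster presets only the spatial left candidate  */
                            /* is read from the current layer's MV bank: the base     */
                            /* offset points at the top-left neighbour, so adding     */
                            /* mvs_in_row lands on the left neighbour of this block.  */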
9736 ps_search_node = &as_left_neighbours[0];
9737 ps_mv = ps_mv_base + mvs_in_row;
9738 pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
9739 COPY_MV_TO_SEARCH_NODE(
9740 ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
9741
9742 i4_num_srch_cands++;
9743 }
9744 }
9745 else
9746 {
9747 S32 x, y;
9748 x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
9749 y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
9750
9751 if(ME_MEDIUM_SPEED > e_me_quality_presets)
9752 {
9753 hme_get_spatial_candt_in_l1_me(
9754 ps_curr_layer,
9755 e_search_blk_size,
9756 blk_x,
9757 blk_y,
9758 i1_ref_idx,
9759 !ps_search_results->pu1_is_past[i1_ref_idx],
9760 &as_top_neighbours[0],
9761 &as_left_neighbours[0],
9762 0,
9763 ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
9764 0,
9765 ps_ctxt->s_frm_prms.u1_num_active_ref_l0,
9766 ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
9767
9768 *ps_candt_tr = as_top_neighbours[3];
9769 *ps_candt_t = as_top_neighbours[1];
9770 *ps_candt_tl = as_top_neighbours[0];
9771
9772 i4_num_srch_cands += 3;
9773 }
9774 else
9775 {
9776 layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
9777 S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
9778 S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
9779 S32 i4_mv_pos_in_implicit_array;
9780 search_node_t *ps_search_node;
9781 S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
9782 hme_mv_t *ps_mv, *ps_mv_base;
9783 S08 *pi1_ref_idx, *pi1_ref_idx_base;
9784 S32 jump = 1, mvs_in_blk, mvs_in_row;
9785 S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
9786 U08 u1_pred_dir = !ps_search_results->pu1_is_past[i1_ref_idx];
9787 S32 i4_num_results_in_given_dir =
9788 ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
9789 ps_ctxt->s_frm_prms.u1_num_active_ref_l1)
9790 : (ps_layer_mvbank->i4_num_mvs_per_ref *
9791 ps_ctxt->s_frm_prms.u1_num_active_ref_l0));
9792
9793 if(i4_blk_size1 != i4_blk_size2)
9794 {
9795 blk_x_temp <<= 1;
9796 blk_y_temp <<= 1;
9797 jump = 2;
9798 if((i4_blk_size1 << 2) == i4_blk_size2)
9799 {
9800 blk_x_temp <<= 1;
9801 blk_y_temp <<= 1;
9802 jump = 4;
9803 }
9804 }
9805
9806 mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
9807 mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
9808
9809                             /* Adjust the blk coord to point to top left locn */
9810 blk_x_temp -= 1;
9811 blk_y_temp -= 1;
9812
9813 /* Pick up the mvs from the location */
9814 i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
9815 i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
9816
9817 i4_offset +=
9818 ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
9819 ps_ctxt->s_frm_prms.u1_num_active_ref_l0)
9820 : 0);
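                            /* The implicit MV bank stores all L0 results first and   */
                            /* then the L1 results, so a future (L1) reference skips  */
                            /* the (num_mvs_per_ref * u1_num_active_ref_l0) L0 entries */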
9821
9822 ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
9823 pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
9824
9825 ps_mv_base = ps_mv;
9826 pi1_ref_idx_base = pi1_ref_idx;
9827
9828 {
9829 /* ps_mv and pi1_ref_idx now point to the top left locn */
9830 ps_search_node = &as_left_neighbours[0];
9831 ps_mv = ps_mv_base + mvs_in_row;
9832 pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
9833
9834 i4_mv_pos_in_implicit_array =
9835 hme_find_pos_of_implicitly_stored_ref_id(
9836 pi1_ref_idx, i1_ref_idx, 0, i4_num_results_in_given_dir);
9837
9838 if(-1 != i4_mv_pos_in_implicit_array)
9839 {
9840 COPY_MV_TO_SEARCH_NODE(
9841 ps_search_node,
9842 &ps_mv[i4_mv_pos_in_implicit_array],
9843 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
9844 i1_ref_idx,
9845 shift);
9846 }
9847 else
9848 {
9849 ps_search_node->u1_is_avail = 0;
9850 ps_search_node->s_mv.i2_mvx = 0;
9851 ps_search_node->s_mv.i2_mvy = 0;
9852 ps_search_node->i1_ref_idx = i1_ref_idx;
9853 }
9854
9855 i4_num_srch_cands++;
9856 }
9857 }
9858 }
9859
9860 *ps_candt_l = as_left_neighbours[0];
9861
9862                     /* when 16x16 is searched in an encode layer, and the prev layer */
9863                     /* stores results for 4x4 blks, we project 5 candts corresponding */
9864                     /* to (2,2), (2,14), (14,2), (14,14) and the 2nd best of (2,2) */
9865                     /* However in other cases, only the (2,2) best and 2nd best are reqd */
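                    /* A sketch of the projection: for dyadic layers the point        */
                    /* (pos_x + 2, pos_y + 2) maps to roughly half that position in   */
                    /* the coarser layer and the stored MV is scaled up to this       */
                    /* layer's resolution (see hme_project_coloc_candt_dyadic for     */
                    /* the exact mapping).                                            */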
9866 resultid = 0;
9867 pf_hme_project_coloc_candt(
9868 ps_candt_prj_coloc[0],
9869 ps_curr_layer,
9870 ps_coarse_layer,
9871 pos_x + 2,
9872 pos_y + 2,
9873 i1_ref_idx,
9874 resultid);
9875
9876 i4_num_srch_cands++;
9877
9878 resultid = 1;
9879 if(num_results_prev_layer > 1)
9880 {
9881 pf_hme_project_coloc_candt(
9882 ps_candt_prj_coloc[1],
9883 ps_curr_layer,
9884 ps_coarse_layer,
9885 pos_x + 2,
9886 pos_y + 2,
9887 i1_ref_idx,
9888 resultid);
9889
9890 i4_num_srch_cands++;
9891 }
9892
9893 resultid = 0;
9894
9895 if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9896 {
9897 pf_hme_project_coloc_candt(
9898 ps_candt_prj_t[0],
9899 ps_curr_layer,
9900 ps_coarse_layer,
9901 pos_x,
9902 pos_y - prev_blk_offset,
9903 i1_ref_idx,
9904 resultid);
9905
9906 i4_num_srch_cands++;
9907 }
9908
9909 {
9910 pf_hme_project_coloc_candt(
9911 ps_candt_prj_br[0],
9912 ps_curr_layer,
9913 ps_coarse_layer,
9914 pos_x + next_blk_offset,
9915 pos_y + next_blk_offset,
9916 i1_ref_idx,
9917 resultid);
9918 pf_hme_project_coloc_candt(
9919 ps_candt_prj_bl[0],
9920 ps_curr_layer,
9921 ps_coarse_layer,
9922 pos_x - prev_blk_offset,
9923 pos_y + next_blk_offset,
9924 i1_ref_idx,
9925 resultid);
9926 pf_hme_project_coloc_candt(
9927 ps_candt_prj_r[0],
9928 ps_curr_layer,
9929 ps_coarse_layer,
9930 pos_x + next_blk_offset,
9931 pos_y,
9932 i1_ref_idx,
9933 resultid);
9934 pf_hme_project_coloc_candt(
9935 ps_candt_prj_b[0],
9936 ps_curr_layer,
9937 ps_coarse_layer,
9938 pos_x,
9939 pos_y + next_blk_offset,
9940 i1_ref_idx,
9941 resultid);
9942
9943 i4_num_srch_cands += 4;
9944
9945 if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9946 {
9947 pf_hme_project_coloc_candt(
9948 ps_candt_prj_tr[0],
9949 ps_curr_layer,
9950 ps_coarse_layer,
9951 pos_x + next_blk_offset,
9952 pos_y - prev_blk_offset,
9953 i1_ref_idx,
9954 resultid);
9955 pf_hme_project_coloc_candt(
9956 ps_candt_prj_tl[0],
9957 ps_curr_layer,
9958 ps_coarse_layer,
9959 pos_x - prev_blk_offset,
9960 pos_y - prev_blk_offset,
9961 i1_ref_idx,
9962 resultid);
9963
9964 i4_num_srch_cands += 2;
9965 }
9966 }
9967 if((num_results_prev_layer > 1) && (e_search_complexity >= SEARCH_CX_MED))
9968 {
9969 resultid = 1;
9970 pf_hme_project_coloc_candt(
9971 ps_candt_prj_br[1],
9972 ps_curr_layer,
9973 ps_coarse_layer,
9974 pos_x + next_blk_offset,
9975 pos_y + next_blk_offset,
9976 i1_ref_idx,
9977 resultid);
9978 pf_hme_project_coloc_candt(
9979 ps_candt_prj_bl[1],
9980 ps_curr_layer,
9981 ps_coarse_layer,
9982 pos_x - prev_blk_offset,
9983 pos_y + next_blk_offset,
9984 i1_ref_idx,
9985 resultid);
9986 pf_hme_project_coloc_candt(
9987 ps_candt_prj_r[1],
9988 ps_curr_layer,
9989 ps_coarse_layer,
9990 pos_x + next_blk_offset,
9991 pos_y,
9992 i1_ref_idx,
9993 resultid);
9994 pf_hme_project_coloc_candt(
9995 ps_candt_prj_b[1],
9996 ps_curr_layer,
9997 ps_coarse_layer,
9998 pos_x,
9999 pos_y + next_blk_offset,
10000 i1_ref_idx,
10001 resultid);
10002
10003 i4_num_srch_cands += 4;
10004
10005 pf_hme_project_coloc_candt(
10006 ps_candt_prj_tr[1],
10007 ps_curr_layer,
10008 ps_coarse_layer,
10009 pos_x + next_blk_offset,
10010 pos_y - prev_blk_offset,
10011 i1_ref_idx,
10012 resultid);
10013 pf_hme_project_coloc_candt(
10014 ps_candt_prj_tl[1],
10015 ps_curr_layer,
10016 ps_coarse_layer,
10017 pos_x - prev_blk_offset,
10018 pos_y - prev_blk_offset,
10019 i1_ref_idx,
10020 resultid);
10021 pf_hme_project_coloc_candt(
10022 ps_candt_prj_t[1],
10023 ps_curr_layer,
10024 ps_coarse_layer,
10025 pos_x,
10026 pos_y - prev_blk_offset,
10027 i1_ref_idx,
10028 resultid);
10029
10030 i4_num_srch_cands += 3;
10031 }
10032
10033                     /* The debug block below validates the ref indices; the MV range */
10034                     /* clipping for all candidates happens in the candidate loop that follows */
10034 #ifdef _DEBUG
10035 {
10036 S32 candt;
10037 range_prms_t *ps_range_prms;
10038
10039 S32 num_ref_valid = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
10040 for(candt = 0; candt < i4_num_srch_cands; candt++)
10041 {
10042 search_node_t *ps_search_node;
10043
10044 ps_search_node =
10045 s_search_prms_blk.ps_search_candts[candt].ps_search_node;
10046
10047 ps_range_prms = s_search_prms_blk.aps_mv_range[0];
10048
10049 if((ps_search_node->i1_ref_idx >= num_ref_valid) ||
10050 (ps_search_node->i1_ref_idx < 0))
10051 {
10052 ASSERT(0);
10053 }
10054 }
10055 }
10056 #endif
10057
10058 {
10059 S32 srch_cand;
10060 S32 num_unique_nodes = 0;
10061 S32 num_nodes_searched = 0;
10062 S32 num_best_cand = 0;
10063 S08 i1_grid_enable = 0;
10064 search_node_t as_best_two_proj_node[TOT_NUM_PARTS * 2];
10065                         /* holds the list of valid partitions to search, terminated by -1 */
10066 S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
10067 S32 center_x;
10068 S32 center_y;
10069
10070 /* indicates if the centre point of grid needs to be explicitly added for search */
10071 S32 add_centre = 0;
10072
10073 memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
10074 center_x = ps_candt_prj_coloc[0]->s_mv.i2_mvx;
10075 center_y = ps_candt_prj_coloc[0]->s_mv.i2_mvy;
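                        /* The uniqueness bitmap is centred on the best projected     */
                        /* colocated MV; INSERT_NEW_NODE marks each clipped (mvx,mvy) */
                        /* relative to this centre and drops duplicate candidates.    */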
10076
10077 for(srch_cand = 0;
10078 (srch_cand < i4_num_srch_cands) &&
10079 (num_unique_nodes <= s_search_prms_blk.i4_num_init_candts);
10080 srch_cand++)
10081 {
10082 search_node_t s_search_node_temp =
10083 s_search_prms_blk.ps_search_candts[srch_cand].ps_search_node[0];
10084
10085 s_search_node_temp.i1_ref_idx = i1_ref_idx; //TEMP FIX;
10086
10087                             /* Clip the motion vectors here as well, since after clipping
10088                             two candidates can become the same and will then be removed during deduplication */
10089 CLIP_MV_WITHIN_RANGE(
10090 s_search_node_temp.s_mv.i2_mvx,
10091 s_search_node_temp.s_mv.i2_mvy,
10092 s_search_prms_blk.aps_mv_range[0],
10093 ps_refine_prms->i4_num_steps_fpel_refine,
10094 ps_refine_prms->i4_num_steps_hpel_refine,
10095 ps_refine_prms->i4_num_steps_qpel_refine);
10096
10097 /* PT_C */
10098 INSERT_NEW_NODE(
10099 as_unique_search_nodes,
10100 num_unique_nodes,
10101 s_search_node_temp,
10102 0,
10103 au4_unique_node_map,
10104 center_x,
10105 center_y,
10106 1);
10107
10108 num_nodes_searched += 1;
10109 }
10110 num_unique_nodes =
10111 MIN(num_unique_nodes, s_search_prms_blk.i4_num_init_candts);
10112
10113                         /* If two or more candidates were projected / need refinement,
10114                         then filter them and choose the best two here */
10115 if(num_unique_nodes >= 2)
10116 {
10117 S32 num_results;
10118 S32 cnt;
10119 S32 *pi4_valid_part_ids;
10120 s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
10121 s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
10122 pi4_valid_part_ids = &ai4_valid_part_ids[0];
10123
10124 /* pi4_valid_part_ids is updated inside */
10125 hme_pred_search_no_encode(
10126 &s_search_prms_blk,
10127 ps_curr_layer,
10128 &ps_ctxt->s_wt_pred,
10129 pi4_valid_part_ids,
10130 1,
10131 e_me_quality_presets,
10132 i1_grid_enable,
10133 (ihevce_me_optimised_function_list_t *)
10134 ps_ctxt->pv_me_optimised_function_list
10135
10136 );
10137
10138 num_best_cand = 0;
10139 cnt = 0;
10140 num_results = ps_search_results->u1_num_results_per_part;
10141
10142 while((id = pi4_valid_part_ids[cnt++]) >= 0)
10143 {
10144 num_results =
10145 MIN(ps_refine_prms->pu1_num_best_results[id], num_results);
10146
10147 for(i = 0; i < num_results; i++)
10148 {
10149 search_node_t s_search_node_temp;
10150 s_search_node_temp =
10151 *(ps_search_results->aps_part_results[i1_ref_idx][id] + i);
10152 if(s_search_node_temp.i1_ref_idx >= 0)
10153 {
10154 INSERT_NEW_NODE_NOMAP(
10155 as_best_two_proj_node,
10156 num_best_cand,
10157 s_search_node_temp,
10158 0);
10159 }
10160 }
10161 }
10162 }
10163 else
10164 {
10165 add_centre = 1;
10166 num_best_cand = num_unique_nodes;
10167 as_best_two_proj_node[0] = as_unique_search_nodes[0];
10168 }
10169
10170 num_unique_nodes = 0;
10171 num_nodes_searched = 0;
10172
10173 if(1 == num_best_cand)
10174 {
10175 search_node_t s_search_node_temp = as_best_two_proj_node[0];
10176 S16 i2_mv_x = s_search_node_temp.s_mv.i2_mvx;
10177 S16 i2_mv_y = s_search_node_temp.s_mv.i2_mvy;
10178 S08 i1_ref_idx = s_search_node_temp.i1_ref_idx;
10179
10180 i1_grid_enable = 1;
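                            /* Only one candidate survived: seed a full 3x3 grid      */
                            /* around it (the eight one-pel neighbours below, plus    */
                            /* the centre itself when add_centre is set) and enable   */
                            /* the grid search for this block.                        */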
10181
10182 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10183 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10184 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10185
10186 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10187 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10188 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10189
10190 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10191 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10192 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10193
10194 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10195 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10196 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10197
10198 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10199 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10200 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10201
10202 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10203 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10204 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10205
10206 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10207 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10208 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10209
10210 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10211 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10212 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10213
10214 if(add_centre)
10215 {
10216 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10217 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10218 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10219 }
10220 }
10221 else
10222 {
10223                             /* Refine around each of the best candidates chosen above */
10224 for(srch_cand = 0; srch_cand < num_best_cand; srch_cand++)
10225 {
10226 search_node_t s_search_node_temp = as_best_two_proj_node[srch_cand];
10227 WORD32 mv_x = s_search_node_temp.s_mv.i2_mvx;
10228 WORD32 mv_y = s_search_node_temp.s_mv.i2_mvy;
10229
10230                                 /* Because there may not be two unique best candidates (due to clipping),
10231                                 the second-best candidate can be uninitialized; ignore it */
10232 if(s_search_node_temp.s_mv.i2_mvx == INTRA_MV ||
10233 s_search_node_temp.i1_ref_idx < 0)
10234 {
10235 num_nodes_searched++;
10236 continue;
10237 }
10238
10239 /* PT_C */
10240                                 /* Since the center point has already been evaluated and best results are persistent,
10241                                 it will not be evaluated again */
10242                                 if(add_centre) /* centre point added explicitly again if search results are not updated */
10243 {
10244 INSERT_NEW_NODE(
10245 as_unique_search_nodes,
10246 num_unique_nodes,
10247 s_search_node_temp,
10248 0,
10249 au4_unique_node_map,
10250 center_x,
10251 center_y,
10252 1);
10253 }
10254
10255 /* PT_L */
10256 s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10257 s_search_node_temp.s_mv.i2_mvy = mv_y;
10258 INSERT_NEW_NODE(
10259 as_unique_search_nodes,
10260 num_unique_nodes,
10261 s_search_node_temp,
10262 0,
10263 au4_unique_node_map,
10264 center_x,
10265 center_y,
10266 1);
10267
10268 /* PT_T */
10269 s_search_node_temp.s_mv.i2_mvx = mv_x;
10270 s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10271 INSERT_NEW_NODE(
10272 as_unique_search_nodes,
10273 num_unique_nodes,
10274 s_search_node_temp,
10275 0,
10276 au4_unique_node_map,
10277 center_x,
10278 center_y,
10279 1);
10280
10281 /* PT_R */
10282 s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10283 s_search_node_temp.s_mv.i2_mvy = mv_y;
10284 INSERT_NEW_NODE(
10285 as_unique_search_nodes,
10286 num_unique_nodes,
10287 s_search_node_temp,
10288 0,
10289 au4_unique_node_map,
10290 center_x,
10291 center_y,
10292 1);
10293
10294 /* PT_B */
10295 s_search_node_temp.s_mv.i2_mvx = mv_x;
10296 s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10297 INSERT_NEW_NODE(
10298 as_unique_search_nodes,
10299 num_unique_nodes,
10300 s_search_node_temp,
10301 0,
10302 au4_unique_node_map,
10303 center_x,
10304 center_y,
10305 1);
10306
10307 /* PT_TL */
10308 s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10309 s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10310 INSERT_NEW_NODE(
10311 as_unique_search_nodes,
10312 num_unique_nodes,
10313 s_search_node_temp,
10314 0,
10315 au4_unique_node_map,
10316 center_x,
10317 center_y,
10318 1);
10319
10320 /* PT_TR */
10321 s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10322 s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10323 INSERT_NEW_NODE(
10324 as_unique_search_nodes,
10325 num_unique_nodes,
10326 s_search_node_temp,
10327 0,
10328 au4_unique_node_map,
10329 center_x,
10330 center_y,
10331 1);
10332
10333 /* PT_BL */
10334 s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10335 s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10336 INSERT_NEW_NODE(
10337 as_unique_search_nodes,
10338 num_unique_nodes,
10339 s_search_node_temp,
10340 0,
10341 au4_unique_node_map,
10342 center_x,
10343 center_y,
10344 1);
10345
10346 /* PT_BR */
10347 s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10348 s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10349 INSERT_NEW_NODE(
10350 as_unique_search_nodes,
10351 num_unique_nodes,
10352 s_search_node_temp,
10353 0,
10354 au4_unique_node_map,
10355 center_x,
10356 center_y,
10357 1);
10358 }
10359 }
10360
10361 s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
10362 s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
10363
10364 /*****************************************************************/
10365 /* Call the search algorithm, this includes: */
10366 /* Pre-Search-Refinement (for coarse candts) */
10367 /* Search on each candidate */
10368 /* Post Search Refinement on winners/other new candidates */
10369 /*****************************************************************/
10370
10371 hme_pred_search_no_encode(
10372 &s_search_prms_blk,
10373 ps_curr_layer,
10374 &ps_ctxt->s_wt_pred,
10375 ai4_valid_part_ids,
10376 0,
10377 e_me_quality_presets,
10378 i1_grid_enable,
10379 (ihevce_me_optimised_function_list_t *)
10380 ps_ctxt->pv_me_optimised_function_list);
10381
10382 i1_grid_enable = 0;
10383 }
10384 }
10385
10386 /* for non encode layer update MV and end processing for block */
10387 {
10388 WORD32 i4_ref_id, min_cost = 0x7fffffff, min_sad = 0;
10389 search_node_t *ps_search_node;
10390 /* now update the reqd results back to the layer mv bank. */
10391 if(1 == ps_refine_prms->i4_layer_id)
10392 {
10393 hme_update_mv_bank_in_l1_me(
10394 ps_search_results,
10395 ps_curr_layer->ps_layer_mvbank,
10396 blk_x,
10397 blk_y,
10398 &s_mv_update_prms);
10399 }
10400 else
10401 {
10402 hme_update_mv_bank_noencode(
10403 ps_search_results,
10404 ps_curr_layer->ps_layer_mvbank,
10405 blk_x,
10406 blk_y,
10407 &s_mv_update_prms);
10408 }
10409
10410                 /* UPDATE the MIN and MAX MVs for the Dynamic Search Range for each ref. pic. */
10411                 /* Only for P pics. For P, both flags are 0; I and B have them mutually exclusive */
10412 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10413 {
10414 WORD32 i4_j;
10415 layer_mv_t *ps_layer_mv = ps_curr_layer->ps_layer_mvbank;
10416
10417 //if (ps_layer_mv->e_blk_size == s_mv_update_prms.e_search_blk_size)
10418 /* Not considering this for Dyn. Search Update */
10419 {
10420 for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
10421 i4_ref_id++)
10422 {
10423 ps_search_node =
10424 ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
10425
10426 for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
10427 {
10428 hme_update_dynamic_search_params(
10429 &ps_ctxt->s_coarse_dyn_range_prms
10430 .as_dyn_range_prms[ps_refine_prms->i4_layer_id]
10431 [i4_ref_id],
10432 ps_search_node->s_mv.i2_mvy);
10433
10434 ps_search_node++;
10435 }
10436 }
10437 }
10438 }
10439
10440 if(1 == ps_refine_prms->i4_layer_id)
10441 {
10442 WORD32 wt_pred_val, log_wt_pred_val;
10443 WORD32 ref_id_of_nearest_poc = 0;
10444 WORD32 max_val = 0x7fffffff;
10445 WORD32 max_l0_val = 0x7fffffff;
10446 WORD32 max_l1_val = 0x7fffffff;
10447 WORD32 cur_val;
10448 WORD32 i4_local_weighted_sad, i4_local_cost_weighted_pred;
10449
10450 WORD32 bestl0_sad = 0x7fffffff;
10451 WORD32 bestl1_sad = 0x7fffffff;
10452 search_node_t *ps_best_l0_blk = NULL, *ps_best_l1_blk = NULL;
10453
10454 for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
10455 i4_ref_id++)
10456 {
10457 wt_pred_val = ps_ctxt->s_wt_pred.a_wpred_wt[i4_ref_id];
10458 log_wt_pred_val = ps_ctxt->s_wt_pred.wpred_log_wdc;
10459
10460 ps_search_node =
10461 ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
10462
10463 i4_local_weighted_sad = ((ps_search_node->i4_sad * wt_pred_val) +
10464 ((1 << log_wt_pred_val) >> 1)) >>
10465 log_wt_pred_val;
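                    /* The weighted SAD above is (i4_sad * wt_pred_val + half) >> log_wt_pred_val, */
                    /* with half = (1 << log_wt_pred_val) >> 1 for round-to-nearest.                */
                    /* e.g. sad = 100, wt_pred_val = 80, log_wt_pred_val = 6:                       */
                    /* (8000 + 32) >> 6 = 125                                                       */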
10466
10467 i4_local_cost_weighted_pred =
10468 i4_local_weighted_sad +
10469 (ps_search_node->i4_tot_cost - ps_search_node->i4_sad);
                    //The loop below is redundant: the part results are already sorted by total cost, so only the best (first) entry matters
10471 //for (i4_j = 0; i4_j < ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref; i4_j++)
10472 {
10473 if(i4_local_cost_weighted_pred < min_cost)
10474 {
10475 min_cost = i4_local_cost_weighted_pred;
10476 min_sad = i4_local_weighted_sad;
10477 }
10478 }
10479
                    /* For P frames, track the reference with the nearest POC (an I or P frame); */
                    /* max_val holds the smallest POC distance seen so far.                       */
10481 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10482 {
10483 if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id])
10484 {
10485 cur_val =
10486 ABS(ps_ctxt->i4_curr_poc -
10487 ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id]);
10488 if(cur_val < max_val)
10489 {
10490 max_val = cur_val;
10491 ref_id_of_nearest_poc = i4_ref_id;
10492 }
10493 }
10494 }
10495 }
                /* Store the ME cost w.r.t. the past frame (P frames only) */
10497 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10498 {
10499 if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
10500 {
10501 WORD16 i2_mvx, i2_mvy;
10502
10503 WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10504 WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10505 WORD32 z_scan_idx =
10506 gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
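                    /* z_scan_idx maps the block's raster offset within the current CTB  */
                    /* (the CTB spans 4 such blocks per side at this layer) to z-scan     */
                    /* order, as used by the per-CTB early-decision arrays below.         */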
10507 WORD32 wt, log_wt;
10508
10509 /*ASSERT((ps_ctxt->i4_curr_poc - ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
10510 <= (1 + ps_ctxt->num_b_frms));*/
10511
10512 /*obtain mvx and mvy */
10513 i2_mvx =
10514 ps_search_results
10515 ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10516 ->s_mv.i2_mvx;
10517 i2_mvy =
10518 ps_search_results
10519 ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10520 ->s_mv.i2_mvy;
10521
                    /* fetch the weighted-prediction weight and log2 weight denominator for the nearest reference */
10523 wt = ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_of_nearest_poc];
10524 log_wt = ps_ctxt->s_wt_pred.wpred_log_wdc;
10525
                    /* register the weighted SAD for L1 ME in the block context */
10527 ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] =
10528 ((ps_search_results
10529 ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10530 ->i4_sad *
10531 wt) +
10532 ((1 << log_wt) >> 1)) >>
10533 log_wt;
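                    /* The SAD cost below adds back the non-SAD portion of the original  */
                    /* total cost (i4_tot_cost - i4_sad), which is essentially the MV cost. */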
10534 ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] =
10535 ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] +
10536 (ps_search_results
10537 ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10538 ->i4_tot_cost -
10539 ps_search_results
10540 ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10541 ->i4_sad);
10542 /*for complexity change detection*/
10543 ps_ctxt->i4_num_blks++;
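                    /* Flag the block as high-SAD if its cost exceeds roughly 1 per pixel */
                    /* per frame of temporal distance: 8x8 pixels * (1 + num_b_frms).     */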
10544 if(ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] >
10545 (8 /*blk width*/ * 8 /*blk height*/ * (1 + ps_ctxt->num_b_frms)))
10546 {
10547 ps_ctxt->i4_num_blks_high_sad++;
10548 }
10549 }
10550 }
10551 }
10552
            /* EIID: early inter/intra decision */
            /* tap the L1-level SAD for the inter/intra decision */
            if((e_me_quality_presets >= ME_MEDIUM_SPEED) &&
               (!ps_ctxt->s_frm_prms
                     .is_i_pic)) //early decisions are disabled for high-quality presets and I pictures
10558 {
10559 if(1 == ps_refine_prms->i4_layer_id)
10560 {
10561 WORD32 i4_min_sad_cost_8x8_block = min_cost;
10562 ihevce_ed_blk_t *ps_curr_ed_blk_ctxt;
10563 WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10564 WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10565 WORD32 z_scan_idx =
10566 gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10567 ps_curr_ed_blk_ctxt = ps_ed_blk_ctxt_curr_ctb + z_scan_idx;
10568
10569 /*register the min cost for l1 me in blk context */
10570 ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
10571 i4_min_sad_cost_8x8_block;
10572 i4_num_comparisions++;
10573
10574 /* take early inter-intra decision here */
                ps_curr_ed_blk_ctxt->intra_or_inter = 3; /* initialize: evaluate both intra and inter */
10576 #if DISABLE_INTRA_IN_BPICS
10577 if((e_me_quality_presets == ME_XTREME_SPEED_25) &&
10578 (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
10579 {
10580 ps_curr_ed_blk_ctxt->intra_or_inter =
                            2; /* evaluate only inter: intra evaluation is disabled for B pictures in higher temporal layers at this preset */
10582 i4_num_inter_wins++;
10583 }
10584 else
10585 #endif
10586 {
10587 if(ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] <
10588 ((ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2] *
10589 i4_threshold_multiplier) /
10590 i4_threshold_divider))
10591 {
10592 ps_curr_ed_blk_ctxt->intra_or_inter =
10593 2; /*eval only inter if inter cost is less */
10594 i4_num_inter_wins++;
10595 }
10596 }
10597
10598 //{
10599 // DBG_PRINTF ("(blk x, blk y):(%d, %d)\t me:(ctb_x, ctb_y):(%d, %d)\t intra_SAD_COST: %d\tInter_SAD_COST: %d\n",
10600 // blk_x,blk_y,
10601 // i4_ctb_blk_ctr, i4_ctb_row_ctr,
10602 // ps_curr_ed_blk_ctxt->i4_best_sad_8x8_l1_ipe,
10603 // i4_min_sad_cost_8x8_block
10604 // );
10605 //}
10606
10607 } //end of layer-1
10608 } //end of if (e_me_quality_presets >= ME_MEDIUM_SPEED)
10609 else
10610 {
10611 if(1 == ps_refine_prms->i4_layer_id)
10612 {
10613 WORD32 i4_min_sad_cost_8x8_block = min_cost;
10614 WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10615 WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10616 WORD32 z_scan_idx =
10617 gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10618
10619 /*register the min cost for l1 me in blk context */
10620 ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
10621 i4_min_sad_cost_8x8_block;
10622 }
10623 }
10624 if(1 == ps_refine_prms->i4_layer_id)
10625 {
10626 WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10627 WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10628 WORD32 z_scan_idx =
10629 gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10630
10631 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me_for_decide[z_scan_idx >> 2] =
10632 min_sad;
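                /* Accumulate the frame-level L1 cost/SAD below from whichever of the  */
                /* ME and intra (IPE) estimates is cheaper for this 8x8 block.          */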
10633
10634 if(min_cost <
10635 ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2])
10636 {
10637 ps_ctxt->i4_L1_hme_best_cost += min_cost;
10638 ps_ctxt->i4_L1_hme_sad += min_sad;
10639 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] = min_sad;
10640 }
10641 else
10642 {
10643 ps_ctxt->i4_L1_hme_best_cost +=
10644 ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2];
10645 ps_ctxt->i4_L1_hme_sad +=
10646 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
10647 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] =
10648 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
10649 }
10650 }
10651 }
10652 }
10653
10654 /* Update the number of blocks processed in the current row */
10655 if((ME_MEDIUM_SPEED > e_me_quality_presets))
10656 {
10657 ihevce_dmgr_set_row_row_sync(
10658 pv_hme_dep_mngr,
10659 (i4_ctb_x + 1),
10660 blk_y,
10661 0 /* Col Tile No. : Not supported in PreEnc*/);
10662 }
10663 }
10664
10665 /* set the output dependency after completion of row */
10666 ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
10667 }
10668 }
10669