1 /******************************************************************************
2 *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21 ******************************************************************************
22 * @file hme_refine.c
23 *
24 * @brief
25 * Contains the implementation of the refinement layer searches and related
26 * functionality like CU merge.
27 *
28 * @author
29 * Ittiam
30 *
31 *
32 * List of Functions
33 *
34 *
35 ******************************************************************************
36 */
37
38 /*****************************************************************************/
39 /* File Includes */
40 /*****************************************************************************/
41 /* System include files */
42 #include <stdio.h>
43 #include <string.h>
44 #include <stdlib.h>
45 #include <assert.h>
46 #include <stdarg.h>
47 #include <math.h>
48 #include <limits.h>
49
50 /* User include files */
51 #include "ihevc_typedefs.h"
52 #include "itt_video_api.h"
53 #include "ihevce_api.h"
54
55 #include "rc_cntrl_param.h"
56 #include "rc_frame_info_collector.h"
57 #include "rc_look_ahead_params.h"
58
59 #include "ihevc_defs.h"
60 #include "ihevc_structs.h"
61 #include "ihevc_platform_macros.h"
62 #include "ihevc_deblk.h"
63 #include "ihevc_itrans_recon.h"
64 #include "ihevc_chroma_itrans_recon.h"
65 #include "ihevc_chroma_intra_pred.h"
66 #include "ihevc_intra_pred.h"
67 #include "ihevc_inter_pred.h"
68 #include "ihevc_mem_fns.h"
69 #include "ihevc_padding.h"
70 #include "ihevc_weighted_pred.h"
71 #include "ihevc_sao.h"
72 #include "ihevc_resi_trans.h"
73 #include "ihevc_quant_iquant_ssd.h"
74 #include "ihevc_cabac_tables.h"
75
76 #include "ihevce_defs.h"
77 #include "ihevce_lap_enc_structs.h"
78 #include "ihevce_multi_thrd_structs.h"
79 #include "ihevce_multi_thrd_funcs.h"
80 #include "ihevce_me_common_defs.h"
81 #include "ihevce_had_satd.h"
82 #include "ihevce_error_codes.h"
83 #include "ihevce_bitstream.h"
84 #include "ihevce_cabac.h"
85 #include "ihevce_rdoq_macros.h"
86 #include "ihevce_function_selector.h"
87 #include "ihevce_enc_structs.h"
88 #include "ihevce_entropy_structs.h"
89 #include "ihevce_cmn_utils_instr_set_router.h"
90 #include "ihevce_enc_loop_structs.h"
91 #include "ihevce_bs_compute_ctb.h"
92 #include "ihevce_global_tables.h"
93 #include "ihevce_dep_mngr_interface.h"
94 #include "hme_datatype.h"
95 #include "hme_interface.h"
96 #include "hme_common_defs.h"
97 #include "hme_defs.h"
98 #include "ihevce_me_instr_set_router.h"
99 #include "hme_globals.h"
100 #include "hme_utils.h"
101 #include "hme_coarse.h"
102 #include "hme_fullpel.h"
103 #include "hme_subpel.h"
104 #include "hme_refine.h"
105 #include "hme_err_compute.h"
106 #include "hme_common_utils.h"
107 #include "hme_search_algo.h"
108 #include "ihevce_stasino_helpers.h"
109 #include "ihevce_common_utils.h"
110
111 /*****************************************************************************/
112 /* Globals */
113 /*****************************************************************************/
114
115 /* brief: mapping buffer to convert raster scan indices into z-scan order in a ctb */
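/* Each entry appears to be 4 * (z-scan index of the 16x16 blk), i.e. the z-scan */
/* index of that blk's top-left 8x8 sub-blk, with the table indexed as [row][col]: */
/* e.g. the 16x16 blk at raster (row 1, col 2) maps to 24 */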
116 UWORD8 gau1_raster_scan_to_ctb[4][4] = {
117 { 0, 4, 16, 20 }, { 8, 12, 24, 28 }, { 32, 36, 48, 52 }, { 40, 44, 56, 60 }
118 };
119
120 /*****************************************************************************/
121 /* Extern Function declaration */
122 /*****************************************************************************/
123 extern ctb_boundary_attrs_t *
124 get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt);
125
126 typedef void (*PF_HME_PROJECT_COLOC_CANDT_FXN)(
127 search_node_t *ps_search_node,
128 layer_ctxt_t *ps_curr_layer,
129 layer_ctxt_t *ps_coarse_layer,
130 S32 i4_pos_x,
131 S32 i4_pos_y,
132 S08 i1_ref_id,
133 S32 i4_result_id);
134
135 typedef void (*PF_HME_PROJECT_COLOC_CANDT_L0_ME_FXN)(
136 search_node_t *ps_search_node,
137 layer_ctxt_t *ps_curr_layer,
138 layer_ctxt_t *ps_coarse_layer,
139 S32 i4_pos_x,
140 S32 i4_pos_y,
141 S32 i4_num_act_ref_l0,
142 U08 u1_pred_dir,
143 U08 u1_default_ref_id,
144 S32 i4_result_id);
145
146 /*****************************************************************************/
147 /* Function Definitions */
148 /*****************************************************************************/
149
150 void ihevce_no_wt_copy(
151 coarse_me_ctxt_t *ps_ctxt,
152 layer_ctxt_t *ps_curr_layer,
153 pu_t *ps_pu,
154 UWORD8 *pu1_temp_pred,
155 WORD32 temp_stride,
156 WORD32 blk_x,
157 WORD32 blk_y)
158 {
159 UWORD8 *pu1_ref;
160 WORD32 ref_stride, ref_offset;
161 WORD32 row, col, i4_tmp;
162
163 ASSERT((ps_pu->b2_pred_mode == PRED_L0) || (ps_pu->b2_pred_mode == PRED_L1));
164
165 if(ps_pu->b2_pred_mode == PRED_L0)
166 {
167 WORD8 i1_ref_idx;
168
169 i1_ref_idx = ps_pu->mv.i1_l0_ref_idx;
170 pu1_ref = ps_curr_layer->ppu1_list_inp[i1_ref_idx];
171
172 ref_stride = ps_curr_layer->i4_inp_stride;
173
174 ref_offset = ((blk_y << 3) + ps_pu->mv.s_l0_mv.i2_mvy) * ref_stride;
175 ref_offset += (blk_x << 3) + ps_pu->mv.s_l0_mv.i2_mvx;
176
177 pu1_ref += ref_offset;
178
179 for(row = 0; row < temp_stride; row++)
180 {
181 for(col = 0; col < temp_stride; col++)
182 {
183 i4_tmp = pu1_ref[col];
184 pu1_temp_pred[col] = CLIP_U8(i4_tmp);
185 }
186
187 pu1_ref += ref_stride;
188 pu1_temp_pred += temp_stride;
189 }
190 }
191 else
192 {
193 WORD8 i1_ref_idx;
194
195 i1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
196 pu1_ref = ps_curr_layer->ppu1_list_inp[i1_ref_idx];
197
198 ref_stride = ps_curr_layer->i4_inp_stride;
199
200 ref_offset = ((blk_y << 3) + ps_pu->mv.s_l1_mv.i2_mvy) * ref_stride;
201 ref_offset += (blk_x << 3) + ps_pu->mv.s_l1_mv.i2_mvx;
202
203 pu1_ref += ref_offset;
204
205 for(row = 0; row < temp_stride; row++)
206 {
207 for(col = 0; col < temp_stride; col++)
208 {
209 i4_tmp = pu1_ref[col];
210 pu1_temp_pred[col] = CLIP_U8(i4_tmp);
211 }
212
213 pu1_ref += ref_stride;
214 pu1_temp_pred += temp_stride;
215 }
216 }
217 }
218
219 static WORD32 hme_add_clustered_mvs_as_merge_cands(
220 cluster_data_t *ps_cluster_base,
221 search_node_t *ps_merge_cand,
222 range_prms_t **pps_range_prms,
223 U08 *pu1_refid_to_pred_dir_list,
224 WORD32 i4_num_clusters,
225 U08 u1_pred_dir)
226 {
227 WORD32 i, j, k;
228 WORD32 i4_num_cands_added = 0;
229 WORD32 i4_num_mvs_in_cluster;
230
231 for(i = 0; i < i4_num_clusters; i++)
232 {
233 cluster_data_t *ps_data = &ps_cluster_base[i];
234
235 if(u1_pred_dir == !pu1_refid_to_pred_dir_list[ps_data->ref_id])
236 {
237 i4_num_mvs_in_cluster = ps_data->num_mvs;
238
239 for(j = 0; j < i4_num_mvs_in_cluster; j++)
240 {
241 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_data->as_mv[j].mvx;
242 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_data->as_mv[j].mvy;
243 ps_merge_cand[i4_num_cands_added].i1_ref_idx = ps_data->ref_id;
244
245 CLIP_MV_WITHIN_RANGE(
246 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
247 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
248 pps_range_prms[ps_data->ref_id],
249 0,
250 0,
251 0);
252
253 for(k = 0; k < i4_num_cands_added; k++)
254 {
255 if((ps_merge_cand[k].s_mv.i2_mvx == ps_data->as_mv[j].mvx) &&
256 (ps_merge_cand[k].s_mv.i2_mvy == ps_data->as_mv[j].mvy) &&
257 (ps_merge_cand[k].i1_ref_idx == ps_data->ref_id))
258 {
259 break;
260 }
261 }
262
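/* If the duplicate-check loop above ran to completion, this mv/ref-id pair */
/* is not already in the candidate list, so retain it */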
263 if(k == i4_num_cands_added)
264 {
265 i4_num_cands_added++;
266 }
267 }
268 }
269 }
270
271 return i4_num_cands_added;
272 }
273
274 static WORD32 hme_add_me_best_as_merge_cands(
275 search_results_t **pps_child_data_array,
276 inter_cu_results_t *ps_8x8cu_results,
277 search_node_t *ps_merge_cand,
278 range_prms_t **pps_range_prms,
279 U08 *pu1_refid_to_pred_dir_list,
280 S08 *pi1_past_list,
281 S08 *pi1_future_list,
282 BLK_SIZE_T e_blk_size,
283 ME_QUALITY_PRESETS_T e_quality_preset,
284 S32 i4_num_cands_added,
285 U08 u1_pred_dir)
286 {
287 WORD32 i, j, k;
288 WORD32 i4_max_cands_to_add;
289
290 WORD32 i4_result_id = 0;
291
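/* When merging to a 64x64 CU, none of the four 32x32 children may themselves be split */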
292 ASSERT(!pps_child_data_array[0]->u1_split_flag || (BLK_64x64 != e_blk_size));
293 ASSERT(!pps_child_data_array[1]->u1_split_flag || (BLK_64x64 != e_blk_size));
294 ASSERT(!pps_child_data_array[2]->u1_split_flag || (BLK_64x64 != e_blk_size));
295 ASSERT(!pps_child_data_array[3]->u1_split_flag || (BLK_64x64 != e_blk_size));
296
297 switch(e_quality_preset)
298 {
299 case ME_PRISTINE_QUALITY:
300 {
301 i4_max_cands_to_add = MAX_MERGE_CANDTS;
302
303 break;
304 }
305 case ME_HIGH_QUALITY:
306 {
307 /* All 4 children are split and each grandchild contributes an MV */
308 /* and 2 best results per grandchild */
309 i4_max_cands_to_add = 4 * 4 * 2;
310
311 break;
312 }
313 case ME_MEDIUM_SPEED:
314 {
315 i4_max_cands_to_add = 4 * 2 * 2;
316
317 break;
318 }
319 case ME_HIGH_SPEED:
320 case ME_XTREME_SPEED:
321 case ME_XTREME_SPEED_25:
322 {
323 i4_max_cands_to_add = 4 * 2 * 1;
324
325 break;
326 }
327 }
328
329 while(i4_result_id < 4)
330 {
331 for(i = 0; i < 4; i++)
332 {
333 inter_cu_results_t *ps_child_data = pps_child_data_array[i]->ps_cu_results;
334 inter_cu_results_t *ps_grandchild_data = &ps_8x8cu_results[i << 2];
335
336 if(!pps_child_data_array[i]->u1_split_flag)
337 {
338 part_type_results_t *ps_data = &ps_child_data->ps_best_results[i4_result_id];
339
340 if(ps_child_data->u1_num_best_results <= i4_result_id)
341 {
342 continue;
343 }
344
345 if(ps_data->as_pu_results->pu.b1_intra_flag)
346 {
347 continue;
348 }
349
350 for(j = 0; j <= (ps_data->u1_part_type != PRT_2Nx2N); j++)
351 {
352 mv_t *ps_mv;
353
354 S08 i1_ref_idx;
355
356 pu_t *ps_pu = &ps_data->as_pu_results[j].pu;
357
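/* Skip PUs whose prediction direction does not match u1_pred_dir; */
/* bi-pred PUs (b2_pred_mode == 2) are accepted for either direction */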
358 if(u1_pred_dir !=
359 ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
360 {
361 continue;
362 }
363
364 if(u1_pred_dir)
365 {
366 ps_mv = &ps_pu->mv.s_l1_mv;
367 i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
368 }
369 else
370 {
371 ps_mv = &ps_pu->mv.s_l0_mv;
372 i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
373 }
374
375 if(-1 == i1_ref_idx)
376 {
377 continue;
378 }
379
380 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
381 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
382 ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
383
384 CLIP_MV_WITHIN_RANGE(
385 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
386 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
387 pps_range_prms[i1_ref_idx],
388 0,
389 0,
390 0);
391
392 for(k = 0; k < i4_num_cands_added; k++)
393 {
394 if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
395 (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
396 (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
397 {
398 break;
399 }
400 }
401
402 if(k == i4_num_cands_added)
403 {
404 i4_num_cands_added++;
405
406 if(i4_max_cands_to_add <= i4_num_cands_added)
407 {
408 return i4_num_cands_added;
409 }
410 }
411 }
412 }
413 else
414 {
415 for(j = 0; j < 4; j++)
416 {
417 mv_t *ps_mv;
418
419 S08 i1_ref_idx;
420
421 part_type_results_t *ps_data = ps_grandchild_data[j].ps_best_results;
422 pu_t *ps_pu = &ps_data->as_pu_results[0].pu;
423
424 ASSERT(ps_data->u1_part_type == PRT_2Nx2N);
425
426 if(ps_grandchild_data[j].u1_num_best_results <= i4_result_id)
427 {
428 continue;
429 }
430
431 if(ps_data->as_pu_results->pu.b1_intra_flag)
432 {
433 continue;
434 }
435
436 if(u1_pred_dir !=
437 ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
438 {
439 continue;
440 }
441
442 if(u1_pred_dir)
443 {
444 ps_mv = &ps_pu->mv.s_l1_mv;
445 i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
446 }
447 else
448 {
449 ps_mv = &ps_pu->mv.s_l0_mv;
450 i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
451 }
452
453 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
454 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
455 ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
456
457 CLIP_MV_WITHIN_RANGE(
458 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
459 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
460 pps_range_prms[i1_ref_idx],
461 0,
462 0,
463 0);
464
465 for(k = 0; k < i4_num_cands_added; k++)
466 {
467 if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
468 (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
469 (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
470 {
471 break;
472 }
473 }
474
475 if(k == i4_num_cands_added)
476 {
477 i4_num_cands_added++;
478
479 if(i4_max_cands_to_add <= i4_num_cands_added)
480 {
481 return i4_num_cands_added;
482 }
483 }
484 }
485 }
486 }
487
488 i4_result_id++;
489 }
490
491 return i4_num_cands_added;
492 }
493
494 WORD32 hme_add_cands_for_merge_eval(
495 ctb_cluster_info_t *ps_cluster_info,
496 search_results_t **pps_child_data_array,
497 inter_cu_results_t *ps_8x8cu_results,
498 range_prms_t **pps_range_prms,
499 search_node_t *ps_merge_cand,
500 U08 *pu1_refid_to_pred_dir_list,
501 S08 *pi1_past_list,
502 S08 *pi1_future_list,
503 ME_QUALITY_PRESETS_T e_quality_preset,
504 BLK_SIZE_T e_blk_size,
505 U08 u1_pred_dir,
506 U08 u1_blk_id)
507 {
508 WORD32 i4_num_cands_added = 0;
509
510 if(ME_PRISTINE_QUALITY == e_quality_preset)
511 {
512 cluster_data_t *ps_cluster_primo;
513
514 WORD32 i4_num_clusters;
515
516 if(BLK_32x32 == e_blk_size)
517 {
518 ps_cluster_primo = ps_cluster_info->ps_32x32_blk[u1_blk_id].as_cluster_data;
519 i4_num_clusters = ps_cluster_info->ps_32x32_blk[u1_blk_id].num_clusters;
520 }
521 else
522 {
523 ps_cluster_primo = ps_cluster_info->ps_64x64_blk->as_cluster_data;
524 i4_num_clusters = ps_cluster_info->ps_64x64_blk->num_clusters;
525 }
526
527 i4_num_cands_added = hme_add_clustered_mvs_as_merge_cands(
528 ps_cluster_primo,
529 ps_merge_cand,
530 pps_range_prms,
531 pu1_refid_to_pred_dir_list,
532 i4_num_clusters,
533 u1_pred_dir);
534 }
535
536 i4_num_cands_added = hme_add_me_best_as_merge_cands(
537 pps_child_data_array,
538 ps_8x8cu_results,
539 ps_merge_cand,
540 pps_range_prms,
541 pu1_refid_to_pred_dir_list,
542 pi1_past_list,
543 pi1_future_list,
544 e_blk_size,
545 e_quality_preset,
546 i4_num_cands_added,
547 u1_pred_dir);
548
549 return i4_num_cands_added;
550 }
551
552 /**
553 ********************************************************************************
554 * @fn WORD32 hme_pick_eval_merge_candts(hme_merge_prms_t *ps_merge_prms,
555 * S32 i4_search_idx,
556 * S32 i4_best_part_type,
557 * S32 i4_is_vert)
558 *
559 * @brief Given a target partition orientation in the merged CU and the
560 * partition type of the most likely partition, this fxn picks up
561 * candidates from the 4 constituent CUs and does a refinement search
562 * to identify the best results for the merged CU across active partitions
563 *
564 * @param[in,out] ps_merge_prms : Parameters sent from higher layers. Out of
565 * these params, the search result structure is also derived and
566 * updated during the search
567 *
568 * @param[in] i4_search_idx : ID of the buffer within the search results to update.
569 * Will be 0 if all refidx collapsed to one buf, else it'll be 0/1
570 *
571 * @param[in] i4_best_part_type : partition type of potential partition in the
572 * merged CU, -1 if the merge process has not yet been able to
573 * determine this.
574 *
575 * @param[in] i4_is_vert : Whether target partition of merged CU is vertical
576 * orientation or horizontal orientation.
577 *
578 * @return Number of merge candidates
579 ********************************************************************************
580 */
581 WORD32 hme_pick_eval_merge_candts(
582 hme_merge_prms_t *ps_merge_prms,
583 hme_subpel_prms_t *ps_subpel_prms,
584 S32 i4_search_idx,
585 S32 i4_best_part_type,
586 S32 i4_is_vert,
587 wgt_pred_ctxt_t *ps_wt_inp_prms,
588 S32 i4_frm_qstep,
589 ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
590 ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
591 {
592 S32 x_off, y_off;
593 search_node_t *ps_search_node;
594 S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
595 S32 i4_num_valid_parts;
596 pred_ctxt_t *ps_pred_ctxt;
597
598 search_node_t as_merge_unique_node[MAX_MERGE_CANDTS];
599 S32 num_unique_nodes_cu_merge = 0;
600
601 search_results_t *ps_search_results = ps_merge_prms->ps_results_merge;
602 CU_SIZE_T e_cu_size = ps_search_results->e_cu_size;
603 S32 i4_part_mask = ps_search_results->i4_part_mask;
604
605 search_results_t *aps_child_results[4];
606 layer_ctxt_t *ps_curr_layer = ps_merge_prms->ps_layer_ctxt;
607
608 S32 i4_ref_stride, i, j;
609 result_upd_prms_t s_result_prms;
610
611 BLK_SIZE_T e_blk_size = ge_cu_size_to_blk_size[e_cu_size];
612 S32 i4_offset;
613
614 /*************************************************************************/
615 /* Function pointer for SAD/SATD, array and prms structure to pass to */
616 /* This function */
617 /*************************************************************************/
618 PF_SAD_FXN_T pf_err_compute;
619 S32 ai4_sad_grid[9][17];
620 err_prms_t s_err_prms;
621
622 /*************************************************************************/
623 /* Allowed MV RANGE */
624 /*************************************************************************/
625 range_prms_t **pps_range_prms = ps_merge_prms->aps_mv_range;
626 PF_INTERP_FXN_T pf_qpel_interp;
627 PF_MV_COST_FXN pf_mv_cost_compute;
628 WORD32 pred_lx;
629 U08 *apu1_hpel_ref[4];
630
631 interp_prms_t s_interp_prms;
632 S32 i4_interp_buf_id;
633
634 S32 i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
635 S32 i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
636
637 /* Sanity checks */
638 ASSERT((e_blk_size == BLK_64x64) || (e_blk_size == BLK_32x32));
639
640 s_err_prms.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;
641
642 /* Initialize all the ptrs to child CUs for merge decision */
643 aps_child_results[0] = ps_merge_prms->ps_results_tl;
644 aps_child_results[1] = ps_merge_prms->ps_results_tr;
645 aps_child_results[2] = ps_merge_prms->ps_results_bl;
646 aps_child_results[3] = ps_merge_prms->ps_results_br;
647
648 num_unique_nodes_cu_merge = 0;
649
650 pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
651
652 if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
653 {
654 num_unique_nodes_cu_merge = hme_add_cands_for_merge_eval(
655 ps_merge_prms->ps_cluster_info,
656 aps_child_results,
657 ps_merge_prms->ps_8x8_cu_results,
658 pps_range_prms,
659 as_merge_unique_node,
660 ps_search_results->pu1_is_past,
661 ps_merge_prms->pi1_past_list,
662 ps_merge_prms->pi1_future_list,
663 ps_merge_prms->e_quality_preset,
664 e_blk_size,
665 i4_search_idx,
666 (ps_merge_prms->ps_results_merge->u1_x_off >> 5) +
667 (ps_merge_prms->ps_results_merge->u1_y_off >> 4));
668 }
669 else
670 {
671 /*************************************************************************/
672 /* Populate the list of unique search nodes in the child CUs for merge */
673 /* evaluation */
674 /*************************************************************************/
675 for(i = 0; i < 4; i++)
676 {
677 search_node_t s_search_node;
678
679 PART_TYPE_T e_part_type;
680 PART_ID_T e_part_id;
681
682 WORD32 part_num;
683
684 search_results_t *ps_child = aps_child_results[i];
685
686 if(ps_child->ps_cu_results->u1_num_best_results)
687 {
688 if(!((ps_child->ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
689 (1 == ps_child->ps_cu_results->u1_num_best_results)))
690 {
691 e_part_type =
692 (PART_TYPE_T)ps_child->ps_cu_results->ps_best_results[0].u1_part_type;
693
694 ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
695
696 /* Insert mvs of NxN partitions. */
697 for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
698 part_num++)
699 {
700 e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
701
702 if(ps_child->aps_part_results[i4_search_idx][e_part_id]->i1_ref_idx != -1)
703 {
704 s_search_node = *ps_child->aps_part_results[i4_search_idx][e_part_id];
705 if(s_search_node.s_mv.i2_mvx != INTRA_MV)
706 {
707 CLIP_MV_WITHIN_RANGE(
708 s_search_node.s_mv.i2_mvx,
709 s_search_node.s_mv.i2_mvy,
710 pps_range_prms[s_search_node.i1_ref_idx],
711 0,
712 0,
713 0);
714
715 INSERT_NEW_NODE_NOMAP(
716 as_merge_unique_node,
717 num_unique_nodes_cu_merge,
718 s_search_node,
719 1);
720 }
721 }
722 }
723 }
724 }
725 else if(!((ps_merge_prms->ps_results_grandchild[(i << 2)]
726 .ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
727 (1 == ps_merge_prms->ps_results_grandchild[(i << 2)]
728 .ps_cu_results->u1_num_best_results)))
729 {
730 search_results_t *ps_results_root = &ps_merge_prms->ps_results_grandchild[(i << 2)];
731
732 for(j = 0; j < 4; j++)
733 {
734 e_part_type = (PART_TYPE_T)ps_results_root[j]
735 .ps_cu_results->ps_best_results[0]
736 .u1_part_type;
737
738 ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
739
740 /* Insert mvs of NxN partitions. */
741 for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
742 part_num++)
743 {
744 e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
745
746 if((ps_results_root[j]
747 .aps_part_results[i4_search_idx][e_part_id]
748 ->i1_ref_idx != -1) &&
749 (!ps_child->ps_cu_results->ps_best_results->as_pu_results->pu
750 .b1_intra_flag))
751 {
752 s_search_node =
753 *ps_results_root[j].aps_part_results[i4_search_idx][e_part_id];
754 if(s_search_node.s_mv.i2_mvx != INTRA_MV)
755 {
756 CLIP_MV_WITHIN_RANGE(
757 s_search_node.s_mv.i2_mvx,
758 s_search_node.s_mv.i2_mvy,
759 pps_range_prms[s_search_node.i1_ref_idx],
760 0,
761 0,
762 0);
763
764 INSERT_NEW_NODE_NOMAP(
765 as_merge_unique_node,
766 num_unique_nodes_cu_merge,
767 s_search_node,
768 1);
769 }
770 }
771 }
772 }
773 }
774 }
775 }
776
777 if(0 == num_unique_nodes_cu_merge)
778 {
779 return 0;
780 }
781
782 /*************************************************************************/
783 /* Appropriate Err compute fxn, depends on SAD/SATD, blk size and remains*/
784 /* fixed through this subpel refinement for this partition. */
785 /* Note, we do not enable grid sads since one pt is evaluated per node */
786 /* Hence, part mask is also nearly a don't-care and we use 2Nx2N enabled. */
787 /*************************************************************************/
788 i4_part_mask = ps_search_results->i4_part_mask;
789
790 /* Need to add the corresponding SAD functions for EXTREME SPEED : Lokesh */
791 if(ps_subpel_prms->i4_use_satd)
792 {
793 if(BLK_32x32 == e_blk_size)
794 {
795 pf_err_compute = hme_evalsatd_pt_pu_32x32;
796 }
797 else
798 {
799 pf_err_compute = hme_evalsatd_pt_pu_64x64;
800 }
801 }
802 else
803 {
804 pf_err_compute = (PF_SAD_FXN_T)hme_evalsad_grid_pu_MxM;
805 }
806
807 i4_ref_stride = ps_curr_layer->i4_rec_stride;
808
809 x_off = ps_merge_prms->ps_results_tl->u1_x_off;
810 y_off = ps_merge_prms->ps_results_tl->u1_y_off;
811 i4_offset = x_off + i4_ctb_x_off + ((y_off + i4_ctb_y_off) * i4_ref_stride);
812
813 /*************************************************************************/
814 /* This array stores the ids of the partitions whose */
815 /* SADs are updated. Since the partitions whose SADs are updated may not */
816 /* be in contiguous order, we supply another level of indirection. */
817 /*************************************************************************/
818 i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
819
820 /* Initialize result params used for partition update */
821 s_result_prms.pf_mv_cost_compute = NULL;
822 s_result_prms.ps_search_results = ps_search_results;
823 s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids;
824 s_result_prms.i1_ref_idx = i4_search_idx;
825 s_result_prms.i4_part_mask = i4_part_mask;
826 s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
827 s_result_prms.i4_grid_mask = 1;
828
829 /* One time Initialization of error params used for SAD/SATD compute */
830 s_err_prms.i4_inp_stride = ps_subpel_prms->i4_inp_stride;
831 s_err_prms.i4_ref_stride = i4_ref_stride;
832 s_err_prms.i4_part_mask = (ENABLE_2Nx2N);
833 s_err_prms.i4_grid_mask = 1;
834 s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
835 s_err_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
836 s_err_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
837 s_err_prms.i4_step = 1;
838
839 /*************************************************************************/
840 /* One time preparation of non changing interpolation params. */
841 /*************************************************************************/
842 s_interp_prms.i4_ref_stride = i4_ref_stride;
843 s_interp_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
844 s_interp_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
845 s_interp_prms.apu1_interp_out[0] = ps_subpel_prms->pu1_wkg_mem;
846 s_interp_prms.i4_out_stride = gau1_blk_size_to_wd[e_blk_size];
847 i4_interp_buf_id = 0;
848
849 pf_qpel_interp = ps_subpel_prms->pf_qpel_interp;
850
851 /***************************************************************************/
852 /* Compute SATD/SAD for all unique nodes of children CUs to get best merge */
853 /* results */
854 /***************************************************************************/
855 for(i = 0; i < num_unique_nodes_cu_merge; i++)
856 {
857 WORD8 i1_ref_idx;
858 ps_search_node = &as_merge_unique_node[i];
859
860 /*********************************************************************/
861 /* Compute the base pointer for input, interpolated buffers */
862 /* The base pointers point as follows: */
863 /* fx fy : 0, 0 :: fx, hy : 0, 0.5, hx, fy: 0.5, 0, hx, fy: 0.5, 0.5 */
864 /* To these, we need to add the offset of the current node */
865 /*********************************************************************/
866 i1_ref_idx = ps_search_node->i1_ref_idx;
867 apu1_hpel_ref[0] = ps_curr_layer->ppu1_list_rec_fxfy[i1_ref_idx] + i4_offset;
868 apu1_hpel_ref[1] = ps_curr_layer->ppu1_list_rec_hxfy[i1_ref_idx] + i4_offset;
869 apu1_hpel_ref[2] = ps_curr_layer->ppu1_list_rec_fxhy[i1_ref_idx] + i4_offset;
870 apu1_hpel_ref[3] = ps_curr_layer->ppu1_list_rec_hxhy[i1_ref_idx] + i4_offset;
871
872 s_interp_prms.ppu1_ref = &apu1_hpel_ref[0];
873
874 pf_qpel_interp(
875 &s_interp_prms,
876 ps_search_node->s_mv.i2_mvx,
877 ps_search_node->s_mv.i2_mvy,
878 i4_interp_buf_id);
879
880 pred_lx = i4_search_idx;
881 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
882
883 s_result_prms.u1_pred_lx = pred_lx;
884 s_result_prms.ps_search_node_base = ps_search_node;
885 s_err_prms.pu1_inp =
886 ps_wt_inp_prms->apu1_wt_inp[i1_ref_idx] + x_off + y_off * ps_subpel_prms->i4_inp_stride;
887 s_err_prms.pu1_ref = s_interp_prms.pu1_final_out;
888 s_err_prms.i4_ref_stride = s_interp_prms.i4_final_out_stride;
889
890 /* Carry out the SAD/SATD. This call also does the TU RECURSION.
891 Here the tu recursion logic is restricted by the size of the PU */
892 pf_err_compute(&s_err_prms);
893
894 if(ps_subpel_prms->u1_is_cu_noisy &&
895 ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
896 {
897 ps_me_optimised_function_list->pf_compute_stim_injected_distortion_for_all_parts(
898 s_err_prms.pu1_ref,
899 s_err_prms.i4_ref_stride,
900 ai4_valid_part_ids,
901 ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX,
902 ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
903 s_err_prms.pi4_sad_grid,
904 ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier,
905 ps_wt_inp_prms->a_inv_wpred_wt[i1_ref_idx],
906 ps_wt_inp_prms->ai4_shift_val[i1_ref_idx],
907 i4_num_valid_parts,
908 ps_wt_inp_prms->wpred_log_wdc,
909 (BLK_32x32 == e_blk_size) ? 32 : 64);
910 }
911
912 /* Update the mv's */
913 s_result_prms.i2_mv_x = ps_search_node->s_mv.i2_mvx;
914 s_result_prms.i2_mv_y = ps_search_node->s_mv.i2_mvy;
915
916 /* Update best results */
917 hme_update_results_pt_pu_best1_subpel_hs(&s_err_prms, &s_result_prms);
918 }
919
920 /************************************************************************/
921 /* Update mv cost and total cost for each valid partition in the CU */
922 /************************************************************************/
923 for(i = 0; i < TOT_NUM_PARTS; i++)
924 {
925 if(i4_part_mask & (1 << i))
926 {
927 WORD32 j;
928 WORD32 i4_mv_cost;
929
930 ps_search_node = ps_search_results->aps_part_results[i4_search_idx][i];
931
932 for(j = 0;
933 j < MIN(ps_search_results->u1_num_results_per_part, num_unique_nodes_cu_merge);
934 j++)
935 {
936 if(ps_search_node->i1_ref_idx != -1)
937 {
938 pred_lx = i4_search_idx;
939 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
940
941 /* Prediction context should now deal with qpel units */
942 HME_SET_MVPRED_RES(ps_pred_ctxt, MV_RES_QPEL);
943
944 ps_search_node->u1_subpel_done = 1;
945 ps_search_node->u1_is_avail = 1;
946
947 i4_mv_cost =
948 pf_mv_cost_compute(ps_search_node, ps_pred_ctxt, (PART_ID_T)i, MV_RES_QPEL);
949
950 ps_search_node->i4_tot_cost = i4_mv_cost + ps_search_node->i4_sad;
951 ps_search_node->i4_mv_cost = i4_mv_cost;
952
953 ps_search_node++;
954 }
955 }
956 }
957 }
958
959 return num_unique_nodes_cu_merge;
960 }
961
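/* Threshold on the number of intra NxN units (each 1/16th of the CU considered */
/* for merge) used by the intra-dominance check in hme_try_merge_high_speed() */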
962 #define CU_MERGE_MAX_INTRA_PARTS 4
963
964 /**
965 ********************************************************************************
966 * @fn hme_try_merge_high_speed
967 *
968 * @brief Attempts to merge 4 NxN candts to a 2Nx2N candt, either as a single
969 entity or with partitions for the high speed preset
970 *
971 * @param[in,out] hme_merge_prms_t: Params for CU merge
972 *
973 * @return MERGE_RESULT_T type result of merge (CU_MERGED/CU_SPLIT)
974 ********************************************************************************
975 */
976 CU_MERGE_RESULT_T hme_try_merge_high_speed(
977 me_ctxt_t *ps_thrd_ctxt,
978 me_frm_ctxt_t *ps_ctxt,
979 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
980 hme_subpel_prms_t *ps_subpel_prms,
981 hme_merge_prms_t *ps_merge_prms,
982 inter_pu_results_t *ps_pu_results,
983 pu_result_t *ps_pu_result)
984 {
985 search_results_t *ps_results_tl, *ps_results_tr;
986 search_results_t *ps_results_bl, *ps_results_br;
987
988 S32 i;
989 S32 i4_search_idx;
990 S32 i4_cost_parent;
991 S32 intra_cu_size;
992 ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
993
994 search_results_t *ps_results_merge = ps_merge_prms->ps_results_merge;
995 wgt_pred_ctxt_t *ps_wt_inp_prms = &ps_ctxt->s_wt_pred;
996
997 S32 i4_part_mask = ENABLE_ALL_PARTS - ENABLE_NxN;
998 S32 is_vert = 0, i4_best_part_type = -1;
999 S32 i4_intra_parts = 0; /* Keeps track of intra percentage before merge */
1000 S32 i4_cost_children = 0;
1001 S32 i4_frm_qstep = ps_ctxt->frm_qstep;
1002 S32 i4_num_merge_cands_evaluated = 0;
1003 U08 u1_x_off = ps_results_merge->u1_x_off;
1004 U08 u1_y_off = ps_results_merge->u1_y_off;
1005 S32 i4_32x32_id = (u1_y_off >> 4) + (u1_x_off >> 5);
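/* i4_32x32_id: raster-order index (0..3) of this 32x32 blk within the 64x64 CTB */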
1006
1007 ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
1008 ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
1009 ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
1010 ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
1011 ps_results_tl = ps_merge_prms->ps_results_tl;
1012 ps_results_tr = ps_merge_prms->ps_results_tr;
1013 ps_results_bl = ps_merge_prms->ps_results_bl;
1014 ps_results_br = ps_merge_prms->ps_results_br;
1015
1016 if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED)
1017 {
1018 i4_part_mask &= ~ENABLE_AMP;
1019 }
1020
1021 if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25)
1022 {
1023 i4_part_mask &= ~ENABLE_AMP;
1024
1025 i4_part_mask &= ~ENABLE_SMP;
1026 }
1027
1028 ps_merge_prms->i4_num_pred_dir_actual = 0;
1029
1030 /*************************************************************************/
1031 /* The logic for High speed CU merge goes as follows: */
1032 /* */
1033 /* 1. Early exit with CU_SPLIT if sum of best partitions of children CUs */
1034 /* exceed 7 */
1035 /* 2. Early exit with CU_MERGE if mvs of best partitions of children CUs */
1036 /* are identical */
1037 /* 3. Find all the unique mvs of best partitions of children CUs and */
1038 /* evaluate partial SATDs (all 17 partitions) for each unique mv. If */
1039 /* best parent cost is lower than sum of the best children costs */
1040 /* return CU_MERGE after seeding the best results else return CU_SPLIT*/
1041 /* */
1042 /*************************************************************************/
1043
1044 /* Count the number of best partitions in child CUs, early exit if > 7 */
1045 if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
1046 (CU_32x32 == ps_results_merge->e_cu_size))
1047 {
1048 S32 num_parts_in_32x32 = 0;
1049 WORD32 i4_part_type;
1050
1051 if(ps_results_tl->u1_split_flag)
1052 {
1053 num_parts_in_32x32 += 4;
1054
1055 #define COST_INTERCHANGE 0
1056 i4_cost_children = ps_merge_prms->ps_8x8_cu_results[0].ps_best_results->i4_tot_cost +
1057 ps_merge_prms->ps_8x8_cu_results[1].ps_best_results->i4_tot_cost +
1058 ps_merge_prms->ps_8x8_cu_results[2].ps_best_results->i4_tot_cost +
1059 ps_merge_prms->ps_8x8_cu_results[3].ps_best_results->i4_tot_cost;
1060 }
1061 else
1062 {
1063 i4_part_type = ps_results_tl->ps_cu_results->ps_best_results[0].u1_part_type;
1064 num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1065 i4_cost_children = ps_results_tl->ps_cu_results->ps_best_results[0].i4_tot_cost;
1066 }
1067
1068 if(ps_results_tr->u1_split_flag)
1069 {
1070 num_parts_in_32x32 += 4;
1071
1072 i4_cost_children += ps_merge_prms->ps_8x8_cu_results[4].ps_best_results->i4_tot_cost +
1073 ps_merge_prms->ps_8x8_cu_results[5].ps_best_results->i4_tot_cost +
1074 ps_merge_prms->ps_8x8_cu_results[6].ps_best_results->i4_tot_cost +
1075 ps_merge_prms->ps_8x8_cu_results[7].ps_best_results->i4_tot_cost;
1076 }
1077 else
1078 {
1079 i4_part_type = ps_results_tr->ps_cu_results->ps_best_results[0].u1_part_type;
1080 num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1081 i4_cost_children += ps_results_tr->ps_cu_results->ps_best_results[0].i4_tot_cost;
1082 }
1083
1084 if(ps_results_bl->u1_split_flag)
1085 {
1086 num_parts_in_32x32 += 4;
1087
1088 i4_cost_children += ps_merge_prms->ps_8x8_cu_results[8].ps_best_results->i4_tot_cost +
1089 ps_merge_prms->ps_8x8_cu_results[9].ps_best_results->i4_tot_cost +
1090 ps_merge_prms->ps_8x8_cu_results[10].ps_best_results->i4_tot_cost +
1091 ps_merge_prms->ps_8x8_cu_results[11].ps_best_results->i4_tot_cost;
1092 }
1093 else
1094 {
1095 i4_part_type = ps_results_bl->ps_cu_results->ps_best_results[0].u1_part_type;
1096 num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1097 i4_cost_children += ps_results_bl->ps_cu_results->ps_best_results[0].i4_tot_cost;
1098 }
1099
1100 if(ps_results_br->u1_split_flag)
1101 {
1102 num_parts_in_32x32 += 4;
1103
1104 i4_cost_children += ps_merge_prms->ps_8x8_cu_results[12].ps_best_results->i4_tot_cost +
1105 ps_merge_prms->ps_8x8_cu_results[13].ps_best_results->i4_tot_cost +
1106 ps_merge_prms->ps_8x8_cu_results[14].ps_best_results->i4_tot_cost +
1107 ps_merge_prms->ps_8x8_cu_results[15].ps_best_results->i4_tot_cost;
1108 }
1109 else
1110 {
1111 i4_part_type = ps_results_br->ps_cu_results->ps_best_results[0].u1_part_type;
1112 num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1113 i4_cost_children += ps_results_br->ps_cu_results->ps_best_results[0].i4_tot_cost;
1114 }
1115
1116 if((num_parts_in_32x32 > 7) && (ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY))
1117 {
1118 return CU_SPLIT;
1119 }
1120
1121 if((num_parts_in_32x32 > MAX_NUM_CONSTITUENT_MVS_TO_ENABLE_32MERGE_IN_XS25) &&
1122 (ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25))
1123 {
1124 return CU_SPLIT;
1125 }
1126 }
1127
1128 /* Accumulate intra percentage before merge for early CU_SPLIT decision */
1129 /* Note : Each intra part represents an NxN unit of the children CUs */
1130 /* This is essentially 1/16th of the CU size under consideration for merge */
1131 if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
1132 {
1133 if(CU_64x64 == ps_results_merge->e_cu_size)
1134 {
1135 i4_intra_parts =
1136 (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_inter_eval_enable)
1137 ? 16
1138 : ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_intra_eval_enable;
1139 }
1140 else
1141 {
1142 switch((ps_results_merge->u1_x_off >> 5) + ((ps_results_merge->u1_y_off >> 4)))
1143 {
1144 case 0:
1145 {
1146 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tl
1147 ->u1_inter_eval_enable)
1148 ? 16
1149 : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1150 ->ps_child_node_tl->u1_intra_eval_enable);
1151
1152 break;
1153 }
1154 case 1:
1155 {
1156 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tr
1157 ->u1_inter_eval_enable)
1158 ? 16
1159 : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1160 ->ps_child_node_tr->u1_intra_eval_enable);
1161
1162 break;
1163 }
1164 case 2:
1165 {
1166 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_bl
1167 ->u1_inter_eval_enable)
1168 ? 16
1169 : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1170 ->ps_child_node_bl->u1_intra_eval_enable);
1171
1172 break;
1173 }
1174 case 3:
1175 {
1176 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_br
1177 ->u1_inter_eval_enable)
1178 ? 16
1179 : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1180 ->ps_child_node_br->u1_intra_eval_enable);
1181
1182 break;
1183 }
1184 }
1185 }
1186 }
1187 else
1188 {
1189 for(i = 0; i < 4; i++)
1190 {
1191 search_results_t *ps_results =
1192 (i == 0) ? ps_results_tl
1193 : ((i == 1) ? ps_results_tr : ((i == 2) ? ps_results_bl : ps_results_br));
1194
1195 part_type_results_t *ps_best_res = &ps_results->ps_cu_results->ps_best_results[0];
1196
1197 if(ps_results->u1_split_flag)
1198 {
1199 U08 u1_x_off = ps_results->u1_x_off;
1200 U08 u1_y_off = ps_results->u1_y_off;
1201 U08 u1_8x8_zscan_id = gau1_ctb_raster_to_zscan[(u1_x_off >> 2) + (u1_y_off << 2)] >>
1202 2;
1203
1204 /* Special case to handle 8x8 CUs when 16x16 is split */
1205 ASSERT(ps_results->e_cu_size == CU_16x16);
1206
1207 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id].ps_best_results[0];
1208
1209 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1210 i4_intra_parts += 1;
1211
1212 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 1].ps_best_results[0];
1213
1214 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1215 i4_intra_parts += 1;
1216
1217 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 2].ps_best_results[0];
1218
1219 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1220 i4_intra_parts += 1;
1221
1222 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 3].ps_best_results[0];
1223
1224 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1225 i4_intra_parts += 1;
1226 }
1227 else if(ps_best_res[0].as_pu_results[0].pu.b1_intra_flag)
1228 {
1229 i4_intra_parts += 4;
1230 }
1231 }
1232 }
1233
1234 /* Determine the max intra CU size indicated by IPE */
1235 intra_cu_size = CU_64x64;
1236 if(ps_cur_ipe_ctb->u1_split_flag)
1237 {
1238 intra_cu_size = CU_32x32;
1239 if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
1240 {
1241 intra_cu_size = CU_16x16;
1242 }
1243 }
1244
1245 if(((i4_intra_parts > CU_MERGE_MAX_INTRA_PARTS) &&
1246 (intra_cu_size < ps_results_merge->e_cu_size) &&
1247 (ME_PRISTINE_QUALITY != ps_merge_prms->e_quality_preset)) ||
1248 (i4_intra_parts == 16))
1249 {
1250 S32 i4_merge_outcome;
1251
1252 i4_merge_outcome = (CU_32x32 == ps_results_merge->e_cu_size)
1253 ? (!ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag &&
1254 ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_valid_cu)
1255 : (!ps_cur_ipe_ctb->u1_split_flag);
1256
1257 i4_merge_outcome = i4_merge_outcome ||
1258 (ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset);
1259
1260 i4_merge_outcome = i4_merge_outcome &&
1261 !(ps_subpel_prms->u1_is_cu_noisy && DISABLE_INTRA_WHEN_NOISY);
1262
1263 if(i4_merge_outcome)
1264 {
1265 inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
1266 part_type_results_t *ps_best_result = ps_cu_results->ps_best_results;
1267 pu_t *ps_pu = &ps_best_result->as_pu_results->pu;
1268
1269 ps_cu_results->u1_num_best_results = 1;
1270 ps_cu_results->u1_cu_size = ps_results_merge->e_cu_size;
1271 ps_cu_results->u1_x_off = u1_x_off;
1272 ps_cu_results->u1_y_off = u1_y_off;
1273
1274 ps_best_result->u1_part_type = PRT_2Nx2N;
1275 ps_best_result->ai4_tu_split_flag[0] = 0;
1276 ps_best_result->ai4_tu_split_flag[1] = 0;
1277 ps_best_result->ai4_tu_split_flag[2] = 0;
1278 ps_best_result->ai4_tu_split_flag[3] = 0;
1279 ps_best_result->i4_tot_cost =
1280 (CU_64x64 == ps_results_merge->e_cu_size)
1281 ? ps_cur_ipe_ctb->i4_best64x64_intra_cost
1282 : ps_cur_ipe_ctb->ai4_best32x32_intra_cost[i4_32x32_id];
1283
1284 ps_pu->b1_intra_flag = 1;
1285 ps_pu->b4_pos_x = u1_x_off >> 2;
1286 ps_pu->b4_pos_y = u1_y_off >> 2;
1287 ps_pu->b4_wd = (1 << (ps_results_merge->e_cu_size + 1)) - 1;
1288 ps_pu->b4_ht = ps_pu->b4_wd;
1289 ps_pu->mv.i1_l0_ref_idx = -1;
1290 ps_pu->mv.i1_l1_ref_idx = -1;
1291 ps_pu->mv.s_l0_mv.i2_mvx = INTRA_MV;
1292 ps_pu->mv.s_l0_mv.i2_mvy = INTRA_MV;
1293 ps_pu->mv.s_l1_mv.i2_mvx = INTRA_MV;
1294 ps_pu->mv.s_l1_mv.i2_mvy = INTRA_MV;
1295
1296 return CU_MERGED;
1297 }
1298 else
1299 {
1300 return CU_SPLIT;
1301 }
1302 }
1303
1304 if(i4_intra_parts)
1305 {
1306 i4_part_mask = ENABLE_2Nx2N;
1307 }
1308
1309 ps_results_merge->u1_num_active_ref = (ps_ctxt->s_frm_prms.bidir_enabled) ? 2 : 1;
1310
1311 hme_reset_search_results(ps_results_merge, i4_part_mask, MV_RES_QPEL);
1312
1313 ps_results_merge->u1_num_active_ref = ps_merge_prms->i4_num_ref;
1314 ps_merge_prms->i4_num_pred_dir_actual = 0;
1315
1316 if(ps_subpel_prms->u1_is_cu_noisy && ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
1317 {
1318 S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
1319 S32 i4_num_valid_parts;
1320 S32 i4_sigma_array_offset;
1321
1322 i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
1323
1324 /*********************************************************************************************************************************************/
1325 /* i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values */
1326 /* Logic is x/4 + ((y/4) x 16) : every 4 pixel increase in x equals one 4x4 block increment, every 4 pixel increase in y equals 16 4x4 block */
1327 /* increment as there will be 256 4x4 blocks in a CTB */
1328 /*********************************************************************************************************************************************/
1329 i4_sigma_array_offset = (ps_merge_prms->ps_results_merge->u1_x_off / 4) +
1330 (ps_merge_prms->ps_results_merge->u1_y_off * 4);
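/* e.g. u1_x_off = 32, u1_y_off = 16 => offset = 32/4 + 16*4 = 72, */
/* i.e. the 4x4 blk at col 8, row 4 of the CTB's 16x16 grid of 4x4 blks */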
1331
1332 for(i = 0; i < i4_num_valid_parts; i++)
1333 {
1334 S32 i4_part_id = ai4_valid_part_ids[i];
1335
1336 hme_compute_final_sigma_of_pu_from_base_blocks(
1337 ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
1338 ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
1339 au8_final_src_sigmaX,
1340 au8_final_src_sigmaXSquared,
1341 (CU_32x32 == ps_results_merge->e_cu_size) ? 32 : 64,
1342 4,
1343 i4_part_id,
1344 16);
1345 }
1346
1347 ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX = au8_final_src_sigmaX;
1348 ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared = au8_final_src_sigmaXSquared;
1349 }
1350
1351 /*************************************************************************/
1352 /* Loop through all ref idx and pick the merge candts and refine based */
1353 /* on the active partitions. At this stage num ref will be 1 or 2 */
1354 /*************************************************************************/
1355 for(i4_search_idx = 0; i4_search_idx < ps_merge_prms->i4_num_ref; i4_search_idx++)
1356 {
1357 S32 i4_cands;
1358 U08 u1_pred_dir = 0;
1359
1360 if((2 == ps_merge_prms->i4_num_ref) || (!ps_ctxt->s_frm_prms.bidir_enabled))
1361 {
1362 u1_pred_dir = i4_search_idx;
1363 }
1364 else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
1365 {
1366 u1_pred_dir = 1;
1367 }
1368 else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0)
1369 {
1370 u1_pred_dir = 0;
1371 }
1372 else
1373 {
1374 ASSERT(0);
1375 }
1376
1377 /* call the function to pick and evaluate the merge candts, given */
1378 /* a ref id and a part mask. */
1379 i4_cands = hme_pick_eval_merge_candts(
1380 ps_merge_prms,
1381 ps_subpel_prms,
1382 u1_pred_dir,
1383 i4_best_part_type,
1384 is_vert,
1385 ps_wt_inp_prms,
1386 i4_frm_qstep,
1387 ps_cmn_utils_optimised_function_list,
1388 ps_me_optimised_function_list);
1389
1390 if(i4_cands)
1391 {
1392 ps_merge_prms->au1_pred_dir_searched[ps_merge_prms->i4_num_pred_dir_actual] =
1393 u1_pred_dir;
1394 ps_merge_prms->i4_num_pred_dir_actual++;
1395 }
1396
1397 i4_num_merge_cands_evaluated += i4_cands;
1398 }
1399
1400 /* Call the decide_part_types function here */
1401 /* Populate the new PU struct with the results post subpel refinement*/
1402 if(i4_num_merge_cands_evaluated)
1403 {
1404 inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
1405
1406 hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
1407
1408 ps_merge_prms->ps_inter_ctb_prms->i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
1409 ps_merge_prms->ps_inter_ctb_prms->i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
1410
1411 hme_populate_pus(
1412 ps_thrd_ctxt,
1413 ps_ctxt,
1414 ps_subpel_prms,
1415 ps_results_merge,
1416 ps_cu_results,
1417 ps_pu_results,
1418 ps_pu_result,
1419 ps_merge_prms->ps_inter_ctb_prms,
1420 &ps_ctxt->s_wt_pred,
1421 ps_merge_prms->ps_layer_ctxt,
1422 ps_merge_prms->au1_pred_dir_searched,
1423 ps_merge_prms->i4_num_pred_dir_actual);
1424
1425 ps_cu_results->i4_inp_offset = (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
1426
1427 hme_decide_part_types(
1428 ps_cu_results,
1429 ps_pu_results,
1430 ps_merge_prms->ps_inter_ctb_prms,
1431 ps_ctxt,
1432 ps_cmn_utils_optimised_function_list,
1433 ps_me_optimised_function_list
1434
1435 );
1436
1437 /*****************************************************************/
1438 /* INSERT INTRA RESULTS AT 32x32/64x64 LEVEL. */
1439 /*****************************************************************/
1440 #if DISABLE_INTRA_IN_BPICS
1441 if(1 != ((ME_XTREME_SPEED_25 == ps_merge_prms->e_quality_preset) &&
1442 (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
1443 #endif
1444 {
1445 if(!(DISABLE_INTRA_WHEN_NOISY && ps_merge_prms->ps_inter_ctb_prms->u1_is_cu_noisy))
1446 {
1447 hme_insert_intra_nodes_post_bipred(
1448 ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
1449 }
1450 }
1451 }
1452 else
1453 {
1454 return CU_SPLIT;
1455 }
1456
1457 /* We check the best result of ref idx 0 and compare for parent vs child */
1458 if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
1459 (CU_32x32 == ps_results_merge->e_cu_size))
1460 {
1461 i4_cost_parent = ps_results_merge->ps_cu_results->ps_best_results[0].i4_tot_cost;
1462 /*********************************************************************/
1463 /* Add the cost of signaling the CU tree bits. */
1464 /* Assuming parent is not split, then we signal 1 bit for this parent */
1465 /* CU. If split, then 1 bit for the parent CU + 1 bit for each of the */
1466 /* 4 child CUs. So, 4*lambda is extra for the children cost. :Lokesh */
1467 /*********************************************************************/
1468 {
1469 pred_ctxt_t *ps_pred_ctxt = &ps_results_merge->as_pred_ctxt[0];
1470
1471 i4_cost_children += ((4 * ps_pred_ctxt->lambda) >> (ps_pred_ctxt->lambda_q_shift));
1472 }
1473
1474 if(i4_cost_parent < i4_cost_children)
1475 {
1476 return CU_MERGED;
1477 }
1478
1479 return CU_SPLIT;
1480 }
1481 else
1482 {
1483 return CU_MERGED;
1484 }
1485 }
1486
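/* Copies the mv and ref idx of a search node into the mv bank; 'shift' can */
/* scale the mv to a coarser resolution (all callers in this file pass 0) */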
1487 #define COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift) \
1488 { \
1489 (ps_mv)->i2_mv_x = (ps_search_node)->s_mv.i2_mvx >> (shift); \
1490 (ps_mv)->i2_mv_y = (ps_search_node)->s_mv.i2_mvy >> (shift); \
1491 *(pi1_ref_idx) = (ps_search_node)->i1_ref_idx; \
1492 }
1493
1494 /**
1495 ********************************************************************************
1496 * @fn hme_update_mv_bank_noencode(search_results_t *ps_search_results,
1497 * layer_mv_t *ps_layer_mv,
1498 * S32 i4_search_blk_x,
1499 * S32 i4_search_blk_y,
1500 * mvbank_update_prms_t *ps_prms)
1501 *
1502 * @brief Updates the mv bank in case there is no further encoding to be done
1503 *
1504 * @param[in] ps_search_results: contains results for the block just searched
1505 *
1506 * @param[in,out] ps_layer_mv : Has pointer to mv bank amongst other things
1507 *
1508 * @param[in] i4_search_blk_x : col num of blk being searched
1509 *
1510 * @param[in] i4_search_blk_y : row num of blk being searched
1511 *
1512 * @param[in] ps_prms : contains certain parameters which govern how the update is done
1513 *
1514 * @return None
1515 ********************************************************************************
1516 */
1517
1518 void hme_update_mv_bank_noencode(
1519 search_results_t *ps_search_results,
1520 layer_mv_t *ps_layer_mv,
1521 S32 i4_search_blk_x,
1522 S32 i4_search_blk_y,
1523 mvbank_update_prms_t *ps_prms)
1524 {
1525 hme_mv_t *ps_mv;
1526 hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1527 S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1528 S32 i4_blk_x, i4_blk_y, i4_offset;
1529 S32 i4_j, i4_ref_id;
1530 search_node_t *ps_search_node;
1531 search_node_t *ps_search_node_8x8, *ps_search_node_4x4_1;
1532 search_node_t *ps_search_node_4x4_2, *ps_search_node_4x4_3;
1533 search_node_t *ps_search_node_4x4_4;
1534
1535 i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1536 i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1537 i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1538
1539 i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1540
1541 /* Identify the correct offset in the mvbank and the reference id buf */
1542 ps_mv = ps_layer_mv->ps_mv + i4_offset;
1543 pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1544
1545 /*************************************************************************/
1546 /* Supposing we store the mvs in the same blk size as we searched (e.g. */
1547 /* we searched 8x8 blks and store results for 8x8 blks), then we can */
1548 /* do a straightforward single update of results. This will have a 1-1 */
1549 /* correspondence. */
1550 /*************************************************************************/
1551 if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
1552 {
1553 for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
1554 {
1555 ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1556 for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1557 {
1558 COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, 0);
1559 ps_mv++;
1560 pi1_ref_idx++;
1561 ps_search_node++;
1562 }
1563 }
1564 return;
1565 }
1566
1567 /*************************************************************************/
1568 /* Case where search blk size is 8x8, but we update 4x4 results. In this */
1569 /* case, we need to have NxN partitions enabled in search. */
1570 /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
1571 /* NxN partition. We also update the 8x8 result into each of the 4x4 bank entries */
1572 /*************************************************************************/
1573 ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
1574 ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
1575 ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
1576
1577 /*************************************************************************/
1578 /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
1579 /* hence the below check. */
1580 /*************************************************************************/
1581 ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
1582
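/* ps_mv1/2/3/4 (and pi1_ref_idx1/2/3/4) point to the TL, TR, BL and BR 4x4 */
/* blks of the current 8x8 search blk within the mv bank */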
1583 ps_mv1 = ps_mv;
1584 ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
1585 ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
1586 ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
1587 pi1_ref_idx1 = pi1_ref_idx;
1588 pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
1589 pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
1590 pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
1591
1592 for(i4_ref_id = 0; i4_ref_id < (S32)ps_search_results->u1_num_active_ref; i4_ref_id++)
1593 {
1594 ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1595
1596 ps_search_node_4x4_1 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL];
1597
1598 ps_search_node_4x4_2 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TR];
1599
1600 ps_search_node_4x4_3 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BL];
1601
1602 ps_search_node_4x4_4 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BR];
1603
1604 COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
1605 ps_mv1++;
1606 pi1_ref_idx1++;
1607 ps_search_node_4x4_1++;
1608 COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
1609 ps_mv2++;
1610 pi1_ref_idx2++;
1611 ps_search_node_4x4_2++;
1612 COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
1613 ps_mv3++;
1614 pi1_ref_idx3++;
1615 ps_search_node_4x4_3++;
1616 COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
1617 ps_mv4++;
1618 pi1_ref_idx4++;
1619 ps_search_node_4x4_4++;
1620
1621 if(ps_layer_mv->i4_num_mvs_per_ref > 1)
1622 {
1623 COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_8x8, 0);
1624 ps_mv1++;
1625 pi1_ref_idx1++;
1626 COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_8x8, 0);
1627 ps_mv2++;
1628 pi1_ref_idx2++;
1629 COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_8x8, 0);
1630 ps_mv3++;
1631 pi1_ref_idx3++;
1632 COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_8x8, 0);
1633 ps_mv4++;
1634 pi1_ref_idx4++;
1635 }
1636
1637 for(i4_j = 2; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1638 {
1639 COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
1640 ps_mv1++;
1641 pi1_ref_idx1++;
1642 ps_search_node_4x4_1++;
1643 COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
1644 ps_mv2++;
1645 pi1_ref_idx2++;
1646 ps_search_node_4x4_2++;
1647 COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
1648 ps_mv3++;
1649 pi1_ref_idx3++;
1650 ps_search_node_4x4_3++;
1651 COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
1652 ps_mv4++;
1653 pi1_ref_idx4++;
1654 ps_search_node_4x4_4++;
1655 }
1656 }
1657 }
1658
1659 void hme_update_mv_bank_encode(
1660 search_results_t *ps_search_results,
1661 layer_mv_t *ps_layer_mv,
1662 S32 i4_search_blk_x,
1663 S32 i4_search_blk_y,
1664 mvbank_update_prms_t *ps_prms,
1665 U08 *pu1_pred_dir_searched,
1666 S32 i4_num_act_ref_l0)
1667 {
1668 hme_mv_t *ps_mv;
1669 hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1670 S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1671 S32 i4_blk_x, i4_blk_y, i4_offset;
1672 S32 j, i, num_parts;
1673 search_node_t *ps_search_node_tl, *ps_search_node_tr;
1674 search_node_t *ps_search_node_bl, *ps_search_node_br;
1675 search_node_t s_zero_mv;
1676 WORD32 i4_part_type = ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;
1677
1678 i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1679 i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1680 i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1681
1682 i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1683
1684 /* Identify the correct offset in the mvbank and the reference id buf */
1685 ps_mv = ps_layer_mv->ps_mv + i4_offset;
1686 pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1687
1688 ASSERT(ps_layer_mv->e_blk_size == BLK_8x8);
1689 ASSERT(ps_prms->e_search_blk_size == BLK_16x16);
1690
1691 /*************************************************************************/
1692 /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
1693 /* hence the below check. */
1694 /*************************************************************************/
1695 ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_best_results);
1696
1697 ps_mv1 = ps_mv;
1698 ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
1699 ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
1700 ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
1701 pi1_ref_idx1 = pi1_ref_idx;
1702 pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
1703 pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
1704 pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
1705
1706 /* Initialize zero mv: default mv used for intra mvs */
1707 s_zero_mv.s_mv.i2_mvx = 0;
1708 s_zero_mv.s_mv.i2_mvy = 0;
1709 s_zero_mv.i1_ref_idx = 0;
1710
1711 if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) &&
1712 (ps_search_results->i4_part_mask & ENABLE_NxN))
1713 {
1714 i4_part_type = PRT_NxN;
1715 }
1716
1717 for(i = 0; i < ps_prms->i4_num_ref; i++)
1718 {
1719 for(j = 0; j < ps_layer_mv->i4_num_mvs_per_ref; j++)
1720 {
1721 WORD32 i4_part_id = ge_part_type_to_part_id[i4_part_type][0];
1722
1723 num_parts = gau1_num_parts_in_part_type[i4_part_type];
1724
1725 ps_search_node_tl =
1726 ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id];
1727
1728 if(num_parts == 1)
1729 {
1730 ps_search_node_tr = ps_search_node_tl;
1731 ps_search_node_bl = ps_search_node_tl;
1732 ps_search_node_br = ps_search_node_tl;
1733 }
1734 else if(num_parts == 2)
1735 {
1736                 /* For vertically oriented partitions, tl and bl point to the same   */
1737                 /* result; for horizontally oriented partitions, tl and tr point to  */
1738                 /* the same result. This means for AMP, 2 of the 8x8 blks in the mv  */
1739                 /* bank get an ambiguous result, e.g. for 4x16L the left two 8x8 take*/
1740                 /* the 4x16L partition and the right two 8x8 take the 12x16R part.   */
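                /* For example (illustrative): with Nx2N the TL/BL entries take the  */
                /* left Nx2N partition and TR/BR the right one; with 2NxN the TL/TR  */
                /* entries take the top partition and BL/BR the bottom one.          */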
1741 if(gau1_is_vert_part[i4_part_type])
1742 {
1743 ps_search_node_tr =
1744 ps_search_results
1745 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1746 ps_search_node_bl = ps_search_node_tl;
1747 }
1748 else
1749 {
1750 ps_search_node_tr = ps_search_node_tl;
1751 ps_search_node_bl =
1752 ps_search_results
1753 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1754 }
1755 ps_search_node_br =
1756 ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1757 }
1758 else
1759 {
1760 /* 4 unique results */
1761 ps_search_node_tr =
1762 ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1763 ps_search_node_bl =
1764 ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2];
1765 ps_search_node_br =
1766 ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3];
1767 }
1768
1769 if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV)
1770 ps_search_node_tl++;
1771 if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV)
1772 ps_search_node_tr++;
1773 if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV)
1774 ps_search_node_bl++;
1775 if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV)
1776 ps_search_node_br++;
1777
1778 COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0);
1779 ps_mv1++;
1780 pi1_ref_idx1++;
1781 COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0);
1782 ps_mv2++;
1783 pi1_ref_idx2++;
1784 COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0);
1785 ps_mv3++;
1786 pi1_ref_idx3++;
1787 COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0);
1788 ps_mv4++;
1789 pi1_ref_idx4++;
1790
1791 if(ps_prms->i4_num_results_to_store > 1)
1792 {
1793 ps_search_node_tl =
1794 &ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id][1];
1795
1796 if(num_parts == 1)
1797 {
1798 ps_search_node_tr = ps_search_node_tl;
1799 ps_search_node_bl = ps_search_node_tl;
1800 ps_search_node_br = ps_search_node_tl;
1801 }
1802 else if(num_parts == 2)
1803 {
1804                     /* For vertically oriented partitions, tl and bl point to the same   */
1805                     /* result; for horizontally oriented partitions, tl and tr point to  */
1806                     /* the same result. This means for AMP, 2 of the 8x8 blks in the mv  */
1807                     /* bank get an ambiguous result, e.g. for 4x16L the left two 8x8 take*/
1808                     /* the 4x16L partition and the right two 8x8 take the 12x16R part.   */
1809 if(gau1_is_vert_part[i4_part_type])
1810 {
1811 ps_search_node_tr =
1812 &ps_search_results
1813 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1814 ps_search_node_bl = ps_search_node_tl;
1815 }
1816 else
1817 {
1818 ps_search_node_tr = ps_search_node_tl;
1819 ps_search_node_bl =
1820 &ps_search_results
1821 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1822 }
1823 ps_search_node_br =
1824 &ps_search_results
1825 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1826 }
1827 else
1828 {
1829 /* 4 unique results */
1830 ps_search_node_tr =
1831 &ps_search_results
1832 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1833 ps_search_node_bl =
1834 &ps_search_results
1835 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2][1];
1836 ps_search_node_br =
1837 &ps_search_results
1838 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3][1];
1839 }
1840
1841 if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV)
1842 ps_search_node_tl++;
1843 if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV)
1844 ps_search_node_tr++;
1845 if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV)
1846 ps_search_node_bl++;
1847 if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV)
1848 ps_search_node_br++;
1849
1850 COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0);
1851 ps_mv1++;
1852 pi1_ref_idx1++;
1853 COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0);
1854 ps_mv2++;
1855 pi1_ref_idx2++;
1856 COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0);
1857 ps_mv3++;
1858 pi1_ref_idx3++;
1859 COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0);
1860 ps_mv4++;
1861 pi1_ref_idx4++;
1862 }
1863 }
1864 }
1865 }
1866
1867 /**
1868 ********************************************************************************
1869 * @fn      hme_update_mv_bank_in_l1_me(search_results_t *ps_search_results,
1870 * layer_mv_t *ps_layer_mv,
1871 * S32 i4_search_blk_x,
1872 * S32 i4_search_blk_y,
1873 * mvbank_update_prms_t *ps_prms)
1874 *
1875 * @brief Updates the mv bank in case there is no further encoding to be done
1876 *
1877 * @param[in] ps_search_results: contains results for the block just searched
1878 *
1879 * @param[in,out] ps_layer_mv : Has pointer to mv bank amongst other things
1880 *
1881 * @param[in] i4_search_blk_x : col num of blk being searched
1882 *
1883 * @param[in] i4_search_blk_y : row num of blk being searched
1884 *
1885 * @param[in] ps_prms : contains certain parameters which govern how the update is done
1886 *
1887 * @return None
1888 ********************************************************************************
1889 */
1890
1891 void hme_update_mv_bank_in_l1_me(
1892 search_results_t *ps_search_results,
1893 layer_mv_t *ps_layer_mv,
1894 S32 i4_search_blk_x,
1895 S32 i4_search_blk_y,
1896 mvbank_update_prms_t *ps_prms)
1897 {
1898 hme_mv_t *ps_mv;
1899 hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1900 S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1901 S32 i4_blk_x, i4_blk_y, i4_offset;
1902 S32 i4_j, i4_ref_id;
1903 search_node_t *ps_search_node;
1904 search_node_t *ps_search_node_8x8, *ps_search_node_4x4;
1905
1906 i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1907 i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1908 i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1909
1910 i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1911
1912 /* Identify the correct offset in the mvbank and the reference id buf */
1913 ps_mv = ps_layer_mv->ps_mv + i4_offset;
1914 pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1915
1916 /*************************************************************************/
1917 /* Supposing we store the mvs in the same blk size as we searched (e.g. */
1918 /* we searched 8x8 blks and store results for 8x8 blks), then we can */
1919 /* do a straightforward single update of results. This will have a 1-1 */
1920 /* correspondence. */
1921 /*************************************************************************/
1922 if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
1923 {
1924 search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 2];
1925
1926 hme_mv_t *ps_mv_l0_root = ps_mv;
1927 hme_mv_t *ps_mv_l1_root =
1928 ps_mv + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
1929
1930 U32 u4_num_l0_results_updated = 0;
1931 U32 u4_num_l1_results_updated = 0;
1932
1933 S08 *pi1_ref_idx_l0_root = pi1_ref_idx;
1934 S08 *pi1_ref_idx_l1_root =
1935 pi1_ref_idx_l0_root + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
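        /* Bank layout assumed here: all L0 reference mvs of a blk are stored    */
        /* first (i4_num_active_ref_l0 * i4_num_mvs_per_ref entries), followed   */
        /* by the L1 reference mvs; the two root pointers above mark the start   */
        /* of each list.                                                         */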
1936
1937 for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
1938 {
1939 U32 *pu4_num_results_updated;
1940 search_node_t **pps_result_nodes;
1941
1942 U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
1943
1944 if(u1_pred_dir_of_cur_ref)
1945 {
1946 pu4_num_results_updated = &u4_num_l1_results_updated;
1947 pps_result_nodes = &aps_result_nodes_sorted[1][0];
1948 }
1949 else
1950 {
1951 pu4_num_results_updated = &u4_num_l0_results_updated;
1952 pps_result_nodes = &aps_result_nodes_sorted[0][0];
1953 }
1954
1955 ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1956
1957 for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1958 {
1959 hme_add_new_node_to_a_sorted_array(
1960 &ps_search_node[i4_j], pps_result_nodes, NULL, *pu4_num_results_updated, 0);
1961
1962 ASSERT(ps_search_node[i4_j].i1_ref_idx == i4_ref_id);
1963 (*pu4_num_results_updated)++;
1964 }
1965 }
1966
1967 for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
1968 {
1969 COPY_SEARCH_RESULT(
1970 &ps_mv_l0_root[i4_j],
1971 &pi1_ref_idx_l0_root[i4_j],
1972 aps_result_nodes_sorted[0][i4_j],
1973 0);
1974 }
1975
1976 for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
1977 {
1978 COPY_SEARCH_RESULT(
1979 &ps_mv_l1_root[i4_j],
1980 &pi1_ref_idx_l1_root[i4_j],
1981 aps_result_nodes_sorted[1][i4_j],
1982 0);
1983 }
1984
1985 return;
1986 }
1987
1988 /*************************************************************************/
1989 /* Case where search blk size is 8x8, but we update 4x4 results. In this */
1990 /* case, we need to have NxN partitions enabled in search. */
1991 /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
1992 /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/
1993 /*************************************************************************/
1994 ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
1995 ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
1996 ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
1997
1998 /*************************************************************************/
1999 /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
2000 /* hence the below check. */
2001 /*************************************************************************/
2002 ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
2003
2004 ps_mv1 = ps_mv;
2005 ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
2006 ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
2007 ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
2008 pi1_ref_idx1 = pi1_ref_idx;
2009 pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
2010 pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
2011 pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
2012
2013 {
2014 search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 4];
2015 U08 au1_cost_shifts_for_sorted_node[2][MAX_NUM_REF * 4];
2016
2017 S32 i;
2018
2019 hme_mv_t *ps_mv1_l0_root = ps_mv1;
2020 hme_mv_t *ps_mv1_l1_root =
2021 ps_mv1 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2022 hme_mv_t *ps_mv2_l0_root = ps_mv2;
2023 hme_mv_t *ps_mv2_l1_root =
2024 ps_mv2 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2025 hme_mv_t *ps_mv3_l0_root = ps_mv3;
2026 hme_mv_t *ps_mv3_l1_root =
2027 ps_mv3 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2028 hme_mv_t *ps_mv4_l0_root = ps_mv4;
2029 hme_mv_t *ps_mv4_l1_root =
2030 ps_mv4 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2031
2032 U32 u4_num_l0_results_updated = 0;
2033 U32 u4_num_l1_results_updated = 0;
2034
2035 S08 *pi1_ref_idx1_l0_root = pi1_ref_idx1;
2036 S08 *pi1_ref_idx1_l1_root = pi1_ref_idx1_l0_root + (ps_prms->i4_num_active_ref_l0 *
2037 ps_layer_mv->i4_num_mvs_per_ref);
2038 S08 *pi1_ref_idx2_l0_root = pi1_ref_idx2;
2039 S08 *pi1_ref_idx2_l1_root = pi1_ref_idx2_l0_root + (ps_prms->i4_num_active_ref_l0 *
2040 ps_layer_mv->i4_num_mvs_per_ref);
2041 S08 *pi1_ref_idx3_l0_root = pi1_ref_idx3;
2042 S08 *pi1_ref_idx3_l1_root = pi1_ref_idx3_l0_root + (ps_prms->i4_num_active_ref_l0 *
2043 ps_layer_mv->i4_num_mvs_per_ref);
2044 S08 *pi1_ref_idx4_l0_root = pi1_ref_idx4;
2045 S08 *pi1_ref_idx4_l1_root = pi1_ref_idx4_l0_root + (ps_prms->i4_num_active_ref_l0 *
2046 ps_layer_mv->i4_num_mvs_per_ref);
2047
2048 for(i = 0; i < 4; i++)
2049 {
2050 hme_mv_t *ps_mv_l0_root;
2051 hme_mv_t *ps_mv_l1_root;
2052
2053 S08 *pi1_ref_idx_l0_root;
2054 S08 *pi1_ref_idx_l1_root;
2055
2056 for(i4_ref_id = 0; i4_ref_id < ps_search_results->u1_num_active_ref; i4_ref_id++)
2057 {
2058 U32 *pu4_num_results_updated;
2059 search_node_t **pps_result_nodes;
2060 U08 *pu1_cost_shifts_for_sorted_node;
2061
2062 U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
2063
2064 if(u1_pred_dir_of_cur_ref)
2065 {
2066 pu4_num_results_updated = &u4_num_l1_results_updated;
2067 pps_result_nodes = &aps_result_nodes_sorted[1][0];
2068 pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0];
2069 }
2070 else
2071 {
2072 pu4_num_results_updated = &u4_num_l0_results_updated;
2073 pps_result_nodes = &aps_result_nodes_sorted[0][0];
2074                     pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[0][0]; /* L0 list, matching the L0 node array above */
2075 }
2076
2077 ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
2078
2079 ps_search_node_4x4 =
2080 ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL + i];
2081
2082 for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
2083 {
2084 hme_add_new_node_to_a_sorted_array(
2085 &ps_search_node_4x4[i4_j],
2086 pps_result_nodes,
2087 pu1_cost_shifts_for_sorted_node,
2088 *pu4_num_results_updated,
2089 0);
2090
2091 (*pu4_num_results_updated)++;
2092
2093 hme_add_new_node_to_a_sorted_array(
2094 &ps_search_node_8x8[i4_j],
2095 pps_result_nodes,
2096 pu1_cost_shifts_for_sorted_node,
2097 *pu4_num_results_updated,
2098 2);
2099
2100 (*pu4_num_results_updated)++;
2101 }
2102 }
2103
2104 switch(i)
2105 {
2106 case 0:
2107 {
2108 ps_mv_l0_root = ps_mv1_l0_root;
2109 ps_mv_l1_root = ps_mv1_l1_root;
2110
2111 pi1_ref_idx_l0_root = pi1_ref_idx1_l0_root;
2112 pi1_ref_idx_l1_root = pi1_ref_idx1_l1_root;
2113
2114 break;
2115 }
2116 case 1:
2117 {
2118 ps_mv_l0_root = ps_mv2_l0_root;
2119 ps_mv_l1_root = ps_mv2_l1_root;
2120
2121 pi1_ref_idx_l0_root = pi1_ref_idx2_l0_root;
2122 pi1_ref_idx_l1_root = pi1_ref_idx2_l1_root;
2123
2124 break;
2125 }
2126 case 2:
2127 {
2128 ps_mv_l0_root = ps_mv3_l0_root;
2129 ps_mv_l1_root = ps_mv3_l1_root;
2130
2131 pi1_ref_idx_l0_root = pi1_ref_idx3_l0_root;
2132 pi1_ref_idx_l1_root = pi1_ref_idx3_l1_root;
2133
2134 break;
2135 }
2136 case 3:
2137 {
2138 ps_mv_l0_root = ps_mv4_l0_root;
2139 ps_mv_l1_root = ps_mv4_l1_root;
2140
2141 pi1_ref_idx_l0_root = pi1_ref_idx4_l0_root;
2142 pi1_ref_idx_l1_root = pi1_ref_idx4_l1_root;
2143
2144 break;
2145 }
2146 }
2147
2148 u4_num_l0_results_updated =
2149 MIN((S32)u4_num_l0_results_updated,
2150 ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2151
2152 u4_num_l1_results_updated =
2153 MIN((S32)u4_num_l1_results_updated,
2154 ps_prms->i4_num_active_ref_l1 * ps_layer_mv->i4_num_mvs_per_ref);
2155
2156 for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
2157 {
2158 COPY_SEARCH_RESULT(
2159 &ps_mv_l0_root[i4_j],
2160 &pi1_ref_idx_l0_root[i4_j],
2161 aps_result_nodes_sorted[0][i4_j],
2162 0);
2163 }
2164
2165 for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
2166 {
2167 COPY_SEARCH_RESULT(
2168 &ps_mv_l1_root[i4_j],
2169 &pi1_ref_idx_l1_root[i4_j],
2170 aps_result_nodes_sorted[1][i4_j],
2171 0);
2172 }
2173 }
2174 }
2175 }
2176
2177 /**
2178 ******************************************************************************
2179 * @brief Scales a motion vector component projected from a different layer of the same
2180 * picture (so no ref id related delta poc scaling required)
2181 ******************************************************************************
2182 */
2183
2184 #define SCALE_MV_COMP_RES(mvcomp_p, dim_c, dim_p) \
2185 ((((mvcomp_p) * (dim_c)) + ((SIGN((mvcomp_p)) * (dim_p)) >> 1)) / (dim_p))
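/* Worked example (illustrative, non-dyadic dimensions): projecting a coarse */
/* mv component of +3 from dim_p = 1280 to dim_c = 1920 gives                */
/* ((3 * 1920) + (1280 >> 1)) / 1280 = 6400 / 1280 = 5, and -3 gives         */
/* (-5760 - 640) / 1280 = -5, i.e. the exact value of +/-4.5 rounded to the  */
/* nearest integer, with the half offset applied towards the sign.           */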
2186 /**
2187 ********************************************************************************
2188 * @fn hme_project_coloc_candt(search_node_t *ps_search_node,
2189 * layer_ctxt_t *ps_curr_layer,
2190 * layer_ctxt_t *ps_coarse_layer,
2191 * S32 i4_pos_x,
2192 * S32 i4_pos_y,
2193 * S08 i1_ref_id,
2194 *                                   S32 i4_result_id)
2195 *
2196 * @brief From a coarser layer, projects a candidate situated at the "colocated"
2197 *        position in the picture (e.g. given x, y it will be x/2, y/2 for dyadic scaling)
2198 *
2199 * @param[out] ps_search_node : contains the projected result
2200 *
2201 * @param[in] ps_curr_layer : current layer context
2202 *
2203 * @param[in] ps_coarse_layer : coarser layer context
2204 *
2205 * @param[in] i4_pos_x : x Position where mv is required (w.r.t. curr layer)
2206 *
2207 * @param[in] i4_pos_y : y Position where mv is required (w.r.t. curr layer)
2208 *
2209 * @param[in] i1_ref_id : reference id for which the candidate required
2210 *
2211 * @param[in] i4_result_id : result id for which the candidate required
2212 * (0 : best result, 1 : next best)
2213 *
2214 * @return None
2215 ********************************************************************************
2216 */
2217
2218 void hme_project_coloc_candt(
2219 search_node_t *ps_search_node,
2220 layer_ctxt_t *ps_curr_layer,
2221 layer_ctxt_t *ps_coarse_layer,
2222 S32 i4_pos_x,
2223 S32 i4_pos_y,
2224 S08 i1_ref_id,
2225 S32 i4_result_id)
2226 {
2227 S32 wd_c, ht_c, wd_p, ht_p;
2228 S32 blksize_p, blk_x, blk_y, i4_offset;
2229 layer_mv_t *ps_layer_mvbank;
2230 hme_mv_t *ps_mv;
2231 S08 *pi1_ref_idx;
2232
2233 /* Width and ht of current and prev layers */
2234 wd_c = ps_curr_layer->i4_wd;
2235 ht_c = ps_curr_layer->i4_ht;
2236 wd_p = ps_coarse_layer->i4_wd;
2237 ht_p = ps_coarse_layer->i4_ht;
2238
2239 ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2240 blksize_p = (S32)gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
2241
2242 /* Safety check to avoid uninitialized access across temporal layers */
2243 i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2244 i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2245
2246 /* Project the positions to prev layer */
2247 /* TODO: convert these to scale factors at pic level */
2248 blk_x = (i4_pos_x * wd_p) / (wd_c * blksize_p);
2249 blk_y = (i4_pos_y * ht_p) / (ht_c * blksize_p);
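    /* For example (illustrative sizes): with wd_c = 1920, wd_p = 960 and an */
    /* 8x8 coarse-layer mv grid (blksize_p = 8), blk_x reduces to            */
    /* i4_pos_x / 16, i.e. each coarse block covers 16 luma columns here.    */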
2250
2251 /* Pick up the mvs from the location */
2252 i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2253 i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2254
2255 ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2256 pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2257
2258 ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2259 pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2260
2261 ps_search_node->s_mv.i2_mvx = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_x, wd_c, wd_p);
2262 ps_search_node->s_mv.i2_mvy = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_y, ht_c, ht_p);
2263 ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2264 ps_search_node->u1_subpel_done = 0;
2265 if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2266 {
2267 ps_search_node->i1_ref_idx = i1_ref_id;
2268 ps_search_node->s_mv.i2_mvx = 0;
2269 ps_search_node->s_mv.i2_mvy = 0;
2270 }
2271 }
2272
2273 /**
2274 ********************************************************************************
2275 * @fn hme_project_coloc_candt_dyadic(search_node_t *ps_search_node,
2276 * layer_ctxt_t *ps_curr_layer,
2277 * layer_ctxt_t *ps_coarse_layer,
2278 * S32 i4_pos_x,
2279 * S32 i4_pos_y,
2280 * S08 i1_ref_id,
2281 *                                   S32 i4_result_id)
2282 *
2283 * @brief From a coarser layer, projects a candidate situated at the "colocated"
2284 * position in the picture when the ratios are dyadic
2285 *
2286 * @param[out] ps_search_node : contains the projected result
2287 *
2288 * @param[in] ps_curr_layer : current layer context
2289 *
2290 * @param[in] ps_coarse_layer : coarser layer context
2291 *
2292 * @param[in] i4_pos_x : x Position where mv is required (w.r.t. curr layer)
2293 *
2294 * @param[in] i4_pos_y : y Position where mv is required (w.r.t. curr layer)
2295 *
2296 * @param[in] i1_ref_id : reference id for which the candidate required
2297 *
2298 * @param[in] i4_result_id : result id for which the candidate required
2299 * (0 : best result, 1 : next best)
2300 *
2301 * @return None
2302 ********************************************************************************
2303 */
2304
2305 void hme_project_coloc_candt_dyadic(
2306 search_node_t *ps_search_node,
2307 layer_ctxt_t *ps_curr_layer,
2308 layer_ctxt_t *ps_coarse_layer,
2309 S32 i4_pos_x,
2310 S32 i4_pos_y,
2311 S08 i1_ref_id,
2312 S32 i4_result_id)
2313 {
2314 S32 wd_c, ht_c, wd_p, ht_p;
2315 S32 blksize_p, blk_x, blk_y, i4_offset;
2316 layer_mv_t *ps_layer_mvbank;
2317 hme_mv_t *ps_mv;
2318 S08 *pi1_ref_idx;
2319
2320 /* Width and ht of current and prev layers */
2321 wd_c = ps_curr_layer->i4_wd;
2322 ht_c = ps_curr_layer->i4_ht;
2323 wd_p = ps_coarse_layer->i4_wd;
2324 ht_p = ps_coarse_layer->i4_ht;
2325
2326 ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2327 /* blksize_p = log2(wd) + 1 */
2328 blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];
2329
2330 /* ASSERT for valid sizes */
2331 ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
2332
2333 /* Safety check to avoid uninitialized access across temporal layers */
2334 i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2335 i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2336
2337 /* Project the positions to prev layer */
2338 /* TODO: convert these to scale factors at pic level */
2339 blk_x = i4_pos_x >> blksize_p; // (2 * blksize_p);
2340 blk_y = i4_pos_y >> blksize_p; // (2 * blksize_p);
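    /* Since blksize_p is log2(blk wd) + 1 (see the note above), one right   */
    /* shift both halves the position (dyadic layer ratio) and converts it   */
    /* to a coarse-layer block index, e.g. an 8x8 grid gives i4_pos_x / 16.  */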
2341
2342 /* Pick up the mvs from the location */
2343 i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2344 i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2345
2346 ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2347 pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2348
2349 ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2350 pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2351
2352 ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1;
2353 ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1;
2354 ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2355 if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2356 {
2357 ps_search_node->i1_ref_idx = i1_ref_id;
2358 ps_search_node->s_mv.i2_mvx = 0;
2359 ps_search_node->s_mv.i2_mvy = 0;
2360 }
2361 }
2362
2363 void hme_project_coloc_candt_dyadic_implicit(
2364 search_node_t *ps_search_node,
2365 layer_ctxt_t *ps_curr_layer,
2366 layer_ctxt_t *ps_coarse_layer,
2367 S32 i4_pos_x,
2368 S32 i4_pos_y,
2369 S32 i4_num_act_ref_l0,
2370 U08 u1_pred_dir,
2371 U08 u1_default_ref_id,
2372 S32 i4_result_id)
2373 {
2374 S32 wd_c, ht_c, wd_p, ht_p;
2375 S32 blksize_p, blk_x, blk_y, i4_offset;
2376 layer_mv_t *ps_layer_mvbank;
2377 hme_mv_t *ps_mv;
2378 S08 *pi1_ref_idx;
2379
2380 /* Width and ht of current and prev layers */
2381 wd_c = ps_curr_layer->i4_wd;
2382 ht_c = ps_curr_layer->i4_ht;
2383 wd_p = ps_coarse_layer->i4_wd;
2384 ht_p = ps_coarse_layer->i4_ht;
2385
2386 ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2387 blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];
2388
2389 /* ASSERT for valid sizes */
2390 ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
2391
2392 /* Safety check to avoid uninitialized access across temporal layers */
2393 i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2394 i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2395 /* Project the positions to prev layer */
2396 /* TODO: convert these to scale factors at pic level */
2397 blk_x = i4_pos_x >> blksize_p; // (2 * blksize_p);
2398 blk_y = i4_pos_y >> blksize_p; // (2 * blksize_p);
2399
2400 /* Pick up the mvs from the location */
2401 i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2402 i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2403
2404 ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2405 pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2406
2407 if(u1_pred_dir == 1)
2408 {
2409 ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
2410 pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
2411 }
2412
2413 ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1;
2414 ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1;
2415 ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2416 if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2417 {
2418 ps_search_node->i1_ref_idx = u1_default_ref_id;
2419 ps_search_node->s_mv.i2_mvx = 0;
2420 ps_search_node->s_mv.i2_mvy = 0;
2421 }
2422 }
2423
2424 #define SCALE_RANGE_PRMS(prm1, prm2, shift) \
2425 { \
2426 prm1.i2_min_x = prm2.i2_min_x << shift; \
2427 prm1.i2_max_x = prm2.i2_max_x << shift; \
2428 prm1.i2_min_y = prm2.i2_min_y << shift; \
2429 prm1.i2_max_y = prm2.i2_max_y << shift; \
2430 }
2431
2432 #define SCALE_RANGE_PRMS_POINTERS(prm1, prm2, shift) \
2433 { \
2434 prm1->i2_min_x = prm2->i2_min_x << shift; \
2435 prm1->i2_max_x = prm2->i2_max_x << shift; \
2436 prm1->i2_min_y = prm2->i2_min_y << shift; \
2437 prm1->i2_max_y = prm2->i2_max_y << shift; \
2438 }
2439
2440 /**
2441 ********************************************************************************
2442 * @fn void hme_refine_frm_init(me_ctxt_t *ps_ctxt,
2443 * refine_layer_prms_t *ps_refine_prms)
2444 *
2445 * @brief Frame init of refinement layers in ME
2446 *
2447 * @param[in,out] ps_ctxt: ME Handle
2448 *
2449 * @param[in] ps_refine_prms : refinement layer prms
2450 *
2451 * @return None
2452 ********************************************************************************
2453 */
2454 void hme_refine_frm_init(
2455 layer_ctxt_t *ps_curr_layer, refine_prms_t *ps_refine_prms, layer_ctxt_t *ps_coarse_layer)
2456 {
2457 /* local variables */
2458 BLK_SIZE_T e_result_blk_size = BLK_8x8;
2459 S32 i4_num_ref_fpel, i4_num_ref_prev_layer;
2460
2461 i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
2462
2463 if(ps_refine_prms->explicit_ref)
2464 {
2465 i4_num_ref_fpel = i4_num_ref_prev_layer;
2466 }
2467 else
2468 {
2469 i4_num_ref_fpel = 2;
2470 }
2471
2472 if(ps_refine_prms->i4_enable_4x4_part)
2473 {
2474 e_result_blk_size = BLK_4x4;
2475 }
2476
2477 i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
2478
2479 hme_init_mv_bank(
2480 ps_curr_layer,
2481 e_result_blk_size,
2482 i4_num_ref_fpel,
2483 ps_refine_prms->i4_num_mvbank_results,
2484 ps_refine_prms->i4_layer_id > 0 ? 0 : 1);
2485 }
2486
2487 #if 1 //ENABLE_CU_RECURSION || TEST_AND_EVALUATE_CU_RECURSION
2488 /**
2489 ********************************************************************************
2490 * @fn void hme_init_clusters_16x16
2491 * (
2492 * cluster_16x16_blk_t *ps_cluster_blk_16x16
2493 * )
2494 *
2495 * @brief Initialisations for the structs used in the clustering algorithm
2496 *
2497 * @param[in/out] ps_cluster_blk_16x16: pointer to structure containing clusters
2498 * of 16x16 block
2499 *
2500 * @return None
2501 ********************************************************************************
2502 */
2503 static __inline void
2504     hme_init_clusters_16x16(cluster_16x16_blk_t *ps_cluster_blk_16x16, S32 bidir_enabled)
2505 {
2506 S32 i;
2507
2508 ps_cluster_blk_16x16->num_clusters = 0;
2509 ps_cluster_blk_16x16->intra_mv_area = 0;
2510 ps_cluster_blk_16x16->best_inter_cost = 0;
2511
2512 for(i = 0; i < MAX_NUM_CLUSTERS_16x16; i++)
2513 {
2514 ps_cluster_blk_16x16->as_cluster_data[i].max_dist_from_centroid =
2515 bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_16x16_B : MAX_DISTANCE_FROM_CENTROID_16x16;
2516
2517 ps_cluster_blk_16x16->as_cluster_data[i].is_valid_cluster = 0;
2518
2519 ps_cluster_blk_16x16->as_cluster_data[i].bi_mv_pixel_area = 0;
2520 ps_cluster_blk_16x16->as_cluster_data[i].uni_mv_pixel_area = 0;
2521 }
2522 for(i = 0; i < MAX_NUM_REF; i++)
2523 {
2524 ps_cluster_blk_16x16->au1_num_clusters[i] = 0;
2525 }
2526 }
2527
2528 /**
2529 ********************************************************************************
2530 * @fn void hme_init_clusters_32x32
2531 * (
2532 * cluster_32x32_blk_t *ps_cluster_blk_32x32
2533 * )
2534 *
2535 * @brief Initialisations for the structs used in the clustering algorithm
2536 *
2537 * @param[in/out] ps_cluster_blk_32x32: pointer to structure containing clusters
2538 * of 32x32 block
2539 *
2540 * @return None
2541 ********************************************************************************
2542 */
2543 static __inline void
2544     hme_init_clusters_32x32(cluster_32x32_blk_t *ps_cluster_blk_32x32, S32 bidir_enabled)
2545 {
2546 S32 i;
2547
2548 ps_cluster_blk_32x32->num_clusters = 0;
2549 ps_cluster_blk_32x32->intra_mv_area = 0;
2550 ps_cluster_blk_32x32->best_alt_ref = -1;
2551 ps_cluster_blk_32x32->best_uni_ref = -1;
2552 ps_cluster_blk_32x32->best_inter_cost = 0;
2553 ps_cluster_blk_32x32->num_clusters_with_weak_sdi_density = 0;
2554
2555 for(i = 0; i < MAX_NUM_CLUSTERS_32x32; i++)
2556 {
2557 ps_cluster_blk_32x32->as_cluster_data[i].max_dist_from_centroid =
2558 bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_32x32_B : MAX_DISTANCE_FROM_CENTROID_32x32;
2559 ps_cluster_blk_32x32->as_cluster_data[i].is_valid_cluster = 0;
2560
2561 ps_cluster_blk_32x32->as_cluster_data[i].bi_mv_pixel_area = 0;
2562 ps_cluster_blk_32x32->as_cluster_data[i].uni_mv_pixel_area = 0;
2563 }
2564 for(i = 0; i < MAX_NUM_REF; i++)
2565 {
2566 ps_cluster_blk_32x32->au1_num_clusters[i] = 0;
2567 }
2568 }
2569
2570 /**
2571 ********************************************************************************
2572 * @fn void hme_init_clusters_64x64
2573 * (
2574 * cluster_64x64_blk_t *ps_cluster_blk_64x64
2575 * )
2576 *
2577 * @brief Initialisations for the structs used in the clustering algorithm
2578 *
2579 * @param[in/out] ps_cluster_blk_64x64: pointer to structure containing clusters
2580 * of 64x64 block
2581 *
2582 * @return None
2583 ********************************************************************************
2584 */
2585 static __inline void
2586     hme_init_clusters_64x64(cluster_64x64_blk_t *ps_cluster_blk_64x64, S32 bidir_enabled)
2587 {
2588 S32 i;
2589
2590 ps_cluster_blk_64x64->num_clusters = 0;
2591 ps_cluster_blk_64x64->intra_mv_area = 0;
2592 ps_cluster_blk_64x64->best_alt_ref = -1;
2593 ps_cluster_blk_64x64->best_uni_ref = -1;
2594 ps_cluster_blk_64x64->best_inter_cost = 0;
2595
2596 for(i = 0; i < MAX_NUM_CLUSTERS_64x64; i++)
2597 {
2598 ps_cluster_blk_64x64->as_cluster_data[i].max_dist_from_centroid =
2599 bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_64x64_B : MAX_DISTANCE_FROM_CENTROID_64x64;
2600 ps_cluster_blk_64x64->as_cluster_data[i].is_valid_cluster = 0;
2601
2602 ps_cluster_blk_64x64->as_cluster_data[i].bi_mv_pixel_area = 0;
2603 ps_cluster_blk_64x64->as_cluster_data[i].uni_mv_pixel_area = 0;
2604 }
2605 for(i = 0; i < MAX_NUM_REF; i++)
2606 {
2607 ps_cluster_blk_64x64->au1_num_clusters[i] = 0;
2608 }
2609 }
2610
2611 /**
2612 ********************************************************************************
2613 * @fn void hme_sort_and_assign_top_ref_ids_areawise
2614 * (
2615 * ctb_cluster_info_t *ps_ctb_cluster_info
2616 * )
2617 *
2618 * @brief Finds best_uni_ref and best_alt_ref
2619 *
2620 * @param[in/out] ps_ctb_cluster_info: structure that points to ctb data
2621 *
2622 * @param[in] bidir_enabled: flag that indicates whether or not bi-pred is
2623 * enabled
2624 *
2625 * @param[in] block_width: width of the block in pels
2626 *
2627 * @param[in] e_cu_pos: position of the block within the CTB
2628 *
2629 * @return None
2630 ********************************************************************************
2631 */
2632 void hme_sort_and_assign_top_ref_ids_areawise(
2633 ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width, CU_POS_T e_cu_pos)
2634 {
2635 cluster_32x32_blk_t *ps_32x32 = NULL;
2636 cluster_64x64_blk_t *ps_64x64 = NULL;
2637 cluster_data_t *ps_data;
2638
2639 S32 j, k;
2640
2641 S32 ai4_uni_area[MAX_NUM_REF];
2642 S32 ai4_bi_area[MAX_NUM_REF];
2643 S32 ai4_ref_id_found[MAX_NUM_REF];
2644 S32 ai4_ref_id[MAX_NUM_REF];
2645
2646 S32 best_uni_ref = -1, best_alt_ref = -1;
2647 S32 num_clusters;
2648 S32 num_ref = 0;
2649 S32 num_clusters_evaluated = 0;
2650 S32 is_cur_blk_valid;
2651
2652 if(32 == block_width)
2653 {
2654 is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << e_cu_pos)) || 0;
2655 ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cu_pos];
2656 num_clusters = ps_32x32->num_clusters;
2657 ps_data = &ps_32x32->as_cluster_data[0];
2658 }
2659 else
2660 {
2661 is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask == 0xf);
2662 ps_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
2663 num_clusters = ps_64x64->num_clusters;
2664 ps_data = &ps_64x64->as_cluster_data[0];
2665 }
2666
2667 #if !ENABLE_4CTB_EVALUATION
2668 if((num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
2669 {
2670 return;
2671 }
2672 #endif
2673 if(num_clusters == 0)
2674 {
2675 return;
2676 }
2677 else if(!is_cur_blk_valid)
2678 {
2679 return;
2680 }
2681
2682 memset(ai4_uni_area, 0, sizeof(S32) * MAX_NUM_REF);
2683 memset(ai4_bi_area, 0, sizeof(S32) * MAX_NUM_REF);
2684 memset(ai4_ref_id_found, 0, sizeof(S32) * MAX_NUM_REF);
2685 memset(ai4_ref_id, -1, sizeof(S32) * MAX_NUM_REF);
2686
2687 for(j = 0; num_clusters_evaluated < num_clusters; j++, ps_data++)
2688 {
2689 S32 ref_id;
2690
2691 if(!ps_data->is_valid_cluster)
2692 {
2693 continue;
2694 }
2695
2696 ref_id = ps_data->ref_id;
2697
2698 num_clusters_evaluated++;
2699
2700 ai4_uni_area[ref_id] += ps_data->uni_mv_pixel_area;
2701 ai4_bi_area[ref_id] += ps_data->bi_mv_pixel_area;
2702
2703 if(!ai4_ref_id_found[ref_id])
2704 {
2705 ai4_ref_id[ref_id] = ref_id;
2706 ai4_ref_id_found[ref_id] = 1;
2707 num_ref++;
2708 }
2709 }
2710
2711 {
2712 S32 ai4_ref_id_temp[MAX_NUM_REF];
2713
2714 memcpy(ai4_ref_id_temp, ai4_ref_id, sizeof(S32) * MAX_NUM_REF);
2715
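        /* One selection pass: bring the ref with the largest uni-prediction */
        /* pixel area (and its ref id) to index 0.                           */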
2716 for(k = 1; k < MAX_NUM_REF; k++)
2717 {
2718 if(ai4_uni_area[k] > ai4_uni_area[0])
2719 {
2720 SWAP_HME(ai4_uni_area[k], ai4_uni_area[0], S32);
2721 SWAP_HME(ai4_ref_id_temp[k], ai4_ref_id_temp[0], S32);
2722 }
2723 }
2724
2725 best_uni_ref = ai4_ref_id_temp[0];
2726 }
2727
2728 if(bidir_enabled)
2729 {
2730 for(k = 1; k < MAX_NUM_REF; k++)
2731 {
2732 if(ai4_bi_area[k] > ai4_bi_area[0])
2733 {
2734 SWAP_HME(ai4_bi_area[k], ai4_bi_area[0], S32);
2735 SWAP_HME(ai4_ref_id[k], ai4_ref_id[0], S32);
2736 }
2737 }
2738
2739 if(!ai4_bi_area[0])
2740 {
2741 best_alt_ref = -1;
2742
2743 if(32 == block_width)
2744 {
2745 SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
2746 }
2747 else
2748 {
2749 SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
2750 }
2751
2752 return;
2753 }
2754
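        /* If the ref with the largest bi-prediction area is already the     */
        /* best uni ref, promote the next largest bi-area ref so that        */
        /* best_uni_ref and best_alt_ref remain distinct.                    */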
2755 if(best_uni_ref == ai4_ref_id[0])
2756 {
2757 for(k = 2; k < MAX_NUM_REF; k++)
2758 {
2759 if(ai4_bi_area[k] > ai4_bi_area[1])
2760 {
2761 SWAP_HME(ai4_bi_area[k], ai4_bi_area[1], S32);
2762 SWAP_HME(ai4_ref_id[k], ai4_ref_id[1], S32);
2763 }
2764 }
2765
2766 best_alt_ref = ai4_ref_id[1];
2767 }
2768 else
2769 {
2770 best_alt_ref = ai4_ref_id[0];
2771 }
2772 }
2773
2774 if(32 == block_width)
2775 {
2776 SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
2777 }
2778 else
2779 {
2780 SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
2781 }
2782 }
2783
2784 /**
2785 ********************************************************************************
2786 * @fn void hme_find_top_ref_ids
2787 * (
2788 * ctb_cluster_info_t *ps_ctb_cluster_info
2789 * )
2790 *
2791 * @brief Finds best_uni_ref and best_alt_ref
2792 *
2793 * @param[in/out] ps_ctb_cluster_info: structure that points to ctb data
2794 *
2795 * @return None
2796 ********************************************************************************
2797 */
2798 void hme_find_top_ref_ids(
2799 ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width)
2800 {
2801 S32 i;
2802
2803 if(32 == block_width)
2804 {
2805 for(i = 0; i < 4; i++)
2806 {
2807 hme_sort_and_assign_top_ref_ids_areawise(
2808 ps_ctb_cluster_info, bidir_enabled, block_width, (CU_POS_T)i);
2809 }
2810 }
2811 else if(64 == block_width)
2812 {
2813 hme_sort_and_assign_top_ref_ids_areawise(
2814 ps_ctb_cluster_info, bidir_enabled, block_width, POS_NA);
2815 }
2816 }
2817
2818 /**
2819 ********************************************************************************
2820 * @fn void hme_boot_out_outlier
2821 * (
2822 * ctb_cluster_info_t *ps_ctb_cluster_info
2823 * )
2824 *
2825 * @brief Removes outlier clusters before CU tree population
2826 *
2827 * @param[in/out] ps_ctb_cluster_info: structure that points to ctb data
2828 *
2829 * @return None
2830 ********************************************************************************
2831 */
2832 void hme_boot_out_outlier(ctb_cluster_info_t *ps_ctb_cluster_info, S32 blk_width)
2833 {
2834 cluster_32x32_blk_t *ps_32x32;
2835
2836 S32 i;
2837
2838 cluster_64x64_blk_t *ps_64x64 = &ps_ctb_cluster_info->ps_64x64_blk[0];
2839
2840 S32 sdi_threshold = ps_ctb_cluster_info->sdi_threshold;
2841
2842 if(32 == blk_width)
2843 {
2844 /* 32x32 clusters */
2845 for(i = 0; i < 4; i++)
2846 {
2847 ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
2848
2849 if(ps_32x32->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
2850 {
2851 BUMP_OUTLIER_CLUSTERS(ps_32x32, sdi_threshold);
2852 }
2853 }
2854 }
2855 else if(64 == blk_width)
2856 {
2857 /* 64x64 clusters */
2858 if(ps_64x64->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
2859 {
2860 BUMP_OUTLIER_CLUSTERS(ps_64x64, sdi_threshold);
2861 }
2862 }
2863 }
2864
2865 /**
2866 ********************************************************************************
2867 * @fn void hme_update_cluster_attributes
2868 * (
2869 * cluster_data_t *ps_cluster_data,
2870 * S32 mvx,
2871 * S32 mvy,
2872 * PART_ID_T e_part_id
2873 * )
2874 *
2875 * @brief Implementation of the clustering algorithm
2876 *
2877 * @param[in/out] ps_cluster_data: pointer to cluster_data_t struct
2878 *
2879 * @param[in] mvx : x co-ordinate of the motion vector
2880 *
2881 * @param[in] mvy : y co-ordinate of the motion vector
2882 *
2883 * @param[in] ref_idx : ref_id of the motion vector
2884 *
2885 * @param[in] e_part_id : partition id of the motion vector
2886 *
2887 * @return None
2888 ********************************************************************************
2889 */
2890 static __inline void hme_update_cluster_attributes(
2891 cluster_data_t *ps_cluster_data,
2892 S32 mvx,
2893 S32 mvy,
2894 S32 mvdx,
2895 S32 mvdy,
2896 S32 ref_id,
2897 S32 sdi,
2898 U08 is_part_of_bi,
2899 PART_ID_T e_part_id)
2900 {
2901 LWORD64 i8_mvx_sum_q8;
2902 LWORD64 i8_mvy_sum_q8;
2903
2904 S32 centroid_posx_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
2905 S32 centroid_posy_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
2906
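    /* mvdx/mvdy are (centroid - mv), so a positive delta means the new mv   */
    /* lies on the smaller side of the centroid and may stretch min_x/min_y, */
    /* while a negative delta may stretch max_x/max_y.                       */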
2907 if((mvdx > 0) && (ps_cluster_data->min_x > mvx))
2908 {
2909 ps_cluster_data->min_x = mvx;
2910 }
2911 else if((mvdx < 0) && (ps_cluster_data->max_x < mvx))
2912 {
2913 ps_cluster_data->max_x = mvx;
2914 }
2915
2916 if((mvdy > 0) && (ps_cluster_data->min_y > mvy))
2917 {
2918 ps_cluster_data->min_y = mvy;
2919 }
2920 else if((mvdy < 0) && (ps_cluster_data->max_y < mvy))
2921 {
2922 ps_cluster_data->max_y = mvy;
2923 }
2924
2925 {
2926 S32 num_mvs = ps_cluster_data->num_mvs;
2927
2928 ps_cluster_data->as_mv[num_mvs].pixel_count = gai4_partition_area[e_part_id];
2929 ps_cluster_data->as_mv[num_mvs].mvx = mvx;
2930 ps_cluster_data->as_mv[num_mvs].mvy = mvy;
2931
2932 /***************************/
2933 ps_cluster_data->as_mv[num_mvs].is_uni = !is_part_of_bi;
2934 ps_cluster_data->as_mv[num_mvs].sdi = sdi;
2935 /**************************/
2936 }
2937
2938     /* Update of centroid */
2939 {
2940 i8_mvx_sum_q8 = (LWORD64)centroid_posx_q8 * ps_cluster_data->num_mvs + (mvx << 8);
2941 i8_mvy_sum_q8 = (LWORD64)centroid_posy_q8 * ps_cluster_data->num_mvs + (mvy << 8);
2942
2943 ps_cluster_data->num_mvs++;
2944
2945 ps_cluster_data->s_centroid.i4_pos_x_q8 =
2946 (WORD32)((i8_mvx_sum_q8) / ps_cluster_data->num_mvs);
2947 ps_cluster_data->s_centroid.i4_pos_y_q8 =
2948 (WORD32)((i8_mvy_sum_q8) / ps_cluster_data->num_mvs);
2949 }
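    /* i.e. a running mean in Q8: centroid_new = (centroid_old * N + (mv << 8)) / (N + 1) */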
2950
2951 ps_cluster_data->area_in_pixels += gai4_partition_area[e_part_id];
2952
2953 if(is_part_of_bi)
2954 {
2955 ps_cluster_data->bi_mv_pixel_area += gai4_partition_area[e_part_id];
2956 }
2957 else
2958 {
2959 ps_cluster_data->uni_mv_pixel_area += gai4_partition_area[e_part_id];
2960 }
2961 }
2962
2963 /**
2964 ********************************************************************************
2965 * @fn void hme_try_cluster_merge
2966 * (
2967 * cluster_data_t *ps_cluster_data,
2968 * S32 *pi4_num_clusters,
2969 * S32 idx_of_updated_cluster
2970 * )
2971 *
2972 * @brief Implementation of the clustering algorithm
2973 *
2974 * @param[in/out] ps_cluster_data: pointer to cluster_data_t struct
2975 *
2976 * @param[in/out] pi4_num_clusters : pointer to number of clusters
2977 *
2978 * @param[in] idx_of_updated_cluster : index of the cluster most recently
2979 * updated
2980 *
2981 * @return Nothing
2982 ********************************************************************************
2983 */
2984 void hme_try_cluster_merge(
2985 cluster_data_t *ps_cluster_data, U08 *pu1_num_clusters, S32 idx_of_updated_cluster)
2986 {
2987 centroid_t *ps_centroid;
2988
2989 S32 cur_pos_x_q8;
2990 S32 cur_pos_y_q8;
2991 S32 i;
2992 S32 max_dist_from_centroid;
2993 S32 mvd;
2994 S32 mvdx_q8;
2995 S32 mvdx;
2996 S32 mvdy_q8;
2997 S32 mvdy;
2998 S32 num_clusters, num_clusters_evaluated;
2999 S32 other_pos_x_q8;
3000 S32 other_pos_y_q8;
3001
3002 cluster_data_t *ps_root = ps_cluster_data;
3003 cluster_data_t *ps_cur_cluster = &ps_cluster_data[idx_of_updated_cluster];
3004 centroid_t *ps_cur_centroid = &ps_cur_cluster->s_centroid;
3005
3006 /* Merge is superfluous if num_clusters is 1 */
3007 if(*pu1_num_clusters == 1)
3008 {
3009 return;
3010 }
3011
3012 cur_pos_x_q8 = ps_cur_centroid->i4_pos_x_q8;
3013 cur_pos_y_q8 = ps_cur_centroid->i4_pos_y_q8;
3014
3015 max_dist_from_centroid = ps_cur_cluster->max_dist_from_centroid;
3016
3017 num_clusters = *pu1_num_clusters;
3018 num_clusters_evaluated = 0;
3019
3020 for(i = 0; num_clusters_evaluated < num_clusters; i++, ps_cluster_data++)
3021 {
3022 if(!ps_cluster_data->is_valid_cluster)
3023 {
3024 continue;
3025 }
3026 if((ps_cluster_data->ref_id != ps_cur_cluster->ref_id) || (i == idx_of_updated_cluster))
3027 {
3028 num_clusters_evaluated++;
3029 continue;
3030 }
3031
3032 ps_centroid = &ps_cluster_data->s_centroid;
3033
3034 other_pos_x_q8 = ps_centroid->i4_pos_x_q8;
3035 other_pos_y_q8 = ps_centroid->i4_pos_y_q8;
3036
3037 mvdx_q8 = (cur_pos_x_q8 - other_pos_x_q8);
3038 mvdy_q8 = (cur_pos_y_q8 - other_pos_y_q8);
3039 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3040 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3041
3042 mvd = ABS(mvdx) + ABS(mvdy);
3043
3044 if(mvd <= (max_dist_from_centroid >> 1))
3045 {
3046 /* 0 => no updates */
3047 /* 1 => min updated */
3048 /* 2 => max updated */
3049 S32 minmax_x_update_id;
3050 S32 minmax_y_update_id;
3051
3052 LWORD64 i8_mv_x_sum_self = (LWORD64)cur_pos_x_q8 * ps_cur_cluster->num_mvs;
3053 LWORD64 i8_mv_y_sum_self = (LWORD64)cur_pos_y_q8 * ps_cur_cluster->num_mvs;
3054 LWORD64 i8_mv_x_sum_cousin = (LWORD64)other_pos_x_q8 * ps_cluster_data->num_mvs;
3055 LWORD64 i8_mv_y_sum_cousin = (LWORD64)other_pos_y_q8 * ps_cluster_data->num_mvs;
3056
3057 (*pu1_num_clusters)--;
3058
3059 ps_cluster_data->is_valid_cluster = 0;
3060
3061 memcpy(
3062 &ps_cur_cluster->as_mv[ps_cur_cluster->num_mvs],
3063 ps_cluster_data->as_mv,
3064 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3065
3066 ps_cur_cluster->num_mvs += ps_cluster_data->num_mvs;
3067 ps_cur_cluster->area_in_pixels += ps_cluster_data->area_in_pixels;
3068 ps_cur_cluster->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3069 ps_cur_cluster->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3070 i8_mv_x_sum_self += i8_mv_x_sum_cousin;
3071 i8_mv_y_sum_self += i8_mv_y_sum_cousin;
3072
3073 ps_cur_centroid->i4_pos_x_q8 = (WORD32)(i8_mv_x_sum_self / ps_cur_cluster->num_mvs);
3074 ps_cur_centroid->i4_pos_y_q8 = (WORD32)(i8_mv_y_sum_self / ps_cur_cluster->num_mvs);
3075
3076 minmax_x_update_id = (ps_cur_cluster->min_x < ps_cluster_data->min_x)
3077 ? ((ps_cur_cluster->max_x > ps_cluster_data->max_x) ? 0 : 2)
3078 : 1;
3079 minmax_y_update_id = (ps_cur_cluster->min_y < ps_cluster_data->min_y)
3080 ? ((ps_cur_cluster->max_y > ps_cluster_data->max_y) ? 0 : 2)
3081 : 1;
3082
3083             /* Update of centroid spread */
3084 switch(minmax_x_update_id + (minmax_y_update_id << 2))
3085 {
3086 case 1:
3087 {
3088 S32 mvd, mvd_q8;
3089
3090 ps_cur_cluster->min_x = ps_cluster_data->min_x;
3091
3092 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3093 mvd = (mvd_q8 + (1 << 7)) >> 8;
3094
3095 if(mvd > (max_dist_from_centroid))
3096 {
3097 ps_cluster_data->max_dist_from_centroid = mvd;
3098 }
3099 break;
3100 }
3101 case 2:
3102 {
3103 S32 mvd, mvd_q8;
3104
3105 ps_cur_cluster->max_x = ps_cluster_data->max_x;
3106
3107 mvd_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3108 mvd = (mvd_q8 + (1 << 7)) >> 8;
3109
3110 if(mvd > (max_dist_from_centroid))
3111 {
3112 ps_cluster_data->max_dist_from_centroid = mvd;
3113 }
3114 break;
3115 }
3116 case 4:
3117 {
3118 S32 mvd, mvd_q8;
3119
3120 ps_cur_cluster->min_y = ps_cluster_data->min_y;
3121
3122 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3123 mvd = (mvd_q8 + (1 << 7)) >> 8;
3124
3125 if(mvd > (max_dist_from_centroid))
3126 {
3127 ps_cluster_data->max_dist_from_centroid = mvd;
3128 }
3129 break;
3130 }
3131 case 5:
3132 {
3133 S32 mvd;
3134 S32 mvdx, mvdx_q8;
3135 S32 mvdy, mvdy_q8;
3136
3137 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3138 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3139
3140 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3141 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3142
3143 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3144
3145 ps_cur_cluster->min_x = ps_cluster_data->min_x;
3146 ps_cur_cluster->min_y = ps_cluster_data->min_y;
3147
3148 if(mvd > max_dist_from_centroid)
3149 {
3150 ps_cluster_data->max_dist_from_centroid = mvd;
3151 }
3152 break;
3153 }
3154 case 6:
3155 {
3156 S32 mvd;
3157 S32 mvdx, mvdx_q8;
3158 S32 mvdy, mvdy_q8;
3159
3160 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3161 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3162
3163 mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3164 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3165
3166 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3167
3168 ps_cur_cluster->max_x = ps_cluster_data->max_x;
3169 ps_cur_cluster->min_y = ps_cluster_data->min_y;
3170
3171 if(mvd > max_dist_from_centroid)
3172 {
3173 ps_cluster_data->max_dist_from_centroid = mvd;
3174 }
3175 break;
3176 }
3177 case 8:
3178 {
3179 S32 mvd, mvd_q8;
3180
3181 ps_cur_cluster->max_y = ps_cluster_data->max_y;
3182
3183 mvd_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3184 mvd = (mvd_q8 + (1 << 7)) >> 8;
3185
3186 if(mvd > (max_dist_from_centroid))
3187 {
3188 ps_cluster_data->max_dist_from_centroid = mvd;
3189 }
3190 break;
3191 }
3192 case 9:
3193 {
3194 S32 mvd;
3195 S32 mvdx, mvdx_q8;
3196 S32 mvdy, mvdy_q8;
3197
3198 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3199 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3200
3201 mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3202 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3203
3204 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3205
3206 ps_cur_cluster->min_x = ps_cluster_data->min_x;
3207 ps_cur_cluster->max_y = ps_cluster_data->max_y;
3208
3209 if(mvd > max_dist_from_centroid)
3210 {
3211 ps_cluster_data->max_dist_from_centroid = mvd;
3212 }
3213 break;
3214 }
3215 case 10:
3216 {
3217 S32 mvd;
3218 S32 mvdx, mvdx_q8;
3219 S32 mvdy, mvdy_q8;
3220
3221 mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3222 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3223
3224 mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3225 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3226
3227 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3228
3229 ps_cur_cluster->max_x = ps_cluster_data->max_x;
3230 ps_cur_cluster->max_y = ps_cluster_data->max_y;
3231
3232 if(mvd > ps_cluster_data->max_dist_from_centroid)
3233 {
3234 ps_cluster_data->max_dist_from_centroid = mvd;
3235 }
3236 break;
3237 }
3238 default:
3239 {
3240 break;
3241 }
3242 }
3243
3244 hme_try_cluster_merge(ps_root, pu1_num_clusters, idx_of_updated_cluster);
3245
3246 return;
3247 }
3248
3249 num_clusters_evaluated++;
3250 }
3251 }
3252
3253 /**
3254 ********************************************************************************
3255 * @fn void hme_find_and_update_clusters
3256 * (
3257 * cluster_data_t *ps_cluster_data,
3258 * S32 *pi4_num_clusters,
3259 * S32 mvx,
3260 * S32 mvy,
3261 * S32 ref_idx,
3262 * PART_ID_T e_part_id
3263 * )
3264 *
3265 * @brief Implementation of the clustering algorithm
3266 *
3267 * @param[in/out] ps_cluster_data: pointer to cluster_data_t struct
3268 *
3269 * @param[in/out] pi4_num_clusters : pointer to number of clusters
3270 *
3271 * @param[in] mvx : x co-ordinate of the motion vector
3272 *
3273 * @param[in] mvy : y co-ordinate of the motion vector
3274 *
3275 * @param[in] ref_idx : ref_id of the motion vector
3276 *
3277 * @param[in] e_part_id : partition id of the motion vector
3278 *
3279 * @return None
3280 ********************************************************************************
3281 */
3282 void hme_find_and_update_clusters(
3283 cluster_data_t *ps_cluster_data,
3284 U08 *pu1_num_clusters,
3285 S16 i2_mv_x,
3286 S16 i2_mv_y,
3287 U08 i1_ref_idx,
3288 S32 i4_sdi,
3289 PART_ID_T e_part_id,
3290 U08 is_part_of_bi)
3291 {
3292 S32 i;
3293 S32 min_mvd_cluster_id = -1;
3294 S32 mvd, mvd_limit, mvdx, mvdy;
3295 S32 min_mvdx, min_mvdy;
3296
3297 S32 min_mvd = MAX_32BIT_VAL;
3298 S32 num_clusters = *pu1_num_clusters;
3299
3300 S32 mvx = i2_mv_x;
3301 S32 mvy = i2_mv_y;
3302 S32 ref_idx = i1_ref_idx;
3303 S32 sdi = i4_sdi;
3304 S32 new_cluster_idx = MAX_NUM_CLUSTERS_16x16;
3305
3306 if(num_clusters == 0)
3307 {
3308 cluster_data_t *ps_data = &ps_cluster_data[num_clusters];
3309
3310 ps_data->num_mvs = 1;
3311 ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
3312 ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
3313 ps_data->ref_id = ref_idx;
3314 ps_data->area_in_pixels = gai4_partition_area[e_part_id];
3315 ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
3316 ps_data->as_mv[0].mvx = mvx;
3317 ps_data->as_mv[0].mvy = mvy;
3318
3319 /***************************/
3320 ps_data->as_mv[0].is_uni = !is_part_of_bi;
3321 ps_data->as_mv[0].sdi = sdi;
3322 if(is_part_of_bi)
3323 {
3324 ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
3325 }
3326 else
3327 {
3328 ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
3329 }
3330 /**************************/
3331 ps_data->max_x = mvx;
3332 ps_data->min_x = mvx;
3333 ps_data->max_y = mvy;
3334 ps_data->min_y = mvy;
3335
3336 ps_data->is_valid_cluster = 1;
3337
3338 *pu1_num_clusters = 1;
3339 }
3340 else
3341 {
3342 S32 num_clusters_evaluated = 0;
3343
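        /* Nearest-centroid assignment: find the existing cluster (with the  */
        /* same ref id) whose centroid is closest in |mvd|. If that distance */
        /* is within the cluster's spread threshold the mv joins it,         */
        /* otherwise a new cluster is opened further below.                  */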
3344 for(i = 0; num_clusters_evaluated < num_clusters; i++)
3345 {
3346 cluster_data_t *ps_data = &ps_cluster_data[i];
3347
3348 centroid_t *ps_centroid;
3349
3350 S32 mvx_q8;
3351 S32 mvy_q8;
3352 S32 posx_q8;
3353 S32 posy_q8;
3354 S32 mvdx_q8;
3355 S32 mvdy_q8;
3356
3357 /* In anticipation of a possible merging of clusters */
3358 if(ps_data->is_valid_cluster == 0)
3359 {
3360 new_cluster_idx = i;
3361 continue;
3362 }
3363
3364 if(ref_idx != ps_data->ref_id)
3365 {
3366 num_clusters_evaluated++;
3367 continue;
3368 }
3369
3370 ps_centroid = &ps_data->s_centroid;
3371 posx_q8 = ps_centroid->i4_pos_x_q8;
3372 posy_q8 = ps_centroid->i4_pos_y_q8;
3373
3374 mvx_q8 = mvx << 8;
3375 mvy_q8 = mvy << 8;
3376
3377 mvdx_q8 = posx_q8 - mvx_q8;
3378 mvdy_q8 = posy_q8 - mvy_q8;
3379
3380 mvdx = (((mvdx_q8 + (1 << 7)) >> 8));
3381 mvdy = (((mvdy_q8 + (1 << 7)) >> 8));
3382
3383 mvd = ABS(mvdx) + ABS(mvdy);
3384
3385 if(mvd < min_mvd)
3386 {
3387 min_mvd = mvd;
3388 min_mvdx = mvdx;
3389 min_mvdy = mvdy;
3390 min_mvd_cluster_id = i;
3391 }
3392
3393 num_clusters_evaluated++;
3394 }
3395
3396 mvd_limit = (min_mvd_cluster_id == -1)
3397 ? ps_cluster_data[0].max_dist_from_centroid
3398 : ps_cluster_data[min_mvd_cluster_id].max_dist_from_centroid;
3399
3400 /* This condition implies that min_mvd has been updated */
3401 if(min_mvd <= mvd_limit)
3402 {
3403 hme_update_cluster_attributes(
3404 &ps_cluster_data[min_mvd_cluster_id],
3405 mvx,
3406 mvy,
3407 min_mvdx,
3408 min_mvdy,
3409 ref_idx,
3410 sdi,
3411 is_part_of_bi,
3412 e_part_id);
3413
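/*
 * For MVs coming from an NxN partition, a merge of this block's 16x16
 * clusters is additionally attempted around the just-updated cluster.
 */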
3414 if(PRT_NxN == ge_part_id_to_part_type[e_part_id])
3415 {
3416 hme_try_cluster_merge(ps_cluster_data, pu1_num_clusters, min_mvd_cluster_id);
3417 }
3418 }
3419 else
3420 {
3421 cluster_data_t *ps_data = (new_cluster_idx == MAX_NUM_CLUSTERS_16x16)
3422 ? &ps_cluster_data[num_clusters]
3423 : &ps_cluster_data[new_cluster_idx];
3424
3425 ps_data->num_mvs = 1;
3426 ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
3427 ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
3428 ps_data->ref_id = ref_idx;
3429 ps_data->area_in_pixels = gai4_partition_area[e_part_id];
3430 ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
3431 ps_data->as_mv[0].mvx = mvx;
3432 ps_data->as_mv[0].mvy = mvy;
3433
3434 /***************************/
3435 ps_data->as_mv[0].is_uni = !is_part_of_bi;
3436 ps_data->as_mv[0].sdi = sdi;
3437 if(is_part_of_bi)
3438 {
3439 ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
3440 }
3441 else
3442 {
3443 ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
3444 }
3445 /**************************/
3446 ps_data->max_x = mvx;
3447 ps_data->min_x = mvx;
3448 ps_data->max_y = mvy;
3449 ps_data->min_y = mvy;
3450
3451 ps_data->is_valid_cluster = 1;
3452
3453 num_clusters++;
3454 *pu1_num_clusters = num_clusters;
3455 }
3456 }
3457 }
3458
3459 /**
3460 ********************************************************************************
3461 * @fn void hme_update_32x32_cluster_attributes
3462 * (
3463 * cluster_32x32_blk_t *ps_blk_32x32,
3464 * cluster_data_t *ps_cluster_data
3465 * )
3466 *
3467 * @brief Updates attributes for 32x32 clusters based on the attributes of
3468 * the constituent 16x16 clusters
3469 *
3470 * @param[out] ps_blk_32x32: structure containing 32x32 block results
3471 *
3472 * @param[in] ps_cluster_data : structure containing 16x16 block results
3473 *
3474 * @return None
3475 ********************************************************************************
3476 */
3477 void hme_update_32x32_cluster_attributes(
3478 cluster_32x32_blk_t *ps_blk_32x32, cluster_data_t *ps_cluster_data)
3479 {
3480 cluster_data_t *ps_cur_cluster_32;
3481
3482 S32 i;
3483 S32 mvd_limit;
3484
3485 S32 num_clusters = ps_blk_32x32->num_clusters;
3486
3487 if(0 == num_clusters)
3488 {
3489 ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
3490
3491 ps_blk_32x32->num_clusters++;
3492 ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
3493
3494 ps_cur_cluster_32->is_valid_cluster = 1;
3495
3496 ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
3497 ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3498 ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3499
3500 memcpy(
3501 ps_cur_cluster_32->as_mv,
3502 ps_cluster_data->as_mv,
3503 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3504
3505 ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
3506
3507 ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
3508
3509 ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
3510 ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
3511 ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
3512 ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
3513
3514 ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
3515 }
3516 else
3517 {
3518 centroid_t *ps_centroid;
3519
3520 S32 cur_posx_q8, cur_posy_q8;
3521 S32 min_mvd_cluster_id = -1;
3522 S32 mvd;
3523 S32 mvdx;
3524 S32 mvdy;
3525 S32 mvdx_min;
3526 S32 mvdy_min;
3527 S32 mvdx_q8;
3528 S32 mvdy_q8;
3529
3530 S32 num_clusters_evaluated = 0;
3531
3532 S32 mvd_min = MAX_32BIT_VAL;
3533
3534 S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
3535 S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
3536
3537 for(i = 0; num_clusters_evaluated < num_clusters; i++)
3538 {
3539 ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[i];
3540
3541 if(ps_cur_cluster_32->ref_id != ps_cluster_data->ref_id)
3542 {
3543 num_clusters_evaluated++;
3544 continue;
3545 }
3546 if(!ps_cur_cluster_32->is_valid_cluster)
3547 {
3548 continue;
3549 }
3550
3551 num_clusters_evaluated++;
3552
3553 ps_centroid = &ps_cur_cluster_32->s_centroid;
3554
3555 cur_posx_q8 = ps_centroid->i4_pos_x_q8;
3556 cur_posy_q8 = ps_centroid->i4_pos_y_q8;
3557
3558 mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
3559 mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
3560
3561 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3562 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3563
3564 mvd = ABS(mvdx) + ABS(mvdy);
3565
3566 if(mvd < mvd_min)
3567 {
3568 mvd_min = mvd;
3569 mvdx_min = mvdx;
3570 mvdy_min = mvdy;
3571 min_mvd_cluster_id = i;
3572 }
3573 }
3574
3575 ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
3576
3577 mvd_limit = (min_mvd_cluster_id == -1)
3578 ? ps_cur_cluster_32[0].max_dist_from_centroid
3579 : ps_cur_cluster_32[min_mvd_cluster_id].max_dist_from_centroid;
3580
3581 if(mvd_min <= mvd_limit)
3582 {
3583 LWORD64 i8_updated_posx;
3584 LWORD64 i8_updated_posy;
3585 WORD32 minmax_updated_x = 0;
3586 WORD32 minmax_updated_y = 0;
3587
3588 ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[min_mvd_cluster_id];
3589
3590 ps_centroid = &ps_cur_cluster_32->s_centroid;
3591
3592 ps_cur_cluster_32->is_valid_cluster = 1;
3593
3594 ps_cur_cluster_32->area_in_pixels += ps_cluster_data->area_in_pixels;
3595 ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3596 ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3597
3598 memcpy(
3599 &ps_cur_cluster_32->as_mv[ps_cur_cluster_32->num_mvs],
3600 ps_cluster_data->as_mv,
3601 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3602
3603 if((mvdx_min > 0) && ((ps_cur_cluster_32->min_x << 8) > mvx_inp_q8))
3604 {
3605 ps_cur_cluster_32->min_x = (mvx_inp_q8 + ((1 << 7))) >> 8;
3606 minmax_updated_x = 1;
3607 }
3608 else if((mvdx_min < 0) && ((ps_cur_cluster_32->max_x << 8) < mvx_inp_q8))
3609 {
3610 ps_cur_cluster_32->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3611 minmax_updated_x = 2;
3612 }
3613
3614 if((mvdy_min > 0) && ((ps_cur_cluster_32->min_y << 8) > mvy_inp_q8))
3615 {
3616 ps_cur_cluster_32->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3617 minmax_updated_y = 1;
3618 }
3619 else if((mvdy_min < 0) && ((ps_cur_cluster_32->max_y << 8) < mvy_inp_q8))
3620 {
3621 ps_cur_cluster_32->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3622 minmax_updated_y = 2;
3623 }
3624
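/*
 * minmax_updated_x/y record which bound moved: 0 => unchanged, 1 => min
 * lowered, 2 => max raised. The switch on ((minmax_updated_y << 2) +
 * minmax_updated_x) widens max_dist_from_centroid whenever the moved bound
 * is now farther from the centroid than the current limit.
 */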
3625 switch((minmax_updated_y << 2) + minmax_updated_x)
3626 {
3627 case 1:
3628 {
3629 S32 mvd, mvd_q8;
3630
3631 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3632 mvd = (mvd_q8 + (1 << 7)) >> 8;
3633
3634 if(mvd > (mvd_limit))
3635 {
3636 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3637 }
3638 break;
3639 }
3640 case 2:
3641 {
3642 S32 mvd, mvd_q8;
3643
3644 mvd_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3645 mvd = (mvd_q8 + (1 << 7)) >> 8;
3646
3647 if(mvd > (mvd_limit))
3648 {
3649 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3650 }
3651 break;
3652 }
3653 case 4:
3654 {
3655 S32 mvd, mvd_q8;
3656
3657 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3658 mvd = (mvd_q8 + (1 << 7)) >> 8;
3659
3660 if(mvd > (mvd_limit))
3661 {
3662 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3663 }
3664 break;
3665 }
3666 case 5:
3667 {
3668 S32 mvd;
3669 S32 mvdx, mvdx_q8;
3670 S32 mvdy, mvdy_q8;
3671
3672 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3673 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3674
3675 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3676 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3677
3678 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3679
3680 if(mvd > mvd_limit)
3681 {
3682 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3683 }
3684 break;
3685 }
3686 case 6:
3687 {
3688 S32 mvd;
3689 S32 mvdx, mvdx_q8;
3690 S32 mvdy, mvdy_q8;
3691
3692 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3693 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3694
3695 mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3696 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3697
3698 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3699
3700 if(mvd > mvd_limit)
3701 {
3702 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3703 }
3704 break;
3705 }
3706 case 8:
3707 {
3708 S32 mvd, mvd_q8;
3709
3710 mvd_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3711 mvd = (mvd_q8 + (1 << 7)) >> 8;
3712
3713 if(mvd > (mvd_limit))
3714 {
3715 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3716 }
3717 break;
3718 }
3719 case 9:
3720 {
3721 S32 mvd;
3722 S32 mvdx, mvdx_q8;
3723 S32 mvdy, mvdy_q8;
3724
3725 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3726 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3727
3728 mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3729 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3730
3731 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3732
3733 if(mvd > mvd_limit)
3734 {
3735 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3736 }
3737 break;
3738 }
3739 case 10:
3740 {
3741 S32 mvd;
3742 S32 mvdx, mvdx_q8;
3743 S32 mvdy, mvdy_q8;
3744
3745 mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3746 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3747
3748 mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3749 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3750
3751 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3752
3753 if(mvd > ps_cur_cluster_32->max_dist_from_centroid)
3754 {
3755 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3756 }
3757 break;
3758 }
3759 default:
3760 {
3761 break;
3762 }
3763 }
3764
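/*
 * The merged centroid is the MV-count weighted average of the two centroids:
 * (old_pos_q8 * old_num_mvs + inp_pos_q8 * inp_num_mvs) / (total num_mvs),
 * accumulated in 64 bits so that the Q8 products cannot overflow.
 */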
3765 i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_32->num_mvs) +
3766 ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
3767 i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_32->num_mvs) +
3768 ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
3769
3770 ps_cur_cluster_32->num_mvs += ps_cluster_data->num_mvs;
3771
3772 ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_32->num_mvs);
3773 ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_32->num_mvs);
3774 }
3775 else if(num_clusters < MAX_NUM_CLUSTERS_32x32)
3776 {
3777 ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[num_clusters];
3778
3779 ps_blk_32x32->num_clusters++;
3780 ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
3781
3782 ps_cur_cluster_32->is_valid_cluster = 1;
3783
3784 ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
3785 ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3786 ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3787
3788 memcpy(
3789 ps_cur_cluster_32->as_mv,
3790 ps_cluster_data->as_mv,
3791 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3792
3793 ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
3794
3795 ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
3796
3797 ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
3798 ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
3799 ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
3800 ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
3801
3802 ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
3803 }
3804 }
3805 }
3806
3807 /**
3808 ********************************************************************************
3809 * @fn void hme_update_64x64_cluster_attributes
3810 * (
3811 * cluster_64x64_blk_t *ps_blk_64x64,
3812 * cluster_data_t *ps_cluster_data
3813 * )
3814 *
3815 * @brief Updates attributes for 64x64 clusters based on the attributes of
3816 * the constituent 32x32 clusters
3817 *
3818 * @param[out] ps_blk_64x64: structure containing 64x64 block results
3819 *
3820 * @param[in] ps_cluster_data : structure containing 32x32 block results
3821 *
3822 * @return None
3823 ********************************************************************************
3824 */
3825 void hme_update_64x64_cluster_attributes(
3826 cluster_64x64_blk_t *ps_blk_64x64, cluster_data_t *ps_cluster_data)
3827 {
3828 cluster_data_t *ps_cur_cluster_64;
3829
3830 S32 i;
3831 S32 mvd_limit;
3832
3833 S32 num_clusters = ps_blk_64x64->num_clusters;
3834
3835 if(0 == num_clusters)
3836 {
3837 ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[0];
3838
3839 ps_blk_64x64->num_clusters++;
3840 ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
3841
3842 ps_cur_cluster_64->is_valid_cluster = 1;
3843
3844 ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
3845 ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3846 ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3847
3848 memcpy(
3849 ps_cur_cluster_64->as_mv,
3850 ps_cluster_data->as_mv,
3851 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3852
3853 ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
3854
3855 ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
3856
3857 ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
3858 ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
3859 ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
3860 ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
3861
3862 ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
3863 }
3864 else
3865 {
3866 centroid_t *ps_centroid;
3867
3868 S32 cur_posx_q8, cur_posy_q8;
3869 S32 min_mvd_cluster_id = -1;
3870 S32 mvd;
3871 S32 mvdx;
3872 S32 mvdy;
3873 S32 mvdx_min;
3874 S32 mvdy_min;
3875 S32 mvdx_q8;
3876 S32 mvdy_q8;
3877
3878 S32 num_clusters_evaluated = 0;
3879
3880 S32 mvd_min = MAX_32BIT_VAL;
3881
3882 S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
3883 S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
3884
3885 for(i = 0; num_clusters_evaluated < num_clusters; i++)
3886 {
3887 ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[i];
3888
3889 if(ps_cur_cluster_64->ref_id != ps_cluster_data->ref_id)
3890 {
3891 num_clusters_evaluated++;
3892 continue;
3893 }
3894
3895 if(!ps_cur_cluster_64->is_valid_cluster)
3896 {
3897 continue;
3898 }
3899
3900 num_clusters_evaluated++;
3901
3902 ps_centroid = &ps_cur_cluster_64->s_centroid;
3903
3904 cur_posx_q8 = ps_centroid->i4_pos_x_q8;
3905 cur_posy_q8 = ps_centroid->i4_pos_y_q8;
3906
3907 mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
3908 mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
3909
3910 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3911 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3912
3913 mvd = ABS(mvdx) + ABS(mvdy);
3914
3915 if(mvd < mvd_min)
3916 {
3917 mvd_min = mvd;
3918 mvdx_min = mvdx;
3919 mvdy_min = mvdy;
3920 min_mvd_cluster_id = i;
3921 }
3922 }
3923
3924 ps_cur_cluster_64 = ps_blk_64x64->as_cluster_data;
3925
3926 mvd_limit = (min_mvd_cluster_id == -1)
3927 ? ps_cur_cluster_64[0].max_dist_from_centroid
3928 : ps_cur_cluster_64[min_mvd_cluster_id].max_dist_from_centroid;
3929
3930 if(mvd_min <= mvd_limit)
3931 {
3932 LWORD64 i8_updated_posx;
3933 LWORD64 i8_updated_posy;
3934 WORD32 minmax_updated_x = 0;
3935 WORD32 minmax_updated_y = 0;
3936
3937 ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[min_mvd_cluster_id];
3938
3939 ps_centroid = &ps_cur_cluster_64->s_centroid;
3940
3941 ps_cur_cluster_64->is_valid_cluster = 1;
3942
3943 ps_cur_cluster_64->area_in_pixels += ps_cluster_data->area_in_pixels;
3944 ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3945 ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3946
3947 memcpy(
3948 &ps_cur_cluster_64->as_mv[ps_cur_cluster_64->num_mvs],
3949 ps_cluster_data->as_mv,
3950 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3951
3952 if((mvdx_min > 0) && ((ps_cur_cluster_64->min_x << 8) > mvx_inp_q8))
3953 {
3954 ps_cur_cluster_64->min_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3955 minmax_updated_x = 1;
3956 }
3957 else if((mvdx_min < 0) && ((ps_cur_cluster_64->max_x << 8) < mvx_inp_q8))
3958 {
3959 ps_cur_cluster_64->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3960 minmax_updated_x = 2;
3961 }
3962
3963 if((mvdy_min > 0) && ((ps_cur_cluster_64->min_y << 8) > mvy_inp_q8))
3964 {
3965 ps_cur_cluster_64->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3966 minmax_updated_y = 1;
3967 }
3968 else if((mvdy_min < 0) && ((ps_cur_cluster_64->max_y << 8) < mvy_inp_q8))
3969 {
3970 ps_cur_cluster_64->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3971 minmax_updated_y = 2;
3972 }
3973
3974 switch((minmax_updated_y << 2) + minmax_updated_x)
3975 {
3976 case 1:
3977 {
3978 S32 mvd, mvd_q8;
3979
3980 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
3981 mvd = (mvd_q8 + (1 << 7)) >> 8;
3982
3983 if(mvd > (mvd_limit))
3984 {
3985 ps_cur_cluster_64->max_dist_from_centroid = mvd;
3986 }
3987 break;
3988 }
3989 case 2:
3990 {
3991 S32 mvd, mvd_q8;
3992
3993 mvd_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
3994 mvd = (mvd_q8 + (1 << 7)) >> 8;
3995
3996 if(mvd > (mvd_limit))
3997 {
3998 ps_cur_cluster_64->max_dist_from_centroid = mvd;
3999 }
4000 break;
4001 }
4002 case 4:
4003 {
4004 S32 mvd, mvd_q8;
4005
4006 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4007 mvd = (mvd_q8 + (1 << 7)) >> 8;
4008
4009 if(mvd > (mvd_limit))
4010 {
4011 ps_cur_cluster_64->max_dist_from_centroid = mvd;
4012 }
4013 break;
4014 }
4015 case 5:
4016 {
4017 S32 mvd;
4018 S32 mvdx, mvdx_q8;
4019 S32 mvdy, mvdy_q8;
4020
4021 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4022 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4023
4024 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
4025 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4026
4027 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4028
4029 if(mvd > mvd_limit)
4030 {
4031 ps_cur_cluster_64->max_dist_from_centroid = mvd;
4032 }
4033 break;
4034 }
4035 case 6:
4036 {
4037 S32 mvd;
4038 S32 mvdx, mvdx_q8;
4039 S32 mvdy, mvdy_q8;
4040
4041 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4042 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4043
4044 mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
4045 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4046
4047 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4048
4049 if(mvd > mvd_limit)
4050 {
4051 ps_cur_cluster_64->max_dist_from_centroid = mvd;
4052 }
4053 break;
4054 }
4055 case 8:
4056 {
4057 S32 mvd, mvd_q8;
4058
4059 mvd_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4060 mvd = (mvd_q8 + (1 << 7)) >> 8;
4061
4062 if(mvd > (mvd_limit))
4063 {
4064 ps_cur_cluster_64->max_dist_from_centroid = mvd;
4065 }
4066 break;
4067 }
4068 case 9:
4069 {
4070 S32 mvd;
4071 S32 mvdx, mvdx_q8;
4072 S32 mvdy, mvdy_q8;
4073
4074 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
4075 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4076
4077 mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4078 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4079
4080 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4081
4082 if(mvd > mvd_limit)
4083 {
4084 ps_cur_cluster_64->max_dist_from_centroid = mvd;
4085 }
4086 break;
4087 }
4088 case 10:
4089 {
4090 S32 mvd;
4091 S32 mvdx, mvdx_q8;
4092 S32 mvdy, mvdy_q8;
4093
4094 mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
4095 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4096
4097 mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4098 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4099
4100 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4101
4102 if(mvd > ps_cur_cluster_64->max_dist_from_centroid)
4103 {
4104 ps_cur_cluster_64->max_dist_from_centroid = mvd;
4105 }
4106 break;
4107 }
4108 default:
4109 {
4110 break;
4111 }
4112 }
4113
4114 i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_64->num_mvs) +
4115 ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
4116 i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_64->num_mvs) +
4117 ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
4118
4119 ps_cur_cluster_64->num_mvs += ps_cluster_data->num_mvs;
4120
4121 ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_64->num_mvs);
4122 ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_64->num_mvs);
4123 }
4124 else if(num_clusters < MAX_NUM_CLUSTERS_64x64)
4125 {
4126 ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[num_clusters];
4127
4128 ps_blk_64x64->num_clusters++;
4129 ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
4130
4131 ps_cur_cluster_64->is_valid_cluster = 1;
4132
4133 ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
4134 ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
4135 ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
4136
4137 memcpy(
4138 &ps_cur_cluster_64->as_mv[0],
4139 ps_cluster_data->as_mv,
4140 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
4141
4142 ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
4143
4144 ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
4145
4146 ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
4147 ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
4148 ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
4149 ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
4150
4151 ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
4152 }
4153 }
4154 }
4155
4156 /**
4157 ********************************************************************************
4158 * @fn void hme_update_32x32_clusters
4159 * (
4160 * cluster_32x32_blk_t *ps_blk_32x32,
4161 * cluster_16x16_blk_t *ps_blk_16x16
4162 * )
4163 *
4164 * @brief Updates attributes for 32x32 clusters based on the attributes of
4165 * the constituent 16x16 clusters
4166 *
4167 * @param[out] ps_blk_32x32: structure containing 32x32 block results
4168 *
4169 * @param[in] ps_blk_16x16 : structure containing 16x16 block results
4170 *
4171 * @return None
4172 ********************************************************************************
4173 */
4174 static __inline void
4175 hme_update_32x32_clusters(cluster_32x32_blk_t *ps_blk_32x32, cluster_16x16_blk_t *ps_blk_16x16)
4176 {
4177 cluster_16x16_blk_t *ps_blk_16x16_cur;
4178 cluster_data_t *ps_cur_cluster;
4179
4180 S32 i, j;
4181 S32 num_clusters_cur_16x16_blk;
4182
4183 for(i = 0; i < 4; i++)
4184 {
4185 S32 num_clusters_evaluated = 0;
4186
4187 ps_blk_16x16_cur = &ps_blk_16x16[i];
4188
4189 num_clusters_cur_16x16_blk = ps_blk_16x16_cur->num_clusters;
4190
4191 ps_blk_32x32->intra_mv_area += ps_blk_16x16_cur->intra_mv_area;
4192
4193 ps_blk_32x32->best_inter_cost += ps_blk_16x16_cur->best_inter_cost;
4194
4195 for(j = 0; num_clusters_evaluated < num_clusters_cur_16x16_blk; j++)
4196 {
4197 ps_cur_cluster = &ps_blk_16x16_cur->as_cluster_data[j];
4198
4199 if(!ps_cur_cluster->is_valid_cluster)
4200 {
4201 continue;
4202 }
4203
4204 hme_update_32x32_cluster_attributes(ps_blk_32x32, ps_cur_cluster);
4205
4206 num_clusters_evaluated++;
4207 }
4208 }
4209 }
4210
4211 /**
4212 ********************************************************************************
4213 * @fn void hme_update_64x64_clusters
4214 * (
4215 * cluster_64x64_blk_t *ps_blk_64x64,
4216 * cluster_32x32_blk_t *ps_blk_32x32
4217 * )
4218 *
4219 * @brief Updates attributes for 64x64 clusters based on the attributes of
4220 * the constituent 32x32 clusters
4221 *
4222 * @param[out] ps_blk_64x64: structure containing 64x64 block results
4223 *
4224 * @param[in] ps_blk_32x32 : structure containing 32x32 block results
4225 *
4226 * @return None
4227 ********************************************************************************
4228 */
4229 static __inline void
4230 hme_update_64x64_clusters(cluster_64x64_blk_t *ps_blk_64x64, cluster_32x32_blk_t *ps_blk_32x32)
4231 {
4232 cluster_32x32_blk_t *ps_blk_32x32_cur;
4233 cluster_data_t *ps_cur_cluster;
4234
4235 S32 i, j;
4236 S32 num_clusters_cur_32x32_blk;
4237
4238 for(i = 0; i < 4; i++)
4239 {
4240 S32 num_clusters_evaluated = 0;
4241
4242 ps_blk_32x32_cur = &ps_blk_32x32[i];
4243
4244 num_clusters_cur_32x32_blk = ps_blk_32x32_cur->num_clusters;
4245
4246 ps_blk_64x64->intra_mv_area += ps_blk_32x32_cur->intra_mv_area;
4247 ps_blk_64x64->best_inter_cost += ps_blk_32x32_cur->best_inter_cost;
4248
4249 for(j = 0; num_clusters_evaluated < num_clusters_cur_32x32_blk; j++)
4250 {
4251 ps_cur_cluster = &ps_blk_32x32_cur->as_cluster_data[j];
4252
4253 if(!ps_cur_cluster->is_valid_cluster)
4254 {
4255 continue;
4256 }
4257
4258 hme_update_64x64_cluster_attributes(ps_blk_64x64, ps_cur_cluster);
4259
4260 num_clusters_evaluated++;
4261 }
4262 }
4263 }
4264
4265 /**
4266 ********************************************************************************
4267 * @fn void hme_try_merge_clusters_blksize_gt_16
4268 * (
4269 * cluster_data_t *ps_cluster_data,
4270 * S32 num_clusters
4271 * )
4272 *
4273 * @brief Merging clusters from blocks of size 32x32 and greater
4274 *
4275 * @param[in/out] ps_cluster_data: structure containing cluster data
4276 *
4277 * @param[in] num_clusters : number of clusters
4278 *
4279 * @return Number of cluster merges performed
4280 ********************************************************************************
4281 */
4282 S32 hme_try_merge_clusters_blksize_gt_16(cluster_data_t *ps_cluster_data, S32 num_clusters)
4283 {
4284 centroid_t *ps_cur_centroid;
4285 cluster_data_t *ps_cur_cluster;
4286
4287 S32 i, mvd;
4288 S32 mvdx, mvdy, mvdx_q8, mvdy_q8;
4289
4290 centroid_t *ps_centroid = &ps_cluster_data->s_centroid;
4291
4292 S32 mvd_limit = ps_cluster_data->max_dist_from_centroid;
4293 S32 ref_id = ps_cluster_data->ref_id;
4294
4295 S32 node0_posx_q8 = ps_centroid->i4_pos_x_q8;
4296 S32 node0_posy_q8 = ps_centroid->i4_pos_y_q8;
4297 S32 num_clusters_evaluated = 1;
4298 S32 ret_value = 0;
4299
4300 if(1 >= num_clusters)
4301 {
4302 return ret_value;
4303 }
4304
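/*
 * Scan the remaining clusters and absorb any cluster with the same ref_id
 * whose centroid lies within half of the seed cluster's
 * max_dist_from_centroid; the tail of the list is then handled recursively
 * with the next surviving cluster as the seed.
 */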
4305 for(i = 1; num_clusters_evaluated < num_clusters; i++)
4306 {
4307 S32 cur_posx_q8;
4308 S32 cur_posy_q8;
4309
4310 ps_cur_cluster = &ps_cluster_data[i];
4311
4312 if((ref_id != ps_cur_cluster->ref_id))
4313 {
4314 num_clusters_evaluated++;
4315 continue;
4316 }
4317
4318 if((!ps_cur_cluster->is_valid_cluster))
4319 {
4320 continue;
4321 }
4322
4323 num_clusters_evaluated++;
4324
4325 ps_cur_centroid = &ps_cur_cluster->s_centroid;
4326
4327 cur_posx_q8 = ps_cur_centroid->i4_pos_x_q8;
4328 cur_posy_q8 = ps_cur_centroid->i4_pos_y_q8;
4329
4330 mvdx_q8 = cur_posx_q8 - node0_posx_q8;
4331 mvdy_q8 = cur_posy_q8 - node0_posy_q8;
4332
4333 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4334 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4335
4336 mvd = ABS(mvdx) + ABS(mvdy);
4337
4338 if(mvd <= (mvd_limit >> 1))
4339 {
4340 LWORD64 i8_updated_posx;
4341 LWORD64 i8_updated_posy;
4342 WORD32 minmax_updated_x = 0;
4343 WORD32 minmax_updated_y = 0;
4344
4345 ps_cur_cluster->is_valid_cluster = 0;
4346
4347 ps_cluster_data->area_in_pixels += ps_cur_cluster->area_in_pixels;
4348 ps_cluster_data->bi_mv_pixel_area += ps_cur_cluster->bi_mv_pixel_area;
4349 ps_cluster_data->uni_mv_pixel_area += ps_cur_cluster->uni_mv_pixel_area;
4350
4351 memcpy(
4352 &ps_cluster_data->as_mv[ps_cluster_data->num_mvs],
4353 ps_cur_cluster->as_mv,
4354 sizeof(mv_data_t) * ps_cur_cluster->num_mvs);
4355
4356 if(mvdx > 0)
4357 {
4358 ps_cluster_data->min_x = (cur_posx_q8 + (1 << 7)) >> 8;
4359 minmax_updated_x = 1;
4360 }
4361 else
4362 {
4363 ps_cluster_data->max_x = (cur_posx_q8 + (1 << 7)) >> 8;
4364 minmax_updated_x = 2;
4365 }
4366
4367 if(mvdy > 0)
4368 {
4369 ps_cluster_data->min_y = (cur_posy_q8 + (1 << 7)) >> 8;
4370 minmax_updated_y = 1;
4371 }
4372 else
4373 {
4374 ps_cluster_data->max_y = (cur_posy_q8 + (1 << 7)) >> 8;
4375 minmax_updated_y = 2;
4376 }
4377
4378 switch((minmax_updated_y << 2) + minmax_updated_x)
4379 {
4380 case 1:
4381 {
4382 S32 mvd, mvd_q8;
4383
4384 mvd_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4385 mvd = (mvd_q8 + (1 << 7)) >> 8;
4386
4387 if(mvd > (mvd_limit))
4388 {
4389 ps_cluster_data->max_dist_from_centroid = mvd;
4390 }
4391 break;
4392 }
4393 case 2:
4394 {
4395 S32 mvd, mvd_q8;
4396
4397 mvd_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4398 mvd = (mvd_q8 + (1 << 7)) >> 8;
4399
4400 if(mvd > (mvd_limit))
4401 {
4402 ps_cluster_data->max_dist_from_centroid = mvd;
4403 }
4404 break;
4405 }
4406 case 4:
4407 {
4408 S32 mvd, mvd_q8;
4409
4410 mvd_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4411 mvd = (mvd_q8 + (1 << 7)) >> 8;
4412
4413 if(mvd > (mvd_limit))
4414 {
4415 ps_cluster_data->max_dist_from_centroid = mvd;
4416 }
4417 break;
4418 }
4419 case 5:
4420 {
4421 S32 mvd;
4422 S32 mvdx, mvdx_q8;
4423 S32 mvdy, mvdy_q8;
4424
4425 mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4426 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4427
4428 mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4429 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4430
4431 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4432
4433 if(mvd > mvd_limit)
4434 {
4435 ps_cluster_data->max_dist_from_centroid = mvd;
4436 }
4437 break;
4438 }
4439 case 6:
4440 {
4441 S32 mvd;
4442 S32 mvdx, mvdx_q8;
4443 S32 mvdy, mvdy_q8;
4444
4445 mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4446 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4447
4448 mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4449 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4450
4451 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4452
4453 if(mvd > mvd_limit)
4454 {
4455 ps_cluster_data->max_dist_from_centroid = mvd;
4456 }
4457 break;
4458 }
4459 case 8:
4460 {
4461 S32 mvd, mvd_q8;
4462
4463 mvd_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4464 mvd = (mvd_q8 + (1 << 7)) >> 8;
4465
4466 if(mvd > (mvd_limit))
4467 {
4468 ps_cluster_data->max_dist_from_centroid = mvd;
4469 }
4470 break;
4471 }
4472 case 9:
4473 {
4474 S32 mvd;
4475 S32 mvdx, mvdx_q8;
4476 S32 mvdy, mvdy_q8;
4477
4478 mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4479 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4480
4481 mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4482 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4483
4484 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4485
4486 if(mvd > mvd_limit)
4487 {
4488 ps_cluster_data->max_dist_from_centroid = mvd;
4489 }
4490 break;
4491 }
4492 case 10:
4493 {
4494 S32 mvd;
4495 S32 mvdx, mvdx_q8;
4496 S32 mvdy, mvdy_q8;
4497
4498 mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4499 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4500
4501 mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4502 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4503
4504 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4505
4506 if(mvd > ps_cluster_data->max_dist_from_centroid)
4507 {
4508 ps_cluster_data->max_dist_from_centroid = mvd;
4509 }
4510 break;
4511 }
4512 default:
4513 {
4514 break;
4515 }
4516 }
4517
4518 i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cluster_data->num_mvs) +
4519 ((LWORD64)cur_posx_q8 * ps_cur_cluster->num_mvs);
4520 i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cluster_data->num_mvs) +
4521 ((LWORD64)cur_posy_q8 * ps_cur_cluster->num_mvs);
4522
4523 ps_cluster_data->num_mvs += ps_cur_cluster->num_mvs;
4524
4525 ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cluster_data->num_mvs);
4526 ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cluster_data->num_mvs);
4527
4528 if(MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK >= num_clusters)
4529 {
4530 num_clusters--;
4531 num_clusters_evaluated = 1;
4532 i = 0;
4533 ret_value++;
4534 }
4535 else
4536 {
4537 ret_value++;
4538
4539 return ret_value;
4540 }
4541 }
4542 }
4543
4544 if(ret_value)
4545 {
4546 for(i = 1; i < (num_clusters + ret_value); i++)
4547 {
4548 if(ps_cluster_data[i].is_valid_cluster)
4549 {
4550 break;
4551 }
4552 }
4553 if(i == (num_clusters + ret_value))
4554 {
4555 return ret_value;
4556 }
4557 }
4558 else
4559 {
4560 i = 1;
4561 }
4562
4563 return (hme_try_merge_clusters_blksize_gt_16(&ps_cluster_data[i], num_clusters - 1)) +
4564 ret_value;
4565 }
4566
4567 /**
4568 ********************************************************************************
4569 * @fn S32 hme_determine_validity_32x32
4570 * (
4571 * ctb_cluster_info_t *ps_ctb_cluster_info
4572 * )
4573 *
4574 * @brief Determines whether the current 32x32 block needs to be evaluated in enc_loop
4575 * while recursing through the CU tree or not
4576 *
4577 * @param[in] ps_ctb_cluster_info: structure containing cluster data
4578 *
4579 * @return 1 if the 32x32 block is a valid CU candidate, else 0
4580 ********************************************************************************
4581 */
4582 __inline S32 hme_determine_validity_32x32(
4583 ctb_cluster_info_t *ps_ctb_cluster_info,
4584 S32 *pi4_children_nodes_required,
4585 S32 blk_validity_wrt_pic_bndry,
4586 S32 parent_blk_validity_wrt_pic_bndry)
4587 {
4588 cluster_data_t *ps_data;
4589
4590 cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
4591 cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
4592
4593 S32 num_clusters = ps_32x32_blk->num_clusters;
4594 S32 num_clusters_parent = ps_64x64_blk->num_clusters;
4595
4596 if(!blk_validity_wrt_pic_bndry)
4597 {
4598 *pi4_children_nodes_required = 1;
4599 return 0;
4600 }
4601
4602 if(!parent_blk_validity_wrt_pic_bndry)
4603 {
4604 *pi4_children_nodes_required = 1;
4605 return 1;
4606 }
4607
4608 if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4609 {
4610 *pi4_children_nodes_required = 1;
4611 return 0;
4612 }
4613
4614 if(num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
4615 {
4616 *pi4_children_nodes_required = 1;
4617
4618 return 1;
4619 }
4620 else if(num_clusters_parent < MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
4621 {
4622 *pi4_children_nodes_required = 0;
4623
4624 return 1;
4625 }
4626 else
4627 {
4628 if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4629 {
4630 *pi4_children_nodes_required = 0;
4631 return 1;
4632 }
4633 else
4634 {
4635 S32 i;
4636
4637 S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 4;
4638 S32 min_area = MAX_32BIT_VAL;
4639 S32 num_clusters_evaluated = 0;
4640
4641 for(i = 0; num_clusters_evaluated < num_clusters; i++)
4642 {
4643 ps_data = &ps_32x32_blk->as_cluster_data[i];
4644
4645 if(!ps_data->is_valid_cluster)
4646 {
4647 continue;
4648 }
4649
4650 num_clusters_evaluated++;
4651
4652 if(ps_data->area_in_pixels < min_area)
4653 {
4654 min_area = ps_data->area_in_pixels;
4655 }
4656 }
4657
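/*
 * The 32x32 node is marked valid with no child evaluation only if its
 * smallest cluster covers at least 1/16th of the 64x64 parent's area;
 * otherwise the node is invalidated and its child nodes must be evaluated.
 */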
4658 if((min_area << 4) < area_of_parent)
4659 {
4660 *pi4_children_nodes_required = 1;
4661 return 0;
4662 }
4663 else
4664 {
4665 *pi4_children_nodes_required = 0;
4666 return 1;
4667 }
4668 }
4669 }
4670 }
4671
4672 /**
4673 ********************************************************************************
4674 * @fn S32 hme_determine_validity_16x16
4675 * (
4676 * ctb_cluster_info_t *ps_ctb_cluster_info
4677 * )
4678 *
4679 * @brief Determines whether the current 16x16 block needs to be evaluated in enc_loop
4680 * while recursing through the CU tree or not
4681 *
4682 * @param[in] ps_ctb_cluster_info: structure containing cluster data
4683 *
4684 * @return 1 if the 16x16 block is a valid CU candidate, else 0
4685 ********************************************************************************
4686 */
4687 __inline S32 hme_determine_validity_16x16(
4688 ctb_cluster_info_t *ps_ctb_cluster_info,
4689 S32 *pi4_children_nodes_required,
4690 S32 blk_validity_wrt_pic_bndry,
4691 S32 parent_blk_validity_wrt_pic_bndry)
4692 {
4693 cluster_data_t *ps_data;
4694
4695 cluster_16x16_blk_t *ps_16x16_blk = ps_ctb_cluster_info->ps_16x16_blk;
4696 cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
4697 cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
4698
4699 S32 num_clusters = ps_16x16_blk->num_clusters;
4700 S32 num_clusters_parent = ps_32x32_blk->num_clusters;
4701 S32 num_clusters_grandparent = ps_64x64_blk->num_clusters;
4702
4703 if(!blk_validity_wrt_pic_bndry)
4704 {
4705 *pi4_children_nodes_required = 1;
4706 return 0;
4707 }
4708
4709 if(!parent_blk_validity_wrt_pic_bndry)
4710 {
4711 *pi4_children_nodes_required = 1;
4712 return 1;
4713 }
4714
4715 if((num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
4716 (num_clusters_grandparent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
4717 {
4718 *pi4_children_nodes_required = 1;
4719 return 1;
4720 }
4721
4722 /* Implies nc_64 <= 3 when num_clusters_parent > 3 & */
4723 /* implies nc_64 > 3 when num_clusters_parent < 3 & */
4724 if(num_clusters_parent != MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4725 {
4726 if(num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4727 {
4728 *pi4_children_nodes_required = 0;
4729
4730 return 1;
4731 }
4732 else
4733 {
4734 *pi4_children_nodes_required = 1;
4735
4736 return 0;
4737 }
4738 }
4739 /* Implies nc_64 >= 3 */
4740 else
4741 {
4742 if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4743 {
4744 *pi4_children_nodes_required = 0;
4745 return 1;
4746 }
4747 else if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4748 {
4749 *pi4_children_nodes_required = 1;
4750 return 0;
4751 }
4752 else
4753 {
4754 S32 i;
4755
4756 S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 2;
4757 S32 min_area = MAX_32BIT_VAL;
4758 S32 num_clusters_evaluated = 0;
4759
4760 for(i = 0; num_clusters_evaluated < num_clusters; i++)
4761 {
4762 ps_data = &ps_16x16_blk->as_cluster_data[i];
4763
4764 if(!ps_data->is_valid_cluster)
4765 {
4766 continue;
4767 }
4768
4769 num_clusters_evaluated++;
4770
4771 if(ps_data->area_in_pixels < min_area)
4772 {
4773 min_area = ps_data->area_in_pixels;
4774 }
4775 }
4776
4777 if((min_area << 4) < area_of_parent)
4778 {
4779 *pi4_children_nodes_required = 1;
4780 return 0;
4781 }
4782 else
4783 {
4784 *pi4_children_nodes_required = 0;
4785 return 1;
4786 }
4787 }
4788 }
4789 }
4790
4791 /**
4792 ********************************************************************************
4793 * @fn void hme_build_cu_tree
4794 * (
4795 * ctb_cluster_info_t *ps_ctb_cluster_info,
4796 * cur_ctb_cu_tree_t *ps_cu_tree,
4797 * S32 tree_depth,
4798 * CU_POS_T e_grand_parent_blk_pos,
4799 * CU_POS_T e_parent_blk_pos,
4800 * CU_POS_T e_cur_blk_pos
4801 * )
4802 *
4803 * @brief Recursive function for CU tree initialisation
4804 *
4805 * @param[in] ps_ctb_cluster_info: structure containing pointers to clusters
4806 * corresponding to all block sizes from 64x64
4807 * to 16x16
4808 *
4809 * @param[in] e_parent_blk_pos: position of parent block wrt its parent, if
4810 * applicable
4811 *
4812 * @param[in] e_cur_blk_pos: position of current block wrt parent
4813 *
4814 * @param[out] ps_cu_tree : represents CU tree used in CU recursion
4815 *
4816 * @param[in] tree_depth : specifies depth of the CU tree
4817 *
4818 * @return Nothing
4819 ********************************************************************************
4820 */
4821 void hme_build_cu_tree(
4822 ctb_cluster_info_t *ps_ctb_cluster_info,
4823 cur_ctb_cu_tree_t *ps_cu_tree,
4824 S32 tree_depth,
4825 CU_POS_T e_grandparent_blk_pos,
4826 CU_POS_T e_parent_blk_pos,
4827 CU_POS_T e_cur_blk_pos)
4828 {
4829 ihevce_cu_tree_init(
4830 ps_cu_tree,
4831 ps_ctb_cluster_info->ps_cu_tree_root,
4832 &ps_ctb_cluster_info->nodes_created_in_cu_tree,
4833 tree_depth,
4834 e_grandparent_blk_pos,
4835 e_parent_blk_pos,
4836 e_cur_blk_pos);
4837 }
4838
4839 /**
4840 ********************************************************************************
4841 * @fn S32 hme_sdi_based_cluster_spread_eligibility
4842 * (
4843 * cluster_32x32_blk_t *ps_blk_32x32
4844 * )
4845 *
4846 * @brief Determines whether the spread of high SDI MV's around each cluster
4847 * center is below a pre-determined threshold
4848 *
4849 * @param[in] ps_blk_32x32: structure containing the clusters of the
4850 * current 32x32 block
4851 * @param[in] sdi_threshold: threshold on the SDI of an MV
4852 *
4853 * @return 1 if the spread is constrained, else 0
4854 ********************************************************************************
4855 */
4856 __inline S32
4857 hme_sdi_based_cluster_spread_eligibility(cluster_32x32_blk_t *ps_blk_32x32, S32 sdi_threshold)
4858 {
4859 S32 cumulative_mv_distance;
4860 S32 i, j;
4861 S32 num_high_sdi_mvs;
4862
4863 S32 num_clusters = ps_blk_32x32->num_clusters;
4864
4865 for(i = 0; i < num_clusters; i++)
4866 {
4867 cluster_data_t *ps_data = &ps_blk_32x32->as_cluster_data[i];
4868
4869 num_high_sdi_mvs = 0;
4870 cumulative_mv_distance = 0;
4871
4872 for(j = 0; j < ps_data->num_mvs; j++)
4873 {
4874 mv_data_t *ps_mv = &ps_data->as_mv[j];
4875
4876 if(ps_mv->sdi >= sdi_threshold)
4877 {
4878 num_high_sdi_mvs++;
4879
4880 COMPUTE_MVD(ps_mv, ps_data, cumulative_mv_distance);
4881 }
4882 }
4883
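/*
 * The spread is treated as constrained only if the cumulative deviation of
 * the high-SDI MVs from the cluster centroid (as accumulated by COMPUTE_MVD)
 * does not exceed half of max_dist_from_centroid per such MV.
 */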
4884 if(cumulative_mv_distance > ((ps_data->max_dist_from_centroid >> 1) * num_high_sdi_mvs))
4885 {
4886 return 0;
4887 }
4888 }
4889
4890 return 1;
4891 }
4892
4893 /**
4894 ********************************************************************************
4895 * @fn S32 hme_populate_cu_tree
4896 * (
4897 * ctb_cluster_info_t *ps_ctb_cluster_info,
4898 * ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
4899 * cur_ctb_cu_tree_t *ps_cu_tree,
4900 * S32 tree_depth,
4901 * CU_POS_T e_parent_blk_pos,
4902 * CU_POS_T e_cur_blk_pos
4903 * )
4904 *
4905 * @brief Recursive function for CU tree population based on output of
4906 * clustering algorithm
4907 *
4908 * @param[in] ps_ctb_cluster_info: structure containing pointers to clusters
4909 * corresponding to all block sizes from 64x64
4910 * to 16x16
4911 *
4912 * @param[in] e_parent_blk_pos: position of parent block wrt its parent, if
4913 * applicable
4914 *
4915 * @param[in] e_cur_blk_pos: position of current block wrt parent
4916 *
4917 * @param[in] ps_cur_ipe_ctb : output container for ipe analyses
4918 *
4919 * @param[out] ps_cu_tree : represents CU tree used in CU recursion
4920 *
4921 * @param[in] tree_depth : specifies depth of the CU tree
4922 *
4923 * @param[in] ipe_decision_precedence : specifies whether precedence should
4924 * be given to decisions made either by IPE(1) or clustering algos.
4925 *
4926 * @return 1 if re-evaluation of parent node's validity is not required,
4927 * else 0
4928 ********************************************************************************
4929 */
4930 void hme_populate_cu_tree(
4931 ctb_cluster_info_t *ps_ctb_cluster_info,
4932 cur_ctb_cu_tree_t *ps_cu_tree,
4933 S32 tree_depth,
4934 ME_QUALITY_PRESETS_T e_quality_preset,
4935 CU_POS_T e_grandparent_blk_pos,
4936 CU_POS_T e_parent_blk_pos,
4937 CU_POS_T e_cur_blk_pos)
4938 {
4939 S32 area_of_cur_blk;
4940 S32 area_limit_for_me_decision_precedence;
4941 S32 children_nodes_required;
4942 S32 intra_mv_area;
4943 S32 intra_eval_enable;
4944 S32 inter_eval_enable;
4945 S32 ipe_decision_precedence;
4946 S32 node_validity;
4947 S32 num_clusters;
4948
4949 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb = ps_ctb_cluster_info->ps_cur_ipe_ctb;
4950
4951 if(NULL == ps_cu_tree)
4952 {
4953 return;
4954 }
4955
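/*
 * tree_depth maps to the block size of the node being filled:
 * 0 -> 64x64 (CTB root), 1 -> 32x32, 2 -> 16x16, 3 -> 8x8 leaves.
 */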
4956 switch(tree_depth)
4957 {
4958 case 0:
4959 {
4960 /* 64x64 block */
4961 S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
4962
4963 cluster_64x64_blk_t *ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
4964
4965 area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 4;
4966 area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
4967 children_nodes_required = 0;
4968 intra_mv_area = ps_blk_64x64->intra_mv_area;
4969
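/*
 * IPE (intra) decisions take precedence once the intra area reported for
 * this block reaches MAX_INTRA_PERCENTAGE percent of the block's area.
 */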
4970 ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
4971
4972 intra_eval_enable = ipe_decision_precedence;
4973 inter_eval_enable = !!ps_blk_64x64->num_clusters;
4974
4975 #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4976 if(e_quality_preset >= ME_HIGH_QUALITY)
4977 {
4978 inter_eval_enable = 1;
4979 node_validity = (blk_32x32_mask == 0xf);
4980 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
4981 ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
4982 #endif
4983 break;
4984 }
4985 #endif
4986
4987 #if ENABLE_4CTB_EVALUATION
4988 node_validity = (blk_32x32_mask == 0xf);
4989
4990 break;
4991 #else
4992 {
4993 S32 i;
4994
4995 num_clusters = ps_blk_64x64->num_clusters;
4996
4997 node_validity = (ipe_decision_precedence)
4998 ? (!ps_cur_ipe_ctb->u1_split_flag)
4999 : (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK);
5000
5001 for(i = 0; i < MAX_NUM_REF; i++)
5002 {
5003 node_validity = node_validity && (ps_blk_64x64->au1_num_clusters[i] <=
5004 MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
5005 }
5006
5007 node_validity = node_validity && (blk_32x32_mask == 0xf);
5008 }
5009 break;
5010 #endif
5011 }
5012 case 1:
5013 {
5014 /* 32x32 block */
5015 S32 is_percent_intra_area_gt_threshold;
5016
5017 cluster_32x32_blk_t *ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cur_blk_pos];
5018
5019 S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
5020
5021 #if !ENABLE_4CTB_EVALUATION
5022 S32 best_inter_cost = ps_blk_32x32->best_inter_cost;
5023 S32 best_intra_cost =
5024 ((ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
5025 ps_ctb_cluster_info->i4_frame_qstep * ps_ctb_cluster_info->i4_frame_qstep_multiplier *
5026 4) < 0)
5027 ? MAX_32BIT_VAL
5028 : (ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
5029 ps_ctb_cluster_info->i4_frame_qstep *
5030 ps_ctb_cluster_info->i4_frame_qstep_multiplier * 4);
5031 S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
5032 S32 cost_differential = (best_inter_cost - best_cost);
5033 #endif
5034
5035 area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 2;
5036 area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
5037 intra_mv_area = ps_blk_32x32->intra_mv_area;
5038 is_percent_intra_area_gt_threshold =
5039 (intra_mv_area > area_limit_for_me_decision_precedence);
5040 ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
5041
5042 intra_eval_enable = ipe_decision_precedence;
5043 inter_eval_enable = !!ps_blk_32x32->num_clusters;
5044 children_nodes_required = 1;
5045
5046 #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5047 if(e_quality_preset >= ME_HIGH_QUALITY)
5048 {
5049 inter_eval_enable = 1;
5050 node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5051 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5052 ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
5053 #endif
5054 break;
5055 }
5056 #endif
5057
5058 #if ENABLE_4CTB_EVALUATION
5059 node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5060
5061 break;
5062 #else
5063 {
5064 S32 i;
5065 num_clusters = ps_blk_32x32->num_clusters;
5066
5067 if(ipe_decision_precedence)
5068 {
5069 node_validity = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_merge_flag);
5070 node_validity = node_validity && (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5071 }
5072 else
5073 {
5074 node_validity =
5075 ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential)) &&
5076 (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
5077 (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5078
5079 for(i = 0; (i < MAX_NUM_REF) && (node_validity); i++)
5080 {
5081 node_validity = node_validity && (ps_blk_32x32->au1_num_clusters[i] <=
5082 MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
5083 }
5084
5085 if(node_validity)
5086 {
5087 node_validity = node_validity &&
5088 hme_sdi_based_cluster_spread_eligibility(
5089 ps_blk_32x32, ps_ctb_cluster_info->sdi_threshold);
5090 }
5091 }
5092 }
5093
5094 break;
5095 #endif
5096 }
5097 case 2:
5098 {
5099 cluster_16x16_blk_t *ps_blk_16x16 =
5100 &ps_ctb_cluster_info->ps_16x16_blk[e_cur_blk_pos + (e_parent_blk_pos << 2)];
5101
5102 S32 blk_8x8_mask =
5103 ps_ctb_cluster_info->pi4_blk_8x8_mask[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5104
5105 area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N];
5106 area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
5107 children_nodes_required = 1;
5108 intra_mv_area = ps_blk_16x16->intra_mv_area;
5109 ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
5110 num_clusters = ps_blk_16x16->num_clusters;
5111
5112 intra_eval_enable = ipe_decision_precedence;
5113 inter_eval_enable = 1;
5114
5115 #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5116 if(e_quality_preset >= ME_HIGH_QUALITY)
5117 {
5118 node_validity =
5119 !ps_ctb_cluster_info
5120 ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5121 children_nodes_required = !node_validity;
5122 break;
5123 }
5124 #endif
5125
5126 #if ENABLE_4CTB_EVALUATION
5127 node_validity = (blk_8x8_mask == 0xf);
5128
5129 #if ENABLE_CU_TREE_CULLING
5130 {
5131 cur_ctb_cu_tree_t *ps_32x32_root;
5132
5133 switch(e_parent_blk_pos)
5134 {
5135 case POS_TL:
5136 {
5137 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
5138
5139 break;
5140 }
5141 case POS_TR:
5142 {
5143 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
5144
5145 break;
5146 }
5147 case POS_BL:
5148 {
5149 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
5150
5151 break;
5152 }
5153 case POS_BR:
5154 {
5155 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
5156
5157 break;
5158 }
5159 }
5160
5161 if(ps_32x32_root->is_node_valid)
5162 {
5163 node_validity =
5164 node_validity &&
5165 !ps_ctb_cluster_info
5166 ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5167 children_nodes_required = !node_validity;
5168 }
5169 }
5170 #endif
5171
5172 break;
5173 #else
5174
5175 if(ipe_decision_precedence)
5176 {
5177 S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
5178 .as_intra16_analyse[e_cur_blk_pos]
5179 .b1_merge_flag);
5180 S32 valid_flag = (blk_8x8_mask == 0xf);
5181
5182 node_validity = merge_flag_16 && valid_flag;
5183 }
5184 else
5185 {
5186 node_validity = (blk_8x8_mask == 0xf);
5187 }
5188
5189 break;
5190 #endif
5191 }
5192 case 3:
5193 {
5194 S32 blk_8x8_mask =
5195 ps_ctb_cluster_info
5196 ->pi4_blk_8x8_mask[(S32)(e_grandparent_blk_pos << 2) + e_parent_blk_pos];
5197 S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos]
5198 .as_intra16_analyse[e_parent_blk_pos]
5199 .b1_merge_flag);
5200 S32 merge_flag_32 =
5201 (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos].b1_merge_flag);
5202
5203 intra_eval_enable = !merge_flag_16 || !merge_flag_32;
5204 inter_eval_enable = 1;
5205 children_nodes_required = 0;
5206
5207 #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5208 if(e_quality_preset >= ME_HIGH_QUALITY)
5209 {
5210 node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
5211 break;
5212 }
5213 #endif
5214
5215 #if ENABLE_4CTB_EVALUATION
5216 node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
5217
5218 break;
5219 #else
5220 {
5221 cur_ctb_cu_tree_t *ps_32x32_root;
5222 cur_ctb_cu_tree_t *ps_16x16_root;
5223 cluster_32x32_blk_t *ps_32x32_blk;
5224
5225 switch(e_grandparent_blk_pos)
5226 {
5227 case POS_TL:
5228 {
5229 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
5230
5231 break;
5232 }
5233 case POS_TR:
5234 {
5235 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
5236
5237 break;
5238 }
5239 case POS_BL:
5240 {
5241 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
5242
5243 break;
5244 }
5245 case POS_BR:
5246 {
5247 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
5248
5249 break;
5250 }
5251 }
5252
5253 switch(e_parent_blk_pos)
5254 {
5255 case POS_TL:
5256 {
5257 ps_16x16_root = ps_32x32_root->ps_child_node_tl;
5258
5259 break;
5260 }
5261 case POS_TR:
5262 {
5263 ps_16x16_root = ps_32x32_root->ps_child_node_tr;
5264
5265 break;
5266 }
5267 case POS_BL:
5268 {
5269 ps_16x16_root = ps_32x32_root->ps_child_node_bl;
5270
5271 break;
5272 }
5273 case POS_BR:
5274 {
5275 ps_16x16_root = ps_32x32_root->ps_child_node_br;
5276
5277 break;
5278 }
5279 }
5280
5281 ps_32x32_blk = &ps_ctb_cluster_info->ps_32x32_blk[e_grandparent_blk_pos];
5282
5283 node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0) &&
5284 ((!ps_32x32_root->is_node_valid) ||
5285 (ps_32x32_blk->num_clusters_with_weak_sdi_density > 0) ||
5286 (!ps_16x16_root->is_node_valid));
5287
5288 break;
5289 }
5290 #endif
5291 }
5292 }
5293
5294 /* Fill the current cu_tree node */
5295 ps_cu_tree->is_node_valid = node_validity;
5296 ps_cu_tree->u1_intra_eval_enable = intra_eval_enable;
5297 ps_cu_tree->u1_inter_eval_enable = inter_eval_enable;
5298
5299 if(children_nodes_required)
5300 {
5301 tree_depth++;
5302
5303 hme_populate_cu_tree(
5304 ps_ctb_cluster_info,
5305 ps_cu_tree->ps_child_node_tl,
5306 tree_depth,
5307 e_quality_preset,
5308 e_parent_blk_pos,
5309 e_cur_blk_pos,
5310 POS_TL);
5311
5312 hme_populate_cu_tree(
5313 ps_ctb_cluster_info,
5314 ps_cu_tree->ps_child_node_tr,
5315 tree_depth,
5316 e_quality_preset,
5317 e_parent_blk_pos,
5318 e_cur_blk_pos,
5319 POS_TR);
5320
5321 hme_populate_cu_tree(
5322 ps_ctb_cluster_info,
5323 ps_cu_tree->ps_child_node_bl,
5324 tree_depth,
5325 e_quality_preset,
5326 e_parent_blk_pos,
5327 e_cur_blk_pos,
5328 POS_BL);
5329
5330 hme_populate_cu_tree(
5331 ps_ctb_cluster_info,
5332 ps_cu_tree->ps_child_node_br,
5333 tree_depth,
5334 e_quality_preset,
5335 e_parent_blk_pos,
5336 e_cur_blk_pos,
5337 POS_BR);
5338 }
5339 }
5340
5341 /**
5342 ********************************************************************************
5343 * @fn void hme_analyse_mv_clustering
5344 * (
5345 * search_results_t *ps_search_results,
5346 * ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
5347 * cur_ctb_cu_tree_t *ps_cu_tree
5348 * )
5349 *
5350 * @brief Implementation for the clustering algorithm
5351 *
5352 * @param[in] ps_search_results: structure containing 16x16 block results
5353 *
5354 * @param[in] ps_cur_ipe_ctb : output container for ipe analyses
5355 *
5356 * @param[out] ps_cu_tree : represents CU tree used in CU recursion
5357 *
5358 * @return None
5359 ********************************************************************************
5360 */
5361 void hme_analyse_mv_clustering(
5362 search_results_t *ps_search_results,
5363 inter_cu_results_t *ps_16x16_cu_results,
5364 inter_cu_results_t *ps_8x8_cu_results,
5365 ctb_cluster_info_t *ps_ctb_cluster_info,
5366 S08 *pi1_future_list,
5367 S08 *pi1_past_list,
5368 S32 bidir_enabled,
5369 ME_QUALITY_PRESETS_T e_quality_preset)
5370 {
5371 cluster_16x16_blk_t *ps_blk_16x16;
5372 cluster_32x32_blk_t *ps_blk_32x32;
5373 cluster_64x64_blk_t *ps_blk_64x64;
5374
5375 part_type_results_t *ps_best_result;
5376 pu_result_t *aps_part_result[MAX_NUM_PARTS];
5377 pu_result_t *aps_inferior_parts[MAX_NUM_PARTS];
5378
5379 PART_ID_T e_part_id;
5380 PART_TYPE_T e_part_type;
5381
5382 S32 enable_64x64_merge;
5383 S32 i, j, k;
5384 S32 mvx, mvy;
5385 S32 num_parts;
5386 S32 ref_idx;
5387 S32 ai4_pred_mode[MAX_NUM_PARTS];
5388
5389 S32 num_32x32_merges = 0;
5390
5391 /*****************************************/
5392 /*****************************************/
5393 /********* Enter ye who is HQ ************/
5394 /*****************************************/
5395 /*****************************************/
5396
5397 ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
5398
5399 /* Initialise data in each of the clusters */
5400 for(i = 0; i < 16; i++)
5401 {
5402 ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
5403
5404 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5405 if(e_quality_preset < ME_HIGH_QUALITY)
5406 {
5407 hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
5408 }
5409 else
5410 {
5411 ps_blk_16x16->best_inter_cost = 0;
5412 ps_blk_16x16->intra_mv_area = 0;
5413 }
5414 #else
5415 hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
5416 #endif
5417 }
5418
5419 for(i = 0; i < 4; i++)
5420 {
5421 ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
5422
5423 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5424 if(e_quality_preset < ME_HIGH_QUALITY)
5425 {
5426 hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
5427 }
5428 else
5429 {
5430 ps_blk_32x32->best_inter_cost = 0;
5431 ps_blk_32x32->intra_mv_area = 0;
5432 }
5433 #else
5434 hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
5435 #endif
5436 }
5437
5438 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5439 if(e_quality_preset < ME_HIGH_QUALITY)
5440 {
5441 hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
5442 }
5443 else
5444 {
5445 ps_blk_64x64->best_inter_cost = 0;
5446 ps_blk_64x64->intra_mv_area = 0;
5447 }
5448 #else
5449 hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
5450 #endif
5451
5452 /* Initialise data for all nodes in the CU tree */
5453 hme_build_cu_tree(
5454 ps_ctb_cluster_info, ps_ctb_cluster_info->ps_cu_tree_root, 0, POS_NA, POS_NA, POS_NA);
5455
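/* For HIGH_QUALITY and faster presets, assume every 16x16 block is split by default; blocks whose best result turns out not to be split clear this flag further below */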
5456 if(e_quality_preset >= ME_HIGH_QUALITY)
5457 {
5458 memset(ps_ctb_cluster_info->au1_is_16x16_blk_split, 1, 16 * sizeof(U08));
5459 }
5460
5461 #if ENABLE_UNIFORM_CU_SIZE_16x16 || ENABLE_UNIFORM_CU_SIZE_8x8
5462 return;
5463 #endif
5464
5465 for(i = 0; i < 16; i++)
5466 {
5467 S32 blk_8x8_mask;
5468 S32 is_16x16_blk_valid;
5469 S32 num_clusters_updated;
5470 S32 num_clusters;
5471
5472 blk_8x8_mask = ps_ctb_cluster_info->pi4_blk_8x8_mask[i];
5473
5474 ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
5475
5476 is_16x16_blk_valid = (blk_8x8_mask == 0xf);
5477
5478 if(is_16x16_blk_valid)
5479 {
5480 /* Use 8x8 data when 16x16 CU is split */
5481 if(ps_search_results[i].u1_split_flag)
5482 {
5483 S32 blk_8x8_idx = i << 2;
5484
5485 num_parts = 4;
5486 e_part_type = PRT_NxN;
5487
5488 for(j = 0; j < num_parts; j++, blk_8x8_idx++)
5489 {
5490 /* Only 2Nx2N partition supported for 8x8 block */
5491 ASSERT(
5492 ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].u1_part_type ==
5493 ((PART_TYPE_T)PRT_2Nx2N));
5494
5495 aps_part_result[j] =
5496 &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].as_pu_results[0];
5497 aps_inferior_parts[j] =
5498 &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[1].as_pu_results[0];
5499 ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
5500 }
5501 }
5502 else
5503 {
5504 ps_best_result = &ps_16x16_cu_results[i].ps_best_results[0];
5505
5506 e_part_type = (PART_TYPE_T)ps_best_result->u1_part_type;
5507 num_parts = gau1_num_parts_in_part_type[e_part_type];
5508
5509 for(j = 0; j < num_parts; j++)
5510 {
5511 aps_part_result[j] = &ps_best_result->as_pu_results[j];
5512 aps_inferior_parts[j] = &ps_best_result[1].as_pu_results[j];
5513 ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
5514 }
5515
5516 ps_ctb_cluster_info->au1_is_16x16_blk_split[i] = 0;
5517 }
5518
5519 for(j = 0; j < num_parts; j++)
5520 {
5521 pu_result_t *ps_part_result = aps_part_result[j];
5522
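/* A bi-predicted partition (pred mode 2) contributes two MVs (L0 and L1); a uni-directional partition contributes one */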
5523 S32 num_mvs = ((ai4_pred_mode[j] > 1) + 1);
5524
5525 e_part_id = ge_part_type_to_part_id[e_part_type][j];
5526
5527 /* Skip clustering if best mode is intra */
5528 if((ps_part_result->pu.b1_intra_flag))
5529 {
5530 ps_blk_16x16->intra_mv_area += gai4_partition_area[e_part_id];
5531 ps_blk_16x16->best_inter_cost += aps_inferior_parts[j]->i4_tot_cost;
5532 continue;
5533 }
5534 else
5535 {
5536 ps_blk_16x16->best_inter_cost += ps_part_result->i4_tot_cost;
5537 }
5538
5539 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5540 if(e_quality_preset >= ME_HIGH_QUALITY)
5541 {
5542 continue;
5543 }
5544 #endif
5545
5546 for(k = 0; k < num_mvs; k++)
5547 {
5548 mv_t *ps_mv;
5549
5550 pu_mv_t *ps_pu_mv = &ps_part_result->pu.mv;
5551
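/* Pred mode 0 is L0 uni-directional; for bipred (pred mode 2), the first iteration (k == 0) picks the L0 MV and the second picks the L1 MV */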
5552 S32 is_l0_mv = ((ai4_pred_mode[j] == 2) && !k) || (ai4_pred_mode[j] == 0);
5553
5554 ps_mv = (is_l0_mv) ? (&ps_pu_mv->s_l0_mv) : (&ps_pu_mv->s_l1_mv);
5555
5556 mvx = ps_mv->i2_mvx;
5557 mvy = ps_mv->i2_mvy;
5558
5559 ref_idx = (is_l0_mv) ? pi1_past_list[ps_pu_mv->i1_l0_ref_idx]
5560 : pi1_future_list[ps_pu_mv->i1_l1_ref_idx];
5561
5562 num_clusters = ps_blk_16x16->num_clusters;
5563
5564 hme_find_and_update_clusters(
5565 ps_blk_16x16->as_cluster_data,
5566 &(ps_blk_16x16->num_clusters),
5567 mvx,
5568 mvy,
5569 ref_idx,
5570 ps_part_result->i4_sdi,
5571 e_part_id,
5572 (ai4_pred_mode[j] == 2));
5573
5574 num_clusters_updated = (ps_blk_16x16->num_clusters);
5575
5576 ps_blk_16x16->au1_num_clusters[ref_idx] +=
5577 (num_clusters_updated - num_clusters);
5578 }
5579 }
5580 }
5581 }
5582
5583 /* Search for 32x32 clusters */
5584 for(i = 0; i < 4; i++)
5585 {
5586 S32 num_clusters_merged;
5587
5588 S32 is_32x32_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << i)) || 0;
5589
5590 if(is_32x32_blk_valid)
5591 {
5592 ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
5593 ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i << 2];
5594
5595 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5596 if(e_quality_preset >= ME_HIGH_QUALITY)
5597 {
5598 for(j = 0; j < 4; j++, ps_blk_16x16++)
5599 {
5600 ps_blk_32x32->intra_mv_area += ps_blk_16x16->intra_mv_area;
5601
5602 ps_blk_32x32->best_inter_cost += ps_blk_16x16->best_inter_cost;
5603 }
5604 continue;
5605 }
5606 #endif
5607
5608 hme_update_32x32_clusters(ps_blk_32x32, ps_blk_16x16);
5609
5610 if((ps_blk_32x32->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
5611 {
5612 num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
5613 ps_blk_32x32->as_cluster_data, (ps_blk_32x32->num_clusters));
5614
5615 if(num_clusters_merged)
5616 {
5617 ps_blk_32x32->num_clusters -= num_clusters_merged;
5618
5619 UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_32x32);
5620 }
5621 }
5622 }
5623 }
5624
5625 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5626 /* Eliminate outlier 32x32 clusters */
5627 if(e_quality_preset < ME_HIGH_QUALITY)
5628 #endif
5629 {
5630 hme_boot_out_outlier(ps_ctb_cluster_info, 32);
5631
5632 /* Find best_uni_ref and best_alt_ref */
5633 hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 32);
5634 }
5635
5636 /* Populate the CU tree for depths 1 and higher */
5637 {
5638 cur_ctb_cu_tree_t *ps_tree_root = ps_ctb_cluster_info->ps_cu_tree_root;
5639 cur_ctb_cu_tree_t *ps_tl = ps_tree_root->ps_child_node_tl;
5640 cur_ctb_cu_tree_t *ps_tr = ps_tree_root->ps_child_node_tr;
5641 cur_ctb_cu_tree_t *ps_bl = ps_tree_root->ps_child_node_bl;
5642 cur_ctb_cu_tree_t *ps_br = ps_tree_root->ps_child_node_br;
5643
5644 hme_populate_cu_tree(
5645 ps_ctb_cluster_info, ps_tl, 1, e_quality_preset, POS_NA, POS_NA, POS_TL);
5646
5647 num_32x32_merges += (ps_tl->is_node_valid == 1);
5648
5649 hme_populate_cu_tree(
5650 ps_ctb_cluster_info, ps_tr, 1, e_quality_preset, POS_NA, POS_NA, POS_TR);
5651
5652 num_32x32_merges += (ps_tr->is_node_valid == 1);
5653
5654 hme_populate_cu_tree(
5655 ps_ctb_cluster_info, ps_bl, 1, e_quality_preset, POS_NA, POS_NA, POS_BL);
5656
5657 num_32x32_merges += (ps_bl->is_node_valid == 1);
5658
5659 hme_populate_cu_tree(
5660 ps_ctb_cluster_info, ps_br, 1, e_quality_preset, POS_NA, POS_NA, POS_BR);
5661
5662 num_32x32_merges += (ps_br->is_node_valid == 1);
5663 }
5664
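/* Decide whether to attempt the 64x64 merge: for presets below HIGH_QUALITY it needs at least 3 successful 32x32 merges (unless 4CTB evaluation is enabled); for HIGH_QUALITY and faster presets it is attempted unconditionally here */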
5665 #if !ENABLE_4CTB_EVALUATION
5666 if(e_quality_preset < ME_HIGH_QUALITY)
5667 {
5668 enable_64x64_merge = (num_32x32_merges >= 3);
5669 }
5670 #else
5671 if(e_quality_preset < ME_HIGH_QUALITY)
5672 {
5673 enable_64x64_merge = 1;
5674 }
5675 #endif
5676
5677 #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5678 if(e_quality_preset >= ME_HIGH_QUALITY)
5679 {
5680 enable_64x64_merge = 1;
5681 }
5682 #else
5683 if(e_quality_preset >= ME_HIGH_QUALITY)
5684 {
5685 enable_64x64_merge = (num_32x32_merges >= 3);
5686 }
5687 #endif
5688
5689 if(enable_64x64_merge)
5690 {
5691 S32 num_clusters_merged;
5692
5693 ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[0];
5694
5695 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5696 if(e_quality_preset >= ME_HIGH_QUALITY)
5697 {
5698 for(j = 0; j < 4; j++, ps_blk_32x32++)
5699 {
5700 ps_blk_64x64->intra_mv_area += ps_blk_32x32->intra_mv_area;
5701
5702 ps_blk_64x64->best_inter_cost += ps_blk_32x32->best_inter_cost;
5703 }
5704 }
5705 else
5706 #endif
5707 {
5708 hme_update_64x64_clusters(ps_blk_64x64, ps_blk_32x32);
5709
5710 if((ps_blk_64x64->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
5711 {
5712 num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
5713 ps_blk_64x64->as_cluster_data, (ps_blk_64x64->num_clusters));
5714
5715 if(num_clusters_merged)
5716 {
5717 ps_blk_64x64->num_clusters -= num_clusters_merged;
5718
5719 UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_64x64);
5720 }
5721 }
5722 }
5723
5724 #if !ENABLE_4CTB_EVALUATION
5725 if(e_quality_preset < ME_HIGH_QUALITY)
5726 {
5727 S32 best_inter_cost = ps_blk_64x64->best_inter_cost;
5728 S32 best_intra_cost =
5729 ((ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
5730 ps_ctb_cluster_info->i4_frame_qstep *
5731 ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16) < 0)
5732 ? MAX_32BIT_VAL
5733 : (ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
5734 ps_ctb_cluster_info->i4_frame_qstep *
5735 ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16);
5736 S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
5737 S32 cost_differential = (best_inter_cost - best_cost);
5738
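/* Retain the 64x64 merge only if the best inter cost exceeds the overall best (inter/intra) cost by no more than ALL_INTER_COST_DIFF_THR percent */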
5739 enable_64x64_merge =
5740 ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential));
5741 }
5742 #endif
5743 }
5744
5745 if(enable_64x64_merge)
5746 {
5747 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5748 if(e_quality_preset < ME_HIGH_QUALITY)
5749 #endif
5750 {
5751 hme_boot_out_outlier(ps_ctb_cluster_info, 64);
5752
5753 hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 64);
5754 }
5755
5756 hme_populate_cu_tree(
5757 ps_ctb_cluster_info,
5758 ps_ctb_cluster_info->ps_cu_tree_root,
5759 0,
5760 e_quality_preset,
5761 POS_NA,
5762 POS_NA,
5763 POS_NA);
5764 }
5765 }
5766 #endif
5767
5768 static __inline void hme_merge_prms_init(
5769 hme_merge_prms_t *ps_prms,
5770 layer_ctxt_t *ps_curr_layer,
5771 refine_prms_t *ps_refine_prms,
5772 me_frm_ctxt_t *ps_me_ctxt,
5773 range_prms_t *ps_range_prms_rec,
5774 range_prms_t *ps_range_prms_inp,
5775 mv_grid_t **pps_mv_grid,
5776 inter_ctb_prms_t *ps_inter_ctb_prms,
5777 S32 i4_num_pred_dir,
5778 S32 i4_32x32_id,
5779 BLK_SIZE_T e_blk_size,
5780 ME_QUALITY_PRESETS_T e_me_quality_presets)
5781 {
5782 S32 i4_use_rec = ps_refine_prms->i4_use_rec_in_fpel;
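/* Index of the first of the four 16x16 search-result units feeding this 32x32 merge; unused (0) for the 32x32-to-64x64 merge */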
5783 S32 i4_cu_16x16 = (BLK_32x32 == e_blk_size) ? (i4_32x32_id << 2) : 0;
5784
5785 /* Currently not enabling segmentation info from prev layers */
5786 ps_prms->i4_seg_info_avail = 0;
5787 ps_prms->i4_part_mask = 0;
5788
5789 /* Number of reference pics in which to do merge */
5790 ps_prms->i4_num_ref = i4_num_pred_dir;
5791
5792 /* Layer ctxt info */
5793 ps_prms->ps_layer_ctxt = ps_curr_layer;
5794
5795 ps_prms->ps_inter_ctb_prms = ps_inter_ctb_prms;
5796
5797 /* Top left, top right, bottom left and bottom right 16x16 units */
5798 if(BLK_32x32 == e_blk_size)
5799 {
5800 ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16];
5801 ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 1];
5802 ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 2];
5803 ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 3];
5804
5805 /* Merge results stored here */
5806 ps_prms->ps_results_merge = &ps_me_ctxt->as_search_results_32x32[i4_32x32_id];
5807
5808 /* This could be less than the number of 16x16 results generated */
5809 /* For now, keeping it the same */
5810 ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_fpel_results;
5811 ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[i4_32x32_id << 4];
5812 ps_prms->ps_results_grandchild = NULL;
5813 }
5814 else
5815 {
5816 ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_32x32[0];
5817 ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_32x32[1];
5818 ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_32x32[2];
5819 ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_32x32[3];
5820
5821 /* Merge results stored here */
5822 ps_prms->ps_results_merge = &ps_me_ctxt->s_search_results_64x64;
5823
5824 ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_32x32_merge_results;
5825 ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[0];
5826 ps_prms->ps_results_grandchild = ps_me_ctxt->as_search_results_16x16;
5827 }
5828
5829 if(i4_use_rec)
5830 {
5831 WORD32 ref_ctr;
5832
5833 for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
5834 {
5835 ps_prms->aps_mv_range[ref_ctr] = &ps_range_prms_rec[ref_ctr];
5836 }
5837 }
5838 else
5839 {
5840 WORD32 ref_ctr;
5841
5842 for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
5843 {
5844 ps_prms->aps_mv_range[ref_ctr] = &ps_range_prms_inp[ref_ctr];
5845 }
5846 }
5847 ps_prms->i4_use_rec = i4_use_rec;
5848
5849 ps_prms->pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
5850
5851 ps_prms->pps_mv_grid = pps_mv_grid;
5852
5853 ps_prms->log_ctb_size = ps_me_ctxt->log_ctb_size;
5854
5855 ps_prms->e_quality_preset = e_me_quality_presets;
5856 ps_prms->pi1_future_list = ps_me_ctxt->ai1_future_list;
5857 ps_prms->pi1_past_list = ps_me_ctxt->ai1_past_list;
5858 ps_prms->ps_cluster_info = ps_me_ctxt->ps_ctb_cluster_info;
5859 }
5860
5861 /**
5862 ********************************************************************************
5863 * @fn void hme_refine(me_ctxt_t *ps_thrd_ctxt,
5864 * refine_prms_t *ps_refine_prms, ...)
5865 *
5866 * @brief Top level entry point for refinement ME
5867 *
5868 * @param[in,out] ps_thrd_ctxt: ME Handle
5869 *
5870 * @param[in] ps_refine_prms : refinement layer prms
5871 *
5872 * @return None
5873 ********************************************************************************
5874 */
5875 void hme_refine(
5876 me_ctxt_t *ps_thrd_ctxt,
5877 refine_prms_t *ps_refine_prms,
5878 PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
5879 layer_ctxt_t *ps_coarse_layer,
5880 multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
5881 S32 lyr_job_type,
5882 S32 thrd_id,
5883 S32 me_frm_id,
5884 pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input)
5885 {
5886 inter_ctb_prms_t s_common_frm_prms;
5887
5888 BLK_SIZE_T e_search_blk_size, e_result_blk_size;
5889 WORD32 i4_me_frm_id = me_frm_id % MAX_NUM_ME_PARALLEL;
5890 me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
5891 ME_QUALITY_PRESETS_T e_me_quality_presets =
5892 ps_thrd_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
5893
5894 WORD32 num_rows_proc = 0;
5895 WORD32 num_act_ref_pics;
5896 WORD16 i2_prev_enc_frm_max_mv_y;
5897 WORD32 i4_idx_dvsr_p = ps_multi_thrd_ctxt->i4_idx_dvsr_p;
5898
5899 /*************************************************************************/
5900 /* Complexity of search: Low to High */
5901 /*************************************************************************/
5902 SEARCH_COMPLEXITY_T e_search_complexity;
5903
5904 /*************************************************************************/
5905 /* to store the PU results which are passed to the decide_part_types */
5906 /* as input prms. Multiplied by 4 as the max number of Ref in a List is 4*/
5907 /*************************************************************************/
5908
5909 pu_result_t as_pu_results[2][TOT_NUM_PARTS][MAX_NUM_RESULTS_PER_PART_LIST];
5910 inter_pu_results_t as_inter_pu_results[4];
5911 inter_pu_results_t *ps_pu_results = as_inter_pu_results;
5912
5913 /*************************************************************************/
5914 /* Config parameter structures for various ME submodules */
5915 /*************************************************************************/
5916 hme_merge_prms_t s_merge_prms_32x32_tl, s_merge_prms_32x32_tr;
5917 hme_merge_prms_t s_merge_prms_32x32_bl, s_merge_prms_32x32_br;
5918 hme_merge_prms_t s_merge_prms_64x64;
5919 hme_search_prms_t s_search_prms_blk;
5920 mvbank_update_prms_t s_mv_update_prms;
5921 hme_ctb_prms_t s_ctb_prms;
5922 hme_subpel_prms_t s_subpel_prms;
5923 fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_ctxt->ps_fullpel_refine_ctxt;
5924 ctb_cluster_info_t *ps_ctb_cluster_info;
5925 fpel_srch_cand_init_data_t s_srch_cand_init_data;
5926
5927 /* 4 bits (LSBs) of this variable control merge of 4 32x32 CUs in CTB */
5928 S32 en_merge_32x32;
5929 /* 5 LSBs specify whether the merge algorithm is required */
5930 /* to be executed. Relevant only in PQ. Ought to be */
5931 /* used in conjunction with en_merge_32x32 and */
5932 /* ps_ctb_bound_attrs->u1_merge_to_64x64_flag. This is */
5933 /* required when all children are deemed to be intras */
5934 S32 en_merge_execution;
5935
5936 /*************************************************************************/
5937 /* All types of search candidates for predictor based search. */
5938 /*************************************************************************/
5939 S32 num_init_candts = 0;
5940 S32 i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
5941 S32 i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
5942 search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
5943 search_node_t as_top_neighbours[4], as_left_neighbours[3];
5944
5945 pf_get_wt_inp fp_get_wt_inp;
5946
5947 search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
5948 U32 au4_unique_node_map[MAP_X_MAX * 2];
5949
5950 /* Controls the boundary attributes of CTB, whether it has 64x64 or not */
5951 ctb_boundary_attrs_t *ps_ctb_bound_attrs;
5952
5953 /*************************************************************************/
5954 /* points to the search results for the blk level search (8x8/16x16) */
5955 /*************************************************************************/
5956 search_results_t *ps_search_results;
5957
5958 /*************************************************************************/
5959 /* Coordinates */
5960 /*************************************************************************/
5961 S32 blk_x, blk_y, i4_ctb_x, i4_ctb_y, tile_col_idx, blk_id_in_ctb;
5962 S32 pos_x, pos_y;
5963 S32 blk_id_in_full_ctb;
5964
5965 /*************************************************************************/
5966 /* Related to dimensions of block being searched and pic dimensions */
5967 /*************************************************************************/
5968 S32 blk_4x4_to_16x16;
5969 S32 blk_wd, blk_ht, blk_size_shift;
5970 S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
5971 S32 num_results_prev_layer;
5972
5973 /*************************************************************************/
5974 /* Size of a basic unit for this layer. For non encode layers, we search */
5975 /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
5976 /* basic unit size is the ctb size. */
5977 /*************************************************************************/
5978 S32 unit_size;
5979
5980 /*************************************************************************/
5981 /* Local variable storing results of any 4 CU merge to bigger CU */
5982 /*************************************************************************/
5983 CU_MERGE_RESULT_T e_merge_result;
5984
5985 /*************************************************************************/
5986 /* This mv grid stores results during and after fpel search, during */
5987 /* merge, subpel and bidirect refinements stages. 2 instances of this are*/
5988 /* meant for the 2 directions of search (l0 and l1). */
5989 /*************************************************************************/
5990 mv_grid_t *aps_mv_grid[2];
5991
5992 /*************************************************************************/
5993 /* Pointers to context in current and coarser layers */
5994 /*************************************************************************/
5995 layer_ctxt_t *ps_curr_layer, *ps_prev_layer;
5996
5997 /*************************************************************************/
5998 /* to store mv range per blk, and picture limit, allowed search range */
5999 /* range prms in hpel and qpel units as well */
6000 /*************************************************************************/
6001 range_prms_t as_range_prms_inp[MAX_NUM_REF], as_range_prms_rec[MAX_NUM_REF];
6002 range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
6003 range_prms_t as_range_prms_hpel[MAX_NUM_REF], as_range_prms_qpel[MAX_NUM_REF];
6004
6005 /*************************************************************************/
6006 /* These variables are used to track number of references at different */
6007 /* stages of ME. */
6008 /*************************************************************************/
6009 S32 i4_num_pred_dir;
6010 S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
6011 S32 lambda_recon = ps_refine_prms->lambda_recon;
6012
6013 /* Counts successful merge to 32x32 every CTB (0-4) */
6014 S32 merge_count_32x32;
6015
6016 S32 ai4_id_coloc[14], ai4_id_Z[2];
6017 U08 au1_search_candidate_list_index[2];
6018 S32 ai4_num_coloc_cands[2];
6019 U08 u1_pred_dir, u1_pred_dir_ctr;
6020
6021 /*************************************************************************/
6022 /* Input pointer and stride */
6023 /*************************************************************************/
6024 U08 *pu1_inp;
6025 S32 i4_inp_stride;
6026 S32 end_of_frame;
6027 S32 num_sync_units_in_row, num_sync_units_in_tile;
6028
6029 /*************************************************************************/
6030 /* Indicates whether all the 4 8x8 blks are valid in the 16x16 blk in the*/
6031 /* encode layer. If not 15, then 1 or more 8x8 blks not valid. Means that*/
6032 /* we need to stop merges and force 8x8 CUs for that 16x16 blk */
6033 /*************************************************************************/
6034 S32 blk_8x8_mask;
6035 S32 ai4_blk_8x8_mask[16];
6036 U08 au1_is_64x64Blk_noisy[1];
6037 U08 au1_is_32x32Blk_noisy[4];
6038 U08 au1_is_16x16Blk_noisy[16];
6039
6040 ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
6041 ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
6042 ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
6043 ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
6044
6045 ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
6046
6047 /*************************************************************************/
6048 /* Pointers to current and coarse layer are needed for projection */
6049 /* Pointer to prev layer are needed for other candts like coloc */
6050 /*************************************************************************/
6051 ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
6052
6053 ps_prev_layer = hme_get_past_layer_ctxt(
6054 ps_thrd_ctxt, ps_ctxt, ps_refine_prms->i4_layer_id, ps_multi_thrd_ctxt->i4_num_me_frm_pllel);
6055
6056 num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
6057
6058 /* Function pointer is selected based on the C vs X86 macro */
6059
6060 fp_get_wt_inp = ps_me_optimised_function_list->pf_get_wt_inp_ctb;
6061
6062 i4_inp_stride = ps_curr_layer->i4_inp_stride;
6063 i4_pic_wd = ps_curr_layer->i4_wd;
6064 i4_pic_ht = ps_curr_layer->i4_ht;
6065 e_search_complexity = ps_refine_prms->e_search_complexity;
6066 end_of_frame = 0;
6067
6068 /* This points to all the initial candts */
6069 ps_search_candts = &as_search_candts[0];
6070
6071 /* The mv grid, being a huge structure, is part of the context */
6072 aps_mv_grid[0] = &ps_ctxt->as_mv_grid[0];
6073 aps_mv_grid[1] = &ps_ctxt->as_mv_grid[1];
6074
6075 /*************************************************************************/
6076 /* If the current layer is encoded (since it may be multicast or final */
6077 /* layer (finest)), then we use 16x16 blk size with some selected parts */
6078 /* If the current layer is not encoded, then we use 8x8 blk size, with */
6079 /* enable or disable of 4x4 partitions depending on the input prms */
6080 /*************************************************************************/
6081 e_search_blk_size = BLK_16x16;
6082 blk_wd = blk_ht = 16;
6083 blk_size_shift = 4;
6084 e_result_blk_size = BLK_8x8;
6085 s_mv_update_prms.i4_shift = 1;
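/* Search blocks are 16x16 but results are stored at 8x8 granularity (e_result_blk_size), one shift down from the search block size */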
6086
6087 if(ps_coarse_layer->ps_layer_mvbank->e_blk_size == BLK_4x4)
6088 {
6089 blk_4x4_to_16x16 = 1;
6090 }
6091 else
6092 {
6093 blk_4x4_to_16x16 = 0;
6094 }
6095
6096 unit_size = 1 << ps_ctxt->log_ctb_size;
6097 s_search_prms_blk.i4_inp_stride = unit_size;
6098
6099 /* This is required to properly update the layer mv bank */
6100 s_mv_update_prms.e_search_blk_size = e_search_blk_size;
6101 s_search_prms_blk.e_blk_size = e_search_blk_size;
6102
6103 /*************************************************************************/
6104 /* If current layer is explicit, then the number of ref frames are to */
6105 /* be same as previous layer. Else it will be 2 */
6106 /*************************************************************************/
6107 i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
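/* Two prediction directions only when bidir is enabled and both L0 and L1 have active references; one otherwise */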
6108 i4_num_pred_dir =
6109 (ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 0) && (i4_num_act_ref_l1 > 0)) +
6110 1;
6111
6112 #if USE_MODIFIED == 1
6113 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
6114 #else
6115 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
6116 #endif
6117
6118 i4_num_pred_dir = MIN(i4_num_pred_dir, i4_num_ref_prev_layer);
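/* When the coarser layer tracked more than two references, split them evenly between the two directions; otherwise use one reference per direction */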
6119 if(i4_num_ref_prev_layer <= 2)
6120 {
6121 i4_num_ref_each_dir = 1;
6122 }
6123 else
6124 {
6125 i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
6126 }
6127
6128 s_mv_update_prms.i4_num_ref = i4_num_pred_dir;
6129 s_mv_update_prms.i4_num_results_to_store =
6130 MIN((ps_ctxt->s_frm_prms.bidir_enabled) ? ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref
6131 : (i4_num_act_ref_l0 > 1) + 1,
6132 ps_refine_prms->i4_num_results_per_part);
6133
6134 /*************************************************************************/
6135 /* Initialization of merge params for 16x16 to 32x32 merge. */
6136 /* There are 4 32x32 units in a CTB, so 4 param structures initialized */
6137 /*************************************************************************/
6138 {
6139 hme_merge_prms_t *aps_merge_prms[4];
6140 aps_merge_prms[0] = &s_merge_prms_32x32_tl;
6141 aps_merge_prms[1] = &s_merge_prms_32x32_tr;
6142 aps_merge_prms[2] = &s_merge_prms_32x32_bl;
6143 aps_merge_prms[3] = &s_merge_prms_32x32_br;
6144 for(i = 0; i < 4; i++)
6145 {
6146 hme_merge_prms_init(
6147 aps_merge_prms[i],
6148 ps_curr_layer,
6149 ps_refine_prms,
6150 ps_ctxt,
6151 as_range_prms_rec,
6152 as_range_prms_inp,
6153 &aps_mv_grid[0],
6154 &s_common_frm_prms,
6155 i4_num_pred_dir,
6156 i,
6157 BLK_32x32,
6158 e_me_quality_presets);
6159 }
6160 }
6161
6162 /*************************************************************************/
6163 /* Initialization of merge params for 32x32 to 64x64 merge. */
6164 /* There are 4 32x32 units in a CTB, so only 1 64x64 CU can be in CTB */
6165 /*************************************************************************/
6166 {
6167 hme_merge_prms_init(
6168 &s_merge_prms_64x64,
6169 ps_curr_layer,
6170 ps_refine_prms,
6171 ps_ctxt,
6172 as_range_prms_rec,
6173 as_range_prms_inp,
6174 &aps_mv_grid[0],
6175 &s_common_frm_prms,
6176 i4_num_pred_dir,
6177 0,
6178 BLK_64x64,
6179 e_me_quality_presets);
6180 }
6181
6182 /* Pointers to cu_results are initialised here */
6183 {
6184 WORD32 i;
6185
6186 ps_ctxt->s_search_results_64x64.ps_cu_results = &ps_ctxt->s_cu64x64_results;
6187
6188 for(i = 0; i < 4; i++)
6189 {
6190 ps_ctxt->as_search_results_32x32[i].ps_cu_results = &ps_ctxt->as_cu32x32_results[i];
6191 }
6192
6193 for(i = 0; i < 16; i++)
6194 {
6195 ps_ctxt->as_search_results_16x16[i].ps_cu_results = &ps_ctxt->as_cu16x16_results[i];
6196 }
6197 }
6198
6199 /*************************************************************************/
6200 /* SUBPEL Params initialized here */
6201 /*************************************************************************/
6202 {
6203 s_subpel_prms.ps_search_results_16x16 = &ps_ctxt->as_search_results_16x16[0];
6204 s_subpel_prms.ps_search_results_32x32 = &ps_ctxt->as_search_results_32x32[0];
6205 s_subpel_prms.ps_search_results_64x64 = &ps_ctxt->s_search_results_64x64;
6206
6207 s_subpel_prms.i4_num_16x16_candts = ps_refine_prms->i4_num_fpel_results;
6208 s_subpel_prms.i4_num_32x32_candts = ps_refine_prms->i4_num_32x32_merge_results;
6209 s_subpel_prms.i4_num_64x64_candts = ps_refine_prms->i4_num_64x64_merge_results;
6210
6211 s_subpel_prms.i4_num_steps_hpel_refine = ps_refine_prms->i4_num_steps_hpel_refine;
6212 s_subpel_prms.i4_num_steps_qpel_refine = ps_refine_prms->i4_num_steps_qpel_refine;
6213
6214 s_subpel_prms.i4_use_satd = ps_refine_prms->i4_use_satd_subpel;
6215
6216 s_subpel_prms.i4_inp_stride = unit_size;
6217
6218 s_subpel_prms.u1_max_subpel_candts_2Nx2N = ps_refine_prms->u1_max_subpel_candts_2Nx2N;
6219 s_subpel_prms.u1_max_subpel_candts_NxN = ps_refine_prms->u1_max_subpel_candts_NxN;
6220 s_subpel_prms.u1_subpel_candt_threshold = ps_refine_prms->u1_subpel_candt_threshold;
6221
6222 s_subpel_prms.pf_qpel_interp = ps_me_optimised_function_list->pf_qpel_interp_avg_generic;
6223
6224 {
6225 WORD32 ref_ctr;
6226 for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6227 {
6228 s_subpel_prms.aps_mv_range_hpel[ref_ctr] = &as_range_prms_hpel[ref_ctr];
6229 s_subpel_prms.aps_mv_range_qpel[ref_ctr] = &as_range_prms_qpel[ref_ctr];
6230 }
6231 }
6232 s_subpel_prms.pi2_inp_bck = ps_ctxt->pi2_inp_bck;
6233
6234 #if USE_MODIFIED == 0
6235 s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
6236 #else
6237 s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
6238 #endif
6239 s_subpel_prms.e_me_quality_presets = e_me_quality_presets;
6240
6241 /* BI Refinement done only if this field is 1 */
6242 s_subpel_prms.bidir_enabled = ps_refine_prms->bidir_enabled;
6243
6244 s_subpel_prms.u1_num_ref = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
6245
6246 s_subpel_prms.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6247 s_subpel_prms.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6248 s_subpel_prms.u1_max_num_subpel_refine_centers =
6249 ps_refine_prms->u1_max_num_subpel_refine_centers;
6250 }
6251
6252 /* inter_ctb_prms_t struct initialisation */
6253 {
6254 inter_ctb_prms_t *ps_inter_ctb_prms = &s_common_frm_prms;
6255 hme_subpel_prms_t *ps_subpel_prms = &s_subpel_prms;
6256
6257 ps_inter_ctb_prms->pps_rec_list_l0 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l0;
6258 ps_inter_ctb_prms->pps_rec_list_l1 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l1;
6259 ps_inter_ctb_prms->wpred_log_wdc = ps_ctxt->s_wt_pred.wpred_log_wdc;
6260 ps_inter_ctb_prms->u1_max_tr_depth = ps_thrd_ctxt->s_init_prms.u1_max_tr_depth;
6261 ps_inter_ctb_prms->i1_quality_preset = e_me_quality_presets;
6262 ps_inter_ctb_prms->i4_bidir_enabled = ps_subpel_prms->bidir_enabled;
6263 ps_inter_ctb_prms->i4_inp_stride = ps_subpel_prms->i4_inp_stride;
6264 ps_inter_ctb_prms->u1_num_ref = ps_subpel_prms->u1_num_ref;
6265 ps_inter_ctb_prms->u1_use_satd = ps_subpel_prms->i4_use_satd;
6266 ps_inter_ctb_prms->i4_rec_stride = ps_curr_layer->i4_rec_stride;
6267 ps_inter_ctb_prms->u1_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6268 ps_inter_ctb_prms->u1_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6269 ps_inter_ctb_prms->i4_lamda = lambda_recon;
6270 ps_inter_ctb_prms->u1_lamda_qshift = ps_refine_prms->lambda_q_shift;
6271 ps_inter_ctb_prms->i4_qstep_ls8 = ps_ctxt->ps_hme_frm_prms->qstep_ls8;
6272 ps_inter_ctb_prms->pi4_inv_wt = ps_ctxt->s_wt_pred.a_inv_wpred_wt;
6273 ps_inter_ctb_prms->pi1_past_list = ps_ctxt->ai1_past_list;
6274 ps_inter_ctb_prms->pi1_future_list = ps_ctxt->ai1_future_list;
6275 ps_inter_ctb_prms->pu4_src_variance = s_search_prms_blk.au4_src_variance;
6276 ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands =
6277 ps_refine_prms->u1_max_2nx2n_tu_recur_cands;
6278 }
6279
6280 for(i = 0; i < MAX_INIT_CANDTS; i++)
6281 {
6282 ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
6283 ps_search_candts[i].ps_search_node->ps_mv = &ps_ctxt->as_search_cand_mv[i];
6284
6285 INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
6286 }
6287 num_act_ref_pics =
6288 ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6289
6290 if(num_act_ref_pics)
6291 {
6292 hme_search_cand_data_init(
6293 ai4_id_Z,
6294 ai4_id_coloc,
6295 ai4_num_coloc_cands,
6296 au1_search_candidate_list_index,
6297 i4_num_act_ref_l0,
6298 i4_num_act_ref_l1,
6299 ps_ctxt->s_frm_prms.bidir_enabled,
6300 blk_4x4_to_16x16);
6301 }
6302
6303 if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 1))
6304 {
6305 ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
6306 ps_search_candts[ai4_id_Z[1]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[1];
6307 }
6308 else if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 == 1))
6309 {
6310 ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
6311 }
6312
6313 for(i = 0; i < 3; i++)
6314 {
6315 search_node_t *ps_search_node;
6316 ps_search_node = &as_left_neighbours[i];
6317 INIT_SEARCH_NODE(ps_search_node, 0);
6318 ps_search_node = &as_top_neighbours[i];
6319 INIT_SEARCH_NODE(ps_search_node, 0);
6320 }
6321
6322 INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
6323 as_left_neighbours[2].u1_is_avail = 0;
6324
6325 /*************************************************************************/
6326 /* Initialize all the search results structure here. We update all the */
6327 /* search results to default values, and configure things like blk sizes */
6328 /*************************************************************************/
6329 if(num_act_ref_pics)
6330 {
6331 S32 i4_x, i4_y;
6332 /* 16x16 results */
6333 for(i = 0; i < 16; i++)
6334 {
6335 search_results_t *ps_search_results;
6336 S32 pred_lx;
6337 ps_search_results = &ps_ctxt->as_search_results_16x16[i];
6338 i4_x = (S32)gau1_encode_to_raster_x[i];
6339 i4_y = (S32)gau1_encode_to_raster_y[i];
6340 i4_x <<= 4;
6341 i4_y <<= 4;
6342
6343 hme_init_search_results(
6344 ps_search_results,
6345 i4_num_pred_dir,
6346 ps_refine_prms->i4_num_fpel_results,
6347 ps_refine_prms->i4_num_results_per_part,
6348 e_search_blk_size,
6349 i4_x,
6350 i4_y,
6351 &ps_ctxt->au1_is_past[0]);
6352
6353 for(pred_lx = 0; pred_lx < 2; pred_lx++)
6354 {
6355 pred_ctxt_t *ps_pred_ctxt;
6356
6357 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6358
6359 hme_init_pred_ctxt_encode(
6360 ps_pred_ctxt,
6361 ps_search_results,
6362 ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6363 ps_search_candts[ai4_id_Z[0]].ps_search_node,
6364 aps_mv_grid[pred_lx],
6365 pred_lx,
6366 lambda_recon,
6367 ps_refine_prms->lambda_q_shift,
6368 &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6369 &ps_ctxt->ai2_ref_scf[0]);
6370 }
6371 }
6372
6373 for(i = 0; i < 4; i++)
6374 {
6375 search_results_t *ps_search_results;
6376 S32 pred_lx;
6377 ps_search_results = &ps_ctxt->as_search_results_32x32[i];
6378
6379 i4_x = (S32)gau1_encode_to_raster_x[i];
6380 i4_y = (S32)gau1_encode_to_raster_y[i];
6381 i4_x <<= 5;
6382 i4_y <<= 5;
6383
6384 hme_init_search_results(
6385 ps_search_results,
6386 i4_num_pred_dir,
6387 ps_refine_prms->i4_num_32x32_merge_results,
6388 ps_refine_prms->i4_num_results_per_part,
6389 BLK_32x32,
6390 i4_x,
6391 i4_y,
6392 &ps_ctxt->au1_is_past[0]);
6393
6394 for(pred_lx = 0; pred_lx < 2; pred_lx++)
6395 {
6396 pred_ctxt_t *ps_pred_ctxt;
6397
6398 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6399
6400 hme_init_pred_ctxt_encode(
6401 ps_pred_ctxt,
6402 ps_search_results,
6403 ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6404 ps_search_candts[ai4_id_Z[0]].ps_search_node,
6405 aps_mv_grid[pred_lx],
6406 pred_lx,
6407 lambda_recon,
6408 ps_refine_prms->lambda_q_shift,
6409 &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6410 &ps_ctxt->ai2_ref_scf[0]);
6411 }
6412 }
6413
6414 {
6415 search_results_t *ps_search_results;
6416 S32 pred_lx;
6417 ps_search_results = &ps_ctxt->s_search_results_64x64;
6418
6419 hme_init_search_results(
6420 ps_search_results,
6421 i4_num_pred_dir,
6422 ps_refine_prms->i4_num_64x64_merge_results,
6423 ps_refine_prms->i4_num_results_per_part,
6424 BLK_64x64,
6425 0,
6426 0,
6427 &ps_ctxt->au1_is_past[0]);
6428
6429 for(pred_lx = 0; pred_lx < 2; pred_lx++)
6430 {
6431 pred_ctxt_t *ps_pred_ctxt;
6432
6433 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6434
6435 hme_init_pred_ctxt_encode(
6436 ps_pred_ctxt,
6437 ps_search_results,
6438 ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6439 ps_search_candts[ai4_id_Z[0]].ps_search_node,
6440 aps_mv_grid[pred_lx],
6441 pred_lx,
6442 lambda_recon,
6443 ps_refine_prms->lambda_q_shift,
6444 &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6445 &ps_ctxt->ai2_ref_scf[0]);
6446 }
6447 }
6448 }
6449
6450 /* Initialise the structure used in clustering */
6451 if(ME_PRISTINE_QUALITY == e_me_quality_presets)
6452 {
6453 ps_ctb_cluster_info = ps_ctxt->ps_ctb_cluster_info;
6454
6455 ps_ctb_cluster_info->ps_16x16_blk = ps_ctxt->ps_blk_16x16;
6456 ps_ctb_cluster_info->ps_32x32_blk = ps_ctxt->ps_blk_32x32;
6457 ps_ctb_cluster_info->ps_64x64_blk = ps_ctxt->ps_blk_64x64;
6458 ps_ctb_cluster_info->pi4_blk_8x8_mask = ai4_blk_8x8_mask;
6459 ps_ctb_cluster_info->sdi_threshold = ps_refine_prms->sdi_threshold;
6460 ps_ctb_cluster_info->i4_frame_qstep = ps_ctxt->frm_qstep;
6461 ps_ctb_cluster_info->i4_frame_qstep_multiplier = 16;
6462 }
6463
6464 /*********************************************************************/
6465 /* Initialize the dyn. search range params. for each reference index */
6466 /* in current layer ctxt */
6467 /*********************************************************************/
6468
6469 /* Only for P pic. For P, both are 0; I & B have them mutually exclusive */
6470 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
6471 {
6472 WORD32 ref_ctr;
6473 /* set no. of act ref in L0 for further use at frame level */
6474 ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_num_act_ref_in_l0 =
6475 ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6476
6477 for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++)
6478 {
6479 INIT_DYN_SEARCH_PRMS(
6480 &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[ref_ctr],
6481 ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
6482 }
6483 }
6484 /*************************************************************************/
6485 /* Now that the candidates have been ordered, choose the right number */
6486 /* of initial candidates. */
6487 /*************************************************************************/
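/* The counts below are doubled for non-bidir (P) frames whenever the previous layer tracked two or more references */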
6488 if(blk_4x4_to_16x16)
6489 {
6490 if(i4_num_ref_prev_layer > 2)
6491 {
6492 if(e_search_complexity == SEARCH_CX_LOW)
6493 num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6494 else if(e_search_complexity == SEARCH_CX_MED)
6495 num_init_candts = 14 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6496 else if(e_search_complexity == SEARCH_CX_HIGH)
6497 num_init_candts = 21 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6498 else
6499 ASSERT(0);
6500 }
6501 else if(i4_num_ref_prev_layer == 2)
6502 {
6503 if(e_search_complexity == SEARCH_CX_LOW)
6504 num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6505 else if(e_search_complexity == SEARCH_CX_MED)
6506 num_init_candts = 12 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6507 else if(e_search_complexity == SEARCH_CX_HIGH)
6508 num_init_candts = 19 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6509 else
6510 ASSERT(0);
6511 }
6512 else
6513 {
6514 if(e_search_complexity == SEARCH_CX_LOW)
6515 num_init_candts = 5;
6516 else if(e_search_complexity == SEARCH_CX_MED)
6517 num_init_candts = 12;
6518 else if(e_search_complexity == SEARCH_CX_HIGH)
6519 num_init_candts = 19;
6520 else
6521 ASSERT(0);
6522 }
6523 }
6524 else
6525 {
6526 if(i4_num_ref_prev_layer > 2)
6527 {
6528 if(e_search_complexity == SEARCH_CX_LOW)
6529 num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6530 else if(e_search_complexity == SEARCH_CX_MED)
6531 num_init_candts = 13 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6532 else if(e_search_complexity == SEARCH_CX_HIGH)
6533 num_init_candts = 18 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6534 else
6535 ASSERT(0);
6536 }
6537 else if(i4_num_ref_prev_layer == 2)
6538 {
6539 if(e_search_complexity == SEARCH_CX_LOW)
6540 num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6541 else if(e_search_complexity == SEARCH_CX_MED)
6542 num_init_candts = 11 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6543 else if(e_search_complexity == SEARCH_CX_HIGH)
6544 num_init_candts = 16 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6545 else
6546 ASSERT(0);
6547 }
6548 else
6549 {
6550 if(e_search_complexity == SEARCH_CX_LOW)
6551 num_init_candts = 5;
6552 else if(e_search_complexity == SEARCH_CX_MED)
6553 num_init_candts = 11;
6554 else if(e_search_complexity == SEARCH_CX_HIGH)
6555 num_init_candts = 16;
6556 else
6557 ASSERT(0);
6558 }
6559 }
6560
6561 /*************************************************************************/
6562 /* The following search parameters are fixed throughout the search across*/
6563 /* all blks. So these are configured outside processing loop */
6564 /*************************************************************************/
6565 s_search_prms_blk.i4_num_init_candts = num_init_candts;
6566 s_search_prms_blk.i4_start_step = 1;
6567 s_search_prms_blk.i4_use_satd = 0;
6568 s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
6569 /* we use recon only for encoded layers, otherwise it is not available */
6570 s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
6571
6572 s_search_prms_blk.ps_search_candts = ps_search_candts;
6573 if(s_search_prms_blk.i4_use_rec)
6574 {
6575 WORD32 ref_ctr;
6576 for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6577 s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_rec[ref_ctr];
6578 }
6579 else
6580 {
6581 WORD32 ref_ctr;
6582 for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6583 s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_inp[ref_ctr];
6584 }
6585
6586 /*************************************************************************/
6587 /* Initialize coordinates. Meaning as follows */
6588 /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks */
6589 /* blk_y : same as above, y coord. */
6590 /* num_blks_in_this_ctb : number of blks in this given ctb that starts */
6591 /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries. */
6592 /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left */
6593 /* corner of the picture. Always multiple of 64. */
6594 /* blk_id_in_ctb : encode order id of the blk in the ctb. */
6595 /*************************************************************************/
6596 blk_y = 0;
6597 blk_id_in_ctb = 0;
6598 i4_ctb_y = 0;
6599
6600 /*************************************************************************/
6601 /* Picture limit on all 4 sides. This will be used to set mv limits for */
6602 /* every block given its coordinate. Note this assumes that the min amt */
6603 /* of padding to right of pic is equal to the blk size. If we go all the */
6604 /* way up to 64x64, then the min padding on right side of picture should */
6605 /* be 64, and also on bottom side of picture. */
6606 /*************************************************************************/
6607 SET_PIC_LIMIT(
6608 s_pic_limit_inp,
6609 ps_curr_layer->i4_pad_x_rec,
6610 ps_curr_layer->i4_pad_y_rec,
6611 ps_curr_layer->i4_wd,
6612 ps_curr_layer->i4_ht,
6613 s_search_prms_blk.i4_num_steps_post_refine);
6614
6615 SET_PIC_LIMIT(
6616 s_pic_limit_rec,
6617 ps_curr_layer->i4_pad_x_rec,
6618 ps_curr_layer->i4_pad_y_rec,
6619 ps_curr_layer->i4_wd,
6620 ps_curr_layer->i4_ht,
6621 s_search_prms_blk.i4_num_steps_post_refine);
6622
6623 /*************************************************************************/
6624 /* set the MV limit per ref. pic. */
6625 /* - P pic. : Based on the config params. */
6626 /* - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
6627 /*************************************************************************/
6628 hme_set_mv_limit_using_dvsr_data(
6629 ps_ctxt, ps_curr_layer, as_mv_limit, &i2_prev_enc_frm_max_mv_y, num_act_ref_pics);
6630 s_srch_cand_init_data.pu1_num_fpel_search_cands = ps_refine_prms->au1_num_fpel_search_cands;
6631 s_srch_cand_init_data.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6632 s_srch_cand_init_data.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6633 s_srch_cand_init_data.ps_coarse_layer = ps_coarse_layer;
6634 s_srch_cand_init_data.ps_curr_layer = ps_curr_layer;
6635 s_srch_cand_init_data.i4_max_num_init_cands = num_init_candts;
6636 s_srch_cand_init_data.ps_search_cands = ps_search_candts;
6637 s_srch_cand_init_data.u1_num_results_in_mvbank = s_mv_update_prms.i4_num_results_to_store;
6638 s_srch_cand_init_data.pi4_ref_id_lc_to_l0_map = ps_ctxt->a_ref_idx_lc_to_l0;
6639 s_srch_cand_init_data.pi4_ref_id_lc_to_l1_map = ps_ctxt->a_ref_idx_lc_to_l1;
6640 s_srch_cand_init_data.e_search_blk_size = e_search_blk_size;
6641
6642 while(0 == end_of_frame)
6643 {
6644 job_queue_t *ps_job;
6645 frm_ctb_ctxt_t *ps_frm_ctb_prms;
6646 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
6647
6648 WORD32 i4_max_mv_x_in_ctb;
6649 WORD32 i4_max_mv_y_in_ctb;
6650 void *pv_dep_mngr_encloop_dep_me;
6651 WORD32 offset_val, check_dep_pos, set_dep_pos;
6652 WORD32 left_ctb_in_diff_tile, i4_first_ctb_x = 0;
6653
6654 pv_dep_mngr_encloop_dep_me = ps_ctxt->pv_dep_mngr_encloop_dep_me;
6655
6656 ps_frm_ctb_prms = (frm_ctb_ctxt_t *)ps_thrd_ctxt->pv_ext_frm_prms;
6657
6658 /* Get the current row from the job queue */
6659 ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
6660 ps_multi_thrd_ctxt, lyr_job_type, 1, me_frm_id);
6661
6662 /* If all rows are done, set the end of process flag to 1, */
6663 /* and the current row to -1 */
6664 if(NULL == ps_job)
6665 {
6666 blk_y = -1;
6667 i4_ctb_y = -1;
6668 tile_col_idx = -1;
6669 end_of_frame = 1;
6670
6671 continue;
6672 }
6673
6674 /* set the output dependency after picking up the row */
6675 ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, me_frm_id);
6676
6677 /* Obtain the current row's details from the job */
6678 {
6679 ihevce_tile_params_t *ps_col_tile_params;
6680
6681 i4_ctb_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
6682 /* Obtain the current colum tile index from the job */
6683 tile_col_idx = ps_job->s_job_info.s_me_job_info.i4_tile_col_idx;
6684
6685 /* in encode layer block are 16x16 and CTB is 64 x 64 */
6686 /* note: if ctb is 32x32 then this calc needs to be changed */
6687 num_sync_units_in_row = (i4_pic_wd + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
6688 ps_ctxt->log_ctb_size;
6689
6690 /* The tile parameter for the col. idx. Use only the properties
6691 which are the same for all the bottom tiles like width, start_x, etc.
6692 Don't use height, start_y, etc. */
6693 ps_col_tile_params =
6694 ((ihevce_tile_params_t *)ps_thrd_ctxt->pv_tile_params_base + tile_col_idx);
6695 /* in encode layer block are 16x16 and CTB is 64 x 64 */
6696 /* note: if ctb is 32x32 then this calc needs to be changed */
6697 num_sync_units_in_tile =
6698 (ps_col_tile_params->i4_curr_tile_width + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
6699 ps_ctxt->log_ctb_size;
6700
6701 i4_first_ctb_x = ps_col_tile_params->i4_first_ctb_x;
6702 i4_ctb_x = i4_first_ctb_x;
6703
6704 if(!num_act_ref_pics)
6705 {
6706 for(i4_ctb_x = i4_first_ctb_x;
6707 i4_ctb_x < (ps_col_tile_params->i4_first_ctb_x + num_sync_units_in_tile);
6708 i4_ctb_x++)
6709 {
6710 S32 blk_i = 0, blk_j = 0;
6711 /* set the dependency for the corresponding row in enc loop */
6712 ihevce_dmgr_set_row_row_sync(
6713 pv_dep_mngr_encloop_dep_me,
6714 (i4_ctb_x + 1),
6715 i4_ctb_y,
6716 tile_col_idx /* Col Tile No. */);
6717 }
6718
6719 continue;
6720 }
6721
6722 /* increment the number of rows proc */
6723 num_rows_proc++;
6724
6725 /* Set Variables for Dep. Checking and Setting */
6726 set_dep_pos = i4_ctb_y + 1;
6727 if(i4_ctb_y > 0)
6728 {
6729 offset_val = 2;
6730 check_dep_pos = i4_ctb_y - 1;
6731 }
6732 else
6733 {
6734 /* First row should run without waiting */
6735 offset_val = -1;
6736 check_dep_pos = 0;
6737 }
6738
6739 /* row ctb out pointer */
6740 ps_ctxt->ps_ctb_analyse_curr_row =
6741 ps_ctxt->ps_ctb_analyse_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6742
6743 /* Row level CU Tree buffer */
6744 ps_ctxt->ps_cu_tree_curr_row =
6745 ps_ctxt->ps_cu_tree_base +
6746 i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE;
6747
6748 ps_ctxt->ps_me_ctb_data_curr_row =
6749 ps_ctxt->ps_me_ctb_data_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6750 }
6751
6752 /* This flag says the CTB under processing is at the start of tile in horz dir.*/
6753 left_ctb_in_diff_tile = 1;
6754
6755 /* To make sure no 64-bit overflow happens when inv_wt is multiplied with un-normalized src_var, */
6756 /* the shift value will be passed on to the functions wherever inv_wt is used so that inv_wt is appropriately shifted and multiplied */
6757 {
6758 S32 i4_ref_id, i4_bits_req;
6759
6760 for(i4_ref_id = 0; i4_ref_id < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
6761 ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
6762 i4_ref_id++)
6763 {
6764 GETRANGE(i4_bits_req, ps_ctxt->s_wt_pred.a_inv_wpred_wt[i4_ref_id]);
6765
6766 if(i4_bits_req > 12)
6767 {
6768 ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = (i4_bits_req - 12);
6769 }
6770 else
6771 {
6772 ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = 0;
6773 }
6774 }
6775
6776 s_common_frm_prms.pi4_inv_wt_shift_val = ps_ctxt->s_wt_pred.ai4_shift_val;
6777 }
6778
6779 /* if non-encode layer then i4_ctb_x will be same as blk_x */
6780 /* loop over all the units in a row */
6781 for(i4_ctb_x = i4_first_ctb_x; i4_ctb_x < (i4_first_ctb_x + num_sync_units_in_tile);
6782 i4_ctb_x++)
6783 {
6784 ihevce_ctb_noise_params *ps_ctb_noise_params =
6785 &ps_ctxt->ps_ctb_analyse_curr_row[i4_ctb_x].s_ctb_noise_params;
6786
6787 s_common_frm_prms.i4_ctb_x_off = i4_ctb_x << 6;
6788 s_common_frm_prms.i4_ctb_y_off = i4_ctb_y << 6;
6789
6790 ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = i4_ctb_y << 6;
6791 ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = i4_ctb_x << 6;
6792 /* Initialize ptr to current IPE CTB */
6793 ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x +
6794 i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6795 {
6796 ps_ctb_bound_attrs =
6797 get_ctb_attrs(i4_ctb_x << 6, i4_ctb_y << 6, i4_pic_wd, i4_pic_ht, ps_ctxt);
6798
6799 en_merge_32x32 = ps_ctb_bound_attrs->u1_merge_to_32x32_flag;
6800 num_blks_in_this_ctb = ps_ctb_bound_attrs->u1_num_blks_in_ctb;
6801 }
6802
6803 /* Block to initialise pointers to part_type_results_t */
6804 /* in each size-specific inter_cu_results_t */
6805 {
6806 WORD32 i;
6807
6808 for(i = 0; i < 64; i++)
6809 {
6810 ps_ctxt->as_cu8x8_results[i].ps_best_results =
6811 ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
6812 .as_8x8_block_data[i]
6813 .as_best_results;
6814 ps_ctxt->as_cu8x8_results[i].u1_num_best_results = 0;
6815 }
6816
6817 for(i = 0; i < 16; i++)
6818 {
6819 ps_ctxt->as_cu16x16_results[i].ps_best_results =
6820 ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].as_block_data[i].as_best_results;
6821 ps_ctxt->as_cu16x16_results[i].u1_num_best_results = 0;
6822 }
6823
6824 for(i = 0; i < 4; i++)
6825 {
6826 ps_ctxt->as_cu32x32_results[i].ps_best_results =
6827 ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
6828 .as_32x32_block_data[i]
6829 .as_best_results;
6830 ps_ctxt->as_cu32x32_results[i].u1_num_best_results = 0;
6831 }
6832
6833 ps_ctxt->s_cu64x64_results.ps_best_results =
6834 ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].s_64x64_block_data.as_best_results;
6835 ps_ctxt->s_cu64x64_results.u1_num_best_results = 0;
6836 }
6837
6838 if(ME_PRISTINE_QUALITY == e_me_quality_presets)
6839 {
6840 ps_ctb_cluster_info->blk_32x32_mask = en_merge_32x32;
6841 ps_ctb_cluster_info->ps_cur_ipe_ctb = ps_cur_ipe_ctb;
6842 ps_ctb_cluster_info->ps_cu_tree_root =
6843 ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
6844 ps_ctb_cluster_info->nodes_created_in_cu_tree = 1;
6845 }
6846
6847 if(ME_PRISTINE_QUALITY != e_me_quality_presets)
6848 {
6849 S32 i4_nodes_created_in_cu_tree = 1;
6850
6851 ihevce_cu_tree_init(
6852 (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
6853 (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
6854 &i4_nodes_created_in_cu_tree,
6855 0,
6856 POS_NA,
6857 POS_NA,
6858 POS_NA);
6859 }
6860
6861 memset(ai4_blk_8x8_mask, 0, 16 * sizeof(S32));
6862
6863 if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
6864 {
6865 S32 j;
6866
6867 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
6868
6869 ps_cur_ipe_ctb =
6870 ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x + i4_ctb_y * num_sync_units_in_row;
6871 lambda_recon =
6872 hme_recompute_lambda_from_min_8x8_act_in_ctb(ps_ctxt, ps_cur_ipe_ctb);
6873
6874 lambda_recon = ((float)lambda_recon * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f);
6875
6876 for(i = 0; i < 4; i++)
6877 {
6878 ps_search_results = &ps_ctxt->as_search_results_32x32[i];
6879
6880 for(j = 0; j < 2; j++)
6881 {
6882 ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
6883 }
6884 }
6885 ps_search_results = &ps_ctxt->s_search_results_64x64;
6886
6887 for(j = 0; j < 2; j++)
6888 {
6889 ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
6890 }
6891
6892 s_common_frm_prms.i4_lamda = lambda_recon;
6893 }
6894 else
6895 {
6896 lambda_recon = ps_refine_prms->lambda_recon;
6897 }
6898
6899 /*********************************************************************/
6900 /* replicate the inp buffer at blk or ctb level for each ref id, */
6901 /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
6902 /* thereby avoiding a bloat up of memory. If we did weighted pred on */
6903 /* all references, we would end up with a duplicate copy of each ref */
6904 /* at each layer, since we need to preserve the original reference. */
6905 /* ToDo: Need to observe performance with this mechanism and compare */
6906 /* with case where ref is weighted. */
6907 /*********************************************************************/
6908 fp_get_wt_inp(
6909 ps_curr_layer,
6910 &ps_ctxt->s_wt_pred,
6911 unit_size,
6912 s_common_frm_prms.i4_ctb_x_off,
6913 s_common_frm_prms.i4_ctb_y_off,
6914 unit_size,
6915 ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
6916 ps_ctxt->i4_wt_pred_enable_flag);
6917
6918 if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled)
6919 {
6920 #if TEMPORAL_NOISE_DETECT
6921 {
6922 WORD32 had_block_size = 16;
6923 WORD32 ctb_width = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
6924 ? 64
6925 : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
6926 WORD32 ctb_height = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
6927 ? 64
6928 : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
6929 WORD32 num_pred_dir = i4_num_pred_dir;
6930 WORD32 i4_x_off = s_common_frm_prms.i4_ctb_x_off;
6931 WORD32 i4_y_off = s_common_frm_prms.i4_ctb_y_off;
6932
6933 WORD32 i;
6934 WORD32 noise_detected;
6935 WORD32 ctb_size;
6936 WORD32 num_comp_had_blocks;
6937 WORD32 noisy_block_cnt;
6938 WORD32 index_8x8_block;
6939 WORD32 num_8x8_in_ctb_row;
6940
6941 WORD32 ht_offset;
6942 WORD32 wd_offset;
6943 WORD32 block_ht;
6944 WORD32 block_wd;
6945
6946 WORD32 num_horz_blocks;
6947 WORD32 num_vert_blocks;
6948
6949 WORD32 mean;
6950 UWORD32 variance_8x8;
6951
6952 WORD32 hh_energy_percent;
6953
6954                 /* variables to hold the threshold constants; their values are decided by the HAD block size */
6955 WORD32 min_noisy_block_cnt;
6956 WORD32 min_coeffs_above_avg;
6957 WORD32 min_coeff_avg_energy;
6958
6959                 /* to store the mean and variance of each 8x8 block; variances of larger block sizes are derived from these later on */
6960 WORD32 i4_cu_x_off, i4_cu_y_off;
6961 WORD32 is_noisy;
6962
6963                 /* initialise the variables holding the constants */
6964 if(had_block_size == 8)
6965 {
6966 min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_8x8; //6;//
6967 min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_8x8;
6968 min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_8x8;
6969 }
6970 else
6971 {
6972 min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_16x16; //7;//
6973 min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_16x16;
6974 min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_16x16;
6975 }
6976
6977 /* initialize the variables */
6978 noise_detected = 0;
6979 noisy_block_cnt = 0;
6980 hh_energy_percent = 0;
6981 variance_8x8 = 0;
6982 block_ht = ctb_height;
6983 block_wd = ctb_width;
6984
6985 mean = 0;
6986
6987 ctb_size = block_ht * block_wd; //ctb_width * ctb_height;
6988 num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
6989
6990 num_horz_blocks = block_wd / had_block_size; //ctb_width / had_block_size;
6991 num_vert_blocks = block_ht / had_block_size; //ctb_height / had_block_size;
6992
6993 ht_offset = -had_block_size;
6994 wd_offset = -had_block_size;
6995
6996 num_8x8_in_ctb_row = block_wd / 8; // number of 8x8 in this ctb
6997 for(i = 0; i < num_comp_had_blocks; i++)
6998 {
6999 if(i % num_horz_blocks == 0)
7000 {
7001 wd_offset = -had_block_size;
7002 ht_offset += had_block_size;
7003 }
7004 wd_offset += had_block_size;
7005
7006 /* CU level offsets */
7007 i4_cu_x_off = i4_x_off + (i % 4) * 16; //+ (i % 4) * 16
7008 i4_cu_y_off = i4_y_off + (i / 4) * 16;
7009
7010                     /* the return value is 1 if 50% or more of the CU is noisy */
7011 is_noisy = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7012 ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7013 (i % 4) * 16,
7014 (i / 4) * 16,
7015 16);
7016
7017                     /* the temporal noise detection call is made on the CU only if the CU is noisy */
7018 if(is_noisy)
7019 {
7020 index_8x8_block = (i / num_horz_blocks) * 2 * num_8x8_in_ctb_row +
7021 (i % num_horz_blocks) * 2;
7022 noisy_block_cnt += ihevce_16x16block_temporal_noise_detect(
7023 16,
7024 ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
7025 ? 64
7026 : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off,
7027 ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
7028 ? 64
7029 : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off,
7030 ps_ctb_noise_params,
7031 &s_srch_cand_init_data,
7032 &s_search_prms_blk,
7033 ps_ctxt,
7034 num_pred_dir,
7035 i4_num_act_ref_l0,
7036 i4_num_act_ref_l1,
7037 i4_cu_x_off,
7038 i4_cu_y_off,
7039 &ps_ctxt->s_wt_pred,
7040 unit_size,
7041 index_8x8_block,
7042 num_horz_blocks,
7043                             /*num_8x8_in_ctb_row*/ 8, // ToDo: this should be a variable
7044 i);
7045 } /* if 16x16 is noisy */
7046 } /* loop over for all 16x16*/
7047
7048 if(noisy_block_cnt >= min_noisy_block_cnt)
7049 {
7050 noise_detected = 1;
7051 }
7052
7053 /* write back the noise presence detected for the current CTB to the structure */
7054 ps_ctb_noise_params->i4_noise_present = noise_detected;
7055 }
7056 #endif
7057
7058 #if EVERYWHERE_NOISY && USE_NOISE_TERM_IN_L0_ME
7059 if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled &&
7060 ps_ctb_noise_params->i4_noise_present)
7061 {
7062 memset(
7063 ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7064 1,
7065 sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
7066 }
7067 #endif
7068
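            /* Derive noise flags at 16x16, 32x32 and 64x64 granularities from the 8x8 noise map */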
7069 for(i = 0; i < 16; i++)
7070 {
7071 au1_is_16x16Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7072 ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 4) * 16, (i / 4) * 16, 16);
7073 }
7074
7075 for(i = 0; i < 4; i++)
7076 {
7077 au1_is_32x32Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7078 ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 2) * 32, (i / 2) * 32, 32);
7079 }
7080
7081 for(i = 0; i < 1; i++)
7082 {
7083 au1_is_64x64Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7084 ps_ctb_noise_params->au1_is_8x8Blk_noisy, 0, 0, 64);
7085 }
7086
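            /* Noise detection is not used for B pictures at or below this temporal layer id */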
7087 if(ps_ctxt->s_frm_prms.bidir_enabled &&
7088 (ps_ctxt->s_frm_prms.i4_temporal_layer_id <=
7089 MAX_LAYER_ID_OF_B_PICS_WITHOUT_NOISE_DETECTION))
7090 {
7091 ps_ctb_noise_params->i4_noise_present = 0;
7092 memset(
7093 ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7094 0,
7095 sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
7096 }
7097
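            /* If a 32x32 block (or the full 64x64) is noisy, discount lambda so  */
            /* that the rate part of the ME cost weighs less for that block       */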
7098 #if ME_LAMBDA_DISCOUNT_WHEN_NOISY
7099 for(i = 0; i < 4; i++)
7100 {
7101 S32 j;
7102 S32 lambda;
7103
7104 if(au1_is_32x32Blk_noisy[i])
7105 {
7106 lambda = lambda_recon;
7107 lambda =
7108 ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7109
7110 ps_search_results = &ps_ctxt->as_search_results_32x32[i];
7111
7112 for(j = 0; j < 2; j++)
7113 {
7114 ps_search_results->as_pred_ctxt[j].lambda = lambda;
7115 }
7116 }
7117 }
7118
7119 {
7120 S32 j;
7121 S32 lambda;
7122
7123 if(au1_is_64x64Blk_noisy[0])
7124 {
7125 lambda = lambda_recon;
7126 lambda =
7127 ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7128
7129 ps_search_results = &ps_ctxt->s_search_results_64x64;
7130
7131 for(j = 0; j < 2; j++)
7132 {
7133 ps_search_results->as_pred_ctxt[j].lambda = lambda;
7134 }
7135 }
7136 }
7137 #endif
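            /* Compute per-4x4 sigmaX and sigmaX-squared of the source for the    */
            /* noisy blocks; these feed the stim term injected into the ME cost   */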
7138 if(au1_is_64x64Blk_noisy[0])
7139 {
7140 U08 *pu1_inp = ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
7141 (s_common_frm_prms.i4_ctb_y_off *
7142 ps_curr_layer->i4_inp_stride));
7143
7144 hme_compute_sigmaX_and_sigmaXSquared(
7145 pu1_inp,
7146 ps_curr_layer->i4_inp_stride,
7147 ps_ctxt->au4_4x4_src_sigmaX,
7148 ps_ctxt->au4_4x4_src_sigmaXSquared,
7149 4,
7150 4,
7151 64,
7152 64,
7153 1,
7154 16);
7155 }
7156 else
7157 {
7158 for(i = 0; i < 4; i++)
7159 {
7160 if(au1_is_32x32Blk_noisy[i])
7161 {
7162 U08 *pu1_inp =
7163 ps_curr_layer->pu1_inp +
7164 (s_common_frm_prms.i4_ctb_x_off +
7165 (s_common_frm_prms.i4_ctb_y_off * ps_curr_layer->i4_inp_stride));
7166
7167 U08 u1_cu_size = 32;
7168 WORD32 i4_inp_buf_offset =
7169 (((i / 2) * (u1_cu_size * ps_curr_layer->i4_inp_stride)) +
7170 ((i % 2) * u1_cu_size));
7171
7172 U16 u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb = 128;
7173 U16 u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb = 8;
7174 S32 i4_sigma_arr_offset =
7175 (((i / 2) * u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb) +
7176 ((i % 2) * u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb));
7177
7178 hme_compute_sigmaX_and_sigmaXSquared(
7179 pu1_inp + i4_inp_buf_offset,
7180 ps_curr_layer->i4_inp_stride,
7181 ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset,
7182 ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset,
7183 4,
7184 4,
7185 32,
7186 32,
7187 1,
7188 16);
7189 }
7190 else
7191 {
7192 S32 j;
7193
7194 U08 u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb = 8;
7195 U08 u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb = 2;
7196 S32 i4_16x16_blk_start_index_in_i_th_32x32_blk =
7197 (((i / 2) * u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb) +
7198 ((i % 2) * u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb));
7199
7200 for(j = 0; j < 4; j++)
7201 {
7202 U08 u1_3rd_16x16_blk_index_in_32x32_blk = 4;
7203 U08 u1_2nd_16x16_blk_index_in_32x32_blk = 1;
7204 S32 i4_16x16_blk_index_in_ctb =
7205 i4_16x16_blk_start_index_in_i_th_32x32_blk +
7206 ((j % 2) * u1_2nd_16x16_blk_index_in_32x32_blk) +
7207 ((j / 2) * u1_3rd_16x16_blk_index_in_32x32_blk);
7208
7209 //S32 k = (((i / 2) * 8) + ((i % 2) * 2)) + ((j % 2) * 1) + ((j / 2) * 4);
7210
7211 if(au1_is_16x16Blk_noisy[i4_16x16_blk_index_in_ctb])
7212 {
7213 U08 *pu1_inp =
7214 ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
7215 (s_common_frm_prms.i4_ctb_y_off *
7216 ps_curr_layer->i4_inp_stride));
7217
7218 U08 u1_cu_size = 16;
7219 WORD32 i4_inp_buf_offset =
7220 (((i4_16x16_blk_index_in_ctb % 4) * u1_cu_size) +
7221 ((i4_16x16_blk_index_in_ctb / 4) *
7222 (u1_cu_size * ps_curr_layer->i4_inp_stride)));
7223
7224 U16 u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk = 64;
7225 U16 u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk = 4;
7226 S32 i4_sigma_arr_offset =
7227 (((i4_16x16_blk_index_in_ctb % 4) *
7228 u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk) +
7229 ((i4_16x16_blk_index_in_ctb / 4) *
7230 u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk));
7231
7232 hme_compute_sigmaX_and_sigmaXSquared(
7233 pu1_inp + i4_inp_buf_offset,
7234 ps_curr_layer->i4_inp_stride,
7235 (ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset),
7236 (ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset),
7237 4,
7238 4,
7239 16,
7240 16,
7241 1,
7242 16);
7243 }
7244 }
7245 }
7246 }
7247 }
7248 }
7249 else
7250 {
7251 memset(au1_is_16x16Blk_noisy, 0, sizeof(au1_is_16x16Blk_noisy));
7252
7253 memset(au1_is_32x32Blk_noisy, 0, sizeof(au1_is_32x32Blk_noisy));
7254
7255 memset(au1_is_64x64Blk_noisy, 0, sizeof(au1_is_64x64Blk_noisy));
7256 }
7257
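        /*************************************************************************/
        /* Loop over all the 16x16 blocks of this CTB: set up search ranges and  */
        /* candidate lists, do full-pel and sub-pel refinement per prediction    */
        /* direction, then decide partition types and the 16x16 vs 8x8 split.    */
        /*************************************************************************/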
7258 for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
7259 {
7260 S32 ref_ctr;
7261 U08 au1_pred_dir_searched[2];
7262 U08 u1_is_cu_noisy;
7263 ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
7264
7265 {
7266 blk_x = (i4_ctb_x << 2) +
7267 (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_x);
7268 blk_y = (i4_ctb_y << 2) +
7269 (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_y);
7270
7271 blk_id_in_full_ctb =
7272 ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_id_in_full_ctb;
7273 blk_8x8_mask = ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_8x8_mask;
7274 ai4_blk_8x8_mask[blk_id_in_full_ctb] = blk_8x8_mask;
7275 s_search_prms_blk.i4_cu_x_off = (blk_x << blk_size_shift) - (i4_ctb_x << 6);
7276 s_search_prms_blk.i4_cu_y_off = (blk_y << blk_size_shift) - (i4_ctb_y << 6);
7277 }
7278
7279             /* get the current input blk pointer */
7280 pos_x = blk_x << blk_size_shift;
7281 pos_y = blk_y << blk_size_shift;
7282 pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
7283
7284 /*********************************************************************/
7285 /* For every blk in the picture, the search range needs to be derived*/
7286 /* Any blk can have any mv, but practical search constraints are */
7287 /* imposed by the picture boundary and amt of padding. */
7288 /*********************************************************************/
7289 /* MV limit is different based on ref. PIC */
7290 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
7291 {
7292 if(!s_search_prms_blk.i4_use_rec)
7293 {
7294 hme_derive_search_range(
7295 &as_range_prms_inp[ref_ctr],
7296 &s_pic_limit_inp,
7297 &as_mv_limit[ref_ctr],
7298 pos_x,
7299 pos_y,
7300 blk_wd,
7301 blk_ht);
7302 }
7303 else
7304 {
7305 hme_derive_search_range(
7306 &as_range_prms_rec[ref_ctr],
7307 &s_pic_limit_rec,
7308 &as_mv_limit[ref_ctr],
7309 pos_x,
7310 pos_y,
7311 blk_wd,
7312 blk_ht);
7313 }
7314 }
7315 s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
7316 s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
7317 /* Select search results from a suitable search result in the context */
7318 {
7319 ps_search_results = &ps_ctxt->as_search_results_16x16[blk_id_in_full_ctb];
7320
7321 if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
7322 {
7323 S32 i;
7324
7325 for(i = 0; i < 2; i++)
7326 {
7327 ps_search_results->as_pred_ctxt[i].lambda = lambda_recon;
7328 }
7329 }
7330 }
7331
7332 u1_is_cu_noisy = au1_is_16x16Blk_noisy
7333 [(s_search_prms_blk.i4_cu_x_off >> 4) + (s_search_prms_blk.i4_cu_y_off >> 2)];
7334
7335 s_subpel_prms.u1_is_cu_noisy = u1_is_cu_noisy;
7336
7337 #if ME_LAMBDA_DISCOUNT_WHEN_NOISY
7338 if(u1_is_cu_noisy)
7339 {
7340 S32 j;
7341 S32 lambda;
7342
7343 lambda = lambda_recon;
7344 lambda = ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7345
7346 for(j = 0; j < 2; j++)
7347 {
7348 ps_search_results->as_pred_ctxt[j].lambda = lambda;
7349 }
7350 }
7351 else
7352 {
7353 S32 j;
7354 S32 lambda;
7355
7356 lambda = lambda_recon;
7357
7358 for(j = 0; j < 2; j++)
7359 {
7360 ps_search_results->as_pred_ctxt[j].lambda = lambda;
7361 }
7362 }
7363 #endif
7364
7365 s_search_prms_blk.ps_search_results = ps_search_results;
7366
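            /* Decide which partition types are evaluated for this 16x16 block,   */
            /* based on the input block, quality preset, picture-level flags and  */
            /* the 8x8 validity mask                                              */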
7367 s_search_prms_blk.i4_part_mask = hme_part_mask_populator(
7368 pu1_inp,
7369 i4_inp_stride,
7370 ps_refine_prms->limit_active_partitions,
7371 ps_ctxt->ps_hme_frm_prms->bidir_enabled,
7372 ps_ctxt->u1_is_curFrame_a_refFrame,
7373 blk_8x8_mask,
7374 e_me_quality_presets);
7375
7376 if(ME_PRISTINE_QUALITY == e_me_quality_presets)
7377 {
7378 ps_ctb_cluster_info->ai4_part_mask[blk_id_in_full_ctb] =
7379 s_search_prms_blk.i4_part_mask;
7380 }
7381
7382 /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
7383 {
7384                 /* Setting u1_num_active_ref to 2 */
7385                 /* for the sole purpose of the */
7386                 /* function called below */
7387 ps_search_results->u1_num_active_ref = (ps_refine_prms->bidir_enabled) ? 2 : 1;
7388
7389 hme_reset_search_results(
7390 ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
7391
7392 ps_search_results->u1_num_active_ref = i4_num_pred_dir;
7393 }
7394
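            /* For the first block of the CTB, wait on the dependency manager     */
            /* until the required recon rows (current ctb + max MV extent + 1) of */
            /* every active reference are available                               */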
7395 if(0 == blk_id_in_ctb)
7396 {
7397 UWORD8 u1_ctr;
7398 for(u1_ctr = 0; u1_ctr < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
7399 ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
7400 u1_ctr++)
7401 {
7402 WORD32 i4_max_dep_ctb_y;
7403 WORD32 i4_max_dep_ctb_x;
7404
7405 /* Set max mv in ctb units */
7406 i4_max_mv_x_in_ctb =
7407 (ps_curr_layer->i2_max_mv_x + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
7408 ps_ctxt->log_ctb_size;
7409
7410 i4_max_mv_y_in_ctb =
7411 (as_mv_limit[u1_ctr].i2_max_y + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
7412 ps_ctxt->log_ctb_size;
7413 /********************************************************************/
7414 /* Set max ctb_x and ctb_y dependency on reference picture */
7415 /* Note +1 is due to delayed deblock, SAO, subpel plan dependency */
7416 /********************************************************************/
7417 i4_max_dep_ctb_x = CLIP3(
7418 (i4_ctb_x + i4_max_mv_x_in_ctb + 1),
7419 0,
7420 ps_frm_ctb_prms->i4_num_ctbs_horz - 1);
7421 i4_max_dep_ctb_y = CLIP3(
7422 (i4_ctb_y + i4_max_mv_y_in_ctb + 1),
7423 0,
7424 ps_frm_ctb_prms->i4_num_ctbs_vert - 1);
7425
7426 ihevce_dmgr_map_chk_sync(
7427 ps_curr_layer->ppv_dep_mngr_recon[u1_ctr],
7428 ps_ctxt->thrd_id,
7429 i4_ctb_x,
7430 i4_ctb_y,
7431 i4_max_mv_x_in_ctb,
7432 i4_max_mv_y_in_ctb);
7433 }
7434 }
7435
7436 /* Loop across different Ref IDx */
7437 for(u1_pred_dir_ctr = 0; u1_pred_dir_ctr < i4_num_pred_dir; u1_pred_dir_ctr++)
7438 {
7439 S32 resultid;
7440 S08 u1_default_ref_id;
7441 S32 i4_num_srch_cands = 0;
7442 S32 i4_num_refinement_iterations;
7443 S32 i4_refine_iter_ctr;
7444
7445 if((i4_num_pred_dir == 2) || (!ps_ctxt->s_frm_prms.bidir_enabled) ||
7446 (ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0))
7447 {
7448 u1_pred_dir = u1_pred_dir_ctr;
7449 }
7450 else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
7451 {
7452 u1_pred_dir = 1;
7453 }
7454
7455 u1_default_ref_id = (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list[0]
7456 : ps_ctxt->ai1_future_list[0];
7457 au1_pred_dir_searched[u1_pred_dir_ctr] = u1_pred_dir;
7458
7459 i4_num_srch_cands = 0;
7460 resultid = 0;
7461
7462                 /* START OF NEW CTB MEANS FILL UP NEIGHBOURS IN 18x18 GRID */
7463 if(0 == blk_id_in_ctb)
7464 {
7465 /*****************************************************************/
7466 /* Initialize the mv grid with results of neighbours for the next*/
7467 /* ctb. */
7468 /*****************************************************************/
7469 hme_fill_ctb_neighbour_mvs(
7470 ps_curr_layer,
7471 blk_x,
7472 blk_y,
7473 aps_mv_grid[u1_pred_dir],
7474 u1_pred_dir_ctr,
7475 u1_default_ref_id,
7476 ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
7477 }
7478
7479 s_search_prms_blk.i1_ref_idx = u1_pred_dir;
7480
7481 {
7482 if((blk_id_in_full_ctb % 4) == 0)
7483 {
7484 ps_ctxt->as_search_results_32x32[blk_id_in_full_ctb >> 2]
7485 .as_pred_ctxt[u1_pred_dir]
7486 .proj_used = (blk_id_in_full_ctb == 8) ? 0 : 1;
7487 }
7488
7489 if(blk_id_in_full_ctb == 0)
7490 {
7491 ps_ctxt->s_search_results_64x64.as_pred_ctxt[u1_pred_dir].proj_used = 1;
7492 }
7493
7494 ps_search_results->as_pred_ctxt[u1_pred_dir].proj_used =
7495 !gau1_encode_to_raster_y[blk_id_in_full_ctb];
7496 }
7497
7498 {
7499 S32 x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
7500 S32 y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
7501 U08 u1_is_blk_at_ctb_boundary = !y;
7502
7503 s_srch_cand_init_data.u1_is_left_available =
7504 !(left_ctb_in_diff_tile && !s_search_prms_blk.i4_cu_x_off);
7505
7506 if(u1_is_blk_at_ctb_boundary)
7507 {
7508 s_srch_cand_init_data.u1_is_topRight_available = 0;
7509 s_srch_cand_init_data.u1_is_topLeft_available = 0;
7510 s_srch_cand_init_data.u1_is_top_available = 0;
7511 }
7512 else
7513 {
7514 s_srch_cand_init_data.u1_is_topRight_available =
7515 gau1_cu_tr_valid[y][x] && ((pos_x + blk_wd) < i4_pic_wd);
7516 s_srch_cand_init_data.u1_is_top_available = 1;
7517 s_srch_cand_init_data.u1_is_topLeft_available =
7518 s_srch_cand_init_data.u1_is_left_available;
7519 }
7520 }
7521
7522 s_srch_cand_init_data.i1_default_ref_id = u1_default_ref_id;
7523 s_srch_cand_init_data.i1_alt_default_ref_id = ps_ctxt->ai1_past_list[1];
7524 s_srch_cand_init_data.i4_pos_x = pos_x;
7525 s_srch_cand_init_data.i4_pos_y = pos_y;
7526 s_srch_cand_init_data.u1_pred_dir = u1_pred_dir;
7527 s_srch_cand_init_data.u1_pred_dir_ctr = u1_pred_dir_ctr;
7528 s_srch_cand_init_data.u1_search_candidate_list_index =
7529 au1_search_candidate_list_index[u1_pred_dir];
7530
7531 i4_num_srch_cands = hme_populate_search_candidates(&s_srch_cand_init_data);
7532
7533 /* Note this block also clips the MV range for all candidates */
7534 {
7535 S08 i1_check_for_mult_refs;
7536
7537 i1_check_for_mult_refs = u1_pred_dir ? (ps_ctxt->num_ref_future > 1)
7538 : (ps_ctxt->num_ref_past > 1);
7539
7540 ps_me_optimised_function_list->pf_mv_clipper(
7541 &s_search_prms_blk,
7542 i4_num_srch_cands,
7543 i1_check_for_mult_refs,
7544 ps_refine_prms->i4_num_steps_fpel_refine,
7545 ps_refine_prms->i4_num_steps_hpel_refine,
7546 ps_refine_prms->i4_num_steps_qpel_refine);
7547 }
7548
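                /* Decide the number of refinement iterations: in P pictures with */
                /* more than one active L0 reference, the refinement below may be */
                /* repeated so that each reference (or a preset-dependent subset) */
                /* is searched explicitly                                          */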
7549 #if ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0
7550 i4_num_refinement_iterations =
7551 ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1))
7552 ? ((e_me_quality_presets == ME_HIGH_QUALITY) ? 2 : i4_num_act_ref_l0)
7553 : 1;
7554 #else
7555 i4_num_refinement_iterations =
7556 ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1)) ? 2 : 1;
7557 #endif
7558
7559 #if ENABLE_EXPLICIT_SEARCH_IN_PQ
7560 if(e_me_quality_presets == ME_PRISTINE_QUALITY)
7561 {
7562 i4_num_refinement_iterations = (u1_pred_dir == 0) ? i4_num_act_ref_l0
7563 : i4_num_act_ref_l1;
7564 }
7565 #endif
7566
7567 for(i4_refine_iter_ctr = 0; i4_refine_iter_ctr < i4_num_refinement_iterations;
7568 i4_refine_iter_ctr++)
7569 {
7570 S32 center_x;
7571 S32 center_y;
7572 S32 center_ref_idx;
7573
7574 S08 *pi1_pred_dir_to_ref_idx =
7575 (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list : ps_ctxt->ai1_future_list;
7576
7577 {
7578 WORD32 i4_i;
7579
7580 for(i4_i = 0; i4_i < TOT_NUM_PARTS; i4_i++)
7581 {
7582 ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
7583 ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
7584 ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i] =
7585 MAX_SIGNED_16BIT_VAL;
7586 ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i] = 0;
7587 ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i] = 0;
7588 ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i] = u1_default_ref_id;
7589
7590 if(ps_refine_prms->i4_num_results_per_part == 2)
7591 {
7592 ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i] =
7593 MAX_SIGNED_16BIT_VAL;
7594 ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i] =
7595 MAX_SIGNED_16BIT_VAL;
7596 ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i] =
7597 MAX_SIGNED_16BIT_VAL;
7598 ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i] = 0;
7599 ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i] = 0;
7600 ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i] = u1_default_ref_id;
7601 }
7602 }
7603
7604 s_search_prms_blk.ps_fullpel_refine_ctxt = ps_fullpel_refine_ctxt;
7605 s_subpel_prms.ps_subpel_refine_ctxt = ps_fullpel_refine_ctxt;
7606 }
7607
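                    /* Pick the colocated candidate whose reference matches the    */
                    /* reference of the current iteration; it is used as the MVP   */
                    /* and as the fullpel search center                            */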
7608 {
7609 search_node_t *ps_coloc_node;
7610
7611 S32 i = 0;
7612
7613 if(i4_num_refinement_iterations > 1)
7614 {
7615 for(i = 0; i < ai4_num_coloc_cands[u1_pred_dir]; i++)
7616 {
7617 ps_coloc_node =
7618 s_search_prms_blk.ps_search_candts[ai4_id_coloc[i]]
7619 .ps_search_node;
7620
7621 if(pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr] ==
7622 ps_coloc_node->i1_ref_idx)
7623 {
7624 break;
7625 }
7626 }
7627
7628 if(i == ai4_num_coloc_cands[u1_pred_dir])
7629 {
7630 i = 0;
7631 }
7632 }
7633 else
7634 {
7635 ps_coloc_node = s_search_prms_blk.ps_search_candts[ai4_id_coloc[0]]
7636 .ps_search_node;
7637 }
7638
7639 hme_set_mvp_node(
7640 ps_search_results,
7641 ps_coloc_node,
7642 u1_pred_dir,
7643 (i4_num_refinement_iterations > 1)
7644 ? pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr]
7645 : u1_default_ref_id);
7646
7647 center_x = ps_coloc_node->ps_mv->i2_mvx;
7648 center_y = ps_coloc_node->ps_mv->i2_mvy;
7649 center_ref_idx = ps_coloc_node->i1_ref_idx;
7650 }
7651
7652 /* Full-Pel search */
7653 {
7654 S32 num_unique_nodes;
7655
7656 memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
7657
7658 num_unique_nodes = hme_remove_duplicate_fpel_search_candidates(
7659 as_unique_search_nodes,
7660 s_search_prms_blk.ps_search_candts,
7661 au4_unique_node_map,
7662 pi1_pred_dir_to_ref_idx,
7663 i4_num_srch_cands,
7664 s_search_prms_blk.i4_num_init_candts,
7665 i4_refine_iter_ctr,
7666 i4_num_refinement_iterations,
7667 i4_num_act_ref_l0,
7668 center_ref_idx,
7669 center_x,
7670 center_y,
7671 ps_ctxt->s_frm_prms.bidir_enabled,
7672 e_me_quality_presets);
7673
7674 /*************************************************************************/
7675 /* This array stores the ids of the partitions whose */
7676 /* SADs are updated. Since the partitions whose SADs are updated may not */
7677 /* be in contiguous order, we supply another level of indirection. */
7678 /*************************************************************************/
7679 ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids(
7680 s_search_prms_blk.i4_part_mask,
7681 &ps_fullpel_refine_ctxt->ai4_part_id[0]);
7682
7683 if(!i4_refine_iter_ctr && !u1_pred_dir_ctr && u1_is_cu_noisy)
7684 {
7685 S32 i;
7686 /*i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values*/
7687 S32 i4_sigma_array_offset = (s_search_prms_blk.i4_cu_x_off / 4) +
7688 (s_search_prms_blk.i4_cu_y_off * 4);
7689
7690 for(i = 0; i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i++)
7691 {
7692 S32 i4_part_id = ps_fullpel_refine_ctxt->ai4_part_id[i];
7693
7694 hme_compute_final_sigma_of_pu_from_base_blocks(
7695 ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
7696 ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
7697 au8_final_src_sigmaX,
7698 au8_final_src_sigmaXSquared,
7699 16,
7700 4,
7701 i4_part_id,
7702 16);
7703 }
7704
7705 s_common_frm_prms.pu8_part_src_sigmaX = au8_final_src_sigmaX;
7706 s_common_frm_prms.pu8_part_src_sigmaXSquared =
7707 au8_final_src_sigmaXSquared;
7708
7709 s_search_prms_blk.pu8_part_src_sigmaX = au8_final_src_sigmaX;
7710 s_search_prms_blk.pu8_part_src_sigmaXSquared =
7711 au8_final_src_sigmaXSquared;
7712 }
7713
7714 if(0 == num_unique_nodes)
7715 {
7716 continue;
7717 }
7718
7719 if(num_unique_nodes >= 2)
7720 {
7721 s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
7722 s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
7723 if(ps_ctxt->i4_pic_type != IV_P_FRAME)
7724 {
7725 if(ps_ctxt->i4_temporal_layer == 1)
7726 {
7727 hme_fullpel_cand_sifter(
7728 &s_search_prms_blk,
7729 ps_curr_layer,
7730 &ps_ctxt->s_wt_pred,
7731 ALPHA_FOR_NOISE_TERM_IN_ME,
7732 u1_is_cu_noisy,
7733 ps_me_optimised_function_list);
7734 }
7735 else
7736 {
7737 hme_fullpel_cand_sifter(
7738 &s_search_prms_blk,
7739 ps_curr_layer,
7740 &ps_ctxt->s_wt_pred,
7741 ALPHA_FOR_NOISE_TERM_IN_ME,
7742 u1_is_cu_noisy,
7743 ps_me_optimised_function_list);
7744 }
7745 }
7746 else
7747 {
7748 hme_fullpel_cand_sifter(
7749 &s_search_prms_blk,
7750 ps_curr_layer,
7751 &ps_ctxt->s_wt_pred,
7752 ALPHA_FOR_NOISE_TERM_IN_ME_P,
7753 u1_is_cu_noisy,
7754 ps_me_optimised_function_list);
7755 }
7756 }
7757
7758 s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
7759
7760 hme_fullpel_refine(
7761 ps_refine_prms,
7762 &s_search_prms_blk,
7763 ps_curr_layer,
7764 &ps_ctxt->s_wt_pred,
7765 au4_unique_node_map,
7766 num_unique_nodes,
7767 blk_8x8_mask,
7768 center_x,
7769 center_y,
7770 center_ref_idx,
7771 e_me_quality_presets,
7772 ps_me_optimised_function_list);
7773 }
7774
7775 /* Sub-Pel search */
7776 {
7777 hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
7778
7779 s_subpel_prms.pu1_wkg_mem = (U08 *)hme_get_wkg_mem(
7780 &ps_ctxt->s_buf_mgr,
7781 INTERP_INTERMED_BUF_SIZE + INTERP_OUT_BUF_SIZE);
7782 /* MV limit is different based on ref. PIC */
7783 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
7784 {
7785 SCALE_RANGE_PRMS(
7786 as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
7787 SCALE_RANGE_PRMS(
7788 as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
7789 }
7790 s_subpel_prms.i4_ctb_x_off = i4_ctb_x << 6;
7791 s_subpel_prms.i4_ctb_y_off = i4_ctb_y << 6;
7792
7793 hme_subpel_refine_cu_hs(
7794 &s_subpel_prms,
7795 ps_curr_layer,
7796 ps_search_results,
7797 u1_pred_dir,
7798 &ps_ctxt->s_wt_pred,
7799 blk_8x8_mask,
7800 ps_ctxt->ps_func_selector,
7801 ps_cmn_utils_optimised_function_list,
7802 ps_me_optimised_function_list);
7803 }
7804 }
7805 }
7806 /* Populate the new PU struct with the results post subpel refinement*/
7807 {
7808 inter_cu_results_t *ps_cu_results;
7809 WORD32 best_inter_cost, intra_cost, posx, posy;
7810
7811 UWORD8 intra_8x8_enabled = 0;
7812
7813 /* cost of 16x16 cu parent */
7814 WORD32 parent_cost = MAX_32BIT_VAL;
7815
7816 /* cost of 8x8 cu children */
7817 /*********************************************************************/
7818                 /* If the parent is not split, we signal 1 bit for this parent       */
7819                 /* CU. If split, then 1 bit for the parent CU + 1 bit for each of the*/
7820                 /* 4 child CUs. So, 4*lambda is extra for the children cost.         */
7821 /*********************************************************************/
7822 WORD32 child_cost = 0;
7823
7824 ps_cu_results = ps_search_results->ps_cu_results;
7825
7826 /* Initialize the pu_results pointers to the first struct in the stack array */
7827 ps_pu_results = as_inter_pu_results;
7828
7829 hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
7830
7831 hme_populate_pus(
7832 ps_thrd_ctxt,
7833 ps_ctxt,
7834 &s_subpel_prms,
7835 ps_search_results,
7836 ps_cu_results,
7837 ps_pu_results,
7838 &(as_pu_results[0][0][0]),
7839 &s_common_frm_prms,
7840 &ps_ctxt->s_wt_pred,
7841 ps_curr_layer,
7842 au1_pred_dir_searched,
7843 i4_num_pred_dir);
7844
7845 ps_cu_results->i4_inp_offset =
7846 (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
7847
7848 hme_decide_part_types(
7849 ps_cu_results,
7850 ps_pu_results,
7851 &s_common_frm_prms,
7852 ps_ctxt,
7853 ps_cmn_utils_optimised_function_list,
7854 ps_me_optimised_function_list
7855
7856 );
7857
7858 /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
7859                 /* Only for P pics. For P, both flags are 0; for I and B pics they are mutually exclusive */
7860 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
7861 {
7862 WORD32 res_ctr;
7863
7864 for(res_ctr = 0; res_ctr < ps_cu_results->u1_num_best_results; res_ctr++)
7865 {
7866 WORD32 num_part = 2, part_ctr;
7867 part_type_results_t *ps_best_results =
7868 &ps_cu_results->ps_best_results[res_ctr];
7869
7870 if(PRT_2Nx2N == ps_best_results->u1_part_type)
7871 num_part = 1;
7872
7873 for(part_ctr = 0; part_ctr < num_part; part_ctr++)
7874 {
7875 pu_result_t *ps_pu_results =
7876 &ps_best_results->as_pu_results[part_ctr];
7877
7878 ASSERT(PRED_L0 == ps_pu_results->pu.b2_pred_mode);
7879
7880 hme_update_dynamic_search_params(
7881 &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p]
7882 .as_dyn_range_prms[ps_pu_results->pu.mv.i1_l0_ref_idx],
7883 ps_pu_results->pu.mv.s_l0_mv.i2_mvy);
7884
7885 /* Sanity Check */
7886 ASSERT(
7887 ps_pu_results->pu.mv.i1_l0_ref_idx <
7888 ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
7889
7890 /* No L1 for P Pic. */
7891 ASSERT(PRED_L1 != ps_pu_results->pu.b2_pred_mode);
7892 /* No BI for P Pic. */
7893 ASSERT(PRED_BI != ps_pu_results->pu.b2_pred_mode);
7894 }
7895 }
7896 }
7897
7898 /*****************************************************************/
7899 /* INSERT INTRA RESULTS AT 16x16 LEVEL. */
7900 /*****************************************************************/
7901
7902 #if DISABLE_INTRA_IN_BPICS
7903 if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
7904 (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
7905 #endif
7906 {
7907 if(!(DISABLE_INTRA_WHEN_NOISY && s_common_frm_prms.u1_is_cu_noisy))
7908 {
7909 hme_insert_intra_nodes_post_bipred(
7910 ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
7911 }
7912 }
7913
7914 #if DISABLE_INTRA_IN_BPICS
7915 if((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
7916 (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
7917 {
7918 intra_8x8_enabled = 0;
7919 }
7920 else
7921 #endif
7922 {
7923 /*TRAQO intra flag updation*/
7924 if(1 == ps_cu_results->ps_best_results->as_pu_results[0].pu.b1_intra_flag)
7925 {
7926 best_inter_cost =
7927 ps_cu_results->ps_best_results->as_pu_results[1].i4_tot_cost;
7928 intra_cost =
7929 ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
7930 /*@16x16 level*/
7931 posx = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_x
7932 << 2) >>
7933 4;
7934 posy = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_y
7935 << 2) >>
7936 4;
7937 }
7938 else
7939 {
7940 best_inter_cost =
7941 ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
7942 posx = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_x
7943 << 2) >>
7944 3;
7945 posy = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_y
7946 << 2) >>
7947 3;
7948 }
7949
7950 /* Disable intra16/32/64 flags based on split flags recommended by IPE */
7951 if(ps_cur_ipe_ctb->u1_split_flag)
7952 {
7953 /* Id of the 32x32 block, 16x16 block in a CTB */
7954 WORD32 i4_32x32_id =
7955 (ps_cu_results->u1_y_off >> 5) * 2 + (ps_cu_results->u1_x_off >> 5);
7956 WORD32 i4_16x16_id = ((ps_cu_results->u1_y_off >> 4) & 0x1) * 2 +
7957 ((ps_cu_results->u1_x_off >> 4) & 0x1);
7958
7959 if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
7960 {
7961 if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7962 .as_intra16_analyse[i4_16x16_id]
7963 .b1_split_flag)
7964 {
7965 intra_8x8_enabled =
7966 ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7967 .as_intra16_analyse[i4_16x16_id]
7968 .as_intra8_analyse[0]
7969 .b1_valid_cu;
7970 intra_8x8_enabled &=
7971 ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7972 .as_intra16_analyse[i4_16x16_id]
7973 .as_intra8_analyse[1]
7974 .b1_valid_cu;
7975 intra_8x8_enabled &=
7976 ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7977 .as_intra16_analyse[i4_16x16_id]
7978 .as_intra8_analyse[2]
7979 .b1_valid_cu;
7980 intra_8x8_enabled &=
7981 ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7982 .as_intra16_analyse[i4_16x16_id]
7983 .as_intra8_analyse[3]
7984 .b1_valid_cu;
7985 }
7986 }
7987 }
7988 }
7989
7990 if(blk_8x8_mask == 0xf)
7991 {
7992 parent_cost =
7993 ps_search_results->ps_cu_results->ps_best_results[0].i4_tot_cost;
7994 ps_search_results->u1_split_flag = 0;
7995 }
7996 else
7997 {
7998 ps_search_results->u1_split_flag = 1;
7999 }
8000
8001 ps_cu_results = &ps_ctxt->as_cu8x8_results[blk_id_in_full_ctb << 2];
8002
8003 if(s_common_frm_prms.u1_is_cu_noisy)
8004 {
8005 intra_8x8_enabled = 0;
8006 }
8007
8008                 /* Evaluate 8x8 if NxN part id is enabled */
8009 if((ps_search_results->i4_part_mask & ENABLE_NxN) || intra_8x8_enabled)
8010 {
8011 /* Populates the PU's for the 4 8x8's in one call */
8012 hme_populate_pus_8x8_cu(
8013 ps_thrd_ctxt,
8014 ps_ctxt,
8015 &s_subpel_prms,
8016 ps_search_results,
8017 ps_cu_results,
8018 ps_pu_results,
8019 &(as_pu_results[0][0][0]),
8020 &s_common_frm_prms,
8021 au1_pred_dir_searched,
8022 i4_num_pred_dir,
8023 blk_8x8_mask);
8024
8025 /* Re-initialize the pu_results pointers to the first struct in the stack array */
8026 ps_pu_results = as_inter_pu_results;
8027
8028 for(i = 0; i < 4; i++)
8029 {
8030 if((blk_8x8_mask & (1 << i)))
8031 {
8032 if(ps_cu_results->i4_part_mask)
8033 {
8034 hme_decide_part_types(
8035 ps_cu_results,
8036 ps_pu_results,
8037 &s_common_frm_prms,
8038 ps_ctxt,
8039 ps_cmn_utils_optimised_function_list,
8040 ps_me_optimised_function_list
8041
8042 );
8043 }
8044 /*****************************************************************/
8045 /* INSERT INTRA RESULTS AT 8x8 LEVEL. */
8046 /*****************************************************************/
8047 #if DISABLE_INTRA_IN_BPICS
8048 if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
8049 (ps_ctxt->s_frm_prms.i4_temporal_layer_id >
8050 TEMPORAL_LAYER_DISABLE)))
8051 #endif
8052 {
8053 if(!(DISABLE_INTRA_WHEN_NOISY &&
8054 s_common_frm_prms.u1_is_cu_noisy))
8055 {
8056 hme_insert_intra_nodes_post_bipred(
8057 ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
8058 }
8059 }
8060
8061 child_cost += ps_cu_results->ps_best_results[0].i4_tot_cost;
8062 }
8063
8064 ps_cu_results++;
8065 ps_pu_results++;
8066 }
8067
8068 /* Compare 16x16 vs 8x8 cost */
8069 if(child_cost < parent_cost)
8070 {
8071 ps_search_results->best_cu_cost = child_cost;
8072 ps_search_results->u1_split_flag = 1;
8073 }
8074 }
8075 }
8076
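            /* Update the layer MV bank with the best results of this block */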
8077 hme_update_mv_bank_encode(
8078 ps_search_results,
8079 ps_curr_layer->ps_layer_mvbank,
8080 blk_x,
8081 blk_y,
8082 &s_mv_update_prms,
8083 au1_pred_dir_searched,
8084 i4_num_act_ref_l0);
8085
8086             /*********************************************************************/
8087             /* Map the best results to an MV Grid. This is an 18x18 grid that is */
8088             /* used to derive MV predictors for cost calculation and can also be */
8089             /* used for merge calculations if need be.                            */
8090             /*********************************************************************/
8091 hme_map_mvs_to_grid(
8092 &aps_mv_grid[0], ps_search_results, au1_pred_dir_searched, i4_num_pred_dir);
8093 }
8094
8095 /* Set the CU tree nodes appropriately */
8096 if(e_me_quality_presets != ME_PRISTINE_QUALITY)
8097 {
8098 WORD32 i, j;
8099
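            /* i indexes the 16 16x16 blocks in encode order: (i >> 2) selects the */
            /* 32x32 quadrant and (i % 4) selects the 16x16 child within it        */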
8100 for(i = 0; i < 16; i++)
8101 {
8102 cur_ctb_cu_tree_t *ps_tree_node =
8103 ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
8104 search_results_t *ps_results = &ps_ctxt->as_search_results_16x16[i];
8105
8106 switch(i >> 2)
8107 {
8108 case 0:
8109 {
8110 ps_tree_node = ps_tree_node->ps_child_node_tl;
8111
8112 break;
8113 }
8114 case 1:
8115 {
8116 ps_tree_node = ps_tree_node->ps_child_node_tr;
8117
8118 break;
8119 }
8120 case 2:
8121 {
8122 ps_tree_node = ps_tree_node->ps_child_node_bl;
8123
8124 break;
8125 }
8126 case 3:
8127 {
8128 ps_tree_node = ps_tree_node->ps_child_node_br;
8129
8130 break;
8131 }
8132 }
8133
8134 switch(i % 4)
8135 {
8136 case 0:
8137 {
8138 ps_tree_node = ps_tree_node->ps_child_node_tl;
8139
8140 break;
8141 }
8142 case 1:
8143 {
8144 ps_tree_node = ps_tree_node->ps_child_node_tr;
8145
8146 break;
8147 }
8148 case 2:
8149 {
8150 ps_tree_node = ps_tree_node->ps_child_node_bl;
8151
8152 break;
8153 }
8154 case 3:
8155 {
8156 ps_tree_node = ps_tree_node->ps_child_node_br;
8157
8158 break;
8159 }
8160 }
8161
8162 if(ai4_blk_8x8_mask[i] == 15)
8163 {
8164 if(!ps_results->u1_split_flag)
8165 {
8166 ps_tree_node->is_node_valid = 1;
8167 NULLIFY_THE_CHILDREN_NODES(ps_tree_node);
8168 }
8169 else
8170 {
8171 ps_tree_node->is_node_valid = 0;
8172 ENABLE_THE_CHILDREN_NODES(ps_tree_node);
8173 }
8174 }
8175 else
8176 {
8177 cur_ctb_cu_tree_t *ps_tree_child;
8178
8179 ps_tree_node->is_node_valid = 0;
8180
8181 for(j = 0; j < 4; j++)
8182 {
8183 switch(j)
8184 {
8185 case 0:
8186 {
8187 ps_tree_child = ps_tree_node->ps_child_node_tl;
8188
8189 break;
8190 }
8191 case 1:
8192 {
8193 ps_tree_child = ps_tree_node->ps_child_node_tr;
8194
8195 break;
8196 }
8197 case 2:
8198 {
8199 ps_tree_child = ps_tree_node->ps_child_node_bl;
8200
8201 break;
8202 }
8203 case 3:
8204 {
8205 ps_tree_child = ps_tree_node->ps_child_node_br;
8206
8207 break;
8208 }
8209 }
8210
8211 ps_tree_child->is_node_valid = !!(ai4_blk_8x8_mask[i] & (1 << j));
8212 }
8213 }
8214 }
8215 }
8216
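        /* In the pristine preset, MV clustering across the 16x16 and 8x8 results */
        /* decides which 32x32 / 64x64 merges are attempted further below         */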
8217 if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8218 {
8219 cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root;
8220
8221 hme_analyse_mv_clustering(
8222 ps_ctxt->as_search_results_16x16,
8223 ps_ctxt->as_cu16x16_results,
8224 ps_ctxt->as_cu8x8_results,
8225 ps_ctxt->ps_ctb_cluster_info,
8226 ps_ctxt->ai1_future_list,
8227 ps_ctxt->ai1_past_list,
8228 ps_ctxt->s_frm_prms.bidir_enabled,
8229 e_me_quality_presets);
8230
8231 #if DISABLE_BLK_MERGE_WHEN_NOISY
8232 ps_tree->ps_child_node_tl->is_node_valid = !au1_is_32x32Blk_noisy[0];
8233 ps_tree->ps_child_node_tr->is_node_valid = !au1_is_32x32Blk_noisy[1];
8234 ps_tree->ps_child_node_bl->is_node_valid = !au1_is_32x32Blk_noisy[2];
8235 ps_tree->ps_child_node_br->is_node_valid = !au1_is_32x32Blk_noisy[3];
8236 ps_tree->ps_child_node_tl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[0];
8237 ps_tree->ps_child_node_tr->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[1];
8238 ps_tree->ps_child_node_bl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[2];
8239 ps_tree->ps_child_node_br->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[3];
8240 ps_tree->is_node_valid = !au1_is_64x64Blk_noisy[0];
8241 ps_tree->u1_inter_eval_enable = !au1_is_64x64Blk_noisy[0];
8242 #endif
8243
8244 en_merge_32x32 = (ps_tree->ps_child_node_tl->is_node_valid << 0) |
8245 (ps_tree->ps_child_node_tr->is_node_valid << 1) |
8246 (ps_tree->ps_child_node_bl->is_node_valid << 2) |
8247 (ps_tree->ps_child_node_br->is_node_valid << 3);
8248
8249 en_merge_execution = (ps_tree->ps_child_node_tl->u1_inter_eval_enable << 0) |
8250 (ps_tree->ps_child_node_tr->u1_inter_eval_enable << 1) |
8251 (ps_tree->ps_child_node_bl->u1_inter_eval_enable << 2) |
8252 (ps_tree->ps_child_node_br->u1_inter_eval_enable << 3) |
8253 (ps_tree->u1_inter_eval_enable << 4);
8254 }
8255 else
8256 {
8257 en_merge_execution = 0x1f;
8258
8259 #if DISABLE_BLK_MERGE_WHEN_NOISY
8260 en_merge_32x32 = ((!au1_is_32x32Blk_noisy[0] << 0) & (en_merge_32x32 & 1)) |
8261 ((!au1_is_32x32Blk_noisy[1] << 1) & (en_merge_32x32 & 2)) |
8262 ((!au1_is_32x32Blk_noisy[2] << 2) & (en_merge_32x32 & 4)) |
8263 ((!au1_is_32x32Blk_noisy[3] << 3) & (en_merge_32x32 & 8));
8264 #endif
8265 }
8266
8267 /* Re-initialize the pu_results pointers to the first struct in the stack array */
8268 ps_pu_results = as_inter_pu_results;
8269
8270 {
8271 WORD32 ref_ctr;
8272
8273 s_ctb_prms.i4_ctb_x = i4_ctb_x << 6;
8274 s_ctb_prms.i4_ctb_y = i4_ctb_y << 6;
8275
8276 /* MV limit is different based on ref. PIC */
8277 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8278 {
8279 SCALE_RANGE_PRMS(as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
8280 SCALE_RANGE_PRMS(as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
8281 }
8282
8283 e_merge_result = CU_SPLIT;
8284 merge_count_32x32 = 0;
8285
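            /*************************************************************************/
            /* Attempt CU merge for each enabled 32x32 quadrant (TL, TR, BL, BR).    */
            /* On a successful merge the corresponding CU tree node is validated and */
            /* its children are nullified; otherwise the children stay enabled.      */
            /*************************************************************************/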
8286 if((en_merge_32x32 & 1) && (en_merge_execution & 1))
8287 {
8288 range_prms_t *ps_pic_limit;
8289 if(s_merge_prms_32x32_tl.i4_use_rec == 1)
8290 {
8291 ps_pic_limit = &s_pic_limit_rec;
8292 }
8293 else
8294 {
8295 ps_pic_limit = &s_pic_limit_inp;
8296 }
8297 /* MV limit is different based on ref. PIC */
8298 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8299 {
8300 hme_derive_search_range(
8301 s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8302 ps_pic_limit,
8303 &as_mv_limit[ref_ctr],
8304 i4_ctb_x << 6,
8305 i4_ctb_y << 6,
8306 32,
8307 32);
8308
8309 SCALE_RANGE_PRMS_POINTERS(
8310 s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8311 s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8312 2);
8313 }
8314 s_merge_prms_32x32_tl.i4_ctb_x_off = i4_ctb_x << 6;
8315 s_merge_prms_32x32_tl.i4_ctb_y_off = i4_ctb_y << 6;
8316 s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[0];
8317
8318 e_merge_result = hme_try_merge_high_speed(
8319 ps_thrd_ctxt,
8320 ps_ctxt,
8321 ps_cur_ipe_ctb,
8322 &s_subpel_prms,
8323 &s_merge_prms_32x32_tl,
8324 ps_pu_results,
8325 &as_pu_results[0][0][0]);
8326
8327 if(e_merge_result == CU_MERGED)
8328 {
8329 inter_cu_results_t *ps_cu_results =
8330 s_merge_prms_32x32_tl.ps_results_merge->ps_cu_results;
8331
8332 if(!((ps_cu_results->u1_num_best_results == 1) &&
8333 (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8334 {
8335 hme_map_mvs_to_grid(
8336 &aps_mv_grid[0],
8337 s_merge_prms_32x32_tl.ps_results_merge,
8338 s_merge_prms_32x32_tl.au1_pred_dir_searched,
8339 s_merge_prms_32x32_tl.i4_num_pred_dir_actual);
8340 }
8341
8342 if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8343 {
8344 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8345 .ps_child_node_tl->is_node_valid = 1;
8346 NULLIFY_THE_CHILDREN_NODES(
8347 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8348 .ps_child_node_tl);
8349 }
8350
8351 merge_count_32x32++;
8352 e_merge_result = CU_SPLIT;
8353 }
8354 else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8355 {
8356 #if ENABLE_CU_TREE_CULLING
8357 cur_ctb_cu_tree_t *ps_tree =
8358 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
8359
8360 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8361 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8362 ENABLE_THE_CHILDREN_NODES(ps_tree);
8363 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8364 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8365 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8366 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8367 #endif
8368 }
8369 }
8370 else if((en_merge_32x32 & 1) && (!(en_merge_execution & 1)))
8371 {
8372 #if ENABLE_CU_TREE_CULLING
8373 cur_ctb_cu_tree_t *ps_tree =
8374 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
8375
8376 ENABLE_THE_CHILDREN_NODES(ps_tree);
8377 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8378 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8379 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8380 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8381 #endif
8382
8383 if(au1_is_32x32Blk_noisy[0] && DISABLE_INTRA_WHEN_NOISY)
8384 {
8385 ps_tree->is_node_valid = 0;
8386 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8387 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8388 }
8389 }
8390
8391 if((en_merge_32x32 & 2) && (en_merge_execution & 2))
8392 {
8393 range_prms_t *ps_pic_limit;
8394 if(s_merge_prms_32x32_tr.i4_use_rec == 1)
8395 {
8396 ps_pic_limit = &s_pic_limit_rec;
8397 }
8398 else
8399 {
8400 ps_pic_limit = &s_pic_limit_inp;
8401 }
8402 /* MV limit is different based on ref. PIC */
8403 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8404 {
8405 hme_derive_search_range(
8406 s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8407 ps_pic_limit,
8408 &as_mv_limit[ref_ctr],
8409 (i4_ctb_x << 6) + 32,
8410 i4_ctb_y << 6,
8411 32,
8412 32);
8413 SCALE_RANGE_PRMS_POINTERS(
8414 s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8415 s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8416 2);
8417 }
8418 s_merge_prms_32x32_tr.i4_ctb_x_off = i4_ctb_x << 6;
8419 s_merge_prms_32x32_tr.i4_ctb_y_off = i4_ctb_y << 6;
8420 s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[1];
8421
8422 e_merge_result = hme_try_merge_high_speed(
8423 ps_thrd_ctxt,
8424 ps_ctxt,
8425 ps_cur_ipe_ctb,
8426 &s_subpel_prms,
8427 &s_merge_prms_32x32_tr,
8428 ps_pu_results,
8429 &as_pu_results[0][0][0]);
8430
8431 if(e_merge_result == CU_MERGED)
8432 {
8433 inter_cu_results_t *ps_cu_results =
8434 s_merge_prms_32x32_tr.ps_results_merge->ps_cu_results;
8435
8436 if(!((ps_cu_results->u1_num_best_results == 1) &&
8437 (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8438 {
8439 hme_map_mvs_to_grid(
8440 &aps_mv_grid[0],
8441 s_merge_prms_32x32_tr.ps_results_merge,
8442 s_merge_prms_32x32_tr.au1_pred_dir_searched,
8443 s_merge_prms_32x32_tr.i4_num_pred_dir_actual);
8444 }
8445
8446 if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8447 {
8448 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8449 .ps_child_node_tr->is_node_valid = 1;
8450 NULLIFY_THE_CHILDREN_NODES(
8451 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8452 .ps_child_node_tr);
8453 }
8454
8455 merge_count_32x32++;
8456 e_merge_result = CU_SPLIT;
8457 }
8458 else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8459 {
8460 #if ENABLE_CU_TREE_CULLING
8461 cur_ctb_cu_tree_t *ps_tree =
8462 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
8463
8464 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8465 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8466 ENABLE_THE_CHILDREN_NODES(ps_tree);
8467 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8468 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8469 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8470 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8471 #endif
8472 }
8473 }
8474 else if((en_merge_32x32 & 2) && (!(en_merge_execution & 2)))
8475 {
8476 #if ENABLE_CU_TREE_CULLING
8477 cur_ctb_cu_tree_t *ps_tree =
8478 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
8479
8480 ENABLE_THE_CHILDREN_NODES(ps_tree);
8481 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8482 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8483 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8484 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8485 #endif
8486
8487 if(au1_is_32x32Blk_noisy[1] && DISABLE_INTRA_WHEN_NOISY)
8488 {
8489 ps_tree->is_node_valid = 0;
8490 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8491 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8492 }
8493 }
8494
8495 if((en_merge_32x32 & 4) && (en_merge_execution & 4))
8496 {
8497 range_prms_t *ps_pic_limit;
8498 if(s_merge_prms_32x32_bl.i4_use_rec == 1)
8499 {
8500 ps_pic_limit = &s_pic_limit_rec;
8501 }
8502 else
8503 {
8504 ps_pic_limit = &s_pic_limit_inp;
8505 }
8506 /* MV limit is different based on ref. PIC */
8507 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8508 {
8509 hme_derive_search_range(
8510 s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8511 ps_pic_limit,
8512 &as_mv_limit[ref_ctr],
8513 i4_ctb_x << 6,
8514 (i4_ctb_y << 6) + 32,
8515 32,
8516 32);
8517 SCALE_RANGE_PRMS_POINTERS(
8518 s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8519 s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8520 2);
8521 }
8522 s_merge_prms_32x32_bl.i4_ctb_x_off = i4_ctb_x << 6;
8523 s_merge_prms_32x32_bl.i4_ctb_y_off = i4_ctb_y << 6;
8524 s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[2];
8525
8526 e_merge_result = hme_try_merge_high_speed(
8527 ps_thrd_ctxt,
8528 ps_ctxt,
8529 ps_cur_ipe_ctb,
8530 &s_subpel_prms,
8531 &s_merge_prms_32x32_bl,
8532 ps_pu_results,
8533 &as_pu_results[0][0][0]);
8534
8535 if(e_merge_result == CU_MERGED)
8536 {
8537 inter_cu_results_t *ps_cu_results =
8538 s_merge_prms_32x32_bl.ps_results_merge->ps_cu_results;
8539
8540 if(!((ps_cu_results->u1_num_best_results == 1) &&
8541 (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8542 {
8543 hme_map_mvs_to_grid(
8544 &aps_mv_grid[0],
8545 s_merge_prms_32x32_bl.ps_results_merge,
8546 s_merge_prms_32x32_bl.au1_pred_dir_searched,
8547 s_merge_prms_32x32_bl.i4_num_pred_dir_actual);
8548 }
8549
8550 if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8551 {
8552 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8553 .ps_child_node_bl->is_node_valid = 1;
8554 NULLIFY_THE_CHILDREN_NODES(
8555 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8556 .ps_child_node_bl);
8557 }
8558
8559 merge_count_32x32++;
8560 e_merge_result = CU_SPLIT;
8561 }
8562 else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8563 {
8564 #if ENABLE_CU_TREE_CULLING
8565 cur_ctb_cu_tree_t *ps_tree =
8566 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
8567
8568 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8569 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8570 ENABLE_THE_CHILDREN_NODES(ps_tree);
8571 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8572 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8573 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8574 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8575 #endif
8576 }
8577 }
8578 else if((en_merge_32x32 & 4) && (!(en_merge_execution & 4)))
8579 {
8580 #if ENABLE_CU_TREE_CULLING
8581 cur_ctb_cu_tree_t *ps_tree =
8582 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
8583
8584 ENABLE_THE_CHILDREN_NODES(ps_tree);
8585 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8586 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8587 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8588 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8589 #endif
8590
8591 if(au1_is_32x32Blk_noisy[2] && DISABLE_INTRA_WHEN_NOISY)
8592 {
8593 ps_tree->is_node_valid = 0;
8594 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8595 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8596 }
8597 }
8598
8599 if((en_merge_32x32 & 8) && (en_merge_execution & 8))
8600 {
8601 range_prms_t *ps_pic_limit;
8602 if(s_merge_prms_32x32_br.i4_use_rec == 1)
8603 {
8604 ps_pic_limit = &s_pic_limit_rec;
8605 }
8606 else
8607 {
8608 ps_pic_limit = &s_pic_limit_inp;
8609 }
8610 /* MV limit is different based on ref. PIC */
8611 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8612 {
8613 hme_derive_search_range(
8614 s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8615 ps_pic_limit,
8616 &as_mv_limit[ref_ctr],
8617 (i4_ctb_x << 6) + 32,
8618 (i4_ctb_y << 6) + 32,
8619 32,
8620 32);
8621
8622 SCALE_RANGE_PRMS_POINTERS(
8623 s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8624 s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8625 2);
8626 }
8627 s_merge_prms_32x32_br.i4_ctb_x_off = i4_ctb_x << 6;
8628 s_merge_prms_32x32_br.i4_ctb_y_off = i4_ctb_y << 6;
8629 s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[3];
8630
8631 e_merge_result = hme_try_merge_high_speed(
8632 ps_thrd_ctxt,
8633 ps_ctxt,
8634 ps_cur_ipe_ctb,
8635 &s_subpel_prms,
8636 &s_merge_prms_32x32_br,
8637 ps_pu_results,
8638 &as_pu_results[0][0][0]);
8639
8640 if(e_merge_result == CU_MERGED)
8641 {
8642 /*inter_cu_results_t *ps_cu_results = s_merge_prms_32x32_br.ps_results_merge->ps_cu_results;
8643
8644 if(!((ps_cu_results->u1_num_best_results == 1) &&
8645 (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8646 {
8647 hme_map_mvs_to_grid
8648 (
8649 &aps_mv_grid[0],
8650 s_merge_prms_32x32_br.ps_results_merge,
8651 s_merge_prms_32x32_br.au1_pred_dir_searched,
8652 s_merge_prms_32x32_br.i4_num_pred_dir_actual
8653 );
8654 }*/
8655
8656 if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8657 {
8658 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8659 .ps_child_node_br->is_node_valid = 1;
8660 NULLIFY_THE_CHILDREN_NODES(
8661 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8662 .ps_child_node_br);
8663 }
8664
8665 merge_count_32x32++;
8666 e_merge_result = CU_SPLIT;
8667 }
8668 else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8669 {
8670 #if ENABLE_CU_TREE_CULLING
8671 cur_ctb_cu_tree_t *ps_tree =
8672 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
8673
8674 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8675 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8676 ENABLE_THE_CHILDREN_NODES(ps_tree);
8677 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8678 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8679 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8680 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8681 #endif
8682 }
8683 }
8684 else if((en_merge_32x32 & 8) && (!(en_merge_execution & 8)))
8685 {
8686 #if ENABLE_CU_TREE_CULLING
8687 cur_ctb_cu_tree_t *ps_tree =
8688 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
8689
8690 ENABLE_THE_CHILDREN_NODES(ps_tree);
8691 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8692 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8693 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8694 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8695 #endif
8696
8697 if(au1_is_32x32Blk_noisy[3] && DISABLE_INTRA_WHEN_NOISY)
8698 {
8699 ps_tree->is_node_valid = 0;
8700 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8701 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8702 }
8703 }
8704
8705 /* Try merging all 32x32 to 64x64 candts */
8706 if(((en_merge_32x32 & 0xf) == 0xf) &&
8707 (((merge_count_32x32 == 4) && (e_me_quality_presets != ME_PRISTINE_QUALITY)) ||
8708 ((en_merge_execution & 16) && (e_me_quality_presets == ME_PRISTINE_QUALITY))))
8709 if((((e_me_quality_presets == ME_XTREME_SPEED_25) &&
8710 !DISABLE_64X64_BLOCK_MERGE_IN_ME_IN_XS25) ||
8711 (e_me_quality_presets != ME_XTREME_SPEED_25)))
8712 {
8713 range_prms_t *ps_pic_limit;
8714 if(s_merge_prms_64x64.i4_use_rec == 1)
8715 {
8716 ps_pic_limit = &s_pic_limit_rec;
8717 }
8718 else
8719 {
8720 ps_pic_limit = &s_pic_limit_inp;
8721 }
8722 /* MV limit is different based on ref. PIC */
8723 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8724 {
8725 hme_derive_search_range(
8726 s_merge_prms_64x64.aps_mv_range[ref_ctr],
8727 ps_pic_limit,
8728 &as_mv_limit[ref_ctr],
8729 i4_ctb_x << 6,
8730 i4_ctb_y << 6,
8731 64,
8732 64);
8733
8734 SCALE_RANGE_PRMS_POINTERS(
8735 s_merge_prms_64x64.aps_mv_range[ref_ctr],
8736 s_merge_prms_64x64.aps_mv_range[ref_ctr],
8737 2);
8738 }
8739 s_merge_prms_64x64.i4_ctb_x_off = i4_ctb_x << 6;
8740 s_merge_prms_64x64.i4_ctb_y_off = i4_ctb_y << 6;
8741 s_subpel_prms.u1_is_cu_noisy = au1_is_64x64Blk_noisy[0];
8742
8743 e_merge_result = hme_try_merge_high_speed(
8744 ps_thrd_ctxt,
8745 ps_ctxt,
8746 ps_cur_ipe_ctb,
8747 &s_subpel_prms,
8748 &s_merge_prms_64x64,
8749 ps_pu_results,
8750 &as_pu_results[0][0][0]);
8751
8752 if((e_merge_result == CU_MERGED) &&
8753 (ME_PRISTINE_QUALITY != e_me_quality_presets))
8754 {
8755 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8756 .is_node_valid = 1;
8757 NULLIFY_THE_CHILDREN_NODES(
8758 ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE));
8759 }
8760 else if(
8761 (e_merge_result == CU_SPLIT) &&
8762 (ME_PRISTINE_QUALITY == e_me_quality_presets))
8763 {
8764 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8765 .is_node_valid = 0;
8766 }
8767 }
8768
8769 /*****************************************************************/
8770 /* UPDATION OF RESULT TO EXTERNAL STRUCTURES */
8771 /*****************************************************************/
8772 pf_ext_update_fxn((void *)ps_thrd_ctxt, (void *)ps_ctxt, i4_ctb_x, i4_ctb_y);
8773
8774 {
8775 #ifdef _DEBUG
8776 S32 wd = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
8777 ? 64
8778 : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
8779 S32 ht = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
8780 ? 64
8781 : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
8782 ASSERT(
8783 (wd * ht) ==
8784 ihevce_compute_area_of_valid_cus_in_ctb(
8785 &ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]));
8786 #endif
8787 }
8788 }
8789
8790 /* set the dependency for the corresponding row in enc loop */
8791 ihevce_dmgr_set_row_row_sync(
8792 pv_dep_mngr_encloop_dep_me,
8793 (i4_ctb_x + 1),
8794 i4_ctb_y,
8795 tile_col_idx /* Col Tile No. */);
8796
8797 left_ctb_in_diff_tile = 0;
8798 }
8799 }
8800 }
8801
8802 /**
8803 ********************************************************************************
8804 * @fn void hme_refine_no_encode(coarse_me_ctxt_t *ps_ctxt,
8805 *                   refine_prms_t *ps_refine_prms)
8806 *
8807 * @brief Top level entry point for refinement ME
8808 *
8809 * @param[in,out] ps_ctxt: ME Handle
8810 *
8811 * @param[in] ps_refine_prms : refinement layer prms
8812 *
8813 * @return None
8814 ********************************************************************************
8815 */
8816 void hme_refine_no_encode(
8817 coarse_me_ctxt_t *ps_ctxt,
8818 refine_prms_t *ps_refine_prms,
8819 multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
8820 S32 lyr_job_type,
8821 WORD32 i4_ping_pong,
8822 void **ppv_dep_mngr_hme_sync)
8823 {
8824 BLK_SIZE_T e_search_blk_size, e_result_blk_size;
8825 ME_QUALITY_PRESETS_T e_me_quality_presets =
8826 ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
8827
8828 /*************************************************************************/
8829 /* Complexity of search: Low to High */
8830 /*************************************************************************/
8831 SEARCH_COMPLEXITY_T e_search_complexity;
8832
8833 /*************************************************************************/
8834     /* Config parameter structures for various ME submodules                */
8835 /*************************************************************************/
8836 hme_search_prms_t s_search_prms_blk;
8837 mvbank_update_prms_t s_mv_update_prms;
8838
8839 /*************************************************************************/
8840 /* All types of search candidates for predictor based search. */
8841 /*************************************************************************/
8842 S32 num_init_candts = 0;
8843 search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
8844 search_node_t as_top_neighbours[4], as_left_neighbours[3];
8845 search_node_t *ps_candt_zeromv, *ps_candt_tl, *ps_candt_tr;
8846 search_node_t *ps_candt_l, *ps_candt_t;
8847 search_node_t *ps_candt_prj_br[2], *ps_candt_prj_b[2], *ps_candt_prj_r[2];
8848 search_node_t *ps_candt_prj_bl[2];
8849 search_node_t *ps_candt_prj_tr[2], *ps_candt_prj_t[2], *ps_candt_prj_tl[2];
8850 search_node_t *ps_candt_prj_coloc[2];
8851
8852 pf_get_wt_inp fp_get_wt_inp;
8853
8854 search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
8855 U32 au4_unique_node_map[MAP_X_MAX * 2];
8856
8857 /*EIID */
8858     WORD32 i4_num_inter_wins = 0; //debug code to find stats of inter wins
8859 WORD32 i4_num_comparisions = 0; //debug code
8860 WORD32 i4_threshold_multiplier;
8861 WORD32 i4_threshold_divider;
8862 WORD32 i4_temporal_layer =
8863 ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_temporal_lyr_id;
8864
8865 /*************************************************************************/
8866     /* points to the search results for the blk level search (8x8/16x16)    */
8867 /*************************************************************************/
8868 search_results_t *ps_search_results;
8869
8870 /*************************************************************************/
8871 /* Coordinates */
8872 /*************************************************************************/
8873 S32 blk_x, i4_ctb_x, blk_id_in_ctb;
8874 //S32 i4_ctb_y;
8875 S32 pos_x, pos_y;
8876 S32 blk_id_in_full_ctb;
8877 S32 i4_num_srch_cands;
8878
8879 S32 blk_y;
8880
8881 /*************************************************************************/
8882 /* Related to dimensions of block being searched and pic dimensions */
8883 /*************************************************************************/
8884 S32 blk_wd, blk_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic;
8885 S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
8886 S32 num_results_prev_layer;
8887
8888 /*************************************************************************/
8889 /* Size of a basic unit for this layer. For non encode layers, we search */
8890 /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
8891 /* basic unit size is the ctb size. */
8892 /*************************************************************************/
8893 S32 unit_size;
8894
8895 /*************************************************************************/
8896 /* Pointers to context in current and coarser layers */
8897 /*************************************************************************/
8898 layer_ctxt_t *ps_curr_layer, *ps_coarse_layer;
8899
8900 /*************************************************************************/
8901 /* to store mv range per blk, and picture limit, allowed search range */
8902 /* range prms in hpel and qpel units as well */
8903 /*************************************************************************/
8904 range_prms_t s_range_prms_inp, s_range_prms_rec;
8905 range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
8906 /*************************************************************************/
8907 /* These variables are used to track number of references at different */
8908 /* stages of ME. */
8909 /*************************************************************************/
8910 S32 i4_num_ref_fpel, i4_num_ref_before_merge;
8911 S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
8912 S32 lambda_inp = ps_refine_prms->lambda_inp;
8913
8914 /*************************************************************************/
8915 /* When a layer is implicit, it means that it searches on 1 or 2 ref idx */
8916 /* Explicit means it searches on all active ref idx. */
8917 /*************************************************************************/
8918 S32 curr_layer_implicit, prev_layer_implicit;
8919
8920 /*************************************************************************/
8921 /* Variables for loop counts */
8922 /*************************************************************************/
8923 S32 id;
8924 S08 i1_ref_idx;
8925
8926 /*************************************************************************/
8927 /* Input pointer and stride */
8928 /*************************************************************************/
8929 U08 *pu1_inp;
8930 S32 i4_inp_stride;
8931
8932 S32 end_of_frame;
8933
8934 S32 num_sync_units_in_row;
8935
8936 PF_HME_PROJECT_COLOC_CANDT_FXN pf_hme_project_coloc_candt;
8937 ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
8938
8939 /*************************************************************************/
8940 /* Pointers to current and coarse layer are needed for projection */
8941     /* Pointers to the prev layer are needed for other candts like coloc    */
8942 /*************************************************************************/
8943 ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
8944
8945 ps_coarse_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id + 1];
8946
8947 num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
8948
8949     /* Function pointer is selected based on the C vs X86 macro */
8950
8951 fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list)
8952 ->pf_get_wt_inp_8x8;
8953
8954 i4_inp_stride = ps_curr_layer->i4_inp_stride;
8955 i4_pic_wd = ps_curr_layer->i4_wd;
8956 i4_pic_ht = ps_curr_layer->i4_ht;
8957 e_search_complexity = ps_refine_prms->e_search_complexity;
8958
8959 end_of_frame = 0;
8960
8961 /* If the previous layer is non-encode layer, then use dyadic projection */
8962 if(0 == ps_ctxt->u1_encode[ps_refine_prms->i4_layer_id + 1])
8963 pf_hme_project_coloc_candt = hme_project_coloc_candt_dyadic;
8964 else
8965 pf_hme_project_coloc_candt = hme_project_coloc_candt;
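    /* (Dyadic projection assumes the coarser layer is a factor-of-2 decimation  */
    /*  of the current layer, so MVs can be projected by a simple scaling by 2.) */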
8966
8967 /* This points to all the initial candts */
8968 ps_search_candts = &as_search_candts[0];
8969
8970 {
8971 e_search_blk_size = BLK_8x8;
8972 blk_wd = blk_ht = 8;
8973 blk_size_shift = 3;
8974 s_mv_update_prms.i4_shift = 0;
8975 /*********************************************************************/
8976 /* In case we do not encode this layer, we search 8x8 with or without*/
8977         /* enabling 4x4 SAD.                                                  */
8978 /*********************************************************************/
8979 {
8980 S32 i4_mask = (ENABLE_2Nx2N);
8981
8982 e_result_blk_size = BLK_8x8;
8983 if(ps_refine_prms->i4_enable_4x4_part)
8984 {
8985 i4_mask |= (ENABLE_NxN);
8986 e_result_blk_size = BLK_4x4;
8987 s_mv_update_prms.i4_shift = 1;
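                /* With NxN enabled, results are produced per 4x4 sub-block, so  */
                /* the layer MV bank update works at this finer granularity      */
                /* (shift of 1 relative to the 8x8 search block).                */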
8988 }
8989
8990 s_search_prms_blk.i4_part_mask = i4_mask;
8991 }
8992
8993 unit_size = blk_wd;
8994 s_search_prms_blk.i4_inp_stride = unit_size;
8995 }
8996
8997 /* This is required to properly update the layer mv bank */
8998 s_mv_update_prms.e_search_blk_size = e_search_blk_size;
8999 s_search_prms_blk.e_blk_size = e_search_blk_size;
9000
9001 /*************************************************************************/
9002     /* If the current layer is explicit, then the number of ref frames is   */
9003     /* the same as in the previous layer. Else it will be 2                  */
9004 /*************************************************************************/
9005 i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
9006 if(ps_refine_prms->explicit_ref)
9007 {
9008 curr_layer_implicit = 0;
9009 i4_num_ref_fpel = i4_num_ref_prev_layer;
9010 /* 100578 : Using same mv cost fun. for all presets. */
9011 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_refine;
9012 }
9013 else
9014 {
9015 i4_num_ref_fpel = 2;
9016 curr_layer_implicit = 1;
9017 {
9018 if(ME_MEDIUM_SPEED > e_me_quality_presets)
9019 {
9020 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit;
9021 }
9022 else
9023 {
9024 #if USE_MODIFIED == 1
9025 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
9026 #else
9027 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
9028 #endif
9029 }
9030 }
9031 }
9032
9033 i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
9034 if(ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type ==
9035 IV_IDR_FRAME ||
9036 ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type == IV_I_FRAME)
9037 {
9038 i4_num_ref_fpel = 1;
9039 }
9040 if(i4_num_ref_prev_layer <= 2)
9041 {
9042 prev_layer_implicit = 1;
9043 curr_layer_implicit = 1;
9044 i4_num_ref_each_dir = 1;
9045 }
9046 else
9047 {
9048 /* It is assumed that we have equal number of references in each dir */
9049 //ASSERT(!(i4_num_ref_prev_layer & 1));
9050 prev_layer_implicit = 0;
9051 i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
9052 }
9053 s_mv_update_prms.i4_num_ref = i4_num_ref_fpel;
9054 s_mv_update_prms.i4_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
9055 s_mv_update_prms.i4_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
9056
9057 /* this can be kept to 1 or 2 */
9058 i4_num_ref_before_merge = 2;
9059 i4_num_ref_before_merge = MIN(i4_num_ref_before_merge, i4_num_ref_fpel);
9060
9061 /* Set up place holders to hold the search nodes of each initial candt */
9062 for(i = 0; i < MAX_INIT_CANDTS; i++)
9063 {
9064 ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
9065 INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
9066 }
9067
9068 /* redundant, but doing it here since it is used in pred ctxt init */
9069 ps_candt_zeromv = ps_search_candts[0].ps_search_node;
9070 for(i = 0; i < 3; i++)
9071 {
9072 search_node_t *ps_search_node;
9073 ps_search_node = &as_left_neighbours[i];
9074 INIT_SEARCH_NODE(ps_search_node, 0);
9075 ps_search_node = &as_top_neighbours[i];
9076 INIT_SEARCH_NODE(ps_search_node, 0);
9077 }
9078
9079 INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
9080 /* bottom left node always not available for the blk being searched */
9081 as_left_neighbours[2].u1_is_avail = 0;
9082 /*************************************************************************/
9083 /* Initialize all the search results structure here. We update all the */
9084 /* search results to default values, and configure things like blk sizes */
9085 /*************************************************************************/
9086 if(ps_refine_prms->i4_encode == 0)
9087 {
9088 S32 pred_lx;
9089 search_results_t *ps_search_results;
9090
9091 ps_search_results = &ps_ctxt->s_search_results_8x8;
9092 hme_init_search_results(
9093 ps_search_results,
9094 i4_num_ref_fpel,
9095 ps_refine_prms->i4_num_fpel_results,
9096 ps_refine_prms->i4_num_results_per_part,
9097 e_search_blk_size,
9098 0,
9099 0,
9100 &ps_ctxt->au1_is_past[0]);
9101 for(pred_lx = 0; pred_lx < 2; pred_lx++)
9102 {
9103 hme_init_pred_ctxt_no_encode(
9104 &ps_search_results->as_pred_ctxt[pred_lx],
9105 ps_search_results,
9106 &as_top_neighbours[0],
9107 &as_left_neighbours[0],
9108 &ps_candt_prj_coloc[0],
9109 ps_candt_zeromv,
9110 ps_candt_zeromv,
9111 pred_lx,
9112 lambda_inp,
9113 ps_refine_prms->lambda_q_shift,
9114 &ps_ctxt->apu1_ref_bits_tlu_lc[0],
9115 &ps_ctxt->ai2_ref_scf[0]);
9116 }
9117 }
9118
9119 /*********************************************************************/
9120 /* Initialize the dyn. search range params. for each reference index */
9121 /* in current layer ctxt */
9122 /*********************************************************************/
9123 /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
9124 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
9125 {
9126 WORD32 ref_ctr;
9127
9128 for(ref_ctr = 0; ref_ctr < s_mv_update_prms.i4_num_ref; ref_ctr++)
9129 {
9130 INIT_DYN_SEARCH_PRMS(
9131 &ps_ctxt->s_coarse_dyn_range_prms
9132 .as_dyn_range_prms[ps_refine_prms->i4_layer_id][ref_ctr],
9133 ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
9134 }
9135 }
9136
9137 /* Next set up initial candidates according to a given set of rules. */
9138 /* The number of initial candidates affects the quality of ME in the */
9139 /* case of motion with multiple degrees of freedom. In case of simple */
9140 /* translational motion, a current and a few causal and non causal */
9141 /* candts would suffice. More candidates help to cover more complex */
9142 /* cases like partitions, rotation/zoom, occlusion in/out, fine motion */
9143 /* where multiple ref helps etc. */
9144 /* The candidate choice also depends on the following parameters. */
9145 /* e_search_complexity: SRCH_CX_LOW, SRCH_CX_MED, SRCH_CX_HIGH */
9146 /* Whether we encode or not, and the type of search across reference */
9147 /* i.e. the previous layer may have been explicit/implicit and curr */
9148 /* layer may be explicit/implicit */
9149
9150     /* 0, 0, L, T, projected coloc best always present by default */
9151 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(ZERO_MV, e_me_quality_presets);
9152 ps_candt_zeromv = ps_search_candts[id].ps_search_node;
9153 ps_search_candts[id].u1_num_steps_refine = 0;
9154 ps_candt_zeromv->s_mv.i2_mvx = 0;
9155 ps_candt_zeromv->s_mv.i2_mvy = 0;
9156
9157 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(SPATIAL_LEFT0, e_me_quality_presets);
9158 ps_candt_l = ps_search_candts[id].ps_search_node;
9159 ps_search_candts[id].u1_num_steps_refine = 0;
9160
9161 /* Even in ME_HIGH_SPEED mode, in layer 0, blocks */
9162 /* not at the CTB boundary use the causal T and */
9163 /* not the projected T, although the candidate is */
9164 /* still pointed to by ps_candt_prj_t[0] */
9165 if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9166 {
9167 /* Using Projected top to eliminate sync */
9168 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9169 PROJECTED_TOP0, e_me_quality_presets);
9170 ps_candt_prj_t[0] = ps_search_candts[id].ps_search_node;
9171 ps_search_candts[id].u1_num_steps_refine = 1;
9172 }
9173 else
9174 {
9175 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9176 SPATIAL_TOP0, e_me_quality_presets);
9177 ps_candt_t = ps_search_candts[id].ps_search_node;
9178 ps_search_candts[id].u1_num_steps_refine = 0;
9179 }
9180
9181 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9182 PROJECTED_COLOC0, e_me_quality_presets);
9183 ps_candt_prj_coloc[0] = ps_search_candts[id].ps_search_node;
9184 ps_search_candts[id].u1_num_steps_refine = 1;
9185
9186 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9187 PROJECTED_COLOC1, e_me_quality_presets);
9188 ps_candt_prj_coloc[1] = ps_search_candts[id].ps_search_node;
9189 ps_search_candts[id].u1_num_steps_refine = 1;
9190
9191 if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9192 {
9193 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9194 PROJECTED_TOP_RIGHT0, e_me_quality_presets);
9195 ps_candt_prj_tr[0] = ps_search_candts[id].ps_search_node;
9196 ps_search_candts[id].u1_num_steps_refine = 1;
9197
9198 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9199 PROJECTED_TOP_LEFT0, e_me_quality_presets);
9200 ps_candt_prj_tl[0] = ps_search_candts[id].ps_search_node;
9201 ps_search_candts[id].u1_num_steps_refine = 1;
9202 }
9203 else
9204 {
9205 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9206 SPATIAL_TOP_RIGHT0, e_me_quality_presets);
9207 ps_candt_tr = ps_search_candts[id].ps_search_node;
9208 ps_search_candts[id].u1_num_steps_refine = 0;
9209
9210 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9211 SPATIAL_TOP_LEFT0, e_me_quality_presets);
9212 ps_candt_tl = ps_search_candts[id].ps_search_node;
9213 ps_search_candts[id].u1_num_steps_refine = 0;
9214 }
9215
9216 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9217 PROJECTED_RIGHT0, e_me_quality_presets);
9218 ps_candt_prj_r[0] = ps_search_candts[id].ps_search_node;
9219 ps_search_candts[id].u1_num_steps_refine = 1;
9220
9221 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9222 PROJECTED_BOTTOM0, e_me_quality_presets);
9223 ps_candt_prj_b[0] = ps_search_candts[id].ps_search_node;
9224 ps_search_candts[id].u1_num_steps_refine = 1;
9225
9226 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9227 PROJECTED_BOTTOM_RIGHT0, e_me_quality_presets);
9228 ps_candt_prj_br[0] = ps_search_candts[id].ps_search_node;
9229 ps_search_candts[id].u1_num_steps_refine = 1;
9230
9231 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9232 PROJECTED_BOTTOM_LEFT0, e_me_quality_presets);
9233 ps_candt_prj_bl[0] = ps_search_candts[id].ps_search_node;
9234 ps_search_candts[id].u1_num_steps_refine = 1;
9235
9236 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9237 PROJECTED_RIGHT1, e_me_quality_presets);
9238 ps_candt_prj_r[1] = ps_search_candts[id].ps_search_node;
9239 ps_search_candts[id].u1_num_steps_refine = 1;
9240
9241 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9242 PROJECTED_BOTTOM1, e_me_quality_presets);
9243 ps_candt_prj_b[1] = ps_search_candts[id].ps_search_node;
9244 ps_search_candts[id].u1_num_steps_refine = 1;
9245
9246 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9247 PROJECTED_BOTTOM_RIGHT1, e_me_quality_presets);
9248 ps_candt_prj_br[1] = ps_search_candts[id].ps_search_node;
9249 ps_search_candts[id].u1_num_steps_refine = 1;
9250
9251 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9252 PROJECTED_BOTTOM_LEFT1, e_me_quality_presets);
9253 ps_candt_prj_bl[1] = ps_search_candts[id].ps_search_node;
9254 ps_search_candts[id].u1_num_steps_refine = 1;
9255
9256 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(PROJECTED_TOP1, e_me_quality_presets);
9257 ps_candt_prj_t[1] = ps_search_candts[id].ps_search_node;
9258 ps_search_candts[id].u1_num_steps_refine = 1;
9259
9260 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9261 PROJECTED_TOP_RIGHT1, e_me_quality_presets);
9262 ps_candt_prj_tr[1] = ps_search_candts[id].ps_search_node;
9263 ps_search_candts[id].u1_num_steps_refine = 1;
9264
9265 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9266 PROJECTED_TOP_LEFT1, e_me_quality_presets);
9267 ps_candt_prj_tl[1] = ps_search_candts[id].ps_search_node;
9268 ps_search_candts[id].u1_num_steps_refine = 1;
9269
9270 /*************************************************************************/
9271 /* Now that the candidates have been ordered, to choose the right number */
9272 /* of initial candidates. */
9273 /*************************************************************************/
9274 if(curr_layer_implicit && !prev_layer_implicit)
9275 {
9276 if(e_search_complexity == SEARCH_CX_LOW)
9277 num_init_candts = 7;
9278 else if(e_search_complexity == SEARCH_CX_MED)
9279 num_init_candts = 13;
9280 else if(e_search_complexity == SEARCH_CX_HIGH)
9281 num_init_candts = 18;
9282 else
9283 ASSERT(0);
9284 }
9285 else
9286 {
9287 if(e_search_complexity == SEARCH_CX_LOW)
9288 num_init_candts = 5;
9289 else if(e_search_complexity == SEARCH_CX_MED)
9290 num_init_candts = 11;
9291 else if(e_search_complexity == SEARCH_CX_HIGH)
9292 num_init_candts = 16;
9293 else
9294 ASSERT(0);
9295 }
9296
9297 if(ME_XTREME_SPEED_25 == e_me_quality_presets)
9298 {
9299 num_init_candts = NUM_INIT_SEARCH_CANDS_IN_L1_AND_L2_ME_IN_XS25;
9300 }
9301
9302 /*************************************************************************/
9303 /* The following search parameters are fixed throughout the search across*/
9304 /* all blks. So these are configured outside processing loop */
9305 /*************************************************************************/
9306 s_search_prms_blk.i4_num_init_candts = num_init_candts;
9307 s_search_prms_blk.i4_start_step = 1;
9308 s_search_prms_blk.i4_use_satd = 0;
9309 s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
9310 /* we use recon only for encoded layers, otherwise it is not available */
9311 s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
9312
9313 s_search_prms_blk.ps_search_candts = ps_search_candts;
9314 /* We use the same mv_range for all ref. pic. So assign to member 0 */
9315 if(s_search_prms_blk.i4_use_rec)
9316 s_search_prms_blk.aps_mv_range[0] = &s_range_prms_rec;
9317 else
9318 s_search_prms_blk.aps_mv_range[0] = &s_range_prms_inp;
9319 /*************************************************************************/
9320 /* Initialize coordinates. Meaning as follows */
9321 /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks */
9322 /* blk_y : same as above, y coord. */
9323 /* num_blks_in_this_ctb : number of blks in this given ctb that starts */
9324 /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries. */
9325     /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left  */
9326 /* corner of the picture. Always multiple of 64. */
9327 /* blk_id_in_ctb : encode order id of the blk in the ctb. */
9328 /*************************************************************************/
9329 blk_y = 0;
9330 blk_id_in_ctb = 0;
9331
9332 GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
9333
9334     /* Get the number of sync units in a row based on encode/non encode layer */
9335 num_sync_units_in_row = num_blks_in_row;
9336
9337 /*************************************************************************/
9338 /* Picture limit on all 4 sides. This will be used to set mv limits for */
9339     /* every block given its coordinate. Note this assumes that the min amt  */
9340     /* of padding to right of pic is equal to the blk size. If we go all the */
9341     /* way up to 64x64, then the min padding on right side of picture should */
9342 /* be 64, and also on bottom side of picture. */
9343 /*************************************************************************/
9344 SET_PIC_LIMIT(
9345 s_pic_limit_inp,
9346 ps_curr_layer->i4_pad_x_inp,
9347 ps_curr_layer->i4_pad_y_inp,
9348 ps_curr_layer->i4_wd,
9349 ps_curr_layer->i4_ht,
9350 s_search_prms_blk.i4_num_steps_post_refine);
9351
9352 SET_PIC_LIMIT(
9353 s_pic_limit_rec,
9354 ps_curr_layer->i4_pad_x_rec,
9355 ps_curr_layer->i4_pad_y_rec,
9356 ps_curr_layer->i4_wd,
9357 ps_curr_layer->i4_ht,
9358 s_search_prms_blk.i4_num_steps_post_refine);
9359
9360 /*************************************************************************/
9361 /* set the MV limit per ref. pic. */
9362 /* - P pic. : Based on the config params. */
9363 /* - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
9364 /*************************************************************************/
9365 {
9366 WORD32 ref_ctr;
9367 /* Only for B/b pic. */
9368 if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
9369 {
9370 WORD16 i2_mv_y_per_poc, i2_max_mv_y;
9371 WORD32 cur_poc, ref_poc, abs_poc_diff;
9372
9373 cur_poc = ps_ctxt->i4_curr_poc;
9374
9375 /* Get abs MAX for symmetric search */
9376 i2_mv_y_per_poc = MAX(
9377 ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[ps_refine_prms->i4_layer_id],
9378 (ABS(ps_ctxt->s_coarse_dyn_range_prms
9379 .i2_dyn_min_y_per_poc[ps_refine_prms->i4_layer_id])));
9380
9381 for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
9382 {
9383 ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr];
9384 abs_poc_diff = ABS((cur_poc - ref_poc));
9385 /* Get the cur. max MV based on POC distance */
9386 i2_max_mv_y = i2_mv_y_per_poc * abs_poc_diff;
9387 i2_max_mv_y = MIN(i2_max_mv_y, ps_curr_layer->i2_max_mv_y);
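                /* Illustrative numbers: a tracked per-POC vertical range of 8    */
                /* and a POC distance of 3 give a range of +/-24 here, unless the */
                /* configured layer maximum (i2_max_mv_y) is smaller.             */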
9388
9389 as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
9390 as_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y;
9391 as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
9392 as_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y;
9393 }
9394 }
9395 else
9396 {
9397 /* Set the Config. File Params for P pic. */
9398 for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
9399 {
9400 as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
9401 as_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y;
9402 as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
9403 as_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y;
9404 }
9405 }
9406 }
9407
9408 /* EIID: Calculate threshold based on quality preset and/or temporal layers */
9409 if(e_me_quality_presets == ME_MEDIUM_SPEED)
9410 {
9411 i4_threshold_multiplier = 1;
9412 i4_threshold_divider = 4;
9413 }
9414 else if(e_me_quality_presets == ME_HIGH_SPEED)
9415 {
9416 i4_threshold_multiplier = 1;
9417 i4_threshold_divider = 2;
9418 }
9419 else if((e_me_quality_presets == ME_XTREME_SPEED) || (e_me_quality_presets == ME_XTREME_SPEED_25))
9420 {
9421 #if OLD_XTREME_SPEED
9422 /* Hard coding the temporal ID value to 1, if it is older xtreme speed */
9423 i4_temporal_layer = 1;
9424 #endif
9425 if(i4_temporal_layer == 0)
9426 {
9427 i4_threshold_multiplier = 3;
9428 i4_threshold_divider = 4;
9429 }
9430 else if(i4_temporal_layer == 1)
9431 {
9432 i4_threshold_multiplier = 3;
9433 i4_threshold_divider = 4;
9434 }
9435 else if(i4_temporal_layer == 2)
9436 {
9437 i4_threshold_multiplier = 1;
9438 i4_threshold_divider = 1;
9439 }
9440 else
9441 {
9442 i4_threshold_multiplier = 5;
9443 i4_threshold_divider = 4;
9444 }
9445 }
9446 else if(e_me_quality_presets == ME_HIGH_QUALITY)
9447 {
9448 i4_threshold_multiplier = 1;
9449 i4_threshold_divider = 1;
9450 }
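    /* Net effect: the early inter/intra decision threshold is scaled by         */
    /* (multiplier / divider), e.g. 1/4 for ME_MEDIUM_SPEED, 1/2 for             */
    /* ME_HIGH_SPEED, 1 for ME_HIGH_QUALITY, and per-temporal-layer values for   */
    /* the xtreme speed presets.                                                 */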
9451
9452 /*************************************************************************/
9453 /*************************************************************************/
9454 /*************************************************************************/
9455 /* START OF THE CORE LOOP */
9456 /* If Encode is 0, then we just loop over each blk */
9457 /*************************************************************************/
9458 /*************************************************************************/
9459 /*************************************************************************/
9460 while(0 == end_of_frame)
9461 {
9462 job_queue_t *ps_job;
9463 ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_row; //EIID
9464         WORD32 i4_ctb_row_ctr; //CTB row counter, i.e. (blk row no. / 4)
9465 WORD32 i4_num_ctbs_in_row = (num_blks_in_row + 3) / 4; //calculations verified for L1 only
9466 //+3 to get ceil values when divided by 4
9467 WORD32 i4_num_4x4_blocks_in_ctb_at_l1 =
9468 8 * 8; //considering CTB size 32x32 at L1. hardcoded for now
9469             //if a variable for the CTB size exists, use it and derive this value from it
9470 WORD32 offset_val, check_dep_pos, set_dep_pos;
9471 void *pv_hme_dep_mngr;
9472 ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row;
9473
9474 /* Get the current layer HME Dep Mngr */
9475 /* Note : Use layer_id - 1 in HME layers */
9476
9477 pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_refine_prms->i4_layer_id - 1];
9478
9479 /* Get the current row from the job queue */
9480 ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
9481 ps_multi_thrd_ctxt, lyr_job_type, 1, i4_ping_pong);
9482
9483 /* If all rows are done, set the end of process flag to 1, */
9484 /* and the current row to -1 */
9485 if(NULL == ps_job)
9486 {
9487 blk_y = -1;
9488 end_of_frame = 1;
9489
9490 continue;
9491 }
9492
9493 if(1 == ps_ctxt->s_frm_prms.is_i_pic)
9494 {
9495 /* set the output dependency of current row */
9496 ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
9497 continue;
9498 }
9499
9500 blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
9501 blk_x = 0;
9502 i4_ctb_x = 0;
9503
9504 /* wait for Corresponding Pre intra Job to be completed */
9505 if(1 == ps_refine_prms->i4_layer_id)
9506 {
9507 volatile UWORD32 i4_l1_done;
9508 volatile UWORD32 *pi4_l1_done;
9509 pi4_l1_done = (volatile UWORD32 *)&ps_multi_thrd_ctxt
9510 ->aai4_l1_pre_intra_done[i4_ping_pong][blk_y >> 2];
9511 i4_l1_done = *pi4_l1_done;
9512 while(!i4_l1_done)
9513 {
9514 i4_l1_done = *pi4_l1_done;
9515 }
9516 }
9517 /* Set Variables for Dep. Checking and Setting */
9518 set_dep_pos = blk_y + 1;
9519 if(blk_y > 0)
9520 {
9521 offset_val = 2;
9522 check_dep_pos = blk_y - 1;
9523 }
9524 else
9525 {
9526 /* First row should run without waiting */
9527 offset_val = -1;
9528 check_dep_pos = 0;
9529 }
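        /* With offset_val = 2, the row-row sync below waits until the row above  */
        /* (check_dep_pos) has progressed two units past the current column, i.e. */
        /* until the top and top-right neighbour blocks have been searched.       */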
9530
9531 /* EIID: calculate ed_blk_ctxt pointer for current row */
9532         /* valid for only layer-1. not verified and used for other layers */
9533 i4_ctb_row_ctr = blk_y / 4;
9534 ps_ed_blk_ctxt_curr_row =
9535 ps_ctxt->ps_ed_blk + (i4_ctb_row_ctr * i4_num_ctbs_in_row *
9536 i4_num_4x4_blocks_in_ctb_at_l1); //valid for L1 only
9537 ps_ed_ctb_l1_row = ps_ctxt->ps_ed_ctb_l1 + (i4_ctb_row_ctr * i4_num_ctbs_in_row);
9538
9539 /* if non-encode layer then i4_ctb_x will be same as blk_x */
9540         /* loop over all the units in a row */
9541 for(; i4_ctb_x < num_sync_units_in_row; i4_ctb_x++)
9542 {
9543             ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_ctb; //EIID
9544 ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_curr;
9545 WORD32 i4_ctb_blk_ctr = i4_ctb_x / 4;
9546
9547 /* Wait till top row block is processed */
9548 /* Currently checking till top right block*/
9549
9550 /* Disabled since all candidates, except for */
9551 /* L and C, are projected from the coarser layer, */
9552 /* only in ME_HIGH_SPEED mode */
9553 if((ME_MEDIUM_SPEED > e_me_quality_presets))
9554 {
9555 if(i4_ctb_x < (num_sync_units_in_row - 1))
9556 {
9557 ihevce_dmgr_chk_row_row_sync(
9558 pv_hme_dep_mngr,
9559 i4_ctb_x,
9560 offset_val,
9561 check_dep_pos,
9562 0, /* Col Tile No. : Not supported in PreEnc*/
9563 ps_ctxt->thrd_id);
9564 }
9565 }
9566
9567 {
9568 /* for non encoder layer only one block is processed */
9569 num_blks_in_this_ctb = 1;
9570 }
9571
9572 /* EIID: derive ed_ctxt ptr for current CTB */
9573 ps_ed_blk_ctxt_curr_ctb =
9574 ps_ed_blk_ctxt_curr_row +
9575 (i4_ctb_blk_ctr *
9576 i4_num_4x4_blocks_in_ctb_at_l1); //currently valid for l1 layer only
9577 ps_ed_ctb_l1_curr = ps_ed_ctb_l1_row + i4_ctb_blk_ctr;
9578
9579 /* loop over all the blocks in CTB will always be 1 */
9580 for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
9581 {
9582 {
9583 /* non encode layer */
9584 blk_x = i4_ctb_x;
9585 blk_id_in_full_ctb = 0;
9586 s_search_prms_blk.i4_cu_x_off = s_search_prms_blk.i4_cu_y_off = 0;
9587 }
9588
9589                 /* get the current input blk pointer */
9590 pos_x = blk_x << blk_size_shift;
9591 pos_y = blk_y << blk_size_shift;
9592 pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
9593
9594 /*********************************************************************/
9595 /* replicate the inp buffer at blk or ctb level for each ref id, */
9596 /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
9597 /* thereby avoiding a bloat up of memory. If we did all references */
9598 /* weighted pred, we will end up with a duplicate copy of each ref */
9599 /* at each layer, since we need to preserve the original reference. */
9600 /* ToDo: Need to observe performance with this mechanism and compare */
9601 /* with case where ref is weighted. */
9602 /*********************************************************************/
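                /* Illustration: for a reference with an effective weight of 2,  */
                /* the input block is halved once here instead of keeping a      */
                /* separate, doubled copy of that entire reference picture.      */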
9603 if(blk_id_in_ctb == 0)
9604 {
9605 fp_get_wt_inp(
9606 ps_curr_layer,
9607 &ps_ctxt->s_wt_pred,
9608 unit_size,
9609 pos_x,
9610 pos_y,
9611 unit_size,
9612 ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
9613 ps_ctxt->i4_wt_pred_enable_flag);
9614 }
9615
9616 s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
9617 s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
9618 /* Select search results from a suitable search result in the context */
9619 {
9620 ps_search_results = &ps_ctxt->s_search_results_8x8;
9621 }
9622
9623 s_search_prms_blk.ps_search_results = ps_search_results;
9624
9625 /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
9626 hme_reset_search_results(
9627 ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
9628
9629 /* Loop across different Ref IDx */
9630 for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref_fpel; i1_ref_idx++)
9631 {
9632 S32 next_blk_offset = (e_search_blk_size == BLK_16x16) ? 22 : 12;
9633 S32 prev_blk_offset = 6;
9634 S32 resultid;
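                        /* next/prev_blk_offset are pel offsets around the current */
                        /* block's position; they are used below to project the    */
                        /* coarser layer MVs of the neighbouring blocks            */
                        /* (T, TR, R, B, BR, ...).                                 */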
9635
9636 /*********************************************************************/
9637 /* For every blk in the picture, the search range needs to be derived*/
9638 /* Any blk can have any mv, but practical search constraints are */
9639 /* imposed by the picture boundary and amt of padding. */
9640 /*********************************************************************/
9641 /* MV limit is different based on ref. PIC */
9642 hme_derive_search_range(
9643 &s_range_prms_inp,
9644 &s_pic_limit_inp,
9645 &as_mv_limit[i1_ref_idx],
9646 pos_x,
9647 pos_y,
9648 blk_wd,
9649 blk_ht);
9650 hme_derive_search_range(
9651 &s_range_prms_rec,
9652 &s_pic_limit_rec,
9653 &as_mv_limit[i1_ref_idx],
9654 pos_x,
9655 pos_y,
9656 blk_wd,
9657 blk_ht);
9658
9659 s_search_prms_blk.i1_ref_idx = i1_ref_idx;
9660 ps_candt_zeromv->i1_ref_idx = i1_ref_idx;
9661
9662 i4_num_srch_cands = 1;
9663
9664 if(1 != ps_refine_prms->i4_layer_id)
9665 {
9666 S32 x, y;
9667 x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
9668 y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
9669
9670 if(ME_MEDIUM_SPEED > e_me_quality_presets)
9671 {
9672 hme_get_spatial_candt(
9673 ps_curr_layer,
9674 e_search_blk_size,
9675 blk_x,
9676 blk_y,
9677 i1_ref_idx,
9678 &as_top_neighbours[0],
9679 &as_left_neighbours[0],
9680 0,
9681 ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
9682 0,
9683 ps_refine_prms->i4_encode);
9684
9685 *ps_candt_tr = as_top_neighbours[3];
9686 *ps_candt_t = as_top_neighbours[1];
9687 *ps_candt_tl = as_top_neighbours[0];
9688 i4_num_srch_cands += 3;
9689 }
9690 else
9691 {
9692 layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
9693 S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
9694 S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
9695 search_node_t *ps_search_node;
9696 S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
9697 hme_mv_t *ps_mv, *ps_mv_base;
9698 S08 *pi1_ref_idx, *pi1_ref_idx_base;
9699 S32 jump = 1, mvs_in_blk, mvs_in_row;
9700 S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
9701
9702 if(i4_blk_size1 != i4_blk_size2)
9703 {
9704 blk_x_temp <<= 1;
9705 blk_y_temp <<= 1;
9706 jump = 2;
9707 if((i4_blk_size1 << 2) == i4_blk_size2)
9708 {
9709 blk_x_temp <<= 1;
9710 blk_y_temp <<= 1;
9711 jump = 4;
9712 }
9713 }
9714
9715 mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
9716 mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
9717
9718                                 /* Adjust the blk coord to point to top left locn */
9719 blk_x_temp -= 1;
9720 blk_y_temp -= 1;
9721
9722 /* Pick up the mvs from the location */
9723 i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
9724 i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
9725
9726 ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
9727 pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
9728
9729 ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
9730 pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
9731
9732 ps_mv_base = ps_mv;
9733 pi1_ref_idx_base = pi1_ref_idx;
9734
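                                /* The base now points at the top-left neighbour;  */
                                /* stepping one row down (+ mvs_in_row) picks up   */
                                /* the left neighbour's MV, the only spatial       */
                                /* candidate fetched in this fast path.            */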
9735 ps_search_node = &as_left_neighbours[0];
9736 ps_mv = ps_mv_base + mvs_in_row;
9737 pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
9738 COPY_MV_TO_SEARCH_NODE(
9739 ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
9740
9741 i4_num_srch_cands++;
9742 }
9743 }
9744 else
9745 {
9746 S32 x, y;
9747 x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
9748 y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
9749
9750 if(ME_MEDIUM_SPEED > e_me_quality_presets)
9751 {
9752 hme_get_spatial_candt_in_l1_me(
9753 ps_curr_layer,
9754 e_search_blk_size,
9755 blk_x,
9756 blk_y,
9757 i1_ref_idx,
9758 !ps_search_results->pu1_is_past[i1_ref_idx],
9759 &as_top_neighbours[0],
9760 &as_left_neighbours[0],
9761 0,
9762 ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
9763 0,
9764 ps_ctxt->s_frm_prms.u1_num_active_ref_l0,
9765 ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
9766
9767 *ps_candt_tr = as_top_neighbours[3];
9768 *ps_candt_t = as_top_neighbours[1];
9769 *ps_candt_tl = as_top_neighbours[0];
9770
9771 i4_num_srch_cands += 3;
9772 }
9773 else
9774 {
9775 layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
9776 S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
9777 S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
9778 S32 i4_mv_pos_in_implicit_array;
9779 search_node_t *ps_search_node;
9780 S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
9781 hme_mv_t *ps_mv, *ps_mv_base;
9782 S08 *pi1_ref_idx, *pi1_ref_idx_base;
9783 S32 jump = 1, mvs_in_blk, mvs_in_row;
9784 S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
9785 U08 u1_pred_dir = !ps_search_results->pu1_is_past[i1_ref_idx];
9786 S32 i4_num_results_in_given_dir =
9787 ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
9788 ps_ctxt->s_frm_prms.u1_num_active_ref_l1)
9789 : (ps_layer_mvbank->i4_num_mvs_per_ref *
9790 ps_ctxt->s_frm_prms.u1_num_active_ref_l0));
9791
9792 if(i4_blk_size1 != i4_blk_size2)
9793 {
9794 blk_x_temp <<= 1;
9795 blk_y_temp <<= 1;
9796 jump = 2;
9797 if((i4_blk_size1 << 2) == i4_blk_size2)
9798 {
9799 blk_x_temp <<= 1;
9800 blk_y_temp <<= 1;
9801 jump = 4;
9802 }
9803 }
9804
9805 mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
9806 mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
9807
9808                                 /* Adjust the blk coord to point to top left locn */
9809 blk_x_temp -= 1;
9810 blk_y_temp -= 1;
9811
9812 /* Pick up the mvs from the location */
9813 i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
9814 i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
9815
9816 i4_offset +=
9817 ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
9818 ps_ctxt->s_frm_prms.u1_num_active_ref_l0)
9819 : 0);
9820
9821 ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
9822 pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
9823
9824 ps_mv_base = ps_mv;
9825 pi1_ref_idx_base = pi1_ref_idx;
9826
9827 {
9828 /* ps_mv and pi1_ref_idx now point to the top left locn */
9829 ps_search_node = &as_left_neighbours[0];
9830 ps_mv = ps_mv_base + mvs_in_row;
9831 pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
9832
9833 i4_mv_pos_in_implicit_array =
9834 hme_find_pos_of_implicitly_stored_ref_id(
9835 pi1_ref_idx, i1_ref_idx, 0, i4_num_results_in_given_dir);
9836
9837 if(-1 != i4_mv_pos_in_implicit_array)
9838 {
9839 COPY_MV_TO_SEARCH_NODE(
9840 ps_search_node,
9841 &ps_mv[i4_mv_pos_in_implicit_array],
9842 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
9843 i1_ref_idx,
9844 shift);
9845 }
9846 else
9847 {
9848 ps_search_node->u1_is_avail = 0;
9849 ps_search_node->s_mv.i2_mvx = 0;
9850 ps_search_node->s_mv.i2_mvy = 0;
9851 ps_search_node->i1_ref_idx = i1_ref_idx;
9852 }
9853
9854 i4_num_srch_cands++;
9855 }
9856 }
9857 }
9858
9859 *ps_candt_l = as_left_neighbours[0];
9860
9861 /* when 16x16 is searched in an encode layer, and the prev layer */
9862 /* stores results for 4x4 blks, we project 5 candts corresponding */
9863                         /* to (2,2), (2,14), (14,2), (14,14) and 2nd best of (2,2)       */
9864 /* However in other cases, only 2,2 best and 2nd best reqd */
9865 resultid = 0;
9866 pf_hme_project_coloc_candt(
9867 ps_candt_prj_coloc[0],
9868 ps_curr_layer,
9869 ps_coarse_layer,
9870 pos_x + 2,
9871 pos_y + 2,
9872 i1_ref_idx,
9873 resultid);
9874
9875 i4_num_srch_cands++;
9876
9877 resultid = 1;
9878 if(num_results_prev_layer > 1)
9879 {
9880 pf_hme_project_coloc_candt(
9881 ps_candt_prj_coloc[1],
9882 ps_curr_layer,
9883 ps_coarse_layer,
9884 pos_x + 2,
9885 pos_y + 2,
9886 i1_ref_idx,
9887 resultid);
9888
9889 i4_num_srch_cands++;
9890 }
9891
9892 resultid = 0;
9893
9894 if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9895 {
9896 pf_hme_project_coloc_candt(
9897 ps_candt_prj_t[0],
9898 ps_curr_layer,
9899 ps_coarse_layer,
9900 pos_x,
9901 pos_y - prev_blk_offset,
9902 i1_ref_idx,
9903 resultid);
9904
9905 i4_num_srch_cands++;
9906 }
9907
9908 {
9909 pf_hme_project_coloc_candt(
9910 ps_candt_prj_br[0],
9911 ps_curr_layer,
9912 ps_coarse_layer,
9913 pos_x + next_blk_offset,
9914 pos_y + next_blk_offset,
9915 i1_ref_idx,
9916 resultid);
9917 pf_hme_project_coloc_candt(
9918 ps_candt_prj_bl[0],
9919 ps_curr_layer,
9920 ps_coarse_layer,
9921 pos_x - prev_blk_offset,
9922 pos_y + next_blk_offset,
9923 i1_ref_idx,
9924 resultid);
9925 pf_hme_project_coloc_candt(
9926 ps_candt_prj_r[0],
9927 ps_curr_layer,
9928 ps_coarse_layer,
9929 pos_x + next_blk_offset,
9930 pos_y,
9931 i1_ref_idx,
9932 resultid);
9933 pf_hme_project_coloc_candt(
9934 ps_candt_prj_b[0],
9935 ps_curr_layer,
9936 ps_coarse_layer,
9937 pos_x,
9938 pos_y + next_blk_offset,
9939 i1_ref_idx,
9940 resultid);
9941
9942 i4_num_srch_cands += 4;
9943
9944 if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9945 {
9946 pf_hme_project_coloc_candt(
9947 ps_candt_prj_tr[0],
9948 ps_curr_layer,
9949 ps_coarse_layer,
9950 pos_x + next_blk_offset,
9951 pos_y - prev_blk_offset,
9952 i1_ref_idx,
9953 resultid);
9954 pf_hme_project_coloc_candt(
9955 ps_candt_prj_tl[0],
9956 ps_curr_layer,
9957 ps_coarse_layer,
9958 pos_x - prev_blk_offset,
9959 pos_y - prev_blk_offset,
9960 i1_ref_idx,
9961 resultid);
9962
9963 i4_num_srch_cands += 2;
9964 }
9965 }
9966 if((num_results_prev_layer > 1) && (e_search_complexity >= SEARCH_CX_MED))
9967 {
9968 resultid = 1;
9969 pf_hme_project_coloc_candt(
9970 ps_candt_prj_br[1],
9971 ps_curr_layer,
9972 ps_coarse_layer,
9973 pos_x + next_blk_offset,
9974 pos_y + next_blk_offset,
9975 i1_ref_idx,
9976 resultid);
9977 pf_hme_project_coloc_candt(
9978 ps_candt_prj_bl[1],
9979 ps_curr_layer,
9980 ps_coarse_layer,
9981 pos_x - prev_blk_offset,
9982 pos_y + next_blk_offset,
9983 i1_ref_idx,
9984 resultid);
9985 pf_hme_project_coloc_candt(
9986 ps_candt_prj_r[1],
9987 ps_curr_layer,
9988 ps_coarse_layer,
9989 pos_x + next_blk_offset,
9990 pos_y,
9991 i1_ref_idx,
9992 resultid);
9993 pf_hme_project_coloc_candt(
9994 ps_candt_prj_b[1],
9995 ps_curr_layer,
9996 ps_coarse_layer,
9997 pos_x,
9998 pos_y + next_blk_offset,
9999 i1_ref_idx,
10000 resultid);
10001
10002 i4_num_srch_cands += 4;
10003
10004 pf_hme_project_coloc_candt(
10005 ps_candt_prj_tr[1],
10006 ps_curr_layer,
10007 ps_coarse_layer,
10008 pos_x + next_blk_offset,
10009 pos_y - prev_blk_offset,
10010 i1_ref_idx,
10011 resultid);
10012 pf_hme_project_coloc_candt(
10013 ps_candt_prj_tl[1],
10014 ps_curr_layer,
10015 ps_coarse_layer,
10016 pos_x - prev_blk_offset,
10017 pos_y - prev_blk_offset,
10018 i1_ref_idx,
10019 resultid);
10020 pf_hme_project_coloc_candt(
10021 ps_candt_prj_t[1],
10022 ps_curr_layer,
10023 ps_coarse_layer,
10024 pos_x,
10025 pos_y - prev_blk_offset,
10026 i1_ref_idx,
10027 resultid);
10028
10029 i4_num_srch_cands += 3;
10030 }
10031
10032 /* Note this block also clips the MV range for all candidates */
10033 #ifdef _DEBUG
10034 {
10035 S32 candt;
10036 range_prms_t *ps_range_prms;
10037
10038 S32 num_ref_valid = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
10039 for(candt = 0; candt < i4_num_srch_cands; candt++)
10040 {
10041 search_node_t *ps_search_node;
10042
10043 ps_search_node =
10044 s_search_prms_blk.ps_search_candts[candt].ps_search_node;
10045
10046 ps_range_prms = s_search_prms_blk.aps_mv_range[0];
10047
10048 if((ps_search_node->i1_ref_idx >= num_ref_valid) ||
10049 (ps_search_node->i1_ref_idx < 0))
10050 {
10051 ASSERT(0);
10052 }
10053 }
10054 }
10055 #endif
10056
10057 {
10058 S32 srch_cand;
10059 S32 num_unique_nodes = 0;
10060 S32 num_nodes_searched = 0;
10061 S32 num_best_cand = 0;
10062 S08 i1_grid_enable = 0;
10063 search_node_t as_best_two_proj_node[TOT_NUM_PARTS * 2];
10064 /* has list of valid partition to search terminated by -1 */
10065 S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
10066 S32 center_x;
10067 S32 center_y;
10068
10069 /* indicates if the centre point of grid needs to be explicitly added for search */
10070 S32 add_centre = 0;
10071
10072 memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
10073 center_x = ps_candt_prj_coloc[0]->s_mv.i2_mvx;
10074 center_y = ps_candt_prj_coloc[0]->s_mv.i2_mvy;
10075
10076 for(srch_cand = 0;
10077 (srch_cand < i4_num_srch_cands) &&
10078 (num_unique_nodes <= s_search_prms_blk.i4_num_init_candts);
10079 srch_cand++)
10080 {
10081 search_node_t s_search_node_temp =
10082 s_search_prms_blk.ps_search_candts[srch_cand].ps_search_node[0];
10083
10084 s_search_node_temp.i1_ref_idx = i1_ref_idx; //TEMP FIX;
10085
10086                             /* Clip the motion vectors as well here, since after clipping
10087                             two candidates can become the same and will then be removed during deduplication */
10088 CLIP_MV_WITHIN_RANGE(
10089 s_search_node_temp.s_mv.i2_mvx,
10090 s_search_node_temp.s_mv.i2_mvy,
10091 s_search_prms_blk.aps_mv_range[0],
10092 ps_refine_prms->i4_num_steps_fpel_refine,
10093 ps_refine_prms->i4_num_steps_hpel_refine,
10094 ps_refine_prms->i4_num_steps_qpel_refine);
10095
10096 /* PT_C */
10097 INSERT_NEW_NODE(
10098 as_unique_search_nodes,
10099 num_unique_nodes,
10100 s_search_node_temp,
10101 0,
10102 au4_unique_node_map,
10103 center_x,
10104 center_y,
10105 1);
10106
10107 num_nodes_searched += 1;
10108 }
10109 num_unique_nodes =
10110 MIN(num_unique_nodes, s_search_prms_blk.i4_num_init_candts);
10111
10112 /* If number of candidates projected/number of candidates to be refined are more than 2,
10113 then filter out and choose the best two here */
10114 if(num_unique_nodes >= 2)
10115 {
10116 S32 num_results;
10117 S32 cnt;
10118 S32 *pi4_valid_part_ids;
10119 s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
10120 s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
10121 pi4_valid_part_ids = &ai4_valid_part_ids[0];
10122
10123 /* pi4_valid_part_ids is updated inside */
10124 hme_pred_search_no_encode(
10125 &s_search_prms_blk,
10126 ps_curr_layer,
10127 &ps_ctxt->s_wt_pred,
10128 pi4_valid_part_ids,
10129 1,
10130 e_me_quality_presets,
10131 i1_grid_enable,
10132 (ihevce_me_optimised_function_list_t *)
10133 ps_ctxt->pv_me_optimised_function_list
10134
10135 );
10136
10137 num_best_cand = 0;
10138 cnt = 0;
10139 num_results = ps_search_results->u1_num_results_per_part;
10140
10141 while((id = pi4_valid_part_ids[cnt++]) >= 0)
10142 {
10143 num_results =
10144 MIN(ps_refine_prms->pu1_num_best_results[id], num_results);
10145
10146 for(i = 0; i < num_results; i++)
10147 {
10148 search_node_t s_search_node_temp;
10149 s_search_node_temp =
10150 *(ps_search_results->aps_part_results[i1_ref_idx][id] + i);
10151 if(s_search_node_temp.i1_ref_idx >= 0)
10152 {
10153 INSERT_NEW_NODE_NOMAP(
10154 as_best_two_proj_node,
10155 num_best_cand,
10156 s_search_node_temp,
10157 0);
10158 }
10159 }
10160 }
10161 }
10162 else
10163 {
10164 add_centre = 1;
10165 num_best_cand = num_unique_nodes;
10166 as_best_two_proj_node[0] = as_unique_search_nodes[0];
10167 }
10168
10169 num_unique_nodes = 0;
10170 num_nodes_searched = 0;
10171
10172 if(1 == num_best_cand)
10173 {
10174 search_node_t s_search_node_temp = as_best_two_proj_node[0];
10175 S16 i2_mv_x = s_search_node_temp.s_mv.i2_mvx;
10176 S16 i2_mv_y = s_search_node_temp.s_mv.i2_mvy;
10177 S08 i1_ref_idx = s_search_node_temp.i1_ref_idx;
10178
10179 i1_grid_enable = 1;
10180
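                            /* Only one candidate survived: enable grid search and */
                            /* seed its 8 fullpel neighbours; the centre itself is */
                            /* re-added only when add_centre is set.               */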
10181 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10182 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10183 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10184
10185 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10186 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10187 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10188
10189 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10190 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10191 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10192
10193 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10194 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10195 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10196
10197 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10198 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10199 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10200
10201 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10202 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10203 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10204
10205 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10206 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10207 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10208
10209 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10210 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10211 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10212
10213 if(add_centre)
10214 {
10215 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10216 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10217 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10218 }
10219 }
10220 else
10221 {
10222 /* For the candidates where refinement was required, choose the best two */
10223 for(srch_cand = 0; srch_cand < num_best_cand; srch_cand++)
10224 {
10225 search_node_t s_search_node_temp = as_best_two_proj_node[srch_cand];
10226 WORD32 mv_x = s_search_node_temp.s_mv.i2_mvx;
10227 WORD32 mv_y = s_search_node_temp.s_mv.i2_mvy;
10228
10229 /* Because there may not be two best unique candidates (because of clipping),
10230 second best candidate can be uninitialized, ignore that */
10231 if(s_search_node_temp.s_mv.i2_mvx == INTRA_MV ||
10232 s_search_node_temp.i1_ref_idx < 0)
10233 {
10234 num_nodes_searched++;
10235 continue;
10236 }
10237
10238 /* PT_C */
10239                             /* Since the center point has already been evaluated and best results are persistent,
10240                             it will not be evaluated again */
10241                             if(add_centre) /* centre point added explicitly again if the search results are not updated */
10242 {
10243 INSERT_NEW_NODE(
10244 as_unique_search_nodes,
10245 num_unique_nodes,
10246 s_search_node_temp,
10247 0,
10248 au4_unique_node_map,
10249 center_x,
10250 center_y,
10251 1);
10252 }
10253
10254 /* PT_L */
10255 s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10256 s_search_node_temp.s_mv.i2_mvy = mv_y;
10257 INSERT_NEW_NODE(
10258 as_unique_search_nodes,
10259 num_unique_nodes,
10260 s_search_node_temp,
10261 0,
10262 au4_unique_node_map,
10263 center_x,
10264 center_y,
10265 1);
10266
10267 /* PT_T */
10268 s_search_node_temp.s_mv.i2_mvx = mv_x;
10269 s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10270 INSERT_NEW_NODE(
10271 as_unique_search_nodes,
10272 num_unique_nodes,
10273 s_search_node_temp,
10274 0,
10275 au4_unique_node_map,
10276 center_x,
10277 center_y,
10278 1);
10279
10280 /* PT_R */
10281 s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10282 s_search_node_temp.s_mv.i2_mvy = mv_y;
10283 INSERT_NEW_NODE(
10284 as_unique_search_nodes,
10285 num_unique_nodes,
10286 s_search_node_temp,
10287 0,
10288 au4_unique_node_map,
10289 center_x,
10290 center_y,
10291 1);
10292
10293 /* PT_B */
10294 s_search_node_temp.s_mv.i2_mvx = mv_x;
10295 s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10296 INSERT_NEW_NODE(
10297 as_unique_search_nodes,
10298 num_unique_nodes,
10299 s_search_node_temp,
10300 0,
10301 au4_unique_node_map,
10302 center_x,
10303 center_y,
10304 1);
10305
10306 /* PT_TL */
10307 s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10308 s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10309 INSERT_NEW_NODE(
10310 as_unique_search_nodes,
10311 num_unique_nodes,
10312 s_search_node_temp,
10313 0,
10314 au4_unique_node_map,
10315 center_x,
10316 center_y,
10317 1);
10318
10319 /* PT_TR */
10320 s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10321 s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10322 INSERT_NEW_NODE(
10323 as_unique_search_nodes,
10324 num_unique_nodes,
10325 s_search_node_temp,
10326 0,
10327 au4_unique_node_map,
10328 center_x,
10329 center_y,
10330 1);
10331
10332 /* PT_BL */
10333 s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10334 s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10335 INSERT_NEW_NODE(
10336 as_unique_search_nodes,
10337 num_unique_nodes,
10338 s_search_node_temp,
10339 0,
10340 au4_unique_node_map,
10341 center_x,
10342 center_y,
10343 1);
10344
10345 /* PT_BR */
10346 s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10347 s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10348 INSERT_NEW_NODE(
10349 as_unique_search_nodes,
10350 num_unique_nodes,
10351 s_search_node_temp,
10352 0,
10353 au4_unique_node_map,
10354 center_x,
10355 center_y,
10356 1);
10357 }
10358 }
10359
10360 s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
10361 s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
10362
10363 /*****************************************************************/
10364 /* Call the search algorithm, this includes: */
10365 /* Pre-Search-Refinement (for coarse candts) */
10366 /* Search on each candidate */
10367 /* Post Search Refinement on winners/other new candidates */
10368 /*****************************************************************/
10369
10370 hme_pred_search_no_encode(
10371 &s_search_prms_blk,
10372 ps_curr_layer,
10373 &ps_ctxt->s_wt_pred,
10374 ai4_valid_part_ids,
10375 0,
10376 e_me_quality_presets,
10377 i1_grid_enable,
10378 (ihevce_me_optimised_function_list_t *)
10379 ps_ctxt->pv_me_optimised_function_list);
10380
10381 i1_grid_enable = 0;
10382 }
10383 }
10384
10385 /* for non encode layer update MV and end processing for block */
10386 {
10387 WORD32 i4_ref_id, min_cost = 0x7fffffff, min_sad = 0;
10388 search_node_t *ps_search_node;
10389 /* now update the reqd results back to the layer mv bank. */
10390 if(1 == ps_refine_prms->i4_layer_id)
10391 {
10392 hme_update_mv_bank_in_l1_me(
10393 ps_search_results,
10394 ps_curr_layer->ps_layer_mvbank,
10395 blk_x,
10396 blk_y,
10397 &s_mv_update_prms);
10398 }
10399 else
10400 {
10401 hme_update_mv_bank_noencode(
10402 ps_search_results,
10403 ps_curr_layer->ps_layer_mvbank,
10404 blk_x,
10405 blk_y,
10406 &s_mv_update_prms);
10407 }
10408
10409                     /* UPDATE the MIN and MAX MVs for Dynamic Search Range for each ref. pic. */
10410 /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
10411 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10412 {
10413 WORD32 i4_j;
10414 layer_mv_t *ps_layer_mv = ps_curr_layer->ps_layer_mvbank;
10415
10416 //if (ps_layer_mv->e_blk_size == s_mv_update_prms.e_search_blk_size)
10417 /* Not considering this for Dyn. Search Update */
10418 {
10419 for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
10420 i4_ref_id++)
10421 {
10422 ps_search_node =
10423 ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
10424
10425 for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
10426 {
10427 hme_update_dynamic_search_params(
10428 &ps_ctxt->s_coarse_dyn_range_prms
10429 .as_dyn_range_prms[ps_refine_prms->i4_layer_id]
10430 [i4_ref_id],
10431 ps_search_node->s_mv.i2_mvy);
10432
10433 ps_search_node++;
10434 }
10435 }
10436 }
10437 }
10438
10439 if(1 == ps_refine_prms->i4_layer_id)
10440 {
10441 WORD32 wt_pred_val, log_wt_pred_val;
10442 WORD32 ref_id_of_nearest_poc = 0;
10443 WORD32 max_val = 0x7fffffff;
10444 WORD32 max_l0_val = 0x7fffffff;
10445 WORD32 max_l1_val = 0x7fffffff;
10446 WORD32 cur_val;
10447 WORD32 i4_local_weighted_sad, i4_local_cost_weighted_pred;
10448
10449 WORD32 bestl0_sad = 0x7fffffff;
10450 WORD32 bestl1_sad = 0x7fffffff;
10451 search_node_t *ps_best_l0_blk = NULL, *ps_best_l1_blk = NULL;
10452
10453 for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
10454 i4_ref_id++)
10455 {
10456 wt_pred_val = ps_ctxt->s_wt_pred.a_wpred_wt[i4_ref_id];
10457 log_wt_pred_val = ps_ctxt->s_wt_pred.wpred_log_wdc;
10458
10459 ps_search_node =
10460 ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
10461
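                      /*
                       * Weighted SAD with rounding:
                       *   weighted_sad = (sad * wt + (1 << log_wdc) / 2) >> log_wdc
                       * The weighted cost then adds back the non-SAD part (MV cost) of
                       * the original total cost.
                       */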
10462 i4_local_weighted_sad = ((ps_search_node->i4_sad * wt_pred_val) +
10463 ((1 << log_wt_pred_val) >> 1)) >>
10464 log_wt_pred_val;
10465
10466 i4_local_cost_weighted_pred =
10467 i4_local_weighted_sad +
10468 (ps_search_node->i4_tot_cost - ps_search_node->i4_sad);
10469 //the loop is redundant as the results are already sorted based on total cost
10470 //for (i4_j = 0; i4_j < ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref; i4_j++)
10471 {
10472 if(i4_local_cost_weighted_pred < min_cost)
10473 {
10474 min_cost = i4_local_cost_weighted_pred;
10475 min_sad = i4_local_weighted_sad;
10476 }
10477 }
10478
10479                 /* For a P frame, find the reference with the nearest POC (which is either a P or an I frame) */
10480 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10481 {
10482 if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id])
10483 {
10484 cur_val =
10485 ABS(ps_ctxt->i4_curr_poc -
10486 ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id]);
10487 if(cur_val < max_val)
10488 {
10489 max_val = cur_val;
10490 ref_id_of_nearest_poc = i4_ref_id;
10491 }
10492 }
10493 }
10494 }
10495             /* Store the ME cost w.r.t. the past frame, only for P frames */
10496 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10497 {
10498 if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
10499 {
10500 WORD16 i2_mvx, i2_mvy;
10501
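                          /*
                           * Convert the layer-relative 8x8 block position (blk_x, blk_y) into a
                           * CTB-relative raster position (4 blocks per CTB side at this layer)
                           * and map it to z-scan order via gau1_raster_scan_to_ctb.
                           */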
10502 WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10503 WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10504 WORD32 z_scan_idx =
10505 gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10506 WORD32 wt, log_wt;
10507
10508 /*ASSERT((ps_ctxt->i4_curr_poc - ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
10509 <= (1 + ps_ctxt->num_b_frms));*/
10510
10511 /*obtain mvx and mvy */
10512 i2_mvx =
10513 ps_search_results
10514 ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10515 ->s_mv.i2_mvx;
10516 i2_mvy =
10517 ps_search_results
10518 ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10519 ->s_mv.i2_mvy;
10520
10521                     /* fetch the weighted-prediction weight and its log2 denominator for the nearest ref */
10522 wt = ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_of_nearest_poc];
10523 log_wt = ps_ctxt->s_wt_pred.wpred_log_wdc;
10524
10525                     /* register the weighted SAD and SAD cost for L1 ME in the blk context */
10526 ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] =
10527 ((ps_search_results
10528 ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10529 ->i4_sad *
10530 wt) +
10531 ((1 << log_wt) >> 1)) >>
10532 log_wt;
10533 ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] =
10534 ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] +
10535 (ps_search_results
10536 ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10537 ->i4_tot_cost -
10538 ps_search_results
10539 ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10540 ->i4_sad);
10541 /*for complexity change detection*/
10542 ps_ctxt->i4_num_blks++;
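                          /*
                           * A block is flagged as high-SAD when its weighted SAD cost exceeds
                           * 8x8 = 64 (one unit per pixel) scaled by the nominal temporal distance
                           * (1 + num_b_frms); these counts feed complexity-change detection.
                           */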
10543 if(ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] >
10544 (8 /*blk width*/ * 8 /*blk height*/ * (1 + ps_ctxt->num_b_frms)))
10545 {
10546 ps_ctxt->i4_num_blks_high_sad++;
10547 }
10548 }
10549 }
10550 }
10551
10552         /* EIID: early inter/intra decision */
10553         /* tap the L1-level SAD for the inter/intra decision */
10554 if((e_me_quality_presets >= ME_MEDIUM_SPEED) &&
10555 (!ps_ctxt->s_frm_prms
10556                  .is_i_pic)) //early decisions are disabled for high-quality presets and for I pics
10557 {
10558 if(1 == ps_refine_prms->i4_layer_id)
10559 {
10560 WORD32 i4_min_sad_cost_8x8_block = min_cost;
10561 ihevce_ed_blk_t *ps_curr_ed_blk_ctxt;
10562 WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10563 WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10564 WORD32 z_scan_idx =
10565 gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10566 ps_curr_ed_blk_ctxt = ps_ed_blk_ctxt_curr_ctb + z_scan_idx;
10567
10568 /*register the min cost for l1 me in blk context */
10569 ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
10570 i4_min_sad_cost_8x8_block;
10571 i4_num_comparisions++;
10572
10573 /* take early inter-intra decision here */
10574             ps_curr_ed_blk_ctxt->intra_or_inter = 3; /* init to 3: evaluate both intra and inter */
10575 #if DISABLE_INTRA_IN_BPICS
10576 if((e_me_quality_presets == ME_XTREME_SPEED_25) &&
10577 (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
10578 {
10579 ps_curr_ed_blk_ctxt->intra_or_inter =
10580                     2; /* evaluate only inter for B pics in the higher temporal layers */
10581 i4_num_inter_wins++;
10582 }
10583 else
10584 #endif
10585 {
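                          /*
                           * Inter wins when the best L1 ME SAD cost is below the best L1 intra
                           * (IPE) SAD cost scaled by i4_threshold_multiplier / i4_threshold_divider;
                           * otherwise both intra and inter remain enabled for this 8x8 block.
                           */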
10586 if(ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] <
10587 ((ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2] *
10588 i4_threshold_multiplier) /
10589 i4_threshold_divider))
10590 {
10591 ps_curr_ed_blk_ctxt->intra_or_inter =
10592 2; /*eval only inter if inter cost is less */
10593 i4_num_inter_wins++;
10594 }
10595 }
10596
10597 //{
10598 // DBG_PRINTF ("(blk x, blk y):(%d, %d)\t me:(ctb_x, ctb_y):(%d, %d)\t intra_SAD_COST: %d\tInter_SAD_COST: %d\n",
10599 // blk_x,blk_y,
10600 // i4_ctb_blk_ctr, i4_ctb_row_ctr,
10601 // ps_curr_ed_blk_ctxt->i4_best_sad_8x8_l1_ipe,
10602 // i4_min_sad_cost_8x8_block
10603 // );
10604 //}
10605
10606 } //end of layer-1
10607 } //end of if (e_me_quality_presets >= ME_MEDIUM_SPEED)
10608 else
10609 {
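                  /*
                   * Early decision disabled (I picture, or a quality preset below
                   * ME_MEDIUM_SPEED): still record the best L1 ME SAD cost for this 8x8
                   * block, but leave intra_or_inter untouched so both modes stay enabled.
                   */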
10610 if(1 == ps_refine_prms->i4_layer_id)
10611 {
10612 WORD32 i4_min_sad_cost_8x8_block = min_cost;
10613 WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10614 WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10615 WORD32 z_scan_idx =
10616 gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10617
10618 /*register the min cost for l1 me in blk context */
10619 ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
10620 i4_min_sad_cost_8x8_block;
10621 }
10622 }
10623 if(1 == ps_refine_prms->i4_layer_id)
10624 {
10625 WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10626 WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10627 WORD32 z_scan_idx =
10628 gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10629
10630 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me_for_decide[z_scan_idx >> 2] =
10631 min_sad;
10632
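                      /*
                       * Accumulate the frame-level L1 totals using whichever of ME and IPE
                       * gives the lower cost for this 8x8 block, and record the matching SAD
                       * as the block's best L1 SAD.
                       */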
10633 if(min_cost <
10634 ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2])
10635 {
10636 ps_ctxt->i4_L1_hme_best_cost += min_cost;
10637 ps_ctxt->i4_L1_hme_sad += min_sad;
10638 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] = min_sad;
10639 }
10640 else
10641 {
10642 ps_ctxt->i4_L1_hme_best_cost +=
10643 ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2];
10644 ps_ctxt->i4_L1_hme_sad +=
10645 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
10646 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] =
10647 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
10648 }
10649 }
10650 }
10651 }
10652
10653 /* Update the number of blocks processed in the current row */
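              /* The row-row sync is registered only when the quality preset is below   */
              /* ME_MEDIUM_SPEED (i.e. the higher-quality presets), so dependent rows   */
              /* can track how far this row has progressed.                             */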
10654 if((ME_MEDIUM_SPEED > e_me_quality_presets))
10655 {
10656 ihevce_dmgr_set_row_row_sync(
10657 pv_hme_dep_mngr,
10658 (i4_ctb_x + 1),
10659 blk_y,
10660 0 /* Col Tile No. : Not supported in PreEnc*/);
10661 }
10662 }
10663
10664 /* set the output dependency after completion of row */
10665 ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
10666 }
10667 }
10668