1 /******************************************************************************
2 *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /*!
22 ******************************************************************************
23 * \file ihevce_enc_loop_inter_mode_sifter.c
24 *
25 * \brief
26 * This file contains functions for selecting best inter candidates for RDOPT evaluation
27 *
28 * \date
29 * 10/09/2014
30 *
31 ******************************************************************************
32 */
33
34 /*****************************************************************************/
35 /* File Includes */
36 /*****************************************************************************/
37 /* System include files */
38 #include <stdio.h>
39 #include <string.h>
40 #include <stdlib.h>
41 #include <assert.h>
42 #include <stdarg.h>
43 #include <math.h>
44 #include <limits.h>
45
46 /* User include files */
47 #include "ihevc_typedefs.h"
48 #include "itt_video_api.h"
49 #include "ihevce_api.h"
50
51 #include "rc_cntrl_param.h"
52 #include "rc_frame_info_collector.h"
53 #include "rc_look_ahead_params.h"
54
55 #include "ihevc_defs.h"
56 #include "ihevc_macros.h"
57 #include "ihevc_debug.h"
58 #include "ihevc_structs.h"
59 #include "ihevc_platform_macros.h"
60 #include "ihevc_deblk.h"
61 #include "ihevc_itrans_recon.h"
62 #include "ihevc_chroma_itrans_recon.h"
63 #include "ihevc_chroma_intra_pred.h"
64 #include "ihevc_intra_pred.h"
65 #include "ihevc_inter_pred.h"
66 #include "ihevc_mem_fns.h"
67 #include "ihevc_padding.h"
68 #include "ihevc_weighted_pred.h"
69 #include "ihevc_sao.h"
70 #include "ihevc_resi_trans.h"
71 #include "ihevc_quant_iquant_ssd.h"
72 #include "ihevc_cabac_tables.h"
73
74 #include "ihevce_defs.h"
75 #include "ihevce_hle_interface.h"
76 #include "ihevce_lap_enc_structs.h"
77 #include "ihevce_multi_thrd_structs.h"
78 #include "ihevce_multi_thrd_funcs.h"
79 #include "ihevce_me_common_defs.h"
80 #include "ihevce_had_satd.h"
81 #include "ihevce_error_codes.h"
82 #include "ihevce_bitstream.h"
83 #include "ihevce_cabac.h"
84 #include "ihevce_rdoq_macros.h"
85 #include "ihevce_function_selector.h"
86 #include "ihevce_enc_structs.h"
87 #include "ihevce_entropy_structs.h"
88 #include "ihevce_cmn_utils_instr_set_router.h"
89 #include "ihevce_ipe_instr_set_router.h"
90 #include "ihevce_decomp_pre_intra_structs.h"
91 #include "ihevce_decomp_pre_intra_pass.h"
92 #include "ihevce_enc_loop_structs.h"
93 #include "ihevce_global_tables.h"
94 #include "ihevce_nbr_avail.h"
95 #include "ihevce_enc_loop_utils.h"
96 #include "ihevce_bs_compute_ctb.h"
97 #include "ihevce_cabac_rdo.h"
98 #include "ihevce_dep_mngr_interface.h"
99 #include "ihevce_enc_loop_pass.h"
100 #include "ihevce_rc_enc_structs.h"
101 #include "ihevce_common_utils.h"
102 #include "ihevce_stasino_helpers.h"
103
104 #include "hme_datatype.h"
105 #include "hme_common_defs.h"
106 #include "hme_common_utils.h"
107 #include "hme_interface.h"
108 #include "hme_defs.h"
109 #include "ihevce_me_instr_set_router.h"
110 #include "hme_err_compute.h"
111 #include "hme_globals.h"
112 #include "ihevce_mv_pred.h"
113 #include "ihevce_mv_pred_merge.h"
114 #include "ihevce_inter_pred.h"
115 #include "ihevce_enc_loop_inter_mode_sifter.h"
116
117 /*****************************************************************************/
118 /* Function Definitions */
119 /*****************************************************************************/
ihevce_get_num_part_types_in_me_cand_list(cu_inter_cand_t * ps_me_cand_list,UWORD8 * pu1_part_type_ref_cand,UWORD8 * pu1_idx_ref_cand,UWORD8 * pu1_diff_skip_cand_flag,WORD8 * pi1_skip_cand_from_merge_idx,WORD8 * pi1_final_skip_cand_merge_idx,UWORD8 u1_max_num_part_types_to_select,UWORD8 u1_num_me_cands)120 static WORD32 ihevce_get_num_part_types_in_me_cand_list(
121 cu_inter_cand_t *ps_me_cand_list,
122 UWORD8 *pu1_part_type_ref_cand,
123 UWORD8 *pu1_idx_ref_cand,
124 UWORD8 *pu1_diff_skip_cand_flag,
125 WORD8 *pi1_skip_cand_from_merge_idx,
126 WORD8 *pi1_final_skip_cand_merge_idx,
127 UWORD8 u1_max_num_part_types_to_select,
128 UWORD8 u1_num_me_cands)
129 {
130 UWORD8 i, j;
131 UWORD8 u1_num_unique_parts = 0;
132
133 for(i = 0; i < u1_num_me_cands; i++)
134 {
135 UWORD8 u1_cur_part_type = ps_me_cand_list[i].b3_part_size;
136 UWORD8 u1_is_unique = 1;
137
138 if(u1_num_unique_parts >= u1_max_num_part_types_to_select)
139 {
140 return u1_num_unique_parts;
141 }
142
143 /* loop to check if the current cand is already present in the list */
144 for(j = 0; j < u1_num_unique_parts; j++)
145 {
146 if(u1_cur_part_type == pu1_part_type_ref_cand[j])
147 {
148 u1_is_unique = 0;
149 break;
150 }
151 }
152
153 if(u1_is_unique)
154 {
155 if(SIZE_2Nx2N == u1_cur_part_type)
156 {
157 *pu1_diff_skip_cand_flag = 0;
158 *pi1_skip_cand_from_merge_idx = u1_num_unique_parts;
159 *pi1_final_skip_cand_merge_idx = u1_num_unique_parts;
160 }
161
162 pu1_part_type_ref_cand[u1_num_unique_parts] = u1_cur_part_type;
163 pu1_idx_ref_cand[u1_num_unique_parts] = i;
164 u1_num_unique_parts++;
165 }
166 }
167
168 return u1_num_unique_parts;
169 }
170
/*!
 ******************************************************************************
 * \brief
 *  Generates the luma inter prediction of one PU and, when requested,
 *  measures its distortion against the source with the supplied cost function.
 *
 * \param[in]  ps_mc_ctxt             context handed to pf_luma_inter_pred_pu
 * \param[in]  pf_luma_inter_pred_pu  prediction function; always invoked
 * \param[in]  pf_sad_func            cost function; invoked only when a cost is
 *                                    requested and the prediction succeeded
 * \param[in]  ps_pu                  PU to predict; b4_wd / b4_ht give the
 *                                    block size as (value + 1) * 4
 * \param[in]  pv_src                 source block
 * \param[out] pv_pred                buffer receiving the prediction
 * \param[in]  i4_src_stride          stride of pv_src
 * \param[in]  i4_pred_stride         stride of pv_pred
 * \param[in]  u1_compute_error       non-zero to evaluate the cost
 * \param[in]  ps_cmn_utils_optimised_function_list
 *                                    forwarded to the cost function
 *
 * \return
 *  Value written by pf_sad_func through pi4_sad_grid, or INT_MAX when no cost
 *  was requested or pf_luma_inter_pred_pu did not return IV_SUCCESS.
 ******************************************************************************
 */
static WORD32 ihevce_compute_inter_pred_and_cost(
    inter_pred_ctxt_t *ps_mc_ctxt,
    PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu,
    PF_SAD_FXN_T pf_sad_func,
    pu_t *ps_pu,
    void *pv_src,
    void *pv_pred,
    WORD32 i4_src_stride,
    WORD32 i4_pred_stride,
    UWORD8 u1_compute_error,
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
{
    err_prms_t s_sad_prms;
    WORD32 i4_sad;

    /* The prediction is produced even when the caller does not want a cost */
    IV_API_CALL_STATUS_T e_pred_status =
        pf_luma_inter_pred_pu(ps_mc_ctxt, ps_pu, pv_pred, i4_pred_stride, 0);

    if(!u1_compute_error || (IV_SUCCESS != e_pred_status))
    {
        /* Nothing to measure: report the largest 32 bit cost */
        return INT_MAX;
    }

    /* Both the 8 bit and the 16 bit views alias the same buffers */
    s_sad_prms.pu1_inp = (UWORD8 *)pv_src;
    s_sad_prms.pu2_inp = (UWORD16 *)pv_src;
    s_sad_prms.i4_inp_stride = i4_src_stride;

    s_sad_prms.pu1_ref = (UWORD8 *)pv_pred;
    s_sad_prms.pu2_ref = (UWORD16 *)pv_pred;
    s_sad_prms.i4_ref_stride = i4_pred_stride;

    s_sad_prms.i4_blk_wd = (ps_pu->b4_wd + 1) << 2;
    s_sad_prms.i4_blk_ht = (ps_pu->b4_ht + 1) << 2;

    s_sad_prms.pi4_sad_grid = &i4_sad;
    s_sad_prms.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;

    pf_sad_func(&s_sad_prms);

    return i4_sad;
}
219
/*!
 ******************************************************************************
 * \brief
 *  Evaluates the merge candidates of one partition and leaves the cheapest
 *  one in ps_pu_merge.
 *
 * \par Description
 *  For every candidate in ps_prms->ps_list the motion data is written into
 *  ps_pu_merge and a cost is obtained: either i4_me_cand_cost is reused (when
 *  re-evaluation is not forced and ihevce_compare_pu_mv_t() reports a match
 *  with the ME PU) or the prediction is generated into a scratch buffer and
 *  measured. When the CU is flagged noisy and the alpha-stim multiplier is
 *  non-zero, a noise term is applied to freshly measured costs. The merge
 *  index cost is then added and the minimum is tracked. Two buffer ids from
 *  pu1_merge_pred_buf_array are ping-ponged so that the best prediction
 *  generated so far is never overwritten.
 *
 * \param[in,out] ps_prms             shared merge evaluation parameters; the
 *                                    MERGE_DERIVED rows of pau1_best_pred_buf_id,
 *                                    pai4_noise_term and pau4_pred_variance are
 *                                    updated for u1_part_id
 * \param[in,out] ps_pu_merge         PU under evaluation (b1_merge_flag must be
 *                                    set); receives mv, pred mode and merge idx
 * \param[in]     ps_pu_me            PU holding the ME result for this partition
 * \param[in]     pv_src              source block
 * \param[in]     i4_me_cand_cost     cost reused when a merge candidate matches
 *                                    the ME PU
 * \param[in]     i4_pred_buf_offset  byte offset of this partition inside the
 *                                    prediction buffers
 * \param[in]     u1_num_cands        number of merge candidates
 * \param[in]     u1_part_id          partition index
 * \param[in]     u1_force_pred_evaluation
 *                                    non-zero to always regenerate predictions
 *
 * \return
 *  Cost of the best candidate, or INT_MAX when no candidate was usable (in
 *  that case ps_pu_merge keeps whatever the last examined candidate wrote).
 ******************************************************************************
 */
static WORD32 ihevce_determine_best_merge_pu(
    merge_prms_t *ps_prms,
    pu_t *ps_pu_merge,
    pu_t *ps_pu_me,
    void *pv_src,
    WORD32 i4_me_cand_cost,
    WORD32 i4_pred_buf_offset,
    UWORD8 u1_num_cands,
    UWORD8 u1_part_id,
    UWORD8 u1_force_pred_evaluation)
{
    merge_cand_list_t *ps_merge_cands = ps_prms->ps_list;
    UWORD8 *pu1_merge_indices = ps_prms->au1_valid_merge_indices;
    void **ppv_pred_bufs = ps_prms->ppv_pred_buf_list;

    WORD32(*pai4_noise_term)[MAX_NUM_INTER_PARTS] = ps_prms->pai4_noise_term;
    UWORD32(*pau4_pred_variance)[MAX_NUM_INTER_PARTS] = ps_prms->pau4_pred_variance;
    UWORD8(*pau1_best_pred_buf_id)[MAX_NUM_INTER_PARTS] = ps_prms->pau1_best_pred_buf_id;

    WORD32 i4_alpha_stim_multiplier = ps_prms->i4_alpha_stim_multiplier;
    WORD32 i4_lambda = ps_prms->i4_lambda;
    WORD32 i4_src_stride = ps_prms->i4_src_stride;
    WORD32 i4_pred_stride = ps_prms->i4_pred_stride;
    UWORD8 u1_merge_idx_cabac_model = ps_prms->u1_merge_idx_cabac_model;
    UWORD8 u1_max_cands = ps_prms->u1_max_cands;

    /* Noise weighting is active only for noisy CUs with a non-zero multiplier */
    UWORD8 u1_use_stim = (ps_prms->u1_is_cu_noisy && (0 != i4_alpha_stim_multiplier));

    /* Ping-pong pair: "best" holds the winning prediction, "cur" is scratch */
    UWORD8 u1_best_buf_id = ps_prms->pu1_merge_pred_buf_array[0];
    UWORD8 u1_cur_buf_id = ps_prms->pu1_merge_pred_buf_array[1];

    UWORD8 u1_best_cand_id = UCHAR_MAX;
    /* The three values below are only read after a candidate has set them */
    UWORD8 u1_best_pred_mode = 0;
    UWORD32 u4_cur_variance = 0;
    UWORD32 u4_best_variance = 0;
    WORD32 i4_cur_noise_term = 0;
    WORD32 i4_best_noise_term = 0;
    WORD32 i4_best_cost = INT_MAX;
    WORD32 i4_mean;
    UWORD8 i;

    ASSERT(ps_pu_merge->b1_merge_flag);

    for(i = 0; i < u1_num_cands; i++)
    {
        WORD32 i4_cur_cost;
        UWORD8 u1_reuses_me_pred = 0;
        void *pv_pred = (UWORD8 *)ppv_pred_bufs[u1_cur_buf_id] + i4_pred_buf_offset;

        /* Candidates flagged in pu1_is_top_used are skipped unless allowed */
        if(!ps_prms->u1_use_merge_cand_from_top_row && ps_prms->pu1_is_top_used[i])
        {
            continue;
        }

        /* Load the candidate's motion data into the PU being evaluated */
        ps_pu_merge->mv = ps_merge_cands[i].mv;
        ps_pu_merge->b3_merge_idx = pu1_merge_indices[i];

        if(!ps_merge_cands[i].u1_pred_flag_l0)
        {
            ps_pu_merge->b2_pred_mode = PRED_L1;
        }
        else if(ps_merge_cands[i].u1_pred_flag_l1)
        {
            ps_pu_merge->b2_pred_mode = PRED_BI;
        }
        else
        {
            ps_pu_merge->b2_pred_mode = PRED_L0;
        }

        /* Bi-prediction is not permitted for partitions whose */
        /* width + height is below 16                          */
        if((PRED_BI == ps_pu_merge->b2_pred_mode) &&
           ((((ps_pu_merge->b4_wd + 1) << 2) + ((ps_pu_merge->b4_ht + 1) << 2)) < 16))
        {
            continue;
        }

        if(!u1_force_pred_evaluation &&
           ihevce_compare_pu_mv_t(
               &ps_pu_merge->mv,
               &ps_pu_me->mv,
               ps_pu_merge->b2_pred_mode,
               ps_pu_me->b2_pred_mode))
        {
            /* Same motion as the ME result: reuse its cost and prediction */
            i4_cur_cost = i4_me_cand_cost;
            u1_reuses_me_pred = 1;

            if(u1_use_stim && (i4_cur_cost < INT_MAX))
            {
                i4_cur_noise_term = pai4_noise_term[ME_OR_SKIP_DERIVED][u1_part_id];
                u4_cur_variance = pau4_pred_variance[ME_OR_SKIP_DERIVED][u1_part_id];
            }
        }
        else
        {
            /* Generate the prediction into the scratch buffer and measure it */
            i4_cur_cost = ihevce_compute_inter_pred_and_cost(
                ps_prms->ps_mc_ctxt,
                ps_prms->pf_luma_inter_pred_pu,
                ps_prms->pf_sad_fxn,
                ps_pu_merge,
                pv_src,
                pv_pred,
                i4_src_stride,
                i4_pred_stride,
                1,
                ps_prms->ps_cmn_utils_optimised_function_list);

            if(u1_use_stim && (i4_cur_cost < INT_MAX))
            {
                ihevce_calc_variance(
                    pv_pred,
                    i4_pred_stride,
                    &i4_mean,
                    &u4_cur_variance,
                    (ps_pu_merge->b4_ht + 1) << 2,
                    (ps_pu_merge->b4_wd + 1) << 2,
                    ps_prms->u1_is_hbd,
                    0);

                i4_cur_noise_term = ihevce_compute_noise_term(
                    i4_alpha_stim_multiplier,
                    ps_prms->pu4_src_variance[u1_part_id],
                    u4_cur_variance);

                MULTIPLY_STIM_WITH_DISTORTION(
                    i4_cur_cost, i4_cur_noise_term, STIM_Q_FORMAT, ALPHA_Q_FORMAT);
            }
        }

        /* Add the cost of signalling the merge index to every valid cost */
        if(i4_cur_cost < INT_MAX)
        {
            WORD32 i4_merge_idx_cost = 0;

            COMPUTE_MERGE_IDX_COST(
                u1_merge_idx_cabac_model, i, u1_max_cands, i4_lambda, i4_merge_idx_cost);
            i4_cur_cost += i4_merge_idx_cost;
        }

        if(i4_cur_cost >= i4_best_cost)
        {
            /* Not an improvement; the scratch buffer gets reused next round */
            continue;
        }

        i4_best_cost = i4_cur_cost;
        u1_best_cand_id = i;
        u1_best_pred_mode = ps_pu_merge->b2_pred_mode;

        if(u1_use_stim)
        {
            i4_best_noise_term = i4_cur_noise_term;
            u4_best_variance = u4_cur_variance;
        }

        if(u1_reuses_me_pred)
        {
            /* The winning prediction already lives in the ME buffer */
            pau1_best_pred_buf_id[MERGE_DERIVED][u1_part_id] =
                pau1_best_pred_buf_id[ME_OR_SKIP_DERIVED][u1_part_id];
        }
        else
        {
            /* Scratch buffer now holds the winner: swap roles */
            SWAP(u1_best_buf_id, u1_cur_buf_id);
            pau1_best_pred_buf_id[MERGE_DERIVED][u1_part_id] = u1_best_buf_id;
        }
    }

    if(UCHAR_MAX != u1_best_cand_id)
    {
        /* Restore the winner's motion data, later candidates overwrote it */
        ps_pu_merge->mv = ps_merge_cands[u1_best_cand_id].mv;
        ps_pu_merge->b2_pred_mode = u1_best_pred_mode;
        ps_pu_merge->b3_merge_idx = pu1_merge_indices[u1_best_cand_id];

        if(u1_use_stim)
        {
            pai4_noise_term[MERGE_DERIVED][u1_part_id] = i4_best_noise_term;
            pau4_pred_variance[MERGE_DERIVED][u1_part_id] = u4_best_variance;
        }
    }

    return i4_best_cost;
}
413
ihevce_merge_cand_pred_buffer_preparation(void ** ppv_pred_buf_list,cu_inter_cand_t * ps_cand,UWORD8 (* pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],WORD32 i4_pred_stride,UWORD8 u1_cu_size,UWORD8 u1_part_type,UWORD8 u1_num_bytes_per_pel,FT_COPY_2D * pf_copy_2d)414 static WORD8 ihevce_merge_cand_pred_buffer_preparation(
415 void **ppv_pred_buf_list,
416 cu_inter_cand_t *ps_cand,
417 UWORD8 (*pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],
418 WORD32 i4_pred_stride,
419 UWORD8 u1_cu_size,
420 UWORD8 u1_part_type,
421 UWORD8 u1_num_bytes_per_pel,
422 FT_COPY_2D *pf_copy_2d)
423 {
424 WORD32 i4_part_wd;
425 WORD32 i4_part_ht;
426 WORD32 i4_part_wd_pu2;
427 WORD32 i4_part_ht_pu2;
428 WORD32 i4_buf_offset;
429 UWORD8 *pu1_pred_src;
430 UWORD8 *pu1_pred_dst;
431 WORD8 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
432
433 WORD32 i4_stride = i4_pred_stride * u1_num_bytes_per_pel;
434
435 if((0 == u1_part_type) ||
436 (pau1_final_pred_buf_id[MERGE_DERIVED][0] == pau1_final_pred_buf_id[MERGE_DERIVED][1]))
437 {
438 ps_cand->pu1_pred_data =
439 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
440 ps_cand->pu2_pred_data =
441 (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
442 ps_cand->i4_pred_data_stride = i4_pred_stride;
443
444 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
445 }
446 else if(pau1_final_pred_buf_id[MERGE_DERIVED][0] == pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0])
447 {
448 i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
449 i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
450
451 i4_buf_offset = 0;
452
453 pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]] +
454 i4_buf_offset;
455 pu1_pred_dst =
456 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] + i4_buf_offset;
457
458 pf_copy_2d(
459 pu1_pred_dst,
460 i4_stride,
461 pu1_pred_src,
462 i4_stride,
463 i4_part_wd * u1_num_bytes_per_pel,
464 i4_part_ht);
465
466 ps_cand->pu1_pred_data =
467 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
468 ps_cand->pu2_pred_data =
469 (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
470 ps_cand->i4_pred_data_stride = i4_pred_stride;
471
472 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][1];
473 }
474 else if(pau1_final_pred_buf_id[MERGE_DERIVED][1] == pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1])
475 {
476 i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
477 i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
478
479 i4_buf_offset = (i4_part_ht < u1_cu_size) * i4_part_ht * i4_pred_stride +
480 (i4_part_wd < u1_cu_size) * i4_part_wd;
481
482 i4_buf_offset *= u1_num_bytes_per_pel;
483
484 i4_part_wd = (ps_cand->as_inter_pu[1].b4_wd + 1) << 2;
485 i4_part_ht = (ps_cand->as_inter_pu[1].b4_ht + 1) << 2;
486
487 pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
488 i4_buf_offset;
489 pu1_pred_dst =
490 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]] + i4_buf_offset;
491
492 pf_copy_2d(
493 pu1_pred_dst,
494 i4_stride,
495 pu1_pred_src,
496 i4_stride,
497 i4_part_wd * u1_num_bytes_per_pel,
498 i4_part_ht);
499
500 ps_cand->pu1_pred_data =
501 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
502 ps_cand->pu2_pred_data =
503 (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
504 ps_cand->i4_pred_data_stride = i4_pred_stride;
505
506 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
507 }
508 else
509 {
510 i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
511 i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
512
513 i4_part_wd_pu2 = (ps_cand->as_inter_pu[1].b4_wd + 1) << 2;
514 i4_part_ht_pu2 = (ps_cand->as_inter_pu[1].b4_ht + 1) << 2;
515
516 switch((PART_TYPE_T)u1_part_type)
517 {
518 case PRT_2NxN:
519 case PRT_Nx2N:
520 case PRT_2NxnU:
521 case PRT_nLx2N:
522 {
523 pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
524 pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
525
526 ps_cand->pu1_pred_data =
527 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
528 ps_cand->pu2_pred_data =
529 (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
530
531 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][1];
532
533 break;
534 }
535 case PRT_nRx2N:
536 case PRT_2NxnD:
537 {
538 i4_buf_offset = (i4_part_ht < u1_cu_size) * i4_part_ht * i4_pred_stride +
539 (i4_part_wd < u1_cu_size) * i4_part_wd;
540
541 i4_buf_offset *= u1_num_bytes_per_pel;
542
543 pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] +
544 i4_buf_offset;
545 pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]] +
546 i4_buf_offset;
547
548 i4_part_wd = i4_part_wd_pu2;
549 i4_part_ht = i4_part_ht_pu2;
550
551 ps_cand->pu1_pred_data =
552 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
553 ps_cand->pu2_pred_data =
554 (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
555
556 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
557
558 break;
559 }
560 }
561
562 pf_copy_2d(
563 pu1_pred_dst,
564 i4_stride,
565 pu1_pred_src,
566 i4_stride,
567 i4_part_wd * u1_num_bytes_per_pel,
568 i4_part_ht);
569
570 ps_cand->i4_pred_data_stride = i4_pred_stride;
571 }
572
573 return i1_retval;
574 }
575
ihevce_mixed_mode_cand_type1_pred_buffer_preparation(void ** ppv_pred_buf_list,cu_inter_cand_t * ps_cand,UWORD8 (* pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],UWORD8 * pu1_merge_pred_buf_idx_array,WORD32 i4_pred_stride,UWORD8 u1_me_pred_buf_id,UWORD8 u1_merge_pred_buf_id,UWORD8 u1_type0_cand_is_valid,UWORD8 u1_cu_size,UWORD8 u1_part_type,UWORD8 u1_num_bytes_per_pel,FT_COPY_2D * pf_copy_2d)576 static WORD8 ihevce_mixed_mode_cand_type1_pred_buffer_preparation(
577 void **ppv_pred_buf_list,
578 cu_inter_cand_t *ps_cand,
579 UWORD8 (*pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],
580 UWORD8 *pu1_merge_pred_buf_idx_array,
581 WORD32 i4_pred_stride,
582 UWORD8 u1_me_pred_buf_id,
583 UWORD8 u1_merge_pred_buf_id,
584 UWORD8 u1_type0_cand_is_valid,
585 UWORD8 u1_cu_size,
586 UWORD8 u1_part_type,
587 UWORD8 u1_num_bytes_per_pel,
588 FT_COPY_2D *pf_copy_2d)
589 {
590 WORD32 i4_part_wd;
591 WORD32 i4_part_ht;
592 WORD32 i4_part_wd_pu2;
593 WORD32 i4_part_ht_pu2;
594 UWORD8 *pu1_pred_src;
595 UWORD8 *pu1_pred_dst = NULL;
596 WORD8 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
597
598 WORD32 i4_stride = i4_pred_stride * u1_num_bytes_per_pel;
599
600 ASSERT(0 != u1_part_type);
601
602 i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
603 i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
604
605 i4_part_wd_pu2 = (ps_cand->as_inter_pu[1].b4_wd + 1) << 2;
606 i4_part_ht_pu2 = (ps_cand->as_inter_pu[1].b4_ht + 1) << 2;
607
608 if(pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1] == pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1])
609 {
610 ps_cand->pu1_pred_data =
611 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
612 ps_cand->pu2_pred_data =
613 (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
614 ps_cand->i4_pred_data_stride = i4_pred_stride;
615
616 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
617
618 return i1_retval;
619 }
620 else
621 {
622 UWORD8 u1_bitfield = ((u1_merge_pred_buf_id == UCHAR_MAX) << 3) |
623 ((u1_me_pred_buf_id == UCHAR_MAX) << 2) |
624 ((!u1_type0_cand_is_valid) << 1) |
625 (pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1] ==
626 pau1_final_pred_buf_id[MERGE_DERIVED][1]);
627
628 WORD32 i4_buf_offset = (i4_part_ht < u1_cu_size) * i4_part_ht * i4_pred_stride +
629 (i4_part_wd < u1_cu_size) * i4_part_wd;
630
631 i4_buf_offset *= u1_num_bytes_per_pel;
632
633 switch(u1_bitfield)
634 {
635 case 15:
636 case 14:
637 case 6:
638 {
639 switch((PART_TYPE_T)u1_part_type)
640 {
641 case PRT_2NxN:
642 case PRT_Nx2N:
643 case PRT_2NxnU:
644 case PRT_nLx2N:
645 {
646 pu1_pred_src =
647 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
648 pu1_pred_dst =
649 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1]];
650
651 i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1];
652
653 break;
654 }
655 case PRT_nRx2N:
656 case PRT_2NxnD:
657 {
658 pu1_pred_src =
659 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1]] +
660 i4_buf_offset;
661 pu1_pred_dst =
662 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]] +
663 i4_buf_offset;
664
665 i4_part_wd = i4_part_wd_pu2;
666 i4_part_ht = i4_part_ht_pu2;
667
668 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
669
670 break;
671 }
672 }
673
674 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
675 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
676 ps_cand->i4_pred_data_stride = i4_pred_stride;
677
678 pf_copy_2d(
679 pu1_pred_dst,
680 i4_stride,
681 pu1_pred_src,
682 i4_stride,
683 i4_part_wd * u1_num_bytes_per_pel,
684 i4_part_ht);
685
686 break;
687 }
688 case 13:
689 case 9:
690 case 5:
691 {
692 UWORD8 i;
693
694 for(i = 0; i < 3; i++)
695 {
696 if((pu1_merge_pred_buf_idx_array[i] != pau1_final_pred_buf_id[MERGE_DERIVED][1]) &&
697 (pu1_merge_pred_buf_idx_array[i] != pau1_final_pred_buf_id[MERGE_DERIVED][0]))
698 {
699 pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pu1_merge_pred_buf_idx_array[i]] +
700 i4_buf_offset;
701
702 i1_retval = pu1_merge_pred_buf_idx_array[i];
703
704 break;
705 }
706 }
707
708 pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] +
709 i4_buf_offset;
710
711 pf_copy_2d(
712 pu1_pred_dst,
713 i4_stride,
714 pu1_pred_src,
715 i4_stride,
716 i4_part_wd_pu2 * u1_num_bytes_per_pel,
717 i4_part_ht_pu2);
718 /* Copy PU1 */
719 pu1_pred_src =
720 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
721 pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[i1_retval];
722
723 pf_copy_2d(
724 pu1_pred_dst,
725 i4_stride,
726 pu1_pred_src,
727 i4_stride,
728 i4_part_wd * u1_num_bytes_per_pel,
729 i4_part_ht);
730
731 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
732 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
733 ps_cand->i4_pred_data_stride = i4_pred_stride;
734
735 break;
736 }
737 case 12:
738 case 10:
739 case 8:
740 case 4:
741 case 2:
742 case 0:
743 {
744 pu1_pred_src =
745 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
746 pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1]];
747
748 i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1];
749
750 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
751 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
752 ps_cand->i4_pred_data_stride = i4_pred_stride;
753
754 pf_copy_2d(
755 pu1_pred_dst,
756 i4_stride,
757 pu1_pred_src,
758 i4_stride,
759 i4_part_wd * u1_num_bytes_per_pel,
760 i4_part_ht);
761
762 break;
763 }
764 case 11:
765 {
766 pu1_pred_src =
767 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
768 pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
769
770 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][1];
771
772 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
773 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
774 ps_cand->i4_pred_data_stride = i4_pred_stride;
775
776 pf_copy_2d(
777 pu1_pred_dst,
778 i4_stride,
779 pu1_pred_src,
780 i4_stride,
781 i4_part_wd * u1_num_bytes_per_pel,
782 i4_part_ht);
783
784 break;
785 }
786 case 7:
787 {
788 pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] +
789 i4_buf_offset;
790 pu1_pred_dst =
791 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
792 i4_buf_offset;
793
794 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1];
795
796 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
797 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
798 ps_cand->i4_pred_data_stride = i4_pred_stride;
799
800 pf_copy_2d(
801 pu1_pred_dst,
802 i4_stride,
803 pu1_pred_src,
804 i4_stride,
805 i4_part_wd_pu2 * u1_num_bytes_per_pel,
806 i4_part_ht_pu2);
807
808 break;
809 }
810 case 3:
811 case 1:
812 {
813 if((u1_merge_pred_buf_id == pau1_final_pred_buf_id[MERGE_DERIVED][0]) &&
814 (u1_merge_pred_buf_id != pau1_final_pred_buf_id[MERGE_DERIVED][1]))
815 {
816 pu1_pred_src =
817 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
818 pu1_pred_dst =
819 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
820
821 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][1];
822
823 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
824 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
825 ps_cand->i4_pred_data_stride = i4_pred_stride;
826
827 pf_copy_2d(
828 pu1_pred_dst,
829 i4_stride,
830 pu1_pred_src,
831 i4_stride,
832 i4_part_wd * u1_num_bytes_per_pel,
833 i4_part_ht);
834 }
835 else
836 {
837 UWORD8 i;
838
839 for(i = 0; i < 3; i++)
840 {
841 if((pu1_merge_pred_buf_idx_array[i] !=
842 pau1_final_pred_buf_id[MERGE_DERIVED][1]) &&
843 (pu1_merge_pred_buf_idx_array[i] !=
844 pau1_final_pred_buf_id[MERGE_DERIVED][0]))
845 {
846 pu1_pred_dst =
847 (UWORD8 *)ppv_pred_buf_list[pu1_merge_pred_buf_idx_array[i]] +
848 i4_buf_offset;
849
850 i1_retval = pu1_merge_pred_buf_idx_array[i];
851
852 break;
853 }
854 }
855
856 pu1_pred_src =
857 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] +
858 i4_buf_offset;
859
860 pf_copy_2d(
861 pu1_pred_dst,
862 i4_stride,
863 pu1_pred_src,
864 i4_stride,
865 i4_part_wd_pu2 * u1_num_bytes_per_pel,
866 i4_part_ht_pu2);
867
868 /* Copy PU1 */
869 pu1_pred_src =
870 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
871 pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[i1_retval];
872
873 pf_copy_2d(
874 pu1_pred_dst,
875 i4_stride,
876 pu1_pred_src,
877 i4_stride,
878 i4_part_wd * u1_num_bytes_per_pel,
879 i4_part_ht);
880
881 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
882 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
883 ps_cand->i4_pred_data_stride = i4_pred_stride;
884
885 break;
886 }
887 }
888 }
889 }
890
891 return i1_retval;
892 }
893
ihevce_mixed_mode_cand_type0_pred_buffer_preparation(void ** ppv_pred_buf_list,cu_inter_cand_t * ps_cand,UWORD8 (* pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],UWORD8 * pu1_merge_pred_buf_idx_array,UWORD8 u1_me_pred_buf_id,UWORD8 u1_merge_pred_buf_id,UWORD8 u1_mixed_tyep1_pred_buf_id,WORD32 i4_pred_stride,UWORD8 u1_cu_size,UWORD8 u1_part_type,UWORD8 u1_num_bytes_per_pel,FT_COPY_2D * pf_copy_2d)894 static WORD8 ihevce_mixed_mode_cand_type0_pred_buffer_preparation(
895 void **ppv_pred_buf_list,
896 cu_inter_cand_t *ps_cand,
897 UWORD8 (*pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],
898 UWORD8 *pu1_merge_pred_buf_idx_array,
899 UWORD8 u1_me_pred_buf_id,
900 UWORD8 u1_merge_pred_buf_id,
901 UWORD8 u1_mixed_tyep1_pred_buf_id,
902 WORD32 i4_pred_stride,
903 UWORD8 u1_cu_size,
904 UWORD8 u1_part_type,
905 UWORD8 u1_num_bytes_per_pel,
906 FT_COPY_2D *pf_copy_2d)
907 {
908 WORD32 i4_part_wd;
909 WORD32 i4_part_ht;
910 WORD32 i4_part_wd_pu2;
911 WORD32 i4_part_ht_pu2;
912 WORD32 i4_buf_offset;
913 UWORD8 *pu1_pred_src;
914 UWORD8 *pu1_pred_dst = NULL;
915 WORD8 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
916
917 WORD32 i4_stride = i4_pred_stride * u1_num_bytes_per_pel;
918
919 ASSERT(0 != u1_part_type);
920
921 i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
922 i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
923 i4_part_wd_pu2 = (ps_cand->as_inter_pu[1].b4_wd + 1) << 2;
924 i4_part_ht_pu2 = (ps_cand->as_inter_pu[1].b4_ht + 1) << 2;
925
926 i4_buf_offset = (i4_part_ht < u1_cu_size) * i4_part_ht * i4_pred_stride +
927 (i4_part_wd < u1_cu_size) * i4_part_wd;
928
929 i4_buf_offset *= u1_num_bytes_per_pel;
930
931 if(pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0] == pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0])
932 {
933 ps_cand->pu1_pred_data =
934 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
935 ps_cand->pu2_pred_data =
936 (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
937 ps_cand->i4_pred_data_stride = i4_pred_stride;
938
939 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
940 }
941 else
942 {
943 UWORD8 u1_bitfield =
944 ((u1_merge_pred_buf_id == UCHAR_MAX) << 2) | ((u1_me_pred_buf_id == UCHAR_MAX) << 1) |
945 (u1_mixed_tyep1_pred_buf_id != pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]);
946
947 switch(u1_bitfield)
948 {
949 case 7:
950 {
951 switch((PART_TYPE_T)u1_part_type)
952 {
953 case PRT_2NxN:
954 case PRT_Nx2N:
955 case PRT_2NxnU:
956 case PRT_nLx2N:
957 {
958 pu1_pred_src =
959 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0]];
960 pu1_pred_dst =
961 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]];
962
963 i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE0][1];
964
965 break;
966 }
967 case PRT_nRx2N:
968 case PRT_2NxnD:
969 {
970 pu1_pred_src =
971 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
972 i4_buf_offset;
973 pu1_pred_dst =
974 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0]] +
975 i4_buf_offset;
976
977 i4_part_wd = i4_part_wd_pu2;
978 i4_part_ht = i4_part_ht_pu2;
979
980 i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0];
981
982 break;
983 }
984 }
985
986 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
987 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
988 ps_cand->i4_pred_data_stride = i4_pred_stride;
989
990 pf_copy_2d(
991 pu1_pred_dst,
992 i4_stride,
993 pu1_pred_src,
994 i4_stride,
995 i4_part_wd * u1_num_bytes_per_pel,
996 i4_part_ht);
997
998 break;
999 }
1000 case 6:
1001 case 5:
1002 case 4:
1003 {
1004 pu1_pred_src =
1005 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
1006 i4_buf_offset;
1007 pu1_pred_dst =
1008 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0]] +
1009 i4_buf_offset;
1010
1011 i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0];
1012
1013 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
1014 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
1015 ps_cand->i4_pred_data_stride = i4_pred_stride;
1016
1017 pf_copy_2d(
1018 pu1_pred_dst,
1019 i4_stride,
1020 pu1_pred_src,
1021 i4_stride,
1022 i4_part_wd_pu2 * u1_num_bytes_per_pel,
1023 i4_part_ht_pu2);
1024 break;
1025 }
1026 case 3:
1027 {
1028 pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0]];
1029 pu1_pred_dst =
1030 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]];
1031
1032 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1];
1033
1034 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
1035 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
1036 ps_cand->i4_pred_data_stride = i4_pred_stride;
1037
1038 pf_copy_2d(
1039 pu1_pred_dst,
1040 i4_stride,
1041 pu1_pred_src,
1042 i4_stride,
1043 i4_part_wd * u1_num_bytes_per_pel,
1044 i4_part_ht);
1045
1046 break;
1047 }
1048 case 2:
1049 case 1:
1050 case 0:
1051 {
1052 if((u1_merge_pred_buf_id == pau1_final_pred_buf_id[MERGE_DERIVED][1]) &&
1053 (u1_merge_pred_buf_id != pau1_final_pred_buf_id[MERGE_DERIVED][0]))
1054 {
1055 pu1_pred_src =
1056 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
1057 i4_buf_offset;
1058 pu1_pred_dst =
1059 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]] +
1060 i4_buf_offset;
1061
1062 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
1063
1064 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
1065 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
1066 ps_cand->i4_pred_data_stride = i4_pred_stride;
1067
1068 pf_copy_2d(
1069 pu1_pred_dst,
1070 i4_stride,
1071 pu1_pred_src,
1072 i4_stride,
1073 i4_part_wd_pu2 * u1_num_bytes_per_pel,
1074 i4_part_ht_pu2);
1075 }
1076 else
1077 {
1078 UWORD8 i;
1079
1080 for(i = 0; i < 3; i++)
1081 {
1082 if((pu1_merge_pred_buf_idx_array[i] != u1_merge_pred_buf_id) &&
1083 (pu1_merge_pred_buf_idx_array[i] != u1_mixed_tyep1_pred_buf_id))
1084 {
1085 pu1_pred_dst =
1086 (UWORD8 *)ppv_pred_buf_list[pu1_merge_pred_buf_idx_array[i]] +
1087 i4_buf_offset;
1088
1089 i1_retval = pu1_merge_pred_buf_idx_array[i];
1090
1091 break;
1092 }
1093 }
1094
1095 pu1_pred_src =
1096 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
1097 i4_buf_offset;
1098
1099 pf_copy_2d(
1100 pu1_pred_dst,
1101 i4_stride,
1102 pu1_pred_src,
1103 i4_stride,
1104 i4_part_wd_pu2 * u1_num_bytes_per_pel,
1105 i4_part_ht_pu2);
1106
1107 /* Copy PU1 */
1108 pu1_pred_src =
1109 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
1110 pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[i1_retval];
1111
1112 pf_copy_2d(
1113 pu1_pred_dst,
1114 i4_stride,
1115 pu1_pred_src,
1116 i4_stride,
1117 i4_part_wd * u1_num_bytes_per_pel,
1118 i4_part_ht);
1119
1120 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
1121 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
1122 ps_cand->i4_pred_data_stride = i4_pred_stride;
1123
1124 break;
1125 }
1126 }
1127 }
1128 }
1129
1130 return i1_retval;
1131 }
1132
/*!
******************************************************************************
* \brief
*  Locates the largest (i.e. worst) cost in an array of costs.
*
* \param[in] pu4_cost_array : costs to scan
* \param[in] u1_array_size  : number of entries to scan
*
* \return
*  Index of the largest entry. On ties the lowest index wins because only a
*  strictly larger cost displaces the current choice. Returns 0 without
*  reading the array when u1_array_size is 0 or 1.
******************************************************************************
*/
static UWORD8 ihevce_find_idx_of_worst_cost(UWORD32 *pu4_cost_array, UWORD8 u1_array_size)
{
    UWORD8 u1_max_pos = 0;
    WORD32 i4_pos = 1;

    while(i4_pos < u1_array_size)
    {
        if(pu4_cost_array[u1_max_pos] < pu4_cost_array[i4_pos])
        {
            u1_max_pos = (UWORD8)i4_pos;
        }

        i4_pos++;
    }

    return u1_max_pos;
}
1149
/*!
******************************************************************************
* \brief
*  Releases prediction buffers that are no longer needed once the candidates
*  of the current evaluation have been merged into the list of winners.
*
* \param[in,out] pu4_pred_buf_usage_indicator : buffer usage indicator handed
*                to ihevce_set_pred_buf_as_free and to the reference-count macro
* \param[in] pu1_merge_pred_buf_idx_array : ids of the merge prediction buffers
* \param[in] pu1_buf_id_in_use  : per-candidate-type buffer id (indexed by
*                INTER_CANDIDATE_ID_T); UCHAR_MAX is compared against for the
*                ME/skip entry
* \param[in] pu1_buf_id_to_free : ids of buffers displaced from the winner list
* \param[in] u1_me_buf_id       : id of the ME prediction buffer
* \param[in] u1_num_available_cands : not referenced by this function
* \param[in] u1_num_bufs_to_free : number of valid entries in pu1_buf_id_to_free
* \param[in] u1_eval_merge      : non-zero when merge was evaluated
* \param[in] u1_eval_skip       : non-zero when skip was evaluated
* \param[in] u1_part_type       : partition type; 0 selects the two-merge-buffer
*                handling, anything else the three-buffer handling
*
* \note
*  COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO is defined outside this
*  function; judging by its name it frees the given buffer when none of the
*  listed candidates references it - confirm against the macro definition.
******************************************************************************
*/
static void ihevce_free_unused_buf_indices(
    UWORD32 *pu4_pred_buf_usage_indicator,
    UWORD8 *pu1_merge_pred_buf_idx_array,
    UWORD8 *pu1_buf_id_in_use,
    UWORD8 *pu1_buf_id_to_free,
    UWORD8 u1_me_buf_id,
    UWORD8 u1_num_available_cands,
    UWORD8 u1_num_bufs_to_free,
    UWORD8 u1_eval_merge,
    UWORD8 u1_eval_skip,
    UWORD8 u1_part_type)
{
    UWORD8 i;

    /* Skip evaluation, or plain ME evaluation without merge */
    if(u1_eval_skip || (!u1_eval_merge))
    {
        if(u1_eval_skip)
        {
            /* Of the first two merge buffers, keep only the one (if any) */
            /* that the skip candidate ended up using                     */
            UWORD8 u1_skip_uses_buf0 =
                (pu1_buf_id_in_use[ME_OR_SKIP_DERIVED] == pu1_merge_pred_buf_idx_array[0]);
            UWORD8 u1_skip_uses_buf1 =
                (!u1_skip_uses_buf0) &&
                (pu1_buf_id_in_use[ME_OR_SKIP_DERIVED] == pu1_merge_pred_buf_idx_array[1]);

            if(!u1_skip_uses_buf0)
            {
                ihevce_set_pred_buf_as_free(
                    pu4_pred_buf_usage_indicator, pu1_merge_pred_buf_idx_array[0]);
            }

            if(!u1_skip_uses_buf1)
            {
                ihevce_set_pred_buf_as_free(
                    pu4_pred_buf_usage_indicator, pu1_merge_pred_buf_idx_array[1]);
            }
        }
        else if(UCHAR_MAX == pu1_buf_id_in_use[ME_OR_SKIP_DERIVED])
        {
            /* The ME candidate was not accepted, so its buffer is released */
            ihevce_set_pred_buf_as_free(pu4_pred_buf_usage_indicator, u1_me_buf_id);
        }

        /* Displaced buffers are released, except the ME buffer itself */
        for(i = 0; i < u1_num_bufs_to_free; i++)
        {
            if(u1_me_buf_id == pu1_buf_id_to_free[i])
            {
                continue;
            }

            ihevce_set_pred_buf_as_free(pu4_pred_buf_usage_indicator, pu1_buf_id_to_free[i]);
        }

        return;
    }

    /* Merge evaluation (skip not evaluated) */

    /* ME pred buf */
    COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
        u1_me_buf_id,
        pu1_buf_id_in_use,
        pu1_buf_id_to_free,
        4,
        u1_num_bufs_to_free,
        pu4_pred_buf_usage_indicator);

    /* Merge pred buf 0 */
    COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
        pu1_merge_pred_buf_idx_array[0],
        pu1_buf_id_in_use,
        pu1_buf_id_to_free,
        4,
        u1_num_bufs_to_free,
        pu4_pred_buf_usage_indicator);

    /* Merge pred buf 1 */
    COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
        pu1_merge_pred_buf_idx_array[1],
        pu1_buf_id_in_use,
        pu1_buf_id_to_free,
        4,
        u1_num_bufs_to_free,
        pu4_pred_buf_usage_indicator);

    if(0 != u1_part_type)
    {
        /* Merge pred buf 2 - only handled for non-zero partition types */
        COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
            pu1_merge_pred_buf_idx_array[2],
            pu1_buf_id_in_use,
            pu1_buf_id_to_free,
            4,
            u1_num_bufs_to_free,
            pu4_pred_buf_usage_indicator);
    }

    /* Displaced buffers are released unless they are the ME buffer or one */
    /* of the first two merge buffers, which were handled above            */
    for(i = 0; i < u1_num_bufs_to_free; i++)
    {
        if((u1_me_buf_id == pu1_buf_id_to_free[i]) ||
           (pu1_merge_pred_buf_idx_array[0] == pu1_buf_id_to_free[i]) ||
           (pu1_merge_pred_buf_idx_array[1] == pu1_buf_id_to_free[i]))
        {
            continue;
        }

        ihevce_set_pred_buf_as_free(pu4_pred_buf_usage_indicator, pu1_buf_id_to_free[i]);
    }
}
1303
/*!
******************************************************************************
* \brief
*  Tells whether a prediction buffer is referenced by exactly one of the
*  previously added winners, in which case it can be released when that
*  winner is replaced.
*
* \param[in] pu1_pred_id_of_winners : prediction buffer id of every winner
* \param[in] u1_idx_of_worst_cost_in_pred_buf_array : buffer id to look for
* \param[in] u1_num_cands_previously_added : number of winners to inspect
*
* \return
*  0 as soon as a second winner referencing the buffer is found, 1 otherwise.
*  The buffer is expected to be referenced at least once (ASSERT).
******************************************************************************
*/
static UWORD8 ihevce_check_if_buf_can_be_freed(
    UWORD8 *pu1_pred_id_of_winners,
    UWORD8 u1_idx_of_worst_cost_in_pred_buf_array,
    UWORD8 u1_num_cands_previously_added)
{
    UWORD8 u1_pos = 0;
    UWORD8 u1_num_users = 0;

    while(u1_pos < u1_num_cands_previously_added)
    {
        if(pu1_pred_id_of_winners[u1_pos] == u1_idx_of_worst_cost_in_pred_buf_array)
        {
            /* A second user means the buffer is shared and must be kept */
            if(u1_num_users)
            {
                return 0;
            }

            u1_num_users = 1;
        }

        u1_pos++;
    }

    ASSERT(u1_num_users > 0);

    return 1;
}
1330
/*!
******************************************************************************
* \brief
*  Extracts the 'u1_num_cands_to_pick' largest costs of pu4_cost_src, in
*  descending order, together with their indices.
*
* \param[in,out] pu4_cost_src : cost array to pick from. Entries are zeroed
*                temporarily while picking and restored before returning.
* \param[out] pu4_cost_dst    : receives the picked costs
* \param[out] pu1_worst_dst_cand_idx : receives the index of each picked cost
* \param[in] u1_src_array_length : number of entries in pu4_cost_src
* \param[in] u1_num_cands_to_pick : number of costs to pick
* \param[in] u1_worst_cost_idx_in_dst_array : index (into pu4_cost_src) of the
*                already known worst cost; it is used as the first pick
*
* \note
*  When more costs are requested than the source holds (or when genuine costs
*  are 0) the same index can be picked more than once; the repeated picks
*  record a cost of 0. Only the first record of an index carries its genuine
*  cost, hence the restore pass walks the records in reverse so that the
*  genuine value is written last.
******************************************************************************
*/
static void ihevce_get_worst_costs_and_indices(
    UWORD32 *pu4_cost_src,
    UWORD32 *pu4_cost_dst,
    UWORD8 *pu1_worst_dst_cand_idx,
    UWORD8 u1_src_array_length,
    UWORD8 u1_num_cands_to_pick,
    UWORD8 u1_worst_cost_idx_in_dst_array)
{
    WORD32 i;

    /* Nothing requested: the source must not be disturbed, since no */
    /* restore would follow the temporary zeroing done below         */
    if(0 == u1_num_cands_to_pick)
    {
        return;
    }

    /* First pick is the worst cost already known to the caller */
    pu4_cost_dst[0] = pu4_cost_src[u1_worst_cost_idx_in_dst_array];
    pu4_cost_src[u1_worst_cost_idx_in_dst_array] = 0;
    pu1_worst_dst_cand_idx[0] = u1_worst_cost_idx_in_dst_array;

    /* Every picked entry is zeroed so that the next search finds the */
    /* next largest cost                                              */
    for(i = 1; i < u1_num_cands_to_pick; i++)
    {
        pu1_worst_dst_cand_idx[i] =
            ihevce_find_idx_of_worst_cost(pu4_cost_src, u1_src_array_length);

        pu4_cost_dst[i] = pu4_cost_src[pu1_worst_dst_cand_idx[i]];
        pu4_cost_src[pu1_worst_dst_cand_idx[i]] = 0;
    }

    /* Restore the source costs. Reverse order guarantees that, for an */
    /* index recorded more than once, its genuine (first) cost wins    */
    for(i = u1_num_cands_to_pick - 1; i >= 0; i--)
    {
        pu4_cost_src[pu1_worst_dst_cand_idx[i]] = pu4_cost_dst[i];
    }
}
1359
/*!
******************************************************************************
* \brief
*  Chooses which of the newly evaluated candidates enter the list of winners
*  when the list cannot hold all of them, and which existing winners they
*  replace.
*
* \param[in,out] pu4_cost_src : costs of the new candidates; sorted in place
*                together with pe_cand_id
* \param[in,out] pu4_cost_dst : costs of the existing winners (temporarily
*                modified by ihevce_get_worst_costs_and_indices)
* \param[in,out] pe_cand_id   : on entry the ids of the new candidates; on
*                return the ids of the selected candidates, listed in the
*                fixed order ME/skip, merge, mixed type1, mixed type0
* \param[out] pu1_cand_idx_in_dst_array : winner-list slot assigned to each
*                selected candidate
* \param[out] pu1_buf_id_to_free : receives the prediction buffer ids of
*                replaced winners whose buffer has no other user
* \param[in,out] pu1_pred_id_of_winners : prediction buffer id of each winner;
*                the entry of a replaced winner whose buffer is shared is set
*                to UCHAR_MAX
* \param[in,out] pu1_num_bufs_to_free : count of entries in pu1_buf_id_to_free
* \param[in] i4_max_num_inter_rdopt_cands : capacity of the winner list
* \param[in] u1_num_cands_previously_added : current number of winners
* \param[in] u1_num_available_cands : number of new candidates
* \param[in] u1_worst_cost_idx_in_dst_array : index of the worst winner
*
* \return
*  Number of candidates selected (valid entries in pe_cand_id and
*  pu1_cand_idx_in_dst_array)
******************************************************************************
*/
static UWORD8 ihevce_select_cands_to_replace_previous_worst(
    UWORD32 *pu4_cost_src,
    UWORD32 *pu4_cost_dst,
    INTER_CANDIDATE_ID_T *pe_cand_id,
    UWORD8 *pu1_cand_idx_in_dst_array,
    UWORD8 *pu1_buf_id_to_free,
    UWORD8 *pu1_pred_id_of_winners,
    UWORD8 *pu1_num_bufs_to_free,
    WORD32 i4_max_num_inter_rdopt_cands,
    UWORD8 u1_num_cands_previously_added,
    UWORD8 u1_num_available_cands,
    UWORD8 u1_worst_cost_idx_in_dst_array)
{
    WORD32 i, j, k;
    UWORD32 au4_worst_dst_costs[4];
    UWORD8 au1_worst_dst_cand_idx[4];

    /* Order in which the selected ids are handed back to the caller */
    INTER_CANDIDATE_ID_T ae_default_cand_id[4] = {
        ME_OR_SKIP_DERIVED, MERGE_DERIVED, MIXED_MODE_TYPE1, MIXED_MODE_TYPE0
    };

    /* Free slots in the winner list: filled without any cost comparison */
    UWORD8 u1_num_cands_to_add_wo_comparisons =
        i4_max_num_inter_rdopt_cands - u1_num_cands_previously_added;
    /* Remaining new candidates have to beat an existing winner */
    UWORD8 u1_num_cands_to_add_after_comparisons =
        u1_num_available_cands - u1_num_cands_to_add_wo_comparisons;
    UWORD8 u1_num_cands_to_add = 0;
    /* Indexed by candidate id: 1 when that candidate has been selected */
    UWORD8 au1_valid_src_cands[4] = { 0, 0, 0, 0 };

    /* Checked on the operands: the UWORD8 difference above can never */
    /* compare below zero                                             */
    ASSERT(u1_num_available_cands >= u1_num_cands_to_add_wo_comparisons);

    /* Sorting src costs */
    SORT_PRIMARY_INTTYPE_ARRAY_AND_REORDER_GENERIC_COMPANION_ARRAY(
        pu4_cost_src, pe_cand_id, u1_num_available_cands, INTER_CANDIDATE_ID_T);

    /* The leading candidates of the sorted list take the free slots */
    for(i = 0; i < u1_num_cands_to_add_wo_comparisons; i++)
    {
        pu1_cand_idx_in_dst_array[u1_num_cands_to_add++] = u1_num_cands_previously_added + i;
        au1_valid_src_cands[pe_cand_id[i]] = 1;
    }

    if(u1_num_cands_previously_added)
    {
        WORD8 i1_last_index = 0;

        ihevce_get_worst_costs_and_indices(
            pu4_cost_dst,
            au4_worst_dst_costs,
            au1_worst_dst_cand_idx,
            u1_num_cands_previously_added,
            u1_num_cands_to_add_after_comparisons,
            u1_worst_cost_idx_in_dst_array);

        /* Walk the remaining new candidates from the last one backwards, */
        /* matching each against the picked worst winners                 */
        for(i = u1_num_available_cands - 1; i >= u1_num_cands_to_add_wo_comparisons; i--)
        {
            for(j = u1_num_cands_to_add_after_comparisons - 1; j >= i1_last_index; j--)
            {
                if((pu4_cost_src[i] < au4_worst_dst_costs[j]))
                {
                    if((i - u1_num_cands_to_add_wo_comparisons) <= j)
                    {
                        /* Remaining candidates up to 'i' replace the first */
                        /* picked worst winners, one for one                */
                        for(k = 0; k <= (i - u1_num_cands_to_add_wo_comparisons); k++)
                        {
                            pu1_cand_idx_in_dst_array[u1_num_cands_to_add++] =
                                au1_worst_dst_cand_idx[k];
                            au1_valid_src_cands[pe_cand_id[u1_num_cands_to_add_wo_comparisons + k]] =
                                1;

                            if(1 == ihevce_check_if_buf_can_be_freed(
                                        pu1_pred_id_of_winners,
                                        pu1_pred_id_of_winners[au1_worst_dst_cand_idx[k]],
                                        u1_num_cands_previously_added))
                            {
                                /* Sole user of its buffer: schedule release */
                                pu1_buf_id_to_free[(*pu1_num_bufs_to_free)++] =
                                    pu1_pred_id_of_winners[au1_worst_dst_cand_idx[k]];
                            }
                            else
                            {
                                /* Buffer is shared with another winner: only */
                                /* drop this winner's reference to it         */
                                pu1_pred_id_of_winners[au1_worst_dst_cand_idx[k]] = UCHAR_MAX;
                            }
                        }

                        /* Selection is complete */
                        i1_last_index = -1;
                    }
                    else
                    {
                        /* Lower bound of the inner search for the next */
                        /* candidate                                    */
                        i1_last_index = j;
                    }

                    break;
                }
            }

            if(-1 == i1_last_index)
            {
                break;
            }
        }
    }

    /* Hand the selected ids back in the default order. Every default id is */
    /* inspected: the ids present in pe_cand_id need not be the first       */
    /* 'u1_num_available_cands' entries of the default list                 */
    for(i = 0, j = 0; i < (WORD32)(sizeof(ae_default_cand_id) / sizeof(ae_default_cand_id[0]));
        i++)
    {
        if(au1_valid_src_cands[ae_default_cand_id[i]])
        {
            pe_cand_id[j++] = ae_default_cand_id[i];
        }
    }

    return u1_num_cands_to_add;
}
1469
/*!
******************************************************************************
* \brief
*  Folds the candidates evaluated for the current partition type (ME/skip,
*  merge and the two mixed modes) into the list of best inter candidates kept
*  in ps_mode_info, prepares the prediction buffer of every accepted
*  candidate and finally releases the buffers that are no longer needed.
*
* \param[in,out] ps_mode_info : list of winners (candidate pointers, costs,
*                prediction buffer ids, count and worst-cost bookkeeping)
* \param[in] pps_cand_src  : new candidates, indexed by INTER_CANDIDATE_ID_T
* \param[in] pas_mvp_winner : not referenced by this function
* \param[in] pau4_cost     : per-candidate, per-partition costs
* \param[in] ppv_pred_buf_list : list of prediction buffers
* \param[in] pau1_final_pred_buf_id : per-candidate, per-partition id of the
*                buffer holding the final prediction
* \param[in,out] pu4_pred_buf_usage_indicator : buffer usage indicator
* \param[in,out] pu1_num_merge_cands, pu1_num_skip_cands,
*                pu1_num_mixed_mode_type0_cands, pu1_num_mixed_mode_type1_cands :
*                running counters, incremented for every accepted candidate of
*                the respective kind
* \param[in] pu1_merge_pred_buf_idx_array : ids of the merge prediction buffers
* \param[in] pf_copy_2d    : 2D copy routine forwarded to the buffer
*                preparation functions
* \param[in] i4_pred_stride : stride stored with each accepted candidate
* \param[in] i4_max_num_inter_rdopt_cands : capacity of the winner list
* \param[in] u1_cu_size, u1_part_type, u1_num_bytes_per_pel : forwarded to the
*                buffer preparation functions; u1_part_type == 0 additionally
*                selects the single-partition cost
* \param[in] u1_eval_merge : non-zero when merge was evaluated
* \param[in] u1_eval_skip  : non-zero when skip was evaluated
*
* \return
*  Non-zero when a skip or merge candidate was accepted into the list
******************************************************************************
*/
static UWORD8 ihevce_merge_cands_with_existing_best(
    inter_cu_mode_info_t *ps_mode_info,
    cu_inter_cand_t **pps_cand_src,
    pu_mv_t (*pas_mvp_winner)[NUM_INTER_PU_PARTS],
    UWORD32 (*pau4_cost)[MAX_NUM_INTER_PARTS],
    void **ppv_pred_buf_list,
    UWORD8 (*pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],
    UWORD32 *pu4_pred_buf_usage_indicator,
    UWORD8 *pu1_num_merge_cands,
    UWORD8 *pu1_num_skip_cands,
    UWORD8 *pu1_num_mixed_mode_type0_cands,
    UWORD8 *pu1_num_mixed_mode_type1_cands,
    UWORD8 *pu1_merge_pred_buf_idx_array,

    FT_COPY_2D *pf_copy_2d,

    WORD32 i4_pred_stride,
    WORD32 i4_max_num_inter_rdopt_cands,
    UWORD8 u1_cu_size,
    UWORD8 u1_part_type,
    UWORD8 u1_eval_merge,
    UWORD8 u1_eval_skip,
    UWORD8 u1_num_bytes_per_pel)
{
    UWORD32 au4_cost_src[4];
    WORD32 i;
    WORD32 u1_num_available_cands;
    /* Buffer id used by each candidate type; UCHAR_MAX while unassigned */
    UWORD8 au1_buf_id_in_use[4];
    UWORD8 au1_buf_id_to_free[4];
    /* Winner-list slot assigned to each accepted candidate */
    UWORD8 au1_cand_idx_in_dst_array[4];

    INTER_CANDIDATE_ID_T ae_cand_id[4] = {
        ME_OR_SKIP_DERIVED, MERGE_DERIVED, MIXED_MODE_TYPE1, MIXED_MODE_TYPE0
    };

    cu_inter_cand_t **pps_cand_dst = ps_mode_info->aps_cu_data;

    /* Local copies of the winner-list bookkeeping; written back at the end */
    UWORD8 u1_num_cands_previously_added = ps_mode_info->u1_num_inter_cands;
    UWORD8 u1_worst_cost_idx = ps_mode_info->u1_idx_of_worst_cost_in_cost_array;
    UWORD8 u1_idx_of_worst_cost_in_pred_buf_array =
        ps_mode_info->u1_idx_of_worst_cost_in_pred_buf_array;
    UWORD32 *pu4_cost_dst = ps_mode_info->au4_cost;
    UWORD8 *pu1_pred_id_of_winners = ps_mode_info->au1_pred_buf_idx;
    UWORD8 u1_num_bufs_to_free = 0;
    UWORD8 u1_skip_or_merge_cand_is_valid = 0;
    UWORD8 u1_num_invalid_cands = 0;

    memset(au1_buf_id_in_use, UCHAR_MAX, sizeof(au1_buf_id_in_use));

    /* 1 candidate without merge evaluation; with it, 3 candidates for */
    /* partition type 0 and 4 for every other partition type           */
    u1_num_available_cands = (u1_eval_merge) ? 2 + ((u1_part_type != 0) + 1) : 1;

    /* Compute the cost of every candidate and drop those whose cost is */
    /* INT_MAX or more by compacting ae_cand_id in place                */
    for(i = 0; i < u1_num_available_cands; i++)
    {
        /* Position in the compacted arrays */
        WORD32 i4_idx = i - u1_num_invalid_cands;

        if(u1_part_type == 0)
        {
            au4_cost_src[i4_idx] = pau4_cost[ae_cand_id[i4_idx]][0];
        }
        else
        {
            /* Two partitions: total cost is the sum of both */
            au4_cost_src[i4_idx] =
                pau4_cost[ae_cand_id[i4_idx]][0] + pau4_cost[ae_cand_id[i4_idx]][1];
        }

        if(au4_cost_src[i4_idx] >= INT_MAX)
        {
            /* Shift the ids that follow one place down over the invalid one */
            memmove(
                &ae_cand_id[i4_idx],
                &ae_cand_id[i4_idx + 1],
                sizeof(INTER_CANDIDATE_ID_T) * (u1_num_available_cands - i - 1));

            u1_num_invalid_cands++;
        }
    }

    u1_num_available_cands -= u1_num_invalid_cands;

    if((u1_num_cands_previously_added + u1_num_available_cands) > i4_max_num_inter_rdopt_cands)
    {
        /* Not everything fits: select the candidates to keep and the slots */
        /* they occupy; the count shrinks to the number selected            */
        u1_num_available_cands = ihevce_select_cands_to_replace_previous_worst(
            au4_cost_src,
            pu4_cost_dst,
            ae_cand_id,
            au1_cand_idx_in_dst_array,
            au1_buf_id_to_free,
            pu1_pred_id_of_winners,
            &u1_num_bufs_to_free,
            i4_max_num_inter_rdopt_cands,
            u1_num_cands_previously_added,
            u1_num_available_cands,
            u1_worst_cost_idx);
    }
    else
    {
        /* Everything fits: append after the existing winners */
        for(i = 0; i < u1_num_available_cands; i++)
        {
            au1_cand_idx_in_dst_array[i] = u1_num_cands_previously_added + i;
        }
    }

    for(i = 0; i < u1_num_available_cands; i++)
    {
        UWORD8 u1_dst_array_idx = au1_cand_idx_in_dst_array[i];

        /* Cost is derived again for the id now stored at position 'i' */
        if(u1_part_type == 0)
        {
            au4_cost_src[i] = pau4_cost[ae_cand_id[i]][0];
        }
        else
        {
            au4_cost_src[i] = pau4_cost[ae_cand_id[i]][0] + pau4_cost[ae_cand_id[i]][1];
        }

        pps_cand_dst[u1_dst_array_idx] = pps_cand_src[ae_cand_id[i]];

        /* Adding a skip candidate identical to the merge winner */
        if((u1_eval_merge) && (0 == u1_part_type) && (MIXED_MODE_TYPE1 == ae_cand_id[i]))
        {
            (*pu1_num_skip_cands)++;

            pu4_cost_dst[u1_dst_array_idx] = au4_cost_src[i];

            if(u1_num_cands_previously_added >= i4_max_num_inter_rdopt_cands)
            {
                /* List is full: rescan the costs for the worst entry */
                u1_worst_cost_idx =
                    ihevce_find_idx_of_worst_cost(pu4_cost_dst, u1_num_cands_previously_added);

                u1_idx_of_worst_cost_in_pred_buf_array = pu1_pred_id_of_winners[u1_worst_cost_idx];
            }
            else
            {
                u1_num_cands_previously_added++;
            }

            if(u1_skip_or_merge_cand_is_valid)
            {
                /* A skip/merge candidate was already accepted in this call: */
                /* share the buffer recorded for the merge candidate         */
                pps_cand_dst[u1_dst_array_idx]->pu1_pred_data =
                    (UWORD8 *)ppv_pred_buf_list[au1_buf_id_in_use[MERGE_DERIVED]];
                pps_cand_dst[u1_dst_array_idx]->pu2_pred_data =
                    (UWORD16 *)ppv_pred_buf_list[au1_buf_id_in_use[MERGE_DERIVED]];
                pps_cand_dst[u1_dst_array_idx]->i4_pred_data_stride = i4_pred_stride;

                au1_buf_id_in_use[MIXED_MODE_TYPE1] = au1_buf_id_in_use[MERGE_DERIVED];
                pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MERGE_DERIVED];
            }
            else
            {
                u1_skip_or_merge_cand_is_valid = 1;

                /* Otherwise the merge prediction buffer is prepared here */
                au1_buf_id_in_use[MIXED_MODE_TYPE1] = ihevce_merge_cand_pred_buffer_preparation(
                    ppv_pred_buf_list,
                    pps_cand_dst[u1_dst_array_idx],
                    pau1_final_pred_buf_id,
                    i4_pred_stride,
                    u1_cu_size,
                    u1_part_type,
                    u1_num_bytes_per_pel,
                    pf_copy_2d);

                pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MIXED_MODE_TYPE1];
            }

            continue;
        }

        if(u1_num_cands_previously_added < i4_max_num_inter_rdopt_cands)
        {
            /* List still has room: the candidate becomes the new worst */
            /* entry only if its cost exceeds the current worst         */
            if(u1_num_cands_previously_added)
            {
                if(au4_cost_src[i] > pu4_cost_dst[u1_worst_cost_idx])
                {
                    u1_worst_cost_idx = u1_num_cands_previously_added;
                }
            }

            pu4_cost_dst[u1_dst_array_idx] = au4_cost_src[i];

            u1_num_cands_previously_added++;
        }
        else
        {
            /* List is full: store the cost in the assigned slot, then */
            /* rescan for the worst entry                              */
            pu4_cost_dst[u1_dst_array_idx] = au4_cost_src[i];

            u1_worst_cost_idx = ihevce_find_idx_of_worst_cost(
                ps_mode_info->au4_cost, u1_num_cands_previously_added);

            u1_idx_of_worst_cost_in_pred_buf_array = pu1_pred_id_of_winners[u1_worst_cost_idx];
        }

        /* Attach / prepare the prediction buffer for the candidate type */
        switch(ae_cand_id[i])
        {
        case ME_OR_SKIP_DERIVED:
        {
            (*pu1_num_skip_cands) += u1_eval_skip;

            /* Prediction is used directly from its final buffer */
            pps_cand_dst[u1_dst_array_idx]->pu1_pred_data =
                (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
            pps_cand_dst[u1_dst_array_idx]->pu2_pred_data =
                (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
            pps_cand_dst[u1_dst_array_idx]->i4_pred_data_stride = i4_pred_stride;

            if(u1_worst_cost_idx == u1_dst_array_idx)
            {
                u1_idx_of_worst_cost_in_pred_buf_array =
                    pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
            }

            /* Counts as a skip candidate only when skip was evaluated */
            u1_skip_or_merge_cand_is_valid = u1_eval_skip;

            au1_buf_id_in_use[ME_OR_SKIP_DERIVED] = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
            pu1_pred_id_of_winners[u1_dst_array_idx] =
                pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];

            break;
        }
        case MERGE_DERIVED:
        {
            (*pu1_num_merge_cands)++;

            au1_buf_id_in_use[MERGE_DERIVED] = ihevce_merge_cand_pred_buffer_preparation(
                ppv_pred_buf_list,
                pps_cand_dst[u1_dst_array_idx],
                pau1_final_pred_buf_id,
                i4_pred_stride,
                u1_cu_size,
                u1_part_type,
                u1_num_bytes_per_pel,
                pf_copy_2d

            );

            pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MERGE_DERIVED];

            if(u1_worst_cost_idx == u1_dst_array_idx)
            {
                u1_idx_of_worst_cost_in_pred_buf_array = au1_buf_id_in_use[MERGE_DERIVED];
            }

            u1_skip_or_merge_cand_is_valid = 1;

            break;
        }
        case MIXED_MODE_TYPE1:
        {
            (*pu1_num_mixed_mode_type1_cands)++;

            /* The eighth argument is non-zero when at least one more */
            /* candidate follows this one in the current call         */
            au1_buf_id_in_use[MIXED_MODE_TYPE1] =
                ihevce_mixed_mode_cand_type1_pred_buffer_preparation(
                    ppv_pred_buf_list,
                    pps_cand_dst[u1_dst_array_idx],
                    pau1_final_pred_buf_id,
                    pu1_merge_pred_buf_idx_array,
                    i4_pred_stride,
                    au1_buf_id_in_use[ME_OR_SKIP_DERIVED],
                    au1_buf_id_in_use[MERGE_DERIVED],
                    (u1_num_available_cands - i) > 1,
                    u1_cu_size,
                    u1_part_type,
                    u1_num_bytes_per_pel,
                    pf_copy_2d

                );

            pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MIXED_MODE_TYPE1];

            if(u1_worst_cost_idx == u1_dst_array_idx)
            {
                u1_idx_of_worst_cost_in_pred_buf_array = au1_buf_id_in_use[MIXED_MODE_TYPE1];
            }

            break;
        }
        case MIXED_MODE_TYPE0:
        {
            (*pu1_num_mixed_mode_type0_cands)++;

            /* Receives the buffer ids recorded so far for the ME, merge */
            /* and mixed type1 candidates                                */
            au1_buf_id_in_use[MIXED_MODE_TYPE0] =
                ihevce_mixed_mode_cand_type0_pred_buffer_preparation(
                    ppv_pred_buf_list,
                    pps_cand_dst[u1_dst_array_idx],
                    pau1_final_pred_buf_id,
                    pu1_merge_pred_buf_idx_array,
                    au1_buf_id_in_use[ME_OR_SKIP_DERIVED],
                    au1_buf_id_in_use[MERGE_DERIVED],
                    au1_buf_id_in_use[MIXED_MODE_TYPE1],
                    i4_pred_stride,
                    u1_cu_size,
                    u1_part_type,
                    u1_num_bytes_per_pel,
                    pf_copy_2d);

            pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MIXED_MODE_TYPE0];

            if(u1_worst_cost_idx == u1_dst_array_idx)
            {
                u1_idx_of_worst_cost_in_pred_buf_array = au1_buf_id_in_use[MIXED_MODE_TYPE0];
            }

            break;
        }
        }
    }

    /* Release the buffers that ended up unused or were displaced */
    ihevce_free_unused_buf_indices(
        pu4_pred_buf_usage_indicator,
        pu1_merge_pred_buf_idx_array,
        au1_buf_id_in_use,
        au1_buf_id_to_free,
        pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0],
        u1_num_available_cands,
        u1_num_bufs_to_free,
        u1_eval_merge,
        u1_eval_skip,
        u1_part_type);

    /* Write the updated bookkeeping back to the winner list */
    ps_mode_info->u1_idx_of_worst_cost_in_cost_array = u1_worst_cost_idx;
    ps_mode_info->u1_num_inter_cands = u1_num_cands_previously_added;
    ps_mode_info->u1_idx_of_worst_cost_in_pred_buf_array = u1_idx_of_worst_cost_in_pred_buf_array;

    return u1_skip_or_merge_cand_is_valid;
}
1792
/*!
******************************************************************************
* \brief
*  Reserves the prediction buffers needed for the requested evaluation and
*  points pps_cands[] at the candidate structures that will hold the results.
*
*  Three cases are handled:
*   - skip only (no merge): two buffers are reserved and a skip candidate
*     covering the whole CU is initialised; the ME/skip and merge entries of
*     pps_cands share it
*   - ME only (no merge, no skip): one buffer is reserved and attached to the
*     ME candidate
*   - merge: one ME buffer plus two (partition type 0) or three (any other
*     partition type) merge buffers are reserved; the merge and, for non-zero
*     partition types, the two mixed-mode candidates start out as copies of
*     the ME candidate
*
* \param[in,out] ps_ctxt   : sifter context (candidate storage, buffer pool)
* \param[out] pps_cands    : candidate pointers, indexed by INTER_CANDIDATE_ID_T
* \param[out] pu1_merge_pred_buf_idx_array : receives the merge buffer ids
* \param[out] pu1_me_pred_buf_idx : receives the ME buffer id
* \param[in] u1_part_type  : partition type
* \param[in] u1_me_cand_list_idx : index of the ME candidate in ps_me_cands
* \param[in] u1_eval_merge : non-zero when merge is to be evaluated
* \param[in] u1_eval_skip  : non-zero when skip is to be evaluated
*
* \return
*  0 when fewer buffers than requested could be obtained, otherwise the count
*  returned by the last buffer request
******************************************************************************
*/
static UWORD8 ihevce_prepare_cand_containers(
    ihevce_inter_cand_sifter_prms_t *ps_ctxt,
    cu_inter_cand_t **pps_cands,
    UWORD8 *pu1_merge_pred_buf_idx_array,
    UWORD8 *pu1_me_pred_buf_idx,
    UWORD8 u1_part_type,
    UWORD8 u1_me_cand_list_idx,
    UWORD8 u1_eval_merge,
    UWORD8 u1_eval_skip)
{
    UWORD8 u1_num_bufs_obtained;
    UWORD8 u1_num_merge_bufs_needed;
    cu_inter_cand_t *ps_me_cand;
    cu_inter_cand_t *ps_merge_cand;

    WORD32 i4_stride = ps_ctxt->ps_pred_buf_data->i4_pred_stride;
    void **ppv_bufs = ps_ctxt->ps_pred_buf_data->apv_inter_pred_data;

    /* ---------------- Skip evaluation without merge ---------------- */
    if((!u1_eval_merge) && u1_eval_skip)
    {
        cu_inter_cand_t *ps_skip_cand;
        UWORD8 u1_size = ps_ctxt->u1_cu_size;
        UWORD8 u1_pos_x = ps_ctxt->u1_cu_pos_x;
        UWORD8 u1_pos_y = ps_ctxt->u1_cu_pos_y;

        u1_num_bufs_obtained = ihevce_get_free_pred_buf_indices(
            pu1_merge_pred_buf_idx_array, &ps_ctxt->ps_pred_buf_data->u4_is_buf_in_use, 2);

        if(u1_num_bufs_obtained < 2)
        {
            return 0;
        }

        /* Skip candidates are taken from the tail of the merge/skip store */
        ps_skip_cand = &ps_ctxt->ps_cu_inter_merge_skip->as_cu_inter_merge_skip_cand
                            [MAX_NUM_CU_MERGE_SKIP_CAND - 1 -
                             ps_ctxt->ps_cu_inter_merge_skip->u1_num_skip_cands];

        /* CU level attributes */
        ps_skip_cand->b1_skip_flag = 1;
        ps_skip_cand->b1_eval_mark = 1;
        ps_skip_cand->b1_eval_tx_cusize = 1;
        ps_skip_cand->b1_eval_tx_cusize_by2 = 1;
        ps_skip_cand->b1_intra_has_won = 0;
        ps_skip_cand->b3_part_size = 0;
        ps_skip_cand->i4_pred_data_stride = i4_stride;

        /* First PU: position and size are stored in units of 4 pels, */
        /* with the size stored minus one                             */
        ps_skip_cand->as_inter_pu[0].b1_intra_flag = 0;
        ps_skip_cand->as_inter_pu[0].b1_merge_flag = 1;
        ps_skip_cand->as_inter_pu[0].b4_pos_x = u1_pos_x >> 2;
        ps_skip_cand->as_inter_pu[0].b4_pos_y = u1_pos_y >> 2;
        ps_skip_cand->as_inter_pu[0].b4_wd = (u1_size >> 2) - 1;
        ps_skip_cand->as_inter_pu[0].b4_ht = (u1_size >> 2) - 1;

        pps_cands[ME_OR_SKIP_DERIVED] = ps_skip_cand;
        pps_cands[MERGE_DERIVED] = ps_skip_cand;

        return u1_num_bufs_obtained;
    }

    /* Both remaining cases need one buffer for the ME candidate */
    u1_num_bufs_obtained = ihevce_get_free_pred_buf_indices(
        pu1_me_pred_buf_idx, &ps_ctxt->ps_pred_buf_data->u4_is_buf_in_use, 1);

    if(u1_num_bufs_obtained < 1)
    {
        return 0;
    }

    ps_me_cand = &ps_ctxt->ps_me_cands[u1_me_cand_list_idx];
    pps_cands[ME_OR_SKIP_DERIVED] = ps_me_cand;

    /* ---------------- Plain ME evaluation ---------------- */
    if(!u1_eval_merge)
    {
        ps_me_cand->i4_pred_data_stride = i4_stride;
        ps_me_cand->pu1_pred_data = (UWORD8 *)ppv_bufs[*pu1_me_pred_buf_idx];
        ps_me_cand->pu2_pred_data = (UWORD16 *)ppv_bufs[*pu1_me_pred_buf_idx];

        return u1_num_bufs_obtained;
    }

    /* ---------------- Merge evaluation ---------------- */
    u1_num_merge_bufs_needed = (u1_part_type > 0) ? 3 : 2;

    u1_num_bufs_obtained = ihevce_get_free_pred_buf_indices(
        pu1_merge_pred_buf_idx_array,
        &ps_ctxt->ps_pred_buf_data->u4_is_buf_in_use,
        u1_num_merge_bufs_needed);

    if(u1_num_bufs_obtained < u1_num_merge_bufs_needed)
    {
        return 0;
    }

    /* Merge candidates are taken from the head of the merge/skip store */
    ps_merge_cand = &ps_ctxt->ps_cu_inter_merge_skip->as_cu_inter_merge_skip_cand
                         [ps_ctxt->ps_cu_inter_merge_skip->u1_num_merge_cands];
    pps_cands[MERGE_DERIVED] = ps_merge_cand;

    if(u1_part_type > 0)
    {
        /* Type0 candidates fill the mixed-mode store from the head, */
        /* type1 candidates from the tail                            */
        pps_cands[MIXED_MODE_TYPE0] =
            &ps_ctxt->ps_mixed_modes_datastore
                 ->as_cu_data[ps_ctxt->ps_mixed_modes_datastore->u1_num_mixed_mode_type0_cands];

        pps_cands[MIXED_MODE_TYPE1] =
            &ps_ctxt->ps_mixed_modes_datastore->as_cu_data
                 [MAX_NUM_MIXED_MODE_INTER_RDO_CANDS - 1 -
                  ps_ctxt->ps_mixed_modes_datastore->u1_num_mixed_mode_type1_cands];
    }

    /* Every derived candidate starts out as a copy of the ME candidate */
    *ps_merge_cand = *ps_me_cand;

    if(u1_part_type > 0)
    {
        *pps_cands[MIXED_MODE_TYPE0] = *ps_me_cand;
        *pps_cands[MIXED_MODE_TYPE1] = *ps_me_cand;
    }

    ps_merge_cand->as_inter_pu[0].b1_merge_flag = 1;
    ps_merge_cand->as_inter_pu[1].b1_merge_flag = 1;

    return u1_num_bufs_obtained;
}
1922
ihevce_merge_prms_init(merge_prms_t * ps_prms,merge_cand_list_t * ps_list,inter_pred_ctxt_t * ps_mc_ctxt,mv_pred_ctxt_t * ps_mv_pred_ctxt,PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu,PF_SAD_FXN_T pf_sad_fxn,void ** ppv_pred_buf_list,ihevce_cmn_opt_func_t * ps_cmn_utils_optimised_function_list,UWORD8 * pu1_merge_pred_buf_array,UWORD8 (* pau1_best_pred_buf_id)[MAX_NUM_INTER_PARTS],UWORD8 * pu1_is_top_used,WORD32 (* pai4_noise_term)[MAX_NUM_INTER_PARTS],UWORD32 (* pau4_pred_variance)[MAX_NUM_INTER_PARTS],UWORD32 * pu4_src_variance,WORD32 i4_alpha_stim_multiplier,WORD32 i4_src_stride,WORD32 i4_pred_stride,WORD32 i4_lambda,UWORD8 u1_is_cu_noisy,UWORD8 u1_is_hbd,UWORD8 u1_max_cands,UWORD8 u1_merge_idx_cabac_model,UWORD8 u1_use_merge_cand_from_top_row)1923 static __inline void ihevce_merge_prms_init(
1924 merge_prms_t *ps_prms,
1925 merge_cand_list_t *ps_list,
1926 inter_pred_ctxt_t *ps_mc_ctxt,
1927 mv_pred_ctxt_t *ps_mv_pred_ctxt,
1928 PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu,
1929 PF_SAD_FXN_T pf_sad_fxn,
1930 void **ppv_pred_buf_list,
1931 ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
1932 UWORD8 *pu1_merge_pred_buf_array,
1933 UWORD8 (*pau1_best_pred_buf_id)[MAX_NUM_INTER_PARTS],
1934 UWORD8 *pu1_is_top_used,
1935 WORD32 (*pai4_noise_term)[MAX_NUM_INTER_PARTS],
1936 UWORD32 (*pau4_pred_variance)[MAX_NUM_INTER_PARTS],
1937 UWORD32 *pu4_src_variance,
1938 WORD32 i4_alpha_stim_multiplier,
1939 WORD32 i4_src_stride,
1940 WORD32 i4_pred_stride,
1941 WORD32 i4_lambda,
1942 UWORD8 u1_is_cu_noisy,
1943 UWORD8 u1_is_hbd,
1944 UWORD8 u1_max_cands,
1945 UWORD8 u1_merge_idx_cabac_model,
1946 UWORD8 u1_use_merge_cand_from_top_row)
1947 {
1948 ps_prms->ps_list = ps_list;
1949 ps_prms->ps_mc_ctxt = ps_mc_ctxt;
1950 ps_prms->ps_mv_pred_ctxt = ps_mv_pred_ctxt;
1951 ps_prms->pf_luma_inter_pred_pu = pf_luma_inter_pred_pu;
1952 ps_prms->pf_sad_fxn = pf_sad_fxn;
1953 ps_prms->ppv_pred_buf_list = ppv_pred_buf_list;
1954 ps_prms->ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;
1955
1956 ps_prms->pu1_merge_pred_buf_array = pu1_merge_pred_buf_array;
1957 ps_prms->pau1_best_pred_buf_id = pau1_best_pred_buf_id;
1958 ps_prms->pu1_is_top_used = pu1_is_top_used;
1959 ps_prms->pai4_noise_term = pai4_noise_term;
1960 ps_prms->pau4_pred_variance = pau4_pred_variance;
1961 ps_prms->pu4_src_variance = pu4_src_variance;
1962 ps_prms->i4_alpha_stim_multiplier = i4_alpha_stim_multiplier;
1963 ps_prms->i4_src_stride = i4_src_stride;
1964 ps_prms->i4_pred_stride = i4_pred_stride;
1965 ps_prms->i4_lambda = i4_lambda;
1966 ps_prms->u1_is_cu_noisy = u1_is_cu_noisy;
1967 ps_prms->u1_is_hbd = u1_is_hbd;
1968 ps_prms->u1_max_cands = u1_max_cands;
1969 ps_prms->u1_merge_idx_cabac_model = u1_merge_idx_cabac_model;
1970 ps_prms->u1_use_merge_cand_from_top_row = u1_use_merge_cand_from_top_row;
1971 }
1972
/*!
******************************************************************************
* \brief
*  Limits the number of merge candidates to be considered.
*
* \param[in] ps_nbr        : neighbour availability flags
* \param[in] ps_merge_cand : not referenced by this function
* \param[in] pu1_is_top_used : per-candidate flags; only entry 0 is read
*                (presumably non-zero when the candidate depends on the top
*                row - confirm against the code that fills it)
* \param[in] u1_num_merge_cands : number of merge candidates available
* \param[in] u1_use_merge_cand_from_top_row : non-zero when no restriction
*                applies
*
* \return
*  u1_num_merge_cands when u1_use_merge_cand_from_top_row is set. Otherwise 1
*  if the left or bottom-left neighbour is available and pu1_is_top_used[0]
*  is zero, and 0 in every other case.
******************************************************************************
*/
static UWORD8 ihevce_merge_candidate_seive(
    nbr_avail_flags_t *ps_nbr,
    merge_cand_list_t *ps_merge_cand,
    UWORD8 *pu1_is_top_used,
    UWORD8 u1_num_merge_cands,
    UWORD8 u1_use_merge_cand_from_top_row)
{
    /* No restriction: every candidate may be used */
    if(u1_use_merge_cand_from_top_row)
    {
        return u1_num_merge_cands;
    }

    /* Neither the bottom-left nor the left neighbour is available */
    if((!ps_nbr->u1_bot_lt_avail) && (!ps_nbr->u1_left_avail))
    {
        return 0;
    }

    /* At most the first candidate, and only if its flag is clear */
    return !pu1_is_top_used[0];
}
1996
/*!
******************************************************************************
* \if Function name : ihevce_compute_pred_and_populate_modes \endif
*
* \brief
*    For one entry of the ME candidate list (or for the skip candidate),
*    computes the prediction and cost of the ME, merge and mixed mode
*    candidates and hands them to ihevce_merge_cands_with_existing_best so
*    that they are merged with the CU's current list of best candidates.
*
*    Steps -
*    1. Pred and cost for the ME cand (not done when u1_eval_skip = 1).
*    2. Merge winner for PU1.
*    3. Pred and cost for the mixed_type0 cand.
*    4. Merge winner for PU2, and hence pred and cost for the merge cand.
*    5. Merge winner for PU2 assuming the ME cand as PU1 winner, and hence
*       pred and cost for the mixed_type1 cand.
*    6. Sorting of the costs and merging with the final cand list.
*    Steps 2 - 5 are executed only when u1_eval_merge or u1_eval_skip is set.
*
* \param[in,out] ps_ctxt : inter candidate sifter context
* \param[in] pf_sad_func : distortion function used for the cost computation
* \param[in] pu4_src_variance : per-PU source variance; read only when the CU
*            is noisy and i4_alpha_stim_multiplier is non-zero
* \param[in] u1_part_type : partition type (PRT_2Nx2N implies a single PU)
* \param[in] u1_me_cand_list_idx : index into the ME candidate list
* \param[in] u1_eval_merge : non-zero to evaluate merge and mixed modes
* \param[in] u1_eval_skip : non-zero to evaluate the skip candidate
*            (must not be set together with u1_eval_merge)
*
* \return
*    0 if ihevce_prepare_cand_containers could not provide any buffer,
*    else the value returned by ihevce_merge_cands_with_existing_best
*
*****************************************************************************
*/
static UWORD8 ihevce_compute_pred_and_populate_modes(
    ihevce_inter_cand_sifter_prms_t *ps_ctxt,
    PF_SAD_FXN_T pf_sad_func,
    UWORD32 *pu4_src_variance,
    UWORD8 u1_part_type,
    UWORD8 u1_me_cand_list_idx,
    UWORD8 u1_eval_merge,
    UWORD8 u1_eval_skip)
{
    /* Containers indexed with ME_OR_SKIP_DERIVED, MERGE_DERIVED, */
    /* MIXED_MODE_TYPE0 and MIXED_MODE_TYPE1 */
    cu_inter_cand_t *aps_cands[4];
    pu_mv_t as_mvp_winner[4][NUM_INTER_PU_PARTS];
    UWORD32 au4_cost[4][NUM_INTER_PU_PARTS];
    UWORD32 au4_pred_variance[4][NUM_INTER_PU_PARTS];
    WORD32 ai4_noise_term[4][NUM_INTER_PU_PARTS];
    UWORD8 au1_final_pred_buf_id[4][NUM_INTER_PU_PARTS];

    merge_prms_t s_merge_prms;
    merge_cand_list_t as_merge_cand[MAX_NUM_MERGE_CAND];
    UWORD8 au1_is_top_used[MAX_NUM_MERGE_CAND];
    UWORD8 au1_merge_pred_buf_idx_array[3];

    UWORD8 u1_me_pred_buf_idx;
    UWORD8 u1_num_bufs_currently_allocated;
    UWORD8 u1_part_idx;
    UWORD8 u1_cand_idx;
    WORD32 i4_mean;

    /* Frequently used context members */
    inter_cu_mode_info_t *ps_cu_mode_info = ps_ctxt->ps_inter_cu_mode_info;
    inter_pred_ctxt_t *ps_mc_ctxt = ps_ctxt->ps_mc_ctxt;
    mv_pred_ctxt_t *ps_mv_pred_ctxt = ps_ctxt->ps_mv_pred_ctxt;
    ihevce_inter_pred_buf_data_t *ps_pred_buf_info = ps_ctxt->ps_pred_buf_data;
    PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu = ps_ctxt->pf_luma_inter_pred_pu;

    /* Neighbour data; may be re-pointed to the second PU inside the loop */
    nbr_4x4_t *ps_cu_nbr_buf = ps_ctxt->aps_cu_nbr_buf[0];
    nbr_4x4_t *ps_pu_left_nbr = ps_ctxt->ps_left_nbr_4x4;
    nbr_4x4_t *ps_pu_top_nbr = ps_ctxt->ps_top_nbr_4x4;
    nbr_4x4_t *ps_pu_topleft_nbr = ps_ctxt->ps_topleft_nbr_4x4;
    WORD32 i4_nbr_4x4_left_stride = ps_ctxt->i4_nbr_4x4_left_strd;
    UWORD8 *pu1_ctb_nbr_map = ps_ctxt->pu1_ctb_nbr_map;
    WORD32 i4_nbr_map_stride = ps_ctxt->i4_ctb_nbr_map_stride;

    /* Source and prediction buffers */
    void *pv_src = ps_ctxt->pv_src;
    void **ppv_pred_buf_list = ps_pred_buf_info->apv_inter_pred_data;
    WORD32 i4_src_stride = ps_ctxt->i4_src_strd;
    WORD32 i4_pred_stride = ps_pred_buf_info->i4_pred_stride;
    WORD32 i4_src_buf_offset = 0;
    WORD32 i4_pred_buf_offset = 0;
    UWORD8 u1_num_bytes_per_pel = ps_ctxt->u1_is_hbd + 1;

    /* CU level attributes */
    UWORD8 u1_cu_size = ps_ctxt->u1_cu_size;
    UWORD8 u1_cu_pos_x = ps_ctxt->u1_cu_pos_x;
    UWORD8 u1_cu_pos_y = ps_ctxt->u1_cu_pos_y;
    UWORD8 u1_num_parts = (u1_part_type != PRT_2Nx2N) + 1;
    UWORD8 u1_is_cu_noisy = ps_ctxt->u1_is_cu_noisy;
    UWORD8 u1_max_merge_candidates = ps_ctxt->u1_max_merge_candidates;
    WORD32 i4_max_num_inter_rdopt_cands = ps_ctxt->i4_max_num_inter_rdopt_cands;
    WORD32 i4_lambda_qf = ps_ctxt->i4_lambda_qf;

    /* Set for 8x8 CUs when the parallel merge level exceeds its minimum */
    UWORD8 u1_single_mcl_flag =
        ((8 == u1_cu_size) && (ps_mv_pred_ctxt->i4_log2_parallel_merge_level_minus2 > 0));

    ASSERT(0 == (u1_eval_skip && u1_eval_merge));
    ASSERT(u1_me_cand_list_idx < ps_ctxt->u1_num_me_cands);

    u1_num_bufs_currently_allocated = ihevce_prepare_cand_containers(
        ps_ctxt,
        aps_cands,
        au1_merge_pred_buf_idx_array,
        &u1_me_pred_buf_idx,
        u1_part_type,
        u1_me_cand_list_idx,
        u1_eval_merge,
        u1_eval_skip);

    /* Nothing can be evaluated without a buffer */
    if(0 == u1_num_bufs_currently_allocated)
    {
        return 0;
    }

    if((u1_eval_merge) || (u1_eval_skip))
    {
        ihevce_merge_prms_init(
            &s_merge_prms,
            as_merge_cand,
            ps_mc_ctxt,
            ps_mv_pred_ctxt,
            pf_luma_inter_pred_pu,
            pf_sad_func,
            ppv_pred_buf_list,
            ps_ctxt->ps_cmn_utils_optimised_function_list,
            au1_merge_pred_buf_idx_array,
            au1_final_pred_buf_id,
            au1_is_top_used,
            ai4_noise_term,
            au4_pred_variance,
            pu4_src_variance,
            ps_ctxt->i4_alpha_stim_multiplier,
            i4_src_stride,
            i4_pred_stride,
            i4_lambda_qf,
            u1_is_cu_noisy,
            ps_ctxt->u1_is_hbd,
            u1_max_merge_candidates,
            ps_ctxt->u1_merge_idx_cabac_model,
            ps_ctxt->u1_use_merge_cand_from_top_row);
    }

    for(u1_part_idx = 0; u1_part_idx < u1_num_parts; u1_part_idx++)
    {
        nbr_avail_flags_t s_nbr;

        /* PU of this partition in the merge and in the ME / skip container */
        pu_t *ps_pu = &aps_cands[MERGE_DERIVED]->as_inter_pu[u1_part_idx];
        pu_t *ps_me_pu = &aps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu[u1_part_idx];

        PART_SIZE_E e_part_size = (PART_SIZE_E)aps_cands[ME_OR_SKIP_DERIVED]->b3_part_size;

        /* Geometry of the PU : size in pels, position in 4x4 units */
        UWORD8 u1_part_wd = (aps_cands[0]->as_inter_pu[u1_part_idx].b4_wd + 1) << 2;
        UWORD8 u1_part_ht = (aps_cands[0]->as_inter_pu[u1_part_idx].b4_ht + 1) << 2;
        UWORD8 u1_pu_pos_x_4x4 = aps_cands[0]->as_inter_pu[u1_part_idx].b4_pos_x;
        UWORD8 u1_pu_pos_y_4x4 = aps_cands[0]->as_inter_pu[u1_part_idx].b4_pos_y;

        /* Mixed mode container that takes the merge result of this PU, */
        /* and the one that takes the ME result of this PU */
        WORD32 i4_mixed_idx_merge_pu = (u1_part_idx) ? MIXED_MODE_TYPE1 : MIXED_MODE_TYPE0;
        WORD32 i4_mixed_idx_me_pu = (u1_part_idx) ? MIXED_MODE_TYPE0 : MIXED_MODE_TYPE1;

        void *pv_pu_src = (UWORD8 *)pv_src + i4_src_buf_offset;
        UWORD8 u1_num_merge_cands = 0;

        /* ME cand pred and cost */
        if(!u1_eval_skip)
        {
            void *pv_pu_pred = (UWORD8 *)ppv_pred_buf_list[u1_me_pred_buf_idx] + i4_pred_buf_offset;

            /* When the ME error metric is reused, the cost computation is */
            /* switched off in the call and the stored metric is taken */
            UWORD8 u1_compute_cost = !ps_ctxt->u1_reuse_me_sad;
            UWORD32 u4_computed_cost;

            u4_computed_cost = ihevce_compute_inter_pred_and_cost(
                ps_mc_ctxt,
                pf_luma_inter_pred_pu,
                pf_sad_func,
                ps_me_pu,
                pv_pu_src,
                pv_pu_pred,
                i4_src_stride,
                i4_pred_stride,
                u1_compute_cost,
                ps_ctxt->ps_cmn_utils_optimised_function_list);

            if(u1_compute_cost)
            {
                au4_cost[ME_OR_SKIP_DERIVED][u1_part_idx] = u4_computed_cost;
            }
            else
            {
                au4_cost[ME_OR_SKIP_DERIVED][u1_part_idx] =
                    ps_ctxt->pai4_me_err_metric[u1_me_cand_list_idx][u1_part_idx];
            }

            au1_final_pred_buf_id[ME_OR_SKIP_DERIVED][u1_part_idx] = u1_me_pred_buf_idx;

            /* Noise handling : scale the distortion with the noise term */
            if(u1_is_cu_noisy && ps_ctxt->i4_alpha_stim_multiplier)
            {
                ihevce_calc_variance(
                    pv_pu_pred,
                    i4_pred_stride,
                    &i4_mean,
                    &au4_pred_variance[ME_OR_SKIP_DERIVED][u1_part_idx],
                    u1_part_ht,
                    u1_part_wd,
                    ps_ctxt->u1_is_hbd,
                    0);

                ai4_noise_term[ME_OR_SKIP_DERIVED][u1_part_idx] = ihevce_compute_noise_term(
                    ps_ctxt->i4_alpha_stim_multiplier,
                    pu4_src_variance[u1_part_idx],
                    au4_pred_variance[ME_OR_SKIP_DERIVED][u1_part_idx]);

                MULTIPLY_STIM_WITH_DISTORTION(
                    au4_cost[ME_OR_SKIP_DERIVED][u1_part_idx],
                    ai4_noise_term[ME_OR_SKIP_DERIVED][u1_part_idx],
                    STIM_Q_FORMAT,
                    ALPHA_Q_FORMAT);
            }
        }

        if(u1_eval_skip || u1_eval_merge)
        {
            pu_t s_pu, *ps_pu_merge;

            UWORD8 u1_is_any_top_available = 1;
            UWORD8 u1_are_valid_merge_cands_available = 1;

            /* Neighbour availability flags */
            if((u1_num_parts > 1) && u1_single_mcl_flag)
            {
                /* 8x8 SMPs take the 2Nx2N neighbours */
                ihevce_get_only_nbr_flag(
                    &s_nbr,
                    pu1_ctb_nbr_map,
                    i4_nbr_map_stride,
                    aps_cands[0]->as_inter_pu[0].b4_pos_x,
                    aps_cands[0]->as_inter_pu[0].b4_pos_y,
                    u1_cu_size >> 2,
                    u1_cu_size >> 2);

                /* Local copy of the PU, stretched to cover the whole CU */
                memcpy(&s_pu, ps_pu, sizeof(pu_t));
                s_pu.b4_pos_x = u1_cu_pos_x >> 2;
                s_pu.b4_pos_y = u1_cu_pos_y >> 2;
                s_pu.b4_wd = (u1_cu_size >> 2) - 1;
                s_pu.b4_ht = (u1_cu_size >> 2) - 1;

                /* MV merge works on the local copy */
                ps_pu_merge = &s_pu;
            }
            else
            {
                ihevce_get_only_nbr_flag(
                    &s_nbr,
                    pu1_ctb_nbr_map,
                    i4_nbr_map_stride,
                    u1_pu_pos_x_4x4,
                    u1_pu_pos_y_4x4,
                    u1_part_wd >> 2,
                    u1_part_ht >> 2);

                u1_is_any_top_available = s_nbr.u1_top_avail || s_nbr.u1_top_rt_avail ||
                                          s_nbr.u1_top_lt_avail;

                /* Top row candidates are disallowed : either hide the top */
                /* neighbours or, if nothing exists on the left, give up */
                if((!ps_ctxt->u1_use_merge_cand_from_top_row) && u1_is_any_top_available)
                {
                    if(s_nbr.u1_left_avail || s_nbr.u1_bot_lt_avail)
                    {
                        s_nbr.u1_top_avail = 0;
                        s_nbr.u1_top_rt_avail = 0;
                        s_nbr.u1_top_lt_avail = 0;
                    }
                    else
                    {
                        u1_are_valid_merge_cands_available = 0;
                    }
                }

                /* MV merge works on the actual PU */
                ps_pu_merge = ps_pu;
            }

            if(!u1_are_valid_merge_cands_available)
            {
                /* NOTE(review): au1_final_pred_buf_id[MERGE_DERIVED][..] is not */
                /* written by this function on this path, yet it is copied */
                /* further down - confirm that consumers ignore it whenever */
                /* the cost is INT_MAX */
                au4_cost[MERGE_DERIVED][u1_part_idx] = INT_MAX;
            }
            else
            {
                u1_num_merge_cands = ihevce_mv_pred_merge(
                    ps_mv_pred_ctxt,
                    ps_pu_top_nbr,
                    ps_pu_left_nbr,
                    ps_pu_topleft_nbr,
                    i4_nbr_4x4_left_stride,
                    &s_nbr,
                    NULL,
                    ps_pu_merge,
                    e_part_size,
                    u1_part_idx,
                    u1_single_mcl_flag,
                    as_merge_cand,
                    au1_is_top_used);

                /* Never evaluate more than the configured maximum */
                if(u1_num_merge_cands > u1_max_merge_candidates)
                {
                    u1_num_merge_cands = u1_max_merge_candidates;
                }

                u1_num_merge_cands = ihevce_merge_candidate_seive(
                    &s_nbr,
                    as_merge_cand,
                    au1_is_top_used,
                    u1_num_merge_cands,
                    ps_ctxt->u1_use_merge_cand_from_top_row || !u1_is_any_top_available);

                for(u1_cand_idx = 0; u1_cand_idx < u1_num_merge_cands; u1_cand_idx++)
                {
                    s_merge_prms.au1_valid_merge_indices[u1_cand_idx] = u1_cand_idx;
                }

                au4_cost[MERGE_DERIVED][u1_part_idx] = ihevce_determine_best_merge_pu(
                    &s_merge_prms,
                    ps_pu,
                    ps_me_pu,
                    pv_pu_src,
                    au4_cost[ME_OR_SKIP_DERIVED][u1_part_idx],
                    i4_pred_buf_offset,
                    u1_num_merge_cands,
                    u1_part_idx,
                    u1_eval_skip);
            }

            au4_cost[i4_mixed_idx_merge_pu][u1_part_idx] = au4_cost[MERGE_DERIVED][u1_part_idx];

            if(u1_eval_skip)
            {
                /* This statement ensures that the skip candidate is always added */
                au4_cost[ME_OR_SKIP_DERIVED][u1_part_idx] =
                    (au4_cost[MERGE_DERIVED][0] < INT_MAX) ? SKIP_MODE_COST : INT_MAX;
                au1_final_pred_buf_id[ME_OR_SKIP_DERIVED][u1_part_idx] =
                    au1_final_pred_buf_id[MERGE_DERIVED][u1_part_idx];
            }
            else
            {
                au4_cost[ME_OR_SKIP_DERIVED][u1_part_idx] +=
                    ps_ctxt->pai4_mv_cost[u1_me_cand_list_idx][u1_part_idx];
                au4_cost[i4_mixed_idx_me_pu][u1_part_idx] =
                    au4_cost[ME_OR_SKIP_DERIVED][u1_part_idx];
            }

            au1_final_pred_buf_id[i4_mixed_idx_merge_pu][u1_part_idx] =
                au1_final_pred_buf_id[MERGE_DERIVED][u1_part_idx];
            au1_final_pred_buf_id[i4_mixed_idx_me_pu][u1_part_idx] =
                au1_final_pred_buf_id[ME_OR_SKIP_DERIVED][u1_part_idx];
        }
        else
        {
            /* ME only : the MV cost is added to the distortion */
            au4_cost[ME_OR_SKIP_DERIVED][u1_part_idx] +=
                ps_ctxt->pai4_mv_cost[u1_me_cand_list_idx][u1_part_idx];
        }

        /* Book keeping that is needed only for CUs with two partitions */
        if(u1_num_parts > 1)
        {
            if(0 == u1_part_idx)
            {
                /* Neighbour pointers are moved only while evaluating merge, */
                /* and not for 8x8 SMPs which take the 2Nx2N neighbours */
                UWORD8 u1_move_nbr_ptrs = u1_eval_merge && !u1_single_mcl_flag;

                if(u1_eval_merge)
                {
                    /* Expose PU 0 with its ME data as a neighbour for PU 1 */
                    ihevce_set_inter_nbr_map(
                        pu1_ctb_nbr_map,
                        i4_nbr_map_stride,
                        u1_pu_pos_x_4x4,
                        u1_pu_pos_y_4x4,
                        (u1_part_wd >> 2),
                        (u1_part_ht >> 2),
                        1);
                    ihevce_populate_nbr_4x4_with_pu_data(
                        ps_cu_nbr_buf, ps_me_pu, u1_cu_size >> 2);
                }

                if(u1_part_wd < u1_cu_size)
                {
                    /* PU 1 lies to the right of PU 0 */
                    i4_pred_buf_offset = i4_src_buf_offset = u1_part_wd;

                    if(u1_move_nbr_ptrs)
                    {
                        ps_cu_nbr_buf += (u1_part_wd >> 2);
                        ps_pu_left_nbr = ps_cu_nbr_buf - 1;
                        ps_pu_top_nbr += (u1_part_wd >> 2);
                        ps_pu_topleft_nbr = ps_pu_top_nbr - 1;

                        i4_nbr_4x4_left_stride = (u1_cu_size >> 2);
                    }
                }
                else if(u1_part_ht < u1_cu_size)
                {
                    /* PU 1 lies below PU 0 */
                    i4_pred_buf_offset = u1_part_ht * i4_pred_stride;
                    i4_src_buf_offset = u1_part_ht * i4_src_stride;

                    if(u1_move_nbr_ptrs)
                    {
                        ps_cu_nbr_buf += (u1_part_ht >> 2) * (u1_cu_size >> 2);
                        ps_pu_left_nbr += (u1_part_ht >> 2) * i4_nbr_4x4_left_stride;
                        ps_pu_top_nbr = ps_cu_nbr_buf - (u1_cu_size >> 2);
                        ps_pu_topleft_nbr = ps_pu_left_nbr - i4_nbr_4x4_left_stride;
                    }
                }

                /* Offsets are maintained in bytes */
                i4_pred_buf_offset *= u1_num_bytes_per_pel;
                i4_src_buf_offset *= u1_num_bytes_per_pel;

                if(u1_eval_merge)
                {
                    aps_cands[MIXED_MODE_TYPE0]->as_inter_pu[0] =
                        aps_cands[MERGE_DERIVED]->as_inter_pu[0];
                }
            }
            else if(u1_eval_merge)
            {
                aps_cands[MIXED_MODE_TYPE1]->as_inter_pu[1] =
                    aps_cands[MERGE_DERIVED]->as_inter_pu[1];
            }
        }
    }

    /* Adding a skip candidate */
    if((u1_eval_merge) && (0 == u1_part_type))
    {
        /* Skip candidates are stored from the end of the array */
        cu_inter_cand_t *ps_cand = &ps_ctxt->ps_cu_inter_merge_skip->as_cu_inter_merge_skip_cand
                                        [MAX_NUM_CU_MERGE_SKIP_CAND - 1 -
                                         ps_ctxt->ps_cu_inter_merge_skip->u1_num_skip_cands];

        (*ps_cand) = (*aps_cands[MERGE_DERIVED]);

        ps_cand->b1_skip_flag = 1;

        /* For 2Nx2N the MIXED_MODE_TYPE1 slot carries the skip candidate */
        aps_cands[MIXED_MODE_TYPE1] = ps_cand;
        au4_cost[MIXED_MODE_TYPE1][0] = (au4_cost[MERGE_DERIVED][0] < INT_MAX) ? SKIP_MODE_COST
                                                                               : INT_MAX;
    }

    /* Sort and populate */
    return ihevce_merge_cands_with_existing_best(
        ps_cu_mode_info,
        aps_cands,
        as_mvp_winner,
        au4_cost,
        ppv_pred_buf_list,
        au1_final_pred_buf_id,
        &ps_pred_buf_info->u4_is_buf_in_use,
        &ps_ctxt->ps_cu_inter_merge_skip->u1_num_merge_cands,
        &ps_ctxt->ps_cu_inter_merge_skip->u1_num_skip_cands,
        &ps_ctxt->ps_mixed_modes_datastore->u1_num_mixed_mode_type0_cands,
        &ps_ctxt->ps_mixed_modes_datastore->u1_num_mixed_mode_type1_cands,
        au1_merge_pred_buf_idx_array,
        ps_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d,
        i4_pred_stride,
        i4_max_num_inter_rdopt_cands,
        u1_cu_size,
        u1_part_type,
        u1_eval_merge,
        u1_eval_skip,
        u1_num_bytes_per_pel);
}
2458
/*!
******************************************************************************
* \if Function name : ihevce_redundant_candidate_pruner \endif
*
* \brief
*    Removes redundant entries from the list of inter candidates.
*
*    Two non-skip candidates of the same partition size are compared with
*    ihevce_compare_pu_mv_t (a non-zero return is treated as a match,
*    presumably meaning identical motion data). For every matching PU the
*    difference of the merge flags is accumulated; the candidate having fewer
*    merge flags among the matching PUs is marked redundant. Marked entries
*    are then removed from the candidate, cost and pred buffer index arrays
*    and u1_num_inter_cands is reduced accordingly.
*
* \param[in,out] ps_inter_cu_mode_info : inter candidate list of the CU
*
* \return
*    None
*
*****************************************************************************
*/
static __inline void ihevce_redundant_candidate_pruner(inter_cu_mode_info_t *ps_inter_cu_mode_info)
{
    WORD8 i1_ref_idx;
    WORD8 i1_cmp_idx;

    UWORD8 au1_is_redundant[MAX_NUM_INTER_RDO_CANDS] = { 0 };

    /* Pass 1 : mark the redundant candidates */
    for(i1_ref_idx = 0; i1_ref_idx < (ps_inter_cu_mode_info->u1_num_inter_cands - 1);
        i1_ref_idx++)
    {
        cu_inter_cand_t *ps_ref = ps_inter_cu_mode_info->aps_cu_data[i1_ref_idx];

        if(au1_is_redundant[i1_ref_idx] || ps_ref->b1_skip_flag)
        {
            continue;
        }

        for(i1_cmp_idx = i1_ref_idx + 1;
            i1_cmp_idx < ps_inter_cu_mode_info->u1_num_inter_cands;
            i1_cmp_idx++)
        {
            cu_inter_cand_t *ps_cmp = ps_inter_cu_mode_info->aps_cu_data[i1_cmp_idx];

            /* > 0 : reference has more merge PUs, < 0 : the other one has */
            WORD8 i1_merge_flag_balance = 0;

            if(au1_is_redundant[i1_cmp_idx] || ps_cmp->b1_skip_flag)
            {
                continue;
            }

            /* Only candidates of the same partition size are comparable */
            if(ps_cmp->b3_part_size != ps_ref->b3_part_size)
            {
                continue;
            }

            /* NOTE(review): a match on PU 0 alone is enough to mark one of the */
            /* two candidates, even if PU 1 differs - confirm this is intended */
            if(!ihevce_compare_pu_mv_t(
                   &ps_ref->as_inter_pu[0].mv,
                   &ps_cmp->as_inter_pu[0].mv,
                   ps_ref->as_inter_pu[0].b2_pred_mode,
                   ps_cmp->as_inter_pu[0].b2_pred_mode))
            {
                continue;
            }

            i1_merge_flag_balance +=
                ps_ref->as_inter_pu[0].b1_merge_flag - ps_cmp->as_inter_pu[0].b1_merge_flag;

            /* Second PU exists only for the non 2Nx2N partition types */
            if(ps_ref->b3_part_size && ihevce_compare_pu_mv_t(
                                           &ps_ref->as_inter_pu[1].mv,
                                           &ps_cmp->as_inter_pu[1].mv,
                                           ps_ref->as_inter_pu[1].b2_pred_mode,
                                           ps_cmp->as_inter_pu[1].b2_pred_mode))
            {
                i1_merge_flag_balance +=
                    ps_ref->as_inter_pu[1].b1_merge_flag - ps_cmp->as_inter_pu[1].b1_merge_flag;
            }

            if(i1_merge_flag_balance > 0)
            {
                au1_is_redundant[i1_cmp_idx] = 1;
            }
            else if(i1_merge_flag_balance < 0)
            {
                au1_is_redundant[i1_ref_idx] = 1;
            }
        }
    }

    /* Pass 2 : squeeze the marked entries out of all the parallel arrays */
    i1_ref_idx = 0;

    while(i1_ref_idx < ps_inter_cu_mode_info->u1_num_inter_cands)
    {
        WORD32 i4_num_trailing_cands;

        if(!au1_is_redundant[i1_ref_idx])
        {
            i1_ref_idx++;
            continue;
        }

        /* Entries that follow the one being removed */
        i4_num_trailing_cands = ps_inter_cu_mode_info->u1_num_inter_cands - i1_ref_idx - 1;

        memmove(
            &ps_inter_cu_mode_info->aps_cu_data[i1_ref_idx],
            &ps_inter_cu_mode_info->aps_cu_data[i1_ref_idx + 1],
            i4_num_trailing_cands * sizeof(ps_inter_cu_mode_info->aps_cu_data[i1_ref_idx]));

        memmove(
            &ps_inter_cu_mode_info->au4_cost[i1_ref_idx],
            &ps_inter_cu_mode_info->au4_cost[i1_ref_idx + 1],
            i4_num_trailing_cands * sizeof(ps_inter_cu_mode_info->au4_cost[i1_ref_idx]));

        memmove(
            &ps_inter_cu_mode_info->au1_pred_buf_idx[i1_ref_idx],
            &ps_inter_cu_mode_info->au1_pred_buf_idx[i1_ref_idx + 1],
            i4_num_trailing_cands * sizeof(ps_inter_cu_mode_info->au1_pred_buf_idx[i1_ref_idx]));

        memmove(
            &au1_is_redundant[i1_ref_idx],
            &au1_is_redundant[i1_ref_idx + 1],
            i4_num_trailing_cands * sizeof(au1_is_redundant[i1_ref_idx]));

        /* The same index now holds the next entry and is examined again */
        ps_inter_cu_mode_info->u1_num_inter_cands--;
    }
}
2551
2552 /*!
2553 ******************************************************************************
2554 * \if Function name : ihevce_inter_cand_sifter \endif
2555 *
2556 * \brief
2557 * Selects the best inter candidate modes amongst ME, merge,
2558 * skip and mixed modes. Also computes corresponding preds
2559 *
2560 * \author
2561 * Ittiam
2562 *
2563 *****************************************************************************
2564 */
ihevce_inter_cand_sifter(ihevce_inter_cand_sifter_prms_t * ps_ctxt)2565 void ihevce_inter_cand_sifter(ihevce_inter_cand_sifter_prms_t *ps_ctxt)
2566 {
2567 PF_SAD_FXN_T pf_sad_func;
2568
2569 UWORD8 au1_final_cand_idx[MAX_INTER_CU_CANDIDATES];
2570 UWORD8 au1_part_types_evaluated[MAX_INTER_CU_CANDIDATES];
2571 UWORD8 u1_num_unique_parts;
2572 UWORD8 i, j;
2573 UWORD32 au4_src_variance[NUM_INTER_PU_PARTS];
2574 WORD32 i4_mean;
2575
2576 cu_inter_cand_t *ps_me_cands = ps_ctxt->ps_me_cands;
2577 inter_cu_mode_info_t *ps_cu_mode_info = ps_ctxt->ps_inter_cu_mode_info;
2578
2579 UWORD8 u1_diff_skip_cand_flag = 1;
2580 WORD8 i1_skip_cand_from_merge_idx = -1;
2581 WORD8 i1_final_skip_cand_merge_idx = -1;
2582 UWORD8 u1_max_num_part_types_to_select = MAX_INTER_CU_CANDIDATES;
2583 UWORD8 u1_num_me_cands = ps_ctxt->u1_num_me_cands;
2584 UWORD8 u1_num_parts_evaluated_for_merge = 0;
2585 UWORD8 u1_is_cu_noisy = ps_ctxt->u1_is_cu_noisy;
2586
2587 if((ps_ctxt->u1_quality_preset >= IHEVCE_QUALITY_P3) && (ps_ctxt->i1_slice_type == BSLICE))
2588 {
2589 u1_max_num_part_types_to_select = 1;
2590 }
2591
2592 {
2593 pf_sad_func = (ps_ctxt->u1_use_satd_for_merge_eval) ? compute_satd_8bit
2594 : ps_ctxt->pf_evalsad_pt_npu_mxn_8bit;
2595 }
2596
2597 u1_num_unique_parts = ihevce_get_num_part_types_in_me_cand_list(
2598 ps_me_cands,
2599 au1_part_types_evaluated,
2600 au1_final_cand_idx,
2601 &u1_diff_skip_cand_flag,
2602 &i1_skip_cand_from_merge_idx,
2603 &i1_final_skip_cand_merge_idx,
2604 u1_max_num_part_types_to_select,
2605 u1_num_me_cands);
2606
2607 if((u1_num_me_cands + u1_diff_skip_cand_flag) && u1_is_cu_noisy &&
2608 ps_ctxt->i4_alpha_stim_multiplier)
2609 {
2610 ihevce_calc_variance(
2611 ps_ctxt->pv_src,
2612 ps_ctxt->i4_src_strd,
2613 &i4_mean,
2614 &ps_cu_mode_info->u4_src_variance,
2615 ps_ctxt->u1_cu_size,
2616 ps_ctxt->u1_cu_size,
2617 ps_ctxt->u1_is_hbd,
2618 0);
2619 }
2620
2621 if(DISABLE_SKIP_AND_MERGE_WHEN_NOISY && u1_is_cu_noisy)
2622 {
2623 u1_diff_skip_cand_flag = 0;
2624 }
2625 else if(!DISABLE_SKIP_AND_MERGE_WHEN_NOISY && u1_is_cu_noisy)
2626 {
2627 if(ps_ctxt->u1_cu_size > MAX_CU_SIZE_WHERE_MERGE_AND_SKIPS_ENABLED_AND_WHEN_NOISY)
2628 {
2629 u1_diff_skip_cand_flag = 0;
2630 }
2631 }
2632
2633 for(i = 0; i < u1_num_me_cands + u1_diff_skip_cand_flag; i++)
2634 {
2635 UWORD8 u1_part_type;
2636 UWORD8 u1_eval_skip;
2637 UWORD8 u1_eval_merge;
2638 UWORD8 u1_valid_cand;
2639
2640 if(i == u1_num_me_cands)
2641 {
2642 u1_eval_skip = 1;
2643 u1_eval_merge = 0;
2644 u1_part_type = 0;
2645 }
2646 else
2647 {
2648 u1_eval_skip = 0;
2649 u1_part_type = ps_me_cands[i].b3_part_size;
2650
2651 if(u1_num_parts_evaluated_for_merge >= u1_num_unique_parts)
2652 {
2653 u1_eval_merge = 0;
2654 u1_num_parts_evaluated_for_merge = u1_num_unique_parts;
2655 }
2656 else
2657 {
2658 u1_eval_merge = (i == au1_final_cand_idx[u1_num_parts_evaluated_for_merge]);
2659 }
2660
2661 for(j = 0; (j < u1_num_parts_evaluated_for_merge) && (u1_eval_merge); j++)
2662 {
2663 if(u1_part_type == au1_part_types_evaluated[j])
2664 {
2665 u1_eval_merge = 0;
2666 break;
2667 }
2668 }
2669 }
2670
2671 if(u1_is_cu_noisy && u1_part_type && ps_ctxt->i4_alpha_stim_multiplier)
2672 {
2673 void *pv_src = ps_ctxt->pv_src;
2674 UWORD8 u1_pu_wd = (ps_me_cands[i].as_inter_pu[0].b4_wd + 1) << 2;
2675 UWORD8 u1_pu_ht = (ps_me_cands[i].as_inter_pu[0].b4_ht + 1) << 2;
2676
2677 ihevce_calc_variance(
2678 pv_src,
2679 ps_ctxt->i4_src_strd,
2680 &i4_mean,
2681 &au4_src_variance[0],
2682 u1_pu_ht,
2683 u1_pu_wd,
2684 ps_ctxt->u1_is_hbd,
2685 0);
2686
2687 pv_src = (void *) (((UWORD8 *) pv_src) +
2688 ((ps_ctxt->u1_cu_size == u1_pu_wd) ? ps_ctxt->i4_src_strd * u1_pu_ht : u1_pu_wd)
2689 * (ps_ctxt->u1_is_hbd + 1));
2690 u1_pu_wd = (ps_me_cands[i].as_inter_pu[1].b4_wd + 1) << 2;
2691 u1_pu_ht = (ps_me_cands[i].as_inter_pu[1].b4_ht + 1) << 2;
2692
2693 ihevce_calc_variance(
2694 pv_src,
2695 ps_ctxt->i4_src_strd,
2696 &i4_mean,
2697 &au4_src_variance[1],
2698 u1_pu_ht,
2699 u1_pu_wd,
2700 ps_ctxt->u1_is_hbd,
2701 0);
2702 }
2703 else if(u1_is_cu_noisy && !u1_part_type && ps_ctxt->i4_alpha_stim_multiplier)
2704 {
2705 au4_src_variance[0] = ps_cu_mode_info->u4_src_variance;
2706 }
2707
2708 if(DISABLE_SKIP_AND_MERGE_WHEN_NOISY && u1_is_cu_noisy)
2709 {
2710 u1_eval_merge = 0;
2711 }
2712 else if(!DISABLE_SKIP_AND_MERGE_WHEN_NOISY && u1_is_cu_noisy)
2713 {
2714 if(ps_ctxt->u1_cu_size > MAX_CU_SIZE_WHERE_MERGE_AND_SKIPS_ENABLED_AND_WHEN_NOISY)
2715 {
2716 u1_eval_merge = 0;
2717 }
2718 }
2719
2720 u1_valid_cand = ihevce_compute_pred_and_populate_modes(
2721 ps_ctxt,
2722 pf_sad_func,
2723 au4_src_variance,
2724 u1_part_type,
2725 MIN(i, (u1_num_me_cands - 1)),
2726 u1_eval_merge,
2727 u1_eval_skip);
2728
2729 u1_num_parts_evaluated_for_merge += u1_eval_merge;
2730
2731 /* set the neighbour map to 0 */
2732 if(u1_part_type)
2733 {
2734 ihevce_set_nbr_map(
2735 ps_ctxt->pu1_ctb_nbr_map,
2736 ps_ctxt->i4_ctb_nbr_map_stride,
2737 (ps_ctxt->u1_cu_pos_x >> 2),
2738 (ps_ctxt->u1_cu_pos_y >> 2),
2739 (ps_ctxt->u1_cu_size >> 2),
2740 0);
2741 }
2742 }
2743
2744 ihevce_redundant_candidate_pruner(ps_ctxt->ps_inter_cu_mode_info);
2745 }
2746