1 /******************************************************************************
2 *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /*****************************************************************************/
22 /* File Includes */
23 /*****************************************************************************/
24 /* System include files */
25 #include <stdio.h>
26 #include <string.h>
27 #include <stdlib.h>
28 #include <assert.h>
29 #include <stdarg.h>
30 #include <math.h>
31 #include <limits.h>
32
33 /* User include files */
34 #include "ihevc_typedefs.h"
35 #include "itt_video_api.h"
36 #include "ihevce_api.h"
37
38 #include "rc_cntrl_param.h"
39 #include "rc_frame_info_collector.h"
40 #include "rc_look_ahead_params.h"
41
42 #include "ihevc_defs.h"
43 #include "ihevc_structs.h"
44 #include "ihevc_platform_macros.h"
45 #include "ihevc_deblk.h"
46 #include "ihevc_itrans_recon.h"
47 #include "ihevc_chroma_itrans_recon.h"
48 #include "ihevc_chroma_intra_pred.h"
49 #include "ihevc_intra_pred.h"
50 #include "ihevc_inter_pred.h"
51 #include "ihevc_mem_fns.h"
52 #include "ihevc_padding.h"
53 #include "ihevc_weighted_pred.h"
54 #include "ihevc_sao.h"
55 #include "ihevc_resi_trans.h"
56 #include "ihevc_quant_iquant_ssd.h"
57 #include "ihevc_cabac_tables.h"
58
59 #include "ihevce_defs.h"
60 #include "ihevce_lap_enc_structs.h"
61 #include "ihevce_multi_thrd_structs.h"
62 #include "ihevce_multi_thrd_funcs.h"
63 #include "ihevce_me_common_defs.h"
64 #include "ihevce_had_satd.h"
65 #include "ihevce_error_codes.h"
66 #include "ihevce_bitstream.h"
67 #include "ihevce_cabac.h"
68 #include "ihevce_rdoq_macros.h"
69 #include "ihevce_function_selector.h"
70 #include "ihevce_enc_structs.h"
71 #include "ihevce_entropy_structs.h"
72 #include "ihevce_cmn_utils_instr_set_router.h"
73 #include "ihevce_enc_loop_structs.h"
74 #include "ihevce_inter_pred.h"
75 #include "ihevce_global_tables.h"
76 #include "ihevce_dep_mngr_interface.h"
77 #include "hme_datatype.h"
78 #include "hme_interface.h"
79 #include "hme_common_defs.h"
80 #include "hme_defs.h"
81 #include "ihevce_me_instr_set_router.h"
82 #include "hme_globals.h"
83 #include "hme_utils.h"
84 #include "hme_coarse.h"
85 #include "hme_fullpel.h"
86 #include "hme_subpel.h"
87 #include "hme_refine.h"
88 #include "hme_err_compute.h"
89 #include "hme_common_utils.h"
90 #include "hme_search_algo.h"
91 #include "ihevce_stasino_helpers.h"
92 #include "ihevce_common_utils.h"
93
94 /*****************************************************************************/
95 /* Macros */
96 /*****************************************************************************/
97 #define UNI_SATD_SCALE 1
98
99 /*****************************************************************************/
100 /* Function Definitions */
101 /*****************************************************************************/
/**
********************************************************************************
*  @fn     ihevce_open_loop_pred_data
*
*  @brief  Builds a single PU from the first 2Nx2N result of the L0 and/or L1
*          list, chooses between L0, L1 and BI prediction, and runs luma inter
*          prediction for that PU into pu1_temp_pred.
*
*  @param[in,out] ps_ctxt : ME frame context; its i4_count is incremented and
*                 its s_mc_ctxt is handed to the inter prediction routine
*
*  @param[in]  ps_pu_results : per-list PU results and result counts
*
*  @param[in]  pu1_src : not referenced by this function
*
*  @param[out] pu1_temp_pred : buffer passed to ihevce_luma_inter_pred_pu
*
*  @param[in]  stride : stride passed along with pu1_temp_pred
*
*  @param[in]  src_strd : not referenced by this function
*
*  @param[in]  e_part_id : partition id used to look up the result counts
*
*  @return None
********************************************************************************
*/
void ihevce_open_loop_pred_data(
    me_frm_ctxt_t *ps_ctxt,
    inter_pu_results_t *ps_pu_results,
    U08 *pu1_src,
    U08 *pu1_temp_pred,
    S32 stride,
    S32 src_strd,
    UWORD8 e_part_id)
{
    inter_pred_me_ctxt_t *ps_mc_ctxt = &ps_ctxt->s_mc_ctxt;
    /* -1 marks "no result available in this list" */
    S32 i4_dist_l0 = -1;
    S32 i4_dist_l1 = -1;
    S32 i4_pred_status;
    pu_t s_pu;

    ps_ctxt->i4_count++;

    /* List 0 candidate: geometry, mv and ref idx come from the L0 result */
    if(ps_pu_results->u1_num_results_per_part_l0[e_part_id])
    {
        pu_result_t *ps_l0 = ps_pu_results->aps_pu_results[0][PRT_2Nx2N];

        /* distortion estimate = total cost with the mv cost removed */
        i4_dist_l0 = ps_l0->i4_tot_cost - ps_l0->i4_mv_cost;

        s_pu.b1_intra_flag = 0;
        s_pu.b2_pred_mode = PRED_L0;
        s_pu.b4_wd = ps_l0->pu.b4_wd;
        s_pu.b4_ht = ps_l0->pu.b4_ht;
        s_pu.b4_pos_x = ps_l0->pu.b4_pos_x;
        s_pu.b4_pos_y = ps_l0->pu.b4_pos_y;
        s_pu.mv.i1_l0_ref_idx = ps_l0->pu.mv.i1_l0_ref_idx;
        s_pu.mv.s_l0_mv.i2_mvx = ps_l0->pu.mv.s_l0_mv.i2_mvx;
        s_pu.mv.s_l0_mv.i2_mvy = ps_l0->pu.mv.s_l0_mv.i2_mvy;
    }

    /* List 1 candidate: overrides mode and geometry, adds the L1 mv/ref idx */
    if(ps_pu_results->u1_num_results_per_part_l1[e_part_id])
    {
        pu_result_t *ps_l1 = ps_pu_results->aps_pu_results[1][PRT_2Nx2N];

        i4_dist_l1 = ps_l1->i4_tot_cost - ps_l1->i4_mv_cost;

        s_pu.b1_intra_flag = 0;
        s_pu.b2_pred_mode = PRED_L1;
        s_pu.b4_wd = ps_l1->pu.b4_wd;
        s_pu.b4_ht = ps_l1->pu.b4_ht;
        s_pu.b4_pos_x = ps_l1->pu.b4_pos_x;
        s_pu.b4_pos_y = ps_l1->pu.b4_pos_y;
        s_pu.mv.i1_l1_ref_idx = ps_l1->pu.mv.i1_l1_ref_idx;
        s_pu.mv.s_l1_mv.i2_mvx = ps_l1->pu.mv.s_l1_mv.i2_mvx;
        s_pu.mv.s_l1_mv.i2_mvy = ps_l1->pu.mv.s_l1_mv.i2_mvy;
    }

    /* At least one of the two lists must have produced a result */
    ASSERT((i4_dist_l0 != -1) || (i4_dist_l1 != -1));

    if((i4_dist_l0 != -1) && (i4_dist_l1 != -1))
    {
        S32 i4_dist_gap = abs(i4_dist_l0 - i4_dist_l1);

        /* BI only when the two distortions are within 15% of each other, */
        /* otherwise the list with the smaller distortion (ties go to L1) */
        if((i4_dist_gap < (i4_dist_l0 * 0.15)) && (i4_dist_gap < (i4_dist_l1 * 0.15)))
        {
            s_pu.b2_pred_mode = PRED_BI;
        }
        else if(i4_dist_l0 < i4_dist_l1)
        {
            s_pu.b2_pred_mode = PRED_L0;
        }
        else
        {
            s_pu.b2_pred_mode = PRED_L1;
        }
    }

    i4_pred_status = ihevce_luma_inter_pred_pu(ps_mc_ctxt, &s_pu, pu1_temp_pred, stride, 1);
    if(-1 == i4_pred_status)
    {
        ASSERT(0);
    }
}
179
180 /**
181 ********************************************************************************
182 * @fn void *hme_get_wkg_mem(buf_mgr_t *ps_buf_mgr, S32 i4_size)
183 *
184 * @brief Allocates a block of size = i4_size from working memory and returns
185 *
186 * @param[in,out] ps_buf_mgr: Buffer manager for wkg memory
187 *
188 * @param[in] i4_size : size required
189 *
190 * @return void pointer to allocated memory, NULL if failure
191 ********************************************************************************
192 */
hme_get_wkg_mem(buf_mgr_t * ps_buf_mgr,S32 i4_size)193 void *hme_get_wkg_mem(buf_mgr_t *ps_buf_mgr, S32 i4_size)
194 {
195 U08 *pu1_mem;
196
197 if(ps_buf_mgr->i4_used + i4_size > ps_buf_mgr->i4_total)
198 return NULL;
199
200 pu1_mem = ps_buf_mgr->pu1_wkg_mem + ps_buf_mgr->i4_used;
201 ps_buf_mgr->i4_used += i4_size;
202
203 return ((void *)pu1_mem);
204 }
205
206 /**
207 ********************************************************************************
208 * @fn hme_init_histogram(
209 *
210 * @brief Top level entry point for Coarse ME. Runs across blocks and does the
211 * needful by calling other low level routines.
212 *
213 * @param[in,out] ps_hist : the histogram structure
214 *
215 * @param[in] i4_max_mv_x : Maximum mv allowed in x direction (fpel units)
216 *
217 * @param[in] i4_max_mv_y : Maximum mv allowed in y direction (fpel units)
218 *
219 * @return None
220 ********************************************************************************
221 */
222
hme_init_histogram(mv_hist_t * ps_hist,S32 i4_max_mv_x,S32 i4_max_mv_y)223 void hme_init_histogram(mv_hist_t *ps_hist, S32 i4_max_mv_x, S32 i4_max_mv_y)
224 {
225 S32 i4_num_bins, i4_num_cols, i4_num_rows;
226 S32 i4_shift_x, i4_shift_y, i, i4_range, i4_val;
227
228 /*************************************************************************/
229 /* Evaluate the shift_x and shift_y. For this, we use the following logic*/
230 /* Assuming that we use up all MAX_NUM_BINS. Then the number of bins is */
231 /* given by formula ((max_mv_x * 2) >> shift_x)*((max_mv_y * 2)>>shift_y)*/
232 /* or shift_x + shift_y is log ((max_mv_x * max_mv_y * 4) / MAX_NUM_BINS)*/
233 /* if above quantity is negative, then we make it zero. */
234 /* If result is odd, then shift_y is result >> 1, shift_x is shift_y + 1 */
235 /*************************************************************************/
236 i4_val = i4_max_mv_x * i4_max_mv_y * 4;
237 i4_range = (hme_get_range(i4_val - 1)) + 1;
238 if(i4_range > LOG_MAX_NUM_BINS)
239 {
240 i4_shift_y = (i4_range - LOG_MAX_NUM_BINS);
241 i4_shift_x = (i4_shift_y + 1) >> 1;
242 i4_shift_y >>= 1;
243 }
244 else
245 {
246 i4_shift_y = 0;
247 i4_shift_x = 0;
248 }
249
250 /* we assume the mv range is -max_mv_x to +max_mv_x, ditto for y */
251 /* So number of columns is 2*max_mv_x >> i4_shift_x. Ditto for rows */
252 /* this helps us compute num bins that are active for this histo session */
253 i4_num_cols = (i4_max_mv_x << 1) >> i4_shift_x;
254 i4_num_rows = (i4_max_mv_y << 1) >> i4_shift_y;
255 i4_num_bins = i4_num_rows * i4_num_cols;
256
257 ASSERT(i4_num_bins <= MAX_NUM_BINS);
258
259 ps_hist->i4_num_rows = i4_num_rows;
260 ps_hist->i4_num_cols = i4_num_cols;
261 ps_hist->i4_min_x = -i4_max_mv_x;
262 ps_hist->i4_min_y = -i4_max_mv_y;
263 ps_hist->i4_shift_x = i4_shift_x;
264 ps_hist->i4_shift_y = i4_shift_y;
265 ps_hist->i4_lobe1_size = 5;
266 ps_hist->i4_lobe2_size = 3;
267
268 ps_hist->i4_num_bins = i4_num_bins;
269
270 for(i = 0; i < i4_num_bins; i++)
271 {
272 ps_hist->ai4_bin_count[i] = 0;
273 }
274 }
275
276 /**
277 ********************************************************************************
278 * @fn hme_update_histogram(
279 *
280 * @brief Updates the histogram given an mv entry
281 *
282 * @param[in,out] ps_hist : the histogram structure
283 *
284 * @param[in] i4_mv_x : x component of the mv (fpel units)
285 *
286 * @param[in] i4_mv_y : y component of the mv (fpel units)
287 *
288 * @return None
289 ********************************************************************************
290 */
hme_update_histogram(mv_hist_t * ps_hist,S32 i4_mv_x,S32 i4_mv_y)291 void hme_update_histogram(mv_hist_t *ps_hist, S32 i4_mv_x, S32 i4_mv_y)
292 {
293 S32 i4_bin_index, i4_col, i4_row;
294
295 i4_col = (i4_mv_x - ps_hist->i4_min_x) >> ps_hist->i4_shift_x;
296 i4_row = (i4_mv_y - ps_hist->i4_min_y) >> ps_hist->i4_shift_y;
297
298 i4_bin_index = i4_col + (i4_row * ps_hist->i4_num_cols);
299 /* Sanity Check */
300 ASSERT(i4_bin_index < MAX_NUM_BINS);
301
302 ps_hist->ai4_bin_count[i4_bin_index]++;
303 }
304
305 /**
306 ********************************************************************************
307 * @fn hme_get_global_mv(
308 *
309 * @brief returns the global mv of a previous picture. Accounts for the fact
310 * that the delta poc of the previous picture may have been different
311 * from delta poc of current picture. Delta poc is POC difference
312 * between a picture and its reference.
313 *
314 * @param[out] ps_mv: mv_t structure where the motion vector is returned
315 *
316 * @param[in] i4_delta_poc: the delta poc for the current pic w.r.t. reference
317 *
318 * @return None
319 ********************************************************************************
320 */
hme_get_global_mv(layer_ctxt_t * ps_prev_layer,hme_mv_t * ps_mv,S32 i4_delta_poc)321 void hme_get_global_mv(layer_ctxt_t *ps_prev_layer, hme_mv_t *ps_mv, S32 i4_delta_poc)
322 {
323 S16 i2_mv_x, i2_mv_y;
324 S32 i4_delta_poc_prev;
325 S32 i4_poc_prev = ps_prev_layer->i4_poc;
326 S32 i4_poc_prev_ref = ps_prev_layer->ai4_ref_id_to_poc_lc[0];
327
328 i4_delta_poc_prev = i4_poc_prev - i4_poc_prev_ref;
329 i2_mv_x = ps_prev_layer->s_global_mv[0][GMV_THICK_LOBE].i2_mv_x;
330 i2_mv_y = ps_prev_layer->s_global_mv[0][GMV_THICK_LOBE].i2_mv_y;
331
332 i2_mv_x = (S16)((i2_mv_x * i4_delta_poc) / i4_delta_poc_prev);
333 i2_mv_y = (S16)((i2_mv_y * i4_delta_poc) / i4_delta_poc_prev);
334
335 ps_mv->i2_mv_x = i2_mv_x;
336 ps_mv->i2_mv_y = i2_mv_y;
337 }
338
339 /**
340 ********************************************************************************
341 * @fn hme_calculate_global_mv(
342 *
343 * @brief Calculates global mv for a given histogram
344 *
345 * @param[in] ps_hist : the histogram structure
346 *
347 * @param[in] ps_mv : used to return the global mv
348 *
349 * @param[in] e_lobe_type : refer to GMV_MVTYPE_T
350 *
351 * @return None
352 ********************************************************************************
353 */
hme_calculate_global_mv(mv_hist_t * ps_hist,hme_mv_t * ps_mv,GMV_MVTYPE_T e_lobe_type)354 void hme_calculate_global_mv(mv_hist_t *ps_hist, hme_mv_t *ps_mv, GMV_MVTYPE_T e_lobe_type)
355 {
356 S32 i4_offset, i4_lobe_size, i4_y, i4_x, *pi4_bin_count;
357 S32 i4_max_sum = -1;
358 S32 i4_max_x = 0, i4_max_y = 0;
359
360 if(e_lobe_type == GMV_THICK_LOBE)
361 i4_lobe_size = ps_hist->i4_lobe1_size;
362 else
363 i4_lobe_size = ps_hist->i4_lobe2_size;
364
365 i4_offset = i4_lobe_size >> 1;
366 for(i4_y = i4_offset; i4_y < ps_hist->i4_num_rows - i4_offset; i4_y++)
367 {
368 for(i4_x = i4_offset; i4_x < ps_hist->i4_num_cols - i4_offset; i4_x++)
369 {
370 S32 i4_bin_id, i4_sum;
371 i4_bin_id = (i4_x - 2) + ((i4_y - 2) * ps_hist->i4_num_cols);
372
373 pi4_bin_count = &ps_hist->ai4_bin_count[i4_bin_id];
374 i4_sum = hme_compute_2d_sum_unsigned(
375 (void *)pi4_bin_count,
376 i4_lobe_size,
377 i4_lobe_size,
378 ps_hist->i4_num_cols,
379 sizeof(U32));
380
381 if(i4_sum > i4_max_sum)
382 {
383 i4_max_x = i4_x;
384 i4_max_y = i4_y;
385 i4_max_sum = i4_sum;
386 }
387 }
388 }
389
390 ps_mv->i2_mv_y = (S16)((i4_max_y << ps_hist->i4_shift_y) + ps_hist->i4_min_y);
391 ps_mv->i2_mv_x = (S16)((i4_max_x << ps_hist->i4_shift_x) + ps_hist->i4_min_x);
392 }
393
394 /**
395 ********************************************************************************
396 * @fn ctb_node_t *hme_get_ctb_node(ctb_mem_mgr_t *ps_mem_mgr)
397 *
398 * @brief returns a new ctb node usable for creating a new ctb candidate
399 *
400 * @param[in] ps_mem_mgr : memory manager holding all ctb nodes
401 *
402 * @return NULL if no free nodes, else ptr to the new ctb node
403 ********************************************************************************
404 */
hme_get_ctb_node(ctb_mem_mgr_t * ps_mem_mgr)405 ctb_node_t *hme_get_ctb_node(ctb_mem_mgr_t *ps_mem_mgr)
406 {
407 U08 *pu1_ret;
408 if((ps_mem_mgr->i4_used + ps_mem_mgr->i4_size) > ps_mem_mgr->i4_tot)
409 return (NULL);
410 pu1_ret = ps_mem_mgr->pu1_mem + ps_mem_mgr->i4_used;
411 ps_mem_mgr->i4_used += ps_mem_mgr->i4_size;
412 return ((ctb_node_t *)pu1_ret);
413 }
414
415 /**
416 ********************************************************************************
417 * @fn hme_map_mvs_to_grid(mv_grid_t **pps_mv_grid,
418 search_results_t *ps_search_results, S32 i4_num_ref)
419 *
420 * @brief For a given CU whose results are in ps_search_results, the 17x17
421 * mv grid is updated for future use within the CTB
422 *
423 * @param[in] ps_search_results : Search results data structure
424 *
425 * @param[out] pps_mv_grid: The mv grid (as many as num ref)
426 *
427 * @param[in] i4_num_ref: nuber of search iterations to update
428 *
429 * @return None
430 ********************************************************************************
431 */
hme_map_mvs_to_grid(mv_grid_t ** pps_mv_grid,search_results_t * ps_search_results,U08 * pu1_pred_dir_searched,S32 i4_num_pred_dir)432 void hme_map_mvs_to_grid(
433 mv_grid_t **pps_mv_grid,
434 search_results_t *ps_search_results,
435 U08 *pu1_pred_dir_searched,
436 S32 i4_num_pred_dir)
437 {
438 S32 i4_cu_start_offset;
439 /*************************************************************************/
440 /* Start x, y offset of CU relative to CTB. To update the mv grid which */
441 /* stores 1 mv per 4x4, we convert pixel offset to 4x4 blk offset */
442 /*************************************************************************/
443 S32 i4_cu_offset_x = (S32)ps_search_results->u1_x_off >> 2;
444 S32 i4_cu_offset_y = (S32)ps_search_results->u1_y_off >> 2;
445
446 /* Controls the attribute of a given partition within CU */
447 /* , i.e. start locn, size */
448 part_attr_t *ps_part_attr;
449
450 S32 i4_part, i4_part_id, num_parts, i4_stride;
451 S16 i2_mv_x, i2_mv_y;
452 S08 i1_ref_idx;
453
454 /* Per partition, attributes w.r.t. CU start */
455 S32 x_start, y_start, x_end, y_end, i4_x, i4_y;
456 PART_TYPE_T e_part_type;
457
458 /* Points to exact mv structures within the grid to be udpated */
459 search_node_t *ps_grid_node, *ps_grid_node_tmp;
460
461 /* points to exact mv grid (based on search iteration) to be updated */
462 mv_grid_t *ps_mv_grid;
463
464 search_node_t *ps_search_node;
465
466 S32 shift, i, mv_shift = 2;
467 /* Proportional to the size of CU, controls the number of 4x4 blks */
468 /* to be updated */
469 shift = ps_search_results->e_cu_size;
470 ASSERT(i4_num_pred_dir <= 2);
471
472 e_part_type = (PART_TYPE_T)ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;
473
474 if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) &&
475 (ps_search_results->i4_part_mask & ENABLE_NxN))
476 {
477 e_part_type = PRT_NxN;
478 }
479
480 for(i = 0; i < i4_num_pred_dir; i++)
481 {
482 num_parts = gau1_num_parts_in_part_type[e_part_type];
483 ps_mv_grid = pps_mv_grid[pu1_pred_dir_searched[i]];
484 i4_stride = ps_mv_grid->i4_stride;
485
486 i4_cu_start_offset =
487 i4_cu_offset_x + i4_cu_offset_y * i4_stride + ps_mv_grid->i4_start_offset;
488
489 /* Move to the appropriate 2d locn of CU start within Grid */
490 ps_grid_node = &ps_mv_grid->as_node[i4_cu_start_offset];
491
492 for(i4_part = 0; i4_part < num_parts; i4_part++)
493 {
494 i4_part_id = ge_part_type_to_part_id[e_part_type][i4_part];
495
496 /* Pick the mvx and y and ref id corresponding to this partition */
497 ps_search_node =
498 ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id];
499
500 i2_mv_x = ps_search_node->s_mv.i2_mvx;
501 i2_mv_y = ps_search_node->s_mv.i2_mvy;
502 i1_ref_idx = ps_search_node->i1_ref_idx;
503
504 /* Move to the appropriate location within the CU */
505 ps_part_attr = &gas_part_attr_in_cu[i4_part_id];
506 x_start = ps_part_attr->u1_x_start;
507 x_end = x_start + ps_part_attr->u1_x_count;
508 y_start = ps_part_attr->u1_y_start;
509 y_end = y_start + ps_part_attr->u1_y_count;
510
511 /* Convert attributes from 8x8 CU size to given CU size */
512 x_start = (x_start << shift) >> mv_shift;
513 x_end = (x_end << shift) >> mv_shift;
514 y_start = (y_start << shift) >> mv_shift;
515 y_end = (y_end << shift) >> mv_shift;
516
517 ps_grid_node_tmp = ps_grid_node + y_start * i4_stride;
518
519 /* Update all 4x4 blk mvs with the part mv */
520 /* For e.g. we update 4 units in case of NxN for 16x16 CU */
521 for(i4_y = y_start; i4_y < y_end; i4_y++)
522 {
523 for(i4_x = x_start; i4_x < x_end; i4_x++)
524 {
525 ps_grid_node_tmp[i4_x].s_mv.i2_mvx = i2_mv_x;
526 ps_grid_node_tmp[i4_x].s_mv.i2_mvy = i2_mv_y;
527 ps_grid_node_tmp[i4_x].i1_ref_idx = i1_ref_idx;
528 ps_grid_node_tmp[i4_x].u1_subpel_done = 1;
529 }
530 ps_grid_node_tmp += i4_stride;
531 }
532 }
533 }
534 }
535
/**
********************************************************************************
*  @fn     hme_set_ctb_pred_attr(ctb_node_t *ps_parent, U08 *pu1_pred0,
*                                U08 *pu1_pred1, S32 i4_stride)
*
*  @brief  Stores the two prediction buffer pointers and the stride in a ctb
*          node and, when the node has children, recursively does the same
*          for its four quadrants with the pointers moved to each quadrant.
*
*  @param[in,out] ps_parent : node to update; children are visited when
*                 ps_tl is not NULL
*
*  @param[in] pu1_pred0 : first prediction buffer for this node
*
*  @param[in] pu1_pred1 : second prediction buffer for this node
*
*  @param[in] i4_stride : stride stored for this node
*
*  @return None
********************************************************************************
*/
void hme_set_ctb_pred_attr(ctb_node_t *ps_parent, U08 *pu1_pred0, U08 *pu1_pred1, S32 i4_stride)
{
    ps_parent->apu1_pred[0] = pu1_pred0;
    ps_parent->apu1_pred[1] = pu1_pred1;
    ps_parent->i4_pred_stride = i4_stride;

    /* A non NULL top-left child is taken to mean all four children exist */
    if(ps_parent->ps_tl != NULL)
    {
        /* Child blk width = x distance between the top-right child and */
        /* this node                                                     */
        S32 blk_wd = (S32)ps_parent->ps_tr->u1_x_off;
        blk_wd -= (S32)ps_parent->u1_x_off;

        /* NOTE(review): children are given half the parent's stride while */
        /* the quadrant offsets below use the full stride - confirm intent */

        /* Top left: same origin as the parent */
        hme_set_ctb_pred_attr(ps_parent->ps_tl, pu1_pred0, pu1_pred1, i4_stride >> 1);

        /* Top right: blk_wd to the right */
        hme_set_ctb_pred_attr(
            ps_parent->ps_tr, pu1_pred0 + blk_wd, pu1_pred1 + blk_wd, i4_stride >> 1);

        /* Bottom left: blk_wd rows down */
        hme_set_ctb_pred_attr(
            ps_parent->ps_bl,
            pu1_pred0 + (blk_wd * i4_stride),
            pu1_pred1 + (blk_wd * i4_stride),
            i4_stride >> 1);

        /* Bottom right: blk_wd to the right and blk_wd rows down. This */
        /* must go to ps_br; sending it to ps_tr would overwrite the    */
        /* top-right child and leave the bottom-right child unset.      */
        hme_set_ctb_pred_attr(
            ps_parent->ps_br,
            pu1_pred0 + (blk_wd * (1 + i4_stride)),
            pu1_pred1 + (blk_wd * (1 + i4_stride)),
            i4_stride >> 1);
    }
}
564
565 /**
566 ********************************************************************************
567 * @fn hme_create_valid_part_ids(S32 i4_part_mask, S32 *pi4_valid_part_ids)
568 *
569 * @brief Expands the part mask to a list of valid part ids terminated by -1
570 *
571 * @param[in] i4_part_mask : bit mask of active partitino ids
572 *
573 * @param[out] pi4_valid_part_ids : array, each entry has one valid part id
574 * Terminated by -1 to signal end.
575 *
576 * @return number of partitions
577 ********************************************************************************
578 */
hme_create_valid_part_ids(S32 i4_part_mask,S32 * pi4_valid_part_ids)579 S32 hme_create_valid_part_ids(S32 i4_part_mask, S32 *pi4_valid_part_ids)
580 {
581 S32 id = 0, i;
582 for(i = 0; i < TOT_NUM_PARTS; i++)
583 {
584 if(i4_part_mask & (1 << i))
585 {
586 pi4_valid_part_ids[id] = i;
587 id++;
588 }
589 }
590 pi4_valid_part_ids[id] = -1;
591
592 return id;
593 }
594
595 ctb_boundary_attrs_t *
get_ctb_attrs(S32 ctb_start_x,S32 ctb_start_y,S32 pic_wd,S32 pic_ht,me_frm_ctxt_t * ps_ctxt)596 get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt)
597 {
598 S32 horz_crop, vert_crop;
599 ctb_boundary_attrs_t *ps_attrs;
600
601 horz_crop = ((ctb_start_x + 64) > pic_wd) ? 2 : 0;
602 vert_crop = ((ctb_start_y + 64) > pic_ht) ? 1 : 0;
603 switch(horz_crop + vert_crop)
604 {
605 case 0:
606 ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_CENTRE];
607 break;
608 case 1:
609 ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_BOT_PIC_BOUNDARY];
610 break;
611 case 2:
612 ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_RT_PIC_BOUNDARY];
613 break;
614 case 3:
615 ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_BOT_RT_PIC_BOUNDARY];
616 break;
617 }
618 return (ps_attrs);
619 }
620
621 /**
622 ********************************************************************************
623 * @fn hevc_avg_2d(U08 *pu1_src1,
624 * U08 *pu1_src2,
625 * S32 i4_src1_stride,
626 * S32 i4_src2_stride,
627 * S32 i4_blk_wd,
628 * S32 i4_blk_ht,
629 * U08 *pu1_dst,
630 * S32 i4_dst_stride)
631 *
632 *
633 * @brief point wise average of two buffers into a third buffer
634 *
635 * @param[in] pu1_src1 : first source buffer
636 *
637 * @param[in] pu1_src2 : 2nd source buffer
638 *
639 * @param[in] i4_src1_stride : stride of source 1 buffer
640 *
641 * @param[in] i4_src2_stride : stride of source 2 buffer
642 *
643 * @param[in] i4_blk_wd : block width
644 *
645 * @param[in] i4_blk_ht : block height
646 *
647 * @param[out] pu1_dst : destination buffer
648 *
649 * @param[in] i4_dst_stride : stride of the destination buffer
650 *
651 * @return void
652 ********************************************************************************
653 */
hevc_avg_2d(U08 * pu1_src1,U08 * pu1_src2,S32 i4_src1_stride,S32 i4_src2_stride,S32 i4_blk_wd,S32 i4_blk_ht,U08 * pu1_dst,S32 i4_dst_stride)654 void hevc_avg_2d(
655 U08 *pu1_src1,
656 U08 *pu1_src2,
657 S32 i4_src1_stride,
658 S32 i4_src2_stride,
659 S32 i4_blk_wd,
660 S32 i4_blk_ht,
661 U08 *pu1_dst,
662 S32 i4_dst_stride)
663 {
664 S32 i, j;
665
666 for(i = 0; i < i4_blk_ht; i++)
667 {
668 for(j = 0; j < i4_blk_wd; j++)
669 {
670 pu1_dst[j] = (pu1_src1[j] + pu1_src2[j] + 1) >> 1;
671 }
672 pu1_src1 += i4_src1_stride;
673 pu1_src2 += i4_src2_stride;
674 pu1_dst += i4_dst_stride;
675 }
676 }
677 /**
678 ********************************************************************************
679 * @fn hme_pick_back_search_node(search_results_t *ps_search_results,
680 * search_node_t *ps_search_node_fwd,
681 * S32 i4_part_idx,
682 * layer_ctxt_t *ps_curr_layer)
683 *
684 *
685 * @brief returns the search node corresponding to a ref idx in same or
686 * opp direction. Preference is given to opp direction, but if that
687 * does not yield results, same direction is attempted.
688 *
689 * @param[in] ps_search_results: search results overall
690 *
691 * @param[in] ps_search_node_fwd: search node corresponding to "fwd" direction
692 *
693 * @param[in] i4_part_idx : partition id
694 *
695 * @param[in] ps_curr_layer : layer context for current layer.
696 *
697 * @return search node corresponding to hte "other direction"
698 ********************************************************************************
699 */
700 //#define PICK_L1_REF_SAME_DIR
hme_pick_back_search_node(search_results_t * ps_search_results,search_node_t * ps_search_node_fwd,S32 i4_part_idx,layer_ctxt_t * ps_curr_layer)701 search_node_t *hme_pick_back_search_node(
702 search_results_t *ps_search_results,
703 search_node_t *ps_search_node_fwd,
704 S32 i4_part_idx,
705 layer_ctxt_t *ps_curr_layer)
706 {
707 S32 is_past_l0, is_past_l1, id, i, i4_poc;
708 S32 *pi4_ref_id_to_poc_lc = ps_curr_layer->ai4_ref_id_to_poc_lc;
709 //ref_attr_t *ps_ref_attr_lc;
710 S08 i1_ref_idx_fwd;
711 S16 i2_mv_x, i2_mv_y;
712 search_node_t *ps_search_node;
713
714 i1_ref_idx_fwd = ps_search_node_fwd->i1_ref_idx;
715 i2_mv_x = ps_search_node_fwd->s_mv.i2_mvx;
716 i2_mv_y = ps_search_node_fwd->s_mv.i2_mvy;
717 i4_poc = ps_curr_layer->i4_poc;
718
719 //ps_ref_attr_lc = &ps_curr_layer->as_ref_attr_lc[0];
720 /* If the ref id already picked up maps to a past pic, then we pick */
721 /* a result corresponding to future pic. If such a result is not */
722 /* to be found, then we pick a result corresponding to a past pic */
723 //is_past = ps_ref_attr_lc[i1_ref_idx_fwd].u1_is_past;
724 is_past_l0 = (i4_poc > pi4_ref_id_to_poc_lc[i1_ref_idx_fwd]) ? 1 : 0;
725
726 ASSERT(ps_search_results->u1_num_active_ref <= 2);
727
728 /* pick the right iteration of search nodes to pick up */
729 #ifdef PICK_L1_REF_SAME_DIR
730 if(ps_search_results->u1_num_active_ref == 2)
731 id = !is_past_l0;
732 #else
733 if(ps_search_results->u1_num_active_ref == 2)
734 id = is_past_l0;
735 #endif
736 else
737 id = 0;
738
739 ps_search_node = ps_search_results->aps_part_results[id][i4_part_idx];
740
741 for(i = 0; i < ps_search_results->u1_num_results_per_part; i++)
742 {
743 S08 i1_ref_test = ps_search_node[i].i1_ref_idx;
744 is_past_l1 = (pi4_ref_id_to_poc_lc[i1_ref_test] < i4_poc) ? 1 : 0;
745 //if (ps_ref_attr_lc[ps_search_node[i].i1_ref_idx].u1_is_past != is_past)
746 #ifdef PICK_L1_REF_SAME_DIR
747 if(is_past_l1 == is_past_l0)
748 #else
749 if(is_past_l1 != is_past_l0)
750 #endif
751 {
752 /* belongs to same direction as the ref idx passed, so continue */
753 return (ps_search_node + i);
754 }
755 }
756
757 /* Unable to find best result in opp direction, so try same direction */
758 /* However we need to ensure that we do not pick up same result */
759 for(i = 0; i < ps_search_results->u1_num_results_per_part; i++)
760 {
761 if((ps_search_node->i1_ref_idx != i1_ref_idx_fwd) ||
762 (ps_search_node->s_mv.i2_mvx != i2_mv_x) || (ps_search_node->s_mv.i2_mvy != i2_mv_y))
763 {
764 return (ps_search_node);
765 }
766 ps_search_node++;
767 }
768
769 //ASSERT(0);
770 return (ps_search_results->aps_part_results[id][i4_part_idx]);
771
772 //return (NULL);
773 }
774
775 /**
776 ********************************************************************************
777 * @fn hme_study_input_segmentation(U08 *pu1_inp, S32 i4_inp_stride)
778 *
779 *
780 * @brief Examines input 16x16 for possible edges and orientations of those,
781 * and returns a bit mask of partitions that should be searched for
782 *
783 * @param[in] pu1_inp : input buffer
784 *
785 * @param[in] i4_inp_stride: input stride
786 *
787 * @return part mask (bit mask of active partitions to search)
788 ********************************************************************************
789 */
790
hme_study_input_segmentation(U08 * pu1_inp,S32 i4_inp_stride,S32 limit_active_partitions)791 S32 hme_study_input_segmentation(U08 *pu1_inp, S32 i4_inp_stride, S32 limit_active_partitions)
792 {
793 S32 i4_rsum[16], i4_csum[16];
794 U08 *pu1_tmp, u1_tmp;
795 S32 i4_max_ridx, i4_max_cidx, i4_tmp;
796 S32 i, j, i4_ret;
797 S32 i4_max_rp[4], i4_max_cp[4];
798 S32 i4_seg_lutc[4] = { 0, ENABLE_nLx2N, ENABLE_Nx2N, ENABLE_nRx2N };
799 S32 i4_seg_lutr[4] = { 0, ENABLE_2NxnU, ENABLE_2NxN, ENABLE_2NxnD };
800 #define EDGE_THR (15 * 16)
801 #define HI_PASS(ptr, i) (2 * (ptr[i] - ptr[i - 1]) + (ptr[i + 1] - ptr[i - 2]))
802
803 if(0 == limit_active_partitions)
804 {
805 /*********************************************************************/
806 /* In this case, we do not optimize on active partitions and search */
807 /* brute force. This way, 17 partitinos would be enabled. */
808 /*********************************************************************/
809 return (ENABLE_ALL_PARTS);
810 }
811
812 /*************************************************************************/
813 /* Control passes below in case we wish to optimize on active partitions.*/
814 /* This is based on input characteristics, check how an edge passes along*/
815 /* an input 16x16 area, if at all, and decide active partitinos. */
816 /*************************************************************************/
817
818 /* Initialize row and col sums */
819 for(i = 0; i < 16; i++)
820 {
821 i4_rsum[i] = 0;
822 i4_csum[i] = 0;
823 }
824 pu1_tmp = pu1_inp;
825 for(i = 0; i < 16; i++)
826 {
827 for(j = 0; j < 16; j++)
828 {
829 u1_tmp = *pu1_tmp++;
830 i4_rsum[i] += u1_tmp;
831 i4_csum[j] += u1_tmp;
832 }
833 pu1_tmp += (i4_inp_stride - 16);
834 }
835
836 /* 0 is dummy; 1 is 4; 2 is 8; 3 is 12 */
837 i4_max_rp[0] = 0;
838 i4_max_cp[0] = 0;
839 i4_max_rp[1] = 0;
840 i4_max_cp[1] = 0;
841 i4_max_rp[2] = 0;
842 i4_max_cp[2] = 0;
843 i4_max_rp[3] = 0;
844 i4_max_cp[3] = 0;
845
846 /* Get Max edge strength across (2,3) (3,4) (4,5) */
847 for(i = 3; i < 6; i++)
848 {
849 /* Run [-1 -2 2 1] filter through rsum/csum */
850 i4_tmp = HI_PASS(i4_rsum, i);
851 if(ABS(i4_tmp) > i4_max_rp[1])
852 i4_max_rp[1] = i4_tmp;
853
854 i4_tmp = HI_PASS(i4_csum, i);
855 if(ABS(i4_tmp) > i4_max_cp[1])
856 i4_max_cp[1] = i4_tmp;
857 }
858
859 /* Get Max edge strength across (6,7) (7,8) (8,9) */
860 for(i = 7; i < 10; i++)
861 {
862 /* Run [-1 -2 2 1] filter through rsum/csum */
863 i4_tmp = HI_PASS(i4_rsum, i);
864 if(ABS(i4_tmp) > i4_max_rp[2])
865 i4_max_rp[2] = i4_tmp;
866
867 i4_tmp = HI_PASS(i4_csum, i);
868 if(ABS(i4_tmp) > i4_max_cp[2])
869 i4_max_cp[2] = i4_tmp;
870 }
871
872 /* Get Max edge strength across (10,11) (11,12) (12,13) */
873 for(i = 11; i < 14; i++)
874 {
875 /* Run [-1 -2 2 1] filter through rsum/csum */
876 i4_tmp = HI_PASS(i4_rsum, i);
877 if(ABS(i4_tmp) > i4_max_rp[3])
878 i4_max_rp[3] = i4_tmp;
879
880 i4_tmp = HI_PASS(i4_csum, i);
881 if(ABS(i4_tmp) > i4_max_cp[3])
882 i4_max_cp[3] = i4_tmp;
883 }
884
885 /* Find the maximum across the 3 and see whether the strength qualifies as edge */
886 i4_max_ridx = 1;
887 i4_max_cidx = 1;
888 for(i = 2; i <= 3; i++)
889 {
890 if(i4_max_rp[i] > i4_max_rp[i4_max_ridx])
891 i4_max_ridx = i;
892
893 if(i4_max_cp[i] > i4_max_cp[i4_max_cidx])
894 i4_max_cidx = i;
895 }
896
897 if(EDGE_THR > i4_max_rp[i4_max_ridx])
898 {
899 i4_max_ridx = 0;
900 }
901
902 if(EDGE_THR > i4_max_cp[i4_max_cidx])
903 {
904 i4_max_cidx = 0;
905 }
906
907 i4_ret = ENABLE_2Nx2N;
908
909 /* If only vertical discontinuity, go with one of 2Nx? */
910 if(0 == (i4_max_ridx + i4_max_cidx))
911 {
912 //num_me_parts++;
913 return i4_ret;
914 }
915
916 if(i4_max_ridx && (i4_max_cidx == 0))
917 {
918 //num_me_parts += 3;
919 return ((i4_ret | i4_seg_lutr[i4_max_ridx]));
920 }
921
922 /* If only horizontal discontinuity, go with one of ?x2N */
923 if(i4_max_cidx && (i4_max_ridx == 0))
924 {
925 //num_me_parts += 3;
926 return ((i4_ret | i4_seg_lutc[i4_max_cidx]));
927 }
928
929 /* If middle is dominant in both directions, go with NxN */
930 if((2 == i4_max_cidx) && (2 == i4_max_ridx))
931 {
932 //num_me_parts += 5;
933 return ((i4_ret | ENABLE_NxN));
934 }
935
936 /* Otherwise, conservatively, enable NxN and the 2 AMPs */
937 //num_me_parts += 9;
938 return (i4_ret | ENABLE_NxN | i4_seg_lutr[i4_max_ridx] | i4_seg_lutc[i4_max_cidx]);
939 }
940
941 /**
942 ********************************************************************************
943 * @fn hme_init_search_results(search_results_t *ps_search_results,
944 * S32 i4_num_ref,
945 * S32 i4_num_best_results,
946 * S32 i4_num_results_per_part,
947 * BLK_SIZE_T e_blk_size,
948 * S32 i4_x_off,
949 * S32 i4_y_off)
950 *
951 * @brief Initializes the search results structure with some key attributes
952 *
953 * @param[out] ps_search_results : search results structure to initialise
954 *
955 * @param[in] i4_num_Ref: corresponds to the number of ref ids searched
956 *
957 * @param[in] i4_num_best_results: Number of best results for the CU to
958 * be maintained in the result structure
959 *
960 * @param[in] i4_num_results_per_part: Per active partition the number of best
961 * results to be maintained
962 *
963 * @param[in] e_blk_size: blk size of the CU for which this structure used
964 *
965 * @param[in] i4_x_off: x offset of the top left of CU from CTB top left
966 *
967 * @param[in] i4_y_off: y offset of the top left of CU from CTB top left
968 *
969 * @param[in] pu1_is_past : points ot an array that tells whether a given ref id
970 * has prominence in L0 or in L1 list (past or future )
971 *
972 * @return void
973 ********************************************************************************
974 */
hme_init_search_results(search_results_t * ps_search_results,S32 i4_num_ref,S32 i4_num_best_results,S32 i4_num_results_per_part,BLK_SIZE_T e_blk_size,S32 i4_x_off,S32 i4_y_off,U08 * pu1_is_past)975 void hme_init_search_results(
976 search_results_t *ps_search_results,
977 S32 i4_num_ref,
978 S32 i4_num_best_results,
979 S32 i4_num_results_per_part,
980 BLK_SIZE_T e_blk_size,
981 S32 i4_x_off,
982 S32 i4_y_off,
983 U08 *pu1_is_past)
984 {
985 CU_SIZE_T e_cu_size = ge_blk_size_to_cu_size[e_blk_size];
986
987 ASSERT(e_cu_size != -1);
988 ps_search_results->e_cu_size = e_cu_size;
989 ps_search_results->u1_x_off = (U08)i4_x_off;
990 ps_search_results->u1_y_off = (U08)i4_y_off;
991 ps_search_results->u1_num_active_ref = (U08)i4_num_ref;
992 ps_search_results->u1_num_best_results = (U08)i4_num_best_results;
993 ps_search_results->u1_num_results_per_part = (U08)i4_num_results_per_part;
994 ps_search_results->pu1_is_past = pu1_is_past;
995 ps_search_results->u1_split_flag = 0;
996 ps_search_results->best_cu_cost = MAX_32BIT_VAL;
997 }
998
999 /**
1000 ********************************************************************************
1001 * @fn hme_reset_search_results((search_results_t *ps_search_results,
1002 * S32 i4_part_mask)
1003 *
1004 *
1005 * @brief Resets the best results to maximum values, so as to allow search
1006 * for the new CU's partitions. The existing results may be from an
1007 * older CU using same structure.
1008 *
1009 * @param[in] ps_search_results: search results structure
1010 *
1011 * @param[in] i4_part_mask : bit mask of active partitions
1012 *
1013 * @return part mask (bit mask of active partitions to search)
1014 ********************************************************************************
1015 */
hme_reset_search_results(search_results_t * ps_search_results,S32 i4_part_mask,S32 mv_res)1016 void hme_reset_search_results(search_results_t *ps_search_results, S32 i4_part_mask, S32 mv_res)
1017 {
1018 S32 i4_num_ref = (S32)ps_search_results->u1_num_active_ref;
1019 S08 i1_ref_idx;
1020 S32 i, j;
1021 search_node_t *ps_search_node;
1022
1023 /* store this for future use */
1024 ps_search_results->i4_part_mask = i4_part_mask;
1025
1026 /* Reset the spli_flag to zero */
1027 ps_search_results->u1_split_flag = 0;
1028
1029 HME_SET_MVPRED_RES((&ps_search_results->as_pred_ctxt[0]), mv_res);
1030 HME_SET_MVPRED_RES((&ps_search_results->as_pred_ctxt[1]), mv_res);
1031
1032 for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref; i1_ref_idx++)
1033 {
1034 /* Reset the individual partitino results */
1035 for(i = 0; i < TOT_NUM_PARTS; i++)
1036 {
1037 if(!(i4_part_mask & (1 << i)))
1038 continue;
1039
1040 ps_search_node = ps_search_results->aps_part_results[i1_ref_idx][i];
1041
1042 for(j = 0; j < ps_search_results->u1_num_results_per_part; j++)
1043 {
1044 ps_search_node[j].s_mv.i2_mvx = 0;
1045 ps_search_node[j].s_mv.i2_mvy = 0;
1046 ps_search_node[j].i4_tot_cost = MAX_32BIT_VAL;
1047 ps_search_node[j].i4_sad = MAX_32BIT_VAL;
1048 ps_search_node[j].i4_sdi = 0;
1049 ps_search_node[j].i1_ref_idx = -1;
1050 ps_search_node[j].u1_subpel_done = 0;
1051 ps_search_node[j].u1_is_avail = 1;
1052 ps_search_node[j].i4_mv_cost = 0;
1053 }
1054 }
1055 }
1056 }
1057 /**
1058 ********************************************************************************
1059 * @fn hme_clamp_grid_by_mvrange(search_node_t *ps_search_node,
1060 * S32 i4_step,
1061 * range_prms_t *ps_mvrange)
1062 *
1063 * @brief Given a central pt within mv range, and a grid of points surrounding
1064 * this pt, this function returns a grid mask of pts within search rng
1065 *
1066 * @param[in] ps_search_node: the centre pt of the grid
1067 *
1068 * @param[in] i4_step: step size of grid
1069 *
1070 * @param[in] ps_mvrange: structure containing the current mv range
1071 *
1072 * @return bitmask of the pts in grid within search range
1073 ********************************************************************************
1074 */
hme_clamp_grid_by_mvrange(search_node_t * ps_search_node,S32 i4_step,range_prms_t * ps_mvrange)1075 S32 hme_clamp_grid_by_mvrange(search_node_t *ps_search_node, S32 i4_step, range_prms_t *ps_mvrange)
1076 {
1077 S32 i4_mask = GRID_ALL_PTS_VALID;
1078 if(ps_search_node->s_mv.i2_mvx + i4_step >= ps_mvrange->i2_max_x)
1079 {
1080 i4_mask &= (GRID_RT_3_INVALID);
1081 }
1082 if(ps_search_node->s_mv.i2_mvx - i4_step < ps_mvrange->i2_min_x)
1083 {
1084 i4_mask &= (GRID_LT_3_INVALID);
1085 }
1086 if(ps_search_node->s_mv.i2_mvy + i4_step >= ps_mvrange->i2_max_y)
1087 {
1088 i4_mask &= (GRID_BOT_3_INVALID);
1089 }
1090 if(ps_search_node->s_mv.i2_mvy - i4_step < ps_mvrange->i2_min_y)
1091 {
1092 i4_mask &= (GRID_TOP_3_INVALID);
1093 }
1094 return i4_mask;
1095 }
1096
1097 /**
1098 ********************************************************************************
1099 * @fn layer_ctxt_t *hme_get_past_layer_ctxt(me_ctxt_t *ps_ctxt,
1100 S32 i4_layer_id)
1101 *
1102 * @brief returns the layer ctxt of the layer with given id from the temporally
1103 * previous frame
1104 *
1105 * @param[in] ps_ctxt : ME context
1106 *
1107 * @param[in] i4_layer_id : id of layer required
1108 *
1109 * @return layer ctxt of given layer id in temporally previous frame
1110 ********************************************************************************
1111 */
hme_get_past_layer_ctxt(me_ctxt_t * ps_ctxt,me_frm_ctxt_t * ps_frm_ctxt,S32 i4_layer_id,S32 i4_num_me_frm_pllel)1112 layer_ctxt_t *hme_get_past_layer_ctxt(
1113 me_ctxt_t *ps_ctxt, me_frm_ctxt_t *ps_frm_ctxt, S32 i4_layer_id, S32 i4_num_me_frm_pllel)
1114 {
1115 S32 i4_poc = ps_frm_ctxt->ai4_ref_idx_to_poc_lc[0];
1116 S32 i;
1117 layers_descr_t *ps_desc;
1118
1119 for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
1120 {
1121 ps_desc = &ps_ctxt->as_ref_descr[i];
1122 if(i4_poc == ps_desc->aps_layers[i4_layer_id]->i4_poc)
1123 return (ps_desc->aps_layers[i4_layer_id]);
1124 }
1125 return NULL;
1126 }
1127
1128 /**
1129 ********************************************************************************
1130 * @fn layer_ctxt_t *hme_coarse_get_past_layer_ctxt(me_ctxt_t *ps_ctxt,
1131 S32 i4_layer_id)
1132 *
1133 * @brief returns the layer ctxt of the layer with given id from the temporally
1134 * previous frame
1135 *
1136 * @param[in] ps_ctxt : ME context
1137 *
1138 * @param[in] i4_layer_id : id of layer required
1139 *
1140 * @return layer ctxt of given layer id in temporally previous frame
1141 ********************************************************************************
1142 */
hme_coarse_get_past_layer_ctxt(coarse_me_ctxt_t * ps_ctxt,S32 i4_layer_id)1143 layer_ctxt_t *hme_coarse_get_past_layer_ctxt(coarse_me_ctxt_t *ps_ctxt, S32 i4_layer_id)
1144 {
1145 S32 i4_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[0];
1146 S32 i;
1147 layers_descr_t *ps_desc;
1148
1149 for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
1150 {
1151 ps_desc = &ps_ctxt->as_ref_descr[i];
1152 if(i4_poc == ps_desc->aps_layers[i4_layer_id]->i4_poc)
1153 return (ps_desc->aps_layers[i4_layer_id]);
1154 }
1155 return NULL;
1156 }
1157
1158 /**
1159 ********************************************************************************
1160 * @fn void hme_init_mv_bank(layer_ctxt_t *ps_layer_ctxt,
1161 BLK_SIZE_T e_blk_size,
1162 S32 i4_num_ref,
1163 S32 i4_num_results_per_part)
1164 *
1165 * @brief Given a blk size to be used for this layer, this function initialize
1166 * the mv bank to make it ready to store and return results.
1167 *
1168 * @param[in, out] ps_layer_ctxt: pointer to layer ctxt
1169 *
1170 * @param[in] e_blk_size : resolution at which mvs are stored
1171 *
1172 * @param[in] i4_num_ref: number of reference frames corresponding to which
1173 * results are stored.
1174 *
1175 * @param[in] e_blk_size : resolution at which mvs are stored
1176 *
1177 * @param[in] i4_num_results_per_part : Number of results to be stored per
1178 * ref idx. So these many best results stored
1179 *
1180 * @return void
1181 ********************************************************************************
1182 */
hme_init_mv_bank(layer_ctxt_t * ps_layer_ctxt,BLK_SIZE_T e_blk_size,S32 i4_num_ref,S32 i4_num_results_per_part,U08 u1_enc)1183 void hme_init_mv_bank(
1184 layer_ctxt_t *ps_layer_ctxt,
1185 BLK_SIZE_T e_blk_size,
1186 S32 i4_num_ref,
1187 S32 i4_num_results_per_part,
1188 U08 u1_enc)
1189 {
1190 layer_mv_t *ps_mv_bank;
1191 hme_mv_t *ps_mv1, *ps_mv2;
1192 S08 *pi1_ref_id1, *pi1_ref_id2;
1193 S32 blk_wd, mvs_in_blk, blks_in_row, mvs_in_row, blks_in_col;
1194 S32 i4_i, i4_j, blk_ht;
1195
1196 ps_mv_bank = ps_layer_ctxt->ps_layer_mvbank;
1197 ps_mv_bank->i4_num_mvs_per_ref = i4_num_results_per_part;
1198 ps_mv_bank->i4_num_ref = i4_num_ref;
1199 mvs_in_blk = i4_num_ref * i4_num_results_per_part;
1200 ps_mv_bank->i4_num_mvs_per_blk = mvs_in_blk;
1201
1202 /*************************************************************************/
1203 /* Store blk size, from blk size derive blk width and use this to compute*/
1204 /* number of blocks every row. We also pad to left and top by 1, to */
1205 /* support the prediction mechanism. */
1206 /*************************************************************************/
1207 ps_mv_bank->e_blk_size = e_blk_size;
1208 blk_wd = gau1_blk_size_to_wd[e_blk_size];
1209 blk_ht = gau1_blk_size_to_ht[e_blk_size];
1210
1211 blks_in_row = (ps_layer_ctxt->i4_wd + (blk_wd - 1)) / blk_wd;
1212 blks_in_col = (ps_layer_ctxt->i4_ht + (blk_ht - 1)) / blk_ht;
1213
1214 if(u1_enc)
1215 {
1216 /* TODO: CTB64x64 is assumed. FIX according to actual CTB */
1217 WORD32 num_ctb_cols = ((ps_layer_ctxt->i4_wd + 63) >> 6);
1218 WORD32 num_ctb_rows = ((ps_layer_ctxt->i4_ht + 63) >> 6);
1219
1220 blks_in_row = (num_ctb_cols << 3);
1221 blks_in_col = (num_ctb_rows << 3);
1222 }
1223
1224 blks_in_row += 2;
1225 mvs_in_row = blks_in_row * mvs_in_blk;
1226
1227 ps_mv_bank->i4_num_blks_per_row = blks_in_row;
1228 ps_mv_bank->i4_num_mvs_per_row = mvs_in_row;
1229
1230 /* To ensure run time requirements fall within allocation time request */
1231 ASSERT(ps_mv_bank->i4_num_mvs_per_row <= ps_mv_bank->max_num_mvs_per_row);
1232
1233 /*************************************************************************/
1234 /* Increment by one full row at top for padding and one column in left */
1235 /* this gives us the actual start of mv for 0,0 blk */
1236 /*************************************************************************/
1237 ps_mv_bank->ps_mv = ps_mv_bank->ps_mv_base + mvs_in_row + mvs_in_blk;
1238 ps_mv_bank->pi1_ref_idx = ps_mv_bank->pi1_ref_idx_base + mvs_in_row + mvs_in_blk;
1239
1240 memset(ps_mv_bank->ps_mv_base, 0, mvs_in_row * sizeof(hme_mv_t));
1241 memset(ps_mv_bank->pi1_ref_idx_base, -1, mvs_in_row * sizeof(U08));
1242
1243 /*************************************************************************/
1244 /* Initialize top row, left col and right col with zeros since these are */
1245 /* used as candidates during searches. */
1246 /*************************************************************************/
1247 ps_mv1 = ps_mv_bank->ps_mv_base + mvs_in_row;
1248 ps_mv2 = ps_mv1 + mvs_in_row - mvs_in_blk;
1249 pi1_ref_id1 = ps_mv_bank->pi1_ref_idx_base + mvs_in_row;
1250 pi1_ref_id2 = pi1_ref_id1 + mvs_in_row - mvs_in_blk;
1251 for(i4_i = 0; i4_i < blks_in_col; i4_i++)
1252 {
1253 for(i4_j = 0; i4_j < mvs_in_blk; i4_j++)
1254 {
1255 ps_mv1[i4_j].i2_mv_x = 0;
1256 ps_mv1[i4_j].i2_mv_y = 0;
1257 ps_mv2[i4_j].i2_mv_x = 0;
1258 ps_mv2[i4_j].i2_mv_y = 0;
1259 pi1_ref_id1[i4_j] = -1;
1260 pi1_ref_id2[i4_j] = -1;
1261 }
1262 ps_mv1 += mvs_in_row;
1263 ps_mv2 += mvs_in_row;
1264 pi1_ref_id1 += mvs_in_row;
1265 pi1_ref_id2 += mvs_in_row;
1266 }
1267 }
/**
********************************************************************************
*  @fn     void hme_fill_mvbank_intra(layer_ctxt_t *ps_layer_ctxt)
*
*  @brief  Marks entries of the layer's mv bank as intra: the mv components
*          are set to INTRA_MV and the ref idx to -1. The blk size already
*          stored in the mv bank decides how many rows and how many entries
*          per row are touched.
*
*  @param[in, out] ps_layer_ctxt : layer ctxt whose mv bank is filled
*
*  @return void
********************************************************************************
*/
void hme_fill_mvbank_intra(layer_ctxt_t *ps_layer_ctxt)
{
    layer_mv_t *ps_mv_bank = ps_layer_ctxt->ps_layer_mvbank;
    BLK_SIZE_T e_blk_size = ps_mv_bank->e_blk_size;
    hme_mv_t *ps_mv;
    S08 *pi1_ref_id;
    S32 blk_wd, blk_ht, blks_in_row, blks_in_col;
    S32 i, j;

    /*************************************************************************/
    /* From the stored blk size derive blk width and height, and from these  */
    /* the number of whole blks per row and per column of the layer.         */
    /* The height comes from the height table, as in hme_init_mv_bank.       */
    /*************************************************************************/
    blk_wd = gau1_blk_size_to_wd[e_blk_size];
    blk_ht = gau1_blk_size_to_ht[e_blk_size];
    blks_in_row = ps_layer_ctxt->i4_wd / blk_wd;
    blks_in_col = ps_layer_ctxt->i4_ht / blk_ht;

    /* ps_mv / pi1_ref_idx already point at the entry of the 0,0 blk */
    ps_mv = ps_mv_bank->ps_mv;
    pi1_ref_id = ps_mv_bank->pi1_ref_idx;

    for(i = 0; i < blks_in_col; i++)
    {
        /* NOTE(review): only blks_in_row consecutive entries are written per */
        /* row, while each blk holds i4_num_mvs_per_blk entries. Confirm      */
        /* whether blks_in_row * i4_num_mvs_per_blk was intended.             */
        for(j = 0; j < blks_in_row; j++)
        {
            ps_mv[j].i2_mv_x = INTRA_MV;
            ps_mv[j].i2_mv_y = INTRA_MV;
            pi1_ref_id[j] = -1;
        }

        /* Advance by the padded row pitch of the mv bank */
        ps_mv += ps_mv_bank->i4_num_mvs_per_row;
        pi1_ref_id += ps_mv_bank->i4_num_mvs_per_row;
    }
}
1310
1311 /**
1312 ********************************************************************************
1313 * @fn void hme_derive_search_range(range_prms_t *ps_range,
1314 * range_prms_t *ps_pic_limit,
1315 * range_prms_t *ps_mv_limit,
1316 * S32 i4_x,
1317 * S32 i4_y,
1318 * S32 blk_wd,
1319 * S32 blk_ht)
1320 *
1321 * @brief given picture limits and blk dimensions and mv search limits, obtains
1322 * teh valid search range such that the blk stays within pic boundaries,
1323 * where picture boundaries include padded portions of picture
1324 *
1325 * @param[out] ps_range: updated with actual search range
1326 *
1327 * @param[in] ps_pic_limit : picture boundaries
1328 *
1329 * @param[in] ps_mv_limit: Search range limits for the mvs
1330 *
1331 * @param[in] i4_x : x coordinate of the blk
1332 *
1333 * @param[in] i4_y : y coordinate of the blk
1334 *
1335 * @param[in] blk_wd : blk width
1336 *
1337 * @param[in] blk_ht : blk height
1338 *
1339 * @return void
1340 ********************************************************************************
1341 */
hme_derive_search_range(range_prms_t * ps_range,range_prms_t * ps_pic_limit,range_prms_t * ps_mv_limit,S32 i4_x,S32 i4_y,S32 blk_wd,S32 blk_ht)1342 void hme_derive_search_range(
1343 range_prms_t *ps_range,
1344 range_prms_t *ps_pic_limit,
1345 range_prms_t *ps_mv_limit,
1346 S32 i4_x,
1347 S32 i4_y,
1348 S32 blk_wd,
1349 S32 blk_ht)
1350 {
1351 ps_range->i2_max_x =
1352 MIN((ps_pic_limit->i2_max_x - (S16)blk_wd - (S16)i4_x), ps_mv_limit->i2_max_x);
1353 ps_range->i2_min_x = MAX((ps_pic_limit->i2_min_x - (S16)i4_x), ps_mv_limit->i2_min_x);
1354 ps_range->i2_max_y =
1355 MIN((ps_pic_limit->i2_max_y - (S16)blk_ht - (S16)i4_y), ps_mv_limit->i2_max_y);
1356 ps_range->i2_min_y = MAX((ps_pic_limit->i2_min_y - (S16)i4_y), ps_mv_limit->i2_min_y);
1357 }
1358
1359 /**
1360 ********************************************************************************
1361 * @fn void hme_get_spatial_candt(search_node_t *ps_search_node,
1362 * layer_ctxt_t *ps_curr_layer,
1363 * S32 i4_blk_x,
1364 * S32 i4_blk_y,
1365 * S08 i1_ref_id,
1366 * S32 i4_result_id)
1367 *
1368 * @brief obtains a candt from the same mv bank as the current one, its called
1369 * spatial candt as it does not require scaling for temporal distances
1370 *
1371 * @param[out] ps_search_node: mv and ref id updated here of the candt
1372 *
1373 * @param[in] ps_curr_layer: layer ctxt, has the mv bank structure pointer
1374 *
1375 * @param[in] i4_blk_x : x coordinate of the block in mv bank
1376 *
1377 * @param[in] i4_blk_y : y coordinate of the block in mv bank
1378 *
1379 * @param[in] i1_ref_id : Corresponds to ref idx from which to pick up mv
1380 * results, useful if multiple ref idx candts maintained separately.
1381 *
1382 * @param[in] i4_result_id : If multiple results stored per ref idx, this
1383 * pts to the id of the result
1384 *
1385 * @param[in] tr_avail : top right availability of the block
1386 *
1387 * @param[in] bl_avail : bottom left availability of the block
1388 *
1389 * @return void
1390 ********************************************************************************
1391 */
hme_get_spatial_candt(layer_ctxt_t * ps_curr_layer,BLK_SIZE_T e_search_blk_size,S32 i4_blk_x,S32 i4_blk_y,S08 i1_ref_idx,search_node_t * ps_top_neighbours,search_node_t * ps_left_neighbours,S32 i4_result_id,S32 tr_avail,S32 bl_avail,S32 encode)1392 void hme_get_spatial_candt(
1393 layer_ctxt_t *ps_curr_layer,
1394 BLK_SIZE_T e_search_blk_size,
1395 S32 i4_blk_x,
1396 S32 i4_blk_y,
1397 S08 i1_ref_idx,
1398 search_node_t *ps_top_neighbours,
1399 search_node_t *ps_left_neighbours,
1400 S32 i4_result_id,
1401 S32 tr_avail,
1402 S32 bl_avail,
1403 S32 encode)
1404
1405 {
1406 layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
1407 S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
1408 S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
1409 search_node_t *ps_search_node;
1410 S32 i4_offset;
1411 hme_mv_t *ps_mv, *ps_mv_base;
1412 S08 *pi1_ref_idx, *pi1_ref_idx_base;
1413 S32 jump = 1, mvs_in_blk, mvs_in_row;
1414 S32 shift = (encode ? 2 : 0);
1415
1416 if(i4_blk_size1 != i4_blk_size2)
1417 {
1418 i4_blk_x <<= 1;
1419 i4_blk_y <<= 1;
1420 jump = 2;
1421 if((i4_blk_size1 << 2) == i4_blk_size2)
1422 {
1423 i4_blk_x <<= 1;
1424 i4_blk_y <<= 1;
1425 jump = 4;
1426 }
1427 }
1428
1429 mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
1430 mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
1431
1432 /* Adjust teh blk coord to point to top left locn */
1433 i4_blk_x -= 1;
1434 i4_blk_y -= 1;
1435 /* Pick up the mvs from the location */
1436 i4_offset = (i4_blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
1437 i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * i4_blk_y);
1438
1439 ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
1440 pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
1441
1442 ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref) + i4_result_id;
1443 pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref) + i4_result_id;
1444
1445 ps_mv_base = ps_mv;
1446 pi1_ref_idx_base = pi1_ref_idx;
1447
1448 /* ps_mv and pi1_ref_idx now point to the top left locn */
1449 /* Get 4 mvs as follows: */
1450 ps_search_node = ps_top_neighbours;
1451 COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1452
1453 /* Move to top */
1454 ps_search_node++;
1455 ps_mv += mvs_in_blk;
1456 pi1_ref_idx += mvs_in_blk;
1457 COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1458
1459 /* Move to t1 : relevant for 4x4 part searches or for partitions i 16x16 */
1460 if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
1461 {
1462 ps_search_node++;
1463 ps_mv += (mvs_in_blk * (jump >> 1));
1464 pi1_ref_idx += (mvs_in_blk * (jump >> 1));
1465 COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1466 }
1467 else
1468 {
1469 ps_search_node++;
1470 ps_search_node->s_mv.i2_mvx = 0;
1471 ps_search_node->s_mv.i2_mvy = 0;
1472 ps_search_node->i1_ref_idx = i1_ref_idx;
1473 ps_search_node->u1_is_avail = 0;
1474 ps_search_node->u1_subpel_done = 0;
1475 }
1476
1477 /* Move to tr: this will be tr w.r.t. the blk being searched */
1478 ps_search_node++;
1479 if(tr_avail == 0)
1480 {
1481 ps_search_node->s_mv.i2_mvx = 0;
1482 ps_search_node->s_mv.i2_mvy = 0;
1483 ps_search_node->i1_ref_idx = i1_ref_idx;
1484 ps_search_node->u1_is_avail = 0;
1485 ps_search_node->u1_subpel_done = 0;
1486 }
1487 else
1488 {
1489 ps_mv = ps_mv_base + (mvs_in_blk * (1 + jump));
1490 pi1_ref_idx = pi1_ref_idx_base + (mvs_in_blk * (1 + jump));
1491 COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1492 }
1493
1494 /* Move to left */
1495 ps_search_node = ps_left_neighbours;
1496 ps_mv = ps_mv_base + mvs_in_row;
1497 pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
1498 COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1499
1500 /* Move to l1 */
1501 if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
1502 {
1503 ps_search_node++;
1504 ps_mv += (mvs_in_row * (jump >> 1));
1505 pi1_ref_idx += (mvs_in_row * (jump >> 1));
1506 COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1507 }
1508 else
1509 {
1510 ps_search_node++;
1511 ps_search_node->s_mv.i2_mvx = 0;
1512 ps_search_node->s_mv.i2_mvy = 0;
1513 ps_search_node->i1_ref_idx = i1_ref_idx;
1514 ps_search_node->u1_is_avail = 0;
1515 ps_search_node->u1_subpel_done = 0;
1516 }
1517
1518 /* Move to bl */
1519 ps_search_node++;
1520 if(bl_avail == 0)
1521 {
1522 ps_search_node->s_mv.i2_mvx = 0;
1523 ps_search_node->s_mv.i2_mvy = 0;
1524 ps_search_node->i1_ref_idx = i1_ref_idx;
1525 ps_search_node->u1_is_avail = 0;
1526 }
1527 else
1528 {
1529 ps_mv = ps_mv_base + (mvs_in_row * (1 + jump));
1530 pi1_ref_idx = pi1_ref_idx_base + (mvs_in_row * (1 + jump));
1531 COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1532 }
1533 }
1534
hme_get_spatial_candt_in_l1_me(layer_ctxt_t * ps_curr_layer,BLK_SIZE_T e_search_blk_size,S32 i4_blk_x,S32 i4_blk_y,S08 i1_ref_idx,U08 u1_pred_dir,search_node_t * ps_top_neighbours,search_node_t * ps_left_neighbours,S32 i4_result_id,S32 tr_avail,S32 bl_avail,S32 i4_num_act_ref_l0,S32 i4_num_act_ref_l1)1535 void hme_get_spatial_candt_in_l1_me(
1536 layer_ctxt_t *ps_curr_layer,
1537 BLK_SIZE_T e_search_blk_size,
1538 S32 i4_blk_x,
1539 S32 i4_blk_y,
1540 S08 i1_ref_idx,
1541 U08 u1_pred_dir,
1542 search_node_t *ps_top_neighbours,
1543 search_node_t *ps_left_neighbours,
1544 S32 i4_result_id,
1545 S32 tr_avail,
1546 S32 bl_avail,
1547 S32 i4_num_act_ref_l0,
1548 S32 i4_num_act_ref_l1)
1549 {
1550 search_node_t *ps_search_node;
1551 hme_mv_t *ps_mv, *ps_mv_base;
1552
1553 S32 i4_offset;
1554 S32 mvs_in_blk, mvs_in_row;
1555 S08 *pi1_ref_idx, *pi1_ref_idx_base;
1556 S32 i4_mv_pos_in_implicit_array;
1557
1558 layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
1559
1560 S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
1561 S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
1562 S32 jump = 1;
1563 S32 shift = 0;
1564 S32 i4_num_results_in_given_dir =
1565 ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l1)
1566 : (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l0));
1567
1568 if(i4_blk_size1 != i4_blk_size2)
1569 {
1570 i4_blk_x <<= 1;
1571 i4_blk_y <<= 1;
1572 jump = 2;
1573 if((i4_blk_size1 << 2) == i4_blk_size2)
1574 {
1575 i4_blk_x <<= 1;
1576 i4_blk_y <<= 1;
1577 jump = 4;
1578 }
1579 }
1580
1581 mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
1582 mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
1583
1584 /* Adjust the blk coord to point to top left locn */
1585 i4_blk_x -= 1;
1586 i4_blk_y -= 1;
1587 /* Pick up the mvs from the location */
1588 i4_offset = (i4_blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
1589 i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * i4_blk_y);
1590
1591 i4_offset +=
1592 ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l0) : 0);
1593
1594 ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
1595 pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
1596
1597 ps_mv_base = ps_mv;
1598 pi1_ref_idx_base = pi1_ref_idx;
1599
1600 /* TL */
1601 {
1602 /* ps_mv and pi1_ref_idx now point to the top left locn */
1603 ps_search_node = ps_top_neighbours;
1604
1605 i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1606 pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1607
1608 if(-1 != i4_mv_pos_in_implicit_array)
1609 {
1610 COPY_MV_TO_SEARCH_NODE(
1611 ps_search_node,
1612 &ps_mv[i4_mv_pos_in_implicit_array],
1613 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1614 i1_ref_idx,
1615 shift);
1616 }
1617 else
1618 {
1619 ps_search_node->u1_is_avail = 0;
1620 ps_search_node->s_mv.i2_mvx = 0;
1621 ps_search_node->s_mv.i2_mvy = 0;
1622 ps_search_node->i1_ref_idx = i1_ref_idx;
1623 }
1624 }
1625
1626 /* Move to top */
1627 {
1628 /* ps_mv and pi1_ref_idx now point to the top left locn */
1629 ps_search_node++;
1630 ps_mv += mvs_in_blk;
1631 pi1_ref_idx += mvs_in_blk;
1632
1633 i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1634 pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1635
1636 if(-1 != i4_mv_pos_in_implicit_array)
1637 {
1638 COPY_MV_TO_SEARCH_NODE(
1639 ps_search_node,
1640 &ps_mv[i4_mv_pos_in_implicit_array],
1641 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1642 i1_ref_idx,
1643 shift);
1644 }
1645 else
1646 {
1647 ps_search_node->u1_is_avail = 0;
1648 ps_search_node->s_mv.i2_mvx = 0;
1649 ps_search_node->s_mv.i2_mvy = 0;
1650 ps_search_node->i1_ref_idx = i1_ref_idx;
1651 }
1652 }
1653
1654 /* Move to t1 : relevant for 4x4 part searches or for partitions i 16x16 */
1655 if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
1656 {
1657 ps_search_node++;
1658 ps_mv += (mvs_in_blk * (jump >> 1));
1659 pi1_ref_idx += (mvs_in_blk * (jump >> 1));
1660
1661 i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1662 pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1663
1664 if(-1 != i4_mv_pos_in_implicit_array)
1665 {
1666 COPY_MV_TO_SEARCH_NODE(
1667 ps_search_node,
1668 &ps_mv[i4_mv_pos_in_implicit_array],
1669 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1670 i1_ref_idx,
1671 shift);
1672 }
1673 else
1674 {
1675 ps_search_node->u1_is_avail = 0;
1676 ps_search_node->s_mv.i2_mvx = 0;
1677 ps_search_node->s_mv.i2_mvy = 0;
1678 ps_search_node->i1_ref_idx = i1_ref_idx;
1679 }
1680 }
1681 else
1682 {
1683 ps_search_node++;
1684 ps_search_node->u1_is_avail = 0;
1685 ps_search_node->s_mv.i2_mvx = 0;
1686 ps_search_node->s_mv.i2_mvy = 0;
1687 ps_search_node->i1_ref_idx = i1_ref_idx;
1688 }
1689
1690 /* Move to tr: this will be tr w.r.t. the blk being searched */
1691 ps_search_node++;
1692 if(tr_avail == 0)
1693 {
1694 ps_search_node->s_mv.i2_mvx = 0;
1695 ps_search_node->s_mv.i2_mvy = 0;
1696 ps_search_node->i1_ref_idx = i1_ref_idx;
1697 ps_search_node->u1_is_avail = 0;
1698 ps_search_node->u1_subpel_done = 0;
1699 }
1700 else
1701 {
1702 /* ps_mv and pi1_ref_idx now point to the top left locn */
1703 ps_mv = ps_mv_base + (mvs_in_blk * (1 + jump));
1704 pi1_ref_idx = pi1_ref_idx_base + (mvs_in_blk * (1 + jump));
1705
1706 i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1707 pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1708
1709 if(-1 != i4_mv_pos_in_implicit_array)
1710 {
1711 COPY_MV_TO_SEARCH_NODE(
1712 ps_search_node,
1713 &ps_mv[i4_mv_pos_in_implicit_array],
1714 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1715 i1_ref_idx,
1716 shift);
1717 }
1718 else
1719 {
1720 ps_search_node->u1_is_avail = 0;
1721 ps_search_node->s_mv.i2_mvx = 0;
1722 ps_search_node->s_mv.i2_mvy = 0;
1723 ps_search_node->i1_ref_idx = i1_ref_idx;
1724 }
1725 }
1726
1727 /* Move to left */
1728 {
1729 /* ps_mv and pi1_ref_idx now point to the top left locn */
1730 ps_search_node = ps_left_neighbours;
1731 ps_mv = ps_mv_base + mvs_in_row;
1732 pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
1733
1734 i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1735 pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1736
1737 if(-1 != i4_mv_pos_in_implicit_array)
1738 {
1739 COPY_MV_TO_SEARCH_NODE(
1740 ps_search_node,
1741 &ps_mv[i4_mv_pos_in_implicit_array],
1742 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1743 i1_ref_idx,
1744 shift);
1745 }
1746 else
1747 {
1748 ps_search_node->u1_is_avail = 0;
1749 ps_search_node->s_mv.i2_mvx = 0;
1750 ps_search_node->s_mv.i2_mvy = 0;
1751 ps_search_node->i1_ref_idx = i1_ref_idx;
1752 }
1753 }
1754
1755 /* Move to l1 */
1756 if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
1757 {
1758 /* ps_mv and pi1_ref_idx now point to the top left locn */
1759 ps_search_node++;
1760 ps_mv += (mvs_in_row * (jump >> 1));
1761 pi1_ref_idx += (mvs_in_row * (jump >> 1));
1762
1763 i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1764 pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1765
1766 if(-1 != i4_mv_pos_in_implicit_array)
1767 {
1768 COPY_MV_TO_SEARCH_NODE(
1769 ps_search_node,
1770 &ps_mv[i4_mv_pos_in_implicit_array],
1771 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1772 i1_ref_idx,
1773 shift);
1774 }
1775 else
1776 {
1777 ps_search_node->u1_is_avail = 0;
1778 ps_search_node->s_mv.i2_mvx = 0;
1779 ps_search_node->s_mv.i2_mvy = 0;
1780 ps_search_node->i1_ref_idx = i1_ref_idx;
1781 }
1782 }
1783 else
1784 {
1785 ps_search_node++;
1786 ps_search_node->u1_is_avail = 0;
1787 ps_search_node->s_mv.i2_mvx = 0;
1788 ps_search_node->s_mv.i2_mvy = 0;
1789 ps_search_node->i1_ref_idx = i1_ref_idx;
1790 }
1791
1792 /* Move to bl */
1793 ps_search_node++;
1794 if(bl_avail == 0)
1795 {
1796 ps_search_node->s_mv.i2_mvx = 0;
1797 ps_search_node->s_mv.i2_mvy = 0;
1798 ps_search_node->i1_ref_idx = i1_ref_idx;
1799 ps_search_node->u1_is_avail = 0;
1800 }
1801 else
1802 {
1803 /* ps_mv and pi1_ref_idx now point to the top left locn */
1804 ps_mv = ps_mv_base + (mvs_in_row * (1 + jump));
1805 pi1_ref_idx = pi1_ref_idx_base + (mvs_in_row * (1 + jump));
1806
1807 i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1808 pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1809
1810 if(-1 != i4_mv_pos_in_implicit_array)
1811 {
1812 COPY_MV_TO_SEARCH_NODE(
1813 ps_search_node,
1814 &ps_mv[i4_mv_pos_in_implicit_array],
1815 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1816 i1_ref_idx,
1817 shift);
1818 }
1819 else
1820 {
1821 ps_search_node->u1_is_avail = 0;
1822 ps_search_node->s_mv.i2_mvx = 0;
1823 ps_search_node->s_mv.i2_mvy = 0;
1824 ps_search_node->i1_ref_idx = i1_ref_idx;
1825 }
1826 }
1827 }
1828
1829 /**
1830 ********************************************************************************
1831 * @fn void hme_fill_ctb_neighbour_mvs(layer_ctxt_t *ps_curr_layer,
1832 * S32 i4_blk_x,
1833 * S32 i4_blk_y,
1834 * mvgrid_t *ps_mv_grid ,
1835 * S32 i1_ref_id)
1836 *
1837 * @brief The 18x18 MV grid for a ctb, is filled in first row and 1st col
1838 * this corresponds to neighbours (TL, T, TR, L, BL)
1839 *
1840 * @param[in] ps_curr_layer: layer ctxt, has the mv bank structure pointer
1841 *
1842 * @param[in] blk_x : x coordinate of the block in mv bank
1843 *
1844 * @param[in] blk_y : y coordinate of the block in mv bank
1845 *
1846 * @param[in] ps_mv_grid : Grid (18x18 mvs at 4x4 level)
1847 *
1848 * @param[in] i1_ref_idx : Corresponds to ref idx from which to pick up mv
1849 * results, useful if multiple ref idx candts maintained separately.
1850 *
1851 * @return void
1852 ********************************************************************************
1853 */
hme_fill_ctb_neighbour_mvs(layer_ctxt_t * ps_curr_layer,S32 blk_x,S32 blk_y,mv_grid_t * ps_mv_grid,U08 u1_pred_dir_ctr,U08 u1_default_ref_id,S32 i4_num_act_ref_l0)1854 void hme_fill_ctb_neighbour_mvs(
1855 layer_ctxt_t *ps_curr_layer,
1856 S32 blk_x,
1857 S32 blk_y,
1858 mv_grid_t *ps_mv_grid,
1859 U08 u1_pred_dir_ctr,
1860 U08 u1_default_ref_id,
1861 S32 i4_num_act_ref_l0)
1862 {
1863 search_node_t *ps_grid_node;
1864 layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
1865 S32 i4_offset;
1866 hme_mv_t *ps_mv, *ps_mv_base;
1867 S08 *pi1_ref_idx, *pi1_ref_idx_base;
1868 S32 jump = 0, inc, i, mvs_in_blk, mvs_in_row;
1869
1870 if(ps_layer_mvbank->e_blk_size == BLK_4x4)
1871 {
1872 /* searching 16x16, mvs are for 4x4 */
1873 jump = 1;
1874 blk_x <<= 2;
1875 blk_y <<= 2;
1876 }
1877 else
1878 {
1879 /* Searching 16x16, mvs are for 8x8 */
1880 blk_x <<= 1;
1881 blk_y <<= 1;
1882 }
1883 ASSERT(ps_layer_mvbank->e_blk_size != BLK_16x16);
1884
1885 mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
1886 mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
1887
1888 /* Adjust the blk coord to point to top left locn */
1889 blk_x -= 1;
1890 blk_y -= 1;
1891
1892 /* Pick up the mvs from the location */
1893 i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
1894 i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
1895
1896 i4_offset += (u1_pred_dir_ctr == 1);
1897
1898 ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
1899 pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
1900
1901 ps_mv_base = ps_mv;
1902 pi1_ref_idx_base = pi1_ref_idx;
1903
1904 /* the 0, 0 entry of the grid pts to top left for the ctb */
1905 ps_grid_node = &ps_mv_grid->as_node[0];
1906
1907 /* Copy 18 mvs at 4x4 level including top left, 16 top mvs for ctb, 1 tr */
1908 for(i = 0; i < 18; i++)
1909 {
1910 COPY_MV_TO_SEARCH_NODE(ps_grid_node, ps_mv, pi1_ref_idx, u1_default_ref_id, 0);
1911 ps_grid_node++;
1912 inc = 1;
1913 /* If blk size is 8x8, then every 2 grid nodes are updated with same mv */
1914 if(i & 1)
1915 inc = jump;
1916
1917 ps_mv += (mvs_in_blk * inc);
1918 pi1_ref_idx += (mvs_in_blk * inc);
1919 }
1920
1921 ps_mv = ps_mv_base + mvs_in_row;
1922 pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
1923
1924 /* now copy left 16 left mvs */
1925 ps_grid_node = &ps_mv_grid->as_node[0];
1926 ps_grid_node += (ps_mv_grid->i4_stride);
1927 for(i = 0; i < 16; i++)
1928 {
1929 COPY_MV_TO_SEARCH_NODE(ps_grid_node, ps_mv, pi1_ref_idx, u1_default_ref_id, 0);
1930 ps_grid_node += ps_mv_grid->i4_stride;
1931 inc = 1;
1932 /* If blk size is 8x8, then every 2 grid nodes are updated with same mv */
1933 if(!(i & 1))
1934 inc = jump;
1935
1936 ps_mv += (mvs_in_row * inc);
1937 pi1_ref_idx += (mvs_in_row * inc);
1938 }
1939 /* last one set to invalid as bottom left not yet encoded */
1940 ps_grid_node->u1_is_avail = 0;
1941 }
1942
/**
********************************************************************************
*  @fn     void hme_reset_wkg_mem(buf_mgr_t *ps_buf_mgr)
*
*  @brief  Marks the working memory tracked by the buffer manager as unused
*          by clearing its used-bytes counter. The memory pointer and the
*          total size are left untouched.
*
*  @param[in,out] ps_buf_mgr : buffer manager whose usage counter is reset
*
*  @return void
********************************************************************************
*/
void hme_reset_wkg_mem(buf_mgr_t *ps_buf_mgr)
{
    ps_buf_mgr->i4_used = 0;
}
/**
********************************************************************************
*  @fn     void hme_init_wkg_mem(buf_mgr_t *ps_buf_mgr, U08 *pu1_mem, S32 size)
*
*  @brief  Attaches a working memory area to the buffer manager and starts
*          with none of it marked as used.
*
*  @param[out] ps_buf_mgr : buffer manager to initialise
*
*  @param[in] pu1_mem : start of the working memory handed to the manager
*
*  @param[in] size : value stored as the total size of the working memory
*
*  @return void
********************************************************************************
*/
void hme_init_wkg_mem(buf_mgr_t *ps_buf_mgr, U08 *pu1_mem, S32 size)
{
    ps_buf_mgr->pu1_wkg_mem = pu1_mem;
    ps_buf_mgr->i4_total = size;

    /* Nothing consumed yet (same effect as hme_reset_wkg_mem) */
    ps_buf_mgr->i4_used = 0;
}
1953
/**
********************************************************************************
*  @fn     void hme_init_mv_grid(mv_grid_t *ps_mv_grid)
*
*  @brief  Sets the stride and start offset of a ctb mv grid and marks the
*          16x16 nodes that start at the start offset as available.
*
*  @param[out] ps_mv_grid : grid to initialise
*
*  @return void
********************************************************************************
*/
void hme_init_mv_grid(mv_grid_t *ps_mv_grid)
{
    S32 i4_row, i4_col;
    S32 i4_row_start;

    /*************************************************************************/
    /* We have a 64x64 CTB in the worst case. For this, we have 16x16 4x4 MVs*/
    /* Additionally, we have 1 neighbour on each side. This makes it a 18x18 */
    /* MV Grid. The boundary of this Grid on all sides are neighbours and the*/
    /* left and top edges of this grid is filled run time. The center portion*/
    /* represents the actual CTB MVs (16x16) and is also filled run time.    */
    /* However, the availability is always set as available (init time)      */
    /*************************************************************************/
    ps_mv_grid->i4_stride = NUM_COLUMNS_IN_CTB_GRID;
    ps_mv_grid->i4_start_offset = ps_mv_grid->i4_stride + CTB_MV_GRID_PAD;

    i4_row_start = ps_mv_grid->i4_start_offset;
    for(i4_row = 0; i4_row < 16; i4_row++)
    {
        for(i4_col = 0; i4_col < 16; i4_col++)
        {
            ps_mv_grid->as_node[i4_row_start + i4_col].u1_is_avail = 1;
        }

        i4_row_start += ps_mv_grid->i4_stride;
    }
}
1979 /**
1980 ********************************************************************************
1981 * @fn void hme_pad_left(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
1982 *
1983 * @brief Pads horizontally to left side. Each pixel replicated across a line
1984 *
1985 * @param[in] pu1_dst : destination pointer. Points to the pixel to be repeated
1986 *
1987 * @param[in] stride : stride of destination buffer
1988 *
1989 * @param[in] pad_wd : Amt of horizontal padding to be done
1990 *
1991 * @param[in] pad_ht : Number of lines for which horizontal padding to be done
1992 *
1993 * @return void
1994 ********************************************************************************
1995 */
hme_pad_left(U08 * pu1_dst,S32 stride,S32 pad_wd,S32 pad_ht)1996 void hme_pad_left(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
1997 {
1998 S32 i, j;
1999 U08 u1_val;
2000 for(i = 0; i < pad_ht; i++)
2001 {
2002 u1_val = pu1_dst[0];
2003 for(j = -pad_wd; j < 0; j++)
2004 pu1_dst[j] = u1_val;
2005
2006 pu1_dst += stride;
2007 }
2008 }
2009 /**
2010 ********************************************************************************
2011 * @fn void hme_pad_right(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
2012 *
2013 * @brief Pads horizontally to rt side. Each pixel replicated across a line
2014 *
2015 * @param[in] pu1_dst : destination pointer. Points to the pixel to be repeated
2016 *
2017 * @param[in] stride : stride of destination buffer
2018 *
2019 * @param[in] pad_wd : Amt of horizontal padding to be done
2020 *
2021 * @param[in] pad_ht : Number of lines for which horizontal padding to be done
2022 *
2023 * @return void
2024 ********************************************************************************
2025 */
hme_pad_right(U08 * pu1_dst,S32 stride,S32 pad_wd,S32 pad_ht)2026 void hme_pad_right(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
2027 {
2028 S32 i, j;
2029 U08 u1_val;
2030 for(i = 0; i < pad_ht; i++)
2031 {
2032 u1_val = pu1_dst[0];
2033 for(j = 1; j <= pad_wd; j++)
2034 pu1_dst[j] = u1_val;
2035
2036 pu1_dst += stride;
2037 }
2038 }
2039 /**
2040 ********************************************************************************
2041 * @fn void hme_pad_top(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
2042 *
2043 * @brief Pads vertically on the top. Repeats the top line for top padding
2044 *
2045 * @param[in] pu1_dst : destination pointer. Points to the line to be repeated
2046 *
2047 * @param[in] stride : stride of destination buffer
2048 *
2049 * @param[in] pad_ht : Amt of vertical padding to be done
2050 *
2051 * @param[in] pad_wd : Number of columns for which vertical padding to be done
2052 *
2053 * @return void
2054 ********************************************************************************
2055 */
hme_pad_top(U08 * pu1_dst,S32 stride,S32 pad_ht,S32 pad_wd)2056 void hme_pad_top(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
2057 {
2058 S32 i;
2059 for(i = 1; i <= pad_ht; i++)
2060 memcpy(pu1_dst - (i * stride), pu1_dst, pad_wd);
2061 }
2062 /**
2063 ********************************************************************************
2064 * @fn void hme_pad_bot(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
2065 *
2066 * @brief Pads vertically on the bot. Repeats the top line for top padding
2067 *
2068 * @param[in] pu1_dst : destination pointer. Points to the line to be repeated
2069 *
2070 * @param[in] stride : stride of destination buffer
2071 *
2072 * @param[in] pad_ht : Amt of vertical padding to be done
2073 *
2074 * @param[in] pad_wd : Number of columns for which vertical padding to be done
2075 *
2076 * @return void
2077 ********************************************************************************
2078 */
hme_pad_bot(U08 * pu1_dst,S32 stride,S32 pad_ht,S32 pad_wd)2079 void hme_pad_bot(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
2080 {
2081 S32 i;
2082 for(i = 1; i <= pad_ht; i++)
2083 memcpy(pu1_dst + (i * stride), pu1_dst, pad_wd);
2084 }
2085
2086 /**
2087 ********************************************************************************
2088 * @fn void hme_get_wt_inp(layer_ctxt_t *ps_curr_layer, S32 pos_x,
2089 * S32 pos_y, S32 size)
2090 *
2091 * @brief Does weighting of the input in case the search needs to happen
2092 * with reference frames weighted
2093 *
2094 * @param[in] ps_curr_layer: layer ctxt
2095 *
2096 * @param[in] pos_x : x coordinate of the input blk in the picture
2097 *
2098 * @param[in] pos_y : y coordinate of hte input blk in the picture
2099 *
2100 * @param[in] size : size of the input block
2101 *
2102 * @param[in] num_ref : Number of reference frames
2103 *
2104 * @return void
2105 ********************************************************************************
2106 */
hme_get_wt_inp(layer_ctxt_t * ps_curr_layer,wgt_pred_ctxt_t * ps_wt_inp_prms,S32 dst_stride,S32 pos_x,S32 pos_y,S32 size,S32 num_ref,U08 u1_is_wt_pred_on)2107 void hme_get_wt_inp(
2108 layer_ctxt_t *ps_curr_layer,
2109 wgt_pred_ctxt_t *ps_wt_inp_prms,
2110 S32 dst_stride,
2111 S32 pos_x,
2112 S32 pos_y,
2113 S32 size,
2114 S32 num_ref,
2115 U08 u1_is_wt_pred_on)
2116 {
2117 S32 ref, i, j;
2118 U08 *pu1_src, *pu1_dst, *pu1_src_tmp;
2119 S32 log_wdc = ps_wt_inp_prms->wpred_log_wdc;
2120 S32 x_count, y_count;
2121
2122 /* Fixed source */
2123 pu1_src = ps_curr_layer->pu1_inp;
2124
2125 /* Make sure the start positions of block are inside frame limits */
2126 pos_x = MIN(pos_x, ps_curr_layer->i4_wd - 1);
2127 pos_y = MIN(pos_y, ps_curr_layer->i4_ht - 1);
2128
2129 pu1_src += (pos_x + (pos_y * ps_curr_layer->i4_inp_stride));
2130
2131 /* In case we handle imcomplete CTBs, we copy only as much as reqd */
2132 /* from input buffers to prevent out of bound accesses. In this */
2133 /* case, we do padding in x or y or both dirns */
2134 x_count = MIN(size, (ps_curr_layer->i4_wd - pos_x));
2135 y_count = MIN(size, (ps_curr_layer->i4_ht - pos_y));
2136
2137 for(i = 0; i < num_ref + 1; i++)
2138 {
2139 ps_wt_inp_prms->apu1_wt_inp[i] = ps_wt_inp_prms->apu1_wt_inp_buf_array[num_ref];
2140 }
2141
2142 /* Run thro all ref ids */
2143 for(ref = 0; ref < num_ref + 1; ref++)
2144 {
2145 S32 wt, off;
2146 S32 inv_wt;
2147
2148 pu1_src_tmp = pu1_src;
2149
2150 /* Each ref id may have differnet wt/offset. */
2151 /* So we have unique inp buf for each ref id */
2152 pu1_dst = ps_wt_inp_prms->apu1_wt_inp[ref];
2153
2154 if(ref == num_ref)
2155 {
2156 /* last ref will be non weighted input */
2157 for(i = 0; i < y_count; i++)
2158 {
2159 for(j = 0; j < x_count; j++)
2160 {
2161 pu1_dst[j] = pu1_src_tmp[j];
2162 }
2163 pu1_src_tmp += ps_curr_layer->i4_inp_stride;
2164 pu1_dst += dst_stride;
2165 }
2166 }
2167 else
2168 {
2169 /* Wt and off specific to this ref id */
2170 wt = ps_wt_inp_prms->a_wpred_wt[ref];
2171 inv_wt = ps_wt_inp_prms->a_inv_wpred_wt[ref];
2172 off = ps_wt_inp_prms->a_wpred_off[ref];
2173
2174 /* Generate size*size worth of modified input samples */
2175 for(i = 0; i < y_count; i++)
2176 {
2177 for(j = 0; j < x_count; j++)
2178 {
2179 S32 tmp;
2180
2181 /* Since we scale input, we use inverse transform of wt pred */
2182 //tmp = HME_INV_WT_PRED(pu1_src_tmp[j], wt, off, log_wdc);
2183 tmp = HME_INV_WT_PRED1(pu1_src_tmp[j], inv_wt, off, log_wdc);
2184 pu1_dst[j] = (U08)(HME_CLIP(tmp, 0, 255));
2185 }
2186 pu1_src_tmp += ps_curr_layer->i4_inp_stride;
2187 pu1_dst += dst_stride;
2188 }
2189 }
2190
2191 /* Check and do padding in right direction if need be */
2192 pu1_dst = ps_wt_inp_prms->apu1_wt_inp[ref];
2193 if(x_count != size)
2194 {
2195 hme_pad_right(pu1_dst + x_count - 1, dst_stride, size - x_count, y_count);
2196 }
2197
2198 /* Check and do padding in bottom directino if need be */
2199 if(y_count != size)
2200 {
2201 hme_pad_bot(pu1_dst + (y_count - 1) * dst_stride, dst_stride, size - y_count, size);
2202 }
2203 }
2204 }
2205 /**
2206 ****************************************************************************************
2207 * @fn hme_pick_best_pu_cand(pu_result_t *ps_pu_results_dst,
2208 * pu_result_t *ps_pu_results_inp,
2209 * UWORD8 u1_num_results_per_part,
2210 * UWORD8 u1_num_best_cand)
2211 *
2212 * @brief Does the candidate evaluation across all the current candidates and returns
2213 * the best two or one candidates across given lists
2214 *
2215 * @param[in] - ps_pu_results_inp : Pointer to the input candidates
2216 * - u1_num_results_per_part: Number of available candidates
2217 *
2218 * @param[out] - ps_pu_results_dst : Pointer to best PU results
2219 *
2220 ****************************************************************************************
2221 */
hme_pick_best_pu_cand(pu_result_t * ps_pu_results_dst,pu_result_t * ps_pu_results_list0,pu_result_t * ps_pu_results_list1,UWORD8 u1_num_results_per_part_l0,UWORD8 u1_num_results_per_part_l1,UWORD8 u1_candidate_rank)2222 void hme_pick_best_pu_cand(
2223 pu_result_t *ps_pu_results_dst,
2224 pu_result_t *ps_pu_results_list0,
2225 pu_result_t *ps_pu_results_list1,
2226 UWORD8 u1_num_results_per_part_l0,
2227 UWORD8 u1_num_results_per_part_l1,
2228 UWORD8 u1_candidate_rank)
2229 {
2230 struct cand_pos_data
2231 {
2232 U08 u1_cand_list_id;
2233
2234 U08 u1_cand_id_in_cand_list;
2235 } as_cand_pos_data[MAX_NUM_RESULTS_PER_PART_LIST << 1];
2236
2237 S32 ai4_costs[MAX_NUM_RESULTS_PER_PART_LIST << 1];
2238 U08 i, j;
2239
2240 for(i = 0; i < u1_num_results_per_part_l0; i++)
2241 {
2242 ai4_costs[i] = ps_pu_results_list0[i].i4_tot_cost;
2243 as_cand_pos_data[i].u1_cand_id_in_cand_list = i;
2244 as_cand_pos_data[i].u1_cand_list_id = 0;
2245 }
2246
2247 for(i = 0, j = u1_num_results_per_part_l0; i < u1_num_results_per_part_l1; i++, j++)
2248 {
2249 ai4_costs[j] = ps_pu_results_list1[i].i4_tot_cost;
2250 as_cand_pos_data[j].u1_cand_id_in_cand_list = i;
2251 as_cand_pos_data[j].u1_cand_list_id = 1;
2252 }
2253
2254 SORT_PRIMARY_INTTYPE_ARRAY_AND_REORDER_GENERIC_COMPANION_ARRAY(
2255 ai4_costs,
2256 as_cand_pos_data,
2257 u1_num_results_per_part_l0 + u1_num_results_per_part_l1,
2258 struct cand_pos_data);
2259
2260 if(as_cand_pos_data[u1_candidate_rank].u1_cand_list_id)
2261 {
2262 ps_pu_results_dst[0] =
2263 ps_pu_results_list1[as_cand_pos_data[u1_candidate_rank].u1_cand_id_in_cand_list];
2264 }
2265 else
2266 {
2267 ps_pu_results_dst[0] =
2268 ps_pu_results_list0[as_cand_pos_data[u1_candidate_rank].u1_cand_id_in_cand_list];
2269 }
2270 }
2271
2272 /* Returns the number of candidates */
hme_tu_recur_cand_harvester(part_type_results_t * ps_cand_container,inter_pu_results_t * ps_pu_data,inter_ctb_prms_t * ps_inter_ctb_prms,S32 i4_part_mask)2273 static S32 hme_tu_recur_cand_harvester(
2274 part_type_results_t *ps_cand_container,
2275 inter_pu_results_t *ps_pu_data,
2276 inter_ctb_prms_t *ps_inter_ctb_prms,
2277 S32 i4_part_mask)
2278 {
2279 part_type_results_t s_cand_data;
2280
2281 U08 i, j;
2282 PART_ID_T e_part_id;
2283
2284 S32 i4_num_cands = 0;
2285
2286 /* 2Nx2N part_type decision part */
2287 if(i4_part_mask & ENABLE_2Nx2N)
2288 {
2289 U08 u1_num_candt_to_pick;
2290
2291 e_part_id = ge_part_type_to_part_id[PRT_2Nx2N][0];
2292
2293 ASSERT(ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands >= 1);
2294
2295 if(!ps_inter_ctb_prms->i4_bidir_enabled || (i4_part_mask == ENABLE_2Nx2N))
2296 {
2297 u1_num_candt_to_pick =
2298 MIN(ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands,
2299 ps_pu_data->u1_num_results_per_part_l0[e_part_id] +
2300 ps_pu_data->u1_num_results_per_part_l1[e_part_id]);
2301 }
2302 else
2303 {
2304 u1_num_candt_to_pick =
2305 MIN(1,
2306 ps_pu_data->u1_num_results_per_part_l0[e_part_id] +
2307 ps_pu_data->u1_num_results_per_part_l1[e_part_id]);
2308 }
2309
2310 if(ME_XTREME_SPEED_25 == ps_inter_ctb_prms->i1_quality_preset)
2311 {
2312 u1_num_candt_to_pick = MIN(u1_num_candt_to_pick, MAX_NUM_TU_RECUR_CANDS_IN_XS25);
2313 }
2314
2315 for(i = 0; i < u1_num_candt_to_pick; i++)
2316 {
2317 /* Picks the best two candidates of all the available ones */
2318 hme_pick_best_pu_cand(
2319 ps_cand_container[i4_num_cands].as_pu_results,
2320 ps_pu_data->aps_pu_results[0][e_part_id],
2321 ps_pu_data->aps_pu_results[1][e_part_id],
2322 ps_pu_data->u1_num_results_per_part_l0[e_part_id],
2323 ps_pu_data->u1_num_results_per_part_l1[e_part_id],
2324 i);
2325
2326 /* Update the other params part_type and total_cost in part_type_results */
2327 ps_cand_container[i4_num_cands].u1_part_type = e_part_id;
2328 ps_cand_container[i4_num_cands].i4_tot_cost =
2329 ps_cand_container[i4_num_cands].as_pu_results->i4_tot_cost;
2330
2331 i4_num_cands++;
2332 }
2333 }
2334
2335 /* SMP */
2336 {
2337 S32 i4_total_cost;
2338
2339 S32 num_part_types = PRT_Nx2N - PRT_2NxN + 1;
2340 S32 start_part_type = PRT_2NxN;
2341 S32 best_cost = MAX_32BIT_VAL;
2342 S32 part_type_cnt = 0;
2343
2344 for(j = 0; j < num_part_types; j++)
2345 {
2346 if(!(i4_part_mask & gai4_part_type_to_part_mask[j + start_part_type]))
2347 {
2348 continue;
2349 }
2350
2351 for(i = 0; i < gau1_num_parts_in_part_type[j + start_part_type]; i++)
2352 {
2353 e_part_id = ge_part_type_to_part_id[j + start_part_type][i];
2354
2355 /* Pick the best candidate for the partition acroos lists */
2356 hme_pick_best_pu_cand(
2357 &s_cand_data.as_pu_results[i],
2358 ps_pu_data->aps_pu_results[0][e_part_id],
2359 ps_pu_data->aps_pu_results[1][e_part_id],
2360 ps_pu_data->u1_num_results_per_part_l0[e_part_id],
2361 ps_pu_data->u1_num_results_per_part_l1[e_part_id],
2362 0);
2363 }
2364
2365 i4_total_cost =
2366 s_cand_data.as_pu_results[0].i4_tot_cost + s_cand_data.as_pu_results[1].i4_tot_cost;
2367
2368 if(i4_total_cost < best_cost)
2369 {
2370 /* Stores the index of the best part_type in the sub-catoegory */
2371 best_cost = i4_total_cost;
2372
2373 ps_cand_container[i4_num_cands] = s_cand_data;
2374
2375 ps_cand_container[i4_num_cands].u1_part_type = j + start_part_type;
2376 ps_cand_container[i4_num_cands].i4_tot_cost = i4_total_cost;
2377 }
2378
2379 part_type_cnt++;
2380 }
2381
2382 i4_num_cands = (part_type_cnt) ? (i4_num_cands + 1) : i4_num_cands;
2383 }
2384
2385 /* AMP */
2386 {
2387 S32 i4_total_cost;
2388
2389 S32 num_part_types = PRT_nRx2N - PRT_2NxnU + 1;
2390 S32 start_part_type = PRT_2NxnU;
2391 S32 best_cost = MAX_32BIT_VAL;
2392 S32 part_type_cnt = 0;
2393
2394 for(j = 0; j < num_part_types; j++)
2395 {
2396 if(!(i4_part_mask & gai4_part_type_to_part_mask[j + start_part_type]))
2397 {
2398 continue;
2399 }
2400
2401 for(i = 0; i < gau1_num_parts_in_part_type[j + start_part_type]; i++)
2402 {
2403 e_part_id = ge_part_type_to_part_id[j + start_part_type][i];
2404
2405 /* Pick the best candidate for the partition acroos lists */
2406 hme_pick_best_pu_cand(
2407 &s_cand_data.as_pu_results[i],
2408 ps_pu_data->aps_pu_results[0][e_part_id],
2409 ps_pu_data->aps_pu_results[1][e_part_id],
2410 ps_pu_data->u1_num_results_per_part_l0[e_part_id],
2411 ps_pu_data->u1_num_results_per_part_l1[e_part_id],
2412 0);
2413 }
2414
2415 i4_total_cost =
2416 s_cand_data.as_pu_results[0].i4_tot_cost + s_cand_data.as_pu_results[1].i4_tot_cost;
2417
2418 if(i4_total_cost < best_cost)
2419 {
2420 /* Stores the index of the best part_type in the sub-catoegory */
2421 best_cost = i4_total_cost;
2422
2423 ps_cand_container[i4_num_cands] = s_cand_data;
2424
2425 ps_cand_container[i4_num_cands].u1_part_type = j + start_part_type;
2426 ps_cand_container[i4_num_cands].i4_tot_cost = i4_total_cost;
2427 }
2428
2429 part_type_cnt++;
2430 }
2431
2432 i4_num_cands = (part_type_cnt) ? (i4_num_cands + 1) : i4_num_cands;
2433 }
2434
2435 return i4_num_cands;
2436 }
2437
2438 /**
2439 *****************************************************************************
2440 * @fn hme_decide_part_types(search_results_t *ps_search_results)
2441 *
* @brief Does uni/bi evaluation across various partition types,
2443 * decides best inter partition types for the CU, compares
2444 * intra cost and decides the best K results for the CU
2445 *
* This is called post subpel refinement for 16x16s, 8x8s and
2447 * for post merge evaluation for 32x32,64x64 CUs
2448 *
2449 * @param[in,out] ps_search_results : Search results data structure
2450 * - In : 2 lists of upto 2mvs & refids, active partition mask
2451 * - Out: Best results for final rdo evaluation of the cu
2452 *
2453 * @param[in] ps_subpel_prms : Sub pel params data structure
2454 *
2455 *
2456 * @par Description
2457 * --------------------------------------------------------------------------------
2458 * Flow:
2459 * for each category (SMP,AMP,2Nx2N based on part mask)
2460 * {
2461 * for each part_type
2462 * {
2463 * for each part
2464 * pick best candidate from each list
2465 * combine uni part type
2466 * update best results for part type
2467 * }
2468 * pick the best part type for given category (for SMP & AMP)
2469 * }
2470 * ||
2471 * ||
2472 * \/
2473 * Bi-Pred evaluation:
2474 * for upto 4 best part types
2475 * {
2476 * for each part
2477 * {
2478 * compute fixed size had for all uni and remember coeffs
2479 * compute bisatd
2480 * uni vs bi and gives upto two results
2481 * also gives the pt level pred buffer
2482 * }
2483 * }
2484 * ||
2485 * ||
2486 * \/
2487 * select X candidates for tu recursion as per the Note below
2488 * tu_rec_on_part_type (reuse transform coeffs)
2489 * ||
2490 * ||
2491 * \/
2492 * insert intra nodes at appropriate result id
2493 * ||
2494 * ||
2495 * \/
* populate y best results for rdo based on preset
2497 *
2498 * Note :
2499 * number of TU rec for P pics : 2 2nx2n + 1 smp + 1 amp for ms or 9 for hq
2500 * number of TU rec for B pics : 1 2nx2n + 1 smp + 1 amp for ms or 2 uni 2nx2n + 1 smp + 1 amp for ms or 9 for hq
2501 * --------------------------------------------------------------------------------
2502 *
2503 * @return None
2504 ********************************************************************************
2505 */
hme_decide_part_types(inter_cu_results_t * ps_cu_results,inter_pu_results_t * ps_pu_results,inter_ctb_prms_t * ps_inter_ctb_prms,me_frm_ctxt_t * ps_ctxt,ihevce_cmn_opt_func_t * ps_cmn_utils_optimised_function_list,ihevce_me_optimised_function_list_t * ps_me_optimised_function_list)2506 void hme_decide_part_types(
2507 inter_cu_results_t *ps_cu_results,
2508 inter_pu_results_t *ps_pu_results,
2509 inter_ctb_prms_t *ps_inter_ctb_prms,
2510 me_frm_ctxt_t *ps_ctxt,
2511 ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
2512 ihevce_me_optimised_function_list_t *ps_me_optimised_function_list
2513
2514 )
2515 {
2516 S32 i, j;
2517 S32 i4_part_mask;
2518 ULWORD64 au8_pred_sigmaXSquare[NUM_BEST_ME_OUTPUTS][NUM_INTER_PU_PARTS];
2519 ULWORD64 au8_pred_sigmaX[NUM_BEST_ME_OUTPUTS][NUM_INTER_PU_PARTS];
2520 S32 i4_noise_term;
2521 WORD32 e_part_id;
2522
2523 PF_SAD_FXN_TU_REC apf_err_compute[4];
2524
2525 part_type_results_t as_part_type_results[NUM_BEST_ME_OUTPUTS];
2526 part_type_results_t *ps_part_type_results;
2527
2528 S32 num_best_cand = 0;
2529 const S32 i4_default_src_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;
2530
2531 i4_part_mask = ps_cu_results->i4_part_mask;
2532
2533 num_best_cand = hme_tu_recur_cand_harvester(
2534 as_part_type_results, ps_pu_results, ps_inter_ctb_prms, i4_part_mask);
2535
2536 /* Partition ID for the current PU */
2537 e_part_id = (UWORD8)ge_part_type_to_part_id[PRT_2Nx2N][0];
2538
2539 ps_part_type_results = as_part_type_results;
2540 for(i = 0; i < num_best_cand; i++)
2541 {
2542 hme_compute_pred_and_evaluate_bi(
2543 ps_cu_results,
2544 ps_pu_results,
2545 ps_inter_ctb_prms,
2546 &(ps_part_type_results[i]),
2547 au8_pred_sigmaXSquare[i],
2548 au8_pred_sigmaX[i],
2549 ps_cmn_utils_optimised_function_list,
2550 ps_me_optimised_function_list
2551
2552 );
2553 }
2554 /* Perform TU_REC on the best candidates selected */
2555 {
2556 WORD32 i4_sad_grid;
2557 WORD32 ai4_tu_split_flag[4];
2558 WORD32 ai4_tu_early_cbf[4];
2559
2560 WORD32 best_cost[NUM_BEST_ME_OUTPUTS];
2561 WORD32 ai4_final_idx[NUM_BEST_ME_OUTPUTS];
2562 WORD16 i2_wght;
2563 WORD32 i4_satd;
2564
2565 err_prms_t s_err_prms;
2566 err_prms_t *ps_err_prms = &s_err_prms;
2567
2568 /* Default cost and final idx initialization */
2569 for(i = 0; i < num_best_cand; i++)
2570 {
2571 best_cost[i] = MAX_32BIT_VAL;
2572 ai4_final_idx[i] = -1;
2573 }
2574
2575 /* Assign the stad function to the err_compute function pointer :
2576 Implemented only for 32x32 and 64x64, hence 16x16 and 8x8 are kept NULL */
2577 apf_err_compute[CU_64x64] = hme_evalsatd_pt_pu_64x64_tu_rec;
2578 apf_err_compute[CU_32x32] = hme_evalsatd_pt_pu_32x32_tu_rec;
2579 apf_err_compute[CU_16x16] = hme_evalsatd_pt_pu_16x16_tu_rec;
2580 apf_err_compute[CU_8x8] = hme_evalsatd_pt_pu_8x8_tu_rec;
2581
2582 ps_err_prms->pi4_sad_grid = &i4_sad_grid;
2583 ps_err_prms->pi4_tu_split_flags = ai4_tu_split_flag;
2584 ps_err_prms->u1_max_tr_depth = ps_inter_ctb_prms->u1_max_tr_depth;
2585 ps_err_prms->pi4_tu_early_cbf = ai4_tu_early_cbf;
2586 ps_err_prms->i4_grid_mask = 1;
2587 ps_err_prms->pu1_wkg_mem = ps_inter_ctb_prms->pu1_wkg_mem;
2588 ps_err_prms->u1_max_tr_size = 32;
2589
2590 if(ps_inter_ctb_prms->u1_is_cu_noisy)
2591 {
2592 ps_err_prms->u1_max_tr_size = MAX_TU_SIZE_WHEN_NOISY;
2593 }
2594
2595 /* TU_REC for the best candidates, as mentioned in NOTE above (except candidates that
2596 are disabled by Part_mask */
2597 for(i = 0; i < num_best_cand; i++)
2598 {
2599 part_type_results_t *ps_best_results;
2600 pu_result_t *ps_pu_result;
2601 WORD32 part_type_cost;
2602 WORD32 cand_idx;
2603
2604 WORD32 pred_dir;
2605 S32 i4_inp_off;
2606
2607 S32 lambda;
2608 U08 lambda_qshift;
2609 U08 *apu1_inp[MAX_NUM_INTER_PARTS];
2610 S16 ai2_wt[MAX_NUM_INTER_PARTS];
2611 S32 ai4_inv_wt[MAX_NUM_INTER_PARTS];
2612 S32 ai4_inv_wt_shift_val[MAX_NUM_INTER_PARTS];
2613
2614 WORD32 part_type = ps_part_type_results[i].u1_part_type;
2615 WORD32 e_cu_size = ps_cu_results->u1_cu_size;
2616 WORD32 e_blk_size = ge_cu_size_to_blk_size[e_cu_size];
2617 U08 u1_num_parts = gau1_num_parts_in_part_type[part_type];
2618 U08 u1_inp_buf_idx = UCHAR_MAX;
2619
2620 ps_err_prms->i4_part_mask = i4_part_mask;
2621 ps_err_prms->i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
2622 ps_err_prms->i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
2623 ps_err_prms->pu1_ref = ps_part_type_results[i].pu1_pred;
2624 ps_err_prms->i4_ref_stride = ps_part_type_results[i].i4_pred_stride;
2625
2626 /* Current offset for the present part type */
2627 i4_inp_off = ps_cu_results->i4_inp_offset;
2628
2629 ps_best_results = &(ps_part_type_results[i]);
2630
2631 part_type_cost = 0;
2632 lambda = ps_inter_ctb_prms->i4_lamda;
2633 lambda_qshift = ps_inter_ctb_prms->u1_lamda_qshift;
2634
2635 for(j = 0; j < u1_num_parts; j++)
2636 {
2637 ps_pu_result = &(ps_best_results->as_pu_results[j]);
2638
2639 pred_dir = ps_pu_result->pu.b2_pred_mode;
2640
2641 if(PRED_L0 == pred_dir)
2642 {
2643 apu1_inp[j] =
2644 ps_inter_ctb_prms->apu1_wt_inp[PRED_L0][ps_pu_result->pu.mv.i1_l0_ref_idx] +
2645 i4_inp_off;
2646 ai2_wt[j] =
2647 ps_inter_ctb_prms->pps_rec_list_l0[ps_pu_result->pu.mv.i1_l0_ref_idx]
2648 ->s_weight_offset.i2_luma_weight;
2649 ai4_inv_wt[j] =
2650 ps_inter_ctb_prms->pi4_inv_wt
2651 [ps_inter_ctb_prms->pi1_past_list[ps_pu_result->pu.mv.i1_l0_ref_idx]];
2652 ai4_inv_wt_shift_val[j] =
2653 ps_inter_ctb_prms->pi4_inv_wt_shift_val
2654 [ps_inter_ctb_prms->pi1_past_list[ps_pu_result->pu.mv.i1_l0_ref_idx]];
2655 }
2656 else if(PRED_L1 == pred_dir)
2657 {
2658 apu1_inp[j] =
2659 ps_inter_ctb_prms->apu1_wt_inp[PRED_L1][ps_pu_result->pu.mv.i1_l1_ref_idx] +
2660 i4_inp_off;
2661 ai2_wt[j] =
2662 ps_inter_ctb_prms->pps_rec_list_l1[ps_pu_result->pu.mv.i1_l1_ref_idx]
2663 ->s_weight_offset.i2_luma_weight;
2664 ai4_inv_wt[j] =
2665 ps_inter_ctb_prms->pi4_inv_wt
2666 [ps_inter_ctb_prms->pi1_future_list[ps_pu_result->pu.mv.i1_l1_ref_idx]];
2667 ai4_inv_wt_shift_val[j] =
2668 ps_inter_ctb_prms->pi4_inv_wt_shift_val
2669 [ps_inter_ctb_prms->pi1_future_list[ps_pu_result->pu.mv.i1_l1_ref_idx]];
2670 }
2671 else if(PRED_BI == pred_dir)
2672 {
2673 apu1_inp[j] = ps_inter_ctb_prms->pu1_non_wt_inp + i4_inp_off;
2674 ai2_wt[j] = 1 << ps_inter_ctb_prms->wpred_log_wdc;
2675 ai4_inv_wt[j] = i4_default_src_wt;
2676 ai4_inv_wt_shift_val[j] = 0;
2677 }
2678 else
2679 {
2680 ASSERT(0);
2681 }
2682
2683 part_type_cost += ps_pu_result->i4_mv_cost;
2684 }
2685
2686 if((u1_num_parts == 1) || (ai2_wt[0] == ai2_wt[1]))
2687 {
2688 ps_err_prms->pu1_inp = apu1_inp[0];
2689 ps_err_prms->i4_inp_stride = ps_inter_ctb_prms->i4_inp_stride;
2690 i2_wght = ai2_wt[0];
2691 }
2692 else
2693 {
2694 if(1 != ihevce_get_free_pred_buf_indices(
2695 &u1_inp_buf_idx,
2696 &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator,
2697 1))
2698 {
2699 ASSERT(0);
2700 }
2701 else
2702 {
2703 U08 *pu1_dst =
2704 ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[u1_inp_buf_idx];
2705 U08 *pu1_src = apu1_inp[0];
2706 U08 u1_pu1_wd = (ps_part_type_results[i].as_pu_results[0].pu.b4_wd + 1) << 2;
2707 U08 u1_pu1_ht = (ps_part_type_results[i].as_pu_results[0].pu.b4_ht + 1) << 2;
2708 U08 u1_pu2_wd = (ps_part_type_results[i].as_pu_results[1].pu.b4_wd + 1) << 2;
2709 U08 u1_pu2_ht = (ps_part_type_results[i].as_pu_results[1].pu.b4_ht + 1) << 2;
2710
2711 ps_cmn_utils_optimised_function_list->pf_copy_2d(
2712 pu1_dst,
2713 MAX_CU_SIZE,
2714 pu1_src,
2715 ps_inter_ctb_prms->i4_inp_stride,
2716 u1_pu1_wd,
2717 u1_pu1_ht);
2718
2719 pu1_dst +=
2720 (gai1_is_part_vertical[ge_part_type_to_part_id[part_type][0]]
2721 ? u1_pu1_ht * MAX_CU_SIZE
2722 : u1_pu1_wd);
2723 pu1_src =
2724 apu1_inp[1] + (gai1_is_part_vertical[ge_part_type_to_part_id[part_type][0]]
2725 ? u1_pu1_ht * ps_inter_ctb_prms->i4_inp_stride
2726 : u1_pu1_wd);
2727
2728 ps_cmn_utils_optimised_function_list->pf_copy_2d(
2729 pu1_dst,
2730 MAX_CU_SIZE,
2731 pu1_src,
2732 ps_inter_ctb_prms->i4_inp_stride,
2733 u1_pu2_wd,
2734 u1_pu2_ht);
2735
2736 ps_err_prms->pu1_inp =
2737 ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[u1_inp_buf_idx];
2738 ps_err_prms->i4_inp_stride = MAX_CU_SIZE;
2739 i2_wght = ai2_wt[1];
2740 }
2741 }
2742
2743 #if !DISABLE_TU_RECURSION
2744 i4_satd = apf_err_compute[e_cu_size](
2745 ps_err_prms,
2746 lambda,
2747 lambda_qshift,
2748 ps_inter_ctb_prms->i4_qstep_ls8,
2749 ps_ctxt->ps_func_selector);
2750 #else
2751 ps_err_prms->pi4_sad_grid = &i4_satd;
2752
2753 pf_err_compute(ps_err_prms);
2754
2755 if((part_type == PRT_2Nx2N) || (e_cu_size != CU_64x64))
2756 {
2757 ai4_tu_split_flag[0] = 1;
2758 ai4_tu_split_flag[1] = 1;
2759 ai4_tu_split_flag[2] = 1;
2760 ai4_tu_split_flag[3] = 1;
2761
2762 ps_err_prms->i4_tu_split_cost = 0;
2763 }
2764 else
2765 {
2766 ai4_tu_split_flag[0] = 1;
2767 ai4_tu_split_flag[1] = 1;
2768 ai4_tu_split_flag[2] = 1;
2769 ai4_tu_split_flag[3] = 1;
2770
2771 ps_err_prms->i4_tu_split_cost = 0;
2772 }
2773 #endif
2774
2775 #if UNI_SATD_SCALE
2776 i4_satd = (i4_satd * i2_wght) >> ps_inter_ctb_prms->wpred_log_wdc;
2777 #endif
2778
2779 if(ps_inter_ctb_prms->u1_is_cu_noisy && ps_inter_ctb_prms->i4_alpha_stim_multiplier)
2780 {
2781 ULWORD64 u8_temp_var, u8_temp_var1, u8_pred_sigmaSquaredX;
2782 ULWORD64 u8_src_variance, u8_pred_variance;
2783 unsigned long u4_shift_val;
2784 S32 i4_bits_req;
2785 S32 i4_q_level = STIM_Q_FORMAT + ALPHA_Q_FORMAT;
2786
2787 if(1 == u1_num_parts)
2788 {
2789 u8_pred_sigmaSquaredX = au8_pred_sigmaX[i][0] * au8_pred_sigmaX[i][0];
2790 u8_pred_variance = au8_pred_sigmaXSquare[i][0] - u8_pred_sigmaSquaredX;
2791
2792 if(e_cu_size == CU_8x8)
2793 {
2794 PART_ID_T e_part_id = (PART_ID_T)(
2795 (PART_ID_NxN_TL) + (ps_cu_results->u1_x_off & 1) +
2796 ((ps_cu_results->u1_y_off & 1) << 1));
2797
2798 u4_shift_val = ihevce_calc_stim_injected_variance(
2799 ps_inter_ctb_prms->pu8_part_src_sigmaX,
2800 ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
2801 &u8_src_variance,
2802 ai4_inv_wt[0],
2803 ai4_inv_wt_shift_val[0],
2804 ps_inter_ctb_prms->wpred_log_wdc,
2805 e_part_id);
2806 }
2807 else
2808 {
2809 u4_shift_val = ihevce_calc_stim_injected_variance(
2810 ps_inter_ctb_prms->pu8_part_src_sigmaX,
2811 ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
2812 &u8_src_variance,
2813 ai4_inv_wt[0],
2814 ai4_inv_wt_shift_val[0],
2815 ps_inter_ctb_prms->wpred_log_wdc,
2816 e_part_id);
2817 }
2818
2819 u8_pred_variance = u8_pred_variance >> u4_shift_val;
2820
2821 GETRANGE64(i4_bits_req, u8_pred_variance);
2822
2823 if(i4_bits_req > 27)
2824 {
2825 u8_pred_variance = u8_pred_variance >> (i4_bits_req - 27);
2826 u8_src_variance = u8_src_variance >> (i4_bits_req - 27);
2827 }
2828
2829 if(u8_src_variance == u8_pred_variance)
2830 {
2831 u8_temp_var = (1 << STIM_Q_FORMAT);
2832 }
2833 else
2834 {
2835 u8_temp_var = (2 * u8_src_variance * u8_pred_variance);
2836 u8_temp_var = (u8_temp_var * (1 << STIM_Q_FORMAT));
2837 u8_temp_var1 = (u8_src_variance * u8_src_variance) +
2838 (u8_pred_variance * u8_pred_variance);
2839 u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2));
2840 u8_temp_var = (u8_temp_var / u8_temp_var1);
2841 }
2842
2843 i4_noise_term = (UWORD32)u8_temp_var;
2844
2845 ASSERT(i4_noise_term >= 0);
2846
2847 i4_noise_term *= ps_inter_ctb_prms->i4_alpha_stim_multiplier;
2848
2849 u8_temp_var = i4_satd;
2850 u8_temp_var *= ((1 << (i4_q_level)) - (i4_noise_term));
2851 u8_temp_var += (1 << ((i4_q_level)-1));
2852 i4_satd = (UWORD32)(u8_temp_var >> (i4_q_level));
2853 }
2854 else /*if(e_cu_size <= CU_16x16)*/
2855 {
2856 unsigned long temp_shift_val;
2857 PART_ID_T ae_part_id[MAX_NUM_INTER_PARTS] = {
2858 ge_part_type_to_part_id[part_type][0], ge_part_type_to_part_id[part_type][1]
2859 };
2860
2861 u4_shift_val = ihevce_calc_variance_for_diff_weights(
2862 ps_inter_ctb_prms->pu8_part_src_sigmaX,
2863 ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
2864 &u8_src_variance,
2865 ai4_inv_wt,
2866 ai4_inv_wt_shift_val,
2867 ps_best_results->as_pu_results,
2868 ps_inter_ctb_prms->wpred_log_wdc,
2869 ae_part_id,
2870 gau1_blk_size_to_wd[e_blk_size],
2871 u1_num_parts,
2872 1);
2873
2874 temp_shift_val = u4_shift_val;
2875
2876 u4_shift_val = ihevce_calc_variance_for_diff_weights(
2877 au8_pred_sigmaX[i],
2878 au8_pred_sigmaXSquare[i],
2879 &u8_pred_variance,
2880 ai4_inv_wt,
2881 ai4_inv_wt_shift_val,
2882 ps_best_results->as_pu_results,
2883 0,
2884 ae_part_id,
2885 gau1_blk_size_to_wd[e_blk_size],
2886 u1_num_parts,
2887 0);
2888
2889 u8_pred_variance = u8_pred_variance >> temp_shift_val;
2890
2891 GETRANGE64(i4_bits_req, u8_pred_variance);
2892
2893 if(i4_bits_req > 27)
2894 {
2895 u8_pred_variance = u8_pred_variance >> (i4_bits_req - 27);
2896 u8_src_variance = u8_src_variance >> (i4_bits_req - 27);
2897 }
2898
2899 if(u8_src_variance == u8_pred_variance)
2900 {
2901 u8_temp_var = (1 << STIM_Q_FORMAT);
2902 }
2903 else
2904 {
2905 u8_temp_var = (2 * u8_src_variance * u8_pred_variance);
2906 u8_temp_var = (u8_temp_var * (1 << STIM_Q_FORMAT));
2907 u8_temp_var1 = (u8_src_variance * u8_src_variance) +
2908 (u8_pred_variance * u8_pred_variance);
2909 u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2));
2910 u8_temp_var = (u8_temp_var / u8_temp_var1);
2911 }
2912
2913 i4_noise_term = (UWORD32)u8_temp_var;
2914
2915 ASSERT(i4_noise_term >= 0);
2916 ASSERT(i4_noise_term <= (1 << (STIM_Q_FORMAT + ALPHA_Q_FORMAT)));
2917
2918 i4_noise_term *= ps_inter_ctb_prms->i4_alpha_stim_multiplier;
2919
2920 u8_temp_var = i4_satd;
2921 u8_temp_var *= ((1 << (i4_q_level)) - (i4_noise_term));
2922 u8_temp_var += (1 << ((i4_q_level)-1));
2923 i4_satd = (UWORD32)(u8_temp_var >> (i4_q_level));
2924
2925 ASSERT(i4_satd >= 0);
2926 }
2927 }
2928
2929 if(u1_inp_buf_idx != UCHAR_MAX)
2930 {
2931 ihevce_set_pred_buf_as_free(
2932 &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator,
2933 u1_inp_buf_idx);
2934 }
2935
2936 part_type_cost += i4_satd;
2937
2938 /*Update the best results with the new results */
2939 ps_best_results->i4_tot_cost = part_type_cost;
2940
2941 ps_best_results->i4_tu_split_cost = ps_err_prms->i4_tu_split_cost;
2942
2943 ASSERT(ai4_tu_split_flag[0] >= 0);
2944 if(e_cu_size == CU_64x64)
2945 {
2946 ps_best_results->ai4_tu_split_flag[0] = ai4_tu_split_flag[0];
2947 ps_best_results->ai4_tu_split_flag[1] = ai4_tu_split_flag[1];
2948 ps_best_results->ai4_tu_split_flag[2] = ai4_tu_split_flag[2];
2949 ps_best_results->ai4_tu_split_flag[3] = ai4_tu_split_flag[3];
2950
2951 /* Update the TU early cbf flags into the best results structure */
2952 ps_best_results->ai4_tu_early_cbf[0] = ai4_tu_early_cbf[0];
2953 ps_best_results->ai4_tu_early_cbf[1] = ai4_tu_early_cbf[1];
2954 ps_best_results->ai4_tu_early_cbf[2] = ai4_tu_early_cbf[2];
2955 ps_best_results->ai4_tu_early_cbf[3] = ai4_tu_early_cbf[3];
2956 }
2957 else
2958 {
2959 ps_best_results->ai4_tu_split_flag[0] = ai4_tu_split_flag[0];
2960 ps_best_results->ai4_tu_early_cbf[0] = ai4_tu_early_cbf[0];
2961 }
2962
2963 if(part_type_cost < best_cost[num_best_cand - 1])
2964 {
2965 /* Push and sort current part type if it is one of the num_best_cand */
2966 for(cand_idx = 0; cand_idx < i; cand_idx++)
2967 {
2968 if(part_type_cost <= best_cost[cand_idx])
2969 {
2970 memmove(
2971 &ai4_final_idx[cand_idx + 1],
2972 &ai4_final_idx[cand_idx],
2973 sizeof(WORD32) * (i - cand_idx));
2974 memmove(
2975 &best_cost[cand_idx + 1],
2976 &best_cost[cand_idx],
2977 sizeof(WORD32) * (i - cand_idx));
2978 break;
2979 }
2980 }
2981
2982 ai4_final_idx[cand_idx] = i;
2983 best_cost[cand_idx] = part_type_cost;
2984 }
2985 }
2986
2987 ps_cu_results->u1_num_best_results = num_best_cand;
2988
2989 for(i = 0; i < num_best_cand; i++)
2990 {
2991 ASSERT(ai4_final_idx[i] < num_best_cand);
2992
2993 if(ai4_final_idx[i] != -1)
2994 {
2995 memcpy(
2996 &(ps_cu_results->ps_best_results[i]),
2997 &(ps_part_type_results[ai4_final_idx[i]]),
2998 sizeof(part_type_results_t));
2999 }
3000 }
3001 }
3002
3003 for(i = 0; i < (MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS)-2; i++)
3004 {
3005 ihevce_set_pred_buf_as_free(
3006 &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator, i);
3007 }
3008 }
3009
3010 /**
3011 **************************************************************************************************
3012 * @fn hme_populate_pus(search_results_t *ps_search_results, inter_cu_results_t *ps_cu_results)
3013 *
3014 * @brief Does the population of the inter_cu_results structure with the results after the
3015 * subpel refinement
3016 *
 *         This is called post subpel refinement for 16x16s, 8x8s and
3018 * for post merge evaluation for 32x32,64x64 CUs
3019 *
3020 * @param[in,out] ps_search_results : Search results data structure
3021 * - ps_cu_results : cu_results data structure
3022 * ps_pu_result : Pointer to the memory for storing PU's
3023 *
3024 ****************************************************************************************************
3025 */
void hme_populate_pus(
    me_ctxt_t *ps_thrd_ctxt,
    me_frm_ctxt_t *ps_ctxt,
    hme_subpel_prms_t *ps_subpel_prms,
    search_results_t *ps_search_results,
    inter_cu_results_t *ps_cu_results,
    inter_pu_results_t *ps_pu_results,
    pu_result_t *ps_pu_result,
    inter_ctb_prms_t *ps_inter_ctb_prms,
    wgt_pred_ctxt_t *ps_wt_prms,
    layer_ctxt_t *ps_curr_layer,
    U08 *pu1_pred_dir_searched,
    WORD32 i4_num_active_ref)
{
    /* Note: ps_thrd_ctxt and ps_curr_layer are not referenced in this function */
    WORD32 i4_buf, i4_part_type, i4_part, i4_ref, i4_node;
    WORD32 i4_non_wt_idx;
    WORD32 i4_enabled_parts = ps_search_results->i4_part_mask;
    UWORD8 u1_cu_size = ps_search_results->e_cu_size;

    /* ---- Prediction buffer manager setup ---- */
    hme_get_wkg_mem(&ps_ctxt->s_buf_mgr, MAX_WKG_MEM_SIZE_PER_THREAD);

    /* Start with every usage bit set, then clear the bit of each buffer */
    /* that is carved out of the working memory below                    */
    ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator = UINT_MAX;

    for(i4_buf = 0; i4_buf < MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS - 2; i4_buf++)
    {
        ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[i4_buf] =
            ps_ctxt->s_buf_mgr.pu1_wkg_mem + i4_buf * INTERP_OUT_BUF_SIZE;
        ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator &= ~(1 << i4_buf);
    }

    /* Remaining working memory starts right after the last pred buffer */
    ps_inter_ctb_prms->pu1_wkg_mem = ps_ctxt->s_buf_mgr.pu1_wkg_mem + i4_buf * INTERP_OUT_BUF_SIZE;

    /* ---- CTB level parameters ---- */
    ps_inter_ctb_prms->i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME;
    ps_inter_ctb_prms->u1_is_cu_noisy = ps_subpel_prms->u1_is_cu_noisy;
    ps_inter_ctb_prms->i4_lamda = ps_search_results->as_pred_ctxt[0].lambda;

    /* ---- CU level parameters, copied from the search results ---- */
    ps_cu_results->u1_cu_size = ps_search_results->e_cu_size;
    ps_cu_results->u1_num_best_results = ps_search_results->u1_num_best_results;
    ps_cu_results->i4_part_mask = ps_search_results->i4_part_mask;
    ps_cu_results->u1_x_off = ps_search_results->u1_x_off;
    ps_cu_results->u1_y_off = ps_search_results->u1_y_off;

    /* Index used below to fetch the unweighted input pointer */
    i4_non_wt_idx =
        ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1;

    /* ---- Walk every enabled partition type and each of its partitions ---- */
    for(i4_part_type = 0; i4_part_type < MAX_PART_TYPES; i4_part_type++)
    {
        if(0 == (i4_enabled_parts & gai4_part_type_to_part_mask[i4_part_type]))
        {
            continue;
        }

        for(i4_part = 0; i4_part < gau1_num_parts_in_part_type[i4_part_type]; i4_part++)
        {
            /* Partition ID and geometry of the current PU */
            UWORD8 u1_part_id = (UWORD8)ge_part_type_to_part_id[i4_part_type][i4_part];
            part_attr_t *ps_attr = &gas_part_attr_in_cu[u1_part_id];
            WORD32 i4_num_l0 = 0;
            WORD32 i4_num_l1 = 0;

            /* Slots of this partition inside the caller supplied PU storage: */
            /* list 0 entries first, list 1 entries TOT_NUM_PARTS slots later */
            ps_pu_results->aps_pu_results[0][u1_part_id] =
                ps_pu_result + (u1_part_id * MAX_NUM_RESULTS_PER_PART_LIST);
            ps_pu_results->aps_pu_results[1][u1_part_id] =
                ps_pu_result + ((u1_part_id + TOT_NUM_PARTS) * MAX_NUM_RESULTS_PER_PART_LIST);

            for(i4_ref = 0; i4_ref < i4_num_active_ref; i4_ref++)
            {
                U08 u1_pred_dir = pu1_pred_dir_searched[i4_ref];

                for(i4_node = 0; i4_node < ps_search_results->u1_num_results_per_part; i4_node++)
                {
                    search_node_t *ps_node =
                        &ps_search_results->aps_part_results[u1_pred_dir][u1_part_id][i4_node];
                    pu_result_t *ps_pu;
                    WORD32 i4_ref_lc;

                    /* Only nodes with subpel done are copied; stop at the first one without */
                    if(!ps_node->u1_subpel_done)
                    {
                        break;
                    }

                    i4_ref_lc = ps_node->i1_ref_idx;

                    ASSERT(i4_ref_lc >= 0);

                    if(u1_pred_dir)
                    {
                        /* Searched direction is non-zero : result goes to the L1 list */
                        ps_pu = ps_pu_results->aps_pu_results[1][u1_part_id] + i4_num_l1;

                        ASSERT(
                            ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_lc] <
                            ps_inter_ctb_prms->u1_num_active_ref_l1);

                        ps_pu->pu.mv.i1_l1_ref_idx = ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_lc];
                        ps_pu->pu.mv.s_l1_mv = ps_node->s_mv;
                        ps_pu->pu.mv.i1_l0_ref_idx = -1;
                        ps_pu->pu.b2_pred_mode = PRED_L1;

                        /* Input pointer for this reference, indexed by its L1 ref idx */
                        ps_inter_ctb_prms->apu1_wt_inp[1][ps_pu->pu.mv.i1_l1_ref_idx] =
                            ps_wt_prms->apu1_wt_inp[i4_ref_lc];

                        i4_num_l1++;
                    }
                    else
                    {
                        /* Searched direction is zero : result goes to the L0 list */
                        ps_pu = ps_pu_results->aps_pu_results[0][u1_part_id] + i4_num_l0;

                        ASSERT(
                            ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_lc] <
                            ps_inter_ctb_prms->u1_num_active_ref_l0);

                        ps_pu->pu.mv.i1_l0_ref_idx = ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_lc];
                        ps_pu->pu.mv.s_l0_mv = ps_node->s_mv;
                        ps_pu->pu.mv.i1_l1_ref_idx = -1;
                        ps_pu->pu.b2_pred_mode = PRED_L0;

                        /* Input pointer for this reference, indexed by its L0 ref idx */
                        ps_inter_ctb_prms->apu1_wt_inp[0][ps_pu->pu.mv.i1_l0_ref_idx] =
                            ps_wt_prms->apu1_wt_inp[i4_ref_lc];

                        i4_num_l0++;
                    }

                    ps_pu->i4_mv_cost = ps_node->i4_mv_cost;
                    ps_pu->i4_sdi = ps_node->i4_sdi;

#if UNI_SATD_SCALE
                    /* SATD is scaled by weight. Hence rescale the SATD */
                    ps_pu->i4_tot_cost =
                        ((ps_node->i4_sad * ps_ctxt->s_wt_pred.a_wpred_wt[ps_node->i1_ref_idx] +
                          (1 << (ps_inter_ctb_prms->wpred_log_wdc - 1))) >>
                         ps_inter_ctb_prms->wpred_log_wdc) +
                        ps_node->i4_mv_cost;
#endif

                    /* Packed (4 pel units, minus 1) width and height of the PU */
                    ps_pu->pu.b4_wd = ((ps_attr->u1_x_count << u1_cu_size) >> 2) - 1;
                    ps_pu->pu.b4_ht = ((ps_attr->u1_y_count << u1_cu_size) >> 2) - 1;

                    /* PU position in 4 pel units, including the CU offset */
                    ps_pu->pu.b4_pos_x =
                        (((ps_attr->u1_x_start << u1_cu_size) + ps_cu_results->u1_x_off) >> 2);
                    ps_pu->pu.b4_pos_y =
                        (((ps_attr->u1_y_start << u1_cu_size) + ps_cu_results->u1_y_off) >> 2);

                    ps_pu->pu.b1_intra_flag = 0;

                    /* Unweighted input */
                    ps_inter_ctb_prms->pu1_non_wt_inp = ps_wt_prms->apu1_wt_inp[i4_non_wt_idx];
                }
            }

            /* Number of results gathered for this partition in each list */
            ps_pu_results->u1_num_results_per_part_l0[u1_part_id] = i4_num_l0;
            ps_pu_results->u1_num_results_per_part_l1[u1_part_id] = i4_num_l1;
        }
    }
}
3211
3212 /**
3213 *********************************************************************************************************
3214 * @fn hme_populate_pus_8x8_cu(search_results_t *ps_search_results, inter_cu_results_t *ps_cu_results)
3215 *
3216 * @brief Does the population of the inter_cu_results structure with the results after the
3217 * subpel refinement
3218 *
 *         This is called post subpel refinement for 16x16s, 8x8s and
3220 * for post merge evaluation for 32x32,64x64 CUs
3221 *
3222 * @param[in,out] ps_search_results : Search results data structure
3223 * - ps_cu_results : cu_results data structure
3224 * ps_pu_results : Pointer for the PU's
3225 * ps_pu_result : Pointer to the memory for storing PU's
3226 *
3227 *********************************************************************************************************
3228 */
void hme_populate_pus_8x8_cu(
    me_ctxt_t *ps_thrd_ctxt,
    me_frm_ctxt_t *ps_ctxt,
    hme_subpel_prms_t *ps_subpel_prms,
    search_results_t *ps_search_results,
    inter_cu_results_t *ps_cu_results,
    inter_pu_results_t *ps_pu_results,
    pu_result_t *ps_pu_result,
    inter_ctb_prms_t *ps_inter_ctb_prms,
    U08 *pu1_pred_dir_searched,
    WORD32 i4_num_active_ref,
    U08 u1_blk_8x8_mask)
{
    /* Note: ps_thrd_ctxt and ps_subpel_prms are not referenced in this function */
    WORD32 i4_blk, i4_ref, i4_node;
    WORD32 i4_base_x = ps_search_results->u1_x_off;
    WORD32 i4_base_y = ps_search_results->u1_y_off;

    /* One iteration per 8x8 block; the CU and PU result pointers advance */
    /* on every iteration, whether or not the block is enabled in the mask */
    for(i4_blk = 0; i4_blk < 4; i4_blk++, ps_cu_results++, ps_pu_results++)
    {
        UWORD8 u1_pu_x, u1_pu_y;
        WORD32 i4_num_l0 = 0;
        WORD32 i4_num_l1 = 0;

        if(0 == (u1_blk_8x8_mask & (1 << i4_blk)))
        {
            continue;
        }

        /* CU level parameters. Only 2Nx2N is made available for 8x8 CUs; */
        /* support for 4x8 and 8x4 needs to be added later                */
        ps_cu_results->u1_cu_size = CU_8x8;
        ps_cu_results->u1_num_best_results = ps_search_results->u1_num_best_results;
        ps_cu_results->i4_part_mask = ENABLE_2Nx2N;
        ps_cu_results->u1_x_off = i4_base_x + (i4_blk & 1) * 8;
        ps_cu_results->u1_y_off = i4_base_y + (i4_blk >> 1) * 8;
        ps_cu_results->i4_inp_offset = ps_cu_results->u1_x_off + (ps_cu_results->u1_y_off * 64);

        ps_cu_results->ps_best_results[0].i4_tot_cost = MAX_32BIT_VAL;
        ps_cu_results->ps_best_results[0].i4_tu_split_cost = 0;

        /* PU position in units of 4 pels */
        u1_pu_x = ps_cu_results->u1_x_off >> 2;
        u1_pu_y = ps_cu_results->u1_y_off >> 2;

        if(0 == (ps_search_results->i4_part_mask & ENABLE_NxN))
        {
            /* NxN not enabled in the search results : leave this CU with */
            /* no candidates and a max-cost placeholder PU                */
            pu_result_t *ps_dummy_pu = &ps_cu_results->ps_best_results[0].as_pu_results[0];

            ps_cu_results->i4_part_mask = 0;
            ps_cu_results->u1_num_best_results = 0;

            ps_dummy_pu->i4_tot_cost = MAX_32BIT_VAL;

            ps_dummy_pu->pu.b4_wd = 1;
            ps_dummy_pu->pu.b4_ht = 1;
            ps_dummy_pu->pu.b4_pos_x = u1_pu_x;
            ps_dummy_pu->pu.b4_pos_y = u1_pu_y;
            ps_cu_results->ps_best_results[0].i4_tu_split_cost = 0;

            continue;
        }

        /* Slots of this 8x8 block inside the caller supplied PU storage */
        ps_pu_results->aps_pu_results[0][0] =
            ps_pu_result + (i4_blk * MAX_NUM_RESULTS_PER_PART_LIST);
        ps_pu_results->aps_pu_results[1][0] =
            ps_pu_result + ((i4_blk + TOT_NUM_PARTS) * MAX_NUM_RESULTS_PER_PART_LIST);

        for(i4_ref = 0; i4_ref < i4_num_active_ref; i4_ref++)
        {
            U08 u1_pred_dir = pu1_pred_dir_searched[i4_ref];

            /* NxN partition results of the enclosing CU for this direction */
            search_node_t *ps_nodes =
                ps_search_results->aps_part_results[u1_pred_dir][PART_ID_NxN_TL + i4_blk];

            for(i4_node = 0; i4_node < ps_search_results->u1_num_results_per_part; i4_node++)
            {
                search_node_t *ps_node = &ps_nodes[i4_node];
                pu_result_t *ps_pu;
                WORD32 i4_ref_lc;

                if(!(ps_node->u1_is_avail || ps_node->u1_subpel_done))
                {
                    /* if NxN was not evaluated at 16x16 level, assign max cost to 8x8 CU
                       to remove 8x8's as possible candidates during evaluation */
                    ps_pu = ps_pu_results->aps_pu_results[0][0] + i4_num_l0;
                    ps_pu->i4_tot_cost = MAX_32BIT_VAL;

                    ps_pu = ps_pu_results->aps_pu_results[1][0] + i4_num_l1;
                    ps_pu->i4_tot_cost = MAX_32BIT_VAL;

                    break;
                }

                i4_ref_lc = ps_node->i1_ref_idx;

                ASSERT(i4_ref_lc >= 0);

                if(u1_pred_dir)
                {
                    /* Searched direction is non-zero : result goes to the L1 list */
                    ps_pu = ps_pu_results->aps_pu_results[1][0] + i4_num_l1;

                    ASSERT(
                        ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_lc] <
                        ps_inter_ctb_prms->u1_num_active_ref_l1);

                    ps_pu->pu.mv.i1_l1_ref_idx = ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_lc];
                    ps_pu->pu.mv.s_l1_mv = ps_node->s_mv;
                    ps_pu->pu.mv.i1_l0_ref_idx = -1;
                    ps_pu->pu.b2_pred_mode = PRED_L1;

                    i4_num_l1++;
                }
                else
                {
                    /* Searched direction is zero : result goes to the L0 list */
                    ps_pu = ps_pu_results->aps_pu_results[0][0] + i4_num_l0;

                    ASSERT(
                        ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_lc] <
                        ps_inter_ctb_prms->u1_num_active_ref_l0);

                    ps_pu->pu.mv.i1_l0_ref_idx = ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_lc];
                    ps_pu->pu.mv.s_l0_mv = ps_node->s_mv;
                    ps_pu->pu.mv.i1_l1_ref_idx = -1;
                    ps_pu->pu.b2_pred_mode = PRED_L0;

                    i4_num_l0++;
                }

                ps_pu->i4_mv_cost = ps_node->i4_mv_cost;
                ps_pu->i4_sdi = ps_node->i4_sdi;

#if UNI_SATD_SCALE
                /* SATD is scaled by weight. Hence rescale the SATD */
                ps_pu->i4_tot_cost =
                    ((ps_node->i4_sad * ps_ctxt->s_wt_pred.a_wpred_wt[ps_node->i1_ref_idx] +
                      (1 << (ps_inter_ctb_prms->wpred_log_wdc - 1))) >>
                     ps_inter_ctb_prms->wpred_log_wdc) +
                    ps_node->i4_mv_cost;
#endif

                /* Packed size fields are written as 1 for the 8x8 PU */
                ps_pu->pu.b4_wd = 1;
                ps_pu->pu.b4_ht = 1;
                ps_pu->pu.b4_pos_x = u1_pu_x;
                ps_pu->pu.b4_pos_y = u1_pu_y;
                ps_pu->pu.b1_intra_flag = 0;
            }
        }

        /* Update the num_results per_part across lists L0 and L1 */
        ps_pu_results->u1_num_results_per_part_l0[0] = i4_num_l0;
        ps_pu_results->u1_num_results_per_part_l1[0] = i4_num_l1;
    }
}
3403
3404 /**
3405 ********************************************************************************
3406 * @fn hme_insert_intra_nodes_post_bipred
3407 *
3408 * @brief Compares intra costs (populated by IPE) with the best inter costs
3409 * (populated after evaluating bi-pred) and updates the best results
3410 * if intra cost is better
3411 *
3412 * @param[in,out] ps_cu_results [inout] : Best results structure of CU
3413 * ps_cur_ipe_ctb [in] : intra results for the current CTB
3414 * i4_frm_qstep [in] : current frame quantizer(qscale)*
3415 *
3416 * @return None
3417 ********************************************************************************
3418 */
void hme_insert_intra_nodes_post_bipred(
    inter_cu_results_t *ps_cu_results,
    ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
    WORD32 i4_frm_qstep)
{
    WORD32 i;
    WORD32 num_results;
    WORD32 cu_size = ps_cu_results->u1_cu_size;
    UWORD8 u1_x_off = ps_cu_results->u1_x_off;
    UWORD8 u1_y_off = ps_cu_results->u1_y_off;

    /* Id of the 32x32 block, 16x16 block in a CTB */
    WORD32 i4_32x32_id = (u1_y_off >> 5) * 2 + (u1_x_off >> 5);
    WORD32 i4_16x16_id = ((u1_y_off >> 4) & 0x1) * 2 + ((u1_x_off >> 4) & 0x1);

    /* Flags to indicate if intra64/intra32/intra16 cusize are invalid as per IPE decision */
    WORD32 disable_intra64 = 0;
    WORD32 disable_intra32 = 0;
    WORD32 disable_intra16 = 0;

    /* Initialised to the maximum cost so that an unexpected cu_size (the   */
    /* default case below, reachable when ASSERT compiles to nothing) never */
    /* reads an indeterminate value and never inserts an intra node         */
    S32 i4_intra_2nx2n_cost = MAX_32BIT_VAL;

    /* ME final results for this CU (post seeding of best uni/bi pred results) */
    part_type_results_t *ps_best_result;

    /* The qstep based bias is zeroed when L0ME_IN_OPENLOOP_MODE is non-zero */
    i4_frm_qstep *= !L0ME_IN_OPENLOOP_MODE;

    /* An 8x8 CU with no results is given one slot so that the intra */
    /* cost can still be compared against ps_best_results[0]         */
    if((ps_cu_results->u1_num_best_results == 0) && (CU_8x8 == ps_cu_results->u1_cu_size))
    {
        ps_cu_results->u1_num_best_results = 1;
    }

    num_results = ps_cu_results->u1_num_best_results;

    ps_best_result = &ps_cu_results->ps_best_results[0];

    /* Disable intra16/32/64 flags based on split flags recommended by IPE */
    if(ps_cur_ipe_ctb->u1_split_flag)
    {
        disable_intra64 = 1;

        if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
        {
            disable_intra32 = 1;

            if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
                   .as_intra16_analyse[i4_16x16_id]
                   .b1_split_flag)
            {
                disable_intra16 = 1;
            }
        }
    }

    /* Derive the intra cost based on current cu size and offset */
    switch(cu_size)
    {
    case CU_8x8:
    {
        /* u1_y_off is used unshifted as the row term of the index; this   */
        /* equals (u1_y_off >> 3) * 8 when u1_y_off is a multiple of 8     */
        /* NOTE(review): assumes an 8-entries-per-row cost array - confirm */
        i4_intra_2nx2n_cost = ps_cur_ipe_ctb->ai4_best8x8_intra_cost[u1_y_off + (u1_x_off >> 3)];

        /* Accounting for coding noise in the open loop IPE cost */
        i4_intra_2nx2n_cost += ((i4_frm_qstep * 16) >> 2);

        break;
    }

    case CU_16x16:
    {
        i4_intra_2nx2n_cost =
            ps_cur_ipe_ctb->ai4_best16x16_intra_cost[(u1_y_off >> 4) * 4 + (u1_x_off >> 4)];

        /* Accounting for coding noise in the open loop IPE cost */
        i4_intra_2nx2n_cost += (i4_frm_qstep * 16);

        if(disable_intra16)
        {
            /* Disable intra 2Nx2N (intra 16) as IPE suggested best mode as 8x8 */
            i4_intra_2nx2n_cost = MAX_32BIT_VAL;
        }
        break;
    }

    case CU_32x32:
    {
        i4_intra_2nx2n_cost =
            ps_cur_ipe_ctb->ai4_best32x32_intra_cost[(u1_y_off >> 5) * 2 + (u1_x_off >> 5)];

        /* Accounting for coding noise in the open loop IPE cost */
        i4_intra_2nx2n_cost += (i4_frm_qstep * 16 * 4);

        if(disable_intra32)
        {
            /* Disable intra 2Nx2N (intra 32) as IPE suggested best mode as 16x16 or 8x8 */
            i4_intra_2nx2n_cost = MAX_32BIT_VAL;
        }
        break;
    }

    case CU_64x64:
    {
        i4_intra_2nx2n_cost = ps_cur_ipe_ctb->i4_best64x64_intra_cost;

        /* Accounting for coding noise in the open loop IPE cost */
        i4_intra_2nx2n_cost += (i4_frm_qstep * 16 * 16);

        if(disable_intra64)
        {
            /* Disable intra 2Nx2N (intra 64) as IPE suggested best mode as 32x32 /16x16 / 8x8 */
            i4_intra_2nx2n_cost = MAX_32BIT_VAL;
        }
        break;
    }

    default:
        /* Unknown CU size : the cost stays at MAX_32BIT_VAL */
        ASSERT(0);
        break;
    }

    /*****************************************************************/
    /* Intra / Inter cost comparison for 2Nx2N : cu size 8/16/32/64  */
    /* Identify where the current result is to be placed. Basically  */
    /* find the first node whose cost is higher than the intra cost  */
    /*****************************************************************/
    for(i = 0; i < num_results; i++)
    {
        /* Subtract the tu_split_flag cost from total inter cost for fair comparison */
        WORD32 inter_cost = ps_best_result[i].i4_tot_cost - ps_best_result[i].i4_tu_split_cost;

        if(i4_intra_2nx2n_cost < inter_cost)
        {
            /* Shift the worse results down by one; the last one is dropped */
            if(i < (num_results - 1))
            {
                memmove(
                    ps_best_result + i + 1,
                    ps_best_result + i,
                    sizeof(ps_best_result[0]) * (num_results - 1 - i));
            }

            /* Insert the intra node result. Fields not written below keep */
            /* the values of the entry previously stored at this index     */
            ps_best_result[i].u1_part_type = PRT_2Nx2N;
            ps_best_result[i].i4_tot_cost = i4_intra_2nx2n_cost;
            ps_best_result[i].ai4_tu_split_flag[0] = 0;
            ps_best_result[i].ai4_tu_split_flag[1] = 0;
            ps_best_result[i].ai4_tu_split_flag[2] = 0;
            ps_best_result[i].ai4_tu_split_flag[3] = 0;

            /* Populate intra flag, cost and default mvs, refidx for intra pu */
            ps_best_result[i].as_pu_results[0].i4_tot_cost = i4_intra_2nx2n_cost;
            ps_best_result[i].as_pu_results[0].i4_mv_cost = 0;
            ps_best_result[i].as_pu_results[0].pu.b1_intra_flag = 1;
            ps_best_result[i].as_pu_results[0].pu.mv.i1_l0_ref_idx = -1;
            ps_best_result[i].as_pu_results[0].pu.mv.i1_l1_ref_idx = -1;
            ps_best_result[i].as_pu_results[0].pu.mv.s_l0_mv.i2_mvx = INTRA_MV;
            ps_best_result[i].as_pu_results[0].pu.mv.s_l0_mv.i2_mvy = INTRA_MV;
            ps_best_result[i].as_pu_results[0].pu.mv.s_l1_mv.i2_mvx = INTRA_MV;
            ps_best_result[i].as_pu_results[0].pu.mv.s_l1_mv.i2_mvy = INTRA_MV;

            break;
        }
    }
}
3588
hme_recompute_lambda_from_min_8x8_act_in_ctb(me_frm_ctxt_t * ps_ctxt,ipe_l0_ctb_analyse_for_me_t * ps_cur_ipe_ctb)3589 S32 hme_recompute_lambda_from_min_8x8_act_in_ctb(
3590 me_frm_ctxt_t *ps_ctxt, ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb)
3591 {
3592 double lambda;
3593 double lambda_modifier;
3594 WORD32 i4_cu_qp;
3595 frm_lambda_ctxt_t *ps_frm_lambda_ctxt;
3596 //ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
3597 WORD32 i4_frame_qp;
3598 rc_quant_t *ps_rc_quant_ctxt;
3599 WORD32 i4_is_bpic;
3600
3601 ps_frm_lambda_ctxt = &ps_ctxt->s_frm_lambda_ctxt;
3602 //ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base;
3603 i4_frame_qp = ps_ctxt->s_frm_prms.i4_frame_qp;
3604 ps_rc_quant_ctxt = ps_ctxt->ps_rc_quant_ctxt;
3605 i4_is_bpic = ps_ctxt->s_frm_prms.bidir_enabled;
3606
3607 i4_cu_qp = ps_rc_quant_ctxt->pi4_qp_to_qscale[i4_frame_qp + ps_rc_quant_ctxt->i1_qp_offset];
3608
3609 {
3610 if(ps_ctxt->i4_l0me_qp_mod)
3611 {
3612 #if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON
3613 #if LAMDA_BASED_ON_QUANT
3614 WORD32 i4_activity = ps_cur_ipe_ctb->i4_64x64_act_factor[2][0];
3615 #else
3616 WORD32 i4_activity = ps_cur_ipe_ctb->i4_64x64_act_factor[3][0];
3617 #endif
3618 i4_cu_qp = (((i4_cu_qp)*i4_activity) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
3619 QP_LEVEL_MOD_ACT_FACTOR;
3620
3621 #endif
3622 }
3623 if(i4_cu_qp > ps_rc_quant_ctxt->i2_max_qscale)
3624 i4_cu_qp = ps_rc_quant_ctxt->i2_max_qscale;
3625 else if(i4_cu_qp < ps_rc_quant_ctxt->i2_min_qscale)
3626 i4_cu_qp = ps_rc_quant_ctxt->i2_min_qscale;
3627
3628 i4_cu_qp = ps_rc_quant_ctxt->pi4_qscale_to_qp[i4_cu_qp];
3629 }
3630
3631 if(i4_cu_qp > ps_rc_quant_ctxt->i2_max_qp)
3632 i4_cu_qp = ps_rc_quant_ctxt->i2_max_qp;
3633 else if(i4_cu_qp < ps_rc_quant_ctxt->i2_min_qp)
3634 i4_cu_qp = ps_rc_quant_ctxt->i2_min_qp;
3635
3636 lambda = pow(2.0, (((double)(i4_cu_qp - 12)) / 3));
3637
3638 lambda_modifier = ps_frm_lambda_ctxt->lambda_modifier;
3639
3640 if(i4_is_bpic)
3641 {
3642 lambda_modifier = lambda_modifier * CLIP3((((double)(i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
3643 }
3644 if(ps_ctxt->i4_use_const_lamda_modifier)
3645 {
3646 if(ps_ctxt->s_frm_prms.is_i_pic)
3647 {
3648 lambda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
3649 }
3650 else
3651 {
3652 lambda_modifier = CONST_LAMDA_MOD_VAL;
3653 }
3654 }
3655 lambda *= lambda_modifier;
3656
3657 return ((WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)));
3658 }
3659
3660 /**
3661 ********************************************************************************
3662 * @fn hme_update_dynamic_search_params
3663 *
3664 * @brief Update the Dynamic search params based on the current MVs
3665 *
3666 * @param[in,out] ps_dyn_range_prms [inout] : Dyn. Range Param str.
3667 * i2_mvy [in] : current MV y comp.
3668 *
3669 * @return None
3670 ********************************************************************************
3671 */
void hme_update_dynamic_search_params(dyn_range_prms_t *ps_dyn_range_prms, WORD16 i2_mvy)
{
    /* Widen the tracked [i2_dyn_min_y, i2_dyn_max_y] range so that it */
    /* contains the y component of the current MV                      */

    /* New upper extreme */
    if(ps_dyn_range_prms->i2_dyn_max_y < i2_mvy)
    {
        ps_dyn_range_prms->i2_dyn_max_y = i2_mvy;
    }

    /* New lower extreme (checked independently of the upper one) */
    if(ps_dyn_range_prms->i2_dyn_min_y > i2_mvy)
    {
        ps_dyn_range_prms->i2_dyn_min_y = i2_mvy;
    }
}
3681
/**
********************************************************************************
*  @fn     hme_add_new_node_to_a_sorted_array
*
*  @brief  Inserts a node pointer into an array of node pointers kept in
*          ascending order of cost. The node goes before the first entry
*          with a strictly higher cost, or at the end if there is none
*
*  @param[in]     ps_result_node         : node to be inserted
*  @param[in,out] pps_sorted_array       : sorted array of node pointers; one
*                                          entry past u4_num_results_updated
*                                          is written when entries are shifted
*  @param[in,out] pu1_shifts             : optional (may be NULL) per-entry
*                                          right shifts applied, with rounding,
*                                          to the costs before comparison; kept
*                                          in step with pps_sorted_array
*  @param[in]     u4_num_results_updated : number of entries currently held
*  @param[in]     u1_shift               : shift for the new node's cost; only
*                                          used when pu1_shifts is not NULL
*
*  @return None
********************************************************************************
*/
void hme_add_new_node_to_a_sorted_array(
    search_node_t *ps_result_node,
    search_node_t **pps_sorted_array,
    U08 *pu1_shifts,
    U32 u4_num_results_updated,
    U08 u1_shift)
{
    U32 u4_pos = 0;
    S32 i4_new_cost = ps_result_node->i4_tot_cost;

    /* Costs are compared after a rounded right shift only when a shift */
    /* array has been supplied and the shift in question is non-zero     */
    if((NULL != pu1_shifts) && (0 != u1_shift))
    {
        i4_new_cost = (ps_result_node->i4_tot_cost + (1 << (u1_shift - 1))) >> u1_shift;
    }

    /* Locate the first entry that costs strictly more than the new node */
    while(u4_pos < u4_num_results_updated)
    {
        S32 i4_entry_cost = pps_sorted_array[u4_pos]->i4_tot_cost;

        if((NULL != pu1_shifts) && (0 != pu1_shifts[u4_pos]))
        {
            i4_entry_cost =
                (pps_sorted_array[u4_pos]->i4_tot_cost + (1 << (pu1_shifts[u4_pos] - 1))) >>
                pu1_shifts[u4_pos];
        }

        if(i4_new_cost < i4_entry_cost)
        {
            break;
        }

        u4_pos++;
    }

    /* Open a slot by moving the tail one position down */
    if(u4_pos < u4_num_results_updated)
    {
        U32 u4_tail = u4_num_results_updated - u4_pos;

        memmove(
            &pps_sorted_array[u4_pos + 1],
            &pps_sorted_array[u4_pos],
            u4_tail * sizeof(search_node_t *));

        if(NULL != pu1_shifts)
        {
            memmove(&pu1_shifts[u4_pos + 1], &pu1_shifts[u4_pos], u4_tail * sizeof(U08));
        }
    }

    if(NULL != pu1_shifts)
    {
        pu1_shifts[u4_pos] = u1_shift;
    }

    pps_sorted_array[u4_pos] = ps_result_node;
}
3739
/**
********************************************************************************
*  @fn     hme_find_pos_of_implicitly_stored_ref_id
*
*  @brief  Finds the array position of the (i4_result_id)'th occurrence
*          (0-based) of a ref idx value
*
*  @param[in] pi1_ref_idx    : array of ref idx values to scan
*
*  @param[in] i1_ref_idx     : ref idx value to look for
*
*  @param[in] i4_result_id   : number of matching entries to skip before
*                              reporting a position
*
*  @param[in] i4_num_results : number of entries in pi1_ref_idx
*
*  @return Position of the requested occurrence, or -1 when the array holds
*          fewer than (i4_result_id + 1) matching entries
********************************************************************************
*/
S32 hme_find_pos_of_implicitly_stored_ref_id(
    S08 *pi1_ref_idx, S08 i1_ref_idx, S32 i4_result_id, S32 i4_num_results)
{
    S32 i4_matches_to_skip = i4_result_id;
    S32 i4_pos;

    for(i4_pos = 0; i4_pos < i4_num_results; i4_pos++)
    {
        if(pi1_ref_idx[i4_pos] != i1_ref_idx)
        {
            continue;
        }

        /* A matching entry: report it once enough earlier ones were skipped */
        if(0 == i4_matches_to_skip)
        {
            return i4_pos;
        }

        i4_matches_to_skip--;
    }

    return -1;
}
3762
/**
********************************************************************************
*  @fn     hme_search_node_populator
*
*  @brief  Fills a search node from a stored MV and ref idx, and marks the
*          node as available with subpel refinement not yet done
*
*  @param[out] ps_search_node        : node to populate; the MV is written
*                                      through its ps_mv pointer
*
*  @param[in]  ps_mv                 : source MV
*
*  @param[in]  i1_ref_idx            : ref idx stored in the node
*
*  @param[in]  i1_mv_magnitude_shift : shift handed to SHL_NEG for both MV
*                                      components (presumably the sign picks
*                                      the shift direction - confirm against
*                                      the SHL_NEG definition)
*
*  @return None
********************************************************************************
*/
static __inline void hme_search_node_populator(
    search_node_t *ps_search_node, hme_mv_t *ps_mv, S08 i1_ref_idx, S08 i1_mv_magnitude_shift)
{
    /* Node bookkeeping */
    ps_search_node->u1_subpel_done = 0;
    ps_search_node->u1_is_avail = 1;
    ps_search_node->i1_ref_idx = i1_ref_idx;

    /* Rescaled motion vector */
    ps_search_node->ps_mv->i2_mvx = SHL_NEG((WORD16)ps_mv->i2_mv_x, i1_mv_magnitude_shift);
    ps_search_node->ps_mv->i2_mvy = SHL_NEG((WORD16)ps_mv->i2_mv_y, i1_mv_magnitude_shift);
}
3772
hme_populate_search_candidates(fpel_srch_cand_init_data_t * ps_ctxt)3773 S32 hme_populate_search_candidates(fpel_srch_cand_init_data_t *ps_ctxt)
3774 {
3775 hme_mv_t *ps_mv;
3776
3777 S32 wd_c, ht_c, wd_p, ht_p;
3778 S32 blksize_p, blksize_c;
3779 S32 i;
3780 S08 *pi1_ref_idx;
3781 /* Cache for storing offsets */
3782 S32 ai4_cand_offsets[NUM_SEARCH_CAND_LOCATIONS];
3783
3784 layer_ctxt_t *ps_curr_layer = ps_ctxt->ps_curr_layer;
3785 layer_ctxt_t *ps_coarse_layer = ps_ctxt->ps_coarse_layer;
3786 layer_mv_t *ps_coarse_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
3787 layer_mv_t *ps_curr_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
3788 search_candt_t *ps_search_cands = ps_ctxt->ps_search_cands;
3789 hme_mv_t s_zero_mv = { 0 };
3790
3791 S32 i4_pos_x = ps_ctxt->i4_pos_x;
3792 S32 i4_pos_y = ps_ctxt->i4_pos_y;
3793 S32 i4_num_act_ref_l0 = ps_ctxt->i4_num_act_ref_l0;
3794 S32 i4_num_act_ref_l1 = ps_ctxt->i4_num_act_ref_l1;
3795 U08 u1_pred_dir = ps_ctxt->u1_pred_dir;
3796 U08 u1_pred_dir_ctr = ps_ctxt->u1_pred_dir_ctr;
3797 U08 u1_num_results_in_curr_mvbank = ps_ctxt->u1_num_results_in_mvbank;
3798 U08 u1_num_results_in_coarse_mvbank =
3799 (u1_pred_dir == 0) ? (i4_num_act_ref_l0 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref)
3800 : (i4_num_act_ref_l1 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref);
3801 S32 i4_init_offset_projected =
3802 (u1_pred_dir == 1) ? (i4_num_act_ref_l0 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref) : 0;
3803 S32 i4_init_offset_spatial =
3804 (u1_pred_dir_ctr == 1)
3805 ? (ps_curr_layer_mvbank->i4_num_mvs_per_ref * u1_num_results_in_curr_mvbank)
3806 : 0;
3807 U08 u1_search_candidate_list_index = ps_ctxt->u1_search_candidate_list_index;
3808 U08 u1_max_num_search_cands =
3809 gau1_max_num_search_cands_in_l0_me[u1_search_candidate_list_index];
3810 S32 i4_num_srch_cands = MIN(u1_max_num_search_cands, ps_ctxt->i4_max_num_init_cands << 1);
3811 U16 u2_is_offset_available = 0;
3812 U08 u1_search_blk_to_spatial_mvbank_blk_size_factor = 1;
3813
3814 /* Width and ht of current and prev layers */
3815 wd_c = ps_curr_layer->i4_wd;
3816 ht_c = ps_curr_layer->i4_ht;
3817 wd_p = ps_coarse_layer->i4_wd;
3818 ht_p = ps_coarse_layer->i4_ht;
3819
3820 blksize_p = gau1_blk_size_to_wd_shift[ps_coarse_layer_mvbank->e_blk_size];
3821 blksize_c = gau1_blk_size_to_wd_shift[ps_curr_layer_mvbank->e_blk_size];
3822
3823 /* ASSERT for valid sizes */
3824 ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
3825
3826 {
3827 S32 x = i4_pos_x >> 4;
3828 S32 y = i4_pos_y >> 4;
3829
3830 if(blksize_c != gau1_blk_size_to_wd_shift[ps_ctxt->e_search_blk_size])
3831 {
3832 x *= 2;
3833 y *= 2;
3834
3835 u1_search_blk_to_spatial_mvbank_blk_size_factor = 2;
3836 }
3837
3838 i4_init_offset_spatial += (x + y * ps_curr_layer_mvbank->i4_num_blks_per_row) *
3839 ps_curr_layer_mvbank->i4_num_mvs_per_blk;
3840 }
3841
3842 for(i = 0; i < i4_num_srch_cands; i++)
3843 {
3844 SEARCH_CANDIDATE_TYPE_T e_search_cand_type =
3845 gae_search_cand_priority_to_search_cand_type_map_in_l0_me[u1_search_candidate_list_index]
3846 [i];
3847 SEARCH_CAND_LOCATIONS_T e_search_cand_loc =
3848 gae_search_cand_type_to_location_map[e_search_cand_type];
3849 S08 i1_result_id = MIN(
3850 gai1_search_cand_type_to_result_id_map[e_search_cand_type],
3851 (e_search_cand_loc < 0 ? 0
3852 : ps_ctxt->pu1_num_fpel_search_cands[e_search_cand_loc] - 1));
3853 U08 u1_is_spatial_cand = (1 == gau1_search_cand_type_to_spatiality_map[e_search_cand_type]);
3854 U08 u1_is_proj_cand = (0 == gau1_search_cand_type_to_spatiality_map[e_search_cand_type]);
3855 U08 u1_is_zeroMV_cand = (ZERO_MV == e_search_cand_type) ||
3856 (ZERO_MV_ALTREF == e_search_cand_type);
3857
3858 /* When spatial candidates are available, use them, else use the projected candidates */
3859 /* This is required since some blocks will never have certain spatial candidates, and in order */
3860 /* to accomodate such instances in 'gae_search_cand_priority_to_search_cand_type_map_in_l0_me' list, */
3861 /* all candidates apart from the 'LEFT' have been marked as projected */
3862 if(((e_search_cand_loc == TOPLEFT) || (e_search_cand_loc == TOP) ||
3863 (e_search_cand_loc == TOPRIGHT)) &&
3864 (i1_result_id < u1_num_results_in_curr_mvbank) && u1_is_proj_cand)
3865 {
3866 if(e_search_cand_loc == TOPLEFT)
3867 {
3868 u1_is_spatial_cand = ps_ctxt->u1_is_topLeft_available ||
3869 !ps_ctxt->u1_is_left_available;
3870 }
3871 else if(e_search_cand_loc == TOPRIGHT)
3872 {
3873 u1_is_spatial_cand = ps_ctxt->u1_is_topRight_available;
3874 }
3875 else
3876 {
3877 u1_is_spatial_cand = ps_ctxt->u1_is_top_available;
3878 }
3879
3880 u1_is_proj_cand = !u1_is_spatial_cand;
3881 }
3882
3883 switch(u1_is_zeroMV_cand + (u1_is_spatial_cand << 1) + (u1_is_proj_cand << 2))
3884 {
3885 case 1:
3886 {
3887 hme_search_node_populator(
3888 ps_search_cands[i].ps_search_node,
3889 &s_zero_mv,
3890 (ZERO_MV == e_search_cand_type) ? ps_ctxt->i1_default_ref_id
3891 : ps_ctxt->i1_alt_default_ref_id,
3892 0);
3893
3894 break;
3895 }
3896 case 2:
3897 {
3898 S08 i1_mv_magnitude_shift = 0;
3899
3900 S32 i4_offset = i4_init_offset_spatial;
3901
3902 i1_result_id = MIN(i1_result_id, u1_num_results_in_curr_mvbank - 1);
3903 i4_offset += i1_result_id;
3904
3905 switch(e_search_cand_loc)
3906 {
3907 case LEFT:
3908 {
3909 if(ps_ctxt->u1_is_left_available)
3910 {
3911 i1_mv_magnitude_shift = -2;
3912
3913 i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_blk;
3914
3915 ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
3916 pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
3917 }
3918 else
3919 {
3920 i1_mv_magnitude_shift = 0;
3921
3922 ps_mv = &s_zero_mv;
3923 pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
3924 }
3925
3926 break;
3927 }
3928 case TOPLEFT:
3929 {
3930 if(ps_ctxt->u1_is_topLeft_available)
3931 {
3932 i1_mv_magnitude_shift = -2;
3933
3934 i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_blk;
3935 i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row;
3936
3937 ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
3938 pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
3939 }
3940 else
3941 {
3942 i1_mv_magnitude_shift = 0;
3943
3944 ps_mv = &s_zero_mv;
3945 pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
3946 }
3947
3948 break;
3949 }
3950 case TOP:
3951 {
3952 if(ps_ctxt->u1_is_top_available)
3953 {
3954 i1_mv_magnitude_shift = -2;
3955
3956 i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row;
3957
3958 ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
3959 pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
3960 }
3961 else
3962 {
3963 i1_mv_magnitude_shift = 0;
3964
3965 ps_mv = &s_zero_mv;
3966 pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
3967 }
3968
3969 break;
3970 }
3971 case TOPRIGHT:
3972 {
3973 if(ps_ctxt->u1_is_topRight_available)
3974 {
3975 i1_mv_magnitude_shift = -2;
3976
3977 i4_offset += ps_curr_layer_mvbank->i4_num_mvs_per_blk *
3978 u1_search_blk_to_spatial_mvbank_blk_size_factor;
3979 i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row;
3980
3981 ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
3982 pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
3983 }
3984 else
3985 {
3986 i1_mv_magnitude_shift = 0;
3987 ps_mv = &s_zero_mv;
3988 pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
3989 }
3990
3991 break;
3992 }
3993 default:
3994 {
3995 /* AiyAiyYo!! */
3996 ASSERT(0);
3997 }
3998 }
3999
4000 hme_search_node_populator(
4001 ps_search_cands[i].ps_search_node, ps_mv, pi1_ref_idx[0], i1_mv_magnitude_shift);
4002
4003 break;
4004 }
4005 case 4:
4006 {
4007 ASSERT(ILLUSORY_CANDIDATE != e_search_cand_type);
4008 ASSERT(ILLUSORY_LOCATION != e_search_cand_loc);
4009
4010 i1_result_id = MIN(i1_result_id, u1_num_results_in_coarse_mvbank - 1);
4011
4012 if(!(u2_is_offset_available & (1 << e_search_cand_loc)))
4013 {
4014 S32 x, y;
4015
4016 x = i4_pos_x + gai4_search_cand_location_to_x_offset_map[e_search_cand_loc];
4017 y = i4_pos_y + gai4_search_cand_location_to_y_offset_map[e_search_cand_loc];
4018
4019 /* Safety check to avoid uninitialized access across temporal layers */
4020 x = CLIP3(x, 0, (wd_c - blksize_p));
4021 y = CLIP3(y, 0, (ht_c - blksize_p));
4022
4023 /* Project the positions to prev layer */
4024 x = x >> blksize_p;
4025 y = y >> blksize_p;
4026
4027 ai4_cand_offsets[e_search_cand_loc] =
4028 (x * ps_coarse_layer_mvbank->i4_num_mvs_per_blk);
4029 ai4_cand_offsets[e_search_cand_loc] +=
4030 (y * ps_coarse_layer_mvbank->i4_num_mvs_per_row);
4031 ai4_cand_offsets[e_search_cand_loc] += i4_init_offset_projected;
4032
4033 u2_is_offset_available |= (1 << e_search_cand_loc);
4034 }
4035
4036 ps_mv =
4037 ps_coarse_layer_mvbank->ps_mv + ai4_cand_offsets[e_search_cand_loc] + i1_result_id;
4038 pi1_ref_idx = ps_coarse_layer_mvbank->pi1_ref_idx +
4039 ai4_cand_offsets[e_search_cand_loc] + i1_result_id;
4040
4041 hme_search_node_populator(ps_search_cands[i].ps_search_node, ps_mv, pi1_ref_idx[0], 1);
4042
4043 break;
4044 }
4045 default:
4046 {
4047 /* NoNoNoNoNooooooooNO! */
4048 ASSERT(0);
4049 }
4050 }
4051
4052 ASSERT(ps_search_cands[i].ps_search_node->i1_ref_idx >= 0);
4053 ASSERT(
4054 !u1_pred_dir
4055 ? (ps_ctxt->pi4_ref_id_lc_to_l0_map[ps_search_cands[i].ps_search_node->i1_ref_idx] <
4056 i4_num_act_ref_l0)
4057 : (ps_ctxt->pi4_ref_id_lc_to_l1_map[ps_search_cands[i].ps_search_node->i1_ref_idx] <
4058 ps_ctxt->i4_num_act_ref_l1));
4059 }
4060
4061 return i4_num_srch_cands;
4062 }
4063
/**
********************************************************************************
*  @fn     hme_mv_clipper
*
*  @brief  Clips the MV of every search candidate to the MV range of the
*          candidate's own reference picture
*
*  @param[in,out] ps_search_prms_blk     : block search params; supplies the
*                                          candidate list and the per-ref MV
*                                          ranges. Candidate MVs are clipped
*                                          in place
*
*  @param[in]     i4_num_srch_cands      : number of candidates to process
*
*  @param[in]     i1_check_for_mult_refs : not used by this function
*
*  @param[in]     u1_fpel_refine_extent  : passed to CLIP_MV_WITHIN_RANGE
*
*  @param[in]     u1_hpel_refine_extent  : passed to CLIP_MV_WITHIN_RANGE
*
*  @param[in]     u1_qpel_refine_extent  : passed to CLIP_MV_WITHIN_RANGE
*
*  @return None
********************************************************************************
*/
void hme_mv_clipper(
    hme_search_prms_t *ps_search_prms_blk,
    S32 i4_num_srch_cands,
    S08 i1_check_for_mult_refs,
    U08 u1_fpel_refine_extent,
    U08 u1_hpel_refine_extent,
    U08 u1_qpel_refine_extent)
{
    S32 i4_cand_idx;

    for(i4_cand_idx = 0; i4_cand_idx < i4_num_srch_cands; i4_cand_idx++)
    {
        search_node_t *ps_node =
            ps_search_prms_blk->ps_search_candts[i4_cand_idx].ps_search_node;
        /* The MV range is selected by the candidate's ref idx */
        range_prms_t *ps_node_mv_range = ps_search_prms_blk->aps_mv_range[ps_node->i1_ref_idx];

        /* Clipping is done at this stage because two candidates can become */
        /* identical after clipping; such duplicates are then removed       */
        /* during deduplication                                             */
        CLIP_MV_WITHIN_RANGE(
            ps_node->ps_mv->i2_mvx,
            ps_node->ps_mv->i2_mvy,
            ps_node_mv_range,
            u1_fpel_refine_extent,
            u1_hpel_refine_extent,
            u1_qpel_refine_extent);
    }
}
4093
/**
********************************************************************************
*  @fn     hme_init_pred_buf_info
*
*  @brief  Acquires one free pred buffer from the buffer manager and records
*          it in the first row of the pred buf info table. Both partitions
*          share that buffer; the second partition's pointer is offset past
*          the first partition
*
*  @param[out]    ps_info     : pred buf info table; entries [0][0] and, for
*                               part types other than PRT_2Nx2N, [0][1] are
*                               written
*
*  @param[in,out] ps_buf_mngr : pred buffer manager; its usage indicator is
*                               handed to ihevce_get_free_pred_buf_indices
*
*  @param[in]     u1_pu1_wd   : width of the first partition
*
*  @param[in]     u1_pu1_ht   : height of the first partition
*
*  @param[in]     e_part_type : partition type of the CU
*
*  @return None
********************************************************************************
*/
void hme_init_pred_buf_info(
    hme_pred_buf_info_t (*ps_info)[MAX_NUM_INTER_PARTS],
    hme_pred_buf_mngr_t *ps_buf_mngr,
    U08 u1_pu1_wd,
    U08 u1_pu1_ht,
    PART_TYPE_T e_part_type)
{
    hme_pred_buf_info_t *ps_part_info = ps_info[0];
    U08 u1_buf_id;

    /* Exactly one free buffer is required */
    if(1 != ihevce_get_free_pred_buf_indices(
                &u1_buf_id, &ps_buf_mngr->u4_pred_buf_usage_indicator, 1))
    {
        ASSERT(0);

        return;
    }

    /* First partition starts at the beginning of the buffer */
    ps_part_info[0].i4_pred_stride = MAX_CU_SIZE;
    ps_part_info[0].pu1_pred = ps_buf_mngr->apu1_pred_bufs[u1_buf_id];
    ps_part_info[0].u1_pred_buf_array_id = u1_buf_id;

    if(PRT_2Nx2N == e_part_type)
    {
        /* Single partition: nothing more to set up */
        return;
    }

    /* Second partition: u1_pu1_ht rows further down when the is-vertical */
    /* table entry is set, else u1_pu1_wd columns further right           */
    ps_part_info[1].i4_pred_stride = MAX_CU_SIZE;

    if(gai1_is_part_vertical[ge_part_type_to_part_id[e_part_type][0]])
    {
        ps_part_info[1].pu1_pred = ps_buf_mngr->apu1_pred_bufs[u1_buf_id] +
                                   (u1_pu1_ht * ps_part_info[1].i4_pred_stride);
    }
    else
    {
        ps_part_info[1].pu1_pred = ps_buf_mngr->apu1_pred_bufs[u1_buf_id] + u1_pu1_wd;
    }

    ps_part_info[1].u1_pred_buf_array_id = u1_buf_id;
}
4125
/**
********************************************************************************
*  @fn     hme_debrief_bipred_eval
*
*  @brief  Wraps up bi-pred evaluation of one partition type: selects the
*          buffer that holds the final prediction of the whole CU, copies in
*          the partition(s) whose chosen prediction lives elsewhere, and marks
*          the pred buffers that are no longer needed as free
*
*  @param[in,out] ps_part_type_result : result of the partition type; pu1_pred
*                  and i4_pred_stride are set to the final prediction
*
*  @param[in]     ps_pred_buf_info : pred buf info table, indexed
*                  [row][partition]. Row 2 is read for partitions whose pred
*                  mode is PRED_BI and row 0 otherwise (row 0 presumably holds
*                  the best uni-pred - confirm with caller). A
*                  u1_pred_buf_array_id of UCHAR_MAX is treated as "this pred
*                  is not held in one of the managed buffers"
*
*  @param[in,out] ps_pred_buf_mngr : pred buffer manager; its usage indicator
*                  is updated
*
*  @param[in]     pu1_allocated_pred_buf_array_indixes : ids of the three
*                  buffers allocated for this evaluation
*
*  @param[in]     ps_cmn_utils_optimised_function_list : supplies pf_copy_2d
*
*  @return None
********************************************************************************
*/
void hme_debrief_bipred_eval(
    part_type_results_t *ps_part_type_result,
    hme_pred_buf_info_t (*ps_pred_buf_info)[MAX_NUM_INTER_PARTS],
    hme_pred_buf_mngr_t *ps_pred_buf_mngr,
    U08 *pu1_allocated_pred_buf_array_indixes,
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
{
    PART_TYPE_T e_part_type = (PART_TYPE_T)ps_part_type_result->u1_part_type;

    U32 *pu4_pred_buf_usage_indicator = &ps_pred_buf_mngr->u4_pred_buf_usage_indicator;
    /* When set, PU2 starts u1_pu1_ht rows below PU1; otherwise it starts */
    /* u1_pu1_wd columns to the right of PU1                              */
    U08 u1_is_part_vertical = gai1_is_part_vertical[ge_part_type_to_part_id[e_part_type][0]];

    if(0 == ps_part_type_result->u1_part_type)
    {
        /* Single partition */
        if(ps_part_type_result->as_pu_results->pu.b2_pred_mode == PRED_BI)
        {
            ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id);

            ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred;
            ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);
        }
        else
        {
            ps_part_type_result->pu1_pred = ps_pred_buf_info[0][0].pu1_pred;
            ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);

            /* The chosen pred is not in a managed buffer, so buffer 0 is */
            /* not needed either                                          */
            if(UCHAR_MAX == ps_pred_buf_info[0][0].u1_pred_buf_array_id)
            {
                ihevce_set_pred_buf_as_free(
                    pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);
            }
        }
    }
    else
    {
        /* Two partitions */
        U08 *pu1_src_pred;
        U08 *pu1_dst_pred;
        S32 i4_src_pred_stride;
        S32 i4_dst_pred_stride;

        U08 u1_pu1_wd = (ps_part_type_result->as_pu_results[0].pu.b4_wd + 1) << 2;
        U08 u1_pu1_ht = (ps_part_type_result->as_pu_results[0].pu.b4_ht + 1) << 2;
        U08 u1_pu2_wd = (ps_part_type_result->as_pu_results[1].pu.b4_wd + 1) << 2;
        U08 u1_pu2_ht = (ps_part_type_result->as_pu_results[1].pu.b4_ht + 1) << 2;

        /* Bit 0: PU1 is bi-pred, bit 1: PU2 is bi-pred */
        U08 u1_condition_for_switch =
            (ps_part_type_result->as_pu_results[0].pu.b2_pred_mode == PRED_BI) |
            ((ps_part_type_result->as_pu_results[1].pu.b2_pred_mode == PRED_BI) << 1);

        switch(u1_condition_for_switch)
        {
        case 0:
        {
            /* Neither PU is bi-pred: the result is assembled in buffer 0 */
            ps_part_type_result->pu1_pred =
                ps_pred_buf_mngr->apu1_pred_bufs[pu1_allocated_pred_buf_array_indixes[0]];
            ps_part_type_result->i4_pred_stride = MAX_CU_SIZE;

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);

            /* Copy PU1 pred in, if it is not held in a managed buffer */
            if(UCHAR_MAX == ps_pred_buf_info[0][0].u1_pred_buf_array_id)
            {
                pu1_src_pred = ps_pred_buf_info[0][0].pu1_pred;
                pu1_dst_pred = ps_part_type_result->pu1_pred;
                i4_src_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;

                ps_cmn_utils_optimised_function_list->pf_copy_2d(
                    pu1_dst_pred,
                    i4_dst_pred_stride,
                    pu1_src_pred,
                    i4_src_pred_stride,
                    u1_pu1_wd,
                    u1_pu1_ht);
            }

            /* Copy PU2 pred in, if it is not held in a managed buffer */
            if(UCHAR_MAX == ps_pred_buf_info[0][1].u1_pred_buf_array_id)
            {
                pu1_src_pred = ps_pred_buf_info[0][1].pu1_pred;
                pu1_dst_pred = ps_part_type_result->pu1_pred +
                               (u1_is_part_vertical
                                    ? u1_pu1_ht * ps_part_type_result->i4_pred_stride
                                    : u1_pu1_wd);
                i4_src_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride;
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;

                ps_cmn_utils_optimised_function_list->pf_copy_2d(
                    pu1_dst_pred,
                    i4_dst_pred_stride,
                    pu1_src_pred,
                    i4_src_pred_stride,
                    u1_pu2_wd,
                    u1_pu2_ht);
            }

            break;
        }
        case 1:
        {
            /* Only PU1 is bi-pred */
            ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id);

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);

            /* Copy PU1 pred into PU2's pred buf */
            if(((u1_pu1_ht < u1_pu2_ht) || (u1_pu1_wd < u1_pu2_wd)) &&
               (UCHAR_MAX != ps_pred_buf_info[0][1].u1_pred_buf_array_id))
            {
                /* Step back from PU2's position to the start of the CU */
                ps_part_type_result->pu1_pred =
                    ps_pred_buf_info[0][1].pu1_pred -
                    (u1_is_part_vertical ? u1_pu1_ht * ps_pred_buf_info[0][1].i4_pred_stride
                                         : u1_pu1_wd);
                ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride;

                ihevce_set_pred_buf_as_free(
                    pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);

                pu1_src_pred = ps_pred_buf_info[2][0].pu1_pred;
                pu1_dst_pred = ps_part_type_result->pu1_pred;
                i4_src_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;

                ps_cmn_utils_optimised_function_list->pf_copy_2d(
                    pu1_dst_pred,
                    i4_dst_pred_stride,
                    pu1_src_pred,
                    i4_src_pred_stride,
                    u1_pu1_wd,
                    u1_pu1_ht);
            }
            /* Copy PU2 pred into PU1's bi-pred buf */
            else
            {
                ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred;
                ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;

                ihevce_set_pred_buf_as_free(
                    pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);

                /* PU2 must land at PU2's position inside the CU. Writing it */
                /* at the start of the buffer would overwrite the bi-pred    */
                /* samples of PU1 that this buffer already holds             */
                pu1_src_pred = ps_pred_buf_info[0][1].pu1_pred;
                pu1_dst_pred = ps_part_type_result->pu1_pred +
                               (u1_is_part_vertical
                                    ? u1_pu1_ht * ps_part_type_result->i4_pred_stride
                                    : u1_pu1_wd);
                i4_src_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride;
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;

                ps_cmn_utils_optimised_function_list->pf_copy_2d(
                    pu1_dst_pred,
                    i4_dst_pred_stride,
                    pu1_src_pred,
                    i4_src_pred_stride,
                    u1_pu2_wd,
                    u1_pu2_ht);
            }

            break;
        }
        case 2:
        {
            /* Only PU2 is bi-pred */
            ASSERT(UCHAR_MAX != ps_pred_buf_info[2][1].u1_pred_buf_array_id);

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);

            /* Copy PU2 pred into PU1's pred buf */
            if(((u1_pu1_ht > u1_pu2_ht) || (u1_pu1_wd > u1_pu2_wd)) &&
               (UCHAR_MAX != ps_pred_buf_info[0][0].u1_pred_buf_array_id))
            {
                ps_part_type_result->pu1_pred = ps_pred_buf_info[0][0].pu1_pred;
                ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;

                ihevce_set_pred_buf_as_free(
                    pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);

                pu1_src_pred = ps_pred_buf_info[2][1].pu1_pred;
                pu1_dst_pred = ps_part_type_result->pu1_pred +
                               (u1_is_part_vertical
                                    ? u1_pu1_ht * ps_part_type_result->i4_pred_stride
                                    : u1_pu1_wd);
                i4_src_pred_stride = ps_pred_buf_info[2][1].i4_pred_stride;
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;

                ps_cmn_utils_optimised_function_list->pf_copy_2d(
                    pu1_dst_pred,
                    i4_dst_pred_stride,
                    pu1_src_pred,
                    i4_src_pred_stride,
                    u1_pu2_wd,
                    u1_pu2_ht);
            }
            /* Copy PU1 pred into PU2's bi-pred buf */
            else
            {
                /* Step back from PU2's position to the start of the CU */
                ps_part_type_result->pu1_pred =
                    ps_pred_buf_info[2][1].pu1_pred -
                    (u1_is_part_vertical ? u1_pu1_ht * ps_pred_buf_info[2][1].i4_pred_stride
                                         : u1_pu1_wd);
                ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][1].i4_pred_stride;

                ihevce_set_pred_buf_as_free(
                    pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);

                pu1_src_pred = ps_pred_buf_info[0][0].pu1_pred;
                pu1_dst_pred = ps_part_type_result->pu1_pred;
                i4_src_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;

                ps_cmn_utils_optimised_function_list->pf_copy_2d(
                    pu1_dst_pred,
                    i4_dst_pred_stride,
                    pu1_src_pred,
                    i4_src_pred_stride,
                    u1_pu1_wd,
                    u1_pu1_ht);
            }

            break;
        }
        case 3:
        {
            /* Both PUs are bi-pred and share one buffer: no copy needed */
            ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id);
            ASSERT(UCHAR_MAX != ps_pred_buf_info[2][1].u1_pred_buf_array_id);
            ASSERT(
                ps_pred_buf_info[2][1].u1_pred_buf_array_id ==
                ps_pred_buf_info[2][0].u1_pred_buf_array_id);

            ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred;
            ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);

            break;
        }
        default:
        {
            /* Unreachable: the selector is built from two 1-bit flags */
            break;
        }
        }
    }
}
4376
hme_decide_search_candidate_priority_in_l1_and_l2_me(SEARCH_CANDIDATE_TYPE_T e_cand_type,ME_QUALITY_PRESETS_T e_quality_preset)4377 U08 hme_decide_search_candidate_priority_in_l1_and_l2_me(
4378 SEARCH_CANDIDATE_TYPE_T e_cand_type, ME_QUALITY_PRESETS_T e_quality_preset)
4379 {
4380 U08 u1_priority_val =
4381 gau1_search_cand_priority_in_l1_and_l2_me[e_quality_preset >= ME_MEDIUM_SPEED][e_cand_type];
4382
4383 if(UCHAR_MAX == u1_priority_val)
4384 {
4385 ASSERT(0);
4386 }
4387
4388 ASSERT(u1_priority_val <= MAX_INIT_CANDTS);
4389
4390 return u1_priority_val;
4391 }
4392
/**
********************************************************************************
*  @fn     hme_decide_search_candidate_priority_in_l0_me
*
*  @brief  Looks up the priority of a search candidate type for L0 ME
*
*  @param[in] e_cand_type : search candidate type
*
*  @param[in] u1_index    : search candidate list index; selects the row of
*                           gau1_search_cand_priority_in_l0_me
*
*  @return Priority value from the table. A value of UCHAR_MAX, or one above
*          MAX_INIT_CANDTS, trips an ASSERT
********************************************************************************
*/
U08 hme_decide_search_candidate_priority_in_l0_me(SEARCH_CANDIDATE_TYPE_T e_cand_type, U08 u1_index)
{
    const U08 u1_priority = gau1_search_cand_priority_in_l0_me[u1_index][e_cand_type];

    /* UCHAR_MAX in the table is treated as an invalid entry */
    if(UCHAR_MAX == u1_priority)
    {
        ASSERT(0);
    }

    ASSERT(u1_priority <= MAX_INIT_CANDTS);

    return u1_priority;
}
4406
/**
********************************************************************************
*  @fn     hme_search_cand_data_init
*
*  @brief  Initialises the L0 ME search candidate bookkeeping: the priorities
*          of the projected colocated candidates and of the zero MV
*          candidate(s), the number of colocated candidates and the search
*          candidate list index
*
*  @param[out] pi4_id_Z : [0] receives the priority of ZERO_MV when at least
*                  one active ref exists; [1] receives the priority of
*                  ZERO_MV_ALTREF when bidir is disabled and L0 has more than
*                  one active ref
*
*  @param[out] pi4_id_coloc : priorities of the colocated candidates; up to
*                  14 entries are written
*
*  @param[out] pi4_num_coloc_cands : number of colocated candidates. One
*                  entry is written when bidir is disabled; two entries
*                  ([0] from L0, [1] from L1) when bidir is enabled
*
*  @param[out] pu1_search_candidate_list_index : search candidate list
*                  index; same one / two entry layout as pi4_num_coloc_cands
*
*  @param[in]  i4_num_act_ref_l0   : number of active refs in L0
*
*  @param[in]  i4_num_act_ref_l1   : number of active refs in L1
*
*  @param[in]  u1_is_bidir_enabled : non-zero when bidir is enabled
*
*  @param[in]  u1_4x4_blk_in_l1me  : non-zero when 4x4 blocks are used in L1
*                  ME; the TR / BL / BR colocated candidates are added
*
*  @return None
********************************************************************************
*/
void hme_search_cand_data_init(
    S32 *pi4_id_Z,
    S32 *pi4_id_coloc,
    S32 *pi4_num_coloc_cands,
    U08 *pu1_search_candidate_list_index,
    S32 i4_num_act_ref_l0,
    S32 i4_num_act_ref_l1,
    U08 u1_is_bidir_enabled,
    U08 u1_4x4_blk_in_l1me)
{
    /* Extra colocated candidates, in the order in which they are appended */
    SEARCH_CANDIDATE_TYPE_T ae_extra_coloc_types[6] = {
        PROJECTED_COLOC_TR0, PROJECTED_COLOC_BL0, PROJECTED_COLOC_BR0,
        PROJECTED_COLOC_TR1, PROJECTED_COLOC_BL1, PROJECTED_COLOC_BR1
    };

    S32 i4_cand;
    S32 i4_extra;

    if(!u1_is_bidir_enabled)
    {
        /* Two list indices per L0 ref count: even without, odd with 4x4 */
        U08 u1_list_index = (i4_num_act_ref_l0 - 1) * 2 + (u1_4x4_blk_in_l1me ? 1 : 0);
        S32 i4_num_coloc = i4_num_act_ref_l0 * 2;

        if((i4_num_act_ref_l0 >= 1) && (i4_num_act_ref_l0 <= 4))
        {
            /* Two projected colocated candidates per active L0 ref */
            for(i4_cand = 0; i4_cand < i4_num_coloc; i4_cand++)
            {
                pi4_id_coloc[i4_cand] = hme_decide_search_candidate_priority_in_l0_me(
                    (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i4_cand), u1_list_index);
            }

            if(u1_4x4_blk_in_l1me)
            {
                /* A single ref adds TR0 / BL0 / BR0 only; two or more */
                /* refs add the TR1 / BL1 / BR1 set as well            */
                S32 i4_num_extra = (1 == i4_num_act_ref_l0) ? 3 : 6;

                for(i4_extra = 0; i4_extra < i4_num_extra; i4_extra++)
                {
                    pi4_id_coloc[i4_num_coloc + i4_extra] =
                        hme_decide_search_candidate_priority_in_l0_me(
                            ae_extra_coloc_types[i4_extra], u1_list_index);
                }

                i4_num_coloc += i4_num_extra;
            }
        }
        else
        {
            /* Only 1 to 4 active L0 refs are supported */
            ASSERT(0);
        }

        *pi4_num_coloc_cands = i4_num_coloc;
        *pu1_search_candidate_list_index = u1_list_index;
    }
    else
    {
        /* The list index used for the lookups below is hardcoded to 10    */
        /* (or 11 with 4x4 blocks). It is not the value returned to the    */
        /* caller: the returned indices depend on the number of refs in L0 */
        /* and L1 respectively and are computed separately further down    */
        U08 u1_list_index = u1_4x4_blk_in_l1me ? 11 : 10;
        S32 i4_num_coloc = 2 + (2 * ((i4_num_act_ref_l0 > 1) || (i4_num_act_ref_l1 > 1)));

        for(i4_cand = 0; i4_cand < i4_num_coloc; i4_cand++)
        {
            pi4_id_coloc[i4_cand] = hme_decide_search_candidate_priority_in_l0_me(
                (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i4_cand), u1_list_index);
        }

        if(u1_4x4_blk_in_l1me)
        {
            /* Only the TR0 / BL0 / BR0 set is appended here */
            for(i4_extra = 0; i4_extra < 3; i4_extra++)
            {
                pi4_id_coloc[i4_num_coloc + i4_extra] =
                    hme_decide_search_candidate_priority_in_l0_me(
                        ae_extra_coloc_types[i4_extra], u1_list_index);
            }
        }

        /* Entry 0 is derived from L0 */
        pu1_search_candidate_list_index[0] =
            8 + ((i4_num_act_ref_l0 > 1) * 2) + u1_4x4_blk_in_l1me;
        pi4_num_coloc_cands[0] = (u1_4x4_blk_in_l1me * 3) + 2 + ((i4_num_act_ref_l0 > 1) * 2);

        /* Entry 1 is derived from L1 */
        pu1_search_candidate_list_index[1] =
            8 + ((i4_num_act_ref_l1 > 1) * 2) + u1_4x4_blk_in_l1me;
        pi4_num_coloc_cands[1] = (u1_4x4_blk_in_l1me * 3) + 2 + ((i4_num_act_ref_l1 > 1) * 2);
    }

    /* Zero MV candidates use the list index returned in entry 0 */
    if(i4_num_act_ref_l0 || i4_num_act_ref_l1)
    {
        pi4_id_Z[0] = hme_decide_search_candidate_priority_in_l0_me(
            (SEARCH_CANDIDATE_TYPE_T)ZERO_MV, pu1_search_candidate_list_index[0]);
    }

    if((i4_num_act_ref_l0 > 1) && !u1_is_bidir_enabled)
    {
        pi4_id_Z[1] = hme_decide_search_candidate_priority_in_l0_me(
            (SEARCH_CANDIDATE_TYPE_T)ZERO_MV_ALTREF, pu1_search_candidate_list_index[0]);
    }
}
4693
4694 static U08
hme_determine_base_block_size(S32 * pi4_valid_part_array,S32 i4_num_valid_parts,U08 u1_cu_size)4695 hme_determine_base_block_size(S32 *pi4_valid_part_array, S32 i4_num_valid_parts, U08 u1_cu_size)
4696 {
4697 ASSERT(i4_num_valid_parts > 0);
4698
4699 if(1 == i4_num_valid_parts)
4700 {
4701 ASSERT(pi4_valid_part_array[i4_num_valid_parts - 1] == PART_ID_2Nx2N);
4702
4703 return u1_cu_size;
4704 }
4705 else
4706 {
4707 if(pi4_valid_part_array[i4_num_valid_parts - 1] <= PART_ID_NxN_BR)
4708 {
4709 return u1_cu_size / 2;
4710 }
4711 else if(pi4_valid_part_array[i4_num_valid_parts - 1] <= PART_ID_nRx2N_R)
4712 {
4713 return u1_cu_size / 4;
4714 }
4715 }
4716
4717 return u1_cu_size / 4;
4718 }
4719
hme_compute_variance_of_pu_from_base_blocks(ULWORD64 * pu8_SigmaX,ULWORD64 * pu8_SigmaXSquared,U08 u1_cu_size,U08 u1_base_block_size,S32 i4_part_id)4720 static U32 hme_compute_variance_of_pu_from_base_blocks(
4721 ULWORD64 *pu8_SigmaX,
4722 ULWORD64 *pu8_SigmaXSquared,
4723 U08 u1_cu_size,
4724 U08 u1_base_block_size,
4725 S32 i4_part_id)
4726 {
4727 U08 i, j;
4728 ULWORD64 u8_final_variance;
4729
4730 U08 u1_part_dimension_multiplier = (u1_cu_size >> 4);
4731 S32 i4_part_wd = gai1_part_wd_and_ht[i4_part_id][0] * u1_part_dimension_multiplier;
4732 S32 i4_part_ht = gai1_part_wd_and_ht[i4_part_id][1] * u1_part_dimension_multiplier;
4733 U08 u1_num_base_blocks_in_pu_row = i4_part_wd / u1_base_block_size;
4734 U08 u1_num_base_blocks_in_pu_column = i4_part_ht / u1_base_block_size;
4735 U08 u1_num_base_blocks_in_cu_row = u1_cu_size / u1_base_block_size;
4736 U08 u1_num_base_blocks = (u1_num_base_blocks_in_pu_row * u1_num_base_blocks_in_pu_column);
4737 U32 u4_num_pixels_in_base_block = u1_base_block_size * u1_base_block_size;
4738 ULWORD64 u8_final_SigmaXSquared = 0;
4739 ULWORD64 u8_final_SigmaX = 0;
4740
4741 if(ge_part_id_to_part_type[i4_part_id] != PRT_NxN)
4742 {
4743 U08 u1_column_start_index = gau1_part_id_to_part_num[i4_part_id]
4744 ? (gai1_is_part_vertical[i4_part_id]
4745 ? 0
4746 : (u1_cu_size - i4_part_wd) / u1_base_block_size)
4747 : 0;
4748 U08 u1_row_start_index = gau1_part_id_to_part_num[i4_part_id]
4749 ? (gai1_is_part_vertical[i4_part_id]
4750 ? (u1_cu_size - i4_part_ht) / u1_base_block_size
4751 : 0)
4752 : 0;
4753 U08 u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row;
4754 U08 u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column;
4755
4756 for(i = u1_row_start_index; i < u1_row_end_index; i++)
4757 {
4758 for(j = u1_column_start_index; j < u1_column_end_index; j++)
4759 {
4760 u8_final_SigmaXSquared += pu8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row];
4761 u8_final_SigmaX += pu8_SigmaX[j + i * u1_num_base_blocks_in_cu_row];
4762 }
4763 }
4764
4765 u8_final_variance =
4766 u1_num_base_blocks * u4_num_pixels_in_base_block * u8_final_SigmaXSquared;
4767 u8_final_variance -= u8_final_SigmaX * u8_final_SigmaX;
4768 u8_final_variance +=
4769 ((u1_num_base_blocks * u4_num_pixels_in_base_block) *
4770 (u1_num_base_blocks * u4_num_pixels_in_base_block) / 2);
4771 u8_final_variance /= (u1_num_base_blocks * u4_num_pixels_in_base_block) *
4772 (u1_num_base_blocks * u4_num_pixels_in_base_block);
4773
4774 ASSERT(u8_final_variance <= UINT_MAX);
4775 }
4776 else
4777 {
4778 U08 u1_row_start_index;
4779 U08 u1_column_start_index;
4780 U08 u1_row_end_index;
4781 U08 u1_column_end_index;
4782
4783 switch(gau1_part_id_to_part_num[i4_part_id])
4784 {
4785 case 0:
4786 {
4787 u1_row_start_index = 0;
4788 u1_column_start_index = 0;
4789
4790 break;
4791 }
4792 case 1:
4793 {
4794 u1_row_start_index = 0;
4795 u1_column_start_index = u1_num_base_blocks_in_pu_row;
4796
4797 break;
4798 }
4799 case 2:
4800 {
4801 u1_row_start_index = u1_num_base_blocks_in_pu_column;
4802 u1_column_start_index = 0;
4803
4804 break;
4805 }
4806 case 3:
4807 {
4808 u1_row_start_index = u1_num_base_blocks_in_pu_column;
4809 u1_column_start_index = u1_num_base_blocks_in_pu_row;
4810
4811 break;
4812 }
4813 }
4814
4815 u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row;
4816 u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column;
4817
4818 for(i = u1_row_start_index; i < u1_row_end_index; i++)
4819 {
4820 for(j = u1_column_start_index; j < u1_column_end_index; j++)
4821 {
4822 u8_final_SigmaXSquared += pu8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row];
4823 u8_final_SigmaX += pu8_SigmaX[j + i * u1_num_base_blocks_in_cu_row];
4824 }
4825 }
4826
4827 u8_final_variance =
4828 u1_num_base_blocks * u4_num_pixels_in_base_block * u8_final_SigmaXSquared;
4829 u8_final_variance -= u8_final_SigmaX * u8_final_SigmaX;
4830 u8_final_variance +=
4831 ((u1_num_base_blocks * u4_num_pixels_in_base_block) *
4832 (u1_num_base_blocks * u4_num_pixels_in_base_block) / 2);
4833 u8_final_variance /= (u1_num_base_blocks * u4_num_pixels_in_base_block) *
4834 (u1_num_base_blocks * u4_num_pixels_in_base_block);
4835
4836 ASSERT(u8_final_variance <= UINT_MAX);
4837 }
4838
4839 return u8_final_variance;
4840 }
4841
/**
 * @brief Computes the variance of every valid partition of a CU.
 *
 * The CU is split into square base blocks (size chosen by
 * hme_determine_base_block_size). The sum and the sum of squares of the
 * samples of each base block are gathered once, and the variance of each
 * partition is then derived from those sums.
 *
 * @param[in]  pu1_data              top-left sample of the CU
 * @param[in]  i4_data_stride        stride of pu1_data
 * @param[in]  pi4_valid_part_array  list of valid partition ids
 * @param[out] pu4_variance          variance per partition, indexed by
 *                                   partition id
 * @param[in]  i4_num_valid_parts    number of entries in pi4_valid_part_array
 * @param[in]  u1_cu_size            CU width/height in pixels
 */
void hme_compute_variance_for_all_parts(
    U08 *pu1_data,
    S32 i4_data_stride,
    S32 *pi4_valid_part_array,
    U32 *pu4_variance,
    S32 i4_num_valid_parts,
    U08 u1_cu_size)
{
    ULWORD64 au8_SigmaX[16];
    ULWORD64 au8_SigmaXSquared[16];
    U08 u1_blk_row, u1_blk_col, u1_y, u1_x;
    U08 u1_part_ctr;

    U08 u1_base_block_size =
        hme_determine_base_block_size(pi4_valid_part_array, i4_num_valid_parts, u1_cu_size);
    U08 u1_num_base_blocks_per_side = u1_cu_size / u1_base_block_size;

    /* The local arrays hold at most a 4x4 grid of base blocks */
    ASSERT(u1_num_base_blocks_per_side <= 4);

    for(u1_blk_row = 0; u1_blk_row < u1_num_base_blocks_per_side; u1_blk_row++)
    {
        for(u1_blk_col = 0; u1_blk_col < u1_num_base_blocks_per_side; u1_blk_col++)
        {
            U08 *pu1_blk = pu1_data + (u1_base_block_size * u1_blk_col) +
                           (u1_base_block_size * u1_blk_row * i4_data_stride);
            S32 i4_blk_idx = u1_blk_col + u1_blk_row * u1_num_base_blocks_per_side;
            ULWORD64 u8_sum = 0;
            ULWORD64 u8_sum_of_squares = 0;

            for(u1_y = 0; u1_y < u1_base_block_size; u1_y++)
            {
                for(u1_x = 0; u1_x < u1_base_block_size; u1_x++)
                {
                    U08 u1_sample = pu1_blk[u1_x + u1_y * i4_data_stride];

                    u8_sum += u1_sample;
                    u8_sum_of_squares += u1_sample * u1_sample;
                }
            }

            au8_SigmaX[i4_blk_idx] = u8_sum;
            au8_SigmaXSquared[i4_blk_idx] = u8_sum_of_squares;
        }
    }

    /* Results are stored at the partition id, not at the list position */
    for(u1_part_ctr = 0; u1_part_ctr < i4_num_valid_parts; u1_part_ctr++)
    {
        S32 i4_part_id = pi4_valid_part_array[u1_part_ctr];

        pu4_variance[i4_part_id] = hme_compute_variance_of_pu_from_base_blocks(
            au8_SigmaX, au8_SigmaXSquared, u1_cu_size, u1_base_block_size, i4_part_id);
    }
}
4894
/**
 * @brief Combines per-base-block sums into the sums of one partition (PU).
 *
 * The base-block arrays are read as a row-major grid with
 * u1_base_blk_array_stride entries per row. On return:
 *   - pu8_final_sigmaX[i4_part_id]         = sum of pu4_SigmaX over the
 *                                            base blocks of the partition
 *   - pu8_final_sigmaX_Squared[i4_part_id] = N * (sum of pu4_SigmaXSquared
 *                                            over the same base blocks)
 * where N is the number of pixels in the partition.
 *
 * @param[in]  pu4_SigmaX                per-base-block sum of samples
 * @param[in]  pu4_SigmaXSquared         per-base-block sum of squared samples
 * @param[out] pu8_final_sigmaX          output array, written at i4_part_id
 * @param[out] pu8_final_sigmaX_Squared  output array, written at i4_part_id
 * @param[in]  u1_cu_size                CU width/height in pixels
 * @param[in]  u1_base_block_size        base block width/height in pixels
 * @param[in]  i4_part_id                partition id; indexes the partition
 *                                       geometry tables and the outputs
 * @param[in]  u1_base_blk_array_stride  entries per row of the input arrays
 */
void hme_compute_final_sigma_of_pu_from_base_blocks(
    U32 *pu4_SigmaX,
    U32 *pu4_SigmaXSquared,
    ULWORD64 *pu8_final_sigmaX,
    ULWORD64 *pu8_final_sigmaX_Squared,
    U08 u1_cu_size,
    U08 u1_base_block_size,
    S32 i4_part_id,
    U08 u1_base_blk_array_stride)
{
    U08 i, j;

    /* Partition dimensions: table entries are scaled by (CU size / 16) */
    U08 u1_part_dimension_multiplier = (u1_cu_size >> 4);
    S32 i4_part_wd = gai1_part_wd_and_ht[i4_part_id][0] * u1_part_dimension_multiplier;
    S32 i4_part_ht = gai1_part_wd_and_ht[i4_part_id][1] * u1_part_dimension_multiplier;
    U08 u1_num_base_blocks_in_pu_row = i4_part_wd / u1_base_block_size;
    U08 u1_num_base_blocks_in_pu_column = i4_part_ht / u1_base_block_size;
    U16 u2_num_base_blocks = (u1_num_base_blocks_in_pu_row * u1_num_base_blocks_in_pu_column);
    U32 u4_num_pixels_in_base_block = u1_base_block_size * u1_base_block_size;
    U32 u4_N = (u2_num_base_blocks * u4_num_pixels_in_base_block);
    ULWORD64 u8_sigmaX = 0;
    ULWORD64 u8_sigmaXSquared = 0;

    /* Top-left base block of the partition inside the CU grid. Defaults to */
    /* the CU origin so the indices are never read uninitialized            */
    U08 u1_row_start_index = 0;
    U08 u1_column_start_index = 0;
    U08 u1_row_end_index;
    U08 u1_column_end_index;

    if(ge_part_id_to_part_type[i4_part_id] != PRT_NxN)
    {
        /* Partition number 0 starts at the CU origin. Any other partition  */
        /* is pushed against the far edge along the axis selected by        */
        /* gai1_is_part_vertical                                            */
        if(gau1_part_id_to_part_num[i4_part_id])
        {
            if(gai1_is_part_vertical[i4_part_id])
            {
                u1_row_start_index = (u1_cu_size - i4_part_ht) / u1_base_block_size;
            }
            else
            {
                u1_column_start_index = (u1_cu_size - i4_part_wd) / u1_base_block_size;
            }
        }
    }
    else
    {
        /* NxN: the four quadrants in raster order */
        switch(gau1_part_id_to_part_num[i4_part_id])
        {
        case 0:
        {
            break;
        }
        case 1:
        {
            u1_column_start_index = u1_num_base_blocks_in_pu_row;

            break;
        }
        case 2:
        {
            u1_row_start_index = u1_num_base_blocks_in_pu_column;

            break;
        }
        case 3:
        {
            u1_row_start_index = u1_num_base_blocks_in_pu_column;
            u1_column_start_index = u1_num_base_blocks_in_pu_row;

            break;
        }
        default:
        {
            /* An NxN partition has exactly four parts */
            ASSERT(0);

            break;
        }
        }
    }

    u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row;
    u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column;

    /* Accumulate the base-block sums covered by the partition */
    for(i = u1_row_start_index; i < u1_row_end_index; i++)
    {
        for(j = u1_column_start_index; j < u1_column_end_index; j++)
        {
            S32 i4_blk_idx = j + i * u1_base_blk_array_stride;

            u8_sigmaXSquared += pu4_SigmaXSquared[i4_blk_idx];
            u8_sigmaX += pu4_SigmaX[i4_blk_idx];
        }
    }

    pu8_final_sigmaX[i4_part_id] = u8_sigmaX;
    /* The sum of squares is delivered pre-multiplied by the pixel count */
    pu8_final_sigmaX_Squared[i4_part_id] = u8_sigmaXSquared * u4_N;
}
5009
/**
 * @brief Scales the SAD of every valid partition by a factor derived from
 *        the source and prediction variances.
 *
 * For each valid partition:
 *   1. The prediction sums are combined from per-base-block sums and turned
 *      into N*SigmaXSquared - SigmaX*SigmaX.
 *   2. ihevce_calc_stim_injected_variance supplies the source-side value
 *      and a shift that is applied to the prediction-side value.
 *   3. Both values are shifted down together whenever GETRANGE64 reports
 *      more than 27 bits for the prediction-side value.
 *   4. noise_term = 2 * src * ref / (src^2 + ref^2) in Q(STIM_Q_FORMAT),
 *      rounded, then multiplied by i4_alpha_stim_multiplier.
 *   5. sad = (sad * ((1 << q) - noise_term) + (1 << (q - 1))) >> q with
 *      q = STIM_Q_FORMAT + ALPHA_Q_FORMAT.
 * When i4_alpha_stim_multiplier is zero, steps 1 to 4 are skipped and the
 * noise term is zero.
 *
 * @param[in]     pu1_pred                  prediction samples of the CU
 * @param[in]     i4_pred_stride            stride of pu1_pred
 * @param[in]     pi4_valid_part_array      list of valid partition ids
 * @param[in]     pu8_src_sigmaX            forwarded to
 *                                          ihevce_calc_stim_injected_variance
 * @param[in]     pu8_src_sigmaXSquared     forwarded to
 *                                          ihevce_calc_stim_injected_variance
 * @param[in,out] pi4_sad_array             SAD per partition id, updated
 *                                          in place
 * @param[in]     i4_alpha_stim_multiplier  multiplier of the noise term
 * @param[in]     i4_inv_wt                 forwarded to
 *                                          ihevce_calc_stim_injected_variance
 * @param[in]     i4_inv_wt_shift_val       forwarded to
 *                                          ihevce_calc_stim_injected_variance
 * @param[in]     i4_num_valid_parts        entries in pi4_valid_part_array
 * @param[in]     i4_wpred_log_wdc          forwarded to
 *                                          ihevce_calc_stim_injected_variance
 * @param[in]     u1_cu_size                CU width/height (asserted >= 16)
 */
void hme_compute_stim_injected_distortion_for_all_parts(
    U08 *pu1_pred,
    S32 i4_pred_stride,
    S32 *pi4_valid_part_array,
    ULWORD64 *pu8_src_sigmaX,
    ULWORD64 *pu8_src_sigmaXSquared,
    S32 *pi4_sad_array,
    S32 i4_alpha_stim_multiplier,
    S32 i4_inv_wt,
    S32 i4_inv_wt_shift_val,
    S32 i4_num_valid_parts,
    S32 i4_wpred_log_wdc,
    U08 u1_cu_size)
{
    /* Per-base-block sums of the prediction */
    U32 au4_pred_sigmaX[16], au4_pred_sigmaXSquared[16];
    /* Per-partition sums of the prediction, indexed by partition id */
    ULWORD64 au8_pu_sigmaX[17], au8_pu_sigmaXSquared[17];
    U16 u2_part_ctr;
    U08 u1_base_block_size;
    U08 u1_num_base_blocks_per_side;

    WORD32 i4_q_level = STIM_Q_FORMAT + ALPHA_Q_FORMAT;

    u1_base_block_size =
        hme_determine_base_block_size(pi4_valid_part_array, i4_num_valid_parts, u1_cu_size);

    ASSERT(u1_cu_size >= 16);

    u1_num_base_blocks_per_side = u1_cu_size / u1_base_block_size;

    hme_compute_sigmaX_and_sigmaXSquared(
        pu1_pred,
        i4_pred_stride,
        au4_pred_sigmaX,
        au4_pred_sigmaXSquared,
        u1_base_block_size,
        u1_base_block_size,
        u1_cu_size,
        u1_cu_size,
        1,
        u1_num_base_blocks_per_side);

    for(u2_part_ctr = 0; u2_part_ctr < i4_num_valid_parts; u2_part_ctr++)
    {
        S32 part_id = pi4_valid_part_array[u2_part_ctr];
        S32 i4_noise_term = 0;
        ULWORD64 u8_scaled_dist;

        if(i4_alpha_stim_multiplier)
        {
            unsigned long u4_shift_val;
            S32 i4_bits_req;
            ULWORD64 u8_src_var, u8_ref_var, u8_similarity;

            /* Sums of the prediction over this partition */
            hme_compute_final_sigma_of_pu_from_base_blocks(
                au4_pred_sigmaX,
                au4_pred_sigmaXSquared,
                au8_pu_sigmaX,
                au8_pu_sigmaXSquared,
                u1_cu_size,
                u1_base_block_size,
                part_id,
                u1_num_base_blocks_per_side);

            u8_ref_var = au8_pu_sigmaXSquared[part_id] -
                         (au8_pu_sigmaX[part_id] * au8_pu_sigmaX[part_id]);

            u4_shift_val = ihevce_calc_stim_injected_variance(
                pu8_src_sigmaX,
                pu8_src_sigmaXSquared,
                &u8_src_var,
                i4_inv_wt,
                i4_inv_wt_shift_val,
                i4_wpred_log_wdc,
                part_id);

            u8_ref_var = u8_ref_var >> u4_shift_val;

            GETRANGE64(i4_bits_req, u8_ref_var);

            /* Both operands are reduced by the same amount */
            if(i4_bits_req > 27)
            {
                u8_ref_var = u8_ref_var >> (i4_bits_req - 27);
                u8_src_var = u8_src_var >> (i4_bits_req - 27);
            }

            if(u8_src_var != u8_ref_var)
            {
                ULWORD64 u8_numerator = (u8_src_var * u8_ref_var * (1 << STIM_Q_FORMAT));
                ULWORD64 u8_denominator =
                    (u8_src_var * u8_src_var) + (u8_ref_var * u8_ref_var);

                /* Rounded division, then the factor of two */
                u8_similarity = (u8_numerator + (u8_denominator / 2)) / u8_denominator;
                u8_similarity = (2 * u8_similarity);
            }
            else
            {
                /* Equal values give exactly 1.0 in Q(STIM_Q_FORMAT); this  */
                /* branch also keeps the 0/0 case away from the division    */
                u8_similarity = (1 << STIM_Q_FORMAT);
            }

            i4_noise_term = (UWORD32)u8_similarity;

            ASSERT(i4_noise_term >= 0);

            i4_noise_term *= i4_alpha_stim_multiplier;
        }

        u8_scaled_dist = pi4_sad_array[part_id];
        u8_scaled_dist *= ((1 << (i4_q_level)) - (i4_noise_term));
        u8_scaled_dist += (1 << ((i4_q_level)-1));
        pi4_sad_array[part_id] = (UWORD32)(u8_scaled_dist >> (i4_q_level));
    }
}
5125
/**
 * @brief Computes, for every base block of a block, the sum of its samples
 *        and the sum of its squared samples.
 *
 * The block is tiled with (u1_blk_wd / u1_base_blk_wd) by
 * (u1_blk_ht / u1_base_blk_ht) base blocks. The result for the base block
 * at (row, col) is written to entry [col + row * u1_array_stride] of both
 * output arrays.
 *
 * Output element type depends on u1_is_sigma_pointer_size_32_bit:
 *   - non-zero : outputs are U32 arrays holding the plain sums
 *   - zero     : outputs are ULWORD64 arrays, and the sum of squares is
 *                stored multiplied by (u1_blk_wd * u1_blk_ht)
 *
 * @param[in]  pu1_data                         top-left sample of the block
 * @param[in]  i4_buf_stride                    stride of pu1_data
 * @param[out] pv_sigmaX                        per-base-block sums
 * @param[out] pv_sigmaXSquared                 per-base-block sums of squares
 * @param[in]  u1_base_blk_wd                   base block width
 * @param[in]  u1_base_blk_ht                   base block height
 * @param[in]  u1_blk_wd                        block width
 * @param[in]  u1_blk_ht                        block height
 * @param[in]  u1_is_sigma_pointer_size_32_bit  selects the output type
 * @param[in]  u1_array_stride                  entries per row of the outputs
 */
void hme_compute_sigmaX_and_sigmaXSquared(
    U08 *pu1_data,
    S32 i4_buf_stride,
    void *pv_sigmaX,
    void *pv_sigmaXSquared,
    U08 u1_base_blk_wd,
    U08 u1_base_blk_ht,
    U08 u1_blk_wd,
    U08 u1_blk_ht,
    U08 u1_is_sigma_pointer_size_32_bit,
    U08 u1_array_stride)
{
    U08 u1_blk_row, u1_blk_col, u1_y, u1_x;

    U08 u1_num_base_blks_in_row = u1_blk_wd / u1_base_blk_wd;
    U08 u1_num_base_blks_in_column = u1_blk_ht / u1_base_blk_ht;

    for(u1_blk_row = 0; u1_blk_row < u1_num_base_blks_in_column; u1_blk_row++)
    {
        for(u1_blk_col = 0; u1_blk_col < u1_num_base_blks_in_row; u1_blk_col++)
        {
            U08 *pu1_blk = pu1_data + (u1_base_blk_wd * u1_blk_col) +
                           (u1_base_blk_ht * u1_blk_row * i4_buf_stride);
            S32 i4_out_idx = u1_blk_col + u1_blk_row * u1_array_stride;
            ULWORD64 u8_sum = 0;
            ULWORD64 u8_sum_of_squares = 0;

            for(u1_y = 0; u1_y < u1_base_blk_ht; u1_y++)
            {
                for(u1_x = 0; u1_x < u1_base_blk_wd; u1_x++)
                {
                    U08 u1_sample = pu1_blk[u1_x + u1_y * i4_buf_stride];

                    u8_sum += u1_sample;
                    u8_sum_of_squares += (u1_sample * u1_sample);
                }
            }

            if(u1_is_sigma_pointer_size_32_bit)
            {
                /* Narrowing keeps the low 32 bits, which equals a sum     */
                /* accumulated directly in 32-bit unsigned arithmetic      */
                ((U32 *)pv_sigmaX)[i4_out_idx] = (U32)u8_sum;
                ((U32 *)pv_sigmaXSquared)[i4_out_idx] = (U32)u8_sum_of_squares;
            }
            else
            {
                ((ULWORD64 *)pv_sigmaX)[i4_out_idx] = u8_sum;
                /* The 64-bit variant is pre-scaled by the full block area */
                ((ULWORD64 *)pv_sigmaXSquared)[i4_out_idx] =
                    u8_sum_of_squares * u1_blk_wd * u1_blk_ht;
            }
        }
    }
}
5210
#if TEMPORAL_NOISE_DETECT
/**
 * @brief Writes one flag value to the four 8x8 entries of the CTB noise map
 *        that belong to a 16x16 block.
 *
 * @param[in,out] ps_ctb_noise_params  CTB noise parameters holding the map
 * @param[in]     index_8x8_block      map index of the top-left 8x8 block
 * @param[in]     num_8x8_in_ctb_row   distance between two map rows
 * @param[in]     u1_is_noisy          value to store (0 or 1)
 */
static void ihevce_set_16x16_noise_flags(
    ihevce_ctb_noise_params *ps_ctb_noise_params,
    WORD32 index_8x8_block,
    WORD32 num_8x8_in_ctb_row,
    UWORD8 u1_is_noisy)
{
    ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block] = u1_is_noisy;
    ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block + 1] = u1_is_noisy;
    ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row] = u1_is_noisy;
    ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row + 1] =
        u1_is_noisy;
}

/**
 * @brief Decides whether a 16x16 block is temporally noisy.
 *
 * For each prediction direction, the first MV stored in the coarse layer MV
 * bank at the projected block position is fetched and scaled by 2. A 16x16
 * residue (weighted input minus displaced reference) is formed and its
 * variance is compared with
 *     (TEMPORAL_VARIANCE_FACTOR * source variance of the 16x16 block)
 *         >> Q_TEMPORAL_VARIANCE_FACTOR
 * The block is reported noisy only if the residue variance exceeds that
 * threshold in every direction; the first direction that does not exceed it
 * clears the flags and ends the search.
 *
 * @param[in]     had_block_size              asserted to be 8, 16 or 32;
 *                                            otherwise unused
 * @param[in]     ctb_width                   unused
 * @param[in]     ctb_height                  unused
 * @param[in,out] ps_ctb_noise_params         source variances are read from
 *                                            here; the 8x8 noise flags are
 *                                            written here
 * @param[in]     s_proj_srch_cand_init_data  supplies the current and
 *                                            coarse layer contexts
 * @param[in]     s_search_prms_blk           supplies the input stride
 * @param[in]     ps_ctxt                     ME frame context; its
 *                                            s_wt_pred member supplies the
 *                                            weighted input planes
 * @param[in]     num_pred_dir                number of directions to test
 *                                            (at most 2)
 * @param[in]     i4_num_act_ref_l0           number of active L0 references
 * @param[in]     i4_num_act_ref_l1           unused
 * @param[in]     i4_cu_x_off                 x offset of the block
 * @param[in]     i4_cu_y_off                 y offset of the block
 * @param[in]     ps_wt_inp_prms              not consulted; the weighted
 *                                            input is always taken from
 *                                            ps_ctxt->s_wt_pred
 * @param[in]     input_stride                unused
 * @param[in]     index_8x8_block             noise-map index of the
 *                                            top-left 8x8 block
 * @param[in]     num_horz_blocks             unused
 * @param[in]     num_8x8_in_ctb_row          distance between two rows of
 *                                            the noise map
 * @param[in]     i4_16x16_index              index of the 16x16 block; also
 *                                            locates the block inside the
 *                                            weighted input as a 4-wide grid
 *
 * @return 1 if the block was found noisy, 0 otherwise
 */
WORD32 ihevce_16x16block_temporal_noise_detect(
    WORD32 had_block_size,
    WORD32 ctb_width,
    WORD32 ctb_height,
    ihevce_ctb_noise_params *ps_ctb_noise_params,
    fpel_srch_cand_init_data_t *s_proj_srch_cand_init_data,
    hme_search_prms_t *s_search_prms_blk,
    me_frm_ctxt_t *ps_ctxt,
    WORD32 num_pred_dir,
    WORD32 i4_num_act_ref_l0,
    WORD32 i4_num_act_ref_l1,
    WORD32 i4_cu_x_off,
    WORD32 i4_cu_y_off,
    wgt_pred_ctxt_t *ps_wt_inp_prms,
    WORD32 input_stride,
    WORD32 index_8x8_block,
    WORD32 num_horz_blocks,
    WORD32 num_8x8_in_ctb_row,
    WORD32 i4_16x16_index)
{
    /* Residue of the 16x16 block and its statistics, one variance per */
    /* tested direction                                                 */
    WORD16 pi2_residue_16x16[256];
    WORD32 mean_16x16;
    UWORD32 variance_16x16[2];

    WORD32 noise_detected = 0;
    U08 u1_pred_dir;
    U08 u1_default_ref_id = 0;

    layer_ctxt_t *ps_curr_layer;
    layer_ctxt_t *ps_coarse_layer;
    layer_mv_t *ps_layer_mvbank;
    wgt_pred_ctxt_t *ps_wt_pred;
    U08 **ppu1_ref;
    S32 wd_c, ht_c;
    S32 blksize_p;
    S32 posx, posy;
    S32 i4_inp_stride, i4_inp_off;
    S32 i4_ref_stride, i4_ref_offset;

    /* Only 8, 16 and 32 are supported */
    assert((had_block_size == 8) || (had_block_size == 16) || (had_block_size == 32));

    /* variance_16x16 has room for two directions only */
    ASSERT(num_pred_dir <= 2);

    ps_curr_layer = s_proj_srch_cand_init_data->ps_curr_layer;
    ps_coarse_layer = s_proj_srch_cand_init_data->ps_coarse_layer;

    /* Width and height of the current layer */
    wd_c = ps_curr_layer->i4_wd;
    ht_c = ps_curr_layer->i4_ht;

    ps_wt_pred = &ps_ctxt->s_wt_pred;

    ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
    blksize_p = gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];

    /* ASSERT for valid sizes */
    ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));

    posx = i4_cu_x_off + 2;
    posy = i4_cu_y_off + 2;

    /* Location of the 16x16 block inside the weighted input buffer */
    i4_inp_stride = s_search_prms_blk->i4_inp_stride;
    i4_inp_off = (i4_16x16_index % 4) * 16;
    i4_inp_off += (i4_16x16_index / 4) * 16 * i4_inp_stride;

    /* Location of the colocated block inside the reference planes */
    i4_ref_stride = ps_curr_layer->i4_rec_stride;
    ppu1_ref = ps_curr_layer->ppu1_list_rec_fxfy;
    i4_ref_offset = (i4_ref_stride * i4_cu_y_off) + i4_cu_x_off;

    for(u1_pred_dir = 0; u1_pred_dir < num_pred_dir; u1_pred_dir++)
    {
        search_node_t s_cand;
        hme_mv_t *ps_mv;
        S08 *pi1_ref_idx;
        U08 *pu1_inp_blk;
        U08 *pu1_ref_blk;
        S32 blk_x, blk_y, i4_offset;
        WORD32 row, col;
        WORD32 actual_pred_dir;

        /* The first pass uses L0 unless there is no active L0 reference; */
        /* the second pass always uses L1                                  */
        if(0 == u1_pred_dir)
        {
            actual_pred_dir = (0 == i4_num_act_ref_l0) ? 1 : 0;
        }
        else
        {
            actual_pred_dir = (1 == u1_pred_dir) ? 1 : 0;
        }

        /* Safety check to avoid uninitialized access across temporal layers */
        posx = CLIP3(posx, 0, (wd_c - blksize_p));
        posy = CLIP3(posy, 0, (ht_c - blksize_p));

        /* Project the position to the coarse layer MV bank */
        blk_x = posx >> blksize_p;
        blk_y = posy >> blksize_p;

        i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
        i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);

        ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
        pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;

        /* Entries of the second direction follow those of all L0 references */
        if(actual_pred_dir == 1)
        {
            ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
            pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
        }

        /* Multiplication is used instead of a left shift because a left  */
        /* shift of a negative value is undefined behavior                 */
        s_cand.s_mv.i2_mvx = ps_mv[0].i2_mv_x * 2;
        s_cand.s_mv.i2_mvy = ps_mv[0].i2_mv_y * 2;
        s_cand.i1_ref_idx = pi1_ref_idx[0];

        /* Fall back to the zero MV on the default reference */
        if((s_cand.i1_ref_idx < 0) || (s_cand.s_mv.i2_mvx == INTRA_MV))
        {
            s_cand.i1_ref_idx = u1_default_ref_id;
            s_cand.s_mv.i2_mvx = 0;
            s_cand.s_mv.i2_mvy = 0;
        }

        /* Displaced reference block */
        pu1_ref_blk = ppu1_ref[s_cand.i1_ref_idx] + i4_ref_offset;
        pu1_ref_blk += s_cand.s_mv.i2_mvx;
        pu1_ref_blk += s_cand.s_mv.i2_mvy * i4_ref_stride;

        /* Weighted input block for the same reference index */
        pu1_inp_blk = ps_wt_pred->apu1_wt_inp[s_cand.i1_ref_idx] + i4_inp_off;

        /* Residue = input - reference */
        for(row = 0; row < 16; row++)
        {
            U08 *pu1_inp_row = pu1_inp_blk + row * i4_inp_stride;
            U08 *pu1_ref_row = pu1_ref_blk + row * i4_ref_stride;

            for(col = 0; col < 16; col++)
            {
                pi2_residue_16x16[row * 16 + col] = (WORD16)(pu1_inp_row[col] - pu1_ref_row[col]);
            }
        }

        ihevce_calc_variance_signed(
            pi2_residue_16x16, 16, &mean_16x16, &variance_16x16[u1_pred_dir], 16, 16);

        /* Compare the residue variance with the scaled source variance */
        if(variance_16x16[u1_pred_dir] >
           ((TEMPORAL_VARIANCE_FACTOR *
             ps_ctb_noise_params->au4_variance_src_16x16[i4_16x16_index]) >>
            Q_TEMPORAL_VARIANCE_FACTOR))
        {
            /* Flag the block only once every direction has indicated noise */
            if(u1_pred_dir == num_pred_dir - 1)
            {
                ihevce_set_16x16_noise_flags(
                    ps_ctb_noise_params, index_8x8_block, num_8x8_in_ctb_row, 1);
                noise_detected = 1;
            }
        }
        else
        {
            /* One direction without noise is enough; skip the others */
            noise_detected = 0;
            ihevce_set_16x16_noise_flags(
                ps_ctb_noise_params, index_8x8_block, num_8x8_in_ctb_row, 0);
            break;
        }
    }

    return (noise_detected);
}
#endif
5505
/**
 * @brief Produces the block for one quarter-pel position from two source
 *        blocks passed to hevc_avg_2d.
 *
 * The low two bits of each MV component select an entry of
 * gas_qpel_inp_buf_cfg, indexed [y fraction][x fraction]. That entry names,
 * for each of the two sources, a plane in ps_prms->ppu1_ref and an x/y
 * offset within it. The remaining MV bits (mv >> 2) give the displacement
 * that is common to both sources.
 *
 * The output block is written to ps_prms->apu1_interp_out[i4_buf_id]; the
 * same pointer and ps_prms->i4_out_stride are stored at index i4_buf_id of
 * ppu1_final and pi4_final_stride.
 *
 * @param[in]  ps_prms           interpolation parameters (reference planes,
 *                               strides, block size, output buffers)
 * @param[in]  i4_mv_x           x component of the MV in quarter-pel units
 * @param[in]  i4_mv_y           y component of the MV in quarter-pel units
 * @param[in]  i4_buf_id         output buffer to fill
 * @param[out] ppu1_final        receives the output pointer at i4_buf_id
 * @param[out] pi4_final_stride  receives the output stride at i4_buf_id
 */
void hme_qpel_interp_avg_1pt(
    interp_prms_t *ps_prms,
    S32 i4_mv_x,
    S32 i4_mv_y,
    S32 i4_buf_id,
    U08 **ppu1_final,
    S32 *pi4_final_stride)
{
    /* Displacement shared by both sources */
    S32 i4_fpel_offset = (i4_mv_x >> 2) + (i4_mv_y >> 2) * ps_prms->i4_ref_stride;

    /* Descriptor of the two sources for this fractional position */
    qpel_input_buf_cfg_t *ps_cfg = &gas_qpel_inp_buf_cfg[i4_mv_y & 3][i4_mv_x & 3];

    U08 *pu1_first_src = ps_prms->ppu1_ref[ps_cfg->i1_buf_id1] +
                         (ps_cfg->i1_buf_xoff1 + i4_fpel_offset) +
                         (ps_cfg->i1_buf_yoff1 * ps_prms->i4_ref_stride);

    U08 *pu1_second_src = ps_prms->ppu1_ref[ps_cfg->i1_buf_id2] +
                          (ps_cfg->i1_buf_xoff2 + i4_fpel_offset) +
                          (ps_cfg->i1_buf_yoff2 * ps_prms->i4_ref_stride);

    U08 *pu1_out = ps_prms->apu1_interp_out[i4_buf_id];

    hevc_avg_2d(
        pu1_first_src,
        pu1_second_src,
        ps_prms->i4_ref_stride,
        ps_prms->i4_ref_stride,
        ps_prms->i4_blk_wd,
        ps_prms->i4_blk_ht,
        pu1_out,
        ps_prms->i4_out_stride);

    /* Publish where the result lives */
    ppu1_final[i4_buf_id] = pu1_out;
    pi4_final_stride[i4_buf_id] = ps_prms->i4_out_stride;
}
5573
/**
 * @brief Generates the two quarter-pel points that are one step away from
 *        (i4_mv_x, i4_mv_y) along y.
 *
 * The point at (mv_y + 1) goes to output buffer 3 and the point at
 * (mv_y - 1) to output buffer 1, in that order.
 *
 * @param[in]  ps_prms           interpolation parameters
 * @param[in]  i4_mv_x           x component of the centre MV (quarter-pel)
 * @param[in]  i4_mv_y           y component of the centre MV (quarter-pel)
 * @param[out] ppu1_final        receives the output pointers at 3 and 1
 * @param[out] pi4_final_stride  receives the output strides at 3 and 1
 */
void hme_qpel_interp_avg_2pt_vert_with_reuse(
    interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y, U08 **ppu1_final, S32 *pi4_final_stride)
{
    S32 i4_mv_y_plus_one = i4_mv_y + 1;
    S32 i4_mv_y_minus_one = i4_mv_y - 1;

    hme_qpel_interp_avg_1pt(
        ps_prms, i4_mv_x, i4_mv_y_plus_one, 3, ppu1_final, pi4_final_stride);

    hme_qpel_interp_avg_1pt(
        ps_prms, i4_mv_x, i4_mv_y_minus_one, 1, ppu1_final, pi4_final_stride);
}
5581
/**
 * @brief Generates the two quarter-pel points that are one step away from
 *        (i4_mv_x, i4_mv_y) along x.
 *
 * The point at (mv_x + 1) goes to output buffer 2 and the point at
 * (mv_x - 1) to output buffer 0, in that order.
 *
 * @param[in]  ps_prms           interpolation parameters
 * @param[in]  i4_mv_x           x component of the centre MV (quarter-pel)
 * @param[in]  i4_mv_y           y component of the centre MV (quarter-pel)
 * @param[out] ppu1_final        receives the output pointers at 2 and 0
 * @param[out] pi4_final_stride  receives the output strides at 2 and 0
 */
void hme_qpel_interp_avg_2pt_horz_with_reuse(
    interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y, U08 **ppu1_final, S32 *pi4_final_stride)
{
    S32 i4_mv_x_plus_one = i4_mv_x + 1;
    S32 i4_mv_x_minus_one = i4_mv_x - 1;

    hme_qpel_interp_avg_1pt(
        ps_prms, i4_mv_x_plus_one, i4_mv_y, 2, ppu1_final, pi4_final_stride);

    hme_qpel_interp_avg_1pt(
        ps_prms, i4_mv_x_minus_one, i4_mv_y, 0, ppu1_final, pi4_final_stride);
}
5589
/**
 * Fills the per-reference MV search range table for the current frame and
 * reports the vertical range that applies to the previously encoded frame.
 *
 * Bidirectional frames (s_frm_prms.bidir_enabled == 1):
 *   For each of the first u1_num_act_ref_pics references, the vertical
 *   limit is scaled by the absolute POC distance between the current
 *   picture and that reference, then clamped to the layer maximum. The
 *   horizontal limit is always the layer maximum. If a reference POC equals
 *   ps_ctxt->i4_prev_poc, its vertical limit is written to
 *   pi2_prev_enc_frm_max_mv_y[0]; otherwise that output stays 0.
 *
 * Other frames:
 *   Asserts that no L1 references are active and assigns the layer maxima
 *   to each of the s_frm_prms.u1_num_active_ref_l0 references (note: this
 *   path does not use u1_num_act_ref_pics). pi2_prev_enc_frm_max_mv_y[0] is
 *   set to the layer's vertical maximum.
 *
 * @param ps_ctxt                    frame context (POCs, frame params)
 * @param ps_curr_layer              layer providing i2_max_mv_x / i2_max_mv_y
 * @param ps_mv_limit                output range table, indexed by reference
 * @param pi2_prev_enc_frm_max_mv_y  output; only element [0] is written
 * @param u1_num_act_ref_pics        number of entries filled on the
 *                                   bidirectional path
 */
void hme_set_mv_limit_using_dvsr_data(
    me_frm_ctxt_t *ps_ctxt,
    layer_ctxt_t *ps_curr_layer,
    range_prms_t *ps_mv_limit,
    S16 *pi2_prev_enc_frm_max_mv_y,
    U08 u1_num_act_ref_pics)
{
    WORD32 ref_ctr;

    /* Only for B/b pic. */
    if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
    {
        WORD16 i2_mv_y_per_poc, i2_max_mv_y;
        WORD32 cur_poc, prev_poc, ref_poc, abs_poc_diff;
        WORD32 i4_scaled_max_mv_y;

        pi2_prev_enc_frm_max_mv_y[0] = 0;

        cur_poc = ps_ctxt->i4_curr_poc;

        /* POC of the prev. encoded frame; does not change inside the loop */
        prev_poc = ps_ctxt->i4_prev_poc;

        /* Get abs MAX for symmetric search */
        i2_mv_y_per_poc = ps_curr_layer->i2_max_mv_y;
        /* Assuming P to P distance as 4 */
        i2_mv_y_per_poc = (i2_mv_y_per_poc + 2) >> 2;

        for(ref_ctr = 0; ref_ctr < u1_num_act_ref_pics; ref_ctr++)
        {
            ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr];
            abs_poc_diff = ABS((cur_poc - ref_poc));

            /* Get the cur. max MV based on POC distance. The product is   */
            /* kept in 32 bits and clamped BEFORE narrowing to 16 bits, so */
            /* a large POC distance saturates at the layer maximum instead */
            /* of wrapping when stored into a WORD16.                      */
            i4_scaled_max_mv_y = (WORD32)i2_mv_y_per_poc * abs_poc_diff;
            i4_scaled_max_mv_y = MIN(i4_scaled_max_mv_y, (WORD32)ps_curr_layer->i2_max_mv_y);
            i2_max_mv_y = (WORD16)i4_scaled_max_mv_y;

            ps_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
            ps_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y;
            ps_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
            ps_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y;

            /* Find the MAX MV for the prev. encoded frame to optimize */
            /* the reverse dependency of ME on Enc.Loop */
            if(ref_poc == prev_poc)
            {
                /* TO DO : Same thing for horz. search also */
                pi2_prev_enc_frm_max_mv_y[0] = i2_max_mv_y;
            }
        }
    }
    else
    {
        ASSERT(0 == ps_ctxt->s_frm_prms.u1_num_active_ref_l1);

        /* Set the Config. File Params for P pic. */
        for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++)
        {
            ps_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
            ps_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y;
            ps_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
            ps_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y;
        }

        /* For P PIC., go with Config. File Params */
        pi2_prev_enc_frm_max_mv_y[0] = ps_curr_layer->i2_max_mv_y;
    }
}
5661
/**
 * Selects the partition mask to be evaluated for a block.
 *
 * - If u1_blk_8x8_mask is anything other than 15, ENABLE_NxN is returned.
 *   NOTE(review): 15 presumably means all four 8x8 sub-blocks are flagged -
 *   confirm against the caller.
 * - Otherwise the mask comes from hme_study_input_segmentation(), unless the
 *   preset is ME_XTREME_SPEED_25 and the DISABLE_8X8CUS_* switch matching
 *   the picture type (P / reference B / non-reference B) is set, in which
 *   case ENABLE_2Nx2N is returned.
 * - The segmentation-derived mask has ENABLE_AMP removed for
 *   ME_XTREME_SPEED and ME_XTREME_SPEED_25, and additionally ENABLE_SMP
 *   removed for ME_XTREME_SPEED_25.
 *
 * @param pu1_inp                     input pointer, forwarded to the
 *                                    segmentation study
 * @param i4_inp_stride               input stride, forwarded likewise
 * @param u1_limit_active_partitions  forwarded to the segmentation study
 * @param u1_is_bPic                  non-zero for a B picture
 * @param u1_is_refPic                non-zero for a reference picture
 * @param u1_blk_8x8_mask             8x8 block mask
 * @param e_me_quality_preset         ME quality preset
 * @return the selected partition mask
 */
S32 hme_part_mask_populator(
    U08 *pu1_inp,
    S32 i4_inp_stride,
    U08 u1_limit_active_partitions,
    U08 u1_is_bPic,
    U08 u1_is_refPic,
    U08 u1_blk_8x8_mask,
    ME_QUALITY_PRESETS_T e_me_quality_preset)
{
    U08 u1_use_segmentation;
    S32 i4_mask;

    if(15 != u1_blk_8x8_mask)
    {
        return ENABLE_NxN;
    }

    /* Decide whether the input-segmentation study should drive the mask */
    if(ME_XTREME_SPEED_25 != e_me_quality_preset)
    {
        u1_use_segmentation = 1;
    }
    else if(!u1_is_bPic)
    {
        u1_use_segmentation = !DISABLE_8X8CUS_IN_PPICS_IN_P6;
    }
    else if(u1_is_refPic)
    {
        u1_use_segmentation = !DISABLE_8X8CUS_IN_REFBPICS_IN_P6;
    }
    else
    {
        u1_use_segmentation = !DISABLE_8X8CUS_IN_NREFBPICS_IN_P6;
    }

    if(!u1_use_segmentation)
    {
        return ENABLE_2Nx2N;
    }

    i4_mask = hme_study_input_segmentation(pu1_inp, i4_inp_stride, u1_limit_active_partitions);

    /* Both extreme-speed presets drop AMP partitions */
    if((ME_XTREME_SPEED == e_me_quality_preset) || (ME_XTREME_SPEED_25 == e_me_quality_preset))
    {
        i4_mask &= ~ENABLE_AMP;
    }

    /* Only the fastest preset drops SMP partitions as well */
    if(ME_XTREME_SPEED_25 == e_me_quality_preset)
    {
        i4_mask &= ~ENABLE_SMP;
    }

    return i4_mask;
}
5708