1 /******************************************************************************
2 *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /**
22 ******************************************************************************
23 * @file hme_coarse.c
24 *
25 * @brief
26 * Contains ME algorithm for the coarse layer.
27 *
28 * @author
29 * Ittiam
30 *
31 *
32 * List of Functions
33 * hme_update_mv_bank_coarse()
34 * hme_coarse()
35 ******************************************************************************
36 */
37
38 /*****************************************************************************/
39 /* File Includes */
40 /*****************************************************************************/
41 /* System include files */
42 #include <stdio.h>
43 #include <string.h>
44 #include <stdlib.h>
45 #include <assert.h>
46 #include <stdarg.h>
47 #include <math.h>
48 #include <limits.h>
49
50 /* User include files */
51 #include "ihevc_typedefs.h"
52 #include "itt_video_api.h"
53 #include "ihevce_api.h"
54
55 #include "rc_cntrl_param.h"
56 #include "rc_frame_info_collector.h"
57 #include "rc_look_ahead_params.h"
58
59 #include "ihevc_defs.h"
60 #include "ihevc_structs.h"
61 #include "ihevc_platform_macros.h"
62 #include "ihevc_deblk.h"
63 #include "ihevc_itrans_recon.h"
64 #include "ihevc_chroma_itrans_recon.h"
65 #include "ihevc_chroma_intra_pred.h"
66 #include "ihevc_intra_pred.h"
67 #include "ihevc_inter_pred.h"
68 #include "ihevc_mem_fns.h"
69 #include "ihevc_padding.h"
70 #include "ihevc_weighted_pred.h"
71 #include "ihevc_sao.h"
72 #include "ihevc_resi_trans.h"
73 #include "ihevc_quant_iquant_ssd.h"
74 #include "ihevc_cabac_tables.h"
75
76 #include "ihevce_defs.h"
77 #include "ihevce_lap_enc_structs.h"
78 #include "ihevce_multi_thrd_structs.h"
79 #include "ihevce_multi_thrd_funcs.h"
80 #include "ihevce_me_common_defs.h"
81 #include "ihevce_had_satd.h"
82 #include "ihevce_error_codes.h"
83 #include "ihevce_bitstream.h"
84 #include "ihevce_cabac.h"
85 #include "ihevce_rdoq_macros.h"
86 #include "ihevce_function_selector.h"
87 #include "ihevce_enc_structs.h"
88 #include "ihevce_entropy_structs.h"
89 #include "ihevce_cmn_utils_instr_set_router.h"
90 #include "ihevce_enc_loop_structs.h"
91 #include "ihevce_bs_compute_ctb.h"
92 #include "ihevce_global_tables.h"
93 #include "ihevce_dep_mngr_interface.h"
94 #include "hme_datatype.h"
95 #include "hme_interface.h"
96 #include "hme_common_defs.h"
97 #include "hme_defs.h"
98 #include "ihevce_me_instr_set_router.h"
99 #include "hme_globals.h"
100 #include "hme_utils.h"
101 #include "hme_coarse.h"
102 #include "hme_refine.h"
103 #include "hme_err_compute.h"
104 #include "hme_common_utils.h"
105 #include "hme_search_algo.h"
106
107 /*******************************************************************************
108 * MACROS
109 *******************************************************************************/
/**
 * Copies the result held in a search node into one MV bank entry:
 * the node's mv (x and y, each right shifted by `shift`) is written through
 * `ps_mv` and the node's ref idx is written through `pi1_ref_idx`.
 *
 * The body is wrapped in do { } while(0) so that the macro followed by a
 * semicolon forms exactly one statement (safe in unbraced if/else), and the
 * pointer arguments are parenthesised so that expressions such as `&as_mv[i]`
 * or `pi1_buf + i` can be passed.
 * Note: `ps_search_node` is evaluated three times, `ps_mv` twice.
 */
#define COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift)          \
    do                                                                         \
    {                                                                          \
        (ps_mv)->i2_mv_x = (ps_search_node)->s_mv.i2_mvx >> (shift);           \
        (ps_mv)->i2_mv_y = (ps_search_node)->s_mv.i2_mvy >> (shift);           \
        *(pi1_ref_idx) = (ps_search_node)->i1_ref_idx;                         \
    } while(0)
116
117 /*****************************************************************************/
118 /* Function Definitions */
119 /*****************************************************************************/
120
121 /**
122 ********************************************************************************
123 * @fn void hme_update_mv_bank_coarse(search_results_t *ps_search_results,
124 * layer_mv_t *ps_layer_mv,
125 * S32 i4_blk_x,
126 * S32 i4_blk_y,
127 * search_node_t *ps_search_node_4x8_l,
128 * search_node_t *ps_search_node_8x4_t,
129 * S08 i1_ref_idx,
130 * mvbank_update_prms_t *ps_prms
131 *
132 * @brief Updates the coarse layer MV Bank for a given ref id and blk pos
133 *
134 * @param[in] ps_search_results: Search results data structure
135 *
136 * @param[in, out] ps_layer_mv : MV Bank for this layer
137 *
138 * @param[in] i4_search_blk_x: column number of the 4x4 blk searched
139 *
140 * @param[in] i4_search_blk_y: row number of the 4x4 blk searched
141 *
142 * @param[in] ps_search_node_4x8_t: Best MV of the 4x8T blk
143 *
144 * @param[in] ps_search_node_8x4_l: Best MV of the 8x4L blk
145 *
146 * @param[in] i1_ref_idx : Reference ID that has been searched
147 *
148 * @param[in] ps_prms : Parameters pertaining to the MV Bank update
149 *
150 * @return None
151 ********************************************************************************
152 */
hme_update_mv_bank_coarse(search_results_t * ps_search_results,layer_mv_t * ps_layer_mv,S32 i4_search_blk_x,S32 i4_search_blk_y,search_node_t * ps_search_node_4x8_t,search_node_t * ps_search_node_8x4_l,S08 i1_ref_idx,mvbank_update_prms_t * ps_prms)153 void hme_update_mv_bank_coarse(
154 search_results_t *ps_search_results,
155 layer_mv_t *ps_layer_mv,
156 S32 i4_search_blk_x,
157 S32 i4_search_blk_y,
158 search_node_t *ps_search_node_4x8_t,
159 search_node_t *ps_search_node_8x4_l,
160 S08 i1_ref_idx,
161 mvbank_update_prms_t *ps_prms)
162 {
163 /* These point to the MV and ref idx posn to be udpated */
164 hme_mv_t *ps_mv;
165 S08 *pi1_ref_idx;
166
167 /* Offset within the bank */
168 S32 i4_offset;
169
170 S32 i, j, i4_blk_x, i4_blk_y;
171
172 /* Best results for 8x4R and 4x8B blocks */
173 search_node_t *ps_search_node_8x4_r, *ps_search_node_4x8_b;
174
175 /* Number of MVs in a block */
176 S32 num_mvs = ps_layer_mv->i4_num_mvs_per_ref;
177
178 search_node_t *aps_search_nodes[4];
179
180 /* The search blk may be different in size from the blk used to hold MV */
181 i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
182 i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
183
184 /* Compute the offset in the MV bank */
185 i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
186 i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
187
188 /* Identify the correct offset in the mvbank and the reference id buf */
189 ps_mv = ps_layer_mv->ps_mv + (i4_offset + (num_mvs * i1_ref_idx));
190 pi1_ref_idx = ps_layer_mv->pi1_ref_idx + (i4_offset + (num_mvs * i1_ref_idx));
191
192 /*************************************************************************/
193 /* We have atleast 4 distinct results: the 4x8 top (coming from top blk) */
194 /* 8x4 left (coming from left blk), 8x4 and 4x8 right and bot resp. */
195 /* If number of results to be stored is 4, then we store all these 4 */
196 /* results, else we pick best ones */
197 /*************************************************************************/
198 ps_search_node_8x4_r = ps_search_results->aps_part_results[i1_ref_idx][PART_ID_2NxN_B];
199 ps_search_node_4x8_b = ps_search_results->aps_part_results[i1_ref_idx][PART_ID_Nx2N_R];
200
201 ASSERT(num_mvs <= 4);
202
203 /* Doing this to sort best results */
204 aps_search_nodes[0] = ps_search_node_8x4_r;
205 aps_search_nodes[1] = ps_search_node_4x8_b;
206 aps_search_nodes[2] = ps_search_node_8x4_l;
207 aps_search_nodes[3] = ps_search_node_4x8_t;
208 if(num_mvs == 4)
209 {
210 COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[0], 0);
211 ps_mv++;
212 pi1_ref_idx++;
213 COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[1], 0);
214 ps_mv++;
215 pi1_ref_idx++;
216 COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[2], 0);
217 ps_mv++;
218 pi1_ref_idx++;
219 COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[3], 0);
220 ps_mv++;
221 pi1_ref_idx++;
222 return;
223 }
224
225 /* Run through the results, store them in best to worst order */
226 for(i = 0; i < num_mvs; i++)
227 {
228 for(j = i + 1; j < 4; j++)
229 {
230 if(aps_search_nodes[j]->i4_tot_cost < aps_search_nodes[i]->i4_tot_cost)
231 {
232 SWAP_HME(aps_search_nodes[j], aps_search_nodes[i], search_node_t *);
233 }
234 }
235 COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[i], 0);
236 ps_mv++;
237 pi1_ref_idx++;
238 }
239 }
240
241 /**
242 ********************************************************************************
243 * @fn void hme_coarse_frm_init(me_ctxt_t *ps_ctxt, coarse_prms_t *ps_coarse_prms)
244 *
245 * @brief Frame init entry point Coarse ME.
246 *
247 * @param[in,out] ps_ctxt: ME Handle
248 *
249 * @param[in] ps_coarse_prms : Coarse layer config params
250 *
251 * @return None
252 ********************************************************************************
253 */
hme_coarse_frm_init(coarse_me_ctxt_t * ps_ctxt,coarse_prms_t * ps_coarse_prms)254 void hme_coarse_frm_init(coarse_me_ctxt_t *ps_ctxt, coarse_prms_t *ps_coarse_prms)
255 {
256 layer_ctxt_t *ps_curr_layer;
257
258 S32 i4_pic_wd, i4_pic_ht;
259
260 S32 num_blks_in_pic, num_blks_in_row;
261
262 BLK_SIZE_T e_search_blk_size = BLK_4x4;
263
264 S32 blk_size_shift = 2, blk_wd = 4, blk_ht = 4;
265
266 /* Number of references to search */
267 S32 i4_num_ref;
268
269 ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_coarse_prms->i4_layer_id];
270 i4_num_ref = ps_coarse_prms->i4_num_ref;
271
272 i4_pic_wd = ps_curr_layer->i4_wd;
273 i4_pic_ht = ps_curr_layer->i4_ht;
274 /* Macro updates num_blks_in_pic and num_blks_in_row*/
275 GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
276
277 /************************************************************************/
278 /* Initialize the mv bank that holds results of this layer. */
279 /************************************************************************/
280 hme_init_mv_bank(
281 ps_curr_layer,
282 BLK_4x4,
283 i4_num_ref,
284 ps_coarse_prms->num_results,
285 ps_ctxt->u1_encode[ps_coarse_prms->i4_layer_id]);
286
287 return;
288 }
289
290 /**
291 ********************************************************************************
292 * @fn void hme_derive_worst_case_search_range(range_prms_t *ps_range,
293 * range_prms_t *ps_pic_limit,
294 * range_prms_t *ps_mv_limit,
295 * S32 i4_x,
296 * S32 i4_y,
297 * S32 blk_wd,
298 * S32 blk_ht)
299 *
300 * @brief given picture limits and blk dimensions and mv search limits, obtains
301 * teh valid search range such that the blk stays within pic boundaries,
302 * where picture boundaries include padded portions of picture
303 *
304 * @param[out] ps_range: updated with actual search range
305 *
306 * @param[in] ps_pic_limit : picture boundaries
307 *
308 * @param[in] ps_mv_limit: Search range limits for the mvs
309 *
310 * @param[in] i4_x : x coordinate of the blk
311 *
312 * @param[in] i4_y : y coordinate of the blk
313 *
314 * @param[in] blk_wd : blk width
315 *
316 * @param[in] blk_ht : blk height
317 *
318 * @return void
319 ********************************************************************************
320 */
hme_derive_worst_case_search_range(range_prms_t * ps_range,range_prms_t * ps_pic_limit,range_prms_t * ps_mv_limit,S32 i4_x,S32 i4_y,S32 blk_wd,S32 blk_ht)321 void hme_derive_worst_case_search_range(
322 range_prms_t *ps_range,
323 range_prms_t *ps_pic_limit,
324 range_prms_t *ps_mv_limit,
325 S32 i4_x,
326 S32 i4_y,
327 S32 blk_wd,
328 S32 blk_ht)
329 {
330 /* Taking max x of left block, min x of current block */
331 ps_range->i2_max_x =
332 MIN((ps_pic_limit->i2_max_x - (S16)blk_wd - (S16)(i4_x - 4)), ps_mv_limit->i2_max_x);
333 ps_range->i2_min_x = MAX((ps_pic_limit->i2_min_x - (S16)i4_x), ps_mv_limit->i2_min_x);
334 /* Taking max y of top block, min y of current block */
335 ps_range->i2_max_y =
336 MIN((ps_pic_limit->i2_max_y - (S16)blk_ht - (S16)(i4_y - 4)), ps_mv_limit->i2_max_y);
337 ps_range->i2_min_y = MAX((ps_pic_limit->i2_min_y - (S16)i4_y), ps_mv_limit->i2_min_y);
338 }
339
340 /**
341 ********************************************************************************
342 * @fn void hme_combine_4x4_sads_and_compute_cost(S08 i1_ref_idx,
343 * range_prms_t *ps_mv_range,
344 * range_prms_t *ps_mv_limit,
345 * hme_mv_t *ps_best_mv_4x8,
346 * hme_mv_t *ps_best_mv_8x4,
347 * pred_ctxt_t *ps_pred_ctxt,
348 * PF_MV_COST_FXN pf_mv_cost_compute,
349 * ME_QUALITY_PRESETS_T e_me_quality_preset,
350 * S16 *pi2_sads_4x4_current,
351 * S16 *pi2_sads_4x4_east,
352 * S16 *pi2_sads_4x4_south,
353 * FILE *fp_dump_sad)
354 *
355 * @brief Does a full search on entire srch window with a given step size in coarse layer
356 *
357 * @param[in] i1_ref_idx : Cur ref idx
358 *
359 * @param[in] ps_layer_ctxt: All info about this layer
360 *
361 * @param[out] ps_best_mv : type hme_mv_t contains best mv x and y
362 *
363 * @param[in] ps_pred_ctxt : Prediction ctxt for cost computation
364 *
365 * @param[in] pf_mv_cost_compute : mv cost computation function
366 *
367 * @return void
368 ********************************************************************************
369 */
hme_combine_4x4_sads_and_compute_cost_high_quality(S08 i1_ref_idx,range_prms_t * ps_mv_range,range_prms_t * ps_mv_limit,hme_mv_t * ps_best_mv_4x8,hme_mv_t * ps_best_mv_8x4,pred_ctxt_t * ps_pred_ctxt,PF_MV_COST_FXN pf_mv_cost_compute,S16 * pi2_sads_4x4_current,S16 * pi2_sads_4x4_east,S16 * pi2_sads_4x4_south)370 void hme_combine_4x4_sads_and_compute_cost_high_quality(
371 S08 i1_ref_idx,
372 range_prms_t *ps_mv_range,
373 range_prms_t *ps_mv_limit,
374 hme_mv_t *ps_best_mv_4x8,
375 hme_mv_t *ps_best_mv_8x4,
376 pred_ctxt_t *ps_pred_ctxt,
377 PF_MV_COST_FXN pf_mv_cost_compute,
378 S16 *pi2_sads_4x4_current,
379 S16 *pi2_sads_4x4_east,
380 S16 *pi2_sads_4x4_south)
381 {
382 /* These control number of parts and number of pts in grid to search */
383 S32 stepy, stepx, best_mv_y_4x8, best_mv_x_4x8, best_mv_y_8x4, best_mv_x_8x4;
384 S32 step_shift_x, step_shift_y;
385 S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range;
386
387 S32 min_cost_4x8 = MAX_32BIT_VAL;
388 S32 min_cost_8x4 = MAX_32BIT_VAL;
389
390 search_node_t s_search_node;
391 s_search_node.i1_ref_idx = i1_ref_idx;
392
393 stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
394 /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_QUALITY */
395 step_shift_x = step_shift_y = 1;
396
397 mv_x_offset = (-ps_mv_limit->i2_min_x >> step_shift_x);
398 mv_y_offset = (-ps_mv_limit->i2_min_y >> step_shift_y);
399 mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x;
400 mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y;
401
402 /* Run 2loops to sweep over the reference area */
403 for(mvy = ps_mv_range->i2_min_y; mvy < ps_mv_range->i2_max_y; mvy += stepy)
404 {
405 for(mvx = ps_mv_range->i2_min_x; mvx < ps_mv_range->i2_max_x; mvx += stepx)
406 {
407 S32 sad_4x8, cost_4x8, sad_8x4, cost_8x4;
408 S32 sad_pos = ((mvx >> step_shift_x) + mv_x_offset) +
409 ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range;
410
411 /* Get SAD by adding SAD for current and neighbour S */
412 sad_4x8 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_south[sad_pos];
413 sad_8x4 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_east[sad_pos];
414
415 // fprintf(fp_dump_sad,"%d\t",sad);
416 s_search_node.s_mv.i2_mvx = mvx;
417 s_search_node.s_mv.i2_mvy = mvy;
418
419 cost_4x8 = cost_8x4 =
420 pf_mv_cost_compute(&s_search_node, ps_pred_ctxt, PART_ID_2Nx2N, MV_RES_FPEL);
421
422 cost_4x8 += sad_4x8;
423 cost_8x4 += sad_8x4;
424
425 if(cost_4x8 < min_cost_4x8)
426 {
427 best_mv_x_4x8 = mvx;
428 best_mv_y_4x8 = mvy;
429 min_cost_4x8 = cost_4x8;
430 }
431 if(cost_8x4 < min_cost_8x4)
432 {
433 best_mv_x_8x4 = mvx;
434 best_mv_y_8x4 = mvy;
435 min_cost_8x4 = cost_8x4;
436 }
437 }
438 }
439
440 ps_best_mv_4x8->i2_mv_x = best_mv_x_4x8;
441 ps_best_mv_4x8->i2_mv_y = best_mv_y_4x8;
442
443 ps_best_mv_8x4->i2_mv_x = best_mv_x_8x4;
444 ps_best_mv_8x4->i2_mv_y = best_mv_y_8x4;
445 }
446
/**
********************************************************************************
*  @fn     void hme_combine_4x4_sads_and_compute_cost_high_speed(
*                                   S08 i1_ref_idx,
*                                   range_prms_t *ps_mv_range,
*                                   range_prms_t *ps_mv_limit,
*                                   hme_mv_t *ps_best_mv_4x8,
*                                   hme_mv_t *ps_best_mv_8x4,
*                                   pred_ctxt_t *ps_pred_ctxt,
*                                   PF_MV_COST_FXN pf_mv_cost_compute,
*                                   S16 *pi2_sads_4x4_current,
*                                   S16 *pi2_sads_4x4_east,
*                                   S16 *pi2_sads_4x4_south)
*
*  @brief  Sweeps the mv range with a step of HME_COARSE_STEP_SIZE_HIGH_SPEED.
*          At every point the stored 4x4 SAD of the current blk is added to
*          that of the south blk (4x8) and of the east blk (8x4), an mv cost
*          computed inline from hme_get_range() of |mvx| and |mvy|, the ref
*          idx and lambda is added, and the mv with the least total cost is
*          tracked for each of the two shapes.
*
*  @param[in]  i1_ref_idx : Cur ref idx, added to the mv cost
*
*  @param[in]  ps_mv_range : Range of mvs [min, max) that is swept
*
*  @param[in]  ps_mv_limit : mv limits that define the layout of the SAD
*                  buffers (origin and row length)
*
*  @param[out]  ps_best_mv_4x8 : best mv for the 4x8 shape; (0,0) if the range
*                  is empty
*
*  @param[out]  ps_best_mv_8x4 : best mv for the 8x4 shape; (0,0) if the range
*                  is empty
*
*  @param[in]  ps_pred_ctxt : supplies lambda and lambda_q_shift
*
*  @param[in]  pf_mv_cost_compute : not used by this variant
*
*  @param[in]  pi2_sads_4x4_current : stored 4x4 SADs of the current blk
*
*  @param[in]  pi2_sads_4x4_east : stored 4x4 SADs of the east neighbour
*
*  @param[in]  pi2_sads_4x4_south : stored 4x4 SADs of the south neighbour
*
*  @return void
********************************************************************************
*/
void hme_combine_4x4_sads_and_compute_cost_high_speed(
    S08 i1_ref_idx,
    range_prms_t *ps_mv_range,
    range_prms_t *ps_mv_limit,
    hme_mv_t *ps_best_mv_4x8,
    hme_mv_t *ps_best_mv_8x4,
    pred_ctxt_t *ps_pred_ctxt,
    PF_MV_COST_FXN pf_mv_cost_compute,
    S16 *pi2_sads_4x4_current,
    S16 *pi2_sads_4x4_east,
    S16 *pi2_sads_4x4_south)
{
    S32 stepx, stepy;
    S32 step_shift_x, step_shift_y;
    S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range;

    S32 rnd, lambda, lambda_q_shift;

    /* Defined defaults, so that an empty sweep range does not lead to a */
    /* read of uninitialized values when the outputs are written         */
    S32 best_mv_x_4x8 = 0, best_mv_y_4x8 = 0;
    S32 best_mv_x_8x4 = 0, best_mv_y_8x4 = 0;

    S32 min_cost_4x8 = MAX_32BIT_VAL;
    S32 min_cost_8x4 = MAX_32BIT_VAL;

    (void)pf_mv_cost_compute;
    stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_SPEED;
    /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_SPEED */
    step_shift_x = step_shift_y = 2;

    /*************************************************************************/
    /* Offsets are computed as -(min >> shift), exactly as done in           */
    /* hme_store_4x4_sads_high_speed(), so that a SAD is read back from the  */
    /* position it was stored at even if min is not a multiple of the step.  */
    /* (-min) >> shift differs from it by one in that case.                  */
    /*************************************************************************/
    mv_x_offset = -(ps_mv_limit->i2_min_x >> step_shift_x);
    mv_y_offset = -(ps_mv_limit->i2_min_y >> step_shift_y);

    /* Number of SAD entries per row of the SAD buffers */
    mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x;

    lambda = ps_pred_ctxt->lambda;
    lambda_q_shift = ps_pred_ctxt->lambda_q_shift;
    /* Rounding term for the lambda scaling below */
    rnd = 1 << (lambda_q_shift - 1);

    ASSERT(MAX_MVX_SUPPORTED_IN_COARSE_LAYER >= ABS(ps_mv_range->i2_max_x));
    ASSERT(MAX_MVY_SUPPORTED_IN_COARSE_LAYER >= ABS(ps_mv_range->i2_max_y));

    /* Run 2 loops to sweep over the reference area */
    for(mvy = ps_mv_range->i2_min_y; mvy < ps_mv_range->i2_max_y; mvy += stepy)
    {
        for(mvx = ps_mv_range->i2_min_x; mvx < ps_mv_range->i2_max_x; mvx += stepx)
        {
            S32 sad_4x8, cost_4x8, sad_8x4, cost_8x4, mv_cost;

            S32 sad_pos = ((mvx >> step_shift_x) + mv_x_offset) +
                          ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range;

            /* 4x8 = current + south neighbour, 8x4 = current + east neighbour */
            sad_4x8 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_south[sad_pos];
            sad_8x4 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_east[sad_pos];

            /* mv cost: identical for both the shapes, hence computed once */
            mv_cost =
                (2 * hme_get_range(ABS(mvx)) - 1) + (2 * hme_get_range(ABS(mvy)) - 1) + i1_ref_idx;
            mv_cost += (mvx != 0) ? 1 : 0;
            mv_cost += (mvy != 0) ? 1 : 0;
            mv_cost = (mv_cost * lambda + rnd) >> lambda_q_shift;

            cost_4x8 = mv_cost + sad_4x8;
            cost_8x4 = mv_cost + sad_8x4;

            /* Strict compare: the first of equal cost points is retained */
            if(cost_4x8 < min_cost_4x8)
            {
                best_mv_x_4x8 = mvx;
                best_mv_y_4x8 = mvy;
                min_cost_4x8 = cost_4x8;
            }
            if(cost_8x4 < min_cost_8x4)
            {
                best_mv_x_8x4 = mvx;
                best_mv_y_8x4 = mvy;
                min_cost_8x4 = cost_8x4;
            }
        }
    }

    ps_best_mv_4x8->i2_mv_x = best_mv_x_4x8;
    ps_best_mv_4x8->i2_mv_y = best_mv_y_4x8;

    ps_best_mv_8x4->i2_mv_x = best_mv_x_8x4;
    ps_best_mv_8x4->i2_mv_y = best_mv_y_8x4;
}
537
538 /**
539 ********************************************************************************
540 * @fn hme_store_4x4_sads(hme_search_prms_t *ps_search_prms,
541 * layer_ctxt_t *ps_layer_ctxt)
542 *
543 * @brief Does a 4x4 sad computation on a given range and stores it in memory
544 *
545 * @param[in] ps_search_prms : Search prms structure containing info like
546 * blk dimensions, search range etc
547 *
548 * @param[in] ps_layer_ctxt: All info about this layer
549 *
550 * @param[in] ps_wt_inp_prms: All info about weighted input
551 *
552 * @param[in] e_me_quality_preset: motion estimation quality preset
553 *
554 * @param[in] pi2_sads_4x4: Memory to store all 4x4 SADs for given range
555 *
556 * @return void
557 ********************************************************************************
558 */
559
hme_store_4x4_sads_high_quality(hme_search_prms_t * ps_search_prms,layer_ctxt_t * ps_layer_ctxt,range_prms_t * ps_mv_limit,wgt_pred_ctxt_t * ps_wt_inp_prms,S16 * pi2_sads_4x4)560 void hme_store_4x4_sads_high_quality(
561 hme_search_prms_t *ps_search_prms,
562 layer_ctxt_t *ps_layer_ctxt,
563 range_prms_t *ps_mv_limit,
564 wgt_pred_ctxt_t *ps_wt_inp_prms,
565 S16 *pi2_sads_4x4)
566 {
567 S32 sad, i, j;
568
569 /* Input and reference attributes */
570 U08 *pu1_inp, *pu1_inp_orig, *pu1_ref;
571 S32 i4_inp_stride, i4_ref_stride, i4_ref_offset;
572
573 /* The reference is actually an array of ptrs since there are several */
574 /* reference id. So an array gets passed form calling function */
575 U08 **ppu1_ref, *pu1_ref_coloc;
576
577 S32 stepy, stepx, step_shift_x, step_shift_y;
578 S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range;
579
580 /* Points to the range limits for mv */
581 range_prms_t *ps_range_prms;
582
583 /* Reference index to be searched */
584 S32 i4_search_idx = ps_search_prms->i1_ref_idx;
585 /* Using the member 0 to store for all ref. idx. */
586 ps_range_prms = ps_search_prms->aps_mv_range[0];
587 pu1_inp_orig = ps_wt_inp_prms->apu1_wt_inp[i4_search_idx];
588 i4_inp_stride = ps_search_prms->i4_inp_stride;
589
590 /* Move to the location of the search blk in inp buffer */
591 pu1_inp_orig += ps_search_prms->i4_cu_x_off;
592 pu1_inp_orig += ps_search_prms->i4_cu_y_off * i4_inp_stride;
593
594 /*************************************************************************/
595 /* we use either input of previously encoded pictures as reference */
596 /* in coarse layer */
597 /*************************************************************************/
598 i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
599 ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
600
601 /* colocated position in reference picture */
602 i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
603 pu1_ref_coloc = ppu1_ref[i4_search_idx] + i4_ref_offset;
604
605 stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
606 /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_QUALITY */
607 step_shift_x = step_shift_y = 1;
608
609 mv_x_offset = -(ps_mv_limit->i2_min_x >> step_shift_x);
610 mv_y_offset = -(ps_mv_limit->i2_min_y >> step_shift_y);
611 mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x;
612 mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y;
613
614 /* Run 2loops to sweep over the reference area */
615 for(mvy = ps_range_prms->i2_min_y; mvy < ps_range_prms->i2_max_y; mvy += stepy)
616 {
617 for(mvx = ps_range_prms->i2_min_x; mvx < ps_range_prms->i2_max_x; mvx += stepx)
618 {
619 /* Set up the reference and inp ptr */
620 pu1_ref = pu1_ref_coloc + mvx + (mvy * i4_ref_stride);
621 pu1_inp = pu1_inp_orig;
622 /* SAD computation */
623 {
624 sad = 0;
625 for(i = 0; i < 4; i++)
626 {
627 for(j = 0; j < 4; j++)
628 {
629 sad += (ABS(((S32)pu1_inp[j] - (S32)pu1_ref[j])));
630 }
631 pu1_inp += i4_inp_stride;
632 pu1_ref += i4_ref_stride;
633 }
634 }
635
636 pi2_sads_4x4
637 [((mvx >> step_shift_x) + mv_x_offset) +
638 ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range] = sad;
639 }
640 }
641 }
642
/**
********************************************************************************
*  @fn     void hme_store_4x4_sads_high_speed(hme_search_prms_t *ps_search_prms,
*                                   layer_ctxt_t *ps_layer_ctxt,
*                                   range_prms_t *ps_mv_limit,
*                                   wgt_pred_ctxt_t *ps_wt_inp_prms,
*                                   S16 *pi2_sads_4x4)
*
*  @brief  Computes the 4x4 SAD at every point of the mv range, sampled with a
*          step of HME_COARSE_STEP_SIZE_HIGH_SPEED, and stores it in memory
*
*  @param[in]  ps_search_prms : Search prms; the ref idx, aps_mv_range[0], the
*                  inp stride and the blk offsets in inp and ref are used
*
*  @param[in]  ps_layer_ctxt : Layer ctxt; supplies the list of reference
*                  planes (ppu1_list_inp) and their stride
*
*  @param[in]  ps_mv_limit : mv limits that define the layout of pi2_sads_4x4
*                  (origin and row length)
*
*  @param[in]  ps_wt_inp_prms : Supplies the weighted input for the ref idx
*
*  @param[out]  pi2_sads_4x4 : Memory to store all 4x4 SADs of the given range
*
*  @return void
********************************************************************************
*/
void hme_store_4x4_sads_high_speed(
    hme_search_prms_t *ps_search_prms,
    layer_ctxt_t *ps_layer_ctxt,
    range_prms_t *ps_mv_limit,
    wgt_pred_ctxt_t *ps_wt_inp_prms,
    S16 *pi2_sads_4x4)
{
    U08 *pu1_cur_blk, *pu1_ref_origin;
    U08 *pu1_cur_row, *pu1_ref_row;
    range_prms_t *ps_span;

    S32 cur_stride, ref_stride, ref_origin_offset;
    S32 step, shift;
    S32 x_origin, y_origin, sads_per_row;
    S32 dy, dx, line, blk_sad;

    /* Reference index to be searched */
    S32 list_idx = ps_search_prms->i1_ref_idx;

    /* Member 0 of the mv range array is used for all ref idx */
    ps_span = ps_search_prms->aps_mv_range[0];

    /* Move to the location of the search blk in the weighted inp buffer */
    cur_stride = ps_search_prms->i4_inp_stride;
    pu1_cur_blk = ps_wt_inp_prms->apu1_wt_inp[list_idx];
    pu1_cur_blk += ps_search_prms->i4_cu_x_off;
    pu1_cur_blk += ps_search_prms->i4_cu_y_off * cur_stride;

    /* In the coarse layer the reference planes are taken from ppu1_list_inp; */
    /* move to the colocated position in the reference picture                */
    ref_stride = ps_layer_ctxt->i4_inp_stride;
    ref_origin_offset = (ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
    pu1_ref_origin = ps_layer_ctxt->ppu1_list_inp[list_idx] + ref_origin_offset;

    step = HME_COARSE_STEP_SIZE_HIGH_SPEED;
    /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_SPEED */
    shift = 2;

    /* Layout of the SAD buffer: origin offsets and entries per row */
    x_origin = -(ps_mv_limit->i2_min_x >> shift);
    y_origin = -(ps_mv_limit->i2_min_y >> shift);
    sads_per_row = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> shift;

    /* Sweep over the reference area */
    dy = ps_span->i2_min_y;
    while(dy < ps_span->i2_max_y)
    {
        dx = ps_span->i2_min_x;
        while(dx < ps_span->i2_max_x)
        {
            /* 4x4 SAD, one line of 4 pels per iteration */
            blk_sad = 0;
            for(line = 0; line < 4; line++)
            {
                pu1_cur_row = pu1_cur_blk + line * cur_stride;
                pu1_ref_row = pu1_ref_origin + dx + (dy * ref_stride) + line * ref_stride;

                blk_sad += (ABS(((S32)pu1_cur_row[0] - (S32)pu1_ref_row[0])));
                blk_sad += (ABS(((S32)pu1_cur_row[1] - (S32)pu1_ref_row[1])));
                blk_sad += (ABS(((S32)pu1_cur_row[2] - (S32)pu1_ref_row[2])));
                blk_sad += (ABS(((S32)pu1_cur_row[3] - (S32)pu1_ref_row[3])));
            }

            pi2_sads_4x4[((dx >> shift) + x_origin) + ((dy >> shift) + y_origin) * sads_per_row] =
                blk_sad;

            dx += step;
        }
        dy += step;
    }
}
726 /**
727 ********************************************************************************
728 * @fn void hme_coarsest(me_ctxt_t *ps_ctxt, coarse_prms_t *ps_coarse_prms)
729 *
730 * @brief Top level entry point for Coarse ME. Runs across blks and searches
731 * at a 4x4 blk granularity by using 4x8 and 8x4 patterns.
732 *
733 * @param[in,out] ps_ctxt: ME Handle
734 *
735 * @param[in] ps_coarse_prms : Coarse layer config params
736 *
737 * @param[in] ps_multi_thrd_ctxt : Multi thread context
738 *
739 * @return None
740 ********************************************************************************
741 */
hme_coarsest(coarse_me_ctxt_t * ps_ctxt,coarse_prms_t * ps_coarse_prms,multi_thrd_ctxt_t * ps_multi_thrd_ctxt,WORD32 i4_ping_pong,void ** ppv_dep_mngr_hme_sync)742 void hme_coarsest(
743 coarse_me_ctxt_t *ps_ctxt,
744 coarse_prms_t *ps_coarse_prms,
745 multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
746 WORD32 i4_ping_pong,
747 void **ppv_dep_mngr_hme_sync)
748 {
749 S16 *pi2_cur_ref_sads_4x4;
750 S32 ai4_sad_4x4_block_size[MAX_NUM_REF], ai4_sad_4x4_block_stride[MAX_NUM_REF];
751 S32 num_rows_coarse;
752 S32 sad_top_offset, sad_current_offset;
753 S32 search_node_top_offset, search_node_left_offset;
754
755 ME_QUALITY_PRESETS_T e_me_quality_preset =
756 ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
757
758 search_results_t *ps_search_results;
759 mvbank_update_prms_t s_mv_update_prms;
760 BLK_SIZE_T e_search_blk_size = BLK_4x4;
761 hme_search_prms_t s_search_prms_4x8, s_search_prms_8x4, s_search_prms_4x4;
762
763 S32 global_id_8x4, global_id_4x8;
764
765 /*************************************************************************/
766 /* These directly point to the best search result nodes that will be */
767 /* updated by the search algorithm, rather than have to go through an */
768 /* elaborate structure */
769 /*************************************************************************/
770 search_node_t *aps_best_search_node_8x4[MAX_NUM_REF];
771 search_node_t *aps_best_search_node_4x8[MAX_NUM_REF];
772
773 /* These point to various spatial candts */
774 search_node_t *ps_candt_8x4_l, *ps_candt_8x4_t, *ps_candt_8x4_tl;
775 search_node_t *ps_candt_4x8_l, *ps_candt_4x8_t, *ps_candt_4x8_tl;
776 search_node_t *ps_candt_zeromv_8x4, *ps_candt_zeromv_4x8;
777 search_node_t *ps_candt_fs_8x4, *ps_candt_fs_4x8;
778 search_node_t as_top_neighbours[4], as_left_neighbours[3];
779
780 /* Holds the global mv for a given ref index */
781 search_node_t s_candt_global[MAX_NUM_REF];
782
783 /* All the search candidates */
784 search_candt_t as_search_candts_8x4[MAX_INIT_CANDTS];
785 search_candt_t as_search_candts_4x8[MAX_INIT_CANDTS];
786 search_candt_t *ps_search_candts_8x4, *ps_search_candts_4x8;
787
788 /* Actual range per blk and the pic level boundaries */
789 range_prms_t s_range_prms, s_pic_limit, as_mv_limit[MAX_NUM_REF];
790
791 /* Current and prev pic layer ctxt at the coarsest layer */
792 layer_ctxt_t *ps_curr_layer, *ps_prev_layer;
793
794 /* best mv of full search */
795 hme_mv_t best_mv_4x8, best_mv_8x4;
796
797 /* Book keeping at blk level */
798 S32 blk_x, num_blks_in_pic, num_blks_in_row, num_4x4_blks_in_row;
799
800 S32 blk_y;
801
802 /* Block dimensions */
803 S32 blk_size_shift = 2, blk_wd = 4, blk_ht = 4;
804
805 S32 lambda = ps_coarse_prms->lambda;
806
807 /* Number of references to search */
808 S32 i4_num_ref;
809
810 S32 i4_i, id, i;
811 S08 i1_ref_idx;
812
813 S32 i4_pic_wd, i4_pic_ht;
814 S32 i4_layer_id;
815
816 S32 end_of_frame;
817
818 pf_get_wt_inp fp_get_wt_inp;
819
820 /* Maximum search iterations around any candidate */
821 S32 i4_max_iters = ps_coarse_prms->i4_max_iters;
822
823 ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_coarse_prms->i4_layer_id];
824 ps_prev_layer = hme_coarse_get_past_layer_ctxt(ps_ctxt, ps_coarse_prms->i4_layer_id);
825
826 /* We need only one instance of search results structure */
827 ps_search_results = &ps_ctxt->s_search_results_8x8;
828
829 ps_search_candts_8x4 = &as_search_candts_8x4[0];
830 ps_search_candts_4x8 = &as_search_candts_4x8[0];
831
832 end_of_frame = 0;
833
834 i4_pic_wd = ps_curr_layer->i4_wd;
835 i4_pic_ht = ps_curr_layer->i4_ht;
836
837 fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list)
838 ->pf_get_wt_inp_8x8;
839
840 num_rows_coarse = ps_ctxt->i4_num_row_bufs;
841
842 /*************************************************************************/
843 /* Coarse Layer always does explicit search. Number of reference frames */
844 /* to search is a configurable parameter supplied by the application */
845 /*************************************************************************/
846 i4_num_ref = ps_coarse_prms->i4_num_ref;
847 i4_layer_id = ps_coarse_prms->i4_layer_id;
848
849 /*************************************************************************/
850 /* The search algorithm goes as follows: */
851 /* */
852 /* ___ */
853 /* | e | */
854 /* ___|___|___ */
855 /* | c | a | b | */
856 /* |___|___|___| */
857 /* | d | */
858 /* |___| */
859 /* */
860 /* For the target block a, we collect best results from 2 8x4 blks */
861 /* These are c-a and a-b. The 4x8 blks are e-a and a-d */
862 /* c-a result is already available from results of blk c. a-b is */
863 /* evaluated in this blk. Likewise e-a result is stored in a row buffer */
864 /* a-d is evaluated this blk */
865 /* So we store a row buffer which stores best 4x8 results of all top blk */
866 /*************************************************************************/
867
868 /************************************************************************/
869 /* Initialize the pointers to the best node. */
870 /************************************************************************/
871 for(i4_i = 0; i4_i < i4_num_ref; i4_i++)
872 {
873 aps_best_search_node_8x4[i4_i] = ps_search_results->aps_part_results[i4_i][PART_ID_2NxN_B];
874 aps_best_search_node_4x8[i4_i] = ps_search_results->aps_part_results[i4_i][PART_ID_Nx2N_R];
875 }
876
877 /************************************************************************/
878 /* Initialize the "searchresults" structure. This will set up the number*/
879 /* of search types, result updates etc */
880 /************************************************************************/
881 {
882 S32 num_results_per_part;
883 /* We evaluate 4 types of results per 4x4 blk. 8x4L and 8x4R and */
884 /* 4x8 T and 4x8B. So if we are to give 4 results, then we need to */
885 /* only evaluate 1 result per part. In the coarse layer, we are */
886 /* limited to 2 results max per part, and max of 8 results. */
887 num_results_per_part = (ps_coarse_prms->num_results + 3) >> 2;
888 hme_init_search_results(
889 ps_search_results,
890 i4_num_ref,
891 ps_coarse_prms->num_results,
892 num_results_per_part,
893 BLK_8x8,
894 0,
895 0,
896 ps_ctxt->au1_is_past);
897 }
898
899 /* Macro updates num_blks_in_pic and num_blks_in_row*/
900 GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
901
902 num_4x4_blks_in_row = num_blks_in_row + 1;
903
904 s_mv_update_prms.e_search_blk_size = e_search_blk_size;
905 s_mv_update_prms.i4_num_ref = i4_num_ref;
906 s_mv_update_prms.i4_shift = 0;
907
908 /* For full search, support 2 or 4 step size */
909 if(ps_coarse_prms->do_full_search)
910 {
911 ASSERT((ps_coarse_prms->full_search_step == 2) || (ps_coarse_prms->full_search_step == 4));
912 }
913
914 for(i4_i = 0; i4_i < i4_num_ref; i4_i++)
915 {
916 S32 blk, delta_poc;
917 S32 mv_x_clip, mv_y_clip;
918 /* Initialize only the first row */
919 for(blk = 0; blk < num_blks_in_row; blk++)
920 {
921 INIT_SEARCH_NODE(&ps_ctxt->aps_best_search_nodes_4x8_n_rows[i4_i][blk], i4_i);
922 }
923
924 delta_poc = ABS(ps_curr_layer->i4_poc - ps_curr_layer->ai4_ref_id_to_poc_lc[i4_i]);
925
926 /* Setting search range for different references based on the delta poc */
927 /*************************************************************************/
928 /* set the MV limit per ref. pic. */
929 /* - P pic. : Based on the config params. */
930 /* - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
931 /*************************************************************************/
932 {
933 /* TO DO : Remove hard coding of P-P dist. of 4 */
934 mv_x_clip = (ps_curr_layer->i2_max_mv_x * delta_poc) / 4;
935
936 /* Only for B/b pic. */
937 if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
938 {
939 WORD16 i2_mv_y_per_poc;
940
941 /* Get abs MAX for symmetric search */
942 i2_mv_y_per_poc =
943 MAX(ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id],
944 (ABS(ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id])));
945
946 mv_y_clip = i2_mv_y_per_poc * delta_poc;
947 }
948 /* Set the Config. File Params for P pic. */
949 else
950 {
951 /* TO DO : Remove hard coding of P-P dist. of 4 */
952 mv_y_clip = (ps_curr_layer->i2_max_mv_y * delta_poc) / 4;
953 }
954
955 /* Making mv_x and mv_y range multiple of 4 */
956 mv_x_clip = (((mv_x_clip + 3) >> 2) << 2);
957 mv_y_clip = (((mv_y_clip + 3) >> 2) << 2);
958 /* Clipping the range of mv_x and mv_y */
959 mv_x_clip = CLIP3(mv_x_clip, 4, MAX_MVX_SUPPORTED_IN_COARSE_LAYER);
960 mv_y_clip = CLIP3(mv_y_clip, 4, MAX_MVY_SUPPORTED_IN_COARSE_LAYER);
961
962 as_mv_limit[i4_i].i2_min_x = -mv_x_clip;
963 as_mv_limit[i4_i].i2_min_y = -mv_y_clip;
964 as_mv_limit[i4_i].i2_max_x = mv_x_clip;
965 as_mv_limit[i4_i].i2_max_y = mv_y_clip;
966 }
967 /*Populating SAD block size based on search range */
968 ai4_sad_4x4_block_size[i4_i] = ((2 * mv_x_clip) / ps_coarse_prms->full_search_step) *
969 ((2 * mv_y_clip) / ps_coarse_prms->full_search_step);
970 ai4_sad_4x4_block_stride[i4_i] = (num_blks_in_row + 1) * ai4_sad_4x4_block_size[i4_i];
971 }
972
973 for(i = 0; i < 2 * MAX_INIT_CANDTS; i++)
974 {
975 search_node_t *ps_search_node;
976 ps_search_node = &ps_ctxt->s_init_search_node[i];
977 INIT_SEARCH_NODE(ps_search_node, 0);
978 }
979 for(i = 0; i < 3; i++)
980 {
981 search_node_t *ps_search_node;
982 ps_search_node = &as_left_neighbours[i];
983 INIT_SEARCH_NODE(ps_search_node, 0);
984 ps_search_node = &as_top_neighbours[i];
985 INIT_SEARCH_NODE(ps_search_node, 0);
986 }
987 INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
988 /* Set up place holders to hold the search nodes of each initial candt */
989 for(i = 0; i < MAX_INIT_CANDTS; i++)
990 {
991 ps_search_candts_8x4[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
992
993 ps_search_candts_4x8[i].ps_search_node = &ps_ctxt->s_init_search_node[MAX_INIT_CANDTS + i];
994
995 ps_search_candts_8x4[i].u1_num_steps_refine = (U08)i4_max_iters;
996 ps_search_candts_4x8[i].u1_num_steps_refine = (U08)i4_max_iters;
997 }
998
999 /* For Top,TopLeft and Left cand., no need for refinement */
1000 id = 0;
1001 if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 == e_me_quality_preset))
1002 {
1003 /* This search candt has the full search result */
1004 ps_candt_fs_8x4 = ps_search_candts_8x4[id].ps_search_node;
1005 id++;
1006 }
1007
1008 ps_candt_8x4_l = ps_search_candts_8x4[id].ps_search_node;
1009 ps_search_candts_8x4[id].u1_num_steps_refine = 0;
1010 id++;
1011 ps_candt_8x4_t = ps_search_candts_8x4[id].ps_search_node;
1012 ps_search_candts_8x4[id].u1_num_steps_refine = 0;
1013 id++;
1014 ps_candt_8x4_tl = ps_search_candts_8x4[id].ps_search_node;
1015 ps_search_candts_8x4[id].u1_num_steps_refine = 0;
1016 id++;
1017 /* This search candt stores the global candt */
1018 global_id_8x4 = id;
1019 id++;
1020
1021 if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 != e_me_quality_preset))
1022 {
1023 /* This search candt has the full search result */
1024 ps_candt_fs_8x4 = ps_search_candts_8x4[id].ps_search_node;
1025 id++;
1026 }
1027 /* Don't increment id as (0,0) is removed from cand. list. Initializing */
1028 /* the pointer for hme_init_pred_ctxt_no_encode() */
1029 ps_candt_zeromv_8x4 = ps_search_candts_8x4[id].ps_search_node;
1030
1031 /* For Top,TopLeft and Left cand., no need for refinement */
1032 id = 0;
1033 if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 == e_me_quality_preset))
1034 {
1035 /* This search candt has the full search result */
1036 ps_candt_fs_4x8 = ps_search_candts_4x8[id].ps_search_node;
1037 id++;
1038 }
1039
1040 ps_candt_4x8_l = ps_search_candts_4x8[id].ps_search_node;
1041 ps_search_candts_4x8[id].u1_num_steps_refine = 0;
1042 id++;
1043 ps_candt_4x8_t = ps_search_candts_4x8[id].ps_search_node;
1044 ps_search_candts_4x8[id].u1_num_steps_refine = 0;
1045 id++;
1046 ps_candt_4x8_tl = ps_search_candts_4x8[id].ps_search_node;
1047 ps_search_candts_4x8[id].u1_num_steps_refine = 0;
1048 id++;
1049 /* This search candt stores the global candt */
1050 global_id_4x8 = id;
1051 id++;
1052 if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 != e_me_quality_preset))
1053 {
1054 /* This search candt has the full search result */
1055 ps_candt_fs_4x8 = ps_search_candts_4x8[id].ps_search_node;
1056 id++;
1057 }
1058 /* Don't increment id as (0,0) is removed from cand. list. Initializing */
1059 /* the pointer for hme_init_pred_ctxt_no_encode() */
1060 ps_candt_zeromv_4x8 = ps_search_candts_4x8[id].ps_search_node;
1061
1062 /* Zero mv always has 0 mvx and mvy components, ref idx initialized inside */
1063 ps_candt_zeromv_8x4->s_mv.i2_mvx = 0;
1064 ps_candt_zeromv_8x4->s_mv.i2_mvy = 0;
1065 ps_candt_zeromv_4x8->s_mv.i2_mvx = 0;
1066 ps_candt_zeromv_4x8->s_mv.i2_mvy = 0;
1067
1068 /* SET UP THE PRED CTXT FOR L0 AND L1 */
1069 {
1070 S32 pred_lx;
1071
1072 /* Bottom left always not available */
1073 as_left_neighbours[2].u1_is_avail = 0;
1074
1075 for(pred_lx = 0; pred_lx < 2; pred_lx++)
1076 {
1077 pred_ctxt_t *ps_pred_ctxt;
1078
1079 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
1080 hme_init_pred_ctxt_no_encode(
1081 ps_pred_ctxt,
1082 ps_search_results,
1083 as_top_neighbours,
1084 as_left_neighbours,
1085 NULL,
1086 ps_candt_zeromv_8x4,
1087 ps_candt_zeromv_8x4,
1088 pred_lx,
1089 lambda,
1090 ps_coarse_prms->lambda_q_shift,
1091 ps_ctxt->apu1_ref_bits_tlu_lc,
1092 ps_ctxt->ai2_ref_scf);
1093 }
1094 }
1095
1096 /*************************************************************************/
1097 /* Initialize the search parameters for search algo with the following */
1098 /* parameters: No SATD, calculated number of initial candidates, */
1099 /* No post refinement, initial step size and number of iterations as */
1100 /* passed by the calling function. */
1101 /* Also, we use input for this layer search, and not recon. */
1102 /*************************************************************************/
1103 if(e_me_quality_preset == ME_XTREME_SPEED_25)
1104 s_search_prms_8x4.i4_num_init_candts = 1;
1105 else
1106 s_search_prms_8x4.i4_num_init_candts = id;
1107 s_search_prms_8x4.i4_use_satd = 0;
1108 s_search_prms_8x4.i4_start_step = ps_coarse_prms->i4_start_step;
1109 s_search_prms_8x4.i4_num_steps_post_refine = 0;
1110 s_search_prms_8x4.i4_use_rec = 0;
1111 s_search_prms_8x4.ps_search_candts = ps_search_candts_8x4;
1112 s_search_prms_8x4.e_blk_size = BLK_8x4;
1113 s_search_prms_8x4.i4_max_iters = ps_coarse_prms->i4_max_iters;
1114 /* Coarse layer is always explicit */
1115 if(ME_MEDIUM_SPEED > e_me_quality_preset)
1116 {
1117 s_search_prms_8x4.pf_mv_cost_compute = compute_mv_cost_coarse;
1118 }
1119 else
1120 {
1121 s_search_prms_8x4.pf_mv_cost_compute = compute_mv_cost_coarse_high_speed;
1122 }
1123
1124 s_search_prms_8x4.i4_inp_stride = 8;
1125 s_search_prms_8x4.i4_cu_x_off = s_search_prms_8x4.i4_cu_y_off = 0;
1126 if(ps_coarse_prms->do_full_search)
1127 s_search_prms_8x4.i4_max_iters = 1;
1128 s_search_prms_8x4.i4_part_mask = (1 << PART_ID_2NxN_B);
1129 /* Using the member 0 to store for all ref. idx. */
1130 s_search_prms_8x4.aps_mv_range[0] = &s_range_prms;
1131 s_search_prms_8x4.ps_search_results = ps_search_results;
1132 s_search_prms_8x4.full_search_step = ps_coarse_prms->full_search_step;
1133
1134 s_search_prms_4x8 = s_search_prms_8x4;
1135 s_search_prms_4x8.ps_search_candts = ps_search_candts_4x8;
1136 s_search_prms_4x8.e_blk_size = BLK_4x8;
1137 s_search_prms_4x8.i4_part_mask = (1 << PART_ID_Nx2N_R);
1138
1139 s_search_prms_4x4 = s_search_prms_8x4;
1140 /* Since s_search_prms_4x4 is used only to compute SAD at 4x4 level, search candidate is not used */
1141 s_search_prms_4x4.ps_search_candts = ps_search_candts_4x8;
1142 s_search_prms_4x4.e_blk_size = BLK_4x4;
1143 s_search_prms_4x4.i4_part_mask = (1 << PART_ID_2Nx2N);
1144 /*************************************************************************/
1145 /* Picture limit on all 4 sides. This will be used to set mv limits for */
1146 /* every block given its coordinate. */
1147 /*************************************************************************/
1148 SET_PIC_LIMIT(
1149 s_pic_limit,
1150 ps_curr_layer->i4_pad_x_inp,
1151 ps_curr_layer->i4_pad_y_inp,
1152 ps_curr_layer->i4_wd,
1153 ps_curr_layer->i4_ht,
1154 s_search_prms_4x4.i4_num_steps_post_refine);
1155
1156 /* Pick the global mv from previous reference */
1157 for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref; i1_ref_idx++)
1158 {
1159 if(ME_XTREME_SPEED_25 != e_me_quality_preset)
1160 {
1161 /* Distance of current pic from reference */
1162 S32 i4_delta_poc;
1163
1164 hme_mv_t s_mv;
1165 i4_delta_poc = ps_curr_layer->i4_poc - ps_curr_layer->ai4_ref_id_to_poc_lc[i1_ref_idx];
1166
1167 hme_get_global_mv(ps_prev_layer, &s_mv, i4_delta_poc);
1168
1169 s_candt_global[i1_ref_idx].s_mv.i2_mvx = s_mv.i2_mv_x;
1170 s_candt_global[i1_ref_idx].s_mv.i2_mvy = s_mv.i2_mv_y;
1171 s_candt_global[i1_ref_idx].i1_ref_idx = i1_ref_idx;
1172
1173 /*********************************************************************/
1174 /* Initialize the histogram for each reference index in current */
1175 /* layer ctxt */
1176 /*********************************************************************/
1177 hme_init_histogram(
1178 ps_ctxt->aps_mv_hist[i1_ref_idx],
1179 (S32)as_mv_limit[i1_ref_idx].i2_max_x,
1180 (S32)as_mv_limit[i1_ref_idx].i2_max_y);
1181 }
1182
1183 /*********************************************************************/
1184 /* Initialize the dyn. search range params. for each reference index */
1185 /* in current layer ctxt */
1186 /*********************************************************************/
1187 /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
1188 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
1189 {
1190 INIT_DYN_SEARCH_PRMS(
1191 &ps_ctxt->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][i1_ref_idx],
1192 ps_curr_layer->ai4_ref_id_to_poc_lc[i1_ref_idx]);
1193 }
1194 }
1195
1196 /*************************************************************************/
1197 /* if exhaustive algorithm then we use only 1 candt 0, 0 */
1198 /* else we use a lot of causal and non causal candts */
1199 /* finally set number to the configured number of candts */
1200 /*************************************************************************/
1201
1202 /* Loop in raster order over each 4x4 blk in a given row till end of frame */
1203 while(0 == end_of_frame)
1204 {
1205 job_queue_t *ps_job;
1206 void *pv_hme_dep_mngr;
1207 WORD32 offset_val, check_dep_pos, set_dep_pos;
1208
1209 /* Get the current layer HME Dep Mngr */
1210 /* Note : Use layer_id - 1 in HME layers */
1211 pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_coarse_prms->i4_layer_id - 1];
1212
1213 /* Get the current row from the job queue */
1214 ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
1215 ps_multi_thrd_ctxt, ps_multi_thrd_ctxt->i4_me_coarsest_lyr_type, 1, i4_ping_pong);
1216
1217 /* If all rows are done, set the end of process flag to 1, */
1218 /* and the current row to -1 */
1219 if(NULL == ps_job)
1220 {
1221 blk_y = -1;
1222 end_of_frame = 1;
1223 }
1224 else
1225 {
1226 ASSERT(ps_multi_thrd_ctxt->i4_me_coarsest_lyr_type == ps_job->i4_pre_enc_task_type);
1227
1228 /* Obtain the current row's details from the job */
1229 blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
1230
1231 if(1 == ps_ctxt->s_frm_prms.is_i_pic)
1232 {
1233 /* set the output dependency of current row */
1234 ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
1235 continue;
1236 }
1237
1238 /* Set Variables for Dep. Checking and Setting */
1239 set_dep_pos = blk_y + 1;
1240 if(blk_y > 0)
1241 {
1242 offset_val = 2;
1243 check_dep_pos = blk_y - 1;
1244 }
1245 else
1246 {
1247 /* First row should run without waiting */
1248 offset_val = -1;
1249 check_dep_pos = 0;
1250 }
1251
1252 /* Loop over all the blocks in current row */
1253 /* One block extra, since the last block in a row needs East block */
1254 for(blk_x = 0; blk_x < (num_blks_in_row + 1); blk_x++)
1255 {
1256 /* Wait till top row block is processed */
1257 /* Currently checking till top right block*/
1258 if(blk_x < (num_blks_in_row))
1259 {
1260 ihevce_dmgr_chk_row_row_sync(
1261 pv_hme_dep_mngr,
1262 blk_x,
1263 offset_val,
1264 check_dep_pos,
1265 0, /* Col Tile No. : Not supported in PreEnc*/
1266 ps_ctxt->thrd_id);
1267 }
1268
1269 /***************************************************************/
1270 /* Get Weighted input for all references */
1271 /***************************************************************/
1272 fp_get_wt_inp(
1273 ps_curr_layer,
1274 &ps_ctxt->s_wt_pred,
1275 1 << (blk_size_shift + 1),
1276 blk_x << blk_size_shift,
1277 (blk_y - 1) << blk_size_shift,
1278 1 << (blk_size_shift + 1),
1279 i4_num_ref,
1280 ps_ctxt->i4_wt_pred_enable_flag);
1281
1282 /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
1283 hme_reset_search_results(
1284 ps_search_results,
1285 s_search_prms_8x4.i4_part_mask | s_search_prms_4x8.i4_part_mask,
1286 MV_RES_FPEL);
1287
1288 /* Compute the search node offsets */
1289 /* MAX is used to clip when left and top neighbours are not available at coarse boundaries */
1290 search_node_top_offset =
1291 blk_x + ps_ctxt->ai4_row_index[MAX((blk_y - 2), 0)] * num_blks_in_row;
1292 search_node_left_offset =
1293 MAX((blk_x - 1), 0) +
1294 ps_ctxt->ai4_row_index[MAX((blk_y - 1), 0)] * num_blks_in_row;
1295
1296 /* Input offset: wrt CU start. Offset for South block */
1297 s_search_prms_4x4.i4_cu_x_off = 0;
1298 s_search_prms_4x4.i4_cu_y_off = 4;
1299 s_search_prms_4x4.i4_inp_stride = 8;
1300 s_search_prms_4x4.i4_x_off = blk_x << blk_size_shift;
1301 s_search_prms_4x4.i4_y_off = blk_y << blk_size_shift;
1302
1303 s_search_prms_4x8.i4_x_off = s_search_prms_8x4.i4_x_off = blk_x << blk_size_shift;
1304 s_search_prms_4x8.i4_y_off = s_search_prms_8x4.i4_y_off = (blk_y - 1)
1305 << blk_size_shift;
1306
1307 /* This layer will always use explicit ME */
1308 /* Loop across different Ref IDx */
1309 for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref; i1_ref_idx++)
1310 {
1311 sad_top_offset = (blk_x * ai4_sad_4x4_block_size[i1_ref_idx]) +
1312 ps_ctxt->ai4_row_index[MAX((blk_y - 1), 0)] *
1313 ai4_sad_4x4_block_stride[i1_ref_idx];
1314 sad_current_offset =
1315 (blk_x * ai4_sad_4x4_block_size[i1_ref_idx]) +
1316 ps_ctxt->ai4_row_index[blk_y] * ai4_sad_4x4_block_stride[i1_ref_idx];
1317
1318 /* Initialize search node if blk_x == 0, as it doesn't have left neighbours */
1319 if(0 == blk_x)
1320 INIT_SEARCH_NODE(
1321 &ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx][blk_x],
1322 i1_ref_idx);
1323
1324 pi2_cur_ref_sads_4x4 = ps_ctxt->api2_sads_4x4_n_rows[i1_ref_idx];
1325
1326 /* Initialize changing params here */
1327 s_search_prms_8x4.i1_ref_idx = i1_ref_idx;
1328 s_search_prms_4x8.i1_ref_idx = i1_ref_idx;
1329 s_search_prms_4x4.i1_ref_idx = i1_ref_idx;
1330
1331 if(num_blks_in_row == blk_x)
1332 {
1333 S16 *pi2_sads_4x4_current;
1334 /* Since the current 4x4 block will be a padded region, which may not match with any of the reference */
1335 pi2_sads_4x4_current = pi2_cur_ref_sads_4x4 + sad_current_offset;
1336
1337 memset(pi2_sads_4x4_current, 0, ai4_sad_4x4_block_size[i1_ref_idx]);
1338 }
1339
1340 /* SAD to be computed and stored for the 4x4 block in 1st row and the last block of all rows*/
1341 if((0 == blk_y) || (num_blks_in_row == blk_x))
1342 {
1343 S16 *pi2_sads_4x4_current;
1344 /* Compute 4x4 SADs for current block */
1345 /* Pointer to store SADs */
1346 pi2_sads_4x4_current = pi2_cur_ref_sads_4x4 + sad_current_offset;
1347
1348 hme_derive_worst_case_search_range(
1349 &s_range_prms,
1350 &s_pic_limit,
1351 &as_mv_limit[i1_ref_idx],
1352 blk_x << blk_size_shift,
1353 blk_y << blk_size_shift,
1354 blk_wd,
1355 blk_ht);
1356
1357 if(ME_PRISTINE_QUALITY >= e_me_quality_preset)
1358 {
1359 ((ihevce_me_optimised_function_list_t *)
1360 ps_ctxt->pv_me_optimised_function_list)
1361 ->pf_store_4x4_sads_high_quality(
1362 &s_search_prms_4x4,
1363 ps_curr_layer,
1364 &as_mv_limit[i1_ref_idx],
1365 &ps_ctxt->s_wt_pred,
1366 pi2_sads_4x4_current);
1367 }
1368 else
1369 {
1370 ((ihevce_me_optimised_function_list_t *)
1371 ps_ctxt->pv_me_optimised_function_list)
1372 ->pf_store_4x4_sads_high_speed(
1373 &s_search_prms_4x4,
1374 ps_curr_layer,
1375 &as_mv_limit[i1_ref_idx],
1376 &ps_ctxt->s_wt_pred,
1377 pi2_sads_4x4_current);
1378 }
1379 }
1380 else
1381 {
1382 /* For the zero mv candt, the ref idx to be modified */
1383 ps_candt_zeromv_8x4->i1_ref_idx = i1_ref_idx;
1384 ps_candt_zeromv_4x8->i1_ref_idx = i1_ref_idx;
1385
1386 if(ME_XTREME_SPEED_25 != e_me_quality_preset)
1387 {
1388 /* For the global mvs alone, the search node points to a local variable */
1389 ps_search_candts_8x4[global_id_8x4].ps_search_node =
1390 &s_candt_global[i1_ref_idx];
1391 ps_search_candts_4x8[global_id_4x8].ps_search_node =
1392 &s_candt_global[i1_ref_idx];
1393 }
1394
1395 hme_get_spatial_candt(
1396 ps_curr_layer,
1397 BLK_4x4,
1398 blk_x,
1399 blk_y - 1,
1400 i1_ref_idx,
1401 as_top_neighbours,
1402 as_left_neighbours,
1403 0,
1404 1,
1405 0,
1406 0);
1407 /* set up the various candts */
1408 *ps_candt_4x8_l = as_left_neighbours[0];
1409 *ps_candt_4x8_t = as_top_neighbours[1];
1410 *ps_candt_4x8_tl = as_top_neighbours[0];
1411 *ps_candt_8x4_l = *ps_candt_4x8_l;
1412 *ps_candt_8x4_tl = *ps_candt_4x8_tl;
1413 *ps_candt_8x4_t = *ps_candt_4x8_t;
1414
1415 {
1416 S32 pred_lx;
1417 S16 *pi2_sads_4x4_current, *pi2_sads_4x4_top;
1418 pred_ctxt_t *ps_pred_ctxt;
1419 PF_MV_COST_FXN pf_mv_cost_compute;
1420
1421 /* Compute 4x4 SADs for current block */
1422 /* Pointer to store SADs */
1423 pi2_sads_4x4_current = pi2_cur_ref_sads_4x4 + sad_current_offset;
1424
1425 hme_derive_worst_case_search_range(
1426 &s_range_prms,
1427 &s_pic_limit,
1428 &as_mv_limit[i1_ref_idx],
1429 blk_x << blk_size_shift,
1430 blk_y << blk_size_shift,
1431 blk_wd,
1432 blk_ht);
1433 if(i4_pic_ht == blk_y)
1434 {
1435 memset(pi2_sads_4x4_current, 0, ai4_sad_4x4_block_size[i1_ref_idx]);
1436 }
1437 else
1438 {
1439 if(ME_PRISTINE_QUALITY >= e_me_quality_preset)
1440 {
1441 ((ihevce_me_optimised_function_list_t *)
1442 ps_ctxt->pv_me_optimised_function_list)
1443 ->pf_store_4x4_sads_high_quality(
1444 &s_search_prms_4x4,
1445 ps_curr_layer,
1446 &as_mv_limit[i1_ref_idx],
1447 &ps_ctxt->s_wt_pred,
1448 pi2_sads_4x4_current);
1449 }
1450 else
1451 {
1452 ((ihevce_me_optimised_function_list_t *)
1453 ps_ctxt->pv_me_optimised_function_list)
1454 ->pf_store_4x4_sads_high_speed(
1455 &s_search_prms_4x4,
1456 ps_curr_layer,
1457 &as_mv_limit[i1_ref_idx],
1458 &ps_ctxt->s_wt_pred,
1459 pi2_sads_4x4_current);
1460 }
1461 }
1462 /* Set pred direction to L0 or L1 */
1463 pred_lx = 1 - ps_search_results->pu1_is_past[i1_ref_idx];
1464
1465 /* Suitable context (L0 or L1) */
1466 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
1467
1468 /* Coarse layer is always explicit */
1469 if(ME_PRISTINE_QUALITY > e_me_quality_preset)
1470 {
1471 pf_mv_cost_compute = compute_mv_cost_coarse;
1472 }
1473 else
1474 {
1475 /* Cost function is not called in high speed case. Below one is just a dummy function */
1476 pf_mv_cost_compute = compute_mv_cost_coarse_high_speed;
1477 }
1478
1479 /*********************************************************************/
1480 /* Now, compute the mv for the top block */
1481 /*********************************************************************/
1482 pi2_sads_4x4_top = pi2_cur_ref_sads_4x4 + sad_top_offset;
1483
1484 /*********************************************************************/
1485 /* For every blk in the picture, the search range needs to be derived*/
1486 /* Any blk can have any mv, but practical search constraints are */
1487 /* imposed by the picture boundary and amt of padding. */
1488 /*********************************************************************/
1489 hme_derive_search_range(
1490 &s_range_prms,
1491 &s_pic_limit,
1492 &as_mv_limit[i1_ref_idx],
1493 blk_x << blk_size_shift,
1494 (blk_y - 1) << blk_size_shift,
1495 blk_wd,
1496 blk_ht);
1497
1498 /* Compute the mv for the top block */
1499 if(ME_PRISTINE_QUALITY >= e_me_quality_preset)
1500 {
1501 ((ihevce_me_optimised_function_list_t *)
1502 ps_ctxt->pv_me_optimised_function_list)
1503 ->pf_combine_4x4_sads_and_compute_cost_high_quality(
1504 i1_ref_idx,
1505 &s_range_prms, /* Both 4x8 and 8x4 has same search range */
1506 &as_mv_limit[i1_ref_idx],
1507 &best_mv_4x8,
1508 &best_mv_8x4,
1509 ps_pred_ctxt,
1510 pf_mv_cost_compute,
1511 pi2_sads_4x4_top, /* Current SAD block */
1512 (pi2_sads_4x4_top +
1513 ai4_sad_4x4_block_size[i1_ref_idx]), /* East SAD block */
1514 pi2_sads_4x4_current); /* South SAD block */
1515 }
1516 else
1517 {
1518 ((ihevce_me_optimised_function_list_t *)
1519 ps_ctxt->pv_me_optimised_function_list)
1520 ->pf_combine_4x4_sads_and_compute_cost_high_speed(
1521 i1_ref_idx,
1522 &s_range_prms, /* Both 4x8 and 8x4 has same search range */
1523 &as_mv_limit[i1_ref_idx],
1524 &best_mv_4x8,
1525 &best_mv_8x4,
1526 ps_pred_ctxt,
1527 pf_mv_cost_compute,
1528 pi2_sads_4x4_top, /* Current SAD block */
1529 (pi2_sads_4x4_top +
1530 ai4_sad_4x4_block_size[i1_ref_idx]), /* East SAD block */
1531 pi2_sads_4x4_current); /* South SAD block */
1532 }
1533
1534 ps_candt_fs_4x8->s_mv.i2_mvx = best_mv_4x8.i2_mv_x;
1535 ps_candt_fs_4x8->s_mv.i2_mvy = best_mv_4x8.i2_mv_y;
1536 ps_candt_fs_4x8->i1_ref_idx = i1_ref_idx;
1537
1538 ps_candt_fs_8x4->s_mv.i2_mvx = best_mv_8x4.i2_mv_x;
1539 ps_candt_fs_8x4->s_mv.i2_mvy = best_mv_8x4.i2_mv_y;
1540 ps_candt_fs_8x4->i1_ref_idx = i1_ref_idx;
1541 }
1542
1543 /* Call the search algo with the 8x4 params (s_search_prms_8x4): this is */
1544 /* the a-b (East) pair; the c-a pair was already evaluated by the left blk */
1545 hme_pred_search_square_stepn(
1546 &s_search_prms_8x4,
1547 ps_curr_layer,
1548 &ps_ctxt->s_wt_pred,
1549 e_me_quality_preset,
1550 (ihevce_me_optimised_function_list_t *)
1551 ps_ctxt->pv_me_optimised_function_list
1552
1553 );
1554
1555 /* Call the search algo with the 4x8 params (s_search_prms_4x8): the a-d (South) pair */
1556 hme_pred_search_square_stepn(
1557 &s_search_prms_4x8,
1558 ps_curr_layer,
1559 &ps_ctxt->s_wt_pred,
1560 e_me_quality_preset,
1561 (ihevce_me_optimised_function_list_t *)
1562 ps_ctxt->pv_me_optimised_function_list);
1563
1564 if(ME_XTREME_SPEED_25 != e_me_quality_preset)
1565 {
1566 /* Histogram updates across different Ref ID for global MV */
1567 hme_update_histogram(
1568 ps_ctxt->aps_mv_hist[i1_ref_idx],
1569 aps_best_search_node_8x4[i1_ref_idx]->s_mv.i2_mvx,
1570 aps_best_search_node_8x4[i1_ref_idx]->s_mv.i2_mvy);
1571 hme_update_histogram(
1572 ps_ctxt->aps_mv_hist[i1_ref_idx],
1573 aps_best_search_node_4x8[i1_ref_idx]->s_mv.i2_mvx,
1574 aps_best_search_node_4x8[i1_ref_idx]->s_mv.i2_mvy);
1575 }
1576
1577 /* update the best results to the mv bank */
1578 hme_update_mv_bank_coarse(
1579 ps_search_results,
1580 ps_curr_layer->ps_layer_mvbank,
1581 blk_x,
1582 (blk_y - 1),
1583 ps_ctxt->aps_best_search_nodes_4x8_n_rows[i1_ref_idx] +
1584 search_node_top_offset, /* Top Candidate */
1585 ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx] +
1586 search_node_left_offset, /* Left candidate */
1587 i1_ref_idx,
1588 &s_mv_update_prms);
1589
1590 /* Copy the best search result to 5 row array for future use */
1591 *(ps_ctxt->aps_best_search_nodes_4x8_n_rows[i1_ref_idx] + blk_x +
1592 ps_ctxt->ai4_row_index[blk_y - 1] * num_blks_in_row) =
1593 *(aps_best_search_node_4x8[i1_ref_idx]);
1594
1595 *(ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx] + blk_x +
1596 ps_ctxt->ai4_row_index[blk_y - 1] * num_blks_in_row) =
1597 *(aps_best_search_node_8x4[i1_ref_idx]);
1598
1599 /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
1600 /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
1601 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
1602 {
1603 WORD32 num_mvs, i, j;
1604 search_node_t *aps_search_nodes[4];
1605 /* Best results for 8x4R and 4x8B blocks */
1606 search_node_t *ps_search_node_8x4_r, *ps_search_node_4x8_b;
1607
1608 num_mvs = ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
1609
1610 /*************************************************************************/
1611 /* We have atleast 4 distinct results: the 4x8 top (coming from top blk) */
1612 /* 8x4 left (coming from left blk), 8x4 and 4x8 right and bot resp. */
1613 /* If number of results to be stored is 4, then we store all these 4 */
1614 /* results, else we pick best ones */
1615 /*************************************************************************/
1616 ps_search_node_8x4_r =
1617 ps_search_results->aps_part_results[i1_ref_idx][PART_ID_2NxN_B];
1618 ps_search_node_4x8_b =
1619 ps_search_results->aps_part_results[i1_ref_idx][PART_ID_Nx2N_R];
1620
1621 ASSERT(num_mvs <= 4);
1622
1623 /* Doing this to sort best results */
1624 aps_search_nodes[0] = ps_search_node_8x4_r;
1625 aps_search_nodes[1] = ps_search_node_4x8_b;
1626 aps_search_nodes[2] =
1627 ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx] +
1628 search_node_left_offset; /* Left candidate */
1629 aps_search_nodes[3] =
1630 ps_ctxt->aps_best_search_nodes_4x8_n_rows[i1_ref_idx] +
1631 search_node_top_offset; /* Top Candidate */
1632
1633 /* Note : Need to be resolved!!! */
1634 /* Added this to match with "hme_update_mv_bank_coarse" */
1635 if(num_mvs != 4)
1636 {
1637 /* Run through the results, store them in best to worst order */
1638 for(i = 0; i < num_mvs; i++)
1639 {
1640 for(j = i + 1; j < 4; j++)
1641 {
1642 if(aps_search_nodes[j]->i4_tot_cost <
1643 aps_search_nodes[i]->i4_tot_cost)
1644 {
1645 SWAP_HME(
1646 aps_search_nodes[j],
1647 aps_search_nodes[i],
1648 search_node_t *);
1649 }
1650 }
1651 }
1652 }
1653
1654 /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
1655 for(i = 0; i < num_mvs; i++)
1656 {
1657 hme_update_dynamic_search_params(
1658 &ps_ctxt->s_coarse_dyn_range_prms
1659 .as_dyn_range_prms[i4_layer_id][i1_ref_idx],
1660 aps_search_nodes[i]->s_mv.i2_mvy);
1661 }
1662 }
1663 }
1664 }
1665
1666 /* Update the number of blocks processed in the current row */
1667 ihevce_dmgr_set_row_row_sync(
1668 pv_hme_dep_mngr,
1669 (blk_x + 1),
1670 blk_y,
1671 0 /* Col Tile No. : Not supported in PreEnc*/);
1672 }
1673
1674 /* set the output dependency after completion of row */
1675 ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
1676 }
1677 }
1678
1679 return;
1680 }
1681