1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
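/**
*******************************************************************************
* @file
*  ih264e_me.c
*
* @brief
*  Contains the 16x16 motion estimation routines: evaluation of the initial
*  search candidates, full-pel diamond search, half-pel refinement and skip
*  cost computation
*
* @author
*  Ittiam
*
* @par List of Functions:
*  - ime_diamond_search_16x16()
*  - ime_evaluate_init_srchposn_16x16()
*  - ime_full_pel_motion_estimation_16x16()
*  - ime_sub_pel_motion_estimation_16x16()
*  - ime_compute_skip_cost()
*
* @remarks
*  None
*
*******************************************************************************
*/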
39
40 /*****************************************************************************/
41 /* File Includes */
42 /*****************************************************************************/
43
44 /* System include files */
45 #include <stdio.h>
46 #include <assert.h>
47 #include <limits.h>
48 #include <string.h>
49
50 /* User include files */
51 #include "ime_typedefs.h"
52 #include "ime_distortion_metrics.h"
53 #include "ime_defs.h"
54 #include "ime_structs.h"
55 #include "ime.h"
56 #include "ime_macros.h"
57 #include "ime_statistics.h"
58
59 /**
60 *******************************************************************************
61 *
62 * @brief Diamond Search
63 *
64 * @par Description:
65 * This function computes the sad at vertices of several layers of diamond grid
66 * at a time. The number of layers of diamond grid that would be evaluated is
67 * configurable.The function computes the sad at vertices of a diamond grid. If
68 * the sad at the center of the diamond grid is lesser than the sad at any other
69 * point of the diamond grid, the function marks the candidate Mb partition as
70 * mv.
71 *
72 * @param[in] ps_mb_part
73 * pointer to current mb partition ctxt with respect to ME
74 *
75 * @param[in] ps_me_ctxt
76 * pointer to me context
77 *
78 * @param[in] u4_lambda_motion
79 * lambda motion
80 *
81 * @param[in] u4_enable_fast_sad
82 * enable/disable fast sad computation
83 *
84 * @returns mv pair & corresponding distortion and cost
85 *
86 * @remarks Diamond Srch, radius is 1
87 *
88 *******************************************************************************
89 */
ime_diamond_search_16x16(me_ctxt_t * ps_me_ctxt,WORD32 i4_reflist)90 void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist)
91 {
92 /* MB partition info */
93 mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
94
95 /* lagrange parameter */
96 UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
97
98 /* srch range*/
99 WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
100 WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
101 WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
102 WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
103
104 /* enabled fast sad computation */
105 // UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
106
107 /* pointer to src macro block */
108 UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
109 UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
110
111 /* strides */
112 WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
113 WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
114
115 /* least cost */
116 WORD32 i4_cost_least = ps_mb_part->i4_mb_cost;
117
118 /* least sad */
119 WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
120
121 /* mv pair */
122 WORD16 i2_mvx, i2_mvy;
123
124 /* mv bits */
125 UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
126
127 /* temp var */
128 WORD32 i4_cost[4];
129 WORD32 i4_sad[4];
130 UWORD8 *pu1_ref;
131 WORD16 i2_mv_u_x, i2_mv_u_y;
132
133 /* Diamond search Iteration Max Cnt */
134 UWORD32 u4_num_layers = ps_me_ctxt->u4_num_layers;
135
136 /* temp var */
137 // UWORD8 u1_prev_jump = NONE;
138 // UWORD8 u1_curr_jump = NONE;
139 // UWORD8 u1_next_jump;
140 // WORD32 mask_arr[5] = {15, 13, 14, 7, 11};
141 // WORD32 mask;
142 // UWORD8 *apu1_ref[4];
143 // WORD32 i, cnt;
144 // WORD32 dia[4][2] = {{-1, 0}, {1, 0}, {0, -1}, {0, 1}};
145
146 /* mv with best sad during initial evaluation */
147 i2_mvx = ps_mb_part->s_mv_curr.i2_mvx;
148 i2_mvy = ps_mb_part->s_mv_curr.i2_mvy;
149
150 i2_mv_u_x = i2_mvx;
151 i2_mv_u_y = i2_mvy;
152
153 while (u4_num_layers--)
154 {
155 /* FIXME : is this the write way to check for out of bounds ? */
156 if ( (i2_mvx - 1 < i4_srch_range_w) ||
157 (i2_mvx + 1 > i4_srch_range_e) ||
158 (i2_mvy - 1 < i4_srch_range_n) ||
159 (i2_mvy + 1 > i4_srch_range_s) )
160 {
161 break;
162 }
163
164 pu1_ref = pu1_ref_mb + i2_mvx + (i2_mvy * i4_ref_strd);
165
166 ps_me_ctxt->pf_ime_compute_sad4_diamond(pu1_ref,
167 pu1_curr_mb,
168 i4_ref_strd,
169 i4_src_strd,
170 i4_sad);
171
172 DEBUG_SAD_HISTOGRAM_ADD(i4_sad[0], 2);
173 DEBUG_SAD_HISTOGRAM_ADD(i4_sad[1], 2);
174 DEBUG_SAD_HISTOGRAM_ADD(i4_sad[2], 2);
175 DEBUG_SAD_HISTOGRAM_ADD(i4_sad[3], 2);
176
177 /* compute cost */
178 i4_cost[0] = i4_sad[0] + u4_lambda_motion * ( pu1_mv_bits[ ((i2_mvx - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx]
179 + pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] );
180 i4_cost[1] = i4_sad[1] + u4_lambda_motion * ( pu1_mv_bits[ ((i2_mvx + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx]
181 + pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] );
182 i4_cost[2] = i4_sad[2] + u4_lambda_motion * ( pu1_mv_bits[ (i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx]
183 + pu1_mv_bits[((i2_mvy - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy] );
184 i4_cost[3] = i4_sad[3] + u4_lambda_motion * ( pu1_mv_bits[ (i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx]
185 + pu1_mv_bits[((i2_mvy + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy] );
186
187
188 if (i4_cost_least > i4_cost[0])
189 {
190 i4_cost_least = i4_cost[0];
191 i4_distortion_least = i4_sad[0];
192
193 i2_mv_u_x = (i2_mvx - 1);
194 i2_mv_u_y = i2_mvy;
195 }
196
197 if (i4_cost_least > i4_cost[1])
198 {
199 i4_cost_least = i4_cost[1];
200 i4_distortion_least = i4_sad[1];
201
202 i2_mv_u_x = (i2_mvx + 1);
203 i2_mv_u_y = i2_mvy;
204 }
205
206 if (i4_cost_least > i4_cost[2])
207 {
208 i4_cost_least = i4_cost[2];
209 i4_distortion_least = i4_sad[2];
210
211 i2_mv_u_x = i2_mvx;
212 i2_mv_u_y = i2_mvy - 1;
213 }
214
215 if (i4_cost_least > i4_cost[3])
216 {
217 i4_cost_least = i4_cost[3];
218 i4_distortion_least = i4_sad[3];
219
220 i2_mv_u_x = i2_mvx;
221 i2_mv_u_y = i2_mvy + 1;
222 }
223
224 if( (i2_mv_u_x == i2_mvx) && (i2_mv_u_y == i2_mvy))
225 {
226 ps_mb_part->u4_exit = 1;
227 break;
228 }
229 else
230 {
231 i2_mvx = i2_mv_u_x;
232 i2_mvy = i2_mv_u_y;
233 }
234
235
236 }
237
238 if (i4_cost_least < ps_mb_part->i4_mb_cost)
239 {
240 ps_mb_part->i4_mb_cost = i4_cost_least;
241 ps_mb_part->i4_mb_distortion = i4_distortion_least;
242 ps_mb_part->s_mv_curr.i2_mvx = i2_mvx;
243 ps_mb_part->s_mv_curr.i2_mvy = i2_mvy;
244 }
245
246 }
247
248
249 /**
250 *******************************************************************************
251 *
252 * @brief This function computes the best motion vector among the tentative mv
253 * candidates chosen.
254 *
255 * @par Description:
256 * This function determines the position in the search window at which the motion
257 * estimation should begin in order to minimise the number of search iterations.
258 *
259 * @param[in] ps_mb_part
260 * pointer to current mb partition ctxt with respect to ME
261 *
262 * @param[in] u4_lambda_motion
263 * lambda motion
264 *
265 * @param[in] u4_fast_flag
266 * enable/disable fast sad computation
267 *
268 * @returns mv pair & corresponding distortion and cost
269 *
270 * @remarks none
271 *
272 *******************************************************************************
273 */
274
ime_evaluate_init_srchposn_16x16(me_ctxt_t * ps_me_ctxt,WORD32 i4_reflist)275 void ime_evaluate_init_srchposn_16x16
276 (
277 me_ctxt_t *ps_me_ctxt,
278 WORD32 i4_reflist
279 )
280 {
281 UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
282
283 /* candidate mv cnt */
284 UWORD32 u4_num_candidates = ps_me_ctxt->u4_num_candidates[i4_reflist];
285
286 /* list of candidate mvs */
287 ime_mv_t *ps_mv_list = ps_me_ctxt->as_mv_init_search[i4_reflist];
288
289 /* pointer to src macro block */
290 UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
291 UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
292
293 /* strides */
294 WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
295 WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
296
297 /* enabled fast sad computation */
298 UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
299
300 /* SAD(distortion metric) of an 8x8 block */
301 WORD32 i4_mb_distortion;
302
303 /* cost = distortion + u4_lambda_motion * rate */
304 WORD32 i4_mb_cost, i4_mb_cost_least = INT_MAX, i4_distortion_least = INT_MAX;
305
306 /* mb partitions info */
307 mb_part_ctxt *ps_mb_part = &(ps_me_ctxt->as_mb_part[i4_reflist]);
308
309 /* mv bits */
310 UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
311
312 /* temp var */
313 UWORD32 i, j;
314 WORD32 i4_srch_pos_idx = 0;
315 UWORD8 *pu1_ref = NULL;
316
317 /* Carry out a search using each of the motion vector pairs identified above as predictors. */
318 /* TODO : Just like Skip, Do we need to add any bias to zero mv as well */
319 for(i = 0; i < u4_num_candidates; i++)
320 {
321 /* compute sad */
322 WORD32 c_sad = 1;
323
324 for(j = 0; j < i; j++ )
325 {
326 if ( (ps_mv_list[i].i2_mvx == ps_mv_list[j].i2_mvx) &&
327 (ps_mv_list[i].i2_mvy == ps_mv_list[j].i2_mvy) )
328 {
329 c_sad = 0;
330 break;
331 }
332 }
333 if(c_sad)
334 {
335 /* adjust ref pointer */
336 pu1_ref = pu1_ref_mb + ps_mv_list[i].i2_mvx + (ps_mv_list[i].i2_mvy * i4_ref_strd);
337
338 /* compute distortion */
339 ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, i4_mb_cost_least, &i4_mb_distortion);
340
341 DEBUG_SAD_HISTOGRAM_ADD(i4_mb_distortion, 3);
342 /* compute cost */
343 i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ (ps_mv_list[i].i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx]
344 + pu1_mv_bits[(ps_mv_list[i].i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] );
345
346 if (i4_mb_cost < i4_mb_cost_least)
347 {
348 i4_mb_cost_least = i4_mb_cost;
349
350 i4_distortion_least = i4_mb_distortion;
351
352 i4_srch_pos_idx = i;
353 }
354 }
355 }
356
357 if (i4_mb_cost_least < ps_mb_part->i4_mb_cost)
358 {
359 ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
360 ps_mb_part->i4_mb_cost = i4_mb_cost_least;
361 ps_mb_part->i4_mb_distortion = i4_distortion_least;
362 ps_mb_part->s_mv_curr.i2_mvx = ps_mv_list[i4_srch_pos_idx].i2_mvx;
363 ps_mb_part->s_mv_curr.i2_mvy = ps_mv_list[i4_srch_pos_idx].i2_mvy;
364 }
365 }
366
367 /**
368 *******************************************************************************
369 *
370 * @brief Searches for the best matching full pixel predictor within the search
371 * range
372 *
373 * @par Description:
374 * This function begins by computing the mv predict vector for the current mb.
375 * This is used for cost computations. Further basing on the algo. chosen, it
376 * looks through a set of candidate vectors that best represent the mb a least
377 * cost and returns this information.
378 *
379 * @param[in] ps_proc
380 * pointer to current proc ctxt
381 *
382 * @param[in] ps_me_ctxt
383 * pointer to me context
384 *
385 * @returns mv pair & corresponding distortion and cost
386 *
387 * @remarks none
388 *
389 *******************************************************************************
390 */
ime_full_pel_motion_estimation_16x16(me_ctxt_t * ps_me_ctxt,WORD32 i4_ref_list)391 void ime_full_pel_motion_estimation_16x16
392 (
393 me_ctxt_t *ps_me_ctxt,
394 WORD32 i4_ref_list
395 )
396 {
397 /* mb part info */
398 mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_ref_list];
399
400 /******************************************************************/
401 /* Modify Search range about initial candidate instead of zero mv */
402 /******************************************************************/
403 /*
404 * FIXME: The motion vectors in a way can become unbounded. It may so happen that
405 * MV might exceed the limit of the profile configured.
406 */
407 ps_me_ctxt->i4_srch_range_w = MAX(ps_me_ctxt->i4_srch_range_w,
408 -ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
409 ps_me_ctxt->i4_srch_range_e = MIN(ps_me_ctxt->i4_srch_range_e,
410 ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
411 ps_me_ctxt->i4_srch_range_n = MAX(ps_me_ctxt->i4_srch_range_n,
412 -ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
413 ps_me_ctxt->i4_srch_range_s = MIN(ps_me_ctxt->i4_srch_range_s,
414 ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
415
416 /************************************************************/
417 /* Traverse about best initial candidate for mv */
418 /************************************************************/
419
420 switch (ps_me_ctxt->u4_me_speed_preset)
421 {
422 case DMND_SRCH:
423 ime_diamond_search_16x16(ps_me_ctxt, i4_ref_list);
424 break;
425 default:
426 assert(0);
427 break;
428 }
429 }
430
431 /**
432 *******************************************************************************
433 *
434 * @brief Searches for the best matching sub pixel predictor within the search
435 * range
436 *
437 * @par Description:
438 * This function begins by searching across all sub pixel sample points
439 * around the full pel motion vector. The vector with least cost is chosen as
440 * the mv for the current mb. If the skip mode is not evaluated while analysing
441 * the initial search candidates then analyse it here and update the mv.
442 *
443 * @param[in] ps_proc
444 * pointer to current proc ctxt
445 *
446 * @param[in] ps_me_ctxt
447 * pointer to me context
448 *
449 * @returns none
450 *
451 * @remarks none
452 *
453 *******************************************************************************
454 */
ime_sub_pel_motion_estimation_16x16(me_ctxt_t * ps_me_ctxt,WORD32 i4_reflist)455 void ime_sub_pel_motion_estimation_16x16
456 (
457 me_ctxt_t *ps_me_ctxt,
458 WORD32 i4_reflist
459 )
460 {
461 /* pointers to src & ref macro block */
462 UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
463
464 /* pointers to ref. half pel planes */
465 UWORD8 *pu1_ref_mb_half_x;
466 UWORD8 *pu1_ref_mb_half_y;
467 UWORD8 *pu1_ref_mb_half_xy;
468
469 /* pointers to ref. half pel planes */
470 UWORD8 *pu1_ref_mb_half_x_temp;
471 UWORD8 *pu1_ref_mb_half_y_temp;
472 UWORD8 *pu1_ref_mb_half_xy_temp;
473
474 /* strides */
475 WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
476
477 WORD32 i4_ref_strd = ps_me_ctxt->u4_subpel_buf_strd;
478
479 /* mb partitions info */
480 mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
481
482 /* SAD(distortion metric) of an mb */
483 WORD32 i4_mb_distortion;
484 WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
485
486 /* cost = distortion + u4_lambda_motion * rate */
487 WORD32 i4_mb_cost;
488 WORD32 i4_mb_cost_least = ps_mb_part->i4_mb_cost;
489
490 /*Best half pel buffer*/
491 UWORD8 *pu1_best_hpel_buf = NULL;
492
493 /* mv bits */
494 UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
495
496 /* Motion vectors in full-pel units */
497 WORD16 mv_x, mv_y;
498
499 /* lambda - lagrange constant */
500 UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
501
502 /* Flags to check if half pel points needs to be evaluated */
503 /**************************************/
504 /* 1 bit for each half pel candidate */
505 /* bit 0 - half x = 1, half y = 0 */
506 /* bit 1 - half x = -1, half y = 0 */
507 /* bit 2 - half x = 0, half y = 1 */
508 /* bit 3 - half x = 0, half y = -1 */
509 /* bit 4 - half x = 1, half y = 1 */
510 /* bit 5 - half x = -1, half y = 1 */
511 /* bit 6 - half x = 1, half y = -1 */
512 /* bit 7 - half x = -1, half y = -1 */
513 /**************************************/
514 /* temp var */
515 WORD16 i2_mv_u_x, i2_mv_u_y;
516 WORD32 i, j;
517 WORD32 ai4_sad[8];
518
519 WORD32 i4_srch_pos_idx = ps_mb_part->i4_srch_pos_idx;
520
521 i2_mv_u_x = ps_mb_part->s_mv_curr.i2_mvx;
522 i2_mv_u_y = ps_mb_part->s_mv_curr.i2_mvy;
523
524 /************************************************************/
525 /* Evaluate half pel */
526 /************************************************************/
527 mv_x = ps_mb_part->s_mv_curr.i2_mvx >> 2;
528 mv_y = ps_mb_part->s_mv_curr.i2_mvy >> 2;
529
530
531 /**************************************************************/
532 /* ps_me_ctxt->pu1_half_x points to the half pel pixel on the */
533 /* left side of full pel */
534 /* ps_me_ctxt->pu1_half_y points to the half pel pixel on the */
535 /* top side of full pel */
536 /* ps_me_ctxt->pu1_half_xy points to the half pel pixel */
537 /* on the top left side of full pel */
538 /* for the function pf_ime_sub_pel_compute_sad_16x16 the */
539 /* default postions are */
540 /* ps_me_ctxt->pu1_half_x = right halp_pel */
541 /* ps_me_ctxt->pu1_half_y = bottom halp_pel */
542 /* ps_me_ctxt->pu1_half_xy = bottom right halp_pel */
543 /* Hence corresponding adjustments made here */
544 /**************************************************************/
545
546 pu1_ref_mb_half_x_temp = pu1_ref_mb_half_x = ps_me_ctxt->apu1_subpel_buffs[0] + 1;
547 pu1_ref_mb_half_y_temp = pu1_ref_mb_half_y = ps_me_ctxt->apu1_subpel_buffs[1] + 1 + i4_ref_strd;
548 pu1_ref_mb_half_xy_temp = pu1_ref_mb_half_xy = ps_me_ctxt->apu1_subpel_buffs[2] + 1 + i4_ref_strd;
549
550 ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16(pu1_curr_mb, pu1_ref_mb_half_x,
551 pu1_ref_mb_half_y,
552 pu1_ref_mb_half_xy,
553 i4_src_strd, i4_ref_strd,
554 ai4_sad);
555
556 /* Half x plane */
557 for(i = 0; i < 2; i++)
558 {
559 WORD32 mv_x_tmp = (mv_x << 2) + 2;
560 WORD32 mv_y_tmp = (mv_y << 2);
561
562 mv_x_tmp -= (i * 4);
563
564 i4_mb_distortion = ai4_sad[i];
565
566 /* compute cost */
567 i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
568 + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] );
569
570 if (i4_mb_cost < i4_mb_cost_least)
571 {
572 i4_mb_cost_least = i4_mb_cost;
573
574 i4_distortion_least = i4_mb_distortion;
575
576 i2_mv_u_x = mv_x_tmp;
577
578 i2_mv_u_y = mv_y_tmp;
579
580 #ifndef HP_PL /*choosing whether left or right half_x*/
581 ps_me_ctxt->apu1_subpel_buffs[0] = pu1_ref_mb_half_x_temp - i;
582 pu1_best_hpel_buf = pu1_ref_mb_half_x_temp - i;
583
584 i4_srch_pos_idx = 0;
585 #endif
586 }
587
588 }
589
590 /* Half y plane */
591 for(i = 0; i < 2; i++)
592 {
593 WORD32 mv_x_tmp = (mv_x << 2);
594 WORD32 mv_y_tmp = (mv_y << 2) + 2;
595
596 mv_y_tmp -= (i * 4);
597
598 i4_mb_distortion = ai4_sad[2 + i];
599
600 /* compute cost */
601 i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
602 + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] );
603
604 if (i4_mb_cost < i4_mb_cost_least)
605 {
606 i4_mb_cost_least = i4_mb_cost;
607
608 i4_distortion_least = i4_mb_distortion;
609
610 i2_mv_u_x = mv_x_tmp;
611
612 i2_mv_u_y = mv_y_tmp;
613
614 #ifndef HP_PL/*choosing whether top or bottom half_y*/
615 ps_me_ctxt->apu1_subpel_buffs[1] = pu1_ref_mb_half_y_temp - i*(i4_ref_strd);
616 pu1_best_hpel_buf = pu1_ref_mb_half_y_temp - i*(i4_ref_strd);
617
618 i4_srch_pos_idx = 1;
619 #endif
620 }
621
622 }
623
624 /* Half xy plane */
625 for(j = 0; j < 2; j++)
626 {
627 for(i = 0; i < 2; i++)
628 {
629 WORD32 mv_x_tmp = (mv_x << 2) + 2;
630 WORD32 mv_y_tmp = (mv_y << 2) + 2;
631
632 mv_x_tmp -= (i * 4);
633 mv_y_tmp -= (j * 4);
634
635 i4_mb_distortion = ai4_sad[4 + i + 2 * j];
636
637 /* compute cost */
638 i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
639 + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] );
640
641 if (i4_mb_cost < i4_mb_cost_least)
642 {
643 i4_mb_cost_least = i4_mb_cost;
644
645 i4_distortion_least = i4_mb_distortion;
646
647 i2_mv_u_x = mv_x_tmp;
648
649 i2_mv_u_y = mv_y_tmp;
650
651 #ifndef HP_PL /*choosing between four half_xy */
652 ps_me_ctxt->apu1_subpel_buffs[2] = pu1_ref_mb_half_xy_temp - j*(i4_ref_strd) - i;
653 pu1_best_hpel_buf = pu1_ref_mb_half_xy_temp - j*(i4_ref_strd) - i;
654
655 i4_srch_pos_idx = 2;
656 #endif
657 }
658
659 }
660 }
661
662 if (i4_mb_cost_least < ps_mb_part->i4_mb_cost)
663 {
664 ps_mb_part->i4_mb_cost = i4_mb_cost_least;
665 ps_mb_part->i4_mb_distortion = i4_distortion_least;
666 ps_mb_part->s_mv_curr.i2_mvx = i2_mv_u_x;
667 ps_mb_part->s_mv_curr.i2_mvy = i2_mv_u_y;
668 ps_mb_part->pu1_best_hpel_buf = pu1_best_hpel_buf;
669 ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
670 }
671 }
672
673 /**
674 *******************************************************************************
675 *
676 * @brief This function computes cost of skip macroblocks
677 *
678 * @par Description:
679 *
680 * @param[in] ps_me_ctxt
681 * pointer to me ctxt
682 *
683 *
684 * @returns none
685 *
686 * @remarks
687 * NOTE: while computing the skip cost, do not enable early exit from compute
688 * sad function because, a negative bias gets added later
689 * Note tha the last ME candidate in me ctxt is taken as skip motion vector
690 *
691 *******************************************************************************
692 */
ime_compute_skip_cost(me_ctxt_t * ps_me_ctxt,ime_mv_t * ps_skip_mv,mb_part_ctxt * ps_smb_part_info,UWORD32 u4_use_stat_sad,WORD32 i4_reflist,WORD32 i4_is_slice_type_b)693 void ime_compute_skip_cost
694 (
695 me_ctxt_t *ps_me_ctxt,
696 ime_mv_t *ps_skip_mv,
697 mb_part_ctxt *ps_smb_part_info,
698 UWORD32 u4_use_stat_sad,
699 WORD32 i4_reflist,
700 WORD32 i4_is_slice_type_b
701 )
702 {
703
704 /* SAD(distortion metric) of an mb */
705 WORD32 i4_mb_distortion;
706
707 /* cost = distortion + u4_lambda_motion * rate */
708 WORD32 i4_mb_cost;
709
710 /* temp var */
711 UWORD8 *pu1_ref = NULL;
712
713 ime_mv_t s_skip_mv;
714
715 s_skip_mv.i2_mvx = (ps_skip_mv->i2_mvx +2)>>2;
716 s_skip_mv.i2_mvy = (ps_skip_mv->i2_mvy +2)>>2;
717
718 /* Check if the skip mv is out of bounds or subpel */
719 {
720 /* skip mv */
721 ime_mv_t s_clip_skip_mv;
722
723 s_clip_skip_mv.i2_mvx = CLIP3(ps_me_ctxt->i4_srch_range_w, ps_me_ctxt->i4_srch_range_e, s_skip_mv.i2_mvx);
724 s_clip_skip_mv.i2_mvy = CLIP3(ps_me_ctxt->i4_srch_range_n, ps_me_ctxt->i4_srch_range_s, s_skip_mv.i2_mvy);
725
726 if ((s_clip_skip_mv.i2_mvx != s_skip_mv.i2_mvx) ||
727 (s_clip_skip_mv.i2_mvy != s_skip_mv.i2_mvy) ||
728 (ps_skip_mv->i2_mvx & 0x3) ||
729 (ps_skip_mv->i2_mvy & 0x3))
730 {
731 return ;
732 }
733 }
734
735
736 /* adjust ref pointer */
737 pu1_ref = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist] + s_skip_mv.i2_mvx
738 + (s_skip_mv.i2_mvy * ps_me_ctxt->i4_rec_strd);
739
740 if(u4_use_stat_sad == 1)
741 {
742 UWORD32 u4_is_nonzero;
743
744 ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16(
745 ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd,
746 ps_me_ctxt->i4_rec_strd, ps_me_ctxt->pu2_sad_thrsh,
747 &i4_mb_distortion, &u4_is_nonzero);
748
749 if (u4_is_nonzero == 0 || i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
750 {
751 ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */
752 ps_me_ctxt->i4_min_sad = (u4_is_nonzero == 0) ? 0 : i4_mb_distortion;
753 }
754 }
755 else
756 {
757 ps_me_ctxt->pf_ime_compute_sad_16x16[ps_me_ctxt->u4_enable_fast_sad](
758 ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd,
759 ps_me_ctxt->i4_rec_strd, INT_MAX, &i4_mb_distortion);
760
761 if(i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
762 {
763 ps_me_ctxt->i4_min_sad = i4_mb_distortion;
764 ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */
765 }
766 }
767
768
769 /* for skip mode cost & distortion are identical
770 * But we shall add a bias to favor skip mode.
771 * Doc. JVT B118 Suggests SKIP_BIAS as 16.
772 * TODO : Empirical analysis of SKIP_BIAS is necessary */
773
774 i4_mb_cost = i4_mb_distortion - (ps_me_ctxt->u4_lambda_motion * (ps_me_ctxt->i4_skip_bias[0] + ps_me_ctxt->i4_skip_bias[1] * i4_is_slice_type_b));
775
776 if (i4_mb_cost <= ps_smb_part_info->i4_mb_cost)
777 {
778 ps_smb_part_info->i4_mb_cost = i4_mb_cost;
779 ps_smb_part_info->i4_mb_distortion = i4_mb_distortion;
780 ps_smb_part_info->s_mv_curr.i2_mvx = s_skip_mv.i2_mvx;
781 ps_smb_part_info->s_mv_curr.i2_mvy = s_skip_mv.i2_mvy;
782 }
783 }
784
785