1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <limits.h>
12 #include <math.h>
13 #include <stdio.h>
14
15 #include "./vpx_config.h"
16
17 #include "vpx_mem/vpx_mem.h"
18
19 #include "vp9/common/vp9_common.h"
20
21 #include "vp9/encoder/vp9_onyx_int.h"
22 #include "vp9/encoder/vp9_mcomp.h"
23
24 // #define NEW_DIAMOND_SEARCH
25
get_buf_from_mv(const struct buf_2d * buf,const MV * mv)26 static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
27 const MV *mv) {
28 return &buf->buf[mv->row * buf->stride + mv->col];
29 }
30
vp9_set_mv_search_range(MACROBLOCK * x,const MV * mv)31 void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) {
32 int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
33 int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
34 int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
35 int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
36
37 col_min = MAX(col_min, (MV_LOW >> 3) + 1);
38 row_min = MAX(row_min, (MV_LOW >> 3) + 1);
39 col_max = MIN(col_max, (MV_UPP >> 3) - 1);
40 row_max = MIN(row_max, (MV_UPP >> 3) - 1);
41
42 // Get intersection of UMV window and valid MV window to reduce # of checks
43 // in diamond search.
44 if (x->mv_col_min < col_min)
45 x->mv_col_min = col_min;
46 if (x->mv_col_max > col_max)
47 x->mv_col_max = col_max;
48 if (x->mv_row_min < row_min)
49 x->mv_row_min = row_min;
50 if (x->mv_row_max > row_max)
51 x->mv_row_max = row_max;
52 }
53
vp9_init_search_range(VP9_COMP * cpi,int size)54 int vp9_init_search_range(VP9_COMP *cpi, int size) {
55 int sr = 0;
56
57 // Minimum search size no matter what the passed in value.
58 size = MAX(16, size);
59
60 while ((size << sr) < MAX_FULL_PEL_VAL)
61 sr++;
62
63 sr += cpi->sf.reduce_first_step_size;
64 sr = MIN(sr, (cpi->sf.max_step_search_steps - 2));
65 return sr;
66 }
67
mv_cost(const MV * mv,const int * joint_cost,int * comp_cost[2])68 static INLINE int mv_cost(const MV *mv,
69 const int *joint_cost, int *comp_cost[2]) {
70 return joint_cost[vp9_get_mv_joint(mv)] +
71 comp_cost[0][mv->row] + comp_cost[1][mv->col];
72 }
73
vp9_mv_bit_cost(const MV * mv,const MV * ref,const int * mvjcost,int * mvcost[2],int weight)74 int vp9_mv_bit_cost(const MV *mv, const MV *ref,
75 const int *mvjcost, int *mvcost[2], int weight) {
76 const MV diff = { mv->row - ref->row,
77 mv->col - ref->col };
78 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
79 }
80
mv_err_cost(const MV * mv,const MV * ref,const int * mvjcost,int * mvcost[2],int error_per_bit)81 static int mv_err_cost(const MV *mv, const MV *ref,
82 const int *mvjcost, int *mvcost[2],
83 int error_per_bit) {
84 if (mvcost) {
85 const MV diff = { mv->row - ref->row,
86 mv->col - ref->col };
87 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) *
88 error_per_bit, 13);
89 }
90 return 0;
91 }
92
mvsad_err_cost(const MV * mv,const MV * ref,const int * mvjsadcost,int * mvsadcost[2],int error_per_bit)93 static int mvsad_err_cost(const MV *mv, const MV *ref,
94 const int *mvjsadcost, int *mvsadcost[2],
95 int error_per_bit) {
96 if (mvsadcost) {
97 const MV diff = { mv->row - ref->row,
98 mv->col - ref->col };
99 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjsadcost, mvsadcost) *
100 error_per_bit, 8);
101 }
102 return 0;
103 }
104
vp9_init_dsmotion_compensation(MACROBLOCK * x,int stride)105 void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
106 int len, ss_count = 1;
107
108 x->ss[0].mv.col = x->ss[0].mv.row = 0;
109 x->ss[0].offset = 0;
110
111 for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
112 // Generate offsets for 4 search sites per step.
113 const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}};
114 int i;
115 for (i = 0; i < 4; ++i) {
116 search_site *const ss = &x->ss[ss_count++];
117 ss->mv = ss_mvs[i];
118 ss->offset = ss->mv.row * stride + ss->mv.col;
119 }
120 }
121
122 x->ss_count = ss_count;
123 x->searches_per_step = 4;
124 }
125
vp9_init3smotion_compensation(MACROBLOCK * x,int stride)126 void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
127 int len, ss_count = 1;
128
129 x->ss[0].mv.col = x->ss[0].mv.row = 0;
130 x->ss[0].offset = 0;
131
132 for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
133 // Generate offsets for 8 search sites per step.
134 const MV ss_mvs[8] = {
135 {-len, 0 }, {len, 0 }, { 0, -len}, {0, len},
136 {-len, -len}, {-len, len}, {len, -len}, {len, len}
137 };
138 int i;
139 for (i = 0; i < 8; ++i) {
140 search_site *const ss = &x->ss[ss_count++];
141 ss->mv = ss_mvs[i];
142 ss->offset = ss->mv.row * stride + ss->mv.col;
143 }
144 }
145
146 x->ss_count = ss_count;
147 x->searches_per_step = 8;
148 }
149
150 /*
151 * To avoid the penalty for crossing cache-line read, preload the reference
152 * area in a small buffer, which is aligned to make sure there won't be crossing
153 * cache-line read while reading from this buffer. This reduced the cpu
154 * cycles spent on reading ref data in sub-pixel filter functions.
155 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
156 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
157 * could reduce the area.
158 */
159
160 /* estimated cost of a motion vector (r,c) */
161 #define MVC(r, c) \
162 (mvcost ? \
163 ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
164 mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
165 error_per_bit + 4096) >> 13 : 0)
166
167
168 // convert motion vector component to offset for svf calc
sp(int x)169 static INLINE int sp(int x) {
170 return (x & 7) << 1;
171 }
172
pre(const uint8_t * buf,int stride,int r,int c,int offset)173 static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c,
174 int offset) {
175 return &buf[(r >> 3) * stride + (c >> 3) - offset];
176 }
177
178 /* returns subpixel variance error function */
179 #define DIST(r, c) \
180 vfp->svf(pre(y, y_stride, r, c, offset), y_stride, sp(c), sp(r), z, \
181 src_stride, &sse)
182
183 /* checks if (r, c) has better score than previous best */
184 #define CHECK_BETTER(v, r, c) \
185 if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
186 thismse = (DIST(r, c)); \
187 if ((v = MVC(r, c) + thismse) < besterr) { \
188 besterr = v; \
189 br = r; \
190 bc = c; \
191 *distortion = thismse; \
192 *sse1 = sse; \
193 } \
194 } else { \
195 v = INT_MAX; \
196 }
197
198 #define FIRST_LEVEL_CHECKS \
199 { \
200 unsigned int left, right, up, down, diag; \
201 CHECK_BETTER(left, tr, tc - hstep); \
202 CHECK_BETTER(right, tr, tc + hstep); \
203 CHECK_BETTER(up, tr - hstep, tc); \
204 CHECK_BETTER(down, tr + hstep, tc); \
205 whichdir = (left < right ? 0 : 1) + \
206 (up < down ? 0 : 2); \
207 switch (whichdir) { \
208 case 0: \
209 CHECK_BETTER(diag, tr - hstep, tc - hstep); \
210 break; \
211 case 1: \
212 CHECK_BETTER(diag, tr - hstep, tc + hstep); \
213 break; \
214 case 2: \
215 CHECK_BETTER(diag, tr + hstep, tc - hstep); \
216 break; \
217 case 3: \
218 CHECK_BETTER(diag, tr + hstep, tc + hstep); \
219 break; \
220 } \
221 }
222
223 #define SECOND_LEVEL_CHECKS \
224 { \
225 int kr, kc; \
226 unsigned int second; \
227 if (tr != br && tc != bc) { \
228 kr = br - tr; \
229 kc = bc - tc; \
230 CHECK_BETTER(second, tr + kr, tc + 2 * kc); \
231 CHECK_BETTER(second, tr + 2 * kr, tc + kc); \
232 } else if (tr == br && tc != bc) { \
233 kc = bc - tc; \
234 CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \
235 CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \
236 switch (whichdir) { \
237 case 0: \
238 case 1: \
239 CHECK_BETTER(second, tr + hstep, tc + kc); \
240 break; \
241 case 2: \
242 case 3: \
243 CHECK_BETTER(second, tr - hstep, tc + kc); \
244 break; \
245 } \
246 } else if (tr != br && tc == bc) { \
247 kr = br - tr; \
248 CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \
249 CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \
250 switch (whichdir) { \
251 case 0: \
252 case 2: \
253 CHECK_BETTER(second, tr + kr, tc + hstep); \
254 break; \
255 case 1: \
256 case 3: \
257 CHECK_BETTER(second, tr + kr, tc - hstep); \
258 break; \
259 } \
260 } \
261 }
262
vp9_find_best_sub_pixel_tree(const MACROBLOCK * x,MV * bestmv,const MV * ref_mv,int allow_hp,int error_per_bit,const vp9_variance_fn_ptr_t * vfp,int forced_stop,int iters_per_step,int * mvjcost,int * mvcost[2],int * distortion,unsigned int * sse1)263 int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
264 MV *bestmv, const MV *ref_mv,
265 int allow_hp,
266 int error_per_bit,
267 const vp9_variance_fn_ptr_t *vfp,
268 int forced_stop,
269 int iters_per_step,
270 int *mvjcost, int *mvcost[2],
271 int *distortion,
272 unsigned int *sse1) {
273 const uint8_t *z = x->plane[0].src.buf;
274 const int src_stride = x->plane[0].src.stride;
275 const MACROBLOCKD *xd = &x->e_mbd;
276 unsigned int besterr = INT_MAX;
277 unsigned int sse;
278 unsigned int whichdir;
279 int thismse;
280 unsigned int halfiters = iters_per_step;
281 unsigned int quarteriters = iters_per_step;
282 unsigned int eighthiters = iters_per_step;
283
284 const int y_stride = xd->plane[0].pre[0].stride;
285 const int offset = bestmv->row * y_stride + bestmv->col;
286 const uint8_t *y = xd->plane[0].pre[0].buf + offset;
287
288 int rr = ref_mv->row;
289 int rc = ref_mv->col;
290 int br = bestmv->row * 8;
291 int bc = bestmv->col * 8;
292 int hstep = 4;
293 const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
294 const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
295 const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
296 const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
297
298 int tr = br;
299 int tc = bc;
300
301 // central mv
302 bestmv->row *= 8;
303 bestmv->col *= 8;
304
305 // calculate central point error
306 besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
307 *distortion = besterr;
308 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
309
310 // 1/2 pel
311 FIRST_LEVEL_CHECKS;
312 if (halfiters > 1) {
313 SECOND_LEVEL_CHECKS;
314 }
315 tr = br;
316 tc = bc;
317
318 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
319 if (forced_stop != 2) {
320 hstep >>= 1;
321 FIRST_LEVEL_CHECKS;
322 if (quarteriters > 1) {
323 SECOND_LEVEL_CHECKS;
324 }
325 tr = br;
326 tc = bc;
327 }
328
329 if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
330 hstep >>= 1;
331 FIRST_LEVEL_CHECKS;
332 if (eighthiters > 1) {
333 SECOND_LEVEL_CHECKS;
334 }
335 tr = br;
336 tc = bc;
337 }
338 // These lines insure static analysis doesn't warn that
339 // tr and tc aren't used after the above point.
340 (void) tr;
341 (void) tc;
342
343 bestmv->row = br;
344 bestmv->col = bc;
345
346 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
347 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
348 return INT_MAX;
349
350 return besterr;
351 }
352
353 #undef DIST
354 /* returns subpixel variance error function */
355 #define DIST(r, c) \
356 vfp->svaf(pre(y, y_stride, r, c, offset), y_stride, sp(c), sp(r), \
357 z, src_stride, &sse, second_pred)
358
vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK * x,MV * bestmv,const MV * ref_mv,int allow_hp,int error_per_bit,const vp9_variance_fn_ptr_t * vfp,int forced_stop,int iters_per_step,int * mvjcost,int * mvcost[2],int * distortion,unsigned int * sse1,const uint8_t * second_pred,int w,int h)359 int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x,
360 MV *bestmv, const MV *ref_mv,
361 int allow_hp,
362 int error_per_bit,
363 const vp9_variance_fn_ptr_t *vfp,
364 int forced_stop,
365 int iters_per_step,
366 int *mvjcost, int *mvcost[2],
367 int *distortion,
368 unsigned int *sse1,
369 const uint8_t *second_pred,
370 int w, int h) {
371 const uint8_t *z = x->plane[0].src.buf;
372 const int src_stride = x->plane[0].src.stride;
373 const MACROBLOCKD *xd = &x->e_mbd;
374 unsigned int besterr = INT_MAX;
375 unsigned int sse;
376 unsigned int whichdir;
377 int thismse;
378 const unsigned int halfiters = iters_per_step;
379 const unsigned int quarteriters = iters_per_step;
380 const unsigned int eighthiters = iters_per_step;
381
382 DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
383 const int y_stride = xd->plane[0].pre[0].stride;
384 const int offset = bestmv->row * y_stride + bestmv->col;
385 const uint8_t *y = xd->plane[0].pre[0].buf + offset;
386
387 int rr = ref_mv->row;
388 int rc = ref_mv->col;
389 int br = bestmv->row * 8;
390 int bc = bestmv->col * 8;
391 int hstep = 4;
392 const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
393 const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
394 const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
395 const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
396
397 int tr = br;
398 int tc = bc;
399
400 // central mv
401 bestmv->row *= 8;
402 bestmv->col *= 8;
403
404 // calculate central point error
405 // TODO(yunqingwang): central pointer error was already calculated in full-
406 // pixel search, and can be passed in this function.
407 vp9_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
408 besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
409 *distortion = besterr;
410 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
411
412 // Each subsequent iteration checks at least one point in
413 // common with the last iteration could be 2 ( if diag selected)
414 // 1/2 pel
415 FIRST_LEVEL_CHECKS;
416 if (halfiters > 1) {
417 SECOND_LEVEL_CHECKS;
418 }
419 tr = br;
420 tc = bc;
421
422 // Each subsequent iteration checks at least one point in common with
423 // the last iteration could be 2 ( if diag selected) 1/4 pel
424
425 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
426 if (forced_stop != 2) {
427 hstep >>= 1;
428 FIRST_LEVEL_CHECKS;
429 if (quarteriters > 1) {
430 SECOND_LEVEL_CHECKS;
431 }
432 tr = br;
433 tc = bc;
434 }
435
436 if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
437 hstep >>= 1;
438 FIRST_LEVEL_CHECKS;
439 if (eighthiters > 1) {
440 SECOND_LEVEL_CHECKS;
441 }
442 tr = br;
443 tc = bc;
444 }
445 // These lines insure static analysis doesn't warn that
446 // tr and tc aren't used after the above point.
447 (void) tr;
448 (void) tc;
449
450 bestmv->row = br;
451 bestmv->col = bc;
452
453 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
454 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
455 return INT_MAX;
456
457 return besterr;
458 }
459
460 #undef MVC
461 #undef PRE
462 #undef DIST
463 #undef CHECK_BETTER
464
check_bounds(const MACROBLOCK * x,int row,int col,int range)465 static INLINE int check_bounds(const MACROBLOCK *x, int row, int col,
466 int range) {
467 return ((row - range) >= x->mv_row_min) &
468 ((row + range) <= x->mv_row_max) &
469 ((col - range) >= x->mv_col_min) &
470 ((col + range) <= x->mv_col_max);
471 }
472
is_mv_in(const MACROBLOCK * x,const MV * mv)473 static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) {
474 return (mv->col >= x->mv_col_min) && (mv->col <= x->mv_col_max) &&
475 (mv->row >= x->mv_row_min) && (mv->row <= x->mv_row_max);
476 }
477
478 #define CHECK_BETTER \
479 {\
480 if (thissad < bestsad) {\
481 if (use_mvcost) \
482 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, \
483 mvjsadcost, mvsadcost, sad_per_bit);\
484 if (thissad < bestsad) {\
485 bestsad = thissad;\
486 best_site = i;\
487 }\
488 }\
489 }
490
491 #define MAX_PATTERN_SCALES 11
492 #define MAX_PATTERN_CANDIDATES 8 // max number of canddiates per scale
493 #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates
494
495 // Generic pattern search function that searches over multiple scales.
496 // Each scale can have a different number of candidates and shape of
497 // candidates as indicated in the num_candidates and candidates arrays
498 // passed into this function
vp9_pattern_search(const MACROBLOCK * x,MV * ref_mv,int search_param,int sad_per_bit,int do_init_search,int do_refine,const vp9_variance_fn_ptr_t * vfp,int use_mvcost,const MV * center_mv,MV * best_mv,const int num_candidates[MAX_PATTERN_SCALES],const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES])499 static int vp9_pattern_search(const MACROBLOCK *x,
500 MV *ref_mv,
501 int search_param,
502 int sad_per_bit,
503 int do_init_search, int do_refine,
504 const vp9_variance_fn_ptr_t *vfp,
505 int use_mvcost,
506 const MV *center_mv, MV *best_mv,
507 const int num_candidates[MAX_PATTERN_SCALES],
508 const MV candidates[MAX_PATTERN_SCALES]
509 [MAX_PATTERN_CANDIDATES]) {
510 const MACROBLOCKD *const xd = &x->e_mbd;
511 static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
512 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
513 };
514 int i, j, s, t;
515 const struct buf_2d *const what = &x->plane[0].src;
516 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
517 int br, bc;
518 int bestsad = INT_MAX;
519 int thissad;
520 int k = -1;
521 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
522 int best_init_s = search_param_to_steps[search_param];
523 const int *const mvjsadcost = x->nmvjointsadcost;
524 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
525
526 // adjust ref_mv to make sure it is within MV range
527 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
528 br = ref_mv->row;
529 bc = ref_mv->col;
530
531 // Work out the start point for the search
532 bestsad = vfp->sdf(what->buf, what->stride,
533 get_buf_from_mv(in_what, ref_mv), in_what->stride,
534 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv,
535 mvjsadcost, mvsadcost, sad_per_bit);
536
537 // Search all possible scales upto the search param around the center point
538 // pick the scale of the point that is best as the starting scale of
539 // further steps around it.
540 if (do_init_search) {
541 s = best_init_s;
542 best_init_s = -1;
543 for (t = 0; t <= s; ++t) {
544 int best_site = -1;
545 if (check_bounds(x, br, bc, 1 << t)) {
546 for (i = 0; i < num_candidates[t]; i++) {
547 const MV this_mv = {br + candidates[t][i].row,
548 bc + candidates[t][i].col};
549 thissad = vfp->sdf(what->buf, what->stride,
550 get_buf_from_mv(in_what, &this_mv),
551 in_what->stride, bestsad);
552 CHECK_BETTER
553 }
554 } else {
555 for (i = 0; i < num_candidates[t]; i++) {
556 const MV this_mv = {br + candidates[t][i].row,
557 bc + candidates[t][i].col};
558 if (!is_mv_in(x, &this_mv))
559 continue;
560 thissad = vfp->sdf(what->buf, what->stride,
561 get_buf_from_mv(in_what, &this_mv),
562 in_what->stride, bestsad);
563 CHECK_BETTER
564 }
565 }
566 if (best_site == -1) {
567 continue;
568 } else {
569 best_init_s = t;
570 k = best_site;
571 }
572 }
573 if (best_init_s != -1) {
574 br += candidates[best_init_s][k].row;
575 bc += candidates[best_init_s][k].col;
576 }
577 }
578
579 // If the center point is still the best, just skip this and move to
580 // the refinement step.
581 if (best_init_s != -1) {
582 int best_site = -1;
583 s = best_init_s;
584
585 do {
586 // No need to search all 6 points the 1st time if initial search was used
587 if (!do_init_search || s != best_init_s) {
588 if (check_bounds(x, br, bc, 1 << s)) {
589 for (i = 0; i < num_candidates[s]; i++) {
590 const MV this_mv = {br + candidates[s][i].row,
591 bc + candidates[s][i].col};
592 thissad = vfp->sdf(what->buf, what->stride,
593 get_buf_from_mv(in_what, &this_mv),
594 in_what->stride, bestsad);
595 CHECK_BETTER
596 }
597 } else {
598 for (i = 0; i < num_candidates[s]; i++) {
599 const MV this_mv = {br + candidates[s][i].row,
600 bc + candidates[s][i].col};
601 if (!is_mv_in(x, &this_mv))
602 continue;
603 thissad = vfp->sdf(what->buf, what->stride,
604 get_buf_from_mv(in_what, &this_mv),
605 in_what->stride, bestsad);
606 CHECK_BETTER
607 }
608 }
609
610 if (best_site == -1) {
611 continue;
612 } else {
613 br += candidates[s][best_site].row;
614 bc += candidates[s][best_site].col;
615 k = best_site;
616 }
617 }
618
619 do {
620 int next_chkpts_indices[PATTERN_CANDIDATES_REF];
621 best_site = -1;
622 next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
623 next_chkpts_indices[1] = k;
624 next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
625
626 if (check_bounds(x, br, bc, 1 << s)) {
627 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
628 const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
629 bc + candidates[s][next_chkpts_indices[i]].col};
630 thissad = vfp->sdf(what->buf, what->stride,
631 get_buf_from_mv(in_what, &this_mv),
632 in_what->stride, bestsad);
633 CHECK_BETTER
634 }
635 } else {
636 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
637 const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
638 bc + candidates[s][next_chkpts_indices[i]].col};
639 if (!is_mv_in(x, &this_mv))
640 continue;
641 thissad = vfp->sdf(what->buf, what->stride,
642 get_buf_from_mv(in_what, &this_mv),
643 in_what->stride, bestsad);
644 CHECK_BETTER
645 }
646 }
647
648 if (best_site != -1) {
649 k = next_chkpts_indices[best_site];
650 br += candidates[s][k].row;
651 bc += candidates[s][k].col;
652 }
653 } while (best_site != -1);
654 } while (s--);
655 }
656
657 // Check 4 1-away neighbors if do_refine is true.
658 // For most well-designed schemes do_refine will not be necessary.
659 if (do_refine) {
660 static const MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}};
661
662 for (j = 0; j < 16; j++) {
663 int best_site = -1;
664 if (check_bounds(x, br, bc, 1)) {
665 for (i = 0; i < 4; i++) {
666 const MV this_mv = {br + neighbors[i].row,
667 bc + neighbors[i].col};
668 thissad = vfp->sdf(what->buf, what->stride,
669 get_buf_from_mv(in_what, &this_mv),
670 in_what->stride, bestsad);
671 CHECK_BETTER
672 }
673 } else {
674 for (i = 0; i < 4; i++) {
675 const MV this_mv = {br + neighbors[i].row,
676 bc + neighbors[i].col};
677 if (!is_mv_in(x, &this_mv))
678 continue;
679 thissad = vfp->sdf(what->buf, what->stride,
680 get_buf_from_mv(in_what, &this_mv),
681 in_what->stride, bestsad);
682 CHECK_BETTER
683 }
684 }
685
686 if (best_site == -1) {
687 break;
688 } else {
689 br += neighbors[best_site].row;
690 bc += neighbors[best_site].col;
691 }
692 }
693 }
694
695 best_mv->row = br;
696 best_mv->col = bc;
697
698 return bestsad;
699 }
700
vp9_get_mvpred_var(const MACROBLOCK * x,const MV * best_mv,const MV * center_mv,const vp9_variance_fn_ptr_t * vfp,int use_mvcost)701 int vp9_get_mvpred_var(const MACROBLOCK *x,
702 const MV *best_mv, const MV *center_mv,
703 const vp9_variance_fn_ptr_t *vfp,
704 int use_mvcost) {
705 const MACROBLOCKD *const xd = &x->e_mbd;
706 const struct buf_2d *const what = &x->plane[0].src;
707 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
708 const MV mv = {best_mv->row * 8, best_mv->col * 8};
709 unsigned int unused;
710
711 return vfp->vf(what->buf, what->stride,
712 get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) +
713 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost,
714 x->mvcost, x->errorperbit) : 0);
715 }
716
vp9_get_mvpred_av_var(const MACROBLOCK * x,const MV * best_mv,const MV * center_mv,const uint8_t * second_pred,const vp9_variance_fn_ptr_t * vfp,int use_mvcost)717 int vp9_get_mvpred_av_var(const MACROBLOCK *x,
718 const MV *best_mv, const MV *center_mv,
719 const uint8_t *second_pred,
720 const vp9_variance_fn_ptr_t *vfp,
721 int use_mvcost) {
722 const MACROBLOCKD *const xd = &x->e_mbd;
723 const struct buf_2d *const what = &x->plane[0].src;
724 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
725 const MV mv = {best_mv->row * 8, best_mv->col * 8};
726 unsigned int unused;
727
728 return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
729 what->buf, what->stride, &unused, second_pred) +
730 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost,
731 x->mvcost, x->errorperbit) : 0);
732 }
733
vp9_hex_search(const MACROBLOCK * x,MV * ref_mv,int search_param,int sad_per_bit,int do_init_search,const vp9_variance_fn_ptr_t * vfp,int use_mvcost,const MV * center_mv,MV * best_mv)734 int vp9_hex_search(const MACROBLOCK *x,
735 MV *ref_mv,
736 int search_param,
737 int sad_per_bit,
738 int do_init_search,
739 const vp9_variance_fn_ptr_t *vfp,
740 int use_mvcost,
741 const MV *center_mv, MV *best_mv) {
742 // First scale has 8-closest points, the rest have 6 points in hex shape
743 // at increasing scales
744 static const int hex_num_candidates[MAX_PATTERN_SCALES] = {
745 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
746 };
747 // Note that the largest candidate step at each scale is 2^scale
748 static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
749 {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, { 0, 1}, { -1, 1}, {-1, 0}},
750 {{-1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}},
751 {{-2, -4}, {2, -4}, {4, 0}, {2, 4}, { -2, 4}, { -4, 0}},
752 {{-4, -8}, {4, -8}, {8, 0}, {4, 8}, { -4, 8}, { -8, 0}},
753 {{-8, -16}, {8, -16}, {16, 0}, {8, 16}, { -8, 16}, { -16, 0}},
754 {{-16, -32}, {16, -32}, {32, 0}, {16, 32}, { -16, 32}, { -32, 0}},
755 {{-32, -64}, {32, -64}, {64, 0}, {32, 64}, { -32, 64}, { -64, 0}},
756 {{-64, -128}, {64, -128}, {128, 0}, {64, 128}, { -64, 128}, { -128, 0}},
757 {{-128, -256}, {128, -256}, {256, 0}, {128, 256}, { -128, 256}, { -256, 0}},
758 {{-256, -512}, {256, -512}, {512, 0}, {256, 512}, { -256, 512}, { -512, 0}},
759 {{-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, { -512, 1024},
760 { -1024, 0}},
761 };
762 return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
763 do_init_search, 0, vfp, use_mvcost,
764 center_mv, best_mv,
765 hex_num_candidates, hex_candidates);
766 }
767
vp9_bigdia_search(const MACROBLOCK * x,MV * ref_mv,int search_param,int sad_per_bit,int do_init_search,const vp9_variance_fn_ptr_t * vfp,int use_mvcost,const MV * center_mv,MV * best_mv)768 int vp9_bigdia_search(const MACROBLOCK *x,
769 MV *ref_mv,
770 int search_param,
771 int sad_per_bit,
772 int do_init_search,
773 const vp9_variance_fn_ptr_t *vfp,
774 int use_mvcost,
775 const MV *center_mv,
776 MV *best_mv) {
777 // First scale has 4-closest points, the rest have 8 points in diamond
778 // shape at increasing scales
779 static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
780 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
781 };
782 // Note that the largest candidate step at each scale is 2^scale
783 static const MV bigdia_candidates[MAX_PATTERN_SCALES]
784 [MAX_PATTERN_CANDIDATES] = {
785 {{0, -1}, {1, 0}, { 0, 1}, {-1, 0}},
786 {{-1, -1}, {0, -2}, {1, -1}, {2, 0}, {1, 1}, {0, 2}, {-1, 1}, {-2, 0}},
787 {{-2, -2}, {0, -4}, {2, -2}, {4, 0}, {2, 2}, {0, 4}, {-2, 2}, {-4, 0}},
788 {{-4, -4}, {0, -8}, {4, -4}, {8, 0}, {4, 4}, {0, 8}, {-4, 4}, {-8, 0}},
789 {{-8, -8}, {0, -16}, {8, -8}, {16, 0}, {8, 8}, {0, 16}, {-8, 8}, {-16, 0}},
790 {{-16, -16}, {0, -32}, {16, -16}, {32, 0}, {16, 16}, {0, 32},
791 {-16, 16}, {-32, 0}},
792 {{-32, -32}, {0, -64}, {32, -32}, {64, 0}, {32, 32}, {0, 64},
793 {-32, 32}, {-64, 0}},
794 {{-64, -64}, {0, -128}, {64, -64}, {128, 0}, {64, 64}, {0, 128},
795 {-64, 64}, {-128, 0}},
796 {{-128, -128}, {0, -256}, {128, -128}, {256, 0}, {128, 128}, {0, 256},
797 {-128, 128}, {-256, 0}},
798 {{-256, -256}, {0, -512}, {256, -256}, {512, 0}, {256, 256}, {0, 512},
799 {-256, 256}, {-512, 0}},
800 {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024},
801 {-512, 512}, {-1024, 0}},
802 };
803 return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
804 do_init_search, 0, vfp, use_mvcost,
805 center_mv, best_mv,
806 bigdia_num_candidates, bigdia_candidates);
807 }
808
vp9_square_search(const MACROBLOCK * x,MV * ref_mv,int search_param,int sad_per_bit,int do_init_search,const vp9_variance_fn_ptr_t * vfp,int use_mvcost,const MV * center_mv,MV * best_mv)809 int vp9_square_search(const MACROBLOCK *x,
810 MV *ref_mv,
811 int search_param,
812 int sad_per_bit,
813 int do_init_search,
814 const vp9_variance_fn_ptr_t *vfp,
815 int use_mvcost,
816 const MV *center_mv,
817 MV *best_mv) {
818 // All scales have 8 closest points in square shape
819 static const int square_num_candidates[MAX_PATTERN_SCALES] = {
820 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
821 };
822 // Note that the largest candidate step at each scale is 2^scale
823 static const MV square_candidates[MAX_PATTERN_SCALES]
824 [MAX_PATTERN_CANDIDATES] = {
825 {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}},
826 {{-2, -2}, {0, -2}, {2, -2}, {2, 0}, {2, 2}, {0, 2}, {-2, 2}, {-2, 0}},
827 {{-4, -4}, {0, -4}, {4, -4}, {4, 0}, {4, 4}, {0, 4}, {-4, 4}, {-4, 0}},
828 {{-8, -8}, {0, -8}, {8, -8}, {8, 0}, {8, 8}, {0, 8}, {-8, 8}, {-8, 0}},
829 {{-16, -16}, {0, -16}, {16, -16}, {16, 0}, {16, 16}, {0, 16},
830 {-16, 16}, {-16, 0}},
831 {{-32, -32}, {0, -32}, {32, -32}, {32, 0}, {32, 32}, {0, 32},
832 {-32, 32}, {-32, 0}},
833 {{-64, -64}, {0, -64}, {64, -64}, {64, 0}, {64, 64}, {0, 64},
834 {-64, 64}, {-64, 0}},
835 {{-128, -128}, {0, -128}, {128, -128}, {128, 0}, {128, 128}, {0, 128},
836 {-128, 128}, {-128, 0}},
837 {{-256, -256}, {0, -256}, {256, -256}, {256, 0}, {256, 256}, {0, 256},
838 {-256, 256}, {-256, 0}},
839 {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512},
840 {-512, 512}, {-512, 0}},
841 {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024},
842 {0, 1024}, {-1024, 1024}, {-1024, 0}},
843 };
844 return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
845 do_init_search, 0, vfp, use_mvcost,
846 center_mv, best_mv,
847 square_num_candidates, square_candidates);
848 }
849
vp9_fast_hex_search(const MACROBLOCK * x,MV * ref_mv,int search_param,int sad_per_bit,int do_init_search,const vp9_variance_fn_ptr_t * vfp,int use_mvcost,const MV * center_mv,MV * best_mv)850 int vp9_fast_hex_search(const MACROBLOCK *x,
851 MV *ref_mv,
852 int search_param,
853 int sad_per_bit,
854 int do_init_search, // must be zero for fast_hex
855 const vp9_variance_fn_ptr_t *vfp,
856 int use_mvcost,
857 const MV *center_mv,
858 MV *best_mv) {
859 return vp9_hex_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param),
860 sad_per_bit, do_init_search, vfp, use_mvcost,
861 center_mv, best_mv);
862 }
863
vp9_fast_dia_search(const MACROBLOCK * x,MV * ref_mv,int search_param,int sad_per_bit,int do_init_search,const vp9_variance_fn_ptr_t * vfp,int use_mvcost,const MV * center_mv,MV * best_mv)864 int vp9_fast_dia_search(const MACROBLOCK *x,
865 MV *ref_mv,
866 int search_param,
867 int sad_per_bit,
868 int do_init_search,
869 const vp9_variance_fn_ptr_t *vfp,
870 int use_mvcost,
871 const MV *center_mv,
872 MV *best_mv) {
873 return vp9_bigdia_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param),
874 sad_per_bit, do_init_search, vfp, use_mvcost,
875 center_mv, best_mv);
876 }
877
878 #undef CHECK_BETTER
879
vp9_full_range_search_c(const MACROBLOCK * x,MV * ref_mv,MV * best_mv,int search_param,int sad_per_bit,int * num00,const vp9_variance_fn_ptr_t * fn_ptr,int * mvjcost,int * mvcost[2],const MV * center_mv)880 int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
881 int search_param, int sad_per_bit, int *num00,
882 const vp9_variance_fn_ptr_t *fn_ptr,
883 int *mvjcost, int *mvcost[2],
884 const MV *center_mv) {
885 const MACROBLOCKD *const xd = &x->e_mbd;
886 const uint8_t *what = x->plane[0].src.buf;
887 const int what_stride = x->plane[0].src.stride;
888 const uint8_t *in_what;
889 const int in_what_stride = xd->plane[0].pre[0].stride;
890
891 unsigned int bestsad = INT_MAX;
892 int ref_row, ref_col;
893
894 unsigned int thissad;
895 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
896
897 const int *mvjsadcost = x->nmvjointsadcost;
898 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
899
900 int tr, tc;
901 int best_tr = 0;
902 int best_tc = 0;
903 int range = 64;
904
905 int start_col, end_col;
906 int start_row, end_row;
907 int i;
908
909 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
910 ref_row = ref_mv->row;
911 ref_col = ref_mv->col;
912 *num00 = 11;
913 best_mv->row = ref_row;
914 best_mv->col = ref_col;
915
916 // Work out the start point for the search
917 in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
918
919 // Check the starting position
920 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
921 + mvsad_err_cost(best_mv, &fcenter_mv,
922 mvjsadcost, mvsadcost, sad_per_bit);
923
924 start_row = MAX(-range, x->mv_row_min - ref_row);
925 start_col = MAX(-range, x->mv_col_min - ref_col);
926 end_row = MIN(range, x->mv_row_max - ref_row);
927 end_col = MIN(range, x->mv_col_max - ref_col);
928
929 for (tr = start_row; tr <= end_row; ++tr) {
930 for (tc = start_col; tc <= end_col; tc += 4) {
931 if ((tc + 3) <= end_col) {
932 unsigned int sad_array[4];
933 unsigned char const *addr_ref[4];
934 for (i = 0; i < 4; ++i)
935 addr_ref[i] = in_what + tr * in_what_stride + tc + i;
936
937 fn_ptr->sdx4df(what, what_stride, addr_ref, in_what_stride, sad_array);
938
939 for (i = 0; i < 4; ++i) {
940 if (sad_array[i] < bestsad) {
941 const MV this_mv = {ref_row + tr, ref_col + tc + i};
942 thissad = sad_array[i] +
943 mvsad_err_cost(&this_mv, &fcenter_mv,
944 mvjsadcost, mvsadcost, sad_per_bit);
945 if (thissad < bestsad) {
946 bestsad = thissad;
947 best_tr = tr;
948 best_tc = tc + i;
949 }
950 }
951 }
952 } else {
953 for (i = 0; i < end_col - tc; ++i) {
954 const uint8_t *check_here = in_what + tr * in_what_stride + tc + i;
955 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
956 bestsad);
957
958 if (thissad < bestsad) {
959 const MV this_mv = {ref_row + tr, ref_col + tc + i};
960 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
961 mvjsadcost, mvsadcost, sad_per_bit);
962
963 if (thissad < bestsad) {
964 bestsad = thissad;
965 best_tr = tr;
966 best_tc = tc + i;
967 }
968 }
969 }
970 }
971 }
972 }
973 best_mv->row += best_tr;
974 best_mv->col += best_tc;
975 return bestsad;
976 }
977
vp9_diamond_search_sad_c(const MACROBLOCK * x,MV * ref_mv,MV * best_mv,int search_param,int sad_per_bit,int * num00,const vp9_variance_fn_ptr_t * fn_ptr,int * mvjcost,int * mvcost[2],const MV * center_mv)978 int vp9_diamond_search_sad_c(const MACROBLOCK *x,
979 MV *ref_mv, MV *best_mv,
980 int search_param, int sad_per_bit, int *num00,
981 const vp9_variance_fn_ptr_t *fn_ptr,
982 int *mvjcost, int *mvcost[2],
983 const MV *center_mv) {
984 int i, j, step;
985
986 const MACROBLOCKD *const xd = &x->e_mbd;
987 const uint8_t *what = x->plane[0].src.buf;
988 const int what_stride = x->plane[0].src.stride;
989 const uint8_t *in_what;
990 const int in_what_stride = xd->plane[0].pre[0].stride;
991 const uint8_t *best_address;
992
993 int bestsad = INT_MAX;
994 int best_site = 0;
995 int last_site = 0;
996
997 int ref_row, ref_col;
998
999 // search_param determines the length of the initial step and hence the number
1000 // of iterations
1001 // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 =
1002 // (MAX_FIRST_STEP/4) pel... etc.
1003 const search_site *const ss = &x->ss[search_param * x->searches_per_step];
1004 const int tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1005
1006 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1007
1008 const int *mvjsadcost = x->nmvjointsadcost;
1009 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1010
1011 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
1012 ref_row = ref_mv->row;
1013 ref_col = ref_mv->col;
1014 *num00 = 0;
1015 best_mv->row = ref_row;
1016 best_mv->col = ref_col;
1017
1018 // Work out the start point for the search
1019 in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
1020 best_address = in_what;
1021
1022 // Check the starting position
1023 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
1024 + mvsad_err_cost(best_mv, &fcenter_mv,
1025 mvjsadcost, mvsadcost, sad_per_bit);
1026
1027 i = 1;
1028
1029 for (step = 0; step < tot_steps; step++) {
1030 for (j = 0; j < x->searches_per_step; j++) {
1031 const MV this_mv = {best_mv->row + ss[i].mv.row,
1032 best_mv->col + ss[i].mv.col};
1033 if (is_mv_in(x, &this_mv)) {
1034 const uint8_t *const check_here = ss[i].offset + best_address;
1035 int thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
1036 bestsad);
1037
1038 if (thissad < bestsad) {
1039 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1040 mvjsadcost, mvsadcost, sad_per_bit);
1041
1042 if (thissad < bestsad) {
1043 bestsad = thissad;
1044 best_site = i;
1045 }
1046 }
1047 }
1048
1049 i++;
1050 }
1051
1052 if (best_site != last_site) {
1053 best_mv->row += ss[best_site].mv.row;
1054 best_mv->col += ss[best_site].mv.col;
1055 best_address += ss[best_site].offset;
1056 last_site = best_site;
1057 #if defined(NEW_DIAMOND_SEARCH)
1058 while (1) {
1059 const MV this_mv = {best_mv->row + ss[best_site].mv.row,
1060 best_mv->col + ss[best_site].mv.col};
1061 if (is_mv_in(x, &this_mv)) {
1062 const uint8_t *const check_here = ss[best_site].offset + best_address;
1063 int thissad = fn_ptr->sdf(what, what_stride, check_here,
1064 in_what_stride, bestsad);
1065 if (thissad < bestsad) {
1066 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1067 mvjsadcost, mvsadcost, sad_per_bit);
1068 if (thissad < bestsad) {
1069 bestsad = thissad;
1070 best_mv->row += ss[best_site].mv.row;
1071 best_mv->col += ss[best_site].mv.col;
1072 best_address += ss[best_site].offset;
1073 continue;
1074 }
1075 }
1076 }
1077 break;
1078 };
1079 #endif
1080 } else if (best_address == in_what) {
1081 (*num00)++;
1082 }
1083 }
1084 return bestsad;
1085 }
1086
vp9_diamond_search_sadx4(const MACROBLOCK * x,MV * ref_mv,MV * best_mv,int search_param,int sad_per_bit,int * num00,const vp9_variance_fn_ptr_t * fn_ptr,int * mvjcost,int * mvcost[2],const MV * center_mv)1087 int vp9_diamond_search_sadx4(const MACROBLOCK *x,
1088 MV *ref_mv, MV *best_mv, int search_param,
1089 int sad_per_bit, int *num00,
1090 const vp9_variance_fn_ptr_t *fn_ptr,
1091 int *mvjcost, int *mvcost[2],
1092 const MV *center_mv) {
1093 int i, j, step;
1094
1095 const MACROBLOCKD *const xd = &x->e_mbd;
1096 uint8_t *what = x->plane[0].src.buf;
1097 const int what_stride = x->plane[0].src.stride;
1098 const uint8_t *in_what;
1099 const int in_what_stride = xd->plane[0].pre[0].stride;
1100 const uint8_t *best_address;
1101
1102 unsigned int bestsad = INT_MAX;
1103 int best_site = 0;
1104 int last_site = 0;
1105
1106 int ref_row;
1107 int ref_col;
1108
1109 // search_param determines the length of the initial step and hence the number
1110 // of iterations.
1111 // 0 = initial step (MAX_FIRST_STEP) pel
1112 // 1 = (MAX_FIRST_STEP/2) pel,
1113 // 2 = (MAX_FIRST_STEP/4) pel...
1114 const search_site *ss = &x->ss[search_param * x->searches_per_step];
1115 const int tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1116
1117 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1118
1119 const int *mvjsadcost = x->nmvjointsadcost;
1120 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1121
1122 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
1123 ref_row = ref_mv->row;
1124 ref_col = ref_mv->col;
1125 *num00 = 0;
1126 best_mv->row = ref_row;
1127 best_mv->col = ref_col;
1128
1129 // Work out the start point for the search
1130 in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
1131 best_address = in_what;
1132
1133 // Check the starting position
1134 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
1135 + mvsad_err_cost(best_mv, &fcenter_mv,
1136 mvjsadcost, mvsadcost, sad_per_bit);
1137
1138 i = 1;
1139
1140 for (step = 0; step < tot_steps; step++) {
1141 int all_in = 1, t;
1142
1143 // All_in is true if every one of the points we are checking are within
1144 // the bounds of the image.
1145 all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_row_min);
1146 all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_row_max);
1147 all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_col_min);
1148 all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_col_max);
1149
1150 // If all the pixels are within the bounds we don't check whether the
1151 // search point is valid in this loop, otherwise we check each point
1152 // for validity..
1153 if (all_in) {
1154 unsigned int sad_array[4];
1155
1156 for (j = 0; j < x->searches_per_step; j += 4) {
1157 unsigned char const *block_offset[4];
1158
1159 for (t = 0; t < 4; t++)
1160 block_offset[t] = ss[i + t].offset + best_address;
1161
1162 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
1163 sad_array);
1164
1165 for (t = 0; t < 4; t++, i++) {
1166 if (sad_array[t] < bestsad) {
1167 const MV this_mv = {best_mv->row + ss[i].mv.row,
1168 best_mv->col + ss[i].mv.col};
1169 sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
1170 mvjsadcost, mvsadcost, sad_per_bit);
1171
1172 if (sad_array[t] < bestsad) {
1173 bestsad = sad_array[t];
1174 best_site = i;
1175 }
1176 }
1177 }
1178 }
1179 } else {
1180 for (j = 0; j < x->searches_per_step; j++) {
1181 // Trap illegal vectors
1182 const MV this_mv = {best_mv->row + ss[i].mv.row,
1183 best_mv->col + ss[i].mv.col};
1184
1185 if (is_mv_in(x, &this_mv)) {
1186 const uint8_t *const check_here = ss[i].offset + best_address;
1187 unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
1188 in_what_stride, bestsad);
1189
1190 if (thissad < bestsad) {
1191 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1192 mvjsadcost, mvsadcost, sad_per_bit);
1193
1194 if (thissad < bestsad) {
1195 bestsad = thissad;
1196 best_site = i;
1197 }
1198 }
1199 }
1200 i++;
1201 }
1202 }
1203 if (best_site != last_site) {
1204 best_mv->row += ss[best_site].mv.row;
1205 best_mv->col += ss[best_site].mv.col;
1206 best_address += ss[best_site].offset;
1207 last_site = best_site;
1208 #if defined(NEW_DIAMOND_SEARCH)
1209 while (1) {
1210 const MV this_mv = {best_mv->row + ss[best_site].mv.row,
1211 best_mv->col + ss[best_site].mv.col};
1212 if (is_mv_in(x, &this_mv)) {
1213 const uint8_t *const check_here = ss[best_site].offset + best_address;
1214 unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
1215 in_what_stride, bestsad);
1216 if (thissad < bestsad) {
1217 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1218 mvjsadcost, mvsadcost, sad_per_bit);
1219 if (thissad < bestsad) {
1220 bestsad = thissad;
1221 best_mv->row += ss[best_site].mv.row;
1222 best_mv->col += ss[best_site].mv.col;
1223 best_address += ss[best_site].offset;
1224 continue;
1225 }
1226 }
1227 }
1228 break;
1229 };
1230 #endif
1231 } else if (best_address == in_what) {
1232 (*num00)++;
1233 }
1234 }
1235 return bestsad;
1236 }
1237
1238 /* do_refine: If last step (1-away) of n-step search doesn't pick the center
1239 point as the best match, we will do a final 1-away diamond
1240 refining search */
1241
vp9_full_pixel_diamond(const VP9_COMP * cpi,MACROBLOCK * x,MV * mvp_full,int step_param,int sadpb,int further_steps,int do_refine,const vp9_variance_fn_ptr_t * fn_ptr,const MV * ref_mv,MV * dst_mv)1242 int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
1243 MV *mvp_full, int step_param,
1244 int sadpb, int further_steps, int do_refine,
1245 const vp9_variance_fn_ptr_t *fn_ptr,
1246 const MV *ref_mv, MV *dst_mv) {
1247 MV temp_mv;
1248 int thissme, n, num00 = 0;
1249 int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
1250 step_param, sadpb, &n,
1251 fn_ptr, x->nmvjointcost,
1252 x->mvcost, ref_mv);
1253 if (bestsme < INT_MAX)
1254 bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
1255 *dst_mv = temp_mv;
1256
1257 // If there won't be more n-step search, check to see if refining search is
1258 // needed.
1259 if (n > further_steps)
1260 do_refine = 0;
1261
1262 while (n < further_steps) {
1263 ++n;
1264
1265 if (num00) {
1266 num00--;
1267 } else {
1268 thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
1269 step_param + n, sadpb, &num00,
1270 fn_ptr, x->nmvjointcost, x->mvcost,
1271 ref_mv);
1272 if (thissme < INT_MAX)
1273 thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
1274
1275 // check to see if refining search is needed.
1276 if (num00 > further_steps - n)
1277 do_refine = 0;
1278
1279 if (thissme < bestsme) {
1280 bestsme = thissme;
1281 *dst_mv = temp_mv;
1282 }
1283 }
1284 }
1285
1286 // final 1-away diamond refining search
1287 if (do_refine) {
1288 const int search_range = 8;
1289 MV best_mv = *dst_mv;
1290 thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range,
1291 fn_ptr, x->nmvjointcost, x->mvcost,
1292 ref_mv);
1293 if (thissme < INT_MAX)
1294 thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
1295 if (thissme < bestsme) {
1296 bestsme = thissme;
1297 *dst_mv = best_mv;
1298 }
1299 }
1300 return bestsme;
1301 }
1302
vp9_full_search_sad_c(const MACROBLOCK * x,const MV * ref_mv,int sad_per_bit,int distance,const vp9_variance_fn_ptr_t * fn_ptr,int * mvjcost,int * mvcost[2],const MV * center_mv,MV * best_mv)1303 int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
1304 int sad_per_bit, int distance,
1305 const vp9_variance_fn_ptr_t *fn_ptr,
1306 int *mvjcost, int *mvcost[2],
1307 const MV *center_mv, MV *best_mv) {
1308 int r, c;
1309 const MACROBLOCKD *const xd = &x->e_mbd;
1310 const struct buf_2d *const what = &x->plane[0].src;
1311 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1312 const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
1313 const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
1314 const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
1315 const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
1316 const int *mvjsadcost = x->nmvjointsadcost;
1317 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1318 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1319 int best_sad = fn_ptr->sdf(what->buf, what->stride,
1320 get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) +
1321 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit);
1322 *best_mv = *ref_mv;
1323
1324 for (r = row_min; r < row_max; ++r) {
1325 for (c = col_min; c < col_max; ++c) {
1326 const MV mv = {r, c};
1327 const int sad = fn_ptr->sdf(what->buf, what->stride,
1328 get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) +
1329 mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost,
1330 sad_per_bit);
1331
1332 if (sad < best_sad) {
1333 best_sad = sad;
1334 *best_mv = mv;
1335 }
1336 }
1337 }
1338 return best_sad;
1339 }
1340
vp9_full_search_sadx3(const MACROBLOCK * x,const MV * ref_mv,int sad_per_bit,int distance,const vp9_variance_fn_ptr_t * fn_ptr,int * mvjcost,int * mvcost[2],const MV * center_mv,MV * best_mv)1341 int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
1342 int sad_per_bit, int distance,
1343 const vp9_variance_fn_ptr_t *fn_ptr,
1344 int *mvjcost, int *mvcost[2],
1345 const MV *center_mv, MV *best_mv) {
1346 const MACROBLOCKD *const xd = &x->e_mbd;
1347 const uint8_t *const what = x->plane[0].src.buf;
1348 const int what_stride = x->plane[0].src.stride;
1349 const uint8_t *const in_what = xd->plane[0].pre[0].buf;
1350 const int in_what_stride = xd->plane[0].pre[0].stride;
1351 MV this_mv;
1352 unsigned int bestsad = INT_MAX;
1353 int r, c;
1354 unsigned int thissad;
1355 int ref_row = ref_mv->row;
1356 int ref_col = ref_mv->col;
1357
1358 // Apply further limits to prevent us looking using vectors that stretch
1359 // beyond the UMV border
1360 const int row_min = MAX(ref_row - distance, x->mv_row_min);
1361 const int row_max = MIN(ref_row + distance, x->mv_row_max);
1362 const int col_min = MAX(ref_col - distance, x->mv_col_min);
1363 const int col_max = MIN(ref_col + distance, x->mv_col_max);
1364 unsigned int sad_array[3];
1365 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1366 const int *mvjsadcost = x->nmvjointsadcost;
1367 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1368
1369 // Work out the mid point for the search
1370 const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col];
1371
1372 best_mv->row = ref_row;
1373 best_mv->col = ref_col;
1374
1375 // Baseline value at the centre
1376 bestsad = fn_ptr->sdf(what, what_stride,
1377 bestaddress, in_what_stride, 0x7fffffff)
1378 + mvsad_err_cost(best_mv, &fcenter_mv,
1379 mvjsadcost, mvsadcost, sad_per_bit);
1380
1381 for (r = row_min; r < row_max; r++) {
1382 const uint8_t *check_here = &in_what[r * in_what_stride + col_min];
1383 this_mv.row = r;
1384 c = col_min;
1385
1386 while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) {
1387 int i;
1388
1389 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
1390
1391 for (i = 0; i < 3; i++) {
1392 thissad = sad_array[i];
1393
1394 if (thissad < bestsad) {
1395 this_mv.col = c;
1396 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1397 mvjsadcost, mvsadcost, sad_per_bit);
1398
1399 if (thissad < bestsad) {
1400 bestsad = thissad;
1401 best_mv->row = r;
1402 best_mv->col = c;
1403 }
1404 }
1405 check_here++;
1406 c++;
1407 }
1408 }
1409
1410 while (c < col_max) {
1411 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
1412 bestsad);
1413
1414 if (thissad < bestsad) {
1415 this_mv.col = c;
1416 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1417 mvjsadcost, mvsadcost, sad_per_bit);
1418
1419 if (thissad < bestsad) {
1420 bestsad = thissad;
1421 best_mv->row = r;
1422 best_mv->col = c;
1423 }
1424 }
1425
1426 check_here++;
1427 c++;
1428 }
1429 }
1430 return bestsad;
1431 }
1432
vp9_full_search_sadx8(const MACROBLOCK * x,const MV * ref_mv,int sad_per_bit,int distance,const vp9_variance_fn_ptr_t * fn_ptr,int * mvjcost,int * mvcost[2],const MV * center_mv,MV * best_mv)1433 int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
1434 int sad_per_bit, int distance,
1435 const vp9_variance_fn_ptr_t *fn_ptr,
1436 int *mvjcost, int *mvcost[2],
1437 const MV *center_mv, MV *best_mv) {
1438 const MACROBLOCKD *const xd = &x->e_mbd;
1439 const uint8_t *const what = x->plane[0].src.buf;
1440 const int what_stride = x->plane[0].src.stride;
1441 const uint8_t *const in_what = xd->plane[0].pre[0].buf;
1442 const int in_what_stride = xd->plane[0].pre[0].stride;
1443 MV this_mv;
1444 unsigned int bestsad = INT_MAX;
1445 int r, c;
1446 int ref_row = ref_mv->row;
1447 int ref_col = ref_mv->col;
1448
1449 // Apply further limits to prevent us looking using vectors that stretch
1450 // beyond the UMV border
1451 const int row_min = MAX(ref_row - distance, x->mv_row_min);
1452 const int row_max = MIN(ref_row + distance, x->mv_row_max);
1453 const int col_min = MAX(ref_col - distance, x->mv_col_min);
1454 const int col_max = MIN(ref_col + distance, x->mv_col_max);
1455 DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8);
1456 unsigned int sad_array[3];
1457 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1458
1459 const int *mvjsadcost = x->nmvjointsadcost;
1460 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1461
1462 // Work out the mid point for the search
1463 const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col];
1464
1465 best_mv->row = ref_row;
1466 best_mv->col = ref_col;
1467
1468 // Baseline value at the center
1469 bestsad = fn_ptr->sdf(what, what_stride,
1470 bestaddress, in_what_stride, 0x7fffffff)
1471 + mvsad_err_cost(best_mv, &fcenter_mv,
1472 mvjsadcost, mvsadcost, sad_per_bit);
1473
1474 for (r = row_min; r < row_max; r++) {
1475 const uint8_t *check_here = &in_what[r * in_what_stride + col_min];
1476 this_mv.row = r;
1477 c = col_min;
1478
1479 while ((c + 7) < col_max) {
1480 int i;
1481
1482 fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
1483
1484 for (i = 0; i < 8; i++) {
1485 unsigned int thissad = (unsigned int)sad_array8[i];
1486
1487 if (thissad < bestsad) {
1488 this_mv.col = c;
1489 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1490 mvjsadcost, mvsadcost, sad_per_bit);
1491
1492 if (thissad < bestsad) {
1493 bestsad = thissad;
1494 best_mv->row = r;
1495 best_mv->col = c;
1496 }
1497 }
1498
1499 check_here++;
1500 c++;
1501 }
1502 }
1503
1504 while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) {
1505 int i;
1506
1507 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
1508
1509 for (i = 0; i < 3; i++) {
1510 unsigned int thissad = sad_array[i];
1511
1512 if (thissad < bestsad) {
1513 this_mv.col = c;
1514 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1515 mvjsadcost, mvsadcost, sad_per_bit);
1516
1517 if (thissad < bestsad) {
1518 bestsad = thissad;
1519 best_mv->row = r;
1520 best_mv->col = c;
1521 }
1522 }
1523
1524 check_here++;
1525 c++;
1526 }
1527 }
1528
1529 while (c < col_max) {
1530 unsigned int thissad = fn_ptr->sdf(what, what_stride,
1531 check_here, in_what_stride, bestsad);
1532
1533 if (thissad < bestsad) {
1534 this_mv.col = c;
1535 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1536 mvjsadcost, mvsadcost, sad_per_bit);
1537
1538 if (thissad < bestsad) {
1539 bestsad = thissad;
1540 best_mv->row = r;
1541 best_mv->col = c;
1542 }
1543 }
1544
1545 check_here++;
1546 c++;
1547 }
1548 }
1549 return bestsad;
1550 }
1551
vp9_refining_search_sad_c(const MACROBLOCK * x,MV * ref_mv,int error_per_bit,int search_range,const vp9_variance_fn_ptr_t * fn_ptr,int * mvjcost,int * mvcost[2],const MV * center_mv)1552 int vp9_refining_search_sad_c(const MACROBLOCK *x,
1553 MV *ref_mv, int error_per_bit,
1554 int search_range,
1555 const vp9_variance_fn_ptr_t *fn_ptr,
1556 int *mvjcost, int *mvcost[2],
1557 const MV *center_mv) {
1558 const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
1559 const MACROBLOCKD *const xd = &x->e_mbd;
1560 const struct buf_2d *const what = &x->plane[0].src;
1561 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1562 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1563 const int *mvjsadcost = x->nmvjointsadcost;
1564 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1565
1566 unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
1567 get_buf_from_mv(in_what, ref_mv),
1568 in_what->stride, 0x7fffffff) +
1569 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
1570 int i, j;
1571
1572 for (i = 0; i < search_range; i++) {
1573 int best_site = -1;
1574
1575 for (j = 0; j < 4; j++) {
1576 const MV mv = {ref_mv->row + neighbors[j].row,
1577 ref_mv->col + neighbors[j].col};
1578 if (is_mv_in(x, &mv)) {
1579 unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
1580 get_buf_from_mv(in_what, &mv), in_what->stride, best_sad);
1581 if (sad < best_sad) {
1582 sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost,
1583 error_per_bit);
1584 if (sad < best_sad) {
1585 best_sad = sad;
1586 best_site = j;
1587 }
1588 }
1589 }
1590 }
1591
1592 if (best_site == -1) {
1593 break;
1594 } else {
1595 ref_mv->row += neighbors[best_site].row;
1596 ref_mv->col += neighbors[best_site].col;
1597 }
1598 }
1599 return best_sad;
1600 }
1601
vp9_refining_search_sadx4(const MACROBLOCK * x,MV * ref_mv,int error_per_bit,int search_range,const vp9_variance_fn_ptr_t * fn_ptr,int * mvjcost,int * mvcost[2],const MV * center_mv)1602 int vp9_refining_search_sadx4(const MACROBLOCK *x,
1603 MV *ref_mv, int error_per_bit,
1604 int search_range,
1605 const vp9_variance_fn_ptr_t *fn_ptr,
1606 int *mvjcost, int *mvcost[2],
1607 const MV *center_mv) {
1608 const MACROBLOCKD *const xd = &x->e_mbd;
1609 MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
1610 int i, j;
1611
1612 const int what_stride = x->plane[0].src.stride;
1613 const int in_what_stride = xd->plane[0].pre[0].stride;
1614 const uint8_t *what = x->plane[0].src.buf;
1615 const uint8_t *best_address = xd->plane[0].pre[0].buf +
1616 (ref_mv->row * xd->plane[0].pre[0].stride) +
1617 ref_mv->col;
1618
1619 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1620
1621 const int *mvjsadcost = x->nmvjointsadcost;
1622 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1623
1624 unsigned int bestsad = fn_ptr->sdf(what, what_stride, best_address,
1625 in_what_stride, 0x7fffffff) +
1626 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
1627
1628 for (i = 0; i < search_range; i++) {
1629 int best_site = -1;
1630 int all_in = ((ref_mv->row - 1) > x->mv_row_min) &
1631 ((ref_mv->row + 1) < x->mv_row_max) &
1632 ((ref_mv->col - 1) > x->mv_col_min) &
1633 ((ref_mv->col + 1) < x->mv_col_max);
1634
1635 if (all_in) {
1636 unsigned int sad_array[4];
1637 uint8_t const *block_offset[4] = {
1638 best_address - in_what_stride,
1639 best_address - 1,
1640 best_address + 1,
1641 best_address + in_what_stride
1642 };
1643
1644 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
1645 sad_array);
1646
1647 for (j = 0; j < 4; j++) {
1648 if (sad_array[j] < bestsad) {
1649 const MV this_mv = {ref_mv->row + neighbors[j].row,
1650 ref_mv->col + neighbors[j].col};
1651 sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv,
1652 mvjsadcost, mvsadcost, error_per_bit);
1653
1654 if (sad_array[j] < bestsad) {
1655 bestsad = sad_array[j];
1656 best_site = j;
1657 }
1658 }
1659 }
1660 } else {
1661 for (j = 0; j < 4; j++) {
1662 const MV this_mv = {ref_mv->row + neighbors[j].row,
1663 ref_mv->col + neighbors[j].col};
1664
1665 if (is_mv_in(x, &this_mv)) {
1666 const uint8_t *check_here = neighbors[j].row * in_what_stride +
1667 neighbors[j].col + best_address;
1668 unsigned int thissad = fn_ptr->sdf(what, what_stride,
1669 check_here, in_what_stride,
1670 bestsad);
1671
1672 if (thissad < bestsad) {
1673 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1674 mvjsadcost, mvsadcost, error_per_bit);
1675
1676 if (thissad < bestsad) {
1677 bestsad = thissad;
1678 best_site = j;
1679 }
1680 }
1681 }
1682 }
1683 }
1684
1685 if (best_site == -1) {
1686 break;
1687 } else {
1688 ref_mv->row += neighbors[best_site].row;
1689 ref_mv->col += neighbors[best_site].col;
1690 best_address += (neighbors[best_site].row) * in_what_stride +
1691 neighbors[best_site].col;
1692 }
1693 }
1694
1695 return bestsad;
1696 }
1697
1698 // This function is called when we do joint motion search in comp_inter_inter
1699 // mode.
vp9_refining_search_8p_c(const MACROBLOCK * x,MV * ref_mv,int error_per_bit,int search_range,const vp9_variance_fn_ptr_t * fn_ptr,int * mvjcost,int * mvcost[2],const MV * center_mv,const uint8_t * second_pred,int w,int h)1700 int vp9_refining_search_8p_c(const MACROBLOCK *x,
1701 MV *ref_mv, int error_per_bit,
1702 int search_range,
1703 const vp9_variance_fn_ptr_t *fn_ptr,
1704 int *mvjcost, int *mvcost[2],
1705 const MV *center_mv,
1706 const uint8_t *second_pred, int w, int h) {
1707 const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
1708 {-1, -1}, {1, -1}, {-1, 1}, {1, 1}};
1709 const MACROBLOCKD *const xd = &x->e_mbd;
1710 const struct buf_2d *const what = &x->plane[0].src;
1711 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1712 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1713 const int *mvjsadcost = x->nmvjointsadcost;
1714 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1715 unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride,
1716 get_buf_from_mv(in_what, ref_mv), in_what->stride,
1717 second_pred, 0x7fffffff) +
1718 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
1719 int i, j;
1720
1721 for (i = 0; i < search_range; ++i) {
1722 int best_site = -1;
1723
1724 for (j = 0; j < 8; ++j) {
1725 const MV mv = {ref_mv->row + neighbors[j].row,
1726 ref_mv->col + neighbors[j].col};
1727
1728 if (is_mv_in(x, &mv)) {
1729 unsigned int sad = fn_ptr->sdaf(what->buf, what->stride,
1730 get_buf_from_mv(in_what, &mv), in_what->stride,
1731 second_pred, best_sad);
1732 if (sad < best_sad) {
1733 sad += mvsad_err_cost(&mv, &fcenter_mv,
1734 mvjsadcost, mvsadcost, error_per_bit);
1735 if (sad < best_sad) {
1736 best_sad = sad;
1737 best_site = j;
1738 }
1739 }
1740 }
1741 }
1742
1743 if (best_site == -1) {
1744 break;
1745 } else {
1746 ref_mv->row += neighbors[best_site].row;
1747 ref_mv->col += neighbors[best_site].col;
1748 }
1749 }
1750 return best_sad;
1751 }
1752