/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
11 #include "./vp8_rtcd.h"
12 #include "./vpx_dsp_rtcd.h"
13 #include "onyx_int.h"
14 #include "mcomp.h"
15 #include "vpx_mem/vpx_mem.h"
16 #include "vpx_config.h"
17 #include <stdio.h>
18 #include <limits.h>
19 #include <math.h>
20 #include "vp8/common/findnearmv.h"
21 #include "vp8/common/common.h"
22 #include "vpx_dsp/vpx_dsp_common.h"
23
/* Weighted bit cost of coding the motion vector (mv - ref), in units where
 * the table cost is scaled by Weight and normalized by >> 7. */
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) {
  /* MV costing is based on the distribution of vectors in the previous
   * frame and as such will tend to over state the cost of vectors. In
   * addition coding a new vector can have a knock on effect on the cost
   * of subsequent vectors and the quality of prediction from NEAR and
   * NEAREST for subsequent blocks. The "Weight" parameter allows, to a
   * limited extent, for some account to be taken of these factors.
   */
  const int row_diff = (mv->as_mv.row - ref->as_mv.row) >> 1;
  const int col_diff = (mv->as_mv.col - ref->as_mv.col) >> 1;
  /* Clamp indices into the cost tables' valid range. */
  const int row_cost = mvcost[0][clamp(row_diff, 0, MVvals)];
  const int col_cost = mvcost[1][clamp(col_diff, 0, MVvals)];
  return ((row_cost + col_cost) * Weight) >> 7;
}
38
/* Rate-distortion cost of coding (mv - ref), scaled by error_per_bit and
 * rounded; returns 0 when no cost table is supplied. */
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2],
                       int error_per_bit) {
  int row_idx, col_idx;

  /* Ignore mv costing if mvcost is NULL */
  if (!mvcost) return 0;

  row_idx = clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals);
  col_idx = clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals);
  /* +128 rounds before the >> 8 normalization. */
  return ((mvcost[0][row_idx] + mvcost[1][col_idx]) * error_per_bit + 128) >> 8;
}
53
/* SAD-stage motion vector cost, computed on a full-pel basis; returns 0 when
 * no cost table is supplied. */
static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2],
                          int error_per_bit) {
  int row_cost, col_cost;

  /* Ignore mv costing if mvsadcost is NULL */
  if (!mvsadcost) return 0;

  /* Calculate sad error cost on full pixel basis. */
  row_cost = mvsadcost[0][mv->as_mv.row - ref->as_mv.row];
  col_cost = mvsadcost[1][mv->as_mv.col - ref->as_mv.col];
  /* +128 rounds before the >> 8 normalization. */
  return ((row_cost + col_cost) * error_per_bit + 128) >> 8;
}
67
/* Build the diamond-search site table for a 4-point pattern: the centre,
 * then up/down/left/right at step Len, halving Len each round. The offset
 * field caches the pixel offset (row * stride + col) for each site. */
void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
  /* Row/col multipliers in the exact visit order: up, down, left, right. */
  static const int dir[4][2] = { { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 } };
  int len = MAX_FIRST_STEP;
  int count = 0;
  int i;

  /* Site 0 is always the search centre. */
  x->ss[count].mv.col = 0;
  x->ss[count].mv.row = 0;
  x->ss[count].offset = 0;
  count++;

  while (len > 0) {
    for (i = 0; i < 4; ++i) {
      const int r = dir[i][0] * len;
      const int c = dir[i][1] * len;
      x->ss[count].mv.row = r;
      x->ss[count].mv.col = c;
      x->ss[count].offset = r * stride + c;
      count++;
    }
    /* Contract the pattern for the next round. */
    len /= 2;
  }

  x->ss_count = count;
  x->searches_per_step = 4;
}
111
/* Build the search site table for an 8-point (square) pattern: the centre,
 * then the four edge neighbours and four diagonals at step Len, halving Len
 * each round. The offset field caches row * stride + col for each site. */
void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) {
  /* Row/col multipliers in the exact visit order:
   * up, down, left, right, up-left, up-right, down-left, down-right. */
  static const int dir[8][2] = { { -1, 0 }, { 1, 0 },  { 0, -1 }, { 0, 1 },
                                 { -1, -1 }, { -1, 1 }, { 1, -1 }, { 1, 1 } };
  int len = MAX_FIRST_STEP;
  int count = 0;
  int i;

  /* Site 0 is always the search centre. */
  x->ss[count].mv.col = 0;
  x->ss[count].mv.row = 0;
  x->ss[count].offset = 0;
  count++;

  while (len > 0) {
    for (i = 0; i < 8; ++i) {
      const int r = dir[i][0] * len;
      const int c = dir[i][1] * len;
      x->ss[count].mv.row = r;
      x->ss[count].mv.col = c;
      x->ss[count].offset = r * stride + c;
      count++;
    }
    /* Contract the pattern for the next round. */
    len /= 2;
  }

  x->ss_count = count;
  x->searches_per_step = 8;
}
179
/*
 * To avoid the penalty for crossing cache-line reads, preload the reference
 * area into a small buffer, which is aligned to make sure there won't be a
 * crossing cache-line read while reading from this buffer. This reduces the
 * cpu cycles spent on reading ref data in sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, copy a
 * 22 rows x 32 cols area that is enough for a 16x16 macroblock. Later, for
 * SPLITMV, we could reduce the area.
 */
189
/* The macros below are helpers for vp8_find_best_sub_pixel_step_iteratively
 * and expand in terms of that function's locals (mvcost, error_per_bit, rr,
 * rc, y, y_stride, offset, z, b, vfp, sse, thismse, besterr, br, bc,
 * distortion, sse1, minc/maxc/minr/maxr). They are #undef'd after the
 * function. (r,c) are in 1/4-pel units within that function. */

/* estimated cost of a motion vector (r,c) */
#define MVC(r, c) \
  (mvcost \
       ? ((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128) >> 8 \
       : 0)
/* pointer to predictor base of a motionvector */
#define PRE(r, c) (y + (((r) >> 2) * y_stride + ((c) >> 2) - (offset)))
/* convert motion vector component to offset for svf calc */
#define SP(x) (((x)&3) << 1)
/* returns subpixel variance error function. */
#define DIST(r, c) \
  vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse)
/* run s if (r,c) is inside the search bounds, e otherwise */
#define IFMVCV(r, c, s, e) \
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
/* returns distortion + motion vector cost */
#define ERR(r, c) (MVC(r, c) + DIST(r, c))
/* checks if (r,c) has better score than previous best; out-of-range points
 * get score UINT_MAX so they never win */
#define CHECK_BETTER(v, r, c) \
  IFMVCV(r, c, \
         { \
           thismse = DIST(r, c); \
           if ((v = (MVC(r, c) + thismse)) < besterr) { \
             besterr = v; \
             br = r; \
             bc = c; \
             *distortion = thismse; \
             *sse1 = sse; \
           } \
         }, \
         v = UINT_MAX;)
220
/* Iterative sub-pixel refinement around *bestmv using the CHECK_BETTER
 * macros above: up to 4 rounds of half-pel (+/-2 in 1/4-pel units) then up
 * to 4 rounds of quarter-pel (+/-1) plus-shape probes, each followed by one
 * diagonal pick. On entry *bestmv is in full-pel units; on exit it is
 * scaled to 1/8-pel storage. Returns the best variance + mv cost, writing
 * the raw distortion to *distortion and the sse to *sse1, or INT_MAX if the
 * result drifts more than MAX_FULL_PEL_VAL from ref_mv. */
int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                             int_mv *bestmv, int_mv *ref_mv,
                                             int error_per_bit,
                                             const vp8_variance_fn_ptr_t *vfp,
                                             int *mvcost[2], int *distortion,
                                             unsigned int *sse1) {
  unsigned char *z = (*(b->base_src) + b->src);

  /* rr/rc are read by the MVC macro; br/bc/tr/tc are in 1/4-pel units. */
  int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
  int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
  int tr = br, tc = bc;
  unsigned int besterr;
  unsigned int left, right, up, down, diag;
  unsigned int sse;
  unsigned int whichdir;
  unsigned int halfiters = 4;
  unsigned int quarteriters = 4;
  int thismse;

  /* Search bounds in 1/4-pel units, limited both by the MB's legal MV range
   * and by the codeable long-MV magnitude. */
  int minc = VPXMAX(x->mv_col_min * 4,
                    (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
  int maxc = VPXMIN(x->mv_col_max * 4,
                    (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
  int minr = VPXMAX(x->mv_row_min * 4,
                    (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
  int maxr = VPXMIN(x->mv_row_max * 4,
                    (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));

  int y_stride;
  int offset; /* read by the PRE macro */
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if VPX_ARCH_X86 || VPX_ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;
  int buf_r1, buf_r2, buf_c1;

  /* Clamping to avoid out-of-range data access */
  buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)
               ? (bestmv->as_mv.row - x->mv_row_min)
               : 3;
  buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)
               ? (x->mv_row_max - bestmv->as_mv.row)
               : 3;
  buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)
               ? (bestmv->as_mv.col - x->mv_col_min)
               : 3;
  y_stride = 32;

  /* Copy to intermediate buffer before searching. */
  vfp->copymem(y_0 - buf_c1 - pre_stride * buf_r1, pre_stride, xd->y_buf,
               y_stride, 16 + buf_r1 + buf_r2);
  y = xd->y_buf + y_stride * buf_r1 + buf_c1;
#else
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;

  /* central mv: scale to 1/8-pel storage units */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;

  /* calculate central point error */
  besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* TODO: Each subsequent iteration checks at least one point in common
   * with the last iteration could be 2 ( if diag selected)
   */
  while (--halfiters) {
    /* 1/2 pel */
    CHECK_BETTER(left, tr, tc - 2);
    CHECK_BETTER(right, tr, tc + 2);
    CHECK_BETTER(up, tr - 2, tc);
    CHECK_BETTER(down, tr + 2, tc);

    /* Pick the diagonal between the better horizontal and vertical probes. */
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

    switch (whichdir) {
      case 0: CHECK_BETTER(diag, tr - 2, tc - 2); break;
      case 1: CHECK_BETTER(diag, tr - 2, tc + 2); break;
      case 2: CHECK_BETTER(diag, tr + 2, tc - 2); break;
      case 3: CHECK_BETTER(diag, tr + 2, tc + 2); break;
    }

    /* no reason to check the same one again. */
    if (tr == br && tc == bc) break;

    tr = br;
    tc = bc;
  }

  /* TODO: Each subsequent iteration checks at least one point in common
   * with the last iteration could be 2 ( if diag selected)
   */

  /* 1/4 pel */
  while (--quarteriters) {
    CHECK_BETTER(left, tr, tc - 1);
    CHECK_BETTER(right, tr, tc + 1);
    CHECK_BETTER(up, tr - 1, tc);
    CHECK_BETTER(down, tr + 1, tc);

    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

    switch (whichdir) {
      case 0: CHECK_BETTER(diag, tr - 1, tc - 1); break;
      case 1: CHECK_BETTER(diag, tr - 1, tc + 1); break;
      case 2: CHECK_BETTER(diag, tr + 1, tc - 1); break;
      case 3: CHECK_BETTER(diag, tr + 1, tc + 1); break;
    }

    /* no reason to check the same one again. */
    if (tr == br && tc == bc) break;

    tr = br;
    tc = bc;
  }

  /* Convert the 1/4-pel result to 1/8-pel storage units. */
  bestmv->as_mv.row = br * 2;
  bestmv->as_mv.col = bc * 2;

  /* Reject results outside the codeable full-pel range around ref_mv. */
  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) {
    return INT_MAX;
  }

  return besterr;
}
#undef MVC
#undef PRE
#undef SP
#undef DIST
#undef IFMVCV
#undef ERR
#undef CHECK_BETTER
364
/* Single-pass sub-pixel refinement: evaluate the four half-pel neighbours
 * of *bestmv plus one chosen diagonal, then repeat at quarter-pel around
 * the half-pel winner. On entry *bestmv is in full-pel units; on exit it is
 * in 1/8-pel units. Returns the best variance + mv cost; also writes the
 * raw distortion to *distortion and the sse to *sse1. */
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                 int_mv *bestmv, int_mv *ref_mv,
                                 int error_per_bit,
                                 const vp8_variance_fn_ptr_t *vfp,
                                 int *mvcost[2], int *distortion,
                                 unsigned int *sse1) {
  int bestmse = INT_MAX;
  int_mv startmv;
  int_mv this_mv;
  unsigned char *z = (*(b->base_src) + b->src);
  int left, right, up, down, diag;
  unsigned int sse;
  int whichdir;
  int thismse;
  int y_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if VPX_ARCH_X86 || VPX_ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;

  y_stride = 32;
  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
  vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
  y = xd->y_buf + y_stride + 1;
#else
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  /* central mv: scale to 1/8-pel units */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;
  startmv = *bestmv;

  /* calculate central point error */
  bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = bestmse;
  bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* go left then right and check error */
  this_mv.as_mv.row = startmv.as_mv.row;
  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (left < bestmse) {
    *bestmv = this_mv;
    bestmse = left;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.col += 8;
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (right < bestmse) {
    *bestmv = this_mv;
    bestmse = right;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* go up then down and check error */
  this_mv.as_mv.col = startmv.as_mv.col;
  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (up < bestmse) {
    *bestmv = this_mv;
    bestmse = up;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.row += 8;
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (down < bestmse) {
    *bestmv = this_mv;
    bestmse = down;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* now check 1 more diagonal: the one between the better horizontal and
   * the better vertical half-pel probe */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  this_mv = startmv;

  switch (whichdir) {
    case 0:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse =
          vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 1:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 2:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 3:
    default:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
  }

  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (diag < bestmse) {
    *bestmv = this_mv;
    bestmse = diag;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* time to check quarter pels: re-anchor y on the half-pel winner */
  if (bestmv->as_mv.row < startmv.as_mv.row) y -= y_stride;

  if (bestmv->as_mv.col < startmv.as_mv.col) y--;

  startmv = *bestmv;

  /* go left then right and check error */
  this_mv.as_mv.row = startmv.as_mv.row;

  /* The (x & 7) tests distinguish whether the current component already sits
   * on a sub-pel position; when it does not, stepping left/up crosses a
   * full-pel boundary and the base pointer must be adjusted. */
  if (startmv.as_mv.col & 7) {
    this_mv.as_mv.col = startmv.as_mv.col - 2;
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                       this_mv.as_mv.row & 7, z, b->src_stride, &sse);
  } else {
    this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                       b->src_stride, &sse);
  }

  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (left < bestmse) {
    *bestmv = this_mv;
    bestmse = left;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.col += 4;
  thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
                     z, b->src_stride, &sse);
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (right < bestmse) {
    *bestmv = this_mv;
    bestmse = right;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* go up then down and check error */
  this_mv.as_mv.col = startmv.as_mv.col;

  if (startmv.as_mv.row & 7) {
    this_mv.as_mv.row = startmv.as_mv.row - 2;
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                       this_mv.as_mv.row & 7, z, b->src_stride, &sse);
  } else {
    this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
                       b->src_stride, &sse);
  }

  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (up < bestmse) {
    *bestmv = this_mv;
    bestmse = up;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.row += 4;
  thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
                     z, b->src_stride, &sse);
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (down < bestmse) {
    *bestmv = this_mv;
    bestmse = down;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* now check 1 more diagonal */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

  this_mv = startmv;

  switch (whichdir) {
    case 0:

      if (startmv.as_mv.row & 7) {
        this_mv.as_mv.row -= 2;

        if (startmv.as_mv.col & 7) {
          this_mv.as_mv.col -= 2;
          thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                             this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        } else {
          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
          thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                             b->src_stride, &sse);
        }
      } else {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;

        if (startmv.as_mv.col & 7) {
          this_mv.as_mv.col -= 2;
          thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6,
                             z, b->src_stride, &sse);
        } else {
          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
          thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride,
                             &sse);
        }
      }

      break;
    case 1:
      this_mv.as_mv.col += 2;

      if (startmv.as_mv.row & 7) {
        this_mv.as_mv.row -= 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                           this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      } else {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
        thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
                           b->src_stride, &sse);
      }

      break;
    case 2:
      this_mv.as_mv.row += 2;

      if (startmv.as_mv.col & 7) {
        this_mv.as_mv.col -= 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                           this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      } else {
        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
        thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                           b->src_stride, &sse);
      }

      break;
    case 3:
      this_mv.as_mv.col += 2;
      this_mv.as_mv.row += 2;
      thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                         this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      break;
  }

  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (diag < bestmse) {
    *bestmv = this_mv;
    bestmse = diag;
    *distortion = thismse;
    *sse1 = sse;
  }

  return bestmse;
}
660
/* Half-pel-only variant of vp8_find_best_sub_pixel_step: evaluates the four
 * half-pel neighbours of *bestmv plus one chosen diagonal, with no
 * quarter-pel stage. On entry *bestmv is in full-pel units; on exit it is
 * in 1/8-pel units. Returns the best variance + mv cost; also writes the
 * raw distortion to *distortion and the sse to *sse1. */
int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                  int_mv *bestmv, int_mv *ref_mv,
                                  int error_per_bit,
                                  const vp8_variance_fn_ptr_t *vfp,
                                  int *mvcost[2], int *distortion,
                                  unsigned int *sse1) {
  int bestmse = INT_MAX;
  int_mv startmv;
  int_mv this_mv;
  unsigned char *z = (*(b->base_src) + b->src);
  int left, right, up, down, diag;
  unsigned int sse;
  int whichdir;
  int thismse;
  int y_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if VPX_ARCH_X86 || VPX_ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;

  y_stride = 32;
  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
  vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
  y = xd->y_buf + y_stride + 1;
#else
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  /* central mv: scale to 1/8-pel units */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;
  startmv = *bestmv;

  /* calculate central point error */
  bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = bestmse;
  bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* go left then right and check error */
  this_mv.as_mv.row = startmv.as_mv.row;
  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (left < bestmse) {
    *bestmv = this_mv;
    bestmse = left;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.col += 8;
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (right < bestmse) {
    *bestmv = this_mv;
    bestmse = right;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* go up then down and check error */
  this_mv.as_mv.col = startmv.as_mv.col;
  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (up < bestmse) {
    *bestmv = this_mv;
    bestmse = up;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.row += 8;
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (down < bestmse) {
    *bestmv = this_mv;
    bestmse = down;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* now check 1 more diagonal - the one between the better horizontal and
   * the better vertical probe */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  this_mv = startmv;

  switch (whichdir) {
    case 0:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse =
          vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 1:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 2:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 3:
    default:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
  }

  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (diag < bestmse) {
    *bestmv = this_mv;
    bestmse = diag;
    *distortion = thismse;
    *sse1 = sse;
  }

  return bestmse;
}
801
/* Helper macros for the full-pel SAD searches below. They expand in terms
 * of the enclosing function's locals (x, br, bc, all_in, this_mv, thissad,
 * bestsad, best_site, i, fcenter_mv, mvsadcost, sad_per_bit) and are
 * #undef'd after use. */

/* Set all_in if every point within +/-range of (br,bc) is inside the legal
 * MV rectangle, so per-point bounds checks can be skipped. */
#define CHECK_BOUNDS(range) \
  { \
    all_in = 1; \
    all_in &= ((br - range) >= x->mv_row_min); \
    all_in &= ((br + range) <= x->mv_row_max); \
    all_in &= ((bc - range) >= x->mv_col_min); \
    all_in &= ((bc + range) <= x->mv_col_max); \
  }

/* Skip (continue) the enclosing loop iteration when this_mv is out of the
 * legal MV rectangle. */
#define CHECK_POINT \
  { \
    if (this_mv.as_mv.col < x->mv_col_min) continue; \
    if (this_mv.as_mv.col > x->mv_col_max) continue; \
    if (this_mv.as_mv.row < x->mv_row_min) continue; \
    if (this_mv.as_mv.row > x->mv_row_max) continue; \
  }

/* Accept this_mv as the new best if its SAD, and then SAD + mv cost, beat
 * bestsad. The outer test avoids the mv-cost computation for clear losers. */
#define CHECK_BETTER \
  { \
    if (thissad < bestsad) { \
      thissad += \
          mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); \
      if (thissad < bestsad) { \
        bestsad = thissad; \
        best_site = i; \
      } \
    } \
  }
830
/* For hex search: given the index k of the previously chosen hex vertex,
 * next_chkpts[k] lists the three new candidate offsets to probe when the
 * pattern re-centres on that vertex (the other three overlap points
 * already evaluated). Entries are {row, col} offsets. */
static const MV next_chkpts[6][3] = {
  { { -2, 0 }, { -1, -2 }, { 1, -2 } }, { { -1, -2 }, { 1, -2 }, { 2, 0 } },
  { { 1, -2 }, { 2, 0 }, { 1, 2 } },    { { 2, 0 }, { 1, 2 }, { -1, 2 } },
  { { 1, 2 }, { -1, 2 }, { -2, 0 } },   { { -1, 2 }, { -2, 0 }, { -1, -2 } }
};
836
/* Full-pel hexagon-pattern SAD search: one 6-point hex probe around the
 * (clamped) ref_mv, then repeated 3-point re-centring steps along the best
 * vertex (bounded by hex_range), and finally a small 4-neighbour diamond
 * refinement (bounded by dia_range). Writes the winner to *best_mv
 * (full-pel units) and returns its SAD + mv cost. With
 * CONFIG_MULTI_RES_ENCODING, search_param shrinks hex_range (or skips the
 * hex stage entirely); otherwise search_param is unused. */
int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                   int_mv *best_mv, int search_param, int sad_per_bit,
                   const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2],
                   int_mv *center_mv) {
  /* The 6 vertices of the hexagon, as {row, col} offsets. */
  MV hex[6] = {
    { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 }
  };
  /* 1-away neighbours for the final diamond refinement. */
  MV neighbors[4] = { { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 } };
  int i, j;

  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

  int in_what_stride = pre_stride;
  int br, bc; /* current best position, full-pel */
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;
  unsigned char *base_offset;
  unsigned char *this_offset;
  int k = -1; /* index of the last chosen hex vertex, for next_chkpts */
  int all_in;
  int best_site = -1;
  int hex_range = 127;
  int dia_range = 8;

  /* center_mv is in 1/8-pel units; convert to full-pel for SAD costing. */
  int_mv fcenter_mv;
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* adjust ref_mv to make sure it is within MV range */
  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  br = ref_mv->as_mv.row;
  bc = ref_mv->as_mv.col;

  /* Work out the start point for the search */
  base_offset = (unsigned char *)(base_pre + d->offset);
  this_offset = base_offset + (br * (pre_stride)) + bc;
  this_mv.as_mv.row = br;
  this_mv.as_mv.col = bc;
  bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride) +
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

#if CONFIG_MULTI_RES_ENCODING
  /* Lower search range based on prediction info */
  if (search_param >= 6)
    goto cal_neighbors;
  else if (search_param >= 5)
    hex_range = 4;
  else if (search_param >= 4)
    hex_range = 6;
  else if (search_param >= 3)
    hex_range = 15;
  else if (search_param >= 2)
    hex_range = 31;
  else if (search_param >= 1)
    hex_range = 63;

  dia_range = 8;
#else
  (void)search_param;
#endif

  /* hex search: first round probes all 6 vertices */
  CHECK_BOUNDS(2)

  if (all_in) {
    for (i = 0; i < 6; ++i) {
      this_mv.as_mv.row = br + hex[i].row;
      this_mv.as_mv.col = bc + hex[i].col;
      this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
                    this_mv.as_mv.col;
      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
      CHECK_BETTER
    }
  } else {
    for (i = 0; i < 6; ++i) {
      this_mv.as_mv.row = br + hex[i].row;
      this_mv.as_mv.col = bc + hex[i].col;
      CHECK_POINT
      this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
                    this_mv.as_mv.col;
      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
      CHECK_BETTER
    }
  }

  if (best_site == -1) {
    /* centre already best: skip straight to the diamond refinement */
    goto cal_neighbors;
  } else {
    br += hex[best_site].row;
    bc += hex[best_site].col;
    k = best_site;
  }

  /* Re-centre on the winning vertex; only 3 new points per step. */
  for (j = 1; j < hex_range; ++j) {
    best_site = -1;
    CHECK_BOUNDS(2)

    if (all_in) {
      for (i = 0; i < 3; ++i) {
        this_mv.as_mv.row = br + next_chkpts[k][i].row;
        this_mv.as_mv.col = bc + next_chkpts[k][i].col;
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    } else {
      for (i = 0; i < 3; ++i) {
        this_mv.as_mv.row = br + next_chkpts[k][i].row;
        this_mv.as_mv.col = bc + next_chkpts[k][i].col;
        CHECK_POINT
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    }

    if (best_site == -1) {
      break;
    } else {
      br += next_chkpts[k][best_site].row;
      bc += next_chkpts[k][best_site].col;
      /* Update k to the absolute hex-vertex index of the new direction
       * (k + 5 + best_site, wrapped mod 6). */
      k += 5 + best_site;
      if (k >= 12) {
        k -= 12;
      } else if (k >= 6) {
        k -= 6;
      }
    }
  }

  /* check 4 1-away neighbors */
cal_neighbors:
  for (j = 0; j < dia_range; ++j) {
    best_site = -1;
    CHECK_BOUNDS(1)

    if (all_in) {
      for (i = 0; i < 4; ++i) {
        this_mv.as_mv.row = br + neighbors[i].row;
        this_mv.as_mv.col = bc + neighbors[i].col;
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    } else {
      for (i = 0; i < 4; ++i) {
        this_mv.as_mv.row = br + neighbors[i].row;
        this_mv.as_mv.col = bc + neighbors[i].col;
        CHECK_POINT
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    }

    if (best_site == -1) {
      break;
    } else {
      br += neighbors[best_site].row;
      bc += neighbors[best_site].col;
    }
  }

  best_mv->as_mv.row = br;
  best_mv->as_mv.col = bc;

  return bestsad;
}
#undef CHECK_BOUNDS
#undef CHECK_POINT
#undef CHECK_BETTER
1017
vp8_diamond_search_sad_c(MACROBLOCK * x,BLOCK * b,BLOCKD * d,int_mv * ref_mv,int_mv * best_mv,int search_param,int sad_per_bit,int * num00,vp8_variance_fn_ptr_t * fn_ptr,int * mvcost[2],int_mv * center_mv)1018 int vp8_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1019 int_mv *best_mv, int search_param, int sad_per_bit,
1020 int *num00, vp8_variance_fn_ptr_t *fn_ptr,
1021 int *mvcost[2], int_mv *center_mv) {
1022 int i, j, step;
1023
1024 unsigned char *what = (*(b->base_src) + b->src);
1025 int what_stride = b->src_stride;
1026 unsigned char *in_what;
1027 int pre_stride = x->e_mbd.pre.y_stride;
1028 unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1029 int in_what_stride = pre_stride;
1030 unsigned char *best_address;
1031
1032 int tot_steps;
1033 int_mv this_mv;
1034
1035 unsigned int bestsad;
1036 unsigned int thissad;
1037 int best_site = 0;
1038 int last_site = 0;
1039
1040 int ref_row;
1041 int ref_col;
1042 int this_row_offset;
1043 int this_col_offset;
1044 search_site *ss;
1045
1046 unsigned char *check_here;
1047
1048 int *mvsadcost[2];
1049 int_mv fcenter_mv;
1050
1051 mvsadcost[0] = x->mvsadcost[0];
1052 mvsadcost[1] = x->mvsadcost[1];
1053 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1054 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1055
1056 vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
1057 x->mv_row_max);
1058 ref_row = ref_mv->as_mv.row;
1059 ref_col = ref_mv->as_mv.col;
1060 *num00 = 0;
1061 best_mv->as_mv.row = ref_row;
1062 best_mv->as_mv.col = ref_col;
1063
1064 /* Work out the start point for the search */
1065 in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
1066 ref_col);
1067 best_address = in_what;
1068
1069 /* Check the starting position */
1070 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
1071 mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1072
1073 /* search_param determines the length of the initial step and hence
1074 * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
1075 * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
1076 */
1077 ss = &x->ss[search_param * x->searches_per_step];
1078 tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1079
1080 i = 1;
1081
1082 for (step = 0; step < tot_steps; ++step) {
1083 for (j = 0; j < x->searches_per_step; ++j) {
1084 /* Trap illegal vectors */
1085 this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
1086 this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
1087
1088 if ((this_col_offset > x->mv_col_min) &&
1089 (this_col_offset < x->mv_col_max) &&
1090 (this_row_offset > x->mv_row_min) &&
1091 (this_row_offset < x->mv_row_max))
1092
1093 {
1094 check_here = ss[i].offset + best_address;
1095 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1096
1097 if (thissad < bestsad) {
1098 this_mv.as_mv.row = this_row_offset;
1099 this_mv.as_mv.col = this_col_offset;
1100 thissad +=
1101 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1102
1103 if (thissad < bestsad) {
1104 bestsad = thissad;
1105 best_site = i;
1106 }
1107 }
1108 }
1109
1110 i++;
1111 }
1112
1113 if (best_site != last_site) {
1114 best_mv->as_mv.row += ss[best_site].mv.row;
1115 best_mv->as_mv.col += ss[best_site].mv.col;
1116 best_address += ss[best_site].offset;
1117 last_site = best_site;
1118 } else if (best_address == in_what) {
1119 (*num00)++;
1120 }
1121 }
1122
1123 this_mv.as_mv.row = best_mv->as_mv.row << 3;
1124 this_mv.as_mv.col = best_mv->as_mv.col << 3;
1125
1126 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
1127 mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1128 }
1129
1130 #if HAVE_SSE2 || HAVE_MSA
vp8_diamond_search_sadx4(MACROBLOCK * x,BLOCK * b,BLOCKD * d,int_mv * ref_mv,int_mv * best_mv,int search_param,int sad_per_bit,int * num00,vp8_variance_fn_ptr_t * fn_ptr,int * mvcost[2],int_mv * center_mv)1131 int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1132 int_mv *best_mv, int search_param, int sad_per_bit,
1133 int *num00, vp8_variance_fn_ptr_t *fn_ptr,
1134 int *mvcost[2], int_mv *center_mv) {
1135 int i, j, step;
1136
1137 unsigned char *what = (*(b->base_src) + b->src);
1138 int what_stride = b->src_stride;
1139 unsigned char *in_what;
1140 int pre_stride = x->e_mbd.pre.y_stride;
1141 unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1142 int in_what_stride = pre_stride;
1143 unsigned char *best_address;
1144
1145 int tot_steps;
1146 int_mv this_mv;
1147
1148 unsigned int bestsad;
1149 unsigned int thissad;
1150 int best_site = 0;
1151 int last_site = 0;
1152
1153 int ref_row;
1154 int ref_col;
1155 int this_row_offset;
1156 int this_col_offset;
1157 search_site *ss;
1158
1159 unsigned char *check_here;
1160
1161 int *mvsadcost[2];
1162 int_mv fcenter_mv;
1163
1164 mvsadcost[0] = x->mvsadcost[0];
1165 mvsadcost[1] = x->mvsadcost[1];
1166 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1167 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1168
1169 vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
1170 x->mv_row_max);
1171 ref_row = ref_mv->as_mv.row;
1172 ref_col = ref_mv->as_mv.col;
1173 *num00 = 0;
1174 best_mv->as_mv.row = ref_row;
1175 best_mv->as_mv.col = ref_col;
1176
1177 /* Work out the start point for the search */
1178 in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
1179 ref_col);
1180 best_address = in_what;
1181
1182 /* Check the starting position */
1183 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
1184 mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1185
1186 /* search_param determines the length of the initial step and hence the
1187 * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 =
1188 * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
1189 */
1190 ss = &x->ss[search_param * x->searches_per_step];
1191 tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1192
1193 i = 1;
1194
1195 for (step = 0; step < tot_steps; ++step) {
1196 int all_in = 1, t;
1197
1198 /* To know if all neighbor points are within the bounds, 4 bounds
1199 * checking are enough instead of checking 4 bounds for each
1200 * points.
1201 */
1202 all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min);
1203 all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max);
1204 all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min);
1205 all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max);
1206
1207 if (all_in) {
1208 unsigned int sad_array[4];
1209
1210 for (j = 0; j < x->searches_per_step; j += 4) {
1211 const unsigned char *block_offset[4];
1212
1213 for (t = 0; t < 4; ++t) {
1214 block_offset[t] = ss[i + t].offset + best_address;
1215 }
1216
1217 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
1218 sad_array);
1219
1220 for (t = 0; t < 4; t++, i++) {
1221 if (sad_array[t] < bestsad) {
1222 this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
1223 this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
1224 sad_array[t] +=
1225 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1226
1227 if (sad_array[t] < bestsad) {
1228 bestsad = sad_array[t];
1229 best_site = i;
1230 }
1231 }
1232 }
1233 }
1234 } else {
1235 for (j = 0; j < x->searches_per_step; ++j) {
1236 /* Trap illegal vectors */
1237 this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
1238 this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
1239
1240 if ((this_col_offset > x->mv_col_min) &&
1241 (this_col_offset < x->mv_col_max) &&
1242 (this_row_offset > x->mv_row_min) &&
1243 (this_row_offset < x->mv_row_max)) {
1244 check_here = ss[i].offset + best_address;
1245 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1246
1247 if (thissad < bestsad) {
1248 this_mv.as_mv.row = this_row_offset;
1249 this_mv.as_mv.col = this_col_offset;
1250 thissad +=
1251 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1252
1253 if (thissad < bestsad) {
1254 bestsad = thissad;
1255 best_site = i;
1256 }
1257 }
1258 }
1259 i++;
1260 }
1261 }
1262
1263 if (best_site != last_site) {
1264 best_mv->as_mv.row += ss[best_site].mv.row;
1265 best_mv->as_mv.col += ss[best_site].mv.col;
1266 best_address += ss[best_site].offset;
1267 last_site = best_site;
1268 } else if (best_address == in_what) {
1269 (*num00)++;
1270 }
1271 }
1272
1273 this_mv.as_mv.row = best_mv->as_mv.row * 8;
1274 this_mv.as_mv.col = best_mv->as_mv.col * 8;
1275
1276 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
1277 mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1278 }
1279 #endif // HAVE_SSE2 || HAVE_MSA
1280
vp8_full_search_sad_c(MACROBLOCK * x,BLOCK * b,BLOCKD * d,int_mv * ref_mv,int sad_per_bit,int distance,vp8_variance_fn_ptr_t * fn_ptr,int * mvcost[2],int_mv * center_mv)1281 int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1282 int sad_per_bit, int distance,
1283 vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
1284 int_mv *center_mv) {
1285 unsigned char *what = (*(b->base_src) + b->src);
1286 int what_stride = b->src_stride;
1287 unsigned char *in_what;
1288 int pre_stride = x->e_mbd.pre.y_stride;
1289 unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1290 int in_what_stride = pre_stride;
1291 int mv_stride = pre_stride;
1292 unsigned char *bestaddress;
1293 int_mv *best_mv = &d->bmi.mv;
1294 int_mv this_mv;
1295 unsigned int bestsad;
1296 unsigned int thissad;
1297 int r, c;
1298
1299 unsigned char *check_here;
1300
1301 int ref_row = ref_mv->as_mv.row;
1302 int ref_col = ref_mv->as_mv.col;
1303
1304 int row_min = ref_row - distance;
1305 int row_max = ref_row + distance;
1306 int col_min = ref_col - distance;
1307 int col_max = ref_col + distance;
1308
1309 int *mvsadcost[2];
1310 int_mv fcenter_mv;
1311
1312 mvsadcost[0] = x->mvsadcost[0];
1313 mvsadcost[1] = x->mvsadcost[1];
1314 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1315 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1316
1317 /* Work out the mid point for the search */
1318 in_what = base_pre + d->offset;
1319 bestaddress = in_what + (ref_row * pre_stride) + ref_col;
1320
1321 best_mv->as_mv.row = ref_row;
1322 best_mv->as_mv.col = ref_col;
1323
1324 /* Baseline value at the centre */
1325 bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
1326 mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1327
1328 /* Apply further limits to prevent us looking using vectors that
1329 * stretch beyiond the UMV border
1330 */
1331 if (col_min < x->mv_col_min) col_min = x->mv_col_min;
1332
1333 if (col_max > x->mv_col_max) col_max = x->mv_col_max;
1334
1335 if (row_min < x->mv_row_min) row_min = x->mv_row_min;
1336
1337 if (row_max > x->mv_row_max) row_max = x->mv_row_max;
1338
1339 for (r = row_min; r < row_max; ++r) {
1340 this_mv.as_mv.row = r;
1341 check_here = r * mv_stride + in_what + col_min;
1342
1343 for (c = col_min; c < col_max; ++c) {
1344 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1345
1346 this_mv.as_mv.col = c;
1347 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1348
1349 if (thissad < bestsad) {
1350 bestsad = thissad;
1351 best_mv->as_mv.row = r;
1352 best_mv->as_mv.col = c;
1353 bestaddress = check_here;
1354 }
1355
1356 check_here++;
1357 }
1358 }
1359
1360 this_mv.as_mv.row = best_mv->as_mv.row << 3;
1361 this_mv.as_mv.col = best_mv->as_mv.col << 3;
1362
1363 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
1364 mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1365 }
1366
1367 #if HAVE_SSSE3
vp8_full_search_sadx3(MACROBLOCK * x,BLOCK * b,BLOCKD * d,int_mv * ref_mv,int sad_per_bit,int distance,vp8_variance_fn_ptr_t * fn_ptr,int * mvcost[2],int_mv * center_mv)1368 int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1369 int sad_per_bit, int distance,
1370 vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
1371 int_mv *center_mv) {
1372 unsigned char *what = (*(b->base_src) + b->src);
1373 int what_stride = b->src_stride;
1374 unsigned char *in_what;
1375 int pre_stride = x->e_mbd.pre.y_stride;
1376 unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1377 int in_what_stride = pre_stride;
1378 int mv_stride = pre_stride;
1379 unsigned char *bestaddress;
1380 int_mv *best_mv = &d->bmi.mv;
1381 int_mv this_mv;
1382 unsigned int bestsad;
1383 unsigned int thissad;
1384 int r, c;
1385
1386 unsigned char *check_here;
1387
1388 int ref_row = ref_mv->as_mv.row;
1389 int ref_col = ref_mv->as_mv.col;
1390
1391 int row_min = ref_row - distance;
1392 int row_max = ref_row + distance;
1393 int col_min = ref_col - distance;
1394 int col_max = ref_col + distance;
1395
1396 unsigned int sad_array[3];
1397
1398 int *mvsadcost[2];
1399 int_mv fcenter_mv;
1400
1401 mvsadcost[0] = x->mvsadcost[0];
1402 mvsadcost[1] = x->mvsadcost[1];
1403 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1404 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1405
1406 /* Work out the mid point for the search */
1407 in_what = base_pre + d->offset;
1408 bestaddress = in_what + (ref_row * pre_stride) + ref_col;
1409
1410 best_mv->as_mv.row = ref_row;
1411 best_mv->as_mv.col = ref_col;
1412
1413 /* Baseline value at the centre */
1414 bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
1415 mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1416
1417 /* Apply further limits to prevent us looking using vectors that stretch
1418 * beyond the UMV border
1419 */
1420 if (col_min < x->mv_col_min) col_min = x->mv_col_min;
1421
1422 if (col_max > x->mv_col_max) col_max = x->mv_col_max;
1423
1424 if (row_min < x->mv_row_min) row_min = x->mv_row_min;
1425
1426 if (row_max > x->mv_row_max) row_max = x->mv_row_max;
1427
1428 for (r = row_min; r < row_max; ++r) {
1429 this_mv.as_mv.row = r;
1430 check_here = r * mv_stride + in_what + col_min;
1431 c = col_min;
1432
1433 while ((c + 2) < col_max) {
1434 int i;
1435
1436 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
1437
1438 for (i = 0; i < 3; ++i) {
1439 thissad = sad_array[i];
1440
1441 if (thissad < bestsad) {
1442 this_mv.as_mv.col = c;
1443 thissad +=
1444 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1445
1446 if (thissad < bestsad) {
1447 bestsad = thissad;
1448 best_mv->as_mv.row = r;
1449 best_mv->as_mv.col = c;
1450 bestaddress = check_here;
1451 }
1452 }
1453
1454 check_here++;
1455 c++;
1456 }
1457 }
1458
1459 while (c < col_max) {
1460 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1461
1462 if (thissad < bestsad) {
1463 this_mv.as_mv.col = c;
1464 thissad +=
1465 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1466
1467 if (thissad < bestsad) {
1468 bestsad = thissad;
1469 best_mv->as_mv.row = r;
1470 best_mv->as_mv.col = c;
1471 bestaddress = check_here;
1472 }
1473 }
1474
1475 check_here++;
1476 c++;
1477 }
1478 }
1479
1480 this_mv.as_mv.row = best_mv->as_mv.row << 3;
1481 this_mv.as_mv.col = best_mv->as_mv.col << 3;
1482
1483 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
1484 mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1485 }
1486 #endif // HAVE_SSSE3
1487
1488 #if HAVE_SSE4_1
vp8_full_search_sadx8(MACROBLOCK * x,BLOCK * b,BLOCKD * d,int_mv * ref_mv,int sad_per_bit,int distance,vp8_variance_fn_ptr_t * fn_ptr,int * mvcost[2],int_mv * center_mv)1489 int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1490 int sad_per_bit, int distance,
1491 vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
1492 int_mv *center_mv) {
1493 unsigned char *what = (*(b->base_src) + b->src);
1494 int what_stride = b->src_stride;
1495 int pre_stride = x->e_mbd.pre.y_stride;
1496 unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1497 unsigned char *in_what;
1498 int in_what_stride = pre_stride;
1499 int mv_stride = pre_stride;
1500 unsigned char *bestaddress;
1501 int_mv *best_mv = &d->bmi.mv;
1502 int_mv this_mv;
1503 unsigned int bestsad;
1504 unsigned int thissad;
1505 int r, c;
1506
1507 unsigned char *check_here;
1508
1509 int ref_row = ref_mv->as_mv.row;
1510 int ref_col = ref_mv->as_mv.col;
1511
1512 int row_min = ref_row - distance;
1513 int row_max = ref_row + distance;
1514 int col_min = ref_col - distance;
1515 int col_max = ref_col + distance;
1516
1517 DECLARE_ALIGNED(16, unsigned int, sad_array8[8]);
1518 unsigned int sad_array[3];
1519
1520 int *mvsadcost[2];
1521 int_mv fcenter_mv;
1522
1523 mvsadcost[0] = x->mvsadcost[0];
1524 mvsadcost[1] = x->mvsadcost[1];
1525 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1526 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1527
1528 /* Work out the mid point for the search */
1529 in_what = base_pre + d->offset;
1530 bestaddress = in_what + (ref_row * pre_stride) + ref_col;
1531
1532 best_mv->as_mv.row = ref_row;
1533 best_mv->as_mv.col = ref_col;
1534
1535 /* Baseline value at the centre */
1536 bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
1537 mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1538
1539 /* Apply further limits to prevent us looking using vectors that stretch
1540 * beyond the UMV border
1541 */
1542 if (col_min < x->mv_col_min) col_min = x->mv_col_min;
1543
1544 if (col_max > x->mv_col_max) col_max = x->mv_col_max;
1545
1546 if (row_min < x->mv_row_min) row_min = x->mv_row_min;
1547
1548 if (row_max > x->mv_row_max) row_max = x->mv_row_max;
1549
1550 for (r = row_min; r < row_max; ++r) {
1551 this_mv.as_mv.row = r;
1552 check_here = r * mv_stride + in_what + col_min;
1553 c = col_min;
1554
1555 while ((c + 7) < col_max) {
1556 int i;
1557
1558 fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
1559
1560 for (i = 0; i < 8; ++i) {
1561 thissad = sad_array8[i];
1562
1563 if (thissad < bestsad) {
1564 this_mv.as_mv.col = c;
1565 thissad +=
1566 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1567
1568 if (thissad < bestsad) {
1569 bestsad = thissad;
1570 best_mv->as_mv.row = r;
1571 best_mv->as_mv.col = c;
1572 bestaddress = check_here;
1573 }
1574 }
1575
1576 check_here++;
1577 c++;
1578 }
1579 }
1580
1581 while ((c + 2) < col_max) {
1582 int i;
1583
1584 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
1585
1586 for (i = 0; i < 3; ++i) {
1587 thissad = sad_array[i];
1588
1589 if (thissad < bestsad) {
1590 this_mv.as_mv.col = c;
1591 thissad +=
1592 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1593
1594 if (thissad < bestsad) {
1595 bestsad = thissad;
1596 best_mv->as_mv.row = r;
1597 best_mv->as_mv.col = c;
1598 bestaddress = check_here;
1599 }
1600 }
1601
1602 check_here++;
1603 c++;
1604 }
1605 }
1606
1607 while (c < col_max) {
1608 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1609
1610 if (thissad < bestsad) {
1611 this_mv.as_mv.col = c;
1612 thissad +=
1613 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1614
1615 if (thissad < bestsad) {
1616 bestsad = thissad;
1617 best_mv->as_mv.row = r;
1618 best_mv->as_mv.col = c;
1619 bestaddress = check_here;
1620 }
1621 }
1622
1623 check_here++;
1624 c++;
1625 }
1626 }
1627
1628 this_mv.as_mv.row = best_mv->as_mv.row * 8;
1629 this_mv.as_mv.col = best_mv->as_mv.col * 8;
1630
1631 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
1632 mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1633 }
1634 #endif // HAVE_SSE4_1
1635
vp8_refining_search_sad_c(MACROBLOCK * x,BLOCK * b,BLOCKD * d,int_mv * ref_mv,int error_per_bit,int search_range,vp8_variance_fn_ptr_t * fn_ptr,int * mvcost[2],int_mv * center_mv)1636 int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
1637 int_mv *ref_mv, int error_per_bit,
1638 int search_range, vp8_variance_fn_ptr_t *fn_ptr,
1639 int *mvcost[2], int_mv *center_mv) {
1640 MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
1641 int i, j;
1642 short this_row_offset, this_col_offset;
1643
1644 int what_stride = b->src_stride;
1645 int pre_stride = x->e_mbd.pre.y_stride;
1646 unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1647 int in_what_stride = pre_stride;
1648 unsigned char *what = (*(b->base_src) + b->src);
1649 unsigned char *best_address =
1650 (unsigned char *)(base_pre + d->offset +
1651 (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
1652 unsigned char *check_here;
1653 int_mv this_mv;
1654 unsigned int bestsad;
1655 unsigned int thissad;
1656
1657 int *mvsadcost[2];
1658 int_mv fcenter_mv;
1659
1660 mvsadcost[0] = x->mvsadcost[0];
1661 mvsadcost[1] = x->mvsadcost[1];
1662 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1663 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1664
1665 bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
1666 mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
1667
1668 for (i = 0; i < search_range; ++i) {
1669 int best_site = -1;
1670
1671 for (j = 0; j < 4; ++j) {
1672 this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
1673 this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
1674
1675 if ((this_col_offset > x->mv_col_min) &&
1676 (this_col_offset < x->mv_col_max) &&
1677 (this_row_offset > x->mv_row_min) &&
1678 (this_row_offset < x->mv_row_max)) {
1679 check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
1680 best_address;
1681 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1682
1683 if (thissad < bestsad) {
1684 this_mv.as_mv.row = this_row_offset;
1685 this_mv.as_mv.col = this_col_offset;
1686 thissad +=
1687 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
1688
1689 if (thissad < bestsad) {
1690 bestsad = thissad;
1691 best_site = j;
1692 }
1693 }
1694 }
1695 }
1696
1697 if (best_site == -1) {
1698 break;
1699 } else {
1700 ref_mv->as_mv.row += neighbors[best_site].row;
1701 ref_mv->as_mv.col += neighbors[best_site].col;
1702 best_address += (neighbors[best_site].row) * in_what_stride +
1703 neighbors[best_site].col;
1704 }
1705 }
1706
1707 this_mv.as_mv.row = ref_mv->as_mv.row << 3;
1708 this_mv.as_mv.col = ref_mv->as_mv.col << 3;
1709
1710 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
1711 mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1712 }
1713
1714 #if HAVE_SSE2 || HAVE_MSA
vp8_refining_search_sadx4(MACROBLOCK * x,BLOCK * b,BLOCKD * d,int_mv * ref_mv,int error_per_bit,int search_range,vp8_variance_fn_ptr_t * fn_ptr,int * mvcost[2],int_mv * center_mv)1715 int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
1716 int_mv *ref_mv, int error_per_bit,
1717 int search_range, vp8_variance_fn_ptr_t *fn_ptr,
1718 int *mvcost[2], int_mv *center_mv) {
1719 MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
1720 int i, j;
1721 short this_row_offset, this_col_offset;
1722
1723 int what_stride = b->src_stride;
1724 int pre_stride = x->e_mbd.pre.y_stride;
1725 unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1726 int in_what_stride = pre_stride;
1727 unsigned char *what = (*(b->base_src) + b->src);
1728 unsigned char *best_address =
1729 (unsigned char *)(base_pre + d->offset +
1730 (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
1731 unsigned char *check_here;
1732 int_mv this_mv;
1733 unsigned int bestsad;
1734 unsigned int thissad;
1735
1736 int *mvsadcost[2];
1737 int_mv fcenter_mv;
1738
1739 mvsadcost[0] = x->mvsadcost[0];
1740 mvsadcost[1] = x->mvsadcost[1];
1741 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1742 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1743
1744 bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
1745 mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
1746
1747 for (i = 0; i < search_range; ++i) {
1748 int best_site = -1;
1749 int all_in = 1;
1750
1751 all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
1752 all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
1753 all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
1754 all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);
1755
1756 if (all_in) {
1757 unsigned int sad_array[4];
1758 const unsigned char *block_offset[4];
1759 block_offset[0] = best_address - in_what_stride;
1760 block_offset[1] = best_address - 1;
1761 block_offset[2] = best_address + 1;
1762 block_offset[3] = best_address + in_what_stride;
1763
1764 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
1765 sad_array);
1766
1767 for (j = 0; j < 4; ++j) {
1768 if (sad_array[j] < bestsad) {
1769 this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
1770 this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
1771 sad_array[j] +=
1772 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
1773
1774 if (sad_array[j] < bestsad) {
1775 bestsad = sad_array[j];
1776 best_site = j;
1777 }
1778 }
1779 }
1780 } else {
1781 for (j = 0; j < 4; ++j) {
1782 this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
1783 this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
1784
1785 if ((this_col_offset > x->mv_col_min) &&
1786 (this_col_offset < x->mv_col_max) &&
1787 (this_row_offset > x->mv_row_min) &&
1788 (this_row_offset < x->mv_row_max)) {
1789 check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
1790 best_address;
1791 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1792
1793 if (thissad < bestsad) {
1794 this_mv.as_mv.row = this_row_offset;
1795 this_mv.as_mv.col = this_col_offset;
1796 thissad +=
1797 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
1798
1799 if (thissad < bestsad) {
1800 bestsad = thissad;
1801 best_site = j;
1802 }
1803 }
1804 }
1805 }
1806 }
1807
1808 if (best_site == -1) {
1809 break;
1810 } else {
1811 ref_mv->as_mv.row += neighbors[best_site].row;
1812 ref_mv->as_mv.col += neighbors[best_site].col;
1813 best_address += (neighbors[best_site].row) * in_what_stride +
1814 neighbors[best_site].col;
1815 }
1816 }
1817
1818 this_mv.as_mv.row = ref_mv->as_mv.row * 8;
1819 this_mv.as_mv.col = ref_mv->as_mv.col * 8;
1820
1821 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
1822 mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1823 }
1824 #endif // HAVE_SSE2 || HAVE_MSA
1825