1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "./vp8_rtcd.h"
12 #include "./vpx_dsp_rtcd.h"
13 #include "onyx_int.h"
14 #include "mcomp.h"
15 #include "vpx_mem/vpx_mem.h"
16 #include "vpx_config.h"
17 #include <stdio.h>
18 #include <limits.h>
19 #include <math.h>
20 #include "vp8/common/findnearmv.h"
21 #include "vp8/common/common.h"
22 #include "vpx_dsp/vpx_dsp_common.h"
23
24 #ifdef VP8_ENTROPY_STATS
25 static int mv_ref_ct[31][4][2];
26 static int mv_mode_cts[4][2];
27 #endif
28
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) {
  /* MV costing is based on the distribution of vectors in the previous
   * frame and as such will tend to over state the cost of vectors. In
   * addition coding a new vector can have a knock on effect on the cost
   * of subsequent vectors and the quality of prediction from NEAR and
   * NEAREST for subsequent blocks. The "Weight" parameter allows, to a
   * limited extent, for some account to be taken of these factors.
   */
  const int row_diff = (mv->as_mv.row - ref->as_mv.row) >> 1;
  const int col_diff = (mv->as_mv.col - ref->as_mv.col) >> 1;
  /* Clamp table indices so an out-of-range component cannot read past the
   * end of the per-component cost tables. */
  const int row_cost = mvcost[0][clamp(row_diff, 0, MVvals)];
  const int col_cost = mvcost[1][clamp(col_diff, 0, MVvals)];

  /* Weight scales the summed bit cost; >> 7 removes the fixed-point factor. */
  return ((row_cost + col_cost) * Weight) >> 7;
}
43
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2],
                       int error_per_bit) {
  int row_idx, col_idx;

  /* A NULL cost table means mv costing is disabled for this search. */
  if (mvcost == NULL) return 0;

  /* Half-pel component deltas, clamped to the valid cost-table range. */
  row_idx = clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals);
  col_idx = clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals);

  /* Rounded fixed-point scale by error_per_bit (+128 rounds, >> 8 rescales). */
  return ((mvcost[0][row_idx] + mvcost[1][col_idx]) * error_per_bit + 128) >> 8;
}
58
static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2],
                          int error_per_bit) {
  /* Calculate sad error cost on full pixel basis. */
  int row_cost, col_cost;

  /* A NULL table disables mv costing for this search. */
  if (mvsadcost == NULL) return 0;

  row_cost = mvsadcost[0][mv->as_mv.row - ref->as_mv.row];
  col_cost = mvsadcost[1][mv->as_mv.col - ref->as_mv.col];

  /* Rounded fixed-point scale by error_per_bit. */
  return ((row_cost + col_cost) * error_per_bit + 128) >> 8;
}
72
void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
  /* Unit direction vectors for the 4 search sites of each step:
   * up, down, left, right (in that order, matching the original layout). */
  static const int dir_row[4] = { -1, 1, 0, 0 };
  static const int dir_col[4] = { 0, 0, -1, 1 };
  int step_len = MAX_FIRST_STEP;
  int idx = 0;
  int i;

  /* Site 0 is the search center itself. */
  x->ss[idx].mv.col = 0;
  x->ss[idx].mv.row = 0;
  x->ss[idx].offset = 0;
  idx++;

  /* Generate offsets for 4 search sites per step, halving the radius each
   * iteration until it reaches zero. */
  for (; step_len > 0; step_len /= 2) {
    for (i = 0; i < 4; ++i) {
      const int r = dir_row[i] * step_len;
      const int c = dir_col[i] * step_len;
      x->ss[idx].mv.col = c;
      x->ss[idx].mv.row = r;
      /* Precomputed buffer offset for this displacement. */
      x->ss[idx].offset = r * stride + c;
      idx++;
    }
  }

  x->ss_count = idx;
  x->searches_per_step = 4;
}
116
void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) {
  /* Unit direction vectors for the 8 search sites of each step, in the
   * original order: up, down, left, right, then the four diagonals. */
  static const int dir_row[8] = { -1, 1, 0, 0, -1, -1, 1, 1 };
  static const int dir_col[8] = { 0, 0, -1, 1, -1, 1, -1, 1 };
  int step_len = MAX_FIRST_STEP;
  int idx = 0;
  int i;

  /* Site 0 is the search center itself. */
  x->ss[idx].mv.col = 0;
  x->ss[idx].mv.row = 0;
  x->ss[idx].offset = 0;
  idx++;

  /* Generate offsets for 8 search sites per step, halving the radius each
   * iteration until it reaches zero. */
  for (; step_len > 0; step_len /= 2) {
    for (i = 0; i < 8; ++i) {
      const int r = dir_row[i] * step_len;
      const int c = dir_col[i] * step_len;
      x->ss[idx].mv.col = c;
      x->ss[idx].mv.row = r;
      /* Precomputed buffer offset for this displacement. */
      x->ss[idx].offset = r * stride + c;
      idx++;
    }
  }

  x->ss_count = idx;
  x->searches_per_step = 8;
}
184
185 /*
186 * To avoid the penalty for crossing cache-line read, preload the reference
187 * area in a small buffer, which is aligned to make sure there won't be crossing
188 * cache-line read while reading from this buffer. This reduced the cpu
189 * cycles spent on reading ref data in sub-pixel filter functions.
190 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
191 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
192 * could reduce the area.
193 */
194
/* estimated cost of a motion vector (r,c) */
/* NOTE(review): relies on `mvcost`, `rr`, `rc` and `error_per_bit` being in
 * scope at the expansion site; r and c are each evaluated once. */
#define MVC(r, c)                                                             \
  (mvcost                                                                     \
       ? ((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128) >> 8 \
       : 0)
/* pointer to predictor base of a motionvector */
/* Uses quarter-pel coordinates: >> 2 gives the full-pel position relative to
 * the precomputed `offset` of the search center. */
#define PRE(r, c) (y + (((r) >> 2) * y_stride + ((c) >> 2) - (offset)))
/* convert motion vector component to offset for svf calc */
/* Low 2 bits of the quarter-pel component, scaled to the 1/8-pel phase the
 * sub-pixel variance function expects. */
#define SP(x) (((x)&3) << 1)
/* returns subpixel variance error function. */
#define DIST(r, c) \
  vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse)
/* Runs statement s when (r,c) is inside [minr,maxr]x[minc,maxc], else e. */
#define IFMVCV(r, c, s, e) \
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
/* returns distortion + motion vector cost */
#define ERR(r, c) (MVC(r, c) + DIST(r, c))
/* checks if (r,c) has better score than previous best */
/* On an out-of-range point v is set to UINT_MAX so it never wins the
 * direction comparisons below. Also updates br/bc/*distortion/*sse1. */
#define CHECK_BETTER(v, r, c)                          \
  IFMVCV(r, c,                                         \
         {                                             \
           thismse = DIST(r, c);                       \
           if ((v = (MVC(r, c) + thismse)) < besterr) { \
             besterr = v;                              \
             br = r;                                   \
             bc = c;                                   \
             *distortion = thismse;                    \
             *sse1 = sse;                              \
           }                                           \
         },                                            \
         v = UINT_MAX;)
225
/* Iteratively refines *bestmv to sub-pel precision: up to 4 half-pel passes
 * followed by up to 4 quarter-pel passes, each probing left/right/up/down and
 * one diagonal via CHECK_BETTER. On entry bestmv is in full-pel units; on
 * return it is in 1/8-pel units. *distortion and *sse1 describe the winning
 * position. Returns the best error score, or INT_MAX when the result strays
 * more than MAX_FULL_PEL_VAL from ref_mv. */
int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                             int_mv *bestmv, int_mv *ref_mv,
                                             int error_per_bit,
                                             const vp8_variance_fn_ptr_t *vfp,
                                             int *mvcost[2], int *distortion,
                                             unsigned int *sse1) {
  unsigned char *z = (*(b->base_src) + b->src);

  /* rr/rc: reference mv in half-pel units (used by the MVC macro).
   * br/bc: current best position in quarter-pel units. */
  int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
  int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
  int tr = br, tc = bc;
  unsigned int besterr;
  unsigned int left, right, up, down, diag;
  unsigned int sse;
  unsigned int whichdir;
  unsigned int halfiters = 4;
  unsigned int quarteriters = 4;
  int thismse;

  /* Search bounds in quarter-pel units, further limited by the long-MV
   * coding range around the reference vector. */
  int minc = VPXMAX(x->mv_col_min * 4,
                    (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
  int maxc = VPXMIN(x->mv_col_max * 4,
                    (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
  int minr = VPXMAX(x->mv_row_min * 4,
                    (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
  int maxr = VPXMIN(x->mv_row_max * 4,
                    (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));

  int y_stride;
  int offset;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86 || ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;
  int buf_r1, buf_r2, buf_c1;

  /* Clamping to avoid out-of-range data access */
  buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)
               ? (bestmv->as_mv.row - x->mv_row_min)
               : 3;
  buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)
               ? (x->mv_row_max - bestmv->as_mv.row)
               : 3;
  buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)
               ? (bestmv->as_mv.col - x->mv_col_min)
               : 3;
  /* Aligned intermediate buffer stride; avoids cache-line-crossing reads
   * during the sub-pel filtering (see comment above). */
  y_stride = 32;

  /* Copy to intermediate buffer before searching. */
  vfp->copymem(y_0 - buf_c1 - pre_stride * buf_r1, pre_stride, xd->y_buf,
               y_stride, 16 + buf_r1 + buf_r2);
  y = xd->y_buf + y_stride * buf_r1 + buf_c1;
#else
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  /* Full-pel offset of the search center, used by the PRE macro. */
  offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;

  /* central mv */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;

  /* calculate central point error */
  besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* TODO: Each subsequent iteration checks at least one point in common
   * with the last iteration could be 2 ( if diag selected)
   */
  while (--halfiters) {
    /* 1/2 pel */
    CHECK_BETTER(left, tr, tc - 2);
    CHECK_BETTER(right, tr, tc + 2);
    CHECK_BETTER(up, tr - 2, tc);
    CHECK_BETTER(down, tr + 2, tc);

    /* Pick the diagonal between the better horizontal and vertical probes. */
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

    switch (whichdir) {
      case 0: CHECK_BETTER(diag, tr - 2, tc - 2); break;
      case 1: CHECK_BETTER(diag, tr - 2, tc + 2); break;
      case 2: CHECK_BETTER(diag, tr + 2, tc - 2); break;
      case 3: CHECK_BETTER(diag, tr + 2, tc + 2); break;
    }

    /* no reason to check the same one again. */
    if (tr == br && tc == bc) break;

    tr = br;
    tc = bc;
  }

  /* TODO: Each subsequent iteration checks at least one point in common
   * with the last iteration could be 2 ( if diag selected)
   */

  /* 1/4 pel */
  while (--quarteriters) {
    CHECK_BETTER(left, tr, tc - 1);
    CHECK_BETTER(right, tr, tc + 1);
    CHECK_BETTER(up, tr - 1, tc);
    CHECK_BETTER(down, tr + 1, tc);

    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

    switch (whichdir) {
      case 0: CHECK_BETTER(diag, tr - 1, tc - 1); break;
      case 1: CHECK_BETTER(diag, tr - 1, tc + 1); break;
      case 2: CHECK_BETTER(diag, tr + 1, tc - 1); break;
      case 3: CHECK_BETTER(diag, tr + 1, tc + 1); break;
    }

    /* no reason to check the same one again. */
    if (tr == br && tc == bc) break;

    tr = br;
    tc = bc;
  }

  /* Convert the quarter-pel result back to 1/8-pel units. */
  bestmv->as_mv.row = br * 2;
  bestmv->as_mv.col = bc * 2;

  /* Reject vectors outside the codable range around the reference. */
  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) {
    return INT_MAX;
  }

  return besterr;
}
362 #undef MVC
363 #undef PRE
364 #undef SP
365 #undef DIST
366 #undef IFMVCV
367 #undef ERR
368 #undef CHECK_BETTER
369
/* Single-pass sub-pel refinement: probes the four half-pel neighbors plus the
 * best diagonal, then repeats the pattern at quarter-pel resolution around the
 * half-pel winner. On entry *bestmv is in full-pel units; on return it is in
 * 1/8-pel units. *distortion and *sse1 describe the winning position.
 * Returns the best error score (distortion + mv cost). */
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                 int_mv *bestmv, int_mv *ref_mv,
                                 int error_per_bit,
                                 const vp8_variance_fn_ptr_t *vfp,
                                 int *mvcost[2], int *distortion,
                                 unsigned int *sse1) {
  int bestmse = INT_MAX;
  int_mv startmv;
  int_mv this_mv;
  unsigned char *z = (*(b->base_src) + b->src);
  int left, right, up, down, diag;
  unsigned int sse;
  int whichdir;
  int thismse;
  int y_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86 || ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;

  /* Aligned buffer stride to avoid cache-line-crossing reads (see above). */
  y_stride = 32;
  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
  vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
  y = xd->y_buf + y_stride + 1;
#else
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  /* central mv (full-pel -> 1/8-pel units) */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;
  startmv = *bestmv;

  /* calculate central point error */
  bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = bestmse;
  bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* go left then right and check error */
  this_mv.as_mv.row = startmv.as_mv.row;
  /* (col - 8) | 4 == col - 4 here since col is a multiple of 8: the half-pel
   * position one step left. */
  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (left < bestmse) {
    *bestmv = this_mv;
    bestmse = left;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.col += 8;
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (right < bestmse) {
    *bestmv = this_mv;
    bestmse = right;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* go up then down and check error */
  this_mv.as_mv.col = startmv.as_mv.col;
  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (up < bestmse) {
    *bestmv = this_mv;
    bestmse = up;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.row += 8;
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (down < bestmse) {
    *bestmv = this_mv;
    bestmse = down;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* now check 1 more diagonal */
  /* whichdir combines the better horizontal (bit 0) and vertical (bit 1)
   * directions to pick which diagonal to test. */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  this_mv = startmv;

  switch (whichdir) {
    case 0:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse =
          vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 1:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 2:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 3:
    default:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
  }

  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (diag < bestmse) {
    *bestmv = this_mv;
    bestmse = diag;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* time to check quarter pels. */
  /* Re-center the predictor pointer on the half-pel winner. */
  if (bestmv->as_mv.row < startmv.as_mv.row) y -= y_stride;

  if (bestmv->as_mv.col < startmv.as_mv.col) y--;

  startmv = *bestmv;

  /* go left then right and check error */
  this_mv.as_mv.row = startmv.as_mv.row;

  /* The branches below distinguish whether the start component sits on a
   * half-pel phase (low 3 bits nonzero) or a full-pel phase; in the latter
   * case the quarter-pel neighbor lies in the previous full-pel cell, so the
   * predictor pointer must step back one sample. */
  if (startmv.as_mv.col & 7) {
    this_mv.as_mv.col = startmv.as_mv.col - 2;
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                       this_mv.as_mv.row & 7, z, b->src_stride, &sse);
  } else {
    this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                       b->src_stride, &sse);
  }

  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (left < bestmse) {
    *bestmv = this_mv;
    bestmse = left;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.col += 4;
  thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
                     z, b->src_stride, &sse);
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (right < bestmse) {
    *bestmv = this_mv;
    bestmse = right;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* go up then down and check error */
  this_mv.as_mv.col = startmv.as_mv.col;

  if (startmv.as_mv.row & 7) {
    this_mv.as_mv.row = startmv.as_mv.row - 2;
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                       this_mv.as_mv.row & 7, z, b->src_stride, &sse);
  } else {
    this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
                       b->src_stride, &sse);
  }

  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (up < bestmse) {
    *bestmv = this_mv;
    bestmse = up;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.row += 4;
  thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
                     z, b->src_stride, &sse);
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (down < bestmse) {
    *bestmv = this_mv;
    bestmse = down;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* now check 1 more diagonal */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

  this_mv = startmv;

  switch (whichdir) {
    case 0:

      /* Up-left diagonal: each axis independently handles the "on full-pel
       * phase" case by stepping the pointer back one row/column. */
      if (startmv.as_mv.row & 7) {
        this_mv.as_mv.row -= 2;

        if (startmv.as_mv.col & 7) {
          this_mv.as_mv.col -= 2;
          thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                             this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        } else {
          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
          thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                             b->src_stride, &sse);
        }
      } else {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;

        if (startmv.as_mv.col & 7) {
          this_mv.as_mv.col -= 2;
          thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6,
                             z, b->src_stride, &sse);
        } else {
          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
          thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride,
                             &sse);
        }
      }

      break;
    case 1:
      this_mv.as_mv.col += 2;

      if (startmv.as_mv.row & 7) {
        this_mv.as_mv.row -= 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                           this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      } else {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
        thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
                           b->src_stride, &sse);
      }

      break;
    case 2:
      this_mv.as_mv.row += 2;

      if (startmv.as_mv.col & 7) {
        this_mv.as_mv.col -= 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                           this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      } else {
        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
        thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                           b->src_stride, &sse);
      }

      break;
    case 3:
      this_mv.as_mv.col += 2;
      this_mv.as_mv.row += 2;
      thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                         this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      break;
  }

  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (diag < bestmse) {
    *bestmv = this_mv;
    bestmse = diag;
    *distortion = thismse;
    *sse1 = sse;
  }

  return bestmse;
}
665
/* Half-pel-only refinement: probes the four half-pel neighbors of *bestmv and
 * the diagonal selected by the better horizontal/vertical probes. On entry
 * *bestmv is in full-pel units; on return it is in 1/8-pel units.
 * *distortion and *sse1 describe the winning position. Returns the best
 * error score (distortion + mv cost). */
int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                  int_mv *bestmv, int_mv *ref_mv,
                                  int error_per_bit,
                                  const vp8_variance_fn_ptr_t *vfp,
                                  int *mvcost[2], int *distortion,
                                  unsigned int *sse1) {
  int bestmse = INT_MAX;
  int_mv startmv;
  int_mv this_mv;
  unsigned char *z = (*(b->base_src) + b->src);
  int left, right, up, down, diag;
  unsigned int sse;
  int whichdir;
  int thismse;
  int y_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86 || ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;

  /* Aligned buffer stride to avoid cache-line-crossing reads (see above). */
  y_stride = 32;
  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
  vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
  y = xd->y_buf + y_stride + 1;
#else
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  /* central mv (full-pel -> 1/8-pel units) */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;
  startmv = *bestmv;

  /* calculate central point error */
  bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = bestmse;
  bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* go left then right and check error */
  this_mv.as_mv.row = startmv.as_mv.row;
  /* (col - 8) | 4 == col - 4 since col is a multiple of 8: half-pel left. */
  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (left < bestmse) {
    *bestmv = this_mv;
    bestmse = left;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.col += 8;
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (right < bestmse) {
    *bestmv = this_mv;
    bestmse = right;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* go up then down and check error */
  this_mv.as_mv.col = startmv.as_mv.col;
  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (up < bestmse) {
    *bestmv = this_mv;
    bestmse = up;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.row += 8;
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (down < bestmse) {
    *bestmv = this_mv;
    bestmse = down;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* now check 1 more diagonal - */
  /* whichdir combines the better horizontal (bit 0) and vertical (bit 1)
   * directions to pick which diagonal to test. */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  this_mv = startmv;

  switch (whichdir) {
    case 0:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse =
          vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 1:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 2:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 3:
    default:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
  }

  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (diag < bestmse) {
    *bestmv = this_mv;
    bestmse = diag;
    *distortion = thismse;
    *sse1 = sse;
  }

  return bestmse;
}
806
/* Sets `all_in` (in expansion scope) when the whole +/- range box around the
 * current best (br, bc) lies inside the MV limits, allowing the per-point
 * bounds test to be skipped. */
#define CHECK_BOUNDS(range)                    \
  {                                            \
    all_in = 1;                                \
    all_in &= ((br - range) >= x->mv_row_min); \
    all_in &= ((br + range) <= x->mv_row_max); \
    all_in &= ((bc - range) >= x->mv_col_min); \
    all_in &= ((bc + range) <= x->mv_col_max); \
  }

/* Skips the current loop iteration when this_mv is outside the MV limits;
 * must only be expanded inside a loop. */
#define CHECK_POINT                                  \
  {                                                  \
    if (this_mv.as_mv.col < x->mv_col_min) continue; \
    if (this_mv.as_mv.col > x->mv_col_max) continue; \
    if (this_mv.as_mv.row < x->mv_row_min) continue; \
    if (this_mv.as_mv.row > x->mv_row_max) continue; \
  }

/* Adds mv cost to thissad only when the raw SAD already beats bestsad (the
 * cost can only increase thissad), then records loop index i as best_site on
 * improvement. Relies on thissad, bestsad, this_mv, fcenter_mv, mvsadcost,
 * sad_per_bit, best_site and i in expansion scope. */
#define CHECK_BETTER                                                     \
  {                                                                      \
    if (thissad < bestsad) {                                             \
      thissad +=                                                         \
          mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); \
      if (thissad < bestsad) {                                           \
        bestsad = thissad;                                               \
        best_site = i;                                                   \
      }                                                                  \
    }                                                                    \
  }
835
/* For each of the 6 hexagon vertices (indexed by the previously chosen
 * direction k), the 3 new candidate displacements to probe after the search
 * center moves to that vertex — the other 3 hexagon points were already
 * covered by the previous iteration. */
static const MV next_chkpts[6][3] = {
  { { -2, 0 }, { -1, -2 }, { 1, -2 } }, { { -1, -2 }, { 1, -2 }, { 2, 0 } },
  { { 1, -2 }, { 2, 0 }, { 1, 2 } },    { { 2, 0 }, { 1, 2 }, { -1, 2 } },
  { { 1, 2 }, { -1, 2 }, { -2, 0 } },   { { -1, 2 }, { -2, 0 }, { -1, -2 } }
};
841
/* Full-pel hexagon search: starting from the clamped ref_mv, walks a hexagon
 * pattern (up to hex_range steps, re-probing only the 3 new points each
 * move via next_chkpts), then refines with a small diamond of 4 one-away
 * neighbors (up to dia_range steps). Writes the winner to *best_mv in
 * full-pel units and returns its SAD-based score. */
int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                   int_mv *best_mv, int search_param, int sad_per_bit,
                   const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2],
                   int *mvcost[2], int_mv *center_mv) {
  /* The 6 vertices of the search hexagon, in (row, col) full-pel offsets. */
  MV hex[6] = {
    { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 }
  };
  /* 4-point diamond used in the final refinement stage. */
  MV neighbors[4] = { { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 } };
  int i, j;

  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

  int in_what_stride = pre_stride;
  int br, bc;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;
  unsigned char *base_offset;
  unsigned char *this_offset;
  /* k: index of the last hexagon direction taken (selects next_chkpts row). */
  int k = -1;
  int all_in;
  int best_site = -1;
  int hex_range = 127;
  int dia_range = 8;

  int_mv fcenter_mv;
  /* center_mv arrives in 1/8-pel units; convert to full pel for SAD costing. */
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  (void)mvcost;

  /* adjust ref_mv to make sure it is within MV range */
  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  br = ref_mv->as_mv.row;
  bc = ref_mv->as_mv.col;

  /* Work out the start point for the search */
  base_offset = (unsigned char *)(base_pre + d->offset);
  this_offset = base_offset + (br * (pre_stride)) + bc;
  this_mv.as_mv.row = br;
  this_mv.as_mv.col = bc;
  bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride) +
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

#if CONFIG_MULTI_RES_ENCODING
  /* Lower search range based on prediction info */
  if (search_param >= 6)
    goto cal_neighbors;
  else if (search_param >= 5)
    hex_range = 4;
  else if (search_param >= 4)
    hex_range = 6;
  else if (search_param >= 3)
    hex_range = 15;
  else if (search_param >= 2)
    hex_range = 31;
  else if (search_param >= 1)
    hex_range = 63;

  dia_range = 8;
#else
  (void)search_param;
#endif

  /* hex search */
  /* First iteration probes all 6 vertices; CHECK_BOUNDS lets the hot path
   * skip the per-point range test when the whole hexagon fits in bounds. */
  CHECK_BOUNDS(2)

  if (all_in) {
    for (i = 0; i < 6; ++i) {
      this_mv.as_mv.row = br + hex[i].row;
      this_mv.as_mv.col = bc + hex[i].col;
      this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
                    this_mv.as_mv.col;
      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
      CHECK_BETTER
    }
  } else {
    for (i = 0; i < 6; ++i) {
      this_mv.as_mv.row = br + hex[i].row;
      this_mv.as_mv.col = bc + hex[i].col;
      CHECK_POINT
      this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
                    this_mv.as_mv.col;
      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
      CHECK_BETTER
    }
  }

  if (best_site == -1) {
    goto cal_neighbors;
  } else {
    br += hex[best_site].row;
    bc += hex[best_site].col;
    k = best_site;
  }

  /* Subsequent iterations only probe the 3 points not already covered by the
   * previous hexagon (selected by direction k). */
  for (j = 1; j < hex_range; ++j) {
    best_site = -1;
    CHECK_BOUNDS(2)

    if (all_in) {
      for (i = 0; i < 3; ++i) {
        this_mv.as_mv.row = br + next_chkpts[k][i].row;
        this_mv.as_mv.col = bc + next_chkpts[k][i].col;
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    } else {
      for (i = 0; i < 3; ++i) {
        this_mv.as_mv.row = br + next_chkpts[k][i].row;
        this_mv.as_mv.col = bc + next_chkpts[k][i].col;
        CHECK_POINT
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    }

    if (best_site == -1) {
      break;
    } else {
      br += next_chkpts[k][best_site].row;
      bc += next_chkpts[k][best_site].col;
      /* Update direction index modulo 6 (k maps local site back to the
       * hexagon vertex that was chosen). */
      k += 5 + best_site;
      if (k >= 12) {
        k -= 12;
      } else if (k >= 6) {
        k -= 6;
      }
    }
  }

  /* check 4 1-away neighbors */
cal_neighbors:
  for (j = 0; j < dia_range; ++j) {
    best_site = -1;
    CHECK_BOUNDS(1)

    if (all_in) {
      for (i = 0; i < 4; ++i) {
        this_mv.as_mv.row = br + neighbors[i].row;
        this_mv.as_mv.col = bc + neighbors[i].col;
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    } else {
      for (i = 0; i < 4; ++i) {
        this_mv.as_mv.row = br + neighbors[i].row;
        this_mv.as_mv.col = bc + neighbors[i].col;
        CHECK_POINT
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    }

    if (best_site == -1) {
      break;
    } else {
      br += neighbors[best_site].row;
      bc += neighbors[best_site].col;
    }
  }

  best_mv->as_mv.row = br;
  best_mv->as_mv.col = bc;

  return bestsad;
}
1021 #undef CHECK_BOUNDS
1022 #undef CHECK_POINT
1023 #undef CHECK_BETTER
1024
/* Diamond-pattern motion search using whole-pel SAD, one candidate at a time.
 *
 * Starting from ref_mv (clamped to the legal MV range), walks the
 * precomputed search-site pattern in x->ss; search_param selects the
 * initial step size (0 = largest, MAX_FIRST_STEP). *num00 counts steps
 * on which the untouched centre stayed best, letting callers skip
 * redundant follow-up searches.
 *
 * Returns the variance of the best candidate plus its MV rate cost
 * (via mvcost/errorperbit); best_mv receives the winning whole-pel vector.
 */
int vp8_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                             int_mv *best_mv, int search_param, int sad_per_bit,
                             int *num00, vp8_variance_fn_ptr_t *fn_ptr,
                             int *mvcost[2], int_mv *center_mv) {
  int i, j, step;

  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  unsigned char *best_address;

  int tot_steps;
  int_mv this_mv;

  unsigned int bestsad;
  unsigned int thissad;
  int best_site = 0;
  int last_site = 0;

  int ref_row;
  int ref_col;
  int this_row_offset;
  int this_col_offset;
  search_site *ss;

  unsigned char *check_here;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  /* SAD costing operates on whole-pel vectors: drop the 1/8-pel fraction. */
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  ref_row = ref_mv->as_mv.row;
  ref_col = ref_mv->as_mv.col;
  *num00 = 0;
  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Work out the start point for the search */
  in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
                              ref_col);
  best_address = in_what;

  /* Check the starting position */
  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* search_param determines the length of the initial step and hence
   * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
   * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   */
  ss = &x->ss[search_param * x->searches_per_step];
  tot_steps = (x->ss_count / x->searches_per_step) - search_param;

  i = 1;

  for (step = 0; step < tot_steps; ++step) {
    for (j = 0; j < x->searches_per_step; ++j) {
      /* Trap illegal vectors */
      this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
      this_col_offset = best_mv->as_mv.col + ss[i].mv.col;

      if ((this_col_offset > x->mv_col_min) &&
          (this_col_offset < x->mv_col_max) &&
          (this_row_offset > x->mv_row_min) &&
          (this_row_offset < x->mv_row_max))

      {
        check_here = ss[i].offset + best_address;
        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

        if (thissad < bestsad) {
          this_mv.as_mv.row = this_row_offset;
          this_mv.as_mv.col = this_col_offset;
          /* Only pay for the MV cost computation when the raw SAD
           * already beats the current best.
           */
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_site = i;
          }
        }
      }

      i++;
    }

    if (best_site != last_site) {
      best_mv->as_mv.row += ss[best_site].mv.row;
      best_mv->as_mv.col += ss[best_site].mv.col;
      best_address += ss[best_site].offset;
      last_site = best_site;
    } else if (best_address == in_what) {
      (*num00)++;
    }
  }

  /* Convert the whole-pel result to 1/8-pel units.  Multiply rather than
   * shift: the MV components may be negative, and left-shifting a negative
   * value is undefined behaviour.  This also matches the sadx4 variant.
   */
  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  this_mv.as_mv.col = best_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1136
/* Diamond-pattern motion search using whole-pel SAD, evaluating four
 * candidates per call via fn_ptr->sdx4df when every point of the step is
 * known to be inside the legal MV range; otherwise falls back to the
 * one-at-a-time path.  Same contract as vp8_diamond_search_sad_c.
 */
int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                             int_mv *best_mv, int search_param, int sad_per_bit,
                             int *num00, vp8_variance_fn_ptr_t *fn_ptr,
                             int *mvcost[2], int_mv *center_mv) {
  int i, j, step;

  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  unsigned char *best_address;

  int tot_steps;
  int_mv this_mv;

  unsigned int bestsad;
  unsigned int thissad;
  int best_site = 0;
  int last_site = 0;

  int ref_row;
  int ref_col;
  int this_row_offset;
  int this_col_offset;
  search_site *ss;

  unsigned char *check_here;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  /* SAD costing operates on whole-pel vectors: drop the 1/8-pel fraction. */
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  ref_row = ref_mv->as_mv.row;
  ref_col = ref_mv->as_mv.col;
  *num00 = 0;
  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Work out the start point for the search */
  in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
                              ref_col);
  best_address = in_what;

  /* Check the starting position */
  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* search_param determines the length of the initial step and hence the
   * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 =
   * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   */
  ss = &x->ss[search_param * x->searches_per_step];
  tot_steps = (x->ss_count / x->searches_per_step) - search_param;

  i = 1;

  for (step = 0; step < tot_steps; ++step) {
    int all_in = 1, t;

    /* To know if all neighbor points are within the bounds, 4 bounds
     * checking are enough instead of checking 4 bounds for each
     * points.
     */
    all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min);
    all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max);
    all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min);
    all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max);

    if (all_in) {
      unsigned int sad_array[4];

      /* Batch the step's candidates four at a time through the
       * multi-block SAD function.
       */
      for (j = 0; j < x->searches_per_step; j += 4) {
        const unsigned char *block_offset[4];

        for (t = 0; t < 4; ++t) {
          block_offset[t] = ss[i + t].offset + best_address;
        }

        fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
                       sad_array);

        /* i advances together with t so ss[i] stays the candidate that
         * produced sad_array[t].
         */
        for (t = 0; t < 4; t++, i++) {
          if (sad_array[t] < bestsad) {
            this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
            this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
            sad_array[t] +=
                mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

            if (sad_array[t] < bestsad) {
              bestsad = sad_array[t];
              best_site = i;
            }
          }
        }
      }
    } else {
      /* Scalar fallback: some candidate of this step may be out of
       * range, so each point is bounds-checked individually.
       */
      for (j = 0; j < x->searches_per_step; ++j) {
        /* Trap illegal vectors */
        this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
        this_col_offset = best_mv->as_mv.col + ss[i].mv.col;

        if ((this_col_offset > x->mv_col_min) &&
            (this_col_offset < x->mv_col_max) &&
            (this_row_offset > x->mv_row_min) &&
            (this_row_offset < x->mv_row_max)) {
          check_here = ss[i].offset + best_address;
          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

          if (thissad < bestsad) {
            this_mv.as_mv.row = this_row_offset;
            this_mv.as_mv.col = this_col_offset;
            thissad +=
                mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

            if (thissad < bestsad) {
              bestsad = thissad;
              best_site = i;
            }
          }
        }
        i++;
      }
    }

    if (best_site != last_site) {
      best_mv->as_mv.row += ss[best_site].mv.row;
      best_mv->as_mv.col += ss[best_site].mv.col;
      best_address += ss[best_site].offset;
      last_site = best_site;
    } else if (best_address == in_what) {
      (*num00)++;
    }
  }

  /* Convert the whole-pel result to 1/8-pel units. */
  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  this_mv.as_mv.col = best_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1285
/* Exhaustive whole-pel motion search: evaluates SAD + MV rate cost for
 * every candidate within +/- distance of ref_mv, clipped to the UMV
 * border.  The winner is written to d->bmi.mv.
 *
 * Returns the variance of the best candidate plus its MV rate cost.
 */
int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv) {
  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  int mv_stride = pre_stride;
  unsigned char *bestaddress;
  int_mv *best_mv = &d->bmi.mv;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;
  int r, c;

  unsigned char *check_here;

  int ref_row = ref_mv->as_mv.row;
  int ref_col = ref_mv->as_mv.col;

  int row_min = ref_row - distance;
  int row_max = ref_row + distance;
  int col_min = ref_col - distance;
  int col_max = ref_col + distance;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  /* SAD costing operates on whole-pel vectors: drop the 1/8-pel fraction. */
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* Work out the mid point for the search */
  in_what = base_pre + d->offset;
  bestaddress = in_what + (ref_row * pre_stride) + ref_col;

  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Baseline value at the centre */
  bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* Apply further limits to prevent us looking using vectors that
   * stretch beyond the UMV border
   */
  if (col_min < x->mv_col_min) col_min = x->mv_col_min;

  if (col_max > x->mv_col_max) col_max = x->mv_col_max;

  if (row_min < x->mv_row_min) row_min = x->mv_row_min;

  if (row_max > x->mv_row_max) row_max = x->mv_row_max;

  for (r = row_min; r < row_max; ++r) {
    this_mv.as_mv.row = r;
    check_here = r * mv_stride + in_what + col_min;

    for (c = col_min; c < col_max; ++c) {
      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

      this_mv.as_mv.col = c;
      thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

      if (thissad < bestsad) {
        bestsad = thissad;
        best_mv->as_mv.row = r;
        best_mv->as_mv.col = c;
        bestaddress = check_here;
      }

      check_here++;
    }
  }

  /* Convert the whole-pel result to 1/8-pel units.  Multiply rather than
   * shift: the MV components may be negative, and left-shifting a negative
   * value is undefined behaviour.
   */
  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  this_mv.as_mv.col = best_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1371
/* Exhaustive whole-pel motion search accelerated with fn_ptr->sdx3f,
 * which computes SADs for three consecutive column positions at once;
 * a scalar loop finishes each row's remainder.  Same contract as
 * vp8_full_search_sad_c: the winner is written to d->bmi.mv and the
 * variance + MV rate cost of the best candidate is returned.
 */
int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv) {
  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  int mv_stride = pre_stride;
  unsigned char *bestaddress;
  int_mv *best_mv = &d->bmi.mv;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;
  int r, c;

  unsigned char *check_here;

  int ref_row = ref_mv->as_mv.row;
  int ref_col = ref_mv->as_mv.col;

  int row_min = ref_row - distance;
  int row_max = ref_row + distance;
  int col_min = ref_col - distance;
  int col_max = ref_col + distance;

  unsigned int sad_array[3];

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  /* SAD costing operates on whole-pel vectors: drop the 1/8-pel fraction. */
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* Work out the mid point for the search */
  in_what = base_pre + d->offset;
  bestaddress = in_what + (ref_row * pre_stride) + ref_col;

  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Baseline value at the centre */
  bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* Apply further limits to prevent us looking using vectors that stretch
   * beyond the UMV border
   */
  if (col_min < x->mv_col_min) col_min = x->mv_col_min;

  if (col_max > x->mv_col_max) col_max = x->mv_col_max;

  if (row_min < x->mv_row_min) row_min = x->mv_row_min;

  if (row_max > x->mv_row_max) row_max = x->mv_row_max;

  for (r = row_min; r < row_max; ++r) {
    this_mv.as_mv.row = r;
    check_here = r * mv_stride + in_what + col_min;
    c = col_min;

    /* Three-wide vector pass while a full triple fits in the row. */
    while ((c + 2) < col_max) {
      int i;

      fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);

      for (i = 0; i < 3; ++i) {
        thissad = sad_array[i];

        if (thissad < bestsad) {
          this_mv.as_mv.col = c;
          /* Only pay for the MV cost when the raw SAD already wins. */
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_mv->as_mv.row = r;
            best_mv->as_mv.col = c;
            bestaddress = check_here;
          }
        }

        check_here++;
        c++;
      }
    }

    /* Scalar tail for the remaining 0-2 columns of this row. */
    while (c < col_max) {
      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

      if (thissad < bestsad) {
        this_mv.as_mv.col = c;
        thissad +=
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

        if (thissad < bestsad) {
          bestsad = thissad;
          best_mv->as_mv.row = r;
          best_mv->as_mv.col = c;
          bestaddress = check_here;
        }
      }

      check_here++;
      c++;
    }
  }

  /* Convert the whole-pel result to 1/8-pel units.  Multiply rather than
   * shift: the MV components may be negative, and left-shifting a negative
   * value is undefined behaviour.  This also matches the sadx8 variant.
   */
  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  this_mv.as_mv.col = best_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1490
/* Exhaustive whole-pel motion search accelerated with fn_ptr->sdx8f
 * (eight consecutive column SADs per call), falling back to sdx3f for
 * a partial triple and then to scalar sdf for the last columns of each
 * row.  Same contract as vp8_full_search_sad_c: the winner is written
 * to d->bmi.mv and the variance + MV rate cost of the best candidate
 * is returned.
 */
int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv) {
  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  unsigned char *in_what;
  int in_what_stride = pre_stride;
  int mv_stride = pre_stride;
  unsigned char *bestaddress;
  int_mv *best_mv = &d->bmi.mv;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;
  int r, c;

  unsigned char *check_here;

  int ref_row = ref_mv->as_mv.row;
  int ref_col = ref_mv->as_mv.col;

  int row_min = ref_row - distance;
  int row_max = ref_row + distance;
  int col_min = ref_col - distance;
  int col_max = ref_col + distance;

  /* 16-byte alignment so SIMD sdx8f implementations can store directly. */
  DECLARE_ALIGNED(16, unsigned int, sad_array8[8]);
  unsigned int sad_array[3];

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  /* SAD costing operates on whole-pel vectors: drop the 1/8-pel fraction. */
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* Work out the mid point for the search */
  in_what = base_pre + d->offset;
  bestaddress = in_what + (ref_row * pre_stride) + ref_col;

  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Baseline value at the centre */
  bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* Apply further limits to prevent us looking using vectors that stretch
   * beyond the UMV border
   */
  if (col_min < x->mv_col_min) col_min = x->mv_col_min;

  if (col_max > x->mv_col_max) col_max = x->mv_col_max;

  if (row_min < x->mv_row_min) row_min = x->mv_row_min;

  if (row_max > x->mv_row_max) row_max = x->mv_row_max;

  for (r = row_min; r < row_max; ++r) {
    this_mv.as_mv.row = r;
    check_here = r * mv_stride + in_what + col_min;
    c = col_min;

    /* Eight-wide vector pass while a full octet fits in the row. */
    while ((c + 7) < col_max) {
      int i;

      fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);

      for (i = 0; i < 8; ++i) {
        thissad = sad_array8[i];

        if (thissad < bestsad) {
          this_mv.as_mv.col = c;
          /* Only pay for the MV cost when the raw SAD already wins. */
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_mv->as_mv.row = r;
            best_mv->as_mv.col = c;
            bestaddress = check_here;
          }
        }

        check_here++;
        c++;
      }
    }

    /* Three-wide pass for a remaining partial group. */
    while ((c + 2) < col_max) {
      int i;

      fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);

      for (i = 0; i < 3; ++i) {
        thissad = sad_array[i];

        if (thissad < bestsad) {
          this_mv.as_mv.col = c;
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_mv->as_mv.row = r;
            best_mv->as_mv.col = c;
            bestaddress = check_here;
          }
        }

        check_here++;
        c++;
      }
    }

    /* Scalar tail for the last 0-2 columns of this row. */
    while (c < col_max) {
      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

      if (thissad < bestsad) {
        this_mv.as_mv.col = c;
        thissad +=
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

        if (thissad < bestsad) {
          bestsad = thissad;
          best_mv->as_mv.row = r;
          best_mv->as_mv.col = c;
          bestaddress = check_here;
        }
      }

      check_here++;
      c++;
    }
  }

  /* Convert the whole-pel result to 1/8-pel units. */
  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  this_mv.as_mv.col = best_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1636
/* Iterative whole-pel refinement: repeatedly probes the four 1-away
 * neighbours (up, left, right, down) of the current best vector and
 * moves to the cheapest one, stopping when no neighbour improves or
 * search_range iterations have run.  ref_mv is updated in place with
 * the refined whole-pel vector.
 *
 * Returns the variance of the refined candidate plus its MV rate cost.
 */
int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
                              int *mvcost[2], int_mv *center_mv) {
  MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  int i, j;
  short this_row_offset, this_col_offset;

  int what_stride = b->src_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  unsigned char *what = (*(b->base_src) + b->src);
  unsigned char *best_address =
      (unsigned char *)(base_pre + d->offset +
                        (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
  unsigned char *check_here;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  /* SAD costing operates on whole-pel vectors: drop the 1/8-pel fraction. */
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* Baseline cost of the starting vector. */
  bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
            mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);

  for (i = 0; i < search_range; ++i) {
    int best_site = -1;

    for (j = 0; j < 4; ++j) {
      this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
      this_col_offset = ref_mv->as_mv.col + neighbors[j].col;

      if ((this_col_offset > x->mv_col_min) &&
          (this_col_offset < x->mv_col_max) &&
          (this_row_offset > x->mv_row_min) &&
          (this_row_offset < x->mv_row_max)) {
        check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
                     best_address;
        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

        if (thissad < bestsad) {
          this_mv.as_mv.row = this_row_offset;
          this_mv.as_mv.col = this_col_offset;
          /* Only pay for the MV cost when the raw SAD already wins. */
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_site = j;
          }
        }
      }
    }

    if (best_site == -1) {
      break; /* local minimum: no neighbour improved */
    } else {
      ref_mv->as_mv.row += neighbors[best_site].row;
      ref_mv->as_mv.col += neighbors[best_site].col;
      best_address += (neighbors[best_site].row) * in_what_stride +
                      neighbors[best_site].col;
    }
  }

  /* Convert the whole-pel result to 1/8-pel units.  Multiply rather than
   * shift: the MV components may be negative, and left-shifting a negative
   * value is undefined behaviour.  This also matches the sadx4 variant.
   */
  this_mv.as_mv.row = ref_mv->as_mv.row * 8;
  this_mv.as_mv.col = ref_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1714
/* Iterative whole-pel refinement like vp8_refining_search_sad_c, but when
 * all four 1-away neighbours are inside the legal MV range their SADs are
 * computed in one fn_ptr->sdx4df call; otherwise each neighbour is
 * bounds-checked and evaluated individually.  ref_mv is updated in place.
 */
int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
                              int *mvcost[2], int_mv *center_mv) {
  MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  int i, j;
  short this_row_offset, this_col_offset;

  int what_stride = b->src_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  unsigned char *what = (*(b->base_src) + b->src);
  unsigned char *best_address =
      (unsigned char *)(base_pre + d->offset +
                        (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
  unsigned char *check_here;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  /* SAD costing operates on whole-pel vectors: drop the 1/8-pel fraction. */
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* Baseline cost of the starting vector. */
  bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
            mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);

  for (i = 0; i < search_range; ++i) {
    int best_site = -1;
    int all_in = 1;

    /* Four range checks decide whether every neighbour is legal. */
    all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
    all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
    all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
    all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);

    if (all_in) {
      unsigned int sad_array[4];
      const unsigned char *block_offset[4];
      /* Neighbour addresses in the same order as neighbors[]. */
      block_offset[0] = best_address - in_what_stride;
      block_offset[1] = best_address - 1;
      block_offset[2] = best_address + 1;
      block_offset[3] = best_address + in_what_stride;

      fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
                     sad_array);

      for (j = 0; j < 4; ++j) {
        if (sad_array[j] < bestsad) {
          this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
          this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
          sad_array[j] +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

          if (sad_array[j] < bestsad) {
            bestsad = sad_array[j];
            best_site = j;
          }
        }
      }
    } else {
      /* Scalar fallback with per-neighbour bounds checking. */
      for (j = 0; j < 4; ++j) {
        this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
        this_col_offset = ref_mv->as_mv.col + neighbors[j].col;

        if ((this_col_offset > x->mv_col_min) &&
            (this_col_offset < x->mv_col_max) &&
            (this_row_offset > x->mv_row_min) &&
            (this_row_offset < x->mv_row_max)) {
          check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
                       best_address;
          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

          if (thissad < bestsad) {
            this_mv.as_mv.row = this_row_offset;
            this_mv.as_mv.col = this_col_offset;
            thissad +=
                mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

            if (thissad < bestsad) {
              bestsad = thissad;
              best_site = j;
            }
          }
        }
      }
    }

    if (best_site == -1) {
      break; /* local minimum: no neighbour improved */
    } else {
      ref_mv->as_mv.row += neighbors[best_site].row;
      ref_mv->as_mv.col += neighbors[best_site].col;
      best_address += (neighbors[best_site].row) * in_what_stride +
                      neighbors[best_site].col;
    }
  }

  /* Convert the whole-pel result to 1/8-pel units. */
  this_mv.as_mv.row = ref_mv->as_mv.row * 8;
  this_mv.as_mv.col = ref_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1824
1825 #ifdef VP8_ENTROPY_STATS
print_mode_context(void)1826 void print_mode_context(void) {
1827 FILE *f = fopen("modecont.c", "w");
1828 int i, j;
1829
1830 fprintf(f, "#include \"entropy.h\"\n");
1831 fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
1832 fprintf(f, "{\n");
1833
1834 for (j = 0; j < 6; ++j) {
1835 fprintf(f, " { /* %d */\n", j);
1836 fprintf(f, " ");
1837
1838 for (i = 0; i < 4; ++i) {
1839 int overal_prob;
1840 int this_prob;
1841 int count;
1842
1843 /* Overall probs */
1844 count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
1845
1846 if (count)
1847 overal_prob = 256 * mv_mode_cts[i][0] / count;
1848 else
1849 overal_prob = 128;
1850
1851 if (overal_prob == 0) overal_prob = 1;
1852
1853 /* context probs */
1854 count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
1855
1856 if (count)
1857 this_prob = 256 * mv_ref_ct[j][i][0] / count;
1858 else
1859 this_prob = 128;
1860
1861 if (this_prob == 0) this_prob = 1;
1862
1863 fprintf(f, "%5d, ", this_prob);
1864 }
1865
1866 fprintf(f, " },\n");
1867 }
1868
1869 fprintf(f, "};\n");
1870 fclose(f);
1871 }
1872
1873 /* MV ref count VP8_ENTROPY_STATS stats code */
1874 #ifdef VP8_ENTROPY_STATS
init_mv_ref_counts()1875 void init_mv_ref_counts() {
1876 memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
1877 memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
1878 }
1879
/* Tally the outcome of the inter-mode decision tree for mode m.
 *
 * The coder tests ZEROMV, NEARESTMV, NEARMV, NEWMV in order; for each
 * decision d, counter index 0 records "tree stopped here" and index 1
 * records "fell through to the next test", bucketed by the reference
 * context ct[d].  mv_mode_cts accumulates the same outcomes without the
 * context split.
 */
void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) {
  if (m == ZEROMV) {
    ++mv_ref_ct[ct[0]][0][0];
    ++mv_mode_cts[0][0];
    return;
  }
  ++mv_ref_ct[ct[0]][0][1];
  ++mv_mode_cts[0][1];

  if (m == NEARESTMV) {
    ++mv_ref_ct[ct[1]][1][0];
    ++mv_mode_cts[1][0];
    return;
  }
  ++mv_ref_ct[ct[1]][1][1];
  ++mv_mode_cts[1][1];

  if (m == NEARMV) {
    ++mv_ref_ct[ct[2]][2][0];
    ++mv_mode_cts[2][0];
    return;
  }
  ++mv_ref_ct[ct[2]][2][1];
  ++mv_mode_cts[2][1];

  if (m == NEWMV) {
    ++mv_ref_ct[ct[3]][3][0];
    ++mv_mode_cts[3][0];
  } else {
    ++mv_ref_ct[ct[3]][3][1];
    ++mv_mode_cts[3][1];
  }
}
1913
1914 #endif /* END MV ref count VP8_ENTROPY_STATS stats code */
1915
1916 #endif
1917