/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 
11 #include "./vp8_rtcd.h"
12 #include "./vpx_dsp_rtcd.h"
13 #include "onyx_int.h"
14 #include "mcomp.h"
15 #include "vpx_mem/vpx_mem.h"
16 #include "vpx_config.h"
17 #include <stdio.h>
18 #include <limits.h>
19 #include <math.h>
20 #include "vp8/common/findnearmv.h"
21 #include "vp8/common/common.h"
22 #include "vpx_dsp/vpx_dsp_common.h"
23 
/* File-local counters that exist only in builds defining VP8_ENTROPY_STATS.
 * NOTE(review): presumably tallies of MV reference / mode decisions; nothing
 * in this part of the file reads or writes them -- confirm against their
 * users before relying on the dimensions.
 */
#if defined(VP8_ENTROPY_STATS)
static int mv_ref_ct[31][4][2];
static int mv_mode_cts[4][2];
#endif /* VP8_ENTROPY_STATS */
28 
/* Returns the weighted cost of coding |mv| relative to |ref|.
 *
 * mvcost[0] is looked up with the row difference and mvcost[1] with the
 * column difference; each difference is halved and clamped to [0, MVvals]
 * before it is used as an index. The summed table entries are multiplied by
 * |Weight| and scaled down by 128 (>> 7).
 *
 * MV costing is based on the distribution of vectors in the previous frame
 * and as such will tend to overstate the cost of vectors. In addition, coding
 * a new vector can have a knock-on effect on the cost of subsequent vectors
 * and on the quality of prediction from NEAR and NEAREST for subsequent
 * blocks. The "Weight" parameter allows, to a limited extent, for some
 * account to be taken of these factors.
 */
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) {
  const int row_delta = (mv->as_mv.row - ref->as_mv.row) >> 1;
  const int col_delta = (mv->as_mv.col - ref->as_mv.col) >> 1;
  const int row_cost = mvcost[0][clamp(row_delta, 0, MVvals)];
  const int col_cost = mvcost[1][clamp(col_delta, 0, MVvals)];

  return ((row_cost + col_cost) * Weight) >> 7;
}
43 
/* Returns the cost of coding |mv| relative to |ref|, expressed in error
 * units: the summed row/column table entries are multiplied by
 * |error_per_bit| and divided by 256 with rounding ((v + 128) >> 8).
 *
 * The table indices are the halved component differences clamped to
 * [0, MVvals]. A NULL |mvcost| disables MV costing and yields 0.
 */
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2],
                       int error_per_bit) {
  int row_idx;
  int col_idx;
  int rate;

  /* No cost tables supplied: the vector contributes nothing to the error. */
  if (!mvcost) return 0;

  row_idx = clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals);
  col_idx = clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals);
  rate = mvcost[0][row_idx] + mvcost[1][col_idx];

  return (rate * error_per_bit + 128) >> 8;
}
58 
/* Returns the SAD-domain cost of |mv| relative to |ref|, computed on a full
 * pixel basis: the raw row/column differences index mvsadcost[0] and
 * mvsadcost[1] directly (no halving, no clamping), and the summed entries are
 * multiplied by |error_per_bit| and divided by 256 with rounding.
 *
 * A NULL |mvsadcost| disables MV costing and yields 0.
 */
static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2],
                          int error_per_bit) {
  int row_diff;
  int col_diff;
  int rate;

  /* No cost tables supplied: the vector contributes nothing to the error. */
  if (!mvsadcost) return 0;

  row_diff = mv->as_mv.row - ref->as_mv.row;
  col_diff = mv->as_mv.col - ref->as_mv.col;
  rate = mvsadcost[0][row_diff] + mvsadcost[1][col_diff];

  return (rate * error_per_bit + 128) >> 8;
}
72 
/* Fills x->ss with the search sites of a 4-point pattern.
 *
 * Entry 0 is the centre (zero vector, zero offset). It is followed, for each
 * step length starting at MAX_FIRST_STEP and halving until it reaches zero,
 * by four sites in the order up, down, left, right. Each site stores its
 * (row, col) displacement and the matching buffer offset
 * row * stride + col. x->ss_count receives the number of entries written and
 * x->searches_per_step is set to 4.
 */
void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
  /* Unit (row, col) directions, in the order the sites are stored. */
  static const signed char kDirs[4][2] = {
    { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 }
  };
  int site = 0;
  int step;
  int k;

  /* The centre point always comes first. */
  x->ss[site].mv.col = 0;
  x->ss[site].mv.row = 0;
  x->ss[site].offset = 0;
  ++site;

  for (step = MAX_FIRST_STEP; step > 0; step /= 2) {
    for (k = 0; k < 4; ++k) {
      const int d_row = kDirs[k][0] * step;
      const int d_col = kDirs[k][1] * step;

      x->ss[site].mv.col = d_col;
      x->ss[site].mv.row = d_row;
      x->ss[site].offset = d_row * stride + d_col;
      ++site;
    }
  }

  x->ss_count = site;
  x->searches_per_step = 4;
}
116 
/* Fills x->ss with the search sites of an 8-point pattern.
 *
 * Entry 0 is the centre (zero vector, zero offset). It is followed, for each
 * step length starting at MAX_FIRST_STEP and halving until it reaches zero,
 * by eight sites: up, down, left, right, then the four diagonals
 * (up-left, up-right, down-left, down-right). Each site stores its
 * (row, col) displacement and the matching buffer offset
 * row * stride + col. x->ss_count receives the number of entries written and
 * x->searches_per_step is set to 8.
 */
void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) {
  /* Unit (row, col) directions, in the order the sites are stored. */
  static const signed char kDirs[8][2] = {
    { -1, 0 },  { 1, 0 },  { 0, -1 }, { 0, 1 },
    { -1, -1 }, { -1, 1 }, { 1, -1 }, { 1, 1 }
  };
  int site = 0;
  int step;
  int k;

  /* The centre point always comes first. */
  x->ss[site].mv.col = 0;
  x->ss[site].mv.row = 0;
  x->ss[site].offset = 0;
  ++site;

  for (step = MAX_FIRST_STEP; step > 0; step /= 2) {
    for (k = 0; k < 8; ++k) {
      const int d_row = kDirs[k][0] * step;
      const int d_col = kDirs[k][1] * step;

      x->ss[site].mv.col = d_col;
      x->ss[site].mv.row = d_row;
      x->ss[site].offset = d_row * stride + d_col;
      ++site;
    }
  }

  x->ss_count = site;
  x->searches_per_step = 8;
}
184 
185 /*
186  * To avoid the penalty for crossing cache-line read, preload the reference
187  * area in a small buffer, which is aligned to make sure there won't be crossing
188  * cache-line read while reading from this buffer. This reduced the cpu
189  * cycles spent on reading ref data in sub-pixel filter functions.
190  * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
191  * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
192  * could reduce the area.
193  */
194 
195 /* estimated cost of a motion vector (r,c) */
196 #define MVC(r, c)                                                             \
197   (mvcost                                                                     \
198        ? ((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128) >> 8 \
199        : 0)
200 /* pointer to predictor base of a motionvector */
201 #define PRE(r, c) (y + (((r) >> 2) * y_stride + ((c) >> 2) - (offset)))
202 /* convert motion vector component to offset for svf calc */
203 #define SP(x) (((x)&3) << 1)
204 /* returns subpixel variance error function. */
205 #define DIST(r, c) \
206   vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse)
207 #define IFMVCV(r, c, s, e) \
208   if (c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
209 /* returns distortion + motion vector cost */
210 #define ERR(r, c) (MVC(r, c) + DIST(r, c))
211 /* checks if (r,c) has better score than previous best */
212 #define CHECK_BETTER(v, r, c)                           \
213   IFMVCV(r, c,                                          \
214          {                                              \
215            thismse = DIST(r, c);                        \
216            if ((v = (MVC(r, c) + thismse)) < besterr) { \
217              besterr = v;                               \
218              br = r;                                    \
219              bc = c;                                    \
220              *distortion = thismse;                     \
221              *sse1 = sse;                               \
222            }                                            \
223          },                                             \
224          v = UINT_MAX;)
225 
vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK * x,BLOCK * b,BLOCKD * d,int_mv * bestmv,int_mv * ref_mv,int error_per_bit,const vp8_variance_fn_ptr_t * vfp,int * mvcost[2],int * distortion,unsigned int * sse1)226 int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
227                                              int_mv *bestmv, int_mv *ref_mv,
228                                              int error_per_bit,
229                                              const vp8_variance_fn_ptr_t *vfp,
230                                              int *mvcost[2], int *distortion,
231                                              unsigned int *sse1) {
232   unsigned char *z = (*(b->base_src) + b->src);
233 
234   int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
235   int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
236   int tr = br, tc = bc;
237   unsigned int besterr;
238   unsigned int left, right, up, down, diag;
239   unsigned int sse;
240   unsigned int whichdir;
241   unsigned int halfiters = 4;
242   unsigned int quarteriters = 4;
243   int thismse;
244 
245   int minc = VPXMAX(x->mv_col_min * 4,
246                     (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
247   int maxc = VPXMIN(x->mv_col_max * 4,
248                     (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
249   int minr = VPXMAX(x->mv_row_min * 4,
250                     (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
251   int maxr = VPXMIN(x->mv_row_max * 4,
252                     (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
253 
254   int y_stride;
255   int offset;
256   int pre_stride = x->e_mbd.pre.y_stride;
257   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
258 
259 #if ARCH_X86 || ARCH_X86_64
260   MACROBLOCKD *xd = &x->e_mbd;
261   unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
262                        bestmv->as_mv.col;
263   unsigned char *y;
264   int buf_r1, buf_r2, buf_c1;
265 
266   /* Clamping to avoid out-of-range data access */
267   buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)
268                ? (bestmv->as_mv.row - x->mv_row_min)
269                : 3;
270   buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)
271                ? (x->mv_row_max - bestmv->as_mv.row)
272                : 3;
273   buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)
274                ? (bestmv->as_mv.col - x->mv_col_min)
275                : 3;
276   y_stride = 32;
277 
278   /* Copy to intermediate buffer before searching. */
279   vfp->copymem(y_0 - buf_c1 - pre_stride * buf_r1, pre_stride, xd->y_buf,
280                y_stride, 16 + buf_r1 + buf_r2);
281   y = xd->y_buf + y_stride * buf_r1 + buf_c1;
282 #else
283   unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
284                      bestmv->as_mv.col;
285   y_stride = pre_stride;
286 #endif
287 
288   offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
289 
290   /* central mv */
291   bestmv->as_mv.row *= 8;
292   bestmv->as_mv.col *= 8;
293 
294   /* calculate central point error */
295   besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
296   *distortion = besterr;
297   besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
298 
299   /* TODO: Each subsequent iteration checks at least one point in common
300    * with the last iteration could be 2 ( if diag selected)
301    */
302   while (--halfiters) {
303     /* 1/2 pel */
304     CHECK_BETTER(left, tr, tc - 2);
305     CHECK_BETTER(right, tr, tc + 2);
306     CHECK_BETTER(up, tr - 2, tc);
307     CHECK_BETTER(down, tr + 2, tc);
308 
309     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
310 
311     switch (whichdir) {
312       case 0: CHECK_BETTER(diag, tr - 2, tc - 2); break;
313       case 1: CHECK_BETTER(diag, tr - 2, tc + 2); break;
314       case 2: CHECK_BETTER(diag, tr + 2, tc - 2); break;
315       case 3: CHECK_BETTER(diag, tr + 2, tc + 2); break;
316     }
317 
318     /* no reason to check the same one again. */
319     if (tr == br && tc == bc) break;
320 
321     tr = br;
322     tc = bc;
323   }
324 
325   /* TODO: Each subsequent iteration checks at least one point in common
326    * with the last iteration could be 2 ( if diag selected)
327    */
328 
329   /* 1/4 pel */
330   while (--quarteriters) {
331     CHECK_BETTER(left, tr, tc - 1);
332     CHECK_BETTER(right, tr, tc + 1);
333     CHECK_BETTER(up, tr - 1, tc);
334     CHECK_BETTER(down, tr + 1, tc);
335 
336     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
337 
338     switch (whichdir) {
339       case 0: CHECK_BETTER(diag, tr - 1, tc - 1); break;
340       case 1: CHECK_BETTER(diag, tr - 1, tc + 1); break;
341       case 2: CHECK_BETTER(diag, tr + 1, tc - 1); break;
342       case 3: CHECK_BETTER(diag, tr + 1, tc + 1); break;
343     }
344 
345     /* no reason to check the same one again. */
346     if (tr == br && tc == bc) break;
347 
348     tr = br;
349     tc = bc;
350   }
351 
352   bestmv->as_mv.row = br * 2;
353   bestmv->as_mv.col = bc * 2;
354 
355   if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
356       (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) {
357     return INT_MAX;
358   }
359 
360   return besterr;
361 }
362 #undef MVC
363 #undef PRE
364 #undef SP
365 #undef DIST
366 #undef IFMVCV
367 #undef ERR
368 #undef CHECK_BETTER
369 
/* Private to vp8_find_best_sub_pixel_step(): scores the candidate currently
 * held in this_mv. The sub-pixel variance is taken at |pred| with offsets
 * (xoff, yoff); the variance plus MV cost is stored in |cost|, and the
 * candidate replaces the running best when that cost is strictly lower.
 */
#define SUBPEL_TRY(cost, pred, xoff, yoff)                                   \
  do {                                                                       \
    thismse =                                                                \
        vfp->svf((pred), y_stride, (xoff), (yoff), z, b->src_stride, &sse);  \
    (cost) = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); \
    if ((cost) < bestmse) {                                                  \
      *bestmv = this_mv;                                                     \
      bestmse = (cost);                                                      \
      *distortion = thismse;                                                 \
      *sse1 = sse;                                                           \
    }                                                                        \
  } while (0)

/* Refines |bestmv| with one half-pel pass followed by one quarter-pel pass.
 *
 * On entry |bestmv| is scaled as a whole-pixel position; it is multiplied by
 * 8 before the search. Each pass tests left, right, up and down of the
 * current centre and then the single diagonal between the cheaper horizontal
 * and the cheaper vertical neighbour. The half-pel pass moves by 4 and the
 * quarter-pel pass by 2 in the scaled units.
 *
 * Outputs: |bestmv| is updated to the best candidate, |distortion| to its
 * variance-function result and |sse1| to the matching sse value.
 * Returns the best variance-plus-MV-cost found.
 */
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                 int_mv *bestmv, int_mv *ref_mv,
                                 int error_per_bit,
                                 const vp8_variance_fn_ptr_t *vfp,
                                 int *mvcost[2], int *distortion,
                                 unsigned int *sse1) {
  int bestmse = INT_MAX;
  int_mv startmv;
  int_mv this_mv;
  unsigned char *z = *(b->base_src) + b->src;
  unsigned char *pred;
  int left, right, up, down, diag;
  unsigned int sse;
  int whichdir;
  int thismse;
  int y_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86 || ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;

  y_stride = 32;
  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
  vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
  y = xd->y_buf + y_stride + 1;
#else
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  /* central mv */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;
  startmv = *bestmv;

  /* calculate central point error */
  bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = bestmse;
  bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* ---- half pel pass ---- */

  /* left, then right ("halfpix" horizontal variance) */
  this_mv.as_mv.row = startmv.as_mv.row;
  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
  SUBPEL_TRY(left, y - 1, 4, 0);

  this_mv.as_mv.col += 8;
  SUBPEL_TRY(right, y, 4, 0);

  /* up, then down ("halfpix" vertical variance) */
  this_mv.as_mv.col = startmv.as_mv.col;
  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
  SUBPEL_TRY(up, y - y_stride, 0, 4);

  this_mv.as_mv.row += 8;
  SUBPEL_TRY(down, y, 0, 4);

  /* One more diagonal ("halfpix" horizontal/vertical variance).
   * bit 0 set: go right, else left; bit 1 set: go down, else up.
   * Going up or left starts in the previous row or column of the predictor.
   */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  this_mv = startmv;
  pred = y;

  if (whichdir & 2) {
    this_mv.as_mv.row += 4;
  } else {
    this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    pred -= y_stride;
  }

  if (whichdir & 1) {
    this_mv.as_mv.col += 4;
  } else {
    this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    pred -= 1;
  }

  SUBPEL_TRY(diag, pred, 4, 4);

  /* ---- quarter pel pass ---- */

  /* Re-base the predictor if the half pel winner lies above / left of the
   * starting pixel.
   */
  if (bestmv->as_mv.row < startmv.as_mv.row) y -= y_stride;

  if (bestmv->as_mv.col < startmv.as_mv.col) y--;

  startmv = *bestmv;

  /* Left, then right. When the centre has no fractional column part, the
   * step to the left crosses into the previous pixel column: the component
   * becomes (col - 8) | 6, whose low three bits are 6.
   */
  this_mv.as_mv.row = startmv.as_mv.row;
  pred = y;

  if (startmv.as_mv.col & 7) {
    this_mv.as_mv.col = startmv.as_mv.col - 2;
  } else {
    this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    pred = y - 1;
  }

  SUBPEL_TRY(left, pred, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7);

  this_mv.as_mv.col += 4;
  SUBPEL_TRY(right, y, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7);

  /* up, then down -- same wrap rule applied to the row component */
  this_mv.as_mv.col = startmv.as_mv.col;
  pred = y;

  if (startmv.as_mv.row & 7) {
    this_mv.as_mv.row = startmv.as_mv.row - 2;
  } else {
    this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    pred = y - y_stride;
  }

  SUBPEL_TRY(up, pred, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7);

  this_mv.as_mv.row += 4;
  SUBPEL_TRY(down, y, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7);

  /* one more diagonal, chosen exactly as in the half pel pass */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  this_mv = startmv;
  pred = y;

  if (whichdir & 2) {
    this_mv.as_mv.row += 2;
  } else if (startmv.as_mv.row & 7) {
    this_mv.as_mv.row -= 2;
  } else {
    this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    pred -= y_stride;
  }

  if (whichdir & 1) {
    this_mv.as_mv.col += 2;
  } else if (startmv.as_mv.col & 7) {
    this_mv.as_mv.col -= 2;
  } else {
    this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    pred -= 1;
  }

  SUBPEL_TRY(diag, pred, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7);

  return bestmse;
}
#undef SUBPEL_TRY
665 
/* Private to vp8_find_best_half_pixel_step(): scores the candidate currently
 * held in this_mv. The sub-pixel variance is taken at |pred| with offsets
 * (xoff, yoff); the variance plus MV cost is stored in |cost|, and the
 * candidate replaces the running best when that cost is strictly lower.
 */
#define HALFPEL_TRY(cost, pred, xoff, yoff)                                  \
  do {                                                                       \
    thismse =                                                                \
        vfp->svf((pred), y_stride, (xoff), (yoff), z, b->src_stride, &sse);  \
    (cost) = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); \
    if ((cost) < bestmse) {                                                  \
      *bestmv = this_mv;                                                     \
      bestmse = (cost);                                                      \
      *distortion = thismse;                                                 \
      *sse1 = sse;                                                           \
    }                                                                        \
  } while (0)

/* Refines |bestmv| with a single half-pel pass.
 *
 * On entry |bestmv| is scaled as a whole-pixel position; it is multiplied by
 * 8 before the search. The pass tests left, right, up and down of the centre
 * (a move of 4 in the scaled units) and then the single diagonal between the
 * cheaper horizontal and the cheaper vertical neighbour.
 *
 * Outputs: |bestmv| is updated to the best candidate, |distortion| to its
 * variance-function result and |sse1| to the matching sse value.
 * Returns the best variance-plus-MV-cost found.
 */
int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                  int_mv *bestmv, int_mv *ref_mv,
                                  int error_per_bit,
                                  const vp8_variance_fn_ptr_t *vfp,
                                  int *mvcost[2], int *distortion,
                                  unsigned int *sse1) {
  int bestmse = INT_MAX;
  int_mv startmv;
  int_mv this_mv;
  unsigned char *z = *(b->base_src) + b->src;
  unsigned char *pred;
  int left, right, up, down, diag;
  unsigned int sse;
  int whichdir;
  int thismse;
  int y_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86 || ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;

  y_stride = 32;
  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
  vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
  y = xd->y_buf + y_stride + 1;
#else
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  /* central mv */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;
  startmv = *bestmv;

  /* calculate central point error */
  bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = bestmse;
  bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* left, then right ("halfpix" horizontal variance) */
  this_mv.as_mv.row = startmv.as_mv.row;
  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
  HALFPEL_TRY(left, y - 1, 4, 0);

  this_mv.as_mv.col += 8;
  HALFPEL_TRY(right, y, 4, 0);

  /* up, then down ("halfpix" vertical variance) */
  this_mv.as_mv.col = startmv.as_mv.col;
  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
  HALFPEL_TRY(up, y - y_stride, 0, 4);

  this_mv.as_mv.row += 8;
  HALFPEL_TRY(down, y, 0, 4);

  /* One more diagonal ("halfpix" horizontal/vertical variance).
   * bit 0 set: go right, else left; bit 1 set: go down, else up.
   * Going up or left starts in the previous row or column of the predictor.
   */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  this_mv = startmv;
  pred = y;

  if (whichdir & 2) {
    this_mv.as_mv.row += 4;
  } else {
    this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    pred -= y_stride;
  }

  if (whichdir & 1) {
    this_mv.as_mv.col += 4;
  } else {
    this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    pred -= 1;
  }

  HALFPEL_TRY(diag, pred, 4, 4);

  return bestmse;
}
#undef HALFPEL_TRY
806 
/*
 * Statement macros for the integer-pel searches. They operate on locals of
 * the expanding function by name: x, br, bc, all_in, this_mv, thissad,
 * bestsad, best_site, i, fcenter_mv, mvsadcost and sad_per_bit.
 *
 * They are deliberately plain brace blocks rather than do { } while (0):
 * CHECK_POINT relies on `continue` reaching the loop of the expanding
 * function, which a do/while wrapper would swallow.
 * NOTE(review): call sites are outside this view and may omit the trailing
 * semicolon, so the expansion shape must stay a self-terminated block.
 */

/* Sets all_in to 1 when every position within |range| of (br, bc) along both
 * axes lies inside the MV limits stored in x, and to 0 otherwise. |range| is
 * parenthesized so that an expression argument such as `a + b` is applied as
 * one value.
 */
#define CHECK_BOUNDS(range)                      \
  {                                              \
    all_in = 1;                                  \
    all_in &= ((br - (range)) >= x->mv_row_min); \
    all_in &= ((br + (range)) <= x->mv_row_max); \
    all_in &= ((bc - (range)) >= x->mv_col_min); \
    all_in &= ((bc + (range)) <= x->mv_col_max); \
  }

/* Skips the current iteration of the enclosing loop when this_mv lies
 * outside the MV limits stored in x. Must be expanded directly inside a
 * loop body.
 */
#define CHECK_POINT                                  \
  {                                                  \
    if (this_mv.as_mv.col < x->mv_col_min) continue; \
    if (this_mv.as_mv.col > x->mv_col_max) continue; \
    if (this_mv.as_mv.row < x->mv_row_min) continue; \
    if (this_mv.as_mv.row > x->mv_row_max) continue; \
  }

/* Records site i as the best one when thissad, after the MV cost relative to
 * fcenter_mv has been added, is below bestsad. The cost is only added (and
 * thissad only modified) when the raw SAD already beats bestsad.
 */
#define CHECK_BETTER                                                     \
  {                                                                      \
    if (thissad < bestsad) {                                             \
      thissad +=                                                         \
          mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); \
      if (thissad < bestsad) {                                           \
        bestsad = thissad;                                               \
        best_site = i;                                                   \
      }                                                                  \
    }                                                                    \
  }
835 
836 static const MV next_chkpts[6][3] = {
837   { { -2, 0 }, { -1, -2 }, { 1, -2 } }, { { -1, -2 }, { 1, -2 }, { 2, 0 } },
838   { { 1, -2 }, { 2, 0 }, { 1, 2 } },    { { 2, 0 }, { 1, 2 }, { -1, 2 } },
839   { { 1, 2 }, { -1, 2 }, { -2, 0 } },   { { -1, 2 }, { -2, 0 }, { -1, -2 } }
840 };
841 
/* Hexagon-pattern motion search on whole positions of the reference plane.
 *
 * ref_mv is clamped in place to the limits held in x and used as the start
 * position. Every candidate is scored with vfp->sdf() plus
 * mvsad_err_cost() taken relative to center_mv >> 3. The search tests the
 * six hexagon vertices, then performs up to (hex range - 1) three-point
 * follow-up steps from next_chkpts[], and finally up to eight rounds over
 * the four directly adjacent positions.
 *
 * When CONFIG_MULTI_RES_ENCODING is set, search_param shortens the hexagon
 * stage (values >= 6 skip it entirely); otherwise search_param is ignored.
 * mvcost is unused.
 *
 * The winning position is written to best_mv and its score (SAD plus
 * motion-vector cost) is returned.
 */
int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                   int_mv *best_mv, int search_param, int sad_per_bit,
                   const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2],
                   int *mvcost[2], int_mv *center_mv) {
  /* Offsets of the six hexagon vertices and of the four adjacent points. */
  MV hex[6] = { { -1, -2 }, { 1, -2 }, { 2, 0 },
                { 1, 2 },   { -1, 2 }, { -2, 0 } };
  MV neighbors[4] = { { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 } };

  unsigned char *src_blk = (*(b->base_src) + b->src);
  int src_stride = b->src_stride;
  int ref_stride = x->e_mbd.pre.y_stride;
  unsigned char *ref_frame = x->e_mbd.pre.y_buffer;
  unsigned char *ref_origin;
  unsigned char *cand;

  /* The names below are referenced by the CHECK_* macros. */
  int_mv this_mv;
  int_mv fcenter_mv;
  unsigned int bestsad;
  unsigned int thissad;
  int br, bc;
  int i;
  int all_in;
  int best_site = -1;

  int step;
  int dir = -1; /* hexagon vertex index of the most recent move */
  int max_hex_steps = 127;
  int max_dia_steps = 8;

  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  (void)mvcost;

  /* adjust ref_mv to make sure it is within MV range */
  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  br = ref_mv->as_mv.row;
  bc = ref_mv->as_mv.col;

  /* Score the start position. */
  ref_origin = (unsigned char *)(ref_frame + d->offset);
  cand = ref_origin + (br * (ref_stride)) + bc;
  this_mv.as_mv.row = br;
  this_mv.as_mv.col = bc;
  bestsad = vfp->sdf(src_blk, src_stride, cand, ref_stride) +
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

#if CONFIG_MULTI_RES_ENCODING
  /* Lower search range based on prediction info */
  if (search_param >= 6) {
    goto cal_neighbors;
  } else if (search_param >= 5) {
    max_hex_steps = 4;
  } else if (search_param >= 4) {
    max_hex_steps = 6;
  } else if (search_param >= 3) {
    max_hex_steps = 15;
  } else if (search_param >= 2) {
    max_hex_steps = 31;
  } else if (search_param >= 1) {
    max_hex_steps = 63;
  }

  max_dia_steps = 8;
#else
  (void)search_param;
#endif

  /* Stage 1: all six hexagon vertices around the start position. */
  CHECK_BOUNDS(2)

  if (all_in) {
    for (i = 0; i < 6; ++i) {
      this_mv.as_mv.row = br + hex[i].row;
      this_mv.as_mv.col = bc + hex[i].col;
      cand = ref_origin + (this_mv.as_mv.row * ref_stride) +
             this_mv.as_mv.col;
      thissad = vfp->sdf(src_blk, src_stride, cand, ref_stride);
      CHECK_BETTER
    }
  } else {
    for (i = 0; i < 6; ++i) {
      this_mv.as_mv.row = br + hex[i].row;
      this_mv.as_mv.col = bc + hex[i].col;
      CHECK_POINT
      cand = ref_origin + (this_mv.as_mv.row * ref_stride) +
             this_mv.as_mv.col;
      thissad = vfp->sdf(src_blk, src_stride, cand, ref_stride);
      CHECK_BETTER
    }
  }

  /* No vertex beat the centre: go straight to the small refinement. */
  if (best_site == -1) goto cal_neighbors;

  br += hex[best_site].row;
  bc += hex[best_site].col;
  dir = best_site;

  /* Stage 2: after each move only three new vertices need testing. */
  for (step = 1; step < max_hex_steps; ++step) {
    best_site = -1;
    CHECK_BOUNDS(2)

    if (all_in) {
      for (i = 0; i < 3; ++i) {
        this_mv.as_mv.row = br + next_chkpts[dir][i].row;
        this_mv.as_mv.col = bc + next_chkpts[dir][i].col;
        cand = ref_origin + (this_mv.as_mv.row * (ref_stride)) +
               this_mv.as_mv.col;
        thissad = vfp->sdf(src_blk, src_stride, cand, ref_stride);
        CHECK_BETTER
      }
    } else {
      for (i = 0; i < 3; ++i) {
        this_mv.as_mv.row = br + next_chkpts[dir][i].row;
        this_mv.as_mv.col = bc + next_chkpts[dir][i].col;
        CHECK_POINT
        cand = ref_origin + (this_mv.as_mv.row * (ref_stride)) +
               this_mv.as_mv.col;
        thissad = vfp->sdf(src_blk, src_stride, cand, ref_stride);
        CHECK_BETTER
      }
    }

    if (best_site == -1) break;

    br += next_chkpts[dir][best_site].row;
    bc += next_chkpts[dir][best_site].col;

    /* Advance dir to the vertex just taken, wrapped back into 0..5. */
    dir += 5 + best_site;
    if (dir >= 12) {
      dir -= 12;
    } else if (dir >= 6) {
      dir -= 6;
    }
  }

/* check 4 1-away neighbors */
cal_neighbors:
  for (step = 0; step < max_dia_steps; ++step) {
    best_site = -1;
    CHECK_BOUNDS(1)

    if (all_in) {
      for (i = 0; i < 4; ++i) {
        this_mv.as_mv.row = br + neighbors[i].row;
        this_mv.as_mv.col = bc + neighbors[i].col;
        cand = ref_origin + (this_mv.as_mv.row * (ref_stride)) +
               this_mv.as_mv.col;
        thissad = vfp->sdf(src_blk, src_stride, cand, ref_stride);
        CHECK_BETTER
      }
    } else {
      for (i = 0; i < 4; ++i) {
        this_mv.as_mv.row = br + neighbors[i].row;
        this_mv.as_mv.col = bc + neighbors[i].col;
        CHECK_POINT
        cand = ref_origin + (this_mv.as_mv.row * (ref_stride)) +
               this_mv.as_mv.col;
        thissad = vfp->sdf(src_blk, src_stride, cand, ref_stride);
        CHECK_BETTER
      }
    }

    if (best_site == -1) break;

    br += neighbors[best_site].row;
    bc += neighbors[best_site].col;
  }

  best_mv->as_mv.row = br;
  best_mv->as_mv.col = bc;

  return bestsad;
}
#undef CHECK_BOUNDS
#undef CHECK_POINT
#undef CHECK_BETTER
1024 
/* Step-pattern ("diamond") motion search, plain C version.
 *
 * ref_mv is clamped in place to the limits held in x and becomes the start
 * position. For each step, x->searches_per_step candidate sites from the
 * x->ss[] table are scored with fn_ptr->sdf() plus mvsad_err_cost()
 * relative to center_mv >> 3; candidates are only tested when strictly
 * inside the row/column limits. search_param skips that many leading steps
 * of the site table.
 *
 * Outputs:
 *   best_mv - best position found.
 *   num00   - number of steps that produced no new best site while the
 *             best position was still the start position.
 *
 * Returns fn_ptr->vf() at the best position plus mv_err_cost() of
 * best_mv * 8 relative to center_mv.
 */
int vp8_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                             int_mv *best_mv, int search_param, int sad_per_bit,
                             int *num00, vp8_variance_fn_ptr_t *fn_ptr,
                             int *mvcost[2], int_mv *center_mv) {
  int i, j, step;

  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  unsigned char *best_address;

  int tot_steps;
  int_mv this_mv;

  unsigned int bestsad;
  unsigned int thissad;
  int best_site = 0;
  int last_site = 0;

  int ref_row;
  int ref_col;
  int this_row_offset;
  int this_col_offset;
  search_site *ss;

  unsigned char *check_here;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  ref_row = ref_mv->as_mv.row;
  ref_col = ref_mv->as_mv.col;
  *num00 = 0;
  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Work out the start point for the search */
  in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
                              ref_col);
  best_address = in_what;

  /* Check the starting position */
  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* search_param determines the length of the initial step and hence
   * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
   * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   */
  ss = &x->ss[search_param * x->searches_per_step];
  tot_steps = (x->ss_count / x->searches_per_step) - search_param;

  /* Site index runs continuously across steps, starting at entry 1. */
  i = 1;

  for (step = 0; step < tot_steps; ++step) {
    for (j = 0; j < x->searches_per_step; ++j) {
      /* Trap illegal vectors */
      this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
      this_col_offset = best_mv->as_mv.col + ss[i].mv.col;

      if ((this_col_offset > x->mv_col_min) &&
          (this_col_offset < x->mv_col_max) &&
          (this_row_offset > x->mv_row_min) &&
          (this_row_offset < x->mv_row_max)) {
        check_here = ss[i].offset + best_address;
        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

        /* Only pay for the MV cost when the raw SAD is already better. */
        if (thissad < bestsad) {
          this_mv.as_mv.row = this_row_offset;
          this_mv.as_mv.col = this_col_offset;
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_site = i;
          }
        }
      }

      i++;
    }

    if (best_site != last_site) {
      /* Re-centre on the best site of this step. */
      best_mv->as_mv.row += ss[best_site].mv.row;
      best_mv->as_mv.col += ss[best_site].mv.col;
      best_address += ss[best_site].offset;
      last_site = best_site;
    } else if (best_address == in_what) {
      (*num00)++;
    }
  }

  /* Multiply rather than shift: the components may be negative, and
   * left-shifting a negative value is undefined behaviour in C.
   */
  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  this_mv.as_mv.col = best_mv->as_mv.col * 8;

  /* thissad is reused as the output argument of vf(). */
  return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1136 
/* Step-pattern ("diamond") motion search that scores four candidates per
 * call through fn_ptr->sdx4df() whenever a whole step lies inside the
 * motion-vector limits, and falls back to per-candidate fn_ptr->sdf()
 * calls with individual bounds checks otherwise.
 *
 * ref_mv is clamped in place to the limits held in x and becomes the start
 * position. Candidate sites come from x->ss[], x->searches_per_step per
 * step; search_param skips that many leading steps of the table. Scores
 * are SAD plus mvsad_err_cost() relative to center_mv >> 3.
 *
 * Outputs:
 *   best_mv - best position found.
 *   num00   - number of steps that produced no new best site while the
 *             best position was still the start position.
 *
 * Returns fn_ptr->vf() at the best position plus mv_err_cost() of
 * best_mv * 8 relative to center_mv.
 */
int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                             int_mv *best_mv, int search_param, int sad_per_bit,
                             int *num00, vp8_variance_fn_ptr_t *fn_ptr,
                             int *mvcost[2], int_mv *center_mv) {
  unsigned char *src_blk = (*(b->base_src) + b->src);
  int src_stride = b->src_stride;
  int ref_stride = x->e_mbd.pre.y_stride;
  unsigned char *ref_frame = x->e_mbd.pre.y_buffer;
  unsigned char *start_addr;
  unsigned char *best_addr;
  unsigned char *cand_addr;

  search_site *ss;
  int *mvsadcost[2];
  int_mv fcenter_mv;
  int_mv cand_mv;

  unsigned int bestsad;
  unsigned int cand_sad;

  int start_row, start_col;
  int cand_row, cand_col;
  int total_steps;
  int best_site = 0;
  int last_site = 0;
  int site, n, step;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  start_row = ref_mv->as_mv.row;
  start_col = ref_mv->as_mv.col;
  *num00 = 0;
  best_mv->as_mv.row = start_row;
  best_mv->as_mv.col = start_col;

  /* Address of the start position in the reference plane. */
  start_addr = (unsigned char *)(ref_frame + d->offset +
                                 (start_row * ref_stride) + start_col);
  best_addr = start_addr;

  /* Score the start position. */
  bestsad = fn_ptr->sdf(src_blk, src_stride, start_addr, ref_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* search_param selects how far into the site table the search begins,
   * which shortens both the first step and the number of steps.
   */
  ss = &x->ss[search_param * x->searches_per_step];
  total_steps = (x->ss_count / x->searches_per_step) - search_param;

  /* Site index runs continuously across steps, starting at entry 1. */
  site = 1;

  for (step = 0; step < total_steps; ++step) {
    /* Four comparisons against the first four sites of the step decide
     * whether every candidate of the step is inside the limits.
     */
    const int all_in =
        ((best_mv->as_mv.row + ss[site].mv.row) > x->mv_row_min) &&
        ((best_mv->as_mv.row + ss[site + 1].mv.row) < x->mv_row_max) &&
        ((best_mv->as_mv.col + ss[site + 2].mv.col) > x->mv_col_min) &&
        ((best_mv->as_mv.col + ss[site + 3].mv.col) < x->mv_col_max);

    if (all_in) {
      unsigned int sad_array[4];

      for (n = 0; n < x->searches_per_step; n += 4) {
        const unsigned char *block_offset[4];
        int t;

        for (t = 0; t < 4; ++t) {
          block_offset[t] = ss[site + t].offset + best_addr;
        }

        fn_ptr->sdx4df(src_blk, src_stride, block_offset, ref_stride,
                       sad_array);

        for (t = 0; t < 4; ++t, ++site) {
          cand_sad = sad_array[t];
          if (cand_sad >= bestsad) continue;

          cand_mv.as_mv.row = best_mv->as_mv.row + ss[site].mv.row;
          cand_mv.as_mv.col = best_mv->as_mv.col + ss[site].mv.col;
          cand_sad +=
              mvsad_err_cost(&cand_mv, &fcenter_mv, mvsadcost, sad_per_bit);

          if (cand_sad < bestsad) {
            bestsad = cand_sad;
            best_site = site;
          }
        }
      }
    } else {
      for (n = 0; n < x->searches_per_step; ++n, ++site) {
        /* Trap illegal vectors */
        cand_row = best_mv->as_mv.row + ss[site].mv.row;
        cand_col = best_mv->as_mv.col + ss[site].mv.col;

        if (!((cand_col > x->mv_col_min) && (cand_col < x->mv_col_max) &&
              (cand_row > x->mv_row_min) && (cand_row < x->mv_row_max))) {
          continue;
        }

        cand_addr = ss[site].offset + best_addr;
        cand_sad = fn_ptr->sdf(src_blk, src_stride, cand_addr, ref_stride);
        if (cand_sad >= bestsad) continue;

        cand_mv.as_mv.row = cand_row;
        cand_mv.as_mv.col = cand_col;
        cand_sad +=
            mvsad_err_cost(&cand_mv, &fcenter_mv, mvsadcost, sad_per_bit);

        if (cand_sad < bestsad) {
          bestsad = cand_sad;
          best_site = site;
        }
      }
    }

    if (best_site != last_site) {
      /* Re-centre on the best site of this step. */
      best_mv->as_mv.row += ss[best_site].mv.row;
      best_mv->as_mv.col += ss[best_site].mv.col;
      best_addr += ss[best_site].offset;
      last_site = best_site;
    } else if (best_addr == start_addr) {
      (*num00)++;
    }
  }

  cand_mv.as_mv.row = best_mv->as_mv.row * 8;
  cand_mv.as_mv.col = best_mv->as_mv.col * 8;

  /* cand_sad is reused as the output argument of vf(). */
  return fn_ptr->vf(src_blk, src_stride, best_addr, ref_stride, &cand_sad) +
         mv_err_cost(&cand_mv, center_mv, mvcost, x->errorperbit);
}
1285 
/* Exhaustive motion search, plain C version.
 *
 * Scores the position given by ref_mv first, then every position with
 * row in [row_min, row_max) and column in [col_min, col_max), where the
 * window is ref_mv +/- distance clipped to the limits held in x (the upper
 * bounds are not visited by the loops). Each score is fn_ptr->sdf() plus
 * mvsad_err_cost() relative to center_mv >> 3.
 *
 * The best position is written to d->bmi.mv. Returns fn_ptr->vf() at that
 * position plus mv_err_cost() of the position * 8 relative to center_mv.
 */
int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv) {
  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  int mv_stride = pre_stride;
  unsigned char *bestaddress;
  int_mv *best_mv = &d->bmi.mv;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;
  int r, c;

  unsigned char *check_here;

  int ref_row = ref_mv->as_mv.row;
  int ref_col = ref_mv->as_mv.col;

  int row_min = ref_row - distance;
  int row_max = ref_row + distance;
  int col_min = ref_col - distance;
  int col_max = ref_col + distance;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* Work out the mid point for the search */
  in_what = base_pre + d->offset;
  bestaddress = in_what + (ref_row * pre_stride) + ref_col;

  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Baseline value at the centre */
  bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* Apply further limits to prevent us looking using vectors that
   * stretch beyond the UMV border
   */
  if (col_min < x->mv_col_min) col_min = x->mv_col_min;

  if (col_max > x->mv_col_max) col_max = x->mv_col_max;

  if (row_min < x->mv_row_min) row_min = x->mv_row_min;

  if (row_max > x->mv_row_max) row_max = x->mv_row_max;

  for (r = row_min; r < row_max; ++r) {
    this_mv.as_mv.row = r;
    /* First candidate address of this row; advanced by one per column. */
    check_here = r * mv_stride + in_what + col_min;

    for (c = col_min; c < col_max; ++c) {
      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

      this_mv.as_mv.col = c;
      thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

      if (thissad < bestsad) {
        bestsad = thissad;
        best_mv->as_mv.row = r;
        best_mv->as_mv.col = c;
        bestaddress = check_here;
      }

      check_here++;
    }
  }

  /* Multiply rather than shift: the components may be negative, and
   * left-shifting a negative value is undefined behaviour in C.
   */
  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  this_mv.as_mv.col = best_mv->as_mv.col * 8;

  /* thissad is reused as the output argument of vf(). */
  return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1371 
/* Exhaustive motion search that scores three horizontally consecutive
 * positions per fn_ptr->sdx3f() call, finishing each row with single
 * fn_ptr->sdf() calls for the leftover columns.
 *
 * Scores the position given by ref_mv first, then every position with
 * row in [row_min, row_max) and column in [col_min, col_max), where the
 * window is ref_mv +/- distance clipped to the limits held in x (the upper
 * bounds are not visited by the loops). The motion-vector cost from
 * mvsad_err_cost() (relative to center_mv >> 3) is only added once the raw
 * SAD already beats the current best.
 *
 * The best position is written to d->bmi.mv. Returns fn_ptr->vf() at that
 * position plus mv_err_cost() of the position * 8 relative to center_mv.
 */
int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv) {
  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  int mv_stride = pre_stride;
  unsigned char *bestaddress;
  int_mv *best_mv = &d->bmi.mv;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;
  int r, c;

  unsigned char *check_here;

  int ref_row = ref_mv->as_mv.row;
  int ref_col = ref_mv->as_mv.col;

  int row_min = ref_row - distance;
  int row_max = ref_row + distance;
  int col_min = ref_col - distance;
  int col_max = ref_col + distance;

  unsigned int sad_array[3];

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* Work out the mid point for the search */
  in_what = base_pre + d->offset;
  bestaddress = in_what + (ref_row * pre_stride) + ref_col;

  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Baseline value at the centre */
  bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* Apply further limits to prevent us looking using vectors that stretch
   * beyond the UMV border
   */
  if (col_min < x->mv_col_min) col_min = x->mv_col_min;

  if (col_max > x->mv_col_max) col_max = x->mv_col_max;

  if (row_min < x->mv_row_min) row_min = x->mv_row_min;

  if (row_max > x->mv_row_max) row_max = x->mv_row_max;

  for (r = row_min; r < row_max; ++r) {
    this_mv.as_mv.row = r;
    /* First candidate address of this row; advanced by one per column. */
    check_here = r * mv_stride + in_what + col_min;
    c = col_min;

    /* Groups of three columns while at least three remain in the row. */
    while ((c + 2) < col_max) {
      int i;

      fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);

      for (i = 0; i < 3; ++i) {
        thissad = sad_array[i];

        if (thissad < bestsad) {
          this_mv.as_mv.col = c;
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_mv->as_mv.row = r;
            best_mv->as_mv.col = c;
            bestaddress = check_here;
          }
        }

        check_here++;
        c++;
      }
    }

    /* Remaining columns of the row, one at a time. */
    while (c < col_max) {
      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

      if (thissad < bestsad) {
        this_mv.as_mv.col = c;
        thissad +=
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

        if (thissad < bestsad) {
          bestsad = thissad;
          best_mv->as_mv.row = r;
          best_mv->as_mv.col = c;
          bestaddress = check_here;
        }
      }

      check_here++;
      c++;
    }
  }

  /* Multiply rather than shift: the components may be negative, and
   * left-shifting a negative value is undefined behaviour in C.
   */
  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  this_mv.as_mv.col = best_mv->as_mv.col * 8;

  /* thissad is reused as the output argument of vf(). */
  return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1490 
/* Exhaustive motion search that scores eight horizontally consecutive
 * positions per fn_ptr->sdx8f() call, then three per fn_ptr->sdx3f() call,
 * and finishes each row with single fn_ptr->sdf() calls.
 *
 * Scores the position given by ref_mv first, then every position with
 * row in [row_min, row_max) and column in [col_min, col_max), where the
 * window is ref_mv +/- distance clipped to the limits held in x (the upper
 * bounds are not visited by the loops). The motion-vector cost from
 * mvsad_err_cost() (relative to center_mv >> 3) is only added once the raw
 * SAD already beats the current best.
 *
 * The best position is written to d->bmi.mv. Returns fn_ptr->vf() at that
 * position plus mv_err_cost() of the position * 8 relative to center_mv.
 */
int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv) {
  unsigned char *src_blk = (*(b->base_src) + b->src);
  int src_stride = b->src_stride;
  int ref_stride = x->e_mbd.pre.y_stride;
  unsigned char *ref_frame = x->e_mbd.pre.y_buffer;
  unsigned char *ref_origin;
  unsigned char *best_addr;
  unsigned char *cand_addr;
  int_mv *best_mv = &d->bmi.mv;
  int_mv cand_mv;
  unsigned int bestsad;
  unsigned int cand_sad;
  int row, col, i;

  int ref_row = ref_mv->as_mv.row;
  int ref_col = ref_mv->as_mv.col;

  int row_min = ref_row - distance;
  int row_max = ref_row + distance;
  int col_min = ref_col - distance;
  int col_max = ref_col + distance;

  DECLARE_ALIGNED(16, unsigned int, sad_array8[8]);
  unsigned int sad_array[3];

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* Address of the search centre in the reference plane. */
  ref_origin = ref_frame + d->offset;
  best_addr = ref_origin + (ref_row * ref_stride) + ref_col;

  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Score of the centre position is the value to beat. */
  bestsad = fn_ptr->sdf(src_blk, src_stride, best_addr, ref_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* Clip the search window to the motion-vector limits held in x. */
  if (col_min < x->mv_col_min) col_min = x->mv_col_min;
  if (col_max > x->mv_col_max) col_max = x->mv_col_max;
  if (row_min < x->mv_row_min) row_min = x->mv_row_min;
  if (row_max > x->mv_row_max) row_max = x->mv_row_max;

  for (row = row_min; row < row_max; ++row) {
    cand_mv.as_mv.row = row;
    /* First candidate address of this row; advanced by one per column. */
    cand_addr = row * ref_stride + ref_origin + col_min;
    col = col_min;

    /* Groups of eight columns while at least eight remain in the row. */
    while ((col + 7) < col_max) {
      fn_ptr->sdx8f(src_blk, src_stride, cand_addr, ref_stride, sad_array8);

      for (i = 0; i < 8; ++i, ++cand_addr, ++col) {
        cand_sad = sad_array8[i];
        if (cand_sad >= bestsad) continue;

        cand_mv.as_mv.col = col;
        cand_sad +=
            mvsad_err_cost(&cand_mv, &fcenter_mv, mvsadcost, sad_per_bit);

        if (cand_sad < bestsad) {
          bestsad = cand_sad;
          best_mv->as_mv.row = row;
          best_mv->as_mv.col = col;
          best_addr = cand_addr;
        }
      }
    }

    /* Groups of three columns while at least three remain in the row. */
    while ((col + 2) < col_max) {
      fn_ptr->sdx3f(src_blk, src_stride, cand_addr, ref_stride, sad_array);

      for (i = 0; i < 3; ++i, ++cand_addr, ++col) {
        cand_sad = sad_array[i];
        if (cand_sad >= bestsad) continue;

        cand_mv.as_mv.col = col;
        cand_sad +=
            mvsad_err_cost(&cand_mv, &fcenter_mv, mvsadcost, sad_per_bit);

        if (cand_sad < bestsad) {
          bestsad = cand_sad;
          best_mv->as_mv.row = row;
          best_mv->as_mv.col = col;
          best_addr = cand_addr;
        }
      }
    }

    /* Remaining columns of the row, one at a time. */
    for (; col < col_max; ++col, ++cand_addr) {
      cand_sad = fn_ptr->sdf(src_blk, src_stride, cand_addr, ref_stride);
      if (cand_sad >= bestsad) continue;

      cand_mv.as_mv.col = col;
      cand_sad +=
          mvsad_err_cost(&cand_mv, &fcenter_mv, mvsadcost, sad_per_bit);

      if (cand_sad < bestsad) {
        bestsad = cand_sad;
        best_mv->as_mv.row = row;
        best_mv->as_mv.col = col;
        best_addr = cand_addr;
      }
    }
  }

  cand_mv.as_mv.row = best_mv->as_mv.row * 8;
  cand_mv.as_mv.col = best_mv->as_mv.col * 8;

  /* cand_sad is reused as the output argument of vf(). */
  return fn_ptr->vf(src_blk, src_stride, best_addr, ref_stride, &cand_sad) +
         mv_err_cost(&cand_mv, center_mv, mvcost, x->errorperbit);
}
1636 
/* Iterative one-step refinement of ref_mv, plain C version.
 *
 * For up to search_range rounds, the four positions directly adjacent to
 * the current ref_mv are scored with fn_ptr->sdf() plus mvsad_err_cost()
 * relative to center_mv >> 3 (weighted by error_per_bit); candidates are
 * only tested when strictly inside the limits held in x. ref_mv is moved
 * to the best neighbour of each round, and the search stops early when no
 * neighbour improves on the current score.
 *
 * ref_mv is updated in place with the refined position. Returns
 * fn_ptr->vf() at that position plus mv_err_cost() of the position * 8
 * relative to center_mv.
 */
int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
                              int *mvcost[2], int_mv *center_mv) {
  MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  int i, j;
  short this_row_offset, this_col_offset;

  int what_stride = b->src_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  unsigned char *what = (*(b->base_src) + b->src);
  unsigned char *best_address =
      (unsigned char *)(base_pre + d->offset +
                        (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
  unsigned char *check_here;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* Score of the starting position is the value to beat. */
  bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
            mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);

  for (i = 0; i < search_range; ++i) {
    int best_site = -1;

    for (j = 0; j < 4; ++j) {
      this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
      this_col_offset = ref_mv->as_mv.col + neighbors[j].col;

      if ((this_col_offset > x->mv_col_min) &&
          (this_col_offset < x->mv_col_max) &&
          (this_row_offset > x->mv_row_min) &&
          (this_row_offset < x->mv_row_max)) {
        check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
                     best_address;
        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

        /* Only pay for the MV cost when the raw SAD is already better. */
        if (thissad < bestsad) {
          this_mv.as_mv.row = this_row_offset;
          this_mv.as_mv.col = this_col_offset;
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_site = j;
          }
        }
      }
    }

    if (best_site == -1) {
      /* No neighbour improved the score: the search has converged. */
      break;
    } else {
      ref_mv->as_mv.row += neighbors[best_site].row;
      ref_mv->as_mv.col += neighbors[best_site].col;
      best_address += (neighbors[best_site].row) * in_what_stride +
                      neighbors[best_site].col;
    }
  }

  /* Multiply rather than shift: the components may be negative, and
   * left-shifting a negative value is undefined behaviour in C.
   */
  this_mv.as_mv.row = ref_mv->as_mv.row * 8;
  this_mv.as_mv.col = ref_mv->as_mv.col * 8;

  /* thissad is reused as the output argument of vf(). */
  return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
1714 
/* Refines *ref_mv by repeatedly stepping to whichever of its four direct
 * neighbours (up, left, right, down) gives the lowest SAD plus vector cost.
 *
 * At most search_range steps are taken; the search stops early as soon as no
 * neighbour improves on the current best. When all four neighbours lie
 * strictly inside the limits x->mv_{row,col}_{min,max} their SADs are fetched
 * with one fn_ptr->sdx4df() call; otherwise each in-range neighbour is
 * measured separately with fn_ptr->sdf().
 *
 * ref_mv is updated in place with the refined vector. The return value is
 * fn_ptr->vf() at the final position plus mv_err_cost() of the final vector
 * (components multiplied by 8) relative to center_mv, using x->errorperbit.
 * The SAD-stage cost uses center_mv with both components shifted right by 3
 * (presumably converting to the same precision as ref_mv -- confirm against
 * the callers).
 */
int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
                              int *mvcost[2], int_mv *center_mv) {
  /* Candidate steps as { row, col }: up, left, right, down. */
  const MV step[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  const int src_stride = b->src_stride;
  const int ref_stride = x->e_mbd.pre.y_stride;
  unsigned char *const src = (*(b->base_src) + b->src);
  /* Address in the prediction buffer that corresponds to the current best
   * vector; kept in step with ref_mv as the search moves. */
  unsigned char *best_ptr =
      (unsigned char *)(x->e_mbd.pre.y_buffer + d->offset +
                        (ref_mv->as_mv.row * ref_stride) + ref_mv->as_mv.col);
  int *sad_cost[2];
  int_mv shifted_center;
  int_mv cand_mv;
  unsigned int best_sad;
  unsigned int trial_sad;
  unsigned int sse; /* written by fn_ptr->vf(); the value is not used */
  int iter, k;

  sad_cost[0] = x->mvsadcost[0];
  sad_cost[1] = x->mvsadcost[1];
  shifted_center.as_mv.row = center_mv->as_mv.row >> 3;
  shifted_center.as_mv.col = center_mv->as_mv.col >> 3;

  /* Score of the starting position. */
  best_sad = fn_ptr->sdf(src, src_stride, best_ptr, ref_stride) +
             mvsad_err_cost(ref_mv, &shifted_center, sad_cost, error_per_bit);

  for (iter = 0; iter < search_range; ++iter) {
    int winner = -1;
    /* Non-zero when every one of the four neighbours is strictly inside the
     * permitted vector range. */
    const int interior = ((ref_mv->as_mv.row - 1) > x->mv_row_min) &&
                         ((ref_mv->as_mv.row + 1) < x->mv_row_max) &&
                         ((ref_mv->as_mv.col - 1) > x->mv_col_min) &&
                         ((ref_mv->as_mv.col + 1) < x->mv_col_max);

    if (interior) {
      unsigned int sads[4];
      const unsigned char *cand_ptr[4];

      for (k = 0; k < 4; ++k) {
        cand_ptr[k] = best_ptr + step[k].row * ref_stride + step[k].col;
      }

      fn_ptr->sdx4df(src, src_stride, cand_ptr, ref_stride, sads);

      for (k = 0; k < 4; ++k) {
        /* Only pay for the vector cost when the raw SAD already beats the
         * current best. */
        if (sads[k] >= best_sad) continue;

        cand_mv.as_mv.row = ref_mv->as_mv.row + step[k].row;
        cand_mv.as_mv.col = ref_mv->as_mv.col + step[k].col;
        sads[k] +=
            mvsad_err_cost(&cand_mv, &shifted_center, sad_cost, error_per_bit);

        if (sads[k] < best_sad) {
          best_sad = sads[k];
          winner = k;
        }
      }
    } else {
      for (k = 0; k < 4; ++k) {
        const short cand_row = ref_mv->as_mv.row + step[k].row;
        const short cand_col = ref_mv->as_mv.col + step[k].col;

        /* Skip neighbours on or outside the vector limits. */
        if ((cand_col <= x->mv_col_min) || (cand_col >= x->mv_col_max) ||
            (cand_row <= x->mv_row_min) || (cand_row >= x->mv_row_max)) {
          continue;
        }

        trial_sad = fn_ptr->sdf(
            src, src_stride,
            best_ptr + step[k].row * ref_stride + step[k].col, ref_stride);
        if (trial_sad >= best_sad) continue;

        cand_mv.as_mv.row = cand_row;
        cand_mv.as_mv.col = cand_col;
        trial_sad +=
            mvsad_err_cost(&cand_mv, &shifted_center, sad_cost, error_per_bit);

        if (trial_sad < best_sad) {
          best_sad = trial_sad;
          winner = k;
        }
      }
    }

    /* No neighbour improved on the current position: the search is done. */
    if (winner < 0) break;

    ref_mv->as_mv.row += step[winner].row;
    ref_mv->as_mv.col += step[winner].col;
    best_ptr += step[winner].row * ref_stride + step[winner].col;
  }

  cand_mv.as_mv.row = ref_mv->as_mv.row * 8;
  cand_mv.as_mv.col = ref_mv->as_mv.col * 8;

  return fn_ptr->vf(src, src_stride, best_ptr, ref_stride, &sse) +
         mv_err_cost(&cand_mv, center_mv, mvcost, x->errorperbit);
}
1824 
1825 #ifdef VP8_ENTROPY_STATS
print_mode_context(void)1826 void print_mode_context(void) {
1827   FILE *f = fopen("modecont.c", "w");
1828   int i, j;
1829 
1830   fprintf(f, "#include \"entropy.h\"\n");
1831   fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
1832   fprintf(f, "{\n");
1833 
1834   for (j = 0; j < 6; ++j) {
1835     fprintf(f, "  { /* %d */\n", j);
1836     fprintf(f, "    ");
1837 
1838     for (i = 0; i < 4; ++i) {
1839       int overal_prob;
1840       int this_prob;
1841       int count;
1842 
1843       /* Overall probs */
1844       count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
1845 
1846       if (count)
1847         overal_prob = 256 * mv_mode_cts[i][0] / count;
1848       else
1849         overal_prob = 128;
1850 
1851       if (overal_prob == 0) overal_prob = 1;
1852 
1853       /* context probs */
1854       count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
1855 
1856       if (count)
1857         this_prob = 256 * mv_ref_ct[j][i][0] / count;
1858       else
1859         this_prob = 128;
1860 
1861       if (this_prob == 0) this_prob = 1;
1862 
1863       fprintf(f, "%5d, ", this_prob);
1864     }
1865 
1866     fprintf(f, "  },\n");
1867   }
1868 
1869   fprintf(f, "};\n");
1870   fclose(f);
1871 }
1872 
1873 /* MV ref count VP8_ENTROPY_STATS stats code */
1874 #ifdef VP8_ENTROPY_STATS
init_mv_ref_counts()1875 void init_mv_ref_counts() {
1876   memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
1877   memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
1878 }
1879 
/* Records one coded prediction mode in the statistics tables.
 *
 * The mode is compared against ZEROMV, NEARESTMV, NEARMV and NEWMV in that
 * order. At each level k that is reached, mv_ref_ct[ct[k]][k][n] and
 * mv_mode_cts[k][n] are incremented, where n is 0 if m equals that level's
 * mode and 1 otherwise. Counting stops at the first level that matches, so
 * ct[k] is only read for the levels actually visited.
 */
void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) {
  static const MB_PREDICTION_MODE level_mode[4] = { ZEROMV, NEARESTMV, NEARMV,
                                                    NEWMV };
  int level;

  for (level = 0; level < 4; ++level) {
    /* 0 selects the "matched" counter, 1 the "did not match" counter. */
    const int missed = (m != level_mode[level]);

    ++mv_ref_ct[ct[level]][level][missed];
    ++mv_mode_cts[level][missed];

    if (!missed) break;
  }
}
1913 
1914 #endif /* END MV ref count VP8_ENTROPY_STATS stats code */
1915 
1916 #endif
1917