1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "./vp8_rtcd.h"
12 #include "./vpx_dsp_rtcd.h"
13 #include "onyx_int.h"
14 #include "mcomp.h"
15 #include "vpx_mem/vpx_mem.h"
16 #include "vpx_config.h"
17 #include <stdio.h>
18 #include <limits.h>
19 #include <math.h>
20 #include "vp8/common/findnearmv.h"
21 #include "vp8/common/common.h"
22 #include "vpx_dsp/vpx_dsp_common.h"
23 
/* Returns the weighted bit cost of coding `mv` relative to the predictor `ref`.
 *
 * The cost tables reflect the distribution of vectors in the previous frame,
 * so they tend to overstate the cost. Coding a new vector can also affect the
 * cost of later vectors and the quality of NEAR/NEAREST prediction for later
 * blocks. `Weight` lets the caller account for this to a limited extent: the
 * summed table cost is multiplied by `Weight` and then shifted right by 7.
 *
 * mvcost[0] is the row table and mvcost[1] the column table; unlike
 * mv_err_cost(), `mvcost` is dereferenced unconditionally here.
 *
 * NOTE(review): the halved deltas are clamped to [0, MVvals], so every
 * negative delta maps to index 0, while mvsad_err_cost() indexes its tables
 * with the raw signed delta. Confirm this matches the table layout.
 */
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) {
  const int row_delta = (mv->as_mv.row - ref->as_mv.row) >> 1;
  const int col_delta = (mv->as_mv.col - ref->as_mv.col) >> 1;
  const int row_cost = mvcost[0][clamp(row_delta, 0, MVvals)];
  const int col_cost = mvcost[1][clamp(col_delta, 0, MVvals)];

  return ((row_cost + col_cost) * Weight) >> 7;
}
38 
/* Rate term used by the sub-pixel searches.
 *
 * Looks up the row/column cost of (mv - ref), with each delta halved and
 * clamped to [0, MVvals], multiplies the sum by `error_per_bit`, and returns
 * the result rounded (+128) and shifted right by 8.
 *
 * Returns 0 when `mvcost` is NULL, i.e. motion-vector costing is disabled.
 */
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2],
                       int error_per_bit) {
  int row_idx;
  int col_idx;
  int table_cost;

  if (!mvcost) return 0;

  row_idx = clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals);
  col_idx = clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals);
  table_cost = mvcost[0][row_idx] + mvcost[1][col_idx];

  return (table_cost * error_per_bit + 128) >> 8;
}
53 
/* SAD-domain rate term, computed on a full-pixel basis.
 *
 * Indexes the row/column SAD cost tables with the raw signed difference
 * (mv - ref), multiplies the sum by `error_per_bit`, and returns the result
 * rounded (+128) and shifted right by 8.
 *
 * Returns 0 when `mvsadcost` is NULL, i.e. motion-vector costing is disabled.
 */
static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2],
                          int error_per_bit) {
  int row_delta;
  int col_delta;
  int table_cost;

  if (!mvsadcost) return 0;

  row_delta = mv->as_mv.row - ref->as_mv.row;
  col_delta = mv->as_mv.col - ref->as_mv.col;
  table_cost = mvsadcost[0][row_delta] + mvsadcost[1][col_delta];

  return (table_cost * error_per_bit + 128) >> 8;
}
67 
/* Fills x->ss with the search-site table for a 4-point (diamond) pattern.
 *
 * Entry 0 is the centre (zero vector, zero offset). Then, for each radius
 * starting at MAX_FIRST_STEP and halving until it reaches zero, four sites
 * are appended in the order up, down, left, right. Each site stores its
 * vector and the matching buffer offset (row * stride + col).
 *
 * On return x->ss_count holds the number of entries written and
 * x->searches_per_step is 4.
 */
void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
  /* Unit {row, col} steps, in emission order. */
  static const int unit_dir[4][2] = {
    { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 }
  };
  int site = 0;
  int radius;
  int i;

  /* The centre point always comes first. */
  x->ss[site].mv.col = 0;
  x->ss[site].mv.row = 0;
  x->ss[site].offset = 0;
  ++site;

  for (radius = MAX_FIRST_STEP; radius > 0; radius /= 2) {
    for (i = 0; i < 4; ++i) {
      const int row = unit_dir[i][0] * radius;
      const int col = unit_dir[i][1] * radius;

      x->ss[site].mv.col = col;
      x->ss[site].mv.row = row;
      x->ss[site].offset = row * stride + col;
      ++site;
    }
  }

  x->ss_count = site;
  x->searches_per_step = 4;
}
111 
/* Fills x->ss with the search-site table for an 8-point pattern.
 *
 * Entry 0 is the centre (zero vector, zero offset). Then, for each radius
 * starting at MAX_FIRST_STEP and halving until it reaches zero, eight sites
 * are appended: up, down, left, right, then the four diagonals (up-left,
 * up-right, down-left, down-right). Each site stores its vector and the
 * matching buffer offset (row * stride + col).
 *
 * On return x->ss_count holds the number of entries written and
 * x->searches_per_step is 8.
 */
void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) {
  /* Unit {row, col} steps, in emission order. */
  static const int unit_dir[8][2] = {
    { -1, 0 },  { 1, 0 },  { 0, -1 }, { 0, 1 },
    { -1, -1 }, { -1, 1 }, { 1, -1 }, { 1, 1 }
  };
  int site = 0;
  int radius;
  int i;

  /* The centre point always comes first. */
  x->ss[site].mv.col = 0;
  x->ss[site].mv.row = 0;
  x->ss[site].offset = 0;
  ++site;

  for (radius = MAX_FIRST_STEP; radius > 0; radius /= 2) {
    for (i = 0; i < 8; ++i) {
      const int row = unit_dir[i][0] * radius;
      const int col = unit_dir[i][1] * radius;

      x->ss[site].mv.col = col;
      x->ss[site].mv.row = row;
      x->ss[site].offset = row * stride + col;
      ++site;
    }
  }

  x->ss_count = site;
  x->searches_per_step = 8;
}
179 
/*
 * To avoid the penalty of reads that cross a cache line, the reference area
 * is preloaded into a small aligned buffer, so that reads from this buffer
 * never straddle a cache line. This reduces the CPU cycles spent reading
 * reference data in the sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, a 22 rows
 * x 32 cols area is copied, which is enough for a 16x16 macroblock. Later,
 * for SPLITMV, the area could be reduced.
 */
189 
/* Helper macros for vp8_find_best_sub_pixel_step_iteratively().
 *
 * They capture the caller's locals by name rather than taking them as
 * arguments: mvcost, rr, rc, error_per_bit, y, y_stride, offset, vfp, z, b,
 * sse, thismse, besterr, br, bc, distortion, sse1, minr, maxr, minc, maxc.
 * Positions (r, c) use the same units as br/bc in that function, where the
 * low two bits are the sub-pixel fraction.
 *
 * Every macro argument is parenthesized where it is used, so expressions
 * with low-precedence operators (e.g. a ternary) expand as intended.
 */

/* estimated cost of a motion vector (r,c); 0 when mvcost is NULL */
#define MVC(r, c)                                                             \
  (mvcost                                                                     \
       ? ((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128) >> 8 \
       : 0)
/* pointer to predictor base of a motionvector */
#define PRE(r, c) (y + (((r) >> 2) * y_stride + ((c) >> 2) - (offset)))
/* convert motion vector component to offset for svf calc */
#define SP(x) (((x)&3) << 1)
/* returns subpixel variance error function. */
#define DIST(r, c) \
  vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse)
/* runs statement s when (r,c) lies inside [minr,maxr] x [minc,maxc], else e */
#define IFMVCV(r, c, s, e) \
  if ((c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) s else e;
/* returns distortion + motion vector cost */
#define ERR(r, c) (MVC(r, c) + DIST(r, c))
/* checks if (r,c) has better score than previous best; v receives the
 * candidate's total cost, or UINT_MAX when (r,c) is out of range */
#define CHECK_BETTER(v, r, c)                            \
  do {                                                   \
    IFMVCV(                                              \
        r, c,                                            \
        {                                                \
          thismse = DIST(r, c);                          \
          if (((v) = (MVC(r, c) + thismse)) < besterr) { \
            besterr = (v);                               \
            br = (r);                                    \
            bc = (c);                                    \
            *distortion = thismse;                       \
            *sse1 = sse;                                 \
          }                                              \
        },                                               \
        (v) = UINT_MAX;)                                 \
  } while (0)
223 
/* Iterative sub-pixel refinement around the full-pel vector in *bestmv.
 *
 * Runs up to three passes at a half-pel step and then up to three passes at
 * a quarter-pel step. Each pass probes left, right, up and down of the
 * current centre, plus the one diagonal lying between the cheaper horizontal
 * and the cheaper vertical neighbour. A stage ends early when a pass does
 * not move the best position. Candidates outside [minr,maxr] x [minc,maxc]
 * are skipped.
 *
 * x, b, d        encoder macroblock, source block and prediction block
 * bestmv         in: full-pel start; out: best vector scaled back to the
 *                stored units (clamped to the short range)
 * ref_mv         predictor used for costing and for the search window
 * error_per_bit  multiplier applied to the MV table cost
 * vfp            variance / sub-pixel variance function table
 * mvcost         row/column cost tables, or NULL to ignore MV cost
 * distortion     out: distortion of the best candidate (without MV cost)
 * sse1           out: SSE reported for the best candidate
 *
 * Returns the best total cost, or INT_MAX when the result is more than
 * (MAX_FULL_PEL_VAL << 3) away from ref_mv in either component.
 */
int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                             int_mv *bestmv, int_mv *ref_mv,
                                             int error_per_bit,
                                             const vp8_variance_fn_ptr_t *vfp,
                                             int *mvcost[2], int *distortion,
                                             unsigned int *sse1) {
  /* Many locals below (z, y, y_stride, offset, rr, rc, br, bc, minr, maxr,
   * minc, maxc, besterr, thismse, sse) are captured by name by the
   * MVC / PRE / DIST / CHECK_BETTER macros and must keep these names.
   */
  unsigned char *z = (*(b->base_src) + b->src);
  unsigned char *y;
  int y_stride;
  int offset;

  /* Largest |mv - ref| component the window allows. */
  const int mv_span = (1 << mvlong_width) - 1;

  /* Predictor (rr, rc), best (br, bc) and current centre (tr, tc), all in
   * the units the macros work in (stored units / 2, full-pel * 4).
   */
  int rr = ref_mv->as_mv.row >> 1;
  int rc = ref_mv->as_mv.col >> 1;
  int br = bestmv->as_mv.row * 4;
  int bc = bestmv->as_mv.col * 4;
  int tr = br;
  int tc = bc;

  /* Search window: the frame limits intersected with the codable range. */
  int minc = VPXMAX(x->mv_col_min * 4, rc - mv_span);
  int maxc = VPXMIN(x->mv_col_max * 4, rc + mv_span);
  int minr = VPXMAX(x->mv_row_min * 4, rr - mv_span);
  int maxr = VPXMIN(x->mv_row_max * 4, rr + mv_span);

  unsigned int besterr;
  unsigned int left, right, up, down, diag;
  unsigned int sse;
  int thismse;

  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

  int step; /* probe distance: 2 = half-pel, 1 = quarter-pel */
  int pass;
  int dr, dc;

#if VPX_ARCH_X86 || VPX_ARCH_X86_64
  {
    MACROBLOCKD *xd = &x->e_mbd;
    unsigned char *y_0 = base_pre + d->offset +
                         (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    /* Margin copied above / below / left of the block, 3 pixels unless the
     * MV limits are closer (clamped to avoid out-of-range data access).
     */
    int buf_r1 = 3;
    int buf_r2 = 3;
    int buf_c1 = 3;

    if ((bestmv->as_mv.row - 3) < x->mv_row_min) {
      buf_r1 = bestmv->as_mv.row - x->mv_row_min;
    }
    if ((bestmv->as_mv.row + 3) > x->mv_row_max) {
      buf_r2 = x->mv_row_max - bestmv->as_mv.row;
    }
    if ((bestmv->as_mv.col - 3) < x->mv_col_min) {
      buf_c1 = bestmv->as_mv.col - x->mv_col_min;
    }
    y_stride = 32;

    /* Copy to intermediate buffer before searching. */
    vfp->copymem(y_0 - buf_c1 - pre_stride * buf_r1, pre_stride, xd->y_buf,
                 y_stride, 16 + buf_r1 + buf_r2);
    y = xd->y_buf + y_stride * buf_r1 + buf_c1;
  }
#else
  y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
      bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  /* PRE() subtracts this so that y corresponds to the full-pel start. */
  offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;

  /* central mv, scaled to the stored units */
  bestmv->as_mv.row = clamp(bestmv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
  bestmv->as_mv.col = clamp(bestmv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);

  /* calculate central point error */
  besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* TODO: Each subsequent pass checks at least one point in common with the
   * previous pass (possibly two if the diagonal was selected).
   */
  for (step = 2; step > 0; step >>= 1) {
    for (pass = 0; pass < 3; ++pass) {
      CHECK_BETTER(left, tr, tc - step);
      CHECK_BETTER(right, tr, tc + step);
      CHECK_BETTER(up, tr - step, tc);
      CHECK_BETTER(down, tr + step, tc);

      /* Probe the diagonal on the side of the cheaper neighbours. */
      dc = (left < right) ? -step : step;
      dr = (up < down) ? -step : step;
      CHECK_BETTER(diag, tr + dr, tc + dc);

      /* no reason to check the same one again. */
      if (tr == br && tc == bc) break;

      tr = br;
      tc = bc;
    }
  }

  bestmv->as_mv.row = clamp(br * 2, SHRT_MIN, SHRT_MAX);
  bestmv->as_mv.col = clamp(bc * 2, SHRT_MIN, SHRT_MAX);

  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) {
    return INT_MAX;
  }

  return besterr;
}
360 #undef MVC
361 #undef PRE
362 #undef SP
363 #undef DIST
364 #undef IFMVCV
365 #undef ERR
366 #undef CHECK_BETTER
367 
/* Adds the motion-vector cost to a candidate's distortion and, when the
 * total beats *best_cost, records the candidate as the new best (vector,
 * cost, distortion and SSE). Returns the candidate's total cost.
 */
static int subpel_step_try(int_mv *cand, int cand_dist, unsigned int cand_sse,
                           int_mv *ref_mv, int *mvcost[2], int error_per_bit,
                           int_mv *bestmv, int *best_cost, int *distortion,
                           unsigned int *sse1) {
  const int total =
      cand_dist + mv_err_cost(cand, ref_mv, mvcost, error_per_bit);

  if (total < *best_cost) {
    *bestmv = *cand;
    *best_cost = total;
    *distortion = cand_dist;
    *sse1 = cand_sse;
  }
  return total;
}

/* Single-pass sub-pixel refinement of the full-pel vector in *bestmv.
 *
 * Stage 1 scores the centre, its four half-pel neighbours and one half-pel
 * diagonal. Stage 2 repeats the pattern at quarter-pel distance around the
 * stage-1 winner. In each stage the diagonal probed is the one between the
 * cheaper horizontal and the cheaper vertical neighbour.
 *
 * bestmv      in: full-pel start; out: best vector in the stored units
 *             (full-pel * 8, clamped to the short range)
 * ref_mv      predictor used for costing
 * mvcost      row/column cost tables, or NULL to ignore MV cost
 * distortion  out: distortion of the winner (without MV cost)
 * sse1        out: SSE reported for the winner
 *
 * Returns the winner's total cost (distortion + MV cost).
 */
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                 int_mv *bestmv, int_mv *ref_mv,
                                 int error_per_bit,
                                 const vp8_variance_fn_ptr_t *vfp,
                                 int *mvcost[2], int *distortion,
                                 unsigned int *sse1) {
  unsigned char *const src = (*(b->base_src) + b->src);
  const int src_stride = b->src_stride;
  const int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *const base_pre = x->e_mbd.pre.y_buffer;
  unsigned char *ref;   /* predictor at the current stage's anchor */
  unsigned char *probe; /* predictor pointer handed to svf */
  int ref_stride;
  int best_cost = INT_MAX;
  int cost_left, cost_right, cost_up, cost_down;
  int dist;
  int xoff, yoff; /* sub-pixel offsets passed to svf */
  unsigned int cand_sse;
  int_mv origin; /* anchor vector of the current stage */
  int_mv cand;

#if VPX_ARCH_X86 || VPX_ARCH_X86_64
  {
    MACROBLOCKD *xd = &x->e_mbd;
    unsigned char *full_pel = base_pre + d->offset +
                              (bestmv->as_mv.row) * pre_stride +
                              bestmv->as_mv.col;

    ref_stride = 32;
    /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    vfp->copymem(full_pel - 1 - pre_stride, pre_stride, xd->y_buf, ref_stride,
                 18);
    ref = xd->y_buf + ref_stride + 1;
  }
#else
  ref = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
        bestmv->as_mv.col;
  ref_stride = pre_stride;
#endif

  /* Scale the full-pel start to the stored units. */
  bestmv->as_mv.row = clamp(bestmv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
  bestmv->as_mv.col = clamp(bestmv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);
  origin = *bestmv;

  /* Score the centre point. */
  best_cost = vfp->vf(ref, ref_stride, src, src_stride, sse1);
  *distortion = best_cost;
  best_cost += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* ---- Stage 1: half-pel neighbours ---- */

  /* left ("halfpix" horizontal variance) */
  cand.as_mv.row = origin.as_mv.row;
  cand.as_mv.col = ((origin.as_mv.col - 8) | 4);
  dist = vfp->svf(ref - 1, ref_stride, 4, 0, src, src_stride, &cand_sse);
  cost_left =
      subpel_step_try(&cand, dist, cand_sse, ref_mv, mvcost, error_per_bit,
                      bestmv, &best_cost, distortion, sse1);

  /* right */
  cand.as_mv.col += 8;
  dist = vfp->svf(ref, ref_stride, 4, 0, src, src_stride, &cand_sse);
  cost_right =
      subpel_step_try(&cand, dist, cand_sse, ref_mv, mvcost, error_per_bit,
                      bestmv, &best_cost, distortion, sse1);

  /* up ("halfpix" vertical variance) */
  cand.as_mv.col = origin.as_mv.col;
  cand.as_mv.row = ((origin.as_mv.row - 8) | 4);
  dist = vfp->svf(ref - ref_stride, ref_stride, 0, 4, src, src_stride,
                  &cand_sse);
  cost_up =
      subpel_step_try(&cand, dist, cand_sse, ref_mv, mvcost, error_per_bit,
                      bestmv, &best_cost, distortion, sse1);

  /* down */
  cand.as_mv.row += 8;
  dist = vfp->svf(ref, ref_stride, 0, 4, src, src_stride, &cand_sse);
  cost_down =
      subpel_step_try(&cand, dist, cand_sse, ref_mv, mvcost, error_per_bit,
                      bestmv, &best_cost, distortion, sse1);

  /* One diagonal: pick each axis' side independently. Moving left or up
   * also steps the predictor pointer back by one pixel / one row.
   */
  cand = origin;
  probe = ref;
  if (cost_left < cost_right) {
    cand.as_mv.col = (cand.as_mv.col - 8) | 4;
    probe -= 1;
  } else {
    cand.as_mv.col += 4;
  }
  if (cost_up < cost_down) {
    cand.as_mv.row = (cand.as_mv.row - 8) | 4;
    probe -= ref_stride;
  } else {
    cand.as_mv.row += 4;
  }
  /* "halfpix" horizontal/vertical variance */
  dist = vfp->svf(probe, ref_stride, 4, 4, src, src_stride, &cand_sse);
  (void)subpel_step_try(&cand, dist, cand_sse, ref_mv, mvcost, error_per_bit,
                        bestmv, &best_cost, distortion, sse1);

  /* ---- Stage 2: quarter-pel neighbours of the stage-1 winner ---- */

  /* Re-anchor the predictor pointer if the winner lies up and/or left. */
  if (bestmv->as_mv.row < origin.as_mv.row) ref -= ref_stride;
  if (bestmv->as_mv.col < origin.as_mv.col) ref--;

  origin = *bestmv;

  /* left: when the anchor has no column fraction the probe belongs to the
   * previous pixel, at fraction 6.
   */
  cand.as_mv.row = origin.as_mv.row;
  if (origin.as_mv.col & 7) {
    cand.as_mv.col = origin.as_mv.col - 2;
    probe = ref;
    xoff = cand.as_mv.col & 7;
  } else {
    cand.as_mv.col = (origin.as_mv.col - 8) | 6;
    probe = ref - 1;
    xoff = 6;
  }
  dist = vfp->svf(probe, ref_stride, xoff, cand.as_mv.row & 7, src,
                  src_stride, &cand_sse);
  cost_left =
      subpel_step_try(&cand, dist, cand_sse, ref_mv, mvcost, error_per_bit,
                      bestmv, &best_cost, distortion, sse1);

  /* right */
  cand.as_mv.col += 4;
  dist = vfp->svf(ref, ref_stride, cand.as_mv.col & 7, cand.as_mv.row & 7,
                  src, src_stride, &cand_sse);
  cost_right =
      subpel_step_try(&cand, dist, cand_sse, ref_mv, mvcost, error_per_bit,
                      bestmv, &best_cost, distortion, sse1);

  /* up: same wrap-around rule, applied to rows */
  cand.as_mv.col = origin.as_mv.col;
  if (origin.as_mv.row & 7) {
    cand.as_mv.row = origin.as_mv.row - 2;
    probe = ref;
    yoff = cand.as_mv.row & 7;
  } else {
    cand.as_mv.row = (origin.as_mv.row - 8) | 6;
    probe = ref - ref_stride;
    yoff = 6;
  }
  dist = vfp->svf(probe, ref_stride, cand.as_mv.col & 7, yoff, src,
                  src_stride, &cand_sse);
  cost_up =
      subpel_step_try(&cand, dist, cand_sse, ref_mv, mvcost, error_per_bit,
                      bestmv, &best_cost, distortion, sse1);

  /* down */
  cand.as_mv.row += 4;
  dist = vfp->svf(ref, ref_stride, cand.as_mv.col & 7, cand.as_mv.row & 7,
                  src, src_stride, &cand_sse);
  cost_down =
      subpel_step_try(&cand, dist, cand_sse, ref_mv, mvcost, error_per_bit,
                      bestmv, &best_cost, distortion, sse1);

  /* One quarter-pel diagonal, again choosing each axis independently. */
  cand = origin;
  probe = ref;

  if (!(cost_left < cost_right)) {
    cand.as_mv.col += 2;
    xoff = cand.as_mv.col & 7;
  } else if (origin.as_mv.col & 7) {
    cand.as_mv.col -= 2;
    xoff = cand.as_mv.col & 7;
  } else {
    cand.as_mv.col = (origin.as_mv.col - 8) | 6;
    xoff = 6;
    probe -= 1;
  }

  if (!(cost_up < cost_down)) {
    cand.as_mv.row += 2;
    yoff = cand.as_mv.row & 7;
  } else if (origin.as_mv.row & 7) {
    cand.as_mv.row -= 2;
    yoff = cand.as_mv.row & 7;
  } else {
    cand.as_mv.row = (origin.as_mv.row - 8) | 6;
    yoff = 6;
    probe -= ref_stride;
  }

  dist = vfp->svf(probe, ref_stride, xoff, yoff, src, src_stride, &cand_sse);
  (void)subpel_step_try(&cand, dist, cand_sse, ref_mv, mvcost, error_per_bit,
                        bestmv, &best_cost, distortion, sse1);

  return best_cost;
}
663 
/* Adds the motion-vector cost to a half-pel candidate's distortion and, when
 * the total beats *best_cost, records the candidate as the new best (vector,
 * cost, distortion and SSE). Returns the candidate's total cost.
 */
static int halfpel_step_try(int_mv *cand, int cand_dist, unsigned int cand_sse,
                            int_mv *ref_mv, int *mvcost[2], int error_per_bit,
                            int_mv *bestmv, int *best_cost, int *distortion,
                            unsigned int *sse1) {
  const int total =
      cand_dist + mv_err_cost(cand, ref_mv, mvcost, error_per_bit);

  if (total < *best_cost) {
    *bestmv = *cand;
    *best_cost = total;
    *distortion = cand_dist;
    *sse1 = cand_sse;
  }
  return total;
}

/* Half-pel-only refinement of the full-pel vector in *bestmv.
 *
 * Scores the centre, its left/right/up/down half-pel neighbours, and the one
 * half-pel diagonal lying between the cheaper horizontal and the cheaper
 * vertical neighbour.
 *
 * bestmv      in: full-pel start; out: best vector in the stored units
 *             (full-pel * 8, clamped to the short range)
 * ref_mv      predictor used for costing
 * mvcost      row/column cost tables, or NULL to ignore MV cost
 * distortion  out: distortion of the winner (without MV cost)
 * sse1        out: SSE reported for the winner
 *
 * Returns the winner's total cost (distortion + MV cost).
 */
int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                  int_mv *bestmv, int_mv *ref_mv,
                                  int error_per_bit,
                                  const vp8_variance_fn_ptr_t *vfp,
                                  int *mvcost[2], int *distortion,
                                  unsigned int *sse1) {
  unsigned char *const src = (*(b->base_src) + b->src);
  const int src_stride = b->src_stride;
  const int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *const base_pre = x->e_mbd.pre.y_buffer;
  unsigned char *ref;   /* predictor at the full-pel start */
  unsigned char *probe; /* predictor pointer handed to svf */
  int ref_stride;
  int best_cost = INT_MAX;
  int cost_left, cost_right, cost_up, cost_down;
  int dist;
  unsigned int cand_sse;
  int_mv origin;
  int_mv cand;

#if VPX_ARCH_X86 || VPX_ARCH_X86_64
  {
    MACROBLOCKD *xd = &x->e_mbd;
    unsigned char *full_pel = base_pre + d->offset +
                              (bestmv->as_mv.row) * pre_stride +
                              bestmv->as_mv.col;

    ref_stride = 32;
    /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    vfp->copymem(full_pel - 1 - pre_stride, pre_stride, xd->y_buf, ref_stride,
                 18);
    ref = xd->y_buf + ref_stride + 1;
  }
#else
  ref = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
        bestmv->as_mv.col;
  ref_stride = pre_stride;
#endif

  /* Scale the full-pel start to the stored units. */
  bestmv->as_mv.row = clamp(bestmv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
  bestmv->as_mv.col = clamp(bestmv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);
  origin = *bestmv;

  /* Score the centre point. */
  best_cost = vfp->vf(ref, ref_stride, src, src_stride, sse1);
  *distortion = best_cost;
  best_cost += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* left ("halfpix" horizontal variance) */
  cand.as_mv.row = origin.as_mv.row;
  cand.as_mv.col = ((origin.as_mv.col - 8) | 4);
  dist = vfp->svf(ref - 1, ref_stride, 4, 0, src, src_stride, &cand_sse);
  cost_left =
      halfpel_step_try(&cand, dist, cand_sse, ref_mv, mvcost, error_per_bit,
                       bestmv, &best_cost, distortion, sse1);

  /* right */
  cand.as_mv.col += 8;
  dist = vfp->svf(ref, ref_stride, 4, 0, src, src_stride, &cand_sse);
  cost_right =
      halfpel_step_try(&cand, dist, cand_sse, ref_mv, mvcost, error_per_bit,
                       bestmv, &best_cost, distortion, sse1);

  /* up ("halfpix" vertical variance) */
  cand.as_mv.col = origin.as_mv.col;
  cand.as_mv.row = ((origin.as_mv.row - 8) | 4);
  dist = vfp->svf(ref - ref_stride, ref_stride, 0, 4, src, src_stride,
                  &cand_sse);
  cost_up =
      halfpel_step_try(&cand, dist, cand_sse, ref_mv, mvcost, error_per_bit,
                       bestmv, &best_cost, distortion, sse1);

  /* down */
  cand.as_mv.row += 8;
  dist = vfp->svf(ref, ref_stride, 0, 4, src, src_stride, &cand_sse);
  cost_down =
      halfpel_step_try(&cand, dist, cand_sse, ref_mv, mvcost, error_per_bit,
                       bestmv, &best_cost, distortion, sse1);

  /* One diagonal: pick each axis' side independently. Moving left or up
   * also steps the predictor pointer back by one pixel / one row.
   */
  cand = origin;
  probe = ref;
  if (cost_left < cost_right) {
    cand.as_mv.col = (cand.as_mv.col - 8) | 4;
    probe -= 1;
  } else {
    cand.as_mv.col += 4;
  }
  if (cost_up < cost_down) {
    cand.as_mv.row = (cand.as_mv.row - 8) | 4;
    probe -= ref_stride;
  } else {
    cand.as_mv.row += 4;
  }
  /* "halfpix" horizontal/vertical variance */
  dist = vfp->svf(probe, ref_stride, 4, 4, src, src_stride, &cand_sse);
  (void)halfpel_step_try(&cand, dist, cand_sse, ref_mv, mvcost, error_per_bit,
                         bestmv, &best_cost, distortion, sse1);

  return best_cost;
}
804 
/* Helper macros for the full-pel searches. They capture the caller's locals
 * by name: x, br, bc, all_in, this_mv, thissad, bestsad, best_site, i,
 * fcenter_mv, mvsadcost and sad_per_bit.
 */

/* Sets all_in to 1 when every point within +/-range of (br, bc) lies inside
 * the MV limits held in x, and to 0 otherwise. `range` is parenthesized so
 * that expression arguments (e.g. a + b) are subtracted as a whole.
 */
#define CHECK_BOUNDS(range)                      \
  do {                                           \
    all_in = 1;                                  \
    all_in &= ((br - (range)) >= x->mv_row_min); \
    all_in &= ((br + (range)) <= x->mv_row_max); \
    all_in &= ((bc - (range)) >= x->mv_col_min); \
    all_in &= ((bc + (range)) <= x->mv_col_max); \
  } while (0)

/* Skips the current loop iteration when this_mv falls outside the MV limits.
 * It is a plain brace block on purpose: wrapping it in do { } while (0)
 * would make `continue` apply to that inner loop instead of the caller's.
 */
#define CHECK_POINT                                  \
  {                                                  \
    if (this_mv.as_mv.col < x->mv_col_min) continue; \
    if (this_mv.as_mv.col > x->mv_col_max) continue; \
    if (this_mv.as_mv.row < x->mv_row_min) continue; \
    if (this_mv.as_mv.row > x->mv_row_max) continue; \
  }

/* If the raw SAD already beats bestsad, adds the MV cost to thissad and
 * re-tests; on success stores the new bestsad and sets best_site to i.
 */
#define CHECK_BETTER                                                     \
  do {                                                                   \
    if (thissad < bestsad) {                                             \
      thissad +=                                                         \
          mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); \
      if (thissad < bestsad) {                                           \
        bestsad = thissad;                                               \
        best_site = i;                                                   \
      }                                                                  \
    }                                                                    \
  } while (0)
833 
834 static const MV next_chkpts[6][3] = {
835   { { -2, 0 }, { -1, -2 }, { 1, -2 } }, { { -1, -2 }, { 1, -2 }, { 2, 0 } },
836   { { 1, -2 }, { 2, 0 }, { 1, 2 } },    { { 2, 0 }, { 1, 2 }, { -1, 2 } },
837   { { 1, 2 }, { -1, 2 }, { -2, 0 } },   { { -1, 2 }, { -2, 0 }, { -1, -2 } }
838 };
839 
/* Hexagon-pattern motion search followed by a small 4-neighbour refinement.
 *
 * ref_mv is clamped in place to the limits stored in x and used as the start
 * point. The winning position is written to best_mv, and the return value is
 * the best vfp->sdf() result plus its mvsad_err_cost() (costed against
 * center_mv >> 3).
 *
 * With CONFIG_MULTI_RES_ENCODING, search_param shortens the hexagon stage
 * (>= 6 skips it entirely); otherwise search_param is ignored.
 *
 * The CHECK_BOUNDS / CHECK_POINT / CHECK_BETTER macros expand in this scope
 * and depend on the locals x, br, bc, all_in, this_mv, thissad, bestsad,
 * fcenter_mv, mvsadcost, sad_per_bit, best_site and i keeping these names.
 */
int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                   int_mv *best_mv, int search_param, int sad_per_bit,
                   const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2],
                   int_mv *center_mv) {
  /* Vertices of the hexagon; next_chkpts[] is indexed by position here. */
  static const MV hex_pts[6] = { { -1, -2 }, { 1, -2 }, { 2, 0 },
                                 { 1, 2 },   { -1, 2 }, { -2, 0 } };
  /* The four 1-away neighbours used by the final refinement stage. */
  static const MV cross_pts[4] = { { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 } };

  unsigned char *const src = *(b->base_src) + b->src;
  const int src_stride = b->src_stride;
  const int ref_stride = x->e_mbd.pre.y_stride;
  unsigned char *const ref_frame = x->e_mbd.pre.y_buffer;
  unsigned char *ref_origin;
  unsigned char *cand;

  int_mv this_mv;
  int_mv fcenter_mv;
  unsigned int bestsad;
  unsigned int thissad;
  int br, bc;
  int i, pass;
  int all_in;
  int best_site = -1;
  int dir = -1; /* index of the hexagon vertex the centre last moved to */
  int hex_range = 127;
  const int dia_range = 8;

  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* adjust ref_mv to make sure it is within MV range */
  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  br = ref_mv->as_mv.row;
  bc = ref_mv->as_mv.col;

  /* Evaluate the start point of the search */
  ref_origin = ref_frame + d->offset;
  cand = ref_origin + br * ref_stride + bc;
  this_mv.as_mv.row = br;
  this_mv.as_mv.col = bc;
  bestsad = vfp->sdf(src, src_stride, cand, ref_stride) +
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

#if CONFIG_MULTI_RES_ENCODING
  /* Lower search range based on prediction info */
  if (search_param >= 6) goto cal_neighbors;

  switch (search_param) {
    case 5: hex_range = 4; break;
    case 4: hex_range = 6; break;
    case 3: hex_range = 15; break;
    case 2: hex_range = 31; break;
    case 1: hex_range = 63; break;
    default: break; /* search_param <= 0 keeps the full range */
  }
#else
  (void)search_param;
#endif

  /* hex search: first try all six vertices around the start point */
  CHECK_BOUNDS(2);

  for (i = 0; i < 6; ++i) {
    this_mv.as_mv.row = br + hex_pts[i].row;
    this_mv.as_mv.col = bc + hex_pts[i].col;
    /* Per-point range test is only needed when the hexagon may leave the
     * allowed area. */
    if (!all_in) {
      CHECK_POINT
    }
    cand = ref_origin + (this_mv.as_mv.row * ref_stride) + this_mv.as_mv.col;
    thissad = vfp->sdf(src, src_stride, cand, ref_stride);
    CHECK_BETTER;
  }

  if (best_site == -1) goto cal_neighbors;

  br += hex_pts[best_site].row;
  bc += hex_pts[best_site].col;
  dir = best_site;

  /* Keep moving the hexagon; only three vertices are new after each move. */
  for (pass = 1; pass < hex_range; ++pass) {
    const MV *const next = next_chkpts[dir];

    best_site = -1;
    CHECK_BOUNDS(2);

    for (i = 0; i < 3; ++i) {
      this_mv.as_mv.row = br + next[i].row;
      this_mv.as_mv.col = bc + next[i].col;
      if (!all_in) {
        CHECK_POINT
      }
      cand = ref_origin + (this_mv.as_mv.row * ref_stride) + this_mv.as_mv.col;
      thissad = vfp->sdf(src, src_stride, cand, ref_stride);
      CHECK_BETTER;
    }

    if (best_site == -1) break;

    br += next[best_site].row;
    bc += next[best_site].col;
    /* dir is in 0..5 and best_site in 0..2, so this wraps to 0..5. */
    dir = (dir + 5 + best_site) % 6;
  }

/* check 4 1-away neighbors */
cal_neighbors:
  for (pass = 0; pass < dia_range; ++pass) {
    best_site = -1;
    CHECK_BOUNDS(1);

    for (i = 0; i < 4; ++i) {
      this_mv.as_mv.row = br + cross_pts[i].row;
      this_mv.as_mv.col = bc + cross_pts[i].col;
      if (!all_in) {
        CHECK_POINT
      }
      cand = ref_origin + (this_mv.as_mv.row * ref_stride) + this_mv.as_mv.col;
      thissad = vfp->sdf(src, src_stride, cand, ref_stride);
      CHECK_BETTER;
    }

    if (best_site == -1) break;

    br += cross_pts[best_site].row;
    bc += cross_pts[best_site].col;
  }

  best_mv->as_mv.row = br;
  best_mv->as_mv.col = bc;

  return bestsad;
}
#undef CHECK_BOUNDS
#undef CHECK_POINT
#undef CHECK_BETTER
1020 
/* Step search over the candidate sites in x->ss, starting from ref_mv.
 *
 * ref_mv is clamped in place to the limits stored in x. best_mv receives the
 * winning position. *num00 counts the steps that found no better site while
 * the best position was still the start point. The return value is
 * fn_ptr->vf() at the best position plus mv_err_cost() of the winning vector
 * (scaled by 8) against center_mv.
 */
int vp8_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                             int_mv *best_mv, int search_param, int sad_per_bit,
                             int *num00, vp8_variance_fn_ptr_t *fn_ptr,
                             int *mvcost[2], int_mv *center_mv) {
  unsigned char *const src = *(b->base_src) + b->src;
  const int src_stride = b->src_stride;
  const int ref_stride = x->e_mbd.pre.y_stride;
  unsigned char *const ref_frame = x->e_mbd.pre.y_buffer;
  unsigned char *start_addr;
  unsigned char *best_addr;

  const search_site *sites;
  int per_step;
  int n_steps;
  int step, k;
  int site = 1; /* running index into sites[]; entry 0 is never evaluated */
  int winner = 0;
  int prev_winner = 0;

  int *sadcost[2];
  int_mv center_pel;
  int_mv cand_mv;
  unsigned int best_sad;
  unsigned int cand_sad;

  sadcost[0] = x->mvsadcost[0];
  sadcost[1] = x->mvsadcost[1];
  center_pel.as_mv.row = center_mv->as_mv.row >> 3;
  center_pel.as_mv.col = center_mv->as_mv.col >> 3;

  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  *num00 = 0;
  best_mv->as_mv.row = ref_mv->as_mv.row;
  best_mv->as_mv.col = ref_mv->as_mv.col;

  /* Work out the start point for the search */
  start_addr = ref_frame + d->offset +
               (ref_mv->as_mv.row * ref_stride) + ref_mv->as_mv.col;
  best_addr = start_addr;

  /* Check the starting position */
  best_sad = fn_ptr->sdf(src, src_stride, start_addr, ref_stride) +
             mvsad_err_cost(best_mv, &center_pel, sadcost, sad_per_bit);

  /* search_param determines the length of the initial step and hence
   * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
   * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   */
  per_step = x->searches_per_step;
  sites = &x->ss[search_param * per_step];
  n_steps = (x->ss_count / per_step) - search_param;

  for (step = 0; step < n_steps; ++step) {
    for (k = 0; k < per_step; ++k, ++site) {
      const int row = best_mv->as_mv.row + sites[site].mv.row;
      const int col = best_mv->as_mv.col + sites[site].mv.col;

      /* Trap illegal vectors (the limits themselves are excluded) */
      if (col <= x->mv_col_min || col >= x->mv_col_max ||
          row <= x->mv_row_min || row >= x->mv_row_max) {
        continue;
      }

      cand_sad = fn_ptr->sdf(src, src_stride, best_addr + sites[site].offset,
                             ref_stride);
      if (cand_sad >= best_sad) continue;

      /* Only pay for the vector cost when the raw SAD is promising. */
      cand_mv.as_mv.row = row;
      cand_mv.as_mv.col = col;
      cand_sad += mvsad_err_cost(&cand_mv, &center_pel, sadcost, sad_per_bit);

      if (cand_sad < best_sad) {
        best_sad = cand_sad;
        winner = site;
      }
    }

    if (winner != prev_winner) {
      /* Re-centre on the best site found in this step. */
      best_mv->as_mv.row += sites[winner].mv.row;
      best_mv->as_mv.col += sites[winner].mv.col;
      best_addr += sites[winner].offset;
      prev_winner = winner;
    } else if (best_addr == start_addr) {
      ++(*num00);
    }
  }

  cand_mv.as_mv.row = clamp(best_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
  cand_mv.as_mv.col = clamp(best_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);

  return fn_ptr->vf(src, src_stride, best_addr, ref_stride, &cand_sad) +
         mv_err_cost(&cand_mv, center_mv, mvcost, x->errorperbit);
}
1132 
1133 #if HAVE_SSE2 || HAVE_MSA || HAVE_LSX
/* Variant of vp8_diamond_search_sad_c() that evaluates four candidate sites
 * per call through fn_ptr->sdx4df() whenever a whole step is known to be
 * inside the allowed range, and falls back to per-site fn_ptr->sdf() calls
 * with individual range checks otherwise.
 *
 * ref_mv is clamped in place. best_mv receives the winning position. *num00
 * counts the steps that found no better site while the best position was
 * still the start point. The return value is fn_ptr->vf() at the best
 * position plus mv_err_cost() of the winning vector (scaled by 8) against
 * center_mv.
 */
int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                             int_mv *best_mv, int search_param, int sad_per_bit,
                             int *num00, vp8_variance_fn_ptr_t *fn_ptr,
                             int *mvcost[2], int_mv *center_mv) {
  unsigned char *const src = *(b->base_src) + b->src;
  const int src_stride = b->src_stride;
  const int ref_stride = x->e_mbd.pre.y_stride;
  unsigned char *const ref_frame = x->e_mbd.pre.y_buffer;
  unsigned char *start_addr;
  unsigned char *best_addr;

  const search_site *sites;
  int per_step;
  int n_steps;
  int step, k;
  int site = 1; /* running index into sites[]; entry 0 is never evaluated */
  int winner = 0;
  int prev_winner = 0;

  int *sadcost[2];
  int_mv center_pel;
  int_mv cand_mv;
  unsigned int best_sad;
  unsigned int cand_sad;

  sadcost[0] = x->mvsadcost[0];
  sadcost[1] = x->mvsadcost[1];
  center_pel.as_mv.row = center_mv->as_mv.row >> 3;
  center_pel.as_mv.col = center_mv->as_mv.col >> 3;

  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  *num00 = 0;
  best_mv->as_mv.row = ref_mv->as_mv.row;
  best_mv->as_mv.col = ref_mv->as_mv.col;

  /* Work out the start point for the search */
  start_addr = ref_frame + d->offset +
               (ref_mv->as_mv.row * ref_stride) + ref_mv->as_mv.col;
  best_addr = start_addr;

  /* Check the starting position */
  best_sad = fn_ptr->sdf(src, src_stride, start_addr, ref_stride) +
             mvsad_err_cost(best_mv, &center_pel, sadcost, sad_per_bit);

  /* search_param determines the length of the initial step and hence the
   * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 =
   * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   */
  per_step = x->searches_per_step;
  sites = &x->ss[search_param * per_step];
  n_steps = (x->ss_count / per_step) - search_param;

  for (step = 0; step < n_steps; ++step) {
    /* To know if all neighbor points are within the bounds, 4 bounds
     * checks are enough instead of checking 4 bounds for each point.
     */
    const int all_in =
        ((best_mv->as_mv.row + sites[site].mv.row) > x->mv_row_min) &&
        ((best_mv->as_mv.row + sites[site + 1].mv.row) < x->mv_row_max) &&
        ((best_mv->as_mv.col + sites[site + 2].mv.col) > x->mv_col_min) &&
        ((best_mv->as_mv.col + sites[site + 3].mv.col) < x->mv_col_max);

    if (all_in) {
      unsigned int sad4[4];
      const unsigned char *addr4[4];
      int t;

      for (k = 0; k < per_step; k += 4) {
        for (t = 0; t < 4; ++t) {
          addr4[t] = best_addr + sites[site + t].offset;
        }

        fn_ptr->sdx4df(src, src_stride, addr4, ref_stride, sad4);

        for (t = 0; t < 4; ++t, ++site) {
          if (sad4[t] >= best_sad) continue;

          cand_mv.as_mv.row = best_mv->as_mv.row + sites[site].mv.row;
          cand_mv.as_mv.col = best_mv->as_mv.col + sites[site].mv.col;
          sad4[t] +=
              mvsad_err_cost(&cand_mv, &center_pel, sadcost, sad_per_bit);

          if (sad4[t] < best_sad) {
            best_sad = sad4[t];
            winner = site;
          }
        }
      }
    } else {
      for (k = 0; k < per_step; ++k, ++site) {
        const int row = best_mv->as_mv.row + sites[site].mv.row;
        const int col = best_mv->as_mv.col + sites[site].mv.col;

        /* Trap illegal vectors (the limits themselves are excluded) */
        if (col <= x->mv_col_min || col >= x->mv_col_max ||
            row <= x->mv_row_min || row >= x->mv_row_max) {
          continue;
        }

        cand_sad = fn_ptr->sdf(src, src_stride, best_addr + sites[site].offset,
                               ref_stride);
        if (cand_sad >= best_sad) continue;

        cand_mv.as_mv.row = row;
        cand_mv.as_mv.col = col;
        cand_sad +=
            mvsad_err_cost(&cand_mv, &center_pel, sadcost, sad_per_bit);

        if (cand_sad < best_sad) {
          best_sad = cand_sad;
          winner = site;
        }
      }
    }

    if (winner != prev_winner) {
      /* Re-centre on the best site found in this step. */
      best_mv->as_mv.row += sites[winner].mv.row;
      best_mv->as_mv.col += sites[winner].mv.col;
      best_addr += sites[winner].offset;
      prev_winner = winner;
    } else if (best_addr == start_addr) {
      ++(*num00);
    }
  }

  cand_mv.as_mv.row = clamp(best_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
  cand_mv.as_mv.col = clamp(best_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);

  return fn_ptr->vf(src, src_stride, best_addr, ref_stride, &cand_sad) +
         mv_err_cost(&cand_mv, center_mv, mvcost, x->errorperbit);
}
1282 #endif  // HAVE_SSE2 || HAVE_MSA || HAVE_LSX
1283 
/* Exhaustive search of the window [ref - distance, ref + distance) in both
 * directions around ref_mv, further limited to the range stored in x.
 *
 * The centre position is evaluated first, so it wins ties. The best position
 * is written to d->bmi.mv. The return value is fn_ptr->vf() at that position
 * plus mv_err_cost() of the winning vector (scaled by 8) against center_mv.
 * Unlike the other searches in this file, ref_mv is not clamped here.
 */
int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                        int sad_per_bit, int distance,
                        vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                        int_mv *center_mv) {
  unsigned char *const src = *(b->base_src) + b->src;
  const int src_stride = b->src_stride;
  const int ref_stride = x->e_mbd.pre.y_stride;
  unsigned char *const ref_frame = x->e_mbd.pre.y_buffer;
  int_mv *const best_mv = &d->bmi.mv;

  const int ref_row = ref_mv->as_mv.row;
  const int ref_col = ref_mv->as_mv.col;

  /* Search window before applying the limits from x */
  int row_lo = ref_row - distance;
  int row_hi = ref_row + distance;
  int col_lo = ref_col - distance;
  int col_hi = ref_col + distance;

  unsigned char *ref_origin;
  unsigned char *best_addr;
  int *sadcost[2];
  int_mv center_pel;
  int_mv cand_mv;
  unsigned int best_sad;
  unsigned int cand_sad;
  int r, c;

  sadcost[0] = x->mvsadcost[0];
  sadcost[1] = x->mvsadcost[1];
  center_pel.as_mv.row = center_mv->as_mv.row >> 3;
  center_pel.as_mv.col = center_mv->as_mv.col >> 3;

  /* Work out the mid point for the search */
  ref_origin = ref_frame + d->offset;
  best_addr = ref_origin + (ref_row * ref_stride) + ref_col;

  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Baseline value at the centre */
  best_sad = fn_ptr->sdf(src, src_stride, best_addr, ref_stride) +
             mvsad_err_cost(best_mv, &center_pel, sadcost, sad_per_bit);

  /* Apply further limits to prevent us looking using vectors that stretch
   * beyond the UMV border
   */
  if (x->mv_col_min > col_lo) col_lo = x->mv_col_min;
  if (x->mv_col_max < col_hi) col_hi = x->mv_col_max;
  if (x->mv_row_min > row_lo) row_lo = x->mv_row_min;
  if (x->mv_row_max < row_hi) row_hi = x->mv_row_max;

  for (r = row_lo; r < row_hi; ++r) {
    /* Walk one row of candidate positions, left to right. */
    unsigned char *cand = ref_origin + r * ref_stride + col_lo;

    cand_mv.as_mv.row = r;

    for (c = col_lo; c < col_hi; ++c, ++cand) {
      cand_sad = fn_ptr->sdf(src, src_stride, cand, ref_stride);
      if (cand_sad >= best_sad) continue;

      /* Only pay for the vector cost when the raw SAD is promising. */
      cand_mv.as_mv.col = c;
      cand_sad += mvsad_err_cost(&cand_mv, &center_pel, sadcost, sad_per_bit);

      if (cand_sad < best_sad) {
        best_sad = cand_sad;
        best_mv->as_mv.row = r;
        best_mv->as_mv.col = c;
        best_addr = cand;
      }
    }
  }

  cand_mv.as_mv.row = clamp(best_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
  cand_mv.as_mv.col = clamp(best_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);

  return fn_ptr->vf(src, src_stride, best_addr, ref_stride, &cand_sad) +
         mv_err_cost(&cand_mv, center_mv, mvcost, x->errorperbit);
}
1372 
/* Iterative 4-neighbour refinement around ref_mv.
 *
 * Up to search_range times, the positions above, left of, right of and below
 * the current best are evaluated; the search moves to the best of them and
 * stops early when none improves on the current position. ref_mv is updated
 * in place with the result. The return value is fn_ptr->vf() at the final
 * position plus mv_err_cost() of the final vector (scaled by 8) against
 * center_mv.
 */
int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
                              int *mvcost[2], int_mv *center_mv) {
  static const MV nbr[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };

  const int src_stride = b->src_stride;
  const int ref_stride = x->e_mbd.pre.y_stride;
  unsigned char *const ref_frame = x->e_mbd.pre.y_buffer;
  unsigned char *const src = *(b->base_src) + b->src;
  unsigned char *best_addr = ref_frame + d->offset +
                             (ref_mv->as_mv.row * ref_stride) +
                             ref_mv->as_mv.col;

  int *sadcost[2];
  int_mv center_pel;
  int_mv cand_mv;
  unsigned int best_sad;
  unsigned int cand_sad;
  int iter, n;

  sadcost[0] = x->mvsadcost[0];
  sadcost[1] = x->mvsadcost[1];
  center_pel.as_mv.row = center_mv->as_mv.row >> 3;
  center_pel.as_mv.col = center_mv->as_mv.col >> 3;

  best_sad = fn_ptr->sdf(src, src_stride, best_addr, ref_stride) +
             mvsad_err_cost(ref_mv, &center_pel, sadcost, error_per_bit);

  for (iter = 0; iter < search_range; ++iter) {
    int winner = -1;

    for (n = 0; n < 4; ++n) {
      const short row = ref_mv->as_mv.row + nbr[n].row;
      const short col = ref_mv->as_mv.col + nbr[n].col;

      /* Skip positions on or outside the limits stored in x. */
      if (col <= x->mv_col_min || col >= x->mv_col_max ||
          row <= x->mv_row_min || row >= x->mv_row_max) {
        continue;
      }

      cand_sad = fn_ptr->sdf(src, src_stride,
                             nbr[n].row * ref_stride + nbr[n].col + best_addr,
                             ref_stride);
      if (cand_sad >= best_sad) continue;

      /* Only pay for the vector cost when the raw SAD is promising. */
      cand_mv.as_mv.row = row;
      cand_mv.as_mv.col = col;
      cand_sad += mvsad_err_cost(&cand_mv, &center_pel, sadcost, error_per_bit);

      if (cand_sad < best_sad) {
        best_sad = cand_sad;
        winner = n;
      }
    }

    /* No neighbour improved on the current position: done. */
    if (winner == -1) break;

    ref_mv->as_mv.row += nbr[winner].row;
    ref_mv->as_mv.col += nbr[winner].col;
    best_addr += nbr[winner].row * ref_stride + nbr[winner].col;
  }

  cand_mv.as_mv.row = clamp(ref_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
  cand_mv.as_mv.col = clamp(ref_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);

  return fn_ptr->vf(src, src_stride, best_addr, ref_stride, &cand_sad) +
         mv_err_cost(&cand_mv, center_mv, mvcost, x->errorperbit);
}
1450 
1451 #if HAVE_SSE2 || HAVE_MSA
/* Variant of vp8_refining_search_sad_c() that evaluates all four neighbours
 * with one fn_ptr->sdx4df() call when they are all strictly inside the
 * limits stored in x, and falls back to per-neighbour fn_ptr->sdf() calls
 * with individual range checks otherwise.
 *
 * ref_mv is updated in place with the result. The return value is
 * fn_ptr->vf() at the final position plus mv_err_cost() of the final vector
 * (scaled by 8) against center_mv.
 */
int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
                              int *mvcost[2], int_mv *center_mv) {
  static const MV nbr[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };

  const int src_stride = b->src_stride;
  const int ref_stride = x->e_mbd.pre.y_stride;
  unsigned char *const ref_frame = x->e_mbd.pre.y_buffer;
  unsigned char *const src = *(b->base_src) + b->src;
  unsigned char *best_addr = ref_frame + d->offset +
                             (ref_mv->as_mv.row * ref_stride) +
                             ref_mv->as_mv.col;

  int *sadcost[2];
  int_mv center_pel;
  int_mv cand_mv;
  unsigned int best_sad;
  unsigned int cand_sad;
  int iter, n;

  sadcost[0] = x->mvsadcost[0];
  sadcost[1] = x->mvsadcost[1];
  center_pel.as_mv.row = center_mv->as_mv.row >> 3;
  center_pel.as_mv.col = center_mv->as_mv.col >> 3;

  best_sad = fn_ptr->sdf(src, src_stride, best_addr, ref_stride) +
             mvsad_err_cost(ref_mv, &center_pel, sadcost, error_per_bit);

  for (iter = 0; iter < search_range; ++iter) {
    int winner = -1;
    const int all_in = ((ref_mv->as_mv.row - 1) > x->mv_row_min) &&
                       ((ref_mv->as_mv.row + 1) < x->mv_row_max) &&
                       ((ref_mv->as_mv.col - 1) > x->mv_col_min) &&
                       ((ref_mv->as_mv.col + 1) < x->mv_col_max);

    if (all_in) {
      unsigned int sad4[4];
      const unsigned char *addr4[4];

      /* Up, left, right, down: same order as nbr[]. */
      for (n = 0; n < 4; ++n) {
        addr4[n] = best_addr + nbr[n].row * ref_stride + nbr[n].col;
      }

      fn_ptr->sdx4df(src, src_stride, addr4, ref_stride, sad4);

      for (n = 0; n < 4; ++n) {
        if (sad4[n] >= best_sad) continue;

        cand_mv.as_mv.row = ref_mv->as_mv.row + nbr[n].row;
        cand_mv.as_mv.col = ref_mv->as_mv.col + nbr[n].col;
        sad4[n] +=
            mvsad_err_cost(&cand_mv, &center_pel, sadcost, error_per_bit);

        if (sad4[n] < best_sad) {
          best_sad = sad4[n];
          winner = n;
        }
      }
    } else {
      for (n = 0; n < 4; ++n) {
        const short row = ref_mv->as_mv.row + nbr[n].row;
        const short col = ref_mv->as_mv.col + nbr[n].col;

        /* Skip positions on or outside the limits stored in x. */
        if (col <= x->mv_col_min || col >= x->mv_col_max ||
            row <= x->mv_row_min || row >= x->mv_row_max) {
          continue;
        }

        cand_sad =
            fn_ptr->sdf(src, src_stride,
                        nbr[n].row * ref_stride + nbr[n].col + best_addr,
                        ref_stride);
        if (cand_sad >= best_sad) continue;

        cand_mv.as_mv.row = row;
        cand_mv.as_mv.col = col;
        cand_sad +=
            mvsad_err_cost(&cand_mv, &center_pel, sadcost, error_per_bit);

        if (cand_sad < best_sad) {
          best_sad = cand_sad;
          winner = n;
        }
      }
    }

    /* No neighbour improved on the current position: done. */
    if (winner == -1) break;

    ref_mv->as_mv.row += nbr[winner].row;
    ref_mv->as_mv.col += nbr[winner].col;
    best_addr += nbr[winner].row * ref_stride + nbr[winner].col;
  }

  cand_mv.as_mv.row = clamp(ref_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
  cand_mv.as_mv.col = clamp(ref_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);

  return fn_ptr->vf(src, src_stride, best_addr, ref_stride, &cand_sad) +
         mv_err_cost(&cand_mv, center_mv, mvcost, x->errorperbit);
}
1561 #endif  // HAVE_SSE2 || HAVE_MSA
1562