1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <assert.h>
12 #include <limits.h>
13 #include <math.h>
14 #include <stdio.h>
15 
16 #include "./vpx_config.h"
17 #include "./vpx_dsp_rtcd.h"
18 
19 #include "vpx_dsp/vpx_dsp_common.h"
20 #include "vpx_mem/vpx_mem.h"
21 #include "vpx_ports/mem.h"
22 
23 #include "vp9/common/vp9_common.h"
24 #include "vp9/common/vp9_mvref_common.h"
25 #include "vp9/common/vp9_reconinter.h"
26 
27 #include "vp9/encoder/vp9_encoder.h"
28 #include "vp9/encoder/vp9_mcomp.h"
29 
30 // #define NEW_DIAMOND_SEARCH
31 
// Tightens the full-pel search window in |mv_limits| to the range that is
// reachable from the reference vector |mv|.
//
// |mv| is split into a whole part (mv >> 3) and a 3-bit fraction (mv & 7).
// The window around it spans MAX_FULL_PEL_VAL in each direction and is also
// clipped to the (MV_LOW, MV_UPP) range expressed in whole units. The limits
// already stored in |mv_limits| are only ever narrowed, never widened.
void vp9_set_mv_search_range(MvLimits *mv_limits, const MV *mv) {
  const int ref_col_fp = mv->col >> 3;
  const int ref_row_fp = mv->row >> 3;
  // A non-zero fraction raises the lower bound by one whole unit.
  const int col_has_frac = (mv->col & 7) != 0;
  const int row_has_frac = (mv->row & 7) != 0;
  // Valid MV range in whole units, excluding the extreme values.
  const int fp_floor = (MV_LOW >> 3) + 1;
  const int fp_ceil = (MV_UPP >> 3) - 1;

  const int col_min =
      VPXMAX(ref_col_fp - MAX_FULL_PEL_VAL + col_has_frac, fp_floor);
  const int row_min =
      VPXMAX(ref_row_fp - MAX_FULL_PEL_VAL + row_has_frac, fp_floor);
  const int col_max = VPXMIN(ref_col_fp + MAX_FULL_PEL_VAL, fp_ceil);
  const int row_max = VPXMIN(ref_row_fp + MAX_FULL_PEL_VAL, fp_ceil);

  // Intersect the caller's window with the valid MV window so the diamond
  // search has fewer range checks to do.
  mv_limits->col_min = VPXMAX(mv_limits->col_min, col_min);
  mv_limits->col_max = VPXMIN(mv_limits->col_max, col_max);
  mv_limits->row_min = VPXMAX(mv_limits->row_min, row_min);
  mv_limits->row_max = VPXMIN(mv_limits->row_max, row_max);
}
50 
// Computes the sub-pel search window for |ref_mv| and stores it in
// |subpel_mv_limits|.
//
// Each bound is the tightest of three constraints:
//   - the window in |umv_window_limits|, scaled by 8;
//   - |ref_mv| plus/minus MAX_FULL_PEL_VAL * 8;
//   - the open interval (MV_LOW, MV_UPP).
void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits,
                                    const MvLimits *umv_window_limits,
                                    const MV *ref_mv) {
  const int max_reach = MAX_FULL_PEL_VAL * 8;

  // Intersection of the scaled window with the reach around ref_mv.
  const int col_lo =
      VPXMAX(umv_window_limits->col_min * 8, ref_mv->col - max_reach);
  const int col_hi =
      VPXMIN(umv_window_limits->col_max * 8, ref_mv->col + max_reach);
  const int row_lo =
      VPXMAX(umv_window_limits->row_min * 8, ref_mv->row - max_reach);
  const int row_hi =
      VPXMIN(umv_window_limits->row_max * 8, ref_mv->row + max_reach);

  // Finally keep everything strictly inside the valid MV range.
  subpel_mv_limits->col_min = VPXMAX(MV_LOW + 1, col_lo);
  subpel_mv_limits->col_max = VPXMIN(MV_UPP - 1, col_hi);
  subpel_mv_limits->row_min = VPXMAX(MV_LOW + 1, row_lo);
  subpel_mv_limits->row_max = VPXMIN(MV_UPP - 1, row_hi);
}
68 
vp9_init_search_range(int size)69 int vp9_init_search_range(int size) {
70   int sr = 0;
71   // Minimum search size no matter what the passed in value.
72   size = VPXMAX(16, size);
73 
74   while ((size << sr) < MAX_FULL_PEL_VAL) sr++;
75 
76   sr = VPXMIN(sr, MAX_MVSEARCH_STEPS - 2);
77   return sr;
78 }
79 
mv_cost(const MV * mv,const int * joint_cost,int * const comp_cost[2])80 static INLINE int mv_cost(const MV *mv, const int *joint_cost,
81                           int *const comp_cost[2]) {
82   assert(mv->row >= -MV_MAX && mv->row < MV_MAX);
83   assert(mv->col >= -MV_MAX && mv->col < MV_MAX);
84   return joint_cost[vp9_get_mv_joint(mv)] + comp_cost[0][mv->row] +
85          comp_cost[1][mv->col];
86 }
87 
vp9_mv_bit_cost(const MV * mv,const MV * ref,const int * mvjcost,int * mvcost[2],int weight)88 int vp9_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
89                     int *mvcost[2], int weight) {
90   const MV diff = { mv->row - ref->row, mv->col - ref->col };
91   return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
92 }
93 
94 #define PIXEL_TRANSFORM_ERROR_SCALE 4
mv_err_cost(const MV * mv,const MV * ref,const int * mvjcost,int * mvcost[2],int error_per_bit)95 static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
96                        int *mvcost[2], int error_per_bit) {
97   if (mvcost) {
98     const MV diff = { mv->row - ref->row, mv->col - ref->col };
99     return (int)ROUND64_POWER_OF_TWO(
100         (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
101         RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT +
102             PIXEL_TRANSFORM_ERROR_SCALE);
103   }
104   return 0;
105 }
106 
mvsad_err_cost(const MACROBLOCK * x,const MV * mv,const MV * ref,int sad_per_bit)107 static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
108                           int sad_per_bit) {
109   const MV diff = { mv->row - ref->row, mv->col - ref->col };
110   return ROUND_POWER_OF_TWO(
111       (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) * sad_per_bit,
112       VP9_PROB_COST_SHIFT);
113 }
114 
// Fills |cfg| with a 4-site-per-step search pattern.
//
// Starting at MAX_FIRST_STEP and halving until the length reaches zero, each
// step contributes the { row, col } offsets { -len, 0 }, { len, 0 },
// { 0, -len } and { 0, len }. For every site both the MV offset (ss_mv) and
// the matching buffer offset for a frame of the given |stride| (ss_os) are
// stored.
void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
  int step_len = MAX_FIRST_STEP;
  int site_idx = 0;

  while (step_len > 0) {
    const MV sites[4] = {
      { -step_len, 0 }, { step_len, 0 }, { 0, -step_len }, { 0, step_len }
    };
    int k = 0;
    while (k < 4) {
      cfg->ss_mv[site_idx] = sites[k];
      cfg->ss_os[site_idx] = sites[k].row * stride + sites[k].col;
      ++k;
      ++site_idx;
    }
    step_len /= 2;
  }

  cfg->searches_per_step = 4;
  cfg->total_steps = site_idx / cfg->searches_per_step;
}
132 
// Fills |cfg| with an 8-site-per-step search pattern.
//
// Starting at MAX_FIRST_STEP and halving until the length reaches zero, each
// step contributes the four axis-aligned { row, col } offsets followed by the
// four diagonal ones. For every site both the MV offset (ss_mv) and the
// matching buffer offset for a frame of the given |stride| (ss_os) are
// stored.
void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
  int step_len = MAX_FIRST_STEP;
  int site_idx = 0;

  while (step_len > 0) {
    const MV sites[8] = {
      { -step_len, 0 },         { step_len, 0 },
      { 0, -step_len },         { 0, step_len },
      { -step_len, -step_len }, { -step_len, step_len },
      { step_len, -step_len },  { step_len, step_len }
    };
    int k = 0;
    while (k < 8) {
      cfg->ss_mv[site_idx] = sites[k];
      cfg->ss_os[site_idx] = sites[k].row * stride + sites[k].col;
      ++k;
      ++site_idx;
    }
    step_len /= 2;
  }

  cfg->searches_per_step = 8;
  cfg->total_steps = site_idx / cfg->searches_per_step;
}
152 
// Extracts the low 3 bits of a motion vector component, which is the
// fractional offset handed to the svf/svaf variance functions.
static INLINE int sp(int x) {
  return x & 0x7;
}
155 
// Returns the address inside |buf| of the whole-sample position addressed by
// (r, c): both coordinates are shifted right by 3 to drop their fractional
// bits before the row is scaled by |stride|.
static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
  const int whole_row = r >> 3;
  const int whole_col = c >> 3;
  return buf + (whole_row * stride + whole_col);
}
159 
// CHECK_BETTER(v, r, c): scores the candidate position (r, c) and records it
// as the new best when it beats the running minimum.
//
// The macro relies on names supplied by the expanding function (see
// SETUP_SUBPEL_SEARCH): the window minc/maxc/minr/maxr, the reference MV
// rr/rc, the buffers y/z with their strides, vfp, second_pred, the cost
// tables and the outputs besterr, br, bc, *distortion and *sse1.
//
// A candidate outside the window is not evaluated and sets v to INT_MAX.
// Otherwise v receives the svf/svaf result plus the MV rate cost. When v is
// below besterr the candidate becomes the new best.
//
// The macro parameters are parenthesized and the temporaries carry a cb_
// prefix so they cannot shadow the enclosing function's ref_mv parameter.
// r and c are expanded several times, so they must be free of side effects
// and must not refer to br or bc.
#if CONFIG_VP9_HIGHBITDEPTH
/* High bit depth: the sum is formed in 64 bits and saturated at INT_MAX. */
#define CHECK_BETTER(v, r, c)                                                \
  do {                                                                       \
    if ((c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) {          \
      int64_t tmpmse;                                                        \
      const MV cb_mv = { (r), (c) };                                         \
      const MV cb_ref_mv = { rr, rc };                                       \
      if (second_pred == NULL) {                                             \
        thismse = vfp->svf(pre(y, y_stride, (r), (c)), y_stride, sp(c),      \
                           sp(r), z, src_stride, &sse);                      \
      } else {                                                               \
        thismse = vfp->svaf(pre(y, y_stride, (r), (c)), y_stride, sp(c),     \
                            sp(r), z, src_stride, &sse, second_pred);        \
      }                                                                      \
      tmpmse = thismse;                                                      \
      tmpmse += mv_err_cost(&cb_mv, &cb_ref_mv, mvjcost, mvcost,             \
                            error_per_bit);                                  \
      if (tmpmse >= INT_MAX) {                                               \
        v = INT_MAX;                                                         \
      } else if ((v = (uint32_t)tmpmse) < besterr) {                         \
        besterr = v;                                                         \
        br = (r);                                                            \
        bc = (c);                                                            \
        *distortion = thismse;                                               \
        *sse1 = sse;                                                         \
      }                                                                      \
    } else {                                                                 \
      v = INT_MAX;                                                           \
    }                                                                        \
  } while (0)
#else
/* Low bit depth: the sum is formed directly in int arithmetic. */
#define CHECK_BETTER(v, r, c)                                                \
  do {                                                                       \
    if ((c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) {          \
      const MV cb_mv = { (r), (c) };                                         \
      const MV cb_ref_mv = { rr, rc };                                       \
      if (second_pred == NULL) {                                             \
        thismse = vfp->svf(pre(y, y_stride, (r), (c)), y_stride, sp(c),      \
                           sp(r), z, src_stride, &sse);                      \
      } else {                                                               \
        thismse = vfp->svaf(pre(y, y_stride, (r), (c)), y_stride, sp(c),     \
                            sp(r), z, src_stride, &sse, second_pred);        \
      }                                                                      \
      if ((v = mv_err_cost(&cb_mv, &cb_ref_mv, mvjcost, mvcost,              \
                           error_per_bit) +                                  \
               thismse) < besterr) {                                         \
        besterr = v;                                                         \
        br = (r);                                                            \
        bc = (c);                                                            \
        *distortion = thismse;                                               \
        *sse1 = sse;                                                         \
      }                                                                      \
    } else {                                                                 \
      v = INT_MAX;                                                           \
    }                                                                        \
  } while (0)

#endif
// FIRST_LEVEL_CHECKS: probes the four neighbours at distance hstep around
// the centre (tr, tc) in the order left, right, up, down, then the single
// diagonal lying between the better horizontal and the better vertical
// neighbour.
//
// whichdir records that choice for SECOND_LEVEL_CHECKS: bit 0 is set when
// right scored at least as well as left, bit 1 when down scored at least as
// well as up (0 = up-left, 1 = up-right, 2 = down-left, 3 = down-right).
#define FIRST_LEVEL_CHECKS                                                   \
  do {                                                                       \
    unsigned int err_left, err_right, err_up, err_down, err_diag;            \
    CHECK_BETTER(err_left, tr, tc - hstep);                                  \
    CHECK_BETTER(err_right, tr, tc + hstep);                                 \
    CHECK_BETTER(err_up, tr - hstep, tc);                                    \
    CHECK_BETTER(err_down, tr + hstep, tc);                                  \
    whichdir = 0;                                                            \
    if (err_left >= err_right) whichdir += 1;                                \
    if (err_up >= err_down) whichdir += 2;                                   \
    if (whichdir == 0) {                                                     \
      CHECK_BETTER(err_diag, tr - hstep, tc - hstep);                        \
    } else if (whichdir == 1) {                                              \
      CHECK_BETTER(err_diag, tr - hstep, tc + hstep);                        \
    } else if (whichdir == 2) {                                              \
      CHECK_BETTER(err_diag, tr + hstep, tc - hstep);                        \
    } else {                                                                 \
      CHECK_BETTER(err_diag, tr + hstep, tc + hstep);                        \
    }                                                                        \
  } while (0)
232 
// SECOND_LEVEL_CHECKS: extra probes around the point picked by
// FIRST_LEVEL_CHECKS, chosen by how the best position (br, bc) moved away
// from the centre (tr, tc).
//
//  - Moved diagonally: probe the two points one further step along each
//    axis.
//  - Moved along one axis only: probe the two points two steps along that
//    axis and one hstep to either side, then one more point selected by
//    whichdir.
//  - Did not move: nothing is probed.
//
// The movement (kr, kc) is captured before any probe runs, because
// CHECK_BETTER may update br and bc.
#define SECOND_LEVEL_CHECKS                                                  \
  do {                                                                       \
    const int row_moved = (tr != br);                                        \
    const int col_moved = (tc != bc);                                        \
    const int kr = br - tr;                                                  \
    const int kc = bc - tc;                                                  \
    unsigned int second;                                                     \
    if (row_moved && col_moved) {                                            \
      CHECK_BETTER(second, tr + kr, tc + 2 * kc);                            \
      CHECK_BETTER(second, tr + 2 * kr, tc + kc);                            \
    } else if (col_moved) {                                                  \
      CHECK_BETTER(second, tr + hstep, tc + 2 * kc);                         \
      CHECK_BETTER(second, tr - hstep, tc + 2 * kc);                         \
      if (whichdir == 0 || whichdir == 1) {                                  \
        CHECK_BETTER(second, tr + hstep, tc + kc);                           \
      } else if (whichdir == 2 || whichdir == 3) {                           \
        CHECK_BETTER(second, tr - hstep, tc + kc);                           \
      }                                                                      \
    } else if (row_moved) {                                                  \
      CHECK_BETTER(second, tr + 2 * kr, tc + hstep);                         \
      CHECK_BETTER(second, tr + 2 * kr, tc - hstep);                         \
      if (whichdir == 0 || whichdir == 2) {                                  \
        CHECK_BETTER(second, tr + kr, tc + hstep);                           \
      } else if (whichdir == 1 || whichdir == 3) {                           \
        CHECK_BETTER(second, tr + kr, tc - hstep);                           \
      }                                                                      \
    }                                                                        \
  } while (0)
264 
// SETUP_SUBPEL_SEARCH: common prologue of the sub-pel search functions.
//
// It expects x, bestmv, ref_mv and iters_per_step to be in scope. It
// declares every local that CHECK_BETTER and the *_LEVEL_CHECKS macros use,
// computes the sub-pel window into minc/maxc/minr/maxr, and finally
// multiplies *bestmv by 8 in place. The buffer offset of the incoming MV is
// captured before that scaling. The expansion ends without a semicolon; the
// caller supplies it.
#define SETUP_SUBPEL_SEARCH                                                  \
  /* Source block (z) and reference block (y) of plane 0. */                 \
  const MACROBLOCKD *xd = &x->e_mbd;                                         \
  const uint8_t *const z = x->plane[0].src.buf;                              \
  const int src_stride = x->plane[0].src.stride;                             \
  const uint8_t *const y = xd->plane[0].pre[0].buf;                          \
  const int y_stride = xd->plane[0].pre[0].stride;                           \
  /* Buffer offset of the incoming full-pel best MV. */                      \
  const int offset = bestmv->row * y_stride + bestmv->col;                   \
  /* Every precision level shares the same iteration budget. */              \
  const unsigned int halfiters = iters_per_step;                             \
  const unsigned int quarteriters = iters_per_step;                          \
  const unsigned int eighthiters = iters_per_step;                           \
  /* Best score so far plus scratch values written by CHECK_BETTER. */       \
  unsigned int besterr = UINT_MAX;                                           \
  unsigned int sse;                                                          \
  unsigned int whichdir;                                                     \
  int thismse;                                                               \
  /* Reference MV; best MV and search centre are bestmv scaled by 8. */      \
  int rr = ref_mv->row;                                                      \
  int rc = ref_mv->col;                                                      \
  int br = bestmv->row * 8;                                                  \
  int bc = bestmv->col * 8;                                                  \
  int tr = br;                                                               \
  int tc = bc;                                                               \
  int hstep = 4;                                                             \
  int minc;                                                                  \
  int maxc;                                                                  \
  int minr;                                                                  \
  int maxr;                                                                  \
  MvLimits subpel_mv_limits;                                                 \
                                                                             \
  vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv);  \
  minc = subpel_mv_limits.col_min;                                           \
  maxc = subpel_mv_limits.col_max;                                           \
  minr = subpel_mv_limits.row_min;                                           \
  maxr = subpel_mv_limits.row_max;                                           \
                                                                             \
  bestmv->row *= 8;                                                          \
  bestmv->col *= 8
298 
setup_center_error(const MACROBLOCKD * xd,const MV * bestmv,const MV * ref_mv,int error_per_bit,const vp9_variance_fn_ptr_t * vfp,const uint8_t * const src,const int src_stride,const uint8_t * const y,int y_stride,const uint8_t * second_pred,int w,int h,int offset,int * mvjcost,int * mvcost[2],uint32_t * sse1,uint32_t * distortion)299 static unsigned int setup_center_error(
300     const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
301     int error_per_bit, const vp9_variance_fn_ptr_t *vfp,
302     const uint8_t *const src, const int src_stride, const uint8_t *const y,
303     int y_stride, const uint8_t *second_pred, int w, int h, int offset,
304     int *mvjcost, int *mvcost[2], uint32_t *sse1, uint32_t *distortion) {
305 #if CONFIG_VP9_HIGHBITDEPTH
306   uint64_t besterr;
307   if (second_pred != NULL) {
308     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
309       DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]);
310       vpx_highbd_comp_avg_pred(comp_pred16, CONVERT_TO_SHORTPTR(second_pred), w,
311                                h, CONVERT_TO_SHORTPTR(y + offset), y_stride);
312       besterr =
313           vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1);
314     } else {
315       DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
316       vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
317       besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
318     }
319   } else {
320     besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
321   }
322   *distortion = (uint32_t)besterr;
323   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
324   if (besterr >= UINT_MAX) return UINT_MAX;
325   return (uint32_t)besterr;
326 #else
327   uint32_t besterr;
328   (void)xd;
329   if (second_pred != NULL) {
330     DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
331     vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
332     besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
333   } else {
334     besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
335   }
336   *distortion = besterr;
337   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
338   return besterr;
339 #endif  // CONFIG_VP9_HIGHBITDEPTH
340 }
341 
// Divides |n| by |d| and rounds to the nearest integer, with halves rounded
// away from zero. |d| must be non-zero.
static INLINE int64_t divide_and_round(const int64_t n, const int64_t d) {
  const int64_t half = d / 2;
  const int signs_differ = (n < 0) != (d < 0);
  // Push the numerator away from zero by half the divisor so that the
  // truncating division rounds to nearest.
  if (signs_differ) {
    return (n - half) / d;
  }
  return (n + half) / d;
}
345 
// Returns 1 when the centre cost cost_list[0] is strictly smaller than each
// of the four neighbouring costs cost_list[1..4], and 0 otherwise.
static INLINE int is_cost_list_wellbehaved(int *cost_list) {
  int i;
  for (i = 1; i <= 4; ++i) {
    if (cost_list[0] >= cost_list[i]) return 0;
  }
  return 1;
}
350 
// Estimates where the cost surface has its minimum, writing the row offset
// to *ir and the column offset to *ic in units of 1 / 2^bits of the sample
// spacing.
//
// The surface is modelled as S = A(x - x0)^2 + B(y - y0)^2 + C. Given the
// costs S0..S4 sampled at (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and
// (-1, 0), the minimum lies at
//   x0 = 1/2 (S1 - S3) / (S1 + S3 - 2*S0),
//   y0 = 1/2 (S4 - S2) / (S4 + S2 - 2*S0).
// The code is an integer version of those formulas; the 1/2 is folded into
// the 2^(bits - 1) scale. Both denominators must be non-zero, which holds
// whenever is_cost_list_wellbehaved() accepts the list.
static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) {
  const int64_t s_centre = cost_list[0];
  const int64_t s_left = cost_list[1];
  const int64_t s_below = cost_list[2];
  const int64_t s_right = cost_list[3];
  const int64_t s_above = cost_list[4];
  const int scale = 1 << (bits - 1);

  const int64_t col_num = (s_left - s_right) * scale;
  const int64_t col_den = s_left - 2 * s_centre + s_right;
  const int64_t row_num = (s_above - s_below) * scale;
  const int64_t row_den = s_above - 2 * s_centre + s_below;

  *ic = (int)divide_and_round(col_num, col_den);
  *ir = (int)divide_and_round(row_num, row_den);
}
368 
// Sub-pel "search" that performs no refinement at all.
//
// It shares its signature with the other sub-pel search functions. The
// shared prologue multiplies *bestmv by 8; the function then scores that
// starting point with setup_center_error() (which fills *distortion and
// *sse1) and returns the score. No neighbouring position is examined.
uint32_t vp9_skip_sub_pixel_tree(
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
    int h, int use_accurate_subpel_search) {
  SETUP_SUBPEL_SEARCH;

  // Parameters that are accepted only to match the common signature.
  (void)allow_hp;
  (void)forced_stop;
  (void)cost_list;
  (void)use_accurate_subpel_search;

  // Locals declared by SETUP_SUBPEL_SEARCH that this variant never reads.
  (void)halfiters;
  (void)quarteriters;
  (void)eighthiters;
  (void)whichdir;
  (void)hstep;
  (void)rr;
  (void)rc;
  (void)minr;
  (void)minc;
  (void)maxr;
  (void)maxc;
  (void)tr;
  (void)tc;
  (void)sse;
  (void)thismse;

  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
                               src_stride, y, y_stride, second_pred, w, h,
                               offset, mvjcost, mvcost, sse1, distortion);
  return besterr;
}
401 
// Sub-pel refinement, most aggressively pruned variant.
//
// When |cost_list| holds five valid costs with a strict minimum at the
// centre, the half- and quarter-step probing is replaced by a single probe
// at the minimum of a surface fitted to those costs. Otherwise the regular
// half-step and (unless forced_stop == 2) quarter-step probing is done.
// A final stage with a further halved step runs only when |allow_hp| is
// set, use_mv_hp(ref_mv) holds and forced_stop == 0.
//
// On return *bestmv holds the best position found (the input scaled by 8,
// then refined), *distortion and *sse1 describe that position, and the
// function's value is its total score.
//
// NOTE(review): on the cost-list path hstep is still 4 when the final stage
// starts, so that stage probes at a step of 2 rather than 1 - confirm that
// this is intended.
uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore(
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
    int h, int use_accurate_subpel_search) {
  SETUP_SUBPEL_SEARCH;
  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
                               src_stride, y, y_stride, second_pred, w, h,
                               offset, mvjcost, mvcost, sse1, distortion);
  (void)halfiters;
  (void)quarteriters;
  (void)eighthiters;
  (void)whichdir;
  (void)allow_hp;
  (void)forced_stop;
  (void)hstep;
  (void)use_accurate_subpel_search;

  if (!cost_list || cost_list[0] == INT_MAX || cost_list[1] == INT_MAX ||
      cost_list[2] == INT_MAX || cost_list[3] == INT_MAX ||
      cost_list[4] == INT_MAX || !is_cost_list_wellbehaved(cost_list)) {
    // No usable cost surface: probe around the centre at the initial step.
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }

    tr = br;
    tc = bc;

    // Each later stage shares at least one probe position with the previous
    // one (two when a diagonal was selected).
    // forced_stop: 0 - full, 1 - qtr only, 2 - half only.
    if (forced_stop != 2) {
      hstep >>= 1;
      FIRST_LEVEL_CHECKS;
      if (quarteriters > 1) {
        SECOND_LEVEL_CHECKS;
      }
    }
  } else {
    // Jump straight to the estimated minimum of the fitted surface. The
    // estimate has 2 fractional bits; doubling it matches the scale of
    // tr/tc.
    unsigned int minpt = INT_MAX;
    int ir, ic;
    get_cost_surf_min(cost_list, &ir, &ic, 2);
    if (!(ir == 0 && ic == 0)) {
      CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic);
    }
  }

  // Re-centre on the best position found so far.
  tr = br;
  tc = bc;

  if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}
467 
// Sub-pel refinement, heavily pruned variant.
//
// When |cost_list| holds five valid costs with a strict minimum at the
// centre, the first-stage probing is replaced by a single probe at the
// minimum of a surface fitted to those costs (rounded to multiples of
// hstep). Otherwise the regular first-stage probing around the centre is
// done. A second stage at half the step follows unless forced_stop == 2. A
// third stage at a quarter of the step runs only when |allow_hp| is set,
// use_mv_hp(ref_mv) holds and forced_stop == 0.
//
// On return *bestmv holds the best position found (the input scaled by 8,
// then refined), *distortion and *sse1 describe that position, and the
// function's value is its total score.
uint32_t vp9_find_best_sub_pixel_tree_pruned_more(
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
    int h, int use_accurate_subpel_search) {
  SETUP_SUBPEL_SEARCH;
  (void)use_accurate_subpel_search;

  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
                               src_stride, y, y_stride, second_pred, w, h,
                               offset, mvjcost, mvcost, sse1, distortion);

  if (!cost_list || cost_list[0] == INT_MAX || cost_list[1] == INT_MAX ||
      cost_list[2] == INT_MAX || cost_list[3] == INT_MAX ||
      cost_list[4] == INT_MAX || !is_cost_list_wellbehaved(cost_list)) {
    // No usable cost surface: probe around the centre at the initial step.
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  } else {
    // The surface estimate has 1 fractional bit, i.e. it counts hsteps.
    int ir, ic;
    unsigned int minpt;
    get_cost_surf_min(cost_list, &ir, &ic, 1);
    if (!(ir == 0 && ic == 0)) {
      CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
    }
  }

  // Each later stage shares at least one probe position with the previous
  // one (two when a diagonal was selected).
  // forced_stop: 0 - full, 1 - qtr only, 2 - half only.
  if (forced_stop != 2) {
    tr = br;
    tc = bc;
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
    tr = br;
    tc = bc;
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }
  // Keep static analysis from reporting tr and tc as unused past this
  // point.
  (void)tr;
  (void)tc;

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}
529 
// Sub-pel refinement, pruned variant.
//
// When |cost_list| holds five valid costs, the first stage probes only three
// positions: the horizontal and vertical neighbours on the cheaper side of
// the centre, and the diagonal between them. Otherwise the regular
// first-stage probing around the centre is done. A second stage at half the
// step follows unless forced_stop == 2. A third stage at a quarter of the
// step runs only when |allow_hp| is set, use_mv_hp(ref_mv) holds and
// forced_stop == 0.
//
// On return *bestmv holds the best position found (the input scaled by 8,
// then refined), *distortion and *sse1 describe that position, and the
// function's value is its total score.
uint32_t vp9_find_best_sub_pixel_tree_pruned(
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
    int h, int use_accurate_subpel_search) {
  SETUP_SUBPEL_SEARCH;
  (void)use_accurate_subpel_search;

  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
                               src_stride, y, y_stride, second_pred, w, h,
                               offset, mvjcost, mvcost, sse1, distortion);

  if (!cost_list || cost_list[0] == INT_MAX || cost_list[1] == INT_MAX ||
      cost_list[2] == INT_MAX || cost_list[3] == INT_MAX ||
      cost_list[4] == INT_MAX) {
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  } else {
    unsigned int horiz, vert, diag;
    int col_step, row_step;
    // Bit 0: set -> probe right (cost_list[3]), clear -> left (cost_list[1]).
    // Bit 1: set -> probe up (cost_list[4]), clear -> down (cost_list[2]).
    // Bit 1 is the opposite of what FIRST_LEVEL_CHECKS stores in whichdir;
    // nothing else reads whichdir before FIRST_LEVEL_CHECKS rewrites it.
    whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) +
               (cost_list[2] < cost_list[4] ? 0 : 2);
    col_step = (whichdir & 1) ? hstep : -hstep;
    row_step = (whichdir & 2) ? -hstep : hstep;
    CHECK_BETTER(horiz, tr, tc + col_step);
    CHECK_BETTER(vert, tr + row_step, tc);
    CHECK_BETTER(diag, tr + row_step, tc + col_step);
  }

  tr = br;
  tc = bc;

  // Each later stage shares at least one probe position with the previous
  // one (two when a diagonal was selected).
  // forced_stop: 0 - full, 1 - qtr only, 2 - half only.
  if (forced_stop != 2) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

  if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }
  // Keep static analysis from reporting tr and tc as unused past this
  // point.
  (void)tr;
  (void)tc;

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}
613 
614 /* clang-format off */
615 static const MV search_step_table[12] = {
616   // left, right, up, down
617   { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 },
618   { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 },
619   { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 }
620 };
621 /* clang-format on */
622 
accurate_sub_pel_search(const MACROBLOCKD * xd,const MV * this_mv,const struct scale_factors * sf,const InterpKernel * kernel,const vp9_variance_fn_ptr_t * vfp,const uint8_t * const src_address,const int src_stride,const uint8_t * const pre_address,int y_stride,const uint8_t * second_pred,int w,int h,uint32_t * sse)623 static int accurate_sub_pel_search(
624     const MACROBLOCKD *xd, const MV *this_mv, const struct scale_factors *sf,
625     const InterpKernel *kernel, const vp9_variance_fn_ptr_t *vfp,
626     const uint8_t *const src_address, const int src_stride,
627     const uint8_t *const pre_address, int y_stride, const uint8_t *second_pred,
628     int w, int h, uint32_t *sse) {
629 #if CONFIG_VP9_HIGHBITDEPTH
630   uint64_t besterr;
631   assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16);
632   assert(w != 0 && h != 0);
633   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
634     DECLARE_ALIGNED(16, uint16_t, pred16[64 * 64]);
635     vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(pre_address), y_stride,
636                                      pred16, w, this_mv, sf, w, h, 0, kernel,
637                                      MV_PRECISION_Q3, 0, 0, xd->bd);
638     if (second_pred != NULL) {
639       DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]);
640       vpx_highbd_comp_avg_pred(comp_pred16, CONVERT_TO_SHORTPTR(second_pred), w,
641                                h, pred16, w);
642       besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src_address,
643                         src_stride, sse);
644     } else {
645       besterr =
646           vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src_address, src_stride, sse);
647     }
648   } else {
649     DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]);
650     vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h,
651                               0, kernel, MV_PRECISION_Q3, 0, 0);
652     if (second_pred != NULL) {
653       DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
654       vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w);
655       besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse);
656     } else {
657       besterr = vfp->vf(pred, w, src_address, src_stride, sse);
658     }
659   }
660   if (besterr >= UINT_MAX) return UINT_MAX;
661   return (int)besterr;
662 #else
663   int besterr;
664   DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]);
665   assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16);
666   assert(w != 0 && h != 0);
667   (void)xd;
668 
669   vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h,
670                             0, kernel, MV_PRECISION_Q3, 0, 0);
671   if (second_pred != NULL) {
672     DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
673     vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w);
674     besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse);
675   } else {
676     besterr = vfp->vf(pred, w, src_address, src_stride, sse);
677   }
678   return besterr;
679 #endif  // CONFIG_VP9_HIGHBITDEPTH
680 }
681 
682 // TODO(yunqing): this part can be further refactored.
683 #if CONFIG_VP9_HIGHBITDEPTH
684 /* checks if (r, c) has better score than previous best */
685 #define CHECK_BETTER1(v, r, c)                                                \
686   do {                                                                        \
687     if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                   \
688       int64_t tmpmse;                                                         \
689       const MV mv = { r, c };                                                 \
690       const MV ref_mv = { rr, rc };                                           \
691       thismse = accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z,    \
692                                         src_stride, y, y_stride, second_pred, \
693                                         w, h, &sse);                          \
694       tmpmse = thismse;                                                       \
695       tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit);    \
696       if (tmpmse >= INT_MAX) {                                                \
697         v = INT_MAX;                                                          \
698       } else if ((v = (uint32_t)tmpmse) < besterr) {                          \
699         besterr = v;                                                          \
700         br = r;                                                               \
701         bc = c;                                                               \
702         *distortion = thismse;                                                \
703         *sse1 = sse;                                                          \
704       }                                                                       \
705     } else {                                                                  \
706       v = INT_MAX;                                                            \
707     }                                                                         \
708   } while (0)
709 #else
710 /* checks if (r, c) has better score than previous best */
711 #define CHECK_BETTER1(v, r, c)                                                \
712   do {                                                                        \
713     if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                   \
714       const MV mv = { r, c };                                                 \
715       const MV ref_mv = { rr, rc };                                           \
716       thismse = accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z,    \
717                                         src_stride, y, y_stride, second_pred, \
718                                         w, h, &sse);                          \
719       if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) +    \
720                thismse) < besterr) {                                          \
721         besterr = v;                                                          \
722         br = r;                                                               \
723         bc = c;                                                               \
724         *distortion = thismse;                                                \
725         *sse1 = sse;                                                          \
726       }                                                                       \
727     } else {                                                                  \
728       v = INT_MAX;                                                            \
729     }                                                                         \
730   } while (0)
731 
732 #endif
733 
// Refines a full-pel motion vector to sub-pel precision with an iterative
// search.
//
// On entry *bestmv is the best full-pel MV. It is multiplied by 8 here, so all
// positions handled below (br/bc/tr/tc and the value left in *bestmv) are in
// 1/8-pel units. The search runs up to (3 - forced_stop) rounds, capped at 2
// when !(allow_hp && use_mv_hp(ref_mv)). Each round:
//   1. evaluates the four horizontal/vertical neighbours at the current step
//      (one group of four entries of search_step_table),
//   2. evaluates one diagonal, picked from the cheaper side on each axis,
//   3. if iters_per_step > 0 and the round found an improvement, evaluates
//      extra points next to the new best position,
//   4. advances to the next group of steps and halves hstep.
//
// Candidate errors come from accurate_sub_pel_search() when
// use_accurate_subpel_search is non-zero, otherwise from vfp->svf (or
// vfp->svaf when second_pred is non-NULL); mv_err_cost() is added to each.
//
// Outputs:
//   *bestmv     - best MV found.
//   *distortion - error of the best candidate without the MV cost.
//   *sse1       - sse reported for the best candidate.
// Returns the best total cost (error + MV cost). cost_list is not used.
uint32_t vp9_find_best_sub_pixel_tree(
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
    int h, int use_accurate_subpel_search) {
  // Many of these locals are referenced by name from the CHECK_BETTER /
  // CHECK_BETTER1 macros expanded below; do not rename them.
  const uint8_t *const z = x->plane[0].src.buf;
  const uint8_t *const src_address = z;
  const int src_stride = x->plane[0].src.stride;
  const MACROBLOCKD *xd = &x->e_mbd;
  unsigned int besterr = UINT_MAX;
  unsigned int sse;
  int thismse;
  const int y_stride = xd->plane[0].pre[0].stride;
  // Computed from the full-pel MV, before bestmv is scaled below.
  const int offset = bestmv->row * y_stride + bestmv->col;
  const uint8_t *const y = xd->plane[0].pre[0].buf;

  int rr = ref_mv->row;
  int rc = ref_mv->col;
  int br = bestmv->row * 8;
  int bc = bestmv->col * 8;
  int hstep = 4;
  int iter, round = 3 - forced_stop;

  int minc, maxc, minr, maxr;
  int tr = br;
  int tc = bc;
  const MV *search_step = search_step_table;
  int idx, best_idx = -1;
  // [0..3]: costs of the four axis neighbours, [4]: cost of the diagonal.
  unsigned int cost_array[5];
  int kr, kc;
  MvLimits subpel_mv_limits;

  // TODO(yunqing): need to add 4-tap filter optimization to speed up the
  // encoder.
  // The kernel is only consumed on the accurate_sub_pel_search() paths.
  const InterpKernel *kernel =
      (use_accurate_subpel_search > 0)
          ? ((use_accurate_subpel_search == USE_4_TAPS)
                 ? vp9_filter_kernels[FOURTAP]
                 : ((use_accurate_subpel_search == USE_8_TAPS)
                        ? vp9_filter_kernels[EIGHTTAP]
                        : vp9_filter_kernels[EIGHTTAP_SHARP]))
          : vp9_filter_kernels[BILINEAR];

  vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv);
  minc = subpel_mv_limits.col_min;
  maxc = subpel_mv_limits.col_max;
  minr = subpel_mv_limits.row_min;
  maxr = subpel_mv_limits.row_max;

  // Without high-precision MVs the third (+/-1 step) round is skipped.
  if (!(allow_hp && use_mv_hp(ref_mv)))
    if (round == 3) round = 2;

  bestmv->row *= 8;
  bestmv->col *= 8;

  // Cost at the starting (centre) position.
  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
                               src_stride, y, y_stride, second_pred, w, h,
                               offset, mvjcost, mvcost, sse1, distortion);

  (void)cost_list;  // to silence compiler warning

  for (iter = 0; iter < round; ++iter) {
    // Check vertical and horizontal sub-pixel positions.
    for (idx = 0; idx < 4; ++idx) {
      tr = br + search_step[idx].row;
      tc = bc + search_step[idx].col;
      if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
        MV this_mv;
        this_mv.row = tr;
        this_mv.col = tc;

        if (use_accurate_subpel_search) {
          thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp,
                                            src_address, src_stride, y,
                                            y_stride, second_pred, w, h, &sse);
        } else {
          // Integer part of the position selects the reference pointer; the
          // sub-pel part is passed via sp().
          const uint8_t *const pre_address =
              y + (tr >> 3) * y_stride + (tc >> 3);
          if (second_pred == NULL)
            thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
                               src_address, src_stride, &sse);
          else
            thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
                                src_address, src_stride, &sse, second_pred);
        }

        cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
                                                mvcost, error_per_bit);

        if (cost_array[idx] < besterr) {
          best_idx = idx;
          besterr = cost_array[idx];
          *distortion = thismse;
          *sse1 = sse;
        }
      } else {
        cost_array[idx] = UINT_MAX;
      }
    }

    // Check diagonal sub-pixel position
    // The diagonal is taken towards the cheaper neighbour on each axis
    // (ties go to the negative direction).
    kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
    kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);

    tc = bc + kc;
    tr = br + kr;
    if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
      MV this_mv = { tr, tc };
      if (use_accurate_subpel_search) {
        thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp,
                                          src_address, src_stride, y, y_stride,
                                          second_pred, w, h, &sse);
      } else {
        const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
        if (second_pred == NULL)
          thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
                             src_stride, &sse);
        else
          thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
                              src_address, src_stride, &sse, second_pred);
      }

      cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
                                            error_per_bit);

      if (cost_array[4] < besterr) {
        best_idx = 4;
        besterr = cost_array[4];
        *distortion = thismse;
        *sse1 = sse;
      }
    } else {
      // idx is 4 here (left over from the loop above), so this marks the
      // diagonal slot cost_array[4] as unavailable.
      cost_array[idx] = UINT_MAX;
    }

    // Move the best position to whichever candidate won this round
    // (best_idx == -1 means the centre is still best).
    if (best_idx < 4 && best_idx >= 0) {
      br += search_step[best_idx].row;
      bc += search_step[best_idx].col;
    } else if (best_idx == 4) {
      br = tr;
      bc = tc;
    }

    // Optional second-level checks around the new best position.
    if (iters_per_step > 0 && best_idx != -1) {
      unsigned int second;
      // Snapshot of the best position: the macros below may update br/bc.
      const int br0 = br;
      const int bc0 = bc;
      assert(tr == br || tc == bc);

      // tr/tc still hold the diagonal candidate. If the winner was an axis
      // neighbour, it shares one coordinate with that diagonal; flip the
      // other offset so the extra check is made on the opposite side.
      if (tr == br && tc != bc) {
        kc = bc - tc;
        if (iters_per_step == 1) {
          if (use_accurate_subpel_search) {
            CHECK_BETTER1(second, br0, bc0 + kc);
          } else {
            CHECK_BETTER(second, br0, bc0 + kc);
          }
        }
      } else if (tr != br && tc == bc) {
        kr = br - tr;
        if (iters_per_step == 1) {
          if (use_accurate_subpel_search) {
            CHECK_BETTER1(second, br0 + kr, bc0);
          } else {
            CHECK_BETTER(second, br0 + kr, bc0);
          }
        }
      }

      if (iters_per_step > 1) {
        if (use_accurate_subpel_search) {
          CHECK_BETTER1(second, br0 + kr, bc0);
          CHECK_BETTER1(second, br0, bc0 + kc);
          // Only try the corner if one of the two checks above moved the best
          // position.
          if (br0 != br || bc0 != bc) {
            CHECK_BETTER1(second, br0 + kr, bc0 + kc);
          }
        } else {
          CHECK_BETTER(second, br0 + kr, bc0);
          CHECK_BETTER(second, br0, bc0 + kc);
          if (br0 != br || bc0 != bc) {
            CHECK_BETTER(second, br0 + kr, bc0 + kc);
          }
        }
      }
    }

    // Next round: next group of four steps at half the current step size.
    search_step += 4;
    hstep >>= 1;
    best_idx = -1;
  }

  // Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel

  // These lines ensure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
  (void)tr;
  (void)tc;

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}
939 
940 #undef CHECK_BETTER
941 #undef CHECK_BETTER1
942 
check_bounds(const MvLimits * mv_limits,int row,int col,int range)943 static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col,
944                                int range) {
945   return ((row - range) >= mv_limits->row_min) &
946          ((row + range) <= mv_limits->row_max) &
947          ((col - range) >= mv_limits->col_min) &
948          ((col + range) <= mv_limits->col_max);
949 }
950 
is_mv_in(const MvLimits * mv_limits,const MV * mv)951 static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) {
952   return (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max) &&
953          (mv->row >= mv_limits->row_min) && (mv->row <= mv_limits->row_max);
954 }
955 
// Records candidate `i` as the new best if it improves on bestsad.
//
// Expands to a bare { } block, so it is written at call sites without a
// trailing semicolon. It takes no arguments and instead uses these names from
// the enclosing scope: thissad, bestsad, use_mvcost, x, this_mv, fcenter_mv,
// sad_per_bit, best_site and i.
//
// The MV cost is added to thissad only when the raw value already beats
// bestsad (and use_mvcost is set); the comparison is then repeated with the
// cost included before bestsad/best_site are updated.
#define CHECK_BETTER                                                      \
  {                                                                       \
    if (thissad < bestsad) {                                              \
      if (use_mvcost)                                                     \
        thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \
      if (thissad < bestsad) {                                            \
        bestsad = thissad;                                                \
        best_site = i;                                                    \
      }                                                                   \
    }                                                                     \
  }
967 
#define MAX_PATTERN_SCALES 11     // number of scales in a search pattern
#define MAX_PATTERN_CANDIDATES 8  // max number of candidates per scale
#define PATTERN_CANDIDATES_REF 3  // number of refinement candidates
971 
972 // Calculate and return a sad+mvcost list around an integer best pel.
calc_int_cost_list(const MACROBLOCK * x,const MV * ref_mv,int sadpb,const vp9_variance_fn_ptr_t * fn_ptr,const MV * best_mv,int * cost_list)973 static INLINE void calc_int_cost_list(const MACROBLOCK *x, const MV *ref_mv,
974                                       int sadpb,
975                                       const vp9_variance_fn_ptr_t *fn_ptr,
976                                       const MV *best_mv, int *cost_list) {
977   static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
978   const struct buf_2d *const what = &x->plane[0].src;
979   const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
980   const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
981   int br = best_mv->row;
982   int bc = best_mv->col;
983   MV this_mv;
984   int i;
985   unsigned int sse;
986 
987   this_mv.row = br;
988   this_mv.col = bc;
989   cost_list[0] =
990       fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv),
991                  in_what->stride, &sse) +
992       mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
993   if (check_bounds(&x->mv_limits, br, bc, 1)) {
994     for (i = 0; i < 4; i++) {
995       const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
996       cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
997                                     get_buf_from_mv(in_what, &this_mv),
998                                     in_what->stride, &sse) +
999                          mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
1000                                      x->mvcost, x->errorperbit);
1001     }
1002   } else {
1003     for (i = 0; i < 4; i++) {
1004       const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
1005       if (!is_mv_in(&x->mv_limits, &this_mv))
1006         cost_list[i + 1] = INT_MAX;
1007       else
1008         cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
1009                                       get_buf_from_mv(in_what, &this_mv),
1010                                       in_what->stride, &sse) +
1011                            mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
1012                                        x->mvcost, x->errorperbit);
1013     }
1014   }
1015 }
1016 
1017 // Generic pattern search function that searches over multiple scales.
1018 // Each scale can have a different number of candidates and shape of
1019 // candidates as indicated in the num_candidates and candidates arrays
1020 // passed into this function
1021 //
// Parameters:
//   x              - macroblock state; supplies the source and reference
//                    buffers and mv_limits.
//   ref_mv         - in/out: starting position; clamped in place to
//                    x->mv_limits before the search.
//   search_param   - index into search_param_to_steps; selects the largest
//                    scale to use.
//   sad_per_bit    - weight passed to mvsad_err_cost().
//   do_init_search - when non-zero, every scale from 0 up to the largest one
//                    is probed around the start point first and the search
//                    proper begins at the largest scale that yielded an
//                    improvement.
//   cost_list      - optional (may be NULL); when non-NULL it is filled by
//                    calc_int_cost_list() around the final position.
//   vfp            - function pointers; sdf is used for every candidate.
//   use_mvcost     - read by CHECK_BETTER: when non-zero the MV cost is added
//                    to candidates.
//   center_mv      - MV the MV cost is measured against (after >> 3).
//   best_mv        - output: best position found.
//   num_candidates - number of valid entries of candidates[] at each scale.
//   candidates     - { row, col } offsets for each scale.
//
// Returns the best cost found. The starting point's cost always includes
// mvsad_err_cost(); candidates include it only when use_mvcost is set.
static int vp9_pattern_search(
    const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit,
    int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp,
    int use_mvcost, const MV *center_mv, MV *best_mv,
    const int num_candidates[MAX_PATTERN_SCALES],
    const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
    10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
  };
  // i, thissad, bestsad, fcenter_mv, this_mv and best_site are referenced by
  // name from the CHECK_BETTER macro; do not rename them.
  int i, s, t;
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  int br, bc;
  int bestsad = INT_MAX;
  int thissad;
  // Index (within the current scale) of the candidate last chosen as best.
  int k = -1;
  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  int best_init_s = search_param_to_steps[search_param];
  // adjust ref_mv to make sure it is within MV range
  clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
           x->mv_limits.row_min, x->mv_limits.row_max);
  br = ref_mv->row;
  bc = ref_mv->col;

  // Work out the start point for the search
  bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
                     in_what->stride) +
            mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);

  // Search all possible scales up to the search param around the center point
  // pick the scale of the point that is best as the starting scale of
  // further steps around it.
  if (do_init_search) {
    s = best_init_s;
    best_init_s = -1;
    for (t = 0; t <= s; ++t) {
      int best_site = -1;
      // If the whole pattern at this scale fits inside the limits, the
      // per-candidate range test can be skipped.
      if (check_bounds(&x->mv_limits, br, bc, 1 << t)) {
        for (i = 0; i < num_candidates[t]; i++) {
          const MV this_mv = { br + candidates[t][i].row,
                               bc + candidates[t][i].col };
          thissad =
              vfp->sdf(what->buf, what->stride,
                       get_buf_from_mv(in_what, &this_mv), in_what->stride);
          CHECK_BETTER
        }
      } else {
        for (i = 0; i < num_candidates[t]; i++) {
          const MV this_mv = { br + candidates[t][i].row,
                               bc + candidates[t][i].col };
          if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
          thissad =
              vfp->sdf(what->buf, what->stride,
                       get_buf_from_mv(in_what, &this_mv), in_what->stride);
          CHECK_BETTER
        }
      }
      // bestsad carries over between scales, so a later (larger) scale only
      // replaces the choice when it is strictly better.
      if (best_site == -1) {
        continue;
      } else {
        best_init_s = t;
        k = best_site;
      }
    }
    if (best_init_s != -1) {
      br += candidates[best_init_s][k].row;
      bc += candidates[best_init_s][k].col;
    }
  }

  // If the center point is still the best, just skip this and move to
  // the refinement step.
  if (best_init_s != -1) {
    int best_site = -1;
    s = best_init_s;

    // Walk the scales from best_init_s down to 0.
    do {
      // No need to search all 6 points the 1st time if initial search was used
      if (!do_init_search || s != best_init_s) {
        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = { br + candidates[s][i].row,
                                 bc + candidates[s][i].col };
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = { br + candidates[s][i].row,
                                 bc + candidates[s][i].col };
            if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        }

        // Nothing better at this scale: `continue` jumps to the `while (s--)`
        // test, i.e. straight to the next smaller scale.
        if (best_site == -1) {
          continue;
        } else {
          br += candidates[s][best_site].row;
          bc += candidates[s][best_site].col;
          k = best_site;
        }
      }

      // Keep stepping at this scale, testing only the last winning direction
      // k and its two neighbours in the candidate list (with wrap-around),
      // until no further improvement is found.
      do {
        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
        best_site = -1;
        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
        next_chkpts_indices[1] = k;
        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;

        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {
              br + candidates[s][next_chkpts_indices[i]].row,
              bc + candidates[s][next_chkpts_indices[i]].col
            };
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {
              br + candidates[s][next_chkpts_indices[i]].row,
              bc + candidates[s][next_chkpts_indices[i]].col
            };
            if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        }

        // Here best_site indexes next_chkpts_indices, not candidates[s].
        if (best_site != -1) {
          k = next_chkpts_indices[best_site];
          br += candidates[s][k].row;
          bc += candidates[s][k].col;
        }
      } while (best_site != -1);
    } while (s--);
  }

  // Returns the one-away integer pel sad values around the best as follows:
  // cost_list[0]: cost at the best integer pel
  // cost_list[1]: cost at delta {0, -1} (left)   from the best integer pel
  // cost_list[2]: cost at delta { 1, 0} (bottom) from the best integer pel
  // cost_list[3]: cost at delta { 0, 1} (right)  from the best integer pel
  // cost_list[4]: cost at delta {-1, 0} (top)    from the best integer pel
  if (cost_list) {
    // This local shadows the best_mv output parameter inside this block.
    // NOTE(review): fcenter_mv has already been shifted right by 3 above, and
    // calc_int_cost_list() shifts its ref_mv argument by 3 again - confirm
    // that passing fcenter_mv (rather than center_mv) is intended.
    const MV best_mv = { br, bc };
    calc_int_cost_list(x, &fcenter_mv, sad_per_bit, vfp, &best_mv, cost_list);
  }
  best_mv->row = br;
  best_mv->col = bc;
  return bestsad;
}
1187 
1188 // A specialized function where the smallest scale search candidates
1189 // are 4 1-away neighbors, and cost_list is non-null
1190 // TODO(debargha): Merge this function with the one above. Also remove
1191 // use_mvcost option since it is always 1, to save unnecessary branches.
vp9_pattern_search_sad(const MACROBLOCK * x,MV * ref_mv,int search_param,int sad_per_bit,int do_init_search,int * cost_list,const vp9_variance_fn_ptr_t * vfp,int use_mvcost,const MV * center_mv,MV * best_mv,const int num_candidates[MAX_PATTERN_SCALES],const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES])1192 static int vp9_pattern_search_sad(
1193     const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit,
1194     int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp,
1195     int use_mvcost, const MV *center_mv, MV *best_mv,
1196     const int num_candidates[MAX_PATTERN_SCALES],
1197     const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) {
1198   const MACROBLOCKD *const xd = &x->e_mbd;
1199   static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
1200     10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
1201   };
1202   int i, s, t;
1203   const struct buf_2d *const what = &x->plane[0].src;
1204   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1205   int br, bc;
1206   int bestsad = INT_MAX;
1207   int thissad;
1208   int k = -1;
1209   const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
1210   int best_init_s = search_param_to_steps[search_param];
1211   // adjust ref_mv to make sure it is within MV range
1212   clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
1213            x->mv_limits.row_min, x->mv_limits.row_max);
1214   br = ref_mv->row;
1215   bc = ref_mv->col;
1216   if (cost_list != NULL) {
1217     cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
1218         INT_MAX;
1219   }
1220 
1221   // Work out the start point for the search
1222   bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
1223                      in_what->stride) +
1224             mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
1225 
1226   // Search all possible scales upto the search param around the center point
1227   // pick the scale of the point that is best as the starting scale of
1228   // further steps around it.
1229   if (do_init_search) {
1230     s = best_init_s;
1231     best_init_s = -1;
1232     for (t = 0; t <= s; ++t) {
1233       int best_site = -1;
1234       if (check_bounds(&x->mv_limits, br, bc, 1 << t)) {
1235         for (i = 0; i < num_candidates[t]; i++) {
1236           const MV this_mv = { br + candidates[t][i].row,
1237                                bc + candidates[t][i].col };
1238           thissad =
1239               vfp->sdf(what->buf, what->stride,
1240                        get_buf_from_mv(in_what, &this_mv), in_what->stride);
1241           CHECK_BETTER
1242         }
1243       } else {
1244         for (i = 0; i < num_candidates[t]; i++) {
1245           const MV this_mv = { br + candidates[t][i].row,
1246                                bc + candidates[t][i].col };
1247           if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
1248           thissad =
1249               vfp->sdf(what->buf, what->stride,
1250                        get_buf_from_mv(in_what, &this_mv), in_what->stride);
1251           CHECK_BETTER
1252         }
1253       }
1254       if (best_site == -1) {
1255         continue;
1256       } else {
1257         best_init_s = t;
1258         k = best_site;
1259       }
1260     }
1261     if (best_init_s != -1) {
1262       br += candidates[best_init_s][k].row;
1263       bc += candidates[best_init_s][k].col;
1264     }
1265   }
1266 
1267   // If the center point is still the best, just skip this and move to
1268   // the refinement step.
1269   if (best_init_s != -1) {
1270     int do_sad = (num_candidates[0] == 4 && cost_list != NULL);
1271     int best_site = -1;
1272     s = best_init_s;
1273 
1274     for (; s >= do_sad; s--) {
1275       if (!do_init_search || s != best_init_s) {
1276         if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
1277           for (i = 0; i < num_candidates[s]; i++) {
1278             const MV this_mv = { br + candidates[s][i].row,
1279                                  bc + candidates[s][i].col };
1280             thissad =
1281                 vfp->sdf(what->buf, what->stride,
1282                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
1283             CHECK_BETTER
1284           }
1285         } else {
1286           for (i = 0; i < num_candidates[s]; i++) {
1287             const MV this_mv = { br + candidates[s][i].row,
1288                                  bc + candidates[s][i].col };
1289             if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
1290             thissad =
1291                 vfp->sdf(what->buf, what->stride,
1292                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
1293             CHECK_BETTER
1294           }
1295         }
1296 
1297         if (best_site == -1) {
1298           continue;
1299         } else {
1300           br += candidates[s][best_site].row;
1301           bc += candidates[s][best_site].col;
1302           k = best_site;
1303         }
1304       }
1305 
1306       do {
1307         int next_chkpts_indices[PATTERN_CANDIDATES_REF];
1308         best_site = -1;
1309         next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
1310         next_chkpts_indices[1] = k;
1311         next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
1312 
1313         if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
1314           for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1315             const MV this_mv = {
1316               br + candidates[s][next_chkpts_indices[i]].row,
1317               bc + candidates[s][next_chkpts_indices[i]].col
1318             };
1319             thissad =
1320                 vfp->sdf(what->buf, what->stride,
1321                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
1322             CHECK_BETTER
1323           }
1324         } else {
1325           for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1326             const MV this_mv = {
1327               br + candidates[s][next_chkpts_indices[i]].row,
1328               bc + candidates[s][next_chkpts_indices[i]].col
1329             };
1330             if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
1331             thissad =
1332                 vfp->sdf(what->buf, what->stride,
1333                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
1334             CHECK_BETTER
1335           }
1336         }
1337 
1338         if (best_site != -1) {
1339           k = next_chkpts_indices[best_site];
1340           br += candidates[s][k].row;
1341           bc += candidates[s][k].col;
1342         }
1343       } while (best_site != -1);
1344     }
1345 
1346     // Note: If we enter the if below, then cost_list must be non-NULL.
1347     if (s == 0) {
1348       cost_list[0] = bestsad;
1349       if (!do_init_search || s != best_init_s) {
1350         if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
1351           for (i = 0; i < num_candidates[s]; i++) {
1352             const MV this_mv = { br + candidates[s][i].row,
1353                                  bc + candidates[s][i].col };
1354             cost_list[i + 1] = thissad =
1355                 vfp->sdf(what->buf, what->stride,
1356                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
1357             CHECK_BETTER
1358           }
1359         } else {
1360           for (i = 0; i < num_candidates[s]; i++) {
1361             const MV this_mv = { br + candidates[s][i].row,
1362                                  bc + candidates[s][i].col };
1363             if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
1364             cost_list[i + 1] = thissad =
1365                 vfp->sdf(what->buf, what->stride,
1366                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
1367             CHECK_BETTER
1368           }
1369         }
1370 
1371         if (best_site != -1) {
1372           br += candidates[s][best_site].row;
1373           bc += candidates[s][best_site].col;
1374           k = best_site;
1375         }
1376       }
1377       while (best_site != -1) {
1378         int next_chkpts_indices[PATTERN_CANDIDATES_REF];
1379         best_site = -1;
1380         next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
1381         next_chkpts_indices[1] = k;
1382         next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
1383         cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
1384         cost_list[((k + 2) % 4) + 1] = cost_list[0];
1385         cost_list[0] = bestsad;
1386 
1387         if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
1388           for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1389             const MV this_mv = {
1390               br + candidates[s][next_chkpts_indices[i]].row,
1391               bc + candidates[s][next_chkpts_indices[i]].col
1392             };
1393             cost_list[next_chkpts_indices[i] + 1] = thissad =
1394                 vfp->sdf(what->buf, what->stride,
1395                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
1396             CHECK_BETTER
1397           }
1398         } else {
1399           for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1400             const MV this_mv = {
1401               br + candidates[s][next_chkpts_indices[i]].row,
1402               bc + candidates[s][next_chkpts_indices[i]].col
1403             };
1404             if (!is_mv_in(&x->mv_limits, &this_mv)) {
1405               cost_list[next_chkpts_indices[i] + 1] = INT_MAX;
1406               continue;
1407             }
1408             cost_list[next_chkpts_indices[i] + 1] = thissad =
1409                 vfp->sdf(what->buf, what->stride,
1410                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
1411             CHECK_BETTER
1412           }
1413         }
1414 
1415         if (best_site != -1) {
1416           k = next_chkpts_indices[best_site];
1417           br += candidates[s][k].row;
1418           bc += candidates[s][k].col;
1419         }
1420       }
1421     }
1422   }
1423 
1424   // Returns the one-away integer pel sad values around the best as follows:
1425   // cost_list[0]: sad at the best integer pel
1426   // cost_list[1]: sad at delta {0, -1} (left)   from the best integer pel
1427   // cost_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel
1428   // cost_list[3]: sad at delta { 0, 1} (right)  from the best integer pel
1429   // cost_list[4]: sad at delta {-1, 0} (top)    from the best integer pel
1430   if (cost_list) {
1431     static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
1432     if (cost_list[0] == INT_MAX) {
1433       cost_list[0] = bestsad;
1434       if (check_bounds(&x->mv_limits, br, bc, 1)) {
1435         for (i = 0; i < 4; i++) {
1436           const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
1437           cost_list[i + 1] =
1438               vfp->sdf(what->buf, what->stride,
1439                        get_buf_from_mv(in_what, &this_mv), in_what->stride);
1440         }
1441       } else {
1442         for (i = 0; i < 4; i++) {
1443           const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
1444           if (!is_mv_in(&x->mv_limits, &this_mv))
1445             cost_list[i + 1] = INT_MAX;
1446           else
1447             cost_list[i + 1] =
1448                 vfp->sdf(what->buf, what->stride,
1449                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
1450         }
1451       }
1452     } else {
1453       if (use_mvcost) {
1454         for (i = 0; i < 4; i++) {
1455           const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
1456           if (cost_list[i + 1] != INT_MAX) {
1457             cost_list[i + 1] +=
1458                 mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
1459           }
1460         }
1461       }
1462     }
1463   }
1464   best_mv->row = br;
1465   best_mv->col = bc;
1466   return bestsad;
1467 }
1468 
vp9_get_mvpred_var(const MACROBLOCK * x,const MV * best_mv,const MV * center_mv,const vp9_variance_fn_ptr_t * vfp,int use_mvcost)1469 int vp9_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,
1470                        const MV *center_mv, const vp9_variance_fn_ptr_t *vfp,
1471                        int use_mvcost) {
1472   const MACROBLOCKD *const xd = &x->e_mbd;
1473   const struct buf_2d *const what = &x->plane[0].src;
1474   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1475   const MV mv = { best_mv->row * 8, best_mv->col * 8 };
1476   uint32_t unused;
1477 #if CONFIG_VP9_HIGHBITDEPTH
1478   uint64_t err =
1479       vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
1480               in_what->stride, &unused);
1481   err += (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
1482                                    x->errorperbit)
1483                      : 0);
1484   if (err >= INT_MAX) return INT_MAX;
1485   return (int)err;
1486 #else
1487   return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
1488                  in_what->stride, &unused) +
1489          (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
1490                                    x->errorperbit)
1491                      : 0);
1492 #endif
1493 }
1494 
vp9_get_mvpred_av_var(const MACROBLOCK * x,const MV * best_mv,const MV * center_mv,const uint8_t * second_pred,const vp9_variance_fn_ptr_t * vfp,int use_mvcost)1495 int vp9_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
1496                           const MV *center_mv, const uint8_t *second_pred,
1497                           const vp9_variance_fn_ptr_t *vfp, int use_mvcost) {
1498   const MACROBLOCKD *const xd = &x->e_mbd;
1499   const struct buf_2d *const what = &x->plane[0].src;
1500   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1501   const MV mv = { best_mv->row * 8, best_mv->col * 8 };
1502   unsigned int unused;
1503 
1504   return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
1505                    what->buf, what->stride, &unused, second_pred) +
1506          (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
1507                                    x->errorperbit)
1508                      : 0);
1509 }
1510 
// Pattern search using hexagon-shaped candidate sets.
//
// Builds the per-scale candidate tables and forwards every argument unchanged
// to vp9_pattern_search(), returning whatever that function returns.
static int hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
                      int sad_per_bit, int do_init_search, int *cost_list,
                      const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
                      const MV *center_mv, MV *best_mv) {
  // Scale 0 uses the 8 immediate neighbours; every larger scale uses 6
  // points laid out as a hexagon.
  static const int points_per_scale[MAX_PATTERN_SCALES] = {
    8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
  };
  /* clang-format off */
  // One hexagon of { row, col } offsets whose largest component is d. The
  // table below uses d = 2^scale, so the largest step at each scale is
  // 2^scale.
#define HEX_RING(d)                                              \
  { { -((d) / 2), -(d) }, { (d) / 2, -(d) }, { (d), 0 },         \
    { (d) / 2, (d) }, { -((d) / 2), (d) }, { -(d), 0 } }
  static const MV offsets[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
    { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 },
      { 1, 1 }, { 0, 1 }, { -1, 1 }, { -1, 0 } },
    HEX_RING(2),
    HEX_RING(4),
    HEX_RING(8),
    HEX_RING(16),
    HEX_RING(32),
    HEX_RING(64),
    HEX_RING(128),
    HEX_RING(256),
    HEX_RING(512),
    HEX_RING(1024)
  };
#undef HEX_RING
  /* clang-format on */
  return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
                            do_init_search, cost_list, vfp, use_mvcost,
                            center_mv, best_mv, points_per_scale, offsets);
}
1546 
// Pattern search using diamond-shaped candidate sets.
//
// Builds the per-scale candidate tables and forwards every argument unchanged
// to vp9_pattern_search_sad(), returning whatever that function returns.
static int bigdia_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
                         int sad_per_bit, int do_init_search, int *cost_list,
                         const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
                         const MV *center_mv, MV *best_mv) {
  // Scale 0 uses the 4 closest points; every larger scale uses 8 points laid
  // out as a diamond.
  static const int points_per_scale[MAX_PATTERN_SCALES] = {
    4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8
  };
  /* clang-format off */
  // One diamond of { row, col } offsets: axis points at distance d and
  // diagonal points at distance d / 2. The table below uses d = 2^scale, so
  // the largest step at each scale is 2^scale.
#define DIA_RING(d)                                                        \
  { { -((d) / 2), -((d) / 2) }, { 0, -(d) }, { (d) / 2, -((d) / 2) },      \
    { (d), 0 }, { (d) / 2, (d) / 2 }, { 0, (d) },                          \
    { -((d) / 2), (d) / 2 }, { -(d), 0 } }
  static const MV offsets[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
    { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } },
    DIA_RING(2),
    DIA_RING(4),
    DIA_RING(8),
    DIA_RING(16),
    DIA_RING(32),
    DIA_RING(64),
    DIA_RING(128),
    DIA_RING(256),
    DIA_RING(512),
    DIA_RING(1024)
  };
#undef DIA_RING
  /* clang-format on */
  return vp9_pattern_search_sad(x, ref_mv, search_param, sad_per_bit,
                                do_init_search, cost_list, vfp, use_mvcost,
                                center_mv, best_mv, points_per_scale, offsets);
}
1587 
// Pattern search using square-shaped candidate sets.
//
// Builds the per-scale candidate tables and forwards every argument unchanged
// to vp9_pattern_search(), returning whatever that function returns.
static int square_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
                         int sad_per_bit, int do_init_search, int *cost_list,
                         const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
                         const MV *center_mv, MV *best_mv) {
  // Every scale checks the 8 points on the perimeter of a square.
  static const int points_per_scale[MAX_PATTERN_SCALES] = {
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8
  };
  /* clang-format off */
  // The 8 { row, col } offsets on a square of half-width d. The table below
  // uses d = 2^scale, so the largest step at each scale is 2^scale.
#define SQUARE_RING(d)                                                  \
  { { -(d), -(d) }, { 0, -(d) }, { (d), -(d) }, { (d), 0 },             \
    { (d), (d) }, { 0, (d) }, { -(d), (d) }, { -(d), 0 } }
  static const MV offsets[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
    SQUARE_RING(1),
    SQUARE_RING(2),
    SQUARE_RING(4),
    SQUARE_RING(8),
    SQUARE_RING(16),
    SQUARE_RING(32),
    SQUARE_RING(64),
    SQUARE_RING(128),
    SQUARE_RING(256),
    SQUARE_RING(512),
    SQUARE_RING(1024)
  };
#undef SQUARE_RING
  /* clang-format on */
  return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
                            do_init_search, cost_list, vfp, use_mvcost,
                            center_mv, best_mv, points_per_scale, offsets);
}
1628 
// Hexagon search with the step parameter raised to at least
// MAX_MVSEARCH_STEPS - 2 before delegating to hex_search(). All other
// arguments are passed through untouched.
// Note: |do_init_search| must be zero for fast_hex.
static int fast_hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
                           int sad_per_bit, int do_init_search,
                           int *cost_list, const vp9_variance_fn_ptr_t *vfp,
                           int use_mvcost, const MV *center_mv, MV *best_mv) {
  const int floored_param = VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param);
  return hex_search(x, ref_mv, floored_param, sad_per_bit, do_init_search,
                    cost_list, vfp, use_mvcost, center_mv, best_mv);
}
1638 
// Diamond search with the step parameter raised to at least
// MAX_MVSEARCH_STEPS - 2 before delegating to bigdia_search(). All other
// arguments are passed through untouched.
static int fast_dia_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
                           int sad_per_bit, int do_init_search, int *cost_list,
                           const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
                           const MV *center_mv, MV *best_mv) {
  const int floored_param = VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param);
  return bigdia_search(x, ref_mv, floored_param, sad_per_bit, do_init_search,
                       cost_list, vfp, use_mvcost, center_mv, best_mv);
}
1647 
1648 #undef CHECK_BETTER
1649 
1650 // Exhuastive motion search around a given centre position with a given
1651 // step size.
exhaustive_mesh_search(const MACROBLOCK * x,MV * ref_mv,MV * best_mv,int range,int step,int sad_per_bit,const vp9_variance_fn_ptr_t * fn_ptr,const MV * center_mv)1652 static int exhaustive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
1653                                   int range, int step, int sad_per_bit,
1654                                   const vp9_variance_fn_ptr_t *fn_ptr,
1655                                   const MV *center_mv) {
1656   const MACROBLOCKD *const xd = &x->e_mbd;
1657   const struct buf_2d *const what = &x->plane[0].src;
1658   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1659   MV fcenter_mv = { center_mv->row, center_mv->col };
1660   unsigned int best_sad = INT_MAX;
1661   int r, c, i;
1662   int start_col, end_col, start_row, end_row;
1663   int col_step = (step > 1) ? step : 4;
1664 
1665   assert(step >= 1);
1666 
1667   clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max,
1668            x->mv_limits.row_min, x->mv_limits.row_max);
1669   *best_mv = fcenter_mv;
1670   best_sad =
1671       fn_ptr->sdf(what->buf, what->stride,
1672                   get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
1673       mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
1674   start_row = VPXMAX(-range, x->mv_limits.row_min - fcenter_mv.row);
1675   start_col = VPXMAX(-range, x->mv_limits.col_min - fcenter_mv.col);
1676   end_row = VPXMIN(range, x->mv_limits.row_max - fcenter_mv.row);
1677   end_col = VPXMIN(range, x->mv_limits.col_max - fcenter_mv.col);
1678 
1679   for (r = start_row; r <= end_row; r += step) {
1680     for (c = start_col; c <= end_col; c += col_step) {
1681       // Step > 1 means we are not checking every location in this pass.
1682       if (step > 1) {
1683         const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
1684         unsigned int sad =
1685             fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
1686                         in_what->stride);
1687         if (sad < best_sad) {
1688           sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
1689           if (sad < best_sad) {
1690             best_sad = sad;
1691             *best_mv = mv;
1692           }
1693         }
1694       } else {
1695         // 4 sads in a single call if we are checking every location
1696         if (c + 3 <= end_col) {
1697           unsigned int sads[4];
1698           const uint8_t *addrs[4];
1699           for (i = 0; i < 4; ++i) {
1700             const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
1701             addrs[i] = get_buf_from_mv(in_what, &mv);
1702           }
1703           fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
1704 
1705           for (i = 0; i < 4; ++i) {
1706             if (sads[i] < best_sad) {
1707               const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
1708               const unsigned int sad =
1709                   sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
1710               if (sad < best_sad) {
1711                 best_sad = sad;
1712                 *best_mv = mv;
1713               }
1714             }
1715           }
1716         } else {
1717           for (i = 0; i < end_col - c; ++i) {
1718             const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
1719             unsigned int sad =
1720                 fn_ptr->sdf(what->buf, what->stride,
1721                             get_buf_from_mv(in_what, &mv), in_what->stride);
1722             if (sad < best_sad) {
1723               sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
1724               if (sad < best_sad) {
1725                 best_sad = sad;
1726                 *best_mv = mv;
1727               }
1728             }
1729           }
1730         }
1731       }
1732     }
1733   }
1734 
1735   return best_sad;
1736 }
1737 
1738 #define MIN_RANGE 7
1739 #define MAX_RANGE 256
1740 #define MIN_INTERVAL 1
1741 #if CONFIG_NON_GREEDY_MV
// Mesh search that samples the window around |center_mv| every |step|
// positions in both directions.
//
// The window spans center +/- |range| and is clipped to |mv_limits|; both
// ends are inclusive. Each candidate is scored as its SAD shifted left by
// LOG2_PRECISION plus |lambda| times vp9_nb_mvs_inconsistency() against the
// |full_mv_num| vectors in |nb_full_mvs|. The best vector is written to
// |best_mv| and its score returned.
//
// The window is derived from |center_mv| before |best_mv| is updated inside
// the loop, so the two pointers may refer to the same vector.
static int64_t exhaustive_mesh_search_multi_step(
    MV *best_mv, const MV *center_mv, int range, int step,
    const struct buf_2d *src, const struct buf_2d *pre, int lambda,
    const int_mv *nb_full_mvs, int full_mv_num, const MvLimits *mv_limits,
    const vp9_variance_fn_ptr_t *fn_ptr) {
  const int row_lo = VPXMAX(center_mv->row - range, mv_limits->row_min);
  const int col_lo = VPXMAX(center_mv->col - range, mv_limits->col_min);
  const int row_hi = VPXMIN(center_mv->row + range, mv_limits->row_max);
  const int col_hi = VPXMIN(center_mv->col + range, mv_limits->col_max);
  int64_t best_cost;
  int row, col;

  // Seed the search with the centre position itself.
  *best_mv = *center_mv;
  best_cost =
      ((int64_t)fn_ptr->sdf(src->buf, src->stride,
                            get_buf_from_mv(pre, center_mv), pre->stride)
       << LOG2_PRECISION) +
      lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num);

  for (row = row_lo; row <= row_hi; row += step) {
    for (col = col_lo; col <= col_hi; col += step) {
      const MV cand = { row, col };
      int64_t cost =
          (int64_t)fn_ptr->sdf(src->buf, src->stride,
                               get_buf_from_mv(pre, &cand), pre->stride)
          << LOG2_PRECISION;
      // The consistency term is only worth computing when the distortion
      // alone already beats the current best.
      if (cost >= best_cost) continue;
      cost += lambda * vp9_nb_mvs_inconsistency(&cand, nb_full_mvs, full_mv_num);
      if (cost < best_cost) {
        best_cost = cost;
        *best_mv = cand;
      }
    }
  }
  return best_cost;
}
1777 
// Mesh search that visits every position of the window around |center_mv|.
//
// The window spans center +/- |range| and is clipped to |mv_limits|; both
// ends are inclusive. Columns are evaluated four at a time through sdx4df()
// while at least four remain in the row, and one at a time through sdf() for
// the remainder. Each candidate is scored as its SAD shifted left by
// LOG2_PRECISION plus |lambda| times vp9_nb_mvs_inconsistency() against the
// |full_mv_num| vectors in |nb_full_mvs|. The best vector is written to
// |best_mv| and its score returned.
//
// The window is derived from |center_mv| before |best_mv| is updated inside
// the loops, so the two pointers may refer to the same vector.
static int64_t exhaustive_mesh_search_single_step(
    MV *best_mv, const MV *center_mv, int range, const struct buf_2d *src,
    const struct buf_2d *pre, int lambda, const int_mv *nb_full_mvs,
    int full_mv_num, const MvLimits *mv_limits,
    const vp9_variance_fn_ptr_t *fn_ptr) {
  const int row_lo = VPXMAX(center_mv->row - range, mv_limits->row_min);
  const int col_lo = VPXMAX(center_mv->col - range, mv_limits->col_min);
  const int row_hi = VPXMIN(center_mv->row + range, mv_limits->row_max);
  const int col_hi = VPXMIN(center_mv->col + range, mv_limits->col_max);
  int64_t best_cost;
  int row, col, k;

  // Seed the search with the centre position itself.
  *best_mv = *center_mv;
  best_cost =
      ((int64_t)fn_ptr->sdf(src->buf, src->stride,
                            get_buf_from_mv(pre, center_mv), pre->stride)
       << LOG2_PRECISION) +
      lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num);

  for (row = row_lo; row <= row_hi; ++row) {
    // Batched part of the row: four adjacent columns per sdx4df() call.
    for (col = col_lo; col + 3 <= col_hi; col += 4) {
      const uint8_t *ref_ptrs[4];
      unsigned int batch_sads[4];

      for (k = 0; k < 4; ++k) {
        const MV cand = { row, col + k };
        ref_ptrs[k] = get_buf_from_mv(pre, &cand);
      }
      fn_ptr->sdx4df(src->buf, src->stride, ref_ptrs, pre->stride, batch_sads);

      for (k = 0; k < 4; ++k) {
        const MV cand = { row, col + k };
        int64_t cost = (int64_t)batch_sads[k] << LOG2_PRECISION;
        if (cost >= best_cost) continue;
        cost +=
            lambda * vp9_nb_mvs_inconsistency(&cand, nb_full_mvs, full_mv_num);
        if (cost < best_cost) {
          best_cost = cost;
          *best_mv = cand;
        }
      }
    }

    // Leftover columns (at most three), handled one by one.
    for (; col <= col_hi; ++col) {
      const MV cand = { row, col };
      int64_t cost =
          (int64_t)fn_ptr->sdf(src->buf, src->stride,
                               get_buf_from_mv(pre, &cand), pre->stride)
          << LOG2_PRECISION;
      if (cost >= best_cost) continue;
      cost += lambda * vp9_nb_mvs_inconsistency(&cand, nb_full_mvs, full_mv_num);
      if (cost < best_cost) {
        best_cost = cost;
        *best_mv = cand;
      }
    }
  }
  return best_cost;
}
1839 
// Dispatches a mesh search on plane 0 of |x| to the single-step or
// multi-step implementation depending on |step|.
//
// The source block is x->plane[0].src, the reference is the first prediction
// buffer of plane 0, and the search is bounded by x->mv_limits. Debug builds
// assert that |step| is at least 1 and that |center_mv| lies inside those
// limits. Returns the score produced by the selected implementation, which
// also fills in |best_mv|.
static int64_t exhaustive_mesh_search_new(const MACROBLOCK *x, MV *best_mv,
                                          int range, int step,
                                          const vp9_variance_fn_ptr_t *fn_ptr,
                                          const MV *center_mv, int lambda,
                                          const int_mv *nb_full_mvs,
                                          int full_mv_num) {
  const struct buf_2d *src = &x->plane[0].src;
  const struct buf_2d *pre = &x->e_mbd.plane[0].pre[0];
  const MvLimits *const limits = &x->mv_limits;

  assert(step >= 1);
  assert(is_mv_in(limits, center_mv));

  if (step != 1) {
    return exhaustive_mesh_search_multi_step(best_mv, center_mv, range, step,
                                             src, pre, lambda, nb_full_mvs,
                                             full_mv_num, limits, fn_ptr);
  }
  // A unit step can use the variant that batches four columns per call.
  return exhaustive_mesh_search_single_step(best_mv, center_mv, range, src,
                                            pre, lambda, nb_full_mvs,
                                            full_mv_num, limits, fn_ptr);
}
1860 
// Runs a sequence of mesh searches starting from |centre_mv_full|.
//
// The first pass takes its range and interval from cpi->sf.mesh_patterns[0].
// The range is widened to 5/4 of the larger absolute component of the
// starting vector (capped at MAX_RANGE) and the interval is scaled up to keep
// the original range/interval ratio. If that first pass was coarse
// (interval > MIN_INTERVAL and range > MIN_RANGE), the remaining
// mesh_patterns entries are applied in order, each re-centred on the best
// vector found so far, stopping after the first entry whose interval is 1.
//
// The best vector is written to |dst_mv| and its score returned.
//
// If mesh_patterns[0] holds illegal values, a diagnostic is printed and the
// assertion fires in debug builds. In builds where asserts are compiled out,
// the function returns INT64_MAX with |dst_mv| set to the starting vector
// rather than falling through to the divisions below, which would divide by
// zero when interval is 0 or when interval > range.
static int64_t full_pixel_exhaustive_new(const VP9_COMP *cpi, MACROBLOCK *x,
                                         MV *centre_mv_full,
                                         const vp9_variance_fn_ptr_t *fn_ptr,
                                         MV *dst_mv, int lambda,
                                         const int_mv *nb_full_mvs,
                                         int full_mv_num) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  MV temp_mv = { centre_mv_full->row, centre_mv_full->col };
  int64_t bestsme;
  int i;
  int interval = sf->mesh_patterns[0].interval;
  int range = sf->mesh_patterns[0].range;
  int baseline_interval_divisor;

  // Trap illegal values for interval and range for this function.
  if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
      (interval > range)) {
    printf("ERROR: invalid range\n");
    assert(0);
    // Reached only when asserts are disabled: report failure instead of
    // continuing into a division by zero.
    *dst_mv = temp_mv;
    return INT64_MAX;
  }

  // The validation above guarantees 1 <= interval <= range, so this quotient
  // is at least 1 and is safe to divide by below.
  baseline_interval_divisor = range / interval;

  // Check size of proposed first range against magnitude of the centre
  // value used as a starting point.
  range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
  range = VPXMIN(range, MAX_RANGE);
  interval = VPXMAX(interval, range / baseline_interval_divisor);

  // initial search
  // temp_mv is passed as both the output and the centre, so each pass
  // re-centres on the best vector found so far.
  bestsme =
      exhaustive_mesh_search_new(x, &temp_mv, range, interval, fn_ptr, &temp_mv,
                                 lambda, nb_full_mvs, full_mv_num);

  if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) {
    // Progressive searches with range and step size decreasing each time
    // till we reach a step size of 1. Then break out.
    for (i = 1; i < MAX_MESH_STEP; ++i) {
      // Refinement pass using the next configured range/interval pair.
      bestsme = exhaustive_mesh_search_new(
          x, &temp_mv, sf->mesh_patterns[i].range,
          sf->mesh_patterns[i].interval, fn_ptr, &temp_mv, lambda, nb_full_mvs,
          full_mv_num);

      if (sf->mesh_patterns[i].interval == 1) break;
    }
  }

  *dst_mv = temp_mv;

  return bestsme;
}
1913 
// Diamond-pattern integer-pel motion search whose score for a candidate is
//   (SAD << LOG2_PRECISION) + lambda * vp9_nb_mvs_inconsistency(candidate).
//
// x             Macroblock providing the source block, the prediction buffer
//               and the MV limits.
// cfg           Search-site tables (ss_mv / ss_os) and step counts.
// init_full_mv  Starting vector; it is copied to *best_full_mv and clamped to
//               x->mv_limits before the search begins.
// best_full_mv  Output: best vector found.
// search_param  Number of leading steps of cfg to skip; the search runs
//               cfg->total_steps - search_param steps.
// lambda        Weight applied to the neighbour-inconsistency term.
// num00         Output: count of steps that selected no new site while the
//               best position was still the starting position.
// fn_ptr        Supplies the sdf and sdx4df SAD functions.
// nb_full_mvs, full_mv_num
//               Forwarded unchanged to vp9_nb_mvs_inconsistency().
//
// Returns the best combined score.
static int64_t diamond_search_sad_new(const MACROBLOCK *x,
                                      const search_site_config *cfg,
                                      const MV *init_full_mv, MV *best_full_mv,
                                      int search_param, int lambda, int *num00,
                                      const vp9_variance_fn_ptr_t *fn_ptr,
                                      const int_mv *nb_full_mvs,
                                      int full_mv_num) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MvLimits *const limits = &x->mv_limits;
  uint8_t *const src = x->plane[0].src.buf;
  const int src_stride = x->plane[0].src.stride;
  const int ref_stride = xd->plane[0].pre[0].stride;

  // search_param selects how many leading steps of the site tables to skip:
  // 0 = initial step (MAX_FIRST_STEP) pel, 1 = (MAX_FIRST_STEP/2) pel, ...
  const int sites_per_step = cfg->searches_per_step;
  const MV *const site_mv = &cfg->ss_mv[search_param * sites_per_step];
  const intptr_t *const site_os = &cfg->ss_os[search_param * sites_per_step];
  const int num_steps = cfg->total_steps - search_param;

  const uint8_t *start_address;
  const uint8_t *best_address;
  int64_t best_cost;
  int best_site = -1;
  int prev_site = -1;
  int site = 0;  // Running index into site_mv / site_os across all steps.
  int step;

  vpx_clear_system_state();

  *best_full_mv = *init_full_mv;
  clamp_mv(best_full_mv, limits->col_min, limits->col_max, limits->row_min,
           limits->row_max);
  *num00 = 0;

  // Address in the prediction buffer that corresponds to the clamped start.
  start_address = xd->plane[0].pre[0].buf + best_full_mv->row * ref_stride +
                  best_full_mv->col;
  best_address = start_address;

  // Score the starting position.
  {
    const int64_t dist =
        (int64_t)fn_ptr->sdf(src, src_stride, start_address, ref_stride)
        << LOG2_PRECISION;
    const int64_t penalty =
        vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num);
    best_cost = dist + lambda * penalty;
  }

  for (step = 0; step < num_steps; ++step) {
    // The first four sites of the step are used to decide whether the whole
    // step lies strictly inside the limits, in which case the per-candidate
    // validity test is skipped and SADs are computed four at a time.
    const int all_in =
        (best_full_mv->row + site_mv[site].row > limits->row_min) &&
        (best_full_mv->row + site_mv[site + 1].row < limits->row_max) &&
        (best_full_mv->col + site_mv[site + 2].col > limits->col_min) &&
        (best_full_mv->col + site_mv[site + 3].col < limits->col_max);

    if (all_in) {
      int group;

      for (group = 0; group < sites_per_step; group += 4) {
        const uint8_t *candidates[4];
        unsigned int sads[4];
        int k;

        for (k = 0; k < 4; ++k) candidates[k] = site_os[site + k] + best_address;

        fn_ptr->sdx4df(src, src_stride, candidates, ref_stride, sads);

        for (k = 0; k < 4; ++k, ++site) {
          const int64_t dist = (int64_t)sads[k] << LOG2_PRECISION;
          MV cand_mv;
          int64_t penalty;
          int64_t cost;

          // The distortion alone already loses: no need for the penalty.
          if (dist >= best_cost) continue;

          cand_mv.row = best_full_mv->row + site_mv[site].row;
          cand_mv.col = best_full_mv->col + site_mv[site].col;
          penalty = vp9_nb_mvs_inconsistency(&cand_mv, nb_full_mvs, full_mv_num);
          cost = dist + lambda * penalty;
          if (cost < best_cost) {
            best_cost = cost;
            best_site = site;
          }
        }
      }
    } else {
      int k;

      for (k = 0; k < sites_per_step; ++k, ++site) {
        const MV cand_mv = { best_full_mv->row + site_mv[site].row,
                             best_full_mv->col + site_mv[site].col };
        int64_t dist;
        int64_t penalty;
        int64_t cost;

        // Trap illegal vectors.
        if (!is_mv_in(limits, &cand_mv)) continue;

        dist = (int64_t)fn_ptr->sdf(src, src_stride,
                                    site_os[site] + best_address, ref_stride)
               << LOG2_PRECISION;
        if (dist >= best_cost) continue;

        penalty = vp9_nb_mvs_inconsistency(&cand_mv, nb_full_mvs, full_mv_num);
        cost = dist + lambda * penalty;
        if (cost < best_cost) {
          best_cost = cost;
          best_site = site;
        }
      }
    }

    if (best_site != prev_site) {
      // A new site won during this step: recentre the search on it.
      best_full_mv->row += site_mv[best_site].row;
      best_full_mv->col += site_mv[best_site].col;
      best_address += site_os[best_site];
      prev_site = best_site;
    } else if (best_address == start_address) {
      ++(*num00);
    }
  }

  return best_cost;
}
2042 
// Gathers the full-pel motion vectors of the blocks adjacent to the block at
// (mi_row, mi_col) in motion_field.
//
// The four neighbours at { row, col } block offsets (-1,0), (0,-1), (1,0) and
// (0,1) are visited in that order. A neighbour contributes only when it lies
// inside the motion field and vp9_motion_field_is_mv_set() reports a vector
// for it.
//
// nb_full_mvs  Output array; the vectors are written to its leading entries.
// Returns the number of entries written.
int vp9_prepare_nb_full_mvs(const MotionField *motion_field, int mi_row,
                            int mi_col, int_mv *nb_full_mvs) {
  const int mi_width = num_8x8_blocks_wide_lookup[motion_field->bsize];
  const int mi_height = num_8x8_blocks_high_lookup[motion_field->bsize];
  const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } };
  int base_brow, base_bcol;
  int count = 0;
  int k;

  // The position is expected to be aligned to the motion field block size.
  assert(mi_row % mi_height == 0);
  assert(mi_col % mi_width == 0);

  base_brow = mi_row / mi_height;
  base_bcol = mi_col / mi_width;

  for (k = 0; k < NB_MVS_NUM; ++k) {
    const int brow = base_brow + dirs[k][0];
    const int bcol = base_bcol + dirs[k][1];
    int_mv nb_mv;

    if (brow < 0 || brow >= motion_field->block_rows) continue;
    if (bcol < 0 || bcol >= motion_field->block_cols) continue;
    if (!vp9_motion_field_is_mv_set(motion_field, brow, bcol)) continue;

    nb_mv = vp9_motion_field_get_mv(motion_field, brow, bcol);
    nb_full_mvs[count].as_mv = get_full_mv(&nb_mv.as_mv);
    ++count;
  }

  return count;
}
2068 #endif  // CONFIG_NON_GREEDY_MV
2069 
// Diamond-pattern integer-pel motion search that scores a candidate as
//   SAD + mvsad_err_cost(candidate, center_mv >> 3, sad_per_bit).
//
// x             Macroblock providing the source block, the prediction buffer
//               and the MV limits.
// cfg           Search-site tables (ss_mv / ss_os) and step counts.
// ref_mv        Starting vector; clamped in place to x->mv_limits.
// best_mv       Output: best vector found.
// search_param  Number of leading steps of cfg to skip; the search runs
//               cfg->total_steps - search_param steps.
// sad_per_bit   Forwarded to mvsad_err_cost().
// num00         Output: count of steps that selected no new site while the
//               best position was still the starting position.
// fn_ptr        Supplies the sdf and sdx4df SAD functions.
// center_mv     Vector the MV cost is measured against; both components are
//               shifted right by 3 before use.
//
// Returns the best score found.
int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg,
                             MV *ref_mv, MV *best_mv, int search_param,
                             int sad_per_bit, int *num00,
                             const vp9_variance_fn_ptr_t *fn_ptr,
                             const MV *center_mv) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MvLimits *const limits = &x->mv_limits;
  uint8_t *const src = x->plane[0].src.buf;
  const int src_stride = x->plane[0].src.stride;
  const int ref_stride = xd->plane[0].pre[0].stride;

  // search_param selects how many leading steps of the site tables to skip:
  // 0 = initial step (MAX_FIRST_STEP) pel, 1 = (MAX_FIRST_STEP/2) pel, ...
  const int sites_per_step = cfg->searches_per_step;
  const MV *const site_mv = &cfg->ss_mv[search_param * sites_per_step];
  const intptr_t *const site_os = &cfg->ss_os[search_param * sites_per_step];
  const int num_steps = cfg->total_steps - search_param;

  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };

  const uint8_t *start_address;
  const uint8_t *best_address;
  unsigned int best_cost;
  int best_site = -1;
  int prev_site = -1;
  int start_row, start_col;
  int site = 0;  // Running index into site_mv / site_os across all steps.
  int step;

  clamp_mv(ref_mv, limits->col_min, limits->col_max, limits->row_min,
           limits->row_max);
  start_row = ref_mv->row;
  start_col = ref_mv->col;
  *num00 = 0;
  best_mv->row = start_row;
  best_mv->col = start_col;

  // Address in the prediction buffer that corresponds to the clamped start.
  start_address =
      xd->plane[0].pre[0].buf + start_row * ref_stride + start_col;
  best_address = start_address;

  // Score the starting position.
  best_cost = fn_ptr->sdf(src, src_stride, start_address, ref_stride) +
              mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);

  for (step = 0; step < num_steps; ++step) {
    // The first four sites of the step are used to decide whether the whole
    // step lies strictly inside the limits, in which case the per-candidate
    // validity test is skipped and SADs are computed four at a time.
    const int all_in =
        (best_mv->row + site_mv[site].row > limits->row_min) &&
        (best_mv->row + site_mv[site + 1].row < limits->row_max) &&
        (best_mv->col + site_mv[site + 2].col > limits->col_min) &&
        (best_mv->col + site_mv[site + 3].col < limits->col_max);

    if (all_in) {
      int group;

      for (group = 0; group < sites_per_step; group += 4) {
        const uint8_t *candidates[4];
        unsigned int sads[4];
        int k;

        for (k = 0; k < 4; ++k) candidates[k] = site_os[site + k] + best_address;

        fn_ptr->sdx4df(src, src_stride, candidates, ref_stride, sads);

        for (k = 0; k < 4; ++k, ++site) {
          unsigned int cand_cost = sads[k];
          MV cand_mv;

          // The SAD alone already loses: no need to price the vector.
          if (cand_cost >= best_cost) continue;

          cand_mv.row = best_mv->row + site_mv[site].row;
          cand_mv.col = best_mv->col + site_mv[site].col;
          cand_cost += mvsad_err_cost(x, &cand_mv, &fcenter_mv, sad_per_bit);
          if (cand_cost < best_cost) {
            best_cost = cand_cost;
            best_site = site;
          }
        }
      }
    } else {
      int k;

      for (k = 0; k < sites_per_step; ++k, ++site) {
        const MV cand_mv = { best_mv->row + site_mv[site].row,
                             best_mv->col + site_mv[site].col };
        unsigned int cand_cost;

        // Trap illegal vectors.
        if (!is_mv_in(limits, &cand_mv)) continue;

        cand_cost = fn_ptr->sdf(src, src_stride, site_os[site] + best_address,
                                ref_stride);
        if (cand_cost >= best_cost) continue;

        cand_cost += mvsad_err_cost(x, &cand_mv, &fcenter_mv, sad_per_bit);
        if (cand_cost < best_cost) {
          best_cost = cand_cost;
          best_site = site;
        }
      }
    }

    if (best_site != prev_site) {
      // A new site won during this step: recentre the search on it.
      best_mv->row += site_mv[best_site].row;
      best_mv->col += site_mv[best_site].col;
      best_address += site_os[best_site];
      prev_site = best_site;
#if defined(NEW_DIAMOND_SEARCH)
      // Keep moving by the winning offset for as long as that improves the
      // score and the vector stays inside the limits.
      for (;;) {
        const MV next_mv = { best_mv->row + site_mv[best_site].row,
                             best_mv->col + site_mv[best_site].col };
        unsigned int next_cost;

        if (!is_mv_in(limits, &next_mv)) break;

        next_cost = fn_ptr->sdf(src, src_stride,
                                site_os[best_site] + best_address, ref_stride);
        if (next_cost >= best_cost) break;

        next_cost += mvsad_err_cost(x, &next_mv, &fcenter_mv, sad_per_bit);
        if (next_cost >= best_cost) break;

        best_cost = next_cost;
        best_mv->row += site_mv[best_site].row;
        best_mv->col += site_mv[best_site].col;
        best_address += site_os[best_site];
      }
#endif
    } else if (best_address == start_address) {
      ++(*num00);
    }
  }

  return best_cost;
}
2212 
// Coarse-to-fine 1-D search for the offset into ref whose vpx_vector_var()
// score against src is lowest.
//
// Offsets 0, 16, 32, ... up to bw (= 4 << bwl) are scored first. The best one
// is then refined by testing the positions -step and +step away from the
// current centre for step = 8, 4, 2, 1, skipping positions outside [0, bw].
//
// Returns the winning offset relative to the middle of the range, i.e.
// (offset - bw / 2).
static int vector_match(int16_t *ref, int16_t *src, int bwl) {
  const int bw = 4 << bwl;
  int lowest = INT_MAX;
  int centre;
  int anchor = 0;
  int pos;
  int step;

  // Coarse pass over every 16th offset.
  for (pos = 0; pos <= bw; pos += 16) {
    const int score = vpx_vector_var(&ref[pos], src, bwl);
    if (score < lowest) {
      lowest = score;
      anchor = pos;
    }
  }
  centre = anchor;

  // Refinement passes. Within a pass both probes are measured from the same
  // anchor; the anchor only moves to the new best once the pass is finished.
  for (step = 8; step >= 1; step >>= 1) {
    int side;

    for (side = -1; side <= 1; side += 2) {
      const int probe = anchor + side * step;
      int score;

      // check limit
      if (probe < 0 || probe > bw) continue;

      score = vpx_vector_var(&ref[probe], src, bwl);
      if (score < lowest) {
        lowest = score;
        centre = probe;
      }
    }
    anchor = centre;
  }

  return centre - (bw >> 1);
}
2277 
2278 static const MV search_pos[4] = {
2279   { -1, 0 },
2280   { 0, -1 },
2281   { 0, 1 },
2282   { 1, 0 },
2283 };
2284 
// Estimates a motion vector for the current block with integral projections.
//
// Row and column projections of the reference area and of the source block
// are matched independently with vector_match() to obtain the column and row
// components. The result is then compared against its four 1-away neighbours
// and against one diagonal neighbour chosen from the neighbour SADs.
//
// The vector is written to xd->mi[0]->mv[0].as_mv, multiplied by 8, and
// clamped to the range produced by vp9_set_subpel_mv_search_range() for
// ref_mv. When vp9_get_scaled_ref_frame() returns a frame, the prediction
// planes are switched to it for the duration of the call and restored before
// returning.
//
// Returns the best SAD found. With CONFIG_VP9_HIGHBITDEPTH and xd->bd != 8 the
// vector is set to zero and the SAD at the zero vector is returned instead.
unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
                                           BLOCK_SIZE bsize, int mi_row,
                                           int mi_col, const MV *ref_mv) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  const vp9_variance_fn_ptr_t *const fn = &cpi->fn_ptr[bsize];
  struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } };
  DECLARE_ALIGNED(16, int16_t, hbuf[128]);
  DECLARE_ALIGNED(16, int16_t, vbuf[128]);
  DECLARE_ALIGNED(16, int16_t, src_hbuf[64]);
  DECLARE_ALIGNED(16, int16_t, src_vbuf[64]);
  const int bwl = b_width_log2_lookup[bsize];
  const int bhl = b_height_log2_lookup[bsize];
  const int bw = 4 << bwl;
  const int bh = 4 << bhl;
  const int search_width = bw << 1;
  const int search_height = bh << 1;
  const int norm_factor = 3 + (bw >> 5);
  const int src_stride = x->plane[0].src.stride;
  const int ref_stride = xd->plane[0].pre[0].stride;
  const uint8_t *const src_base = x->plane[0].src.buf;
  const uint8_t *ref_base;
  const uint8_t *ref_pos;
  MV *const out_mv = &mi->mv[0].as_mv;
  MV probe_mv;
  unsigned int best_sad, diag_sad, nb_sad[4];
  const YV12_BUFFER_CONFIG *const scaled_ref_frame =
      vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]);
  MvLimits subpel_mv_limits;
  int plane;
  int idx;

  if (scaled_ref_frame) {
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (plane = 0; plane < MAX_MB_PLANE; ++plane)
      backup_yv12[plane] = xd->plane[plane].pre[0];
    vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
  }

#if CONFIG_VP9_HIGHBITDEPTH
  // TODO(jingning): Implement integral projection functions for high bit-depth
  // setting and remove this part of code.
  if (xd->bd != 8) {
    unsigned int zero_mv_sad;
    out_mv->row = 0;
    out_mv->col = 0;
    zero_mv_sad =
        fn->sdf(src_base, src_stride, xd->plane[0].pre[0].buf, ref_stride);

    if (scaled_ref_frame) {
      for (plane = 0; plane < MAX_MB_PLANE; ++plane)
        xd->plane[plane].pre[0] = backup_yv12[plane];
    }
    return zero_mv_sad;
  }
#endif

  // Read the prediction buffer only now, after the optional plane swap.
  ref_base = xd->plane[0].pre[0].buf;

  // Set up prediction 1-D reference set: the window starts bw/2 columns
  // (respectively bh/2 rows) before the collocated position.
  for (idx = 0; idx < search_width; idx += 16) {
    vpx_int_pro_row(&hbuf[idx], ref_base - (bw >> 1) + idx, ref_stride, bh);
  }
  for (idx = 0; idx < search_height; ++idx) {
    vbuf[idx] =
        vpx_int_pro_col(ref_base - (bh >> 1) * ref_stride + idx * ref_stride,
                        bw) >>
        norm_factor;
  }

  // Set up src 1-D reference set
  for (idx = 0; idx < bw; idx += 16) {
    vpx_int_pro_row(&src_hbuf[idx], src_base + idx, src_stride, bh);
  }
  for (idx = 0; idx < bh; ++idx) {
    src_vbuf[idx] =
        vpx_int_pro_col(src_base + idx * src_stride, bw) >> norm_factor;
  }

  // Find the best match per 1-D search
  out_mv->col = vector_match(hbuf, src_hbuf, bwl);
  out_mv->row = vector_match(vbuf, src_vbuf, bhl);

  // SAD at the projected vector and at its four 1-away neighbours.
  probe_mv = *out_mv;
  ref_pos = ref_base + probe_mv.row * ref_stride + probe_mv.col;
  best_sad = fn->sdf(src_base, src_stride, ref_pos, ref_stride);

  {
    const uint8_t *const pos[4] = {
      ref_pos - ref_stride,
      ref_pos - 1,
      ref_pos + 1,
      ref_pos + ref_stride,
    };

    fn->sdx4df(src_base, src_stride, pos, ref_stride, nb_sad);
  }

  for (idx = 0; idx < 4; ++idx) {
    if (nb_sad[idx] >= best_sad) continue;
    best_sad = nb_sad[idx];
    out_mv->row = search_pos[idx].row + probe_mv.row;
    out_mv->col = search_pos[idx].col + probe_mv.col;
  }

  // Pick the diagonal neighbour on the side of the lower row-neighbour SAD
  // and the lower column-neighbour SAD, and test it as well.
  probe_mv.row += (nb_sad[0] < nb_sad[3]) ? -1 : 1;
  probe_mv.col += (nb_sad[1] < nb_sad[2]) ? -1 : 1;

  ref_pos = ref_base + probe_mv.row * ref_stride + probe_mv.col;
  diag_sad = fn->sdf(src_base, src_stride, ref_pos, ref_stride);
  if (best_sad > diag_sad) {
    *out_mv = probe_mv;
    best_sad = diag_sad;
  }

  out_mv->row *= 8;
  out_mv->col *= 8;

  vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv);
  clamp_mv(out_mv, subpel_mv_limits.col_min, subpel_mv_limits.col_max,
           subpel_mv_limits.row_min, subpel_mv_limits.row_max);

  if (scaled_ref_frame) {
    for (plane = 0; plane < MAX_MB_PLANE; ++plane)
      xd->plane[plane].pre[0] = backup_yv12[plane];
  }

  return best_sad;
}
2423 
// Scales exhaustive_searches_thresh by block size: the value is shifted right
// by 8 minus the sum of the width and height log2 lookups for bsize, so the
// result grows with that sum.
static int get_exhaustive_threshold(int exhaustive_searches_thresh,
                                    BLOCK_SIZE bsize) {
  const int log2_sum = b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize];
  const int shift = 8 - log2_sum;
  return exhaustive_searches_thresh >> shift;
}
2429 
2430 #if CONFIG_NON_GREEDY_MV
2431 // Runs sequence of diamond searches in smaller steps for RD.
2432 /* do_refine: If last step (1-away) of n-step search doesn't pick the center
2433               point as the best match, we will do a final 1-away diamond
2434               refining search  */
vp9_full_pixel_diamond_new(const VP9_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize,MV * mvp_full,int step_param,int lambda,int do_refine,const int_mv * nb_full_mvs,int full_mv_num,MV * best_mv)2435 int vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x,
2436                                BLOCK_SIZE bsize, MV *mvp_full, int step_param,
2437                                int lambda, int do_refine,
2438                                const int_mv *nb_full_mvs, int full_mv_num,
2439                                MV *best_mv) {
2440   const vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
2441   const SPEED_FEATURES *const sf = &cpi->sf;
2442   int n, num00 = 0;
2443   int thissme;
2444   int bestsme;
2445   const int further_steps = MAX_MVSEARCH_STEPS - 1 - step_param;
2446   const MV center_mv = { 0, 0 };
2447   vpx_clear_system_state();
2448   diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, best_mv, step_param, lambda,
2449                          &n, fn_ptr, nb_full_mvs, full_mv_num);
2450 
2451   bestsme = vp9_get_mvpred_var(x, best_mv, ¢er_mv, fn_ptr, 0);
2452 
2453   // If there won't be more n-step search, check to see if refining search is
2454   // needed.
2455   if (n > further_steps) do_refine = 0;
2456 
2457   while (n < further_steps) {
2458     ++n;
2459     if (num00) {
2460       num00--;
2461     } else {
2462       MV temp_mv;
2463       diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, &temp_mv,
2464                              step_param + n, lambda, &num00, fn_ptr,
2465                              nb_full_mvs, full_mv_num);
2466       thissme = vp9_get_mvpred_var(x, &temp_mv, ¢er_mv, fn_ptr, 0);
2467       // check to see if refining search is needed.
2468       if (num00 > further_steps - n) do_refine = 0;
2469 
2470       if (thissme < bestsme) {
2471         bestsme = thissme;
2472         *best_mv = temp_mv;
2473       }
2474     }
2475   }
2476 
2477   // final 1-away diamond refining search
2478   if (do_refine) {
2479     const int search_range = 8;
2480     MV temp_mv = *best_mv;
2481     vp9_refining_search_sad_new(x, &temp_mv, lambda, search_range, fn_ptr,
2482                                 nb_full_mvs, full_mv_num);
2483     thissme = vp9_get_mvpred_var(x, &temp_mv, ¢er_mv, fn_ptr, 0);
2484     if (thissme < bestsme) {
2485       bestsme = thissme;
2486       *best_mv = temp_mv;
2487     }
2488   }
2489 
2490   if (sf->exhaustive_searches_thresh < INT_MAX &&
2491       !cpi->rc.is_src_frame_alt_ref) {
2492     const int64_t exhaustive_thr =
2493         get_exhaustive_threshold(sf->exhaustive_searches_thresh, bsize);
2494     if (bestsme > exhaustive_thr) {
2495       full_pixel_exhaustive_new(cpi, x, best_mv, fn_ptr, best_mv, lambda,
2496                                 nb_full_mvs, full_mv_num);
2497       bestsme = vp9_get_mvpred_var(x, best_mv, ¢er_mv, fn_ptr, 0);
2498     }
2499   }
2500   return bestsme;
2501 }
2502 #endif  // CONFIG_NON_GREEDY_MV
2503 
2504 // Runs sequence of diamond searches in smaller steps for RD.
2505 /* do_refine: If last step (1-away) of n-step search doesn't pick the center
2506               point as the best match, we will do a final 1-away diamond
2507               refining search  */
full_pixel_diamond(const VP9_COMP * const cpi,const MACROBLOCK * const x,MV * mvp_full,int step_param,int sadpb,int further_steps,int do_refine,int * cost_list,const vp9_variance_fn_ptr_t * fn_ptr,const MV * ref_mv,MV * dst_mv)2508 static int full_pixel_diamond(const VP9_COMP *const cpi,
2509                               const MACROBLOCK *const x, MV *mvp_full,
2510                               int step_param, int sadpb, int further_steps,
2511                               int do_refine, int *cost_list,
2512                               const vp9_variance_fn_ptr_t *fn_ptr,
2513                               const MV *ref_mv, MV *dst_mv) {
2514   MV temp_mv;
2515   int thissme, n, num00 = 0;
2516   int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
2517                                         step_param, sadpb, &n, fn_ptr, ref_mv);
2518   if (bestsme < INT_MAX)
2519     bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
2520   *dst_mv = temp_mv;
2521 
2522   // If there won't be more n-step search, check to see if refining search is
2523   // needed.
2524   if (n > further_steps) do_refine = 0;
2525 
2526   while (n < further_steps) {
2527     ++n;
2528 
2529     if (num00) {
2530       num00--;
2531     } else {
2532       thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
2533                                         step_param + n, sadpb, &num00, fn_ptr,
2534                                         ref_mv);
2535       if (thissme < INT_MAX)
2536         thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
2537 
2538       // check to see if refining search is needed.
2539       if (num00 > further_steps - n) do_refine = 0;
2540 
2541       if (thissme < bestsme) {
2542         bestsme = thissme;
2543         *dst_mv = temp_mv;
2544       }
2545     }
2546   }
2547 
2548   // final 1-away diamond refining search
2549   if (do_refine) {
2550     const int search_range = 8;
2551     MV best_mv = *dst_mv;
2552     thissme = vp9_refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr,
2553                                       ref_mv);
2554     if (thissme < INT_MAX)
2555       thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
2556     if (thissme < bestsme) {
2557       bestsme = thissme;
2558       *dst_mv = best_mv;
2559     }
2560   }
2561 
2562   // Return cost list.
2563   if (cost_list) {
2564     calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
2565   }
2566   return bestsme;
2567 }
2568 
2569 // Runs an limited range exhaustive mesh search using a pattern set
2570 // according to the encode speed profile.
full_pixel_exhaustive(const VP9_COMP * const cpi,const MACROBLOCK * const x,MV * centre_mv_full,int sadpb,int * cost_list,const vp9_variance_fn_ptr_t * fn_ptr,const MV * ref_mv,MV * dst_mv)2571 static int full_pixel_exhaustive(const VP9_COMP *const cpi,
2572                                  const MACROBLOCK *const x, MV *centre_mv_full,
2573                                  int sadpb, int *cost_list,
2574                                  const vp9_variance_fn_ptr_t *fn_ptr,
2575                                  const MV *ref_mv, MV *dst_mv) {
2576   const SPEED_FEATURES *const sf = &cpi->sf;
2577   MV temp_mv = { centre_mv_full->row, centre_mv_full->col };
2578   MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
2579   int bestsme;
2580   int i;
2581   int interval = sf->mesh_patterns[0].interval;
2582   int range = sf->mesh_patterns[0].range;
2583   int baseline_interval_divisor;
2584 
2585   // Trap illegal values for interval and range for this function.
2586   if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
2587       (interval > range))
2588     return INT_MAX;
2589 
2590   baseline_interval_divisor = range / interval;
2591 
2592   // Check size of proposed first range against magnitude of the centre
2593   // value used as a starting point.
2594   range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
2595   range = VPXMIN(range, MAX_RANGE);
2596   interval = VPXMAX(interval, range / baseline_interval_divisor);
2597 
2598   // initial search
2599   bestsme = exhaustive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval,
2600                                    sadpb, fn_ptr, &temp_mv);
2601 
2602   if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) {
2603     // Progressive searches with range and step size decreasing each time
2604     // till we reach a step size of 1. Then break out.
2605     for (i = 1; i < MAX_MESH_STEP; ++i) {
2606       // First pass with coarser step and longer range
2607       bestsme = exhaustive_mesh_search(
2608           x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range,
2609           sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv);
2610 
2611       if (sf->mesh_patterns[i].interval == 1) break;
2612     }
2613   }
2614 
2615   if (bestsme < INT_MAX)
2616     bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
2617   *dst_mv = temp_mv;
2618 
2619   // Return cost list.
2620   if (cost_list) {
2621     calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
2622   }
2623   return bestsme;
2624 }
2625 
2626 #if CONFIG_NON_GREEDY_MV
// Iterative 1-away refinement of *best_full_mv. A position is scored as
//   (SAD << LOG2_PRECISION) + lambda * vp9_nb_mvs_inconsistency(position).
//
// Each iteration scores the four neighbours one row or one column away from
// the current vector and moves to the best of them if it beats the current
// score. The search ends after search_range iterations or as soon as no
// neighbour improves the score.
//
// best_full_mv  In: starting vector. Out: refined vector.
// fn_ptr        Supplies the sdf and sdx4df SAD functions.
// nb_full_mvs, full_mv_num
//               Forwarded unchanged to vp9_nb_mvs_inconsistency().
//
// Returns the score of the final vector.
int64_t vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
                                    int lambda, int search_range,
                                    const vp9_variance_fn_ptr_t *fn_ptr,
                                    const int_mv *nb_full_mvs,
                                    int full_mv_num) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MvLimits *const limits = &x->mv_limits;
  const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  const struct buf_2d *const src = &x->plane[0].src;
  const struct buf_2d *const ref = &xd->plane[0].pre[0];
  const uint8_t *best_address = get_buf_from_mv(ref, best_full_mv);
  int64_t best_cost;
  int iter;
  int k;

  vpx_clear_system_state();

  // Score the starting position.
  {
    const int64_t dist =
        (int64_t)fn_ptr->sdf(src->buf, src->stride, best_address, ref->stride)
        << LOG2_PRECISION;
    const int64_t penalty =
        vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num);
    best_cost = dist + lambda * penalty;
  }

  for (iter = 0; iter < search_range; ++iter) {
    // When all four neighbours are strictly inside the limits their SADs are
    // computed together and the per-candidate validity test is skipped.
    const int all_in = (best_full_mv->row - 1 > limits->row_min) &&
                       (best_full_mv->row + 1 < limits->row_max) &&
                       (best_full_mv->col - 1 > limits->col_min) &&
                       (best_full_mv->col + 1 < limits->col_max);
    int best_site = -1;

    if (all_in) {
      const uint8_t *const positions[4] = { best_address - ref->stride,
                                            best_address - 1, best_address + 1,
                                            best_address + ref->stride };
      unsigned int sads[4];

      fn_ptr->sdx4df(src->buf, src->stride, positions, ref->stride, sads);

      for (k = 0; k < 4; ++k) {
        const MV cand_mv = { best_full_mv->row + neighbors[k].row,
                             best_full_mv->col + neighbors[k].col };
        const int64_t dist = (int64_t)sads[k] << LOG2_PRECISION;
        const int64_t penalty =
            vp9_nb_mvs_inconsistency(&cand_mv, nb_full_mvs, full_mv_num);
        const int64_t cost = dist + lambda * penalty;

        if (cost < best_cost) {
          best_cost = cost;
          best_site = k;
        }
      }
    } else {
      for (k = 0; k < 4; ++k) {
        const MV cand_mv = { best_full_mv->row + neighbors[k].row,
                             best_full_mv->col + neighbors[k].col };
        int64_t dist;
        int64_t penalty;
        int64_t cost;

        if (!is_mv_in(limits, &cand_mv)) continue;

        dist = (int64_t)fn_ptr->sdf(src->buf, src->stride,
                                    get_buf_from_mv(ref, &cand_mv),
                                    ref->stride)
               << LOG2_PRECISION;
        penalty = vp9_nb_mvs_inconsistency(&cand_mv, nb_full_mvs, full_mv_num);
        cost = dist + lambda * penalty;

        if (cost < best_cost) {
          best_cost = cost;
          best_site = k;
        }
      }
    }

    // No neighbour improved on the current position: done.
    if (best_site == -1) break;

    best_full_mv->row += neighbors[best_site].row;
    best_full_mv->col += neighbors[best_site].col;
    best_address = get_buf_from_mv(ref, best_full_mv);
  }

  return best_cost;
}
2709 #endif  // CONFIG_NON_GREEDY_MV
2710 
// Greedy four-neighbor refinement of *ref_mv.
//
// Starting from *ref_mv, each iteration evaluates the positions at row-1,
// col-1, col+1 and row+1. The cost of a position is the SAD reported by
// fn_ptr plus mvsad_err_cost() measured against center_mv >> 3. The search
// moves to the cheapest neighbor that beats the current best and stops once
// no neighbor improves or after search_range iterations.
//
// *ref_mv is updated in place; the best cost found is returned.
int vp9_refining_search_sad(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
                            int search_range,
                            const vp9_variance_fn_ptr_t *fn_ptr,
                            const MV *center_mv) {
  // Offsets tried around the current best position. The order must match the
  // positions[] array built in the batched path below.
  const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  const struct buf_2d *const src = &x->plane[0].src;
  const struct buf_2d *const ref = &x->e_mbd.plane[0].pre[0];
  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  const uint8_t *center_buf = get_buf_from_mv(ref, ref_mv);
  unsigned int best_sad =
      fn_ptr->sdf(src->buf, src->stride, center_buf, ref->stride) +
      mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
  int step;

  for (step = 0; step < search_range; ++step) {
    int winner = -1;
    int k;

    if (ref_mv->row - 1 > x->mv_limits.row_min &&
        ref_mv->row + 1 < x->mv_limits.row_max &&
        ref_mv->col - 1 > x->mv_limits.col_min &&
        ref_mv->col + 1 < x->mv_limits.col_max) {
      // Every neighbor lies strictly inside the limits: compute the four SADs
      // with a single batched call.
      unsigned int sads[4];
      const uint8_t *const positions[4] = { center_buf - ref->stride,
                                            center_buf - 1, center_buf + 1,
                                            center_buf + ref->stride };

      fn_ptr->sdx4df(src->buf, src->stride, positions, ref->stride, sads);

      for (k = 0; k < 4; ++k) {
        MV cand;
        unsigned int cost;

        // The MV cost is only computed when the raw SAD alone already beats
        // the current best.
        if (sads[k] >= best_sad) continue;

        cand.row = ref_mv->row + neighbors[k].row;
        cand.col = ref_mv->col + neighbors[k].col;
        cost = sads[k] + mvsad_err_cost(x, &cand, &fcenter_mv, error_per_bit);
        if (cost < best_sad) {
          best_sad = cost;
          winner = k;
        }
      }
    } else {
      // Near the limits: test each neighbor individually and skip the ones
      // that fall outside x->mv_limits.
      for (k = 0; k < 4; ++k) {
        MV cand;
        unsigned int cost;

        cand.row = ref_mv->row + neighbors[k].row;
        cand.col = ref_mv->col + neighbors[k].col;
        if (!is_mv_in(&x->mv_limits, &cand)) continue;

        cost = fn_ptr->sdf(src->buf, src->stride, get_buf_from_mv(ref, &cand),
                           ref->stride);
        if (cost >= best_sad) continue;

        cost += mvsad_err_cost(x, &cand, &fcenter_mv, error_per_bit);
        if (cost < best_sad) {
          best_sad = cost;
          winner = k;
        }
      }
    }

    // No neighbor improved on the current position: the search has converged.
    if (winner < 0) break;

    ref_mv->row += neighbors[winner].row;
    ref_mv->col += neighbors[winner].col;
    center_buf = get_buf_from_mv(ref, ref_mv);
  }

  return best_sad;
}
2783 
2784 // This function is called when we do joint motion search in comp_inter_inter
2785 // mode.
vp9_refining_search_8p_c(const MACROBLOCK * x,MV * ref_mv,int error_per_bit,int search_range,const vp9_variance_fn_ptr_t * fn_ptr,const MV * center_mv,const uint8_t * second_pred)2786 int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
2787                              int search_range,
2788                              const vp9_variance_fn_ptr_t *fn_ptr,
2789                              const MV *center_mv, const uint8_t *second_pred) {
2790   const MV neighbors[8] = { { -1, 0 },  { 0, -1 }, { 0, 1 },  { 1, 0 },
2791                             { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } };
2792   const MACROBLOCKD *const xd = &x->e_mbd;
2793   const struct buf_2d *const what = &x->plane[0].src;
2794   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2795   const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2796   unsigned int best_sad = INT_MAX;
2797   int i, j;
2798   clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
2799            x->mv_limits.row_min, x->mv_limits.row_max);
2800   best_sad =
2801       fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
2802                    in_what->stride, second_pred) +
2803       mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
2804 
2805   for (i = 0; i < search_range; ++i) {
2806     int best_site = -1;
2807 
2808     for (j = 0; j < 8; ++j) {
2809       const MV mv = { ref_mv->row + neighbors[j].row,
2810                       ref_mv->col + neighbors[j].col };
2811 
2812       if (is_mv_in(&x->mv_limits, &mv)) {
2813         unsigned int sad =
2814             fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
2815                          in_what->stride, second_pred);
2816         if (sad < best_sad) {
2817           sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2818           if (sad < best_sad) {
2819             best_sad = sad;
2820             best_site = j;
2821           }
2822         }
2823       }
2824     }
2825 
2826     if (best_site == -1) {
2827       break;
2828     } else {
2829       ref_mv->row += neighbors[best_site].row;
2830       ref_mv->col += neighbors[best_site].col;
2831     }
2832   }
2833   return best_sad;
2834 }
2835 
// Dispatches a full-pixel motion search according to search_method.
//
// The selected search starts from mvp_full and writes its result to *tmp_mv.
// When cost_list is non-NULL its first five entries are reset to INT_MAX
// before the search runs. After the primary search:
//   - MESH always runs full_pixel_exhaustive();
//   - NSTEP runs it only when the speed-feature threshold is enabled
//     (< INT_MAX), the source frame is not an alt-ref, and the primary result
//     exceeds the threshold from get_exhaustive_threshold().
// The exhaustive result replaces the primary one only if it is strictly lower.
// For every other method, when rd is nonzero and the result is below var_max,
// the return value is recomputed with vp9_get_mvpred_var().
//
// Returns the value associated with the motion vector left in *tmp_mv.
int vp9_full_pixel_search(const VP9_COMP *const cpi, const MACROBLOCK *const x,
                          BLOCK_SIZE bsize, MV *mvp_full, int step_param,
                          int search_method, int error_per_bit, int *cost_list,
                          const MV *ref_mv, MV *tmp_mv, int var_max, int rd) {
  const SEARCH_METHODS method = (SEARCH_METHODS)search_method;
  const vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
  // NSTEP and MESH share the full_pixel_diamond() path and skip the final
  // vp9_get_mvpred_var() re-evaluation.
  const int uses_full_pixel_diamond = (method == NSTEP) || (method == MESH);
  int best_var = 0;
  int want_exhaustive = 0;

  if (cost_list) {
    int k;
    for (k = 0; k < 5; ++k) cost_list[k] = INT_MAX;
  }

  switch (method) {
    case FAST_DIAMOND:
      best_var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
                                 cost_list, fn_ptr, 1, ref_mv, tmp_mv);
      break;
    case FAST_HEX:
      best_var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
                                 cost_list, fn_ptr, 1, ref_mv, tmp_mv);
      break;
    case HEX:
      best_var = hex_search(x, mvp_full, step_param, error_per_bit, 1,
                            cost_list, fn_ptr, 1, ref_mv, tmp_mv);
      break;
    case SQUARE:
      best_var = square_search(x, mvp_full, step_param, error_per_bit, 1,
                               cost_list, fn_ptr, 1, ref_mv, tmp_mv);
      break;
    case BIGDIA:
      best_var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1,
                               cost_list, fn_ptr, 1, ref_mv, tmp_mv);
      break;
    case NSTEP:
    case MESH:
      best_var = full_pixel_diamond(cpi, x, mvp_full, step_param,
                                    error_per_bit,
                                    MAX_MVSEARCH_STEPS - 1 - step_param, 1,
                                    cost_list, fn_ptr, ref_mv, tmp_mv);
      break;
    default: assert(0 && "Unknown search method");
  }

  // Decide whether the exhaustive search should follow the primary search.
  if (method == MESH) {
    want_exhaustive = 1;
  } else if (method == NSTEP &&
             cpi->sf.exhaustive_searches_thresh < INT_MAX &&
             !cpi->rc.is_src_frame_alt_ref) {
    const int64_t exhaustive_thr =
        get_exhaustive_threshold(cpi->sf.exhaustive_searches_thresh, bsize);
    want_exhaustive = best_var > exhaustive_thr;
  }

  if (want_exhaustive) {
    MV exhaustive_mv;
    const int exhaustive_var =
        full_pixel_exhaustive(cpi, x, tmp_mv, error_per_bit, cost_list, fn_ptr,
                              ref_mv, &exhaustive_mv);
    // Keep the exhaustive result only when it is strictly better.
    if (exhaustive_var < best_var) {
      best_var = exhaustive_var;
      *tmp_mv = exhaustive_mv;
    }
  }

  if (!uses_full_pixel_diamond && rd && best_var < var_max)
    best_var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1);

  return best_var;
}
2913 
// Note(yunqingwang): The following 2 functions are only used in the motion
// vector unit test. They return the extreme motion vectors allowed by the MV
// limits.
//
// COMMON_MV_TEST is the shared prologue of those two functions: it expands
// SETUP_SUBPEL_SEARCH and then casts each listed identifier to void,
// presumably to silence unused-variable/parameter warnings, since the two
// functions perform no actual search. The expansion has no trailing
// semicolon; the use site supplies it.
// NOTE(review): identifiers that are not function parameters (e.g. z, y,
// offset, hstep, tr, tc) are assumed to be declared by SETUP_SUBPEL_SEARCH,
// which is defined outside this section -- confirm.
#define COMMON_MV_TEST \
  SETUP_SUBPEL_SEARCH; \
                       \
  (void)error_per_bit; \
  (void)vfp;           \
  (void)z;             \
  (void)src_stride;    \
  (void)y;             \
  (void)y_stride;      \
  (void)second_pred;   \
  (void)w;             \
  (void)h;             \
  (void)offset;        \
  (void)mvjcost;       \
  (void)mvcost;        \
  (void)sse1;          \
  (void)distortion;    \
                       \
  (void)halfiters;     \
  (void)quarteriters;  \
  (void)eighthiters;   \
  (void)whichdir;      \
  (void)allow_hp;      \
  (void)forced_stop;   \
  (void)hstep;         \
  (void)rr;            \
  (void)rc;            \
                       \
  (void)tr;            \
  (void)tc;            \
  (void)sse;           \
  (void)thismse;       \
  (void)cost_list;     \
  (void)use_accurate_subpel_search
2951 
2952 // Return the maximum MV.
vp9_return_max_sub_pixel_mv(const MACROBLOCK * x,MV * bestmv,const MV * ref_mv,int allow_hp,int error_per_bit,const vp9_variance_fn_ptr_t * vfp,int forced_stop,int iters_per_step,int * cost_list,int * mvjcost,int * mvcost[2],uint32_t * distortion,uint32_t * sse1,const uint8_t * second_pred,int w,int h,int use_accurate_subpel_search)2953 uint32_t vp9_return_max_sub_pixel_mv(
2954     const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
2955     int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
2956     int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
2957     uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
2958     int h, int use_accurate_subpel_search) {
2959   COMMON_MV_TEST;
2960 
2961   (void)minr;
2962   (void)minc;
2963 
2964   bestmv->row = maxr;
2965   bestmv->col = maxc;
2966   besterr = 0;
2967 
2968   // In the sub-pel motion search, if hp is not used, then the last bit of mv
2969   // has to be 0.
2970   lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv));
2971 
2972   return besterr;
2973 }
2974 // Return the minimum MV.
vp9_return_min_sub_pixel_mv(const MACROBLOCK * x,MV * bestmv,const MV * ref_mv,int allow_hp,int error_per_bit,const vp9_variance_fn_ptr_t * vfp,int forced_stop,int iters_per_step,int * cost_list,int * mvjcost,int * mvcost[2],uint32_t * distortion,uint32_t * sse1,const uint8_t * second_pred,int w,int h,int use_accurate_subpel_search)2975 uint32_t vp9_return_min_sub_pixel_mv(
2976     const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
2977     int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
2978     int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
2979     uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
2980     int h, int use_accurate_subpel_search) {
2981   COMMON_MV_TEST;
2982 
2983   (void)maxr;
2984   (void)maxc;
2985 
2986   bestmv->row = minr;
2987   bestmv->col = minc;
2988   besterr = 0;
2989 
2990   // In the sub-pel motion search, if hp is not used, then the last bit of mv
2991   // has to be 0.
2992   lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv));
2993 
2994   return besterr;
2995 }
2996