• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <assert.h>
12 #include <math.h>
13 #include <stdio.h>
14 
15 #include "./vp9_rtcd.h"
16 
17 #include "vpx_mem/vpx_mem.h"
18 
19 #include "vp9/common/vp9_common.h"
20 #include "vp9/common/vp9_entropy.h"
21 #include "vp9/common/vp9_entropymode.h"
22 #include "vp9/common/vp9_mvref_common.h"
23 #include "vp9/common/vp9_pred_common.h"
24 #include "vp9/common/vp9_quant_common.h"
25 #include "vp9/common/vp9_reconinter.h"
26 #include "vp9/common/vp9_reconintra.h"
27 #include "vp9/common/vp9_seg_common.h"
28 #include "vp9/common/vp9_systemdependent.h"
29 
30 #include "vp9/encoder/vp9_cost.h"
31 #include "vp9/encoder/vp9_encodemb.h"
32 #include "vp9/encoder/vp9_encodemv.h"
33 #include "vp9/encoder/vp9_encoder.h"
34 #include "vp9/encoder/vp9_mcomp.h"
35 #include "vp9/encoder/vp9_quantize.h"
36 #include "vp9/encoder/vp9_ratectrl.h"
37 #include "vp9/encoder/vp9_rd.h"
38 #include "vp9/encoder/vp9_tokenize.h"
39 #include "vp9/encoder/vp9_variance.h"
40 
41 #define RD_THRESH_POW      1.25
42 #define RD_MULT_EPB_RATIO  64
43 
44 // Factor to weigh the rate for switchable interp filters.
45 #define SWITCHABLE_INTERP_RATE_FACTOR 1
46 
47 // The baseline rd thresholds for breaking out of the rd loop for
48 // certain modes are assumed to be based on 8x8 blocks.
49 // This table is used to correct for block size.
50 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
51 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
52   2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
53 };
54 
// Rebuilds the mode cost tables stored in |cpi| by running vp9_cost_tokens()
// over the relevant probability sets:
//   - y_mode_costs[][]          from vp9_kf_y_mode_prob[][]
//   - mbmode_cost               from the frame context's y_mode_prob[1]
//   - intra_uv_mode_cost[]      from the TM_PRED rows of the key-frame and
//                               frame-context uv mode probabilities
//   - switchable_interp_costs[] from the frame context, one per filter context
static void fill_mode_costs(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const frame_ctx = &cpi->common.fc;
  int m0, m1, filter_ctx;

  for (m0 = 0; m0 < INTRA_MODES; ++m0) {
    for (m1 = 0; m1 < INTRA_MODES; ++m1) {
      vp9_cost_tokens(cpi->y_mode_costs[m0][m1], vp9_kf_y_mode_prob[m0][m1],
                      vp9_intra_mode_tree);
    }
  }

  vp9_cost_tokens(cpi->mbmode_cost, frame_ctx->y_mode_prob[1],
                  vp9_intra_mode_tree);

  // Chroma mode costs are kept separately for key frames and inter frames.
  vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME],
                  vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
  vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME],
                  frame_ctx->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);

  for (filter_ctx = 0; filter_ctx < SWITCHABLE_FILTER_CONTEXTS; ++filter_ctx) {
    vp9_cost_tokens(cpi->switchable_interp_costs[filter_ctx],
                    frame_ctx->switchable_interp_prob[filter_ctx],
                    vp9_switchable_interp_tree);
  }
}
74 
// Fills the coefficient token cost tables |c| from the model probabilities
// |p|. For every (transform size, plane type, reference type, band, context)
// tuple the model is expanded to a full probability set, and two cost rows
// are produced: index 0 via vp9_cost_tokens() and index 1 via
// vp9_cost_tokens_skip(). The two rows are expected to agree on EOB_TOKEN.
static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  TX_SIZE tx;
  int plane, ref, band, ctx;

  for (tx = TX_4X4; tx <= TX_32X32; ++tx) {
    for (plane = 0; plane < PLANE_TYPES; ++plane) {
      for (ref = 0; ref < REF_TYPES; ++ref) {
        for (band = 0; band < COEF_BANDS; ++band) {
          // The number of contexts depends on the band.
          for (ctx = 0; ctx < BAND_COEFF_CONTEXTS(band); ++ctx) {
            vp9_prob full_probs[ENTROPY_NODES];

            vp9_model_to_full_probs(p[tx][plane][ref][band][ctx], full_probs);
            vp9_cost_tokens((int *)c[tx][plane][ref][band][0][ctx],
                            full_probs, vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[tx][plane][ref][band][1][ctx],
                                 full_probs, vp9_coef_tree);
            assert(c[tx][plane][ref][band][0][ctx][EOB_TOKEN] ==
                   c[tx][plane][ref][band][1][ctx][EOB_TOKEN]);
          }
        }
      }
    }
  }
}
94 
95 // Values are now correlated to quantizer.
96 static int sad_per_bit16lut_8[QINDEX_RANGE];
97 static int sad_per_bit4lut_8[QINDEX_RANGE];
98 
99 #if CONFIG_VP9_HIGHBITDEPTH
100 static int sad_per_bit16lut_10[QINDEX_RANGE];
101 static int sad_per_bit4lut_10[QINDEX_RANGE];
102 static int sad_per_bit16lut_12[QINDEX_RANGE];
103 static int sad_per_bit4lut_12[QINDEX_RANGE];
104 #endif
105 
// Fills the first |range| entries of |bit16lut| and |bit4lut| for the given
// bit depth. Each entry is a linear function of the real quantizer value
// returned by vp9_convert_qindex_to_q(), truncated to int.
//
// The tables are computed by formula (rather than hard-coded) to make it
// easier to resolve the impact of experimental changes to the quantizer
// tables.
static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
                            vpx_bit_depth_t bit_depth) {
  int qindex;

  for (qindex = 0; qindex < range; ++qindex) {
    const double real_q = vp9_convert_qindex_to_q(qindex, bit_depth);

    bit16lut[qindex] = (int)(0.0418 * real_q + 2.4107);
    bit4lut[qindex] = (int)(0.063 * real_q + 2.742);
  }
}
118 
vp9_init_me_luts()119 void vp9_init_me_luts() {
120   init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
121                   VPX_BITS_8);
122 #if CONFIG_VP9_HIGHBITDEPTH
123   init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
124                   VPX_BITS_10);
125   init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
126                   VPX_BITS_12);
127 #endif
128 }
129 
130 static const int rd_boost_factor[16] = {
131   64, 32, 32, 32, 24, 16, 12, 12,
132   8, 8, 4, 4, 2, 2, 1, 0
133 };
134 static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = {
135   128, 144, 128, 128, 144
136 };
137 
vp9_compute_rd_mult(const VP9_COMP * cpi,int qindex)138 int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
139   const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
140 #if CONFIG_VP9_HIGHBITDEPTH
141   int rdmult = 0;
142   switch (cpi->common.bit_depth) {
143     case VPX_BITS_8:
144       rdmult = 88 * q * q / 24;
145       break;
146     case VPX_BITS_10:
147       rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4);
148       break;
149     case VPX_BITS_12:
150       rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8);
151       break;
152     default:
153       assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
154       return -1;
155   }
156 #else
157   int rdmult = 88 * q * q / 24;
158 #endif
159   if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
160     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
161     const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
162     const int boost_index = MIN(15, (cpi->rc.gfu_boost / 100));
163 
164     rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
165     rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
166   }
167   return rdmult;
168 }
169 
// Derives the rd threshold scale factor for quantizer index |qindex|.
//
// The DC quantizer is divided by 4, 16 or 64 for 8-, 10- and 12-bit depth
// respectively, raised to the power RD_THRESH_POW, scaled by 5.12 and
// truncated; the result is never less than 8. Builds without
// CONFIG_VP9_HIGHBITDEPTH ignore |bit_depth| and always use the 8-bit path.
//
// Returns -1 for an unrecognised bit depth (high-bit-depth builds only).
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
  double norm_q;
  int factor;

#if CONFIG_VP9_HIGHBITDEPTH
  double divisor;

  switch (bit_depth) {
    case VPX_BITS_8:
      divisor = 4.0;
      break;
    case VPX_BITS_10:
      divisor = 16.0;
      break;
    case VPX_BITS_12:
      divisor = 64.0;
      break;
    default:
      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
      return -1;
  }
  norm_q = vp9_dc_quant(qindex, 0, bit_depth) / divisor;
#else
  (void) bit_depth;
  norm_q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
#endif

  // TODO(debargha): Adjust the function below.
  factor = (int)(pow(norm_q, RD_THRESH_POW) * 5.12);
  return factor > 8 ? factor : 8;
}
194 
// Loads cpi->mb.sadperbit16 and cpi->mb.sadperbit4 from the SAD-per-bit
// lookup tables at |qindex|. High-bit-depth builds pick the table pair that
// matches cpi->common.bit_depth; an unrecognised depth asserts and leaves
// both fields untouched. Other builds always use the 8-bit tables.
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  const int *lut16 = sad_per_bit16lut_8;
  const int *lut4 = sad_per_bit4lut_8;

#if CONFIG_VP9_HIGHBITDEPTH
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8:
      break;
    case VPX_BITS_10:
      lut16 = sad_per_bit16lut_10;
      lut4 = sad_per_bit4lut_10;
      break;
    case VPX_BITS_12:
      lut16 = sad_per_bit16lut_12;
      lut4 = sad_per_bit4lut_12;
      break;
    default:
      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
      return;
  }
#endif

  cpi->mb.sadperbit16 = lut16[qindex];
  cpi->mb.sadperbit4 = lut4[qindex];
}
218 
// Scales one threshold multiplier by |scale| / 4. Saturates to INT_MAX when
// the multiplier is not below INT_MAX / |scale|, so the product cannot
// overflow on the positive side.
static int scale_thresh_mult(int mult, int scale) {
  if (mult < INT_MAX / scale)
    return mult * scale / 4;
  return INT_MAX;
}

// Recomputes rd->threshes[segment][block size][mode] for every segment and
// block size. Each segment's quantizer index (base index adjusted by the
// segment and by y_dc_delta_q, clamped to [0, MAXQ]) yields a threshold
// factor, which is combined with the per-block-size factor and applied to
// the speed-feature multipliers: thresh_mult[] for BLOCK_8X8 and above,
// thresh_mult_sub8x8[] for smaller blocks.
static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
  int seg, bs, idx;

  for (seg = 0; seg < MAX_SEGMENTS; ++seg) {
    const int seg_qindex =
        clamp(vp9_get_qindex(&cm->seg, seg, cm->base_qindex) +
              cm->y_dc_delta_q, 0, MAXQ);
    const int q_factor = compute_rd_thresh_factor(seg_qindex, cm->bit_depth);

    for (bs = 0; bs < BLOCK_SIZES; ++bs) {
      // The saturation limit seems unnecessarily harsh but is fine given the
      // actual range of values used for cpi->sf.thresh_mult[].
      const int scale = q_factor * rd_thresh_block_size_factor[bs];

      if (bs < BLOCK_8X8) {
        for (idx = 0; idx < MAX_REFS; ++idx) {
          rd->threshes[seg][bs][idx] =
              scale_thresh_mult(rd->thresh_mult_sub8x8[idx], scale);
        }
      } else {
        for (idx = 0; idx < MAX_MODES; ++idx) {
          rd->threshes[seg][bs][idx] =
              scale_thresh_mult(rd->thresh_mult[idx], scale);
        }
      }
    }
  }
}
250 
// Sets up the per-frame rate-distortion constants in |cpi|:
//   - RDDIV / RDMULT and the derived errorperbit (forced to at least 1),
//   - whether transform size selection is enabled,
//   - the block rd thresholds,
//   - token and partition costs (skipped in non-rd pick mode except on key
//     frames),
//   - mode, motion vector and inter-mode costs (in non-rd pick mode only
//     refreshed on key frames and when current_video_frame % 8 == 1).
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const mb = &cpi->mb;
  RD_OPT *const rd_opt = &cpi->rd;
  const int is_key_frame = cm->frame_type == KEY_FRAME;
  const int uses_rd_pick = !cpi->sf.use_nonrd_pick_mode;
  int ctx;

  vp9_clear_system_state();

  rd_opt->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
  rd_opt->RDMULT =
      vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  mb->errorperbit = rd_opt->RDMULT / RD_MULT_EPB_RATIO;
  if (mb->errorperbit == 0)
    mb->errorperbit = 1;

  // Transform size search is disabled only for non-key frames that always
  // use the largest transform.
  mb->select_tx_size = !(cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                         !is_key_frame);

  set_block_thresholds(cm, rd_opt);

  if (uses_rd_pick || is_key_frame) {
    fill_token_costs(mb->token_costs, cm->fc.coef_probs);

    for (ctx = 0; ctx < PARTITION_CONTEXTS; ++ctx) {
      vp9_cost_tokens(cpi->partition_cost[ctx], get_partition_probs(cm, ctx),
                      vp9_partition_tree);
    }
  }

  if (uses_rd_pick || (cm->current_video_frame & 0x07) == 1 || is_key_frame) {
    fill_mode_costs(cpi);

    if (!frame_is_intra_only(cm)) {
      const int use_hp = cm->allow_high_precision_mv;

      // Only the cost table matching the active mv precision is rebuilt.
      vp9_build_nmv_cost_table(mb->nmvjointcost,
                               use_hp ? mb->nmvcost_hp : mb->nmvcost,
                               &cm->fc.nmvc, use_hp);

      for (ctx = 0; ctx < INTER_MODE_CONTEXTS; ++ctx) {
        vp9_cost_tokens((int *)cpi->inter_mode_cost[ctx],
                        cm->fc.inter_mode_probs[ctx], vp9_inter_mode_tree);
      }
    }
  }
}
294 
// Looks up the normalized rate (*r_q10) and normalized distortion (*d_q10)
// for the Q10 input |xsq_q10| by linear interpolation between neighbouring
// samples of the tables below.
//
// The tables are sampled non-uniformly: 8 samples per segment, with the
// sample spacing doubling from one segment to the next (4, 8, 16, ...).
// The entry at index + 1 is always read, so the caller must keep |xsq_q10|
// small enough that the index stays below the last table entry.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The three tables below must be of the same size.

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256.

  // Normalized rate:
  // Models the rate for a Laplacian source with given variance when
  // quantized with a uniform quantizer with given stepsize. The closed form
  // expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536,  6086,  5574,  5275,  5063,  4899,  4764,  4651,
     4553,  4389,  4255,  4142,  4044,  3958,  3881,  3811,
     3748,  3635,  3538,  3453,  3376,  3307,  3244,  3186,
     3133,  3037,  2952,  2877,  2809,  2747,  2690,  2638,
     2589,  2501,  2423,  2353,  2290,  2232,  2179,  2130,
     2084,  2001,  1928,  1862,  1802,  1748,  1698,  1651,
     1608,  1530,  1460,  1398,  1342,  1290,  1243,  1199,
     1159,  1086,  1021,   963,   911,   864,   821,   781,
      745,   680,   623,   574,   530,   490,   455,   424,
      395,   345,   304,   269,   239,   213,   190,   171,
      154,   126,   104,    87,    73,    61,    52,    44,
       38,    28,    21,    16,    12,    10,     8,     6,
        5,     3,     2,     1,     1,     1,     0,     0,
  };
  // Normalized distortion:
  // Models the normalized distortion for a Laplacian source with given
  // variance when quantized with a uniform quantizer with given stepsize.
  // The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
       0,     0,     1,     1,     1,     2,     2,     2,
       3,     3,     4,     5,     5,     6,     7,     7,
       8,     9,    11,    12,    13,    15,    16,    17,
      18,    21,    24,    26,    29,    31,    34,    36,
      39,    44,    49,    54,    59,    64,    69,    73,
      78,    88,    97,   106,   115,   124,   133,   142,
     151,   167,   184,   200,   215,   231,   245,   260,
     274,   301,   327,   351,   375,   397,   418,   439,
     458,   495,   528,   559,   587,   613,   637,   659,
     680,   717,   749,   777,   801,   823,   842,   859,
     874,   899,   919,   936,   949,   960,   969,   977,
     983,   994,  1001,  1006,  1010,  1013,  1015,  1017,
    1018,  1020,  1022,  1022,  1023,  1023,  1023,  1024,
  };
  // Q10 input value at which each table entry was sampled.
  static const int xsq_iq_q10[] = {
         0,      4,      8,     12,     16,     20,     24,     28,
        32,     40,     48,     56,     64,     72,     80,     88,
        96,    112,    128,    144,    160,    176,    192,    208,
       224,    256,    288,    320,    352,    384,    416,    448,
       480,    544,    608,    672,    736,    800,    864,    928,
       992,   1120,   1248,   1376,   1504,   1632,   1760,   1888,
      2016,   2272,   2528,   2784,   3040,   3296,   3552,   3808,
      4064,   4576,   5088,   5600,   6112,   6624,   7136,   7648,
      8160,   9184,  10208,  11232,  12256,  13280,  14304,  15328,
     16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,
     32736,  36832,  40928,  45024,  49120,  53216,  57312,  61408,
     65504,  73696,  81888,  90080,  98272, 106464, 114656, 122848,
    131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
  };

  // Segment number comes from the position of the top set bit of the biased
  // input; the three bits below it select the sample within the segment.
  const int biased = (xsq_q10 >> 2) + 8;
  const int seg = get_msb(biased) - 3;
  const int idx = (seg << 3) + ((biased >> seg) & 0x7);

  // Interpolation weights in Q10: |w_hi| is the distance past sample |idx|
  // divided by the sample spacing of this segment (1 << (2 + seg)).
  const int w_hi = ((xsq_q10 - xsq_iq_q10[idx]) << 10) >> (2 + seg);
  const int w_lo = (1 << 10) - w_hi;

  *r_q10 = (rate_tab_q10[idx] * w_lo + rate_tab_q10[idx + 1] * w_hi) >> 10;
  *d_q10 = (dist_tab_q10[idx] * w_lo + dist_tab_q10[idx + 1] * w_hi) >> 10;
}
369 
// Models the rate and distortion for a Laplacian source with variance |var|
// over |n| samples when quantized with a uniform quantizer of step |qstep|.
// The closed form expressions are in:
// Hang and Chen, "Source Model for transform video coder and its
// application - Part I: Fundamental Theory", IEEE Trans. Circ.
// Sys. for Video Tech., April 1997.
//
// Writes the modelled rate to *rate and distortion to *dist; both are 0 when
// |var| is 0.
void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
                                  unsigned int qstep, int *rate,
                                  int64_t *dist) {
  // Largest input accepted by model_rd_norm().
  static const uint32_t MAX_XSQ_Q10 = 245727;
  uint64_t ratio_q10;
  int xsq_q10;
  int rate_q10, dist_q10;

  if (var == 0) {
    *rate = 0;
    *dist = 0;
    return;
  }

  // qstep^2 * n / var in Q10, rounded to nearest and computed in 64 bits.
  ratio_q10 = ((((uint64_t)qstep * qstep * n) << 10) + (var >> 1)) / var;
  xsq_q10 = (int)MIN(ratio_q10, MAX_XSQ_Q10);

  model_rd_norm(xsq_q10, &rate_q10, &dist_q10);

  *rate = (n * rate_q10 + 2) >> 2;
  *dist = (var * (int64_t)dist_q10 + 512) >> 10;
}
393 
// Returns 1 if any of the |count| entropy contexts starting at |ctx| is
// non-zero, 0 otherwise.
static int any_entropy_ctx_set(const ENTROPY_CONTEXT *ctx, int count) {
  int i;
  int bits = 0;
  for (i = 0; i < count; ++i)
    bits |= ctx[i];
  return bits != 0;
}

// Copies the above/left entropy contexts of plane |pd| into |t_above| and
// |t_left| at the granularity of transform size |tx_size|.
//
// For TX_4X4 the contexts are copied one-to-one. For larger transforms each
// group of 2, 4 or 8 consecutive 4x4 contexts is collapsed to a single 0/1
// flag (1 if any context in the group is non-zero) stored at the group's
// first index; the other entries of the output arrays are not written.
//
// The groups are combined byte by byte rather than by reading the context
// array through a wider integer pointer, which avoids depending on the
// array's alignment and on type-punned loads.
void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;
  int group;  // Number of 4x4 contexts covered by one transform block edge.
  int i;

  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      return;
    case TX_8X8:
      group = 2;
      break;
    case TX_16X16:
      group = 4;
      break;
    case TX_32X32:
      group = 8;
      break;
    default:
      assert(0 && "Invalid transform size.");
      return;
  }

  for (i = 0; i < num_4x4_w; i += group)
    t_above[i] = any_entropy_ctx_set(&above[i], group);
  for (i = 0; i < num_4x4_h; i += group)
    t_left[i] = any_entropy_ctx_set(&left[i], group);
}
433 
// Evaluates the candidate predicted motion vectors for |ref_frame| by
// computing the SAD between the source block and the reference block each
// candidate points at (full-pel part of the vector only).
//
// Candidates are the two entries of mbmi->ref_mvs[ref_frame], plus
// x->pred_mv[ref_frame] when adaptive motion search is on and |block_size|
// is below the maximum partition size. A zero vector is evaluated at most
// once.
//
// Results are stored in |x| for |ref_frame|: the index of the lowest-SAD
// candidate, that SAD, and the largest candidate component magnitude in
// full-pel units.
void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
                 uint8_t *ref_y_buffer, int ref_y_stride,
                 int ref_frame, BLOCK_SIZE block_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
  uint8_t *src_y_ptr = x->plane[0].src.buf;
  const int num_mv_refs = MAX_MV_REF_CANDIDATES +
                    (cpi->sf.adaptive_motion_search &&
                     block_size < cpi->sf.max_partition_size);
  MV candidates[3];
  int cand;
  int saw_zero = 0;
  int winner = 0;
  int lowest_sad = INT_MAX;
  int largest_mv = 0;

  candidates[0] = mbmi->ref_mvs[ref_frame][0].as_mv;
  candidates[1] = mbmi->ref_mvs[ref_frame][1].as_mv;
  candidates[2] = x->pred_mv[ref_frame];

  for (cand = 0; cand < num_mv_refs; ++cand) {
    const MV *const mv = &candidates[cand];
    const int is_zero = is_zero_mv(mv);
    const int row_mag = abs(mv->row);
    const int col_mag = abs(mv->col);
    const int mag = (row_mag > col_mag ? row_mag : col_mag) >> 3;
    uint8_t *ref_y_ptr;
    int cand_sad;

    // Every candidate contributes to the magnitude, even a skipped one.
    if (mag > largest_mv)
      largest_mv = mag;

    // A repeated zero vector would give the same SAD; skip it.
    if (is_zero && saw_zero)
      continue;
    saw_zero |= is_zero;

    ref_y_ptr =
        &ref_y_buffer[ref_y_stride * (mv->row >> 3) + (mv->col >> 3)];

    cand_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
                                           ref_y_ptr, ref_y_stride);

    // Strict comparison: on a tie the earlier candidate stays the winner.
    if (cand_sad < lowest_sad) {
      lowest_sad = cand_sad;
      winner = cand;
    }
  }

  x->mv_best_ref_index[ref_frame] = winner;
  x->max_mv_context[ref_frame] = largest_mv;
  x->pred_mv_sad[ref_frame] = lowest_sad;
}
485 
vp9_setup_pred_block(const MACROBLOCKD * xd,struct buf_2d dst[MAX_MB_PLANE],const YV12_BUFFER_CONFIG * src,int mi_row,int mi_col,const struct scale_factors * scale,const struct scale_factors * scale_uv)486 void vp9_setup_pred_block(const MACROBLOCKD *xd,
487                           struct buf_2d dst[MAX_MB_PLANE],
488                           const YV12_BUFFER_CONFIG *src,
489                           int mi_row, int mi_col,
490                           const struct scale_factors *scale,
491                           const struct scale_factors *scale_uv) {
492   int i;
493 
494   dst[0].buf = src->y_buffer;
495   dst[0].stride = src->y_stride;
496   dst[1].buf = src->u_buffer;
497   dst[2].buf = src->v_buffer;
498   dst[1].stride = dst[2].stride = src->uv_stride;
499 
500   for (i = 0; i < MAX_MB_PLANE; ++i) {
501     setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
502                      i ? scale_uv : scale,
503                      xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
504   }
505 }
506 
vp9_get_scaled_ref_frame(const VP9_COMP * cpi,int ref_frame)507 const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
508                                                    int ref_frame) {
509   const VP9_COMMON *const cm = &cpi->common;
510   const int ref_idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
511   const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
512   return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL;
513 }
514 
vp9_get_switchable_rate(const VP9_COMP * cpi)515 int vp9_get_switchable_rate(const VP9_COMP *cpi) {
516   const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
517   const MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
518   const int ctx = vp9_get_pred_context_switchable_interp(xd);
519   return SWITCHABLE_INTERP_RATE_FACTOR *
520              cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
521 }
522 
// Initialises rd->thresh_mult[] for all prediction modes.
//
// Every mode starts from a baseline of -500 in BEST mode and 0 otherwise.
// The three NEAREST modes are then set outright (300 with adaptive rd
// thresholds, 0 without), and the remaining modes listed below have a fixed
// increment added to the baseline. THR_NEWMV additionally receives
// sf->elevate_newmv_thresh.
void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
  // Fixed increments added on top of the baseline, one entry per mode.
  static const struct {
    int mode;
    int bump;
  } mode_bumps[] = {
    { THR_DC, 1000 },

    { THR_NEWMV, 1000 },
    { THR_NEWA, 1000 },
    { THR_NEWG, 1000 },

    { THR_NEARMV, 1000 },
    { THR_NEARA, 1000 },
    { THR_COMP_NEARESTLA, 1000 },
    { THR_COMP_NEARESTGA, 1000 },

    { THR_TM, 1000 },

    { THR_COMP_NEARLA, 1500 },
    { THR_COMP_NEWLA, 2000 },
    { THR_NEARG, 1000 },
    { THR_COMP_NEARGA, 1500 },
    { THR_COMP_NEWGA, 2000 },

    { THR_ZEROMV, 2000 },
    { THR_ZEROG, 2000 },
    { THR_ZEROA, 2000 },
    { THR_COMP_ZEROLA, 2500 },
    { THR_COMP_ZEROGA, 2500 },

    { THR_H_PRED, 2000 },
    { THR_V_PRED, 2000 },
    { THR_D45_PRED, 2500 },
    { THR_D135_PRED, 2500 },
    { THR_D117_PRED, 2500 },
    { THR_D153_PRED, 2500 },
    { THR_D207_PRED, 2500 },
    { THR_D63_PRED, 2500 },
  };
  const int num_bumps = (int)(sizeof(mode_bumps) / sizeof(mode_bumps[0]));
  RD_OPT *const rd = &cpi->rd;
  SPEED_FEATURES *const sf = &cpi->sf;
  const int baseline = cpi->oxcf.mode == BEST ? -500 : 0;
  const int nearest_thresh = sf->adaptive_rd_thresh ? 300 : 0;
  int n;

  // Set baseline threshold values.
  for (n = 0; n < MAX_MODES; ++n) {
    rd->thresh_mult[n] = baseline;
  }

  // The NEAREST modes ignore the baseline.
  rd->thresh_mult[THR_NEARESTMV] = nearest_thresh;
  rd->thresh_mult[THR_NEARESTG] = nearest_thresh;
  rd->thresh_mult[THR_NEARESTA] = nearest_thresh;

  for (n = 0; n < num_bumps; ++n) {
    rd->thresh_mult[mode_bumps[n].mode] += mode_bumps[n].bump;
  }

  // Extra NEWMV adjustment from the speed features. Per the original note
  // this is meant for real time mode, which only uses the last reference
  // frame.
  rd->thresh_mult[THR_NEWMV] += sf->elevate_newmv_thresh;
}
580 
// Initialises rd->thresh_mult_sub8x8[] for the sub-8x8 reference types.
//
// Each entry starts at -500 in BEST mode and 0 otherwise, then gets a fixed
// increment (2500 for the single-reference and intra entries, 4500 for the
// compound ones). Entries whose bit is set in sf->disable_split_mask are
// finally forced to INT_MAX.
void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  RD_OPT *const rd = &cpi->rd;
  const int baseline = cpi->oxcf.mode == BEST ? -500 : 0;
  int ref;

  for (ref = 0; ref < MAX_REFS; ++ref) {
    rd->thresh_mult_sub8x8[ref] = baseline;
  }

  rd->thresh_mult_sub8x8[THR_LAST] += 2500;
  rd->thresh_mult_sub8x8[THR_GOLD] += 2500;
  rd->thresh_mult_sub8x8[THR_ALTR] += 2500;
  rd->thresh_mult_sub8x8[THR_INTRA] += 2500;

  rd->thresh_mult_sub8x8[THR_COMP_LA] += 4500;
  rd->thresh_mult_sub8x8[THR_COMP_GA] += 4500;

  // Check for masked out split cases.
  for (ref = 0; ref < MAX_REFS; ++ref) {
    if (!(sf->disable_split_mask & (1 << ref)))
      continue;
    rd->thresh_mult_sub8x8[ref] = INT_MAX;
  }
}
601