• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <assert.h>
12 #include <math.h>
13 #include <stdio.h>
14 
15 #include "./vp9_rtcd.h"
16 
17 #include "vpx_dsp/vpx_dsp_common.h"
18 #include "vpx_mem/vpx_mem.h"
19 #include "vpx_ports/bitops.h"
20 #include "vpx_ports/mem.h"
21 #include "vpx_ports/system_state.h"
22 
23 #include "vp9/common/vp9_common.h"
24 #include "vp9/common/vp9_entropy.h"
25 #include "vp9/common/vp9_entropymode.h"
26 #include "vp9/common/vp9_mvref_common.h"
27 #include "vp9/common/vp9_pred_common.h"
28 #include "vp9/common/vp9_quant_common.h"
29 #include "vp9/common/vp9_reconinter.h"
30 #include "vp9/common/vp9_reconintra.h"
31 #include "vp9/common/vp9_seg_common.h"
32 
33 #include "vp9/encoder/vp9_cost.h"
34 #include "vp9/encoder/vp9_encodemb.h"
35 #include "vp9/encoder/vp9_encodemv.h"
36 #include "vp9/encoder/vp9_encoder.h"
37 #include "vp9/encoder/vp9_mcomp.h"
38 #include "vp9/encoder/vp9_quantize.h"
39 #include "vp9/encoder/vp9_ratectrl.h"
40 #include "vp9/encoder/vp9_rd.h"
41 #include "vp9/encoder/vp9_tokenize.h"
42 
43 #define RD_THRESH_POW 1.25
44 
45 // Factor to weigh the rate for switchable interp filters.
46 #define SWITCHABLE_INTERP_RATE_FACTOR 1
47 
// Puts *rd_cost into its "worst possible" state: rate is set to INT_MAX and
// both dist and rdcost are set to INT64_MAX.
void vp9_rd_cost_reset(RD_COST *rd_cost) {
  rd_cost->rdcost = INT64_MAX;
  rd_cost->dist = INT64_MAX;
  rd_cost->rate = INT_MAX;
}
53 
// Zeroes the rate, dist and rdcost fields of *rd_cost.
void vp9_rd_cost_init(RD_COST *rd_cost) {
  rd_cost->rdcost = 0;
  rd_cost->dist = 0;
  rd_cost->rate = 0;
}
59 
60 // The baseline rd thresholds for breaking out of the rd loop for
61 // certain modes are assumed to be based on 8x8 blocks.
62 // This table is used to correct for block size.
63 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
64 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
65   2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
66 };
67 
// Fills the mode-related cost tables stored on cpi (y_mode_costs,
// mbmode_cost, intra_uv_mode_cost and switchable_interp_costs) by running
// vp9_cost_tokens() over the corresponding probability tables: the fixed
// vp9_kf_* tables and the tables in the current frame context
// (cpi->common.fc).
static void fill_mode_costs(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const fc = cpi->common.fc;
  int row, col, mode, ctx;

  // Luma intra mode costs from the vp9_kf_y_mode_prob table.
  for (row = 0; row < INTRA_MODES; ++row) {
    for (col = 0; col < INTRA_MODES; ++col) {
      vp9_cost_tokens(cpi->y_mode_costs[row][col],
                      vp9_kf_y_mode_prob[row][col], vp9_intra_mode_tree);
    }
  }

  vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);

  // Chroma intra mode costs, one set per frame type.
  for (mode = 0; mode < INTRA_MODES; ++mode) {
    vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME][mode],
                    vp9_kf_uv_mode_prob[mode], vp9_intra_mode_tree);
    vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME][mode],
                    fc->uv_mode_prob[mode], vp9_intra_mode_tree);
  }

  // Switchable interpolation filter costs, one set per context.
  for (ctx = 0; ctx < SWITCHABLE_FILTER_CONTEXTS; ++ctx) {
    vp9_cost_tokens(cpi->switchable_interp_costs[ctx],
                    fc->switchable_interp_prob[ctx],
                    vp9_switchable_interp_tree);
  }
}
89 
// Expands every coefficient probability model in p to a full probability set
// and converts it into two token cost tables in c: index 0 via
// vp9_cost_tokens() and index 1 via vp9_cost_tokens_skip(). Both variants are
// expected to agree on the EOB_TOKEN cost (checked by assert).
static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  TX_SIZE tx;
  int plane, ref, band, ctx;

  for (tx = TX_4X4; tx <= TX_32X32; ++tx) {
    for (plane = 0; plane < PLANE_TYPES; ++plane) {
      for (ref = 0; ref < REF_TYPES; ++ref) {
        for (band = 0; band < COEF_BANDS; ++band) {
          for (ctx = 0; ctx < BAND_COEFF_CONTEXTS(band); ++ctx) {
            vpx_prob full_probs[ENTROPY_NODES];

            vp9_model_to_full_probs(p[tx][plane][ref][band][ctx], full_probs);
            vp9_cost_tokens((int *)c[tx][plane][ref][band][0][ctx], full_probs,
                            vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[tx][plane][ref][band][1][ctx],
                                 full_probs, vp9_coef_tree);
            assert(c[tx][plane][ref][band][0][ctx][EOB_TOKEN] ==
                   c[tx][plane][ref][band][1][ctx][EOB_TOKEN]);
          }
        }
      }
    }
  }
}
108 
109 // Values are now correlated to quantizer.
110 static int sad_per_bit16lut_8[QINDEX_RANGE];
111 static int sad_per_bit4lut_8[QINDEX_RANGE];
112 
113 #if CONFIG_VP9_HIGHBITDEPTH
114 static int sad_per_bit16lut_10[QINDEX_RANGE];
115 static int sad_per_bit4lut_10[QINDEX_RANGE];
116 static int sad_per_bit16lut_12[QINDEX_RANGE];
117 static int sad_per_bit4lut_12[QINDEX_RANGE];
118 #endif
119 
// Fills the first `range` entries of bit16lut and bit4lut for the given bit
// depth. Each entry is a truncated linear function of the real quantizer
// value for that qindex. The tables are computed by formula (rather than
// hard-coded) to make it easier to assess experimental changes to the
// quantizer tables.
static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
                            vpx_bit_depth_t bit_depth) {
  int qindex;

  for (qindex = 0; qindex < range; ++qindex) {
    const double q = vp9_convert_qindex_to_q(qindex, bit_depth);

    bit4lut[qindex] = (int)(0.063 * q + 2.742);
    bit16lut[qindex] = (int)(0.0418 * q + 2.4107);
  }
}
132 
vp9_init_me_luts(void)133 void vp9_init_me_luts(void) {
134   init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
135                   VPX_BITS_8);
136 #if CONFIG_VP9_HIGHBITDEPTH
137   init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
138                   VPX_BITS_10);
139   init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
140                   VPX_BITS_12);
141 #endif
142 }
143 
144 static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
145                                          8,  8,  4,  4,  2,  2,  1,  0 };
146 static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128,
147                                                               128, 144 };
148 
vp9_compute_rd_mult_based_on_qindex(const VP9_COMP * cpi,int qindex)149 int64_t vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {
150   const int64_t q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
151 #if CONFIG_VP9_HIGHBITDEPTH
152   int64_t rdmult = 0;
153   switch (cpi->common.bit_depth) {
154     case VPX_BITS_8: rdmult = 88 * q * q / 24; break;
155     case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4); break;
156     case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8); break;
157     default:
158       assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
159       return -1;
160   }
161 #else
162   int64_t rdmult = 88 * q * q / 24;
163 #endif  // CONFIG_VP9_HIGHBITDEPTH
164   return rdmult;
165 }
166 
vp9_compute_rd_mult(const VP9_COMP * cpi,int qindex)167 int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
168   int64_t rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, qindex);
169 
170   if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
171     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
172     const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
173     const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));
174 
175     rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
176     rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
177   }
178   if (rdmult < 1) rdmult = 1;
179   return (int)rdmult;
180 }
181 
// Returns the rd threshold factor for qindex: max(8, 5.12 * q^RD_THRESH_POW),
// where q is the DC quantizer divided by 4, 16 or 64 for 8-, 10- or 12-bit
// depth respectively. Builds without high bit depth always use the 8-bit
// quantizer. Returns -1 for an unrecognised bit depth.
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
  double q;
#if CONFIG_VP9_HIGHBITDEPTH
  double divisor;

  switch (bit_depth) {
    case VPX_BITS_8: divisor = 4.0; break;
    case VPX_BITS_10: divisor = 16.0; break;
    case VPX_BITS_12: divisor = 64.0; break;
    default:
      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
      return -1;
  }
  q = vp9_dc_quant(qindex, 0, bit_depth) / divisor;
#else
  (void)bit_depth;
  q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // TODO(debargha): Adjust the function below.
  return VPXMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
}
200 
// Loads x->sadperbit16 and x->sadperbit4 from the SAD-per-bit lookup tables
// for qindex. In high-bit-depth builds the tables matching cpi's bit depth
// are used; an unrecognised bit depth asserts and leaves x untouched.
void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
#if CONFIG_VP9_HIGHBITDEPTH
  const int *lut16;
  const int *lut4;

  switch (cpi->common.bit_depth) {
    case VPX_BITS_8:
      lut16 = sad_per_bit16lut_8;
      lut4 = sad_per_bit4lut_8;
      break;
    case VPX_BITS_10:
      lut16 = sad_per_bit16lut_10;
      lut4 = sad_per_bit4lut_10;
      break;
    case VPX_BITS_12:
      lut16 = sad_per_bit16lut_12;
      lut4 = sad_per_bit4lut_12;
      break;
    default:
      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
      return;
  }
  x->sadperbit16 = lut16[qindex];
  x->sadperbit4 = lut4[qindex];
#else
  (void)cpi;
  x->sadperbit16 = sad_per_bit16lut_8[qindex];
  x->sadperbit4 = sad_per_bit4lut_8[qindex];
#endif  // CONFIG_VP9_HIGHBITDEPTH
}
225 
// Fills rd->threshes[segment][block size][mode] for every segment and block
// size. Each threshold is the mode's multiplier times a factor derived from
// the segment's qindex and the block size, divided by 4; it saturates at
// INT_MAX when the product would overflow. Block sizes of 8x8 and above use
// rd->thresh_mult (MAX_MODES entries); smaller sizes use
// rd->thresh_mult_sub8x8 (MAX_REFS entries).
static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
  int seg, bs, m;

  for (seg = 0; seg < MAX_SEGMENTS; ++seg) {
    const int seg_qindex = clamp(
        vp9_get_qindex(&cm->seg, seg, cm->base_qindex) + cm->y_dc_delta_q, 0,
        MAXQ);
    const int q_factor = compute_rd_thresh_factor(seg_qindex, cm->bit_depth);

    for (bs = 0; bs < BLOCK_SIZES; ++bs) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int scale = q_factor * rd_thresh_block_size_factor[bs];
      const int mult_limit = INT_MAX / scale;

      if (bs < BLOCK_8X8) {
        for (m = 0; m < MAX_REFS; ++m) {
          if (rd->thresh_mult_sub8x8[m] < mult_limit) {
            rd->threshes[seg][bs][m] = rd->thresh_mult_sub8x8[m] * scale / 4;
          } else {
            rd->threshes[seg][bs][m] = INT_MAX;
          }
        }
      } else {
        for (m = 0; m < MAX_MODES; ++m) {
          if (rd->thresh_mult[m] < mult_limit) {
            rd->threshes[seg][bs][m] = rd->thresh_mult[m] * scale / 4;
          } else {
            rd->threshes[seg][bs][m] = INT_MAX;
          }
        }
      }
    }
  }
}
257 
vp9_initialize_rd_consts(VP9_COMP * cpi)258 void vp9_initialize_rd_consts(VP9_COMP *cpi) {
259   VP9_COMMON *const cm = &cpi->common;
260   MACROBLOCK *const x = &cpi->td.mb;
261   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
262   RD_OPT *const rd = &cpi->rd;
263   int i;
264 
265   vpx_clear_system_state();
266 
267   rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
268   rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
269 
270   set_error_per_bit(x, rd->RDMULT);
271 
272   x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
273                        cm->frame_type != KEY_FRAME)
274                           ? 0
275                           : 1;
276 
277   set_block_thresholds(cm, rd);
278   set_partition_probs(cm, xd);
279 
280   if (cpi->oxcf.pass == 1) {
281     if (!frame_is_intra_only(cm))
282       vp9_build_nmv_cost_table(
283           x->nmvjointcost,
284           cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
285           &cm->fc->nmvc, cm->allow_high_precision_mv);
286   } else {
287     if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
288       fill_token_costs(x->token_costs, cm->fc->coef_probs);
289 
290     if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
291         cm->frame_type == KEY_FRAME) {
292       for (i = 0; i < PARTITION_CONTEXTS; ++i)
293         vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i),
294                         vp9_partition_tree);
295     }
296 
297     if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
298         cm->frame_type == KEY_FRAME) {
299       fill_mode_costs(cpi);
300 
301       if (!frame_is_intra_only(cm)) {
302         vp9_build_nmv_cost_table(
303             x->nmvjointcost,
304             cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
305             &cm->fc->nmvc, cm->allow_high_precision_mv);
306 
307         for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
308           vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
309                           cm->fc->inter_mode_probs[i], vp9_inter_mode_tree);
310       }
311     }
312   }
313 }
314 
315 // NOTE: The tables below must be of the same size.
316 
317 // The functions described below are sampled at the four most significant
318 // bits of x^2 + 8 / 256.
319 
// Normalized rate (Q10):
// Models the rate of a Laplacian source with a given variance when it is
// quantized by a uniform quantizer with a given step size. The closed form
// expression is:
//   Rn(x) = H(sqrt(r)) + sqrt(r) * [1 + H(r) / (1 - r)],
// where r = exp(-sqrt(2) * x), x = qpstep / sqrt(variance),
// and H(x) is the binary entropy function.
static const int rate_tab_q10[] = {
  65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142, 4044,
  3958,  3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186, 3133, 3037,
  2952,  2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353, 2290, 2232, 2179,
  2130,  2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651, 1608, 1530, 1460, 1398,
  1342,  1290, 1243, 1199, 1159, 1086, 1021, 963,  911,  864,  821,  781,  745,
  680,   623,  574,  530,  490,  455,  424,  395,  345,  304,  269,  239,  213,
  190,   171,  154,  126,  104,  87,   73,   61,   52,   44,   38,   28,   21,
  16,    12,   10,   8,    6,    5,    3,    2,    1,    1,    1,    0,    0,
};
337 
// Normalized distortion (Q10):
// Models the normalized distortion of a Laplacian source with a given
// variance when it is quantized by a uniform quantizer with a given step
// size. The closed form expression is:
//   Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2)),
// where x = qpstep / sqrt(variance).
// The actual distortion is Dn * variance.
static const int dist_tab_q10[] = {
  0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,    5,
  6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,   18,   21,
  24,   26,   29,   31,   34,   36,   39,   44,   49,   54,   59,   64,   69,
  73,   78,   88,   97,   106,  115,  124,  133,  142,  151,  167,  184,  200,
  215,  231,  245,  260,  274,  301,  327,  351,  375,  397,  418,  439,  458,
  495,  528,  559,  587,  613,  637,  659,  680,  717,  749,  777,  801,  823,
  842,  859,  874,  899,  919,  936,  949,  960,  969,  977,  983,  994,  1001,
  1006, 1010, 1013, 1015, 1017, 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
};
// x^2 value (Q10) at each sample point of the rate and distortion tables.
// model_rd_norm() subtracts the entry for the selected sample from its input
// to obtain the offset used for linear interpolation between that sample and
// the next one.
static const int xsq_iq_q10[] = {
  0,      4,      8,      12,     16,     20,     24,     28,     32,
  40,     48,     56,     64,     72,     80,     88,     96,     112,
  128,    144,    160,    176,    192,    208,    224,    256,    288,
  320,    352,    384,    416,    448,    480,    544,    608,    672,
  736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
  1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
  3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
  7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
  16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
  36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
  81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
  180192, 196576, 212960, 229344, 245728,
};
369 
model_rd_norm(int xsq_q10,int * r_q10,int * d_q10)370 static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
371   const int tmp = (xsq_q10 >> 2) + 8;
372   const int k = get_msb(tmp) - 3;
373   const int xq = (k << 3) + ((tmp >> k) & 0x7);
374   const int one_q10 = 1 << 10;
375   const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
376   const int b_q10 = one_q10 - a_q10;
377   *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
378   *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
379 }
380 
model_rd_norm_vec(int xsq_q10[MAX_MB_PLANE],int r_q10[MAX_MB_PLANE],int d_q10[MAX_MB_PLANE])381 static void model_rd_norm_vec(int xsq_q10[MAX_MB_PLANE],
382                               int r_q10[MAX_MB_PLANE],
383                               int d_q10[MAX_MB_PLANE]) {
384   int i;
385   const int one_q10 = 1 << 10;
386   for (i = 0; i < MAX_MB_PLANE; ++i) {
387     const int tmp = (xsq_q10[i] >> 2) + 8;
388     const int k = get_msb(tmp) - 3;
389     const int xq = (k << 3) + ((tmp >> k) & 0x7);
390     const int a_q10 = ((xsq_q10[i] - xsq_iq_q10[xq]) << 10) >> (2 + k);
391     const int b_q10 = one_q10 - a_q10;
392     r_q10[i] = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
393     d_q10[i] = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
394   }
395 }
396 
// Upper clamp for the Q10 x^2 value passed to the model lookup. It is one
// less than the last xsq_iq_q10 entry, which keeps the upper interpolation
// index (idx + 1) inside the rate/distortion tables.
static const uint32_t MAX_XSQ_Q10 = 245727;
398 
vp9_model_rd_from_var_lapndz(unsigned int var,unsigned int n_log2,unsigned int qstep,int * rate,int64_t * dist)399 void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
400                                   unsigned int qstep, int *rate,
401                                   int64_t *dist) {
402   // This function models the rate and distortion for a Laplacian
403   // source with given variance when quantized with a uniform quantizer
404   // with given stepsize. The closed form expressions are in:
405   // Hang and Chen, "Source Model for transform video coder and its
406   // application - Part I: Fundamental Theory", IEEE Trans. Circ.
407   // Sys. for Video Tech., April 1997.
408   if (var == 0) {
409     *rate = 0;
410     *dist = 0;
411   } else {
412     int d_q10, r_q10;
413     const uint64_t xsq_q10_64 =
414         (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
415     const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
416     model_rd_norm(xsq_q10, &r_q10, &d_q10);
417     *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - VP9_PROB_COST_SHIFT);
418     *dist = (var * (int64_t)d_q10 + 512) >> 10;
419   }
420 }
421 
422 // Implements a fixed length vector form of vp9_model_rd_from_var_lapndz where
423 // vectors are of length MAX_MB_PLANE and all elements of var are non-zero.
vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE],unsigned int n_log2[MAX_MB_PLANE],unsigned int qstep[MAX_MB_PLANE],int64_t * rate_sum,int64_t * dist_sum)424 void vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE],
425                                       unsigned int n_log2[MAX_MB_PLANE],
426                                       unsigned int qstep[MAX_MB_PLANE],
427                                       int64_t *rate_sum, int64_t *dist_sum) {
428   int i;
429   int xsq_q10[MAX_MB_PLANE], d_q10[MAX_MB_PLANE], r_q10[MAX_MB_PLANE];
430   for (i = 0; i < MAX_MB_PLANE; ++i) {
431     const uint64_t xsq_q10_64 =
432         (((uint64_t)qstep[i] * qstep[i] << (n_log2[i] + 10)) + (var[i] >> 1)) /
433         var[i];
434     xsq_q10[i] = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
435   }
436   model_rd_norm_vec(xsq_q10, r_q10, d_q10);
437   for (i = 0; i < MAX_MB_PLANE; ++i) {
438     int rate =
439         ROUND_POWER_OF_TWO(r_q10[i] << n_log2[i], 10 - VP9_PROB_COST_SHIFT);
440     int64_t dist = (var[i] * (int64_t)d_q10[i] + 512) >> 10;
441     *rate_sum += rate;
442     *dist_sum += dist;
443   }
444 }
445 
// Returns 1 if any of the `bytes` context bytes starting at ctx is non-zero,
// 0 otherwise. `bytes` must not exceed sizeof(uint64_t). The bytes are read
// with memcpy() rather than through a wider integer pointer, so the load is
// valid for any alignment and does not break the effective-type (strict
// aliasing) rules.
static int ctx_bytes_nonzero(const ENTROPY_CONTEXT *ctx, size_t bytes) {
  uint64_t bits = 0;
  memcpy(&bits, ctx, bytes);
  return bits != 0;
}

// Builds the above/left entropy contexts for a plane at the granularity of
// tx_size.
//
// For TX_4X4 the plane's above/left contexts are copied as-is. For larger
// transforms, each group of 2, 4 or 8 consecutive contexts (TX_8X8, TX_16X16,
// TX_32X32 respectively) is collapsed to a single 0/1 flag stored at the
// group's first index in t_above / t_left; the other entries of the group
// are not written.
//
//   bsize    block size, converted to the plane's block size via pd.
//   tx_size  transform size selecting the grouping.
//   pd       plane whose above_context / left_context are read.
//   t_above  output, indexed in 4x4 units across the block width.
//   t_left   output, indexed in 4x4 units down the block height.
void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;

  int i;
  switch (tx_size) {
    case TX_4X4:
      memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = ctx_bytes_nonzero(&above[i], sizeof(uint16_t));
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = ctx_bytes_nonzero(&left[i], sizeof(uint16_t));
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = ctx_bytes_nonzero(&above[i], sizeof(uint32_t));
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = ctx_bytes_nonzero(&left[i], sizeof(uint32_t));
      break;
    case TX_32X32:
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = ctx_bytes_nonzero(&above[i], sizeof(uint64_t));
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = ctx_bytes_nonzero(&left[i], sizeof(uint64_t));
      break;
    default: assert(0 && "Invalid transform size."); break;
  }
}
483 
// Evaluates the candidate reference motion vectors for ref_frame (the two
// entries of x->mbmi_ext->ref_mvs plus, when adaptive motion search is on and
// the block is smaller than x->max_partition_size, x->pred_mv) by computing
// the SAD between the source block and the reference block at each
// candidate's full-pel position. Records on x, per ref_frame:
//   mv_best_ref_index - index of the candidate with the lowest SAD,
//   max_mv_context    - largest |row| or |col| of the visited candidates, >> 3,
//   pred_mv_sad       - the lowest SAD found.
// The second candidate is skipped when it equals the first, and a candidate
// that rounds to the zero full-pel vector is evaluated only once.
void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  uint8_t *const src_y_ptr = x->plane[0].src.buf;
  const int src_y_stride = x->plane[0].src.stride;
  const int num_mv_refs =
      MAX_MV_REF_CANDIDATES +
      (cpi->sf.adaptive_motion_search && block_size < x->max_partition_size);
  const int near_same_nearest = x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
                                x->mbmi_ext->ref_mvs[ref_frame][1].as_int;
  MV pred_mv[3];
  int best_index = 0;
  int best_sad = INT_MAX;
  int max_mv = 0;
  int zero_seen = 0;
  int cand;

  pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
  pred_mv[2] = x->pred_mv[ref_frame];
  assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));

  // Get the sad for each candidate reference mv.
  for (cand = 0; cand < num_mv_refs; ++cand) {
    const MV *const mv = &pred_mv[cand];
    uint8_t *ref_y_ptr;
    int fp_row, fp_col, mv_extent, is_zero, cand_sad;

    if (cand == 1 && near_same_nearest) continue;

    // Round the candidate to the nearest full-pel position.
    fp_row = (mv->row + 3 + (mv->row >= 0)) >> 3;
    fp_col = (mv->col + 3 + (mv->col >= 0)) >> 3;
    mv_extent = VPXMAX(abs(mv->row), abs(mv->col)) >> 3;
    max_mv = VPXMAX(max_mv, mv_extent);

    is_zero = (fp_row == 0 && fp_col == 0);
    if (is_zero && zero_seen) continue;
    zero_seen |= is_zero;

    // Find sad for current vector.
    ref_y_ptr = &ref_y_buffer[ref_y_stride * fp_row + fp_col];
    cand_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, src_y_stride, ref_y_ptr,
                                           ref_y_stride);
    // Note if it is the best so far.
    if (cand_sad < best_sad) {
      best_index = cand;
      best_sad = cand_sad;
    }
  }

  // Note the index of the mv that worked best in the reference list.
  x->mv_best_ref_index[ref_frame] = best_index;
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}
536 
vp9_setup_pred_block(const MACROBLOCKD * xd,struct buf_2d dst[MAX_MB_PLANE],const YV12_BUFFER_CONFIG * src,int mi_row,int mi_col,const struct scale_factors * scale,const struct scale_factors * scale_uv)537 void vp9_setup_pred_block(const MACROBLOCKD *xd,
538                           struct buf_2d dst[MAX_MB_PLANE],
539                           const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
540                           const struct scale_factors *scale,
541                           const struct scale_factors *scale_uv) {
542   int i;
543 
544   dst[0].buf = src->y_buffer;
545   dst[0].stride = src->y_stride;
546   dst[1].buf = src->u_buffer;
547   dst[2].buf = src->v_buffer;
548   dst[1].stride = dst[2].stride = src->uv_stride;
549 
550   for (i = 0; i < MAX_MB_PLANE; ++i) {
551     setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
552                      i ? scale_uv : scale, xd->plane[i].subsampling_x,
553                      xd->plane[i].subsampling_y);
554   }
555 }
556 
// Converts a raster-order 4x4 block index within a plane block into a sample
// offset for a buffer with the given stride.
int vp9_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
                            int stride) {
  const int width_log2 = b_width_log2_lookup[plane_bsize];
  const int block_row = raster_block >> width_log2;
  const int block_col = raster_block & ((1 << width_log2) - 1);

  // Each block is 4 samples wide and 4 samples high.
  return (4 * block_row) * stride + (4 * block_col);
}
564 
// Returns a pointer into `base` for the given raster-order 4x4 block,
// using a stride of 4 times the plane block's width in 4x4 units.
int16_t *vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
                                       int16_t *base) {
  const int row_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset =
      vp9_raster_block_offset(plane_bsize, raster_block, row_stride);

  return base + offset;
}
570 
vp9_get_scaled_ref_frame(const VP9_COMP * cpi,int ref_frame)571 YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
572                                              int ref_frame) {
573   const VP9_COMMON *const cm = &cpi->common;
574   const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
575   const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
576   return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX)
577              ? &cm->buffer_pool->frame_bufs[scaled_idx].buf
578              : NULL;
579 }
580 
vp9_get_switchable_rate(const VP9_COMP * cpi,const MACROBLOCKD * const xd)581 int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
582   const MODE_INFO *const mi = xd->mi[0];
583   const int ctx = get_pred_context_switchable_interp(xd);
584   return SWITCHABLE_INTERP_RATE_FACTOR *
585          cpi->switchable_interp_costs[ctx][mi->interp_filter];
586 }
587 
// Initialises cpi->rd.thresh_mult[] for all modes. Every entry starts from a
// baseline (-500 in BEST mode, 0 otherwise). The three NEAREST* modes are then
// set outright to 300 when adaptive rd thresholds are enabled and to 0
// otherwise; every other mode listed below has a fixed amount added to the
// baseline.
void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
  RD_OPT *const rd = &cpi->rd;
  SPEED_FEATURES *const sf = &cpi->sf;
  const int baseline = cpi->oxcf.mode == BEST ? -500 : 0;
  const int nearest_thresh = sf->adaptive_rd_thresh ? 300 : 0;
  int mode;

  // Set baseline threshold values.
  for (mode = 0; mode < MAX_MODES; ++mode) rd->thresh_mult[mode] = baseline;

  // The NEAREST modes ignore the baseline.
  rd->thresh_mult[THR_NEARESTMV] = nearest_thresh;
  rd->thresh_mult[THR_NEARESTG] = nearest_thresh;
  rd->thresh_mult[THR_NEARESTA] = nearest_thresh;

  // Baseline + 1000.
  rd->thresh_mult[THR_DC] += 1000;
  rd->thresh_mult[THR_TM] += 1000;
  rd->thresh_mult[THR_NEWMV] += 1000;
  rd->thresh_mult[THR_NEWG] += 1000;
  rd->thresh_mult[THR_NEWA] += 1000;
  rd->thresh_mult[THR_NEARMV] += 1000;
  rd->thresh_mult[THR_NEARG] += 1000;
  rd->thresh_mult[THR_NEARA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;

  // Baseline + 1500.
  rd->thresh_mult[THR_COMP_NEARLA] += 1500;
  rd->thresh_mult[THR_COMP_NEARGA] += 1500;

  // Baseline + 2000.
  rd->thresh_mult[THR_COMP_NEWLA] += 2000;
  rd->thresh_mult[THR_COMP_NEWGA] += 2000;
  rd->thresh_mult[THR_ZEROMV] += 2000;
  rd->thresh_mult[THR_ZEROG] += 2000;
  rd->thresh_mult[THR_ZEROA] += 2000;
  rd->thresh_mult[THR_H_PRED] += 2000;
  rd->thresh_mult[THR_V_PRED] += 2000;

  // Baseline + 2500.
  rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
  rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
  rd->thresh_mult[THR_D45_PRED] += 2500;
  rd->thresh_mult[THR_D63_PRED] += 2500;
  rd->thresh_mult[THR_D117_PRED] += 2500;
  rd->thresh_mult[THR_D135_PRED] += 2500;
  rd->thresh_mult[THR_D153_PRED] += 2500;
  rd->thresh_mult[THR_D207_PRED] += 2500;
}
641 
// Copies the sub-8x8 threshold multipliers into cpi->rd.thresh_mult_sub8x8.
// Row 1 of the table is used in BEST mode, row 0 otherwise.
void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
  static const int sub8x8_mult[2][MAX_REFS] = {
    { 2500, 2500, 2500, 4500, 4500, 2500 },
    { 2000, 2000, 2000, 4000, 4000, 2000 }
  };
  const int row = cpi->oxcf.mode == BEST;
  RD_OPT *const rd = &cpi->rd;

  memcpy(rd->thresh_mult_sub8x8, sub8x8_mult[row], sizeof(sub8x8_mult[row]));
}
651 
vp9_update_rd_thresh_fact(int (* factor_buf)[MAX_MODES],int rd_thresh,int bsize,int best_mode_index)652 void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
653                                int bsize, int best_mode_index) {
654   if (rd_thresh > 0) {
655     const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
656     int mode;
657     for (mode = 0; mode < top_mode; ++mode) {
658       const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
659       const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
660       BLOCK_SIZE bs;
661       for (bs = min_size; bs <= max_size; ++bs) {
662         int *const fact = &factor_buf[bs][mode];
663         if (mode == best_mode_index) {
664           *fact -= (*fact >> 4);
665         } else {
666           *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
667         }
668       }
669     }
670   }
671 }
672 
// Returns the rate penalty applied to intra modes: 20 times the 8-bit DC
// quantizer for (qindex, qdelta), right-shifted by a reduction factor.
// The reduction is 4 bits for blocks up to 8x8, 2 bits for larger blocks up
// to 16x16 and none above that; it is disabled entirely when noise
// estimation is enabled and reports a high noise level.
int vp9_get_intra_cost_penalty(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
                               int qindex, int qdelta) {
  const int high_noise =
      cpi->noise_estimate.enabled && cpi->noise_estimate.level == kHigh;
  int reduction_fac = 0;

  // Reduce the intra cost penalty for small blocks (<=16x16), but not when
  // the estimated noise level is high.
  if (!high_noise && bsize <= BLOCK_16X16)
    reduction_fac = (bsize <= BLOCK_8X8) ? 4 : 2;

  // Always use VPX_BITS_8 as input here because the penalty is applied
  // to rate, not distortion, so we want a consistent penalty for all bit
  // depths. If the actual bit depth were passed in here then the value
  // returned by vp9_dc_quant() would scale with the bit depth and we would
  // then need to apply inverse scaling to correct back to a bit depth
  // independent rate penalty.
  return (20 * vp9_dc_quant(qindex, qdelta, VPX_BITS_8)) >> reduction_fac;
}
691