• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <assert.h>
12 #include <math.h>
13 #include <stdio.h>
14 
15 #include "./vp9_rtcd.h"
16 
17 #include "vpx_dsp/vpx_dsp_common.h"
18 #include "vpx_mem/vpx_mem.h"
19 #include "vpx_ports/bitops.h"
20 #include "vpx_ports/mem.h"
21 #include "vpx_ports/system_state.h"
22 
23 #include "vp9/common/vp9_common.h"
24 #include "vp9/common/vp9_entropy.h"
25 #include "vp9/common/vp9_entropymode.h"
26 #include "vp9/common/vp9_mvref_common.h"
27 #include "vp9/common/vp9_pred_common.h"
28 #include "vp9/common/vp9_quant_common.h"
29 #include "vp9/common/vp9_reconinter.h"
30 #include "vp9/common/vp9_reconintra.h"
31 #include "vp9/common/vp9_seg_common.h"
32 
33 #include "vp9/encoder/vp9_cost.h"
34 #include "vp9/encoder/vp9_encodemb.h"
35 #include "vp9/encoder/vp9_encodemv.h"
36 #include "vp9/encoder/vp9_encoder.h"
37 #include "vp9/encoder/vp9_mcomp.h"
38 #include "vp9/encoder/vp9_quantize.h"
39 #include "vp9/encoder/vp9_ratectrl.h"
40 #include "vp9/encoder/vp9_rd.h"
41 #include "vp9/encoder/vp9_tokenize.h"
42 
43 #define RD_THRESH_POW 1.25
44 
45 // Factor to weigh the rate for switchable interp filters.
46 #define SWITCHABLE_INTERP_RATE_FACTOR 1
47 
// Mark an RD cost record as invalid/worst-case by saturating every field,
// so that any genuine candidate cost compares as better.
void vp9_rd_cost_reset(RD_COST *rd_cost) {
  rd_cost->rate = INT_MAX;
  rd_cost->dist = INT64_MAX;
  rd_cost->rdcost = INT64_MAX;
}
53 
// Zero-initialize an RD cost record (used when costs will be accumulated).
void vp9_rd_cost_init(RD_COST *rd_cost) {
  rd_cost->rate = 0;
  rd_cost->dist = 0;
  rd_cost->rdcost = 0;
}
59 
// Compute the rate-distortion cost RDCOST(mult, div, rate, dist) while
// handling negative rate and/or distortion inputs, dispatching to the
// sign-specific RDCOST_* macro variants so intermediate math stays valid.
int64_t vp9_calculate_rd_cost(int mult, int div, int rate, int64_t dist) {
  assert(mult >= 0);
  assert(div > 0);
  if (rate >= 0) {
    if (dist >= 0) return RDCOST(mult, div, rate, dist);
    // Negative distortion only.
    return RDCOST_NEG_D(mult, div, rate, -dist);
  }
  // Negative rate only.
  if (dist >= 0) return RDCOST_NEG_R(mult, div, -rate, dist);
  // Both negative: cost of the magnitudes, negated.
  return -RDCOST(mult, div, -rate, -dist);
}
74 
// Refresh rd_cost->rdcost from the current rate/dist fields; if either
// field is saturated (invalid), reset the whole record instead.
void vp9_rd_cost_update(int mult, int div, RD_COST *rd_cost) {
  const int valid = rd_cost->rate < INT_MAX && rd_cost->dist < INT64_MAX;
  if (!valid) {
    vp9_rd_cost_reset(rd_cost);
    return;
  }
  rd_cost->rdcost =
      vp9_calculate_rd_cost(mult, div, rd_cost->rate, rd_cost->dist);
}
83 
84 // The baseline rd thresholds for breaking out of the rd loop for
85 // certain modes are assumed to be based on 8x8 blocks.
86 // This table is used to correct for block size.
87 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
88 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
89   2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
90 };
91 
// Precompute bit-cost tables for mode decisions evaluated repeatedly in rd
// search: intra luma/chroma modes, switchable interpolation filters, and
// transform-size selection, all derived from the current frame context.
static void fill_mode_costs(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const fc = cpi->common.fc;
  int i, j;

  // Key-frame luma intra mode costs are conditioned on the modes of the
  // two neighbors (indices i and j).
  for (i = 0; i < INTRA_MODES; ++i) {
    for (j = 0; j < INTRA_MODES; ++j) {
      vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
                      vp9_intra_mode_tree);
    }
  }

  vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
  // Chroma intra mode costs, conditioned on the luma mode (i): key frames
  // use the fixed default probabilities, inter frames the adaptive ones.
  for (i = 0; i < INTRA_MODES; ++i) {
    vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME][i],
                    vp9_kf_uv_mode_prob[i], vp9_intra_mode_tree);
    vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME][i],
                    fc->uv_mode_prob[i], vp9_intra_mode_tree);
  }

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
    vp9_cost_tokens(cpi->switchable_interp_costs[i],
                    fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
  }

  // Transform-size costs: the tx size is coded with a unary-style sequence
  // of binary decisions, so the cost of choosing size k is the sum of the
  // individual bit costs along that sequence.
  for (i = TX_8X8; i < TX_SIZES; ++i) {
    for (j = 0; j < TX_SIZE_CONTEXTS; ++j) {
      const vpx_prob *tx_probs = get_tx_probs(i, j, &fc->tx_probs);
      int k;
      for (k = 0; k <= i; ++k) {
        int cost = 0;
        int m;
        // When k equals the maximum size i, the final "go larger" bit is
        // omitted: hence the (k == i) adjustment of the loop bound.
        for (m = 0; m <= k - (k == i); ++m) {
          if (m == k)
            cost += vp9_cost_zero(tx_probs[m]);
          else
            cost += vp9_cost_one(tx_probs[m]);
        }
        cpi->tx_size_cost[i - 1][j][k] = cost;
      }
    }
  }
}
134 
// Build coefficient-token cost tables indexed by
// [tx size][plane type][ref type][band][context]. Dimension [0]/[1] before
// the context index selects the full-token cost table vs. the "skip"
// variant (computed with vp9_cost_tokens_skip).
static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  int i, j, k, l;
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; ++t)
    for (i = 0; i < PLANE_TYPES; ++i)
      for (j = 0; j < REF_TYPES; ++j)
        for (k = 0; k < COEF_BANDS; ++k)
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
            vpx_prob probs[ENTROPY_NODES];
            // Expand the compact model probabilities into the full
            // entropy-tree probability set.
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs, vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
                                 vp9_coef_tree);
            // Both variants must agree on the cost of end-of-block.
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
          }
}
153 
154 // Values are now correlated to quantizer.
155 static int sad_per_bit16lut_8[QINDEX_RANGE];
156 static int sad_per_bit4lut_8[QINDEX_RANGE];
157 
158 #if CONFIG_VP9_HIGHBITDEPTH
159 static int sad_per_bit16lut_10[QINDEX_RANGE];
160 static int sad_per_bit4lut_10[QINDEX_RANGE];
161 static int sad_per_bit16lut_12[QINDEX_RANGE];
162 static int sad_per_bit4lut_12[QINDEX_RANGE];
163 #endif
164 
// Fill the SAD-per-bit lookup tables for one bit depth: each entry is a
// linear function of the real quantizer step for that qindex.
static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
                            vpx_bit_depth_t bit_depth) {
  int i;
  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < range; i++) {
    const double q = vp9_convert_qindex_to_q(i, bit_depth);
    bit16lut[i] = (int)(0.0418 * q + 2.4107);
    bit4lut[i] = (int)(0.063 * q + 2.742);
  }
}
177 
// One-time initialization of the global SAD-per-bit tables, for every
// bit depth compiled in.
void vp9_init_me_luts(void) {
  init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
                  VPX_BITS_8);
#if CONFIG_VP9_HIGHBITDEPTH
  init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
                  VPX_BITS_10);
  init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
                  VPX_BITS_12);
#endif
}
188 
189 static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
190                                          8,  8,  4,  4,  2,  2,  1,  0 };
191 
192 // Note that the element below for frame type "USE_BUF_FRAME", which indicates
193 // that the show frame flag is set, should not be used as no real frame
194 // is encoded so we should not reach here. However, a dummy value
195 // is inserted here to make sure the data structure has the right number
196 // of values assigned.
197 static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128,
198                                                               128, 144, 144 };
199 
200 // Configure Vizier RD parameters.
201 // Later this function will use passed in command line values.
// Configure Vizier RD parameters. When Vizier rate-control parameters are
// not supplied externally, fall back to neutral (1.0) scaling factors.
// Later this function will use passed in command line values.
void vp9_init_rd_parameters(VP9_COMP *cpi) {
  RD_CONTROL *const rdc = &cpi->rd_ctrl;

  // When |use_vizier_rc_params| is 1, we expect the rd parameters have been
  // initialized by the pass in values.
  // Be careful that parameters below are only initialized to 1, if we do not
  // pass values to them. It is desired to take care of each parameter when
  // using |use_vizier_rc_params|.
  if (cpi->twopass.use_vizier_rc_params) return;

  // Make sure this function is floating point safe.
  vpx_clear_system_state();

  rdc->rd_mult_inter_qp_fac = 1.0;
  rdc->rd_mult_arf_qp_fac = 1.0;
  rdc->rd_mult_key_qp_fac = 1.0;
}
219 
220 // Returns the default rd multiplier for inter frames for a given qindex.
221 // The function here is a first pass estimate based on data from
222 // a previous Vizer run
// Default rd multiplier for inter frames: a linear function of qindex
// (first-pass fit from a previous Vizier tuning run).
static double def_inter_rd_multiplier(int qindex) {
  return 0.001 * (double)qindex + 4.15;
}
226 
227 // Returns the default rd multiplier for ARF/Golden Frames for a given qindex.
228 // The function here is a first pass estimate based on data from
229 // a previous Vizer run
// Default rd multiplier for ARF/Golden frames: a linear function of qindex
// (first-pass fit from a previous Vizier tuning run).
static double def_arf_rd_multiplier(int qindex) {
  return 0.001 * (double)qindex + 4.25;
}
233 
234 // Returns the default rd multiplier for key frames for a given qindex.
235 // The function here is a first pass estimate based on data from
236 // a previous Vizer run
// Default rd multiplier for key frames: a linear function of qindex
// (first-pass fit from a previous Vizier tuning run).
static double def_kf_rd_multiplier(int qindex) {
  return 0.001 * (double)qindex + 4.35;
}
240 
// Compute the baseline rd multiplier for a given qindex: q^2 scaled by a
// frame-type-specific linear-in-qindex factor and the matching Vizier
// tuning factor. Frame-level boost modulation is applied separately by
// modulate_rdmult().
int vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {
  const RD_CONTROL *rdc = &cpi->rd_ctrl;
  const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
  // largest dc_quant is 21387, therefore rdmult should fit in int32_t
  int rdmult = q * q;

  // Make sure this function is floating point safe.
  vpx_clear_system_state();

  if (cpi->common.frame_type == KEY_FRAME) {
    double def_rd_q_mult = def_kf_rd_multiplier(qindex);
    rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_key_qp_fac);
  } else if (!cpi->rc.is_src_frame_alt_ref &&
             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
    // Golden/ARF refresh frames (but not the shown copy of an ARF).
    double def_rd_q_mult = def_arf_rd_multiplier(qindex);
    rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_arf_qp_fac);
  } else {
    double def_rd_q_mult = def_inter_rd_multiplier(qindex);
    rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_inter_qp_fac);
  }

#if CONFIG_VP9_HIGHBITDEPTH
  // Quantizer step sizes are larger at higher bit depths; scale back down
  // so the rd multiplier stays comparable across depths.
  switch (cpi->common.bit_depth) {
    case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
    case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
    default: break;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // Never return a non-positive multiplier.
  return rdmult > 0 ? rdmult : 1;
}
271 
// Apply two-pass frame-type and gf/arf-boost scaling to a baseline rd
// multiplier. 64-bit intermediates are used because rdmult * factor can
// exceed 32 bits before the >> 7 normalization.
static int modulate_rdmult(const VP9_COMP *cpi, int rdmult) {
  int64_t rdmult_64 = rdmult;
  if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
    const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
    // Multi-layer ARF uses the per-frame boost from the gf group; otherwise
    // the single rate-control boost value applies.
    const int gfu_boost = cpi->multi_layer_arf
                              ? gf_group->gfu_boost[gf_group->index]
                              : cpi->rc.gfu_boost;
    const int boost_index = VPXMIN(15, (gfu_boost / 100));

    // Both factor tables are in units of 1/128 (hence the >> 7).
    rdmult_64 = (rdmult_64 * rd_frame_type_factor[frame_type]) >> 7;
    rdmult_64 += ((rdmult_64 * rd_boost_factor[boost_index]) >> 7);
  }
  return (int)rdmult_64;
}
287 
// Full rd multiplier for a qindex: baseline value with frame-type/boost
// modulation applied.
int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
  return modulate_rdmult(cpi, vp9_compute_rd_mult_based_on_qindex(cpi, qindex));
}
292 
vp9_get_adaptive_rdmult(const VP9_COMP * cpi,double beta)293 int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) {
294   int rdmult =
295       vp9_compute_rd_mult_based_on_qindex(cpi, cpi->common.base_qindex);
296   rdmult = (int)((double)rdmult / beta);
297   rdmult = rdmult > 0 ? rdmult : 1;
298   return modulate_rdmult(cpi, rdmult);
299 }
300 
// Convert a qindex into an rd threshold scaling factor. The dc quantizer
// is normalized to an 8-bit-equivalent step size (/4, /16, /64 for
// 8/10/12-bit) before the power curve is applied.
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
  double q;
#if CONFIG_VP9_HIGHBITDEPTH
  switch (bit_depth) {
    case VPX_BITS_8: q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0; break;
    case VPX_BITS_10: q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0; break;
    default:
      assert(bit_depth == VPX_BITS_12);
      q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0;
      break;
  }
#else
  (void)bit_depth;
  q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // TODO(debargha): Adjust the function below.
  // Floor of 8 keeps downstream divisions by this factor well-defined.
  return VPXMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
}
319 
// Load the per-qindex SAD-per-bit motion-estimation constants for the
// current bit depth into the macroblock context.
void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
#if CONFIG_VP9_HIGHBITDEPTH
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8:
      x->sadperbit16 = sad_per_bit16lut_8[qindex];
      x->sadperbit4 = sad_per_bit4lut_8[qindex];
      break;
    case VPX_BITS_10:
      x->sadperbit16 = sad_per_bit16lut_10[qindex];
      x->sadperbit4 = sad_per_bit4lut_10[qindex];
      break;
    default:
      assert(cpi->common.bit_depth == VPX_BITS_12);
      x->sadperbit16 = sad_per_bit16lut_12[qindex];
      x->sadperbit4 = sad_per_bit4lut_12[qindex];
      break;
  }
#else
  (void)cpi;
  x->sadperbit16 = sad_per_bit16lut_8[qindex];
  x->sadperbit4 = sad_per_bit4lut_8[qindex];
#endif  // CONFIG_VP9_HIGHBITDEPTH
}
343 
// Fill rd->threshes[segment][bsize][mode] with pruning thresholds derived
// from the baseline thresh_mult tables, scaled by the segment's effective
// quantizer and a per-block-size factor.
static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
  int i, bsize, segment_id;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    // Effective qindex for this segment, including the luma dc delta.
    const int qindex =
        clamp(vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
                  cm->y_dc_delta_q,
              0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      // thresh_max guards the multiplication below against int overflow.
      const int thresh_max = INT_MAX / t;

      // Blocks >= 8x8 use the full mode list; sub-8x8 blocks use the
      // per-reference table instead.
      if (bsize >= BLOCK_8X8) {
        for (i = 0; i < MAX_MODES; ++i)
          rd->threshes[segment_id][bsize][i] = rd->thresh_mult[i] < thresh_max
                                                   ? rd->thresh_mult[i] * t / 4
                                                   : INT_MAX;
      } else {
        for (i = 0; i < MAX_REFS; ++i)
          rd->threshes[segment_id][bsize][i] =
              rd->thresh_mult_sub8x8[i] < thresh_max
                  ? rd->thresh_mult_sub8x8[i] * t / 4
                  : INT_MAX;
      }
    }
  }
}
375 
// Rebuild the inter-mode bit-cost tables from the current frame context's
// inter mode probabilities, one table per mode context.
void vp9_build_inter_mode_cost(VP9_COMP *cpi) {
  const VP9_COMMON *const cm = &cpi->common;
  int ctx;
  for (ctx = 0; ctx < INTER_MODE_CONTEXTS; ++ctx)
    vp9_cost_tokens((int *)cpi->inter_mode_cost[ctx],
                    cm->fc->inter_mode_probs[ctx], vp9_inter_mode_tree);
}
384 
// Per-frame initialization of rd search state: RDMULT/RDDIV, error per
// bit, block thresholds, partition probabilities and, depending on the
// encoding pass and speed features, the token / mode / mv cost tables.
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  RD_OPT *const rd = &cpi->rd;
  int i;

  vpx_clear_system_state();

  rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
  rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  set_error_per_bit(x, rd->RDMULT);

  // Tx size search is disabled (use largest) only for inter frames when
  // the speed feature requests it.
  x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                       cm->frame_type != KEY_FRAME)
                          ? 0
                          : 1;

  set_block_thresholds(cm, rd);
  set_partition_probs(cm, xd);

  if (cpi->oxcf.pass == 1) {
    // First pass only needs mv costs, and only for inter frames.
    if (!frame_is_intra_only(cm))
      vp9_build_nmv_cost_table(
          x->nmvjointcost,
          cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
          &cm->fc->nmvc, cm->allow_high_precision_mv);
  } else {
    if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
      fill_token_costs(x->token_costs, cm->fc->coef_probs);

    if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
        cm->frame_type == KEY_FRAME) {
      for (i = 0; i < PARTITION_CONTEXTS; ++i)
        vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i),
                        vp9_partition_tree);
    }

    // In nonrd pick mode, refresh the mode/mv cost tables only
    // periodically (every 8th frame) or on key frames to save cycles.
    if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
        cm->frame_type == KEY_FRAME) {
      fill_mode_costs(cpi);

      if (!frame_is_intra_only(cm)) {
        vp9_build_nmv_cost_table(
            x->nmvjointcost,
            cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
            &cm->fc->nmvc, cm->allow_high_precision_mv);
        vp9_build_inter_mode_cost(cpi);
      }
    }
  }
}
438 
439 // NOTE: The tables below must be of the same size.
440 
441 // The functions described below are sampled at the four most significant
442 // bits of x^2 + 8 / 256.
443 
444 // Normalized rate:
445 // This table models the rate for a Laplacian source with given variance
446 // when quantized with a uniform quantizer with given stepsize. The
447 // closed form expression is:
448 // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
449 // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
450 // and H(x) is the binary entropy function.
451 static const int rate_tab_q10[] = {
452   65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142, 4044,
453   3958,  3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186, 3133, 3037,
454   2952,  2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353, 2290, 2232, 2179,
455   2130,  2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651, 1608, 1530, 1460, 1398,
456   1342,  1290, 1243, 1199, 1159, 1086, 1021, 963,  911,  864,  821,  781,  745,
457   680,   623,  574,  530,  490,  455,  424,  395,  345,  304,  269,  239,  213,
458   190,   171,  154,  126,  104,  87,   73,   61,   52,   44,   38,   28,   21,
459   16,    12,   10,   8,    6,    5,    3,    2,    1,    1,    1,    0,    0,
460 };
461 
462 // Normalized distortion:
463 // This table models the normalized distortion for a Laplacian source
464 // with given variance when quantized with a uniform quantizer
465 // with given stepsize. The closed form expression is:
466 // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
467 // where x = qpstep / sqrt(variance).
468 // Note the actual distortion is Dn * variance.
469 static const int dist_tab_q10[] = {
470   0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,    5,
471   6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,   18,   21,
472   24,   26,   29,   31,   34,   36,   39,   44,   49,   54,   59,   64,   69,
473   73,   78,   88,   97,   106,  115,  124,  133,  142,  151,  167,  184,  200,
474   215,  231,  245,  260,  274,  301,  327,  351,  375,  397,  418,  439,  458,
475   495,  528,  559,  587,  613,  637,  659,  680,  717,  749,  777,  801,  823,
476   842,  859,  874,  899,  919,  936,  949,  960,  969,  977,  983,  994,  1001,
477   1006, 1010, 1013, 1015, 1017, 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
478 };
479 static const int xsq_iq_q10[] = {
480   0,      4,      8,      12,     16,     20,     24,     28,     32,
481   40,     48,     56,     64,     72,     80,     88,     96,     112,
482   128,    144,    160,    176,    192,    208,    224,    256,    288,
483   320,    352,    384,    416,    448,    480,    544,    608,    672,
484   736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
485   1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
486   3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
487   7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
488   16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
489   36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
490   81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
491   180192, 196576, 212960, 229344, 245728,
492 };
493 
// Look up normalized rate and distortion (both Q10) for a given xsq_q10
// value by piecewise-linear interpolation of rate_tab_q10/dist_tab_q10,
// which are sampled on the non-uniform grid xsq_iq_q10.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // Recover the table index xq from the top bits of (xsq_q10 >> 2) + 8;
  // k is the octave of the value (grid spacing doubles each octave).
  const int tmp = (xsq_q10 >> 2) + 8;
  const int k = get_msb(tmp) - 3;
  const int xq = (k << 3) + ((tmp >> k) & 0x7);
  const int one_q10 = 1 << 10;
  // Q10 interpolation weight within the [xq, xq + 1] grid interval.
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  const int b_q10 = one_q10 - a_q10;
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}
504 
// Fixed-length (MAX_MB_PLANE) vector form of model_rd_norm(): interpolate
// normalized rate and distortion (Q10) for each plane's xsq_q10 value.
static void model_rd_norm_vec(int xsq_q10[MAX_MB_PLANE],
                              int r_q10[MAX_MB_PLANE],
                              int d_q10[MAX_MB_PLANE]) {
  int i;
  const int one_q10 = 1 << 10;
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    // Same index recovery and interpolation as model_rd_norm().
    const int tmp = (xsq_q10[i] >> 2) + 8;
    const int k = get_msb(tmp) - 3;
    const int xq = (k << 3) + ((tmp >> k) & 0x7);
    const int a_q10 = ((xsq_q10[i] - xsq_iq_q10[xq]) << 10) >> (2 + k);
    const int b_q10 = one_q10 - a_q10;
    r_q10[i] = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
    d_q10[i] = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
  }
}
520 
521 static const uint32_t MAX_XSQ_Q10 = 245727;
522 
// Estimate rate (in VP9_PROB_COST_SHIFT units) and distortion for a block
// with 2^n_log2 samples of the given variance, quantized with step size
// |qstep|, using the Laplacian source model tables above.
void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
                                  unsigned int qstep, int *rate,
                                  int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  if (var == 0) {
    // Zero variance: nothing to code.
    *rate = 0;
    *dist = 0;
  } else {
    int d_q10, r_q10;
    // xsq = (qstep^2 * n) / var in Q10, rounded, clamped to the table range.
    const uint64_t xsq_q10_64 =
        (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
    const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
    // Scale normalized rate by sample count; normalized dist by variance.
    *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - VP9_PROB_COST_SHIFT);
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
  }
}
545 
546 // Implements a fixed length vector form of vp9_model_rd_from_var_lapndz where
547 // vectors are of length MAX_MB_PLANE and all elements of var are non-zero.
// Vector form of vp9_model_rd_from_var_lapndz for MAX_MB_PLANE planes.
// All var[] entries must be non-zero. Results are ADDED to *rate_sum and
// *dist_sum, which the caller must initialize.
void vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE],
                                      unsigned int n_log2[MAX_MB_PLANE],
                                      unsigned int qstep[MAX_MB_PLANE],
                                      int64_t *rate_sum, int64_t *dist_sum) {
  int i;
  int xsq_q10[MAX_MB_PLANE], d_q10[MAX_MB_PLANE], r_q10[MAX_MB_PLANE];
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    // xsq = (qstep^2 * n) / var in Q10, rounded, clamped to table range.
    const uint64_t xsq_q10_64 =
        (((uint64_t)qstep[i] * qstep[i] << (n_log2[i] + 10)) + (var[i] >> 1)) /
        var[i];
    xsq_q10[i] = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
  }
  model_rd_norm_vec(xsq_q10, r_q10, d_q10);
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    int rate =
        ROUND_POWER_OF_TWO(r_q10[i] << n_log2[i], 10 - VP9_PROB_COST_SHIFT);
    int64_t dist = (var[i] * (int64_t)d_q10[i] + 512) >> 10;
    *rate_sum += rate;
    *dist_sum += dist;
  }
}
569 
// Copy a plane's above/left entropy contexts into local buffers, collapsing
// each transform-block-sized group of 4x4 contexts into a single nonzero
// flag for tx sizes larger than 4x4 (written at the group's first index).
void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;

  // NOTE(review): the wider (uint16/32/64) loads below read 2/4/8 contexts
  // at once; this relies on the context buffers being suitably aligned and
  // on byte-level aliasing being acceptable here — confirm if porting.
  int i;
  switch (tx_size) {
    case TX_4X4:
      memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = !!*(const uint16_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = !!*(const uint16_t *)&left[i];
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = !!*(const uint32_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = !!*(const uint32_t *)&left[i];
      break;
    default:
      assert(tx_size == TX_32X32);
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = !!*(const uint64_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = !!*(const uint64_t *)&left[i];
      break;
  }
}
607 
// Rank the candidate reference MVs (the two ref_mvs entries, plus the
// cached pred_mv when the block is below the max partition size) by
// full-pel SAD against the source block, and record the winning index,
// the largest MV magnitude seen, and the best SAD for later use.
void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  int i;
  int zero_seen = 0;
  int best_index = 0;
  int best_sad = INT_MAX;
  int this_sad = INT_MAX;
  int max_mv = 0;
  int near_same_nearest;
  uint8_t *src_y_ptr = x->plane[0].src.buf;
  uint8_t *ref_y_ptr;
  // A third candidate (the cached pred mv) is considered only for blocks
  // smaller than the maximum partition size.
  const int num_mv_refs =
      MAX_MV_REF_CANDIDATES + (block_size < x->max_partition_size);

  MV pred_mv[3];
  pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
  pred_mv[2] = x->pred_mv[ref_frame];
  assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));

  near_same_nearest = x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
                      x->mbmi_ext->ref_mvs[ref_frame][1].as_int;

  // Get the sad for each candidate reference mv.
  for (i = 0; i < num_mv_refs; ++i) {
    const MV *this_mv = &pred_mv[i];
    int fp_row, fp_col;
    // Skip invalid (sentinel) mvs and a NEAR identical to NEAREST.
    if (this_mv->row == INT16_MAX || this_mv->col == INT16_MAX) continue;
    if (i == 1 && near_same_nearest) continue;
    // Round the 1/8-pel mv to full-pel (round-half-away-from-zero).
    fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
    fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
    max_mv = VPXMAX(max_mv, VPXMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);

    // Only evaluate the zero vector once.
    if (fp_row == 0 && fp_col == 0 && zero_seen) continue;
    zero_seen |= (fp_row == 0 && fp_col == 0);

    ref_y_ptr = &ref_y_buffer[ref_y_stride * fp_row + fp_col];
    // Find sad for current vector.
    this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
                                           ref_y_ptr, ref_y_stride);
    // Note if it is the best so far.
    if (this_sad < best_sad) {
      best_sad = this_sad;
      best_index = i;
    }
  }

  // Note the index of the mv that worked best in the reference list.
  x->mv_best_ref_index[ref_frame] = best_index;
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}
660 
// Point the three per-plane prediction buffers at the given reference
// frame and set up each plane for the block at (mi_row, mi_col).
void vp9_setup_pred_block(const MACROBLOCKD *xd,
                          struct buf_2d dst[MAX_MB_PLANE],
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const struct scale_factors *scale,
                          const struct scale_factors *scale_uv) {
  int i;

  dst[0].buf = src->y_buffer;
  dst[0].stride = src->y_stride;
  dst[1].buf = src->u_buffer;
  dst[2].buf = src->v_buffer;
  dst[1].stride = dst[2].stride = src->uv_stride;

  // Luma (plane 0) uses |scale|; both chroma planes use |scale_uv|.
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
                     i ? scale_uv : scale, xd->plane[i].subsampling_x,
                     xd->plane[i].subsampling_y);
  }
}
680 
// Pixel offset of a raster-ordered 4x4 sub-block within a plane block,
// given the plane's row stride.
int vp9_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
                            int stride) {
  const int bwl = b_width_log2_lookup[plane_bsize];
  const int row = raster_block >> bwl;
  const int col = raster_block & ((1 << bwl) - 1);
  // Each raster step is 4 pixels in both dimensions.
  return 4 * row * stride + 4 * col;
}
688 
// Pointer to a raster-ordered 4x4 sub-block within an int16_t plane buffer
// whose stride equals the plane block width in pixels.
int16_t *vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
                                       int16_t *base) {
  const int row_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset =
      vp9_raster_block_offset(plane_bsize, raster_block, row_stride);
  return &base[offset];
}
694 
vp9_get_scaled_ref_frame(const VP9_COMP * cpi,int ref_frame)695 YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
696                                              int ref_frame) {
697   const VP9_COMMON *const cm = &cpi->common;
698   const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
699   const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
700   assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
701   return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX)
702              ? &cm->buffer_pool->frame_bufs[scaled_idx].buf
703              : NULL;
704 }
705 
vp9_get_switchable_rate(const VP9_COMP * cpi,const MACROBLOCKD * const xd)706 int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
707   const MODE_INFO *const mi = xd->mi[0];
708   const int ctx = get_pred_context_switchable_interp(xd);
709   return SWITCHABLE_INTERP_RATE_FACTOR *
710          cpi->switchable_interp_costs[ctx][mi->interp_filter];
711 }
712 
// Initializes the per-mode RD threshold multipliers that gate the mode
// search. A larger multiplier makes the corresponding mode more expensive
// to consider; the BEST-mode baseline of -500 biases toward trying modes.
void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
  int i;
  RD_OPT *const rd = &cpi->rd;
  SPEED_FEATURES *const sf = &cpi->sf;

  // Set baseline threshold values.
  for (i = 0; i < MAX_MODES; ++i)
    rd->thresh_mult[i] = cpi->oxcf.mode == BEST ? -500 : 0;

  // NEARESTMV-family modes get a small nonzero starting threshold only
  // when adaptive RD thresholding is enabled (the adaptation can then
  // move them from this starting point).
  if (sf->adaptive_rd_thresh) {
    rd->thresh_mult[THR_NEARESTMV] = 300;
    rd->thresh_mult[THR_NEARESTG] = 300;
    rd->thresh_mult[THR_NEARESTA] = 300;
  } else {
    rd->thresh_mult[THR_NEARESTMV] = 0;
    rd->thresh_mult[THR_NEARESTG] = 0;
    rd->thresh_mult[THR_NEARESTA] = 0;
  }

  // DC intra prediction.
  rd->thresh_mult[THR_DC] += 1000;

  // New-motion-vector modes, per reference frame.
  rd->thresh_mult[THR_NEWMV] += 1000;
  rd->thresh_mult[THR_NEWA] += 1000;
  rd->thresh_mult[THR_NEWG] += 1000;

  // Near-motion and compound-nearest modes.
  rd->thresh_mult[THR_NEARMV] += 1000;
  rd->thresh_mult[THR_NEARA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;

  // TM (true-motion) intra prediction.
  rd->thresh_mult[THR_TM] += 1000;

  // Compound near/new modes cost progressively more to consider.
  rd->thresh_mult[THR_COMP_NEARLA] += 1500;
  rd->thresh_mult[THR_COMP_NEWLA] += 2000;
  rd->thresh_mult[THR_NEARG] += 1000;
  rd->thresh_mult[THR_COMP_NEARGA] += 1500;
  rd->thresh_mult[THR_COMP_NEWGA] += 2000;

  // Zero-motion modes, single and compound reference.
  rd->thresh_mult[THR_ZEROMV] += 2000;
  rd->thresh_mult[THR_ZEROG] += 2000;
  rd->thresh_mult[THR_ZEROA] += 2000;
  rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
  rd->thresh_mult[THR_COMP_ZEROGA] += 2500;

  // Directional intra prediction modes; the diagonal predictors are
  // costed slightly above horizontal/vertical.
  rd->thresh_mult[THR_H_PRED] += 2000;
  rd->thresh_mult[THR_V_PRED] += 2000;
  rd->thresh_mult[THR_D45_PRED] += 2500;
  rd->thresh_mult[THR_D135_PRED] += 2500;
  rd->thresh_mult[THR_D117_PRED] += 2500;
  rd->thresh_mult[THR_D153_PRED] += 2500;
  rd->thresh_mult[THR_D207_PRED] += 2500;
  rd->thresh_mult[THR_D63_PRED] += 2500;
}
766 
// Initializes the sub-8x8 RD threshold multipliers from a fixed table:
// row 1 (lower thresholds) is used in BEST mode, row 0 otherwise.
void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
  static const int thresh_mult[2][MAX_REFS] = {
    { 2500, 2500, 2500, 4500, 4500, 2500 },
    { 2000, 2000, 2000, 4000, 4000, 2000 }
  };
  RD_OPT *const rd = &cpi->rd;
  const int row = (cpi->oxcf.mode == BEST) ? 1 : 0;
  memcpy(rd->thresh_mult_sub8x8, thresh_mult[row], sizeof(thresh_mult[row]));
}
776 
vp9_update_rd_thresh_fact(int (* factor_buf)[MAX_MODES],int rd_thresh,int bsize,int best_mode_index)777 void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
778                                int bsize, int best_mode_index) {
779   if (rd_thresh > 0) {
780     const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
781     int mode;
782     for (mode = 0; mode < top_mode; ++mode) {
783       const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
784       const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
785       BLOCK_SIZE bs;
786       for (bs = min_size; bs <= max_size; ++bs) {
787         int *const fact = &factor_buf[bs][mode];
788         if (mode == best_mode_index) {
789           *fact -= (*fact >> 4);
790         } else {
791           *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
792         }
793       }
794     }
795   }
796 }
797 
// Returns the rate penalty applied to intra modes, derived from the DC
// quantizer. The penalty is reduced for small blocks: >>2 at 16x16 and
// below, >>4 at 8x8 and below.
int vp9_get_intra_cost_penalty(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
                               int qindex, int qdelta) {
  int reduction_shift = 0;
  if (bsize <= BLOCK_8X8) {
    reduction_shift = 4;
  } else if (bsize <= BLOCK_16X16) {
    reduction_shift = 2;
  }

  // Don't reduce intra cost penalty if estimated noise level is high.
  if (cpi->noise_estimate.enabled && cpi->noise_estimate.level == kHigh) {
    reduction_shift = 0;
  }

  // Always use VPX_BITS_8 as input here because the penalty is applied
  // to rate not distortion so we want a consistent penalty for all bit
  // depths. If the actual bit depth were passed in here then the value
  // returned by vp9_dc_quant() would scale with the bit depth and we would
  // then need to apply inverse scaling to correct back to a bit depth
  // independent rate penalty.
  return (20 * vp9_dc_quant(qindex, qdelta, VPX_BITS_8)) >> reduction_shift;
}
816