• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <assert.h>
12 #include <math.h>
13 #include <stdio.h>
14 
15 #include "./vp9_rtcd.h"
16 
17 #include "vpx_dsp/vpx_dsp_common.h"
18 #include "vpx_mem/vpx_mem.h"
19 #include "vpx_ports/bitops.h"
20 #include "vpx_ports/mem.h"
21 #include "vpx_ports/system_state.h"
22 
23 #include "vp9/common/vp9_common.h"
24 #include "vp9/common/vp9_entropy.h"
25 #include "vp9/common/vp9_entropymode.h"
26 #include "vp9/common/vp9_mvref_common.h"
27 #include "vp9/common/vp9_pred_common.h"
28 #include "vp9/common/vp9_quant_common.h"
29 #include "vp9/common/vp9_reconinter.h"
30 #include "vp9/common/vp9_reconintra.h"
31 #include "vp9/common/vp9_seg_common.h"
32 
33 #include "vp9/encoder/vp9_cost.h"
34 #include "vp9/encoder/vp9_encodemb.h"
35 #include "vp9/encoder/vp9_encodemv.h"
36 #include "vp9/encoder/vp9_encoder.h"
37 #include "vp9/encoder/vp9_mcomp.h"
38 #include "vp9/encoder/vp9_quantize.h"
39 #include "vp9/encoder/vp9_ratectrl.h"
40 #include "vp9/encoder/vp9_rd.h"
41 #include "vp9/encoder/vp9_tokenize.h"
42 
43 #define RD_THRESH_POW 1.25
44 
45 // Factor to weigh the rate for switchable interp filters.
46 #define SWITCHABLE_INTERP_RATE_FACTOR 1
47 
// Marks an RD cost as "not yet computed" by saturating every field.
void vp9_rd_cost_reset(RD_COST *rd_cost) {
  rd_cost->dist = rd_cost->rdcost = INT64_MAX;
  rd_cost->rate = INT_MAX;
}
53 
// Zero-initializes all fields of an RD cost record.
void vp9_rd_cost_init(RD_COST *rd_cost) {
  rd_cost->rate = 0;
  rd_cost->dist = rd_cost->rdcost = 0;
}
59 
// Computes RDCOST while tolerating negative rate and/or distortion: each
// sign combination is routed to the RDCOST variant that takes non-negative
// magnitudes, and the overall sign is restored afterwards.
int64_t vp9_calculate_rd_cost(int mult, int div, int rate, int64_t dist) {
  assert(mult >= 0);
  assert(div > 0);
  if (rate < 0) {
    if (dist < 0) return -RDCOST(mult, div, -rate, -dist);
    return RDCOST_NEG_R(mult, div, -rate, dist);
  }
  if (dist < 0) return RDCOST_NEG_D(mult, div, rate, -dist);
  return RDCOST(mult, div, rate, dist);
}
74 
// Recomputes rdcost from rate/dist; a saturated rate or distortion means
// the cost is not representable, so the whole record is reset instead.
void vp9_rd_cost_update(int mult, int div, RD_COST *rd_cost) {
  if (rd_cost->rate >= INT_MAX || rd_cost->dist >= INT64_MAX) {
    vp9_rd_cost_reset(rd_cost);
    return;
  }
  rd_cost->rdcost =
      vp9_calculate_rd_cost(mult, div, rd_cost->rate, rd_cost->dist);
}
83 
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
// Indexed by BLOCK_SIZE; consumed by set_block_thresholds() below.
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
};
91 
// Fills the per-frame mode cost tables (intra y/uv modes, switchable
// interpolation filters, and tx-size costs) from the current frame context.
static void fill_mode_costs(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const fc = cpi->common.fc;
  int i, j;

  // Key-frame y-mode costs are conditioned on the above/left neighbor modes.
  for (i = 0; i < INTRA_MODES; ++i) {
    for (j = 0; j < INTRA_MODES; ++j) {
      vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
                      vp9_intra_mode_tree);
    }
  }

  vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
  // UV mode costs are conditioned on the co-located y mode, with separate
  // tables for key frames (fixed probs) and inter frames (frame context).
  for (i = 0; i < INTRA_MODES; ++i) {
    vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME][i],
                    vp9_kf_uv_mode_prob[i], vp9_intra_mode_tree);
    vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME][i],
                    fc->uv_mode_prob[i], vp9_intra_mode_tree);
  }

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
    vp9_cost_tokens(cpi->switchable_interp_costs[i],
                    fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
  }

  // Tx-size costs: for each max tx size i and context j, the cost of
  // selecting size k is the sum of the binary decisions along the
  // tx_probs chain (zero branch terminates, one branch continues).
  for (i = TX_8X8; i < TX_SIZES; ++i) {
    for (j = 0; j < TX_SIZE_CONTEXTS; ++j) {
      const vpx_prob *tx_probs = get_tx_probs(i, j, &fc->tx_probs);
      int k;
      for (k = 0; k <= i; ++k) {
        int cost = 0;
        int m;
        // The last size (k == i) needs no terminating zero branch.
        for (m = 0; m <= k - (k == i); ++m) {
          if (m == k)
            cost += vp9_cost_zero(tx_probs[m]);
          else
            cost += vp9_cost_one(tx_probs[m]);
        }
        cpi->tx_size_cost[i - 1][j][k] = cost;
      }
    }
  }
}
134 
// Expands the modeled coefficient probabilities into full per-token cost
// tables, for both the "no skip" ([0]) and "skip EOB" ([1]) variants.
static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  int i, j, k, l;
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; ++t)
    for (i = 0; i < PLANE_TYPES; ++i)
      for (j = 0; j < REF_TYPES; ++j)
        for (k = 0; k < COEF_BANDS; ++k)
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
            vpx_prob probs[ENTROPY_NODES];
            // Convert the compact model probabilities to the full tree.
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs, vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
                                 vp9_coef_tree);
            // Both variants must agree on the EOB token cost.
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
          }
}
153 
// Values are now correlated to quantizer.
// Per-qindex SAD-per-bit lookup tables, one pair (16x16 and 4x4 search)
// per supported bit depth; populated by vp9_init_me_luts().
static int sad_per_bit16lut_8[QINDEX_RANGE];
static int sad_per_bit4lut_8[QINDEX_RANGE];

#if CONFIG_VP9_HIGHBITDEPTH
static int sad_per_bit16lut_10[QINDEX_RANGE];
static int sad_per_bit4lut_10[QINDEX_RANGE];
static int sad_per_bit16lut_12[QINDEX_RANGE];
static int sad_per_bit4lut_12[QINDEX_RANGE];
#endif
164 
// Fills one pair of SAD-per-bit tables for the given bit depth.
static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
                            vpx_bit_depth_t bit_depth) {
  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  int qi;
  for (qi = 0; qi < range; ++qi) {
    const double q = vp9_convert_qindex_to_q(qi, bit_depth);
    bit16lut[qi] = (int)(0.0418 * q + 2.4107);
    bit4lut[qi] = (int)(0.063 * q + 2.742);
  }
}
177 
// One-time initialization of the SAD-per-bit lookup tables for every
// bit depth the build supports.
void vp9_init_me_luts(void) {
  init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
                  VPX_BITS_8);
#if CONFIG_VP9_HIGHBITDEPTH
  init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
                  VPX_BITS_10);
  init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
                  VPX_BITS_12);
#endif
}
188 
// Additive rdmult adjustment (applied >> 7 in modulate_rdmult()), indexed
// by VPXMIN(15, gfu_boost / 100); the increment shrinks as boost grows.
static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
                                         8,  8,  4,  4,  2,  2,  1,  0 };
191 
// Note that the element below for frame type "USE_BUF_FRAME", which indicates
// that the show frame flag is set, should not be used as no real frame
// is encoded so we should not reach here. However, a dummy value
// is inserted here to make sure the data structure has the right number
// of values assigned.
// Q7 scale factors (applied >> 7 in modulate_rdmult()), one per
// FRAME_UPDATE_TYPE.
static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128,
                                                              128, 144, 144 };
199 
200 // Configure Vizier RD parameters.
201 // Later this function will use passed in command line values.
void vp9_init_rd_parameters(VP9_COMP *cpi) {
  RD_CONTROL *const ctrl = &cpi->rd_ctrl;

  // When |use_vizier_rc_params| is 1, the rd parameters are expected to
  // have been initialized from externally passed-in values, so they must
  // not be overwritten here. Be careful that each parameter is actually
  // supplied in that mode, since nothing below will default it.
  if (cpi->twopass.use_vizier_rc_params) return;

  // Make sure this function is floating point safe.
  vpx_clear_system_state();

  ctrl->rd_mult_key_qp_fac = 1.0;
  ctrl->rd_mult_arf_qp_fac = 1.0;
  ctrl->rd_mult_inter_qp_fac = 1.0;
}
219 
// Returns the default rd multiplier for inter frames for a given qindex.
// The function here is a first pass estimate based on data from
// a previous Vizier run.
static double def_inter_rd_multiplier(int qindex) {
  const double base = 4.15;
  return base + 0.001 * (double)qindex;
}
226 
// Returns the default rd multiplier for ARF/Golden Frames for a given qindex.
// The function here is a first pass estimate based on data from
// a previous Vizier run.
static double def_arf_rd_multiplier(int qindex) {
  const double base = 4.25;
  return base + 0.001 * (double)qindex;
}
233 
// Returns the default rd multiplier for key frames for a given qindex.
// The function here is a first pass estimate based on data from
// a previous Vizier run.
static double def_kf_rd_multiplier(int qindex) {
  const double base = 4.35;
  return base + 0.001 * (double)qindex;
}
240 
// Computes the base rd multiplier for a qindex: squared dc quant scaled by
// a frame-type-dependent linear function of qindex and the Vizier qp factor.
int vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {
  const RD_CONTROL *rdc = &cpi->rd_ctrl;
  const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
  // largest dc_quant is 21387, therefore rdmult should fit in int32_t
  int rdmult = q * q;

  // An attached external rate controller may dictate the multiplier outright.
  if (cpi->ext_ratectrl.ready &&
      (cpi->ext_ratectrl.funcs.rc_type & VPX_RC_RDMULT) != 0 &&
      cpi->ext_ratectrl.ext_rdmult != VPX_DEFAULT_RDMULT) {
    return cpi->ext_ratectrl.ext_rdmult;
  }

  // Make sure this function is floating point safe.
  vpx_clear_system_state();

  // Scale by the default multiplier for the frame type (key, arf/golden,
  // or plain inter) together with the matching per-type qp factor.
  if (cpi->common.frame_type == KEY_FRAME) {
    double def_rd_q_mult = def_kf_rd_multiplier(qindex);
    rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_key_qp_fac);
  } else if (!cpi->rc.is_src_frame_alt_ref &&
             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
    double def_rd_q_mult = def_arf_rd_multiplier(qindex);
    rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_arf_qp_fac);
  } else {
    double def_rd_q_mult = def_inter_rd_multiplier(qindex);
    rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_inter_qp_fac);
  }

#if CONFIG_VP9_HIGHBITDEPTH
  // Renormalize for the larger quantizer scale at high bit depths.
  switch (cpi->common.bit_depth) {
    case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
    case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
    default: break;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // Never return a non-positive multiplier.
  return rdmult > 0 ? rdmult : 1;
}
277 
modulate_rdmult(const VP9_COMP * cpi,int rdmult)278 static int modulate_rdmult(const VP9_COMP *cpi, int rdmult) {
279   int64_t rdmult_64 = rdmult;
280   if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
281     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
282     const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
283     const int gfu_boost = cpi->multi_layer_arf
284                               ? gf_group->gfu_boost[gf_group->index]
285                               : cpi->rc.gfu_boost;
286     const int boost_index = VPXMIN(15, (gfu_boost / 100));
287 
288     rdmult_64 = (rdmult_64 * rd_frame_type_factor[frame_type]) >> 7;
289     rdmult_64 += ((rdmult_64 * rd_boost_factor[boost_index]) >> 7);
290   }
291   return (int)rdmult_64;
292 }
293 
vp9_compute_rd_mult(const VP9_COMP * cpi,int qindex)294 int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
295   int rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, qindex);
296   if (cpi->ext_ratectrl.ready &&
297       (cpi->ext_ratectrl.funcs.rc_type & VPX_RC_RDMULT) != 0 &&
298       cpi->ext_ratectrl.ext_rdmult != VPX_DEFAULT_RDMULT) {
299     return cpi->ext_ratectrl.ext_rdmult;
300   }
301   return modulate_rdmult(cpi, rdmult);
302 }
303 
vp9_get_adaptive_rdmult(const VP9_COMP * cpi,double beta)304 int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) {
305   int rdmult =
306       vp9_compute_rd_mult_based_on_qindex(cpi, cpi->common.base_qindex);
307   rdmult = (int)((double)rdmult / beta);
308   rdmult = rdmult > 0 ? rdmult : 1;
309   return modulate_rdmult(cpi, rdmult);
310 }
311 
// Converts a qindex into a threshold scaling factor for the rd mode loop.
// The dc quant is first normalized back to the 8-bit scale (divide by
// 4 / 16 / 64 for 8 / 10 / 12 bits) before being raised to RD_THRESH_POW.
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
  double q;
#if CONFIG_VP9_HIGHBITDEPTH
  switch (bit_depth) {
    case VPX_BITS_8: q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0; break;
    case VPX_BITS_10: q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0; break;
    default:
      assert(bit_depth == VPX_BITS_12);
      q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0;
      break;
  }
#else
  (void)bit_depth;
  q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // TODO(debargha): Adjust the function below.
  // Result is floored at 8 so thresholds never collapse to zero.
  return VPXMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
}
330 
// Loads the per-qindex SAD-per-bit constants (16x16 and 4x4 motion search)
// into the macroblock, selecting the table matching the coded bit depth.
void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
#if CONFIG_VP9_HIGHBITDEPTH
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8:
      x->sadperbit16 = sad_per_bit16lut_8[qindex];
      x->sadperbit4 = sad_per_bit4lut_8[qindex];
      break;
    case VPX_BITS_10:
      x->sadperbit16 = sad_per_bit16lut_10[qindex];
      x->sadperbit4 = sad_per_bit4lut_10[qindex];
      break;
    default:
      assert(cpi->common.bit_depth == VPX_BITS_12);
      x->sadperbit16 = sad_per_bit16lut_12[qindex];
      x->sadperbit4 = sad_per_bit4lut_12[qindex];
      break;
  }
#else
  (void)cpi;
  x->sadperbit16 = sad_per_bit16lut_8[qindex];
  x->sadperbit4 = sad_per_bit4lut_8[qindex];
#endif  // CONFIG_VP9_HIGHBITDEPTH
}
354 
// Computes the per-segment, per-block-size mode pruning thresholds by
// scaling the thresh_mult values with a quantizer- and block-size-dependent
// factor. Sub-8x8 sizes use the smaller per-reference table.
static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
  int i, bsize, segment_id;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    // Segment-adjusted qindex, clamped to the legal range.
    const int qindex =
        clamp(vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
                  cm->y_dc_delta_q,
              0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      // Largest thresh_mult that can be multiplied by t without overflow.
      const int thresh_max = INT_MAX / t;

      if (bsize >= BLOCK_8X8) {
        for (i = 0; i < MAX_MODES; ++i)
          rd->threshes[segment_id][bsize][i] = rd->thresh_mult[i] < thresh_max
                                                   ? rd->thresh_mult[i] * t / 4
                                                   : INT_MAX;
      } else {
        for (i = 0; i < MAX_REFS; ++i)
          rd->threshes[segment_id][bsize][i] =
              rd->thresh_mult_sub8x8[i] < thresh_max
                  ? rd->thresh_mult_sub8x8[i] * t / 4
                  : INT_MAX;
      }
    }
  }
}
386 
// Rebuilds the inter-mode token cost table for every mode context from the
// current frame probabilities.
void vp9_build_inter_mode_cost(VP9_COMP *cpi) {
  const VP9_COMMON *const cm = &cpi->common;
  int ctx;
  for (ctx = 0; ctx < INTER_MODE_CONTEXTS; ++ctx)
    vp9_cost_tokens((int *)cpi->inter_mode_cost[ctx],
                    cm->fc->inter_mode_probs[ctx], vp9_inter_mode_tree);
}
395 
// Per-frame initialization of rd constants: RDMULT/RDDIV, error-per-bit,
// block thresholds, partition probabilities, and (depending on pass and
// speed features) the token, partition, mode, and mv cost tables.
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  RD_OPT *const rd = &cpi->rd;
  int i;

  vpx_clear_system_state();

  rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
  rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  set_error_per_bit(x, rd->RDMULT);

  x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                       cm->frame_type != KEY_FRAME)
                          ? 0
                          : 1;

  set_block_thresholds(cm, rd);
  set_partition_probs(cm, xd);

  if (cpi->oxcf.pass == 1) {
    // First pass only needs the mv cost tables, and only for inter frames.
    if (!frame_is_intra_only(cm))
      vp9_build_nmv_cost_table(
          x->nmvjointcost,
          cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
          &cm->fc->nmvc, cm->allow_high_precision_mv);
  } else {
    if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
      fill_token_costs(x->token_costs, cm->fc->coef_probs);

    if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
        cm->frame_type == KEY_FRAME) {
      for (i = 0; i < PARTITION_CONTEXTS; ++i)
        vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i),
                        vp9_partition_tree);
    }

    // In nonrd mode the mode/mv cost tables are refreshed only periodically
    // (when the frame counter & 7 == 1) or on key frames.
    if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
        cm->frame_type == KEY_FRAME) {
      fill_mode_costs(cpi);

      if (!frame_is_intra_only(cm)) {
        vp9_build_nmv_cost_table(
            x->nmvjointcost,
            cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
            &cm->fc->nmvc, cm->allow_high_precision_mv);
        vp9_build_inter_mode_cost(cpi);
      }
    }
  }
}
449 
// NOTE: The tables below must be of the same size.

// The functions described below are sampled at the four most significant
// bits of x^2 + 8 / 256.

// Normalized rate:
// This table models the rate for a Laplacian source with given variance
// when quantized with a uniform quantizer with given stepsize. The
// closed form expression is:
// Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
// where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
// and H(x) is the binary entropy function.
// Values are in Q10; indexed via model_rd_norm() / model_rd_norm_vec().
static const int rate_tab_q10[] = {
  65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142, 4044,
  3958,  3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186, 3133, 3037,
  2952,  2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353, 2290, 2232, 2179,
  2130,  2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651, 1608, 1530, 1460, 1398,
  1342,  1290, 1243, 1199, 1159, 1086, 1021, 963,  911,  864,  821,  781,  745,
  680,   623,  574,  530,  490,  455,  424,  395,  345,  304,  269,  239,  213,
  190,   171,  154,  126,  104,  87,   73,   61,   52,   44,   38,   28,   21,
  16,    12,   10,   8,    6,    5,    3,    2,    1,    1,    1,    0,    0,
};

// Normalized distortion:
// This table models the normalized distortion for a Laplacian source
// with given variance when quantized with a uniform quantizer
// with given stepsize. The closed form expression is:
// Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
// where x = qpstep / sqrt(variance).
// Note the actual distortion is Dn * variance.
static const int dist_tab_q10[] = {
  0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,    5,
  6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,   18,   21,
  24,   26,   29,   31,   34,   36,   39,   44,   49,   54,   59,   64,   69,
  73,   78,   88,   97,   106,  115,  124,  133,  142,  151,  167,  184,  200,
  215,  231,  245,  260,  274,  301,  327,  351,  375,  397,  418,  439,  458,
  495,  528,  559,  587,  613,  637,  659,  680,  717,  749,  777,  801,  823,
  842,  859,  874,  899,  919,  936,  949,  960,  969,  977,  983,  994,  1001,
  1006, 1010, 1013, 1015, 1017, 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
};
// Sample positions (x^2 in Q10) for the two tables above; spacing doubles
// every 8 entries, which the (k, xq) index computation exploits.
static const int xsq_iq_q10[] = {
  0,      4,      8,      12,     16,     20,     24,     28,     32,
  40,     48,     56,     64,     72,     80,     88,     96,     112,
  128,    144,    160,    176,    192,    208,    224,    256,    288,
  320,    352,    384,    416,    448,    480,    544,    608,    672,
  736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
  1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
  3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
  7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
  16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
  36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
  81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
  180192, 196576, 212960, 229344, 245728,
};
504 
// Looks up normalized rate and distortion (both Q10) for a normalized
// squared quantizer step xsq_q10, linearly interpolating between the two
// surrounding samples of the piecewise tables above.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // Locate the table segment containing xsq_q10: the +8 bias makes
  // get_msb well defined for small inputs, k selects the octave (the
  // xsq_iq_q10 spacing doubles every 8 entries) and xq the index.
  const int tmp = (xsq_q10 >> 2) + 8;
  const int k = get_msb(tmp) - 3;
  const int xq = (k << 3) + ((tmp >> k) & 0x7);
  const int one_q10 = 1 << 10;
  // Q10 interpolation weight within [xsq_iq_q10[xq], xsq_iq_q10[xq + 1]].
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  const int b_q10 = one_q10 - a_q10;
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}
515 
// Vector form of model_rd_norm(): interpolates rate and distortion for all
// MAX_MB_PLANE inputs in a single pass (see model_rd_norm for the math).
static void model_rd_norm_vec(int xsq_q10[MAX_MB_PLANE],
                              int r_q10[MAX_MB_PLANE],
                              int d_q10[MAX_MB_PLANE]) {
  int i;
  const int one_q10 = 1 << 10;
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    const int tmp = (xsq_q10[i] >> 2) + 8;
    const int k = get_msb(tmp) - 3;
    const int xq = (k << 3) + ((tmp >> k) & 0x7);
    const int a_q10 = ((xsq_q10[i] - xsq_iq_q10[xq]) << 10) >> (2 + k);
    const int b_q10 = one_q10 - a_q10;
    r_q10[i] = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
    d_q10[i] = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
  }
}

// Largest usable xsq value in Q10: one less than the final xsq_iq_q10
// entry, so the [xq, xq + 1] interpolation never reads past the tables.
static const uint32_t MAX_XSQ_Q10 = 245727;
533 
// Estimates rate (bit-cost units) and distortion for a block of 2^n_log2
// samples from a Laplacian source with the given variance, quantized with
// step size |qstep|. var == 0 yields zero rate and distortion.
void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
                                  unsigned int qstep, int *rate,
                                  int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  if (var == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    int d_q10, r_q10;
    // xsq_q10 = n * qstep^2 / var in Q10, rounded, clamped to table range.
    const uint64_t xsq_q10_64 =
        (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
    const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
    // Scale normalized rate by the sample count and convert the Q10
    // distortion back to absolute units via the variance.
    *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - VP9_PROB_COST_SHIFT);
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
  }
}
556 
// Implements a fixed length vector form of vp9_model_rd_from_var_lapndz where
// vectors are of length MAX_MB_PLANE and all elements of var are non-zero.
void vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE],
                                      unsigned int n_log2[MAX_MB_PLANE],
                                      unsigned int qstep[MAX_MB_PLANE],
                                      int64_t *rate_sum, int64_t *dist_sum) {
  int i;
  int xsq_q10[MAX_MB_PLANE], d_q10[MAX_MB_PLANE], r_q10[MAX_MB_PLANE];
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    // Same normalized x^2 computation as the scalar version; relies on
    // var[i] != 0 per the contract above.
    const uint64_t xsq_q10_64 =
        (((uint64_t)qstep[i] * qstep[i] << (n_log2[i] + 10)) + (var[i] >> 1)) /
        var[i];
    xsq_q10[i] = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
  }
  model_rd_norm_vec(xsq_q10, r_q10, d_q10);
  // Accumulate into the caller-provided running sums.
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    int rate =
        ROUND_POWER_OF_TWO(r_q10[i] << n_log2[i], 10 - VP9_PROB_COST_SHIFT);
    int64_t dist = (var[i] * (int64_t)d_q10[i] + 512) >> 10;
    *rate_sum += rate;
    *dist_sum += dist;
  }
}
580 
581 // Disable gcc 12.2 false positive warning.
582 // warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
583 #if defined(__GNUC__) && !defined(__clang__)
584 #pragma GCC diagnostic push
585 #pragma GCC diagnostic ignored "-Wstringop-overflow"
586 #endif
// Collapses the per-4x4 above/left entropy contexts of a plane into
// per-transform-block contexts for the given tx size: each output entry is
// nonzero iff any of the covered 4x4 contexts is nonzero.
void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;

  int i;
  // NOTE(review): the wide loads below type-pun the ENTROPY_CONTEXT arrays
  // through uint16_t/uint32_t/uint64_t pointers to test 2/4/8 contexts at
  // once; this assumes ENTROPY_CONTEXT is one byte and the buffers are
  // suitably aligned — confirm against the context buffer declarations.
  switch (tx_size) {
    case TX_4X4:
      memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = !!*(const uint16_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = !!*(const uint16_t *)&left[i];
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = !!*(const uint32_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = !!*(const uint32_t *)&left[i];
      break;
    default:
      assert(tx_size == TX_32X32);
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = !!*(const uint64_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = !!*(const uint64_t *)&left[i];
      break;
  }
}
624 #if defined(__GNUC__) && !defined(__clang__)
625 #pragma GCC diagnostic pop
626 #endif
627 
// Scans the candidate reference mvs (nearest, near, and the stored pred
// mv) and records, per reference frame: the index of the candidate with
// the lowest full-pel SAD, the max mv magnitude seen, and the best SAD.
void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  int i;
  int zero_seen = 0;
  int best_index = 0;
  int best_sad = INT_MAX;
  int this_sad = INT_MAX;
  int max_mv = 0;
  int near_same_nearest;
  uint8_t *src_y_ptr = x->plane[0].src.buf;
  uint8_t *ref_y_ptr;
  // The extra pred_mv candidate is only used below the max partition size.
  const int num_mv_refs =
      MAX_MV_REF_CANDIDATES + (block_size < x->max_partition_size);

  MV pred_mv[3];
  pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
  pred_mv[2] = x->pred_mv[ref_frame];
  assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));

  near_same_nearest = x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
                      x->mbmi_ext->ref_mvs[ref_frame][1].as_int;

  // Get the sad for each candidate reference mv.
  for (i = 0; i < num_mv_refs; ++i) {
    const MV *this_mv = &pred_mv[i];
    int fp_row, fp_col;
    // Skip sentinel/invalid candidates.
    if (this_mv->row == INT16_MAX || this_mv->col == INT16_MAX) continue;
    // Skip the near mv when it duplicates the nearest mv.
    if (i == 1 && near_same_nearest) continue;
    // Round the 1/8-pel mv components to the nearest full-pel position.
    fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
    fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
    max_mv = VPXMAX(max_mv, VPXMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);

    // Evaluate the zero mv at most once across all candidates.
    if (fp_row == 0 && fp_col == 0 && zero_seen) continue;
    zero_seen |= (fp_row == 0 && fp_col == 0);

    ref_y_ptr = &ref_y_buffer[ref_y_stride * fp_row + fp_col];
    // Find sad for current vector.
    this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
                                           ref_y_ptr, ref_y_stride);
    // Note if it is the best so far.
    if (this_sad < best_sad) {
      best_sad = this_sad;
      best_index = i;
    }
  }

  // Note the index of the mv that worked best in the reference list.
  x->mv_best_ref_index[ref_frame] = best_index;
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}
680 
vp9_setup_pred_block(const MACROBLOCKD * xd,struct buf_2d dst[MAX_MB_PLANE],const YV12_BUFFER_CONFIG * src,int mi_row,int mi_col,const struct scale_factors * scale,const struct scale_factors * scale_uv)681 void vp9_setup_pred_block(const MACROBLOCKD *xd,
682                           struct buf_2d dst[MAX_MB_PLANE],
683                           const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
684                           const struct scale_factors *scale,
685                           const struct scale_factors *scale_uv) {
686   int i;
687 
688   dst[0].buf = src->y_buffer;
689   dst[0].stride = src->y_stride;
690   dst[1].buf = src->u_buffer;
691   dst[2].buf = src->v_buffer;
692   dst[1].stride = dst[2].stride = src->uv_stride;
693 
694   for (i = 0; i < MAX_MB_PLANE; ++i) {
695     setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
696                      i ? scale_uv : scale, xd->plane[i].subsampling_x,
697                      xd->plane[i].subsampling_y);
698   }
699 }
700 
// Pixel offset of a 4x4 raster block within a plane of the given stride.
int vp9_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
                            int stride) {
  const int bw_log2 = b_width_log2_lookup[plane_bsize];
  const int row = raster_block >> bw_log2;
  const int col = raster_block & ((1 << bw_log2) - 1);
  // Each raster block covers 4x4 pixels.
  return (4 * row) * stride + (4 * col);
}
708 
// Pointer to a 4x4 raster block inside an int16 plane laid out at the
// block-size-natural stride (4 * blocks-wide entries per pixel row).
int16_t *vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
                                       int16_t *base) {
  const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset = vp9_raster_block_offset(plane_bsize, raster_block, stride);
  return base + offset;
}
714 
vp9_get_scaled_ref_frame(const VP9_COMP * cpi,int ref_frame)715 YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
716                                              int ref_frame) {
717   const VP9_COMMON *const cm = &cpi->common;
718   const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
719   const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
720   assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
721   return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX)
722              ? &cm->buffer_pool->frame_bufs[scaled_idx].buf
723              : NULL;
724 }
725 
vp9_get_switchable_rate(const VP9_COMP * cpi,const MACROBLOCKD * const xd)726 int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
727   const MODE_INFO *const mi = xd->mi[0];
728   const int ctx = get_pred_context_switchable_interp(xd);
729   return SWITCHABLE_INTERP_RATE_FACTOR *
730          cpi->switchable_interp_costs[ctx][mi->interp_filter];
731 }
732 
// Initialize the per-mode RD threshold multipliers: a common baseline, fixed
// values for the single-reference NEARESTMV modes, then a per-mode bump.
void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
  // Bumps applied on top of the baseline. Every entry targets a distinct
  // mode slot, so the application order does not matter.
  static const struct {
    int mode;
    int bump;
  } mode_bumps[] = {
    { THR_DC, 1000 },           { THR_NEWMV, 1000 },
    { THR_NEWA, 1000 },         { THR_NEWG, 1000 },
    { THR_NEARMV, 1000 },       { THR_NEARA, 1000 },
    { THR_COMP_NEARESTLA, 1000 }, { THR_COMP_NEARESTGA, 1000 },
    { THR_TM, 1000 },           { THR_COMP_NEARLA, 1500 },
    { THR_COMP_NEWLA, 2000 },   { THR_NEARG, 1000 },
    { THR_COMP_NEARGA, 1500 },  { THR_COMP_NEWGA, 2000 },
    { THR_ZEROMV, 2000 },       { THR_ZEROG, 2000 },
    { THR_ZEROA, 2000 },        { THR_COMP_ZEROLA, 2500 },
    { THR_COMP_ZEROGA, 2500 },  { THR_H_PRED, 2000 },
    { THR_V_PRED, 2000 },       { THR_D45_PRED, 2500 },
    { THR_D135_PRED, 2500 },    { THR_D117_PRED, 2500 },
    { THR_D153_PRED, 2500 },    { THR_D207_PRED, 2500 },
    { THR_D63_PRED, 2500 },
  };
  RD_OPT *const rd = &cpi->rd;
  SPEED_FEATURES *const sf = &cpi->sf;
  // BEST mode starts every threshold slightly negative to favor searching.
  const int baseline = (cpi->oxcf.mode == BEST) ? -500 : 0;
  // NEARESTMV/G/A are pinned (not bumped) relative to the baseline.
  const int nearest_single = sf->adaptive_rd_thresh ? 300 : 0;
  int i;

  for (i = 0; i < MAX_MODES; ++i) rd->thresh_mult[i] = baseline;

  rd->thresh_mult[THR_NEARESTMV] = nearest_single;
  rd->thresh_mult[THR_NEARESTG] = nearest_single;
  rd->thresh_mult[THR_NEARESTA] = nearest_single;

  for (i = 0; i < (int)(sizeof(mode_bumps) / sizeof(mode_bumps[0])); ++i)
    rd->thresh_mult[mode_bumps[i].mode] += mode_bumps[i].bump;
}
786 
// Initialize the sub-8x8 RD threshold multipliers; BEST quality mode gets
// the lower (more permissive) set of thresholds.
void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
  static const int kDefaultThresh[MAX_REFS] = { 2500, 2500, 2500,
                                                4500, 4500, 2500 };
  static const int kBestQualityThresh[MAX_REFS] = { 2000, 2000, 2000,
                                                    4000, 4000, 2000 };
  RD_OPT *const rd = &cpi->rd;
  const int *const src =
      (cpi->oxcf.mode == BEST) ? kBestQualityThresh : kDefaultThresh;
  memcpy(rd->thresh_mult_sub8x8, src, sizeof(kDefaultThresh));
}
796 
vp9_update_rd_thresh_fact(int (* factor_buf)[MAX_MODES],int rd_thresh,int bsize,int best_mode_index)797 void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
798                                int bsize, int best_mode_index) {
799   if (rd_thresh > 0) {
800     const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
801     int mode;
802     for (mode = 0; mode < top_mode; ++mode) {
803       const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
804       const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
805       BLOCK_SIZE bs;
806       for (bs = min_size; bs <= max_size; ++bs) {
807         int *const fact = &factor_buf[bs][mode];
808         if (mode == best_mode_index) {
809           *fact -= (*fact >> 4);
810         } else {
811           *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
812         }
813       }
814     }
815   }
816 }
817 
// Rate penalty applied to intra modes, derived from the DC quantizer.
// Smaller blocks get a reduced penalty unless the source is estimated to be
// noisy.
int vp9_get_intra_cost_penalty(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
                               int qindex, int qdelta) {
  // Right-shift applied to the penalty: 16x reduction for blocks <= 8x8,
  // 4x reduction for blocks <= 16x16, none for larger blocks.
  int reduction_fac = 0;
  if (bsize <= BLOCK_8X8)
    reduction_fac = 4;
  else if (bsize <= BLOCK_16X16)
    reduction_fac = 2;

  // Keep the full penalty when the estimated noise level is high.
  if (cpi->noise_estimate.enabled && cpi->noise_estimate.level == kHigh)
    reduction_fac = 0;

  // Always use VPX_BITS_8 as input here because the penalty is applied to
  // rate, not distortion, so we want a consistent penalty for all bit
  // depths. If the actual bit depth were passed in, the value returned by
  // vp9_dc_quant() would scale with the bit depth and we would then need to
  // apply inverse scaling to correct back to a bit-depth-independent rate
  // penalty.
  return (20 * vp9_dc_quant(qindex, qdelta, VPX_BITS_8)) >> reduction_fac;
}
836