1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <assert.h>
13 #include <limits.h>
14 #include <math.h>
15 #include <stdio.h>
16 
17 #include "aom_dsp/aom_dsp_common.h"
18 #include "aom_mem/aom_mem.h"
19 #include "aom_ports/bitops.h"
20 #include "aom_ports/mem.h"
21 #include "aom_ports/aom_once.h"
22 
23 #include "av1/common/common.h"
24 #include "av1/common/entropy.h"
25 #include "av1/common/entropymode.h"
26 #include "av1/common/pred_common.h"
27 #include "av1/common/quant_common.h"
28 #include "av1/common/reconinter.h"
29 #include "av1/common/reconintra.h"
30 #include "av1/common/seg_common.h"
31 
32 #include "av1/encoder/cost.h"
33 #include "av1/encoder/encodemv.h"
34 #include "av1/encoder/encoder.h"
35 #include "av1/encoder/nonrd_opt.h"
36 #include "av1/encoder/ratectrl.h"
37 #include "av1/encoder/rd.h"
38 #include "config/aom_config.h"
39 
40 #define RD_THRESH_POW 1.25
41 
42 // The baseline rd thresholds for breaking out of the rd loop for
43 // certain modes are assumed to be based on 8x8 blocks.
44 // This table is used to correct for block size.
45 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
46 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES_ALL] = {
47   2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, 48, 48, 64, 4, 4, 8, 8, 16, 16
48 };
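// For example, BLOCK_8X8 keeps the baseline threshold (factor 4 -> 4/4 = 1.0x)
// while BLOCK_128X128 uses factor 64 -> 64/4 = 16x.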
49 
50 static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA]
51                                             [EXT_TX_SIZES] = {
52                                               { 1, 1, 1, 1 },  // unused
53                                               { 1, 1, 0, 0 },
54                                               { 0, 0, 1, 0 },
55                                             };
56 
57 static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER]
58                                             [EXT_TX_SIZES] = {
59                                               { 1, 1, 1, 1 },  // unused
60                                               { 1, 1, 0, 0 },
61                                               { 0, 0, 1, 0 },
62                                               { 0, 1, 1, 1 },
63                                             };
64 
65 static const int av1_ext_tx_set_idx_to_type[2][AOMMAX(EXT_TX_SETS_INTRA,
66                                                       EXT_TX_SETS_INTER)] = {
67   {
68       // Intra
69       EXT_TX_SET_DCTONLY,
70       EXT_TX_SET_DTT4_IDTX_1DDCT,
71       EXT_TX_SET_DTT4_IDTX,
72   },
73   {
74       // Inter
75       EXT_TX_SET_DCTONLY,
76       EXT_TX_SET_ALL16,
77       EXT_TX_SET_DTT9_IDTX_1DDCT,
78       EXT_TX_SET_DCT_IDTX,
79   },
80 };
81 
82 void av1_fill_mode_rates(AV1_COMMON *const cm, ModeCosts *mode_costs,
83                          FRAME_CONTEXT *fc) {
84   int i, j;
85 
86   for (i = 0; i < PARTITION_CONTEXTS; ++i)
87     av1_cost_tokens_from_cdf(mode_costs->partition_cost[i],
88                              fc->partition_cdf[i], NULL);
89 
90   if (cm->current_frame.skip_mode_info.skip_mode_flag) {
91     for (i = 0; i < SKIP_MODE_CONTEXTS; ++i) {
92       av1_cost_tokens_from_cdf(mode_costs->skip_mode_cost[i],
93                                fc->skip_mode_cdfs[i], NULL);
94     }
95   }
96 
97   for (i = 0; i < SKIP_CONTEXTS; ++i) {
98     av1_cost_tokens_from_cdf(mode_costs->skip_txfm_cost[i],
99                              fc->skip_txfm_cdfs[i], NULL);
100   }
101 
102   for (i = 0; i < KF_MODE_CONTEXTS; ++i)
103     for (j = 0; j < KF_MODE_CONTEXTS; ++j)
104       av1_cost_tokens_from_cdf(mode_costs->y_mode_costs[i][j],
105                                fc->kf_y_cdf[i][j], NULL);
106 
107   for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
108     av1_cost_tokens_from_cdf(mode_costs->mbmode_cost[i], fc->y_mode_cdf[i],
109                              NULL);
110   for (i = 0; i < CFL_ALLOWED_TYPES; ++i)
111     for (j = 0; j < INTRA_MODES; ++j)
112       av1_cost_tokens_from_cdf(mode_costs->intra_uv_mode_cost[i][j],
113                                fc->uv_mode_cdf[i][j], NULL);
114 
115   av1_cost_tokens_from_cdf(mode_costs->filter_intra_mode_cost,
116                            fc->filter_intra_mode_cdf, NULL);
117   for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
118     if (av1_filter_intra_allowed_bsize(cm, i))
119       av1_cost_tokens_from_cdf(mode_costs->filter_intra_cost[i],
120                                fc->filter_intra_cdfs[i], NULL);
121   }
122 
123   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
124     av1_cost_tokens_from_cdf(mode_costs->switchable_interp_costs[i],
125                              fc->switchable_interp_cdf[i], NULL);
126 
127   for (i = 0; i < PALATTE_BSIZE_CTXS; ++i) {
128     av1_cost_tokens_from_cdf(mode_costs->palette_y_size_cost[i],
129                              fc->palette_y_size_cdf[i], NULL);
130     av1_cost_tokens_from_cdf(mode_costs->palette_uv_size_cost[i],
131                              fc->palette_uv_size_cdf[i], NULL);
132     for (j = 0; j < PALETTE_Y_MODE_CONTEXTS; ++j) {
133       av1_cost_tokens_from_cdf(mode_costs->palette_y_mode_cost[i][j],
134                                fc->palette_y_mode_cdf[i][j], NULL);
135     }
136   }
137 
138   for (i = 0; i < PALETTE_UV_MODE_CONTEXTS; ++i) {
139     av1_cost_tokens_from_cdf(mode_costs->palette_uv_mode_cost[i],
140                              fc->palette_uv_mode_cdf[i], NULL);
141   }
142 
143   for (i = 0; i < PALETTE_SIZES; ++i) {
144     for (j = 0; j < PALETTE_COLOR_INDEX_CONTEXTS; ++j) {
145       av1_cost_tokens_from_cdf(mode_costs->palette_y_color_cost[i][j],
146                                fc->palette_y_color_index_cdf[i][j], NULL);
147       av1_cost_tokens_from_cdf(mode_costs->palette_uv_color_cost[i][j],
148                                fc->palette_uv_color_index_cdf[i][j], NULL);
149     }
150   }
151 
152   int sign_cost[CFL_JOINT_SIGNS];
153   av1_cost_tokens_from_cdf(sign_cost, fc->cfl_sign_cdf, NULL);
154   for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
155     int *cost_u = mode_costs->cfl_cost[joint_sign][CFL_PRED_U];
156     int *cost_v = mode_costs->cfl_cost[joint_sign][CFL_PRED_V];
157     if (CFL_SIGN_U(joint_sign) == CFL_SIGN_ZERO) {
158       memset(cost_u, 0, CFL_ALPHABET_SIZE * sizeof(*cost_u));
159     } else {
160       const aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
161       av1_cost_tokens_from_cdf(cost_u, cdf_u, NULL);
162     }
163     if (CFL_SIGN_V(joint_sign) == CFL_SIGN_ZERO) {
164       memset(cost_v, 0, CFL_ALPHABET_SIZE * sizeof(*cost_v));
165     } else {
166       const aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
167       av1_cost_tokens_from_cdf(cost_v, cdf_v, NULL);
168     }
169     for (int u = 0; u < CFL_ALPHABET_SIZE; u++)
170       cost_u[u] += sign_cost[joint_sign];
171   }
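  // Note that the joint-sign cost computed above is folded into the U costs
  // only, so it is counted exactly once per (U, V) alpha pair.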
172 
173   for (i = 0; i < MAX_TX_CATS; ++i)
174     for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
175       av1_cost_tokens_from_cdf(mode_costs->tx_size_cost[i][j],
176                                fc->tx_size_cdf[i][j], NULL);
177 
178   for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i) {
179     av1_cost_tokens_from_cdf(mode_costs->txfm_partition_cost[i],
180                              fc->txfm_partition_cdf[i], NULL);
181   }
182 
183   for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
184     int s;
185     for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
186       if (use_inter_ext_tx_for_txsize[s][i]) {
187         av1_cost_tokens_from_cdf(
188             mode_costs->inter_tx_type_costs[s][i], fc->inter_ext_tx_cdf[s][i],
189             av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][s]]);
190       }
191     }
192     for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
193       if (use_intra_ext_tx_for_txsize[s][i]) {
194         for (j = 0; j < INTRA_MODES; ++j) {
195           av1_cost_tokens_from_cdf(
196               mode_costs->intra_tx_type_costs[s][i][j],
197               fc->intra_ext_tx_cdf[s][i][j],
198               av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[0][s]]);
199         }
200       }
201     }
202   }
203   for (i = 0; i < DIRECTIONAL_MODES; ++i) {
204     av1_cost_tokens_from_cdf(mode_costs->angle_delta_cost[i],
205                              fc->angle_delta_cdf[i], NULL);
206   }
207   av1_cost_tokens_from_cdf(mode_costs->intrabc_cost, fc->intrabc_cdf, NULL);
208 
209   for (i = 0; i < SPATIAL_PREDICTION_PROBS; ++i) {
210     av1_cost_tokens_from_cdf(mode_costs->spatial_pred_cost[i],
211                              fc->seg.spatial_pred_seg_cdf[i], NULL);
212   }
213 
214   for (i = 0; i < SEG_TEMPORAL_PRED_CTXS; ++i) {
215     av1_cost_tokens_from_cdf(mode_costs->tmp_pred_cost[i], fc->seg.pred_cdf[i],
216                              NULL);
217   }
218 
219   if (!frame_is_intra_only(cm)) {
220     for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
221       av1_cost_tokens_from_cdf(mode_costs->comp_inter_cost[i],
222                                fc->comp_inter_cdf[i], NULL);
223     }
224 
225     for (i = 0; i < REF_CONTEXTS; ++i) {
226       for (j = 0; j < SINGLE_REFS - 1; ++j) {
227         av1_cost_tokens_from_cdf(mode_costs->single_ref_cost[i][j],
228                                  fc->single_ref_cdf[i][j], NULL);
229       }
230     }
231 
232     for (i = 0; i < COMP_REF_TYPE_CONTEXTS; ++i) {
233       av1_cost_tokens_from_cdf(mode_costs->comp_ref_type_cost[i],
234                                fc->comp_ref_type_cdf[i], NULL);
235     }
236 
237     for (i = 0; i < UNI_COMP_REF_CONTEXTS; ++i) {
238       for (j = 0; j < UNIDIR_COMP_REFS - 1; ++j) {
239         av1_cost_tokens_from_cdf(mode_costs->uni_comp_ref_cost[i][j],
240                                  fc->uni_comp_ref_cdf[i][j], NULL);
241       }
242     }
243 
244     for (i = 0; i < REF_CONTEXTS; ++i) {
245       for (j = 0; j < FWD_REFS - 1; ++j) {
246         av1_cost_tokens_from_cdf(mode_costs->comp_ref_cost[i][j],
247                                  fc->comp_ref_cdf[i][j], NULL);
248       }
249     }
250 
251     for (i = 0; i < REF_CONTEXTS; ++i) {
252       for (j = 0; j < BWD_REFS - 1; ++j) {
253         av1_cost_tokens_from_cdf(mode_costs->comp_bwdref_cost[i][j],
254                                  fc->comp_bwdref_cdf[i][j], NULL);
255       }
256     }
257 
258     for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
259       av1_cost_tokens_from_cdf(mode_costs->intra_inter_cost[i],
260                                fc->intra_inter_cdf[i], NULL);
261     }
262 
263     for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) {
264       av1_cost_tokens_from_cdf(mode_costs->newmv_mode_cost[i], fc->newmv_cdf[i],
265                                NULL);
266     }
267 
268     for (i = 0; i < GLOBALMV_MODE_CONTEXTS; ++i) {
269       av1_cost_tokens_from_cdf(mode_costs->zeromv_mode_cost[i],
270                                fc->zeromv_cdf[i], NULL);
271     }
272 
273     for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) {
274       av1_cost_tokens_from_cdf(mode_costs->refmv_mode_cost[i], fc->refmv_cdf[i],
275                                NULL);
276     }
277 
278     for (i = 0; i < DRL_MODE_CONTEXTS; ++i) {
279       av1_cost_tokens_from_cdf(mode_costs->drl_mode_cost0[i], fc->drl_cdf[i],
280                                NULL);
281     }
282     for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
283       av1_cost_tokens_from_cdf(mode_costs->inter_compound_mode_cost[i],
284                                fc->inter_compound_mode_cdf[i], NULL);
285     for (i = 0; i < BLOCK_SIZES_ALL; ++i)
286       av1_cost_tokens_from_cdf(mode_costs->compound_type_cost[i],
287                                fc->compound_type_cdf[i], NULL);
288     for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
289       if (av1_is_wedge_used(i)) {
290         av1_cost_tokens_from_cdf(mode_costs->wedge_idx_cost[i],
291                                  fc->wedge_idx_cdf[i], NULL);
292       }
293     }
294     for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
295       av1_cost_tokens_from_cdf(mode_costs->interintra_cost[i],
296                                fc->interintra_cdf[i], NULL);
297       av1_cost_tokens_from_cdf(mode_costs->interintra_mode_cost[i],
298                                fc->interintra_mode_cdf[i], NULL);
299     }
300     for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
301       av1_cost_tokens_from_cdf(mode_costs->wedge_interintra_cost[i],
302                                fc->wedge_interintra_cdf[i], NULL);
303     }
304     for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
305       av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost[i],
306                                fc->motion_mode_cdf[i], NULL);
307     }
308     for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
309       av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost1[i],
310                                fc->obmc_cdf[i], NULL);
311     }
312     for (i = 0; i < COMP_INDEX_CONTEXTS; ++i) {
313       av1_cost_tokens_from_cdf(mode_costs->comp_idx_cost[i],
314                                fc->compound_index_cdf[i], NULL);
315     }
316     for (i = 0; i < COMP_GROUP_IDX_CONTEXTS; ++i) {
317       av1_cost_tokens_from_cdf(mode_costs->comp_group_idx_cost[i],
318                                fc->comp_group_idx_cdf[i], NULL);
319     }
320   }
321 }
322 
323 #if !CONFIG_REALTIME_ONLY
324 void av1_fill_lr_rates(ModeCosts *mode_costs, FRAME_CONTEXT *fc) {
325   av1_cost_tokens_from_cdf(mode_costs->switchable_restore_cost,
326                            fc->switchable_restore_cdf, NULL);
327   av1_cost_tokens_from_cdf(mode_costs->wiener_restore_cost,
328                            fc->wiener_restore_cdf, NULL);
329   av1_cost_tokens_from_cdf(mode_costs->sgrproj_restore_cost,
330                            fc->sgrproj_restore_cdf, NULL);
331 }
332 #endif  // !CONFIG_REALTIME_ONLY
333 
334 // SAD-per-bit lookup tables; the values are derived from the quantizer.
335 static int sad_per_bit_lut_8[QINDEX_RANGE];
336 static int sad_per_bit_lut_10[QINDEX_RANGE];
337 static int sad_per_bit_lut_12[QINDEX_RANGE];
338 
339 static void init_me_luts_bd(int *bit16lut, int range,
340                             aom_bit_depth_t bit_depth) {
341   int i;
342   // Initialize the sad lut tables using a formulaic calculation for now.
343   // This is to make it easier to resolve the impact of experimental changes
344   // to the quantizer tables.
345   for (i = 0; i < range; i++) {
346     const double q = av1_convert_qindex_to_q(i, bit_depth);
347     bit16lut[i] = (int)(0.0418 * q + 2.4107);
348   }
349 }
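// For example, q = 100 gives (int)(0.0418 * 100 + 2.4107) = 6.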
350 
351 static void init_me_luts(void) {
352   init_me_luts_bd(sad_per_bit_lut_8, QINDEX_RANGE, AOM_BITS_8);
353   init_me_luts_bd(sad_per_bit_lut_10, QINDEX_RANGE, AOM_BITS_10);
354   init_me_luts_bd(sad_per_bit_lut_12, QINDEX_RANGE, AOM_BITS_12);
355 }
356 
357 void av1_init_me_luts(void) { aom_once(init_me_luts); }
358 
359 static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
360                                          8,  8,  4,  4,  2,  2,  1,  0 };
361 
362 static const int rd_layer_depth_factor[7] = {
363   160, 160, 160, 160, 192, 208, 224
364 };
365 
366 // Returns the default rd multiplier for inter frames for a given qindex.
367 // The function here is a first pass estimate based on data from
368 // a previous Vizer run
369 static double def_inter_rd_multiplier(int qindex) {
370   return 3.2 + (0.0015 * (double)qindex);
371 }
372 
373 // Returns the default rd multiplier for ARF/Golden Frames for a given qindex.
374 // The function here is a first pass estimate based on data from
375 // a previous Vizer run
376 static double def_arf_rd_multiplier(int qindex) {
377   return 3.25 + (0.0015 * (double)qindex);
378 }
379 
380 // Returns the default rd multiplier for key frames for a given qindex.
381 // The function here is a first pass estimate based on data from
382 // a previous Vizer run
383 static double def_kf_rd_multiplier(int qindex) {
384   return 3.3 + (0.0015 * (double)qindex);
385 }
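// For example, an argument of 100 gives multipliers of 3.35 (inter),
// 3.40 (ARF/GF) and 3.45 (key frame).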
386 
387 int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
388                                         FRAME_UPDATE_TYPE update_type,
389                                         int qindex, aom_tune_metric tuning) {
390   const int q = av1_dc_quant_QTX(qindex, 0, bit_depth);
391   int64_t rdmult = q * q;
392   if (update_type == KF_UPDATE) {
393     double def_rd_q_mult = def_kf_rd_multiplier(q);
394     rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
395   } else if ((update_type == GF_UPDATE) || (update_type == ARF_UPDATE)) {
396     double def_rd_q_mult = def_arf_rd_multiplier(q);
397     rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
398   } else {
399     double def_rd_q_mult = def_inter_rd_multiplier(q);
400     rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
401   }
402 
403   if (tuning == AOM_TUNE_IQ) {
404     // Further multiply rdmult (by up to 200/128 = 1.5625) to improve image
405     // quality. The most noticeable effect is a mild bias towards choosing
406     // larger transform sizes (e.g. one 16x16 transform instead of 4 8x8
407     // transforms).
408     // For very high qindexes, start progressively reducing the weight towards
409     // unity (128/128), as transforms are large enough and making them even
410     // larger actually harms subjective quality and SSIMULACRA 2 scores.
411     // This weight part of the equation was determined by iteratively increasing
412     // weight on CID22 and Daala's subset1, and observing its effects on visual
413     // quality and SSIMULACRA 2 scores along the usable (0-100) range.
414     // The ramp-down part of the equation was determined by choosing a fixed
415     // initial qindex point [qindex 159 = (255 - 159) * 3 / 4] where SSIMULACRA
416     // 2 scores for encodes with qindexes greater than 159 scored at or above
417     // their equivalents with no rdmult adjustment.
418     const int weight = clamp(((255 - qindex) * 3) / 4, 0, 72) + 128;
419     rdmult = (int64_t)((double)rdmult * weight / 128.0);
420   }
421 
422   switch (bit_depth) {
423     case AOM_BITS_8: break;
424     case AOM_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
425     case AOM_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
426     default:
427       assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
428       return -1;
429   }
430   return rdmult > 0 ? (int)AOMMIN(rdmult, INT_MAX) : 1;
431 }
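// The >> 4 and >> 8 shifts above compensate for the roughly 4x / 16x larger
// quantizer values at 10 and 12 bits, keeping q * q on an 8-bit scale. The
// returned value acts as the Lagrange multiplier weighting rate against
// distortion in the encoder's RD cost computations.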
432 
433 int av1_compute_rd_mult(const int qindex, const aom_bit_depth_t bit_depth,
434                         const FRAME_UPDATE_TYPE update_type,
435                         const int layer_depth, const int boost_index,
436                         const FRAME_TYPE frame_type,
437                         const int use_fixed_qp_offsets,
438                         const int is_stat_consumption_stage,
439                         const aom_tune_metric tuning) {
440   int64_t rdmult = av1_compute_rd_mult_based_on_qindex(bit_depth, update_type,
441                                                        qindex, tuning);
442   if (is_stat_consumption_stage && !use_fixed_qp_offsets &&
443       (frame_type != KEY_FRAME)) {
444     // Layer depth adjustment
445     rdmult = (rdmult * rd_layer_depth_factor[layer_depth]) >> 7;
446     // ARF boost adjustment
447     rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
448   }
449   return rdmult > 0 ? (int)AOMMIN(rdmult, INT_MAX) : 1;
450 }
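// rd_layer_depth_factor[] and rd_boost_factor[] are 7-bit fixed point
// (128 = 1.0), so e.g. a layer factor of 160 corresponds to a 1.25x scaling.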
451 
452 int av1_get_deltaq_offset(aom_bit_depth_t bit_depth, int qindex, double beta) {
453   assert(beta > 0.0);
454   int q = av1_dc_quant_QTX(qindex, 0, bit_depth);
455   int newq = (int)rint(q / sqrt(beta));
456   int orig_qindex = qindex;
457   if (newq == q) {
458     return 0;
459   }
460   if (newq < q) {
461     while (qindex > 0) {
462       qindex--;
463       q = av1_dc_quant_QTX(qindex, 0, bit_depth);
464       if (newq >= q) {
465         break;
466       }
467     }
468   } else {
469     while (qindex < MAXQ) {
470       qindex++;
471       q = av1_dc_quant_QTX(qindex, 0, bit_depth);
472       if (newq <= q) {
473         break;
474       }
475     }
476   }
477   return qindex - orig_qindex;
478 }
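// A beta above 1.0 asks for a finer quantizer (newq < q) and yields a negative
// qindex offset; a beta below 1.0 yields a positive one.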
479 
480 int av1_adjust_q_from_delta_q_res(int delta_q_res, int prev_qindex,
481                                   int curr_qindex) {
482   curr_qindex = clamp(curr_qindex, delta_q_res, 256 - delta_q_res);
483   const int sign_deltaq_index = curr_qindex - prev_qindex >= 0 ? 1 : -1;
484   const int deltaq_deadzone = delta_q_res / 4;
485   const int qmask = ~(delta_q_res - 1);
486   int abs_deltaq_index = abs(curr_qindex - prev_qindex);
487   abs_deltaq_index = (abs_deltaq_index + deltaq_deadzone) & qmask;
488   int adjust_qindex = prev_qindex + sign_deltaq_index * abs_deltaq_index;
489   adjust_qindex = AOMMAX(adjust_qindex, MINQ + 1);
490   return adjust_qindex;
491 }
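// Example: delta_q_res = 4, prev_qindex = 100, curr_qindex = 109 gives a
// deadzone of 1 and (9 + 1) & ~3 = 8, so the adjusted qindex is 108.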
492 
493 #if !CONFIG_REALTIME_ONLY
494 int av1_get_adaptive_rdmult(const AV1_COMP *cpi, double beta) {
495   assert(beta > 0.0);
496   const AV1_COMMON *cm = &cpi->common;
497 
498   const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
499   const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
500   const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
501   const FRAME_TYPE frame_type = cm->current_frame.frame_type;
502 
503   const int qindex_rdmult = cm->quant_params.base_qindex;
504   return (int)(av1_compute_rd_mult(
505                    qindex_rdmult, cm->seq_params->bit_depth,
506                    cpi->ppi->gf_group.update_type[cpi->gf_frame_index],
507                    layer_depth, boost_index, frame_type,
508                    cpi->oxcf.q_cfg.use_fixed_qp_offsets,
509                    is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning) /
510                beta);
511 }
512 #endif  // !CONFIG_REALTIME_ONLY
513 
514 static int compute_rd_thresh_factor(int qindex, aom_bit_depth_t bit_depth) {
515   double q;
516   switch (bit_depth) {
517     case AOM_BITS_8: q = av1_dc_quant_QTX(qindex, 0, AOM_BITS_8) / 4.0; break;
518     case AOM_BITS_10:
519       q = av1_dc_quant_QTX(qindex, 0, AOM_BITS_10) / 16.0;
520       break;
521     case AOM_BITS_12:
522       q = av1_dc_quant_QTX(qindex, 0, AOM_BITS_12) / 64.0;
523       break;
524     default:
525       assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
526       return -1;
527   }
528   // TODO(debargha): Adjust the function below.
529   return AOMMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
530 }
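// The per-bit-depth divisors above bring the quantizer back to a common 8-bit
// scale before the power curve is applied.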
531 
532 void av1_set_sad_per_bit(const AV1_COMP *cpi, int *sadperbit, int qindex) {
533   switch (cpi->common.seq_params->bit_depth) {
534     case AOM_BITS_8: *sadperbit = sad_per_bit_lut_8[qindex]; break;
535     case AOM_BITS_10: *sadperbit = sad_per_bit_lut_10[qindex]; break;
536     case AOM_BITS_12: *sadperbit = sad_per_bit_lut_12[qindex]; break;
537     default:
538       assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
539   }
540 }
541 
542 static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd,
543                                  int use_nonrd_pick_mode) {
544   int i, bsize, segment_id;
545   THR_MODES mode_indices[RTC_REFS * RTC_MODES] = { 0 };
546   int num_modes_count = use_nonrd_pick_mode ? 0 : MAX_MODES;
547 
548   if (use_nonrd_pick_mode) {
549     for (int r_idx = 0; r_idx < RTC_REFS; r_idx++) {
550       const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
551       if (ref != INTRA_FRAME) {
552         for (i = 0; i < RTC_INTER_MODES; i++)
553           mode_indices[num_modes_count++] =
554               mode_idx[ref][mode_offset(inter_mode_list[i])];
555       } else {
556         for (i = 0; i < RTC_INTRA_MODES; i++)
557           mode_indices[num_modes_count++] =
558               mode_idx[ref][mode_offset(intra_mode_list[i])];
559       }
560     }
561   }
562 
563   for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
564     const int qindex = clamp(
565         av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex) +
566             cm->quant_params.y_dc_delta_q,
567         0, MAXQ);
568     const int q = compute_rd_thresh_factor(qindex, cm->seq_params->bit_depth);
569 
570     for (bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
571       // Threshold here seems unnecessarily harsh but fine given actual
572       // range of values used for cpi->sf.thresh_mult[].
573       const int t = q * rd_thresh_block_size_factor[bsize];
574       const int thresh_max = INT_MAX / t;
575 
576       for (i = 0; i < num_modes_count; ++i) {
577         const int mode_index = use_nonrd_pick_mode ? mode_indices[i] : i;
578         rd->threshes[segment_id][bsize][mode_index] =
579             rd->thresh_mult[mode_index] < thresh_max
580                 ? rd->thresh_mult[mode_index] * t / 4
581                 : INT_MAX;
582       }
583     }
584   }
585 }
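// The division by 4 above undoes the x4 scaling baked into
// rd_thresh_block_size_factor[].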
586 
587 void av1_fill_coeff_costs(CoeffCosts *coeff_costs, FRAME_CONTEXT *fc,
588                           const int num_planes) {
589   const int nplanes = AOMMIN(num_planes, PLANE_TYPES);
590   for (int eob_multi_size = 0; eob_multi_size < 7; ++eob_multi_size) {
591     for (int plane = 0; plane < nplanes; ++plane) {
592       LV_MAP_EOB_COST *pcost = &coeff_costs->eob_costs[eob_multi_size][plane];
593 
594       for (int ctx = 0; ctx < 2; ++ctx) {
595         aom_cdf_prob *pcdf;
596         switch (eob_multi_size) {
597           case 0: pcdf = fc->eob_flag_cdf16[plane][ctx]; break;
598           case 1: pcdf = fc->eob_flag_cdf32[plane][ctx]; break;
599           case 2: pcdf = fc->eob_flag_cdf64[plane][ctx]; break;
600           case 3: pcdf = fc->eob_flag_cdf128[plane][ctx]; break;
601           case 4: pcdf = fc->eob_flag_cdf256[plane][ctx]; break;
602           case 5: pcdf = fc->eob_flag_cdf512[plane][ctx]; break;
603           case 6:
604           default: pcdf = fc->eob_flag_cdf1024[plane][ctx]; break;
605         }
606         av1_cost_tokens_from_cdf(pcost->eob_cost[ctx], pcdf, NULL);
607       }
608     }
609   }
610   for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
611     for (int plane = 0; plane < nplanes; ++plane) {
612       LV_MAP_COEFF_COST *pcost = &coeff_costs->coeff_costs[tx_size][plane];
613 
614       for (int ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx)
615         av1_cost_tokens_from_cdf(pcost->txb_skip_cost[ctx],
616                                  fc->txb_skip_cdf[tx_size][ctx], NULL);
617 
618       for (int ctx = 0; ctx < SIG_COEF_CONTEXTS_EOB; ++ctx)
619         av1_cost_tokens_from_cdf(pcost->base_eob_cost[ctx],
620                                  fc->coeff_base_eob_cdf[tx_size][plane][ctx],
621                                  NULL);
622       for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx)
623         av1_cost_tokens_from_cdf(pcost->base_cost[ctx],
624                                  fc->coeff_base_cdf[tx_size][plane][ctx], NULL);
625 
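      // The extra entries 4..7 below hold cost deltas between adjacent base
      // levels (entry 5 also adds one literal bit, presumably the sign of a
      // newly nonzero coefficient), letting later coefficient optimization
      // cost +/-1 level changes without re-deriving values from the CDFs.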
626       for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
627         pcost->base_cost[ctx][4] = 0;
628         pcost->base_cost[ctx][5] = pcost->base_cost[ctx][1] +
629                                    av1_cost_literal(1) -
630                                    pcost->base_cost[ctx][0];
631         pcost->base_cost[ctx][6] =
632             pcost->base_cost[ctx][2] - pcost->base_cost[ctx][1];
633         pcost->base_cost[ctx][7] =
634             pcost->base_cost[ctx][3] - pcost->base_cost[ctx][2];
635       }
636 
637       for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx)
638         av1_cost_tokens_from_cdf(pcost->eob_extra_cost[ctx],
639                                  fc->eob_extra_cdf[tx_size][plane][ctx], NULL);
640 
641       for (int ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx)
642         av1_cost_tokens_from_cdf(pcost->dc_sign_cost[ctx],
643                                  fc->dc_sign_cdf[plane][ctx], NULL);
644 
645       for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
646         int br_rate[BR_CDF_SIZE];
647         int prev_cost = 0;
648         int i, j;
649         av1_cost_tokens_from_cdf(
650             br_rate, fc->coeff_br_cdf[AOMMIN(tx_size, TX_32X32)][plane][ctx],
651             NULL);
652         // printf("br_rate: ");
653         // for(j = 0; j < BR_CDF_SIZE; j++)
654         //  printf("%4d ", br_rate[j]);
655         // printf("\n");
656         for (i = 0; i < COEFF_BASE_RANGE; i += BR_CDF_SIZE - 1) {
657           for (j = 0; j < BR_CDF_SIZE - 1; j++) {
658             pcost->lps_cost[ctx][i + j] = prev_cost + br_rate[j];
659           }
660           prev_cost += br_rate[j];
661         }
662         pcost->lps_cost[ctx][i] = prev_cost;
663         // printf("lps_cost: %d %d %2d : ", tx_size, plane, ctx);
664         // for (i = 0; i <= COEFF_BASE_RANGE; i++)
665         //  printf("%5d ", pcost->lps_cost[ctx][i]);
666         // printf("\n");
667       }
668       for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
669         pcost->lps_cost[ctx][0 + COEFF_BASE_RANGE + 1] =
670             pcost->lps_cost[ctx][0];
671         for (int i = 1; i <= COEFF_BASE_RANGE; ++i) {
672           pcost->lps_cost[ctx][i + COEFF_BASE_RANGE + 1] =
673               pcost->lps_cost[ctx][i] - pcost->lps_cost[ctx][i - 1];
674         }
675       }
676     }
677   }
678 }
679 
680 void av1_fill_mv_costs(const nmv_context *nmvc, int integer_mv, int usehp,
681                        MvCosts *mv_costs) {
682   // Avoid accessing 'mv_costs' when it is not allocated.
683   if (mv_costs == NULL) return;
684 
685   mv_costs->nmv_cost[0] = &mv_costs->nmv_cost_alloc[0][MV_MAX];
686   mv_costs->nmv_cost[1] = &mv_costs->nmv_cost_alloc[1][MV_MAX];
687   mv_costs->nmv_cost_hp[0] = &mv_costs->nmv_cost_hp_alloc[0][MV_MAX];
688   mv_costs->nmv_cost_hp[1] = &mv_costs->nmv_cost_hp_alloc[1][MV_MAX];
689   if (integer_mv) {
690     mv_costs->mv_cost_stack = (int **)&mv_costs->nmv_cost;
691     av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
692                              nmvc, MV_SUBPEL_NONE);
693   } else {
694     mv_costs->mv_cost_stack =
695         usehp ? mv_costs->nmv_cost_hp : mv_costs->nmv_cost;
696     av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
697                              nmvc, usehp);
698   }
699 }
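// The cost pointers above are offset by MV_MAX so they can be indexed directly
// with signed MV components, i.e. mv_cost_stack[c][v] is valid for
// v in [-MV_MAX, MV_MAX].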
700 
701 void av1_fill_dv_costs(const nmv_context *ndvc, IntraBCMVCosts *dv_costs) {
702   dv_costs->dv_costs[0] = &dv_costs->dv_costs_alloc[0][MV_MAX];
703   dv_costs->dv_costs[1] = &dv_costs->dv_costs_alloc[1][MV_MAX];
704   av1_build_nmv_cost_table(dv_costs->joint_mv, dv_costs->dv_costs, ndvc,
705                            MV_SUBPEL_NONE);
706 }
707 
708 // Populates speed features based on codec control settings (of type
709 // COST_UPDATE_TYPE) and expected speed feature settings (of type
710 // INTERNAL_COST_UPDATE_TYPE) by considering the least frequent cost update.
711 // The populated/updated speed features are used for cost updates in the
712 // encoder.
713 // WARNING: The unified cost update frequency population must be revisited
714 // whenever the enums COST_UPDATE_TYPE/INTERNAL_COST_UPDATE_TYPE are modified
715 // or extended.
716 static inline void populate_unified_cost_update_freq(
717     const CostUpdateFreq cost_upd_freq, SPEED_FEATURES *const sf) {
718   INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
719   // Mapping of entropy cost update frequency from the encoder's codec control
720   // settings of type COST_UPDATE_TYPE to speed features of type
721   // INTERNAL_COST_UPDATE_TYPE.
722   static const INTERNAL_COST_UPDATE_TYPE
723       map_cost_upd_to_internal_cost_upd[NUM_COST_UPDATE_TYPES] = {
724         INTERNAL_COST_UPD_SB, INTERNAL_COST_UPD_SBROW, INTERNAL_COST_UPD_TILE,
725         INTERNAL_COST_UPD_OFF
726       };
727 
728   inter_sf->mv_cost_upd_level =
729       AOMMIN(inter_sf->mv_cost_upd_level,
730              map_cost_upd_to_internal_cost_upd[cost_upd_freq.mv]);
731   inter_sf->coeff_cost_upd_level =
732       AOMMIN(inter_sf->coeff_cost_upd_level,
733              map_cost_upd_to_internal_cost_upd[cost_upd_freq.coeff]);
734   inter_sf->mode_cost_upd_level =
735       AOMMIN(inter_sf->mode_cost_upd_level,
736              map_cost_upd_to_internal_cost_upd[cost_upd_freq.mode]);
737   sf->intra_sf.dv_cost_upd_level =
738       AOMMIN(sf->intra_sf.dv_cost_upd_level,
739              map_cost_upd_to_internal_cost_upd[cost_upd_freq.dv]);
740 }
741 
742 // Checks if entropy costs should be initialized/updated at frame level or not.
743 static inline int is_frame_level_cost_upd_freq_set(
744     const AV1_COMMON *const cm, const INTERNAL_COST_UPDATE_TYPE cost_upd_level,
745     const int use_nonrd_pick_mode, const int frames_since_key) {
746   const int fill_costs =
747       frame_is_intra_only(cm) ||
748       (use_nonrd_pick_mode ? frames_since_key < 2
749                            : (cm->current_frame.frame_number & 0x07) == 1);
750   return ((!use_nonrd_pick_mode && cost_upd_level != INTERNAL_COST_UPD_OFF) ||
751           cost_upd_level == INTERNAL_COST_UPD_TILE || fill_costs);
752 }
753 
754 // Decide whether we want to update the mode entropy cost for the current frame.
755 // The logic is currently inherited from selective_disable_cdf_rtc.
756 static inline int should_force_mode_cost_update(const AV1_COMP *cpi) {
757   const REAL_TIME_SPEED_FEATURES *const rt_sf = &cpi->sf.rt_sf;
758   if (!rt_sf->frame_level_mode_cost_update) {
759     return false;
760   }
761 
762   if (cpi->oxcf.algo_cfg.cdf_update_mode == 2) {
763     return cpi->frames_since_last_update == 1;
764   } else if (cpi->oxcf.algo_cfg.cdf_update_mode == 1) {
765     if (cpi->svc.number_spatial_layers == 1 &&
766         cpi->svc.number_temporal_layers == 1) {
767       const AV1_COMMON *const cm = &cpi->common;
768       const RATE_CONTROL *const rc = &cpi->rc;
769 
770       return frame_is_intra_only(cm) || is_frame_resize_pending(cpi) ||
771              rc->high_source_sad || rc->frames_since_key < 10 ||
772              cpi->cyclic_refresh->counter_encode_maxq_scene_change < 10 ||
773              cm->current_frame.frame_number % 8 == 0;
774     } else if (cpi->svc.number_temporal_layers > 1) {
775       return cpi->svc.temporal_layer_id != cpi->svc.number_temporal_layers - 1;
776     }
777   }
778 
779   return false;
780 }
781 
782 void av1_initialize_rd_consts(AV1_COMP *cpi) {
783   AV1_COMMON *const cm = &cpi->common;
784   MACROBLOCK *const x = &cpi->td.mb;
785   SPEED_FEATURES *const sf = &cpi->sf;
786   RD_OPT *const rd = &cpi->rd;
787   int use_nonrd_pick_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
788   int frames_since_key = cpi->rc.frames_since_key;
789 
790   const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
791   const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
792   const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
793   const FRAME_TYPE frame_type = cm->current_frame.frame_type;
794 
795   const int qindex_rdmult =
796       cm->quant_params.base_qindex + cm->quant_params.y_dc_delta_q;
797   rd->RDMULT = av1_compute_rd_mult(
798       qindex_rdmult, cm->seq_params->bit_depth,
799       cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
800       boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
801       is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
802 #if CONFIG_RD_COMMAND
803   if (cpi->oxcf.pass == 2) {
804     const RD_COMMAND *rd_command = &cpi->rd_command;
805     if (rd_command->option_ls[rd_command->frame_index] ==
806         RD_OPTION_SET_Q_RDMULT) {
807       rd->RDMULT = rd_command->rdmult_ls[rd_command->frame_index];
808     }
809   }
810 #endif  // CONFIG_RD_COMMAND
811 
812   av1_set_error_per_bit(&x->errorperbit, rd->RDMULT);
813 
814   set_block_thresholds(cm, rd, cpi->sf.rt_sf.use_nonrd_pick_mode);
815 
816   populate_unified_cost_update_freq(cpi->oxcf.cost_upd_freq, sf);
817   const INTER_MODE_SPEED_FEATURES *const inter_sf = &cpi->sf.inter_sf;
818   // Frame level mv cost update
819   if (is_frame_level_cost_upd_freq_set(cm, inter_sf->mv_cost_upd_level,
820                                        use_nonrd_pick_mode, frames_since_key))
821     av1_fill_mv_costs(&cm->fc->nmvc, cm->features.cur_frame_force_integer_mv,
822                       cm->features.allow_high_precision_mv, x->mv_costs);
823 
824   // Frame level coefficient cost update
825   if (is_frame_level_cost_upd_freq_set(cm, inter_sf->coeff_cost_upd_level,
826                                        use_nonrd_pick_mode, frames_since_key))
827     av1_fill_coeff_costs(&x->coeff_costs, cm->fc, av1_num_planes(cm));
828 
829   // Frame level mode cost update
830   if (should_force_mode_cost_update(cpi) ||
831       is_frame_level_cost_upd_freq_set(cm, inter_sf->mode_cost_upd_level,
832                                        use_nonrd_pick_mode, frames_since_key))
833     av1_fill_mode_rates(cm, &x->mode_costs, cm->fc);
834 
835   // Frame level dv cost update
836   if (av1_need_dv_costs(cpi)) {
837     if (cpi->td.dv_costs_alloc == NULL) {
838       CHECK_MEM_ERROR(
839           cm, cpi->td.dv_costs_alloc,
840           (IntraBCMVCosts *)aom_malloc(sizeof(*cpi->td.dv_costs_alloc)));
841       cpi->td.mb.dv_costs = cpi->td.dv_costs_alloc;
842     }
843     av1_fill_dv_costs(&cm->fc->ndvc, x->dv_costs);
844   }
845 }
846 
847 static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
848   // NOTE: The tables below must be of the same size.
849 
850   // The functions described below are sampled at the four most significant
851   // bits of x^2 + 8 / 256.
852 
853   // Normalized rate:
854   // This table models the rate for a Laplacian source with given variance
855   // when quantized with a uniform quantizer with given stepsize. The
856   // closed form expression is:
857   // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
858   // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
859   // and H(x) is the binary entropy function.
860   static const int rate_tab_q10[] = {
861     65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142,
862     4044,  3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
863     3133,  3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353,
864     2290,  2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
865     1608,  1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963,
866     911,   864,  821,  781,  745,  680,  623,  574,  530,  490,  455,  424,
867     395,   345,  304,  269,  239,  213,  190,  171,  154,  126,  104,  87,
868     73,    61,   52,   44,   38,   28,   21,   16,   12,   10,   8,    6,
869     5,     3,    2,    1,    1,    1,    0,    0,
870   };
871   // Normalized distortion:
872   // This table models the normalized distortion for a Laplacian source
873   // with given variance when quantized with a uniform quantizer
874   // with given stepsize. The closed form expression is:
875   // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
876   // where x = qpstep / sqrt(variance).
877   // Note the actual distortion is Dn * variance.
878   static const int dist_tab_q10[] = {
879     0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,
880     5,    6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,
881     18,   21,   24,   26,   29,   31,   34,   36,   39,   44,   49,   54,
882     59,   64,   69,   73,   78,   88,   97,   106,  115,  124,  133,  142,
883     151,  167,  184,  200,  215,  231,  245,  260,  274,  301,  327,  351,
884     375,  397,  418,  439,  458,  495,  528,  559,  587,  613,  637,  659,
885     680,  717,  749,  777,  801,  823,  842,  859,  874,  899,  919,  936,
886     949,  960,  969,  977,  983,  994,  1001, 1006, 1010, 1013, 1015, 1017,
887     1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
888   };
889   static const int xsq_iq_q10[] = {
890     0,      4,      8,      12,     16,     20,     24,     28,     32,
891     40,     48,     56,     64,     72,     80,     88,     96,     112,
892     128,    144,    160,    176,    192,    208,    224,    256,    288,
893     320,    352,    384,    416,    448,    480,    544,    608,    672,
894     736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
895     1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
896     3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
897     7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
898     16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
899     36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
900     81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
901     180192, 196576, 212960, 229344, 245728,
902   };
903   const int tmp = (xsq_q10 >> 2) + 8;
904   const int k = get_msb(tmp) - 3;
905   const int xq = (k << 3) + ((tmp >> k) & 0x7);
906   const int one_q10 = 1 << 10;
907   const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
908   const int b_q10 = one_q10 - a_q10;
909   *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
910   *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
911 }
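// The lookup above is a piecewise-linear interpolation in Q10 fixed point:
// k selects a power-of-two bucket of xsq_iq_q10[], xq the entry within it,
// and a_q10 / b_q10 are the weights for the two neighbouring table entries.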
912 
913 void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n_log2,
914                                   unsigned int qstep, int *rate,
915                                   int64_t *dist) {
916   // This function models the rate and distortion for a Laplacian
917   // source with given variance when quantized with a uniform quantizer
918   // with given stepsize. The closed form expressions are in:
919   // Hang and Chen, "Source Model for transform video coder and its
920   // application - Part I: Fundamental Theory", IEEE Trans. Circ.
921   // Sys. for Video Tech., April 1997.
922   if (var == 0) {
923     *rate = 0;
924     *dist = 0;
925   } else {
926     int d_q10, r_q10;
927     static const uint32_t MAX_XSQ_Q10 = 245727;
928     const uint64_t xsq_q10_64 =
929         (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
930     const int xsq_q10 = (int)AOMMIN(xsq_q10_64, MAX_XSQ_Q10);
931     model_rd_norm(xsq_q10, &r_q10, &d_q10);
932     *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - AV1_PROB_COST_SHIFT);
933     *dist = (var * (int64_t)d_q10 + 512) >> 10;
934   }
935 }
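// xsq_q10 is x^2 = n * qstep^2 / var in Q10 fixed point; it is clamped so the
// table interpolation in model_rd_norm() stays within the last entry.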
936 
937 static double interp_cubic(const double *p, double x) {
938   return p[1] + 0.5 * x *
939                     (p[2] - p[0] +
940                      x * (2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] +
941                           x * (3.0 * (p[1] - p[2]) + p[3] - p[0])));
942 }
943 
944 /*
945 static double interp_bicubic(const double *p, int p_stride, double x,
946                              double y) {
947   double q[4];
948   q[0] = interp_cubic(p, x);
949   q[1] = interp_cubic(p + p_stride, x);
950   q[2] = interp_cubic(p + 2 * p_stride, x);
951   q[3] = interp_cubic(p + 3 * p_stride, x);
952   return interp_cubic(q, y);
953 }
954 */
955 
956 static const uint8_t bsize_curvfit_model_cat_lookup[BLOCK_SIZES_ALL] = {
957   0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 1, 1, 2, 2, 3, 3
958 };
959 
960 static int sse_norm_curvfit_model_cat_lookup(double sse_norm) {
961   return (sse_norm > 16.0);
962 }
963 
964 static const double interp_rgrid_curv[4][65] = {
965   {
966       0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
967       0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
968       0.000000,    118.257702,  120.210658,  121.434853,  122.100487,
969       122.377758,  122.436865,  72.290102,   96.974289,   101.652727,
970       126.830141,  140.417377,  157.644879,  184.315291,  215.823873,
971       262.300169,  335.919859,  420.624173,  519.185032,  619.854243,
972       726.053595,  827.663369,  933.127475,  1037.988755, 1138.839609,
973       1233.342933, 1333.508064, 1428.760126, 1533.396364, 1616.952052,
974       1744.539319, 1803.413586, 1951.466618, 1994.227838, 2086.031680,
975       2148.635443, 2239.068450, 2222.590637, 2338.859809, 2402.929011,
976       2418.727875, 2435.342670, 2471.159469, 2523.187446, 2591.183827,
977       2674.905840, 2774.110714, 2888.555675, 3017.997952, 3162.194773,
978       3320.903365, 3493.880956, 3680.884773, 3881.672045, 4096.000000,
979   },
980   {
981       0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
982       0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
983       0.000000,    13.087244,   15.919735,   25.930313,   24.412411,
984       28.567417,   29.924194,   30.857010,   32.742979,   36.382570,
985       39.210386,   42.265690,   47.378572,   57.014850,   82.740067,
986       137.346562,  219.968084,  316.781856,  415.643773,  516.706538,
987       614.914364,  714.303763,  815.512135,  911.210485,  1008.501528,
988       1109.787854, 1213.772279, 1322.922561, 1414.752579, 1510.505641,
989       1615.741888, 1697.989032, 1780.123933, 1847.453790, 1913.742309,
990       1960.828122, 2047.500168, 2085.454095, 2129.230668, 2158.171824,
991       2182.231724, 2217.684864, 2269.589211, 2337.264824, 2420.618694,
992       2519.557814, 2633.989178, 2763.819779, 2908.956609, 3069.306660,
993       3244.776927, 3435.274401, 3640.706076, 3860.978945, 4096.000000,
994   },
995   {
996       0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
997       0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
998       0.000000,    4.656893,    5.123633,    5.594132,    6.162376,
999       6.918433,    7.768444,    8.739415,    10.105862,   11.477328,
1000       13.236604,   15.421030,   19.093623,   25.801871,   46.724612,
1001       98.841054,   181.113466,  272.586364,  359.499769,  445.546343,
1002       525.944439,  605.188743,  681.793483,  756.668359,  838.486885,
1003       926.950356,  1015.482542, 1113.353926, 1204.897193, 1288.871992,
1004       1373.464145, 1455.746628, 1527.796460, 1588.475066, 1658.144771,
1005       1710.302500, 1807.563351, 1863.197608, 1927.281616, 1964.450872,
1006       2022.719898, 2100.041145, 2185.205712, 2280.993936, 2387.616216,
1007       2505.282950, 2634.204540, 2774.591385, 2926.653884, 3090.602436,
1008       3266.647443, 3454.999303, 3655.868416, 3869.465182, 4096.000000,
1009   },
1010   {
1011       0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
1012       0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
1013       0.000000,    0.337370,    0.391916,    0.468839,    0.566334,
1014       0.762564,    1.069225,    1.384361,    1.787581,    2.293948,
1015       3.251909,    4.412991,    8.050068,    11.606073,   27.668092,
1016       65.227758,   128.463938,  202.097653,  262.715851,  312.464873,
1017       355.601398,  400.609054,  447.201352,  495.761568,  552.871938,
1018       619.067625,  691.984883,  773.753288,  860.628503,  946.262808,
1019       1019.805896, 1106.061360, 1178.422145, 1244.852258, 1302.173987,
1020       1399.650266, 1548.092912, 1545.928652, 1670.817500, 1694.523823,
1021       1779.195362, 1882.155494, 1990.662097, 2108.325181, 2235.456119,
1022       2372.366287, 2519.367059, 2676.769812, 2844.885918, 3024.026754,
1023       3214.503695, 3416.628115, 3630.711389, 3857.064892, 4096.000000,
1024   },
1025 };
1026 
1027 static const double interp_dgrid_curv[3][65] = {
1028   {
1029       16.000000, 15.962891, 15.925174, 15.886888, 15.848074, 15.808770,
1030       15.769015, 15.728850, 15.688313, 15.647445, 15.606284, 15.564870,
1031       15.525918, 15.483820, 15.373330, 15.126844, 14.637442, 14.184387,
1032       13.560070, 12.880717, 12.165995, 11.378144, 10.438769, 9.130790,
1033       7.487633,  5.688649,  4.267515,  3.196300,  2.434201,  1.834064,
1034       1.369920,  1.035921,  0.775279,  0.574895,  0.427232,  0.314123,
1035       0.233236,  0.171440,  0.128188,  0.092762,  0.067569,  0.049324,
1036       0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
1037       0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
1038       0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
1039       0.000348,  0.000193,  0.000085,  0.000021,  0.000000,
1040   },
1041   {
1042       16.000000, 15.996116, 15.984769, 15.966413, 15.941505, 15.910501,
1043       15.873856, 15.832026, 15.785466, 15.734633, 15.679981, 15.621967,
1044       15.560961, 15.460157, 15.288367, 15.052462, 14.466922, 13.921212,
1045       13.073692, 12.222005, 11.237799, 9.985848,  8.898823,  7.423519,
1046       5.995325,  4.773152,  3.744032,  2.938217,  2.294526,  1.762412,
1047       1.327145,  1.020728,  0.765535,  0.570548,  0.425833,  0.313825,
1048       0.232959,  0.171324,  0.128174,  0.092750,  0.067558,  0.049319,
1049       0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
1050       0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
1051       0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
1052       0.000348,  0.000193,  0.000085,  0.000021,  -0.000000,
1053   },
1054 };
1055 
1056 void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr,
1057                           double *rate_f, double *distbysse_f) {
1058   const double x_start = -15.5;
1059   const double x_end = 16.5;
1060   const double x_step = 0.5;
1061   const double epsilon = 1e-6;
1062   const int rcat = bsize_curvfit_model_cat_lookup[bsize];
1063   const int dcat = sse_norm_curvfit_model_cat_lookup(sse_norm);
1064   (void)x_end;
1065 
1066   xqr = AOMMAX(xqr, x_start + x_step + epsilon);
1067   xqr = AOMMIN(xqr, x_end - x_step - epsilon);
1068   const double x = (xqr - x_start) / x_step;
1069   const int xi = (int)floor(x);
1070   const double xo = x - xi;
1071 
1072   assert(xi > 0);
1073 
1074   const double *prate = &interp_rgrid_curv[rcat][(xi - 1)];
1075   *rate_f = interp_cubic(prate, xo);
1076   const double *pdist = &interp_dgrid_curv[dcat][(xi - 1)];
1077   *distbysse_f = interp_cubic(pdist, xo);
1078 }
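// xqr is clamped one step inside the grid so the 4-point cubic interpolation
// (samples xi - 1 .. xi + 2) always stays within the 65-entry tables.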
1079 
1080 static void get_entropy_contexts_plane(BLOCK_SIZE plane_bsize,
1081                                        const struct macroblockd_plane *pd,
1082                                        ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
1083                                        ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
1084   const int num_4x4_w = mi_size_wide[plane_bsize];
1085   const int num_4x4_h = mi_size_high[plane_bsize];
1086   const ENTROPY_CONTEXT *const above = pd->above_entropy_context;
1087   const ENTROPY_CONTEXT *const left = pd->left_entropy_context;
1088 
1089   memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
1090   memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
1091 }
1092 
1093 void av1_get_entropy_contexts(BLOCK_SIZE plane_bsize,
1094                               const struct macroblockd_plane *pd,
1095                               ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
1096                               ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
1097   assert(plane_bsize < BLOCK_SIZES_ALL);
1098   get_entropy_contexts_plane(plane_bsize, pd, t_above, t_left);
1099 }
1100 
1101 // Special clamping used in the encoder when calculating a prediction
1102 //
1103 // Logically, all pixel fetches used for prediction are clamped against the
1104 // edges of the frame. But doing this directly is slow, so instead we allocate
1105 // a finite border around the frame and fill it with copies of the outermost
1106 // pixels.
1107 //
1108 // Since this border is finite, we need to clamp the motion vector before
1109 // prediction in order to avoid out-of-bounds reads. At the same time, this
1110 // clamp must not change the prediction result.
1111 //
1112 // We can balance both of these concerns by calculating how far we would have
1113 // to go in each direction before the extended prediction region (the current
1114 // block + AOM_INTERP_EXTEND many pixels around the block) would be mapped
1115 // so that it touches the frame only at one row or column. This is a special
1116 // point because any more extreme MV will always lead to the same prediction.
1117 // So it is safe to clamp at that point.
1118 //
1119 // In the worst case, this requires a border of
1120 //   max_block_width + 2*AOM_INTERP_EXTEND = 128 + 2*4 = 136 pixels
1121 // around the frame edges.
1122 static inline void enc_clamp_mv(const AV1_COMMON *cm, const MACROBLOCKD *xd,
1123                                 MV *mv) {
1124   int bw = xd->width << MI_SIZE_LOG2;
1125   int bh = xd->height << MI_SIZE_LOG2;
1126 
1127   int px_to_left_edge = xd->mi_col << MI_SIZE_LOG2;
1128   int px_to_right_edge = (cm->mi_params.mi_cols - xd->mi_col) << MI_SIZE_LOG2;
1129   int px_to_top_edge = xd->mi_row << MI_SIZE_LOG2;
1130   int px_to_bottom_edge = (cm->mi_params.mi_rows - xd->mi_row) << MI_SIZE_LOG2;
1131 
1132   const SubpelMvLimits mv_limits = {
1133     .col_min = -GET_MV_SUBPEL(px_to_left_edge + bw + AOM_INTERP_EXTEND),
1134     .col_max = GET_MV_SUBPEL(px_to_right_edge + AOM_INTERP_EXTEND),
1135     .row_min = -GET_MV_SUBPEL(px_to_top_edge + bh + AOM_INTERP_EXTEND),
1136     .row_max = GET_MV_SUBPEL(px_to_bottom_edge + AOM_INTERP_EXTEND)
1137   };
1138   clamp_mv(mv, &mv_limits);
1139 }
1140 
1141 void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
1142                  int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
1143   const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
1144   const int_mv ref_mv =
1145       av1_get_ref_mv_from_stack(0, ref_frames, 0, &x->mbmi_ext);
1146   const int_mv ref_mv1 =
1147       av1_get_ref_mv_from_stack(0, ref_frames, 1, &x->mbmi_ext);
1148   MV pred_mv[MAX_MV_REF_CANDIDATES + 1];
1149   int num_mv_refs = 0;
1150   pred_mv[num_mv_refs++] = ref_mv.as_mv;
1151   if (ref_mv.as_int != ref_mv1.as_int) {
1152     pred_mv[num_mv_refs++] = ref_mv1.as_mv;
1153   }
1154 
1155   assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));
1156 
1157   const uint8_t *const src_y_ptr = x->plane[0].src.buf;
1158   int zero_seen = 0;
1159   int best_sad = INT_MAX;
1160   int max_mv = 0;
1161   // Get the sad for each candidate reference mv.
1162   for (int i = 0; i < num_mv_refs; ++i) {
1163     MV *this_mv = &pred_mv[i];
1164     enc_clamp_mv(&cpi->common, &x->e_mbd, this_mv);
1165 
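    // Round the 1/8-pel MV to the nearest full-pel offset so the SAD can be
    // computed directly on the reference buffer without interpolation.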
    const int fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
    const int fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
    max_mv = AOMMAX(max_mv, AOMMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);

    if (fp_row == 0 && fp_col == 0 && zero_seen) continue;
    zero_seen |= (fp_row == 0 && fp_col == 0);

    const uint8_t *const ref_y_ptr =
        &ref_y_buffer[ref_y_stride * fp_row + fp_col];
    // Find sad for current vector.
    const int this_sad = cpi->ppi->fn_ptr[block_size].sdf(
        src_y_ptr, x->plane[0].src.stride, ref_y_ptr, ref_y_stride);
    // Note if it is the best so far.
    if (this_sad < best_sad) {
      best_sad = this_sad;
    }
    if (i == 0)
      x->pred_mv0_sad[ref_frame] = this_sad;
    else if (i == 1)
      x->pred_mv1_sad[ref_frame] = this_sad;
  }

  // Record the largest MV magnitude and the best SAD found over the
  // candidate reference MVs.
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}

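// Point the per-plane prediction buffers at the given source frame and
// initialize their strides and scale factors for the block at
// (xd->mi_row, xd->mi_col).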
void av1_setup_pred_block(const MACROBLOCKD *xd,
                          struct buf_2d dst[MAX_MB_PLANE],
                          const YV12_BUFFER_CONFIG *src,
                          const struct scale_factors *scale,
                          const struct scale_factors *scale_uv,
                          const int num_planes) {
  dst[0].buf = src->y_buffer;
  dst[0].stride = src->y_stride;
  dst[1].buf = src->u_buffer;
  dst[2].buf = src->v_buffer;
  dst[1].stride = dst[2].stride = src->uv_stride;

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  for (int i = 0; i < num_planes; ++i) {
    setup_pred_plane(dst + i, xd->mi[0]->bsize, dst[i].buf,
                     i ? src->uv_crop_width : src->y_crop_width,
                     i ? src->uv_crop_height : src->y_crop_height,
                     dst[i].stride, mi_row, mi_col, i ? scale_uv : scale,
                     xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
  }
}

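// Return the pre-scaled copy of ref_frame if one was allocated and it is
// distinct from the reference buffer itself; otherwise return NULL.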
YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const AV1_COMP *cpi,
                                             int ref_frame) {
  assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
  RefCntBuffer *const scaled_buf = cpi->scaled_ref_buf[ref_frame - 1];
  const RefCntBuffer *const ref_buf =
      get_ref_frame_buf(&cpi->common, ref_frame);
  return (scaled_buf != ref_buf && scaled_buf != NULL) ? &scaled_buf->buf
                                                       : NULL;
}

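// Rate cost of signaling the interpolation filter(s) of the current block.
// Non-zero only when the frame-level filter is SWITCHABLE; with dual_filter
// enabled, the horizontal and vertical filters are costed separately.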
int av1_get_switchable_rate(const MACROBLOCK *x, const MACROBLOCKD *xd,
                            InterpFilter interp_filter, int dual_filter) {
  if (interp_filter == SWITCHABLE) {
    const MB_MODE_INFO *const mbmi = xd->mi[0];
    int inter_filter_cost = 0;
    for (int dir = 0; dir < 2; ++dir) {
      if (dir && !dual_filter) break;
      const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
      const InterpFilter filter =
          av1_extract_interp_filter(mbmi->interp_filters, dir);
      inter_filter_cost += x->mode_costs.switchable_interp_costs[ctx][filter];
    }
    return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost;
  } else {
    return 0;
  }
}

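// Baseline per-mode threshold multipliers for the RD mode loop. These values
// are adapted at run time by av1_update_rd_thresh_fact() below.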
void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
  RD_OPT *const rd = &cpi->rd;

  // Set baseline threshold values.
  av1_zero(rd->thresh_mult);

  rd->thresh_mult[THR_NEARESTMV] = 300;
  rd->thresh_mult[THR_NEARESTL2] = 300;
  rd->thresh_mult[THR_NEARESTL3] = 300;
  rd->thresh_mult[THR_NEARESTB] = 300;
  rd->thresh_mult[THR_NEARESTA2] = 300;
  rd->thresh_mult[THR_NEARESTA] = 300;
  rd->thresh_mult[THR_NEARESTG] = 300;

  rd->thresh_mult[THR_NEWMV] = 1000;
  rd->thresh_mult[THR_NEWL2] = 1000;
  rd->thresh_mult[THR_NEWL3] = 1000;
  rd->thresh_mult[THR_NEWB] = 1000;
  rd->thresh_mult[THR_NEWA2] = 1100;
  rd->thresh_mult[THR_NEWA] = 1000;
  rd->thresh_mult[THR_NEWG] = 1000;

  rd->thresh_mult[THR_NEARMV] = 1000;
  rd->thresh_mult[THR_NEARL2] = 1000;
  rd->thresh_mult[THR_NEARL3] = 1000;
  rd->thresh_mult[THR_NEARB] = 1000;
  rd->thresh_mult[THR_NEARA2] = 1000;
  rd->thresh_mult[THR_NEARA] = 1000;
  rd->thresh_mult[THR_NEARG] = 1000;

  rd->thresh_mult[THR_GLOBALMV] = 2200;
  rd->thresh_mult[THR_GLOBALL2] = 2000;
  rd->thresh_mult[THR_GLOBALL3] = 2000;
  rd->thresh_mult[THR_GLOBALB] = 2400;
  rd->thresh_mult[THR_GLOBALA2] = 2000;
  rd->thresh_mult[THR_GLOBALG] = 2000;
  rd->thresh_mult[THR_GLOBALA] = 2400;

  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] = 1100;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A] = 800;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA] = 900;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLB] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2B] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3B] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGB] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA2] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A2] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A2] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA2] = 1000;

  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL2] = 2000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL3] = 2000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLG] = 2000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTBA] = 2000;

  rd->thresh_mult[THR_COMP_NEAR_NEARLA] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLA] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLA] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWLA] = 1530;
  rd->thresh_mult[THR_COMP_NEW_NEARLA] = 1870;
  rd->thresh_mult[THR_COMP_NEW_NEWLA] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA] = 2750;

  rd->thresh_mult[THR_COMP_NEAR_NEARL2A] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL2A] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL2A] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL2A] = 1870;
  rd->thresh_mult[THR_COMP_NEW_NEARL2A] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL2A] = 1800;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL3A] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL3A] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL3A] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL3A] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL3A] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL3A] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A] = 3000;

  rd->thresh_mult[THR_COMP_NEAR_NEARGA] = 1320;
  rd->thresh_mult[THR_COMP_NEAREST_NEWGA] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTGA] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWGA] = 2040;
  rd->thresh_mult[THR_COMP_NEW_NEARGA] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWGA] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA] = 2250;

  rd->thresh_mult[THR_COMP_NEAR_NEARLB] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLB] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLB] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWLB] = 1360;
  rd->thresh_mult[THR_COMP_NEW_NEARLB] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWLB] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLB] = 2250;

  rd->thresh_mult[THR_COMP_NEAR_NEARL2B] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL2B] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL2B] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL2B] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL2B] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL2B] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2B] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL3B] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL3B] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL3B] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL3B] = 1870;
  rd->thresh_mult[THR_COMP_NEW_NEARL3B] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL3B] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3B] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARGB] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWGB] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTGB] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWGB] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARGB] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWGB] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGB] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARLA2] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLA2] = 1800;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLA2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWLA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARLA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWLA2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA2] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL2A2] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL2A2] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL2A2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL2A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL2A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL2A2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A2] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL3A2] = 1440;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL3A2] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL3A2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL3A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL3A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL3A2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A2] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARGA2] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWGA2] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTGA2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWGA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARGA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWGA2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA2] = 2750;

  rd->thresh_mult[THR_COMP_NEAR_NEARLL2] = 1600;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLL2] = 2000;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLL2] = 2000;
  rd->thresh_mult[THR_COMP_NEAR_NEWLL2] = 2640;
  rd->thresh_mult[THR_COMP_NEW_NEARLL2] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEWLL2] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL2] = 3200;

  rd->thresh_mult[THR_COMP_NEAR_NEARLL3] = 1600;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLL3] = 2000;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLL3] = 1800;
  rd->thresh_mult[THR_COMP_NEAR_NEWLL3] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEARLL3] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEWLL3] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL3] = 3200;

  rd->thresh_mult[THR_COMP_NEAR_NEARLG] = 1760;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLG] = 2400;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLG] = 2000;
  rd->thresh_mult[THR_COMP_NEAR_NEWLG] = 1760;
  rd->thresh_mult[THR_COMP_NEW_NEARLG] = 2640;
  rd->thresh_mult[THR_COMP_NEW_NEWLG] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLG] = 3200;

  rd->thresh_mult[THR_COMP_NEAR_NEARBA] = 1600;
  rd->thresh_mult[THR_COMP_NEAREST_NEWBA] = 2000;
  rd->thresh_mult[THR_COMP_NEW_NEARESTBA] = 2000;
  rd->thresh_mult[THR_COMP_NEAR_NEWBA] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEARBA] = 1980;
  rd->thresh_mult[THR_COMP_NEW_NEWBA] = 2640;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALBA] = 3200;

  rd->thresh_mult[THR_DC] = 1000;
  rd->thresh_mult[THR_PAETH] = 1000;
  rd->thresh_mult[THR_SMOOTH] = 2200;
  rd->thresh_mult[THR_SMOOTH_V] = 2000;
  rd->thresh_mult[THR_SMOOTH_H] = 2000;
  rd->thresh_mult[THR_H_PRED] = 2000;
  rd->thresh_mult[THR_V_PRED] = 1800;
  rd->thresh_mult[THR_D135_PRED] = 2500;
  rd->thresh_mult[THR_D203_PRED] = 2000;
  rd->thresh_mult[THR_D157_PRED] = 2500;
  rd->thresh_mult[THR_D67_PRED] = 2000;
  rd->thresh_mult[THR_D113_PRED] = 2500;
  rd->thresh_mult[THR_D45_PRED] = 2500;
}

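// Adaptive RD threshold update over a range of block sizes: the factor for
// the winning mode is decreased, while the factors for all other modes in
// [mode_start, mode_end) are increased, capped at max_rd_thresh_factor.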
static inline void update_thr_fact(int (*factor_buf)[MAX_MODES],
                                   THR_MODES best_mode_index,
                                   THR_MODES mode_start, THR_MODES mode_end,
                                   BLOCK_SIZE min_size, BLOCK_SIZE max_size,
                                   int max_rd_thresh_factor) {
  for (THR_MODES mode = mode_start; mode < mode_end; ++mode) {
    for (BLOCK_SIZE bs = min_size; bs <= max_size; ++bs) {
      int *const fact = &factor_buf[bs][mode];
      if (mode == best_mode_index) {
        *fact -= (*fact >> RD_THRESH_LOG_DEC_FACTOR);
      } else {
        *fact = AOMMIN(*fact + RD_THRESH_INC, max_rd_thresh_factor);
      }
    }
  }
}

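// Adapt the RD threshold factors based on the winning mode: apply
// update_thr_fact() over the given inter and intra mode ranges. For regular
// block sizes the update covers a window of sizes around bsize
// (bsize - 2 .. bsize + 2, clamped to [BLOCK_4X4, sb_size]); 1:4 and 4:1
// blocks only update their own size.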
void av1_update_rd_thresh_fact(
    const AV1_COMMON *const cm, int (*factor_buf)[MAX_MODES],
    int use_adaptive_rd_thresh, BLOCK_SIZE bsize, THR_MODES best_mode_index,
    THR_MODES inter_mode_start, THR_MODES inter_mode_end,
    THR_MODES intra_mode_start, THR_MODES intra_mode_end) {
  assert(use_adaptive_rd_thresh > 0);
  const int max_rd_thresh_factor = use_adaptive_rd_thresh * RD_THRESH_MAX_FACT;

  const int bsize_is_1_to_4 = bsize > cm->seq_params->sb_size;
  BLOCK_SIZE min_size, max_size;
  if (bsize_is_1_to_4) {
    // This part handles block sizes with 1:4 and 4:1 aspect ratios
    // TODO(any): Experiment with threshold update for parent/child blocks
    min_size = bsize;
    max_size = bsize;
  } else {
    min_size = AOMMAX(bsize - 2, BLOCK_4X4);
    max_size = AOMMIN(bsize + 2, (int)cm->seq_params->sb_size);
  }

  update_thr_fact(factor_buf, best_mode_index, inter_mode_start, inter_mode_end,
                  min_size, max_size, max_rd_thresh_factor);
  update_thr_fact(factor_buf, best_mode_index, intra_mode_start, intra_mode_end,
                  min_size, max_size, max_rd_thresh_factor);
}

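// Cost penalty applied to intra modes, derived from the DC quantizer. The
// per-bit-depth factors (20 * q, 5 * q, 5 * q / 4) appear to compensate for
// the larger quantizer ranges returned by av1_dc_quant_QTX() at 10 and 12
// bits, keeping the penalty comparable across bit depths.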
int av1_get_intra_cost_penalty(int qindex, int qdelta,
                               aom_bit_depth_t bit_depth) {
  const int q = av1_dc_quant_QTX(qindex, qdelta, bit_depth);
  switch (bit_depth) {
    case AOM_BITS_8: return 20 * q;
    case AOM_BITS_10: return 5 * q;
    case AOM_BITS_12: return ROUND_POWER_OF_TWO(5 * q, 2);
    default:
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
      return -1;
  }
}