• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <assert.h>
13 #include <limits.h>
14 #include <math.h>
15 #include <stdio.h>
16 
17 #include "aom_dsp/aom_dsp_common.h"
18 #include "aom_mem/aom_mem.h"
19 #include "aom_ports/bitops.h"
20 #include "aom_ports/mem.h"
21 #include "aom_ports/aom_once.h"
22 
23 #include "av1/common/common.h"
24 #include "av1/common/entropy.h"
25 #include "av1/common/entropymode.h"
26 #include "av1/common/pred_common.h"
27 #include "av1/common/quant_common.h"
28 #include "av1/common/reconinter.h"
29 #include "av1/common/reconintra.h"
30 #include "av1/common/seg_common.h"
31 
32 #include "av1/encoder/cost.h"
33 #include "av1/encoder/encodemv.h"
34 #include "av1/encoder/encoder.h"
35 #include "av1/encoder/nonrd_opt.h"
36 #include "av1/encoder/ratectrl.h"
37 #include "av1/encoder/rd.h"
38 
// Exponent applied to the scaled quantizer value when the per-segment RD
// threshold factor is derived (see compute_rd_thresh_factor()).
#define RD_THRESH_POW (1.25)
40 
41 // The baseline rd thresholds for breaking out of the rd loop for
42 // certain modes are assumed to be based on 8x8 blocks.
43 // This table is used to correct for block size.
44 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
45 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES_ALL] = {
46   2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, 48, 48, 64, 4, 4, 8, 8, 16, 16
47 };
48 
49 static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA]
50                                             [EXT_TX_SIZES] = {
51                                               { 1, 1, 1, 1 },  // unused
52                                               { 1, 1, 0, 0 },
53                                               { 0, 0, 1, 0 },
54                                             };
55 
56 static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER]
57                                             [EXT_TX_SIZES] = {
58                                               { 1, 1, 1, 1 },  // unused
59                                               { 1, 1, 0, 0 },
60                                               { 0, 0, 1, 0 },
61                                               { 0, 1, 1, 1 },
62                                             };
63 
64 static const int av1_ext_tx_set_idx_to_type[2][AOMMAX(EXT_TX_SETS_INTRA,
65                                                       EXT_TX_SETS_INTER)] = {
66   {
67       // Intra
68       EXT_TX_SET_DCTONLY,
69       EXT_TX_SET_DTT4_IDTX_1DDCT,
70       EXT_TX_SET_DTT4_IDTX,
71   },
72   {
73       // Inter
74       EXT_TX_SET_DCTONLY,
75       EXT_TX_SET_ALL16,
76       EXT_TX_SET_DTT9_IDTX_1DDCT,
77       EXT_TX_SET_DCT_IDTX,
78   },
79 };
80 
// Fills the mode-related cost tables of 'mode_costs' from the CDFs stored in
// the frame context 'fc'. Every av1_cost_tokens_from_cdf() call converts one
// CDF into the matching table of symbol costs.
//
// Skip-mode costs are only derived when skip mode is enabled for the current
// frame, filter-intra and wedge costs only for block sizes that allow those
// tools, and all inter-prediction tables only when the frame is not
// intra-only.
void av1_fill_mode_rates(AV1_COMMON *const cm, ModeCosts *mode_costs,
                         FRAME_CONTEXT *fc) {
  // Partition symbols.
  for (int ctx = 0; ctx < PARTITION_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->partition_cost[ctx],
                             fc->partition_cdf[ctx], NULL);
  }

  // Skip mode (frame dependent) and transform-skip flags.
  if (cm->current_frame.skip_mode_info.skip_mode_flag) {
    for (int ctx = 0; ctx < SKIP_MODE_CONTEXTS; ++ctx) {
      av1_cost_tokens_from_cdf(mode_costs->skip_mode_cost[ctx],
                               fc->skip_mode_cdfs[ctx], NULL);
    }
  }
  for (int ctx = 0; ctx < SKIP_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->skip_txfm_cost[ctx],
                             fc->skip_txfm_cdfs[ctx], NULL);
  }

  // Luma and chroma intra prediction modes.
  for (int ctx0 = 0; ctx0 < KF_MODE_CONTEXTS; ++ctx0) {
    for (int ctx1 = 0; ctx1 < KF_MODE_CONTEXTS; ++ctx1) {
      av1_cost_tokens_from_cdf(mode_costs->y_mode_costs[ctx0][ctx1],
                               fc->kf_y_cdf[ctx0][ctx1], NULL);
    }
  }
  for (int group = 0; group < BLOCK_SIZE_GROUPS; ++group) {
    av1_cost_tokens_from_cdf(mode_costs->mbmode_cost[group],
                             fc->y_mode_cdf[group], NULL);
  }
  for (int cfl = 0; cfl < CFL_ALLOWED_TYPES; ++cfl) {
    for (int mode = 0; mode < INTRA_MODES; ++mode) {
      av1_cost_tokens_from_cdf(mode_costs->intra_uv_mode_cost[cfl][mode],
                               fc->uv_mode_cdf[cfl][mode], NULL);
    }
  }

  // Filter intra: mode costs, then the per-block-size flag where allowed.
  av1_cost_tokens_from_cdf(mode_costs->filter_intra_mode_cost,
                           fc->filter_intra_mode_cdf, NULL);
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    if (av1_filter_intra_allowed_bsize(cm, bsize)) {
      av1_cost_tokens_from_cdf(mode_costs->filter_intra_cost[bsize],
                               fc->filter_intra_cdfs[bsize], NULL);
    }
  }

  // Switchable interpolation filters.
  for (int ctx = 0; ctx < SWITCHABLE_FILTER_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->switchable_interp_costs[ctx],
                             fc->switchable_interp_cdf[ctx], NULL);
  }

  // Palette sizes, modes and colour indices.
  for (int bctx = 0; bctx < PALATTE_BSIZE_CTXS; ++bctx) {
    av1_cost_tokens_from_cdf(mode_costs->palette_y_size_cost[bctx],
                             fc->palette_y_size_cdf[bctx], NULL);
    av1_cost_tokens_from_cdf(mode_costs->palette_uv_size_cost[bctx],
                             fc->palette_uv_size_cdf[bctx], NULL);
    for (int mctx = 0; mctx < PALETTE_Y_MODE_CONTEXTS; ++mctx) {
      av1_cost_tokens_from_cdf(mode_costs->palette_y_mode_cost[bctx][mctx],
                               fc->palette_y_mode_cdf[bctx][mctx], NULL);
    }
  }
  for (int ctx = 0; ctx < PALETTE_UV_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->palette_uv_mode_cost[ctx],
                             fc->palette_uv_mode_cdf[ctx], NULL);
  }
  for (int size = 0; size < PALETTE_SIZES; ++size) {
    for (int ctx = 0; ctx < PALETTE_COLOR_INDEX_CONTEXTS; ++ctx) {
      av1_cost_tokens_from_cdf(mode_costs->palette_y_color_cost[size][ctx],
                               fc->palette_y_color_index_cdf[size][ctx], NULL);
      av1_cost_tokens_from_cdf(mode_costs->palette_uv_color_cost[size][ctx],
                               fc->palette_uv_color_index_cdf[size][ctx], NULL);
    }
  }

  // CfL: alpha costs per joint sign. A zero sign has no alpha symbol, so its
  // table is cleared. The joint sign cost is folded into the U table only.
  int joint_sign_cost[CFL_JOINT_SIGNS];
  av1_cost_tokens_from_cdf(joint_sign_cost, fc->cfl_sign_cdf, NULL);
  for (int js = 0; js < CFL_JOINT_SIGNS; js++) {
    int *const u_cost = mode_costs->cfl_cost[js][CFL_PRED_U];
    int *const v_cost = mode_costs->cfl_cost[js][CFL_PRED_V];

    if (CFL_SIGN_U(js) != CFL_SIGN_ZERO) {
      const aom_cdf_prob *u_cdf = fc->cfl_alpha_cdf[CFL_CONTEXT_U(js)];
      av1_cost_tokens_from_cdf(u_cost, u_cdf, NULL);
    } else {
      memset(u_cost, 0, CFL_ALPHABET_SIZE * sizeof(*u_cost));
    }

    if (CFL_SIGN_V(js) != CFL_SIGN_ZERO) {
      const aom_cdf_prob *v_cdf = fc->cfl_alpha_cdf[CFL_CONTEXT_V(js)];
      av1_cost_tokens_from_cdf(v_cost, v_cdf, NULL);
    } else {
      memset(v_cost, 0, CFL_ALPHABET_SIZE * sizeof(*v_cost));
    }

    for (int a = 0; a < CFL_ALPHABET_SIZE; a++) {
      u_cost[a] += joint_sign_cost[js];
    }
  }

  // Transform size and transform partitioning.
  for (int cat = 0; cat < MAX_TX_CATS; ++cat) {
    for (int ctx = 0; ctx < TX_SIZE_CONTEXTS; ++ctx) {
      av1_cost_tokens_from_cdf(mode_costs->tx_size_cost[cat][ctx],
                               fc->tx_size_cdf[cat][ctx], NULL);
    }
  }
  for (int ctx = 0; ctx < TXFM_PARTITION_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->txfm_partition_cost[ctx],
                             fc->txfm_partition_cdf[ctx], NULL);
  }

  // Extended transform types. Set 0 is skipped; the remaining sets are only
  // filled for the transform sizes flagged in the use_*_ext_tx_for_txsize
  // tables.
  for (int tx = TX_4X4; tx < EXT_TX_SIZES; ++tx) {
    for (int set = 1; set < EXT_TX_SETS_INTER; ++set) {
      if (!use_inter_ext_tx_for_txsize[set][tx]) continue;
      av1_cost_tokens_from_cdf(
          mode_costs->inter_tx_type_costs[set][tx],
          fc->inter_ext_tx_cdf[set][tx],
          av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][set]]);
    }
    for (int set = 1; set < EXT_TX_SETS_INTRA; ++set) {
      if (!use_intra_ext_tx_for_txsize[set][tx]) continue;
      for (int mode = 0; mode < INTRA_MODES; ++mode) {
        av1_cost_tokens_from_cdf(
            mode_costs->intra_tx_type_costs[set][tx][mode],
            fc->intra_ext_tx_cdf[set][tx][mode],
            av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[0][set]]);
      }
    }
  }

  // Angle deltas and intra block copy.
  for (int dir = 0; dir < DIRECTIONAL_MODES; ++dir) {
    av1_cost_tokens_from_cdf(mode_costs->angle_delta_cost[dir],
                             fc->angle_delta_cdf[dir], NULL);
  }
  av1_cost_tokens_from_cdf(mode_costs->intrabc_cost, fc->intrabc_cdf, NULL);

  // Segment id prediction.
  for (int ctx = 0; ctx < SPATIAL_PREDICTION_PROBS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->spatial_pred_cost[ctx],
                             fc->seg.spatial_pred_seg_cdf[ctx], NULL);
  }
  for (int ctx = 0; ctx < SEG_TEMPORAL_PRED_CTXS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->tmp_pred_cost[ctx],
                             fc->seg.pred_cdf[ctx], NULL);
  }

  // Everything below is only derived for frames that are not intra-only.
  if (frame_is_intra_only(cm)) return;

  // Reference frame signalling.
  for (int ctx = 0; ctx < COMP_INTER_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->comp_inter_cost[ctx],
                             fc->comp_inter_cdf[ctx], NULL);
  }
  for (int ctx = 0; ctx < REF_CONTEXTS; ++ctx) {
    for (int ref = 0; ref < SINGLE_REFS - 1; ++ref) {
      av1_cost_tokens_from_cdf(mode_costs->single_ref_cost[ctx][ref],
                               fc->single_ref_cdf[ctx][ref], NULL);
    }
  }
  for (int ctx = 0; ctx < COMP_REF_TYPE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->comp_ref_type_cost[ctx],
                             fc->comp_ref_type_cdf[ctx], NULL);
  }
  for (int ctx = 0; ctx < UNI_COMP_REF_CONTEXTS; ++ctx) {
    for (int ref = 0; ref < UNIDIR_COMP_REFS - 1; ++ref) {
      av1_cost_tokens_from_cdf(mode_costs->uni_comp_ref_cost[ctx][ref],
                               fc->uni_comp_ref_cdf[ctx][ref], NULL);
    }
  }
  for (int ctx = 0; ctx < REF_CONTEXTS; ++ctx) {
    for (int ref = 0; ref < FWD_REFS - 1; ++ref) {
      av1_cost_tokens_from_cdf(mode_costs->comp_ref_cost[ctx][ref],
                               fc->comp_ref_cdf[ctx][ref], NULL);
    }
  }
  for (int ctx = 0; ctx < REF_CONTEXTS; ++ctx) {
    for (int ref = 0; ref < BWD_REFS - 1; ++ref) {
      av1_cost_tokens_from_cdf(mode_costs->comp_bwdref_cost[ctx][ref],
                               fc->comp_bwdref_cdf[ctx][ref], NULL);
    }
  }
  for (int ctx = 0; ctx < INTRA_INTER_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->intra_inter_cost[ctx],
                             fc->intra_inter_cdf[ctx], NULL);
  }

  // Inter prediction modes.
  for (int ctx = 0; ctx < NEWMV_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->newmv_mode_cost[ctx],
                             fc->newmv_cdf[ctx], NULL);
  }
  for (int ctx = 0; ctx < GLOBALMV_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->zeromv_mode_cost[ctx],
                             fc->zeromv_cdf[ctx], NULL);
  }
  for (int ctx = 0; ctx < REFMV_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->refmv_mode_cost[ctx],
                             fc->refmv_cdf[ctx], NULL);
  }
  for (int ctx = 0; ctx < DRL_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->drl_mode_cost0[ctx], fc->drl_cdf[ctx],
                             NULL);
  }
  for (int ctx = 0; ctx < INTER_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->inter_compound_mode_cost[ctx],
                             fc->inter_compound_mode_cdf[ctx], NULL);
  }

  // Compound prediction types and wedge indices.
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    av1_cost_tokens_from_cdf(mode_costs->compound_type_cost[bsize],
                             fc->compound_type_cdf[bsize], NULL);
  }
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    if (av1_is_wedge_used(bsize)) {
      av1_cost_tokens_from_cdf(mode_costs->wedge_idx_cost[bsize],
                               fc->wedge_idx_cdf[bsize], NULL);
    }
  }

  // Inter-intra prediction.
  for (int group = 0; group < BLOCK_SIZE_GROUPS; ++group) {
    av1_cost_tokens_from_cdf(mode_costs->interintra_cost[group],
                             fc->interintra_cdf[group], NULL);
    av1_cost_tokens_from_cdf(mode_costs->interintra_mode_cost[group],
                             fc->interintra_mode_cdf[group], NULL);
  }
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    av1_cost_tokens_from_cdf(mode_costs->wedge_interintra_cost[bsize],
                             fc->wedge_interintra_cdf[bsize], NULL);
  }

  // Motion modes; block sizes below BLOCK_8X8 are left untouched.
  for (int bsize = BLOCK_8X8; bsize < BLOCK_SIZES_ALL; bsize++) {
    av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost[bsize],
                             fc->motion_mode_cdf[bsize], NULL);
  }
  for (int bsize = BLOCK_8X8; bsize < BLOCK_SIZES_ALL; bsize++) {
    av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost1[bsize],
                             fc->obmc_cdf[bsize], NULL);
  }

  // Compound index and compound group index.
  for (int ctx = 0; ctx < COMP_INDEX_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->comp_idx_cost[ctx],
                             fc->compound_index_cdf[ctx], NULL);
  }
  for (int ctx = 0; ctx < COMP_GROUP_IDX_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->comp_group_idx_cost[ctx],
                             fc->comp_group_idx_cdf[ctx], NULL);
  }
}
321 
// Fills the three loop-restoration cost tables of 'mode_costs' (sgrproj,
// wiener and switchable) from the corresponding CDFs in 'fc'. The three
// conversions are independent of each other.
void av1_fill_lr_rates(ModeCosts *mode_costs, FRAME_CONTEXT *fc) {
  av1_cost_tokens_from_cdf(mode_costs->sgrproj_restore_cost,
                           fc->sgrproj_restore_cdf, NULL);
  av1_cost_tokens_from_cdf(mode_costs->wiener_restore_cost,
                           fc->wiener_restore_cdf, NULL);
  av1_cost_tokens_from_cdf(mode_costs->switchable_restore_cost,
                           fc->switchable_restore_cdf, NULL);
}
330 
331 // Values are now correlated to quantizer.
332 static int sad_per_bit_lut_8[QINDEX_RANGE];
333 static int sad_per_bit_lut_10[QINDEX_RANGE];
334 static int sad_per_bit_lut_12[QINDEX_RANGE];
335 
// Populates the first 'range' entries of 'bit16lut' for the given bit depth.
// Each entry is a linear function of the real quantizer value for that
// qindex, truncated to int. A formula is used rather than a hand-tuned table
// so that experimental changes to the quantizer tables are picked up
// automatically.
static void init_me_luts_bd(int *bit16lut, int range,
                            aom_bit_depth_t bit_depth) {
  for (int qindex = 0; qindex < range; ++qindex) {
    const double q = av1_convert_qindex_to_q(qindex, bit_depth);
    bit16lut[qindex] = (int)(0.0418 * q + 2.4107);
  }
}
347 
init_me_luts(void)348 static void init_me_luts(void) {
349   init_me_luts_bd(sad_per_bit_lut_8, QINDEX_RANGE, AOM_BITS_8);
350   init_me_luts_bd(sad_per_bit_lut_10, QINDEX_RANGE, AOM_BITS_10);
351   init_me_luts_bd(sad_per_bit_lut_12, QINDEX_RANGE, AOM_BITS_12);
352 }
353 
av1_init_me_luts(void)354 void av1_init_me_luts(void) { aom_once(init_me_luts); }
355 
// ARF boost adjustment factors, indexed by boost index (0..15). The value is
// applied as rdmult += (rdmult * factor) >> 7 in av1_compute_rd_mult().
static const int rd_boost_factor[16] = {
  64, 32, 32, 32, 24, 16, 12, 12,
  8,  8,  4,  4,  2,  2,  1,  0
};
358 
// Layer depth adjustment factors, indexed by layer depth (0..6). The value is
// applied as rdmult = (rdmult * factor) >> 7 in av1_compute_rd_mult().
static const int rd_layer_depth_factor[7] = { 160, 160, 160, 160,
                                              192, 208, 224 };
362 
// Default rd multiplier for inter frames: a linear function of the argument.
// The coefficients are a first pass estimate based on data from a previous
// Vizer run.
// Note: although the parameter is named 'qindex', the call site in
// av1_compute_rd_mult_based_on_qindex() passes the DC quantizer value.
static double def_inter_rd_multiplier(int qindex) {
  const double offset = 3.2;
  const double slope = 0.0015;
  return offset + (slope * (double)qindex);
}
369 
// Default rd multiplier for ARF/Golden frames: a linear function of the
// argument. The coefficients are a first pass estimate based on data from a
// previous Vizer run.
// Note: although the parameter is named 'qindex', the call site in
// av1_compute_rd_mult_based_on_qindex() passes the DC quantizer value.
static double def_arf_rd_multiplier(int qindex) {
  const double offset = 3.25;
  const double slope = 0.0015;
  return offset + (slope * (double)qindex);
}
376 
// Default rd multiplier for key frames: a linear function of the argument.
// The coefficients are a first pass estimate based on data from a previous
// Vizer run.
// Note: although the parameter is named 'qindex', the call site in
// av1_compute_rd_mult_based_on_qindex() passes the DC quantizer value.
static double def_kf_rd_multiplier(int qindex) {
  const double offset = 3.3;
  const double slope = 0.0015;
  return offset + (slope * (double)qindex);
}
383 
// Computes the base rd multiplier for a frame from its qindex.
//
// The DC quantizer value q for 'qindex' is squared and scaled by a default
// multiplier chosen by frame update type (key frame, GF/ARF, or any other
// type). For 10- and 12-bit content the result is then rounded down by 4 and
// 8 bits respectively.
//
// Returns the multiplier clamped to [1, INT_MAX], or -1 (after asserting) when
// 'bit_depth' is not one of AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12.
int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
                                        FRAME_UPDATE_TYPE update_type,
                                        int qindex) {
  const int q = av1_dc_quant_QTX(qindex, 0, bit_depth);

  // Note that the default multipliers are evaluated on q, not on qindex.
  double def_rd_q_mult;
  if (update_type == KF_UPDATE) {
    def_rd_q_mult = def_kf_rd_multiplier(q);
  } else if ((update_type == GF_UPDATE) || (update_type == ARF_UPDATE)) {
    def_rd_q_mult = def_arf_rd_multiplier(q);
  } else {
    def_rd_q_mult = def_inter_rd_multiplier(q);
  }

  const int64_t q_squared = q * q;
  int64_t rdmult = (int64_t)((double)q_squared * def_rd_q_mult);

  switch (bit_depth) {
    case AOM_BITS_8: break;
    case AOM_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
    case AOM_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
    default:
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
      return -1;
  }

  if (rdmult <= 0) return 1;
  return (int)AOMMIN(rdmult, INT_MAX);
}
410 
av1_compute_rd_mult(const int qindex,const aom_bit_depth_t bit_depth,const FRAME_UPDATE_TYPE update_type,const int layer_depth,const int boost_index,const FRAME_TYPE frame_type,const int use_fixed_qp_offsets,const int is_stat_consumption_stage)411 int av1_compute_rd_mult(const int qindex, const aom_bit_depth_t bit_depth,
412                         const FRAME_UPDATE_TYPE update_type,
413                         const int layer_depth, const int boost_index,
414                         const FRAME_TYPE frame_type,
415                         const int use_fixed_qp_offsets,
416                         const int is_stat_consumption_stage) {
417   int64_t rdmult =
418       av1_compute_rd_mult_based_on_qindex(bit_depth, update_type, qindex);
419   if (is_stat_consumption_stage && !use_fixed_qp_offsets &&
420       (frame_type != KEY_FRAME)) {
421     // Layer depth adjustment
422     rdmult = (rdmult * rd_layer_depth_factor[layer_depth]) >> 7;
423     // ARF boost adjustment
424     rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
425   }
426   return (int)rdmult;
427 }
428 
// Returns the qindex offset that moves the DC quantizer from its value at
// 'qindex' towards q / sqrt(beta).
//
// The target quantizer is rint(q / sqrt(beta)). If it equals the current
// quantizer the offset is 0. Otherwise qindex is stepped one at a time (down
// when the target is smaller, up when it is larger) until the quantizer at
// the new index reaches or passes the target, or the index hits 0 or MAXQ.
// 'beta' must be positive.
int av1_get_deltaq_offset(aom_bit_depth_t bit_depth, int qindex, double beta) {
  assert(beta > 0.0);
  const int start_qindex = qindex;
  const int start_q = av1_dc_quant_QTX(qindex, 0, bit_depth);
  const int target_q = (int)rint(start_q / sqrt(beta));

  if (target_q == start_q) return 0;

  if (target_q < start_q) {
    // Search downwards for the first index whose quantizer is <= target.
    while (qindex > 0) {
      --qindex;
      if (av1_dc_quant_QTX(qindex, 0, bit_depth) <= target_q) break;
    }
  } else {
    // Search upwards for the first index whose quantizer is >= target.
    while (qindex < MAXQ) {
      ++qindex;
      if (av1_dc_quant_QTX(qindex, 0, bit_depth) >= target_q) break;
    }
  }
  return qindex - start_qindex;
}
456 
av1_adjust_q_from_delta_q_res(int delta_q_res,int prev_qindex,int curr_qindex)457 int av1_adjust_q_from_delta_q_res(int delta_q_res, int prev_qindex,
458                                   int curr_qindex) {
459   curr_qindex = clamp(curr_qindex, delta_q_res, 256 - delta_q_res);
460   const int sign_deltaq_index = curr_qindex - prev_qindex >= 0 ? 1 : -1;
461   const int deltaq_deadzone = delta_q_res / 4;
462   const int qmask = ~(delta_q_res - 1);
463   int abs_deltaq_index = abs(curr_qindex - prev_qindex);
464   abs_deltaq_index = (abs_deltaq_index + deltaq_deadzone) & qmask;
465   int adjust_qindex = prev_qindex + sign_deltaq_index * abs_deltaq_index;
466   adjust_qindex = AOMMAX(adjust_qindex, MINQ + 1);
467   return adjust_qindex;
468 }
469 
av1_get_adaptive_rdmult(const AV1_COMP * cpi,double beta)470 int av1_get_adaptive_rdmult(const AV1_COMP *cpi, double beta) {
471   assert(beta > 0.0);
472   const AV1_COMMON *cm = &cpi->common;
473 
474   const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
475   const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
476   const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
477   const FRAME_TYPE frame_type = cm->current_frame.frame_type;
478 
479   const int qindex_rdmult = cm->quant_params.base_qindex;
480   return (int)(av1_compute_rd_mult(
481                    qindex_rdmult, cm->seq_params->bit_depth,
482                    cpi->ppi->gf_group.update_type[cpi->gf_frame_index],
483                    layer_depth, boost_index, frame_type,
484                    cpi->oxcf.q_cfg.use_fixed_qp_offsets,
485                    is_stat_consumption_stage(cpi)) /
486                beta);
487 }
488 
// Returns the RD threshold factor for 'qindex' at the given bit depth.
//
// The DC quantizer is first divided by a per-bit-depth scale (4, 16 or 64 for
// 8, 10 or 12 bits). The factor is then pow(q, RD_THRESH_POW) * 5.12,
// truncated to int and never smaller than 8. Returns -1 (after asserting) for
// an unsupported bit depth.
static int compute_rd_thresh_factor(int qindex, aom_bit_depth_t bit_depth) {
  double scale;
  switch (bit_depth) {
    case AOM_BITS_8: scale = 4.0; break;
    case AOM_BITS_10: scale = 16.0; break;
    case AOM_BITS_12: scale = 64.0; break;
    default:
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
      return -1;
  }
  const double q = av1_dc_quant_QTX(qindex, 0, bit_depth) / scale;

  // TODO(debargha): Adjust the function below.
  const int factor = (int)(pow(q, RD_THRESH_POW) * 5.12);
  return AOMMAX(factor, 8);
}
506 
av1_set_sad_per_bit(const AV1_COMP * cpi,int * sadperbit,int qindex)507 void av1_set_sad_per_bit(const AV1_COMP *cpi, int *sadperbit, int qindex) {
508   switch (cpi->common.seq_params->bit_depth) {
509     case AOM_BITS_8: *sadperbit = sad_per_bit_lut_8[qindex]; break;
510     case AOM_BITS_10: *sadperbit = sad_per_bit_lut_10[qindex]; break;
511     case AOM_BITS_12: *sadperbit = sad_per_bit_lut_12[qindex]; break;
512     default:
513       assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
514   }
515 }
516 
// Fills rd->threshes[segment][block size][mode] from rd->thresh_mult[].
//
// In non-RD pick mode only the modes reachable through the real-time
// reference/mode lists are updated; otherwise all MAX_MODES entries are. For
// every segment the threshold factor is derived from the segment qindex
// (offset by the luma DC delta and clamped to [0, MAXQ]) and scaled by the
// block size factor. A threshold whose product would not fit in an int is
// saturated at INT_MAX.
static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd,
                                 int use_nonrd_pick_mode) {
  THR_MODES mode_indices[RTC_REFS * RTC_MODES] = { 0 };
  int num_modes_count = MAX_MODES;

  if (use_nonrd_pick_mode) {
    // Collect the mode indices used by the real-time path.
    num_modes_count = 0;
    for (int r_idx = 0; r_idx < RTC_REFS; r_idx++) {
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
      if (ref == INTRA_FRAME) {
        for (int m = 0; m < RTC_INTRA_MODES; m++) {
          mode_indices[num_modes_count++] =
              mode_idx[ref][mode_offset(intra_mode_list[m])];
        }
      } else {
        for (int m = 0; m < RTC_INTER_MODES; m++) {
          mode_indices[num_modes_count++] =
              mode_idx[ref][mode_offset(inter_mode_list[m])];
        }
      }
    }
  }

  for (int segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    const int segment_qindex =
        av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex);
    const int qindex =
        clamp(segment_qindex + cm->quant_params.y_dc_delta_q, 0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex, cm->seq_params->bit_depth);

    for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      const int thresh_max = INT_MAX / t;

      for (int n = 0; n < num_modes_count; ++n) {
        const int mode_index = use_nonrd_pick_mode ? mode_indices[n] : n;
        if (rd->thresh_mult[mode_index] < thresh_max) {
          rd->threshes[segment_id][bsize][mode_index] =
              rd->thresh_mult[mode_index] * t / 4;
        } else {
          rd->threshes[segment_id][bsize][mode_index] = INT_MAX;
        }
      }
    }
  }
}
561 
// Fills the coefficient cost tables of 'coeff_costs' from the CDFs in 'fc',
// for min(num_planes, PLANE_TYPES) plane types.
//
// First the end-of-block position costs are filled for each of the 7 eob
// multi-size classes. Then, per transform size and plane: txb skip, base
// level (plus derived entries 4..7), eob extra bit, DC sign and base-range
// costs. In each lps_cost row, entries [0, COEFF_BASE_RANGE] hold cumulative
// costs and the following COEFF_BASE_RANGE + 1 entries hold the differences
// between consecutive cumulative costs.
void av1_fill_coeff_costs(CoeffCosts *coeff_costs, FRAME_CONTEXT *fc,
                          const int num_planes) {
  const int nplanes = AOMMIN(num_planes, PLANE_TYPES);

  for (int eob_multi_size = 0; eob_multi_size < 7; ++eob_multi_size) {
    for (int plane = 0; plane < nplanes; ++plane) {
      LV_MAP_EOB_COST *eob_pcost =
          &coeff_costs->eob_costs[eob_multi_size][plane];

      for (int ctx = 0; ctx < 2; ++ctx) {
        // Each multi-size class has its own CDF array in the frame context.
        aom_cdf_prob *eob_cdf;
        if (eob_multi_size == 0) {
          eob_cdf = fc->eob_flag_cdf16[plane][ctx];
        } else if (eob_multi_size == 1) {
          eob_cdf = fc->eob_flag_cdf32[plane][ctx];
        } else if (eob_multi_size == 2) {
          eob_cdf = fc->eob_flag_cdf64[plane][ctx];
        } else if (eob_multi_size == 3) {
          eob_cdf = fc->eob_flag_cdf128[plane][ctx];
        } else if (eob_multi_size == 4) {
          eob_cdf = fc->eob_flag_cdf256[plane][ctx];
        } else if (eob_multi_size == 5) {
          eob_cdf = fc->eob_flag_cdf512[plane][ctx];
        } else {
          eob_cdf = fc->eob_flag_cdf1024[plane][ctx];
        }
        av1_cost_tokens_from_cdf(eob_pcost->eob_cost[ctx], eob_cdf, NULL);
      }
    }
  }

  for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
    for (int plane = 0; plane < nplanes; ++plane) {
      LV_MAP_COEFF_COST *pcost = &coeff_costs->coeff_costs[tx_size][plane];

      for (int ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(pcost->txb_skip_cost[ctx],
                                 fc->txb_skip_cdf[tx_size][ctx], NULL);
      }

      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS_EOB; ++ctx) {
        av1_cost_tokens_from_cdf(pcost->base_eob_cost[ctx],
                                 fc->coeff_base_eob_cdf[tx_size][plane][ctx],
                                 NULL);
      }

      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(pcost->base_cost[ctx],
                                 fc->coeff_base_cdf[tx_size][plane][ctx], NULL);

        // Entries 4..7 are derived from the four symbol costs just computed.
        pcost->base_cost[ctx][4] = 0;
        pcost->base_cost[ctx][5] = pcost->base_cost[ctx][1] +
                                   av1_cost_literal(1) -
                                   pcost->base_cost[ctx][0];
        pcost->base_cost[ctx][6] =
            pcost->base_cost[ctx][2] - pcost->base_cost[ctx][1];
        pcost->base_cost[ctx][7] =
            pcost->base_cost[ctx][3] - pcost->base_cost[ctx][2];
      }

      for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(pcost->eob_extra_cost[ctx],
                                 fc->eob_extra_cdf[tx_size][plane][ctx], NULL);
      }

      for (int ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(pcost->dc_sign_cost[ctx],
                                 fc->dc_sign_cdf[plane][ctx], NULL);
      }

      for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
        int br_rate[BR_CDF_SIZE];
        av1_cost_tokens_from_cdf(
            br_rate, fc->coeff_br_cdf[AOMMIN(tx_size, TX_32X32)][plane][ctx],
            NULL);

        // Cumulative cost: each group of BR_CDF_SIZE - 1 levels costs the
        // symbol itself plus one escape (last symbol) per earlier group.
        int escape_cost = 0;
        int level = 0;
        while (level < COEFF_BASE_RANGE) {
          for (int sym = 0; sym < BR_CDF_SIZE - 1; sym++) {
            pcost->lps_cost[ctx][level + sym] = escape_cost + br_rate[sym];
          }
          escape_cost += br_rate[BR_CDF_SIZE - 1];
          level += BR_CDF_SIZE - 1;
        }
        pcost->lps_cost[ctx][level] = escape_cost;

        // Second half of the row: differences between consecutive entries.
        pcost->lps_cost[ctx][0 + COEFF_BASE_RANGE + 1] =
            pcost->lps_cost[ctx][0];
        for (int k = 1; k <= COEFF_BASE_RANGE; ++k) {
          pcost->lps_cost[ctx][k + COEFF_BASE_RANGE + 1] =
              pcost->lps_cost[ctx][k] - pcost->lps_cost[ctx][k - 1];
        }
      }
    }
  }
}
654 
// Builds the motion vector cost tables in 'mv_costs' from 'nmvc'.
//
// The nmv_cost / nmv_cost_hp pointers are set to the MV_MAX element of their
// backing arrays so that they can be indexed with signed offsets. The active
// table ('mv_cost_stack') and the precision passed to
// av1_build_nmv_cost_table() are chosen as follows:
//   - integer_mv:  regular table, MV_SUBPEL_NONE
//   - usehp:       high precision table, 'usehp'
//   - otherwise:   regular table, 'usehp' (i.e. 0)
// Does nothing when 'mv_costs' is NULL.
void av1_fill_mv_costs(const nmv_context *nmvc, int integer_mv, int usehp,
                       MvCosts *mv_costs) {
  // Avoid accessing 'mv_costs' when it is not allocated.
  if (mv_costs == NULL) return;

  for (int comp = 0; comp < 2; ++comp) {
    mv_costs->nmv_cost[comp] = &mv_costs->nmv_cost_alloc[comp][MV_MAX];
  }
  for (int comp = 0; comp < 2; ++comp) {
    mv_costs->nmv_cost_hp[comp] = &mv_costs->nmv_cost_hp_alloc[comp][MV_MAX];
  }

  if (integer_mv) {
    mv_costs->mv_cost_stack = (int **)&mv_costs->nmv_cost;
  } else if (usehp) {
    mv_costs->mv_cost_stack = mv_costs->nmv_cost_hp;
  } else {
    mv_costs->mv_cost_stack = mv_costs->nmv_cost;
  }

  av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
                           nmvc, integer_mv ? MV_SUBPEL_NONE : usehp);
}
675 
// Builds the intra block copy displacement vector cost tables in 'dv_costs'
// from 'ndvc'.
//
// The dv_costs pointers are set to the MV_MAX element of their backing
// arrays so that they can be indexed with signed offsets, and the tables are
// built at MV_SUBPEL_NONE precision. Does nothing when 'dv_costs' is NULL,
// matching the behaviour of av1_fill_mv_costs().
void av1_fill_dv_costs(const nmv_context *ndvc, IntraBCMVCosts *dv_costs) {
  // Avoid accessing 'dv_costs' when it is not allocated.
  if (dv_costs == NULL) return;

  dv_costs->dv_costs[0] = &dv_costs->dv_costs_alloc[0][MV_MAX];
  dv_costs->dv_costs[1] = &dv_costs->dv_costs_alloc[1][MV_MAX];
  av1_build_nmv_cost_table(dv_costs->joint_mv, dv_costs->dv_costs, ndvc,
                           MV_SUBPEL_NONE);
}
682 
683 // Populates speed features based on codec control settings (of type
684 // COST_UPDATE_TYPE) and expected speed feature settings (of type
685 // INTERNAL_COST_UPDATE_TYPE) by considering the least frequent cost update.
686 // The populated/updated speed features are used for cost updates in the
687 // encoder.
688 // WARNING: Population of unified cost update frequency needs to be taken care
689 // accordingly, in case of any modifications/additions to the enum
690 // COST_UPDATE_TYPE/INTERNAL_COST_UPDATE_TYPE.
populate_unified_cost_update_freq(const CostUpdateFreq cost_upd_freq,SPEED_FEATURES * const sf)691 static INLINE void populate_unified_cost_update_freq(
692     const CostUpdateFreq cost_upd_freq, SPEED_FEATURES *const sf) {
693   INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
694   // Mapping of entropy cost update frequency from the encoder's codec control
695   // settings of type COST_UPDATE_TYPE to speed features of type
696   // INTERNAL_COST_UPDATE_TYPE.
697   static const INTERNAL_COST_UPDATE_TYPE
698       map_cost_upd_to_internal_cost_upd[NUM_COST_UPDATE_TYPES] = {
699         INTERNAL_COST_UPD_SB, INTERNAL_COST_UPD_SBROW, INTERNAL_COST_UPD_TILE,
700         INTERNAL_COST_UPD_OFF
701       };
702 
703   inter_sf->mv_cost_upd_level =
704       AOMMIN(inter_sf->mv_cost_upd_level,
705              map_cost_upd_to_internal_cost_upd[cost_upd_freq.mv]);
706   inter_sf->coeff_cost_upd_level =
707       AOMMIN(inter_sf->coeff_cost_upd_level,
708              map_cost_upd_to_internal_cost_upd[cost_upd_freq.coeff]);
709   inter_sf->mode_cost_upd_level =
710       AOMMIN(inter_sf->mode_cost_upd_level,
711              map_cost_upd_to_internal_cost_upd[cost_upd_freq.mode]);
712   sf->intra_sf.dv_cost_upd_level =
713       AOMMIN(sf->intra_sf.dv_cost_upd_level,
714              map_cost_upd_to_internal_cost_upd[cost_upd_freq.dv]);
715 }
716 
717 // Checks if entropy costs should be initialized/updated at frame level or not.
is_frame_level_cost_upd_freq_set(const AV1_COMMON * const cm,const INTERNAL_COST_UPDATE_TYPE cost_upd_level,const int use_nonrd_pick_mode,const int frames_since_key)718 static INLINE int is_frame_level_cost_upd_freq_set(
719     const AV1_COMMON *const cm, const INTERNAL_COST_UPDATE_TYPE cost_upd_level,
720     const int use_nonrd_pick_mode, const int frames_since_key) {
721   const int fill_costs =
722       frame_is_intra_only(cm) ||
723       (use_nonrd_pick_mode ? frames_since_key < 2
724                            : (cm->current_frame.frame_number & 0x07) == 1);
725   return ((!use_nonrd_pick_mode && cost_upd_level != INTERNAL_COST_UPD_OFF) ||
726           cost_upd_level == INTERNAL_COST_UPD_TILE || fill_costs);
727 }
728 
729 // Decide whether we want to update the mode entropy cost for the current frame.
730 // The logit is currently inherited from selective_disable_cdf_rtc.
should_force_mode_cost_update(const AV1_COMP * cpi)731 static AOM_INLINE int should_force_mode_cost_update(const AV1_COMP *cpi) {
732   const REAL_TIME_SPEED_FEATURES *const rt_sf = &cpi->sf.rt_sf;
733   if (!rt_sf->frame_level_mode_cost_update) {
734     return false;
735   }
736 
737   if (cpi->oxcf.algo_cfg.cdf_update_mode == 2) {
738     return cpi->frames_since_last_update == 1;
739   } else if (cpi->oxcf.algo_cfg.cdf_update_mode == 1) {
740     if (cpi->svc.number_spatial_layers == 1 &&
741         cpi->svc.number_temporal_layers == 1) {
742       const AV1_COMMON *const cm = &cpi->common;
743       const RATE_CONTROL *const rc = &cpi->rc;
744 
745       return frame_is_intra_only(cm) || is_frame_resize_pending(cpi) ||
746              rc->high_source_sad || rc->frames_since_key < 10 ||
747              cpi->cyclic_refresh->counter_encode_maxq_scene_change < 10 ||
748              cm->current_frame.frame_number % 8 == 0;
749     } else if (cpi->svc.number_temporal_layers > 1) {
750       return cpi->svc.temporal_layer_id != cpi->svc.number_temporal_layers - 1;
751     }
752   }
753 
754   return false;
755 }
756 
// Sets up the frame-level rate-distortion state stored in `cpi`:
//  - computes rd->RDMULT (optionally overridden under CONFIG_RD_COMMAND),
//  - derives x->errorperbit from it,
//  - sets the block thresholds,
//  - unifies the cost update levels and refreshes the mv, coefficient, mode
//    and dv cost tables of cpi->td.mb when the respective checks pass.
void av1_initialize_rd_consts(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  SPEED_FEATURES *const sf = &cpi->sf;
  RD_OPT *const rd = &cpi->rd;
  int use_nonrd_pick_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
  int frames_since_key = cpi->rc.frames_since_key;

  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  // The boost index is capped at 15 and the layer depth at 6 before being
  // handed to av1_compute_rd_mult().
  const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
  const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
  const FRAME_TYPE frame_type = cm->current_frame.frame_type;

  // The q index used for RDMULT is the base q index plus the luma DC delta.
  const int qindex_rdmult =
      cm->quant_params.base_qindex + cm->quant_params.y_dc_delta_q;
  rd->RDMULT = av1_compute_rd_mult(
      qindex_rdmult, cm->seq_params->bit_depth,
      cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
      boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
      is_stat_consumption_stage(cpi));
#if CONFIG_RD_COMMAND
  // When pass == 2 and the RD command for this frame is
  // RD_OPTION_SET_Q_RDMULT, the computed RDMULT is replaced by the value from
  // the command list.
  if (cpi->oxcf.pass == 2) {
    const RD_COMMAND *rd_command = &cpi->rd_command;
    if (rd_command->option_ls[rd_command->frame_index] ==
        RD_OPTION_SET_Q_RDMULT) {
      rd->RDMULT = rd_command->rdmult_ls[rd_command->frame_index];
    }
  }
#endif  // CONFIG_RD_COMMAND

  av1_set_error_per_bit(&x->errorperbit, rd->RDMULT);

  set_block_thresholds(cm, rd, cpi->sf.rt_sf.use_nonrd_pick_mode);

  // Must run before the *_cost_upd_level fields are read below, since it
  // rewrites them in `sf`.
  populate_unified_cost_update_freq(cpi->oxcf.cost_upd_freq, sf);
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &cpi->sf.inter_sf;
  // Frame level mv cost update
  if (is_frame_level_cost_upd_freq_set(cm, inter_sf->mv_cost_upd_level,
                                       use_nonrd_pick_mode, frames_since_key))
    av1_fill_mv_costs(&cm->fc->nmvc, cm->features.cur_frame_force_integer_mv,
                      cm->features.allow_high_precision_mv, x->mv_costs);

  // Frame level coefficient cost update
  if (is_frame_level_cost_upd_freq_set(cm, inter_sf->coeff_cost_upd_level,
                                       use_nonrd_pick_mode, frames_since_key))
    av1_fill_coeff_costs(&x->coeff_costs, cm->fc, av1_num_planes(cm));

  // Frame level mode cost update
  if (should_force_mode_cost_update(cpi) ||
      is_frame_level_cost_upd_freq_set(cm, inter_sf->mode_cost_upd_level,
                                       use_nonrd_pick_mode, frames_since_key))
    av1_fill_mode_rates(cm, &x->mode_costs, cm->fc);

  // Frame level dv cost update
  if (av1_need_dv_costs(cpi)) {
    // The dv cost storage is allocated on first use; `x` aliases cpi->td.mb,
    // so x->dv_costs below refers to the same pointer.
    // NOTE(review): CHECK_MEM_ERROR presumably reports an allocation failure
    // through cm's error state - confirm against its definition.
    if (cpi->td.mb.dv_costs == NULL) {
      CHECK_MEM_ERROR(
          cm, cpi->td.mb.dv_costs,
          (IntraBCMVCosts *)aom_malloc(sizeof(*cpi->td.mb.dv_costs)));
    }
    av1_fill_dv_costs(&cm->fc->ndvc, x->dv_costs);
  }
}
820 
// Looks up the normalized rate (*r_q10) and normalized distortion (*d_q10),
// both in Q10, for the Q10 input xsq_q10 by linearly interpolating between
// neighbouring entries of the tables below. The entry one past the selected
// index is read, so xsq_q10 must stay below the last knot of xsq_iq_q10.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The tables below must be of the same size.

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256.

  // Normalized rate:
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The
  // closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142,
    4044,  3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
    3133,  3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353,
    2290,  2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
    1608,  1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963,
    911,   864,  821,  781,  745,  680,  623,  574,  530,  490,  455,  424,
    395,   345,  304,  269,  239,  213,  190,  171,  154,  126,  104,  87,
    73,    61,   52,   44,   38,   28,   21,   16,   12,   10,   8,    6,
    5,     3,    2,    1,    1,    1,    0,    0,
  };
  // Normalized distortion:
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
    0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,
    5,    6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,
    18,   21,   24,   26,   29,   31,   34,   36,   39,   44,   49,   54,
    59,   64,   69,   73,   78,   88,   97,   106,  115,  124,  133,  142,
    151,  167,  184,  200,  215,  231,  245,  260,  274,  301,  327,  351,
    375,  397,  418,  439,  458,  495,  528,  559,  587,  613,  637,  659,
    680,  717,  749,  777,  801,  823,  842,  859,  874,  899,  919,  936,
    949,  960,  969,  977,  983,  994,  1001, 1006, 1010, 1013, 1015, 1017,
    1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
  };
  // Q10 input values at which the two tables above are sampled.
  static const int xsq_iq_q10[] = {
    0,      4,      8,      12,     16,     20,     24,     28,     32,
    40,     48,     56,     64,     72,     80,     88,     96,     112,
    128,    144,    160,    176,    192,    208,    224,    256,    288,
    320,    352,    384,    416,    448,    480,    544,    608,    672,
    736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
    1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
    3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
    7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
    16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
    36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
    81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
    180192, 196576, 212960, 229344, 245728,
  };

  // Split the biased input into an exponent (position of its top bit) and a
  // 3-bit mantissa; together they form the table index.
  const int biased = (xsq_q10 >> 2) + 8;
  const int exponent = get_msb(biased) - 3;
  const int mantissa = (biased >> exponent) & 0x7;
  const int idx = (exponent << 3) + mantissa;

  // Q10 interpolation weights for entries idx + 1 (w_hi) and idx (w_lo).
  const int offset = xsq_q10 - xsq_iq_q10[idx];
  const int w_hi = (offset << 10) >> (2 + exponent);
  const int w_lo = (1 << 10) - w_hi;

  *r_q10 = (rate_tab_q10[idx] * w_lo + rate_tab_q10[idx + 1] * w_hi) >> 10;
  *d_q10 = (dist_tab_q10[idx] * w_lo + dist_tab_q10[idx + 1] * w_hi) >> 10;
}
886 
av1_model_rd_from_var_lapndz(int64_t var,unsigned int n_log2,unsigned int qstep,int * rate,int64_t * dist)887 void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n_log2,
888                                   unsigned int qstep, int *rate,
889                                   int64_t *dist) {
890   // This function models the rate and distortion for a Laplacian
891   // source with given variance when quantized with a uniform quantizer
892   // with given stepsize. The closed form expressions are in:
893   // Hang and Chen, "Source Model for transform video coder and its
894   // application - Part I: Fundamental Theory", IEEE Trans. Circ.
895   // Sys. for Video Tech., April 1997.
896   if (var == 0) {
897     *rate = 0;
898     *dist = 0;
899   } else {
900     int d_q10, r_q10;
901     static const uint32_t MAX_XSQ_Q10 = 245727;
902     const uint64_t xsq_q10_64 =
903         (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
904     const int xsq_q10 = (int)AOMMIN(xsq_q10_64, MAX_XSQ_Q10);
905     model_rd_norm(xsq_q10, &r_q10, &d_q10);
906     *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - AV1_PROB_COST_SHIFT);
907     *dist = (var * (int64_t)d_q10 + 512) >> 10;
908   }
909 }
910 
// Cubic interpolation through four consecutive samples p[0..3] (the
// Catmull-Rom form). `x` is the fractional position between p[1] and p[2]:
// the result is p[1] at x = 0 and p[2] at x = 1.
static double interp_cubic(const double *p, double x) {
  // Evaluate the polynomial from the innermost term outwards.
  const double cubic_term = 3.0 * (p[1] - p[2]) + p[3] - p[0];
  const double quad_term =
      2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] + x * cubic_term;
  const double lin_term = p[2] - p[0] + x * quad_term;
  return p[1] + 0.5 * x * lin_term;
}
917 
918 /*
919 static double interp_bicubic(const double *p, int p_stride, double x,
920                              double y) {
921   double q[4];
922   q[0] = interp_cubic(p, x);
923   q[1] = interp_cubic(p + p_stride, x);
924   q[2] = interp_cubic(p + 2 * p_stride, x);
925   q[3] = interp_cubic(p + 3 * p_stride, x);
926   return interp_cubic(q, y);
927 }
928 */
929 
// Maps a block size (table index) to a rate-model category in the range
// 0..3. av1_model_rd_curvfit() uses the category to select the row of
// interp_rgrid_curv.
static const uint8_t bsize_curvfit_model_cat_lookup[BLOCK_SIZES_ALL] = {
  0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 1, 1, 2, 2, 3, 3
};
933 
// Returns the distortion-model category for a normalized SSE: 1 when
// sse_norm is strictly greater than 16.0, otherwise 0.
static int sse_norm_curvfit_model_cat_lookup(double sse_norm) {
  return sse_norm > 16.0 ? 1 : 0;
}
937 
938 // Models distortion by sse using a logistic function on
939 // l = log2(sse / q^2) as:
940 // dbysse = 16 / (1 + k exp(l + c))
get_dbysse_logistic(double l,double c,double k)941 static double get_dbysse_logistic(double l, double c, double k) {
942   const double A = 16.0;
943   const double dbysse = A / (1 + k * exp(l + c));
944   return dbysse;
945 }
946 
// Models rate with a linear function of l that is clamped below at zero:
//   rate = max(0, a + b * l)
// The model is meant to be applied to l = log2(sse / q^2).
static double get_rate_clamplinear(double l, double a, double b) {
  const double linear = a + b * l;
  if (linear < 0) return 0;
  return linear;
}
954 
// Maps a block size (table index) to a surface-fit category in the range
// 0..8. rate_surffit_model_params_lookup() uses the category to select the
// row of surffit_rate_params.
static const uint8_t bsize_surffit_model_cat_lookup[BLOCK_SIZES_ALL] = {
  0, 0, 0, 0, 1, 1, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 0, 0, 2, 2, 4, 4
};
958 
// Rate surface-fit coefficients, one row per category from
// bsize_surffit_model_cat_lookup. For a row { c0, c1, c2, c3 },
// rate_surffit_model_params_lookup() produces
//   rpar[0] = c0 + c1 * xm
//   rpar[1] = c2 + c3 * xm
// which av1_model_rd_surffit() then passes to get_rate_clamplinear() as the
// offset and slope of the clamped linear rate model.
static const double surffit_rate_params[9][4] = {
  {
      638.390212,
      2.253108,
      166.585650,
      -3.939401,
  },
  {
      5.256905,
      81.997240,
      -1.321771,
      17.694216,
  },
  {
      -74.193045,
      72.431868,
      -19.033152,
      15.407276,
  },
  {
      416.770113,
      14.794188,
      167.686830,
      -6.997756,
  },
  {
      378.511276,
      9.558376,
      154.658843,
      -6.635663,
  },
  {
      277.818787,
      4.413180,
      150.317637,
      -9.893038,
  },
  {
      142.212132,
      11.542038,
      94.393964,
      -5.518517,
  },
  {
      219.100256,
      4.007421,
      108.932852,
      -6.981310,
  },
  {
      222.261971,
      3.251049,
      95.972916,
      -5.609789,
  },
};
1015 
// Distortion surface-fit coefficients { p0, ..., p6 }, shared by all block
// sizes. dist_surffit_model_params_lookup() produces
//   dpar[0] = p0 + p1 / (1 + exp((xm + p2) * p3))
//   dpar[1] = p4 + p5 * exp(p6 * xm)
// which av1_model_rd_surffit() then passes to get_dbysse_logistic().
static const double surffit_dist_params[7] = { 1.475844,  4.328362, -5.680233,
                                               -0.500994, 0.554585, 4.839478,
                                               -0.695837 };
1019 
// Computes the two parameters of the clamped linear rate model for block size
// `bsize` at `xm`. Each output is an affine function of xm whose coefficients
// come from the surffit_rate_params row selected by the block size category.
// Writes rpar[0] and rpar[1].
static void rate_surffit_model_params_lookup(BLOCK_SIZE bsize, double xm,
                                             double *rpar) {
  const double *const coef =
      surffit_rate_params[bsize_surffit_model_cat_lookup[bsize]];
  rpar[0] = coef[0] + coef[1] * xm;
  rpar[1] = coef[2] + coef[3] * xm;
}
1026 
// Computes the two parameters of the logistic distortion model at `xm` from
// surffit_dist_params. `bsize` is accepted for symmetry with the rate lookup
// but is not used. Writes dpar[0] and dpar[1].
static void dist_surffit_model_params_lookup(BLOCK_SIZE bsize, double xm,
                                             double *dpar) {
  (void)bsize;
  // dpar[0]: constant plus a scaled logistic term in xm.
  const double logistic_arg =
      (xm + surffit_dist_params[2]) * surffit_dist_params[3];
  dpar[0] = surffit_dist_params[0] +
            surffit_dist_params[1] / (1 + exp(logistic_arg));
  // dpar[1]: constant plus a scaled exponential term in xm.
  const double exp_term = exp(surffit_dist_params[6] * xm);
  dpar[1] = surffit_dist_params[4] + surffit_dist_params[5] * exp_term;
}
1034 
// Surface-fit rate/distortion model. Looks up the model parameters for
// (`bsize`, `xm`) and evaluates them at `yl`: *rate_f receives the clamped
// linear rate, *distbysse_f the logistic distortion-by-sse. `sse_norm` is
// unused.
void av1_model_rd_surffit(BLOCK_SIZE bsize, double sse_norm, double xm,
                          double yl, double *rate_f, double *distbysse_f) {
  (void)sse_norm;

  double rate_coef[2];
  rate_surffit_model_params_lookup(bsize, xm, rate_coef);
  *rate_f = get_rate_clamplinear(yl, rate_coef[0], rate_coef[1]);

  double dist_coef[2];
  dist_surffit_model_params_lookup(bsize, xm, dist_coef);
  *distbysse_f = get_dbysse_logistic(yl, dist_coef[0], dist_coef[1]);
}
1045 
// Sampled rate curves used by av1_model_rd_curvfit(), one row per rate
// category (see bsize_curvfit_model_cat_lookup). Each row holds 65 samples;
// sample i corresponds to xqr = -15.5 + 0.5 * i (x_start and x_step in
// av1_model_rd_curvfit()), and values in between are obtained with
// interp_cubic().
static const double interp_rgrid_curv[4][65] = {
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    118.257702,  120.210658,  121.434853,  122.100487,
      122.377758,  122.436865,  72.290102,   96.974289,   101.652727,
      126.830141,  140.417377,  157.644879,  184.315291,  215.823873,
      262.300169,  335.919859,  420.624173,  519.185032,  619.854243,
      726.053595,  827.663369,  933.127475,  1037.988755, 1138.839609,
      1233.342933, 1333.508064, 1428.760126, 1533.396364, 1616.952052,
      1744.539319, 1803.413586, 1951.466618, 1994.227838, 2086.031680,
      2148.635443, 2239.068450, 2222.590637, 2338.859809, 2402.929011,
      2418.727875, 2435.342670, 2471.159469, 2523.187446, 2591.183827,
      2674.905840, 2774.110714, 2888.555675, 3017.997952, 3162.194773,
      3320.903365, 3493.880956, 3680.884773, 3881.672045, 4096.000000,
  },
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    13.087244,   15.919735,   25.930313,   24.412411,
      28.567417,   29.924194,   30.857010,   32.742979,   36.382570,
      39.210386,   42.265690,   47.378572,   57.014850,   82.740067,
      137.346562,  219.968084,  316.781856,  415.643773,  516.706538,
      614.914364,  714.303763,  815.512135,  911.210485,  1008.501528,
      1109.787854, 1213.772279, 1322.922561, 1414.752579, 1510.505641,
      1615.741888, 1697.989032, 1780.123933, 1847.453790, 1913.742309,
      1960.828122, 2047.500168, 2085.454095, 2129.230668, 2158.171824,
      2182.231724, 2217.684864, 2269.589211, 2337.264824, 2420.618694,
      2519.557814, 2633.989178, 2763.819779, 2908.956609, 3069.306660,
      3244.776927, 3435.274401, 3640.706076, 3860.978945, 4096.000000,
  },
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    4.656893,    5.123633,    5.594132,    6.162376,
      6.918433,    7.768444,    8.739415,    10.105862,   11.477328,
      13.236604,   15.421030,   19.093623,   25.801871,   46.724612,
      98.841054,   181.113466,  272.586364,  359.499769,  445.546343,
      525.944439,  605.188743,  681.793483,  756.668359,  838.486885,
      926.950356,  1015.482542, 1113.353926, 1204.897193, 1288.871992,
      1373.464145, 1455.746628, 1527.796460, 1588.475066, 1658.144771,
      1710.302500, 1807.563351, 1863.197608, 1927.281616, 1964.450872,
      2022.719898, 2100.041145, 2185.205712, 2280.993936, 2387.616216,
      2505.282950, 2634.204540, 2774.591385, 2926.653884, 3090.602436,
      3266.647443, 3454.999303, 3655.868416, 3869.465182, 4096.000000,
  },
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.337370,    0.391916,    0.468839,    0.566334,
      0.762564,    1.069225,    1.384361,    1.787581,    2.293948,
      3.251909,    4.412991,    8.050068,    11.606073,   27.668092,
      65.227758,   128.463938,  202.097653,  262.715851,  312.464873,
      355.601398,  400.609054,  447.201352,  495.761568,  552.871938,
      619.067625,  691.984883,  773.753288,  860.628503,  946.262808,
      1019.805896, 1106.061360, 1178.422145, 1244.852258, 1302.173987,
      1399.650266, 1548.092912, 1545.928652, 1670.817500, 1694.523823,
      1779.195362, 1882.155494, 1990.662097, 2108.325181, 2235.456119,
      2372.366287, 2519.367059, 2676.769812, 2844.885918, 3024.026754,
      3214.503695, 3416.628115, 3630.711389, 3857.064892, 4096.000000,
  },
};
1108 
// Sampled distortion-by-sse curves used by av1_model_rd_curvfit(), on the
// same 65-point grid as interp_rgrid_curv (sample i corresponds to
// xqr = -15.5 + 0.5 * i). The row is selected by
// sse_norm_curvfit_model_cat_lookup(), which yields 0 or 1.
// NOTE(review): the array is declared with 3 rows but only 2 are initialized,
// so the third row is implicitly all zeros; av1_model_rd_curvfit() never
// indexes it.
static const double interp_dgrid_curv[3][65] = {
  {
      16.000000, 15.962891, 15.925174, 15.886888, 15.848074, 15.808770,
      15.769015, 15.728850, 15.688313, 15.647445, 15.606284, 15.564870,
      15.525918, 15.483820, 15.373330, 15.126844, 14.637442, 14.184387,
      13.560070, 12.880717, 12.165995, 11.378144, 10.438769, 9.130790,
      7.487633,  5.688649,  4.267515,  3.196300,  2.434201,  1.834064,
      1.369920,  1.035921,  0.775279,  0.574895,  0.427232,  0.314123,
      0.233236,  0.171440,  0.128188,  0.092762,  0.067569,  0.049324,
      0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
      0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
      0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
      0.000348,  0.000193,  0.000085,  0.000021,  0.000000,
  },
  {
      16.000000, 15.996116, 15.984769, 15.966413, 15.941505, 15.910501,
      15.873856, 15.832026, 15.785466, 15.734633, 15.679981, 15.621967,
      15.560961, 15.460157, 15.288367, 15.052462, 14.466922, 13.921212,
      13.073692, 12.222005, 11.237799, 9.985848,  8.898823,  7.423519,
      5.995325,  4.773152,  3.744032,  2.938217,  2.294526,  1.762412,
      1.327145,  1.020728,  0.765535,  0.570548,  0.425833,  0.313825,
      0.232959,  0.171324,  0.128174,  0.092750,  0.067558,  0.049319,
      0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
      0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
      0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
      0.000348,  0.000193,  0.000085,  0.000021,  -0.000000,
  },
};
1137 
// Curve-fit rate/distortion model. Evaluates the sampled rate curve for the
// category of `bsize` and the sampled distortion curve for the category of
// `sse_norm` at `xqr`, using cubic interpolation between grid samples.
// `xqr` is clamped into the interior of the grid first. Results are written
// to *rate_f and *distbysse_f.
void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr,
                          double *rate_f, double *distbysse_f) {
  // The grids span [x_start, x_end] in steps of x_step.
  const double x_start = -15.5;
  const double x_end = 16.5;
  const double x_step = 0.5;
  const double epsilon = 1e-6;

  // Keep xqr at least one step (plus epsilon) away from either end, so the
  // four samples around it always lie inside the grid.
  const double xqr_lo = x_start + x_step + epsilon;
  const double xqr_hi = x_end - x_step - epsilon;
  xqr = AOMMAX(xqr, xqr_lo);
  xqr = AOMMIN(xqr, xqr_hi);

  // Split the grid position into an integer cell and a fractional offset.
  const double grid_pos = (xqr - x_start) / x_step;
  const int xi = (int)floor(grid_pos);
  const double frac = grid_pos - xi;

  assert(xi > 0);

  const int rcat = bsize_curvfit_model_cat_lookup[bsize];
  *rate_f = interp_cubic(&interp_rgrid_curv[rcat][(xi - 1)], frac);

  const int dcat = sse_norm_curvfit_model_cat_lookup(sse_norm);
  *distbysse_f = interp_cubic(&interp_dgrid_curv[dcat][(xi - 1)], frac);
}
1161 
// Copies the above and left entropy contexts of plane `pd` into `t_above` and
// `t_left`. The number of entries copied is the width (above) and height
// (left) of `plane_bsize` as given by mi_size_wide / mi_size_high.
static void get_entropy_contexts_plane(BLOCK_SIZE plane_bsize,
                                       const struct macroblockd_plane *pd,
                                       ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
                                       ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
  const ENTROPY_CONTEXT *const src_above = pd->above_entropy_context;
  const ENTROPY_CONTEXT *const src_left = pd->left_entropy_context;
  const size_t above_bytes = sizeof(*t_above) * mi_size_wide[plane_bsize];
  const size_t left_bytes = sizeof(*t_left) * mi_size_high[plane_bsize];

  memcpy(t_above, src_above, above_bytes);
  memcpy(t_left, src_left, left_bytes);
}
1174 
// Public wrapper around get_entropy_contexts_plane(): copies the above/left
// entropy contexts of plane `pd` for a block of size `plane_bsize` into
// `t_above` and `t_left`. `plane_bsize` must be a valid block size (checked
// by assert only).
void av1_get_entropy_contexts(BLOCK_SIZE plane_bsize,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
                              ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
  assert(plane_bsize < BLOCK_SIZES_ALL);
  get_entropy_contexts_plane(plane_bsize, pd, t_above, t_left);
}
1182 
// Evaluates the first two reference mv candidates of `ref_frame` against the
// source block by SAD at full-pel precision and records the results in `x`:
//  - x->pred_mv0_sad[ref_frame] / x->pred_mv1_sad[ref_frame]: SAD of
//    candidate 0 / 1 (only written for candidates that are evaluated),
//  - x->pred_mv_sad[ref_frame]: the smallest SAD found,
//  - x->max_mv_context[ref_frame]: the largest candidate component
//    magnitude, divided by 8.
// `ref_y_buffer` / `ref_y_stride` describe the reference luma plane and
// `block_size` selects the SAD function.
void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
  const int_mv first_mv =
      av1_get_ref_mv_from_stack(0, ref_frames, 0, &x->mbmi_ext);
  const int_mv second_mv =
      av1_get_ref_mv_from_stack(0, ref_frames, 1, &x->mbmi_ext);

  // Build the candidate list; the second mv is added only when it differs
  // from the first.
  MV candidates[MAX_MV_REF_CANDIDATES + 1];
  int num_candidates = 0;
  candidates[num_candidates++] = first_mv.as_mv;
  if (first_mv.as_int != second_mv.as_int) {
    candidates[num_candidates++] = second_mv.as_mv;
  }

  assert(num_candidates <=
         (int)(sizeof(candidates) / sizeof(candidates[0])));

  const uint8_t *const src_y_ptr = x->plane[0].src.buf;
  const int src_y_stride = x->plane[0].src.stride;
  int zero_seen = 0;
  int best_sad = INT_MAX;
  int max_mv = 0;

  for (int i = 0; i < num_candidates; ++i) {
    const int mv_row = candidates[i].row;
    const int mv_col = candidates[i].col;
    // Round the 1/8-pel components to full-pel positions.
    const int fp_row = (mv_row + 3 + (mv_row >= 0)) >> 3;
    const int fp_col = (mv_col + 3 + (mv_col >= 0)) >> 3;

    const int abs_row = abs(mv_row);
    const int abs_col = abs(mv_col);
    const int largest_fp = AOMMAX(abs_row, abs_col) >> 3;
    max_mv = AOMMAX(max_mv, largest_fp);

    // The full-pel zero position is evaluated at most once.
    if (fp_row == 0 && fp_col == 0) {
      if (zero_seen) continue;
      zero_seen = 1;
    }

    // SAD between the source block and the reference at this position.
    const uint8_t *const ref_y_ptr =
        &ref_y_buffer[ref_y_stride * fp_row + fp_col];
    const int this_sad = cpi->ppi->fn_ptr[block_size].sdf(
        src_y_ptr, src_y_stride, ref_y_ptr, ref_y_stride);

    best_sad = AOMMIN(best_sad, this_sad);

    switch (i) {
      case 0: x->pred_mv0_sad[ref_frame] = this_sad; break;
      case 1: x->pred_mv1_sad[ref_frame] = this_sad; break;
      default: break;
    }
  }

  // Record the largest full-pel mv magnitude and the best SAD.
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}
1232 
av1_setup_pred_block(const MACROBLOCKD * xd,struct buf_2d dst[MAX_MB_PLANE],const YV12_BUFFER_CONFIG * src,const struct scale_factors * scale,const struct scale_factors * scale_uv,const int num_planes)1233 void av1_setup_pred_block(const MACROBLOCKD *xd,
1234                           struct buf_2d dst[MAX_MB_PLANE],
1235                           const YV12_BUFFER_CONFIG *src,
1236                           const struct scale_factors *scale,
1237                           const struct scale_factors *scale_uv,
1238                           const int num_planes) {
1239   dst[0].buf = src->y_buffer;
1240   dst[0].stride = src->y_stride;
1241   dst[1].buf = src->u_buffer;
1242   dst[2].buf = src->v_buffer;
1243   dst[1].stride = dst[2].stride = src->uv_stride;
1244 
1245   const int mi_row = xd->mi_row;
1246   const int mi_col = xd->mi_col;
1247   for (int i = 0; i < num_planes; ++i) {
1248     setup_pred_plane(dst + i, xd->mi[0]->bsize, dst[i].buf,
1249                      i ? src->uv_crop_width : src->y_crop_width,
1250                      i ? src->uv_crop_height : src->y_crop_height,
1251                      dst[i].stride, mi_row, mi_col, i ? scale_uv : scale,
1252                      xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
1253   }
1254 }
1255 
av1_get_scaled_ref_frame(const AV1_COMP * cpi,int ref_frame)1256 YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const AV1_COMP *cpi,
1257                                              int ref_frame) {
1258   assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
1259   RefCntBuffer *const scaled_buf = cpi->scaled_ref_buf[ref_frame - 1];
1260   const RefCntBuffer *const ref_buf =
1261       get_ref_frame_buf(&cpi->common, ref_frame);
1262   return (scaled_buf != ref_buf && scaled_buf != NULL) ? &scaled_buf->buf
1263                                                        : NULL;
1264 }
1265 
// Returns the rate cost of signalling the interpolation filter(s) of the
// current block. The cost is 0 unless `interp_filter` is SWITCHABLE. When
// `dual_filter` is zero only direction 0 is costed; otherwise both directions
// are summed. The sum is scaled by SWITCHABLE_INTERP_RATE_FACTOR.
int av1_get_switchable_rate(const MACROBLOCK *x, const MACROBLOCKD *xd,
                            InterpFilter interp_filter, int dual_filter) {
  if (interp_filter != SWITCHABLE) return 0;

  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_dirs = dual_filter ? 2 : 1;
  int filter_cost_sum = 0;
  for (int dir = 0; dir < num_dirs; ++dir) {
    const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
    const InterpFilter filter =
        av1_extract_interp_filter(mbmi->interp_filters, dir);
    filter_cost_sum += x->mode_costs.switchable_interp_costs[ctx][filter];
  }
  return SWITCHABLE_INTERP_RATE_FACTOR * filter_cost_sum;
}
1283 
av1_set_rd_speed_thresholds(AV1_COMP * cpi)1284 void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
1285   RD_OPT *const rd = &cpi->rd;
1286 
1287   // Set baseline threshold values.
1288   av1_zero(rd->thresh_mult);
1289 
1290   rd->thresh_mult[THR_NEARESTMV] = 300;
1291   rd->thresh_mult[THR_NEARESTL2] = 300;
1292   rd->thresh_mult[THR_NEARESTL3] = 300;
1293   rd->thresh_mult[THR_NEARESTB] = 300;
1294   rd->thresh_mult[THR_NEARESTA2] = 300;
1295   rd->thresh_mult[THR_NEARESTA] = 300;
1296   rd->thresh_mult[THR_NEARESTG] = 300;
1297 
1298   rd->thresh_mult[THR_NEWMV] = 1000;
1299   rd->thresh_mult[THR_NEWL2] = 1000;
1300   rd->thresh_mult[THR_NEWL3] = 1000;
1301   rd->thresh_mult[THR_NEWB] = 1000;
1302   rd->thresh_mult[THR_NEWA2] = 1100;
1303   rd->thresh_mult[THR_NEWA] = 1000;
1304   rd->thresh_mult[THR_NEWG] = 1000;
1305 
1306   rd->thresh_mult[THR_NEARMV] = 1000;
1307   rd->thresh_mult[THR_NEARL2] = 1000;
1308   rd->thresh_mult[THR_NEARL3] = 1000;
1309   rd->thresh_mult[THR_NEARB] = 1000;
1310   rd->thresh_mult[THR_NEARA2] = 1000;
1311   rd->thresh_mult[THR_NEARA] = 1000;
1312   rd->thresh_mult[THR_NEARG] = 1000;
1313 
1314   rd->thresh_mult[THR_GLOBALMV] = 2200;
1315   rd->thresh_mult[THR_GLOBALL2] = 2000;
1316   rd->thresh_mult[THR_GLOBALL3] = 2000;
1317   rd->thresh_mult[THR_GLOBALB] = 2400;
1318   rd->thresh_mult[THR_GLOBALA2] = 2000;
1319   rd->thresh_mult[THR_GLOBALG] = 2000;
1320   rd->thresh_mult[THR_GLOBALA] = 2400;
1321 
1322   rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] = 1100;
1323   rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A] = 1000;
1324   rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A] = 800;
1325   rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA] = 900;
1326   rd->thresh_mult[THR_COMP_NEAREST_NEARESTLB] = 1000;
1327   rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2B] = 1000;
1328   rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3B] = 1000;
1329   rd->thresh_mult[THR_COMP_NEAREST_NEARESTGB] = 1000;
1330   rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA2] = 1000;
1331   rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A2] = 1000;
1332   rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A2] = 1000;
1333   rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA2] = 1000;
1334 
1335   rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL2] = 2000;
1336   rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL3] = 2000;
1337   rd->thresh_mult[THR_COMP_NEAREST_NEARESTLG] = 2000;
1338   rd->thresh_mult[THR_COMP_NEAREST_NEARESTBA] = 2000;
1339 
1340   rd->thresh_mult[THR_COMP_NEAR_NEARLA] = 1200;
1341   rd->thresh_mult[THR_COMP_NEAREST_NEWLA] = 1500;
1342   rd->thresh_mult[THR_COMP_NEW_NEARESTLA] = 1500;
1343   rd->thresh_mult[THR_COMP_NEAR_NEWLA] = 1530;
1344   rd->thresh_mult[THR_COMP_NEW_NEARLA] = 1870;
1345   rd->thresh_mult[THR_COMP_NEW_NEWLA] = 2400;
1346   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA] = 2750;
1347 
1348   rd->thresh_mult[THR_COMP_NEAR_NEARL2A] = 1200;
1349   rd->thresh_mult[THR_COMP_NEAREST_NEWL2A] = 1500;
1350   rd->thresh_mult[THR_COMP_NEW_NEARESTL2A] = 1500;
1351   rd->thresh_mult[THR_COMP_NEAR_NEWL2A] = 1870;
1352   rd->thresh_mult[THR_COMP_NEW_NEARL2A] = 1700;
1353   rd->thresh_mult[THR_COMP_NEW_NEWL2A] = 1800;
1354   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A] = 2500;
1355 
1356   rd->thresh_mult[THR_COMP_NEAR_NEARL3A] = 1200;
1357   rd->thresh_mult[THR_COMP_NEAREST_NEWL3A] = 1500;
1358   rd->thresh_mult[THR_COMP_NEW_NEARESTL3A] = 1500;
1359   rd->thresh_mult[THR_COMP_NEAR_NEWL3A] = 1700;
1360   rd->thresh_mult[THR_COMP_NEW_NEARL3A] = 1700;
1361   rd->thresh_mult[THR_COMP_NEW_NEWL3A] = 2000;
1362   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A] = 3000;
1363 
1364   rd->thresh_mult[THR_COMP_NEAR_NEARGA] = 1320;
1365   rd->thresh_mult[THR_COMP_NEAREST_NEWGA] = 1500;
1366   rd->thresh_mult[THR_COMP_NEW_NEARESTGA] = 1500;
1367   rd->thresh_mult[THR_COMP_NEAR_NEWGA] = 2040;
1368   rd->thresh_mult[THR_COMP_NEW_NEARGA] = 1700;
1369   rd->thresh_mult[THR_COMP_NEW_NEWGA] = 2000;
1370   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA] = 2250;
1371 
1372   rd->thresh_mult[THR_COMP_NEAR_NEARLB] = 1200;
1373   rd->thresh_mult[THR_COMP_NEAREST_NEWLB] = 1500;
1374   rd->thresh_mult[THR_COMP_NEW_NEARESTLB] = 1500;
1375   rd->thresh_mult[THR_COMP_NEAR_NEWLB] = 1360;
1376   rd->thresh_mult[THR_COMP_NEW_NEARLB] = 1700;
1377   rd->thresh_mult[THR_COMP_NEW_NEWLB] = 2400;
1378   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLB] = 2250;
1379 
1380   rd->thresh_mult[THR_COMP_NEAR_NEARL2B] = 1200;
1381   rd->thresh_mult[THR_COMP_NEAREST_NEWL2B] = 1500;
1382   rd->thresh_mult[THR_COMP_NEW_NEARESTL2B] = 1500;
1383   rd->thresh_mult[THR_COMP_NEAR_NEWL2B] = 1700;
1384   rd->thresh_mult[THR_COMP_NEW_NEARL2B] = 1700;
1385   rd->thresh_mult[THR_COMP_NEW_NEWL2B] = 2000;
1386   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2B] = 2500;
1387 
1388   rd->thresh_mult[THR_COMP_NEAR_NEARL3B] = 1200;
1389   rd->thresh_mult[THR_COMP_NEAREST_NEWL3B] = 1500;
1390   rd->thresh_mult[THR_COMP_NEW_NEARESTL3B] = 1500;
1391   rd->thresh_mult[THR_COMP_NEAR_NEWL3B] = 1870;
1392   rd->thresh_mult[THR_COMP_NEW_NEARL3B] = 1700;
1393   rd->thresh_mult[THR_COMP_NEW_NEWL3B] = 2000;
1394   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3B] = 2500;
1395 
1396   rd->thresh_mult[THR_COMP_NEAR_NEARGB] = 1200;
1397   rd->thresh_mult[THR_COMP_NEAREST_NEWGB] = 1500;
1398   rd->thresh_mult[THR_COMP_NEW_NEARESTGB] = 1500;
1399   rd->thresh_mult[THR_COMP_NEAR_NEWGB] = 1700;
1400   rd->thresh_mult[THR_COMP_NEW_NEARGB] = 1700;
1401   rd->thresh_mult[THR_COMP_NEW_NEWGB] = 2000;
1402   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGB] = 2500;
1403 
1404   rd->thresh_mult[THR_COMP_NEAR_NEARLA2] = 1200;
1405   rd->thresh_mult[THR_COMP_NEAREST_NEWLA2] = 1800;
1406   rd->thresh_mult[THR_COMP_NEW_NEARESTLA2] = 1500;
1407   rd->thresh_mult[THR_COMP_NEAR_NEWLA2] = 1700;
1408   rd->thresh_mult[THR_COMP_NEW_NEARLA2] = 1700;
1409   rd->thresh_mult[THR_COMP_NEW_NEWLA2] = 2000;
1410   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA2] = 2500;
1411 
1412   rd->thresh_mult[THR_COMP_NEAR_NEARL2A2] = 1200;
1413   rd->thresh_mult[THR_COMP_NEAREST_NEWL2A2] = 1500;
1414   rd->thresh_mult[THR_COMP_NEW_NEARESTL2A2] = 1500;
1415   rd->thresh_mult[THR_COMP_NEAR_NEWL2A2] = 1700;
1416   rd->thresh_mult[THR_COMP_NEW_NEARL2A2] = 1700;
1417   rd->thresh_mult[THR_COMP_NEW_NEWL2A2] = 2000;
1418   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A2] = 2500;
1419 
1420   rd->thresh_mult[THR_COMP_NEAR_NEARL3A2] = 1440;
1421   rd->thresh_mult[THR_COMP_NEAREST_NEWL3A2] = 1500;
1422   rd->thresh_mult[THR_COMP_NEW_NEARESTL3A2] = 1500;
1423   rd->thresh_mult[THR_COMP_NEAR_NEWL3A2] = 1700;
1424   rd->thresh_mult[THR_COMP_NEW_NEARL3A2] = 1700;
1425   rd->thresh_mult[THR_COMP_NEW_NEWL3A2] = 2000;
1426   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A2] = 2500;
1427 
1428   rd->thresh_mult[THR_COMP_NEAR_NEARGA2] = 1200;
1429   rd->thresh_mult[THR_COMP_NEAREST_NEWGA2] = 1500;
1430   rd->thresh_mult[THR_COMP_NEW_NEARESTGA2] = 1500;
1431   rd->thresh_mult[THR_COMP_NEAR_NEWGA2] = 1700;
1432   rd->thresh_mult[THR_COMP_NEW_NEARGA2] = 1700;
1433   rd->thresh_mult[THR_COMP_NEW_NEWGA2] = 2000;
1434   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA2] = 2750;
1435 
1436   rd->thresh_mult[THR_COMP_NEAR_NEARLL2] = 1600;
1437   rd->thresh_mult[THR_COMP_NEAREST_NEWLL2] = 2000;
1438   rd->thresh_mult[THR_COMP_NEW_NEARESTLL2] = 2000;
1439   rd->thresh_mult[THR_COMP_NEAR_NEWLL2] = 2640;
1440   rd->thresh_mult[THR_COMP_NEW_NEARLL2] = 2200;
1441   rd->thresh_mult[THR_COMP_NEW_NEWLL2] = 2400;
1442   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL2] = 3200;
1443 
1444   rd->thresh_mult[THR_COMP_NEAR_NEARLL3] = 1600;
1445   rd->thresh_mult[THR_COMP_NEAREST_NEWLL3] = 2000;
1446   rd->thresh_mult[THR_COMP_NEW_NEARESTLL3] = 1800;
1447   rd->thresh_mult[THR_COMP_NEAR_NEWLL3] = 2200;
1448   rd->thresh_mult[THR_COMP_NEW_NEARLL3] = 2200;
1449   rd->thresh_mult[THR_COMP_NEW_NEWLL3] = 2400;
1450   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL3] = 3200;
1451 
1452   rd->thresh_mult[THR_COMP_NEAR_NEARLG] = 1760;
1453   rd->thresh_mult[THR_COMP_NEAREST_NEWLG] = 2400;
1454   rd->thresh_mult[THR_COMP_NEW_NEARESTLG] = 2000;
1455   rd->thresh_mult[THR_COMP_NEAR_NEWLG] = 1760;
1456   rd->thresh_mult[THR_COMP_NEW_NEARLG] = 2640;
1457   rd->thresh_mult[THR_COMP_NEW_NEWLG] = 2400;
1458   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLG] = 3200;
1459 
1460   rd->thresh_mult[THR_COMP_NEAR_NEARBA] = 1600;
1461   rd->thresh_mult[THR_COMP_NEAREST_NEWBA] = 2000;
1462   rd->thresh_mult[THR_COMP_NEW_NEARESTBA] = 2000;
1463   rd->thresh_mult[THR_COMP_NEAR_NEWBA] = 2200;
1464   rd->thresh_mult[THR_COMP_NEW_NEARBA] = 1980;
1465   rd->thresh_mult[THR_COMP_NEW_NEWBA] = 2640;
1466   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALBA] = 3200;
1467 
1468   rd->thresh_mult[THR_DC] = 1000;
1469   rd->thresh_mult[THR_PAETH] = 1000;
1470   rd->thresh_mult[THR_SMOOTH] = 2200;
1471   rd->thresh_mult[THR_SMOOTH_V] = 2000;
1472   rd->thresh_mult[THR_SMOOTH_H] = 2000;
1473   rd->thresh_mult[THR_H_PRED] = 2000;
1474   rd->thresh_mult[THR_V_PRED] = 1800;
1475   rd->thresh_mult[THR_D135_PRED] = 2500;
1476   rd->thresh_mult[THR_D203_PRED] = 2000;
1477   rd->thresh_mult[THR_D157_PRED] = 2500;
1478   rd->thresh_mult[THR_D67_PRED] = 2000;
1479   rd->thresh_mult[THR_D113_PRED] = 2500;
1480   rd->thresh_mult[THR_D45_PRED] = 2500;
1481 }
1482 
update_thr_fact(int (* factor_buf)[MAX_MODES],THR_MODES best_mode_index,THR_MODES mode_start,THR_MODES mode_end,BLOCK_SIZE min_size,BLOCK_SIZE max_size,int max_rd_thresh_factor)1483 static INLINE void update_thr_fact(int (*factor_buf)[MAX_MODES],
1484                                    THR_MODES best_mode_index,
1485                                    THR_MODES mode_start, THR_MODES mode_end,
1486                                    BLOCK_SIZE min_size, BLOCK_SIZE max_size,
1487                                    int max_rd_thresh_factor) {
1488   for (THR_MODES mode = mode_start; mode < mode_end; ++mode) {
1489     for (BLOCK_SIZE bs = min_size; bs <= max_size; ++bs) {
1490       int *const fact = &factor_buf[bs][mode];
1491       if (mode == best_mode_index) {
1492         *fact -= (*fact >> RD_THRESH_LOG_DEC_FACTOR);
1493       } else {
1494         *fact = AOMMIN(*fact + RD_THRESH_INC, max_rd_thresh_factor);
1495       }
1496     }
1497   }
1498 }
1499 
// Updates the adaptive RD threshold factors in factor_buf after
// best_mode_index has been chosen for a block of size bsize.
//
// The cap applied to the factors is
// use_adaptive_rd_thresh * RD_THRESH_MAX_FACT; use_adaptive_rd_thresh must be
// positive (asserted).
//
// Block size range that gets updated:
//  - bsize greater than the sequence's superblock size: only bsize itself;
//  - otherwise: bsize - 2 .. bsize + 2, clamped below at BLOCK_4X4 and above
//    at the superblock size.
//
// The inter range [inter_mode_start, inter_mode_end) is processed first,
// followed by the intra range [intra_mode_start, intra_mode_end).
void av1_update_rd_thresh_fact(
    const AV1_COMMON *const cm, int (*factor_buf)[MAX_MODES],
    int use_adaptive_rd_thresh, BLOCK_SIZE bsize, THR_MODES best_mode_index,
    THR_MODES inter_mode_start, THR_MODES inter_mode_end,
    THR_MODES intra_mode_start, THR_MODES intra_mode_end) {
  assert(use_adaptive_rd_thresh > 0);
  const int max_rd_thresh_factor = use_adaptive_rd_thresh * RD_THRESH_MAX_FACT;

  // Start from the single-size range. It is kept as-is for block sizes above
  // the superblock size, i.e. those with 1:4 and 4:1 aspect ratios.
  // TODO(any): Experiment with threshold update for parent/child blocks
  BLOCK_SIZE min_size = bsize;
  BLOCK_SIZE max_size = bsize;
  if (bsize <= cm->seq_params->sb_size) {
    // Widen to the neighbouring block sizes, clamped to the valid range.
    min_size = AOMMAX(bsize - 2, BLOCK_4X4);
    max_size = AOMMIN(bsize + 2, (int)cm->seq_params->sb_size);
  }

  // Inter modes first, then intra modes, over the same block size range.
  update_thr_fact(factor_buf, best_mode_index, inter_mode_start, inter_mode_end,
                  min_size, max_size, max_rd_thresh_factor);
  update_thr_fact(factor_buf, best_mode_index, intra_mode_start, intra_mode_end,
                  min_size, max_size, max_rd_thresh_factor);
}
1525 
// Returns the intra cost penalty derived from the DC quantizer value
// q = av1_dc_quant_QTX(qindex, qdelta, bit_depth):
//   AOM_BITS_8  -> 20 * q
//   AOM_BITS_10 -> 5 * q
//   AOM_BITS_12 -> ROUND_POWER_OF_TWO(5 * q, 2)
// Any other bit_depth triggers an assert and, when asserts are compiled out,
// yields -1.
int av1_get_intra_cost_penalty(int qindex, int qdelta,
                               aom_bit_depth_t bit_depth) {
  const int q = av1_dc_quant_QTX(qindex, qdelta, bit_depth);
  int penalty = -1;  // Value returned for an unsupported bit depth.

  switch (bit_depth) {
    case AOM_BITS_8:
      penalty = 20 * q;
      break;
    case AOM_BITS_10:
      penalty = 5 * q;
      break;
    case AOM_BITS_12:
      penalty = ROUND_POWER_OF_TWO(5 * q, 2);
      break;
    default:
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
      break;
  }
  return penalty;
}
1538