• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <assert.h>
13 #include <limits.h>
14 #include <math.h>
15 #include <stdio.h>
16 
17 #include "aom_dsp/aom_dsp_common.h"
18 #include "aom_mem/aom_mem.h"
19 #include "aom_ports/bitops.h"
20 #include "aom_ports/mem.h"
21 #include "aom_ports/aom_once.h"
22 
23 #include "av1/common/common.h"
24 #include "av1/common/entropy.h"
25 #include "av1/common/entropymode.h"
26 #include "av1/common/pred_common.h"
27 #include "av1/common/quant_common.h"
28 #include "av1/common/reconinter.h"
29 #include "av1/common/reconintra.h"
30 #include "av1/common/seg_common.h"
31 
32 #include "av1/encoder/cost.h"
33 #include "av1/encoder/encodemv.h"
34 #include "av1/encoder/encoder.h"
35 #include "av1/encoder/nonrd_opt.h"
36 #include "av1/encoder/ratectrl.h"
37 #include "av1/encoder/rd.h"
38 
39 #define RD_THRESH_POW 1.25
40 
41 // The baseline rd thresholds for breaking out of the rd loop for
42 // certain modes are assumed to be based on 8x8 blocks.
43 // This table is used to correct for block size.
44 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
45 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES_ALL] = {
46   2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, 48, 48, 64, 4, 4, 8, 8, 16, 16
47 };
48 
49 static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA]
50                                             [EXT_TX_SIZES] = {
51                                               { 1, 1, 1, 1 },  // unused
52                                               { 1, 1, 0, 0 },
53                                               { 0, 0, 1, 0 },
54                                             };
55 
56 static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER]
57                                             [EXT_TX_SIZES] = {
58                                               { 1, 1, 1, 1 },  // unused
59                                               { 1, 1, 0, 0 },
60                                               { 0, 0, 1, 0 },
61                                               { 0, 1, 1, 1 },
62                                             };
63 
64 static const int av1_ext_tx_set_idx_to_type[2][AOMMAX(EXT_TX_SETS_INTRA,
65                                                       EXT_TX_SETS_INTER)] = {
66   {
67       // Intra
68       EXT_TX_SET_DCTONLY,
69       EXT_TX_SET_DTT4_IDTX_1DDCT,
70       EXT_TX_SET_DTT4_IDTX,
71   },
72   {
73       // Inter
74       EXT_TX_SET_DCTONLY,
75       EXT_TX_SET_ALL16,
76       EXT_TX_SET_DTT9_IDTX_1DDCT,
77       EXT_TX_SET_DCT_IDTX,
78   },
79 };
80 
av1_fill_mode_rates(AV1_COMMON * const cm,ModeCosts * mode_costs,FRAME_CONTEXT * fc)81 void av1_fill_mode_rates(AV1_COMMON *const cm, ModeCosts *mode_costs,
82                          FRAME_CONTEXT *fc) {
83   int i, j;
84 
85   for (i = 0; i < PARTITION_CONTEXTS; ++i)
86     av1_cost_tokens_from_cdf(mode_costs->partition_cost[i],
87                              fc->partition_cdf[i], NULL);
88 
89   if (cm->current_frame.skip_mode_info.skip_mode_flag) {
90     for (i = 0; i < SKIP_MODE_CONTEXTS; ++i) {
91       av1_cost_tokens_from_cdf(mode_costs->skip_mode_cost[i],
92                                fc->skip_mode_cdfs[i], NULL);
93     }
94   }
95 
96   for (i = 0; i < SKIP_CONTEXTS; ++i) {
97     av1_cost_tokens_from_cdf(mode_costs->skip_txfm_cost[i],
98                              fc->skip_txfm_cdfs[i], NULL);
99   }
100 
101   for (i = 0; i < KF_MODE_CONTEXTS; ++i)
102     for (j = 0; j < KF_MODE_CONTEXTS; ++j)
103       av1_cost_tokens_from_cdf(mode_costs->y_mode_costs[i][j],
104                                fc->kf_y_cdf[i][j], NULL);
105 
106   for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
107     av1_cost_tokens_from_cdf(mode_costs->mbmode_cost[i], fc->y_mode_cdf[i],
108                              NULL);
109   for (i = 0; i < CFL_ALLOWED_TYPES; ++i)
110     for (j = 0; j < INTRA_MODES; ++j)
111       av1_cost_tokens_from_cdf(mode_costs->intra_uv_mode_cost[i][j],
112                                fc->uv_mode_cdf[i][j], NULL);
113 
114   av1_cost_tokens_from_cdf(mode_costs->filter_intra_mode_cost,
115                            fc->filter_intra_mode_cdf, NULL);
116   for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
117     if (av1_filter_intra_allowed_bsize(cm, i))
118       av1_cost_tokens_from_cdf(mode_costs->filter_intra_cost[i],
119                                fc->filter_intra_cdfs[i], NULL);
120   }
121 
122   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
123     av1_cost_tokens_from_cdf(mode_costs->switchable_interp_costs[i],
124                              fc->switchable_interp_cdf[i], NULL);
125 
126   for (i = 0; i < PALATTE_BSIZE_CTXS; ++i) {
127     av1_cost_tokens_from_cdf(mode_costs->palette_y_size_cost[i],
128                              fc->palette_y_size_cdf[i], NULL);
129     av1_cost_tokens_from_cdf(mode_costs->palette_uv_size_cost[i],
130                              fc->palette_uv_size_cdf[i], NULL);
131     for (j = 0; j < PALETTE_Y_MODE_CONTEXTS; ++j) {
132       av1_cost_tokens_from_cdf(mode_costs->palette_y_mode_cost[i][j],
133                                fc->palette_y_mode_cdf[i][j], NULL);
134     }
135   }
136 
137   for (i = 0; i < PALETTE_UV_MODE_CONTEXTS; ++i) {
138     av1_cost_tokens_from_cdf(mode_costs->palette_uv_mode_cost[i],
139                              fc->palette_uv_mode_cdf[i], NULL);
140   }
141 
142   for (i = 0; i < PALETTE_SIZES; ++i) {
143     for (j = 0; j < PALETTE_COLOR_INDEX_CONTEXTS; ++j) {
144       av1_cost_tokens_from_cdf(mode_costs->palette_y_color_cost[i][j],
145                                fc->palette_y_color_index_cdf[i][j], NULL);
146       av1_cost_tokens_from_cdf(mode_costs->palette_uv_color_cost[i][j],
147                                fc->palette_uv_color_index_cdf[i][j], NULL);
148     }
149   }
150 
151   int sign_cost[CFL_JOINT_SIGNS];
152   av1_cost_tokens_from_cdf(sign_cost, fc->cfl_sign_cdf, NULL);
153   for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
154     int *cost_u = mode_costs->cfl_cost[joint_sign][CFL_PRED_U];
155     int *cost_v = mode_costs->cfl_cost[joint_sign][CFL_PRED_V];
156     if (CFL_SIGN_U(joint_sign) == CFL_SIGN_ZERO) {
157       memset(cost_u, 0, CFL_ALPHABET_SIZE * sizeof(*cost_u));
158     } else {
159       const aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
160       av1_cost_tokens_from_cdf(cost_u, cdf_u, NULL);
161     }
162     if (CFL_SIGN_V(joint_sign) == CFL_SIGN_ZERO) {
163       memset(cost_v, 0, CFL_ALPHABET_SIZE * sizeof(*cost_v));
164     } else {
165       const aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
166       av1_cost_tokens_from_cdf(cost_v, cdf_v, NULL);
167     }
168     for (int u = 0; u < CFL_ALPHABET_SIZE; u++)
169       cost_u[u] += sign_cost[joint_sign];
170   }
171 
172   for (i = 0; i < MAX_TX_CATS; ++i)
173     for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
174       av1_cost_tokens_from_cdf(mode_costs->tx_size_cost[i][j],
175                                fc->tx_size_cdf[i][j], NULL);
176 
177   for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i) {
178     av1_cost_tokens_from_cdf(mode_costs->txfm_partition_cost[i],
179                              fc->txfm_partition_cdf[i], NULL);
180   }
181 
182   for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
183     int s;
184     for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
185       if (use_inter_ext_tx_for_txsize[s][i]) {
186         av1_cost_tokens_from_cdf(
187             mode_costs->inter_tx_type_costs[s][i], fc->inter_ext_tx_cdf[s][i],
188             av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][s]]);
189       }
190     }
191     for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
192       if (use_intra_ext_tx_for_txsize[s][i]) {
193         for (j = 0; j < INTRA_MODES; ++j) {
194           av1_cost_tokens_from_cdf(
195               mode_costs->intra_tx_type_costs[s][i][j],
196               fc->intra_ext_tx_cdf[s][i][j],
197               av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[0][s]]);
198         }
199       }
200     }
201   }
202   for (i = 0; i < DIRECTIONAL_MODES; ++i) {
203     av1_cost_tokens_from_cdf(mode_costs->angle_delta_cost[i],
204                              fc->angle_delta_cdf[i], NULL);
205   }
206   av1_cost_tokens_from_cdf(mode_costs->intrabc_cost, fc->intrabc_cdf, NULL);
207 
208   for (i = 0; i < SPATIAL_PREDICTION_PROBS; ++i) {
209     av1_cost_tokens_from_cdf(mode_costs->spatial_pred_cost[i],
210                              fc->seg.spatial_pred_seg_cdf[i], NULL);
211   }
212 
213   for (i = 0; i < SEG_TEMPORAL_PRED_CTXS; ++i) {
214     av1_cost_tokens_from_cdf(mode_costs->tmp_pred_cost[i], fc->seg.pred_cdf[i],
215                              NULL);
216   }
217 
218   if (!frame_is_intra_only(cm)) {
219     for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
220       av1_cost_tokens_from_cdf(mode_costs->comp_inter_cost[i],
221                                fc->comp_inter_cdf[i], NULL);
222     }
223 
224     for (i = 0; i < REF_CONTEXTS; ++i) {
225       for (j = 0; j < SINGLE_REFS - 1; ++j) {
226         av1_cost_tokens_from_cdf(mode_costs->single_ref_cost[i][j],
227                                  fc->single_ref_cdf[i][j], NULL);
228       }
229     }
230 
231     for (i = 0; i < COMP_REF_TYPE_CONTEXTS; ++i) {
232       av1_cost_tokens_from_cdf(mode_costs->comp_ref_type_cost[i],
233                                fc->comp_ref_type_cdf[i], NULL);
234     }
235 
236     for (i = 0; i < UNI_COMP_REF_CONTEXTS; ++i) {
237       for (j = 0; j < UNIDIR_COMP_REFS - 1; ++j) {
238         av1_cost_tokens_from_cdf(mode_costs->uni_comp_ref_cost[i][j],
239                                  fc->uni_comp_ref_cdf[i][j], NULL);
240       }
241     }
242 
243     for (i = 0; i < REF_CONTEXTS; ++i) {
244       for (j = 0; j < FWD_REFS - 1; ++j) {
245         av1_cost_tokens_from_cdf(mode_costs->comp_ref_cost[i][j],
246                                  fc->comp_ref_cdf[i][j], NULL);
247       }
248     }
249 
250     for (i = 0; i < REF_CONTEXTS; ++i) {
251       for (j = 0; j < BWD_REFS - 1; ++j) {
252         av1_cost_tokens_from_cdf(mode_costs->comp_bwdref_cost[i][j],
253                                  fc->comp_bwdref_cdf[i][j], NULL);
254       }
255     }
256 
257     for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
258       av1_cost_tokens_from_cdf(mode_costs->intra_inter_cost[i],
259                                fc->intra_inter_cdf[i], NULL);
260     }
261 
262     for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) {
263       av1_cost_tokens_from_cdf(mode_costs->newmv_mode_cost[i], fc->newmv_cdf[i],
264                                NULL);
265     }
266 
267     for (i = 0; i < GLOBALMV_MODE_CONTEXTS; ++i) {
268       av1_cost_tokens_from_cdf(mode_costs->zeromv_mode_cost[i],
269                                fc->zeromv_cdf[i], NULL);
270     }
271 
272     for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) {
273       av1_cost_tokens_from_cdf(mode_costs->refmv_mode_cost[i], fc->refmv_cdf[i],
274                                NULL);
275     }
276 
277     for (i = 0; i < DRL_MODE_CONTEXTS; ++i) {
278       av1_cost_tokens_from_cdf(mode_costs->drl_mode_cost0[i], fc->drl_cdf[i],
279                                NULL);
280     }
281     for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
282       av1_cost_tokens_from_cdf(mode_costs->inter_compound_mode_cost[i],
283                                fc->inter_compound_mode_cdf[i], NULL);
284     for (i = 0; i < BLOCK_SIZES_ALL; ++i)
285       av1_cost_tokens_from_cdf(mode_costs->compound_type_cost[i],
286                                fc->compound_type_cdf[i], NULL);
287     for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
288       if (av1_is_wedge_used(i)) {
289         av1_cost_tokens_from_cdf(mode_costs->wedge_idx_cost[i],
290                                  fc->wedge_idx_cdf[i], NULL);
291       }
292     }
293     for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
294       av1_cost_tokens_from_cdf(mode_costs->interintra_cost[i],
295                                fc->interintra_cdf[i], NULL);
296       av1_cost_tokens_from_cdf(mode_costs->interintra_mode_cost[i],
297                                fc->interintra_mode_cdf[i], NULL);
298     }
299     for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
300       av1_cost_tokens_from_cdf(mode_costs->wedge_interintra_cost[i],
301                                fc->wedge_interintra_cdf[i], NULL);
302     }
303     for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
304       av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost[i],
305                                fc->motion_mode_cdf[i], NULL);
306     }
307     for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
308       av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost1[i],
309                                fc->obmc_cdf[i], NULL);
310     }
311     for (i = 0; i < COMP_INDEX_CONTEXTS; ++i) {
312       av1_cost_tokens_from_cdf(mode_costs->comp_idx_cost[i],
313                                fc->compound_index_cdf[i], NULL);
314     }
315     for (i = 0; i < COMP_GROUP_IDX_CONTEXTS; ++i) {
316       av1_cost_tokens_from_cdf(mode_costs->comp_group_idx_cost[i],
317                                fc->comp_group_idx_cdf[i], NULL);
318     }
319   }
320 }
321 
// Fills the three loop-restoration cost tables in 'mode_costs' (switchable,
// Wiener and sgrproj) from the matching CDFs in 'fc'.
void av1_fill_lr_rates(ModeCosts *mode_costs, FRAME_CONTEXT *fc) {
  // Restoration type selection.
  av1_cost_tokens_from_cdf(mode_costs->switchable_restore_cost,
                           fc->switchable_restore_cdf, NULL);

  // Per-filter flags.
  av1_cost_tokens_from_cdf(mode_costs->wiener_restore_cost,
                           fc->wiener_restore_cdf, NULL);
  av1_cost_tokens_from_cdf(mode_costs->sgrproj_restore_cost,
                           fc->sgrproj_restore_cdf, NULL);
}
330 
331 // Values are now correlated to quantizer.
332 static int sad_per_bit_lut_8[QINDEX_RANGE];
333 static int sad_per_bit_lut_10[QINDEX_RANGE];
334 static int sad_per_bit_lut_12[QINDEX_RANGE];
335 
// Fills 'bit16lut[0 .. range - 1]' for the given bit depth.
//
// Each entry is a linear function of the q value that
// av1_convert_qindex_to_q() returns for that index, truncated to int. The
// table is generated by formula rather than hard-coded so that experimental
// changes to the quantizer tables are easier to evaluate.
static void init_me_luts_bd(int *bit16lut, int range,
                            aom_bit_depth_t bit_depth) {
  for (int qindex = 0; qindex < range; ++qindex) {
    const double q = av1_convert_qindex_to_q(qindex, bit_depth);
    bit16lut[qindex] = (int)(0.0418 * q + 2.4107);
  }
}
347 
init_me_luts(void)348 static void init_me_luts(void) {
349   init_me_luts_bd(sad_per_bit_lut_8, QINDEX_RANGE, AOM_BITS_8);
350   init_me_luts_bd(sad_per_bit_lut_10, QINDEX_RANGE, AOM_BITS_10);
351   init_me_luts_bd(sad_per_bit_lut_12, QINDEX_RANGE, AOM_BITS_12);
352 }
353 
av1_init_me_luts(void)354 void av1_init_me_luts(void) { aom_once(init_me_luts); }
355 
// ARF boost adjustment, indexed by boost_index in av1_compute_rd_mult(). The
// value is applied as rdmult += (rdmult * factor) >> 7.
static const int rd_boost_factor[16] = {
  64, 32, 32, 32, 24, 16, 12, 12,  //
  8,  8,  4,  4,  2,  2,  1,  0,
};

// Layer depth adjustment, indexed by layer_depth in av1_compute_rd_mult(). The
// value is applied as rdmult = (rdmult * factor) >> 7.
static const int rd_layer_depth_factor[7] = {
  160, 160, 160, 160, 192, 208, 224,
};
362 
// Default rd multiplier for inter frames, as a linear function of the
// argument: 3.2 at zero, rising by 0.0015 per unit.
// This is a first pass estimate based on data from a previous Vizer run.
static double def_inter_rd_multiplier(int qindex) {
  const double base = 3.2;
  const double slope = 0.0015;
  return base + (slope * (double)qindex);
}
369 
// Default rd multiplier for ARF/Golden frames, as a linear function of the
// argument: 3.25 at zero, rising by 0.0015 per unit.
// This is a first pass estimate based on data from a previous Vizer run.
static double def_arf_rd_multiplier(int qindex) {
  const double base = 3.25;
  const double slope = 0.0015;
  return base + (slope * (double)qindex);
}
376 
// Default rd multiplier for key frames, as a linear function of the
// argument: 3.3 at zero, rising by 0.0015 per unit.
// This is a first pass estimate based on data from a previous Vizer run.
static double def_kf_rd_multiplier(int qindex) {
  const double base = 3.3;
  const double slope = 0.0015;
  return base + (slope * (double)qindex);
}
383 
// Computes the base rd multiplier for a frame from its qindex.
//
// The DC quantizer step q for (qindex, bit_depth) is squared and scaled by a
// multiplier chosen by update type (key frame, GF/ARF, or any other type).
// Note that the multiplier helpers are evaluated on q, not on qindex. For 10
// and 12 bit content the result is then rounded down by 4 and 8 bits.
//
// Returns a value in [1, INT_MAX], or -1 (after asserting) when 'bit_depth'
// is not one of AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12.
int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
                                        FRAME_UPDATE_TYPE update_type,
                                        int qindex) {
  const int q = av1_dc_quant_QTX(qindex, 0, bit_depth);

  double q_mult;
  switch (update_type) {
    case KF_UPDATE: q_mult = def_kf_rd_multiplier(q); break;
    case GF_UPDATE:
    case ARF_UPDATE: q_mult = def_arf_rd_multiplier(q); break;
    default: q_mult = def_inter_rd_multiplier(q); break;
  }

  int64_t rdmult = q * q;
  rdmult = (int64_t)((double)rdmult * q_mult);

  // Number of bits to round away so the result is comparable across depths.
  int shift;
  switch (bit_depth) {
    case AOM_BITS_8: shift = 0; break;
    case AOM_BITS_10: shift = 4; break;
    case AOM_BITS_12: shift = 8; break;
    default:
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
      return -1;
  }
  if (shift > 0) rdmult = ROUND_POWER_OF_TWO(rdmult, shift);

  if (rdmult <= 0) return 1;
  return (int)AOMMIN(rdmult, INT_MAX);
}
410 
av1_compute_rd_mult(const int qindex,const aom_bit_depth_t bit_depth,const FRAME_UPDATE_TYPE update_type,const int layer_depth,const int boost_index,const FRAME_TYPE frame_type,const int use_fixed_qp_offsets,const int is_stat_consumption_stage)411 int av1_compute_rd_mult(const int qindex, const aom_bit_depth_t bit_depth,
412                         const FRAME_UPDATE_TYPE update_type,
413                         const int layer_depth, const int boost_index,
414                         const FRAME_TYPE frame_type,
415                         const int use_fixed_qp_offsets,
416                         const int is_stat_consumption_stage) {
417   int64_t rdmult =
418       av1_compute_rd_mult_based_on_qindex(bit_depth, update_type, qindex);
419   if (is_stat_consumption_stage && !use_fixed_qp_offsets &&
420       (frame_type != KEY_FRAME)) {
421     // Layer depth adjustment
422     rdmult = (rdmult * rd_layer_depth_factor[layer_depth]) >> 7;
423     // ARF boost adjustment
424     rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
425   }
426   return (int)rdmult;
427 }
428 
// Returns the qindex offset whose DC quantizer step best matches the current
// step divided by sqrt(beta).
//
// The target step is rint(q / sqrt(beta)). If it equals the current step the
// offset is 0. Otherwise qindex is walked one step at a time, downwards
// (stopping at 0) or upwards (stopping at MAXQ), until the step at the new
// index reaches or passes the target. 'beta' must be positive.
int av1_get_deltaq_offset(aom_bit_depth_t bit_depth, int qindex, double beta) {
  assert(beta > 0.0);
  const int base_q = av1_dc_quant_QTX(qindex, 0, bit_depth);
  const int target_q = (int)rint(base_q / sqrt(beta));
  if (target_q == base_q) return 0;

  int idx = qindex;
  if (target_q < base_q) {
    // Need a smaller step: search towards lower indices.
    while (idx > 0) {
      --idx;
      if (av1_dc_quant_QTX(idx, 0, bit_depth) <= target_q) break;
    }
  } else {
    // Need a larger step: search towards higher indices.
    while (idx < MAXQ) {
      ++idx;
      if (av1_dc_quant_QTX(idx, 0, bit_depth) >= target_q) break;
    }
  }
  return idx - qindex;
}
456 
av1_adjust_q_from_delta_q_res(int delta_q_res,int prev_qindex,int curr_qindex)457 int av1_adjust_q_from_delta_q_res(int delta_q_res, int prev_qindex,
458                                   int curr_qindex) {
459   curr_qindex = clamp(curr_qindex, delta_q_res, 256 - delta_q_res);
460   const int sign_deltaq_index = curr_qindex - prev_qindex >= 0 ? 1 : -1;
461   const int deltaq_deadzone = delta_q_res / 4;
462   const int qmask = ~(delta_q_res - 1);
463   int abs_deltaq_index = abs(curr_qindex - prev_qindex);
464   abs_deltaq_index = (abs_deltaq_index + deltaq_deadzone) & qmask;
465   int adjust_qindex = prev_qindex + sign_deltaq_index * abs_deltaq_index;
466   adjust_qindex = AOMMAX(adjust_qindex, MINQ + 1);
467   return adjust_qindex;
468 }
469 
av1_get_adaptive_rdmult(const AV1_COMP * cpi,double beta)470 int av1_get_adaptive_rdmult(const AV1_COMP *cpi, double beta) {
471   assert(beta > 0.0);
472   const AV1_COMMON *cm = &cpi->common;
473 
474   const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
475   const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
476   const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
477   const FRAME_TYPE frame_type = cm->current_frame.frame_type;
478 
479   const int qindex_rdmult = cm->quant_params.base_qindex;
480   return (int)(av1_compute_rd_mult(
481                    qindex_rdmult, cm->seq_params->bit_depth,
482                    cpi->ppi->gf_group.update_type[cpi->gf_frame_index],
483                    layer_depth, boost_index, frame_type,
484                    cpi->oxcf.q_cfg.use_fixed_qp_offsets,
485                    is_stat_consumption_stage(cpi)) /
486                beta);
487 }
488 
// Returns the rd threshold scale factor for a qindex.
//
// The DC quantizer step is divided by 4, 16 or 64 for 8, 10 or 12 bit
// content, raised to RD_THRESH_POW, multiplied by 5.12 and truncated. The
// result is never below 8. Returns -1 (after asserting) for any other bit
// depth.
static int compute_rd_thresh_factor(int qindex, aom_bit_depth_t bit_depth) {
  double depth_scale;
  switch (bit_depth) {
    case AOM_BITS_8: depth_scale = 4.0; break;
    case AOM_BITS_10: depth_scale = 16.0; break;
    case AOM_BITS_12: depth_scale = 64.0; break;
    default:
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
      return -1;
  }
  const double q = av1_dc_quant_QTX(qindex, 0, bit_depth) / depth_scale;
  // TODO(debargha): Adjust the function below.
  const int factor = (int)(pow(q, RD_THRESH_POW) * 5.12);
  return AOMMAX(factor, 8);
}
506 
av1_set_sad_per_bit(const AV1_COMP * cpi,int * sadperbit,int qindex)507 void av1_set_sad_per_bit(const AV1_COMP *cpi, int *sadperbit, int qindex) {
508   switch (cpi->common.seq_params->bit_depth) {
509     case AOM_BITS_8: *sadperbit = sad_per_bit_lut_8[qindex]; break;
510     case AOM_BITS_10: *sadperbit = sad_per_bit_lut_10[qindex]; break;
511     case AOM_BITS_12: *sadperbit = sad_per_bit_lut_12[qindex]; break;
512     default:
513       assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
514   }
515 }
516 
// Recomputes rd->threshes[segment][block size][mode] from rd->thresh_mult[].
//
// With 'use_nonrd_pick_mode' set, only the modes reachable through
// real_time_ref_combos / inter_mode_list / intra_mode_list are updated;
// otherwise all MAX_MODES entries are. Each threshold is
// thresh_mult * q_factor * block_size_factor / 4, saturated to INT_MAX when
// the product could overflow.
static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd,
                                 int use_nonrd_pick_mode) {
  THR_MODES mode_indices[RTC_REFS * RTC_MODES] = { 0 };
  int num_modes_count = MAX_MODES;

  if (use_nonrd_pick_mode) {
    // Collect the subset of mode indices used by the non-rd path.
    num_modes_count = 0;
    for (int r_idx = 0; r_idx < RTC_REFS; r_idx++) {
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
      if (ref == INTRA_FRAME) {
        for (int m = 0; m < RTC_INTRA_MODES; m++) {
          mode_indices[num_modes_count++] =
              mode_idx[ref][mode_offset(intra_mode_list[m])];
        }
      } else {
        for (int m = 0; m < RTC_INTER_MODES; m++) {
          mode_indices[num_modes_count++] =
              mode_idx[ref][mode_offset(inter_mode_list[m])];
        }
      }
    }
  }

  for (int segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    const int seg_qindex =
        av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex);
    const int qindex =
        clamp(seg_qindex + cm->quant_params.y_dc_delta_q, 0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex, cm->seq_params->bit_depth);

    for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      const int thresh_max = INT_MAX / t;

      for (int n = 0; n < num_modes_count; ++n) {
        const int mode_index = use_nonrd_pick_mode ? mode_indices[n] : n;
        if (rd->thresh_mult[mode_index] < thresh_max) {
          rd->threshes[segment_id][bsize][mode_index] =
              rd->thresh_mult[mode_index] * t / 4;
        } else {
          rd->threshes[segment_id][bsize][mode_index] = INT_MAX;
        }
      }
    }
  }
}
561 
// Fills the coefficient cost tables in 'coeff_costs' from the CDFs in 'fc',
// for the first min(num_planes, PLANE_TYPES) plane types.
//
// Two groups of tables are produced:
//   - eob_costs[size class][plane]: EOB flag costs for the seven size
//     classes backed by eob_flag_cdf16 ... eob_flag_cdf1024.
//   - coeff_costs[tx_size][plane]: txb skip, base level, EOB extra bit, DC
//     sign and level-range ("lps") costs.
void av1_fill_coeff_costs(CoeffCosts *coeff_costs, FRAME_CONTEXT *fc,
                          const int num_planes) {
  const int nplanes = AOMMIN(num_planes, PLANE_TYPES);

  for (int eob_multi_size = 0; eob_multi_size < 7; ++eob_multi_size) {
    for (int plane = 0; plane < nplanes; ++plane) {
      LV_MAP_EOB_COST *const eob_dst =
          &coeff_costs->eob_costs[eob_multi_size][plane];

      for (int ctx = 0; ctx < 2; ++ctx) {
        aom_cdf_prob *eob_cdf;
        switch (eob_multi_size) {
          case 0: eob_cdf = fc->eob_flag_cdf16[plane][ctx]; break;
          case 1: eob_cdf = fc->eob_flag_cdf32[plane][ctx]; break;
          case 2: eob_cdf = fc->eob_flag_cdf64[plane][ctx]; break;
          case 3: eob_cdf = fc->eob_flag_cdf128[plane][ctx]; break;
          case 4: eob_cdf = fc->eob_flag_cdf256[plane][ctx]; break;
          case 5: eob_cdf = fc->eob_flag_cdf512[plane][ctx]; break;
          case 6:
          default: eob_cdf = fc->eob_flag_cdf1024[plane][ctx]; break;
        }
        av1_cost_tokens_from_cdf(eob_dst->eob_cost[ctx], eob_cdf, NULL);
      }
    }
  }

  for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
    for (int plane = 0; plane < nplanes; ++plane) {
      LV_MAP_COEFF_COST *const dst = &coeff_costs->coeff_costs[tx_size][plane];

      for (int ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(dst->txb_skip_cost[ctx],
                                 fc->txb_skip_cdf[tx_size][ctx], NULL);
      }

      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS_EOB; ++ctx) {
        av1_cost_tokens_from_cdf(dst->base_eob_cost[ctx],
                                 fc->coeff_base_eob_cdf[tx_size][plane][ctx],
                                 NULL);
      }

      // Base level costs. Entries 0..3 come from the CDF; entries 4..7 are
      // derived from them (entry 4 is zero, 5..7 are differences).
      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
        int *const base = dst->base_cost[ctx];
        av1_cost_tokens_from_cdf(base, fc->coeff_base_cdf[tx_size][plane][ctx],
                                 NULL);
        base[4] = 0;
        base[5] = base[1] + av1_cost_literal(1) - base[0];
        base[6] = base[2] - base[1];
        base[7] = base[3] - base[2];
      }

      for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(dst->eob_extra_cost[ctx],
                                 fc->eob_extra_cdf[tx_size][plane][ctx], NULL);
      }

      for (int ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(dst->dc_sign_cost[ctx],
                                 fc->dc_sign_cdf[plane][ctx], NULL);
      }

      for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
        int br_rate[BR_CDF_SIZE];
        av1_cost_tokens_from_cdf(
            br_rate, fc->coeff_br_cdf[AOMMIN(tx_size, TX_32X32)][plane][ctx],
            NULL);

        // First half: cumulative cost of each level. Levels are coded in
        // groups of BR_CDF_SIZE - 1; the last symbol of the CDF is the escape
        // to the next group, so its cost accumulates across groups.
        int escape_cost = 0;
        int level = 0;
        for (; level < COEFF_BASE_RANGE; level += BR_CDF_SIZE - 1) {
          for (int sym = 0; sym < BR_CDF_SIZE - 1; sym++) {
            dst->lps_cost[ctx][level + sym] = escape_cost + br_rate[sym];
          }
          escape_cost += br_rate[BR_CDF_SIZE - 1];
        }
        dst->lps_cost[ctx][level] = escape_cost;

        // Second half: the first entry repeated, then the cost difference
        // between consecutive levels.
        dst->lps_cost[ctx][0 + COEFF_BASE_RANGE + 1] = dst->lps_cost[ctx][0];
        for (int n = 1; n <= COEFF_BASE_RANGE; ++n) {
          dst->lps_cost[ctx][n + COEFF_BASE_RANGE + 1] =
              dst->lps_cost[ctx][n] - dst->lps_cost[ctx][n - 1];
        }
      }
    }
  }
}
654 
// Rebuilds the motion vector cost tables in 'mv_costs' from 'nmvc'.
//
// The per-component cost pointers are set to index MV_MAX of their backing
// arrays. 'mv_cost_stack' then selects the table set to build: the regular
// one with MV_SUBPEL_NONE precision when 'integer_mv' is set, otherwise the
// high-precision or regular one according to 'usehp'.
// Does nothing when 'mv_costs' is NULL (not allocated).
void av1_fill_mv_costs(const nmv_context *nmvc, int integer_mv, int usehp,
                       MvCosts *mv_costs) {
  // Avoid accessing 'mv_costs' when it is not allocated.
  if (mv_costs == NULL) return;

  for (int comp = 0; comp < 2; ++comp) {
    mv_costs->nmv_cost[comp] = &mv_costs->nmv_cost_alloc[comp][MV_MAX];
    mv_costs->nmv_cost_hp[comp] = &mv_costs->nmv_cost_hp_alloc[comp][MV_MAX];
  }

  if (integer_mv) {
    mv_costs->mv_cost_stack = (int **)&mv_costs->nmv_cost;
    av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
                             nmvc, MV_SUBPEL_NONE);
    return;
  }

  mv_costs->mv_cost_stack = usehp ? mv_costs->nmv_cost_hp : mv_costs->nmv_cost;
  av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
                           nmvc, usehp);
}
675 
// Rebuilds the displacement vector cost tables in 'dv_costs' from 'ndvc'.
// The per-component pointers are set to index MV_MAX of their backing arrays,
// and the tables are built with MV_SUBPEL_NONE precision.
void av1_fill_dv_costs(const nmv_context *ndvc, IntraBCMVCosts *dv_costs) {
  for (int comp = 0; comp < 2; ++comp) {
    dv_costs->dv_costs[comp] = &dv_costs->dv_costs_alloc[comp][MV_MAX];
  }
  av1_build_nmv_cost_table(dv_costs->joint_mv, dv_costs->dv_costs, ndvc,
                           MV_SUBPEL_NONE);
}
682 
683 // Populates speed features based on codec control settings (of type
684 // COST_UPDATE_TYPE) and expected speed feature settings (of type
685 // INTERNAL_COST_UPDATE_TYPE) by considering the least frequent cost update.
686 // The populated/updated speed features are used for cost updates in the
687 // encoder.
688 // WARNING: Population of unified cost update frequency needs to be taken care
689 // accordingly, in case of any modifications/additions to the enum
690 // COST_UPDATE_TYPE/INTERNAL_COST_UPDATE_TYPE.
populate_unified_cost_update_freq(const CostUpdateFreq cost_upd_freq,SPEED_FEATURES * const sf)691 static INLINE void populate_unified_cost_update_freq(
692     const CostUpdateFreq cost_upd_freq, SPEED_FEATURES *const sf) {
693   INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
694   // Mapping of entropy cost update frequency from the encoder's codec control
695   // settings of type COST_UPDATE_TYPE to speed features of type
696   // INTERNAL_COST_UPDATE_TYPE.
697   static const INTERNAL_COST_UPDATE_TYPE
698       map_cost_upd_to_internal_cost_upd[NUM_COST_UPDATE_TYPES] = {
699         INTERNAL_COST_UPD_SB, INTERNAL_COST_UPD_SBROW, INTERNAL_COST_UPD_TILE,
700         INTERNAL_COST_UPD_OFF
701       };
702 
703   inter_sf->mv_cost_upd_level =
704       AOMMIN(inter_sf->mv_cost_upd_level,
705              map_cost_upd_to_internal_cost_upd[cost_upd_freq.mv]);
706   inter_sf->coeff_cost_upd_level =
707       AOMMIN(inter_sf->coeff_cost_upd_level,
708              map_cost_upd_to_internal_cost_upd[cost_upd_freq.coeff]);
709   inter_sf->mode_cost_upd_level =
710       AOMMIN(inter_sf->mode_cost_upd_level,
711              map_cost_upd_to_internal_cost_upd[cost_upd_freq.mode]);
712   sf->intra_sf.dv_cost_upd_level =
713       AOMMIN(sf->intra_sf.dv_cost_upd_level,
714              map_cost_upd_to_internal_cost_upd[cost_upd_freq.dv]);
715 }
716 
717 // Checks if entropy costs should be initialized/updated at frame level or not.
is_frame_level_cost_upd_freq_set(const AV1_COMMON * const cm,const INTERNAL_COST_UPDATE_TYPE cost_upd_level,const int use_nonrd_pick_mode,const int frames_since_key)718 static INLINE int is_frame_level_cost_upd_freq_set(
719     const AV1_COMMON *const cm, const INTERNAL_COST_UPDATE_TYPE cost_upd_level,
720     const int use_nonrd_pick_mode, const int frames_since_key) {
721   const int fill_costs =
722       frame_is_intra_only(cm) ||
723       (use_nonrd_pick_mode ? frames_since_key < 2
724                            : (cm->current_frame.frame_number & 0x07) == 1);
725   return ((!use_nonrd_pick_mode && cost_upd_level != INTERNAL_COST_UPD_OFF) ||
726           cost_upd_level == INTERNAL_COST_UPD_TILE || fill_costs);
727 }
728 
729 // Decide whether we want to update the mode entropy cost for the current frame.
730 // The logit is currently inherited from selective_disable_cdf_rtc.
should_force_mode_cost_update(const AV1_COMP * cpi)731 static AOM_INLINE int should_force_mode_cost_update(const AV1_COMP *cpi) {
732   const REAL_TIME_SPEED_FEATURES *const rt_sf = &cpi->sf.rt_sf;
733   if (!rt_sf->frame_level_mode_cost_update) {
734     return false;
735   }
736 
737   if (cpi->oxcf.algo_cfg.cdf_update_mode == 2) {
738     return cpi->frames_since_last_update == 1;
739   } else if (cpi->oxcf.algo_cfg.cdf_update_mode == 1) {
740     if (cpi->svc.number_spatial_layers == 1 &&
741         cpi->svc.number_temporal_layers == 1) {
742       const AV1_COMMON *const cm = &cpi->common;
743       const RATE_CONTROL *const rc = &cpi->rc;
744 
745       return frame_is_intra_only(cm) || is_frame_resize_pending(cpi) ||
746              rc->high_source_sad || rc->frames_since_key < 10 ||
747              cpi->cyclic_refresh->counter_encode_maxq_scene_change < 10 ||
748              cm->current_frame.frame_number % 8 == 0;
749     } else if (cpi->svc.number_temporal_layers > 1) {
750       return cpi->svc.temporal_layer_id != cpi->svc.number_temporal_layers - 1;
751     }
752   }
753 
754   return false;
755 }
756 
// Sets up the frame-level rate-distortion state of `cpi`: the RD multiplier,
// the error-per-bit value, the block thresholds, and the mv / coefficient /
// mode / dv entropy cost tables that are due for a frame-level refresh.
void av1_initialize_rd_consts(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  SPEED_FEATURES *const sf = &cpi->sf;
  RD_OPT *const rd = &cpi->rd;
  const int use_nonrd_pick_mode = sf->rt_sf.use_nonrd_pick_mode;
  const int frames_since_key = cpi->rc.frames_since_key;

  // Inputs of the RD multiplier.
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
  const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
  const FRAME_TYPE frame_type = cm->current_frame.frame_type;
  const int qindex_rdmult =
      cm->quant_params.base_qindex + cm->quant_params.y_dc_delta_q;

  rd->RDMULT = av1_compute_rd_mult(
      qindex_rdmult, cm->seq_params->bit_depth,
      gf_group->update_type[cpi->gf_frame_index], layer_depth, boost_index,
      frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
      is_stat_consumption_stage(cpi));
#if CONFIG_RD_COMMAND
  if (cpi->oxcf.pass == 2) {
    // An RD command may override the multiplier for this frame.
    const RD_COMMAND *rd_command = &cpi->rd_command;
    if (rd_command->option_ls[rd_command->frame_index] ==
        RD_OPTION_SET_Q_RDMULT) {
      rd->RDMULT = rd_command->rdmult_ls[rd_command->frame_index];
    }
  }
#endif  // CONFIG_RD_COMMAND

  av1_set_error_per_bit(&x->errorperbit, rd->RDMULT);

  set_block_thresholds(cm, rd, use_nonrd_pick_mode);

  // Fold the codec control cost update frequencies into the speed features
  // before they are consulted below.
  populate_unified_cost_update_freq(cpi->oxcf.cost_upd_freq, sf);
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;

  // Frame level mv cost update
  if (is_frame_level_cost_upd_freq_set(cm, inter_sf->mv_cost_upd_level,
                                       use_nonrd_pick_mode, frames_since_key)) {
    av1_fill_mv_costs(&cm->fc->nmvc, cm->features.cur_frame_force_integer_mv,
                      cm->features.allow_high_precision_mv, x->mv_costs);
  }

  // Frame level coefficient cost update
  if (is_frame_level_cost_upd_freq_set(cm, inter_sf->coeff_cost_upd_level,
                                       use_nonrd_pick_mode, frames_since_key)) {
    av1_fill_coeff_costs(&x->coeff_costs, cm->fc, av1_num_planes(cm));
  }

  // Frame level mode cost update
  if (should_force_mode_cost_update(cpi) ||
      is_frame_level_cost_upd_freq_set(cm, inter_sf->mode_cost_upd_level,
                                       use_nonrd_pick_mode, frames_since_key)) {
    av1_fill_mode_rates(cm, &x->mode_costs, cm->fc);
  }

  // Frame level dv cost update
  if (av1_need_dv_costs(cpi)) {
    // The dv cost storage is allocated on first use and then hooked up to
    // the macroblock.
    if (cpi->td.dv_costs_alloc == NULL) {
      CHECK_MEM_ERROR(
          cm, cpi->td.dv_costs_alloc,
          (IntraBCMVCosts *)aom_malloc(sizeof(*cpi->td.dv_costs_alloc)));
      x->dv_costs = cpi->td.dv_costs_alloc;
    }
    av1_fill_dv_costs(&cm->fc->ndvc, x->dv_costs);
  }
}
821 
// Looks up the normalized rate (*r_q10) and normalized distortion (*d_q10),
// both in Q10, for the Q10 value xsq_q10 by linear interpolation between the
// two neighbouring table samples.
//
// xsq_q10 must be non-negative and small enough that the interpolation stays
// inside the tables (the caller visible in this file clamps it to 245727);
// both conditions are checked with assert() in debug builds.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The tables below must be of the same size.

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256.

  // Normalized rate:
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The
  // closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142,
    4044,  3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
    3133,  3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353,
    2290,  2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
    1608,  1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963,
    911,   864,  821,  781,  745,  680,  623,  574,  530,  490,  455,  424,
    395,   345,  304,  269,  239,  213,  190,  171,  154,  126,  104,  87,
    73,    61,   52,   44,   38,   28,   21,   16,   12,   10,   8,    6,
    5,     3,    2,    1,    1,    1,    0,    0,
  };
  // Normalized distortion:
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
    0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,
    5,    6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,
    18,   21,   24,   26,   29,   31,   34,   36,   39,   44,   49,   54,
    59,   64,   69,   73,   78,   88,   97,   106,  115,  124,  133,  142,
    151,  167,  184,  200,  215,  231,  245,  260,  274,  301,  327,  351,
    375,  397,  418,  439,  458,  495,  528,  559,  587,  613,  637,  659,
    680,  717,  749,  777,  801,  823,  842,  859,  874,  899,  919,  936,
    949,  960,  969,  977,  983,  994,  1001, 1006, 1010, 1013, 1015, 1017,
    1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
  };
  // Q10 abscissa at which each entry of the two tables above was sampled.
  static const int xsq_iq_q10[] = {
    0,      4,      8,      12,     16,     20,     24,     28,     32,
    40,     48,     56,     64,     72,     80,     88,     96,     112,
    128,    144,    160,    176,    192,    208,    224,    256,    288,
    320,    352,    384,    416,    448,    480,    544,    608,    672,
    736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
    1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
    3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
    7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
    16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
    36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
    81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
    180192, 196576, 212960, 229344, 245728,
  };
  // Enforce the "same size" requirement stated above (all tables hold int).
  assert(sizeof(rate_tab_q10) == sizeof(dist_tab_q10) &&
         sizeof(rate_tab_q10) == sizeof(xsq_iq_q10));
  assert(xsq_q10 >= 0);

  // Index of the sample at or below xsq_q10: k selects the octave, the next
  // three bits of tmp select the sample inside it.
  const int tmp = (xsq_q10 >> 2) + 8;
  const int k = get_msb(tmp) - 3;
  const int xq = (k << 3) + ((tmp >> k) & 0x7);
  // Both xq and xq + 1 are read below.
  assert(xq >= 0 &&
         xq + 1 < (int)(sizeof(xsq_iq_q10) / sizeof(xsq_iq_q10[0])));

  // a_q10 / b_q10 are the Q10 interpolation weights of samples xq + 1 / xq.
  const int one_q10 = 1 << 10;
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  const int b_q10 = one_q10 - a_q10;
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}
887 
av1_model_rd_from_var_lapndz(int64_t var,unsigned int n_log2,unsigned int qstep,int * rate,int64_t * dist)888 void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n_log2,
889                                   unsigned int qstep, int *rate,
890                                   int64_t *dist) {
891   // This function models the rate and distortion for a Laplacian
892   // source with given variance when quantized with a uniform quantizer
893   // with given stepsize. The closed form expressions are in:
894   // Hang and Chen, "Source Model for transform video coder and its
895   // application - Part I: Fundamental Theory", IEEE Trans. Circ.
896   // Sys. for Video Tech., April 1997.
897   if (var == 0) {
898     *rate = 0;
899     *dist = 0;
900   } else {
901     int d_q10, r_q10;
902     static const uint32_t MAX_XSQ_Q10 = 245727;
903     const uint64_t xsq_q10_64 =
904         (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
905     const int xsq_q10 = (int)AOMMIN(xsq_q10_64, MAX_XSQ_Q10);
906     model_rd_norm(xsq_q10, &r_q10, &d_q10);
907     *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - AV1_PROB_COST_SHIFT);
908     *dist = (var * (int64_t)d_q10 + 512) >> 10;
909   }
910 }
911 
// Cubic (Catmull-Rom) interpolation through four consecutive samples
// p[0..3]. Returns p[1] for x == 0 and p[2] for x == 1.
static double interp_cubic(const double *p, double x) {
  const double p0 = p[0];
  const double p1 = p[1];
  const double p2 = p[2];
  const double p3 = p[3];
  // Horner form of the cubic in x.
  return p1 + 0.5 * x *
                  (p2 - p0 +
                   x * (2.0 * p0 - 5.0 * p1 + 4.0 * p2 - p3 +
                        x * (3.0 * (p1 - p2) + p3 - p0)));
}
918 
919 /*
920 static double interp_bicubic(const double *p, int p_stride, double x,
921                              double y) {
922   double q[4];
923   q[0] = interp_cubic(p, x);
924   q[1] = interp_cubic(p + p_stride, x);
925   q[2] = interp_cubic(p + 2 * p_stride, x);
926   q[3] = interp_cubic(p + 3 * p_stride, x);
927   return interp_cubic(q, y);
928 }
929 */
930 
931 static const uint8_t bsize_curvfit_model_cat_lookup[BLOCK_SIZES_ALL] = {
932   0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 1, 1, 2, 2, 3, 3
933 };
934 
// Distortion-curve category for a normalized SSE: 1 when sse_norm is
// strictly above 16.0, 0 otherwise.
static int sse_norm_curvfit_model_cat_lookup(double sse_norm) {
  return sse_norm > 16.0 ? 1 : 0;
}
938 
939 // Models distortion by sse using a logistic function on
940 // l = log2(sse / q^2) as:
941 // dbysse = 16 / (1 + k exp(l + c))
get_dbysse_logistic(double l,double c,double k)942 static double get_dbysse_logistic(double l, double c, double k) {
943   const double A = 16.0;
944   const double dbysse = A / (1 + k * exp(l + c));
945   return dbysse;
946 }
947 
// Models rate using a clamped linear function on
// l = log2(sse / q^2) as:
// rate = max(0, a + b * l)
static double get_rate_clamplinear(double l, double a, double b) {
  const double linear = a + b * l;
  if (linear < 0) return 0;
  return linear;
}
955 
956 static const uint8_t bsize_surffit_model_cat_lookup[BLOCK_SIZES_ALL] = {
957   0, 0, 0, 0, 1, 1, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 0, 0, 2, 2, 4, 4
958 };
959 
// Rate model coefficients, one row per surface-fit category. For a row
// { c0, c1, c2, c3 }, rate_surffit_model_params_lookup() derives
//   rpar[0] = c0 + c1 * xm  and  rpar[1] = c2 + c3 * xm.
static const double surffit_rate_params[9][4] = {
  { 638.390212, 2.253108, 166.585650, -3.939401 },
  { 5.256905, 81.997240, -1.321771, 17.694216 },
  { -74.193045, 72.431868, -19.033152, 15.407276 },
  { 416.770113, 14.794188, 167.686830, -6.997756 },
  { 378.511276, 9.558376, 154.658843, -6.635663 },
  { 277.818787, 4.413180, 150.317637, -9.893038 },
  { 142.212132, 11.542038, 94.393964, -5.518517 },
  { 219.100256, 4.007421, 108.932852, -6.981310 },
  { 222.261971, 3.251049, 95.972916, -5.609789 },
};
1016 
// Distortion model coefficients consumed by
// dist_surffit_model_params_lookup(): entries 0-3 feed dpar[0], entries 4-6
// feed dpar[1].
static const double surffit_dist_params[7] = {
  1.475844,   // [0]
  4.328362,   // [1]
  -5.680233,  // [2]
  -0.500994,  // [3]
  0.554585,   // [4]
  4.839478,   // [5]
  -0.695837   // [6]
};
1020 
// Computes the two rate model parameters rpar[0] and rpar[1] for block size
// `bsize`, each a linear function of `xm` whose coefficients come from the
// block size's row of surffit_rate_params.
static void rate_surffit_model_params_lookup(BLOCK_SIZE bsize, double xm,
                                             double *rpar) {
  const double *const coeffs =
      surffit_rate_params[bsize_surffit_model_cat_lookup[bsize]];
  rpar[0] = coeffs[0] + coeffs[1] * xm;
  rpar[1] = coeffs[2] + coeffs[3] * xm;
}
1027 
// Computes the two distortion model parameters dpar[0] and dpar[1] from
// `xm` and surffit_dist_params. `bsize` is accepted but not used.
static void dist_surffit_model_params_lookup(BLOCK_SIZE bsize, double xm,
                                             double *dpar) {
  (void)bsize;
  const double *const coeffs = surffit_dist_params;
  // dpar[0]: offset plus a scaled logistic in xm.
  dpar[0] = coeffs[0] + coeffs[1] / (1 + exp((xm + coeffs[2]) * coeffs[3]));
  // dpar[1]: offset plus a scaled exponential in xm.
  dpar[1] = coeffs[4] + coeffs[5] * exp(coeffs[6] * xm);
}
1035 
// Surface-fit RD model: writes the modelled rate to *rate_f (clamped linear
// in `yl`) and the modelled distortion-by-sse to *distbysse_f (logistic in
// `yl`), with model parameters derived from `bsize` and `xm`. `sse_norm` is
// accepted but not used.
void av1_model_rd_surffit(BLOCK_SIZE bsize, double sse_norm, double xm,
                          double yl, double *rate_f, double *distbysse_f) {
  (void)sse_norm;

  double rate_par[2];
  rate_surffit_model_params_lookup(bsize, xm, rate_par);
  *rate_f = get_rate_clamplinear(yl, rate_par[0], rate_par[1]);

  double dist_par[2];
  dist_surffit_model_params_lookup(bsize, xm, dist_par);
  *distbysse_f = get_dbysse_logistic(yl, dist_par[0], dist_par[1]);
}
1046 
// Rate curves sampled on a uniform 65-point grid, one row per rate category
// (see bsize_curvfit_model_cat_lookup). av1_model_rd_curvfit() interpolates
// between the samples with interp_cubic().
static const double interp_rgrid_curv[4][65] = {
  // Category 0.
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    118.257702,  120.210658,  121.434853,  122.100487,
      122.377758,  122.436865,  72.290102,   96.974289,   101.652727,
      126.830141,  140.417377,  157.644879,  184.315291,  215.823873,
      262.300169,  335.919859,  420.624173,  519.185032,  619.854243,
      726.053595,  827.663369,  933.127475,  1037.988755, 1138.839609,
      1233.342933, 1333.508064, 1428.760126, 1533.396364, 1616.952052,
      1744.539319, 1803.413586, 1951.466618, 1994.227838, 2086.031680,
      2148.635443, 2239.068450, 2222.590637, 2338.859809, 2402.929011,
      2418.727875, 2435.342670, 2471.159469, 2523.187446, 2591.183827,
      2674.905840, 2774.110714, 2888.555675, 3017.997952, 3162.194773,
      3320.903365, 3493.880956, 3680.884773, 3881.672045, 4096.000000,
  },
  // Category 1.
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    13.087244,   15.919735,   25.930313,   24.412411,
      28.567417,   29.924194,   30.857010,   32.742979,   36.382570,
      39.210386,   42.265690,   47.378572,   57.014850,   82.740067,
      137.346562,  219.968084,  316.781856,  415.643773,  516.706538,
      614.914364,  714.303763,  815.512135,  911.210485,  1008.501528,
      1109.787854, 1213.772279, 1322.922561, 1414.752579, 1510.505641,
      1615.741888, 1697.989032, 1780.123933, 1847.453790, 1913.742309,
      1960.828122, 2047.500168, 2085.454095, 2129.230668, 2158.171824,
      2182.231724, 2217.684864, 2269.589211, 2337.264824, 2420.618694,
      2519.557814, 2633.989178, 2763.819779, 2908.956609, 3069.306660,
      3244.776927, 3435.274401, 3640.706076, 3860.978945, 4096.000000,
  },
  // Category 2.
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    4.656893,    5.123633,    5.594132,    6.162376,
      6.918433,    7.768444,    8.739415,    10.105862,   11.477328,
      13.236604,   15.421030,   19.093623,   25.801871,   46.724612,
      98.841054,   181.113466,  272.586364,  359.499769,  445.546343,
      525.944439,  605.188743,  681.793483,  756.668359,  838.486885,
      926.950356,  1015.482542, 1113.353926, 1204.897193, 1288.871992,
      1373.464145, 1455.746628, 1527.796460, 1588.475066, 1658.144771,
      1710.302500, 1807.563351, 1863.197608, 1927.281616, 1964.450872,
      2022.719898, 2100.041145, 2185.205712, 2280.993936, 2387.616216,
      2505.282950, 2634.204540, 2774.591385, 2926.653884, 3090.602436,
      3266.647443, 3454.999303, 3655.868416, 3869.465182, 4096.000000,
  },
  // Category 3.
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.337370,    0.391916,    0.468839,    0.566334,
      0.762564,    1.069225,    1.384361,    1.787581,    2.293948,
      3.251909,    4.412991,    8.050068,    11.606073,   27.668092,
      65.227758,   128.463938,  202.097653,  262.715851,  312.464873,
      355.601398,  400.609054,  447.201352,  495.761568,  552.871938,
      619.067625,  691.984883,  773.753288,  860.628503,  946.262808,
      1019.805896, 1106.061360, 1178.422145, 1244.852258, 1302.173987,
      1399.650266, 1548.092912, 1545.928652, 1670.817500, 1694.523823,
      1779.195362, 1882.155494, 1990.662097, 2108.325181, 2235.456119,
      2372.366287, 2519.367059, 2676.769812, 2844.885918, 3024.026754,
      3214.503695, 3416.628115, 3630.711389, 3857.064892, 4096.000000,
  },
};
1109 
// Distortion-by-sse curves sampled on the same uniform 65-point grid as
// interp_rgrid_curv, one row per category returned by
// sse_norm_curvfit_model_cat_lookup().
// NOTE: Three rows are declared but only two are initialized; the third row
// is implicitly all zeros and is not selected by the lookup above.
static const double interp_dgrid_curv[3][65] = {
  // Category 0 (sse_norm <= 16.0).
  {
      16.000000, 15.962891, 15.925174, 15.886888, 15.848074, 15.808770,
      15.769015, 15.728850, 15.688313, 15.647445, 15.606284, 15.564870,
      15.525918, 15.483820, 15.373330, 15.126844, 14.637442, 14.184387,
      13.560070, 12.880717, 12.165995, 11.378144, 10.438769, 9.130790,
      7.487633,  5.688649,  4.267515,  3.196300,  2.434201,  1.834064,
      1.369920,  1.035921,  0.775279,  0.574895,  0.427232,  0.314123,
      0.233236,  0.171440,  0.128188,  0.092762,  0.067569,  0.049324,
      0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
      0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
      0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
      0.000348,  0.000193,  0.000085,  0.000021,  0.000000,
  },
  // Category 1 (sse_norm > 16.0).
  {
      16.000000, 15.996116, 15.984769, 15.966413, 15.941505, 15.910501,
      15.873856, 15.832026, 15.785466, 15.734633, 15.679981, 15.621967,
      15.560961, 15.460157, 15.288367, 15.052462, 14.466922, 13.921212,
      13.073692, 12.222005, 11.237799, 9.985848,  8.898823,  7.423519,
      5.995325,  4.773152,  3.744032,  2.938217,  2.294526,  1.762412,
      1.327145,  1.020728,  0.765535,  0.570548,  0.425833,  0.313825,
      0.232959,  0.171324,  0.128174,  0.092750,  0.067558,  0.049319,
      0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
      0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
      0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
      0.000348,  0.000193,  0.000085,  0.000021,  -0.000000,
  },
};
1138 
// Curve-fit RD model: evaluates the rate curve selected by `bsize` and the
// distortion-by-sse curve selected by `sse_norm` at abscissa `xqr`, using
// cubic interpolation on the sampled grids, and stores the results in
// *rate_f and *distbysse_f.
void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr,
                          double *rate_f, double *distbysse_f) {
  // The grids are sampled every x_step from x_start to x_end (65 samples).
  const double x_start = -15.5;
  const double x_end = 16.5;
  const double x_step = 0.5;
  const double epsilon = 1e-6;

  // Keep xqr strictly inside the second and the second-to-last sample so
  // that the four-sample interpolation window stays inside the grid.
  const double xqr_lo = x_start + x_step + epsilon;
  const double xqr_hi = x_end - x_step - epsilon;
  xqr = AOMMAX(xqr, xqr_lo);
  xqr = AOMMIN(xqr, xqr_hi);

  // Split the grid position into the cell index and the offset inside it.
  const double grid_pos = (xqr - x_start) / x_step;
  const int cell = (int)floor(grid_pos);
  const double frac = grid_pos - cell;

  assert(cell > 0);

  // The interpolation window starts one sample before the cell.
  const int rate_cat = bsize_curvfit_model_cat_lookup[bsize];
  *rate_f = interp_cubic(&interp_rgrid_curv[rate_cat][cell - 1], frac);

  const int dist_cat = sse_norm_curvfit_model_cat_lookup(sse_norm);
  *distbysse_f = interp_cubic(&interp_dgrid_curv[dist_cat][cell - 1], frac);
}
1162 
// Copies the above and left entropy contexts of plane `pd` into `t_above`
// and `t_left`. The number of entries copied is the width (above) and the
// height (left) of `plane_bsize` as given by mi_size_wide / mi_size_high.
static void get_entropy_contexts_plane(BLOCK_SIZE plane_bsize,
                                       const struct macroblockd_plane *pd,
                                       ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
                                       ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
  const int above_count = mi_size_wide[plane_bsize];
  const int left_count = mi_size_high[plane_bsize];

  memcpy(t_above, pd->above_entropy_context,
         sizeof(ENTROPY_CONTEXT) * above_count);
  memcpy(t_left, pd->left_entropy_context,
         sizeof(ENTROPY_CONTEXT) * left_count);
}
1175 
// Public entry point for fetching the above/left entropy contexts of plane
// `pd` into `t_above` / `t_left`. `plane_bsize` must be a valid block size;
// this is checked with assert() in debug builds.
void av1_get_entropy_contexts(BLOCK_SIZE plane_bsize,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
                              ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
  assert(plane_bsize < BLOCK_SIZES_ALL);

  get_entropy_contexts_plane(plane_bsize, pd, t_above, t_left);
}
1183 
1184 // Special clamping used in the encoder when calculating a prediction
1185 //
1186 // Logically, all pixel fetches used for prediction are clamped against the
1187 // edges of the frame. But doing this directly is slow, so instead we allocate
1188 // a finite border around the frame and fill it with copies of the outermost
1189 // pixels.
1190 //
1191 // Since this border is finite, we need to clamp the motion vector before
1192 // prediction in order to avoid out-of-bounds reads. At the same time, this
1193 // clamp must not change the prediction result.
1194 //
1195 // We can balance both of these concerns by calculating how far we would have
1196 // to go in each direction before the extended prediction region (the current
1197 // block + AOM_INTERP_EXTEND many pixels around the block) would be mapped
1198 // so that it touches the frame only at one row or column. This is a special
1199 // point because any more extreme MV will always lead to the same prediction.
1200 // So it is safe to clamp at that point.
1201 //
1202 // In the worst case, this requires a border of
1203 //   max_block_width + 2*AOM_INTERP_EXTEND = 128 + 2*4 = 136 pixels
1204 // around the frame edges.
enc_clamp_mv(const AV1_COMMON * cm,const MACROBLOCKD * xd,MV * mv)1205 static INLINE void enc_clamp_mv(const AV1_COMMON *cm, const MACROBLOCKD *xd,
1206                                 MV *mv) {
1207   int bw = xd->width << MI_SIZE_LOG2;
1208   int bh = xd->height << MI_SIZE_LOG2;
1209 
1210   int px_to_left_edge = xd->mi_col << MI_SIZE_LOG2;
1211   int px_to_right_edge = (cm->mi_params.mi_cols - xd->mi_col) << MI_SIZE_LOG2;
1212   int px_to_top_edge = xd->mi_row << MI_SIZE_LOG2;
1213   int px_to_bottom_edge = (cm->mi_params.mi_rows - xd->mi_row) << MI_SIZE_LOG2;
1214 
1215   const SubpelMvLimits mv_limits = {
1216     .col_min = -GET_MV_SUBPEL(px_to_left_edge + bw + AOM_INTERP_EXTEND),
1217     .col_max = GET_MV_SUBPEL(px_to_right_edge + AOM_INTERP_EXTEND),
1218     .row_min = -GET_MV_SUBPEL(px_to_top_edge + bh + AOM_INTERP_EXTEND),
1219     .row_max = GET_MV_SUBPEL(px_to_bottom_edge + AOM_INTERP_EXTEND)
1220   };
1221   clamp_mv(mv, &mv_limits);
1222 }
1223 
// Evaluates the first two reference MV candidates of `ref_frame` against the
// source block and records, per reference frame, in `x`:
//   - pred_mv0_sad / pred_mv1_sad: the SAD of the first / second candidate
//     (only written for candidates that are actually evaluated),
//   - pred_mv_sad: the smallest SAD found,
//   - max_mv_context: the largest candidate MV component magnitude, >> 3.
// `ref_y_buffer` / `ref_y_stride` describe the reference luma plane the
// candidates point into.
void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
  const int_mv first_ref_mv =
      av1_get_ref_mv_from_stack(0, ref_frames, 0, &x->mbmi_ext);
  const int_mv second_ref_mv =
      av1_get_ref_mv_from_stack(0, ref_frames, 1, &x->mbmi_ext);

  // Candidate list: the first reference MV, plus the second one if it
  // differs from the first.
  MV candidates[MAX_MV_REF_CANDIDATES + 1];
  int num_candidates = 0;
  candidates[num_candidates++] = first_ref_mv.as_mv;
  if (first_ref_mv.as_int != second_ref_mv.as_int) {
    candidates[num_candidates++] = second_ref_mv.as_mv;
  }

  assert(num_candidates <= (int)(sizeof(candidates) / sizeof(candidates[0])));

  const uint8_t *const src_y_ptr = x->plane[0].src.buf;
  int zero_seen = 0;
  int best_sad = INT_MAX;
  int max_mv = 0;
  // Get the sad for each candidate reference mv.
  for (int i = 0; i < num_candidates; ++i) {
    MV *const cand = &candidates[i];
    enc_clamp_mv(&cpi->common, &x->e_mbd, cand);

    // Convert the clamped MV to a whole-sample offset (>> 3 after adding a
    // rounding term that depends on the sign).
    const int fp_row = (cand->row + 3 + (cand->row >= 0)) >> 3;
    const int fp_col = (cand->col + 3 + (cand->col >= 0)) >> 3;
    const int cand_max_mv = AOMMAX(abs(cand->row), abs(cand->col)) >> 3;
    max_mv = AOMMAX(max_mv, cand_max_mv);

    // Evaluate the zero offset at most once.
    const int is_zero_offset = (fp_row == 0 && fp_col == 0);
    if (is_zero_offset) {
      if (zero_seen) continue;
      zero_seen = 1;
    }

    const uint8_t *const ref_y_ptr =
        &ref_y_buffer[ref_y_stride * fp_row + fp_col];
    // Find sad for current vector.
    const int this_sad = cpi->ppi->fn_ptr[block_size].sdf(
        src_y_ptr, x->plane[0].src.stride, ref_y_ptr, ref_y_stride);
    // Track the best sad so far.
    best_sad = AOMMIN(best_sad, this_sad);

    if (i == 0) {
      x->pred_mv0_sad[ref_frame] = this_sad;
    } else if (i == 1) {
      x->pred_mv1_sad[ref_frame] = this_sad;
    }
  }

  // Record the largest MV magnitude and the best sad for this reference.
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}
1275 
av1_setup_pred_block(const MACROBLOCKD * xd,struct buf_2d dst[MAX_MB_PLANE],const YV12_BUFFER_CONFIG * src,const struct scale_factors * scale,const struct scale_factors * scale_uv,const int num_planes)1276 void av1_setup_pred_block(const MACROBLOCKD *xd,
1277                           struct buf_2d dst[MAX_MB_PLANE],
1278                           const YV12_BUFFER_CONFIG *src,
1279                           const struct scale_factors *scale,
1280                           const struct scale_factors *scale_uv,
1281                           const int num_planes) {
1282   dst[0].buf = src->y_buffer;
1283   dst[0].stride = src->y_stride;
1284   dst[1].buf = src->u_buffer;
1285   dst[2].buf = src->v_buffer;
1286   dst[1].stride = dst[2].stride = src->uv_stride;
1287 
1288   const int mi_row = xd->mi_row;
1289   const int mi_col = xd->mi_col;
1290   for (int i = 0; i < num_planes; ++i) {
1291     setup_pred_plane(dst + i, xd->mi[0]->bsize, dst[i].buf,
1292                      i ? src->uv_crop_width : src->y_crop_width,
1293                      i ? src->uv_crop_height : src->y_crop_height,
1294                      dst[i].stride, mi_row, mi_col, i ? scale_uv : scale,
1295                      xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
1296   }
1297 }
1298 
av1_get_scaled_ref_frame(const AV1_COMP * cpi,int ref_frame)1299 YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const AV1_COMP *cpi,
1300                                              int ref_frame) {
1301   assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
1302   RefCntBuffer *const scaled_buf = cpi->scaled_ref_buf[ref_frame - 1];
1303   const RefCntBuffer *const ref_buf =
1304       get_ref_frame_buf(&cpi->common, ref_frame);
1305   return (scaled_buf != ref_buf && scaled_buf != NULL) ? &scaled_buf->buf
1306                                                        : NULL;
1307 }
1308 
// Returns the rate of signalling the interpolation filter(s) of the current
// block. The rate is 0 unless `interp_filter` is SWITCHABLE; otherwise it is
// the summed per-direction filter cost (one direction, or two when
// `dual_filter` is set) scaled by SWITCHABLE_INTERP_RATE_FACTOR.
int av1_get_switchable_rate(const MACROBLOCK *x, const MACROBLOCKD *xd,
                            InterpFilter interp_filter, int dual_filter) {
  if (interp_filter != SWITCHABLE) return 0;

  const MB_MODE_INFO *const mbmi = xd->mi[0];
  // The second direction is only costed when dual filters are in use.
  const int num_dirs = dual_filter ? 2 : 1;
  int filter_cost_sum = 0;
  for (int dir = 0; dir < num_dirs; ++dir) {
    const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
    const InterpFilter dir_filter =
        av1_extract_interp_filter(mbmi->interp_filters, dir);
    filter_cost_sum += x->mode_costs.switchable_interp_costs[ctx][dir_filter];
  }
  return SWITCHABLE_INTERP_RATE_FACTOR * filter_cost_sum;
}
1326 
// Initializes the baseline RD threshold multipliers in cpi->rd.thresh_mult.
//
// The whole table is cleared first, so any mode that is not explicitly
// assigned below is left at zero. Each remaining statement stores the
// baseline multiplier for one THR_* mode index.
void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
  RD_OPT *const rd = &cpi->rd;

  // Set baseline threshold values.
  av1_zero(rd->thresh_mult);

  // Short alias for the table being filled in.
  int *const mult = rd->thresh_mult;

  // THR_NEAREST* entries.
  mult[THR_NEARESTMV] = 300;
  mult[THR_NEARESTL2] = 300;
  mult[THR_NEARESTL3] = 300;
  mult[THR_NEARESTB] = 300;
  mult[THR_NEARESTA2] = 300;
  mult[THR_NEARESTA] = 300;
  mult[THR_NEARESTG] = 300;

  // THR_NEW* entries.
  mult[THR_NEWMV] = 1000;
  mult[THR_NEWL2] = 1000;
  mult[THR_NEWL3] = 1000;
  mult[THR_NEWB] = 1000;
  mult[THR_NEWA2] = 1100;
  mult[THR_NEWA] = 1000;
  mult[THR_NEWG] = 1000;

  // THR_NEAR* entries.
  mult[THR_NEARMV] = 1000;
  mult[THR_NEARL2] = 1000;
  mult[THR_NEARL3] = 1000;
  mult[THR_NEARB] = 1000;
  mult[THR_NEARA2] = 1000;
  mult[THR_NEARA] = 1000;
  mult[THR_NEARG] = 1000;

  // THR_GLOBAL* entries.
  mult[THR_GLOBALMV] = 2200;
  mult[THR_GLOBALL2] = 2000;
  mult[THR_GLOBALL3] = 2000;
  mult[THR_GLOBALB] = 2400;
  mult[THR_GLOBALA2] = 2000;
  mult[THR_GLOBALG] = 2000;
  mult[THR_GLOBALA] = 2400;

  // THR_COMP_NEAREST_NEAREST* entries.
  mult[THR_COMP_NEAREST_NEARESTLA] = 1100;
  mult[THR_COMP_NEAREST_NEARESTL2A] = 1000;
  mult[THR_COMP_NEAREST_NEARESTL3A] = 800;
  mult[THR_COMP_NEAREST_NEARESTGA] = 900;
  mult[THR_COMP_NEAREST_NEARESTLB] = 1000;
  mult[THR_COMP_NEAREST_NEARESTL2B] = 1000;
  mult[THR_COMP_NEAREST_NEARESTL3B] = 1000;
  mult[THR_COMP_NEAREST_NEARESTGB] = 1000;
  mult[THR_COMP_NEAREST_NEARESTLA2] = 1000;
  mult[THR_COMP_NEAREST_NEARESTL2A2] = 1000;
  mult[THR_COMP_NEAREST_NEARESTL3A2] = 1000;
  mult[THR_COMP_NEAREST_NEARESTGA2] = 1000;

  mult[THR_COMP_NEAREST_NEARESTLL2] = 2000;
  mult[THR_COMP_NEAREST_NEARESTLL3] = 2000;
  mult[THR_COMP_NEAREST_NEARESTLG] = 2000;
  mult[THR_COMP_NEAREST_NEARESTBA] = 2000;

  // Remaining THR_COMP_* entries, one group per reference-pair suffix.
  // *LA
  mult[THR_COMP_NEAR_NEARLA] = 1200;
  mult[THR_COMP_NEAREST_NEWLA] = 1500;
  mult[THR_COMP_NEW_NEARESTLA] = 1500;
  mult[THR_COMP_NEAR_NEWLA] = 1530;
  mult[THR_COMP_NEW_NEARLA] = 1870;
  mult[THR_COMP_NEW_NEWLA] = 2400;
  mult[THR_COMP_GLOBAL_GLOBALLA] = 2750;

  // *L2A
  mult[THR_COMP_NEAR_NEARL2A] = 1200;
  mult[THR_COMP_NEAREST_NEWL2A] = 1500;
  mult[THR_COMP_NEW_NEARESTL2A] = 1500;
  mult[THR_COMP_NEAR_NEWL2A] = 1870;
  mult[THR_COMP_NEW_NEARL2A] = 1700;
  mult[THR_COMP_NEW_NEWL2A] = 1800;
  mult[THR_COMP_GLOBAL_GLOBALL2A] = 2500;

  // *L3A
  mult[THR_COMP_NEAR_NEARL3A] = 1200;
  mult[THR_COMP_NEAREST_NEWL3A] = 1500;
  mult[THR_COMP_NEW_NEARESTL3A] = 1500;
  mult[THR_COMP_NEAR_NEWL3A] = 1700;
  mult[THR_COMP_NEW_NEARL3A] = 1700;
  mult[THR_COMP_NEW_NEWL3A] = 2000;
  mult[THR_COMP_GLOBAL_GLOBALL3A] = 3000;

  // *GA
  mult[THR_COMP_NEAR_NEARGA] = 1320;
  mult[THR_COMP_NEAREST_NEWGA] = 1500;
  mult[THR_COMP_NEW_NEARESTGA] = 1500;
  mult[THR_COMP_NEAR_NEWGA] = 2040;
  mult[THR_COMP_NEW_NEARGA] = 1700;
  mult[THR_COMP_NEW_NEWGA] = 2000;
  mult[THR_COMP_GLOBAL_GLOBALGA] = 2250;

  // *LB
  mult[THR_COMP_NEAR_NEARLB] = 1200;
  mult[THR_COMP_NEAREST_NEWLB] = 1500;
  mult[THR_COMP_NEW_NEARESTLB] = 1500;
  mult[THR_COMP_NEAR_NEWLB] = 1360;
  mult[THR_COMP_NEW_NEARLB] = 1700;
  mult[THR_COMP_NEW_NEWLB] = 2400;
  mult[THR_COMP_GLOBAL_GLOBALLB] = 2250;

  // *L2B
  mult[THR_COMP_NEAR_NEARL2B] = 1200;
  mult[THR_COMP_NEAREST_NEWL2B] = 1500;
  mult[THR_COMP_NEW_NEARESTL2B] = 1500;
  mult[THR_COMP_NEAR_NEWL2B] = 1700;
  mult[THR_COMP_NEW_NEARL2B] = 1700;
  mult[THR_COMP_NEW_NEWL2B] = 2000;
  mult[THR_COMP_GLOBAL_GLOBALL2B] = 2500;

  // *L3B
  mult[THR_COMP_NEAR_NEARL3B] = 1200;
  mult[THR_COMP_NEAREST_NEWL3B] = 1500;
  mult[THR_COMP_NEW_NEARESTL3B] = 1500;
  mult[THR_COMP_NEAR_NEWL3B] = 1870;
  mult[THR_COMP_NEW_NEARL3B] = 1700;
  mult[THR_COMP_NEW_NEWL3B] = 2000;
  mult[THR_COMP_GLOBAL_GLOBALL3B] = 2500;

  // *GB
  mult[THR_COMP_NEAR_NEARGB] = 1200;
  mult[THR_COMP_NEAREST_NEWGB] = 1500;
  mult[THR_COMP_NEW_NEARESTGB] = 1500;
  mult[THR_COMP_NEAR_NEWGB] = 1700;
  mult[THR_COMP_NEW_NEARGB] = 1700;
  mult[THR_COMP_NEW_NEWGB] = 2000;
  mult[THR_COMP_GLOBAL_GLOBALGB] = 2500;

  // *LA2
  mult[THR_COMP_NEAR_NEARLA2] = 1200;
  mult[THR_COMP_NEAREST_NEWLA2] = 1800;
  mult[THR_COMP_NEW_NEARESTLA2] = 1500;
  mult[THR_COMP_NEAR_NEWLA2] = 1700;
  mult[THR_COMP_NEW_NEARLA2] = 1700;
  mult[THR_COMP_NEW_NEWLA2] = 2000;
  mult[THR_COMP_GLOBAL_GLOBALLA2] = 2500;

  // *L2A2
  mult[THR_COMP_NEAR_NEARL2A2] = 1200;
  mult[THR_COMP_NEAREST_NEWL2A2] = 1500;
  mult[THR_COMP_NEW_NEARESTL2A2] = 1500;
  mult[THR_COMP_NEAR_NEWL2A2] = 1700;
  mult[THR_COMP_NEW_NEARL2A2] = 1700;
  mult[THR_COMP_NEW_NEWL2A2] = 2000;
  mult[THR_COMP_GLOBAL_GLOBALL2A2] = 2500;

  // *L3A2
  mult[THR_COMP_NEAR_NEARL3A2] = 1440;
  mult[THR_COMP_NEAREST_NEWL3A2] = 1500;
  mult[THR_COMP_NEW_NEARESTL3A2] = 1500;
  mult[THR_COMP_NEAR_NEWL3A2] = 1700;
  mult[THR_COMP_NEW_NEARL3A2] = 1700;
  mult[THR_COMP_NEW_NEWL3A2] = 2000;
  mult[THR_COMP_GLOBAL_GLOBALL3A2] = 2500;

  // *GA2
  mult[THR_COMP_NEAR_NEARGA2] = 1200;
  mult[THR_COMP_NEAREST_NEWGA2] = 1500;
  mult[THR_COMP_NEW_NEARESTGA2] = 1500;
  mult[THR_COMP_NEAR_NEWGA2] = 1700;
  mult[THR_COMP_NEW_NEARGA2] = 1700;
  mult[THR_COMP_NEW_NEWGA2] = 2000;
  mult[THR_COMP_GLOBAL_GLOBALGA2] = 2750;

  // *LL2
  mult[THR_COMP_NEAR_NEARLL2] = 1600;
  mult[THR_COMP_NEAREST_NEWLL2] = 2000;
  mult[THR_COMP_NEW_NEARESTLL2] = 2000;
  mult[THR_COMP_NEAR_NEWLL2] = 2640;
  mult[THR_COMP_NEW_NEARLL2] = 2200;
  mult[THR_COMP_NEW_NEWLL2] = 2400;
  mult[THR_COMP_GLOBAL_GLOBALLL2] = 3200;

  // *LL3
  mult[THR_COMP_NEAR_NEARLL3] = 1600;
  mult[THR_COMP_NEAREST_NEWLL3] = 2000;
  mult[THR_COMP_NEW_NEARESTLL3] = 1800;
  mult[THR_COMP_NEAR_NEWLL3] = 2200;
  mult[THR_COMP_NEW_NEARLL3] = 2200;
  mult[THR_COMP_NEW_NEWLL3] = 2400;
  mult[THR_COMP_GLOBAL_GLOBALLL3] = 3200;

  // *LG
  mult[THR_COMP_NEAR_NEARLG] = 1760;
  mult[THR_COMP_NEAREST_NEWLG] = 2400;
  mult[THR_COMP_NEW_NEARESTLG] = 2000;
  mult[THR_COMP_NEAR_NEWLG] = 1760;
  mult[THR_COMP_NEW_NEARLG] = 2640;
  mult[THR_COMP_NEW_NEWLG] = 2400;
  mult[THR_COMP_GLOBAL_GLOBALLG] = 3200;

  // *BA
  mult[THR_COMP_NEAR_NEARBA] = 1600;
  mult[THR_COMP_NEAREST_NEWBA] = 2000;
  mult[THR_COMP_NEW_NEARESTBA] = 2000;
  mult[THR_COMP_NEAR_NEWBA] = 2200;
  mult[THR_COMP_NEW_NEARBA] = 1980;
  mult[THR_COMP_NEW_NEWBA] = 2640;
  mult[THR_COMP_GLOBAL_GLOBALBA] = 3200;

  // THR_DC through THR_D45_PRED entries.
  mult[THR_DC] = 1000;
  mult[THR_PAETH] = 1000;
  mult[THR_SMOOTH] = 2200;
  mult[THR_SMOOTH_V] = 2000;
  mult[THR_SMOOTH_H] = 2000;
  mult[THR_H_PRED] = 2000;
  mult[THR_V_PRED] = 1800;
  mult[THR_D135_PRED] = 2500;
  mult[THR_D203_PRED] = 2000;
  mult[THR_D157_PRED] = 2500;
  mult[THR_D67_PRED] = 2000;
  mult[THR_D113_PRED] = 2500;
  mult[THR_D45_PRED] = 2500;
}
1525 
update_thr_fact(int (* factor_buf)[MAX_MODES],THR_MODES best_mode_index,THR_MODES mode_start,THR_MODES mode_end,BLOCK_SIZE min_size,BLOCK_SIZE max_size,int max_rd_thresh_factor)1526 static INLINE void update_thr_fact(int (*factor_buf)[MAX_MODES],
1527                                    THR_MODES best_mode_index,
1528                                    THR_MODES mode_start, THR_MODES mode_end,
1529                                    BLOCK_SIZE min_size, BLOCK_SIZE max_size,
1530                                    int max_rd_thresh_factor) {
1531   for (THR_MODES mode = mode_start; mode < mode_end; ++mode) {
1532     for (BLOCK_SIZE bs = min_size; bs <= max_size; ++bs) {
1533       int *const fact = &factor_buf[bs][mode];
1534       if (mode == best_mode_index) {
1535         *fact -= (*fact >> RD_THRESH_LOG_DEC_FACTOR);
1536       } else {
1537         *fact = AOMMIN(*fact + RD_THRESH_INC, max_rd_thresh_factor);
1538       }
1539     }
1540   }
1541 }
1542 
// Updates the adaptive RD threshold factors in factor_buf after a mode
// decision for a block of size bsize.
//
// The update is applied to the inter range [inter_mode_start, inter_mode_end)
// and the intra range [intra_mode_start, intra_mode_end), over a window of
// block sizes around bsize. use_adaptive_rd_thresh must be positive; it
// scales RD_THRESH_MAX_FACT to give the upper clamp for the factors.
void av1_update_rd_thresh_fact(
    const AV1_COMMON *const cm, int (*factor_buf)[MAX_MODES],
    int use_adaptive_rd_thresh, BLOCK_SIZE bsize, THR_MODES best_mode_index,
    THR_MODES inter_mode_start, THR_MODES inter_mode_end,
    THR_MODES intra_mode_start, THR_MODES intra_mode_end) {
  assert(use_adaptive_rd_thresh > 0);
  const int max_rd_thresh_factor = use_adaptive_rd_thresh * RD_THRESH_MAX_FACT;
  const int sb_size = (int)cm->seq_params->sb_size;

  // Block sizes above the superblock size are the ones with 1:4 and 4:1
  // aspect ratios; for those only the block size itself is updated.
  // TODO(any): Experiment with threshold update for parent/child blocks
  const int bsize_is_1_to_4 = bsize > sb_size;
  BLOCK_SIZE min_size = bsize;
  BLOCK_SIZE max_size = bsize;
  if (!bsize_is_1_to_4) {
    // Otherwise widen the window by two block sizes on each side, clamped to
    // [BLOCK_4X4, sb_size].
    min_size = AOMMAX(bsize - 2, BLOCK_4X4);
    max_size = AOMMIN(bsize + 2, sb_size);
  }

  // Inter modes first, then intra modes, with the same window and clamp.
  update_thr_fact(factor_buf, best_mode_index, inter_mode_start, inter_mode_end,
                  min_size, max_size, max_rd_thresh_factor);
  update_thr_fact(factor_buf, best_mode_index, intra_mode_start, intra_mode_end,
                  min_size, max_size, max_rd_thresh_factor);
}
1568 
// Returns the intra cost penalty derived from the DC quantizer value that
// av1_dc_quant_QTX() reports for (qindex, qdelta, bit_depth).
//
// The scaling of the quantizer depends on the bit depth:
//   AOM_BITS_8  -> 20 * q
//   AOM_BITS_10 -> 5 * q
//   AOM_BITS_12 -> ROUND_POWER_OF_TWO(5 * q, 2)
// Any other bit depth trips an assert and, when asserts are compiled out,
// returns -1.
int av1_get_intra_cost_penalty(int qindex, int qdelta,
                               aom_bit_depth_t bit_depth) {
  const int q = av1_dc_quant_QTX(qindex, qdelta, bit_depth);

  if (bit_depth == AOM_BITS_8) return 20 * q;
  if (bit_depth == AOM_BITS_10) return 5 * q;
  if (bit_depth == AOM_BITS_12) return ROUND_POWER_OF_TWO(5 * q, 2);

  assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
  return -1;
}
1581