1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <assert.h>
13 #include <limits.h>
14 #include <math.h>
15 #include <stdio.h>
16
17 #include "aom_dsp/aom_dsp_common.h"
18 #include "aom_mem/aom_mem.h"
19 #include "aom_ports/bitops.h"
20 #include "aom_ports/mem.h"
21 #include "aom_ports/aom_once.h"
22
23 #include "av1/common/common.h"
24 #include "av1/common/entropy.h"
25 #include "av1/common/entropymode.h"
26 #include "av1/common/pred_common.h"
27 #include "av1/common/quant_common.h"
28 #include "av1/common/reconinter.h"
29 #include "av1/common/reconintra.h"
30 #include "av1/common/seg_common.h"
31
32 #include "av1/encoder/cost.h"
33 #include "av1/encoder/encodemv.h"
34 #include "av1/encoder/encoder.h"
35 #include "av1/encoder/nonrd_opt.h"
36 #include "av1/encoder/ratectrl.h"
37 #include "av1/encoder/rd.h"
38 #include "config/aom_config.h"
39
40 #define RD_THRESH_POW 1.25
41
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
// One entry per block size, in BLOCK_SIZES_ALL enum order.
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES_ALL] = {
  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, 48, 48, 64, 4, 4, 8, 8, 16, 16
};
49
// For each intra extended-transform set 's' (row) and square transform size
// (column), whether that set is signalled at that size. Set 0 is DCT-only and
// never coded explicitly.
static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA]
                                            [EXT_TX_SIZES] = {
                                              { 1, 1, 1, 1 },  // unused
                                              { 1, 1, 0, 0 },
                                              { 0, 0, 1, 0 },
                                            };
56
// Same as use_intra_ext_tx_for_txsize, but for the inter extended-transform
// sets.
static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER]
                                            [EXT_TX_SIZES] = {
                                              { 1, 1, 1, 1 },  // unused
                                              { 1, 1, 0, 0 },
                                              { 0, 0, 1, 0 },
                                              { 0, 1, 1, 1 },
                                            };
64
// Maps an extended-transform set index to its TxSetType, separately for
// intra ([0][...]) and inter ([1][...]) coding.
static const int av1_ext_tx_set_idx_to_type[2][AOMMAX(EXT_TX_SETS_INTRA,
                                                      EXT_TX_SETS_INTER)] = {
  {
      // Intra
      EXT_TX_SET_DCTONLY,
      EXT_TX_SET_DTT4_IDTX_1DDCT,
      EXT_TX_SET_DTT4_IDTX,
  },
  {
      // Inter
      EXT_TX_SET_DCTONLY,
      EXT_TX_SET_ALL16,
      EXT_TX_SET_DTT9_IDTX_1DDCT,
      EXT_TX_SET_DCT_IDTX,
  },
};
81
// Computes, from the CDFs in 'fc', the bit cost of every mode-related symbol
// the encoder may signal, storing the results in 'mode_costs'. Inter-only
// cost tables are filled only when the frame is not intra-only.
void av1_fill_mode_rates(AV1_COMMON *const cm, ModeCosts *mode_costs,
                         FRAME_CONTEXT *fc) {
  int i, j;

  // Partition and skip syntax.
  for (i = 0; i < PARTITION_CONTEXTS; ++i)
    av1_cost_tokens_from_cdf(mode_costs->partition_cost[i],
                             fc->partition_cdf[i], NULL);

  if (cm->current_frame.skip_mode_info.skip_mode_flag) {
    for (i = 0; i < SKIP_MODE_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->skip_mode_cost[i],
                               fc->skip_mode_cdfs[i], NULL);
    }
  }

  for (i = 0; i < SKIP_CONTEXTS; ++i) {
    av1_cost_tokens_from_cdf(mode_costs->skip_txfm_cost[i],
                             fc->skip_txfm_cdfs[i], NULL);
  }

  // Intra luma/chroma prediction mode signalling.
  for (i = 0; i < KF_MODE_CONTEXTS; ++i)
    for (j = 0; j < KF_MODE_CONTEXTS; ++j)
      av1_cost_tokens_from_cdf(mode_costs->y_mode_costs[i][j],
                               fc->kf_y_cdf[i][j], NULL);

  for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
    av1_cost_tokens_from_cdf(mode_costs->mbmode_cost[i], fc->y_mode_cdf[i],
                             NULL);
  for (i = 0; i < CFL_ALLOWED_TYPES; ++i)
    for (j = 0; j < INTRA_MODES; ++j)
      av1_cost_tokens_from_cdf(mode_costs->intra_uv_mode_cost[i][j],
                               fc->uv_mode_cdf[i][j], NULL);

  av1_cost_tokens_from_cdf(mode_costs->filter_intra_mode_cost,
                           fc->filter_intra_mode_cdf, NULL);
  for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
    if (av1_filter_intra_allowed_bsize(cm, i))
      av1_cost_tokens_from_cdf(mode_costs->filter_intra_cost[i],
                               fc->filter_intra_cdfs[i], NULL);
  }

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
    av1_cost_tokens_from_cdf(mode_costs->switchable_interp_costs[i],
                             fc->switchable_interp_cdf[i], NULL);

  // Palette size, mode and color-index syntax.
  for (i = 0; i < PALATTE_BSIZE_CTXS; ++i) {
    av1_cost_tokens_from_cdf(mode_costs->palette_y_size_cost[i],
                             fc->palette_y_size_cdf[i], NULL);
    av1_cost_tokens_from_cdf(mode_costs->palette_uv_size_cost[i],
                             fc->palette_uv_size_cdf[i], NULL);
    for (j = 0; j < PALETTE_Y_MODE_CONTEXTS; ++j) {
      av1_cost_tokens_from_cdf(mode_costs->palette_y_mode_cost[i][j],
                               fc->palette_y_mode_cdf[i][j], NULL);
    }
  }

  for (i = 0; i < PALETTE_UV_MODE_CONTEXTS; ++i) {
    av1_cost_tokens_from_cdf(mode_costs->palette_uv_mode_cost[i],
                             fc->palette_uv_mode_cdf[i], NULL);
  }

  for (i = 0; i < PALETTE_SIZES; ++i) {
    for (j = 0; j < PALETTE_COLOR_INDEX_CONTEXTS; ++j) {
      av1_cost_tokens_from_cdf(mode_costs->palette_y_color_cost[i][j],
                               fc->palette_y_color_index_cdf[i][j], NULL);
      av1_cost_tokens_from_cdf(mode_costs->palette_uv_color_cost[i][j],
                               fc->palette_uv_color_index_cdf[i][j], NULL);
    }
  }

  // CfL alpha costs. The joint-sign cost is folded into the U-plane costs
  // only, so it is counted exactly once per (sign, alpha) combination.
  int sign_cost[CFL_JOINT_SIGNS];
  av1_cost_tokens_from_cdf(sign_cost, fc->cfl_sign_cdf, NULL);
  for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
    int *cost_u = mode_costs->cfl_cost[joint_sign][CFL_PRED_U];
    int *cost_v = mode_costs->cfl_cost[joint_sign][CFL_PRED_V];
    if (CFL_SIGN_U(joint_sign) == CFL_SIGN_ZERO) {
      // A zero sign means no alpha symbol is coded for this plane.
      memset(cost_u, 0, CFL_ALPHABET_SIZE * sizeof(*cost_u));
    } else {
      const aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
      av1_cost_tokens_from_cdf(cost_u, cdf_u, NULL);
    }
    if (CFL_SIGN_V(joint_sign) == CFL_SIGN_ZERO) {
      memset(cost_v, 0, CFL_ALPHABET_SIZE * sizeof(*cost_v));
    } else {
      const aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
      av1_cost_tokens_from_cdf(cost_v, cdf_v, NULL);
    }
    for (int u = 0; u < CFL_ALPHABET_SIZE; u++)
      cost_u[u] += sign_cost[joint_sign];
  }

  // Transform size and transform-type signalling.
  for (i = 0; i < MAX_TX_CATS; ++i)
    for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
      av1_cost_tokens_from_cdf(mode_costs->tx_size_cost[i][j],
                               fc->tx_size_cdf[i][j], NULL);

  for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i) {
    av1_cost_tokens_from_cdf(mode_costs->txfm_partition_cost[i],
                             fc->txfm_partition_cdf[i], NULL);
  }

  // Extended transform types: set 0 is DCT-only (no coded symbol), so only
  // sets >= 1 are filled. The av1_ext_tx_inv mapping reorders the costs into
  // TX_TYPE order.
  for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
    int s;
    for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
      if (use_inter_ext_tx_for_txsize[s][i]) {
        av1_cost_tokens_from_cdf(
            mode_costs->inter_tx_type_costs[s][i], fc->inter_ext_tx_cdf[s][i],
            av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][s]]);
      }
    }
    for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
      if (use_intra_ext_tx_for_txsize[s][i]) {
        for (j = 0; j < INTRA_MODES; ++j) {
          av1_cost_tokens_from_cdf(
              mode_costs->intra_tx_type_costs[s][i][j],
              fc->intra_ext_tx_cdf[s][i][j],
              av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[0][s]]);
        }
      }
    }
  }
  for (i = 0; i < DIRECTIONAL_MODES; ++i) {
    av1_cost_tokens_from_cdf(mode_costs->angle_delta_cost[i],
                             fc->angle_delta_cdf[i], NULL);
  }
  av1_cost_tokens_from_cdf(mode_costs->intrabc_cost, fc->intrabc_cdf, NULL);

  // Segmentation prediction syntax.
  for (i = 0; i < SPATIAL_PREDICTION_PROBS; ++i) {
    av1_cost_tokens_from_cdf(mode_costs->spatial_pred_cost[i],
                             fc->seg.spatial_pred_seg_cdf[i], NULL);
  }

  for (i = 0; i < SEG_TEMPORAL_PRED_CTXS; ++i) {
    av1_cost_tokens_from_cdf(mode_costs->tmp_pred_cost[i], fc->seg.pred_cdf[i],
                             NULL);
  }

  // Inter-frame-only syntax: reference selection, MV modes, compound
  // prediction and motion modes.
  if (!frame_is_intra_only(cm)) {
    for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->comp_inter_cost[i],
                               fc->comp_inter_cdf[i], NULL);
    }

    for (i = 0; i < REF_CONTEXTS; ++i) {
      for (j = 0; j < SINGLE_REFS - 1; ++j) {
        av1_cost_tokens_from_cdf(mode_costs->single_ref_cost[i][j],
                                 fc->single_ref_cdf[i][j], NULL);
      }
    }

    for (i = 0; i < COMP_REF_TYPE_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->comp_ref_type_cost[i],
                               fc->comp_ref_type_cdf[i], NULL);
    }

    for (i = 0; i < UNI_COMP_REF_CONTEXTS; ++i) {
      for (j = 0; j < UNIDIR_COMP_REFS - 1; ++j) {
        av1_cost_tokens_from_cdf(mode_costs->uni_comp_ref_cost[i][j],
                                 fc->uni_comp_ref_cdf[i][j], NULL);
      }
    }

    for (i = 0; i < REF_CONTEXTS; ++i) {
      for (j = 0; j < FWD_REFS - 1; ++j) {
        av1_cost_tokens_from_cdf(mode_costs->comp_ref_cost[i][j],
                                 fc->comp_ref_cdf[i][j], NULL);
      }
    }

    for (i = 0; i < REF_CONTEXTS; ++i) {
      for (j = 0; j < BWD_REFS - 1; ++j) {
        av1_cost_tokens_from_cdf(mode_costs->comp_bwdref_cost[i][j],
                                 fc->comp_bwdref_cdf[i][j], NULL);
      }
    }

    for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->intra_inter_cost[i],
                               fc->intra_inter_cdf[i], NULL);
    }

    for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->newmv_mode_cost[i], fc->newmv_cdf[i],
                               NULL);
    }

    for (i = 0; i < GLOBALMV_MODE_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->zeromv_mode_cost[i],
                               fc->zeromv_cdf[i], NULL);
    }

    for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->refmv_mode_cost[i], fc->refmv_cdf[i],
                               NULL);
    }

    for (i = 0; i < DRL_MODE_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->drl_mode_cost0[i], fc->drl_cdf[i],
                               NULL);
    }
    for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
      av1_cost_tokens_from_cdf(mode_costs->inter_compound_mode_cost[i],
                               fc->inter_compound_mode_cdf[i], NULL);
    for (i = 0; i < BLOCK_SIZES_ALL; ++i)
      av1_cost_tokens_from_cdf(mode_costs->compound_type_cost[i],
                               fc->compound_type_cdf[i], NULL);
    for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
      if (av1_is_wedge_used(i)) {
        av1_cost_tokens_from_cdf(mode_costs->wedge_idx_cost[i],
                                 fc->wedge_idx_cdf[i], NULL);
      }
    }
    for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->interintra_cost[i],
                               fc->interintra_cdf[i], NULL);
      av1_cost_tokens_from_cdf(mode_costs->interintra_mode_cost[i],
                               fc->interintra_mode_cdf[i], NULL);
    }
    for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->wedge_interintra_cost[i],
                               fc->wedge_interintra_cdf[i], NULL);
    }
    // Motion modes are only signalled for blocks of at least 8x8.
    for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
      av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost[i],
                               fc->motion_mode_cdf[i], NULL);
    }
    for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
      av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost1[i],
                               fc->obmc_cdf[i], NULL);
    }
    for (i = 0; i < COMP_INDEX_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->comp_idx_cost[i],
                               fc->compound_index_cdf[i], NULL);
    }
    for (i = 0; i < COMP_GROUP_IDX_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->comp_group_idx_cost[i],
                               fc->comp_group_idx_cdf[i], NULL);
    }
  }
}
322
323 #if !CONFIG_REALTIME_ONLY
// Computes the bit costs of the loop-restoration syntax elements (restoration
// type selection, Wiener and self-guided on/off flags) from the CDFs in 'fc'.
void av1_fill_lr_rates(ModeCosts *mode_costs, FRAME_CONTEXT *fc) {
  av1_cost_tokens_from_cdf(mode_costs->switchable_restore_cost,
                           fc->switchable_restore_cdf, NULL);
  av1_cost_tokens_from_cdf(mode_costs->wiener_restore_cost,
                           fc->wiener_restore_cdf, NULL);
  av1_cost_tokens_from_cdf(mode_costs->sgrproj_restore_cost,
                           fc->sgrproj_restore_cdf, NULL);
}
332 #endif // !CONFIG_REALTIME_ONLY
333
// Values are now correlated to quantizer.
// Per-qindex SAD-per-bit lookup tables, one per supported bit depth.
// Filled exactly once at startup by av1_init_me_luts().
static int sad_per_bit_lut_8[QINDEX_RANGE];
static int sad_per_bit_lut_10[QINDEX_RANGE];
static int sad_per_bit_lut_12[QINDEX_RANGE];
338
// Fills 'bit16lut' with a SAD-per-bit estimate for each qindex in [0, range),
// derived linearly from the real quantizer step size at 'bit_depth'.
static void init_me_luts_bd(int *bit16lut, int range,
                            aom_bit_depth_t bit_depth) {
  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (int idx = 0; idx < range; ++idx) {
    const double q_step = av1_convert_qindex_to_q(idx, bit_depth);
    bit16lut[idx] = (int)(0.0418 * q_step + 2.4107);
  }
}
350
// Builds the SAD-per-bit tables for all supported bit depths; invoked once
// through aom_once().
static void init_me_luts(void) {
  init_me_luts_bd(sad_per_bit_lut_8, QINDEX_RANGE, AOM_BITS_8);
  init_me_luts_bd(sad_per_bit_lut_10, QINDEX_RANGE, AOM_BITS_10);
  init_me_luts_bd(sad_per_bit_lut_12, QINDEX_RANGE, AOM_BITS_12);
}
356
// Thread-safe one-time initialization of the motion-estimation LUTs.
void av1_init_me_luts(void) { aom_once(init_me_luts); }
358
// ARF-boost based rdmult increment, indexed by AOMMIN(15, gfu_boost / 100)
// and applied as a >>7 (Q7) fraction in av1_compute_rd_mult().
static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
                                         8,  8,  4,  4,  2,  2,  1,  0 };

// Per-pyramid-layer rdmult scale (also Q7), indexed by
// AOMMIN(layer_depth, 6).
static const int rd_layer_depth_factor[7] = {
  160, 160, 160, 160, 192, 208, 224
};
365
// Returns the default rd multiplier for inter frames for a given qindex.
// The function here is a first pass estimate based on data from
// a previous Vizer run
static double def_inter_rd_multiplier(int qindex) {
  const double offset = 3.2;
  const double slope = 0.0015;
  return offset + slope * (double)qindex;
}
372
// Returns the default rd multiplier for ARF/Golden Frames for a given qindex.
// The function here is a first pass estimate based on data from
// a previous Vizer run
static double def_arf_rd_multiplier(int qindex) {
  const double offset = 3.25;
  const double slope = 0.0015;
  return offset + slope * (double)qindex;
}
379
// Returns the default rd multiplier for key frames for a given qindex.
// The function here is a first pass estimate based on data from
// a previous Vizer run
static double def_kf_rd_multiplier(int qindex) {
  const double offset = 3.3;
  const double slope = 0.0015;
  return offset + slope * (double)qindex;
}
386
// Returns the base rate-distortion multiplier for 'qindex': the square of the
// DC quantizer step size, scaled by a frame-type-dependent multiplier and
// normalized back to the 8-bit scale. Returns -1 on an invalid bit depth.
int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
                                        FRAME_UPDATE_TYPE update_type,
                                        int qindex, aom_tune_metric tuning) {
  const int q = av1_dc_quant_QTX(qindex, 0, bit_depth);
  int64_t rdmult = q * q;
  // Note: the def_*_rd_multiplier() helpers are evaluated at the quantizer
  // step size 'q', not at 'qindex'.
  if (update_type == KF_UPDATE) {
    double def_rd_q_mult = def_kf_rd_multiplier(q);
    rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
  } else if ((update_type == GF_UPDATE) || (update_type == ARF_UPDATE)) {
    double def_rd_q_mult = def_arf_rd_multiplier(q);
    rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
  } else {
    double def_rd_q_mult = def_inter_rd_multiplier(q);
    rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
  }

  if (tuning == AOM_TUNE_IQ) {
    // Further multiply rdmult (by up to 200/128 = 1.5625) to improve image
    // quality. The most noticeable effect is a mild bias towards choosing
    // larger transform sizes (e.g. one 16x16 transform instead of 4 8x8
    // transforms).
    // For very high qindexes, start progressively reducing the weight towards
    // unity (128/128), as transforms are large enough and making them even
    // larger actually harms subjective quality and SSIMULACRA 2 scores.
    // This weight part of the equation was determined by iteratively increasing
    // weight on CID22 and Daala's subset1, and observing its effects on visual
    // quality and SSIMULACRA 2 scores along the usable (0-100) range.
    // The ramp-down part of the equation was determined by choosing a fixed
    // initial qindex point [qindex 159 = (255 - 159) * 3 / 4] where SSIMULACRA
    // 2 scores for encodes with qindexes greater than 159 scored at or above
    // their equivalents with no rdmult adjustment.
    const int weight = clamp(((255 - qindex) * 3) / 4, 0, 72) + 128;
    rdmult = (int64_t)((double)rdmult * weight / 128.0);
  }

  // 'q' is scaled up at higher bit depths, so scale rdmult back down to the
  // 8-bit range (q^2 grows by 16x per 2 extra bits).
  switch (bit_depth) {
    case AOM_BITS_8: break;
    case AOM_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
    case AOM_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
    default:
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
      return -1;
  }
  // Saturate to int range and never return a non-positive multiplier.
  return rdmult > 0 ? (int)AOMMIN(rdmult, INT_MAX) : 1;
}
432
av1_compute_rd_mult(const int qindex,const aom_bit_depth_t bit_depth,const FRAME_UPDATE_TYPE update_type,const int layer_depth,const int boost_index,const FRAME_TYPE frame_type,const int use_fixed_qp_offsets,const int is_stat_consumption_stage,const aom_tune_metric tuning)433 int av1_compute_rd_mult(const int qindex, const aom_bit_depth_t bit_depth,
434 const FRAME_UPDATE_TYPE update_type,
435 const int layer_depth, const int boost_index,
436 const FRAME_TYPE frame_type,
437 const int use_fixed_qp_offsets,
438 const int is_stat_consumption_stage,
439 const aom_tune_metric tuning) {
440 int64_t rdmult = av1_compute_rd_mult_based_on_qindex(bit_depth, update_type,
441 qindex, tuning);
442 if (is_stat_consumption_stage && !use_fixed_qp_offsets &&
443 (frame_type != KEY_FRAME)) {
444 // Layer depth adjustment
445 rdmult = (rdmult * rd_layer_depth_factor[layer_depth]) >> 7;
446 // ARF boost adjustment
447 rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
448 }
449 return rdmult > 0 ? (int)AOMMIN(rdmult, INT_MAX) : 1;
450 }
451
// Returns the qindex delta that moves the DC quantizer step size as close as
// possible to q / sqrt(beta). beta > 1 lowers the quantizer, beta < 1 raises
// it.
int av1_get_deltaq_offset(aom_bit_depth_t bit_depth, int qindex, double beta) {
  assert(beta > 0.0);
  const int orig_qindex = qindex;
  int q = av1_dc_quant_QTX(qindex, 0, bit_depth);
  const int target_q = (int)rint(q / sqrt(beta));
  if (target_q == q) return 0;
  if (target_q < q) {
    // Scan downwards for the first qindex whose quantizer does not exceed
    // the target.
    while (qindex > 0) {
      --qindex;
      q = av1_dc_quant_QTX(qindex, 0, bit_depth);
      if (target_q >= q) break;
    }
  } else {
    // Scan upwards for the first qindex whose quantizer reaches the target.
    while (qindex < MAXQ) {
      ++qindex;
      q = av1_dc_quant_QTX(qindex, 0, bit_depth);
      if (target_q <= q) break;
    }
  }
  return qindex - orig_qindex;
}
479
av1_adjust_q_from_delta_q_res(int delta_q_res,int prev_qindex,int curr_qindex)480 int av1_adjust_q_from_delta_q_res(int delta_q_res, int prev_qindex,
481 int curr_qindex) {
482 curr_qindex = clamp(curr_qindex, delta_q_res, 256 - delta_q_res);
483 const int sign_deltaq_index = curr_qindex - prev_qindex >= 0 ? 1 : -1;
484 const int deltaq_deadzone = delta_q_res / 4;
485 const int qmask = ~(delta_q_res - 1);
486 int abs_deltaq_index = abs(curr_qindex - prev_qindex);
487 abs_deltaq_index = (abs_deltaq_index + deltaq_deadzone) & qmask;
488 int adjust_qindex = prev_qindex + sign_deltaq_index * abs_deltaq_index;
489 adjust_qindex = AOMMAX(adjust_qindex, MINQ + 1);
490 return adjust_qindex;
491 }
492
493 #if !CONFIG_REALTIME_ONLY
// Returns the frame rdmult scaled down by 'beta' (beta > 1 lowers the
// returned multiplier, beta < 1 raises it).
int av1_get_adaptive_rdmult(const AV1_COMP *cpi, double beta) {
  assert(beta > 0.0);
  const AV1_COMMON *cm = &cpi->common;

  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
  const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
  const FRAME_TYPE frame_type = cm->current_frame.frame_type;

  // Note: uses base_qindex without the y_dc_delta_q adjustment applied in
  // av1_initialize_rd_consts().
  const int qindex_rdmult = cm->quant_params.base_qindex;
  return (int)(av1_compute_rd_mult(
                   qindex_rdmult, cm->seq_params->bit_depth,
                   cpi->ppi->gf_group.update_type[cpi->gf_frame_index],
                   layer_depth, boost_index, frame_type,
                   cpi->oxcf.q_cfg.use_fixed_qp_offsets,
                   is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning) /
               beta);
}
512 #endif // !CONFIG_REALTIME_ONLY
513
// Maps a qindex to the rd threshold scaling factor used by
// set_block_thresholds(). Returns -1 on an invalid bit depth.
static int compute_rd_thresh_factor(int qindex, aom_bit_depth_t bit_depth) {
  double divisor;
  switch (bit_depth) {
    case AOM_BITS_8: divisor = 4.0; break;
    case AOM_BITS_10: divisor = 16.0; break;
    case AOM_BITS_12: divisor = 64.0; break;
    default:
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
      return -1;
  }
  // Bring the DC quantizer back to a common scale before applying the power
  // curve.
  const double q = av1_dc_quant_QTX(qindex, 0, bit_depth) / divisor;
  // TODO(debargha): Adjust the function below.
  return AOMMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
}
531
av1_set_sad_per_bit(const AV1_COMP * cpi,int * sadperbit,int qindex)532 void av1_set_sad_per_bit(const AV1_COMP *cpi, int *sadperbit, int qindex) {
533 switch (cpi->common.seq_params->bit_depth) {
534 case AOM_BITS_8: *sadperbit = sad_per_bit_lut_8[qindex]; break;
535 case AOM_BITS_10: *sadperbit = sad_per_bit_lut_10[qindex]; break;
536 case AOM_BITS_12: *sadperbit = sad_per_bit_lut_12[qindex]; break;
537 default:
538 assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
539 }
540 }
541
// Fills rd->threshes[segment][bsize][mode] with per-mode early-termination
// thresholds: thresh_mult scaled by a qindex- and block-size-dependent
// factor, saturated to INT_MAX when the product would overflow. In nonrd
// pick mode only the RTC mode subset is filled.
static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd,
                                 int use_nonrd_pick_mode) {
  int i, bsize, segment_id;
  THR_MODES mode_indices[RTC_REFS * RTC_MODES] = { 0 };
  int num_modes_count = use_nonrd_pick_mode ? 0 : MAX_MODES;

  if (use_nonrd_pick_mode) {
    // Collect the THR_MODES indices of the real-time reference/mode combos;
    // only these entries are updated below.
    for (int r_idx = 0; r_idx < RTC_REFS; r_idx++) {
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
      if (ref != INTRA_FRAME) {
        for (i = 0; i < RTC_INTER_MODES; i++)
          mode_indices[num_modes_count++] =
              mode_idx[ref][mode_offset(inter_mode_list[i])];
      } else {
        for (i = 0; i < RTC_INTRA_MODES; i++)
          mode_indices[num_modes_count++] =
              mode_idx[ref][mode_offset(intra_mode_list[i])];
      }
    }
  }

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    // Per-segment effective qindex (base + segment delta + y dc delta).
    const int qindex = clamp(
        av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex) +
            cm->quant_params.y_dc_delta_q,
        0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex, cm->seq_params->bit_depth);

    for (bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      const int thresh_max = INT_MAX / t;

      for (i = 0; i < num_modes_count; ++i) {
        const int mode_index = use_nonrd_pick_mode ? mode_indices[i] : i;
        rd->threshes[segment_id][bsize][mode_index] =
            rd->thresh_mult[mode_index] < thresh_max
                ? rd->thresh_mult[mode_index] * t / 4
                : INT_MAX;
      }
    }
  }
}
586
// Computes the level-map coefficient coding costs (EOB position, base levels,
// low-probability level suffixes, DC sign) for every transform size and plane
// type from the CDFs in 'fc'. 'num_planes' is clamped to PLANE_TYPES since
// chroma planes share one cost table.
void av1_fill_coeff_costs(CoeffCosts *coeff_costs, FRAME_CONTEXT *fc,
                          const int num_planes) {
  const int nplanes = AOMMIN(num_planes, PLANE_TYPES);
  // EOB multi-symbol costs: one class per transform area (16 ... 1024).
  for (int eob_multi_size = 0; eob_multi_size < 7; ++eob_multi_size) {
    for (int plane = 0; plane < nplanes; ++plane) {
      LV_MAP_EOB_COST *pcost = &coeff_costs->eob_costs[eob_multi_size][plane];

      for (int ctx = 0; ctx < 2; ++ctx) {
        aom_cdf_prob *pcdf;
        switch (eob_multi_size) {
          case 0: pcdf = fc->eob_flag_cdf16[plane][ctx]; break;
          case 1: pcdf = fc->eob_flag_cdf32[plane][ctx]; break;
          case 2: pcdf = fc->eob_flag_cdf64[plane][ctx]; break;
          case 3: pcdf = fc->eob_flag_cdf128[plane][ctx]; break;
          case 4: pcdf = fc->eob_flag_cdf256[plane][ctx]; break;
          case 5: pcdf = fc->eob_flag_cdf512[plane][ctx]; break;
          case 6:
          default: pcdf = fc->eob_flag_cdf1024[plane][ctx]; break;
        }
        av1_cost_tokens_from_cdf(pcost->eob_cost[ctx], pcdf, NULL);
      }
    }
  }
  for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
    for (int plane = 0; plane < nplanes; ++plane) {
      LV_MAP_COEFF_COST *pcost = &coeff_costs->coeff_costs[tx_size][plane];

      for (int ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx)
        av1_cost_tokens_from_cdf(pcost->txb_skip_cost[ctx],
                                 fc->txb_skip_cdf[tx_size][ctx], NULL);

      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS_EOB; ++ctx)
        av1_cost_tokens_from_cdf(pcost->base_eob_cost[ctx],
                                 fc->coeff_base_eob_cdf[tx_size][plane][ctx],
                                 NULL);
      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx)
        av1_cost_tokens_from_cdf(pcost->base_cost[ctx],
                                 fc->coeff_base_cdf[tx_size][plane][ctx], NULL);

      // Entries [4..7] hold precomputed deltas between adjacent base-level
      // costs, reused by the fast coefficient rate estimation.
      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
        pcost->base_cost[ctx][4] = 0;
        pcost->base_cost[ctx][5] = pcost->base_cost[ctx][1] +
                                   av1_cost_literal(1) -
                                   pcost->base_cost[ctx][0];
        pcost->base_cost[ctx][6] =
            pcost->base_cost[ctx][2] - pcost->base_cost[ctx][1];
        pcost->base_cost[ctx][7] =
            pcost->base_cost[ctx][3] - pcost->base_cost[ctx][2];
      }

      for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx)
        av1_cost_tokens_from_cdf(pcost->eob_extra_cost[ctx],
                                 fc->eob_extra_cdf[tx_size][plane][ctx], NULL);

      for (int ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx)
        av1_cost_tokens_from_cdf(pcost->dc_sign_cost[ctx],
                                 fc->dc_sign_cdf[plane][ctx], NULL);

      for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
        int br_rate[BR_CDF_SIZE];
        int prev_cost = 0;
        int i, j;
        // Level-suffix CDFs are shared by all transform sizes >= 32x32.
        av1_cost_tokens_from_cdf(
            br_rate, fc->coeff_br_cdf[AOMMIN(tx_size, TX_32X32)][plane][ctx],
            NULL);
        // Unroll the continuation coding of the base-range suffix: entry
        // i + j accumulates the cost of all full continuation symbols coded
        // so far plus the cost of terminating with symbol j.
        for (i = 0; i < COEFF_BASE_RANGE; i += BR_CDF_SIZE - 1) {
          for (j = 0; j < BR_CDF_SIZE - 1; j++) {
            pcost->lps_cost[ctx][i + j] = prev_cost + br_rate[j];
          }
          prev_cost += br_rate[j];
        }
        pcost->lps_cost[ctx][i] = prev_cost;
      }
      // Second half of lps_cost holds deltas between consecutive entries,
      // used for incremental cost updates.
      for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
        pcost->lps_cost[ctx][0 + COEFF_BASE_RANGE + 1] =
            pcost->lps_cost[ctx][0];
        for (int i = 1; i <= COEFF_BASE_RANGE; ++i) {
          pcost->lps_cost[ctx][i + COEFF_BASE_RANGE + 1] =
              pcost->lps_cost[ctx][i] - pcost->lps_cost[ctx][i - 1];
        }
      }
    }
  }
}
679
// Builds the MV joint and component cost tables for the current frame. The
// component cost pointers are offset by MV_MAX so they can be indexed with
// signed MV components in [-MV_MAX, MV_MAX].
void av1_fill_mv_costs(const nmv_context *nmvc, int integer_mv, int usehp,
                       MvCosts *mv_costs) {
  // Avoid accessing 'mv_costs' when it is not allocated.
  if (mv_costs == NULL) return;

  mv_costs->nmv_cost[0] = &mv_costs->nmv_cost_alloc[0][MV_MAX];
  mv_costs->nmv_cost[1] = &mv_costs->nmv_cost_alloc[1][MV_MAX];
  mv_costs->nmv_cost_hp[0] = &mv_costs->nmv_cost_hp_alloc[0][MV_MAX];
  mv_costs->nmv_cost_hp[1] = &mv_costs->nmv_cost_hp_alloc[1][MV_MAX];
  if (integer_mv) {
    // Integer-MV-only frames build the table at integer precision.
    mv_costs->mv_cost_stack = (int **)&mv_costs->nmv_cost;
    av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
                             nmvc, MV_SUBPEL_NONE);
  } else {
    // Otherwise pick the table matching the frame's allowed MV precision.
    mv_costs->mv_cost_stack =
        usehp ? mv_costs->nmv_cost_hp : mv_costs->nmv_cost;
    av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
                             nmvc, usehp);
  }
}
700
// Builds the IntraBC displacement-vector cost tables. The component cost
// pointers are offset by MV_MAX so they can be indexed with signed DV
// components in [-MV_MAX, MV_MAX]; DVs are always integer precision.
void av1_fill_dv_costs(const nmv_context *ndvc, IntraBCMVCosts *dv_costs) {
  for (int comp = 0; comp < 2; ++comp)
    dv_costs->dv_costs[comp] = dv_costs->dv_costs_alloc[comp] + MV_MAX;
  av1_build_nmv_cost_table(dv_costs->joint_mv, dv_costs->dv_costs, ndvc,
                           MV_SUBPEL_NONE);
}
707
// Populates speed features based on codec control settings (of type
// COST_UPDATE_TYPE) and expected speed feature settings (of type
// INTERNAL_COST_UPDATE_TYPE) by considering the least frequent cost update.
// The populated/updated speed features are used for cost updates in the
// encoder.
// WARNING: Population of unified cost update frequency needs to be taken care
// accordingly, in case of any modifications/additions to the enum
// COST_UPDATE_TYPE/INTERNAL_COST_UPDATE_TYPE.
static inline void populate_unified_cost_update_freq(
    const CostUpdateFreq cost_upd_freq, SPEED_FEATURES *const sf) {
  INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
  // Mapping of entropy cost update frequency from the encoder's codec control
  // settings of type COST_UPDATE_TYPE to speed features of type
  // INTERNAL_COST_UPDATE_TYPE.
  static const INTERNAL_COST_UPDATE_TYPE
      map_cost_upd_to_internal_cost_upd[NUM_COST_UPDATE_TYPES] = {
        INTERNAL_COST_UPD_SB, INTERNAL_COST_UPD_SBROW, INTERNAL_COST_UPD_TILE,
        INTERNAL_COST_UPD_OFF
      };

  // Each level is combined with the existing speed-feature value via AOMMIN.
  inter_sf->mv_cost_upd_level =
      AOMMIN(inter_sf->mv_cost_upd_level,
             map_cost_upd_to_internal_cost_upd[cost_upd_freq.mv]);
  inter_sf->coeff_cost_upd_level =
      AOMMIN(inter_sf->coeff_cost_upd_level,
             map_cost_upd_to_internal_cost_upd[cost_upd_freq.coeff]);
  inter_sf->mode_cost_upd_level =
      AOMMIN(inter_sf->mode_cost_upd_level,
             map_cost_upd_to_internal_cost_upd[cost_upd_freq.mode]);
  sf->intra_sf.dv_cost_upd_level =
      AOMMIN(sf->intra_sf.dv_cost_upd_level,
             map_cost_upd_to_internal_cost_upd[cost_upd_freq.dv]);
}
741
742 // Checks if entropy costs should be initialized/updated at frame level or not.
is_frame_level_cost_upd_freq_set(const AV1_COMMON * const cm,const INTERNAL_COST_UPDATE_TYPE cost_upd_level,const int use_nonrd_pick_mode,const int frames_since_key)743 static inline int is_frame_level_cost_upd_freq_set(
744 const AV1_COMMON *const cm, const INTERNAL_COST_UPDATE_TYPE cost_upd_level,
745 const int use_nonrd_pick_mode, const int frames_since_key) {
746 const int fill_costs =
747 frame_is_intra_only(cm) ||
748 (use_nonrd_pick_mode ? frames_since_key < 2
749 : (cm->current_frame.frame_number & 0x07) == 1);
750 return ((!use_nonrd_pick_mode && cost_upd_level != INTERNAL_COST_UPD_OFF) ||
751 cost_upd_level == INTERNAL_COST_UPD_TILE || fill_costs);
752 }
753
// Decide whether we want to update the mode entropy cost for the current frame.
// The logic is currently inherited from selective_disable_cdf_rtc.
static inline int should_force_mode_cost_update(const AV1_COMP *cpi) {
  const REAL_TIME_SPEED_FEATURES *const rt_sf = &cpi->sf.rt_sf;
  if (!rt_sf->frame_level_mode_cost_update) {
    return false;
  }

  if (cpi->oxcf.algo_cfg.cdf_update_mode == 2) {
    // CDF update mode 2: only refresh right after a CDF update happened.
    return cpi->frames_since_last_update == 1;
  } else if (cpi->oxcf.algo_cfg.cdf_update_mode == 1) {
    if (cpi->svc.number_spatial_layers == 1 &&
        cpi->svc.number_temporal_layers == 1) {
      const AV1_COMMON *const cm = &cpi->common;
      const RATE_CONTROL *const rc = &cpi->rc;

      // Non-SVC: refresh on scene changes, shortly after key frames, after
      // resizes, and periodically (every 8th frame).
      return frame_is_intra_only(cm) || is_frame_resize_pending(cpi) ||
             rc->high_source_sad || rc->frames_since_key < 10 ||
             cpi->cyclic_refresh->counter_encode_maxq_scene_change < 10 ||
             cm->current_frame.frame_number % 8 == 0;
    } else if (cpi->svc.number_temporal_layers > 1) {
      // Temporal SVC: refresh on all but the top temporal layer.
      return cpi->svc.temporal_layer_id != cpi->svc.number_temporal_layers - 1;
    }
  }

  return false;
}
781
// Per-frame initialization of rate-distortion state: RDMULT, error-per-bit,
// mode pruning thresholds, and — depending on the configured cost update
// frequencies — the mv/coefficient/mode/dv entropy cost tables.
void av1_initialize_rd_consts(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  SPEED_FEATURES *const sf = &cpi->sf;
  RD_OPT *const rd = &cpi->rd;
  int use_nonrd_pick_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
  int frames_since_key = cpi->rc.frames_since_key;

  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
  const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
  const FRAME_TYPE frame_type = cm->current_frame.frame_type;

  const int qindex_rdmult =
      cm->quant_params.base_qindex + cm->quant_params.y_dc_delta_q;
  rd->RDMULT = av1_compute_rd_mult(
      qindex_rdmult, cm->seq_params->bit_depth,
      cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
      boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
      is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
#if CONFIG_RD_COMMAND
  // Allow an external RD command file to override RDMULT in pass 2.
  if (cpi->oxcf.pass == 2) {
    const RD_COMMAND *rd_command = &cpi->rd_command;
    if (rd_command->option_ls[rd_command->frame_index] ==
        RD_OPTION_SET_Q_RDMULT) {
      rd->RDMULT = rd_command->rdmult_ls[rd_command->frame_index];
    }
  }
#endif  // CONFIG_RD_COMMAND

  av1_set_error_per_bit(&x->errorperbit, rd->RDMULT);

  set_block_thresholds(cm, rd, cpi->sf.rt_sf.use_nonrd_pick_mode);

  populate_unified_cost_update_freq(cpi->oxcf.cost_upd_freq, sf);
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &cpi->sf.inter_sf;
  // Frame level mv cost update
  if (is_frame_level_cost_upd_freq_set(cm, inter_sf->mv_cost_upd_level,
                                       use_nonrd_pick_mode, frames_since_key))
    av1_fill_mv_costs(&cm->fc->nmvc, cm->features.cur_frame_force_integer_mv,
                      cm->features.allow_high_precision_mv, x->mv_costs);

  // Frame level coefficient cost update
  if (is_frame_level_cost_upd_freq_set(cm, inter_sf->coeff_cost_upd_level,
                                       use_nonrd_pick_mode, frames_since_key))
    av1_fill_coeff_costs(&x->coeff_costs, cm->fc, av1_num_planes(cm));

  // Frame level mode cost update
  if (should_force_mode_cost_update(cpi) ||
      is_frame_level_cost_upd_freq_set(cm, inter_sf->mode_cost_upd_level,
                                       use_nonrd_pick_mode, frames_since_key))
    av1_fill_mode_rates(cm, &x->mode_costs, cm->fc);

  // Frame level dv cost update. The dv cost table is lazily allocated on
  // first use.
  if (av1_need_dv_costs(cpi)) {
    if (cpi->td.dv_costs_alloc == NULL) {
      CHECK_MEM_ERROR(
          cm, cpi->td.dv_costs_alloc,
          (IntraBCMVCosts *)aom_malloc(sizeof(*cpi->td.dv_costs_alloc)));
      cpi->td.mb.dv_costs = cpi->td.dv_costs_alloc;
    }
    av1_fill_dv_costs(&cm->fc->ndvc, x->dv_costs);
  }
}
846
// Looks up the normalized rate (*r_q10) and distortion (*d_q10), both in
// Q10, for a given Q10 value of x^2 = (qstep^2 * 2^n_log2) / variance, by
// piecewise-linear interpolation of the tables below.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The tables below must be of the same size.

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256.

  // Normalized rate:
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The
  // closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142,
    4044,  3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
    3133,  3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353,
    2290,  2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
    1608,  1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963,
    911,   864,  821,  781,  745,  680,  623,  574,  530,  490,  455,  424,
    395,   345,  304,  269,  239,  213,  190,  171,  154,  126,  104,  87,
    73,    61,   52,   44,   38,   28,   21,   16,   12,   10,   8,    6,
    5,     3,    2,    1,    1,    1,    0,    0,
  };
  // Normalized distortion:
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
    0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,
    5,    6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,
    18,   21,   24,   26,   29,   31,   34,   36,   39,   44,   49,   54,
    59,   64,   69,   73,   78,   88,   97,   106,  115,  124,  133,  142,
    151,  167,  184,  200,  215,  231,  245,  260,  274,  301,  327,  351,
    375,  397,  418,  439,  458,  495,  528,  559,  587,  613,  637,  659,
    680,  717,  749,  777,  801,  823,  842,  859,  874,  899,  919,  936,
    949,  960,  969,  977,  983,  994,  1001, 1006, 1010, 1013, 1015, 1017,
    1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
  };
  // Sample positions (Q10) for the two tables above: spacing grows
  // geometrically so the tables cover a wide x^2 range with few entries.
  static const int xsq_iq_q10[] = {
    0,      4,      8,      12,     16,     20,     24,     28,     32,
    40,     48,     56,     64,     72,     80,     88,     96,     112,
    128,    144,    160,    176,    192,    208,    224,    256,    288,
    320,    352,    384,    416,    448,    480,    544,    608,    672,
    736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
    1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
    3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
    7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
    16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
    36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
    81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
    180192, 196576, 212960, 229344, 245728,
  };
  // Map xsq_q10 onto the piecewise-logarithmic sampling grid: k is the
  // coarse (power-of-two) segment, xq the resulting table index.
  const int tmp = (xsq_q10 >> 2) + 8;
  const int k = get_msb(tmp) - 3;
  const int xq = (k << 3) + ((tmp >> k) & 0x7);
  const int one_q10 = 1 << 10;
  // a_q10/b_q10 are the Q10 linear interpolation weights between table
  // entries xq and xq + 1.
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  const int b_q10 = one_q10 - a_q10;
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}
912
av1_model_rd_from_var_lapndz(int64_t var,unsigned int n_log2,unsigned int qstep,int * rate,int64_t * dist)913 void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n_log2,
914 unsigned int qstep, int *rate,
915 int64_t *dist) {
916 // This function models the rate and distortion for a Laplacian
917 // source with given variance when quantized with a uniform quantizer
918 // with given stepsize. The closed form expressions are in:
919 // Hang and Chen, "Source Model for transform video coder and its
920 // application - Part I: Fundamental Theory", IEEE Trans. Circ.
921 // Sys. for Video Tech., April 1997.
922 if (var == 0) {
923 *rate = 0;
924 *dist = 0;
925 } else {
926 int d_q10, r_q10;
927 static const uint32_t MAX_XSQ_Q10 = 245727;
928 const uint64_t xsq_q10_64 =
929 (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
930 const int xsq_q10 = (int)AOMMIN(xsq_q10_64, MAX_XSQ_Q10);
931 model_rd_norm(xsq_q10, &r_q10, &d_q10);
932 *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - AV1_PROB_COST_SHIFT);
933 *dist = (var * (int64_t)d_q10 + 512) >> 10;
934 }
935 }
936
// Catmull-Rom cubic interpolation: evaluates the curve through the four
// samples p[0..3] at fractional position x, where x in [0, 1] spans the
// interval between p[1] and p[2].
static double interp_cubic(const double *p, double x) {
  const double c0 = p[1];
  const double c1 = 0.5 * (p[2] - p[0]);
  const double c2 = 0.5 * (2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3]);
  const double c3 = 0.5 * (3.0 * (p[1] - p[2]) + p[3] - p[0]);
  return c0 + x * (c1 + x * (c2 + x * c3));
}
943
944 /*
945 static double interp_bicubic(const double *p, int p_stride, double x,
946 double y) {
947 double q[4];
948 q[0] = interp_cubic(p, x);
949 q[1] = interp_cubic(p + p_stride, x);
950 q[2] = interp_cubic(p + 2 * p_stride, x);
951 q[3] = interp_cubic(p + 3 * p_stride, x);
952 return interp_cubic(q, y);
953 }
954 */
955
// Maps each block size to one of four rate curve-fit model categories
// (row index into interp_rgrid_curv); larger values appear to correspond
// to larger block areas.
static const uint8_t bsize_curvfit_model_cat_lookup[BLOCK_SIZES_ALL] = {
  0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 1, 1, 2, 2, 3, 3
};
959
// Selects the distortion curve-fit category from the normalized SSE:
// category 1 when sse_norm exceeds 16, otherwise category 0.
static int sse_norm_curvfit_model_cat_lookup(double sse_norm) {
  return (sse_norm > 16.0) ? 1 : 0;
}
963
// Fitted rate curves, one row per block-size category (see
// bsize_curvfit_model_cat_lookup). Each row holds 65 samples consumed by
// av1_model_rd_curvfit via interp_cubic; presumably sampled on the
// [-15.5, 16.5] grid with step 0.5 used there — TODO confirm.
static const double interp_rgrid_curv[4][65] = {
  {
      0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
      0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
      0.000000, 118.257702, 120.210658, 121.434853, 122.100487,
      122.377758, 122.436865, 72.290102, 96.974289, 101.652727,
      126.830141, 140.417377, 157.644879, 184.315291, 215.823873,
      262.300169, 335.919859, 420.624173, 519.185032, 619.854243,
      726.053595, 827.663369, 933.127475, 1037.988755, 1138.839609,
      1233.342933, 1333.508064, 1428.760126, 1533.396364, 1616.952052,
      1744.539319, 1803.413586, 1951.466618, 1994.227838, 2086.031680,
      2148.635443, 2239.068450, 2222.590637, 2338.859809, 2402.929011,
      2418.727875, 2435.342670, 2471.159469, 2523.187446, 2591.183827,
      2674.905840, 2774.110714, 2888.555675, 3017.997952, 3162.194773,
      3320.903365, 3493.880956, 3680.884773, 3881.672045, 4096.000000,
  },
  {
      0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
      0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
      0.000000, 13.087244, 15.919735, 25.930313, 24.412411,
      28.567417, 29.924194, 30.857010, 32.742979, 36.382570,
      39.210386, 42.265690, 47.378572, 57.014850, 82.740067,
      137.346562, 219.968084, 316.781856, 415.643773, 516.706538,
      614.914364, 714.303763, 815.512135, 911.210485, 1008.501528,
      1109.787854, 1213.772279, 1322.922561, 1414.752579, 1510.505641,
      1615.741888, 1697.989032, 1780.123933, 1847.453790, 1913.742309,
      1960.828122, 2047.500168, 2085.454095, 2129.230668, 2158.171824,
      2182.231724, 2217.684864, 2269.589211, 2337.264824, 2420.618694,
      2519.557814, 2633.989178, 2763.819779, 2908.956609, 3069.306660,
      3244.776927, 3435.274401, 3640.706076, 3860.978945, 4096.000000,
  },
  {
      0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
      0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
      0.000000, 4.656893, 5.123633, 5.594132, 6.162376,
      6.918433, 7.768444, 8.739415, 10.105862, 11.477328,
      13.236604, 15.421030, 19.093623, 25.801871, 46.724612,
      98.841054, 181.113466, 272.586364, 359.499769, 445.546343,
      525.944439, 605.188743, 681.793483, 756.668359, 838.486885,
      926.950356, 1015.482542, 1113.353926, 1204.897193, 1288.871992,
      1373.464145, 1455.746628, 1527.796460, 1588.475066, 1658.144771,
      1710.302500, 1807.563351, 1863.197608, 1927.281616, 1964.450872,
      2022.719898, 2100.041145, 2185.205712, 2280.993936, 2387.616216,
      2505.282950, 2634.204540, 2774.591385, 2926.653884, 3090.602436,
      3266.647443, 3454.999303, 3655.868416, 3869.465182, 4096.000000,
  },
  {
      0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
      0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
      0.000000, 0.337370, 0.391916, 0.468839, 0.566334,
      0.762564, 1.069225, 1.384361, 1.787581, 2.293948,
      3.251909, 4.412991, 8.050068, 11.606073, 27.668092,
      65.227758, 128.463938, 202.097653, 262.715851, 312.464873,
      355.601398, 400.609054, 447.201352, 495.761568, 552.871938,
      619.067625, 691.984883, 773.753288, 860.628503, 946.262808,
      1019.805896, 1106.061360, 1178.422145, 1244.852258, 1302.173987,
      1399.650266, 1548.092912, 1545.928652, 1670.817500, 1694.523823,
      1779.195362, 1882.155494, 1990.662097, 2108.325181, 2235.456119,
      2372.366287, 2519.367059, 2676.769812, 2844.885918, 3024.026754,
      3214.503695, 3416.628115, 3630.711389, 3857.064892, 4096.000000,
  },
};
1026
// Fitted distortion-by-SSE curves, one row per SSE category (see
// sse_norm_curvfit_model_cat_lookup, which only returns 0 or 1).
// NOTE(review): the array is declared with 3 rows but only 2 are
// initialized; the third (all-zero) row appears unused — confirm.
static const double interp_dgrid_curv[3][65] = {
  {
      16.000000, 15.962891, 15.925174, 15.886888, 15.848074, 15.808770,
      15.769015, 15.728850, 15.688313, 15.647445, 15.606284, 15.564870,
      15.525918, 15.483820, 15.373330, 15.126844, 14.637442, 14.184387,
      13.560070, 12.880717, 12.165995, 11.378144, 10.438769, 9.130790,
      7.487633, 5.688649, 4.267515, 3.196300, 2.434201, 1.834064,
      1.369920, 1.035921, 0.775279, 0.574895, 0.427232, 0.314123,
      0.233236, 0.171440, 0.128188, 0.092762, 0.067569, 0.049324,
      0.036330, 0.027008, 0.019853, 0.015539, 0.011093, 0.008733,
      0.007624, 0.008105, 0.005427, 0.004065, 0.003427, 0.002848,
      0.002328, 0.001865, 0.001457, 0.001103, 0.000801, 0.000550,
      0.000348, 0.000193, 0.000085, 0.000021, 0.000000,
  },
  {
      16.000000, 15.996116, 15.984769, 15.966413, 15.941505, 15.910501,
      15.873856, 15.832026, 15.785466, 15.734633, 15.679981, 15.621967,
      15.560961, 15.460157, 15.288367, 15.052462, 14.466922, 13.921212,
      13.073692, 12.222005, 11.237799, 9.985848, 8.898823, 7.423519,
      5.995325, 4.773152, 3.744032, 2.938217, 2.294526, 1.762412,
      1.327145, 1.020728, 0.765535, 0.570548, 0.425833, 0.313825,
      0.232959, 0.171324, 0.128174, 0.092750, 0.067558, 0.049319,
      0.036330, 0.027008, 0.019853, 0.015539, 0.011093, 0.008733,
      0.007624, 0.008105, 0.005427, 0.004065, 0.003427, 0.002848,
      0.002328, 0.001865, 0.001457, 0.001103, 0.000801, 0.000550,
      0.000348, 0.000193, 0.000085, 0.000021, -0.000000,
  },
};
1055
// Evaluates the fitted rate and distortion-by-SSE curves for a block of
// size bsize at position xqr, using Catmull-Rom interpolation between the
// tabulated samples. The curve category depends on the block size (rate)
// and on the normalized SSE (distortion).
void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr,
                          double *rate_f, double *distbysse_f) {
  // The curves are sampled at 65 points spanning [-15.5, 16.5] in steps
  // of 0.5.
  const double grid_start = -15.5;
  const double grid_end = 16.5;
  const double grid_step = 0.5;
  const double eps = 1e-6;
  const int rate_cat = bsize_curvfit_model_cat_lookup[bsize];
  const int dist_cat = sse_norm_curvfit_model_cat_lookup(sse_norm);

  // Clamp xqr strictly inside the grid so the four-tap cubic always has
  // valid neighbour samples on both sides.
  xqr = AOMMAX(xqr, grid_start + grid_step + eps);
  xqr = AOMMIN(xqr, grid_end - grid_step - eps);
  const double pos = (xqr - grid_start) / grid_step;
  const int idx = (int)floor(pos);
  const double frac = pos - idx;

  assert(idx > 0);

  *rate_f = interp_cubic(&interp_rgrid_curv[rate_cat][idx - 1], frac);
  *distbysse_f = interp_cubic(&interp_dgrid_curv[dist_cat][idx - 1], frac);
}
1079
// Copies the above/left entropy contexts covering plane_bsize into the
// caller-provided scratch arrays.
static void get_entropy_contexts_plane(BLOCK_SIZE plane_bsize,
                                       const struct macroblockd_plane *pd,
                                       ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
                                       ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
  // Width/height of the plane block in 4x4 (mi) units.
  const int width_4x4 = mi_size_wide[plane_bsize];
  const int height_4x4 = mi_size_high[plane_bsize];
  memcpy(t_above, pd->above_entropy_context,
         sizeof(ENTROPY_CONTEXT) * width_4x4);
  memcpy(t_left, pd->left_entropy_context,
         sizeof(ENTROPY_CONTEXT) * height_4x4);
}
1092
// Public wrapper around get_entropy_contexts_plane: validates the block
// size and copies the plane's above/left entropy contexts into t_above
// and t_left.
void av1_get_entropy_contexts(BLOCK_SIZE plane_bsize,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
                              ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
  assert(plane_bsize < BLOCK_SIZES_ALL);
  get_entropy_contexts_plane(plane_bsize, pd, t_above, t_left);
}
1100
1101 // Special clamping used in the encoder when calculating a prediction
1102 //
1103 // Logically, all pixel fetches used for prediction are clamped against the
1104 // edges of the frame. But doing this directly is slow, so instead we allocate
1105 // a finite border around the frame and fill it with copies of the outermost
1106 // pixels.
1107 //
1108 // Since this border is finite, we need to clamp the motion vector before
1109 // prediction in order to avoid out-of-bounds reads. At the same time, this
1110 // clamp must not change the prediction result.
1111 //
1112 // We can balance both of these concerns by calculating how far we would have
1113 // to go in each direction before the extended prediction region (the current
1114 // block + AOM_INTERP_EXTEND many pixels around the block) would be mapped
1115 // so that it touches the frame only at one row or column. This is a special
1116 // point because any more extreme MV will always lead to the same prediction.
1117 // So it is safe to clamp at that point.
1118 //
1119 // In the worst case, this requires a border of
1120 // max_block_width + 2*AOM_INTERP_EXTEND = 128 + 2*4 = 136 pixels
1121 // around the frame edges.
static inline void enc_clamp_mv(const AV1_COMMON *cm, const MACROBLOCKD *xd,
                                MV *mv) {
  // Block dimensions and distances from the block to each frame edge,
  // all in pixels.
  const int block_w = xd->width << MI_SIZE_LOG2;
  const int block_h = xd->height << MI_SIZE_LOG2;
  const int px_to_left = xd->mi_col << MI_SIZE_LOG2;
  const int px_to_right = (cm->mi_params.mi_cols - xd->mi_col) << MI_SIZE_LOG2;
  const int px_to_top = xd->mi_row << MI_SIZE_LOG2;
  const int px_to_bottom = (cm->mi_params.mi_rows - xd->mi_row) << MI_SIZE_LOG2;

  // Furthest MV in each direction beyond which the prediction can no
  // longer change (see the explanation above this function); clamping at
  // these limits is therefore lossless.
  const SubpelMvLimits mv_limits = {
    .col_min = -GET_MV_SUBPEL(px_to_left + block_w + AOM_INTERP_EXTEND),
    .col_max = GET_MV_SUBPEL(px_to_right + AOM_INTERP_EXTEND),
    .row_min = -GET_MV_SUBPEL(px_to_top + block_h + AOM_INTERP_EXTEND),
    .row_max = GET_MV_SUBPEL(px_to_bottom + AOM_INTERP_EXTEND)
  };
  clamp_mv(mv, &mv_limits);
}
1140
// Evaluates up to two candidate reference MVs for ref_frame at full-pel
// precision, recording per-candidate SADs, the best SAD, and the largest
// full-pel MV magnitude seen (used as MV context elsewhere).
void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  // Collect candidates from the reference MV stack; skip the second one
  // if it duplicates the first.
  const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
  const int_mv mv0 = av1_get_ref_mv_from_stack(0, ref_frames, 0, &x->mbmi_ext);
  const int_mv mv1 = av1_get_ref_mv_from_stack(0, ref_frames, 1, &x->mbmi_ext);
  MV candidates[MAX_MV_REF_CANDIDATES + 1];
  int num_candidates = 0;
  candidates[num_candidates++] = mv0.as_mv;
  if (mv0.as_int != mv1.as_int) candidates[num_candidates++] = mv1.as_mv;

  assert(num_candidates <= (int)(sizeof(candidates) / sizeof(candidates[0])));

  const uint8_t *const src_y_ptr = x->plane[0].src.buf;
  int zero_seen = 0;
  int best_sad = INT_MAX;
  int max_mv = 0;
  // Compute the SAD of each candidate.
  for (int i = 0; i < num_candidates; ++i) {
    MV *const this_mv = &candidates[i];
    enc_clamp_mv(&cpi->common, &x->e_mbd, this_mv);

    // Round the 1/8-pel MV to the nearest full-pel position.
    const int fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
    const int fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
    max_mv = AOMMAX(max_mv, AOMMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);

    // Only the first zero MV needs evaluation; later zeros are duplicates.
    if (fp_row == 0 && fp_col == 0 && zero_seen) continue;
    zero_seen |= (fp_row == 0 && fp_col == 0);

    const uint8_t *const ref_y_ptr =
        &ref_y_buffer[ref_y_stride * fp_row + fp_col];
    const int this_sad = cpi->ppi->fn_ptr[block_size].sdf(
        src_y_ptr, x->plane[0].src.stride, ref_y_ptr, ref_y_stride);
    best_sad = AOMMIN(best_sad, this_sad);
    if (i == 0)
      x->pred_mv0_sad[ref_frame] = this_sad;
    else if (i == 1)
      x->pred_mv1_sad[ref_frame] = this_sad;
  }

  // Record the largest full-pel MV magnitude and the best SAD found.
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}
1192
av1_setup_pred_block(const MACROBLOCKD * xd,struct buf_2d dst[MAX_MB_PLANE],const YV12_BUFFER_CONFIG * src,const struct scale_factors * scale,const struct scale_factors * scale_uv,const int num_planes)1193 void av1_setup_pred_block(const MACROBLOCKD *xd,
1194 struct buf_2d dst[MAX_MB_PLANE],
1195 const YV12_BUFFER_CONFIG *src,
1196 const struct scale_factors *scale,
1197 const struct scale_factors *scale_uv,
1198 const int num_planes) {
1199 dst[0].buf = src->y_buffer;
1200 dst[0].stride = src->y_stride;
1201 dst[1].buf = src->u_buffer;
1202 dst[2].buf = src->v_buffer;
1203 dst[1].stride = dst[2].stride = src->uv_stride;
1204
1205 const int mi_row = xd->mi_row;
1206 const int mi_col = xd->mi_col;
1207 for (int i = 0; i < num_planes; ++i) {
1208 setup_pred_plane(dst + i, xd->mi[0]->bsize, dst[i].buf,
1209 i ? src->uv_crop_width : src->y_crop_width,
1210 i ? src->uv_crop_height : src->y_crop_height,
1211 dst[i].stride, mi_row, mi_col, i ? scale_uv : scale,
1212 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
1213 }
1214 }
1215
av1_get_scaled_ref_frame(const AV1_COMP * cpi,int ref_frame)1216 YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const AV1_COMP *cpi,
1217 int ref_frame) {
1218 assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
1219 RefCntBuffer *const scaled_buf = cpi->scaled_ref_buf[ref_frame - 1];
1220 const RefCntBuffer *const ref_buf =
1221 get_ref_frame_buf(&cpi->common, ref_frame);
1222 return (scaled_buf != ref_buf && scaled_buf != NULL) ? &scaled_buf->buf
1223 : NULL;
1224 }
1225
// Returns the rate cost of signalling the interpolation filter(s) for the
// current block. Only the SWITCHABLE filter costs bits; with dual_filter,
// both the horizontal and vertical directions are signalled.
int av1_get_switchable_rate(const MACROBLOCK *x, const MACROBLOCKD *xd,
                            InterpFilter interp_filter, int dual_filter) {
  if (interp_filter != SWITCHABLE) return 0;

  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_dirs = dual_filter ? 2 : 1;
  int cost = 0;
  for (int dir = 0; dir < num_dirs; ++dir) {
    const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
    const InterpFilter filter =
        av1_extract_interp_filter(mbmi->interp_filters, dir);
    cost += x->mode_costs.switchable_interp_costs[ctx][filter];
  }
  return SWITCHABLE_INTERP_RATE_FACTOR * cost;
}
1243
// Initializes the baseline per-mode RD threshold multipliers used to skip
// unpromising modes during the RD search. Larger values make a mode more
// likely to be pruned; 0 (from av1_zero) means no threshold.
void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
  RD_OPT *const rd = &cpi->rd;

  // Set baseline threshold values.
  av1_zero(rd->thresh_mult);

  // Single-reference NEAREST modes.
  rd->thresh_mult[THR_NEARESTMV] = 300;
  rd->thresh_mult[THR_NEARESTL2] = 300;
  rd->thresh_mult[THR_NEARESTL3] = 300;
  rd->thresh_mult[THR_NEARESTB] = 300;
  rd->thresh_mult[THR_NEARESTA2] = 300;
  rd->thresh_mult[THR_NEARESTA] = 300;
  rd->thresh_mult[THR_NEARESTG] = 300;

  // Single-reference NEW modes.
  rd->thresh_mult[THR_NEWMV] = 1000;
  rd->thresh_mult[THR_NEWL2] = 1000;
  rd->thresh_mult[THR_NEWL3] = 1000;
  rd->thresh_mult[THR_NEWB] = 1000;
  rd->thresh_mult[THR_NEWA2] = 1100;
  rd->thresh_mult[THR_NEWA] = 1000;
  rd->thresh_mult[THR_NEWG] = 1000;

  // Single-reference NEAR modes.
  rd->thresh_mult[THR_NEARMV] = 1000;
  rd->thresh_mult[THR_NEARL2] = 1000;
  rd->thresh_mult[THR_NEARL3] = 1000;
  rd->thresh_mult[THR_NEARB] = 1000;
  rd->thresh_mult[THR_NEARA2] = 1000;
  rd->thresh_mult[THR_NEARA] = 1000;
  rd->thresh_mult[THR_NEARG] = 1000;

  // Single-reference GLOBAL modes.
  rd->thresh_mult[THR_GLOBALMV] = 2200;
  rd->thresh_mult[THR_GLOBALL2] = 2000;
  rd->thresh_mult[THR_GLOBALL3] = 2000;
  rd->thresh_mult[THR_GLOBALB] = 2400;
  rd->thresh_mult[THR_GLOBALA2] = 2000;
  rd->thresh_mult[THR_GLOBALG] = 2000;
  rd->thresh_mult[THR_GLOBALA] = 2400;

  // Compound NEAREST_NEAREST modes.
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] = 1100;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A] = 800;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA] = 900;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLB] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2B] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3B] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGB] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA2] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A2] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A2] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA2] = 1000;

  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL2] = 2000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL3] = 2000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLG] = 2000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTBA] = 2000;

  // Remaining compound modes, grouped per reference-frame pair.
  rd->thresh_mult[THR_COMP_NEAR_NEARLA] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLA] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLA] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWLA] = 1530;
  rd->thresh_mult[THR_COMP_NEW_NEARLA] = 1870;
  rd->thresh_mult[THR_COMP_NEW_NEWLA] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA] = 2750;

  rd->thresh_mult[THR_COMP_NEAR_NEARL2A] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL2A] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL2A] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL2A] = 1870;
  rd->thresh_mult[THR_COMP_NEW_NEARL2A] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL2A] = 1800;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL3A] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL3A] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL3A] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL3A] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL3A] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL3A] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A] = 3000;

  rd->thresh_mult[THR_COMP_NEAR_NEARGA] = 1320;
  rd->thresh_mult[THR_COMP_NEAREST_NEWGA] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTGA] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWGA] = 2040;
  rd->thresh_mult[THR_COMP_NEW_NEARGA] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWGA] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA] = 2250;

  rd->thresh_mult[THR_COMP_NEAR_NEARLB] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLB] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLB] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWLB] = 1360;
  rd->thresh_mult[THR_COMP_NEW_NEARLB] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWLB] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLB] = 2250;

  rd->thresh_mult[THR_COMP_NEAR_NEARL2B] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL2B] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL2B] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL2B] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL2B] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL2B] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2B] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL3B] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL3B] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL3B] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL3B] = 1870;
  rd->thresh_mult[THR_COMP_NEW_NEARL3B] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL3B] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3B] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARGB] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWGB] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTGB] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWGB] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARGB] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWGB] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGB] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARLA2] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLA2] = 1800;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLA2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWLA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARLA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWLA2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA2] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL2A2] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL2A2] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL2A2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL2A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL2A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL2A2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A2] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL3A2] = 1440;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL3A2] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL3A2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL3A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL3A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL3A2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A2] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARGA2] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWGA2] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTGA2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWGA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARGA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWGA2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA2] = 2750;

  rd->thresh_mult[THR_COMP_NEAR_NEARLL2] = 1600;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLL2] = 2000;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLL2] = 2000;
  rd->thresh_mult[THR_COMP_NEAR_NEWLL2] = 2640;
  rd->thresh_mult[THR_COMP_NEW_NEARLL2] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEWLL2] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL2] = 3200;

  rd->thresh_mult[THR_COMP_NEAR_NEARLL3] = 1600;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLL3] = 2000;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLL3] = 1800;
  rd->thresh_mult[THR_COMP_NEAR_NEWLL3] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEARLL3] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEWLL3] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL3] = 3200;

  rd->thresh_mult[THR_COMP_NEAR_NEARLG] = 1760;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLG] = 2400;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLG] = 2000;
  rd->thresh_mult[THR_COMP_NEAR_NEWLG] = 1760;
  rd->thresh_mult[THR_COMP_NEW_NEARLG] = 2640;
  rd->thresh_mult[THR_COMP_NEW_NEWLG] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLG] = 3200;

  rd->thresh_mult[THR_COMP_NEAR_NEARBA] = 1600;
  rd->thresh_mult[THR_COMP_NEAREST_NEWBA] = 2000;
  rd->thresh_mult[THR_COMP_NEW_NEARESTBA] = 2000;
  rd->thresh_mult[THR_COMP_NEAR_NEWBA] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEARBA] = 1980;
  rd->thresh_mult[THR_COMP_NEW_NEWBA] = 2640;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALBA] = 3200;

  // Intra modes.
  rd->thresh_mult[THR_DC] = 1000;
  rd->thresh_mult[THR_PAETH] = 1000;
  rd->thresh_mult[THR_SMOOTH] = 2200;
  rd->thresh_mult[THR_SMOOTH_V] = 2000;
  rd->thresh_mult[THR_SMOOTH_H] = 2000;
  rd->thresh_mult[THR_H_PRED] = 2000;
  rd->thresh_mult[THR_V_PRED] = 1800;
  rd->thresh_mult[THR_D135_PRED] = 2500;
  rd->thresh_mult[THR_D203_PRED] = 2000;
  rd->thresh_mult[THR_D157_PRED] = 2500;
  rd->thresh_mult[THR_D67_PRED] = 2000;
  rd->thresh_mult[THR_D113_PRED] = 2500;
  rd->thresh_mult[THR_D45_PRED] = 2500;
}
1442
// Adapts the RD threshold scaling factors after a mode decision: decays
// the winning mode's factor and grows every other mode's factor (capped
// at max_rd_thresh_factor) over the mode range [mode_start, mode_end)
// and block sizes [min_size, max_size].
static inline void update_thr_fact(int (*factor_buf)[MAX_MODES],
                                   THR_MODES best_mode_index,
                                   THR_MODES mode_start, THR_MODES mode_end,
                                   BLOCK_SIZE min_size, BLOCK_SIZE max_size,
                                   int max_rd_thresh_factor) {
  for (BLOCK_SIZE bs = min_size; bs <= max_size; ++bs) {
    for (THR_MODES mode = mode_start; mode < mode_end; ++mode) {
      int *const fact = &factor_buf[bs][mode];
      *fact = (mode == best_mode_index)
                  ? *fact - (*fact >> RD_THRESH_LOG_DEC_FACTOR)
                  : AOMMIN(*fact + RD_THRESH_INC, max_rd_thresh_factor);
    }
  }
}
1459
// Updates the adaptive RD threshold factors for both the inter and intra
// mode ranges after a block's best mode has been chosen.
void av1_update_rd_thresh_fact(
    const AV1_COMMON *const cm, int (*factor_buf)[MAX_MODES],
    int use_adaptive_rd_thresh, BLOCK_SIZE bsize, THR_MODES best_mode_index,
    THR_MODES inter_mode_start, THR_MODES inter_mode_end,
    THR_MODES intra_mode_start, THR_MODES intra_mode_end) {
  assert(use_adaptive_rd_thresh > 0);
  const int max_rd_thresh_factor = use_adaptive_rd_thresh * RD_THRESH_MAX_FACT;

  BLOCK_SIZE min_size, max_size;
  if (bsize > cm->seq_params->sb_size) {
    // This part handles block sizes with 1:4 and 4:1 aspect ratios
    // TODO(any): Experiment with threshold update for parent/child blocks
    min_size = bsize;
    max_size = bsize;
  } else {
    // Update a neighbourhood of up to two size steps around bsize.
    min_size = AOMMAX(bsize - 2, BLOCK_4X4);
    max_size = AOMMIN(bsize + 2, (int)cm->seq_params->sb_size);
  }

  update_thr_fact(factor_buf, best_mode_index, inter_mode_start, inter_mode_end,
                  min_size, max_size, max_rd_thresh_factor);
  update_thr_fact(factor_buf, best_mode_index, intra_mode_start, intra_mode_end,
                  min_size, max_size, max_rd_thresh_factor);
}
1485
// Returns the rate penalty applied to intra modes, derived from the DC
// quantizer at the given qindex/qdelta. Higher bit depths use smaller
// multipliers to keep the penalty comparable across depths.
int av1_get_intra_cost_penalty(int qindex, int qdelta,
                               aom_bit_depth_t bit_depth) {
  const int q = av1_dc_quant_QTX(qindex, qdelta, bit_depth);
  if (bit_depth == AOM_BITS_8) return 20 * q;
  if (bit_depth == AOM_BITS_10) return 5 * q;
  if (bit_depth == AOM_BITS_12) return ROUND_POWER_OF_TWO(5 * q, 2);
  assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
  return -1;
}
1498