1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <assert.h>
13 #include <limits.h>
14 #include <math.h>
15 #include <stdio.h>
16
17 #include "aom_dsp/aom_dsp_common.h"
18 #include "aom_mem/aom_mem.h"
19 #include "aom_ports/bitops.h"
20 #include "aom_ports/mem.h"
21 #include "aom_ports/aom_once.h"
22
23 #include "av1/common/common.h"
24 #include "av1/common/entropy.h"
25 #include "av1/common/entropymode.h"
26 #include "av1/common/pred_common.h"
27 #include "av1/common/quant_common.h"
28 #include "av1/common/reconinter.h"
29 #include "av1/common/reconintra.h"
30 #include "av1/common/seg_common.h"
31
32 #include "av1/encoder/cost.h"
33 #include "av1/encoder/encodemv.h"
34 #include "av1/encoder/encoder.h"
35 #include "av1/encoder/nonrd_opt.h"
36 #include "av1/encoder/ratectrl.h"
37 #include "av1/encoder/rd.h"
38
39 #define RD_THRESH_POW 1.25
40
41 // The baseline rd thresholds for breaking out of the rd loop for
42 // certain modes are assumed to be based on 8x8 blocks.
43 // This table is used to correct for block size.
44 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
45 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES_ALL] = {
46 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, 48, 48, 64, 4, 4, 8, 8, 16, 16
47 };
48
49 static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA]
50 [EXT_TX_SIZES] = {
51 { 1, 1, 1, 1 }, // unused
52 { 1, 1, 0, 0 },
53 { 0, 0, 1, 0 },
54 };
55
56 static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER]
57 [EXT_TX_SIZES] = {
58 { 1, 1, 1, 1 }, // unused
59 { 1, 1, 0, 0 },
60 { 0, 0, 1, 0 },
61 { 0, 1, 1, 1 },
62 };
63
64 static const int av1_ext_tx_set_idx_to_type[2][AOMMAX(EXT_TX_SETS_INTRA,
65 EXT_TX_SETS_INTER)] = {
66 {
67 // Intra
68 EXT_TX_SET_DCTONLY,
69 EXT_TX_SET_DTT4_IDTX_1DDCT,
70 EXT_TX_SET_DTT4_IDTX,
71 },
72 {
73 // Inter
74 EXT_TX_SET_DCTONLY,
75 EXT_TX_SET_ALL16,
76 EXT_TX_SET_DTT9_IDTX_1DDCT,
77 EXT_TX_SET_DCT_IDTX,
78 },
79 };
80
// Converts the mode-level symbol CDFs in `fc` into per-symbol costs stored in
// `mode_costs`, one av1_cost_tokens_from_cdf() call per CDF.
//
// Skip-mode costs are filled only when the current frame's skip_mode_flag is
// set. Filter-intra and wedge-index costs are filled only for block sizes that
// pass av1_filter_intra_allowed_bsize() / av1_is_wedge_used(). The
// inter-prediction costs at the end are filled only when the frame is not
// intra-only.
void av1_fill_mode_rates(AV1_COMMON *const cm, ModeCosts *mode_costs,
                         FRAME_CONTEXT *fc) {
  // Partition.
  for (int ctx = 0; ctx < PARTITION_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->partition_cost[ctx],
                             fc->partition_cdf[ctx], NULL);
  }

  // Skip mode (only when enabled for this frame) and transform skip.
  if (cm->current_frame.skip_mode_info.skip_mode_flag) {
    for (int ctx = 0; ctx < SKIP_MODE_CONTEXTS; ++ctx) {
      av1_cost_tokens_from_cdf(mode_costs->skip_mode_cost[ctx],
                               fc->skip_mode_cdfs[ctx], NULL);
    }
  }
  for (int ctx = 0; ctx < SKIP_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->skip_txfm_cost[ctx],
                             fc->skip_txfm_cdfs[ctx], NULL);
  }

  // Luma intra modes: key-frame CDFs use a pair of contexts, the others are
  // indexed by block size group.
  for (int ctx0 = 0; ctx0 < KF_MODE_CONTEXTS; ++ctx0) {
    for (int ctx1 = 0; ctx1 < KF_MODE_CONTEXTS; ++ctx1) {
      av1_cost_tokens_from_cdf(mode_costs->y_mode_costs[ctx0][ctx1],
                               fc->kf_y_cdf[ctx0][ctx1], NULL);
    }
  }
  for (int group = 0; group < BLOCK_SIZE_GROUPS; ++group) {
    av1_cost_tokens_from_cdf(mode_costs->mbmode_cost[group],
                             fc->y_mode_cdf[group], NULL);
  }

  // Chroma intra modes.
  for (int cfl_type = 0; cfl_type < CFL_ALLOWED_TYPES; ++cfl_type) {
    for (int mode = 0; mode < INTRA_MODES; ++mode) {
      av1_cost_tokens_from_cdf(mode_costs->intra_uv_mode_cost[cfl_type][mode],
                               fc->uv_mode_cdf[cfl_type][mode], NULL);
    }
  }

  // Filter intra.
  av1_cost_tokens_from_cdf(mode_costs->filter_intra_mode_cost,
                           fc->filter_intra_mode_cdf, NULL);
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    if (!av1_filter_intra_allowed_bsize(cm, bsize)) continue;
    av1_cost_tokens_from_cdf(mode_costs->filter_intra_cost[bsize],
                             fc->filter_intra_cdfs[bsize], NULL);
  }

  // Switchable interpolation filter.
  for (int ctx = 0; ctx < SWITCHABLE_FILTER_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->switchable_interp_costs[ctx],
                             fc->switchable_interp_cdf[ctx], NULL);
  }

  // Palette sizes and modes.
  for (int bctx = 0; bctx < PALATTE_BSIZE_CTXS; ++bctx) {
    av1_cost_tokens_from_cdf(mode_costs->palette_y_size_cost[bctx],
                             fc->palette_y_size_cdf[bctx], NULL);
    av1_cost_tokens_from_cdf(mode_costs->palette_uv_size_cost[bctx],
                             fc->palette_uv_size_cdf[bctx], NULL);
    for (int mctx = 0; mctx < PALETTE_Y_MODE_CONTEXTS; ++mctx) {
      av1_cost_tokens_from_cdf(mode_costs->palette_y_mode_cost[bctx][mctx],
                               fc->palette_y_mode_cdf[bctx][mctx], NULL);
    }
  }
  for (int ctx = 0; ctx < PALETTE_UV_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->palette_uv_mode_cost[ctx],
                             fc->palette_uv_mode_cdf[ctx], NULL);
  }

  // Palette color indices.
  for (int size_idx = 0; size_idx < PALETTE_SIZES; ++size_idx) {
    for (int ctx = 0; ctx < PALETTE_COLOR_INDEX_CONTEXTS; ++ctx) {
      av1_cost_tokens_from_cdf(mode_costs->palette_y_color_cost[size_idx][ctx],
                               fc->palette_y_color_index_cdf[size_idx][ctx],
                               NULL);
      av1_cost_tokens_from_cdf(
          mode_costs->palette_uv_color_cost[size_idx][ctx],
          fc->palette_uv_color_index_cdf[size_idx][ctx], NULL);
    }
  }

  // CfL: per joint sign, the U and V alpha costs. A plane whose sign is zero
  // gets an all-zero alpha cost table. The joint sign cost itself is folded
  // into the U table only.
  int sign_cost[CFL_JOINT_SIGNS];
  av1_cost_tokens_from_cdf(sign_cost, fc->cfl_sign_cdf, NULL);
  for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
    int *cost_u = mode_costs->cfl_cost[joint_sign][CFL_PRED_U];
    int *cost_v = mode_costs->cfl_cost[joint_sign][CFL_PRED_V];

    if (CFL_SIGN_U(joint_sign) != CFL_SIGN_ZERO) {
      const aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
      av1_cost_tokens_from_cdf(cost_u, cdf_u, NULL);
    } else {
      memset(cost_u, 0, CFL_ALPHABET_SIZE * sizeof(*cost_u));
    }

    if (CFL_SIGN_V(joint_sign) != CFL_SIGN_ZERO) {
      const aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
      av1_cost_tokens_from_cdf(cost_v, cdf_v, NULL);
    } else {
      memset(cost_v, 0, CFL_ALPHABET_SIZE * sizeof(*cost_v));
    }

    for (int u = 0; u < CFL_ALPHABET_SIZE; u++) {
      cost_u[u] += sign_cost[joint_sign];
    }
  }

  // Transform size and transform partition.
  for (int cat = 0; cat < MAX_TX_CATS; ++cat) {
    for (int ctx = 0; ctx < TX_SIZE_CONTEXTS; ++ctx) {
      av1_cost_tokens_from_cdf(mode_costs->tx_size_cost[cat][ctx],
                               fc->tx_size_cdf[cat][ctx], NULL);
    }
  }
  for (int ctx = 0; ctx < TXFM_PARTITION_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->txfm_partition_cost[ctx],
                             fc->txfm_partition_cdf[ctx], NULL);
  }

  // Extended transform types. Set index 0 is skipped; the remaining sets are
  // filled only at the sizes enabled in the use_*_ext_tx_for_txsize tables.
  for (int tx = TX_4X4; tx < EXT_TX_SIZES; ++tx) {
    for (int set = 1; set < EXT_TX_SETS_INTER; ++set) {
      if (!use_inter_ext_tx_for_txsize[set][tx]) continue;
      av1_cost_tokens_from_cdf(
          mode_costs->inter_tx_type_costs[set][tx],
          fc->inter_ext_tx_cdf[set][tx],
          av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][set]]);
    }
    for (int set = 1; set < EXT_TX_SETS_INTRA; ++set) {
      if (!use_intra_ext_tx_for_txsize[set][tx]) continue;
      for (int mode = 0; mode < INTRA_MODES; ++mode) {
        av1_cost_tokens_from_cdf(
            mode_costs->intra_tx_type_costs[set][tx][mode],
            fc->intra_ext_tx_cdf[set][tx][mode],
            av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[0][set]]);
      }
    }
  }

  // Angle delta and intra block copy.
  for (int dir = 0; dir < DIRECTIONAL_MODES; ++dir) {
    av1_cost_tokens_from_cdf(mode_costs->angle_delta_cost[dir],
                             fc->angle_delta_cdf[dir], NULL);
  }
  av1_cost_tokens_from_cdf(mode_costs->intrabc_cost, fc->intrabc_cdf, NULL);

  // Segment id prediction (spatial and temporal).
  for (int k = 0; k < SPATIAL_PREDICTION_PROBS; ++k) {
    av1_cost_tokens_from_cdf(mode_costs->spatial_pred_cost[k],
                             fc->seg.spatial_pred_seg_cdf[k], NULL);
  }
  for (int ctx = 0; ctx < SEG_TEMPORAL_PRED_CTXS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->tmp_pred_cost[ctx],
                             fc->seg.pred_cdf[ctx], NULL);
  }

  // Everything below is inter-prediction only.
  if (frame_is_intra_only(cm)) return;

  // Reference frame signalling.
  for (int ctx = 0; ctx < COMP_INTER_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->comp_inter_cost[ctx],
                             fc->comp_inter_cdf[ctx], NULL);
  }
  for (int ctx = 0; ctx < REF_CONTEXTS; ++ctx) {
    for (int ref = 0; ref < SINGLE_REFS - 1; ++ref) {
      av1_cost_tokens_from_cdf(mode_costs->single_ref_cost[ctx][ref],
                               fc->single_ref_cdf[ctx][ref], NULL);
    }
  }
  for (int ctx = 0; ctx < COMP_REF_TYPE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->comp_ref_type_cost[ctx],
                             fc->comp_ref_type_cdf[ctx], NULL);
  }
  for (int ctx = 0; ctx < UNI_COMP_REF_CONTEXTS; ++ctx) {
    for (int ref = 0; ref < UNIDIR_COMP_REFS - 1; ++ref) {
      av1_cost_tokens_from_cdf(mode_costs->uni_comp_ref_cost[ctx][ref],
                               fc->uni_comp_ref_cdf[ctx][ref], NULL);
    }
  }
  for (int ctx = 0; ctx < REF_CONTEXTS; ++ctx) {
    for (int ref = 0; ref < FWD_REFS - 1; ++ref) {
      av1_cost_tokens_from_cdf(mode_costs->comp_ref_cost[ctx][ref],
                               fc->comp_ref_cdf[ctx][ref], NULL);
    }
  }
  for (int ctx = 0; ctx < REF_CONTEXTS; ++ctx) {
    for (int ref = 0; ref < BWD_REFS - 1; ++ref) {
      av1_cost_tokens_from_cdf(mode_costs->comp_bwdref_cost[ctx][ref],
                               fc->comp_bwdref_cdf[ctx][ref], NULL);
    }
  }

  // Intra/inter decision and single-reference inter modes.
  for (int ctx = 0; ctx < INTRA_INTER_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->intra_inter_cost[ctx],
                             fc->intra_inter_cdf[ctx], NULL);
  }
  for (int ctx = 0; ctx < NEWMV_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->newmv_mode_cost[ctx],
                             fc->newmv_cdf[ctx], NULL);
  }
  for (int ctx = 0; ctx < GLOBALMV_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->zeromv_mode_cost[ctx],
                             fc->zeromv_cdf[ctx], NULL);
  }
  for (int ctx = 0; ctx < REFMV_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->refmv_mode_cost[ctx],
                             fc->refmv_cdf[ctx], NULL);
  }
  for (int ctx = 0; ctx < DRL_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->drl_mode_cost0[ctx], fc->drl_cdf[ctx],
                             NULL);
  }

  // Compound modes, compound types and wedge indices.
  for (int ctx = 0; ctx < INTER_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->inter_compound_mode_cost[ctx],
                             fc->inter_compound_mode_cdf[ctx], NULL);
  }
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    av1_cost_tokens_from_cdf(mode_costs->compound_type_cost[bsize],
                             fc->compound_type_cdf[bsize], NULL);
  }
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    if (!av1_is_wedge_used(bsize)) continue;
    av1_cost_tokens_from_cdf(mode_costs->wedge_idx_cost[bsize],
                             fc->wedge_idx_cdf[bsize], NULL);
  }

  // Inter-intra.
  for (int group = 0; group < BLOCK_SIZE_GROUPS; ++group) {
    av1_cost_tokens_from_cdf(mode_costs->interintra_cost[group],
                             fc->interintra_cdf[group], NULL);
    av1_cost_tokens_from_cdf(mode_costs->interintra_mode_cost[group],
                             fc->interintra_mode_cdf[group], NULL);
  }
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    av1_cost_tokens_from_cdf(mode_costs->wedge_interintra_cost[bsize],
                             fc->wedge_interintra_cdf[bsize], NULL);
  }

  // Motion mode costs; only block sizes from BLOCK_8X8 upwards are filled.
  for (int bsize = BLOCK_8X8; bsize < BLOCK_SIZES_ALL; bsize++) {
    av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost[bsize],
                             fc->motion_mode_cdf[bsize], NULL);
    av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost1[bsize],
                             fc->obmc_cdf[bsize], NULL);
  }

  // Compound index and compound group index.
  for (int ctx = 0; ctx < COMP_INDEX_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->comp_idx_cost[ctx],
                             fc->compound_index_cdf[ctx], NULL);
  }
  for (int ctx = 0; ctx < COMP_GROUP_IDX_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->comp_group_idx_cost[ctx],
                             fc->comp_group_idx_cdf[ctx], NULL);
  }
}
321
// Converts the three loop-restoration CDFs in `fc` (switchable, wiener,
// sgrproj) into per-symbol costs stored in `mode_costs`.
void av1_fill_lr_rates(ModeCosts *mode_costs, FRAME_CONTEXT *fc) {
  // Restoration type selection.
  av1_cost_tokens_from_cdf(mode_costs->switchable_restore_cost,
                           fc->switchable_restore_cdf, NULL);

  // Wiener filter.
  av1_cost_tokens_from_cdf(mode_costs->wiener_restore_cost,
                           fc->wiener_restore_cdf, NULL);

  // Self-guided projection filter.
  av1_cost_tokens_from_cdf(mode_costs->sgrproj_restore_cost,
                           fc->sgrproj_restore_cdf, NULL);
}
330
331 // Values are now correlated to quantizer.
332 static int sad_per_bit_lut_8[QINDEX_RANGE];
333 static int sad_per_bit_lut_10[QINDEX_RANGE];
334 static int sad_per_bit_lut_12[QINDEX_RANGE];
335
// Fills bit16lut[0 .. range-1] for the given bit depth. Entry `qindex` is a
// linear function of av1_convert_qindex_to_q(qindex, bit_depth), truncated
// to int.
static void init_me_luts_bd(int *bit16lut, int range,
                            aom_bit_depth_t bit_depth) {
  // The table is computed from a formula for now rather than hand-tuned; this
  // makes it easier to resolve the impact of experimental changes to the
  // quantizer tables.
  for (int qindex = 0; qindex < range; ++qindex) {
    const double q = av1_convert_qindex_to_q(qindex, bit_depth);
    bit16lut[qindex] = (int)(0.0418 * q + 2.4107);
  }
}
347
init_me_luts(void)348 static void init_me_luts(void) {
349 init_me_luts_bd(sad_per_bit_lut_8, QINDEX_RANGE, AOM_BITS_8);
350 init_me_luts_bd(sad_per_bit_lut_10, QINDEX_RANGE, AOM_BITS_10);
351 init_me_luts_bd(sad_per_bit_lut_12, QINDEX_RANGE, AOM_BITS_12);
352 }
353
av1_init_me_luts(void)354 void av1_init_me_luts(void) { aom_once(init_me_luts); }
355
// ARF boost adjustment for the rd multiplier, indexed by boost index (0..15).
// av1_compute_rd_mult() adds (rdmult * factor) >> 7 to rdmult, so each entry
// is in units of 1/128.
static const int rd_boost_factor[16] = {
  64, 32, 32, 32,  //
  24, 16, 12, 12,  //
  8,  8,  4,  4,   //
  2,  2,  1,  0
};
358
// Layer depth adjustment for the rd multiplier, indexed by layer depth (0..6).
// av1_compute_rd_mult() replaces rdmult with (rdmult * factor) >> 7, so each
// entry is in units of 1/128.
static const int rd_layer_depth_factor[7] = { 160, 160, 160, 160,
                                              192, 208, 224 };
362
// Default rd multiplier scale for inter frames: a linear function of the
// argument. The model is a first pass estimate based on data from a previous
// Vizer run.
// Note: although the parameter is named `qindex`,
// av1_compute_rd_mult_based_on_qindex() passes the DC quantizer value.
static double def_inter_rd_multiplier(int qindex) {
  const double offset = 3.2;
  const double slope = 0.0015;
  return offset + (slope * (double)qindex);
}
369
// Default rd multiplier scale for ARF/Golden frames: a linear function of the
// argument. The model is a first pass estimate based on data from a previous
// Vizer run.
// Note: although the parameter is named `qindex`,
// av1_compute_rd_mult_based_on_qindex() passes the DC quantizer value.
static double def_arf_rd_multiplier(int qindex) {
  const double offset = 3.25;
  const double slope = 0.0015;
  return offset + (slope * (double)qindex);
}
376
// Default rd multiplier scale for key frames: a linear function of the
// argument. The model is a first pass estimate based on data from a previous
// Vizer run.
// Note: although the parameter is named `qindex`,
// av1_compute_rd_mult_based_on_qindex() passes the DC quantizer value.
static double def_kf_rd_multiplier(int qindex) {
  const double offset = 3.3;
  const double slope = 0.0015;
  return offset + (slope * (double)qindex);
}
383
// Computes the base rd multiplier for a frame from its quantizer.
//
// The multiplier is q * q, where q is the DC quantizer for `qindex` at the
// given bit depth, scaled by a factor chosen by frame update type (key frame,
// golden/ARF, or any other type) and then normalized for bit depth: the result
// is rounded down by 4 bits for 10-bit and by 8 bits for 12-bit input.
//
// Returns a value in [1, INT_MAX]. For an unrecognized bit depth it asserts
// and, when assertions are compiled out, returns -1.
int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
                                        FRAME_UPDATE_TYPE update_type,
                                        int qindex) {
  const int q = av1_dc_quant_QTX(qindex, 0, bit_depth);

  // Pick the per-frame-type scale. The helpers are evaluated on the quantizer
  // value q, not on the index.
  double def_rd_q_mult;
  if (update_type == KF_UPDATE) {
    def_rd_q_mult = def_kf_rd_multiplier(q);
  } else if ((update_type == GF_UPDATE) || (update_type == ARF_UPDATE)) {
    def_rd_q_mult = def_arf_rd_multiplier(q);
  } else {
    def_rd_q_mult = def_inter_rd_multiplier(q);
  }

  // Widen before multiplying so the square is evaluated in 64 bits rather
  // than in int and converted afterwards.
  const int64_t q_squared = (int64_t)q * q;
  int64_t rdmult = (int64_t)((double)q_squared * def_rd_q_mult);

  switch (bit_depth) {
    case AOM_BITS_8: break;
    case AOM_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
    case AOM_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
    default:
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
      return -1;
  }

  // Saturate to the int range and never return a non-positive multiplier.
  return rdmult > 0 ? (int)AOMMIN(rdmult, INT_MAX) : 1;
}
410
av1_compute_rd_mult(const int qindex,const aom_bit_depth_t bit_depth,const FRAME_UPDATE_TYPE update_type,const int layer_depth,const int boost_index,const FRAME_TYPE frame_type,const int use_fixed_qp_offsets,const int is_stat_consumption_stage)411 int av1_compute_rd_mult(const int qindex, const aom_bit_depth_t bit_depth,
412 const FRAME_UPDATE_TYPE update_type,
413 const int layer_depth, const int boost_index,
414 const FRAME_TYPE frame_type,
415 const int use_fixed_qp_offsets,
416 const int is_stat_consumption_stage) {
417 int64_t rdmult =
418 av1_compute_rd_mult_based_on_qindex(bit_depth, update_type, qindex);
419 if (is_stat_consumption_stage && !use_fixed_qp_offsets &&
420 (frame_type != KEY_FRAME)) {
421 // Layer depth adjustment
422 rdmult = (rdmult * rd_layer_depth_factor[layer_depth]) >> 7;
423 // ARF boost adjustment
424 rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
425 }
426 return (int)rdmult;
427 }
428
// Returns the qindex offset (relative to `qindex`) whose DC quantizer
// approximates q / sqrt(beta), where q is the DC quantizer at `qindex`.
//
// The target quantizer is rounded with rint(). If it equals q the offset is 0.
// Otherwise qindex is stepped one at a time towards the target - down to at
// most 0, or up to at most MAXQ - stopping at the first index whose quantizer
// reaches or passes the target. `beta` must be positive.
int av1_get_deltaq_offset(aom_bit_depth_t bit_depth, int qindex, double beta) {
  assert(beta > 0.0);

  const int start_qindex = qindex;
  const int start_q = av1_dc_quant_QTX(start_qindex, 0, bit_depth);
  const int target_q = (int)rint(start_q / sqrt(beta));

  if (target_q < start_q) {
    // Search downwards for the first quantizer that is <= the target.
    while (qindex > 0) {
      if (av1_dc_quant_QTX(--qindex, 0, bit_depth) <= target_q) break;
    }
  } else if (target_q > start_q) {
    // Search upwards for the first quantizer that is >= the target.
    while (qindex < MAXQ) {
      if (av1_dc_quant_QTX(++qindex, 0, bit_depth) >= target_q) break;
    }
  }

  return qindex - start_qindex;
}
456
av1_adjust_q_from_delta_q_res(int delta_q_res,int prev_qindex,int curr_qindex)457 int av1_adjust_q_from_delta_q_res(int delta_q_res, int prev_qindex,
458 int curr_qindex) {
459 curr_qindex = clamp(curr_qindex, delta_q_res, 256 - delta_q_res);
460 const int sign_deltaq_index = curr_qindex - prev_qindex >= 0 ? 1 : -1;
461 const int deltaq_deadzone = delta_q_res / 4;
462 const int qmask = ~(delta_q_res - 1);
463 int abs_deltaq_index = abs(curr_qindex - prev_qindex);
464 abs_deltaq_index = (abs_deltaq_index + deltaq_deadzone) & qmask;
465 int adjust_qindex = prev_qindex + sign_deltaq_index * abs_deltaq_index;
466 adjust_qindex = AOMMAX(adjust_qindex, MINQ + 1);
467 return adjust_qindex;
468 }
469
av1_get_adaptive_rdmult(const AV1_COMP * cpi,double beta)470 int av1_get_adaptive_rdmult(const AV1_COMP *cpi, double beta) {
471 assert(beta > 0.0);
472 const AV1_COMMON *cm = &cpi->common;
473
474 const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
475 const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
476 const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
477 const FRAME_TYPE frame_type = cm->current_frame.frame_type;
478
479 const int qindex_rdmult = cm->quant_params.base_qindex;
480 return (int)(av1_compute_rd_mult(
481 qindex_rdmult, cm->seq_params->bit_depth,
482 cpi->ppi->gf_group.update_type[cpi->gf_frame_index],
483 layer_depth, boost_index, frame_type,
484 cpi->oxcf.q_cfg.use_fixed_qp_offsets,
485 is_stat_consumption_stage(cpi)) /
486 beta);
487 }
488
// Returns the rd threshold scale factor for `qindex` at the given bit depth.
//
// The DC quantizer is divided by 4, 16 or 64 for 8-, 10- and 12-bit input
// respectively, raised to the power RD_THRESH_POW, multiplied by 5.12 and
// truncated to int; the result is never less than 8. For an unrecognized bit
// depth it asserts and, when assertions are compiled out, returns -1.
static int compute_rd_thresh_factor(int qindex, aom_bit_depth_t bit_depth) {
  double bit_depth_scale;
  switch (bit_depth) {
    case AOM_BITS_8: bit_depth_scale = 4.0; break;
    case AOM_BITS_10: bit_depth_scale = 16.0; break;
    case AOM_BITS_12: bit_depth_scale = 64.0; break;
    default:
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
      return -1;
  }

  const double q = av1_dc_quant_QTX(qindex, 0, bit_depth) / bit_depth_scale;
  // TODO(debargha): Adjust the function below.
  return AOMMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
}
506
av1_set_sad_per_bit(const AV1_COMP * cpi,int * sadperbit,int qindex)507 void av1_set_sad_per_bit(const AV1_COMP *cpi, int *sadperbit, int qindex) {
508 switch (cpi->common.seq_params->bit_depth) {
509 case AOM_BITS_8: *sadperbit = sad_per_bit_lut_8[qindex]; break;
510 case AOM_BITS_10: *sadperbit = sad_per_bit_lut_10[qindex]; break;
511 case AOM_BITS_12: *sadperbit = sad_per_bit_lut_12[qindex]; break;
512 default:
513 assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
514 }
515 }
516
// Fills rd->threshes[segment][block size][mode] from rd->thresh_mult[].
//
// For each segment, a threshold factor is derived from the segment's qindex
// (plus the luma DC delta, clamped to [0, MAXQ]). Each threshold is
// thresh_mult * factor * block-size-factor / 4, or INT_MAX when that product
// would not fit in an int.
//
// With `use_nonrd_pick_mode` set, only the modes reachable from the real-time
// reference/mode lists are updated; otherwise all MAX_MODES entries are.
static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd,
                                 int use_nonrd_pick_mode) {
  THR_MODES mode_indices[RTC_REFS * RTC_MODES] = { 0 };
  int num_modes_count = MAX_MODES;

  if (use_nonrd_pick_mode) {
    // Collect the mode indices used by the non-rd path.
    num_modes_count = 0;
    for (int r_idx = 0; r_idx < RTC_REFS; r_idx++) {
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
      if (ref == INTRA_FRAME) {
        for (int m = 0; m < RTC_INTRA_MODES; m++) {
          mode_indices[num_modes_count++] =
              mode_idx[ref][mode_offset(intra_mode_list[m])];
        }
      } else {
        for (int m = 0; m < RTC_INTER_MODES; m++) {
          mode_indices[num_modes_count++] =
              mode_idx[ref][mode_offset(inter_mode_list[m])];
        }
      }
    }
  }

  for (int segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    const int qindex = clamp(
        av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex) +
            cm->quant_params.y_dc_delta_q,
        0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex, cm->seq_params->bit_depth);

    for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      const int thresh_max = INT_MAX / t;

      for (int k = 0; k < num_modes_count; ++k) {
        const int mode_index = use_nonrd_pick_mode ? mode_indices[k] : k;
        if (rd->thresh_mult[mode_index] < thresh_max) {
          rd->threshes[segment_id][bsize][mode_index] =
              rd->thresh_mult[mode_index] * t / 4;
        } else {
          // The product would overflow; saturate.
          rd->threshes[segment_id][bsize][mode_index] = INT_MAX;
        }
      }
    }
  }
}
561
// Converts the coefficient-coding CDFs in `fc` into costs in `coeff_costs`.
// Only the first min(num_planes, PLANE_TYPES) plane types are processed.
//
// Two groups of tables are filled:
//  - eob_costs[eob_multi_size][plane]: end-of-block flag costs, one CDF family
//    per eob_multi_size index (16, 32, ..., 1024).
//  - coeff_costs[tx_size][plane]: txb-skip, base level, eob-extra, dc-sign and
//    level-range (lps) costs.
void av1_fill_coeff_costs(CoeffCosts *coeff_costs, FRAME_CONTEXT *fc,
                          const int num_planes) {
  const int nplanes = AOMMIN(num_planes, PLANE_TYPES);

  for (int eob_multi_size = 0; eob_multi_size < 7; ++eob_multi_size) {
    for (int plane = 0; plane < nplanes; ++plane) {
      LV_MAP_EOB_COST *pcost = &coeff_costs->eob_costs[eob_multi_size][plane];

      for (int ctx = 0; ctx < 2; ++ctx) {
        aom_cdf_prob *pcdf;
        if (eob_multi_size == 0) {
          pcdf = fc->eob_flag_cdf16[plane][ctx];
        } else if (eob_multi_size == 1) {
          pcdf = fc->eob_flag_cdf32[plane][ctx];
        } else if (eob_multi_size == 2) {
          pcdf = fc->eob_flag_cdf64[plane][ctx];
        } else if (eob_multi_size == 3) {
          pcdf = fc->eob_flag_cdf128[plane][ctx];
        } else if (eob_multi_size == 4) {
          pcdf = fc->eob_flag_cdf256[plane][ctx];
        } else if (eob_multi_size == 5) {
          pcdf = fc->eob_flag_cdf512[plane][ctx];
        } else {
          pcdf = fc->eob_flag_cdf1024[plane][ctx];
        }
        av1_cost_tokens_from_cdf(pcost->eob_cost[ctx], pcdf, NULL);
      }
    }
  }

  for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
    for (int plane = 0; plane < nplanes; ++plane) {
      LV_MAP_COEFF_COST *pcost = &coeff_costs->coeff_costs[tx_size][plane];

      for (int ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(pcost->txb_skip_cost[ctx],
                                 fc->txb_skip_cdf[tx_size][ctx], NULL);
      }

      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS_EOB; ++ctx) {
        av1_cost_tokens_from_cdf(pcost->base_eob_cost[ctx],
                                 fc->coeff_base_eob_cdf[tx_size][plane][ctx],
                                 NULL);
      }

      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(pcost->base_cost[ctx],
                                 fc->coeff_base_cdf[tx_size][plane][ctx], NULL);

        // Entries 4..7 are derived from the symbol costs in entries 0..3:
        // entry 4 is zero, entry 5 is cost[1] - cost[0] plus one literal bit,
        // and entries 6 and 7 are the successive differences.
        pcost->base_cost[ctx][4] = 0;
        pcost->base_cost[ctx][5] = pcost->base_cost[ctx][1] +
                                   av1_cost_literal(1) -
                                   pcost->base_cost[ctx][0];
        pcost->base_cost[ctx][6] =
            pcost->base_cost[ctx][2] - pcost->base_cost[ctx][1];
        pcost->base_cost[ctx][7] =
            pcost->base_cost[ctx][3] - pcost->base_cost[ctx][2];
      }

      for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(pcost->eob_extra_cost[ctx],
                                 fc->eob_extra_cdf[tx_size][plane][ctx], NULL);
      }

      for (int ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(pcost->dc_sign_cost[ctx],
                                 fc->dc_sign_cdf[plane][ctx], NULL);
      }

      for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
        int br_rate[BR_CDF_SIZE];
        av1_cost_tokens_from_cdf(
            br_rate, fc->coeff_br_cdf[AOMMIN(tx_size, TX_32X32)][plane][ctx],
            NULL);

        // First half of lps_cost: cumulative cost of each level. Levels are
        // coded in groups of BR_CDF_SIZE - 1; the last symbol of the CDF is
        // the cost of continuing into the next group.
        int prev_cost = 0;
        int base = 0;
        while (base < COEFF_BASE_RANGE) {
          for (int k = 0; k < BR_CDF_SIZE - 1; k++) {
            pcost->lps_cost[ctx][base + k] = prev_cost + br_rate[k];
          }
          prev_cost += br_rate[BR_CDF_SIZE - 1];
          base += BR_CDF_SIZE - 1;
        }
        pcost->lps_cost[ctx][base] = prev_cost;

        // Second half of lps_cost: the first cumulative cost followed by the
        // successive differences of the first half.
        pcost->lps_cost[ctx][0 + COEFF_BASE_RANGE + 1] =
            pcost->lps_cost[ctx][0];
        for (int k = 1; k <= COEFF_BASE_RANGE; ++k) {
          pcost->lps_cost[ctx][k + COEFF_BASE_RANGE + 1] =
              pcost->lps_cost[ctx][k] - pcost->lps_cost[ctx][k - 1];
        }
      }
    }
  }
}
654
// Builds the motion vector cost tables in `mv_costs` from `nmvc`.
//
// The per-component cost pointers are set to index MV_MAX of their backing
// arrays. With `integer_mv` set, the regular tables are built at
// MV_SUBPEL_NONE precision; otherwise `usehp` selects between the
// high-precision and regular tables and is passed through as the precision.
// Does nothing when `mv_costs` is NULL.
void av1_fill_mv_costs(const nmv_context *nmvc, int integer_mv, int usehp,
                       MvCosts *mv_costs) {
  // Avoid accessing 'mv_costs' when it is not allocated.
  if (mv_costs == NULL) return;

  for (int comp = 0; comp < 2; ++comp) {
    mv_costs->nmv_cost[comp] = &mv_costs->nmv_cost_alloc[comp][MV_MAX];
    mv_costs->nmv_cost_hp[comp] = &mv_costs->nmv_cost_hp_alloc[comp][MV_MAX];
  }

  if (!integer_mv) {
    mv_costs->mv_cost_stack =
        usehp ? mv_costs->nmv_cost_hp : mv_costs->nmv_cost;
    av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
                             nmvc, usehp);
    return;
  }

  mv_costs->mv_cost_stack = (int **)&mv_costs->nmv_cost;
  av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
                           nmvc, MV_SUBPEL_NONE);
}
675
// Builds the intra block copy displacement vector cost tables in `dv_costs`
// from `ndvc`, at MV_SUBPEL_NONE precision. The per-component cost pointers
// are set to index MV_MAX of their backing arrays.
// Does nothing when `dv_costs` is NULL, matching av1_fill_mv_costs().
void av1_fill_dv_costs(const nmv_context *ndvc, IntraBCMVCosts *dv_costs) {
  // Avoid accessing 'dv_costs' when it is not allocated.
  if (dv_costs == NULL) return;

  dv_costs->dv_costs[0] = &dv_costs->dv_costs_alloc[0][MV_MAX];
  dv_costs->dv_costs[1] = &dv_costs->dv_costs_alloc[1][MV_MAX];
  av1_build_nmv_cost_table(dv_costs->joint_mv, dv_costs->dv_costs, ndvc,
                           MV_SUBPEL_NONE);
}
682
683 // Populates speed features based on codec control settings (of type
684 // COST_UPDATE_TYPE) and expected speed feature settings (of type
685 // INTERNAL_COST_UPDATE_TYPE) by considering the least frequent cost update.
686 // The populated/updated speed features are used for cost updates in the
687 // encoder.
688 // WARNING: Population of unified cost update frequency needs to be taken care
689 // accordingly, in case of any modifications/additions to the enum
690 // COST_UPDATE_TYPE/INTERNAL_COST_UPDATE_TYPE.
populate_unified_cost_update_freq(const CostUpdateFreq cost_upd_freq,SPEED_FEATURES * const sf)691 static INLINE void populate_unified_cost_update_freq(
692 const CostUpdateFreq cost_upd_freq, SPEED_FEATURES *const sf) {
693 INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
694 // Mapping of entropy cost update frequency from the encoder's codec control
695 // settings of type COST_UPDATE_TYPE to speed features of type
696 // INTERNAL_COST_UPDATE_TYPE.
697 static const INTERNAL_COST_UPDATE_TYPE
698 map_cost_upd_to_internal_cost_upd[NUM_COST_UPDATE_TYPES] = {
699 INTERNAL_COST_UPD_SB, INTERNAL_COST_UPD_SBROW, INTERNAL_COST_UPD_TILE,
700 INTERNAL_COST_UPD_OFF
701 };
702
703 inter_sf->mv_cost_upd_level =
704 AOMMIN(inter_sf->mv_cost_upd_level,
705 map_cost_upd_to_internal_cost_upd[cost_upd_freq.mv]);
706 inter_sf->coeff_cost_upd_level =
707 AOMMIN(inter_sf->coeff_cost_upd_level,
708 map_cost_upd_to_internal_cost_upd[cost_upd_freq.coeff]);
709 inter_sf->mode_cost_upd_level =
710 AOMMIN(inter_sf->mode_cost_upd_level,
711 map_cost_upd_to_internal_cost_upd[cost_upd_freq.mode]);
712 sf->intra_sf.dv_cost_upd_level =
713 AOMMIN(sf->intra_sf.dv_cost_upd_level,
714 map_cost_upd_to_internal_cost_upd[cost_upd_freq.dv]);
715 }
716
717 // Checks if entropy costs should be initialized/updated at frame level or not.
is_frame_level_cost_upd_freq_set(const AV1_COMMON * const cm,const INTERNAL_COST_UPDATE_TYPE cost_upd_level,const int use_nonrd_pick_mode,const int frames_since_key)718 static INLINE int is_frame_level_cost_upd_freq_set(
719 const AV1_COMMON *const cm, const INTERNAL_COST_UPDATE_TYPE cost_upd_level,
720 const int use_nonrd_pick_mode, const int frames_since_key) {
721 const int fill_costs =
722 frame_is_intra_only(cm) ||
723 (use_nonrd_pick_mode ? frames_since_key < 2
724 : (cm->current_frame.frame_number & 0x07) == 1);
725 return ((!use_nonrd_pick_mode && cost_upd_level != INTERNAL_COST_UPD_OFF) ||
726 cost_upd_level == INTERNAL_COST_UPD_TILE || fill_costs);
727 }
728
729 // Decide whether we want to update the mode entropy cost for the current frame.
730 // The logit is currently inherited from selective_disable_cdf_rtc.
should_force_mode_cost_update(const AV1_COMP * cpi)731 static AOM_INLINE int should_force_mode_cost_update(const AV1_COMP *cpi) {
732 const REAL_TIME_SPEED_FEATURES *const rt_sf = &cpi->sf.rt_sf;
733 if (!rt_sf->frame_level_mode_cost_update) {
734 return false;
735 }
736
737 if (cpi->oxcf.algo_cfg.cdf_update_mode == 2) {
738 return cpi->frames_since_last_update == 1;
739 } else if (cpi->oxcf.algo_cfg.cdf_update_mode == 1) {
740 if (cpi->svc.number_spatial_layers == 1 &&
741 cpi->svc.number_temporal_layers == 1) {
742 const AV1_COMMON *const cm = &cpi->common;
743 const RATE_CONTROL *const rc = &cpi->rc;
744
745 return frame_is_intra_only(cm) || is_frame_resize_pending(cpi) ||
746 rc->high_source_sad || rc->frames_since_key < 10 ||
747 cpi->cyclic_refresh->counter_encode_maxq_scene_change < 10 ||
748 cm->current_frame.frame_number % 8 == 0;
749 } else if (cpi->svc.number_temporal_layers > 1) {
750 return cpi->svc.temporal_layer_id != cpi->svc.number_temporal_layers - 1;
751 }
752 }
753
754 return false;
755 }
756
// Sets up the per-frame rate-distortion constants of `cpi`:
//  1. rd->RDMULT from the frame's qindex (base qindex plus luma DC delta),
//     optionally overridden by an rd command when CONFIG_RD_COMMAND is on;
//  2. the error-per-bit value and the per-block rd thresholds;
//  3. the unified cost update levels, followed by the frame-level mv,
//     coefficient, mode and dv cost tables where the respective checks
//     ask for an update.
// The dv cost buffer is allocated here on first use.
void av1_initialize_rd_consts(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  SPEED_FEATURES *const sf = &cpi->sf;
  RD_OPT *const rd = &cpi->rd;
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const int use_nonrd_pick_mode = sf->rt_sf.use_nonrd_pick_mode;
  const int frames_since_key = cpi->rc.frames_since_key;

  // Rate-distortion multiplier. The boost index and layer depth are capped to
  // the last entry of their adjustment tables.
  const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
  const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
  const int qindex_rdmult =
      cm->quant_params.base_qindex + cm->quant_params.y_dc_delta_q;
  rd->RDMULT = av1_compute_rd_mult(
      qindex_rdmult, cm->seq_params->bit_depth,
      gf_group->update_type[cpi->gf_frame_index], layer_depth, boost_index,
      cm->current_frame.frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
      is_stat_consumption_stage(cpi));
#if CONFIG_RD_COMMAND
  if (cpi->oxcf.pass == 2) {
    const RD_COMMAND *rd_command = &cpi->rd_command;
    if (rd_command->option_ls[rd_command->frame_index] ==
        RD_OPTION_SET_Q_RDMULT) {
      rd->RDMULT = rd_command->rdmult_ls[rd_command->frame_index];
    }
  }
#endif  // CONFIG_RD_COMMAND

  av1_set_error_per_bit(&x->errorperbit, rd->RDMULT);

  set_block_thresholds(cm, rd, use_nonrd_pick_mode);

  // Fold the codec-control update frequencies into the speed features before
  // the update levels are consulted below.
  populate_unified_cost_update_freq(cpi->oxcf.cost_upd_freq, sf);

  // Frame level mv cost update
  if (is_frame_level_cost_upd_freq_set(cm, sf->inter_sf.mv_cost_upd_level,
                                       use_nonrd_pick_mode,
                                       frames_since_key)) {
    av1_fill_mv_costs(&cm->fc->nmvc, cm->features.cur_frame_force_integer_mv,
                      cm->features.allow_high_precision_mv, x->mv_costs);
  }

  // Frame level coefficient cost update
  if (is_frame_level_cost_upd_freq_set(cm, sf->inter_sf.coeff_cost_upd_level,
                                       use_nonrd_pick_mode,
                                       frames_since_key)) {
    av1_fill_coeff_costs(&x->coeff_costs, cm->fc, av1_num_planes(cm));
  }

  // Frame level mode cost update
  if (should_force_mode_cost_update(cpi) ||
      is_frame_level_cost_upd_freq_set(cm, sf->inter_sf.mode_cost_upd_level,
                                       use_nonrd_pick_mode,
                                       frames_since_key)) {
    av1_fill_mode_rates(cm, &x->mode_costs, cm->fc);
  }

  // Frame level dv cost update
  if (!av1_need_dv_costs(cpi)) return;

  if (cpi->td.mb.dv_costs == NULL) {
    CHECK_MEM_ERROR(
        cm, cpi->td.mb.dv_costs,
        (IntraBCMVCosts *)aom_malloc(sizeof(*cpi->td.mb.dv_costs)));
  }
  av1_fill_dv_costs(&cm->fc->ndvc, x->dv_costs);
}
820
// Looks up the normalized rate and distortion for a normalized squared
// quantizer step by linear interpolation between adjacent table entries.
//
// xsq_q10: lookup key in Q10. av1_model_rd_from_var_lapndz() clamps it to
//          245727 before calling, which keeps xq + 1 inside the tables.
//          NOTE(review): negative keys are not guarded against here.
// r_q10:   receives the interpolated entry of rate_tab_q10.
// d_q10:   receives the interpolated entry of dist_tab_q10.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The tables below must be of the same size.

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256.

  // Normalized rate:
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The
  // closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142,
    4044, 3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
    3133, 3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353,
    2290, 2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
    1608, 1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963,
    911, 864, 821, 781, 745, 680, 623, 574, 530, 490, 455, 424,
    395, 345, 304, 269, 239, 213, 190, 171, 154, 126, 104, 87,
    73, 61, 52, 44, 38, 28, 21, 16, 12, 10, 8, 6,
    5, 3, 2, 1, 1, 1, 0, 0,
  };
  // Normalized distortion:
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
    0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5,
    5, 6, 7, 7, 8, 9, 11, 12, 13, 15, 16, 17,
    18, 21, 24, 26, 29, 31, 34, 36, 39, 44, 49, 54,
    59, 64, 69, 73, 78, 88, 97, 106, 115, 124, 133, 142,
    151, 167, 184, 200, 215, 231, 245, 260, 274, 301, 327, 351,
    375, 397, 418, 439, 458, 495, 528, 559, 587, 613, 637, 659,
    680, 717, 749, 777, 801, 823, 842, 859, 874, 899, 919, 936,
    949, 960, 969, 977, 983, 994, 1001, 1006, 1010, 1013, 1015, 1017,
    1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
  };
  // Key value (Q10) at which each table entry above was sampled. The spacing
  // between neighbours is 4 for the first eight entries and doubles every
  // eight entries after that, i.e. it equals 1 << (2 + k) for the k below.
  static const int xsq_iq_q10[] = {
    0, 4, 8, 12, 16, 20, 24, 28, 32,
    40, 48, 56, 64, 72, 80, 88, 96, 112,
    128, 144, 160, 176, 192, 208, 224, 256, 288,
    320, 352, 384, 416, 448, 480, 544, 608, 672,
    736, 800, 864, 928, 992, 1120, 1248, 1376, 1504,
    1632, 1760, 1888, 2016, 2272, 2528, 2784, 3040, 3296,
    3552, 3808, 4064, 4576, 5088, 5600, 6112, 6624, 7136,
    7648, 8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328,
    16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688, 32736,
    36832, 40928, 45024, 49120, 53216, 57312, 61408, 65504, 73696,
    81888, 90080, 98272, 106464, 114656, 122848, 131040, 147424, 163808,
    180192, 196576, 212960, 229344, 245728,
  };
  // Offset by 8 so that tmp >= 8 for any non-negative key.
  const int tmp = (xsq_q10 >> 2) + 8;
  // k selects the group of eight entries; presumably get_msb() returns the
  // index of the highest set bit, making k >= 0 here - confirm in bitops.h.
  const int k = get_msb(tmp) - 3;
  // Eight entries per group, picked by the three bits of tmp starting at
  // bit k.
  const int xq = (k << 3) + ((tmp >> k) & 0x7);
  const int one_q10 = 1 << 10;
  // Q10 fraction of the way from entry xq to entry xq + 1: the distance to
  // the sample point divided by the local spacing 1 << (2 + k).
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  const int b_q10 = one_q10 - a_q10;
  // Blend the two neighbouring entries with weights b_q10 and a_q10.
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}
886
av1_model_rd_from_var_lapndz(int64_t var,unsigned int n_log2,unsigned int qstep,int * rate,int64_t * dist)887 void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n_log2,
888 unsigned int qstep, int *rate,
889 int64_t *dist) {
890 // This function models the rate and distortion for a Laplacian
891 // source with given variance when quantized with a uniform quantizer
892 // with given stepsize. The closed form expressions are in:
893 // Hang and Chen, "Source Model for transform video coder and its
894 // application - Part I: Fundamental Theory", IEEE Trans. Circ.
895 // Sys. for Video Tech., April 1997.
896 if (var == 0) {
897 *rate = 0;
898 *dist = 0;
899 } else {
900 int d_q10, r_q10;
901 static const uint32_t MAX_XSQ_Q10 = 245727;
902 const uint64_t xsq_q10_64 =
903 (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
904 const int xsq_q10 = (int)AOMMIN(xsq_q10_64, MAX_XSQ_Q10);
905 model_rd_norm(xsq_q10, &r_q10, &d_q10);
906 *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - AV1_PROB_COST_SHIFT);
907 *dist = (var * (int64_t)d_q10 + 512) >> 10;
908 }
909 }
910
// Catmull-Rom cubic interpolation through four equally spaced samples.
//
// p: four consecutive samples p[0..3].
// x: position between p[1] (x = 0) and p[2] (x = 1).
// Returns the interpolated value; it equals p[1] at x = 0 and p[2] at x = 1.
static double interp_cubic(const double *p, double x) {
  // Horner evaluation, innermost coefficient first.
  const double cubic_term = 3.0 * (p[1] - p[2]) + p[3] - p[0];
  const double quad_term =
      2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] + x * cubic_term;
  const double lin_term = p[2] - p[0] + x * quad_term;
  return p[1] + 0.5 * x * lin_term;
}
917
918 /*
919 static double interp_bicubic(const double *p, int p_stride, double x,
920 double y) {
921 double q[4];
922 q[0] = interp_cubic(p, x);
923 q[1] = interp_cubic(p + p_stride, x);
924 q[2] = interp_cubic(p + 2 * p_stride, x);
925 q[3] = interp_cubic(p + 3 * p_stride, x);
926 return interp_cubic(q, y);
927 }
928 */
929
// Maps each block size to a rate-curve category in 0..3.
// av1_model_rd_curvfit() uses the category as the row index into
// interp_rgrid_curv.
static const uint8_t bsize_curvfit_model_cat_lookup[BLOCK_SIZES_ALL] = {
  0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 1, 1, 2, 2, 3, 3
};
933
// Selects the distortion-curve category from the normalized SSE:
// 1 when sse_norm is strictly above 16.0, otherwise 0.
static int sse_norm_curvfit_model_cat_lookup(double sse_norm) {
  return sse_norm > 16.0 ? 1 : 0;
}
937
938 // Models distortion by sse using a logistic function on
939 // l = log2(sse / q^2) as:
940 // dbysse = 16 / (1 + k exp(l + c))
get_dbysse_logistic(double l,double c,double k)941 static double get_dbysse_logistic(double l, double c, double k) {
942 const double A = 16.0;
943 const double dbysse = A / (1 + k * exp(l + c));
944 return dbysse;
945 }
946
// Linear rate model clamped at zero from below:
//   rate = max(0, a + b * l)
// l is the predictor (described upstream as log2(sse / q^2)); a is the
// intercept and b the slope.
static double get_rate_clamplinear(double l, double a, double b) {
  const double linear = a + b * l;
  if (linear < 0) return 0;
  return linear;
}
954
// Maps each block size to a surface-fit category in 0..8.
// rate_surffit_model_params_lookup() uses the category as the row index into
// surffit_rate_params.
static const uint8_t bsize_surffit_model_cat_lookup[BLOCK_SIZES_ALL] = {
  0, 0, 0, 0, 1, 1, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 0, 0, 2, 2, 4, 4
};
958
// Rate-model coefficients, one row per category from
// bsize_surffit_model_cat_lookup. For a row { c0, c1, c2, c3 },
// rate_surffit_model_params_lookup() produces
//   rpar[0] = c0 + c1 * xm   and   rpar[1] = c2 + c3 * xm.
static const double surffit_rate_params[9][4] = {
  {
      638.390212,
      2.253108,
      166.585650,
      -3.939401,
  },
  {
      5.256905,
      81.997240,
      -1.321771,
      17.694216,
  },
  {
      -74.193045,
      72.431868,
      -19.033152,
      15.407276,
  },
  {
      416.770113,
      14.794188,
      167.686830,
      -6.997756,
  },
  {
      378.511276,
      9.558376,
      154.658843,
      -6.635663,
  },
  {
      277.818787,
      4.413180,
      150.317637,
      -9.893038,
  },
  {
      142.212132,
      11.542038,
      94.393964,
      -5.518517,
  },
  {
      219.100256,
      4.007421,
      108.932852,
      -6.981310,
  },
  {
      222.261971,
      3.251049,
      95.972916,
      -5.609789,
  },
};
1015
// Distortion-model coefficients p[0..6], shared by all block sizes.
// dist_surffit_model_params_lookup() produces
//   dpar[0] = p[0] + p[1] / (1 + exp((xm + p[2]) * p[3]))
//   dpar[1] = p[4] + p[5] * exp(p[6] * xm).
static const double surffit_dist_params[7] = { 1.475844,  4.328362, -5.680233,
                                               -0.500994, 0.554585, 4.839478,
                                               -0.695837 };
1019
// Computes the two rate-model parameters for a block size.
//
// bsize: block size; selects a coefficient row of surffit_rate_params.
// xm:    predictor that the coefficients are linear in.
// rpar:  output, rpar[0] and rpar[1] are written.
static void rate_surffit_model_params_lookup(BLOCK_SIZE bsize, double xm,
                                             double *rpar) {
  const double *const coef =
      surffit_rate_params[bsize_surffit_model_cat_lookup[bsize]];
  rpar[0] = coef[0] + coef[1] * xm;
  rpar[1] = coef[2] + coef[3] * xm;
}
1026
// Computes the two distortion-model parameters from surffit_dist_params.
//
// bsize: unused; the same coefficients apply to every block size.
// xm:    predictor the parameters depend on.
// dpar:  output, dpar[0] and dpar[1] are written.
static void dist_surffit_model_params_lookup(BLOCK_SIZE bsize, double xm,
                                             double *dpar) {
  (void)bsize;
  const double *const p = surffit_dist_params;

  // dpar[0]: offset plus a logistic term in xm.
  const double logistic_denominator = 1 + exp((xm + p[2]) * p[3]);
  dpar[0] = p[0] + p[1] / logistic_denominator;

  // dpar[1]: offset plus an exponential term in xm.
  const double exp_term = exp(p[6] * xm);
  dpar[1] = p[4] + p[5] * exp_term;
}
1034
// Surface-fit rate/distortion model.
//
// bsize:       block size; selects the rate coefficients.
// sse_norm:    unused.
// xm:          predictor used to derive the model parameters.
// yl:          predictor fed to the rate and distortion curves.
// rate_f:      receives the clamped-linear rate estimate.
// distbysse_f: receives the logistic distortion-by-sse estimate.
void av1_model_rd_surffit(BLOCK_SIZE bsize, double sse_norm, double xm,
                          double yl, double *rate_f, double *distbysse_f) {
  (void)sse_norm;

  double rate_params[2];
  double dist_params[2];
  rate_surffit_model_params_lookup(bsize, xm, rate_params);
  dist_surffit_model_params_lookup(bsize, xm, dist_params);

  const double rate_intercept = rate_params[0];
  const double rate_slope = rate_params[1];
  *rate_f = get_rate_clamplinear(yl, rate_intercept, rate_slope);

  const double dist_shift = dist_params[0];
  const double dist_scale = dist_params[1];
  *distbysse_f = get_dbysse_logistic(yl, dist_shift, dist_scale);
}
1045
// Sampled rate curves, one row per category from
// bsize_curvfit_model_cat_lookup. av1_model_rd_curvfit() treats the 65
// columns as samples at x = -15.5, -15.0, ..., 16.5 and interpolates between
// them with interp_cubic().
static const double interp_rgrid_curv[4][65] = {
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    118.257702,  120.210658,  121.434853,  122.100487,
      122.377758,  122.436865,  72.290102,   96.974289,   101.652727,
      126.830141,  140.417377,  157.644879,  184.315291,  215.823873,
      262.300169,  335.919859,  420.624173,  519.185032,  619.854243,
      726.053595,  827.663369,  933.127475,  1037.988755, 1138.839609,
      1233.342933, 1333.508064, 1428.760126, 1533.396364, 1616.952052,
      1744.539319, 1803.413586, 1951.466618, 1994.227838, 2086.031680,
      2148.635443, 2239.068450, 2222.590637, 2338.859809, 2402.929011,
      2418.727875, 2435.342670, 2471.159469, 2523.187446, 2591.183827,
      2674.905840, 2774.110714, 2888.555675, 3017.997952, 3162.194773,
      3320.903365, 3493.880956, 3680.884773, 3881.672045, 4096.000000,
  },
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    13.087244,   15.919735,   25.930313,   24.412411,
      28.567417,   29.924194,   30.857010,   32.742979,   36.382570,
      39.210386,   42.265690,   47.378572,   57.014850,   82.740067,
      137.346562,  219.968084,  316.781856,  415.643773,  516.706538,
      614.914364,  714.303763,  815.512135,  911.210485,  1008.501528,
      1109.787854, 1213.772279, 1322.922561, 1414.752579, 1510.505641,
      1615.741888, 1697.989032, 1780.123933, 1847.453790, 1913.742309,
      1960.828122, 2047.500168, 2085.454095, 2129.230668, 2158.171824,
      2182.231724, 2217.684864, 2269.589211, 2337.264824, 2420.618694,
      2519.557814, 2633.989178, 2763.819779, 2908.956609, 3069.306660,
      3244.776927, 3435.274401, 3640.706076, 3860.978945, 4096.000000,
  },
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    4.656893,    5.123633,    5.594132,    6.162376,
      6.918433,    7.768444,    8.739415,    10.105862,   11.477328,
      13.236604,   15.421030,   19.093623,   25.801871,   46.724612,
      98.841054,   181.113466,  272.586364,  359.499769,  445.546343,
      525.944439,  605.188743,  681.793483,  756.668359,  838.486885,
      926.950356,  1015.482542, 1113.353926, 1204.897193, 1288.871992,
      1373.464145, 1455.746628, 1527.796460, 1588.475066, 1658.144771,
      1710.302500, 1807.563351, 1863.197608, 1927.281616, 1964.450872,
      2022.719898, 2100.041145, 2185.205712, 2280.993936, 2387.616216,
      2505.282950, 2634.204540, 2774.591385, 2926.653884, 3090.602436,
      3266.647443, 3454.999303, 3655.868416, 3869.465182, 4096.000000,
  },
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.337370,    0.391916,    0.468839,    0.566334,
      0.762564,    1.069225,    1.384361,    1.787581,    2.293948,
      3.251909,    4.412991,    8.050068,    11.606073,   27.668092,
      65.227758,   128.463938,  202.097653,  262.715851,  312.464873,
      355.601398,  400.609054,  447.201352,  495.761568,  552.871938,
      619.067625,  691.984883,  773.753288,  860.628503,  946.262808,
      1019.805896, 1106.061360, 1178.422145, 1244.852258, 1302.173987,
      1399.650266, 1548.092912, 1545.928652, 1670.817500, 1694.523823,
      1779.195362, 1882.155494, 1990.662097, 2108.325181, 2235.456119,
      2372.366287, 2519.367059, 2676.769812, 2844.885918, 3024.026754,
      3214.503695, 3416.628115, 3630.711389, 3857.064892, 4096.000000,
  },
};
1108
// Sampled distortion-by-sse curves on the same 65-point x grid as
// interp_rgrid_curv, indexed by the category returned from
// sse_norm_curvfit_model_cat_lookup().
// NOTE(review): the array is declared with three rows but only two are
// initialized, so the third row is implicitly all zeros. The category lookup
// only returns 0 or 1, so that row is never read from av1_model_rd_curvfit().
static const double interp_dgrid_curv[3][65] = {
  {
      16.000000, 15.962891, 15.925174, 15.886888, 15.848074, 15.808770,
      15.769015, 15.728850, 15.688313, 15.647445, 15.606284, 15.564870,
      15.525918, 15.483820, 15.373330, 15.126844, 14.637442, 14.184387,
      13.560070, 12.880717, 12.165995, 11.378144, 10.438769, 9.130790,
      7.487633,  5.688649,  4.267515,  3.196300,  2.434201,  1.834064,
      1.369920,  1.035921,  0.775279,  0.574895,  0.427232,  0.314123,
      0.233236,  0.171440,  0.128188,  0.092762,  0.067569,  0.049324,
      0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
      0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
      0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
      0.000348,  0.000193,  0.000085,  0.000021,  0.000000,
  },
  {
      16.000000, 15.996116, 15.984769, 15.966413, 15.941505, 15.910501,
      15.873856, 15.832026, 15.785466, 15.734633, 15.679981, 15.621967,
      15.560961, 15.460157, 15.288367, 15.052462, 14.466922, 13.921212,
      13.073692, 12.222005, 11.237799, 9.985848,  8.898823,  7.423519,
      5.995325,  4.773152,  3.744032,  2.938217,  2.294526,  1.762412,
      1.327145,  1.020728,  0.765535,  0.570548,  0.425833,  0.313825,
      0.232959,  0.171324,  0.128174,  0.092750,  0.067558,  0.049319,
      0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
      0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
      0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
      0.000348,  0.000193,  0.000085,  0.000021,  -0.000000,
  },
};
1137
// Curve-fit rate/distortion model: cubic interpolation into the sampled
// curves interp_rgrid_curv and interp_dgrid_curv.
//
// bsize:       block size; selects the rate curve.
// sse_norm:    normalized SSE; selects the distortion curve.
// xqr:         position on the x axis of the curves. Values outside the
//              usable range are clamped.
// rate_f:      receives the interpolated rate.
// distbysse_f: receives the interpolated distortion-by-sse.
void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr,
                          double *rate_f, double *distbysse_f) {
  // The curves hold 65 samples at x = -15.5, -15.0, ..., 16.5.
  const double x_start = -15.5;
  const double x_end = 16.5;
  const double x_step = 0.5;
  const double epsilon = 1e-6;

  // Cubic interpolation reads one sample before and two after the cell, so
  // stay one step (plus a small margin) inside both ends of the grid.
  const double x_lo = x_start + x_step + epsilon;
  const double x_hi = x_end - x_step - epsilon;
  xqr = AOMMAX(xqr, x_lo);
  xqr = AOMMIN(xqr, x_hi);

  // Split the grid position into an integer cell and a fraction within it.
  const double grid_pos = (xqr - x_start) / x_step;
  const int cell = (int)floor(grid_pos);
  const double frac = grid_pos - cell;

  assert(cell > 0);

  const int first_sample = cell - 1;
  const int rate_cat = bsize_curvfit_model_cat_lookup[bsize];
  const int dist_cat = sse_norm_curvfit_model_cat_lookup(sse_norm);

  *rate_f = interp_cubic(&interp_rgrid_curv[rate_cat][first_sample], frac);
  *distbysse_f =
      interp_cubic(&interp_dgrid_curv[dist_cat][first_sample], frac);
}
1161
// Copies a plane's above and left entropy contexts into caller buffers.
//
// plane_bsize: block size; mi_size_wide / mi_size_high for it give the
//              number of entries copied from the above / left context.
// pd:          plane whose above_entropy_context and left_entropy_context
//              are read.
// t_above:     destination for the above context.
// t_left:      destination for the left context.
static void get_entropy_contexts_plane(BLOCK_SIZE plane_bsize,
                                       const struct macroblockd_plane *pd,
                                       ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
                                       ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
  const int above_count = mi_size_wide[plane_bsize];
  const int left_count = mi_size_high[plane_bsize];

  memcpy(t_above, pd->above_entropy_context,
         sizeof(ENTROPY_CONTEXT) * above_count);
  memcpy(t_left, pd->left_entropy_context,
         sizeof(ENTROPY_CONTEXT) * left_count);
}
1174
// Public entry point for copying a plane's above/left entropy contexts into
// t_above and t_left. Asserts that plane_bsize is a valid block size, then
// forwards to get_entropy_contexts_plane().
void av1_get_entropy_contexts(BLOCK_SIZE plane_bsize,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
                              ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
  assert(plane_bsize < BLOCK_SIZES_ALL);
  get_entropy_contexts_plane(plane_bsize, pd, t_above, t_left);
}
1182
// Evaluates the first two reference MV candidates of ref_frame by SAD and
// records the results in x.
//
// Writes, all indexed by ref_frame:
//   x->pred_mv0_sad   - SAD of the first candidate.
//   x->pred_mv1_sad   - SAD of the second candidate, only when it differs
//                       from the first and is not skipped as a repeated
//                       zero full-pel vector.
//   x->pred_mv_sad    - smallest SAD that was measured.
//   x->max_mv_context - largest |row| or |col| among the candidates, >> 3.
//
// ref_y_buffer / ref_y_stride describe the reference luma plane; block_size
// selects the SAD function.
void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
  const int_mv first_mv =
      av1_get_ref_mv_from_stack(0, ref_frames, 0, &x->mbmi_ext);
  const int_mv second_mv =
      av1_get_ref_mv_from_stack(0, ref_frames, 1, &x->mbmi_ext);

  // Candidate list: the first MV, plus the second one when it is distinct.
  MV candidates[MAX_MV_REF_CANDIDATES + 1];
  int num_candidates = 0;
  candidates[num_candidates++] = first_mv.as_mv;
  if (second_mv.as_int != first_mv.as_int) {
    candidates[num_candidates++] = second_mv.as_mv;
  }

  assert(num_candidates <=
         (int)(sizeof(candidates) / sizeof(candidates[0])));

  const uint8_t *const src_y_ptr = x->plane[0].src.buf;
  const int src_y_stride = x->plane[0].src.stride;
  int zero_tested = 0;
  int best_sad = INT_MAX;
  int max_mv = 0;

  for (int idx = 0; idx < num_candidates; ++idx) {
    const MV *const mv = &candidates[idx];

    // Track the largest component magnitude over all candidates.
    const int mv_mag = AOMMAX(abs(mv->row), abs(mv->col)) >> 3;
    if (mv_mag > max_mv) max_mv = mv_mag;

    // Round the 1/8-pel vector to full-pel.
    const int fp_row = (mv->row + 3 + (mv->row >= 0)) >> 3;
    const int fp_col = (mv->col + 3 + (mv->col >= 0)) >> 3;

    // The zero full-pel position only needs to be measured once.
    const int is_zero_fp = (fp_row == 0 && fp_col == 0);
    if (is_zero_fp && zero_tested) continue;
    zero_tested |= is_zero_fp;

    const uint8_t *const ref_y_ptr =
        &ref_y_buffer[ref_y_stride * fp_row + fp_col];
    const int sad = cpi->ppi->fn_ptr[block_size].sdf(
        src_y_ptr, src_y_stride, ref_y_ptr, ref_y_stride);

    if (sad < best_sad) best_sad = sad;

    if (idx == 0) {
      x->pred_mv0_sad[ref_frame] = sad;
    } else if (idx == 1) {
      x->pred_mv1_sad[ref_frame] = sad;
    }
  }

  // Publish the summary statistics for this reference frame.
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}
1232
av1_setup_pred_block(const MACROBLOCKD * xd,struct buf_2d dst[MAX_MB_PLANE],const YV12_BUFFER_CONFIG * src,const struct scale_factors * scale,const struct scale_factors * scale_uv,const int num_planes)1233 void av1_setup_pred_block(const MACROBLOCKD *xd,
1234 struct buf_2d dst[MAX_MB_PLANE],
1235 const YV12_BUFFER_CONFIG *src,
1236 const struct scale_factors *scale,
1237 const struct scale_factors *scale_uv,
1238 const int num_planes) {
1239 dst[0].buf = src->y_buffer;
1240 dst[0].stride = src->y_stride;
1241 dst[1].buf = src->u_buffer;
1242 dst[2].buf = src->v_buffer;
1243 dst[1].stride = dst[2].stride = src->uv_stride;
1244
1245 const int mi_row = xd->mi_row;
1246 const int mi_col = xd->mi_col;
1247 for (int i = 0; i < num_planes; ++i) {
1248 setup_pred_plane(dst + i, xd->mi[0]->bsize, dst[i].buf,
1249 i ? src->uv_crop_width : src->y_crop_width,
1250 i ? src->uv_crop_height : src->y_crop_height,
1251 dst[i].stride, mi_row, mi_col, i ? scale_uv : scale,
1252 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
1253 }
1254 }
1255
av1_get_scaled_ref_frame(const AV1_COMP * cpi,int ref_frame)1256 YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const AV1_COMP *cpi,
1257 int ref_frame) {
1258 assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
1259 RefCntBuffer *const scaled_buf = cpi->scaled_ref_buf[ref_frame - 1];
1260 const RefCntBuffer *const ref_buf =
1261 get_ref_frame_buf(&cpi->common, ref_frame);
1262 return (scaled_buf != ref_buf && scaled_buf != NULL) ? &scaled_buf->buf
1263 : NULL;
1264 }
1265
// Returns the cost of signalling the current block's interpolation filters.
//
// The cost is 0 unless interp_filter is SWITCHABLE. Otherwise it is the sum
// of the per-direction filter costs from x->mode_costs, over direction 0
// only, or over both directions when dual_filter is nonzero, multiplied by
// SWITCHABLE_INTERP_RATE_FACTOR.
int av1_get_switchable_rate(const MACROBLOCK *x, const MACROBLOCKD *xd,
                            InterpFilter interp_filter, int dual_filter) {
  if (interp_filter != SWITCHABLE) return 0;

  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_dirs = dual_filter ? 2 : 1;
  int total_cost = 0;
  for (int dir = 0; dir < num_dirs; ++dir) {
    const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
    const InterpFilter filter =
        av1_extract_interp_filter(mbmi->interp_filters, dir);
    total_cost += x->mode_costs.switchable_interp_costs[ctx][filter];
  }
  return SWITCHABLE_INTERP_RATE_FACTOR * total_cost;
}
1283
// Fills cpi->rd.thresh_mult with the baseline per-mode threshold
// multipliers. The array is cleared first, so any THR_* entry not assigned
// below is left at 0.
void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
  RD_OPT *const rd = &cpi->rd;

  // Set baseline threshold values.
  av1_zero(rd->thresh_mult);

  // THR_NEAREST* entries.
  rd->thresh_mult[THR_NEARESTMV] = 300;
  rd->thresh_mult[THR_NEARESTL2] = 300;
  rd->thresh_mult[THR_NEARESTL3] = 300;
  rd->thresh_mult[THR_NEARESTB] = 300;
  rd->thresh_mult[THR_NEARESTA2] = 300;
  rd->thresh_mult[THR_NEARESTA] = 300;
  rd->thresh_mult[THR_NEARESTG] = 300;

  // THR_NEW* entries.
  rd->thresh_mult[THR_NEWMV] = 1000;
  rd->thresh_mult[THR_NEWL2] = 1000;
  rd->thresh_mult[THR_NEWL3] = 1000;
  rd->thresh_mult[THR_NEWB] = 1000;
  rd->thresh_mult[THR_NEWA2] = 1100;
  rd->thresh_mult[THR_NEWA] = 1000;
  rd->thresh_mult[THR_NEWG] = 1000;

  // THR_NEAR* entries.
  rd->thresh_mult[THR_NEARMV] = 1000;
  rd->thresh_mult[THR_NEARL2] = 1000;
  rd->thresh_mult[THR_NEARL3] = 1000;
  rd->thresh_mult[THR_NEARB] = 1000;
  rd->thresh_mult[THR_NEARA2] = 1000;
  rd->thresh_mult[THR_NEARA] = 1000;
  rd->thresh_mult[THR_NEARG] = 1000;

  // THR_GLOBAL* entries.
  rd->thresh_mult[THR_GLOBALMV] = 2200;
  rd->thresh_mult[THR_GLOBALL2] = 2000;
  rd->thresh_mult[THR_GLOBALL3] = 2000;
  rd->thresh_mult[THR_GLOBALB] = 2400;
  rd->thresh_mult[THR_GLOBALA2] = 2000;
  rd->thresh_mult[THR_GLOBALG] = 2000;
  rd->thresh_mult[THR_GLOBALA] = 2400;

  // THR_COMP_NEAREST_NEAREST* entries.
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] = 1100;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A] = 800;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA] = 900;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLB] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2B] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3B] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGB] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA2] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A2] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A2] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA2] = 1000;

  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL2] = 2000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL3] = 2000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLG] = 2000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTBA] = 2000;

  // Remaining THR_COMP_* entries, one group per reference-pair suffix
  // (LA, L2A, L3A, GA, LB, L2B, L3B, GB, LA2, L2A2, L3A2, GA2, LL2, LL3, LG,
  // BA).
  rd->thresh_mult[THR_COMP_NEAR_NEARLA] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLA] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLA] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWLA] = 1530;
  rd->thresh_mult[THR_COMP_NEW_NEARLA] = 1870;
  rd->thresh_mult[THR_COMP_NEW_NEWLA] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA] = 2750;

  rd->thresh_mult[THR_COMP_NEAR_NEARL2A] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL2A] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL2A] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL2A] = 1870;
  rd->thresh_mult[THR_COMP_NEW_NEARL2A] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL2A] = 1800;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL3A] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL3A] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL3A] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL3A] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL3A] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL3A] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A] = 3000;

  rd->thresh_mult[THR_COMP_NEAR_NEARGA] = 1320;
  rd->thresh_mult[THR_COMP_NEAREST_NEWGA] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTGA] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWGA] = 2040;
  rd->thresh_mult[THR_COMP_NEW_NEARGA] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWGA] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA] = 2250;

  rd->thresh_mult[THR_COMP_NEAR_NEARLB] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLB] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLB] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWLB] = 1360;
  rd->thresh_mult[THR_COMP_NEW_NEARLB] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWLB] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLB] = 2250;

  rd->thresh_mult[THR_COMP_NEAR_NEARL2B] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL2B] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL2B] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL2B] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL2B] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL2B] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2B] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL3B] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL3B] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL3B] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL3B] = 1870;
  rd->thresh_mult[THR_COMP_NEW_NEARL3B] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL3B] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3B] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARGB] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWGB] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTGB] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWGB] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARGB] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWGB] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGB] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARLA2] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLA2] = 1800;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLA2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWLA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARLA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWLA2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA2] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL2A2] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL2A2] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL2A2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL2A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL2A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL2A2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A2] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL3A2] = 1440;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL3A2] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL3A2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL3A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL3A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL3A2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A2] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARGA2] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWGA2] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTGA2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWGA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARGA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWGA2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA2] = 2750;

  rd->thresh_mult[THR_COMP_NEAR_NEARLL2] = 1600;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLL2] = 2000;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLL2] = 2000;
  rd->thresh_mult[THR_COMP_NEAR_NEWLL2] = 2640;
  rd->thresh_mult[THR_COMP_NEW_NEARLL2] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEWLL2] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL2] = 3200;

  rd->thresh_mult[THR_COMP_NEAR_NEARLL3] = 1600;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLL3] = 2000;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLL3] = 1800;
  rd->thresh_mult[THR_COMP_NEAR_NEWLL3] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEARLL3] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEWLL3] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL3] = 3200;

  rd->thresh_mult[THR_COMP_NEAR_NEARLG] = 1760;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLG] = 2400;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLG] = 2000;
  rd->thresh_mult[THR_COMP_NEAR_NEWLG] = 1760;
  rd->thresh_mult[THR_COMP_NEW_NEARLG] = 2640;
  rd->thresh_mult[THR_COMP_NEW_NEWLG] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLG] = 3200;

  rd->thresh_mult[THR_COMP_NEAR_NEARBA] = 1600;
  rd->thresh_mult[THR_COMP_NEAREST_NEWBA] = 2000;
  rd->thresh_mult[THR_COMP_NEW_NEARESTBA] = 2000;
  rd->thresh_mult[THR_COMP_NEAR_NEWBA] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEARBA] = 1980;
  rd->thresh_mult[THR_COMP_NEW_NEWBA] = 2640;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALBA] = 3200;

  // Intra prediction mode entries.
  rd->thresh_mult[THR_DC] = 1000;
  rd->thresh_mult[THR_PAETH] = 1000;
  rd->thresh_mult[THR_SMOOTH] = 2200;
  rd->thresh_mult[THR_SMOOTH_V] = 2000;
  rd->thresh_mult[THR_SMOOTH_H] = 2000;
  rd->thresh_mult[THR_H_PRED] = 2000;
  rd->thresh_mult[THR_V_PRED] = 1800;
  rd->thresh_mult[THR_D135_PRED] = 2500;
  rd->thresh_mult[THR_D203_PRED] = 2000;
  rd->thresh_mult[THR_D157_PRED] = 2500;
  rd->thresh_mult[THR_D67_PRED] = 2000;
  rd->thresh_mult[THR_D113_PRED] = 2500;
  rd->thresh_mult[THR_D45_PRED] = 2500;
}
1482
update_thr_fact(int (* factor_buf)[MAX_MODES],THR_MODES best_mode_index,THR_MODES mode_start,THR_MODES mode_end,BLOCK_SIZE min_size,BLOCK_SIZE max_size,int max_rd_thresh_factor)1483 static INLINE void update_thr_fact(int (*factor_buf)[MAX_MODES],
1484 THR_MODES best_mode_index,
1485 THR_MODES mode_start, THR_MODES mode_end,
1486 BLOCK_SIZE min_size, BLOCK_SIZE max_size,
1487 int max_rd_thresh_factor) {
1488 for (THR_MODES mode = mode_start; mode < mode_end; ++mode) {
1489 for (BLOCK_SIZE bs = min_size; bs <= max_size; ++bs) {
1490 int *const fact = &factor_buf[bs][mode];
1491 if (mode == best_mode_index) {
1492 *fact -= (*fact >> RD_THRESH_LOG_DEC_FACTOR);
1493 } else {
1494 *fact = AOMMIN(*fact + RD_THRESH_INC, max_rd_thresh_factor);
1495 }
1496 }
1497 }
1498 }
1499
// Updates the adaptive threshold factors after a mode decision.
//
// cm:                     supplies the superblock size (seq_params->sb_size).
// factor_buf:             factors indexed [block size][mode], updated in
//                         place.
// use_adaptive_rd_thresh: must be > 0; scales RD_THRESH_MAX_FACT to give the
//                         cap on a factor.
// bsize:                  block size that was just coded.
// best_mode_index:        winning mode; its factor is lowered, the others in
//                         range are raised.
// inter_mode_start/end:   half-open range of inter modes to update.
// intra_mode_start/end:   half-open range of intra modes to update.
void av1_update_rd_thresh_fact(
    const AV1_COMMON *const cm, int (*factor_buf)[MAX_MODES],
    int use_adaptive_rd_thresh, BLOCK_SIZE bsize, THR_MODES best_mode_index,
    THR_MODES inter_mode_start, THR_MODES inter_mode_end,
    THR_MODES intra_mode_start, THR_MODES intra_mode_end) {
  assert(use_adaptive_rd_thresh > 0);
  const int max_rd_thresh_factor = use_adaptive_rd_thresh * RD_THRESH_MAX_FACT;

  // Default: update only the coded block size. This is the final choice for
  // block sizes with 1:4 and 4:1 aspect ratios, detected here as sizes that
  // compare greater than the superblock size.
  // TODO(any): Experiment with threshold update for parent/child blocks
  BLOCK_SIZE min_size = bsize;
  BLOCK_SIZE max_size = bsize;
  if (bsize <= cm->seq_params->sb_size) {
    // Otherwise widen to two sizes either side, clamped to
    // [BLOCK_4X4, sb_size].
    min_size = AOMMAX(bsize - 2, BLOCK_4X4);
    max_size = AOMMIN(bsize + 2, (int)cm->seq_params->sb_size);
  }

  update_thr_fact(factor_buf, best_mode_index, inter_mode_start, inter_mode_end,
                  min_size, max_size, max_rd_thresh_factor);
  update_thr_fact(factor_buf, best_mode_index, intra_mode_start, intra_mode_end,
                  min_size, max_size, max_rd_thresh_factor);
}
1525
// Returns the intra cost penalty derived from the DC quantizer value
// q = av1_dc_quant_QTX(qindex, qdelta, bit_depth):
//   AOM_BITS_8:  20 * q
//   AOM_BITS_10: 5 * q
//   AOM_BITS_12: ROUND_POWER_OF_TWO(5 * q, 2)
// Any other bit depth trips an assert and returns -1.
int av1_get_intra_cost_penalty(int qindex, int qdelta,
                               aom_bit_depth_t bit_depth) {
  const int dc_q = av1_dc_quant_QTX(qindex, qdelta, bit_depth);

  if (bit_depth == AOM_BITS_8) return 20 * dc_q;
  if (bit_depth == AOM_BITS_10) return 5 * dc_q;
  if (bit_depth == AOM_BITS_12) return ROUND_POWER_OF_TWO(5 * dc_q, 2);

  assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
  return -1;
}
1538