1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <assert.h>
13 #include <limits.h>
14 #include <math.h>
15 #include <stdio.h>
16
17 #include "aom_dsp/aom_dsp_common.h"
18 #include "aom_mem/aom_mem.h"
19 #include "aom_ports/bitops.h"
20 #include "aom_ports/mem.h"
21 #include "aom_ports/aom_once.h"
22
23 #include "av1/common/common.h"
24 #include "av1/common/entropy.h"
25 #include "av1/common/entropymode.h"
26 #include "av1/common/pred_common.h"
27 #include "av1/common/quant_common.h"
28 #include "av1/common/reconinter.h"
29 #include "av1/common/reconintra.h"
30 #include "av1/common/seg_common.h"
31
32 #include "av1/encoder/cost.h"
33 #include "av1/encoder/encodemv.h"
34 #include "av1/encoder/encoder.h"
35 #include "av1/encoder/nonrd_opt.h"
36 #include "av1/encoder/ratectrl.h"
37 #include "av1/encoder/rd.h"
38
39 #define RD_THRESH_POW 1.25
40
41 // The baseline rd thresholds for breaking out of the rd loop for
42 // certain modes are assumed to be based on 8x8 blocks.
43 // This table is used to correct for block size.
44 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
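// For example, BLOCK_8X8 maps to 4 (x1.0), so the 8x8-tuned baselines are
// used unchanged, while BLOCK_4X4 maps to 2 (x0.5) and BLOCK_64X64 to 32 (x8).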
45 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES_ALL] = {
46 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, 48, 48, 64, 4, 4, 8, 8, 16, 16
47 };
48
49 static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA]
50 [EXT_TX_SIZES] = {
51 { 1, 1, 1, 1 }, // unused
52 { 1, 1, 0, 0 },
53 { 0, 0, 1, 0 },
54 };
55
56 static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER]
57 [EXT_TX_SIZES] = {
58 { 1, 1, 1, 1 }, // unused
59 { 1, 1, 0, 0 },
60 { 0, 0, 1, 0 },
61 { 0, 1, 1, 1 },
62 };
63
64 static const int av1_ext_tx_set_idx_to_type[2][AOMMAX(EXT_TX_SETS_INTRA,
65 EXT_TX_SETS_INTER)] = {
66 {
67 // Intra
68 EXT_TX_SET_DCTONLY,
69 EXT_TX_SET_DTT4_IDTX_1DDCT,
70 EXT_TX_SET_DTT4_IDTX,
71 },
72 {
73 // Inter
74 EXT_TX_SET_DCTONLY,
75 EXT_TX_SET_ALL16,
76 EXT_TX_SET_DTT9_IDTX_1DDCT,
77 EXT_TX_SET_DCT_IDTX,
78 },
79 };
80
81 void av1_fill_mode_rates(AV1_COMMON *const cm, ModeCosts *mode_costs,
82 FRAME_CONTEXT *fc) {
83 int i, j;
84
85 for (i = 0; i < PARTITION_CONTEXTS; ++i)
86 av1_cost_tokens_from_cdf(mode_costs->partition_cost[i],
87 fc->partition_cdf[i], NULL);
88
89 if (cm->current_frame.skip_mode_info.skip_mode_flag) {
90 for (i = 0; i < SKIP_MODE_CONTEXTS; ++i) {
91 av1_cost_tokens_from_cdf(mode_costs->skip_mode_cost[i],
92 fc->skip_mode_cdfs[i], NULL);
93 }
94 }
95
96 for (i = 0; i < SKIP_CONTEXTS; ++i) {
97 av1_cost_tokens_from_cdf(mode_costs->skip_txfm_cost[i],
98 fc->skip_txfm_cdfs[i], NULL);
99 }
100
101 for (i = 0; i < KF_MODE_CONTEXTS; ++i)
102 for (j = 0; j < KF_MODE_CONTEXTS; ++j)
103 av1_cost_tokens_from_cdf(mode_costs->y_mode_costs[i][j],
104 fc->kf_y_cdf[i][j], NULL);
105
106 for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
107 av1_cost_tokens_from_cdf(mode_costs->mbmode_cost[i], fc->y_mode_cdf[i],
108 NULL);
109 for (i = 0; i < CFL_ALLOWED_TYPES; ++i)
110 for (j = 0; j < INTRA_MODES; ++j)
111 av1_cost_tokens_from_cdf(mode_costs->intra_uv_mode_cost[i][j],
112 fc->uv_mode_cdf[i][j], NULL);
113
114 av1_cost_tokens_from_cdf(mode_costs->filter_intra_mode_cost,
115 fc->filter_intra_mode_cdf, NULL);
116 for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
117 if (av1_filter_intra_allowed_bsize(cm, i))
118 av1_cost_tokens_from_cdf(mode_costs->filter_intra_cost[i],
119 fc->filter_intra_cdfs[i], NULL);
120 }
121
122 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
123 av1_cost_tokens_from_cdf(mode_costs->switchable_interp_costs[i],
124 fc->switchable_interp_cdf[i], NULL);
125
126 for (i = 0; i < PALATTE_BSIZE_CTXS; ++i) {
127 av1_cost_tokens_from_cdf(mode_costs->palette_y_size_cost[i],
128 fc->palette_y_size_cdf[i], NULL);
129 av1_cost_tokens_from_cdf(mode_costs->palette_uv_size_cost[i],
130 fc->palette_uv_size_cdf[i], NULL);
131 for (j = 0; j < PALETTE_Y_MODE_CONTEXTS; ++j) {
132 av1_cost_tokens_from_cdf(mode_costs->palette_y_mode_cost[i][j],
133 fc->palette_y_mode_cdf[i][j], NULL);
134 }
135 }
136
137 for (i = 0; i < PALETTE_UV_MODE_CONTEXTS; ++i) {
138 av1_cost_tokens_from_cdf(mode_costs->palette_uv_mode_cost[i],
139 fc->palette_uv_mode_cdf[i], NULL);
140 }
141
142 for (i = 0; i < PALETTE_SIZES; ++i) {
143 for (j = 0; j < PALETTE_COLOR_INDEX_CONTEXTS; ++j) {
144 av1_cost_tokens_from_cdf(mode_costs->palette_y_color_cost[i][j],
145 fc->palette_y_color_index_cdf[i][j], NULL);
146 av1_cost_tokens_from_cdf(mode_costs->palette_uv_color_cost[i][j],
147 fc->palette_uv_color_index_cdf[i][j], NULL);
148 }
149 }
150
151 int sign_cost[CFL_JOINT_SIGNS];
152 av1_cost_tokens_from_cdf(sign_cost, fc->cfl_sign_cdf, NULL);
153 for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
154 int *cost_u = mode_costs->cfl_cost[joint_sign][CFL_PRED_U];
155 int *cost_v = mode_costs->cfl_cost[joint_sign][CFL_PRED_V];
156 if (CFL_SIGN_U(joint_sign) == CFL_SIGN_ZERO) {
157 memset(cost_u, 0, CFL_ALPHABET_SIZE * sizeof(*cost_u));
158 } else {
159 const aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
160 av1_cost_tokens_from_cdf(cost_u, cdf_u, NULL);
161 }
162 if (CFL_SIGN_V(joint_sign) == CFL_SIGN_ZERO) {
163 memset(cost_v, 0, CFL_ALPHABET_SIZE * sizeof(*cost_v));
164 } else {
165 const aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
166 av1_cost_tokens_from_cdf(cost_v, cdf_v, NULL);
167 }
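// Fold the joint sign cost into the U costs only, so that it is charged
// exactly once per (alpha_u, alpha_v) pair.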
168 for (int u = 0; u < CFL_ALPHABET_SIZE; u++)
169 cost_u[u] += sign_cost[joint_sign];
170 }
171
172 for (i = 0; i < MAX_TX_CATS; ++i)
173 for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
174 av1_cost_tokens_from_cdf(mode_costs->tx_size_cost[i][j],
175 fc->tx_size_cdf[i][j], NULL);
176
177 for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i) {
178 av1_cost_tokens_from_cdf(mode_costs->txfm_partition_cost[i],
179 fc->txfm_partition_cdf[i], NULL);
180 }
181
182 for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
183 int s;
184 for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
185 if (use_inter_ext_tx_for_txsize[s][i]) {
186 av1_cost_tokens_from_cdf(
187 mode_costs->inter_tx_type_costs[s][i], fc->inter_ext_tx_cdf[s][i],
188 av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][s]]);
189 }
190 }
191 for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
192 if (use_intra_ext_tx_for_txsize[s][i]) {
193 for (j = 0; j < INTRA_MODES; ++j) {
194 av1_cost_tokens_from_cdf(
195 mode_costs->intra_tx_type_costs[s][i][j],
196 fc->intra_ext_tx_cdf[s][i][j],
197 av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[0][s]]);
198 }
199 }
200 }
201 }
202 for (i = 0; i < DIRECTIONAL_MODES; ++i) {
203 av1_cost_tokens_from_cdf(mode_costs->angle_delta_cost[i],
204 fc->angle_delta_cdf[i], NULL);
205 }
206 av1_cost_tokens_from_cdf(mode_costs->intrabc_cost, fc->intrabc_cdf, NULL);
207
208 for (i = 0; i < SPATIAL_PREDICTION_PROBS; ++i) {
209 av1_cost_tokens_from_cdf(mode_costs->spatial_pred_cost[i],
210 fc->seg.spatial_pred_seg_cdf[i], NULL);
211 }
212
213 for (i = 0; i < SEG_TEMPORAL_PRED_CTXS; ++i) {
214 av1_cost_tokens_from_cdf(mode_costs->tmp_pred_cost[i], fc->seg.pred_cdf[i],
215 NULL);
216 }
217
218 if (!frame_is_intra_only(cm)) {
219 for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
220 av1_cost_tokens_from_cdf(mode_costs->comp_inter_cost[i],
221 fc->comp_inter_cdf[i], NULL);
222 }
223
224 for (i = 0; i < REF_CONTEXTS; ++i) {
225 for (j = 0; j < SINGLE_REFS - 1; ++j) {
226 av1_cost_tokens_from_cdf(mode_costs->single_ref_cost[i][j],
227 fc->single_ref_cdf[i][j], NULL);
228 }
229 }
230
231 for (i = 0; i < COMP_REF_TYPE_CONTEXTS; ++i) {
232 av1_cost_tokens_from_cdf(mode_costs->comp_ref_type_cost[i],
233 fc->comp_ref_type_cdf[i], NULL);
234 }
235
236 for (i = 0; i < UNI_COMP_REF_CONTEXTS; ++i) {
237 for (j = 0; j < UNIDIR_COMP_REFS - 1; ++j) {
238 av1_cost_tokens_from_cdf(mode_costs->uni_comp_ref_cost[i][j],
239 fc->uni_comp_ref_cdf[i][j], NULL);
240 }
241 }
242
243 for (i = 0; i < REF_CONTEXTS; ++i) {
244 for (j = 0; j < FWD_REFS - 1; ++j) {
245 av1_cost_tokens_from_cdf(mode_costs->comp_ref_cost[i][j],
246 fc->comp_ref_cdf[i][j], NULL);
247 }
248 }
249
250 for (i = 0; i < REF_CONTEXTS; ++i) {
251 for (j = 0; j < BWD_REFS - 1; ++j) {
252 av1_cost_tokens_from_cdf(mode_costs->comp_bwdref_cost[i][j],
253 fc->comp_bwdref_cdf[i][j], NULL);
254 }
255 }
256
257 for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
258 av1_cost_tokens_from_cdf(mode_costs->intra_inter_cost[i],
259 fc->intra_inter_cdf[i], NULL);
260 }
261
262 for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) {
263 av1_cost_tokens_from_cdf(mode_costs->newmv_mode_cost[i], fc->newmv_cdf[i],
264 NULL);
265 }
266
267 for (i = 0; i < GLOBALMV_MODE_CONTEXTS; ++i) {
268 av1_cost_tokens_from_cdf(mode_costs->zeromv_mode_cost[i],
269 fc->zeromv_cdf[i], NULL);
270 }
271
272 for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) {
273 av1_cost_tokens_from_cdf(mode_costs->refmv_mode_cost[i], fc->refmv_cdf[i],
274 NULL);
275 }
276
277 for (i = 0; i < DRL_MODE_CONTEXTS; ++i) {
278 av1_cost_tokens_from_cdf(mode_costs->drl_mode_cost0[i], fc->drl_cdf[i],
279 NULL);
280 }
281 for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
282 av1_cost_tokens_from_cdf(mode_costs->inter_compound_mode_cost[i],
283 fc->inter_compound_mode_cdf[i], NULL);
284 for (i = 0; i < BLOCK_SIZES_ALL; ++i)
285 av1_cost_tokens_from_cdf(mode_costs->compound_type_cost[i],
286 fc->compound_type_cdf[i], NULL);
287 for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
288 if (av1_is_wedge_used(i)) {
289 av1_cost_tokens_from_cdf(mode_costs->wedge_idx_cost[i],
290 fc->wedge_idx_cdf[i], NULL);
291 }
292 }
293 for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
294 av1_cost_tokens_from_cdf(mode_costs->interintra_cost[i],
295 fc->interintra_cdf[i], NULL);
296 av1_cost_tokens_from_cdf(mode_costs->interintra_mode_cost[i],
297 fc->interintra_mode_cdf[i], NULL);
298 }
299 for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
300 av1_cost_tokens_from_cdf(mode_costs->wedge_interintra_cost[i],
301 fc->wedge_interintra_cdf[i], NULL);
302 }
303 for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
304 av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost[i],
305 fc->motion_mode_cdf[i], NULL);
306 }
307 for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
308 av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost1[i],
309 fc->obmc_cdf[i], NULL);
310 }
311 for (i = 0; i < COMP_INDEX_CONTEXTS; ++i) {
312 av1_cost_tokens_from_cdf(mode_costs->comp_idx_cost[i],
313 fc->compound_index_cdf[i], NULL);
314 }
315 for (i = 0; i < COMP_GROUP_IDX_CONTEXTS; ++i) {
316 av1_cost_tokens_from_cdf(mode_costs->comp_group_idx_cost[i],
317 fc->comp_group_idx_cdf[i], NULL);
318 }
319 }
320 }
321
322 void av1_fill_lr_rates(ModeCosts *mode_costs, FRAME_CONTEXT *fc) {
323 av1_cost_tokens_from_cdf(mode_costs->switchable_restore_cost,
324 fc->switchable_restore_cdf, NULL);
325 av1_cost_tokens_from_cdf(mode_costs->wiener_restore_cost,
326 fc->wiener_restore_cdf, NULL);
327 av1_cost_tokens_from_cdf(mode_costs->sgrproj_restore_cost,
328 fc->sgrproj_restore_cdf, NULL);
329 }
330
331 // Values are now correlated to quantizer.
332 static int sad_per_bit_lut_8[QINDEX_RANGE];
333 static int sad_per_bit_lut_10[QINDEX_RANGE];
334 static int sad_per_bit_lut_12[QINDEX_RANGE];
335
336 static void init_me_luts_bd(int *bit16lut, int range,
337 aom_bit_depth_t bit_depth) {
338 int i;
339 // Initialize the sad lut tables using a formulaic calculation for now.
340 // This is to make it easier to resolve the impact of experimental changes
341 // to the quantizer tables.
342 for (i = 0; i < range; i++) {
343 const double q = av1_convert_qindex_to_q(i, bit_depth);
344 bit16lut[i] = (int)(0.0418 * q + 2.4107);
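// For example, q = 64 gives (int)(0.0418 * 64 + 2.4107) = 5.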
345 }
346 }
347
348 static void init_me_luts(void) {
349 init_me_luts_bd(sad_per_bit_lut_8, QINDEX_RANGE, AOM_BITS_8);
350 init_me_luts_bd(sad_per_bit_lut_10, QINDEX_RANGE, AOM_BITS_10);
351 init_me_luts_bd(sad_per_bit_lut_12, QINDEX_RANGE, AOM_BITS_12);
352 }
353
354 void av1_init_me_luts(void) { aom_once(init_me_luts); }
355
356 static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
357 8, 8, 4, 4, 2, 2, 1, 0 };
358
359 static const int rd_layer_depth_factor[7] = {
360 160, 160, 160, 160, 192, 208, 224
361 };
362
363 // Returns the default rd multiplier for inter frames for a given qindex.
364 // The function here is a first pass estimate based on data from
365 // a previous Vizier run
366 static double def_inter_rd_multiplier(int qindex) {
367 return 3.2 + (0.0015 * (double)qindex);
368 }
369
370 // Returns the default rd multiplier for ARF/Golden Frames for a given qindex.
371 // The function here is a first pass estimate based on data from
372 // a previous Vizier run
373 static double def_arf_rd_multiplier(int qindex) {
374 return 3.25 + (0.0015 * (double)qindex);
375 }
376
377 // Returns the default rd multiplier for key frames for a given qindex.
378 // The function here is a first pass estimate based on data from
379 // a previous Vizier run
380 static double def_kf_rd_multiplier(int qindex) {
381 return 3.3 + (0.0015 * (double)qindex);
382 }
383
384 int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
385 FRAME_UPDATE_TYPE update_type,
386 int qindex) {
387 const int q = av1_dc_quant_QTX(qindex, 0, bit_depth);
388 int64_t rdmult = q * q;
389 if (update_type == KF_UPDATE) {
390 double def_rd_q_mult = def_kf_rd_multiplier(q);
391 rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
392 } else if ((update_type == GF_UPDATE) || (update_type == ARF_UPDATE)) {
393 double def_rd_q_mult = def_arf_rd_multiplier(q);
394 rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
395 } else {
396 double def_rd_q_mult = def_inter_rd_multiplier(q);
397 rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
398 }
399
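// At this point rdmult ~= q^2 * (3.2..3.3 + 0.0015 * q) in the 8-bit
// quantizer domain; the shifts below renormalize the larger 10- and 12-bit
// quantizer ranges back to that scale.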
400 switch (bit_depth) {
401 case AOM_BITS_8: break;
402 case AOM_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
403 case AOM_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
404 default:
405 assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
406 return -1;
407 }
408 return rdmult > 0 ? (int)AOMMIN(rdmult, INT_MAX) : 1;
409 }
410
411 int av1_compute_rd_mult(const int qindex, const aom_bit_depth_t bit_depth,
412 const FRAME_UPDATE_TYPE update_type,
413 const int layer_depth, const int boost_index,
414 const FRAME_TYPE frame_type,
415 const int use_fixed_qp_offsets,
416 const int is_stat_consumption_stage) {
417 int64_t rdmult =
418 av1_compute_rd_mult_based_on_qindex(bit_depth, update_type, qindex);
419 if (is_stat_consumption_stage && !use_fixed_qp_offsets &&
420 (frame_type != KEY_FRAME)) {
421 // Layer depth adjustment
422 rdmult = (rdmult * rd_layer_depth_factor[layer_depth]) >> 7;
423 // ARF boost adjustment
424 rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
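// Both factors are Q7 values (128 = 1.0): e.g. a layer depth factor of 160
// scales rdmult by 1.25, and a boost factor of 64 adds another 50%.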
425 }
426 return (int)rdmult;
427 }
428
429 int av1_get_deltaq_offset(aom_bit_depth_t bit_depth, int qindex, double beta) {
430 assert(beta > 0.0);
431 int q = av1_dc_quant_QTX(qindex, 0, bit_depth);
432 int newq = (int)rint(q / sqrt(beta));
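// The target step size is q / sqrt(beta): beta > 1 therefore yields a
// negative qindex offset (finer quantization), beta < 1 a positive one.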
433 int orig_qindex = qindex;
434 if (newq == q) {
435 return 0;
436 }
437 if (newq < q) {
438 while (qindex > 0) {
439 qindex--;
440 q = av1_dc_quant_QTX(qindex, 0, bit_depth);
441 if (newq >= q) {
442 break;
443 }
444 }
445 } else {
446 while (qindex < MAXQ) {
447 qindex++;
448 q = av1_dc_quant_QTX(qindex, 0, bit_depth);
449 if (newq <= q) {
450 break;
451 }
452 }
453 }
454 return qindex - orig_qindex;
455 }
456
457 int av1_adjust_q_from_delta_q_res(int delta_q_res, int prev_qindex,
458 int curr_qindex) {
459 curr_qindex = clamp(curr_qindex, delta_q_res, 256 - delta_q_res);
460 const int sign_deltaq_index = curr_qindex - prev_qindex >= 0 ? 1 : -1;
461 const int deltaq_deadzone = delta_q_res / 4;
462 const int qmask = ~(delta_q_res - 1);
463 int abs_deltaq_index = abs(curr_qindex - prev_qindex);
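// Snap the requested change to a multiple of delta_q_res: e.g. with
// delta_q_res = 4, prev_qindex = 100 and curr_qindex = 107,
// abs_deltaq_index = (7 + 1) & ~3 = 8, giving an adjusted qindex of 108.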
464 abs_deltaq_index = (abs_deltaq_index + deltaq_deadzone) & qmask;
465 int adjust_qindex = prev_qindex + sign_deltaq_index * abs_deltaq_index;
466 adjust_qindex = AOMMAX(adjust_qindex, MINQ + 1);
467 return adjust_qindex;
468 }
469
470 int av1_get_adaptive_rdmult(const AV1_COMP *cpi, double beta) {
471 assert(beta > 0.0);
472 const AV1_COMMON *cm = &cpi->common;
473
474 const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
475 const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
476 const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
477 const FRAME_TYPE frame_type = cm->current_frame.frame_type;
478
479 const int qindex_rdmult = cm->quant_params.base_qindex;
480 return (int)(av1_compute_rd_mult(
481 qindex_rdmult, cm->seq_params->bit_depth,
482 cpi->ppi->gf_group.update_type[cpi->gf_frame_index],
483 layer_depth, boost_index, frame_type,
484 cpi->oxcf.q_cfg.use_fixed_qp_offsets,
485 is_stat_consumption_stage(cpi)) /
486 beta);
487 }
488
489 static int compute_rd_thresh_factor(int qindex, aom_bit_depth_t bit_depth) {
490 double q;
491 switch (bit_depth) {
492 case AOM_BITS_8: q = av1_dc_quant_QTX(qindex, 0, AOM_BITS_8) / 4.0; break;
493 case AOM_BITS_10:
494 q = av1_dc_quant_QTX(qindex, 0, AOM_BITS_10) / 16.0;
495 break;
496 case AOM_BITS_12:
497 q = av1_dc_quant_QTX(qindex, 0, AOM_BITS_12) / 64.0;
498 break;
499 default:
500 assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
501 return -1;
502 }
503 // TODO(debargha): Adjust the function below.
504 return AOMMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
505 }
506
507 void av1_set_sad_per_bit(const AV1_COMP *cpi, int *sadperbit, int qindex) {
508 switch (cpi->common.seq_params->bit_depth) {
509 case AOM_BITS_8: *sadperbit = sad_per_bit_lut_8[qindex]; break;
510 case AOM_BITS_10: *sadperbit = sad_per_bit_lut_10[qindex]; break;
511 case AOM_BITS_12: *sadperbit = sad_per_bit_lut_12[qindex]; break;
512 default:
513 assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
514 }
515 }
516
517 static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd,
518 int use_nonrd_pick_mode) {
519 int i, bsize, segment_id;
520 THR_MODES mode_indices[RTC_REFS * RTC_MODES] = { 0 };
521 int num_modes_count = use_nonrd_pick_mode ? 0 : MAX_MODES;
522
523 if (use_nonrd_pick_mode) {
524 for (int r_idx = 0; r_idx < RTC_REFS; r_idx++) {
525 const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
526 if (ref != INTRA_FRAME) {
527 for (i = 0; i < RTC_INTER_MODES; i++)
528 mode_indices[num_modes_count++] =
529 mode_idx[ref][mode_offset(inter_mode_list[i])];
530 } else {
531 for (i = 0; i < RTC_INTRA_MODES; i++)
532 mode_indices[num_modes_count++] =
533 mode_idx[ref][mode_offset(intra_mode_list[i])];
534 }
535 }
536 }
537
538 for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
539 const int qindex = clamp(
540 av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex) +
541 cm->quant_params.y_dc_delta_q,
542 0, MAXQ);
543 const int q = compute_rd_thresh_factor(qindex, cm->seq_params->bit_depth);
544
545 for (bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
546 // Threshold here seems unnecessarily harsh but fine given actual
547 // range of values used for cpi->sf.thresh_mult[].
548 const int t = q * rd_thresh_block_size_factor[bsize];
549 const int thresh_max = INT_MAX / t;
550
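// Per-mode threshold = thresh_mult * q * size_factor / 4, saturating to
// INT_MAX when the product would overflow.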
551 for (i = 0; i < num_modes_count; ++i) {
552 const int mode_index = use_nonrd_pick_mode ? mode_indices[i] : i;
553 rd->threshes[segment_id][bsize][mode_index] =
554 rd->thresh_mult[mode_index] < thresh_max
555 ? rd->thresh_mult[mode_index] * t / 4
556 : INT_MAX;
557 }
558 }
559 }
560 }
561
562 void av1_fill_coeff_costs(CoeffCosts *coeff_costs, FRAME_CONTEXT *fc,
563 const int num_planes) {
564 const int nplanes = AOMMIN(num_planes, PLANE_TYPES);
565 for (int eob_multi_size = 0; eob_multi_size < 7; ++eob_multi_size) {
566 for (int plane = 0; plane < nplanes; ++plane) {
567 LV_MAP_EOB_COST *pcost = &coeff_costs->eob_costs[eob_multi_size][plane];
568
569 for (int ctx = 0; ctx < 2; ++ctx) {
570 aom_cdf_prob *pcdf;
571 switch (eob_multi_size) {
572 case 0: pcdf = fc->eob_flag_cdf16[plane][ctx]; break;
573 case 1: pcdf = fc->eob_flag_cdf32[plane][ctx]; break;
574 case 2: pcdf = fc->eob_flag_cdf64[plane][ctx]; break;
575 case 3: pcdf = fc->eob_flag_cdf128[plane][ctx]; break;
576 case 4: pcdf = fc->eob_flag_cdf256[plane][ctx]; break;
577 case 5: pcdf = fc->eob_flag_cdf512[plane][ctx]; break;
578 case 6:
579 default: pcdf = fc->eob_flag_cdf1024[plane][ctx]; break;
580 }
581 av1_cost_tokens_from_cdf(pcost->eob_cost[ctx], pcdf, NULL);
582 }
583 }
584 }
585 for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
586 for (int plane = 0; plane < nplanes; ++plane) {
587 LV_MAP_COEFF_COST *pcost = &coeff_costs->coeff_costs[tx_size][plane];
588
589 for (int ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx)
590 av1_cost_tokens_from_cdf(pcost->txb_skip_cost[ctx],
591 fc->txb_skip_cdf[tx_size][ctx], NULL);
592
593 for (int ctx = 0; ctx < SIG_COEF_CONTEXTS_EOB; ++ctx)
594 av1_cost_tokens_from_cdf(pcost->base_eob_cost[ctx],
595 fc->coeff_base_eob_cdf[tx_size][plane][ctx],
596 NULL);
597 for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx)
598 av1_cost_tokens_from_cdf(pcost->base_cost[ctx],
599 fc->coeff_base_cdf[tx_size][plane][ctx], NULL);
600
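// Entries 4..7 are incremental costs derived from the absolute costs above:
// [5] is the rate of coding base level 1 instead of 0 plus one literal bit
// (the sign), while [6] and [7] are the deltas for raising the base level by
// one. These are used when coefficients are adjusted during RD optimization.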
601 for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
602 pcost->base_cost[ctx][4] = 0;
603 pcost->base_cost[ctx][5] = pcost->base_cost[ctx][1] +
604 av1_cost_literal(1) -
605 pcost->base_cost[ctx][0];
606 pcost->base_cost[ctx][6] =
607 pcost->base_cost[ctx][2] - pcost->base_cost[ctx][1];
608 pcost->base_cost[ctx][7] =
609 pcost->base_cost[ctx][3] - pcost->base_cost[ctx][2];
610 }
611
612 for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx)
613 av1_cost_tokens_from_cdf(pcost->eob_extra_cost[ctx],
614 fc->eob_extra_cdf[tx_size][plane][ctx], NULL);
615
616 for (int ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx)
617 av1_cost_tokens_from_cdf(pcost->dc_sign_cost[ctx],
618 fc->dc_sign_cdf[plane][ctx], NULL);
619
620 for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
621 int br_rate[BR_CDF_SIZE];
622 int prev_cost = 0;
623 int i, j;
624 av1_cost_tokens_from_cdf(
625 br_rate, fc->coeff_br_cdf[AOMMIN(tx_size, TX_32X32)][plane][ctx],
626 NULL);
627 // printf("br_rate: ");
628 // for(j = 0; j < BR_CDF_SIZE; j++)
629 // printf("%4d ", br_rate[j]);
630 // printf("\n");
631 for (i = 0; i < COEFF_BASE_RANGE; i += BR_CDF_SIZE - 1) {
632 for (j = 0; j < BR_CDF_SIZE - 1; j++) {
633 pcost->lps_cost[ctx][i + j] = prev_cost + br_rate[j];
634 }
635 prev_cost += br_rate[j];
636 }
637 pcost->lps_cost[ctx][i] = prev_cost;
638 // printf("lps_cost: %d %d %2d : ", tx_size, plane, ctx);
639 // for (i = 0; i <= COEFF_BASE_RANGE; i++)
640 // printf("%5d ", pcost->lps_cost[ctx][i]);
641 // printf("\n");
642 }
643 for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
644 pcost->lps_cost[ctx][0 + COEFF_BASE_RANGE + 1] =
645 pcost->lps_cost[ctx][0];
646 for (int i = 1; i <= COEFF_BASE_RANGE; ++i) {
647 pcost->lps_cost[ctx][i + COEFF_BASE_RANGE + 1] =
648 pcost->lps_cost[ctx][i] - pcost->lps_cost[ctx][i - 1];
649 }
650 }
651 }
652 }
653 }
654
655 void av1_fill_mv_costs(const nmv_context *nmvc, int integer_mv, int usehp,
656 MvCosts *mv_costs) {
657 // Avoid accessing 'mv_costs' when it is not allocated.
658 if (mv_costs == NULL) return;
659
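// Point the cost pointers at the middle of the allocated arrays so they can
// be indexed directly with signed MV components in [-MV_MAX, MV_MAX].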
660 mv_costs->nmv_cost[0] = &mv_costs->nmv_cost_alloc[0][MV_MAX];
661 mv_costs->nmv_cost[1] = &mv_costs->nmv_cost_alloc[1][MV_MAX];
662 mv_costs->nmv_cost_hp[0] = &mv_costs->nmv_cost_hp_alloc[0][MV_MAX];
663 mv_costs->nmv_cost_hp[1] = &mv_costs->nmv_cost_hp_alloc[1][MV_MAX];
664 if (integer_mv) {
665 mv_costs->mv_cost_stack = (int **)&mv_costs->nmv_cost;
666 av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
667 nmvc, MV_SUBPEL_NONE);
668 } else {
669 mv_costs->mv_cost_stack =
670 usehp ? mv_costs->nmv_cost_hp : mv_costs->nmv_cost;
671 av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
672 nmvc, usehp);
673 }
674 }
675
676 void av1_fill_dv_costs(const nmv_context *ndvc, IntraBCMVCosts *dv_costs) {
677 dv_costs->dv_costs[0] = &dv_costs->dv_costs_alloc[0][MV_MAX];
678 dv_costs->dv_costs[1] = &dv_costs->dv_costs_alloc[1][MV_MAX];
679 av1_build_nmv_cost_table(dv_costs->joint_mv, dv_costs->dv_costs, ndvc,
680 MV_SUBPEL_NONE);
681 }
682
683 // Populates speed features based on codec control settings (of type
684 // COST_UPDATE_TYPE) and expected speed feature settings (of type
685 // INTERNAL_COST_UPDATE_TYPE) by considering the least frequent cost update.
686 // The populated/updated speed features are used for cost updates in the
687 // encoder.
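// For example, if the codec control requests superblock-level updates but a
// speed feature has already selected tile-level updates, the less frequent
// tile-level setting is retained.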
688 // WARNING: The population of the unified cost update frequency must be
689 // revisited if the enums COST_UPDATE_TYPE/INTERNAL_COST_UPDATE_TYPE are
690 // modified or extended.
691 static INLINE void populate_unified_cost_update_freq(
692 const CostUpdateFreq cost_upd_freq, SPEED_FEATURES *const sf) {
693 INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
694 // Mapping of entropy cost update frequency from the encoder's codec control
695 // settings of type COST_UPDATE_TYPE to speed features of type
696 // INTERNAL_COST_UPDATE_TYPE.
697 static const INTERNAL_COST_UPDATE_TYPE
698 map_cost_upd_to_internal_cost_upd[NUM_COST_UPDATE_TYPES] = {
699 INTERNAL_COST_UPD_SB, INTERNAL_COST_UPD_SBROW, INTERNAL_COST_UPD_TILE,
700 INTERNAL_COST_UPD_OFF
701 };
702
703 inter_sf->mv_cost_upd_level =
704 AOMMIN(inter_sf->mv_cost_upd_level,
705 map_cost_upd_to_internal_cost_upd[cost_upd_freq.mv]);
706 inter_sf->coeff_cost_upd_level =
707 AOMMIN(inter_sf->coeff_cost_upd_level,
708 map_cost_upd_to_internal_cost_upd[cost_upd_freq.coeff]);
709 inter_sf->mode_cost_upd_level =
710 AOMMIN(inter_sf->mode_cost_upd_level,
711 map_cost_upd_to_internal_cost_upd[cost_upd_freq.mode]);
712 sf->intra_sf.dv_cost_upd_level =
713 AOMMIN(sf->intra_sf.dv_cost_upd_level,
714 map_cost_upd_to_internal_cost_upd[cost_upd_freq.dv]);
715 }
716
717 // Checks if entropy costs should be initialized/updated at frame level or not.
718 static INLINE int is_frame_level_cost_upd_freq_set(
719 const AV1_COMMON *const cm, const INTERNAL_COST_UPDATE_TYPE cost_upd_level,
720 const int use_nonrd_pick_mode, const int frames_since_key) {
721 const int fill_costs =
722 frame_is_intra_only(cm) ||
723 (use_nonrd_pick_mode ? frames_since_key < 2
724 : (cm->current_frame.frame_number & 0x07) == 1);
725 return ((!use_nonrd_pick_mode && cost_upd_level != INTERNAL_COST_UPD_OFF) ||
726 cost_upd_level == INTERNAL_COST_UPD_TILE || fill_costs);
727 }
728
729 // Decide whether we want to update the mode entropy cost for the current frame.
730 // The logic is currently inherited from selective_disable_cdf_rtc.
731 static AOM_INLINE int should_force_mode_cost_update(const AV1_COMP *cpi) {
732 const REAL_TIME_SPEED_FEATURES *const rt_sf = &cpi->sf.rt_sf;
733 if (!rt_sf->frame_level_mode_cost_update) {
734 return false;
735 }
736
737 if (cpi->oxcf.algo_cfg.cdf_update_mode == 2) {
738 return cpi->frames_since_last_update == 1;
739 } else if (cpi->oxcf.algo_cfg.cdf_update_mode == 1) {
740 if (cpi->svc.number_spatial_layers == 1 &&
741 cpi->svc.number_temporal_layers == 1) {
742 const AV1_COMMON *const cm = &cpi->common;
743 const RATE_CONTROL *const rc = &cpi->rc;
744
745 return frame_is_intra_only(cm) || is_frame_resize_pending(cpi) ||
746 rc->high_source_sad || rc->frames_since_key < 10 ||
747 cpi->cyclic_refresh->counter_encode_maxq_scene_change < 10 ||
748 cm->current_frame.frame_number % 8 == 0;
749 } else if (cpi->svc.number_temporal_layers > 1) {
750 return cpi->svc.temporal_layer_id != cpi->svc.number_temporal_layers - 1;
751 }
752 }
753
754 return false;
755 }
756
757 void av1_initialize_rd_consts(AV1_COMP *cpi) {
758 AV1_COMMON *const cm = &cpi->common;
759 MACROBLOCK *const x = &cpi->td.mb;
760 SPEED_FEATURES *const sf = &cpi->sf;
761 RD_OPT *const rd = &cpi->rd;
762 int use_nonrd_pick_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
763 int frames_since_key = cpi->rc.frames_since_key;
764
765 const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
766 const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
767 const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
768 const FRAME_TYPE frame_type = cm->current_frame.frame_type;
769
770 const int qindex_rdmult =
771 cm->quant_params.base_qindex + cm->quant_params.y_dc_delta_q;
772 rd->RDMULT = av1_compute_rd_mult(
773 qindex_rdmult, cm->seq_params->bit_depth,
774 cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
775 boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
776 is_stat_consumption_stage(cpi));
777 #if CONFIG_RD_COMMAND
778 if (cpi->oxcf.pass == 2) {
779 const RD_COMMAND *rd_command = &cpi->rd_command;
780 if (rd_command->option_ls[rd_command->frame_index] ==
781 RD_OPTION_SET_Q_RDMULT) {
782 rd->RDMULT = rd_command->rdmult_ls[rd_command->frame_index];
783 }
784 }
785 #endif // CONFIG_RD_COMMAND
786
787 av1_set_error_per_bit(&x->errorperbit, rd->RDMULT);
788
789 set_block_thresholds(cm, rd, cpi->sf.rt_sf.use_nonrd_pick_mode);
790
791 populate_unified_cost_update_freq(cpi->oxcf.cost_upd_freq, sf);
792 const INTER_MODE_SPEED_FEATURES *const inter_sf = &cpi->sf.inter_sf;
793 // Frame level mv cost update
794 if (is_frame_level_cost_upd_freq_set(cm, inter_sf->mv_cost_upd_level,
795 use_nonrd_pick_mode, frames_since_key))
796 av1_fill_mv_costs(&cm->fc->nmvc, cm->features.cur_frame_force_integer_mv,
797 cm->features.allow_high_precision_mv, x->mv_costs);
798
799 // Frame level coefficient cost update
800 if (is_frame_level_cost_upd_freq_set(cm, inter_sf->coeff_cost_upd_level,
801 use_nonrd_pick_mode, frames_since_key))
802 av1_fill_coeff_costs(&x->coeff_costs, cm->fc, av1_num_planes(cm));
803
804 // Frame level mode cost update
805 if (should_force_mode_cost_update(cpi) ||
806 is_frame_level_cost_upd_freq_set(cm, inter_sf->mode_cost_upd_level,
807 use_nonrd_pick_mode, frames_since_key))
808 av1_fill_mode_rates(cm, &x->mode_costs, cm->fc);
809
810 // Frame level dv cost update
811 if (av1_need_dv_costs(cpi)) {
812 if (cpi->td.dv_costs_alloc == NULL) {
813 CHECK_MEM_ERROR(
814 cm, cpi->td.dv_costs_alloc,
815 (IntraBCMVCosts *)aom_malloc(sizeof(*cpi->td.dv_costs_alloc)));
816 cpi->td.mb.dv_costs = cpi->td.dv_costs_alloc;
817 }
818 av1_fill_dv_costs(&cm->fc->ndvc, x->dv_costs);
819 }
820 }
821
822 static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
823 // NOTE: The tables below must be of the same size.
824
825 // The functions described below are sampled at the four most significant
826 // bits of x^2 + 8 / 256.
827
828 // Normalized rate:
829 // This table models the rate for a Laplacian source with given variance
830 // when quantized with a uniform quantizer with given stepsize. The
831 // closed form expression is:
832 // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
833 // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
834 // and H(x) is the binary entropy function.
835 static const int rate_tab_q10[] = {
836 65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142,
837 4044, 3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
838 3133, 3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353,
839 2290, 2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
840 1608, 1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963,
841 911, 864, 821, 781, 745, 680, 623, 574, 530, 490, 455, 424,
842 395, 345, 304, 269, 239, 213, 190, 171, 154, 126, 104, 87,
843 73, 61, 52, 44, 38, 28, 21, 16, 12, 10, 8, 6,
844 5, 3, 2, 1, 1, 1, 0, 0,
845 };
846 // Normalized distortion:
847 // This table models the normalized distortion for a Laplacian source
848 // with given variance when quantized with a uniform quantizer
849 // with given stepsize. The closed form expression is:
850 // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
851 // where x = qpstep / sqrt(variance).
852 // Note the actual distortion is Dn * variance.
853 static const int dist_tab_q10[] = {
854 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5,
855 5, 6, 7, 7, 8, 9, 11, 12, 13, 15, 16, 17,
856 18, 21, 24, 26, 29, 31, 34, 36, 39, 44, 49, 54,
857 59, 64, 69, 73, 78, 88, 97, 106, 115, 124, 133, 142,
858 151, 167, 184, 200, 215, 231, 245, 260, 274, 301, 327, 351,
859 375, 397, 418, 439, 458, 495, 528, 559, 587, 613, 637, 659,
860 680, 717, 749, 777, 801, 823, 842, 859, 874, 899, 919, 936,
861 949, 960, 969, 977, 983, 994, 1001, 1006, 1010, 1013, 1015, 1017,
862 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
863 };
864 static const int xsq_iq_q10[] = {
865 0, 4, 8, 12, 16, 20, 24, 28, 32,
866 40, 48, 56, 64, 72, 80, 88, 96, 112,
867 128, 144, 160, 176, 192, 208, 224, 256, 288,
868 320, 352, 384, 416, 448, 480, 544, 608, 672,
869 736, 800, 864, 928, 992, 1120, 1248, 1376, 1504,
870 1632, 1760, 1888, 2016, 2272, 2528, 2784, 3040, 3296,
871 3552, 3808, 4064, 4576, 5088, 5600, 6112, 6624, 7136,
872 7648, 8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328,
873 16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688, 32736,
874 36832, 40928, 45024, 49120, 53216, 57312, 61408, 65504, 73696,
875 81888, 90080, 98272, 106464, 114656, 122848, 131040, 147424, 163808,
876 180192, 196576, 212960, 229344, 245728,
877 };
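// Piecewise-linear interpolation: xsq_iq_q10[] holds 8 samples per octave of
// x^2 (the four most significant bits of the scaled value); locate the
// bracketing grid points and blend the rate and distortion tables in Q10.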
878 const int tmp = (xsq_q10 >> 2) + 8;
879 const int k = get_msb(tmp) - 3;
880 const int xq = (k << 3) + ((tmp >> k) & 0x7);
881 const int one_q10 = 1 << 10;
882 const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
883 const int b_q10 = one_q10 - a_q10;
884 *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
885 *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
886 }
887
888 void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n_log2,
889 unsigned int qstep, int *rate,
890 int64_t *dist) {
891 // This function models the rate and distortion for a Laplacian
892 // source with given variance when quantized with a uniform quantizer
893 // with given stepsize. The closed form expressions are in:
894 // Hang and Chen, "Source Model for transform video coder and its
895 // application - Part I: Fundamental Theory", IEEE Trans. Circ.
896 // Sys. for Video Tech., April 1997.
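// The tables in model_rd_norm() are indexed by x^2 in Q10, where
// x = qstep / sqrt(var / 2^n_log2), i.e. var is the total over 2^n_log2
// samples and var / 2^n_log2 is the per-sample variance.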
897 if (var == 0) {
898 *rate = 0;
899 *dist = 0;
900 } else {
901 int d_q10, r_q10;
902 static const uint32_t MAX_XSQ_Q10 = 245727;
903 const uint64_t xsq_q10_64 =
904 (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
905 const int xsq_q10 = (int)AOMMIN(xsq_q10_64, MAX_XSQ_Q10);
906 model_rd_norm(xsq_q10, &r_q10, &d_q10);
907 *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - AV1_PROB_COST_SHIFT);
908 *dist = (var * (int64_t)d_q10 + 512) >> 10;
909 }
910 }
911
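// Catmull-Rom cubic interpolation over the middle interval [p[1], p[2]],
// evaluated at x in [0, 1] from four consecutive samples p[0..3].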
912 static double interp_cubic(const double *p, double x) {
913 return p[1] + 0.5 * x *
914 (p[2] - p[0] +
915 x * (2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] +
916 x * (3.0 * (p[1] - p[2]) + p[3] - p[0])));
917 }
918
919 /*
920 static double interp_bicubic(const double *p, int p_stride, double x,
921 double y) {
922 double q[4];
923 q[0] = interp_cubic(p, x);
924 q[1] = interp_cubic(p + p_stride, x);
925 q[2] = interp_cubic(p + 2 * p_stride, x);
926 q[3] = interp_cubic(p + 3 * p_stride, x);
927 return interp_cubic(q, y);
928 }
929 */
930
931 static const uint8_t bsize_curvfit_model_cat_lookup[BLOCK_SIZES_ALL] = {
932 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 1, 1, 2, 2, 3, 3
933 };
934
935 static int sse_norm_curvfit_model_cat_lookup(double sse_norm) {
936 return (sse_norm > 16.0);
937 }
938
939 // Models distortion by sse using a logistic function on
940 // l = log2(sse / q^2) as:
941 // dbysse = 16 / (1 + k exp(l + c))
942 static double get_dbysse_logistic(double l, double c, double k) {
943 const double A = 16.0;
944 const double dbysse = A / (1 + k * exp(l + c));
945 return dbysse;
946 }
947
948 // Models rate using a clamped linear function on
949 // l = log2(sse / q^2) as:
950 // rate = max(0, a + b * l)
951 static double get_rate_clamplinear(double l, double a, double b) {
952 const double rate = a + b * l;
953 return (rate < 0 ? 0 : rate);
954 }
955
956 static const uint8_t bsize_surffit_model_cat_lookup[BLOCK_SIZES_ALL] = {
957 0, 0, 0, 0, 1, 1, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 0, 0, 2, 2, 4, 4
958 };
959
960 static const double surffit_rate_params[9][4] = {
961 {
962 638.390212,
963 2.253108,
964 166.585650,
965 -3.939401,
966 },
967 {
968 5.256905,
969 81.997240,
970 -1.321771,
971 17.694216,
972 },
973 {
974 -74.193045,
975 72.431868,
976 -19.033152,
977 15.407276,
978 },
979 {
980 416.770113,
981 14.794188,
982 167.686830,
983 -6.997756,
984 },
985 {
986 378.511276,
987 9.558376,
988 154.658843,
989 -6.635663,
990 },
991 {
992 277.818787,
993 4.413180,
994 150.317637,
995 -9.893038,
996 },
997 {
998 142.212132,
999 11.542038,
1000 94.393964,
1001 -5.518517,
1002 },
1003 {
1004 219.100256,
1005 4.007421,
1006 108.932852,
1007 -6.981310,
1008 },
1009 {
1010 222.261971,
1011 3.251049,
1012 95.972916,
1013 -5.609789,
1014 },
1015 };
1016
1017 static const double surffit_dist_params[7] = { 1.475844, 4.328362, -5.680233,
1018 -0.500994, 0.554585, 4.839478,
1019 -0.695837 };
1020
1021 static void rate_surffit_model_params_lookup(BLOCK_SIZE bsize, double xm,
1022 double *rpar) {
1023 const int cat = bsize_surffit_model_cat_lookup[bsize];
1024 rpar[0] = surffit_rate_params[cat][0] + surffit_rate_params[cat][1] * xm;
1025 rpar[1] = surffit_rate_params[cat][2] + surffit_rate_params[cat][3] * xm;
1026 }
1027
1028 static void dist_surffit_model_params_lookup(BLOCK_SIZE bsize, double xm,
1029 double *dpar) {
1030 (void)bsize;
1031 const double *params = surffit_dist_params;
1032 dpar[0] = params[0] + params[1] / (1 + exp((xm + params[2]) * params[3]));
1033 dpar[1] = params[4] + params[5] * exp(params[6] * xm);
1034 }
1035
1036 void av1_model_rd_surffit(BLOCK_SIZE bsize, double sse_norm, double xm,
1037 double yl, double *rate_f, double *distbysse_f) {
1038 (void)sse_norm;
1039 double rpar[2], dpar[2];
1040 rate_surffit_model_params_lookup(bsize, xm, rpar);
1041 dist_surffit_model_params_lookup(bsize, xm, dpar);
1042
1043 *rate_f = get_rate_clamplinear(yl, rpar[0], rpar[1]);
1044 *distbysse_f = get_dbysse_logistic(yl, dpar[0], dpar[1]);
1045 }
1046
1047 static const double interp_rgrid_curv[4][65] = {
1048 {
1049 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
1050 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
1051 0.000000, 118.257702, 120.210658, 121.434853, 122.100487,
1052 122.377758, 122.436865, 72.290102, 96.974289, 101.652727,
1053 126.830141, 140.417377, 157.644879, 184.315291, 215.823873,
1054 262.300169, 335.919859, 420.624173, 519.185032, 619.854243,
1055 726.053595, 827.663369, 933.127475, 1037.988755, 1138.839609,
1056 1233.342933, 1333.508064, 1428.760126, 1533.396364, 1616.952052,
1057 1744.539319, 1803.413586, 1951.466618, 1994.227838, 2086.031680,
1058 2148.635443, 2239.068450, 2222.590637, 2338.859809, 2402.929011,
1059 2418.727875, 2435.342670, 2471.159469, 2523.187446, 2591.183827,
1060 2674.905840, 2774.110714, 2888.555675, 3017.997952, 3162.194773,
1061 3320.903365, 3493.880956, 3680.884773, 3881.672045, 4096.000000,
1062 },
1063 {
1064 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
1065 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
1066 0.000000, 13.087244, 15.919735, 25.930313, 24.412411,
1067 28.567417, 29.924194, 30.857010, 32.742979, 36.382570,
1068 39.210386, 42.265690, 47.378572, 57.014850, 82.740067,
1069 137.346562, 219.968084, 316.781856, 415.643773, 516.706538,
1070 614.914364, 714.303763, 815.512135, 911.210485, 1008.501528,
1071 1109.787854, 1213.772279, 1322.922561, 1414.752579, 1510.505641,
1072 1615.741888, 1697.989032, 1780.123933, 1847.453790, 1913.742309,
1073 1960.828122, 2047.500168, 2085.454095, 2129.230668, 2158.171824,
1074 2182.231724, 2217.684864, 2269.589211, 2337.264824, 2420.618694,
1075 2519.557814, 2633.989178, 2763.819779, 2908.956609, 3069.306660,
1076 3244.776927, 3435.274401, 3640.706076, 3860.978945, 4096.000000,
1077 },
1078 {
1079 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
1080 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
1081 0.000000, 4.656893, 5.123633, 5.594132, 6.162376,
1082 6.918433, 7.768444, 8.739415, 10.105862, 11.477328,
1083 13.236604, 15.421030, 19.093623, 25.801871, 46.724612,
1084 98.841054, 181.113466, 272.586364, 359.499769, 445.546343,
1085 525.944439, 605.188743, 681.793483, 756.668359, 838.486885,
1086 926.950356, 1015.482542, 1113.353926, 1204.897193, 1288.871992,
1087 1373.464145, 1455.746628, 1527.796460, 1588.475066, 1658.144771,
1088 1710.302500, 1807.563351, 1863.197608, 1927.281616, 1964.450872,
1089 2022.719898, 2100.041145, 2185.205712, 2280.993936, 2387.616216,
1090 2505.282950, 2634.204540, 2774.591385, 2926.653884, 3090.602436,
1091 3266.647443, 3454.999303, 3655.868416, 3869.465182, 4096.000000,
1092 },
1093 {
1094 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
1095 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
1096 0.000000, 0.337370, 0.391916, 0.468839, 0.566334,
1097 0.762564, 1.069225, 1.384361, 1.787581, 2.293948,
1098 3.251909, 4.412991, 8.050068, 11.606073, 27.668092,
1099 65.227758, 128.463938, 202.097653, 262.715851, 312.464873,
1100 355.601398, 400.609054, 447.201352, 495.761568, 552.871938,
1101 619.067625, 691.984883, 773.753288, 860.628503, 946.262808,
1102 1019.805896, 1106.061360, 1178.422145, 1244.852258, 1302.173987,
1103 1399.650266, 1548.092912, 1545.928652, 1670.817500, 1694.523823,
1104 1779.195362, 1882.155494, 1990.662097, 2108.325181, 2235.456119,
1105 2372.366287, 2519.367059, 2676.769812, 2844.885918, 3024.026754,
1106 3214.503695, 3416.628115, 3630.711389, 3857.064892, 4096.000000,
1107 },
1108 };
1109
1110 static const double interp_dgrid_curv[3][65] = {
1111 {
1112 16.000000, 15.962891, 15.925174, 15.886888, 15.848074, 15.808770,
1113 15.769015, 15.728850, 15.688313, 15.647445, 15.606284, 15.564870,
1114 15.525918, 15.483820, 15.373330, 15.126844, 14.637442, 14.184387,
1115 13.560070, 12.880717, 12.165995, 11.378144, 10.438769, 9.130790,
1116 7.487633, 5.688649, 4.267515, 3.196300, 2.434201, 1.834064,
1117 1.369920, 1.035921, 0.775279, 0.574895, 0.427232, 0.314123,
1118 0.233236, 0.171440, 0.128188, 0.092762, 0.067569, 0.049324,
1119 0.036330, 0.027008, 0.019853, 0.015539, 0.011093, 0.008733,
1120 0.007624, 0.008105, 0.005427, 0.004065, 0.003427, 0.002848,
1121 0.002328, 0.001865, 0.001457, 0.001103, 0.000801, 0.000550,
1122 0.000348, 0.000193, 0.000085, 0.000021, 0.000000,
1123 },
1124 {
1125 16.000000, 15.996116, 15.984769, 15.966413, 15.941505, 15.910501,
1126 15.873856, 15.832026, 15.785466, 15.734633, 15.679981, 15.621967,
1127 15.560961, 15.460157, 15.288367, 15.052462, 14.466922, 13.921212,
1128 13.073692, 12.222005, 11.237799, 9.985848, 8.898823, 7.423519,
1129 5.995325, 4.773152, 3.744032, 2.938217, 2.294526, 1.762412,
1130 1.327145, 1.020728, 0.765535, 0.570548, 0.425833, 0.313825,
1131 0.232959, 0.171324, 0.128174, 0.092750, 0.067558, 0.049319,
1132 0.036330, 0.027008, 0.019853, 0.015539, 0.011093, 0.008733,
1133 0.007624, 0.008105, 0.005427, 0.004065, 0.003427, 0.002848,
1134 0.002328, 0.001865, 0.001457, 0.001103, 0.000801, 0.000550,
1135 0.000348, 0.000193, 0.000085, 0.000021, -0.000000,
1136 },
1137 };
1138
1139 void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr,
1140 double *rate_f, double *distbysse_f) {
1141 const double x_start = -15.5;
1142 const double x_end = 16.5;
1143 const double x_step = 0.5;
1144 const double epsilon = 1e-6;
1145 const int rcat = bsize_curvfit_model_cat_lookup[bsize];
1146 const int dcat = sse_norm_curvfit_model_cat_lookup(sse_norm);
1147 (void)x_end;
1148
1149 xqr = AOMMAX(xqr, x_start + x_step + epsilon);
1150 xqr = AOMMIN(xqr, x_end - x_step - epsilon);
1151 const double x = (xqr - x_start) / x_step;
1152 const int xi = (int)floor(x);
1153 const double xo = x - xi;
1154
1155 assert(xi > 0);
1156
1157 const double *prate = &interp_rgrid_curv[rcat][(xi - 1)];
1158 *rate_f = interp_cubic(prate, xo);
1159 const double *pdist = &interp_dgrid_curv[dcat][(xi - 1)];
1160 *distbysse_f = interp_cubic(pdist, xo);
1161 }
1162
1163 static void get_entropy_contexts_plane(BLOCK_SIZE plane_bsize,
1164 const struct macroblockd_plane *pd,
1165 ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
1166 ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
1167 const int num_4x4_w = mi_size_wide[plane_bsize];
1168 const int num_4x4_h = mi_size_high[plane_bsize];
1169 const ENTROPY_CONTEXT *const above = pd->above_entropy_context;
1170 const ENTROPY_CONTEXT *const left = pd->left_entropy_context;
1171
1172 memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
1173 memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
1174 }
1175
1176 void av1_get_entropy_contexts(BLOCK_SIZE plane_bsize,
1177 const struct macroblockd_plane *pd,
1178 ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
1179 ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
1180 assert(plane_bsize < BLOCK_SIZES_ALL);
1181 get_entropy_contexts_plane(plane_bsize, pd, t_above, t_left);
1182 }
1183
1184 // Special clamping used in the encoder when calculating a prediction
1185 //
1186 // Logically, all pixel fetches used for prediction are clamped against the
1187 // edges of the frame. But doing this directly is slow, so instead we allocate
1188 // a finite border around the frame and fill it with copies of the outermost
1189 // pixels.
1190 //
1191 // Since this border is finite, we need to clamp the motion vector before
1192 // prediction in order to avoid out-of-bounds reads. At the same time, this
1193 // clamp must not change the prediction result.
1194 //
1195 // We can balance both of these concerns by calculating how far we would have
1196 // to go in each direction before the extended prediction region (the current
1197 // block + AOM_INTERP_EXTEND many pixels around the block) would be mapped
1198 // so that it touches the frame only at one row or column. This is a special
1199 // point because any more extreme MV will always lead to the same prediction.
1200 // So it is safe to clamp at that point.
1201 //
1202 // In the worst case, this requires a border of
1203 // max_block_width + 2*AOM_INTERP_EXTEND = 128 + 2*4 = 136 pixels
1204 // around the frame edges.
1205 static INLINE void enc_clamp_mv(const AV1_COMMON *cm, const MACROBLOCKD *xd,
1206 MV *mv) {
1207 int bw = xd->width << MI_SIZE_LOG2;
1208 int bh = xd->height << MI_SIZE_LOG2;
1209
1210 int px_to_left_edge = xd->mi_col << MI_SIZE_LOG2;
1211 int px_to_right_edge = (cm->mi_params.mi_cols - xd->mi_col) << MI_SIZE_LOG2;
1212 int px_to_top_edge = xd->mi_row << MI_SIZE_LOG2;
1213 int px_to_bottom_edge = (cm->mi_params.mi_rows - xd->mi_row) << MI_SIZE_LOG2;
1214
1215 const SubpelMvLimits mv_limits = {
1216 .col_min = -GET_MV_SUBPEL(px_to_left_edge + bw + AOM_INTERP_EXTEND),
1217 .col_max = GET_MV_SUBPEL(px_to_right_edge + AOM_INTERP_EXTEND),
1218 .row_min = -GET_MV_SUBPEL(px_to_top_edge + bh + AOM_INTERP_EXTEND),
1219 .row_max = GET_MV_SUBPEL(px_to_bottom_edge + AOM_INTERP_EXTEND)
1220 };
1221 clamp_mv(mv, &mv_limits);
1222 }
1223
1224 void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
1225 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
1226 const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
1227 const int_mv ref_mv =
1228 av1_get_ref_mv_from_stack(0, ref_frames, 0, &x->mbmi_ext);
1229 const int_mv ref_mv1 =
1230 av1_get_ref_mv_from_stack(0, ref_frames, 1, &x->mbmi_ext);
1231 MV pred_mv[MAX_MV_REF_CANDIDATES + 1];
1232 int num_mv_refs = 0;
1233 pred_mv[num_mv_refs++] = ref_mv.as_mv;
1234 if (ref_mv.as_int != ref_mv1.as_int) {
1235 pred_mv[num_mv_refs++] = ref_mv1.as_mv;
1236 }
1237
1238 assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));
1239
1240 const uint8_t *const src_y_ptr = x->plane[0].src.buf;
1241 int zero_seen = 0;
1242 int best_sad = INT_MAX;
1243 int max_mv = 0;
1244 // Get the sad for each candidate reference mv.
1245 for (int i = 0; i < num_mv_refs; ++i) {
1246 MV *this_mv = &pred_mv[i];
1247 enc_clamp_mv(&cpi->common, &x->e_mbd, this_mv);
1248
1249 const int fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
1250 const int fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
1251 max_mv = AOMMAX(max_mv, AOMMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);
1252
1253 if (fp_row == 0 && fp_col == 0 && zero_seen) continue;
1254 zero_seen |= (fp_row == 0 && fp_col == 0);
1255
1256 const uint8_t *const ref_y_ptr =
1257 &ref_y_buffer[ref_y_stride * fp_row + fp_col];
1258 // Find sad for current vector.
1259 const int this_sad = cpi->ppi->fn_ptr[block_size].sdf(
1260 src_y_ptr, x->plane[0].src.stride, ref_y_ptr, ref_y_stride);
1261 // Note if it is the best so far.
1262 if (this_sad < best_sad) {
1263 best_sad = this_sad;
1264 }
1265 if (i == 0)
1266 x->pred_mv0_sad[ref_frame] = this_sad;
1267 else if (i == 1)
1268 x->pred_mv1_sad[ref_frame] = this_sad;
1269 }
1270
1271 // Record the maximum MV magnitude and the best SAD among the candidate MVs.
1272 x->max_mv_context[ref_frame] = max_mv;
1273 x->pred_mv_sad[ref_frame] = best_sad;
1274 }
1275
1276 void av1_setup_pred_block(const MACROBLOCKD *xd,
1277 struct buf_2d dst[MAX_MB_PLANE],
1278 const YV12_BUFFER_CONFIG *src,
1279 const struct scale_factors *scale,
1280 const struct scale_factors *scale_uv,
1281 const int num_planes) {
1282 dst[0].buf = src->y_buffer;
1283 dst[0].stride = src->y_stride;
1284 dst[1].buf = src->u_buffer;
1285 dst[2].buf = src->v_buffer;
1286 dst[1].stride = dst[2].stride = src->uv_stride;
1287
1288 const int mi_row = xd->mi_row;
1289 const int mi_col = xd->mi_col;
1290 for (int i = 0; i < num_planes; ++i) {
1291 setup_pred_plane(dst + i, xd->mi[0]->bsize, dst[i].buf,
1292 i ? src->uv_crop_width : src->y_crop_width,
1293 i ? src->uv_crop_height : src->y_crop_height,
1294 dst[i].stride, mi_row, mi_col, i ? scale_uv : scale,
1295 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
1296 }
1297 }
1298
1299 YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const AV1_COMP *cpi,
1300 int ref_frame) {
1301 assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
1302 RefCntBuffer *const scaled_buf = cpi->scaled_ref_buf[ref_frame - 1];
1303 const RefCntBuffer *const ref_buf =
1304 get_ref_frame_buf(&cpi->common, ref_frame);
1305 return (scaled_buf != ref_buf && scaled_buf != NULL) ? &scaled_buf->buf
1306 : NULL;
1307 }
1308
1309 int av1_get_switchable_rate(const MACROBLOCK *x, const MACROBLOCKD *xd,
1310 InterpFilter interp_filter, int dual_filter) {
1311 if (interp_filter == SWITCHABLE) {
1312 const MB_MODE_INFO *const mbmi = xd->mi[0];
1313 int inter_filter_cost = 0;
1314 for (int dir = 0; dir < 2; ++dir) {
1315 if (dir && !dual_filter) break;
1316 const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
1317 const InterpFilter filter =
1318 av1_extract_interp_filter(mbmi->interp_filters, dir);
1319 inter_filter_cost += x->mode_costs.switchable_interp_costs[ctx][filter];
1320 }
1321 return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost;
1322 } else {
1323 return 0;
1324 }
1325 }
1326
1327 void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
1328 RD_OPT *const rd = &cpi->rd;
1329
1330 // Set baseline threshold values.
1331 av1_zero(rd->thresh_mult);
1332
1333 rd->thresh_mult[THR_NEARESTMV] = 300;
1334 rd->thresh_mult[THR_NEARESTL2] = 300;
1335 rd->thresh_mult[THR_NEARESTL3] = 300;
1336 rd->thresh_mult[THR_NEARESTB] = 300;
1337 rd->thresh_mult[THR_NEARESTA2] = 300;
1338 rd->thresh_mult[THR_NEARESTA] = 300;
1339 rd->thresh_mult[THR_NEARESTG] = 300;
1340
1341 rd->thresh_mult[THR_NEWMV] = 1000;
1342 rd->thresh_mult[THR_NEWL2] = 1000;
1343 rd->thresh_mult[THR_NEWL3] = 1000;
1344 rd->thresh_mult[THR_NEWB] = 1000;
1345 rd->thresh_mult[THR_NEWA2] = 1100;
1346 rd->thresh_mult[THR_NEWA] = 1000;
1347 rd->thresh_mult[THR_NEWG] = 1000;
1348
1349 rd->thresh_mult[THR_NEARMV] = 1000;
1350 rd->thresh_mult[THR_NEARL2] = 1000;
1351 rd->thresh_mult[THR_NEARL3] = 1000;
1352 rd->thresh_mult[THR_NEARB] = 1000;
1353 rd->thresh_mult[THR_NEARA2] = 1000;
1354 rd->thresh_mult[THR_NEARA] = 1000;
1355 rd->thresh_mult[THR_NEARG] = 1000;
1356
1357 rd->thresh_mult[THR_GLOBALMV] = 2200;
1358 rd->thresh_mult[THR_GLOBALL2] = 2000;
1359 rd->thresh_mult[THR_GLOBALL3] = 2000;
1360 rd->thresh_mult[THR_GLOBALB] = 2400;
1361 rd->thresh_mult[THR_GLOBALA2] = 2000;
1362 rd->thresh_mult[THR_GLOBALG] = 2000;
1363 rd->thresh_mult[THR_GLOBALA] = 2400;
1364
1365 rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] = 1100;
1366 rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A] = 1000;
1367 rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A] = 800;
1368 rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA] = 900;
1369 rd->thresh_mult[THR_COMP_NEAREST_NEARESTLB] = 1000;
1370 rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2B] = 1000;
1371 rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3B] = 1000;
1372 rd->thresh_mult[THR_COMP_NEAREST_NEARESTGB] = 1000;
1373 rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA2] = 1000;
1374 rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A2] = 1000;
1375 rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A2] = 1000;
1376 rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA2] = 1000;
1377
1378 rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL2] = 2000;
1379 rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL3] = 2000;
1380 rd->thresh_mult[THR_COMP_NEAREST_NEARESTLG] = 2000;
1381 rd->thresh_mult[THR_COMP_NEAREST_NEARESTBA] = 2000;
1382
1383 rd->thresh_mult[THR_COMP_NEAR_NEARLA] = 1200;
1384 rd->thresh_mult[THR_COMP_NEAREST_NEWLA] = 1500;
1385 rd->thresh_mult[THR_COMP_NEW_NEARESTLA] = 1500;
1386 rd->thresh_mult[THR_COMP_NEAR_NEWLA] = 1530;
1387 rd->thresh_mult[THR_COMP_NEW_NEARLA] = 1870;
1388 rd->thresh_mult[THR_COMP_NEW_NEWLA] = 2400;
1389 rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA] = 2750;
1390
1391 rd->thresh_mult[THR_COMP_NEAR_NEARL2A] = 1200;
1392 rd->thresh_mult[THR_COMP_NEAREST_NEWL2A] = 1500;
1393 rd->thresh_mult[THR_COMP_NEW_NEARESTL2A] = 1500;
1394 rd->thresh_mult[THR_COMP_NEAR_NEWL2A] = 1870;
1395 rd->thresh_mult[THR_COMP_NEW_NEARL2A] = 1700;
1396 rd->thresh_mult[THR_COMP_NEW_NEWL2A] = 1800;
1397 rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A] = 2500;
1398
1399 rd->thresh_mult[THR_COMP_NEAR_NEARL3A] = 1200;
1400 rd->thresh_mult[THR_COMP_NEAREST_NEWL3A] = 1500;
1401 rd->thresh_mult[THR_COMP_NEW_NEARESTL3A] = 1500;
1402 rd->thresh_mult[THR_COMP_NEAR_NEWL3A] = 1700;
1403 rd->thresh_mult[THR_COMP_NEW_NEARL3A] = 1700;
1404 rd->thresh_mult[THR_COMP_NEW_NEWL3A] = 2000;
1405 rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A] = 3000;
1406
1407 rd->thresh_mult[THR_COMP_NEAR_NEARGA] = 1320;
1408 rd->thresh_mult[THR_COMP_NEAREST_NEWGA] = 1500;
1409 rd->thresh_mult[THR_COMP_NEW_NEARESTGA] = 1500;
1410 rd->thresh_mult[THR_COMP_NEAR_NEWGA] = 2040;
1411 rd->thresh_mult[THR_COMP_NEW_NEARGA] = 1700;
1412 rd->thresh_mult[THR_COMP_NEW_NEWGA] = 2000;
1413 rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA] = 2250;
1414
1415 rd->thresh_mult[THR_COMP_NEAR_NEARLB] = 1200;
1416 rd->thresh_mult[THR_COMP_NEAREST_NEWLB] = 1500;
1417 rd->thresh_mult[THR_COMP_NEW_NEARESTLB] = 1500;
1418 rd->thresh_mult[THR_COMP_NEAR_NEWLB] = 1360;
1419 rd->thresh_mult[THR_COMP_NEW_NEARLB] = 1700;
1420 rd->thresh_mult[THR_COMP_NEW_NEWLB] = 2400;
1421 rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLB] = 2250;
1422
1423 rd->thresh_mult[THR_COMP_NEAR_NEARL2B] = 1200;
1424 rd->thresh_mult[THR_COMP_NEAREST_NEWL2B] = 1500;
1425 rd->thresh_mult[THR_COMP_NEW_NEARESTL2B] = 1500;
1426 rd->thresh_mult[THR_COMP_NEAR_NEWL2B] = 1700;
1427 rd->thresh_mult[THR_COMP_NEW_NEARL2B] = 1700;
1428 rd->thresh_mult[THR_COMP_NEW_NEWL2B] = 2000;
1429 rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2B] = 2500;
1430
1431 rd->thresh_mult[THR_COMP_NEAR_NEARL3B] = 1200;
1432 rd->thresh_mult[THR_COMP_NEAREST_NEWL3B] = 1500;
1433 rd->thresh_mult[THR_COMP_NEW_NEARESTL3B] = 1500;
1434 rd->thresh_mult[THR_COMP_NEAR_NEWL3B] = 1870;
1435 rd->thresh_mult[THR_COMP_NEW_NEARL3B] = 1700;
1436 rd->thresh_mult[THR_COMP_NEW_NEWL3B] = 2000;
1437 rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3B] = 2500;
1438
1439 rd->thresh_mult[THR_COMP_NEAR_NEARGB] = 1200;
1440 rd->thresh_mult[THR_COMP_NEAREST_NEWGB] = 1500;
1441 rd->thresh_mult[THR_COMP_NEW_NEARESTGB] = 1500;
1442 rd->thresh_mult[THR_COMP_NEAR_NEWGB] = 1700;
1443 rd->thresh_mult[THR_COMP_NEW_NEARGB] = 1700;
1444 rd->thresh_mult[THR_COMP_NEW_NEWGB] = 2000;
1445 rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGB] = 2500;
1446
1447 rd->thresh_mult[THR_COMP_NEAR_NEARLA2] = 1200;
1448 rd->thresh_mult[THR_COMP_NEAREST_NEWLA2] = 1800;
1449 rd->thresh_mult[THR_COMP_NEW_NEARESTLA2] = 1500;
1450 rd->thresh_mult[THR_COMP_NEAR_NEWLA2] = 1700;
1451 rd->thresh_mult[THR_COMP_NEW_NEARLA2] = 1700;
1452 rd->thresh_mult[THR_COMP_NEW_NEWLA2] = 2000;
1453 rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA2] = 2500;
1454
1455 rd->thresh_mult[THR_COMP_NEAR_NEARL2A2] = 1200;
1456 rd->thresh_mult[THR_COMP_NEAREST_NEWL2A2] = 1500;
1457 rd->thresh_mult[THR_COMP_NEW_NEARESTL2A2] = 1500;
1458 rd->thresh_mult[THR_COMP_NEAR_NEWL2A2] = 1700;
1459 rd->thresh_mult[THR_COMP_NEW_NEARL2A2] = 1700;
1460 rd->thresh_mult[THR_COMP_NEW_NEWL2A2] = 2000;
1461 rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A2] = 2500;
1462
1463 rd->thresh_mult[THR_COMP_NEAR_NEARL3A2] = 1440;
1464 rd->thresh_mult[THR_COMP_NEAREST_NEWL3A2] = 1500;
1465 rd->thresh_mult[THR_COMP_NEW_NEARESTL3A2] = 1500;
1466 rd->thresh_mult[THR_COMP_NEAR_NEWL3A2] = 1700;
1467 rd->thresh_mult[THR_COMP_NEW_NEARL3A2] = 1700;
1468 rd->thresh_mult[THR_COMP_NEW_NEWL3A2] = 2000;
1469 rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A2] = 2500;
1470
1471 rd->thresh_mult[THR_COMP_NEAR_NEARGA2] = 1200;
1472 rd->thresh_mult[THR_COMP_NEAREST_NEWGA2] = 1500;
1473 rd->thresh_mult[THR_COMP_NEW_NEARESTGA2] = 1500;
1474 rd->thresh_mult[THR_COMP_NEAR_NEWGA2] = 1700;
1475 rd->thresh_mult[THR_COMP_NEW_NEARGA2] = 1700;
1476 rd->thresh_mult[THR_COMP_NEW_NEWGA2] = 2000;
1477 rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA2] = 2750;
1478
1479 rd->thresh_mult[THR_COMP_NEAR_NEARLL2] = 1600;
1480 rd->thresh_mult[THR_COMP_NEAREST_NEWLL2] = 2000;
1481 rd->thresh_mult[THR_COMP_NEW_NEARESTLL2] = 2000;
1482 rd->thresh_mult[THR_COMP_NEAR_NEWLL2] = 2640;
1483 rd->thresh_mult[THR_COMP_NEW_NEARLL2] = 2200;
1484 rd->thresh_mult[THR_COMP_NEW_NEWLL2] = 2400;
1485 rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL2] = 3200;
1486
1487 rd->thresh_mult[THR_COMP_NEAR_NEARLL3] = 1600;
1488 rd->thresh_mult[THR_COMP_NEAREST_NEWLL3] = 2000;
1489 rd->thresh_mult[THR_COMP_NEW_NEARESTLL3] = 1800;
1490 rd->thresh_mult[THR_COMP_NEAR_NEWLL3] = 2200;
1491 rd->thresh_mult[THR_COMP_NEW_NEARLL3] = 2200;
1492 rd->thresh_mult[THR_COMP_NEW_NEWLL3] = 2400;
1493 rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL3] = 3200;
1494
1495 rd->thresh_mult[THR_COMP_NEAR_NEARLG] = 1760;
1496 rd->thresh_mult[THR_COMP_NEAREST_NEWLG] = 2400;
1497 rd->thresh_mult[THR_COMP_NEW_NEARESTLG] = 2000;
1498 rd->thresh_mult[THR_COMP_NEAR_NEWLG] = 1760;
1499 rd->thresh_mult[THR_COMP_NEW_NEARLG] = 2640;
1500 rd->thresh_mult[THR_COMP_NEW_NEWLG] = 2400;
1501 rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLG] = 3200;
1502
1503 rd->thresh_mult[THR_COMP_NEAR_NEARBA] = 1600;
1504 rd->thresh_mult[THR_COMP_NEAREST_NEWBA] = 2000;
1505 rd->thresh_mult[THR_COMP_NEW_NEARESTBA] = 2000;
1506 rd->thresh_mult[THR_COMP_NEAR_NEWBA] = 2200;
1507 rd->thresh_mult[THR_COMP_NEW_NEARBA] = 1980;
1508 rd->thresh_mult[THR_COMP_NEW_NEWBA] = 2640;
1509 rd->thresh_mult[THR_COMP_GLOBAL_GLOBALBA] = 3200;
1510
1511 rd->thresh_mult[THR_DC] = 1000;
1512 rd->thresh_mult[THR_PAETH] = 1000;
1513 rd->thresh_mult[THR_SMOOTH] = 2200;
1514 rd->thresh_mult[THR_SMOOTH_V] = 2000;
1515 rd->thresh_mult[THR_SMOOTH_H] = 2000;
1516 rd->thresh_mult[THR_H_PRED] = 2000;
1517 rd->thresh_mult[THR_V_PRED] = 1800;
1518 rd->thresh_mult[THR_D135_PRED] = 2500;
1519 rd->thresh_mult[THR_D203_PRED] = 2000;
1520 rd->thresh_mult[THR_D157_PRED] = 2500;
1521 rd->thresh_mult[THR_D67_PRED] = 2000;
1522 rd->thresh_mult[THR_D113_PRED] = 2500;
1523 rd->thresh_mult[THR_D45_PRED] = 2500;
1524 }
1525
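// Adaptive update of the RD threshold factors: the factor of the winning mode
// decays geometrically (fact -= fact >> RD_THRESH_LOG_DEC_FACTOR) so that mode
// is tried more readily next time, while every other mode's factor grows by
// RD_THRESH_INC, capped at max_rd_thresh_factor, so repeatedly losing modes
// become progressively easier to skip.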
static INLINE void update_thr_fact(int (*factor_buf)[MAX_MODES],
                                   THR_MODES best_mode_index,
                                   THR_MODES mode_start, THR_MODES mode_end,
                                   BLOCK_SIZE min_size, BLOCK_SIZE max_size,
                                   int max_rd_thresh_factor) {
  for (THR_MODES mode = mode_start; mode < mode_end; ++mode) {
    for (BLOCK_SIZE bs = min_size; bs <= max_size; ++bs) {
      int *const fact = &factor_buf[bs][mode];
      if (mode == best_mode_index) {
        *fact -= (*fact >> RD_THRESH_LOG_DEC_FACTOR);
      } else {
        *fact = AOMMIN(*fact + RD_THRESH_INC, max_rd_thresh_factor);
      }
    }
  }
}

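// Applies the adaptive threshold update to both the given inter and intra mode
// ranges. For regular block sizes the update covers a window of sizes around
// bsize (bsize - 2 .. bsize + 2, clamped to [BLOCK_4X4, sb_size]) so that
// neighboring partition sizes also benefit from the decision; block sizes that
// compare above sb_size in the BLOCK_SIZE enum (the 1:4 / 4:1 shapes) only
// update their own size.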
void av1_update_rd_thresh_fact(
    const AV1_COMMON *const cm, int (*factor_buf)[MAX_MODES],
    int use_adaptive_rd_thresh, BLOCK_SIZE bsize, THR_MODES best_mode_index,
    THR_MODES inter_mode_start, THR_MODES inter_mode_end,
    THR_MODES intra_mode_start, THR_MODES intra_mode_end) {
  assert(use_adaptive_rd_thresh > 0);
  const int max_rd_thresh_factor = use_adaptive_rd_thresh * RD_THRESH_MAX_FACT;

  const int bsize_is_1_to_4 = bsize > cm->seq_params->sb_size;
  BLOCK_SIZE min_size, max_size;
  if (bsize_is_1_to_4) {
    // This part handles block sizes with 1:4 and 4:1 aspect ratios
    // TODO(any): Experiment with threshold update for parent/child blocks
    min_size = bsize;
    max_size = bsize;
  } else {
    min_size = AOMMAX(bsize - 2, BLOCK_4X4);
    max_size = AOMMIN(bsize + 2, (int)cm->seq_params->sb_size);
  }

  update_thr_fact(factor_buf, best_mode_index, inter_mode_start, inter_mode_end,
                  min_size, max_size, max_rd_thresh_factor);
  update_thr_fact(factor_buf, best_mode_index, intra_mode_start, intra_mode_end,
                  min_size, max_size, max_rd_thresh_factor);
}

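// Returns a rate penalty, derived from the DC quantizer step, used to bias
// mode decisions against intra prediction in inter frames. The scale factors
// compensate for av1_dc_quant_QTX() returning steps in a higher-precision
// range at 10 and 12 bits (4x and 16x the 8-bit range), so 20 * q, 5 * q and
// ROUND_POWER_OF_TWO(5 * q, 2) produce comparable penalties across bit depths.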
int av1_get_intra_cost_penalty(int qindex, int qdelta,
                               aom_bit_depth_t bit_depth) {
  const int q = av1_dc_quant_QTX(qindex, qdelta, bit_depth);
  switch (bit_depth) {
    case AOM_BITS_8: return 20 * q;
    case AOM_BITS_10: return 5 * q;
    case AOM_BITS_12: return ROUND_POWER_OF_TWO(5 * q, 2);
    default:
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
      return -1;
  }
}

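// A minimal usage sketch for the penalty above; the field accesses below are
// assumptions based on the current AV1_COMMON layout, not code from this file:
//
//   const int penalty = av1_get_intra_cost_penalty(
//       cm->quant_params.base_qindex, cm->quant_params.y_dc_delta_q,
//       cm->seq_params->bit_depth);
//   // Add `penalty` to the rate term of intra candidates when searching an
//   // inter frame so intra is only selected when it is clearly better.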