1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <assert.h>
12 #include <math.h>
13 #include <stdio.h>
14
15 #include "./vp9_rtcd.h"
16
17 #include "vpx_dsp/vpx_dsp_common.h"
18 #include "vpx_mem/vpx_mem.h"
19 #include "vpx_ports/bitops.h"
20 #include "vpx_ports/mem.h"
21 #include "vpx_ports/system_state.h"
22
23 #include "vp9/common/vp9_common.h"
24 #include "vp9/common/vp9_entropy.h"
25 #include "vp9/common/vp9_entropymode.h"
26 #include "vp9/common/vp9_mvref_common.h"
27 #include "vp9/common/vp9_pred_common.h"
28 #include "vp9/common/vp9_quant_common.h"
29 #include "vp9/common/vp9_reconinter.h"
30 #include "vp9/common/vp9_reconintra.h"
31 #include "vp9/common/vp9_seg_common.h"
32
33 #include "vp9/encoder/vp9_cost.h"
34 #include "vp9/encoder/vp9_encodemb.h"
35 #include "vp9/encoder/vp9_encodemv.h"
36 #include "vp9/encoder/vp9_encoder.h"
37 #include "vp9/encoder/vp9_mcomp.h"
38 #include "vp9/encoder/vp9_quantize.h"
39 #include "vp9/encoder/vp9_ratectrl.h"
40 #include "vp9/encoder/vp9_rd.h"
41 #include "vp9/encoder/vp9_tokenize.h"
42
43 #define RD_THRESH_POW 1.25
44
45 // Factor to weigh the rate for switchable interp filters.
46 #define SWITCHABLE_INTERP_RATE_FACTOR 1
47
// Mark an RD cost as "worst possible" so any real result will replace it.
void vp9_rd_cost_reset(RD_COST *rd_cost) {
  rd_cost->rdcost = INT64_MAX;
  rd_cost->dist = INT64_MAX;
  rd_cost->rate = INT_MAX;
}
53
// Zero an RD cost accumulator before summing partial rate/distortion terms.
void vp9_rd_cost_init(RD_COST *rd_cost) {
  rd_cost->rdcost = 0;
  rd_cost->dist = 0;
  rd_cost->rate = 0;
}
59
// Computes lambda * rate + distortion using the sign-aware RDCOST macro
// variants so that negative rate or distortion deltas do not overflow the
// underlying fixed-point arithmetic.
int64_t vp9_calculate_rd_cost(int mult, int div, int rate, int64_t dist) {
  assert(mult >= 0);
  assert(div > 0);
  if (rate >= 0) {
    return dist >= 0 ? RDCOST(mult, div, rate, dist)
                     : RDCOST_NEG_D(mult, div, rate, -dist);
  }
  // rate < 0 from here on.
  if (dist >= 0) return RDCOST_NEG_R(mult, div, -rate, dist);
  return -RDCOST(mult, div, -rate, -dist);
}
74
// Refresh rd_cost->rdcost from its rate/dist components, or reset the whole
// struct to "worst" if either component is still at its sentinel value.
void vp9_rd_cost_update(int mult, int div, RD_COST *rd_cost) {
  if (rd_cost->rate >= INT_MAX || rd_cost->dist >= INT64_MAX) {
    vp9_rd_cost_reset(rd_cost);
    return;
  }
  rd_cost->rdcost =
      vp9_calculate_rd_cost(mult, div, rd_cost->rate, rd_cost->dist);
}
83
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
// Indexed by BLOCK_SIZE (4x4 first, 64x64 last).
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
};
91
// Precomputes bit costs for intra y/uv modes, switchable interpolation
// filters and tx sizes from the current frame context probabilities.
static void fill_mode_costs(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const fc = cpi->common.fc;
  int i, j;

  // Key-frame y-mode costs are conditioned on the (above, left) mode pair.
  for (i = 0; i < INTRA_MODES; ++i) {
    for (j = 0; j < INTRA_MODES; ++j) {
      vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
                      vp9_intra_mode_tree);
    }
  }

  vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
  // UV mode costs, conditioned on the y mode, for key and inter frames.
  for (i = 0; i < INTRA_MODES; ++i) {
    vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME][i],
                    vp9_kf_uv_mode_prob[i], vp9_intra_mode_tree);
    vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME][i],
                    fc->uv_mode_prob[i], vp9_intra_mode_tree);
  }

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
    vp9_cost_tokens(cpi->switchable_interp_costs[i],
                    fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
  }

  // Tx-size costs: the tx size is coded as a sequence of binary decisions;
  // sum the one-bit/zero-bit costs along the decision path for each
  // (max tx size i, context j, chosen size k). The largest size omits the
  // final "stop" bit, hence the (k == i) adjustment on the loop bound.
  for (i = TX_8X8; i < TX_SIZES; ++i) {
    for (j = 0; j < TX_SIZE_CONTEXTS; ++j) {
      const vpx_prob *tx_probs = get_tx_probs(i, j, &fc->tx_probs);
      int k;
      for (k = 0; k <= i; ++k) {
        int cost = 0;
        int m;
        for (m = 0; m <= k - (k == i); ++m) {
          if (m == k)
            cost += vp9_cost_zero(tx_probs[m]);
          else
            cost += vp9_cost_one(tx_probs[m]);
        }
        cpi->tx_size_cost[i - 1][j][k] = cost;
      }
    }
  }
}
134
// Builds the coefficient token cost tables for every tx size, plane type,
// reference type, coefficient band and context from the compact model
// probabilities in |p|.
static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  int i, j, k, l;
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; ++t)
    for (i = 0; i < PLANE_TYPES; ++i)
      for (j = 0; j < REF_TYPES; ++j)
        for (k = 0; k < COEF_BANDS; ++k)
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
            vpx_prob probs[ENTROPY_NODES];
            // Expand the compact model probabilities to the full coef tree.
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            // Index 0: costs including the EOB decision; index 1: costs with
            // the EOB decision skipped.
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs, vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
                                 vp9_coef_tree);
            // Both variants must agree on the EOB token cost.
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
          }
}
153
// Values are now correlated to quantizer.
// SAD-per-bit lookup tables, indexed by qindex, used by motion estimation;
// one pair (16x16 and 4x4 variants) per supported bit depth. Filled in by
// vp9_init_me_luts().
static int sad_per_bit16lut_8[QINDEX_RANGE];
static int sad_per_bit4lut_8[QINDEX_RANGE];

#if CONFIG_VP9_HIGHBITDEPTH
static int sad_per_bit16lut_10[QINDEX_RANGE];
static int sad_per_bit4lut_10[QINDEX_RANGE];
static int sad_per_bit16lut_12[QINDEX_RANGE];
static int sad_per_bit4lut_12[QINDEX_RANGE];
#endif
164
// Fills one pair of SAD-per-bit tables with a linear model of the true
// quantizer value for each qindex at the given bit depth.
// Initialize the sad lut tables using a formulaic calculation for now.
// This is to make it easier to resolve the impact of experimental changes
// to the quantizer tables.
static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
                            vpx_bit_depth_t bit_depth) {
  int idx;
  for (idx = 0; idx < range; ++idx) {
    const double q = vp9_convert_qindex_to_q(idx, bit_depth);
    bit16lut[idx] = (int)(0.0418 * q + 2.4107);
    bit4lut[idx] = (int)(0.063 * q + 2.742);
  }
}
177
// One-time initialization of the SAD-per-bit tables for every bit depth
// the build supports.
void vp9_init_me_luts(void) {
  init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
                  VPX_BITS_8);
#if CONFIG_VP9_HIGHBITDEPTH
  init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
                  VPX_BITS_10);
  init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
                  VPX_BITS_12);
#endif
}
188
// Additional rd modulation applied in modulate_rdmult(), indexed by
// min(gfu_boost / 100, 15): higher boost -> smaller addition.
static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
                                         8,  8,  4,  4,  2,  2,  1,  0 };

// Note that the element below for frame type "USE_BUF_FRAME", which indicates
// that the show frame flag is set, should not be used as no real frame
// is encoded so we should not reach here. However, a dummy value
// is inserted here to make sure the data structure has the right number
// of values assigned.
static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128,
                                                              128, 144, 144 };
199
200 // Configure Vizier RD parameters.
201 // Later this function will use passed in command line values.
vp9_init_rd_parameters(VP9_COMP * cpi)202 void vp9_init_rd_parameters(VP9_COMP *cpi) {
203 RD_CONTROL *const rdc = &cpi->rd_ctrl;
204
205 // When |use_vizier_rc_params| is 1, we expect the rd parameters have been
206 // initialized by the pass in values.
207 // Be careful that parameters below are only initialized to 1, if we do not
208 // pass values to them. It is desired to take care of each parameter when
209 // using |use_vizier_rc_params|.
210 if (cpi->twopass.use_vizier_rc_params) return;
211
212 // Make sure this function is floating point safe.
213 vpx_clear_system_state();
214
215 rdc->rd_mult_inter_qp_fac = 1.0;
216 rdc->rd_mult_arf_qp_fac = 1.0;
217 rdc->rd_mult_key_qp_fac = 1.0;
218 }
219
// Returns the default rd multiplier for inter frames for a given qindex.
// Linear-in-qindex first pass estimate fitted from a previous Vizier run.
static double def_inter_rd_multiplier(int qindex) {
  const double slope = 0.001;
  const double intercept = 4.15;
  return intercept + slope * (double)qindex;
}
226
// Returns the default rd multiplier for ARF/Golden frames for a given qindex.
// Linear-in-qindex first pass estimate fitted from a previous Vizier run.
static double def_arf_rd_multiplier(int qindex) {
  const double slope = 0.001;
  const double intercept = 4.25;
  return intercept + slope * (double)qindex;
}
233
// Returns the default rd multiplier for key frames for a given qindex.
// Linear-in-qindex first pass estimate fitted from a previous Vizier run.
static double def_kf_rd_multiplier(int qindex) {
  const double slope = 0.001;
  const double intercept = 4.35;
  return intercept + slope * (double)qindex;
}
240
// Returns the base rd multiplier (lambda) for |qindex|: the squared dc
// quantizer scaled by a frame-class-dependent linear model of qindex, unless
// an external rate controller dictates the multiplier outright.
int vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {
  const RD_CONTROL *rdc = &cpi->rd_ctrl;
  const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
  // largest dc_quant is 21387, therefore rdmult should fit in int32_t
  int rdmult = q * q;

  // An external rate controller may override the computed multiplier.
  if (cpi->ext_ratectrl.ready &&
      (cpi->ext_ratectrl.funcs.rc_type & VPX_RC_RDMULT) != 0 &&
      cpi->ext_ratectrl.ext_rdmult != VPX_DEFAULT_RDMULT) {
    return cpi->ext_ratectrl.ext_rdmult;
  }

  // Make sure this function is floating point safe.
  vpx_clear_system_state();

  // Scale by the model for key / ARF-golden / plain inter frames.
  if (cpi->common.frame_type == KEY_FRAME) {
    double def_rd_q_mult = def_kf_rd_multiplier(qindex);
    rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_key_qp_fac);
  } else if (!cpi->rc.is_src_frame_alt_ref &&
             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
    double def_rd_q_mult = def_arf_rd_multiplier(qindex);
    rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_arf_qp_fac);
  } else {
    double def_rd_q_mult = def_inter_rd_multiplier(qindex);
    rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_inter_qp_fac);
  }

#if CONFIG_VP9_HIGHBITDEPTH
  // Rescale so lambda stays comparable across bit depths (q grows 4x per
  // 2 extra bits, so q*q grows 16x at 10 bits and 256x at 12 bits).
  switch (cpi->common.bit_depth) {
    case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
    case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
    default: break;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // Never return a non-positive multiplier.
  return rdmult > 0 ? rdmult : 1;
}
277
modulate_rdmult(const VP9_COMP * cpi,int rdmult)278 static int modulate_rdmult(const VP9_COMP *cpi, int rdmult) {
279 int64_t rdmult_64 = rdmult;
280 if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
281 const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
282 const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
283 const int gfu_boost = cpi->multi_layer_arf
284 ? gf_group->gfu_boost[gf_group->index]
285 : cpi->rc.gfu_boost;
286 const int boost_index = VPXMIN(15, (gfu_boost / 100));
287
288 rdmult_64 = (rdmult_64 * rd_frame_type_factor[frame_type]) >> 7;
289 rdmult_64 += ((rdmult_64 * rd_boost_factor[boost_index]) >> 7);
290 }
291 return (int)rdmult_64;
292 }
293
vp9_compute_rd_mult(const VP9_COMP * cpi,int qindex)294 int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
295 int rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, qindex);
296 if (cpi->ext_ratectrl.ready &&
297 (cpi->ext_ratectrl.funcs.rc_type & VPX_RC_RDMULT) != 0 &&
298 cpi->ext_ratectrl.ext_rdmult != VPX_DEFAULT_RDMULT) {
299 return cpi->ext_ratectrl.ext_rdmult;
300 }
301 return modulate_rdmult(cpi, rdmult);
302 }
303
vp9_get_adaptive_rdmult(const VP9_COMP * cpi,double beta)304 int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) {
305 int rdmult =
306 vp9_compute_rd_mult_based_on_qindex(cpi, cpi->common.base_qindex);
307 rdmult = (int)((double)rdmult / beta);
308 rdmult = rdmult > 0 ? rdmult : 1;
309 return modulate_rdmult(cpi, rdmult);
310 }
311
// Maps qindex to the threshold scale factor used by set_block_thresholds().
// The dc quantizer is normalized to the 8-bit scale (divide by 4/16/64 for
// 8/10/12 bits) so all bit depths share one threshold model.
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
  double q;
#if CONFIG_VP9_HIGHBITDEPTH
  switch (bit_depth) {
    case VPX_BITS_8: q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0; break;
    case VPX_BITS_10: q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0; break;
    default:
      assert(bit_depth == VPX_BITS_12);
      q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0;
      break;
  }
#else
  (void)bit_depth;
  q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // TODO(debargha): Adjust the function below.
  // Power curve with a lower bound of 8 so thresholds stay non-trivial at
  // very low q.
  return VPXMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
}
330
// Loads the qindex-dependent SAD-per-bit constants for the current bit
// depth into the macroblock context (consumed by motion search).
void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
#if CONFIG_VP9_HIGHBITDEPTH
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8:
      x->sadperbit16 = sad_per_bit16lut_8[qindex];
      x->sadperbit4 = sad_per_bit4lut_8[qindex];
      break;
    case VPX_BITS_10:
      x->sadperbit16 = sad_per_bit16lut_10[qindex];
      x->sadperbit4 = sad_per_bit4lut_10[qindex];
      break;
    default:
      assert(cpi->common.bit_depth == VPX_BITS_12);
      x->sadperbit16 = sad_per_bit16lut_12[qindex];
      x->sadperbit4 = sad_per_bit4lut_12[qindex];
      break;
  }
#else
  (void)cpi;
  x->sadperbit16 = sad_per_bit16lut_8[qindex];
  x->sadperbit4 = sad_per_bit4lut_8[qindex];
#endif  // CONFIG_VP9_HIGHBITDEPTH
}
354
// Fills rd->threshes[][][] with per-segment, per-block-size mode pruning
// thresholds: the baseline thresh_mult values scaled by the segment's
// quantizer factor and the block-size correction table.
static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
  int i, bsize, segment_id;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    // Effective qindex for this segment, including the y dc delta.
    const int qindex =
        clamp(vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
                  cm->y_dc_delta_q,
              0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      // Cap so that thresh_mult[i] * t below cannot overflow int.
      const int thresh_max = INT_MAX / t;

      // Sub-8x8 sizes index by reference (MAX_REFS), others by mode.
      if (bsize >= BLOCK_8X8) {
        for (i = 0; i < MAX_MODES; ++i)
          rd->threshes[segment_id][bsize][i] = rd->thresh_mult[i] < thresh_max
                                                   ? rd->thresh_mult[i] * t / 4
                                                   : INT_MAX;
      } else {
        for (i = 0; i < MAX_REFS; ++i)
          rd->threshes[segment_id][bsize][i] =
              rd->thresh_mult_sub8x8[i] < thresh_max
                  ? rd->thresh_mult_sub8x8[i] * t / 4
                  : INT_MAX;
      }
    }
  }
}
386
// Recomputes the inter-mode token costs for every mode context from the
// current frame context probabilities.
void vp9_build_inter_mode_cost(VP9_COMP *cpi) {
  const VP9_COMMON *const cm = &cpi->common;
  int ctx;
  for (ctx = 0; ctx < INTER_MODE_CONTEXTS; ++ctx) {
    vp9_cost_tokens((int *)cpi->inter_mode_cost[ctx],
                    cm->fc->inter_mode_probs[ctx], vp9_inter_mode_tree);
  }
}
395
// Per-frame initialization of the RD machinery: lambda (RDMULT/RDDIV),
// error-per-bit, block thresholds, and the token/mode/partition/mv cost
// tables that depend on the current frame context.
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  RD_OPT *const rd = &cpi->rd;
  int i;

  vpx_clear_system_state();

  rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
  rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  set_error_per_bit(x, rd->RDMULT);

  x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                       cm->frame_type != KEY_FRAME)
                          ? 0
                          : 1;

  set_block_thresholds(cm, rd);
  set_partition_probs(cm, xd);

  if (cpi->oxcf.pass == 1) {
    // First pass only needs the MV cost tables.
    if (!frame_is_intra_only(cm))
      vp9_build_nmv_cost_table(
          x->nmvjointcost,
          cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
          &cm->fc->nmvc, cm->allow_high_precision_mv);
  } else {
    if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
      fill_token_costs(x->token_costs, cm->fc->coef_probs);

    if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
        cm->frame_type == KEY_FRAME) {
      for (i = 0; i < PARTITION_CONTEXTS; ++i)
        vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i),
                        vp9_partition_tree);
    }

    // In non-rd pick mode, refresh the mode/mv cost tables only
    // periodically (every 8th frame) or on key frames.
    if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
        cm->frame_type == KEY_FRAME) {
      fill_mode_costs(cpi);

      if (!frame_is_intra_only(cm)) {
        vp9_build_nmv_cost_table(
            x->nmvjointcost,
            cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
            &cm->fc->nmvc, cm->allow_high_precision_mv);
        vp9_build_inter_mode_cost(cpi);
      }
    }
  }
}
449
// NOTE: The tables below must be of the same size.

// The functions described below are sampled at the four most significant
// bits of x^2 + 8 / 256.

// Normalized rate:
// This table models the rate for a Laplacian source with given variance
// when quantized with a uniform quantizer with given stepsize. The
// closed form expression is:
// Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
// where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
// and H(x) is the binary entropy function.
static const int rate_tab_q10[] = {
  65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142, 4044,
  3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186, 3133, 3037,
  2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353, 2290, 2232, 2179,
  2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651, 1608, 1530, 1460, 1398,
  1342, 1290, 1243, 1199, 1159, 1086, 1021, 963,  911,  864,  821,  781,  745,
  680,  623,  574,  530,  490,  455,  424,  395,  345,  304,  269,  239,  213,
  190,  171,  154,  126,  104,  87,   73,   61,   52,   44,   38,   28,   21,
  16,   12,   10,   8,    6,    5,    3,    2,    1,    1,    1,    0,    0,
};

// Normalized distortion:
// This table models the normalized distortion for a Laplacian source
// with given variance when quantized with a uniform quantizer
// with given stepsize. The closed form expression is:
// Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
// where x = qpstep / sqrt(variance).
// Note the actual distortion is Dn * variance.
static const int dist_tab_q10[] = {
  0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,    5,
  6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,   18,   21,
  24,   26,   29,   31,   34,   36,   39,   44,   49,   54,   59,   64,   69,
  73,   78,   88,   97,   106,  115,  124,  133,  142,  151,  167,  184,  200,
  215,  231,  245,  260,  274,  301,  327,  351,  375,  397,  418,  439,  458,
  495,  528,  559,  587,  613,  637,  659,  680,  717,  749,  777,  801,  823,
  842,  859,  874,  899,  919,  936,  949,  960,  969,  977,  983,  994,  1001,
  1006, 1010, 1013, 1015, 1017, 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
};
// The Q10 x^2 sample points at which the two tables above are evaluated;
// spacing grows piecewise-exponentially, which is what model_rd_norm()
// exploits to recover the segment index from the most significant bits.
static const int xsq_iq_q10[] = {
  0,      4,      8,      12,     16,     20,     24,     28,     32,
  40,     48,     56,     64,     72,     80,     88,     96,     112,
  128,    144,    160,    176,    192,    208,    224,    256,    288,
  320,    352,    384,    416,    448,    480,    544,    608,    672,
  736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
  1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
  3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
  7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
  16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
  36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
  81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
  180192, 196576, 212960, 229344, 245728,
};
504
// Looks up normalized rate and distortion (both Q10) for the Q10 ratio
// xsq_q10 = qstep^2 / variance by linear interpolation between entries of
// rate_tab_q10[] / dist_tab_q10[], which are sampled on the
// piecewise-exponential grid xsq_iq_q10[].
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // Recover the table index from the top bits of (x^2/4 + 8): k selects the
  // exponential segment, the next 3 bits select the entry within it.
  const int tmp = (xsq_q10 >> 2) + 8;
  const int k = get_msb(tmp) - 3;
  const int xq = (k << 3) + ((tmp >> k) & 0x7);
  const int one_q10 = 1 << 10;
  // Fractional position within the segment, in Q10.
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  const int b_q10 = one_q10 - a_q10;
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}
515
// Vector form of model_rd_norm(): interpolates the normalized rate and
// distortion tables for all MAX_MB_PLANE inputs. Kept as a separate loop
// (rather than calling model_rd_norm per element) so SIMD specializations
// can override it as a unit.
static void model_rd_norm_vec(int xsq_q10[MAX_MB_PLANE],
                              int r_q10[MAX_MB_PLANE],
                              int d_q10[MAX_MB_PLANE]) {
  int i;
  const int one_q10 = 1 << 10;
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    // Same index/interpolation arithmetic as model_rd_norm().
    const int tmp = (xsq_q10[i] >> 2) + 8;
    const int k = get_msb(tmp) - 3;
    const int xq = (k << 3) + ((tmp >> k) & 0x7);
    const int a_q10 = ((xsq_q10[i] - xsq_iq_q10[xq]) << 10) >> (2 + k);
    const int b_q10 = one_q10 - a_q10;
    r_q10[i] = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
    d_q10[i] = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
  }
}
531
// Largest Q10 qstep^2/variance ratio representable by the tables above
// (the last xsq_iq_q10[] entry minus one).
static const uint32_t MAX_XSQ_Q10 = 245727;
533
// Estimates rate (in VP9 cost units) and distortion for a block of
// 2^n_log2 Laplacian-distributed samples with the given variance, quantized
// with a uniform quantizer of step |qstep|.
void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
                                  unsigned int qstep, int *rate,
                                  int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  if (var == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    int d_q10, r_q10;
    // n * qstep^2 / var in Q10, rounded, capped to the table range.
    const uint64_t xsq_q10_64 =
        (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
    const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
    // Scale normalized rate by sample count and distortion by variance.
    *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - VP9_PROB_COST_SHIFT);
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
  }
}
556
// Implements a fixed length vector form of vp9_model_rd_from_var_lapndz where
// vectors are of length MAX_MB_PLANE and all elements of var are non-zero.
// Accumulates the per-plane rate and distortion into *rate_sum / *dist_sum
// (which the caller must initialize).
void vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE],
                                      unsigned int n_log2[MAX_MB_PLANE],
                                      unsigned int qstep[MAX_MB_PLANE],
                                      int64_t *rate_sum, int64_t *dist_sum) {
  int i;
  int xsq_q10[MAX_MB_PLANE], d_q10[MAX_MB_PLANE], r_q10[MAX_MB_PLANE];
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    // n * qstep^2 / var in Q10, rounded, capped to the table range.
    const uint64_t xsq_q10_64 =
        (((uint64_t)qstep[i] * qstep[i] << (n_log2[i] + 10)) + (var[i] >> 1)) /
        var[i];
    xsq_q10[i] = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
  }
  model_rd_norm_vec(xsq_q10, r_q10, d_q10);
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    int rate =
        ROUND_POWER_OF_TWO(r_q10[i] << n_log2[i], 10 - VP9_PROB_COST_SHIFT);
    int64_t dist = (var[i] * (int64_t)d_q10[i] + 512) >> 10;
    *rate_sum += rate;
    *dist_sum += dist;
  }
}
580
581 // Disable gcc 12.2 false positive warning.
582 // warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
583 #if defined(__GNUC__) && !defined(__clang__)
584 #pragma GCC diagnostic push
585 #pragma GCC diagnostic ignored "-Wstringop-overflow"
586 #endif
// Collapses the per-4x4 above/left entropy contexts of a plane into one
// flag per transform block, written at the first 4x4 position of each
// transform block in t_above[] / t_left[].
void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;

  // NOTE(review): the wide loads below combine 2/4/8 context bytes with a
  // single type-punned read; this presumes the context arrays are suitably
  // aligned and the toolchain tolerates the aliasing — confirm, as it is
  // formally a strict-aliasing violation in ISO C.
  int i;
  switch (tx_size) {
    case TX_4X4:
      // One context byte per 4x4 block: straight copy.
      memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = !!*(const uint16_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = !!*(const uint16_t *)&left[i];
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = !!*(const uint32_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = !!*(const uint32_t *)&left[i];
      break;
    default:
      assert(tx_size == TX_32X32);
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = !!*(const uint64_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = !!*(const uint64_t *)&left[i];
      break;
  }
}
624 #if defined(__GNUC__) && !defined(__clang__)
625 #pragma GCC diagnostic pop
626 #endif
627
// Evaluates up to three candidate reference MVs (nearest, near, and the
// stored pred_mv) for |ref_frame| by full-pel SAD against the source block,
// then records the winning candidate index, the largest full-pel MV
// magnitude seen, and the best SAD in the macroblock context.
void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  int i;
  int zero_seen = 0;
  int best_index = 0;
  int best_sad = INT_MAX;
  int this_sad = INT_MAX;
  int max_mv = 0;
  int near_same_nearest;
  uint8_t *src_y_ptr = x->plane[0].src.buf;
  uint8_t *ref_y_ptr;
  // The third candidate (stored pred_mv) is only considered for blocks
  // below the maximum partition size.
  const int num_mv_refs =
      MAX_MV_REF_CANDIDATES + (block_size < x->max_partition_size);

  MV pred_mv[3];
  pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
  pred_mv[2] = x->pred_mv[ref_frame];
  assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));

  near_same_nearest = x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
                      x->mbmi_ext->ref_mvs[ref_frame][1].as_int;

  // Get the sad for each candidate reference mv.
  for (i = 0; i < num_mv_refs; ++i) {
    const MV *this_mv = &pred_mv[i];
    int fp_row, fp_col;
    // Skip invalid candidates and a NEAR that duplicates NEAREST.
    if (this_mv->row == INT16_MAX || this_mv->col == INT16_MAX) continue;
    if (i == 1 && near_same_nearest) continue;
    // Round the 1/8-pel MV components to full pel (round away from zero).
    fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
    fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
    max_mv = VPXMAX(max_mv, VPXMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);

    // Evaluate the zero vector at most once.
    if (fp_row == 0 && fp_col == 0 && zero_seen) continue;
    zero_seen |= (fp_row == 0 && fp_col == 0);

    ref_y_ptr = &ref_y_buffer[ref_y_stride * fp_row + fp_col];
    // Find sad for current vector.
    this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
                                           ref_y_ptr, ref_y_stride);
    // Note if it is the best so far.
    if (this_sad < best_sad) {
      best_sad = this_sad;
      best_index = i;
    }
  }

  // Note the index of the mv that worked best in the reference list.
  x->mv_best_ref_index[ref_frame] = best_index;
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}
680
vp9_setup_pred_block(const MACROBLOCKD * xd,struct buf_2d dst[MAX_MB_PLANE],const YV12_BUFFER_CONFIG * src,int mi_row,int mi_col,const struct scale_factors * scale,const struct scale_factors * scale_uv)681 void vp9_setup_pred_block(const MACROBLOCKD *xd,
682 struct buf_2d dst[MAX_MB_PLANE],
683 const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
684 const struct scale_factors *scale,
685 const struct scale_factors *scale_uv) {
686 int i;
687
688 dst[0].buf = src->y_buffer;
689 dst[0].stride = src->y_stride;
690 dst[1].buf = src->u_buffer;
691 dst[2].buf = src->v_buffer;
692 dst[1].stride = dst[2].stride = src->uv_stride;
693
694 for (i = 0; i < MAX_MB_PLANE; ++i) {
695 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
696 i ? scale_uv : scale, xd->plane[i].subsampling_x,
697 xd->plane[i].subsampling_y);
698 }
699 }
700
// Pixel offset of a raster-ordered 4x4 sub-block within a plane of the
// given block size, for a buffer with the given stride.
int vp9_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
                            int stride) {
  const int bwl = b_width_log2_lookup[plane_bsize];
  const int row = (raster_block >> bwl) << 2;
  const int col = (raster_block & ((1 << bwl) - 1)) << 2;
  return row * stride + col;
}
708
// Returns a pointer into an int16_t plane buffer at the position of the
// given raster-ordered 4x4 sub-block.
int16_t *vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
                                       int16_t *base) {
  const int stride = num_4x4_blocks_wide_lookup[plane_bsize] * 4;
  return base + vp9_raster_block_offset(plane_bsize, raster_block, stride);
}
714
// Returns the scaled buffer for |ref_frame| when a distinct scaled copy
// exists, or NULL when the reference can be used directly.
YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
                                             int ref_frame) {
  const VP9_COMMON *const cm = &cpi->common;
  const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
  const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
  assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
  // A valid scaled index equal to the unscaled one means no scaling needed.
  return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX)
             ? &cm->buffer_pool->frame_bufs[scaled_idx].buf
             : NULL;
}
725
vp9_get_switchable_rate(const VP9_COMP * cpi,const MACROBLOCKD * const xd)726 int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
727 const MODE_INFO *const mi = xd->mi[0];
728 const int ctx = get_pred_context_switchable_interp(xd);
729 return SWITCHABLE_INTERP_RATE_FACTOR *
730 cpi->switchable_interp_costs[ctx][mi->interp_filter];
731 }
732
// Sets the baseline per-mode RD thresholds for blocks >= 8x8. Larger values
// make a mode more likely to be skipped in the mode search; BEST quality
// mode starts from a negative baseline so nothing is pruned early.
void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
  int i;
  RD_OPT *const rd = &cpi->rd;
  SPEED_FEATURES *const sf = &cpi->sf;

  // Set baseline threshold values.
  for (i = 0; i < MAX_MODES; ++i)
    rd->thresh_mult[i] = cpi->oxcf.mode == BEST ? -500 : 0;

  // NEAREST modes get a small positive threshold only when thresholds are
  // adapted at runtime.
  if (sf->adaptive_rd_thresh) {
    rd->thresh_mult[THR_NEARESTMV] = 300;
    rd->thresh_mult[THR_NEARESTG] = 300;
    rd->thresh_mult[THR_NEARESTA] = 300;
  } else {
    rd->thresh_mult[THR_NEARESTMV] = 0;
    rd->thresh_mult[THR_NEARESTG] = 0;
    rd->thresh_mult[THR_NEARESTA] = 0;
  }

  rd->thresh_mult[THR_DC] += 1000;

  // NEW (explicit motion vector) modes.
  rd->thresh_mult[THR_NEWMV] += 1000;
  rd->thresh_mult[THR_NEWA] += 1000;
  rd->thresh_mult[THR_NEWG] += 1000;

  // NEAR and compound NEAREST modes.
  rd->thresh_mult[THR_NEARMV] += 1000;
  rd->thresh_mult[THR_NEARA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;

  rd->thresh_mult[THR_TM] += 1000;

  // Compound NEAR/NEW modes carry a higher baseline.
  rd->thresh_mult[THR_COMP_NEARLA] += 1500;
  rd->thresh_mult[THR_COMP_NEWLA] += 2000;
  rd->thresh_mult[THR_NEARG] += 1000;
  rd->thresh_mult[THR_COMP_NEARGA] += 1500;
  rd->thresh_mult[THR_COMP_NEWGA] += 2000;

  // ZERO (no motion) modes.
  rd->thresh_mult[THR_ZEROMV] += 2000;
  rd->thresh_mult[THR_ZEROG] += 2000;
  rd->thresh_mult[THR_ZEROA] += 2000;
  rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
  rd->thresh_mult[THR_COMP_ZEROGA] += 2500;

  // Directional intra modes.
  rd->thresh_mult[THR_H_PRED] += 2000;
  rd->thresh_mult[THR_V_PRED] += 2000;
  rd->thresh_mult[THR_D45_PRED] += 2500;
  rd->thresh_mult[THR_D135_PRED] += 2500;
  rd->thresh_mult[THR_D117_PRED] += 2500;
  rd->thresh_mult[THR_D153_PRED] += 2500;
  rd->thresh_mult[THR_D207_PRED] += 2500;
  rd->thresh_mult[THR_D63_PRED] += 2500;
}
786
// Sets the per-reference RD thresholds for sub-8x8 blocks from a fixed
// table; BEST quality mode (row 1) uses the lower, less aggressive values.
void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
  static const int thresh_mult[2][MAX_REFS] = {
    { 2500, 2500, 2500, 4500, 4500, 2500 },
    { 2000, 2000, 2000, 4000, 4000, 2000 }
  };
  RD_OPT *const rd = &cpi->rd;
  const int row = (cpi->oxcf.mode == BEST) ? 1 : 0;
  memcpy(rd->thresh_mult_sub8x8, thresh_mult[row], sizeof(thresh_mult[row]));
}
796
vp9_update_rd_thresh_fact(int (* factor_buf)[MAX_MODES],int rd_thresh,int bsize,int best_mode_index)797 void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
798 int bsize, int best_mode_index) {
799 if (rd_thresh > 0) {
800 const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
801 int mode;
802 for (mode = 0; mode < top_mode; ++mode) {
803 const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
804 const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
805 BLOCK_SIZE bs;
806 for (bs = min_size; bs <= max_size; ++bs) {
807 int *const fact = &factor_buf[bs][mode];
808 if (mode == best_mode_index) {
809 *fact -= (*fact >> 4);
810 } else {
811 *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
812 }
813 }
814 }
815 }
816 }
817
// Returns the rate penalty applied to intra modes during inter-frame mode
// search, scaled down for small blocks unless the estimated noise level
// is high.
int vp9_get_intra_cost_penalty(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
                               int qindex, int qdelta) {
  // Reduce the intra cost penalty for small blocks (<=16x16).
  int reduction_fac =
      (bsize <= BLOCK_16X16) ? ((bsize <= BLOCK_8X8) ? 4 : 2) : 0;

  if (cpi->noise_estimate.enabled && cpi->noise_estimate.level == kHigh)
    // Don't reduce intra cost penalty if estimated noise level is high.
    reduction_fac = 0;

  // Always use VPX_BITS_8 as input here because the penalty is applied
  // to rate not distortion so we want a consistent penalty for all bit
  // depths. If the actual bit depth were passed in here then the value
  // retured by vp9_dc_quant() would scale with the bit depth and we would
  // then need to apply inverse scaling to correct back to a bit depth
  // independent rate penalty.
  return (20 * vp9_dc_quant(qindex, qdelta, VPX_BITS_8)) >> reduction_fac;
}
836