/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <math.h>
#include <stdio.h>

#include "./vp9_rtcd.h"

#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/bitops.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_seg_common.h"

#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_tokenize.h"

#define RD_THRESH_POW 1.25

// Factor to weigh the rate for switchable interp filters.
#define SWITCHABLE_INTERP_RATE_FACTOR 1

void vp9_rd_cost_reset(RD_COST *rd_cost) {
  rd_cost->rate = INT_MAX;
  rd_cost->dist = INT64_MAX;
  rd_cost->rdcost = INT64_MAX;
}

void vp9_rd_cost_init(RD_COST *rd_cost) {
  rd_cost->rate = 0;
  rd_cost->dist = 0;
  rd_cost->rdcost = 0;
}

int64_t vp9_calculate_rd_cost(int mult, int div, int rate, int64_t dist) {
  assert(mult >= 0);
  assert(div > 0);
  if (rate >= 0 && dist >= 0) {
    return RDCOST(mult, div, rate, dist);
  }
  if (rate >= 0 && dist < 0) {
    return RDCOST_NEG_D(mult, div, rate, -dist);
  }
  if (rate < 0 && dist >= 0) {
    return RDCOST_NEG_R(mult, div, -rate, dist);
  }
  return -RDCOST(mult, div, -rate, -dist);
}

void vp9_rd_cost_update(int mult, int div, RD_COST *rd_cost) {
  if (rd_cost->rate < INT_MAX && rd_cost->dist < INT64_MAX) {
    rd_cost->rdcost =
        vp9_calculate_rd_cost(mult, div, rd_cost->rate, rd_cost->dist);
  } else {
    vp9_rd_cost_reset(rd_cost);
  }
}
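
// A minimal (hypothetical) usage sketch of the RD_COST helpers during a mode
// search, assuming |rate|, |dist| and the Lagrangian parameters |rdmult| and
// |rddiv| come from the caller:
//   RD_COST this_rd;
//   vp9_rd_cost_reset(&this_rd);  // Start from the worst possible cost.
//   this_rd.rate = rate;
//   this_rd.dist = dist;
//   vp9_rd_cost_update(rdmult, rddiv, &this_rd);  // Fills this_rd.rdcost.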

// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
};
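// For example, the factor for BLOCK_8X8 is 4 (x1.0) and for BLOCK_64X64 it
// is 32 (x8).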

static void fill_mode_costs(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const fc = cpi->common.fc;
  int i, j;

  for (i = 0; i < INTRA_MODES; ++i) {
    for (j = 0; j < INTRA_MODES; ++j) {
      vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
                      vp9_intra_mode_tree);
    }
  }

  vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
  for (i = 0; i < INTRA_MODES; ++i) {
    vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME][i],
                    vp9_kf_uv_mode_prob[i], vp9_intra_mode_tree);
    vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME][i],
                    fc->uv_mode_prob[i], vp9_intra_mode_tree);
  }

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
    vp9_cost_tokens(cpi->switchable_interp_costs[i],
                    fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
  }

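  // The selected tx size is coded as a truncated unary sequence: one bit for
  // each size below the chosen one (k), plus a terminating bit unless the
  // chosen size is the largest allowed (k == i), where the terminator is
  // implicit.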
  for (i = TX_8X8; i < TX_SIZES; ++i) {
    for (j = 0; j < TX_SIZE_CONTEXTS; ++j) {
      const vpx_prob *tx_probs = get_tx_probs(i, j, &fc->tx_probs);
      int k;
      for (k = 0; k <= i; ++k) {
        int cost = 0;
        int m;
        for (m = 0; m <= k - (k == i); ++m) {
          if (m == k)
            cost += vp9_cost_zero(tx_probs[m]);
          else
            cost += vp9_cost_one(tx_probs[m]);
        }
        cpi->tx_size_cost[i - 1][j][k] = cost;
      }
    }
  }
}

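// Fills the per-plane coefficient token cost tables. For each band/context,
// index [0] holds token costs that include the EOB decision while index [1]
// holds costs with the EOB branch skipped; the cost of the EOB token itself
// is identical in both, as the assert below verifies.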
static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  int i, j, k, l;
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; ++t)
    for (i = 0; i < PLANE_TYPES; ++i)
      for (j = 0; j < REF_TYPES; ++j)
        for (k = 0; k < COEF_BANDS; ++k)
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
            vpx_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs, vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
                                 vp9_coef_tree);
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
          }
}

// Values are now correlated to quantizer.
static int sad_per_bit16lut_8[QINDEX_RANGE];
static int sad_per_bit4lut_8[QINDEX_RANGE];

#if CONFIG_VP9_HIGHBITDEPTH
static int sad_per_bit16lut_10[QINDEX_RANGE];
static int sad_per_bit4lut_10[QINDEX_RANGE];
static int sad_per_bit16lut_12[QINDEX_RANGE];
static int sad_per_bit4lut_12[QINDEX_RANGE];
#endif

static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
                            vpx_bit_depth_t bit_depth) {
  int i;
  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
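  // For example, at 8-bit depth the largest qindex maps to a q of roughly
  // 457, giving sadperbit16 of about 21 and sadperbit4 of about 31.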
  for (i = 0; i < range; i++) {
    const double q = vp9_convert_qindex_to_q(i, bit_depth);
    bit16lut[i] = (int)(0.0418 * q + 2.4107);
    bit4lut[i] = (int)(0.063 * q + 2.742);
  }
}

void vp9_init_me_luts(void) {
  init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
                  VPX_BITS_8);
#if CONFIG_VP9_HIGHBITDEPTH
  init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
                  VPX_BITS_10);
  init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
                  VPX_BITS_12);
#endif
}

static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
                                         8,  8,  4,  4,  2,  2,  1,  0 };

// Note that the element below for frame type "USE_BUF_FRAME", which indicates
// that the show frame flag is set, should not be used as no real frame
// is encoded so we should not reach here. However, a dummy value
// is inserted here to make sure the data structure has the right number
// of values assigned.
static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128,
                                                              128, 144, 144 };
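// Both factor tables above are Q7 scale factors applied in modulate_rdmult():
// 128 means x1.0, so e.g. a frame type factor of 144 scales rdmult by x1.125,
// and rd_boost_factor[0] = 64 adds a further 50%.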

// Configure Vizier RD parameters.
// Later this function will use passed-in command line values.
void vp9_init_rd_parameters(VP9_COMP *cpi) {
  RD_CONTROL *const rdc = &cpi->rd_ctrl;

  // When |use_vizier_rc_params| is 1, the rd parameters are expected to have
  // been initialized from the passed-in values, so do not overwrite them here.
  // Note that the parameters below only default to 1.0 when no values are
  // passed in; each parameter should be set explicitly when
  // |use_vizier_rc_params| is used.
  if (cpi->twopass.use_vizier_rc_params) return;

  // Make sure this function is floating point safe.
  vpx_clear_system_state();

  rdc->rd_mult_inter_qp_fac = 1.0;
  rdc->rd_mult_arf_qp_fac = 1.0;
  rdc->rd_mult_key_qp_fac = 1.0;
}

// Returns the default rd multiplier for inter frames for a given qindex.
// The function here is a first pass estimate based on data from
// a previous Vizier run.
static double def_inter_rd_multiplier(int qindex) {
  return 4.15 + (0.001 * (double)qindex);
}

// Returns the default rd multiplier for ARF/Golden Frames for a given qindex.
// The function here is a first pass estimate based on data from
// a previous Vizier run.
static double def_arf_rd_multiplier(int qindex) {
  return 4.25 + (0.001 * (double)qindex);
}

// Returns the default rd multiplier for key frames for a given qindex.
// The function here is a first pass estimate based on data from
// a previous Vizier run.
static double def_kf_rd_multiplier(int qindex) {
  return 4.35 + (0.001 * (double)qindex);
}

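// The base multiplier computed below is thus roughly
//   rdmult = q_dc^2 * (c + 0.001 * qindex),
// with c = 4.35 for key frames, 4.25 for ARF/golden frames and 4.15 for
// other inter frames, before the Vizier per-frame-type factors are applied.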
int vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {
  const RD_CONTROL *rdc = &cpi->rd_ctrl;
  const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
  // largest dc_quant is 21387, therefore rdmult should fit in int32_t
  int rdmult = q * q;

  // Make sure this function is floating point safe.
  vpx_clear_system_state();

  if (cpi->common.frame_type == KEY_FRAME) {
    double def_rd_q_mult = def_kf_rd_multiplier(qindex);
    rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_key_qp_fac);
  } else if (!cpi->rc.is_src_frame_alt_ref &&
             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
    double def_rd_q_mult = def_arf_rd_multiplier(qindex);
    rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_arf_qp_fac);
  } else {
    double def_rd_q_mult = def_inter_rd_multiplier(qindex);
    rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_inter_qp_fac);
  }

#if CONFIG_VP9_HIGHBITDEPTH
  switch (cpi->common.bit_depth) {
    case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
    case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
    default: break;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  return rdmult > 0 ? rdmult : 1;
}

static int modulate_rdmult(const VP9_COMP *cpi, int rdmult) {
  int64_t rdmult_64 = rdmult;
  if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
    const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
    const int gfu_boost = cpi->multi_layer_arf
                              ? gf_group->gfu_boost[gf_group->index]
                              : cpi->rc.gfu_boost;
    const int boost_index = VPXMIN(15, (gfu_boost / 100));

    rdmult_64 = (rdmult_64 * rd_frame_type_factor[frame_type]) >> 7;
    rdmult_64 += ((rdmult_64 * rd_boost_factor[boost_index]) >> 7);
  }
  return (int)rdmult_64;
}

int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
  int rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, qindex);
  return modulate_rdmult(cpi, rdmult);
}

int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) {
  int rdmult =
      vp9_compute_rd_mult_based_on_qindex(cpi, cpi->common.base_qindex);
  rdmult = (int)((double)rdmult / beta);
  rdmult = rdmult > 0 ? rdmult : 1;
  return modulate_rdmult(cpi, rdmult);
}

static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
  double q;
#if CONFIG_VP9_HIGHBITDEPTH
  switch (bit_depth) {
    case VPX_BITS_8: q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0; break;
    case VPX_BITS_10: q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0; break;
    default:
      assert(bit_depth == VPX_BITS_12);
      q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0;
      break;
  }
#else
  (void)bit_depth;
  q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // TODO(debargha): Adjust the function below.
  return VPXMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
}

void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
#if CONFIG_VP9_HIGHBITDEPTH
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8:
      x->sadperbit16 = sad_per_bit16lut_8[qindex];
      x->sadperbit4 = sad_per_bit4lut_8[qindex];
      break;
    case VPX_BITS_10:
      x->sadperbit16 = sad_per_bit16lut_10[qindex];
      x->sadperbit4 = sad_per_bit4lut_10[qindex];
      break;
    default:
      assert(cpi->common.bit_depth == VPX_BITS_12);
      x->sadperbit16 = sad_per_bit16lut_12[qindex];
      x->sadperbit4 = sad_per_bit4lut_12[qindex];
      break;
  }
#else
  (void)cpi;
  x->sadperbit16 = sad_per_bit16lut_8[qindex];
  x->sadperbit4 = sad_per_bit4lut_8[qindex];
#endif  // CONFIG_VP9_HIGHBITDEPTH
}

static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
  int i, bsize, segment_id;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    const int qindex =
        clamp(vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
                  cm->y_dc_delta_q,
              0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      const int thresh_max = INT_MAX / t;

      if (bsize >= BLOCK_8X8) {
        for (i = 0; i < MAX_MODES; ++i)
          rd->threshes[segment_id][bsize][i] = rd->thresh_mult[i] < thresh_max
                                                   ? rd->thresh_mult[i] * t / 4
                                                   : INT_MAX;
      } else {
        for (i = 0; i < MAX_REFS; ++i)
          rd->threshes[segment_id][bsize][i] =
              rd->thresh_mult_sub8x8[i] < thresh_max
                  ? rd->thresh_mult_sub8x8[i] * t / 4
                  : INT_MAX;
      }
    }
  }
}

void vp9_build_inter_mode_cost(VP9_COMP *cpi) {
  const VP9_COMMON *const cm = &cpi->common;
  int i;
  for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
    vp9_cost_tokens((int *)cpi->inter_mode_cost[i], cm->fc->inter_mode_probs[i],
                    vp9_inter_mode_tree);
  }
}

void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  RD_OPT *const rd = &cpi->rd;
  int i;

  vpx_clear_system_state();

  rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
  rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  set_error_per_bit(x, rd->RDMULT);

  x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                       cm->frame_type != KEY_FRAME)
                          ? 0
                          : 1;

  set_block_thresholds(cm, rd);
  set_partition_probs(cm, xd);

  if (cpi->oxcf.pass == 1) {
    if (!frame_is_intra_only(cm))
      vp9_build_nmv_cost_table(
          x->nmvjointcost,
          cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
          &cm->fc->nmvc, cm->allow_high_precision_mv);
  } else {
    if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
      fill_token_costs(x->token_costs, cm->fc->coef_probs);

    if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
        cm->frame_type == KEY_FRAME) {
      for (i = 0; i < PARTITION_CONTEXTS; ++i)
        vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i),
                        vp9_partition_tree);
    }

    if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
        cm->frame_type == KEY_FRAME) {
      fill_mode_costs(cpi);

      if (!frame_is_intra_only(cm)) {
        vp9_build_nmv_cost_table(
            x->nmvjointcost,
            cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
            &cm->fc->nmvc, cm->allow_high_precision_mv);
        vp9_build_inter_mode_cost(cpi);
      }
    }
  }
}

// NOTE: The tables below must be of the same size.

// The functions described below are sampled at the four most significant
// bits of x^2 + 8 / 256.

// Normalized rate:
// This table models the rate for a Laplacian source with given variance
// when quantized with a uniform quantizer with given stepsize. The
// closed form expression is:
// Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
// where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
// and H(x) is the binary entropy function.
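// (Here H(p) = -p * log2(p) - (1 - p) * log2(1 - p).)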
static const int rate_tab_q10[] = {
  65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142, 4044,
  3958,  3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186, 3133, 3037,
  2952,  2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353, 2290, 2232, 2179,
  2130,  2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651, 1608, 1530, 1460, 1398,
  1342,  1290, 1243, 1199, 1159, 1086, 1021, 963,  911,  864,  821,  781,  745,
  680,   623,  574,  530,  490,  455,  424,  395,  345,  304,  269,  239,  213,
  190,   171,  154,  126,  104,  87,   73,   61,   52,   44,   38,   28,   21,
  16,    12,   10,   8,    6,    5,    3,    2,    1,    1,    1,    0,    0,
};

// Normalized distortion:
// This table models the normalized distortion for a Laplacian source
// with given variance when quantized with a uniform quantizer
// with given stepsize. The closed form expression is:
// Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
// where x = qpstep / sqrt(variance).
// Note the actual distortion is Dn * variance.
static const int dist_tab_q10[] = {
  0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,    5,
  6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,   18,   21,
  24,   26,   29,   31,   34,   36,   39,   44,   49,   54,   59,   64,   69,
  73,   78,   88,   97,   106,  115,  124,  133,  142,  151,  167,  184,  200,
  215,  231,  245,  260,  274,  301,  327,  351,  375,  397,  418,  439,  458,
  495,  528,  559,  587,  613,  637,  659,  680,  717,  749,  777,  801,  823,
  842,  859,  874,  899,  919,  936,  949,  960,  969,  977,  983,  994,  1001,
  1006, 1010, 1013, 1015, 1017, 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
};
static const int xsq_iq_q10[] = {
  0,      4,      8,      12,     16,     20,     24,     28,     32,
  40,     48,     56,     64,     72,     80,     88,     96,     112,
  128,    144,    160,    176,    192,    208,    224,    256,    288,
  320,    352,    384,    416,    448,    480,    544,    608,    672,
  736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
  1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
  3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
  7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
  16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
  36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
  81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
  180192, 196576, 212960, 229344, 245728,
};

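// Interpolates the normalized rate and distortion tables at the point
// xsq_q10. The grid spacing in xsq_iq_q10 doubles every 8 entries, so the
// bracketing segment can be found from the most significant bit of
// tmp = xsq_q10 / 4 + 8; a_q10 and b_q10 are the Q10 interpolation weights.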
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  const int tmp = (xsq_q10 >> 2) + 8;
  const int k = get_msb(tmp) - 3;
  const int xq = (k << 3) + ((tmp >> k) & 0x7);
  const int one_q10 = 1 << 10;
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  const int b_q10 = one_q10 - a_q10;
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}

static void model_rd_norm_vec(int xsq_q10[MAX_MB_PLANE],
                              int r_q10[MAX_MB_PLANE],
                              int d_q10[MAX_MB_PLANE]) {
  int i;
  const int one_q10 = 1 << 10;
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    const int tmp = (xsq_q10[i] >> 2) + 8;
    const int k = get_msb(tmp) - 3;
    const int xq = (k << 3) + ((tmp >> k) & 0x7);
    const int a_q10 = ((xsq_q10[i] - xsq_iq_q10[xq]) << 10) >> (2 + k);
    const int b_q10 = one_q10 - a_q10;
    r_q10[i] = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
    d_q10[i] = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
  }
}

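// One less than the largest entry in xsq_iq_q10[]: clamping to this value
// keeps the interpolation index xq + 1 within the 104-entry tables above.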
static const uint32_t MAX_XSQ_Q10 = 245727;

void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
                                  unsigned int qstep, int *rate,
                                  int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  if (var == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    int d_q10, r_q10;
    const uint64_t xsq_q10_64 =
        (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
    const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
    *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - VP9_PROB_COST_SHIFT);
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
  }
}

// Implements a fixed length vector form of vp9_model_rd_from_var_lapndz where
// vectors are of length MAX_MB_PLANE and all elements of var are non-zero.
void vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE],
                                      unsigned int n_log2[MAX_MB_PLANE],
                                      unsigned int qstep[MAX_MB_PLANE],
                                      int64_t *rate_sum, int64_t *dist_sum) {
  int i;
  int xsq_q10[MAX_MB_PLANE], d_q10[MAX_MB_PLANE], r_q10[MAX_MB_PLANE];
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    const uint64_t xsq_q10_64 =
        (((uint64_t)qstep[i] * qstep[i] << (n_log2[i] + 10)) + (var[i] >> 1)) /
        var[i];
    xsq_q10[i] = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
  }
  model_rd_norm_vec(xsq_q10, r_q10, d_q10);
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    int rate =
        ROUND_POWER_OF_TWO(r_q10[i] << n_log2[i], 10 - VP9_PROB_COST_SHIFT);
    int64_t dist = (var[i] * (int64_t)d_q10[i] + 512) >> 10;
    *rate_sum += rate;
    *dist_sum += dist;
  }
}

void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;

  int i;
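  // For tx sizes above 4x4 each transform spans 2, 4 or 8 of the per-4x4
  // context entries; the wide loads below yield a nonzero context iff any
  // covered entry is nonzero.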
  switch (tx_size) {
    case TX_4X4:
      memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = !!*(const uint16_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = !!*(const uint16_t *)&left[i];
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = !!*(const uint32_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = !!*(const uint32_t *)&left[i];
      break;
    default:
      assert(tx_size == TX_32X32);
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = !!*(const uint64_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = !!*(const uint64_t *)&left[i];
      break;
  }
}

void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  int i;
  int zero_seen = 0;
  int best_index = 0;
  int best_sad = INT_MAX;
  int this_sad = INT_MAX;
  int max_mv = 0;
  int near_same_nearest;
  uint8_t *src_y_ptr = x->plane[0].src.buf;
  uint8_t *ref_y_ptr;
  const int num_mv_refs =
      MAX_MV_REF_CANDIDATES + (block_size < x->max_partition_size);

  MV pred_mv[3];
  pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
  pred_mv[2] = x->pred_mv[ref_frame];
  assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));

  near_same_nearest = x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
                      x->mbmi_ext->ref_mvs[ref_frame][1].as_int;

  // Get the sad for each candidate reference mv.
  for (i = 0; i < num_mv_refs; ++i) {
    const MV *this_mv = &pred_mv[i];
    int fp_row, fp_col;
    if (this_mv->row == INT16_MAX || this_mv->col == INT16_MAX) continue;
    if (i == 1 && near_same_nearest) continue;
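    // Round the 1/8-pel candidate to the nearest full-pel position
    // (ties rounded away from zero).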
    fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
    fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
    max_mv = VPXMAX(max_mv, VPXMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);

    if (fp_row == 0 && fp_col == 0 && zero_seen) continue;
    zero_seen |= (fp_row == 0 && fp_col == 0);

    ref_y_ptr = &ref_y_buffer[ref_y_stride * fp_row + fp_col];
    // Find sad for current vector.
    this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
                                           ref_y_ptr, ref_y_stride);
    // Note if it is the best so far.
    if (this_sad < best_sad) {
      best_sad = this_sad;
      best_index = i;
    }
  }

  // Note the index of the mv that worked best in the reference list.
  x->mv_best_ref_index[ref_frame] = best_index;
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}

void vp9_setup_pred_block(const MACROBLOCKD *xd,
                          struct buf_2d dst[MAX_MB_PLANE],
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const struct scale_factors *scale,
                          const struct scale_factors *scale_uv) {
  int i;

  dst[0].buf = src->y_buffer;
  dst[0].stride = src->y_stride;
  dst[1].buf = src->u_buffer;
  dst[2].buf = src->v_buffer;
  dst[1].stride = dst[2].stride = src->uv_stride;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
                     i ? scale_uv : scale, xd->plane[i].subsampling_x,
                     xd->plane[i].subsampling_y);
  }
}

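// Returns the pixel offset of a 4x4 sub-block given its raster-scan index
// within the plane block. For example, in an 8x8 block (two 4x4 blocks per
// row), raster_block 1 maps to x = 4, y = 0.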
int vp9_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
                            int stride) {
  const int bw = b_width_log2_lookup[plane_bsize];
  const int y = 4 * (raster_block >> bw);
  const int x = 4 * (raster_block & ((1 << bw) - 1));
  return y * stride + x;
}

int16_t *vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
                                       int16_t *base) {
  const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  return base + vp9_raster_block_offset(plane_bsize, raster_block, stride);
}

YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
                                             int ref_frame) {
  const VP9_COMMON *const cm = &cpi->common;
  const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
  const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
  assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
  return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX)
             ? &cm->buffer_pool->frame_bufs[scaled_idx].buf
             : NULL;
}

int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
  const MODE_INFO *const mi = xd->mi[0];
  const int ctx = get_pred_context_switchable_interp(xd);
  return SWITCHABLE_INTERP_RATE_FACTOR *
         cpi->switchable_interp_costs[ctx][mi->interp_filter];
}

void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
  int i;
  RD_OPT *const rd = &cpi->rd;
  SPEED_FEATURES *const sf = &cpi->sf;

  // Set baseline threshold values.
  for (i = 0; i < MAX_MODES; ++i)
    rd->thresh_mult[i] = cpi->oxcf.mode == BEST ? -500 : 0;

  if (sf->adaptive_rd_thresh) {
    rd->thresh_mult[THR_NEARESTMV] = 300;
    rd->thresh_mult[THR_NEARESTG] = 300;
    rd->thresh_mult[THR_NEARESTA] = 300;
  } else {
    rd->thresh_mult[THR_NEARESTMV] = 0;
    rd->thresh_mult[THR_NEARESTG] = 0;
    rd->thresh_mult[THR_NEARESTA] = 0;
  }

  rd->thresh_mult[THR_DC] += 1000;

  rd->thresh_mult[THR_NEWMV] += 1000;
  rd->thresh_mult[THR_NEWA] += 1000;
  rd->thresh_mult[THR_NEWG] += 1000;

  rd->thresh_mult[THR_NEARMV] += 1000;
  rd->thresh_mult[THR_NEARA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;

  rd->thresh_mult[THR_TM] += 1000;

  rd->thresh_mult[THR_COMP_NEARLA] += 1500;
  rd->thresh_mult[THR_COMP_NEWLA] += 2000;
  rd->thresh_mult[THR_NEARG] += 1000;
  rd->thresh_mult[THR_COMP_NEARGA] += 1500;
  rd->thresh_mult[THR_COMP_NEWGA] += 2000;

  rd->thresh_mult[THR_ZEROMV] += 2000;
  rd->thresh_mult[THR_ZEROG] += 2000;
  rd->thresh_mult[THR_ZEROA] += 2000;
  rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
  rd->thresh_mult[THR_COMP_ZEROGA] += 2500;

  rd->thresh_mult[THR_H_PRED] += 2000;
  rd->thresh_mult[THR_V_PRED] += 2000;
  rd->thresh_mult[THR_D45_PRED] += 2500;
  rd->thresh_mult[THR_D135_PRED] += 2500;
  rd->thresh_mult[THR_D117_PRED] += 2500;
  rd->thresh_mult[THR_D153_PRED] += 2500;
  rd->thresh_mult[THR_D207_PRED] += 2500;
  rd->thresh_mult[THR_D63_PRED] += 2500;
}

void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
  static const int thresh_mult[2][MAX_REFS] = {
    { 2500, 2500, 2500, 4500, 4500, 2500 },
    { 2000, 2000, 2000, 4000, 4000, 2000 }
  };
  RD_OPT *const rd = &cpi->rd;
  const int idx = cpi->oxcf.mode == BEST;
  memcpy(rd->thresh_mult_sub8x8, thresh_mult[idx], sizeof(thresh_mult[idx]));
}

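// Adapts the rd threshold factors after a mode decision: the winning mode's
// factor decays by 1/16th of its value while every other mode's factor grows
// by RD_THRESH_INC, capped at rd_thresh * RD_THRESH_MAX_FACT. The update is
// applied to the current block size and its neighbors (bsize - 1 through
// bsize + 2).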
void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
                               int bsize, int best_mode_index) {
  if (rd_thresh > 0) {
    const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
    int mode;
    for (mode = 0; mode < top_mode; ++mode) {
      const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
      const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
      BLOCK_SIZE bs;
      for (bs = min_size; bs <= max_size; ++bs) {
        int *const fact = &factor_buf[bs][mode];
        if (mode == best_mode_index) {
          *fact -= (*fact >> 4);
        } else {
          *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
        }
      }
    }
  }
}

int vp9_get_intra_cost_penalty(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
                               int qindex, int qdelta) {
  // Reduce the intra cost penalty for small blocks (<=16x16).
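  // (reduction_fac is applied as a right shift below: 4 -> /16, 2 -> /4.)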
  int reduction_fac =
      (bsize <= BLOCK_16X16) ? ((bsize <= BLOCK_8X8) ? 4 : 2) : 0;

  if (cpi->noise_estimate.enabled && cpi->noise_estimate.level == kHigh)
    // Don't reduce intra cost penalty if estimated noise level is high.
    reduction_fac = 0;

  // Always use VPX_BITS_8 as input here because the penalty is applied
  // to rate not distortion so we want a consistent penalty for all bit
  // depths. If the actual bit depth were passed in here then the value
  // returned by vp9_dc_quant() would scale with the bit depth and we would
  // then need to apply inverse scaling to correct back to a bit depth
  // independent rate penalty.
  return (20 * vp9_dc_quant(qindex, qdelta, VPX_BITS_8)) >> reduction_fac;
}