1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <assert.h>
12 #include <math.h>
13 #include <stdio.h>
14
15 #include "./vp9_rtcd.h"
16
17 #include "vpx_dsp/vpx_dsp_common.h"
18 #include "vpx_mem/vpx_mem.h"
19 #include "vpx_ports/bitops.h"
20 #include "vpx_ports/mem.h"
21 #include "vpx_ports/system_state.h"
22
23 #include "vp9/common/vp9_common.h"
24 #include "vp9/common/vp9_entropy.h"
25 #include "vp9/common/vp9_entropymode.h"
26 #include "vp9/common/vp9_mvref_common.h"
27 #include "vp9/common/vp9_pred_common.h"
28 #include "vp9/common/vp9_quant_common.h"
29 #include "vp9/common/vp9_reconinter.h"
30 #include "vp9/common/vp9_reconintra.h"
31 #include "vp9/common/vp9_seg_common.h"
32
33 #include "vp9/encoder/vp9_cost.h"
34 #include "vp9/encoder/vp9_encodemb.h"
35 #include "vp9/encoder/vp9_encodemv.h"
36 #include "vp9/encoder/vp9_encoder.h"
37 #include "vp9/encoder/vp9_mcomp.h"
38 #include "vp9/encoder/vp9_quantize.h"
39 #include "vp9/encoder/vp9_ratectrl.h"
40 #include "vp9/encoder/vp9_rd.h"
41 #include "vp9/encoder/vp9_tokenize.h"
42
43 #define RD_THRESH_POW 1.25
44
45 // Factor to weigh the rate for switchable interp filters.
46 #define SWITCHABLE_INTERP_RATE_FACTOR 1
47
// Marks an RD cost as invalid/unset by saturating every field.
void vp9_rd_cost_reset(RD_COST *rd_cost) {
  rd_cost->rdcost = INT64_MAX;
  rd_cost->dist = INT64_MAX;
  rd_cost->rate = INT_MAX;
}
53
// Clears an RD cost accumulator so rate/distortion can be summed into it.
void vp9_rd_cost_init(RD_COST *rd_cost) {
  rd_cost->rdcost = 0;
  rd_cost->dist = 0;
  rd_cost->rate = 0;
}
59
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
// Indexed by BLOCK_SIZE, from the smallest (4x4) to the largest (64x64).
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
};
67
// Fills the bit-cost tables used by the RD search from the current frame
// probability context: intra y modes (key frame, conditioned on the
// above/left modes), intra uv modes, switchable interpolation filters
// and tx-size selection.
static void fill_mode_costs(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const fc = cpi->common.fc;
  int i, j;

  // Key-frame y mode: one cost table per (above mode, left mode) pair.
  for (i = 0; i < INTRA_MODES; ++i) {
    for (j = 0; j < INTRA_MODES; ++j) {
      vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
                      vp9_intra_mode_tree);
    }
  }

  // Inter-frame y mode costs are built from y_mode_prob[1] only —
  // presumably a single representative size group; confirm against callers.
  vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
  // UV mode costs conditioned on the chosen y mode, for key and inter frames.
  for (i = 0; i < INTRA_MODES; ++i) {
    vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME][i],
                    vp9_kf_uv_mode_prob[i], vp9_intra_mode_tree);
    vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME][i],
                    fc->uv_mode_prob[i], vp9_intra_mode_tree);
  }

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
    vp9_cost_tokens(cpi->switchable_interp_costs[i],
                    fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
  }

  // Tx size is coded as a chain of binary decisions: a "one" bit for each
  // size below the chosen one (k), then a terminating "zero" bit unless the
  // maximum allowed size (k == i) was chosen, in which case it is implicit.
  for (i = TX_8X8; i < TX_SIZES; ++i) {
    for (j = 0; j < TX_SIZE_CONTEXTS; ++j) {
      const vpx_prob *tx_probs = get_tx_probs(i, j, &fc->tx_probs);
      int k;
      for (k = 0; k <= i; ++k) {
        int cost = 0;
        int m;
        for (m = 0; m <= k - (k == i); ++m) {
          if (m == k)
            cost += vp9_cost_zero(tx_probs[m]);
          else
            cost += vp9_cost_one(tx_probs[m]);
        }
        cpi->tx_size_cost[i - 1][j][k] = cost;
      }
    }
  }
}
110
// Builds coefficient-token cost tables from the model probabilities for
// every tx size, plane type, ref type, band and coefficient context.
// Index [0] holds the costs from vp9_cost_tokens(), index [1] the variant
// from vp9_cost_tokens_skip(); the two must agree on the EOB token cost
// (asserted below).
static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  int i, j, k, l;
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; ++t)
    for (i = 0; i < PLANE_TYPES; ++i)
      for (j = 0; j < REF_TYPES; ++j)
        for (k = 0; k < COEF_BANDS; ++k)
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
            vpx_prob probs[ENTROPY_NODES];
            // Expand the compact model probabilities to the full coef tree.
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs, vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
                                 vp9_coef_tree);
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
          }
}
129
// Values are now correlated to quantizer.
// SAD-per-bit lookup tables, indexed by qindex, one pair per bit depth.
// Filled once by vp9_init_me_luts() and read by vp9_initialize_me_consts().
static int sad_per_bit16lut_8[QINDEX_RANGE];
static int sad_per_bit4lut_8[QINDEX_RANGE];

#if CONFIG_VP9_HIGHBITDEPTH
static int sad_per_bit16lut_10[QINDEX_RANGE];
static int sad_per_bit4lut_10[QINDEX_RANGE];
static int sad_per_bit16lut_12[QINDEX_RANGE];
static int sad_per_bit4lut_12[QINDEX_RANGE];
#endif
140
// Fills one pair of SAD-per-bit tables using a linear model of the
// quantizer step size for the given bit depth.
static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
                            vpx_bit_depth_t bit_depth) {
  int q_index;
  // A formulaic calculation (rather than a hard-coded table) keeps the
  // values easy to regenerate when the quantizer tables change
  // experimentally.
  for (q_index = 0; q_index < range; ++q_index) {
    const double q = vp9_convert_qindex_to_q(q_index, bit_depth);
    bit16lut[q_index] = (int)(0.0418 * q + 2.4107);
    bit4lut[q_index] = (int)(0.063 * q + 2.742);
  }
}
153
// One-time initialization of all SAD-per-bit lookup tables (one pair per
// supported bit depth).
void vp9_init_me_luts(void) {
  init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
                  VPX_BITS_8);
#if CONFIG_VP9_HIGHBITDEPTH
  init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
                  VPX_BITS_10);
  init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
                  VPX_BITS_12);
#endif
}
164
// RD multiplier boost (Q7, added on top of the frame-type factor) as a
// function of gfu_boost / 100, capped at index 15; see modulate_rdmult().
static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
                                         8,  8,  4,  4,  2,  2,  1,  0 };

// Note that the element below for frame type "USE_BUF_FRAME", which indicates
// that the show frame flag is set, should not be used as no real frame
// is encoded so we should not reach here. However, a dummy value
// is inserted here to make sure the data structure has the right number
// of values assigned.
static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128,
                                                              128, 144, 144 };
175
// Base RD multiplier (lambda) for the given qindex, proportional to the
// square of the DC quantizer step, with frame-type dependent scaling.
int vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {
  // largest dc_quant is 21387, therefore rdmult should always fit in int32_t
  const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
  uint32_t rdmult = q * q;

  // Scale q^2 by a frame-type and qindex dependent factor; the "* n +
  // rdmult / 2" forms express half-steps (e.g. x4.5, x7.5).
  if (cpi->common.frame_type != KEY_FRAME) {
    if (qindex < 128)
      rdmult = rdmult * 4;
    else if (qindex < 190)
      rdmult = rdmult * 4 + rdmult / 2;
    else
      rdmult = rdmult * 3;
  } else {
    if (qindex < 64)
      rdmult = rdmult * 4;
    else if (qindex <= 128)
      rdmult = rdmult * 3 + rdmult / 2;
    else if (qindex < 190)
      rdmult = rdmult * 4 + rdmult / 2;
    else
      rdmult = rdmult * 7 + rdmult / 2;
  }
#if CONFIG_VP9_HIGHBITDEPTH
  // Normalize back toward the 8-bit-depth scale — presumably because
  // dc_quant scales with bit depth (cf. the /4, /16, /64 divisors in
  // compute_rd_thresh_factor); confirm against vp9_dc_quant().
  switch (cpi->common.bit_depth) {
    case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
    case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
    default: break;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // Never return 0: the result is used as a multiplier in RD cost math.
  return rdmult > 0 ? rdmult : 1;
}
207
// Modulates a base rdmult in two-pass encoding according to the frame's
// GF-group update type and its (possibly per-layer) ARF boost.  First-pass
// and key frames are returned unchanged.
static int modulate_rdmult(const VP9_COMP *cpi, int rdmult) {
  int64_t rdmult_64 = rdmult;
  if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
    const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
    // Per-layer ARF boost when multi-layer ARF is active, otherwise the
    // global rate-control boost.
    const int gfu_boost = cpi->multi_layer_arf
                              ? gf_group->gfu_boost[gf_group->index]
                              : cpi->rc.gfu_boost;
    const int boost_index = VPXMIN(15, (gfu_boost / 100));

    // Both factor tables are Q7: 128 means x1.
    rdmult_64 = (rdmult_64 * rd_frame_type_factor[frame_type]) >> 7;
    rdmult_64 += ((rdmult_64 * rd_boost_factor[boost_index]) >> 7);
  }
  return (int)rdmult_64;
}
223
// RD multiplier for the given qindex: the base value further modulated by
// the two-pass frame-type/boost factors.
int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
  return modulate_rdmult(cpi, vp9_compute_rd_mult_based_on_qindex(cpi, qindex));
}
228
vp9_get_adaptive_rdmult(const VP9_COMP * cpi,double beta)229 int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) {
230 int rdmult =
231 vp9_compute_rd_mult_based_on_qindex(cpi, cpi->common.base_qindex);
232 rdmult = (int)((double)rdmult / beta);
233 rdmult = rdmult > 0 ? rdmult : 1;
234 return modulate_rdmult(cpi, rdmult);
235 }
236
// Per-qindex factor used to scale the baseline mode-pruning thresholds.
// The DC quantizer step is first normalized to an 8-bit-depth scale
// (divisors 4/16/64) before applying the power curve.
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
  double q;
#if CONFIG_VP9_HIGHBITDEPTH
  switch (bit_depth) {
    case VPX_BITS_8: q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0; break;
    case VPX_BITS_10: q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0; break;
    default:
      assert(bit_depth == VPX_BITS_12);
      q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0;
      break;
  }
#else
  (void)bit_depth;
  q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // TODO(debargha): Adjust the function below.
  // The floor of 8 keeps thresholds meaningful at very low q.
  return VPXMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
}
255
// Copies the SAD-per-bit constants for the active qindex (and bit depth)
// into the macroblock context used by motion estimation.
void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
#if CONFIG_VP9_HIGHBITDEPTH
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8:
      x->sadperbit16 = sad_per_bit16lut_8[qindex];
      x->sadperbit4 = sad_per_bit4lut_8[qindex];
      break;
    case VPX_BITS_10:
      x->sadperbit16 = sad_per_bit16lut_10[qindex];
      x->sadperbit4 = sad_per_bit4lut_10[qindex];
      break;
    default:
      assert(cpi->common.bit_depth == VPX_BITS_12);
      x->sadperbit16 = sad_per_bit16lut_12[qindex];
      x->sadperbit4 = sad_per_bit4lut_12[qindex];
      break;
  }
#else
  (void)cpi;
  x->sadperbit16 = sad_per_bit16lut_8[qindex];
  x->sadperbit4 = sad_per_bit4lut_8[qindex];
#endif  // CONFIG_VP9_HIGHBITDEPTH
}
279
// Computes the per-segment, per-block-size mode pruning thresholds by
// scaling the baseline thresh_mult values with a qindex- and block-size-
// dependent factor.
static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
  int i, bsize, segment_id;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    // Effective qindex for the segment, including the y dc delta.
    const int qindex =
        clamp(vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
                  cm->y_dc_delta_q,
              0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      // Guard against overflow of thresh_mult * t below.
      const int thresh_max = INT_MAX / t;

      // Blocks >= 8x8 use the per-mode table; sub-8x8 blocks use the
      // smaller per-reference table.
      if (bsize >= BLOCK_8X8) {
        for (i = 0; i < MAX_MODES; ++i)
          rd->threshes[segment_id][bsize][i] = rd->thresh_mult[i] < thresh_max
                                                   ? rd->thresh_mult[i] * t / 4
                                                   : INT_MAX;
      } else {
        for (i = 0; i < MAX_REFS; ++i)
          rd->threshes[segment_id][bsize][i] =
              rd->thresh_mult_sub8x8[i] < thresh_max
                  ? rd->thresh_mult_sub8x8[i] * t / 4
                  : INT_MAX;
      }
    }
  }
}
311
// Sets up all RD constants for the current frame: RDMULT/RDDIV, error per
// bit, tx-size selection flag, mode pruning thresholds, partition
// probabilities and — depending on pass and speed features — the token,
// partition, mode and MV cost tables.
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  RD_OPT *const rd = &cpi->rd;
  int i;

  vpx_clear_system_state();

  rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
  rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  set_error_per_bit(x, rd->RDMULT);

  x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                       cm->frame_type != KEY_FRAME)
                          ? 0
                          : 1;

  set_block_thresholds(cm, rd);
  set_partition_probs(cm, xd);

  if (cpi->oxcf.pass == 1) {
    // First pass only needs MV costs, and only on inter frames.
    if (!frame_is_intra_only(cm))
      vp9_build_nmv_cost_table(
          x->nmvjointcost,
          cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
          &cm->fc->nmvc, cm->allow_high_precision_mv);
  } else {
    if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
      fill_token_costs(x->token_costs, cm->fc->coef_probs);

    if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
        cm->frame_type == KEY_FRAME) {
      for (i = 0; i < PARTITION_CONTEXTS; ++i)
        vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i),
                        vp9_partition_tree);
    }

    // In non-RD pick mode, refresh the mode/MV cost tables only
    // periodically (frame count & 7 == 1) or on key frames.
    if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
        cm->frame_type == KEY_FRAME) {
      fill_mode_costs(cpi);

      if (!frame_is_intra_only(cm)) {
        vp9_build_nmv_cost_table(
            x->nmvjointcost,
            cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
            &cm->fc->nmvc, cm->allow_high_precision_mv);

        for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
          vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
                          cm->fc->inter_mode_probs[i], vp9_inter_mode_tree);
      }
    }
  }
}
368
// NOTE: The tables below must be of the same size.
// model_rd_norm() interpolates between entries xq and xq + 1, so every
// reachable index must have a valid successor entry.

// The functions described below are sampled at the four most significant
// bits of x^2 + 8 / 256.

// Normalized rate:
// This table models the rate for a Laplacian source with given variance
// when quantized with a uniform quantizer with given stepsize. The
// closed form expression is:
// Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
// where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
// and H(x) is the binary entropy function.
static const int rate_tab_q10[] = {
  65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142, 4044,
  3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186, 3133, 3037,
  2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353, 2290, 2232, 2179,
  2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651, 1608, 1530, 1460, 1398,
  1342, 1290, 1243, 1199, 1159, 1086, 1021, 963, 911, 864, 821, 781, 745,
  680, 623, 574, 530, 490, 455, 424, 395, 345, 304, 269, 239, 213,
  190, 171, 154, 126, 104, 87, 73, 61, 52, 44, 38, 28, 21,
  16, 12, 10, 8, 6, 5, 3, 2, 1, 1, 1, 0, 0,
};

// Normalized distortion:
// This table models the normalized distortion for a Laplacian source
// with given variance when quantized with a uniform quantizer
// with given stepsize. The closed form expression is:
// Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
// where x = qpstep / sqrt(variance).
// Note the actual distortion is Dn * variance.
static const int dist_tab_q10[] = {
  0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5, 5,
  6, 7, 7, 8, 9, 11, 12, 13, 15, 16, 17, 18, 21,
  24, 26, 29, 31, 34, 36, 39, 44, 49, 54, 59, 64, 69,
  73, 78, 88, 97, 106, 115, 124, 133, 142, 151, 167, 184, 200,
  215, 231, 245, 260, 274, 301, 327, 351, 375, 397, 418, 439, 458,
  495, 528, 559, 587, 613, 637, 659, 680, 717, 749, 777, 801, 823,
  842, 859, 874, 899, 919, 936, 949, 960, 969, 977, 983, 994, 1001,
  1006, 1010, 1013, 1015, 1017, 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
};
// Sample points (x^2 in Q10) at which the two tables above are tabulated.
static const int xsq_iq_q10[] = {
  0, 4, 8, 12, 16, 20, 24, 28, 32,
  40, 48, 56, 64, 72, 80, 88, 96, 112,
  128, 144, 160, 176, 192, 208, 224, 256, 288,
  320, 352, 384, 416, 448, 480, 544, 608, 672,
  736, 800, 864, 928, 992, 1120, 1248, 1376, 1504,
  1632, 1760, 1888, 2016, 2272, 2528, 2784, 3040, 3296,
  3552, 3808, 4064, 4576, 5088, 5600, 6112, 6624, 7136,
  7648, 8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328,
  16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688, 32736,
  36832, 40928, 45024, 49120, 53216, 57312, 61408, 65504, 73696,
  81888, 90080, 98272, 106464, 114656, 122848, 131040, 147424, 163808,
  180192, 196576, 212960, 229344, 245728,
};
423
// Looks up the normalized rate and distortion (both Q10) for the given
// normalized squared quantizer step xsq_q10, linearly interpolating
// between adjacent entries of the tables above.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // Index derived from the four most significant bits of x^2 + 8 / 256
  // (see the sampling note above the tables): k selects the "octave",
  // xq the table entry.
  const int tmp = (xsq_q10 >> 2) + 8;
  const int k = get_msb(tmp) - 3;
  const int xq = (k << 3) + ((tmp >> k) & 0x7);
  const int one_q10 = 1 << 10;
  // a_q10/b_q10 are the Q10 interpolation weights for entries xq+1 and xq.
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  const int b_q10 = one_q10 - a_q10;
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}
434
// Vector form of model_rd_norm() over all MAX_MB_PLANE planes.  The
// per-element math is identical to the scalar version and is kept
// unrolled here — keep the two functions in sync.
static void model_rd_norm_vec(int xsq_q10[MAX_MB_PLANE],
                              int r_q10[MAX_MB_PLANE],
                              int d_q10[MAX_MB_PLANE]) {
  int i;
  const int one_q10 = 1 << 10;
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    const int tmp = (xsq_q10[i] >> 2) + 8;
    const int k = get_msb(tmp) - 3;
    const int xq = (k << 3) + ((tmp >> k) & 0x7);
    const int a_q10 = ((xsq_q10[i] - xsq_iq_q10[xq]) << 10) >> (2 + k);
    const int b_q10 = one_q10 - a_q10;
    r_q10[i] = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
    d_q10[i] = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
  }
}
450
// Upper bound for xsq_q10 passed into the models: one less than the last
// entry of xsq_iq_q10[], so interpolation never reads past the table end.
static const uint32_t MAX_XSQ_Q10 = 245727;
452
// Estimates rate (bit-cost units) and distortion for a block of
// 2^n_log2 samples with the given variance, quantized with step qstep.
void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
                                  unsigned int qstep, int *rate,
                                  int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  if (var == 0) {
    // Zero variance: nothing to code.
    *rate = 0;
    *dist = 0;
  } else {
    int d_q10, r_q10;
    // x^2 in Q10 = 2^n_log2 * qstep^2 / var, rounded to nearest, clamped
    // to the table range.
    const uint64_t xsq_q10_64 =
        (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
    const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
    // Scale rate by the 2^n_log2 samples and convert Q10 to cost units.
    *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - VP9_PROB_COST_SHIFT);
    // Actual distortion is the normalized value scaled by the variance.
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
  }
}
475
// Implements a fixed length vector form of vp9_model_rd_from_var_lapndz where
// vectors are of length MAX_MB_PLANE and all elements of var are non-zero.
void vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE],
                                      unsigned int n_log2[MAX_MB_PLANE],
                                      unsigned int qstep[MAX_MB_PLANE],
                                      int64_t *rate_sum, int64_t *dist_sum) {
  int i;
  int xsq_q10[MAX_MB_PLANE], d_q10[MAX_MB_PLANE], r_q10[MAX_MB_PLANE];
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    // Same normalized x^2 computation as the scalar version; var[i] must
    // be non-zero here (see the function comment above).
    const uint64_t xsq_q10_64 =
        (((uint64_t)qstep[i] * qstep[i] << (n_log2[i] + 10)) + (var[i] >> 1)) /
        var[i];
    xsq_q10[i] = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
  }
  model_rd_norm_vec(xsq_q10, r_q10, d_q10);
  // Accumulate (not overwrite) into the caller-provided sums.
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    int rate =
        ROUND_POWER_OF_TWO(r_q10[i] << n_log2[i], 10 - VP9_PROB_COST_SHIFT);
    int64_t dist = (var[i] * (int64_t)d_q10[i] + 512) >> 10;
    *rate_sum += rate;
    *dist_sum += dist;
  }
}
499
// Copies the above/left entropy contexts for one plane into flat arrays,
// collapsing them to the granularity of the given transform size.
void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;

  // For tx sizes above 4x4, 2/4/8 context bytes are collapsed into a
  // single boolean by loading them as one wider integer.
  // NOTE(review): these type-punned loads assume the context buffers are
  // suitably aligned for uint16/32/64 access — confirm buffer alignment.
  int i;
  switch (tx_size) {
    case TX_4X4:
      memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = !!*(const uint16_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = !!*(const uint16_t *)&left[i];
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = !!*(const uint32_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = !!*(const uint32_t *)&left[i];
      break;
    default:
      assert(tx_size == TX_32X32);
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = !!*(const uint64_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = !!*(const uint64_t *)&left[i];
      break;
  }
}
537
// Evaluates the candidate reference MVs (NEAREST, NEAR and the stored
// prediction MV) for ref_frame at full-pel precision and records in the
// macroblock context: which candidate gives the lowest SAD against the
// source block, that SAD, and the largest full-pel MV magnitude seen.
void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  int i;
  int zero_seen = 0;
  int best_index = 0;
  int best_sad = INT_MAX;
  int this_sad = INT_MAX;
  int max_mv = 0;
  int near_same_nearest;
  uint8_t *src_y_ptr = x->plane[0].src.buf;
  uint8_t *ref_y_ptr;
  // The third candidate (x->pred_mv) is only considered for block sizes
  // below the maximum partition size.
  const int num_mv_refs =
      MAX_MV_REF_CANDIDATES + (block_size < x->max_partition_size);

  MV pred_mv[3];
  pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
  pred_mv[2] = x->pred_mv[ref_frame];
  assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));

  near_same_nearest = x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
                      x->mbmi_ext->ref_mvs[ref_frame][1].as_int;

  // Get the sad for each candidate reference mv.
  for (i = 0; i < num_mv_refs; ++i) {
    const MV *this_mv = &pred_mv[i];
    int fp_row, fp_col;
    // NOTE(review): INT16_MAX components appear to mark an unset/invalid
    // candidate — confirm against the MV prediction code.
    if (this_mv->row == INT16_MAX || this_mv->col == INT16_MAX) continue;
    // Skip the NEAR candidate when it duplicates NEAREST.
    if (i == 1 && near_same_nearest) continue;
    // Convert the 1/8-pel MV to full-pel with rounding (sign-dependent
    // tie handling).
    fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
    fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
    max_mv = VPXMAX(max_mv, VPXMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);

    // Only measure the zero MV once.
    if (fp_row == 0 && fp_col == 0 && zero_seen) continue;
    zero_seen |= (fp_row == 0 && fp_col == 0);

    ref_y_ptr = &ref_y_buffer[ref_y_stride * fp_row + fp_col];
    // Find sad for current vector.
    this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
                                           ref_y_ptr, ref_y_stride);
    // Note if it is the best so far.
    if (this_sad < best_sad) {
      best_sad = this_sad;
      best_index = i;
    }
  }

  // Note the index of the mv that worked best in the reference list.
  x->mv_best_ref_index[ref_frame] = best_index;
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}
590
vp9_setup_pred_block(const MACROBLOCKD * xd,struct buf_2d dst[MAX_MB_PLANE],const YV12_BUFFER_CONFIG * src,int mi_row,int mi_col,const struct scale_factors * scale,const struct scale_factors * scale_uv)591 void vp9_setup_pred_block(const MACROBLOCKD *xd,
592 struct buf_2d dst[MAX_MB_PLANE],
593 const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
594 const struct scale_factors *scale,
595 const struct scale_factors *scale_uv) {
596 int i;
597
598 dst[0].buf = src->y_buffer;
599 dst[0].stride = src->y_stride;
600 dst[1].buf = src->u_buffer;
601 dst[2].buf = src->v_buffer;
602 dst[1].stride = dst[2].stride = src->uv_stride;
603
604 for (i = 0; i < MAX_MB_PLANE; ++i) {
605 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
606 i ? scale_uv : scale, xd->plane[i].subsampling_x,
607 xd->plane[i].subsampling_y);
608 }
609 }
610
// Byte/sample offset of a 4x4 sub-block, given its raster index within a
// plane block and the row stride.
int vp9_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
                            int stride) {
  const int bwl = b_width_log2_lookup[plane_bsize];
  const int row = (raster_block >> bwl) * 4;
  const int col = (raster_block & ((1 << bwl) - 1)) * 4;
  return row * stride + col;
}
618
// Pointer into an int16 buffer at the position of the given 4x4 sub-block,
// using the plane block's own width as the stride.
int16_t *vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
                                       int16_t *base) {
  const int stride = num_4x4_blocks_wide_lookup[plane_bsize] * 4;
  return &base[vp9_raster_block_offset(plane_bsize, raster_block, stride)];
}
624
vp9_get_scaled_ref_frame(const VP9_COMP * cpi,int ref_frame)625 YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
626 int ref_frame) {
627 const VP9_COMMON *const cm = &cpi->common;
628 const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
629 const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
630 assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
631 return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX)
632 ? &cm->buffer_pool->frame_bufs[scaled_idx].buf
633 : NULL;
634 }
635
vp9_get_switchable_rate(const VP9_COMP * cpi,const MACROBLOCKD * const xd)636 int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
637 const MODE_INFO *const mi = xd->mi[0];
638 const int ctx = get_pred_context_switchable_interp(xd);
639 return SWITCHABLE_INTERP_RATE_FACTOR *
640 cpi->switchable_interp_costs[ctx][mi->interp_filter];
641 }
642
// Sets the baseline per-mode RD thresholds (for blocks >= 8x8) that gate
// how readily each mode is examined in the RD loop; they are later scaled
// per segment and block size by set_block_thresholds().
void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
  int i;
  RD_OPT *const rd = &cpi->rd;
  SPEED_FEATURES *const sf = &cpi->sf;

  // Set baseline threshold values.
  for (i = 0; i < MAX_MODES; ++i)
    rd->thresh_mult[i] = cpi->oxcf.mode == BEST ? -500 : 0;

  // NEAREST modes start with a small penalty only when adaptive
  // thresholding can later adjust them.
  if (sf->adaptive_rd_thresh) {
    rd->thresh_mult[THR_NEARESTMV] = 300;
    rd->thresh_mult[THR_NEARESTG] = 300;
    rd->thresh_mult[THR_NEARESTA] = 300;
  } else {
    rd->thresh_mult[THR_NEARESTMV] = 0;
    rd->thresh_mult[THR_NEARESTG] = 0;
    rd->thresh_mult[THR_NEARESTA] = 0;
  }

  rd->thresh_mult[THR_DC] += 1000;

  rd->thresh_mult[THR_NEWMV] += 1000;
  rd->thresh_mult[THR_NEWA] += 1000;
  rd->thresh_mult[THR_NEWG] += 1000;

  rd->thresh_mult[THR_NEARMV] += 1000;
  rd->thresh_mult[THR_NEARA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;

  rd->thresh_mult[THR_TM] += 1000;

  // Compound NEAR/NEW modes receive progressively larger offsets.
  rd->thresh_mult[THR_COMP_NEARLA] += 1500;
  rd->thresh_mult[THR_COMP_NEWLA] += 2000;
  rd->thresh_mult[THR_NEARG] += 1000;
  rd->thresh_mult[THR_COMP_NEARGA] += 1500;
  rd->thresh_mult[THR_COMP_NEWGA] += 2000;

  rd->thresh_mult[THR_ZEROMV] += 2000;
  rd->thresh_mult[THR_ZEROG] += 2000;
  rd->thresh_mult[THR_ZEROA] += 2000;
  rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
  rd->thresh_mult[THR_COMP_ZEROGA] += 2500;

  // Directional intra modes get the largest offsets.
  rd->thresh_mult[THR_H_PRED] += 2000;
  rd->thresh_mult[THR_V_PRED] += 2000;
  rd->thresh_mult[THR_D45_PRED] += 2500;
  rd->thresh_mult[THR_D135_PRED] += 2500;
  rd->thresh_mult[THR_D117_PRED] += 2500;
  rd->thresh_mult[THR_D153_PRED] += 2500;
  rd->thresh_mult[THR_D207_PRED] += 2500;
  rd->thresh_mult[THR_D63_PRED] += 2500;
}
696
// Sets the baseline per-reference RD thresholds for sub-8x8 blocks; BEST
// mode (row 0) uses slightly higher values than the other encode modes.
void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
  static const int thresh_mult[2][MAX_REFS] = {
    { 2500, 2500, 2500, 4500, 4500, 2500 },
    { 2000, 2000, 2000, 4000, 4000, 2000 }
  };
  RD_OPT *const rd = &cpi->rd;
  const int row = (cpi->oxcf.mode == BEST) ? 1 : 0;
  memcpy(rd->thresh_mult_sub8x8, thresh_mult[row], sizeof(thresh_mult[row]));
}
706
vp9_update_rd_thresh_fact(int (* factor_buf)[MAX_MODES],int rd_thresh,int bsize,int best_mode_index)707 void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
708 int bsize, int best_mode_index) {
709 if (rd_thresh > 0) {
710 const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
711 int mode;
712 for (mode = 0; mode < top_mode; ++mode) {
713 const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
714 const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
715 BLOCK_SIZE bs;
716 for (bs = min_size; bs <= max_size; ++bs) {
717 int *const fact = &factor_buf[bs][mode];
718 if (mode == best_mode_index) {
719 *fact -= (*fact >> 4);
720 } else {
721 *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
722 }
723 }
724 }
725 }
726 }
727
// Returns the rate penalty applied to intra modes, derived from the DC
// quantizer step at (qindex + qdelta) and reduced for small blocks.
int vp9_get_intra_cost_penalty(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
                               int qindex, int qdelta) {
  // Reduce the intra cost penalty for small blocks (<=16x16).
  int reduction_fac =
      (bsize <= BLOCK_16X16) ? ((bsize <= BLOCK_8X8) ? 4 : 2) : 0;

  if (cpi->noise_estimate.enabled && cpi->noise_estimate.level == kHigh)
    // Don't reduce intra cost penalty if estimated noise level is high.
    reduction_fac = 0;

  // Always use VPX_BITS_8 as input here because the penalty is applied
  // to rate not distortion so we want a consistent penalty for all bit
  // depths. If the actual bit depth were passed in here then the value
  // returned by vp9_dc_quant() would scale with the bit depth and we would
  // then need to apply inverse scaling to correct back to a bit depth
  // independent rate penalty.
  return (20 * vp9_dc_quant(qindex, qdelta, VPX_BITS_8)) >> reduction_fac;
}
746