1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <assert.h>
12 #include <math.h>
13 #include <stdio.h>
14
15 #include "./vp9_rtcd.h"
16
17 #include "vpx_dsp/vpx_dsp_common.h"
18 #include "vpx_mem/vpx_mem.h"
19 #include "vpx_ports/bitops.h"
20 #include "vpx_ports/mem.h"
21 #include "vpx_ports/system_state.h"
22
23 #include "vp9/common/vp9_common.h"
24 #include "vp9/common/vp9_entropy.h"
25 #include "vp9/common/vp9_entropymode.h"
26 #include "vp9/common/vp9_mvref_common.h"
27 #include "vp9/common/vp9_pred_common.h"
28 #include "vp9/common/vp9_quant_common.h"
29 #include "vp9/common/vp9_reconinter.h"
30 #include "vp9/common/vp9_reconintra.h"
31 #include "vp9/common/vp9_seg_common.h"
32
33 #include "vp9/encoder/vp9_cost.h"
34 #include "vp9/encoder/vp9_encodemb.h"
35 #include "vp9/encoder/vp9_encodemv.h"
36 #include "vp9/encoder/vp9_encoder.h"
37 #include "vp9/encoder/vp9_mcomp.h"
38 #include "vp9/encoder/vp9_quantize.h"
39 #include "vp9/encoder/vp9_ratectrl.h"
40 #include "vp9/encoder/vp9_rd.h"
41 #include "vp9/encoder/vp9_tokenize.h"
42
43 #define RD_THRESH_POW 1.25
44
45 // Factor to weigh the rate for switchable interp filters.
46 #define SWITCHABLE_INTERP_RATE_FACTOR 1
47
// Saturates every field of *rd_cost: rate becomes INT_MAX, while dist and
// rdcost become INT64_MAX.
void vp9_rd_cost_reset(RD_COST *rd_cost) {
  rd_cost->rdcost = INT64_MAX;
  rd_cost->dist = INT64_MAX;
  rd_cost->rate = INT_MAX;
}
53
// Clears every field of *rd_cost (rate, dist and rdcost) to zero.
void vp9_rd_cost_init(RD_COST *rd_cost) {
  rd_cost->rdcost = 0;
  rd_cost->dist = 0;
  rd_cost->rate = 0;
}
59
60 // The baseline rd thresholds for breaking out of the rd loop for
61 // certain modes are assumed to be based on 8x8 blocks.
62 // This table is used to correct for block size.
63 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
64 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
65 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
66 };
67
// Refreshes the mode cost tables stored in cpi from the key-frame default
// probabilities and from the current frame context (cpi->common.fc):
//   - cpi->y_mode_costs            <- vp9_kf_y_mode_prob
//   - cpi->mbmode_cost             <- fc->y_mode_prob[1]
//   - cpi->intra_uv_mode_cost      <- vp9_kf_uv_mode_prob / fc->uv_mode_prob
//   - cpi->switchable_interp_costs <- fc->switchable_interp_prob
static void fill_mode_costs(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const frame_ctx = cpi->common.fc;
  int row, col, mode, filter_ctx;

  // Luma intra mode costs from the key-frame probability table.
  for (row = 0; row < INTRA_MODES; ++row) {
    for (col = 0; col < INTRA_MODES; ++col) {
      vp9_cost_tokens(cpi->y_mode_costs[row][col],
                      vp9_kf_y_mode_prob[row][col], vp9_intra_mode_tree);
    }
  }

  // Luma mode cost from the adaptive frame context.
  vp9_cost_tokens(cpi->mbmode_cost, frame_ctx->y_mode_prob[1],
                  vp9_intra_mode_tree);

  // Chroma intra mode costs, one set per frame type.
  for (mode = 0; mode < INTRA_MODES; ++mode) {
    vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME][mode],
                    vp9_kf_uv_mode_prob[mode], vp9_intra_mode_tree);
    vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME][mode],
                    frame_ctx->uv_mode_prob[mode], vp9_intra_mode_tree);
  }

  // Switchable interpolation filter costs.
  for (filter_ctx = 0; filter_ctx < SWITCHABLE_FILTER_CONTEXTS; ++filter_ctx) {
    vp9_cost_tokens(cpi->switchable_interp_costs[filter_ctx],
                    frame_ctx->switchable_interp_prob[filter_ctx],
                    vp9_switchable_interp_tree);
  }
}
89
// Builds the coefficient token cost tables from the model probabilities.
// For every (transform size, plane type, reference type, band, context) tuple
// the model probabilities in p are expanded to a full probability set, which
// is then converted into two cost vectors: c[...][0][ctx] through
// vp9_cost_tokens() and c[...][1][ctx] through vp9_cost_tokens_skip().
static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  TX_SIZE tx;
  for (tx = TX_4X4; tx <= TX_32X32; ++tx) {
    int plane;
    for (plane = 0; plane < PLANE_TYPES; ++plane) {
      int ref;
      for (ref = 0; ref < REF_TYPES; ++ref) {
        int band;
        for (band = 0; band < COEF_BANDS; ++band) {
          int ctx;
          for (ctx = 0; ctx < BAND_COEFF_CONTEXTS(band); ++ctx) {
            vpx_prob full_probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[tx][plane][ref][band][ctx], full_probs);
            vp9_cost_tokens((int *)c[tx][plane][ref][band][0][ctx], full_probs,
                            vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[tx][plane][ref][band][1][ctx],
                                 full_probs, vp9_coef_tree);
            // Both variants must agree on the end-of-block cost.
            assert(c[tx][plane][ref][band][0][ctx][EOB_TOKEN] ==
                   c[tx][plane][ref][band][1][ctx][EOB_TOKEN]);
          }
        }
      }
    }
  }
}
108
109 // Values are now correlated to quantizer.
110 static int sad_per_bit16lut_8[QINDEX_RANGE];
111 static int sad_per_bit4lut_8[QINDEX_RANGE];
112
113 #if CONFIG_VP9_HIGHBITDEPTH
114 static int sad_per_bit16lut_10[QINDEX_RANGE];
115 static int sad_per_bit4lut_10[QINDEX_RANGE];
116 static int sad_per_bit16lut_12[QINDEX_RANGE];
117 static int sad_per_bit4lut_12[QINDEX_RANGE];
118 #endif
119
// Fills the first `range` entries of bit16lut and bit4lut for one bit depth.
// Each entry is a linear function of the real quantizer value returned by
// vp9_convert_qindex_to_q(), truncated to int. The tables are computed rather
// than hard-coded so that experimental changes to the quantizer tables are
// picked up automatically.
static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
                            vpx_bit_depth_t bit_depth) {
  int qindex = 0;
  while (qindex < range) {
    const double real_q = vp9_convert_qindex_to_q(qindex, bit_depth);
    bit4lut[qindex] = (int)(0.063 * real_q + 2.742);
    bit16lut[qindex] = (int)(0.0418 * real_q + 2.4107);
    ++qindex;
  }
}
132
vp9_init_me_luts(void)133 void vp9_init_me_luts(void) {
134 init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
135 VPX_BITS_8);
136 #if CONFIG_VP9_HIGHBITDEPTH
137 init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
138 VPX_BITS_10);
139 init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
140 VPX_BITS_12);
141 #endif
142 }
143
// Extra rd multiplier weight as a function of golden-frame boost.
// vp9_compute_rd_mult() indexes this with min(15, gfu_boost / 100) and adds
// (rdmult * entry) >> 7, so an entry of 64 adds 50% and 0 adds nothing.
static const int rd_boost_factor[16] = {
  64, 32, 32, 32,  //
  24, 16, 12, 12,  //
  8,  8,  4,  4,   //
  2,  2,  1,  0
};
146 static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128,
147 128, 144 };
148
vp9_compute_rd_mult_based_on_qindex(const VP9_COMP * cpi,int qindex)149 int64_t vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {
150 const int64_t q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
151 #if CONFIG_VP9_HIGHBITDEPTH
152 int64_t rdmult = 0;
153 switch (cpi->common.bit_depth) {
154 case VPX_BITS_8: rdmult = 88 * q * q / 24; break;
155 case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4); break;
156 case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8); break;
157 default:
158 assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
159 return -1;
160 }
161 #else
162 int64_t rdmult = 88 * q * q / 24;
163 #endif // CONFIG_VP9_HIGHBITDEPTH
164 return rdmult;
165 }
166
vp9_compute_rd_mult(const VP9_COMP * cpi,int qindex)167 int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
168 int64_t rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, qindex);
169
170 if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
171 const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
172 const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
173 const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));
174
175 rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
176 rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
177 }
178 if (rdmult < 1) rdmult = 1;
179 return (int)rdmult;
180 }
181
// Maps a quantizer index to the rd threshold factor used by
// set_block_thresholds(). The DC quantizer step is normalised by 4, 16 or 64
// for 8-, 10- and 12-bit input respectively, raised to RD_THRESH_POW, scaled
// by 5.12 and floored at 8. Returns -1 (after asserting) for an unrecognised
// bit depth in high-bit-depth builds.
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
  double norm_q;
  int factor;
#if CONFIG_VP9_HIGHBITDEPTH
  switch (bit_depth) {
    case VPX_BITS_8:
      norm_q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
      break;
    case VPX_BITS_10:
      norm_q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0;
      break;
    case VPX_BITS_12:
      norm_q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0;
      break;
    default:
      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
      return -1;
  }
#else
  (void)bit_depth;
  norm_q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // TODO(debargha): Adjust the function below.
  factor = (int)(pow(norm_q, RD_THRESH_POW) * 5.12);
  return VPXMAX(factor, 8);
}
200
// Loads x->sadperbit16 and x->sadperbit4 from the SAD-per-bit lookup tables
// for `qindex`, choosing the table pair that matches the stream bit depth.
// For an unrecognised bit depth (high-bit-depth builds only) the function
// asserts and leaves x unchanged.
void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
  const int *lut16 = sad_per_bit16lut_8;
  const int *lut4 = sad_per_bit4lut_8;
#if CONFIG_VP9_HIGHBITDEPTH
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8: break;
    case VPX_BITS_10:
      lut16 = sad_per_bit16lut_10;
      lut4 = sad_per_bit4lut_10;
      break;
    case VPX_BITS_12:
      lut16 = sad_per_bit16lut_12;
      lut4 = sad_per_bit4lut_12;
      break;
    default:
      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
      return;
  }
#else
  (void)cpi;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  x->sadperbit16 = lut16[qindex];
  x->sadperbit4 = lut4[qindex];
}
225
// Fills rd->threshes[segment][block size][mode] for every segment and block
// size. Each threshold is the per-mode multiplier (rd->thresh_mult for 8x8 and
// larger, rd->thresh_mult_sub8x8 below 8x8) scaled by the segment's quantizer
// factor and by the block-size factor, then divided by 4 to undo the
// fixed-point scaling of rd_thresh_block_size_factor. A multiplier that would
// overflow int yields INT_MAX instead.
static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
  int seg;

  for (seg = 0; seg < MAX_SEGMENTS; ++seg) {
    const int seg_qindex = vp9_get_qindex(&cm->seg, seg, cm->base_qindex);
    const int qindex = clamp(seg_qindex + cm->y_dc_delta_q, 0, MAXQ);
    const int q_factor = compute_rd_thresh_factor(qindex, cm->bit_depth);
    int bs;

    for (bs = 0; bs < BLOCK_SIZES; ++bs) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int scale = q_factor * rd_thresh_block_size_factor[bs];
      const int mult_limit = INT_MAX / scale;
      int m;

      if (bs < BLOCK_8X8) {
        for (m = 0; m < MAX_REFS; ++m) {
          if (rd->thresh_mult_sub8x8[m] < mult_limit) {
            rd->threshes[seg][bs][m] = rd->thresh_mult_sub8x8[m] * scale / 4;
          } else {
            rd->threshes[seg][bs][m] = INT_MAX;
          }
        }
      } else {
        for (m = 0; m < MAX_MODES; ++m) {
          if (rd->thresh_mult[m] < mult_limit) {
            rd->threshes[seg][bs][m] = rd->thresh_mult[m] * scale / 4;
          } else {
            rd->threshes[seg][bs][m] = INT_MAX;
          }
        }
      }
    }
  }
}
257
// Per-frame setup of the rate-distortion constants and cost tables.
// Always: sets RDDIV/RDMULT, the error-per-bit value, the transform size
// selection flag, the block thresholds and the partition probabilities.
// First pass: only the motion vector cost table is rebuilt (inter frames).
// Otherwise: token, partition, mode, motion vector and inter-mode cost tables
// are rebuilt, each subject to the speed-feature conditions below.
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  RD_OPT *const rd = &cpi->rd;
  const SPEED_FEATURES *const sf = &cpi->sf;
  int is_key_frame;
  int ctx;

  vpx_clear_system_state();

  rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
  rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  set_error_per_bit(x, rd->RDMULT);

  is_key_frame = cm->frame_type == KEY_FRAME;

  // Transform size selection is disabled only for non-key frames that always
  // use the largest transform.
  x->select_tx_size =
      !(sf->tx_size_search_method == USE_LARGESTALL && !is_key_frame);

  set_block_thresholds(cm, rd);
  set_partition_probs(cm, xd);

  if (cpi->oxcf.pass == 1) {
    if (frame_is_intra_only(cm)) return;
    vp9_build_nmv_cost_table(
        x->nmvjointcost,
        cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
        &cm->fc->nmvc, cm->allow_high_precision_mv);
    return;
  }

  if (!sf->use_nonrd_pick_mode || is_key_frame)
    fill_token_costs(x->token_costs, cm->fc->coef_probs);

  if (sf->partition_search_type != VAR_BASED_PARTITION || is_key_frame) {
    for (ctx = 0; ctx < PARTITION_CONTEXTS; ++ctx)
      vp9_cost_tokens(cpi->partition_cost[ctx], get_partition_probs(xd, ctx),
                      vp9_partition_tree);
  }

  // In non-rd mode the mode costs are refreshed only on key frames and on
  // frames whose index is 1 modulo 8.
  if (sf->use_nonrd_pick_mode && (cm->current_video_frame & 0x07) != 1 &&
      !is_key_frame)
    return;

  fill_mode_costs(cpi);

  if (frame_is_intra_only(cm)) return;

  vp9_build_nmv_cost_table(
      x->nmvjointcost,
      cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost, &cm->fc->nmvc,
      cm->allow_high_precision_mv);

  for (ctx = 0; ctx < INTER_MODE_CONTEXTS; ++ctx)
    vp9_cost_tokens((int *)cpi->inter_mode_cost[ctx],
                    cm->fc->inter_mode_probs[ctx], vp9_inter_mode_tree);
}
314
315 // NOTE: The tables below must be of the same size.
316
317 // The functions described below are sampled at the four most significant
318 // bits of x^2 + 8 / 256.
319
// Normalized rate (Q10), 104 samples.
// Models the rate of a Laplacian source with a given variance when quantized
// by a uniform quantizer with a given step size. Closed form:
//   Rn(x) = H(sqrt(r)) + sqrt(r) * [1 + H(r) / (1 - r)],
// where r = exp(-sqrt(2) * x), x = qpstep / sqrt(variance) and H() is the
// binary entropy function.
// Laid out eight samples per row so that row k, column j is the entry
// model_rd_norm() addresses as (k << 3) + j.
static const int rate_tab_q10[] = {
  65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651,  //
  4553,  4389, 4255, 4142, 4044, 3958, 3881, 3811,  //
  3748,  3635, 3538, 3453, 3376, 3307, 3244, 3186,  //
  3133,  3037, 2952, 2877, 2809, 2747, 2690, 2638,  //
  2589,  2501, 2423, 2353, 2290, 2232, 2179, 2130,  //
  2084,  2001, 1928, 1862, 1802, 1748, 1698, 1651,  //
  1608,  1530, 1460, 1398, 1342, 1290, 1243, 1199,  //
  1159,  1086, 1021, 963,  911,  864,  821,  781,   //
  745,   680,  623,  574,  530,  490,  455,  424,   //
  395,   345,  304,  269,  239,  213,  190,  171,   //
  154,   126,  104,  87,   73,   61,   52,   44,    //
  38,    28,   21,   16,   12,   10,   8,    6,     //
  5,     3,    2,    1,    1,    1,    0,    0,
};
337
// Normalized distortion (Q10), 104 samples.
// Models the normalized distortion of a Laplacian source with a given
// variance when quantized by a uniform quantizer with a given step size.
// Closed form:
//   Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2)),
// where x = qpstep / sqrt(variance). The actual distortion is Dn * variance.
// Laid out eight samples per row so that row k, column j is the entry
// model_rd_norm() addresses as (k << 3) + j.
static const int dist_tab_q10[] = {
  0,    0,    1,    1,    1,    2,    2,    2,     //
  3,    3,    4,    5,    5,    6,    7,    7,     //
  8,    9,    11,   12,   13,   15,   16,   17,    //
  18,   21,   24,   26,   29,   31,   34,   36,    //
  39,   44,   49,   54,   59,   64,   69,   73,    //
  78,   88,   97,   106,  115,  124,  133,  142,   //
  151,  167,  184,  200,  215,  231,  245,  260,   //
  274,  301,  327,  351,  375,  397,  418,  439,   //
  458,  495,  528,  559,  587,  613,  637,  659,   //
  680,  717,  749,  777,  801,  823,  842,  859,   //
  874,  899,  919,  936,  949,  960,  969,  977,   //
  983,  994,  1001, 1006, 1010, 1013, 1015, 1017,  //
  1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
};
// Sample positions (x^2 in Q10) at which rate_tab_q10 and dist_tab_q10 are
// tabulated; 104 entries, the same size as those tables.
// Laid out eight samples per row: row k holds the samples for which
// model_rd_norm() computes octave k, spaced (4 << k) apart, so entry
// (k << 3) + j equals ((8 + j) << (k + 2)) - 32.
static const int xsq_iq_q10[] = {
  0,      4,      8,      12,     16,     20,     24,     28,      //
  32,     40,     48,     56,     64,     72,     80,     88,      //
  96,     112,    128,    144,    160,    176,    192,    208,     //
  224,    256,    288,    320,    352,    384,    416,    448,     //
  480,    544,    608,    672,    736,    800,    864,    928,     //
  992,    1120,   1248,   1376,   1504,   1632,   1760,   1888,    //
  2016,   2272,   2528,   2784,   3040,   3296,   3552,   3808,    //
  4064,   4576,   5088,   5600,   6112,   6624,   7136,   7648,    //
  8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,   //
  16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,   //
  32736,  36832,  40928,  45024,  49120,  53216,  57312,  61408,   //
  65504,  73696,  81888,  90080,  98272,  106464, 114656, 122848,  //
  131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
};
369
model_rd_norm(int xsq_q10,int * r_q10,int * d_q10)370 static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
371 const int tmp = (xsq_q10 >> 2) + 8;
372 const int k = get_msb(tmp) - 3;
373 const int xq = (k << 3) + ((tmp >> k) & 0x7);
374 const int one_q10 = 1 << 10;
375 const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
376 const int b_q10 = one_q10 - a_q10;
377 *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
378 *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
379 }
380
model_rd_norm_vec(int xsq_q10[MAX_MB_PLANE],int r_q10[MAX_MB_PLANE],int d_q10[MAX_MB_PLANE])381 static void model_rd_norm_vec(int xsq_q10[MAX_MB_PLANE],
382 int r_q10[MAX_MB_PLANE],
383 int d_q10[MAX_MB_PLANE]) {
384 int i;
385 const int one_q10 = 1 << 10;
386 for (i = 0; i < MAX_MB_PLANE; ++i) {
387 const int tmp = (xsq_q10[i] >> 2) + 8;
388 const int k = get_msb(tmp) - 3;
389 const int xq = (k << 3) + ((tmp >> k) & 0x7);
390 const int a_q10 = ((xsq_q10[i] - xsq_iq_q10[xq]) << 10) >> (2 + k);
391 const int b_q10 = one_q10 - a_q10;
392 r_q10[i] = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
393 d_q10[i] = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
394 }
395 }
396
397 static const uint32_t MAX_XSQ_Q10 = 245727;
398
vp9_model_rd_from_var_lapndz(unsigned int var,unsigned int n_log2,unsigned int qstep,int * rate,int64_t * dist)399 void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
400 unsigned int qstep, int *rate,
401 int64_t *dist) {
402 // This function models the rate and distortion for a Laplacian
403 // source with given variance when quantized with a uniform quantizer
404 // with given stepsize. The closed form expressions are in:
405 // Hang and Chen, "Source Model for transform video coder and its
406 // application - Part I: Fundamental Theory", IEEE Trans. Circ.
407 // Sys. for Video Tech., April 1997.
408 if (var == 0) {
409 *rate = 0;
410 *dist = 0;
411 } else {
412 int d_q10, r_q10;
413 const uint64_t xsq_q10_64 =
414 (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
415 const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
416 model_rd_norm(xsq_q10, &r_q10, &d_q10);
417 *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - VP9_PROB_COST_SHIFT);
418 *dist = (var * (int64_t)d_q10 + 512) >> 10;
419 }
420 }
421
422 // Implements a fixed length vector form of vp9_model_rd_from_var_lapndz where
423 // vectors are of length MAX_MB_PLANE and all elements of var are non-zero.
vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE],unsigned int n_log2[MAX_MB_PLANE],unsigned int qstep[MAX_MB_PLANE],int64_t * rate_sum,int64_t * dist_sum)424 void vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE],
425 unsigned int n_log2[MAX_MB_PLANE],
426 unsigned int qstep[MAX_MB_PLANE],
427 int64_t *rate_sum, int64_t *dist_sum) {
428 int i;
429 int xsq_q10[MAX_MB_PLANE], d_q10[MAX_MB_PLANE], r_q10[MAX_MB_PLANE];
430 for (i = 0; i < MAX_MB_PLANE; ++i) {
431 const uint64_t xsq_q10_64 =
432 (((uint64_t)qstep[i] * qstep[i] << (n_log2[i] + 10)) + (var[i] >> 1)) /
433 var[i];
434 xsq_q10[i] = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
435 }
436 model_rd_norm_vec(xsq_q10, r_q10, d_q10);
437 for (i = 0; i < MAX_MB_PLANE; ++i) {
438 int rate =
439 ROUND_POWER_OF_TWO(r_q10[i] << n_log2[i], 10 - VP9_PROB_COST_SHIFT);
440 int64_t dist = (var[i] * (int64_t)d_q10[i] + 512) >> 10;
441 *rate_sum += rate;
442 *dist_sum += dist;
443 }
444 }
445
// Returns 1 when any of the first `count` entries of ctx is non-zero, else 0.
// Reading the entries one at a time avoids accessing the ENTROPY_CONTEXT
// array through a wider integer type, which would break the effective-type
// (strict aliasing) rule and could be a misaligned load.
static int entropy_ctx_any_nonzero(const ENTROPY_CONTEXT *ctx, int count) {
  int acc = 0;
  int j;
  for (j = 0; j < count; ++j) acc |= ctx[j];
  return acc != 0;
}

// Derives the above/left entropy contexts of a plane block for the given
// transform size from the per-4x4 contexts stored in pd.
// For TX_4X4 the contexts are copied unchanged. For larger transforms each
// group of 2, 4 or 8 consecutive 4x4 contexts is collapsed into a single 0/1
// flag (1 if any entry of the group is non-zero), stored at the index of the
// first entry of the group; the other entries of t_above/t_left are not
// written.
void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;

  int i;
  switch (tx_size) {
    case TX_4X4:
      memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = entropy_ctx_any_nonzero(&above[i], 2);
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = entropy_ctx_any_nonzero(&left[i], 2);
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = entropy_ctx_any_nonzero(&above[i], 4);
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = entropy_ctx_any_nonzero(&left[i], 4);
      break;
    case TX_32X32:
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = entropy_ctx_any_nonzero(&above[i], 8);
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = entropy_ctx_any_nonzero(&left[i], 8);
      break;
    default: assert(0 && "Invalid transform size."); break;
  }
}
483
// Evaluates the candidate reference motion vectors for `ref_frame` and records
// which one predicts the source block best.
// Candidates are the two entries of x->mbmi_ext->ref_mvs[ref_frame], plus
// x->pred_mv[ref_frame] when adaptive motion search is enabled and the block
// is smaller than x->max_partition_size. Each candidate is divided by 8 with
// rounding, and the SAD between the source block and the reference block at
// that offset is measured. Results are stored in x->mv_best_ref_index,
// x->max_mv_context and x->pred_mv_sad for `ref_frame`.
void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  const int num_mv_refs =
      MAX_MV_REF_CANDIDATES +
      (cpi->sf.adaptive_motion_search && block_size < x->max_partition_size);
  // The second candidate is redundant when it equals the first.
  const int near_same_nearest = x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
                                x->mbmi_ext->ref_mvs[ref_frame][1].as_int;
  uint8_t *const src_y_ptr = x->plane[0].src.buf;
  MV pred_mv[3];
  int best_sad = INT_MAX;
  int best_index = 0;
  int max_mv = 0;
  int zero_seen = 0;
  int cand;

  pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
  pred_mv[2] = x->pred_mv[ref_frame];
  assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));

  // Get the sad for each candidate reference mv.
  for (cand = 0; cand < num_mv_refs; ++cand) {
    const MV *const mv = &pred_mv[cand];
    uint8_t *ref_y_ptr;
    int fp_row, fp_col, is_zero, sad;

    if (cand == 1 && near_same_nearest) continue;

    // Divide by 8, rounding to nearest with ties away from zero.
    fp_row = (mv->row + 3 + (mv->row >= 0)) >> 3;
    fp_col = (mv->col + 3 + (mv->col >= 0)) >> 3;
    max_mv = VPXMAX(max_mv, VPXMAX(abs(mv->row), abs(mv->col)) >> 3);

    // Measure the zero offset at most once.
    is_zero = (fp_row == 0 && fp_col == 0);
    if (is_zero) {
      if (zero_seen) continue;
      zero_seen = 1;
    }

    // Find sad for current vector.
    ref_y_ptr = &ref_y_buffer[ref_y_stride * fp_row + fp_col];
    sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
                                      ref_y_ptr, ref_y_stride);

    // Note if it is the best so far.
    if (sad < best_sad) {
      best_sad = sad;
      best_index = cand;
    }
  }

  // Note the index of the mv that worked best in the reference list.
  x->mv_best_ref_index[ref_frame] = best_index;
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}
536
vp9_setup_pred_block(const MACROBLOCKD * xd,struct buf_2d dst[MAX_MB_PLANE],const YV12_BUFFER_CONFIG * src,int mi_row,int mi_col,const struct scale_factors * scale,const struct scale_factors * scale_uv)537 void vp9_setup_pred_block(const MACROBLOCKD *xd,
538 struct buf_2d dst[MAX_MB_PLANE],
539 const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
540 const struct scale_factors *scale,
541 const struct scale_factors *scale_uv) {
542 int i;
543
544 dst[0].buf = src->y_buffer;
545 dst[0].stride = src->y_stride;
546 dst[1].buf = src->u_buffer;
547 dst[2].buf = src->v_buffer;
548 dst[1].stride = dst[2].stride = src->uv_stride;
549
550 for (i = 0; i < MAX_MB_PLANE; ++i) {
551 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
552 i ? scale_uv : scale, xd->plane[i].subsampling_x,
553 xd->plane[i].subsampling_y);
554 }
555 }
556
// Converts a raster-order 4x4 block index inside a plane block into a sample
// offset for a buffer with the given stride. The index is split into a row
// and a column using the log2 block width from b_width_log2_lookup, and each
// is multiplied by 4.
int vp9_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
                            int stride) {
  const int log2_width = b_width_log2_lookup[plane_bsize];
  const int col_mask = (1 << log2_width) - 1;
  const int row_offset = 4 * (raster_block >> log2_width);
  const int col_offset = 4 * (raster_block & col_mask);
  return row_offset * stride + col_offset;
}
564
// Returns a pointer to the first sample of 4x4 block `raster_block` inside an
// int16_t buffer whose row stride is the plane block width (4 samples per 4x4
// block column).
int16_t *vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
                                       int16_t *base) {
  const int row_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset =
      vp9_raster_block_offset(plane_bsize, raster_block, row_stride);
  return base + offset;
}
570
vp9_get_scaled_ref_frame(const VP9_COMP * cpi,int ref_frame)571 YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
572 int ref_frame) {
573 const VP9_COMMON *const cm = &cpi->common;
574 const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
575 const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
576 return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX)
577 ? &cm->buffer_pool->frame_bufs[scaled_idx].buf
578 : NULL;
579 }
580
vp9_get_switchable_rate(const VP9_COMP * cpi,const MACROBLOCKD * const xd)581 int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
582 const MODE_INFO *const mi = xd->mi[0];
583 const int ctx = get_pred_context_switchable_interp(xd);
584 return SWITCHABLE_INTERP_RATE_FACTOR *
585 cpi->switchable_interp_costs[ctx][mi->interp_filter];
586 }
587
// Initialises rd->thresh_mult[] for all prediction modes.
// Every mode starts from a baseline (-500 in BEST mode, 0 otherwise). The
// three NEAREST modes are then set to 300 when adaptive rd thresholds are
// enabled and to 0 otherwise, and the remaining listed modes have a fixed
// per-mode increment added to the baseline.
void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
  // Per-mode increments, applied in the listed order.
  static const struct {
    int mode;
    int increment;
  } mode_increments[] = {
    { THR_DC, 1000 },

    { THR_NEWMV, 1000 },
    { THR_NEWA, 1000 },
    { THR_NEWG, 1000 },

    { THR_NEARMV, 1000 },
    { THR_NEARA, 1000 },
    { THR_COMP_NEARESTLA, 1000 },
    { THR_COMP_NEARESTGA, 1000 },

    { THR_TM, 1000 },

    { THR_COMP_NEARLA, 1500 },
    { THR_COMP_NEWLA, 2000 },
    { THR_NEARG, 1000 },
    { THR_COMP_NEARGA, 1500 },
    { THR_COMP_NEWGA, 2000 },

    { THR_ZEROMV, 2000 },
    { THR_ZEROG, 2000 },
    { THR_ZEROA, 2000 },
    { THR_COMP_ZEROLA, 2500 },
    { THR_COMP_ZEROGA, 2500 },

    { THR_H_PRED, 2000 },
    { THR_V_PRED, 2000 },
    { THR_D45_PRED, 2500 },
    { THR_D135_PRED, 2500 },
    { THR_D117_PRED, 2500 },
    { THR_D153_PRED, 2500 },
    { THR_D207_PRED, 2500 },
    { THR_D63_PRED, 2500 },
  };
  const int num_increments =
      (int)(sizeof(mode_increments) / sizeof(mode_increments[0]));
  RD_OPT *const rd = &cpi->rd;
  SPEED_FEATURES *const sf = &cpi->sf;
  const int nearest_mult = sf->adaptive_rd_thresh ? 300 : 0;
  int i;

  // Set baseline threshold values.
  for (i = 0; i < MAX_MODES; ++i)
    rd->thresh_mult[i] = cpi->oxcf.mode == BEST ? -500 : 0;

  rd->thresh_mult[THR_NEARESTMV] = nearest_mult;
  rd->thresh_mult[THR_NEARESTG] = nearest_mult;
  rd->thresh_mult[THR_NEARESTA] = nearest_mult;

  for (i = 0; i < num_increments; ++i)
    rd->thresh_mult[mode_increments[i].mode] += mode_increments[i].increment;
}
641
// Initialises rd->thresh_mult_sub8x8[] from one of two fixed rows of
// multipliers: the second (lower) row when the encoder runs in BEST mode, the
// first row otherwise.
void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
  static const int sub8x8_mults[2][MAX_REFS] = {
    { 2500, 2500, 2500, 4500, 4500, 2500 },  // any mode other than BEST
    { 2000, 2000, 2000, 4000, 4000, 2000 }   // BEST
  };
  const int use_best_row = (cpi->oxcf.mode == BEST);
  RD_OPT *const rd = &cpi->rd;

  memcpy(rd->thresh_mult_sub8x8, sub8x8_mults[use_best_row],
         sizeof(sub8x8_mults[use_best_row]));
}
651
vp9_update_rd_thresh_fact(int (* factor_buf)[MAX_MODES],int rd_thresh,int bsize,int best_mode_index)652 void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
653 int bsize, int best_mode_index) {
654 if (rd_thresh > 0) {
655 const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
656 int mode;
657 for (mode = 0; mode < top_mode; ++mode) {
658 const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
659 const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
660 BLOCK_SIZE bs;
661 for (bs = min_size; bs <= max_size; ++bs) {
662 int *const fact = &factor_buf[bs][mode];
663 if (mode == best_mode_index) {
664 *fact -= (*fact >> 4);
665 } else {
666 *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
667 }
668 }
669 }
670 }
671 }
672
// Returns the rate penalty applied to intra modes: 20 times the 8-bit DC
// quantizer step for (qindex, qdelta), right-shifted by 4 for blocks up to
// 8x8, by 2 for larger blocks up to 16x16, and not at all for bigger blocks or
// when noise estimation is enabled and reports a high noise level.
int vp9_get_intra_cost_penalty(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
                               int qindex, int qdelta) {
  int shift;

  // Reduce the intra cost penalty for small blocks (<=16x16).
  if (bsize > BLOCK_16X16) {
    shift = 0;
  } else if (bsize <= BLOCK_8X8) {
    shift = 4;
  } else {
    shift = 2;
  }

  // Don't reduce intra cost penalty if estimated noise level is high.
  if (cpi->noise_estimate.enabled && cpi->noise_estimate.level == kHigh) {
    shift = 0;
  }

  // Always use VPX_BITS_8 as input here because the penalty is applied
  // to rate not distortion so we want a consistent penalty for all bit
  // depths. If the actual bit depth were passed in here then the value
  // returned by vp9_dc_quant() would scale with the bit depth and we would
  // then need to apply inverse scaling to correct back to a bit depth
  // independent rate penalty.
  return (20 * vp9_dc_quant(qindex, qdelta, VPX_BITS_8)) >> shift;
}
691