1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <assert.h>
12 #include <math.h>
13 #include <stdio.h>
14
15 #include "./vp9_rtcd.h"
16
17 #include "vpx_mem/vpx_mem.h"
18
19 #include "vp9/common/vp9_common.h"
20 #include "vp9/common/vp9_entropy.h"
21 #include "vp9/common/vp9_entropymode.h"
22 #include "vp9/common/vp9_mvref_common.h"
23 #include "vp9/common/vp9_pred_common.h"
24 #include "vp9/common/vp9_quant_common.h"
25 #include "vp9/common/vp9_reconinter.h"
26 #include "vp9/common/vp9_reconintra.h"
27 #include "vp9/common/vp9_seg_common.h"
28 #include "vp9/common/vp9_systemdependent.h"
29
30 #include "vp9/encoder/vp9_cost.h"
31 #include "vp9/encoder/vp9_encodemb.h"
32 #include "vp9/encoder/vp9_encodemv.h"
33 #include "vp9/encoder/vp9_encoder.h"
34 #include "vp9/encoder/vp9_mcomp.h"
35 #include "vp9/encoder/vp9_quantize.h"
36 #include "vp9/encoder/vp9_ratectrl.h"
37 #include "vp9/encoder/vp9_rd.h"
38 #include "vp9/encoder/vp9_tokenize.h"
39 #include "vp9/encoder/vp9_variance.h"
40
41 #define RD_THRESH_POW 1.25
42 #define RD_MULT_EPB_RATIO 64
43
44 // Factor to weigh the rate for switchable interp filters.
45 #define SWITCHABLE_INTERP_RATE_FACTOR 1
46
47 // The baseline rd thresholds for breaking out of the rd loop for
48 // certain modes are assumed to be based on 8x8 blocks.
49 // This table is used to correct for block size.
50 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
51 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
52 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
53 };
54
// Rebuilds the cached token costs for intra prediction modes and for the
// switchable interpolation filters from the current probability tables.
static void fill_mode_costs(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const frame_ctx = &cpi->common.fc;
  int outer, inner, filter_ctx;

  // Luma mode costs derived from the fixed key frame probabilities; one cost
  // vector per pair of indices into vp9_kf_y_mode_prob.
  for (outer = 0; outer < INTRA_MODES; ++outer) {
    for (inner = 0; inner < INTRA_MODES; ++inner) {
      vp9_cost_tokens(cpi->y_mode_costs[outer][inner],
                      vp9_kf_y_mode_prob[outer][inner], vp9_intra_mode_tree);
    }
  }

  // Luma mode costs taken from entry 1 of the frame context's y_mode_prob.
  vp9_cost_tokens(cpi->mbmode_cost, frame_ctx->y_mode_prob[1],
                  vp9_intra_mode_tree);

  // Chroma mode costs, using the TM_PRED row of the respective tables.
  vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME],
                  vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
  vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME],
                  frame_ctx->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);

  // One cost vector per switchable interpolation filter context.
  for (filter_ctx = 0; filter_ctx < SWITCHABLE_FILTER_CONTEXTS; ++filter_ctx) {
    vp9_cost_tokens(cpi->switchable_interp_costs[filter_ctx],
                    frame_ctx->switchable_interp_prob[filter_ctx],
                    vp9_switchable_interp_tree);
  }
}
74
// Converts the model coefficient probabilities |p| into coefficient token
// costs |c| for every transform size, plane type, reference type, band and
// context. Two cost vectors are produced per context: index 0 through
// vp9_cost_tokens() and index 1 through vp9_cost_tokens_skip().
static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  TX_SIZE tx;

  for (tx = TX_4X4; tx <= TX_32X32; ++tx) {
    int plane, ref, band, ctx;

    for (plane = 0; plane < PLANE_TYPES; ++plane) {
      for (ref = 0; ref < REF_TYPES; ++ref) {
        for (band = 0; band < COEF_BANDS; ++band) {
          // The number of contexts depends on the band.
          const int num_ctx = BAND_COEFF_CONTEXTS(band);

          for (ctx = 0; ctx < num_ctx; ++ctx) {
            vp9_prob full_probs[ENTROPY_NODES];
            int *const costs = (int *)c[tx][plane][ref][band][0][ctx];
            int *const skip_costs = (int *)c[tx][plane][ref][band][1][ctx];

            // Expand the model probabilities before costing the tree.
            vp9_model_to_full_probs(p[tx][plane][ref][band][ctx], full_probs);
            vp9_cost_tokens(costs, full_probs, vp9_coef_tree);
            vp9_cost_tokens_skip(skip_costs, full_probs, vp9_coef_tree);

            // Both variants must agree on the cost of the EOB token.
            assert(costs[EOB_TOKEN] == skip_costs[EOB_TOKEN]);
          }
        }
      }
    }
  }
}
94
95 // Values are now correlated to quantizer.
96 static int sad_per_bit16lut_8[QINDEX_RANGE];
97 static int sad_per_bit4lut_8[QINDEX_RANGE];
98
99 #if CONFIG_VP9_HIGHBITDEPTH
100 static int sad_per_bit16lut_10[QINDEX_RANGE];
101 static int sad_per_bit4lut_10[QINDEX_RANGE];
102 static int sad_per_bit16lut_12[QINDEX_RANGE];
103 static int sad_per_bit4lut_12[QINDEX_RANGE];
104 #endif
105
// Fills the first |range| entries of |bit16lut| and |bit4lut| for the given
// bit depth.
//
// The tables are generated from a formula (linear in the real quantizer
// value) rather than hard coded; this makes it easier to resolve the impact
// of experimental changes to the quantizer tables.
static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
                            vpx_bit_depth_t bit_depth) {
  int qindex = 0;

  while (qindex < range) {
    const double real_q = vp9_convert_qindex_to_q(qindex, bit_depth);

    // The results are truncated towards zero by the integer conversion.
    bit16lut[qindex] = (int)(0.0418 * real_q + 2.4107);
    bit4lut[qindex] = (int)(0.063 * real_q + 2.742);
    ++qindex;
  }
}
118
vp9_init_me_luts()119 void vp9_init_me_luts() {
120 init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
121 VPX_BITS_8);
122 #if CONFIG_VP9_HIGHBITDEPTH
123 init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
124 VPX_BITS_10);
125 init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
126 VPX_BITS_12);
127 #endif
128 }
129
130 static const int rd_boost_factor[16] = {
131 64, 32, 32, 32, 24, 16, 12, 12,
132 8, 8, 4, 4, 2, 2, 1, 0
133 };
134 static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = {
135 128, 144, 128, 128, 144
136 };
137
vp9_compute_rd_mult(const VP9_COMP * cpi,int qindex)138 int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
139 const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
140 #if CONFIG_VP9_HIGHBITDEPTH
141 int rdmult = 0;
142 switch (cpi->common.bit_depth) {
143 case VPX_BITS_8:
144 rdmult = 88 * q * q / 24;
145 break;
146 case VPX_BITS_10:
147 rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4);
148 break;
149 case VPX_BITS_12:
150 rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8);
151 break;
152 default:
153 assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
154 return -1;
155 }
156 #else
157 int rdmult = 88 * q * q / 24;
158 #endif
159 if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
160 const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
161 const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
162 const int boost_index = MIN(15, (cpi->rc.gfu_boost / 100));
163
164 rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
165 rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
166 }
167 return rdmult;
168 }
169
// Derives the rd threshold scale factor for |qindex|.
//
// The DC quantizer is first normalized by a bit depth dependent divisor
// (4 for 8-bit, 16 for 10-bit, 64 for 12-bit), raised to RD_THRESH_POW and
// scaled by 5.12. The result is never smaller than 8. Returns -1 for an
// unsupported bit depth in high bit depth builds.
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
  double scaled_q;
  int factor;

#if CONFIG_VP9_HIGHBITDEPTH
  if (bit_depth == VPX_BITS_8) {
    scaled_q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
  } else if (bit_depth == VPX_BITS_10) {
    scaled_q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0;
  } else if (bit_depth == VPX_BITS_12) {
    scaled_q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0;
  } else {
    assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
    return -1;
  }
#else
  // Only 8-bit is available in this configuration.
  (void) bit_depth;
  scaled_q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
#endif

  // TODO(debargha): Adjust the function below.
  factor = (int)(pow(scaled_q, RD_THRESH_POW) * 5.12);
  return MAX(factor, 8);
}
194
// Loads the macroblock's sadperbit16 / sadperbit4 values for |qindex| from
// the lookup tables that match the stream's bit depth.
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  // The 8-bit tables are the only ones available without high bit depth
  // support, and the ones selected for VPX_BITS_8 otherwise.
  const int *lut16 = sad_per_bit16lut_8;
  const int *lut4 = sad_per_bit4lut_8;

#if CONFIG_VP9_HIGHBITDEPTH
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8:
      break;
    case VPX_BITS_10:
      lut16 = sad_per_bit16lut_10;
      lut4 = sad_per_bit4lut_10;
      break;
    case VPX_BITS_12:
      lut16 = sad_per_bit16lut_12;
      lut4 = sad_per_bit4lut_12;
      break;
    default:
      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
      // Nothing is written for an unknown bit depth.
      return;
  }
#endif

  cpi->mb.sadperbit16 = lut16[qindex];
  cpi->mb.sadperbit4 = lut4[qindex];
}
218
// Fills rd->threshes for every segment and block size.
//
// Each threshold is the matching multiplier (thresh_mult for blocks of at
// least 8x8, thresh_mult_sub8x8 otherwise) scaled by the segment's quantizer
// factor and by the block size factor, then divided by 4. Multipliers that
// are too large to be scaled without overflow saturate to INT_MAX.
static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
  int segment_id;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    const int seg_qindex =
        vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
    const int qindex = clamp(seg_qindex + cm->y_dc_delta_q, 0, MAXQ);
    const int q_factor = compute_rd_thresh_factor(qindex, cm->bit_depth);
    int bsize;

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int scale = q_factor * rd_thresh_block_size_factor[bsize];
      const int mult_limit = INT_MAX / scale;
      int i;

      if (bsize < BLOCK_8X8) {
        // Sub-8x8 blocks are tracked per reference combination.
        for (i = 0; i < MAX_REFS; ++i) {
          if (rd->thresh_mult_sub8x8[i] < mult_limit) {
            rd->threshes[segment_id][bsize][i] =
                rd->thresh_mult_sub8x8[i] * scale / 4;
          } else {
            rd->threshes[segment_id][bsize][i] = INT_MAX;
          }
        }
      } else {
        // Larger blocks are tracked per prediction mode.
        for (i = 0; i < MAX_MODES; ++i) {
          if (rd->thresh_mult[i] < mult_limit) {
            rd->threshes[segment_id][bsize][i] =
                rd->thresh_mult[i] * scale / 4;
          } else {
            rd->threshes[segment_id][bsize][i] = INT_MAX;
          }
        }
      }
    }
  }
}
250
// Sets up the per-frame rate-distortion constants: RDDIV / RDMULT, the
// macroblock's errorperbit, the transform size selection flag, the block
// thresholds, and the token / partition / mode / motion vector cost tables.
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const mb = &cpi->mb;
  RD_OPT *const rd_opt = &cpi->rd;
  const int is_key_frame = (cm->frame_type == KEY_FRAME);
  const int uses_rd_pick_mode = !cpi->sf.use_nonrd_pick_mode;
  int ctx;

  vp9_clear_system_state();

  rd_opt->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
  rd_opt->RDMULT =
      vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  // errorperbit must be at least 1.
  mb->errorperbit = rd_opt->RDMULT / RD_MULT_EPB_RATIO;
  if (mb->errorperbit == 0)
    mb->errorperbit = 1;

  // Transform size selection is only turned off when the largest size is
  // forced on a non key frame.
  if (cpi->sf.tx_size_search_method == USE_LARGESTALL && !is_key_frame)
    mb->select_tx_size = 0;
  else
    mb->select_tx_size = 1;

  set_block_thresholds(cm, rd_opt);

  // Token and partition costs are refreshed on every frame in rd mode, and
  // only on key frames in non-rd mode.
  if (uses_rd_pick_mode || is_key_frame) {
    fill_token_costs(mb->token_costs, cm->fc.coef_probs);

    for (ctx = 0; ctx < PARTITION_CONTEXTS; ++ctx) {
      vp9_cost_tokens(cpi->partition_cost[ctx], get_partition_probs(cm, ctx),
                      vp9_partition_tree);
    }
  }

  // Mode and motion vector costs: in non-rd mode they are additionally
  // refreshed whenever the low three bits of the frame counter equal 1.
  if (uses_rd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
      is_key_frame) {
    fill_mode_costs(cpi);

    if (!frame_is_intra_only(cm)) {
      vp9_build_nmv_cost_table(mb->nmvjointcost,
                               cm->allow_high_precision_mv ? mb->nmvcost_hp
                                                           : mb->nmvcost,
                               &cm->fc.nmvc, cm->allow_high_precision_mv);

      for (ctx = 0; ctx < INTER_MODE_CONTEXTS; ++ctx) {
        vp9_cost_tokens((int *)cpi->inter_mode_cost[ctx],
                        cm->fc.inter_mode_probs[ctx], vp9_inter_mode_tree);
      }
    }
  }
}
294
// Looks up the normalized rate and distortion (both in Q10) for the
// normalized squared step size |xsq_q10|, linearly interpolating between the
// two nearest samples of the tables below.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The tables below must be of the same size.

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256.

  // Normalized rate:
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The
  // closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536,  6086,  5574,  5275,  5063,  4899,  4764,  4651,
     4553,  4389,  4255,  4142,  4044,  3958,  3881,  3811,
     3748,  3635,  3538,  3453,  3376,  3307,  3244,  3186,
     3133,  3037,  2952,  2877,  2809,  2747,  2690,  2638,
     2589,  2501,  2423,  2353,  2290,  2232,  2179,  2130,
     2084,  2001,  1928,  1862,  1802,  1748,  1698,  1651,
     1608,  1530,  1460,  1398,  1342,  1290,  1243,  1199,
     1159,  1086,  1021,   963,   911,   864,   821,   781,
      745,   680,   623,   574,   530,   490,   455,   424,
      395,   345,   304,   269,   239,   213,   190,   171,
      154,   126,   104,    87,    73,    61,    52,    44,
       38,    28,    21,    16,    12,    10,     8,     6,
        5,     3,     2,     1,     1,     1,     0,     0,
  };
  // Normalized distortion:
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
       0,     0,     1,     1,     1,     2,     2,     2,
       3,     3,     4,     5,     5,     6,     7,     7,
       8,     9,    11,    12,    13,    15,    16,    17,
      18,    21,    24,    26,    29,    31,    34,    36,
      39,    44,    49,    54,    59,    64,    69,    73,
      78,    88,    97,   106,   115,   124,   133,   142,
     151,   167,   184,   200,   215,   231,   245,   260,
     274,   301,   327,   351,   375,   397,   418,   439,
     458,   495,   528,   559,   587,   613,   637,   659,
     680,   717,   749,   777,   801,   823,   842,   859,
     874,   899,   919,   936,   949,   960,   969,   977,
     983,   994,  1001,  1006,  1010,  1013,  1015,  1017,
    1018,  1020,  1022,  1022,  1023,  1023,  1023,  1024,
  };
  // Sample positions (in Q10) at which the two tables above are tabulated.
  static const int xsq_iq_q10[] = {
         0,      4,      8,     12,     16,     20,     24,     28,
        32,     40,     48,     56,     64,     72,     80,     88,
        96,    112,    128,    144,    160,    176,    192,    208,
       224,    256,    288,    320,    352,    384,    416,    448,
       480,    544,    608,    672,    736,    800,    864,    928,
       992,   1120,   1248,   1376,   1504,   1632,   1760,   1888,
      2016,   2272,   2528,   2784,   3040,   3296,   3552,   3808,
      4064,   4576,   5088,   5600,   6112,   6624,   7136,   7648,
      8160,   9184,  10208,  11232,  12256,  13280,  14304,  15328,
     16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,
     32736,  36832,  40928,  45024,  49120,  53216,  57312,  61408,
     65504,  73696,  81888,  90080,  98272, 106464, 114656, 122848,
    131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
  };

  // Locate the table segment: |exponent| is taken from the most significant
  // bit of the biased input and the following three bits pick the sample.
  const int biased = (xsq_q10 >> 2) + 8;
  const int exponent = get_msb(biased) - 3;
  const int idx = (exponent << 3) + ((biased >> exponent) & 0x7);

  // Interpolation weights in Q10: |upper_w| for entry idx + 1 and
  // |lower_w| for entry idx; together they sum to 1.0 (1 << 10).
  const int upper_w =
      ((xsq_q10 - xsq_iq_q10[idx]) << 10) >> (2 + exponent);
  const int lower_w = (1 << 10) - upper_w;

  *r_q10 =
      (rate_tab_q10[idx] * lower_w + rate_tab_q10[idx + 1] * upper_w) >> 10;
  *d_q10 =
      (dist_tab_q10[idx] * lower_w + dist_tab_q10[idx + 1] * upper_w) >> 10;
}
369
// Models the rate and distortion for a Laplacian source with variance |var|
// over |n| samples when quantized with a uniform quantizer of step size
// |qstep|. The closed form expressions are in:
// Hang and Chen, "Source Model for transform video coder and its
// application - Part I: Fundamental Theory", IEEE Trans. Circ.
// Sys. for Video Tech., April 1997.
//
// Writes the modelled rate to |rate| and the modelled distortion to |dist|;
// both are 0 when |var| is 0.
void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
                                  unsigned int qstep, int *rate,
                                  int64_t *dist) {
  // Largest input accepted by the model_rd_norm() lookup.
  static const uint32_t MAX_XSQ_Q10 = 245727;
  uint64_t ratio_q10;
  int norm_rate_q10, norm_dist_q10;

  if (var == 0) {
    *rate = 0;
    *dist = 0;
    return;
  }

  // qstep^2 * n / var in Q10, rounded to nearest and clamped to the range
  // of the lookup tables.
  ratio_q10 = ((((uint64_t)qstep * qstep * n) << 10) + (var >> 1)) / var;
  if (ratio_q10 > MAX_XSQ_Q10)
    ratio_q10 = MAX_XSQ_Q10;

  model_rd_norm((int)ratio_q10, &norm_rate_q10, &norm_dist_q10);

  // Scale the normalized values back, with rounding.
  *rate = (n * norm_rate_q10 + 2) >> 2;
  *dist = (var * (int64_t)norm_dist_q10 + 512) >> 10;
}
393
// Returns 1 if any of the |count| entropy contexts starting at |ctx| is
// non-zero, and 0 otherwise.
static int any_nonzero_context(const ENTROPY_CONTEXT *ctx, int count) {
  int i;
  for (i = 0; i < count; ++i) {
    if (ctx[i])
      return 1;
  }
  return 0;
}

// Collects the above / left entropy contexts of plane |pd| for a block of
// size |bsize| coded with transform size |tx_size|.
//
// For TX_4X4 the contexts are copied one to one. For larger transforms the
// contexts covered by one transform block (2, 4 or 8 entries) are collapsed
// into a single 0 / 1 flag stored at the first covered position of
// |t_above| / |t_left|; the remaining positions are left untouched.
//
// The flags are computed byte by byte instead of loading the context array
// through uint16_t / uint32_t / uint64_t pointer casts, so the result does
// not depend on the alignment of the context buffers or on type punning.
void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;
  int step;  // Number of 4x4 contexts covered by one transform block.
  int i;

  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      return;
    case TX_8X8:
      step = 2;
      break;
    case TX_16X16:
      step = 4;
      break;
    case TX_32X32:
      step = 8;
      break;
    default:
      assert(0 && "Invalid transform size.");
      return;
  }

  for (i = 0; i < num_4x4_w; i += step)
    t_above[i] = any_nonzero_context(above + i, step);
  for (i = 0; i < num_4x4_h; i += step)
    t_left[i] = any_nonzero_context(left + i, step);
}
433
// Evaluates the candidate reference motion vectors for |ref_frame| by
// computing the SAD of the source block against the reference block each
// candidate points to (at full pixel precision).
//
// On return the macroblock records, for |ref_frame|, the index of the best
// candidate, the best SAD and the largest candidate component magnitude
// (in full pixels).
void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
                 uint8_t *ref_y_buffer, int ref_y_stride,
                 int ref_frame, BLOCK_SIZE block_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
  uint8_t *src_y_ptr = x->plane[0].src.buf;
  // The macroblock's own predicted mv is only considered as a third
  // candidate when adaptive motion search is on and the block is smaller
  // than the maximum partition size.
  const int use_pred_mv = cpi->sf.adaptive_motion_search &&
                          block_size < cpi->sf.max_partition_size;
  const int num_candidates = MAX_MV_REF_CANDIDATES + use_pred_mv;
  MV candidates[3];
  int have_zero_mv = 0;
  int winner = 0;
  int lowest_sad = INT_MAX;
  int largest_mv = 0;
  int idx;

  candidates[0] = mbmi->ref_mvs[ref_frame][0].as_mv;
  candidates[1] = mbmi->ref_mvs[ref_frame][1].as_mv;
  candidates[2] = x->pred_mv[ref_frame];

  // Get the sad for each candidate reference mv.
  for (idx = 0; idx < num_candidates; ++idx) {
    const MV *mv = &candidates[idx];
    const int is_zero = is_zero_mv(mv);
    const int magnitude = MAX(abs(mv->row), abs(mv->col)) >> 3;
    uint8_t *ref_y_ptr;
    int sad;

    if (magnitude > largest_mv)
      largest_mv = magnitude;

    // A zero vector only needs to be evaluated once.
    if (is_zero && have_zero_mv)
      continue;
    have_zero_mv |= is_zero;

    // Convert the vector to a full pixel offset into the reference frame.
    ref_y_ptr = &ref_y_buffer[ref_y_stride * (mv->row >> 3) + (mv->col >> 3)];

    // Find sad for current vector.
    sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
                                      ref_y_ptr, ref_y_stride);

    // Note if it is the best so far.
    if (sad < lowest_sad) {
      lowest_sad = sad;
      winner = idx;
    }
  }

  // Note the index of the mv that worked best in the reference list.
  x->mv_best_ref_index[ref_frame] = winner;
  x->max_mv_context[ref_frame] = largest_mv;
  x->pred_mv_sad[ref_frame] = lowest_sad;
}
485
vp9_setup_pred_block(const MACROBLOCKD * xd,struct buf_2d dst[MAX_MB_PLANE],const YV12_BUFFER_CONFIG * src,int mi_row,int mi_col,const struct scale_factors * scale,const struct scale_factors * scale_uv)486 void vp9_setup_pred_block(const MACROBLOCKD *xd,
487 struct buf_2d dst[MAX_MB_PLANE],
488 const YV12_BUFFER_CONFIG *src,
489 int mi_row, int mi_col,
490 const struct scale_factors *scale,
491 const struct scale_factors *scale_uv) {
492 int i;
493
494 dst[0].buf = src->y_buffer;
495 dst[0].stride = src->y_stride;
496 dst[1].buf = src->u_buffer;
497 dst[2].buf = src->v_buffer;
498 dst[1].stride = dst[2].stride = src->uv_stride;
499
500 for (i = 0; i < MAX_MB_PLANE; ++i) {
501 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
502 i ? scale_uv : scale,
503 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
504 }
505 }
506
vp9_get_scaled_ref_frame(const VP9_COMP * cpi,int ref_frame)507 const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
508 int ref_frame) {
509 const VP9_COMMON *const cm = &cpi->common;
510 const int ref_idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
511 const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
512 return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL;
513 }
514
vp9_get_switchable_rate(const VP9_COMP * cpi)515 int vp9_get_switchable_rate(const VP9_COMP *cpi) {
516 const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
517 const MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
518 const int ctx = vp9_get_pred_context_switchable_interp(xd);
519 return SWITCHABLE_INTERP_RATE_FACTOR *
520 cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
521 }
522
// Initializes rd->thresh_mult[], the per-mode threshold multipliers.
//
// Every mode starts from a baseline (-500 in BEST mode, 0 otherwise). The
// NEAREST modes are then set outright, and all other listed modes receive a
// fixed increment on top of the baseline.
void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
  // Increment added to the baseline of each listed mode.
  static const struct {
    int mode;
    int increment;
  } mode_increments[] = {
    { THR_DC, 1000 },

    { THR_NEWMV, 1000 },
    { THR_NEWA, 1000 },
    { THR_NEWG, 1000 },

    { THR_NEARMV, 1000 },
    { THR_NEARA, 1000 },
    { THR_COMP_NEARESTLA, 1000 },
    { THR_COMP_NEARESTGA, 1000 },

    { THR_TM, 1000 },

    { THR_COMP_NEARLA, 1500 },
    { THR_COMP_NEWLA, 2000 },
    { THR_NEARG, 1000 },
    { THR_COMP_NEARGA, 1500 },
    { THR_COMP_NEWGA, 2000 },

    { THR_ZEROMV, 2000 },
    { THR_ZEROG, 2000 },
    { THR_ZEROA, 2000 },
    { THR_COMP_ZEROLA, 2500 },
    { THR_COMP_ZEROGA, 2500 },

    { THR_H_PRED, 2000 },
    { THR_V_PRED, 2000 },
    { THR_D45_PRED, 2500 },
    { THR_D135_PRED, 2500 },
    { THR_D117_PRED, 2500 },
    { THR_D153_PRED, 2500 },
    { THR_D207_PRED, 2500 },
    { THR_D63_PRED, 2500 },
  };
  const int num_increments =
      (int)(sizeof(mode_increments) / sizeof(mode_increments[0]));
  RD_OPT *const rd = &cpi->rd;
  SPEED_FEATURES *const sf = &cpi->sf;
  const int baseline = (cpi->oxcf.mode == BEST) ? -500 : 0;
  const int nearest_thresh = sf->adaptive_rd_thresh ? 300 : 0;
  int i;

  // Set baseline threshold values.
  for (i = 0; i < MAX_MODES; ++i)
    rd->thresh_mult[i] = baseline;

  // The NEAREST modes ignore the baseline.
  rd->thresh_mult[THR_NEARESTMV] = nearest_thresh;
  rd->thresh_mult[THR_NEARESTG] = nearest_thresh;
  rd->thresh_mult[THR_NEARESTA] = nearest_thresh;

  for (i = 0; i < num_increments; ++i)
    rd->thresh_mult[mode_increments[i].mode] += mode_increments[i].increment;

  // Adjust threshold only in real time mode, which only uses last
  // reference frame.
  rd->thresh_mult[THR_NEWMV] += sf->elevate_newmv_thresh;
}
580
// Initializes rd->thresh_mult_sub8x8[], the per-reference threshold
// multipliers used for sub-8x8 blocks.
//
// Every entry starts from a baseline (-500 in BEST mode, 0 otherwise) plus a
// fixed increment; entries whose bit is set in sf->disable_split_mask are
// then forced to INT_MAX.
void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  RD_OPT *const rd = &cpi->rd;
  const int baseline = (cpi->oxcf.mode == BEST) ? -500 : 0;
  int ref;

  for (ref = 0; ref < MAX_REFS; ++ref)
    rd->thresh_mult_sub8x8[ref] = baseline;

  // Single reference and intra.
  rd->thresh_mult_sub8x8[THR_LAST] += 2500;
  rd->thresh_mult_sub8x8[THR_GOLD] += 2500;
  rd->thresh_mult_sub8x8[THR_ALTR] += 2500;
  rd->thresh_mult_sub8x8[THR_INTRA] += 2500;

  // Compound references.
  rd->thresh_mult_sub8x8[THR_COMP_LA] += 4500;
  rd->thresh_mult_sub8x8[THR_COMP_GA] += 4500;

  // Check for masked out split cases.
  for (ref = 0; ref < MAX_REFS; ++ref) {
    if (sf->disable_split_mask & (1 << ref))
      rd->thresh_mult_sub8x8[ref] = INT_MAX;
  }
}
601