1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <assert.h>
12 #include <math.h>
13 #include <stdio.h>
14
15 #include "./vp9_rtcd.h"
16
17 #include "vpx_mem/vpx_mem.h"
18 #include "vpx_ports/bitops.h"
19 #include "vpx_ports/mem.h"
20 #include "vpx_ports/system_state.h"
21
22 #include "vp9/common/vp9_common.h"
23 #include "vp9/common/vp9_entropy.h"
24 #include "vp9/common/vp9_entropymode.h"
25 #include "vp9/common/vp9_mvref_common.h"
26 #include "vp9/common/vp9_pred_common.h"
27 #include "vp9/common/vp9_quant_common.h"
28 #include "vp9/common/vp9_reconinter.h"
29 #include "vp9/common/vp9_reconintra.h"
30 #include "vp9/common/vp9_seg_common.h"
31
32 #include "vp9/encoder/vp9_cost.h"
33 #include "vp9/encoder/vp9_encodemb.h"
34 #include "vp9/encoder/vp9_encodemv.h"
35 #include "vp9/encoder/vp9_encoder.h"
36 #include "vp9/encoder/vp9_mcomp.h"
37 #include "vp9/encoder/vp9_quantize.h"
38 #include "vp9/encoder/vp9_ratectrl.h"
39 #include "vp9/encoder/vp9_rd.h"
40 #include "vp9/encoder/vp9_tokenize.h"
41
// Exponent applied to the scaled quantizer when computing the rd threshold
// factor.
#define RD_THRESH_POW 1.25
// RDMULT is divided by this ratio to obtain the per-bit error cost.
#define RD_MULT_EPB_RATIO 64

// Weight applied to the rate of switchable interpolation filters.
#define SWITCHABLE_INTERP_RATE_FACTOR 1
47
// Sets every field of |rd_cost| to its maximum sentinel value: INT_MAX for
// the rate, INT64_MAX for the distortion and the combined rd cost.
void vp9_rd_cost_reset(RD_COST *rd_cost) {
  rd_cost->rdcost = INT64_MAX;
  rd_cost->dist = INT64_MAX;
  rd_cost->rate = INT_MAX;
}
53
// Zeroes the rate, distortion and combined rd cost stored in |rd_cost|.
void vp9_rd_cost_init(RD_COST *rd_cost) {
  rd_cost->rdcost = 0;
  rd_cost->dist = 0;
  rd_cost->rate = 0;
}
59
60 // The baseline rd thresholds for breaking out of the rd loop for
61 // certain modes are assumed to be based on 8x8 blocks.
62 // This table is used to correct for block size.
63 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
64 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
65 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
66 };
67
// Rebuilds the mode cost tables held in |cpi|: the y mode costs derived from
// vp9_kf_y_mode_prob, the mbmode and intra uv mode costs, and the switchable
// interpolation filter costs taken from the current frame context.
static void fill_mode_costs(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const fc = cpi->common.fc;
  int outer, inner, ctx;

  // One cost vector per pair of modes indexing vp9_kf_y_mode_prob.
  for (outer = 0; outer < INTRA_MODES; ++outer) {
    for (inner = 0; inner < INTRA_MODES; ++inner) {
      vp9_cost_tokens(cpi->y_mode_costs[outer][inner],
                      vp9_kf_y_mode_prob[outer][inner], vp9_intra_mode_tree);
    }
  }

  vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);

  // Both uv cost vectors use the TM_PRED row of their probability table.
  vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME],
                  vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
  vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME],
                  fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);

  for (ctx = 0; ctx < SWITCHABLE_FILTER_CONTEXTS; ++ctx) {
    vp9_cost_tokens(cpi->switchable_interp_costs[ctx],
                    fc->switchable_interp_prob[ctx],
                    vp9_switchable_interp_tree);
  }
}
87
// Fills the coefficient token cost table |c| from the model probabilities
// |p|. For every (tx size, plane type, ref type, band, context) tuple the
// model is expanded to full probabilities and two cost vectors are written:
// index 0 via vp9_cost_tokens() and index 1 via vp9_cost_tokens_skip().
static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  TX_SIZE tx;

  for (tx = TX_4X4; tx <= TX_32X32; ++tx) {
    int plane, ref, band, ctx;

    for (plane = 0; plane < PLANE_TYPES; ++plane) {
      for (ref = 0; ref < REF_TYPES; ++ref) {
        for (band = 0; band < COEF_BANDS; ++band) {
          for (ctx = 0; ctx < BAND_COEFF_CONTEXTS(band); ++ctx) {
            vpx_prob full_probs[ENTROPY_NODES];

            vp9_model_to_full_probs(p[tx][plane][ref][band][ctx], full_probs);
            vp9_cost_tokens((int *)c[tx][plane][ref][band][0][ctx],
                            full_probs, vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[tx][plane][ref][band][1][ctx],
                                 full_probs, vp9_coef_tree);
            // Both variants are expected to agree on the EOB token cost.
            assert(c[tx][plane][ref][band][0][ctx][EOB_TOKEN] ==
                   c[tx][plane][ref][band][1][ctx][EOB_TOKEN]);
          }
        }
      }
    }
  }
}
107
108 // Values are now correlated to quantizer.
109 static int sad_per_bit16lut_8[QINDEX_RANGE];
110 static int sad_per_bit4lut_8[QINDEX_RANGE];
111
112 #if CONFIG_VP9_HIGHBITDEPTH
113 static int sad_per_bit16lut_10[QINDEX_RANGE];
114 static int sad_per_bit4lut_10[QINDEX_RANGE];
115 static int sad_per_bit16lut_12[QINDEX_RANGE];
116 static int sad_per_bit4lut_12[QINDEX_RANGE];
117 #endif
118
// Fills the first |range| entries of |bit16lut| and |bit4lut| for the given
// |bit_depth|. Each entry is a linear function of the real quantizer value
// for that qindex, truncated to int.
//
// The tables are computed by formula for now, which makes it easier to
// resolve the impact of experimental changes to the quantizer tables.
static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
                            vpx_bit_depth_t bit_depth) {
  int qindex;

  for (qindex = 0; qindex < range; ++qindex) {
    const double q = vp9_convert_qindex_to_q(qindex, bit_depth);

    bit16lut[qindex] = (int)(0.0418 * q + 2.4107);
    bit4lut[qindex] = (int)(0.063 * q + 2.742);
  }
}
131
vp9_init_me_luts(void)132 void vp9_init_me_luts(void) {
133 init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
134 VPX_BITS_8);
135 #if CONFIG_VP9_HIGHBITDEPTH
136 init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
137 VPX_BITS_10);
138 init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
139 VPX_BITS_12);
140 #endif
141 }
142
143 static const int rd_boost_factor[16] = {
144 64, 32, 32, 32, 24, 16, 12, 12,
145 8, 8, 4, 4, 2, 2, 1, 0
146 };
147 static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = {
148 128, 144, 128, 128, 144
149 };
150
vp9_compute_rd_mult(const VP9_COMP * cpi,int qindex)151 int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
152 const int64_t q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
153 #if CONFIG_VP9_HIGHBITDEPTH
154 int64_t rdmult = 0;
155 switch (cpi->common.bit_depth) {
156 case VPX_BITS_8:
157 rdmult = 88 * q * q / 24;
158 break;
159 case VPX_BITS_10:
160 rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4);
161 break;
162 case VPX_BITS_12:
163 rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8);
164 break;
165 default:
166 assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
167 return -1;
168 }
169 #else
170 int64_t rdmult = 88 * q * q / 24;
171 #endif // CONFIG_VP9_HIGHBITDEPTH
172 if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
173 const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
174 const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
175 const int boost_index = MIN(15, (cpi->rc.gfu_boost / 100));
176
177 rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
178 rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
179 }
180 if (rdmult < 1)
181 rdmult = 1;
182 return (int)rdmult;
183 }
184
// Computes the rd threshold factor for |qindex|.
//
// The DC quantizer is scaled by a bit-depth dependent divisor (4 for 8-bit,
// 16 for 10-bit, 64 for 12-bit), raised to RD_THRESH_POW, multiplied by 5.12
// and truncated to int. The result is never less than 8. Builds without
// CONFIG_VP9_HIGHBITDEPTH ignore |bit_depth| and always use the 8-bit path.
//
// Returns -1 for an unsupported bit depth (high-bit-depth builds only).
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
  double q;
  int factor;
#if CONFIG_VP9_HIGHBITDEPTH
  double divisor;

  switch (bit_depth) {
    case VPX_BITS_8:
      divisor = 4.0;
      break;
    case VPX_BITS_10:
      divisor = 16.0;
      break;
    case VPX_BITS_12:
      divisor = 64.0;
      break;
    default:
      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
      return -1;
  }
  q = vp9_dc_quant(qindex, 0, bit_depth) / divisor;
#else
  (void)bit_depth;
  q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // TODO(debargha): Adjust the function below.
  factor = (int)(pow(q, RD_THRESH_POW) * 5.12);
  return MAX(factor, 8);
}
209
// Loads the SAD-per-bit constants for |qindex| into |x| from the lookup
// tables matching the encoder's bit depth. Builds without
// CONFIG_VP9_HIGHBITDEPTH always use the 8-bit tables. For an unsupported
// bit depth the function asserts and leaves |x| untouched.
void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
  const int *lut16 = sad_per_bit16lut_8;
  const int *lut4 = sad_per_bit4lut_8;

#if CONFIG_VP9_HIGHBITDEPTH
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8:
      break;
    case VPX_BITS_10:
      lut16 = sad_per_bit16lut_10;
      lut4 = sad_per_bit4lut_10;
      break;
    case VPX_BITS_12:
      lut16 = sad_per_bit16lut_12;
      lut4 = sad_per_bit4lut_12;
      break;
    default:
      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
      return;
  }
#else
  (void)cpi;
#endif  // CONFIG_VP9_HIGHBITDEPTH

  x->sadperbit16 = lut16[qindex];
  x->sadperbit4 = lut4[qindex];
}
234
// Recomputes rd->threshes for every segment and block size.
//
// For each segment the threshold factor is derived from the segment qindex
// (offset by y_dc_delta_q and clamped to [0, MAXQ]). It is then scaled by the
// per-block-size factor and by the per-mode multiplier. Block sizes of 8x8
// and above use rd->thresh_mult over MAX_MODES entries; smaller sizes use
// rd->thresh_mult_sub8x8 over MAX_REFS entries. A multiplier that would
// overflow the product saturates the threshold to INT_MAX.
static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
  int segment_id;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    const int seg_qindex =
        vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
    const int qindex = clamp(seg_qindex + cm->y_dc_delta_q, 0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);
    int bsize;

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      const int thresh_max = INT_MAX / t;
      const int is_sub8x8 = bsize < BLOCK_8X8;
      const int *const mult =
          is_sub8x8 ? rd->thresh_mult_sub8x8 : rd->thresh_mult;
      const int count = is_sub8x8 ? MAX_REFS : MAX_MODES;
      int i;

      for (i = 0; i < count; ++i) {
        if (mult[i] < thresh_max)
          rd->threshes[segment_id][bsize][i] = mult[i] * t / 4;
        else
          rd->threshes[segment_id][bsize][i] = INT_MAX;
      }
    }
  }
}
266
// Sets up the per-frame rate-distortion constants in |cpi|: RDDIV/RDMULT,
// the per-bit error cost, the tx-size selection flag, the block thresholds
// and partition probabilities, and the token, partition, mode and motion
// vector cost tables. Some cost tables are skipped depending on the speed
// features in use.
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  RD_OPT *const rd = &cpi->rd;
  int i;

  vpx_clear_system_state();

  rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
  rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  // Per-bit error cost, never allowed to be zero.
  x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;
  if (x->errorperbit == 0) x->errorperbit = 1;

  // Tx size selection is disabled only for non-key frames when the search
  // method is USE_LARGESTALL.
  x->select_tx_size = !(cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                        cm->frame_type != KEY_FRAME);

  set_block_thresholds(cm, rd);
  set_partition_probs(cm, xd);

  if (cm->frame_type == KEY_FRAME || !cpi->sf.use_nonrd_pick_mode) {
    fill_token_costs(x->token_costs, cm->fc->coef_probs);
  }

  if (cm->frame_type == KEY_FRAME ||
      cpi->sf.partition_search_type != VAR_BASED_PARTITION) {
    for (i = 0; i < PARTITION_CONTEXTS; ++i) {
      vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i),
                      vp9_partition_tree);
    }
  }

  // With non-rd mode picking, mode costs are refreshed only on key frames and
  // on frames whose index is 1 modulo 8.
  if (cm->frame_type == KEY_FRAME || !cpi->sf.use_nonrd_pick_mode ||
      (cm->current_video_frame & 0x07) == 1) {
    fill_mode_costs(cpi);

    if (!frame_is_intra_only(cm)) {
      vp9_build_nmv_cost_table(
          x->nmvjointcost,
          cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
          &cm->fc->nmvc, cm->allow_high_precision_mv);

      for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
        vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
                        cm->fc->inter_mode_probs[i], vp9_inter_mode_tree);
      }
    }
  }
}
314
// Looks up the normalized rate (*r_q10) and normalized distortion (*d_q10)
// for |xsq_q10| by linear interpolation between adjacent table samples. All
// quantities are in Q10 fixed point.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The tables below must be of the same size.

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256.

  // Normalized rate:
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The
  // closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536,  6086,  5574,  5275,  5063,  4899,  4764,  4651,
     4553,  4389,  4255,  4142,  4044,  3958,  3881,  3811,
     3748,  3635,  3538,  3453,  3376,  3307,  3244,  3186,
     3133,  3037,  2952,  2877,  2809,  2747,  2690,  2638,
     2589,  2501,  2423,  2353,  2290,  2232,  2179,  2130,
     2084,  2001,  1928,  1862,  1802,  1748,  1698,  1651,
     1608,  1530,  1460,  1398,  1342,  1290,  1243,  1199,
     1159,  1086,  1021,   963,   911,   864,   821,   781,
      745,   680,   623,   574,   530,   490,   455,   424,
      395,   345,   304,   269,   239,   213,   190,   171,
      154,   126,   104,    87,    73,    61,    52,    44,
       38,    28,    21,    16,    12,    10,     8,     6,
        5,     3,     2,     1,     1,     1,     0,     0,
  };
  // Normalized distortion:
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
       0,     0,     1,     1,     1,     2,     2,     2,
       3,     3,     4,     5,     5,     6,     7,     7,
       8,     9,    11,    12,    13,    15,    16,    17,
      18,    21,    24,    26,    29,    31,    34,    36,
      39,    44,    49,    54,    59,    64,    69,    73,
      78,    88,    97,   106,   115,   124,   133,   142,
     151,   167,   184,   200,   215,   231,   245,   260,
     274,   301,   327,   351,   375,   397,   418,   439,
     458,   495,   528,   559,   587,   613,   637,   659,
     680,   717,   749,   777,   801,   823,   842,   859,
     874,   899,   919,   936,   949,   960,   969,   977,
     983,   994,  1001,  1006,  1010,  1013,  1015,  1017,
    1018,  1020,  1022,  1022,  1023,  1023,  1023,  1024,
  };
  // Sample positions (x^2 in Q10) corresponding to the table entries above.
  static const int xsq_iq_q10[] = {
         0,      4,      8,     12,     16,     20,     24,     28,
        32,     40,     48,     56,     64,     72,     80,     88,
        96,    112,    128,    144,    160,    176,    192,    208,
       224,    256,    288,    320,    352,    384,    416,    448,
       480,    544,    608,    672,    736,    800,    864,    928,
       992,   1120,   1248,   1376,   1504,   1632,   1760,   1888,
      2016,   2272,   2528,   2784,   3040,   3296,   3552,   3808,
      4064,   4576,   5088,   5600,   6112,   6624,   7136,   7648,
      8160,   9184,  10208,  11232,  12256,  13280,  14304,  15328,
     16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,
     32736,  36832,  40928,  45024,  49120,  53216,  57312,  61408,
     65504,  73696,  81888,  90080,  98272, 106464, 114656, 122848,
    131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
  };
  // Locate the table segment: |octave| selects the row of eight samples and
  // the next three bits below the leading bit select the entry in that row.
  const int biased = (xsq_q10 >> 2) + 8;
  const int octave = get_msb(biased) - 3;
  const int idx = (octave << 3) + ((biased >> octave) & 0x7);
  // Interpolation weights in Q10: |hi_w| for entry idx + 1, |lo_w| for idx.
  const int hi_w = ((xsq_q10 - xsq_iq_q10[idx]) << 10) >> (2 + octave);
  const int lo_w = (1 << 10) - hi_w;

  *r_q10 = (rate_tab_q10[idx] * lo_w + rate_tab_q10[idx + 1] * hi_w) >> 10;
  *d_q10 = (dist_tab_q10[idx] * lo_w + dist_tab_q10[idx + 1] * hi_w) >> 10;
}
389
// Models the rate and distortion for a Laplacian source with variance |var|
// when quantized with a uniform quantizer of step size |qstep|. |n_log2|
// scales both the normalized x^2 term and the resulting rate. The closed
// form expressions are in:
// Hang and Chen, "Source Model for transform video coder and its
// application - Part I: Fundamental Theory", IEEE Trans. Circ.
// Sys. for Video Tech., April 1997.
//
// A zero variance yields a rate and distortion of zero.
void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
                                  unsigned int qstep, int *rate,
                                  int64_t *dist) {
  static const uint32_t MAX_XSQ_Q10 = 245727;
  uint64_t xsq_q10_64;
  int xsq_q10;
  int d_q10, r_q10;

  if (var == 0) {
    *rate = 0;
    *dist = 0;
    return;
  }

  // x^2 in Q10, rounded to nearest and capped at MAX_XSQ_Q10.
  xsq_q10_64 =
      (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
  xsq_q10 = (int)MIN(xsq_q10_64, MAX_XSQ_Q10);

  model_rd_norm(xsq_q10, &r_q10, &d_q10);

  *rate = ((r_q10 << n_log2) + 2) >> 2;
  *dist = (var * (int64_t)d_q10 + 512) >> 10;
}
413
// Returns 1 if any of the |nbytes| bytes starting at |ctx| is non-zero and 0
// otherwise. |nbytes| must not exceed sizeof(uint64_t).
//
// The bytes are copied into a zeroed integer with memcpy() rather than being
// read through a casted wider pointer: the context arrays are not guaranteed
// to be suitably aligned for a wider type, and accessing them through an
// incompatible lvalue type violates the C effective-type (aliasing) rules.
static int ctx_bytes_nonzero(const ENTROPY_CONTEXT *ctx, size_t nbytes) {
  uint64_t bits = 0;
  memcpy(&bits, ctx, nbytes);
  return bits != 0;
}

// Builds the above/left entropy contexts used for a transform of size
// |tx_size| on plane |pd| of a block of size |bsize|.
//
// For TX_4X4 the plane contexts are copied as-is. For larger transforms one
// flag is written per transform block (at indices 0, step, 2 * step, ... with
// step = 2, 4 or 8). The flag is 1 if any of the 2, 4 or 8 context bytes
// starting at that index is non-zero. Entries between those indices are left
// untouched. An invalid |tx_size| asserts and writes nothing.
void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;

  int i;
  switch (tx_size) {
    case TX_4X4:
      memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = ctx_bytes_nonzero(&above[i], sizeof(uint16_t));
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = ctx_bytes_nonzero(&left[i], sizeof(uint16_t));
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = ctx_bytes_nonzero(&above[i], sizeof(uint32_t));
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = ctx_bytes_nonzero(&left[i], sizeof(uint32_t));
      break;
    case TX_32X32:
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = ctx_bytes_nonzero(&above[i], sizeof(uint64_t));
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = ctx_bytes_nonzero(&left[i], sizeof(uint64_t));
      break;
    default:
      assert(0 && "Invalid transform size.");
      break;
  }
}
453
// Evaluates the candidate reference motion vectors for |ref_frame| and
// records which one gives the lowest SAD against the source block.
//
// Candidates are the two entries of mbmi_ext->ref_mvs[ref_frame], plus
// x->pred_mv[ref_frame] when adaptive motion search is enabled and
// |block_size| is below x->max_partition_size. Each candidate is rounded to
// full-pel before the SAD is measured. The second candidate is skipped when
// it equals the first, and so is any repeated full-pel zero vector.
//
// Results are stored in x->mv_best_ref_index, x->max_mv_context and
// x->pred_mv_sad for |ref_frame|.
void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
                 uint8_t *ref_y_buffer, int ref_y_stride,
                 int ref_frame, BLOCK_SIZE block_size) {
  const int use_pred_mv =
      cpi->sf.adaptive_motion_search && block_size < x->max_partition_size;
  const int num_mv_refs = MAX_MV_REF_CANDIDATES + use_pred_mv;
  const int near_same_nearest = x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
                                x->mbmi_ext->ref_mvs[ref_frame][1].as_int;
  uint8_t *const src_y_ptr = x->plane[0].src.buf;
  int zero_seen = 0;
  int best_index = 0;
  int best_sad = INT_MAX;
  int max_mv = 0;
  int i;
  MV pred_mv[3];

  pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
  pred_mv[2] = x->pred_mv[ref_frame];
  assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));

  // Get the sad for each candidate reference mv.
  for (i = 0; i < num_mv_refs; ++i) {
    const MV *const this_mv = &pred_mv[i];
    int fp_row, fp_col, is_zero, this_sad;
    uint8_t *ref_y_ptr;

    if (i == 1 && near_same_nearest) continue;

    // Round the 1/8-pel components to full-pel.
    fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
    fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
    max_mv = MAX(max_mv, MAX(abs(this_mv->row), abs(this_mv->col)) >> 3);

    // Evaluate the full-pel zero vector at most once.
    is_zero = (fp_row == 0 && fp_col == 0);
    if (is_zero && zero_seen) continue;
    zero_seen |= is_zero;

    ref_y_ptr = &ref_y_buffer[ref_y_stride * fp_row + fp_col];
    // Find sad for current vector.
    this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
                                           ref_y_ptr, ref_y_stride);
    // Note if it is the best so far.
    if (this_sad < best_sad) {
      best_sad = this_sad;
      best_index = i;
    }
  }

  // Note the index of the mv that worked best in the reference list.
  x->mv_best_ref_index[ref_frame] = best_index;
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}
510
vp9_setup_pred_block(const MACROBLOCKD * xd,struct buf_2d dst[MAX_MB_PLANE],const YV12_BUFFER_CONFIG * src,int mi_row,int mi_col,const struct scale_factors * scale,const struct scale_factors * scale_uv)511 void vp9_setup_pred_block(const MACROBLOCKD *xd,
512 struct buf_2d dst[MAX_MB_PLANE],
513 const YV12_BUFFER_CONFIG *src,
514 int mi_row, int mi_col,
515 const struct scale_factors *scale,
516 const struct scale_factors *scale_uv) {
517 int i;
518
519 dst[0].buf = src->y_buffer;
520 dst[0].stride = src->y_stride;
521 dst[1].buf = src->u_buffer;
522 dst[2].buf = src->v_buffer;
523 dst[1].stride = dst[2].stride = src->uv_stride;
524
525 for (i = 0; i < MAX_MB_PLANE; ++i) {
526 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
527 i ? scale_uv : scale,
528 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
529 }
530 }
531
// Converts a raster-order block index within a plane block of size
// |plane_bsize| into an element offset for a buffer with the given |stride|.
// Each block step advances 4 elements horizontally or 4 rows vertically.
int vp9_raster_block_offset(BLOCK_SIZE plane_bsize,
                            int raster_block, int stride) {
  const int width_log2 = b_width_log2_lookup[plane_bsize];
  const int block_row = raster_block >> width_log2;
  const int block_col = raster_block & ((1 << width_log2) - 1);

  return (4 * block_row) * stride + (4 * block_col);
}
539
// Returns a pointer into |base| at the position of |raster_block|. The stride
// of |base| is taken to be the full width of the plane block, i.e. 4 times
// its width in 4x4 units.
int16_t* vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                       int raster_block, int16_t *base) {
  const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset =
      vp9_raster_block_offset(plane_bsize, raster_block, stride);

  return base + offset;
}
545
vp9_get_scaled_ref_frame(const VP9_COMP * cpi,int ref_frame)546 YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
547 int ref_frame) {
548 const VP9_COMMON *const cm = &cpi->common;
549 const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
550 const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
551 return
552 (scaled_idx != ref_idx && scaled_idx != INVALID_IDX) ?
553 &cm->buffer_pool->frame_bufs[scaled_idx].buf : NULL;
554 }
555
vp9_get_switchable_rate(const VP9_COMP * cpi,const MACROBLOCKD * const xd)556 int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
557 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
558 const int ctx = vp9_get_pred_context_switchable_interp(xd);
559 return SWITCHABLE_INTERP_RATE_FACTOR *
560 cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
561 }
562
// Initializes rd->thresh_mult[] for every prediction mode.
//
// Every entry starts at a baseline of -500 in BEST mode and 0 otherwise. The
// three NEAREST single-reference modes are then set outright (300 with
// adaptive rd thresholds, 0 without), and the listed remaining modes have a
// fixed increment added on top of the baseline.
void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
  RD_OPT *const rd = &cpi->rd;
  const int baseline = (cpi->oxcf.mode == BEST) ? -500 : 0;
  const int nearest = cpi->sf.adaptive_rd_thresh ? 300 : 0;
  int mode;

  // Set baseline threshold values.
  for (mode = 0; mode < MAX_MODES; ++mode) {
    rd->thresh_mult[mode] = baseline;
  }

  // These entries replace the baseline rather than adding to it.
  rd->thresh_mult[THR_NEARESTMV] = nearest;
  rd->thresh_mult[THR_NEARESTG] = nearest;
  rd->thresh_mult[THR_NEARESTA] = nearest;

  rd->thresh_mult[THR_DC] += 1000;

  rd->thresh_mult[THR_NEWMV] += 1000;
  rd->thresh_mult[THR_NEWA] += 1000;
  rd->thresh_mult[THR_NEWG] += 1000;

  rd->thresh_mult[THR_NEARMV] += 1000;
  rd->thresh_mult[THR_NEARA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;

  rd->thresh_mult[THR_TM] += 1000;

  rd->thresh_mult[THR_COMP_NEARLA] += 1500;
  rd->thresh_mult[THR_COMP_NEWLA] += 2000;
  rd->thresh_mult[THR_NEARG] += 1000;
  rd->thresh_mult[THR_COMP_NEARGA] += 1500;
  rd->thresh_mult[THR_COMP_NEWGA] += 2000;

  rd->thresh_mult[THR_ZEROMV] += 2000;
  rd->thresh_mult[THR_ZEROG] += 2000;
  rd->thresh_mult[THR_ZEROA] += 2000;
  rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
  rd->thresh_mult[THR_COMP_ZEROGA] += 2500;

  rd->thresh_mult[THR_H_PRED] += 2000;
  rd->thresh_mult[THR_V_PRED] += 2000;
  rd->thresh_mult[THR_D45_PRED] += 2500;
  rd->thresh_mult[THR_D135_PRED] += 2500;
  rd->thresh_mult[THR_D117_PRED] += 2500;
  rd->thresh_mult[THR_D153_PRED] += 2500;
  rd->thresh_mult[THR_D207_PRED] += 2500;
  rd->thresh_mult[THR_D63_PRED] += 2500;
}
616
// Loads rd->thresh_mult_sub8x8[] from one of two fixed rows: row 1 when the
// encoder runs in BEST mode, row 0 otherwise.
void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
  static const int sub8x8_mult[2][MAX_REFS] = {
    { 2500, 2500, 2500, 4500, 4500, 2500 },
    { 2000, 2000, 2000, 4000, 4000, 2000 }
  };
  const int row = (cpi->oxcf.mode == BEST);

  memcpy(cpi->rd.thresh_mult_sub8x8, sub8x8_mult[row],
         sizeof(sub8x8_mult[row]));
}
625
vp9_update_rd_thresh_fact(int (* factor_buf)[MAX_MODES],int rd_thresh,int bsize,int best_mode_index)626 void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
627 int bsize, int best_mode_index) {
628 if (rd_thresh > 0) {
629 const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
630 int mode;
631 for (mode = 0; mode < top_mode; ++mode) {
632 const BLOCK_SIZE min_size = MAX(bsize - 1, BLOCK_4X4);
633 const BLOCK_SIZE max_size = MIN(bsize + 2, BLOCK_64X64);
634 BLOCK_SIZE bs;
635 for (bs = min_size; bs <= max_size; ++bs) {
636 int *const fact = &factor_buf[bs][mode];
637 if (mode == best_mode_index) {
638 *fact -= (*fact >> 4);
639 } else {
640 *fact = MIN(*fact + RD_THRESH_INC,
641 rd_thresh * RD_THRESH_MAX_FACT);
642 }
643 }
644 }
645 }
646 }
647
// Returns the intra cost penalty for the DC quantizer q obtained from
// |qindex|, |qdelta| and |bit_depth|.
//
// The penalty is 20 * q for 8-bit. In high-bit-depth builds it is 5 * q for
// 10-bit and 5 * q rounded-shifted right by 2 for 12-bit; an unsupported bit
// depth asserts and returns -1.
int vp9_get_intra_cost_penalty(int qindex, int qdelta,
                               vpx_bit_depth_t bit_depth) {
  const int q = vp9_dc_quant(qindex, qdelta, bit_depth);
  int penalty = 20 * q;

#if CONFIG_VP9_HIGHBITDEPTH
  switch (bit_depth) {
    case VPX_BITS_8:
      break;
    case VPX_BITS_10:
      penalty = 5 * q;
      break;
    case VPX_BITS_12:
      penalty = ROUND_POWER_OF_TWO(5 * q, 2);
      break;
    default:
      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
      penalty = -1;
      break;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  return penalty;
}
667
668