1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <assert.h>
12 #include <math.h>
13 #include <stdio.h>
14
15 #include "./vp9_rtcd.h"
16
17 #include "vpx_dsp/vpx_dsp_common.h"
18 #include "vpx_mem/vpx_mem.h"
19 #include "vpx_ports/bitops.h"
20 #include "vpx_ports/mem.h"
21 #include "vpx_ports/system_state.h"
22
23 #include "vp9/common/vp9_common.h"
24 #include "vp9/common/vp9_entropy.h"
25 #include "vp9/common/vp9_entropymode.h"
26 #include "vp9/common/vp9_mvref_common.h"
27 #include "vp9/common/vp9_pred_common.h"
28 #include "vp9/common/vp9_quant_common.h"
29 #include "vp9/common/vp9_reconinter.h"
30 #include "vp9/common/vp9_reconintra.h"
31 #include "vp9/common/vp9_seg_common.h"
32
33 #include "vp9/encoder/vp9_cost.h"
34 #include "vp9/encoder/vp9_encodemb.h"
35 #include "vp9/encoder/vp9_encodemv.h"
36 #include "vp9/encoder/vp9_encoder.h"
37 #include "vp9/encoder/vp9_mcomp.h"
38 #include "vp9/encoder/vp9_quantize.h"
39 #include "vp9/encoder/vp9_ratectrl.h"
40 #include "vp9/encoder/vp9_rd.h"
41 #include "vp9/encoder/vp9_tokenize.h"
42
// Exponent applied to the scaled quantizer step when the rd threshold factor
// is derived in compute_rd_thresh_factor().
#define RD_THRESH_POW 1.25
// RDMULT is divided by this ratio to obtain the per-bit error cost
// (errorperbit) in vp9_initialize_rd_consts().
#define RD_MULT_EPB_RATIO 64

// Weight applied to the rate of switchable interpolation filters.
#define SWITCHABLE_INTERP_RATE_FACTOR 1
48
// Marks |rd_cost| as "no result yet": rate is set to INT_MAX while dist and
// rdcost are set to INT64_MAX.
void vp9_rd_cost_reset(RD_COST *rd_cost) {
  rd_cost->rdcost = INT64_MAX;
  rd_cost->dist = INT64_MAX;
  rd_cost->rate = INT_MAX;
}
54
// Clears |rd_cost|: rate, dist and rdcost all become zero.
void vp9_rd_cost_init(RD_COST *rd_cost) {
  rd_cost->rdcost = 0;
  rd_cost->dist = 0;
  rd_cost->rate = 0;
}
60
61 // The baseline rd thresholds for breaking out of the rd loop for
62 // certain modes are assumed to be based on 8x8 blocks.
63 // This table is used to correct for block size.
64 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
65 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
66 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
67 };
68
// Populates the mode cost tables stored in |cpi| by running vp9_cost_tokens()
// over the relevant probability sets:
//  - y_mode_costs from the key frame y mode probabilities,
//  - mbmode_cost and both intra_uv_mode_cost rows from the intra mode tree,
//  - switchable_interp_costs from the frame context's interp filter probs.
static void fill_mode_costs(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const fc = cpi->common.fc;
  int row, col, ctx;

  // Key frame luma mode costs, indexed by a pair of intra modes.
  for (row = 0; row < INTRA_MODES; ++row) {
    for (col = 0; col < INTRA_MODES; ++col) {
      vp9_cost_tokens(cpi->y_mode_costs[row][col],
                      vp9_kf_y_mode_prob[row][col], vp9_intra_mode_tree);
    }
  }

  vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);

  // Chroma mode costs: one row for key frames, one for inter frames.
  vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME],
                  vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
  vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME],
                  fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);

  for (ctx = 0; ctx < SWITCHABLE_FILTER_CONTEXTS; ++ctx) {
    vp9_cost_tokens(cpi->switchable_interp_costs[ctx],
                    fc->switchable_interp_prob[ctx],
                    vp9_switchable_interp_tree);
  }
}
88
// Builds the coefficient token cost tables |c| from the model probabilities
// |p|. For every (tx size, plane type, ref type, band, context) combination
// the model probabilities are expanded to a full probability set, and two
// cost rows are produced from it: index 0 via vp9_cost_tokens() and index 1
// via vp9_cost_tokens_skip().
static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  TX_SIZE tx;
  int plane, ref, band, ctx;

  for (tx = TX_4X4; tx <= TX_32X32; ++tx) {
    for (plane = 0; plane < PLANE_TYPES; ++plane) {
      for (ref = 0; ref < REF_TYPES; ++ref) {
        for (band = 0; band < COEF_BANDS; ++band) {
          for (ctx = 0; ctx < BAND_COEFF_CONTEXTS(band); ++ctx) {
            vpx_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[tx][plane][ref][band][ctx], probs);
            vp9_cost_tokens((int *)c[tx][plane][ref][band][0][ctx], probs,
                            vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[tx][plane][ref][band][1][ctx],
                                 probs, vp9_coef_tree);
            // Both rows are expected to agree on the EOB token cost.
            assert(c[tx][plane][ref][band][0][ctx][EOB_TOKEN] ==
                   c[tx][plane][ref][band][1][ctx][EOB_TOKEN]);
          }
        }
      }
    }
  }
}
108
109 // Values are now correlated to quantizer.
110 static int sad_per_bit16lut_8[QINDEX_RANGE];
111 static int sad_per_bit4lut_8[QINDEX_RANGE];
112
113 #if CONFIG_VP9_HIGHBITDEPTH
114 static int sad_per_bit16lut_10[QINDEX_RANGE];
115 static int sad_per_bit4lut_10[QINDEX_RANGE];
116 static int sad_per_bit16lut_12[QINDEX_RANGE];
117 static int sad_per_bit4lut_12[QINDEX_RANGE];
118 #endif
119
// Fills the first |range| entries of |bit16lut| and |bit4lut| for the given
// |bit_depth|. Each entry is a linear function of the real quantizer value
// returned by vp9_convert_qindex_to_q(), truncated to int.
// The tables are computed from a formula (rather than hand-tuned) so that the
// effect of experimental quantizer table changes is easier to assess.
static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
                            vpx_bit_depth_t bit_depth) {
  int qindex = 0;

  while (qindex < range) {
    const double q = vp9_convert_qindex_to_q(qindex, bit_depth);
    bit16lut[qindex] = (int)(0.0418 * q + 2.4107);
    bit4lut[qindex] = (int)(0.063 * q + 2.742);
    ++qindex;
  }
}
132
vp9_init_me_luts(void)133 void vp9_init_me_luts(void) {
134 init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
135 VPX_BITS_8);
136 #if CONFIG_VP9_HIGHBITDEPTH
137 init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
138 VPX_BITS_10);
139 init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
140 VPX_BITS_12);
141 #endif
142 }
143
// Extra rd multiplier weight (in 1/128 units, see vp9_compute_rd_mult()),
// indexed by the golden frame boost divided by 100 and capped at 15.
static const int rd_boost_factor[16] = {
  64, 32, 32, 32,
  24, 16, 12, 12,
  8,  8,  4,  4,
  2,  2,  1,  0
};
148 static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = {
149 128, 144, 128, 128, 144
150 };
151
vp9_compute_rd_mult(const VP9_COMP * cpi,int qindex)152 int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
153 const int64_t q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
154 #if CONFIG_VP9_HIGHBITDEPTH
155 int64_t rdmult = 0;
156 switch (cpi->common.bit_depth) {
157 case VPX_BITS_8:
158 rdmult = 88 * q * q / 24;
159 break;
160 case VPX_BITS_10:
161 rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4);
162 break;
163 case VPX_BITS_12:
164 rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8);
165 break;
166 default:
167 assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
168 return -1;
169 }
170 #else
171 int64_t rdmult = 88 * q * q / 24;
172 #endif // CONFIG_VP9_HIGHBITDEPTH
173 if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
174 const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
175 const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
176 const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));
177
178 rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
179 rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
180 }
181 if (rdmult < 1)
182 rdmult = 1;
183 return (int)rdmult;
184 }
185
// Derives the rd threshold factor for |qindex|.
//
// The DC quantizer is divided by 4, 16 or 64 for 8-, 10- and 12-bit depth
// respectively, raised to RD_THRESH_POW and scaled by 5.12. Builds without
// high bit depth always use the 8-bit path.
//
// Returns the factor, never less than 8, or -1 for an unsupported bit depth
// (high bit depth builds only).
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
  vpx_bit_depth_t quant_depth = VPX_BITS_8;
  double divisor = 4.0;
  double q;
  int factor;

#if CONFIG_VP9_HIGHBITDEPTH
  switch (bit_depth) {
    case VPX_BITS_8:
      break;
    case VPX_BITS_10:
      quant_depth = VPX_BITS_10;
      divisor = 16.0;
      break;
    case VPX_BITS_12:
      quant_depth = VPX_BITS_12;
      divisor = 64.0;
      break;
    default:
      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
      return -1;
  }
#else
  (void)bit_depth;
#endif  // CONFIG_VP9_HIGHBITDEPTH

  q = vp9_dc_quant(qindex, 0, quant_depth) / divisor;
  // TODO(debargha): Adjust the function below.
  factor = (int)(pow(q, RD_THRESH_POW) * 5.12);
  return factor > 8 ? factor : 8;
}
210
// Loads x->sadperbit16 and x->sadperbit4 from the SAD-per-bit lookup tables
// at |qindex|, using the tables that match the stream bit depth. Builds
// without high bit depth always use the 8-bit tables. For an unsupported bit
// depth the function asserts and leaves |x| untouched.
void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
  const int *lut16 = sad_per_bit16lut_8;
  const int *lut4 = sad_per_bit4lut_8;

#if CONFIG_VP9_HIGHBITDEPTH
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8:
      break;
    case VPX_BITS_10:
      lut16 = sad_per_bit16lut_10;
      lut4 = sad_per_bit4lut_10;
      break;
    case VPX_BITS_12:
      lut16 = sad_per_bit16lut_12;
      lut4 = sad_per_bit4lut_12;
      break;
    default:
      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
      return;
  }
#else
  (void)cpi;
#endif  // CONFIG_VP9_HIGHBITDEPTH

  x->sadperbit16 = lut16[qindex];
  x->sadperbit4 = lut4[qindex];
}
235
// Recomputes rd->threshes for every segment and block size.
//
// For each segment the effective qindex (segment qindex plus the luma DC
// delta, clamped to [0, MAXQ]) is turned into a threshold factor, which is
// then scaled by the block size factor. Block sizes of 8x8 and above take
// their multipliers from rd->thresh_mult (MAX_MODES entries); smaller sizes
// use rd->thresh_mult_sub8x8 (MAX_REFS entries). Results that would overflow
// are saturated to INT_MAX.
static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
  int seg;

  for (seg = 0; seg < MAX_SEGMENTS; ++seg) {
    const int qindex =
        clamp(vp9_get_qindex(&cm->seg, seg, cm->base_qindex) +
                  cm->y_dc_delta_q,
              0, MAXQ);
    const int q_factor = compute_rd_thresh_factor(qindex, cm->bit_depth);
    int bs;

    for (bs = 0; bs < BLOCK_SIZES; ++bs) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int scale = q_factor * rd_thresh_block_size_factor[bs];
      const int mult_limit = INT_MAX / scale;
      int idx;

      if (bs < BLOCK_8X8) {
        for (idx = 0; idx < MAX_REFS; ++idx) {
          if (rd->thresh_mult_sub8x8[idx] < mult_limit) {
            rd->threshes[seg][bs][idx] =
                rd->thresh_mult_sub8x8[idx] * scale / 4;
          } else {
            rd->threshes[seg][bs][idx] = INT_MAX;
          }
        }
      } else {
        for (idx = 0; idx < MAX_MODES; ++idx) {
          if (rd->thresh_mult[idx] < mult_limit) {
            rd->threshes[seg][bs][idx] = rd->thresh_mult[idx] * scale / 4;
          } else {
            rd->threshes[seg][bs][idx] = INT_MAX;
          }
        }
      }
    }
  }
}
267
// Sets up the per-frame rate-distortion constants held in |cpi|:
//  - RDDIV / RDMULT and the derived errorperbit (kept at a minimum of 1),
//  - whether transform size selection is enabled,
//  - the block thresholds and partition probabilities,
//  - the token, partition, mode, motion vector and inter mode cost tables.
// Several cost tables are only rebuilt under the speed-feature and frame-type
// conditions tested below.
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  RD_OPT *const rd = &cpi->rd;
  int ctx;

  vpx_clear_system_state();

  rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
  rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;
  if (x->errorperbit == 0) {
    x->errorperbit = 1;
  }

  // Transform size selection is off only for USE_LARGESTALL on non-key frames.
  x->select_tx_size = !(cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                        cm->frame_type != KEY_FRAME);

  set_block_thresholds(cm, rd);
  set_partition_probs(cm, xd);

  if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) {
    fill_token_costs(x->token_costs, cm->fc->coef_probs);
  }

  if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
      cm->frame_type == KEY_FRAME) {
    for (ctx = 0; ctx < PARTITION_CONTEXTS; ++ctx) {
      vp9_cost_tokens(cpi->partition_cost[ctx], get_partition_probs(xd, ctx),
                      vp9_partition_tree);
    }
  }

  // In non-rd mode the mode costs are refreshed on key frames and on frames
  // whose index is 1 modulo 8.
  if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
      cm->frame_type == KEY_FRAME) {
    fill_mode_costs(cpi);

    if (!frame_is_intra_only(cm)) {
      vp9_build_nmv_cost_table(
          x->nmvjointcost,
          cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
          &cm->fc->nmvc, cm->allow_high_precision_mv);

      for (ctx = 0; ctx < INTER_MODE_CONTEXTS; ++ctx) {
        vp9_cost_tokens((int *)cpi->inter_mode_cost[ctx],
                        cm->fc->inter_mode_probs[ctx], vp9_inter_mode_tree);
      }
    }
  }
}
315
// Looks up the normalized rate (*r_q10) and normalized distortion (*d_q10)
// for the squared normalized step size |xsq_q10|. All three values are in
// Q10 fixed point. The result is a linear interpolation between two
// neighbouring table samples.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The three tables below must all have the same number of entries.
  //
  // Each table is sampled at the four most significant bits of
  // x^2 + 8 / 256, giving eight samples per row below.

  // Normalized rate.
  // Models the rate of a Laplacian source with a given variance when it is
  // quantized by a uniform quantizer with a given step size. Closed form:
  //   Rn(x) = H(sqrt(r)) + sqrt(r) * [1 + H(r) / (1 - r)],
  // with r = exp(-sqrt(2) * x), x = qpstep / sqrt(variance), and H() the
  // binary entropy function.
  static const int rate_tab_q10[] = {
    65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651,
    4553,  4389, 4255, 4142, 4044, 3958, 3881, 3811,
    3748,  3635, 3538, 3453, 3376, 3307, 3244, 3186,
    3133,  3037, 2952, 2877, 2809, 2747, 2690, 2638,
    2589,  2501, 2423, 2353, 2290, 2232, 2179, 2130,
    2084,  2001, 1928, 1862, 1802, 1748, 1698, 1651,
    1608,  1530, 1460, 1398, 1342, 1290, 1243, 1199,
    1159,  1086, 1021, 963,  911,  864,  821,  781,
    745,   680,  623,  574,  530,  490,  455,  424,
    395,   345,  304,  269,  239,  213,  190,  171,
    154,   126,  104,  87,   73,   61,   52,   44,
    38,    28,   21,   16,   12,   10,   8,    6,
    5,     3,    2,    1,    1,    1,    0,    0,
  };
  // Normalized distortion.
  // Models the normalized distortion of a Laplacian source with a given
  // variance when it is quantized by a uniform quantizer with a given step
  // size. Closed form:
  //   Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2)),
  // with x = qpstep / sqrt(variance).
  // The actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
    0,    0,    1,    1,    1,    2,    2,    2,
    3,    3,    4,    5,    5,    6,    7,    7,
    8,    9,    11,   12,   13,   15,   16,   17,
    18,   21,   24,   26,   29,   31,   34,   36,
    39,   44,   49,   54,   59,   64,   69,   73,
    78,   88,   97,   106,  115,  124,  133,  142,
    151,  167,  184,  200,  215,  231,  245,  260,
    274,  301,  327,  351,  375,  397,  418,  439,
    458,  495,  528,  559,  587,  613,  637,  659,
    680,  717,  749,  777,  801,  823,  842,  859,
    874,  899,  919,  936,  949,  960,  969,  977,
    983,  994,  1001, 1006, 1010, 1013, 1015, 1017,
    1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
  };
  // Sample positions (x^2 in Q10) that the two tables above correspond to.
  static const int xsq_iq_q10[] = {
    0,      4,      8,      12,     16,     20,     24,     28,
    32,     40,     48,     56,     64,     72,     80,     88,
    96,     112,    128,    144,    160,    176,    192,    208,
    224,    256,    288,    320,    352,    384,    416,    448,
    480,    544,    608,    672,    736,    800,    864,    928,
    992,    1120,   1248,   1376,   1504,   1632,   1760,   1888,
    2016,   2272,   2528,   2784,   3040,   3296,   3552,   3808,
    4064,   4576,   5088,   5600,   6112,   6624,   7136,   7648,
    8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
    16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,
    32736,  36832,  40928,  45024,  49120,  53216,  57312,  61408,
    65504,  73696,  81888,  90080,  98272,  106464, 114656, 122848,
    131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
  };

  // Locate the table row (octave) and the sample within that row.
  const int shifted = (xsq_q10 >> 2) + 8;
  const int octave = get_msb(shifted) - 3;
  const int idx = (octave << 3) + ((shifted >> octave) & 0x7);

  // Interpolation weights in Q10: |hi_w_q10| for sample idx + 1 and
  // |lo_w_q10| for sample idx; they sum to 1.0.
  const int hi_w_q10 = ((xsq_q10 - xsq_iq_q10[idx]) << 10) >> (2 + octave);
  const int lo_w_q10 = (1 << 10) - hi_w_q10;

  *r_q10 =
      (rate_tab_q10[idx] * lo_w_q10 + rate_tab_q10[idx + 1] * hi_w_q10) >> 10;
  *d_q10 =
      (dist_tab_q10[idx] * lo_w_q10 + dist_tab_q10[idx + 1] * hi_w_q10) >> 10;
}
390
// Models the rate and distortion of a Laplacian source with variance |var|
// quantized by a uniform quantizer with step size |qstep|. |n_log2| is the
// log2 scale applied to both the squared step size and the resulting rate.
// The closed form expressions come from:
//   Hang and Chen, "Source Model for transform video coder and its
//   application - Part I: Fundamental Theory", IEEE Trans. Circ.
//   Sys. for Video Tech., April 1997.
//
// Outputs are written to |*rate| and |*dist|; both are zero when |var| is
// zero.
void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
                                  unsigned int qstep, int *rate,
                                  int64_t *dist) {
  // Largest squared normalized step size that model_rd_norm() is given.
  static const uint32_t MAX_XSQ_Q10 = 245727;
  uint64_t xsq_q10_64;
  int xsq_q10;
  int r_q10, d_q10;

  if (var == 0) {
    *rate = 0;
    *dist = 0;
    return;
  }

  // x^2 = qstep^2 * 2^n_log2 / var, in Q10 and rounded to nearest.
  xsq_q10_64 =
      (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
  xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);

  model_rd_norm(xsq_q10, &r_q10, &d_q10);

  *rate = ((r_q10 << n_log2) + 2) >> 2;
  *dist = (var * (int64_t)d_q10 + 512) >> 10;
}
414
// Returns 1 if any of the |count| consecutive entropy contexts starting at
// |ctx| is non-zero, and 0 otherwise. |count| must not exceed 8.
//
// The bytes are gathered with memcpy() into a zero-initialized 64-bit value
// instead of being read through a casted wider pointer. This keeps the access
// well defined regardless of the alignment of |ctx| and of the aliasing rules.
static int entropy_ctx_any_nonzero(const ENTROPY_CONTEXT *ctx, size_t count) {
  uint64_t bits = 0;
  assert(count <= sizeof(bits));
  memcpy(&bits, ctx, count);
  return bits != 0;
}

// Builds the above/left entropy context arrays used for transform size
// |tx_size| on a block of size |bsize| in plane |pd|.
//
//  - TX_4X4: the plane's above/left contexts are copied unchanged, one entry
//    per 4x4 column/row of the plane block.
//  - TX_8X8, TX_16X16, TX_32X32: each group of 2, 4 or 8 neighbouring 4x4
//    contexts is collapsed into a single 0/1 flag (1 when any context in the
//    group is non-zero). The flag is stored at the index of the group's first
//    entry; the other entries of |t_above| / |t_left| are not written.
//
// |t_above| and |t_left| are the output arrays. Any other |tx_size| triggers
// an assert and leaves the outputs untouched.
void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;

  int i;
  switch (tx_size) {
    case TX_4X4:
      memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = entropy_ctx_any_nonzero(&above[i], 2);
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = entropy_ctx_any_nonzero(&left[i], 2);
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = entropy_ctx_any_nonzero(&above[i], 4);
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = entropy_ctx_any_nonzero(&left[i], 4);
      break;
    case TX_32X32:
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = entropy_ctx_any_nonzero(&above[i], 8);
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = entropy_ctx_any_nonzero(&left[i], 8);
      break;
    default:
      assert(0 && "Invalid transform size.");
      break;
  }
}
454
// Evaluates the candidate reference motion vectors for |ref_frame| and
// records which one predicts the source block best.
//
// The candidates are the two entries of x->mbmi_ext->ref_mvs[ref_frame] and,
// when adaptive motion search is enabled and |block_size| is below the
// maximum partition size, x->pred_mv[ref_frame]. Each candidate is rounded to
// full-pel, and the SAD between the source block and the reference block at
// that offset in |ref_y_buffer| is computed. Duplicate candidates (second
// entry equal to the first, or a repeated full-pel zero vector) are skipped.
//
// Results are stored in x->mv_best_ref_index, x->max_mv_context and
// x->pred_mv_sad, all indexed by |ref_frame|.
void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
                 uint8_t *ref_y_buffer, int ref_y_stride,
                 int ref_frame, BLOCK_SIZE block_size) {
  const int candidate_count =
      MAX_MV_REF_CANDIDATES + (cpi->sf.adaptive_motion_search &&
                               block_size < x->max_partition_size);
  uint8_t *const src_y_ptr = x->plane[0].src.buf;
  MV candidates[3];
  int duplicate_second;
  int have_zero = 0;
  int winner = 0;
  int lowest_sad = INT_MAX;
  int largest_mv = 0;
  int idx;

  candidates[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
  candidates[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
  candidates[2] = x->pred_mv[ref_frame];
  assert(candidate_count <=
         (int)(sizeof(candidates) / sizeof(candidates[0])));

  duplicate_second = x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
                     x->mbmi_ext->ref_mvs[ref_frame][1].as_int;

  // Measure the SAD of every distinct candidate.
  for (idx = 0; idx < candidate_count; ++idx) {
    const MV *const mv = &candidates[idx];
    uint8_t *ref_y_ptr;
    int row_fp, col_fp;
    int is_zero;
    int sad;

    // The second entry adds nothing when it equals the first.
    if (idx == 1 && duplicate_second) {
      continue;
    }

    // Round the 1/8-pel vector to full-pel.
    row_fp = (mv->row + 3 + (mv->row >= 0)) >> 3;
    col_fp = (mv->col + 3 + (mv->col >= 0)) >> 3;
    largest_mv =
        VPXMAX(largest_mv, VPXMAX(abs(mv->row), abs(mv->col)) >> 3);

    // Only the first full-pel zero vector is evaluated.
    is_zero = (row_fp == 0 && col_fp == 0);
    if (is_zero && have_zero) {
      continue;
    }
    have_zero |= is_zero;

    ref_y_ptr = &ref_y_buffer[ref_y_stride * row_fp + col_fp];
    sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
                                      ref_y_ptr, ref_y_stride);
    if (sad < lowest_sad) {
      lowest_sad = sad;
      winner = idx;
    }
  }

  // Publish the best candidate index, the largest vector magnitude seen and
  // the best SAD for this reference frame.
  x->mv_best_ref_index[ref_frame] = winner;
  x->max_mv_context[ref_frame] = largest_mv;
  x->pred_mv_sad[ref_frame] = lowest_sad;
}
511
vp9_setup_pred_block(const MACROBLOCKD * xd,struct buf_2d dst[MAX_MB_PLANE],const YV12_BUFFER_CONFIG * src,int mi_row,int mi_col,const struct scale_factors * scale,const struct scale_factors * scale_uv)512 void vp9_setup_pred_block(const MACROBLOCKD *xd,
513 struct buf_2d dst[MAX_MB_PLANE],
514 const YV12_BUFFER_CONFIG *src,
515 int mi_row, int mi_col,
516 const struct scale_factors *scale,
517 const struct scale_factors *scale_uv) {
518 int i;
519
520 dst[0].buf = src->y_buffer;
521 dst[0].stride = src->y_stride;
522 dst[1].buf = src->u_buffer;
523 dst[2].buf = src->v_buffer;
524 dst[1].stride = dst[2].stride = src->uv_stride;
525
526 for (i = 0; i < MAX_MB_PLANE; ++i) {
527 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
528 i ? scale_uv : scale,
529 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
530 }
531 }
532
// Converts the raster index of a 4x4 block inside a plane block of size
// |plane_bsize| into a sample offset for a buffer with the given |stride|.
int vp9_raster_block_offset(BLOCK_SIZE plane_bsize,
                            int raster_block, int stride) {
  const int width_log2 = b_width_log2_lookup[plane_bsize];
  const int col_mask = (1 << width_log2) - 1;
  // Position of the 4x4 block in samples.
  const int y = 4 * (raster_block >> width_log2);
  const int x = 4 * (raster_block & col_mask);

  return y * stride + x;
}
540
// Returns a pointer into the int16_t buffer |base| for the 4x4 block with
// raster index |raster_block|. The buffer stride is taken to be the width of
// the plane block in samples (4 * number of 4x4 columns).
int16_t *vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                       int raster_block, int16_t *base) {
  const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset =
      vp9_raster_block_offset(plane_bsize, raster_block, stride);

  return base + offset;
}
546
vp9_get_scaled_ref_frame(const VP9_COMP * cpi,int ref_frame)547 YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
548 int ref_frame) {
549 const VP9_COMMON *const cm = &cpi->common;
550 const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
551 const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
552 return
553 (scaled_idx != ref_idx && scaled_idx != INVALID_IDX) ?
554 &cm->buffer_pool->frame_bufs[scaled_idx].buf : NULL;
555 }
556
vp9_get_switchable_rate(const VP9_COMP * cpi,const MACROBLOCKD * const xd)557 int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
558 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
559 const int ctx = vp9_get_pred_context_switchable_interp(xd);
560 return SWITCHABLE_INTERP_RATE_FACTOR *
561 cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
562 }
563
// Initializes rd->thresh_mult, the per-mode rd threshold multipliers.
//
// Every mode starts from a baseline of -500 in BEST mode and 0 otherwise.
// The three NEAREST modes are then overwritten with 300 (adaptive rd
// thresholds enabled) or 0, and every other mode listed below has a fixed
// increment added to the baseline.
void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
  RD_OPT *const rd = &cpi->rd;
  SPEED_FEATURES *const sf = &cpi->sf;
  const int nearest_mult = sf->adaptive_rd_thresh ? 300 : 0;
  int mode;

  // Baseline for all modes.
  for (mode = 0; mode < MAX_MODES; ++mode) {
    rd->thresh_mult[mode] = cpi->oxcf.mode == BEST ? -500 : 0;
  }

  // NEAREST modes get an absolute value rather than an increment.
  rd->thresh_mult[THR_NEARESTMV] = nearest_mult;
  rd->thresh_mult[THR_NEARESTG] = nearest_mult;
  rd->thresh_mult[THR_NEARESTA] = nearest_mult;

  // Baseline + 1000.
  rd->thresh_mult[THR_DC] += 1000;
  rd->thresh_mult[THR_NEWMV] += 1000;
  rd->thresh_mult[THR_NEWA] += 1000;
  rd->thresh_mult[THR_NEWG] += 1000;
  rd->thresh_mult[THR_NEARMV] += 1000;
  rd->thresh_mult[THR_NEARA] += 1000;
  rd->thresh_mult[THR_NEARG] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
  rd->thresh_mult[THR_TM] += 1000;

  // Baseline + 1500.
  rd->thresh_mult[THR_COMP_NEARLA] += 1500;
  rd->thresh_mult[THR_COMP_NEARGA] += 1500;

  // Baseline + 2000.
  rd->thresh_mult[THR_COMP_NEWLA] += 2000;
  rd->thresh_mult[THR_COMP_NEWGA] += 2000;
  rd->thresh_mult[THR_ZEROMV] += 2000;
  rd->thresh_mult[THR_ZEROG] += 2000;
  rd->thresh_mult[THR_ZEROA] += 2000;
  rd->thresh_mult[THR_H_PRED] += 2000;
  rd->thresh_mult[THR_V_PRED] += 2000;

  // Baseline + 2500.
  rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
  rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
  rd->thresh_mult[THR_D45_PRED] += 2500;
  rd->thresh_mult[THR_D135_PRED] += 2500;
  rd->thresh_mult[THR_D117_PRED] += 2500;
  rd->thresh_mult[THR_D153_PRED] += 2500;
  rd->thresh_mult[THR_D207_PRED] += 2500;
  rd->thresh_mult[THR_D63_PRED] += 2500;
}
617
// Initializes rd->thresh_mult_sub8x8 from one of two fixed rows: the second
// row when the encoder runs in BEST mode, the first row otherwise.
void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
  static const int thresh_mult[2][MAX_REFS] = {
    { 2500, 2500, 2500, 4500, 4500, 2500 },  // mode != BEST
    { 2000, 2000, 2000, 4000, 4000, 2000 }   // mode == BEST
  };
  const int *const selected = thresh_mult[cpi->oxcf.mode == BEST];

  memcpy(cpi->rd.thresh_mult_sub8x8, selected, sizeof(thresh_mult[0]));
}
626
vp9_update_rd_thresh_fact(int (* factor_buf)[MAX_MODES],int rd_thresh,int bsize,int best_mode_index)627 void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
628 int bsize, int best_mode_index) {
629 if (rd_thresh > 0) {
630 const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
631 int mode;
632 for (mode = 0; mode < top_mode; ++mode) {
633 const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
634 const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
635 BLOCK_SIZE bs;
636 for (bs = min_size; bs <= max_size; ++bs) {
637 int *const fact = &factor_buf[bs][mode];
638 if (mode == best_mode_index) {
639 *fact -= (*fact >> 4);
640 } else {
641 *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
642 }
643 }
644 }
645 }
646 }
647
// Returns the intra cost penalty for the DC quantizer q obtained from
// |qindex|, |qdelta| and |bit_depth|.
//
// The penalty is 20 * q for 8-bit input. With high bit depth enabled it is
// 5 * q for 10-bit and 5 * q rounded down by two bits for 12-bit; an
// unsupported bit depth asserts and returns -1.
int vp9_get_intra_cost_penalty(int qindex, int qdelta,
                               vpx_bit_depth_t bit_depth) {
  const int q = vp9_dc_quant(qindex, qdelta, bit_depth);
  int penalty = 20 * q;

#if CONFIG_VP9_HIGHBITDEPTH
  switch (bit_depth) {
    case VPX_BITS_8:
      break;
    case VPX_BITS_10:
      penalty = 5 * q;
      break;
    case VPX_BITS_12:
      penalty = ROUND_POWER_OF_TWO(5 * q, 2);
      break;
    default:
      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
      return -1;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  return penalty;
}
667
668