1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <assert.h>
12 #include <math.h>
13 #include <stdio.h>
14
15 #include "./vp9_rtcd.h"
16
17 #include "vpx_dsp/vpx_dsp_common.h"
18 #include "vpx_mem/vpx_mem.h"
19 #include "vpx_ports/bitops.h"
20 #include "vpx_ports/mem.h"
21 #include "vpx_ports/system_state.h"
22
23 #include "vp9/common/vp9_common.h"
24 #include "vp9/common/vp9_entropy.h"
25 #include "vp9/common/vp9_entropymode.h"
26 #include "vp9/common/vp9_mvref_common.h"
27 #include "vp9/common/vp9_pred_common.h"
28 #include "vp9/common/vp9_quant_common.h"
29 #include "vp9/common/vp9_reconinter.h"
30 #include "vp9/common/vp9_reconintra.h"
31 #include "vp9/common/vp9_seg_common.h"
32
33 #include "vp9/encoder/vp9_cost.h"
34 #include "vp9/encoder/vp9_encodemb.h"
35 #include "vp9/encoder/vp9_encodemv.h"
36 #include "vp9/encoder/vp9_encoder.h"
37 #include "vp9/encoder/vp9_mcomp.h"
38 #include "vp9/encoder/vp9_quantize.h"
39 #include "vp9/encoder/vp9_ratectrl.h"
40 #include "vp9/encoder/vp9_rd.h"
41 #include "vp9/encoder/vp9_tokenize.h"
42
// Exponent used when deriving mode-pruning thresholds from the quantizer
// step size (see compute_rd_thresh_factor()).
#define RD_THRESH_POW 1.25

// Factor to weigh the rate for switchable interp filters.
#define SWITCHABLE_INTERP_RATE_FACTOR 1
47
// Invalidate an RD cost record: every field is set to its maximum so that
// any genuine candidate cost compares strictly better.
void vp9_rd_cost_reset(RD_COST *rd_cost) {
  rd_cost->rdcost = INT64_MAX;
  rd_cost->dist = INT64_MAX;
  rd_cost->rate = INT_MAX;
}
53
// Zero an RD cost record so components can be accumulated into it.
void vp9_rd_cost_init(RD_COST *rd_cost) {
  rd_cost->rdcost = 0;
  rd_cost->dist = 0;
  rd_cost->rate = 0;
}
59
// Compute the RD cost mult * rate + (dist << RDDIV) style combination while
// tolerating negative rate/dist deltas. The RDCOST_NEG_* macro variants are
// used so negation happens outside the macro's internal arithmetic.
int64_t vp9_calculate_rd_cost(int mult, int div, int rate, int64_t dist) {
  assert(mult >= 0);
  assert(div > 0);
  // Dispatch on the sign combination of the two components.
  if (rate < 0) {
    if (dist < 0) return -RDCOST(mult, div, -rate, -dist);
    return RDCOST_NEG_R(mult, div, -rate, dist);
  }
  if (dist < 0) return RDCOST_NEG_D(mult, div, rate, -dist);
  return RDCOST(mult, div, rate, dist);
}
74
// Refresh rd_cost->rdcost from the stored rate/dist pair; if either
// component is saturated (i.e. the record is invalid) reset the whole record.
void vp9_rd_cost_update(int mult, int div, RD_COST *rd_cost) {
  const int components_valid =
      rd_cost->rate < INT_MAX && rd_cost->dist < INT64_MAX;
  if (!components_valid) {
    vp9_rd_cost_reset(rd_cost);
    return;
  }
  rd_cost->rdcost =
      vp9_calculate_rd_cost(mult, div, rd_cost->rate, rd_cost->dist);
}
83
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc), i.e. each entry is the
// desired multiplier times 4; one entry per BLOCK_SIZE from 4x4 to 64x64.
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
};
91
// Precompute entropy-coding bit costs from the current frame-context
// probabilities: key-frame Y modes (conditioned on above/left modes),
// inter-frame Y modes, UV modes for key and inter frames, switchable
// interpolation filters, and transform-size signalling.
static void fill_mode_costs(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const fc = cpi->common.fc;
  int i, j;

  // Key-frame Y mode cost depends on the (above, left) mode context pair.
  for (i = 0; i < INTRA_MODES; ++i) {
    for (j = 0; j < INTRA_MODES; ++j) {
      vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
                      vp9_intra_mode_tree);
    }
  }

  vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
  for (i = 0; i < INTRA_MODES; ++i) {
    vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME][i],
                    vp9_kf_uv_mode_prob[i], vp9_intra_mode_tree);
    vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME][i],
                    fc->uv_mode_prob[i], vp9_intra_mode_tree);
  }

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
    vp9_cost_tokens(cpi->switchable_interp_costs[i],
                    fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
  }

  // Transform-size costs. Tx size is coded as a chain of binary decisions;
  // selecting size k under max size i costs one bit per decision up to k,
  // except the final decision is implicit when k == i (hence k - (k == i)).
  for (i = TX_8X8; i < TX_SIZES; ++i) {
    for (j = 0; j < TX_SIZE_CONTEXTS; ++j) {
      const vpx_prob *tx_probs = get_tx_probs(i, j, &fc->tx_probs);
      int k;
      for (k = 0; k <= i; ++k) {
        int cost = 0;
        int m;
        for (m = 0; m <= k - (k == i); ++m) {
          if (m == k)
            cost += vp9_cost_zero(tx_probs[m]);
          else
            cost += vp9_cost_one(tx_probs[m]);
        }
        // Table is indexed by max tx size minus one (TX_8X8 maps to 0).
        cpi->tx_size_cost[i - 1][j][k] = cost;
      }
    }
  }
}
134
// Expand the model coefficient probabilities into full per-token bit-cost
// tables for every tx size / plane type / ref type / band / context.
// Index [0] holds costs including the EOB decision, index [1] the costs when
// the EOB decision is skipped; the EOB token itself must cost the same in
// both (checked by the assert).
static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  int i, j, k, l;
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; ++t)
    for (i = 0; i < PLANE_TYPES; ++i)
      for (j = 0; j < REF_TYPES; ++j)
        for (k = 0; k < COEF_BANDS; ++k)
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
            vpx_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs, vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
                                 vp9_coef_tree);
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
          }
}
153
// Values are now correlated to quantizer.
// SAD-per-bit lookup tables indexed by qindex, one pair (16x16 / 4x4 motion
// search) per supported bit depth. Filled by vp9_init_me_luts().
static int sad_per_bit16lut_8[QINDEX_RANGE];
static int sad_per_bit4lut_8[QINDEX_RANGE];

#if CONFIG_VP9_HIGHBITDEPTH
static int sad_per_bit16lut_10[QINDEX_RANGE];
static int sad_per_bit4lut_10[QINDEX_RANGE];
static int sad_per_bit16lut_12[QINDEX_RANGE];
static int sad_per_bit4lut_12[QINDEX_RANGE];
#endif
164
// Fill one pair of SAD-per-bit tables for the given bit depth using a linear
// fit against the real quantizer value for each qindex.
static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
                            vpx_bit_depth_t bit_depth) {
  int i;
  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < range; i++) {
    const double q = vp9_convert_qindex_to_q(i, bit_depth);
    bit16lut[i] = (int)(0.0418 * q + 2.4107);
    bit4lut[i] = (int)(0.063 * q + 2.742);
  }
}
177
// One-time initialization of all SAD-per-bit lookup tables (per bit depth).
void vp9_init_me_luts(void) {
  init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
                  VPX_BITS_8);
#if CONFIG_VP9_HIGHBITDEPTH
  init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
                  VPX_BITS_10);
  init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
                  VPX_BITS_12);
#endif
}
188
// RDMULT scale-down (in 1/128 units) applied per gfu_boost bucket: higher
// boost lowers RDMULT (favouring quality); indexed by VPXMIN(15, boost/100).
static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
                                         8,  8,  4,  4,  2,  2,  1,  0 };

// Note that the element below for frame type "USE_BUF_FRAME", which indicates
// that the show frame flag is set, should not be used as no real frame
// is encoded so we should not reach here. However, a dummy value
// is inserted here to make sure the data structure has the right number
// of values assigned.
static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128,
                                                              128, 144, 144 };
199
// Derive the base RD multiplier from the DC quantizer step for qindex,
// scaled by a frame-type/qindex dependent factor.
int vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {
  // largest dc_quant is 21387, therefore rdmult should always fit in int32_t
  // NOTE(review): the key-frame "* 7 + / 2" branch can push the uint32_t
  // value past INT_MAX at the largest quantizers before the int return
  // cast — confirm the effective qindex range keeps this in bounds.
  const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
  uint32_t rdmult = q * q;

  if (cpi->common.frame_type != KEY_FRAME) {
    if (qindex < 128)
      rdmult = rdmult * 4;
    else if (qindex < 190)
      rdmult = rdmult * 4 + rdmult / 2;
    else
      rdmult = rdmult * 3;
  } else {
    if (qindex < 64)
      rdmult = rdmult * 4;
    else if (qindex <= 128)
      rdmult = rdmult * 3 + rdmult / 2;
    else if (qindex < 190)
      rdmult = rdmult * 4 + rdmult / 2;
    else
      rdmult = rdmult * 7 + rdmult / 2;
  }
#if CONFIG_VP9_HIGHBITDEPTH
  // Normalize high-bit-depth quantizer scale back to the 8-bit range.
  switch (cpi->common.bit_depth) {
    case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
    case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
    default: break;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  return rdmult > 0 ? rdmult : 1;
}
231
modulate_rdmult(const VP9_COMP * cpi,int rdmult)232 static int modulate_rdmult(const VP9_COMP *cpi, int rdmult) {
233 int64_t rdmult_64 = rdmult;
234 if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
235 const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
236 const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
237 const int gfu_boost = cpi->multi_layer_arf
238 ? gf_group->gfu_boost[gf_group->index]
239 : cpi->rc.gfu_boost;
240 const int boost_index = VPXMIN(15, (gfu_boost / 100));
241
242 rdmult_64 = (rdmult_64 * rd_frame_type_factor[frame_type]) >> 7;
243 rdmult_64 += ((rdmult_64 * rd_boost_factor[boost_index]) >> 7);
244 }
245 return (int)rdmult_64;
246 }
247
// Full RD multiplier: qindex-derived base value, then GF-group modulation.
int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
  const int base_rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, qindex);
  return modulate_rdmult(cpi, base_rdmult);
}
252
vp9_get_adaptive_rdmult(const VP9_COMP * cpi,double beta)253 int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) {
254 int rdmult =
255 vp9_compute_rd_mult_based_on_qindex(cpi, cpi->common.base_qindex);
256 rdmult = (int)((double)rdmult / beta);
257 rdmult = rdmult > 0 ? rdmult : 1;
258 return modulate_rdmult(cpi, rdmult);
259 }
260
// Map qindex to a mode-pruning threshold factor: q^RD_THRESH_POW * 5.12
// with a floor of 8, where q is the DC quantizer step normalized to the
// same scale across bit depths (/4, /16, /64 for 8/10/12 bits).
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
  double q;
#if CONFIG_VP9_HIGHBITDEPTH
  switch (bit_depth) {
    case VPX_BITS_8: q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0; break;
    case VPX_BITS_10: q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0; break;
    default:
      assert(bit_depth == VPX_BITS_12);
      q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0;
      break;
  }
#else
  (void)bit_depth;
  q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // TODO(debargha): Adjust the function below.
  return VPXMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
}
279
// Load the motion-estimation SAD-per-bit constants for the given qindex
// from the lookup tables matching the stream's bit depth.
void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
#if CONFIG_VP9_HIGHBITDEPTH
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8:
      x->sadperbit16 = sad_per_bit16lut_8[qindex];
      x->sadperbit4 = sad_per_bit4lut_8[qindex];
      break;
    case VPX_BITS_10:
      x->sadperbit16 = sad_per_bit16lut_10[qindex];
      x->sadperbit4 = sad_per_bit4lut_10[qindex];
      break;
    default:
      assert(cpi->common.bit_depth == VPX_BITS_12);
      x->sadperbit16 = sad_per_bit16lut_12[qindex];
      x->sadperbit4 = sad_per_bit4lut_12[qindex];
      break;
  }
#else
  (void)cpi;
  x->sadperbit16 = sad_per_bit16lut_8[qindex];
  x->sadperbit4 = sad_per_bit4lut_8[qindex];
#endif  // CONFIG_VP9_HIGHBITDEPTH
}
303
// Fill rd->threshes[segment][bsize][mode] = thresh_mult * q-factor *
// block-size-factor / 4, saturating to INT_MAX. Blocks >= 8x8 use the
// per-mode thresh_mult[]; sub-8x8 blocks use the per-reference
// thresh_mult_sub8x8[]. Each segment uses its own clamped qindex.
static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
  int i, bsize, segment_id;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    const int qindex =
        clamp(vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
                  cm->y_dc_delta_q,
              0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      // thresh_max guards the multiplication below against int overflow.
      const int thresh_max = INT_MAX / t;

      if (bsize >= BLOCK_8X8) {
        for (i = 0; i < MAX_MODES; ++i)
          rd->threshes[segment_id][bsize][i] = rd->thresh_mult[i] < thresh_max
                                                   ? rd->thresh_mult[i] * t / 4
                                                   : INT_MAX;
      } else {
        for (i = 0; i < MAX_REFS; ++i)
          rd->threshes[segment_id][bsize][i] =
              rd->thresh_mult_sub8x8[i] < thresh_max
                  ? rd->thresh_mult_sub8x8[i] * t / 4
                  : INT_MAX;
      }
    }
  }
}
335
// Rebuild the inter-mode bit-cost tables from the current frame-context
// probabilities, one table per inter-mode context.
void vp9_build_inter_mode_cost(VP9_COMP *cpi) {
  const VP9_COMMON *const cm = &cpi->common;
  int ctx = 0;
  while (ctx < INTER_MODE_CONTEXTS) {
    vp9_cost_tokens((int *)cpi->inter_mode_cost[ctx],
                    cm->fc->inter_mode_probs[ctx], vp9_inter_mode_tree);
    ++ctx;
  }
}
344
// Per-frame RD initialization: set RDMULT/RDDIV and error-per-bit, compute
// block thresholds and partition probabilities, then (depending on pass and
// speed features) refresh token, partition, mode and MV cost tables.
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  RD_OPT *const rd = &cpi->rd;
  int i;

  vpx_clear_system_state();

  rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
  rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  set_error_per_bit(x, rd->RDMULT);

  // Skip the tx-size search on inter frames when the speed feature forces
  // the largest size everywhere.
  x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                       cm->frame_type != KEY_FRAME)
                          ? 0
                          : 1;

  set_block_thresholds(cm, rd);
  set_partition_probs(cm, xd);

  if (cpi->oxcf.pass == 1) {
    // First pass only needs MV costs (and only for inter frames).
    if (!frame_is_intra_only(cm))
      vp9_build_nmv_cost_table(
          x->nmvjointcost,
          cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
          &cm->fc->nmvc, cm->allow_high_precision_mv);
  } else {
    if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
      fill_token_costs(x->token_costs, cm->fc->coef_probs);

    if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
        cm->frame_type == KEY_FRAME) {
      for (i = 0; i < PARTITION_CONTEXTS; ++i)
        vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i),
                        vp9_partition_tree);
    }

    // In non-RD mode the mode/MV tables are refreshed only periodically
    // (every 8th frame) or on key frames to save setup cost.
    if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
        cm->frame_type == KEY_FRAME) {
      fill_mode_costs(cpi);

      if (!frame_is_intra_only(cm)) {
        vp9_build_nmv_cost_table(
            x->nmvjointcost,
            cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
            &cm->fc->nmvc, cm->allow_high_precision_mv);
        vp9_build_inter_mode_cost(cpi);
      }
    }
  }
}
398
// NOTE: The tables below must be of the same size.

// The functions described below are sampled at the four most significant
// bits of x^2 + 8 / 256.

// Normalized rate:
// This table models the rate for a Laplacian source with given variance
// when quantized with a uniform quantizer with given stepsize. The
// closed form expression is:
// Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
// where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
// and H(x) is the binary entropy function.
// Values are in Q10 fixed point; indexed by the same grid as xsq_iq_q10[].
static const int rate_tab_q10[] = {
  65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142, 4044,
  3958,  3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186, 3133, 3037,
  2952,  2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353, 2290, 2232, 2179,
  2130,  2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651, 1608, 1530, 1460, 1398,
  1342,  1290, 1243, 1199, 1159, 1086, 1021, 963,  911,  864,  821,  781,  745,
  680,   623,  574,  530,  490,  455,  424,  395,  345,  304,  269,  239,  213,
  190,   171,  154,  126,  104,  87,   73,   61,   52,   44,   38,   28,   21,
  16,    12,   10,   8,    6,    5,    3,    2,    1,    1,    1,    0,    0,
};
421
// Normalized distortion:
// This table models the normalized distortion for a Laplacian source
// with given variance when quantized with a uniform quantizer
// with given stepsize. The closed form expression is:
// Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
// where x = qpstep / sqrt(variance).
// Note the actual distortion is Dn * variance.
// Values are in Q10 fixed point; indexed by the same grid as xsq_iq_q10[].
static const int dist_tab_q10[] = {
  0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,    5,
  6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,   18,   21,
  24,   26,   29,   31,   34,   36,   39,   44,   49,   54,   59,   64,   69,
  73,   78,   88,   97,   106,  115,  124,  133,  142,  151,  167,  184,  200,
  215,  231,  245,  260,  274,  301,  327,  351,  375,  397,  418,  439,  458,
  495,  528,  559,  587,  613,  637,  659,  680,  717,  749,  777,  801,  823,
  842,  859,  874,  899,  919,  936,  949,  960,  969,  977,  983,  994,  1001,
  1006, 1010, 1013, 1015, 1017, 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
};
// Sample grid of x^2 (Q10) at which the rate/dist tables above are
// tabulated; model_rd_norm() interpolates linearly between adjacent entries.
static const int xsq_iq_q10[] = {
  0,      4,      8,      12,     16,     20,     24,     28,     32,
  40,     48,     56,     64,     72,     80,     88,     96,     112,
  128,    144,    160,    176,    192,    208,    224,    256,    288,
  320,    352,    384,    416,    448,    480,    544,    608,    672,
  736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
  1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
  3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
  7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
  16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
  36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
  81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
  180192, 196576, 212960, 229344, 245728,
};
453
// Look up normalized rate/distortion (Q10) for a given x^2 value (Q10) by
// piecewise-linear interpolation over the xsq_iq_q10[] sample grid.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // Locate the table segment: k is the magnitude bucket of (x^2)/4 + 8, and
  // xq = 8*k + (next 3 bits) recovers the grid index (8 samples per bucket).
  const int tmp = (xsq_q10 >> 2) + 8;
  const int k = get_msb(tmp) - 3;
  const int xq = (k << 3) + ((tmp >> k) & 0x7);
  const int one_q10 = 1 << 10;
  // a_q10 is the fractional position (Q10) within the [xq, xq + 1] segment.
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  const int b_q10 = one_q10 - a_q10;
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}
464
// Vector form of model_rd_norm(): interpolate normalized rate/distortion
// (Q10) for all MAX_MB_PLANE x^2 values at once. Logic matches
// model_rd_norm() element-wise.
static void model_rd_norm_vec(int xsq_q10[MAX_MB_PLANE],
                              int r_q10[MAX_MB_PLANE],
                              int d_q10[MAX_MB_PLANE]) {
  int i;
  const int one_q10 = 1 << 10;
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    const int tmp = (xsq_q10[i] >> 2) + 8;
    const int k = get_msb(tmp) - 3;
    const int xq = (k << 3) + ((tmp >> k) & 0x7);
    const int a_q10 = ((xsq_q10[i] - xsq_iq_q10[xq]) << 10) >> (2 + k);
    const int b_q10 = one_q10 - a_q10;
    r_q10[i] = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
    d_q10[i] = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
  }
}
480
// Largest x^2 (Q10) representable by the tables: the last xsq_iq_q10 entry
// minus one, so xq + 1 never indexes past the end.
static const uint32_t MAX_XSQ_Q10 = 245727;

void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
                                  unsigned int qstep, int *rate,
                                  int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  if (var == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    int d_q10, r_q10;
    // x^2 = qstep^2 * 2^n_log2 / var in Q10, rounded, clamped to table range.
    const uint64_t xsq_q10_64 =
        (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
    const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
    // Rate is per 2^n_log2 samples; distortion is denormalized by variance.
    *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - VP9_PROB_COST_SHIFT);
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
  }
}
505
// Implements a fixed length vector form of vp9_model_rd_from_var_lapndz where
// vectors are of length MAX_MB_PLANE and all elements of var are non-zero.
// Accumulates the per-plane rate and distortion into *rate_sum / *dist_sum
// (callers are expected to have initialized the accumulators).
void vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE],
                                      unsigned int n_log2[MAX_MB_PLANE],
                                      unsigned int qstep[MAX_MB_PLANE],
                                      int64_t *rate_sum, int64_t *dist_sum) {
  int i;
  int xsq_q10[MAX_MB_PLANE], d_q10[MAX_MB_PLANE], r_q10[MAX_MB_PLANE];
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    const uint64_t xsq_q10_64 =
        (((uint64_t)qstep[i] * qstep[i] << (n_log2[i] + 10)) + (var[i] >> 1)) /
        var[i];
    xsq_q10[i] = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
  }
  model_rd_norm_vec(xsq_q10, r_q10, d_q10);
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    int rate =
        ROUND_POWER_OF_TWO(r_q10[i] << n_log2[i], 10 - VP9_PROB_COST_SHIFT);
    int64_t dist = (var[i] * (int64_t)d_q10[i] + 512) >> 10;
    *rate_sum += rate;
    *dist_sum += dist;
  }
}
529
// Collapse the per-4x4 above/left entropy contexts of a plane down to one
// flag per transform block: t_above[i]/t_left[i] is nonzero iff any of the
// tx_size-covered 4x4 contexts is nonzero. For TX_4X4 the contexts are
// copied through unchanged.
// NOTE(review): the wider tx sizes test 2/4/8 context bytes at once via
// uint16/32/64 loads on the byte arrays — assumes the platform tolerates
// these type-punned, possibly unaligned reads; confirm against the
// project's portability expectations.
void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;

  int i;
  switch (tx_size) {
    case TX_4X4:
      memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = !!*(const uint16_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = !!*(const uint16_t *)&left[i];
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = !!*(const uint32_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = !!*(const uint32_t *)&left[i];
      break;
    default:
      assert(tx_size == TX_32X32);
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = !!*(const uint64_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = !!*(const uint64_t *)&left[i];
      break;
  }
}
567
// Evaluate up to three candidate predicted MVs (nearest, near, and the
// stored pred_mv when the block can still be partitioned) by full-pel SAD
// against the reference, and record the best candidate's index, the largest
// candidate magnitude and the best SAD in the MACROBLOCK for later use by
// motion search.
void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  int i;
  int zero_seen = 0;
  int best_index = 0;
  int best_sad = INT_MAX;
  int this_sad = INT_MAX;
  int max_mv = 0;
  int near_same_nearest;
  uint8_t *src_y_ptr = x->plane[0].src.buf;
  uint8_t *ref_y_ptr;
  const int num_mv_refs =
      MAX_MV_REF_CANDIDATES + (block_size < x->max_partition_size);

  MV pred_mv[3];
  pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
  pred_mv[2] = x->pred_mv[ref_frame];
  assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));

  // Skip the NEAR candidate when it duplicates NEAREST.
  near_same_nearest = x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
                      x->mbmi_ext->ref_mvs[ref_frame][1].as_int;

  // Get the sad for each candidate reference mv.
  for (i = 0; i < num_mv_refs; ++i) {
    const MV *this_mv = &pred_mv[i];
    int fp_row, fp_col;
    // INT16_MAX components mark an invalid/unset candidate.
    if (this_mv->row == INT16_MAX || this_mv->col == INT16_MAX) continue;
    if (i == 1 && near_same_nearest) continue;
    // Round the 1/8-pel MV to full pel (round-half-away-from-zero).
    fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
    fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
    max_mv = VPXMAX(max_mv, VPXMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);

    // Only evaluate the zero vector once.
    if (fp_row == 0 && fp_col == 0 && zero_seen) continue;
    zero_seen |= (fp_row == 0 && fp_col == 0);

    ref_y_ptr = &ref_y_buffer[ref_y_stride * fp_row + fp_col];
    // Find sad for current vector.
    this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
                                           ref_y_ptr, ref_y_stride);
    // Note if it is the best so far.
    if (this_sad < best_sad) {
      best_sad = this_sad;
      best_index = i;
    }
  }

  // Note the index of the mv that worked best in the reference list.
  x->mv_best_ref_index[ref_frame] = best_index;
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}
620
vp9_setup_pred_block(const MACROBLOCKD * xd,struct buf_2d dst[MAX_MB_PLANE],const YV12_BUFFER_CONFIG * src,int mi_row,int mi_col,const struct scale_factors * scale,const struct scale_factors * scale_uv)621 void vp9_setup_pred_block(const MACROBLOCKD *xd,
622 struct buf_2d dst[MAX_MB_PLANE],
623 const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
624 const struct scale_factors *scale,
625 const struct scale_factors *scale_uv) {
626 int i;
627
628 dst[0].buf = src->y_buffer;
629 dst[0].stride = src->y_stride;
630 dst[1].buf = src->u_buffer;
631 dst[2].buf = src->v_buffer;
632 dst[1].stride = dst[2].stride = src->uv_stride;
633
634 for (i = 0; i < MAX_MB_PLANE; ++i) {
635 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
636 i ? scale_uv : scale, xd->plane[i].subsampling_x,
637 xd->plane[i].subsampling_y);
638 }
639 }
640
// Pixel offset of a 4x4 sub-block, given its raster index within the plane
// block, into a buffer with the given stride.
int vp9_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
                            int stride) {
  const int bwl = b_width_log2_lookup[plane_bsize];
  const int row = raster_block >> bwl;
  const int col = raster_block & ((1 << bwl) - 1);
  return 4 * row * stride + 4 * col;
}
648
// int16_t-buffer variant of vp9_raster_block_offset(): returns a pointer to
// the 4x4 sub-block inside a plane-sized coefficient buffer.
int16_t *vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
                                       int16_t *base) {
  const int row_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset =
      vp9_raster_block_offset(plane_bsize, raster_block, row_stride);
  return &base[offset];
}
654
// Return the pre-scaled copy of a reference frame if one exists and differs
// from the unscaled buffer; NULL when the reference can be used directly.
YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
                                             int ref_frame) {
  const VP9_COMMON *const cm = &cpi->common;
  const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
  const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
  assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
  return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX)
             ? &cm->buffer_pool->frame_bufs[scaled_idx].buf
             : NULL;
}
665
vp9_get_switchable_rate(const VP9_COMP * cpi,const MACROBLOCKD * const xd)666 int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
667 const MODE_INFO *const mi = xd->mi[0];
668 const int ctx = get_pred_context_switchable_interp(xd);
669 return SWITCHABLE_INTERP_RATE_FACTOR *
670 cpi->switchable_interp_costs[ctx][mi->interp_filter];
671 }
672
// Set the baseline per-mode RD-loop early-termination multipliers for
// blocks >= 8x8. Larger values make a mode more likely to be skipped;
// BEST mode starts from -500 so nothing is pruned up front.
void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
  int i;
  RD_OPT *const rd = &cpi->rd;
  SPEED_FEATURES *const sf = &cpi->sf;

  // Set baseline threshold values.
  for (i = 0; i < MAX_MODES; ++i)
    rd->thresh_mult[i] = cpi->oxcf.mode == BEST ? -500 : 0;

  // With adaptive thresholds the NEAREST modes start with a small penalty
  // that the adaptation can later reduce.
  if (sf->adaptive_rd_thresh) {
    rd->thresh_mult[THR_NEARESTMV] = 300;
    rd->thresh_mult[THR_NEARESTG] = 300;
    rd->thresh_mult[THR_NEARESTA] = 300;
  } else {
    rd->thresh_mult[THR_NEARESTMV] = 0;
    rd->thresh_mult[THR_NEARESTG] = 0;
    rd->thresh_mult[THR_NEARESTA] = 0;
  }

  rd->thresh_mult[THR_DC] += 1000;

  rd->thresh_mult[THR_NEWMV] += 1000;
  rd->thresh_mult[THR_NEWA] += 1000;
  rd->thresh_mult[THR_NEWG] += 1000;

  rd->thresh_mult[THR_NEARMV] += 1000;
  rd->thresh_mult[THR_NEARA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;

  rd->thresh_mult[THR_TM] += 1000;

  rd->thresh_mult[THR_COMP_NEARLA] += 1500;
  rd->thresh_mult[THR_COMP_NEWLA] += 2000;
  rd->thresh_mult[THR_NEARG] += 1000;
  rd->thresh_mult[THR_COMP_NEARGA] += 1500;
  rd->thresh_mult[THR_COMP_NEWGA] += 2000;

  rd->thresh_mult[THR_ZEROMV] += 2000;
  rd->thresh_mult[THR_ZEROG] += 2000;
  rd->thresh_mult[THR_ZEROA] += 2000;
  rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
  rd->thresh_mult[THR_COMP_ZEROGA] += 2500;

  // Directional intra modes are costlier to search, so set higher bars.
  rd->thresh_mult[THR_H_PRED] += 2000;
  rd->thresh_mult[THR_V_PRED] += 2000;
  rd->thresh_mult[THR_D45_PRED] += 2500;
  rd->thresh_mult[THR_D135_PRED] += 2500;
  rd->thresh_mult[THR_D117_PRED] += 2500;
  rd->thresh_mult[THR_D153_PRED] += 2500;
  rd->thresh_mult[THR_D207_PRED] += 2500;
  rd->thresh_mult[THR_D63_PRED] += 2500;
}
726
// Set the per-reference RD-loop thresholds for sub-8x8 blocks; BEST mode
// (row 1) uses lower (more permissive) thresholds than the other modes.
void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
  static const int thresh_mult[2][MAX_REFS] = {
    { 2500, 2500, 2500, 4500, 4500, 2500 },
    { 2000, 2000, 2000, 4000, 4000, 2000 }
  };
  RD_OPT *const rd = &cpi->rd;
  const int row = (cpi->oxcf.mode == BEST) ? 1 : 0;
  memcpy(rd->thresh_mult_sub8x8, thresh_mult[row], sizeof(thresh_mult[row]));
}
736
// Adapt the per-(block size, mode) threshold factors after a mode decision:
// the winning mode's factor decays (~1/16 per hit), every other mode's
// factor grows by RD_THRESH_INC up to rd_thresh * RD_THRESH_MAX_FACT. The
// update is applied to a window of block sizes around bsize
// [bsize - 1, bsize + 2], clamped to [BLOCK_4X4, BLOCK_64X64].
void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
                               int bsize, int best_mode_index) {
  if (rd_thresh > 0) {
    // Sub-8x8 blocks index by reference (MAX_REFS), larger by mode.
    const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
    int mode;
    for (mode = 0; mode < top_mode; ++mode) {
      const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
      const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
      BLOCK_SIZE bs;
      for (bs = min_size; bs <= max_size; ++bs) {
        int *const fact = &factor_buf[bs][mode];
        if (mode == best_mode_index) {
          *fact -= (*fact >> 4);
        } else {
          *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
        }
      }
    }
  }
}
757
// Rate penalty applied to intra modes during inter-frame mode selection,
// proportional to the DC quantizer step. The penalty is halved for <=16x16
// blocks and quartered for <=8x8 blocks, unless estimated noise is high.
int vp9_get_intra_cost_penalty(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
                               int qindex, int qdelta) {
  // Reduce the intra cost penalty for small blocks (<=16x16).
  int reduction_fac =
      (bsize <= BLOCK_16X16) ? ((bsize <= BLOCK_8X8) ? 4 : 2) : 0;

  if (cpi->noise_estimate.enabled && cpi->noise_estimate.level == kHigh)
    // Don't reduce intra cost penalty if estimated noise level is high.
    reduction_fac = 0;

  // Always use VPX_BITS_8 as input here because the penalty is applied
  // to rate not distortion so we want a consistent penalty for all bit
  // depths. If the actual bit depth were passed in here then the value
  // returned by vp9_dc_quant() would scale with the bit depth and we would
  // then need to apply inverse scaling to correct back to a bit depth
  // independent rate penalty.
  return (20 * vp9_dc_quant(qindex, qdelta, VPX_BITS_8)) >> reduction_fac;
}
776