/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>

#include "./vp9_rtcd.h"

#include "vpx_mem/vpx_mem.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pragmas.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_systemdependent.h"

#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_variance.h"

#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC 1
#define RD_THRESH_POW 1.25
#define RD_MULT_EPB_RATIO 64

/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

#define LAST_FRAME_MODE_MASK 0xFFEDCD60
#define GOLDEN_FRAME_MODE_MASK 0xFFDA3BB0
#define ALT_REF_MODE_MASK 0xFFC648D0

#define MIN_EARLY_TERM_INDEX 3

typedef struct {
  MB_PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int rate;
  int64_t dist;
  int64_t sse;
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int skip;
  int use_fast_coef_costing;
  const scan_order *so;
};

const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  {NEARESTMV, {LAST_FRAME, NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED, {INTRA_FRAME, NONE}},

  {NEWMV, {LAST_FRAME, NONE}},
  {NEWMV, {ALTREF_FRAME, NONE}},
  {NEWMV, {GOLDEN_FRAME, NONE}},

  {NEARMV, {LAST_FRAME, NONE}},
  {NEARMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {LAST_FRAME, ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED, {INTRA_FRAME, NONE}},

  {NEARMV, {LAST_FRAME, ALTREF_FRAME}},
  {NEWMV, {LAST_FRAME, ALTREF_FRAME}},
  {NEARMV, {GOLDEN_FRAME, NONE}},
  {NEARMV, {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV, {LAST_FRAME, NONE}},
  {ZEROMV, {GOLDEN_FRAME, NONE}},
  {ZEROMV, {ALTREF_FRAME, NONE}},
  {ZEROMV, {LAST_FRAME, ALTREF_FRAME}},
  {ZEROMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED, {INTRA_FRAME, NONE}},
  {V_PRED, {INTRA_FRAME, NONE}},
  {D135_PRED, {INTRA_FRAME, NONE}},
  {D207_PRED, {INTRA_FRAME, NONE}},
  {D153_PRED, {INTRA_FRAME, NONE}},
  {D63_PRED, {INTRA_FRAME, NONE}},
  {D117_PRED, {INTRA_FRAME, NONE}},
  {D45_PRED, {INTRA_FRAME, NONE}},
};

const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
  {{LAST_FRAME, NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME, ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME, NONE}},
};

// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
static int rd_thresh_block_size_factor[BLOCK_SIZES] =
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};

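// Returns the pixel offset of 4x4 block |raster_block| (given in raster
// order) within a plane whose rows are |stride| pixels apart.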
static int raster_block_offset(BLOCK_SIZE plane_bsize,
                               int raster_block, int stride) {
  const int bw = b_width_log2(plane_bsize);
  const int y = 4 * (raster_block >> bw);
  const int x = 4 * (raster_block & ((1 << bw) - 1));
  return y * stride + x;
}

static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  return base + raster_block_offset(plane_bsize, raster_block, stride);
}

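// Fills the intra y mode, intra uv mode and switchable interpolation filter
// cost tables from the current frame's probability model.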
static void fill_mode_costs(VP9_COMP *cpi) {
  MACROBLOCK *const x = &cpi->mb;
  const FRAME_CONTEXT *const fc = &cpi->common.fc;
  int i, j;

  for (i = 0; i < INTRA_MODES; i++)
    for (j = 0; j < INTRA_MODES; j++)
      vp9_cost_tokens((int *)x->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
                      vp9_intra_mode_tree);

  // TODO(rbultje) separate tables for superblock costing?
  vp9_cost_tokens(x->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
  vp9_cost_tokens(x->intra_uv_mode_cost[KEY_FRAME],
                  vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
  vp9_cost_tokens(x->intra_uv_mode_cost[INTER_FRAME],
                  fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
    vp9_cost_tokens((int *)x->switchable_interp_costs[i],
                    fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
}

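// Fills the coefficient token cost tables for every transform size, plane
// type, reference type, band and context, expanding the model probabilities
// to full token probabilities first.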
static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  int i, j, k, l;
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; ++t)
    for (i = 0; i < PLANE_TYPES; ++i)
      for (j = 0; j < REF_TYPES; ++j)
        for (k = 0; k < COEF_BANDS; ++k)
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
                            vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
                                 vp9_coef_tree);
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
          }
}

static const int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};

// 3* dc_qlookup[Q]*dc_qlookup[Q];

/* values are now correlated to quantizer */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

void vp9_init_me_luts() {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    const double q = vp9_convert_qindex_to_q(i);
    sad_per_bit16lut[i] = (int)(0.0418 * q + 2.4107);
    sad_per_bit4lut[i] = (int)(0.063 * q + 2.742);
  }
}

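// The rd multiplier grows roughly with the square of the dc quantizer value;
// for inter frames in a two-pass encode it is further boosted according to
// the upcoming frame's intra/inter ratio (see rd_iifactor above).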
int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  // TODO(debargha): Adjust the function below
  int rdmult = 88 * q * q / 25;
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      rdmult += (rdmult * rd_iifactor[31]) >> 4;
    else
      rdmult += (rdmult * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  return rdmult;
}

static int compute_rd_thresh_factor(int qindex) {
  // TODO(debargha): Adjust the function below
  const int q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
  return MAX(q, 8);
}

void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}

static void set_block_thresholds(VP9_COMP *cpi) {
  const VP9_COMMON *const cm = &cpi->common;
  int i, bsize, segment_id;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    const int qindex = clamp(vp9_get_qindex(&cm->seg, segment_id,
                                            cm->base_qindex) + cm->y_dc_delta_q,
                             0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex);

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      const int thresh_max = INT_MAX / t;

      for (i = 0; i < MAX_MODES; ++i)
        cpi->rd_threshes[segment_id][bsize][i] =
            cpi->rd_thresh_mult[i] < thresh_max ? cpi->rd_thresh_mult[i] * t / 4
                                                : INT_MAX;

      for (i = 0; i < MAX_REFS; ++i) {
        cpi->rd_thresh_sub8x8[segment_id][bsize][i] =
            cpi->rd_thresh_mult_sub8x8[i] < thresh_max
                ? cpi->rd_thresh_mult_sub8x8[i] * t / 4
                : INT_MAX;
      }
    }
  }
}

void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->mb;
  int i;

  vp9_clear_system_state();

  cpi->RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
  cpi->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  x->errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
  x->errorperbit += (x->errorperbit == 0);

  x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                         cm->frame_type != KEY_FRAME) ? 0 : 1;

  set_block_thresholds(cpi);

  if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) {
    fill_token_costs(x->token_costs, cm->fc.coef_probs);

    for (i = 0; i < PARTITION_CONTEXTS; i++)
      vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i),
                      vp9_partition_tree);
  }

  if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
      cm->frame_type == KEY_FRAME) {
    fill_mode_costs(cpi);

    if (!frame_is_intra_only(cm)) {
      vp9_build_nmv_cost_table(x->nmvjointcost,
                               cm->allow_high_precision_mv ? x->nmvcost_hp
                                                           : x->nmvcost,
                               &cm->fc.nmvc, cm->allow_high_precision_mv);

      for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
        vp9_cost_tokens((int *)x->inter_mode_cost[i],
                        cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
    }
  }
}

static const int MAX_XSQ_Q10 = 245727;

static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The tables below must be of the same size.

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256.

  // Normalized rate:
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The closed
  // form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651,
    4553, 4389, 4255, 4142, 4044, 3958, 3881, 3811,
    3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
    3133, 3037, 2952, 2877, 2809, 2747, 2690, 2638,
    2589, 2501, 2423, 2353, 2290, 2232, 2179, 2130,
    2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
    1608, 1530, 1460, 1398, 1342, 1290, 1243, 1199,
    1159, 1086, 1021, 963, 911, 864, 821, 781,
    745, 680, 623, 574, 530, 490, 455, 424,
    395, 345, 304, 269, 239, 213, 190, 171,
    154, 126, 104, 87, 73, 61, 52, 44,
    38, 28, 21, 16, 12, 10, 8, 6,
    5, 3, 2, 1, 1, 1, 0, 0,
  };
  // Normalized distortion:
  // This table models the normalized distortion for a Laplacian source with
  // given variance when quantized with a uniform quantizer with given
  // stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
    0, 0, 1, 1, 1, 2, 2, 2,
    3, 3, 4, 5, 5, 6, 7, 7,
    8, 9, 11, 12, 13, 15, 16, 17,
    18, 21, 24, 26, 29, 31, 34, 36,
    39, 44, 49, 54, 59, 64, 69, 73,
    78, 88, 97, 106, 115, 124, 133, 142,
    151, 167, 184, 200, 215, 231, 245, 260,
    274, 301, 327, 351, 375, 397, 418, 439,
    458, 495, 528, 559, 587, 613, 637, 659,
    680, 717, 749, 777, 801, 823, 842, 859,
    874, 899, 919, 936, 949, 960, 969, 977,
    983, 994, 1001, 1006, 1010, 1013, 1015, 1017,
    1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
  };
  static const int xsq_iq_q10[] = {
    0, 4, 8, 12, 16, 20, 24, 28,
    32, 40, 48, 56, 64, 72, 80, 88,
    96, 112, 128, 144, 160, 176, 192, 208,
    224, 256, 288, 320, 352, 384, 416, 448,
    480, 544, 608, 672, 736, 800, 864, 928,
    992, 1120, 1248, 1376, 1504, 1632, 1760, 1888,
    2016, 2272, 2528, 2784, 3040, 3296, 3552, 3808,
    4064, 4576, 5088, 5600, 6112, 6624, 7136, 7648,
    8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328,
    16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688,
    32736, 36832, 40928, 45024, 49120, 53216, 57312, 61408,
    65504, 73696, 81888, 90080, 98272, 106464, 114656, 122848,
    131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
  };
  /*
  static const int tab_size = sizeof(rate_tab_q10) / sizeof(rate_tab_q10[0]);
  assert(sizeof(dist_tab_q10) / sizeof(dist_tab_q10[0]) == tab_size);
  assert(sizeof(xsq_iq_q10) / sizeof(xsq_iq_q10[0]) == tab_size);
  assert(MAX_XSQ_Q10 + 1 == xsq_iq_q10[tab_size - 1]);
  */
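  // Locate the table segment that contains xsq_q10 (the tables are indexed
  // by the four most significant bits of (xsq_q10 >> 2) + 8) and linearly
  // interpolate both rate and distortion in Q10 precision.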
  int tmp = (xsq_q10 >> 2) + 8;
  int k = get_msb(tmp) - 3;
  int xq = (k << 3) + ((tmp >> k) & 0x7);
  const int one_q10 = 1 << 10;
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  const int b_q10 = one_q10 - a_q10;
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}

void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
                                  unsigned int qstep, int *rate,
                                  int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  if (var == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    int d_q10, r_q10;
    const uint64_t xsq_q10_64 =
        ((((uint64_t)qstep * qstep * n) << 10) + (var >> 1)) / var;
    const int xsq_q10 = xsq_q10_64 > MAX_XSQ_Q10 ?
                        MAX_XSQ_Q10 : (int)xsq_q10_64;
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
    *rate = (n * r_q10 + 2) >> 2;
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
  }
}

static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  const int ref = xd->mi[0]->mbmi.ref_frame[0];
  unsigned int sse;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);

    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, &sse);

    if (i == 0)
      x->pred_sse[ref] = sse;

    // Fast approximation of the modelling function.
    if (cpi->speed > 4) {
      int64_t rate;
      int64_t dist;
      int64_t square_error = sse;
      int quantizer = (pd->dequant[1] >> 3);

      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> 8;
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
    } else {
      int rate;
      int64_t dist;
      vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
                                   pd->dequant[1] >> 3, &rate, &dist);
      rate_sum += rate;
      dist_sum += dist;
    }
  }

  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
}

static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
  int j, k;
  BLOCK_SIZE bs;
  const struct macroblock_plane *const p = &x->plane[0];
  const struct macroblockd_plane *const pd = &xd->plane[0];
  const int width = 4 * num_4x4_blocks_wide_lookup[bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[bsize];
  int rate_sum = 0;
  int64_t dist_sum = 0;
  const int t = 4 << tx_size;

  if (tx_size == TX_4X4) {
    bs = BLOCK_4X4;
  } else if (tx_size == TX_8X8) {
    bs = BLOCK_8X8;
  } else if (tx_size == TX_16X16) {
    bs = BLOCK_16X16;
  } else if (tx_size == TX_32X32) {
    bs = BLOCK_32X32;
  } else {
    assert(0);
  }

  *out_skip = 1;
  for (j = 0; j < height; j += t) {
    for (k = 0; k < width; k += t) {
      int rate;
      int64_t dist;
      unsigned int sse;
      cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
                         &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
                         &sse);
      // sse works better than var, since there is no dc prediction used
      vp9_model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3,
                                   &rate, &dist);
      rate_sum += rate;
      dist_sum += dist;
      *out_skip &= (rate < 1024);
    }
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

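// Returns the sum of squared differences between the original and the
// dequantized coefficients; the sum of squared original coefficients is
// returned through |ssz|.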
int64_t vp9_block_error_c(const int16_t *coeff, const int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
 * were non-zero). */
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};

static INLINE int cost_coeffs(MACROBLOCK *x,
                              int plane, int block,
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
                              TX_SIZE tx_size,
                              const int16_t *scan, const int16_t *nb,
                              int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = p->eobs[block];
  const int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size][type][is_inter_block(mbmi)];
  uint8_t token_cache[32 * 32];
  int pt = combine_entropy_contexts(*A, *L);
  int c, cost;
  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                              : get_uv_tx_size(mbmi) == tx_size);

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
    c = 0;
  } else {
    int band_left = *band_count++;

    // dc token
    int v = qcoeff[0];
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
    token_cache[0] = vp9_pt_energy_class[prev_t];
    ++token_costs;

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
      int t;

      v = qcoeff[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
      if (use_fast_coef_costing) {
        cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
        token_cache[rc] = vp9_pt_energy_class[t];
      }
      prev_t = t;
      if (!--band_left) {
        band_left = *band_count++;
        ++token_costs;
      }
    }

    // eob token
    if (band_left) {
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

  // Is the eob the first coefficient?
  *A = *L = (c > 0);

  return cost;
}
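
// Fills args->dist and args->sse for one transform block from the
// coefficient-domain error, shifted back toward pixel-domain units.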
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args) {
  const int ss_txfrm_size = tx_size << 1;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  int shift = tx_size == TX_32X32 ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
  args->sse = this_sse >> shift;

  if (x->skip_encode && !is_inter_block(&xd->mi[0]->mbmi)) {
    // TODO(jingning): tune the model to better capture the distortion.
    int64_t p = (pd->dequant[1] * pd->dequant[1] *
                 (1 << ss_txfrm_size)) >> (shift + 2);
    args->dist += (p >> 4);
    args->sse += p;
  }
}

static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
                       TX_SIZE tx_size, struct rdcost_block_args* args) {
  int x_idx, y_idx;
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);

  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
                           args->t_left + y_idx, tx_size,
                           args->so->scan, args->so->neighbors,
                           args->use_fast_coef_costing);
}

static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd1, rd2, rd;

  if (args->skip)
    return;

  if (!is_inter_block(mbmi))
    vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
  else
    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);

  dist_block(plane, block, tx_size, args);
  rate_block(plane, block, plane_bsize, tx_size, args);
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = MIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                    (rd1 > rd2 && !xd->lossless);

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse += args->sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->skip = 1;
    return;
  }
}

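// Collapses the per-4x4 above/left entropy contexts to one flag per
// transform block: for TX_8X8 and larger, each group of 4x4 contexts is
// read as a single wider integer and reduced to 0/1.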
void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;

  int i;
  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = !!*(const uint16_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = !!*(const uint16_t *)&left[i];
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = !!*(const uint32_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = !!*(const uint32_t *)&left[i];
      break;
    case TX_32X32:
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = !!*(const uint64_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = !!*(const uint64_t *)&left[i];
      break;
    default:
      assert(0 && "Invalid transform size.");
  }
}

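// Measures rate, distortion and skippability for one plane at the given
// transform size, visiting every transform block; if the accumulated rd cost
// exceeds ref_best_rd, the outputs are set to the INT_MAX/INT64_MAX
// sentinels instead.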
static void txfm_rd_in_plane(MACROBLOCK *x,
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  struct rdcost_block_args args = { 0 };
  args.x = x;
  args.best_rd = ref_best_rd;
  args.use_fast_coef_costing = use_fast_coef_casting;

  if (plane == 0)
    xd->mi[0]->mbmi.tx_size = tx_size;

  vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);

  args.so = get_scan(xd, tx_size, pd->plane_type, 0);

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
                                         block_rd_txfm, &args);
  if (args.skip) {
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  } else {
    *distortion = args.this_dist;
    *rate = args.this_rate;
    *sse = args.this_sse;
    *skippable = vp9_is_skippable_in_plane(x, bsize, plane);
  }
}

static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
                                     int64_t ref_best_rd,
                                     BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;

  mbmi->tx_size = MIN(max_tx_size, largest_tx_size);

  txfm_rd_in_plane(x, rate, distortion, skip,
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs,
                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);
  cpi->tx_stepdown_count[0]++;
}

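// Compares the rd cost of each candidate transform size, including the cost
// of signalling the chosen size when the frame uses TX_MODE_SELECT, and also
// records in tx_cache[] the rd cost each tx mode would have produced.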
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int64_t *d, int64_t *distortion,
                                     int *s, int *skip,
                                     int64_t tx_cache[TX_MODES],
                                     BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
  int n, m;
  int s0, s1;
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = TX_4X4;

  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
      for (m = 0; m <= n - (n == max_tx_size); m++) {
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
    } else if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }

    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
  }
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                  best_tx : MIN(max_tx_size, max_mode_tx_size);

  *distortion = d[mbmi->tx_size];
  *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip = s[mbmi->tx_size];

  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];

  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
    cpi->tx_stepdown_count[0]++;
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
  } else {
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
  }
}

static int64_t scaled_rd_cost(int rdmult, int rddiv,
                              int rate, int64_t dist, double scale) {
  return (int64_t) (RDCOST(rdmult, rddiv, rate, dist) * scale);
}

static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
                                          int (*r)[2], int *rate,
                                          int64_t *d, int64_t *distortion,
                                          int *s, int *skip, int64_t *sse,
                                          int64_t ref_best_rd,
                                          BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
  int n, m;
  int s0, s1;
  double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = TX_4X4;

  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    double scale = scale_rd[n];
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_tx_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
    if (s[n]) {
      rd[n][0] = rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, s1, d[n],
                                           scale);
    } else {
      rd[n][0] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][0] + s0, d[n],
                                scale);
      rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][1] + s0, d[n],
                                scale);
    }
    if (rd[n][1] < best_rd) {
      best_rd = rd[n][1];
      best_tx = n;
    }
  }

  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                  best_tx : MIN(max_tx_size, max_mode_tx_size);

  // Actually encode using the chosen mode if a model was used, but do not
  // update the r, d costs
  txfm_rd_in_plane(x, rate, distortion, skip,
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size,
                   cpi->sf.use_fast_coef_costing);

  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    cpi->tx_stepdown_count[0]++;
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
  } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
  } else {
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
  }
}

static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                  int64_t *distortion, int *skip,
                                  int64_t *psse, BLOCK_SIZE bs,
                                  int64_t txfm_cache[TX_MODES],
                                  int64_t ref_best_rd) {
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  TX_SIZE tx_size;

  assert(bs == mbmi->sb_type);

  vp9_subtract_plane(x, bs, 0);

  if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
    vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
    choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
                             ref_best_rd, bs);
    if (psse)
      *psse = sse[mbmi->tx_size];
    return;
  }

  if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER) {
    for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
      model_rd_for_sb_y_tx(cpi, bs, tx_size, x, xd,
                           &r[tx_size][0], &d[tx_size], &s[tx_size]);
    choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s,
                                  skip, sse, ref_best_rd, bs);
  } else {
    for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
      txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
                       &s[tx_size], &sse[tx_size],
                       ref_best_rd, 0, bs, tx_size,
                       cpi->sf.use_fast_coef_costing);
    choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
                             skip, txfm_cache, bs);
  }
  if (psse)
    *psse = sse[mbmi->tx_size];
}

static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                  int64_t *distortion, int *skip,
                                  int64_t *psse, BLOCK_SIZE bs,
                                  int64_t txfm_cache[TX_MODES],
                                  int64_t ref_best_rd) {
  int64_t sse[TX_SIZES];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;

  assert(bs == mbmi->sb_type);
  if (cpi->sf.tx_size_search_method != USE_FULL_RD) {
    vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
    choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
                             ref_best_rd, bs);
  } else {
    int r[TX_SIZES][2], s[TX_SIZES];
    int64_t d[TX_SIZES];
    TX_SIZE tx_size;
    for (tx_size = TX_4X4; tx_size <= max_txsize_lookup[bs]; ++tx_size)
      txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
                       &s[tx_size], &sse[tx_size],
                       ref_best_rd, 0, bs, tx_size,
                       cpi->sf.use_fast_coef_costing);
    choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
                             skip, txfm_cache, bs);
  }
  if (psse)
    *psse = sse[mbmi->tx_size];
}

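// Returns 1 if the diagonal mode |mode| may be skipped because neither of
// its neighboring directional modes is the best intra mode found so far.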
conditional_skipintra(MB_PREDICTION_MODE mode,MB_PREDICTION_MODE best_intra_mode)1021 static int conditional_skipintra(MB_PREDICTION_MODE mode,
1022 MB_PREDICTION_MODE best_intra_mode) {
1023 if (mode == D117_PRED &&
1024 best_intra_mode != V_PRED &&
1025 best_intra_mode != D135_PRED)
1026 return 1;
1027 if (mode == D63_PRED &&
1028 best_intra_mode != V_PRED &&
1029 best_intra_mode != D45_PRED)
1030 return 1;
1031 if (mode == D207_PRED &&
1032 best_intra_mode != H_PRED &&
1033 best_intra_mode != D45_PRED)
1034 return 1;
1035 if (mode == D153_PRED &&
1036 best_intra_mode != H_PRED &&
1037 best_intra_mode != D135_PRED)
1038 return 1;
1039 return 0;
1040 }
1041
rd_pick_intra4x4block(VP9_COMP * cpi,MACROBLOCK * x,int ib,MB_PREDICTION_MODE * best_mode,const int * bmode_costs,ENTROPY_CONTEXT * a,ENTROPY_CONTEXT * l,int * bestrate,int * bestratey,int64_t * bestdistortion,BLOCK_SIZE bsize,int64_t rd_thresh)1042 static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
1043 MB_PREDICTION_MODE *best_mode,
1044 const int *bmode_costs,
1045 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
1046 int *bestrate, int *bestratey,
1047 int64_t *bestdistortion,
1048 BLOCK_SIZE bsize, int64_t rd_thresh) {
1049 MB_PREDICTION_MODE mode;
1050 MACROBLOCKD *const xd = &x->e_mbd;
1051 int64_t best_rd = rd_thresh;
1052
1053 struct macroblock_plane *p = &x->plane[0];
1054 struct macroblockd_plane *pd = &xd->plane[0];
1055 const int src_stride = p->src.stride;
1056 const int dst_stride = pd->dst.stride;
1057 const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
1058 src_stride)];
1059 uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
1060 dst_stride)];
1061 ENTROPY_CONTEXT ta[2], tempa[2];
1062 ENTROPY_CONTEXT tl[2], templ[2];
1063
1064 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1065 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1066 int idx, idy;
1067 uint8_t best_dst[8 * 8];
1068
1069 assert(ib < 4);
1070
1071 vpx_memcpy(ta, a, sizeof(ta));
1072 vpx_memcpy(tl, l, sizeof(tl));
1073 xd->mi[0]->mbmi.tx_size = TX_4X4;
1074
1075 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
1076 int64_t this_rd;
1077 int ratey = 0;
1078 int64_t distortion = 0;
1079 int rate = bmode_costs[mode];
1080
1081 if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
1082 continue;
1083
1084 // Only do the oblique modes if the best so far is
1085 // one of the neighboring directional modes
1086 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
1087 if (conditional_skipintra(mode, *best_mode))
1088 continue;
1089 }
1090
1091 vpx_memcpy(tempa, ta, sizeof(ta));
1092 vpx_memcpy(templ, tl, sizeof(tl));
1093
1094 for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
1095 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
1096 const int block = ib + idy * 2 + idx;
1097 const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
1098 uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
1099 int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
1100 p->src_diff);
1101 int16_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
1102 xd->mi[0]->bmi[block].as_mode = mode;
1103 vp9_predict_intra_block(xd, block, 1,
1104 TX_4X4, mode,
1105 x->skip_encode ? src : dst,
1106 x->skip_encode ? src_stride : dst_stride,
1107 dst, dst_stride, idx, idy, 0);
1108 vp9_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
1109
1110 if (xd->lossless) {
1111 const scan_order *so = &vp9_default_scan_orders[TX_4X4];
1112 vp9_fwht4x4(src_diff, coeff, 8);
1113 vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
1114 ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
1115 so->scan, so->neighbors,
1116 cpi->sf.use_fast_coef_costing);
1117 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1118 goto next;
1119 vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
1120 p->eobs[block]);
1121 } else {
1122 int64_t unused;
1123 const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
1124 const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
1125 vp9_fht4x4(src_diff, coeff, 8, tx_type);
1126 vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
1127 ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
1128 so->scan, so->neighbors,
1129 cpi->sf.use_fast_coef_costing);
1130 distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
1131 16, &unused) >> 2;
1132 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1133 goto next;
1134 vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
1135 dst, dst_stride, p->eobs[block]);
1136 }
1137 }
1138 }
1139
1140 rate += ratey;
1141 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1142
1143 if (this_rd < best_rd) {
1144 *bestrate = rate;
1145 *bestratey = ratey;
1146 *bestdistortion = distortion;
1147 best_rd = this_rd;
1148 *best_mode = mode;
1149 vpx_memcpy(a, tempa, sizeof(tempa));
1150 vpx_memcpy(l, templ, sizeof(templ));
1151 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1152 vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
1153 num_4x4_blocks_wide * 4);
1154 }
1155 next:
1156 {}
1157 }
1158
1159 if (best_rd >= rd_thresh || x->skip_encode)
1160 return best_rd;
1161
1162 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1163 vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
1164 num_4x4_blocks_wide * 4);
1165
1166 return best_rd;
1167 }
1168
rd_pick_intra_sub_8x8_y_mode(VP9_COMP * cpi,MACROBLOCK * mb,int * rate,int * rate_y,int64_t * distortion,int64_t best_rd)1169 static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
1170 int *rate, int *rate_y,
1171 int64_t *distortion,
1172 int64_t best_rd) {
1173 int i, j;
1174 const MACROBLOCKD *const xd = &mb->e_mbd;
1175 MODE_INFO *const mic = xd->mi[0];
1176 const MODE_INFO *above_mi = xd->mi[-xd->mi_stride];
1177 const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL;
1178 const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
1179 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1180 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1181 int idx, idy;
1182 int cost = 0;
1183 int64_t total_distortion = 0;
1184 int tot_rate_y = 0;
1185 int64_t total_rd = 0;
1186 ENTROPY_CONTEXT t_above[4], t_left[4];
1187 const int *bmode_costs = mb->mbmode_cost;
1188
1189 vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
1190 vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
1191
1192 // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
1193 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1194 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1195 MB_PREDICTION_MODE best_mode = DC_PRED;
1196 int r = INT_MAX, ry = INT_MAX;
1197 int64_t d = INT64_MAX, this_rd = INT64_MAX;
1198 i = idy * 2 + idx;
1199 if (cpi->common.frame_type == KEY_FRAME) {
1200 const MB_PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
1201 const MB_PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);
1202
1203 bmode_costs = mb->y_mode_costs[A][L];
1204 }
1205
1206 this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
1207 t_above + idx, t_left + idy, &r, &ry, &d,
1208 bsize, best_rd - total_rd);
1209 if (this_rd >= best_rd - total_rd)
1210 return INT64_MAX;
1211
1212 total_rd += this_rd;
1213 cost += r;
1214 total_distortion += d;
1215 tot_rate_y += ry;
1216
1217 mic->bmi[i].as_mode = best_mode;
1218 for (j = 1; j < num_4x4_blocks_high; ++j)
1219 mic->bmi[i + j * 2].as_mode = best_mode;
1220 for (j = 1; j < num_4x4_blocks_wide; ++j)
1221 mic->bmi[i + j].as_mode = best_mode;
1222
1223 if (total_rd >= best_rd)
1224 return INT64_MAX;
1225 }
1226 }
1227
1228 *rate = cost;
1229 *rate_y = tot_rate_y;
1230 *distortion = total_distortion;
1231 mic->mbmi.mode = mic->bmi[3].as_mode;
1232
1233 return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
1234 }
1235
rd_pick_intra_sby_mode(VP9_COMP * cpi,MACROBLOCK * x,int * rate,int * rate_tokenonly,int64_t * distortion,int * skippable,BLOCK_SIZE bsize,int64_t tx_cache[TX_MODES],int64_t best_rd)1236 static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
1237 int *rate, int *rate_tokenonly,
1238 int64_t *distortion, int *skippable,
1239 BLOCK_SIZE bsize,
1240 int64_t tx_cache[TX_MODES],
1241 int64_t best_rd) {
1242 MB_PREDICTION_MODE mode;
1243 MB_PREDICTION_MODE mode_selected = DC_PRED;
1244 MACROBLOCKD *const xd = &x->e_mbd;
1245 MODE_INFO *const mic = xd->mi[0];
1246 int this_rate, this_rate_tokenonly, s;
1247 int64_t this_distortion, this_rd;
1248 TX_SIZE best_tx = TX_4X4;
1249 int i;
1250 int *bmode_costs = x->mbmode_cost;
1251
1252 if (cpi->sf.tx_size_search_method == USE_FULL_RD)
1253 for (i = 0; i < TX_MODES; i++)
1254 tx_cache[i] = INT64_MAX;
1255
1256 /* Y Search for intra prediction mode */
1257 for (mode = DC_PRED; mode <= TM_PRED; mode++) {
1258 int64_t local_tx_cache[TX_MODES];
1259 MODE_INFO *above_mi = xd->mi[-xd->mi_stride];
1260 MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL;
1261
1262 if (!(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]] & (1 << mode)))
1263 continue;
1264
1265 if (cpi->common.frame_type == KEY_FRAME) {
1266 const MB_PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
1267 const MB_PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
1268
1269 bmode_costs = x->y_mode_costs[A][L];
1270 }
1271 mic->mbmi.mode = mode;
1272
1273 intra_super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
1274 &s, NULL, bsize, local_tx_cache, best_rd);
1275
1276 if (this_rate_tokenonly == INT_MAX)
1277 continue;
1278
1279 this_rate = this_rate_tokenonly + bmode_costs[mode];
1280 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1281
1282 if (this_rd < best_rd) {
1283 mode_selected = mode;
1284 best_rd = this_rd;
1285 best_tx = mic->mbmi.tx_size;
1286 *rate = this_rate;
1287 *rate_tokenonly = this_rate_tokenonly;
1288 *distortion = this_distortion;
1289 *skippable = s;
1290 }
1291
1292 if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
1293 for (i = 0; i < TX_MODES && local_tx_cache[i] < INT64_MAX; i++) {
1294 const int64_t adj_rd = this_rd + local_tx_cache[i] -
1295 local_tx_cache[cpi->common.tx_mode];
1296 if (adj_rd < tx_cache[i]) {
1297 tx_cache[i] = adj_rd;
1298 }
1299 }
1300 }
1301 }
1302
1303 mic->mbmi.mode = mode_selected;
1304 mic->mbmi.tx_size = best_tx;
1305
1306 return best_rd;
1307 }
1308
super_block_uvrd(const VP9_COMP * cpi,MACROBLOCK * x,int * rate,int64_t * distortion,int * skippable,int64_t * sse,BLOCK_SIZE bsize,int64_t ref_best_rd)1309 static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
1310 int *rate, int64_t *distortion, int *skippable,
1311 int64_t *sse, BLOCK_SIZE bsize,
1312 int64_t ref_best_rd) {
1313 MACROBLOCKD *const xd = &x->e_mbd;
1314 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
1315 TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi);
1316 int plane;
1317 int pnrate = 0, pnskip = 1;
1318 int64_t pndist = 0, pnsse = 0;
1319
1320 if (ref_best_rd < 0)
1321 goto term;
1322
1323 if (is_inter_block(mbmi)) {
1324 int plane;
1325 for (plane = 1; plane < MAX_MB_PLANE; ++plane)
1326 vp9_subtract_plane(x, bsize, plane);
1327 }
1328
1329 *rate = 0;
1330 *distortion = 0;
1331 *sse = 0;
1332 *skippable = 1;
1333
1334 for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
1335 txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
1336 ref_best_rd, plane, bsize, uv_txfm_size,
1337 cpi->sf.use_fast_coef_costing);
1338 if (pnrate == INT_MAX)
1339 goto term;
1340 *rate += pnrate;
1341 *distortion += pndist;
1342 *sse += pnsse;
1343 *skippable &= pnskip;
1344 }
1345 return;
1346
1347 term:
1348 *rate = INT_MAX;
1349 *distortion = INT64_MAX;
1350 *sse = INT64_MAX;
1351 *skippable = 0;
1352 return;
1353 }
1354
rd_pick_intra_sbuv_mode(VP9_COMP * cpi,MACROBLOCK * x,PICK_MODE_CONTEXT * ctx,int * rate,int * rate_tokenonly,int64_t * distortion,int * skippable,BLOCK_SIZE bsize,TX_SIZE max_tx_size)1355 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
1356 PICK_MODE_CONTEXT *ctx,
1357 int *rate, int *rate_tokenonly,
1358 int64_t *distortion, int *skippable,
1359 BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
1360 MACROBLOCKD *xd = &x->e_mbd;
1361 MB_PREDICTION_MODE mode;
1362 MB_PREDICTION_MODE mode_selected = DC_PRED;
1363 int64_t best_rd = INT64_MAX, this_rd;
1364 int this_rate_tokenonly, this_rate, s;
1365 int64_t this_distortion, this_sse;
1366
1367 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
1368 if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
1369 continue;
1370
1371 xd->mi[0]->mbmi.uv_mode = mode;
1372
1373 super_block_uvrd(cpi, x, &this_rate_tokenonly,
1374 &this_distortion, &s, &this_sse, bsize, best_rd);
1375 if (this_rate_tokenonly == INT_MAX)
1376 continue;
1377 this_rate = this_rate_tokenonly +
1378 x->intra_uv_mode_cost[cpi->common.frame_type][mode];
1379 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1380
1381 if (this_rd < best_rd) {
1382 mode_selected = mode;
1383 best_rd = this_rd;
1384 *rate = this_rate;
1385 *rate_tokenonly = this_rate_tokenonly;
1386 *distortion = this_distortion;
1387 *skippable = s;
1388 if (!x->select_txfm_size) {
1389 int i;
1390 struct macroblock_plane *const p = x->plane;
1391 struct macroblockd_plane *const pd = xd->plane;
1392 for (i = 1; i < MAX_MB_PLANE; ++i) {
1393 p[i].coeff = ctx->coeff_pbuf[i][2];
1394 p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
1395 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
1396 p[i].eobs = ctx->eobs_pbuf[i][2];
1397
1398 ctx->coeff_pbuf[i][2] = ctx->coeff_pbuf[i][0];
1399 ctx->qcoeff_pbuf[i][2] = ctx->qcoeff_pbuf[i][0];
1400 ctx->dqcoeff_pbuf[i][2] = ctx->dqcoeff_pbuf[i][0];
1401 ctx->eobs_pbuf[i][2] = ctx->eobs_pbuf[i][0];
1402
1403 ctx->coeff_pbuf[i][0] = p[i].coeff;
1404 ctx->qcoeff_pbuf[i][0] = p[i].qcoeff;
1405 ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
1406 ctx->eobs_pbuf[i][0] = p[i].eobs;
1407 }
1408 }
1409 }
1410 }
1411
1412 xd->mi[0]->mbmi.uv_mode = mode_selected;
1413 return best_rd;
1414 }
1415
rd_sbuv_dcpred(const VP9_COMP * cpi,MACROBLOCK * x,int * rate,int * rate_tokenonly,int64_t * distortion,int * skippable,BLOCK_SIZE bsize)1416 static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x,
1417 int *rate, int *rate_tokenonly,
1418 int64_t *distortion, int *skippable,
1419 BLOCK_SIZE bsize) {
1420 const VP9_COMMON *cm = &cpi->common;
1421 int64_t unused;
1422
1423 x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED;
1424 super_block_uvrd(cpi, x, rate_tokenonly, distortion,
1425 skippable, &unused, bsize, INT64_MAX);
1426 *rate = *rate_tokenonly + x->intra_uv_mode_cost[cm->frame_type][DC_PRED];
1427 return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
1428 }
1429
choose_intra_uv_mode(VP9_COMP * cpi,PICK_MODE_CONTEXT * ctx,BLOCK_SIZE bsize,TX_SIZE max_tx_size,int * rate_uv,int * rate_uv_tokenonly,int64_t * dist_uv,int * skip_uv,MB_PREDICTION_MODE * mode_uv)1430 static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
1431 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
1432 int *rate_uv, int *rate_uv_tokenonly,
1433 int64_t *dist_uv, int *skip_uv,
1434 MB_PREDICTION_MODE *mode_uv) {
1435 MACROBLOCK *const x = &cpi->mb;
1436
1437 // Use an estimated rd for uv_intra based on DC_PRED if the
1438 // appropriate speed flag is set.
1439 if (cpi->sf.use_uv_intra_rd_estimate) {
1440 rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv,
1441 skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
1442 // Else do a proper rd search for each possible transform size that may
1443 // be considered in the main rd loop.
1444 } else {
1445 rd_pick_intra_sbuv_mode(cpi, x, ctx,
1446 rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
1447 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
1448 }
1449 *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
1450 }
1451
cost_mv_ref(const VP9_COMP * cpi,MB_PREDICTION_MODE mode,int mode_context)1452 static int cost_mv_ref(const VP9_COMP *cpi, MB_PREDICTION_MODE mode,
1453 int mode_context) {
1454 const MACROBLOCK *const x = &cpi->mb;
1455 const int segment_id = x->e_mbd.mi[0]->mbmi.segment_id;
1456
1457 // Don't account for mode here if segment skip is enabled.
1458 if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
1459 assert(is_inter_mode(mode));
1460 return x->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
1461 } else {
1462 return 0;
1463 }
1464 }
1465
1466 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1467 BLOCK_SIZE bsize,
1468 int_mv *frame_mv,
1469 int mi_row, int mi_col,
1470 int_mv single_newmv[MAX_REF_FRAMES],
1471 int *rate_mv);
1472
labels2mode(VP9_COMP * cpi,MACROBLOCKD * xd,int i,MB_PREDICTION_MODE mode,int_mv this_mv[2],int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],int_mv seg_mvs[MAX_REF_FRAMES],int_mv * best_ref_mv[2],const int * mvjcost,int * mvcost[2])1473 static int labels2mode(VP9_COMP *cpi, MACROBLOCKD *xd, int i,
1474 MB_PREDICTION_MODE mode,
1475 int_mv this_mv[2],
1476 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
1477 int_mv seg_mvs[MAX_REF_FRAMES],
1478 int_mv *best_ref_mv[2],
1479 const int *mvjcost, int *mvcost[2]) {
1480 MODE_INFO *const mic = xd->mi[0];
1481 const MB_MODE_INFO *const mbmi = &mic->mbmi;
1482 int thismvcost = 0;
1483 int idx, idy;
1484 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
1485 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
1486 const int is_compound = has_second_ref(mbmi);
1487
1488 // the only time we should do costing for new motion vector or mode
1489 // is when we are on a new label (jbb May 08, 2007)
1490 switch (mode) {
1491 case NEWMV:
1492 this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
1493 thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
1494 mvjcost, mvcost, MV_COST_WEIGHT_SUB);
1495 if (is_compound) {
1496 this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
1497 thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
1498 mvjcost, mvcost, MV_COST_WEIGHT_SUB);
1499 }
1500 break;
1501 case NEARESTMV:
1502 this_mv[0].as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
1503 if (is_compound)
1504 this_mv[1].as_int = frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
1505 break;
1506 case NEARMV:
1507 this_mv[0].as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
1508 if (is_compound)
1509 this_mv[1].as_int = frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
1510 break;
1511 case ZEROMV:
1512 this_mv[0].as_int = 0;
1513 if (is_compound)
1514 this_mv[1].as_int = 0;
1515 break;
1516 default:
1517 break;
1518 }
1519
1520 mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
1521 if (is_compound)
1522 mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int;
1523
1524 mic->bmi[i].as_mode = mode;
1525
1526 for (idy = 0; idy < num_4x4_blocks_high; ++idy)
1527 for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
1528 vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
1529 &mic->bmi[i], sizeof(mic->bmi[i]));
1530
1531 return cost_mv_ref(cpi, mode, mbmi->mode_context[mbmi->ref_frame[0]]) +
1532 thismvcost;
1533 }
1534
static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
                                       MACROBLOCK *x,
                                       int64_t best_yrd,
                                       int i,
                                       int *labelyrate,
                                       int64_t *distortion, int64_t *sse,
                                       ENTROPY_CONTEXT *ta,
                                       ENTROPY_CONTEXT *tl,
                                       int mi_row, int mi_col) {
  int k;
  MACROBLOCKD *xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
  struct macroblock_plane *const p = &x->plane[0];
  MODE_INFO *const mi = xd->mi[0];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
  int idx, idy;

  const uint8_t *const src = &p->src.buf[raster_block_offset(BLOCK_8X8, i,
                                                             p->src.stride)];
  uint8_t *const dst = &pd->dst.buf[raster_block_offset(BLOCK_8X8, i,
                                                        pd->dst.stride)];
  int64_t thisdistortion = 0, thissse = 0;
  int thisrate = 0, ref;
  const scan_order *so = &vp9_default_scan_orders[TX_4X4];
  const int is_compound = has_second_ref(&mi->mbmi);
  const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter);

  for (ref = 0; ref < 1 + is_compound; ++ref) {
    const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i,
                                               pd->pre[ref].stride)];
    vp9_build_inter_predictor(pre, pd->pre[ref].stride,
                              dst, pd->dst.stride,
                              &mi->bmi[i].as_mv[ref].as_mv,
                              &xd->block_refs[ref]->sf, width, height, ref,
                              kernel, MV_PRECISION_Q3,
                              mi_col * MI_SIZE + 4 * (i % 2),
                              mi_row * MI_SIZE + 4 * (i / 2));
  }

  vp9_subtract_block(height, width,
                     raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
                     src, p->src.stride,
                     dst, pd->dst.stride);

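  // Transform, quantize and cost each 4x4 block covered by this label,
  // accumulating distortion and SSE in the transform domain (hence the >> 2
  // rescaling below). Abort as soon as the running RD cost exceeds best_yrd.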
  k = i;
  for (idy = 0; idy < height / 4; ++idy) {
    for (idx = 0; idx < width / 4; ++idx) {
      int64_t ssz, rd, rd1, rd2;
      int16_t* coeff;

      k += (idy * 2 + idx);
      coeff = BLOCK_OFFSET(p->coeff, k);
      x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
                    coeff, 8);
      vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
      thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
                                        16, &ssz);
      thissse += ssz;
      thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
                              so->scan, so->neighbors,
                              cpi->sf.use_fast_coef_costing);
      rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
      rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
      rd = MIN(rd1, rd2);
      if (rd >= best_yrd)
        return INT64_MAX;
    }
  }

  *distortion = thisdistortion >> 2;
  *labelyrate = thisrate;
  *sse = thissse >> 2;

  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}

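// Per-label, per-mode rate/distortion statistics gathered during the sub-8x8
// partition search, so the winning decision for each label can be replayed.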
typedef struct {
  int eobs;
  int brate;
  int byrate;
  int64_t bdist;
  int64_t bsse;
  int64_t brdcost;
  int_mv mvs[2];
  ENTROPY_CONTEXT ta[2];
  ENTROPY_CONTEXT tl[2];
} SEG_RDSTAT;

typedef struct {
  int_mv *ref_mv[2];
  int_mv mvp;

  int64_t segment_rd;
  int r;
  int64_t d;
  int64_t sse;
  int segment_yrate;
  MB_PREDICTION_MODE modes[4];
  SEG_RDSTAT rdstat[4][INTER_MODES];
  int mvthresh;
} BEST_SEG_INFO;

static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
  return (mv->row >> 3) < x->mv_row_min ||
         (mv->row >> 3) > x->mv_row_max ||
         (mv->col >> 3) < x->mv_col_min ||
         (mv->col >> 3) > x->mv_col_max;
}

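// mi_buf_shift()/mi_buf_restore() temporarily advance the plane-0 source and
// prediction buffer pointers to the 4x4 raster block i within the 8x8 unit,
// so the block-based motion search routines can be reused for sub-8x8 labels.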
static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
  MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];

  p->src.buf = &p->src.buf[raster_block_offset(BLOCK_8X8, i, p->src.stride)];
  assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
  pd->pre[0].buf = &pd->pre[0].buf[raster_block_offset(BLOCK_8X8, i,
                                                       pd->pre[0].stride)];
  if (has_second_ref(mbmi))
    pd->pre[1].buf = &pd->pre[1].buf[raster_block_offset(BLOCK_8X8, i,
                                                         pd->pre[1].stride)];
}

static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
                                  struct buf_2d orig_pre[2]) {
  MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
  x->plane[0].src = orig_src;
  x->e_mbd.plane[0].pre[0] = orig_pre[0];
  if (has_second_ref(mbmi))
    x->e_mbd.plane[0].pre[1] = orig_pre[1];
}

static INLINE int mv_has_subpel(const MV *mv) {
  return (mv->row & 0x0F) || (mv->col & 0x0F);
}

// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
// TODO(aconverse): Find out if this is still productive then clean up or remove
static int check_best_zero_mv(
    const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
    int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
    int disable_inter_mode_mask, int this_mode, int ref_frame,
    int second_ref_frame) {
  if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) &&
      (this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
      frame_mv[this_mode][ref_frame].as_int == 0 &&
      (second_ref_frame == NONE ||
       frame_mv[this_mode][second_ref_frame].as_int == 0)) {
    int rfc = mode_context[ref_frame];
    int c1 = cost_mv_ref(cpi, NEARMV, rfc);
    int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
    int c3 = cost_mv_ref(cpi, ZEROMV, rfc);

    if (this_mode == NEARMV) {
      if (c1 > c3) return 0;
    } else if (this_mode == NEARESTMV) {
      if (c2 > c3) return 0;
    } else {
      assert(this_mode == ZEROMV);
      if (second_ref_frame == NONE) {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0))
          return 0;
      } else {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0 &&
             frame_mv[NEARESTMV][second_ref_frame].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0 &&
             frame_mv[NEARMV][second_ref_frame].as_int == 0))
          return 0;
      }
    }
  }
  return 1;
}

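// For each of the (up to four) sub-8x8 labels, evaluates every allowed inter
// mode (NEARESTMV/NEARMV/ZEROMV/NEWMV), running a motion search in the NEWMV
// case, and accumulates the best per-label RD statistics into
// bsi_buf[filter_idx]. Bails out with segment_rd = INT64_MAX as soon as the
// running total exceeds the best segment cost seen so far.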
static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
                                    const TileInfo *const tile,
                                    BEST_SEG_INFO *bsi_buf, int filter_idx,
                                    int_mv seg_mvs[4][MAX_REF_FRAMES],
                                    int mi_row, int mi_col) {
  int k, br = 0, idx, idy;
  int64_t bd = 0, block_sse = 0;
  MB_PREDICTION_MODE this_mode;
  MACROBLOCKD *xd = &x->e_mbd;
  VP9_COMMON *cm = &cpi->common;
  MODE_INFO *mi = xd->mi[0];
  MB_MODE_INFO *const mbmi = &mi->mbmi;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int label_count = 4;
  int64_t this_segment_rd = 0;
  int label_mv_thresh;
  int segmentyrate = 0;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  vp9_variance_fn_ptr_t *v_fn_ptr = &cpi->fn_ptr[bsize];
  ENTROPY_CONTEXT t_above[2], t_left[2];
  BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
  int mode_idx;
  int subpelmv = 1, have_ref = 0;
  const int has_second_rf = has_second_ref(mbmi);
  const int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];

  vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
  vpx_memcpy(t_left, pd->left_context, sizeof(t_left));

  // A multiplier of 64 would make this threshold so large that mvs on
  // segments would very rarely be checked; the multiplier of 1 used here
  // makes the mv threshold roughly equal to what it is for macroblocks.
  label_mv_thresh = 1 * bsi->mvthresh / label_count;

  // Segmentation method overheads
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      // TODO(jingning,rbultje): rewrite the rate-distortion optimization
      // loop for 4x4/4x8/8x4 block coding, to be replaced with a new rd loop.
      int_mv mode_mv[MB_MODE_COUNT][2];
      int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
      MB_PREDICTION_MODE mode_selected = ZEROMV;
      int64_t best_rd = INT64_MAX;
      const int i = idy * 2 + idx;
      int ref;

      for (ref = 0; ref < 1 + has_second_rf; ++ref) {
        const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
        frame_mv[ZEROMV][frame].as_int = 0;
        vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col,
                                      &frame_mv[NEARESTMV][frame],
                                      &frame_mv[NEARMV][frame]);
      }

      // Search for the best motion vector on this segment.
      for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
        const struct buf_2d orig_src = x->plane[0].src;
        struct buf_2d orig_pre[2];

        mode_idx = INTER_OFFSET(this_mode);
        bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
        if (disable_inter_mode_mask & (1 << mode_idx))
          continue;

        if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
                                disable_inter_mode_mask,
                                this_mode, mbmi->ref_frame[0],
                                mbmi->ref_frame[1]))
          continue;

        vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
        vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
                   sizeof(bsi->rdstat[i][mode_idx].ta));
        vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
                   sizeof(bsi->rdstat[i][mode_idx].tl));

        // Motion search for NEWMV (single predictor case only).
        if (!has_second_rf && this_mode == NEWMV &&
            seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
          int_mv *const new_mv = &mode_mv[NEWMV][0];
          int step_param = 0;
          int further_steps;
          int thissme, bestsme = INT_MAX;
          int sadpb = x->sadperbit4;
          MV mvp_full;
          int max_mv;

          /* Is the best so far sufficiently good that we can't justify
           * doing a new motion search. */
          if (best_rd < label_mv_thresh)
            break;

          if (cpi->oxcf.mode != MODE_SECONDPASS_BEST &&
              cpi->oxcf.mode != MODE_BESTQUALITY) {
            // Use the previous block's result as the next block's MV
            // predictor.
            if (i > 0) {
              bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
              if (i == 2)
                bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
            }
          }
          if (i == 0)
            max_mv = x->max_mv_context[mbmi->ref_frame[0]];
          else
            max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;

          if (cpi->sf.auto_mv_step_size && cm->show_frame) {
            // Take a weighted average of the step_params based on the last
            // frame's max mv magnitude and the best ref mvs of the current
            // block for the given reference.
            step_param = (vp9_init_search_range(cpi, max_mv) +
                          cpi->mv_step_param) >> 1;
          } else {
            step_param = cpi->mv_step_param;
          }

          mvp_full.row = bsi->mvp.as_mv.row >> 3;
          mvp_full.col = bsi->mvp.as_mv.col >> 3;

          if (cpi->sf.adaptive_motion_search && cm->show_frame) {
            mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].as_mv.row >> 3;
            mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].as_mv.col >> 3;
            step_param = MAX(step_param, 8);
          }

          further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
          // Adjust src pointer for this block.
          mi_buf_shift(x, i);

          vp9_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);

          if (cpi->sf.search_method == HEX) {
            bestsme = vp9_hex_search(x, &mvp_full,
                                     step_param,
                                     sadpb, 1, v_fn_ptr, 1,
                                     &bsi->ref_mv[0]->as_mv,
                                     &new_mv->as_mv);
            if (bestsme < INT_MAX)
              bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
                                           &bsi->ref_mv[0]->as_mv,
                                           v_fn_ptr, 1);
          } else if (cpi->sf.search_method == SQUARE) {
            bestsme = vp9_square_search(x, &mvp_full,
                                        step_param,
                                        sadpb, 1, v_fn_ptr, 1,
                                        &bsi->ref_mv[0]->as_mv,
                                        &new_mv->as_mv);
            if (bestsme < INT_MAX)
              bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
                                           &bsi->ref_mv[0]->as_mv,
                                           v_fn_ptr, 1);
          } else if (cpi->sf.search_method == BIGDIA) {
            bestsme = vp9_bigdia_search(x, &mvp_full,
                                        step_param,
                                        sadpb, 1, v_fn_ptr, 1,
                                        &bsi->ref_mv[0]->as_mv,
                                        &new_mv->as_mv);
            if (bestsme < INT_MAX)
              bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
                                           &bsi->ref_mv[0]->as_mv,
                                           v_fn_ptr, 1);
          } else {
            bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
                                             sadpb, further_steps, 0, v_fn_ptr,
                                             &bsi->ref_mv[0]->as_mv,
                                             &new_mv->as_mv);
          }

          // Should we do a full search (best quality only)?
          if (cpi->oxcf.mode == MODE_BESTQUALITY ||
              cpi->oxcf.mode == MODE_SECONDPASS_BEST) {
            int_mv *const best_mv = &mi->bmi[i].as_mv[0];
            /* Check if mvp_full is within the range. */
            clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
                     x->mv_row_min, x->mv_row_max);
            thissme = cpi->full_search_sad(x, &mvp_full,
                                           sadpb, 16, v_fn_ptr,
                                           x->nmvjointcost, x->mvcost,
                                           &bsi->ref_mv[0]->as_mv,
                                           &best_mv->as_mv);
            if (thissme < bestsme) {
              bestsme = thissme;
              new_mv->as_int = best_mv->as_int;
            } else {
              // The full search result is actually worse, so reinstate the
              // previous best vector.
              best_mv->as_int = new_mv->as_int;
            }
          }

          if (bestsme < INT_MAX) {
            int distortion;
            cpi->find_fractional_mv_step(x,
                                         &new_mv->as_mv,
                                         &bsi->ref_mv[0]->as_mv,
                                         cm->allow_high_precision_mv,
                                         x->errorperbit, v_fn_ptr,
                                         cpi->sf.subpel_force_stop,
                                         cpi->sf.subpel_iters_per_step,
                                         x->nmvjointcost, x->mvcost,
                                         &distortion,
                                         &x->pred_sse[mbmi->ref_frame[0]]);

            // Save the motion search result for use in compound prediction.
            seg_mvs[i][mbmi->ref_frame[0]].as_int = new_mv->as_int;
          }

          if (cpi->sf.adaptive_motion_search)
            x->pred_mv[mbmi->ref_frame[0]].as_int = new_mv->as_int;

          // Restore src pointers.
          mi_buf_restore(x, orig_src, orig_pre);
        }

        if (has_second_rf) {
          if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
              seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
            continue;
        }

        if (has_second_rf && this_mode == NEWMV &&
            mbmi->interp_filter == EIGHTTAP) {
          // Adjust src pointers.
          mi_buf_shift(x, i);
          if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
            int rate_mv;
            joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
                                mi_row, mi_col, seg_mvs[i],
                                &rate_mv);
            seg_mvs[i][mbmi->ref_frame[0]].as_int =
                frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
            seg_mvs[i][mbmi->ref_frame[1]].as_int =
                frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
          }
          // Restore src pointers.
          mi_buf_restore(x, orig_src, orig_pre);
        }

        bsi->rdstat[i][mode_idx].brate =
            labels2mode(cpi, xd, i, this_mode, mode_mv[this_mode], frame_mv,
                        seg_mvs[i], bsi->ref_mv, x->nmvjointcost, x->mvcost);

        for (ref = 0; ref < 1 + has_second_rf; ++ref) {
          bsi->rdstat[i][mode_idx].mvs[ref].as_int =
              mode_mv[this_mode][ref].as_int;
          if (num_4x4_blocks_wide > 1)
            bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
                mode_mv[this_mode][ref].as_int;
          if (num_4x4_blocks_high > 1)
            bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
                mode_mv[this_mode][ref].as_int;
        }

        // Trap vectors that reach beyond the UMV borders.
        if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) ||
            (has_second_rf &&
             mv_check_bounds(x, &mode_mv[this_mode][1].as_mv)))
          continue;

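        // When evaluating the second or third filter, reuse the statistics
        // gathered for an earlier filter whenever the motion vectors match
        // and are full-pel: in that case the prediction, and hence the
        // residual, does not depend on the interpolation filter.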
        if (filter_idx > 0) {
          BEST_SEG_INFO *ref_bsi = bsi_buf;
          subpelmv = 0;
          have_ref = 1;

          for (ref = 0; ref < 1 + has_second_rf; ++ref) {
            subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
            have_ref &= mode_mv[this_mode][ref].as_int ==
                ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
          }

          if (filter_idx > 1 && !subpelmv && !have_ref) {
            ref_bsi = bsi_buf + 1;
            have_ref = 1;
            for (ref = 0; ref < 1 + has_second_rf; ++ref)
              have_ref &= mode_mv[this_mode][ref].as_int ==
                  ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
          }

          if (!subpelmv && have_ref &&
              ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
            vpx_memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
                       sizeof(SEG_RDSTAT));
            if (num_4x4_blocks_wide > 1)
              bsi->rdstat[i + 1][mode_idx].eobs =
                  ref_bsi->rdstat[i + 1][mode_idx].eobs;
            if (num_4x4_blocks_high > 1)
              bsi->rdstat[i + 2][mode_idx].eobs =
                  ref_bsi->rdstat[i + 2][mode_idx].eobs;

            if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
              mode_selected = this_mode;
              best_rd = bsi->rdstat[i][mode_idx].brdcost;
            }
            continue;
          }
        }

        bsi->rdstat[i][mode_idx].brdcost =
            encode_inter_mb_segment(cpi, x,
                                    bsi->segment_rd - this_segment_rd, i,
                                    &bsi->rdstat[i][mode_idx].byrate,
                                    &bsi->rdstat[i][mode_idx].bdist,
                                    &bsi->rdstat[i][mode_idx].bsse,
                                    bsi->rdstat[i][mode_idx].ta,
                                    bsi->rdstat[i][mode_idx].tl,
                                    mi_row, mi_col);
        if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
          bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
                                            bsi->rdstat[i][mode_idx].brate, 0);
          bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
          bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
          if (num_4x4_blocks_wide > 1)
            bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
          if (num_4x4_blocks_high > 1)
            bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
        }

        if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
          mode_selected = this_mode;
          best_rd = bsi->rdstat[i][mode_idx].brdcost;
        }
      } /*for each 4x4 mode*/

      if (best_rd == INT64_MAX) {
        int iy, midx;
        for (iy = i + 1; iy < 4; ++iy)
          for (midx = 0; midx < INTER_MODES; ++midx)
            bsi->rdstat[iy][midx].brdcost = INT64_MAX;
        bsi->segment_rd = INT64_MAX;
        return;
      }

      mode_idx = INTER_OFFSET(mode_selected);
      vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
      vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));

      labels2mode(cpi, xd, i, mode_selected, mode_mv[mode_selected],
                  frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
                  x->mvcost);

      br += bsi->rdstat[i][mode_idx].brate;
      bd += bsi->rdstat[i][mode_idx].bdist;
      block_sse += bsi->rdstat[i][mode_idx].bsse;
      segmentyrate += bsi->rdstat[i][mode_idx].byrate;
      this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;

      if (this_segment_rd > bsi->segment_rd) {
        int iy, midx;
        for (iy = i + 1; iy < 4; ++iy)
          for (midx = 0; midx < INTER_MODES; ++midx)
            bsi->rdstat[iy][midx].brdcost = INT64_MAX;
        bsi->segment_rd = INT64_MAX;
        return;
      }
    }
  } /* for each label */

  bsi->r = br;
  bsi->d = bd;
  bsi->segment_yrate = segmentyrate;
  bsi->segment_rd = this_segment_rd;
  bsi->sse = block_sse;

  // Update the coding decisions.
  for (k = 0; k < 4; ++k)
    bsi->modes[k] = mi->bmi[k].as_mode;
}

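// Top-level sub-8x8 partition RD search: initializes the BEST_SEG_INFO for
// this filter, runs rd_check_segment_txsize(), and, if the result beats
// best_rd, commits the winning per-label modes and motion vectors to the
// mode info structure.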
static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
                                           const TileInfo *const tile,
                                           int_mv *best_ref_mv,
                                           int_mv *second_best_ref_mv,
                                           int64_t best_rd,
                                           int *returntotrate,
                                           int *returnyrate,
                                           int64_t *returndistortion,
                                           int *skippable, int64_t *psse,
                                           int mvthresh,
                                           int_mv seg_mvs[4][MAX_REF_FRAMES],
                                           BEST_SEG_INFO *bsi_buf,
                                           int filter_idx,
                                           int mi_row, int mi_col) {
  int i;
  BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
  MACROBLOCKD *xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  MB_MODE_INFO *mbmi = &mi->mbmi;
  int mode_idx;

  vp9_zero(*bsi);

  bsi->segment_rd = best_rd;
  bsi->ref_mv[0] = best_ref_mv;
  bsi->ref_mv[1] = second_best_ref_mv;
  bsi->mvp.as_int = best_ref_mv->as_int;
  bsi->mvthresh = mvthresh;

  for (i = 0; i < 4; i++)
    bsi->modes[i] = ZEROMV;

  rd_check_segment_txsize(cpi, x, tile, bsi_buf, filter_idx, seg_mvs,
                          mi_row, mi_col);

  if (bsi->segment_rd > best_rd)
    return INT64_MAX;
  /* set it to the best */
  for (i = 0; i < 4; i++) {
    mode_idx = INTER_OFFSET(bsi->modes[i]);
    mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
    if (has_second_ref(mbmi))
      mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
    x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
    mi->bmi[i].as_mode = bsi->modes[i];
  }

  /*
   * used to set mbmi->mv.as_int
   */
  *returntotrate = bsi->r;
  *returndistortion = bsi->d;
  *returnyrate = bsi->segment_yrate;
  *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0);
  *psse = bsi->sse;
  mbmi->mode = bsi->modes[3];

  return bsi->segment_rd;
}

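// Picks, by full-pel SAD against the source block, the best starting point
// among the candidate reference MVs for the upcoming motion search, and
// records the winning index, the max MV magnitude seen, and the best SAD.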
static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
                    uint8_t *ref_y_buffer, int ref_y_stride,
                    int ref_frame, BLOCK_SIZE block_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  int_mv this_mv;
  int i;
  int zero_seen = 0;
  int best_index = 0;
  int best_sad = INT_MAX;
  int this_sad = INT_MAX;
  int max_mv = 0;

  uint8_t *src_y_ptr = x->plane[0].src.buf;
  uint8_t *ref_y_ptr;
  int row_offset, col_offset;
  int num_mv_refs = MAX_MV_REF_CANDIDATES +
                    (cpi->sf.adaptive_motion_search &&
                     cpi->common.show_frame &&
                     block_size < cpi->sf.max_partition_size);

  int_mv pred_mv[3];
  pred_mv[0] = mbmi->ref_mvs[ref_frame][0];
  pred_mv[1] = mbmi->ref_mvs[ref_frame][1];
  pred_mv[2] = x->pred_mv[ref_frame];

  // Get the SAD for each candidate reference mv.
  for (i = 0; i < num_mv_refs; i++) {
    this_mv.as_int = pred_mv[i].as_int;

    max_mv = MAX(max_mv,
                 MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3);
    // Only need to check the zero mv once.
    if (!this_mv.as_int && zero_seen)
      continue;

    zero_seen = zero_seen || !this_mv.as_int;

    row_offset = this_mv.as_mv.row >> 3;
    col_offset = this_mv.as_mv.col >> 3;
    ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;

    // Find the SAD for the current vector.
    this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
                                           ref_y_ptr, ref_y_stride,
                                           0x7fffffff);

    // Note if it is the best so far.
    if (this_sad < best_sad) {
      best_sad = this_sad;
      best_index = i;
    }
  }

  // Note the index of the mv that worked best in the reference list.
  x->mv_best_ref_index[ref_frame] = best_index;
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}

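// Computes the bit cost of signaling each possible reference frame choice
// (single and compound) for the current segment, along with the probability
// used to signal compound vs. single prediction when the frame allows both.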
static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id,
                                     unsigned int *ref_costs_single,
                                     unsigned int *ref_costs_comp,
                                     vp9_prob *comp_mode_p) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
  int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
                                             SEG_LVL_REF_FRAME);
  if (seg_ref_active) {
    vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
    vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
    *comp_mode_p = 128;
  } else {
    vp9_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
    vp9_prob comp_inter_p = 128;

    if (cm->reference_mode == REFERENCE_MODE_SELECT) {
      comp_inter_p = vp9_get_reference_mode_prob(cm, xd);
      *comp_mode_p = comp_inter_p;
    } else {
      *comp_mode_p = 128;
    }

    ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);

    if (cm->reference_mode != COMPOUND_REFERENCE) {
      vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
      vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
      unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);

      if (cm->reference_mode == REFERENCE_MODE_SELECT)
        base_cost += vp9_cost_bit(comp_inter_p, 0);

      ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
          ref_costs_single[ALTREF_FRAME] = base_cost;
      ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
      ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
      ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
      ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
      ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
    } else {
      ref_costs_single[LAST_FRAME] = 512;
      ref_costs_single[GOLDEN_FRAME] = 512;
      ref_costs_single[ALTREF_FRAME] = 512;
    }
    if (cm->reference_mode != SINGLE_REFERENCE) {
      vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
      unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);

      if (cm->reference_mode == REFERENCE_MODE_SELECT)
        base_cost += vp9_cost_bit(comp_inter_p, 1);

      ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0);
      ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
    } else {
      ref_costs_comp[LAST_FRAME] = 512;
      ref_costs_comp[GOLDEN_FRAME] = 512;
    }
  }
}

static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                                 int mode_index,
                                 int_mv *ref_mv,
                                 int_mv *second_ref_mv,
                                 int64_t comp_pred_diff[REFERENCE_MODES],
                                 int64_t tx_size_diff[TX_MODES],
                                 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) {
  MACROBLOCKD *const xd = &x->e_mbd;

  // Take a snapshot of the coding context so it can be
  // restored if we decide to encode this way.
  ctx->skip = x->skip;
  ctx->best_mode_index = mode_index;
  ctx->mic = *xd->mi[0];

  ctx->best_ref_mv[0].as_int = ref_mv->as_int;
  ctx->best_ref_mv[1].as_int = second_ref_mv->as_int;

  ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
  ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
  ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];

  vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
  vpx_memcpy(ctx->best_filter_diff, best_filter_diff,
             sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
}

static void setup_pred_block(const MACROBLOCKD *xd,
                             struct buf_2d dst[MAX_MB_PLANE],
                             const YV12_BUFFER_CONFIG *src,
                             int mi_row, int mi_col,
                             const struct scale_factors *scale,
                             const struct scale_factors *scale_uv) {
  int i;

  dst[0].buf = src->y_buffer;
  dst[0].stride = src->y_stride;
  dst[1].buf = src->u_buffer;
  dst[2].buf = src->v_buffer;
  dst[1].stride = dst[2].stride = src->uv_stride;
#if CONFIG_ALPHA
  dst[3].buf = src->alpha_buffer;
  dst[3].stride = src->alpha_stride;
#endif

  // TODO(jkoleszar): Make scale factors per-plane data
  for (i = 0; i < MAX_MB_PLANE; i++) {
    setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
                     i ? scale_uv : scale,
                     xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
  }
}

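// Prepares everything needed before searching a given reference frame:
// sets up the prediction buffers, gathers and refines the candidate MV list,
// and, for 8x8 and larger blocks with unscaled references, runs mv_pred()
// to choose the search centre.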
void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
                            const TileInfo *const tile,
                            MV_REFERENCE_FRAME ref_frame,
                            BLOCK_SIZE block_size,
                            int mi_row, int mi_col,
                            int_mv frame_nearest_mv[MAX_REF_FRAMES],
                            int_mv frame_near_mv[MAX_REF_FRAMES],
                            struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
  const VP9_COMMON *cm = &cpi->common;
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  int_mv *const candidates = mi->mbmi.ref_mvs[ref_frame];
  const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;

  // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
  // use the UV scaling factors.
  setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);

  // Gets an initial list of candidate vectors from neighbours and orders them.
  vp9_find_mv_refs(cm, xd, tile, mi, ref_frame, candidates, mi_row, mi_col);

  // Candidate refinement carried out at encoder and decoder.
  vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
                        &frame_nearest_mv[ref_frame],
                        &frame_near_mv[ref_frame]);

  // Further refinement that is encode side only to test the top few candidates
  // in full and choose the best as the centre point for subsequent searches.
  // The current implementation doesn't support scaling.
  if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8)
    mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
            ref_frame, block_size);
}

const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
                                                   int ref_frame) {
  const VP9_COMMON *const cm = &cpi->common;
  const int ref_idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
  const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
  return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL;
}

static INLINE int get_switchable_rate(const MACROBLOCK *x) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const int ctx = vp9_get_pred_context_switchable_interp(xd);
  return SWITCHABLE_INTERP_RATE_FACTOR *
         x->switchable_interp_costs[ctx][mbmi->interp_filter];
}

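// Single-reference NEWMV search: a full-pel search (method chosen by the
// speed features) starting from the best predicted MV, followed by sub-pel
// refinement, with the reference temporarily swapped for a pre-scaled
// version when the reference and current frame sizes differ.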
static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                 const TileInfo *const tile,
                                 BLOCK_SIZE bsize,
                                 int mi_row, int mi_col,
                                 int_mv *tmp_mv, int *rate_mv) {
  MACROBLOCKD *xd = &x->e_mbd;
  VP9_COMMON *cm = &cpi->common;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
  int bestsme = INT_MAX;
  int further_steps, step_param;
  int sadpb = x->sadperbit16;
  MV mvp_full;
  int ref = mbmi->ref_frame[0];
  MV ref_mv = mbmi->ref_mvs[ref][0].as_mv;

  int tmp_col_min = x->mv_col_min;
  int tmp_col_max = x->mv_col_max;
  int tmp_row_min = x->mv_row_min;
  int tmp_row_max = x->mv_row_max;

  const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
                                                                        ref);

  MV pred_mv[3];
  pred_mv[0] = mbmi->ref_mvs[ref][0].as_mv;
  pred_mv[1] = mbmi->ref_mvs[ref][1].as_mv;
  pred_mv[2] = x->pred_mv[ref].as_mv;

  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[0];

    vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
  }

  vp9_set_mv_search_range(x, &ref_mv);

  // Work out the size of the first step in the mv step search.
  // 0 here is maximum length first step. 1 is MAX >> 1 etc.
  if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
    // Take a weighted average of the step_params based on the last frame's
    // max mv magnitude and that based on the best ref mvs of the current
    // block for the given reference.
    step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) +
                  cpi->mv_step_param) >> 1;
  } else {
    step_param = cpi->mv_step_param;
  }

  if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 &&
      cpi->common.show_frame) {
    int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize),
                                                       b_width_log2(bsize)));
    step_param = MAX(step_param, boffset);
  }

  if (cpi->sf.adaptive_motion_search) {
    int bwl = b_width_log2_lookup[bsize];
    int bhl = b_height_log2_lookup[bsize];
    int i;
    int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);

    if (tlevel < 5)
      step_param += 2;

    for (i = LAST_FRAME; i <= ALTREF_FRAME && cpi->common.show_frame; ++i) {
      if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
        x->pred_mv[ref].as_int = 0;
        tmp_mv->as_int = INVALID_MV;

        if (scaled_ref_frame) {
          int i;
          for (i = 0; i < MAX_MB_PLANE; i++)
            xd->plane[i].pre[0] = backup_yv12[i];
        }
        return;
      }
    }
  }

  mvp_full = pred_mv[x->mv_best_ref_index[ref]];

  mvp_full.col >>= 3;
  mvp_full.row >>= 3;

  // Further step/diamond searches as necessary.
  further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;

  if (cpi->sf.search_method == FAST_DIAMOND) {
    bestsme = vp9_fast_dia_search(x, &mvp_full, step_param, sadpb, 0,
                                  &cpi->fn_ptr[bsize], 1,
                                  &ref_mv, &tmp_mv->as_mv);
    if (bestsme < INT_MAX)
      bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
                                   &cpi->fn_ptr[bsize], 1);
  } else if (cpi->sf.search_method == FAST_HEX) {
    bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, 0,
                                  &cpi->fn_ptr[bsize], 1,
                                  &ref_mv, &tmp_mv->as_mv);
    if (bestsme < INT_MAX)
      bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
                                   &cpi->fn_ptr[bsize], 1);
  } else if (cpi->sf.search_method == HEX) {
    bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1,
                             &cpi->fn_ptr[bsize], 1,
                             &ref_mv, &tmp_mv->as_mv);
    if (bestsme < INT_MAX)
      bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
                                   &cpi->fn_ptr[bsize], 1);
  } else if (cpi->sf.search_method == SQUARE) {
    bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1,
                                &cpi->fn_ptr[bsize], 1,
                                &ref_mv, &tmp_mv->as_mv);
    if (bestsme < INT_MAX)
      bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
                                   &cpi->fn_ptr[bsize], 1);
  } else if (cpi->sf.search_method == BIGDIA) {
    bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1,
                                &cpi->fn_ptr[bsize], 1,
                                &ref_mv, &tmp_mv->as_mv);
    if (bestsme < INT_MAX)
      bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
                                   &cpi->fn_ptr[bsize], 1);
  } else {
    bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
                                     sadpb, further_steps, 1,
                                     &cpi->fn_ptr[bsize],
                                     &ref_mv, &tmp_mv->as_mv);
  }

  x->mv_col_min = tmp_col_min;
  x->mv_col_max = tmp_col_max;
  x->mv_row_min = tmp_row_min;
  x->mv_row_max = tmp_row_max;

  if (bestsme < INT_MAX) {
    int dis;  /* TODO: use dis in distortion calculation later. */
    cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
                                 cm->allow_high_precision_mv,
                                 x->errorperbit,
                                 &cpi->fn_ptr[bsize],
                                 cpi->sf.subpel_force_stop,
                                 cpi->sf.subpel_iters_per_step,
                                 x->nmvjointcost, x->mvcost,
                                 &dis, &x->pred_sse[ref]);
  }
  *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
                             x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);

  if (cpi->sf.adaptive_motion_search && cpi->common.show_frame)
    x->pred_mv[ref].as_int = tmp_mv->as_int;

  if (scaled_ref_frame) {
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[0] = backup_yv12[i];
  }
}

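// Compound-prediction NEWMV refinement: alternately fixes one reference's
// MV, builds the predictor from the other reference, and runs a small
// refining search plus a sub-pel step on the free MV, iterating until
// neither reference improves. rate_mv returns the total MV signaling cost
// of the final pair.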
static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                BLOCK_SIZE bsize,
                                int_mv *frame_mv,
                                int mi_row, int mi_col,
                                int_mv single_newmv[MAX_REF_FRAMES],
                                int *rate_mv) {
  const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
  const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const int refs[2] = { mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
  int_mv ref_mv[2];
  int ite, ref;
  // Prediction buffer from second frame.
  uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
  const InterpKernel *kernel = vp9_get_interp_kernel(mbmi->interp_filter);

  // Do joint motion search in compound mode to get more accurate mv.
  struct buf_2d backup_yv12[2][MAX_MB_PLANE];
  struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0];
  int last_besterr[2] = {INT_MAX, INT_MAX};
  const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
    vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
    vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
  };

  for (ref = 0; ref < 2; ++ref) {
    ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];

    if (scaled_ref_frame[ref]) {
      int i;
      // Swap out the reference frame for a version that's been scaled to
      // match the resolution of the current frame, allowing the existing
      // motion search code to be used without additional modifications.
      for (i = 0; i < MAX_MB_PLANE; i++)
        backup_yv12[ref][i] = xd->plane[i].pre[ref];
      vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
                           NULL);
    }

    frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
  }

  // Allow the joint search to run multiple times iteratively for each ref
  // frame, and break out of the loop as soon as it fails to find a better mv.
  for (ite = 0; ite < 4; ite++) {
    struct buf_2d ref_yv12[2];
    int bestsme = INT_MAX;
    int sadpb = x->sadperbit16;
    int_mv tmp_mv;
    int search_range = 3;

    int tmp_col_min = x->mv_col_min;
    int tmp_col_max = x->mv_col_max;
    int tmp_row_min = x->mv_row_min;
    int tmp_row_max = x->mv_row_max;
    int id = ite % 2;

    // Initialized here because of a compiler problem in Visual Studio.
    ref_yv12[0] = xd->plane[0].pre[0];
    ref_yv12[1] = xd->plane[0].pre[1];

    // Get the prediction block from the other ('second') frame.
    vp9_build_inter_predictor(ref_yv12[!id].buf,
                              ref_yv12[!id].stride,
                              second_pred, pw,
                              &frame_mv[refs[!id]].as_mv,
                              &xd->block_refs[!id]->sf,
                              pw, ph, 0,
                              kernel, MV_PRECISION_Q3,
                              mi_col * MI_SIZE, mi_row * MI_SIZE);

    // Compound motion search on the first ref frame.
    if (id)
      xd->plane[0].pre[0] = ref_yv12[id];
    vp9_set_mv_search_range(x, &ref_mv[id].as_mv);

    // Use the mv result from single-prediction mode as the mvp.
    tmp_mv.as_int = frame_mv[refs[id]].as_int;

    tmp_mv.as_mv.col >>= 3;
    tmp_mv.as_mv.row >>= 3;

    // Small-range full-pixel motion search.
    bestsme = vp9_refining_search_8p_c(x, &tmp_mv.as_mv, sadpb,
                                       search_range,
                                       &cpi->fn_ptr[bsize],
                                       x->nmvjointcost, x->mvcost,
                                       &ref_mv[id].as_mv, second_pred,
                                       pw, ph);
    if (bestsme < INT_MAX)
      bestsme = vp9_get_mvpred_av_var(x, &tmp_mv.as_mv, &ref_mv[id].as_mv,
                                      second_pred, &cpi->fn_ptr[bsize], 1);

    x->mv_col_min = tmp_col_min;
    x->mv_col_max = tmp_col_max;
    x->mv_row_min = tmp_row_min;
    x->mv_row_max = tmp_row_max;

    if (bestsme < INT_MAX) {
      int dis;  /* TODO: use dis in distortion calculation later. */
      unsigned int sse;
      bestsme = cpi->find_fractional_mv_step_comp(
          x, &tmp_mv.as_mv,
          &ref_mv[id].as_mv,
          cpi->common.allow_high_precision_mv,
          x->errorperbit,
          &cpi->fn_ptr[bsize],
          0, cpi->sf.subpel_iters_per_step,
          x->nmvjointcost, x->mvcost,
          &dis, &sse, second_pred,
          pw, ph);
    }

    if (id)
      xd->plane[0].pre[0] = scaled_first_yv12;

    if (bestsme < last_besterr[id]) {
      frame_mv[refs[id]].as_int = tmp_mv.as_int;
      last_besterr[id] = bestsme;
    } else {
      break;
    }
  }

  *rate_mv = 0;

  for (ref = 0; ref < 2; ++ref) {
    if (scaled_ref_frame[ref]) {
      // Restore the predictor.
      int i;
      for (i = 0; i < MAX_MB_PLANE; i++)
        xd->plane[i].pre[ref] = backup_yv12[ref][i];
    }

    *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
                                &mbmi->ref_mvs[refs[ref]][0].as_mv,
                                x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
  }

  vpx_free(second_pred);
}

static INLINE void restore_dst_buf(MACROBLOCKD *xd,
                                   uint8_t *orig_dst[MAX_MB_PLANE],
                                   int orig_dst_stride[MAX_MB_PLANE]) {
  int i;
  for (i = 0; i < MAX_MB_PLANE; i++) {
    xd->plane[i].dst.buf = orig_dst[i];
    xd->plane[i].dst.stride = orig_dst_stride[i];
  }
}

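// Full RD evaluation of a single inter mode for blocks 8x8 and larger:
// runs the motion search for NEWMV, selects the interpolation filter by
// modeled RD cost, optionally takes the encode-breakout skip path, and
// finally measures the true rate and distortion for Y and UV.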
static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                 const TileInfo *const tile,
                                 BLOCK_SIZE bsize,
                                 int64_t txfm_cache[],
                                 int *rate2, int64_t *distortion,
                                 int *skippable,
                                 int *rate_y, int64_t *distortion_y,
                                 int *rate_uv, int64_t *distortion_uv,
                                 int *mode_excluded, int *disable_skip,
                                 INTERP_FILTER *best_filter,
                                 int_mv (*mode_mv)[MAX_REF_FRAMES],
                                 int mi_row, int mi_col,
                                 int_mv single_newmv[MAX_REF_FRAMES],
                                 int64_t *psse,
                                 const int64_t ref_best_rd) {
  VP9_COMMON *cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const int is_comp_pred = has_second_ref(mbmi);
  const int num_refs = is_comp_pred ? 2 : 1;
  const int this_mode = mbmi->mode;
  int_mv *frame_mv = mode_mv[this_mode];
  int i;
  int refs[2] = { mbmi->ref_frame[0],
                  (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
  int_mv cur_mv[2];
  int64_t this_rd = 0;
  DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
  int pred_exists = 0;
  int intpel_mv;
  int64_t rd, best_rd = INT64_MAX;
  int best_needs_copy = 0;
  uint8_t *orig_dst[MAX_MB_PLANE];
  int orig_dst_stride[MAX_MB_PLANE];
  int rs = 0;

  if (is_comp_pred) {
    if (frame_mv[refs[0]].as_int == INVALID_MV ||
        frame_mv[refs[1]].as_int == INVALID_MV)
      return INT64_MAX;
  }

  if (this_mode == NEWMV) {
    int rate_mv;
    if (is_comp_pred) {
      // Initialize the mv using the single prediction mode result.
      frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
      frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;

      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        joint_motion_search(cpi, x, bsize, frame_mv,
                            mi_row, mi_col, single_newmv, &rate_mv);
      } else {
        rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
                                  &mbmi->ref_mvs[refs[0]][0].as_mv,
                                  x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
        rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
                                   &mbmi->ref_mvs[refs[1]][0].as_mv,
                                   x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
      }
      *rate2 += rate_mv;
    } else {
      int_mv tmp_mv;
      single_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
                           &tmp_mv, &rate_mv);
      if (tmp_mv.as_int == INVALID_MV)
        return INT64_MAX;
      *rate2 += rate_mv;
      frame_mv[refs[0]].as_int =
          xd->mi[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
      single_newmv[refs[0]].as_int = tmp_mv.as_int;
    }
  }

  for (i = 0; i < num_refs; ++i) {
    cur_mv[i] = frame_mv[refs[i]];
    // Clip "next_nearest" so that it does not extend too far out of the image.
    if (this_mode != NEWMV)
      clamp_mv2(&cur_mv[i].as_mv, xd);

    if (mv_check_bounds(x, &cur_mv[i].as_mv))
      return INT64_MAX;
    mbmi->mv[i].as_int = cur_mv[i].as_int;
  }

  // Do the first prediction into the destination buffer. Do the next
  // prediction into a temporary buffer. Then keep track of which one
  // of these currently holds the best predictor, and use the other
  // one for future predictions. In the end, copy from tmp_buf to
  // dst if necessary.
  for (i = 0; i < MAX_MB_PLANE; i++) {
    orig_dst[i] = xd->plane[i].dst.buf;
    orig_dst_stride[i] = xd->plane[i].dst.stride;
  }

  /* We don't include the cost of the second reference here, because there
   * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
   * words if you present them in that order, the second one is always known
   * if the first is known. */
  *rate2 += cost_mv_ref(cpi, this_mode, mbmi->mode_context[refs[0]]);

  if (!(*mode_excluded))
    *mode_excluded = is_comp_pred ? cm->reference_mode == SINGLE_REFERENCE
                                  : cm->reference_mode == COMPOUND_REFERENCE;

  pred_exists = 0;
  // Are all MVs integer pel for Y and UV?
  intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
  if (is_comp_pred)
    intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);

  // Search for the best switchable filter by checking the variance of
  // pred error irrespective of whether the filter will be used.
  cpi->mask_filter_rd = 0;
  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
    cpi->rd_filter_cache[i] = INT64_MAX;

  if (cm->interp_filter != BILINEAR) {
    *best_filter = EIGHTTAP;
    if (x->source_variance <
        cpi->sf.disable_filter_search_var_thresh) {
      *best_filter = EIGHTTAP;
    } else {
      int newbest;
      int tmp_rate_sum = 0;
      int64_t tmp_dist_sum = 0;

      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        int j;
        int64_t rs_rd;
        mbmi->interp_filter = i;
        rs = get_switchable_rate(x);
        rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);

        if (i > 0 && intpel_mv) {
          rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
          cpi->rd_filter_cache[i] = rd;
          cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
              MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
          if (cm->interp_filter == SWITCHABLE)
            rd += rs_rd;
          cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd);
        } else {
          int rate_sum = 0;
          int64_t dist_sum = 0;
          if ((cm->interp_filter == SWITCHABLE &&
               (!i || best_needs_copy)) ||
              (cm->interp_filter != SWITCHABLE &&
               (cm->interp_filter == mbmi->interp_filter ||
                (i == 0 && intpel_mv)))) {
            restore_dst_buf(xd, orig_dst, orig_dst_stride);
          } else {
            for (j = 0; j < MAX_MB_PLANE; j++) {
              xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
              xd->plane[j].dst.stride = 64;
            }
          }
          vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
          model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);

          rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
          cpi->rd_filter_cache[i] = rd;
          cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
              MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
          if (cm->interp_filter == SWITCHABLE)
            rd += rs_rd;
          cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd);

          if (i == 0 && intpel_mv) {
            tmp_rate_sum = rate_sum;
            tmp_dist_sum = dist_sum;
          }
        }

        if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
          if (rd / 2 > ref_best_rd) {
            restore_dst_buf(xd, orig_dst, orig_dst_stride);
            return INT64_MAX;
          }
        }
        newbest = i == 0 || rd < best_rd;

        if (newbest) {
          best_rd = rd;
          *best_filter = mbmi->interp_filter;
          if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
            best_needs_copy = !best_needs_copy;
        }

        if ((cm->interp_filter == SWITCHABLE && newbest) ||
            (cm->interp_filter != SWITCHABLE &&
             cm->interp_filter == mbmi->interp_filter)) {
          pred_exists = 1;
        }
      }
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
    }
  }
  // Set the appropriate filter.
  mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
      cm->interp_filter : *best_filter;
  rs = cm->interp_filter == SWITCHABLE ? get_switchable_rate(x) : 0;

  if (pred_exists) {
    if (best_needs_copy) {
      // Again temporarily set the buffers to local memory to prevent a memcpy.
      for (i = 0; i < MAX_MB_PLANE; i++) {
        xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
        xd->plane[i].dst.stride = 64;
      }
    }
  } else {
    // Handles the special case when a filter that is not in the
    // switchable list (e.g. bilinear, 6-tap) is indicated at the frame level.
    vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
  }

  if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
    int tmp_rate;
    int64_t tmp_dist;
    model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
    rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
    // If the modeled rd of the current pred_error is substantially more than
    // the best so far, do not bother doing the full rd.
    if (rd / 2 > ref_best_rd) {
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
      return INT64_MAX;
    }
  }

  if (cm->interp_filter == SWITCHABLE)
    *rate2 += get_switchable_rate(x);

  if (!is_comp_pred) {
    if (!x->in_active_map) {
      if (psse)
        *psse = 0;
      *distortion = 0;
      x->skip = 1;
    } else if (cpi->allow_encode_breakout && x->encode_breakout) {
      const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
      const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
      unsigned int var, sse;
      // Skipping threshold for ac.
      unsigned int thresh_ac;
      // Set a maximum for the threshold to avoid a big PSNR loss in the low
      // bitrate case. Use an extremely low threshold for static frames to
      // limit skipping.
      const unsigned int max_thresh = (cpi->allow_encode_breakout ==
                                       ENCODE_BREAKOUT_LIMITED) ? 128 : 36000;
      // The encode_breakout input
      const unsigned int min_thresh =
          MIN(((unsigned int)x->encode_breakout << 4), max_thresh);

      // Calculate the threshold according to the dequant value.
      thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
      thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);

      var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
                                   xd->plane[0].dst.buf,
                                   xd->plane[0].dst.stride, &sse);

      // Adjust the threshold according to the partition size.
      thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
                         b_height_log2_lookup[bsize]);

      // Y skipping condition checking.
      if (sse < thresh_ac || sse == 0) {
        // Skipping threshold for dc.
        unsigned int thresh_dc;

        thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);

        // dc skipping checking.
        if ((sse - var) < thresh_dc || sse == var) {
          unsigned int sse_u, sse_v;
          unsigned int var_u, var_v;

          var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
                                          x->plane[1].src.stride,
                                          xd->plane[1].dst.buf,
                                          xd->plane[1].dst.stride, &sse_u);

          // U skipping condition checking.
          if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
              (sse_u - var_u < thresh_dc || sse_u == var_u)) {
            var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
                                            x->plane[2].src.stride,
                                            xd->plane[2].dst.buf,
                                            xd->plane[2].dst.stride, &sse_v);

            // V skipping condition checking.
            if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
                (sse_v - var_v < thresh_dc || sse_v == var_v)) {
              x->skip = 1;

              // The cost of the skip bit needs to be added.
              *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);

              // The scaling factor for SSE from the spatial domain to the
              // frequency domain is 16. Adjust distortion accordingly.
              *distortion_uv = (sse_u + sse_v) << 4;
              *distortion = (sse << 4) + *distortion_uv;

              *disable_skip = 1;
              this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
            }
          }
        }
      }
    }
  }

  if (!x->skip) {
    int skippable_y, skippable_uv;
    int64_t sseuv = INT64_MAX;
    int64_t rdcosty = INT64_MAX;

    // Y cost and distortion.
    inter_super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
                          bsize, txfm_cache, ref_best_rd);

    if (*rate_y == INT_MAX) {
      *rate2 = INT_MAX;
      *distortion = INT64_MAX;
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
      return INT64_MAX;
    }

    *rate2 += *rate_y;
    *distortion += *distortion_y;

    rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
    rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));

    super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
                     bsize, ref_best_rd - rdcosty);
    if (*rate_uv == INT_MAX) {
      *rate2 = INT_MAX;
      *distortion = INT64_MAX;
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
      return INT64_MAX;
    }

    *psse += sseuv;
    *rate2 += *rate_uv;
    *distortion += *distortion_uv;
    *skippable = skippable_y && skippable_uv;
  }

  restore_dst_buf(xd, orig_dst, orig_dst_stride);
  return this_rd;  // if 0, this will be re-calculated by caller
}

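// Swaps the working coefficient/eob buffers of the first max_plane planes
// with the ones saved in the pick-mode context, so the tokens produced for
// the best mode so far are preserved while a new candidate is evaluated.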
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                           int max_plane) {
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = x->e_mbd.plane;
  int i;

  for (i = 0; i < max_plane; ++i) {
    p[i].coeff = ctx->coeff_pbuf[i][1];
    p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
    pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
    p[i].eobs = ctx->eobs_pbuf[i][1];

    ctx->coeff_pbuf[i][1] = ctx->coeff_pbuf[i][0];
    ctx->qcoeff_pbuf[i][1] = ctx->qcoeff_pbuf[i][0];
    ctx->dqcoeff_pbuf[i][1] = ctx->dqcoeff_pbuf[i][0];
    ctx->eobs_pbuf[i][1] = ctx->eobs_pbuf[i][0];

    ctx->coeff_pbuf[i][0] = p[i].coeff;
    ctx->qcoeff_pbuf[i][0] = p[i].qcoeff;
    ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
    ctx->eobs_pbuf[i][0] = p[i].eobs;
  }
}

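// RD mode decision for a purely intra block: picks the best luma mode (whole
// block at 8x8 and above, per-4x4 below that) and then the best chroma mode,
// and returns the combined rate and distortion through returnrate/returndist.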
vp9_rd_pick_intra_mode_sb(VP9_COMP * cpi,MACROBLOCK * x,int * returnrate,int64_t * returndist,BLOCK_SIZE bsize,PICK_MODE_CONTEXT * ctx,int64_t best_rd)3064 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3065 int *returnrate, int64_t *returndist,
3066 BLOCK_SIZE bsize,
3067 PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
3068 VP9_COMMON *const cm = &cpi->common;
3069 MACROBLOCKD *const xd = &x->e_mbd;
3070 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
3071 int y_skip = 0, uv_skip = 0;
3072 int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
3073 TX_SIZE max_uv_tx_size;
3074 x->skip_encode = 0;
3075 ctx->skip = 0;
3076 xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
3077
3078 if (bsize >= BLOCK_8X8) {
3079 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3080 &dist_y, &y_skip, bsize, tx_cache,
3081 best_rd) >= best_rd) {
3082 *returnrate = INT_MAX;
3083 return;
3084 }
3085 max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize);
3086 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
3087 &dist_uv, &uv_skip, bsize, max_uv_tx_size);
3088 } else {
3089 y_skip = 0;
3090 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3091 &dist_y, best_rd) >= best_rd) {
3092 *returnrate = INT_MAX;
3093 return;
3094 }
3095 max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize);
3096 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
3097 &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size);
3098 }
3099
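       // If both planes are skippable, only mode-signaling bits are paid:
       // rate_y/rate_uv include the token costs, so subtracting the
       // token-only rates leaves just the cost of coding the prediction
       // modes, plus the cost of the skip flag itself.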
3100 if (y_skip && uv_skip) {
3101 *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
3102 vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
3103 *returndist = dist_y + dist_uv;
3104 vp9_zero(ctx->tx_rd_diff);
3105 } else {
3106 int i;
3107 *returnrate = rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3108 *returndist = dist_y + dist_uv;
3109 if (cpi->sf.tx_size_search_method == USE_FULL_RD)
3110 for (i = 0; i < TX_MODES; i++) {
3111 if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX)
3112 ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
3113 else
3114 ctx->tx_rd_diff[i] = 0;
3115 }
3116 }
3117
3118 ctx->mic = *xd->mi[0];
3119 }
3120
3121 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3122 const TileInfo *const tile,
3123 int mi_row, int mi_col,
3124 int *returnrate,
3125 int64_t *returndistortion,
3126 BLOCK_SIZE bsize,
3127 PICK_MODE_CONTEXT *ctx,
3128 int64_t best_rd_so_far) {
3129 VP9_COMMON *const cm = &cpi->common;
3130 MACROBLOCKD *const xd = &x->e_mbd;
3131 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
3132 const struct segmentation *const seg = &cm->seg;
3133 MB_PREDICTION_MODE this_mode;
3134 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3135 unsigned char segment_id = mbmi->segment_id;
3136 int comp_pred, i;
3137 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3138 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3139 int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
3140 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3141 VP9_ALT_FLAG };
3142 int64_t best_rd = best_rd_so_far;
3143 int64_t best_tx_rd[TX_MODES];
3144 int64_t best_tx_diff[TX_MODES];
3145 int64_t best_pred_diff[REFERENCE_MODES];
3146 int64_t best_pred_rd[REFERENCE_MODES];
3147 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3148 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3149 MB_MODE_INFO best_mbmode = { 0 };
3150 int mode_index, best_mode_index = 0;
3151 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3152 vp9_prob comp_mode_p;
3153 int64_t best_intra_rd = INT64_MAX;
3154 int64_t best_inter_rd = INT64_MAX;
3155 MB_PREDICTION_MODE best_intra_mode = DC_PRED;
3156 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3157 INTERP_FILTER tmp_best_filter = SWITCHABLE;
3158 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3159 int64_t dist_uv[TX_SIZES];
3160 int skip_uv[TX_SIZES];
3161 MB_PREDICTION_MODE mode_uv[TX_SIZES];
3162 int64_t mode_distortions[MB_MODE_COUNT] = {-1};
3163 int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
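       // The intra penalty scales with the DC dequant step; the intent
       // appears to be to discourage the more expensive intra modes at high
       // Q, where their extra signaling rarely pays for itself (see the
       // rate2 += intra_cost_penalty adjustment in the mode loop below).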
3164 const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
3165 const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
3166 int best_skip2 = 0;
3167 int mode_skip_mask = 0;
3168 int mode_skip_start = cpi->sf.mode_skip_start + 1;
3169 const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize];
3170 const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize];
3171 const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;
3172 const int intra_y_mode_mask =
3173 cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
3174 int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];
3175
3176 x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3177
3178 estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
3179 &comp_mode_p);
3180
3181 for (i = 0; i < REFERENCE_MODES; ++i)
3182 best_pred_rd[i] = INT64_MAX;
3183 for (i = 0; i < TX_MODES; i++)
3184 best_tx_rd[i] = INT64_MAX;
3185 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3186 best_filter_rd[i] = INT64_MAX;
3187 for (i = 0; i < TX_SIZES; i++)
3188 rate_uv_intra[i] = INT_MAX;
3189 for (i = 0; i < MAX_REF_FRAMES; ++i)
3190 x->pred_sse[i] = INT_MAX;
3191
3192 *returnrate = INT_MAX;
3193
3194 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3195 x->pred_mv_sad[ref_frame] = INT_MAX;
3196 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3197 vp9_setup_buffer_inter(cpi, x, tile,
3198 ref_frame, bsize, mi_row, mi_col,
3199 frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
3200 }
3201 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3202 frame_mv[ZEROMV][ref_frame].as_int = 0;
3203 }
3204
3205 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3206 // All modes from vp9_mode_order that use this frame as any ref
3207 static const int ref_frame_mask_all[] = {
3208 0x0, 0x123291, 0x25c444, 0x39b722
3209 };
3210 // Fixed mv modes (NEARESTMV, NEARMV, ZEROMV) from vp9_mode_order that use
3211 // this frame as their primary ref
3212 static const int ref_frame_mask_fixedmv[] = {
3213 0x0, 0x121281, 0x24c404, 0x080102
3214 };
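         // Bit i of these masks corresponds to vp9_mode_order[i], so OR-ing a
         // mask into mode_skip_mask disables the matching subset of modes in
         // the main mode loop below.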
3215 if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
3216 // Skip modes for missing references
3217 mode_skip_mask |= ref_frame_mask_all[ref_frame];
3218 } else if (cpi->sf.reference_masking) {
3219 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3220 // Skip fixed mv modes for poor references
3221 if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
3222 mode_skip_mask |= ref_frame_mask_fixedmv[ref_frame];
3223 break;
3224 }
3225 }
3226 }
3227     // If the segment reference frame feature is enabled, skip all modes
3228     // whose reference frame differs from the one allowed for this segment.
3229 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3230 vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
3231 mode_skip_mask |= ref_frame_mask_all[ref_frame];
3232 }
3233 }
3234
3235   // If the segment skip feature is enabled, mask out all inter modes
3236   // other than ZEROMV; intra and zero-mv modes remain selectable.
3237 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
3238 const int inter_non_zero_mode_mask = 0x1F7F7;
3239 mode_skip_mask |= inter_non_zero_mode_mask;
3240 }
3241
3242 // Disable this drop out case if the ref frame
3243 // segment level feature is enabled for this segment. This is to
3244 // prevent the possibility that we end up unable to pick any mode.
3245 if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
3246 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3247 // unless ARNR filtering is enabled in which case we want
3248 // an unfiltered alternative. We allow near/nearest as well
3249 // because they may result in zero-zero MVs but be cheaper.
3250 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
3251 const int altref_zero_mask =
3252 ~((1 << THR_NEARESTA) | (1 << THR_NEARA) | (1 << THR_ZEROA));
3253 mode_skip_mask |= altref_zero_mask;
3254 if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
3255 mode_skip_mask |= (1 << THR_NEARA);
3256 if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
3257 mode_skip_mask |= (1 << THR_NEARESTA);
3258 }
3259 }
3260
3261   // TODO(JBB): This is to make up for the fact that we don't have sad
3262   // functions that work when the block size reads outside the umv. We
3263   // should fix this either by making the motion search work only on a
3264   // representative block at the boundary first, or by implementing a sad
3265   // function that handles blocks reading outside the border.
3266 if ((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) {
3267 const int new_modes_mask =
3268 (1 << THR_NEWMV) | (1 << THR_NEWG) | (1 << THR_NEWA) |
3269 (1 << THR_COMP_NEWLA) | (1 << THR_COMP_NEWGA);
3270 mode_skip_mask |= new_modes_mask;
3271 }
3272
3273 if (bsize > cpi->sf.max_intra_bsize) {
3274 mode_skip_mask |= 0xFF30808;
3275 }
3276
3277 if (!x->in_active_map) {
3278 int mode_index;
3279 assert(cpi->ref_frame_flags & VP9_LAST_FLAG);
3280 if (frame_mv[NEARESTMV][LAST_FRAME].as_int == 0)
3281 mode_index = THR_NEARESTMV;
3282 else if (frame_mv[NEARMV][LAST_FRAME].as_int == 0)
3283 mode_index = THR_NEARMV;
3284 else
3285 mode_index = THR_ZEROMV;
3286 mode_skip_mask = ~(1 << mode_index);
3287 mode_skip_start = MAX_MODES;
3288 disable_inter_mode_mask = 0;
3289 }
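       // Outside the active map exactly one cheap mode survives: the first of
       // NEARESTMV/NEARMV/ZEROMV on LAST_FRAME whose candidate mv is zero, so
       // inactive regions end up coded as an effective copy of the last frame
       // (the asserts near the end of this function check this invariant).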
3290
3291 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3292 int mode_excluded = 0;
3293 int64_t this_rd = INT64_MAX;
3294 int disable_skip = 0;
3295 int compmode_cost = 0;
3296 int rate2 = 0, rate_y = 0, rate_uv = 0;
3297 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3298 int skippable = 0;
3299 int64_t tx_cache[TX_MODES];
3300 int i;
3301 int this_skip2 = 0;
3302 int64_t total_sse = INT64_MAX;
3303 int early_term = 0;
3304
3305 // Look at the reference frame of the best mode so far and set the
3306 // skip mask to look at a subset of the remaining modes.
3307 if (mode_index == mode_skip_start) {
3308 switch (vp9_mode_order[best_mode_index].ref_frame[0]) {
3309 case INTRA_FRAME:
3310 break;
3311 case LAST_FRAME:
3312 mode_skip_mask |= LAST_FRAME_MODE_MASK;
3313 break;
3314 case GOLDEN_FRAME:
3315 mode_skip_mask |= GOLDEN_FRAME_MODE_MASK;
3316 break;
3317 case ALTREF_FRAME:
3318 mode_skip_mask |= ALT_REF_MODE_MASK;
3319 break;
3320 case NONE:
3321 case MAX_REF_FRAMES:
3322 assert(0 && "Invalid Reference frame");
3323 }
3324 }
3325 if (mode_skip_mask & (1 << mode_index))
3326 continue;
3327
3328 // Test best rd so far against threshold for trying this mode.
3329 if (best_rd < ((int64_t)rd_threshes[mode_index] *
3330 rd_thresh_freq_fact[mode_index] >> 5) ||
3331 rd_threshes[mode_index] == INT_MAX)
3332 continue;
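         // The factor is applied in units of 1/32 (the >> 5), so a nominal
         // factor of 32 would leave the raw threshold unchanged; the
         // adaptive_rd_thresh update at the bottom of this function raises the
         // factor for modes that keep losing. (The 1/32 baseline is an
         // assumption inferred from the shift, not stated in this file.)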
3333
3334 this_mode = vp9_mode_order[mode_index].mode;
3335 ref_frame = vp9_mode_order[mode_index].ref_frame[0];
3336 if (ref_frame != INTRA_FRAME &&
3337 disable_inter_mode_mask & (1 << INTER_OFFSET(this_mode)))
3338 continue;
3339 second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
3340
3341 comp_pred = second_ref_frame > INTRA_FRAME;
3342 if (comp_pred) {
3343 if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
3344 vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME)
3345 continue;
3346 if ((mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) &&
3347 ref_frame != best_inter_ref_frame &&
3348 second_ref_frame != best_inter_ref_frame)
3349 continue;
3350 mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
3351 } else {
3352 if (ref_frame != INTRA_FRAME)
3353 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
3354 }
3355
3356 if (ref_frame == INTRA_FRAME) {
3357 if (!(intra_y_mode_mask & (1 << this_mode)))
3358 continue;
3359 if (this_mode != DC_PRED) {
3360 // Disable intra modes other than DC_PRED for blocks with low variance
3361 // Threshold for intra skipping based on source variance
3362 // TODO(debargha): Specialize the threshold for super block sizes
3363 const unsigned int skip_intra_var_thresh = 64;
3364 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
3365 x->source_variance < skip_intra_var_thresh)
3366 continue;
3367 // Only search the oblique modes if the best so far is
3368 // one of the neighboring directional modes
3369 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
3370 (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
3371 if (vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME)
3372 continue;
3373 }
3374 if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
3375 if (conditional_skipintra(this_mode, best_intra_mode))
3376 continue;
3377 }
3378 }
3379 } else {
3380 if (x->in_active_map &&
3381 !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
3382 if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
3383 disable_inter_mode_mask, this_mode, ref_frame,
3384 second_ref_frame))
3385 continue;
3386 }
3387
3388 mbmi->mode = this_mode;
3389 mbmi->uv_mode = x->in_active_map ? DC_PRED : this_mode;
3390 mbmi->ref_frame[0] = ref_frame;
3391 mbmi->ref_frame[1] = second_ref_frame;
3392 // Evaluate all sub-pel filters irrespective of whether we can use
3393 // them for this frame.
3394 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
3395 : cm->interp_filter;
3396 x->skip = 0;
3397 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
3398
3399 // Select prediction reference frames.
3400 for (i = 0; i < MAX_MB_PLANE; i++) {
3401 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3402 if (comp_pred)
3403 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3404 }
3405
3406 for (i = 0; i < TX_MODES; ++i)
3407 tx_cache[i] = INT64_MAX;
3408
3409 #ifdef MODE_TEST_HIT_STATS
3410 // TEST/DEBUG CODE
3411     // Keep a record of the number of test hits at each size
3412 cpi->mode_test_hits[bsize]++;
3413 #endif
3414
3415 if (ref_frame == INTRA_FRAME) {
3416 TX_SIZE uv_tx;
3417 intra_super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
3418 bsize, tx_cache, best_rd);
3419
3420 if (rate_y == INT_MAX)
3421 continue;
3422
3423 uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize);
3424 if (rate_uv_intra[uv_tx] == INT_MAX) {
3425 choose_intra_uv_mode(cpi, ctx, bsize, uv_tx,
3426 &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
3427 &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
3428 }
3429
3430 rate_uv = rate_uv_tokenonly[uv_tx];
3431 distortion_uv = dist_uv[uv_tx];
3432 skippable = skippable && skip_uv[uv_tx];
3433 mbmi->uv_mode = mode_uv[uv_tx];
3434
3435 rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
3436 if (this_mode != DC_PRED && this_mode != TM_PRED)
3437 rate2 += intra_cost_penalty;
3438 distortion2 = distortion_y + distortion_uv;
3439 } else {
3440 this_rd = handle_inter_mode(cpi, x, tile, bsize,
3441 tx_cache,
3442 &rate2, &distortion2, &skippable,
3443 &rate_y, &distortion_y,
3444 &rate_uv, &distortion_uv,
3445 &mode_excluded, &disable_skip,
3446 &tmp_best_filter, frame_mv,
3447 mi_row, mi_col,
3448 single_newmv, &total_sse, best_rd);
3449 if (this_rd == INT64_MAX)
3450 continue;
3451
3452 compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
3453
3454 if (cm->reference_mode == REFERENCE_MODE_SELECT)
3455 rate2 += compmode_cost;
3456 }
3457
3458 // Estimate the reference frame signaling cost and add it
3459 // to the rolling cost variable.
3460 if (comp_pred) {
3461 rate2 += ref_costs_comp[ref_frame];
3462 } else {
3463 rate2 += ref_costs_single[ref_frame];
3464 }
3465
3466 if (!disable_skip) {
3467 // Test for the condition where skip block will be activated
3468       // because there are no non-zero coefficients and make any
3469       // necessary adjustment for rate. Ignore if skip is coded at
3470       // the segment level, as the cost won't have been added in.
3471 // Is Mb level skip allowed (i.e. not coded at segment level).
3472 const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
3473 SEG_LVL_SKIP);
3474
3475 if (skippable) {
3476 // Back out the coefficient coding costs
3477 rate2 -= (rate_y + rate_uv);
3478 // for best yrd calculation
3479 rate_uv = 0;
3480
3481 if (mb_skip_allowed) {
3482 int prob_skip_cost;
3483
3484 // Cost the skip mb case
3485 vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
3486 if (skip_prob) {
3487 prob_skip_cost = vp9_cost_bit(skip_prob, 1);
3488 rate2 += prob_skip_cost;
3489 }
3490 }
3491 } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
3492 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
3493 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
3494 // Add in the cost of the no skip flag.
3495 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3496 } else {
3497 // FIXME(rbultje) make this work for splitmv also
3498 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
3499 distortion2 = total_sse;
3500 assert(total_sse >= 0);
3501 rate2 -= (rate_y + rate_uv);
3502 rate_y = 0;
3503 rate_uv = 0;
3504 this_skip2 = 1;
3505 }
3506 } else if (mb_skip_allowed) {
3507 // Add in the cost of the no skip flag.
3508 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3509 }
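           // The comparison above asks: is coding the residual (token rate
           // plus reconstructed distortion) cheaper than signaling skip and
           // accepting the full prediction error (zero token rate, SSE as
           // distortion)? If not, the residual is zeroed and skip is taken.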
3510
3511 // Calculate the final RD estimate for this mode.
3512 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3513 }
3514
3515 if (ref_frame == INTRA_FRAME) {
3516 // Keep record of best intra rd
3517 if (this_rd < best_intra_rd) {
3518 best_intra_rd = this_rd;
3519 best_intra_mode = mbmi->mode;
3520 }
3521 } else {
3522 // Keep record of best inter rd with single reference
3523 if (!comp_pred && !mode_excluded && this_rd < best_inter_rd) {
3524 best_inter_rd = this_rd;
3525 best_inter_ref_frame = ref_frame;
3526 }
3527 }
3528
3529 if (!disable_skip && ref_frame == INTRA_FRAME) {
3530 for (i = 0; i < REFERENCE_MODES; ++i)
3531 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
3532 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3533 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
3534 }
3535
3536 // Store the respective mode distortions for later use.
3537 if (mode_distortions[this_mode] == -1
3538 || distortion2 < mode_distortions[this_mode]) {
3539 mode_distortions[this_mode] = distortion2;
3540 }
3541
3542     // Did this mode help, i.e. is it the new best mode so far?
3543 if (this_rd < best_rd || x->skip) {
3544 int max_plane = MAX_MB_PLANE;
3545 if (!mode_excluded) {
3546 // Note index of best mode so far
3547 best_mode_index = mode_index;
3548
3549 if (ref_frame == INTRA_FRAME) {
3550 /* required for left and above block mv */
3551 mbmi->mv[0].as_int = 0;
3552 max_plane = 1;
3553 }
3554
3555 *returnrate = rate2;
3556 *returndistortion = distortion2;
3557 best_rd = this_rd;
3558 best_mbmode = *mbmi;
3559 best_skip2 = this_skip2;
3560 if (!x->select_txfm_size)
3561 swap_block_ptr(x, ctx, max_plane);
3562 vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
3563 sizeof(uint8_t) * ctx->num_4x4_blk);
3564
3565 // TODO(debargha): enhance this test with a better distortion prediction
3566 // based on qp, activity mask and history
3567 if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
3568 (mode_index > MIN_EARLY_TERM_INDEX)) {
3569 const int qstep = xd->plane[0].dequant[1];
3570 // TODO(debargha): Enhance this by specializing for each mode_index
3571 int scale = 4;
3572 if (x->source_variance < UINT_MAX) {
3573 const int var_adjust = (x->source_variance < 16);
3574 scale -= var_adjust;
3575 }
3576 if (ref_frame > INTRA_FRAME &&
3577 distortion2 * scale < qstep * qstep) {
3578 early_term = 1;
3579 }
3580 }
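             // Rationale (a heuristic, per the TODOs above): qstep * qstep
             // approximates the distortion a single quantizer step
             // introduces, so once the scaled prediction error drops below
             // it, further mode search is unlikely to improve the result.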
3581 }
3582 }
3583
3584 /* keep record of best compound/single-only prediction */
3585 if (!disable_skip && ref_frame != INTRA_FRAME) {
3586 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
3587
3588 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
3589 single_rate = rate2 - compmode_cost;
3590 hybrid_rate = rate2;
3591 } else {
3592 single_rate = rate2;
3593 hybrid_rate = rate2 + compmode_cost;
3594 }
3595
3596 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3597 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
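           // single_rd prices this mode as if the frame-level reference mode
           // were fixed (no comp-mode bit); hybrid_rd prices it under
           // REFERENCE_MODE_SELECT, where that bit must be sent. Tracking
           // both lets the best frame-level choice be judged after the loop.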
3598
3599 if (!comp_pred) {
3600 if (single_rd < best_pred_rd[SINGLE_REFERENCE]) {
3601 best_pred_rd[SINGLE_REFERENCE] = single_rd;
3602 }
3603 } else {
3604 if (single_rd < best_pred_rd[COMPOUND_REFERENCE]) {
3605 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
3606 }
3607 }
3608 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
3609 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
3610
3611 /* keep record of best filter type */
3612 if (!mode_excluded && cm->interp_filter != BILINEAR) {
3613 int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ?
3614 SWITCHABLE_FILTERS : cm->interp_filter];
3615
3616 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3617 int64_t adj_rd;
3618 if (ref == INT64_MAX)
3619 adj_rd = 0;
3620 else if (cpi->rd_filter_cache[i] == INT64_MAX)
3621           // When early termination is triggered, the encoder does not have
3622           // access to the rate-distortion cost. It only knows that the cost
3623           // should be above the maximum valid value, so it takes the known
3624           // maximum plus an arbitrary constant as the rate-distortion cost.
3625 adj_rd = cpi->mask_filter_rd - ref + 10;
3626 else
3627 adj_rd = cpi->rd_filter_cache[i] - ref;
3628
3629 adj_rd += this_rd;
3630 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
3631 }
3632 }
3633 }
3634
3635 /* keep record of best txfm size */
3636 if (bsize < BLOCK_32X32) {
3637 if (bsize < BLOCK_16X16)
3638 tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
3639
3640 tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
3641 }
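         // Transform sizes larger than the block were never searched, so
         // their slots inherit the rd cost of the largest size actually
         // tried; this keeps tx_cache entries comparable across block sizes.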
3642 if (!mode_excluded && this_rd != INT64_MAX) {
3643 for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
3644 int64_t adj_rd = INT64_MAX;
3645 adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
3646
3647 if (adj_rd < best_tx_rd[i])
3648 best_tx_rd[i] = adj_rd;
3649 }
3650 }
3651
3652 if (early_term)
3653 break;
3654
3655 if (x->skip && !comp_pred)
3656 break;
3657 }
3658
3659 if (best_rd >= best_rd_so_far)
3660 return INT64_MAX;
3661
3662 // If we used an estimate for the uv intra rd in the loop above...
3663 if (cpi->sf.use_uv_intra_rd_estimate) {
3664 // Do Intra UV best rd mode selection if best mode choice above was intra.
3665 if (vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME) {
3666 TX_SIZE uv_tx_size;
3667 *mbmi = best_mbmode;
3668 uv_tx_size = get_uv_tx_size(mbmi);
3669 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
3670 &rate_uv_tokenonly[uv_tx_size],
3671 &dist_uv[uv_tx_size],
3672 &skip_uv[uv_tx_size],
3673 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
3674 uv_tx_size);
3675 }
3676 }
3677
3678 assert((cm->interp_filter == SWITCHABLE) ||
3679 (cm->interp_filter == best_mbmode.interp_filter) ||
3680 !is_inter_block(&best_mbmode));
3681
3682 // Updating rd_thresh_freq_fact[] here means that the different
3683 // partition/block sizes are handled independently based on the best
3684 // choice for the current partition. It may well be better to keep a scaled
3685 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
3686 // combination that wins out.
3687 if (cpi->sf.adaptive_rd_thresh) {
3688 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3689 int *const fact = &cpi->rd_thresh_freq_fact[bsize][mode_index];
3690
3691 if (mode_index == best_mode_index) {
3692 *fact -= (*fact >> 3);
3693 } else {
3694 *fact = MIN(*fact + RD_THRESH_INC,
3695 cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
3696 }
3697 }
3698 }
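       // The winner's factor decays by 1/8th, making that mode cheaper to
       // re-try next time, while every losing mode's factor creeps up by
       // RD_THRESH_INC until capped, progressively pruning modes that
       // never win.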
3699
3700 // macroblock modes
3701 *mbmi = best_mbmode;
3702 x->skip |= best_skip2;
3703
3704 for (i = 0; i < REFERENCE_MODES; ++i) {
3705 if (best_pred_rd[i] == INT64_MAX)
3706 best_pred_diff[i] = INT_MIN;
3707 else
3708 best_pred_diff[i] = best_rd - best_pred_rd[i];
3709 }
3710
3711 if (!x->skip) {
3712 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3713 if (best_filter_rd[i] == INT64_MAX)
3714 best_filter_diff[i] = 0;
3715 else
3716 best_filter_diff[i] = best_rd - best_filter_rd[i];
3717 }
3718 if (cm->interp_filter == SWITCHABLE)
3719 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
3720 for (i = 0; i < TX_MODES; i++) {
3721 if (best_tx_rd[i] == INT64_MAX)
3722 best_tx_diff[i] = 0;
3723 else
3724 best_tx_diff[i] = best_rd - best_tx_rd[i];
3725 }
3726 } else {
3727 vp9_zero(best_filter_diff);
3728 vp9_zero(best_tx_diff);
3729 }
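       // Each *_diff entry records how far the overall winner beat the best
       // rd achievable under that fixed hypothesis; store_coding_context
       // hands them back to the caller, presumably so higher-level code can
       // bias future reference/filter/tx decisions (the consumers live
       // outside this function).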
3730
3731 if (!x->in_active_map) {
3732 assert(mbmi->ref_frame[0] == LAST_FRAME);
3733 assert(mbmi->ref_frame[1] == NONE);
3734 assert(mbmi->mode == NEARESTMV ||
3735 mbmi->mode == NEARMV ||
3736 mbmi->mode == ZEROMV);
3737 assert(frame_mv[mbmi->mode][LAST_FRAME].as_int == 0);
3738 assert(mbmi->mode == mbmi->uv_mode);
3739 }
3740
3741 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3742 store_coding_context(x, ctx, best_mode_index,
3743 &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
3744 &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
3745 mbmi->ref_frame[1]][0],
3746 best_pred_diff, best_tx_diff, best_filter_diff);
3747
3748 return best_rd;
3749 }
3750
3751
3752 int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
3753 const TileInfo *const tile,
3754 int mi_row, int mi_col,
3755 int *returnrate,
3756 int64_t *returndistortion,
3757 BLOCK_SIZE bsize,
3758 PICK_MODE_CONTEXT *ctx,
3759 int64_t best_rd_so_far) {
3760 VP9_COMMON *cm = &cpi->common;
3761 MACROBLOCKD *xd = &x->e_mbd;
3762 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
3763 const struct segmentation *seg = &cm->seg;
3764 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3765 unsigned char segment_id = mbmi->segment_id;
3766 int comp_pred, i;
3767 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3768 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3769 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3770 VP9_ALT_FLAG };
3771 int64_t best_rd = best_rd_so_far;
3772 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
3773 int64_t best_tx_rd[TX_MODES];
3774 int64_t best_tx_diff[TX_MODES];
3775 int64_t best_pred_diff[REFERENCE_MODES];
3776 int64_t best_pred_rd[REFERENCE_MODES];
3777 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3778 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3779 MB_MODE_INFO best_mbmode = { 0 };
3780 int mode_index, best_mode_index = 0;
3781 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3782 vp9_prob comp_mode_p;
3783 int64_t best_inter_rd = INT64_MAX;
3784 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3785 INTERP_FILTER tmp_best_filter = SWITCHABLE;
3786 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3787 int64_t dist_uv[TX_SIZES];
3788 int skip_uv[TX_SIZES];
3789 MB_PREDICTION_MODE mode_uv[TX_SIZES] = { 0 };
3790 int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
3791 int_mv seg_mvs[4][MAX_REF_FRAMES];
3792 b_mode_info best_bmodes[4];
3793 int best_skip2 = 0;
3794 int ref_frame_mask = 0;
3795 int mode_skip_mask = 0;
3796
3797 x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3798 vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
3799
3800 for (i = 0; i < 4; i++) {
3801 int j;
3802 for (j = 0; j < MAX_REF_FRAMES; j++)
3803 seg_mvs[i][j].as_int = INVALID_MV;
3804 }
3805
3806 estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
3807 &comp_mode_p);
3808
3809 for (i = 0; i < REFERENCE_MODES; ++i)
3810 best_pred_rd[i] = INT64_MAX;
3811 for (i = 0; i < TX_MODES; i++)
3812 best_tx_rd[i] = INT64_MAX;
3813 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3814 best_filter_rd[i] = INT64_MAX;
3815 for (i = 0; i < TX_SIZES; i++)
3816 rate_uv_intra[i] = INT_MAX;
3817
3818 *returnrate = INT_MAX;
3819
3820 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3821 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3822 vp9_setup_buffer_inter(cpi, x, tile,
3823 ref_frame, bsize, mi_row, mi_col,
3824 frame_mv[NEARESTMV], frame_mv[NEARMV],
3825 yv12_mb);
3826 }
3827 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3828 frame_mv[ZEROMV][ref_frame].as_int = 0;
3829 }
3830
3831 for (ref_frame = LAST_FRAME;
3832 ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) {
3833 int i;
3834 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3835 if ((x->pred_mv_sad[ref_frame] >> 1) > x->pred_mv_sad[i]) {
3836 ref_frame_mask |= (1 << ref_frame);
3837 break;
3838 }
3839 }
3840 }
3841
3842 for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
3843 int mode_excluded = 0;
3844 int64_t this_rd = INT64_MAX;
3845 int disable_skip = 0;
3846 int compmode_cost = 0;
3847 int rate2 = 0, rate_y = 0, rate_uv = 0;
3848 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3849 int skippable = 0;
3850 int64_t tx_cache[TX_MODES];
3851 int i;
3852 int this_skip2 = 0;
3853     int64_t total_sse = INT64_MAX;
3854 int early_term = 0;
3855
3856 for (i = 0; i < TX_MODES; ++i)
3857 tx_cache[i] = INT64_MAX;
3858
3859 x->skip = 0;
3860 ref_frame = vp9_ref_order[mode_index].ref_frame[0];
3861 second_ref_frame = vp9_ref_order[mode_index].ref_frame[1];
3862
3863 // Look at the reference frame of the best mode so far and set the
3864 // skip mask to look at a subset of the remaining modes.
3865 if (mode_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) {
3866 if (mode_index == 3) {
3867 switch (vp9_ref_order[best_mode_index].ref_frame[0]) {
3868 case INTRA_FRAME:
3869 mode_skip_mask = 0;
3870 break;
3871 case LAST_FRAME:
3872 mode_skip_mask = 0x0010;
3873 break;
3874 case GOLDEN_FRAME:
3875 mode_skip_mask = 0x0008;
3876 break;
3877 case ALTREF_FRAME:
3878 mode_skip_mask = 0x0000;
3879 break;
3880 case NONE:
3881 case MAX_REF_FRAMES:
3882 assert(0 && "Invalid Reference frame");
3883 }
3884 }
3885 if (mode_skip_mask & (1 << mode_index))
3886 continue;
3887 }
3888
3889 // Test best rd so far against threshold for trying this mode.
3890 if ((best_rd <
3891 ((int64_t)cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] *
3892 cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 5)) ||
3893 cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] == INT_MAX)
3894 continue;
3895
3896 // Do not allow compound prediction if the segment level reference
3897 // frame feature is in use as in this case there can only be one reference.
3898 if ((second_ref_frame > INTRA_FRAME) &&
3899 vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
3900 continue;
3901
3902 mbmi->ref_frame[0] = ref_frame;
3903 mbmi->ref_frame[1] = second_ref_frame;
3904
3905 if (!(ref_frame == INTRA_FRAME
3906 || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
3907 continue;
3908 }
3909 if (!(second_ref_frame == NONE
3910 || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) {
3911 continue;
3912 }
3913
3914 comp_pred = second_ref_frame > INTRA_FRAME;
3915 if (comp_pred) {
3916 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
3917 if (vp9_ref_order[best_mode_index].ref_frame[0] == INTRA_FRAME)
3918 continue;
3919 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
3920 if (ref_frame != best_inter_ref_frame &&
3921 second_ref_frame != best_inter_ref_frame)
3922 continue;
3923 }
3924
3925 // TODO(jingning, jkoleszar): scaling reference frame not supported for
3926 // sub8x8 blocks.
3927 if (ref_frame > 0 && vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
3928 continue;
3929
3930 if (second_ref_frame > 0 &&
3931 vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
3932 continue;
3933
3934 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
3935 mbmi->uv_mode = DC_PRED;
3936
3937 // Evaluate all sub-pel filters irrespective of whether we can use
3938 // them for this frame.
3939 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
3940 : cm->interp_filter;
3941
3942 if (comp_pred) {
3943 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
3944 continue;
3945
3946 mode_excluded = mode_excluded ? mode_excluded
3947 : cm->reference_mode == SINGLE_REFERENCE;
3948 } else {
3949 if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) {
3950 mode_excluded = mode_excluded ?
3951 mode_excluded : cm->reference_mode == COMPOUND_REFERENCE;
3952 }
3953 }
3954
3955 // Select prediction reference frames.
3956 for (i = 0; i < MAX_MB_PLANE; i++) {
3957 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3958 if (comp_pred)
3959 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3960 }
3961
3962     // If the segment reference frame feature is enabled, skip this mode
3963     // when the current ref frame is not the one allowed for the segment.
3964 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3965 vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
3966 (int)ref_frame) {
3967 continue;
3968     // If the segment skip feature is enabled, skip all inter modes;
3969     // only intra prediction remains available.
3970 } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
3971 ref_frame != INTRA_FRAME) {
3972 continue;
3973 // Disable this drop out case if the ref frame
3974 // segment level feature is enabled for this segment. This is to
3975 // prevent the possibility that we end up unable to pick any mode.
3976 } else if (!vp9_segfeature_active(seg, segment_id,
3977 SEG_LVL_REF_FRAME)) {
3978 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3979 // unless ARNR filtering is enabled in which case we want
3980 // an unfiltered alternative. We allow near/nearest as well
3981 // because they may result in zero-zero MVs but be cheaper.
3982 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
3983 continue;
3984 }
3985
3986 #ifdef MODE_TEST_HIT_STATS
3987 // TEST/DEBUG CODE
3988     // Keep a record of the number of test hits at each size
3989 cpi->mode_test_hits[bsize]++;
3990 #endif
3991
3992 if (ref_frame == INTRA_FRAME) {
3993 int rate;
3994 mbmi->tx_size = TX_4X4;
3995 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
3996 &distortion_y, best_rd) >= best_rd)
3997 continue;
3998 rate2 += rate;
3999 rate2 += intra_cost_penalty;
4000 distortion2 += distortion_y;
4001
4002 if (rate_uv_intra[TX_4X4] == INT_MAX) {
4003 choose_intra_uv_mode(cpi, ctx, bsize, TX_4X4,
4004 &rate_uv_intra[TX_4X4],
4005 &rate_uv_tokenonly[TX_4X4],
4006 &dist_uv[TX_4X4], &skip_uv[TX_4X4],
4007 &mode_uv[TX_4X4]);
4008 }
4009 rate2 += rate_uv_intra[TX_4X4];
4010 rate_uv = rate_uv_tokenonly[TX_4X4];
4011 distortion2 += dist_uv[TX_4X4];
4012 distortion_uv = dist_uv[TX_4X4];
4013 mbmi->uv_mode = mode_uv[TX_4X4];
4014 tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4015 for (i = 0; i < TX_MODES; ++i)
4016 tx_cache[i] = tx_cache[ONLY_4X4];
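           // Sub-8x8 intra blocks only ever use the 4x4 transform, so every
           // tx_cache entry mirrors ONLY_4X4 and the tx-mode comparison
           // later becomes a no-op for this path.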
4017 } else {
4018 int rate;
4019 int64_t distortion;
4020 int64_t this_rd_thresh;
4021 int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
4022 int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
4023       int64_t tmp_best_distortion = INT64_MAX, tmp_best_sse, uv_sse;
4024 int tmp_best_skippable = 0;
4025 int switchable_filter_index;
4026 int_mv *second_ref = comp_pred ?
4027 &mbmi->ref_mvs[second_ref_frame][0] : NULL;
4028 b_mode_info tmp_best_bmodes[16];
4029 MB_MODE_INFO tmp_best_mbmode;
4030 BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
4031 int pred_exists = 0;
4032 int uv_skippable;
4033
4034 this_rd_thresh = (ref_frame == LAST_FRAME) ?
4035 cpi->rd_thresh_sub8x8[segment_id][bsize][THR_LAST] :
4036 cpi->rd_thresh_sub8x8[segment_id][bsize][THR_ALTR];
4037 this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
4038 cpi->rd_thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh;
4039 xd->mi[0]->mbmi.tx_size = TX_4X4;
4040
4041 cpi->mask_filter_rd = 0;
4042 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
4043 cpi->rd_filter_cache[i] = INT64_MAX;
4044
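           // Filter selection strategy: with low source variance or a filter
           // predicted by the speed features, take EIGHTTAP (or the
           // predicted filter) directly; otherwise rd-search every
           // switchable filter via rd_pick_best_mbsegmentation below.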
4045 if (cm->interp_filter != BILINEAR) {
4046 tmp_best_filter = EIGHTTAP;
4047 if (x->source_variance <
4048 cpi->sf.disable_filter_search_var_thresh) {
4049 tmp_best_filter = EIGHTTAP;
4050 } else if (cpi->sf.adaptive_pred_interp_filter == 1 &&
4051 ctx->pred_interp_filter < SWITCHABLE) {
4052 tmp_best_filter = ctx->pred_interp_filter;
4053 } else if (cpi->sf.adaptive_pred_interp_filter == 2) {
4054 tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?
4055 ctx->pred_interp_filter : 0;
4056 } else {
4057 for (switchable_filter_index = 0;
4058 switchable_filter_index < SWITCHABLE_FILTERS;
4059 ++switchable_filter_index) {
4060 int newbest, rs;
4061 int64_t rs_rd;
4062 mbmi->interp_filter = switchable_filter_index;
4063 tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile,
4064 &mbmi->ref_mvs[ref_frame][0],
4065 second_ref,
4066 best_yrd,
4067 &rate, &rate_y, &distortion,
4068 &skippable, &total_sse,
4069 (int)this_rd_thresh, seg_mvs,
4070 bsi, switchable_filter_index,
4071 mi_row, mi_col);
4072
4073 if (tmp_rd == INT64_MAX)
4074 continue;
4075 rs = get_switchable_rate(x);
4076 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
4077 cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
4078 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
4079 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
4080 tmp_rd + rs_rd);
4081 if (cm->interp_filter == SWITCHABLE)
4082 tmp_rd += rs_rd;
4083
4084 cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, tmp_rd);
4085
4086 newbest = (tmp_rd < tmp_best_rd);
4087 if (newbest) {
4088 tmp_best_filter = mbmi->interp_filter;
4089 tmp_best_rd = tmp_rd;
4090 }
4091 if ((newbest && cm->interp_filter == SWITCHABLE) ||
4092 (mbmi->interp_filter == cm->interp_filter &&
4093 cm->interp_filter != SWITCHABLE)) {
4094 tmp_best_rdu = tmp_rd;
4095 tmp_best_rate = rate;
4096 tmp_best_ratey = rate_y;
4097 tmp_best_distortion = distortion;
4098 tmp_best_sse = total_sse;
4099 tmp_best_skippable = skippable;
4100 tmp_best_mbmode = *mbmi;
4101 for (i = 0; i < 4; i++) {
4102 tmp_best_bmodes[i] = xd->mi[0]->bmi[i];
4103 x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
4104 }
4105 pred_exists = 1;
4106 if (switchable_filter_index == 0 &&
4107 cpi->sf.use_rd_breakout &&
4108 best_rd < INT64_MAX) {
4109 if (tmp_best_rdu / 2 > best_rd) {
4110 // skip searching the other filters if the first is
4111 // already substantially larger than the best so far
4112 tmp_best_filter = mbmi->interp_filter;
4113 tmp_best_rdu = INT64_MAX;
4114 break;
4115 }
4116 }
4117 }
4118 } // switchable_filter_index loop
4119 }
4120 }
4121
4122 if (tmp_best_rdu == INT64_MAX && pred_exists)
4123 continue;
4124
4125 mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
4126 tmp_best_filter : cm->interp_filter);
4127 if (!pred_exists) {
4128 // Handles the special case when a filter that is not in the
4129 // switchable list (bilinear, 6-tap) is indicated at the frame level
4130 tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile,
4131 &mbmi->ref_mvs[ref_frame][0],
4132 second_ref,
4133 best_yrd,
4134 &rate, &rate_y, &distortion,
4135 &skippable, &total_sse,
4136 (int)this_rd_thresh, seg_mvs,
4137 bsi, 0,
4138 mi_row, mi_col);
4139 if (tmp_rd == INT64_MAX)
4140 continue;
4141 } else {
4142 total_sse = tmp_best_sse;
4143 rate = tmp_best_rate;
4144 rate_y = tmp_best_ratey;
4145 distortion = tmp_best_distortion;
4146 skippable = tmp_best_skippable;
4147 *mbmi = tmp_best_mbmode;
4148 for (i = 0; i < 4; i++)
4149 xd->mi[0]->bmi[i] = tmp_best_bmodes[i];
4150 }
4151
4152 rate2 += rate;
4153 distortion2 += distortion;
4154
4155 if (cm->interp_filter == SWITCHABLE)
4156 rate2 += get_switchable_rate(x);
4157
4158 if (!mode_excluded)
4159 mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
4160 : cm->reference_mode == COMPOUND_REFERENCE;
4161
4162 compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
4163
4164 tmp_best_rdu = best_rd -
4165 MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
4166 RDCOST(x->rdmult, x->rddiv, 0, total_sse));
4167
4168 if (tmp_best_rdu > 0) {
4169         // If even the 'Y' rd value of split is higher than the best so far,
4170         // then don't bother looking at UV.
4171 vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
4172 BLOCK_8X8);
4173 super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
4174 &uv_sse, BLOCK_8X8, tmp_best_rdu);
4175 if (rate_uv == INT_MAX)
4176 continue;
4177 rate2 += rate_uv;
4178 distortion2 += distortion_uv;
4179 skippable = skippable && uv_skippable;
4180 total_sse += uv_sse;
4181
4182 tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4183 for (i = 0; i < TX_MODES; ++i)
4184 tx_cache[i] = tx_cache[ONLY_4X4];
4185 }
4186 }
4187
4188 if (cm->reference_mode == REFERENCE_MODE_SELECT)
4189 rate2 += compmode_cost;
4190
4191 // Estimate the reference frame signaling cost and add it
4192 // to the rolling cost variable.
4193 if (second_ref_frame > INTRA_FRAME) {
4194 rate2 += ref_costs_comp[ref_frame];
4195 } else {
4196 rate2 += ref_costs_single[ref_frame];
4197 }
4198
4199 if (!disable_skip) {
4200 // Test for the condition where skip block will be activated
4201       // because there are no non-zero coefficients and make any
4202       // necessary adjustment for rate. Ignore if skip is coded at
4203       // the segment level, as the cost won't have been added in.
4204 // Is Mb level skip allowed (i.e. not coded at segment level).
4205 const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
4206 SEG_LVL_SKIP);
4207
4208 if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
4209 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
4210 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
4211 // Add in the cost of the no skip flag.
4212 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
4213 } else {
4214 // FIXME(rbultje) make this work for splitmv also
4215 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
4216 distortion2 = total_sse;
4217 assert(total_sse >= 0);
4218 rate2 -= (rate_y + rate_uv);
4219 rate_y = 0;
4220 rate_uv = 0;
4221 this_skip2 = 1;
4222 }
4223 } else if (mb_skip_allowed) {
4224 // Add in the cost of the no skip flag.
4225 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
4226 }
4227
4228 // Calculate the final RD estimate for this mode.
4229 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4230 }
4231
4232 // Keep record of best inter rd with single reference
4233 if (is_inter_block(&xd->mi[0]->mbmi) &&
4234 !has_second_ref(&xd->mi[0]->mbmi) &&
4235 !mode_excluded &&
4236 this_rd < best_inter_rd) {
4237 best_inter_rd = this_rd;
4238 best_inter_ref_frame = ref_frame;
4239 }
4240
4241 if (!disable_skip && ref_frame == INTRA_FRAME) {
4242 for (i = 0; i < REFERENCE_MODES; ++i)
4243 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
4244 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
4245 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
4246 }
4247
4248     // Did this mode help, i.e. is it the new best mode so far?
4249 if (this_rd < best_rd || x->skip) {
4250 if (!mode_excluded) {
4251 int max_plane = MAX_MB_PLANE;
4252 // Note index of best mode so far
4253 best_mode_index = mode_index;
4254
4255 if (ref_frame == INTRA_FRAME) {
4256 /* required for left and above block mv */
4257 mbmi->mv[0].as_int = 0;
4258 max_plane = 1;
4259 }
4260
4261 *returnrate = rate2;
4262 *returndistortion = distortion2;
4263 best_rd = this_rd;
4264 best_yrd = best_rd -
4265 RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
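             // best_yrd backs the chroma cost out of best_rd so it can serve
             // as the luma-only bound handed to rd_pick_best_mbsegmentation
             // on later iterations of the mode loop.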
4266 best_mbmode = *mbmi;
4267 best_skip2 = this_skip2;
4268 if (!x->select_txfm_size)
4269 swap_block_ptr(x, ctx, max_plane);
4270 vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
4271 sizeof(uint8_t) * ctx->num_4x4_blk);
4272
4273 for (i = 0; i < 4; i++)
4274 best_bmodes[i] = xd->mi[0]->bmi[i];
4275
4276 // TODO(debargha): enhance this test with a better distortion prediction
4277 // based on qp, activity mask and history
4278 if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
4279 (mode_index > MIN_EARLY_TERM_INDEX)) {
4280 const int qstep = xd->plane[0].dequant[1];
4281 // TODO(debargha): Enhance this by specializing for each mode_index
4282 int scale = 4;
4283 if (x->source_variance < UINT_MAX) {
4284 const int var_adjust = (x->source_variance < 16);
4285 scale -= var_adjust;
4286 }
4287 if (ref_frame > INTRA_FRAME &&
4288 distortion2 * scale < qstep * qstep) {
4289 early_term = 1;
4290 }
4291 }
4292 }
4293 }
4294
4295 /* keep record of best compound/single-only prediction */
4296 if (!disable_skip && ref_frame != INTRA_FRAME) {
4297 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
4298
4299 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
4300 single_rate = rate2 - compmode_cost;
4301 hybrid_rate = rate2;
4302 } else {
4303 single_rate = rate2;
4304 hybrid_rate = rate2 + compmode_cost;
4305 }
4306
4307 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
4308 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
4309
4310 if (second_ref_frame <= INTRA_FRAME &&
4311 single_rd < best_pred_rd[SINGLE_REFERENCE]) {
4312 best_pred_rd[SINGLE_REFERENCE] = single_rd;
4313 } else if (second_ref_frame > INTRA_FRAME &&
4314 single_rd < best_pred_rd[COMPOUND_REFERENCE]) {
4315 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
4316 }
4317 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
4318 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
4319 }
4320
4321 /* keep record of best filter type */
4322 if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
4323 cm->interp_filter != BILINEAR) {
4324 int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ?
4325 SWITCHABLE_FILTERS : cm->interp_filter];
4326 int64_t adj_rd;
4327 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4328 if (ref == INT64_MAX)
4329 adj_rd = 0;
4330 else if (cpi->rd_filter_cache[i] == INT64_MAX)
4331         // When early termination is triggered, the encoder does not have
4332         // access to the rate-distortion cost. It only knows that the cost
4333         // should be above the maximum valid value, so it takes the known
4334         // maximum plus an arbitrary constant as the rate-distortion cost.
4335 adj_rd = cpi->mask_filter_rd - ref + 10;
4336 else
4337 adj_rd = cpi->rd_filter_cache[i] - ref;
4338
4339 adj_rd += this_rd;
4340 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
4341 }
4342 }
4343
4344 /* keep record of best txfm size */
4345 if (bsize < BLOCK_32X32) {
4346 if (bsize < BLOCK_16X16) {
4347 tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4];
4348 tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
4349 }
4350 tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
4351 }
4352 if (!mode_excluded && this_rd != INT64_MAX) {
4353 for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
4354 int64_t adj_rd = INT64_MAX;
4355 if (ref_frame > INTRA_FRAME)
4356 adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
4357 else
4358 adj_rd = this_rd;
4359
4360 if (adj_rd < best_tx_rd[i])
4361 best_tx_rd[i] = adj_rd;
4362 }
4363 }
4364
4365 if (early_term)
4366 break;
4367
4368 if (x->skip && !comp_pred)
4369 break;
4370 }
4371
4372 if (best_rd >= best_rd_so_far)
4373 return INT64_MAX;
4374
4375 // If we used an estimate for the uv intra rd in the loop above...
4376 if (cpi->sf.use_uv_intra_rd_estimate) {
4377 // Do Intra UV best rd mode selection if best mode choice above was intra.
4378 if (vp9_ref_order[best_mode_index].ref_frame[0] == INTRA_FRAME) {
4379 TX_SIZE uv_tx_size;
4380 *mbmi = best_mbmode;
4381 uv_tx_size = get_uv_tx_size(mbmi);
4382 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
4383 &rate_uv_tokenonly[uv_tx_size],
4384 &dist_uv[uv_tx_size],
4385 &skip_uv[uv_tx_size],
4386 BLOCK_8X8, uv_tx_size);
4387 }
4388 }
4389
4390 if (best_rd == INT64_MAX && bsize < BLOCK_8X8) {
4391 *returnrate = INT_MAX;
4392 *returndistortion = INT64_MAX;
4393 return best_rd;
4394 }
4395
4396 assert((cm->interp_filter == SWITCHABLE) ||
4397 (cm->interp_filter == best_mbmode.interp_filter) ||
4398 !is_inter_block(&best_mbmode));
4399
4400 // Updating rd_thresh_freq_fact[] here means that the different
4401 // partition/block sizes are handled independently based on the best
4402 // choice for the current partition. It may well be better to keep a scaled
4403 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
4404 // combination that wins out.
4405 if (cpi->sf.adaptive_rd_thresh) {
4406 for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
4407 int *const fact = &cpi->rd_thresh_freq_sub8x8[bsize][mode_index];
4408
4409 if (mode_index == best_mode_index) {
4410 *fact -= (*fact >> 3);
4411 } else {
4412 *fact = MIN(*fact + RD_THRESH_INC,
4413 cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
4414 }
4415 }
4416 }
4417
4418 // macroblock modes
4419 *mbmi = best_mbmode;
4420 x->skip |= best_skip2;
4421 if (!is_inter_block(&best_mbmode)) {
4422 for (i = 0; i < 4; i++)
4423 xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
4424 } else {
4425 for (i = 0; i < 4; ++i)
4426 vpx_memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));
4427
4428 mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
4429 mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
4430 }
4431
4432 for (i = 0; i < REFERENCE_MODES; ++i) {
4433 if (best_pred_rd[i] == INT64_MAX)
4434 best_pred_diff[i] = INT_MIN;
4435 else
4436 best_pred_diff[i] = best_rd - best_pred_rd[i];
4437 }
4438
4439 if (!x->skip) {
4440 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4441 if (best_filter_rd[i] == INT64_MAX)
4442 best_filter_diff[i] = 0;
4443 else
4444 best_filter_diff[i] = best_rd - best_filter_rd[i];
4445 }
4446 if (cm->interp_filter == SWITCHABLE)
4447 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
4448 } else {
4449 vp9_zero(best_filter_diff);
4450 }
4451
4452 if (!x->skip) {
4453 for (i = 0; i < TX_MODES; i++) {
4454 if (best_tx_rd[i] == INT64_MAX)
4455 best_tx_diff[i] = 0;
4456 else
4457 best_tx_diff[i] = best_rd - best_tx_rd[i];
4458 }
4459 } else {
4460 vp9_zero(best_tx_diff);
4461 }
4462
4463 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
4464 store_coding_context(x, ctx, best_mode_index,
4465 &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
4466 &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
4467 mbmi->ref_frame[1]][0],
4468 best_pred_diff, best_tx_diff, best_filter_diff);
4469
4470 return best_rd;
4471 }
4472