/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


#include "vpx_ports/config.h"
#include "encodemb.h"
#include "encodemv.h"
#include "vp8/common/common.h"
#include "onyx_int.h"
#include "vp8/common/extend.h"
#include "vp8/common/entropymode.h"
#include "vp8/common/quant_common.h"
#include "segmentation.h"
#include "vp8/common/setupintrarecon.h"
#include "encodeintra.h"
#include "vp8/common/reconinter.h"
#include "rdopt.h"
#include "pickinter.h"
#include "vp8/common/findnearmv.h"
#include "vp8/common/reconintra.h"
#include <stdio.h>
#include <limits.h>
#include "vp8/common/subpixel.h"
#include "vpx_ports/vpx_timer.h"

#if CONFIG_RUNTIME_CPU_DETECT
#define RTCD(x) &cpi->common.rtcd.x
#define IF_RTCD(x) (x)
#else
#define RTCD(x) NULL
#define IF_RTCD(x) NULL
#endif
extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t);

extern void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex);
extern void vp8_auto_select_speed(VP8_COMP *cpi);
extern void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
                                      MACROBLOCK *x,
                                      MB_ROW_COMP *mbr_ei,
                                      int mb_row,
                                      int count);
void vp8_build_block_offsets(MACROBLOCK *x);
void vp8_setup_block_ptrs(MACROBLOCK *x);
int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset);
int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t);

#ifdef MODE_STATS
unsigned int inter_y_modes[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
unsigned int inter_uv_modes[4] = {0, 0, 0, 0};
unsigned int inter_b_modes[15] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
unsigned int y_modes[5] = {0, 0, 0, 0, 0};
unsigned int uv_modes[4] = {0, 0, 0, 0};
unsigned int b_modes[14] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
#endif

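/* Zero-bin and rounding factors for the quantizer, indexed by Q index.
 * As used below, each factor is applied in 1/128th units of the quantizer
 * step size: zbin = (factor * quant_val + 64) >> 7 and
 * round = (factor * quant_val) >> 7.
 */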
static const int qrounding_factors[129] =
{
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48
};

static const int qzbin_factors[129] =
{
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80
};

static const int qrounding_factors_y2[129] =
{
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48
};

static const int qzbin_factors_y2[129] =
{
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80
};

#define EXACT_QUANT
#ifdef EXACT_QUANT
static void vp8cx_invert_quant(int improved_quant, short *quant,
                               short *shift, short d)
{
    if (improved_quant)
    {
        unsigned t;
        int l;
        t = d;
        for (l = 0; t > 1; l++)
            t >>= 1;
        t = 1 + (1 << (16 + l)) / d;
        *quant = (short)(t - (1 << 16));
        *shift = l;
    }
    else
    {
        *quant = (1 << 16) / d;
        *shift = 0;
    }
}
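/* Illustrative note on vp8cx_invert_quant() above (not part of the original
 * source): for the improved path, l = floor(log2(d)) and
 * quant + (1 << 16) ~= 2^(16+l) / d, so a quantizer that computes
 * ((x * quant) >> 16 + x) >> shift divides x by d using only a 16-bit
 * multiplier. E.g. d = 20 gives l = 4, 1 + 2^20 / 20 = 52429, so
 * quant = 52429 - 65536 = -13107 and shift = 4.
 */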

void vp8cx_init_quantizer(VP8_COMP *cpi)
{
    int i;
    int quant_val;
    int Q;

    int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};
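    /* Note: the zrun_zbin_boost tables built from these values appear to be
     * used to widen the zero bin according to the run of zeroes preceding a
     * coefficient (hence the name), so isolated small coefficients late in
     * the zig-zag scan are more likely to be quantized to zero.
     */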

    for (Q = 0; Q < QINDEX_RANGE; Q++)
    {
        // dc values
        quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
        cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;
        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
                           cpi->Y1quant_shift[Q] + 0, quant_val);
        cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.Y1dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
        cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;
        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
                           cpi->Y2quant_shift[Q] + 0, quant_val);
        cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
        cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
        cpi->common.Y2dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
        cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;
        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
                           cpi->UVquant_shift[Q] + 0, quant_val);
        cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.UVdequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        // all the ac values
        for (i = 1; i < 16; i++)
        {
            int rc = vp8_default_zig_zag1d[i];

            quant_val = vp8_ac_yquant(Q);
            cpi->Y1quant_fast[Q][rc] = (1 << 16) / quant_val;
            vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
                               cpi->Y1quant_shift[Q] + rc, quant_val);
            cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.Y1dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
            cpi->Y2quant_fast[Q][rc] = (1 << 16) / quant_val;
            vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
                               cpi->Y2quant_shift[Q] + rc, quant_val);
            cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
            cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
            cpi->common.Y2dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
            cpi->UVquant_fast[Q][rc] = (1 << 16) / quant_val;
            vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
                               cpi->UVquant_shift[Q] + rc, quant_val);
            cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.UVdequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
        }
    }
}
#else
void vp8cx_init_quantizer(VP8_COMP *cpi)
{
    int i;
    int quant_val;
    int Q;

    int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};

    for (Q = 0; Q < QINDEX_RANGE; Q++)
    {
        // dc values
        quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
        cpi->Y1quant[Q][0] = (1 << 16) / quant_val;
        cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.Y1dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
        cpi->Y2quant[Q][0] = (1 << 16) / quant_val;
        cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
        cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
        cpi->common.Y2dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
        cpi->UVquant[Q][0] = (1 << 16) / quant_val;
        cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.UVdequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        // all the ac values
        for (i = 1; i < 16; i++)
        {
            int rc = vp8_default_zig_zag1d[i];

            quant_val = vp8_ac_yquant(Q);
            cpi->Y1quant[Q][rc] = (1 << 16) / quant_val;
            cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.Y1dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
            cpi->Y2quant[Q][rc] = (1 << 16) / quant_val;
            cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
            cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
            cpi->common.Y2dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
            cpi->UVquant[Q][rc] = (1 << 16) / quant_val;
            cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.UVdequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
        }
    }
}
#endif
void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
{
    int i;
    int QIndex;
    MACROBLOCKD *xd = &x->e_mbd;
    int zbin_extra;

    // Select the baseline MB Q index.
    if (xd->segmentation_enabled)
    {
        // Abs Value
        if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)

            QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
        // Delta Value
        else
        {
            QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
            QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;    // Clamp to valid range
        }
    }
    else
        QIndex = cpi->common.base_qindex;

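    /* zbin_extra widens the zero bin beyond the per-Q zbin tables by an
     * amount proportional to the first AC dequant step (again in 1/128th
     * units), driven by the over-quant and per-mode boost values.
     */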
    // Y
    zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;

    for (i = 0; i < 16; i++)
    {
        x->block[i].quant = cpi->Y1quant[QIndex];
        x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];
        x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
        x->block[i].zbin = cpi->Y1zbin[QIndex];
        x->block[i].round = cpi->Y1round[QIndex];
        x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
        x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
        x->block[i].zbin_extra = (short)zbin_extra;
    }

    // UV
    zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;

    for (i = 16; i < 24; i++)
    {
        x->block[i].quant = cpi->UVquant[QIndex];
        x->block[i].quant_fast = cpi->UVquant_fast[QIndex];
        x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
        x->block[i].zbin = cpi->UVzbin[QIndex];
        x->block[i].round = cpi->UVround[QIndex];
        x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
        x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
        x->block[i].zbin_extra = (short)zbin_extra;
    }

    // Y2
    zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
    x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];
    x->block[24].quant = cpi->Y2quant[QIndex];
    x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
    x->block[24].zbin = cpi->Y2zbin[QIndex];
    x->block[24].round = cpi->Y2round[QIndex];
    x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
    x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
    x->block[24].zbin_extra = (short)zbin_extra;

    /* save this macroblock QIndex for vp8_update_zbin_extra() */
    x->q_index = QIndex;
}
void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x)
{
    int i;
    int QIndex = x->q_index;
    int zbin_extra;

    // Y
    zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
    for (i = 0; i < 16; i++)
    {
        x->block[i].zbin_extra = (short)zbin_extra;
    }

    // UV
    zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
    for (i = 16; i < 24; i++)
    {
        x->block[i].zbin_extra = (short)zbin_extra;
    }

    // Y2
    zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
    x->block[24].zbin_extra = (short)zbin_extra;
}

void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
{
    // Clear Zbin mode boost for default case
    cpi->zbin_mode_boost = 0;

    // MB level quantizer setup
    vp8cx_mb_init_quantizer(cpi, &cpi->mb);
}


/* activity_avg must be positive, or flat regions could get a zero weight
 * (infinite lambda), which confounds analysis.
 * This also avoids the need for divide by zero checks in
 * vp8_activity_masking().
 */
#define VP8_ACTIVITY_AVG_MIN (64)

/* This is used as a reference when computing the source variance for the
 * purposes of activity masking.
 * Eventually this should be replaced by custom no-reference routines,
 * which will be faster.
 */
static const unsigned char VP8_VAR_OFFS[16] =
{
    128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
};

unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
{
    unsigned int act;
    unsigned int sse;
    int sum;
    unsigned int a;
    unsigned int b;
    /* TODO: This could also be done over smaller areas (8x8), but that would
     * require extensive changes elsewhere, as lambda is assumed to be fixed
     * over an entire MB in most of the code.
     * Another option is to compute four 8x8 variances, and pick a single
     * lambda using a non-linear combination (e.g., the smallest, or second
     * smallest, etc.).
     */
    VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
                    x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
    /* This requires a full 32 bits of precision. */
    act = (sse << 8) - sum * sum;
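    /* Note: since the reference block is the constant value 128, this is the
     * usual integer variance form N*SSE - sum^2 with N = 256 pixels, i.e.
     * the source MB variance scaled by N^2.
     */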
    /* Drop 4 to give us some headroom to work with. */
    act = (act + 8) >> 4;
    /* If the region is flat, lower the activity some more. */
    if (act < 8 << 12)
        act = act < 5 << 12 ? act : 5 << 12;
    /* TODO: For non-flat regions, edge regions should receive less masking
     * than textured regions, but identifying edge regions quickly and
     * reliably enough is still a subject of experimentation.
     * This will be most noticeable near edges with a complex shape (e.g.,
     * text), but the 4x4 transform size should make this less of a problem
     * than it would be for an 8x8 transform.
     */
    /* Apply the masking to the RD multiplier. */
    a = act + 4 * cpi->activity_avg;
    b = 4 * act + cpi->activity_avg;
    x->rdmult = (unsigned int)(((INT64)x->rdmult * b + (a >> 1)) / a);
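    /* Rough behaviour of the scaling above: when act equals activity_avg the
     * factor b/a is 1 and rdmult is unchanged; for a very flat MB it falls
     * toward 1/4 (spending relatively more bits where artifacts are most
     * visible), and for a highly textured MB it rises toward 4.
     */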
    return act;
}



static
void encode_mb_row(VP8_COMP *cpi,
                   VP8_COMMON *cm,
                   int mb_row,
                   MACROBLOCK *x,
                   MACROBLOCKD *xd,
                   TOKENEXTRA **tp,
                   int *segment_counts,
                   int *totalrate)
{
    INT64 activity_sum = 0;
    int i;
    int recon_yoffset, recon_uvoffset;
    int mb_col;
    int ref_fb_idx = cm->lst_fb_idx;
    int dst_fb_idx = cm->new_fb_idx;
    int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
    int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
    int seg_map_index = (mb_row * cpi->common.mb_cols);

#if CONFIG_MULTITHREAD
    const int nsync = cpi->mt_sync_range;
    const int rightmost_col = cm->mb_cols - 1;
    volatile const int *last_row_current_mb_col;

    if ((cpi->b_multi_threaded != 0) && (mb_row != 0))
        last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1];
    else
        last_row_current_mb_col = &rightmost_col;
#endif

    // reset above block coeffs
    xd->above_context = cm->above_context;

    xd->up_available = (mb_row != 0);
    recon_yoffset = (mb_row * recon_y_stride * 16);
    recon_uvoffset = (mb_row * recon_uv_stride * 8);

    cpi->tplist[mb_row].start = *tp;
    //printf("Main mb_row = %d\n", mb_row);

    // Distance of Mb to the top & bottom edges, specified in 1/8th pel
    // units as they are always compared to values that are in 1/8th pel units
    xd->mb_to_top_edge = -((mb_row * 16) << 3);
    xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;

    // Set up limit values for vertical motion vector components
    // to prevent them extending beyond the UMV borders
    x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
    x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
                    + (VP8BORDERINPIXELS - 16);

    // for each macroblock col in image
    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
    {
        // Distance of Mb to the left & right edges, specified in
        // 1/8th pel units as they are always compared to values
        // that are in 1/8th pel units
        xd->mb_to_left_edge = -((mb_col * 16) << 3);
        xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;

        // Set up limit values for horizontal motion vector components
        // to prevent them extending beyond the UMV borders
        x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
        x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
                        + (VP8BORDERINPIXELS - 16);

        xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
        xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
        xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
        xd->left_available = (mb_col != 0);

        x->rddiv = cpi->RDDIV;
        x->rdmult = cpi->RDMULT;

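        /* With row-based multithreading, the wait loop below keeps this row
         * at least nsync macroblocks behind the row above, presumably because
         * the above-row entropy context and reconstructed border are inputs
         * to the current MB.
         */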
#if CONFIG_MULTITHREAD
        if ((cpi->b_multi_threaded != 0) && (mb_row != 0))
        {
            if ((mb_col & (nsync - 1)) == 0)
            {
                while (mb_col > (*last_row_current_mb_col - nsync)
                       && (*last_row_current_mb_col) != (cm->mb_cols - 1))
                {
                    x86_pause_hint();
                    thread_sleep(0);
                }
            }
        }
#endif

        if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
            activity_sum += vp8_activity_masking(cpi, x);

        // Is segmentation enabled?
        // MB level adjustment to quantizer
        if (xd->segmentation_enabled)
        {
            // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
            if (cpi->segmentation_map[seg_map_index+mb_col] <= 3)
                xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
            else
                xd->mode_info_context->mbmi.segment_id = 0;

            vp8cx_mb_init_quantizer(cpi, x);
        }
        else
            xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default

        x->active_ptr = cpi->active_map + seg_map_index + mb_col;

        if (cm->frame_type == KEY_FRAME)
        {
            *totalrate += vp8cx_encode_intra_macro_block(cpi, x, tp);
#ifdef MODE_STATS
            y_modes[xd->mbmi.mode] ++;
#endif
        }
        else
        {
            *totalrate += vp8cx_encode_inter_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset);

#ifdef MODE_STATS
            inter_y_modes[xd->mbmi.mode] ++;

            if (xd->mbmi.mode == SPLITMV)
            {
                int b;

                for (b = 0; b < xd->mbmi.partition_count; b++)
                {
                    inter_b_modes[x->partition->bmi[b].mode] ++;
                }
            }

#endif

            // Count of last ref frame 0,0 usage
            if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                cpi->inter_zz_count ++;

            // Special case code for cyclic refresh
            // If cyclic update is enabled, copy xd->mbmi.segment_id (which may have been
            // updated based on mode during vp8cx_encode_inter_macroblock()) back into the
            // global segmentation map
            if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
            {
                cpi->segmentation_map[seg_map_index+mb_col] = xd->mode_info_context->mbmi.segment_id;

                // If the block has been refreshed, mark it as clean (the magnitude of the -ve
                // value influences how long it will be before we consider another refresh).
                // Else if it was coded (last frame 0,0) and has not already been refreshed,
                // mark it as a candidate for cleanup next time (marked 0),
                // else mark it as dirty (1).
                if (xd->mode_info_context->mbmi.segment_id)
                    cpi->cyclic_refresh_map[seg_map_index+mb_col] = -1;
                else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                {
                    if (cpi->cyclic_refresh_map[seg_map_index+mb_col] == 1)
                        cpi->cyclic_refresh_map[seg_map_index+mb_col] = 0;
                }
                else
                    cpi->cyclic_refresh_map[seg_map_index+mb_col] = 1;

            }
        }

        cpi->tplist[mb_row].stop = *tp;

        x->gf_active_ptr++; // Increment pointer into gf usage flags structure for next mb

        for (i = 0; i < 16; i++)
            vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi));

        // adjust to the next column of macroblocks
        x->src.y_buffer += 16;
        x->src.u_buffer += 8;
        x->src.v_buffer += 8;

        recon_yoffset += 16;
        recon_uvoffset += 8;

        // Keep track of segment usage
        segment_counts[xd->mode_info_context->mbmi.segment_id] ++;

        // skip to next mb
        xd->mode_info_context++;
        x->partition_info++;

        xd->above_context++;
#if CONFIG_MULTITHREAD
        if (cpi->b_multi_threaded != 0)
        {
            cpi->mt_current_mb_col[mb_row] = mb_col;
        }
#endif
    }

    // extend the recon for intra prediction
    vp8_extend_mb_row(
        &cm->yv12_fb[dst_fb_idx],
        xd->dst.y_buffer + 16,
        xd->dst.u_buffer + 8,
        xd->dst.v_buffer + 8);

    // this is to account for the border
    xd->mode_info_context++;
    x->partition_info++;
    x->activity_sum += activity_sum;

#if CONFIG_MULTITHREAD
    if ((cpi->b_multi_threaded != 0) && (mb_row == cm->mb_rows - 1))
    {
        sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */
    }
#endif
}

void vp8_encode_frame(VP8_COMP *cpi)
{
    int mb_row;
    MACROBLOCK *const x = &cpi->mb;
    VP8_COMMON *const cm = &cpi->common;
    MACROBLOCKD *const xd = &x->e_mbd;

    TOKENEXTRA *tp = cpi->tok;
    int segment_counts[MAX_MB_SEGMENTS];
    int totalrate;

    // Functions setup for all frame types so we can use MC in AltRef
    if (cm->mcomp_filter_type == SIXTAP)
    {
        xd->subpixel_predict = SUBPIX_INVOKE(
                                   &cpi->common.rtcd.subpix, sixtap4x4);
        xd->subpixel_predict8x4 = SUBPIX_INVOKE(
                                      &cpi->common.rtcd.subpix, sixtap8x4);
        xd->subpixel_predict8x8 = SUBPIX_INVOKE(
                                      &cpi->common.rtcd.subpix, sixtap8x8);
        xd->subpixel_predict16x16 = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, sixtap16x16);
    }
    else
    {
        xd->subpixel_predict = SUBPIX_INVOKE(
                                   &cpi->common.rtcd.subpix, bilinear4x4);
        xd->subpixel_predict8x4 = SUBPIX_INVOKE(
                                      &cpi->common.rtcd.subpix, bilinear8x4);
        xd->subpixel_predict8x8 = SUBPIX_INVOKE(
                                      &cpi->common.rtcd.subpix, bilinear8x8);
        xd->subpixel_predict16x16 = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, bilinear16x16);
    }

    x->gf_active_ptr = (signed char *)cpi->gf_active_flags; // Point to base of GF active flags data structure

    x->vector_range = 32;

    // Count of MBs using the alternate Q if any
    cpi->alt_qcount = 0;

    // Reset frame count of inter 0,0 motion vector usage.
    cpi->inter_zz_count = 0;

    vpx_memset(segment_counts, 0, sizeof(segment_counts));

    cpi->prediction_error = 0;
    cpi->intra_error = 0;
    cpi->skip_true_count = 0;
    cpi->skip_false_count = 0;

#if 0
    // Experimental code
    cpi->frame_distortion = 0;
    cpi->last_mb_distortion = 0;
#endif

    totalrate = 0;

    x->partition_info = x->pi;

    xd->mode_info_context = cm->mi;
    xd->mode_info_stride = cm->mode_info_stride;

    xd->frame_type = cm->frame_type;

    xd->frames_since_golden = cm->frames_since_golden;
    xd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;
    vp8_zero(cpi->MVcount);
    // vp8_zero( Contexts)
    vp8_zero(cpi->coef_counts);

    // reset intra mode contexts
    if (cm->frame_type == KEY_FRAME)
        vp8_init_mbmode_probs(cm);


    vp8cx_frame_init_quantizer(cpi);

    if (cpi->compressor_speed == 2)
    {
        if (cpi->oxcf.cpu_used < 0)
            cpi->Speed = -(cpi->oxcf.cpu_used);
        else
            vp8_auto_select_speed(cpi);
    }

    vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));
    vp8cx_initialize_me_consts(cpi, cm->base_qindex);

    // Copy data over into macro block data structures.

    x->src = *cpi->Source;
    xd->pre = cm->yv12_fb[cm->lst_fb_idx];
    xd->dst = cm->yv12_fb[cm->new_fb_idx];

    // set up new frame for intra coded blocks

    vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);

    vp8_build_block_offsets(x);

    vp8_setup_block_dptrs(&x->e_mbd);

    vp8_setup_block_ptrs(x);

    x->activity_sum = 0;

    xd->mode_info_context->mbmi.mode = DC_PRED;
    xd->mode_info_context->mbmi.uv_mode = DC_PRED;

    xd->left_context = &cm->left_context;

    vp8_zero(cpi->count_mb_ref_frame_usage)
    vp8_zero(cpi->ymode_count)
    vp8_zero(cpi->uv_mode_count)

    x->mvc = cm->fc.mvc;

    vpx_memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);

    {
        struct vpx_usec_timer emr_timer;
        vpx_usec_timer_start(&emr_timer);

#if CONFIG_MULTITHREAD
        if (cpi->b_multi_threaded)
        {
            int i;

            vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count);

            for (i = 0; i < cm->mb_rows; i++)
                cpi->mt_current_mb_col[i] = -1;

            for (i = 0; i < cpi->encoding_thread_count; i++)
            {
                sem_post(&cpi->h_event_start_encoding[i]);
            }

            for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
            {
                vp8_zero(cm->left_context)

                tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);
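                /* Each row's tokens are written at a fixed offset into the
                 * token buffer: the stride appears to allow up to 16 tokens
                 * for each of a macroblock's 24 coded blocks per MB column.
                 */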

                encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);

                // adjust to the next row of mbs
                x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
                x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
                x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;

                xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
                x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count;

            }

            sem_wait(&cpi->h_event_end_encoding); /* wait for other threads to finish */

            cpi->tok_count = 0;

            for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++)
            {
                cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start;
            }

            if (xd->segmentation_enabled)
            {
                int i, j;

                if (xd->segmentation_enabled)
                {

                    for (i = 0; i < cpi->encoding_thread_count; i++)
                    {
                        for (j = 0; j < 4; j++)
                            segment_counts[j] += cpi->mb_row_ei[i].segment_counts[j];
                    }
                }
            }

            for (i = 0; i < cpi->encoding_thread_count; i++)
            {
                totalrate += cpi->mb_row_ei[i].totalrate;
            }

            for (i = 0; i < cpi->encoding_thread_count; i++)
            {
                x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
            }

        }
        else
#endif
        {
            // for each macroblock row in image
            for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
            {

                vp8_zero(cm->left_context)

                encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);

                // adjust to the next row of mbs
                x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
                x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
                x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
            }

            cpi->tok_count = tp - cpi->tok;

        }

        vpx_usec_timer_mark(&emr_timer);
        cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer);

    }


    // Work out the segment probabilities if segmentation is enabled
    if (xd->segmentation_enabled)
    {
        int tot_count;
        int i;

        // Set to defaults
        vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs));

        tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3];

        if (tot_count)
        {
            xd->mb_segment_tree_probs[0] = ((segment_counts[0] + segment_counts[1]) * 255) / tot_count;

            tot_count = segment_counts[0] + segment_counts[1];

            if (tot_count > 0)
            {
                xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) / tot_count;
            }

            tot_count = segment_counts[2] + segment_counts[3];

            if (tot_count > 0)
                xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count;

            // Zero probabilities not allowed
            for (i = 0; i < MB_FEATURE_TREE_PROBS; i ++)
            {
                if (xd->mb_segment_tree_probs[i] == 0)
                    xd->mb_segment_tree_probs[i] = 1;
            }
        }
    }

    // 256 rate units to the bit
    cpi->projected_frame_size = totalrate >> 8; // projected_frame_size in units of bits

    // Make a note of the percentage of MBs coded intra.
    if (cm->frame_type == KEY_FRAME)
    {
        cpi->this_frame_percent_intra = 100;
    }
    else
    {
        int tot_modes;

        tot_modes = cpi->count_mb_ref_frame_usage[INTRA_FRAME]
                    + cpi->count_mb_ref_frame_usage[LAST_FRAME]
                    + cpi->count_mb_ref_frame_usage[GOLDEN_FRAME]
                    + cpi->count_mb_ref_frame_usage[ALTREF_FRAME];

        if (tot_modes)
            cpi->this_frame_percent_intra = cpi->count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes;

    }

#if 0
    {
        int cnt = 0;
        int flag[2] = {0, 0};

        for (cnt = 0; cnt < MVPcount; cnt++)
        {
            if (cm->fc.pre_mvc[0][cnt] != cm->fc.mvc[0][cnt])
            {
                flag[0] = 1;
                vpx_memcpy(cm->fc.pre_mvc[0], cm->fc.mvc[0], MVPcount);
                break;
            }
        }

        for (cnt = 0; cnt < MVPcount; cnt++)
        {
            if (cm->fc.pre_mvc[1][cnt] != cm->fc.mvc[1][cnt])
            {
                flag[1] = 1;
                vpx_memcpy(cm->fc.pre_mvc[1], cm->fc.mvc[1], MVPcount);
                break;
            }
        }

        if (flag[0] || flag[1])
            vp8_build_component_cost_table(cpi->mb.mvcost, cpi->mb.mvsadcost, (const MV_CONTEXT *) cm->fc.mvc, flag);
    }
#endif

    // Adjust the projected reference frame usage probability numbers to reflect
    // what we have just seen. This may be useful when we make multiple iterations
    // of the recode loop rather than continuing to use values from the previous frame.
    if ((cm->frame_type != KEY_FRAME) && !cm->refresh_alt_ref_frame && !cm->refresh_golden_frame)
    {
        const int *const rfct = cpi->count_mb_ref_frame_usage;
        const int rf_intra = rfct[INTRA_FRAME];
        const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];

        if ((rf_intra + rf_inter) > 0)
        {
            cpi->prob_intra_coded = (rf_intra * 255) / (rf_intra + rf_inter);

            if (cpi->prob_intra_coded < 1)
                cpi->prob_intra_coded = 1;

            if ((cm->frames_since_golden > 0) || cpi->source_alt_ref_active)
            {
                cpi->prob_last_coded = rf_inter ? (rfct[LAST_FRAME] * 255) / rf_inter : 128;

                if (cpi->prob_last_coded < 1)
                    cpi->prob_last_coded = 1;

                cpi->prob_gf_coded = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
                                     ? (rfct[GOLDEN_FRAME] * 255) / (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]) : 128;

                if (cpi->prob_gf_coded < 1)
                    cpi->prob_gf_coded = 1;
            }
        }
    }

#if 0
    // Keep record of the total distortion this time around for future use
    cpi->last_frame_distortion = cpi->frame_distortion;
#endif

    /* Update the average activity for the next frame.
     * This is feed-forward for now; it could also be saved in two-pass, or
     * done during lookahead when that is eventually added.
     */
    cpi->activity_avg = (unsigned int)(x->activity_sum / cpi->common.MBs);
    if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
        cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;

}
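
/* Set up the per-block pointers into the macroblock's src_diff and coeff
 * buffers: the 16 Y 4x4 blocks occupy the first 256 entries, the U and V
 * blocks start at offsets 256 and 320, and the Y2 (DC) block at 384.
 */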
void vp8_setup_block_ptrs(MACROBLOCK *x)
{
    int r, c;
    int i;

    for (r = 0; r < 4; r++)
    {
        for (c = 0; c < 4; c++)
        {
            x->block[r*4+c].src_diff = x->src_diff + r * 4 * 16 + c * 4;
        }
    }

    for (r = 0; r < 2; r++)
    {
        for (c = 0; c < 2; c++)
        {
            x->block[16 + r*2+c].src_diff = x->src_diff + 256 + r * 4 * 8 + c * 4;
        }
    }


    for (r = 0; r < 2; r++)
    {
        for (c = 0; c < 2; c++)
        {
            x->block[20 + r*2+c].src_diff = x->src_diff + 320 + r * 4 * 8 + c * 4;
        }
    }

    x->block[24].src_diff = x->src_diff + 384;


    for (i = 0; i < 25; i++)
    {
        x->block[i].coeff = x->coeff + i * 16;
    }
}

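/* Set up the per-block source pointers: base_src points at the plane's
 * buffer pointer and src is the block's offset within it, so the offsets
 * remain valid as the source pointers advance across the frame.
 */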
void vp8_build_block_offsets(MACROBLOCK *x)
{
    int block = 0;
    int br, bc;

    vp8_build_block_doffsets(&x->e_mbd);

    // y blocks
    for (br = 0; br < 4; br++)
    {
        for (bc = 0; bc < 4; bc++)
        {
            BLOCK *this_block = &x->block[block];
            this_block->base_src = &x->src.y_buffer;
            this_block->src_stride = x->src.y_stride;
            this_block->src = 4 * br * this_block->src_stride + 4 * bc;
            ++block;
        }
    }

    // u blocks
    for (br = 0; br < 2; br++)
    {
        for (bc = 0; bc < 2; bc++)
        {
            BLOCK *this_block = &x->block[block];
            this_block->base_src = &x->src.u_buffer;
            this_block->src_stride = x->src.uv_stride;
            this_block->src = 4 * br * this_block->src_stride + 4 * bc;
            ++block;
        }
    }

    // v blocks
    for (br = 0; br < 2; br++)
    {
        for (bc = 0; bc < 2; bc++)
        {
            BLOCK *this_block = &x->block[block];
            this_block->base_src = &x->src.v_buffer;
            this_block->src_stride = x->src.uv_stride;
            this_block->src = 4 * br * this_block->src_stride + 4 * bc;
            ++block;
        }
    }
}

static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x)
{
    const MACROBLOCKD *xd = &x->e_mbd;
    const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode;
    const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode;

#ifdef MODE_STATS
    const int is_key = cpi->common.frame_type == KEY_FRAME;

    ++ (is_key ? uv_modes : inter_uv_modes)[uvm];

    if (m == B_PRED)
    {
        unsigned int *const bct = is_key ? b_modes : inter_b_modes;

        int b = 0;

        do
        {
            ++ bct[xd->block[b].bmi.mode];
        }
        while (++b < 16);
    }

#endif

    ++cpi->ymode_count[m];
    ++cpi->uv_mode_count[uvm];

}
int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
{
    int Error4x4, Error16x16, error_uv;
    int rate4x4, rate16x16, rateuv;
    int dist4x4, dist16x16, distuv;
    int rate = 0;
    int rate4x4_tokenonly = 0;
    int rate16x16_tokenonly = 0;
    int rateuv_tokenonly = 0;

    x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;

#if !(CONFIG_REALTIME_ONLY)
    if (cpi->sf.RD && cpi->compressor_speed != 2)
    {
        error_uv = vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
        rate += rateuv;

        Error16x16 = vp8_rd_pick_intra16x16mby_mode(cpi, x, &rate16x16, &rate16x16_tokenonly, &dist16x16);

        Error4x4 = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4, Error16x16);

        rate += (Error4x4 < Error16x16) ? rate4x4 : rate16x16;
    }
    else
#endif
    {
        int rate2, best_distortion;
        MB_PREDICTION_MODE mode, best_mode = DC_PRED;
        int this_rd;
        Error16x16 = INT_MAX;

        vp8_pick_intra_mbuv_mode(x);

        for (mode = DC_PRED; mode <= TM_PRED; mode ++)
        {
            int distortion2;

            x->e_mbd.mode_info_context->mbmi.mode = mode;
            RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
                (&x->e_mbd);
            distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
            rate2 = x->mbmode_cost[x->e_mbd.frame_type][mode];
            this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);

            if (Error16x16 > this_rd)
            {
                Error16x16 = this_rd;
                best_mode = mode;
                best_distortion = distortion2;
            }
        }
        x->e_mbd.mode_info_context->mbmi.mode = best_mode;

        Error4x4 = vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate2, &best_distortion);
    }

    if (Error4x4 < Error16x16)
    {
        x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
        vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
    }
    else
    {
        vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
    }

    vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
    sum_intra_stats(cpi, x);
    vp8_tokenize_mb(cpi, &x->e_mbd, t);

    return rate;
}
#ifdef SPEEDSTATS
extern int cnt_pm;
#endif

extern void vp8_fix_contexts(MACROBLOCKD *x);

int vp8cx_encode_inter_macroblock
(
    VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
    int recon_yoffset, int recon_uvoffset
)
{
    MACROBLOCKD *const xd = &x->e_mbd;
    int inter_error;
    int intra_error = 0;
    int rate;
    int distortion;

    x->skip = 0;

    if (xd->segmentation_enabled)
        x->encode_breakout = cpi->segment_encode_breakout[xd->mode_info_context->mbmi.segment_id];
    else
        x->encode_breakout = cpi->oxcf.encode_breakout;

#if !(CONFIG_REALTIME_ONLY)

    if (cpi->sf.RD)
    {
        int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;

        /* Are we using the fast quantizer for the mode selection? */
        if (cpi->sf.use_fastquant_for_pick)
        {
            cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);

            /* the fast quantizer does not use zbin_extra, so
             * do not recalculate */
            cpi->zbin_mode_boost_enabled = 0;
        }
        inter_error = vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);

        /* switch back to the regular quantizer for the encode */
        if (cpi->sf.improved_quant)
        {
            cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
        }

        /* restore cpi->zbin_mode_boost_enabled */
        cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled;

    }
    else
#endif
        inter_error = vp8_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);


    cpi->prediction_error += inter_error;
    cpi->intra_error += intra_error;

#if 0
    // Experimental RD code
    cpi->frame_distortion += distortion;
    cpi->last_mb_distortion = distortion;
#endif

    // MB level adjustment to quantizer setup
    if (xd->segmentation_enabled)
    {
        // If cyclic update enabled
        if (cpi->cyclic_refresh_mode_enabled)
        {
            // Clear segment_id back to 0 if not coded (last frame 0,0)
            if ((xd->mode_info_context->mbmi.segment_id == 1) &&
                ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV)))
            {
                xd->mode_info_context->mbmi.segment_id = 0;

                /* segment_id changed, so update */
                vp8cx_mb_init_quantizer(cpi, x);
            }
        }
    }

    {
        // Experimental code. Special case for gf and arf zeromv modes:
        // increase zbin size to suppress noise
        if (cpi->zbin_mode_boost_enabled)
        {
            if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
                cpi->zbin_mode_boost = 0;
            else
            {
                if (xd->mode_info_context->mbmi.mode == ZEROMV)
                {
                    if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)
                        cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
                    else
                        cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
                }
                else if (xd->mode_info_context->mbmi.mode == SPLITMV)
                    cpi->zbin_mode_boost = 0;
                else
                    cpi->zbin_mode_boost = MV_ZBIN_BOOST;
            }
        }
        else
            cpi->zbin_mode_boost = 0;

        vp8_update_zbin_extra(cpi, x);
    }

    cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;

    if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    {
        vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);

        if (xd->mode_info_context->mbmi.mode == B_PRED)
        {
            vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
        }
        else
        {
            vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
        }

        sum_intra_stats(cpi, x);
    }
    else
    {
        MV best_ref_mv;
        MV nearest, nearby;
        int mdcounts[4];
        int ref_fb_idx;

        vp8_find_near_mvs(xd, xd->mode_info_context,
                          &nearest, &nearby, &best_ref_mv, mdcounts, xd->mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);

        vp8_build_uvmvs(xd, cpi->common.full_pixel);

        if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
            ref_fb_idx = cpi->common.lst_fb_idx;
        else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
            ref_fb_idx = cpi->common.gld_fb_idx;
        else
            ref_fb_idx = cpi->common.alt_fb_idx;

        xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
        xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
        xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;

        if (xd->mode_info_context->mbmi.mode == SPLITMV)
        {
            int i;

            for (i = 0; i < 16; i++)
            {
                if (xd->block[i].bmi.mode == NEW4X4)
                {
                    cpi->MVcount[0][mv_max+((xd->block[i].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
                    cpi->MVcount[1][mv_max+((xd->block[i].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
                }
            }
        }
        else if (xd->mode_info_context->mbmi.mode == NEWMV)
        {
            cpi->MVcount[0][mv_max+((xd->block[0].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
            cpi->MVcount[1][mv_max+((xd->block[0].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
        }

        if (!x->skip && !x->e_mbd.mode_info_context->mbmi.force_no_skip)
        {
            vp8_encode_inter16x16(IF_RTCD(&cpi->rtcd), x);

            // Clear mb_skip_coeff if mb_no_coeff_skip is not set
            if (!cpi->common.mb_no_coeff_skip)
                xd->mode_info_context->mbmi.mb_skip_coeff = 0;

        }
        else
            vp8_stuff_inter16x16(x);
    }

    if (!x->skip)
        vp8_tokenize_mb(cpi, xd, t);
    else
    {
        if (cpi->common.mb_no_coeff_skip)
        {
            if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
                xd->mode_info_context->mbmi.dc_diff = 0;
            else
                xd->mode_info_context->mbmi.dc_diff = 1;

            xd->mode_info_context->mbmi.mb_skip_coeff = 1;
            cpi->skip_true_count ++;
            vp8_fix_contexts(xd);
        }
        else
        {
            vp8_stuff_mb(cpi, xd, t);
            xd->mode_info_context->mbmi.mb_skip_coeff = 0;
            cpi->skip_false_count ++;
        }
    }

    return rate;
}