/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include "vpx_ports/config.h"
#include "encodemb.h"
#include "encodemv.h"
#include "vp8/common/common.h"
#include "onyx_int.h"
#include "vp8/common/extend.h"
#include "vp8/common/entropymode.h"
#include "vp8/common/quant_common.h"
#include "segmentation.h"
#include "vp8/common/setupintrarecon.h"
#include "encodeintra.h"
#include "vp8/common/reconinter.h"
#include "rdopt.h"
#include "pickinter.h"
#include "vp8/common/findnearmv.h"
#include "vp8/common/reconintra.h"
#include <stdio.h>
#include <limits.h>
#include "vp8/common/subpixel.h"
#include "vpx_ports/vpx_timer.h"

#if CONFIG_RUNTIME_CPU_DETECT
#define RTCD(x)     &cpi->common.rtcd.x
#define IF_RTCD(x)  (x)
#else
#define RTCD(x)     NULL
#define IF_RTCD(x)  NULL
#endif
extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t);

extern void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex);
extern void vp8_auto_select_speed(VP8_COMP *cpi);
extern void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
                                      MACROBLOCK *x,
                                      MB_ROW_COMP *mbr_ei,
                                      int mb_row,
                                      int count);
void vp8_build_block_offsets(MACROBLOCK *x);
void vp8_setup_block_ptrs(MACROBLOCK *x);
int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset);
int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t);

#ifdef MODE_STATS
unsigned int inter_y_modes[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
unsigned int inter_uv_modes[4] = {0, 0, 0, 0};
unsigned int inter_b_modes[15]  = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
unsigned int y_modes[5]   = {0, 0, 0, 0, 0};
unsigned int uv_modes[4]  = {0, 0, 0, 0};
unsigned int b_modes[14]  = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
#endif

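/* The zero-bin and rounding factors below are in Q7 fixed point (128 ==
 * 1.0); vp8cx_init_quantizer() multiplies them by the dequant value and
 * shifts right by 7 to size each coefficient's zero bin and rounding term.
 */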
static const int qrounding_factors[129] =
{
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48
};

static const int qzbin_factors[129] =
{
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80
};

static const int qrounding_factors_y2[129] =
{
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48
};

static const int qzbin_factors_y2[129] =
{
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80
};

#define EXACT_QUANT
#ifdef EXACT_QUANT
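/* Derive a multiply/shift pair that approximates division by the dequant
 * value d: l is floor(log2(d)) and quant holds the low 16 bits of a
 * rounded-up reciprocal, so the quantizer can replace a divide with a
 * multiply and shifts. Without improved_quant a plain 16-bit reciprocal
 * with no shift is used.
 */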
static void vp8cx_invert_quant(int improved_quant, short *quant,
                               short *shift, short d)
{
    if(improved_quant)
    {
        unsigned t;
        int l;
        t = d;
        for(l = 0; t > 1; l++)
            t>>=1;
        t = 1 + (1<<(16+l))/d;
        *quant = (short)(t - (1<<16));
        *shift = l;
    }
    else
    {
        *quant = (1 << 16) / d;
        *shift = 0;
    }
}

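/* Build, for every quantizer index Q, the forward quantizer, zero bin,
 * rounding, dequant and zero-run zbin boost tables for the Y1, Y2 and UV
 * planes. Entry 0 is the DC coefficient; entries 1..15 are the AC
 * coefficients, stored at their raster positions via the zigzag table.
 */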
void vp8cx_init_quantizer(VP8_COMP *cpi)
{
    int i;
    int quant_val;
    int Q;

    int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};

    for (Q = 0; Q < QINDEX_RANGE; Q++)
    {
        // dc values
        quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
        cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;
        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
                           cpi->Y1quant_shift[Q] + 0, quant_val);
        cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.Y1dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
        cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;
        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
                           cpi->Y2quant_shift[Q] + 0, quant_val);
        cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
        cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
        cpi->common.Y2dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
        cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;
        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
                           cpi->UVquant_shift[Q] + 0, quant_val);
        cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.UVdequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        // all the ac values
        for (i = 1; i < 16; i++)
        {
            int rc = vp8_default_zig_zag1d[i];

            quant_val = vp8_ac_yquant(Q);
            cpi->Y1quant_fast[Q][rc] = (1 << 16) / quant_val;
            vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
                               cpi->Y1quant_shift[Q] + rc, quant_val);
            cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.Y1dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
            cpi->Y2quant_fast[Q][rc] = (1 << 16) / quant_val;
            vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
                               cpi->Y2quant_shift[Q] + rc, quant_val);
            cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
            cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
            cpi->common.Y2dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
            cpi->UVquant_fast[Q][rc] = (1 << 16) / quant_val;
            vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
                               cpi->UVquant_shift[Q] + rc, quant_val);
            cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.UVdequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
        }
    }
}
#else
void vp8cx_init_quantizer(VP8_COMP *cpi)
{
    int i;
    int quant_val;
    int Q;

    int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};

    for (Q = 0; Q < QINDEX_RANGE; Q++)
    {
        // dc values
        quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
        cpi->Y1quant[Q][0] = (1 << 16) / quant_val;
        cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.Y1dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
        cpi->Y2quant[Q][0] = (1 << 16) / quant_val;
        cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
        cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
        cpi->common.Y2dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
        cpi->UVquant[Q][0] = (1 << 16) / quant_val;
        cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.UVdequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        // all the ac values
        for (i = 1; i < 16; i++)
        {
            int rc = vp8_default_zig_zag1d[i];

            quant_val = vp8_ac_yquant(Q);
            cpi->Y1quant[Q][rc] = (1 << 16) / quant_val;
            cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.Y1dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
            cpi->Y2quant[Q][rc] = (1 << 16) / quant_val;
            cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
            cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
            cpi->common.Y2dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
            cpi->UVquant[Q][rc] = (1 << 16) / quant_val;
            cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.UVdequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
        }
    }
}
#endif
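/* Point this macroblock's quantizer state at the tables built in
 * vp8cx_init_quantizer(), selecting the Q index from the MB's segment when
 * segmentation is enabled (absolute or delta) or the frame base Q index
 * otherwise. zbin_extra widens the zero bin by an amount derived from the
 * first AC dequant value and the current zbin boosts.
 */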
void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
{
    int i;
    int QIndex;
    MACROBLOCKD *xd = &x->e_mbd;
    int zbin_extra;

    // Select the baseline MB Q index.
    if (xd->segmentation_enabled)
    {
        // Abs Value
        if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
            QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
        // Delta Value
        else
        {
            QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
            QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;    // Clamp to valid range
        }
    }
    else
        QIndex = cpi->common.base_qindex;

    // Y
    zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;

    for (i = 0; i < 16; i++)
    {
        x->block[i].quant = cpi->Y1quant[QIndex];
        x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];
        x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
        x->block[i].zbin = cpi->Y1zbin[QIndex];
        x->block[i].round = cpi->Y1round[QIndex];
        x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
        x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
        x->block[i].zbin_extra = (short)zbin_extra;
    }

    // UV
    zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;

    for (i = 16; i < 24; i++)
    {
        x->block[i].quant = cpi->UVquant[QIndex];
        x->block[i].quant_fast = cpi->UVquant_fast[QIndex];
        x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
        x->block[i].zbin = cpi->UVzbin[QIndex];
        x->block[i].round = cpi->UVround[QIndex];
        x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
        x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
        x->block[i].zbin_extra = (short)zbin_extra;
    }

    // Y2
    zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
    x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];
    x->block[24].quant = cpi->Y2quant[QIndex];
    x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
    x->block[24].zbin = cpi->Y2zbin[QIndex];
    x->block[24].round = cpi->Y2round[QIndex];
    x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
    x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
    x->block[24].zbin_extra = (short)zbin_extra;

    /* save this macroblock QIndex for vp8_update_zbin_extra() */
    x->q_index = QIndex;
}
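/* Recompute just the zbin_extra values, e.g. after zbin_mode_boost has
 * changed for the current mode, using the Q index saved by
 * vp8cx_mb_init_quantizer(); the rest of the quantizer state is untouched.
 */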
void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x)
{
    int i;
    int QIndex = x->q_index;
    int zbin_extra;

    // Y
    zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
    for (i = 0; i < 16; i++)
    {
        x->block[i].zbin_extra = (short)zbin_extra;
    }

    // UV
    zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
    for (i = 16; i < 24; i++)
    {
        x->block[i].zbin_extra = (short)zbin_extra;
    }

    // Y2
    zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
    x->block[24].zbin_extra = (short)zbin_extra;
}

void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
{
    // Clear Zbin mode boost for default case
    cpi->zbin_mode_boost = 0;

    // MB level quantizer setup
    vp8cx_mb_init_quantizer(cpi, &cpi->mb);
}


/* activity_avg must be positive, or flat regions could get a zero weight
 *  (infinite lambda), which confounds analysis.
 * This also avoids the need for divide by zero checks in
 *  vp8_activity_masking().
 */
#define VP8_ACTIVITY_AVG_MIN (64)

/* This is used as a reference when computing the source variance for the
 *  purposes of activity masking.
 * Eventually this should be replaced by custom no-reference routines,
 *  which will be faster.
 */
static const unsigned char VP8_VAR_OFFS[16]=
{
    128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
};

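/* Scale the RD multiplier by the MB's source activity: the 16x16 variance
 * is measured against the flat VP8_VAR_OFFS reference and rdmult is scaled
 * by roughly (4*act + avg) / (act + 4*avg), so blocks flatter than the
 * frame average get a lower lambda and therefore more bits.
 */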
unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
{
    unsigned int act;
    unsigned int sse;
    int sum;
    unsigned int a;
    unsigned int b;
    /* TODO: This could also be done over smaller areas (8x8), but that would
     *  require extensive changes elsewhere, as lambda is assumed to be fixed
     *  over an entire MB in most of the code.
     * Another option is to compute four 8x8 variances, and pick a single
     *  lambda using a non-linear combination (e.g., the smallest, or second
     *  smallest, etc.).
     */
    VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
     x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
    /* This requires a full 32 bits of precision. */
    act = (sse<<8) - sum*sum;
    /* Drop 4 to give us some headroom to work with. */
    act = (act + 8) >> 4;
    /* If the region is flat, lower the activity some more. */
    if (act < 8<<12)
        act = act < 5<<12 ? act : 5<<12;
    /* TODO: For non-flat regions, edge regions should receive less masking
     *  than textured regions, but identifying edge regions quickly and
     *  reliably enough is still a subject of experimentation.
     * This will be most noticeable near edges with a complex shape (e.g.,
     *  text), but the 4x4 transform size should make this less of a problem
     *  than it would be for an 8x8 transform.
     */
    /* Apply the masking to the RD multiplier. */
    a = act + 4*cpi->activity_avg;
    b = 4*act + cpi->activity_avg;
    x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a);
    return act;
}


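/* Encode a single row of macroblocks: set up per-MB motion vector limits
 * and edge distances, apply optional activity masking and segment-based
 * quantizer changes, encode each MB as intra or inter, and advance the
 * source, reconstruction and context pointers one column at a time.
 */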
static
void encode_mb_row(VP8_COMP *cpi,
                   VP8_COMMON *cm,
                   int mb_row,
                   MACROBLOCK  *x,
                   MACROBLOCKD *xd,
                   TOKENEXTRA **tp,
                   int *segment_counts,
                   int *totalrate)
{
    INT64 activity_sum = 0;
    int i;
    int recon_yoffset, recon_uvoffset;
    int mb_col;
    int ref_fb_idx = cm->lst_fb_idx;
    int dst_fb_idx = cm->new_fb_idx;
    int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
    int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
    int seg_map_index = (mb_row * cpi->common.mb_cols);

#if CONFIG_MULTITHREAD
    const int nsync = cpi->mt_sync_range;
    const int rightmost_col = cm->mb_cols - 1;
    volatile const int *last_row_current_mb_col;

    if ((cpi->b_multi_threaded != 0) && (mb_row != 0))
        last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1];
    else
        last_row_current_mb_col = &rightmost_col;
#endif

    // reset above block coeffs
    xd->above_context = cm->above_context;

    xd->up_available = (mb_row != 0);
    recon_yoffset = (mb_row * recon_y_stride * 16);
    recon_uvoffset = (mb_row * recon_uv_stride * 8);

    cpi->tplist[mb_row].start = *tp;
    //printf("Main mb_row = %d\n", mb_row);

    // Distance of Mb to the top & bottom edges, specified in 1/8th pel
    // units as they are always compared to values that are in 1/8th pel units
    xd->mb_to_top_edge = -((mb_row * 16) << 3);
    xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;

    // Set up limit values for vertical motion vector components
    // to prevent them extending beyond the UMV borders
    x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
    x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
                        + (VP8BORDERINPIXELS - 16);

    // for each macroblock col in image
    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
    {
        // Distance of Mb to the left & right edges, specified in
        // 1/8th pel units as they are always compared to values
        // that are in 1/8th pel units
        xd->mb_to_left_edge = -((mb_col * 16) << 3);
        xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;

        // Set up limit values for horizontal motion vector components
        // to prevent them extending beyond the UMV borders
        x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
        x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
                            + (VP8BORDERINPIXELS - 16);

        xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
        xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
        xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
        xd->left_available = (mb_col != 0);

        x->rddiv = cpi->RDDIV;
        x->rdmult = cpi->RDMULT;

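        /* When the row above is being encoded by another thread, wait (every
         * nsync columns) until that thread has advanced far enough that the
         * above and above-right context needed by this MB is available.
         */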
#if CONFIG_MULTITHREAD
        if ((cpi->b_multi_threaded != 0) && (mb_row != 0))
        {
            if ((mb_col & (nsync - 1)) == 0)
            {
                while (mb_col > (*last_row_current_mb_col - nsync)
                        && (*last_row_current_mb_col) != (cm->mb_cols - 1))
                {
                    x86_pause_hint();
                    thread_sleep(0);
                }
            }
        }
#endif

        if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
            activity_sum += vp8_activity_masking(cpi, x);

        // Is segmentation enabled?
        // MB level adjustment to quantizer
        if (xd->segmentation_enabled)
        {
            // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
            if (cpi->segmentation_map[seg_map_index+mb_col] <= 3)
                xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
            else
                xd->mode_info_context->mbmi.segment_id = 0;

            vp8cx_mb_init_quantizer(cpi, x);
        }
        else
            xd->mode_info_context->mbmi.segment_id = 0;         // Set to Segment 0 by default

        x->active_ptr = cpi->active_map + seg_map_index + mb_col;

        if (cm->frame_type == KEY_FRAME)
        {
            *totalrate += vp8cx_encode_intra_macro_block(cpi, x, tp);
#ifdef MODE_STATS
            y_modes[xd->mbmi.mode] ++;
#endif
        }
        else
        {
            *totalrate += vp8cx_encode_inter_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset);

#ifdef MODE_STATS
            inter_y_modes[xd->mbmi.mode] ++;

            if (xd->mbmi.mode == SPLITMV)
            {
                int b;

                for (b = 0; b < xd->mbmi.partition_count; b++)
                {
                    inter_b_modes[x->partition->bmi[b].mode] ++;
                }
            }

#endif

            // Count of last ref frame 0,0 usage
            if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                cpi->inter_zz_count ++;

            // Special case code for cyclic refresh
            // If cyclic update enabled then copy xd->mbmi.segment_id (which may have been updated based on mode
            // during vp8cx_encode_inter_macroblock()) back into the global segmentation map
            if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
            {
                cpi->segmentation_map[seg_map_index+mb_col] = xd->mode_info_context->mbmi.segment_id;

                // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh):
                // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0)
                // else mark it as dirty (1).
                if (xd->mode_info_context->mbmi.segment_id)
                    cpi->cyclic_refresh_map[seg_map_index+mb_col] = -1;
                else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                {
                    if (cpi->cyclic_refresh_map[seg_map_index+mb_col] == 1)
                        cpi->cyclic_refresh_map[seg_map_index+mb_col] = 0;
                }
                else
                    cpi->cyclic_refresh_map[seg_map_index+mb_col] = 1;

            }
        }

        cpi->tplist[mb_row].stop = *tp;

        x->gf_active_ptr++;      // Increment pointer into gf usage flags structure for next mb

        for (i = 0; i < 16; i++)
            vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi));

        // adjust to the next column of macroblocks
        x->src.y_buffer += 16;
        x->src.u_buffer += 8;
        x->src.v_buffer += 8;

        recon_yoffset += 16;
        recon_uvoffset += 8;

        // Keep track of segment usage
        segment_counts[xd->mode_info_context->mbmi.segment_id] ++;

        // skip to next mb
        xd->mode_info_context++;
        x->partition_info++;

        xd->above_context++;
#if CONFIG_MULTITHREAD
        if (cpi->b_multi_threaded != 0)
        {
            cpi->mt_current_mb_col[mb_row] = mb_col;
        }
#endif
    }

    //extend the recon for intra prediction
    vp8_extend_mb_row(
        &cm->yv12_fb[dst_fb_idx],
        xd->dst.y_buffer + 16,
        xd->dst.u_buffer + 8,
        xd->dst.v_buffer + 8);

    // this is to account for the border
    xd->mode_info_context++;
    x->partition_info++;
    x->activity_sum += activity_sum;

#if CONFIG_MULTITHREAD
    if ((cpi->b_multi_threaded != 0) && (mb_row == cm->mb_rows - 1))
    {
        sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */
    }
#endif
}

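/* Top-level per-frame encode: select the sub-pixel prediction functions,
 * reset the frame counters and quantizer, encode every MB row (optionally
 * with worker threads), then derive the segment tree and reference frame
 * probabilities and the projected frame size from what was just coded.
 */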
void vp8_encode_frame(VP8_COMP *cpi)
{
    int mb_row;
    MACROBLOCK *const x = & cpi->mb;
    VP8_COMMON *const cm = & cpi->common;
    MACROBLOCKD *const xd = & x->e_mbd;

    TOKENEXTRA *tp = cpi->tok;
    int segment_counts[MAX_MB_SEGMENTS];
    int totalrate;

    // Functions setup for all frame types so we can use MC in AltRef
    if (cm->mcomp_filter_type == SIXTAP)
    {
        xd->subpixel_predict        = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, sixtap4x4);
        xd->subpixel_predict8x4     = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, sixtap8x4);
        xd->subpixel_predict8x8     = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, sixtap8x8);
        xd->subpixel_predict16x16   = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, sixtap16x16);
    }
    else
    {
        xd->subpixel_predict        = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, bilinear4x4);
        xd->subpixel_predict8x4     = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, bilinear8x4);
        xd->subpixel_predict8x8     = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, bilinear8x8);
        xd->subpixel_predict16x16   = SUBPIX_INVOKE(
                                      &cpi->common.rtcd.subpix, bilinear16x16);
    }

    x->gf_active_ptr = (signed char *)cpi->gf_active_flags;     // Point to base of GF active flags data structure

    x->vector_range = 32;

    // Count of MBs using the alternate Q if any
    cpi->alt_qcount = 0;

    // Reset frame count of inter 0,0 motion vector usage.
    cpi->inter_zz_count = 0;

    vpx_memset(segment_counts, 0, sizeof(segment_counts));

    cpi->prediction_error = 0;
    cpi->intra_error = 0;
    cpi->skip_true_count = 0;
    cpi->skip_false_count = 0;

#if 0
    // Experimental code
    cpi->frame_distortion = 0;
    cpi->last_mb_distortion = 0;
#endif

    totalrate = 0;

    x->partition_info = x->pi;

    xd->mode_info_context = cm->mi;
    xd->mode_info_stride = cm->mode_info_stride;

    xd->frame_type = cm->frame_type;

    xd->frames_since_golden = cm->frames_since_golden;
    xd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;
    vp8_zero(cpi->MVcount);
    // vp8_zero( Contexts)
    vp8_zero(cpi->coef_counts);

    // reset intra mode contexts
    if (cm->frame_type == KEY_FRAME)
        vp8_init_mbmode_probs(cm);


    vp8cx_frame_init_quantizer(cpi);

    if (cpi->compressor_speed == 2)
    {
        if (cpi->oxcf.cpu_used < 0)
            cpi->Speed = -(cpi->oxcf.cpu_used);
        else
            vp8_auto_select_speed(cpi);
    }

    vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));
    vp8cx_initialize_me_consts(cpi, cm->base_qindex);

    // Copy data over into macro block data structures.

    x->src = * cpi->Source;
    xd->pre = cm->yv12_fb[cm->lst_fb_idx];
    xd->dst = cm->yv12_fb[cm->new_fb_idx];

    // set up the new frame for intra coded blocks

    vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);

    vp8_build_block_offsets(x);

    vp8_setup_block_dptrs(&x->e_mbd);

    vp8_setup_block_ptrs(x);

    x->activity_sum = 0;

    xd->mode_info_context->mbmi.mode = DC_PRED;
    xd->mode_info_context->mbmi.uv_mode = DC_PRED;

    xd->left_context = &cm->left_context;

    vp8_zero(cpi->count_mb_ref_frame_usage)
    vp8_zero(cpi->ymode_count)
    vp8_zero(cpi->uv_mode_count)

    x->mvc = cm->fc.mvc;

    vpx_memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);

    {
        struct vpx_usec_timer  emr_timer;
        vpx_usec_timer_start(&emr_timer);

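        /* With multithreading enabled, the worker threads encode the rows in
         * between those taken here: the main thread processes every
         * (encoding_thread_count + 1)th row, each row writes its tokens to a
         * fixed slice of the token buffer (mb_cols * 16 * 24 entries per
         * row), and progress is published through mt_current_mb_col so the
         * rows can proceed as a staggered wavefront.
         */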
#if CONFIG_MULTITHREAD
        if (cpi->b_multi_threaded)
        {
            int i;

            vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1,  cpi->encoding_thread_count);

            for (i = 0; i < cm->mb_rows; i++)
                cpi->mt_current_mb_col[i] = -1;

            for (i = 0; i < cpi->encoding_thread_count; i++)
            {
                sem_post(&cpi->h_event_start_encoding[i]);
            }

            for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
            {
                vp8_zero(cm->left_context)

                tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);

                encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);

                // adjust to the next row of mbs
                x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
                x->src.u_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
                x->src.v_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;

                xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
                x->partition_info  += xd->mode_info_stride * cpi->encoding_thread_count;

            }

            sem_wait(&cpi->h_event_end_encoding); /* wait for other threads to finish */

            cpi->tok_count = 0;

            for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++)
            {
                cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start;
            }

            if (xd->segmentation_enabled)
            {
                int i, j;

                if (xd->segmentation_enabled)
                {

                    for (i = 0; i < cpi->encoding_thread_count; i++)
                    {
                        for (j = 0; j < 4; j++)
                            segment_counts[j] += cpi->mb_row_ei[i].segment_counts[j];
                    }
                }
            }

            for (i = 0; i < cpi->encoding_thread_count; i++)
            {
                totalrate += cpi->mb_row_ei[i].totalrate;
            }

            for (i = 0; i < cpi->encoding_thread_count; i++)
            {
                x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
            }

        }
        else
#endif
        {
            // for each macroblock row in image
            for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
            {

                vp8_zero(cm->left_context)

                encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);

                // adjust to the next row of mbs
                x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
                x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
                x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
            }

            cpi->tok_count = tp - cpi->tok;

        }

        vpx_usec_timer_mark(&emr_timer);
        cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer);

    }

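    /* The three segment tree probabilities describe a two-level binary tree:
     * probs[0] splits segments {0,1} from {2,3}, probs[1] splits 0 from 1,
     * and probs[2] splits 2 from 3, each scaled to the 1..255 range.
     */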
    // Work out the segment probabilities if segmentation is enabled
    if (xd->segmentation_enabled)
    {
        int tot_count;
        int i;

        // Set to defaults
        vpx_memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs));

        tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3];

        if (tot_count)
        {
            xd->mb_segment_tree_probs[0] = ((segment_counts[0] + segment_counts[1]) * 255) / tot_count;

            tot_count = segment_counts[0] + segment_counts[1];

            if (tot_count > 0)
            {
                xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) / tot_count;
            }

            tot_count = segment_counts[2] + segment_counts[3];

            if (tot_count > 0)
                xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count;

            // Zero probabilities not allowed
            for (i = 0; i < MB_FEATURE_TREE_PROBS; i ++)
            {
                if (xd->mb_segment_tree_probs[i] == 0)
                    xd->mb_segment_tree_probs[i] = 1;
            }
        }
    }

    // 256 rate units to the bit, so the projected frame size is in bits
    cpi->projected_frame_size = totalrate >> 8;

    // Make a note of the percentage MBs coded Intra.
    if (cm->frame_type == KEY_FRAME)
    {
        cpi->this_frame_percent_intra = 100;
    }
    else
    {
        int tot_modes;

        tot_modes = cpi->count_mb_ref_frame_usage[INTRA_FRAME]
                    + cpi->count_mb_ref_frame_usage[LAST_FRAME]
                    + cpi->count_mb_ref_frame_usage[GOLDEN_FRAME]
                    + cpi->count_mb_ref_frame_usage[ALTREF_FRAME];

        if (tot_modes)
            cpi->this_frame_percent_intra = cpi->count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes;

    }

#if 0
    {
        int cnt = 0;
        int flag[2] = {0, 0};

        for (cnt = 0; cnt < MVPcount; cnt++)
        {
            if (cm->fc.pre_mvc[0][cnt] != cm->fc.mvc[0][cnt])
            {
                flag[0] = 1;
                vpx_memcpy(cm->fc.pre_mvc[0], cm->fc.mvc[0], MVPcount);
                break;
            }
        }

        for (cnt = 0; cnt < MVPcount; cnt++)
        {
            if (cm->fc.pre_mvc[1][cnt] != cm->fc.mvc[1][cnt])
            {
                flag[1] = 1;
                vpx_memcpy(cm->fc.pre_mvc[1], cm->fc.mvc[1], MVPcount);
                break;
            }
        }

        if (flag[0] || flag[1])
            vp8_build_component_cost_table(cpi->mb.mvcost, cpi->mb.mvsadcost, (const MV_CONTEXT *) cm->fc.mvc, flag);
    }
#endif

    // Adjust the projected reference frame usage probability numbers to reflect
    // what we have just seen. This may be useful when we make multiple iterations
    // of the recode loop rather than continuing to use values from the previous frame.
    if ((cm->frame_type != KEY_FRAME) && !cm->refresh_alt_ref_frame && !cm->refresh_golden_frame)
    {
        const int *const rfct = cpi->count_mb_ref_frame_usage;
        const int rf_intra = rfct[INTRA_FRAME];
        const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];

        if ((rf_intra + rf_inter) > 0)
        {
            cpi->prob_intra_coded = (rf_intra * 255) / (rf_intra + rf_inter);

            if (cpi->prob_intra_coded < 1)
                cpi->prob_intra_coded = 1;

            if ((cm->frames_since_golden > 0) || cpi->source_alt_ref_active)
            {
                cpi->prob_last_coded = rf_inter ? (rfct[LAST_FRAME] * 255) / rf_inter : 128;

                if (cpi->prob_last_coded < 1)
                    cpi->prob_last_coded = 1;

                cpi->prob_gf_coded = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
                                     ? (rfct[GOLDEN_FRAME] * 255) / (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]) : 128;

                if (cpi->prob_gf_coded < 1)
                    cpi->prob_gf_coded = 1;
            }
        }
    }

#if 0
    // Keep record of the total distortion this time around for future use
    cpi->last_frame_distortion = cpi->frame_distortion;
#endif

    /* Update the average activity for the next frame.
     * This is feed-forward for now; it could also be saved in two-pass, or
     *  done during lookahead when that is eventually added.
     */
    cpi->activity_avg = (unsigned int )(x->activity_sum/cpi->common.MBs);
    if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
        cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;

}
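/* Wire each of the 25 blocks to its slice of the macroblock's diff and
 * coefficient buffers: blocks 0-15 cover the 16x16 luma residual, blocks
 * 16-23 the two 8x8 chroma planes (offsets 256 and 320), and block 24 the
 * second order (Y2) block at offset 384.
 */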
void vp8_setup_block_ptrs(MACROBLOCK *x)
{
    int r, c;
    int i;

    for (r = 0; r < 4; r++)
    {
        for (c = 0; c < 4; c++)
        {
            x->block[r*4+c].src_diff = x->src_diff + r * 4 * 16 + c * 4;
        }
    }

    for (r = 0; r < 2; r++)
    {
        for (c = 0; c < 2; c++)
        {
            x->block[16 + r*2+c].src_diff = x->src_diff + 256 + r * 4 * 8 + c * 4;
        }
    }


    for (r = 0; r < 2; r++)
    {
        for (c = 0; c < 2; c++)
        {
            x->block[20 + r*2+c].src_diff = x->src_diff + 320 + r * 4 * 8 + c * 4;
        }
    }

    x->block[24].src_diff = x->src_diff + 384;


    for (i = 0; i < 25; i++)
    {
        x->block[i].coeff = x->coeff + i * 16;
    }
}

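/* For each block, record the stride and the offset of its source pixels
 * within the Y, U or V source plane. base_src holds the address of the
 * plane pointer itself, so row advances to the source buffers are picked
 * up automatically.
 */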
void vp8_build_block_offsets(MACROBLOCK *x)
{
    int block = 0;
    int br, bc;

    vp8_build_block_doffsets(&x->e_mbd);

    // y blocks
    for (br = 0; br < 4; br++)
    {
        for (bc = 0; bc < 4; bc++)
        {
            BLOCK *this_block = &x->block[block];
            this_block->base_src = &x->src.y_buffer;
            this_block->src_stride = x->src.y_stride;
            this_block->src = 4 * br * this_block->src_stride + 4 * bc;
            ++block;
        }
    }

    // u blocks
    for (br = 0; br < 2; br++)
    {
        for (bc = 0; bc < 2; bc++)
        {
            BLOCK *this_block = &x->block[block];
            this_block->base_src = &x->src.u_buffer;
            this_block->src_stride = x->src.uv_stride;
            this_block->src = 4 * br * this_block->src_stride + 4 * bc;
            ++block;
        }
    }

    // v blocks
    for (br = 0; br < 2; br++)
    {
        for (bc = 0; bc < 2; bc++)
        {
            BLOCK *this_block = &x->block[block];
            this_block->base_src = &x->src.v_buffer;
            this_block->src_stride = x->src.uv_stride;
            this_block->src = 4 * br * this_block->src_stride + 4 * bc;
            ++block;
        }
    }
}

static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x)
{
    const MACROBLOCKD *xd = & x->e_mbd;
    const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode;
    const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode;

#ifdef MODE_STATS
    const int is_key = cpi->common.frame_type == KEY_FRAME;

    ++ (is_key ? uv_modes : inter_uv_modes)[uvm];

    if (m == B_PRED)
    {
        unsigned int *const bct = is_key ? b_modes : inter_b_modes;

        int b = 0;

        do
        {
            ++ bct[xd->block[b].bmi.mode];
        }
        while (++b < 16);
    }

#endif

    ++cpi->ymode_count[m];
    ++cpi->uv_mode_count[uvm];

}
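/* Intra-code one macroblock, choosing between a single 16x16 luma mode and
 * per-block 4x4 (B_PRED) modes. The RD path makes full rate-distortion
 * picks; the fast path scores the 16x16 modes by prediction error alone.
 * Chroma always gets a 16x16 UV mode, and the MB is then tokenized.
 */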
int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
{
    int Error4x4, Error16x16, error_uv;
    int rate4x4, rate16x16, rateuv;
    int dist4x4, dist16x16, distuv;
    int rate = 0;
    int rate4x4_tokenonly = 0;
    int rate16x16_tokenonly = 0;
    int rateuv_tokenonly = 0;

    x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;

#if !(CONFIG_REALTIME_ONLY)
    if (cpi->sf.RD && cpi->compressor_speed != 2)
    {
        error_uv = vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
        rate += rateuv;

        Error16x16 = vp8_rd_pick_intra16x16mby_mode(cpi, x, &rate16x16, &rate16x16_tokenonly, &dist16x16);

        Error4x4 = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4, Error16x16);

        rate += (Error4x4 < Error16x16) ? rate4x4 : rate16x16;
    }
    else
#endif
    {
        int rate2, best_distortion;
        MB_PREDICTION_MODE mode, best_mode = DC_PRED;
        int this_rd;
        Error16x16 = INT_MAX;

        vp8_pick_intra_mbuv_mode(x);

        for (mode = DC_PRED; mode <= TM_PRED; mode ++)
        {
            int distortion2;

            x->e_mbd.mode_info_context->mbmi.mode = mode;
            RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
                (&x->e_mbd);
            distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
            rate2  = x->mbmode_cost[x->e_mbd.frame_type][mode];
            this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);

            if (Error16x16 > this_rd)
            {
                Error16x16 = this_rd;
                best_mode = mode;
                best_distortion = distortion2;
            }
        }
        x->e_mbd.mode_info_context->mbmi.mode = best_mode;

        Error4x4 = vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate2, &best_distortion);
    }

    if (Error4x4 < Error16x16)
    {
        x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
        vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
    }
    else
    {
        vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
    }

    vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
    sum_intra_stats(cpi, x);
    vp8_tokenize_mb(cpi, &x->e_mbd, t);

    return rate;
}
#ifdef SPEEDSTATS
extern int cnt_pm;
#endif

extern void vp8_fix_contexts(MACROBLOCKD *x);

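/* Encode one macroblock of an inter frame: pick a mode (full RD search or
 * the fast path), maintain the cyclic refresh segment map and the zero-bin
 * mode boost, encode the residual against the chosen reference frame,
 * collect MV statistics, and tokenize the block or code it as skipped.
 */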
int vp8cx_encode_inter_macroblock
(
    VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
    int recon_yoffset, int recon_uvoffset
)
{
    MACROBLOCKD *const xd = &x->e_mbd;
    int inter_error;
    int intra_error = 0;
    int rate;
    int distortion;

    x->skip = 0;

    if (xd->segmentation_enabled)
        x->encode_breakout = cpi->segment_encode_breakout[xd->mode_info_context->mbmi.segment_id];
    else
        x->encode_breakout = cpi->oxcf.encode_breakout;

#if !(CONFIG_REALTIME_ONLY)

    if (cpi->sf.RD)
    {
        int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;

        /* Are we using the fast quantizer for the mode selection? */
        if(cpi->sf.use_fastquant_for_pick)
        {
            cpi->mb.quantize_b      = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);

            /* the fast quantizer does not use zbin_extra, so
             * do not recalculate */
            cpi->zbin_mode_boost_enabled = 0;
        }
        inter_error = vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);

        /* switch back to the regular quantizer for the encode */
        if (cpi->sf.improved_quant)
        {
            cpi->mb.quantize_b    = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
        }

        /* restore cpi->zbin_mode_boost_enabled */
        cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled;

    }
    else
#endif
        inter_error = vp8_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);


    cpi->prediction_error += inter_error;
    cpi->intra_error += intra_error;

#if 0
    // Experimental RD code
    cpi->frame_distortion += distortion;
    cpi->last_mb_distortion = distortion;
#endif

    // MB level adjustment to quantizer setup
    if (xd->segmentation_enabled)
    {
        // If cyclic update enabled
        if (cpi->cyclic_refresh_mode_enabled)
        {
            // Clear segment_id back to 0 if not coded (last frame 0,0)
            if ((xd->mode_info_context->mbmi.segment_id == 1) &&
                ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV)))
            {
                xd->mode_info_context->mbmi.segment_id = 0;

                /* segment_id changed, so update */
                vp8cx_mb_init_quantizer(cpi, x);
            }
        }
    }

    {
        // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to suppress noise
        if (cpi->zbin_mode_boost_enabled)
        {
            if ( xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME )
                 cpi->zbin_mode_boost = 0;
            else
            {
                if (xd->mode_info_context->mbmi.mode == ZEROMV)
                {
                    if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)
                        cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
                    else
                        cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
                }
                else if (xd->mode_info_context->mbmi.mode == SPLITMV)
                    cpi->zbin_mode_boost = 0;
                else
                    cpi->zbin_mode_boost = MV_ZBIN_BOOST;
            }
        }
        else
            cpi->zbin_mode_boost = 0;

        vp8_update_zbin_extra(cpi, x);
    }

    cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;

    if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    {
        vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);

        if (xd->mode_info_context->mbmi.mode == B_PRED)
        {
            vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
        }
        else
        {
            vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
        }

        sum_intra_stats(cpi, x);
    }
    else
    {
        MV best_ref_mv;
        MV nearest, nearby;
        int mdcounts[4];
        int ref_fb_idx;

        vp8_find_near_mvs(xd, xd->mode_info_context,
                          &nearest, &nearby, &best_ref_mv, mdcounts, xd->mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);

        vp8_build_uvmvs(xd, cpi->common.full_pixel);

        if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
            ref_fb_idx = cpi->common.lst_fb_idx;
        else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
            ref_fb_idx = cpi->common.gld_fb_idx;
        else
            ref_fb_idx = cpi->common.alt_fb_idx;

        xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
        xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
        xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;

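        /* Tally the differential motion vectors (relative to best_ref_mv,
         * halved to match the resolution of the MVcount tables) for the
         * later MV probability update.
         */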
        if (xd->mode_info_context->mbmi.mode == SPLITMV)
        {
            int i;

            for (i = 0; i < 16; i++)
            {
                if (xd->block[i].bmi.mode == NEW4X4)
                {
                    cpi->MVcount[0][mv_max+((xd->block[i].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
                    cpi->MVcount[1][mv_max+((xd->block[i].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
                }
            }
        }
        else if (xd->mode_info_context->mbmi.mode == NEWMV)
        {
            cpi->MVcount[0][mv_max+((xd->block[0].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
            cpi->MVcount[1][mv_max+((xd->block[0].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
        }

        if (!x->skip && !x->e_mbd.mode_info_context->mbmi.force_no_skip)
        {
            vp8_encode_inter16x16(IF_RTCD(&cpi->rtcd), x);

            // Clear mb_skip_coeff if mb_no_coeff_skip is not set
            if (!cpi->common.mb_no_coeff_skip)
                xd->mode_info_context->mbmi.mb_skip_coeff = 0;

        }
        else
            vp8_stuff_inter16x16(x);
    }

    if (!x->skip)
        vp8_tokenize_mb(cpi, xd, t);
    else
    {
        if (cpi->common.mb_no_coeff_skip)
        {
            if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
                xd->mode_info_context->mbmi.dc_diff = 0;
            else
                xd->mode_info_context->mbmi.dc_diff = 1;

            xd->mode_info_context->mbmi.mb_skip_coeff = 1;
            cpi->skip_true_count ++;
            vp8_fix_contexts(xd);
        }
        else
        {
            vp8_stuff_mb(cpi, xd, t);
            xd->mode_info_context->mbmi.mb_skip_coeff = 0;
            cpi->skip_false_count ++;
        }
    }

    return rate;
}