1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 #include <errno.h>
11 #include <stdio.h>
12 #include <limits.h>
13 
14 #include "vpx_config.h"
15 #include "vp8_rtcd.h"
16 #include "./vpx_dsp_rtcd.h"
17 #include "bitstream.h"
18 #include "encodemb.h"
19 #include "encodemv.h"
20 #if CONFIG_MULTITHREAD
21 #include "ethreading.h"
22 #endif
23 #include "vp8/common/common.h"
24 #include "onyx_int.h"
25 #include "vp8/common/extend.h"
26 #include "vp8/common/entropymode.h"
27 #include "vp8/common/quant_common.h"
28 #include "segmentation.h"
29 #include "vp8/common/setupintrarecon.h"
30 #include "encodeintra.h"
31 #include "vp8/common/reconinter.h"
32 #include "rdopt.h"
33 #include "pickinter.h"
34 #include "vp8/common/findnearmv.h"
35 #include "vp8/common/invtrans.h"
36 #include "vpx/internal/vpx_codec_internal.h"
37 #include "vpx_mem/vpx_mem.h"
38 #include "vpx_ports/vpx_timer.h"
39 #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
40 #include "bitstream.h"
41 #endif
42 #include "encodeframe.h"
43 
44 extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t);
45 static void adjust_act_zbin(VP8_COMP *cpi, MACROBLOCK *x);
46 
47 #ifdef MODE_STATS
48 unsigned int inter_y_modes[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
49 unsigned int inter_uv_modes[4] = { 0, 0, 0, 0 };
50 unsigned int inter_b_modes[15] = {
51   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
52 };
53 unsigned int y_modes[5] = { 0, 0, 0, 0, 0 };
54 unsigned int uv_modes[4] = { 0, 0, 0, 0 };
55 unsigned int b_modes[14] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
56 #endif
57 
58 /* activity_avg must be positive, or flat regions could get a zero weight
59  *  (infinite lambda), which confounds analysis.
60  * This also avoids the need for divide by zero checks in
61  *  vp8_activity_masking().
62  */
63 #define VP8_ACTIVITY_AVG_MIN (64)
64 
65 /* This is used as a reference when computing the source variance for the
66  *  purposes of activity masking.
67  * Eventually this should be replaced by custom no-reference routines,
68  *  which will be faster.
69  */
70 static const unsigned char VP8_VAR_OFFS[16] = { 128, 128, 128, 128, 128, 128,
71                                                 128, 128, 128, 128, 128, 128,
72                                                 128, 128, 128, 128 };
73 
74 /* Original activity measure from Tim T's code. */
75 static unsigned int tt_activity_measure(MACROBLOCK *x) {
76   unsigned int act;
77   unsigned int sse;
78   /* TODO: This could also be done over smaller areas (8x8), but that would
79    *  require extensive changes elsewhere, as lambda is assumed to be fixed
80    *  over an entire MB in most of the code.
81    * Another option is to compute four 8x8 variances, and pick a single
82    *  lambda using a non-linear combination (e.g., the smallest, or second
83    *  smallest, etc.).
84    */
85   act = vpx_variance16x16(x->src.y_buffer, x->src.y_stride, VP8_VAR_OFFS, 0,
86                           &sse);
87   act = act << 4;
88 
89   /* If the region is flat, lower the activity some more. */
90   if (act < 8 << 12) act = act < 5 << 12 ? act : 5 << 12;
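  /* i.e. activities between 5 << 12 and 8 << 12 are pulled down to 5 << 12;
   * values already below 5 << 12 are left unchanged.
   */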
91 
92   return act;
93 }
94 
95 /* Measure the activity of the current macroblock
96  * What we measure here is TBD so abstracted to this function
97  */
98 #define ALT_ACT_MEASURE 1
99 static unsigned int mb_activity_measure(MACROBLOCK *x, int mb_row, int mb_col) {
100   unsigned int mb_activity;
101 
102   if (ALT_ACT_MEASURE) {
103     int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
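    /* True only for MBs on the first row or first column of the frame,
     * excluding the top-left MB, which has no reconstructed neighbours.
     */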
104 
105     /* Or use an alternative. */
106     mb_activity = vp8_encode_intra(x, use_dc_pred);
107   } else {
108     /* Original activity measure from Tim T's code. */
109     mb_activity = tt_activity_measure(x);
110   }
111 
112   if (mb_activity < VP8_ACTIVITY_AVG_MIN) mb_activity = VP8_ACTIVITY_AVG_MIN;
113 
114   return mb_activity;
115 }
116 
117 /* Calculate an "average" mb activity value for the frame */
118 #define ACT_MEDIAN 0
119 static void calc_av_activity(VP8_COMP *cpi, int64_t activity_sum) {
120 #if ACT_MEDIAN
121   /* Find median: Simple n^2 algorithm for experimentation */
122   {
123     unsigned int median;
124     unsigned int i, j;
125     unsigned int *sortlist;
126     unsigned int tmp;
127 
128     /* Create a list to sort to */
129     CHECK_MEM_ERROR(&cpi->common.error, sortlist,
130                     vpx_calloc(sizeof(unsigned int), cpi->common.MBs));
131 
132     /* Copy map to sort list */
133     memcpy(sortlist, cpi->mb_activity_map,
134            sizeof(unsigned int) * cpi->common.MBs);
135 
136     /* Ripple each value down to its correct position */
137     for (i = 1; i < cpi->common.MBs; ++i) {
138       for (j = i; j > 0; j--) {
139         if (sortlist[j] < sortlist[j - 1]) {
140           /* Swap values */
141           tmp = sortlist[j - 1];
142           sortlist[j - 1] = sortlist[j];
143           sortlist[j] = tmp;
144         } else
145           break;
146       }
147     }
148 
149     /* Even number MBs so estimate median as mean of two either side. */
150     median = (1 + sortlist[cpi->common.MBs >> 1] +
151               sortlist[(cpi->common.MBs >> 1) + 1]) >>
152              1;
153 
154     cpi->activity_avg = median;
155 
156     vpx_free(sortlist);
157   }
158 #else
159   /* Simple mean for now */
160   cpi->activity_avg = (unsigned int)(activity_sum / cpi->common.MBs);
161 #endif
162 
163   if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN) {
164     cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
165   }
166 
167   /* Experimental code: return fixed value normalized for several clips */
168   if (ALT_ACT_MEASURE) cpi->activity_avg = 100000;
169 }
170 
171 #define USE_ACT_INDEX 0
172 #define OUTPUT_NORM_ACT_STATS 0
173 
174 #if USE_ACT_INDEX
175 /* Calculate an activity index for each mb */
176 static void calc_activity_index(VP8_COMP *cpi, MACROBLOCK *x) {
177   VP8_COMMON *const cm = &cpi->common;
178   int mb_row, mb_col;
179 
180   int64_t act;
181   int64_t a;
182   int64_t b;
183 
184 #if OUTPUT_NORM_ACT_STATS
185   FILE *f = fopen("norm_act.stt", "a");
186   fprintf(f, "\n%12d\n", cpi->activity_avg);
187 #endif
188 
189   /* Reset pointers to start of activity map */
190   x->mb_activity_ptr = cpi->mb_activity_map;
191 
192   /* Calculate normalized mb activity number. */
193   for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
194     /* for each macroblock col in image */
195     for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
196       /* Read activity from the map */
197       act = *(x->mb_activity_ptr);
198 
199       /* Calculate a normalized activity number */
200       a = act + 4 * cpi->activity_avg;
201       b = 4 * act + cpi->activity_avg;
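      /* With a = act + 4 * avg and b = 4 * act + avg, the rounded ratio below
       * yields a signed index in roughly [-3, 3]: positive when the MB is
       * busier than the frame average, negative when it is flatter.
       */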
202 
203       if (b >= a)
204         *(x->activity_ptr) = (int)((b + (a >> 1)) / a) - 1;
205       else
206         *(x->activity_ptr) = 1 - (int)((a + (b >> 1)) / b);
207 
208 #if OUTPUT_NORM_ACT_STATS
209       fprintf(f, " %6d", *(x->mb_activity_ptr));
210 #endif
211       /* Increment activity map pointers */
212       x->mb_activity_ptr++;
213     }
214 
215 #if OUTPUT_NORM_ACT_STATS
216     fprintf(f, "\n");
217 #endif
218   }
219 
220 #if OUTPUT_NORM_ACT_STATS
221   fclose(f);
222 #endif
223 }
224 #endif
225 
226 /* Loop through all MBs. Note activity of each, average activity and
227  * calculate a normalized activity for each
228  */
229 static void build_activity_map(VP8_COMP *cpi) {
230   MACROBLOCK *const x = &cpi->mb;
231   MACROBLOCKD *xd = &x->e_mbd;
232   VP8_COMMON *const cm = &cpi->common;
233 
234 #if ALT_ACT_MEASURE
235   YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx];
236   int recon_yoffset;
237   int recon_y_stride = new_yv12->y_stride;
238 #endif
239 
240   int mb_row, mb_col;
241   unsigned int mb_activity;
242   int64_t activity_sum = 0;
243 
244   /* for each macroblock row in image */
245   for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
246 #if ALT_ACT_MEASURE
247     /* reset above block coeffs */
248     xd->up_available = (mb_row != 0);
249     recon_yoffset = (mb_row * recon_y_stride * 16);
250 #endif
251     /* for each macroblock col in image */
252     for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
253 #if ALT_ACT_MEASURE
254       xd->dst.y_buffer = new_yv12->y_buffer + recon_yoffset;
255       xd->left_available = (mb_col != 0);
256       recon_yoffset += 16;
257 #endif
258       /* Copy current mb to a buffer */
259       vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
260 
261       /* measure activity */
262       mb_activity = mb_activity_measure(x, mb_row, mb_col);
263 
264       /* Keep frame sum */
265       activity_sum += mb_activity;
266 
267       /* Store MB level activity details. */
268       *x->mb_activity_ptr = mb_activity;
269 
270       /* Increment activity map pointer */
271       x->mb_activity_ptr++;
272 
273       /* adjust to the next column of source macroblocks */
274       x->src.y_buffer += 16;
275     }
276 
277     /* adjust to the next row of mbs */
278     x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
279 
280 #if ALT_ACT_MEASURE
281     /* extend the recon for intra prediction */
282     vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8,
283                       xd->dst.v_buffer + 8);
284 #endif
285   }
286 
287   /* Calculate an "average" MB activity */
288   calc_av_activity(cpi, activity_sum);
289 
290 #if USE_ACT_INDEX
291   /* Calculate an activity index number of each mb */
292   calc_activity_index(cpi, x);
293 #endif
294 }
295 
296 /* Macroblock activity masking */
297 void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x) {
298 #if USE_ACT_INDEX
299   x->rdmult += *(x->mb_activity_ptr) * (x->rdmult >> 2);
300   x->errorperbit = x->rdmult * 100 / (110 * x->rddiv);
301   x->errorperbit += (x->errorperbit == 0);
302 #else
303   int64_t a;
304   int64_t b;
305   int64_t act = *(x->mb_activity_ptr);
306 
307   /* Apply the masking to the RD multiplier. */
308   a = act + (2 * cpi->activity_avg);
309   b = (2 * act) + cpi->activity_avg;
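  /* b / a = (2 * act + avg) / (act + 2 * avg) lies between 1/2 and 2, so
   * rdmult (and hence lambda) is roughly halved for flat MBs and doubled
   * for very busy ones.
   */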
310 
311   x->rdmult = (unsigned int)(((int64_t)x->rdmult * b + (a >> 1)) / a);
312   x->errorperbit = x->rdmult * 100 / (110 * x->rddiv);
313   x->errorperbit += (x->errorperbit == 0);
314 #endif
315 
316   /* Activity based Zbin adjustment */
317   adjust_act_zbin(cpi, x);
318 }
319 
320 static void encode_mb_row(VP8_COMP *cpi, VP8_COMMON *cm, int mb_row,
321                           MACROBLOCK *x, MACROBLOCKD *xd, TOKENEXTRA **tp,
322                           int *segment_counts, int *totalrate) {
323   int recon_yoffset, recon_uvoffset;
324   int mb_col;
325   int ref_fb_idx = cm->lst_fb_idx;
326   int dst_fb_idx = cm->new_fb_idx;
327   int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
328   int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
329   int map_index = (mb_row * cpi->common.mb_cols);
330 
331 #if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
332   const int num_part = (1 << cm->multi_token_partition);
333   TOKENEXTRA *tp_start = cpi->tok;
334   vp8_writer *w;
335 #endif
336 
337 #if CONFIG_MULTITHREAD
338   const int nsync = cpi->mt_sync_range;
339   vpx_atomic_int rightmost_col = VPX_ATOMIC_INIT(cm->mb_cols + nsync);
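  /* Sentinel used when there is no row above to wait on: it is already past
   * the last column, so the spin-wait further down never blocks.
   */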
340   const vpx_atomic_int *last_row_current_mb_col;
341   vpx_atomic_int *current_mb_col = NULL;
342 
343   if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) != 0) {
344     current_mb_col = &cpi->mt_current_mb_col[mb_row];
345   }
346   if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) != 0 && mb_row != 0) {
347     last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1];
348   } else {
349     last_row_current_mb_col = &rightmost_col;
350   }
351 #endif
352 
353 #if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
354   if (num_part > 1)
355     w = &cpi->bc[1 + (mb_row % num_part)];
356   else
357     w = &cpi->bc[1];
358 #endif
359 
360   /* reset above block coeffs */
361   xd->above_context = cm->above_context;
362 
363   xd->up_available = (mb_row != 0);
364   recon_yoffset = (mb_row * recon_y_stride * 16);
365   recon_uvoffset = (mb_row * recon_uv_stride * 8);
366 
367   cpi->tplist[mb_row].start = *tp;
368   /* printf("Main mb_row = %d\n", mb_row); */
369 
370   /* Distance of Mb to the top & bottom edges, specified in 1/8th pel
371    * units as they are always compared to values that are in 1/8th pel units
372    */
373   xd->mb_to_top_edge = -((mb_row * 16) << 3);
374   xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
375 
376   /* Set up limit values for vertical motion vector components
377    * to prevent them extending beyond the UMV borders
378    */
379   x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
380   x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16);
381 
382   /* Set the mb activity pointer to the start of the row. */
383   x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
384 
385   /* for each macroblock col in image */
386   for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
387 #if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
388     *tp = cpi->tok;
389 #endif
390     /* Distance of Mb to the left & right edges, specified in
391      * 1/8th pel units as they are always compared to values
392      * that are in 1/8th pel units
393      */
394     xd->mb_to_left_edge = -((mb_col * 16) << 3);
395     xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
396 
397     /* Set up limit values for horizontal motion vector components
398      * to prevent them extending beyond the UMV borders
399      */
400     x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
401     x->mv_col_max =
402         ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16);
403 
404     xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
405     xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
406     xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
407     xd->left_available = (mb_col != 0);
408 
409     x->rddiv = cpi->RDDIV;
410     x->rdmult = cpi->RDMULT;
411 
412     /* Copy current mb to a buffer */
413     vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
414 
415 #if CONFIG_MULTITHREAD
416     if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) != 0) {
417       if (((mb_col - 1) % nsync) == 0) {
418         vpx_atomic_store_release(current_mb_col, mb_col - 1);
419       }
420 
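      /* Every nsync columns, wait until the row above has advanced far
       * enough that its reconstructed pixels are available for prediction.
       */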
421       if (mb_row && !(mb_col & (nsync - 1))) {
422         vp8_atomic_spin_wait(mb_col, last_row_current_mb_col, nsync);
423       }
424     }
425 #endif
426 
427     if (cpi->oxcf.tuning == VP8_TUNE_SSIM) vp8_activity_masking(cpi, x);
428 
429     /* Is segmentation enabled */
430     /* MB level adjustment to quantizer */
431     if (xd->segmentation_enabled) {
432       /* Code to set segment id in xd->mbmi.segment_id for current MB
433        * (with range checking)
434        */
435       if (cpi->segmentation_map[map_index + mb_col] <= 3) {
436         xd->mode_info_context->mbmi.segment_id =
437             cpi->segmentation_map[map_index + mb_col];
438       } else {
439         xd->mode_info_context->mbmi.segment_id = 0;
440       }
441 
442       vp8cx_mb_init_quantizer(cpi, x, 1);
443     } else {
444       /* Set to Segment 0 by default */
445       xd->mode_info_context->mbmi.segment_id = 0;
446     }
447 
448     x->active_ptr = cpi->active_map + map_index + mb_col;
449 
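    /* Encode the MB (intra on key frames, inter otherwise) and accumulate
     * its rate cost, saturating totalrate at INT_MAX to avoid overflow.
     */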
450     if (cm->frame_type == KEY_FRAME) {
451       const int intra_rate_cost = vp8cx_encode_intra_macroblock(cpi, x, tp);
452       if (INT_MAX - *totalrate > intra_rate_cost)
453         *totalrate += intra_rate_cost;
454       else
455         *totalrate = INT_MAX;
456 #ifdef MODE_STATS
457       y_modes[xd->mbmi.mode]++;
458 #endif
459     } else {
460       const int inter_rate_cost = vp8cx_encode_inter_macroblock(
461           cpi, x, tp, recon_yoffset, recon_uvoffset, mb_row, mb_col);
462       if (INT_MAX - *totalrate > inter_rate_cost)
463         *totalrate += inter_rate_cost;
464       else
465         *totalrate = INT_MAX;
466 
467 #ifdef MODE_STATS
468       inter_y_modes[xd->mbmi.mode]++;
469 
470       if (xd->mbmi.mode == SPLITMV) {
471         int b;
472 
473         for (b = 0; b < xd->mbmi.partition_count; ++b) {
474           inter_b_modes[x->partition->bmi[b].mode]++;
475         }
476       }
477 
478 #endif
479 
480       // Keep track of how many (consecutive) times a block is coded
481       // as ZEROMV_LASTREF, for base layer frames.
482       // Reset to 0 if it's coded as anything else.
483       if (cpi->current_layer == 0) {
484         if (xd->mode_info_context->mbmi.mode == ZEROMV &&
485             xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) {
486           // Increment, check for wrap-around.
487           if (cpi->consec_zero_last[map_index + mb_col] < 255) {
488             cpi->consec_zero_last[map_index + mb_col] += 1;
489           }
490           if (cpi->consec_zero_last_mvbias[map_index + mb_col] < 255) {
491             cpi->consec_zero_last_mvbias[map_index + mb_col] += 1;
492           }
493         } else {
494           cpi->consec_zero_last[map_index + mb_col] = 0;
495           cpi->consec_zero_last_mvbias[map_index + mb_col] = 0;
496         }
497         if (x->zero_last_dot_suppress) {
498           cpi->consec_zero_last_mvbias[map_index + mb_col] = 0;
499         }
500       }
501 
502       /* Special case code for cyclic refresh
503        * If cyclic update enabled then copy xd->mbmi.segment_id; (which
504        * may have been updated based on mode during
505        * vp8cx_encode_inter_macroblock()) back into the global
506        * segmentation map
507        */
508       if ((cpi->current_layer == 0) &&
509           (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)) {
510         cpi->segmentation_map[map_index + mb_col] =
511             xd->mode_info_context->mbmi.segment_id;
512 
513         /* If the block has been refreshed mark it as clean (the
514          * magnitude of the negative value influences how long it will
515          * be before we consider another refresh):
516          * else if it was coded (last frame 0,0) and has not already
517          * been refreshed then mark it as a candidate for cleanup
518          * next time (marked 0), else mark it as dirty (1).
519          */
520         if (xd->mode_info_context->mbmi.segment_id) {
521           cpi->cyclic_refresh_map[map_index + mb_col] = -1;
522         } else if ((xd->mode_info_context->mbmi.mode == ZEROMV) &&
523                    (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) {
524           if (cpi->cyclic_refresh_map[map_index + mb_col] == 1) {
525             cpi->cyclic_refresh_map[map_index + mb_col] = 0;
526           }
527         } else {
528           cpi->cyclic_refresh_map[map_index + mb_col] = 1;
529         }
530       }
531     }
532 
533     cpi->tplist[mb_row].stop = *tp;
534 
535 #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
536     /* pack tokens for this MB */
537     {
538       int tok_count = *tp - tp_start;
539       vp8_pack_tokens(w, tp_start, tok_count);
540     }
541 #endif
542     /* Increment pointer into gf usage flags structure. */
543     x->gf_active_ptr++;
544 
545     /* Increment the activity mask pointers. */
546     x->mb_activity_ptr++;
547 
548     /* adjust to the next column of macroblocks */
549     x->src.y_buffer += 16;
550     x->src.u_buffer += 8;
551     x->src.v_buffer += 8;
552 
553     recon_yoffset += 16;
554     recon_uvoffset += 8;
555 
556     /* Keep track of segment usage */
557     segment_counts[xd->mode_info_context->mbmi.segment_id]++;
558 
559     /* skip to next mb */
560     xd->mode_info_context++;
561     x->partition_info++;
562     xd->above_context++;
563   }
564 
565   /* extend the recon for intra prediction */
566   vp8_extend_mb_row(&cm->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16,
567                     xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
568 
569 #if CONFIG_MULTITHREAD
570   if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) != 0) {
571     vpx_atomic_store_release(current_mb_col,
572                              vpx_atomic_load_acquire(&rightmost_col));
573   }
574 #endif
575 
576   /* this is to account for the border */
577   xd->mode_info_context++;
578   x->partition_info++;
579 }
580 
581 static void init_encode_frame_mb_context(VP8_COMP *cpi) {
582   MACROBLOCK *const x = &cpi->mb;
583   VP8_COMMON *const cm = &cpi->common;
584   MACROBLOCKD *const xd = &x->e_mbd;
585 
586   /* GF active flags data structure */
587   x->gf_active_ptr = (signed char *)cpi->gf_active_flags;
588 
589   /* Activity map pointer */
590   x->mb_activity_ptr = cpi->mb_activity_map;
591 
592   x->act_zbin_adj = 0;
593 
594   x->partition_info = x->pi;
595 
596   xd->mode_info_context = cm->mi;
597   xd->mode_info_stride = cm->mode_info_stride;
598 
599   xd->frame_type = cm->frame_type;
600 
601   /* reset intra mode contexts */
602   if (cm->frame_type == KEY_FRAME) vp8_init_mbmode_probs(cm);
603 
604   /* Copy data over into macro block data structures. */
605   x->src = *cpi->Source;
606   xd->pre = cm->yv12_fb[cm->lst_fb_idx];
607   xd->dst = cm->yv12_fb[cm->new_fb_idx];
608 
609   /* set up frame for intra coded blocks */
610   vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);
611 
612   vp8_build_block_offsets(x);
613 
614   xd->mode_info_context->mbmi.mode = DC_PRED;
615   xd->mode_info_context->mbmi.uv_mode = DC_PRED;
616 
617   xd->left_context = &cm->left_context;
618 
619   x->mvc = cm->fc.mvc;
620 
621   memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
622 
623   /* Special case treatment when GF and ARF are not sensible options
624    * for reference
625    */
626   if (cpi->ref_frame_flags == VP8_LAST_FRAME) {
627     vp8_calc_ref_frame_costs(x->ref_frame_cost, cpi->prob_intra_coded, 255,
628                              128);
629   } else if ((cpi->oxcf.number_of_layers > 1) &&
630              (cpi->ref_frame_flags == VP8_GOLD_FRAME)) {
631     vp8_calc_ref_frame_costs(x->ref_frame_cost, cpi->prob_intra_coded, 1, 255);
632   } else if ((cpi->oxcf.number_of_layers > 1) &&
633              (cpi->ref_frame_flags == VP8_ALTR_FRAME)) {
634     vp8_calc_ref_frame_costs(x->ref_frame_cost, cpi->prob_intra_coded, 1, 1);
635   } else {
636     vp8_calc_ref_frame_costs(x->ref_frame_cost, cpi->prob_intra_coded,
637                              cpi->prob_last_coded, cpi->prob_gf_coded);
638   }
639 
640   xd->fullpixel_mask = ~0;
641   if (cm->full_pixel) xd->fullpixel_mask = ~7;
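  /* ~7 clears the fractional bits of motion vector components, restricting
   * them to whole-pixel positions when the frame is coded in full-pixel mode.
   */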
642 
643   vp8_zero(x->coef_counts);
644   vp8_zero(x->ymode_count);
645   vp8_zero(x->uv_mode_count);
646   x->prediction_error = 0;
647   x->intra_error = 0;
648   vp8_zero(x->count_mb_ref_frame_usage);
649 }
650 
651 #if CONFIG_MULTITHREAD
652 static void sum_coef_counts(MACROBLOCK *x, MACROBLOCK *x_thread) {
653   int i = 0;
654   do {
655     int j = 0;
656     do {
657       int k = 0;
658       do {
659         /* at every context */
660 
661         /* calc probs and branch cts for this frame only */
662         int t = 0; /* token/prob index */
663 
664         do {
665           x->coef_counts[i][j][k][t] += x_thread->coef_counts[i][j][k][t];
666         } while (++t < ENTROPY_NODES);
667       } while (++k < PREV_COEF_CONTEXTS);
668     } while (++j < COEF_BANDS);
669   } while (++i < BLOCK_TYPES);
670 }
671 #endif  // CONFIG_MULTITHREAD
672 
673 void vp8_encode_frame(VP8_COMP *cpi) {
674   int mb_row;
675   MACROBLOCK *const x = &cpi->mb;
676   VP8_COMMON *const cm = &cpi->common;
677   MACROBLOCKD *const xd = &x->e_mbd;
678   TOKENEXTRA *tp = cpi->tok;
679   int segment_counts[MAX_MB_SEGMENTS];
680   int totalrate;
681 #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
682   BOOL_CODER *bc = &cpi->bc[1]; /* bc[0] is for control partition */
683   const int num_part = (1 << cm->multi_token_partition);
684 #endif
685 
686   memset(segment_counts, 0, sizeof(segment_counts));
687   totalrate = 0;
688 
689   if (cpi->compressor_speed == 2) {
690     if (cpi->oxcf.cpu_used < 0) {
691       cpi->Speed = -(cpi->oxcf.cpu_used);
692     } else {
693       vp8_auto_select_speed(cpi);
694     }
695   }
696 
697   /* Functions setup for all frame types so we can use MC in AltRef */
698   if (!cm->use_bilinear_mc_filter) {
699     xd->subpixel_predict = vp8_sixtap_predict4x4;
700     xd->subpixel_predict8x4 = vp8_sixtap_predict8x4;
701     xd->subpixel_predict8x8 = vp8_sixtap_predict8x8;
702     xd->subpixel_predict16x16 = vp8_sixtap_predict16x16;
703   } else {
704     xd->subpixel_predict = vp8_bilinear_predict4x4;
705     xd->subpixel_predict8x4 = vp8_bilinear_predict8x4;
706     xd->subpixel_predict8x8 = vp8_bilinear_predict8x8;
707     xd->subpixel_predict16x16 = vp8_bilinear_predict16x16;
708   }
709 
710   cpi->mb.skip_true_count = 0;
711   cpi->tok_count = 0;
712 
713 #if 0
714     /* Experimental code */
715     cpi->frame_distortion = 0;
716     cpi->last_mb_distortion = 0;
717 #endif
718 
719   xd->mode_info_context = cm->mi;
720 
721   vp8_zero(cpi->mb.MVcount);
722 
723   vp8cx_frame_init_quantizer(cpi);
724 
725   vp8_initialize_rd_consts(cpi, x,
726                            vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));
727 
728   vp8cx_initialize_me_consts(cpi, cm->base_qindex);
729 
730   if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
731     /* Initialize encode frame context. */
732     init_encode_frame_mb_context(cpi);
733 
734     /* Build a frame level activity map */
735     build_activity_map(cpi);
736   }
737 
738   /* re-init encode frame context. */
739   init_encode_frame_mb_context(cpi);
740 
741 #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
742   {
743     int i;
744     for (i = 0; i < num_part; ++i) {
745       vp8_start_encode(&bc[i], cpi->partition_d[i + 1],
746                        cpi->partition_d_end[i + 1]);
747       bc[i].error = &cm->error;
748     }
749   }
750 
751 #endif
752 
753   {
754     struct vpx_usec_timer emr_timer;
755     vpx_usec_timer_start(&emr_timer);
756 
757 #if CONFIG_MULTITHREAD
758     if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) {
759       int i;
760 
761       vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei,
762                                 cpi->encoding_thread_count);
763 
764       if (cpi->mt_current_mb_col_size != cm->mb_rows) {
765         vpx_free(cpi->mt_current_mb_col);
766         cpi->mt_current_mb_col = NULL;
767         cpi->mt_current_mb_col_size = 0;
768         CHECK_MEM_ERROR(
769             &cpi->common.error, cpi->mt_current_mb_col,
770             vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows));
771         cpi->mt_current_mb_col_size = cm->mb_rows;
772       }
773       for (i = 0; i < cm->mb_rows; ++i)
774         vpx_atomic_store_release(&cpi->mt_current_mb_col[i], -1);
775 
776       for (i = 0; i < cpi->encoding_thread_count; ++i) {
777         sem_post(&cpi->h_event_start_encoding[i]);
778       }
779 
780       for (mb_row = 0; mb_row < cm->mb_rows;
781            mb_row += (cpi->encoding_thread_count + 1)) {
782         vp8_zero(cm->left_context);
783 
784 #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
785         tp = cpi->tok;
786 #else
787         tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);
788 #endif
789 
790         encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
791 
792         /* adjust to the next row of mbs */
793         x->src.y_buffer +=
794             16 * x->src.y_stride * (cpi->encoding_thread_count + 1) -
795             16 * cm->mb_cols;
796         x->src.u_buffer +=
797             8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) -
798             8 * cm->mb_cols;
799         x->src.v_buffer +=
800             8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) -
801             8 * cm->mb_cols;
802 
803         xd->mode_info_context +=
804             xd->mode_info_stride * cpi->encoding_thread_count;
805         x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count;
806         x->gf_active_ptr += cm->mb_cols * cpi->encoding_thread_count;
807       }
808       /* Wait for all the threads to finish. */
809       for (i = 0; i < cpi->encoding_thread_count; ++i) {
810         errno = 0;
811         while (sem_wait(&cpi->h_event_end_encoding[i]) != 0 && errno == EINTR) {
812         }
813       }
814 
815       for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
816         cpi->tok_count += (unsigned int)(cpi->tplist[mb_row].stop -
817                                          cpi->tplist[mb_row].start);
818       }
819 
820       if (xd->segmentation_enabled) {
821         int j;
822 
823         if (xd->segmentation_enabled) {
824           for (i = 0; i < cpi->encoding_thread_count; ++i) {
825             for (j = 0; j < 4; ++j) {
826               segment_counts[j] += cpi->mb_row_ei[i].segment_counts[j];
827             }
828           }
829         }
830       }
831 
832       for (i = 0; i < cpi->encoding_thread_count; ++i) {
833         int mode_count;
834         int c_idx;
835         totalrate += cpi->mb_row_ei[i].totalrate;
836 
837         cpi->mb.skip_true_count += cpi->mb_row_ei[i].mb.skip_true_count;
838 
839         for (mode_count = 0; mode_count < VP8_YMODES; ++mode_count) {
840           cpi->mb.ymode_count[mode_count] +=
841               cpi->mb_row_ei[i].mb.ymode_count[mode_count];
842         }
843 
844         for (mode_count = 0; mode_count < VP8_UV_MODES; ++mode_count) {
845           cpi->mb.uv_mode_count[mode_count] +=
846               cpi->mb_row_ei[i].mb.uv_mode_count[mode_count];
847         }
848 
849         for (c_idx = 0; c_idx < MVvals; ++c_idx) {
850           cpi->mb.MVcount[0][c_idx] += cpi->mb_row_ei[i].mb.MVcount[0][c_idx];
851           cpi->mb.MVcount[1][c_idx] += cpi->mb_row_ei[i].mb.MVcount[1][c_idx];
852         }
853 
854         cpi->mb.prediction_error += cpi->mb_row_ei[i].mb.prediction_error;
855         cpi->mb.intra_error += cpi->mb_row_ei[i].mb.intra_error;
856 
857         for (c_idx = 0; c_idx < MAX_REF_FRAMES; ++c_idx) {
858           cpi->mb.count_mb_ref_frame_usage[c_idx] +=
859               cpi->mb_row_ei[i].mb.count_mb_ref_frame_usage[c_idx];
860         }
861 
862         for (c_idx = 0; c_idx < MAX_ERROR_BINS; ++c_idx) {
863           cpi->mb.error_bins[c_idx] += cpi->mb_row_ei[i].mb.error_bins[c_idx];
864         }
865 
866         /* add up counts for each thread */
867         sum_coef_counts(x, &cpi->mb_row_ei[i].mb);
868       }
869 
870     } else
871 #endif  // CONFIG_MULTITHREAD
872     {
873 
874       /* for each macroblock row in image */
875       for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
876         vp8_zero(cm->left_context);
877 
878 #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
879         tp = cpi->tok;
880 #endif
881 
882         encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
883 
884         /* adjust to the next row of mbs */
885         x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
886         x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
887         x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
888       }
889 
890       cpi->tok_count = (unsigned int)(tp - cpi->tok);
891     }
892 
893 #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
894     {
895       int i;
896       for (i = 0; i < num_part; ++i) {
897         vp8_stop_encode(&bc[i]);
898         cpi->partition_sz[i + 1] = bc[i].pos;
899       }
900     }
901 #endif
902 
903     vpx_usec_timer_mark(&emr_timer);
904     cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer);
905   }
906 
907   // Work out the segment probabilities if segmentation is enabled
908   // and needs to be updated
909   if (xd->segmentation_enabled && xd->update_mb_segmentation_map) {
910     int tot_count;
911     int i;
912 
913     /* Set to defaults */
914     memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs));
915 
916     tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] +
917                 segment_counts[3];
918 
919     if (tot_count) {
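      /* The segment map is coded with a two-level binary tree: prob[0]
       * splits segments {0,1} from {2,3}, prob[1] splits 0 from 1 and
       * prob[2] splits 2 from 3, each scaled to a 0-255 probability.
       */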
920       xd->mb_segment_tree_probs[0] =
921           ((segment_counts[0] + segment_counts[1]) * 255) / tot_count;
922 
923       tot_count = segment_counts[0] + segment_counts[1];
924 
925       if (tot_count > 0) {
926         xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) / tot_count;
927       }
928 
929       tot_count = segment_counts[2] + segment_counts[3];
930 
931       if (tot_count > 0) {
932         xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count;
933       }
934 
935       /* Zero probabilities not allowed */
936       for (i = 0; i < MB_FEATURE_TREE_PROBS; ++i) {
937         if (xd->mb_segment_tree_probs[i] == 0) xd->mb_segment_tree_probs[i] = 1;
938       }
939     }
940   }
941 
942   /* projected_frame_size in units of BYTES */
943   cpi->projected_frame_size = totalrate >> 8;
944 
945   /* Make a note of the percentage MBs coded Intra. */
946   if (cm->frame_type == KEY_FRAME) {
947     cpi->this_frame_percent_intra = 100;
948   } else {
949     int tot_modes;
950 
951     tot_modes = cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME] +
952                 cpi->mb.count_mb_ref_frame_usage[LAST_FRAME] +
953                 cpi->mb.count_mb_ref_frame_usage[GOLDEN_FRAME] +
954                 cpi->mb.count_mb_ref_frame_usage[ALTREF_FRAME];
955 
956     if (tot_modes) {
957       cpi->this_frame_percent_intra =
958           cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes;
959     }
960   }
961 
962 #if !CONFIG_REALTIME_ONLY
963   /* Adjust the projected reference frame usage probability numbers to
964    * reflect what we have just seen. This may be useful when we make
965    * multiple iterations of the recode loop rather than continuing to use
966    * values from the previous frame.
967    */
968   if ((cm->frame_type != KEY_FRAME) &&
969       ((cpi->oxcf.number_of_layers > 1) ||
970        (!cm->refresh_alt_ref_frame && !cm->refresh_golden_frame))) {
971     vp8_convert_rfct_to_prob(cpi);
972   }
973 #endif
974 }
975 void vp8_setup_block_ptrs(MACROBLOCK *x) {
976   int r, c;
977   int i;
978 
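  /* src_diff layout: the 16 luma 4x4 blocks fill the first 256 entries,
   * the 4 U blocks start at offset 256, the 4 V blocks at 320, and the
   * second order (Y2) block at 384.
   */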
979   for (r = 0; r < 4; ++r) {
980     for (c = 0; c < 4; ++c) {
981       x->block[r * 4 + c].src_diff = x->src_diff + r * 4 * 16 + c * 4;
982     }
983   }
984 
985   for (r = 0; r < 2; ++r) {
986     for (c = 0; c < 2; ++c) {
987       x->block[16 + r * 2 + c].src_diff = x->src_diff + 256 + r * 4 * 8 + c * 4;
988     }
989   }
990 
991   for (r = 0; r < 2; ++r) {
992     for (c = 0; c < 2; ++c) {
993       x->block[20 + r * 2 + c].src_diff = x->src_diff + 320 + r * 4 * 8 + c * 4;
994     }
995   }
996 
997   x->block[24].src_diff = x->src_diff + 384;
998 
999   for (i = 0; i < 25; ++i) {
1000     x->block[i].coeff = x->coeff + i * 16;
1001   }
1002 }
1003 
1004 void vp8_build_block_offsets(MACROBLOCK *x) {
1005   int block = 0;
1006   int br, bc;
1007 
1008   vp8_build_block_doffsets(&x->e_mbd);
1009 
1010   /* y blocks */
1011   x->thismb_ptr = &x->thismb[0];
1012   for (br = 0; br < 4; ++br) {
1013     for (bc = 0; bc < 4; ++bc) {
1014       BLOCK *this_block = &x->block[block];
1015       this_block->base_src = &x->thismb_ptr;
1016       this_block->src_stride = 16;
1017       this_block->src = 4 * br * 16 + 4 * bc;
1018       ++block;
1019     }
1020   }
1021 
1022   /* u blocks */
1023   for (br = 0; br < 2; ++br) {
1024     for (bc = 0; bc < 2; ++bc) {
1025       BLOCK *this_block = &x->block[block];
1026       this_block->base_src = &x->src.u_buffer;
1027       this_block->src_stride = x->src.uv_stride;
1028       this_block->src = 4 * br * this_block->src_stride + 4 * bc;
1029       ++block;
1030     }
1031   }
1032 
1033   /* v blocks */
1034   for (br = 0; br < 2; ++br) {
1035     for (bc = 0; bc < 2; ++bc) {
1036       BLOCK *this_block = &x->block[block];
1037       this_block->base_src = &x->src.v_buffer;
1038       this_block->src_stride = x->src.uv_stride;
1039       this_block->src = 4 * br * this_block->src_stride + 4 * bc;
1040       ++block;
1041     }
1042   }
1043 }
1044 
1045 static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x) {
1046   const MACROBLOCKD *xd = &x->e_mbd;
1047   const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode;
1048   const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode;
1049 
1050 #ifdef MODE_STATS
1051   const int is_key = cpi->common.frame_type == KEY_FRAME;
1052 
1053   ++(is_key ? uv_modes : inter_uv_modes)[uvm];
1054 
1055   if (m == B_PRED) {
1056     unsigned int *const bct = is_key ? b_modes : inter_b_modes;
1057 
1058     int b = 0;
1059 
1060     do {
1061       ++bct[xd->block[b].bmi.mode];
1062     } while (++b < 16);
1063   }
1064 
1065 #else
1066   (void)cpi;
1067 #endif
1068 
1069   ++x->ymode_count[m];
1070   ++x->uv_mode_count[uvm];
1071 }
1072 
1073 /* Experimental stub function to create a per MB zbin adjustment based on
1074  * some previously calculated measure of MB activity.
1075  */
1076 static void adjust_act_zbin(VP8_COMP *cpi, MACROBLOCK *x) {
1077 #if USE_ACT_INDEX
1078   x->act_zbin_adj = *(x->mb_activity_ptr);
1079 #else
1080   int64_t a;
1081   int64_t b;
1082   int64_t act = *(x->mb_activity_ptr);
1083 
1084   /* Apply the activity masking to the zero bin adjustment. */
1085   a = act + 4 * cpi->activity_avg;
1086   b = 4 * act + cpi->activity_avg;
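  /* As with the activity index above, the rounded ratio of b to a yields a
   * signed adjustment in roughly [-3, 3]: positive (a wider zero bin) for
   * MBs busier than the frame average, negative for flatter ones.
   */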
1087 
1088   if (act > cpi->activity_avg) {
1089     x->act_zbin_adj = (int)(((int64_t)b + (a >> 1)) / a) - 1;
1090   } else {
1091     x->act_zbin_adj = 1 - (int)(((int64_t)a + (b >> 1)) / b);
1092   }
1093 #endif
1094 }
1095 
1096 int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
1097                                   TOKENEXTRA **t) {
1098   MACROBLOCKD *xd = &x->e_mbd;
1099   int rate;
1100 
1101   if (cpi->sf.RD && cpi->compressor_speed != 2) {
1102     vp8_rd_pick_intra_mode(x, &rate);
1103   } else {
1104     vp8_pick_intra_mode(x, &rate);
1105   }
1106 
1107   if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
1108     adjust_act_zbin(cpi, x);
1109     vp8_update_zbin_extra(cpi, x);
1110   }
1111 
1112   if (x->e_mbd.mode_info_context->mbmi.mode == B_PRED) {
1113     vp8_encode_intra4x4mby(x);
1114   } else {
1115     vp8_encode_intra16x16mby(x);
1116   }
1117 
1118   vp8_encode_intra16x16mbuv(x);
1119 
1120   sum_intra_stats(cpi, x);
1121 
1122   vp8_tokenize_mb(cpi, x, t);
1123 
1124   if (xd->mode_info_context->mbmi.mode != B_PRED) vp8_inverse_transform_mby(xd);
1125 
1126   vp8_dequant_idct_add_uv_block(xd->qcoeff + 16 * 16, xd->dequant_uv,
1127                                 xd->dst.u_buffer, xd->dst.v_buffer,
1128                                 xd->dst.uv_stride, xd->eobs + 16);
1129   return rate;
1130 }
1131 #ifdef SPEEDSTATS
1132 extern int cnt_pm;
1133 #endif
1134 
1135 extern void vp8_fix_contexts(MACROBLOCKD *x);
1136 
1137 int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
1138                                   int recon_yoffset, int recon_uvoffset,
1139                                   int mb_row, int mb_col) {
1140   MACROBLOCKD *const xd = &x->e_mbd;
1141   int intra_error = 0;
1142   int rate;
1143   int distortion;
1144 
1145   x->skip = 0;
1146 
1147   if (xd->segmentation_enabled) {
1148     x->encode_breakout =
1149         cpi->segment_encode_breakout[xd->mode_info_context->mbmi.segment_id];
1150   } else {
1151     x->encode_breakout = cpi->oxcf.encode_breakout;
1152   }
1153 
1154 #if CONFIG_TEMPORAL_DENOISING
1155   /* Reset the best sse mode/mv for each macroblock. */
1156   x->best_reference_frame = INTRA_FRAME;
1157   x->best_zeromv_reference_frame = INTRA_FRAME;
1158   x->best_sse_inter_mode = 0;
1159   x->best_sse_mv.as_int = 0;
1160   x->need_to_clamp_best_mvs = 0;
1161 #endif
1162 
1163   if (cpi->sf.RD) {
1164     int zbin_mode_boost_enabled = x->zbin_mode_boost_enabled;
1165 
1166     /* Are we using the fast quantizer for the mode selection? */
1167     if (cpi->sf.use_fastquant_for_pick) {
1168       x->quantize_b = vp8_fast_quantize_b;
1169 
1170       /* the fast quantizer does not use zbin_extra, so
1171        * do not recalculate */
1172       x->zbin_mode_boost_enabled = 0;
1173     }
1174     vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,
1175                            &distortion, &intra_error, mb_row, mb_col);
1176 
1177     /* switch back to the regular quantizer for the encode */
1178     if (cpi->sf.improved_quant) {
1179       x->quantize_b = vp8_regular_quantize_b;
1180     }
1181 
1182     /* restore cpi->zbin_mode_boost_enabled */
1183     x->zbin_mode_boost_enabled = zbin_mode_boost_enabled;
1184 
1185   } else {
1186     vp8_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,
1187                         &distortion, &intra_error, mb_row, mb_col);
1188   }
1189 
1190   x->prediction_error += distortion;
1191   x->intra_error += intra_error;
1192 
1193   if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
1194     /* Adjust the zbin based on this MB rate. */
1195     adjust_act_zbin(cpi, x);
1196   }
1197 
1198 #if 0
1199     /* Experimental RD code */
1200     cpi->frame_distortion += distortion;
1201     cpi->last_mb_distortion = distortion;
1202 #endif
1203 
1204   /* MB level adjustment to quantizer setup */
1205   if (xd->segmentation_enabled) {
1206     /* If cyclic update enabled */
1207     if (cpi->current_layer == 0 && cpi->cyclic_refresh_mode_enabled) {
1208       /* Clear segment_id back to 0 if not coded (last frame 0,0) */
1209       if ((xd->mode_info_context->mbmi.segment_id == 1) &&
1210           ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) ||
1211            (xd->mode_info_context->mbmi.mode != ZEROMV))) {
1212         xd->mode_info_context->mbmi.segment_id = 0;
1213 
1214         /* segment_id changed, so update */
1215         vp8cx_mb_init_quantizer(cpi, x, 1);
1216       }
1217     }
1218   }
1219 
1220   {
1221     /* Experimental code.
1222      * Special case for gf and arf zeromv modes, for 1 temporal layer.
1223      * Increase zbin size to suppress noise.
1224      */
1225     x->zbin_mode_boost = 0;
1226     if (x->zbin_mode_boost_enabled) {
1227       if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) {
1228         if (xd->mode_info_context->mbmi.mode == ZEROMV) {
1229           if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME &&
1230               cpi->oxcf.number_of_layers == 1) {
1231             x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
1232           } else {
1233             x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
1234           }
1235         } else if (xd->mode_info_context->mbmi.mode == SPLITMV) {
1236           x->zbin_mode_boost = 0;
1237         } else {
1238           x->zbin_mode_boost = MV_ZBIN_BOOST;
1239         }
1240       }
1241     }
1242 
1243     /* The fast quantizer doesn't use zbin_extra, only do so with
1244      * the regular quantizer. */
1245     if (cpi->sf.improved_quant) vp8_update_zbin_extra(cpi, x);
1246   }
1247 
1248   x->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame]++;
1249 
1250   if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
1251     vp8_encode_intra16x16mbuv(x);
1252 
1253     if (xd->mode_info_context->mbmi.mode == B_PRED) {
1254       vp8_encode_intra4x4mby(x);
1255     } else {
1256       vp8_encode_intra16x16mby(x);
1257     }
1258 
1259     sum_intra_stats(cpi, x);
1260   } else {
1261     int ref_fb_idx;
1262 
1263     if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) {
1264       ref_fb_idx = cpi->common.lst_fb_idx;
1265     } else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) {
1266       ref_fb_idx = cpi->common.gld_fb_idx;
1267     } else {
1268       ref_fb_idx = cpi->common.alt_fb_idx;
1269     }
1270 
1271     xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
1272     xd->pre.u_buffer =
1273         cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
1274     xd->pre.v_buffer =
1275         cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
1276 
1277     if (!x->skip) {
1278       vp8_encode_inter16x16(x);
1279     } else {
1280       vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer, xd->dst.u_buffer,
1281                                          xd->dst.v_buffer, xd->dst.y_stride,
1282                                          xd->dst.uv_stride);
1283     }
1284   }
1285 
1286   if (!x->skip) {
1287     vp8_tokenize_mb(cpi, x, t);
1288 
1289     if (xd->mode_info_context->mbmi.mode != B_PRED) {
1290       vp8_inverse_transform_mby(xd);
1291     }
1292 
1293     vp8_dequant_idct_add_uv_block(xd->qcoeff + 16 * 16, xd->dequant_uv,
1294                                   xd->dst.u_buffer, xd->dst.v_buffer,
1295                                   xd->dst.uv_stride, xd->eobs + 16);
1296   } else {
1297     /* always set mb_skip_coeff as it is needed by the loopfilter */
1298     xd->mode_info_context->mbmi.mb_skip_coeff = 1;
1299 
1300     if (cpi->common.mb_no_coeff_skip) {
1301       x->skip_true_count++;
1302       vp8_fix_contexts(xd);
1303     } else {
1304       vp8_stuff_mb(cpi, x, t);
1305     }
1306   }
1307 
1308   return rate;
1309 }
1310